gitx.do 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -353
- package/dist/do/logger.d.ts +50 -0
- package/dist/do/logger.d.ts.map +1 -0
- package/dist/do/logger.js +122 -0
- package/dist/do/logger.js.map +1 -0
- package/dist/{durable-object → do}/schema.d.ts +3 -3
- package/dist/do/schema.d.ts.map +1 -0
- package/dist/{durable-object → do}/schema.js +4 -3
- package/dist/do/schema.js.map +1 -0
- package/dist/do/types.d.ts +267 -0
- package/dist/do/types.d.ts.map +1 -0
- package/dist/do/types.js +62 -0
- package/dist/do/types.js.map +1 -0
- package/dist/index.d.ts +15 -415
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +31 -483
- package/dist/index.js.map +1 -1
- package/package.json +13 -21
- package/dist/cli/commands/add.d.ts +0 -174
- package/dist/cli/commands/add.d.ts.map +0 -1
- package/dist/cli/commands/add.js +0 -131
- package/dist/cli/commands/add.js.map +0 -1
- package/dist/cli/commands/blame.d.ts +0 -259
- package/dist/cli/commands/blame.d.ts.map +0 -1
- package/dist/cli/commands/blame.js +0 -609
- package/dist/cli/commands/blame.js.map +0 -1
- package/dist/cli/commands/branch.d.ts +0 -249
- package/dist/cli/commands/branch.d.ts.map +0 -1
- package/dist/cli/commands/branch.js +0 -693
- package/dist/cli/commands/branch.js.map +0 -1
- package/dist/cli/commands/commit.d.ts +0 -182
- package/dist/cli/commands/commit.d.ts.map +0 -1
- package/dist/cli/commands/commit.js +0 -437
- package/dist/cli/commands/commit.js.map +0 -1
- package/dist/cli/commands/diff.d.ts +0 -464
- package/dist/cli/commands/diff.d.ts.map +0 -1
- package/dist/cli/commands/diff.js +0 -958
- package/dist/cli/commands/diff.js.map +0 -1
- package/dist/cli/commands/log.d.ts +0 -239
- package/dist/cli/commands/log.d.ts.map +0 -1
- package/dist/cli/commands/log.js +0 -535
- package/dist/cli/commands/log.js.map +0 -1
- package/dist/cli/commands/merge.d.ts +0 -106
- package/dist/cli/commands/merge.d.ts.map +0 -1
- package/dist/cli/commands/merge.js +0 -55
- package/dist/cli/commands/merge.js.map +0 -1
- package/dist/cli/commands/review.d.ts +0 -457
- package/dist/cli/commands/review.d.ts.map +0 -1
- package/dist/cli/commands/review.js +0 -533
- package/dist/cli/commands/review.js.map +0 -1
- package/dist/cli/commands/status.d.ts +0 -269
- package/dist/cli/commands/status.d.ts.map +0 -1
- package/dist/cli/commands/status.js +0 -493
- package/dist/cli/commands/status.js.map +0 -1
- package/dist/cli/commands/web.d.ts +0 -199
- package/dist/cli/commands/web.d.ts.map +0 -1
- package/dist/cli/commands/web.js +0 -696
- package/dist/cli/commands/web.js.map +0 -1
- package/dist/cli/fs-adapter.d.ts +0 -656
- package/dist/cli/fs-adapter.d.ts.map +0 -1
- package/dist/cli/fs-adapter.js +0 -1179
- package/dist/cli/fs-adapter.js.map +0 -1
- package/dist/cli/fsx-cli-adapter.d.ts +0 -359
- package/dist/cli/fsx-cli-adapter.d.ts.map +0 -1
- package/dist/cli/fsx-cli-adapter.js +0 -619
- package/dist/cli/fsx-cli-adapter.js.map +0 -1
- package/dist/cli/index.d.ts +0 -387
- package/dist/cli/index.d.ts.map +0 -1
- package/dist/cli/index.js +0 -523
- package/dist/cli/index.js.map +0 -1
- package/dist/cli/ui/components/DiffView.d.ts +0 -7
- package/dist/cli/ui/components/DiffView.d.ts.map +0 -1
- package/dist/cli/ui/components/DiffView.js +0 -11
- package/dist/cli/ui/components/DiffView.js.map +0 -1
- package/dist/cli/ui/components/ErrorDisplay.d.ts +0 -6
- package/dist/cli/ui/components/ErrorDisplay.d.ts.map +0 -1
- package/dist/cli/ui/components/ErrorDisplay.js +0 -11
- package/dist/cli/ui/components/ErrorDisplay.js.map +0 -1
- package/dist/cli/ui/components/FuzzySearch.d.ts +0 -9
- package/dist/cli/ui/components/FuzzySearch.d.ts.map +0 -1
- package/dist/cli/ui/components/FuzzySearch.js +0 -12
- package/dist/cli/ui/components/FuzzySearch.js.map +0 -1
- package/dist/cli/ui/components/LoadingSpinner.d.ts +0 -6
- package/dist/cli/ui/components/LoadingSpinner.d.ts.map +0 -1
- package/dist/cli/ui/components/LoadingSpinner.js +0 -10
- package/dist/cli/ui/components/LoadingSpinner.js.map +0 -1
- package/dist/cli/ui/components/NavigationList.d.ts +0 -9
- package/dist/cli/ui/components/NavigationList.d.ts.map +0 -1
- package/dist/cli/ui/components/NavigationList.js +0 -11
- package/dist/cli/ui/components/NavigationList.js.map +0 -1
- package/dist/cli/ui/components/ScrollableContent.d.ts +0 -8
- package/dist/cli/ui/components/ScrollableContent.d.ts.map +0 -1
- package/dist/cli/ui/components/ScrollableContent.js +0 -11
- package/dist/cli/ui/components/ScrollableContent.js.map +0 -1
- package/dist/cli/ui/components/index.d.ts +0 -7
- package/dist/cli/ui/components/index.d.ts.map +0 -1
- package/dist/cli/ui/components/index.js +0 -9
- package/dist/cli/ui/components/index.js.map +0 -1
- package/dist/cli/ui/terminal-ui.d.ts +0 -52
- package/dist/cli/ui/terminal-ui.d.ts.map +0 -1
- package/dist/cli/ui/terminal-ui.js +0 -121
- package/dist/cli/ui/terminal-ui.js.map +0 -1
- package/dist/do/BashModule.d.ts +0 -871
- package/dist/do/BashModule.d.ts.map +0 -1
- package/dist/do/BashModule.js +0 -1143
- package/dist/do/BashModule.js.map +0 -1
- package/dist/do/FsModule.d.ts +0 -601
- package/dist/do/FsModule.d.ts.map +0 -1
- package/dist/do/FsModule.js +0 -1120
- package/dist/do/FsModule.js.map +0 -1
- package/dist/do/GitModule.d.ts +0 -635
- package/dist/do/GitModule.d.ts.map +0 -1
- package/dist/do/GitModule.js +0 -781
- package/dist/do/GitModule.js.map +0 -1
- package/dist/do/GitRepoDO.d.ts +0 -281
- package/dist/do/GitRepoDO.d.ts.map +0 -1
- package/dist/do/GitRepoDO.js +0 -479
- package/dist/do/GitRepoDO.js.map +0 -1
- package/dist/do/bash-ast.d.ts +0 -246
- package/dist/do/bash-ast.d.ts.map +0 -1
- package/dist/do/bash-ast.js +0 -888
- package/dist/do/bash-ast.js.map +0 -1
- package/dist/do/container-executor.d.ts +0 -491
- package/dist/do/container-executor.d.ts.map +0 -1
- package/dist/do/container-executor.js +0 -730
- package/dist/do/container-executor.js.map +0 -1
- package/dist/do/index.d.ts +0 -53
- package/dist/do/index.d.ts.map +0 -1
- package/dist/do/index.js +0 -91
- package/dist/do/index.js.map +0 -1
- package/dist/do/tiered-storage.d.ts +0 -403
- package/dist/do/tiered-storage.d.ts.map +0 -1
- package/dist/do/tiered-storage.js +0 -689
- package/dist/do/tiered-storage.js.map +0 -1
- package/dist/do/withBash.d.ts +0 -231
- package/dist/do/withBash.d.ts.map +0 -1
- package/dist/do/withBash.js +0 -244
- package/dist/do/withBash.js.map +0 -1
- package/dist/do/withFs.d.ts +0 -237
- package/dist/do/withFs.d.ts.map +0 -1
- package/dist/do/withFs.js +0 -387
- package/dist/do/withFs.js.map +0 -1
- package/dist/do/withGit.d.ts +0 -180
- package/dist/do/withGit.d.ts.map +0 -1
- package/dist/do/withGit.js +0 -271
- package/dist/do/withGit.js.map +0 -1
- package/dist/durable-object/object-store.d.ts +0 -633
- package/dist/durable-object/object-store.d.ts.map +0 -1
- package/dist/durable-object/object-store.js +0 -1161
- package/dist/durable-object/object-store.js.map +0 -1
- package/dist/durable-object/schema.d.ts.map +0 -1
- package/dist/durable-object/schema.js.map +0 -1
- package/dist/durable-object/wal.d.ts +0 -416
- package/dist/durable-object/wal.d.ts.map +0 -1
- package/dist/durable-object/wal.js +0 -445
- package/dist/durable-object/wal.js.map +0 -1
- package/dist/mcp/adapter.d.ts +0 -772
- package/dist/mcp/adapter.d.ts.map +0 -1
- package/dist/mcp/adapter.js +0 -895
- package/dist/mcp/adapter.js.map +0 -1
- package/dist/mcp/sandbox/miniflare-evaluator.d.ts +0 -22
- package/dist/mcp/sandbox/miniflare-evaluator.d.ts.map +0 -1
- package/dist/mcp/sandbox/miniflare-evaluator.js +0 -140
- package/dist/mcp/sandbox/miniflare-evaluator.js.map +0 -1
- package/dist/mcp/sandbox/object-store-proxy.d.ts +0 -32
- package/dist/mcp/sandbox/object-store-proxy.d.ts.map +0 -1
- package/dist/mcp/sandbox/object-store-proxy.js +0 -30
- package/dist/mcp/sandbox/object-store-proxy.js.map +0 -1
- package/dist/mcp/sandbox/template.d.ts +0 -17
- package/dist/mcp/sandbox/template.d.ts.map +0 -1
- package/dist/mcp/sandbox/template.js +0 -71
- package/dist/mcp/sandbox/template.js.map +0 -1
- package/dist/mcp/sandbox.d.ts +0 -764
- package/dist/mcp/sandbox.d.ts.map +0 -1
- package/dist/mcp/sandbox.js +0 -1362
- package/dist/mcp/sandbox.js.map +0 -1
- package/dist/mcp/sdk-adapter.d.ts +0 -835
- package/dist/mcp/sdk-adapter.d.ts.map +0 -1
- package/dist/mcp/sdk-adapter.js +0 -974
- package/dist/mcp/sdk-adapter.js.map +0 -1
- package/dist/mcp/tools/do.d.ts +0 -32
- package/dist/mcp/tools/do.d.ts.map +0 -1
- package/dist/mcp/tools/do.js +0 -115
- package/dist/mcp/tools/do.js.map +0 -1
- package/dist/mcp/tools.d.ts +0 -548
- package/dist/mcp/tools.d.ts.map +0 -1
- package/dist/mcp/tools.js +0 -1934
- package/dist/mcp/tools.js.map +0 -1
- package/dist/ops/blame.d.ts +0 -551
- package/dist/ops/blame.d.ts.map +0 -1
- package/dist/ops/blame.js +0 -1037
- package/dist/ops/blame.js.map +0 -1
- package/dist/ops/branch.d.ts +0 -766
- package/dist/ops/branch.d.ts.map +0 -1
- package/dist/ops/branch.js +0 -950
- package/dist/ops/branch.js.map +0 -1
- package/dist/ops/commit-traversal.d.ts +0 -349
- package/dist/ops/commit-traversal.d.ts.map +0 -1
- package/dist/ops/commit-traversal.js +0 -821
- package/dist/ops/commit-traversal.js.map +0 -1
- package/dist/ops/commit.d.ts +0 -555
- package/dist/ops/commit.d.ts.map +0 -1
- package/dist/ops/commit.js +0 -826
- package/dist/ops/commit.js.map +0 -1
- package/dist/ops/merge-base.d.ts +0 -397
- package/dist/ops/merge-base.d.ts.map +0 -1
- package/dist/ops/merge-base.js +0 -691
- package/dist/ops/merge-base.js.map +0 -1
- package/dist/ops/merge.d.ts +0 -855
- package/dist/ops/merge.d.ts.map +0 -1
- package/dist/ops/merge.js +0 -1551
- package/dist/ops/merge.js.map +0 -1
- package/dist/ops/tag.d.ts +0 -247
- package/dist/ops/tag.d.ts.map +0 -1
- package/dist/ops/tag.js +0 -649
- package/dist/ops/tag.js.map +0 -1
- package/dist/ops/tree-builder.d.ts +0 -178
- package/dist/ops/tree-builder.d.ts.map +0 -1
- package/dist/ops/tree-builder.js +0 -271
- package/dist/ops/tree-builder.js.map +0 -1
- package/dist/ops/tree-diff.d.ts +0 -291
- package/dist/ops/tree-diff.d.ts.map +0 -1
- package/dist/ops/tree-diff.js +0 -705
- package/dist/ops/tree-diff.js.map +0 -1
- package/dist/pack/delta.d.ts +0 -248
- package/dist/pack/delta.d.ts.map +0 -1
- package/dist/pack/delta.js +0 -736
- package/dist/pack/delta.js.map +0 -1
- package/dist/pack/format.d.ts +0 -446
- package/dist/pack/format.d.ts.map +0 -1
- package/dist/pack/format.js +0 -572
- package/dist/pack/format.js.map +0 -1
- package/dist/pack/full-generation.d.ts +0 -612
- package/dist/pack/full-generation.d.ts.map +0 -1
- package/dist/pack/full-generation.js +0 -1378
- package/dist/pack/full-generation.js.map +0 -1
- package/dist/pack/generation.d.ts +0 -441
- package/dist/pack/generation.d.ts.map +0 -1
- package/dist/pack/generation.js +0 -707
- package/dist/pack/generation.js.map +0 -1
- package/dist/pack/index.d.ts +0 -502
- package/dist/pack/index.d.ts.map +0 -1
- package/dist/pack/index.js +0 -833
- package/dist/pack/index.js.map +0 -1
- package/dist/refs/branch.d.ts +0 -668
- package/dist/refs/branch.d.ts.map +0 -1
- package/dist/refs/branch.js +0 -897
- package/dist/refs/branch.js.map +0 -1
- package/dist/refs/storage.d.ts +0 -833
- package/dist/refs/storage.d.ts.map +0 -1
- package/dist/refs/storage.js +0 -1023
- package/dist/refs/storage.js.map +0 -1
- package/dist/refs/tag.d.ts +0 -860
- package/dist/refs/tag.d.ts.map +0 -1
- package/dist/refs/tag.js +0 -996
- package/dist/refs/tag.js.map +0 -1
- package/dist/storage/backend.d.ts +0 -425
- package/dist/storage/backend.d.ts.map +0 -1
- package/dist/storage/backend.js +0 -41
- package/dist/storage/backend.js.map +0 -1
- package/dist/storage/fsx-adapter.d.ts +0 -204
- package/dist/storage/fsx-adapter.d.ts.map +0 -1
- package/dist/storage/fsx-adapter.js +0 -470
- package/dist/storage/fsx-adapter.js.map +0 -1
- package/dist/storage/lru-cache.d.ts +0 -691
- package/dist/storage/lru-cache.d.ts.map +0 -1
- package/dist/storage/lru-cache.js +0 -813
- package/dist/storage/lru-cache.js.map +0 -1
- package/dist/storage/object-index.d.ts +0 -585
- package/dist/storage/object-index.d.ts.map +0 -1
- package/dist/storage/object-index.js +0 -532
- package/dist/storage/object-index.js.map +0 -1
- package/dist/storage/r2-pack.d.ts +0 -1257
- package/dist/storage/r2-pack.d.ts.map +0 -1
- package/dist/storage/r2-pack.js +0 -1770
- package/dist/storage/r2-pack.js.map +0 -1
- package/dist/tiered/cdc-pipeline.d.ts +0 -1888
- package/dist/tiered/cdc-pipeline.d.ts.map +0 -1
- package/dist/tiered/cdc-pipeline.js +0 -1880
- package/dist/tiered/cdc-pipeline.js.map +0 -1
- package/dist/tiered/migration.d.ts +0 -1104
- package/dist/tiered/migration.d.ts.map +0 -1
- package/dist/tiered/migration.js +0 -1214
- package/dist/tiered/migration.js.map +0 -1
- package/dist/tiered/parquet-writer.d.ts +0 -1145
- package/dist/tiered/parquet-writer.d.ts.map +0 -1
- package/dist/tiered/parquet-writer.js +0 -1183
- package/dist/tiered/parquet-writer.js.map +0 -1
- package/dist/tiered/read-path.d.ts +0 -835
- package/dist/tiered/read-path.d.ts.map +0 -1
- package/dist/tiered/read-path.js +0 -487
- package/dist/tiered/read-path.js.map +0 -1
- package/dist/types/capability.d.ts +0 -1385
- package/dist/types/capability.d.ts.map +0 -1
- package/dist/types/capability.js +0 -36
- package/dist/types/capability.js.map +0 -1
- package/dist/types/index.d.ts +0 -13
- package/dist/types/index.d.ts.map +0 -1
- package/dist/types/index.js +0 -18
- package/dist/types/index.js.map +0 -1
- package/dist/types/objects.d.ts +0 -692
- package/dist/types/objects.d.ts.map +0 -1
- package/dist/types/objects.js +0 -837
- package/dist/types/objects.js.map +0 -1
- package/dist/types/storage.d.ts +0 -603
- package/dist/types/storage.d.ts.map +0 -1
- package/dist/types/storage.js +0 -191
- package/dist/types/storage.js.map +0 -1
- package/dist/types/worker-loader.d.ts +0 -60
- package/dist/types/worker-loader.d.ts.map +0 -1
- package/dist/types/worker-loader.js +0 -62
- package/dist/types/worker-loader.js.map +0 -1
- package/dist/utils/hash.d.ts +0 -197
- package/dist/utils/hash.d.ts.map +0 -1
- package/dist/utils/hash.js +0 -268
- package/dist/utils/hash.js.map +0 -1
- package/dist/utils/sha1.d.ts +0 -290
- package/dist/utils/sha1.d.ts.map +0 -1
- package/dist/utils/sha1.js +0 -582
- package/dist/utils/sha1.js.map +0 -1
- package/dist/wire/capabilities.d.ts +0 -1044
- package/dist/wire/capabilities.d.ts.map +0 -1
- package/dist/wire/capabilities.js +0 -941
- package/dist/wire/capabilities.js.map +0 -1
- package/dist/wire/path-security.d.ts +0 -157
- package/dist/wire/path-security.d.ts.map +0 -1
- package/dist/wire/path-security.js +0 -307
- package/dist/wire/path-security.js.map +0 -1
- package/dist/wire/pkt-line.d.ts +0 -345
- package/dist/wire/pkt-line.d.ts.map +0 -1
- package/dist/wire/pkt-line.js +0 -381
- package/dist/wire/pkt-line.js.map +0 -1
- package/dist/wire/receive-pack.d.ts +0 -1059
- package/dist/wire/receive-pack.d.ts.map +0 -1
- package/dist/wire/receive-pack.js +0 -1414
- package/dist/wire/receive-pack.js.map +0 -1
- package/dist/wire/smart-http.d.ts +0 -799
- package/dist/wire/smart-http.d.ts.map +0 -1
- package/dist/wire/smart-http.js +0 -945
- package/dist/wire/smart-http.js.map +0 -1
- package/dist/wire/upload-pack.d.ts +0 -727
- package/dist/wire/upload-pack.d.ts.map +0 -1
- package/dist/wire/upload-pack.js +0 -1138
- package/dist/wire/upload-pack.js.map +0 -1
|
@@ -1,1145 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @fileoverview Parquet Writer for Git Analytics
|
|
3
|
-
*
|
|
4
|
-
* @description
|
|
5
|
-
* Provides functionality to write git analytics data to Parquet format, a
|
|
6
|
-
* columnar storage format optimized for analytical queries. This module
|
|
7
|
-
* enables efficient storage and querying of Git repository data.
|
|
8
|
-
*
|
|
9
|
-
* **Key Features:**
|
|
10
|
-
* - Schema definition with various field types (STRING, INT32, INT64, etc.)
|
|
11
|
-
* - Multiple compression algorithms (SNAPPY, GZIP, ZSTD, LZ4, UNCOMPRESSED)
|
|
12
|
-
* - Row group management for efficient columnar storage
|
|
13
|
-
* - Automatic and manual row group flushing
|
|
14
|
-
* - Column-level statistics generation (min, max, null count)
|
|
15
|
-
* - Custom key-value metadata support
|
|
16
|
-
* - Memory-efficient streaming writes
|
|
17
|
-
*
|
|
18
|
-
* **Parquet Format:**
|
|
19
|
-
* The generated files follow the Parquet format with:
|
|
20
|
-
* - Magic bytes "PAR1" at start and end
|
|
21
|
-
* - Row group data organized by columns
|
|
22
|
-
* - Footer metadata containing schema and statistics
|
|
23
|
-
*
|
|
24
|
-
* @example
|
|
25
|
-
* ```typescript
|
|
26
|
-
* // Define schema for commit analytics
|
|
27
|
-
* const schema = defineSchema([
|
|
28
|
-
* { name: 'commit_sha', type: ParquetFieldType.STRING, required: true },
|
|
29
|
-
* { name: 'author', type: ParquetFieldType.STRING, required: true },
|
|
30
|
-
* { name: 'timestamp', type: ParquetFieldType.TIMESTAMP_MILLIS, required: true },
|
|
31
|
-
* { name: 'file_count', type: ParquetFieldType.INT32, required: false }
|
|
32
|
-
* ])
|
|
33
|
-
*
|
|
34
|
-
* // Create writer with options
|
|
35
|
-
* const writer = createParquetWriter(schema, {
|
|
36
|
-
* rowGroupSize: 10000,
|
|
37
|
-
* compression: ParquetCompression.SNAPPY,
|
|
38
|
-
* enableStatistics: true
|
|
39
|
-
* })
|
|
40
|
-
*
|
|
41
|
-
* // Write data
|
|
42
|
-
* await writer.writeRows([
|
|
43
|
-
* { commit_sha: 'abc123...', author: 'alice', timestamp: Date.now(), file_count: 5 },
|
|
44
|
-
* { commit_sha: 'def456...', author: 'bob', timestamp: Date.now(), file_count: 3 }
|
|
45
|
-
* ])
|
|
46
|
-
*
|
|
47
|
-
* // Generate the Parquet file
|
|
48
|
-
* const buffer = await writer.toBuffer()
|
|
49
|
-
* ```
|
|
50
|
-
*
|
|
51
|
-
* @module tiered/parquet-writer
|
|
52
|
-
* @see {@link ParquetWriter} - Main writer class
|
|
53
|
-
* @see {@link defineSchema} - Schema definition helper
|
|
54
|
-
*/
|
|
55
|
-
/**
|
|
56
|
-
* Supported Parquet field types.
|
|
57
|
-
*
|
|
58
|
-
* @description
|
|
59
|
-
* Defines the data types that can be used for fields in a Parquet schema.
|
|
60
|
-
* Each type maps to an appropriate physical and logical Parquet type.
|
|
61
|
-
*
|
|
62
|
-
* @example
|
|
63
|
-
* ```typescript
|
|
64
|
-
* const field: ParquetField = {
|
|
65
|
-
* name: 'count',
|
|
66
|
-
* type: ParquetFieldType.INT64,
|
|
67
|
-
* required: true
|
|
68
|
-
* }
|
|
69
|
-
* ```
|
|
70
|
-
*
|
|
71
|
-
* @enum {string}
|
|
72
|
-
*/
|
|
73
|
-
export declare enum ParquetFieldType {
|
|
74
|
-
/**
|
|
75
|
-
* UTF-8 encoded string.
|
|
76
|
-
* Maps to Parquet BYTE_ARRAY with UTF8 logical type.
|
|
77
|
-
*/
|
|
78
|
-
STRING = "STRING",
|
|
79
|
-
/**
|
|
80
|
-
* 32-bit signed integer.
|
|
81
|
-
* Maps to Parquet INT32 physical type.
|
|
82
|
-
*/
|
|
83
|
-
INT32 = "INT32",
|
|
84
|
-
/**
|
|
85
|
-
* 64-bit signed integer.
|
|
86
|
-
* Maps to Parquet INT64 physical type.
|
|
87
|
-
*/
|
|
88
|
-
INT64 = "INT64",
|
|
89
|
-
/**
|
|
90
|
-
* Boolean value (true/false).
|
|
91
|
-
* Maps to Parquet BOOLEAN physical type.
|
|
92
|
-
*/
|
|
93
|
-
BOOLEAN = "BOOLEAN",
|
|
94
|
-
/**
|
|
95
|
-
* 32-bit IEEE 754 floating point.
|
|
96
|
-
* Maps to Parquet FLOAT physical type.
|
|
97
|
-
*/
|
|
98
|
-
FLOAT = "FLOAT",
|
|
99
|
-
/**
|
|
100
|
-
* 64-bit IEEE 754 floating point.
|
|
101
|
-
* Maps to Parquet DOUBLE physical type.
|
|
102
|
-
*/
|
|
103
|
-
DOUBLE = "DOUBLE",
|
|
104
|
-
/**
|
|
105
|
-
* Raw binary data.
|
|
106
|
-
* Maps to Parquet BYTE_ARRAY physical type.
|
|
107
|
-
*/
|
|
108
|
-
BINARY = "BINARY",
|
|
109
|
-
/**
|
|
110
|
-
* Timestamp with millisecond precision.
|
|
111
|
-
* Maps to Parquet INT64 with TIMESTAMP_MILLIS logical type.
|
|
112
|
-
*/
|
|
113
|
-
TIMESTAMP_MILLIS = "TIMESTAMP_MILLIS",
|
|
114
|
-
/**
|
|
115
|
-
* Timestamp with microsecond precision.
|
|
116
|
-
* Maps to Parquet INT64 with TIMESTAMP_MICROS logical type.
|
|
117
|
-
*/
|
|
118
|
-
TIMESTAMP_MICROS = "TIMESTAMP_MICROS"
|
|
119
|
-
}
|
|
120
|
-
/**
|
|
121
|
-
* Supported compression types for Parquet data.
|
|
122
|
-
*
|
|
123
|
-
* @description
|
|
124
|
-
* Different compression algorithms offer trade-offs between compression
|
|
125
|
-
* ratio, compression speed, and decompression speed.
|
|
126
|
-
*
|
|
127
|
-
* **Comparison:**
|
|
128
|
-
* - SNAPPY: Fast compression/decompression, moderate ratio (default)
|
|
129
|
-
* - GZIP: Higher ratio, slower compression, fast decompression
|
|
130
|
-
* - ZSTD: Best ratio, good speed, requires more memory
|
|
131
|
-
* - LZ4: Fastest, lower ratio
|
|
132
|
-
* - UNCOMPRESSED: No compression overhead
|
|
133
|
-
*
|
|
134
|
-
* @example
|
|
135
|
-
* ```typescript
|
|
136
|
-
* const writer = createParquetWriter(schema, {
|
|
137
|
-
* compression: ParquetCompression.ZSTD
|
|
138
|
-
* })
|
|
139
|
-
* ```
|
|
140
|
-
*
|
|
141
|
-
* @enum {string}
|
|
142
|
-
*/
|
|
143
|
-
export declare enum ParquetCompression {
|
|
144
|
-
/**
|
|
145
|
-
* No compression applied.
|
|
146
|
-
* Fastest writes, largest file size.
|
|
147
|
-
*/
|
|
148
|
-
UNCOMPRESSED = "UNCOMPRESSED",
|
|
149
|
-
/**
|
|
150
|
-
* Snappy compression (default).
|
|
151
|
-
* Good balance of speed and compression ratio.
|
|
152
|
-
*/
|
|
153
|
-
SNAPPY = "SNAPPY",
|
|
154
|
-
/**
|
|
155
|
-
* GZIP compression.
|
|
156
|
-
* Higher compression ratio, slower compression.
|
|
157
|
-
*/
|
|
158
|
-
GZIP = "GZIP",
|
|
159
|
-
/**
|
|
160
|
-
* Zstandard compression.
|
|
161
|
-
* Best compression ratio with good speed.
|
|
162
|
-
*/
|
|
163
|
-
ZSTD = "ZSTD",
|
|
164
|
-
/**
|
|
165
|
-
* LZ4 compression.
|
|
166
|
-
* Fastest compression, lower ratio.
|
|
167
|
-
*/
|
|
168
|
-
LZ4 = "LZ4"
|
|
169
|
-
}
|
|
170
|
-
/**
|
|
171
|
-
* Field definition for a Parquet schema.
|
|
172
|
-
*
|
|
173
|
-
* @description
|
|
174
|
-
* Defines a single column in the Parquet schema, including its name,
|
|
175
|
-
* data type, nullability, and optional metadata.
|
|
176
|
-
*
|
|
177
|
-
* @example
|
|
178
|
-
* ```typescript
|
|
179
|
-
* const nameField: ParquetField = {
|
|
180
|
-
* name: 'user_name',
|
|
181
|
-
* type: ParquetFieldType.STRING,
|
|
182
|
-
* required: true,
|
|
183
|
-
* metadata: { description: 'The user display name' }
|
|
184
|
-
* }
|
|
185
|
-
*
|
|
186
|
-
* const ageField: ParquetField = {
|
|
187
|
-
* name: 'age',
|
|
188
|
-
* type: ParquetFieldType.INT32,
|
|
189
|
-
* required: false // nullable
|
|
190
|
-
* }
|
|
191
|
-
* ```
|
|
192
|
-
*
|
|
193
|
-
* @interface ParquetField
|
|
194
|
-
*/
|
|
195
|
-
export interface ParquetField {
|
|
196
|
-
/**
|
|
197
|
-
* Column name.
|
|
198
|
-
* Must be unique within the schema and non-empty.
|
|
199
|
-
*/
|
|
200
|
-
name: string;
|
|
201
|
-
/**
|
|
202
|
-
* Data type of the column.
|
|
203
|
-
*
|
|
204
|
-
* @see {@link ParquetFieldType}
|
|
205
|
-
*/
|
|
206
|
-
type: ParquetFieldType;
|
|
207
|
-
/**
|
|
208
|
-
* Whether the field is required (non-nullable).
|
|
209
|
-
* If true, null values will cause validation errors.
|
|
210
|
-
*/
|
|
211
|
-
required: boolean;
|
|
212
|
-
/**
|
|
213
|
-
* Optional key-value metadata for the field.
|
|
214
|
-
* Can be used for descriptions, units, etc.
|
|
215
|
-
*/
|
|
216
|
-
metadata?: Record<string, string>;
|
|
217
|
-
}
|
|
218
|
-
/**
|
|
219
|
-
* Parquet schema definition.
|
|
220
|
-
*
|
|
221
|
-
* @description
|
|
222
|
-
* Defines the complete schema for a Parquet file, including all fields
|
|
223
|
-
* and optional schema-level metadata.
|
|
224
|
-
*
|
|
225
|
-
* @example
|
|
226
|
-
* ```typescript
|
|
227
|
-
* const schema: ParquetSchema = {
|
|
228
|
-
* fields: [
|
|
229
|
-
* { name: 'id', type: ParquetFieldType.INT64, required: true },
|
|
230
|
-
* { name: 'name', type: ParquetFieldType.STRING, required: true }
|
|
231
|
-
* ],
|
|
232
|
-
* metadata: {
|
|
233
|
-
* created_by: 'gitdo',
|
|
234
|
-
* version: '1.0'
|
|
235
|
-
* }
|
|
236
|
-
* }
|
|
237
|
-
* ```
|
|
238
|
-
*
|
|
239
|
-
* @interface ParquetSchema
|
|
240
|
-
*/
|
|
241
|
-
export interface ParquetSchema {
|
|
242
|
-
/**
|
|
243
|
-
* Array of field definitions for all columns.
|
|
244
|
-
* Order determines column order in the file.
|
|
245
|
-
*/
|
|
246
|
-
fields: ParquetField[];
|
|
247
|
-
/**
|
|
248
|
-
* Optional schema-level metadata.
|
|
249
|
-
* Stored in the Parquet file footer.
|
|
250
|
-
*/
|
|
251
|
-
metadata?: Record<string, string>;
|
|
252
|
-
}
|
|
253
|
-
/**
|
|
254
|
-
* Options for creating a Parquet writer.
|
|
255
|
-
*
|
|
256
|
-
* @description
|
|
257
|
-
* Configuration options that control how the Parquet file is written,
|
|
258
|
-
* including row group sizing, compression, and statistics generation.
|
|
259
|
-
*
|
|
260
|
-
* @example
|
|
261
|
-
* ```typescript
|
|
262
|
-
* const options: ParquetWriteOptions = {
|
|
263
|
-
* rowGroupSize: 50000, // 50K rows per group
|
|
264
|
-
* rowGroupMemoryLimit: 64 * 1024 * 1024, // 64MB memory limit
|
|
265
|
-
* compression: ParquetCompression.ZSTD,
|
|
266
|
-
* columnCompression: {
|
|
267
|
-
* 'binary_data': ParquetCompression.LZ4 // Fast for binary
|
|
268
|
-
* },
|
|
269
|
-
* enableStatistics: true,
|
|
270
|
-
* sortBy: ['timestamp'],
|
|
271
|
-
* partitionColumns: ['date']
|
|
272
|
-
* }
|
|
273
|
-
* ```
|
|
274
|
-
*
|
|
275
|
-
* @interface ParquetWriteOptions
|
|
276
|
-
*/
|
|
277
|
-
export interface ParquetWriteOptions {
|
|
278
|
-
/**
|
|
279
|
-
* Maximum number of rows per row group.
|
|
280
|
-
* Smaller groups = more granular reads, larger groups = better compression.
|
|
281
|
-
*
|
|
282
|
-
* @default 65536
|
|
283
|
-
*/
|
|
284
|
-
rowGroupSize?: number;
|
|
285
|
-
/**
|
|
286
|
-
* Maximum memory size in bytes for a row group.
|
|
287
|
-
* Triggers flush when reached, regardless of row count.
|
|
288
|
-
*/
|
|
289
|
-
rowGroupMemoryLimit?: number;
|
|
290
|
-
/**
|
|
291
|
-
* Default compression algorithm for all columns.
|
|
292
|
-
*
|
|
293
|
-
* @default ParquetCompression.SNAPPY
|
|
294
|
-
*/
|
|
295
|
-
compression?: ParquetCompression;
|
|
296
|
-
/**
|
|
297
|
-
* Per-column compression overrides.
|
|
298
|
-
* Keys are column names, values are compression types.
|
|
299
|
-
*/
|
|
300
|
-
columnCompression?: Record<string, ParquetCompression>;
|
|
301
|
-
/**
|
|
302
|
-
* Whether to compute and store column statistics.
|
|
303
|
-
* Enables predicate pushdown during queries.
|
|
304
|
-
*
|
|
305
|
-
* @default false
|
|
306
|
-
*/
|
|
307
|
-
enableStatistics?: boolean;
|
|
308
|
-
/**
|
|
309
|
-
* Columns to sort data by within each row group.
|
|
310
|
-
* Improves query performance for sorted access patterns.
|
|
311
|
-
*/
|
|
312
|
-
sortBy?: string[];
|
|
313
|
-
/**
|
|
314
|
-
* Columns used for partitioning.
|
|
315
|
-
* Informational metadata for partitioned datasets.
|
|
316
|
-
*/
|
|
317
|
-
partitionColumns?: string[];
|
|
318
|
-
}
|
|
319
|
-
/**
|
|
320
|
-
* Statistics for a single column in a row group.
|
|
321
|
-
*
|
|
322
|
-
* @description
|
|
323
|
-
* Column statistics enable query engines to skip row groups that don't
|
|
324
|
-
* contain relevant data (predicate pushdown).
|
|
325
|
-
*
|
|
326
|
-
* @example
|
|
327
|
-
* ```typescript
|
|
328
|
-
* const stats: ColumnStatistics = {
|
|
329
|
-
* min: 100,
|
|
330
|
-
* max: 999,
|
|
331
|
-
* nullCount: 5,
|
|
332
|
-
* distinctCount: 850
|
|
333
|
-
* }
|
|
334
|
-
* ```
|
|
335
|
-
*
|
|
336
|
-
* @interface ColumnStatistics
|
|
337
|
-
*/
|
|
338
|
-
export interface ColumnStatistics {
|
|
339
|
-
/**
|
|
340
|
-
* Minimum value in the column.
|
|
341
|
-
* Type depends on column type.
|
|
342
|
-
*/
|
|
343
|
-
min?: number | string | boolean;
|
|
344
|
-
/**
|
|
345
|
-
* Maximum value in the column.
|
|
346
|
-
* Type depends on column type.
|
|
347
|
-
*/
|
|
348
|
-
max?: number | string | boolean;
|
|
349
|
-
/**
|
|
350
|
-
* Number of null values in the column.
|
|
351
|
-
*/
|
|
352
|
-
nullCount?: number;
|
|
353
|
-
/**
|
|
354
|
-
* Approximate distinct value count.
|
|
355
|
-
* May not be exact for large datasets.
|
|
356
|
-
*/
|
|
357
|
-
distinctCount?: number;
|
|
358
|
-
}
|
|
359
|
-
/**
|
|
360
|
-
* Metadata for a column chunk within a row group.
|
|
361
|
-
*
|
|
362
|
-
* @description
|
|
363
|
-
* Contains information about a single column's data within a row group,
|
|
364
|
-
* including compression, sizes, and statistics.
|
|
365
|
-
*
|
|
366
|
-
* @interface ColumnChunkMetadata
|
|
367
|
-
*/
|
|
368
|
-
export interface ColumnChunkMetadata {
|
|
369
|
-
/**
|
|
370
|
-
* Column name.
|
|
371
|
-
*/
|
|
372
|
-
column: string;
|
|
373
|
-
/**
|
|
374
|
-
* Data type of the column.
|
|
375
|
-
*/
|
|
376
|
-
type: ParquetFieldType;
|
|
377
|
-
/**
|
|
378
|
-
* Compression used for this column chunk.
|
|
379
|
-
*/
|
|
380
|
-
compression: ParquetCompression;
|
|
381
|
-
/**
|
|
382
|
-
* Size in bytes after compression.
|
|
383
|
-
*/
|
|
384
|
-
encodedSize: number;
|
|
385
|
-
/**
|
|
386
|
-
* Size in bytes before compression.
|
|
387
|
-
*/
|
|
388
|
-
uncompressedSize: number;
|
|
389
|
-
/**
|
|
390
|
-
* Column statistics if statistics are enabled.
|
|
391
|
-
*/
|
|
392
|
-
statistics?: ColumnStatistics;
|
|
393
|
-
}
|
|
394
|
-
/**
|
|
395
|
-
* Row group representation in the Parquet file.
|
|
396
|
-
*
|
|
397
|
-
* @description
|
|
398
|
-
* A row group is a horizontal partition of the data containing all columns
|
|
399
|
-
* for a subset of rows. Row groups enable parallel processing and predicate
|
|
400
|
-
* pushdown optimizations.
|
|
401
|
-
*
|
|
402
|
-
* @interface RowGroup
|
|
403
|
-
*/
|
|
404
|
-
export interface RowGroup {
|
|
405
|
-
/**
|
|
406
|
-
* Number of rows in this row group.
|
|
407
|
-
*/
|
|
408
|
-
numRows: number;
|
|
409
|
-
/**
|
|
410
|
-
* Total compressed size in bytes.
|
|
411
|
-
*/
|
|
412
|
-
totalByteSize: number;
|
|
413
|
-
/**
|
|
414
|
-
* Metadata for each column chunk.
|
|
415
|
-
*/
|
|
416
|
-
columns: ColumnChunkMetadata[];
|
|
417
|
-
}
|
|
418
|
-
/**
|
|
419
|
-
* Complete metadata for a Parquet file.
|
|
420
|
-
*
|
|
421
|
-
* @description
|
|
422
|
-
* Contains all metadata stored in the Parquet file footer, including
|
|
423
|
-
* schema, row groups, and statistics. Used when reading files.
|
|
424
|
-
*
|
|
425
|
-
* @example
|
|
426
|
-
* ```typescript
|
|
427
|
-
* const metadata = getMetadata(parquetBuffer)
|
|
428
|
-
* console.log(`Rows: ${metadata.numRows}`)
|
|
429
|
-
* console.log(`Row groups: ${metadata.rowGroups.length}`)
|
|
430
|
-
* console.log(`Compression: ${metadata.compression}`)
|
|
431
|
-
* ```
|
|
432
|
-
*
|
|
433
|
-
* @interface ParquetMetadata
|
|
434
|
-
*/
|
|
435
|
-
export interface ParquetMetadata {
|
|
436
|
-
/**
|
|
437
|
-
* The file's schema definition.
|
|
438
|
-
*/
|
|
439
|
-
schema: ParquetSchema;
|
|
440
|
-
/**
|
|
441
|
-
* Total number of rows in the file.
|
|
442
|
-
*/
|
|
443
|
-
numRows: number;
|
|
444
|
-
/**
|
|
445
|
-
* Array of row group metadata.
|
|
446
|
-
*/
|
|
447
|
-
rowGroups: RowGroup[];
|
|
448
|
-
/**
|
|
449
|
-
* Default compression algorithm used.
|
|
450
|
-
*/
|
|
451
|
-
compression: ParquetCompression;
|
|
452
|
-
/**
|
|
453
|
-
* Per-column compression settings.
|
|
454
|
-
*/
|
|
455
|
-
columnMetadata?: Record<string, {
|
|
456
|
-
compression: ParquetCompression;
|
|
457
|
-
}>;
|
|
458
|
-
/**
|
|
459
|
-
* Custom key-value metadata.
|
|
460
|
-
*/
|
|
461
|
-
keyValueMetadata?: Record<string, string>;
|
|
462
|
-
/**
|
|
463
|
-
* Unix timestamp when the file was created.
|
|
464
|
-
*/
|
|
465
|
-
createdAt: number;
|
|
466
|
-
/**
|
|
467
|
-
* Total file size in bytes.
|
|
468
|
-
*/
|
|
469
|
-
fileSize: number;
|
|
470
|
-
/**
|
|
471
|
-
* Columns the data is sorted by.
|
|
472
|
-
*/
|
|
473
|
-
sortedBy?: string[];
|
|
474
|
-
/**
|
|
475
|
-
* Columns used for partitioning.
|
|
476
|
-
*/
|
|
477
|
-
partitionColumns?: string[];
|
|
478
|
-
}
|
|
479
|
-
/**
|
|
480
|
-
* Mock output stream interface for writing Parquet data.
|
|
481
|
-
*
|
|
482
|
-
* @description
|
|
483
|
-
* Simple interface for streaming Parquet output to a destination.
|
|
484
|
-
* Can be implemented for files, network streams, etc.
|
|
485
|
-
*
|
|
486
|
-
* @example
|
|
487
|
-
* ```typescript
|
|
488
|
-
* class BufferOutputStream implements OutputStream {
|
|
489
|
-
* private chunks: Uint8Array[] = []
|
|
490
|
-
*
|
|
491
|
-
* write(data: Uint8Array): void {
|
|
492
|
-
* this.chunks.push(data)
|
|
493
|
-
* }
|
|
494
|
-
*
|
|
495
|
-
* getBuffer(): Uint8Array {
|
|
496
|
-
* const total = this.chunks.reduce((sum, c) => sum + c.length, 0)
|
|
497
|
-
* const result = new Uint8Array(total)
|
|
498
|
-
* let offset = 0
|
|
499
|
-
* for (const chunk of this.chunks) {
|
|
500
|
-
* result.set(chunk, offset)
|
|
501
|
-
* offset += chunk.length
|
|
502
|
-
* }
|
|
503
|
-
* return result
|
|
504
|
-
* }
|
|
505
|
-
* }
|
|
506
|
-
* ```
|
|
507
|
-
*
|
|
508
|
-
* @interface OutputStream
|
|
509
|
-
*/
|
|
510
|
-
export interface OutputStream {
|
|
511
|
-
/**
|
|
512
|
-
* Writes data to the output stream.
|
|
513
|
-
*
|
|
514
|
-
* @param data - The data to write
|
|
515
|
-
*/
|
|
516
|
-
write(data: Uint8Array): void;
|
|
517
|
-
}
|
|
518
|
-
/**
|
|
519
|
-
* Error class for Parquet-related operations.
|
|
520
|
-
*
|
|
521
|
-
* @description
|
|
522
|
-
* Thrown when Parquet operations fail, such as schema validation errors,
|
|
523
|
-
* invalid data types, or malformed files.
|
|
524
|
-
*
|
|
525
|
-
* @example
|
|
526
|
-
* ```typescript
|
|
527
|
-
* try {
|
|
528
|
-
* await writer.writeRow({ invalid_field: 'value' })
|
|
529
|
-
* } catch (error) {
|
|
530
|
-
* if (error instanceof ParquetError) {
|
|
531
|
-
* console.log(`Parquet error (${error.code}): ${error.message}`)
|
|
532
|
-
* }
|
|
533
|
-
* }
|
|
534
|
-
* ```
|
|
535
|
-
*
|
|
536
|
-
* @class ParquetError
|
|
537
|
-
* @extends Error
|
|
538
|
-
*/
|
|
539
|
-
export declare class ParquetError extends Error {
|
|
540
|
-
readonly code: string;
|
|
541
|
-
/**
|
|
542
|
-
* Creates a new ParquetError.
|
|
543
|
-
*
|
|
544
|
-
* @param message - Human-readable error message
|
|
545
|
-
* @param code - Error code for programmatic handling
|
|
546
|
-
*
|
|
547
|
-
* @example
|
|
548
|
-
* ```typescript
|
|
549
|
-
* throw new ParquetError('Field name cannot be empty', 'EMPTY_FIELD_NAME')
|
|
550
|
-
* ```
|
|
551
|
-
*/
|
|
552
|
-
constructor(message: string, code: string);
|
|
553
|
-
}
|
|
554
|
-
/**
|
|
555
|
-
* Parquet writer for git analytics data.
|
|
556
|
-
*
|
|
557
|
-
* @description
|
|
558
|
-
* ParquetWriter provides a streaming interface for writing data to Parquet
|
|
559
|
-
* format. It handles schema validation, row group management, compression,
|
|
560
|
-
* and statistics generation.
|
|
561
|
-
*
|
|
562
|
-
* **Usage Pattern:**
|
|
563
|
-
* 1. Create a schema using `defineSchema()`
|
|
564
|
-
* 2. Create a writer with `createParquetWriter()` or `new ParquetWriter()`
|
|
565
|
-
* 3. Write rows using `writeRow()` or `writeRows()`
|
|
566
|
-
* 4. Generate the file with `toBuffer()` or `writeTo()`
|
|
567
|
-
*
|
|
568
|
-
* **Row Group Management:**
|
|
569
|
-
* Rows are buffered in memory until the row group is full (by row count
|
|
570
|
-
* or memory limit), then flushed. You can also manually flush with
|
|
571
|
-
* `flushRowGroup()`.
|
|
572
|
-
*
|
|
573
|
-
* **Thread Safety:**
|
|
574
|
-
* Not thread-safe. Use separate writer instances for concurrent writes.
|
|
575
|
-
*
|
|
576
|
-
* @example
|
|
577
|
-
* ```typescript
|
|
578
|
-
* // Create schema
|
|
579
|
-
* const schema = defineSchema([
|
|
580
|
-
* { name: 'sha', type: ParquetFieldType.STRING, required: true },
|
|
581
|
-
* { name: 'type', type: ParquetFieldType.STRING, required: true },
|
|
582
|
-
* { name: 'size', type: ParquetFieldType.INT64, required: true },
|
|
583
|
-
* { name: 'timestamp', type: ParquetFieldType.TIMESTAMP_MILLIS, required: true }
|
|
584
|
-
* ])
|
|
585
|
-
*
|
|
586
|
-
* // Create writer
|
|
587
|
-
* const writer = new ParquetWriter(schema, {
|
|
588
|
-
* rowGroupSize: 10000,
|
|
589
|
-
* compression: ParquetCompression.SNAPPY,
|
|
590
|
-
* enableStatistics: true
|
|
591
|
-
* })
|
|
592
|
-
*
|
|
593
|
-
* // Write data
|
|
594
|
-
* for (const object of gitObjects) {
|
|
595
|
-
* await writer.writeRow({
|
|
596
|
-
* sha: object.sha,
|
|
597
|
-
* type: object.type,
|
|
598
|
-
* size: object.size,
|
|
599
|
-
* timestamp: Date.now()
|
|
600
|
-
* })
|
|
601
|
-
* }
|
|
602
|
-
*
|
|
603
|
-
* // Set custom metadata
|
|
604
|
-
* writer.setMetadata('git_version', '2.40.0')
|
|
605
|
-
* writer.setMetadata('repository', 'github.com/org/repo')
|
|
606
|
-
*
|
|
607
|
-
* // Generate file
|
|
608
|
-
* const buffer = await writer.toBuffer()
|
|
609
|
-
* console.log(`Generated ${buffer.length} bytes`)
|
|
610
|
-
* console.log(`Rows: ${writer.rowCount}`)
|
|
611
|
-
* console.log(`Row groups: ${writer.rowGroupCount}`)
|
|
612
|
-
*
|
|
613
|
-
* // Reset for reuse
|
|
614
|
-
* writer.reset()
|
|
615
|
-
* ```
|
|
616
|
-
*
|
|
617
|
-
* @class ParquetWriter
|
|
618
|
-
*/
|
|
619
|
-
export declare class ParquetWriter {
|
|
620
|
-
/**
|
|
621
|
-
* The Parquet schema for this writer.
|
|
622
|
-
* @readonly
|
|
623
|
-
*/
|
|
624
|
-
readonly schema: ParquetSchema;
|
|
625
|
-
/**
|
|
626
|
-
* Resolved options with defaults applied.
|
|
627
|
-
* @readonly
|
|
628
|
-
*/
|
|
629
|
-
readonly options: Required<Pick<ParquetWriteOptions, 'rowGroupSize' | 'compression'>> & ParquetWriteOptions;
|
|
630
|
-
/**
|
|
631
|
-
* Total row count written.
|
|
632
|
-
* @private
|
|
633
|
-
*/
|
|
634
|
-
private _rowCount;
|
|
635
|
-
/**
|
|
636
|
-
* Completed row groups.
|
|
637
|
-
* @private
|
|
638
|
-
*/
|
|
639
|
-
private _rowGroups;
|
|
640
|
-
/**
|
|
641
|
-
* Current row group being built.
|
|
642
|
-
* @private
|
|
643
|
-
*/
|
|
644
|
-
private _currentRowGroup;
|
|
645
|
-
/**
|
|
646
|
-
* Whether the writer has been closed.
|
|
647
|
-
* @private
|
|
648
|
-
*/
|
|
649
|
-
private _isClosed;
|
|
650
|
-
/**
|
|
651
|
-
* Custom key-value metadata.
|
|
652
|
-
* @private
|
|
653
|
-
*/
|
|
654
|
-
private _keyValueMetadata;
|
|
655
|
-
/**
|
|
656
|
-
* Creation timestamp.
|
|
657
|
-
* @private
|
|
658
|
-
*/
|
|
659
|
-
private _createdAt;
|
|
660
|
-
/**
|
|
661
|
-
* Creates a new ParquetWriter instance.
|
|
662
|
-
*
|
|
663
|
-
* @param schema - The Parquet schema defining columns
|
|
664
|
-
* @param options - Writer configuration options
|
|
665
|
-
*
|
|
666
|
-
* @example
|
|
667
|
-
* ```typescript
|
|
668
|
-
* const writer = new ParquetWriter(schema, {
|
|
669
|
-
* rowGroupSize: 50000,
|
|
670
|
-
* compression: ParquetCompression.GZIP
|
|
671
|
-
* })
|
|
672
|
-
* ```
|
|
673
|
-
*/
|
|
674
|
-
constructor(schema: ParquetSchema, options?: ParquetWriteOptions);
|
|
675
|
-
/**
|
|
676
|
-
* Gets the total row count written to the writer.
|
|
677
|
-
*
|
|
678
|
-
* @description
|
|
679
|
-
* Returns the total number of rows written, including rows in the
|
|
680
|
-
* current unflushed row group.
|
|
681
|
-
*
|
|
682
|
-
* @returns Total row count
|
|
683
|
-
*
|
|
684
|
-
* @example
|
|
685
|
-
* ```typescript
|
|
686
|
-
* await writer.writeRows(data)
|
|
687
|
-
* console.log(`Wrote ${writer.rowCount} rows`)
|
|
688
|
-
* ```
|
|
689
|
-
*/
|
|
690
|
-
get rowCount(): number;
|
|
691
|
-
/**
|
|
692
|
-
* Gets the number of row groups.
|
|
693
|
-
*
|
|
694
|
-
* @description
|
|
695
|
-
* Returns the number of completed row groups plus one if there's
|
|
696
|
-
* a pending row group with data.
|
|
697
|
-
*
|
|
698
|
-
* @returns Number of row groups
|
|
699
|
-
*
|
|
700
|
-
* @example
|
|
701
|
-
* ```typescript
|
|
702
|
-
* console.log(`Row groups: ${writer.rowGroupCount}`)
|
|
703
|
-
* ```
|
|
704
|
-
*/
|
|
705
|
-
get rowGroupCount(): number;
|
|
706
|
-
/**
|
|
707
|
-
* Checks if the writer has been closed.
|
|
708
|
-
*
|
|
709
|
-
* @description
|
|
710
|
-
* A closed writer cannot accept new rows. Writers are closed
|
|
711
|
-
* implicitly by `closeWriter()`.
|
|
712
|
-
*
|
|
713
|
-
* @returns true if closed
|
|
714
|
-
*
|
|
715
|
-
* @example
|
|
716
|
-
* ```typescript
|
|
717
|
-
* if (!writer.isClosed) {
|
|
718
|
-
* await writer.writeRow(row)
|
|
719
|
-
* }
|
|
720
|
-
* ```
|
|
721
|
-
*/
|
|
722
|
-
get isClosed(): boolean;
|
|
723
|
-
/**
|
|
724
|
-
* Writes a single row to the Parquet file.
|
|
725
|
-
*
|
|
726
|
-
* @description
|
|
727
|
-
* Validates the row against the schema and adds it to the current
|
|
728
|
-
* row group. Automatically flushes the row group when it reaches
|
|
729
|
-
* the configured size or memory limit.
|
|
730
|
-
*
|
|
731
|
-
* @param row - Object with column values keyed by column name
|
|
732
|
-
* @returns Promise that resolves when the row is written
|
|
733
|
-
*
|
|
734
|
-
* @throws {ParquetError} WRITER_CLOSED - If writer is closed
|
|
735
|
-
* @throws {ParquetError} MISSING_REQUIRED_FIELD - If required field is missing
|
|
736
|
-
* @throws {ParquetError} INVALID_FIELD_TYPE - If field value type doesn't match schema
|
|
737
|
-
*
|
|
738
|
-
* @example
|
|
739
|
-
* ```typescript
|
|
740
|
-
* await writer.writeRow({
|
|
741
|
-
* id: 123,
|
|
742
|
-
* name: 'Alice',
|
|
743
|
-
* active: true
|
|
744
|
-
* })
|
|
745
|
-
* ```
|
|
746
|
-
*/
|
|
747
|
-
writeRow(row: Record<string, unknown>): Promise<void>;
|
|
748
|
-
/**
|
|
749
|
-
* Writes multiple rows to the Parquet file.
|
|
750
|
-
*
|
|
751
|
-
* @description
|
|
752
|
-
* Convenience method that writes an array of rows sequentially.
|
|
753
|
-
* Each row is validated and may trigger row group flushes.
|
|
754
|
-
*
|
|
755
|
-
* @param rows - Array of row objects to write
|
|
756
|
-
* @returns Promise that resolves when all rows are written
|
|
757
|
-
*
|
|
758
|
-
* @throws {ParquetError} Any error from writeRow()
|
|
759
|
-
*
|
|
760
|
-
* @example
|
|
761
|
-
* ```typescript
|
|
762
|
-
* await writer.writeRows([
|
|
763
|
-
* { id: 1, name: 'Alice' },
|
|
764
|
-
* { id: 2, name: 'Bob' },
|
|
765
|
-
* { id: 3, name: 'Carol' }
|
|
766
|
-
* ])
|
|
767
|
-
* ```
|
|
768
|
-
*/
|
|
769
|
-
writeRows(rows: Record<string, unknown>[]): Promise<void>;
|
|
770
|
-
/**
|
|
771
|
-
* Manually flushes the current row group.
|
|
772
|
-
*
|
|
773
|
-
* @description
|
|
774
|
-
* Forces the current row group to be finalized and stored, even if
|
|
775
|
-
* it hasn't reached the size limit. Has no effect if the current
|
|
776
|
-
* row group is empty.
|
|
777
|
-
*
|
|
778
|
-
* @returns Promise that resolves when flush is complete
|
|
779
|
-
*
|
|
780
|
-
* @example
|
|
781
|
-
* ```typescript
|
|
782
|
-
* // Write some rows
|
|
783
|
-
* await writer.writeRows(batch1)
|
|
784
|
-
*
|
|
785
|
-
* // Force flush before writing next batch
|
|
786
|
-
* await writer.flushRowGroup()
|
|
787
|
-
*
|
|
788
|
-
* // Continue writing
|
|
789
|
-
* await writer.writeRows(batch2)
|
|
790
|
-
* ```
|
|
791
|
-
*/
|
|
792
|
-
flushRowGroup(): Promise<void>;
|
|
793
|
-
/**
|
|
794
|
-
* Gets the current row group's memory size.
|
|
795
|
-
*
|
|
796
|
-
* @description
|
|
797
|
-
* Returns the estimated memory consumption of the unflushed row group.
|
|
798
|
-
* Useful for monitoring memory usage during streaming writes.
|
|
799
|
-
*
|
|
800
|
-
* @returns Memory size in bytes
|
|
801
|
-
*
|
|
802
|
-
* @example
|
|
803
|
-
* ```typescript
|
|
804
|
-
* if (writer.currentRowGroupMemorySize() > 50 * 1024 * 1024) {
|
|
805
|
-
* console.log('Row group using significant memory')
|
|
806
|
-
* await writer.flushRowGroup()
|
|
807
|
-
* }
|
|
808
|
-
* ```
|
|
809
|
-
*/
|
|
810
|
-
currentRowGroupMemorySize(): number;
|
|
811
|
-
/**
|
|
812
|
-
* Gets the completed row groups.
|
|
813
|
-
*
|
|
814
|
-
* @description
|
|
815
|
-
* Returns a copy of the completed row group metadata array.
|
|
816
|
-
* Does not include the current unflushed row group.
|
|
817
|
-
*
|
|
818
|
-
* @returns Array of row group metadata
|
|
819
|
-
*
|
|
820
|
-
* @example
|
|
821
|
-
* ```typescript
|
|
822
|
-
* for (const rg of writer.getRowGroups()) {
|
|
823
|
-
* console.log(`Row group: ${rg.numRows} rows, ${rg.totalByteSize} bytes`)
|
|
824
|
-
* }
|
|
825
|
-
* ```
|
|
826
|
-
*/
|
|
827
|
-
getRowGroups(): RowGroup[];
|
|
828
|
-
/**
|
|
829
|
-
* Sets a custom key-value metadata entry.
|
|
830
|
-
*
|
|
831
|
-
* @description
|
|
832
|
-
* Adds custom metadata that will be stored in the Parquet file footer.
|
|
833
|
-
* Can be used for versioning, provenance, or application-specific data.
|
|
834
|
-
*
|
|
835
|
-
* @param key - Metadata key
|
|
836
|
-
* @param value - Metadata value
|
|
837
|
-
*
|
|
838
|
-
* @example
|
|
839
|
-
* ```typescript
|
|
840
|
-
* writer.setMetadata('created_by', 'gitdo-analytics')
|
|
841
|
-
* writer.setMetadata('schema_version', '2.0')
|
|
842
|
-
* writer.setMetadata('repository', 'github.com/org/repo')
|
|
843
|
-
* ```
|
|
844
|
-
*/
|
|
845
|
-
setMetadata(key: string, value: string): void;
|
|
846
|
-
/**
|
|
847
|
-
* Generates the Parquet file as a buffer.
|
|
848
|
-
*
|
|
849
|
-
* @description
|
|
850
|
-
* Finalizes the file by flushing any remaining rows and generating
|
|
851
|
-
* the complete Parquet file structure including header, row groups,
|
|
852
|
-
* and footer with metadata.
|
|
853
|
-
*
|
|
854
|
-
* @returns Promise resolving to the complete Parquet file as Uint8Array
|
|
855
|
-
*
|
|
856
|
-
* @example
|
|
857
|
-
* ```typescript
|
|
858
|
-
* const buffer = await writer.toBuffer()
|
|
859
|
-
* await fs.writeFile('data.parquet', buffer)
|
|
860
|
-
* ```
|
|
861
|
-
*/
|
|
862
|
-
toBuffer(): Promise<Uint8Array>;
|
|
863
|
-
/**
|
|
864
|
-
* Writes the Parquet file to an output stream.
|
|
865
|
-
*
|
|
866
|
-
* @description
|
|
867
|
-
* Generates the file and writes it to the provided output stream.
|
|
868
|
-
* Useful for streaming to files or network destinations.
|
|
869
|
-
*
|
|
870
|
-
* @param output - The output stream to write to
|
|
871
|
-
* @returns Promise that resolves when writing is complete
|
|
872
|
-
*
|
|
873
|
-
* @example
|
|
874
|
-
* ```typescript
|
|
875
|
-
* const output = new FileOutputStream('data.parquet')
|
|
876
|
-
* await writer.writeTo(output)
|
|
877
|
-
* output.close()
|
|
878
|
-
* ```
|
|
879
|
-
*/
|
|
880
|
-
writeTo(output: OutputStream): Promise<void>;
|
|
881
|
-
/**
|
|
882
|
-
* Resets the writer to its initial state.
|
|
883
|
-
*
|
|
884
|
-
* @description
|
|
885
|
-
* Clears all written data, row groups, and metadata. The schema
|
|
886
|
-
* and options remain unchanged. Useful for writing multiple files
|
|
887
|
-
* with the same configuration.
|
|
888
|
-
*
|
|
889
|
-
* @example
|
|
890
|
-
* ```typescript
|
|
891
|
-
* // Write first file
|
|
892
|
-
* await writer.writeRows(batch1)
|
|
893
|
-
* const file1 = await writer.toBuffer()
|
|
894
|
-
*
|
|
895
|
-
* // Reset and write second file
|
|
896
|
-
* writer.reset()
|
|
897
|
-
* await writer.writeRows(batch2)
|
|
898
|
-
* const file2 = await writer.toBuffer()
|
|
899
|
-
* ```
|
|
900
|
-
*/
|
|
901
|
-
reset(): void;
|
|
902
|
-
/**
|
|
903
|
-
* Validates a row against the schema.
|
|
904
|
-
*
|
|
905
|
-
* @param row - The row to validate
|
|
906
|
-
* @throws {ParquetError} If validation fails
|
|
907
|
-
* @private
|
|
908
|
-
*/
|
|
909
|
-
private _validateRow;
|
|
910
|
-
/**
|
|
911
|
-
* Validates a value matches the expected Parquet type.
|
|
912
|
-
*
|
|
913
|
-
* @param value - The value to validate
|
|
914
|
-
* @param type - The expected Parquet type
|
|
915
|
-
* @returns true if valid, false otherwise
|
|
916
|
-
* @private
|
|
917
|
-
*/
|
|
918
|
-
private _validateType;
|
|
919
|
-
/**
|
|
920
|
-
* Estimates the memory size of a row.
|
|
921
|
-
*
|
|
922
|
-
* @param row - The row to estimate
|
|
923
|
-
* @returns Estimated size in bytes
|
|
924
|
-
* @private
|
|
925
|
-
*/
|
|
926
|
-
private _estimateRowSize;
|
|
927
|
-
/**
|
|
928
|
-
* Builds a row group from internal representation.
|
|
929
|
-
*
|
|
930
|
-
* @param internal - The internal row group data
|
|
931
|
-
* @returns The row group metadata
|
|
932
|
-
* @private
|
|
933
|
-
*/
|
|
934
|
-
private _buildRowGroup;
|
|
935
|
-
/**
|
|
936
|
-
* Computes statistics for a column.
|
|
937
|
-
*
|
|
938
|
-
* @param values - The column values
|
|
939
|
-
* @param type - The column type
|
|
940
|
-
* @returns Column statistics
|
|
941
|
-
* @private
|
|
942
|
-
*/
|
|
943
|
-
private _computeStatistics;
|
|
944
|
-
/**
|
|
945
|
-
* Estimates the encoded size after compression.
|
|
946
|
-
*
|
|
947
|
-
* @param values - The column values
|
|
948
|
-
* @param type - The column type
|
|
949
|
-
* @param compression - The compression type
|
|
950
|
-
* @returns Estimated compressed size in bytes
|
|
951
|
-
* @private
|
|
952
|
-
*/
|
|
953
|
-
private _estimateEncodedSize;
|
|
954
|
-
/**
|
|
955
|
-
* Estimates the uncompressed size of column values.
|
|
956
|
-
*
|
|
957
|
-
* @param values - The column values
|
|
958
|
-
* @param type - The column type
|
|
959
|
-
* @returns Estimated uncompressed size in bytes
|
|
960
|
-
* @private
|
|
961
|
-
*/
|
|
962
|
-
private _estimateUncompressedSize;
|
|
963
|
-
/**
|
|
964
|
-
* Generates the complete Parquet file bytes.
|
|
965
|
-
*
|
|
966
|
-
* @returns The complete Parquet file as Uint8Array
|
|
967
|
-
* @private
|
|
968
|
-
*/
|
|
969
|
-
private _generateParquetBytes;
|
|
970
|
-
/**
|
|
971
|
-
* Simple compression simulation for non-gzip formats.
|
|
972
|
-
*
|
|
973
|
-
* @param data - Data to compress
|
|
974
|
-
* @param compression - Compression type
|
|
975
|
-
* @returns Compressed data
|
|
976
|
-
* @private
|
|
977
|
-
*/
|
|
978
|
-
private _simpleCompress;
|
|
979
|
-
}
|
|
980
|
-
/**
|
|
981
|
-
* Defines a Parquet schema.
|
|
982
|
-
*
|
|
983
|
-
* @description
|
|
984
|
-
* Creates a validated Parquet schema from field definitions. Validates that:
|
|
985
|
-
* - Schema has at least one field
|
|
986
|
-
* - All field names are non-empty
|
|
987
|
-
* - All field names are unique
|
|
988
|
-
*
|
|
989
|
-
* @param fields - Array of field definitions
|
|
990
|
-
* @param metadata - Optional schema-level metadata
|
|
991
|
-
* @returns Validated Parquet schema
|
|
992
|
-
*
|
|
993
|
-
* @throws {ParquetError} EMPTY_SCHEMA - If fields array is empty
|
|
994
|
-
* @throws {ParquetError} EMPTY_FIELD_NAME - If any field name is empty
|
|
995
|
-
* @throws {ParquetError} DUPLICATE_FIELD - If field names are not unique
|
|
996
|
-
*
|
|
997
|
-
* @example
|
|
998
|
-
* ```typescript
|
|
999
|
-
* const schema = defineSchema([
|
|
1000
|
-
* { name: 'id', type: ParquetFieldType.INT64, required: true },
|
|
1001
|
-
* { name: 'name', type: ParquetFieldType.STRING, required: true },
|
|
1002
|
-
* { name: 'age', type: ParquetFieldType.INT32, required: false },
|
|
1003
|
-
* { name: 'created_at', type: ParquetFieldType.TIMESTAMP_MILLIS, required: true }
|
|
1004
|
-
* ], {
|
|
1005
|
-
* version: '1.0',
|
|
1006
|
-
* description: 'User records'
|
|
1007
|
-
* })
|
|
1008
|
-
* ```
|
|
1009
|
-
*/
|
|
1010
|
-
export declare function defineSchema(fields: ParquetField[], metadata?: Record<string, string>): ParquetSchema;
|
|
1011
|
-
/**
|
|
1012
|
-
* Creates a Parquet writer.
|
|
1013
|
-
*
|
|
1014
|
-
* @description
|
|
1015
|
-
* Factory function to create a ParquetWriter with the specified schema
|
|
1016
|
-
* and options. Equivalent to `new ParquetWriter(schema, options)`.
|
|
1017
|
-
*
|
|
1018
|
-
* @param schema - The Parquet schema
|
|
1019
|
-
* @param options - Writer options
|
|
1020
|
-
* @returns A new ParquetWriter instance
|
|
1021
|
-
*
|
|
1022
|
-
* @example
|
|
1023
|
-
* ```typescript
|
|
1024
|
-
* const writer = createParquetWriter(schema, {
|
|
1025
|
-
* rowGroupSize: 10000,
|
|
1026
|
-
* compression: ParquetCompression.SNAPPY
|
|
1027
|
-
* })
|
|
1028
|
-
* ```
|
|
1029
|
-
*/
|
|
1030
|
-
export declare function createParquetWriter(schema: ParquetSchema, options?: ParquetWriteOptions): ParquetWriter;
|
|
1031
|
-
/**
|
|
1032
|
-
* Writes data directly to a Parquet file buffer.
|
|
1033
|
-
*
|
|
1034
|
-
* @description
|
|
1035
|
-
* Convenience function that creates a writer, writes all rows, and returns
|
|
1036
|
-
* the complete Parquet file. Useful for simple one-shot writes.
|
|
1037
|
-
*
|
|
1038
|
-
* @param schema - The Parquet schema
|
|
1039
|
-
* @param rows - Array of rows to write
|
|
1040
|
-
* @param options - Writer options
|
|
1041
|
-
* @returns Promise resolving to the complete Parquet file as Uint8Array
|
|
1042
|
-
*
|
|
1043
|
-
* @example
|
|
1044
|
-
* ```typescript
|
|
1045
|
-
* const buffer = await writeParquetFile(schema, [
|
|
1046
|
-
* { id: 1, name: 'Alice' },
|
|
1047
|
-
* { id: 2, name: 'Bob' }
|
|
1048
|
-
* ], {
|
|
1049
|
-
* compression: ParquetCompression.GZIP
|
|
1050
|
-
* })
|
|
1051
|
-
*
|
|
1052
|
-
* await fs.writeFile('data.parquet', buffer)
|
|
1053
|
-
* ```
|
|
1054
|
-
*/
|
|
1055
|
-
export declare function writeParquetFile(schema: ParquetSchema, rows: Record<string, unknown>[], options?: ParquetWriteOptions): Promise<Uint8Array>;
|
|
1056
|
-
/**
|
|
1057
|
-
* Closes a writer and returns the final buffer.
|
|
1058
|
-
*
|
|
1059
|
-
* @description
|
|
1060
|
-
* Generates the final Parquet file buffer and marks the writer as closed.
|
|
1061
|
-
* The writer cannot be used for further writes after calling this function.
|
|
1062
|
-
*
|
|
1063
|
-
* @param writer - The ParquetWriter to close
|
|
1064
|
-
* @returns Promise resolving to the complete Parquet file as Uint8Array
|
|
1065
|
-
*
|
|
1066
|
-
* @example
|
|
1067
|
-
* ```typescript
|
|
1068
|
-
* await writer.writeRows(data)
|
|
1069
|
-
* const buffer = await closeWriter(writer)
|
|
1070
|
-
* console.log(writer.isClosed) // true
|
|
1071
|
-
* ```
|
|
1072
|
-
*/
|
|
1073
|
-
export declare function closeWriter(writer: ParquetWriter): Promise<Uint8Array>;
|
|
1074
|
-
/**
|
|
1075
|
-
* Adds a row group to the writer.
|
|
1076
|
-
*
|
|
1077
|
-
* @description
|
|
1078
|
-
* Writes multiple rows and then flushes them as a single row group.
|
|
1079
|
-
* Useful when you want explicit control over row group boundaries.
|
|
1080
|
-
*
|
|
1081
|
-
* @param writer - The ParquetWriter to use
|
|
1082
|
-
* @param rows - Array of rows for this row group
|
|
1083
|
-
* @returns Promise that resolves when the row group is written
|
|
1084
|
-
*
|
|
1085
|
-
* @example
|
|
1086
|
-
* ```typescript
|
|
1087
|
-
* // Add explicit row groups
|
|
1088
|
-
* await addRowGroup(writer, batch1) // First row group
|
|
1089
|
-
* await addRowGroup(writer, batch2) // Second row group
|
|
1090
|
-
* ```
|
|
1091
|
-
*/
|
|
1092
|
-
export declare function addRowGroup(writer: ParquetWriter, rows: Record<string, unknown>[]): Promise<void>;
|
|
1093
|
-
/**
|
|
1094
|
-
* Gets metadata from a Parquet file buffer.
|
|
1095
|
-
*
|
|
1096
|
-
* @description
|
|
1097
|
-
* Parses a Parquet file buffer and extracts the metadata including
|
|
1098
|
-
* schema, row groups, compression settings, and custom metadata.
|
|
1099
|
-
*
|
|
1100
|
-
* @param bytes - The Parquet file buffer
|
|
1101
|
-
* @returns The parsed metadata
|
|
1102
|
-
*
|
|
1103
|
-
* @throws {ParquetError} INVALID_MAGIC - If file doesn't have valid Parquet magic bytes
|
|
1104
|
-
*
|
|
1105
|
-
* @example
|
|
1106
|
-
* ```typescript
|
|
1107
|
-
* const buffer = await fs.readFile('data.parquet')
|
|
1108
|
-
* const metadata = getMetadata(buffer)
|
|
1109
|
-
*
|
|
1110
|
-
* console.log(`Rows: ${metadata.numRows}`)
|
|
1111
|
-
* console.log(`Schema: ${metadata.schema.fields.map(f => f.name).join(', ')}`)
|
|
1112
|
-
* console.log(`Row groups: ${metadata.rowGroups.length}`)
|
|
1113
|
-
*
|
|
1114
|
-
* for (const rg of metadata.rowGroups) {
|
|
1115
|
-
* console.log(` - ${rg.numRows} rows, ${rg.totalByteSize} bytes`)
|
|
1116
|
-
* }
|
|
1117
|
-
* ```
|
|
1118
|
-
*/
|
|
1119
|
-
export declare function getMetadata(bytes: Uint8Array): ParquetMetadata;
|
|
1120
|
-
/**
|
|
1121
|
-
* Sets the compression type for a writer.
|
|
1122
|
-
*
|
|
1123
|
-
* @description
|
|
1124
|
-
* Updates the default compression algorithm for a writer. Affects all
|
|
1125
|
-
* subsequently written data. Columns with explicit compression settings
|
|
1126
|
-
* in columnCompression are not affected.
|
|
1127
|
-
*
|
|
1128
|
-
* @param writer - The ParquetWriter to update
|
|
1129
|
-
* @param compression - The new compression type
|
|
1130
|
-
*
|
|
1131
|
-
* @example
|
|
1132
|
-
* ```typescript
|
|
1133
|
-
* const writer = createParquetWriter(schema)
|
|
1134
|
-
*
|
|
1135
|
-
* // Write some rows with SNAPPY (default)
|
|
1136
|
-
* await writer.writeRows(batch1)
|
|
1137
|
-
* await writer.flushRowGroup()
|
|
1138
|
-
*
|
|
1139
|
-
* // Switch to GZIP for remaining data
|
|
1140
|
-
* setCompression(writer, ParquetCompression.GZIP)
|
|
1141
|
-
* await writer.writeRows(batch2)
|
|
1142
|
-
* ```
|
|
1143
|
-
*/
|
|
1144
|
-
export declare function setCompression(writer: ParquetWriter, compression: ParquetCompression): void;
|
|
1145
|
-
//# sourceMappingURL=parquet-writer.d.ts.map
|