gitx.do 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. package/dist/cli/commands/blame.d.ts +259 -0
  2. package/dist/cli/commands/blame.d.ts.map +1 -0
  3. package/dist/cli/commands/blame.js +609 -0
  4. package/dist/cli/commands/blame.js.map +1 -0
  5. package/dist/cli/commands/branch.d.ts +249 -0
  6. package/dist/cli/commands/branch.d.ts.map +1 -0
  7. package/dist/cli/commands/branch.js +693 -0
  8. package/dist/cli/commands/branch.js.map +1 -0
  9. package/dist/cli/commands/commit.d.ts +182 -0
  10. package/dist/cli/commands/commit.d.ts.map +1 -0
  11. package/dist/cli/commands/commit.js +437 -0
  12. package/dist/cli/commands/commit.js.map +1 -0
  13. package/dist/cli/commands/diff.d.ts +464 -0
  14. package/dist/cli/commands/diff.d.ts.map +1 -0
  15. package/dist/cli/commands/diff.js +958 -0
  16. package/dist/cli/commands/diff.js.map +1 -0
  17. package/dist/cli/commands/log.d.ts +239 -0
  18. package/dist/cli/commands/log.d.ts.map +1 -0
  19. package/dist/cli/commands/log.js +535 -0
  20. package/dist/cli/commands/log.js.map +1 -0
  21. package/dist/cli/commands/review.d.ts +457 -0
  22. package/dist/cli/commands/review.d.ts.map +1 -0
  23. package/dist/cli/commands/review.js +533 -0
  24. package/dist/cli/commands/review.js.map +1 -0
  25. package/dist/cli/commands/status.d.ts +269 -0
  26. package/dist/cli/commands/status.d.ts.map +1 -0
  27. package/dist/cli/commands/status.js +493 -0
  28. package/dist/cli/commands/status.js.map +1 -0
  29. package/dist/cli/commands/web.d.ts +199 -0
  30. package/dist/cli/commands/web.d.ts.map +1 -0
  31. package/dist/cli/commands/web.js +696 -0
  32. package/dist/cli/commands/web.js.map +1 -0
  33. package/dist/cli/fs-adapter.d.ts +656 -0
  34. package/dist/cli/fs-adapter.d.ts.map +1 -0
  35. package/dist/cli/fs-adapter.js +1179 -0
  36. package/dist/cli/fs-adapter.js.map +1 -0
  37. package/dist/cli/index.d.ts +387 -0
  38. package/dist/cli/index.d.ts.map +1 -0
  39. package/dist/cli/index.js +523 -0
  40. package/dist/cli/index.js.map +1 -0
  41. package/dist/cli/ui/components/DiffView.d.ts +7 -0
  42. package/dist/cli/ui/components/DiffView.d.ts.map +1 -0
  43. package/dist/cli/ui/components/DiffView.js +11 -0
  44. package/dist/cli/ui/components/DiffView.js.map +1 -0
  45. package/dist/cli/ui/components/ErrorDisplay.d.ts +6 -0
  46. package/dist/cli/ui/components/ErrorDisplay.d.ts.map +1 -0
  47. package/dist/cli/ui/components/ErrorDisplay.js +11 -0
  48. package/dist/cli/ui/components/ErrorDisplay.js.map +1 -0
  49. package/dist/cli/ui/components/FuzzySearch.d.ts +9 -0
  50. package/dist/cli/ui/components/FuzzySearch.d.ts.map +1 -0
  51. package/dist/cli/ui/components/FuzzySearch.js +12 -0
  52. package/dist/cli/ui/components/FuzzySearch.js.map +1 -0
  53. package/dist/cli/ui/components/LoadingSpinner.d.ts +6 -0
  54. package/dist/cli/ui/components/LoadingSpinner.d.ts.map +1 -0
  55. package/dist/cli/ui/components/LoadingSpinner.js +10 -0
  56. package/dist/cli/ui/components/LoadingSpinner.js.map +1 -0
  57. package/dist/cli/ui/components/NavigationList.d.ts +9 -0
  58. package/dist/cli/ui/components/NavigationList.d.ts.map +1 -0
  59. package/dist/cli/ui/components/NavigationList.js +11 -0
  60. package/dist/cli/ui/components/NavigationList.js.map +1 -0
  61. package/dist/cli/ui/components/ScrollableContent.d.ts +8 -0
  62. package/dist/cli/ui/components/ScrollableContent.d.ts.map +1 -0
  63. package/dist/cli/ui/components/ScrollableContent.js +11 -0
  64. package/dist/cli/ui/components/ScrollableContent.js.map +1 -0
  65. package/dist/cli/ui/components/index.d.ts +7 -0
  66. package/dist/cli/ui/components/index.d.ts.map +1 -0
  67. package/dist/cli/ui/components/index.js +9 -0
  68. package/dist/cli/ui/components/index.js.map +1 -0
  69. package/dist/cli/ui/terminal-ui.d.ts +52 -0
  70. package/dist/cli/ui/terminal-ui.d.ts.map +1 -0
  71. package/dist/cli/ui/terminal-ui.js +121 -0
  72. package/dist/cli/ui/terminal-ui.js.map +1 -0
  73. package/dist/durable-object/object-store.d.ts +401 -23
  74. package/dist/durable-object/object-store.d.ts.map +1 -1
  75. package/dist/durable-object/object-store.js +414 -25
  76. package/dist/durable-object/object-store.js.map +1 -1
  77. package/dist/durable-object/schema.d.ts +188 -0
  78. package/dist/durable-object/schema.d.ts.map +1 -1
  79. package/dist/durable-object/schema.js +160 -0
  80. package/dist/durable-object/schema.js.map +1 -1
  81. package/dist/durable-object/wal.d.ts +336 -31
  82. package/dist/durable-object/wal.d.ts.map +1 -1
  83. package/dist/durable-object/wal.js +272 -27
  84. package/dist/durable-object/wal.js.map +1 -1
  85. package/dist/index.d.ts +379 -3
  86. package/dist/index.d.ts.map +1 -1
  87. package/dist/index.js +379 -7
  88. package/dist/index.js.map +1 -1
  89. package/dist/mcp/adapter.d.ts +579 -38
  90. package/dist/mcp/adapter.d.ts.map +1 -1
  91. package/dist/mcp/adapter.js +426 -33
  92. package/dist/mcp/adapter.js.map +1 -1
  93. package/dist/mcp/sandbox.d.ts +532 -29
  94. package/dist/mcp/sandbox.d.ts.map +1 -1
  95. package/dist/mcp/sandbox.js +389 -22
  96. package/dist/mcp/sandbox.js.map +1 -1
  97. package/dist/mcp/sdk-adapter.d.ts +478 -56
  98. package/dist/mcp/sdk-adapter.d.ts.map +1 -1
  99. package/dist/mcp/sdk-adapter.js +346 -44
  100. package/dist/mcp/sdk-adapter.js.map +1 -1
  101. package/dist/mcp/tools.d.ts +445 -30
  102. package/dist/mcp/tools.d.ts.map +1 -1
  103. package/dist/mcp/tools.js +363 -33
  104. package/dist/mcp/tools.js.map +1 -1
  105. package/dist/ops/blame.d.ts +424 -21
  106. package/dist/ops/blame.d.ts.map +1 -1
  107. package/dist/ops/blame.js +303 -20
  108. package/dist/ops/blame.js.map +1 -1
  109. package/dist/ops/branch.d.ts +583 -32
  110. package/dist/ops/branch.d.ts.map +1 -1
  111. package/dist/ops/branch.js +365 -23
  112. package/dist/ops/branch.js.map +1 -1
  113. package/dist/ops/commit-traversal.d.ts +164 -24
  114. package/dist/ops/commit-traversal.d.ts.map +1 -1
  115. package/dist/ops/commit-traversal.js +68 -2
  116. package/dist/ops/commit-traversal.js.map +1 -1
  117. package/dist/ops/commit.d.ts +387 -53
  118. package/dist/ops/commit.d.ts.map +1 -1
  119. package/dist/ops/commit.js +249 -29
  120. package/dist/ops/commit.js.map +1 -1
  121. package/dist/ops/merge-base.d.ts +195 -21
  122. package/dist/ops/merge-base.d.ts.map +1 -1
  123. package/dist/ops/merge-base.js +122 -12
  124. package/dist/ops/merge-base.js.map +1 -1
  125. package/dist/ops/merge.d.ts +600 -130
  126. package/dist/ops/merge.d.ts.map +1 -1
  127. package/dist/ops/merge.js +408 -60
  128. package/dist/ops/merge.js.map +1 -1
  129. package/dist/ops/tag.d.ts +67 -2
  130. package/dist/ops/tag.d.ts.map +1 -1
  131. package/dist/ops/tag.js +42 -1
  132. package/dist/ops/tag.js.map +1 -1
  133. package/dist/ops/tree-builder.d.ts +102 -6
  134. package/dist/ops/tree-builder.d.ts.map +1 -1
  135. package/dist/ops/tree-builder.js +30 -5
  136. package/dist/ops/tree-builder.js.map +1 -1
  137. package/dist/ops/tree-diff.d.ts +50 -2
  138. package/dist/ops/tree-diff.d.ts.map +1 -1
  139. package/dist/ops/tree-diff.js +50 -2
  140. package/dist/ops/tree-diff.js.map +1 -1
  141. package/dist/pack/delta.d.ts +211 -39
  142. package/dist/pack/delta.d.ts.map +1 -1
  143. package/dist/pack/delta.js +232 -46
  144. package/dist/pack/delta.js.map +1 -1
  145. package/dist/pack/format.d.ts +390 -28
  146. package/dist/pack/format.d.ts.map +1 -1
  147. package/dist/pack/format.js +344 -33
  148. package/dist/pack/format.js.map +1 -1
  149. package/dist/pack/full-generation.d.ts +313 -28
  150. package/dist/pack/full-generation.d.ts.map +1 -1
  151. package/dist/pack/full-generation.js +238 -19
  152. package/dist/pack/full-generation.js.map +1 -1
  153. package/dist/pack/generation.d.ts +346 -23
  154. package/dist/pack/generation.d.ts.map +1 -1
  155. package/dist/pack/generation.js +269 -21
  156. package/dist/pack/generation.js.map +1 -1
  157. package/dist/pack/index.d.ts +407 -86
  158. package/dist/pack/index.d.ts.map +1 -1
  159. package/dist/pack/index.js +351 -70
  160. package/dist/pack/index.js.map +1 -1
  161. package/dist/refs/branch.d.ts +517 -71
  162. package/dist/refs/branch.d.ts.map +1 -1
  163. package/dist/refs/branch.js +410 -26
  164. package/dist/refs/branch.js.map +1 -1
  165. package/dist/refs/storage.d.ts +610 -57
  166. package/dist/refs/storage.d.ts.map +1 -1
  167. package/dist/refs/storage.js +481 -29
  168. package/dist/refs/storage.js.map +1 -1
  169. package/dist/refs/tag.d.ts +677 -67
  170. package/dist/refs/tag.d.ts.map +1 -1
  171. package/dist/refs/tag.js +497 -30
  172. package/dist/refs/tag.js.map +1 -1
  173. package/dist/storage/lru-cache.d.ts +556 -53
  174. package/dist/storage/lru-cache.d.ts.map +1 -1
  175. package/dist/storage/lru-cache.js +439 -36
  176. package/dist/storage/lru-cache.js.map +1 -1
  177. package/dist/storage/object-index.d.ts +483 -38
  178. package/dist/storage/object-index.d.ts.map +1 -1
  179. package/dist/storage/object-index.js +388 -22
  180. package/dist/storage/object-index.js.map +1 -1
  181. package/dist/storage/r2-pack.d.ts +957 -94
  182. package/dist/storage/r2-pack.d.ts.map +1 -1
  183. package/dist/storage/r2-pack.js +756 -48
  184. package/dist/storage/r2-pack.js.map +1 -1
  185. package/dist/tiered/cdc-pipeline.d.ts +1610 -38
  186. package/dist/tiered/cdc-pipeline.d.ts.map +1 -1
  187. package/dist/tiered/cdc-pipeline.js +1131 -22
  188. package/dist/tiered/cdc-pipeline.js.map +1 -1
  189. package/dist/tiered/migration.d.ts +903 -41
  190. package/dist/tiered/migration.d.ts.map +1 -1
  191. package/dist/tiered/migration.js +646 -24
  192. package/dist/tiered/migration.js.map +1 -1
  193. package/dist/tiered/parquet-writer.d.ts +944 -47
  194. package/dist/tiered/parquet-writer.d.ts.map +1 -1
  195. package/dist/tiered/parquet-writer.js +667 -39
  196. package/dist/tiered/parquet-writer.js.map +1 -1
  197. package/dist/tiered/read-path.d.ts +728 -34
  198. package/dist/tiered/read-path.d.ts.map +1 -1
  199. package/dist/tiered/read-path.js +310 -27
  200. package/dist/tiered/read-path.js.map +1 -1
  201. package/dist/types/objects.d.ts +457 -0
  202. package/dist/types/objects.d.ts.map +1 -1
  203. package/dist/types/objects.js +305 -4
  204. package/dist/types/objects.js.map +1 -1
  205. package/dist/types/storage.d.ts +407 -35
  206. package/dist/types/storage.d.ts.map +1 -1
  207. package/dist/types/storage.js +27 -3
  208. package/dist/types/storage.js.map +1 -1
  209. package/dist/utils/hash.d.ts +133 -12
  210. package/dist/utils/hash.d.ts.map +1 -1
  211. package/dist/utils/hash.js +133 -12
  212. package/dist/utils/hash.js.map +1 -1
  213. package/dist/utils/sha1.d.ts +102 -9
  214. package/dist/utils/sha1.d.ts.map +1 -1
  215. package/dist/utils/sha1.js +114 -11
  216. package/dist/utils/sha1.js.map +1 -1
  217. package/dist/wire/capabilities.d.ts +896 -88
  218. package/dist/wire/capabilities.d.ts.map +1 -1
  219. package/dist/wire/capabilities.js +566 -62
  220. package/dist/wire/capabilities.js.map +1 -1
  221. package/dist/wire/pkt-line.d.ts +293 -15
  222. package/dist/wire/pkt-line.d.ts.map +1 -1
  223. package/dist/wire/pkt-line.js +251 -15
  224. package/dist/wire/pkt-line.js.map +1 -1
  225. package/dist/wire/receive-pack.d.ts +814 -64
  226. package/dist/wire/receive-pack.d.ts.map +1 -1
  227. package/dist/wire/receive-pack.js +542 -41
  228. package/dist/wire/receive-pack.js.map +1 -1
  229. package/dist/wire/smart-http.d.ts +575 -97
  230. package/dist/wire/smart-http.d.ts.map +1 -1
  231. package/dist/wire/smart-http.js +337 -46
  232. package/dist/wire/smart-http.js.map +1 -1
  233. package/dist/wire/upload-pack.d.ts +492 -98
  234. package/dist/wire/upload-pack.d.ts.map +1 -1
  235. package/dist/wire/upload-pack.js +347 -59
  236. package/dist/wire/upload-pack.js.map +1 -1
  237. package/package.json +10 -2
package/dist/tiered/parquet-writer.d.ts
@@ -1,248 +1,1145 @@
  /**
- * Parquet Writer for Git Analytics
+ * @fileoverview Parquet Writer for Git Analytics
  *
- * Provides functionality to write git analytics data to Parquet format:
- * - Schema definition with various field types
- * - Compression support (SNAPPY, GZIP, ZSTD, LZ4, UNCOMPRESSED)
- * - Row group management
- * - Metadata handling with statistics
+ * @description
+ * Provides functionality to write git analytics data to Parquet format, a
+ * columnar storage format optimized for analytical queries. This module
+ * enables efficient storage and querying of Git repository data.
  *
- * gitdo-6rz: Parquet writer implementation
+ * **Key Features:**
+ * - Schema definition with various field types (STRING, INT32, INT64, etc.)
+ * - Multiple compression algorithms (SNAPPY, GZIP, ZSTD, LZ4, UNCOMPRESSED)
+ * - Row group management for efficient columnar storage
+ * - Automatic and manual row group flushing
+ * - Column-level statistics generation (min, max, null count)
+ * - Custom key-value metadata support
+ * - Memory-efficient streaming writes
+ *
+ * **Parquet Format:**
+ * The generated files follow the Parquet format with:
+ * - Magic bytes "PAR1" at start and end
+ * - Row group data organized by columns
+ * - Footer metadata containing schema and statistics
+ *
+ * @example
+ * ```typescript
+ * // Define schema for commit analytics
+ * const schema = defineSchema([
+ * { name: 'commit_sha', type: ParquetFieldType.STRING, required: true },
+ * { name: 'author', type: ParquetFieldType.STRING, required: true },
+ * { name: 'timestamp', type: ParquetFieldType.TIMESTAMP_MILLIS, required: true },
+ * { name: 'file_count', type: ParquetFieldType.INT32, required: false }
+ * ])
+ *
+ * // Create writer with options
+ * const writer = createParquetWriter(schema, {
+ * rowGroupSize: 10000,
+ * compression: ParquetCompression.SNAPPY,
+ * enableStatistics: true
+ * })
+ *
+ * // Write data
+ * await writer.writeRows([
+ * { commit_sha: 'abc123...', author: 'alice', timestamp: Date.now(), file_count: 5 },
+ * { commit_sha: 'def456...', author: 'bob', timestamp: Date.now(), file_count: 3 }
+ * ])
+ *
+ * // Generate the Parquet file
+ * const buffer = await writer.toBuffer()
+ * ```
+ *
+ * @module tiered/parquet-writer
+ * @see {@link ParquetWriter} - Main writer class
+ * @see {@link defineSchema} - Schema definition helper
  */
  /**
- * Supported Parquet field types
+ * Supported Parquet field types.
+ *
+ * @description
+ * Defines the data types that can be used for fields in a Parquet schema.
+ * Each type maps to an appropriate physical and logical Parquet type.
+ *
+ * @example
+ * ```typescript
+ * const field: ParquetField = {
+ * name: 'count',
+ * type: ParquetFieldType.INT64,
+ * required: true
+ * }
+ * ```
+ *
+ * @enum {string}
  */
  export declare enum ParquetFieldType {
+ /**
+ * UTF-8 encoded string.
+ * Maps to Parquet BYTE_ARRAY with UTF8 logical type.
+ */
  STRING = "STRING",
+ /**
+ * 32-bit signed integer.
+ * Maps to Parquet INT32 physical type.
+ */
  INT32 = "INT32",
+ /**
+ * 64-bit signed integer.
+ * Maps to Parquet INT64 physical type.
+ */
  INT64 = "INT64",
+ /**
+ * Boolean value (true/false).
+ * Maps to Parquet BOOLEAN physical type.
+ */
  BOOLEAN = "BOOLEAN",
+ /**
+ * 32-bit IEEE 754 floating point.
+ * Maps to Parquet FLOAT physical type.
+ */
  FLOAT = "FLOAT",
+ /**
+ * 64-bit IEEE 754 floating point.
+ * Maps to Parquet DOUBLE physical type.
+ */
  DOUBLE = "DOUBLE",
+ /**
+ * Raw binary data.
+ * Maps to Parquet BYTE_ARRAY physical type.
+ */
  BINARY = "BINARY",
+ /**
+ * Timestamp with millisecond precision.
+ * Maps to Parquet INT64 with TIMESTAMP_MILLIS logical type.
+ */
  TIMESTAMP_MILLIS = "TIMESTAMP_MILLIS",
+ /**
+ * Timestamp with microsecond precision.
+ * Maps to Parquet INT64 with TIMESTAMP_MICROS logical type.
+ */
  TIMESTAMP_MICROS = "TIMESTAMP_MICROS"
  }
  /**
- * Supported compression types
+ * Supported compression types for Parquet data.
+ *
+ * @description
+ * Different compression algorithms offer trade-offs between compression
+ * ratio, compression speed, and decompression speed.
+ *
+ * **Comparison:**
+ * - SNAPPY: Fast compression/decompression, moderate ratio (default)
+ * - GZIP: Higher ratio, slower compression, fast decompression
+ * - ZSTD: Best ratio, good speed, requires more memory
+ * - LZ4: Fastest, lower ratio
+ * - UNCOMPRESSED: No compression overhead
+ *
+ * @example
+ * ```typescript
+ * const writer = createParquetWriter(schema, {
+ * compression: ParquetCompression.ZSTD
+ * })
+ * ```
+ *
+ * @enum {string}
  */
  export declare enum ParquetCompression {
+ /**
+ * No compression applied.
+ * Fastest writes, largest file size.
+ */
  UNCOMPRESSED = "UNCOMPRESSED",
+ /**
+ * Snappy compression (default).
+ * Good balance of speed and compression ratio.
+ */
  SNAPPY = "SNAPPY",
+ /**
+ * GZIP compression.
+ * Higher compression ratio, slower compression.
+ */
  GZIP = "GZIP",
+ /**
+ * Zstandard compression.
+ * Best compression ratio with good speed.
+ */
  ZSTD = "ZSTD",
+ /**
+ * LZ4 compression.
+ * Fastest compression, lower ratio.
+ */
  LZ4 = "LZ4"
  }
  /**
- * Field definition for schema
+ * Field definition for a Parquet schema.
+ *
+ * @description
+ * Defines a single column in the Parquet schema, including its name,
+ * data type, nullability, and optional metadata.
+ *
+ * @example
+ * ```typescript
+ * const nameField: ParquetField = {
+ * name: 'user_name',
+ * type: ParquetFieldType.STRING,
+ * required: true,
+ * metadata: { description: 'The user display name' }
+ * }
+ *
+ * const ageField: ParquetField = {
+ * name: 'age',
+ * type: ParquetFieldType.INT32,
+ * required: false // nullable
+ * }
+ * ```
+ *
+ * @interface ParquetField
  */
  export interface ParquetField {
+ /**
+ * Column name.
+ * Must be unique within the schema and non-empty.
+ */
  name: string;
+ /**
+ * Data type of the column.
+ *
+ * @see {@link ParquetFieldType}
+ */
  type: ParquetFieldType;
+ /**
+ * Whether the field is required (non-nullable).
+ * If true, null values will cause validation errors.
+ */
  required: boolean;
+ /**
+ * Optional key-value metadata for the field.
+ * Can be used for descriptions, units, etc.
+ */
  metadata?: Record<string, string>;
  }
  /**
- * Parquet schema definition
+ * Parquet schema definition.
+ *
+ * @description
+ * Defines the complete schema for a Parquet file, including all fields
+ * and optional schema-level metadata.
+ *
+ * @example
+ * ```typescript
+ * const schema: ParquetSchema = {
+ * fields: [
+ * { name: 'id', type: ParquetFieldType.INT64, required: true },
+ * { name: 'name', type: ParquetFieldType.STRING, required: true }
+ * ],
+ * metadata: {
+ * created_by: 'gitdo',
+ * version: '1.0'
+ * }
+ * }
+ * ```
+ *
+ * @interface ParquetSchema
  */
  export interface ParquetSchema {
+ /**
+ * Array of field definitions for all columns.
+ * Order determines column order in the file.
+ */
  fields: ParquetField[];
+ /**
+ * Optional schema-level metadata.
+ * Stored in the Parquet file footer.
+ */
  metadata?: Record<string, string>;
  }
  /**
- * Options for creating a Parquet writer
+ * Options for creating a Parquet writer.
+ *
+ * @description
+ * Configuration options that control how the Parquet file is written,
+ * including row group sizing, compression, and statistics generation.
+ *
+ * @example
+ * ```typescript
+ * const options: ParquetWriteOptions = {
+ * rowGroupSize: 50000, // 50K rows per group
+ * rowGroupMemoryLimit: 64 * 1024 * 1024, // 64MB memory limit
+ * compression: ParquetCompression.ZSTD,
+ * columnCompression: {
+ * 'binary_data': ParquetCompression.LZ4 // Fast for binary
+ * },
+ * enableStatistics: true,
+ * sortBy: ['timestamp'],
+ * partitionColumns: ['date']
+ * }
+ * ```
+ *
+ * @interface ParquetWriteOptions
  */
  export interface ParquetWriteOptions {
+ /**
+ * Maximum number of rows per row group.
+ * Smaller groups = more granular reads, larger groups = better compression.
+ *
+ * @default 65536
+ */
  rowGroupSize?: number;
+ /**
+ * Maximum memory size in bytes for a row group.
+ * Triggers flush when reached, regardless of row count.
+ */
  rowGroupMemoryLimit?: number;
+ /**
+ * Default compression algorithm for all columns.
+ *
+ * @default ParquetCompression.SNAPPY
+ */
  compression?: ParquetCompression;
+ /**
+ * Per-column compression overrides.
+ * Keys are column names, values are compression types.
+ */
  columnCompression?: Record<string, ParquetCompression>;
+ /**
+ * Whether to compute and store column statistics.
+ * Enables predicate pushdown during queries.
+ *
+ * @default false
+ */
  enableStatistics?: boolean;
+ /**
+ * Columns to sort data by within each row group.
+ * Improves query performance for sorted access patterns.
+ */
  sortBy?: string[];
+ /**
+ * Columns used for partitioning.
+ * Informational metadata for partitioned datasets.
+ */
  partitionColumns?: string[];
  }
  /**
- * Column statistics
+ * Statistics for a single column in a row group.
+ *
+ * @description
+ * Column statistics enable query engines to skip row groups that don't
+ * contain relevant data (predicate pushdown).
+ *
+ * @example
+ * ```typescript
+ * const stats: ColumnStatistics = {
+ * min: 100,
+ * max: 999,
+ * nullCount: 5,
+ * distinctCount: 850
+ * }
+ * ```
+ *
+ * @interface ColumnStatistics
  */
  export interface ColumnStatistics {
+ /**
+ * Minimum value in the column.
+ * Type depends on column type.
+ */
  min?: number | string | boolean;
+ /**
+ * Maximum value in the column.
+ * Type depends on column type.
+ */
  max?: number | string | boolean;
+ /**
+ * Number of null values in the column.
+ */
  nullCount?: number;
+ /**
+ * Approximate distinct value count.
+ * May not be exact for large datasets.
+ */
  distinctCount?: number;
  }
  /**
- * Column metadata in a row group
+ * Metadata for a column chunk within a row group.
+ *
+ * @description
+ * Contains information about a single column's data within a row group,
+ * including compression, sizes, and statistics.
+ *
+ * @interface ColumnChunkMetadata
  */
  export interface ColumnChunkMetadata {
+ /**
+ * Column name.
+ */
  column: string;
+ /**
+ * Data type of the column.
+ */
  type: ParquetFieldType;
+ /**
+ * Compression used for this column chunk.
+ */
  compression: ParquetCompression;
+ /**
+ * Size in bytes after compression.
+ */
  encodedSize: number;
+ /**
+ * Size in bytes before compression.
+ */
  uncompressedSize: number;
+ /**
+ * Column statistics if statistics are enabled.
+ */
  statistics?: ColumnStatistics;
  }
  /**
- * Row group representation
+ * Row group representation in the Parquet file.
+ *
+ * @description
+ * A row group is a horizontal partition of the data containing all columns
+ * for a subset of rows. Row groups enable parallel processing and predicate
+ * pushdown optimizations.
+ *
+ * @interface RowGroup
  */
  export interface RowGroup {
+ /**
+ * Number of rows in this row group.
+ */
  numRows: number;
+ /**
+ * Total compressed size in bytes.
+ */
  totalByteSize: number;
+ /**
+ * Metadata for each column chunk.
+ */
  columns: ColumnChunkMetadata[];
  }
  /**
- * Parquet file metadata
+ * Complete metadata for a Parquet file.
+ *
+ * @description
+ * Contains all metadata stored in the Parquet file footer, including
+ * schema, row groups, and statistics. Used when reading files.
+ *
+ * @example
+ * ```typescript
+ * const metadata = getMetadata(parquetBuffer)
+ * console.log(`Rows: ${metadata.numRows}`)
+ * console.log(`Row groups: ${metadata.rowGroups.length}`)
+ * console.log(`Compression: ${metadata.compression}`)
+ * ```
+ *
+ * @interface ParquetMetadata
  */
  export interface ParquetMetadata {
+ /**
+ * The file's schema definition.
+ */
  schema: ParquetSchema;
+ /**
+ * Total number of rows in the file.
+ */
  numRows: number;
+ /**
+ * Array of row group metadata.
+ */
  rowGroups: RowGroup[];
+ /**
+ * Default compression algorithm used.
+ */
  compression: ParquetCompression;
+ /**
+ * Per-column compression settings.
+ */
  columnMetadata?: Record<string, {
  compression: ParquetCompression;
  }>;
+ /**
+ * Custom key-value metadata.
+ */
  keyValueMetadata?: Record<string, string>;
+ /**
+ * Unix timestamp when the file was created.
+ */
  createdAt: number;
+ /**
+ * Total file size in bytes.
+ */
  fileSize: number;
+ /**
+ * Columns the data is sorted by.
+ */
  sortedBy?: string[];
+ /**
+ * Columns used for partitioning.
+ */
  partitionColumns?: string[];
  }
  /**
- * Mock output stream interface
+ * Mock output stream interface for writing Parquet data.
+ *
+ * @description
+ * Simple interface for streaming Parquet output to a destination.
+ * Can be implemented for files, network streams, etc.
+ *
+ * @example
+ * ```typescript
+ * class BufferOutputStream implements OutputStream {
+ * private chunks: Uint8Array[] = []
+ *
+ * write(data: Uint8Array): void {
+ * this.chunks.push(data)
+ * }
+ *
+ * getBuffer(): Uint8Array {
+ * const total = this.chunks.reduce((sum, c) => sum + c.length, 0)
+ * const result = new Uint8Array(total)
+ * let offset = 0
+ * for (const chunk of this.chunks) {
+ * result.set(chunk, offset)
+ * offset += chunk.length
+ * }
+ * return result
+ * }
+ * }
+ * ```
+ *
+ * @interface OutputStream
  */
  export interface OutputStream {
+ /**
+ * Writes data to the output stream.
+ *
+ * @param data - The data to write
+ */
  write(data: Uint8Array): void;
  }
  /**
- * Error class for Parquet operations
+ * Error class for Parquet-related operations.
+ *
+ * @description
+ * Thrown when Parquet operations fail, such as schema validation errors,
+ * invalid data types, or malformed files.
+ *
+ * @example
+ * ```typescript
+ * try {
+ * await writer.writeRow({ invalid_field: 'value' })
+ * } catch (error) {
+ * if (error instanceof ParquetError) {
+ * console.log(`Parquet error (${error.code}): ${error.message}`)
+ * }
+ * }
+ * ```
+ *
+ * @class ParquetError
+ * @extends Error
  */
  export declare class ParquetError extends Error {
  readonly code: string;
+ /**
+ * Creates a new ParquetError.
+ *
+ * @param message - Human-readable error message
+ * @param code - Error code for programmatic handling
+ *
+ * @example
+ * ```typescript
+ * throw new ParquetError('Field name cannot be empty', 'EMPTY_FIELD_NAME')
+ * ```
+ */
  constructor(message: string, code: string);
  }
  /**
- * Parquet writer for git analytics data
+ * Parquet writer for git analytics data.
+ *
+ * @description
+ * ParquetWriter provides a streaming interface for writing data to Parquet
+ * format. It handles schema validation, row group management, compression,
+ * and statistics generation.
+ *
+ * **Usage Pattern:**
+ * 1. Create a schema using `defineSchema()`
+ * 2. Create a writer with `createParquetWriter()` or `new ParquetWriter()`
+ * 3. Write rows using `writeRow()` or `writeRows()`
+ * 4. Generate the file with `toBuffer()` or `writeTo()`
+ *
+ * **Row Group Management:**
+ * Rows are buffered in memory until the row group is full (by row count
+ * or memory limit), then flushed. You can also manually flush with
+ * `flushRowGroup()`.
+ *
+ * **Thread Safety:**
+ * Not thread-safe. Use separate writer instances for concurrent writes.
+ *
+ * @example
+ * ```typescript
+ * // Create schema
+ * const schema = defineSchema([
+ * { name: 'sha', type: ParquetFieldType.STRING, required: true },
+ * { name: 'type', type: ParquetFieldType.STRING, required: true },
+ * { name: 'size', type: ParquetFieldType.INT64, required: true },
+ * { name: 'timestamp', type: ParquetFieldType.TIMESTAMP_MILLIS, required: true }
+ * ])
+ *
+ * // Create writer
+ * const writer = new ParquetWriter(schema, {
+ * rowGroupSize: 10000,
+ * compression: ParquetCompression.SNAPPY,
+ * enableStatistics: true
+ * })
+ *
+ * // Write data
+ * for (const object of gitObjects) {
+ * await writer.writeRow({
+ * sha: object.sha,
+ * type: object.type,
+ * size: object.size,
+ * timestamp: Date.now()
+ * })
+ * }
+ *
+ * // Set custom metadata
+ * writer.setMetadata('git_version', '2.40.0')
+ * writer.setMetadata('repository', 'github.com/org/repo')
+ *
+ * // Generate file
+ * const buffer = await writer.toBuffer()
+ * console.log(`Generated ${buffer.length} bytes`)
+ * console.log(`Rows: ${writer.rowCount}`)
+ * console.log(`Row groups: ${writer.rowGroupCount}`)
+ *
+ * // Reset for reuse
+ * writer.reset()
+ * ```
+ *
+ * @class ParquetWriter
  */
  export declare class ParquetWriter {
+ /**
+ * The Parquet schema for this writer.
+ * @readonly
+ */
  readonly schema: ParquetSchema;
+ /**
+ * Resolved options with defaults applied.
+ * @readonly
+ */
  readonly options: Required<Pick<ParquetWriteOptions, 'rowGroupSize' | 'compression'>> & ParquetWriteOptions;
+ /**
+ * Total row count written.
+ * @private
+ */
  private _rowCount;
+ /**
+ * Completed row groups.
+ * @private
+ */
  private _rowGroups;
+ /**
+ * Current row group being built.
+ * @private
+ */
  private _currentRowGroup;
+ /**
+ * Whether the writer has been closed.
+ * @private
+ */
  private _isClosed;
+ /**
+ * Custom key-value metadata.
+ * @private
+ */
  private _keyValueMetadata;
+ /**
+ * Creation timestamp.
+ * @private
+ */
  private _createdAt;
+ /**
+ * Creates a new ParquetWriter instance.
+ *
+ * @param schema - The Parquet schema defining columns
+ * @param options - Writer configuration options
+ *
+ * @example
+ * ```typescript
+ * const writer = new ParquetWriter(schema, {
+ * rowGroupSize: 50000,
+ * compression: ParquetCompression.GZIP
+ * })
+ * ```
+ */
  constructor(schema: ParquetSchema, options?: ParquetWriteOptions);
  /**
- * Get total row count
+ * Gets the total row count written to the writer.
+ *
+ * @description
+ * Returns the total number of rows written, including rows in the
+ * current unflushed row group.
+ *
+ * @returns Total row count
+ *
+ * @example
+ * ```typescript
+ * await writer.writeRows(data)
+ * console.log(`Wrote ${writer.rowCount} rows`)
+ * ```
  */
  get rowCount(): number;
  /**
- * Get number of row groups (including current pending row group if non-empty)
+ * Gets the number of row groups.
+ *
+ * @description
+ * Returns the number of completed row groups plus one if there's
+ * a pending row group with data.
+ *
+ * @returns Number of row groups
+ *
+ * @example
+ * ```typescript
+ * console.log(`Row groups: ${writer.rowGroupCount}`)
+ * ```
  */
  get rowGroupCount(): number;
  /**
- * Check if writer is closed
+ * Checks if the writer has been closed.
+ *
+ * @description
+ * A closed writer cannot accept new rows. Writers are closed
+ * implicitly by `closeWriter()`.
+ *
+ * @returns true if closed
+ *
+ * @example
+ * ```typescript
+ * if (!writer.isClosed) {
+ * await writer.writeRow(row)
+ * }
+ * ```
  */
  get isClosed(): boolean;
  /**
- * Write a single row
+ * Writes a single row to the Parquet file.
+ *
+ * @description
+ * Validates the row against the schema and adds it to the current
+ * row group. Automatically flushes the row group when it reaches
+ * the configured size or memory limit.
+ *
+ * @param row - Object with column values keyed by column name
+ * @returns Promise that resolves when the row is written
+ *
+ * @throws {ParquetError} WRITER_CLOSED - If writer is closed
+ * @throws {ParquetError} MISSING_REQUIRED_FIELD - If required field is missing
+ * @throws {ParquetError} INVALID_FIELD_TYPE - If field value type doesn't match schema
+ *
+ * @example
+ * ```typescript
+ * await writer.writeRow({
+ * id: 123,
+ * name: 'Alice',
+ * active: true
+ * })
+ * ```
  */
  writeRow(row: Record<string, unknown>): Promise<void>;
  /**
- * Write multiple rows at once
+ * Writes multiple rows to the Parquet file.
+ *
+ * @description
+ * Convenience method that writes an array of rows sequentially.
+ * Each row is validated and may trigger row group flushes.
+ *
+ * @param rows - Array of row objects to write
+ * @returns Promise that resolves when all rows are written
+ *
+ * @throws {ParquetError} Any error from writeRow()
+ *
+ * @example
+ * ```typescript
+ * await writer.writeRows([
+ * { id: 1, name: 'Alice' },
+ * { id: 2, name: 'Bob' },
+ * { id: 3, name: 'Carol' }
+ * ])
+ * ```
  */
  writeRows(rows: Record<string, unknown>[]): Promise<void>;
  /**
- * Manually flush the current row group
+ * Manually flushes the current row group.
+ *
+ * @description
+ * Forces the current row group to be finalized and stored, even if
+ * it hasn't reached the size limit. Has no effect if the current
+ * row group is empty.
+ *
+ * @returns Promise that resolves when flush is complete
+ *
+ * @example
+ * ```typescript
+ * // Write some rows
+ * await writer.writeRows(batch1)
+ *
+ * // Force flush before writing next batch
+ * await writer.flushRowGroup()
+ *
+ * // Continue writing
+ * await writer.writeRows(batch2)
+ * ```
  */
  flushRowGroup(): Promise<void>;
  /**
- * Get the current row group memory size
+ * Gets the current row group's memory size.
+ *
+ * @description
+ * Returns the estimated memory consumption of the unflushed row group.
+ * Useful for monitoring memory usage during streaming writes.
+ *
+ * @returns Memory size in bytes
+ *
+ * @example
+ * ```typescript
+ * if (writer.currentRowGroupMemorySize() > 50 * 1024 * 1024) {
+ * console.log('Row group using significant memory')
+ * await writer.flushRowGroup()
+ * }
+ * ```
  */
  currentRowGroupMemorySize(): number;
  /**
- * Get completed row groups
+ * Gets the completed row groups.
+ *
+ * @description
+ * Returns a copy of the completed row group metadata array.
+ * Does not include the current unflushed row group.
+ *
+ * @returns Array of row group metadata
+ *
+ * @example
+ * ```typescript
+ * for (const rg of writer.getRowGroups()) {
+ * console.log(`Row group: ${rg.numRows} rows, ${rg.totalByteSize} bytes`)
+ * }
+ * ```
  */
  getRowGroups(): RowGroup[];
  /**
- * Set custom key-value metadata
+ * Sets a custom key-value metadata entry.
+ *
+ * @description
+ * Adds custom metadata that will be stored in the Parquet file footer.
+ * Can be used for versioning, provenance, or application-specific data.
+ *
+ * @param key - Metadata key
+ * @param value - Metadata value
+ *
+ * @example
+ * ```typescript
+ * writer.setMetadata('created_by', 'gitdo-analytics')
+ * writer.setMetadata('schema_version', '2.0')
+ * writer.setMetadata('repository', 'github.com/org/repo')
+ * ```
  */
  setMetadata(key: string, value: string): void;
  /**
- * Generate the Parquet file as a buffer
+ * Generates the Parquet file as a buffer.
+ *
+ * @description
+ * Finalizes the file by flushing any remaining rows and generating
+ * the complete Parquet file structure including header, row groups,
+ * and footer with metadata.
+ *
+ * @returns Promise resolving to the complete Parquet file as Uint8Array
+ *
+ * @example
+ * ```typescript
+ * const buffer = await writer.toBuffer()
+ * await fs.writeFile('data.parquet', buffer)
+ * ```
  */
  toBuffer(): Promise<Uint8Array>;
  /**
- * Write to an output stream
+ * Writes the Parquet file to an output stream.
+ *
+ * @description
+ * Generates the file and writes it to the provided output stream.
+ * Useful for streaming to files or network destinations.
+ *
+ * @param output - The output stream to write to
+ * @returns Promise that resolves when writing is complete
+ *
+ * @example
+ * ```typescript
+ * const output = new FileOutputStream('data.parquet')
+ * await writer.writeTo(output)
+ * output.close()
+ * ```
  */
  writeTo(output: OutputStream): Promise<void>;
  /**
- * Reset the writer state
+ * Resets the writer to its initial state.
+ *
+ * @description
+ * Clears all written data, row groups, and metadata. The schema
+ * and options remain unchanged. Useful for writing multiple files
+ * with the same configuration.
+ *
+ * @example
+ * ```typescript
+ * // Write first file
+ * await writer.writeRows(batch1)
+ * const file1 = await writer.toBuffer()
+ *
+ * // Reset and write second file
+ * writer.reset()
+ * await writer.writeRows(batch2)
+ * const file2 = await writer.toBuffer()
+ * ```
  */
  reset(): void;
  /**
- * Validate a row against the schema
+ * Validates a row against the schema.
+ *
+ * @param row - The row to validate
+ * @throws {ParquetError} If validation fails
+ * @private
  */
  private _validateRow;
  /**
- * Validate a value matches the expected type
+ * Validates a value matches the expected Parquet type.
+ *
+ * @param value - The value to validate
+ * @param type - The expected Parquet type
+ * @returns true if valid, false otherwise
+ * @private
  */
  private _validateType;
  /**
- * Estimate the memory size of a row
+ * Estimates the memory size of a row.
+ *
+ * @param row - The row to estimate
+ * @returns Estimated size in bytes
+ * @private
  */
  private _estimateRowSize;
  /**
- * Build a row group from internal representation
+ * Builds a row group from internal representation.
+ *
+ * @param internal - The internal row group data
+ * @returns The row group metadata
+ * @private
  */
  private _buildRowGroup;
  /**
- * Compute statistics for a column
+ * Computes statistics for a column.
+ *
+ * @param values - The column values
+ * @param type - The column type
+ * @returns Column statistics
+ * @private
  */
  private _computeStatistics;
  /**
- * Estimate encoded size after compression
+ * Estimates the encoded size after compression.
+ *
+ * @param values - The column values
+ * @param type - The column type
+ * @param compression - The compression type
+ * @returns Estimated compressed size in bytes
+ * @private
  */
  private _estimateEncodedSize;
  /**
- * Estimate uncompressed size
+ * Estimates the uncompressed size of column values.
+ *
+ * @param values - The column values
+ * @param type - The column type
+ * @returns Estimated uncompressed size in bytes
+ * @private
  */
  private _estimateUncompressedSize;
  /**
- * Generate the complete Parquet file bytes
+ * Generates the complete Parquet file bytes.
+ *
+ * @returns The complete Parquet file as Uint8Array
+ * @private
  */
  private _generateParquetBytes;
  /**
- * Simple compression simulation for non-gzip formats
+ * Simple compression simulation for non-gzip formats.
+ *
+ * @param data - Data to compress
+ * @param compression - Compression type
+ * @returns Compressed data
+ * @private
  */
  private _simpleCompress;
  }
  /**
- * Define a Parquet schema
+ * Defines a Parquet schema.
+ *
+ * @description
+ * Creates a validated Parquet schema from field definitions. Validates that:
+ * - Schema has at least one field
+ * - All field names are non-empty
+ * - All field names are unique
+ *
+ * @param fields - Array of field definitions
+ * @param metadata - Optional schema-level metadata
+ * @returns Validated Parquet schema
+ *
+ * @throws {ParquetError} EMPTY_SCHEMA - If fields array is empty
+ * @throws {ParquetError} EMPTY_FIELD_NAME - If any field name is empty
+ * @throws {ParquetError} DUPLICATE_FIELD - If field names are not unique
+ *
+ * @example
+ * ```typescript
+ * const schema = defineSchema([
+ * { name: 'id', type: ParquetFieldType.INT64, required: true },
+ * { name: 'name', type: ParquetFieldType.STRING, required: true },
+ * { name: 'age', type: ParquetFieldType.INT32, required: false },
+ * { name: 'created_at', type: ParquetFieldType.TIMESTAMP_MILLIS, required: true }
+ * ], {
+ * version: '1.0',
+ * description: 'User records'
+ * })
+ * ```
  */
  export declare function defineSchema(fields: ParquetField[], metadata?: Record<string, string>): ParquetSchema;
  /**
- * Create a Parquet writer
+ * Creates a Parquet writer.
+ *
+ * @description
+ * Factory function to create a ParquetWriter with the specified schema
+ * and options. Equivalent to `new ParquetWriter(schema, options)`.
+ *
+ * @param schema - The Parquet schema
+ * @param options - Writer options
+ * @returns A new ParquetWriter instance
+ *
+ * @example
+ * ```typescript
+ * const writer = createParquetWriter(schema, {
+ * rowGroupSize: 10000,
+ * compression: ParquetCompression.SNAPPY
+ * })
+ * ```
  */
  export declare function createParquetWriter(schema: ParquetSchema, options?: ParquetWriteOptions): ParquetWriter;
  /**
- * Write data directly to a Parquet file
+ * Writes data directly to a Parquet file buffer.
+ *
+ * @description
+ * Convenience function that creates a writer, writes all rows, and returns
+ * the complete Parquet file. Useful for simple one-shot writes.
+ *
+ * @param schema - The Parquet schema
+ * @param rows - Array of rows to write
+ * @param options - Writer options
+ * @returns Promise resolving to the complete Parquet file as Uint8Array
+ *
+ * @example
+ * ```typescript
+ * const buffer = await writeParquetFile(schema, [
+ * { id: 1, name: 'Alice' },
+ * { id: 2, name: 'Bob' }
+ * ], {
+ * compression: ParquetCompression.GZIP
+ * })
+ *
+ * await fs.writeFile('data.parquet', buffer)
+ * ```
  */
  export declare function writeParquetFile(schema: ParquetSchema, rows: Record<string, unknown>[], options?: ParquetWriteOptions): Promise<Uint8Array>;
  /**
- * Close a writer and return the final buffer
+ * Closes a writer and returns the final buffer.
+ *
+ * @description
+ * Generates the final Parquet file buffer and marks the writer as closed.
+ * The writer cannot be used for further writes after calling this function.
+ *
+ * @param writer - The ParquetWriter to close
+ * @returns Promise resolving to the complete Parquet file as Uint8Array
+ *
+ * @example
+ * ```typescript
+ * await writer.writeRows(data)
+ * const buffer = await closeWriter(writer)
+ * console.log(writer.isClosed) // true
+ * ```
  */
  export declare function closeWriter(writer: ParquetWriter): Promise<Uint8Array>;
  /**
- * Add a row group to the writer
+ * Adds a row group to the writer.
+ *
+ * @description
+ * Writes multiple rows and then flushes them as a single row group.
+ * Useful when you want explicit control over row group boundaries.
+ *
+ * @param writer - The ParquetWriter to use
+ * @param rows - Array of rows for this row group
+ * @returns Promise that resolves when the row group is written
+ *
+ * @example
+ * ```typescript
+ * // Add explicit row groups
+ * await addRowGroup(writer, batch1) // First row group
+ * await addRowGroup(writer, batch2) // Second row group
+ * ```
  */
  export declare function addRowGroup(writer: ParquetWriter, rows: Record<string, unknown>[]): Promise<void>;
  /**
- * Get metadata from a Parquet file buffer
+ * Gets metadata from a Parquet file buffer.
+ *
+ * @description
+ * Parses a Parquet file buffer and extracts the metadata including
+ * schema, row groups, compression settings, and custom metadata.
+ *
+ * @param bytes - The Parquet file buffer
+ * @returns The parsed metadata
+ *
+ * @throws {ParquetError} INVALID_MAGIC - If file doesn't have valid Parquet magic bytes
+ *
+ * @example
+ * ```typescript
+ * const buffer = await fs.readFile('data.parquet')
+ * const metadata = getMetadata(buffer)
+ *
+ * console.log(`Rows: ${metadata.numRows}`)
+ * console.log(`Schema: ${metadata.schema.fields.map(f => f.name).join(', ')}`)
+ * console.log(`Row groups: ${metadata.rowGroups.length}`)
+ *
+ * for (const rg of metadata.rowGroups) {
+ * console.log(` - ${rg.numRows} rows, ${rg.totalByteSize} bytes`)
+ * }
+ * ```
  */
  export declare function getMetadata(bytes: Uint8Array): ParquetMetadata;
  /**
- * Set compression for a writer
+ * Sets the compression type for a writer.
+ *
+ * @description
+ * Updates the default compression algorithm for a writer. Affects all
+ * subsequently written data. Columns with explicit compression settings
+ * in columnCompression are not affected.
+ *
+ * @param writer - The ParquetWriter to update
+ * @param compression - The new compression type
+ *
+ * @example
+ * ```typescript
+ * const writer = createParquetWriter(schema)
+ *
+ * // Write some rows with SNAPPY (default)
+ * await writer.writeRows(batch1)
+ * await writer.flushRowGroup()
+ *
+ * // Switch to GZIP for remaining data
+ * setCompression(writer, ParquetCompression.GZIP)
+ * await writer.writeRows(batch2)
+ * ```
  */
  export declare function setCompression(writer: ParquetWriter, compression: ParquetCompression): void;
  //# sourceMappingURL=parquet-writer.d.ts.map