gitx.do 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (344)
  1. package/README.md +40 -353
  2. package/dist/do/logger.d.ts +50 -0
  3. package/dist/do/logger.d.ts.map +1 -0
  4. package/dist/do/logger.js +122 -0
  5. package/dist/do/logger.js.map +1 -0
  6. package/dist/{durable-object → do}/schema.d.ts +3 -3
  7. package/dist/do/schema.d.ts.map +1 -0
  8. package/dist/{durable-object → do}/schema.js +4 -3
  9. package/dist/do/schema.js.map +1 -0
  10. package/dist/do/types.d.ts +267 -0
  11. package/dist/do/types.d.ts.map +1 -0
  12. package/dist/do/types.js +62 -0
  13. package/dist/do/types.js.map +1 -0
  14. package/dist/index.d.ts +15 -415
  15. package/dist/index.d.ts.map +1 -1
  16. package/dist/index.js +31 -483
  17. package/dist/index.js.map +1 -1
  18. package/package.json +13 -21
  19. package/dist/cli/commands/add.d.ts +0 -174
  20. package/dist/cli/commands/add.d.ts.map +0 -1
  21. package/dist/cli/commands/add.js +0 -131
  22. package/dist/cli/commands/add.js.map +0 -1
  23. package/dist/cli/commands/blame.d.ts +0 -259
  24. package/dist/cli/commands/blame.d.ts.map +0 -1
  25. package/dist/cli/commands/blame.js +0 -609
  26. package/dist/cli/commands/blame.js.map +0 -1
  27. package/dist/cli/commands/branch.d.ts +0 -249
  28. package/dist/cli/commands/branch.d.ts.map +0 -1
  29. package/dist/cli/commands/branch.js +0 -693
  30. package/dist/cli/commands/branch.js.map +0 -1
  31. package/dist/cli/commands/commit.d.ts +0 -182
  32. package/dist/cli/commands/commit.d.ts.map +0 -1
  33. package/dist/cli/commands/commit.js +0 -437
  34. package/dist/cli/commands/commit.js.map +0 -1
  35. package/dist/cli/commands/diff.d.ts +0 -464
  36. package/dist/cli/commands/diff.d.ts.map +0 -1
  37. package/dist/cli/commands/diff.js +0 -958
  38. package/dist/cli/commands/diff.js.map +0 -1
  39. package/dist/cli/commands/log.d.ts +0 -239
  40. package/dist/cli/commands/log.d.ts.map +0 -1
  41. package/dist/cli/commands/log.js +0 -535
  42. package/dist/cli/commands/log.js.map +0 -1
  43. package/dist/cli/commands/merge.d.ts +0 -106
  44. package/dist/cli/commands/merge.d.ts.map +0 -1
  45. package/dist/cli/commands/merge.js +0 -55
  46. package/dist/cli/commands/merge.js.map +0 -1
  47. package/dist/cli/commands/review.d.ts +0 -457
  48. package/dist/cli/commands/review.d.ts.map +0 -1
  49. package/dist/cli/commands/review.js +0 -533
  50. package/dist/cli/commands/review.js.map +0 -1
  51. package/dist/cli/commands/status.d.ts +0 -269
  52. package/dist/cli/commands/status.d.ts.map +0 -1
  53. package/dist/cli/commands/status.js +0 -493
  54. package/dist/cli/commands/status.js.map +0 -1
  55. package/dist/cli/commands/web.d.ts +0 -199
  56. package/dist/cli/commands/web.d.ts.map +0 -1
  57. package/dist/cli/commands/web.js +0 -696
  58. package/dist/cli/commands/web.js.map +0 -1
  59. package/dist/cli/fs-adapter.d.ts +0 -656
  60. package/dist/cli/fs-adapter.d.ts.map +0 -1
  61. package/dist/cli/fs-adapter.js +0 -1179
  62. package/dist/cli/fs-adapter.js.map +0 -1
  63. package/dist/cli/fsx-cli-adapter.d.ts +0 -359
  64. package/dist/cli/fsx-cli-adapter.d.ts.map +0 -1
  65. package/dist/cli/fsx-cli-adapter.js +0 -619
  66. package/dist/cli/fsx-cli-adapter.js.map +0 -1
  67. package/dist/cli/index.d.ts +0 -387
  68. package/dist/cli/index.d.ts.map +0 -1
  69. package/dist/cli/index.js +0 -523
  70. package/dist/cli/index.js.map +0 -1
  71. package/dist/cli/ui/components/DiffView.d.ts +0 -7
  72. package/dist/cli/ui/components/DiffView.d.ts.map +0 -1
  73. package/dist/cli/ui/components/DiffView.js +0 -11
  74. package/dist/cli/ui/components/DiffView.js.map +0 -1
  75. package/dist/cli/ui/components/ErrorDisplay.d.ts +0 -6
  76. package/dist/cli/ui/components/ErrorDisplay.d.ts.map +0 -1
  77. package/dist/cli/ui/components/ErrorDisplay.js +0 -11
  78. package/dist/cli/ui/components/ErrorDisplay.js.map +0 -1
  79. package/dist/cli/ui/components/FuzzySearch.d.ts +0 -9
  80. package/dist/cli/ui/components/FuzzySearch.d.ts.map +0 -1
  81. package/dist/cli/ui/components/FuzzySearch.js +0 -12
  82. package/dist/cli/ui/components/FuzzySearch.js.map +0 -1
  83. package/dist/cli/ui/components/LoadingSpinner.d.ts +0 -6
  84. package/dist/cli/ui/components/LoadingSpinner.d.ts.map +0 -1
  85. package/dist/cli/ui/components/LoadingSpinner.js +0 -10
  86. package/dist/cli/ui/components/LoadingSpinner.js.map +0 -1
  87. package/dist/cli/ui/components/NavigationList.d.ts +0 -9
  88. package/dist/cli/ui/components/NavigationList.d.ts.map +0 -1
  89. package/dist/cli/ui/components/NavigationList.js +0 -11
  90. package/dist/cli/ui/components/NavigationList.js.map +0 -1
  91. package/dist/cli/ui/components/ScrollableContent.d.ts +0 -8
  92. package/dist/cli/ui/components/ScrollableContent.d.ts.map +0 -1
  93. package/dist/cli/ui/components/ScrollableContent.js +0 -11
  94. package/dist/cli/ui/components/ScrollableContent.js.map +0 -1
  95. package/dist/cli/ui/components/index.d.ts +0 -7
  96. package/dist/cli/ui/components/index.d.ts.map +0 -1
  97. package/dist/cli/ui/components/index.js +0 -9
  98. package/dist/cli/ui/components/index.js.map +0 -1
  99. package/dist/cli/ui/terminal-ui.d.ts +0 -52
  100. package/dist/cli/ui/terminal-ui.d.ts.map +0 -1
  101. package/dist/cli/ui/terminal-ui.js +0 -121
  102. package/dist/cli/ui/terminal-ui.js.map +0 -1
  103. package/dist/do/BashModule.d.ts +0 -871
  104. package/dist/do/BashModule.d.ts.map +0 -1
  105. package/dist/do/BashModule.js +0 -1143
  106. package/dist/do/BashModule.js.map +0 -1
  107. package/dist/do/FsModule.d.ts +0 -601
  108. package/dist/do/FsModule.d.ts.map +0 -1
  109. package/dist/do/FsModule.js +0 -1120
  110. package/dist/do/FsModule.js.map +0 -1
  111. package/dist/do/GitModule.d.ts +0 -635
  112. package/dist/do/GitModule.d.ts.map +0 -1
  113. package/dist/do/GitModule.js +0 -781
  114. package/dist/do/GitModule.js.map +0 -1
  115. package/dist/do/GitRepoDO.d.ts +0 -281
  116. package/dist/do/GitRepoDO.d.ts.map +0 -1
  117. package/dist/do/GitRepoDO.js +0 -479
  118. package/dist/do/GitRepoDO.js.map +0 -1
  119. package/dist/do/bash-ast.d.ts +0 -246
  120. package/dist/do/bash-ast.d.ts.map +0 -1
  121. package/dist/do/bash-ast.js +0 -888
  122. package/dist/do/bash-ast.js.map +0 -1
  123. package/dist/do/container-executor.d.ts +0 -491
  124. package/dist/do/container-executor.d.ts.map +0 -1
  125. package/dist/do/container-executor.js +0 -730
  126. package/dist/do/container-executor.js.map +0 -1
  127. package/dist/do/index.d.ts +0 -53
  128. package/dist/do/index.d.ts.map +0 -1
  129. package/dist/do/index.js +0 -91
  130. package/dist/do/index.js.map +0 -1
  131. package/dist/do/tiered-storage.d.ts +0 -403
  132. package/dist/do/tiered-storage.d.ts.map +0 -1
  133. package/dist/do/tiered-storage.js +0 -689
  134. package/dist/do/tiered-storage.js.map +0 -1
  135. package/dist/do/withBash.d.ts +0 -231
  136. package/dist/do/withBash.d.ts.map +0 -1
  137. package/dist/do/withBash.js +0 -244
  138. package/dist/do/withBash.js.map +0 -1
  139. package/dist/do/withFs.d.ts +0 -237
  140. package/dist/do/withFs.d.ts.map +0 -1
  141. package/dist/do/withFs.js +0 -387
  142. package/dist/do/withFs.js.map +0 -1
  143. package/dist/do/withGit.d.ts +0 -180
  144. package/dist/do/withGit.d.ts.map +0 -1
  145. package/dist/do/withGit.js +0 -271
  146. package/dist/do/withGit.js.map +0 -1
  147. package/dist/durable-object/object-store.d.ts +0 -633
  148. package/dist/durable-object/object-store.d.ts.map +0 -1
  149. package/dist/durable-object/object-store.js +0 -1161
  150. package/dist/durable-object/object-store.js.map +0 -1
  151. package/dist/durable-object/schema.d.ts.map +0 -1
  152. package/dist/durable-object/schema.js.map +0 -1
  153. package/dist/durable-object/wal.d.ts +0 -416
  154. package/dist/durable-object/wal.d.ts.map +0 -1
  155. package/dist/durable-object/wal.js +0 -445
  156. package/dist/durable-object/wal.js.map +0 -1
  157. package/dist/mcp/adapter.d.ts +0 -772
  158. package/dist/mcp/adapter.d.ts.map +0 -1
  159. package/dist/mcp/adapter.js +0 -895
  160. package/dist/mcp/adapter.js.map +0 -1
  161. package/dist/mcp/sandbox/miniflare-evaluator.d.ts +0 -22
  162. package/dist/mcp/sandbox/miniflare-evaluator.d.ts.map +0 -1
  163. package/dist/mcp/sandbox/miniflare-evaluator.js +0 -140
  164. package/dist/mcp/sandbox/miniflare-evaluator.js.map +0 -1
  165. package/dist/mcp/sandbox/object-store-proxy.d.ts +0 -32
  166. package/dist/mcp/sandbox/object-store-proxy.d.ts.map +0 -1
  167. package/dist/mcp/sandbox/object-store-proxy.js +0 -30
  168. package/dist/mcp/sandbox/object-store-proxy.js.map +0 -1
  169. package/dist/mcp/sandbox/template.d.ts +0 -17
  170. package/dist/mcp/sandbox/template.d.ts.map +0 -1
  171. package/dist/mcp/sandbox/template.js +0 -71
  172. package/dist/mcp/sandbox/template.js.map +0 -1
  173. package/dist/mcp/sandbox.d.ts +0 -764
  174. package/dist/mcp/sandbox.d.ts.map +0 -1
  175. package/dist/mcp/sandbox.js +0 -1362
  176. package/dist/mcp/sandbox.js.map +0 -1
  177. package/dist/mcp/sdk-adapter.d.ts +0 -835
  178. package/dist/mcp/sdk-adapter.d.ts.map +0 -1
  179. package/dist/mcp/sdk-adapter.js +0 -974
  180. package/dist/mcp/sdk-adapter.js.map +0 -1
  181. package/dist/mcp/tools/do.d.ts +0 -32
  182. package/dist/mcp/tools/do.d.ts.map +0 -1
  183. package/dist/mcp/tools/do.js +0 -115
  184. package/dist/mcp/tools/do.js.map +0 -1
  185. package/dist/mcp/tools.d.ts +0 -548
  186. package/dist/mcp/tools.d.ts.map +0 -1
  187. package/dist/mcp/tools.js +0 -1934
  188. package/dist/mcp/tools.js.map +0 -1
  189. package/dist/ops/blame.d.ts +0 -551
  190. package/dist/ops/blame.d.ts.map +0 -1
  191. package/dist/ops/blame.js +0 -1037
  192. package/dist/ops/blame.js.map +0 -1
  193. package/dist/ops/branch.d.ts +0 -766
  194. package/dist/ops/branch.d.ts.map +0 -1
  195. package/dist/ops/branch.js +0 -950
  196. package/dist/ops/branch.js.map +0 -1
  197. package/dist/ops/commit-traversal.d.ts +0 -349
  198. package/dist/ops/commit-traversal.d.ts.map +0 -1
  199. package/dist/ops/commit-traversal.js +0 -821
  200. package/dist/ops/commit-traversal.js.map +0 -1
  201. package/dist/ops/commit.d.ts +0 -555
  202. package/dist/ops/commit.d.ts.map +0 -1
  203. package/dist/ops/commit.js +0 -826
  204. package/dist/ops/commit.js.map +0 -1
  205. package/dist/ops/merge-base.d.ts +0 -397
  206. package/dist/ops/merge-base.d.ts.map +0 -1
  207. package/dist/ops/merge-base.js +0 -691
  208. package/dist/ops/merge-base.js.map +0 -1
  209. package/dist/ops/merge.d.ts +0 -855
  210. package/dist/ops/merge.d.ts.map +0 -1
  211. package/dist/ops/merge.js +0 -1551
  212. package/dist/ops/merge.js.map +0 -1
  213. package/dist/ops/tag.d.ts +0 -247
  214. package/dist/ops/tag.d.ts.map +0 -1
  215. package/dist/ops/tag.js +0 -649
  216. package/dist/ops/tag.js.map +0 -1
  217. package/dist/ops/tree-builder.d.ts +0 -178
  218. package/dist/ops/tree-builder.d.ts.map +0 -1
  219. package/dist/ops/tree-builder.js +0 -271
  220. package/dist/ops/tree-builder.js.map +0 -1
  221. package/dist/ops/tree-diff.d.ts +0 -291
  222. package/dist/ops/tree-diff.d.ts.map +0 -1
  223. package/dist/ops/tree-diff.js +0 -705
  224. package/dist/ops/tree-diff.js.map +0 -1
  225. package/dist/pack/delta.d.ts +0 -248
  226. package/dist/pack/delta.d.ts.map +0 -1
  227. package/dist/pack/delta.js +0 -736
  228. package/dist/pack/delta.js.map +0 -1
  229. package/dist/pack/format.d.ts +0 -446
  230. package/dist/pack/format.d.ts.map +0 -1
  231. package/dist/pack/format.js +0 -572
  232. package/dist/pack/format.js.map +0 -1
  233. package/dist/pack/full-generation.d.ts +0 -612
  234. package/dist/pack/full-generation.d.ts.map +0 -1
  235. package/dist/pack/full-generation.js +0 -1378
  236. package/dist/pack/full-generation.js.map +0 -1
  237. package/dist/pack/generation.d.ts +0 -441
  238. package/dist/pack/generation.d.ts.map +0 -1
  239. package/dist/pack/generation.js +0 -707
  240. package/dist/pack/generation.js.map +0 -1
  241. package/dist/pack/index.d.ts +0 -502
  242. package/dist/pack/index.d.ts.map +0 -1
  243. package/dist/pack/index.js +0 -833
  244. package/dist/pack/index.js.map +0 -1
  245. package/dist/refs/branch.d.ts +0 -668
  246. package/dist/refs/branch.d.ts.map +0 -1
  247. package/dist/refs/branch.js +0 -897
  248. package/dist/refs/branch.js.map +0 -1
  249. package/dist/refs/storage.d.ts +0 -833
  250. package/dist/refs/storage.d.ts.map +0 -1
  251. package/dist/refs/storage.js +0 -1023
  252. package/dist/refs/storage.js.map +0 -1
  253. package/dist/refs/tag.d.ts +0 -860
  254. package/dist/refs/tag.d.ts.map +0 -1
  255. package/dist/refs/tag.js +0 -996
  256. package/dist/refs/tag.js.map +0 -1
  257. package/dist/storage/backend.d.ts +0 -425
  258. package/dist/storage/backend.d.ts.map +0 -1
  259. package/dist/storage/backend.js +0 -41
  260. package/dist/storage/backend.js.map +0 -1
  261. package/dist/storage/fsx-adapter.d.ts +0 -204
  262. package/dist/storage/fsx-adapter.d.ts.map +0 -1
  263. package/dist/storage/fsx-adapter.js +0 -470
  264. package/dist/storage/fsx-adapter.js.map +0 -1
  265. package/dist/storage/lru-cache.d.ts +0 -691
  266. package/dist/storage/lru-cache.d.ts.map +0 -1
  267. package/dist/storage/lru-cache.js +0 -813
  268. package/dist/storage/lru-cache.js.map +0 -1
  269. package/dist/storage/object-index.d.ts +0 -585
  270. package/dist/storage/object-index.d.ts.map +0 -1
  271. package/dist/storage/object-index.js +0 -532
  272. package/dist/storage/object-index.js.map +0 -1
  273. package/dist/storage/r2-pack.d.ts +0 -1257
  274. package/dist/storage/r2-pack.d.ts.map +0 -1
  275. package/dist/storage/r2-pack.js +0 -1770
  276. package/dist/storage/r2-pack.js.map +0 -1
  277. package/dist/tiered/cdc-pipeline.d.ts +0 -1888
  278. package/dist/tiered/cdc-pipeline.d.ts.map +0 -1
  279. package/dist/tiered/cdc-pipeline.js +0 -1880
  280. package/dist/tiered/cdc-pipeline.js.map +0 -1
  281. package/dist/tiered/migration.d.ts +0 -1104
  282. package/dist/tiered/migration.d.ts.map +0 -1
  283. package/dist/tiered/migration.js +0 -1214
  284. package/dist/tiered/migration.js.map +0 -1
  285. package/dist/tiered/parquet-writer.d.ts +0 -1145
  286. package/dist/tiered/parquet-writer.d.ts.map +0 -1
  287. package/dist/tiered/parquet-writer.js +0 -1183
  288. package/dist/tiered/parquet-writer.js.map +0 -1
  289. package/dist/tiered/read-path.d.ts +0 -835
  290. package/dist/tiered/read-path.d.ts.map +0 -1
  291. package/dist/tiered/read-path.js +0 -487
  292. package/dist/tiered/read-path.js.map +0 -1
  293. package/dist/types/capability.d.ts +0 -1385
  294. package/dist/types/capability.d.ts.map +0 -1
  295. package/dist/types/capability.js +0 -36
  296. package/dist/types/capability.js.map +0 -1
  297. package/dist/types/index.d.ts +0 -13
  298. package/dist/types/index.d.ts.map +0 -1
  299. package/dist/types/index.js +0 -18
  300. package/dist/types/index.js.map +0 -1
  301. package/dist/types/objects.d.ts +0 -692
  302. package/dist/types/objects.d.ts.map +0 -1
  303. package/dist/types/objects.js +0 -837
  304. package/dist/types/objects.js.map +0 -1
  305. package/dist/types/storage.d.ts +0 -603
  306. package/dist/types/storage.d.ts.map +0 -1
  307. package/dist/types/storage.js +0 -191
  308. package/dist/types/storage.js.map +0 -1
  309. package/dist/types/worker-loader.d.ts +0 -60
  310. package/dist/types/worker-loader.d.ts.map +0 -1
  311. package/dist/types/worker-loader.js +0 -62
  312. package/dist/types/worker-loader.js.map +0 -1
  313. package/dist/utils/hash.d.ts +0 -197
  314. package/dist/utils/hash.d.ts.map +0 -1
  315. package/dist/utils/hash.js +0 -268
  316. package/dist/utils/hash.js.map +0 -1
  317. package/dist/utils/sha1.d.ts +0 -290
  318. package/dist/utils/sha1.d.ts.map +0 -1
  319. package/dist/utils/sha1.js +0 -582
  320. package/dist/utils/sha1.js.map +0 -1
  321. package/dist/wire/capabilities.d.ts +0 -1044
  322. package/dist/wire/capabilities.d.ts.map +0 -1
  323. package/dist/wire/capabilities.js +0 -941
  324. package/dist/wire/capabilities.js.map +0 -1
  325. package/dist/wire/path-security.d.ts +0 -157
  326. package/dist/wire/path-security.d.ts.map +0 -1
  327. package/dist/wire/path-security.js +0 -307
  328. package/dist/wire/path-security.js.map +0 -1
  329. package/dist/wire/pkt-line.d.ts +0 -345
  330. package/dist/wire/pkt-line.d.ts.map +0 -1
  331. package/dist/wire/pkt-line.js +0 -381
  332. package/dist/wire/pkt-line.js.map +0 -1
  333. package/dist/wire/receive-pack.d.ts +0 -1059
  334. package/dist/wire/receive-pack.d.ts.map +0 -1
  335. package/dist/wire/receive-pack.js +0 -1414
  336. package/dist/wire/receive-pack.js.map +0 -1
  337. package/dist/wire/smart-http.d.ts +0 -799
  338. package/dist/wire/smart-http.d.ts.map +0 -1
  339. package/dist/wire/smart-http.js +0 -945
  340. package/dist/wire/smart-http.js.map +0 -1
  341. package/dist/wire/upload-pack.d.ts +0 -727
  342. package/dist/wire/upload-pack.d.ts.map +0 -1
  343. package/dist/wire/upload-pack.js +0 -1138
  344. package/dist/wire/upload-pack.js.map +0 -1
@@ -1,1145 +0,0 @@
1
- /**
2
- * @fileoverview Parquet Writer for Git Analytics
3
- *
4
- * @description
5
- * Provides functionality to write git analytics data to Parquet format, a
6
- * columnar storage format optimized for analytical queries. This module
7
- * enables efficient storage and querying of Git repository data.
8
- *
9
- * **Key Features:**
10
- * - Schema definition with various field types (STRING, INT32, INT64, etc.)
11
- * - Multiple compression algorithms (SNAPPY, GZIP, ZSTD, LZ4, UNCOMPRESSED)
12
- * - Row group management for efficient columnar storage
13
- * - Automatic and manual row group flushing
14
- * - Column-level statistics generation (min, max, null count)
15
- * - Custom key-value metadata support
16
- * - Memory-efficient streaming writes
17
- *
18
- * **Parquet Format:**
19
- * The generated files follow the Parquet format with:
20
- * - Magic bytes "PAR1" at start and end
21
- * - Row group data organized by columns
22
- * - Footer metadata containing schema and statistics
23
- *
24
- * @example
25
- * ```typescript
26
- * // Define schema for commit analytics
27
- * const schema = defineSchema([
28
- * { name: 'commit_sha', type: ParquetFieldType.STRING, required: true },
29
- * { name: 'author', type: ParquetFieldType.STRING, required: true },
30
- * { name: 'timestamp', type: ParquetFieldType.TIMESTAMP_MILLIS, required: true },
31
- * { name: 'file_count', type: ParquetFieldType.INT32, required: false }
32
- * ])
33
- *
34
- * // Create writer with options
35
- * const writer = createParquetWriter(schema, {
36
- * rowGroupSize: 10000,
37
- * compression: ParquetCompression.SNAPPY,
38
- * enableStatistics: true
39
- * })
40
- *
41
- * // Write data
42
- * await writer.writeRows([
43
- * { commit_sha: 'abc123...', author: 'alice', timestamp: Date.now(), file_count: 5 },
44
- * { commit_sha: 'def456...', author: 'bob', timestamp: Date.now(), file_count: 3 }
45
- * ])
46
- *
47
- * // Generate the Parquet file
48
- * const buffer = await writer.toBuffer()
49
- * ```
50
- *
51
- * @module tiered/parquet-writer
52
- * @see {@link ParquetWriter} - Main writer class
53
- * @see {@link defineSchema} - Schema definition helper
54
- */
55
- /**
56
- * Supported Parquet field types.
57
- *
58
- * @description
59
- * Defines the data types that can be used for fields in a Parquet schema.
60
- * Each type maps to an appropriate physical and logical Parquet type.
61
- *
62
- * @example
63
- * ```typescript
64
- * const field: ParquetField = {
65
- * name: 'count',
66
- * type: ParquetFieldType.INT64,
67
- * required: true
68
- * }
69
- * ```
70
- *
71
- * @enum {string}
72
- */
73
- export declare enum ParquetFieldType {
74
- /**
75
- * UTF-8 encoded string.
76
- * Maps to Parquet BYTE_ARRAY with UTF8 logical type.
77
- */
78
- STRING = "STRING",
79
- /**
80
- * 32-bit signed integer.
81
- * Maps to Parquet INT32 physical type.
82
- */
83
- INT32 = "INT32",
84
- /**
85
- * 64-bit signed integer.
86
- * Maps to Parquet INT64 physical type.
87
- */
88
- INT64 = "INT64",
89
- /**
90
- * Boolean value (true/false).
91
- * Maps to Parquet BOOLEAN physical type.
92
- */
93
- BOOLEAN = "BOOLEAN",
94
- /**
95
- * 32-bit IEEE 754 floating point.
96
- * Maps to Parquet FLOAT physical type.
97
- */
98
- FLOAT = "FLOAT",
99
- /**
100
- * 64-bit IEEE 754 floating point.
101
- * Maps to Parquet DOUBLE physical type.
102
- */
103
- DOUBLE = "DOUBLE",
104
- /**
105
- * Raw binary data.
106
- * Maps to Parquet BYTE_ARRAY physical type.
107
- */
108
- BINARY = "BINARY",
109
- /**
110
- * Timestamp with millisecond precision.
111
- * Maps to Parquet INT64 with TIMESTAMP_MILLIS logical type.
112
- */
113
- TIMESTAMP_MILLIS = "TIMESTAMP_MILLIS",
114
- /**
115
- * Timestamp with microsecond precision.
116
- * Maps to Parquet INT64 with TIMESTAMP_MICROS logical type.
117
- */
118
- TIMESTAMP_MICROS = "TIMESTAMP_MICROS"
119
- }
120
- /**
121
- * Supported compression types for Parquet data.
122
- *
123
- * @description
124
- * Different compression algorithms offer trade-offs between compression
125
- * ratio, compression speed, and decompression speed.
126
- *
127
- * **Comparison:**
128
- * - SNAPPY: Fast compression/decompression, moderate ratio (default)
129
- * - GZIP: Higher ratio, slower compression, fast decompression
130
- * - ZSTD: Best ratio, good speed, requires more memory
131
- * - LZ4: Fastest, lower ratio
132
- * - UNCOMPRESSED: No compression overhead
133
- *
134
- * @example
135
- * ```typescript
136
- * const writer = createParquetWriter(schema, {
137
- * compression: ParquetCompression.ZSTD
138
- * })
139
- * ```
140
- *
141
- * @enum {string}
142
- */
143
- export declare enum ParquetCompression {
144
- /**
145
- * No compression applied.
146
- * Fastest writes, largest file size.
147
- */
148
- UNCOMPRESSED = "UNCOMPRESSED",
149
- /**
150
- * Snappy compression (default).
151
- * Good balance of speed and compression ratio.
152
- */
153
- SNAPPY = "SNAPPY",
154
- /**
155
- * GZIP compression.
156
- * Higher compression ratio, slower compression.
157
- */
158
- GZIP = "GZIP",
159
- /**
160
- * Zstandard compression.
161
- * Best compression ratio with good speed.
162
- */
163
- ZSTD = "ZSTD",
164
- /**
165
- * LZ4 compression.
166
- * Fastest compression, lower ratio.
167
- */
168
- LZ4 = "LZ4"
169
- }
170
- /**
171
- * Field definition for a Parquet schema.
172
- *
173
- * @description
174
- * Defines a single column in the Parquet schema, including its name,
175
- * data type, nullability, and optional metadata.
176
- *
177
- * @example
178
- * ```typescript
179
- * const nameField: ParquetField = {
180
- * name: 'user_name',
181
- * type: ParquetFieldType.STRING,
182
- * required: true,
183
- * metadata: { description: 'The user display name' }
184
- * }
185
- *
186
- * const ageField: ParquetField = {
187
- * name: 'age',
188
- * type: ParquetFieldType.INT32,
189
- * required: false // nullable
190
- * }
191
- * ```
192
- *
193
- * @interface ParquetField
194
- */
195
- export interface ParquetField {
196
- /**
197
- * Column name.
198
- * Must be unique within the schema and non-empty.
199
- */
200
- name: string;
201
- /**
202
- * Data type of the column.
203
- *
204
- * @see {@link ParquetFieldType}
205
- */
206
- type: ParquetFieldType;
207
- /**
208
- * Whether the field is required (non-nullable).
209
- * If true, null values will cause validation errors.
210
- */
211
- required: boolean;
212
- /**
213
- * Optional key-value metadata for the field.
214
- * Can be used for descriptions, units, etc.
215
- */
216
- metadata?: Record<string, string>;
217
- }
218
- /**
219
- * Parquet schema definition.
220
- *
221
- * @description
222
- * Defines the complete schema for a Parquet file, including all fields
223
- * and optional schema-level metadata.
224
- *
225
- * @example
226
- * ```typescript
227
- * const schema: ParquetSchema = {
228
- * fields: [
229
- * { name: 'id', type: ParquetFieldType.INT64, required: true },
230
- * { name: 'name', type: ParquetFieldType.STRING, required: true }
231
- * ],
232
- * metadata: {
233
- * created_by: 'gitdo',
234
- * version: '1.0'
235
- * }
236
- * }
237
- * ```
238
- *
239
- * @interface ParquetSchema
240
- */
241
- export interface ParquetSchema {
242
- /**
243
- * Array of field definitions for all columns.
244
- * Order determines column order in the file.
245
- */
246
- fields: ParquetField[];
247
- /**
248
- * Optional schema-level metadata.
249
- * Stored in the Parquet file footer.
250
- */
251
- metadata?: Record<string, string>;
252
- }
253
- /**
254
- * Options for creating a Parquet writer.
255
- *
256
- * @description
257
- * Configuration options that control how the Parquet file is written,
258
- * including row group sizing, compression, and statistics generation.
259
- *
260
- * @example
261
- * ```typescript
262
- * const options: ParquetWriteOptions = {
263
- * rowGroupSize: 50000, // 50K rows per group
264
- * rowGroupMemoryLimit: 64 * 1024 * 1024, // 64MB memory limit
265
- * compression: ParquetCompression.ZSTD,
266
- * columnCompression: {
267
- * 'binary_data': ParquetCompression.LZ4 // Fast for binary
268
- * },
269
- * enableStatistics: true,
270
- * sortBy: ['timestamp'],
271
- * partitionColumns: ['date']
272
- * }
273
- * ```
274
- *
275
- * @interface ParquetWriteOptions
276
- */
277
- export interface ParquetWriteOptions {
278
- /**
279
- * Maximum number of rows per row group.
280
- * Smaller groups = more granular reads, larger groups = better compression.
281
- *
282
- * @default 65536
283
- */
284
- rowGroupSize?: number;
285
- /**
286
- * Maximum memory size in bytes for a row group.
287
- * Triggers flush when reached, regardless of row count.
288
- */
289
- rowGroupMemoryLimit?: number;
290
- /**
291
- * Default compression algorithm for all columns.
292
- *
293
- * @default ParquetCompression.SNAPPY
294
- */
295
- compression?: ParquetCompression;
296
- /**
297
- * Per-column compression overrides.
298
- * Keys are column names, values are compression types.
299
- */
300
- columnCompression?: Record<string, ParquetCompression>;
301
- /**
302
- * Whether to compute and store column statistics.
303
- * Enables predicate pushdown during queries.
304
- *
305
- * @default false
306
- */
307
- enableStatistics?: boolean;
308
- /**
309
- * Columns to sort data by within each row group.
310
- * Improves query performance for sorted access patterns.
311
- */
312
- sortBy?: string[];
313
- /**
314
- * Columns used for partitioning.
315
- * Informational metadata for partitioned datasets.
316
- */
317
- partitionColumns?: string[];
318
- }
319
- /**
320
- * Statistics for a single column in a row group.
321
- *
322
- * @description
323
- * Column statistics enable query engines to skip row groups that don't
324
- * contain relevant data (predicate pushdown).
325
- *
326
- * @example
327
- * ```typescript
328
- * const stats: ColumnStatistics = {
329
- * min: 100,
330
- * max: 999,
331
- * nullCount: 5,
332
- * distinctCount: 850
333
- * }
334
- * ```
335
- *
336
- * @interface ColumnStatistics
337
- */
338
- export interface ColumnStatistics {
339
- /**
340
- * Minimum value in the column.
341
- * Type depends on column type.
342
- */
343
- min?: number | string | boolean;
344
- /**
345
- * Maximum value in the column.
346
- * Type depends on column type.
347
- */
348
- max?: number | string | boolean;
349
- /**
350
- * Number of null values in the column.
351
- */
352
- nullCount?: number;
353
- /**
354
- * Approximate distinct value count.
355
- * May not be exact for large datasets.
356
- */
357
- distinctCount?: number;
358
- }
359
- /**
360
- * Metadata for a column chunk within a row group.
361
- *
362
- * @description
363
- * Contains information about a single column's data within a row group,
364
- * including compression, sizes, and statistics.
365
- *
366
- * @interface ColumnChunkMetadata
367
- */
368
- export interface ColumnChunkMetadata {
369
- /**
370
- * Column name.
371
- */
372
- column: string;
373
- /**
374
- * Data type of the column.
375
- */
376
- type: ParquetFieldType;
377
- /**
378
- * Compression used for this column chunk.
379
- */
380
- compression: ParquetCompression;
381
- /**
382
- * Size in bytes after compression.
383
- */
384
- encodedSize: number;
385
- /**
386
- * Size in bytes before compression.
387
- */
388
- uncompressedSize: number;
389
- /**
390
- * Column statistics if statistics are enabled.
391
- */
392
- statistics?: ColumnStatistics;
393
- }
394
- /**
395
- * Row group representation in the Parquet file.
396
- *
397
- * @description
398
- * A row group is a horizontal partition of the data containing all columns
399
- * for a subset of rows. Row groups enable parallel processing and predicate
400
- * pushdown optimizations.
401
- *
402
- * @interface RowGroup
403
- */
404
- export interface RowGroup {
405
- /**
406
- * Number of rows in this row group.
407
- */
408
- numRows: number;
409
- /**
410
- * Total compressed size in bytes.
411
- */
412
- totalByteSize: number;
413
- /**
414
- * Metadata for each column chunk.
415
- */
416
- columns: ColumnChunkMetadata[];
417
- }
418
- /**
419
- * Complete metadata for a Parquet file.
420
- *
421
- * @description
422
- * Contains all metadata stored in the Parquet file footer, including
423
- * schema, row groups, and statistics. Used when reading files.
424
- *
425
- * @example
426
- * ```typescript
427
- * const metadata = getMetadata(parquetBuffer)
428
- * console.log(`Rows: ${metadata.numRows}`)
429
- * console.log(`Row groups: ${metadata.rowGroups.length}`)
430
- * console.log(`Compression: ${metadata.compression}`)
431
- * ```
432
- *
433
- * @interface ParquetMetadata
434
- */
435
- export interface ParquetMetadata {
436
- /**
437
- * The file's schema definition.
438
- */
439
- schema: ParquetSchema;
440
- /**
441
- * Total number of rows in the file.
442
- */
443
- numRows: number;
444
- /**
445
- * Array of row group metadata.
446
- */
447
- rowGroups: RowGroup[];
448
- /**
449
- * Default compression algorithm used.
450
- */
451
- compression: ParquetCompression;
452
- /**
453
- * Per-column compression settings.
454
- */
455
- columnMetadata?: Record<string, {
456
- compression: ParquetCompression;
457
- }>;
458
- /**
459
- * Custom key-value metadata.
460
- */
461
- keyValueMetadata?: Record<string, string>;
462
- /**
463
- * Unix timestamp when the file was created.
464
- */
465
- createdAt: number;
466
- /**
467
- * Total file size in bytes.
468
- */
469
- fileSize: number;
470
- /**
471
- * Columns the data is sorted by.
472
- */
473
- sortedBy?: string[];
474
- /**
475
- * Columns used for partitioning.
476
- */
477
- partitionColumns?: string[];
478
- }
479
- /**
480
- * Mock output stream interface for writing Parquet data.
481
- *
482
- * @description
483
- * Simple interface for streaming Parquet output to a destination.
484
- * Can be implemented for files, network streams, etc.
485
- *
486
- * @example
487
- * ```typescript
488
- * class BufferOutputStream implements OutputStream {
489
- * private chunks: Uint8Array[] = []
490
- *
491
- * write(data: Uint8Array): void {
492
- * this.chunks.push(data)
493
- * }
494
- *
495
- * getBuffer(): Uint8Array {
496
- * const total = this.chunks.reduce((sum, c) => sum + c.length, 0)
497
- * const result = new Uint8Array(total)
498
- * let offset = 0
499
- * for (const chunk of this.chunks) {
500
- * result.set(chunk, offset)
501
- * offset += chunk.length
502
- * }
503
- * return result
504
- * }
505
- * }
506
- * ```
507
- *
508
- * @interface OutputStream
509
- */
510
- export interface OutputStream {
511
- /**
512
- * Writes data to the output stream.
513
- *
514
- * @param data - The data to write
515
- */
516
- write(data: Uint8Array): void;
517
- }
518
- /**
519
- * Error class for Parquet-related operations.
520
- *
521
- * @description
522
- * Thrown when Parquet operations fail, such as schema validation errors,
523
- * invalid data types, or malformed files.
524
- *
525
- * @example
526
- * ```typescript
527
- * try {
528
- * await writer.writeRow({ invalid_field: 'value' })
529
- * } catch (error) {
530
- * if (error instanceof ParquetError) {
531
- * console.log(`Parquet error (${error.code}): ${error.message}`)
532
- * }
533
- * }
534
- * ```
535
- *
536
- * @class ParquetError
537
- * @extends Error
538
- */
539
- export declare class ParquetError extends Error {
540
- readonly code: string;
541
- /**
542
- * Creates a new ParquetError.
543
- *
544
- * @param message - Human-readable error message
545
- * @param code - Error code for programmatic handling
546
- *
547
- * @example
548
- * ```typescript
549
- * throw new ParquetError('Field name cannot be empty', 'EMPTY_FIELD_NAME')
550
- * ```
551
- */
552
- constructor(message: string, code: string);
553
- }
554
- /**
555
- * Parquet writer for git analytics data.
556
- *
557
- * @description
558
- * ParquetWriter provides a streaming interface for writing data to Parquet
559
- * format. It handles schema validation, row group management, compression,
560
- * and statistics generation.
561
- *
562
- * **Usage Pattern:**
563
- * 1. Create a schema using `defineSchema()`
564
- * 2. Create a writer with `createParquetWriter()` or `new ParquetWriter()`
565
- * 3. Write rows using `writeRow()` or `writeRows()`
566
- * 4. Generate the file with `toBuffer()` or `writeTo()`
567
- *
568
- * **Row Group Management:**
569
- * Rows are buffered in memory until the row group is full (by row count
570
- * or memory limit), then flushed. You can also manually flush with
571
- * `flushRowGroup()`.
572
- *
573
- * **Thread Safety:**
574
- * Not thread-safe. Use separate writer instances for concurrent writes.
575
- *
576
- * @example
577
- * ```typescript
578
- * // Create schema
579
- * const schema = defineSchema([
580
- * { name: 'sha', type: ParquetFieldType.STRING, required: true },
581
- * { name: 'type', type: ParquetFieldType.STRING, required: true },
582
- * { name: 'size', type: ParquetFieldType.INT64, required: true },
583
- * { name: 'timestamp', type: ParquetFieldType.TIMESTAMP_MILLIS, required: true }
584
- * ])
585
- *
586
- * // Create writer
587
- * const writer = new ParquetWriter(schema, {
588
- * rowGroupSize: 10000,
589
- * compression: ParquetCompression.SNAPPY,
590
- * enableStatistics: true
591
- * })
592
- *
593
- * // Write data
594
- * for (const object of gitObjects) {
595
- * await writer.writeRow({
596
- * sha: object.sha,
597
- * type: object.type,
598
- * size: object.size,
599
- * timestamp: Date.now()
600
- * })
601
- * }
602
- *
603
- * // Set custom metadata
604
- * writer.setMetadata('git_version', '2.40.0')
605
- * writer.setMetadata('repository', 'github.com/org/repo')
606
- *
607
- * // Generate file
608
- * const buffer = await writer.toBuffer()
609
- * console.log(`Generated ${buffer.length} bytes`)
610
- * console.log(`Rows: ${writer.rowCount}`)
611
- * console.log(`Row groups: ${writer.rowGroupCount}`)
612
- *
613
- * // Reset for reuse
614
- * writer.reset()
615
- * ```
616
- *
617
- * @class ParquetWriter
618
- */
619
- export declare class ParquetWriter {
620
- /**
621
- * The Parquet schema for this writer.
622
- * @readonly
623
- */
624
- readonly schema: ParquetSchema;
625
- /**
626
- * Resolved options with defaults applied.
627
- * @readonly
628
- */
629
- readonly options: Required<Pick<ParquetWriteOptions, 'rowGroupSize' | 'compression'>> & ParquetWriteOptions;
630
- /**
631
- * Total row count written.
632
- * @private
633
- */
634
- private _rowCount;
635
- /**
636
- * Completed row groups.
637
- * @private
638
- */
639
- private _rowGroups;
640
- /**
641
- * Current row group being built.
642
- * @private
643
- */
644
- private _currentRowGroup;
645
- /**
646
- * Whether the writer has been closed.
647
- * @private
648
- */
649
- private _isClosed;
650
- /**
651
- * Custom key-value metadata.
652
- * @private
653
- */
654
- private _keyValueMetadata;
655
- /**
656
- * Creation timestamp.
657
- * @private
658
- */
659
- private _createdAt;
660
- /**
661
- * Creates a new ParquetWriter instance.
662
- *
663
- * @param schema - The Parquet schema defining columns
664
- * @param options - Writer configuration options
665
- *
666
- * @example
667
- * ```typescript
668
- * const writer = new ParquetWriter(schema, {
669
- * rowGroupSize: 50000,
670
- * compression: ParquetCompression.GZIP
671
- * })
672
- * ```
673
- */
674
- constructor(schema: ParquetSchema, options?: ParquetWriteOptions);
675
- /**
676
- * Gets the total row count written to the writer.
677
- *
678
- * @description
679
- * Returns the total number of rows written, including rows in the
680
- * current unflushed row group.
681
- *
682
- * @returns Total row count
683
- *
684
- * @example
685
- * ```typescript
686
- * await writer.writeRows(data)
687
- * console.log(`Wrote ${writer.rowCount} rows`)
688
- * ```
689
- */
690
- get rowCount(): number;
691
- /**
692
- * Gets the number of row groups.
693
- *
694
- * @description
695
- * Returns the number of completed row groups, plus one when there is
696
- * a pending (unflushed) row group that contains data.
697
- *
698
- * @returns Number of row groups
699
- *
700
- * @example
701
- * ```typescript
702
- * console.log(`Row groups: ${writer.rowGroupCount}`)
703
- * ```
704
- */
705
- get rowGroupCount(): number;
706
- /**
707
- * Checks if the writer has been closed.
708
- *
709
- * @description
710
- * A closed writer cannot accept new rows. A writer is marked as
711
- * closed by passing it to `closeWriter()`.
712
- *
713
- * @returns true if closed
714
- *
715
- * @example
716
- * ```typescript
717
- * if (!writer.isClosed) {
718
- * await writer.writeRow(row)
719
- * }
720
- * ```
721
- */
722
- get isClosed(): boolean;
723
- /**
724
- * Writes a single row to the Parquet file.
725
- *
726
- * @description
727
- * Validates the row against the schema and adds it to the current
728
- * row group. Automatically flushes the row group when it reaches
729
- * the configured size or memory limit.
730
- *
731
- * @param row - Object with column values keyed by column name
732
- * @returns Promise that resolves when the row is written
733
- *
734
- * @throws {ParquetError} WRITER_CLOSED - If writer is closed
735
- * @throws {ParquetError} MISSING_REQUIRED_FIELD - If required field is missing
736
- * @throws {ParquetError} INVALID_FIELD_TYPE - If field value type doesn't match schema
737
- *
738
- * @example
739
- * ```typescript
740
- * await writer.writeRow({
741
- * id: 123,
742
- * name: 'Alice',
743
- * active: true
744
- * })
745
- * ```
746
- */
747
- writeRow(row: Record<string, unknown>): Promise<void>;
748
- /**
749
- * Writes multiple rows to the Parquet file.
750
- *
751
- * @description
752
- * Convenience method that writes an array of rows sequentially.
753
- * Each row is validated and may trigger row group flushes.
754
- *
755
- * @param rows - Array of row objects to write
756
- * @returns Promise that resolves when all rows are written
757
- *
758
- * @throws {ParquetError} Any error from writeRow()
759
- *
760
- * @example
761
- * ```typescript
762
- * await writer.writeRows([
763
- * { id: 1, name: 'Alice' },
764
- * { id: 2, name: 'Bob' },
765
- * { id: 3, name: 'Carol' }
766
- * ])
767
- * ```
768
- */
769
- writeRows(rows: Record<string, unknown>[]): Promise<void>;
770
- /**
771
- * Manually flushes the current row group.
772
- *
773
- * @description
774
- * Forces the current row group to be finalized and stored, even if
775
- * it hasn't reached the size limit. Has no effect if the current
776
- * row group is empty.
777
- *
778
- * @returns Promise that resolves when flush is complete
779
- *
780
- * @example
781
- * ```typescript
782
- * // Write some rows
783
- * await writer.writeRows(batch1)
784
- *
785
- * // Force flush before writing next batch
786
- * await writer.flushRowGroup()
787
- *
788
- * // Continue writing
789
- * await writer.writeRows(batch2)
790
- * ```
791
- */
792
- flushRowGroup(): Promise<void>;
793
- /**
794
- * Gets the current row group's memory size.
795
- *
796
- * @description
797
- * Returns the estimated memory consumption of the unflushed row group.
798
- * Useful for monitoring memory usage during streaming writes.
799
- *
800
- * @returns Memory size in bytes
801
- *
802
- * @example
803
- * ```typescript
804
- * if (writer.currentRowGroupMemorySize() > 50 * 1024 * 1024) {
805
- * console.log('Row group using significant memory')
806
- * await writer.flushRowGroup()
807
- * }
808
- * ```
809
- */
810
- currentRowGroupMemorySize(): number;
811
- /**
812
- * Gets the completed row groups.
813
- *
814
- * @description
815
- * Returns a copy of the completed row group metadata array.
816
- * Does not include the current unflushed row group.
817
- *
818
- * @returns Array of row group metadata
819
- *
820
- * @example
821
- * ```typescript
822
- * for (const rg of writer.getRowGroups()) {
823
- * console.log(`Row group: ${rg.numRows} rows, ${rg.totalByteSize} bytes`)
824
- * }
825
- * ```
826
- */
827
- getRowGroups(): RowGroup[];
828
- /**
829
- * Sets a custom key-value metadata entry.
830
- *
831
- * @description
832
- * Adds custom metadata that will be stored in the Parquet file footer.
833
- * Can be used for versioning, provenance, or application-specific data.
834
- *
835
- * @param key - Metadata key
836
- * @param value - Metadata value
837
- *
838
- * @example
839
- * ```typescript
840
- * writer.setMetadata('created_by', 'gitdo-analytics')
841
- * writer.setMetadata('schema_version', '2.0')
842
- * writer.setMetadata('repository', 'github.com/org/repo')
843
- * ```
844
- */
845
- setMetadata(key: string, value: string): void;
846
- /**
847
- * Generates the Parquet file as a buffer.
848
- *
849
- * @description
850
- * Finalizes the file by flushing any remaining rows and generating
851
- * the complete Parquet file structure including header, row groups,
852
- * and footer with metadata.
853
- *
854
- * @returns Promise resolving to the complete Parquet file as Uint8Array
855
- *
856
- * @example
857
- * ```typescript
858
- * const buffer = await writer.toBuffer()
859
- * await fs.writeFile('data.parquet', buffer)
860
- * ```
861
- */
862
- toBuffer(): Promise<Uint8Array>;
863
- /**
864
- * Writes the Parquet file to an output stream.
865
- *
866
- * @description
867
- * Generates the file and writes it to the provided output stream.
868
- * Useful for streaming to files or network destinations.
869
- *
870
- * @param output - The output stream to write to
871
- * @returns Promise that resolves when writing is complete
872
- *
873
- * @example
874
- * ```typescript
875
- * const output = new FileOutputStream('data.parquet')
876
- * await writer.writeTo(output)
877
- * output.close()
878
- * ```
879
- */
880
- writeTo(output: OutputStream): Promise<void>;
881
- /**
882
- * Resets the writer to its initial state.
883
- *
884
- * @description
885
- * Clears all written data, row groups, and metadata. The schema
886
- * and options remain unchanged. Useful for writing multiple files
887
- * with the same configuration.
888
- *
889
- * @example
890
- * ```typescript
891
- * // Write first file
892
- * await writer.writeRows(batch1)
893
- * const file1 = await writer.toBuffer()
894
- *
895
- * // Reset and write second file
896
- * writer.reset()
897
- * await writer.writeRows(batch2)
898
- * const file2 = await writer.toBuffer()
899
- * ```
900
- */
901
- reset(): void;
902
- /**
903
- * Validates a row against the schema.
904
- *
905
- * @param row - The row to validate
906
- * @throws {ParquetError} If validation fails
907
- * @private
908
- */
909
- private _validateRow;
910
- /**
911
- * Validates a value matches the expected Parquet type.
912
- *
913
- * @param value - The value to validate
914
- * @param type - The expected Parquet type
915
- * @returns true if valid, false otherwise
916
- * @private
917
- */
918
- private _validateType;
919
- /**
920
- * Estimates the memory size of a row.
921
- *
922
- * @param row - The row to estimate
923
- * @returns Estimated size in bytes
924
- * @private
925
- */
926
- private _estimateRowSize;
927
- /**
928
- * Builds a row group from internal representation.
929
- *
930
- * @param internal - The internal row group data
931
- * @returns The row group metadata
932
- * @private
933
- */
934
- private _buildRowGroup;
935
- /**
936
- * Computes statistics for a column.
937
- *
938
- * @param values - The column values
939
- * @param type - The column type
940
- * @returns Column statistics
941
- * @private
942
- */
943
- private _computeStatistics;
944
- /**
945
- * Estimates the encoded size after compression.
946
- *
947
- * @param values - The column values
948
- * @param type - The column type
949
- * @param compression - The compression type
950
- * @returns Estimated compressed size in bytes
951
- * @private
952
- */
953
- private _estimateEncodedSize;
954
- /**
955
- * Estimates the uncompressed size of column values.
956
- *
957
- * @param values - The column values
958
- * @param type - The column type
959
- * @returns Estimated uncompressed size in bytes
960
- * @private
961
- */
962
- private _estimateUncompressedSize;
963
- /**
964
- * Generates the complete Parquet file bytes.
965
- *
966
- * @returns The complete Parquet file as Uint8Array
967
- * @private
968
- */
969
- private _generateParquetBytes;
970
- /**
971
- * Simple compression simulation for non-gzip formats.
972
- *
973
- * @param data - Data to compress
974
- * @param compression - Compression type
975
- * @returns Compressed data
976
- * @private
977
- */
978
- private _simpleCompress;
979
- }
980
- /**
981
- * Defines a Parquet schema.
982
- *
983
- * @description
984
- * Creates a validated Parquet schema from field definitions. Validates that:
985
- * - Schema has at least one field
986
- * - All field names are non-empty
987
- * - All field names are unique
988
- *
989
- * @param fields - Array of field definitions
990
- * @param metadata - Optional schema-level metadata
991
- * @returns Validated Parquet schema
992
- *
993
- * @throws {ParquetError} EMPTY_SCHEMA - If fields array is empty
994
- * @throws {ParquetError} EMPTY_FIELD_NAME - If any field name is empty
995
- * @throws {ParquetError} DUPLICATE_FIELD - If field names are not unique
996
- *
997
- * @example
998
- * ```typescript
999
- * const schema = defineSchema([
1000
- * { name: 'id', type: ParquetFieldType.INT64, required: true },
1001
- * { name: 'name', type: ParquetFieldType.STRING, required: true },
1002
- * { name: 'age', type: ParquetFieldType.INT32, required: false },
1003
- * { name: 'created_at', type: ParquetFieldType.TIMESTAMP_MILLIS, required: true }
1004
- * ], {
1005
- * version: '1.0',
1006
- * description: 'User records'
1007
- * })
1008
- * ```
1009
- */
1010
- export declare function defineSchema(fields: ParquetField[], metadata?: Record<string, string>): ParquetSchema;
1011
- /**
1012
- * Creates a Parquet writer.
1013
- *
1014
- * @description
1015
- * Factory function to create a ParquetWriter with the specified schema
1016
- * and options. Equivalent to `new ParquetWriter(schema, options)`.
1017
- *
1018
- * @param schema - The Parquet schema
1019
- * @param options - Writer options
1020
- * @returns A new ParquetWriter instance
1021
- *
1022
- * @example
1023
- * ```typescript
1024
- * const writer = createParquetWriter(schema, {
1025
- * rowGroupSize: 10000,
1026
- * compression: ParquetCompression.SNAPPY
1027
- * })
1028
- * ```
1029
- */
1030
- export declare function createParquetWriter(schema: ParquetSchema, options?: ParquetWriteOptions): ParquetWriter;
1031
- /**
1032
- * Writes data directly to a Parquet file buffer.
1033
- *
1034
- * @description
1035
- * Convenience function that creates a writer, writes all rows, and returns
1036
- * the complete Parquet file. Useful for simple one-shot writes.
1037
- *
1038
- * @param schema - The Parquet schema
1039
- * @param rows - Array of rows to write
1040
- * @param options - Writer options
1041
- * @returns Promise resolving to the complete Parquet file as Uint8Array
1042
- *
1043
- * @example
1044
- * ```typescript
1045
- * const buffer = await writeParquetFile(schema, [
1046
- * { id: 1, name: 'Alice' },
1047
- * { id: 2, name: 'Bob' }
1048
- * ], {
1049
- * compression: ParquetCompression.GZIP
1050
- * })
1051
- *
1052
- * await fs.writeFile('data.parquet', buffer)
1053
- * ```
1054
- */
1055
- export declare function writeParquetFile(schema: ParquetSchema, rows: Record<string, unknown>[], options?: ParquetWriteOptions): Promise<Uint8Array>;
1056
- /**
1057
- * Closes a writer and returns the final buffer.
1058
- *
1059
- * @description
1060
- * Generates the final Parquet file buffer and marks the writer as closed.
1061
- * The writer cannot be used for further writes after calling this function.
1062
- *
1063
- * @param writer - The ParquetWriter to close
1064
- * @returns Promise resolving to the complete Parquet file as Uint8Array
1065
- *
1066
- * @example
1067
- * ```typescript
1068
- * await writer.writeRows(data)
1069
- * const buffer = await closeWriter(writer)
1070
- * console.log(writer.isClosed) // true
1071
- * ```
1072
- */
1073
- export declare function closeWriter(writer: ParquetWriter): Promise<Uint8Array>;
1074
- /**
1075
- * Adds a row group to the writer.
1076
- *
1077
- * @description
1078
- * Writes multiple rows and then flushes them as a single row group.
1079
- * Useful when you want explicit control over row group boundaries.
1080
- *
1081
- * @param writer - The ParquetWriter to use
1082
- * @param rows - Array of rows for this row group
1083
- * @returns Promise that resolves when the row group is written
1084
- *
1085
- * @example
1086
- * ```typescript
1087
- * // Add explicit row groups
1088
- * await addRowGroup(writer, batch1) // First row group
1089
- * await addRowGroup(writer, batch2) // Second row group
1090
- * ```
1091
- */
1092
- export declare function addRowGroup(writer: ParquetWriter, rows: Record<string, unknown>[]): Promise<void>;
1093
- /**
1094
- * Gets metadata from a Parquet file buffer.
1095
- *
1096
- * @description
1097
- * Parses a Parquet file buffer and extracts the metadata including
1098
- * schema, row groups, compression settings, and custom metadata.
1099
- *
1100
- * @param bytes - The Parquet file buffer
1101
- * @returns The parsed metadata
1102
- *
1103
- * @throws {ParquetError} INVALID_MAGIC - If file doesn't have valid Parquet magic bytes
1104
- *
1105
- * @example
1106
- * ```typescript
1107
- * const buffer = await fs.readFile('data.parquet')
1108
- * const metadata = getMetadata(buffer)
1109
- *
1110
- * console.log(`Rows: ${metadata.numRows}`)
1111
- * console.log(`Schema: ${metadata.schema.fields.map(f => f.name).join(', ')}`)
1112
- * console.log(`Row groups: ${metadata.rowGroups.length}`)
1113
- *
1114
- * for (const rg of metadata.rowGroups) {
1115
- * console.log(` - ${rg.numRows} rows, ${rg.totalByteSize} bytes`)
1116
- * }
1117
- * ```
1118
- */
1119
- export declare function getMetadata(bytes: Uint8Array): ParquetMetadata;
1120
- /**
1121
- * Sets the compression type for a writer.
1122
- *
1123
- * @description
1124
- * Updates the default compression algorithm for a writer. Affects all
1125
- * subsequently written data. Columns with explicit compression settings
1126
- * in columnCompression are not affected.
1127
- *
1128
- * @param writer - The ParquetWriter to update
1129
- * @param compression - The new compression type
1130
- *
1131
- * @example
1132
- * ```typescript
1133
- * const writer = createParquetWriter(schema)
1134
- *
1135
- * // Write some rows with SNAPPY (default)
1136
- * await writer.writeRows(batch1)
1137
- * await writer.flushRowGroup()
1138
- *
1139
- * // Switch to GZIP for remaining data
1140
- * setCompression(writer, ParquetCompression.GZIP)
1141
- * await writer.writeRows(batch2)
1142
- * ```
1143
- */
1144
- export declare function setCompression(writer: ParquetWriter, compression: ParquetCompression): void;
1145
- //# sourceMappingURL=parquet-writer.d.ts.map