gitx.do 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (356)
  1. package/README.md +40 -353
  2. package/dist/do/logger.d.ts +50 -0
  3. package/dist/do/logger.d.ts.map +1 -0
  4. package/dist/do/logger.js +122 -0
  5. package/dist/do/logger.js.map +1 -0
  6. package/dist/{durable-object → do}/schema.d.ts +3 -3
  7. package/dist/do/schema.d.ts.map +1 -0
  8. package/dist/{durable-object → do}/schema.js +4 -3
  9. package/dist/do/schema.js.map +1 -0
  10. package/dist/do/types.d.ts +267 -0
  11. package/dist/do/types.d.ts.map +1 -0
  12. package/dist/do/types.js +62 -0
  13. package/dist/do/types.js.map +1 -0
  14. package/dist/index.d.ts +14 -469
  15. package/dist/index.d.ts.map +1 -1
  16. package/dist/index.js +31 -483
  17. package/dist/index.js.map +1 -1
  18. package/package.json +13 -21
  19. package/dist/cli/commands/add.d.ts +0 -176
  20. package/dist/cli/commands/add.d.ts.map +0 -1
  21. package/dist/cli/commands/add.js +0 -979
  22. package/dist/cli/commands/add.js.map +0 -1
  23. package/dist/cli/commands/blame.d.ts +0 -259
  24. package/dist/cli/commands/blame.d.ts.map +0 -1
  25. package/dist/cli/commands/blame.js +0 -609
  26. package/dist/cli/commands/blame.js.map +0 -1
  27. package/dist/cli/commands/branch.d.ts +0 -249
  28. package/dist/cli/commands/branch.d.ts.map +0 -1
  29. package/dist/cli/commands/branch.js +0 -693
  30. package/dist/cli/commands/branch.js.map +0 -1
  31. package/dist/cli/commands/checkout.d.ts +0 -73
  32. package/dist/cli/commands/checkout.d.ts.map +0 -1
  33. package/dist/cli/commands/checkout.js +0 -725
  34. package/dist/cli/commands/checkout.js.map +0 -1
  35. package/dist/cli/commands/commit.d.ts +0 -182
  36. package/dist/cli/commands/commit.d.ts.map +0 -1
  37. package/dist/cli/commands/commit.js +0 -457
  38. package/dist/cli/commands/commit.js.map +0 -1
  39. package/dist/cli/commands/diff.d.ts +0 -464
  40. package/dist/cli/commands/diff.d.ts.map +0 -1
  41. package/dist/cli/commands/diff.js +0 -959
  42. package/dist/cli/commands/diff.js.map +0 -1
  43. package/dist/cli/commands/log.d.ts +0 -239
  44. package/dist/cli/commands/log.d.ts.map +0 -1
  45. package/dist/cli/commands/log.js +0 -535
  46. package/dist/cli/commands/log.js.map +0 -1
  47. package/dist/cli/commands/merge.d.ts +0 -106
  48. package/dist/cli/commands/merge.d.ts.map +0 -1
  49. package/dist/cli/commands/merge.js +0 -852
  50. package/dist/cli/commands/merge.js.map +0 -1
  51. package/dist/cli/commands/review.d.ts +0 -457
  52. package/dist/cli/commands/review.d.ts.map +0 -1
  53. package/dist/cli/commands/review.js +0 -558
  54. package/dist/cli/commands/review.js.map +0 -1
  55. package/dist/cli/commands/stash.d.ts +0 -157
  56. package/dist/cli/commands/stash.d.ts.map +0 -1
  57. package/dist/cli/commands/stash.js +0 -655
  58. package/dist/cli/commands/stash.js.map +0 -1
  59. package/dist/cli/commands/status.d.ts +0 -269
  60. package/dist/cli/commands/status.d.ts.map +0 -1
  61. package/dist/cli/commands/status.js +0 -492
  62. package/dist/cli/commands/status.js.map +0 -1
  63. package/dist/cli/commands/web.d.ts +0 -199
  64. package/dist/cli/commands/web.d.ts.map +0 -1
  65. package/dist/cli/commands/web.js +0 -697
  66. package/dist/cli/commands/web.js.map +0 -1
  67. package/dist/cli/fs-adapter.d.ts +0 -656
  68. package/dist/cli/fs-adapter.d.ts.map +0 -1
  69. package/dist/cli/fs-adapter.js +0 -1177
  70. package/dist/cli/fs-adapter.js.map +0 -1
  71. package/dist/cli/fsx-cli-adapter.d.ts +0 -359
  72. package/dist/cli/fsx-cli-adapter.d.ts.map +0 -1
  73. package/dist/cli/fsx-cli-adapter.js +0 -619
  74. package/dist/cli/fsx-cli-adapter.js.map +0 -1
  75. package/dist/cli/index.d.ts +0 -387
  76. package/dist/cli/index.d.ts.map +0 -1
  77. package/dist/cli/index.js +0 -579
  78. package/dist/cli/index.js.map +0 -1
  79. package/dist/cli/ui/components/DiffView.d.ts +0 -12
  80. package/dist/cli/ui/components/DiffView.d.ts.map +0 -1
  81. package/dist/cli/ui/components/DiffView.js +0 -11
  82. package/dist/cli/ui/components/DiffView.js.map +0 -1
  83. package/dist/cli/ui/components/ErrorDisplay.d.ts +0 -10
  84. package/dist/cli/ui/components/ErrorDisplay.d.ts.map +0 -1
  85. package/dist/cli/ui/components/ErrorDisplay.js +0 -11
  86. package/dist/cli/ui/components/ErrorDisplay.js.map +0 -1
  87. package/dist/cli/ui/components/FuzzySearch.d.ts +0 -15
  88. package/dist/cli/ui/components/FuzzySearch.d.ts.map +0 -1
  89. package/dist/cli/ui/components/FuzzySearch.js +0 -12
  90. package/dist/cli/ui/components/FuzzySearch.js.map +0 -1
  91. package/dist/cli/ui/components/LoadingSpinner.d.ts +0 -10
  92. package/dist/cli/ui/components/LoadingSpinner.d.ts.map +0 -1
  93. package/dist/cli/ui/components/LoadingSpinner.js +0 -10
  94. package/dist/cli/ui/components/LoadingSpinner.js.map +0 -1
  95. package/dist/cli/ui/components/NavigationList.d.ts +0 -14
  96. package/dist/cli/ui/components/NavigationList.d.ts.map +0 -1
  97. package/dist/cli/ui/components/NavigationList.js +0 -11
  98. package/dist/cli/ui/components/NavigationList.js.map +0 -1
  99. package/dist/cli/ui/components/ScrollableContent.d.ts +0 -13
  100. package/dist/cli/ui/components/ScrollableContent.d.ts.map +0 -1
  101. package/dist/cli/ui/components/ScrollableContent.js +0 -11
  102. package/dist/cli/ui/components/ScrollableContent.js.map +0 -1
  103. package/dist/cli/ui/components/index.d.ts +0 -7
  104. package/dist/cli/ui/components/index.d.ts.map +0 -1
  105. package/dist/cli/ui/components/index.js +0 -9
  106. package/dist/cli/ui/components/index.js.map +0 -1
  107. package/dist/cli/ui/terminal-ui.d.ts +0 -85
  108. package/dist/cli/ui/terminal-ui.d.ts.map +0 -1
  109. package/dist/cli/ui/terminal-ui.js +0 -121
  110. package/dist/cli/ui/terminal-ui.js.map +0 -1
  111. package/dist/do/BashModule.d.ts +0 -871
  112. package/dist/do/BashModule.d.ts.map +0 -1
  113. package/dist/do/BashModule.js +0 -1143
  114. package/dist/do/BashModule.js.map +0 -1
  115. package/dist/do/FsModule.d.ts +0 -612
  116. package/dist/do/FsModule.d.ts.map +0 -1
  117. package/dist/do/FsModule.js +0 -1120
  118. package/dist/do/FsModule.js.map +0 -1
  119. package/dist/do/GitModule.d.ts +0 -635
  120. package/dist/do/GitModule.d.ts.map +0 -1
  121. package/dist/do/GitModule.js +0 -784
  122. package/dist/do/GitModule.js.map +0 -1
  123. package/dist/do/GitRepoDO.d.ts +0 -281
  124. package/dist/do/GitRepoDO.d.ts.map +0 -1
  125. package/dist/do/GitRepoDO.js +0 -479
  126. package/dist/do/GitRepoDO.js.map +0 -1
  127. package/dist/do/bash-ast.d.ts +0 -246
  128. package/dist/do/bash-ast.d.ts.map +0 -1
  129. package/dist/do/bash-ast.js +0 -888
  130. package/dist/do/bash-ast.js.map +0 -1
  131. package/dist/do/container-executor.d.ts +0 -491
  132. package/dist/do/container-executor.d.ts.map +0 -1
  133. package/dist/do/container-executor.js +0 -731
  134. package/dist/do/container-executor.js.map +0 -1
  135. package/dist/do/index.d.ts +0 -53
  136. package/dist/do/index.d.ts.map +0 -1
  137. package/dist/do/index.js +0 -91
  138. package/dist/do/index.js.map +0 -1
  139. package/dist/do/tiered-storage.d.ts +0 -403
  140. package/dist/do/tiered-storage.d.ts.map +0 -1
  141. package/dist/do/tiered-storage.js +0 -689
  142. package/dist/do/tiered-storage.js.map +0 -1
  143. package/dist/do/withBash.d.ts +0 -231
  144. package/dist/do/withBash.d.ts.map +0 -1
  145. package/dist/do/withBash.js +0 -244
  146. package/dist/do/withBash.js.map +0 -1
  147. package/dist/do/withFs.d.ts +0 -237
  148. package/dist/do/withFs.d.ts.map +0 -1
  149. package/dist/do/withFs.js +0 -387
  150. package/dist/do/withFs.js.map +0 -1
  151. package/dist/do/withGit.d.ts +0 -180
  152. package/dist/do/withGit.d.ts.map +0 -1
  153. package/dist/do/withGit.js +0 -271
  154. package/dist/do/withGit.js.map +0 -1
  155. package/dist/durable-object/object-store.d.ts +0 -633
  156. package/dist/durable-object/object-store.d.ts.map +0 -1
  157. package/dist/durable-object/object-store.js +0 -1164
  158. package/dist/durable-object/object-store.js.map +0 -1
  159. package/dist/durable-object/schema.d.ts.map +0 -1
  160. package/dist/durable-object/schema.js.map +0 -1
  161. package/dist/durable-object/wal.d.ts +0 -416
  162. package/dist/durable-object/wal.d.ts.map +0 -1
  163. package/dist/durable-object/wal.js +0 -445
  164. package/dist/durable-object/wal.js.map +0 -1
  165. package/dist/mcp/adapter.d.ts +0 -772
  166. package/dist/mcp/adapter.d.ts.map +0 -1
  167. package/dist/mcp/adapter.js +0 -895
  168. package/dist/mcp/adapter.js.map +0 -1
  169. package/dist/mcp/sandbox/miniflare-evaluator.d.ts +0 -22
  170. package/dist/mcp/sandbox/miniflare-evaluator.d.ts.map +0 -1
  171. package/dist/mcp/sandbox/miniflare-evaluator.js +0 -140
  172. package/dist/mcp/sandbox/miniflare-evaluator.js.map +0 -1
  173. package/dist/mcp/sandbox/object-store-proxy.d.ts +0 -32
  174. package/dist/mcp/sandbox/object-store-proxy.d.ts.map +0 -1
  175. package/dist/mcp/sandbox/object-store-proxy.js +0 -30
  176. package/dist/mcp/sandbox/object-store-proxy.js.map +0 -1
  177. package/dist/mcp/sandbox/template.d.ts +0 -17
  178. package/dist/mcp/sandbox/template.d.ts.map +0 -1
  179. package/dist/mcp/sandbox/template.js +0 -71
  180. package/dist/mcp/sandbox/template.js.map +0 -1
  181. package/dist/mcp/sandbox.d.ts +0 -764
  182. package/dist/mcp/sandbox.d.ts.map +0 -1
  183. package/dist/mcp/sandbox.js +0 -1362
  184. package/dist/mcp/sandbox.js.map +0 -1
  185. package/dist/mcp/sdk-adapter.d.ts +0 -835
  186. package/dist/mcp/sdk-adapter.d.ts.map +0 -1
  187. package/dist/mcp/sdk-adapter.js +0 -974
  188. package/dist/mcp/sdk-adapter.js.map +0 -1
  189. package/dist/mcp/tools/do.d.ts +0 -32
  190. package/dist/mcp/tools/do.d.ts.map +0 -1
  191. package/dist/mcp/tools/do.js +0 -117
  192. package/dist/mcp/tools/do.js.map +0 -1
  193. package/dist/mcp/tools.d.ts +0 -548
  194. package/dist/mcp/tools.d.ts.map +0 -1
  195. package/dist/mcp/tools.js +0 -3170
  196. package/dist/mcp/tools.js.map +0 -1
  197. package/dist/ops/blame.d.ts +0 -551
  198. package/dist/ops/blame.d.ts.map +0 -1
  199. package/dist/ops/blame.js +0 -1037
  200. package/dist/ops/blame.js.map +0 -1
  201. package/dist/ops/branch.d.ts +0 -766
  202. package/dist/ops/branch.d.ts.map +0 -1
  203. package/dist/ops/branch.js +0 -950
  204. package/dist/ops/branch.js.map +0 -1
  205. package/dist/ops/commit-traversal.d.ts +0 -349
  206. package/dist/ops/commit-traversal.d.ts.map +0 -1
  207. package/dist/ops/commit-traversal.js +0 -821
  208. package/dist/ops/commit-traversal.js.map +0 -1
  209. package/dist/ops/commit.d.ts +0 -555
  210. package/dist/ops/commit.d.ts.map +0 -1
  211. package/dist/ops/commit.js +0 -826
  212. package/dist/ops/commit.js.map +0 -1
  213. package/dist/ops/merge-base.d.ts +0 -397
  214. package/dist/ops/merge-base.d.ts.map +0 -1
  215. package/dist/ops/merge-base.js +0 -691
  216. package/dist/ops/merge-base.js.map +0 -1
  217. package/dist/ops/merge.d.ts +0 -855
  218. package/dist/ops/merge.d.ts.map +0 -1
  219. package/dist/ops/merge.js +0 -1551
  220. package/dist/ops/merge.js.map +0 -1
  221. package/dist/ops/tag.d.ts +0 -247
  222. package/dist/ops/tag.d.ts.map +0 -1
  223. package/dist/ops/tag.js +0 -649
  224. package/dist/ops/tag.js.map +0 -1
  225. package/dist/ops/tree-builder.d.ts +0 -178
  226. package/dist/ops/tree-builder.d.ts.map +0 -1
  227. package/dist/ops/tree-builder.js +0 -271
  228. package/dist/ops/tree-builder.js.map +0 -1
  229. package/dist/ops/tree-diff.d.ts +0 -291
  230. package/dist/ops/tree-diff.d.ts.map +0 -1
  231. package/dist/ops/tree-diff.js +0 -705
  232. package/dist/ops/tree-diff.js.map +0 -1
  233. package/dist/pack/delta.d.ts +0 -248
  234. package/dist/pack/delta.d.ts.map +0 -1
  235. package/dist/pack/delta.js +0 -740
  236. package/dist/pack/delta.js.map +0 -1
  237. package/dist/pack/format.d.ts +0 -446
  238. package/dist/pack/format.d.ts.map +0 -1
  239. package/dist/pack/format.js +0 -572
  240. package/dist/pack/format.js.map +0 -1
  241. package/dist/pack/full-generation.d.ts +0 -612
  242. package/dist/pack/full-generation.d.ts.map +0 -1
  243. package/dist/pack/full-generation.js +0 -1378
  244. package/dist/pack/full-generation.js.map +0 -1
  245. package/dist/pack/generation.d.ts +0 -441
  246. package/dist/pack/generation.d.ts.map +0 -1
  247. package/dist/pack/generation.js +0 -707
  248. package/dist/pack/generation.js.map +0 -1
  249. package/dist/pack/index.d.ts +0 -502
  250. package/dist/pack/index.d.ts.map +0 -1
  251. package/dist/pack/index.js +0 -833
  252. package/dist/pack/index.js.map +0 -1
  253. package/dist/refs/branch.d.ts +0 -683
  254. package/dist/refs/branch.d.ts.map +0 -1
  255. package/dist/refs/branch.js +0 -881
  256. package/dist/refs/branch.js.map +0 -1
  257. package/dist/refs/storage.d.ts +0 -833
  258. package/dist/refs/storage.d.ts.map +0 -1
  259. package/dist/refs/storage.js +0 -1023
  260. package/dist/refs/storage.js.map +0 -1
  261. package/dist/refs/tag.d.ts +0 -860
  262. package/dist/refs/tag.d.ts.map +0 -1
  263. package/dist/refs/tag.js +0 -996
  264. package/dist/refs/tag.js.map +0 -1
  265. package/dist/storage/backend.d.ts +0 -425
  266. package/dist/storage/backend.d.ts.map +0 -1
  267. package/dist/storage/backend.js +0 -41
  268. package/dist/storage/backend.js.map +0 -1
  269. package/dist/storage/fsx-adapter.d.ts +0 -204
  270. package/dist/storage/fsx-adapter.d.ts.map +0 -1
  271. package/dist/storage/fsx-adapter.js +0 -518
  272. package/dist/storage/fsx-adapter.js.map +0 -1
  273. package/dist/storage/lru-cache.d.ts +0 -691
  274. package/dist/storage/lru-cache.d.ts.map +0 -1
  275. package/dist/storage/lru-cache.js +0 -813
  276. package/dist/storage/lru-cache.js.map +0 -1
  277. package/dist/storage/object-index.d.ts +0 -585
  278. package/dist/storage/object-index.d.ts.map +0 -1
  279. package/dist/storage/object-index.js +0 -532
  280. package/dist/storage/object-index.js.map +0 -1
  281. package/dist/storage/r2-pack.d.ts +0 -1257
  282. package/dist/storage/r2-pack.d.ts.map +0 -1
  283. package/dist/storage/r2-pack.js +0 -1773
  284. package/dist/storage/r2-pack.js.map +0 -1
  285. package/dist/tiered/cdc-pipeline.d.ts +0 -1888
  286. package/dist/tiered/cdc-pipeline.d.ts.map +0 -1
  287. package/dist/tiered/cdc-pipeline.js +0 -1880
  288. package/dist/tiered/cdc-pipeline.js.map +0 -1
  289. package/dist/tiered/migration.d.ts +0 -1104
  290. package/dist/tiered/migration.d.ts.map +0 -1
  291. package/dist/tiered/migration.js +0 -1217
  292. package/dist/tiered/migration.js.map +0 -1
  293. package/dist/tiered/parquet-writer.d.ts +0 -1145
  294. package/dist/tiered/parquet-writer.d.ts.map +0 -1
  295. package/dist/tiered/parquet-writer.js +0 -1183
  296. package/dist/tiered/parquet-writer.js.map +0 -1
  297. package/dist/tiered/read-path.d.ts +0 -835
  298. package/dist/tiered/read-path.d.ts.map +0 -1
  299. package/dist/tiered/read-path.js +0 -487
  300. package/dist/tiered/read-path.js.map +0 -1
  301. package/dist/types/capability.d.ts +0 -1385
  302. package/dist/types/capability.d.ts.map +0 -1
  303. package/dist/types/capability.js +0 -36
  304. package/dist/types/capability.js.map +0 -1
  305. package/dist/types/index.d.ts +0 -13
  306. package/dist/types/index.d.ts.map +0 -1
  307. package/dist/types/index.js +0 -18
  308. package/dist/types/index.js.map +0 -1
  309. package/dist/types/interfaces.d.ts +0 -673
  310. package/dist/types/interfaces.d.ts.map +0 -1
  311. package/dist/types/interfaces.js +0 -26
  312. package/dist/types/interfaces.js.map +0 -1
  313. package/dist/types/objects.d.ts +0 -692
  314. package/dist/types/objects.d.ts.map +0 -1
  315. package/dist/types/objects.js +0 -837
  316. package/dist/types/objects.js.map +0 -1
  317. package/dist/types/storage.d.ts +0 -603
  318. package/dist/types/storage.d.ts.map +0 -1
  319. package/dist/types/storage.js +0 -191
  320. package/dist/types/storage.js.map +0 -1
  321. package/dist/types/worker-loader.d.ts +0 -60
  322. package/dist/types/worker-loader.d.ts.map +0 -1
  323. package/dist/types/worker-loader.js +0 -62
  324. package/dist/types/worker-loader.js.map +0 -1
  325. package/dist/utils/hash.d.ts +0 -198
  326. package/dist/utils/hash.d.ts.map +0 -1
  327. package/dist/utils/hash.js +0 -272
  328. package/dist/utils/hash.js.map +0 -1
  329. package/dist/utils/sha1.d.ts +0 -325
  330. package/dist/utils/sha1.d.ts.map +0 -1
  331. package/dist/utils/sha1.js +0 -635
  332. package/dist/utils/sha1.js.map +0 -1
  333. package/dist/wire/capabilities.d.ts +0 -1044
  334. package/dist/wire/capabilities.d.ts.map +0 -1
  335. package/dist/wire/capabilities.js +0 -941
  336. package/dist/wire/capabilities.js.map +0 -1
  337. package/dist/wire/path-security.d.ts +0 -157
  338. package/dist/wire/path-security.d.ts.map +0 -1
  339. package/dist/wire/path-security.js +0 -307
  340. package/dist/wire/path-security.js.map +0 -1
  341. package/dist/wire/pkt-line.d.ts +0 -345
  342. package/dist/wire/pkt-line.d.ts.map +0 -1
  343. package/dist/wire/pkt-line.js +0 -381
  344. package/dist/wire/pkt-line.js.map +0 -1
  345. package/dist/wire/receive-pack.d.ts +0 -1059
  346. package/dist/wire/receive-pack.d.ts.map +0 -1
  347. package/dist/wire/receive-pack.js +0 -1414
  348. package/dist/wire/receive-pack.js.map +0 -1
  349. package/dist/wire/smart-http.d.ts +0 -799
  350. package/dist/wire/smart-http.d.ts.map +0 -1
  351. package/dist/wire/smart-http.js +0 -945
  352. package/dist/wire/smart-http.js.map +0 -1
  353. package/dist/wire/upload-pack.d.ts +0 -727
  354. package/dist/wire/upload-pack.d.ts.map +0 -1
  355. package/dist/wire/upload-pack.js +0 -1141
  356. package/dist/wire/upload-pack.js.map +0 -1
@@ -1,1145 +0,0 @@
1
- /**
2
- * @fileoverview Parquet Writer for Git Analytics
3
- *
4
- * @description
5
- * Provides functionality to write git analytics data to Parquet format, a
6
- * columnar storage format optimized for analytical queries. This module
7
- * enables efficient storage and querying of Git repository data.
8
- *
9
- * **Key Features:**
10
- * - Schema definition with various field types (STRING, INT32, INT64, etc.)
11
- * - Multiple compression algorithms (SNAPPY, GZIP, ZSTD, LZ4, UNCOMPRESSED)
12
- * - Row group management for efficient columnar storage
13
- * - Automatic and manual row group flushing
14
- * - Column-level statistics generation (min, max, null count)
15
- * - Custom key-value metadata support
16
- * - Memory-efficient streaming writes
17
- *
18
- * **Parquet Format:**
19
- * The generated files follow the Parquet format with:
20
- * - Magic bytes "PAR1" at start and end
21
- * - Row group data organized by columns
22
- * - Footer metadata containing schema and statistics
23
- *
24
- * @example
25
- * ```typescript
26
- * // Define schema for commit analytics
27
- * const schema = defineSchema([
28
- * { name: 'commit_sha', type: ParquetFieldType.STRING, required: true },
29
- * { name: 'author', type: ParquetFieldType.STRING, required: true },
30
- * { name: 'timestamp', type: ParquetFieldType.TIMESTAMP_MILLIS, required: true },
31
- * { name: 'file_count', type: ParquetFieldType.INT32, required: false }
32
- * ])
33
- *
34
- * // Create writer with options
35
- * const writer = createParquetWriter(schema, {
36
- * rowGroupSize: 10000,
37
- * compression: ParquetCompression.SNAPPY,
38
- * enableStatistics: true
39
- * })
40
- *
41
- * // Write data
42
- * await writer.writeRows([
43
- * { commit_sha: 'abc123...', author: 'alice', timestamp: Date.now(), file_count: 5 },
44
- * { commit_sha: 'def456...', author: 'bob', timestamp: Date.now(), file_count: 3 }
45
- * ])
46
- *
47
- * // Generate the Parquet file
48
- * const buffer = await writer.toBuffer()
49
- * ```
50
- *
51
- * @module tiered/parquet-writer
52
- * @see {@link ParquetWriter} - Main writer class
53
- * @see {@link defineSchema} - Schema definition helper
54
- */
55
- /**
56
- * Supported Parquet field types.
57
- *
58
- * @description
59
- * Defines the data types that can be used for fields in a Parquet schema.
60
- * Each type maps to an appropriate physical and logical Parquet type.
61
- *
62
- * @example
63
- * ```typescript
64
- * const field: ParquetField = {
65
- * name: 'count',
66
- * type: ParquetFieldType.INT64,
67
- * required: true
68
- * }
69
- * ```
70
- *
71
- * @enum {string}
72
- */
73
- export declare enum ParquetFieldType {
74
- /**
75
- * UTF-8 encoded string.
76
- * Maps to Parquet BYTE_ARRAY with UTF8 logical type.
77
- */
78
- STRING = "STRING",
79
- /**
80
- * 32-bit signed integer.
81
- * Maps to Parquet INT32 physical type.
82
- */
83
- INT32 = "INT32",
84
- /**
85
- * 64-bit signed integer.
86
- * Maps to Parquet INT64 physical type.
87
- */
88
- INT64 = "INT64",
89
- /**
90
- * Boolean value (true/false).
91
- * Maps to Parquet BOOLEAN physical type.
92
- */
93
- BOOLEAN = "BOOLEAN",
94
- /**
95
- * 32-bit IEEE 754 floating point.
96
- * Maps to Parquet FLOAT physical type.
97
- */
98
- FLOAT = "FLOAT",
99
- /**
100
- * 64-bit IEEE 754 floating point.
101
- * Maps to Parquet DOUBLE physical type.
102
- */
103
- DOUBLE = "DOUBLE",
104
- /**
105
- * Raw binary data.
106
- * Maps to Parquet BYTE_ARRAY physical type.
107
- */
108
- BINARY = "BINARY",
109
- /**
110
- * Timestamp with millisecond precision.
111
- * Maps to Parquet INT64 with TIMESTAMP_MILLIS logical type.
112
- */
113
- TIMESTAMP_MILLIS = "TIMESTAMP_MILLIS",
114
- /**
115
- * Timestamp with microsecond precision.
116
- * Maps to Parquet INT64 with TIMESTAMP_MICROS logical type.
117
- */
118
- TIMESTAMP_MICROS = "TIMESTAMP_MICROS"
119
- }
120
- /**
121
- * Supported compression types for Parquet data.
122
- *
123
- * @description
124
- * Different compression algorithms offer trade-offs between compression
125
- * ratio, compression speed, and decompression speed.
126
- *
127
- * **Comparison:**
128
- * - SNAPPY: Fast compression/decompression, moderate ratio (default)
129
- * - GZIP: Higher ratio, slower compression, fast decompression
130
- * - ZSTD: Best ratio, good speed, requires more memory
131
- * - LZ4: Fastest, lower ratio
132
- * - UNCOMPRESSED: No compression overhead
133
- *
134
- * @example
135
- * ```typescript
136
- * const writer = createParquetWriter(schema, {
137
- * compression: ParquetCompression.ZSTD
138
- * })
139
- * ```
140
- *
141
- * @enum {string}
142
- */
143
- export declare enum ParquetCompression {
144
- /**
145
- * No compression applied.
146
- * Fastest writes, largest file size.
147
- */
148
- UNCOMPRESSED = "UNCOMPRESSED",
149
- /**
150
- * Snappy compression (default).
151
- * Good balance of speed and compression ratio.
152
- */
153
- SNAPPY = "SNAPPY",
154
- /**
155
- * GZIP compression.
156
- * Higher compression ratio, slower compression.
157
- */
158
- GZIP = "GZIP",
159
- /**
160
- * Zstandard compression.
161
- * Best compression ratio with good speed.
162
- */
163
- ZSTD = "ZSTD",
164
- /**
165
- * LZ4 compression.
166
- * Fastest compression, lower ratio.
167
- */
168
- LZ4 = "LZ4"
169
- }
170
- /**
171
- * Field definition for a Parquet schema.
172
- *
173
- * @description
174
- * Defines a single column in the Parquet schema, including its name,
175
- * data type, nullability, and optional metadata.
176
- *
177
- * @example
178
- * ```typescript
179
- * const nameField: ParquetField = {
180
- * name: 'user_name',
181
- * type: ParquetFieldType.STRING,
182
- * required: true,
183
- * metadata: { description: 'The user display name' }
184
- * }
185
- *
186
- * const ageField: ParquetField = {
187
- * name: 'age',
188
- * type: ParquetFieldType.INT32,
189
- * required: false // nullable
190
- * }
191
- * ```
192
- *
193
- * @interface ParquetField
194
- */
195
- export interface ParquetField {
196
- /**
197
- * Column name.
198
- * Must be unique within the schema and non-empty.
199
- */
200
- name: string;
201
- /**
202
- * Data type of the column.
203
- *
204
- * @see {@link ParquetFieldType}
205
- */
206
- type: ParquetFieldType;
207
- /**
208
- * Whether the field is required (non-nullable).
209
- * If true, null values will cause validation errors.
210
- */
211
- required: boolean;
212
- /**
213
- * Optional key-value metadata for the field.
214
- * Can be used for descriptions, units, etc.
215
- */
216
- metadata?: Record<string, string>;
217
- }
218
- /**
219
- * Parquet schema definition.
220
- *
221
- * @description
222
- * Defines the complete schema for a Parquet file, including all fields
223
- * and optional schema-level metadata.
224
- *
225
- * @example
226
- * ```typescript
227
- * const schema: ParquetSchema = {
228
- * fields: [
229
- * { name: 'id', type: ParquetFieldType.INT64, required: true },
230
- * { name: 'name', type: ParquetFieldType.STRING, required: true }
231
- * ],
232
- * metadata: {
233
- * created_by: 'gitdo',
234
- * version: '1.0'
235
- * }
236
- * }
237
- * ```
238
- *
239
- * @interface ParquetSchema
240
- */
241
- export interface ParquetSchema {
242
- /**
243
- * Array of field definitions for all columns.
244
- * Order determines column order in the file.
245
- */
246
- fields: ParquetField[];
247
- /**
248
- * Optional schema-level metadata.
249
- * Stored in the Parquet file footer.
250
- */
251
- metadata?: Record<string, string>;
252
- }
253
- /**
254
- * Options for creating a Parquet writer.
255
- *
256
- * @description
257
- * Configuration options that control how the Parquet file is written,
258
- * including row group sizing, compression, and statistics generation.
259
- *
260
- * @example
261
- * ```typescript
262
- * const options: ParquetWriteOptions = {
263
- * rowGroupSize: 50000, // 50K rows per group
264
- * rowGroupMemoryLimit: 64 * 1024 * 1024, // 64MB memory limit
265
- * compression: ParquetCompression.ZSTD,
266
- * columnCompression: {
267
- * 'binary_data': ParquetCompression.LZ4 // Fast for binary
268
- * },
269
- * enableStatistics: true,
270
- * sortBy: ['timestamp'],
271
- * partitionColumns: ['date']
272
- * }
273
- * ```
274
- *
275
- * @interface ParquetWriteOptions
276
- */
277
- export interface ParquetWriteOptions {
278
- /**
279
- * Maximum number of rows per row group.
280
- * Smaller groups = more granular reads, larger groups = better compression.
281
- *
282
- * @default 65536
283
- */
284
- rowGroupSize?: number;
285
- /**
286
- * Maximum memory size in bytes for a row group.
287
- * Triggers flush when reached, regardless of row count.
288
- */
289
- rowGroupMemoryLimit?: number;
290
- /**
291
- * Default compression algorithm for all columns.
292
- *
293
- * @default ParquetCompression.SNAPPY
294
- */
295
- compression?: ParquetCompression;
296
- /**
297
- * Per-column compression overrides.
298
- * Keys are column names, values are compression types.
299
- */
300
- columnCompression?: Record<string, ParquetCompression>;
301
- /**
302
- * Whether to compute and store column statistics.
303
- * Enables predicate pushdown during queries.
304
- *
305
- * @default false
306
- */
307
- enableStatistics?: boolean;
308
- /**
309
- * Columns to sort data by within each row group.
310
- * Improves query performance for sorted access patterns.
311
- */
312
- sortBy?: string[];
313
- /**
314
- * Columns used for partitioning.
315
- * Informational metadata for partitioned datasets.
316
- */
317
- partitionColumns?: string[];
318
- }
319
- /**
320
- * Statistics for a single column in a row group.
321
- *
322
- * @description
323
- * Column statistics enable query engines to skip row groups that don't
324
- * contain relevant data (predicate pushdown).
325
- *
326
- * @example
327
- * ```typescript
328
- * const stats: ColumnStatistics = {
329
- * min: 100,
330
- * max: 999,
331
- * nullCount: 5,
332
- * distinctCount: 850
333
- * }
334
- * ```
335
- *
336
- * @interface ColumnStatistics
337
- */
338
- export interface ColumnStatistics {
339
- /**
340
- * Minimum value in the column.
341
- * Type depends on column type.
342
- */
343
- min?: number | string | boolean;
344
- /**
345
- * Maximum value in the column.
346
- * Type depends on column type.
347
- */
348
- max?: number | string | boolean;
349
- /**
350
- * Number of null values in the column.
351
- */
352
- nullCount?: number;
353
- /**
354
- * Approximate distinct value count.
355
- * May not be exact for large datasets.
356
- */
357
- distinctCount?: number;
358
- }
359
- /**
360
- * Metadata for a column chunk within a row group.
361
- *
362
- * @description
363
- * Contains information about a single column's data within a row group,
364
- * including compression, sizes, and statistics.
365
- *
366
- * @interface ColumnChunkMetadata
367
- */
368
- export interface ColumnChunkMetadata {
369
- /**
370
- * Column name.
371
- */
372
- column: string;
373
- /**
374
- * Data type of the column.
375
- */
376
- type: ParquetFieldType;
377
- /**
378
- * Compression used for this column chunk.
379
- */
380
- compression: ParquetCompression;
381
- /**
382
- * Size in bytes after compression.
383
- */
384
- encodedSize: number;
385
- /**
386
- * Size in bytes before compression.
387
- */
388
- uncompressedSize: number;
389
- /**
390
- * Column statistics if statistics are enabled.
391
- */
392
- statistics?: ColumnStatistics;
393
- }
394
- /**
395
- * Row group representation in the Parquet file.
396
- *
397
- * @description
398
- * A row group is a horizontal partition of the data containing all columns
399
- * for a subset of rows. Row groups enable parallel processing and predicate
400
- * pushdown optimizations.
401
- *
402
- * @interface RowGroup
403
- */
404
- export interface RowGroup {
405
- /**
406
- * Number of rows in this row group.
407
- */
408
- numRows: number;
409
- /**
410
- * Total compressed size in bytes.
411
- */
412
- totalByteSize: number;
413
- /**
414
- * Metadata for each column chunk.
415
- */
416
- columns: ColumnChunkMetadata[];
417
- }
418
- /**
419
- * Complete metadata for a Parquet file.
420
- *
421
- * @description
422
- * Contains all metadata stored in the Parquet file footer, including
423
- * schema, row groups, and statistics. Used when reading files.
424
- *
425
- * @example
426
- * ```typescript
427
- * const metadata = getMetadata(parquetBuffer)
428
- * console.log(`Rows: ${metadata.numRows}`)
429
- * console.log(`Row groups: ${metadata.rowGroups.length}`)
430
- * console.log(`Compression: ${metadata.compression}`)
431
- * ```
432
- *
433
- * @interface ParquetMetadata
434
- */
435
- export interface ParquetMetadata {
436
- /**
437
- * The file's schema definition.
438
- */
439
- schema: ParquetSchema;
440
- /**
441
- * Total number of rows in the file.
442
- */
443
- numRows: number;
444
- /**
445
- * Array of row group metadata.
446
- */
447
- rowGroups: RowGroup[];
448
- /**
449
- * Default compression algorithm used.
450
- */
451
- compression: ParquetCompression;
452
- /**
453
- * Per-column compression settings.
454
- */
455
- columnMetadata?: Record<string, {
456
- compression: ParquetCompression;
457
- }>;
458
- /**
459
- * Custom key-value metadata.
460
- */
461
- keyValueMetadata?: Record<string, string>;
462
- /**
463
- * Unix timestamp when the file was created.
464
- */
465
- createdAt: number;
466
- /**
467
- * Total file size in bytes.
468
- */
469
- fileSize: number;
470
- /**
471
- * Columns the data is sorted by.
472
- */
473
- sortedBy?: string[];
474
- /**
475
- * Columns used for partitioning.
476
- */
477
- partitionColumns?: string[];
478
- }
479
- /**
480
- * Mock output stream interface for writing Parquet data.
481
- *
482
- * @description
483
- * Simple interface for streaming Parquet output to a destination.
484
- * Can be implemented for files, network streams, etc.
485
- *
486
- * @example
487
- * ```typescript
488
- * class BufferOutputStream implements OutputStream {
489
- * private chunks: Uint8Array[] = []
490
- *
491
- * write(data: Uint8Array): void {
492
- * this.chunks.push(data)
493
- * }
494
- *
495
- * getBuffer(): Uint8Array {
496
- * const total = this.chunks.reduce((sum, c) => sum + c.length, 0)
497
- * const result = new Uint8Array(total)
498
- * let offset = 0
499
- * for (const chunk of this.chunks) {
500
- * result.set(chunk, offset)
501
- * offset += chunk.length
502
- * }
503
- * return result
504
- * }
505
- * }
506
- * ```
507
- *
508
- * @interface OutputStream
509
- */
510
- export interface OutputStream {
511
- /**
512
- * Writes data to the output stream.
513
- *
514
- * @param data - The data to write
515
- */
516
- write(data: Uint8Array): void;
517
- }
518
- /**
519
- * Error class for Parquet-related operations.
520
- *
521
- * @description
522
- * Thrown when Parquet operations fail, such as schema validation errors,
523
- * invalid data types, or malformed files.
524
- *
525
- * @example
526
- * ```typescript
527
- * try {
528
- * await writer.writeRow({ invalid_field: 'value' })
529
- * } catch (error) {
530
- * if (error instanceof ParquetError) {
531
- * console.log(`Parquet error (${error.code}): ${error.message}`)
532
- * }
533
- * }
534
- * ```
535
- *
536
- * @class ParquetError
537
- * @extends Error
538
- */
539
- export declare class ParquetError extends Error {
540
- readonly code: string;
541
- /**
542
- * Creates a new ParquetError.
543
- *
544
- * @param message - Human-readable error message
545
- * @param code - Error code for programmatic handling
546
- *
547
- * @example
548
- * ```typescript
549
- * throw new ParquetError('Field name cannot be empty', 'EMPTY_FIELD_NAME')
550
- * ```
551
- */
552
- constructor(message: string, code: string);
553
- }
554
- /**
555
- * Parquet writer for git analytics data.
556
- *
557
- * @description
558
- * ParquetWriter provides a streaming interface for writing data to Parquet
559
- * format. It handles schema validation, row group management, compression,
560
- * and statistics generation.
561
- *
562
- * **Usage Pattern:**
563
- * 1. Create a schema using `defineSchema()`
564
- * 2. Create a writer with `createParquetWriter()` or `new ParquetWriter()`
565
- * 3. Write rows using `writeRow()` or `writeRows()`
566
- * 4. Generate the file with `toBuffer()` or `writeTo()`
567
- *
568
- * **Row Group Management:**
569
- * Rows are buffered in memory until the row group is full (by row count
570
- * or memory limit), then flushed. You can also manually flush with
571
- * `flushRowGroup()`.
572
- *
573
- * **Thread Safety:**
574
- * Not thread-safe. Use separate writer instances for concurrent writes.
575
- *
576
- * @example
577
- * ```typescript
578
- * // Create schema
579
- * const schema = defineSchema([
580
- * { name: 'sha', type: ParquetFieldType.STRING, required: true },
581
- * { name: 'type', type: ParquetFieldType.STRING, required: true },
582
- * { name: 'size', type: ParquetFieldType.INT64, required: true },
583
- * { name: 'timestamp', type: ParquetFieldType.TIMESTAMP_MILLIS, required: true }
584
- * ])
585
- *
586
- * // Create writer
587
- * const writer = new ParquetWriter(schema, {
588
- * rowGroupSize: 10000,
589
- * compression: ParquetCompression.SNAPPY,
590
- * enableStatistics: true
591
- * })
592
- *
593
- * // Write data
594
- * for (const object of gitObjects) {
595
- * await writer.writeRow({
596
- * sha: object.sha,
597
- * type: object.type,
598
- * size: object.size,
599
- * timestamp: Date.now()
600
- * })
601
- * }
602
- *
603
- * // Set custom metadata
604
- * writer.setMetadata('git_version', '2.40.0')
605
- * writer.setMetadata('repository', 'github.com/org/repo')
606
- *
607
- * // Generate file
608
- * const buffer = await writer.toBuffer()
609
- * console.log(`Generated ${buffer.length} bytes`)
610
- * console.log(`Rows: ${writer.rowCount}`)
611
- * console.log(`Row groups: ${writer.rowGroupCount}`)
612
- *
613
- * // Reset for reuse
614
- * writer.reset()
615
- * ```
616
- *
617
- * @class ParquetWriter
618
- */
619
- export declare class ParquetWriter {
620
- /**
621
- * The Parquet schema for this writer.
622
- * @readonly
623
- */
624
- readonly schema: ParquetSchema;
625
- /**
626
- * Resolved options with defaults applied.
627
- * @readonly
628
- */
629
- readonly options: Required<Pick<ParquetWriteOptions, 'rowGroupSize' | 'compression'>> & ParquetWriteOptions;
630
- /**
631
- * Total row count written.
632
- * @private
633
- */
634
- private _rowCount;
635
- /**
636
- * Completed row groups.
637
- * @private
638
- */
639
- private _rowGroups;
640
- /**
641
- * Current row group being built.
642
- * @private
643
- */
644
- private _currentRowGroup;
645
- /**
646
- * Whether the writer has been closed.
647
- * @private
648
- */
649
- private _isClosed;
650
- /**
651
- * Custom key-value metadata.
652
- * @private
653
- */
654
- private _keyValueMetadata;
655
- /**
656
- * Creation timestamp.
657
- * @private
658
- */
659
- private _createdAt;
660
- /**
661
- * Creates a new ParquetWriter instance.
662
- *
663
- * @param schema - The Parquet schema defining columns
664
- * @param options - Writer configuration options
665
- *
666
- * @example
667
- * ```typescript
668
- * const writer = new ParquetWriter(schema, {
669
- * rowGroupSize: 50000,
670
- * compression: ParquetCompression.GZIP
671
- * })
672
- * ```
673
- */
674
- constructor(schema: ParquetSchema, options?: ParquetWriteOptions);
675
- /**
676
- * Gets the total row count written to the writer.
677
- *
678
- * @description
679
- * Returns the total number of rows written, including rows in the
680
- * current unflushed row group.
681
- *
682
- * @returns Total row count
683
- *
684
- * @example
685
- * ```typescript
686
- * await writer.writeRows(data)
687
- * console.log(`Wrote ${writer.rowCount} rows`)
688
- * ```
689
- */
690
- get rowCount(): number;
691
- /**
692
- * Gets the number of row groups.
693
- *
694
- * @description
695
- * Returns the number of completed row groups plus one if there's
696
- * a pending row group with data.
697
- *
698
- * @returns Number of row groups
699
- *
700
- * @example
701
- * ```typescript
702
- * console.log(`Row groups: ${writer.rowGroupCount}`)
703
- * ```
704
- */
705
- get rowGroupCount(): number;
706
- /**
707
- * Checks if the writer has been closed.
708
- *
709
- * @description
710
- * A closed writer cannot accept new rows. Writers are closed
711
- * implicitly by `closeWriter()`.
712
- *
713
- * @returns true if closed
714
- *
715
- * @example
716
- * ```typescript
717
- * if (!writer.isClosed) {
718
- * await writer.writeRow(row)
719
- * }
720
- * ```
721
- */
722
- get isClosed(): boolean;
723
- /**
724
- * Writes a single row to the Parquet file.
725
- *
726
- * @description
727
- * Validates the row against the schema and adds it to the current
728
- * row group. Automatically flushes the row group when it reaches
729
- * the configured size or memory limit.
730
- *
731
- * @param row - Object with column values keyed by column name
732
- * @returns Promise that resolves when the row is written
733
- *
734
- * @throws {ParquetError} WRITER_CLOSED - If writer is closed
735
- * @throws {ParquetError} MISSING_REQUIRED_FIELD - If required field is missing
736
- * @throws {ParquetError} INVALID_FIELD_TYPE - If field value type doesn't match schema
737
- *
738
- * @example
739
- * ```typescript
740
- * await writer.writeRow({
741
- * id: 123,
742
- * name: 'Alice',
743
- * active: true
744
- * })
745
- * ```
746
- */
747
- writeRow(row: Record<string, unknown>): Promise<void>;
748
- /**
749
- * Writes multiple rows to the Parquet file.
750
- *
751
- * @description
752
- * Convenience method that writes an array of rows sequentially.
753
- * Each row is validated and may trigger row group flushes.
754
- *
755
- * @param rows - Array of row objects to write
756
- * @returns Promise that resolves when all rows are written
757
- *
758
- * @throws {ParquetError} Any error from writeRow()
759
- *
760
- * @example
761
- * ```typescript
762
- * await writer.writeRows([
763
- * { id: 1, name: 'Alice' },
764
- * { id: 2, name: 'Bob' },
765
- * { id: 3, name: 'Carol' }
766
- * ])
767
- * ```
768
- */
769
- writeRows(rows: Record<string, unknown>[]): Promise<void>;
770
- /**
771
- * Manually flushes the current row group.
772
- *
773
- * @description
774
- * Forces the current row group to be finalized and stored, even if
775
- * it hasn't reached the size limit. Has no effect if the current
776
- * row group is empty.
777
- *
778
- * @returns Promise that resolves when flush is complete
779
- *
780
- * @example
781
- * ```typescript
782
- * // Write some rows
783
- * await writer.writeRows(batch1)
784
- *
785
- * // Force flush before writing next batch
786
- * await writer.flushRowGroup()
787
- *
788
- * // Continue writing
789
- * await writer.writeRows(batch2)
790
- * ```
791
- */
792
- flushRowGroup(): Promise<void>;
793
- /**
794
- * Gets the current row group's memory size.
795
- *
796
- * @description
797
- * Returns the estimated memory consumption of the unflushed row group.
798
- * Useful for monitoring memory usage during streaming writes.
799
- *
800
- * @returns Memory size in bytes
801
- *
802
- * @example
803
- * ```typescript
804
- * if (writer.currentRowGroupMemorySize() > 50 * 1024 * 1024) {
805
- * console.log('Row group using significant memory')
806
- * await writer.flushRowGroup()
807
- * }
808
- * ```
809
- */
810
- currentRowGroupMemorySize(): number;
811
- /**
812
- * Gets the completed row groups.
813
- *
814
- * @description
815
- * Returns a copy of the completed row group metadata array.
816
- * Does not include the current unflushed row group.
817
- *
818
- * @returns Array of row group metadata
819
- *
820
- * @example
821
- * ```typescript
822
- * for (const rg of writer.getRowGroups()) {
823
- * console.log(`Row group: ${rg.numRows} rows, ${rg.totalByteSize} bytes`)
824
- * }
825
- * ```
826
- */
827
- getRowGroups(): RowGroup[];
828
- /**
829
- * Sets a custom key-value metadata entry.
830
- *
831
- * @description
832
- * Adds custom metadata that will be stored in the Parquet file footer.
833
- * Can be used for versioning, provenance, or application-specific data.
834
- *
835
- * @param key - Metadata key
836
- * @param value - Metadata value
837
- *
838
- * @example
839
- * ```typescript
840
- * writer.setMetadata('created_by', 'gitdo-analytics')
841
- * writer.setMetadata('schema_version', '2.0')
842
- * writer.setMetadata('repository', 'github.com/org/repo')
843
- * ```
844
- */
845
- setMetadata(key: string, value: string): void;
846
- /**
847
- * Generates the Parquet file as a buffer.
848
- *
849
- * @description
850
- * Finalizes the file by flushing any remaining rows and generating
851
- * the complete Parquet file structure including header, row groups,
852
- * and footer with metadata.
853
- *
854
- * @returns Promise resolving to the complete Parquet file as Uint8Array
855
- *
856
- * @example
857
- * ```typescript
858
- * const buffer = await writer.toBuffer()
859
- * await fs.writeFile('data.parquet', buffer)
860
- * ```
861
- */
862
- toBuffer(): Promise<Uint8Array>;
863
- /**
864
- * Writes the Parquet file to an output stream.
865
- *
866
- * @description
867
- * Generates the file and writes it to the provided output stream.
868
- * Useful for streaming to files or network destinations.
869
- *
870
- * @param output - The output stream to write to
871
- * @returns Promise that resolves when writing is complete
872
- *
873
- * @example
874
- * ```typescript
875
- * const output = new FileOutputStream('data.parquet')
876
- * await writer.writeTo(output)
877
- * output.close()
878
- * ```
879
- */
880
- writeTo(output: OutputStream): Promise<void>;
881
- /**
882
- * Resets the writer to its initial state.
883
- *
884
- * @description
885
- * Clears all written data, row groups, and metadata. The schema
886
- * and options remain unchanged. Useful for writing multiple files
887
- * with the same configuration.
888
- *
889
- * @example
890
- * ```typescript
891
- * // Write first file
892
- * await writer.writeRows(batch1)
893
- * const file1 = await writer.toBuffer()
894
- *
895
- * // Reset and write second file
896
- * writer.reset()
897
- * await writer.writeRows(batch2)
898
- * const file2 = await writer.toBuffer()
899
- * ```
900
- */
901
- reset(): void;
902
- /**
903
- * Validates a row against the schema.
904
- *
905
- * @param row - The row to validate
906
- * @throws {ParquetError} If validation fails
907
- * @private
908
- */
909
- private _validateRow;
910
- /**
911
- * Validates a value matches the expected Parquet type.
912
- *
913
- * @param value - The value to validate
914
- * @param type - The expected Parquet type
915
- * @returns true if valid, false otherwise
916
- * @private
917
- */
918
- private _validateType;
919
- /**
920
- * Estimates the memory size of a row.
921
- *
922
- * @param row - The row to estimate
923
- * @returns Estimated size in bytes
924
- * @private
925
- */
926
- private _estimateRowSize;
927
- /**
928
- * Builds a row group from internal representation.
929
- *
930
- * @param internal - The internal row group data
931
- * @returns The row group metadata
932
- * @private
933
- */
934
- private _buildRowGroup;
935
- /**
936
- * Computes statistics for a column.
937
- *
938
- * @param values - The column values
939
- * @param type - The column type
940
- * @returns Column statistics
941
- * @private
942
- */
943
- private _computeStatistics;
944
- /**
945
- * Estimates the encoded size after compression.
946
- *
947
- * @param values - The column values
948
- * @param type - The column type
949
- * @param compression - The compression type
950
- * @returns Estimated compressed size in bytes
951
- * @private
952
- */
953
- private _estimateEncodedSize;
954
- /**
955
- * Estimates the uncompressed size of column values.
956
- *
957
- * @param values - The column values
958
- * @param type - The column type
959
- * @returns Estimated uncompressed size in bytes
960
- * @private
961
- */
962
- private _estimateUncompressedSize;
963
- /**
964
- * Generates the complete Parquet file bytes.
965
- *
966
- * @returns The complete Parquet file as Uint8Array
967
- * @private
968
- */
969
- private _generateParquetBytes;
970
- /**
971
- * Simple compression simulation for non-gzip formats.
972
- *
973
- * @param data - Data to compress
974
- * @param compression - Compression type
975
- * @returns Compressed data
976
- * @private
977
- */
978
- private _simpleCompress;
979
- }
980
- /**
981
- * Defines a Parquet schema.
982
- *
983
- * @description
984
- * Creates a validated Parquet schema from field definitions. Validates that:
985
- * - Schema has at least one field
986
- * - All field names are non-empty
987
- * - All field names are unique
988
- *
989
- * @param fields - Array of field definitions
990
- * @param metadata - Optional schema-level metadata
991
- * @returns Validated Parquet schema
992
- *
993
- * @throws {ParquetError} EMPTY_SCHEMA - If fields array is empty
994
- * @throws {ParquetError} EMPTY_FIELD_NAME - If any field name is empty
995
- * @throws {ParquetError} DUPLICATE_FIELD - If field names are not unique
996
- *
997
- * @example
998
- * ```typescript
999
- * const schema = defineSchema([
1000
- * { name: 'id', type: ParquetFieldType.INT64, required: true },
1001
- * { name: 'name', type: ParquetFieldType.STRING, required: true },
1002
- * { name: 'age', type: ParquetFieldType.INT32, required: false },
1003
- * { name: 'created_at', type: ParquetFieldType.TIMESTAMP_MILLIS, required: true }
1004
- * ], {
1005
- * version: '1.0',
1006
- * description: 'User records'
1007
- * })
1008
- * ```
1009
- */
1010
- export declare function defineSchema(fields: ParquetField[], metadata?: Record<string, string>): ParquetSchema;
1011
- /**
1012
- * Creates a Parquet writer.
1013
- *
1014
- * @description
1015
- * Factory function to create a ParquetWriter with the specified schema
1016
- * and options. Equivalent to `new ParquetWriter(schema, options)`.
1017
- *
1018
- * @param schema - The Parquet schema
1019
- * @param options - Writer options
1020
- * @returns A new ParquetWriter instance
1021
- *
1022
- * @example
1023
- * ```typescript
1024
- * const writer = createParquetWriter(schema, {
1025
- * rowGroupSize: 10000,
1026
- * compression: ParquetCompression.SNAPPY
1027
- * })
1028
- * ```
1029
- */
1030
- export declare function createParquetWriter(schema: ParquetSchema, options?: ParquetWriteOptions): ParquetWriter;
1031
- /**
1032
- * Writes data directly to a Parquet file buffer.
1033
- *
1034
- * @description
1035
- * Convenience function that creates a writer, writes all rows, and returns
1036
- * the complete Parquet file. Useful for simple one-shot writes.
1037
- *
1038
- * @param schema - The Parquet schema
1039
- * @param rows - Array of rows to write
1040
- * @param options - Writer options
1041
- * @returns Promise resolving to the complete Parquet file as Uint8Array
1042
- *
1043
- * @example
1044
- * ```typescript
1045
- * const buffer = await writeParquetFile(schema, [
1046
- * { id: 1, name: 'Alice' },
1047
- * { id: 2, name: 'Bob' }
1048
- * ], {
1049
- * compression: ParquetCompression.GZIP
1050
- * })
1051
- *
1052
- * await fs.writeFile('data.parquet', buffer)
1053
- * ```
1054
- */
1055
- export declare function writeParquetFile(schema: ParquetSchema, rows: Record<string, unknown>[], options?: ParquetWriteOptions): Promise<Uint8Array>;
1056
- /**
1057
- * Closes a writer and returns the final buffer.
1058
- *
1059
- * @description
1060
- * Generates the final Parquet file buffer and marks the writer as closed.
1061
- * The writer cannot be used for further writes after calling this function.
1062
- *
1063
- * @param writer - The ParquetWriter to close
1064
- * @returns Promise resolving to the complete Parquet file as Uint8Array
1065
- *
1066
- * @example
1067
- * ```typescript
1068
- * await writer.writeRows(data)
1069
- * const buffer = await closeWriter(writer)
1070
- * console.log(writer.isClosed) // true
1071
- * ```
1072
- */
1073
- export declare function closeWriter(writer: ParquetWriter): Promise<Uint8Array>;
1074
- /**
1075
- * Adds a row group to the writer.
1076
- *
1077
- * @description
1078
- * Writes multiple rows and then flushes them as a single row group.
1079
- * Useful when you want explicit control over row group boundaries.
1080
- *
1081
- * @param writer - The ParquetWriter to use
1082
- * @param rows - Array of rows for this row group
1083
- * @returns Promise that resolves when the row group is written
1084
- *
1085
- * @example
1086
- * ```typescript
1087
- * // Add explicit row groups
1088
- * await addRowGroup(writer, batch1) // First row group
1089
- * await addRowGroup(writer, batch2) // Second row group
1090
- * ```
1091
- */
1092
- export declare function addRowGroup(writer: ParquetWriter, rows: Record<string, unknown>[]): Promise<void>;
1093
- /**
1094
- * Gets metadata from a Parquet file buffer.
1095
- *
1096
- * @description
1097
- * Parses a Parquet file buffer and extracts the metadata including
1098
- * schema, row groups, compression settings, and custom metadata.
1099
- *
1100
- * @param bytes - The Parquet file buffer
1101
- * @returns The parsed metadata
1102
- *
1103
- * @throws {ParquetError} INVALID_MAGIC - If file doesn't have valid Parquet magic bytes
1104
- *
1105
- * @example
1106
- * ```typescript
1107
- * const buffer = await fs.readFile('data.parquet')
1108
- * const metadata = getMetadata(buffer)
1109
- *
1110
- * console.log(`Rows: ${metadata.numRows}`)
1111
- * console.log(`Schema: ${metadata.schema.fields.map(f => f.name).join(', ')}`)
1112
- * console.log(`Row groups: ${metadata.rowGroups.length}`)
1113
- *
1114
- * for (const rg of metadata.rowGroups) {
1115
- * console.log(` - ${rg.numRows} rows, ${rg.totalByteSize} bytes`)
1116
- * }
1117
- * ```
1118
- */
1119
- export declare function getMetadata(bytes: Uint8Array): ParquetMetadata;
1120
- /**
1121
- * Sets the compression type for a writer.
1122
- *
1123
- * @description
1124
- * Updates the default compression algorithm for a writer. Affects all
1125
- * subsequently written data. Columns with explicit compression settings
1126
- * in columnCompression are not affected.
1127
- *
1128
- * @param writer - The ParquetWriter to update
1129
- * @param compression - The new compression type
1130
- *
1131
- * @example
1132
- * ```typescript
1133
- * const writer = createParquetWriter(schema)
1134
- *
1135
- * // Write some rows with SNAPPY (default)
1136
- * await writer.writeRows(batch1)
1137
- * await writer.flushRowGroup()
1138
- *
1139
- * // Switch to GZIP for remaining data
1140
- * setCompression(writer, ParquetCompression.GZIP)
1141
- * await writer.writeRows(batch2)
1142
- * ```
1143
- */
1144
- export declare function setCompression(writer: ParquetWriter, compression: ParquetCompression): void;
1145
- //# sourceMappingURL=parquet-writer.d.ts.map