gitx.do 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (344) hide show
  1. package/README.md +40 -353
  2. package/dist/do/logger.d.ts +50 -0
  3. package/dist/do/logger.d.ts.map +1 -0
  4. package/dist/do/logger.js +122 -0
  5. package/dist/do/logger.js.map +1 -0
  6. package/dist/{durable-object → do}/schema.d.ts +3 -3
  7. package/dist/do/schema.d.ts.map +1 -0
  8. package/dist/{durable-object → do}/schema.js +4 -3
  9. package/dist/do/schema.js.map +1 -0
  10. package/dist/do/types.d.ts +267 -0
  11. package/dist/do/types.d.ts.map +1 -0
  12. package/dist/do/types.js +62 -0
  13. package/dist/do/types.js.map +1 -0
  14. package/dist/index.d.ts +15 -415
  15. package/dist/index.d.ts.map +1 -1
  16. package/dist/index.js +31 -483
  17. package/dist/index.js.map +1 -1
  18. package/package.json +13 -21
  19. package/dist/cli/commands/add.d.ts +0 -174
  20. package/dist/cli/commands/add.d.ts.map +0 -1
  21. package/dist/cli/commands/add.js +0 -131
  22. package/dist/cli/commands/add.js.map +0 -1
  23. package/dist/cli/commands/blame.d.ts +0 -259
  24. package/dist/cli/commands/blame.d.ts.map +0 -1
  25. package/dist/cli/commands/blame.js +0 -609
  26. package/dist/cli/commands/blame.js.map +0 -1
  27. package/dist/cli/commands/branch.d.ts +0 -249
  28. package/dist/cli/commands/branch.d.ts.map +0 -1
  29. package/dist/cli/commands/branch.js +0 -693
  30. package/dist/cli/commands/branch.js.map +0 -1
  31. package/dist/cli/commands/commit.d.ts +0 -182
  32. package/dist/cli/commands/commit.d.ts.map +0 -1
  33. package/dist/cli/commands/commit.js +0 -437
  34. package/dist/cli/commands/commit.js.map +0 -1
  35. package/dist/cli/commands/diff.d.ts +0 -464
  36. package/dist/cli/commands/diff.d.ts.map +0 -1
  37. package/dist/cli/commands/diff.js +0 -958
  38. package/dist/cli/commands/diff.js.map +0 -1
  39. package/dist/cli/commands/log.d.ts +0 -239
  40. package/dist/cli/commands/log.d.ts.map +0 -1
  41. package/dist/cli/commands/log.js +0 -535
  42. package/dist/cli/commands/log.js.map +0 -1
  43. package/dist/cli/commands/merge.d.ts +0 -106
  44. package/dist/cli/commands/merge.d.ts.map +0 -1
  45. package/dist/cli/commands/merge.js +0 -55
  46. package/dist/cli/commands/merge.js.map +0 -1
  47. package/dist/cli/commands/review.d.ts +0 -457
  48. package/dist/cli/commands/review.d.ts.map +0 -1
  49. package/dist/cli/commands/review.js +0 -533
  50. package/dist/cli/commands/review.js.map +0 -1
  51. package/dist/cli/commands/status.d.ts +0 -269
  52. package/dist/cli/commands/status.d.ts.map +0 -1
  53. package/dist/cli/commands/status.js +0 -493
  54. package/dist/cli/commands/status.js.map +0 -1
  55. package/dist/cli/commands/web.d.ts +0 -199
  56. package/dist/cli/commands/web.d.ts.map +0 -1
  57. package/dist/cli/commands/web.js +0 -696
  58. package/dist/cli/commands/web.js.map +0 -1
  59. package/dist/cli/fs-adapter.d.ts +0 -656
  60. package/dist/cli/fs-adapter.d.ts.map +0 -1
  61. package/dist/cli/fs-adapter.js +0 -1179
  62. package/dist/cli/fs-adapter.js.map +0 -1
  63. package/dist/cli/fsx-cli-adapter.d.ts +0 -359
  64. package/dist/cli/fsx-cli-adapter.d.ts.map +0 -1
  65. package/dist/cli/fsx-cli-adapter.js +0 -619
  66. package/dist/cli/fsx-cli-adapter.js.map +0 -1
  67. package/dist/cli/index.d.ts +0 -387
  68. package/dist/cli/index.d.ts.map +0 -1
  69. package/dist/cli/index.js +0 -523
  70. package/dist/cli/index.js.map +0 -1
  71. package/dist/cli/ui/components/DiffView.d.ts +0 -7
  72. package/dist/cli/ui/components/DiffView.d.ts.map +0 -1
  73. package/dist/cli/ui/components/DiffView.js +0 -11
  74. package/dist/cli/ui/components/DiffView.js.map +0 -1
  75. package/dist/cli/ui/components/ErrorDisplay.d.ts +0 -6
  76. package/dist/cli/ui/components/ErrorDisplay.d.ts.map +0 -1
  77. package/dist/cli/ui/components/ErrorDisplay.js +0 -11
  78. package/dist/cli/ui/components/ErrorDisplay.js.map +0 -1
  79. package/dist/cli/ui/components/FuzzySearch.d.ts +0 -9
  80. package/dist/cli/ui/components/FuzzySearch.d.ts.map +0 -1
  81. package/dist/cli/ui/components/FuzzySearch.js +0 -12
  82. package/dist/cli/ui/components/FuzzySearch.js.map +0 -1
  83. package/dist/cli/ui/components/LoadingSpinner.d.ts +0 -6
  84. package/dist/cli/ui/components/LoadingSpinner.d.ts.map +0 -1
  85. package/dist/cli/ui/components/LoadingSpinner.js +0 -10
  86. package/dist/cli/ui/components/LoadingSpinner.js.map +0 -1
  87. package/dist/cli/ui/components/NavigationList.d.ts +0 -9
  88. package/dist/cli/ui/components/NavigationList.d.ts.map +0 -1
  89. package/dist/cli/ui/components/NavigationList.js +0 -11
  90. package/dist/cli/ui/components/NavigationList.js.map +0 -1
  91. package/dist/cli/ui/components/ScrollableContent.d.ts +0 -8
  92. package/dist/cli/ui/components/ScrollableContent.d.ts.map +0 -1
  93. package/dist/cli/ui/components/ScrollableContent.js +0 -11
  94. package/dist/cli/ui/components/ScrollableContent.js.map +0 -1
  95. package/dist/cli/ui/components/index.d.ts +0 -7
  96. package/dist/cli/ui/components/index.d.ts.map +0 -1
  97. package/dist/cli/ui/components/index.js +0 -9
  98. package/dist/cli/ui/components/index.js.map +0 -1
  99. package/dist/cli/ui/terminal-ui.d.ts +0 -52
  100. package/dist/cli/ui/terminal-ui.d.ts.map +0 -1
  101. package/dist/cli/ui/terminal-ui.js +0 -121
  102. package/dist/cli/ui/terminal-ui.js.map +0 -1
  103. package/dist/do/BashModule.d.ts +0 -871
  104. package/dist/do/BashModule.d.ts.map +0 -1
  105. package/dist/do/BashModule.js +0 -1143
  106. package/dist/do/BashModule.js.map +0 -1
  107. package/dist/do/FsModule.d.ts +0 -601
  108. package/dist/do/FsModule.d.ts.map +0 -1
  109. package/dist/do/FsModule.js +0 -1120
  110. package/dist/do/FsModule.js.map +0 -1
  111. package/dist/do/GitModule.d.ts +0 -635
  112. package/dist/do/GitModule.d.ts.map +0 -1
  113. package/dist/do/GitModule.js +0 -781
  114. package/dist/do/GitModule.js.map +0 -1
  115. package/dist/do/GitRepoDO.d.ts +0 -281
  116. package/dist/do/GitRepoDO.d.ts.map +0 -1
  117. package/dist/do/GitRepoDO.js +0 -479
  118. package/dist/do/GitRepoDO.js.map +0 -1
  119. package/dist/do/bash-ast.d.ts +0 -246
  120. package/dist/do/bash-ast.d.ts.map +0 -1
  121. package/dist/do/bash-ast.js +0 -888
  122. package/dist/do/bash-ast.js.map +0 -1
  123. package/dist/do/container-executor.d.ts +0 -491
  124. package/dist/do/container-executor.d.ts.map +0 -1
  125. package/dist/do/container-executor.js +0 -730
  126. package/dist/do/container-executor.js.map +0 -1
  127. package/dist/do/index.d.ts +0 -53
  128. package/dist/do/index.d.ts.map +0 -1
  129. package/dist/do/index.js +0 -91
  130. package/dist/do/index.js.map +0 -1
  131. package/dist/do/tiered-storage.d.ts +0 -403
  132. package/dist/do/tiered-storage.d.ts.map +0 -1
  133. package/dist/do/tiered-storage.js +0 -689
  134. package/dist/do/tiered-storage.js.map +0 -1
  135. package/dist/do/withBash.d.ts +0 -231
  136. package/dist/do/withBash.d.ts.map +0 -1
  137. package/dist/do/withBash.js +0 -244
  138. package/dist/do/withBash.js.map +0 -1
  139. package/dist/do/withFs.d.ts +0 -237
  140. package/dist/do/withFs.d.ts.map +0 -1
  141. package/dist/do/withFs.js +0 -387
  142. package/dist/do/withFs.js.map +0 -1
  143. package/dist/do/withGit.d.ts +0 -180
  144. package/dist/do/withGit.d.ts.map +0 -1
  145. package/dist/do/withGit.js +0 -271
  146. package/dist/do/withGit.js.map +0 -1
  147. package/dist/durable-object/object-store.d.ts +0 -633
  148. package/dist/durable-object/object-store.d.ts.map +0 -1
  149. package/dist/durable-object/object-store.js +0 -1161
  150. package/dist/durable-object/object-store.js.map +0 -1
  151. package/dist/durable-object/schema.d.ts.map +0 -1
  152. package/dist/durable-object/schema.js.map +0 -1
  153. package/dist/durable-object/wal.d.ts +0 -416
  154. package/dist/durable-object/wal.d.ts.map +0 -1
  155. package/dist/durable-object/wal.js +0 -445
  156. package/dist/durable-object/wal.js.map +0 -1
  157. package/dist/mcp/adapter.d.ts +0 -772
  158. package/dist/mcp/adapter.d.ts.map +0 -1
  159. package/dist/mcp/adapter.js +0 -895
  160. package/dist/mcp/adapter.js.map +0 -1
  161. package/dist/mcp/sandbox/miniflare-evaluator.d.ts +0 -22
  162. package/dist/mcp/sandbox/miniflare-evaluator.d.ts.map +0 -1
  163. package/dist/mcp/sandbox/miniflare-evaluator.js +0 -140
  164. package/dist/mcp/sandbox/miniflare-evaluator.js.map +0 -1
  165. package/dist/mcp/sandbox/object-store-proxy.d.ts +0 -32
  166. package/dist/mcp/sandbox/object-store-proxy.d.ts.map +0 -1
  167. package/dist/mcp/sandbox/object-store-proxy.js +0 -30
  168. package/dist/mcp/sandbox/object-store-proxy.js.map +0 -1
  169. package/dist/mcp/sandbox/template.d.ts +0 -17
  170. package/dist/mcp/sandbox/template.d.ts.map +0 -1
  171. package/dist/mcp/sandbox/template.js +0 -71
  172. package/dist/mcp/sandbox/template.js.map +0 -1
  173. package/dist/mcp/sandbox.d.ts +0 -764
  174. package/dist/mcp/sandbox.d.ts.map +0 -1
  175. package/dist/mcp/sandbox.js +0 -1362
  176. package/dist/mcp/sandbox.js.map +0 -1
  177. package/dist/mcp/sdk-adapter.d.ts +0 -835
  178. package/dist/mcp/sdk-adapter.d.ts.map +0 -1
  179. package/dist/mcp/sdk-adapter.js +0 -974
  180. package/dist/mcp/sdk-adapter.js.map +0 -1
  181. package/dist/mcp/tools/do.d.ts +0 -32
  182. package/dist/mcp/tools/do.d.ts.map +0 -1
  183. package/dist/mcp/tools/do.js +0 -115
  184. package/dist/mcp/tools/do.js.map +0 -1
  185. package/dist/mcp/tools.d.ts +0 -548
  186. package/dist/mcp/tools.d.ts.map +0 -1
  187. package/dist/mcp/tools.js +0 -1934
  188. package/dist/mcp/tools.js.map +0 -1
  189. package/dist/ops/blame.d.ts +0 -551
  190. package/dist/ops/blame.d.ts.map +0 -1
  191. package/dist/ops/blame.js +0 -1037
  192. package/dist/ops/blame.js.map +0 -1
  193. package/dist/ops/branch.d.ts +0 -766
  194. package/dist/ops/branch.d.ts.map +0 -1
  195. package/dist/ops/branch.js +0 -950
  196. package/dist/ops/branch.js.map +0 -1
  197. package/dist/ops/commit-traversal.d.ts +0 -349
  198. package/dist/ops/commit-traversal.d.ts.map +0 -1
  199. package/dist/ops/commit-traversal.js +0 -821
  200. package/dist/ops/commit-traversal.js.map +0 -1
  201. package/dist/ops/commit.d.ts +0 -555
  202. package/dist/ops/commit.d.ts.map +0 -1
  203. package/dist/ops/commit.js +0 -826
  204. package/dist/ops/commit.js.map +0 -1
  205. package/dist/ops/merge-base.d.ts +0 -397
  206. package/dist/ops/merge-base.d.ts.map +0 -1
  207. package/dist/ops/merge-base.js +0 -691
  208. package/dist/ops/merge-base.js.map +0 -1
  209. package/dist/ops/merge.d.ts +0 -855
  210. package/dist/ops/merge.d.ts.map +0 -1
  211. package/dist/ops/merge.js +0 -1551
  212. package/dist/ops/merge.js.map +0 -1
  213. package/dist/ops/tag.d.ts +0 -247
  214. package/dist/ops/tag.d.ts.map +0 -1
  215. package/dist/ops/tag.js +0 -649
  216. package/dist/ops/tag.js.map +0 -1
  217. package/dist/ops/tree-builder.d.ts +0 -178
  218. package/dist/ops/tree-builder.d.ts.map +0 -1
  219. package/dist/ops/tree-builder.js +0 -271
  220. package/dist/ops/tree-builder.js.map +0 -1
  221. package/dist/ops/tree-diff.d.ts +0 -291
  222. package/dist/ops/tree-diff.d.ts.map +0 -1
  223. package/dist/ops/tree-diff.js +0 -705
  224. package/dist/ops/tree-diff.js.map +0 -1
  225. package/dist/pack/delta.d.ts +0 -248
  226. package/dist/pack/delta.d.ts.map +0 -1
  227. package/dist/pack/delta.js +0 -736
  228. package/dist/pack/delta.js.map +0 -1
  229. package/dist/pack/format.d.ts +0 -446
  230. package/dist/pack/format.d.ts.map +0 -1
  231. package/dist/pack/format.js +0 -572
  232. package/dist/pack/format.js.map +0 -1
  233. package/dist/pack/full-generation.d.ts +0 -612
  234. package/dist/pack/full-generation.d.ts.map +0 -1
  235. package/dist/pack/full-generation.js +0 -1378
  236. package/dist/pack/full-generation.js.map +0 -1
  237. package/dist/pack/generation.d.ts +0 -441
  238. package/dist/pack/generation.d.ts.map +0 -1
  239. package/dist/pack/generation.js +0 -707
  240. package/dist/pack/generation.js.map +0 -1
  241. package/dist/pack/index.d.ts +0 -502
  242. package/dist/pack/index.d.ts.map +0 -1
  243. package/dist/pack/index.js +0 -833
  244. package/dist/pack/index.js.map +0 -1
  245. package/dist/refs/branch.d.ts +0 -668
  246. package/dist/refs/branch.d.ts.map +0 -1
  247. package/dist/refs/branch.js +0 -897
  248. package/dist/refs/branch.js.map +0 -1
  249. package/dist/refs/storage.d.ts +0 -833
  250. package/dist/refs/storage.d.ts.map +0 -1
  251. package/dist/refs/storage.js +0 -1023
  252. package/dist/refs/storage.js.map +0 -1
  253. package/dist/refs/tag.d.ts +0 -860
  254. package/dist/refs/tag.d.ts.map +0 -1
  255. package/dist/refs/tag.js +0 -996
  256. package/dist/refs/tag.js.map +0 -1
  257. package/dist/storage/backend.d.ts +0 -425
  258. package/dist/storage/backend.d.ts.map +0 -1
  259. package/dist/storage/backend.js +0 -41
  260. package/dist/storage/backend.js.map +0 -1
  261. package/dist/storage/fsx-adapter.d.ts +0 -204
  262. package/dist/storage/fsx-adapter.d.ts.map +0 -1
  263. package/dist/storage/fsx-adapter.js +0 -470
  264. package/dist/storage/fsx-adapter.js.map +0 -1
  265. package/dist/storage/lru-cache.d.ts +0 -691
  266. package/dist/storage/lru-cache.d.ts.map +0 -1
  267. package/dist/storage/lru-cache.js +0 -813
  268. package/dist/storage/lru-cache.js.map +0 -1
  269. package/dist/storage/object-index.d.ts +0 -585
  270. package/dist/storage/object-index.d.ts.map +0 -1
  271. package/dist/storage/object-index.js +0 -532
  272. package/dist/storage/object-index.js.map +0 -1
  273. package/dist/storage/r2-pack.d.ts +0 -1257
  274. package/dist/storage/r2-pack.d.ts.map +0 -1
  275. package/dist/storage/r2-pack.js +0 -1770
  276. package/dist/storage/r2-pack.js.map +0 -1
  277. package/dist/tiered/cdc-pipeline.d.ts +0 -1888
  278. package/dist/tiered/cdc-pipeline.d.ts.map +0 -1
  279. package/dist/tiered/cdc-pipeline.js +0 -1880
  280. package/dist/tiered/cdc-pipeline.js.map +0 -1
  281. package/dist/tiered/migration.d.ts +0 -1104
  282. package/dist/tiered/migration.d.ts.map +0 -1
  283. package/dist/tiered/migration.js +0 -1214
  284. package/dist/tiered/migration.js.map +0 -1
  285. package/dist/tiered/parquet-writer.d.ts +0 -1145
  286. package/dist/tiered/parquet-writer.d.ts.map +0 -1
  287. package/dist/tiered/parquet-writer.js +0 -1183
  288. package/dist/tiered/parquet-writer.js.map +0 -1
  289. package/dist/tiered/read-path.d.ts +0 -835
  290. package/dist/tiered/read-path.d.ts.map +0 -1
  291. package/dist/tiered/read-path.js +0 -487
  292. package/dist/tiered/read-path.js.map +0 -1
  293. package/dist/types/capability.d.ts +0 -1385
  294. package/dist/types/capability.d.ts.map +0 -1
  295. package/dist/types/capability.js +0 -36
  296. package/dist/types/capability.js.map +0 -1
  297. package/dist/types/index.d.ts +0 -13
  298. package/dist/types/index.d.ts.map +0 -1
  299. package/dist/types/index.js +0 -18
  300. package/dist/types/index.js.map +0 -1
  301. package/dist/types/objects.d.ts +0 -692
  302. package/dist/types/objects.d.ts.map +0 -1
  303. package/dist/types/objects.js +0 -837
  304. package/dist/types/objects.js.map +0 -1
  305. package/dist/types/storage.d.ts +0 -603
  306. package/dist/types/storage.d.ts.map +0 -1
  307. package/dist/types/storage.js +0 -191
  308. package/dist/types/storage.js.map +0 -1
  309. package/dist/types/worker-loader.d.ts +0 -60
  310. package/dist/types/worker-loader.d.ts.map +0 -1
  311. package/dist/types/worker-loader.js +0 -62
  312. package/dist/types/worker-loader.js.map +0 -1
  313. package/dist/utils/hash.d.ts +0 -197
  314. package/dist/utils/hash.d.ts.map +0 -1
  315. package/dist/utils/hash.js +0 -268
  316. package/dist/utils/hash.js.map +0 -1
  317. package/dist/utils/sha1.d.ts +0 -290
  318. package/dist/utils/sha1.d.ts.map +0 -1
  319. package/dist/utils/sha1.js +0 -582
  320. package/dist/utils/sha1.js.map +0 -1
  321. package/dist/wire/capabilities.d.ts +0 -1044
  322. package/dist/wire/capabilities.d.ts.map +0 -1
  323. package/dist/wire/capabilities.js +0 -941
  324. package/dist/wire/capabilities.js.map +0 -1
  325. package/dist/wire/path-security.d.ts +0 -157
  326. package/dist/wire/path-security.d.ts.map +0 -1
  327. package/dist/wire/path-security.js +0 -307
  328. package/dist/wire/path-security.js.map +0 -1
  329. package/dist/wire/pkt-line.d.ts +0 -345
  330. package/dist/wire/pkt-line.d.ts.map +0 -1
  331. package/dist/wire/pkt-line.js +0 -381
  332. package/dist/wire/pkt-line.js.map +0 -1
  333. package/dist/wire/receive-pack.d.ts +0 -1059
  334. package/dist/wire/receive-pack.d.ts.map +0 -1
  335. package/dist/wire/receive-pack.js +0 -1414
  336. package/dist/wire/receive-pack.js.map +0 -1
  337. package/dist/wire/smart-http.d.ts +0 -799
  338. package/dist/wire/smart-http.d.ts.map +0 -1
  339. package/dist/wire/smart-http.js +0 -945
  340. package/dist/wire/smart-http.js.map +0 -1
  341. package/dist/wire/upload-pack.d.ts +0 -727
  342. package/dist/wire/upload-pack.d.ts.map +0 -1
  343. package/dist/wire/upload-pack.js +0 -1138
  344. package/dist/wire/upload-pack.js.map +0 -1
@@ -1,1880 +0,0 @@
1
- /**
2
- * @fileoverview CDC (Change Data Capture) Pipeline for Git Operations
3
- *
4
- * @description
5
- * This module provides a comprehensive Change Data Capture system for Git operations,
6
- * enabling real-time event streaming, transformation, and analytics for Git repository events.
7
- *
8
- * ## Key Features
9
- *
10
- * - **Event Capture**: Captures git operations (push, fetch, commits, branches, tags, merges)
11
- * - **Parquet Transformation**: Converts events to columnar Parquet format for analytics
12
- * - **Batching**: Efficient event batching with configurable size and time-based flushing
13
- * - **Retry Policies**: Configurable exponential backoff with jitter for resilient processing
14
- * - **Dead Letter Queue**: Handles failed events for later reprocessing
15
- * - **Metrics**: Built-in tracking for events processed, batches, errors, and latency
16
- *
17
- * ## Architecture
18
- *
19
- * The pipeline consists of several components:
20
- * 1. **CDCEventCapture**: Captures git operations and converts them to CDCEvents
21
- * 2. **CDCBatcher**: Batches events for efficient processing
22
- * 3. **ParquetTransformer**: Transforms events to Parquet format
23
- * 4. **CDCPipeline**: Orchestrates the entire flow with error handling
24
- *
25
- * ## Event Flow
26
- *
27
- * ```
28
- * Git Operation -> CDCEventCapture -> CDCBatcher -> ParquetTransformer -> Output
29
- * |
30
- * v
31
- * (On failure) Dead Letter Queue
32
- * ```
33
- *
34
- * @module tiered/cdc-pipeline
35
- *
36
- * @example
37
- * ```typescript
38
- * // Create and start a pipeline
39
- * const pipeline = new CDCPipeline({
40
- * batchSize: 100,
41
- * flushIntervalMs: 5000,
42
- * maxRetries: 3,
43
- * parquetCompression: 'snappy',
44
- * outputPath: '/analytics',
45
- * schemaVersion: 1
46
- * })
47
- *
48
- * await pipeline.start()
49
- *
50
- * // Process events
51
- * pipeline.onOutput((output) => {
52
- * console.log(`Generated batch: ${output.batchId}`)
53
- * console.log(`Events: ${output.events.length}`)
54
- * console.log(`Parquet size: ${output.parquetBuffer.length} bytes`)
55
- * })
56
- *
57
- * pipeline.onDeadLetter((events, error) => {
58
- * console.error(`Failed events: ${events.length}`, error)
59
- * })
60
- *
61
- * // Create and process an event
62
- * const event = createCDCEvent('COMMIT_CREATED', 'push', {
63
- * operation: 'commit-create',
64
- * sha: 'abc123...',
65
- * treeSha: 'def456...',
66
- * parentShas: ['parent1...']
67
- * })
68
- *
69
- * await pipeline.process(event)
70
- *
71
- * // Get metrics
72
- * const metrics = pipeline.getMetrics()
73
- * console.log(`Processed: ${metrics.eventsProcessed}`)
74
- * console.log(`Batches: ${metrics.batchesGenerated}`)
75
- *
76
- * // Stop the pipeline
77
- * await pipeline.stop()
78
- * ```
79
- *
80
- * @see {@link CDCPipeline} - Main pipeline orchestration class
81
- * @see {@link CDCEventCapture} - Event capture from git operations
82
- * @see {@link ParquetTransformer} - Parquet format transformation
83
- */
84
- // ============================================================================
85
- // Error Classes
86
- // ============================================================================
87
- /**
88
- * Custom error class for CDC operations.
89
- *
90
- * @description
91
- * CDCError provides structured error information for CDC pipeline failures,
92
- * including an error type for programmatic handling and optional cause for
93
- * error chaining.
94
- *
95
- * @example
96
- * ```typescript
97
- * try {
98
- * await pipeline.process(event)
99
- * } catch (error) {
100
- * if (error instanceof CDCError) {
101
- * switch (error.type) {
102
- * case 'VALIDATION_ERROR':
103
- * console.log('Invalid event:', error.message)
104
- * break
105
- * case 'PROCESSING_ERROR':
106
- * console.log('Processing failed:', error.message)
107
- * if (error.cause) {
108
- * console.log('Caused by:', error.cause.message)
109
- * }
110
- * break
111
- * }
112
- * }
113
- * }
114
- * ```
115
- *
116
- * @class CDCError
117
- * @extends Error
118
- */
119
- export class CDCError extends Error {
120
- type;
121
- cause;
122
- /**
123
- * Creates a new CDCError.
124
- *
125
- * @param type - Error type for categorization
126
- * @param message - Human-readable error message
127
- * @param cause - Optional underlying error that caused this error
128
- */
129
- constructor(type, message, cause) {
130
- super(message);
131
- this.type = type;
132
- this.cause = cause;
133
- this.name = 'CDCError';
134
- }
135
- }
136
- /**
137
- * Retry policy implementing exponential backoff with optional jitter.
138
- *
139
- * @description
140
- * Provides a robust retry mechanism for handling transient failures.
141
- * Uses exponential backoff to space out retry attempts, with optional
142
- * jitter to prevent synchronized retries from multiple clients.
143
- *
144
- * **Backoff Formula:**
145
- * `delay = min(initialDelay * (multiplier ^ attempt), maxDelay)`
146
- *
147
- * **With Jitter:**
148
- * `delay = delay * random(0.5, 1.5)`
149
- *
150
- * @example
151
- * ```typescript
152
- * const policy = new CDCRetryPolicy({
153
- * maxRetries: 3,
154
- * initialDelayMs: 100,
155
- * maxDelayMs: 5000,
156
- * backoffMultiplier: 2,
157
- * jitter: true
158
- * })
159
- *
160
- * let attempts = 0
161
- * while (attempts < 10) {
162
- * try {
163
- * await doOperation()
164
- * break
165
- * } catch (error) {
166
- * attempts++
167
- * if (!policy.shouldRetry(attempts)) {
168
- * throw new Error('Max retries exceeded')
169
- * }
170
- * const delay = policy.getDelay(attempts)
171
- * console.log(`Retry ${attempts} after ${delay}ms`)
172
- * await sleep(delay)
173
- * }
174
- * }
175
- * ```
176
- *
177
- * @class CDCRetryPolicy
178
- */
179
- export class CDCRetryPolicy {
180
- /**
181
- * Retry configuration.
182
- * @private
183
- */
184
- config;
185
- /**
186
- * Creates a new retry policy.
187
- *
188
- * @param config - Retry policy configuration
189
- */
190
- constructor(config) {
191
- this.config = config;
192
- }
193
- /**
194
- * Determines whether another retry should be attempted.
195
- *
196
- * @param attemptCount - Number of attempts already made
197
- * @returns true if more retries are allowed, false otherwise
198
- *
199
- * @example
200
- * ```typescript
201
- * if (policy.shouldRetry(3)) {
202
- * // Retry is allowed
203
- * }
204
- * ```
205
- */
206
- shouldRetry(attemptCount) {
207
- return attemptCount < this.config.maxRetries;
208
- }
209
- /**
210
- * Calculates the delay before the next retry.
211
- *
212
- * @description
213
- * Computes delay using exponential backoff, capped at maxDelayMs.
214
- * If jitter is enabled, a random factor in [0.5, 1.5) is applied after the cap, so the returned delay may exceed maxDelayMs.
215
- *
216
- * @param attemptCount - Number of attempts already made (1-indexed)
217
- * @returns Delay in milliseconds before next retry
218
- *
219
- * @example
220
- * ```typescript
221
- * // With initialDelay=100, multiplier=2:
222
- * // getDelay(1): 100ms * 2^1 = 200ms
223
- * // getDelay(2): 100ms * 2^2 = 400ms
224
- * // getDelay(3): 100ms * 2^3 = 800ms
225
- * const delay = policy.getDelay(attemptCount)
226
- * await sleep(delay)
227
- * ```
228
- */
229
- getDelay(attemptCount) {
230
- let delay = this.config.initialDelayMs * Math.pow(this.config.backoffMultiplier, attemptCount);
231
- delay = Math.min(delay, this.config.maxDelayMs);
232
- if (this.config.jitter) {
233
- // Add random jitter between 0.5x and 1.5x
234
- const jitterFactor = 0.5 + Math.random();
235
- delay = Math.floor(delay * jitterFactor);
236
- }
237
- return delay;
238
- }
239
- }
240
- // ============================================================================
241
- // CDC Event Capture
242
- // ============================================================================
243
- /**
244
- * Captures git operations and converts them to CDC events.
245
- *
246
- * @description
247
- * CDCEventCapture hooks into git operations and generates CDCEvents for each
248
- * operation. It maintains an internal buffer of events that can be flushed
249
- * manually or automatically when the buffer reaches a configured size.
250
- *
251
- * **Supported Operations:**
252
- * - Object creation/deletion (blobs, trees, commits, tags)
253
- * - Reference updates (branches, tags)
254
- * - Commit creation
255
- * - Pack reception
256
- * - Branch creation/deletion
257
- * - Tag creation
258
- * - Merge completion
259
- *
260
- * **Event Ordering:**
261
- * Events are assigned monotonically increasing sequence numbers within a
262
- * capture session. This ensures proper ordering for replay and analytics.
263
- *
264
- * @example
265
- * ```typescript
266
- * const capture = new CDCEventCapture({ maxBufferSize: 100 })
267
- *
268
- * // Add a listener for real-time processing
269
- * capture.addListener((event) => {
270
- * console.log(`Event: ${event.type} - ${event.id}`)
271
- * })
272
- *
273
- * // Capture git operations
274
- * await capture.onCommitCreated('abc123...', 'tree456...', ['parent789...'])
275
- * await capture.onRefUpdate('refs/heads/main', 'old...', 'new...')
276
- *
277
- * // Get buffered events
278
- * console.log(`Buffer size: ${capture.getBufferSize()}`)
279
- *
280
- * // Flush buffer
281
- * const events = await capture.flush()
282
- * console.log(`Flushed ${events.length} events`)
283
- * ```
284
- *
285
- * @class CDCEventCapture
286
- */
287
- export class CDCEventCapture {
288
- /**
289
- * Buffer of captured events.
290
- * @private
291
- */
292
- events = [];
293
- /**
294
- * Monotonically increasing sequence counter.
295
- * @private
296
- */
297
- sequenceCounter = 0;
298
- /**
299
- * Registered event listeners.
300
- * @private
301
- */
302
- listeners = [];
303
- /**
304
- * Maximum buffer size before auto-flush.
305
- * @private
306
- */
307
- maxBufferSize;
308
- /**
309
- * Creates a new CDC event capture instance.
310
- *
311
- * @param options - Configuration options
312
- */
313
- constructor(options = {}) {
314
- this.maxBufferSize = options.maxBufferSize ?? Infinity;
315
- }
316
- /**
317
- * Generates a unique event ID.
318
- * @private
319
- */
320
- generateEventId() {
321
- return `evt-${Date.now()}-${Math.random().toString(36).slice(2)}`;
322
- }
323
- /**
324
- * Emits an event to the buffer and notifies listeners.
325
- * @private
326
- */
327
- async emitEvent(event) {
328
- // Auto-flush if buffer is full
329
- if (this.events.length >= this.maxBufferSize) {
330
- await this.flush();
331
- }
332
- this.events.push(event);
333
- // Notify all listeners
334
- for (const listener of this.listeners) {
335
- listener(event);
336
- }
337
- }
338
- /**
339
- * Returns the next sequence number.
340
- * @private
341
- */
342
- nextSequence() {
343
- return ++this.sequenceCounter;
344
- }
345
- /**
346
- * Captures an object put (creation) operation.
347
- *
348
- * @description
349
- * Called when a git object (blob, tree, commit, tag) is written to storage.
350
- *
351
- * @param sha - SHA-1 hash of the object
352
- * @param type - Object type (blob, tree, commit, tag)
353
- * @param data - Raw object data
354
- *
355
- * @example
356
- * ```typescript
357
- * await capture.onObjectPut('abc123...', 'blob', blobData)
358
- * ```
359
- */
360
- async onObjectPut(sha, type, data) {
361
- const event = {
362
- id: this.generateEventId(),
363
- type: 'OBJECT_CREATED',
364
- source: 'push',
365
- timestamp: Date.now(),
366
- payload: {
367
- operation: 'put',
368
- sha,
369
- data,
370
- metadata: { type, size: data.length }
371
- },
372
- sequence: this.nextSequence(),
373
- version: 1
374
- };
375
- await this.emitEvent(event);
376
- }
377
- /**
378
- * Captures an object deletion operation.
379
- *
380
- * @description
381
- * Called when a git object is deleted, typically during garbage collection.
382
- *
383
- * @param sha - SHA-1 hash of the deleted object
384
- *
385
- * @example
386
- * ```typescript
387
- * await capture.onObjectDelete('abc123...')
388
- * ```
389
- */
390
- async onObjectDelete(sha) {
391
- const event = {
392
- id: this.generateEventId(),
393
- type: 'OBJECT_DELETED',
394
- source: 'gc',
395
- timestamp: Date.now(),
396
- payload: {
397
- operation: 'delete',
398
- sha
399
- },
400
- sequence: this.nextSequence(),
401
- version: 1
402
- };
403
- await this.emitEvent(event);
404
- }
405
- /**
406
- * Captures a reference update operation.
407
- *
408
- * @description
409
- * Called when a git reference (branch, tag) is updated to point to a new commit.
410
- *
411
- * @param refName - Full reference name (e.g., 'refs/heads/main')
412
- * @param oldSha - Previous SHA (all zeros for new refs)
413
- * @param newSha - New SHA (all zeros for deleted refs)
414
- *
415
- * @example
416
- * ```typescript
417
- * await capture.onRefUpdate(
418
- * 'refs/heads/main',
419
- * 'oldcommit123...',
420
- * 'newcommit456...'
421
- * )
422
- * ```
423
- */
424
- async onRefUpdate(refName, oldSha, newSha) {
425
- const event = {
426
- id: this.generateEventId(),
427
- type: 'REF_UPDATED',
428
- source: 'push',
429
- timestamp: Date.now(),
430
- payload: {
431
- operation: 'ref-update',
432
- refName,
433
- oldSha,
434
- newSha
435
- },
436
- sequence: this.nextSequence(),
437
- version: 1
438
- };
439
- await this.emitEvent(event);
440
- }
441
- /**
442
- * Captures a commit creation operation.
443
- *
444
- * @description
445
- * Called when a new commit object is created.
446
- *
447
- * @param commitSha - SHA-1 hash of the commit
448
- * @param treeSha - SHA-1 hash of the tree the commit points to
449
- * @param parentShas - Array of parent commit SHAs
450
- *
451
- * @example
452
- * ```typescript
453
- * await capture.onCommitCreated(
454
- * 'commitabc123...',
455
- * 'treedef456...',
456
- * ['parent1...', 'parent2...']
457
- * )
458
- * ```
459
- */
460
- async onCommitCreated(commitSha, treeSha, parentShas) {
461
- const event = {
462
- id: this.generateEventId(),
463
- type: 'COMMIT_CREATED',
464
- source: 'push',
465
- timestamp: Date.now(),
466
- payload: {
467
- operation: 'commit-create',
468
- sha: commitSha,
469
- treeSha,
470
- parentShas
471
- },
472
- sequence: this.nextSequence(),
473
- version: 1
474
- };
475
- await this.emitEvent(event);
476
- }
477
- /**
478
- * Captures a pack reception operation.
479
- *
480
- * @description
481
- * Called when a packfile is received during a push or fetch operation.
482
- *
483
- * @param packData - Raw packfile data
484
- * @param objectCount - Number of objects in the pack
485
- *
486
- * @example
487
- * ```typescript
488
- * await capture.onPackReceived(packBuffer, 42)
489
- * ```
490
- */
491
- async onPackReceived(packData, objectCount) {
492
- const event = {
493
- id: this.generateEventId(),
494
- type: 'PACK_RECEIVED',
495
- source: 'push',
496
- timestamp: Date.now(),
497
- payload: {
498
- operation: 'pack-receive',
499
- data: packData,
500
- objectCount
501
- },
502
- sequence: this.nextSequence(),
503
- version: 1
504
- };
505
- await this.emitEvent(event);
506
- }
507
- /**
508
- * Captures a branch creation operation.
509
- *
510
- * @param branchName - Name of the branch (without refs/heads/ prefix)
511
- * @param sha - SHA-1 hash the branch points to
512
- *
513
- * @example
514
- * ```typescript
515
- * await capture.onBranchCreated('feature-x', 'abc123...')
516
- * ```
517
- */
518
- async onBranchCreated(branchName, sha) {
519
- const event = {
520
- id: this.generateEventId(),
521
- type: 'BRANCH_CREATED',
522
- source: 'push',
523
- timestamp: Date.now(),
524
- payload: {
525
- operation: 'branch-create',
526
- branchName,
527
- sha
528
- },
529
- sequence: this.nextSequence(),
530
- version: 1
531
- };
532
- await this.emitEvent(event);
533
- }
534
- /**
535
- * Captures a branch deletion operation.
536
- *
537
- * @param branchName - Name of the deleted branch
538
- *
539
- * @example
540
- * ```typescript
541
- * await capture.onBranchDeleted('feature-x')
542
- * ```
543
- */
544
- async onBranchDeleted(branchName) {
545
- const event = {
546
- id: this.generateEventId(),
547
- type: 'BRANCH_DELETED',
548
- source: 'push',
549
- timestamp: Date.now(),
550
- payload: {
551
- operation: 'branch-delete',
552
- branchName
553
- },
554
- sequence: this.nextSequence(),
555
- version: 1
556
- };
557
- await this.emitEvent(event);
558
- }
559
- /**
560
- * Captures a tag creation operation.
561
- *
562
- * @param tagName - Name of the tag
563
- * @param sha - SHA-1 hash the tag points to
564
- *
565
- * @example
566
- * ```typescript
567
- * await capture.onTagCreated('v1.0.0', 'abc123...')
568
- * ```
569
- */
570
- async onTagCreated(tagName, sha) {
571
- const event = {
572
- id: this.generateEventId(),
573
- type: 'TAG_CREATED',
574
- source: 'push',
575
- timestamp: Date.now(),
576
- payload: {
577
- operation: 'tag-create',
578
- tagName,
579
- sha
580
- },
581
- sequence: this.nextSequence(),
582
- version: 1
583
- };
584
- await this.emitEvent(event);
585
- }
586
- /**
587
- * Captures a merge completion operation.
588
- *
589
- * @param mergeSha - SHA-1 hash of the merge commit
590
- * @param baseSha - SHA-1 hash of the base commit
591
- * @param headSha - SHA-1 hash of the head commit being merged
592
- *
593
- * @example
594
- * ```typescript
595
- * await capture.onMergeCompleted('merge123...', 'base456...', 'head789...')
596
- * ```
597
- */
598
- async onMergeCompleted(mergeSha, baseSha, headSha) {
599
- const event = {
600
- id: this.generateEventId(),
601
- type: 'MERGE_COMPLETED',
602
- source: 'push',
603
- timestamp: Date.now(),
604
- payload: {
605
- operation: 'merge-complete',
606
- sha: mergeSha,
607
- baseSha,
608
- headSha
609
- },
610
- sequence: this.nextSequence(),
611
- version: 1
612
- };
613
- await this.emitEvent(event);
614
- }
615
- /**
616
- * Returns a copy of all buffered events.
617
- *
618
- * @returns Array of buffered events
619
- */
620
- getEvents() {
621
- return [...this.events];
622
- }
623
- /**
624
- * Returns the current buffer size.
625
- *
626
- * @returns Number of events in the buffer
627
- */
628
- getBufferSize() {
629
- return this.events.length;
630
- }
631
- /**
632
- * Flushes all buffered events.
633
- *
634
- * @description
635
- * Returns and clears all events from the buffer. The returned events
636
- * can be processed, serialized, or forwarded to downstream systems.
637
- *
638
- * @returns Array of flushed events
639
- *
640
- * @example
641
- * ```typescript
642
- * const events = await capture.flush()
643
- * console.log(`Flushed ${events.length} events`)
644
- * await sendToAnalytics(events)
645
- * ```
646
- */
647
- async flush() {
648
- const flushed = [...this.events];
649
- this.events = [];
650
- return flushed;
651
- }
652
- /**
653
- * Adds an event listener.
654
- *
655
- * @description
656
- * Listeners are called synchronously for each event as it is captured.
657
- *
658
- * @param listener - Callback function to invoke for each event
659
- *
660
- * @example
661
- * ```typescript
662
- * capture.addListener((event) => {
663
- * console.log(`New event: ${event.type}`)
664
- * })
665
- * ```
666
- */
667
- addListener(listener) {
668
- this.listeners.push(listener);
669
- }
670
- /**
671
- * Removes an event listener.
672
- *
673
- * @param listener - The listener to remove
674
- */
675
- removeListener(listener) {
676
- const index = this.listeners.indexOf(listener);
677
- if (index !== -1) {
678
- this.listeners.splice(index, 1);
679
- }
680
- }
681
- }
682
- // ============================================================================
683
- // Parquet Schema
684
- // ============================================================================
685
- /**
686
- * Default field definitions for CDC event Parquet schema.
687
- * @internal
688
- */
689
- const CDC_EVENT_FIELDS = [
690
- { name: 'event_id', type: 'STRING', nullable: false },
691
- { name: 'event_type', type: 'STRING', nullable: false },
692
- { name: 'source', type: 'STRING', nullable: false },
693
- { name: 'timestamp', type: 'TIMESTAMP', nullable: false },
694
- { name: 'sequence', type: 'INT64', nullable: false },
695
- { name: 'version', type: 'INT64', nullable: false },
696
- { name: 'payload_json', type: 'STRING', nullable: false },
697
- { name: 'sha', type: 'STRING', nullable: true }
698
- ];
699
- /**
700
- * Parquet schema definition for CDC events.
701
- *
702
- * @description
703
- * Defines the column structure for CDC event Parquet files. The default
704
- * schema includes standard CDC event fields and can be extended with
705
- * custom fields for domain-specific data.
706
- *
707
- * @example
708
- * ```typescript
709
- * // Create default schema
710
- * const schema = ParquetSchema.forCDCEvents()
711
- *
712
- * // Create schema with custom fields
713
- * const customSchema = ParquetSchema.forCDCEvents([
714
- * { name: 'repository_id', type: 'STRING', nullable: false },
715
- * { name: 'user_id', type: 'STRING', nullable: true }
716
- * ])
717
- * ```
718
- *
719
- * @class ParquetSchema
720
- */
721
- export class ParquetSchema {
722
- fields;
723
- /**
724
- * Creates a new ParquetSchema.
725
- *
726
- * @param fields - Array of field definitions
727
- */
728
- constructor(fields) {
729
- this.fields = fields;
730
- }
731
- /**
732
- * Creates a schema for CDC events with optional custom fields.
733
- *
734
- * @description
735
- * Returns a schema with the standard CDC event fields. Additional
736
- * custom fields can be appended for domain-specific data.
737
- *
738
- * @param customFields - Optional additional fields to add
739
- * @returns A new ParquetSchema instance
740
- *
741
- * @example
742
- * ```typescript
743
- * const schema = ParquetSchema.forCDCEvents()
744
- * // Schema includes: event_id, event_type, source, timestamp,
745
- * // sequence, version, payload_json, sha
746
- * ```
747
- */
748
- static forCDCEvents(customFields) {
749
- const fields = [...CDC_EVENT_FIELDS];
750
- if (customFields) {
751
- fields.push(...customFields);
752
- }
753
- return new ParquetSchema(fields);
754
- }
755
- }
756
- /**
757
- * Transforms CDC events to Parquet format.
758
- *
759
- * @description
760
- * ParquetTransformer converts CDC events to Parquet-compatible rows and
761
- * serializes batches of events to Parquet file format. It handles:
762
- *
763
- * - Event to row conversion (flattening the event structure)
764
- * - JSON serialization of complex payloads
765
- * - Batch creation with schema and metadata
766
- * - Parquet file generation with compression
767
- *
768
- * @example
769
- * ```typescript
770
- * const transformer = new ParquetTransformer({ compression: 'snappy' })
771
- *
772
- * // Transform single event to row
773
- * const row = transformer.eventToRow(event)
774
- *
775
- * // Transform batch of events
776
- * const batch = transformer.eventsToBatch(events)
777
- *
778
- * // Generate Parquet file
779
- * const buffer = await transformer.toParquetBuffer(batch)
780
- * await r2.put('events.parquet', buffer)
781
- * ```
782
- *
783
- * @class ParquetTransformer
784
- */
785
- export class ParquetTransformer {
786
- /**
787
- * Compression algorithm to use.
788
- * @private
789
- */
790
- compression;
791
- /**
792
- * Creates a new ParquetTransformer.
793
- *
794
- * @param options - Transformer configuration
795
- */
796
- constructor(options = {}) {
797
- this.compression = options.compression ?? 'snappy';
798
- }
799
- /**
800
- * Converts a CDC event to a Parquet row.
801
- *
802
- * @description
803
- * Flattens the event structure and serializes the payload to JSON
804
- * for storage in Parquet format.
805
- *
806
- * @param event - The CDC event to convert
807
- * @returns A Parquet row representation
808
- *
809
- * @example
810
- * ```typescript
811
- * const row = transformer.eventToRow(event)
812
- * console.log(row.event_id, row.event_type, row.sha)
813
- * ```
814
- */
815
- eventToRow(event) {
816
- // Create a serializable copy of the payload (Uint8Array not JSON-serializable)
817
- const serializablePayload = {
818
- ...event.payload,
819
- data: event.payload.data ? Array.from(event.payload.data) : undefined
820
- };
821
- return {
822
- event_id: event.id,
823
- event_type: event.type,
824
- source: event.source,
825
- timestamp: event.timestamp,
826
- sequence: event.sequence,
827
- version: event.version,
828
- payload_json: JSON.stringify(serializablePayload),
829
- sha: event.payload.sha ?? null
830
- };
831
- }
832
- /**
833
- * Converts multiple CDC events to a Parquet batch.
834
- *
835
- * @description
836
- * Transforms an array of events into a ParquetBatch structure
837
- * ready for serialization to Parquet format.
838
- *
839
- * @param events - Array of CDC events to batch
840
- * @returns A ParquetBatch ready for serialization
841
- *
842
- * @example
843
- * ```typescript
844
- * const batch = transformer.eventsToBatch(events)
845
- * console.log(`Batch has ${batch.rowCount} rows`)
846
- * ```
847
- */
848
- eventsToBatch(events) {
849
- const rows = events.map(e => this.eventToRow(e));
850
- return {
851
- rows,
852
- rowCount: rows.length,
853
- createdAt: Date.now(),
854
- schema: ParquetSchema.forCDCEvents(),
855
- compression: this.compression
856
- };
857
- }
858
- /**
859
- * Serializes a ParquetBatch to a Parquet file buffer.
860
- *
861
- * @description
862
- * Generates a Parquet-format file from the batch data. The output
863
- * includes PAR1 magic bytes, compressed data, and footer metadata.
864
- *
865
- * @param batch - The ParquetBatch to serialize
866
- * @returns Promise resolving to Parquet file as Uint8Array
867
- *
868
- * @example
869
- * ```typescript
870
- * const buffer = await transformer.toParquetBuffer(batch)
871
- * await r2.put('events.parquet', buffer)
872
- * ```
873
- */
874
- async toParquetBuffer(batch) {
875
- // Build a simplified Parquet-like buffer
876
- // Real implementation would use a proper Parquet library
877
- const encoder = new TextEncoder();
878
- // Magic bytes
879
- const magic = encoder.encode('PAR1');
880
- // Serialize batch data
881
- const dataJson = JSON.stringify({
882
- rows: batch.rows,
883
- rowCount: batch.rowCount,
884
- createdAt: batch.createdAt,
885
- schema: batch.schema,
886
- compression: batch.compression
887
- });
888
- let dataBytes = encoder.encode(dataJson);
889
- // Apply compression
890
- if (this.compression === 'gzip') {
891
- dataBytes = await this.gzipCompress(dataBytes);
892
- }
893
- else if (this.compression === 'snappy') {
894
- // Snappy simulation (use simple compression)
895
- dataBytes = await this.simpleCompress(dataBytes);
896
- }
897
- // Build final buffer: PAR1 + data + length (4 bytes) + PAR1
898
- const lengthBytes = new Uint8Array(4);
899
- new DataView(lengthBytes.buffer).setUint32(0, dataBytes.length, true);
900
- const totalSize = 4 + dataBytes.length + 4 + 4;
901
- const result = new Uint8Array(totalSize);
902
- let offset = 0;
903
- result.set(magic, offset);
904
- offset += 4;
905
- result.set(dataBytes, offset);
906
- offset += dataBytes.length;
907
- result.set(lengthBytes, offset);
908
- offset += 4;
909
- result.set(magic, offset);
910
- return result;
911
- }
912
- async gzipCompress(data) {
913
- // Use CompressionStream if available (modern browsers/Node 18+)
914
- if (typeof CompressionStream !== 'undefined') {
915
- const stream = new CompressionStream('gzip');
916
- const writer = stream.writable.getWriter();
917
- writer.write(data);
918
- writer.close();
919
- const reader = stream.readable.getReader();
920
- const chunks = [];
921
- let done = false;
922
- while (!done) {
923
- const result = await reader.read();
924
- done = result.done;
925
- if (result.value) {
926
- chunks.push(result.value);
927
- }
928
- }
929
- const totalLength = chunks.reduce((acc, chunk) => acc + chunk.length, 0);
930
- const result = new Uint8Array(totalLength);
931
- let offset = 0;
932
- for (const chunk of chunks) {
933
- result.set(chunk, offset);
934
- offset += chunk.length;
935
- }
936
- return result;
937
- }
938
- // Fallback: return data as-is (no compression)
939
- return data;
940
- }
941
- async simpleCompress(data) {
942
- // For snappy, we just return data as-is (real snappy compression would require a library)
943
- // This is a simplified implementation that avoids async stream issues with fake timers
944
- return data;
945
- }
946
- }
947
- /**
948
- * Batches CDC events for efficient processing.
949
- *
950
- * @description
951
- * CDCBatcher collects CDC events and groups them into batches based on
952
- * count or time thresholds. This enables efficient downstream processing
953
- * by reducing the number of I/O operations and enabling bulk operations.
954
- *
955
- * **Batching Strategies:**
956
- * - **Count-based**: Flush when batch reaches `batchSize` events
957
- * - **Time-based**: Flush after `flushIntervalMs` even if batch is not full
958
- *
959
- * **Features:**
960
- * - Async batch handlers for non-blocking processing
961
- * - Multiple handlers for parallel processing pipelines
962
- * - Graceful stop with pending event flush
963
- * - Batch metadata (sequences, timestamps) for tracking
964
- *
965
- * @example
966
- * ```typescript
967
- * const batcher = new CDCBatcher({
968
- * batchSize: 100,
969
- * flushIntervalMs: 5000
970
- * })
971
- *
972
- * // Register batch handler
973
- * batcher.onBatch(async (batch) => {
974
- * console.log(`Processing ${batch.eventCount} events`)
975
- * console.log(`Sequence range: ${batch.minSequence} - ${batch.maxSequence}`)
976
- * await saveToStorage(batch.events)
977
- * })
978
- *
979
- * // Add events
980
- * await batcher.add(event1)
981
- * await batcher.add(event2)
982
- *
983
- * // Check pending events
984
- * console.log(`Pending: ${batcher.getPendingCount()}`)
985
- *
986
- * // Manual flush
987
- * const result = await batcher.flush()
988
- *
989
- * // Stop the batcher
990
- * await batcher.stop()
991
- * ```
992
- *
993
- * @class CDCBatcher
994
- */
995
- export class CDCBatcher {
996
- /**
997
- * Batch configuration.
998
- * @private
999
- */
1000
- config;
1001
- /**
1002
- * Buffer of pending events.
1003
- * @private
1004
- */
1005
- events = [];
1006
- /**
1007
- * Registered batch handlers.
1008
- * @private
1009
- */
1010
- batchHandlers = [];
1011
- /**
1012
- * Timer for time-based flushing.
1013
- * @private
1014
- */
1015
- flushTimer = null;
1016
- /**
1017
- * Whether the batcher has been stopped.
1018
- * @private
1019
- */
1020
- stopped = false;
1021
- /**
1022
- * Creates a new CDCBatcher.
1023
- *
1024
- * @param config - Batch configuration
1025
- */
1026
- constructor(config) {
1027
- this.config = config;
1028
- // Don't start timer in constructor - start when first event is added
1029
- }
1030
- ensureTimerRunning() {
1031
- if (this.stopped)
1032
- return;
1033
- if (this.flushTimer !== null)
1034
- return; // Already have a timer
1035
- this.flushTimer = setTimeout(() => {
1036
- this.flushTimer = null;
1037
- if (this.stopped)
1038
- return;
1039
- // Process pending events if any
1040
- if (this.events.length > 0) {
1041
- // Build batch result
1042
- const batchEvents = [...this.events];
1043
- this.events = [];
1044
- const sequences = batchEvents.map(e => e.sequence);
1045
- const timestamps = batchEvents.map(e => e.timestamp);
1046
- const result = {
1047
- events: batchEvents,
1048
- eventCount: batchEvents.length,
1049
- success: true,
1050
- minSequence: Math.min(...sequences),
1051
- maxSequence: Math.max(...sequences),
1052
- minTimestamp: Math.min(...timestamps),
1053
- maxTimestamp: Math.max(...timestamps)
1054
- };
1055
- // Notify handlers and handle promises
1056
- const handlerPromises = [];
1057
- for (const handler of this.batchHandlers) {
1058
- try {
1059
- const maybePromise = handler(result);
1060
- if (maybePromise && typeof maybePromise.then === 'function') {
1061
- handlerPromises.push(maybePromise);
1062
- }
1063
- }
1064
- catch {
1065
- // Ignore handler errors in timer context
1066
- }
1067
- }
1068
- // Execute all handlers and ignore the result
1069
- if (handlerPromises.length > 0) {
1070
- void Promise.all(handlerPromises);
1071
- }
1072
- }
1073
- // DON'T reschedule here - timer will be scheduled on next add() call
1074
- }, this.config.flushIntervalMs);
1075
- }
1076
- clearFlushTimer() {
1077
- if (this.flushTimer !== null) {
1078
- clearTimeout(this.flushTimer);
1079
- this.flushTimer = null;
1080
- }
1081
- }
1082
- /**
1083
- * Adds an event to the batch.
1084
- *
1085
- * @description
1086
- * Adds the event to the pending batch. If the batch reaches the
1087
- * configured size, it is automatically flushed. The flush timer
1088
- * is started/restarted as needed.
1089
- *
1090
- * @param event - The CDC event to add
1091
- *
1092
- * @example
1093
- * ```typescript
1094
- * await batcher.add(event)
1095
- * ```
1096
- */
1097
- async add(event) {
1098
- this.events.push(event);
1099
- // Ensure flush timer is running when we have pending events
1100
- this.ensureTimerRunning();
1101
- if (this.events.length >= this.config.batchSize) {
1102
- this.clearFlushTimer();
1103
- await this.flushInternal();
1104
- // Timer will be re-started on next add() if needed
1105
- }
1106
- }
1107
- /**
1108
- * Internal flush implementation.
1109
- * @private
1110
- */
1111
- async flushInternal() {
1112
- if (this.events.length === 0) {
1113
- return { events: [], eventCount: 0, success: true };
1114
- }
1115
- const batchEvents = [...this.events];
1116
- this.events = [];
1117
- const sequences = batchEvents.map(e => e.sequence);
1118
- const timestamps = batchEvents.map(e => e.timestamp);
1119
- const result = {
1120
- events: batchEvents,
1121
- eventCount: batchEvents.length,
1122
- success: true,
1123
- minSequence: Math.min(...sequences),
1124
- maxSequence: Math.max(...sequences),
1125
- minTimestamp: Math.min(...timestamps),
1126
- maxTimestamp: Math.max(...timestamps)
1127
- };
1128
- // Notify handlers (await async handlers)
1129
- for (const handler of this.batchHandlers) {
1130
- await handler(result);
1131
- }
1132
- return result;
1133
- }
1134
- /**
1135
- * Manually flushes pending events.
1136
- *
1137
- * @description
1138
- * Forces an immediate flush of all pending events, regardless of
1139
- * batch size or timer. Clears the flush timer.
1140
- *
1141
- * @returns Promise resolving to the batch result
1142
- *
1143
- * @example
1144
- * ```typescript
1145
- * const result = await batcher.flush()
1146
- * console.log(`Flushed ${result.eventCount} events`)
1147
- * ```
1148
- */
1149
- async flush() {
1150
- this.clearFlushTimer();
1151
- const result = await this.flushInternal();
1152
- // Don't restart timer - it will be started on next add() if needed
1153
- return result;
1154
- }
1155
- /**
1156
- * Returns the number of pending events.
1157
- *
1158
- * @returns Number of events waiting to be flushed
1159
- */
1160
- getPendingCount() {
1161
- return this.events.length;
1162
- }
1163
- /**
1164
- * Registers a batch handler.
1165
- *
1166
- * @description
1167
- * Handlers are called when a batch is flushed (automatically or manually).
1168
- * Multiple handlers can be registered for parallel processing.
1169
- *
1170
- * @param handler - Callback function to invoke for each batch
1171
- *
1172
- * @example
1173
- * ```typescript
1174
- * batcher.onBatch(async (batch) => {
1175
- * await saveToStorage(batch.events)
1176
- * })
1177
- * ```
1178
- */
1179
- onBatch(handler) {
1180
- this.batchHandlers.push(handler);
1181
- }
1182
- /**
1183
- * Stops the batcher.
1184
- *
1185
- * @description
1186
- * Stops the flush timer and prevents further processing.
1187
- * Does NOT automatically flush pending events - call flush() first
1188
- * if you need to process remaining events.
1189
- *
1190
- * @example
1191
- * ```typescript
1192
- * await batcher.flush() // Process remaining events
1193
- * await batcher.stop() // Stop the timer
1194
- * ```
1195
- */
1196
- async stop() {
1197
- this.stopped = true;
1198
- this.clearFlushTimer();
1199
- }
1200
- }
1201
- /**
1202
- * Main CDC Pipeline for processing git operation events.
1203
- *
1204
- * @description
1205
- * CDCPipeline orchestrates the complete change data capture flow from
1206
- * event ingestion to Parquet output. It integrates batching, transformation,
1207
- * retry handling, and dead letter queue management.
1208
- *
1209
- * **Pipeline Flow:**
1210
- * 1. Events are submitted via `process()` or `processMany()`
1211
- * 2. Events are validated and added to the batcher
1212
- * 3. When a batch is ready, it's transformed to Parquet format
1213
- * 4. On success, output handlers are notified
1214
- * 5. On failure, retries are attempted with exponential backoff
1215
- * 6. After max retries, events go to dead letter queue
1216
- *
1217
- * **Features:**
1218
- * - Configurable batch size and flush interval
1219
- * - Automatic retry with exponential backoff
1220
- * - Dead letter queue for failed events
1221
- * - Real-time metrics for monitoring
1222
- * - Graceful shutdown with pending event flush
1223
- *
1224
- * @example
1225
- * ```typescript
1226
- * const pipeline = new CDCPipeline({
1227
- * batchSize: 100,
1228
- * flushIntervalMs: 5000,
1229
- * maxRetries: 3,
1230
- * parquetCompression: 'snappy',
1231
- * outputPath: '/analytics',
1232
- * schemaVersion: 1
1233
- * })
1234
- *
1235
- * // Register handlers
1236
- * pipeline.onOutput(async (output) => {
1237
- * await r2.put(`cdc/${output.batchId}.parquet`, output.parquetBuffer)
1238
- * })
1239
- *
1240
- * pipeline.onDeadLetter((events, error) => {
1241
- * console.error(`Failed ${events.length} events:`, error)
1242
- * })
1243
- *
1244
- * // Start the pipeline
1245
- * await pipeline.start()
1246
- *
1247
- * // Process events
1248
- * await pipeline.process(event)
1249
- *
1250
- * // Check metrics
1251
- * const metrics = pipeline.getMetrics()
1252
- *
1253
- * // Stop gracefully
1254
- * const result = await pipeline.stop()
1255
- * console.log(`Flushed ${result.flushedCount} events on shutdown`)
1256
- * ```
1257
- *
1258
- * @class CDCPipeline
1259
- */
1260
- export class CDCPipeline {
1261
- /**
1262
- * Pipeline configuration.
1263
- * @private
1264
- */
1265
- config;
1266
- /**
1267
- * Current pipeline state.
1268
- * @private
1269
- */
1270
- state = 'stopped';
1271
- /**
1272
- * Event batcher instance.
1273
- * @private
1274
- */
1275
- batcher = null;
1276
- /**
1277
- * Parquet transformer instance.
1278
- * @private
1279
- */
1280
- transformer;
1281
- /**
1282
- * Registered output handlers.
1283
- * @private
1284
- */
1285
- outputHandlers = [];
1286
- /**
1287
- * Registered dead letter handlers.
1288
- * @private
1289
- */
1290
- deadLetterHandlers = [];
1291
- /**
1292
- * Pipeline metrics.
1293
- * @private
1294
- */
1295
- metrics = {
1296
- eventsProcessed: 0,
1297
- batchesGenerated: 0,
1298
- bytesWritten: 0,
1299
- errors: 0,
1300
- avgProcessingLatencyMs: 0
1301
- };
1302
- /**
1303
- * Processing latency samples.
1304
- * @private
1305
- */
1306
- processingLatencies = [];
1307
- /**
1308
- * Retry policy instance.
1309
- * @private
1310
- */
1311
- retryPolicy;
1312
- /**
1313
- * Creates a new CDCPipeline.
1314
- *
1315
- * @param config - Pipeline configuration
1316
- */
1317
- constructor(config) {
1318
- this.config = config;
1319
- this.transformer = new ParquetTransformer({
1320
- compression: config.parquetCompression
1321
- });
1322
- this.retryPolicy = new CDCRetryPolicy({
1323
- maxRetries: config.maxRetries,
1324
- initialDelayMs: 100,
1325
- maxDelayMs: 5000,
1326
- backoffMultiplier: 2
1327
- });
1328
- }
1329
- /**
1330
- * Returns the current pipeline state.
1331
- *
1332
- * @returns Current state ('stopped', 'running', or 'paused')
1333
- */
1334
- getState() {
1335
- return this.state;
1336
- }
1337
- /**
1338
- * Starts the pipeline.
1339
- *
1340
- * @description
1341
- * Initializes the batcher and begins accepting events. If already
1342
- * running, this method is a no-op.
1343
- *
1344
- * @example
1345
- * ```typescript
1346
- * await pipeline.start()
1347
- * console.log(pipeline.getState()) // 'running'
1348
- * ```
1349
- */
1350
- async start() {
1351
- if (this.state === 'running')
1352
- return;
1353
- this.batcher = new CDCBatcher({
1354
- batchSize: this.config.batchSize,
1355
- flushIntervalMs: this.config.flushIntervalMs
1356
- });
1357
- this.batcher.onBatch(async (batch) => {
1358
- await this.handleBatch(batch);
1359
- });
1360
- this.state = 'running';
1361
- }
1362
- /**
1363
- * Stops the pipeline.
1364
- *
1365
- * @description
1366
- * Flushes any pending events, stops the batcher, and sets state to stopped.
1367
- * Returns information about events flushed during shutdown.
1368
- *
1369
- * @returns Promise resolving to stop result with flushed event count
1370
- *
1371
- * @example
1372
- * ```typescript
1373
- * const result = await pipeline.stop()
1374
- * console.log(`Flushed ${result.flushedCount} events on shutdown`)
1375
- * ```
1376
- */
1377
- async stop() {
1378
- if (this.state === 'stopped') {
1379
- return { flushedCount: 0 };
1380
- }
1381
- let flushedCount = 0;
1382
- if (this.batcher) {
1383
- const result = await this.batcher.flush();
1384
- flushedCount = result.eventCount;
1385
- await this.batcher.stop();
1386
- this.batcher = null;
1387
- }
1388
- this.state = 'stopped';
1389
- return { flushedCount };
1390
- }
1391
- /**
1392
- * Processes a single event.
1393
- *
1394
- * @description
1395
- * Validates the event and adds it to the batcher for processing.
1396
- * Updates metrics including latency tracking.
1397
- *
1398
- * @param event - The CDC event to process
1399
- * @returns Promise resolving to process result
1400
- *
1401
- * @throws {CDCError} PROCESSING_ERROR - If pipeline is not running
1402
- * @throws {CDCError} VALIDATION_ERROR - If event fails validation
1403
- *
1404
- * @example
1405
- * ```typescript
1406
- * const result = await pipeline.process(event)
1407
- * if (result.success) {
1408
- * console.log(`Processed event: ${result.eventId}`)
1409
- * }
1410
- * ```
1411
- */
1412
- async process(event) {
1413
- if (this.state !== 'running') {
1414
- throw new CDCError('PROCESSING_ERROR', 'Pipeline is not running');
1415
- }
1416
- // Validate event
1417
- validateCDCEvent(event);
1418
- const startTime = Date.now();
1419
- await this.batcher.add(event);
1420
- this.metrics.eventsProcessed++;
1421
- const latency = Date.now() - startTime;
1422
- this.processingLatencies.push(latency);
1423
- this.updateAvgLatency();
1424
- return { success: true, eventId: event.id };
1425
- }
1426
- /**
1427
- * Processes multiple events.
1428
- *
1429
- * @description
1430
- * Convenience method to process an array of events sequentially.
1431
- *
1432
- * @param events - Array of CDC events to process
1433
- * @returns Promise resolving to array of process results
1434
- *
1435
- * @example
1436
- * ```typescript
1437
- * const results = await pipeline.processMany(events)
1438
- * const successCount = results.filter(r => r.success).length
1439
- * console.log(`Processed ${successCount}/${events.length} events`)
1440
- * ```
1441
- */
1442
- async processMany(events) {
1443
- const results = [];
1444
- for (const event of events) {
1445
- const result = await this.process(event);
1446
- results.push(result);
1447
- }
1448
- return results;
1449
- }
1450
- /**
1451
- * Manually flushes pending events.
1452
- *
1453
- * @description
1454
- * Forces an immediate flush of the batcher and processes the
1455
- * resulting batch through the pipeline.
1456
- *
1457
- * @example
1458
- * ```typescript
1459
- * await pipeline.flush()
1460
- * console.log('All pending events flushed')
1461
- * ```
1462
- */
1463
- async flush() {
1464
- if (this.batcher) {
1465
- const result = await this.batcher.flush();
1466
- if (result.eventCount > 0) {
1467
- await this.handleBatch(result);
1468
- }
1469
- }
1470
- }
1471
- /**
1472
- * Handles a batch of events with retry logic.
1473
- * @private
1474
- */
1475
- async handleBatch(batch) {
1476
- let attempts = 0;
1477
- let lastError = null;
1478
- while (attempts <= this.config.maxRetries) {
1479
- try {
1480
- const parquetBatch = this.transformer.eventsToBatch(batch.events);
1481
- const parquetBuffer = await this.transformer.toParquetBuffer(parquetBatch);
1482
- const output = {
1483
- parquetBuffer,
1484
- events: batch.events,
1485
- batchId: `batch-${Date.now()}-${Math.random().toString(36).slice(2)}`
1486
- };
1487
- // Notify output handlers
1488
- for (const handler of this.outputHandlers) {
1489
- handler(output);
1490
- }
1491
- this.metrics.batchesGenerated++;
1492
- this.metrics.bytesWritten += parquetBuffer.length;
1493
- return; // Success
1494
- }
1495
- catch (error) {
1496
- lastError = error;
1497
- attempts++;
1498
- this.metrics.errors++;
1499
- if (this.retryPolicy.shouldRetry(attempts)) {
1500
- const delay = this.retryPolicy.getDelay(attempts);
1501
- await this.sleep(delay);
1502
- }
1503
- }
1504
- }
1505
- // All retries exhausted - send to dead letter queue
1506
- if (lastError) {
1507
- for (const handler of this.deadLetterHandlers) {
1508
- handler(batch.events, lastError);
1509
- }
1510
- }
1511
- }
1512
- /**
1513
- * Sleeps for the specified duration.
1514
- * @private
1515
- */
1516
- sleep(ms) {
1517
- return new Promise(resolve => setTimeout(resolve, ms));
1518
- }
1519
- /**
1520
- * Updates the average latency metric.
1521
- * @private
1522
- */
1523
- updateAvgLatency() {
1524
- if (this.processingLatencies.length === 0)
1525
- return;
1526
- // Keep only last 1000 measurements
1527
- if (this.processingLatencies.length > 1000) {
1528
- this.processingLatencies = this.processingLatencies.slice(-1000);
1529
- }
1530
- const sum = this.processingLatencies.reduce((a, b) => a + b, 0);
1531
- this.metrics.avgProcessingLatencyMs = sum / this.processingLatencies.length;
1532
- }
1533
- /**
1534
- * Returns current pipeline metrics.
1535
- *
1536
- * @description
1537
- * Returns a copy of the current metrics. Metrics are cumulative
1538
- * since pipeline creation.
1539
- *
1540
- * @returns Copy of current pipeline metrics
1541
- *
1542
- * @example
1543
- * ```typescript
1544
- * const metrics = pipeline.getMetrics()
1545
- * console.log(`Processed: ${metrics.eventsProcessed}`)
1546
- * console.log(`Batches: ${metrics.batchesGenerated}`)
1547
- * console.log(`Errors: ${metrics.errors}`)
1548
- * console.log(`Avg latency: ${metrics.avgProcessingLatencyMs}ms`)
1549
- * ```
1550
- */
1551
- getMetrics() {
1552
- return { ...this.metrics };
1553
- }
1554
- /**
1555
- * Registers an output handler.
1556
- *
1557
- * @description
1558
- * Output handlers are called when a batch is successfully processed
1559
- * and converted to Parquet format. Multiple handlers can be registered.
1560
- *
1561
- * @param handler - Callback to invoke for each successful batch
1562
- *
1563
- * @example
1564
- * ```typescript
1565
- * pipeline.onOutput(async (output) => {
1566
- * await r2.put(`cdc/${output.batchId}.parquet`, output.parquetBuffer)
1567
- * console.log(`Wrote ${output.events.length} events`)
1568
- * })
1569
- * ```
1570
- */
1571
- onOutput(handler) {
1572
- this.outputHandlers.push(handler);
1573
- }
1574
- /**
1575
- * Registers a dead letter handler.
1576
- *
1577
- * @description
1578
- * Dead letter handlers are called when a batch fails after all
1579
- * retry attempts are exhausted. Use this for alerting, logging,
1580
- * or storing failed events for later reprocessing.
1581
- *
1582
- * @param handler - Callback to invoke for failed events
1583
- *
1584
- * @example
1585
- * ```typescript
1586
- * pipeline.onDeadLetter((events, error) => {
1587
- * console.error(`Failed to process ${events.length} events:`, error)
1588
- * // Store in dead letter queue for later retry
1589
- * await dlq.put(events)
1590
- * })
1591
- * ```
1592
- */
1593
- onDeadLetter(handler) {
1594
- this.deadLetterHandlers.push(handler);
1595
- }
1596
- }
1597
- // ============================================================================
1598
- // Utility Functions
1599
- // ============================================================================
1600
/**
 * Event type names accepted when validating a CDC event.
 * Membership is tested with `Array.prototype.includes`.
 * @internal
 */
const VALID_EVENT_TYPES = [
    // Objects, refs and packs
    'OBJECT_CREATED', 'OBJECT_DELETED',
    'REF_UPDATED', 'PACK_RECEIVED',
    // Commits and trees
    'COMMIT_CREATED', 'TREE_MODIFIED',
    // Branches, tags and merges
    'BRANCH_CREATED', 'BRANCH_DELETED',
    'TAG_CREATED', 'MERGE_COMPLETED'
];
1616
/**
 * Creates a new CDC event.
 *
 * @description
 * Factory function that builds an event object with a generated id, the
 * current time as timestamp, and a fixed `version` of 1. The id has the
 * form `evt-<timestamp>-<random base36 suffix>`; the timestamp inside the
 * id is always the same value as the event's `timestamp` field.
 *
 * @param type - The event type
 * @param source - The event source
 * @param payload - Event payload data (stored by reference, not copied)
 * @param options - Optional configuration
 * @param options.sequence - Custom sequence number (default: 0)
 * @returns A new CDCEvent
 *
 * @example
 * ```typescript
 * const event = createCDCEvent('COMMIT_CREATED', 'push', {
 *   operation: 'commit-create',
 *   sha: 'abc123...',
 *   treeSha: 'def456...',
 *   parentShas: ['parent1...']
 * })
 *
 * // With sequence number
 * const sequencedEvent = createCDCEvent('REF_UPDATED', 'push', {
 *   operation: 'ref-update',
 *   refName: 'refs/heads/main',
 *   oldSha: 'old...',
 *   newSha: 'new...'
 * }, { sequence: 42 })
 * ```
 */
export function createCDCEvent(type, source, payload, options) {
    // Read the clock once: two separate Date.now() calls can straddle a
    // millisecond boundary and make the id disagree with `timestamp`.
    const now = Date.now();
    const randomSuffix = Math.random().toString(36).slice(2);
    return {
        id: `evt-${now}-${randomSuffix}`,
        type,
        source,
        timestamp: now,
        payload,
        sequence: options?.sequence ?? 0,
        version: 1
    };
}
1659
/**
 * Serializes a CDC event to bytes.
 *
 * @description
 * Converts a CDCEvent to UTF-8 encoded JSON. When `payload.data` is a
 * typed-array view (e.g. a Uint8Array), it is written as a plain array of
 * numbers, because typed arrays do not serialize to JSON arrays on their
 * own. Any other `payload.data` value (string, plain array, null) and
 * events without a payload are serialized unchanged. The input event is
 * never mutated.
 *
 * @param event - The CDC event to serialize
 * @returns The serialized event as a Uint8Array
 *
 * @example
 * ```typescript
 * const bytes = serializeEvent(event)
 * await r2.put(`events/${event.id}`, bytes)
 * ```
 *
 * @see {@link deserializeEvent} - Reverse operation
 */
export function serializeEvent(event) {
    const payload = event.payload;
    // Optional chaining tolerates a missing payload; the isView check keeps
    // Array.from away from strings, which it would split into characters.
    const data = payload?.data;
    const serializable = ArrayBuffer.isView(data)
        ? { ...event, payload: { ...payload, data: Array.from(data) } }
        : event;
    return new TextEncoder().encode(JSON.stringify(serializable));
}
1689
/**
 * Deserializes bytes to a CDC event.
 *
 * @description
 * Decodes the bytes as text, parses the text as JSON and returns the
 * parsed value. If the parsed object has a `payload.data` that is an
 * array, that array is replaced with a Uint8Array holding the same
 * values, undoing the array conversion done during serialization.
 *
 * @param bytes - The serialized event bytes
 * @returns The deserialized CDCEvent
 *
 * @throws {SyntaxError} If the decoded text is not valid JSON
 *
 * @example
 * ```typescript
 * const bytes = await r2.get(`events/${eventId}`)
 * const event = deserializeEvent(bytes)
 * console.log(`Event type: ${event.type}`)
 * ```
 *
 * @see {@link serializeEvent} - Reverse operation
 */
export function deserializeEvent(bytes) {
    const event = JSON.parse(new TextDecoder().decode(bytes));
    const rawData = event.payload?.data;
    // Only arrays are turned back into bytes; everything else is left alone.
    if (Array.isArray(rawData)) {
        event.payload.data = new Uint8Array(rawData);
    }
    return event;
}
1718
/**
 * Validates a CDC event.
 *
 * @description
 * Checks that an event has the required fields with valid values and
 * throws a CDCError on the first rule that fails.
 *
 * **Validation Rules:**
 * - Event must not be null/undefined
 * - Event ID must be a non-empty string
 * - Event type must be one of VALID_EVENT_TYPES
 * - Timestamp must be a finite, non-negative number
 * - Sequence must be a finite, non-negative number
 *
 * @param event - The CDC event to validate
 * @returns The validated event (for chaining)
 *
 * @throws {CDCError} VALIDATION_ERROR - If validation fails
 *
 * @example
 * ```typescript
 * try {
 *   validateCDCEvent(event)
 *   // Event is valid
 * } catch (error) {
 *   if (error instanceof CDCError) {
 *     console.log(`Invalid: ${error.message}`)
 *   }
 * }
 * ```
 */
export function validateCDCEvent(event) {
    if (!event) {
        throw new CDCError('VALIDATION_ERROR', 'Event is null or undefined');
    }
    if (typeof event.id !== 'string' || event.id.length === 0) {
        throw new CDCError('VALIDATION_ERROR', 'Event id is missing or invalid');
    }
    if (!VALID_EVENT_TYPES.includes(event.type)) {
        throw new CDCError('VALIDATION_ERROR', `Invalid event type: ${event.type}`);
    }
    // Number.isFinite rejects non-numbers as well as NaN and ±Infinity;
    // a bare `< 0` comparison lets NaN through because NaN < 0 is false.
    if (!Number.isFinite(event.timestamp) || event.timestamp < 0) {
        throw new CDCError('VALIDATION_ERROR', 'Invalid timestamp');
    }
    if (!Number.isFinite(event.sequence) || event.sequence < 0) {
        throw new CDCError('VALIDATION_ERROR', 'Invalid sequence number');
    }
    return event;
}
1767
- // ============================================================================
1768
- // Pipeline Operations
1769
- // ============================================================================
1770
/**
 * Pipelines registered by `startPipeline`, keyed by their ID.
 * @internal
 */
const activePipelines = new Map();
/**
 * Starts a new pipeline with the given configuration.
 *
 * @description
 * Constructs a CDCPipeline from `config`, calls its `start()` method and
 * then stores it in the registry under `id`. An existing entry with the
 * same ID is overwritten; the previous pipeline is not stopped, so stop
 * it first if it should not keep running.
 *
 * @param id - Unique identifier for the pipeline
 * @param config - Pipeline configuration
 * @returns The started pipeline instance
 *
 * @example
 * ```typescript
 * const pipeline = startPipeline('main', {
 *   batchSize: 100,
 *   flushIntervalMs: 5000,
 *   maxRetries: 3,
 *   parquetCompression: 'snappy',
 *   outputPath: '/analytics',
 *   schemaVersion: 1
 * })
 *
 * // Register handlers
 * pipeline.onOutput((output) => console.log(`Batch: ${output.batchId}`))
 * ```
 */
export function startPipeline(id, config) {
    const started = new CDCPipeline(config);
    started.start();
    // Registered only after start() returns, so a throwing start() leaves
    // the registry untouched.
    activePipelines.set(id, started);
    return started;
}
1808
/**
 * Stops a pipeline by ID.
 *
 * @description
 * Looks up the pipeline registered under `id`, awaits its `stop()` method
 * and removes it from the registry. The registry entry is removed only if
 * it still refers to the pipeline that was stopped, so a pipeline started
 * under the same ID while `stop()` was pending stays registered. If
 * `stop()` rejects, the error propagates and the entry is left in place.
 *
 * @param id - Pipeline identifier
 * @returns Promise resolving to the result of `stop()`, or
 *          `{ flushedCount: 0 }` if no pipeline is registered under `id`
 *
 * @example
 * ```typescript
 * const result = await stopPipeline('main')
 * console.log(`Flushed ${result.flushedCount} events on shutdown`)
 * ```
 */
export async function stopPipeline(id) {
    const pipeline = activePipelines.get(id);
    if (!pipeline) {
        return { flushedCount: 0 };
    }
    const result = await pipeline.stop();
    // The registry may have changed during the await; never evict a
    // different pipeline that now owns this id.
    if (activePipelines.get(id) === pipeline) {
        activePipelines.delete(id);
    }
    return result;
}
1833
/**
 * Flushes a pipeline by ID.
 *
 * @description
 * Awaits the `flush()` method of the pipeline registered under `id`.
 * Resolves without doing anything when no such pipeline is registered.
 *
 * @param id - Pipeline identifier
 * @returns Promise that resolves (with no value) once the flush completes
 *
 * @example
 * ```typescript
 * await flushPipeline('main')
 * console.log('All pending events flushed')
 * ```
 */
export async function flushPipeline(id) {
    const target = activePipelines.get(id);
    if (!target) {
        return;
    }
    await target.flush();
}
1854
/**
 * Gets metrics for a pipeline by ID.
 *
 * @description
 * Returns whatever the registered pipeline's `getMetrics()` returns, or
 * null when no pipeline is registered under `id`.
 *
 * @param id - Pipeline identifier
 * @returns Pipeline metrics or null if not found
 *
 * @example
 * ```typescript
 * const metrics = getPipelineMetrics('main')
 * if (metrics) {
 *   console.log(`Events processed: ${metrics.eventsProcessed}`)
 *   console.log(`Errors: ${metrics.errors}`)
 * }
 * ```
 */
export function getPipelineMetrics(id) {
    const target = activePipelines.get(id);
    return target ? target.getMetrics() : null;
}
1880
- //# sourceMappingURL=cdc-pipeline.js.map