gitx.do 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (344)
  1. package/README.md +40 -353
  2. package/dist/do/logger.d.ts +50 -0
  3. package/dist/do/logger.d.ts.map +1 -0
  4. package/dist/do/logger.js +122 -0
  5. package/dist/do/logger.js.map +1 -0
  6. package/dist/{durable-object → do}/schema.d.ts +3 -3
  7. package/dist/do/schema.d.ts.map +1 -0
  8. package/dist/{durable-object → do}/schema.js +4 -3
  9. package/dist/do/schema.js.map +1 -0
  10. package/dist/do/types.d.ts +267 -0
  11. package/dist/do/types.d.ts.map +1 -0
  12. package/dist/do/types.js +62 -0
  13. package/dist/do/types.js.map +1 -0
  14. package/dist/index.d.ts +15 -415
  15. package/dist/index.d.ts.map +1 -1
  16. package/dist/index.js +31 -483
  17. package/dist/index.js.map +1 -1
  18. package/package.json +13 -21
  19. package/dist/cli/commands/add.d.ts +0 -174
  20. package/dist/cli/commands/add.d.ts.map +0 -1
  21. package/dist/cli/commands/add.js +0 -131
  22. package/dist/cli/commands/add.js.map +0 -1
  23. package/dist/cli/commands/blame.d.ts +0 -259
  24. package/dist/cli/commands/blame.d.ts.map +0 -1
  25. package/dist/cli/commands/blame.js +0 -609
  26. package/dist/cli/commands/blame.js.map +0 -1
  27. package/dist/cli/commands/branch.d.ts +0 -249
  28. package/dist/cli/commands/branch.d.ts.map +0 -1
  29. package/dist/cli/commands/branch.js +0 -693
  30. package/dist/cli/commands/branch.js.map +0 -1
  31. package/dist/cli/commands/commit.d.ts +0 -182
  32. package/dist/cli/commands/commit.d.ts.map +0 -1
  33. package/dist/cli/commands/commit.js +0 -437
  34. package/dist/cli/commands/commit.js.map +0 -1
  35. package/dist/cli/commands/diff.d.ts +0 -464
  36. package/dist/cli/commands/diff.d.ts.map +0 -1
  37. package/dist/cli/commands/diff.js +0 -958
  38. package/dist/cli/commands/diff.js.map +0 -1
  39. package/dist/cli/commands/log.d.ts +0 -239
  40. package/dist/cli/commands/log.d.ts.map +0 -1
  41. package/dist/cli/commands/log.js +0 -535
  42. package/dist/cli/commands/log.js.map +0 -1
  43. package/dist/cli/commands/merge.d.ts +0 -106
  44. package/dist/cli/commands/merge.d.ts.map +0 -1
  45. package/dist/cli/commands/merge.js +0 -55
  46. package/dist/cli/commands/merge.js.map +0 -1
  47. package/dist/cli/commands/review.d.ts +0 -457
  48. package/dist/cli/commands/review.d.ts.map +0 -1
  49. package/dist/cli/commands/review.js +0 -533
  50. package/dist/cli/commands/review.js.map +0 -1
  51. package/dist/cli/commands/status.d.ts +0 -269
  52. package/dist/cli/commands/status.d.ts.map +0 -1
  53. package/dist/cli/commands/status.js +0 -493
  54. package/dist/cli/commands/status.js.map +0 -1
  55. package/dist/cli/commands/web.d.ts +0 -199
  56. package/dist/cli/commands/web.d.ts.map +0 -1
  57. package/dist/cli/commands/web.js +0 -696
  58. package/dist/cli/commands/web.js.map +0 -1
  59. package/dist/cli/fs-adapter.d.ts +0 -656
  60. package/dist/cli/fs-adapter.d.ts.map +0 -1
  61. package/dist/cli/fs-adapter.js +0 -1179
  62. package/dist/cli/fs-adapter.js.map +0 -1
  63. package/dist/cli/fsx-cli-adapter.d.ts +0 -359
  64. package/dist/cli/fsx-cli-adapter.d.ts.map +0 -1
  65. package/dist/cli/fsx-cli-adapter.js +0 -619
  66. package/dist/cli/fsx-cli-adapter.js.map +0 -1
  67. package/dist/cli/index.d.ts +0 -387
  68. package/dist/cli/index.d.ts.map +0 -1
  69. package/dist/cli/index.js +0 -523
  70. package/dist/cli/index.js.map +0 -1
  71. package/dist/cli/ui/components/DiffView.d.ts +0 -7
  72. package/dist/cli/ui/components/DiffView.d.ts.map +0 -1
  73. package/dist/cli/ui/components/DiffView.js +0 -11
  74. package/dist/cli/ui/components/DiffView.js.map +0 -1
  75. package/dist/cli/ui/components/ErrorDisplay.d.ts +0 -6
  76. package/dist/cli/ui/components/ErrorDisplay.d.ts.map +0 -1
  77. package/dist/cli/ui/components/ErrorDisplay.js +0 -11
  78. package/dist/cli/ui/components/ErrorDisplay.js.map +0 -1
  79. package/dist/cli/ui/components/FuzzySearch.d.ts +0 -9
  80. package/dist/cli/ui/components/FuzzySearch.d.ts.map +0 -1
  81. package/dist/cli/ui/components/FuzzySearch.js +0 -12
  82. package/dist/cli/ui/components/FuzzySearch.js.map +0 -1
  83. package/dist/cli/ui/components/LoadingSpinner.d.ts +0 -6
  84. package/dist/cli/ui/components/LoadingSpinner.d.ts.map +0 -1
  85. package/dist/cli/ui/components/LoadingSpinner.js +0 -10
  86. package/dist/cli/ui/components/LoadingSpinner.js.map +0 -1
  87. package/dist/cli/ui/components/NavigationList.d.ts +0 -9
  88. package/dist/cli/ui/components/NavigationList.d.ts.map +0 -1
  89. package/dist/cli/ui/components/NavigationList.js +0 -11
  90. package/dist/cli/ui/components/NavigationList.js.map +0 -1
  91. package/dist/cli/ui/components/ScrollableContent.d.ts +0 -8
  92. package/dist/cli/ui/components/ScrollableContent.d.ts.map +0 -1
  93. package/dist/cli/ui/components/ScrollableContent.js +0 -11
  94. package/dist/cli/ui/components/ScrollableContent.js.map +0 -1
  95. package/dist/cli/ui/components/index.d.ts +0 -7
  96. package/dist/cli/ui/components/index.d.ts.map +0 -1
  97. package/dist/cli/ui/components/index.js +0 -9
  98. package/dist/cli/ui/components/index.js.map +0 -1
  99. package/dist/cli/ui/terminal-ui.d.ts +0 -52
  100. package/dist/cli/ui/terminal-ui.d.ts.map +0 -1
  101. package/dist/cli/ui/terminal-ui.js +0 -121
  102. package/dist/cli/ui/terminal-ui.js.map +0 -1
  103. package/dist/do/BashModule.d.ts +0 -871
  104. package/dist/do/BashModule.d.ts.map +0 -1
  105. package/dist/do/BashModule.js +0 -1143
  106. package/dist/do/BashModule.js.map +0 -1
  107. package/dist/do/FsModule.d.ts +0 -601
  108. package/dist/do/FsModule.d.ts.map +0 -1
  109. package/dist/do/FsModule.js +0 -1120
  110. package/dist/do/FsModule.js.map +0 -1
  111. package/dist/do/GitModule.d.ts +0 -635
  112. package/dist/do/GitModule.d.ts.map +0 -1
  113. package/dist/do/GitModule.js +0 -781
  114. package/dist/do/GitModule.js.map +0 -1
  115. package/dist/do/GitRepoDO.d.ts +0 -281
  116. package/dist/do/GitRepoDO.d.ts.map +0 -1
  117. package/dist/do/GitRepoDO.js +0 -479
  118. package/dist/do/GitRepoDO.js.map +0 -1
  119. package/dist/do/bash-ast.d.ts +0 -246
  120. package/dist/do/bash-ast.d.ts.map +0 -1
  121. package/dist/do/bash-ast.js +0 -888
  122. package/dist/do/bash-ast.js.map +0 -1
  123. package/dist/do/container-executor.d.ts +0 -491
  124. package/dist/do/container-executor.d.ts.map +0 -1
  125. package/dist/do/container-executor.js +0 -730
  126. package/dist/do/container-executor.js.map +0 -1
  127. package/dist/do/index.d.ts +0 -53
  128. package/dist/do/index.d.ts.map +0 -1
  129. package/dist/do/index.js +0 -91
  130. package/dist/do/index.js.map +0 -1
  131. package/dist/do/tiered-storage.d.ts +0 -403
  132. package/dist/do/tiered-storage.d.ts.map +0 -1
  133. package/dist/do/tiered-storage.js +0 -689
  134. package/dist/do/tiered-storage.js.map +0 -1
  135. package/dist/do/withBash.d.ts +0 -231
  136. package/dist/do/withBash.d.ts.map +0 -1
  137. package/dist/do/withBash.js +0 -244
  138. package/dist/do/withBash.js.map +0 -1
  139. package/dist/do/withFs.d.ts +0 -237
  140. package/dist/do/withFs.d.ts.map +0 -1
  141. package/dist/do/withFs.js +0 -387
  142. package/dist/do/withFs.js.map +0 -1
  143. package/dist/do/withGit.d.ts +0 -180
  144. package/dist/do/withGit.d.ts.map +0 -1
  145. package/dist/do/withGit.js +0 -271
  146. package/dist/do/withGit.js.map +0 -1
  147. package/dist/durable-object/object-store.d.ts +0 -633
  148. package/dist/durable-object/object-store.d.ts.map +0 -1
  149. package/dist/durable-object/object-store.js +0 -1161
  150. package/dist/durable-object/object-store.js.map +0 -1
  151. package/dist/durable-object/schema.d.ts.map +0 -1
  152. package/dist/durable-object/schema.js.map +0 -1
  153. package/dist/durable-object/wal.d.ts +0 -416
  154. package/dist/durable-object/wal.d.ts.map +0 -1
  155. package/dist/durable-object/wal.js +0 -445
  156. package/dist/durable-object/wal.js.map +0 -1
  157. package/dist/mcp/adapter.d.ts +0 -772
  158. package/dist/mcp/adapter.d.ts.map +0 -1
  159. package/dist/mcp/adapter.js +0 -895
  160. package/dist/mcp/adapter.js.map +0 -1
  161. package/dist/mcp/sandbox/miniflare-evaluator.d.ts +0 -22
  162. package/dist/mcp/sandbox/miniflare-evaluator.d.ts.map +0 -1
  163. package/dist/mcp/sandbox/miniflare-evaluator.js +0 -140
  164. package/dist/mcp/sandbox/miniflare-evaluator.js.map +0 -1
  165. package/dist/mcp/sandbox/object-store-proxy.d.ts +0 -32
  166. package/dist/mcp/sandbox/object-store-proxy.d.ts.map +0 -1
  167. package/dist/mcp/sandbox/object-store-proxy.js +0 -30
  168. package/dist/mcp/sandbox/object-store-proxy.js.map +0 -1
  169. package/dist/mcp/sandbox/template.d.ts +0 -17
  170. package/dist/mcp/sandbox/template.d.ts.map +0 -1
  171. package/dist/mcp/sandbox/template.js +0 -71
  172. package/dist/mcp/sandbox/template.js.map +0 -1
  173. package/dist/mcp/sandbox.d.ts +0 -764
  174. package/dist/mcp/sandbox.d.ts.map +0 -1
  175. package/dist/mcp/sandbox.js +0 -1362
  176. package/dist/mcp/sandbox.js.map +0 -1
  177. package/dist/mcp/sdk-adapter.d.ts +0 -835
  178. package/dist/mcp/sdk-adapter.d.ts.map +0 -1
  179. package/dist/mcp/sdk-adapter.js +0 -974
  180. package/dist/mcp/sdk-adapter.js.map +0 -1
  181. package/dist/mcp/tools/do.d.ts +0 -32
  182. package/dist/mcp/tools/do.d.ts.map +0 -1
  183. package/dist/mcp/tools/do.js +0 -115
  184. package/dist/mcp/tools/do.js.map +0 -1
  185. package/dist/mcp/tools.d.ts +0 -548
  186. package/dist/mcp/tools.d.ts.map +0 -1
  187. package/dist/mcp/tools.js +0 -1934
  188. package/dist/mcp/tools.js.map +0 -1
  189. package/dist/ops/blame.d.ts +0 -551
  190. package/dist/ops/blame.d.ts.map +0 -1
  191. package/dist/ops/blame.js +0 -1037
  192. package/dist/ops/blame.js.map +0 -1
  193. package/dist/ops/branch.d.ts +0 -766
  194. package/dist/ops/branch.d.ts.map +0 -1
  195. package/dist/ops/branch.js +0 -950
  196. package/dist/ops/branch.js.map +0 -1
  197. package/dist/ops/commit-traversal.d.ts +0 -349
  198. package/dist/ops/commit-traversal.d.ts.map +0 -1
  199. package/dist/ops/commit-traversal.js +0 -821
  200. package/dist/ops/commit-traversal.js.map +0 -1
  201. package/dist/ops/commit.d.ts +0 -555
  202. package/dist/ops/commit.d.ts.map +0 -1
  203. package/dist/ops/commit.js +0 -826
  204. package/dist/ops/commit.js.map +0 -1
  205. package/dist/ops/merge-base.d.ts +0 -397
  206. package/dist/ops/merge-base.d.ts.map +0 -1
  207. package/dist/ops/merge-base.js +0 -691
  208. package/dist/ops/merge-base.js.map +0 -1
  209. package/dist/ops/merge.d.ts +0 -855
  210. package/dist/ops/merge.d.ts.map +0 -1
  211. package/dist/ops/merge.js +0 -1551
  212. package/dist/ops/merge.js.map +0 -1
  213. package/dist/ops/tag.d.ts +0 -247
  214. package/dist/ops/tag.d.ts.map +0 -1
  215. package/dist/ops/tag.js +0 -649
  216. package/dist/ops/tag.js.map +0 -1
  217. package/dist/ops/tree-builder.d.ts +0 -178
  218. package/dist/ops/tree-builder.d.ts.map +0 -1
  219. package/dist/ops/tree-builder.js +0 -271
  220. package/dist/ops/tree-builder.js.map +0 -1
  221. package/dist/ops/tree-diff.d.ts +0 -291
  222. package/dist/ops/tree-diff.d.ts.map +0 -1
  223. package/dist/ops/tree-diff.js +0 -705
  224. package/dist/ops/tree-diff.js.map +0 -1
  225. package/dist/pack/delta.d.ts +0 -248
  226. package/dist/pack/delta.d.ts.map +0 -1
  227. package/dist/pack/delta.js +0 -736
  228. package/dist/pack/delta.js.map +0 -1
  229. package/dist/pack/format.d.ts +0 -446
  230. package/dist/pack/format.d.ts.map +0 -1
  231. package/dist/pack/format.js +0 -572
  232. package/dist/pack/format.js.map +0 -1
  233. package/dist/pack/full-generation.d.ts +0 -612
  234. package/dist/pack/full-generation.d.ts.map +0 -1
  235. package/dist/pack/full-generation.js +0 -1378
  236. package/dist/pack/full-generation.js.map +0 -1
  237. package/dist/pack/generation.d.ts +0 -441
  238. package/dist/pack/generation.d.ts.map +0 -1
  239. package/dist/pack/generation.js +0 -707
  240. package/dist/pack/generation.js.map +0 -1
  241. package/dist/pack/index.d.ts +0 -502
  242. package/dist/pack/index.d.ts.map +0 -1
  243. package/dist/pack/index.js +0 -833
  244. package/dist/pack/index.js.map +0 -1
  245. package/dist/refs/branch.d.ts +0 -668
  246. package/dist/refs/branch.d.ts.map +0 -1
  247. package/dist/refs/branch.js +0 -897
  248. package/dist/refs/branch.js.map +0 -1
  249. package/dist/refs/storage.d.ts +0 -833
  250. package/dist/refs/storage.d.ts.map +0 -1
  251. package/dist/refs/storage.js +0 -1023
  252. package/dist/refs/storage.js.map +0 -1
  253. package/dist/refs/tag.d.ts +0 -860
  254. package/dist/refs/tag.d.ts.map +0 -1
  255. package/dist/refs/tag.js +0 -996
  256. package/dist/refs/tag.js.map +0 -1
  257. package/dist/storage/backend.d.ts +0 -425
  258. package/dist/storage/backend.d.ts.map +0 -1
  259. package/dist/storage/backend.js +0 -41
  260. package/dist/storage/backend.js.map +0 -1
  261. package/dist/storage/fsx-adapter.d.ts +0 -204
  262. package/dist/storage/fsx-adapter.d.ts.map +0 -1
  263. package/dist/storage/fsx-adapter.js +0 -470
  264. package/dist/storage/fsx-adapter.js.map +0 -1
  265. package/dist/storage/lru-cache.d.ts +0 -691
  266. package/dist/storage/lru-cache.d.ts.map +0 -1
  267. package/dist/storage/lru-cache.js +0 -813
  268. package/dist/storage/lru-cache.js.map +0 -1
  269. package/dist/storage/object-index.d.ts +0 -585
  270. package/dist/storage/object-index.d.ts.map +0 -1
  271. package/dist/storage/object-index.js +0 -532
  272. package/dist/storage/object-index.js.map +0 -1
  273. package/dist/storage/r2-pack.d.ts +0 -1257
  274. package/dist/storage/r2-pack.d.ts.map +0 -1
  275. package/dist/storage/r2-pack.js +0 -1770
  276. package/dist/storage/r2-pack.js.map +0 -1
  277. package/dist/tiered/cdc-pipeline.d.ts +0 -1888
  278. package/dist/tiered/cdc-pipeline.d.ts.map +0 -1
  279. package/dist/tiered/cdc-pipeline.js +0 -1880
  280. package/dist/tiered/cdc-pipeline.js.map +0 -1
  281. package/dist/tiered/migration.d.ts +0 -1104
  282. package/dist/tiered/migration.d.ts.map +0 -1
  283. package/dist/tiered/migration.js +0 -1214
  284. package/dist/tiered/migration.js.map +0 -1
  285. package/dist/tiered/parquet-writer.d.ts +0 -1145
  286. package/dist/tiered/parquet-writer.d.ts.map +0 -1
  287. package/dist/tiered/parquet-writer.js +0 -1183
  288. package/dist/tiered/parquet-writer.js.map +0 -1
  289. package/dist/tiered/read-path.d.ts +0 -835
  290. package/dist/tiered/read-path.d.ts.map +0 -1
  291. package/dist/tiered/read-path.js +0 -487
  292. package/dist/tiered/read-path.js.map +0 -1
  293. package/dist/types/capability.d.ts +0 -1385
  294. package/dist/types/capability.d.ts.map +0 -1
  295. package/dist/types/capability.js +0 -36
  296. package/dist/types/capability.js.map +0 -1
  297. package/dist/types/index.d.ts +0 -13
  298. package/dist/types/index.d.ts.map +0 -1
  299. package/dist/types/index.js +0 -18
  300. package/dist/types/index.js.map +0 -1
  301. package/dist/types/objects.d.ts +0 -692
  302. package/dist/types/objects.d.ts.map +0 -1
  303. package/dist/types/objects.js +0 -837
  304. package/dist/types/objects.js.map +0 -1
  305. package/dist/types/storage.d.ts +0 -603
  306. package/dist/types/storage.d.ts.map +0 -1
  307. package/dist/types/storage.js +0 -191
  308. package/dist/types/storage.js.map +0 -1
  309. package/dist/types/worker-loader.d.ts +0 -60
  310. package/dist/types/worker-loader.d.ts.map +0 -1
  311. package/dist/types/worker-loader.js +0 -62
  312. package/dist/types/worker-loader.js.map +0 -1
  313. package/dist/utils/hash.d.ts +0 -197
  314. package/dist/utils/hash.d.ts.map +0 -1
  315. package/dist/utils/hash.js +0 -268
  316. package/dist/utils/hash.js.map +0 -1
  317. package/dist/utils/sha1.d.ts +0 -290
  318. package/dist/utils/sha1.d.ts.map +0 -1
  319. package/dist/utils/sha1.js +0 -582
  320. package/dist/utils/sha1.js.map +0 -1
  321. package/dist/wire/capabilities.d.ts +0 -1044
  322. package/dist/wire/capabilities.d.ts.map +0 -1
  323. package/dist/wire/capabilities.js +0 -941
  324. package/dist/wire/capabilities.js.map +0 -1
  325. package/dist/wire/path-security.d.ts +0 -157
  326. package/dist/wire/path-security.d.ts.map +0 -1
  327. package/dist/wire/path-security.js +0 -307
  328. package/dist/wire/path-security.js.map +0 -1
  329. package/dist/wire/pkt-line.d.ts +0 -345
  330. package/dist/wire/pkt-line.d.ts.map +0 -1
  331. package/dist/wire/pkt-line.js +0 -381
  332. package/dist/wire/pkt-line.js.map +0 -1
  333. package/dist/wire/receive-pack.d.ts +0 -1059
  334. package/dist/wire/receive-pack.d.ts.map +0 -1
  335. package/dist/wire/receive-pack.js +0 -1414
  336. package/dist/wire/receive-pack.js.map +0 -1
  337. package/dist/wire/smart-http.d.ts +0 -799
  338. package/dist/wire/smart-http.d.ts.map +0 -1
  339. package/dist/wire/smart-http.js +0 -945
  340. package/dist/wire/smart-http.js.map +0 -1
  341. package/dist/wire/upload-pack.d.ts +0 -727
  342. package/dist/wire/upload-pack.d.ts.map +0 -1
  343. package/dist/wire/upload-pack.js +0 -1138
  344. package/dist/wire/upload-pack.js.map +0 -1
@@ -1,1888 +0,0 @@
1
- /**
2
- * @fileoverview CDC (Change Data Capture) Pipeline for Git Operations
3
- *
4
- * @description
5
- * This module provides a comprehensive Change Data Capture system for Git operations,
6
- * enabling real-time event streaming, transformation, and analytics for Git repository events.
7
- *
8
- * ## Key Features
9
- *
10
- * - **Event Capture**: Captures git operations (push, fetch, commits, branches, tags, merges)
11
- * - **Parquet Transformation**: Converts events to columnar Parquet format for analytics
12
- * - **Batching**: Efficient event batching with configurable size and time-based flushing
13
- * - **Retry Policies**: Configurable exponential backoff with jitter for resilient processing
14
- * - **Dead Letter Queue**: Handles failed events for later reprocessing
15
- * - **Metrics**: Built-in tracking for events processed, batches, errors, and latency
16
- *
17
- * ## Architecture
18
- *
19
- * The pipeline consists of several components:
20
- * 1. **CDCEventCapture**: Captures git operations and converts them to CDCEvents
21
- * 2. **CDCBatcher**: Batches events for efficient processing
22
- * 3. **ParquetTransformer**: Transforms events to Parquet format
23
- * 4. **CDCPipeline**: Orchestrates the entire flow with error handling
24
- *
25
- * ## Event Flow
26
- *
27
- * ```
28
- * Git Operation -> CDCEventCapture -> CDCBatcher -> ParquetTransformer -> Output
29
- * |
30
- * v
31
- * (On failure) Dead Letter Queue
32
- * ```
33
- *
34
- * @module tiered/cdc-pipeline
35
- *
36
- * @example
37
- * ```typescript
38
- * // Create and start a pipeline
39
- * const pipeline = new CDCPipeline({
40
- * batchSize: 100,
41
- * flushIntervalMs: 5000,
42
- * maxRetries: 3,
43
- * parquetCompression: 'snappy',
44
- * outputPath: '/analytics',
45
- * schemaVersion: 1
46
- * })
47
- *
48
- * await pipeline.start()
49
- *
50
- * // Process events
51
- * pipeline.onOutput((output) => {
52
- * console.log(`Generated batch: ${output.batchId}`)
53
- * console.log(`Events: ${output.events.length}`)
54
- * console.log(`Parquet size: ${output.parquetBuffer.length} bytes`)
55
- * })
56
- *
57
- * pipeline.onDeadLetter((events, error) => {
58
- * console.error(`Failed events: ${events.length}`, error)
59
- * })
60
- *
61
- * // Create and process an event
62
- * const event = createCDCEvent('COMMIT_CREATED', 'push', {
63
- * operation: 'commit-create',
64
- * sha: 'abc123...',
65
- * treeSha: 'def456...',
66
- * parentShas: ['parent1...']
67
- * })
68
- *
69
- * await pipeline.process(event)
70
- *
71
- * // Get metrics
72
- * const metrics = pipeline.getMetrics()
73
- * console.log(`Processed: ${metrics.eventsProcessed}`)
74
- * console.log(`Batches: ${metrics.batchesGenerated}`)
75
- *
76
- * // Stop the pipeline
77
- * await pipeline.stop()
78
- * ```
79
- *
80
- * @see {@link CDCPipeline} - Main pipeline orchestration class
81
- * @see {@link CDCEventCapture} - Event capture from git operations
82
- * @see {@link ParquetTransformer} - Parquet format transformation
83
- */
84
- /**
85
- * CDC Event Types representing different git operations.
86
- *
87
- * @description
88
- * Enumeration of all supported Git operation types that can be captured
89
- * by the CDC system. Each type corresponds to a specific Git operation.
90
- *
91
- * @example
92
- * ```typescript
93
- * const eventType: CDCEventType = 'COMMIT_CREATED'
94
- * ```
95
- */
96
- export type CDCEventType = 'OBJECT_CREATED' | 'OBJECT_DELETED' | 'REF_UPDATED' | 'PACK_RECEIVED' | 'COMMIT_CREATED' | 'TREE_MODIFIED' | 'BRANCH_CREATED' | 'BRANCH_DELETED' | 'TAG_CREATED' | 'MERGE_COMPLETED';
97
- /**
98
- * CDC Event Source indicating the origin of the event.
99
- *
100
- * @description
101
- * Identifies the source system or operation that generated the CDC event.
102
- * This helps with event filtering, routing, and analytics.
103
- *
104
- * - `push`: Events from git push operations
105
- * - `fetch`: Events from git fetch operations
106
- * - `internal`: Events from internal system operations
107
- * - `replication`: Events from repository replication
108
- * - `gc`: Events from garbage collection
109
- *
110
- * @example
111
- * ```typescript
112
- * const source: CDCEventSource = 'push'
113
- * ```
114
- */
115
- export type CDCEventSource = 'push' | 'fetch' | 'internal' | 'replication' | 'gc';
116
- /**
117
- * Payload data for CDC events.
118
- *
119
- * @description
120
- * Contains the detailed data associated with a CDC event. Different event
121
- * types use different subsets of these fields.
122
- *
123
- * @example
124
- * ```typescript
125
- * // Commit created payload
126
- * const payload: CDCEventPayload = {
127
- * operation: 'commit-create',
128
- * sha: 'abc123...',
129
- * treeSha: 'def456...',
130
- * parentShas: ['parent1...']
131
- * }
132
- *
133
- * // Ref updated payload
134
- * const refPayload: CDCEventPayload = {
135
- * operation: 'ref-update',
136
- * refName: 'refs/heads/main',
137
- * oldSha: 'old123...',
138
- * newSha: 'new456...'
139
- * }
140
- * ```
141
- */
142
- export interface CDCEventPayload {
143
- /**
144
- * The type of operation performed.
145
- *
146
- * @example 'commit-create', 'ref-update', 'branch-create'
147
- */
148
- operation: string;
149
- /**
150
- * SHA-1 hash of the affected object.
151
- * Present for object-related events.
152
- */
153
- sha?: string;
154
- /**
155
- * Timestamp of the operation in milliseconds since epoch.
156
- */
157
- timestamp?: number;
158
- /**
159
- * Raw binary data associated with the event.
160
- * Used for object creation and pack reception events.
161
- */
162
- data?: Uint8Array;
163
- /**
164
- * Additional metadata key-value pairs.
165
- * Can include object type, size, etc.
166
- */
167
- metadata?: Record<string, unknown>;
168
- /**
169
- * Git reference name (e.g., 'refs/heads/main').
170
- * Present for ref update events.
171
- */
172
- refName?: string;
173
- /**
174
- * Previous SHA for ref update events.
175
- * May be all zeros for new refs.
176
- */
177
- oldSha?: string;
178
- /**
179
- * New SHA for ref update events.
180
- * May be all zeros for deleted refs.
181
- */
182
- newSha?: string;
183
- /**
184
- * Number of objects in a pack.
185
- * Present for pack received events.
186
- */
187
- objectCount?: number;
188
- /**
189
- * Tree SHA for commit events.
190
- */
191
- treeSha?: string;
192
- /**
193
- * Parent commit SHAs for commit events.
194
- */
195
- parentShas?: string[];
196
- /**
197
- * Branch name for branch-related events.
198
- */
199
- branchName?: string;
200
- /**
201
- * Tag name for tag-related events.
202
- */
203
- tagName?: string;
204
- /**
205
- * Base commit SHA for merge events.
206
- */
207
- baseSha?: string;
208
- /**
209
- * Head commit SHA for merge events.
210
- */
211
- headSha?: string;
212
- }
213
- /**
214
- * CDC Event structure representing a single change data capture event.
215
- *
216
- * @description
217
- * A CDCEvent captures a single git operation with all metadata needed
218
- * for replication, analytics, and auditing. Events are immutable once
219
- * created and ordered by their sequence number.
220
- *
221
- * @example
222
- * ```typescript
223
- * const event: CDCEvent = {
224
- * id: 'evt-1234567890-abc123',
225
- * type: 'COMMIT_CREATED',
226
- * source: 'push',
227
- * timestamp: 1703980800000,
228
- * payload: {
229
- * operation: 'commit-create',
230
- * sha: 'abc123...',
231
- * treeSha: 'def456...',
232
- * parentShas: ['parent1...']
233
- * },
234
- * sequence: 42,
235
- * version: 1
236
- * }
237
- * ```
238
- */
239
- export interface CDCEvent {
240
- /**
241
- * Unique identifier for this event.
242
- * Format: `evt-{timestamp}-{random}`
243
- */
244
- id: string;
245
- /**
246
- * Type of git operation that generated this event.
247
- *
248
- * @see {@link CDCEventType}
249
- */
250
- type: CDCEventType;
251
- /**
252
- * Source system or operation that generated this event.
253
- *
254
- * @see {@link CDCEventSource}
255
- */
256
- source: CDCEventSource;
257
- /**
258
- * Unix timestamp in milliseconds when the event was created.
259
- */
260
- timestamp: number;
261
- /**
262
- * Event payload containing operation-specific data.
263
- */
264
- payload: CDCEventPayload;
265
- /**
266
- * Monotonically increasing sequence number within a capture session.
267
- * Used for ordering and deduplication.
268
- */
269
- sequence: number;
270
- /**
271
- * Schema version of the event format.
272
- * Used for backward compatibility during upgrades.
273
- */
274
- version: number;
275
- }
276
- /**
277
- * Configuration for the CDC pipeline.
278
- *
279
- * @description
280
- * Defines all configuration options for creating and running a CDC pipeline,
281
- * including batching behavior, retry policy, and output format.
282
- *
283
- * @example
284
- * ```typescript
285
- * const config: CDCPipelineConfig = {
286
- * batchSize: 100, // Flush every 100 events
287
- * flushIntervalMs: 5000, // Or every 5 seconds
288
- * maxRetries: 3, // Retry failed batches 3 times
289
- * parquetCompression: 'snappy',
290
- * outputPath: '/analytics/cdc',
291
- * schemaVersion: 1
292
- * }
293
- * ```
294
- */
295
- export interface CDCPipelineConfig {
296
- /**
297
- * Maximum number of events to batch before flushing.
298
- * Lower values reduce latency, higher values improve throughput.
299
- */
300
- batchSize: number;
301
- /**
302
- * Maximum time in milliseconds to wait before flushing a batch.
303
- * Ensures events are processed even with low throughput.
304
- */
305
- flushIntervalMs: number;
306
- /**
307
- * Maximum number of retry attempts for failed batch processing.
308
- * Uses exponential backoff between attempts.
309
- */
310
- maxRetries: number;
311
- /**
312
- * Compression algorithm for Parquet output.
313
- *
314
- * - `snappy`: Fast compression with moderate ratio (recommended)
315
- * - `gzip`: Higher compression ratio, slower
316
- * - `none`: No compression
317
- */
318
- parquetCompression: 'snappy' | 'gzip' | 'none';
319
- /**
320
- * Base path for output files.
321
- * Parquet files will be written to this directory.
322
- */
323
- outputPath: string;
324
- /**
325
- * Schema version for event format.
326
- * Used for backward compatibility during upgrades.
327
- */
328
- schemaVersion: number;
329
- }
330
- /**
331
- * Pipeline operational state.
332
- *
333
- * @description
334
- * Indicates the current state of the CDC pipeline.
335
- *
336
- * - `stopped`: Pipeline is not running, no events are processed
337
- * - `running`: Pipeline is active and processing events
338
- * - `paused`: Pipeline is temporarily suspended (reserved for future use)
339
- */
340
- export type CDCPipelineState = 'stopped' | 'running' | 'paused';
341
- /**
342
- * Configuration for event batching.
343
- *
344
- * @description
345
- * Controls how events are grouped into batches for processing.
346
- *
347
- * @example
348
- * ```typescript
349
- * const config: BatchConfig = {
350
- * batchSize: 100,
351
- * flushIntervalMs: 5000
352
- * }
353
- * ```
354
- */
355
- export interface BatchConfig {
356
- /**
357
- * Maximum number of events per batch.
358
- */
359
- batchSize: number;
360
- /**
361
- * Maximum time to wait before flushing a partial batch.
362
- */
363
- flushIntervalMs: number;
364
- }
365
- /**
366
- * Result of a batch flush operation.
367
- *
368
- * @description
369
- * Contains the events in the batch and metadata about the batch
370
- * for downstream processing and monitoring.
371
- *
372
- * @example
373
- * ```typescript
374
- * batcher.onBatch((result: BatchResult) => {
375
- * console.log(`Batch: ${result.eventCount} events`)
376
- * console.log(`Sequences: ${result.minSequence} - ${result.maxSequence}`)
377
- * console.log(`Time range: ${result.minTimestamp} - ${result.maxTimestamp}`)
378
- * })
379
- * ```
380
- */
381
- export interface BatchResult {
382
- /**
383
- * Array of events in this batch.
384
- */
385
- events: CDCEvent[];
386
- /**
387
- * Number of events in the batch.
388
- */
389
- eventCount: number;
390
- /**
391
- * Whether the batch was processed successfully.
392
- */
393
- success: boolean;
394
- /**
395
- * Minimum sequence number in the batch.
396
- * Useful for tracking progress and resumption.
397
- */
398
- minSequence?: number;
399
- /**
400
- * Maximum sequence number in the batch.
401
- */
402
- maxSequence?: number;
403
- /**
404
- * Earliest event timestamp in the batch (milliseconds).
405
- */
406
- minTimestamp?: number;
407
- /**
408
- * Latest event timestamp in the batch (milliseconds).
409
- */
410
- maxTimestamp?: number;
411
- }
412
- /**
413
- * CDC Error types for categorizing failures.
414
- *
415
- * @description
416
- * Error codes that help identify the type of failure for
417
- * appropriate error handling and recovery strategies.
418
- *
419
- * - `VALIDATION_ERROR`: Event failed validation checks
420
- * - `PROCESSING_ERROR`: Error during event processing
421
- * - `SERIALIZATION_ERROR`: Error serializing/deserializing events
422
- * - `STORAGE_ERROR`: Error writing to storage
423
- * - `TIMEOUT_ERROR`: Operation timed out
424
- * - `BUFFER_OVERFLOW_ERROR`: Event buffer exceeded capacity
425
- * - `UNKNOWN_ERROR`: Unclassified error
426
- */
427
- export type CDCErrorType = 'VALIDATION_ERROR' | 'PROCESSING_ERROR' | 'SERIALIZATION_ERROR' | 'STORAGE_ERROR' | 'TIMEOUT_ERROR' | 'BUFFER_OVERFLOW_ERROR' | 'UNKNOWN_ERROR';
428
- /**
429
- * Field definition for Parquet schema.
430
- *
431
- * @description
432
- * Defines a single column in the Parquet output schema.
433
- */
434
- export interface ParquetField {
435
- /**
436
- * Column name.
437
- */
438
- name: string;
439
- /**
440
- * Column data type (STRING, INT64, TIMESTAMP, etc.).
441
- */
442
- type: string;
443
- /**
444
- * Whether the column can contain null values.
445
- */
446
- nullable: boolean;
447
- }
448
- /**
449
- * Row representation for Parquet output.
450
- *
451
- * @description
452
- * Represents a single CDC event as a Parquet row with
453
- * flattened fields for efficient columnar storage.
454
- */
455
- export interface ParquetRow {
456
- /**
457
- * Event unique identifier.
458
- */
459
- event_id: string;
460
- /**
461
- * Event type (e.g., 'COMMIT_CREATED').
462
- */
463
- event_type: string;
464
- /**
465
- * Event source (e.g., 'push').
466
- */
467
- source: string;
468
- /**
469
- * Event timestamp in milliseconds.
470
- */
471
- timestamp: number;
472
- /**
473
- * Event sequence number.
474
- */
475
- sequence: number;
476
- /**
477
- * Event schema version.
478
- */
479
- version: number;
480
- /**
481
- * JSON-serialized event payload.
482
- */
483
- payload_json: string;
484
- /**
485
- * SHA from the payload, extracted for efficient filtering.
486
- */
487
- sha: string | null;
488
- }
489
- /**
490
- * Batch of Parquet rows ready for writing.
491
- *
492
- * @description
493
- * Contains transformed rows and metadata needed to write
494
- * a Parquet file.
495
- */
496
- export interface ParquetBatch {
497
- /**
498
- * Array of Parquet rows.
499
- */
500
- rows: ParquetRow[];
501
- /**
502
- * Number of rows in the batch.
503
- */
504
- rowCount: number;
505
- /**
506
- * Batch creation timestamp.
507
- */
508
- createdAt: number;
509
- /**
510
- * Parquet schema definition.
511
- */
512
- schema: {
513
- fields: ParquetField[];
514
- };
515
- /**
516
- * Compression algorithm used.
517
- */
518
- compression: string;
519
- }
520
- /**
521
- * Output from the CDC pipeline.
522
- *
523
- * @description
524
- * Contains the Parquet-formatted data and metadata for a
525
- * processed batch of events.
526
- *
527
- * @example
528
- * ```typescript
529
- * pipeline.onOutput(async (output: PipelineOutput) => {
530
- * console.log(`Batch ID: ${output.batchId}`)
531
- * console.log(`Events: ${output.events.length}`)
532
- * console.log(`Size: ${output.parquetBuffer.length} bytes`)
533
- *
534
- * // Write to storage
535
- * await r2.put(`cdc/${output.batchId}.parquet`, output.parquetBuffer)
536
- * })
537
- * ```
538
- */
539
- export interface PipelineOutput {
540
- /**
541
- * Parquet-formatted data as a byte array.
542
- */
543
- parquetBuffer: Uint8Array;
544
- /**
545
- * Original events included in this batch.
546
- */
547
- events: CDCEvent[];
548
- /**
549
- * Unique identifier for this batch.
550
- * Format: `batch-{timestamp}-{random}`
551
- */
552
- batchId: string;
553
- }
554
- /**
555
- * Metrics for monitoring pipeline performance.
556
- *
557
- * @description
558
- * Provides operational metrics for monitoring and alerting
559
- * on pipeline health and performance.
560
- *
561
- * @example
562
- * ```typescript
563
- * const metrics = pipeline.getMetrics()
564
- * console.log(`Events processed: ${metrics.eventsProcessed}`)
565
- * console.log(`Batches generated: ${metrics.batchesGenerated}`)
566
- * console.log(`Bytes written: ${metrics.bytesWritten}`)
567
- * console.log(`Errors: ${metrics.errors}`)
568
- * console.log(`Avg latency: ${metrics.avgProcessingLatencyMs}ms`)
569
- * ```
570
- */
571
- export interface PipelineMetrics {
572
- /**
573
- * Total number of events processed.
574
- */
575
- eventsProcessed: number;
576
- /**
577
- * Total number of batches generated.
578
- */
579
- batchesGenerated: number;
580
- /**
581
- * Total bytes written to output.
582
- */
583
- bytesWritten: number;
584
- /**
585
- * Total number of errors encountered.
586
- */
587
- errors: number;
588
- /**
589
- * Average event processing latency in milliseconds.
590
- * Calculated from the last 1000 events.
591
- */
592
- avgProcessingLatencyMs: number;
593
- }
594
- /**
595
- * Result of processing a single event.
596
- *
597
- * @description
598
- * Returned when an event is successfully queued for processing.
599
- */
600
- export interface ProcessResult {
601
- /**
602
- * Whether the event was successfully queued.
603
- */
604
- success: boolean;
605
- /**
606
- * ID of the processed event.
607
- */
608
- eventId: string;
609
- }
610
- /**
611
- * Result of stopping the pipeline.
612
- *
613
- * @description
614
- * Contains information about any pending events that were
615
- * flushed during shutdown.
616
- */
617
- export interface StopResult {
618
- /**
619
- * Number of events flushed during stop.
620
- */
621
- flushedCount: number;
622
- }
623
- /**
624
- * Custom error class for CDC operations.
625
- *
626
- * @description
627
- * CDCError provides structured error information for CDC pipeline failures,
628
- * including an error type for programmatic handling and optional cause for
629
- * error chaining.
630
- *
631
- * @example
632
- * ```typescript
633
- * try {
634
- * await pipeline.process(event)
635
- * } catch (error) {
636
- * if (error instanceof CDCError) {
637
- * switch (error.type) {
638
- * case 'VALIDATION_ERROR':
639
- * console.log('Invalid event:', error.message)
640
- * break
641
- * case 'PROCESSING_ERROR':
642
- * console.log('Processing failed:', error.message)
643
- * if (error.cause) {
644
- * console.log('Caused by:', error.cause.message)
645
- * }
646
- * break
647
- * }
648
- * }
649
- * }
650
- * ```
651
- *
652
- * @class CDCError
653
- * @extends Error
654
- */
655
- export declare class CDCError extends Error {
656
- readonly type: CDCErrorType;
657
- readonly cause?: Error | undefined;
658
- /**
659
- * Creates a new CDCError.
660
- *
661
- * @param type - Error type for categorization
662
- * @param message - Human-readable error message
663
- * @param cause - Optional underlying error that caused this error
664
- */
665
- constructor(type: CDCErrorType, message: string, cause?: Error | undefined);
666
- }
667
- /**
668
- * Configuration for the retry policy.
669
- *
670
- * @description
671
- * Configures exponential backoff behavior for failed operations.
672
- *
673
- * @example
674
- * ```typescript
675
- * const config: RetryPolicyConfig = {
676
- * maxRetries: 3,
677
- * initialDelayMs: 100,
678
- * maxDelayMs: 5000,
679
- * backoffMultiplier: 2,
680
- * jitter: true // Add randomness to prevent thundering herd
681
- * }
682
- * ```
683
- */
684
- export interface RetryPolicyConfig {
685
- /**
686
- * Maximum number of retry attempts before giving up.
687
- */
688
- maxRetries: number;
689
- /**
690
- * Initial delay in milliseconds before first retry.
691
- */
692
- initialDelayMs: number;
693
- /**
694
- * Maximum delay in milliseconds between retries.
695
- * Caps exponential growth.
696
- */
697
- maxDelayMs: number;
698
- /**
699
- * Multiplier applied to delay after each attempt.
700
- * A value of 2 doubles the delay each time.
701
- */
702
- backoffMultiplier: number;
703
- /**
704
- * Whether to add random jitter to delays.
705
- * Helps prevent thundering herd problems.
706
- */
707
- jitter?: boolean;
708
- }
709
- /**
710
- * Retry policy implementing exponential backoff with optional jitter.
711
- *
712
- * @description
713
- * Provides a robust retry mechanism for handling transient failures.
714
- * Uses exponential backoff to space out retry attempts, with optional
715
- * jitter to prevent synchronized retries from multiple clients.
716
- *
717
- * **Backoff Formula:**
718
- * `delay = min(initialDelay * (multiplier ^ (attempt - 1)), maxDelay)` (attempt is 1-indexed)
719
- *
720
- * **With Jitter:**
721
- * `delay = delay * random(0.5, 1.5)`
722
- *
723
- * @example
724
- * ```typescript
725
- * const policy = new CDCRetryPolicy({
726
- * maxRetries: 3,
727
- * initialDelayMs: 100,
728
- * maxDelayMs: 5000,
729
- * backoffMultiplier: 2,
730
- * jitter: true
731
- * })
732
- *
733
- * let attempts = 0
734
- * while (attempts < 10) {
735
- * try {
736
- * await doOperation()
737
- * break
738
- * } catch (error) {
739
- * attempts++
740
- * if (!policy.shouldRetry(attempts)) {
741
- * throw new Error('Max retries exceeded')
742
- * }
743
- * const delay = policy.getDelay(attempts)
744
- * console.log(`Retry ${attempts} after ${delay}ms`)
745
- * await sleep(delay)
746
- * }
747
- * }
748
- * ```
749
- *
750
- * @class CDCRetryPolicy
751
- */
752
- export declare class CDCRetryPolicy {
753
- /**
754
- * Retry configuration.
755
- * @private
756
- */
757
- private readonly config;
758
- /**
759
- * Creates a new retry policy.
760
- *
761
- * @param config - Retry policy configuration
762
- */
763
- constructor(config: RetryPolicyConfig);
764
- /**
765
- * Determines whether another retry should be attempted.
766
- *
767
- * @param attemptCount - Number of attempts already made
768
- * @returns true if more retries are allowed, false otherwise
769
- *
770
- * @example
771
- * ```typescript
772
- * if (policy.shouldRetry(3)) {
773
- * // Retry is allowed
774
- * }
775
- * ```
776
- */
777
- shouldRetry(attemptCount: number): boolean;
778
- /**
779
- * Calculates the delay before the next retry.
780
- *
781
- * @description
782
- * Computes delay using exponential backoff, capped at maxDelayMs.
783
- * If jitter is enabled, applies a random factor between 0.5x and 1.5x.
784
- *
785
- * @param attemptCount - Number of attempts already made (1-indexed)
786
- * @returns Delay in milliseconds before next retry
787
- *
788
- * @example
789
- * ```typescript
790
- * // With initialDelay=100, multiplier=2:
791
- * // Attempt 1: 100ms * 2^0 = 100ms
792
- * // Attempt 2: 100ms * 2^1 = 200ms
793
- * // Attempt 3: 100ms * 2^2 = 400ms
794
- * const delay = policy.getDelay(attemptCount)
795
- * await sleep(delay)
796
- * ```
797
- */
798
- getDelay(attemptCount: number): number;
799
- }
800
- /**
801
- * Configuration options for CDC event capture.
802
- *
803
- * @example
804
- * ```typescript
805
- * const options: CDCEventCaptureOptions = {
806
- * maxBufferSize: 1000 // Auto-flush when buffer reaches 1000 events
807
- * }
808
- * ```
809
- */
810
- export interface CDCEventCaptureOptions {
811
- /**
812
- * Maximum number of events to buffer before auto-flushing.
813
- * Defaults to Infinity (no auto-flush).
814
- */
815
- maxBufferSize?: number;
816
- }
817
- /**
818
- * Callback function for git operation events.
819
- *
820
- * @param event - The captured CDC event
821
- */
822
- export type GitOperationListener = (event: CDCEvent) => void;
823
- /**
824
- * Captures git operations and converts them to CDC events.
825
- *
826
- * @description
827
- * CDCEventCapture hooks into git operations and generates CDCEvents for each
828
- * operation. It maintains an internal buffer of events that can be flushed
829
- * manually or automatically when the buffer reaches a configured size.
830
- *
831
- * **Supported Operations:**
832
- * - Object creation/deletion (blobs, trees, commits, tags)
833
- * - Reference updates (branches, tags)
834
- * - Commit creation
835
- * - Pack reception
836
- * - Branch creation/deletion
837
- * - Tag creation
838
- * - Merge completion
839
- *
840
- * **Event Ordering:**
841
- * Events are assigned monotonically increasing sequence numbers within a
842
- * capture session. This ensures proper ordering for replay and analytics.
843
- *
844
- * @example
845
- * ```typescript
846
- * const capture = new CDCEventCapture({ maxBufferSize: 100 })
847
- *
848
- * // Add a listener for real-time processing
849
- * capture.addListener((event) => {
850
- * console.log(`Event: ${event.type} - ${event.id}`)
851
- * })
852
- *
853
- * // Capture git operations
854
- * await capture.onCommitCreated('abc123...', 'tree456...', ['parent789...'])
855
- * await capture.onRefUpdate('refs/heads/main', 'old...', 'new...')
856
- *
857
- * // Get buffered events
858
- * console.log(`Buffer size: ${capture.getBufferSize()}`)
859
- *
860
- * // Flush buffer
861
- * const events = await capture.flush()
862
- * console.log(`Flushed ${events.length} events`)
863
- * ```
864
- *
865
- * @class CDCEventCapture
866
- */
867
- export declare class CDCEventCapture {
868
- /**
869
- * Buffer of captured events.
870
- * @private
871
- */
872
- private events;
873
- /**
874
- * Monotonically increasing sequence counter.
875
- * @private
876
- */
877
- private sequenceCounter;
878
- /**
879
- * Registered event listeners.
880
- * @private
881
- */
882
- private listeners;
883
- /**
884
- * Maximum buffer size before auto-flush.
885
- * @private
886
- */
887
- private readonly maxBufferSize;
888
- /**
889
- * Creates a new CDC event capture instance.
890
- *
891
- * @param options - Configuration options
892
- */
893
- constructor(options?: CDCEventCaptureOptions);
894
- /**
895
- * Generates a unique event ID.
896
- * @private
897
- */
898
- private generateEventId;
899
- /**
900
- * Emits an event to the buffer and notifies listeners.
901
- * @private
902
- */
903
- private emitEvent;
904
- /**
905
- * Returns the next sequence number.
906
- * @private
907
- */
908
- private nextSequence;
909
- /**
910
- * Captures an object put (creation) operation.
911
- *
912
- * @description
913
- * Called when a git object (blob, tree, commit, tag) is written to storage.
914
- *
915
- * @param sha - SHA-1 hash of the object
916
- * @param type - Object type (blob, tree, commit, tag)
917
- * @param data - Raw object data
918
- *
919
- * @example
920
- * ```typescript
921
- * await capture.onObjectPut('abc123...', 'blob', blobData)
922
- * ```
923
- */
924
- onObjectPut(sha: string, type: string, data: Uint8Array): Promise<void>;
925
- /**
926
- * Captures an object deletion operation.
927
- *
928
- * @description
929
- * Called when a git object is deleted, typically during garbage collection.
930
- *
931
- * @param sha - SHA-1 hash of the deleted object
932
- *
933
- * @example
934
- * ```typescript
935
- * await capture.onObjectDelete('abc123...')
936
- * ```
937
- */
938
- onObjectDelete(sha: string): Promise<void>;
939
- /**
940
- * Captures a reference update operation.
941
- *
942
- * @description
943
- * Called when a git reference (branch, tag) is updated to point to a new commit.
944
- *
945
- * @param refName - Full reference name (e.g., 'refs/heads/main')
946
- * @param oldSha - Previous SHA (all zeros for new refs)
947
- * @param newSha - New SHA (all zeros for deleted refs)
948
- *
949
- * @example
950
- * ```typescript
951
- * await capture.onRefUpdate(
952
- * 'refs/heads/main',
953
- * 'oldcommit123...',
954
- * 'newcommit456...'
955
- * )
956
- * ```
957
- */
958
- onRefUpdate(refName: string, oldSha: string, newSha: string): Promise<void>;
959
- /**
960
- * Captures a commit creation operation.
961
- *
962
- * @description
963
- * Called when a new commit object is created.
964
- *
965
- * @param commitSha - SHA-1 hash of the commit
966
- * @param treeSha - SHA-1 hash of the tree the commit points to
967
- * @param parentShas - Array of parent commit SHAs
968
- *
969
- * @example
970
- * ```typescript
971
- * await capture.onCommitCreated(
972
- * 'commitabc123...',
973
- * 'treedef456...',
974
- * ['parent1...', 'parent2...']
975
- * )
976
- * ```
977
- */
978
- onCommitCreated(commitSha: string, treeSha: string, parentShas: string[]): Promise<void>;
979
- /**
980
- * Captures a pack reception operation.
981
- *
982
- * @description
983
- * Called when a packfile is received during a push or fetch operation.
984
- *
985
- * @param packData - Raw packfile data
986
- * @param objectCount - Number of objects in the pack
987
- *
988
- * @example
989
- * ```typescript
990
- * await capture.onPackReceived(packBuffer, 42)
991
- * ```
992
- */
993
- onPackReceived(packData: Uint8Array, objectCount: number): Promise<void>;
994
- /**
995
- * Captures a branch creation operation.
996
- *
997
- * @param branchName - Name of the branch (without refs/heads/ prefix)
998
- * @param sha - SHA-1 hash the branch points to
999
- *
1000
- * @example
1001
- * ```typescript
1002
- * await capture.onBranchCreated('feature-x', 'abc123...')
1003
- * ```
1004
- */
1005
- onBranchCreated(branchName: string, sha: string): Promise<void>;
1006
- /**
1007
- * Captures a branch deletion operation.
1008
- *
1009
- * @param branchName - Name of the deleted branch
1010
- *
1011
- * @example
1012
- * ```typescript
1013
- * await capture.onBranchDeleted('feature-x')
1014
- * ```
1015
- */
1016
- onBranchDeleted(branchName: string): Promise<void>;
1017
- /**
1018
- * Captures a tag creation operation.
1019
- *
1020
- * @param tagName - Name of the tag
1021
- * @param sha - SHA-1 hash the tag points to
1022
- *
1023
- * @example
1024
- * ```typescript
1025
- * await capture.onTagCreated('v1.0.0', 'abc123...')
1026
- * ```
1027
- */
1028
- onTagCreated(tagName: string, sha: string): Promise<void>;
1029
- /**
1030
- * Captures a merge completion operation.
1031
- *
1032
- * @param mergeSha - SHA-1 hash of the merge commit
1033
- * @param baseSha - SHA-1 hash of the base commit
1034
- * @param headSha - SHA-1 hash of the head commit being merged
1035
- *
1036
- * @example
1037
- * ```typescript
1038
- * await capture.onMergeCompleted('merge123...', 'base456...', 'head789...')
1039
- * ```
1040
- */
1041
- onMergeCompleted(mergeSha: string, baseSha: string, headSha: string): Promise<void>;
1042
- /**
1043
- * Returns a copy of all buffered events.
1044
- *
1045
- * @returns Array of buffered events
1046
- */
1047
- getEvents(): CDCEvent[];
1048
- /**
1049
- * Returns the current buffer size.
1050
- *
1051
- * @returns Number of events in the buffer
1052
- */
1053
- getBufferSize(): number;
1054
- /**
1055
- * Flushes all buffered events.
1056
- *
1057
- * @description
1058
- * Returns and clears all events from the buffer. The returned events
1059
- * can be processed, serialized, or forwarded to downstream systems.
1060
- *
1061
- * @returns Array of flushed events
1062
- *
1063
- * @example
1064
- * ```typescript
1065
- * const events = await capture.flush()
1066
- * console.log(`Flushed ${events.length} events`)
1067
- * await sendToAnalytics(events)
1068
- * ```
1069
- */
1070
- flush(): Promise<CDCEvent[]>;
1071
- /**
1072
- * Adds an event listener.
1073
- *
1074
- * @description
1075
- * Listeners are called synchronously for each event as it is captured.
1076
- *
1077
- * @param listener - Callback function to invoke for each event
1078
- *
1079
- * @example
1080
- * ```typescript
1081
- * capture.addListener((event) => {
1082
- * console.log(`New event: ${event.type}`)
1083
- * })
1084
- * ```
1085
- */
1086
- addListener(listener: GitOperationListener): void;
1087
- /**
1088
- * Removes an event listener.
1089
- *
1090
- * @param listener - The listener to remove
1091
- */
1092
- removeListener(listener: GitOperationListener): void;
1093
- }
1094
- /**
1095
- * Parquet schema definition for CDC events.
1096
- *
1097
- * @description
1098
- * Defines the column structure for CDC event Parquet files. The default
1099
- * schema includes standard CDC event fields and can be extended with
1100
- * custom fields for domain-specific data.
1101
- *
1102
- * @example
1103
- * ```typescript
1104
- * // Create default schema
1105
- * const schema = ParquetSchema.forCDCEvents()
1106
- *
1107
- * // Create schema with custom fields
1108
- * const customSchema = ParquetSchema.forCDCEvents([
1109
- * { name: 'repository_id', type: 'STRING', nullable: false },
1110
- * { name: 'user_id', type: 'STRING', nullable: true }
1111
- * ])
1112
- * ```
1113
- *
1114
- * @class ParquetSchema
1115
- */
1116
- export declare class ParquetSchema {
1117
- readonly fields: ParquetField[];
1118
- /**
1119
- * Creates a new ParquetSchema.
1120
- *
1121
- * @param fields - Array of field definitions
1122
- */
1123
- constructor(fields: ParquetField[]);
1124
- /**
1125
- * Creates a schema for CDC events with optional custom fields.
1126
- *
1127
- * @description
1128
- * Returns a schema with the standard CDC event fields. Additional
1129
- * custom fields can be appended for domain-specific data.
1130
- *
1131
- * @param customFields - Optional additional fields to add
1132
- * @returns A new ParquetSchema instance
1133
- *
1134
- * @example
1135
- * ```typescript
1136
- * const schema = ParquetSchema.forCDCEvents()
1137
- * // Schema includes: event_id, event_type, source, timestamp,
1138
- * // sequence, version, payload_json, sha
1139
- * ```
1140
- */
1141
- static forCDCEvents(customFields?: ParquetField[]): ParquetSchema;
1142
- }
1143
- /**
1144
- * Configuration options for the Parquet transformer.
1145
- */
1146
- export interface ParquetTransformerOptions {
1147
- /**
1148
- * Compression algorithm to use.
1149
- * @default 'snappy'
1150
- */
1151
- compression?: 'snappy' | 'gzip' | 'none';
1152
- }
1153
- /**
1154
- * Transforms CDC events to Parquet format.
1155
- *
1156
- * @description
1157
- * ParquetTransformer converts CDC events to Parquet-compatible rows and
1158
- * serializes batches of events to Parquet file format. It handles:
1159
- *
1160
- * - Event to row conversion (flattening the event structure)
1161
- * - JSON serialization of complex payloads
1162
- * - Batch creation with schema and metadata
1163
- * - Parquet file generation with compression
1164
- *
1165
- * @example
1166
- * ```typescript
1167
- * const transformer = new ParquetTransformer({ compression: 'snappy' })
1168
- *
1169
- * // Transform single event to row
1170
- * const row = transformer.eventToRow(event)
1171
- *
1172
- * // Transform batch of events
1173
- * const batch = transformer.eventsToBatch(events)
1174
- *
1175
- * // Generate Parquet file
1176
- * const buffer = await transformer.toParquetBuffer(batch)
1177
- * await r2.put('events.parquet', buffer)
1178
- * ```
1179
- *
1180
- * @class ParquetTransformer
1181
- */
1182
- export declare class ParquetTransformer {
1183
- /**
1184
- * Compression algorithm to use.
1185
- * @private
1186
- */
1187
- private readonly compression;
1188
- /**
1189
- * Creates a new ParquetTransformer.
1190
- *
1191
- * @param options - Transformer configuration
1192
- */
1193
- constructor(options?: ParquetTransformerOptions);
1194
- /**
1195
- * Converts a CDC event to a Parquet row.
1196
- *
1197
- * @description
1198
- * Flattens the event structure and serializes the payload to JSON
1199
- * for storage in Parquet format.
1200
- *
1201
- * @param event - The CDC event to convert
1202
- * @returns A Parquet row representation
1203
- *
1204
- * @example
1205
- * ```typescript
1206
- * const row = transformer.eventToRow(event)
1207
- * console.log(row.event_id, row.event_type, row.sha)
1208
- * ```
1209
- */
1210
- eventToRow(event: CDCEvent): ParquetRow;
1211
- /**
1212
- * Converts multiple CDC events to a Parquet batch.
1213
- *
1214
- * @description
1215
- * Transforms an array of events into a ParquetBatch structure
1216
- * ready for serialization to Parquet format.
1217
- *
1218
- * @param events - Array of CDC events to batch
1219
- * @returns A ParquetBatch ready for serialization
1220
- *
1221
- * @example
1222
- * ```typescript
1223
- * const batch = transformer.eventsToBatch(events)
1224
- * console.log(`Batch has ${batch.rowCount} rows`)
1225
- * ```
1226
- */
1227
- eventsToBatch(events: CDCEvent[]): ParquetBatch;
1228
- /**
1229
- * Serializes a ParquetBatch to a Parquet file buffer.
1230
- *
1231
- * @description
1232
- * Generates a Parquet-format file from the batch data. The output
1233
- * includes PAR1 magic bytes, compressed data, and footer metadata.
1234
- *
1235
- * @param batch - The ParquetBatch to serialize
1236
- * @returns Promise resolving to Parquet file as Uint8Array
1237
- *
1238
- * @example
1239
- * ```typescript
1240
- * const buffer = await transformer.toParquetBuffer(batch)
1241
- * await r2.put('events.parquet', buffer)
1242
- * ```
1243
- */
1244
- toParquetBuffer(batch: ParquetBatch): Promise<Uint8Array>;
1245
- private gzipCompress;
1246
- private simpleCompress;
1247
- }
1248
- /**
1249
- * Callback function for batch processing.
1250
- *
1251
- * @param batch - The batch result containing events and metadata
1252
- * @returns void or a Promise that resolves when processing is complete
1253
- */
1254
- type BatchHandler = (batch: BatchResult) => void | Promise<void>;
1255
- /**
1256
- * Batches CDC events for efficient processing.
1257
- *
1258
- * @description
1259
- * CDCBatcher collects CDC events and groups them into batches based on
1260
- * count or time thresholds. This enables efficient downstream processing
1261
- * by reducing the number of I/O operations and enabling bulk operations.
1262
- *
1263
- * **Batching Strategies:**
1264
- * - **Count-based**: Flush when batch reaches `batchSize` events
1265
- * - **Time-based**: Flush after `flushIntervalMs` even if batch is not full
1266
- *
1267
- * **Features:**
1268
- * - Async batch handlers for non-blocking processing
1269
- * - Multiple handlers for parallel processing pipelines
1270
- * - Explicit stop via `stop()` (call `flush()` first to process pending events)
1271
- * - Batch metadata (sequences, timestamps) for tracking
1272
- *
1273
- * @example
1274
- * ```typescript
1275
- * const batcher = new CDCBatcher({
1276
- * batchSize: 100,
1277
- * flushIntervalMs: 5000
1278
- * })
1279
- *
1280
- * // Register batch handler
1281
- * batcher.onBatch(async (batch) => {
1282
- * console.log(`Processing ${batch.eventCount} events`)
1283
- * console.log(`Sequence range: ${batch.minSequence} - ${batch.maxSequence}`)
1284
- * await saveToStorage(batch.events)
1285
- * })
1286
- *
1287
- * // Add events
1288
- * await batcher.add(event1)
1289
- * await batcher.add(event2)
1290
- *
1291
- * // Check pending events
1292
- * console.log(`Pending: ${batcher.getPendingCount()}`)
1293
- *
1294
- * // Manual flush
1295
- * const result = await batcher.flush()
1296
- *
1297
- * // Stop the batcher
1298
- * await batcher.stop()
1299
- * ```
1300
- *
1301
- * @class CDCBatcher
1302
- */
1303
- export declare class CDCBatcher {
1304
- /**
1305
- * Batch configuration.
1306
- * @private
1307
- */
1308
- private readonly config;
1309
- /**
1310
- * Buffer of pending events.
1311
- * @private
1312
- */
1313
- private events;
1314
- /**
1315
- * Registered batch handlers.
1316
- * @private
1317
- */
1318
- private batchHandlers;
1319
- /**
1320
- * Timer for time-based flushing.
1321
- * @private
1322
- */
1323
- private flushTimer;
1324
- /**
1325
- * Whether the batcher has been stopped.
1326
- * @private
1327
- */
1328
- private stopped;
1329
- /**
1330
- * Creates a new CDCBatcher.
1331
- *
1332
- * @param config - Batch configuration
1333
- */
1334
- constructor(config: BatchConfig);
1335
- private ensureTimerRunning;
1336
- private clearFlushTimer;
1337
- /**
1338
- * Adds an event to the batch.
1339
- *
1340
- * @description
1341
- * Adds the event to the pending batch. If the batch reaches the
1342
- * configured size, it is automatically flushed. The flush timer
1343
- * is started/restarted as needed.
1344
- *
1345
- * @param event - The CDC event to add
1346
- *
1347
- * @example
1348
- * ```typescript
1349
- * await batcher.add(event)
1350
- * ```
1351
- */
1352
- add(event: CDCEvent): Promise<void>;
1353
- /**
1354
- * Internal flush implementation.
1355
- * @private
1356
- */
1357
- private flushInternal;
1358
- /**
1359
- * Manually flushes pending events.
1360
- *
1361
- * @description
1362
- * Forces an immediate flush of all pending events, regardless of
1363
- * batch size or timer. Clears the flush timer.
1364
- *
1365
- * @returns Promise resolving to the batch result
1366
- *
1367
- * @example
1368
- * ```typescript
1369
- * const result = await batcher.flush()
1370
- * console.log(`Flushed ${result.eventCount} events`)
1371
- * ```
1372
- */
1373
- flush(): Promise<BatchResult>;
1374
- /**
1375
- * Returns the number of pending events.
1376
- *
1377
- * @returns Number of events waiting to be flushed
1378
- */
1379
- getPendingCount(): number;
1380
- /**
1381
- * Registers a batch handler.
1382
- *
1383
- * @description
1384
- * Handlers are called when a batch is flushed (automatically or manually).
1385
- * Multiple handlers can be registered for parallel processing.
1386
- *
1387
- * @param handler - Callback function to invoke for each batch
1388
- *
1389
- * @example
1390
- * ```typescript
1391
- * batcher.onBatch(async (batch) => {
1392
- * await saveToStorage(batch.events)
1393
- * })
1394
- * ```
1395
- */
1396
- onBatch(handler: BatchHandler): void;
1397
- /**
1398
- * Stops the batcher.
1399
- *
1400
- * @description
1401
- * Stops the flush timer and prevents further processing.
1402
- * Does NOT automatically flush pending events - call flush() first
1403
- * if you need to process remaining events.
1404
- *
1405
- * @example
1406
- * ```typescript
1407
- * await batcher.flush() // Process remaining events
1408
- * await batcher.stop() // Stop the timer
1409
- * ```
1410
- */
1411
- stop(): Promise<void>;
1412
- }
1413
- /**
1414
- * Callback for successful batch output.
1415
- *
1416
- * @param output - The pipeline output containing Parquet data
1417
- */
1418
- type OutputHandler = (output: PipelineOutput) => void;
1419
- /**
1420
- * Callback for failed events sent to dead letter queue.
1421
- *
1422
- * @param events - Array of failed events
1423
- * @param error - The error that caused the failure
1424
- */
1425
- type DeadLetterHandler = (events: CDCEvent[], error: Error) => void;
1426
- /**
1427
- * Main CDC Pipeline for processing git operation events.
1428
- *
1429
- * @description
1430
- * CDCPipeline orchestrates the complete change data capture flow from
1431
- * event ingestion to Parquet output. It integrates batching, transformation,
1432
- * retry handling, and dead letter queue management.
1433
- *
1434
- * **Pipeline Flow:**
1435
- * 1. Events are submitted via `process()` or `processMany()`
1436
- * 2. Events are validated and added to the batcher
1437
- * 3. When a batch is ready, it's transformed to Parquet format
1438
- * 4. On success, output handlers are notified
1439
- * 5. On failure, retries are attempted with exponential backoff
1440
- * 6. After max retries, events go to dead letter queue
1441
- *
1442
- * **Features:**
1443
- * - Configurable batch size and flush interval
1444
- * - Automatic retry with exponential backoff
1445
- * - Dead letter queue for failed events
1446
- * - Real-time metrics for monitoring
1447
- * - Graceful shutdown with pending event flush
1448
- *
1449
- * @example
1450
- * ```typescript
1451
- * const pipeline = new CDCPipeline({
1452
- * batchSize: 100,
1453
- * flushIntervalMs: 5000,
1454
- * maxRetries: 3,
1455
- * parquetCompression: 'snappy',
1456
- * outputPath: '/analytics',
1457
- * schemaVersion: 1
1458
- * })
1459
- *
1460
- * // Register handlers
1461
- * pipeline.onOutput(async (output) => {
1462
- * await r2.put(`cdc/${output.batchId}.parquet`, output.parquetBuffer)
1463
- * })
1464
- *
1465
- * pipeline.onDeadLetter((events, error) => {
1466
- * console.error(`Failed ${events.length} events:`, error)
1467
- * })
1468
- *
1469
- * // Start the pipeline
1470
- * await pipeline.start()
1471
- *
1472
- * // Process events
1473
- * await pipeline.process(event)
1474
- *
1475
- * // Check metrics
1476
- * const metrics = pipeline.getMetrics()
1477
- *
1478
- * // Stop gracefully
1479
- * const result = await pipeline.stop()
1480
- * console.log(`Flushed ${result.flushedCount} events on shutdown`)
1481
- * ```
1482
- *
1483
- * @class CDCPipeline
1484
- */
1485
- export declare class CDCPipeline {
1486
- /**
1487
- * Pipeline configuration.
1488
- * @private
1489
- */
1490
- private readonly config;
1491
- /**
1492
- * Current pipeline state.
1493
- * @private
1494
- */
1495
- private state;
1496
- /**
1497
- * Event batcher instance.
1498
- * @private
1499
- */
1500
- private batcher;
1501
- /**
1502
- * Parquet transformer instance.
1503
- * @private
1504
- */
1505
- private transformer;
1506
- /**
1507
- * Registered output handlers.
1508
- * @private
1509
- */
1510
- private outputHandlers;
1511
- /**
1512
- * Registered dead letter handlers.
1513
- * @private
1514
- */
1515
- private deadLetterHandlers;
1516
- /**
1517
- * Pipeline metrics.
1518
- * @private
1519
- */
1520
- private metrics;
1521
- /**
1522
- * Processing latency samples.
1523
- * @private
1524
- */
1525
- private processingLatencies;
1526
- /**
1527
- * Retry policy instance.
1528
- * @private
1529
- */
1530
- private retryPolicy;
1531
- /**
1532
- * Creates a new CDCPipeline.
1533
- *
1534
- * @param config - Pipeline configuration
1535
- */
1536
- constructor(config: CDCPipelineConfig);
1537
- /**
1538
- * Returns the current pipeline state.
1539
- *
1540
- * @returns Current state ('stopped', 'running', or 'paused')
1541
- */
1542
- getState(): CDCPipelineState;
1543
- /**
1544
- * Starts the pipeline.
1545
- *
1546
- * @description
1547
- * Initializes the batcher and begins accepting events. If already
1548
- * running, this method is a no-op.
1549
- *
1550
- * @example
1551
- * ```typescript
1552
- * await pipeline.start()
1553
- * console.log(pipeline.getState()) // 'running'
1554
- * ```
1555
- */
1556
- start(): Promise<void>;
1557
- /**
1558
- * Stops the pipeline.
1559
- *
1560
- * @description
1561
- * Flushes any pending events, stops the batcher, and sets state to stopped.
1562
- * Returns information about events flushed during shutdown.
1563
- *
1564
- * @returns Promise resolving to stop result with flushed event count
1565
- *
1566
- * @example
1567
- * ```typescript
1568
- * const result = await pipeline.stop()
1569
- * console.log(`Flushed ${result.flushedCount} events on shutdown`)
1570
- * ```
1571
- */
1572
- stop(): Promise<StopResult>;
1573
- /**
1574
- * Processes a single event.
1575
- *
1576
- * @description
1577
- * Validates the event and adds it to the batcher for processing.
1578
- * Updates metrics including latency tracking.
1579
- *
1580
- * @param event - The CDC event to process
1581
- * @returns Promise resolving to process result
1582
- *
1583
- * @throws {CDCError} PROCESSING_ERROR - If pipeline is not running
1584
- * @throws {CDCError} VALIDATION_ERROR - If event fails validation
1585
- *
1586
- * @example
1587
- * ```typescript
1588
- * const result = await pipeline.process(event)
1589
- * if (result.success) {
1590
- * console.log(`Processed event: ${result.eventId}`)
1591
- * }
1592
- * ```
1593
- */
1594
- process(event: CDCEvent): Promise<ProcessResult>;
1595
- /**
1596
- * Processes multiple events.
1597
- *
1598
- * @description
1599
- * Convenience method to process an array of events sequentially.
1600
- *
1601
- * @param events - Array of CDC events to process
1602
- * @returns Promise resolving to array of process results
1603
- *
1604
- * @example
1605
- * ```typescript
1606
- * const results = await pipeline.processMany(events)
1607
- * const successCount = results.filter(r => r.success).length
1608
- * console.log(`Processed ${successCount}/${events.length} events`)
1609
- * ```
1610
- */
1611
- processMany(events: CDCEvent[]): Promise<ProcessResult[]>;
1612
- /**
1613
- * Manually flushes pending events.
1614
- *
1615
- * @description
1616
- * Forces an immediate flush of the batcher and processes the
1617
- * resulting batch through the pipeline.
1618
- *
1619
- * @example
1620
- * ```typescript
1621
- * await pipeline.flush()
1622
- * console.log('All pending events flushed')
1623
- * ```
1624
- */
1625
- flush(): Promise<void>;
1626
- /**
1627
- * Handles a batch of events with retry logic.
1628
- * @private
1629
- */
1630
- private handleBatch;
1631
- /**
1632
- * Sleeps for the specified duration.
1633
- * @private
1634
- */
1635
- private sleep;
1636
- /**
1637
- * Updates the average latency metric.
1638
- * @private
1639
- */
1640
- private updateAvgLatency;
1641
- /**
1642
- * Returns current pipeline metrics.
1643
- *
1644
- * @description
1645
- * Returns a copy of the current metrics. Metrics are cumulative
1646
- * since pipeline creation.
1647
- *
1648
- * @returns Copy of current pipeline metrics
1649
- *
1650
- * @example
1651
- * ```typescript
1652
- * const metrics = pipeline.getMetrics()
1653
- * console.log(`Processed: ${metrics.eventsProcessed}`)
1654
- * console.log(`Batches: ${metrics.batchesGenerated}`)
1655
- * console.log(`Errors: ${metrics.errors}`)
1656
- * console.log(`Avg latency: ${metrics.avgProcessingLatencyMs}ms`)
1657
- * ```
1658
- */
1659
- getMetrics(): PipelineMetrics;
1660
- /**
1661
- * Registers an output handler.
1662
- *
1663
- * @description
1664
- * Output handlers are called when a batch is successfully processed
1665
- * and converted to Parquet format. Multiple handlers can be registered.
1666
- *
1667
- * @param handler - Callback to invoke for each successful batch
1668
- *
1669
- * @example
1670
- * ```typescript
1671
- * pipeline.onOutput(async (output) => {
1672
- * await r2.put(`cdc/${output.batchId}.parquet`, output.parquetBuffer)
1673
- * console.log(`Wrote ${output.events.length} events`)
1674
- * })
1675
- * ```
1676
- */
1677
- onOutput(handler: OutputHandler): void;
1678
- /**
1679
- * Registers a dead letter handler.
1680
- *
1681
- * @description
1682
- * Dead letter handlers are called when a batch fails after all
1683
- * retry attempts are exhausted. Use this for alerting, logging,
1684
- * or storing failed events for later reprocessing.
1685
- *
1686
- * @param handler - Callback to invoke for failed events
1687
- *
1688
- * @example
1689
- * ```typescript
1690
- * pipeline.onDeadLetter(async (events, error) => {
1691
- * console.error(`Failed to process ${events.length} events:`, error)
1692
- * // Store in dead letter queue for later retry
1693
- * await dlq.put(events)
1694
- * })
1695
- * ```
1696
- */
1697
- onDeadLetter(handler: DeadLetterHandler): void;
1698
- }
1699
- /**
1700
- * Creates a new CDC event.
1701
- *
1702
- * @description
1703
- * Factory function to create a properly structured CDC event with
1704
- * automatically generated ID and timestamp.
1705
- *
1706
- * @param type - The event type
1707
- * @param source - The event source
1708
- * @param payload - Event payload data
1709
- * @param options - Optional configuration
1710
- * @param options.sequence - Custom sequence number (default: 0)
1711
- * @returns A new CDCEvent
1712
- *
1713
- * @example
1714
- * ```typescript
1715
- * const event = createCDCEvent('COMMIT_CREATED', 'push', {
1716
- * operation: 'commit-create',
1717
- * sha: 'abc123...',
1718
- * treeSha: 'def456...',
1719
- * parentShas: ['parent1...']
1720
- * })
1721
- *
1722
- * // With sequence number
1723
- * const sequencedEvent = createCDCEvent('REF_UPDATED', 'push', {
1724
- * operation: 'ref-update',
1725
- * refName: 'refs/heads/main',
1726
- * oldSha: 'old...',
1727
- * newSha: 'new...'
1728
- * }, { sequence: 42 })
1729
- * ```
1730
- */
1731
- export declare function createCDCEvent(type: CDCEventType, source: CDCEventSource, payload: CDCEventPayload, options?: {
1732
- sequence?: number;
1733
- }): CDCEvent;
1734
- /**
1735
- * Serializes a CDC event to bytes.
1736
- *
1737
- * @description
1738
- * Converts a CDCEvent to a JSON-encoded Uint8Array for storage or
1739
- * transmission. Handles Uint8Array payload data by converting to arrays.
1740
- *
1741
- * @param event - The CDC event to serialize
1742
- * @returns The serialized event as a Uint8Array
1743
- *
1744
- * @example
1745
- * ```typescript
1746
- * const bytes = serializeEvent(event)
1747
- * await r2.put(`events/${event.id}`, bytes)
1748
- * ```
1749
- *
1750
- * @see {@link deserializeEvent} - Reverse operation
1751
- */
1752
- export declare function serializeEvent(event: CDCEvent): Uint8Array;
1753
- /**
1754
- * Deserializes bytes to a CDC event.
1755
- *
1756
- * @description
1757
- * Reconstructs a CDCEvent from JSON-encoded bytes. Handles Uint8Array
1758
- * restoration for payload data that was converted to arrays during
1759
- * serialization.
1760
- *
1761
- * @param bytes - The serialized event bytes
1762
- * @returns The deserialized CDCEvent
1763
- *
1764
- * @example
1765
- * ```typescript
1766
- * const bytes = await r2.get(`events/${eventId}`)
1767
- * const event = deserializeEvent(bytes)
1768
- * console.log(`Event type: ${event.type}`)
1769
- * ```
1770
- *
1771
- * @see {@link serializeEvent} - Reverse operation
1772
- */
1773
- export declare function deserializeEvent(bytes: Uint8Array): CDCEvent;
1774
- /**
1775
- * Validates a CDC event.
1776
- *
1777
- * @description
1778
- * Checks that an event has all required fields and valid values.
1779
- * Throws a CDCError if validation fails.
1780
- *
1781
- * **Validation Rules:**
1782
- * - Event must not be null/undefined
1783
- * - Event ID must be a non-empty string
1784
- * - Event type must be a valid CDCEventType
1785
- * - Timestamp must be a non-negative number
1786
- * - Sequence must be a non-negative number
1787
- *
1788
- * @param event - The CDC event to validate
1789
- * @returns The validated event (for chaining)
1790
- *
1791
- * @throws {CDCError} VALIDATION_ERROR - If validation fails
1792
- *
1793
- * @example
1794
- * ```typescript
1795
- * try {
1796
- * validateCDCEvent(event)
1797
- * // Event is valid
1798
- * } catch (error) {
1799
- * if (error instanceof CDCError) {
1800
- * console.log(`Invalid: ${error.message}`)
1801
- * }
1802
- * }
1803
- * ```
1804
- */
1805
- export declare function validateCDCEvent(event: CDCEvent): CDCEvent;
1806
- /**
1807
- * Starts a new pipeline with the given configuration.
1808
- *
1809
- * @description
1810
- * Creates and starts a new CDCPipeline, registering it by ID for
1811
- * later access. If a pipeline with the same ID already exists,
1812
- * it will be replaced (the old pipeline is not automatically stopped).
1813
- *
1814
- * @param id - Unique identifier for the pipeline
1815
- * @param config - Pipeline configuration
1816
- * @returns The started pipeline instance
1817
- *
1818
- * @example
1819
- * ```typescript
1820
- * const pipeline = startPipeline('main', {
1821
- * batchSize: 100,
1822
- * flushIntervalMs: 5000,
1823
- * maxRetries: 3,
1824
- * parquetCompression: 'snappy',
1825
- * outputPath: '/analytics',
1826
- * schemaVersion: 1
1827
- * })
1828
- *
1829
- * // Register handlers
1830
- * pipeline.onOutput((output) => console.log(`Batch: ${output.batchId}`))
1831
- * ```
1832
- */
1833
- export declare function startPipeline(id: string, config: CDCPipelineConfig): CDCPipeline;
1834
- /**
1835
- * Stops a pipeline by ID.
1836
- *
1837
- * @description
1838
- * Stops the pipeline identified by the given ID, flushing any pending
1839
- * events and removing it from the registry.
1840
- *
1841
- * @param id - Pipeline identifier
1842
- * @returns Promise resolving to the stop result (`flushedCount` is 0 if the pipeline is not found)
1843
- *
1844
- * @example
1845
- * ```typescript
1846
- * const result = await stopPipeline('main')
1847
- * console.log(`Flushed ${result.flushedCount} events on shutdown`)
1848
- * ```
1849
- */
1850
- export declare function stopPipeline(id: string): Promise<StopResult>;
1851
- /**
1852
- * Flushes a pipeline by ID.
1853
- *
1854
- * @description
1855
- * Forces an immediate flush of all pending events in the pipeline.
1856
- * No-op if pipeline not found.
1857
- *
1858
- * @param id - Pipeline identifier
1859
- *
1860
- * @example
1861
- * ```typescript
1862
- * await flushPipeline('main')
1863
- * console.log('All pending events flushed')
1864
- * ```
1865
- */
1866
- export declare function flushPipeline(id: string): Promise<void>;
1867
- /**
1868
- * Gets metrics for a pipeline by ID.
1869
- *
1870
- * @description
1871
- * Returns a copy of the current metrics for the specified pipeline.
1872
- * Returns null if the pipeline is not found.
1873
- *
1874
- * @param id - Pipeline identifier
1875
- * @returns Pipeline metrics or null if not found
1876
- *
1877
- * @example
1878
- * ```typescript
1879
- * const metrics = getPipelineMetrics('main')
1880
- * if (metrics) {
1881
- * console.log(`Events processed: ${metrics.eventsProcessed}`)
1882
- * console.log(`Errors: ${metrics.errors}`)
1883
- * }
1884
- * ```
1885
- */
1886
- export declare function getPipelineMetrics(id: string): PipelineMetrics | null;
1887
- export {};
1888
- //# sourceMappingURL=cdc-pipeline.d.ts.map