gitx.do 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. package/dist/cli/commands/blame.d.ts +259 -0
  2. package/dist/cli/commands/blame.d.ts.map +1 -0
  3. package/dist/cli/commands/blame.js +609 -0
  4. package/dist/cli/commands/blame.js.map +1 -0
  5. package/dist/cli/commands/branch.d.ts +249 -0
  6. package/dist/cli/commands/branch.d.ts.map +1 -0
  7. package/dist/cli/commands/branch.js +693 -0
  8. package/dist/cli/commands/branch.js.map +1 -0
  9. package/dist/cli/commands/commit.d.ts +182 -0
  10. package/dist/cli/commands/commit.d.ts.map +1 -0
  11. package/dist/cli/commands/commit.js +437 -0
  12. package/dist/cli/commands/commit.js.map +1 -0
  13. package/dist/cli/commands/diff.d.ts +464 -0
  14. package/dist/cli/commands/diff.d.ts.map +1 -0
  15. package/dist/cli/commands/diff.js +958 -0
  16. package/dist/cli/commands/diff.js.map +1 -0
  17. package/dist/cli/commands/log.d.ts +239 -0
  18. package/dist/cli/commands/log.d.ts.map +1 -0
  19. package/dist/cli/commands/log.js +535 -0
  20. package/dist/cli/commands/log.js.map +1 -0
  21. package/dist/cli/commands/review.d.ts +457 -0
  22. package/dist/cli/commands/review.d.ts.map +1 -0
  23. package/dist/cli/commands/review.js +533 -0
  24. package/dist/cli/commands/review.js.map +1 -0
  25. package/dist/cli/commands/status.d.ts +269 -0
  26. package/dist/cli/commands/status.d.ts.map +1 -0
  27. package/dist/cli/commands/status.js +493 -0
  28. package/dist/cli/commands/status.js.map +1 -0
  29. package/dist/cli/commands/web.d.ts +199 -0
  30. package/dist/cli/commands/web.d.ts.map +1 -0
  31. package/dist/cli/commands/web.js +696 -0
  32. package/dist/cli/commands/web.js.map +1 -0
  33. package/dist/cli/fs-adapter.d.ts +656 -0
  34. package/dist/cli/fs-adapter.d.ts.map +1 -0
  35. package/dist/cli/fs-adapter.js +1179 -0
  36. package/dist/cli/fs-adapter.js.map +1 -0
  37. package/dist/cli/index.d.ts +387 -0
  38. package/dist/cli/index.d.ts.map +1 -0
  39. package/dist/cli/index.js +523 -0
  40. package/dist/cli/index.js.map +1 -0
  41. package/dist/cli/ui/components/DiffView.d.ts +7 -0
  42. package/dist/cli/ui/components/DiffView.d.ts.map +1 -0
  43. package/dist/cli/ui/components/DiffView.js +11 -0
  44. package/dist/cli/ui/components/DiffView.js.map +1 -0
  45. package/dist/cli/ui/components/ErrorDisplay.d.ts +6 -0
  46. package/dist/cli/ui/components/ErrorDisplay.d.ts.map +1 -0
  47. package/dist/cli/ui/components/ErrorDisplay.js +11 -0
  48. package/dist/cli/ui/components/ErrorDisplay.js.map +1 -0
  49. package/dist/cli/ui/components/FuzzySearch.d.ts +9 -0
  50. package/dist/cli/ui/components/FuzzySearch.d.ts.map +1 -0
  51. package/dist/cli/ui/components/FuzzySearch.js +12 -0
  52. package/dist/cli/ui/components/FuzzySearch.js.map +1 -0
  53. package/dist/cli/ui/components/LoadingSpinner.d.ts +6 -0
  54. package/dist/cli/ui/components/LoadingSpinner.d.ts.map +1 -0
  55. package/dist/cli/ui/components/LoadingSpinner.js +10 -0
  56. package/dist/cli/ui/components/LoadingSpinner.js.map +1 -0
  57. package/dist/cli/ui/components/NavigationList.d.ts +9 -0
  58. package/dist/cli/ui/components/NavigationList.d.ts.map +1 -0
  59. package/dist/cli/ui/components/NavigationList.js +11 -0
  60. package/dist/cli/ui/components/NavigationList.js.map +1 -0
  61. package/dist/cli/ui/components/ScrollableContent.d.ts +8 -0
  62. package/dist/cli/ui/components/ScrollableContent.d.ts.map +1 -0
  63. package/dist/cli/ui/components/ScrollableContent.js +11 -0
  64. package/dist/cli/ui/components/ScrollableContent.js.map +1 -0
  65. package/dist/cli/ui/components/index.d.ts +7 -0
  66. package/dist/cli/ui/components/index.d.ts.map +1 -0
  67. package/dist/cli/ui/components/index.js +9 -0
  68. package/dist/cli/ui/components/index.js.map +1 -0
  69. package/dist/cli/ui/terminal-ui.d.ts +52 -0
  70. package/dist/cli/ui/terminal-ui.d.ts.map +1 -0
  71. package/dist/cli/ui/terminal-ui.js +121 -0
  72. package/dist/cli/ui/terminal-ui.js.map +1 -0
  73. package/dist/durable-object/object-store.d.ts +401 -23
  74. package/dist/durable-object/object-store.d.ts.map +1 -1
  75. package/dist/durable-object/object-store.js +414 -25
  76. package/dist/durable-object/object-store.js.map +1 -1
  77. package/dist/durable-object/schema.d.ts +188 -0
  78. package/dist/durable-object/schema.d.ts.map +1 -1
  79. package/dist/durable-object/schema.js +160 -0
  80. package/dist/durable-object/schema.js.map +1 -1
  81. package/dist/durable-object/wal.d.ts +336 -31
  82. package/dist/durable-object/wal.d.ts.map +1 -1
  83. package/dist/durable-object/wal.js +272 -27
  84. package/dist/durable-object/wal.js.map +1 -1
  85. package/dist/index.d.ts +379 -3
  86. package/dist/index.d.ts.map +1 -1
  87. package/dist/index.js +379 -7
  88. package/dist/index.js.map +1 -1
  89. package/dist/mcp/adapter.d.ts +579 -38
  90. package/dist/mcp/adapter.d.ts.map +1 -1
  91. package/dist/mcp/adapter.js +426 -33
  92. package/dist/mcp/adapter.js.map +1 -1
  93. package/dist/mcp/sandbox.d.ts +532 -29
  94. package/dist/mcp/sandbox.d.ts.map +1 -1
  95. package/dist/mcp/sandbox.js +389 -22
  96. package/dist/mcp/sandbox.js.map +1 -1
  97. package/dist/mcp/sdk-adapter.d.ts +478 -56
  98. package/dist/mcp/sdk-adapter.d.ts.map +1 -1
  99. package/dist/mcp/sdk-adapter.js +346 -44
  100. package/dist/mcp/sdk-adapter.js.map +1 -1
  101. package/dist/mcp/tools.d.ts +445 -30
  102. package/dist/mcp/tools.d.ts.map +1 -1
  103. package/dist/mcp/tools.js +363 -33
  104. package/dist/mcp/tools.js.map +1 -1
  105. package/dist/ops/blame.d.ts +424 -21
  106. package/dist/ops/blame.d.ts.map +1 -1
  107. package/dist/ops/blame.js +303 -20
  108. package/dist/ops/blame.js.map +1 -1
  109. package/dist/ops/branch.d.ts +583 -32
  110. package/dist/ops/branch.d.ts.map +1 -1
  111. package/dist/ops/branch.js +365 -23
  112. package/dist/ops/branch.js.map +1 -1
  113. package/dist/ops/commit-traversal.d.ts +164 -24
  114. package/dist/ops/commit-traversal.d.ts.map +1 -1
  115. package/dist/ops/commit-traversal.js +68 -2
  116. package/dist/ops/commit-traversal.js.map +1 -1
  117. package/dist/ops/commit.d.ts +387 -53
  118. package/dist/ops/commit.d.ts.map +1 -1
  119. package/dist/ops/commit.js +249 -29
  120. package/dist/ops/commit.js.map +1 -1
  121. package/dist/ops/merge-base.d.ts +195 -21
  122. package/dist/ops/merge-base.d.ts.map +1 -1
  123. package/dist/ops/merge-base.js +122 -12
  124. package/dist/ops/merge-base.js.map +1 -1
  125. package/dist/ops/merge.d.ts +600 -130
  126. package/dist/ops/merge.d.ts.map +1 -1
  127. package/dist/ops/merge.js +408 -60
  128. package/dist/ops/merge.js.map +1 -1
  129. package/dist/ops/tag.d.ts +67 -2
  130. package/dist/ops/tag.d.ts.map +1 -1
  131. package/dist/ops/tag.js +42 -1
  132. package/dist/ops/tag.js.map +1 -1
  133. package/dist/ops/tree-builder.d.ts +102 -6
  134. package/dist/ops/tree-builder.d.ts.map +1 -1
  135. package/dist/ops/tree-builder.js +30 -5
  136. package/dist/ops/tree-builder.js.map +1 -1
  137. package/dist/ops/tree-diff.d.ts +50 -2
  138. package/dist/ops/tree-diff.d.ts.map +1 -1
  139. package/dist/ops/tree-diff.js +50 -2
  140. package/dist/ops/tree-diff.js.map +1 -1
  141. package/dist/pack/delta.d.ts +211 -39
  142. package/dist/pack/delta.d.ts.map +1 -1
  143. package/dist/pack/delta.js +232 -46
  144. package/dist/pack/delta.js.map +1 -1
  145. package/dist/pack/format.d.ts +390 -28
  146. package/dist/pack/format.d.ts.map +1 -1
  147. package/dist/pack/format.js +344 -33
  148. package/dist/pack/format.js.map +1 -1
  149. package/dist/pack/full-generation.d.ts +313 -28
  150. package/dist/pack/full-generation.d.ts.map +1 -1
  151. package/dist/pack/full-generation.js +238 -19
  152. package/dist/pack/full-generation.js.map +1 -1
  153. package/dist/pack/generation.d.ts +346 -23
  154. package/dist/pack/generation.d.ts.map +1 -1
  155. package/dist/pack/generation.js +269 -21
  156. package/dist/pack/generation.js.map +1 -1
  157. package/dist/pack/index.d.ts +407 -86
  158. package/dist/pack/index.d.ts.map +1 -1
  159. package/dist/pack/index.js +351 -70
  160. package/dist/pack/index.js.map +1 -1
  161. package/dist/refs/branch.d.ts +517 -71
  162. package/dist/refs/branch.d.ts.map +1 -1
  163. package/dist/refs/branch.js +410 -26
  164. package/dist/refs/branch.js.map +1 -1
  165. package/dist/refs/storage.d.ts +610 -57
  166. package/dist/refs/storage.d.ts.map +1 -1
  167. package/dist/refs/storage.js +481 -29
  168. package/dist/refs/storage.js.map +1 -1
  169. package/dist/refs/tag.d.ts +677 -67
  170. package/dist/refs/tag.d.ts.map +1 -1
  171. package/dist/refs/tag.js +497 -30
  172. package/dist/refs/tag.js.map +1 -1
  173. package/dist/storage/lru-cache.d.ts +556 -53
  174. package/dist/storage/lru-cache.d.ts.map +1 -1
  175. package/dist/storage/lru-cache.js +439 -36
  176. package/dist/storage/lru-cache.js.map +1 -1
  177. package/dist/storage/object-index.d.ts +483 -38
  178. package/dist/storage/object-index.d.ts.map +1 -1
  179. package/dist/storage/object-index.js +388 -22
  180. package/dist/storage/object-index.js.map +1 -1
  181. package/dist/storage/r2-pack.d.ts +957 -94
  182. package/dist/storage/r2-pack.d.ts.map +1 -1
  183. package/dist/storage/r2-pack.js +756 -48
  184. package/dist/storage/r2-pack.js.map +1 -1
  185. package/dist/tiered/cdc-pipeline.d.ts +1610 -38
  186. package/dist/tiered/cdc-pipeline.d.ts.map +1 -1
  187. package/dist/tiered/cdc-pipeline.js +1131 -22
  188. package/dist/tiered/cdc-pipeline.js.map +1 -1
  189. package/dist/tiered/migration.d.ts +903 -41
  190. package/dist/tiered/migration.d.ts.map +1 -1
  191. package/dist/tiered/migration.js +646 -24
  192. package/dist/tiered/migration.js.map +1 -1
  193. package/dist/tiered/parquet-writer.d.ts +944 -47
  194. package/dist/tiered/parquet-writer.d.ts.map +1 -1
  195. package/dist/tiered/parquet-writer.js +667 -39
  196. package/dist/tiered/parquet-writer.js.map +1 -1
  197. package/dist/tiered/read-path.d.ts +728 -34
  198. package/dist/tiered/read-path.d.ts.map +1 -1
  199. package/dist/tiered/read-path.js +310 -27
  200. package/dist/tiered/read-path.js.map +1 -1
  201. package/dist/types/objects.d.ts +457 -0
  202. package/dist/types/objects.d.ts.map +1 -1
  203. package/dist/types/objects.js +305 -4
  204. package/dist/types/objects.js.map +1 -1
  205. package/dist/types/storage.d.ts +407 -35
  206. package/dist/types/storage.d.ts.map +1 -1
  207. package/dist/types/storage.js +27 -3
  208. package/dist/types/storage.js.map +1 -1
  209. package/dist/utils/hash.d.ts +133 -12
  210. package/dist/utils/hash.d.ts.map +1 -1
  211. package/dist/utils/hash.js +133 -12
  212. package/dist/utils/hash.js.map +1 -1
  213. package/dist/utils/sha1.d.ts +102 -9
  214. package/dist/utils/sha1.d.ts.map +1 -1
  215. package/dist/utils/sha1.js +114 -11
  216. package/dist/utils/sha1.js.map +1 -1
  217. package/dist/wire/capabilities.d.ts +896 -88
  218. package/dist/wire/capabilities.d.ts.map +1 -1
  219. package/dist/wire/capabilities.js +566 -62
  220. package/dist/wire/capabilities.js.map +1 -1
  221. package/dist/wire/pkt-line.d.ts +293 -15
  222. package/dist/wire/pkt-line.d.ts.map +1 -1
  223. package/dist/wire/pkt-line.js +251 -15
  224. package/dist/wire/pkt-line.js.map +1 -1
  225. package/dist/wire/receive-pack.d.ts +814 -64
  226. package/dist/wire/receive-pack.d.ts.map +1 -1
  227. package/dist/wire/receive-pack.js +542 -41
  228. package/dist/wire/receive-pack.js.map +1 -1
  229. package/dist/wire/smart-http.d.ts +575 -97
  230. package/dist/wire/smart-http.d.ts.map +1 -1
  231. package/dist/wire/smart-http.js +337 -46
  232. package/dist/wire/smart-http.js.map +1 -1
  233. package/dist/wire/upload-pack.d.ts +492 -98
  234. package/dist/wire/upload-pack.d.ts.map +1 -1
  235. package/dist/wire/upload-pack.js +347 -59
  236. package/dist/wire/upload-pack.js.map +1 -1
  237. package/package.json +10 -2
@@ -1,23 +1,131 @@
1
1
  /**
2
- * CDC (Change Data Capture) Pipeline for Git Operations
2
+ * @fileoverview CDC (Change Data Capture) Pipeline for Git Operations
3
3
  *
4
- * Provides functionality to capture, transform, batch, and output git operation events:
5
- * - Event capture from git operations (push, fetch, commits, etc.)
6
- * - Parquet transformation for analytics storage
7
- * - Batching with size and time-based flushing
8
- * - Error handling with retry policies
4
+ * @description
5
+ * This module provides a comprehensive Change Data Capture system for Git operations,
6
+ * enabling real-time event streaming, transformation, and analytics for Git repository events.
9
7
  *
10
- * gitdo: CDC pipeline implementation
8
+ * ## Key Features
9
+ *
10
+ * - **Event Capture**: Captures git operations (push, fetch, commits, branches, tags, merges)
11
+ * - **Parquet Transformation**: Converts events to columnar Parquet format for analytics
12
+ * - **Batching**: Efficient event batching with configurable size and time-based flushing
13
+ * - **Retry Policies**: Configurable exponential backoff with jitter for resilient processing
14
+ * - **Dead Letter Queue**: Handles failed events for later reprocessing
15
+ * - **Metrics**: Built-in tracking for events processed, batches, errors, and latency
16
+ *
17
+ * ## Architecture
18
+ *
19
+ * The pipeline consists of several components:
20
+ * 1. **CDCEventCapture**: Captures git operations and converts them to CDCEvents
21
+ * 2. **CDCBatcher**: Batches events for efficient processing
22
+ * 3. **ParquetTransformer**: Transforms events to Parquet format
23
+ * 4. **CDCPipeline**: Orchestrates the entire flow with error handling
24
+ *
25
+ * ## Event Flow
26
+ *
27
+ * ```
28
+ * Git Operation -> CDCEventCapture -> CDCBatcher -> ParquetTransformer -> Output
29
+ * |
30
+ * v
31
+ * (On failure) Dead Letter Queue
32
+ * ```
33
+ *
34
+ * @module tiered/cdc-pipeline
35
+ *
36
+ * @example
37
+ * ```typescript
38
+ * // Create and start a pipeline
39
+ * const pipeline = new CDCPipeline({
40
+ * batchSize: 100,
41
+ * flushIntervalMs: 5000,
42
+ * maxRetries: 3,
43
+ * parquetCompression: 'snappy',
44
+ * outputPath: '/analytics',
45
+ * schemaVersion: 1
46
+ * })
47
+ *
48
+ * await pipeline.start()
49
+ *
50
+ * // Process events
51
+ * pipeline.onOutput((output) => {
52
+ * console.log(`Generated batch: ${output.batchId}`)
53
+ * console.log(`Events: ${output.events.length}`)
54
+ * console.log(`Parquet size: ${output.parquetBuffer.length} bytes`)
55
+ * })
56
+ *
57
+ * pipeline.onDeadLetter((events, error) => {
58
+ * console.error(`Failed events: ${events.length}`, error)
59
+ * })
60
+ *
61
+ * // Create and process an event
62
+ * const event = createCDCEvent('COMMIT_CREATED', 'push', {
63
+ * operation: 'commit-create',
64
+ * sha: 'abc123...',
65
+ * treeSha: 'def456...',
66
+ * parentShas: ['parent1...']
67
+ * })
68
+ *
69
+ * await pipeline.process(event)
70
+ *
71
+ * // Get metrics
72
+ * const metrics = pipeline.getMetrics()
73
+ * console.log(`Processed: ${metrics.eventsProcessed}`)
74
+ * console.log(`Batches: ${metrics.batchesGenerated}`)
75
+ *
76
+ * // Stop the pipeline
77
+ * await pipeline.stop()
78
+ * ```
79
+ *
80
+ * @see {@link CDCPipeline} - Main pipeline orchestration class
81
+ * @see {@link CDCEventCapture} - Event capture from git operations
82
+ * @see {@link ParquetTransformer} - Parquet format transformation
11
83
  */
12
84
  // ============================================================================
13
85
  // Error Classes
14
86
  // ============================================================================
15
87
  /**
16
- * Custom error class for CDC operations
88
+ * Custom error class for CDC operations.
89
+ *
90
+ * @description
91
+ * CDCError provides structured error information for CDC pipeline failures,
92
+ * including an error type for programmatic handling and optional cause for
93
+ * error chaining.
94
+ *
95
+ * @example
96
+ * ```typescript
97
+ * try {
98
+ * await pipeline.process(event)
99
+ * } catch (error) {
100
+ * if (error instanceof CDCError) {
101
+ * switch (error.type) {
102
+ * case 'VALIDATION_ERROR':
103
+ * console.log('Invalid event:', error.message)
104
+ * break
105
+ * case 'PROCESSING_ERROR':
106
+ * console.log('Processing failed:', error.message)
107
+ * if (error.cause) {
108
+ * console.log('Caused by:', error.cause.message)
109
+ * }
110
+ * break
111
+ * }
112
+ * }
113
+ * }
114
+ * ```
115
+ *
116
+ * @class CDCError
117
+ * @extends Error
17
118
  */
18
119
  export class CDCError extends Error {
19
120
  type;
20
121
  cause;
122
+ /**
123
+ * Creates a new CDCError.
124
+ *
125
+ * @param type - Error type for categorization
126
+ * @param message - Human-readable error message
127
+ * @param cause - Optional underlying error that caused this error
128
+ */
21
129
  constructor(type, message, cause) {
22
130
  super(message);
23
131
  this.type = type;
@@ -26,16 +134,98 @@ export class CDCError extends Error {
26
134
  }
27
135
  }
28
136
  /**
29
- * Retry policy with exponential backoff
137
+ * Retry policy implementing exponential backoff with optional jitter.
138
+ *
139
+ * @description
140
+ * Provides a robust retry mechanism for handling transient failures.
141
+ * Uses exponential backoff to space out retry attempts, with optional
142
+ * jitter to prevent synchronized retries from multiple clients.
143
+ *
144
+ * **Backoff Formula:**
145
+ * `delay = min(initialDelay * (multiplier ^ attempt), maxDelay)`
146
+ *
147
+ * **With Jitter:**
148
+ * `delay = delay * random(0.5, 1.5)`
149
+ *
150
+ * @example
151
+ * ```typescript
152
+ * const policy = new CDCRetryPolicy({
153
+ * maxRetries: 3,
154
+ * initialDelayMs: 100,
155
+ * maxDelayMs: 5000,
156
+ * backoffMultiplier: 2,
157
+ * jitter: true
158
+ * })
159
+ *
160
+ * let attempts = 0
161
+ * while (attempts < 10) {
162
+ * try {
163
+ * await doOperation()
164
+ * break
165
+ * } catch (error) {
166
+ * attempts++
167
+ * if (!policy.shouldRetry(attempts)) {
168
+ * throw new Error('Max retries exceeded')
169
+ * }
170
+ * const delay = policy.getDelay(attempts)
171
+ * console.log(`Retry ${attempts} after ${delay}ms`)
172
+ * await sleep(delay)
173
+ * }
174
+ * }
175
+ * ```
176
+ *
177
+ * @class CDCRetryPolicy
30
178
  */
31
179
  export class CDCRetryPolicy {
180
+ /**
181
+ * Retry configuration.
182
+ * @private
183
+ */
32
184
  config;
185
+ /**
186
+ * Creates a new retry policy.
187
+ *
188
+ * @param config - Retry policy configuration
189
+ */
33
190
  constructor(config) {
34
191
  this.config = config;
35
192
  }
193
+ /**
194
+ * Determines whether another retry should be attempted.
195
+ *
196
+ * @param attemptCount - Number of attempts already made
197
+ * @returns true if more retries are allowed, false otherwise
198
+ *
199
+ * @example
200
+ * ```typescript
201
+ * if (policy.shouldRetry(3)) {
202
+ * // Retry is allowed
203
+ * }
204
+ * ```
205
+ */
36
206
  shouldRetry(attemptCount) {
37
207
  return attemptCount < this.config.maxRetries;
38
208
  }
209
+ /**
210
+ * Calculates the delay before the next retry.
211
+ *
212
+ * @description
213
+ * Computes delay using exponential backoff, capped at maxDelayMs.
214
+ * If jitter is enabled, applies a random factor between 0.5x and 1.5x.
215
+ *
216
+ * @param attemptCount - Number of attempts already made (1-indexed)
217
+ * @returns Delay in milliseconds before next retry
218
+ *
219
+ * @example
220
+ * ```typescript
221
+ * // With initialDelay=100, multiplier=2:
222
+ * // Attempt 1: 100ms * 2^1 = 200ms
223
+ * // Attempt 2: 100ms * 2^2 = 400ms
224
+ * // Attempt 3: 100ms * 2^3 = 800ms
225
+ * const delay = policy.getDelay(attemptCount)
226
+ * await sleep(delay)
227
+ * ```
228
+ */
39
229
  getDelay(attemptCount) {
40
230
  let delay = this.config.initialDelayMs * Math.pow(this.config.backoffMultiplier, attemptCount);
41
231
  delay = Math.min(delay, this.config.maxDelayMs);
@@ -51,19 +241,89 @@ export class CDCRetryPolicy {
51
241
  // CDC Event Capture
52
242
  // ============================================================================
53
243
  /**
54
- * Captures git operations and converts them to CDC events
244
+ * Captures git operations and converts them to CDC events.
245
+ *
246
+ * @description
247
+ * CDCEventCapture hooks into git operations and generates CDCEvents for each
248
+ * operation. It maintains an internal buffer of events that can be flushed
249
+ * manually or automatically when the buffer reaches a configured size.
250
+ *
251
+ * **Supported Operations:**
252
+ * - Object creation/deletion (blobs, trees, commits, tags)
253
+ * - Reference updates (branches, tags)
254
+ * - Commit creation
255
+ * - Pack reception
256
+ * - Branch creation/deletion
257
+ * - Tag creation
258
+ * - Merge completion
259
+ *
260
+ * **Event Ordering:**
261
+ * Events are assigned monotonically increasing sequence numbers within a
262
+ * capture session. This ensures proper ordering for replay and analytics.
263
+ *
264
+ * @example
265
+ * ```typescript
266
+ * const capture = new CDCEventCapture({ maxBufferSize: 100 })
267
+ *
268
+ * // Add a listener for real-time processing
269
+ * capture.addListener((event) => {
270
+ * console.log(`Event: ${event.type} - ${event.id}`)
271
+ * })
272
+ *
273
+ * // Capture git operations
274
+ * await capture.onCommitCreated('abc123...', 'tree456...', ['parent789...'])
275
+ * await capture.onRefUpdate('refs/heads/main', 'old...', 'new...')
276
+ *
277
+ * // Get buffered events
278
+ * console.log(`Buffer size: ${capture.getBufferSize()}`)
279
+ *
280
+ * // Flush buffer
281
+ * const events = await capture.flush()
282
+ * console.log(`Flushed ${events.length} events`)
283
+ * ```
284
+ *
285
+ * @class CDCEventCapture
55
286
  */
56
287
  export class CDCEventCapture {
288
+ /**
289
+ * Buffer of captured events.
290
+ * @private
291
+ */
57
292
  events = [];
293
+ /**
294
+ * Monotonically increasing sequence counter.
295
+ * @private
296
+ */
58
297
  sequenceCounter = 0;
298
+ /**
299
+ * Registered event listeners.
300
+ * @private
301
+ */
59
302
  listeners = [];
303
+ /**
304
+ * Maximum buffer size before auto-flush.
305
+ * @private
306
+ */
60
307
  maxBufferSize;
308
+ /**
309
+ * Creates a new CDC event capture instance.
310
+ *
311
+ * @param options - Configuration options
312
+ */
61
313
  constructor(options = {}) {
62
314
  this.maxBufferSize = options.maxBufferSize ?? Infinity;
63
315
  }
316
+ /**
317
+ * Generates a unique event ID.
318
+ * @private
319
+ */
64
320
  generateEventId() {
65
321
  return `evt-${Date.now()}-${Math.random().toString(36).slice(2)}`;
66
322
  }
323
+ /**
324
+ * Emits an event to the buffer and notifies listeners.
325
+ * @private
326
+ */
67
327
  async emitEvent(event) {
68
328
  // Auto-flush if buffer is full
69
329
  if (this.events.length >= this.maxBufferSize) {
@@ -75,9 +335,28 @@ export class CDCEventCapture {
75
335
  listener(event);
76
336
  }
77
337
  }
338
+ /**
339
+ * Returns the next sequence number.
340
+ * @private
341
+ */
78
342
  nextSequence() {
79
343
  return ++this.sequenceCounter;
80
344
  }
345
+ /**
346
+ * Captures an object put (creation) operation.
347
+ *
348
+ * @description
349
+ * Called when a git object (blob, tree, commit, tag) is written to storage.
350
+ *
351
+ * @param sha - SHA-1 hash of the object
352
+ * @param type - Object type (blob, tree, commit, tag)
353
+ * @param data - Raw object data
354
+ *
355
+ * @example
356
+ * ```typescript
357
+ * await capture.onObjectPut('abc123...', 'blob', blobData)
358
+ * ```
359
+ */
81
360
  async onObjectPut(sha, type, data) {
82
361
  const event = {
83
362
  id: this.generateEventId(),
@@ -95,6 +374,19 @@ export class CDCEventCapture {
95
374
  };
96
375
  await this.emitEvent(event);
97
376
  }
377
+ /**
378
+ * Captures an object deletion operation.
379
+ *
380
+ * @description
381
+ * Called when a git object is deleted, typically during garbage collection.
382
+ *
383
+ * @param sha - SHA-1 hash of the deleted object
384
+ *
385
+ * @example
386
+ * ```typescript
387
+ * await capture.onObjectDelete('abc123...')
388
+ * ```
389
+ */
98
390
  async onObjectDelete(sha) {
99
391
  const event = {
100
392
  id: this.generateEventId(),
@@ -110,6 +402,25 @@ export class CDCEventCapture {
110
402
  };
111
403
  await this.emitEvent(event);
112
404
  }
405
+ /**
406
+ * Captures a reference update operation.
407
+ *
408
+ * @description
409
+ * Called when a git reference (branch, tag) is updated to point to a new commit.
410
+ *
411
+ * @param refName - Full reference name (e.g., 'refs/heads/main')
412
+ * @param oldSha - Previous SHA (all zeros for new refs)
413
+ * @param newSha - New SHA (all zeros for deleted refs)
414
+ *
415
+ * @example
416
+ * ```typescript
417
+ * await capture.onRefUpdate(
418
+ * 'refs/heads/main',
419
+ * 'oldcommit123...',
420
+ * 'newcommit456...'
421
+ * )
422
+ * ```
423
+ */
113
424
  async onRefUpdate(refName, oldSha, newSha) {
114
425
  const event = {
115
426
  id: this.generateEventId(),
@@ -127,6 +438,25 @@ export class CDCEventCapture {
127
438
  };
128
439
  await this.emitEvent(event);
129
440
  }
441
+ /**
442
+ * Captures a commit creation operation.
443
+ *
444
+ * @description
445
+ * Called when a new commit object is created.
446
+ *
447
+ * @param commitSha - SHA-1 hash of the commit
448
+ * @param treeSha - SHA-1 hash of the tree the commit points to
449
+ * @param parentShas - Array of parent commit SHAs
450
+ *
451
+ * @example
452
+ * ```typescript
453
+ * await capture.onCommitCreated(
454
+ * 'commitabc123...',
455
+ * 'treedef456...',
456
+ * ['parent1...', 'parent2...']
457
+ * )
458
+ * ```
459
+ */
130
460
  async onCommitCreated(commitSha, treeSha, parentShas) {
131
461
  const event = {
132
462
  id: this.generateEventId(),
@@ -144,6 +474,20 @@ export class CDCEventCapture {
144
474
  };
145
475
  await this.emitEvent(event);
146
476
  }
477
+ /**
478
+ * Captures a pack reception operation.
479
+ *
480
+ * @description
481
+ * Called when a packfile is received during a push or fetch operation.
482
+ *
483
+ * @param packData - Raw packfile data
484
+ * @param objectCount - Number of objects in the pack
485
+ *
486
+ * @example
487
+ * ```typescript
488
+ * await capture.onPackReceived(packBuffer, 42)
489
+ * ```
490
+ */
147
491
  async onPackReceived(packData, objectCount) {
148
492
  const event = {
149
493
  id: this.generateEventId(),
@@ -160,6 +504,17 @@ export class CDCEventCapture {
160
504
  };
161
505
  await this.emitEvent(event);
162
506
  }
507
+ /**
508
+ * Captures a branch creation operation.
509
+ *
510
+ * @param branchName - Name of the branch (without refs/heads/ prefix)
511
+ * @param sha - SHA-1 hash the branch points to
512
+ *
513
+ * @example
514
+ * ```typescript
515
+ * await capture.onBranchCreated('feature-x', 'abc123...')
516
+ * ```
517
+ */
163
518
  async onBranchCreated(branchName, sha) {
164
519
  const event = {
165
520
  id: this.generateEventId(),
@@ -176,6 +531,16 @@ export class CDCEventCapture {
176
531
  };
177
532
  await this.emitEvent(event);
178
533
  }
534
+ /**
535
+ * Captures a branch deletion operation.
536
+ *
537
+ * @param branchName - Name of the deleted branch
538
+ *
539
+ * @example
540
+ * ```typescript
541
+ * await capture.onBranchDeleted('feature-x')
542
+ * ```
543
+ */
179
544
  async onBranchDeleted(branchName) {
180
545
  const event = {
181
546
  id: this.generateEventId(),
@@ -191,6 +556,17 @@ export class CDCEventCapture {
191
556
  };
192
557
  await this.emitEvent(event);
193
558
  }
559
+ /**
560
+ * Captures a tag creation operation.
561
+ *
562
+ * @param tagName - Name of the tag
563
+ * @param sha - SHA-1 hash the tag points to
564
+ *
565
+ * @example
566
+ * ```typescript
567
+ * await capture.onTagCreated('v1.0.0', 'abc123...')
568
+ * ```
569
+ */
194
570
  async onTagCreated(tagName, sha) {
195
571
  const event = {
196
572
  id: this.generateEventId(),
@@ -207,6 +583,18 @@ export class CDCEventCapture {
207
583
  };
208
584
  await this.emitEvent(event);
209
585
  }
586
+ /**
587
+ * Captures a merge completion operation.
588
+ *
589
+ * @param mergeSha - SHA-1 hash of the merge commit
590
+ * @param baseSha - SHA-1 hash of the base commit
591
+ * @param headSha - SHA-1 hash of the head commit being merged
592
+ *
593
+ * @example
594
+ * ```typescript
595
+ * await capture.onMergeCompleted('merge123...', 'base456...', 'head789...')
596
+ * ```
597
+ */
210
598
  async onMergeCompleted(mergeSha, baseSha, headSha) {
211
599
  const event = {
212
600
  id: this.generateEventId(),
@@ -224,20 +612,66 @@ export class CDCEventCapture {
224
612
  };
225
613
  await this.emitEvent(event);
226
614
  }
615
+ /**
616
+ * Returns a copy of all buffered events.
617
+ *
618
+ * @returns Array of buffered events
619
+ */
227
620
  getEvents() {
228
621
  return [...this.events];
229
622
  }
623
+ /**
624
+ * Returns the current buffer size.
625
+ *
626
+ * @returns Number of events in the buffer
627
+ */
230
628
  getBufferSize() {
231
629
  return this.events.length;
232
630
  }
631
+ /**
632
+ * Flushes all buffered events.
633
+ *
634
+ * @description
635
+ * Returns and clears all events from the buffer. The returned events
636
+ * can be processed, serialized, or forwarded to downstream systems.
637
+ *
638
+ * @returns Array of flushed events
639
+ *
640
+ * @example
641
+ * ```typescript
642
+ * const events = await capture.flush()
643
+ * console.log(`Flushed ${events.length} events`)
644
+ * await sendToAnalytics(events)
645
+ * ```
646
+ */
233
647
  async flush() {
234
648
  const flushed = [...this.events];
235
649
  this.events = [];
236
650
  return flushed;
237
651
  }
652
+ /**
653
+ * Adds an event listener.
654
+ *
655
+ * @description
656
+ * Listeners are called synchronously for each event as it is captured.
657
+ *
658
+ * @param listener - Callback function to invoke for each event
659
+ *
660
+ * @example
661
+ * ```typescript
662
+ * capture.addListener((event) => {
663
+ * console.log(`New event: ${event.type}`)
664
+ * })
665
+ * ```
666
+ */
238
667
  addListener(listener) {
239
668
  this.listeners.push(listener);
240
669
  }
670
+ /**
671
+ * Removes an event listener.
672
+ *
673
+ * @param listener - The listener to remove
674
+ */
241
675
  removeListener(listener) {
242
676
  const index = this.listeners.indexOf(listener);
243
677
  if (index !== -1) {
@@ -248,6 +682,10 @@ export class CDCEventCapture {
248
682
  // ============================================================================
249
683
  // Parquet Schema
250
684
  // ============================================================================
685
+ /**
686
+ * Default field definitions for CDC event Parquet schema.
687
+ * @internal
688
+ */
251
689
  const CDC_EVENT_FIELDS = [
252
690
  { name: 'event_id', type: 'STRING', nullable: false },
253
691
  { name: 'event_type', type: 'STRING', nullable: false },
@@ -259,13 +697,54 @@ const CDC_EVENT_FIELDS = [
259
697
  { name: 'sha', type: 'STRING', nullable: true }
260
698
  ];
261
699
  /**
262
- * Parquet schema definition for CDC events
700
+ * Parquet schema definition for CDC events.
701
+ *
702
+ * @description
703
+ * Defines the column structure for CDC event Parquet files. The default
704
+ * schema includes standard CDC event fields and can be extended with
705
+ * custom fields for domain-specific data.
706
+ *
707
+ * @example
708
+ * ```typescript
709
+ * // Create default schema
710
+ * const schema = ParquetSchema.forCDCEvents()
711
+ *
712
+ * // Create schema with custom fields
713
+ * const customSchema = ParquetSchema.forCDCEvents([
714
+ * { name: 'repository_id', type: 'STRING', nullable: false },
715
+ * { name: 'user_id', type: 'STRING', nullable: true }
716
+ * ])
717
+ * ```
718
+ *
719
+ * @class ParquetSchema
263
720
  */
264
721
  export class ParquetSchema {
265
722
  fields;
723
+ /**
724
+ * Creates a new ParquetSchema.
725
+ *
726
+ * @param fields - Array of field definitions
727
+ */
266
728
  constructor(fields) {
267
729
  this.fields = fields;
268
730
  }
731
+ /**
732
+ * Creates a schema for CDC events with optional custom fields.
733
+ *
734
+ * @description
735
+ * Returns a schema with the standard CDC event fields. Additional
736
+ * custom fields can be appended for domain-specific data.
737
+ *
738
+ * @param customFields - Optional additional fields to add
739
+ * @returns A new ParquetSchema instance
740
+ *
741
+ * @example
742
+ * ```typescript
743
+ * const schema = ParquetSchema.forCDCEvents()
744
+ * // Schema includes: event_id, event_type, source, timestamp,
745
+ * // sequence, version, payload_json, sha
746
+ * ```
747
+ */
269
748
  static forCDCEvents(customFields) {
270
749
  const fields = [...CDC_EVENT_FIELDS];
271
750
  if (customFields) {
@@ -275,13 +754,64 @@ export class ParquetSchema {
275
754
  }
276
755
  }
277
756
  /**
278
- * Transforms CDC events to Parquet format
757
+ * Transforms CDC events to Parquet format.
758
+ *
759
+ * @description
760
+ * ParquetTransformer converts CDC events to Parquet-compatible rows and
761
+ * serializes batches of events to Parquet file format. It handles:
762
+ *
763
+ * - Event to row conversion (flattening the event structure)
764
+ * - JSON serialization of complex payloads
765
+ * - Batch creation with schema and metadata
766
+ * - Parquet file generation with compression
767
+ *
768
+ * @example
769
+ * ```typescript
770
+ * const transformer = new ParquetTransformer({ compression: 'snappy' })
771
+ *
772
+ * // Transform single event to row
773
+ * const row = transformer.eventToRow(event)
774
+ *
775
+ * // Transform batch of events
776
+ * const batch = transformer.eventsToBatch(events)
777
+ *
778
+ * // Generate Parquet file
779
+ * const buffer = await transformer.toParquetBuffer(batch)
780
+ * await r2.put('events.parquet', buffer)
781
+ * ```
782
+ *
783
+ * @class ParquetTransformer
279
784
  */
280
785
  export class ParquetTransformer {
786
+ /**
787
+ * Compression algorithm to use.
788
+ * @private
789
+ */
281
790
  compression;
791
+ /**
792
+ * Creates a new ParquetTransformer.
793
+ *
794
+ * @param options - Transformer configuration
795
+ */
282
796
  constructor(options = {}) {
283
797
  this.compression = options.compression ?? 'snappy';
284
798
  }
799
+ /**
800
+ * Converts a CDC event to a Parquet row.
801
+ *
802
+ * @description
803
+ * Flattens the event structure and serializes the payload to JSON
804
+ * for storage in Parquet format.
805
+ *
806
+ * @param event - The CDC event to convert
807
+ * @returns A Parquet row representation
808
+ *
809
+ * @example
810
+ * ```typescript
811
+ * const row = transformer.eventToRow(event)
812
+ * console.log(row.event_id, row.event_type, row.sha)
813
+ * ```
814
+ */
285
815
  eventToRow(event) {
286
816
  // Create a serializable copy of the payload (Uint8Array not JSON-serializable)
287
817
  const serializablePayload = {
@@ -299,6 +829,22 @@ export class ParquetTransformer {
299
829
  sha: event.payload.sha ?? null
300
830
  };
301
831
  }
832
+ /**
833
+ * Converts multiple CDC events to a Parquet batch.
834
+ *
835
+ * @description
836
+ * Transforms an array of events into a ParquetBatch structure
837
+ * ready for serialization to Parquet format.
838
+ *
839
+ * @param events - Array of CDC events to batch
840
+ * @returns A ParquetBatch ready for serialization
841
+ *
842
+ * @example
843
+ * ```typescript
844
+ * const batch = transformer.eventsToBatch(events)
845
+ * console.log(`Batch has ${batch.rowCount} rows`)
846
+ * ```
847
+ */
302
848
  eventsToBatch(events) {
303
849
  const rows = events.map(e => this.eventToRow(e));
304
850
  return {
@@ -309,6 +855,22 @@ export class ParquetTransformer {
309
855
  compression: this.compression
310
856
  };
311
857
  }
858
+ /**
859
+ * Serializes a ParquetBatch to a Parquet file buffer.
860
+ *
861
+ * @description
862
+ * Generates a simplified, Parquet-like buffer from the batch data (no real Parquet library is used). The output
863
+ * includes PAR1 magic bytes, compressed data, and footer metadata.
864
+ *
865
+ * @param batch - The ParquetBatch to serialize
866
+ * @returns Promise resolving to Parquet file as Uint8Array
867
+ *
868
+ * @example
869
+ * ```typescript
870
+ * const buffer = await transformer.toParquetBuffer(batch)
871
+ * await r2.put('events.parquet', buffer)
872
+ * ```
873
+ */
312
874
  async toParquetBuffer(batch) {
313
875
  // Build a simplified Parquet-like buffer
314
876
  // Real implementation would use a proper Parquet library
@@ -383,14 +945,84 @@ export class ParquetTransformer {
383
945
  }
384
946
  }
385
947
  /**
386
- * Batches CDC events for efficient processing
948
+ * Batches CDC events for efficient processing.
949
+ *
950
+ * @description
951
+ * CDCBatcher collects CDC events and groups them into batches based on
952
+ * count or time thresholds. This enables efficient downstream processing
953
+ * by reducing the number of I/O operations and enabling bulk operations.
954
+ *
955
+ * **Batching Strategies:**
956
+ * - **Count-based**: Flush when batch reaches `batchSize` events
957
+ * - **Time-based**: Flush after `flushIntervalMs` even if batch is not full
958
+ *
959
+ * **Features:**
960
+ * - Async batch handlers for non-blocking processing
961
+ * - Multiple handlers for parallel processing pipelines
962
+ * - Explicit stop: `stop()` clears the flush timer but does not flush; call `flush()` first to drain pending events
963
+ * - Batch metadata (sequences, timestamps) for tracking
964
+ *
965
+ * @example
966
+ * ```typescript
967
+ * const batcher = new CDCBatcher({
968
+ * batchSize: 100,
969
+ * flushIntervalMs: 5000
970
+ * })
971
+ *
972
+ * // Register batch handler
973
+ * batcher.onBatch(async (batch) => {
974
+ * console.log(`Processing ${batch.eventCount} events`)
975
+ * console.log(`Sequence range: ${batch.minSequence} - ${batch.maxSequence}`)
976
+ * await saveToStorage(batch.events)
977
+ * })
978
+ *
979
+ * // Add events
980
+ * await batcher.add(event1)
981
+ * await batcher.add(event2)
982
+ *
983
+ * // Check pending events
984
+ * console.log(`Pending: ${batcher.getPendingCount()}`)
985
+ *
986
+ * // Manual flush
987
+ * const result = await batcher.flush()
988
+ *
989
+ * // Stop the batcher
990
+ * await batcher.stop()
991
+ * ```
992
+ *
993
+ * @class CDCBatcher
387
994
  */
388
995
  export class CDCBatcher {
996
+ /**
997
+ * Batch configuration.
998
+ * @private
999
+ */
389
1000
  config;
1001
+ /**
1002
+ * Buffer of pending events.
1003
+ * @private
1004
+ */
390
1005
  events = [];
1006
+ /**
1007
+ * Registered batch handlers.
1008
+ * @private
1009
+ */
391
1010
  batchHandlers = [];
1011
+ /**
1012
+ * Timer for time-based flushing.
1013
+ * @private
1014
+ */
392
1015
  flushTimer = null;
1016
+ /**
1017
+ * Whether the batcher has been stopped.
1018
+ * @private
1019
+ */
393
1020
  stopped = false;
1021
+ /**
1022
+ * Creates a new CDCBatcher.
1023
+ *
1024
+ * @param config - Batch configuration
1025
+ */
394
1026
  constructor(config) {
395
1027
  this.config = config;
396
1028
  // Don't start timer in constructor - start when first event is added
@@ -447,6 +1079,21 @@ export class CDCBatcher {
447
1079
  this.flushTimer = null;
448
1080
  }
449
1081
  }
1082
+ /**
1083
+ * Adds an event to the batch.
1084
+ *
1085
+ * @description
1086
+ * Adds the event to the pending batch. If the batch reaches the
1087
+ * configured size, it is automatically flushed. The flush timer
1088
+ * is started/restarted as needed.
1089
+ *
1090
+ * @param event - The CDC event to add
1091
+ *
1092
+ * @example
1093
+ * ```typescript
1094
+ * await batcher.add(event)
1095
+ * ```
1096
+ */
450
1097
  async add(event) {
451
1098
  this.events.push(event);
452
1099
  // Ensure flush timer is running when we have pending events
@@ -457,6 +1104,10 @@ export class CDCBatcher {
457
1104
  // Timer will be re-started on next add() if needed
458
1105
  }
459
1106
  }
1107
+ /**
1108
+ * Internal flush implementation.
1109
+ * @private
1110
+ */
460
1111
  async flushInternal() {
461
1112
  if (this.events.length === 0) {
462
1113
  return { events: [], eventCount: 0, success: true };
@@ -480,33 +1131,167 @@ export class CDCBatcher {
480
1131
  }
481
1132
  return result;
482
1133
  }
1134
+ /**
1135
+ * Manually flushes pending events.
1136
+ *
1137
+ * @description
1138
+ * Forces an immediate flush of all pending events, regardless of
1139
+ * batch size or timer. Clears the flush timer.
1140
+ *
1141
+ * @returns Promise resolving to the batch result
1142
+ *
1143
+ * @example
1144
+ * ```typescript
1145
+ * const result = await batcher.flush()
1146
+ * console.log(`Flushed ${result.eventCount} events`)
1147
+ * ```
1148
+ */
483
1149
  async flush() {
484
1150
  this.clearFlushTimer();
485
1151
  const result = await this.flushInternal();
486
1152
  // Don't restart timer - it will be started on next add() if needed
487
1153
  return result;
488
1154
  }
1155
+ /**
1156
+ * Returns the number of pending events.
1157
+ *
1158
+ * @returns Number of events waiting to be flushed
1159
+ */
489
1160
  getPendingCount() {
490
1161
  return this.events.length;
491
1162
  }
1163
+ /**
1164
+ * Registers a batch handler.
1165
+ *
1166
+ * @description
1167
+ * Handlers are called when a batch is flushed (automatically or manually).
1168
+ * Multiple handlers can be registered for parallel processing.
1169
+ *
1170
+ * @param handler - Callback function to invoke for each batch
1171
+ *
1172
+ * @example
1173
+ * ```typescript
1174
+ * batcher.onBatch(async (batch) => {
1175
+ * await saveToStorage(batch.events)
1176
+ * })
1177
+ * ```
1178
+ */
492
1179
  onBatch(handler) {
493
1180
  this.batchHandlers.push(handler);
494
1181
  }
1182
+ /**
1183
+ * Stops the batcher.
1184
+ *
1185
+ * @description
1186
+ * Stops the flush timer and prevents further processing.
1187
+ * Does NOT automatically flush pending events - call flush() first
1188
+ * if you need to process remaining events.
1189
+ *
1190
+ * @example
1191
+ * ```typescript
1192
+ * await batcher.flush() // Process remaining events
1193
+ * await batcher.stop() // Stop the timer
1194
+ * ```
1195
+ */
495
1196
  async stop() {
496
1197
  this.stopped = true;
497
1198
  this.clearFlushTimer();
498
1199
  }
499
1200
  }
500
1201
  /**
501
- * Main CDC Pipeline for processing git operation events
1202
+ * Main CDC Pipeline for processing git operation events.
1203
+ *
1204
+ * @description
1205
+ * CDCPipeline orchestrates the complete change data capture flow from
1206
+ * event ingestion to Parquet output. It integrates batching, transformation,
1207
+ * retry handling, and dead letter queue management.
1208
+ *
1209
+ * **Pipeline Flow:**
1210
+ * 1. Events are submitted via `process()` or `processMany()`
1211
+ * 2. Events are validated and added to the batcher
1212
+ * 3. When a batch is ready, it's transformed to Parquet format
1213
+ * 4. On success, output handlers are notified
1214
+ * 5. On failure, retries are attempted with exponential backoff
1215
+ * 6. After max retries, events go to dead letter queue
1216
+ *
1217
+ * **Features:**
1218
+ * - Configurable batch size and flush interval
1219
+ * - Automatic retry with exponential backoff
1220
+ * - Dead letter queue for failed events
1221
+ * - Real-time metrics for monitoring
1222
+ * - Graceful shutdown with pending event flush
1223
+ *
1224
+ * @example
1225
+ * ```typescript
1226
+ * const pipeline = new CDCPipeline({
1227
+ * batchSize: 100,
1228
+ * flushIntervalMs: 5000,
1229
+ * maxRetries: 3,
1230
+ * parquetCompression: 'snappy',
1231
+ * outputPath: '/analytics',
1232
+ * schemaVersion: 1
1233
+ * })
1234
+ *
1235
+ * // Register handlers
1236
+ * pipeline.onOutput(async (output) => {
1237
+ * await r2.put(`cdc/${output.batchId}.parquet`, output.parquetBuffer)
1238
+ * })
1239
+ *
1240
+ * pipeline.onDeadLetter((events, error) => {
1241
+ * console.error(`Failed ${events.length} events:`, error)
1242
+ * })
1243
+ *
1244
+ * // Start the pipeline
1245
+ * await pipeline.start()
1246
+ *
1247
+ * // Process events
1248
+ * await pipeline.process(event)
1249
+ *
1250
+ * // Check metrics
1251
+ * const metrics = pipeline.getMetrics()
1252
+ *
1253
+ * // Stop gracefully
1254
+ * const result = await pipeline.stop()
1255
+ * console.log(`Flushed ${result.flushedCount} events on shutdown`)
1256
+ * ```
1257
+ *
1258
+ * @class CDCPipeline
502
1259
  */
503
1260
  export class CDCPipeline {
1261
+ /**
1262
+ * Pipeline configuration.
1263
+ * @private
1264
+ */
504
1265
  config;
1266
+ /**
1267
+ * Current pipeline state.
1268
+ * @private
1269
+ */
505
1270
  state = 'stopped';
1271
+ /**
1272
+ * Event batcher instance.
1273
+ * @private
1274
+ */
506
1275
  batcher = null;
1276
+ /**
1277
+ * Parquet transformer instance.
1278
+ * @private
1279
+ */
507
1280
  transformer;
1281
+ /**
1282
+ * Registered output handlers.
1283
+ * @private
1284
+ */
508
1285
  outputHandlers = [];
1286
+ /**
1287
+ * Registered dead letter handlers.
1288
+ * @private
1289
+ */
509
1290
  deadLetterHandlers = [];
1291
+ /**
1292
+ * Pipeline metrics.
1293
+ * @private
1294
+ */
510
1295
  metrics = {
511
1296
  eventsProcessed: 0,
512
1297
  batchesGenerated: 0,
@@ -514,8 +1299,21 @@ export class CDCPipeline {
514
1299
  errors: 0,
515
1300
  avgProcessingLatencyMs: 0
516
1301
  };
1302
+ /**
1303
+ * Processing latency samples.
1304
+ * @private
1305
+ */
517
1306
  processingLatencies = [];
1307
+ /**
1308
+ * Retry policy instance.
1309
+ * @private
1310
+ */
518
1311
  retryPolicy;
1312
+ /**
1313
+ * Creates a new CDCPipeline.
1314
+ *
1315
+ * @param config - Pipeline configuration
1316
+ */
519
1317
  constructor(config) {
520
1318
  this.config = config;
521
1319
  this.transformer = new ParquetTransformer({
@@ -528,9 +1326,27 @@ export class CDCPipeline {
528
1326
  backoffMultiplier: 2
529
1327
  });
530
1328
  }
1329
+ /**
1330
+ * Returns the current pipeline state.
1331
+ *
1332
+ * @returns Current state ('stopped', 'running', or 'paused')
1333
+ */
531
1334
  getState() {
532
1335
  return this.state;
533
1336
  }
1337
+ /**
1338
+ * Starts the pipeline.
1339
+ *
1340
+ * @description
1341
+ * Initializes the batcher and begins accepting events. If already
1342
+ * running, this method is a no-op.
1343
+ *
1344
+ * @example
1345
+ * ```typescript
1346
+ * await pipeline.start()
1347
+ * console.log(pipeline.getState()) // 'running'
1348
+ * ```
1349
+ */
534
1350
  async start() {
535
1351
  if (this.state === 'running')
536
1352
  return;
@@ -543,6 +1359,21 @@ export class CDCPipeline {
543
1359
  });
544
1360
  this.state = 'running';
545
1361
  }
1362
+ /**
1363
+ * Stops the pipeline.
1364
+ *
1365
+ * @description
1366
+ * Flushes any pending events, stops the batcher, and sets state to stopped.
1367
+ * Returns information about events flushed during shutdown.
1368
+ *
1369
+ * @returns Promise resolving to stop result with flushed event count
1370
+ *
1371
+ * @example
1372
+ * ```typescript
1373
+ * const result = await pipeline.stop()
1374
+ * console.log(`Flushed ${result.flushedCount} events on shutdown`)
1375
+ * ```
1376
+ */
546
1377
  async stop() {
547
1378
  if (this.state === 'stopped') {
548
1379
  return { flushedCount: 0 };
@@ -557,6 +1388,27 @@ export class CDCPipeline {
557
1388
  this.state = 'stopped';
558
1389
  return { flushedCount };
559
1390
  }
1391
+ /**
1392
+ * Processes a single event.
1393
+ *
1394
+ * @description
1395
+ * Validates the event and adds it to the batcher for processing.
1396
+ * Updates metrics including latency tracking.
1397
+ *
1398
+ * @param event - The CDC event to process
1399
+ * @returns Promise resolving to process result
1400
+ *
1401
+ * @throws {CDCError} PROCESSING_ERROR - If pipeline is not running
1402
+ * @throws {CDCError} VALIDATION_ERROR - If event fails validation
1403
+ *
1404
+ * @example
1405
+ * ```typescript
1406
+ * const result = await pipeline.process(event)
1407
+ * if (result.success) {
1408
+ * console.log(`Processed event: ${result.eventId}`)
1409
+ * }
1410
+ * ```
1411
+ */
560
1412
  async process(event) {
561
1413
  if (this.state !== 'running') {
562
1414
  throw new CDCError('PROCESSING_ERROR', 'Pipeline is not running');
@@ -571,6 +1423,22 @@ export class CDCPipeline {
571
1423
  this.updateAvgLatency();
572
1424
  return { success: true, eventId: event.id };
573
1425
  }
1426
+ /**
1427
+ * Processes multiple events.
1428
+ *
1429
+ * @description
1430
+ * Convenience method to process an array of events sequentially.
1431
+ *
1432
+ * @param events - Array of CDC events to process
1433
+ * @returns Promise resolving to array of process results
1434
+ *
1435
+ * @example
1436
+ * ```typescript
1437
+ * const results = await pipeline.processMany(events)
1438
+ * const successCount = results.filter(r => r.success).length
1439
+ * console.log(`Processed ${successCount}/${events.length} events`)
1440
+ * ```
1441
+ */
574
1442
  async processMany(events) {
575
1443
  const results = [];
576
1444
  for (const event of events) {
@@ -579,6 +1447,19 @@ export class CDCPipeline {
579
1447
  }
580
1448
  return results;
581
1449
  }
1450
+ /**
1451
+ * Manually flushes pending events.
1452
+ *
1453
+ * @description
1454
+ * Forces an immediate flush of the batcher and processes the
1455
+ * resulting batch through the pipeline.
1456
+ *
1457
+ * @example
1458
+ * ```typescript
1459
+ * await pipeline.flush()
1460
+ * console.log('All pending events flushed')
1461
+ * ```
1462
+ */
582
1463
  async flush() {
583
1464
  if (this.batcher) {
584
1465
  const result = await this.batcher.flush();
@@ -587,6 +1468,10 @@ export class CDCPipeline {
587
1468
  }
588
1469
  }
589
1470
  }
1471
+ /**
1472
+ * Handles a batch of events with retry logic.
1473
+ * @private
1474
+ */
590
1475
  async handleBatch(batch) {
591
1476
  let attempts = 0;
592
1477
  let lastError = null;
@@ -624,9 +1509,17 @@ export class CDCPipeline {
624
1509
  }
625
1510
  }
626
1511
  }
1512
+ /**
1513
+ * Sleeps for the specified duration.
1514
+ * @private
1515
+ */
627
1516
  sleep(ms) {
628
1517
  return new Promise(resolve => setTimeout(resolve, ms));
629
1518
  }
1519
+ /**
1520
+ * Updates the average latency metric.
1521
+ * @private
1522
+ */
630
1523
  updateAvgLatency() {
631
1524
  if (this.processingLatencies.length === 0)
632
1525
  return;
@@ -637,12 +1530,66 @@ export class CDCPipeline {
637
1530
  const sum = this.processingLatencies.reduce((a, b) => a + b, 0);
638
1531
  this.metrics.avgProcessingLatencyMs = sum / this.processingLatencies.length;
639
1532
  }
1533
+ /**
1534
+ * Returns current pipeline metrics.
1535
+ *
1536
+ * @description
1537
+ * Returns a copy of the current metrics. Metrics are cumulative
1538
+ * since pipeline creation.
1539
+ *
1540
+ * @returns Copy of current pipeline metrics
1541
+ *
1542
+ * @example
1543
+ * ```typescript
1544
+ * const metrics = pipeline.getMetrics()
1545
+ * console.log(`Processed: ${metrics.eventsProcessed}`)
1546
+ * console.log(`Batches: ${metrics.batchesGenerated}`)
1547
+ * console.log(`Errors: ${metrics.errors}`)
1548
+ * console.log(`Avg latency: ${metrics.avgProcessingLatencyMs}ms`)
1549
+ * ```
1550
+ */
640
1551
  getMetrics() {
641
1552
  return { ...this.metrics };
642
1553
  }
1554
+ /**
1555
+ * Registers an output handler.
1556
+ *
1557
+ * @description
1558
+ * Output handlers are called when a batch is successfully processed
1559
+ * and converted to Parquet format. Multiple handlers can be registered.
1560
+ *
1561
+ * @param handler - Callback to invoke for each successful batch
1562
+ *
1563
+ * @example
1564
+ * ```typescript
1565
+ * pipeline.onOutput(async (output) => {
1566
+ * await r2.put(`cdc/${output.batchId}.parquet`, output.parquetBuffer)
1567
+ * console.log(`Wrote ${output.events.length} events`)
1568
+ * })
1569
+ * ```
1570
+ */
643
1571
  onOutput(handler) {
644
1572
  this.outputHandlers.push(handler);
645
1573
  }
1574
+ /**
1575
+ * Registers a dead letter handler.
1576
+ *
1577
+ * @description
1578
+ * Dead letter handlers are called when a batch fails after all
1579
+ * retry attempts are exhausted. Use this for alerting, logging,
1580
+ * or storing failed events for later reprocessing.
1581
+ *
1582
+ * @param handler - Callback to invoke for failed events
1583
+ *
1584
+ * @example
1585
+ * ```typescript
1586
+ * pipeline.onDeadLetter(async (events, error) => {
1587
+ * console.error(`Failed to process ${events.length} events:`, error)
1588
+ * // Store in dead letter queue for later retry
1589
+ * await dlq.put(events)
1590
+ * })
1591
+ * ```
1592
+ */
646
1593
  onDeadLetter(handler) {
647
1594
  this.deadLetterHandlers.push(handler);
648
1595
  }
@@ -650,6 +1597,10 @@ export class CDCPipeline {
650
1597
  // ============================================================================
651
1598
  // Utility Functions
652
1599
  // ============================================================================
1600
+ /**
1601
+ * Valid CDC event types for validation.
1602
+ * @internal
1603
+ */
653
1604
  const VALID_EVENT_TYPES = [
654
1605
  'OBJECT_CREATED',
655
1606
  'OBJECT_DELETED',
@@ -663,7 +1614,36 @@ const VALID_EVENT_TYPES = [
663
1614
  'MERGE_COMPLETED'
664
1615
  ];
665
1616
  /**
666
- * Create a new CDC event
1617
+ * Creates a new CDC event.
1618
+ *
1619
+ * @description
1620
+ * Factory function to create a properly structured CDC event with
1621
+ * automatically generated ID and timestamp.
1622
+ *
1623
+ * @param type - The event type
1624
+ * @param source - The event source
1625
+ * @param payload - Event payload data
1626
+ * @param options - Optional configuration
1627
+ * @param options.sequence - Custom sequence number (default: 0)
1628
+ * @returns A new CDCEvent
1629
+ *
1630
+ * @example
1631
+ * ```typescript
1632
+ * const event = createCDCEvent('COMMIT_CREATED', 'push', {
1633
+ * operation: 'commit-create',
1634
+ * sha: 'abc123...',
1635
+ * treeSha: 'def456...',
1636
+ * parentShas: ['parent1...']
1637
+ * })
1638
+ *
1639
+ * // With sequence number
1640
+ * const sequencedEvent = createCDCEvent('REF_UPDATED', 'push', {
1641
+ * operation: 'ref-update',
1642
+ * refName: 'refs/heads/main',
1643
+ * oldSha: 'old...',
1644
+ * newSha: 'new...'
1645
+ * }, { sequence: 42 })
1646
+ * ```
667
1647
  */
668
1648
  export function createCDCEvent(type, source, payload, options) {
669
1649
  return {
@@ -677,7 +1657,22 @@ export function createCDCEvent(type, source, payload, options) {
677
1657
  };
678
1658
  }
679
1659
  /**
680
- * Serialize a CDC event to bytes
1660
+ * Serializes a CDC event to bytes.
1661
+ *
1662
+ * @description
1663
+ * Converts a CDCEvent to a JSON-encoded Uint8Array for storage or
1664
+ * transmission. Handles Uint8Array payload data by converting to arrays.
1665
+ *
1666
+ * @param event - The CDC event to serialize
1667
+ * @returns The serialized event as a Uint8Array
1668
+ *
1669
+ * @example
1670
+ * ```typescript
1671
+ * const bytes = serializeEvent(event)
1672
+ * await r2.put(`events/${event.id}`, bytes)
1673
+ * ```
1674
+ *
1675
+ * @see {@link deserializeEvent} - Reverse operation
681
1676
  */
682
1677
  export function serializeEvent(event) {
683
1678
  // Create a serializable copy (Uint8Array is not JSON-serializable)
@@ -692,7 +1687,24 @@ export function serializeEvent(event) {
692
1687
  return new TextEncoder().encode(json);
693
1688
  }
694
1689
  /**
695
- * Deserialize bytes to a CDC event
1690
+ * Deserializes bytes to a CDC event.
1691
+ *
1692
+ * @description
1693
+ * Reconstructs a CDCEvent from JSON-encoded bytes. Handles Uint8Array
1694
+ * restoration for payload data that was converted to arrays during
1695
+ * serialization.
1696
+ *
1697
+ * @param bytes - The serialized event bytes
1698
+ * @returns The deserialized CDCEvent
1699
+ *
1700
+ * @example
1701
+ * ```typescript
1702
+ * const bytes = await r2.get(`events/${eventId}`)
1703
+ * const event = deserializeEvent(bytes)
1704
+ * console.log(`Event type: ${event.type}`)
1705
+ * ```
1706
+ *
1707
+ * @see {@link serializeEvent} - Reverse operation
696
1708
  */
697
1709
  export function deserializeEvent(bytes) {
698
1710
  const json = new TextDecoder().decode(bytes);
@@ -704,7 +1716,35 @@ export function deserializeEvent(bytes) {
704
1716
  return parsed;
705
1717
  }
706
1718
  /**
707
- * Validate a CDC event
1719
+ * Validates a CDC event.
1720
+ *
1721
+ * @description
1722
+ * Checks that an event has all required fields and valid values.
1723
+ * Throws a CDCError if validation fails.
1724
+ *
1725
+ * **Validation Rules:**
1726
+ * - Event must not be null/undefined
1727
+ * - Event ID must be a non-empty string
1728
+ * - Event type must be a valid CDCEventType
1729
+ * - Timestamp must be a non-negative number
1730
+ * - Sequence must be a non-negative number
1731
+ *
1732
+ * @param event - The CDC event to validate
1733
+ * @returns The validated event (for chaining)
1734
+ *
1735
+ * @throws {CDCError} VALIDATION_ERROR - If validation fails
1736
+ *
1737
+ * @example
1738
+ * ```typescript
1739
+ * try {
1740
+ * validateCDCEvent(event)
1741
+ * // Event is valid
1742
+ * } catch (error) {
1743
+ * if (error instanceof CDCError) {
1744
+ * console.log(`Invalid: ${error.message}`)
1745
+ * }
1746
+ * }
1747
+ * ```
708
1748
  */
709
1749
  export function validateCDCEvent(event) {
710
1750
  if (!event) {
@@ -727,9 +1767,37 @@ export function validateCDCEvent(event) {
727
1767
  // ============================================================================
728
1768
  // Pipeline Operations
729
1769
  // ============================================================================
1770
+ /**
1771
+ * Registry of active pipelines by ID.
1772
+ * @internal
1773
+ */
730
1774
  const activePipelines = new Map();
731
1775
  /**
732
- * Start a pipeline with the given configuration
1776
+ * Starts a new pipeline with the given configuration.
1777
+ *
1778
+ * @description
1779
+ * Creates and starts a new CDCPipeline, registering it by ID for
1780
+ * later access. If a pipeline with the same ID already exists,
1781
+ * it will be replaced (the old pipeline is not automatically stopped).
1782
+ *
1783
+ * @param id - Unique identifier for the pipeline
1784
+ * @param config - Pipeline configuration
1785
+ * @returns The started pipeline instance
1786
+ *
1787
+ * @example
1788
+ * ```typescript
1789
+ * const pipeline = startPipeline('main', {
1790
+ * batchSize: 100,
1791
+ * flushIntervalMs: 5000,
1792
+ * maxRetries: 3,
1793
+ * parquetCompression: 'snappy',
1794
+ * outputPath: '/analytics',
1795
+ * schemaVersion: 1
1796
+ * })
1797
+ *
1798
+ * // Register handlers
1799
+ * pipeline.onOutput((output) => console.log(`Batch: ${output.batchId}`))
1800
+ * ```
733
1801
  */
734
1802
  export function startPipeline(id, config) {
735
1803
  const pipeline = new CDCPipeline(config);
@@ -738,7 +1806,20 @@ export function startPipeline(id, config) {
738
1806
  return pipeline;
739
1807
  }
740
1808
  /**
741
- * Stop a pipeline by ID
1809
+ * Stops a pipeline by ID.
1810
+ *
1811
+ * @description
1812
+ * Stops the pipeline identified by the given ID, flushing any pending
1813
+ * events and removing it from the registry.
1814
+ *
1815
+ * @param id - Pipeline identifier
1816
+ * @returns Promise resolving to the stop result (`flushedCount` is 0 if the pipeline is not found)
1817
+ *
1818
+ * @example
1819
+ * ```typescript
1820
+ * const result = await stopPipeline('main')
1821
+ * console.log(`Flushed ${result.flushedCount} events on shutdown`)
1822
+ * ```
742
1823
  */
743
1824
  export async function stopPipeline(id) {
744
1825
  const pipeline = activePipelines.get(id);
@@ -750,7 +1831,19 @@ export async function stopPipeline(id) {
750
1831
  return result;
751
1832
  }
752
1833
  /**
753
- * Flush a pipeline by ID
1834
+ * Flushes a pipeline by ID.
1835
+ *
1836
+ * @description
1837
+ * Forces an immediate flush of all pending events in the pipeline.
1838
+ * No-op if pipeline not found.
1839
+ *
1840
+ * @param id - Pipeline identifier
1841
+ *
1842
+ * @example
1843
+ * ```typescript
1844
+ * await flushPipeline('main')
1845
+ * console.log('All pending events flushed')
1846
+ * ```
754
1847
  */
755
1848
  export async function flushPipeline(id) {
756
1849
  const pipeline = activePipelines.get(id);
@@ -759,7 +1852,23 @@ export async function flushPipeline(id) {
759
1852
  }
760
1853
  }
761
1854
  /**
762
- * Get metrics for a pipeline by ID
1855
+ * Gets metrics for a pipeline by ID.
1856
+ *
1857
+ * @description
1858
+ * Returns a copy of the current metrics for the specified pipeline.
1859
+ * Returns null if the pipeline is not found.
1860
+ *
1861
+ * @param id - Pipeline identifier
1862
+ * @returns Pipeline metrics or null if not found
1863
+ *
1864
+ * @example
1865
+ * ```typescript
1866
+ * const metrics = getPipelineMetrics('main')
1867
+ * if (metrics) {
1868
+ * console.log(`Events processed: ${metrics.eventsProcessed}`)
1869
+ * console.log(`Errors: ${metrics.errors}`)
1870
+ * }
1871
+ * ```
763
1872
  */
764
1873
  export function getPipelineMetrics(id) {
765
1874
  const pipeline = activePipelines.get(id);