gitx.do 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. package/dist/cli/commands/blame.d.ts +259 -0
  2. package/dist/cli/commands/blame.d.ts.map +1 -0
  3. package/dist/cli/commands/blame.js +609 -0
  4. package/dist/cli/commands/blame.js.map +1 -0
  5. package/dist/cli/commands/branch.d.ts +249 -0
  6. package/dist/cli/commands/branch.d.ts.map +1 -0
  7. package/dist/cli/commands/branch.js +693 -0
  8. package/dist/cli/commands/branch.js.map +1 -0
  9. package/dist/cli/commands/commit.d.ts +182 -0
  10. package/dist/cli/commands/commit.d.ts.map +1 -0
  11. package/dist/cli/commands/commit.js +437 -0
  12. package/dist/cli/commands/commit.js.map +1 -0
  13. package/dist/cli/commands/diff.d.ts +464 -0
  14. package/dist/cli/commands/diff.d.ts.map +1 -0
  15. package/dist/cli/commands/diff.js +958 -0
  16. package/dist/cli/commands/diff.js.map +1 -0
  17. package/dist/cli/commands/log.d.ts +239 -0
  18. package/dist/cli/commands/log.d.ts.map +1 -0
  19. package/dist/cli/commands/log.js +535 -0
  20. package/dist/cli/commands/log.js.map +1 -0
  21. package/dist/cli/commands/review.d.ts +457 -0
  22. package/dist/cli/commands/review.d.ts.map +1 -0
  23. package/dist/cli/commands/review.js +533 -0
  24. package/dist/cli/commands/review.js.map +1 -0
  25. package/dist/cli/commands/status.d.ts +269 -0
  26. package/dist/cli/commands/status.d.ts.map +1 -0
  27. package/dist/cli/commands/status.js +493 -0
  28. package/dist/cli/commands/status.js.map +1 -0
  29. package/dist/cli/commands/web.d.ts +199 -0
  30. package/dist/cli/commands/web.d.ts.map +1 -0
  31. package/dist/cli/commands/web.js +696 -0
  32. package/dist/cli/commands/web.js.map +1 -0
  33. package/dist/cli/fs-adapter.d.ts +656 -0
  34. package/dist/cli/fs-adapter.d.ts.map +1 -0
  35. package/dist/cli/fs-adapter.js +1179 -0
  36. package/dist/cli/fs-adapter.js.map +1 -0
  37. package/dist/cli/index.d.ts +387 -0
  38. package/dist/cli/index.d.ts.map +1 -0
  39. package/dist/cli/index.js +523 -0
  40. package/dist/cli/index.js.map +1 -0
  41. package/dist/cli/ui/components/DiffView.d.ts +7 -0
  42. package/dist/cli/ui/components/DiffView.d.ts.map +1 -0
  43. package/dist/cli/ui/components/DiffView.js +11 -0
  44. package/dist/cli/ui/components/DiffView.js.map +1 -0
  45. package/dist/cli/ui/components/ErrorDisplay.d.ts +6 -0
  46. package/dist/cli/ui/components/ErrorDisplay.d.ts.map +1 -0
  47. package/dist/cli/ui/components/ErrorDisplay.js +11 -0
  48. package/dist/cli/ui/components/ErrorDisplay.js.map +1 -0
  49. package/dist/cli/ui/components/FuzzySearch.d.ts +9 -0
  50. package/dist/cli/ui/components/FuzzySearch.d.ts.map +1 -0
  51. package/dist/cli/ui/components/FuzzySearch.js +12 -0
  52. package/dist/cli/ui/components/FuzzySearch.js.map +1 -0
  53. package/dist/cli/ui/components/LoadingSpinner.d.ts +6 -0
  54. package/dist/cli/ui/components/LoadingSpinner.d.ts.map +1 -0
  55. package/dist/cli/ui/components/LoadingSpinner.js +10 -0
  56. package/dist/cli/ui/components/LoadingSpinner.js.map +1 -0
  57. package/dist/cli/ui/components/NavigationList.d.ts +9 -0
  58. package/dist/cli/ui/components/NavigationList.d.ts.map +1 -0
  59. package/dist/cli/ui/components/NavigationList.js +11 -0
  60. package/dist/cli/ui/components/NavigationList.js.map +1 -0
  61. package/dist/cli/ui/components/ScrollableContent.d.ts +8 -0
  62. package/dist/cli/ui/components/ScrollableContent.d.ts.map +1 -0
  63. package/dist/cli/ui/components/ScrollableContent.js +11 -0
  64. package/dist/cli/ui/components/ScrollableContent.js.map +1 -0
  65. package/dist/cli/ui/components/index.d.ts +7 -0
  66. package/dist/cli/ui/components/index.d.ts.map +1 -0
  67. package/dist/cli/ui/components/index.js +9 -0
  68. package/dist/cli/ui/components/index.js.map +1 -0
  69. package/dist/cli/ui/terminal-ui.d.ts +52 -0
  70. package/dist/cli/ui/terminal-ui.d.ts.map +1 -0
  71. package/dist/cli/ui/terminal-ui.js +121 -0
  72. package/dist/cli/ui/terminal-ui.js.map +1 -0
  73. package/dist/durable-object/object-store.d.ts +401 -23
  74. package/dist/durable-object/object-store.d.ts.map +1 -1
  75. package/dist/durable-object/object-store.js +414 -25
  76. package/dist/durable-object/object-store.js.map +1 -1
  77. package/dist/durable-object/schema.d.ts +188 -0
  78. package/dist/durable-object/schema.d.ts.map +1 -1
  79. package/dist/durable-object/schema.js +160 -0
  80. package/dist/durable-object/schema.js.map +1 -1
  81. package/dist/durable-object/wal.d.ts +336 -31
  82. package/dist/durable-object/wal.d.ts.map +1 -1
  83. package/dist/durable-object/wal.js +272 -27
  84. package/dist/durable-object/wal.js.map +1 -1
  85. package/dist/index.d.ts +379 -3
  86. package/dist/index.d.ts.map +1 -1
  87. package/dist/index.js +379 -7
  88. package/dist/index.js.map +1 -1
  89. package/dist/mcp/adapter.d.ts +579 -38
  90. package/dist/mcp/adapter.d.ts.map +1 -1
  91. package/dist/mcp/adapter.js +426 -33
  92. package/dist/mcp/adapter.js.map +1 -1
  93. package/dist/mcp/sandbox.d.ts +532 -29
  94. package/dist/mcp/sandbox.d.ts.map +1 -1
  95. package/dist/mcp/sandbox.js +389 -22
  96. package/dist/mcp/sandbox.js.map +1 -1
  97. package/dist/mcp/sdk-adapter.d.ts +478 -56
  98. package/dist/mcp/sdk-adapter.d.ts.map +1 -1
  99. package/dist/mcp/sdk-adapter.js +346 -44
  100. package/dist/mcp/sdk-adapter.js.map +1 -1
  101. package/dist/mcp/tools.d.ts +445 -30
  102. package/dist/mcp/tools.d.ts.map +1 -1
  103. package/dist/mcp/tools.js +363 -33
  104. package/dist/mcp/tools.js.map +1 -1
  105. package/dist/ops/blame.d.ts +424 -21
  106. package/dist/ops/blame.d.ts.map +1 -1
  107. package/dist/ops/blame.js +303 -20
  108. package/dist/ops/blame.js.map +1 -1
  109. package/dist/ops/branch.d.ts +583 -32
  110. package/dist/ops/branch.d.ts.map +1 -1
  111. package/dist/ops/branch.js +365 -23
  112. package/dist/ops/branch.js.map +1 -1
  113. package/dist/ops/commit-traversal.d.ts +164 -24
  114. package/dist/ops/commit-traversal.d.ts.map +1 -1
  115. package/dist/ops/commit-traversal.js +68 -2
  116. package/dist/ops/commit-traversal.js.map +1 -1
  117. package/dist/ops/commit.d.ts +387 -53
  118. package/dist/ops/commit.d.ts.map +1 -1
  119. package/dist/ops/commit.js +249 -29
  120. package/dist/ops/commit.js.map +1 -1
  121. package/dist/ops/merge-base.d.ts +195 -21
  122. package/dist/ops/merge-base.d.ts.map +1 -1
  123. package/dist/ops/merge-base.js +122 -12
  124. package/dist/ops/merge-base.js.map +1 -1
  125. package/dist/ops/merge.d.ts +600 -130
  126. package/dist/ops/merge.d.ts.map +1 -1
  127. package/dist/ops/merge.js +408 -60
  128. package/dist/ops/merge.js.map +1 -1
  129. package/dist/ops/tag.d.ts +67 -2
  130. package/dist/ops/tag.d.ts.map +1 -1
  131. package/dist/ops/tag.js +42 -1
  132. package/dist/ops/tag.js.map +1 -1
  133. package/dist/ops/tree-builder.d.ts +102 -6
  134. package/dist/ops/tree-builder.d.ts.map +1 -1
  135. package/dist/ops/tree-builder.js +30 -5
  136. package/dist/ops/tree-builder.js.map +1 -1
  137. package/dist/ops/tree-diff.d.ts +50 -2
  138. package/dist/ops/tree-diff.d.ts.map +1 -1
  139. package/dist/ops/tree-diff.js +50 -2
  140. package/dist/ops/tree-diff.js.map +1 -1
  141. package/dist/pack/delta.d.ts +211 -39
  142. package/dist/pack/delta.d.ts.map +1 -1
  143. package/dist/pack/delta.js +232 -46
  144. package/dist/pack/delta.js.map +1 -1
  145. package/dist/pack/format.d.ts +390 -28
  146. package/dist/pack/format.d.ts.map +1 -1
  147. package/dist/pack/format.js +344 -33
  148. package/dist/pack/format.js.map +1 -1
  149. package/dist/pack/full-generation.d.ts +313 -28
  150. package/dist/pack/full-generation.d.ts.map +1 -1
  151. package/dist/pack/full-generation.js +238 -19
  152. package/dist/pack/full-generation.js.map +1 -1
  153. package/dist/pack/generation.d.ts +346 -23
  154. package/dist/pack/generation.d.ts.map +1 -1
  155. package/dist/pack/generation.js +269 -21
  156. package/dist/pack/generation.js.map +1 -1
  157. package/dist/pack/index.d.ts +407 -86
  158. package/dist/pack/index.d.ts.map +1 -1
  159. package/dist/pack/index.js +351 -70
  160. package/dist/pack/index.js.map +1 -1
  161. package/dist/refs/branch.d.ts +517 -71
  162. package/dist/refs/branch.d.ts.map +1 -1
  163. package/dist/refs/branch.js +410 -26
  164. package/dist/refs/branch.js.map +1 -1
  165. package/dist/refs/storage.d.ts +610 -57
  166. package/dist/refs/storage.d.ts.map +1 -1
  167. package/dist/refs/storage.js +481 -29
  168. package/dist/refs/storage.js.map +1 -1
  169. package/dist/refs/tag.d.ts +677 -67
  170. package/dist/refs/tag.d.ts.map +1 -1
  171. package/dist/refs/tag.js +497 -30
  172. package/dist/refs/tag.js.map +1 -1
  173. package/dist/storage/lru-cache.d.ts +556 -53
  174. package/dist/storage/lru-cache.d.ts.map +1 -1
  175. package/dist/storage/lru-cache.js +439 -36
  176. package/dist/storage/lru-cache.js.map +1 -1
  177. package/dist/storage/object-index.d.ts +483 -38
  178. package/dist/storage/object-index.d.ts.map +1 -1
  179. package/dist/storage/object-index.js +388 -22
  180. package/dist/storage/object-index.js.map +1 -1
  181. package/dist/storage/r2-pack.d.ts +957 -94
  182. package/dist/storage/r2-pack.d.ts.map +1 -1
  183. package/dist/storage/r2-pack.js +756 -48
  184. package/dist/storage/r2-pack.js.map +1 -1
  185. package/dist/tiered/cdc-pipeline.d.ts +1610 -38
  186. package/dist/tiered/cdc-pipeline.d.ts.map +1 -1
  187. package/dist/tiered/cdc-pipeline.js +1131 -22
  188. package/dist/tiered/cdc-pipeline.js.map +1 -1
  189. package/dist/tiered/migration.d.ts +903 -41
  190. package/dist/tiered/migration.d.ts.map +1 -1
  191. package/dist/tiered/migration.js +646 -24
  192. package/dist/tiered/migration.js.map +1 -1
  193. package/dist/tiered/parquet-writer.d.ts +944 -47
  194. package/dist/tiered/parquet-writer.d.ts.map +1 -1
  195. package/dist/tiered/parquet-writer.js +667 -39
  196. package/dist/tiered/parquet-writer.js.map +1 -1
  197. package/dist/tiered/read-path.d.ts +728 -34
  198. package/dist/tiered/read-path.d.ts.map +1 -1
  199. package/dist/tiered/read-path.js +310 -27
  200. package/dist/tiered/read-path.js.map +1 -1
  201. package/dist/types/objects.d.ts +457 -0
  202. package/dist/types/objects.d.ts.map +1 -1
  203. package/dist/types/objects.js +305 -4
  204. package/dist/types/objects.js.map +1 -1
  205. package/dist/types/storage.d.ts +407 -35
  206. package/dist/types/storage.d.ts.map +1 -1
  207. package/dist/types/storage.js +27 -3
  208. package/dist/types/storage.js.map +1 -1
  209. package/dist/utils/hash.d.ts +133 -12
  210. package/dist/utils/hash.d.ts.map +1 -1
  211. package/dist/utils/hash.js +133 -12
  212. package/dist/utils/hash.js.map +1 -1
  213. package/dist/utils/sha1.d.ts +102 -9
  214. package/dist/utils/sha1.d.ts.map +1 -1
  215. package/dist/utils/sha1.js +114 -11
  216. package/dist/utils/sha1.js.map +1 -1
  217. package/dist/wire/capabilities.d.ts +896 -88
  218. package/dist/wire/capabilities.d.ts.map +1 -1
  219. package/dist/wire/capabilities.js +566 -62
  220. package/dist/wire/capabilities.js.map +1 -1
  221. package/dist/wire/pkt-line.d.ts +293 -15
  222. package/dist/wire/pkt-line.d.ts.map +1 -1
  223. package/dist/wire/pkt-line.js +251 -15
  224. package/dist/wire/pkt-line.js.map +1 -1
  225. package/dist/wire/receive-pack.d.ts +814 -64
  226. package/dist/wire/receive-pack.d.ts.map +1 -1
  227. package/dist/wire/receive-pack.js +542 -41
  228. package/dist/wire/receive-pack.js.map +1 -1
  229. package/dist/wire/smart-http.d.ts +575 -97
  230. package/dist/wire/smart-http.d.ts.map +1 -1
  231. package/dist/wire/smart-http.js +337 -46
  232. package/dist/wire/smart-http.js.map +1 -1
  233. package/dist/wire/upload-pack.d.ts +492 -98
  234. package/dist/wire/upload-pack.d.ts.map +1 -1
  235. package/dist/wire/upload-pack.js +347 -59
  236. package/dist/wire/upload-pack.js.map +1 -1
  237. package/package.json +10 -2
@@ -1,315 +1,1887 @@
1
1
  /**
2
- * CDC (Change Data Capture) Pipeline for Git Operations
2
+ * @fileoverview CDC (Change Data Capture) Pipeline for Git Operations
3
3
  *
4
- * Provides functionality to capture, transform, batch, and output git operation events:
5
- * - Event capture from git operations (push, fetch, commits, etc.)
6
- * - Parquet transformation for analytics storage
7
- * - Batching with size and time-based flushing
8
- * - Error handling with retry policies
4
+ * @description
5
+ * This module provides a comprehensive Change Data Capture system for Git operations,
6
+ * enabling real-time event streaming, transformation, and analytics for Git repository events.
9
7
  *
10
- * gitdo: CDC pipeline implementation
8
+ * ## Key Features
9
+ *
10
+ * - **Event Capture**: Captures git operations (push, fetch, commits, branches, tags, merges)
11
+ * - **Parquet Transformation**: Converts events to columnar Parquet format for analytics
12
+ * - **Batching**: Efficient event batching with configurable size and time-based flushing
13
+ * - **Retry Policies**: Configurable exponential backoff with jitter for resilient processing
14
+ * - **Dead Letter Queue**: Handles failed events for later reprocessing
15
+ * - **Metrics**: Built-in tracking for events processed, batches, errors, and latency
16
+ *
17
+ * ## Architecture
18
+ *
19
+ * The pipeline consists of several components:
20
+ * 1. **CDCEventCapture**: Captures git operations and converts them to CDCEvents
21
+ * 2. **CDCBatcher**: Batches events for efficient processing
22
+ * 3. **ParquetTransformer**: Transforms events to Parquet format
23
+ * 4. **CDCPipeline**: Orchestrates the entire flow with error handling
24
+ *
25
+ * ## Event Flow
26
+ *
27
+ * ```
28
+ * Git Operation -> CDCEventCapture -> CDCBatcher -> ParquetTransformer -> Output
29
+ * |
30
+ * v
31
+ * (On failure) Dead Letter Queue
32
+ * ```
33
+ *
34
+ * @module tiered/cdc-pipeline
35
+ *
36
+ * @example
37
+ * ```typescript
38
+ * // Create and start a pipeline
39
+ * const pipeline = new CDCPipeline({
40
+ * batchSize: 100,
41
+ * flushIntervalMs: 5000,
42
+ * maxRetries: 3,
43
+ * parquetCompression: 'snappy',
44
+ * outputPath: '/analytics',
45
+ * schemaVersion: 1
46
+ * })
47
+ *
48
+ * await pipeline.start()
49
+ *
50
+ * // Process events
51
+ * pipeline.onOutput((output) => {
52
+ * console.log(`Generated batch: ${output.batchId}`)
53
+ * console.log(`Events: ${output.events.length}`)
54
+ * console.log(`Parquet size: ${output.parquetBuffer.length} bytes`)
55
+ * })
56
+ *
57
+ * pipeline.onDeadLetter((events, error) => {
58
+ * console.error(`Failed events: ${events.length}`, error)
59
+ * })
60
+ *
61
+ * // Create and process an event
62
+ * const event = createCDCEvent('COMMIT_CREATED', 'push', {
63
+ * operation: 'commit-create',
64
+ * sha: 'abc123...',
65
+ * treeSha: 'def456...',
66
+ * parentShas: ['parent1...']
67
+ * })
68
+ *
69
+ * await pipeline.process(event)
70
+ *
71
+ * // Get metrics
72
+ * const metrics = pipeline.getMetrics()
73
+ * console.log(`Processed: ${metrics.eventsProcessed}`)
74
+ * console.log(`Batches: ${metrics.batchesGenerated}`)
75
+ *
76
+ * // Stop the pipeline
77
+ * await pipeline.stop()
78
+ * ```
79
+ *
80
+ * @see {@link CDCPipeline} - Main pipeline orchestration class
81
+ * @see {@link CDCEventCapture} - Event capture from git operations
82
+ * @see {@link ParquetTransformer} - Parquet format transformation
11
83
  */
12
84
  /**
13
- * CDC Event Types representing different git operations
85
+ * CDC Event Types representing different git operations.
86
+ *
87
+ * @description
88
+ * Enumeration of all supported Git operation types that can be captured
89
+ * by the CDC system. Each type corresponds to a specific Git operation.
90
+ *
91
+ * @example
92
+ * ```typescript
93
+ * const eventType: CDCEventType = 'COMMIT_CREATED'
94
+ * ```
14
95
  */
15
96
  export type CDCEventType = 'OBJECT_CREATED' | 'OBJECT_DELETED' | 'REF_UPDATED' | 'PACK_RECEIVED' | 'COMMIT_CREATED' | 'TREE_MODIFIED' | 'BRANCH_CREATED' | 'BRANCH_DELETED' | 'TAG_CREATED' | 'MERGE_COMPLETED';
16
97
  /**
17
- * CDC Event Source indicating origin of the event
98
+ * CDC Event Source indicating the origin of the event.
99
+ *
100
+ * @description
101
+ * Identifies the source system or operation that generated the CDC event.
102
+ * This helps with event filtering, routing, and analytics.
103
+ *
104
+ * - `push`: Events from git push operations
105
+ * - `fetch`: Events from git fetch operations
106
+ * - `internal`: Events from internal system operations
107
+ * - `replication`: Events from repository replication
108
+ * - `gc`: Events from garbage collection
109
+ *
110
+ * @example
111
+ * ```typescript
112
+ * const source: CDCEventSource = 'push'
113
+ * ```
18
114
  */
19
115
  export type CDCEventSource = 'push' | 'fetch' | 'internal' | 'replication' | 'gc';
20
116
  /**
21
- * Payload for CDC events
117
+ * Payload data for CDC events.
118
+ *
119
+ * @description
120
+ * Contains the detailed data associated with a CDC event. Different event
121
+ * types use different subsets of these fields.
122
+ *
123
+ * @example
124
+ * ```typescript
125
+ * // Commit created payload
126
+ * const payload: CDCEventPayload = {
127
+ * operation: 'commit-create',
128
+ * sha: 'abc123...',
129
+ * treeSha: 'def456...',
130
+ * parentShas: ['parent1...']
131
+ * }
132
+ *
133
+ * // Ref updated payload
134
+ * const refPayload: CDCEventPayload = {
135
+ * operation: 'ref-update',
136
+ * refName: 'refs/heads/main',
137
+ * oldSha: 'old123...',
138
+ * newSha: 'new456...'
139
+ * }
140
+ * ```
22
141
  */
23
142
  export interface CDCEventPayload {
143
+ /**
144
+ * The type of operation performed.
145
+ *
146
+ * @example 'commit-create', 'ref-update', 'branch-create'
147
+ */
24
148
  operation: string;
149
+ /**
150
+ * SHA-1 hash of the affected object.
151
+ * Present for object-related events.
152
+ */
25
153
  sha?: string;
154
+ /**
155
+ * Timestamp of the operation in milliseconds since epoch.
156
+ */
26
157
  timestamp?: number;
158
+ /**
159
+ * Raw binary data associated with the event.
160
+ * Used for object creation and pack reception events.
161
+ */
27
162
  data?: Uint8Array;
163
+ /**
164
+ * Additional metadata key-value pairs.
165
+ * Can include object type, size, etc.
166
+ */
28
167
  metadata?: Record<string, unknown>;
168
+ /**
169
+ * Git reference name (e.g., 'refs/heads/main').
170
+ * Present for ref update events.
171
+ */
29
172
  refName?: string;
173
+ /**
174
+ * Previous SHA for ref update events.
175
+ * May be all zeros for new refs.
176
+ */
30
177
  oldSha?: string;
178
+ /**
179
+ * New SHA for ref update events.
180
+ * May be all zeros for deleted refs.
181
+ */
31
182
  newSha?: string;
183
+ /**
184
+ * Number of objects in a pack.
185
+ * Present for pack received events.
186
+ */
32
187
  objectCount?: number;
188
+ /**
189
+ * Tree SHA for commit events.
190
+ */
33
191
  treeSha?: string;
192
+ /**
193
+ * Parent commit SHAs for commit events.
194
+ */
34
195
  parentShas?: string[];
196
+ /**
197
+ * Branch name for branch-related events.
198
+ */
35
199
  branchName?: string;
200
+ /**
201
+ * Tag name for tag-related events.
202
+ */
36
203
  tagName?: string;
204
+ /**
205
+ * Base commit SHA for merge events.
206
+ */
37
207
  baseSha?: string;
208
+ /**
209
+ * Head commit SHA for merge events.
210
+ */
38
211
  headSha?: string;
39
212
  }
40
213
  /**
41
- * CDC Event structure
214
+ * CDC Event structure representing a single change data capture event.
215
+ *
216
+ * @description
217
+ * A CDCEvent captures a single git operation with all metadata needed
218
+ * for replication, analytics, and auditing. Events are immutable once
219
+ * created and ordered by their sequence number.
220
+ *
221
+ * @example
222
+ * ```typescript
223
+ * const event: CDCEvent = {
224
+ * id: 'evt-1234567890-abc123',
225
+ * type: 'COMMIT_CREATED',
226
+ * source: 'push',
227
+ * timestamp: 1703980800000,
228
+ * payload: {
229
+ * operation: 'commit-create',
230
+ * sha: 'abc123...',
231
+ * treeSha: 'def456...',
232
+ * parentShas: ['parent1...']
233
+ * },
234
+ * sequence: 42,
235
+ * version: 1
236
+ * }
237
+ * ```
42
238
  */
43
239
  export interface CDCEvent {
240
+ /**
241
+ * Unique identifier for this event.
242
+ * Format: `evt-{timestamp}-{random}`
243
+ */
44
244
  id: string;
245
+ /**
246
+ * Type of git operation that generated this event.
247
+ *
248
+ * @see {@link CDCEventType}
249
+ */
45
250
  type: CDCEventType;
251
+ /**
252
+ * Source system or operation that generated this event.
253
+ *
254
+ * @see {@link CDCEventSource}
255
+ */
46
256
  source: CDCEventSource;
257
+ /**
258
+ * Unix timestamp in milliseconds when the event was created.
259
+ */
47
260
  timestamp: number;
261
+ /**
262
+ * Event payload containing operation-specific data.
263
+ */
48
264
  payload: CDCEventPayload;
265
+ /**
266
+ * Monotonically increasing sequence number within a capture session.
267
+ * Used for ordering and deduplication.
268
+ */
49
269
  sequence: number;
270
+ /**
271
+ * Schema version of the event format.
272
+ * Used for backward compatibility during upgrades.
273
+ */
50
274
  version: number;
51
275
  }
52
276
  /**
53
- * Pipeline configuration
277
+ * Configuration for the CDC pipeline.
278
+ *
279
+ * @description
280
+ * Defines all configuration options for creating and running a CDC pipeline,
281
+ * including batching behavior, retry policy, and output format.
282
+ *
283
+ * @example
284
+ * ```typescript
285
+ * const config: CDCPipelineConfig = {
286
+ * batchSize: 100, // Flush every 100 events
287
+ * flushIntervalMs: 5000, // Or every 5 seconds
288
+ * maxRetries: 3, // Retry failed batches 3 times
289
+ * parquetCompression: 'snappy',
290
+ * outputPath: '/analytics/cdc',
291
+ * schemaVersion: 1
292
+ * }
293
+ * ```
54
294
  */
55
295
  export interface CDCPipelineConfig {
296
+ /**
297
+ * Maximum number of events to batch before flushing.
298
+ * Lower values reduce latency, higher values improve throughput.
299
+ */
56
300
  batchSize: number;
301
+ /**
302
+ * Maximum time in milliseconds to wait before flushing a batch.
303
+ * Ensures events are processed even with low throughput.
304
+ */
57
305
  flushIntervalMs: number;
306
+ /**
307
+ * Maximum number of retry attempts for failed batch processing.
308
+ * Uses exponential backoff between attempts.
309
+ */
58
310
  maxRetries: number;
311
+ /**
312
+ * Compression algorithm for Parquet output.
313
+ *
314
+ * - `snappy`: Fast compression with moderate ratio (recommended)
315
+ * - `gzip`: Higher compression ratio, slower
316
+ * - `none`: No compression
317
+ */
59
318
  parquetCompression: 'snappy' | 'gzip' | 'none';
319
+ /**
320
+ * Base path for output files.
321
+ * Parquet files will be written to this directory.
322
+ */
60
323
  outputPath: string;
324
+ /**
325
+ * Schema version for event format.
326
+ * Used for backward compatibility during upgrades.
327
+ */
61
328
  schemaVersion: number;
62
329
  }
63
330
  /**
64
- * Pipeline state
331
+ * Pipeline operational state.
332
+ *
333
+ * @description
334
+ * Indicates the current state of the CDC pipeline.
335
+ *
336
+ * - `stopped`: Pipeline is not running, no events are processed
337
+ * - `running`: Pipeline is active and processing events
338
+ * - `paused`: Pipeline is temporarily suspended (reserved for future use)
65
339
  */
66
340
  export type CDCPipelineState = 'stopped' | 'running' | 'paused';
67
341
  /**
68
- * Batch configuration
342
+ * Configuration for event batching.
343
+ *
344
+ * @description
345
+ * Controls how events are grouped into batches for processing.
346
+ *
347
+ * @example
348
+ * ```typescript
349
+ * const config: BatchConfig = {
350
+ * batchSize: 100,
351
+ * flushIntervalMs: 5000
352
+ * }
353
+ * ```
69
354
  */
70
355
  export interface BatchConfig {
356
+ /**
357
+ * Maximum number of events per batch.
358
+ */
71
359
  batchSize: number;
360
+ /**
361
+ * Maximum time to wait before flushing a partial batch.
362
+ */
72
363
  flushIntervalMs: number;
73
364
  }
74
365
  /**
75
- * Batch result metadata
366
+ * Result of a batch flush operation.
367
+ *
368
+ * @description
369
+ * Contains the events in the batch and metadata about the batch
370
+ * for downstream processing and monitoring.
371
+ *
372
+ * @example
373
+ * ```typescript
374
+ * batcher.onBatch((result: BatchResult) => {
375
+ * console.log(`Batch: ${result.eventCount} events`)
376
+ * console.log(`Sequences: ${result.minSequence} - ${result.maxSequence}`)
377
+ * console.log(`Time range: ${result.minTimestamp} - ${result.maxTimestamp}`)
378
+ * })
379
+ * ```
76
380
  */
77
381
  export interface BatchResult {
382
+ /**
383
+ * Array of events in this batch.
384
+ */
78
385
  events: CDCEvent[];
386
+ /**
387
+ * Number of events in the batch.
388
+ */
79
389
  eventCount: number;
390
+ /**
391
+ * Whether the batch was processed successfully.
392
+ */
80
393
  success: boolean;
394
+ /**
395
+ * Minimum sequence number in the batch.
396
+ * Useful for tracking progress and resumption.
397
+ */
81
398
  minSequence?: number;
399
+ /**
400
+ * Maximum sequence number in the batch.
401
+ */
82
402
  maxSequence?: number;
403
+ /**
404
+ * Earliest event timestamp in the batch (milliseconds).
405
+ */
83
406
  minTimestamp?: number;
407
+ /**
408
+ * Latest event timestamp in the batch (milliseconds).
409
+ */
84
410
  maxTimestamp?: number;
85
411
  }
86
412
  /**
87
- * CDC Error types
413
+ * CDC Error types for categorizing failures.
414
+ *
415
+ * @description
416
+ * Error codes that help identify the type of failure for
417
+ * appropriate error handling and recovery strategies.
418
+ *
419
+ * - `VALIDATION_ERROR`: Event failed validation checks
420
+ * - `PROCESSING_ERROR`: Error during event processing
421
+ * - `SERIALIZATION_ERROR`: Error serializing/deserializing events
422
+ * - `STORAGE_ERROR`: Error writing to storage
423
+ * - `TIMEOUT_ERROR`: Operation timed out
424
+ * - `BUFFER_OVERFLOW_ERROR`: Event buffer exceeded capacity
425
+ * - `UNKNOWN_ERROR`: Unclassified error
88
426
  */
89
427
  export type CDCErrorType = 'VALIDATION_ERROR' | 'PROCESSING_ERROR' | 'SERIALIZATION_ERROR' | 'STORAGE_ERROR' | 'TIMEOUT_ERROR' | 'BUFFER_OVERFLOW_ERROR' | 'UNKNOWN_ERROR';
90
428
  /**
91
- * Parquet field definition
429
+ * Field definition for Parquet schema.
430
+ *
431
+ * @description
432
+ * Defines a single column in the Parquet output schema.
92
433
  */
93
434
  export interface ParquetField {
435
+ /**
436
+ * Column name.
437
+ */
94
438
  name: string;
439
+ /**
440
+ * Column data type (STRING, INT64, TIMESTAMP, etc.).
441
+ */
95
442
  type: string;
443
+ /**
444
+ * Whether the column can contain null values.
445
+ */
96
446
  nullable: boolean;
97
447
  }
98
448
  /**
99
- * Parquet row representation
449
+ * Row representation for Parquet output.
450
+ *
451
+ * @description
452
+ * Represents a single CDC event as a Parquet row with
453
+ * flattened fields for efficient columnar storage.
100
454
  */
101
455
  export interface ParquetRow {
456
+ /**
457
+ * Event unique identifier.
458
+ */
102
459
  event_id: string;
460
+ /**
461
+ * Event type (e.g., 'COMMIT_CREATED').
462
+ */
103
463
  event_type: string;
464
+ /**
465
+ * Event source (e.g., 'push').
466
+ */
104
467
  source: string;
468
+ /**
469
+ * Event timestamp in milliseconds.
470
+ */
105
471
  timestamp: number;
472
+ /**
473
+ * Event sequence number.
474
+ */
106
475
  sequence: number;
476
+ /**
477
+ * Event schema version.
478
+ */
107
479
  version: number;
480
+ /**
481
+ * JSON-serialized event payload.
482
+ */
108
483
  payload_json: string;
484
+ /**
485
+ * SHA from the payload, extracted for efficient filtering.
486
+ */
109
487
  sha: string | null;
110
488
  }
111
489
  /**
112
- * Parquet batch representation
490
+ * Batch of Parquet rows ready for writing.
491
+ *
492
+ * @description
493
+ * Contains transformed rows and metadata needed to write
494
+ * a Parquet file.
113
495
  */
114
496
  export interface ParquetBatch {
497
+ /**
498
+ * Array of Parquet rows.
499
+ */
115
500
  rows: ParquetRow[];
501
+ /**
502
+ * Number of rows in the batch.
503
+ */
116
504
  rowCount: number;
505
+ /**
506
+ * Batch creation timestamp.
507
+ */
117
508
  createdAt: number;
509
+ /**
510
+ * Parquet schema definition.
511
+ */
118
512
  schema: {
119
513
  fields: ParquetField[];
120
514
  };
515
+ /**
516
+ * Compression algorithm used.
517
+ */
121
518
  compression: string;
122
519
  }
123
520
  /**
124
- * Pipeline output
521
+ * Output from the CDC pipeline.
522
+ *
523
+ * @description
524
+ * Contains the Parquet-formatted data and metadata for a
525
+ * processed batch of events.
526
+ *
527
+ * @example
528
+ * ```typescript
529
+ * pipeline.onOutput((output: PipelineOutput) => {
530
+ * console.log(`Batch ID: ${output.batchId}`)
531
+ * console.log(`Events: ${output.events.length}`)
532
+ * console.log(`Size: ${output.parquetBuffer.length} bytes`)
533
+ *
534
+ * // Write to storage
535
+ * await r2.put(`cdc/${output.batchId}.parquet`, output.parquetBuffer)
536
+ * })
537
+ * ```
125
538
  */
126
539
  export interface PipelineOutput {
540
+ /**
541
+ * Parquet-formatted data as a byte array.
542
+ */
127
543
  parquetBuffer: Uint8Array;
544
+ /**
545
+ * Original events included in this batch.
546
+ */
128
547
  events: CDCEvent[];
548
+ /**
549
+ * Unique identifier for this batch.
550
+ * Format: `batch-{timestamp}-{random}`
551
+ */
129
552
  batchId: string;
130
553
  }
131
554
  /**
132
- * Pipeline metrics
555
+ * Metrics for monitoring pipeline performance.
556
+ *
557
+ * @description
558
+ * Provides operational metrics for monitoring and alerting
559
+ * on pipeline health and performance.
560
+ *
561
+ * @example
562
+ * ```typescript
563
+ * const metrics = pipeline.getMetrics()
564
+ * console.log(`Events processed: ${metrics.eventsProcessed}`)
565
+ * console.log(`Batches generated: ${metrics.batchesGenerated}`)
566
+ * console.log(`Bytes written: ${metrics.bytesWritten}`)
567
+ * console.log(`Errors: ${metrics.errors}`)
568
+ * console.log(`Avg latency: ${metrics.avgProcessingLatencyMs}ms`)
569
+ * ```
133
570
  */
134
571
  export interface PipelineMetrics {
572
+ /**
573
+ * Total number of events processed.
574
+ */
135
575
  eventsProcessed: number;
576
+ /**
577
+ * Total number of batches generated.
578
+ */
136
579
  batchesGenerated: number;
580
+ /**
581
+ * Total bytes written to output.
582
+ */
137
583
  bytesWritten: number;
584
+ /**
585
+ * Total number of errors encountered.
586
+ */
138
587
  errors: number;
588
+ /**
589
+ * Average event processing latency in milliseconds.
590
+ * Calculated from the last 1000 events.
591
+ */
139
592
  avgProcessingLatencyMs: number;
140
593
  }
141
594
  /**
142
- * Process result
595
+ * Result of processing a single event.
596
+ *
597
+ * @description
598
+ * Returned when an event is successfully queued for processing.
143
599
  */
144
600
  export interface ProcessResult {
601
+ /**
602
+ * Whether the event was successfully queued.
603
+ */
145
604
  success: boolean;
605
+ /**
606
+ * ID of the processed event.
607
+ */
146
608
  eventId: string;
147
609
  }
148
610
  /**
149
- * Stop result
611
+ * Result of stopping the pipeline.
612
+ *
613
+ * @description
614
+ * Contains information about any pending events that were
615
+ * flushed during shutdown.
150
616
  */
151
617
  export interface StopResult {
618
+ /**
619
+ * Number of events flushed during stop.
620
+ */
152
621
  flushedCount: number;
153
622
  }
154
623
  /**
155
- * Custom error class for CDC operations
624
+ * Custom error class for CDC operations.
625
+ *
626
+ * @description
627
+ * CDCError provides structured error information for CDC pipeline failures,
628
+ * including an error type for programmatic handling and optional cause for
629
+ * error chaining.
630
+ *
631
+ * @example
632
+ * ```typescript
633
+ * try {
634
+ * await pipeline.process(event)
635
+ * } catch (error) {
636
+ * if (error instanceof CDCError) {
637
+ * switch (error.type) {
638
+ * case 'VALIDATION_ERROR':
639
+ * console.log('Invalid event:', error.message)
640
+ * break
641
+ * case 'PROCESSING_ERROR':
642
+ * console.log('Processing failed:', error.message)
643
+ * if (error.cause) {
644
+ * console.log('Caused by:', error.cause.message)
645
+ * }
646
+ * break
647
+ * }
648
+ * }
649
+ * }
650
+ * ```
651
+ *
652
+ * @class CDCError
653
+ * @extends Error
156
654
  */
157
655
  export declare class CDCError extends Error {
158
656
  readonly type: CDCErrorType;
159
657
  readonly cause?: Error | undefined;
658
+ /**
659
+ * Creates a new CDCError.
660
+ *
661
+ * @param type - Error type for categorization
662
+ * @param message - Human-readable error message
663
+ * @param cause - Optional underlying error that caused this error
664
+ */
160
665
  constructor(type: CDCErrorType, message: string, cause?: Error | undefined);
161
666
  }
667
+ /**
668
+ * Configuration for the retry policy.
669
+ *
670
+ * @description
671
+ * Configures exponential backoff behavior for failed operations.
672
+ *
673
+ * @example
674
+ * ```typescript
675
+ * const config: RetryPolicyConfig = {
676
+ * maxRetries: 3,
677
+ * initialDelayMs: 100,
678
+ * maxDelayMs: 5000,
679
+ * backoffMultiplier: 2,
680
+ * jitter: true // Add randomness to prevent thundering herd
681
+ * }
682
+ * ```
683
+ */
162
684
  export interface RetryPolicyConfig {
685
+ /**
686
+ * Maximum number of retry attempts before giving up.
687
+ */
163
688
  maxRetries: number;
689
+ /**
690
+ * Initial delay in milliseconds before first retry.
691
+ */
164
692
  initialDelayMs: number;
693
+ /**
694
+ * Maximum delay in milliseconds between retries.
695
+ * Caps exponential growth.
696
+ */
165
697
  maxDelayMs: number;
698
+ /**
699
+ * Multiplier applied to delay after each attempt.
700
+ * A value of 2 doubles the delay each time.
701
+ */
166
702
  backoffMultiplier: number;
703
+ /**
704
+ * Whether to add random jitter to delays.
705
+ * Helps prevent thundering herd problems.
706
+ */
167
707
  jitter?: boolean;
168
708
  }
169
709
  /**
170
- * Retry policy with exponential backoff
710
+ * Retry policy implementing exponential backoff with optional jitter.
711
+ *
712
+ * @description
713
+ * Provides a robust retry mechanism for handling transient failures.
714
+ * Uses exponential backoff to space out retry attempts, with optional
715
+ * jitter to prevent synchronized retries from multiple clients.
716
+ *
717
+ * **Backoff Formula:**
718
+ * `delay = min(initialDelay * (multiplier ^ attempt), maxDelay)`
719
+ *
720
+ * **With Jitter:**
721
+ * `delay = delay * random(0.5, 1.5)`
722
+ *
723
+ * @example
724
+ * ```typescript
725
+ * const policy = new CDCRetryPolicy({
726
+ * maxRetries: 3,
727
+ * initialDelayMs: 100,
728
+ * maxDelayMs: 5000,
729
+ * backoffMultiplier: 2,
730
+ * jitter: true
731
+ * })
732
+ *
733
+ * let attempts = 0
734
+ * while (attempts < 10) {
735
+ * try {
736
+ * await doOperation()
737
+ * break
738
+ * } catch (error) {
739
+ * attempts++
740
+ * if (!policy.shouldRetry(attempts)) {
741
+ * throw new Error('Max retries exceeded')
742
+ * }
743
+ * const delay = policy.getDelay(attempts)
744
+ * console.log(`Retry ${attempts} after ${delay}ms`)
745
+ * await sleep(delay)
746
+ * }
747
+ * }
748
+ * ```
749
+ *
750
+ * @class CDCRetryPolicy
171
751
  */
172
752
  export declare class CDCRetryPolicy {
753
+ /**
754
+ * Retry configuration.
755
+ * @private
756
+ */
173
757
  private readonly config;
758
+ /**
759
+ * Creates a new retry policy.
760
+ *
761
+ * @param config - Retry policy configuration
762
+ */
174
763
  constructor(config: RetryPolicyConfig);
764
+ /**
765
+ * Determines whether another retry should be attempted.
766
+ *
767
+ * @param attemptCount - Number of attempts already made
768
+ * @returns true if more retries are allowed, false otherwise
769
+ *
770
+ * @example
771
+ * ```typescript
772
+ * if (policy.shouldRetry(3)) {
773
+ * // Retry is allowed
774
+ * }
775
+ * ```
776
+ */
175
777
  shouldRetry(attemptCount: number): boolean;
778
+ /**
779
+ * Calculates the delay before the next retry.
780
+ *
781
+ * @description
782
+ * Computes delay using exponential backoff, capped at maxDelayMs.
783
+ * If jitter is enabled, applies a random factor between 0.5x and 1.5x.
784
+ *
785
+ * @param attemptCount - Number of attempts already made (1-indexed)
786
+ * @returns Delay in milliseconds before next retry
787
+ *
788
+ * @example
789
+ * ```typescript
790
+ * // With initialDelay=100, multiplier=2:
791
+ * // Attempt 1: 100ms * 2^0 = 100ms
792
+ * // Attempt 2: 100ms * 2^1 = 200ms
793
+ * // Attempt 3: 100ms * 2^2 = 400ms
794
+ * const delay = policy.getDelay(attemptCount)
795
+ * await sleep(delay)
796
+ * ```
797
+ */
176
798
  getDelay(attemptCount: number): number;
177
799
  }
800
+ /**
801
+ * Configuration options for CDC event capture.
802
+ *
803
+ * @example
804
+ * ```typescript
805
+ * const options: CDCEventCaptureOptions = {
806
+ * maxBufferSize: 1000 // Auto-flush when buffer reaches 1000 events
807
+ * }
808
+ * ```
809
+ */
178
810
  export interface CDCEventCaptureOptions {
811
+ /**
812
+ * Maximum number of events to buffer before auto-flushing.
813
+ * Defaults to Infinity (no auto-flush).
814
+ */
179
815
  maxBufferSize?: number;
180
816
  }
817
+ /**
818
+ * Callback function for git operation events.
819
+ *
820
+ * @param event - The captured CDC event
821
+ */
181
822
  export type GitOperationListener = (event: CDCEvent) => void;
182
823
  /**
183
- * Captures git operations and converts them to CDC events
824
+ * Captures git operations and converts them to CDC events.
825
+ *
826
+ * @description
827
+ * CDCEventCapture hooks into git operations and generates CDCEvents for each
828
+ * operation. It maintains an internal buffer of events that can be flushed
829
+ * manually or automatically when the buffer reaches a configured size.
830
+ *
831
+ * **Supported Operations:**
832
+ * - Object creation/deletion (blobs, trees, commits, tags)
833
+ * - Reference updates (branches, tags)
834
+ * - Commit creation
835
+ * - Pack reception
836
+ * - Branch creation/deletion
837
+ * - Tag creation
838
+ * - Merge completion
839
+ *
840
+ * **Event Ordering:**
841
+ * Events are assigned monotonically increasing sequence numbers within a
842
+ * capture session. This ensures proper ordering for replay and analytics.
843
+ *
844
+ * @example
845
+ * ```typescript
846
+ * const capture = new CDCEventCapture({ maxBufferSize: 100 })
847
+ *
848
+ * // Add a listener for real-time processing
849
+ * capture.addListener((event) => {
850
+ * console.log(`Event: ${event.type} - ${event.id}`)
851
+ * })
852
+ *
853
+ * // Capture git operations
854
+ * await capture.onCommitCreated('abc123...', 'tree456...', ['parent789...'])
855
+ * await capture.onRefUpdate('refs/heads/main', 'old...', 'new...')
856
+ *
857
+ * // Get buffered events
858
+ * console.log(`Buffer size: ${capture.getBufferSize()}`)
859
+ *
860
+ * // Flush buffer
861
+ * const events = await capture.flush()
862
+ * console.log(`Flushed ${events.length} events`)
863
+ * ```
864
+ *
865
+ * @class CDCEventCapture
184
866
  */
185
867
  export declare class CDCEventCapture {
868
+ /**
869
+ * Buffer of captured events.
870
+ * @private
871
+ */
186
872
  private events;
873
+ /**
874
+ * Monotonically increasing sequence counter.
875
+ * @private
876
+ */
187
877
  private sequenceCounter;
878
+ /**
879
+ * Registered event listeners.
880
+ * @private
881
+ */
188
882
  private listeners;
883
+ /**
884
+ * Maximum buffer size before auto-flush.
885
+ * @private
886
+ */
189
887
  private readonly maxBufferSize;
888
+ /**
889
+ * Creates a new CDC event capture instance.
890
+ *
891
+ * @param options - Configuration options
892
+ */
190
893
  constructor(options?: CDCEventCaptureOptions);
894
+ /**
895
+ * Generates a unique event ID.
896
+ * @private
897
+ */
191
898
  private generateEventId;
899
+ /**
900
+ * Emits an event to the buffer and notifies listeners.
901
+ * @private
902
+ */
192
903
  private emitEvent;
904
+ /**
905
+ * Returns the next sequence number.
906
+ * @private
907
+ */
193
908
  private nextSequence;
909
+ /**
910
+ * Captures an object put (creation) operation.
911
+ *
912
+ * @description
913
+ * Called when a git object (blob, tree, commit, tag) is written to storage.
914
+ *
915
+ * @param sha - SHA-1 hash of the object
916
+ * @param type - Object type (blob, tree, commit, tag)
917
+ * @param data - Raw object data
918
+ *
919
+ * @example
920
+ * ```typescript
921
+ * await capture.onObjectPut('abc123...', 'blob', blobData)
922
+ * ```
923
+ */
194
924
  onObjectPut(sha: string, type: string, data: Uint8Array): Promise<void>;
925
+ /**
926
+ * Captures an object deletion operation.
927
+ *
928
+ * @description
929
+ * Called when a git object is deleted, typically during garbage collection.
930
+ *
931
+ * @param sha - SHA-1 hash of the deleted object
932
+ *
933
+ * @example
934
+ * ```typescript
935
+ * await capture.onObjectDelete('abc123...')
936
+ * ```
937
+ */
195
938
  onObjectDelete(sha: string): Promise<void>;
939
+ /**
940
+ * Captures a reference update operation.
941
+ *
942
+ * @description
943
+ * Called when a git reference (branch, tag) is updated to point to a new commit.
944
+ *
945
+ * @param refName - Full reference name (e.g., 'refs/heads/main')
946
+ * @param oldSha - Previous SHA (all zeros for new refs)
947
+ * @param newSha - New SHA (all zeros for deleted refs)
948
+ *
949
+ * @example
950
+ * ```typescript
951
+ * await capture.onRefUpdate(
952
+ * 'refs/heads/main',
953
+ * 'oldcommit123...',
954
+ * 'newcommit456...'
955
+ * )
956
+ * ```
957
+ */
196
958
  onRefUpdate(refName: string, oldSha: string, newSha: string): Promise<void>;
959
+ /**
960
+ * Captures a commit creation operation.
961
+ *
962
+ * @description
963
+ * Called when a new commit object is created.
964
+ *
965
+ * @param commitSha - SHA-1 hash of the commit
966
+ * @param treeSha - SHA-1 hash of the tree the commit points to
967
+ * @param parentShas - Array of parent commit SHAs
968
+ *
969
+ * @example
970
+ * ```typescript
971
+ * await capture.onCommitCreated(
972
+ * 'commitabc123...',
973
+ * 'treedef456...',
974
+ * ['parent1...', 'parent2...']
975
+ * )
976
+ * ```
977
+ */
197
978
  onCommitCreated(commitSha: string, treeSha: string, parentShas: string[]): Promise<void>;
979
+ /**
980
+ * Captures a pack reception operation.
981
+ *
982
+ * @description
983
+ * Called when a packfile is received during a push or fetch operation.
984
+ *
985
+ * @param packData - Raw packfile data
986
+ * @param objectCount - Number of objects in the pack
987
+ *
988
+ * @example
989
+ * ```typescript
990
+ * await capture.onPackReceived(packBuffer, 42)
991
+ * ```
992
+ */
198
993
  onPackReceived(packData: Uint8Array, objectCount: number): Promise<void>;
994
+ /**
995
+ * Captures a branch creation operation.
996
+ *
997
+ * @param branchName - Name of the branch (without refs/heads/ prefix)
998
+ * @param sha - SHA-1 hash the branch points to
999
+ *
1000
+ * @example
1001
+ * ```typescript
1002
+ * await capture.onBranchCreated('feature-x', 'abc123...')
1003
+ * ```
1004
+ */
199
1005
  onBranchCreated(branchName: string, sha: string): Promise<void>;
1006
+ /**
1007
+ * Captures a branch deletion operation.
1008
+ *
1009
+ * @param branchName - Name of the deleted branch
1010
+ *
1011
+ * @example
1012
+ * ```typescript
1013
+ * await capture.onBranchDeleted('feature-x')
1014
+ * ```
1015
+ */
200
1016
  onBranchDeleted(branchName: string): Promise<void>;
1017
+ /**
1018
+ * Captures a tag creation operation.
1019
+ *
1020
+ * @param tagName - Name of the tag
1021
+ * @param sha - SHA-1 hash the tag points to
1022
+ *
1023
+ * @example
1024
+ * ```typescript
1025
+ * await capture.onTagCreated('v1.0.0', 'abc123...')
1026
+ * ```
1027
+ */
201
1028
  onTagCreated(tagName: string, sha: string): Promise<void>;
1029
+ /**
1030
+ * Captures a merge completion operation.
1031
+ *
1032
+ * @param mergeSha - SHA-1 hash of the merge commit
1033
+ * @param baseSha - SHA-1 hash of the base commit
1034
+ * @param headSha - SHA-1 hash of the head commit being merged
1035
+ *
1036
+ * @example
1037
+ * ```typescript
1038
+ * await capture.onMergeCompleted('merge123...', 'base456...', 'head789...')
1039
+ * ```
1040
+ */
202
1041
  onMergeCompleted(mergeSha: string, baseSha: string, headSha: string): Promise<void>;
1042
+ /**
1043
+ * Returns a copy of all buffered events.
1044
+ *
1045
+ * @returns Array of buffered events
1046
+ */
203
1047
  getEvents(): CDCEvent[];
1048
+ /**
1049
+ * Returns the current buffer size.
1050
+ *
1051
+ * @returns Number of events in the buffer
1052
+ */
204
1053
  getBufferSize(): number;
1054
+ /**
1055
+ * Flushes all buffered events.
1056
+ *
1057
+ * @description
1058
+ * Returns and clears all events from the buffer. The returned events
1059
+ * can be processed, serialized, or forwarded to downstream systems.
1060
+ *
1061
+ * @returns Array of flushed events
1062
+ *
1063
+ * @example
1064
+ * ```typescript
1065
+ * const events = await capture.flush()
1066
+ * console.log(`Flushed ${events.length} events`)
1067
+ * await sendToAnalytics(events)
1068
+ * ```
1069
+ */
205
1070
  flush(): Promise<CDCEvent[]>;
1071
+ /**
1072
+ * Adds an event listener.
1073
+ *
1074
+ * @description
1075
+ * Listeners are called synchronously for each event as it is captured.
1076
+ *
1077
+ * @param listener - Callback function to invoke for each event
1078
+ *
1079
+ * @example
1080
+ * ```typescript
1081
+ * capture.addListener((event) => {
1082
+ * console.log(`New event: ${event.type}`)
1083
+ * })
1084
+ * ```
1085
+ */
206
1086
  addListener(listener: GitOperationListener): void;
1087
+ /**
1088
+ * Removes an event listener.
1089
+ *
1090
+ * @param listener - The listener to remove
1091
+ */
207
1092
  removeListener(listener: GitOperationListener): void;
208
1093
  }
209
1094
  /**
210
- * Parquet schema definition for CDC events
1095
+ * Parquet schema definition for CDC events.
1096
+ *
1097
+ * @description
1098
+ * Defines the column structure for CDC event Parquet files. The default
1099
+ * schema includes standard CDC event fields and can be extended with
1100
+ * custom fields for domain-specific data.
1101
+ *
1102
+ * @example
1103
+ * ```typescript
1104
+ * // Create default schema
1105
+ * const schema = ParquetSchema.forCDCEvents()
1106
+ *
1107
+ * // Create schema with custom fields
1108
+ * const customSchema = ParquetSchema.forCDCEvents([
1109
+ * { name: 'repository_id', type: 'STRING', nullable: false },
1110
+ * { name: 'user_id', type: 'STRING', nullable: true }
1111
+ * ])
1112
+ * ```
1113
+ *
1114
+ * @class ParquetSchema
211
1115
  */
212
1116
  export declare class ParquetSchema {
213
1117
  readonly fields: ParquetField[];
1118
+ /**
1119
+ * Creates a new ParquetSchema.
1120
+ *
1121
+ * @param fields - Array of field definitions
1122
+ */
214
1123
  constructor(fields: ParquetField[]);
1124
+ /**
1125
+ * Creates a schema for CDC events with optional custom fields.
1126
+ *
1127
+ * @description
1128
+ * Returns a schema with the standard CDC event fields. Additional
1129
+ * custom fields can be appended for domain-specific data.
1130
+ *
1131
+ * @param customFields - Optional additional fields to add
1132
+ * @returns A new ParquetSchema instance
1133
+ *
1134
+ * @example
1135
+ * ```typescript
1136
+ * const schema = ParquetSchema.forCDCEvents()
1137
+ * // Schema includes: event_id, event_type, source, timestamp,
1138
+ * // sequence, version, payload_json, sha
1139
+ * ```
1140
+ */
215
1141
  static forCDCEvents(customFields?: ParquetField[]): ParquetSchema;
216
1142
  }
1143
+ /**
1144
+ * Configuration options for the Parquet transformer.
1145
+ */
217
1146
  export interface ParquetTransformerOptions {
1147
+ /**
1148
+ * Compression algorithm to use.
1149
+ * @default 'snappy'
1150
+ */
218
1151
  compression?: 'snappy' | 'gzip' | 'none';
219
1152
  }
220
1153
  /**
221
- * Transforms CDC events to Parquet format
1154
+ * Transforms CDC events to Parquet format.
1155
+ *
1156
+ * @description
1157
+ * ParquetTransformer converts CDC events to Parquet-compatible rows and
1158
+ * serializes batches of events to Parquet file format. It handles:
1159
+ *
1160
+ * - Event to row conversion (flattening the event structure)
1161
+ * - JSON serialization of complex payloads
1162
+ * - Batch creation with schema and metadata
1163
+ * - Parquet file generation with compression
1164
+ *
1165
+ * @example
1166
+ * ```typescript
1167
+ * const transformer = new ParquetTransformer({ compression: 'snappy' })
1168
+ *
1169
+ * // Transform single event to row
1170
+ * const row = transformer.eventToRow(event)
1171
+ *
1172
+ * // Transform batch of events
1173
+ * const batch = transformer.eventsToBatch(events)
1174
+ *
1175
+ * // Generate Parquet file
1176
+ * const buffer = await transformer.toParquetBuffer(batch)
1177
+ * await r2.put('events.parquet', buffer)
1178
+ * ```
1179
+ *
1180
+ * @class ParquetTransformer
222
1181
  */
223
1182
  export declare class ParquetTransformer {
1183
+ /**
1184
+ * Compression algorithm to use.
1185
+ * @private
1186
+ */
224
1187
  private readonly compression;
1188
+ /**
1189
+ * Creates a new ParquetTransformer.
1190
+ *
1191
+ * @param options - Transformer configuration
1192
+ */
225
1193
  constructor(options?: ParquetTransformerOptions);
1194
+ /**
1195
+ * Converts a CDC event to a Parquet row.
1196
+ *
1197
+ * @description
1198
+ * Flattens the event structure and serializes the payload to JSON
1199
+ * for storage in Parquet format.
1200
+ *
1201
+ * @param event - The CDC event to convert
1202
+ * @returns A Parquet row representation
1203
+ *
1204
+ * @example
1205
+ * ```typescript
1206
+ * const row = transformer.eventToRow(event)
1207
+ * console.log(row.event_id, row.event_type, row.sha)
1208
+ * ```
1209
+ */
226
1210
  eventToRow(event: CDCEvent): ParquetRow;
1211
+ /**
1212
+ * Converts multiple CDC events to a Parquet batch.
1213
+ *
1214
+ * @description
1215
+ * Transforms an array of events into a ParquetBatch structure
1216
+ * ready for serialization to Parquet format.
1217
+ *
1218
+ * @param events - Array of CDC events to batch
1219
+ * @returns A ParquetBatch ready for serialization
1220
+ *
1221
+ * @example
1222
+ * ```typescript
1223
+ * const batch = transformer.eventsToBatch(events)
1224
+ * console.log(`Batch has ${batch.rowCount} rows`)
1225
+ * ```
1226
+ */
227
1227
  eventsToBatch(events: CDCEvent[]): ParquetBatch;
1228
+ /**
1229
+ * Serializes a ParquetBatch to a Parquet file buffer.
1230
+ *
1231
+ * @description
1232
+ * Generates a Parquet-format file from the batch data. The output
1233
+ * includes PAR1 magic bytes, compressed data, and footer metadata.
1234
+ *
1235
+ * @param batch - The ParquetBatch to serialize
1236
+ * @returns Promise resolving to Parquet file as Uint8Array
1237
+ *
1238
+ * @example
1239
+ * ```typescript
1240
+ * const buffer = await transformer.toParquetBuffer(batch)
1241
+ * await r2.put('events.parquet', buffer)
1242
+ * ```
1243
+ */
228
1244
  toParquetBuffer(batch: ParquetBatch): Promise<Uint8Array>;
229
1245
  private gzipCompress;
230
1246
  private simpleCompress;
231
1247
  }
1248
+ /**
1249
+ * Callback function for batch processing.
1250
+ *
1251
+ * @param batch - The batch result containing events and metadata
1252
+ * @returns void or a Promise that resolves when processing is complete
1253
+ */
232
1254
  type BatchHandler = (batch: BatchResult) => void | Promise<void>;
233
1255
  /**
234
- * Batches CDC events for efficient processing
1256
+ * Batches CDC events for efficient processing.
1257
+ *
1258
+ * @description
1259
+ * CDCBatcher collects CDC events and groups them into batches based on
1260
+ * count or time thresholds. This enables efficient downstream processing
1261
+ * by reducing the number of I/O operations and enabling bulk operations.
1262
+ *
1263
+ * **Batching Strategies:**
1264
+ * - **Count-based**: Flush when batch reaches `batchSize` events
1265
+ * - **Time-based**: Flush after `flushIntervalMs` even if batch is not full
1266
+ *
1267
+ * **Features:**
1268
+ * - Async batch handlers for non-blocking processing
1269
+ * - Multiple handlers for parallel processing pipelines
1270
+ * - Graceful stop with pending event flush
1271
+ * - Batch metadata (sequences, timestamps) for tracking
1272
+ *
1273
+ * @example
1274
+ * ```typescript
1275
+ * const batcher = new CDCBatcher({
1276
+ * batchSize: 100,
1277
+ * flushIntervalMs: 5000
1278
+ * })
1279
+ *
1280
+ * // Register batch handler
1281
+ * batcher.onBatch(async (batch) => {
1282
+ * console.log(`Processing ${batch.eventCount} events`)
1283
+ * console.log(`Sequence range: ${batch.minSequence} - ${batch.maxSequence}`)
1284
+ * await saveToStorage(batch.events)
1285
+ * })
1286
+ *
1287
+ * // Add events
1288
+ * await batcher.add(event1)
1289
+ * await batcher.add(event2)
1290
+ *
1291
+ * // Check pending events
1292
+ * console.log(`Pending: ${batcher.getPendingCount()}`)
1293
+ *
1294
+ * // Manual flush
1295
+ * const result = await batcher.flush()
1296
+ *
1297
+ * // Stop the batcher
1298
+ * await batcher.stop()
1299
+ * ```
1300
+ *
1301
+ * @class CDCBatcher
235
1302
  */
236
1303
  export declare class CDCBatcher {
1304
+ /**
1305
+ * Batch configuration.
1306
+ * @private
1307
+ */
237
1308
  private readonly config;
1309
+ /**
1310
+ * Buffer of pending events.
1311
+ * @private
1312
+ */
238
1313
  private events;
1314
+ /**
1315
+ * Registered batch handlers.
1316
+ * @private
1317
+ */
239
1318
  private batchHandlers;
1319
+ /**
1320
+ * Timer for time-based flushing.
1321
+ * @private
1322
+ */
240
1323
  private flushTimer;
1324
+ /**
1325
+ * Whether the batcher has been stopped.
1326
+ * @private
1327
+ */
241
1328
  private stopped;
1329
+ /**
1330
+ * Creates a new CDCBatcher.
1331
+ *
1332
+ * @param config - Batch configuration
1333
+ */
242
1334
  constructor(config: BatchConfig);
243
1335
  private ensureTimerRunning;
244
1336
  private clearFlushTimer;
1337
+ /**
1338
+ * Adds an event to the batch.
1339
+ *
1340
+ * @description
1341
+ * Adds the event to the pending batch. If the batch reaches the
1342
+ * configured size, it is automatically flushed. The flush timer
1343
+ * is started/restarted as needed.
1344
+ *
1345
+ * @param event - The CDC event to add
1346
+ *
1347
+ * @example
1348
+ * ```typescript
1349
+ * await batcher.add(event)
1350
+ * ```
1351
+ */
245
1352
  add(event: CDCEvent): Promise<void>;
1353
+ /**
1354
+ * Internal flush implementation.
1355
+ * @private
1356
+ */
246
1357
  private flushInternal;
1358
+ /**
1359
+ * Manually flushes pending events.
1360
+ *
1361
+ * @description
1362
+ * Forces an immediate flush of all pending events, regardless of
1363
+ * batch size or timer. Clears the flush timer.
1364
+ *
1365
+ * @returns Promise resolving to the batch result
1366
+ *
1367
+ * @example
1368
+ * ```typescript
1369
+ * const result = await batcher.flush()
1370
+ * console.log(`Flushed ${result.eventCount} events`)
1371
+ * ```
1372
+ */
247
1373
  flush(): Promise<BatchResult>;
1374
+ /**
1375
+ * Returns the number of pending events.
1376
+ *
1377
+ * @returns Number of events waiting to be flushed
1378
+ */
248
1379
  getPendingCount(): number;
1380
+ /**
1381
+ * Registers a batch handler.
1382
+ *
1383
+ * @description
1384
+ * Handlers are called when a batch is flushed (automatically or manually).
1385
+ * Multiple handlers can be registered for parallel processing.
1386
+ *
1387
+ * @param handler - Callback function to invoke for each batch
1388
+ *
1389
+ * @example
1390
+ * ```typescript
1391
+ * batcher.onBatch(async (batch) => {
1392
+ * await saveToStorage(batch.events)
1393
+ * })
1394
+ * ```
1395
+ */
249
1396
  onBatch(handler: BatchHandler): void;
1397
+ /**
1398
+ * Stops the batcher.
1399
+ *
1400
+ * @description
1401
+ * Stops the flush timer and prevents further processing.
1402
+ * Does NOT automatically flush pending events - call flush() first
1403
+ * if you need to process remaining events.
1404
+ *
1405
+ * @example
1406
+ * ```typescript
1407
+ * await batcher.flush() // Process remaining events
1408
+ * await batcher.stop() // Stop the timer
1409
+ * ```
1410
+ */
250
1411
  stop(): Promise<void>;
251
1412
  }
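The `stop()` note above is easy to miss: pending events are not flushed automatically. A small drain helper keeps the flush-before-stop order in one place; a minimal sketch, assuming `CDCBatcher` is importable from the package root:

```typescript
import { CDCBatcher } from 'gitx.do'

// Flush whatever is still buffered, then stop the timer, in the order the
// class docs require (stop() alone would leave pending events unprocessed).
export async function drainAndStop(batcher: CDCBatcher): Promise<number> {
  const pending = batcher.getPendingCount()
  if (pending > 0) {
    await batcher.flush()
  }
  await batcher.stop()
  return pending // number of events flushed during shutdown
}
```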
1413
+ /**
1414
+ * Callback for successful batch output.
1415
+ *
1416
+ * @param output - The pipeline output containing Parquet data
1417
+ */
252
1418
  type OutputHandler = (output: PipelineOutput) => void;
1419
+ /**
1420
+ * Callback for failed events sent to dead letter queue.
1421
+ *
1422
+ * @param events - Array of failed events
1423
+ * @param error - The error that caused the failure
1424
+ */
253
1425
  type DeadLetterHandler = (events: CDCEvent[], error: Error) => void;
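These aliases are plain callback types, so handlers can be defined away from the pipeline wiring. Below is a sketch of a dead-letter handler that persists failed events with `serializeEvent()` for later replay; `keyValueStore` is a hypothetical store with a `put(key, bytes)` method, not part of gitx.do, and the import path assumes these symbols are exported from the package root:

```typescript
import { CDCPipeline, serializeEvent, type CDCEvent } from 'gitx.do'

// Hypothetical byte store, used only for illustration.
declare const keyValueStore: { put(key: string, value: Uint8Array): Promise<void> }

export function persistDeadLetters(pipeline: CDCPipeline): void {
  pipeline.onDeadLetter((events: CDCEvent[], error: Error) => {
    console.error(`Dead-lettering ${events.length} events:`, error.message)
    for (const event of events) {
      // The handler type returns void, so writes are fire-and-forget here.
      void keyValueStore.put(`dlq/${event.id}`, serializeEvent(event))
    }
  })
}
```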
254
1426
  /**
255
- * Main CDC Pipeline for processing git operation events
1427
+ * Main CDC Pipeline for processing git operation events.
1428
+ *
1429
+ * @description
1430
+ * CDCPipeline orchestrates the complete change data capture flow from
1431
+ * event ingestion to Parquet output. It integrates batching, transformation,
1432
+ * retry handling, and dead letter queue management.
1433
+ *
1434
+ * **Pipeline Flow:**
1435
+ * 1. Events are submitted via `process()` or `processMany()`
1436
+ * 2. Events are validated and added to the batcher
1437
+ * 3. When a batch is ready, it's transformed to Parquet format
1438
+ * 4. On success, output handlers are notified
1439
+ * 5. On failure, retries are attempted with exponential backoff
1440
+ * 6. After max retries, events go to dead letter queue
1441
+ *
1442
+ * **Features:**
1443
+ * - Configurable batch size and flush interval
1444
+ * - Automatic retry with exponential backoff
1445
+ * - Dead letter queue for failed events
1446
+ * - Real-time metrics for monitoring
1447
+ * - Graceful shutdown with pending event flush
1448
+ *
1449
+ * @example
1450
+ * ```typescript
1451
+ * const pipeline = new CDCPipeline({
1452
+ * batchSize: 100,
1453
+ * flushIntervalMs: 5000,
1454
+ * maxRetries: 3,
1455
+ * parquetCompression: 'snappy',
1456
+ * outputPath: '/analytics',
1457
+ * schemaVersion: 1
1458
+ * })
1459
+ *
1460
+ * // Register handlers
1461
+ * pipeline.onOutput(async (output) => {
1462
+ * await r2.put(`cdc/${output.batchId}.parquet`, output.parquetBuffer)
1463
+ * })
1464
+ *
1465
+ * pipeline.onDeadLetter((events, error) => {
1466
+ * console.error(`Failed ${events.length} events:`, error)
1467
+ * })
1468
+ *
1469
+ * // Start the pipeline
1470
+ * await pipeline.start()
1471
+ *
1472
+ * // Process events
1473
+ * await pipeline.process(event)
1474
+ *
1475
+ * // Check metrics
1476
+ * const metrics = pipeline.getMetrics()
1477
+ *
1478
+ * // Stop gracefully
1479
+ * const result = await pipeline.stop()
1480
+ * console.log(`Flushed ${result.flushedCount} events on shutdown`)
1481
+ * ```
1482
+ *
1483
+ * @class CDCPipeline
256
1484
  */
257
1485
  export declare class CDCPipeline {
1486
+ /**
1487
+ * Pipeline configuration.
1488
+ * @private
1489
+ */
258
1490
  private readonly config;
1491
+ /**
1492
+ * Current pipeline state.
1493
+ * @private
1494
+ */
259
1495
  private state;
1496
+ /**
1497
+ * Event batcher instance.
1498
+ * @private
1499
+ */
260
1500
  private batcher;
1501
+ /**
1502
+ * Parquet transformer instance.
1503
+ * @private
1504
+ */
261
1505
  private transformer;
1506
+ /**
1507
+ * Registered output handlers.
1508
+ * @private
1509
+ */
262
1510
  private outputHandlers;
1511
+ /**
1512
+ * Registered dead letter handlers.
1513
+ * @private
1514
+ */
263
1515
  private deadLetterHandlers;
1516
+ /**
1517
+ * Pipeline metrics.
1518
+ * @private
1519
+ */
264
1520
  private metrics;
1521
+ /**
1522
+ * Processing latency samples.
1523
+ * @private
1524
+ */
265
1525
  private processingLatencies;
1526
+ /**
1527
+ * Retry policy instance.
1528
+ * @private
1529
+ */
266
1530
  private retryPolicy;
1531
+ /**
1532
+ * Creates a new CDCPipeline.
1533
+ *
1534
+ * @param config - Pipeline configuration
1535
+ */
267
1536
  constructor(config: CDCPipelineConfig);
1537
+ /**
1538
+ * Returns the current pipeline state.
1539
+ *
1540
+ * @returns Current state ('stopped', 'running', or 'paused')
1541
+ */
268
1542
  getState(): CDCPipelineState;
1543
+ /**
1544
+ * Starts the pipeline.
1545
+ *
1546
+ * @description
1547
+ * Initializes the batcher and begins accepting events. If already
1548
+ * running, this method is a no-op.
1549
+ *
1550
+ * @example
1551
+ * ```typescript
1552
+ * await pipeline.start()
1553
+ * console.log(pipeline.getState()) // 'running'
1554
+ * ```
1555
+ */
269
1556
  start(): Promise<void>;
1557
+ /**
1558
+ * Stops the pipeline.
1559
+ *
1560
+ * @description
1561
+ * Flushes any pending events, stops the batcher, and sets state to stopped.
1562
+ * Returns information about events flushed during shutdown.
1563
+ *
1564
+ * @returns Promise resolving to stop result with flushed event count
1565
+ *
1566
+ * @example
1567
+ * ```typescript
1568
+ * const result = await pipeline.stop()
1569
+ * console.log(`Flushed ${result.flushedCount} events on shutdown`)
1570
+ * ```
1571
+ */
270
1572
  stop(): Promise<StopResult>;
1573
+ /**
1574
+ * Processes a single event.
1575
+ *
1576
+ * @description
1577
+ * Validates the event and adds it to the batcher for processing.
1578
+ * Updates metrics including latency tracking.
1579
+ *
1580
+ * @param event - The CDC event to process
1581
+ * @returns Promise resolving to process result
1582
+ *
1583
+ * @throws {CDCError} PROCESSING_ERROR - If pipeline is not running
1584
+ * @throws {CDCError} VALIDATION_ERROR - If event fails validation
1585
+ *
1586
+ * @example
1587
+ * ```typescript
1588
+ * const result = await pipeline.process(event)
1589
+ * if (result.success) {
1590
+ * console.log(`Processed event: ${result.eventId}`)
1591
+ * }
1592
+ * ```
1593
+ */
271
1594
  process(event: CDCEvent): Promise<ProcessResult>;
1595
+ /**
1596
+ * Processes multiple events.
1597
+ *
1598
+ * @description
1599
+ * Convenience method to process an array of events sequentially.
1600
+ *
1601
+ * @param events - Array of CDC events to process
1602
+ * @returns Promise resolving to array of process results
1603
+ *
1604
+ * @example
1605
+ * ```typescript
1606
+ * const results = await pipeline.processMany(events)
1607
+ * const successCount = results.filter(r => r.success).length
1608
+ * console.log(`Processed ${successCount}/${events.length} events`)
1609
+ * ```
1610
+ */
272
1611
  processMany(events: CDCEvent[]): Promise<ProcessResult[]>;
1612
+ /**
1613
+ * Manually flushes pending events.
1614
+ *
1615
+ * @description
1616
+ * Forces an immediate flush of the batcher and processes the
1617
+ * resulting batch through the pipeline.
1618
+ *
1619
+ * @example
1620
+ * ```typescript
1621
+ * await pipeline.flush()
1622
+ * console.log('All pending events flushed')
1623
+ * ```
1624
+ */
273
1625
  flush(): Promise<void>;
1626
+ /**
1627
+ * Handles a batch of events with retry logic.
1628
+ * @private
1629
+ */
274
1630
  private handleBatch;
1631
+ /**
1632
+ * Sleeps for the specified duration.
1633
+ * @private
1634
+ */
275
1635
  private sleep;
1636
+ /**
1637
+ * Updates the average latency metric.
1638
+ * @private
1639
+ */
276
1640
  private updateAvgLatency;
1641
+ /**
1642
+ * Returns current pipeline metrics.
1643
+ *
1644
+ * @description
1645
+ * Returns a copy of the current metrics. Metrics are cumulative
1646
+ * since pipeline creation.
1647
+ *
1648
+ * @returns Copy of current pipeline metrics
1649
+ *
1650
+ * @example
1651
+ * ```typescript
1652
+ * const metrics = pipeline.getMetrics()
1653
+ * console.log(`Processed: ${metrics.eventsProcessed}`)
1654
+ * console.log(`Batches: ${metrics.batchesGenerated}`)
1655
+ * console.log(`Errors: ${metrics.errors}`)
1656
+ * console.log(`Avg latency: ${metrics.avgProcessingLatencyMs}ms`)
1657
+ * ```
1658
+ */
277
1659
  getMetrics(): PipelineMetrics;
1660
+ /**
1661
+ * Registers an output handler.
1662
+ *
1663
+ * @description
1664
+ * Output handlers are called when a batch is successfully processed
1665
+ * and converted to Parquet format. Multiple handlers can be registered.
1666
+ *
1667
+ * @param handler - Callback to invoke for each successful batch
1668
+ *
1669
+ * @example
1670
+ * ```typescript
1671
+ * pipeline.onOutput(async (output) => {
1672
+ * await r2.put(`cdc/${output.batchId}.parquet`, output.parquetBuffer)
1673
+ * console.log(`Wrote ${output.events.length} events`)
1674
+ * })
1675
+ * ```
1676
+ */
278
1677
  onOutput(handler: OutputHandler): void;
1678
+ /**
1679
+ * Registers a dead letter handler.
1680
+ *
1681
+ * @description
1682
+ * Dead letter handlers are called when a batch fails after all
1683
+ * retry attempts are exhausted. Use this for alerting, logging,
1684
+ * or storing failed events for later reprocessing.
1685
+ *
1686
+ * @param handler - Callback to invoke for failed events
1687
+ *
1688
+ * @example
1689
+ * ```typescript
1690
+ * pipeline.onDeadLetter(async (events, error) => {
1691
+ * console.error(`Failed to process ${events.length} events:`, error)
1692
+ * // Store in dead letter queue for later retry
1693
+ * await dlq.put(events)
1694
+ * })
1695
+ * ```
1696
+ */
279
1697
  onDeadLetter(handler: DeadLetterHandler): void;
280
1698
  }
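Because `stop()` flushes pending events before resolving, a graceful-shutdown hook only needs one call. A sketch for a Node-style runtime (the `process` signal wiring is an assumption about the host environment; Workers-style runtimes would use their own lifecycle hooks):

```typescript
import { CDCPipeline } from 'gitx.do'

// Stop the pipeline exactly once on SIGINT/SIGTERM so no batched events are lost.
export function installShutdownHooks(pipeline: CDCPipeline): void {
  const shutdown = async (signal: string) => {
    const result = await pipeline.stop() // flushes pending events per the docs above
    console.log(`[${signal}] pipeline stopped, flushed ${result.flushedCount} events`)
    process.exit(0)
  }
  process.once('SIGINT', () => void shutdown('SIGINT'))
  process.once('SIGTERM', () => void shutdown('SIGTERM'))
}
```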
281
1699
  /**
282
- * Create a new CDC event
1700
+ * Creates a new CDC event.
1701
+ *
1702
+ * @description
1703
+ * Factory function to create a properly structured CDC event with
1704
+ * automatically generated ID and timestamp.
1705
+ *
1706
+ * @param type - The event type
1707
+ * @param source - The event source
1708
+ * @param payload - Event payload data
1709
+ * @param options - Optional configuration
1710
+ * @param options.sequence - Custom sequence number (default: 0)
1711
+ * @returns A new CDCEvent
1712
+ *
1713
+ * @example
1714
+ * ```typescript
1715
+ * const event = createCDCEvent('COMMIT_CREATED', 'push', {
1716
+ * operation: 'commit-create',
1717
+ * sha: 'abc123...',
1718
+ * treeSha: 'def456...',
1719
+ * parentShas: ['parent1...']
1720
+ * })
1721
+ *
1722
+ * // With sequence number
1723
+ * const sequencedEvent = createCDCEvent('REF_UPDATED', 'push', {
1724
+ * operation: 'ref-update',
1725
+ * refName: 'refs/heads/main',
1726
+ * oldSha: 'old...',
1727
+ * newSha: 'new...'
1728
+ * }, { sequence: 42 })
1729
+ * ```
283
1730
  */
284
1731
  export declare function createCDCEvent(type: CDCEventType, source: CDCEventSource, payload: CDCEventPayload, options?: {
285
1732
  sequence?: number;
286
1733
  }): CDCEvent;
287
1734
  /**
288
- * Serialize a CDC event to bytes
1735
+ * Serializes a CDC event to bytes.
1736
+ *
1737
+ * @description
1738
+ * Converts a CDCEvent to a JSON-encoded Uint8Array for storage or
1739
+ * transmission. Handles Uint8Array payload data by converting to arrays.
1740
+ *
1741
+ * @param event - The CDC event to serialize
1742
+ * @returns The serialized event as a Uint8Array
1743
+ *
1744
+ * @example
1745
+ * ```typescript
1746
+ * const bytes = serializeEvent(event)
1747
+ * await r2.put(`events/${event.id}`, bytes)
1748
+ * ```
1749
+ *
1750
+ * @see {@link deserializeEvent} - Reverse operation
289
1751
  */
290
1752
  export declare function serializeEvent(event: CDCEvent): Uint8Array;
291
1753
  /**
292
- * Deserialize bytes to a CDC event
1754
+ * Deserializes bytes to a CDC event.
1755
+ *
1756
+ * @description
1757
+ * Reconstructs a CDCEvent from JSON-encoded bytes. Handles Uint8Array
1758
+ * restoration for payload data that was converted to arrays during
1759
+ * serialization.
1760
+ *
1761
+ * @param bytes - The serialized event bytes
1762
+ * @returns The deserialized CDCEvent
1763
+ *
1764
+ * @example
1765
+ * ```typescript
1766
+ * const bytes = await r2.get(`events/${eventId}`)
1767
+ * const event = deserializeEvent(bytes)
1768
+ * console.log(`Event type: ${event.type}`)
1769
+ * ```
1770
+ *
1771
+ * @see {@link serializeEvent} - Reverse operation
293
1772
  */
294
1773
  export declare function deserializeEvent(bytes: Uint8Array): CDCEvent;
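Since the two helpers are inverses, a quick round trip is a handy smoke test for custom payloads. A sketch, assuming the helpers are importable from the package root and reusing the payload shape from the `createCDCEvent` example above:

```typescript
import { createCDCEvent, serializeEvent, deserializeEvent } from 'gitx.do'

// Round-trip an event through the byte encoding and confirm the identity fields survive.
const original = createCDCEvent('COMMIT_CREATED', 'push', {
  operation: 'commit-create',
  sha: 'abc123...',
  treeSha: 'def456...',
  parentShas: ['parent1...'],
})

const restored = deserializeEvent(serializeEvent(original))

if (restored.id !== original.id || restored.type !== original.type) {
  throw new Error('serialize/deserialize round-trip mismatch')
}
```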
295
1774
  /**
296
- * Validate a CDC event
1775
+ * Validates a CDC event.
1776
+ *
1777
+ * @description
1778
+ * Checks that an event has all required fields and valid values.
1779
+ * Throws a CDCError if validation fails.
1780
+ *
1781
+ * **Validation Rules:**
1782
+ * - Event must not be null/undefined
1783
+ * - Event ID must be a non-empty string
1784
+ * - Event type must be a valid CDCEventType
1785
+ * - Timestamp must be a non-negative number
1786
+ * - Sequence must be a non-negative number
1787
+ *
1788
+ * @param event - The CDC event to validate
1789
+ * @returns The validated event (for chaining)
1790
+ *
1791
+ * @throws {CDCError} VALIDATION_ERROR - If validation fails
1792
+ *
1793
+ * @example
1794
+ * ```typescript
1795
+ * try {
1796
+ * validateCDCEvent(event)
1797
+ * // Event is valid
1798
+ * } catch (error) {
1799
+ * if (error instanceof CDCError) {
1800
+ * console.log(`Invalid: ${error.message}`)
1801
+ * }
1802
+ * }
1803
+ * ```
297
1804
  */
298
1805
  export declare function validateCDCEvent(event: CDCEvent): CDCEvent;
299
1806
  /**
300
- * Start a pipeline with the given configuration
1807
+ * Starts a new pipeline with the given configuration.
1808
+ *
1809
+ * @description
1810
+ * Creates and starts a new CDCPipeline, registering it by ID for
1811
+ * later access. If a pipeline with the same ID already exists,
1812
+ * it will be replaced (the old pipeline is not automatically stopped).
1813
+ *
1814
+ * @param id - Unique identifier for the pipeline
1815
+ * @param config - Pipeline configuration
1816
+ * @returns The started pipeline instance
1817
+ *
1818
+ * @example
1819
+ * ```typescript
1820
+ * const pipeline = startPipeline('main', {
1821
+ * batchSize: 100,
1822
+ * flushIntervalMs: 5000,
1823
+ * maxRetries: 3,
1824
+ * parquetCompression: 'snappy',
1825
+ * outputPath: '/analytics',
1826
+ * schemaVersion: 1
1827
+ * })
1828
+ *
1829
+ * // Register handlers
1830
+ * pipeline.onOutput((output) => console.log(`Batch: ${output.batchId}`))
1831
+ * ```
301
1832
  */
302
1833
  export declare function startPipeline(id: string, config: CDCPipelineConfig): CDCPipeline;
303
1834
  /**
304
- * Stop a pipeline by ID
1835
+ * Stops a pipeline by ID.
1836
+ *
1837
+ * @description
1838
+ * Stops the pipeline identified by the given ID, flushing any pending
1839
+ * events and removing it from the registry.
1840
+ *
1841
+ * @param id - Pipeline identifier
1842
+ * @returns Promise resolving to the stop result (flushedCount is 0 if the pipeline is not found)
1843
+ *
1844
+ * @example
1845
+ * ```typescript
1846
+ * const result = await stopPipeline('main')
1847
+ * console.log(`Flushed ${result.flushedCount} events on shutdown`)
1848
+ * ```
305
1849
  */
306
1850
  export declare function stopPipeline(id: string): Promise<StopResult>;
307
1851
  /**
308
- * Flush a pipeline by ID
1852
+ * Flushes a pipeline by ID.
1853
+ *
1854
+ * @description
1855
+ * Forces an immediate flush of all pending events in the pipeline.
1856
+ * This is a no-op if the pipeline is not found.
1857
+ *
1858
+ * @param id - Pipeline identifier
1859
+ *
1860
+ * @example
1861
+ * ```typescript
1862
+ * await flushPipeline('main')
1863
+ * console.log('All pending events flushed')
1864
+ * ```
309
1865
  */
310
1866
  export declare function flushPipeline(id: string): Promise<void>;
311
1867
  /**
312
- * Get metrics for a pipeline by ID
1868
+ * Gets metrics for a pipeline by ID.
1869
+ *
1870
+ * @description
1871
+ * Returns a copy of the current metrics for the specified pipeline.
1872
+ * Returns null if the pipeline is not found.
1873
+ *
1874
+ * @param id - Pipeline identifier
1875
+ * @returns Pipeline metrics or null if not found
1876
+ *
1877
+ * @example
1878
+ * ```typescript
1879
+ * const metrics = getPipelineMetrics('main')
1880
+ * if (metrics) {
1881
+ * console.log(`Events processed: ${metrics.eventsProcessed}`)
1882
+ * console.log(`Errors: ${metrics.errors}`)
1883
+ * }
1884
+ * ```
313
1885
  */
314
1886
  export declare function getPipelineMetrics(id: string): PipelineMetrics | null;
315
1887
  export {};
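Taken together, the registry helpers cover the whole lifecycle without keeping a reference to the pipeline instance. A sketch of the typical sequence, assuming the helpers are importable from the package root, an ES module context (for top-level await), and the configuration shape used in the examples above:

```typescript
import {
  startPipeline,
  flushPipeline,
  getPipelineMetrics,
  stopPipeline,
  createCDCEvent,
} from 'gitx.do'

// Start a named pipeline, feed it one event, inspect metrics, then shut it down.
const pipeline = startPipeline('main', {
  batchSize: 100,
  flushIntervalMs: 5000,
  maxRetries: 3,
  parquetCompression: 'snappy',
  outputPath: '/analytics',
  schemaVersion: 1,
})

await pipeline.process(
  createCDCEvent('REF_UPDATED', 'push', {
    operation: 'ref-update',
    refName: 'refs/heads/main',
    oldSha: 'old...',
    newSha: 'new...',
  })
)

await flushPipeline('main')
console.log(getPipelineMetrics('main')) // cumulative counters, or null for an unknown id

const { flushedCount } = await stopPipeline('main')
console.log(`Flushed ${flushedCount} events on shutdown`)
```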