gitx.do 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/commands/blame.d.ts +259 -0
- package/dist/cli/commands/blame.d.ts.map +1 -0
- package/dist/cli/commands/blame.js +609 -0
- package/dist/cli/commands/blame.js.map +1 -0
- package/dist/cli/commands/branch.d.ts +249 -0
- package/dist/cli/commands/branch.d.ts.map +1 -0
- package/dist/cli/commands/branch.js +693 -0
- package/dist/cli/commands/branch.js.map +1 -0
- package/dist/cli/commands/commit.d.ts +182 -0
- package/dist/cli/commands/commit.d.ts.map +1 -0
- package/dist/cli/commands/commit.js +437 -0
- package/dist/cli/commands/commit.js.map +1 -0
- package/dist/cli/commands/diff.d.ts +464 -0
- package/dist/cli/commands/diff.d.ts.map +1 -0
- package/dist/cli/commands/diff.js +958 -0
- package/dist/cli/commands/diff.js.map +1 -0
- package/dist/cli/commands/log.d.ts +239 -0
- package/dist/cli/commands/log.d.ts.map +1 -0
- package/dist/cli/commands/log.js +535 -0
- package/dist/cli/commands/log.js.map +1 -0
- package/dist/cli/commands/review.d.ts +457 -0
- package/dist/cli/commands/review.d.ts.map +1 -0
- package/dist/cli/commands/review.js +533 -0
- package/dist/cli/commands/review.js.map +1 -0
- package/dist/cli/commands/status.d.ts +269 -0
- package/dist/cli/commands/status.d.ts.map +1 -0
- package/dist/cli/commands/status.js +493 -0
- package/dist/cli/commands/status.js.map +1 -0
- package/dist/cli/commands/web.d.ts +199 -0
- package/dist/cli/commands/web.d.ts.map +1 -0
- package/dist/cli/commands/web.js +696 -0
- package/dist/cli/commands/web.js.map +1 -0
- package/dist/cli/fs-adapter.d.ts +656 -0
- package/dist/cli/fs-adapter.d.ts.map +1 -0
- package/dist/cli/fs-adapter.js +1179 -0
- package/dist/cli/fs-adapter.js.map +1 -0
- package/dist/cli/index.d.ts +387 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +523 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/cli/ui/components/DiffView.d.ts +7 -0
- package/dist/cli/ui/components/DiffView.d.ts.map +1 -0
- package/dist/cli/ui/components/DiffView.js +11 -0
- package/dist/cli/ui/components/DiffView.js.map +1 -0
- package/dist/cli/ui/components/ErrorDisplay.d.ts +6 -0
- package/dist/cli/ui/components/ErrorDisplay.d.ts.map +1 -0
- package/dist/cli/ui/components/ErrorDisplay.js +11 -0
- package/dist/cli/ui/components/ErrorDisplay.js.map +1 -0
- package/dist/cli/ui/components/FuzzySearch.d.ts +9 -0
- package/dist/cli/ui/components/FuzzySearch.d.ts.map +1 -0
- package/dist/cli/ui/components/FuzzySearch.js +12 -0
- package/dist/cli/ui/components/FuzzySearch.js.map +1 -0
- package/dist/cli/ui/components/LoadingSpinner.d.ts +6 -0
- package/dist/cli/ui/components/LoadingSpinner.d.ts.map +1 -0
- package/dist/cli/ui/components/LoadingSpinner.js +10 -0
- package/dist/cli/ui/components/LoadingSpinner.js.map +1 -0
- package/dist/cli/ui/components/NavigationList.d.ts +9 -0
- package/dist/cli/ui/components/NavigationList.d.ts.map +1 -0
- package/dist/cli/ui/components/NavigationList.js +11 -0
- package/dist/cli/ui/components/NavigationList.js.map +1 -0
- package/dist/cli/ui/components/ScrollableContent.d.ts +8 -0
- package/dist/cli/ui/components/ScrollableContent.d.ts.map +1 -0
- package/dist/cli/ui/components/ScrollableContent.js +11 -0
- package/dist/cli/ui/components/ScrollableContent.js.map +1 -0
- package/dist/cli/ui/components/index.d.ts +7 -0
- package/dist/cli/ui/components/index.d.ts.map +1 -0
- package/dist/cli/ui/components/index.js +9 -0
- package/dist/cli/ui/components/index.js.map +1 -0
- package/dist/cli/ui/terminal-ui.d.ts +52 -0
- package/dist/cli/ui/terminal-ui.d.ts.map +1 -0
- package/dist/cli/ui/terminal-ui.js +121 -0
- package/dist/cli/ui/terminal-ui.js.map +1 -0
- package/dist/durable-object/object-store.d.ts +401 -23
- package/dist/durable-object/object-store.d.ts.map +1 -1
- package/dist/durable-object/object-store.js +414 -25
- package/dist/durable-object/object-store.js.map +1 -1
- package/dist/durable-object/schema.d.ts +188 -0
- package/dist/durable-object/schema.d.ts.map +1 -1
- package/dist/durable-object/schema.js +160 -0
- package/dist/durable-object/schema.js.map +1 -1
- package/dist/durable-object/wal.d.ts +336 -31
- package/dist/durable-object/wal.d.ts.map +1 -1
- package/dist/durable-object/wal.js +272 -27
- package/dist/durable-object/wal.js.map +1 -1
- package/dist/index.d.ts +379 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +379 -7
- package/dist/index.js.map +1 -1
- package/dist/mcp/adapter.d.ts +579 -38
- package/dist/mcp/adapter.d.ts.map +1 -1
- package/dist/mcp/adapter.js +426 -33
- package/dist/mcp/adapter.js.map +1 -1
- package/dist/mcp/sandbox.d.ts +532 -29
- package/dist/mcp/sandbox.d.ts.map +1 -1
- package/dist/mcp/sandbox.js +389 -22
- package/dist/mcp/sandbox.js.map +1 -1
- package/dist/mcp/sdk-adapter.d.ts +478 -56
- package/dist/mcp/sdk-adapter.d.ts.map +1 -1
- package/dist/mcp/sdk-adapter.js +346 -44
- package/dist/mcp/sdk-adapter.js.map +1 -1
- package/dist/mcp/tools.d.ts +445 -30
- package/dist/mcp/tools.d.ts.map +1 -1
- package/dist/mcp/tools.js +363 -33
- package/dist/mcp/tools.js.map +1 -1
- package/dist/ops/blame.d.ts +424 -21
- package/dist/ops/blame.d.ts.map +1 -1
- package/dist/ops/blame.js +303 -20
- package/dist/ops/blame.js.map +1 -1
- package/dist/ops/branch.d.ts +583 -32
- package/dist/ops/branch.d.ts.map +1 -1
- package/dist/ops/branch.js +365 -23
- package/dist/ops/branch.js.map +1 -1
- package/dist/ops/commit-traversal.d.ts +164 -24
- package/dist/ops/commit-traversal.d.ts.map +1 -1
- package/dist/ops/commit-traversal.js +68 -2
- package/dist/ops/commit-traversal.js.map +1 -1
- package/dist/ops/commit.d.ts +387 -53
- package/dist/ops/commit.d.ts.map +1 -1
- package/dist/ops/commit.js +249 -29
- package/dist/ops/commit.js.map +1 -1
- package/dist/ops/merge-base.d.ts +195 -21
- package/dist/ops/merge-base.d.ts.map +1 -1
- package/dist/ops/merge-base.js +122 -12
- package/dist/ops/merge-base.js.map +1 -1
- package/dist/ops/merge.d.ts +600 -130
- package/dist/ops/merge.d.ts.map +1 -1
- package/dist/ops/merge.js +408 -60
- package/dist/ops/merge.js.map +1 -1
- package/dist/ops/tag.d.ts +67 -2
- package/dist/ops/tag.d.ts.map +1 -1
- package/dist/ops/tag.js +42 -1
- package/dist/ops/tag.js.map +1 -1
- package/dist/ops/tree-builder.d.ts +102 -6
- package/dist/ops/tree-builder.d.ts.map +1 -1
- package/dist/ops/tree-builder.js +30 -5
- package/dist/ops/tree-builder.js.map +1 -1
- package/dist/ops/tree-diff.d.ts +50 -2
- package/dist/ops/tree-diff.d.ts.map +1 -1
- package/dist/ops/tree-diff.js +50 -2
- package/dist/ops/tree-diff.js.map +1 -1
- package/dist/pack/delta.d.ts +211 -39
- package/dist/pack/delta.d.ts.map +1 -1
- package/dist/pack/delta.js +232 -46
- package/dist/pack/delta.js.map +1 -1
- package/dist/pack/format.d.ts +390 -28
- package/dist/pack/format.d.ts.map +1 -1
- package/dist/pack/format.js +344 -33
- package/dist/pack/format.js.map +1 -1
- package/dist/pack/full-generation.d.ts +313 -28
- package/dist/pack/full-generation.d.ts.map +1 -1
- package/dist/pack/full-generation.js +238 -19
- package/dist/pack/full-generation.js.map +1 -1
- package/dist/pack/generation.d.ts +346 -23
- package/dist/pack/generation.d.ts.map +1 -1
- package/dist/pack/generation.js +269 -21
- package/dist/pack/generation.js.map +1 -1
- package/dist/pack/index.d.ts +407 -86
- package/dist/pack/index.d.ts.map +1 -1
- package/dist/pack/index.js +351 -70
- package/dist/pack/index.js.map +1 -1
- package/dist/refs/branch.d.ts +517 -71
- package/dist/refs/branch.d.ts.map +1 -1
- package/dist/refs/branch.js +410 -26
- package/dist/refs/branch.js.map +1 -1
- package/dist/refs/storage.d.ts +610 -57
- package/dist/refs/storage.d.ts.map +1 -1
- package/dist/refs/storage.js +481 -29
- package/dist/refs/storage.js.map +1 -1
- package/dist/refs/tag.d.ts +677 -67
- package/dist/refs/tag.d.ts.map +1 -1
- package/dist/refs/tag.js +497 -30
- package/dist/refs/tag.js.map +1 -1
- package/dist/storage/lru-cache.d.ts +556 -53
- package/dist/storage/lru-cache.d.ts.map +1 -1
- package/dist/storage/lru-cache.js +439 -36
- package/dist/storage/lru-cache.js.map +1 -1
- package/dist/storage/object-index.d.ts +483 -38
- package/dist/storage/object-index.d.ts.map +1 -1
- package/dist/storage/object-index.js +388 -22
- package/dist/storage/object-index.js.map +1 -1
- package/dist/storage/r2-pack.d.ts +957 -94
- package/dist/storage/r2-pack.d.ts.map +1 -1
- package/dist/storage/r2-pack.js +756 -48
- package/dist/storage/r2-pack.js.map +1 -1
- package/dist/tiered/cdc-pipeline.d.ts +1610 -38
- package/dist/tiered/cdc-pipeline.d.ts.map +1 -1
- package/dist/tiered/cdc-pipeline.js +1131 -22
- package/dist/tiered/cdc-pipeline.js.map +1 -1
- package/dist/tiered/migration.d.ts +903 -41
- package/dist/tiered/migration.d.ts.map +1 -1
- package/dist/tiered/migration.js +646 -24
- package/dist/tiered/migration.js.map +1 -1
- package/dist/tiered/parquet-writer.d.ts +944 -47
- package/dist/tiered/parquet-writer.d.ts.map +1 -1
- package/dist/tiered/parquet-writer.js +667 -39
- package/dist/tiered/parquet-writer.js.map +1 -1
- package/dist/tiered/read-path.d.ts +728 -34
- package/dist/tiered/read-path.d.ts.map +1 -1
- package/dist/tiered/read-path.js +310 -27
- package/dist/tiered/read-path.js.map +1 -1
- package/dist/types/objects.d.ts +457 -0
- package/dist/types/objects.d.ts.map +1 -1
- package/dist/types/objects.js +305 -4
- package/dist/types/objects.js.map +1 -1
- package/dist/types/storage.d.ts +407 -35
- package/dist/types/storage.d.ts.map +1 -1
- package/dist/types/storage.js +27 -3
- package/dist/types/storage.js.map +1 -1
- package/dist/utils/hash.d.ts +133 -12
- package/dist/utils/hash.d.ts.map +1 -1
- package/dist/utils/hash.js +133 -12
- package/dist/utils/hash.js.map +1 -1
- package/dist/utils/sha1.d.ts +102 -9
- package/dist/utils/sha1.d.ts.map +1 -1
- package/dist/utils/sha1.js +114 -11
- package/dist/utils/sha1.js.map +1 -1
- package/dist/wire/capabilities.d.ts +896 -88
- package/dist/wire/capabilities.d.ts.map +1 -1
- package/dist/wire/capabilities.js +566 -62
- package/dist/wire/capabilities.js.map +1 -1
- package/dist/wire/pkt-line.d.ts +293 -15
- package/dist/wire/pkt-line.d.ts.map +1 -1
- package/dist/wire/pkt-line.js +251 -15
- package/dist/wire/pkt-line.js.map +1 -1
- package/dist/wire/receive-pack.d.ts +814 -64
- package/dist/wire/receive-pack.d.ts.map +1 -1
- package/dist/wire/receive-pack.js +542 -41
- package/dist/wire/receive-pack.js.map +1 -1
- package/dist/wire/smart-http.d.ts +575 -97
- package/dist/wire/smart-http.d.ts.map +1 -1
- package/dist/wire/smart-http.js +337 -46
- package/dist/wire/smart-http.js.map +1 -1
- package/dist/wire/upload-pack.d.ts +492 -98
- package/dist/wire/upload-pack.d.ts.map +1 -1
- package/dist/wire/upload-pack.js +347 -59
- package/dist/wire/upload-pack.js.map +1 -1
- package/package.json +10 -2
|
@@ -1,23 +1,131 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* CDC (Change Data Capture) Pipeline for Git Operations
|
|
2
|
+
* @fileoverview CDC (Change Data Capture) Pipeline for Git Operations
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
* -
|
|
7
|
-
* - Batching with size and time-based flushing
|
|
8
|
-
* - Error handling with retry policies
|
|
4
|
+
* @description
|
|
5
|
+
* This module provides a comprehensive Change Data Capture system for Git operations,
|
|
6
|
+
* enabling real-time event streaming, transformation, and analytics for Git repository events.
|
|
9
7
|
*
|
|
10
|
-
*
|
|
8
|
+
* ## Key Features
|
|
9
|
+
*
|
|
10
|
+
* - **Event Capture**: Captures git operations (push, fetch, commits, branches, tags, merges)
|
|
11
|
+
* - **Parquet Transformation**: Converts events to columnar Parquet format for analytics
|
|
12
|
+
* - **Batching**: Efficient event batching with configurable size and time-based flushing
|
|
13
|
+
* - **Retry Policies**: Configurable exponential backoff with jitter for resilient processing
|
|
14
|
+
* - **Dead Letter Queue**: Handles failed events for later reprocessing
|
|
15
|
+
* - **Metrics**: Built-in tracking for events processed, batches, errors, and latency
|
|
16
|
+
*
|
|
17
|
+
* ## Architecture
|
|
18
|
+
*
|
|
19
|
+
* The pipeline consists of several components:
|
|
20
|
+
* 1. **CDCEventCapture**: Captures git operations and converts them to CDCEvents
|
|
21
|
+
* 2. **CDCBatcher**: Batches events for efficient processing
|
|
22
|
+
* 3. **ParquetTransformer**: Transforms events to Parquet format
|
|
23
|
+
* 4. **CDCPipeline**: Orchestrates the entire flow with error handling
|
|
24
|
+
*
|
|
25
|
+
* ## Event Flow
|
|
26
|
+
*
|
|
27
|
+
* ```
|
|
28
|
+
* Git Operation -> CDCEventCapture -> CDCBatcher -> ParquetTransformer -> Output
|
|
29
|
+
* |
|
|
30
|
+
* v
|
|
31
|
+
* (On failure) Dead Letter Queue
|
|
32
|
+
* ```
|
|
33
|
+
*
|
|
34
|
+
* @module tiered/cdc-pipeline
|
|
35
|
+
*
|
|
36
|
+
* @example
|
|
37
|
+
* ```typescript
|
|
38
|
+
* // Create and start a pipeline
|
|
39
|
+
* const pipeline = new CDCPipeline({
|
|
40
|
+
* batchSize: 100,
|
|
41
|
+
* flushIntervalMs: 5000,
|
|
42
|
+
* maxRetries: 3,
|
|
43
|
+
* parquetCompression: 'snappy',
|
|
44
|
+
* outputPath: '/analytics',
|
|
45
|
+
* schemaVersion: 1
|
|
46
|
+
* })
|
|
47
|
+
*
|
|
48
|
+
* await pipeline.start()
|
|
49
|
+
*
|
|
50
|
+
* // Process events
|
|
51
|
+
* pipeline.onOutput((output) => {
|
|
52
|
+
* console.log(`Generated batch: ${output.batchId}`)
|
|
53
|
+
* console.log(`Events: ${output.events.length}`)
|
|
54
|
+
* console.log(`Parquet size: ${output.parquetBuffer.length} bytes`)
|
|
55
|
+
* })
|
|
56
|
+
*
|
|
57
|
+
* pipeline.onDeadLetter((events, error) => {
|
|
58
|
+
* console.error(`Failed events: ${events.length}`, error)
|
|
59
|
+
* })
|
|
60
|
+
*
|
|
61
|
+
* // Create and process an event
|
|
62
|
+
* const event = createCDCEvent('COMMIT_CREATED', 'push', {
|
|
63
|
+
* operation: 'commit-create',
|
|
64
|
+
* sha: 'abc123...',
|
|
65
|
+
* treeSha: 'def456...',
|
|
66
|
+
* parentShas: ['parent1...']
|
|
67
|
+
* })
|
|
68
|
+
*
|
|
69
|
+
* await pipeline.process(event)
|
|
70
|
+
*
|
|
71
|
+
* // Get metrics
|
|
72
|
+
* const metrics = pipeline.getMetrics()
|
|
73
|
+
* console.log(`Processed: ${metrics.eventsProcessed}`)
|
|
74
|
+
* console.log(`Batches: ${metrics.batchesGenerated}`)
|
|
75
|
+
*
|
|
76
|
+
* // Stop the pipeline
|
|
77
|
+
* await pipeline.stop()
|
|
78
|
+
* ```
|
|
79
|
+
*
|
|
80
|
+
* @see {@link CDCPipeline} - Main pipeline orchestration class
|
|
81
|
+
* @see {@link CDCEventCapture} - Event capture from git operations
|
|
82
|
+
* @see {@link ParquetTransformer} - Parquet format transformation
|
|
11
83
|
*/
|
|
12
84
|
// ============================================================================
|
|
13
85
|
// Error Classes
|
|
14
86
|
// ============================================================================
|
|
15
87
|
/**
|
|
16
|
-
* Custom error class for CDC operations
|
|
88
|
+
* Custom error class for CDC operations.
|
|
89
|
+
*
|
|
90
|
+
* @description
|
|
91
|
+
* CDCError provides structured error information for CDC pipeline failures,
|
|
92
|
+
* including an error type for programmatic handling and optional cause for
|
|
93
|
+
* error chaining.
|
|
94
|
+
*
|
|
95
|
+
* @example
|
|
96
|
+
* ```typescript
|
|
97
|
+
* try {
|
|
98
|
+
* await pipeline.process(event)
|
|
99
|
+
* } catch (error) {
|
|
100
|
+
* if (error instanceof CDCError) {
|
|
101
|
+
* switch (error.type) {
|
|
102
|
+
* case 'VALIDATION_ERROR':
|
|
103
|
+
* console.log('Invalid event:', error.message)
|
|
104
|
+
* break
|
|
105
|
+
* case 'PROCESSING_ERROR':
|
|
106
|
+
* console.log('Processing failed:', error.message)
|
|
107
|
+
* if (error.cause) {
|
|
108
|
+
* console.log('Caused by:', error.cause.message)
|
|
109
|
+
* }
|
|
110
|
+
* break
|
|
111
|
+
* }
|
|
112
|
+
* }
|
|
113
|
+
* }
|
|
114
|
+
* ```
|
|
115
|
+
*
|
|
116
|
+
* @class CDCError
|
|
117
|
+
* @extends Error
|
|
17
118
|
*/
|
|
18
119
|
export class CDCError extends Error {
|
|
19
120
|
type;
|
|
20
121
|
cause;
|
|
122
|
+
/**
|
|
123
|
+
* Creates a new CDCError.
|
|
124
|
+
*
|
|
125
|
+
* @param type - Error type for categorization
|
|
126
|
+
* @param message - Human-readable error message
|
|
127
|
+
* @param cause - Optional underlying error that caused this error
|
|
128
|
+
*/
|
|
21
129
|
constructor(type, message, cause) {
|
|
22
130
|
super(message);
|
|
23
131
|
this.type = type;
|
|
@@ -26,16 +134,98 @@ export class CDCError extends Error {
|
|
|
26
134
|
}
|
|
27
135
|
}
|
|
28
136
|
/**
|
|
29
|
-
* Retry policy
|
|
137
|
+
* Retry policy implementing exponential backoff with optional jitter.
|
|
138
|
+
*
|
|
139
|
+
* @description
|
|
140
|
+
* Provides a robust retry mechanism for handling transient failures.
|
|
141
|
+
* Uses exponential backoff to space out retry attempts, with optional
|
|
142
|
+
* jitter to prevent synchronized retries from multiple clients.
|
|
143
|
+
*
|
|
144
|
+
* **Backoff Formula:**
|
|
145
|
+
* `delay = min(initialDelay * (multiplier ^ attempt), maxDelay)`
|
|
146
|
+
*
|
|
147
|
+
* **With Jitter:**
|
|
148
|
+
* `delay = delay * random(0.5, 1.5)`
|
|
149
|
+
*
|
|
150
|
+
* @example
|
|
151
|
+
* ```typescript
|
|
152
|
+
* const policy = new CDCRetryPolicy({
|
|
153
|
+
* maxRetries: 3,
|
|
154
|
+
* initialDelayMs: 100,
|
|
155
|
+
* maxDelayMs: 5000,
|
|
156
|
+
* backoffMultiplier: 2,
|
|
157
|
+
* jitter: true
|
|
158
|
+
* })
|
|
159
|
+
*
|
|
160
|
+
* let attempts = 0
|
|
161
|
+
* while (attempts < 10) {
|
|
162
|
+
* try {
|
|
163
|
+
* await doOperation()
|
|
164
|
+
* break
|
|
165
|
+
* } catch (error) {
|
|
166
|
+
* attempts++
|
|
167
|
+
* if (!policy.shouldRetry(attempts)) {
|
|
168
|
+
* throw new Error('Max retries exceeded')
|
|
169
|
+
* }
|
|
170
|
+
* const delay = policy.getDelay(attempts)
|
|
171
|
+
* console.log(`Retry ${attempts} after ${delay}ms`)
|
|
172
|
+
* await sleep(delay)
|
|
173
|
+
* }
|
|
174
|
+
* }
|
|
175
|
+
* ```
|
|
176
|
+
*
|
|
177
|
+
* @class CDCRetryPolicy
|
|
30
178
|
*/
|
|
31
179
|
export class CDCRetryPolicy {
|
|
180
|
+
/**
|
|
181
|
+
* Retry configuration.
|
|
182
|
+
* @private
|
|
183
|
+
*/
|
|
32
184
|
config;
|
|
185
|
+
/**
|
|
186
|
+
* Creates a new retry policy.
|
|
187
|
+
*
|
|
188
|
+
* @param config - Retry policy configuration
|
|
189
|
+
*/
|
|
33
190
|
constructor(config) {
|
|
34
191
|
this.config = config;
|
|
35
192
|
}
|
|
193
|
+
/**
|
|
194
|
+
* Determines whether another retry should be attempted.
|
|
195
|
+
*
|
|
196
|
+
* @param attemptCount - Number of attempts already made
|
|
197
|
+
* @returns true if more retries are allowed, false otherwise
|
|
198
|
+
*
|
|
199
|
+
* @example
|
|
200
|
+
* ```typescript
|
|
201
|
+
* if (policy.shouldRetry(3)) {
|
|
202
|
+
* // Retry is allowed
|
|
203
|
+
* }
|
|
204
|
+
* ```
|
|
205
|
+
*/
|
|
36
206
|
shouldRetry(attemptCount) {
|
|
37
207
|
return attemptCount < this.config.maxRetries;
|
|
38
208
|
}
|
|
209
|
+
/**
|
|
210
|
+
* Calculates the delay before the next retry.
|
|
211
|
+
*
|
|
212
|
+
* @description
|
|
213
|
+
* Computes delay using exponential backoff, capped at maxDelayMs.
|
|
214
|
+
* If jitter is enabled, applies a random factor between 0.5x and 1.5x.
|
|
215
|
+
*
|
|
216
|
+
* @param attemptCount - Number of attempts already made (1-indexed)
|
|
217
|
+
* @returns Delay in milliseconds before next retry
|
|
218
|
+
*
|
|
219
|
+
* @example
|
|
220
|
+
* ```typescript
|
|
221
|
+
* // With initialDelay=100, multiplier=2:
|
|
222
|
+
* // Attempt 1: 100ms * 2^1 = 200ms
|
|
223
|
+
* // Attempt 2: 100ms * 2^2 = 400ms
|
|
224
|
+
* // Attempt 3: 100ms * 2^3 = 800ms
|
|
225
|
+
* const delay = policy.getDelay(attemptCount)
|
|
226
|
+
* await sleep(delay)
|
|
227
|
+
* ```
|
|
228
|
+
*/
|
|
39
229
|
getDelay(attemptCount) {
|
|
40
230
|
let delay = this.config.initialDelayMs * Math.pow(this.config.backoffMultiplier, attemptCount);
|
|
41
231
|
delay = Math.min(delay, this.config.maxDelayMs);
|
|
@@ -51,19 +241,89 @@ export class CDCRetryPolicy {
|
|
|
51
241
|
// CDC Event Capture
|
|
52
242
|
// ============================================================================
|
|
53
243
|
/**
|
|
54
|
-
* Captures git operations and converts them to CDC events
|
|
244
|
+
* Captures git operations and converts them to CDC events.
|
|
245
|
+
*
|
|
246
|
+
* @description
|
|
247
|
+
* CDCEventCapture hooks into git operations and generates CDCEvents for each
|
|
248
|
+
* operation. It maintains an internal buffer of events that can be flushed
|
|
249
|
+
* manually or automatically when the buffer reaches a configured size.
|
|
250
|
+
*
|
|
251
|
+
* **Supported Operations:**
|
|
252
|
+
* - Object creation/deletion (blobs, trees, commits, tags)
|
|
253
|
+
* - Reference updates (branches, tags)
|
|
254
|
+
* - Commit creation
|
|
255
|
+
* - Pack reception
|
|
256
|
+
* - Branch creation/deletion
|
|
257
|
+
* - Tag creation
|
|
258
|
+
* - Merge completion
|
|
259
|
+
*
|
|
260
|
+
* **Event Ordering:**
|
|
261
|
+
* Events are assigned monotonically increasing sequence numbers within a
|
|
262
|
+
* capture session. This ensures proper ordering for replay and analytics.
|
|
263
|
+
*
|
|
264
|
+
* @example
|
|
265
|
+
* ```typescript
|
|
266
|
+
* const capture = new CDCEventCapture({ maxBufferSize: 100 })
|
|
267
|
+
*
|
|
268
|
+
* // Add a listener for real-time processing
|
|
269
|
+
* capture.addListener((event) => {
|
|
270
|
+
* console.log(`Event: ${event.type} - ${event.id}`)
|
|
271
|
+
* })
|
|
272
|
+
*
|
|
273
|
+
* // Capture git operations
|
|
274
|
+
* await capture.onCommitCreated('abc123...', 'tree456...', ['parent789...'])
|
|
275
|
+
* await capture.onRefUpdate('refs/heads/main', 'old...', 'new...')
|
|
276
|
+
*
|
|
277
|
+
* // Get buffered events
|
|
278
|
+
* console.log(`Buffer size: ${capture.getBufferSize()}`)
|
|
279
|
+
*
|
|
280
|
+
* // Flush buffer
|
|
281
|
+
* const events = await capture.flush()
|
|
282
|
+
* console.log(`Flushed ${events.length} events`)
|
|
283
|
+
* ```
|
|
284
|
+
*
|
|
285
|
+
* @class CDCEventCapture
|
|
55
286
|
*/
|
|
56
287
|
export class CDCEventCapture {
|
|
288
|
+
/**
|
|
289
|
+
* Buffer of captured events.
|
|
290
|
+
* @private
|
|
291
|
+
*/
|
|
57
292
|
events = [];
|
|
293
|
+
/**
|
|
294
|
+
* Monotonically increasing sequence counter.
|
|
295
|
+
* @private
|
|
296
|
+
*/
|
|
58
297
|
sequenceCounter = 0;
|
|
298
|
+
/**
|
|
299
|
+
* Registered event listeners.
|
|
300
|
+
* @private
|
|
301
|
+
*/
|
|
59
302
|
listeners = [];
|
|
303
|
+
/**
|
|
304
|
+
* Maximum buffer size before auto-flush.
|
|
305
|
+
* @private
|
|
306
|
+
*/
|
|
60
307
|
maxBufferSize;
|
|
308
|
+
/**
|
|
309
|
+
* Creates a new CDC event capture instance.
|
|
310
|
+
*
|
|
311
|
+
* @param options - Configuration options
|
|
312
|
+
*/
|
|
61
313
|
constructor(options = {}) {
|
|
62
314
|
this.maxBufferSize = options.maxBufferSize ?? Infinity;
|
|
63
315
|
}
|
|
316
|
+
/**
|
|
317
|
+
* Generates a unique event ID.
|
|
318
|
+
* @private
|
|
319
|
+
*/
|
|
64
320
|
generateEventId() {
|
|
65
321
|
return `evt-${Date.now()}-${Math.random().toString(36).slice(2)}`;
|
|
66
322
|
}
|
|
323
|
+
/**
|
|
324
|
+
* Emits an event to the buffer and notifies listeners.
|
|
325
|
+
* @private
|
|
326
|
+
*/
|
|
67
327
|
async emitEvent(event) {
|
|
68
328
|
// Auto-flush if buffer is full
|
|
69
329
|
if (this.events.length >= this.maxBufferSize) {
|
|
@@ -75,9 +335,28 @@ export class CDCEventCapture {
|
|
|
75
335
|
listener(event);
|
|
76
336
|
}
|
|
77
337
|
}
|
|
338
|
+
/**
|
|
339
|
+
* Returns the next sequence number.
|
|
340
|
+
* @private
|
|
341
|
+
*/
|
|
78
342
|
nextSequence() {
|
|
79
343
|
return ++this.sequenceCounter;
|
|
80
344
|
}
|
|
345
|
+
/**
|
|
346
|
+
* Captures an object put (creation) operation.
|
|
347
|
+
*
|
|
348
|
+
* @description
|
|
349
|
+
* Called when a git object (blob, tree, commit, tag) is written to storage.
|
|
350
|
+
*
|
|
351
|
+
* @param sha - SHA-1 hash of the object
|
|
352
|
+
* @param type - Object type (blob, tree, commit, tag)
|
|
353
|
+
* @param data - Raw object data
|
|
354
|
+
*
|
|
355
|
+
* @example
|
|
356
|
+
* ```typescript
|
|
357
|
+
* await capture.onObjectPut('abc123...', 'blob', blobData)
|
|
358
|
+
* ```
|
|
359
|
+
*/
|
|
81
360
|
async onObjectPut(sha, type, data) {
|
|
82
361
|
const event = {
|
|
83
362
|
id: this.generateEventId(),
|
|
@@ -95,6 +374,19 @@ export class CDCEventCapture {
|
|
|
95
374
|
};
|
|
96
375
|
await this.emitEvent(event);
|
|
97
376
|
}
|
|
377
|
+
/**
|
|
378
|
+
* Captures an object deletion operation.
|
|
379
|
+
*
|
|
380
|
+
* @description
|
|
381
|
+
* Called when a git object is deleted, typically during garbage collection.
|
|
382
|
+
*
|
|
383
|
+
* @param sha - SHA-1 hash of the deleted object
|
|
384
|
+
*
|
|
385
|
+
* @example
|
|
386
|
+
* ```typescript
|
|
387
|
+
* await capture.onObjectDelete('abc123...')
|
|
388
|
+
* ```
|
|
389
|
+
*/
|
|
98
390
|
async onObjectDelete(sha) {
|
|
99
391
|
const event = {
|
|
100
392
|
id: this.generateEventId(),
|
|
@@ -110,6 +402,25 @@ export class CDCEventCapture {
|
|
|
110
402
|
};
|
|
111
403
|
await this.emitEvent(event);
|
|
112
404
|
}
|
|
405
|
+
/**
|
|
406
|
+
* Captures a reference update operation.
|
|
407
|
+
*
|
|
408
|
+
* @description
|
|
409
|
+
* Called when a git reference (branch, tag) is updated to point to a new commit.
|
|
410
|
+
*
|
|
411
|
+
* @param refName - Full reference name (e.g., 'refs/heads/main')
|
|
412
|
+
* @param oldSha - Previous SHA (all zeros for new refs)
|
|
413
|
+
* @param newSha - New SHA (all zeros for deleted refs)
|
|
414
|
+
*
|
|
415
|
+
* @example
|
|
416
|
+
* ```typescript
|
|
417
|
+
* await capture.onRefUpdate(
|
|
418
|
+
* 'refs/heads/main',
|
|
419
|
+
* 'oldcommit123...',
|
|
420
|
+
* 'newcommit456...'
|
|
421
|
+
* )
|
|
422
|
+
* ```
|
|
423
|
+
*/
|
|
113
424
|
async onRefUpdate(refName, oldSha, newSha) {
|
|
114
425
|
const event = {
|
|
115
426
|
id: this.generateEventId(),
|
|
@@ -127,6 +438,25 @@ export class CDCEventCapture {
|
|
|
127
438
|
};
|
|
128
439
|
await this.emitEvent(event);
|
|
129
440
|
}
|
|
441
|
+
/**
|
|
442
|
+
* Captures a commit creation operation.
|
|
443
|
+
*
|
|
444
|
+
* @description
|
|
445
|
+
* Called when a new commit object is created.
|
|
446
|
+
*
|
|
447
|
+
* @param commitSha - SHA-1 hash of the commit
|
|
448
|
+
* @param treeSha - SHA-1 hash of the tree the commit points to
|
|
449
|
+
* @param parentShas - Array of parent commit SHAs
|
|
450
|
+
*
|
|
451
|
+
* @example
|
|
452
|
+
* ```typescript
|
|
453
|
+
* await capture.onCommitCreated(
|
|
454
|
+
* 'commitabc123...',
|
|
455
|
+
* 'treedef456...',
|
|
456
|
+
* ['parent1...', 'parent2...']
|
|
457
|
+
* )
|
|
458
|
+
* ```
|
|
459
|
+
*/
|
|
130
460
|
async onCommitCreated(commitSha, treeSha, parentShas) {
|
|
131
461
|
const event = {
|
|
132
462
|
id: this.generateEventId(),
|
|
@@ -144,6 +474,20 @@ export class CDCEventCapture {
|
|
|
144
474
|
};
|
|
145
475
|
await this.emitEvent(event);
|
|
146
476
|
}
|
|
477
|
+
/**
|
|
478
|
+
* Captures a pack reception operation.
|
|
479
|
+
*
|
|
480
|
+
* @description
|
|
481
|
+
* Called when a packfile is received during a push or fetch operation.
|
|
482
|
+
*
|
|
483
|
+
* @param packData - Raw packfile data
|
|
484
|
+
* @param objectCount - Number of objects in the pack
|
|
485
|
+
*
|
|
486
|
+
* @example
|
|
487
|
+
* ```typescript
|
|
488
|
+
* await capture.onPackReceived(packBuffer, 42)
|
|
489
|
+
* ```
|
|
490
|
+
*/
|
|
147
491
|
async onPackReceived(packData, objectCount) {
|
|
148
492
|
const event = {
|
|
149
493
|
id: this.generateEventId(),
|
|
@@ -160,6 +504,17 @@ export class CDCEventCapture {
|
|
|
160
504
|
};
|
|
161
505
|
await this.emitEvent(event);
|
|
162
506
|
}
|
|
507
|
+
/**
|
|
508
|
+
* Captures a branch creation operation.
|
|
509
|
+
*
|
|
510
|
+
* @param branchName - Name of the branch (without refs/heads/ prefix)
|
|
511
|
+
* @param sha - SHA-1 hash the branch points to
|
|
512
|
+
*
|
|
513
|
+
* @example
|
|
514
|
+
* ```typescript
|
|
515
|
+
* await capture.onBranchCreated('feature-x', 'abc123...')
|
|
516
|
+
* ```
|
|
517
|
+
*/
|
|
163
518
|
async onBranchCreated(branchName, sha) {
|
|
164
519
|
const event = {
|
|
165
520
|
id: this.generateEventId(),
|
|
@@ -176,6 +531,16 @@ export class CDCEventCapture {
|
|
|
176
531
|
};
|
|
177
532
|
await this.emitEvent(event);
|
|
178
533
|
}
|
|
534
|
+
/**
|
|
535
|
+
* Captures a branch deletion operation.
|
|
536
|
+
*
|
|
537
|
+
* @param branchName - Name of the deleted branch
|
|
538
|
+
*
|
|
539
|
+
* @example
|
|
540
|
+
* ```typescript
|
|
541
|
+
* await capture.onBranchDeleted('feature-x')
|
|
542
|
+
* ```
|
|
543
|
+
*/
|
|
179
544
|
async onBranchDeleted(branchName) {
|
|
180
545
|
const event = {
|
|
181
546
|
id: this.generateEventId(),
|
|
@@ -191,6 +556,17 @@ export class CDCEventCapture {
|
|
|
191
556
|
};
|
|
192
557
|
await this.emitEvent(event);
|
|
193
558
|
}
|
|
559
|
+
/**
|
|
560
|
+
* Captures a tag creation operation.
|
|
561
|
+
*
|
|
562
|
+
* @param tagName - Name of the tag
|
|
563
|
+
* @param sha - SHA-1 hash the tag points to
|
|
564
|
+
*
|
|
565
|
+
* @example
|
|
566
|
+
* ```typescript
|
|
567
|
+
* await capture.onTagCreated('v1.0.0', 'abc123...')
|
|
568
|
+
* ```
|
|
569
|
+
*/
|
|
194
570
|
async onTagCreated(tagName, sha) {
|
|
195
571
|
const event = {
|
|
196
572
|
id: this.generateEventId(),
|
|
@@ -207,6 +583,18 @@ export class CDCEventCapture {
|
|
|
207
583
|
};
|
|
208
584
|
await this.emitEvent(event);
|
|
209
585
|
}
|
|
586
|
+
/**
|
|
587
|
+
* Captures a merge completion operation.
|
|
588
|
+
*
|
|
589
|
+
* @param mergeSha - SHA-1 hash of the merge commit
|
|
590
|
+
* @param baseSha - SHA-1 hash of the base commit
|
|
591
|
+
* @param headSha - SHA-1 hash of the head commit being merged
|
|
592
|
+
*
|
|
593
|
+
* @example
|
|
594
|
+
* ```typescript
|
|
595
|
+
* await capture.onMergeCompleted('merge123...', 'base456...', 'head789...')
|
|
596
|
+
* ```
|
|
597
|
+
*/
|
|
210
598
|
async onMergeCompleted(mergeSha, baseSha, headSha) {
|
|
211
599
|
const event = {
|
|
212
600
|
id: this.generateEventId(),
|
|
@@ -224,20 +612,66 @@ export class CDCEventCapture {
|
|
|
224
612
|
};
|
|
225
613
|
await this.emitEvent(event);
|
|
226
614
|
}
|
|
615
|
+
/**
|
|
616
|
+
* Returns a copy of all buffered events.
|
|
617
|
+
*
|
|
618
|
+
* @returns Array of buffered events
|
|
619
|
+
*/
|
|
227
620
|
getEvents() {
|
|
228
621
|
return [...this.events];
|
|
229
622
|
}
|
|
623
|
+
/**
|
|
624
|
+
* Returns the current buffer size.
|
|
625
|
+
*
|
|
626
|
+
* @returns Number of events in the buffer
|
|
627
|
+
*/
|
|
230
628
|
getBufferSize() {
|
|
231
629
|
return this.events.length;
|
|
232
630
|
}
|
|
631
|
+
/**
|
|
632
|
+
* Flushes all buffered events.
|
|
633
|
+
*
|
|
634
|
+
* @description
|
|
635
|
+
* Returns and clears all events from the buffer. The returned events
|
|
636
|
+
* can be processed, serialized, or forwarded to downstream systems.
|
|
637
|
+
*
|
|
638
|
+
* @returns Array of flushed events
|
|
639
|
+
*
|
|
640
|
+
* @example
|
|
641
|
+
* ```typescript
|
|
642
|
+
* const events = await capture.flush()
|
|
643
|
+
* console.log(`Flushed ${events.length} events`)
|
|
644
|
+
* await sendToAnalytics(events)
|
|
645
|
+
* ```
|
|
646
|
+
*/
|
|
233
647
|
async flush() {
|
|
234
648
|
const flushed = [...this.events];
|
|
235
649
|
this.events = [];
|
|
236
650
|
return flushed;
|
|
237
651
|
}
|
|
652
|
+
/**
|
|
653
|
+
* Adds an event listener.
|
|
654
|
+
*
|
|
655
|
+
* @description
|
|
656
|
+
* Listeners are called synchronously for each event as it is captured.
|
|
657
|
+
*
|
|
658
|
+
* @param listener - Callback function to invoke for each event
|
|
659
|
+
*
|
|
660
|
+
* @example
|
|
661
|
+
* ```typescript
|
|
662
|
+
* capture.addListener((event) => {
|
|
663
|
+
* console.log(`New event: ${event.type}`)
|
|
664
|
+
* })
|
|
665
|
+
* ```
|
|
666
|
+
*/
|
|
238
667
|
addListener(listener) {
|
|
239
668
|
this.listeners.push(listener);
|
|
240
669
|
}
|
|
670
|
+
/**
|
|
671
|
+
* Removes an event listener.
|
|
672
|
+
*
|
|
673
|
+
* @param listener - The listener to remove
|
|
674
|
+
*/
|
|
241
675
|
removeListener(listener) {
|
|
242
676
|
const index = this.listeners.indexOf(listener);
|
|
243
677
|
if (index !== -1) {
|
|
@@ -248,6 +682,10 @@ export class CDCEventCapture {
|
|
|
248
682
|
// ============================================================================
|
|
249
683
|
// Parquet Schema
|
|
250
684
|
// ============================================================================
|
|
685
|
+
/**
|
|
686
|
+
* Default field definitions for CDC event Parquet schema.
|
|
687
|
+
* @internal
|
|
688
|
+
*/
|
|
251
689
|
const CDC_EVENT_FIELDS = [
|
|
252
690
|
{ name: 'event_id', type: 'STRING', nullable: false },
|
|
253
691
|
{ name: 'event_type', type: 'STRING', nullable: false },
|
|
@@ -259,13 +697,54 @@ const CDC_EVENT_FIELDS = [
|
|
|
259
697
|
{ name: 'sha', type: 'STRING', nullable: true }
|
|
260
698
|
];
|
|
261
699
|
/**
|
|
262
|
-
* Parquet schema definition for CDC events
|
|
700
|
+
* Parquet schema definition for CDC events.
|
|
701
|
+
*
|
|
702
|
+
* @description
|
|
703
|
+
* Defines the column structure for CDC event Parquet files. The default
|
|
704
|
+
* schema includes standard CDC event fields and can be extended with
|
|
705
|
+
* custom fields for domain-specific data.
|
|
706
|
+
*
|
|
707
|
+
* @example
|
|
708
|
+
* ```typescript
|
|
709
|
+
* // Create default schema
|
|
710
|
+
* const schema = ParquetSchema.forCDCEvents()
|
|
711
|
+
*
|
|
712
|
+
* // Create schema with custom fields
|
|
713
|
+
* const customSchema = ParquetSchema.forCDCEvents([
|
|
714
|
+
* { name: 'repository_id', type: 'STRING', nullable: false },
|
|
715
|
+
* { name: 'user_id', type: 'STRING', nullable: true }
|
|
716
|
+
* ])
|
|
717
|
+
* ```
|
|
718
|
+
*
|
|
719
|
+
* @class ParquetSchema
|
|
263
720
|
*/
|
|
264
721
|
export class ParquetSchema {
|
|
265
722
|
fields;
|
|
723
|
+
/**
|
|
724
|
+
* Creates a new ParquetSchema.
|
|
725
|
+
*
|
|
726
|
+
* @param fields - Array of field definitions
|
|
727
|
+
*/
|
|
266
728
|
constructor(fields) {
|
|
267
729
|
this.fields = fields;
|
|
268
730
|
}
|
|
731
|
+
/**
|
|
732
|
+
* Creates a schema for CDC events with optional custom fields.
|
|
733
|
+
*
|
|
734
|
+
* @description
|
|
735
|
+
* Returns a schema with the standard CDC event fields. Additional
|
|
736
|
+
* custom fields can be appended for domain-specific data.
|
|
737
|
+
*
|
|
738
|
+
* @param customFields - Optional additional fields to add
|
|
739
|
+
* @returns A new ParquetSchema instance
|
|
740
|
+
*
|
|
741
|
+
* @example
|
|
742
|
+
* ```typescript
|
|
743
|
+
* const schema = ParquetSchema.forCDCEvents()
|
|
744
|
+
* // Schema includes: event_id, event_type, source, timestamp,
|
|
745
|
+
* // sequence, version, payload_json, sha
|
|
746
|
+
* ```
|
|
747
|
+
*/
|
|
269
748
|
static forCDCEvents(customFields) {
|
|
270
749
|
const fields = [...CDC_EVENT_FIELDS];
|
|
271
750
|
if (customFields) {
|
|
@@ -275,13 +754,64 @@ export class ParquetSchema {
|
|
|
275
754
|
}
|
|
276
755
|
}
|
|
277
756
|
/**
|
|
278
|
-
* Transforms CDC events to Parquet format
|
|
757
|
+
* Transforms CDC events to Parquet format.
|
|
758
|
+
*
|
|
759
|
+
* @description
|
|
760
|
+
* ParquetTransformer converts CDC events to Parquet-compatible rows and
|
|
761
|
+
* serializes batches of events to Parquet file format. It handles:
|
|
762
|
+
*
|
|
763
|
+
* - Event to row conversion (flattening the event structure)
|
|
764
|
+
* - JSON serialization of complex payloads
|
|
765
|
+
* - Batch creation with schema and metadata
|
|
766
|
+
* - Parquet file generation with compression
|
|
767
|
+
*
|
|
768
|
+
* @example
|
|
769
|
+
* ```typescript
|
|
770
|
+
* const transformer = new ParquetTransformer({ compression: 'snappy' })
|
|
771
|
+
*
|
|
772
|
+
* // Transform single event to row
|
|
773
|
+
* const row = transformer.eventToRow(event)
|
|
774
|
+
*
|
|
775
|
+
* // Transform batch of events
|
|
776
|
+
* const batch = transformer.eventsToBatch(events)
|
|
777
|
+
*
|
|
778
|
+
* // Generate Parquet file
|
|
779
|
+
* const buffer = await transformer.toParquetBuffer(batch)
|
|
780
|
+
* await r2.put('events.parquet', buffer)
|
|
781
|
+
* ```
|
|
782
|
+
*
|
|
783
|
+
* @class ParquetTransformer
|
|
279
784
|
*/
|
|
280
785
|
export class ParquetTransformer {
|
|
786
|
+
/**
|
|
787
|
+
* Compression algorithm to use.
|
|
788
|
+
* @private
|
|
789
|
+
*/
|
|
281
790
|
compression;
|
|
791
|
+
/**
|
|
792
|
+
* Creates a new ParquetTransformer.
|
|
793
|
+
*
|
|
794
|
+
* @param options - Transformer configuration
|
|
795
|
+
*/
|
|
282
796
|
constructor(options = {}) {
|
|
283
797
|
this.compression = options.compression ?? 'snappy';
|
|
284
798
|
}
|
|
799
|
+
/**
|
|
800
|
+
* Converts a CDC event to a Parquet row.
|
|
801
|
+
*
|
|
802
|
+
* @description
|
|
803
|
+
* Flattens the event structure and serializes the payload to JSON
|
|
804
|
+
* for storage in Parquet format.
|
|
805
|
+
*
|
|
806
|
+
* @param event - The CDC event to convert
|
|
807
|
+
* @returns A Parquet row representation
|
|
808
|
+
*
|
|
809
|
+
* @example
|
|
810
|
+
* ```typescript
|
|
811
|
+
* const row = transformer.eventToRow(event)
|
|
812
|
+
* console.log(row.event_id, row.event_type, row.sha)
|
|
813
|
+
* ```
|
|
814
|
+
*/
|
|
285
815
|
eventToRow(event) {
|
|
286
816
|
// Create a serializable copy of the payload (Uint8Array not JSON-serializable)
|
|
287
817
|
const serializablePayload = {
|
|
@@ -299,6 +829,22 @@ export class ParquetTransformer {
|
|
|
299
829
|
sha: event.payload.sha ?? null
|
|
300
830
|
};
|
|
301
831
|
}
|
|
832
|
+
/**
|
|
833
|
+
* Converts multiple CDC events to a Parquet batch.
|
|
834
|
+
*
|
|
835
|
+
* @description
|
|
836
|
+
* Transforms an array of events into a ParquetBatch structure
|
|
837
|
+
* ready for serialization to Parquet format.
|
|
838
|
+
*
|
|
839
|
+
* @param events - Array of CDC events to batch
|
|
840
|
+
* @returns A ParquetBatch ready for serialization
|
|
841
|
+
*
|
|
842
|
+
* @example
|
|
843
|
+
* ```typescript
|
|
844
|
+
* const batch = transformer.eventsToBatch(events)
|
|
845
|
+
* console.log(`Batch has ${batch.rowCount} rows`)
|
|
846
|
+
* ```
|
|
847
|
+
*/
|
|
302
848
|
eventsToBatch(events) {
|
|
303
849
|
const rows = events.map(e => this.eventToRow(e));
|
|
304
850
|
return {
|
|
@@ -309,6 +855,22 @@ export class ParquetTransformer {
|
|
|
309
855
|
compression: this.compression
|
|
310
856
|
};
|
|
311
857
|
}
|
|
858
|
+
/**
|
|
859
|
+
* Serializes a ParquetBatch to a Parquet file buffer.
|
|
860
|
+
*
|
|
861
|
+
* @description
|
|
862
|
+
* Generates a simplified, Parquet-like file (not a full Parquet encoding) from the batch data. The output
|
|
863
|
+
* includes PAR1 magic bytes, compressed data, and footer metadata.
|
|
864
|
+
*
|
|
865
|
+
* @param batch - The ParquetBatch to serialize
|
|
866
|
+
* @returns Promise resolving to Parquet file as Uint8Array
|
|
867
|
+
*
|
|
868
|
+
* @example
|
|
869
|
+
* ```typescript
|
|
870
|
+
* const buffer = await transformer.toParquetBuffer(batch)
|
|
871
|
+
* await r2.put('events.parquet', buffer)
|
|
872
|
+
* ```
|
|
873
|
+
*/
|
|
312
874
|
async toParquetBuffer(batch) {
|
|
313
875
|
// Build a simplified Parquet-like buffer
|
|
314
876
|
// Real implementation would use a proper Parquet library
|
|
@@ -383,14 +945,84 @@ export class ParquetTransformer {
|
|
|
383
945
|
}
|
|
384
946
|
}
|
|
385
947
|
/**
|
|
386
|
-
* Batches CDC events for efficient processing
|
|
948
|
+
* Batches CDC events for efficient processing.
|
|
949
|
+
*
|
|
950
|
+
* @description
|
|
951
|
+
* CDCBatcher collects CDC events and groups them into batches based on
|
|
952
|
+
* count or time thresholds. This enables efficient downstream processing
|
|
953
|
+
* by reducing the number of I/O operations and enabling bulk operations.
|
|
954
|
+
*
|
|
955
|
+
* **Batching Strategies:**
|
|
956
|
+
* - **Count-based**: Flush when batch reaches `batchSize` events
|
|
957
|
+
* - **Time-based**: Flush after `flushIntervalMs` even if batch is not full
|
|
958
|
+
*
|
|
959
|
+
* **Features:**
|
|
960
|
+
* - Async batch handlers for non-blocking processing
|
|
961
|
+
* - Multiple handlers for parallel processing pipelines
|
|
962
|
+
* - Explicit stop that clears the flush timer (pending events are NOT flushed; call flush() first)
|
|
963
|
+
* - Batch metadata (sequences, timestamps) for tracking
|
|
964
|
+
*
|
|
965
|
+
* @example
|
|
966
|
+
* ```typescript
|
|
967
|
+
* const batcher = new CDCBatcher({
|
|
968
|
+
* batchSize: 100,
|
|
969
|
+
* flushIntervalMs: 5000
|
|
970
|
+
* })
|
|
971
|
+
*
|
|
972
|
+
* // Register batch handler
|
|
973
|
+
* batcher.onBatch(async (batch) => {
|
|
974
|
+
* console.log(`Processing ${batch.eventCount} events`)
|
|
975
|
+
* console.log(`Sequence range: ${batch.minSequence} - ${batch.maxSequence}`)
|
|
976
|
+
* await saveToStorage(batch.events)
|
|
977
|
+
* })
|
|
978
|
+
*
|
|
979
|
+
* // Add events
|
|
980
|
+
* await batcher.add(event1)
|
|
981
|
+
* await batcher.add(event2)
|
|
982
|
+
*
|
|
983
|
+
* // Check pending events
|
|
984
|
+
* console.log(`Pending: ${batcher.getPendingCount()}`)
|
|
985
|
+
*
|
|
986
|
+
* // Manual flush
|
|
987
|
+
* const result = await batcher.flush()
|
|
988
|
+
*
|
|
989
|
+
* // Stop the batcher
|
|
990
|
+
* await batcher.stop()
|
|
991
|
+
* ```
|
|
992
|
+
*
|
|
993
|
+
* @class CDCBatcher
|
|
387
994
|
*/
|
|
388
995
|
export class CDCBatcher {
|
|
996
|
+
/**
|
|
997
|
+
* Batch configuration.
|
|
998
|
+
* @private
|
|
999
|
+
*/
|
|
389
1000
|
config;
|
|
1001
|
+
/**
|
|
1002
|
+
* Buffer of pending events.
|
|
1003
|
+
* @private
|
|
1004
|
+
*/
|
|
390
1005
|
events = [];
|
|
1006
|
+
/**
|
|
1007
|
+
* Registered batch handlers.
|
|
1008
|
+
* @private
|
|
1009
|
+
*/
|
|
391
1010
|
batchHandlers = [];
|
|
1011
|
+
/**
|
|
1012
|
+
* Timer for time-based flushing.
|
|
1013
|
+
* @private
|
|
1014
|
+
*/
|
|
392
1015
|
flushTimer = null;
|
|
1016
|
+
/**
|
|
1017
|
+
* Whether the batcher has been stopped.
|
|
1018
|
+
* @private
|
|
1019
|
+
*/
|
|
393
1020
|
stopped = false;
|
|
1021
|
+
/**
|
|
1022
|
+
* Creates a new CDCBatcher.
|
|
1023
|
+
*
|
|
1024
|
+
* @param config - Batch configuration
|
|
1025
|
+
*/
|
|
394
1026
|
constructor(config) {
|
|
395
1027
|
this.config = config;
|
|
396
1028
|
// Don't start timer in constructor - start when first event is added
|
|
@@ -447,6 +1079,21 @@ export class CDCBatcher {
|
|
|
447
1079
|
this.flushTimer = null;
|
|
448
1080
|
}
|
|
449
1081
|
}
|
|
1082
|
+
/**
|
|
1083
|
+
* Adds an event to the batch.
|
|
1084
|
+
*
|
|
1085
|
+
* @description
|
|
1086
|
+
* Adds the event to the pending batch. If the batch reaches the
|
|
1087
|
+
* configured size, it is automatically flushed. The flush timer
|
|
1088
|
+
* is started/restarted as needed.
|
|
1089
|
+
*
|
|
1090
|
+
* @param event - The CDC event to add
|
|
1091
|
+
*
|
|
1092
|
+
* @example
|
|
1093
|
+
* ```typescript
|
|
1094
|
+
* await batcher.add(event)
|
|
1095
|
+
* ```
|
|
1096
|
+
*/
|
|
450
1097
|
async add(event) {
|
|
451
1098
|
this.events.push(event);
|
|
452
1099
|
// Ensure flush timer is running when we have pending events
|
|
@@ -457,6 +1104,10 @@ export class CDCBatcher {
|
|
|
457
1104
|
// Timer will be re-started on next add() if needed
|
|
458
1105
|
}
|
|
459
1106
|
}
|
|
1107
|
+
/**
|
|
1108
|
+
* Internal flush implementation.
|
|
1109
|
+
* @private
|
|
1110
|
+
*/
|
|
460
1111
|
async flushInternal() {
|
|
461
1112
|
if (this.events.length === 0) {
|
|
462
1113
|
return { events: [], eventCount: 0, success: true };
|
|
@@ -480,33 +1131,167 @@ export class CDCBatcher {
|
|
|
480
1131
|
}
|
|
481
1132
|
return result;
|
|
482
1133
|
}
|
|
1134
|
+
/**
|
|
1135
|
+
* Manually flushes pending events.
|
|
1136
|
+
*
|
|
1137
|
+
* @description
|
|
1138
|
+
* Forces an immediate flush of all pending events, regardless of
|
|
1139
|
+
* batch size or timer. Clears the flush timer.
|
|
1140
|
+
*
|
|
1141
|
+
* @returns Promise resolving to the batch result
|
|
1142
|
+
*
|
|
1143
|
+
* @example
|
|
1144
|
+
* ```typescript
|
|
1145
|
+
* const result = await batcher.flush()
|
|
1146
|
+
* console.log(`Flushed ${result.eventCount} events`)
|
|
1147
|
+
* ```
|
|
1148
|
+
*/
|
|
483
1149
|
async flush() {
|
|
484
1150
|
this.clearFlushTimer();
|
|
485
1151
|
const result = await this.flushInternal();
|
|
486
1152
|
// Don't restart timer - it will be started on next add() if needed
|
|
487
1153
|
return result;
|
|
488
1154
|
}
|
|
1155
|
+
/**
|
|
1156
|
+
* Returns the number of pending events.
|
|
1157
|
+
*
|
|
1158
|
+
* @returns Number of events waiting to be flushed
|
|
1159
|
+
*/
|
|
489
1160
|
getPendingCount() {
|
|
490
1161
|
return this.events.length;
|
|
491
1162
|
}
|
|
1163
|
+
/**
|
|
1164
|
+
* Registers a batch handler.
|
|
1165
|
+
*
|
|
1166
|
+
* @description
|
|
1167
|
+
* Handlers are called when a batch is flushed (automatically or manually).
|
|
1168
|
+
* Multiple handlers can be registered for parallel processing.
|
|
1169
|
+
*
|
|
1170
|
+
* @param handler - Callback function to invoke for each batch
|
|
1171
|
+
*
|
|
1172
|
+
* @example
|
|
1173
|
+
* ```typescript
|
|
1174
|
+
* batcher.onBatch(async (batch) => {
|
|
1175
|
+
* await saveToStorage(batch.events)
|
|
1176
|
+
* })
|
|
1177
|
+
* ```
|
|
1178
|
+
*/
|
|
492
1179
|
onBatch(handler) {
|
|
493
1180
|
this.batchHandlers.push(handler);
|
|
494
1181
|
}
|
|
1182
|
+
/**
|
|
1183
|
+
* Stops the batcher.
|
|
1184
|
+
*
|
|
1185
|
+
* @description
|
|
1186
|
+
* Stops the flush timer and prevents further processing.
|
|
1187
|
+
* Does NOT automatically flush pending events - call flush() first
|
|
1188
|
+
* if you need to process remaining events.
|
|
1189
|
+
*
|
|
1190
|
+
* @example
|
|
1191
|
+
* ```typescript
|
|
1192
|
+
* await batcher.flush() // Process remaining events
|
|
1193
|
+
* await batcher.stop() // Stop the timer
|
|
1194
|
+
* ```
|
|
1195
|
+
*/
|
|
495
1196
|
async stop() {
|
|
496
1197
|
this.stopped = true;
|
|
497
1198
|
this.clearFlushTimer();
|
|
498
1199
|
}
|
|
499
1200
|
}
|
|
500
1201
|
/**
|
|
501
|
-
* Main CDC Pipeline for processing git operation events
|
|
1202
|
+
* Main CDC Pipeline for processing git operation events.
|
|
1203
|
+
*
|
|
1204
|
+
* @description
|
|
1205
|
+
* CDCPipeline orchestrates the complete change data capture flow from
|
|
1206
|
+
* event ingestion to Parquet output. It integrates batching, transformation,
|
|
1207
|
+
* retry handling, and dead letter queue management.
|
|
1208
|
+
*
|
|
1209
|
+
* **Pipeline Flow:**
|
|
1210
|
+
* 1. Events are submitted via `process()` or `processMany()`
|
|
1211
|
+
* 2. Events are validated and added to the batcher
|
|
1212
|
+
* 3. When a batch is ready, it's transformed to Parquet format
|
|
1213
|
+
* 4. On success, output handlers are notified
|
|
1214
|
+
* 5. On failure, retries are attempted with exponential backoff
|
|
1215
|
+
* 6. After max retries, events go to dead letter queue
|
|
1216
|
+
*
|
|
1217
|
+
* **Features:**
|
|
1218
|
+
* - Configurable batch size and flush interval
|
|
1219
|
+
* - Automatic retry with exponential backoff
|
|
1220
|
+
* - Dead letter queue for failed events
|
|
1221
|
+
* - Real-time metrics for monitoring
|
|
1222
|
+
* - Graceful shutdown with pending event flush
|
|
1223
|
+
*
|
|
1224
|
+
* @example
|
|
1225
|
+
* ```typescript
|
|
1226
|
+
* const pipeline = new CDCPipeline({
|
|
1227
|
+
* batchSize: 100,
|
|
1228
|
+
* flushIntervalMs: 5000,
|
|
1229
|
+
* maxRetries: 3,
|
|
1230
|
+
* parquetCompression: 'snappy',
|
|
1231
|
+
* outputPath: '/analytics',
|
|
1232
|
+
* schemaVersion: 1
|
|
1233
|
+
* })
|
|
1234
|
+
*
|
|
1235
|
+
* // Register handlers
|
|
1236
|
+
* pipeline.onOutput(async (output) => {
|
|
1237
|
+
* await r2.put(`cdc/${output.batchId}.parquet`, output.parquetBuffer)
|
|
1238
|
+
* })
|
|
1239
|
+
*
|
|
1240
|
+
* pipeline.onDeadLetter((events, error) => {
|
|
1241
|
+
* console.error(`Failed ${events.length} events:`, error)
|
|
1242
|
+
* })
|
|
1243
|
+
*
|
|
1244
|
+
* // Start the pipeline
|
|
1245
|
+
* await pipeline.start()
|
|
1246
|
+
*
|
|
1247
|
+
* // Process events
|
|
1248
|
+
* await pipeline.process(event)
|
|
1249
|
+
*
|
|
1250
|
+
* // Check metrics
|
|
1251
|
+
* const metrics = pipeline.getMetrics()
|
|
1252
|
+
*
|
|
1253
|
+
* // Stop gracefully
|
|
1254
|
+
* const result = await pipeline.stop()
|
|
1255
|
+
* console.log(`Flushed ${result.flushedCount} events on shutdown`)
|
|
1256
|
+
* ```
|
|
1257
|
+
*
|
|
1258
|
+
* @class CDCPipeline
|
|
502
1259
|
*/
|
|
503
1260
|
export class CDCPipeline {
|
|
1261
|
+
/**
|
|
1262
|
+
* Pipeline configuration.
|
|
1263
|
+
* @private
|
|
1264
|
+
*/
|
|
504
1265
|
config;
|
|
1266
|
+
/**
|
|
1267
|
+
* Current pipeline state.
|
|
1268
|
+
* @private
|
|
1269
|
+
*/
|
|
505
1270
|
state = 'stopped';
|
|
1271
|
+
/**
|
|
1272
|
+
* Event batcher instance.
|
|
1273
|
+
* @private
|
|
1274
|
+
*/
|
|
506
1275
|
batcher = null;
|
|
1276
|
+
/**
|
|
1277
|
+
* Parquet transformer instance.
|
|
1278
|
+
* @private
|
|
1279
|
+
*/
|
|
507
1280
|
transformer;
|
|
1281
|
+
/**
|
|
1282
|
+
* Registered output handlers.
|
|
1283
|
+
* @private
|
|
1284
|
+
*/
|
|
508
1285
|
outputHandlers = [];
|
|
1286
|
+
/**
|
|
1287
|
+
* Registered dead letter handlers.
|
|
1288
|
+
* @private
|
|
1289
|
+
*/
|
|
509
1290
|
deadLetterHandlers = [];
|
|
1291
|
+
/**
|
|
1292
|
+
* Pipeline metrics.
|
|
1293
|
+
* @private
|
|
1294
|
+
*/
|
|
510
1295
|
metrics = {
|
|
511
1296
|
eventsProcessed: 0,
|
|
512
1297
|
batchesGenerated: 0,
|
|
@@ -514,8 +1299,21 @@ export class CDCPipeline {
|
|
|
514
1299
|
errors: 0,
|
|
515
1300
|
avgProcessingLatencyMs: 0
|
|
516
1301
|
};
|
|
1302
|
+
/**
|
|
1303
|
+
* Processing latency samples.
|
|
1304
|
+
* @private
|
|
1305
|
+
*/
|
|
517
1306
|
processingLatencies = [];
|
|
1307
|
+
/**
|
|
1308
|
+
* Retry policy instance.
|
|
1309
|
+
* @private
|
|
1310
|
+
*/
|
|
518
1311
|
retryPolicy;
|
|
1312
|
+
/**
|
|
1313
|
+
* Creates a new CDCPipeline.
|
|
1314
|
+
*
|
|
1315
|
+
* @param config - Pipeline configuration
|
|
1316
|
+
*/
|
|
519
1317
|
constructor(config) {
|
|
520
1318
|
this.config = config;
|
|
521
1319
|
this.transformer = new ParquetTransformer({
|
|
@@ -528,9 +1326,27 @@ export class CDCPipeline {
|
|
|
528
1326
|
backoffMultiplier: 2
|
|
529
1327
|
});
|
|
530
1328
|
}
|
|
1329
|
+
/**
|
|
1330
|
+
* Returns the current pipeline state.
|
|
1331
|
+
*
|
|
1332
|
+
* @returns Current state ('stopped', 'running', or 'paused')
|
|
1333
|
+
*/
|
|
531
1334
|
getState() {
|
|
532
1335
|
return this.state;
|
|
533
1336
|
}
|
|
1337
|
+
/**
|
|
1338
|
+
* Starts the pipeline.
|
|
1339
|
+
*
|
|
1340
|
+
* @description
|
|
1341
|
+
* Initializes the batcher and begins accepting events. If already
|
|
1342
|
+
* running, this method is a no-op.
|
|
1343
|
+
*
|
|
1344
|
+
* @example
|
|
1345
|
+
* ```typescript
|
|
1346
|
+
* await pipeline.start()
|
|
1347
|
+
* console.log(pipeline.getState()) // 'running'
|
|
1348
|
+
* ```
|
|
1349
|
+
*/
|
|
534
1350
|
async start() {
|
|
535
1351
|
if (this.state === 'running')
|
|
536
1352
|
return;
|
|
@@ -543,6 +1359,21 @@ export class CDCPipeline {
|
|
|
543
1359
|
});
|
|
544
1360
|
this.state = 'running';
|
|
545
1361
|
}
|
|
1362
|
+
/**
|
|
1363
|
+
* Stops the pipeline.
|
|
1364
|
+
*
|
|
1365
|
+
* @description
|
|
1366
|
+
* Flushes any pending events, stops the batcher, and sets state to stopped.
|
|
1367
|
+
* Returns information about events flushed during shutdown.
|
|
1368
|
+
*
|
|
1369
|
+
* @returns Promise resolving to stop result with flushed event count
|
|
1370
|
+
*
|
|
1371
|
+
* @example
|
|
1372
|
+
* ```typescript
|
|
1373
|
+
* const result = await pipeline.stop()
|
|
1374
|
+
* console.log(`Flushed ${result.flushedCount} events on shutdown`)
|
|
1375
|
+
* ```
|
|
1376
|
+
*/
|
|
546
1377
|
async stop() {
|
|
547
1378
|
if (this.state === 'stopped') {
|
|
548
1379
|
return { flushedCount: 0 };
|
|
@@ -557,6 +1388,27 @@ export class CDCPipeline {
|
|
|
557
1388
|
this.state = 'stopped';
|
|
558
1389
|
return { flushedCount };
|
|
559
1390
|
}
|
|
1391
|
+
/**
|
|
1392
|
+
* Processes a single event.
|
|
1393
|
+
*
|
|
1394
|
+
* @description
|
|
1395
|
+
* Validates the event and adds it to the batcher for processing.
|
|
1396
|
+
* Updates metrics including latency tracking.
|
|
1397
|
+
*
|
|
1398
|
+
* @param event - The CDC event to process
|
|
1399
|
+
* @returns Promise resolving to process result
|
|
1400
|
+
*
|
|
1401
|
+
* @throws {CDCError} PROCESSING_ERROR - If pipeline is not running
|
|
1402
|
+
* @throws {CDCError} VALIDATION_ERROR - If event fails validation
|
|
1403
|
+
*
|
|
1404
|
+
* @example
|
|
1405
|
+
* ```typescript
|
|
1406
|
+
* const result = await pipeline.process(event)
|
|
1407
|
+
* if (result.success) {
|
|
1408
|
+
* console.log(`Processed event: ${result.eventId}`)
|
|
1409
|
+
* }
|
|
1410
|
+
* ```
|
|
1411
|
+
*/
|
|
560
1412
|
async process(event) {
|
|
561
1413
|
if (this.state !== 'running') {
|
|
562
1414
|
throw new CDCError('PROCESSING_ERROR', 'Pipeline is not running');
|
|
@@ -571,6 +1423,22 @@ export class CDCPipeline {
|
|
|
571
1423
|
this.updateAvgLatency();
|
|
572
1424
|
return { success: true, eventId: event.id };
|
|
573
1425
|
}
|
|
1426
|
+
/**
|
|
1427
|
+
* Processes multiple events.
|
|
1428
|
+
*
|
|
1429
|
+
* @description
|
|
1430
|
+
* Convenience method to process an array of events sequentially.
|
|
1431
|
+
*
|
|
1432
|
+
* @param events - Array of CDC events to process
|
|
1433
|
+
* @returns Promise resolving to array of process results
|
|
1434
|
+
*
|
|
1435
|
+
* @example
|
|
1436
|
+
* ```typescript
|
|
1437
|
+
* const results = await pipeline.processMany(events)
|
|
1438
|
+
* const successCount = results.filter(r => r.success).length
|
|
1439
|
+
* console.log(`Processed ${successCount}/${events.length} events`)
|
|
1440
|
+
* ```
|
|
1441
|
+
*/
|
|
574
1442
|
async processMany(events) {
|
|
575
1443
|
const results = [];
|
|
576
1444
|
for (const event of events) {
|
|
@@ -579,6 +1447,19 @@ export class CDCPipeline {
|
|
|
579
1447
|
}
|
|
580
1448
|
return results;
|
|
581
1449
|
}
|
|
1450
|
+
/**
|
|
1451
|
+
* Manually flushes pending events.
|
|
1452
|
+
*
|
|
1453
|
+
* @description
|
|
1454
|
+
* Forces an immediate flush of the batcher and processes the
|
|
1455
|
+
* resulting batch through the pipeline.
|
|
1456
|
+
*
|
|
1457
|
+
* @example
|
|
1458
|
+
* ```typescript
|
|
1459
|
+
* await pipeline.flush()
|
|
1460
|
+
* console.log('All pending events flushed')
|
|
1461
|
+
* ```
|
|
1462
|
+
*/
|
|
582
1463
|
async flush() {
|
|
583
1464
|
if (this.batcher) {
|
|
584
1465
|
const result = await this.batcher.flush();
|
|
@@ -587,6 +1468,10 @@ export class CDCPipeline {
|
|
|
587
1468
|
}
|
|
588
1469
|
}
|
|
589
1470
|
}
|
|
1471
|
+
/**
|
|
1472
|
+
* Handles a batch of events with retry logic.
|
|
1473
|
+
* @private
|
|
1474
|
+
*/
|
|
590
1475
|
async handleBatch(batch) {
|
|
591
1476
|
let attempts = 0;
|
|
592
1477
|
let lastError = null;
|
|
@@ -624,9 +1509,17 @@ export class CDCPipeline {
|
|
|
624
1509
|
}
|
|
625
1510
|
}
|
|
626
1511
|
}
|
|
1512
|
+
/**
|
|
1513
|
+
* Sleeps for the specified duration.
|
|
1514
|
+
* @private
|
|
1515
|
+
*/
|
|
627
1516
|
sleep(ms) {
|
|
628
1517
|
return new Promise(resolve => setTimeout(resolve, ms));
|
|
629
1518
|
}
|
|
1519
|
+
/**
|
|
1520
|
+
* Updates the average latency metric.
|
|
1521
|
+
* @private
|
|
1522
|
+
*/
|
|
630
1523
|
updateAvgLatency() {
|
|
631
1524
|
if (this.processingLatencies.length === 0)
|
|
632
1525
|
return;
|
|
@@ -637,12 +1530,66 @@ export class CDCPipeline {
|
|
|
637
1530
|
const sum = this.processingLatencies.reduce((a, b) => a + b, 0);
|
|
638
1531
|
this.metrics.avgProcessingLatencyMs = sum / this.processingLatencies.length;
|
|
639
1532
|
}
|
|
1533
|
+
/**
|
|
1534
|
+
* Returns current pipeline metrics.
|
|
1535
|
+
*
|
|
1536
|
+
* @description
|
|
1537
|
+
* Returns a copy of the current metrics. Metrics are cumulative
|
|
1538
|
+
* since pipeline creation.
|
|
1539
|
+
*
|
|
1540
|
+
* @returns Copy of current pipeline metrics
|
|
1541
|
+
*
|
|
1542
|
+
* @example
|
|
1543
|
+
* ```typescript
|
|
1544
|
+
* const metrics = pipeline.getMetrics()
|
|
1545
|
+
* console.log(`Processed: ${metrics.eventsProcessed}`)
|
|
1546
|
+
* console.log(`Batches: ${metrics.batchesGenerated}`)
|
|
1547
|
+
* console.log(`Errors: ${metrics.errors}`)
|
|
1548
|
+
* console.log(`Avg latency: ${metrics.avgProcessingLatencyMs}ms`)
|
|
1549
|
+
* ```
|
|
1550
|
+
*/
|
|
640
1551
|
getMetrics() {
|
|
641
1552
|
return { ...this.metrics };
|
|
642
1553
|
}
|
|
1554
|
+
/**
|
|
1555
|
+
* Registers an output handler.
|
|
1556
|
+
*
|
|
1557
|
+
* @description
|
|
1558
|
+
* Output handlers are called when a batch is successfully processed
|
|
1559
|
+
* and converted to Parquet format. Multiple handlers can be registered.
|
|
1560
|
+
*
|
|
1561
|
+
* @param handler - Callback to invoke for each successful batch
|
|
1562
|
+
*
|
|
1563
|
+
* @example
|
|
1564
|
+
* ```typescript
|
|
1565
|
+
* pipeline.onOutput(async (output) => {
|
|
1566
|
+
* await r2.put(`cdc/${output.batchId}.parquet`, output.parquetBuffer)
|
|
1567
|
+
* console.log(`Wrote ${output.events.length} events`)
|
|
1568
|
+
* })
|
|
1569
|
+
* ```
|
|
1570
|
+
*/
|
|
643
1571
|
onOutput(handler) {
|
|
644
1572
|
this.outputHandlers.push(handler);
|
|
645
1573
|
}
|
|
1574
|
+
/**
|
|
1575
|
+
* Registers a dead letter handler.
|
|
1576
|
+
*
|
|
1577
|
+
* @description
|
|
1578
|
+
* Dead letter handlers are called when a batch fails after all
|
|
1579
|
+
* retry attempts are exhausted. Use this for alerting, logging,
|
|
1580
|
+
* or storing failed events for later reprocessing.
|
|
1581
|
+
*
|
|
1582
|
+
* @param handler - Callback to invoke for failed events
|
|
1583
|
+
*
|
|
1584
|
+
* @example
|
|
1585
|
+
* ```typescript
|
|
1586
|
+
* pipeline.onDeadLetter(async (events, error) => {
|
|
1587
|
+
* console.error(`Failed to process ${events.length} events:`, error)
|
|
1588
|
+
* // Store in dead letter queue for later retry
|
|
1589
|
+
* await dlq.put(events)
|
|
1590
|
+
* })
|
|
1591
|
+
* ```
|
|
1592
|
+
*/
|
|
646
1593
|
onDeadLetter(handler) {
|
|
647
1594
|
this.deadLetterHandlers.push(handler);
|
|
648
1595
|
}
|
|
@@ -650,6 +1597,10 @@ export class CDCPipeline {
|
|
|
650
1597
|
// ============================================================================
|
|
651
1598
|
// Utility Functions
|
|
652
1599
|
// ============================================================================
|
|
1600
|
+
/**
|
|
1601
|
+
* Valid CDC event types for validation.
|
|
1602
|
+
* @internal
|
|
1603
|
+
*/
|
|
653
1604
|
const VALID_EVENT_TYPES = [
|
|
654
1605
|
'OBJECT_CREATED',
|
|
655
1606
|
'OBJECT_DELETED',
|
|
@@ -663,7 +1614,36 @@ const VALID_EVENT_TYPES = [
|
|
|
663
1614
|
'MERGE_COMPLETED'
|
|
664
1615
|
];
|
|
665
1616
|
/**
|
|
666
|
-
*
|
|
1617
|
+
* Creates a new CDC event.
|
|
1618
|
+
*
|
|
1619
|
+
* @description
|
|
1620
|
+
* Factory function to create a properly structured CDC event with
|
|
1621
|
+
* automatically generated ID and timestamp.
|
|
1622
|
+
*
|
|
1623
|
+
* @param type - The event type
|
|
1624
|
+
* @param source - The event source
|
|
1625
|
+
* @param payload - Event payload data
|
|
1626
|
+
* @param options - Optional configuration
|
|
1627
|
+
* @param options.sequence - Custom sequence number (default: 0)
|
|
1628
|
+
* @returns A new CDCEvent
|
|
1629
|
+
*
|
|
1630
|
+
* @example
|
|
1631
|
+
* ```typescript
|
|
1632
|
+
* const event = createCDCEvent('COMMIT_CREATED', 'push', {
|
|
1633
|
+
* operation: 'commit-create',
|
|
1634
|
+
* sha: 'abc123...',
|
|
1635
|
+
* treeSha: 'def456...',
|
|
1636
|
+
* parentShas: ['parent1...']
|
|
1637
|
+
* })
|
|
1638
|
+
*
|
|
1639
|
+
* // With sequence number
|
|
1640
|
+
* const sequencedEvent = createCDCEvent('REF_UPDATED', 'push', {
|
|
1641
|
+
* operation: 'ref-update',
|
|
1642
|
+
* refName: 'refs/heads/main',
|
|
1643
|
+
* oldSha: 'old...',
|
|
1644
|
+
* newSha: 'new...'
|
|
1645
|
+
* }, { sequence: 42 })
|
|
1646
|
+
* ```
|
|
667
1647
|
*/
|
|
668
1648
|
export function createCDCEvent(type, source, payload, options) {
|
|
669
1649
|
return {
|
|
@@ -677,7 +1657,22 @@ export function createCDCEvent(type, source, payload, options) {
|
|
|
677
1657
|
};
|
|
678
1658
|
}
|
|
679
1659
|
/**
|
|
680
|
-
*
|
|
1660
|
+
* Serializes a CDC event to bytes.
|
|
1661
|
+
*
|
|
1662
|
+
* @description
|
|
1663
|
+
* Converts a CDCEvent to a JSON-encoded Uint8Array for storage or
|
|
1664
|
+
* transmission. Handles Uint8Array payload data by converting to arrays.
|
|
1665
|
+
*
|
|
1666
|
+
* @param event - The CDC event to serialize
|
|
1667
|
+
* @returns The serialized event as a Uint8Array
|
|
1668
|
+
*
|
|
1669
|
+
* @example
|
|
1670
|
+
* ```typescript
|
|
1671
|
+
* const bytes = serializeEvent(event)
|
|
1672
|
+
* await r2.put(`events/${event.id}`, bytes)
|
|
1673
|
+
* ```
|
|
1674
|
+
*
|
|
1675
|
+
* @see {@link deserializeEvent} - Reverse operation
|
|
681
1676
|
*/
|
|
682
1677
|
export function serializeEvent(event) {
|
|
683
1678
|
// Create a serializable copy (Uint8Array is not JSON-serializable)
|
|
@@ -692,7 +1687,24 @@ export function serializeEvent(event) {
|
|
|
692
1687
|
return new TextEncoder().encode(json);
|
|
693
1688
|
}
|
|
694
1689
|
/**
|
|
695
|
-
*
|
|
1690
|
+
* Deserializes bytes to a CDC event.
|
|
1691
|
+
*
|
|
1692
|
+
* @description
|
|
1693
|
+
* Reconstructs a CDCEvent from JSON-encoded bytes. Handles Uint8Array
|
|
1694
|
+
* restoration for payload data that was converted to arrays during
|
|
1695
|
+
* serialization.
|
|
1696
|
+
*
|
|
1697
|
+
* @param bytes - The serialized event bytes
|
|
1698
|
+
* @returns The deserialized CDCEvent
|
|
1699
|
+
*
|
|
1700
|
+
* @example
|
|
1701
|
+
* ```typescript
|
|
1702
|
+
* const bytes = await r2.get(`events/${eventId}`)
|
|
1703
|
+
* const event = deserializeEvent(bytes)
|
|
1704
|
+
* console.log(`Event type: ${event.type}`)
|
|
1705
|
+
* ```
|
|
1706
|
+
*
|
|
1707
|
+
* @see {@link serializeEvent} - Reverse operation
|
|
696
1708
|
*/
|
|
697
1709
|
export function deserializeEvent(bytes) {
|
|
698
1710
|
const json = new TextDecoder().decode(bytes);
|
|
@@ -704,7 +1716,35 @@ export function deserializeEvent(bytes) {
|
|
|
704
1716
|
return parsed;
|
|
705
1717
|
}
|
|
706
1718
|
/**
|
|
707
|
-
*
|
|
1719
|
+
* Validates a CDC event.
|
|
1720
|
+
*
|
|
1721
|
+
* @description
|
|
1722
|
+
* Checks that an event has all required fields and valid values.
|
|
1723
|
+
* Throws a CDCError if validation fails.
|
|
1724
|
+
*
|
|
1725
|
+
* **Validation Rules:**
|
|
1726
|
+
* - Event must not be null/undefined
|
|
1727
|
+
* - Event ID must be a non-empty string
|
|
1728
|
+
* - Event type must be a valid CDCEventType
|
|
1729
|
+
* - Timestamp must be a non-negative number
|
|
1730
|
+
* - Sequence must be a non-negative number
|
|
1731
|
+
*
|
|
1732
|
+
* @param event - The CDC event to validate
|
|
1733
|
+
* @returns The validated event (for chaining)
|
|
1734
|
+
*
|
|
1735
|
+
* @throws {CDCError} VALIDATION_ERROR - If validation fails
|
|
1736
|
+
*
|
|
1737
|
+
* @example
|
|
1738
|
+
* ```typescript
|
|
1739
|
+
* try {
|
|
1740
|
+
* validateCDCEvent(event)
|
|
1741
|
+
* // Event is valid
|
|
1742
|
+
* } catch (error) {
|
|
1743
|
+
* if (error instanceof CDCError) {
|
|
1744
|
+
* console.log(`Invalid: ${error.message}`)
|
|
1745
|
+
* }
|
|
1746
|
+
* }
|
|
1747
|
+
* ```
|
|
708
1748
|
*/
|
|
709
1749
|
export function validateCDCEvent(event) {
|
|
710
1750
|
if (!event) {
|
|
@@ -727,9 +1767,37 @@ export function validateCDCEvent(event) {
|
|
|
727
1767
|
// ============================================================================
|
|
728
1768
|
// Pipeline Operations
|
|
729
1769
|
// ============================================================================
|
|
1770
|
+
/**
|
|
1771
|
+
* Registry of active pipelines by ID.
|
|
1772
|
+
* @internal
|
|
1773
|
+
*/
|
|
730
1774
|
const activePipelines = new Map();
|
|
731
1775
|
/**
|
|
732
|
-
*
|
|
1776
|
+
* Starts a new pipeline with the given configuration.
|
|
1777
|
+
*
|
|
1778
|
+
* @description
|
|
1779
|
+
* Creates and starts a new CDCPipeline, registering it by ID for
|
|
1780
|
+
* later access. If a pipeline with the same ID already exists,
|
|
1781
|
+
* it will be replaced (the old pipeline is not automatically stopped).
|
|
1782
|
+
*
|
|
1783
|
+
* @param id - Unique identifier for the pipeline
|
|
1784
|
+
* @param config - Pipeline configuration
|
|
1785
|
+
* @returns The started pipeline instance
|
|
1786
|
+
*
|
|
1787
|
+
* @example
|
|
1788
|
+
* ```typescript
|
|
1789
|
+
* const pipeline = startPipeline('main', {
|
|
1790
|
+
* batchSize: 100,
|
|
1791
|
+
* flushIntervalMs: 5000,
|
|
1792
|
+
* maxRetries: 3,
|
|
1793
|
+
* parquetCompression: 'snappy',
|
|
1794
|
+
* outputPath: '/analytics',
|
|
1795
|
+
* schemaVersion: 1
|
|
1796
|
+
* })
|
|
1797
|
+
*
|
|
1798
|
+
* // Register handlers
|
|
1799
|
+
* pipeline.onOutput((output) => console.log(`Batch: ${output.batchId}`))
|
|
1800
|
+
* ```
|
|
733
1801
|
*/
|
|
734
1802
|
export function startPipeline(id, config) {
|
|
735
1803
|
const pipeline = new CDCPipeline(config);
|
|
@@ -738,7 +1806,20 @@ export function startPipeline(id, config) {
|
|
|
738
1806
|
return pipeline;
|
|
739
1807
|
}
|
|
740
1808
|
/**
|
|
741
|
-
*
|
|
1809
|
+
* Stops a pipeline by ID.
|
|
1810
|
+
*
|
|
1811
|
+
* @description
|
|
1812
|
+
* Stops the pipeline identified by the given ID, flushing any pending
|
|
1813
|
+
* events and removing it from the registry.
|
|
1814
|
+
*
|
|
1815
|
+
* @param id - Pipeline identifier
|
|
1816
|
+
* @returns Promise resolving to the stop result (with a flushedCount of 0 if the pipeline is not found)
|
|
1817
|
+
*
|
|
1818
|
+
* @example
|
|
1819
|
+
* ```typescript
|
|
1820
|
+
* const result = await stopPipeline('main')
|
|
1821
|
+
* console.log(`Flushed ${result.flushedCount} events on shutdown`)
|
|
1822
|
+
* ```
|
|
742
1823
|
*/
|
|
743
1824
|
export async function stopPipeline(id) {
|
|
744
1825
|
const pipeline = activePipelines.get(id);
|
|
@@ -750,7 +1831,19 @@ export async function stopPipeline(id) {
|
|
|
750
1831
|
return result;
|
|
751
1832
|
}
|
|
752
1833
|
/**
|
|
753
|
-
*
|
|
1834
|
+
* Flushes a pipeline by ID.
|
|
1835
|
+
*
|
|
1836
|
+
* @description
|
|
1837
|
+
* Forces an immediate flush of all pending events in the pipeline.
|
|
1838
|
+
* No-op if pipeline not found.
|
|
1839
|
+
*
|
|
1840
|
+
* @param id - Pipeline identifier
|
|
1841
|
+
*
|
|
1842
|
+
* @example
|
|
1843
|
+
* ```typescript
|
|
1844
|
+
* await flushPipeline('main')
|
|
1845
|
+
* console.log('All pending events flushed')
|
|
1846
|
+
* ```
|
|
754
1847
|
*/
|
|
755
1848
|
export async function flushPipeline(id) {
|
|
756
1849
|
const pipeline = activePipelines.get(id);
|
|
@@ -759,7 +1852,23 @@ export async function flushPipeline(id) {
|
|
|
759
1852
|
}
|
|
760
1853
|
}
|
|
761
1854
|
/**
|
|
762
|
-
*
|
|
1855
|
+
* Gets metrics for a pipeline by ID.
|
|
1856
|
+
*
|
|
1857
|
+
* @description
|
|
1858
|
+
* Returns a copy of the current metrics for the specified pipeline.
|
|
1859
|
+
* Returns null if the pipeline is not found.
|
|
1860
|
+
*
|
|
1861
|
+
* @param id - Pipeline identifier
|
|
1862
|
+
* @returns Pipeline metrics or null if not found
|
|
1863
|
+
*
|
|
1864
|
+
* @example
|
|
1865
|
+
* ```typescript
|
|
1866
|
+
* const metrics = getPipelineMetrics('main')
|
|
1867
|
+
* if (metrics) {
|
|
1868
|
+
* console.log(`Events processed: ${metrics.eventsProcessed}`)
|
|
1869
|
+
* console.log(`Errors: ${metrics.errors}`)
|
|
1870
|
+
* }
|
|
1871
|
+
* ```
|
|
763
1872
|
*/
|
|
764
1873
|
export function getPipelineMetrics(id) {
|
|
765
1874
|
const pipeline = activePipelines.get(id);
|