gitx.do 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/commands/blame.d.ts +259 -0
- package/dist/cli/commands/blame.d.ts.map +1 -0
- package/dist/cli/commands/blame.js +609 -0
- package/dist/cli/commands/blame.js.map +1 -0
- package/dist/cli/commands/branch.d.ts +249 -0
- package/dist/cli/commands/branch.d.ts.map +1 -0
- package/dist/cli/commands/branch.js +693 -0
- package/dist/cli/commands/branch.js.map +1 -0
- package/dist/cli/commands/commit.d.ts +182 -0
- package/dist/cli/commands/commit.d.ts.map +1 -0
- package/dist/cli/commands/commit.js +437 -0
- package/dist/cli/commands/commit.js.map +1 -0
- package/dist/cli/commands/diff.d.ts +464 -0
- package/dist/cli/commands/diff.d.ts.map +1 -0
- package/dist/cli/commands/diff.js +958 -0
- package/dist/cli/commands/diff.js.map +1 -0
- package/dist/cli/commands/log.d.ts +239 -0
- package/dist/cli/commands/log.d.ts.map +1 -0
- package/dist/cli/commands/log.js +535 -0
- package/dist/cli/commands/log.js.map +1 -0
- package/dist/cli/commands/review.d.ts +457 -0
- package/dist/cli/commands/review.d.ts.map +1 -0
- package/dist/cli/commands/review.js +533 -0
- package/dist/cli/commands/review.js.map +1 -0
- package/dist/cli/commands/status.d.ts +269 -0
- package/dist/cli/commands/status.d.ts.map +1 -0
- package/dist/cli/commands/status.js +493 -0
- package/dist/cli/commands/status.js.map +1 -0
- package/dist/cli/commands/web.d.ts +199 -0
- package/dist/cli/commands/web.d.ts.map +1 -0
- package/dist/cli/commands/web.js +696 -0
- package/dist/cli/commands/web.js.map +1 -0
- package/dist/cli/fs-adapter.d.ts +656 -0
- package/dist/cli/fs-adapter.d.ts.map +1 -0
- package/dist/cli/fs-adapter.js +1179 -0
- package/dist/cli/fs-adapter.js.map +1 -0
- package/dist/cli/index.d.ts +387 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +523 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/cli/ui/components/DiffView.d.ts +7 -0
- package/dist/cli/ui/components/DiffView.d.ts.map +1 -0
- package/dist/cli/ui/components/DiffView.js +11 -0
- package/dist/cli/ui/components/DiffView.js.map +1 -0
- package/dist/cli/ui/components/ErrorDisplay.d.ts +6 -0
- package/dist/cli/ui/components/ErrorDisplay.d.ts.map +1 -0
- package/dist/cli/ui/components/ErrorDisplay.js +11 -0
- package/dist/cli/ui/components/ErrorDisplay.js.map +1 -0
- package/dist/cli/ui/components/FuzzySearch.d.ts +9 -0
- package/dist/cli/ui/components/FuzzySearch.d.ts.map +1 -0
- package/dist/cli/ui/components/FuzzySearch.js +12 -0
- package/dist/cli/ui/components/FuzzySearch.js.map +1 -0
- package/dist/cli/ui/components/LoadingSpinner.d.ts +6 -0
- package/dist/cli/ui/components/LoadingSpinner.d.ts.map +1 -0
- package/dist/cli/ui/components/LoadingSpinner.js +10 -0
- package/dist/cli/ui/components/LoadingSpinner.js.map +1 -0
- package/dist/cli/ui/components/NavigationList.d.ts +9 -0
- package/dist/cli/ui/components/NavigationList.d.ts.map +1 -0
- package/dist/cli/ui/components/NavigationList.js +11 -0
- package/dist/cli/ui/components/NavigationList.js.map +1 -0
- package/dist/cli/ui/components/ScrollableContent.d.ts +8 -0
- package/dist/cli/ui/components/ScrollableContent.d.ts.map +1 -0
- package/dist/cli/ui/components/ScrollableContent.js +11 -0
- package/dist/cli/ui/components/ScrollableContent.js.map +1 -0
- package/dist/cli/ui/components/index.d.ts +7 -0
- package/dist/cli/ui/components/index.d.ts.map +1 -0
- package/dist/cli/ui/components/index.js +9 -0
- package/dist/cli/ui/components/index.js.map +1 -0
- package/dist/cli/ui/terminal-ui.d.ts +52 -0
- package/dist/cli/ui/terminal-ui.d.ts.map +1 -0
- package/dist/cli/ui/terminal-ui.js +121 -0
- package/dist/cli/ui/terminal-ui.js.map +1 -0
- package/dist/durable-object/object-store.d.ts +401 -23
- package/dist/durable-object/object-store.d.ts.map +1 -1
- package/dist/durable-object/object-store.js +414 -25
- package/dist/durable-object/object-store.js.map +1 -1
- package/dist/durable-object/schema.d.ts +188 -0
- package/dist/durable-object/schema.d.ts.map +1 -1
- package/dist/durable-object/schema.js +160 -0
- package/dist/durable-object/schema.js.map +1 -1
- package/dist/durable-object/wal.d.ts +336 -31
- package/dist/durable-object/wal.d.ts.map +1 -1
- package/dist/durable-object/wal.js +272 -27
- package/dist/durable-object/wal.js.map +1 -1
- package/dist/index.d.ts +379 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +379 -7
- package/dist/index.js.map +1 -1
- package/dist/mcp/adapter.d.ts +579 -38
- package/dist/mcp/adapter.d.ts.map +1 -1
- package/dist/mcp/adapter.js +426 -33
- package/dist/mcp/adapter.js.map +1 -1
- package/dist/mcp/sandbox.d.ts +532 -29
- package/dist/mcp/sandbox.d.ts.map +1 -1
- package/dist/mcp/sandbox.js +389 -22
- package/dist/mcp/sandbox.js.map +1 -1
- package/dist/mcp/sdk-adapter.d.ts +478 -56
- package/dist/mcp/sdk-adapter.d.ts.map +1 -1
- package/dist/mcp/sdk-adapter.js +346 -44
- package/dist/mcp/sdk-adapter.js.map +1 -1
- package/dist/mcp/tools.d.ts +445 -30
- package/dist/mcp/tools.d.ts.map +1 -1
- package/dist/mcp/tools.js +363 -33
- package/dist/mcp/tools.js.map +1 -1
- package/dist/ops/blame.d.ts +424 -21
- package/dist/ops/blame.d.ts.map +1 -1
- package/dist/ops/blame.js +303 -20
- package/dist/ops/blame.js.map +1 -1
- package/dist/ops/branch.d.ts +583 -32
- package/dist/ops/branch.d.ts.map +1 -1
- package/dist/ops/branch.js +365 -23
- package/dist/ops/branch.js.map +1 -1
- package/dist/ops/commit-traversal.d.ts +164 -24
- package/dist/ops/commit-traversal.d.ts.map +1 -1
- package/dist/ops/commit-traversal.js +68 -2
- package/dist/ops/commit-traversal.js.map +1 -1
- package/dist/ops/commit.d.ts +387 -53
- package/dist/ops/commit.d.ts.map +1 -1
- package/dist/ops/commit.js +249 -29
- package/dist/ops/commit.js.map +1 -1
- package/dist/ops/merge-base.d.ts +195 -21
- package/dist/ops/merge-base.d.ts.map +1 -1
- package/dist/ops/merge-base.js +122 -12
- package/dist/ops/merge-base.js.map +1 -1
- package/dist/ops/merge.d.ts +600 -130
- package/dist/ops/merge.d.ts.map +1 -1
- package/dist/ops/merge.js +408 -60
- package/dist/ops/merge.js.map +1 -1
- package/dist/ops/tag.d.ts +67 -2
- package/dist/ops/tag.d.ts.map +1 -1
- package/dist/ops/tag.js +42 -1
- package/dist/ops/tag.js.map +1 -1
- package/dist/ops/tree-builder.d.ts +102 -6
- package/dist/ops/tree-builder.d.ts.map +1 -1
- package/dist/ops/tree-builder.js +30 -5
- package/dist/ops/tree-builder.js.map +1 -1
- package/dist/ops/tree-diff.d.ts +50 -2
- package/dist/ops/tree-diff.d.ts.map +1 -1
- package/dist/ops/tree-diff.js +50 -2
- package/dist/ops/tree-diff.js.map +1 -1
- package/dist/pack/delta.d.ts +211 -39
- package/dist/pack/delta.d.ts.map +1 -1
- package/dist/pack/delta.js +232 -46
- package/dist/pack/delta.js.map +1 -1
- package/dist/pack/format.d.ts +390 -28
- package/dist/pack/format.d.ts.map +1 -1
- package/dist/pack/format.js +344 -33
- package/dist/pack/format.js.map +1 -1
- package/dist/pack/full-generation.d.ts +313 -28
- package/dist/pack/full-generation.d.ts.map +1 -1
- package/dist/pack/full-generation.js +238 -19
- package/dist/pack/full-generation.js.map +1 -1
- package/dist/pack/generation.d.ts +346 -23
- package/dist/pack/generation.d.ts.map +1 -1
- package/dist/pack/generation.js +269 -21
- package/dist/pack/generation.js.map +1 -1
- package/dist/pack/index.d.ts +407 -86
- package/dist/pack/index.d.ts.map +1 -1
- package/dist/pack/index.js +351 -70
- package/dist/pack/index.js.map +1 -1
- package/dist/refs/branch.d.ts +517 -71
- package/dist/refs/branch.d.ts.map +1 -1
- package/dist/refs/branch.js +410 -26
- package/dist/refs/branch.js.map +1 -1
- package/dist/refs/storage.d.ts +610 -57
- package/dist/refs/storage.d.ts.map +1 -1
- package/dist/refs/storage.js +481 -29
- package/dist/refs/storage.js.map +1 -1
- package/dist/refs/tag.d.ts +677 -67
- package/dist/refs/tag.d.ts.map +1 -1
- package/dist/refs/tag.js +497 -30
- package/dist/refs/tag.js.map +1 -1
- package/dist/storage/lru-cache.d.ts +556 -53
- package/dist/storage/lru-cache.d.ts.map +1 -1
- package/dist/storage/lru-cache.js +439 -36
- package/dist/storage/lru-cache.js.map +1 -1
- package/dist/storage/object-index.d.ts +483 -38
- package/dist/storage/object-index.d.ts.map +1 -1
- package/dist/storage/object-index.js +388 -22
- package/dist/storage/object-index.js.map +1 -1
- package/dist/storage/r2-pack.d.ts +957 -94
- package/dist/storage/r2-pack.d.ts.map +1 -1
- package/dist/storage/r2-pack.js +756 -48
- package/dist/storage/r2-pack.js.map +1 -1
- package/dist/tiered/cdc-pipeline.d.ts +1610 -38
- package/dist/tiered/cdc-pipeline.d.ts.map +1 -1
- package/dist/tiered/cdc-pipeline.js +1131 -22
- package/dist/tiered/cdc-pipeline.js.map +1 -1
- package/dist/tiered/migration.d.ts +903 -41
- package/dist/tiered/migration.d.ts.map +1 -1
- package/dist/tiered/migration.js +646 -24
- package/dist/tiered/migration.js.map +1 -1
- package/dist/tiered/parquet-writer.d.ts +944 -47
- package/dist/tiered/parquet-writer.d.ts.map +1 -1
- package/dist/tiered/parquet-writer.js +667 -39
- package/dist/tiered/parquet-writer.js.map +1 -1
- package/dist/tiered/read-path.d.ts +728 -34
- package/dist/tiered/read-path.d.ts.map +1 -1
- package/dist/tiered/read-path.js +310 -27
- package/dist/tiered/read-path.js.map +1 -1
- package/dist/types/objects.d.ts +457 -0
- package/dist/types/objects.d.ts.map +1 -1
- package/dist/types/objects.js +305 -4
- package/dist/types/objects.js.map +1 -1
- package/dist/types/storage.d.ts +407 -35
- package/dist/types/storage.d.ts.map +1 -1
- package/dist/types/storage.js +27 -3
- package/dist/types/storage.js.map +1 -1
- package/dist/utils/hash.d.ts +133 -12
- package/dist/utils/hash.d.ts.map +1 -1
- package/dist/utils/hash.js +133 -12
- package/dist/utils/hash.js.map +1 -1
- package/dist/utils/sha1.d.ts +102 -9
- package/dist/utils/sha1.d.ts.map +1 -1
- package/dist/utils/sha1.js +114 -11
- package/dist/utils/sha1.js.map +1 -1
- package/dist/wire/capabilities.d.ts +896 -88
- package/dist/wire/capabilities.d.ts.map +1 -1
- package/dist/wire/capabilities.js +566 -62
- package/dist/wire/capabilities.js.map +1 -1
- package/dist/wire/pkt-line.d.ts +293 -15
- package/dist/wire/pkt-line.d.ts.map +1 -1
- package/dist/wire/pkt-line.js +251 -15
- package/dist/wire/pkt-line.js.map +1 -1
- package/dist/wire/receive-pack.d.ts +814 -64
- package/dist/wire/receive-pack.d.ts.map +1 -1
- package/dist/wire/receive-pack.js +542 -41
- package/dist/wire/receive-pack.js.map +1 -1
- package/dist/wire/smart-http.d.ts +575 -97
- package/dist/wire/smart-http.d.ts.map +1 -1
- package/dist/wire/smart-http.js +337 -46
- package/dist/wire/smart-http.js.map +1 -1
- package/dist/wire/upload-pack.d.ts +492 -98
- package/dist/wire/upload-pack.d.ts.map +1 -1
- package/dist/wire/upload-pack.js +347 -59
- package/dist/wire/upload-pack.js.map +1 -1
- package/package.json +1 -1
|
@@ -1,315 +1,1887 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* CDC (Change Data Capture) Pipeline for Git Operations
|
|
2
|
+
* @fileoverview CDC (Change Data Capture) Pipeline for Git Operations
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
* -
|
|
7
|
-
* - Batching with size and time-based flushing
|
|
8
|
-
* - Error handling with retry policies
|
|
4
|
+
* @description
|
|
5
|
+
* This module provides a comprehensive Change Data Capture system for Git operations,
|
|
6
|
+
* enabling real-time event streaming, transformation, and analytics for Git repository events.
|
|
9
7
|
*
|
|
10
|
-
*
|
|
8
|
+
* ## Key Features
|
|
9
|
+
*
|
|
10
|
+
* - **Event Capture**: Captures git operations (push, fetch, commits, branches, tags, merges)
|
|
11
|
+
* - **Parquet Transformation**: Converts events to columnar Parquet format for analytics
|
|
12
|
+
* - **Batching**: Efficient event batching with configurable size and time-based flushing
|
|
13
|
+
* - **Retry Policies**: Configurable exponential backoff with jitter for resilient processing
|
|
14
|
+
* - **Dead Letter Queue**: Handles failed events for later reprocessing
|
|
15
|
+
* - **Metrics**: Built-in tracking for events processed, batches, errors, and latency
|
|
16
|
+
*
|
|
17
|
+
* ## Architecture
|
|
18
|
+
*
|
|
19
|
+
* The pipeline consists of several components:
|
|
20
|
+
* 1. **CDCEventCapture**: Captures git operations and converts them to CDCEvents
|
|
21
|
+
* 2. **CDCBatcher**: Batches events for efficient processing
|
|
22
|
+
* 3. **ParquetTransformer**: Transforms events to Parquet format
|
|
23
|
+
* 4. **CDCPipeline**: Orchestrates the entire flow with error handling
|
|
24
|
+
*
|
|
25
|
+
* ## Event Flow
|
|
26
|
+
*
|
|
27
|
+
* ```
|
|
28
|
+
* Git Operation -> CDCEventCapture -> CDCBatcher -> ParquetTransformer -> Output
|
|
29
|
+
* |
|
|
30
|
+
* v
|
|
31
|
+
* (On failure) Dead Letter Queue
|
|
32
|
+
* ```
|
|
33
|
+
*
|
|
34
|
+
* @module tiered/cdc-pipeline
|
|
35
|
+
*
|
|
36
|
+
* @example
|
|
37
|
+
* ```typescript
|
|
38
|
+
* // Create and start a pipeline
|
|
39
|
+
* const pipeline = new CDCPipeline({
|
|
40
|
+
* batchSize: 100,
|
|
41
|
+
* flushIntervalMs: 5000,
|
|
42
|
+
* maxRetries: 3,
|
|
43
|
+
* parquetCompression: 'snappy',
|
|
44
|
+
* outputPath: '/analytics',
|
|
45
|
+
* schemaVersion: 1
|
|
46
|
+
* })
|
|
47
|
+
*
|
|
48
|
+
* await pipeline.start()
|
|
49
|
+
*
|
|
50
|
+
* // Process events
|
|
51
|
+
* pipeline.onOutput((output) => {
|
|
52
|
+
* console.log(`Generated batch: ${output.batchId}`)
|
|
53
|
+
* console.log(`Events: ${output.events.length}`)
|
|
54
|
+
* console.log(`Parquet size: ${output.parquetBuffer.length} bytes`)
|
|
55
|
+
* })
|
|
56
|
+
*
|
|
57
|
+
* pipeline.onDeadLetter((events, error) => {
|
|
58
|
+
* console.error(`Failed events: ${events.length}`, error)
|
|
59
|
+
* })
|
|
60
|
+
*
|
|
61
|
+
* // Create and process an event
|
|
62
|
+
* const event = createCDCEvent('COMMIT_CREATED', 'push', {
|
|
63
|
+
* operation: 'commit-create',
|
|
64
|
+
* sha: 'abc123...',
|
|
65
|
+
* treeSha: 'def456...',
|
|
66
|
+
* parentShas: ['parent1...']
|
|
67
|
+
* })
|
|
68
|
+
*
|
|
69
|
+
* await pipeline.process(event)
|
|
70
|
+
*
|
|
71
|
+
* // Get metrics
|
|
72
|
+
* const metrics = pipeline.getMetrics()
|
|
73
|
+
* console.log(`Processed: ${metrics.eventsProcessed}`)
|
|
74
|
+
* console.log(`Batches: ${metrics.batchesGenerated}`)
|
|
75
|
+
*
|
|
76
|
+
* // Stop the pipeline
|
|
77
|
+
* await pipeline.stop()
|
|
78
|
+
* ```
|
|
79
|
+
*
|
|
80
|
+
* @see {@link CDCPipeline} - Main pipeline orchestration class
|
|
81
|
+
* @see {@link CDCEventCapture} - Event capture from git operations
|
|
82
|
+
* @see {@link ParquetTransformer} - Parquet format transformation
|
|
11
83
|
*/
|
|
12
84
|
/**
|
|
13
|
-
* CDC Event Types representing different git operations
|
|
85
|
+
* CDC Event Types representing different git operations.
|
|
86
|
+
*
|
|
87
|
+
* @description
|
|
88
|
+
* Enumeration of all supported Git operation types that can be captured
|
|
89
|
+
* by the CDC system. Each type corresponds to a specific Git operation.
|
|
90
|
+
*
|
|
91
|
+
* @example
|
|
92
|
+
* ```typescript
|
|
93
|
+
* const eventType: CDCEventType = 'COMMIT_CREATED'
|
|
94
|
+
* ```
|
|
14
95
|
*/
|
|
15
96
|
export type CDCEventType = 'OBJECT_CREATED' | 'OBJECT_DELETED' | 'REF_UPDATED' | 'PACK_RECEIVED' | 'COMMIT_CREATED' | 'TREE_MODIFIED' | 'BRANCH_CREATED' | 'BRANCH_DELETED' | 'TAG_CREATED' | 'MERGE_COMPLETED';
|
|
16
97
|
/**
|
|
17
|
-
* CDC Event Source indicating origin of the event
|
|
98
|
+
* CDC Event Source indicating the origin of the event.
|
|
99
|
+
*
|
|
100
|
+
* @description
|
|
101
|
+
* Identifies the source system or operation that generated the CDC event.
|
|
102
|
+
* This helps with event filtering, routing, and analytics.
|
|
103
|
+
*
|
|
104
|
+
* - `push`: Events from git push operations
|
|
105
|
+
* - `fetch`: Events from git fetch operations
|
|
106
|
+
* - `internal`: Events from internal system operations
|
|
107
|
+
* - `replication`: Events from repository replication
|
|
108
|
+
* - `gc`: Events from garbage collection
|
|
109
|
+
*
|
|
110
|
+
* @example
|
|
111
|
+
* ```typescript
|
|
112
|
+
* const source: CDCEventSource = 'push'
|
|
113
|
+
* ```
|
|
18
114
|
*/
|
|
19
115
|
export type CDCEventSource = 'push' | 'fetch' | 'internal' | 'replication' | 'gc';
|
|
20
116
|
/**
|
|
21
|
-
* Payload for CDC events
|
|
117
|
+
* Payload data for CDC events.
|
|
118
|
+
*
|
|
119
|
+
* @description
|
|
120
|
+
* Contains the detailed data associated with a CDC event. Different event
|
|
121
|
+
* types use different subsets of these fields.
|
|
122
|
+
*
|
|
123
|
+
* @example
|
|
124
|
+
* ```typescript
|
|
125
|
+
* // Commit created payload
|
|
126
|
+
* const payload: CDCEventPayload = {
|
|
127
|
+
* operation: 'commit-create',
|
|
128
|
+
* sha: 'abc123...',
|
|
129
|
+
* treeSha: 'def456...',
|
|
130
|
+
* parentShas: ['parent1...']
|
|
131
|
+
* }
|
|
132
|
+
*
|
|
133
|
+
* // Ref updated payload
|
|
134
|
+
* const refPayload: CDCEventPayload = {
|
|
135
|
+
* operation: 'ref-update',
|
|
136
|
+
* refName: 'refs/heads/main',
|
|
137
|
+
* oldSha: 'old123...',
|
|
138
|
+
* newSha: 'new456...'
|
|
139
|
+
* }
|
|
140
|
+
* ```
|
|
22
141
|
*/
|
|
23
142
|
export interface CDCEventPayload {
|
|
143
|
+
/**
|
|
144
|
+
* The type of operation performed.
|
|
145
|
+
*
|
|
146
|
+
* @example 'commit-create', 'ref-update', 'branch-create'
|
|
147
|
+
*/
|
|
24
148
|
operation: string;
|
|
149
|
+
/**
|
|
150
|
+
* SHA-1 hash of the affected object.
|
|
151
|
+
* Present for object-related events.
|
|
152
|
+
*/
|
|
25
153
|
sha?: string;
|
|
154
|
+
/**
|
|
155
|
+
* Timestamp of the operation in milliseconds since epoch.
|
|
156
|
+
*/
|
|
26
157
|
timestamp?: number;
|
|
158
|
+
/**
|
|
159
|
+
* Raw binary data associated with the event.
|
|
160
|
+
* Used for object creation and pack reception events.
|
|
161
|
+
*/
|
|
27
162
|
data?: Uint8Array;
|
|
163
|
+
/**
|
|
164
|
+
* Additional metadata key-value pairs.
|
|
165
|
+
* Can include object type, size, etc.
|
|
166
|
+
*/
|
|
28
167
|
metadata?: Record<string, unknown>;
|
|
168
|
+
/**
|
|
169
|
+
* Git reference name (e.g., 'refs/heads/main').
|
|
170
|
+
* Present for ref update events.
|
|
171
|
+
*/
|
|
29
172
|
refName?: string;
|
|
173
|
+
/**
|
|
174
|
+
* Previous SHA for ref update events.
|
|
175
|
+
* May be all zeros for new refs.
|
|
176
|
+
*/
|
|
30
177
|
oldSha?: string;
|
|
178
|
+
/**
|
|
179
|
+
* New SHA for ref update events.
|
|
180
|
+
* May be all zeros for deleted refs.
|
|
181
|
+
*/
|
|
31
182
|
newSha?: string;
|
|
183
|
+
/**
|
|
184
|
+
* Number of objects in a pack.
|
|
185
|
+
* Present for pack received events.
|
|
186
|
+
*/
|
|
32
187
|
objectCount?: number;
|
|
188
|
+
/**
|
|
189
|
+
* Tree SHA for commit events.
|
|
190
|
+
*/
|
|
33
191
|
treeSha?: string;
|
|
192
|
+
/**
|
|
193
|
+
* Parent commit SHAs for commit events.
|
|
194
|
+
*/
|
|
34
195
|
parentShas?: string[];
|
|
196
|
+
/**
|
|
197
|
+
* Branch name for branch-related events.
|
|
198
|
+
*/
|
|
35
199
|
branchName?: string;
|
|
200
|
+
/**
|
|
201
|
+
* Tag name for tag-related events.
|
|
202
|
+
*/
|
|
36
203
|
tagName?: string;
|
|
204
|
+
/**
|
|
205
|
+
* Base commit SHA for merge events.
|
|
206
|
+
*/
|
|
37
207
|
baseSha?: string;
|
|
208
|
+
/**
|
|
209
|
+
* Head commit SHA for merge events.
|
|
210
|
+
*/
|
|
38
211
|
headSha?: string;
|
|
39
212
|
}
|
|
40
213
|
/**
|
|
41
|
-
* CDC Event structure
|
|
214
|
+
* CDC Event structure representing a single change data capture event.
|
|
215
|
+
*
|
|
216
|
+
* @description
|
|
217
|
+
* A CDCEvent captures a single git operation with all metadata needed
|
|
218
|
+
* for replication, analytics, and auditing. Events are immutable once
|
|
219
|
+
* created and ordered by their sequence number.
|
|
220
|
+
*
|
|
221
|
+
* @example
|
|
222
|
+
* ```typescript
|
|
223
|
+
* const event: CDCEvent = {
|
|
224
|
+
* id: 'evt-1234567890-abc123',
|
|
225
|
+
* type: 'COMMIT_CREATED',
|
|
226
|
+
* source: 'push',
|
|
227
|
+
* timestamp: 1703980800000,
|
|
228
|
+
* payload: {
|
|
229
|
+
* operation: 'commit-create',
|
|
230
|
+
* sha: 'abc123...',
|
|
231
|
+
* treeSha: 'def456...',
|
|
232
|
+
* parentShas: ['parent1...']
|
|
233
|
+
* },
|
|
234
|
+
* sequence: 42,
|
|
235
|
+
* version: 1
|
|
236
|
+
* }
|
|
237
|
+
* ```
|
|
42
238
|
*/
|
|
43
239
|
export interface CDCEvent {
|
|
240
|
+
/**
|
|
241
|
+
* Unique identifier for this event.
|
|
242
|
+
* Format: `evt-{timestamp}-{random}`
|
|
243
|
+
*/
|
|
44
244
|
id: string;
|
|
245
|
+
/**
|
|
246
|
+
* Type of git operation that generated this event.
|
|
247
|
+
*
|
|
248
|
+
* @see {@link CDCEventType}
|
|
249
|
+
*/
|
|
45
250
|
type: CDCEventType;
|
|
251
|
+
/**
|
|
252
|
+
* Source system or operation that generated this event.
|
|
253
|
+
*
|
|
254
|
+
* @see {@link CDCEventSource}
|
|
255
|
+
*/
|
|
46
256
|
source: CDCEventSource;
|
|
257
|
+
/**
|
|
258
|
+
* Unix timestamp in milliseconds when the event was created.
|
|
259
|
+
*/
|
|
47
260
|
timestamp: number;
|
|
261
|
+
/**
|
|
262
|
+
* Event payload containing operation-specific data.
|
|
263
|
+
*/
|
|
48
264
|
payload: CDCEventPayload;
|
|
265
|
+
/**
|
|
266
|
+
* Monotonically increasing sequence number within a capture session.
|
|
267
|
+
* Used for ordering and deduplication.
|
|
268
|
+
*/
|
|
49
269
|
sequence: number;
|
|
270
|
+
/**
|
|
271
|
+
* Schema version of the event format.
|
|
272
|
+
* Used for backward compatibility during upgrades.
|
|
273
|
+
*/
|
|
50
274
|
version: number;
|
|
51
275
|
}
|
|
52
276
|
/**
|
|
53
|
-
*
|
|
277
|
+
* Configuration for the CDC pipeline.
|
|
278
|
+
*
|
|
279
|
+
* @description
|
|
280
|
+
* Defines all configuration options for creating and running a CDC pipeline,
|
|
281
|
+
* including batching behavior, retry policy, and output format.
|
|
282
|
+
*
|
|
283
|
+
* @example
|
|
284
|
+
* ```typescript
|
|
285
|
+
* const config: CDCPipelineConfig = {
|
|
286
|
+
* batchSize: 100, // Flush every 100 events
|
|
287
|
+
* flushIntervalMs: 5000, // Or every 5 seconds
|
|
288
|
+
* maxRetries: 3, // Retry failed batches 3 times
|
|
289
|
+
* parquetCompression: 'snappy',
|
|
290
|
+
* outputPath: '/analytics/cdc',
|
|
291
|
+
* schemaVersion: 1
|
|
292
|
+
* }
|
|
293
|
+
* ```
|
|
54
294
|
*/
|
|
55
295
|
export interface CDCPipelineConfig {
|
|
296
|
+
/**
|
|
297
|
+
* Maximum number of events to batch before flushing.
|
|
298
|
+
* Lower values reduce latency, higher values improve throughput.
|
|
299
|
+
*/
|
|
56
300
|
batchSize: number;
|
|
301
|
+
/**
|
|
302
|
+
* Maximum time in milliseconds to wait before flushing a batch.
|
|
303
|
+
* Ensures events are processed even with low throughput.
|
|
304
|
+
*/
|
|
57
305
|
flushIntervalMs: number;
|
|
306
|
+
/**
|
|
307
|
+
* Maximum number of retry attempts for failed batch processing.
|
|
308
|
+
* Uses exponential backoff between attempts.
|
|
309
|
+
*/
|
|
58
310
|
maxRetries: number;
|
|
311
|
+
/**
|
|
312
|
+
* Compression algorithm for Parquet output.
|
|
313
|
+
*
|
|
314
|
+
* - `snappy`: Fast compression with moderate ratio (recommended)
|
|
315
|
+
* - `gzip`: Higher compression ratio, slower
|
|
316
|
+
* - `none`: No compression
|
|
317
|
+
*/
|
|
59
318
|
parquetCompression: 'snappy' | 'gzip' | 'none';
|
|
319
|
+
/**
|
|
320
|
+
* Base path for output files.
|
|
321
|
+
* Parquet files will be written to this directory.
|
|
322
|
+
*/
|
|
60
323
|
outputPath: string;
|
|
324
|
+
/**
|
|
325
|
+
* Schema version for event format.
|
|
326
|
+
* Used for backward compatibility during upgrades.
|
|
327
|
+
*/
|
|
61
328
|
schemaVersion: number;
|
|
62
329
|
}
|
|
63
330
|
/**
|
|
64
|
-
* Pipeline state
|
|
331
|
+
* Pipeline operational state.
|
|
332
|
+
*
|
|
333
|
+
* @description
|
|
334
|
+
* Indicates the current state of the CDC pipeline.
|
|
335
|
+
*
|
|
336
|
+
* - `stopped`: Pipeline is not running, no events are processed
|
|
337
|
+
* - `running`: Pipeline is active and processing events
|
|
338
|
+
* - `paused`: Pipeline is temporarily suspended (reserved for future use)
|
|
65
339
|
*/
|
|
66
340
|
export type CDCPipelineState = 'stopped' | 'running' | 'paused';
|
|
67
341
|
/**
|
|
68
|
-
*
|
|
342
|
+
* Configuration for event batching.
|
|
343
|
+
*
|
|
344
|
+
* @description
|
|
345
|
+
* Controls how events are grouped into batches for processing.
|
|
346
|
+
*
|
|
347
|
+
* @example
|
|
348
|
+
* ```typescript
|
|
349
|
+
* const config: BatchConfig = {
|
|
350
|
+
* batchSize: 100,
|
|
351
|
+
* flushIntervalMs: 5000
|
|
352
|
+
* }
|
|
353
|
+
* ```
|
|
69
354
|
*/
|
|
70
355
|
export interface BatchConfig {
|
|
356
|
+
/**
|
|
357
|
+
* Maximum number of events per batch.
|
|
358
|
+
*/
|
|
71
359
|
batchSize: number;
|
|
360
|
+
/**
|
|
361
|
+
* Maximum time to wait before flushing a partial batch.
|
|
362
|
+
*/
|
|
72
363
|
flushIntervalMs: number;
|
|
73
364
|
}
|
|
74
365
|
/**
|
|
75
|
-
*
|
|
366
|
+
* Result of a batch flush operation.
|
|
367
|
+
*
|
|
368
|
+
* @description
|
|
369
|
+
* Contains the events in the batch and metadata about the batch
|
|
370
|
+
* for downstream processing and monitoring.
|
|
371
|
+
*
|
|
372
|
+
* @example
|
|
373
|
+
* ```typescript
|
|
374
|
+
* batcher.onBatch((result: BatchResult) => {
|
|
375
|
+
* console.log(`Batch: ${result.eventCount} events`)
|
|
376
|
+
* console.log(`Sequences: ${result.minSequence} - ${result.maxSequence}`)
|
|
377
|
+
* console.log(`Time range: ${result.minTimestamp} - ${result.maxTimestamp}`)
|
|
378
|
+
* })
|
|
379
|
+
* ```
|
|
76
380
|
*/
|
|
77
381
|
export interface BatchResult {
|
|
382
|
+
/**
|
|
383
|
+
* Array of events in this batch.
|
|
384
|
+
*/
|
|
78
385
|
events: CDCEvent[];
|
|
386
|
+
/**
|
|
387
|
+
* Number of events in the batch.
|
|
388
|
+
*/
|
|
79
389
|
eventCount: number;
|
|
390
|
+
/**
|
|
391
|
+
* Whether the batch was processed successfully.
|
|
392
|
+
*/
|
|
80
393
|
success: boolean;
|
|
394
|
+
/**
|
|
395
|
+
* Minimum sequence number in the batch.
|
|
396
|
+
* Useful for tracking progress and resumption.
|
|
397
|
+
*/
|
|
81
398
|
minSequence?: number;
|
|
399
|
+
/**
|
|
400
|
+
* Maximum sequence number in the batch.
|
|
401
|
+
*/
|
|
82
402
|
maxSequence?: number;
|
|
403
|
+
/**
|
|
404
|
+
* Earliest event timestamp in the batch (milliseconds).
|
|
405
|
+
*/
|
|
83
406
|
minTimestamp?: number;
|
|
407
|
+
/**
|
|
408
|
+
* Latest event timestamp in the batch (milliseconds).
|
|
409
|
+
*/
|
|
84
410
|
maxTimestamp?: number;
|
|
85
411
|
}
|
|
86
412
|
/**
|
|
87
|
-
* CDC Error types
|
|
413
|
+
* CDC Error types for categorizing failures.
|
|
414
|
+
*
|
|
415
|
+
* @description
|
|
416
|
+
* Error codes that help identify the type of failure for
|
|
417
|
+
* appropriate error handling and recovery strategies.
|
|
418
|
+
*
|
|
419
|
+
* - `VALIDATION_ERROR`: Event failed validation checks
|
|
420
|
+
* - `PROCESSING_ERROR`: Error during event processing
|
|
421
|
+
* - `SERIALIZATION_ERROR`: Error serializing/deserializing events
|
|
422
|
+
* - `STORAGE_ERROR`: Error writing to storage
|
|
423
|
+
* - `TIMEOUT_ERROR`: Operation timed out
|
|
424
|
+
* - `BUFFER_OVERFLOW_ERROR`: Event buffer exceeded capacity
|
|
425
|
+
* - `UNKNOWN_ERROR`: Unclassified error
|
|
88
426
|
*/
|
|
89
427
|
export type CDCErrorType = 'VALIDATION_ERROR' | 'PROCESSING_ERROR' | 'SERIALIZATION_ERROR' | 'STORAGE_ERROR' | 'TIMEOUT_ERROR' | 'BUFFER_OVERFLOW_ERROR' | 'UNKNOWN_ERROR';
|
|
90
428
|
/**
|
|
91
|
-
* Parquet
|
|
429
|
+
* Field definition for Parquet schema.
|
|
430
|
+
*
|
|
431
|
+
* @description
|
|
432
|
+
* Defines a single column in the Parquet output schema.
|
|
92
433
|
*/
|
|
93
434
|
export interface ParquetField {
|
|
435
|
+
/**
|
|
436
|
+
* Column name.
|
|
437
|
+
*/
|
|
94
438
|
name: string;
|
|
439
|
+
/**
|
|
440
|
+
* Column data type (STRING, INT64, TIMESTAMP, etc.).
|
|
441
|
+
*/
|
|
95
442
|
type: string;
|
|
443
|
+
/**
|
|
444
|
+
* Whether the column can contain null values.
|
|
445
|
+
*/
|
|
96
446
|
nullable: boolean;
|
|
97
447
|
}
|
|
98
448
|
/**
|
|
99
|
-
* Parquet
|
|
449
|
+
* Row representation for Parquet output.
|
|
450
|
+
*
|
|
451
|
+
* @description
|
|
452
|
+
* Represents a single CDC event as a Parquet row with
|
|
453
|
+
* flattened fields for efficient columnar storage.
|
|
100
454
|
*/
|
|
101
455
|
export interface ParquetRow {
|
|
456
|
+
/**
|
|
457
|
+
* Event unique identifier.
|
|
458
|
+
*/
|
|
102
459
|
event_id: string;
|
|
460
|
+
/**
|
|
461
|
+
* Event type (e.g., 'COMMIT_CREATED').
|
|
462
|
+
*/
|
|
103
463
|
event_type: string;
|
|
464
|
+
/**
|
|
465
|
+
* Event source (e.g., 'push').
|
|
466
|
+
*/
|
|
104
467
|
source: string;
|
|
468
|
+
/**
|
|
469
|
+
* Event timestamp in milliseconds.
|
|
470
|
+
*/
|
|
105
471
|
timestamp: number;
|
|
472
|
+
/**
|
|
473
|
+
* Event sequence number.
|
|
474
|
+
*/
|
|
106
475
|
sequence: number;
|
|
476
|
+
/**
|
|
477
|
+
* Event schema version.
|
|
478
|
+
*/
|
|
107
479
|
version: number;
|
|
480
|
+
/**
|
|
481
|
+
* JSON-serialized event payload.
|
|
482
|
+
*/
|
|
108
483
|
payload_json: string;
|
|
484
|
+
/**
|
|
485
|
+
* SHA from the payload, extracted for efficient filtering.
|
|
486
|
+
*/
|
|
109
487
|
sha: string | null;
|
|
110
488
|
}
|
|
111
489
|
/**
|
|
112
|
-
* Parquet
|
|
490
|
+
* Batch of Parquet rows ready for writing.
|
|
491
|
+
*
|
|
492
|
+
* @description
|
|
493
|
+
* Contains transformed rows and metadata needed to write
|
|
494
|
+
* a Parquet file.
|
|
113
495
|
*/
|
|
114
496
|
export interface ParquetBatch {
|
|
497
|
+
/**
|
|
498
|
+
* Array of Parquet rows.
|
|
499
|
+
*/
|
|
115
500
|
rows: ParquetRow[];
|
|
501
|
+
/**
|
|
502
|
+
* Number of rows in the batch.
|
|
503
|
+
*/
|
|
116
504
|
rowCount: number;
|
|
505
|
+
/**
|
|
506
|
+
* Batch creation timestamp.
|
|
507
|
+
*/
|
|
117
508
|
createdAt: number;
|
|
509
|
+
/**
|
|
510
|
+
* Parquet schema definition.
|
|
511
|
+
*/
|
|
118
512
|
schema: {
|
|
119
513
|
fields: ParquetField[];
|
|
120
514
|
};
|
|
515
|
+
/**
|
|
516
|
+
* Compression algorithm used.
|
|
517
|
+
*/
|
|
121
518
|
compression: string;
|
|
122
519
|
}
|
|
123
520
|
/**
|
|
124
|
-
*
|
|
521
|
+
* Output from the CDC pipeline.
|
|
522
|
+
*
|
|
523
|
+
* @description
|
|
524
|
+
* Contains the Parquet-formatted data and metadata for a
|
|
525
|
+
* processed batch of events.
|
|
526
|
+
*
|
|
527
|
+
* @example
|
|
528
|
+
* ```typescript
|
|
529
|
+
* pipeline.onOutput((output: PipelineOutput) => {
|
|
530
|
+
* console.log(`Batch ID: ${output.batchId}`)
|
|
531
|
+
* console.log(`Events: ${output.events.length}`)
|
|
532
|
+
* console.log(`Size: ${output.parquetBuffer.length} bytes`)
|
|
533
|
+
*
|
|
534
|
+
* // Write to storage
|
|
535
|
+
* await r2.put(`cdc/${output.batchId}.parquet`, output.parquetBuffer)
|
|
536
|
+
* })
|
|
537
|
+
* ```
|
|
125
538
|
*/
|
|
126
539
|
export interface PipelineOutput {
|
|
540
|
+
/**
|
|
541
|
+
* Parquet-formatted data as a byte array.
|
|
542
|
+
*/
|
|
127
543
|
parquetBuffer: Uint8Array;
|
|
544
|
+
/**
|
|
545
|
+
* Original events included in this batch.
|
|
546
|
+
*/
|
|
128
547
|
events: CDCEvent[];
|
|
548
|
+
/**
|
|
549
|
+
* Unique identifier for this batch.
|
|
550
|
+
* Format: `batch-{timestamp}-{random}`
|
|
551
|
+
*/
|
|
129
552
|
batchId: string;
|
|
130
553
|
}
|
|
131
554
|
/**
|
|
132
|
-
*
|
|
555
|
+
* Metrics for monitoring pipeline performance.
|
|
556
|
+
*
|
|
557
|
+
* @description
|
|
558
|
+
* Provides operational metrics for monitoring and alerting
|
|
559
|
+
* on pipeline health and performance.
|
|
560
|
+
*
|
|
561
|
+
* @example
|
|
562
|
+
* ```typescript
|
|
563
|
+
* const metrics = pipeline.getMetrics()
|
|
564
|
+
* console.log(`Events processed: ${metrics.eventsProcessed}`)
|
|
565
|
+
* console.log(`Batches generated: ${metrics.batchesGenerated}`)
|
|
566
|
+
* console.log(`Bytes written: ${metrics.bytesWritten}`)
|
|
567
|
+
* console.log(`Errors: ${metrics.errors}`)
|
|
568
|
+
* console.log(`Avg latency: ${metrics.avgProcessingLatencyMs}ms`)
|
|
569
|
+
* ```
|
|
133
570
|
*/
|
|
134
571
|
export interface PipelineMetrics {
|
|
572
|
+
/**
|
|
573
|
+
* Total number of events processed.
|
|
574
|
+
*/
|
|
135
575
|
eventsProcessed: number;
|
|
576
|
+
/**
|
|
577
|
+
* Total number of batches generated.
|
|
578
|
+
*/
|
|
136
579
|
batchesGenerated: number;
|
|
580
|
+
/**
|
|
581
|
+
* Total bytes written to output.
|
|
582
|
+
*/
|
|
137
583
|
bytesWritten: number;
|
|
584
|
+
/**
|
|
585
|
+
* Total number of errors encountered.
|
|
586
|
+
*/
|
|
138
587
|
errors: number;
|
|
588
|
+
/**
|
|
589
|
+
* Average event processing latency in milliseconds.
|
|
590
|
+
* Calculated from the last 1000 events.
|
|
591
|
+
*/
|
|
139
592
|
avgProcessingLatencyMs: number;
|
|
140
593
|
}
|
|
141
594
|
/**
|
|
142
|
-
*
|
|
595
|
+
* Result of processing a single event.
|
|
596
|
+
*
|
|
597
|
+
* @description
|
|
598
|
+
* Returned when an event is successfully queued for processing.
|
|
143
599
|
*/
|
|
144
600
|
export interface ProcessResult {
|
|
601
|
+
/**
|
|
602
|
+
* Whether the event was successfully queued.
|
|
603
|
+
*/
|
|
145
604
|
success: boolean;
|
|
605
|
+
/**
|
|
606
|
+
* ID of the processed event.
|
|
607
|
+
*/
|
|
146
608
|
eventId: string;
|
|
147
609
|
}
|
|
148
610
|
/**
|
|
149
|
-
*
|
|
611
|
+
* Result of stopping the pipeline.
|
|
612
|
+
*
|
|
613
|
+
* @description
|
|
614
|
+
* Contains information about any pending events that were
|
|
615
|
+
* flushed during shutdown.
|
|
150
616
|
*/
|
|
151
617
|
export interface StopResult {
|
|
618
|
+
/**
|
|
619
|
+
* Number of events flushed during stop.
|
|
620
|
+
*/
|
|
152
621
|
flushedCount: number;
|
|
153
622
|
}
|
|
154
623
|
/**
|
|
155
|
-
* Custom error class for CDC operations
|
|
624
|
+
* Custom error class for CDC operations.
|
|
625
|
+
*
|
|
626
|
+
* @description
|
|
627
|
+
* CDCError provides structured error information for CDC pipeline failures,
|
|
628
|
+
* including an error type for programmatic handling and optional cause for
|
|
629
|
+
* error chaining.
|
|
630
|
+
*
|
|
631
|
+
* @example
|
|
632
|
+
* ```typescript
|
|
633
|
+
* try {
|
|
634
|
+
* await pipeline.process(event)
|
|
635
|
+
* } catch (error) {
|
|
636
|
+
* if (error instanceof CDCError) {
|
|
637
|
+
* switch (error.type) {
|
|
638
|
+
* case 'VALIDATION_ERROR':
|
|
639
|
+
* console.log('Invalid event:', error.message)
|
|
640
|
+
* break
|
|
641
|
+
* case 'PROCESSING_ERROR':
|
|
642
|
+
* console.log('Processing failed:', error.message)
|
|
643
|
+
* if (error.cause) {
|
|
644
|
+
* console.log('Caused by:', error.cause.message)
|
|
645
|
+
* }
|
|
646
|
+
* break
|
|
647
|
+
* }
|
|
648
|
+
* }
|
|
649
|
+
* }
|
|
650
|
+
* ```
|
|
651
|
+
*
|
|
652
|
+
* @class CDCError
|
|
653
|
+
* @extends Error
|
|
156
654
|
*/
|
|
157
655
|
export declare class CDCError extends Error {
|
|
158
656
|
readonly type: CDCErrorType;
|
|
159
657
|
readonly cause?: Error | undefined;
|
|
658
|
+
/**
|
|
659
|
+
* Creates a new CDCError.
|
|
660
|
+
*
|
|
661
|
+
* @param type - Error type for categorization
|
|
662
|
+
* @param message - Human-readable error message
|
|
663
|
+
* @param cause - Optional underlying error that caused this error
|
|
664
|
+
*/
|
|
160
665
|
constructor(type: CDCErrorType, message: string, cause?: Error | undefined);
|
|
161
666
|
}
|
|
667
|
+
/**
|
|
668
|
+
* Configuration for the retry policy.
|
|
669
|
+
*
|
|
670
|
+
* @description
|
|
671
|
+
* Configures exponential backoff behavior for failed operations.
|
|
672
|
+
*
|
|
673
|
+
* @example
|
|
674
|
+
* ```typescript
|
|
675
|
+
* const config: RetryPolicyConfig = {
|
|
676
|
+
* maxRetries: 3,
|
|
677
|
+
* initialDelayMs: 100,
|
|
678
|
+
* maxDelayMs: 5000,
|
|
679
|
+
* backoffMultiplier: 2,
|
|
680
|
+
* jitter: true // Add randomness to prevent thundering herd
|
|
681
|
+
* }
|
|
682
|
+
* ```
|
|
683
|
+
*/
|
|
162
684
|
export interface RetryPolicyConfig {
|
|
685
|
+
/**
|
|
686
|
+
* Maximum number of retry attempts before giving up.
|
|
687
|
+
*/
|
|
163
688
|
maxRetries: number;
|
|
689
|
+
/**
|
|
690
|
+
* Initial delay in milliseconds before first retry.
|
|
691
|
+
*/
|
|
164
692
|
initialDelayMs: number;
|
|
693
|
+
/**
|
|
694
|
+
* Maximum delay in milliseconds between retries.
|
|
695
|
+
* Caps exponential growth.
|
|
696
|
+
*/
|
|
165
697
|
maxDelayMs: number;
|
|
698
|
+
/**
|
|
699
|
+
* Multiplier applied to delay after each attempt.
|
|
700
|
+
* A value of 2 doubles the delay each time.
|
|
701
|
+
*/
|
|
166
702
|
backoffMultiplier: number;
|
|
703
|
+
/**
|
|
704
|
+
* Whether to add random jitter to delays.
|
|
705
|
+
* Helps prevent thundering herd problems.
|
|
706
|
+
*/
|
|
167
707
|
jitter?: boolean;
|
|
168
708
|
}
|
|
169
709
|
/**
|
|
170
|
-
* Retry policy
|
|
710
|
+
* Retry policy implementing exponential backoff with optional jitter.
|
|
711
|
+
*
|
|
712
|
+
* @description
|
|
713
|
+
* Provides a robust retry mechanism for handling transient failures.
|
|
714
|
+
* Uses exponential backoff to space out retry attempts, with optional
|
|
715
|
+
* jitter to prevent synchronized retries from multiple clients.
|
|
716
|
+
*
|
|
717
|
+
* **Backoff Formula:**
|
|
718
|
+
* `delay = min(initialDelay * (multiplier ^ attempt), maxDelay)`
|
|
719
|
+
*
|
|
720
|
+
* **With Jitter:**
|
|
721
|
+
* `delay = delay * random(0.5, 1.5)`
|
|
722
|
+
*
|
|
723
|
+
* @example
|
|
724
|
+
* ```typescript
|
|
725
|
+
* const policy = new CDCRetryPolicy({
|
|
726
|
+
* maxRetries: 3,
|
|
727
|
+
* initialDelayMs: 100,
|
|
728
|
+
* maxDelayMs: 5000,
|
|
729
|
+
* backoffMultiplier: 2,
|
|
730
|
+
* jitter: true
|
|
731
|
+
* })
|
|
732
|
+
*
|
|
733
|
+
* let attempts = 0
|
|
734
|
+
* while (attempts < 10) {
|
|
735
|
+
* try {
|
|
736
|
+
* await doOperation()
|
|
737
|
+
* break
|
|
738
|
+
* } catch (error) {
|
|
739
|
+
* attempts++
|
|
740
|
+
* if (!policy.shouldRetry(attempts)) {
|
|
741
|
+
* throw new Error('Max retries exceeded')
|
|
742
|
+
* }
|
|
743
|
+
* const delay = policy.getDelay(attempts)
|
|
744
|
+
* console.log(`Retry ${attempts} after ${delay}ms`)
|
|
745
|
+
* await sleep(delay)
|
|
746
|
+
* }
|
|
747
|
+
* }
|
|
748
|
+
* ```
|
|
749
|
+
*
|
|
750
|
+
* @class CDCRetryPolicy
|
|
171
751
|
*/
|
|
172
752
|
export declare class CDCRetryPolicy {
|
|
753
|
+
/**
|
|
754
|
+
* Retry configuration.
|
|
755
|
+
* @private
|
|
756
|
+
*/
|
|
173
757
|
private readonly config;
|
|
758
|
+
/**
|
|
759
|
+
* Creates a new retry policy.
|
|
760
|
+
*
|
|
761
|
+
* @param config - Retry policy configuration
|
|
762
|
+
*/
|
|
174
763
|
constructor(config: RetryPolicyConfig);
|
|
764
|
+
/**
|
|
765
|
+
* Determines whether another retry should be attempted.
|
|
766
|
+
*
|
|
767
|
+
* @param attemptCount - Number of attempts already made
|
|
768
|
+
* @returns true if more retries are allowed, false otherwise
|
|
769
|
+
*
|
|
770
|
+
* @example
|
|
771
|
+
* ```typescript
|
|
772
|
+
* if (policy.shouldRetry(3)) {
|
|
773
|
+
* // Retry is allowed
|
|
774
|
+
* }
|
|
775
|
+
* ```
|
|
776
|
+
*/
|
|
175
777
|
shouldRetry(attemptCount: number): boolean;
|
|
778
|
+
/**
|
|
779
|
+
* Calculates the delay before the next retry.
|
|
780
|
+
*
|
|
781
|
+
* @description
|
|
782
|
+
* Computes delay using exponential backoff, capped at maxDelayMs.
|
|
783
|
+
* If jitter is enabled, applies a random factor between 0.5x and 1.5x.
|
|
784
|
+
*
|
|
785
|
+
* @param attemptCount - Number of attempts already made (1-indexed)
|
|
786
|
+
* @returns Delay in milliseconds before next retry
|
|
787
|
+
*
|
|
788
|
+
* @example
|
|
789
|
+
* ```typescript
|
|
790
|
+
* // With initialDelay=100, multiplier=2:
|
|
791
|
+
* // Attempt 1: 100ms * 2^0 = 100ms
|
|
792
|
+
* // Attempt 2: 100ms * 2^1 = 200ms
|
|
793
|
+
* // Attempt 3: 100ms * 2^2 = 400ms
|
|
794
|
+
* const delay = policy.getDelay(attemptCount)
|
|
795
|
+
* await sleep(delay)
|
|
796
|
+
* ```
|
|
797
|
+
*/
|
|
176
798
|
getDelay(attemptCount: number): number;
|
|
177
799
|
}
|
|
800
|
+
/**
|
|
801
|
+
* Configuration options for CDC event capture.
|
|
802
|
+
*
|
|
803
|
+
* @example
|
|
804
|
+
* ```typescript
|
|
805
|
+
* const options: CDCEventCaptureOptions = {
|
|
806
|
+
* maxBufferSize: 1000 // Auto-flush when buffer reaches 1000 events
|
|
807
|
+
* }
|
|
808
|
+
* ```
|
|
809
|
+
*/
|
|
178
810
|
export interface CDCEventCaptureOptions {
|
|
811
|
+
/**
|
|
812
|
+
* Maximum number of events to buffer before auto-flushing.
|
|
813
|
+
* Defaults to Infinity (no auto-flush).
|
|
814
|
+
*/
|
|
179
815
|
maxBufferSize?: number;
|
|
180
816
|
}
|
|
817
|
+
/**
|
|
818
|
+
* Callback function for git operation events.
|
|
819
|
+
*
|
|
820
|
+
* @param event - The captured CDC event
|
|
821
|
+
*/
|
|
181
822
|
export type GitOperationListener = (event: CDCEvent) => void;
|
|
182
823
|
/**
|
|
183
|
-
* Captures git operations and converts them to CDC events
|
|
824
|
+
* Captures git operations and converts them to CDC events.
|
|
825
|
+
*
|
|
826
|
+
* @description
|
|
827
|
+
* CDCEventCapture hooks into git operations and generates CDCEvents for each
|
|
828
|
+
* operation. It maintains an internal buffer of events that can be flushed
|
|
829
|
+
* manually or automatically when the buffer reaches a configured size.
|
|
830
|
+
*
|
|
831
|
+
* **Supported Operations:**
|
|
832
|
+
* - Object creation/deletion (blobs, trees, commits, tags)
|
|
833
|
+
* - Reference updates (branches, tags)
|
|
834
|
+
* - Commit creation
|
|
835
|
+
* - Pack reception
|
|
836
|
+
* - Branch creation/deletion
|
|
837
|
+
* - Tag creation
|
|
838
|
+
* - Merge completion
|
|
839
|
+
*
|
|
840
|
+
* **Event Ordering:**
|
|
841
|
+
* Events are assigned monotonically increasing sequence numbers within a
|
|
842
|
+
* capture session. This ensures proper ordering for replay and analytics.
|
|
843
|
+
*
|
|
844
|
+
* @example
|
|
845
|
+
* ```typescript
|
|
846
|
+
* const capture = new CDCEventCapture({ maxBufferSize: 100 })
|
|
847
|
+
*
|
|
848
|
+
* // Add a listener for real-time processing
|
|
849
|
+
* capture.addListener((event) => {
|
|
850
|
+
* console.log(`Event: ${event.type} - ${event.id}`)
|
|
851
|
+
* })
|
|
852
|
+
*
|
|
853
|
+
* // Capture git operations
|
|
854
|
+
* await capture.onCommitCreated('abc123...', 'tree456...', ['parent789...'])
|
|
855
|
+
* await capture.onRefUpdate('refs/heads/main', 'old...', 'new...')
|
|
856
|
+
*
|
|
857
|
+
* // Get buffered events
|
|
858
|
+
* console.log(`Buffer size: ${capture.getBufferSize()}`)
|
|
859
|
+
*
|
|
860
|
+
* // Flush buffer
|
|
861
|
+
* const events = await capture.flush()
|
|
862
|
+
* console.log(`Flushed ${events.length} events`)
|
|
863
|
+
* ```
|
|
864
|
+
*
|
|
865
|
+
* @class CDCEventCapture
|
|
184
866
|
*/
|
|
185
867
|
export declare class CDCEventCapture {
|
|
868
|
+
/**
|
|
869
|
+
* Buffer of captured events.
|
|
870
|
+
* @private
|
|
871
|
+
*/
|
|
186
872
|
private events;
|
|
873
|
+
/**
|
|
874
|
+
* Monotonically increasing sequence counter.
|
|
875
|
+
* @private
|
|
876
|
+
*/
|
|
187
877
|
private sequenceCounter;
|
|
878
|
+
/**
|
|
879
|
+
* Registered event listeners.
|
|
880
|
+
* @private
|
|
881
|
+
*/
|
|
188
882
|
private listeners;
|
|
883
|
+
/**
|
|
884
|
+
* Maximum buffer size before auto-flush.
|
|
885
|
+
* @private
|
|
886
|
+
*/
|
|
189
887
|
private readonly maxBufferSize;
|
|
888
|
+
/**
|
|
889
|
+
* Creates a new CDC event capture instance.
|
|
890
|
+
*
|
|
891
|
+
* @param options - Configuration options
|
|
892
|
+
*/
|
|
190
893
|
constructor(options?: CDCEventCaptureOptions);
|
|
894
|
+
/**
|
|
895
|
+
* Generates a unique event ID.
|
|
896
|
+
* @private
|
|
897
|
+
*/
|
|
191
898
|
private generateEventId;
|
|
899
|
+
/**
|
|
900
|
+
* Emits an event to the buffer and notifies listeners.
|
|
901
|
+
* @private
|
|
902
|
+
*/
|
|
192
903
|
private emitEvent;
|
|
904
|
+
/**
|
|
905
|
+
* Returns the next sequence number.
|
|
906
|
+
* @private
|
|
907
|
+
*/
|
|
193
908
|
private nextSequence;
|
|
909
|
+
/**
|
|
910
|
+
* Captures an object put (creation) operation.
|
|
911
|
+
*
|
|
912
|
+
* @description
|
|
913
|
+
* Called when a git object (blob, tree, commit, tag) is written to storage.
|
|
914
|
+
*
|
|
915
|
+
* @param sha - SHA-1 hash of the object
|
|
916
|
+
* @param type - Object type (blob, tree, commit, tag)
|
|
917
|
+
* @param data - Raw object data
|
|
918
|
+
*
|
|
919
|
+
* @example
|
|
920
|
+
* ```typescript
|
|
921
|
+
* await capture.onObjectPut('abc123...', 'blob', blobData)
|
|
922
|
+
* ```
|
|
923
|
+
*/
|
|
194
924
|
onObjectPut(sha: string, type: string, data: Uint8Array): Promise<void>;
|
|
925
|
+
/**
|
|
926
|
+
* Captures an object deletion operation.
|
|
927
|
+
*
|
|
928
|
+
* @description
|
|
929
|
+
* Called when a git object is deleted, typically during garbage collection.
|
|
930
|
+
*
|
|
931
|
+
* @param sha - SHA-1 hash of the deleted object
|
|
932
|
+
*
|
|
933
|
+
* @example
|
|
934
|
+
* ```typescript
|
|
935
|
+
* await capture.onObjectDelete('abc123...')
|
|
936
|
+
* ```
|
|
937
|
+
*/
|
|
195
938
|
onObjectDelete(sha: string): Promise<void>;
|
|
939
|
+
/**
|
|
940
|
+
* Captures a reference update operation.
|
|
941
|
+
*
|
|
942
|
+
* @description
|
|
943
|
+
* Called when a git reference (branch, tag) is updated to point to a new commit.
|
|
944
|
+
*
|
|
945
|
+
* @param refName - Full reference name (e.g., 'refs/heads/main')
|
|
946
|
+
* @param oldSha - Previous SHA (all zeros for new refs)
|
|
947
|
+
* @param newSha - New SHA (all zeros for deleted refs)
|
|
948
|
+
*
|
|
949
|
+
* @example
|
|
950
|
+
* ```typescript
|
|
951
|
+
* await capture.onRefUpdate(
|
|
952
|
+
* 'refs/heads/main',
|
|
953
|
+
* 'oldcommit123...',
|
|
954
|
+
* 'newcommit456...'
|
|
955
|
+
* )
|
|
956
|
+
* ```
|
|
957
|
+
*/
|
|
196
958
|
onRefUpdate(refName: string, oldSha: string, newSha: string): Promise<void>;
|
|
959
|
+
/**
|
|
960
|
+
* Captures a commit creation operation.
|
|
961
|
+
*
|
|
962
|
+
* @description
|
|
963
|
+
* Called when a new commit object is created.
|
|
964
|
+
*
|
|
965
|
+
* @param commitSha - SHA-1 hash of the commit
|
|
966
|
+
* @param treeSha - SHA-1 hash of the tree the commit points to
|
|
967
|
+
* @param parentShas - Array of parent commit SHAs
|
|
968
|
+
*
|
|
969
|
+
* @example
|
|
970
|
+
* ```typescript
|
|
971
|
+
* await capture.onCommitCreated(
|
|
972
|
+
* 'commitabc123...',
|
|
973
|
+
* 'treedef456...',
|
|
974
|
+
* ['parent1...', 'parent2...']
|
|
975
|
+
* )
|
|
976
|
+
* ```
|
|
977
|
+
*/
|
|
197
978
|
onCommitCreated(commitSha: string, treeSha: string, parentShas: string[]): Promise<void>;
|
|
979
|
+
/**
|
|
980
|
+
* Captures a pack reception operation.
|
|
981
|
+
*
|
|
982
|
+
* @description
|
|
983
|
+
* Called when a packfile is received during a push or fetch operation.
|
|
984
|
+
*
|
|
985
|
+
* @param packData - Raw packfile data
|
|
986
|
+
* @param objectCount - Number of objects in the pack
|
|
987
|
+
*
|
|
988
|
+
* @example
|
|
989
|
+
* ```typescript
|
|
990
|
+
* await capture.onPackReceived(packBuffer, 42)
|
|
991
|
+
* ```
|
|
992
|
+
*/
|
|
198
993
|
onPackReceived(packData: Uint8Array, objectCount: number): Promise<void>;
|
|
994
|
+
/**
|
|
995
|
+
* Captures a branch creation operation.
|
|
996
|
+
*
|
|
997
|
+
* @param branchName - Name of the branch (without refs/heads/ prefix)
|
|
998
|
+
* @param sha - SHA-1 hash the branch points to
|
|
999
|
+
*
|
|
1000
|
+
* @example
|
|
1001
|
+
* ```typescript
|
|
1002
|
+
* await capture.onBranchCreated('feature-x', 'abc123...')
|
|
1003
|
+
* ```
|
|
1004
|
+
*/
|
|
199
1005
|
onBranchCreated(branchName: string, sha: string): Promise<void>;
|
|
1006
|
+
/**
|
|
1007
|
+
* Captures a branch deletion operation.
|
|
1008
|
+
*
|
|
1009
|
+
* @param branchName - Name of the deleted branch
|
|
1010
|
+
*
|
|
1011
|
+
* @example
|
|
1012
|
+
* ```typescript
|
|
1013
|
+
* await capture.onBranchDeleted('feature-x')
|
|
1014
|
+
* ```
|
|
1015
|
+
*/
|
|
200
1016
|
onBranchDeleted(branchName: string): Promise<void>;
|
|
1017
|
+
/**
|
|
1018
|
+
* Captures a tag creation operation.
|
|
1019
|
+
*
|
|
1020
|
+
* @param tagName - Name of the tag
|
|
1021
|
+
* @param sha - SHA-1 hash the tag points to
|
|
1022
|
+
*
|
|
1023
|
+
* @example
|
|
1024
|
+
* ```typescript
|
|
1025
|
+
* await capture.onTagCreated('v1.0.0', 'abc123...')
|
|
1026
|
+
* ```
|
|
1027
|
+
*/
|
|
201
1028
|
onTagCreated(tagName: string, sha: string): Promise<void>;
|
|
1029
|
+
/**
|
|
1030
|
+
* Captures a merge completion operation.
|
|
1031
|
+
*
|
|
1032
|
+
* @param mergeSha - SHA-1 hash of the merge commit
|
|
1033
|
+
* @param baseSha - SHA-1 hash of the base commit
|
|
1034
|
+
* @param headSha - SHA-1 hash of the head commit being merged
|
|
1035
|
+
*
|
|
1036
|
+
* @example
|
|
1037
|
+
* ```typescript
|
|
1038
|
+
* await capture.onMergeCompleted('merge123...', 'base456...', 'head789...')
|
|
1039
|
+
* ```
|
|
1040
|
+
*/
|
|
202
1041
|
onMergeCompleted(mergeSha: string, baseSha: string, headSha: string): Promise<void>;
|
|
1042
|
+
/**
|
|
1043
|
+
* Returns a copy of all buffered events.
|
|
1044
|
+
*
|
|
1045
|
+
* @returns Array of buffered events
|
|
1046
|
+
*/
|
|
203
1047
|
getEvents(): CDCEvent[];
|
|
1048
|
+
/**
|
|
1049
|
+
* Returns the current buffer size.
|
|
1050
|
+
*
|
|
1051
|
+
* @returns Number of events in the buffer
|
|
1052
|
+
*/
|
|
204
1053
|
getBufferSize(): number;
|
|
1054
|
+
/**
|
|
1055
|
+
* Flushes all buffered events.
|
|
1056
|
+
*
|
|
1057
|
+
* @description
|
|
1058
|
+
* Returns and clears all events from the buffer. The returned events
|
|
1059
|
+
* can be processed, serialized, or forwarded to downstream systems.
|
|
1060
|
+
*
|
|
1061
|
+
* @returns Array of flushed events
|
|
1062
|
+
*
|
|
1063
|
+
* @example
|
|
1064
|
+
* ```typescript
|
|
1065
|
+
* const events = await capture.flush()
|
|
1066
|
+
* console.log(`Flushed ${events.length} events`)
|
|
1067
|
+
* await sendToAnalytics(events)
|
|
1068
|
+
* ```
|
|
1069
|
+
*/
|
|
205
1070
|
flush(): Promise<CDCEvent[]>;
|
|
1071
|
+
/**
|
|
1072
|
+
* Adds an event listener.
|
|
1073
|
+
*
|
|
1074
|
+
* @description
|
|
1075
|
+
* Listeners are called synchronously for each event as it is captured.
|
|
1076
|
+
*
|
|
1077
|
+
* @param listener - Callback function to invoke for each event
|
|
1078
|
+
*
|
|
1079
|
+
* @example
|
|
1080
|
+
* ```typescript
|
|
1081
|
+
* capture.addListener((event) => {
|
|
1082
|
+
* console.log(`New event: ${event.type}`)
|
|
1083
|
+
* })
|
|
1084
|
+
* ```
|
|
1085
|
+
*/
|
|
206
1086
|
addListener(listener: GitOperationListener): void;
|
|
1087
|
+
/**
|
|
1088
|
+
* Removes an event listener.
|
|
1089
|
+
*
|
|
1090
|
+
* @param listener - The listener to remove
|
|
1091
|
+
*/
|
|
207
1092
|
removeListener(listener: GitOperationListener): void;
|
|
208
1093
|
}
|
|
209
1094
|
/**
|
|
210
|
-
* Parquet schema definition for CDC events
|
|
1095
|
+
* Parquet schema definition for CDC events.
|
|
1096
|
+
*
|
|
1097
|
+
* @description
|
|
1098
|
+
* Defines the column structure for CDC event Parquet files. The default
|
|
1099
|
+
* schema includes standard CDC event fields and can be extended with
|
|
1100
|
+
* custom fields for domain-specific data.
|
|
1101
|
+
*
|
|
1102
|
+
* @example
|
|
1103
|
+
* ```typescript
|
|
1104
|
+
* // Create default schema
|
|
1105
|
+
* const schema = ParquetSchema.forCDCEvents()
|
|
1106
|
+
*
|
|
1107
|
+
* // Create schema with custom fields
|
|
1108
|
+
* const customSchema = ParquetSchema.forCDCEvents([
|
|
1109
|
+
* { name: 'repository_id', type: 'STRING', nullable: false },
|
|
1110
|
+
* { name: 'user_id', type: 'STRING', nullable: true }
|
|
1111
|
+
* ])
|
|
1112
|
+
* ```
|
|
1113
|
+
*
|
|
1114
|
+
* @class ParquetSchema
|
|
211
1115
|
*/
|
|
212
1116
|
export declare class ParquetSchema {
|
|
213
1117
|
readonly fields: ParquetField[];
|
|
1118
|
+
/**
|
|
1119
|
+
* Creates a new ParquetSchema.
|
|
1120
|
+
*
|
|
1121
|
+
* @param fields - Array of field definitions
|
|
1122
|
+
*/
|
|
214
1123
|
constructor(fields: ParquetField[]);
|
|
1124
|
+
/**
|
|
1125
|
+
* Creates a schema for CDC events with optional custom fields.
|
|
1126
|
+
*
|
|
1127
|
+
* @description
|
|
1128
|
+
* Returns a schema with the standard CDC event fields. Additional
|
|
1129
|
+
* custom fields can be appended for domain-specific data.
|
|
1130
|
+
*
|
|
1131
|
+
* @param customFields - Optional additional fields to add
|
|
1132
|
+
* @returns A new ParquetSchema instance
|
|
1133
|
+
*
|
|
1134
|
+
* @example
|
|
1135
|
+
* ```typescript
|
|
1136
|
+
* const schema = ParquetSchema.forCDCEvents()
|
|
1137
|
+
* // Schema includes: event_id, event_type, source, timestamp,
|
|
1138
|
+
* // sequence, version, payload_json, sha
|
|
1139
|
+
* ```
|
|
1140
|
+
*/
|
|
215
1141
|
static forCDCEvents(customFields?: ParquetField[]): ParquetSchema;
|
|
216
1142
|
}
|
|
1143
|
+
+/**
+ * Configuration options for the Parquet transformer.
+ */
 export interface ParquetTransformerOptions {
+    /**
+     * Compression algorithm to use.
+     * @default 'snappy'
+     */
     compression?: 'snappy' | 'gzip' | 'none';
 }
 /**
- * Transforms CDC events to Parquet format
+ * Transforms CDC events to Parquet format.
+ *
+ * @description
+ * ParquetTransformer converts CDC events to Parquet-compatible rows and
+ * serializes batches of events to Parquet file format. It handles:
+ *
+ * - Event to row conversion (flattening the event structure)
+ * - JSON serialization of complex payloads
+ * - Batch creation with schema and metadata
+ * - Parquet file generation with compression
+ *
+ * @example
+ * ```typescript
+ * const transformer = new ParquetTransformer({ compression: 'snappy' })
+ *
+ * // Transform single event to row
+ * const row = transformer.eventToRow(event)
+ *
+ * // Transform batch of events
+ * const batch = transformer.eventsToBatch(events)
+ *
+ * // Generate Parquet file
+ * const buffer = await transformer.toParquetBuffer(batch)
+ * await r2.put('events.parquet', buffer)
+ * ```
+ *
+ * @class ParquetTransformer
 */
 export declare class ParquetTransformer {
+    /**
+     * Compression algorithm to use.
+     * @private
+     */
     private readonly compression;
+    /**
+     * Creates a new ParquetTransformer.
+     *
+     * @param options - Transformer configuration
+     */
     constructor(options?: ParquetTransformerOptions);
+    /**
+     * Converts a CDC event to a Parquet row.
+     *
+     * @description
+     * Flattens the event structure and serializes the payload to JSON
+     * for storage in Parquet format.
+     *
+     * @param event - The CDC event to convert
+     * @returns A Parquet row representation
+     *
+     * @example
+     * ```typescript
+     * const row = transformer.eventToRow(event)
+     * console.log(row.event_id, row.event_type, row.sha)
+     * ```
+     */
     eventToRow(event: CDCEvent): ParquetRow;
+    /**
+     * Converts multiple CDC events to a Parquet batch.
+     *
+     * @description
+     * Transforms an array of events into a ParquetBatch structure
+     * ready for serialization to Parquet format.
+     *
+     * @param events - Array of CDC events to batch
+     * @returns A ParquetBatch ready for serialization
+     *
+     * @example
+     * ```typescript
+     * const batch = transformer.eventsToBatch(events)
+     * console.log(`Batch has ${batch.rowCount} rows`)
+     * ```
+     */
     eventsToBatch(events: CDCEvent[]): ParquetBatch;
+    /**
+     * Serializes a ParquetBatch to a Parquet file buffer.
+     *
+     * @description
+     * Generates a Parquet-format file from the batch data. The output
+     * includes PAR1 magic bytes, compressed data, and footer metadata.
+     *
+     * @param batch - The ParquetBatch to serialize
+     * @returns Promise resolving to Parquet file as Uint8Array
+     *
+     * @example
+     * ```typescript
+     * const buffer = await transformer.toParquetBuffer(batch)
+     * await r2.put('events.parquet', buffer)
+     * ```
+     */
     toParquetBuffer(batch: ParquetBatch): Promise<Uint8Array>;
     private gzipCompress;
     private simpleCompress;
 }
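
The added JSDoc above documents a three-step flow: event → row (`eventToRow`), events → batch (`eventsToBatch`), batch → file bytes (`toParquetBuffer`). A minimal usage sketch follows; the `gitx.do` import path and the placeholder SHA values are assumptions for illustration and are not part of this diff:

```typescript
import { ParquetTransformer, createCDCEvent } from 'gitx.do'

// Build one event with the factory declared later in this file.
const event = createCDCEvent('COMMIT_CREATED', 'push', {
  operation: 'commit-create',
  sha: 'abc123',
  treeSha: 'def456',
  parentShas: ['parent1'],
})

const transformer = new ParquetTransformer({ compression: 'gzip' })

// Flatten a single event, then batch and serialize the whole array.
const row = transformer.eventToRow(event)
console.log(row.event_id, row.event_type)

const batch = transformer.eventsToBatch([event])
const buffer = await transformer.toParquetBuffer(batch)
console.log(`Serialized ${batch.rowCount} row(s) into ${buffer.byteLength} bytes`)
```
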
+/**
+ * Callback function for batch processing.
+ *
+ * @param batch - The batch result containing events and metadata
+ * @returns void or a Promise that resolves when processing is complete
+ */
 type BatchHandler = (batch: BatchResult) => void | Promise<void>;
 /**
- * Batches CDC events for efficient processing
+ * Batches CDC events for efficient processing.
+ *
+ * @description
+ * CDCBatcher collects CDC events and groups them into batches based on
+ * count or time thresholds. This enables efficient downstream processing
+ * by reducing the number of I/O operations and enabling bulk operations.
+ *
+ * **Batching Strategies:**
+ * - **Count-based**: Flush when batch reaches `batchSize` events
+ * - **Time-based**: Flush after `flushIntervalMs` even if batch is not full
+ *
+ * **Features:**
+ * - Async batch handlers for non-blocking processing
+ * - Multiple handlers for parallel processing pipelines
+ * - Graceful stop with pending event flush
+ * - Batch metadata (sequences, timestamps) for tracking
+ *
+ * @example
+ * ```typescript
+ * const batcher = new CDCBatcher({
+ *   batchSize: 100,
+ *   flushIntervalMs: 5000
+ * })
+ *
+ * // Register batch handler
+ * batcher.onBatch(async (batch) => {
+ *   console.log(`Processing ${batch.eventCount} events`)
+ *   console.log(`Sequence range: ${batch.minSequence} - ${batch.maxSequence}`)
+ *   await saveToStorage(batch.events)
+ * })
+ *
+ * // Add events
+ * await batcher.add(event1)
+ * await batcher.add(event2)
+ *
+ * // Check pending events
+ * console.log(`Pending: ${batcher.getPendingCount()}`)
+ *
+ * // Manual flush
+ * const result = await batcher.flush()
+ *
+ * // Stop the batcher
+ * await batcher.stop()
+ * ```
+ *
+ * @class CDCBatcher
 */
 export declare class CDCBatcher {
+    /**
+     * Batch configuration.
+     * @private
+     */
     private readonly config;
+    /**
+     * Buffer of pending events.
+     * @private
+     */
     private events;
+    /**
+     * Registered batch handlers.
+     * @private
+     */
     private batchHandlers;
+    /**
+     * Timer for time-based flushing.
+     * @private
+     */
     private flushTimer;
+    /**
+     * Whether the batcher has been stopped.
+     * @private
+     */
     private stopped;
+    /**
+     * Creates a new CDCBatcher.
+     *
+     * @param config - Batch configuration
+     */
     constructor(config: BatchConfig);
     private ensureTimerRunning;
     private clearFlushTimer;
+    /**
+     * Adds an event to the batch.
+     *
+     * @description
+     * Adds the event to the pending batch. If the batch reaches the
+     * configured size, it is automatically flushed. The flush timer
+     * is started/restarted as needed.
+     *
+     * @param event - The CDC event to add
+     *
+     * @example
+     * ```typescript
+     * await batcher.add(event)
+     * ```
+     */
     add(event: CDCEvent): Promise<void>;
+    /**
+     * Internal flush implementation.
+     * @private
+     */
     private flushInternal;
+    /**
+     * Manually flushes pending events.
+     *
+     * @description
+     * Forces an immediate flush of all pending events, regardless of
+     * batch size or timer. Clears the flush timer.
+     *
+     * @returns Promise resolving to the batch result
+     *
+     * @example
+     * ```typescript
+     * const result = await batcher.flush()
+     * console.log(`Flushed ${result.eventCount} events`)
+     * ```
+     */
     flush(): Promise<BatchResult>;
+    /**
+     * Returns the number of pending events.
+     *
+     * @returns Number of events waiting to be flushed
+     */
     getPendingCount(): number;
+    /**
+     * Registers a batch handler.
+     *
+     * @description
+     * Handlers are called when a batch is flushed (automatically or manually).
+     * Multiple handlers can be registered for parallel processing.
+     *
+     * @param handler - Callback function to invoke for each batch
+     *
+     * @example
+     * ```typescript
+     * batcher.onBatch(async (batch) => {
+     *   await saveToStorage(batch.events)
+     * })
+     * ```
+     */
     onBatch(handler: BatchHandler): void;
+    /**
+     * Stops the batcher.
+     *
+     * @description
+     * Stops the flush timer and prevents further processing.
+     * Does NOT automatically flush pending events - call flush() first
+     * if you need to process remaining events.
+     *
+     * @example
+     * ```typescript
+     * await batcher.flush() // Process remaining events
+     * await batcher.stop() // Stop the timer
+     * ```
+     */
     stop(): Promise<void>;
 }
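
A short sketch of the count- and time-based batching described above, following the documented flush-before-stop ordering for `stop()`. The import path, the tiny batch size, and the inline handler are assumptions chosen for illustration:

```typescript
import { CDCBatcher, createCDCEvent } from 'gitx.do'

const batcher = new CDCBatcher({ batchSize: 2, flushIntervalMs: 1000 })

// Handlers fire on every flush: count-triggered, timer-triggered, or manual.
batcher.onBatch(async (batch) => {
  console.log(`Batch of ${batch.eventCount} events, sequences ${batch.minSequence}-${batch.maxSequence}`)
})

await batcher.add(createCDCEvent('COMMIT_CREATED', 'push', {
  operation: 'commit-create', sha: 'abc123', treeSha: 'def456', parentShas: [],
}))
console.log(`Pending events: ${batcher.getPendingCount()}`)

// stop() does not flush, so drain the buffer first.
await batcher.flush()
await batcher.stop()
```
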
+/**
+ * Callback for successful batch output.
+ *
+ * @param output - The pipeline output containing Parquet data
+ */
 type OutputHandler = (output: PipelineOutput) => void;
+/**
+ * Callback for failed events sent to dead letter queue.
+ *
+ * @param events - Array of failed events
+ * @param error - The error that caused the failure
+ */
 type DeadLetterHandler = (events: CDCEvent[], error: Error) => void;
 /**
- * Main CDC Pipeline for processing git operation events
+ * Main CDC Pipeline for processing git operation events.
+ *
+ * @description
+ * CDCPipeline orchestrates the complete change data capture flow from
+ * event ingestion to Parquet output. It integrates batching, transformation,
+ * retry handling, and dead letter queue management.
+ *
+ * **Pipeline Flow:**
+ * 1. Events are submitted via `process()` or `processMany()`
+ * 2. Events are validated and added to the batcher
+ * 3. When a batch is ready, it's transformed to Parquet format
+ * 4. On success, output handlers are notified
+ * 5. On failure, retries are attempted with exponential backoff
+ * 6. After max retries, events go to dead letter queue
+ *
+ * **Features:**
+ * - Configurable batch size and flush interval
+ * - Automatic retry with exponential backoff
+ * - Dead letter queue for failed events
+ * - Real-time metrics for monitoring
+ * - Graceful shutdown with pending event flush
+ *
+ * @example
+ * ```typescript
+ * const pipeline = new CDCPipeline({
+ *   batchSize: 100,
+ *   flushIntervalMs: 5000,
+ *   maxRetries: 3,
+ *   parquetCompression: 'snappy',
+ *   outputPath: '/analytics',
+ *   schemaVersion: 1
+ * })
+ *
+ * // Register handlers
+ * pipeline.onOutput(async (output) => {
+ *   await r2.put(`cdc/${output.batchId}.parquet`, output.parquetBuffer)
+ * })
+ *
+ * pipeline.onDeadLetter((events, error) => {
+ *   console.error(`Failed ${events.length} events:`, error)
+ * })
+ *
+ * // Start the pipeline
+ * await pipeline.start()
+ *
+ * // Process events
+ * await pipeline.process(event)
+ *
+ * // Check metrics
+ * const metrics = pipeline.getMetrics()
+ *
+ * // Stop gracefully
+ * const result = await pipeline.stop()
+ * console.log(`Flushed ${result.flushedCount} events on shutdown`)
+ * ```
+ *
+ * @class CDCPipeline
 */
 export declare class CDCPipeline {
+    /**
+     * Pipeline configuration.
+     * @private
+     */
     private readonly config;
+    /**
+     * Current pipeline state.
+     * @private
+     */
     private state;
+    /**
+     * Event batcher instance.
+     * @private
+     */
     private batcher;
+    /**
+     * Parquet transformer instance.
+     * @private
+     */
     private transformer;
+    /**
+     * Registered output handlers.
+     * @private
+     */
     private outputHandlers;
+    /**
+     * Registered dead letter handlers.
+     * @private
+     */
     private deadLetterHandlers;
+    /**
+     * Pipeline metrics.
+     * @private
+     */
     private metrics;
+    /**
+     * Processing latency samples.
+     * @private
+     */
     private processingLatencies;
+    /**
+     * Retry policy instance.
+     * @private
+     */
     private retryPolicy;
+    /**
+     * Creates a new CDCPipeline.
+     *
+     * @param config - Pipeline configuration
+     */
     constructor(config: CDCPipelineConfig);
+    /**
+     * Returns the current pipeline state.
+     *
+     * @returns Current state ('stopped', 'running', or 'paused')
+     */
     getState(): CDCPipelineState;
+    /**
+     * Starts the pipeline.
+     *
+     * @description
+     * Initializes the batcher and begins accepting events. If already
+     * running, this method is a no-op.
+     *
+     * @example
+     * ```typescript
+     * await pipeline.start()
+     * console.log(pipeline.getState()) // 'running'
+     * ```
+     */
     start(): Promise<void>;
+    /**
+     * Stops the pipeline.
+     *
+     * @description
+     * Flushes any pending events, stops the batcher, and sets state to stopped.
+     * Returns information about events flushed during shutdown.
+     *
+     * @returns Promise resolving to stop result with flushed event count
+     *
+     * @example
+     * ```typescript
+     * const result = await pipeline.stop()
+     * console.log(`Flushed ${result.flushedCount} events on shutdown`)
+     * ```
+     */
     stop(): Promise<StopResult>;
+    /**
+     * Processes a single event.
+     *
+     * @description
+     * Validates the event and adds it to the batcher for processing.
+     * Updates metrics including latency tracking.
+     *
+     * @param event - The CDC event to process
+     * @returns Promise resolving to process result
+     *
+     * @throws {CDCError} PROCESSING_ERROR - If pipeline is not running
+     * @throws {CDCError} VALIDATION_ERROR - If event fails validation
+     *
+     * @example
+     * ```typescript
+     * const result = await pipeline.process(event)
+     * if (result.success) {
+     *   console.log(`Processed event: ${result.eventId}`)
+     * }
+     * ```
+     */
     process(event: CDCEvent): Promise<ProcessResult>;
+    /**
+     * Processes multiple events.
+     *
+     * @description
+     * Convenience method to process an array of events sequentially.
+     *
+     * @param events - Array of CDC events to process
+     * @returns Promise resolving to array of process results
+     *
+     * @example
+     * ```typescript
+     * const results = await pipeline.processMany(events)
+     * const successCount = results.filter(r => r.success).length
+     * console.log(`Processed ${successCount}/${events.length} events`)
+     * ```
+     */
     processMany(events: CDCEvent[]): Promise<ProcessResult[]>;
+    /**
+     * Manually flushes pending events.
+     *
+     * @description
+     * Forces an immediate flush of the batcher and processes the
+     * resulting batch through the pipeline.
+     *
+     * @example
+     * ```typescript
+     * await pipeline.flush()
+     * console.log('All pending events flushed')
+     * ```
+     */
     flush(): Promise<void>;
+    /**
+     * Handles a batch of events with retry logic.
+     * @private
+     */
     private handleBatch;
+    /**
+     * Sleeps for the specified duration.
+     * @private
+     */
     private sleep;
+    /**
+     * Updates the average latency metric.
+     * @private
+     */
     private updateAvgLatency;
+    /**
+     * Returns current pipeline metrics.
+     *
+     * @description
+     * Returns a copy of the current metrics. Metrics are cumulative
+     * since pipeline creation.
+     *
+     * @returns Copy of current pipeline metrics
+     *
+     * @example
+     * ```typescript
+     * const metrics = pipeline.getMetrics()
+     * console.log(`Processed: ${metrics.eventsProcessed}`)
+     * console.log(`Batches: ${metrics.batchesGenerated}`)
+     * console.log(`Errors: ${metrics.errors}`)
+     * console.log(`Avg latency: ${metrics.avgProcessingLatencyMs}ms`)
+     * ```
+     */
     getMetrics(): PipelineMetrics;
+    /**
+     * Registers an output handler.
+     *
+     * @description
+     * Output handlers are called when a batch is successfully processed
+     * and converted to Parquet format. Multiple handlers can be registered.
+     *
+     * @param handler - Callback to invoke for each successful batch
+     *
+     * @example
+     * ```typescript
+     * pipeline.onOutput(async (output) => {
+     *   await r2.put(`cdc/${output.batchId}.parquet`, output.parquetBuffer)
+     *   console.log(`Wrote ${output.events.length} events`)
+     * })
+     * ```
+     */
     onOutput(handler: OutputHandler): void;
+    /**
+     * Registers a dead letter handler.
+     *
+     * @description
+     * Dead letter handlers are called when a batch fails after all
+     * retry attempts are exhausted. Use this for alerting, logging,
+     * or storing failed events for later reprocessing.
+     *
+     * @param handler - Callback to invoke for failed events
+     *
+     * @example
+     * ```typescript
+     * pipeline.onDeadLetter((events, error) => {
+     *   console.error(`Failed to process ${events.length} events:`, error)
+     *   // Store in dead letter queue for later retry
+     *   await dlq.put(events)
+     * })
+     * ```
+     */
     onDeadLetter(handler: DeadLetterHandler): void;
 }
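
Putting the documented pieces together, an end-to-end pipeline sketch. The `gitx.do` import path and the `bucket` binding (standing in for the R2-style object store used in the JSDoc examples) are assumptions:

```typescript
import { CDCPipeline, createCDCEvent } from 'gitx.do'

// Hypothetical object-storage binding used only for illustration.
declare const bucket: { put(key: string, value: Uint8Array): Promise<void> }

const pipeline = new CDCPipeline({
  batchSize: 100,
  flushIntervalMs: 5000,
  maxRetries: 3,
  parquetCompression: 'snappy',
  outputPath: '/analytics',
  schemaVersion: 1,
})

// Successful batches arrive as Parquet buffers; exhausted retries go to the dead letter handler.
pipeline.onOutput(async (output) => {
  await bucket.put(`cdc/${output.batchId}.parquet`, output.parquetBuffer)
})
pipeline.onDeadLetter((events, error) => {
  console.error(`Dead-lettered ${events.length} events`, error)
})

await pipeline.start()
await pipeline.process(createCDCEvent('COMMIT_CREATED', 'push', {
  operation: 'commit-create', sha: 'abc123', treeSha: 'def456', parentShas: [],
}))
await pipeline.flush()

const { flushedCount } = await pipeline.stop()
console.log(`Shutdown flushed ${flushedCount} events`, pipeline.getMetrics())
```
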
 /**
- *
+ * Creates a new CDC event.
+ *
+ * @description
+ * Factory function to create a properly structured CDC event with
+ * automatically generated ID and timestamp.
+ *
+ * @param type - The event type
+ * @param source - The event source
+ * @param payload - Event payload data
+ * @param options - Optional configuration
+ * @param options.sequence - Custom sequence number (default: 0)
+ * @returns A new CDCEvent
+ *
+ * @example
+ * ```typescript
+ * const event = createCDCEvent('COMMIT_CREATED', 'push', {
+ *   operation: 'commit-create',
+ *   sha: 'abc123...',
+ *   treeSha: 'def456...',
+ *   parentShas: ['parent1...']
+ * })
+ *
+ * // With sequence number
+ * const sequencedEvent = createCDCEvent('REF_UPDATED', 'push', {
+ *   operation: 'ref-update',
+ *   refName: 'refs/heads/main',
+ *   oldSha: 'old...',
+ *   newSha: 'new...'
+ * }, { sequence: 42 })
+ * ```
 */
 export declare function createCDCEvent(type: CDCEventType, source: CDCEventSource, payload: CDCEventPayload, options?: {
     sequence?: number;
 }): CDCEvent;
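
A brief sketch of the factory: the event ID and timestamp are generated automatically, and `sequence` defaults to 0 when omitted (import path and SHA placeholders are assumptions):

```typescript
import { createCDCEvent } from 'gitx.do'

const event = createCDCEvent('REF_UPDATED', 'push', {
  operation: 'ref-update',
  refName: 'refs/heads/main',
  oldSha: '0000000',
  newSha: 'abc1234',
}, { sequence: 7 })

// id and timestamp come from the factory; sequence was supplied explicitly here.
console.log(event.id, event.timestamp, event.sequence)
```
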
 /**
- *
+ * Serializes a CDC event to bytes.
+ *
+ * @description
+ * Converts a CDCEvent to a JSON-encoded Uint8Array for storage or
+ * transmission. Handles Uint8Array payload data by converting to arrays.
+ *
+ * @param event - The CDC event to serialize
+ * @returns The serialized event as a Uint8Array
+ *
+ * @example
+ * ```typescript
+ * const bytes = serializeEvent(event)
+ * await r2.put(`events/${event.id}`, bytes)
+ * ```
+ *
+ * @see {@link deserializeEvent} - Reverse operation
 */
 export declare function serializeEvent(event: CDCEvent): Uint8Array;
 /**
- *
+ * Deserializes bytes to a CDC event.
+ *
+ * @description
+ * Reconstructs a CDCEvent from JSON-encoded bytes. Handles Uint8Array
+ * restoration for payload data that was converted to arrays during
+ * serialization.
+ *
+ * @param bytes - The serialized event bytes
+ * @returns The deserialized CDCEvent
+ *
+ * @example
+ * ```typescript
+ * const bytes = await r2.get(`events/${eventId}`)
+ * const event = deserializeEvent(bytes)
+ * console.log(`Event type: ${event.type}`)
+ * ```
+ *
+ * @see {@link serializeEvent} - Reverse operation
 */
 export declare function deserializeEvent(bytes: Uint8Array): CDCEvent;
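
The two helpers are inverses, so a purely in-memory round trip is enough to show the contract (the storage layer from the JSDoc examples is omitted; import path assumed):

```typescript
import { createCDCEvent, serializeEvent, deserializeEvent } from 'gitx.do'

const original = createCDCEvent('COMMIT_CREATED', 'push', {
  operation: 'commit-create', sha: 'abc123', treeSha: 'def456', parentShas: [],
})

const bytes = serializeEvent(original)   // JSON-encoded Uint8Array
const restored = deserializeEvent(bytes) // Uint8Array payload fields are rebuilt

console.log(restored.id === original.id, restored.type === original.type)
```
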
 /**
- *
+ * Validates a CDC event.
+ *
+ * @description
+ * Checks that an event has all required fields and valid values.
+ * Throws a CDCError if validation fails.
+ *
+ * **Validation Rules:**
+ * - Event must not be null/undefined
+ * - Event ID must be a non-empty string
+ * - Event type must be a valid CDCEventType
+ * - Timestamp must be a non-negative number
+ * - Sequence must be a non-negative number
+ *
+ * @param event - The CDC event to validate
+ * @returns The validated event (for chaining)
+ *
+ * @throws {CDCError} VALIDATION_ERROR - If validation fails
+ *
+ * @example
+ * ```typescript
+ * try {
+ *   validateCDCEvent(event)
+ *   // Event is valid
+ * } catch (error) {
+ *   if (error instanceof CDCError) {
+ *     console.log(`Invalid: ${error.message}`)
+ *   }
+ * }
+ * ```
 */
 export declare function validateCDCEvent(event: CDCEvent): CDCEvent;
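
Because `validateCDCEvent` returns the event, it can be used as an inline guard before handing events to a pipeline. A sketch, assuming `CDCError` is exported alongside the validator (its export is not shown in this hunk):

```typescript
import { createCDCEvent, validateCDCEvent, CDCError } from 'gitx.do'

const incoming = [
  createCDCEvent('COMMIT_CREATED', 'push', {
    operation: 'commit-create', sha: 'abc123', treeSha: 'def456', parentShas: [],
  }),
]

// Keep only events that pass the documented validation rules.
const valid = incoming.filter((event) => {
  try {
    validateCDCEvent(event)
    return true
  } catch (error) {
    if (error instanceof CDCError) console.warn(`Dropping invalid event: ${error.message}`)
    return false
  }
})

console.log(`${valid.length}/${incoming.length} events passed validation`)
```
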
 /**
- *
+ * Starts a new pipeline with the given configuration.
+ *
+ * @description
+ * Creates and starts a new CDCPipeline, registering it by ID for
+ * later access. If a pipeline with the same ID already exists,
+ * it will be replaced (the old pipeline is not automatically stopped).
+ *
+ * @param id - Unique identifier for the pipeline
+ * @param config - Pipeline configuration
+ * @returns The started pipeline instance
+ *
+ * @example
+ * ```typescript
+ * const pipeline = startPipeline('main', {
+ *   batchSize: 100,
+ *   flushIntervalMs: 5000,
+ *   maxRetries: 3,
+ *   parquetCompression: 'snappy',
+ *   outputPath: '/analytics',
+ *   schemaVersion: 1
+ * })
+ *
+ * // Register handlers
+ * pipeline.onOutput((output) => console.log(`Batch: ${output.batchId}`))
+ * ```
 */
 export declare function startPipeline(id: string, config: CDCPipelineConfig): CDCPipeline;
 /**
- *
+ * Stops a pipeline by ID.
+ *
+ * @description
+ * Stops the pipeline identified by the given ID, flushing any pending
+ * events and removing it from the registry.
+ *
+ * @param id - Pipeline identifier
+ * @returns Promise resolving to stop result (0 if pipeline not found)
+ *
+ * @example
+ * ```typescript
+ * const result = await stopPipeline('main')
+ * console.log(`Flushed ${result.flushedCount} events on shutdown`)
+ * ```
 */
 export declare function stopPipeline(id: string): Promise<StopResult>;
 /**
- *
+ * Flushes a pipeline by ID.
+ *
+ * @description
+ * Forces an immediate flush of all pending events in the pipeline.
+ * No-op if pipeline not found.
+ *
+ * @param id - Pipeline identifier
+ *
+ * @example
+ * ```typescript
+ * await flushPipeline('main')
+ * console.log('All pending events flushed')
+ * ```
 */
 export declare function flushPipeline(id: string): Promise<void>;
 /**
- *
+ * Gets metrics for a pipeline by ID.
+ *
+ * @description
+ * Returns a copy of the current metrics for the specified pipeline.
+ * Returns null if the pipeline is not found.
+ *
+ * @param id - Pipeline identifier
+ * @returns Pipeline metrics or null if not found
+ *
+ * @example
+ * ```typescript
+ * const metrics = getPipelineMetrics('main')
+ * if (metrics) {
+ *   console.log(`Events processed: ${metrics.eventsProcessed}`)
+ *   console.log(`Errors: ${metrics.errors}`)
+ * }
+ * ```
 */
 export declare function getPipelineMetrics(id: string): PipelineMetrics | null;
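
The four module-level helpers manage a registry of named pipelines. A lifecycle sketch (import path assumed; the configuration mirrors the JSDoc example above):

```typescript
import { startPipeline, flushPipeline, getPipelineMetrics, stopPipeline, createCDCEvent } from 'gitx.do'

const pipeline = startPipeline('main', {
  batchSize: 100,
  flushIntervalMs: 5000,
  maxRetries: 3,
  parquetCompression: 'snappy',
  outputPath: '/analytics',
  schemaVersion: 1,
})
pipeline.onOutput((output) => console.log(`Batch ready: ${output.batchId}`))

await pipeline.process(createCDCEvent('REF_UPDATED', 'push', {
  operation: 'ref-update', refName: 'refs/heads/main', oldSha: 'a1b2c3d', newSha: 'e4f5a6b',
}))

await flushPipeline('main')                 // push any pending events through
console.log(getPipelineMetrics('main'))     // metrics snapshot, or null for unknown IDs

const { flushedCount } = await stopPipeline('main')
console.log(`Stopped 'main', flushed ${flushedCount} events on shutdown`)
```
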
 export {};