gitx.do 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -353
- package/dist/do/logger.d.ts +50 -0
- package/dist/do/logger.d.ts.map +1 -0
- package/dist/do/logger.js +122 -0
- package/dist/do/logger.js.map +1 -0
- package/dist/{durable-object → do}/schema.d.ts +3 -3
- package/dist/do/schema.d.ts.map +1 -0
- package/dist/{durable-object → do}/schema.js +4 -3
- package/dist/do/schema.js.map +1 -0
- package/dist/do/types.d.ts +267 -0
- package/dist/do/types.d.ts.map +1 -0
- package/dist/do/types.js +62 -0
- package/dist/do/types.js.map +1 -0
- package/dist/index.d.ts +15 -415
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +31 -483
- package/dist/index.js.map +1 -1
- package/package.json +13 -21
- package/dist/cli/commands/add.d.ts +0 -174
- package/dist/cli/commands/add.d.ts.map +0 -1
- package/dist/cli/commands/add.js +0 -131
- package/dist/cli/commands/add.js.map +0 -1
- package/dist/cli/commands/blame.d.ts +0 -259
- package/dist/cli/commands/blame.d.ts.map +0 -1
- package/dist/cli/commands/blame.js +0 -609
- package/dist/cli/commands/blame.js.map +0 -1
- package/dist/cli/commands/branch.d.ts +0 -249
- package/dist/cli/commands/branch.d.ts.map +0 -1
- package/dist/cli/commands/branch.js +0 -693
- package/dist/cli/commands/branch.js.map +0 -1
- package/dist/cli/commands/commit.d.ts +0 -182
- package/dist/cli/commands/commit.d.ts.map +0 -1
- package/dist/cli/commands/commit.js +0 -437
- package/dist/cli/commands/commit.js.map +0 -1
- package/dist/cli/commands/diff.d.ts +0 -464
- package/dist/cli/commands/diff.d.ts.map +0 -1
- package/dist/cli/commands/diff.js +0 -958
- package/dist/cli/commands/diff.js.map +0 -1
- package/dist/cli/commands/log.d.ts +0 -239
- package/dist/cli/commands/log.d.ts.map +0 -1
- package/dist/cli/commands/log.js +0 -535
- package/dist/cli/commands/log.js.map +0 -1
- package/dist/cli/commands/merge.d.ts +0 -106
- package/dist/cli/commands/merge.d.ts.map +0 -1
- package/dist/cli/commands/merge.js +0 -55
- package/dist/cli/commands/merge.js.map +0 -1
- package/dist/cli/commands/review.d.ts +0 -457
- package/dist/cli/commands/review.d.ts.map +0 -1
- package/dist/cli/commands/review.js +0 -533
- package/dist/cli/commands/review.js.map +0 -1
- package/dist/cli/commands/status.d.ts +0 -269
- package/dist/cli/commands/status.d.ts.map +0 -1
- package/dist/cli/commands/status.js +0 -493
- package/dist/cli/commands/status.js.map +0 -1
- package/dist/cli/commands/web.d.ts +0 -199
- package/dist/cli/commands/web.d.ts.map +0 -1
- package/dist/cli/commands/web.js +0 -696
- package/dist/cli/commands/web.js.map +0 -1
- package/dist/cli/fs-adapter.d.ts +0 -656
- package/dist/cli/fs-adapter.d.ts.map +0 -1
- package/dist/cli/fs-adapter.js +0 -1179
- package/dist/cli/fs-adapter.js.map +0 -1
- package/dist/cli/fsx-cli-adapter.d.ts +0 -359
- package/dist/cli/fsx-cli-adapter.d.ts.map +0 -1
- package/dist/cli/fsx-cli-adapter.js +0 -619
- package/dist/cli/fsx-cli-adapter.js.map +0 -1
- package/dist/cli/index.d.ts +0 -387
- package/dist/cli/index.d.ts.map +0 -1
- package/dist/cli/index.js +0 -523
- package/dist/cli/index.js.map +0 -1
- package/dist/cli/ui/components/DiffView.d.ts +0 -7
- package/dist/cli/ui/components/DiffView.d.ts.map +0 -1
- package/dist/cli/ui/components/DiffView.js +0 -11
- package/dist/cli/ui/components/DiffView.js.map +0 -1
- package/dist/cli/ui/components/ErrorDisplay.d.ts +0 -6
- package/dist/cli/ui/components/ErrorDisplay.d.ts.map +0 -1
- package/dist/cli/ui/components/ErrorDisplay.js +0 -11
- package/dist/cli/ui/components/ErrorDisplay.js.map +0 -1
- package/dist/cli/ui/components/FuzzySearch.d.ts +0 -9
- package/dist/cli/ui/components/FuzzySearch.d.ts.map +0 -1
- package/dist/cli/ui/components/FuzzySearch.js +0 -12
- package/dist/cli/ui/components/FuzzySearch.js.map +0 -1
- package/dist/cli/ui/components/LoadingSpinner.d.ts +0 -6
- package/dist/cli/ui/components/LoadingSpinner.d.ts.map +0 -1
- package/dist/cli/ui/components/LoadingSpinner.js +0 -10
- package/dist/cli/ui/components/LoadingSpinner.js.map +0 -1
- package/dist/cli/ui/components/NavigationList.d.ts +0 -9
- package/dist/cli/ui/components/NavigationList.d.ts.map +0 -1
- package/dist/cli/ui/components/NavigationList.js +0 -11
- package/dist/cli/ui/components/NavigationList.js.map +0 -1
- package/dist/cli/ui/components/ScrollableContent.d.ts +0 -8
- package/dist/cli/ui/components/ScrollableContent.d.ts.map +0 -1
- package/dist/cli/ui/components/ScrollableContent.js +0 -11
- package/dist/cli/ui/components/ScrollableContent.js.map +0 -1
- package/dist/cli/ui/components/index.d.ts +0 -7
- package/dist/cli/ui/components/index.d.ts.map +0 -1
- package/dist/cli/ui/components/index.js +0 -9
- package/dist/cli/ui/components/index.js.map +0 -1
- package/dist/cli/ui/terminal-ui.d.ts +0 -52
- package/dist/cli/ui/terminal-ui.d.ts.map +0 -1
- package/dist/cli/ui/terminal-ui.js +0 -121
- package/dist/cli/ui/terminal-ui.js.map +0 -1
- package/dist/do/BashModule.d.ts +0 -871
- package/dist/do/BashModule.d.ts.map +0 -1
- package/dist/do/BashModule.js +0 -1143
- package/dist/do/BashModule.js.map +0 -1
- package/dist/do/FsModule.d.ts +0 -601
- package/dist/do/FsModule.d.ts.map +0 -1
- package/dist/do/FsModule.js +0 -1120
- package/dist/do/FsModule.js.map +0 -1
- package/dist/do/GitModule.d.ts +0 -635
- package/dist/do/GitModule.d.ts.map +0 -1
- package/dist/do/GitModule.js +0 -781
- package/dist/do/GitModule.js.map +0 -1
- package/dist/do/GitRepoDO.d.ts +0 -281
- package/dist/do/GitRepoDO.d.ts.map +0 -1
- package/dist/do/GitRepoDO.js +0 -479
- package/dist/do/GitRepoDO.js.map +0 -1
- package/dist/do/bash-ast.d.ts +0 -246
- package/dist/do/bash-ast.d.ts.map +0 -1
- package/dist/do/bash-ast.js +0 -888
- package/dist/do/bash-ast.js.map +0 -1
- package/dist/do/container-executor.d.ts +0 -491
- package/dist/do/container-executor.d.ts.map +0 -1
- package/dist/do/container-executor.js +0 -730
- package/dist/do/container-executor.js.map +0 -1
- package/dist/do/index.d.ts +0 -53
- package/dist/do/index.d.ts.map +0 -1
- package/dist/do/index.js +0 -91
- package/dist/do/index.js.map +0 -1
- package/dist/do/tiered-storage.d.ts +0 -403
- package/dist/do/tiered-storage.d.ts.map +0 -1
- package/dist/do/tiered-storage.js +0 -689
- package/dist/do/tiered-storage.js.map +0 -1
- package/dist/do/withBash.d.ts +0 -231
- package/dist/do/withBash.d.ts.map +0 -1
- package/dist/do/withBash.js +0 -244
- package/dist/do/withBash.js.map +0 -1
- package/dist/do/withFs.d.ts +0 -237
- package/dist/do/withFs.d.ts.map +0 -1
- package/dist/do/withFs.js +0 -387
- package/dist/do/withFs.js.map +0 -1
- package/dist/do/withGit.d.ts +0 -180
- package/dist/do/withGit.d.ts.map +0 -1
- package/dist/do/withGit.js +0 -271
- package/dist/do/withGit.js.map +0 -1
- package/dist/durable-object/object-store.d.ts +0 -633
- package/dist/durable-object/object-store.d.ts.map +0 -1
- package/dist/durable-object/object-store.js +0 -1161
- package/dist/durable-object/object-store.js.map +0 -1
- package/dist/durable-object/schema.d.ts.map +0 -1
- package/dist/durable-object/schema.js.map +0 -1
- package/dist/durable-object/wal.d.ts +0 -416
- package/dist/durable-object/wal.d.ts.map +0 -1
- package/dist/durable-object/wal.js +0 -445
- package/dist/durable-object/wal.js.map +0 -1
- package/dist/mcp/adapter.d.ts +0 -772
- package/dist/mcp/adapter.d.ts.map +0 -1
- package/dist/mcp/adapter.js +0 -895
- package/dist/mcp/adapter.js.map +0 -1
- package/dist/mcp/sandbox/miniflare-evaluator.d.ts +0 -22
- package/dist/mcp/sandbox/miniflare-evaluator.d.ts.map +0 -1
- package/dist/mcp/sandbox/miniflare-evaluator.js +0 -140
- package/dist/mcp/sandbox/miniflare-evaluator.js.map +0 -1
- package/dist/mcp/sandbox/object-store-proxy.d.ts +0 -32
- package/dist/mcp/sandbox/object-store-proxy.d.ts.map +0 -1
- package/dist/mcp/sandbox/object-store-proxy.js +0 -30
- package/dist/mcp/sandbox/object-store-proxy.js.map +0 -1
- package/dist/mcp/sandbox/template.d.ts +0 -17
- package/dist/mcp/sandbox/template.d.ts.map +0 -1
- package/dist/mcp/sandbox/template.js +0 -71
- package/dist/mcp/sandbox/template.js.map +0 -1
- package/dist/mcp/sandbox.d.ts +0 -764
- package/dist/mcp/sandbox.d.ts.map +0 -1
- package/dist/mcp/sandbox.js +0 -1362
- package/dist/mcp/sandbox.js.map +0 -1
- package/dist/mcp/sdk-adapter.d.ts +0 -835
- package/dist/mcp/sdk-adapter.d.ts.map +0 -1
- package/dist/mcp/sdk-adapter.js +0 -974
- package/dist/mcp/sdk-adapter.js.map +0 -1
- package/dist/mcp/tools/do.d.ts +0 -32
- package/dist/mcp/tools/do.d.ts.map +0 -1
- package/dist/mcp/tools/do.js +0 -115
- package/dist/mcp/tools/do.js.map +0 -1
- package/dist/mcp/tools.d.ts +0 -548
- package/dist/mcp/tools.d.ts.map +0 -1
- package/dist/mcp/tools.js +0 -1934
- package/dist/mcp/tools.js.map +0 -1
- package/dist/ops/blame.d.ts +0 -551
- package/dist/ops/blame.d.ts.map +0 -1
- package/dist/ops/blame.js +0 -1037
- package/dist/ops/blame.js.map +0 -1
- package/dist/ops/branch.d.ts +0 -766
- package/dist/ops/branch.d.ts.map +0 -1
- package/dist/ops/branch.js +0 -950
- package/dist/ops/branch.js.map +0 -1
- package/dist/ops/commit-traversal.d.ts +0 -349
- package/dist/ops/commit-traversal.d.ts.map +0 -1
- package/dist/ops/commit-traversal.js +0 -821
- package/dist/ops/commit-traversal.js.map +0 -1
- package/dist/ops/commit.d.ts +0 -555
- package/dist/ops/commit.d.ts.map +0 -1
- package/dist/ops/commit.js +0 -826
- package/dist/ops/commit.js.map +0 -1
- package/dist/ops/merge-base.d.ts +0 -397
- package/dist/ops/merge-base.d.ts.map +0 -1
- package/dist/ops/merge-base.js +0 -691
- package/dist/ops/merge-base.js.map +0 -1
- package/dist/ops/merge.d.ts +0 -855
- package/dist/ops/merge.d.ts.map +0 -1
- package/dist/ops/merge.js +0 -1551
- package/dist/ops/merge.js.map +0 -1
- package/dist/ops/tag.d.ts +0 -247
- package/dist/ops/tag.d.ts.map +0 -1
- package/dist/ops/tag.js +0 -649
- package/dist/ops/tag.js.map +0 -1
- package/dist/ops/tree-builder.d.ts +0 -178
- package/dist/ops/tree-builder.d.ts.map +0 -1
- package/dist/ops/tree-builder.js +0 -271
- package/dist/ops/tree-builder.js.map +0 -1
- package/dist/ops/tree-diff.d.ts +0 -291
- package/dist/ops/tree-diff.d.ts.map +0 -1
- package/dist/ops/tree-diff.js +0 -705
- package/dist/ops/tree-diff.js.map +0 -1
- package/dist/pack/delta.d.ts +0 -248
- package/dist/pack/delta.d.ts.map +0 -1
- package/dist/pack/delta.js +0 -736
- package/dist/pack/delta.js.map +0 -1
- package/dist/pack/format.d.ts +0 -446
- package/dist/pack/format.d.ts.map +0 -1
- package/dist/pack/format.js +0 -572
- package/dist/pack/format.js.map +0 -1
- package/dist/pack/full-generation.d.ts +0 -612
- package/dist/pack/full-generation.d.ts.map +0 -1
- package/dist/pack/full-generation.js +0 -1378
- package/dist/pack/full-generation.js.map +0 -1
- package/dist/pack/generation.d.ts +0 -441
- package/dist/pack/generation.d.ts.map +0 -1
- package/dist/pack/generation.js +0 -707
- package/dist/pack/generation.js.map +0 -1
- package/dist/pack/index.d.ts +0 -502
- package/dist/pack/index.d.ts.map +0 -1
- package/dist/pack/index.js +0 -833
- package/dist/pack/index.js.map +0 -1
- package/dist/refs/branch.d.ts +0 -668
- package/dist/refs/branch.d.ts.map +0 -1
- package/dist/refs/branch.js +0 -897
- package/dist/refs/branch.js.map +0 -1
- package/dist/refs/storage.d.ts +0 -833
- package/dist/refs/storage.d.ts.map +0 -1
- package/dist/refs/storage.js +0 -1023
- package/dist/refs/storage.js.map +0 -1
- package/dist/refs/tag.d.ts +0 -860
- package/dist/refs/tag.d.ts.map +0 -1
- package/dist/refs/tag.js +0 -996
- package/dist/refs/tag.js.map +0 -1
- package/dist/storage/backend.d.ts +0 -425
- package/dist/storage/backend.d.ts.map +0 -1
- package/dist/storage/backend.js +0 -41
- package/dist/storage/backend.js.map +0 -1
- package/dist/storage/fsx-adapter.d.ts +0 -204
- package/dist/storage/fsx-adapter.d.ts.map +0 -1
- package/dist/storage/fsx-adapter.js +0 -470
- package/dist/storage/fsx-adapter.js.map +0 -1
- package/dist/storage/lru-cache.d.ts +0 -691
- package/dist/storage/lru-cache.d.ts.map +0 -1
- package/dist/storage/lru-cache.js +0 -813
- package/dist/storage/lru-cache.js.map +0 -1
- package/dist/storage/object-index.d.ts +0 -585
- package/dist/storage/object-index.d.ts.map +0 -1
- package/dist/storage/object-index.js +0 -532
- package/dist/storage/object-index.js.map +0 -1
- package/dist/storage/r2-pack.d.ts +0 -1257
- package/dist/storage/r2-pack.d.ts.map +0 -1
- package/dist/storage/r2-pack.js +0 -1770
- package/dist/storage/r2-pack.js.map +0 -1
- package/dist/tiered/cdc-pipeline.d.ts +0 -1888
- package/dist/tiered/cdc-pipeline.d.ts.map +0 -1
- package/dist/tiered/cdc-pipeline.js +0 -1880
- package/dist/tiered/cdc-pipeline.js.map +0 -1
- package/dist/tiered/migration.d.ts +0 -1104
- package/dist/tiered/migration.d.ts.map +0 -1
- package/dist/tiered/migration.js +0 -1214
- package/dist/tiered/migration.js.map +0 -1
- package/dist/tiered/parquet-writer.d.ts +0 -1145
- package/dist/tiered/parquet-writer.d.ts.map +0 -1
- package/dist/tiered/parquet-writer.js +0 -1183
- package/dist/tiered/parquet-writer.js.map +0 -1
- package/dist/tiered/read-path.d.ts +0 -835
- package/dist/tiered/read-path.d.ts.map +0 -1
- package/dist/tiered/read-path.js +0 -487
- package/dist/tiered/read-path.js.map +0 -1
- package/dist/types/capability.d.ts +0 -1385
- package/dist/types/capability.d.ts.map +0 -1
- package/dist/types/capability.js +0 -36
- package/dist/types/capability.js.map +0 -1
- package/dist/types/index.d.ts +0 -13
- package/dist/types/index.d.ts.map +0 -1
- package/dist/types/index.js +0 -18
- package/dist/types/index.js.map +0 -1
- package/dist/types/objects.d.ts +0 -692
- package/dist/types/objects.d.ts.map +0 -1
- package/dist/types/objects.js +0 -837
- package/dist/types/objects.js.map +0 -1
- package/dist/types/storage.d.ts +0 -603
- package/dist/types/storage.d.ts.map +0 -1
- package/dist/types/storage.js +0 -191
- package/dist/types/storage.js.map +0 -1
- package/dist/types/worker-loader.d.ts +0 -60
- package/dist/types/worker-loader.d.ts.map +0 -1
- package/dist/types/worker-loader.js +0 -62
- package/dist/types/worker-loader.js.map +0 -1
- package/dist/utils/hash.d.ts +0 -197
- package/dist/utils/hash.d.ts.map +0 -1
- package/dist/utils/hash.js +0 -268
- package/dist/utils/hash.js.map +0 -1
- package/dist/utils/sha1.d.ts +0 -290
- package/dist/utils/sha1.d.ts.map +0 -1
- package/dist/utils/sha1.js +0 -582
- package/dist/utils/sha1.js.map +0 -1
- package/dist/wire/capabilities.d.ts +0 -1044
- package/dist/wire/capabilities.d.ts.map +0 -1
- package/dist/wire/capabilities.js +0 -941
- package/dist/wire/capabilities.js.map +0 -1
- package/dist/wire/path-security.d.ts +0 -157
- package/dist/wire/path-security.d.ts.map +0 -1
- package/dist/wire/path-security.js +0 -307
- package/dist/wire/path-security.js.map +0 -1
- package/dist/wire/pkt-line.d.ts +0 -345
- package/dist/wire/pkt-line.d.ts.map +0 -1
- package/dist/wire/pkt-line.js +0 -381
- package/dist/wire/pkt-line.js.map +0 -1
- package/dist/wire/receive-pack.d.ts +0 -1059
- package/dist/wire/receive-pack.d.ts.map +0 -1
- package/dist/wire/receive-pack.js +0 -1414
- package/dist/wire/receive-pack.js.map +0 -1
- package/dist/wire/smart-http.d.ts +0 -799
- package/dist/wire/smart-http.d.ts.map +0 -1
- package/dist/wire/smart-http.js +0 -945
- package/dist/wire/smart-http.js.map +0 -1
- package/dist/wire/upload-pack.d.ts +0 -727
- package/dist/wire/upload-pack.d.ts.map +0 -1
- package/dist/wire/upload-pack.js +0 -1138
- package/dist/wire/upload-pack.js.map +0 -1
|
@@ -1,1880 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @fileoverview CDC (Change Data Capture) Pipeline for Git Operations
|
|
3
|
-
*
|
|
4
|
-
* @description
|
|
5
|
-
* This module provides a comprehensive Change Data Capture system for Git operations,
|
|
6
|
-
* enabling real-time event streaming, transformation, and analytics for Git repository events.
|
|
7
|
-
*
|
|
8
|
-
* ## Key Features
|
|
9
|
-
*
|
|
10
|
-
* - **Event Capture**: Captures git operations (push, fetch, commits, branches, tags, merges)
|
|
11
|
-
* - **Parquet Transformation**: Converts events to columnar Parquet format for analytics
|
|
12
|
-
* - **Batching**: Efficient event batching with configurable size and time-based flushing
|
|
13
|
-
* - **Retry Policies**: Configurable exponential backoff with jitter for resilient processing
|
|
14
|
-
* - **Dead Letter Queue**: Handles failed events for later reprocessing
|
|
15
|
-
* - **Metrics**: Built-in tracking for events processed, batches, errors, and latency
|
|
16
|
-
*
|
|
17
|
-
* ## Architecture
|
|
18
|
-
*
|
|
19
|
-
* The pipeline consists of several components:
|
|
20
|
-
* 1. **CDCEventCapture**: Captures git operations and converts them to CDCEvents
|
|
21
|
-
* 2. **CDCBatcher**: Batches events for efficient processing
|
|
22
|
-
* 3. **ParquetTransformer**: Transforms events to Parquet format
|
|
23
|
-
* 4. **CDCPipeline**: Orchestrates the entire flow with error handling
|
|
24
|
-
*
|
|
25
|
-
* ## Event Flow
|
|
26
|
-
*
|
|
27
|
-
* ```
|
|
28
|
-
* Git Operation -> CDCEventCapture -> CDCBatcher -> ParquetTransformer -> Output
|
|
29
|
-
* |
|
|
30
|
-
* v
|
|
31
|
-
* (On failure) Dead Letter Queue
|
|
32
|
-
* ```
|
|
33
|
-
*
|
|
34
|
-
* @module tiered/cdc-pipeline
|
|
35
|
-
*
|
|
36
|
-
* @example
|
|
37
|
-
* ```typescript
|
|
38
|
-
* // Create and start a pipeline
|
|
39
|
-
* const pipeline = new CDCPipeline({
|
|
40
|
-
* batchSize: 100,
|
|
41
|
-
* flushIntervalMs: 5000,
|
|
42
|
-
* maxRetries: 3,
|
|
43
|
-
* parquetCompression: 'snappy',
|
|
44
|
-
* outputPath: '/analytics',
|
|
45
|
-
* schemaVersion: 1
|
|
46
|
-
* })
|
|
47
|
-
*
|
|
48
|
-
* await pipeline.start()
|
|
49
|
-
*
|
|
50
|
-
* // Process events
|
|
51
|
-
* pipeline.onOutput((output) => {
|
|
52
|
-
* console.log(`Generated batch: ${output.batchId}`)
|
|
53
|
-
* console.log(`Events: ${output.events.length}`)
|
|
54
|
-
* console.log(`Parquet size: ${output.parquetBuffer.length} bytes`)
|
|
55
|
-
* })
|
|
56
|
-
*
|
|
57
|
-
* pipeline.onDeadLetter((events, error) => {
|
|
58
|
-
* console.error(`Failed events: ${events.length}`, error)
|
|
59
|
-
* })
|
|
60
|
-
*
|
|
61
|
-
* // Create and process an event
|
|
62
|
-
* const event = createCDCEvent('COMMIT_CREATED', 'push', {
|
|
63
|
-
* operation: 'commit-create',
|
|
64
|
-
* sha: 'abc123...',
|
|
65
|
-
* treeSha: 'def456...',
|
|
66
|
-
* parentShas: ['parent1...']
|
|
67
|
-
* })
|
|
68
|
-
*
|
|
69
|
-
* await pipeline.process(event)
|
|
70
|
-
*
|
|
71
|
-
* // Get metrics
|
|
72
|
-
* const metrics = pipeline.getMetrics()
|
|
73
|
-
* console.log(`Processed: ${metrics.eventsProcessed}`)
|
|
74
|
-
* console.log(`Batches: ${metrics.batchesGenerated}`)
|
|
75
|
-
*
|
|
76
|
-
* // Stop the pipeline
|
|
77
|
-
* await pipeline.stop()
|
|
78
|
-
* ```
|
|
79
|
-
*
|
|
80
|
-
* @see {@link CDCPipeline} - Main pipeline orchestration class
|
|
81
|
-
* @see {@link CDCEventCapture} - Event capture from git operations
|
|
82
|
-
* @see {@link ParquetTransformer} - Parquet format transformation
|
|
83
|
-
*/
|
|
84
|
-
// ============================================================================
|
|
85
|
-
// Error Classes
|
|
86
|
-
// ============================================================================
|
|
87
|
-
/**
|
|
88
|
-
* Custom error class for CDC operations.
|
|
89
|
-
*
|
|
90
|
-
* @description
|
|
91
|
-
* CDCError provides structured error information for CDC pipeline failures,
|
|
92
|
-
* including an error type for programmatic handling and optional cause for
|
|
93
|
-
* error chaining.
|
|
94
|
-
*
|
|
95
|
-
* @example
|
|
96
|
-
* ```typescript
|
|
97
|
-
* try {
|
|
98
|
-
* await pipeline.process(event)
|
|
99
|
-
* } catch (error) {
|
|
100
|
-
* if (error instanceof CDCError) {
|
|
101
|
-
* switch (error.type) {
|
|
102
|
-
* case 'VALIDATION_ERROR':
|
|
103
|
-
* console.log('Invalid event:', error.message)
|
|
104
|
-
* break
|
|
105
|
-
* case 'PROCESSING_ERROR':
|
|
106
|
-
* console.log('Processing failed:', error.message)
|
|
107
|
-
* if (error.cause) {
|
|
108
|
-
* console.log('Caused by:', error.cause.message)
|
|
109
|
-
* }
|
|
110
|
-
* break
|
|
111
|
-
* }
|
|
112
|
-
* }
|
|
113
|
-
* }
|
|
114
|
-
* ```
|
|
115
|
-
*
|
|
116
|
-
* @class CDCError
|
|
117
|
-
* @extends Error
|
|
118
|
-
*/
|
|
119
|
-
export class CDCError extends Error {
    /** Category of the failure, used by callers for programmatic handling. */
    type;
    /** Underlying error that triggered this one, or undefined when none was given. */
    cause;
    /**
     * Builds a CDC error carrying a category and an optional underlying error.
     *
     * @param type - Error type for categorization
     * @param message - Human-readable error message
     * @param cause - Optional underlying error that caused this error
     */
    constructor(type, message, cause) {
        super(message);
        // Assigned in the same order as before: type, cause, then name.
        Object.assign(this, { type, cause, name: 'CDCError' });
    }
}
|
|
136
|
-
/**
 * Retry policy implementing exponential backoff with optional jitter.
 *
 * @description
 * Provides a retry mechanism for handling transient failures.
 * Uses exponential backoff to space out retry attempts, with optional
 * jitter to prevent synchronized retries from multiple clients.
 *
 * **Backoff Formula** (`attempt` is 1-indexed):
 * `delay = min(initialDelay * (multiplier ^ (attempt - 1)), maxDelay)`
 *
 * **With Jitter** (applied after the cap, so the result may exceed maxDelay):
 * `delay = floor(delay * random(0.5, 1.5))`
 *
 * @example
 * ```typescript
 * const policy = new CDCRetryPolicy({
 *   maxRetries: 3,
 *   initialDelayMs: 100,
 *   maxDelayMs: 5000,
 *   backoffMultiplier: 2,
 *   jitter: true
 * })
 *
 * let attempts = 0
 * while (attempts < 10) {
 *   try {
 *     await doOperation()
 *     break
 *   } catch (error) {
 *     attempts++
 *     if (!policy.shouldRetry(attempts)) {
 *       throw new Error('Max retries exceeded')
 *     }
 *     const delay = policy.getDelay(attempts)
 *     console.log(`Retry ${attempts} after ${delay}ms`)
 *     await sleep(delay)
 *   }
 * }
 * ```
 *
 * @class CDCRetryPolicy
 */
export class CDCRetryPolicy {
    /**
     * Retry configuration (maxRetries, initialDelayMs, maxDelayMs,
     * backoffMultiplier, jitter).
     * @private
     */
    config;
    /**
     * Creates a new retry policy.
     *
     * @param config - Retry policy configuration
     */
    constructor(config) {
        this.config = config;
    }
    /**
     * Determines whether another retry should be attempted.
     *
     * @param attemptCount - Number of attempts already made
     * @returns true while attemptCount is below config.maxRetries, false otherwise
     *
     * @example
     * ```typescript
     * if (policy.shouldRetry(3)) {
     *   // Retry is allowed
     * }
     * ```
     */
    shouldRetry(attemptCount) {
        return attemptCount < this.config.maxRetries;
    }
    /**
     * Calculates the delay before the next retry.
     *
     * @description
     * Computes delay using exponential backoff, capped at maxDelayMs.
     * If jitter is enabled, the capped delay is multiplied by a random factor
     * in [0.5, 1.5) and floored to an integer.
     *
     * @param attemptCount - Number of attempts already made (1-indexed)
     * @returns Delay in milliseconds before next retry
     *
     * @example
     * ```typescript
     * // With initialDelay=100, multiplier=2:
     * // Attempt 1: 100ms * 2^0 = 100ms
     * // Attempt 2: 100ms * 2^1 = 200ms
     * // Attempt 3: 100ms * 2^2 = 400ms
     * const delay = policy.getDelay(attemptCount)
     * await sleep(delay)
     * ```
     */
    getDelay(attemptCount) {
        const { initialDelayMs, backoffMultiplier, maxDelayMs, jitter } = this.config;
        // attemptCount is 1-indexed, so the first retry waits exactly
        // initialDelayMs. Clamp at 0 so a 0-based caller never gets a delay
        // below the initial one.
        const exponent = Math.max(0, attemptCount - 1);
        let delay = Math.min(initialDelayMs * Math.pow(backoffMultiplier, exponent), maxDelayMs);
        if (jitter) {
            // Random factor between 0.5x and 1.5x, applied after the cap.
            const jitterFactor = 0.5 + Math.random();
            delay = Math.floor(delay * jitterFactor);
        }
        return delay;
    }
}
|
|
240
|
-
// ============================================================================
|
|
241
|
-
// CDC Event Capture
|
|
242
|
-
// ============================================================================
|
|
243
|
-
/**
|
|
244
|
-
* Captures git operations and converts them to CDC events.
|
|
245
|
-
*
|
|
246
|
-
* @description
|
|
247
|
-
* CDCEventCapture hooks into git operations and generates CDCEvents for each
|
|
248
|
-
* operation. It maintains an internal buffer of events that can be flushed
|
|
249
|
-
* manually or automatically when the buffer reaches a configured size.
|
|
250
|
-
*
|
|
251
|
-
* **Supported Operations:**
|
|
252
|
-
* - Object creation/deletion (blobs, trees, commits, tags)
|
|
253
|
-
* - Reference updates (branches, tags)
|
|
254
|
-
* - Commit creation
|
|
255
|
-
* - Pack reception
|
|
256
|
-
* - Branch creation/deletion
|
|
257
|
-
* - Tag creation
|
|
258
|
-
* - Merge completion
|
|
259
|
-
*
|
|
260
|
-
* **Event Ordering:**
|
|
261
|
-
* Events are assigned monotonically increasing sequence numbers within a
|
|
262
|
-
* capture session. This ensures proper ordering for replay and analytics.
|
|
263
|
-
*
|
|
264
|
-
* @example
|
|
265
|
-
* ```typescript
|
|
266
|
-
* const capture = new CDCEventCapture({ maxBufferSize: 100 })
|
|
267
|
-
*
|
|
268
|
-
* // Add a listener for real-time processing
|
|
269
|
-
* capture.addListener((event) => {
|
|
270
|
-
* console.log(`Event: ${event.type} - ${event.id}`)
|
|
271
|
-
* })
|
|
272
|
-
*
|
|
273
|
-
* // Capture git operations
|
|
274
|
-
* await capture.onCommitCreated('abc123...', 'tree456...', ['parent789...'])
|
|
275
|
-
* await capture.onRefUpdate('refs/heads/main', 'old...', 'new...')
|
|
276
|
-
*
|
|
277
|
-
* // Get buffered events
|
|
278
|
-
* console.log(`Buffer size: ${capture.getBufferSize()}`)
|
|
279
|
-
*
|
|
280
|
-
* // Flush buffer
|
|
281
|
-
* const events = await capture.flush()
|
|
282
|
-
* console.log(`Flushed ${events.length} events`)
|
|
283
|
-
* ```
|
|
284
|
-
*
|
|
285
|
-
* @class CDCEventCapture
|
|
286
|
-
*/
|
|
287
|
-
export class CDCEventCapture {
|
|
288
|
-
/**
|
|
289
|
-
* Buffer of captured events.
|
|
290
|
-
* @private
|
|
291
|
-
*/
|
|
292
|
-
events = [];
|
|
293
|
-
/**
|
|
294
|
-
* Monotonically increasing sequence counter.
|
|
295
|
-
* @private
|
|
296
|
-
*/
|
|
297
|
-
sequenceCounter = 0;
|
|
298
|
-
/**
|
|
299
|
-
* Registered event listeners.
|
|
300
|
-
* @private
|
|
301
|
-
*/
|
|
302
|
-
listeners = [];
|
|
303
|
-
/**
|
|
304
|
-
* Maximum buffer size before auto-flush.
|
|
305
|
-
* @private
|
|
306
|
-
*/
|
|
307
|
-
maxBufferSize;
|
|
308
|
-
/**
|
|
309
|
-
* Creates a new CDC event capture instance.
|
|
310
|
-
*
|
|
311
|
-
* @param options - Configuration options
|
|
312
|
-
*/
|
|
313
|
-
constructor(options = {}) {
|
|
314
|
-
this.maxBufferSize = options.maxBufferSize ?? Infinity;
|
|
315
|
-
}
|
|
316
|
-
/**
|
|
317
|
-
* Generates a unique event ID.
|
|
318
|
-
* @private
|
|
319
|
-
*/
|
|
320
|
-
generateEventId() {
|
|
321
|
-
return `evt-${Date.now()}-${Math.random().toString(36).slice(2)}`;
|
|
322
|
-
}
|
|
323
|
-
/**
|
|
324
|
-
* Emits an event to the buffer and notifies listeners.
|
|
325
|
-
* @private
|
|
326
|
-
*/
|
|
327
|
-
async emitEvent(event) {
|
|
328
|
-
// Auto-flush if buffer is full
|
|
329
|
-
if (this.events.length >= this.maxBufferSize) {
|
|
330
|
-
await this.flush();
|
|
331
|
-
}
|
|
332
|
-
this.events.push(event);
|
|
333
|
-
// Notify all listeners
|
|
334
|
-
for (const listener of this.listeners) {
|
|
335
|
-
listener(event);
|
|
336
|
-
}
|
|
337
|
-
}
|
|
338
|
-
/**
|
|
339
|
-
* Returns the next sequence number.
|
|
340
|
-
* @private
|
|
341
|
-
*/
|
|
342
|
-
nextSequence() {
|
|
343
|
-
return ++this.sequenceCounter;
|
|
344
|
-
}
|
|
345
|
-
/**
|
|
346
|
-
* Captures an object put (creation) operation.
|
|
347
|
-
*
|
|
348
|
-
* @description
|
|
349
|
-
* Called when a git object (blob, tree, commit, tag) is written to storage.
|
|
350
|
-
*
|
|
351
|
-
* @param sha - SHA-1 hash of the object
|
|
352
|
-
* @param type - Object type (blob, tree, commit, tag)
|
|
353
|
-
* @param data - Raw object data
|
|
354
|
-
*
|
|
355
|
-
* @example
|
|
356
|
-
* ```typescript
|
|
357
|
-
* await capture.onObjectPut('abc123...', 'blob', blobData)
|
|
358
|
-
* ```
|
|
359
|
-
*/
|
|
360
|
-
async onObjectPut(sha, type, data) {
|
|
361
|
-
const event = {
|
|
362
|
-
id: this.generateEventId(),
|
|
363
|
-
type: 'OBJECT_CREATED',
|
|
364
|
-
source: 'push',
|
|
365
|
-
timestamp: Date.now(),
|
|
366
|
-
payload: {
|
|
367
|
-
operation: 'put',
|
|
368
|
-
sha,
|
|
369
|
-
data,
|
|
370
|
-
metadata: { type, size: data.length }
|
|
371
|
-
},
|
|
372
|
-
sequence: this.nextSequence(),
|
|
373
|
-
version: 1
|
|
374
|
-
};
|
|
375
|
-
await this.emitEvent(event);
|
|
376
|
-
}
|
|
377
|
-
/**
|
|
378
|
-
* Captures an object deletion operation.
|
|
379
|
-
*
|
|
380
|
-
* @description
|
|
381
|
-
* Called when a git object is deleted, typically during garbage collection.
|
|
382
|
-
*
|
|
383
|
-
* @param sha - SHA-1 hash of the deleted object
|
|
384
|
-
*
|
|
385
|
-
* @example
|
|
386
|
-
* ```typescript
|
|
387
|
-
* await capture.onObjectDelete('abc123...')
|
|
388
|
-
* ```
|
|
389
|
-
*/
|
|
390
|
-
async onObjectDelete(sha) {
|
|
391
|
-
const event = {
|
|
392
|
-
id: this.generateEventId(),
|
|
393
|
-
type: 'OBJECT_DELETED',
|
|
394
|
-
source: 'gc',
|
|
395
|
-
timestamp: Date.now(),
|
|
396
|
-
payload: {
|
|
397
|
-
operation: 'delete',
|
|
398
|
-
sha
|
|
399
|
-
},
|
|
400
|
-
sequence: this.nextSequence(),
|
|
401
|
-
version: 1
|
|
402
|
-
};
|
|
403
|
-
await this.emitEvent(event);
|
|
404
|
-
}
|
|
405
|
-
/**
|
|
406
|
-
* Captures a reference update operation.
|
|
407
|
-
*
|
|
408
|
-
* @description
|
|
409
|
-
* Called when a git reference (branch, tag) is updated to point to a new commit.
|
|
410
|
-
*
|
|
411
|
-
* @param refName - Full reference name (e.g., 'refs/heads/main')
|
|
412
|
-
* @param oldSha - Previous SHA (all zeros for new refs)
|
|
413
|
-
* @param newSha - New SHA (all zeros for deleted refs)
|
|
414
|
-
*
|
|
415
|
-
* @example
|
|
416
|
-
* ```typescript
|
|
417
|
-
* await capture.onRefUpdate(
|
|
418
|
-
* 'refs/heads/main',
|
|
419
|
-
* 'oldcommit123...',
|
|
420
|
-
* 'newcommit456...'
|
|
421
|
-
* )
|
|
422
|
-
* ```
|
|
423
|
-
*/
|
|
424
|
-
async onRefUpdate(refName, oldSha, newSha) {
|
|
425
|
-
const event = {
|
|
426
|
-
id: this.generateEventId(),
|
|
427
|
-
type: 'REF_UPDATED',
|
|
428
|
-
source: 'push',
|
|
429
|
-
timestamp: Date.now(),
|
|
430
|
-
payload: {
|
|
431
|
-
operation: 'ref-update',
|
|
432
|
-
refName,
|
|
433
|
-
oldSha,
|
|
434
|
-
newSha
|
|
435
|
-
},
|
|
436
|
-
sequence: this.nextSequence(),
|
|
437
|
-
version: 1
|
|
438
|
-
};
|
|
439
|
-
await this.emitEvent(event);
|
|
440
|
-
}
|
|
441
|
-
/**
|
|
442
|
-
* Captures a commit creation operation.
|
|
443
|
-
*
|
|
444
|
-
* @description
|
|
445
|
-
* Called when a new commit object is created.
|
|
446
|
-
*
|
|
447
|
-
* @param commitSha - SHA-1 hash of the commit
|
|
448
|
-
* @param treeSha - SHA-1 hash of the tree the commit points to
|
|
449
|
-
* @param parentShas - Array of parent commit SHAs
|
|
450
|
-
*
|
|
451
|
-
* @example
|
|
452
|
-
* ```typescript
|
|
453
|
-
* await capture.onCommitCreated(
|
|
454
|
-
* 'commitabc123...',
|
|
455
|
-
* 'treedef456...',
|
|
456
|
-
* ['parent1...', 'parent2...']
|
|
457
|
-
* )
|
|
458
|
-
* ```
|
|
459
|
-
*/
|
|
460
|
-
async onCommitCreated(commitSha, treeSha, parentShas) {
|
|
461
|
-
const event = {
|
|
462
|
-
id: this.generateEventId(),
|
|
463
|
-
type: 'COMMIT_CREATED',
|
|
464
|
-
source: 'push',
|
|
465
|
-
timestamp: Date.now(),
|
|
466
|
-
payload: {
|
|
467
|
-
operation: 'commit-create',
|
|
468
|
-
sha: commitSha,
|
|
469
|
-
treeSha,
|
|
470
|
-
parentShas
|
|
471
|
-
},
|
|
472
|
-
sequence: this.nextSequence(),
|
|
473
|
-
version: 1
|
|
474
|
-
};
|
|
475
|
-
await this.emitEvent(event);
|
|
476
|
-
}
|
|
477
|
-
/**
|
|
478
|
-
* Captures a pack reception operation.
|
|
479
|
-
*
|
|
480
|
-
* @description
|
|
481
|
-
* Called when a packfile is received during a push or fetch operation.
|
|
482
|
-
*
|
|
483
|
-
* @param packData - Raw packfile data
|
|
484
|
-
* @param objectCount - Number of objects in the pack
|
|
485
|
-
*
|
|
486
|
-
* @example
|
|
487
|
-
* ```typescript
|
|
488
|
-
* await capture.onPackReceived(packBuffer, 42)
|
|
489
|
-
* ```
|
|
490
|
-
*/
|
|
491
|
-
async onPackReceived(packData, objectCount) {
|
|
492
|
-
const event = {
|
|
493
|
-
id: this.generateEventId(),
|
|
494
|
-
type: 'PACK_RECEIVED',
|
|
495
|
-
source: 'push',
|
|
496
|
-
timestamp: Date.now(),
|
|
497
|
-
payload: {
|
|
498
|
-
operation: 'pack-receive',
|
|
499
|
-
data: packData,
|
|
500
|
-
objectCount
|
|
501
|
-
},
|
|
502
|
-
sequence: this.nextSequence(),
|
|
503
|
-
version: 1
|
|
504
|
-
};
|
|
505
|
-
await this.emitEvent(event);
|
|
506
|
-
}
|
|
507
|
-
/**
|
|
508
|
-
* Captures a branch creation operation.
|
|
509
|
-
*
|
|
510
|
-
* @param branchName - Name of the branch (without refs/heads/ prefix)
|
|
511
|
-
* @param sha - SHA-1 hash the branch points to
|
|
512
|
-
*
|
|
513
|
-
* @example
|
|
514
|
-
* ```typescript
|
|
515
|
-
* await capture.onBranchCreated('feature-x', 'abc123...')
|
|
516
|
-
* ```
|
|
517
|
-
*/
|
|
518
|
-
async onBranchCreated(branchName, sha) {
|
|
519
|
-
const event = {
|
|
520
|
-
id: this.generateEventId(),
|
|
521
|
-
type: 'BRANCH_CREATED',
|
|
522
|
-
source: 'push',
|
|
523
|
-
timestamp: Date.now(),
|
|
524
|
-
payload: {
|
|
525
|
-
operation: 'branch-create',
|
|
526
|
-
branchName,
|
|
527
|
-
sha
|
|
528
|
-
},
|
|
529
|
-
sequence: this.nextSequence(),
|
|
530
|
-
version: 1
|
|
531
|
-
};
|
|
532
|
-
await this.emitEvent(event);
|
|
533
|
-
}
|
|
534
|
-
/**
|
|
535
|
-
* Captures a branch deletion operation.
|
|
536
|
-
*
|
|
537
|
-
* @param branchName - Name of the deleted branch
|
|
538
|
-
*
|
|
539
|
-
* @example
|
|
540
|
-
* ```typescript
|
|
541
|
-
* await capture.onBranchDeleted('feature-x')
|
|
542
|
-
* ```
|
|
543
|
-
*/
|
|
544
|
-
async onBranchDeleted(branchName) {
|
|
545
|
-
const event = {
|
|
546
|
-
id: this.generateEventId(),
|
|
547
|
-
type: 'BRANCH_DELETED',
|
|
548
|
-
source: 'push',
|
|
549
|
-
timestamp: Date.now(),
|
|
550
|
-
payload: {
|
|
551
|
-
operation: 'branch-delete',
|
|
552
|
-
branchName
|
|
553
|
-
},
|
|
554
|
-
sequence: this.nextSequence(),
|
|
555
|
-
version: 1
|
|
556
|
-
};
|
|
557
|
-
await this.emitEvent(event);
|
|
558
|
-
}
|
|
559
|
-
/**
|
|
560
|
-
* Captures a tag creation operation.
|
|
561
|
-
*
|
|
562
|
-
* @param tagName - Name of the tag
|
|
563
|
-
* @param sha - SHA-1 hash the tag points to
|
|
564
|
-
*
|
|
565
|
-
* @example
|
|
566
|
-
* ```typescript
|
|
567
|
-
* await capture.onTagCreated('v1.0.0', 'abc123...')
|
|
568
|
-
* ```
|
|
569
|
-
*/
|
|
570
|
-
async onTagCreated(tagName, sha) {
|
|
571
|
-
const event = {
|
|
572
|
-
id: this.generateEventId(),
|
|
573
|
-
type: 'TAG_CREATED',
|
|
574
|
-
source: 'push',
|
|
575
|
-
timestamp: Date.now(),
|
|
576
|
-
payload: {
|
|
577
|
-
operation: 'tag-create',
|
|
578
|
-
tagName,
|
|
579
|
-
sha
|
|
580
|
-
},
|
|
581
|
-
sequence: this.nextSequence(),
|
|
582
|
-
version: 1
|
|
583
|
-
};
|
|
584
|
-
await this.emitEvent(event);
|
|
585
|
-
}
|
|
586
|
-
/**
|
|
587
|
-
* Captures a merge completion operation.
|
|
588
|
-
*
|
|
589
|
-
* @param mergeSha - SHA-1 hash of the merge commit
|
|
590
|
-
* @param baseSha - SHA-1 hash of the base commit
|
|
591
|
-
* @param headSha - SHA-1 hash of the head commit being merged
|
|
592
|
-
*
|
|
593
|
-
* @example
|
|
594
|
-
* ```typescript
|
|
595
|
-
* await capture.onMergeCompleted('merge123...', 'base456...', 'head789...')
|
|
596
|
-
* ```
|
|
597
|
-
*/
|
|
598
|
-
async onMergeCompleted(mergeSha, baseSha, headSha) {
|
|
599
|
-
const event = {
|
|
600
|
-
id: this.generateEventId(),
|
|
601
|
-
type: 'MERGE_COMPLETED',
|
|
602
|
-
source: 'push',
|
|
603
|
-
timestamp: Date.now(),
|
|
604
|
-
payload: {
|
|
605
|
-
operation: 'merge-complete',
|
|
606
|
-
sha: mergeSha,
|
|
607
|
-
baseSha,
|
|
608
|
-
headSha
|
|
609
|
-
},
|
|
610
|
-
sequence: this.nextSequence(),
|
|
611
|
-
version: 1
|
|
612
|
-
};
|
|
613
|
-
await this.emitEvent(event);
|
|
614
|
-
}
|
|
615
|
-
/**
|
|
616
|
-
* Returns a copy of all buffered events.
|
|
617
|
-
*
|
|
618
|
-
* @returns Array of buffered events
|
|
619
|
-
*/
|
|
620
|
-
getEvents() {
|
|
621
|
-
return [...this.events];
|
|
622
|
-
}
|
|
623
|
-
/**
|
|
624
|
-
* Returns the current buffer size.
|
|
625
|
-
*
|
|
626
|
-
* @returns Number of events in the buffer
|
|
627
|
-
*/
|
|
628
|
-
getBufferSize() {
|
|
629
|
-
return this.events.length;
|
|
630
|
-
}
|
|
631
|
-
/**
|
|
632
|
-
* Flushes all buffered events.
|
|
633
|
-
*
|
|
634
|
-
* @description
|
|
635
|
-
* Returns and clears all events from the buffer. The returned events
|
|
636
|
-
* can be processed, serialized, or forwarded to downstream systems.
|
|
637
|
-
*
|
|
638
|
-
* @returns Array of flushed events
|
|
639
|
-
*
|
|
640
|
-
* @example
|
|
641
|
-
* ```typescript
|
|
642
|
-
* const events = await capture.flush()
|
|
643
|
-
* console.log(`Flushed ${events.length} events`)
|
|
644
|
-
* await sendToAnalytics(events)
|
|
645
|
-
* ```
|
|
646
|
-
*/
|
|
647
|
-
async flush() {
|
|
648
|
-
const flushed = [...this.events];
|
|
649
|
-
this.events = [];
|
|
650
|
-
return flushed;
|
|
651
|
-
}
|
|
652
|
-
/**
|
|
653
|
-
* Adds an event listener.
|
|
654
|
-
*
|
|
655
|
-
* @description
|
|
656
|
-
* Listeners are called synchronously for each event as it is captured.
|
|
657
|
-
*
|
|
658
|
-
* @param listener - Callback function to invoke for each event
|
|
659
|
-
*
|
|
660
|
-
* @example
|
|
661
|
-
* ```typescript
|
|
662
|
-
* capture.addListener((event) => {
|
|
663
|
-
* console.log(`New event: ${event.type}`)
|
|
664
|
-
* })
|
|
665
|
-
* ```
|
|
666
|
-
*/
|
|
667
|
-
addListener(listener) {
|
|
668
|
-
this.listeners.push(listener);
|
|
669
|
-
}
|
|
670
|
-
/**
|
|
671
|
-
* Removes an event listener.
|
|
672
|
-
*
|
|
673
|
-
* @param listener - The listener to remove
|
|
674
|
-
*/
|
|
675
|
-
removeListener(listener) {
|
|
676
|
-
const index = this.listeners.indexOf(listener);
|
|
677
|
-
if (index !== -1) {
|
|
678
|
-
this.listeners.splice(index, 1);
|
|
679
|
-
}
|
|
680
|
-
}
|
|
681
|
-
}
|
|
682
|
-
// ============================================================================
|
|
683
|
-
// Parquet Schema
|
|
684
|
-
// ============================================================================
|
|
685
|
-
/**
 * Default column definitions for the CDC event Parquet schema.
 *
 * Declared as compact `[name, type, nullable]` tuples and expanded into
 * `{ name, type, nullable }` field objects. Only `sha` is nullable.
 * @internal
 */
const CDC_EVENT_FIELDS = [
    ['event_id', 'STRING', false],
    ['event_type', 'STRING', false],
    ['source', 'STRING', false],
    ['timestamp', 'TIMESTAMP', false],
    ['sequence', 'INT64', false],
    ['version', 'INT64', false],
    ['payload_json', 'STRING', false],
    ['sha', 'STRING', true]
].map(([name, type, nullable]) => ({ name, type, nullable }));
|
|
699
|
-
/**
 * Column layout for CDC event Parquet files.
 *
 * @description
 * A schema is simply an ordered list of field definitions. The
 * `forCDCEvents()` factory starts from the standard CDC event fields and
 * optionally appends caller-supplied fields for domain-specific data.
 *
 * @example
 * ```typescript
 * // Create default schema
 * const schema = ParquetSchema.forCDCEvents()
 *
 * // Create schema with custom fields
 * const customSchema = ParquetSchema.forCDCEvents([
 *   { name: 'repository_id', type: 'STRING', nullable: false },
 *   { name: 'user_id', type: 'STRING', nullable: true }
 * ])
 * ```
 *
 * @class ParquetSchema
 */
export class ParquetSchema {
    /** Ordered field definitions; stored by reference, not copied. */
    fields;
    /**
     * Wraps the given field list in a schema.
     *
     * @param fields - Array of field definitions
     */
    constructor(fields) {
        this.fields = fields;
    }
    /**
     * Builds the schema used for CDC events.
     *
     * @description
     * The result always begins with the standard CDC fields (event_id,
     * event_type, source, timestamp, sequence, version, payload_json, sha).
     * When `customFields` is provided its entries are appended after them.
     * The field array is new on every call, but the standard field objects
     * themselves are shared between schemas.
     *
     * @param customFields - Optional additional fields to add
     * @returns A new ParquetSchema instance
     *
     * @example
     * ```typescript
     * const schema = ParquetSchema.forCDCEvents()
     * // Schema includes: event_id, event_type, source, timestamp,
     * //                  sequence, version, payload_json, sha
     * ```
     */
    static forCDCEvents(customFields) {
        const combined = customFields
            ? [...CDC_EVENT_FIELDS, ...customFields]
            : [...CDC_EVENT_FIELDS];
        return new ParquetSchema(combined);
    }
}
|
|
756
|
-
/**
 * Transforms CDC events to a Parquet-style format.
 *
 * @description
 * ParquetTransformer converts CDC events to flat rows and serializes
 * batches of rows into a simplified, Parquet-like container. It handles:
 *
 * - Event to row conversion (flattening the event structure)
 * - JSON serialization of complex payloads
 * - Batch creation with schema and metadata
 * - Buffer generation with optional gzip compression
 *
 * Note that the output is NOT a real Parquet file: the body is JSON framed
 * by `PAR1` magic bytes, and the 'snappy' setting currently stores the
 * data uncompressed.
 *
 * @example
 * ```typescript
 * const transformer = new ParquetTransformer({ compression: 'snappy' })
 *
 * // Transform single event to row
 * const row = transformer.eventToRow(event)
 *
 * // Transform batch of events
 * const batch = transformer.eventsToBatch(events)
 *
 * // Generate Parquet file
 * const buffer = await transformer.toParquetBuffer(batch)
 * await r2.put('events.parquet', buffer)
 * ```
 *
 * @class ParquetTransformer
 */
export class ParquetTransformer {
    /**
     * Compression algorithm to use ('snappy' when not configured).
     * @private
     */
    compression;
    /**
     * Creates a new ParquetTransformer.
     *
     * @param options - Transformer configuration; `options.compression`
     *   selects the algorithm and defaults to 'snappy' when null/undefined
     */
    constructor(options = {}) {
        this.compression = options.compression ?? 'snappy';
    }
    /**
     * Converts a CDC event to a Parquet row.
     *
     * @description
     * Flattens the event structure and serializes the payload to JSON.
     * A truthy `payload.data` is converted with `Array.from` first, because
     * a Uint8Array does not serialize to a JSON array on its own. The
     * top-level `sha` column mirrors `payload.sha`, or is `null` when the
     * payload has none.
     *
     * @param event - The CDC event to convert
     * @returns A Parquet row representation
     *
     * @example
     * ```typescript
     * const row = transformer.eventToRow(event)
     * console.log(row.event_id, row.event_type, row.sha)
     * ```
     */
    eventToRow(event) {
        const serializablePayload = {
            ...event.payload,
            // An undefined value is dropped by JSON.stringify, so payloads
            // without binary data serialize unchanged.
            data: event.payload.data ? Array.from(event.payload.data) : undefined
        };
        return {
            event_id: event.id,
            event_type: event.type,
            source: event.source,
            timestamp: event.timestamp,
            sequence: event.sequence,
            version: event.version,
            payload_json: JSON.stringify(serializablePayload),
            sha: event.payload.sha ?? null
        };
    }
    /**
     * Converts multiple CDC events to a Parquet batch.
     *
     * @description
     * Maps every event through `eventToRow()` and wraps the rows with the
     * row count, creation time (`Date.now()`), the default CDC schema and
     * this transformer's compression setting.
     *
     * @param events - Array of CDC events to batch
     * @returns A ParquetBatch ready for serialization
     *
     * @example
     * ```typescript
     * const batch = transformer.eventsToBatch(events)
     * console.log(`Batch has ${batch.rowCount} rows`)
     * ```
     */
    eventsToBatch(events) {
        const rows = events.map(e => this.eventToRow(e));
        return {
            rows,
            rowCount: rows.length,
            createdAt: Date.now(),
            schema: ParquetSchema.forCDCEvents(),
            compression: this.compression
        };
    }
    /**
     * Serializes a ParquetBatch to a Parquet-like file buffer.
     *
     * @description
     * Layout of the result: `PAR1` (4 bytes) + body + body length as a
     * little-endian uint32 (4 bytes) + `PAR1` (4 bytes). The body is the
     * JSON encoding of the batch, gzip-compressed when this transformer's
     * compression is 'gzip'. The transformer's own setting decides whether
     * compression is applied; `batch.compression` is only recorded inside
     * the JSON.
     *
     * @param batch - The ParquetBatch to serialize
     * @returns Promise resolving to the file contents as a Uint8Array
     *
     * @example
     * ```typescript
     * const buffer = await transformer.toParquetBuffer(batch)
     * await r2.put('events.parquet', buffer)
     * ```
     */
    async toParquetBuffer(batch) {
        // Simplified container; a real implementation would use a proper Parquet library.
        const encoder = new TextEncoder();
        const magic = encoder.encode('PAR1');
        const dataJson = JSON.stringify({
            rows: batch.rows,
            rowCount: batch.rowCount,
            createdAt: batch.createdAt,
            schema: batch.schema,
            compression: batch.compression
        });
        let dataBytes = encoder.encode(dataJson);
        if (this.compression === 'gzip') {
            dataBytes = await this.gzipCompress(dataBytes);
        }
        else if (this.compression === 'snappy') {
            // Snappy simulation (currently a pass-through)
            dataBytes = await this.simpleCompress(dataBytes);
        }
        // Assemble: PAR1 + data + length (4 bytes, little-endian) + PAR1
        const lengthBytes = new Uint8Array(4);
        new DataView(lengthBytes.buffer).setUint32(0, dataBytes.length, true);
        const result = new Uint8Array(4 + dataBytes.length + 4 + 4);
        let offset = 0;
        for (const part of [magic, dataBytes, lengthBytes, magic]) {
            result.set(part, offset);
            offset += part.length;
        }
        return result;
    }
    /**
     * Gzip-compresses a byte array using the platform CompressionStream.
     *
     * When CompressionStream is not available the input is returned
     * unchanged (no compression). A failure on either side of the stream
     * rejects the returned promise, so it can never surface as an
     * unhandled rejection.
     *
     * @param data - Bytes to compress
     * @returns Promise resolving to the compressed bytes
     * @private
     */
    async gzipCompress(data) {
        if (typeof CompressionStream === 'undefined') {
            // Fallback: return data as-is (no compression)
            return data;
        }
        const stream = new CompressionStream('gzip');
        const writer = stream.writable.getWriter();
        const reader = stream.readable.getReader();
        // Start writing and reading before awaiting either: the write side
        // can only make progress while the read side is being drained.
        const writing = (async () => {
            await writer.write(data);
            await writer.close();
        })();
        const reading = (async () => {
            const chunks = [];
            for (;;) {
                const { done, value } = await reader.read();
                if (value) {
                    chunks.push(value);
                }
                if (done) {
                    return chunks;
                }
            }
        })();
        // Promise.all observes both promises, so an error on one side
        // rejects this call without leaving the other promise unhandled.
        const [, chunks] = await Promise.all([writing, reading]);
        const totalLength = chunks.reduce((acc, chunk) => acc + chunk.length, 0);
        const result = new Uint8Array(totalLength);
        let offset = 0;
        for (const chunk of chunks) {
            result.set(chunk, offset);
            offset += chunk.length;
        }
        return result;
    }
    /**
     * Placeholder for snappy compression: returns the input unchanged.
     *
     * Real snappy compression would require a library; keeping this a
     * pass-through also avoids async stream issues with fake timers.
     *
     * @param data - Bytes to "compress"
     * @returns Promise resolving to the same bytes
     * @private
     */
    async simpleCompress(data) {
        return data;
    }
}
|
|
947
|
-
/**
 * Batches CDC events for efficient processing.
 *
 * @description
 * CDCBatcher collects CDC events and groups them into batches based on
 * count or time thresholds. This enables efficient downstream processing
 * by reducing the number of I/O operations and enabling bulk operations.
 *
 * **Batching Strategies:**
 * - **Count-based**: Flush when batch reaches `batchSize` events
 * - **Time-based**: Flush after `flushIntervalMs` even if batch is not full
 *
 * **Error handling:**
 * - Count-based and manual flushes await each handler in registration
 *   order; a handler error propagates to the caller of `add()`/`flush()`.
 * - Time-based flushes have no caller to report to, so handler errors
 *   (thrown synchronously or via a rejected promise) are ignored.
 *
 * @example
 * ```typescript
 * const batcher = new CDCBatcher({
 *   batchSize: 100,
 *   flushIntervalMs: 5000
 * })
 *
 * // Register batch handler
 * batcher.onBatch(async (batch) => {
 *   console.log(`Processing ${batch.eventCount} events`)
 *   console.log(`Sequence range: ${batch.minSequence} - ${batch.maxSequence}`)
 *   await saveToStorage(batch.events)
 * })
 *
 * // Add events
 * await batcher.add(event1)
 * await batcher.add(event2)
 *
 * // Check pending events
 * console.log(`Pending: ${batcher.getPendingCount()}`)
 *
 * // Manual flush
 * const result = await batcher.flush()
 *
 * // Stop the batcher
 * await batcher.stop()
 * ```
 *
 * @class CDCBatcher
 */
export class CDCBatcher {
    /**
     * Batch configuration (`batchSize`, `flushIntervalMs`).
     * @private
     */
    config;
    /**
     * Buffer of pending events.
     * @private
     */
    events = [];
    /**
     * Registered batch handlers.
     * @private
     */
    batchHandlers = [];
    /**
     * Timer for time-based flushing; null when no timer is scheduled.
     * @private
     */
    flushTimer = null;
    /**
     * Whether the batcher has been stopped.
     * @private
     */
    stopped = false;
    /**
     * Creates a new CDCBatcher.
     *
     * No timer is started here; the first `add()` schedules it.
     *
     * @param config - Batch configuration
     */
    constructor(config) {
        this.config = config;
    }
    /**
     * Removes all pending events from the buffer and describes them.
     *
     * Must only be called while the buffer is non-empty. The sequence and
     * timestamp ranges are folded pairwise rather than spread into
     * `Math.min`/`Math.max`, so very large batches cannot exceed the
     * engine's argument limit.
     *
     * @returns Batch result with events, count and sequence/timestamp ranges
     * @private
     */
    takePendingBatch() {
        const batchEvents = [...this.events];
        this.events = [];
        let minSequence = Infinity;
        let maxSequence = -Infinity;
        let minTimestamp = Infinity;
        let maxTimestamp = -Infinity;
        for (const { sequence, timestamp } of batchEvents) {
            minSequence = Math.min(minSequence, sequence);
            maxSequence = Math.max(maxSequence, sequence);
            minTimestamp = Math.min(minTimestamp, timestamp);
            maxTimestamp = Math.max(maxTimestamp, timestamp);
        }
        return {
            events: batchEvents,
            eventCount: batchEvents.length,
            success: true,
            minSequence,
            maxSequence,
            minTimestamp,
            maxTimestamp
        };
    }
    /**
     * Schedules a one-shot time-based flush unless one is already pending
     * or the batcher has been stopped.
     *
     * The timer never reschedules itself; the next `add()` starts a new one.
     * @private
     */
    ensureTimerRunning() {
        if (this.stopped || this.flushTimer !== null) {
            return;
        }
        this.flushTimer = setTimeout(() => {
            this.flushTimer = null;
            if (this.stopped || this.events.length === 0) {
                return;
            }
            const result = this.takePendingBatch();
            const handlerPromises = [];
            for (const handler of this.batchHandlers) {
                try {
                    const maybePromise = handler(result);
                    if (maybePromise && typeof maybePromise.then === 'function') {
                        handlerPromises.push(maybePromise);
                    }
                }
                catch {
                    // Ignore synchronous handler errors in timer context
                }
            }
            // allSettled never rejects, so a failing async handler is
            // ignored too instead of becoming an unhandled rejection.
            void Promise.allSettled(handlerPromises);
        }, this.config.flushIntervalMs);
    }
    /**
     * Cancels the pending time-based flush, if any.
     * @private
     */
    clearFlushTimer() {
        if (this.flushTimer !== null) {
            clearTimeout(this.flushTimer);
            this.flushTimer = null;
        }
    }
    /**
     * Adds an event to the batch.
     *
     * @description
     * Adds the event to the pending batch and makes sure the flush timer
     * is scheduled. If the batch reaches the configured size, the timer
     * is cancelled and the batch is flushed immediately; the returned
     * promise then settles once every handler has finished.
     *
     * @param event - The CDC event to add
     *
     * @example
     * ```typescript
     * await batcher.add(event)
     * ```
     */
    async add(event) {
        this.events.push(event);
        this.ensureTimerRunning();
        if (this.events.length >= this.config.batchSize) {
            this.clearFlushTimer();
            await this.flushInternal();
            // Timer will be re-started on next add() if needed
        }
    }
    /**
     * Internal flush implementation.
     *
     * Returns an empty result (without range fields) when nothing is
     * pending; otherwise drains the buffer and awaits each handler in
     * registration order. A handler error propagates and prevents later
     * handlers from running.
     *
     * @returns Promise resolving to the batch result
     * @private
     */
    async flushInternal() {
        if (this.events.length === 0) {
            return { events: [], eventCount: 0, success: true };
        }
        const result = this.takePendingBatch();
        for (const handler of this.batchHandlers) {
            await handler(result);
        }
        return result;
    }
    /**
     * Manually flushes pending events.
     *
     * @description
     * Forces an immediate flush of all pending events, regardless of
     * batch size or timer. Clears the flush timer; it is started again
     * by the next `add()`.
     *
     * @returns Promise resolving to the batch result
     *
     * @example
     * ```typescript
     * const result = await batcher.flush()
     * console.log(`Flushed ${result.eventCount} events`)
     * ```
     */
    async flush() {
        this.clearFlushTimer();
        return await this.flushInternal();
    }
    /**
     * Returns the number of pending events.
     *
     * @returns Number of events waiting to be flushed
     */
    getPendingCount() {
        return this.events.length;
    }
    /**
     * Registers a batch handler.
     *
     * @description
     * Handlers are called when a batch is flushed (automatically or manually).
     * Multiple handlers can be registered; each receives the same batch
     * result object.
     *
     * @param handler - Callback function to invoke for each batch
     *
     * @example
     * ```typescript
     * batcher.onBatch(async (batch) => {
     *   await saveToStorage(batch.events)
     * })
     * ```
     */
    onBatch(handler) {
        this.batchHandlers.push(handler);
    }
    /**
     * Stops the batcher.
     *
     * @description
     * Stops the flush timer and prevents further time-based flushes.
     * Does NOT automatically flush pending events - call flush() first
     * if you need to process remaining events.
     *
     * @example
     * ```typescript
     * await batcher.flush()  // Process remaining events
     * await batcher.stop()   // Stop the timer
     * ```
     */
    async stop() {
        this.stopped = true;
        this.clearFlushTimer();
    }
}
|
|
1201
|
-
/**
|
|
1202
|
-
* Main CDC Pipeline for processing git operation events.
|
|
1203
|
-
*
|
|
1204
|
-
* @description
|
|
1205
|
-
* CDCPipeline orchestrates the complete change data capture flow from
|
|
1206
|
-
* event ingestion to Parquet output. It integrates batching, transformation,
|
|
1207
|
-
* retry handling, and dead letter queue management.
|
|
1208
|
-
*
|
|
1209
|
-
* **Pipeline Flow:**
|
|
1210
|
-
* 1. Events are submitted via `process()` or `processMany()`
|
|
1211
|
-
* 2. Events are validated and added to the batcher
|
|
1212
|
-
* 3. When a batch is ready, it's transformed to Parquet format
|
|
1213
|
-
* 4. On success, output handlers are notified
|
|
1214
|
-
* 5. On failure, retries are attempted with exponential backoff
|
|
1215
|
-
* 6. After max retries, events go to dead letter queue
|
|
1216
|
-
*
|
|
1217
|
-
* **Features:**
|
|
1218
|
-
* - Configurable batch size and flush interval
|
|
1219
|
-
* - Automatic retry with exponential backoff
|
|
1220
|
-
* - Dead letter queue for failed events
|
|
1221
|
-
* - Real-time metrics for monitoring
|
|
1222
|
-
* - Graceful shutdown with pending event flush
|
|
1223
|
-
*
|
|
1224
|
-
* @example
|
|
1225
|
-
* ```typescript
|
|
1226
|
-
* const pipeline = new CDCPipeline({
|
|
1227
|
-
* batchSize: 100,
|
|
1228
|
-
* flushIntervalMs: 5000,
|
|
1229
|
-
* maxRetries: 3,
|
|
1230
|
-
* parquetCompression: 'snappy',
|
|
1231
|
-
* outputPath: '/analytics',
|
|
1232
|
-
* schemaVersion: 1
|
|
1233
|
-
* })
|
|
1234
|
-
*
|
|
1235
|
-
* // Register handlers
|
|
1236
|
-
* pipeline.onOutput(async (output) => {
|
|
1237
|
-
* await r2.put(`cdc/${output.batchId}.parquet`, output.parquetBuffer)
|
|
1238
|
-
* })
|
|
1239
|
-
*
|
|
1240
|
-
* pipeline.onDeadLetter((events, error) => {
|
|
1241
|
-
* console.error(`Failed ${events.length} events:`, error)
|
|
1242
|
-
* })
|
|
1243
|
-
*
|
|
1244
|
-
* // Start the pipeline
|
|
1245
|
-
* await pipeline.start()
|
|
1246
|
-
*
|
|
1247
|
-
* // Process events
|
|
1248
|
-
* await pipeline.process(event)
|
|
1249
|
-
*
|
|
1250
|
-
* // Check metrics
|
|
1251
|
-
* const metrics = pipeline.getMetrics()
|
|
1252
|
-
*
|
|
1253
|
-
* // Stop gracefully
|
|
1254
|
-
* const result = await pipeline.stop()
|
|
1255
|
-
* console.log(`Flushed ${result.flushedCount} events on shutdown`)
|
|
1256
|
-
* ```
|
|
1257
|
-
*
|
|
1258
|
-
* @class CDCPipeline
|
|
1259
|
-
*/
|
|
1260
|
-
export class CDCPipeline {
|
|
1261
|
-
/**
|
|
1262
|
-
* Pipeline configuration.
|
|
1263
|
-
* @private
|
|
1264
|
-
*/
|
|
1265
|
-
config;
|
|
1266
|
-
/**
|
|
1267
|
-
* Current pipeline state.
|
|
1268
|
-
* @private
|
|
1269
|
-
*/
|
|
1270
|
-
state = 'stopped';
|
|
1271
|
-
/**
|
|
1272
|
-
* Event batcher instance.
|
|
1273
|
-
* @private
|
|
1274
|
-
*/
|
|
1275
|
-
batcher = null;
|
|
1276
|
-
/**
|
|
1277
|
-
* Parquet transformer instance.
|
|
1278
|
-
* @private
|
|
1279
|
-
*/
|
|
1280
|
-
transformer;
|
|
1281
|
-
/**
|
|
1282
|
-
* Registered output handlers.
|
|
1283
|
-
* @private
|
|
1284
|
-
*/
|
|
1285
|
-
outputHandlers = [];
|
|
1286
|
-
/**
|
|
1287
|
-
* Registered dead letter handlers.
|
|
1288
|
-
* @private
|
|
1289
|
-
*/
|
|
1290
|
-
deadLetterHandlers = [];
|
|
1291
|
-
/**
|
|
1292
|
-
* Pipeline metrics.
|
|
1293
|
-
* @private
|
|
1294
|
-
*/
|
|
1295
|
-
metrics = {
|
|
1296
|
-
eventsProcessed: 0,
|
|
1297
|
-
batchesGenerated: 0,
|
|
1298
|
-
bytesWritten: 0,
|
|
1299
|
-
errors: 0,
|
|
1300
|
-
avgProcessingLatencyMs: 0
|
|
1301
|
-
};
|
|
1302
|
-
/**
|
|
1303
|
-
* Processing latency samples.
|
|
1304
|
-
* @private
|
|
1305
|
-
*/
|
|
1306
|
-
processingLatencies = [];
|
|
1307
|
-
/**
|
|
1308
|
-
* Retry policy instance.
|
|
1309
|
-
* @private
|
|
1310
|
-
*/
|
|
1311
|
-
retryPolicy;
|
|
1312
|
-
/**
|
|
1313
|
-
* Creates a new CDCPipeline.
|
|
1314
|
-
*
|
|
1315
|
-
* @param config - Pipeline configuration
|
|
1316
|
-
*/
|
|
1317
|
-
constructor(config) {
|
|
1318
|
-
this.config = config;
|
|
1319
|
-
this.transformer = new ParquetTransformer({
|
|
1320
|
-
compression: config.parquetCompression
|
|
1321
|
-
});
|
|
1322
|
-
this.retryPolicy = new CDCRetryPolicy({
|
|
1323
|
-
maxRetries: config.maxRetries,
|
|
1324
|
-
initialDelayMs: 100,
|
|
1325
|
-
maxDelayMs: 5000,
|
|
1326
|
-
backoffMultiplier: 2
|
|
1327
|
-
});
|
|
1328
|
-
}
|
|
1329
|
-
/**
|
|
1330
|
-
* Returns the current pipeline state.
|
|
1331
|
-
*
|
|
1332
|
-
* @returns Current state ('stopped', 'running', or 'paused')
|
|
1333
|
-
*/
|
|
1334
|
-
getState() {
|
|
1335
|
-
return this.state;
|
|
1336
|
-
}
|
|
1337
|
-
/**
|
|
1338
|
-
* Starts the pipeline.
|
|
1339
|
-
*
|
|
1340
|
-
* @description
|
|
1341
|
-
* Initializes the batcher and begins accepting events. If already
|
|
1342
|
-
* running, this method is a no-op.
|
|
1343
|
-
*
|
|
1344
|
-
* @example
|
|
1345
|
-
* ```typescript
|
|
1346
|
-
* await pipeline.start()
|
|
1347
|
-
* console.log(pipeline.getState()) // 'running'
|
|
1348
|
-
* ```
|
|
1349
|
-
*/
|
|
1350
|
-
async start() {
|
|
1351
|
-
if (this.state === 'running')
|
|
1352
|
-
return;
|
|
1353
|
-
this.batcher = new CDCBatcher({
|
|
1354
|
-
batchSize: this.config.batchSize,
|
|
1355
|
-
flushIntervalMs: this.config.flushIntervalMs
|
|
1356
|
-
});
|
|
1357
|
-
this.batcher.onBatch(async (batch) => {
|
|
1358
|
-
await this.handleBatch(batch);
|
|
1359
|
-
});
|
|
1360
|
-
this.state = 'running';
|
|
1361
|
-
}
|
|
1362
|
-
/**
|
|
1363
|
-
* Stops the pipeline.
|
|
1364
|
-
*
|
|
1365
|
-
* @description
|
|
1366
|
-
* Flushes any pending events, stops the batcher, and sets state to stopped.
|
|
1367
|
-
* Returns information about events flushed during shutdown.
|
|
1368
|
-
*
|
|
1369
|
-
* @returns Promise resolving to stop result with flushed event count
|
|
1370
|
-
*
|
|
1371
|
-
* @example
|
|
1372
|
-
* ```typescript
|
|
1373
|
-
* const result = await pipeline.stop()
|
|
1374
|
-
* console.log(`Flushed ${result.flushedCount} events on shutdown`)
|
|
1375
|
-
* ```
|
|
1376
|
-
*/
|
|
1377
|
-
async stop() {
|
|
1378
|
-
if (this.state === 'stopped') {
|
|
1379
|
-
return { flushedCount: 0 };
|
|
1380
|
-
}
|
|
1381
|
-
let flushedCount = 0;
|
|
1382
|
-
if (this.batcher) {
|
|
1383
|
-
const result = await this.batcher.flush();
|
|
1384
|
-
flushedCount = result.eventCount;
|
|
1385
|
-
await this.batcher.stop();
|
|
1386
|
-
this.batcher = null;
|
|
1387
|
-
}
|
|
1388
|
-
this.state = 'stopped';
|
|
1389
|
-
return { flushedCount };
|
|
1390
|
-
}
|
|
1391
|
-
/**
|
|
1392
|
-
* Processes a single event.
|
|
1393
|
-
*
|
|
1394
|
-
* @description
|
|
1395
|
-
* Validates the event and adds it to the batcher for processing.
|
|
1396
|
-
* Updates metrics including latency tracking.
|
|
1397
|
-
*
|
|
1398
|
-
* @param event - The CDC event to process
|
|
1399
|
-
* @returns Promise resolving to process result
|
|
1400
|
-
*
|
|
1401
|
-
* @throws {CDCError} PROCESSING_ERROR - If pipeline is not running
|
|
1402
|
-
* @throws {CDCError} VALIDATION_ERROR - If event fails validation
|
|
1403
|
-
*
|
|
1404
|
-
* @example
|
|
1405
|
-
* ```typescript
|
|
1406
|
-
* const result = await pipeline.process(event)
|
|
1407
|
-
* if (result.success) {
|
|
1408
|
-
* console.log(`Processed event: ${result.eventId}`)
|
|
1409
|
-
* }
|
|
1410
|
-
* ```
|
|
1411
|
-
*/
|
|
1412
|
-
async process(event) {
|
|
1413
|
-
if (this.state !== 'running') {
|
|
1414
|
-
throw new CDCError('PROCESSING_ERROR', 'Pipeline is not running');
|
|
1415
|
-
}
|
|
1416
|
-
// Validate event
|
|
1417
|
-
validateCDCEvent(event);
|
|
1418
|
-
const startTime = Date.now();
|
|
1419
|
-
await this.batcher.add(event);
|
|
1420
|
-
this.metrics.eventsProcessed++;
|
|
1421
|
-
const latency = Date.now() - startTime;
|
|
1422
|
-
this.processingLatencies.push(latency);
|
|
1423
|
-
this.updateAvgLatency();
|
|
1424
|
-
return { success: true, eventId: event.id };
|
|
1425
|
-
}
|
|
1426
|
-
/**
|
|
1427
|
-
* Processes multiple events.
|
|
1428
|
-
*
|
|
1429
|
-
* @description
|
|
1430
|
-
* Convenience method to process an array of events sequentially.
|
|
1431
|
-
*
|
|
1432
|
-
* @param events - Array of CDC events to process
|
|
1433
|
-
* @returns Promise resolving to array of process results
|
|
1434
|
-
*
|
|
1435
|
-
* @example
|
|
1436
|
-
* ```typescript
|
|
1437
|
-
* const results = await pipeline.processMany(events)
|
|
1438
|
-
* const successCount = results.filter(r => r.success).length
|
|
1439
|
-
* console.log(`Processed ${successCount}/${events.length} events`)
|
|
1440
|
-
* ```
|
|
1441
|
-
*/
|
|
1442
|
-
async processMany(events) {
|
|
1443
|
-
const results = [];
|
|
1444
|
-
for (const event of events) {
|
|
1445
|
-
const result = await this.process(event);
|
|
1446
|
-
results.push(result);
|
|
1447
|
-
}
|
|
1448
|
-
return results;
|
|
1449
|
-
}
|
|
1450
|
-
/**
|
|
1451
|
-
* Manually flushes pending events.
|
|
1452
|
-
*
|
|
1453
|
-
* @description
|
|
1454
|
-
* Forces an immediate flush of the batcher and processes the
|
|
1455
|
-
* resulting batch through the pipeline.
|
|
1456
|
-
*
|
|
1457
|
-
* @example
|
|
1458
|
-
* ```typescript
|
|
1459
|
-
* await pipeline.flush()
|
|
1460
|
-
* console.log('All pending events flushed')
|
|
1461
|
-
* ```
|
|
1462
|
-
*/
|
|
1463
|
-
async flush() {
|
|
1464
|
-
if (this.batcher) {
|
|
1465
|
-
const result = await this.batcher.flush();
|
|
1466
|
-
if (result.eventCount > 0) {
|
|
1467
|
-
await this.handleBatch(result);
|
|
1468
|
-
}
|
|
1469
|
-
}
|
|
1470
|
-
}
|
|
1471
|
-
/**
|
|
1472
|
-
* Handles a batch of events with retry logic.
|
|
1473
|
-
* @private
|
|
1474
|
-
*/
|
|
1475
|
-
async handleBatch(batch) {
|
|
1476
|
-
let attempts = 0;
|
|
1477
|
-
let lastError = null;
|
|
1478
|
-
while (attempts <= this.config.maxRetries) {
|
|
1479
|
-
try {
|
|
1480
|
-
const parquetBatch = this.transformer.eventsToBatch(batch.events);
|
|
1481
|
-
const parquetBuffer = await this.transformer.toParquetBuffer(parquetBatch);
|
|
1482
|
-
const output = {
|
|
1483
|
-
parquetBuffer,
|
|
1484
|
-
events: batch.events,
|
|
1485
|
-
batchId: `batch-${Date.now()}-${Math.random().toString(36).slice(2)}`
|
|
1486
|
-
};
|
|
1487
|
-
// Notify output handlers
|
|
1488
|
-
for (const handler of this.outputHandlers) {
|
|
1489
|
-
handler(output);
|
|
1490
|
-
}
|
|
1491
|
-
this.metrics.batchesGenerated++;
|
|
1492
|
-
this.metrics.bytesWritten += parquetBuffer.length;
|
|
1493
|
-
return; // Success
|
|
1494
|
-
}
|
|
1495
|
-
catch (error) {
|
|
1496
|
-
lastError = error;
|
|
1497
|
-
attempts++;
|
|
1498
|
-
this.metrics.errors++;
|
|
1499
|
-
if (this.retryPolicy.shouldRetry(attempts)) {
|
|
1500
|
-
const delay = this.retryPolicy.getDelay(attempts);
|
|
1501
|
-
await this.sleep(delay);
|
|
1502
|
-
}
|
|
1503
|
-
}
|
|
1504
|
-
}
|
|
1505
|
-
// All retries exhausted - send to dead letter queue
|
|
1506
|
-
if (lastError) {
|
|
1507
|
-
for (const handler of this.deadLetterHandlers) {
|
|
1508
|
-
handler(batch.events, lastError);
|
|
1509
|
-
}
|
|
1510
|
-
}
|
|
1511
|
-
}
|
|
1512
|
-
/**
|
|
1513
|
-
* Sleeps for the specified duration.
|
|
1514
|
-
* @private
|
|
1515
|
-
*/
|
|
1516
|
-
sleep(ms) {
|
|
1517
|
-
return new Promise(resolve => setTimeout(resolve, ms));
|
|
1518
|
-
}
|
|
1519
|
-
/**
|
|
1520
|
-
* Updates the average latency metric.
|
|
1521
|
-
* @private
|
|
1522
|
-
*/
|
|
1523
|
-
updateAvgLatency() {
|
|
1524
|
-
if (this.processingLatencies.length === 0)
|
|
1525
|
-
return;
|
|
1526
|
-
// Keep only last 1000 measurements
|
|
1527
|
-
if (this.processingLatencies.length > 1000) {
|
|
1528
|
-
this.processingLatencies = this.processingLatencies.slice(-1000);
|
|
1529
|
-
}
|
|
1530
|
-
const sum = this.processingLatencies.reduce((a, b) => a + b, 0);
|
|
1531
|
-
this.metrics.avgProcessingLatencyMs = sum / this.processingLatencies.length;
|
|
1532
|
-
}
|
|
1533
|
-
/**
|
|
1534
|
-
* Returns current pipeline metrics.
|
|
1535
|
-
*
|
|
1536
|
-
* @description
|
|
1537
|
-
* Returns a copy of the current metrics. Metrics are cumulative
|
|
1538
|
-
* since pipeline creation.
|
|
1539
|
-
*
|
|
1540
|
-
* @returns Copy of current pipeline metrics
|
|
1541
|
-
*
|
|
1542
|
-
* @example
|
|
1543
|
-
* ```typescript
|
|
1544
|
-
* const metrics = pipeline.getMetrics()
|
|
1545
|
-
* console.log(`Processed: ${metrics.eventsProcessed}`)
|
|
1546
|
-
* console.log(`Batches: ${metrics.batchesGenerated}`)
|
|
1547
|
-
* console.log(`Errors: ${metrics.errors}`)
|
|
1548
|
-
* console.log(`Avg latency: ${metrics.avgProcessingLatencyMs}ms`)
|
|
1549
|
-
* ```
|
|
1550
|
-
*/
|
|
1551
|
-
getMetrics() {
|
|
1552
|
-
return { ...this.metrics };
|
|
1553
|
-
}
|
|
1554
|
-
/**
|
|
1555
|
-
* Registers an output handler.
|
|
1556
|
-
*
|
|
1557
|
-
* @description
|
|
1558
|
-
* Output handlers are called when a batch is successfully processed
|
|
1559
|
-
* and converted to Parquet format. Multiple handlers can be registered.
|
|
1560
|
-
*
|
|
1561
|
-
* @param handler - Callback to invoke for each successful batch
|
|
1562
|
-
*
|
|
1563
|
-
* @example
|
|
1564
|
-
* ```typescript
|
|
1565
|
-
* pipeline.onOutput(async (output) => {
|
|
1566
|
-
* await r2.put(`cdc/${output.batchId}.parquet`, output.parquetBuffer)
|
|
1567
|
-
* console.log(`Wrote ${output.events.length} events`)
|
|
1568
|
-
* })
|
|
1569
|
-
* ```
|
|
1570
|
-
*/
|
|
1571
|
-
onOutput(handler) {
|
|
1572
|
-
this.outputHandlers.push(handler);
|
|
1573
|
-
}
|
|
1574
|
-
/**
|
|
1575
|
-
* Registers a dead letter handler.
|
|
1576
|
-
*
|
|
1577
|
-
* @description
|
|
1578
|
-
* Dead letter handlers are called when a batch fails after all
|
|
1579
|
-
* retry attempts are exhausted. Use this for alerting, logging,
|
|
1580
|
-
* or storing failed events for later reprocessing.
|
|
1581
|
-
*
|
|
1582
|
-
* @param handler - Callback to invoke for failed events
|
|
1583
|
-
*
|
|
1584
|
-
* @example
|
|
1585
|
-
* ```typescript
|
|
1586
|
-
* pipeline.onDeadLetter((events, error) => {
|
|
1587
|
-
* console.error(`Failed to process ${events.length} events:`, error)
|
|
1588
|
-
* // Store in dead letter queue for later retry
|
|
1589
|
-
* await dlq.put(events)
|
|
1590
|
-
* })
|
|
1591
|
-
* ```
|
|
1592
|
-
*/
|
|
1593
|
-
onDeadLetter(handler) {
|
|
1594
|
-
this.deadLetterHandlers.push(handler);
|
|
1595
|
-
}
|
|
1596
|
-
}
|
|
1597
|
-
// ============================================================================
|
|
1598
|
-
// Utility Functions
|
|
1599
|
-
// ============================================================================
|
|
1600
|
-
/**
 * Valid CDC event types for validation.
 *
 * Frozen so the shared lookup table cannot be altered at runtime;
 * validateCDCEvent() only reads it via `includes`.
 * @internal
 */
const VALID_EVENT_TYPES = Object.freeze([
    'OBJECT_CREATED',
    'OBJECT_DELETED',
    'REF_UPDATED',
    'PACK_RECEIVED',
    'COMMIT_CREATED',
    'TREE_MODIFIED',
    'BRANCH_CREATED',
    'BRANCH_DELETED',
    'TAG_CREATED',
    'MERGE_COMPLETED'
]);
|
|
1616
|
-
/**
 * Creates a new CDC event.
 *
 * @description
 * Builds an event object with a generated id of the form
 * `evt-<epoch ms>-<random base-36 suffix>`, the current time as its
 * timestamp, and version 1. The payload is stored by reference, not
 * copied.
 *
 * @param type - The event type
 * @param source - The event source
 * @param payload - Event payload data
 * @param options - Optional configuration
 * @param options.sequence - Custom sequence number (default: 0)
 * @returns A new CDCEvent
 *
 * @example
 * ```typescript
 * const event = createCDCEvent('COMMIT_CREATED', 'push', {
 *   operation: 'commit-create',
 *   sha: 'abc123...',
 *   treeSha: 'def456...',
 *   parentShas: ['parent1...']
 * })
 *
 * // With sequence number
 * const sequencedEvent = createCDCEvent('REF_UPDATED', 'push', {
 *   operation: 'ref-update',
 *   refName: 'refs/heads/main',
 *   oldSha: 'old...',
 *   newSha: 'new...'
 * }, { sequence: 42 })
 * ```
 */
export function createCDCEvent(type, source, payload, options) {
    const randomSuffix = Math.random().toString(36).slice(2);
    const id = `evt-${Date.now()}-${randomSuffix}`;
    const timestamp = Date.now();
    const sequence = options?.sequence ?? 0;
    return { id, type, source, timestamp, payload, sequence, version: 1 };
}
|
|
1659
|
-
/**
 * Serializes a CDC event to bytes.
 *
 * @description
 * Converts a CDCEvent to a UTF-8 encoded JSON document. A truthy
 * `payload.data` is expanded with `Array.from` so that a Uint8Array is
 * written as a plain array of numbers rather than as an index-keyed
 * object. Events without a payload are serialized as-is instead of
 * raising a TypeError; validateCDCEvent() does not require a payload, so
 * such events can reach this function.
 *
 * @param event - The CDC event to serialize
 * @returns The serialized event as a Uint8Array
 *
 * @example
 * ```typescript
 * const bytes = serializeEvent(event)
 * await r2.put(`events/${event.id}`, bytes)
 * ```
 *
 * @see {@link deserializeEvent} - Reverse operation
 */
export function serializeEvent(event) {
    const { payload } = event;
    let serializable = event;
    if (payload != null) {
        // Create a serializable copy (Uint8Array is not JSON-serializable).
        // An absent `data` becomes `undefined`, which JSON.stringify omits.
        serializable = {
            ...event,
            payload: {
                ...payload,
                data: payload.data ? Array.from(payload.data) : undefined
            }
        };
    }
    const json = JSON.stringify(serializable);
    return new TextEncoder().encode(json);
}
|
|
1689
|
-
/**
 * Deserializes bytes to a CDC event.
 *
 * @description
 * Decodes the bytes as text, parses the JSON, and turns `payload.data`
 * back into a Uint8Array when it was stored as an array. All other
 * fields are returned exactly as parsed.
 *
 * @param bytes - The serialized event bytes
 * @returns The deserialized CDCEvent
 *
 * @example
 * ```typescript
 * const bytes = await r2.get(`events/${eventId}`)
 * const event = deserializeEvent(bytes)
 * console.log(`Event type: ${event.type}`)
 * ```
 *
 * @see {@link serializeEvent} - Reverse operation
 */
export function deserializeEvent(bytes) {
    const decoded = JSON.parse(new TextDecoder().decode(bytes));
    const rawData = decoded.payload?.data;
    // Restore Uint8Array if data was serialized
    if (Array.isArray(rawData)) {
        decoded.payload.data = new Uint8Array(rawData);
    }
    return decoded;
}
|
|
1718
|
-
/**
 * Validates a CDC event.
 *
 * @description
 * Checks that an event has all required fields and valid values.
 * Throws a CDCError on the first rule that fails.
 *
 * **Validation Rules:**
 * - Event must not be null/undefined
 * - Event ID must be a non-empty string
 * - Event type must be a valid CDCEventType
 * - Timestamp must be a finite, non-negative number
 * - Sequence must be a finite, non-negative number
 *
 * NaN and ±Infinity are rejected: they are of type 'number' and are not
 * less than zero, so a plain `typeof`/`< 0` check lets them through, and
 * JSON serialization would then turn them into `null`.
 *
 * @param event - The CDC event to validate
 * @returns The validated event (for chaining)
 *
 * @throws {CDCError} VALIDATION_ERROR - If validation fails
 *
 * @example
 * ```typescript
 * try {
 *   validateCDCEvent(event)
 *   // Event is valid
 * } catch (error) {
 *   if (error instanceof CDCError) {
 *     console.log(`Invalid: ${error.message}`)
 *   }
 * }
 * ```
 */
export function validateCDCEvent(event) {
    if (!event) {
        throw new CDCError('VALIDATION_ERROR', 'Event is null or undefined');
    }
    if (typeof event.id !== 'string' || event.id.length === 0) {
        throw new CDCError('VALIDATION_ERROR', 'Event id is missing or invalid');
    }
    if (!VALID_EVENT_TYPES.includes(event.type)) {
        throw new CDCError('VALIDATION_ERROR', `Invalid event type: ${event.type}`);
    }
    // Number.isFinite is false for non-numbers as well as NaN/±Infinity.
    if (!Number.isFinite(event.timestamp) || event.timestamp < 0) {
        throw new CDCError('VALIDATION_ERROR', 'Invalid timestamp');
    }
    if (!Number.isFinite(event.sequence) || event.sequence < 0) {
        throw new CDCError('VALIDATION_ERROR', 'Invalid sequence number');
    }
    return event;
}
|
|
1767
|
-
// ============================================================================
|
|
1768
|
-
// Pipeline Operations
|
|
1769
|
-
// ============================================================================
|
|
1770
|
-
/**
 * Registry of active pipelines by ID.
 *
 * startPipeline() stores each pipeline under the id it was given,
 * stopPipeline() removes the entry after stopping it, and
 * flushPipeline() / getPipelineMetrics() look pipelines up here.
 * @internal
 */
const activePipelines = new Map();
|
|
1775
|
-
/**
 * Starts a new pipeline with the given configuration.
 *
 * @description
 * Creates a CDCPipeline, starts it and registers it under the given ID for
 * later access. If a pipeline with the same ID already exists,
 * it will be replaced (the old pipeline is not automatically stopped).
 *
 * @param id - Unique identifier for the pipeline
 * @param config - Pipeline configuration
 * @returns The started pipeline instance
 *
 * @example
 * ```typescript
 * const pipeline = startPipeline('main', {
 *   batchSize: 100,
 *   flushIntervalMs: 5000,
 *   maxRetries: 3,
 *   parquetCompression: 'snappy',
 *   outputPath: '/analytics',
 *   schemaVersion: 1
 * })
 *
 * // Register handlers
 * pipeline.onOutput((output) => console.log(`Batch: ${output.batchId}`))
 * ```
 */
export function startPipeline(id, config) {
    const created = new CDCPipeline(config);
    // The promise is intentionally not awaited: this function is synchronous
    // and returns the pipeline itself, not a promise.
    void created.start();
    activePipelines.set(id, created);
    return created;
}
|
|
1808
|
-
/**
 * Stops a pipeline by ID.
 *
 * @description
 * Stops the pipeline registered under the given ID, flushing any pending
 * events, and then removes it from the registry. The entry is removed only
 * if it still refers to the pipeline that was stopped, so a pipeline
 * registered under the same ID while the stop was in progress is not
 * removed by mistake.
 *
 * @param id - Pipeline identifier
 * @returns Promise resolving to stop result (0 if pipeline not found)
 *
 * @example
 * ```typescript
 * const result = await stopPipeline('main')
 * console.log(`Flushed ${result.flushedCount} events on shutdown`)
 * ```
 */
export async function stopPipeline(id) {
    const pipeline = activePipelines.get(id);
    if (!pipeline) {
        return { flushedCount: 0 };
    }
    const result = await pipeline.stop();
    // The registry may have changed during the await above; only drop the
    // entry when it is still the pipeline we just stopped.
    if (activePipelines.get(id) === pipeline) {
        activePipelines.delete(id);
    }
    return result;
}
|
|
1833
|
-
/**
 * Flushes a pipeline by ID.
 *
 * @description
 * Looks the pipeline up in the registry and waits for its flush() to
 * finish. No-op if pipeline not found.
 *
 * @param id - Pipeline identifier
 *
 * @example
 * ```typescript
 * await flushPipeline('main')
 * console.log('All pending events flushed')
 * ```
 */
export async function flushPipeline(id) {
    const registered = activePipelines.get(id);
    if (!registered) {
        return;
    }
    await registered.flush();
}
|
|
1854
|
-
/**
 * Gets metrics for a pipeline by ID.
 *
 * @description
 * Looks the pipeline up in the registry and returns whatever its
 * getMetrics() returns. Returns null if the pipeline is not found.
 *
 * @param id - Pipeline identifier
 * @returns Pipeline metrics or null if not found
 *
 * @example
 * ```typescript
 * const metrics = getPipelineMetrics('main')
 * if (metrics) {
 *   console.log(`Events processed: ${metrics.eventsProcessed}`)
 *   console.log(`Errors: ${metrics.errors}`)
 * }
 * ```
 */
export function getPipelineMetrics(id) {
    const registered = activePipelines.get(id);
    return registered ? registered.getMetrics() : null;
}
|
|
1880
|
-
//# sourceMappingURL=cdc-pipeline.js.map
|