gitx.do 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -353
- package/dist/do/logger.d.ts +50 -0
- package/dist/do/logger.d.ts.map +1 -0
- package/dist/do/logger.js +122 -0
- package/dist/do/logger.js.map +1 -0
- package/dist/{durable-object → do}/schema.d.ts +3 -3
- package/dist/do/schema.d.ts.map +1 -0
- package/dist/{durable-object → do}/schema.js +4 -3
- package/dist/do/schema.js.map +1 -0
- package/dist/do/types.d.ts +267 -0
- package/dist/do/types.d.ts.map +1 -0
- package/dist/do/types.js +62 -0
- package/dist/do/types.js.map +1 -0
- package/dist/index.d.ts +15 -415
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +31 -483
- package/dist/index.js.map +1 -1
- package/package.json +13 -21
- package/dist/cli/commands/add.d.ts +0 -174
- package/dist/cli/commands/add.d.ts.map +0 -1
- package/dist/cli/commands/add.js +0 -131
- package/dist/cli/commands/add.js.map +0 -1
- package/dist/cli/commands/blame.d.ts +0 -259
- package/dist/cli/commands/blame.d.ts.map +0 -1
- package/dist/cli/commands/blame.js +0 -609
- package/dist/cli/commands/blame.js.map +0 -1
- package/dist/cli/commands/branch.d.ts +0 -249
- package/dist/cli/commands/branch.d.ts.map +0 -1
- package/dist/cli/commands/branch.js +0 -693
- package/dist/cli/commands/branch.js.map +0 -1
- package/dist/cli/commands/commit.d.ts +0 -182
- package/dist/cli/commands/commit.d.ts.map +0 -1
- package/dist/cli/commands/commit.js +0 -437
- package/dist/cli/commands/commit.js.map +0 -1
- package/dist/cli/commands/diff.d.ts +0 -464
- package/dist/cli/commands/diff.d.ts.map +0 -1
- package/dist/cli/commands/diff.js +0 -958
- package/dist/cli/commands/diff.js.map +0 -1
- package/dist/cli/commands/log.d.ts +0 -239
- package/dist/cli/commands/log.d.ts.map +0 -1
- package/dist/cli/commands/log.js +0 -535
- package/dist/cli/commands/log.js.map +0 -1
- package/dist/cli/commands/merge.d.ts +0 -106
- package/dist/cli/commands/merge.d.ts.map +0 -1
- package/dist/cli/commands/merge.js +0 -55
- package/dist/cli/commands/merge.js.map +0 -1
- package/dist/cli/commands/review.d.ts +0 -457
- package/dist/cli/commands/review.d.ts.map +0 -1
- package/dist/cli/commands/review.js +0 -533
- package/dist/cli/commands/review.js.map +0 -1
- package/dist/cli/commands/status.d.ts +0 -269
- package/dist/cli/commands/status.d.ts.map +0 -1
- package/dist/cli/commands/status.js +0 -493
- package/dist/cli/commands/status.js.map +0 -1
- package/dist/cli/commands/web.d.ts +0 -199
- package/dist/cli/commands/web.d.ts.map +0 -1
- package/dist/cli/commands/web.js +0 -696
- package/dist/cli/commands/web.js.map +0 -1
- package/dist/cli/fs-adapter.d.ts +0 -656
- package/dist/cli/fs-adapter.d.ts.map +0 -1
- package/dist/cli/fs-adapter.js +0 -1179
- package/dist/cli/fs-adapter.js.map +0 -1
- package/dist/cli/fsx-cli-adapter.d.ts +0 -359
- package/dist/cli/fsx-cli-adapter.d.ts.map +0 -1
- package/dist/cli/fsx-cli-adapter.js +0 -619
- package/dist/cli/fsx-cli-adapter.js.map +0 -1
- package/dist/cli/index.d.ts +0 -387
- package/dist/cli/index.d.ts.map +0 -1
- package/dist/cli/index.js +0 -523
- package/dist/cli/index.js.map +0 -1
- package/dist/cli/ui/components/DiffView.d.ts +0 -7
- package/dist/cli/ui/components/DiffView.d.ts.map +0 -1
- package/dist/cli/ui/components/DiffView.js +0 -11
- package/dist/cli/ui/components/DiffView.js.map +0 -1
- package/dist/cli/ui/components/ErrorDisplay.d.ts +0 -6
- package/dist/cli/ui/components/ErrorDisplay.d.ts.map +0 -1
- package/dist/cli/ui/components/ErrorDisplay.js +0 -11
- package/dist/cli/ui/components/ErrorDisplay.js.map +0 -1
- package/dist/cli/ui/components/FuzzySearch.d.ts +0 -9
- package/dist/cli/ui/components/FuzzySearch.d.ts.map +0 -1
- package/dist/cli/ui/components/FuzzySearch.js +0 -12
- package/dist/cli/ui/components/FuzzySearch.js.map +0 -1
- package/dist/cli/ui/components/LoadingSpinner.d.ts +0 -6
- package/dist/cli/ui/components/LoadingSpinner.d.ts.map +0 -1
- package/dist/cli/ui/components/LoadingSpinner.js +0 -10
- package/dist/cli/ui/components/LoadingSpinner.js.map +0 -1
- package/dist/cli/ui/components/NavigationList.d.ts +0 -9
- package/dist/cli/ui/components/NavigationList.d.ts.map +0 -1
- package/dist/cli/ui/components/NavigationList.js +0 -11
- package/dist/cli/ui/components/NavigationList.js.map +0 -1
- package/dist/cli/ui/components/ScrollableContent.d.ts +0 -8
- package/dist/cli/ui/components/ScrollableContent.d.ts.map +0 -1
- package/dist/cli/ui/components/ScrollableContent.js +0 -11
- package/dist/cli/ui/components/ScrollableContent.js.map +0 -1
- package/dist/cli/ui/components/index.d.ts +0 -7
- package/dist/cli/ui/components/index.d.ts.map +0 -1
- package/dist/cli/ui/components/index.js +0 -9
- package/dist/cli/ui/components/index.js.map +0 -1
- package/dist/cli/ui/terminal-ui.d.ts +0 -52
- package/dist/cli/ui/terminal-ui.d.ts.map +0 -1
- package/dist/cli/ui/terminal-ui.js +0 -121
- package/dist/cli/ui/terminal-ui.js.map +0 -1
- package/dist/do/BashModule.d.ts +0 -871
- package/dist/do/BashModule.d.ts.map +0 -1
- package/dist/do/BashModule.js +0 -1143
- package/dist/do/BashModule.js.map +0 -1
- package/dist/do/FsModule.d.ts +0 -601
- package/dist/do/FsModule.d.ts.map +0 -1
- package/dist/do/FsModule.js +0 -1120
- package/dist/do/FsModule.js.map +0 -1
- package/dist/do/GitModule.d.ts +0 -635
- package/dist/do/GitModule.d.ts.map +0 -1
- package/dist/do/GitModule.js +0 -781
- package/dist/do/GitModule.js.map +0 -1
- package/dist/do/GitRepoDO.d.ts +0 -281
- package/dist/do/GitRepoDO.d.ts.map +0 -1
- package/dist/do/GitRepoDO.js +0 -479
- package/dist/do/GitRepoDO.js.map +0 -1
- package/dist/do/bash-ast.d.ts +0 -246
- package/dist/do/bash-ast.d.ts.map +0 -1
- package/dist/do/bash-ast.js +0 -888
- package/dist/do/bash-ast.js.map +0 -1
- package/dist/do/container-executor.d.ts +0 -491
- package/dist/do/container-executor.d.ts.map +0 -1
- package/dist/do/container-executor.js +0 -730
- package/dist/do/container-executor.js.map +0 -1
- package/dist/do/index.d.ts +0 -53
- package/dist/do/index.d.ts.map +0 -1
- package/dist/do/index.js +0 -91
- package/dist/do/index.js.map +0 -1
- package/dist/do/tiered-storage.d.ts +0 -403
- package/dist/do/tiered-storage.d.ts.map +0 -1
- package/dist/do/tiered-storage.js +0 -689
- package/dist/do/tiered-storage.js.map +0 -1
- package/dist/do/withBash.d.ts +0 -231
- package/dist/do/withBash.d.ts.map +0 -1
- package/dist/do/withBash.js +0 -244
- package/dist/do/withBash.js.map +0 -1
- package/dist/do/withFs.d.ts +0 -237
- package/dist/do/withFs.d.ts.map +0 -1
- package/dist/do/withFs.js +0 -387
- package/dist/do/withFs.js.map +0 -1
- package/dist/do/withGit.d.ts +0 -180
- package/dist/do/withGit.d.ts.map +0 -1
- package/dist/do/withGit.js +0 -271
- package/dist/do/withGit.js.map +0 -1
- package/dist/durable-object/object-store.d.ts +0 -633
- package/dist/durable-object/object-store.d.ts.map +0 -1
- package/dist/durable-object/object-store.js +0 -1161
- package/dist/durable-object/object-store.js.map +0 -1
- package/dist/durable-object/schema.d.ts.map +0 -1
- package/dist/durable-object/schema.js.map +0 -1
- package/dist/durable-object/wal.d.ts +0 -416
- package/dist/durable-object/wal.d.ts.map +0 -1
- package/dist/durable-object/wal.js +0 -445
- package/dist/durable-object/wal.js.map +0 -1
- package/dist/mcp/adapter.d.ts +0 -772
- package/dist/mcp/adapter.d.ts.map +0 -1
- package/dist/mcp/adapter.js +0 -895
- package/dist/mcp/adapter.js.map +0 -1
- package/dist/mcp/sandbox/miniflare-evaluator.d.ts +0 -22
- package/dist/mcp/sandbox/miniflare-evaluator.d.ts.map +0 -1
- package/dist/mcp/sandbox/miniflare-evaluator.js +0 -140
- package/dist/mcp/sandbox/miniflare-evaluator.js.map +0 -1
- package/dist/mcp/sandbox/object-store-proxy.d.ts +0 -32
- package/dist/mcp/sandbox/object-store-proxy.d.ts.map +0 -1
- package/dist/mcp/sandbox/object-store-proxy.js +0 -30
- package/dist/mcp/sandbox/object-store-proxy.js.map +0 -1
- package/dist/mcp/sandbox/template.d.ts +0 -17
- package/dist/mcp/sandbox/template.d.ts.map +0 -1
- package/dist/mcp/sandbox/template.js +0 -71
- package/dist/mcp/sandbox/template.js.map +0 -1
- package/dist/mcp/sandbox.d.ts +0 -764
- package/dist/mcp/sandbox.d.ts.map +0 -1
- package/dist/mcp/sandbox.js +0 -1362
- package/dist/mcp/sandbox.js.map +0 -1
- package/dist/mcp/sdk-adapter.d.ts +0 -835
- package/dist/mcp/sdk-adapter.d.ts.map +0 -1
- package/dist/mcp/sdk-adapter.js +0 -974
- package/dist/mcp/sdk-adapter.js.map +0 -1
- package/dist/mcp/tools/do.d.ts +0 -32
- package/dist/mcp/tools/do.d.ts.map +0 -1
- package/dist/mcp/tools/do.js +0 -115
- package/dist/mcp/tools/do.js.map +0 -1
- package/dist/mcp/tools.d.ts +0 -548
- package/dist/mcp/tools.d.ts.map +0 -1
- package/dist/mcp/tools.js +0 -1934
- package/dist/mcp/tools.js.map +0 -1
- package/dist/ops/blame.d.ts +0 -551
- package/dist/ops/blame.d.ts.map +0 -1
- package/dist/ops/blame.js +0 -1037
- package/dist/ops/blame.js.map +0 -1
- package/dist/ops/branch.d.ts +0 -766
- package/dist/ops/branch.d.ts.map +0 -1
- package/dist/ops/branch.js +0 -950
- package/dist/ops/branch.js.map +0 -1
- package/dist/ops/commit-traversal.d.ts +0 -349
- package/dist/ops/commit-traversal.d.ts.map +0 -1
- package/dist/ops/commit-traversal.js +0 -821
- package/dist/ops/commit-traversal.js.map +0 -1
- package/dist/ops/commit.d.ts +0 -555
- package/dist/ops/commit.d.ts.map +0 -1
- package/dist/ops/commit.js +0 -826
- package/dist/ops/commit.js.map +0 -1
- package/dist/ops/merge-base.d.ts +0 -397
- package/dist/ops/merge-base.d.ts.map +0 -1
- package/dist/ops/merge-base.js +0 -691
- package/dist/ops/merge-base.js.map +0 -1
- package/dist/ops/merge.d.ts +0 -855
- package/dist/ops/merge.d.ts.map +0 -1
- package/dist/ops/merge.js +0 -1551
- package/dist/ops/merge.js.map +0 -1
- package/dist/ops/tag.d.ts +0 -247
- package/dist/ops/tag.d.ts.map +0 -1
- package/dist/ops/tag.js +0 -649
- package/dist/ops/tag.js.map +0 -1
- package/dist/ops/tree-builder.d.ts +0 -178
- package/dist/ops/tree-builder.d.ts.map +0 -1
- package/dist/ops/tree-builder.js +0 -271
- package/dist/ops/tree-builder.js.map +0 -1
- package/dist/ops/tree-diff.d.ts +0 -291
- package/dist/ops/tree-diff.d.ts.map +0 -1
- package/dist/ops/tree-diff.js +0 -705
- package/dist/ops/tree-diff.js.map +0 -1
- package/dist/pack/delta.d.ts +0 -248
- package/dist/pack/delta.d.ts.map +0 -1
- package/dist/pack/delta.js +0 -736
- package/dist/pack/delta.js.map +0 -1
- package/dist/pack/format.d.ts +0 -446
- package/dist/pack/format.d.ts.map +0 -1
- package/dist/pack/format.js +0 -572
- package/dist/pack/format.js.map +0 -1
- package/dist/pack/full-generation.d.ts +0 -612
- package/dist/pack/full-generation.d.ts.map +0 -1
- package/dist/pack/full-generation.js +0 -1378
- package/dist/pack/full-generation.js.map +0 -1
- package/dist/pack/generation.d.ts +0 -441
- package/dist/pack/generation.d.ts.map +0 -1
- package/dist/pack/generation.js +0 -707
- package/dist/pack/generation.js.map +0 -1
- package/dist/pack/index.d.ts +0 -502
- package/dist/pack/index.d.ts.map +0 -1
- package/dist/pack/index.js +0 -833
- package/dist/pack/index.js.map +0 -1
- package/dist/refs/branch.d.ts +0 -668
- package/dist/refs/branch.d.ts.map +0 -1
- package/dist/refs/branch.js +0 -897
- package/dist/refs/branch.js.map +0 -1
- package/dist/refs/storage.d.ts +0 -833
- package/dist/refs/storage.d.ts.map +0 -1
- package/dist/refs/storage.js +0 -1023
- package/dist/refs/storage.js.map +0 -1
- package/dist/refs/tag.d.ts +0 -860
- package/dist/refs/tag.d.ts.map +0 -1
- package/dist/refs/tag.js +0 -996
- package/dist/refs/tag.js.map +0 -1
- package/dist/storage/backend.d.ts +0 -425
- package/dist/storage/backend.d.ts.map +0 -1
- package/dist/storage/backend.js +0 -41
- package/dist/storage/backend.js.map +0 -1
- package/dist/storage/fsx-adapter.d.ts +0 -204
- package/dist/storage/fsx-adapter.d.ts.map +0 -1
- package/dist/storage/fsx-adapter.js +0 -470
- package/dist/storage/fsx-adapter.js.map +0 -1
- package/dist/storage/lru-cache.d.ts +0 -691
- package/dist/storage/lru-cache.d.ts.map +0 -1
- package/dist/storage/lru-cache.js +0 -813
- package/dist/storage/lru-cache.js.map +0 -1
- package/dist/storage/object-index.d.ts +0 -585
- package/dist/storage/object-index.d.ts.map +0 -1
- package/dist/storage/object-index.js +0 -532
- package/dist/storage/object-index.js.map +0 -1
- package/dist/storage/r2-pack.d.ts +0 -1257
- package/dist/storage/r2-pack.d.ts.map +0 -1
- package/dist/storage/r2-pack.js +0 -1770
- package/dist/storage/r2-pack.js.map +0 -1
- package/dist/tiered/cdc-pipeline.d.ts +0 -1888
- package/dist/tiered/cdc-pipeline.d.ts.map +0 -1
- package/dist/tiered/cdc-pipeline.js +0 -1880
- package/dist/tiered/cdc-pipeline.js.map +0 -1
- package/dist/tiered/migration.d.ts +0 -1104
- package/dist/tiered/migration.d.ts.map +0 -1
- package/dist/tiered/migration.js +0 -1214
- package/dist/tiered/migration.js.map +0 -1
- package/dist/tiered/parquet-writer.d.ts +0 -1145
- package/dist/tiered/parquet-writer.d.ts.map +0 -1
- package/dist/tiered/parquet-writer.js +0 -1183
- package/dist/tiered/parquet-writer.js.map +0 -1
- package/dist/tiered/read-path.d.ts +0 -835
- package/dist/tiered/read-path.d.ts.map +0 -1
- package/dist/tiered/read-path.js +0 -487
- package/dist/tiered/read-path.js.map +0 -1
- package/dist/types/capability.d.ts +0 -1385
- package/dist/types/capability.d.ts.map +0 -1
- package/dist/types/capability.js +0 -36
- package/dist/types/capability.js.map +0 -1
- package/dist/types/index.d.ts +0 -13
- package/dist/types/index.d.ts.map +0 -1
- package/dist/types/index.js +0 -18
- package/dist/types/index.js.map +0 -1
- package/dist/types/objects.d.ts +0 -692
- package/dist/types/objects.d.ts.map +0 -1
- package/dist/types/objects.js +0 -837
- package/dist/types/objects.js.map +0 -1
- package/dist/types/storage.d.ts +0 -603
- package/dist/types/storage.d.ts.map +0 -1
- package/dist/types/storage.js +0 -191
- package/dist/types/storage.js.map +0 -1
- package/dist/types/worker-loader.d.ts +0 -60
- package/dist/types/worker-loader.d.ts.map +0 -1
- package/dist/types/worker-loader.js +0 -62
- package/dist/types/worker-loader.js.map +0 -1
- package/dist/utils/hash.d.ts +0 -197
- package/dist/utils/hash.d.ts.map +0 -1
- package/dist/utils/hash.js +0 -268
- package/dist/utils/hash.js.map +0 -1
- package/dist/utils/sha1.d.ts +0 -290
- package/dist/utils/sha1.d.ts.map +0 -1
- package/dist/utils/sha1.js +0 -582
- package/dist/utils/sha1.js.map +0 -1
- package/dist/wire/capabilities.d.ts +0 -1044
- package/dist/wire/capabilities.d.ts.map +0 -1
- package/dist/wire/capabilities.js +0 -941
- package/dist/wire/capabilities.js.map +0 -1
- package/dist/wire/path-security.d.ts +0 -157
- package/dist/wire/path-security.d.ts.map +0 -1
- package/dist/wire/path-security.js +0 -307
- package/dist/wire/path-security.js.map +0 -1
- package/dist/wire/pkt-line.d.ts +0 -345
- package/dist/wire/pkt-line.d.ts.map +0 -1
- package/dist/wire/pkt-line.js +0 -381
- package/dist/wire/pkt-line.js.map +0 -1
- package/dist/wire/receive-pack.d.ts +0 -1059
- package/dist/wire/receive-pack.d.ts.map +0 -1
- package/dist/wire/receive-pack.js +0 -1414
- package/dist/wire/receive-pack.js.map +0 -1
- package/dist/wire/smart-http.d.ts +0 -799
- package/dist/wire/smart-http.d.ts.map +0 -1
- package/dist/wire/smart-http.js +0 -945
- package/dist/wire/smart-http.js.map +0 -1
- package/dist/wire/upload-pack.d.ts +0 -727
- package/dist/wire/upload-pack.d.ts.map +0 -1
- package/dist/wire/upload-pack.js +0 -1138
- package/dist/wire/upload-pack.js.map +0 -1
|
@@ -1,1888 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @fileoverview CDC (Change Data Capture) Pipeline for Git Operations
|
|
3
|
-
*
|
|
4
|
-
* @description
|
|
5
|
-
* This module provides a comprehensive Change Data Capture system for Git operations,
|
|
6
|
-
* enabling real-time event streaming, transformation, and analytics for Git repository events.
|
|
7
|
-
*
|
|
8
|
-
* ## Key Features
|
|
9
|
-
*
|
|
10
|
-
* - **Event Capture**: Captures git operations (push, fetch, commits, branches, tags, merges)
|
|
11
|
-
* - **Parquet Transformation**: Converts events to columnar Parquet format for analytics
|
|
12
|
-
* - **Batching**: Efficient event batching with configurable size and time-based flushing
|
|
13
|
-
* - **Retry Policies**: Configurable exponential backoff with jitter for resilient processing
|
|
14
|
-
* - **Dead Letter Queue**: Handles failed events for later reprocessing
|
|
15
|
-
* - **Metrics**: Built-in tracking for events processed, batches, errors, and latency
|
|
16
|
-
*
|
|
17
|
-
* ## Architecture
|
|
18
|
-
*
|
|
19
|
-
* The pipeline consists of several components:
|
|
20
|
-
* 1. **CDCEventCapture**: Captures git operations and converts them to CDCEvents
|
|
21
|
-
* 2. **CDCBatcher**: Batches events for efficient processing
|
|
22
|
-
* 3. **ParquetTransformer**: Transforms events to Parquet format
|
|
23
|
-
* 4. **CDCPipeline**: Orchestrates the entire flow with error handling
|
|
24
|
-
*
|
|
25
|
-
* ## Event Flow
|
|
26
|
-
*
|
|
27
|
-
* ```
|
|
28
|
-
* Git Operation -> CDCEventCapture -> CDCBatcher -> ParquetTransformer -> Output
|
|
29
|
-
* |
|
|
30
|
-
* v
|
|
31
|
-
* (On failure) Dead Letter Queue
|
|
32
|
-
* ```
|
|
33
|
-
*
|
|
34
|
-
* @module tiered/cdc-pipeline
|
|
35
|
-
*
|
|
36
|
-
* @example
|
|
37
|
-
* ```typescript
|
|
38
|
-
* // Create and start a pipeline
|
|
39
|
-
* const pipeline = new CDCPipeline({
|
|
40
|
-
* batchSize: 100,
|
|
41
|
-
* flushIntervalMs: 5000,
|
|
42
|
-
* maxRetries: 3,
|
|
43
|
-
* parquetCompression: 'snappy',
|
|
44
|
-
* outputPath: '/analytics',
|
|
45
|
-
* schemaVersion: 1
|
|
46
|
-
* })
|
|
47
|
-
*
|
|
48
|
-
* await pipeline.start()
|
|
49
|
-
*
|
|
50
|
-
* // Process events
|
|
51
|
-
* pipeline.onOutput((output) => {
|
|
52
|
-
* console.log(`Generated batch: ${output.batchId}`)
|
|
53
|
-
* console.log(`Events: ${output.events.length}`)
|
|
54
|
-
* console.log(`Parquet size: ${output.parquetBuffer.length} bytes`)
|
|
55
|
-
* })
|
|
56
|
-
*
|
|
57
|
-
* pipeline.onDeadLetter((events, error) => {
|
|
58
|
-
* console.error(`Failed events: ${events.length}`, error)
|
|
59
|
-
* })
|
|
60
|
-
*
|
|
61
|
-
* // Create and process an event
|
|
62
|
-
* const event = createCDCEvent('COMMIT_CREATED', 'push', {
|
|
63
|
-
* operation: 'commit-create',
|
|
64
|
-
* sha: 'abc123...',
|
|
65
|
-
* treeSha: 'def456...',
|
|
66
|
-
* parentShas: ['parent1...']
|
|
67
|
-
* })
|
|
68
|
-
*
|
|
69
|
-
* await pipeline.process(event)
|
|
70
|
-
*
|
|
71
|
-
* // Get metrics
|
|
72
|
-
* const metrics = pipeline.getMetrics()
|
|
73
|
-
* console.log(`Processed: ${metrics.eventsProcessed}`)
|
|
74
|
-
* console.log(`Batches: ${metrics.batchesGenerated}`)
|
|
75
|
-
*
|
|
76
|
-
* // Stop the pipeline
|
|
77
|
-
* await pipeline.stop()
|
|
78
|
-
* ```
|
|
79
|
-
*
|
|
80
|
-
* @see {@link CDCPipeline} - Main pipeline orchestration class
|
|
81
|
-
* @see {@link CDCEventCapture} - Event capture from git operations
|
|
82
|
-
* @see {@link ParquetTransformer} - Parquet format transformation
|
|
83
|
-
*/
|
|
84
|
-
/**
|
|
85
|
-
* CDC Event Types representing different git operations.
|
|
86
|
-
*
|
|
87
|
-
* @description
|
|
88
|
-
* Enumeration of all supported Git operation types that can be captured
|
|
89
|
-
* by the CDC system. Each type corresponds to a specific Git operation.
|
|
90
|
-
*
|
|
91
|
-
* @example
|
|
92
|
-
* ```typescript
|
|
93
|
-
* const eventType: CDCEventType = 'COMMIT_CREATED'
|
|
94
|
-
* ```
|
|
95
|
-
*/
|
|
96
|
-
export type CDCEventType = 'OBJECT_CREATED' | 'OBJECT_DELETED' | 'REF_UPDATED' | 'PACK_RECEIVED' | 'COMMIT_CREATED' | 'TREE_MODIFIED' | 'BRANCH_CREATED' | 'BRANCH_DELETED' | 'TAG_CREATED' | 'MERGE_COMPLETED';
|
|
97
|
-
/**
|
|
98
|
-
* CDC Event Source indicating the origin of the event.
|
|
99
|
-
*
|
|
100
|
-
* @description
|
|
101
|
-
* Identifies the source system or operation that generated the CDC event.
|
|
102
|
-
* This helps with event filtering, routing, and analytics.
|
|
103
|
-
*
|
|
104
|
-
* - `push`: Events from git push operations
|
|
105
|
-
* - `fetch`: Events from git fetch operations
|
|
106
|
-
* - `internal`: Events from internal system operations
|
|
107
|
-
* - `replication`: Events from repository replication
|
|
108
|
-
* - `gc`: Events from garbage collection
|
|
109
|
-
*
|
|
110
|
-
* @example
|
|
111
|
-
* ```typescript
|
|
112
|
-
* const source: CDCEventSource = 'push'
|
|
113
|
-
* ```
|
|
114
|
-
*/
|
|
115
|
-
export type CDCEventSource = 'push' | 'fetch' | 'internal' | 'replication' | 'gc';
|
|
116
|
-
/**
|
|
117
|
-
* Payload data for CDC events.
|
|
118
|
-
*
|
|
119
|
-
* @description
|
|
120
|
-
* Contains the detailed data associated with a CDC event. Different event
|
|
121
|
-
* types use different subsets of these fields.
|
|
122
|
-
*
|
|
123
|
-
* @example
|
|
124
|
-
* ```typescript
|
|
125
|
-
* // Commit created payload
|
|
126
|
-
* const payload: CDCEventPayload = {
|
|
127
|
-
* operation: 'commit-create',
|
|
128
|
-
* sha: 'abc123...',
|
|
129
|
-
* treeSha: 'def456...',
|
|
130
|
-
* parentShas: ['parent1...']
|
|
131
|
-
* }
|
|
132
|
-
*
|
|
133
|
-
* // Ref updated payload
|
|
134
|
-
* const refPayload: CDCEventPayload = {
|
|
135
|
-
* operation: 'ref-update',
|
|
136
|
-
* refName: 'refs/heads/main',
|
|
137
|
-
* oldSha: 'old123...',
|
|
138
|
-
* newSha: 'new456...'
|
|
139
|
-
* }
|
|
140
|
-
* ```
|
|
141
|
-
*/
|
|
142
|
-
export interface CDCEventPayload {
|
|
143
|
-
/**
|
|
144
|
-
* The type of operation performed.
|
|
145
|
-
*
|
|
146
|
-
* @example 'commit-create', 'ref-update', 'branch-create'
|
|
147
|
-
*/
|
|
148
|
-
operation: string;
|
|
149
|
-
/**
|
|
150
|
-
* SHA-1 hash of the affected object.
|
|
151
|
-
* Present for object-related events.
|
|
152
|
-
*/
|
|
153
|
-
sha?: string;
|
|
154
|
-
/**
|
|
155
|
-
* Timestamp of the operation in milliseconds since epoch.
|
|
156
|
-
*/
|
|
157
|
-
timestamp?: number;
|
|
158
|
-
/**
|
|
159
|
-
* Raw binary data associated with the event.
|
|
160
|
-
* Used for object creation and pack reception events.
|
|
161
|
-
*/
|
|
162
|
-
data?: Uint8Array;
|
|
163
|
-
/**
|
|
164
|
-
* Additional metadata key-value pairs.
|
|
165
|
-
* Can include object type, size, etc.
|
|
166
|
-
*/
|
|
167
|
-
metadata?: Record<string, unknown>;
|
|
168
|
-
/**
|
|
169
|
-
* Git reference name (e.g., 'refs/heads/main').
|
|
170
|
-
* Present for ref update events.
|
|
171
|
-
*/
|
|
172
|
-
refName?: string;
|
|
173
|
-
/**
|
|
174
|
-
* Previous SHA for ref update events.
|
|
175
|
-
* May be all zeros for new refs.
|
|
176
|
-
*/
|
|
177
|
-
oldSha?: string;
|
|
178
|
-
/**
|
|
179
|
-
* New SHA for ref update events.
|
|
180
|
-
* May be all zeros for deleted refs.
|
|
181
|
-
*/
|
|
182
|
-
newSha?: string;
|
|
183
|
-
/**
|
|
184
|
-
* Number of objects in a pack.
|
|
185
|
-
* Present for pack received events.
|
|
186
|
-
*/
|
|
187
|
-
objectCount?: number;
|
|
188
|
-
/**
|
|
189
|
-
* Tree SHA for commit events.
|
|
190
|
-
*/
|
|
191
|
-
treeSha?: string;
|
|
192
|
-
/**
|
|
193
|
-
* Parent commit SHAs for commit events.
|
|
194
|
-
*/
|
|
195
|
-
parentShas?: string[];
|
|
196
|
-
/**
|
|
197
|
-
* Branch name for branch-related events.
|
|
198
|
-
*/
|
|
199
|
-
branchName?: string;
|
|
200
|
-
/**
|
|
201
|
-
* Tag name for tag-related events.
|
|
202
|
-
*/
|
|
203
|
-
tagName?: string;
|
|
204
|
-
/**
|
|
205
|
-
* Base commit SHA for merge events.
|
|
206
|
-
*/
|
|
207
|
-
baseSha?: string;
|
|
208
|
-
/**
|
|
209
|
-
* Head commit SHA for merge events.
|
|
210
|
-
*/
|
|
211
|
-
headSha?: string;
|
|
212
|
-
}
|
|
213
|
-
/**
|
|
214
|
-
* CDC Event structure representing a single change data capture event.
|
|
215
|
-
*
|
|
216
|
-
* @description
|
|
217
|
-
* A CDCEvent captures a single git operation with all metadata needed
|
|
218
|
-
* for replication, analytics, and auditing. Events are immutable once
|
|
219
|
-
* created and ordered by their sequence number.
|
|
220
|
-
*
|
|
221
|
-
* @example
|
|
222
|
-
* ```typescript
|
|
223
|
-
* const event: CDCEvent = {
|
|
224
|
-
* id: 'evt-1234567890-abc123',
|
|
225
|
-
* type: 'COMMIT_CREATED',
|
|
226
|
-
* source: 'push',
|
|
227
|
-
* timestamp: 1703980800000,
|
|
228
|
-
* payload: {
|
|
229
|
-
* operation: 'commit-create',
|
|
230
|
-
* sha: 'abc123...',
|
|
231
|
-
* treeSha: 'def456...',
|
|
232
|
-
* parentShas: ['parent1...']
|
|
233
|
-
* },
|
|
234
|
-
* sequence: 42,
|
|
235
|
-
* version: 1
|
|
236
|
-
* }
|
|
237
|
-
* ```
|
|
238
|
-
*/
|
|
239
|
-
export interface CDCEvent {
|
|
240
|
-
/**
|
|
241
|
-
* Unique identifier for this event.
|
|
242
|
-
* Format: `evt-{timestamp}-{random}`
|
|
243
|
-
*/
|
|
244
|
-
id: string;
|
|
245
|
-
/**
|
|
246
|
-
* Type of git operation that generated this event.
|
|
247
|
-
*
|
|
248
|
-
* @see {@link CDCEventType}
|
|
249
|
-
*/
|
|
250
|
-
type: CDCEventType;
|
|
251
|
-
/**
|
|
252
|
-
* Source system or operation that generated this event.
|
|
253
|
-
*
|
|
254
|
-
* @see {@link CDCEventSource}
|
|
255
|
-
*/
|
|
256
|
-
source: CDCEventSource;
|
|
257
|
-
/**
|
|
258
|
-
* Unix timestamp in milliseconds when the event was created.
|
|
259
|
-
*/
|
|
260
|
-
timestamp: number;
|
|
261
|
-
/**
|
|
262
|
-
* Event payload containing operation-specific data.
|
|
263
|
-
*/
|
|
264
|
-
payload: CDCEventPayload;
|
|
265
|
-
/**
|
|
266
|
-
* Monotonically increasing sequence number within a capture session.
|
|
267
|
-
* Used for ordering and deduplication.
|
|
268
|
-
*/
|
|
269
|
-
sequence: number;
|
|
270
|
-
/**
|
|
271
|
-
* Schema version of the event format.
|
|
272
|
-
* Used for backward compatibility during upgrades.
|
|
273
|
-
*/
|
|
274
|
-
version: number;
|
|
275
|
-
}
|
|
276
|
-
/**
|
|
277
|
-
* Configuration for the CDC pipeline.
|
|
278
|
-
*
|
|
279
|
-
* @description
|
|
280
|
-
* Defines all configuration options for creating and running a CDC pipeline,
|
|
281
|
-
* including batching behavior, retry policy, and output format.
|
|
282
|
-
*
|
|
283
|
-
* @example
|
|
284
|
-
* ```typescript
|
|
285
|
-
* const config: CDCPipelineConfig = {
|
|
286
|
-
* batchSize: 100, // Flush every 100 events
|
|
287
|
-
* flushIntervalMs: 5000, // Or every 5 seconds
|
|
288
|
-
* maxRetries: 3, // Retry failed batches 3 times
|
|
289
|
-
* parquetCompression: 'snappy',
|
|
290
|
-
* outputPath: '/analytics/cdc',
|
|
291
|
-
* schemaVersion: 1
|
|
292
|
-
* }
|
|
293
|
-
* ```
|
|
294
|
-
*/
|
|
295
|
-
export interface CDCPipelineConfig {
|
|
296
|
-
/**
|
|
297
|
-
* Maximum number of events to batch before flushing.
|
|
298
|
-
* Lower values reduce latency, higher values improve throughput.
|
|
299
|
-
*/
|
|
300
|
-
batchSize: number;
|
|
301
|
-
/**
|
|
302
|
-
* Maximum time in milliseconds to wait before flushing a batch.
|
|
303
|
-
* Ensures events are processed even with low throughput.
|
|
304
|
-
*/
|
|
305
|
-
flushIntervalMs: number;
|
|
306
|
-
/**
|
|
307
|
-
* Maximum number of retry attempts for failed batch processing.
|
|
308
|
-
* Uses exponential backoff between attempts.
|
|
309
|
-
*/
|
|
310
|
-
maxRetries: number;
|
|
311
|
-
/**
|
|
312
|
-
* Compression algorithm for Parquet output.
|
|
313
|
-
*
|
|
314
|
-
* - `snappy`: Fast compression with moderate ratio (recommended)
|
|
315
|
-
* - `gzip`: Higher compression ratio, slower
|
|
316
|
-
* - `none`: No compression
|
|
317
|
-
*/
|
|
318
|
-
parquetCompression: 'snappy' | 'gzip' | 'none';
|
|
319
|
-
/**
|
|
320
|
-
* Base path for output files.
|
|
321
|
-
* Parquet files will be written to this directory.
|
|
322
|
-
*/
|
|
323
|
-
outputPath: string;
|
|
324
|
-
/**
|
|
325
|
-
* Schema version for event format.
|
|
326
|
-
* Used for backward compatibility during upgrades.
|
|
327
|
-
*/
|
|
328
|
-
schemaVersion: number;
|
|
329
|
-
}
|
|
330
|
-
/**
|
|
331
|
-
* Pipeline operational state.
|
|
332
|
-
*
|
|
333
|
-
* @description
|
|
334
|
-
* Indicates the current state of the CDC pipeline.
|
|
335
|
-
*
|
|
336
|
-
* - `stopped`: Pipeline is not running, no events are processed
|
|
337
|
-
* - `running`: Pipeline is active and processing events
|
|
338
|
-
* - `paused`: Pipeline is temporarily suspended (reserved for future use)
|
|
339
|
-
*/
|
|
340
|
-
export type CDCPipelineState = 'stopped' | 'running' | 'paused';
|
|
341
|
-
/**
|
|
342
|
-
* Configuration for event batching.
|
|
343
|
-
*
|
|
344
|
-
* @description
|
|
345
|
-
* Controls how events are grouped into batches for processing.
|
|
346
|
-
*
|
|
347
|
-
* @example
|
|
348
|
-
* ```typescript
|
|
349
|
-
* const config: BatchConfig = {
|
|
350
|
-
* batchSize: 100,
|
|
351
|
-
* flushIntervalMs: 5000
|
|
352
|
-
* }
|
|
353
|
-
* ```
|
|
354
|
-
*/
|
|
355
|
-
export interface BatchConfig {
|
|
356
|
-
/**
|
|
357
|
-
* Maximum number of events per batch.
|
|
358
|
-
*/
|
|
359
|
-
batchSize: number;
|
|
360
|
-
/**
|
|
361
|
-
* Maximum time to wait before flushing a partial batch.
|
|
362
|
-
*/
|
|
363
|
-
flushIntervalMs: number;
|
|
364
|
-
}
|
|
365
|
-
/**
|
|
366
|
-
* Result of a batch flush operation.
|
|
367
|
-
*
|
|
368
|
-
* @description
|
|
369
|
-
* Contains the events in the batch and metadata about the batch
|
|
370
|
-
* for downstream processing and monitoring.
|
|
371
|
-
*
|
|
372
|
-
* @example
|
|
373
|
-
* ```typescript
|
|
374
|
-
* batcher.onBatch((result: BatchResult) => {
|
|
375
|
-
* console.log(`Batch: ${result.eventCount} events`)
|
|
376
|
-
* console.log(`Sequences: ${result.minSequence} - ${result.maxSequence}`)
|
|
377
|
-
* console.log(`Time range: ${result.minTimestamp} - ${result.maxTimestamp}`)
|
|
378
|
-
* })
|
|
379
|
-
* ```
|
|
380
|
-
*/
|
|
381
|
-
export interface BatchResult {
|
|
382
|
-
/**
|
|
383
|
-
* Array of events in this batch.
|
|
384
|
-
*/
|
|
385
|
-
events: CDCEvent[];
|
|
386
|
-
/**
|
|
387
|
-
* Number of events in the batch.
|
|
388
|
-
*/
|
|
389
|
-
eventCount: number;
|
|
390
|
-
/**
|
|
391
|
-
* Whether the batch was processed successfully.
|
|
392
|
-
*/
|
|
393
|
-
success: boolean;
|
|
394
|
-
/**
|
|
395
|
-
* Minimum sequence number in the batch.
|
|
396
|
-
* Useful for tracking progress and resumption.
|
|
397
|
-
*/
|
|
398
|
-
minSequence?: number;
|
|
399
|
-
/**
|
|
400
|
-
* Maximum sequence number in the batch.
|
|
401
|
-
*/
|
|
402
|
-
maxSequence?: number;
|
|
403
|
-
/**
|
|
404
|
-
* Earliest event timestamp in the batch (milliseconds).
|
|
405
|
-
*/
|
|
406
|
-
minTimestamp?: number;
|
|
407
|
-
/**
|
|
408
|
-
* Latest event timestamp in the batch (milliseconds).
|
|
409
|
-
*/
|
|
410
|
-
maxTimestamp?: number;
|
|
411
|
-
}
|
|
412
|
-
/**
|
|
413
|
-
* CDC Error types for categorizing failures.
|
|
414
|
-
*
|
|
415
|
-
* @description
|
|
416
|
-
* Error codes that help identify the type of failure for
|
|
417
|
-
* appropriate error handling and recovery strategies.
|
|
418
|
-
*
|
|
419
|
-
* - `VALIDATION_ERROR`: Event failed validation checks
|
|
420
|
-
* - `PROCESSING_ERROR`: Error during event processing
|
|
421
|
-
* - `SERIALIZATION_ERROR`: Error serializing/deserializing events
|
|
422
|
-
* - `STORAGE_ERROR`: Error writing to storage
|
|
423
|
-
* - `TIMEOUT_ERROR`: Operation timed out
|
|
424
|
-
* - `BUFFER_OVERFLOW_ERROR`: Event buffer exceeded capacity
|
|
425
|
-
* - `UNKNOWN_ERROR`: Unclassified error
|
|
426
|
-
*/
|
|
427
|
-
export type CDCErrorType = 'VALIDATION_ERROR' | 'PROCESSING_ERROR' | 'SERIALIZATION_ERROR' | 'STORAGE_ERROR' | 'TIMEOUT_ERROR' | 'BUFFER_OVERFLOW_ERROR' | 'UNKNOWN_ERROR';
|
|
428
|
-
/**
|
|
429
|
-
* Field definition for Parquet schema.
|
|
430
|
-
*
|
|
431
|
-
* @description
|
|
432
|
-
* Defines a single column in the Parquet output schema.
|
|
433
|
-
*/
|
|
434
|
-
export interface ParquetField {
|
|
435
|
-
/**
|
|
436
|
-
* Column name.
|
|
437
|
-
*/
|
|
438
|
-
name: string;
|
|
439
|
-
/**
|
|
440
|
-
* Column data type (STRING, INT64, TIMESTAMP, etc.).
|
|
441
|
-
*/
|
|
442
|
-
type: string;
|
|
443
|
-
/**
|
|
444
|
-
* Whether the column can contain null values.
|
|
445
|
-
*/
|
|
446
|
-
nullable: boolean;
|
|
447
|
-
}
|
|
448
|
-
/**
|
|
449
|
-
* Row representation for Parquet output.
|
|
450
|
-
*
|
|
451
|
-
* @description
|
|
452
|
-
* Represents a single CDC event as a Parquet row with
|
|
453
|
-
* flattened fields for efficient columnar storage.
|
|
454
|
-
*/
|
|
455
|
-
export interface ParquetRow {
|
|
456
|
-
/**
|
|
457
|
-
* Event unique identifier.
|
|
458
|
-
*/
|
|
459
|
-
event_id: string;
|
|
460
|
-
/**
|
|
461
|
-
* Event type (e.g., 'COMMIT_CREATED').
|
|
462
|
-
*/
|
|
463
|
-
event_type: string;
|
|
464
|
-
/**
|
|
465
|
-
* Event source (e.g., 'push').
|
|
466
|
-
*/
|
|
467
|
-
source: string;
|
|
468
|
-
/**
|
|
469
|
-
* Event timestamp in milliseconds.
|
|
470
|
-
*/
|
|
471
|
-
timestamp: number;
|
|
472
|
-
/**
|
|
473
|
-
* Event sequence number.
|
|
474
|
-
*/
|
|
475
|
-
sequence: number;
|
|
476
|
-
/**
|
|
477
|
-
* Event schema version.
|
|
478
|
-
*/
|
|
479
|
-
version: number;
|
|
480
|
-
/**
|
|
481
|
-
* JSON-serialized event payload.
|
|
482
|
-
*/
|
|
483
|
-
payload_json: string;
|
|
484
|
-
/**
|
|
485
|
-
* SHA from the payload, extracted for efficient filtering.
|
|
486
|
-
*/
|
|
487
|
-
sha: string | null;
|
|
488
|
-
}
|
|
489
|
-
/**
|
|
490
|
-
* Batch of Parquet rows ready for writing.
|
|
491
|
-
*
|
|
492
|
-
* @description
|
|
493
|
-
* Contains transformed rows and metadata needed to write
|
|
494
|
-
* a Parquet file.
|
|
495
|
-
*/
|
|
496
|
-
export interface ParquetBatch {
|
|
497
|
-
/**
|
|
498
|
-
* Array of Parquet rows.
|
|
499
|
-
*/
|
|
500
|
-
rows: ParquetRow[];
|
|
501
|
-
/**
|
|
502
|
-
* Number of rows in the batch.
|
|
503
|
-
*/
|
|
504
|
-
rowCount: number;
|
|
505
|
-
/**
|
|
506
|
-
* Batch creation timestamp.
|
|
507
|
-
*/
|
|
508
|
-
createdAt: number;
|
|
509
|
-
/**
|
|
510
|
-
* Parquet schema definition.
|
|
511
|
-
*/
|
|
512
|
-
schema: {
|
|
513
|
-
fields: ParquetField[];
|
|
514
|
-
};
|
|
515
|
-
/**
|
|
516
|
-
* Compression algorithm used.
|
|
517
|
-
*/
|
|
518
|
-
compression: string;
|
|
519
|
-
}
|
|
520
|
-
/**
|
|
521
|
-
* Output from the CDC pipeline.
|
|
522
|
-
*
|
|
523
|
-
* @description
|
|
524
|
-
* Contains the Parquet-formatted data and metadata for a
|
|
525
|
-
* processed batch of events.
|
|
526
|
-
*
|
|
527
|
-
* @example
|
|
528
|
-
* ```typescript
|
|
529
|
-
* pipeline.onOutput((output: PipelineOutput) => {
|
|
530
|
-
* console.log(`Batch ID: ${output.batchId}`)
|
|
531
|
-
* console.log(`Events: ${output.events.length}`)
|
|
532
|
-
* console.log(`Size: ${output.parquetBuffer.length} bytes`)
|
|
533
|
-
*
|
|
534
|
-
* // Write to storage
|
|
535
|
-
* await r2.put(`cdc/${output.batchId}.parquet`, output.parquetBuffer)
|
|
536
|
-
* })
|
|
537
|
-
* ```
|
|
538
|
-
*/
|
|
539
|
-
export interface PipelineOutput {
|
|
540
|
-
/**
|
|
541
|
-
* Parquet-formatted data as a byte array.
|
|
542
|
-
*/
|
|
543
|
-
parquetBuffer: Uint8Array;
|
|
544
|
-
/**
|
|
545
|
-
* Original events included in this batch.
|
|
546
|
-
*/
|
|
547
|
-
events: CDCEvent[];
|
|
548
|
-
/**
|
|
549
|
-
* Unique identifier for this batch.
|
|
550
|
-
* Format: `batch-{timestamp}-{random}`
|
|
551
|
-
*/
|
|
552
|
-
batchId: string;
|
|
553
|
-
}
|
|
554
|
-
/**
|
|
555
|
-
* Metrics for monitoring pipeline performance.
|
|
556
|
-
*
|
|
557
|
-
* @description
|
|
558
|
-
* Provides operational metrics for monitoring and alerting
|
|
559
|
-
* on pipeline health and performance.
|
|
560
|
-
*
|
|
561
|
-
* @example
|
|
562
|
-
* ```typescript
|
|
563
|
-
* const metrics = pipeline.getMetrics()
|
|
564
|
-
* console.log(`Events processed: ${metrics.eventsProcessed}`)
|
|
565
|
-
* console.log(`Batches generated: ${metrics.batchesGenerated}`)
|
|
566
|
-
* console.log(`Bytes written: ${metrics.bytesWritten}`)
|
|
567
|
-
* console.log(`Errors: ${metrics.errors}`)
|
|
568
|
-
* console.log(`Avg latency: ${metrics.avgProcessingLatencyMs}ms`)
|
|
569
|
-
* ```
|
|
570
|
-
*/
|
|
571
|
-
export interface PipelineMetrics {
|
|
572
|
-
/**
|
|
573
|
-
* Total number of events processed.
|
|
574
|
-
*/
|
|
575
|
-
eventsProcessed: number;
|
|
576
|
-
/**
|
|
577
|
-
* Total number of batches generated.
|
|
578
|
-
*/
|
|
579
|
-
batchesGenerated: number;
|
|
580
|
-
/**
|
|
581
|
-
* Total bytes written to output.
|
|
582
|
-
*/
|
|
583
|
-
bytesWritten: number;
|
|
584
|
-
/**
|
|
585
|
-
* Total number of errors encountered.
|
|
586
|
-
*/
|
|
587
|
-
errors: number;
|
|
588
|
-
/**
|
|
589
|
-
* Average event processing latency in milliseconds.
|
|
590
|
-
* Calculated from the last 1000 events.
|
|
591
|
-
*/
|
|
592
|
-
avgProcessingLatencyMs: number;
|
|
593
|
-
}
|
|
594
|
-
/**
|
|
595
|
-
* Result of processing a single event.
|
|
596
|
-
*
|
|
597
|
-
* @description
|
|
598
|
-
* Returned when an event is successfully queued for processing.
|
|
599
|
-
*/
|
|
600
|
-
export interface ProcessResult {
|
|
601
|
-
/**
|
|
602
|
-
* Whether the event was successfully queued.
|
|
603
|
-
*/
|
|
604
|
-
success: boolean;
|
|
605
|
-
/**
|
|
606
|
-
* ID of the processed event.
|
|
607
|
-
*/
|
|
608
|
-
eventId: string;
|
|
609
|
-
}
|
|
610
|
-
/**
|
|
611
|
-
* Result of stopping the pipeline.
|
|
612
|
-
*
|
|
613
|
-
* @description
|
|
614
|
-
* Contains information about any pending events that were
|
|
615
|
-
* flushed during shutdown.
|
|
616
|
-
*/
|
|
617
|
-
export interface StopResult {
|
|
618
|
-
/**
|
|
619
|
-
* Number of events flushed during stop.
|
|
620
|
-
*/
|
|
621
|
-
flushedCount: number;
|
|
622
|
-
}
|
|
623
|
-
/**
|
|
624
|
-
* Custom error class for CDC operations.
|
|
625
|
-
*
|
|
626
|
-
* @description
|
|
627
|
-
* CDCError provides structured error information for CDC pipeline failures,
|
|
628
|
-
* including an error type for programmatic handling and optional cause for
|
|
629
|
-
* error chaining.
|
|
630
|
-
*
|
|
631
|
-
* @example
|
|
632
|
-
* ```typescript
|
|
633
|
-
* try {
|
|
634
|
-
* await pipeline.process(event)
|
|
635
|
-
* } catch (error) {
|
|
636
|
-
* if (error instanceof CDCError) {
|
|
637
|
-
* switch (error.type) {
|
|
638
|
-
* case 'VALIDATION_ERROR':
|
|
639
|
-
* console.log('Invalid event:', error.message)
|
|
640
|
-
* break
|
|
641
|
-
* case 'PROCESSING_ERROR':
|
|
642
|
-
* console.log('Processing failed:', error.message)
|
|
643
|
-
* if (error.cause) {
|
|
644
|
-
* console.log('Caused by:', error.cause.message)
|
|
645
|
-
* }
|
|
646
|
-
* break
|
|
647
|
-
* }
|
|
648
|
-
* }
|
|
649
|
-
* }
|
|
650
|
-
* ```
|
|
651
|
-
*
|
|
652
|
-
* @class CDCError
|
|
653
|
-
* @extends Error
|
|
654
|
-
*/
|
|
655
|
-
export declare class CDCError extends Error {
|
|
656
|
-
readonly type: CDCErrorType;
|
|
657
|
-
readonly cause?: Error | undefined;
|
|
658
|
-
/**
|
|
659
|
-
* Creates a new CDCError.
|
|
660
|
-
*
|
|
661
|
-
* @param type - Error type for categorization
|
|
662
|
-
* @param message - Human-readable error message
|
|
663
|
-
* @param cause - Optional underlying error that caused this error
|
|
664
|
-
*/
|
|
665
|
-
constructor(type: CDCErrorType, message: string, cause?: Error | undefined);
|
|
666
|
-
}
|
|
667
|
-
/**
|
|
668
|
-
* Configuration for the retry policy.
|
|
669
|
-
*
|
|
670
|
-
* @description
|
|
671
|
-
* Configures exponential backoff behavior for failed operations.
|
|
672
|
-
*
|
|
673
|
-
* @example
|
|
674
|
-
* ```typescript
|
|
675
|
-
* const config: RetryPolicyConfig = {
|
|
676
|
-
* maxRetries: 3,
|
|
677
|
-
* initialDelayMs: 100,
|
|
678
|
-
* maxDelayMs: 5000,
|
|
679
|
-
* backoffMultiplier: 2,
|
|
680
|
-
* jitter: true // Add randomness to prevent thundering herd
|
|
681
|
-
* }
|
|
682
|
-
* ```
|
|
683
|
-
*/
|
|
684
|
-
export interface RetryPolicyConfig {
|
|
685
|
-
/**
|
|
686
|
-
* Maximum number of retry attempts before giving up.
|
|
687
|
-
*/
|
|
688
|
-
maxRetries: number;
|
|
689
|
-
/**
|
|
690
|
-
* Initial delay in milliseconds before first retry.
|
|
691
|
-
*/
|
|
692
|
-
initialDelayMs: number;
|
|
693
|
-
/**
|
|
694
|
-
* Maximum delay in milliseconds between retries.
|
|
695
|
-
* Caps exponential growth.
|
|
696
|
-
*/
|
|
697
|
-
maxDelayMs: number;
|
|
698
|
-
/**
|
|
699
|
-
* Multiplier applied to delay after each attempt.
|
|
700
|
-
* A value of 2 doubles the delay each time.
|
|
701
|
-
*/
|
|
702
|
-
backoffMultiplier: number;
|
|
703
|
-
/**
|
|
704
|
-
* Whether to add random jitter to delays.
|
|
705
|
-
* Helps prevent thundering herd problems.
|
|
706
|
-
*/
|
|
707
|
-
jitter?: boolean;
|
|
708
|
-
}
|
|
709
|
-
/**
|
|
710
|
-
* Retry policy implementing exponential backoff with optional jitter.
|
|
711
|
-
*
|
|
712
|
-
* @description
|
|
713
|
-
* Provides a robust retry mechanism for handling transient failures.
|
|
714
|
-
* Uses exponential backoff to space out retry attempts, with optional
|
|
715
|
-
* jitter to prevent synchronized retries from multiple clients.
|
|
716
|
-
*
|
|
717
|
-
* **Backoff Formula:**
|
|
718
|
-
* `delay = min(initialDelay * (multiplier ^ attempt), maxDelay)`
|
|
719
|
-
*
|
|
720
|
-
* **With Jitter:**
|
|
721
|
-
* `delay = delay * random(0.5, 1.5)`
|
|
722
|
-
*
|
|
723
|
-
* @example
|
|
724
|
-
* ```typescript
|
|
725
|
-
* const policy = new CDCRetryPolicy({
|
|
726
|
-
* maxRetries: 3,
|
|
727
|
-
* initialDelayMs: 100,
|
|
728
|
-
* maxDelayMs: 5000,
|
|
729
|
-
* backoffMultiplier: 2,
|
|
730
|
-
* jitter: true
|
|
731
|
-
* })
|
|
732
|
-
*
|
|
733
|
-
* let attempts = 0
|
|
734
|
-
* while (attempts < 10) {
|
|
735
|
-
* try {
|
|
736
|
-
* await doOperation()
|
|
737
|
-
* break
|
|
738
|
-
* } catch (error) {
|
|
739
|
-
* attempts++
|
|
740
|
-
* if (!policy.shouldRetry(attempts)) {
|
|
741
|
-
* throw new Error('Max retries exceeded')
|
|
742
|
-
* }
|
|
743
|
-
* const delay = policy.getDelay(attempts)
|
|
744
|
-
* console.log(`Retry ${attempts} after ${delay}ms`)
|
|
745
|
-
* await sleep(delay)
|
|
746
|
-
* }
|
|
747
|
-
* }
|
|
748
|
-
* ```
|
|
749
|
-
*
|
|
750
|
-
* @class CDCRetryPolicy
|
|
751
|
-
*/
|
|
752
|
-
export declare class CDCRetryPolicy {
|
|
753
|
-
/**
|
|
754
|
-
* Retry configuration.
|
|
755
|
-
* @private
|
|
756
|
-
*/
|
|
757
|
-
private readonly config;
|
|
758
|
-
/**
|
|
759
|
-
* Creates a new retry policy.
|
|
760
|
-
*
|
|
761
|
-
* @param config - Retry policy configuration
|
|
762
|
-
*/
|
|
763
|
-
constructor(config: RetryPolicyConfig);
|
|
764
|
-
/**
|
|
765
|
-
* Determines whether another retry should be attempted.
|
|
766
|
-
*
|
|
767
|
-
* @param attemptCount - Number of attempts already made
|
|
768
|
-
* @returns true if more retries are allowed, false otherwise
|
|
769
|
-
*
|
|
770
|
-
* @example
|
|
771
|
-
* ```typescript
|
|
772
|
-
* if (policy.shouldRetry(3)) {
|
|
773
|
-
* // Retry is allowed
|
|
774
|
-
* }
|
|
775
|
-
* ```
|
|
776
|
-
*/
|
|
777
|
-
shouldRetry(attemptCount: number): boolean;
|
|
778
|
-
/**
|
|
779
|
-
* Calculates the delay before the next retry.
|
|
780
|
-
*
|
|
781
|
-
* @description
|
|
782
|
-
* Computes delay using exponential backoff, capped at maxDelayMs.
|
|
783
|
-
* If jitter is enabled, applies a random factor between 0.5x and 1.5x.
|
|
784
|
-
*
|
|
785
|
-
* @param attemptCount - Number of attempts already made (1-indexed)
|
|
786
|
-
* @returns Delay in milliseconds before next retry
|
|
787
|
-
*
|
|
788
|
-
* @example
|
|
789
|
-
* ```typescript
|
|
790
|
-
* // With initialDelay=100, multiplier=2:
|
|
791
|
-
* // Attempt 1: 100ms * 2^0 = 100ms
|
|
792
|
-
* // Attempt 2: 100ms * 2^1 = 200ms
|
|
793
|
-
* // Attempt 3: 100ms * 2^2 = 400ms
|
|
794
|
-
* const delay = policy.getDelay(attemptCount)
|
|
795
|
-
* await sleep(delay)
|
|
796
|
-
* ```
|
|
797
|
-
*/
|
|
798
|
-
getDelay(attemptCount: number): number;
|
|
799
|
-
}
|
|
800
|
-
/**
|
|
801
|
-
* Configuration options for CDC event capture.
|
|
802
|
-
*
|
|
803
|
-
* @example
|
|
804
|
-
* ```typescript
|
|
805
|
-
* const options: CDCEventCaptureOptions = {
|
|
806
|
-
* maxBufferSize: 1000 // Auto-flush when buffer reaches 1000 events
|
|
807
|
-
* }
|
|
808
|
-
* ```
|
|
809
|
-
*/
|
|
810
|
-
export interface CDCEventCaptureOptions {
|
|
811
|
-
/**
|
|
812
|
-
* Maximum number of events to buffer before auto-flushing.
|
|
813
|
-
* Defaults to Infinity (no auto-flush).
|
|
814
|
-
*/
|
|
815
|
-
maxBufferSize?: number;
|
|
816
|
-
}
|
|
817
|
-
/**
|
|
818
|
-
* Callback function for git operation events.
|
|
819
|
-
*
|
|
820
|
-
* @param event - The captured CDC event
|
|
821
|
-
*/
|
|
822
|
-
export type GitOperationListener = (event: CDCEvent) => void;
|
|
823
|
-
/**
|
|
824
|
-
* Captures git operations and converts them to CDC events.
|
|
825
|
-
*
|
|
826
|
-
* @description
|
|
827
|
-
* CDCEventCapture hooks into git operations and generates CDCEvents for each
|
|
828
|
-
* operation. It maintains an internal buffer of events that can be flushed
|
|
829
|
-
* manually or automatically when the buffer reaches a configured size.
|
|
830
|
-
*
|
|
831
|
-
* **Supported Operations:**
|
|
832
|
-
* - Object creation/deletion (blobs, trees, commits, tags)
|
|
833
|
-
* - Reference updates (branches, tags)
|
|
834
|
-
* - Commit creation
|
|
835
|
-
* - Pack reception
|
|
836
|
-
* - Branch creation/deletion
|
|
837
|
-
* - Tag creation
|
|
838
|
-
* - Merge completion
|
|
839
|
-
*
|
|
840
|
-
* **Event Ordering:**
|
|
841
|
-
* Events are assigned monotonically increasing sequence numbers within a
|
|
842
|
-
* capture session. This ensures proper ordering for replay and analytics.
|
|
843
|
-
*
|
|
844
|
-
* @example
|
|
845
|
-
* ```typescript
|
|
846
|
-
* const capture = new CDCEventCapture({ maxBufferSize: 100 })
|
|
847
|
-
*
|
|
848
|
-
* // Add a listener for real-time processing
|
|
849
|
-
* capture.addListener((event) => {
|
|
850
|
-
* console.log(`Event: ${event.type} - ${event.id}`)
|
|
851
|
-
* })
|
|
852
|
-
*
|
|
853
|
-
* // Capture git operations
|
|
854
|
-
* await capture.onCommitCreated('abc123...', 'tree456...', ['parent789...'])
|
|
855
|
-
* await capture.onRefUpdate('refs/heads/main', 'old...', 'new...')
|
|
856
|
-
*
|
|
857
|
-
* // Get buffered events
|
|
858
|
-
* console.log(`Buffer size: ${capture.getBufferSize()}`)
|
|
859
|
-
*
|
|
860
|
-
* // Flush buffer
|
|
861
|
-
* const events = await capture.flush()
|
|
862
|
-
* console.log(`Flushed ${events.length} events`)
|
|
863
|
-
* ```
|
|
864
|
-
*
|
|
865
|
-
* @class CDCEventCapture
|
|
866
|
-
*/
|
|
867
|
-
export declare class CDCEventCapture {
|
|
868
|
-
/**
|
|
869
|
-
* Buffer of captured events.
|
|
870
|
-
* @private
|
|
871
|
-
*/
|
|
872
|
-
private events;
|
|
873
|
-
/**
|
|
874
|
-
* Monotonically increasing sequence counter.
|
|
875
|
-
* @private
|
|
876
|
-
*/
|
|
877
|
-
private sequenceCounter;
|
|
878
|
-
/**
|
|
879
|
-
* Registered event listeners.
|
|
880
|
-
* @private
|
|
881
|
-
*/
|
|
882
|
-
private listeners;
|
|
883
|
-
/**
|
|
884
|
-
* Maximum buffer size before auto-flush.
|
|
885
|
-
* @private
|
|
886
|
-
*/
|
|
887
|
-
private readonly maxBufferSize;
|
|
888
|
-
/**
|
|
889
|
-
* Creates a new CDC event capture instance.
|
|
890
|
-
*
|
|
891
|
-
* @param options - Configuration options
|
|
892
|
-
*/
|
|
893
|
-
constructor(options?: CDCEventCaptureOptions);
|
|
894
|
-
/**
|
|
895
|
-
* Generates a unique event ID.
|
|
896
|
-
* @private
|
|
897
|
-
*/
|
|
898
|
-
private generateEventId;
|
|
899
|
-
/**
|
|
900
|
-
* Emits an event to the buffer and notifies listeners.
|
|
901
|
-
* @private
|
|
902
|
-
*/
|
|
903
|
-
private emitEvent;
|
|
904
|
-
/**
|
|
905
|
-
* Returns the next sequence number.
|
|
906
|
-
* @private
|
|
907
|
-
*/
|
|
908
|
-
private nextSequence;
|
|
909
|
-
/**
|
|
910
|
-
* Captures an object put (creation) operation.
|
|
911
|
-
*
|
|
912
|
-
* @description
|
|
913
|
-
* Called when a git object (blob, tree, commit, tag) is written to storage.
|
|
914
|
-
*
|
|
915
|
-
* @param sha - SHA-1 hash of the object
|
|
916
|
-
* @param type - Object type (blob, tree, commit, tag)
|
|
917
|
-
* @param data - Raw object data
|
|
918
|
-
*
|
|
919
|
-
* @example
|
|
920
|
-
* ```typescript
|
|
921
|
-
* await capture.onObjectPut('abc123...', 'blob', blobData)
|
|
922
|
-
* ```
|
|
923
|
-
*/
|
|
924
|
-
onObjectPut(sha: string, type: string, data: Uint8Array): Promise<void>;
|
|
925
|
-
/**
|
|
926
|
-
* Captures an object deletion operation.
|
|
927
|
-
*
|
|
928
|
-
* @description
|
|
929
|
-
* Called when a git object is deleted, typically during garbage collection.
|
|
930
|
-
*
|
|
931
|
-
* @param sha - SHA-1 hash of the deleted object
|
|
932
|
-
*
|
|
933
|
-
* @example
|
|
934
|
-
* ```typescript
|
|
935
|
-
* await capture.onObjectDelete('abc123...')
|
|
936
|
-
* ```
|
|
937
|
-
*/
|
|
938
|
-
onObjectDelete(sha: string): Promise<void>;
|
|
939
|
-
/**
|
|
940
|
-
* Captures a reference update operation.
|
|
941
|
-
*
|
|
942
|
-
* @description
|
|
943
|
-
* Called when a git reference (branch, tag) is updated to point to a new commit.
|
|
944
|
-
*
|
|
945
|
-
* @param refName - Full reference name (e.g., 'refs/heads/main')
|
|
946
|
-
* @param oldSha - Previous SHA (all zeros for new refs)
|
|
947
|
-
* @param newSha - New SHA (all zeros for deleted refs)
|
|
948
|
-
*
|
|
949
|
-
* @example
|
|
950
|
-
* ```typescript
|
|
951
|
-
* await capture.onRefUpdate(
|
|
952
|
-
* 'refs/heads/main',
|
|
953
|
-
* 'oldcommit123...',
|
|
954
|
-
* 'newcommit456...'
|
|
955
|
-
* )
|
|
956
|
-
* ```
|
|
957
|
-
*/
|
|
958
|
-
onRefUpdate(refName: string, oldSha: string, newSha: string): Promise<void>;
|
|
959
|
-
/**
|
|
960
|
-
* Captures a commit creation operation.
|
|
961
|
-
*
|
|
962
|
-
* @description
|
|
963
|
-
* Called when a new commit object is created.
|
|
964
|
-
*
|
|
965
|
-
* @param commitSha - SHA-1 hash of the commit
|
|
966
|
-
* @param treeSha - SHA-1 hash of the tree the commit points to
|
|
967
|
-
* @param parentShas - Array of parent commit SHAs
|
|
968
|
-
*
|
|
969
|
-
* @example
|
|
970
|
-
* ```typescript
|
|
971
|
-
* await capture.onCommitCreated(
|
|
972
|
-
* 'commitabc123...',
|
|
973
|
-
* 'treedef456...',
|
|
974
|
-
* ['parent1...', 'parent2...']
|
|
975
|
-
* )
|
|
976
|
-
* ```
|
|
977
|
-
*/
|
|
978
|
-
onCommitCreated(commitSha: string, treeSha: string, parentShas: string[]): Promise<void>;
|
|
979
|
-
/**
|
|
980
|
-
* Captures a pack reception operation.
|
|
981
|
-
*
|
|
982
|
-
* @description
|
|
983
|
-
* Called when a packfile is received during a push or fetch operation.
|
|
984
|
-
*
|
|
985
|
-
* @param packData - Raw packfile data
|
|
986
|
-
* @param objectCount - Number of objects in the pack
|
|
987
|
-
*
|
|
988
|
-
* @example
|
|
989
|
-
* ```typescript
|
|
990
|
-
* await capture.onPackReceived(packBuffer, 42)
|
|
991
|
-
* ```
|
|
992
|
-
*/
|
|
993
|
-
onPackReceived(packData: Uint8Array, objectCount: number): Promise<void>;
|
|
994
|
-
/**
|
|
995
|
-
* Captures a branch creation operation.
|
|
996
|
-
*
|
|
997
|
-
* @param branchName - Name of the branch (without refs/heads/ prefix)
|
|
998
|
-
* @param sha - SHA-1 hash the branch points to
|
|
999
|
-
*
|
|
1000
|
-
* @example
|
|
1001
|
-
* ```typescript
|
|
1002
|
-
* await capture.onBranchCreated('feature-x', 'abc123...')
|
|
1003
|
-
* ```
|
|
1004
|
-
*/
|
|
1005
|
-
onBranchCreated(branchName: string, sha: string): Promise<void>;
|
|
1006
|
-
/**
|
|
1007
|
-
* Captures a branch deletion operation.
|
|
1008
|
-
*
|
|
1009
|
-
* @param branchName - Name of the deleted branch
|
|
1010
|
-
*
|
|
1011
|
-
* @example
|
|
1012
|
-
* ```typescript
|
|
1013
|
-
* await capture.onBranchDeleted('feature-x')
|
|
1014
|
-
* ```
|
|
1015
|
-
*/
|
|
1016
|
-
onBranchDeleted(branchName: string): Promise<void>;
|
|
1017
|
-
/**
|
|
1018
|
-
* Captures a tag creation operation.
|
|
1019
|
-
*
|
|
1020
|
-
* @param tagName - Name of the tag
|
|
1021
|
-
* @param sha - SHA-1 hash the tag points to
|
|
1022
|
-
*
|
|
1023
|
-
* @example
|
|
1024
|
-
* ```typescript
|
|
1025
|
-
* await capture.onTagCreated('v1.0.0', 'abc123...')
|
|
1026
|
-
* ```
|
|
1027
|
-
*/
|
|
1028
|
-
onTagCreated(tagName: string, sha: string): Promise<void>;
|
|
1029
|
-
/**
|
|
1030
|
-
* Captures a merge completion operation.
|
|
1031
|
-
*
|
|
1032
|
-
* @param mergeSha - SHA-1 hash of the merge commit
|
|
1033
|
-
* @param baseSha - SHA-1 hash of the base commit
|
|
1034
|
-
* @param headSha - SHA-1 hash of the head commit being merged
|
|
1035
|
-
*
|
|
1036
|
-
* @example
|
|
1037
|
-
* ```typescript
|
|
1038
|
-
* await capture.onMergeCompleted('merge123...', 'base456...', 'head789...')
|
|
1039
|
-
* ```
|
|
1040
|
-
*/
|
|
1041
|
-
onMergeCompleted(mergeSha: string, baseSha: string, headSha: string): Promise<void>;
|
|
1042
|
-
/**
|
|
1043
|
-
* Returns a copy of all buffered events.
|
|
1044
|
-
*
|
|
1045
|
-
* @returns Array of buffered events
|
|
1046
|
-
*/
|
|
1047
|
-
getEvents(): CDCEvent[];
|
|
1048
|
-
/**
|
|
1049
|
-
* Returns the current buffer size.
|
|
1050
|
-
*
|
|
1051
|
-
* @returns Number of events in the buffer
|
|
1052
|
-
*/
|
|
1053
|
-
getBufferSize(): number;
|
|
1054
|
-
/**
|
|
1055
|
-
* Flushes all buffered events.
|
|
1056
|
-
*
|
|
1057
|
-
* @description
|
|
1058
|
-
* Returns and clears all events from the buffer. The returned events
|
|
1059
|
-
* can be processed, serialized, or forwarded to downstream systems.
|
|
1060
|
-
*
|
|
1061
|
-
* @returns Array of flushed events
|
|
1062
|
-
*
|
|
1063
|
-
* @example
|
|
1064
|
-
* ```typescript
|
|
1065
|
-
* const events = await capture.flush()
|
|
1066
|
-
* console.log(`Flushed ${events.length} events`)
|
|
1067
|
-
* await sendToAnalytics(events)
|
|
1068
|
-
* ```
|
|
1069
|
-
*/
|
|
1070
|
-
flush(): Promise<CDCEvent[]>;
|
|
1071
|
-
/**
|
|
1072
|
-
* Adds an event listener.
|
|
1073
|
-
*
|
|
1074
|
-
* @description
|
|
1075
|
-
* Listeners are called synchronously for each event as it is captured.
|
|
1076
|
-
*
|
|
1077
|
-
* @param listener - Callback function to invoke for each event
|
|
1078
|
-
*
|
|
1079
|
-
* @example
|
|
1080
|
-
* ```typescript
|
|
1081
|
-
* capture.addListener((event) => {
|
|
1082
|
-
* console.log(`New event: ${event.type}`)
|
|
1083
|
-
* })
|
|
1084
|
-
* ```
|
|
1085
|
-
*/
|
|
1086
|
-
addListener(listener: GitOperationListener): void;
|
|
1087
|
-
/**
|
|
1088
|
-
* Removes an event listener.
|
|
1089
|
-
*
|
|
1090
|
-
* @param listener - The listener to remove
|
|
1091
|
-
*/
|
|
1092
|
-
removeListener(listener: GitOperationListener): void;
|
|
1093
|
-
}
|
|
1094
|
-
/**
|
|
1095
|
-
* Parquet schema definition for CDC events.
|
|
1096
|
-
*
|
|
1097
|
-
* @description
|
|
1098
|
-
* Defines the column structure for CDC event Parquet files. The default
|
|
1099
|
-
* schema includes standard CDC event fields and can be extended with
|
|
1100
|
-
* custom fields for domain-specific data.
|
|
1101
|
-
*
|
|
1102
|
-
* @example
|
|
1103
|
-
* ```typescript
|
|
1104
|
-
* // Create default schema
|
|
1105
|
-
* const schema = ParquetSchema.forCDCEvents()
|
|
1106
|
-
*
|
|
1107
|
-
* // Create schema with custom fields
|
|
1108
|
-
* const customSchema = ParquetSchema.forCDCEvents([
|
|
1109
|
-
* { name: 'repository_id', type: 'STRING', nullable: false },
|
|
1110
|
-
* { name: 'user_id', type: 'STRING', nullable: true }
|
|
1111
|
-
* ])
|
|
1112
|
-
* ```
|
|
1113
|
-
*
|
|
1114
|
-
* @class ParquetSchema
|
|
1115
|
-
*/
|
|
1116
|
-
export declare class ParquetSchema {
|
|
1117
|
-
readonly fields: ParquetField[];
|
|
1118
|
-
/**
|
|
1119
|
-
* Creates a new ParquetSchema.
|
|
1120
|
-
*
|
|
1121
|
-
* @param fields - Array of field definitions
|
|
1122
|
-
*/
|
|
1123
|
-
constructor(fields: ParquetField[]);
|
|
1124
|
-
/**
|
|
1125
|
-
* Creates a schema for CDC events with optional custom fields.
|
|
1126
|
-
*
|
|
1127
|
-
* @description
|
|
1128
|
-
* Returns a schema with the standard CDC event fields. Additional
|
|
1129
|
-
* custom fields can be appended for domain-specific data.
|
|
1130
|
-
*
|
|
1131
|
-
* @param customFields - Optional additional fields to add
|
|
1132
|
-
* @returns A new ParquetSchema instance
|
|
1133
|
-
*
|
|
1134
|
-
* @example
|
|
1135
|
-
* ```typescript
|
|
1136
|
-
* const schema = ParquetSchema.forCDCEvents()
|
|
1137
|
-
* // Schema includes: event_id, event_type, source, timestamp,
|
|
1138
|
-
* // sequence, version, payload_json, sha
|
|
1139
|
-
* ```
|
|
1140
|
-
*/
|
|
1141
|
-
static forCDCEvents(customFields?: ParquetField[]): ParquetSchema;
|
|
1142
|
-
}
|
|
1143
|
-
/**
|
|
1144
|
-
* Configuration options for the Parquet transformer.
|
|
1145
|
-
*/
|
|
1146
|
-
export interface ParquetTransformerOptions {
|
|
1147
|
-
/**
|
|
1148
|
-
* Compression algorithm to use.
|
|
1149
|
-
* @default 'snappy'
|
|
1150
|
-
*/
|
|
1151
|
-
compression?: 'snappy' | 'gzip' | 'none';
|
|
1152
|
-
}
|
|
1153
|
-
/**
|
|
1154
|
-
* Transforms CDC events to Parquet format.
|
|
1155
|
-
*
|
|
1156
|
-
* @description
|
|
1157
|
-
* ParquetTransformer converts CDC events to Parquet-compatible rows and
|
|
1158
|
-
* serializes batches of events to Parquet file format. It handles:
|
|
1159
|
-
*
|
|
1160
|
-
* - Event to row conversion (flattening the event structure)
|
|
1161
|
-
* - JSON serialization of complex payloads
|
|
1162
|
-
* - Batch creation with schema and metadata
|
|
1163
|
-
* - Parquet file generation with compression
|
|
1164
|
-
*
|
|
1165
|
-
* @example
|
|
1166
|
-
* ```typescript
|
|
1167
|
-
* const transformer = new ParquetTransformer({ compression: 'snappy' })
|
|
1168
|
-
*
|
|
1169
|
-
* // Transform single event to row
|
|
1170
|
-
* const row = transformer.eventToRow(event)
|
|
1171
|
-
*
|
|
1172
|
-
* // Transform batch of events
|
|
1173
|
-
* const batch = transformer.eventsToBatch(events)
|
|
1174
|
-
*
|
|
1175
|
-
* // Generate Parquet file
|
|
1176
|
-
* const buffer = await transformer.toParquetBuffer(batch)
|
|
1177
|
-
* await r2.put('events.parquet', buffer)
|
|
1178
|
-
* ```
|
|
1179
|
-
*
|
|
1180
|
-
* @class ParquetTransformer
|
|
1181
|
-
*/
|
|
1182
|
-
export declare class ParquetTransformer {
|
|
1183
|
-
/**
|
|
1184
|
-
* Compression algorithm to use.
|
|
1185
|
-
* @private
|
|
1186
|
-
*/
|
|
1187
|
-
private readonly compression;
|
|
1188
|
-
/**
|
|
1189
|
-
* Creates a new ParquetTransformer.
|
|
1190
|
-
*
|
|
1191
|
-
* @param options - Transformer configuration
|
|
1192
|
-
*/
|
|
1193
|
-
constructor(options?: ParquetTransformerOptions);
|
|
1194
|
-
/**
|
|
1195
|
-
* Converts a CDC event to a Parquet row.
|
|
1196
|
-
*
|
|
1197
|
-
* @description
|
|
1198
|
-
* Flattens the event structure and serializes the payload to JSON
|
|
1199
|
-
* for storage in Parquet format.
|
|
1200
|
-
*
|
|
1201
|
-
* @param event - The CDC event to convert
|
|
1202
|
-
* @returns A Parquet row representation
|
|
1203
|
-
*
|
|
1204
|
-
* @example
|
|
1205
|
-
* ```typescript
|
|
1206
|
-
* const row = transformer.eventToRow(event)
|
|
1207
|
-
* console.log(row.event_id, row.event_type, row.sha)
|
|
1208
|
-
* ```
|
|
1209
|
-
*/
|
|
1210
|
-
eventToRow(event: CDCEvent): ParquetRow;
|
|
1211
|
-
/**
|
|
1212
|
-
* Converts multiple CDC events to a Parquet batch.
|
|
1213
|
-
*
|
|
1214
|
-
* @description
|
|
1215
|
-
* Transforms an array of events into a ParquetBatch structure
|
|
1216
|
-
* ready for serialization to Parquet format.
|
|
1217
|
-
*
|
|
1218
|
-
* @param events - Array of CDC events to batch
|
|
1219
|
-
* @returns A ParquetBatch ready for serialization
|
|
1220
|
-
*
|
|
1221
|
-
* @example
|
|
1222
|
-
* ```typescript
|
|
1223
|
-
* const batch = transformer.eventsToBatch(events)
|
|
1224
|
-
* console.log(`Batch has ${batch.rowCount} rows`)
|
|
1225
|
-
* ```
|
|
1226
|
-
*/
|
|
1227
|
-
eventsToBatch(events: CDCEvent[]): ParquetBatch;
|
|
1228
|
-
/**
|
|
1229
|
-
* Serializes a ParquetBatch to a Parquet file buffer.
|
|
1230
|
-
*
|
|
1231
|
-
* @description
|
|
1232
|
-
* Generates a Parquet-format file from the batch data. The output
|
|
1233
|
-
* includes PAR1 magic bytes, compressed data, and footer metadata.
|
|
1234
|
-
*
|
|
1235
|
-
* @param batch - The ParquetBatch to serialize
|
|
1236
|
-
* @returns Promise resolving to Parquet file as Uint8Array
|
|
1237
|
-
*
|
|
1238
|
-
* @example
|
|
1239
|
-
* ```typescript
|
|
1240
|
-
* const buffer = await transformer.toParquetBuffer(batch)
|
|
1241
|
-
* await r2.put('events.parquet', buffer)
|
|
1242
|
-
* ```
|
|
1243
|
-
*/
|
|
1244
|
-
toParquetBuffer(batch: ParquetBatch): Promise<Uint8Array>;
|
|
1245
|
-
private gzipCompress;
|
|
1246
|
-
private simpleCompress;
|
|
1247
|
-
}
|
|
1248
|
-
/**
|
|
1249
|
-
* Callback function for batch processing.
|
|
1250
|
-
*
|
|
1251
|
-
* @param batch - The batch result containing events and metadata
|
|
1252
|
-
* @returns void or a Promise that resolves when processing is complete
|
|
1253
|
-
*/
|
|
1254
|
-
type BatchHandler = (batch: BatchResult) => void | Promise<void>;
|
|
1255
|
-
/**
|
|
1256
|
-
* Batches CDC events for efficient processing.
|
|
1257
|
-
*
|
|
1258
|
-
* @description
|
|
1259
|
-
* CDCBatcher collects CDC events and groups them into batches based on
|
|
1260
|
-
* count or time thresholds. This enables efficient downstream processing
|
|
1261
|
-
* by reducing the number of I/O operations and enabling bulk operations.
|
|
1262
|
-
*
|
|
1263
|
-
* **Batching Strategies:**
|
|
1264
|
-
* - **Count-based**: Flush when batch reaches `batchSize` events
|
|
1265
|
-
* - **Time-based**: Flush after `flushIntervalMs` even if batch is not full
|
|
1266
|
-
*
|
|
1267
|
-
* **Features:**
|
|
1268
|
-
* - Async batch handlers for non-blocking processing
|
|
1269
|
-
* - Multiple handlers for parallel processing pipelines
|
|
1270
|
-
* - Graceful stop with pending event flush
|
|
1271
|
-
* - Batch metadata (sequences, timestamps) for tracking
|
|
1272
|
-
*
|
|
1273
|
-
* @example
|
|
1274
|
-
* ```typescript
|
|
1275
|
-
* const batcher = new CDCBatcher({
|
|
1276
|
-
* batchSize: 100,
|
|
1277
|
-
* flushIntervalMs: 5000
|
|
1278
|
-
* })
|
|
1279
|
-
*
|
|
1280
|
-
* // Register batch handler
|
|
1281
|
-
* batcher.onBatch(async (batch) => {
|
|
1282
|
-
* console.log(`Processing ${batch.eventCount} events`)
|
|
1283
|
-
* console.log(`Sequence range: ${batch.minSequence} - ${batch.maxSequence}`)
|
|
1284
|
-
* await saveToStorage(batch.events)
|
|
1285
|
-
* })
|
|
1286
|
-
*
|
|
1287
|
-
* // Add events
|
|
1288
|
-
* await batcher.add(event1)
|
|
1289
|
-
* await batcher.add(event2)
|
|
1290
|
-
*
|
|
1291
|
-
* // Check pending events
|
|
1292
|
-
* console.log(`Pending: ${batcher.getPendingCount()}`)
|
|
1293
|
-
*
|
|
1294
|
-
* // Manual flush
|
|
1295
|
-
* const result = await batcher.flush()
|
|
1296
|
-
*
|
|
1297
|
-
* // Stop the batcher
|
|
1298
|
-
* await batcher.stop()
|
|
1299
|
-
* ```
|
|
1300
|
-
*
|
|
1301
|
-
* @class CDCBatcher
|
|
1302
|
-
*/
|
|
1303
|
-
export declare class CDCBatcher {
|
|
1304
|
-
/**
|
|
1305
|
-
* Batch configuration.
|
|
1306
|
-
* @private
|
|
1307
|
-
*/
|
|
1308
|
-
private readonly config;
|
|
1309
|
-
/**
|
|
1310
|
-
* Buffer of pending events.
|
|
1311
|
-
* @private
|
|
1312
|
-
*/
|
|
1313
|
-
private events;
|
|
1314
|
-
/**
|
|
1315
|
-
* Registered batch handlers.
|
|
1316
|
-
* @private
|
|
1317
|
-
*/
|
|
1318
|
-
private batchHandlers;
|
|
1319
|
-
/**
|
|
1320
|
-
* Timer for time-based flushing.
|
|
1321
|
-
* @private
|
|
1322
|
-
*/
|
|
1323
|
-
private flushTimer;
|
|
1324
|
-
/**
|
|
1325
|
-
* Whether the batcher has been stopped.
|
|
1326
|
-
* @private
|
|
1327
|
-
*/
|
|
1328
|
-
private stopped;
|
|
1329
|
-
/**
|
|
1330
|
-
* Creates a new CDCBatcher.
|
|
1331
|
-
*
|
|
1332
|
-
* @param config - Batch configuration
|
|
1333
|
-
*/
|
|
1334
|
-
constructor(config: BatchConfig);
|
|
1335
|
-
private ensureTimerRunning;
|
|
1336
|
-
private clearFlushTimer;
|
|
1337
|
-
/**
|
|
1338
|
-
* Adds an event to the batch.
|
|
1339
|
-
*
|
|
1340
|
-
* @description
|
|
1341
|
-
* Adds the event to the pending batch. If the batch reaches the
|
|
1342
|
-
* configured size, it is automatically flushed. The flush timer
|
|
1343
|
-
* is started/restarted as needed.
|
|
1344
|
-
*
|
|
1345
|
-
* @param event - The CDC event to add
|
|
1346
|
-
*
|
|
1347
|
-
* @example
|
|
1348
|
-
* ```typescript
|
|
1349
|
-
* await batcher.add(event)
|
|
1350
|
-
* ```
|
|
1351
|
-
*/
|
|
1352
|
-
add(event: CDCEvent): Promise<void>;
|
|
1353
|
-
/**
|
|
1354
|
-
* Internal flush implementation.
|
|
1355
|
-
* @private
|
|
1356
|
-
*/
|
|
1357
|
-
private flushInternal;
|
|
1358
|
-
/**
|
|
1359
|
-
* Manually flushes pending events.
|
|
1360
|
-
*
|
|
1361
|
-
* @description
|
|
1362
|
-
* Forces an immediate flush of all pending events, regardless of
|
|
1363
|
-
* batch size or timer. Clears the flush timer.
|
|
1364
|
-
*
|
|
1365
|
-
* @returns Promise resolving to the batch result
|
|
1366
|
-
*
|
|
1367
|
-
* @example
|
|
1368
|
-
* ```typescript
|
|
1369
|
-
* const result = await batcher.flush()
|
|
1370
|
-
* console.log(`Flushed ${result.eventCount} events`)
|
|
1371
|
-
* ```
|
|
1372
|
-
*/
|
|
1373
|
-
flush(): Promise<BatchResult>;
|
|
1374
|
-
/**
|
|
1375
|
-
* Returns the number of pending events.
|
|
1376
|
-
*
|
|
1377
|
-
* @returns Number of events waiting to be flushed
|
|
1378
|
-
*/
|
|
1379
|
-
getPendingCount(): number;
|
|
1380
|
-
/**
|
|
1381
|
-
* Registers a batch handler.
|
|
1382
|
-
*
|
|
1383
|
-
* @description
|
|
1384
|
-
* Handlers are called when a batch is flushed (automatically or manually).
|
|
1385
|
-
* Multiple handlers can be registered for parallel processing.
|
|
1386
|
-
*
|
|
1387
|
-
* @param handler - Callback function to invoke for each batch
|
|
1388
|
-
*
|
|
1389
|
-
* @example
|
|
1390
|
-
* ```typescript
|
|
1391
|
-
* batcher.onBatch(async (batch) => {
|
|
1392
|
-
* await saveToStorage(batch.events)
|
|
1393
|
-
* })
|
|
1394
|
-
* ```
|
|
1395
|
-
*/
|
|
1396
|
-
onBatch(handler: BatchHandler): void;
|
|
1397
|
-
/**
|
|
1398
|
-
* Stops the batcher.
|
|
1399
|
-
*
|
|
1400
|
-
* @description
|
|
1401
|
-
* Stops the flush timer and prevents further processing.
|
|
1402
|
-
* Does NOT automatically flush pending events - call flush() first
|
|
1403
|
-
* if you need to process remaining events.
|
|
1404
|
-
*
|
|
1405
|
-
* @example
|
|
1406
|
-
* ```typescript
|
|
1407
|
-
* await batcher.flush() // Process remaining events
|
|
1408
|
-
* await batcher.stop() // Stop the timer
|
|
1409
|
-
* ```
|
|
1410
|
-
*/
|
|
1411
|
-
stop(): Promise<void>;
|
|
1412
|
-
}
|
|
1413
|
-
/**
|
|
1414
|
-
* Callback for successful batch output.
|
|
1415
|
-
*
|
|
1416
|
-
* @param output - The pipeline output containing Parquet data
|
|
1417
|
-
*/
|
|
1418
|
-
type OutputHandler = (output: PipelineOutput) => void;
|
|
1419
|
-
/**
|
|
1420
|
-
* Callback for failed events sent to dead letter queue.
|
|
1421
|
-
*
|
|
1422
|
-
* @param events - Array of failed events
|
|
1423
|
-
* @param error - The error that caused the failure
|
|
1424
|
-
*/
|
|
1425
|
-
type DeadLetterHandler = (events: CDCEvent[], error: Error) => void;
|
|
1426
|
-
/**
|
|
1427
|
-
* Main CDC Pipeline for processing git operation events.
|
|
1428
|
-
*
|
|
1429
|
-
* @description
|
|
1430
|
-
* CDCPipeline orchestrates the complete change data capture flow from
|
|
1431
|
-
* event ingestion to Parquet output. It integrates batching, transformation,
|
|
1432
|
-
* retry handling, and dead letter queue management.
|
|
1433
|
-
*
|
|
1434
|
-
* **Pipeline Flow:**
|
|
1435
|
-
* 1. Events are submitted via `process()` or `processMany()`
|
|
1436
|
-
* 2. Events are validated and added to the batcher
|
|
1437
|
-
* 3. When a batch is ready, it's transformed to Parquet format
|
|
1438
|
-
* 4. On success, output handlers are notified
|
|
1439
|
-
* 5. On failure, retries are attempted with exponential backoff
|
|
1440
|
-
* 6. After max retries, events go to dead letter queue
|
|
1441
|
-
*
|
|
1442
|
-
* **Features:**
|
|
1443
|
-
* - Configurable batch size and flush interval
|
|
1444
|
-
* - Automatic retry with exponential backoff
|
|
1445
|
-
* - Dead letter queue for failed events
|
|
1446
|
-
* - Real-time metrics for monitoring
|
|
1447
|
-
* - Graceful shutdown with pending event flush
|
|
1448
|
-
*
|
|
1449
|
-
* @example
|
|
1450
|
-
* ```typescript
|
|
1451
|
-
* const pipeline = new CDCPipeline({
|
|
1452
|
-
* batchSize: 100,
|
|
1453
|
-
* flushIntervalMs: 5000,
|
|
1454
|
-
* maxRetries: 3,
|
|
1455
|
-
* parquetCompression: 'snappy',
|
|
1456
|
-
* outputPath: '/analytics',
|
|
1457
|
-
* schemaVersion: 1
|
|
1458
|
-
* })
|
|
1459
|
-
*
|
|
1460
|
-
* // Register handlers
|
|
1461
|
-
* pipeline.onOutput(async (output) => {
|
|
1462
|
-
* await r2.put(`cdc/${output.batchId}.parquet`, output.parquetBuffer)
|
|
1463
|
-
* })
|
|
1464
|
-
*
|
|
1465
|
-
* pipeline.onDeadLetter((events, error) => {
|
|
1466
|
-
* console.error(`Failed ${events.length} events:`, error)
|
|
1467
|
-
* })
|
|
1468
|
-
*
|
|
1469
|
-
* // Start the pipeline
|
|
1470
|
-
* await pipeline.start()
|
|
1471
|
-
*
|
|
1472
|
-
* // Process events
|
|
1473
|
-
* await pipeline.process(event)
|
|
1474
|
-
*
|
|
1475
|
-
* // Check metrics
|
|
1476
|
-
* const metrics = pipeline.getMetrics()
|
|
1477
|
-
*
|
|
1478
|
-
* // Stop gracefully
|
|
1479
|
-
* const result = await pipeline.stop()
|
|
1480
|
-
* console.log(`Flushed ${result.flushedCount} events on shutdown`)
|
|
1481
|
-
* ```
|
|
1482
|
-
*
|
|
1483
|
-
* @class CDCPipeline
|
|
1484
|
-
*/
|
|
1485
|
-
export declare class CDCPipeline {
|
|
1486
|
-
/**
|
|
1487
|
-
* Pipeline configuration.
|
|
1488
|
-
* @private
|
|
1489
|
-
*/
|
|
1490
|
-
private readonly config;
|
|
1491
|
-
/**
|
|
1492
|
-
* Current pipeline state.
|
|
1493
|
-
* @private
|
|
1494
|
-
*/
|
|
1495
|
-
private state;
|
|
1496
|
-
/**
|
|
1497
|
-
* Event batcher instance.
|
|
1498
|
-
* @private
|
|
1499
|
-
*/
|
|
1500
|
-
private batcher;
|
|
1501
|
-
/**
|
|
1502
|
-
* Parquet transformer instance.
|
|
1503
|
-
* @private
|
|
1504
|
-
*/
|
|
1505
|
-
private transformer;
|
|
1506
|
-
/**
|
|
1507
|
-
* Registered output handlers.
|
|
1508
|
-
* @private
|
|
1509
|
-
*/
|
|
1510
|
-
private outputHandlers;
|
|
1511
|
-
/**
|
|
1512
|
-
* Registered dead letter handlers.
|
|
1513
|
-
* @private
|
|
1514
|
-
*/
|
|
1515
|
-
private deadLetterHandlers;
|
|
1516
|
-
/**
|
|
1517
|
-
* Pipeline metrics.
|
|
1518
|
-
* @private
|
|
1519
|
-
*/
|
|
1520
|
-
private metrics;
|
|
1521
|
-
/**
|
|
1522
|
-
* Processing latency samples.
|
|
1523
|
-
* @private
|
|
1524
|
-
*/
|
|
1525
|
-
private processingLatencies;
|
|
1526
|
-
/**
|
|
1527
|
-
* Retry policy instance.
|
|
1528
|
-
* @private
|
|
1529
|
-
*/
|
|
1530
|
-
private retryPolicy;
|
|
1531
|
-
/**
|
|
1532
|
-
* Creates a new CDCPipeline.
|
|
1533
|
-
*
|
|
1534
|
-
* @param config - Pipeline configuration
|
|
1535
|
-
*/
|
|
1536
|
-
constructor(config: CDCPipelineConfig);
|
|
1537
|
-
/**
|
|
1538
|
-
* Returns the current pipeline state.
|
|
1539
|
-
*
|
|
1540
|
-
* @returns Current state ('stopped', 'running', or 'paused')
|
|
1541
|
-
*/
|
|
1542
|
-
getState(): CDCPipelineState;
|
|
1543
|
-
/**
|
|
1544
|
-
* Starts the pipeline.
|
|
1545
|
-
*
|
|
1546
|
-
* @description
|
|
1547
|
-
* Initializes the batcher and begins accepting events. If already
|
|
1548
|
-
* running, this method is a no-op.
|
|
1549
|
-
*
|
|
1550
|
-
* @example
|
|
1551
|
-
* ```typescript
|
|
1552
|
-
* await pipeline.start()
|
|
1553
|
-
* console.log(pipeline.getState()) // 'running'
|
|
1554
|
-
* ```
|
|
1555
|
-
*/
|
|
1556
|
-
start(): Promise<void>;
|
|
1557
|
-
/**
|
|
1558
|
-
* Stops the pipeline.
|
|
1559
|
-
*
|
|
1560
|
-
* @description
|
|
1561
|
-
* Flushes any pending events, stops the batcher, and sets state to stopped.
|
|
1562
|
-
* Returns information about events flushed during shutdown.
|
|
1563
|
-
*
|
|
1564
|
-
* @returns Promise resolving to stop result with flushed event count
|
|
1565
|
-
*
|
|
1566
|
-
* @example
|
|
1567
|
-
* ```typescript
|
|
1568
|
-
* const result = await pipeline.stop()
|
|
1569
|
-
* console.log(`Flushed ${result.flushedCount} events on shutdown`)
|
|
1570
|
-
* ```
|
|
1571
|
-
*/
|
|
1572
|
-
stop(): Promise<StopResult>;
|
|
1573
|
-
/**
|
|
1574
|
-
* Processes a single event.
|
|
1575
|
-
*
|
|
1576
|
-
* @description
|
|
1577
|
-
* Validates the event and adds it to the batcher for processing.
|
|
1578
|
-
* Updates metrics including latency tracking.
|
|
1579
|
-
*
|
|
1580
|
-
* @param event - The CDC event to process
|
|
1581
|
-
* @returns Promise resolving to process result
|
|
1582
|
-
*
|
|
1583
|
-
* @throws {CDCError} PROCESSING_ERROR - If pipeline is not running
|
|
1584
|
-
* @throws {CDCError} VALIDATION_ERROR - If event fails validation
|
|
1585
|
-
*
|
|
1586
|
-
* @example
|
|
1587
|
-
* ```typescript
|
|
1588
|
-
* const result = await pipeline.process(event)
|
|
1589
|
-
* if (result.success) {
|
|
1590
|
-
* console.log(`Processed event: ${result.eventId}`)
|
|
1591
|
-
* }
|
|
1592
|
-
* ```
|
|
1593
|
-
*/
|
|
1594
|
-
process(event: CDCEvent): Promise<ProcessResult>;
|
|
1595
|
-
/**
|
|
1596
|
-
* Processes multiple events.
|
|
1597
|
-
*
|
|
1598
|
-
* @description
|
|
1599
|
-
* Convenience method to process an array of events sequentially.
|
|
1600
|
-
*
|
|
1601
|
-
* @param events - Array of CDC events to process
|
|
1602
|
-
* @returns Promise resolving to array of process results
|
|
1603
|
-
*
|
|
1604
|
-
* @example
|
|
1605
|
-
* ```typescript
|
|
1606
|
-
* const results = await pipeline.processMany(events)
|
|
1607
|
-
* const successCount = results.filter(r => r.success).length
|
|
1608
|
-
* console.log(`Processed ${successCount}/${events.length} events`)
|
|
1609
|
-
* ```
|
|
1610
|
-
*/
|
|
1611
|
-
processMany(events: CDCEvent[]): Promise<ProcessResult[]>;
|
|
1612
|
-
/**
|
|
1613
|
-
* Manually flushes pending events.
|
|
1614
|
-
*
|
|
1615
|
-
* @description
|
|
1616
|
-
* Forces an immediate flush of the batcher and processes the
|
|
1617
|
-
* resulting batch through the pipeline.
|
|
1618
|
-
*
|
|
1619
|
-
* @example
|
|
1620
|
-
* ```typescript
|
|
1621
|
-
* await pipeline.flush()
|
|
1622
|
-
* console.log('All pending events flushed')
|
|
1623
|
-
* ```
|
|
1624
|
-
*/
|
|
1625
|
-
flush(): Promise<void>;
|
|
1626
|
-
/**
|
|
1627
|
-
* Handles a batch of events with retry logic.
|
|
1628
|
-
* @private
|
|
1629
|
-
*/
|
|
1630
|
-
private handleBatch;
|
|
1631
|
-
/**
|
|
1632
|
-
* Sleeps for the specified duration.
|
|
1633
|
-
* @private
|
|
1634
|
-
*/
|
|
1635
|
-
private sleep;
|
|
1636
|
-
/**
|
|
1637
|
-
* Updates the average latency metric.
|
|
1638
|
-
* @private
|
|
1639
|
-
*/
|
|
1640
|
-
private updateAvgLatency;
|
|
1641
|
-
/**
|
|
1642
|
-
* Returns current pipeline metrics.
|
|
1643
|
-
*
|
|
1644
|
-
* @description
|
|
1645
|
-
* Returns a copy of the current metrics. Metrics are cumulative
|
|
1646
|
-
* since pipeline creation.
|
|
1647
|
-
*
|
|
1648
|
-
* @returns Copy of current pipeline metrics
|
|
1649
|
-
*
|
|
1650
|
-
* @example
|
|
1651
|
-
* ```typescript
|
|
1652
|
-
* const metrics = pipeline.getMetrics()
|
|
1653
|
-
* console.log(`Processed: ${metrics.eventsProcessed}`)
|
|
1654
|
-
* console.log(`Batches: ${metrics.batchesGenerated}`)
|
|
1655
|
-
* console.log(`Errors: ${metrics.errors}`)
|
|
1656
|
-
* console.log(`Avg latency: ${metrics.avgProcessingLatencyMs}ms`)
|
|
1657
|
-
* ```
|
|
1658
|
-
*/
|
|
1659
|
-
getMetrics(): PipelineMetrics;
|
|
1660
|
-
/**
|
|
1661
|
-
* Registers an output handler.
|
|
1662
|
-
*
|
|
1663
|
-
* @description
|
|
1664
|
-
* Output handlers are called when a batch is successfully processed
|
|
1665
|
-
* and converted to Parquet format. Multiple handlers can be registered.
|
|
1666
|
-
*
|
|
1667
|
-
* @param handler - Callback to invoke for each successful batch
|
|
1668
|
-
*
|
|
1669
|
-
* @example
|
|
1670
|
-
* ```typescript
|
|
1671
|
-
* pipeline.onOutput(async (output) => {
|
|
1672
|
-
* await r2.put(`cdc/${output.batchId}.parquet`, output.parquetBuffer)
|
|
1673
|
-
* console.log(`Wrote ${output.events.length} events`)
|
|
1674
|
-
* })
|
|
1675
|
-
* ```
|
|
1676
|
-
*/
|
|
1677
|
-
onOutput(handler: OutputHandler): void;
|
|
1678
|
-
/**
|
|
1679
|
-
* Registers a dead letter handler.
|
|
1680
|
-
*
|
|
1681
|
-
* @description
|
|
1682
|
-
* Dead letter handlers are called when a batch fails after all
|
|
1683
|
-
* retry attempts are exhausted. Use this for alerting, logging,
|
|
1684
|
-
* or storing failed events for later reprocessing.
|
|
1685
|
-
*
|
|
1686
|
-
* @param handler - Callback to invoke for failed events
|
|
1687
|
-
*
|
|
1688
|
-
* @example
|
|
1689
|
-
* ```typescript
|
|
1690
|
-
* pipeline.onDeadLetter(async (events, error) => {
|
|
1691
|
-
* console.error(`Failed to process ${events.length} events:`, error)
|
|
1692
|
-
* // Store in dead letter queue for later retry
|
|
1693
|
-
* await dlq.put(events)
|
|
1694
|
-
* })
|
|
1695
|
-
* ```
|
|
1696
|
-
*/
|
|
1697
|
-
onDeadLetter(handler: DeadLetterHandler): void;
|
|
1698
|
-
}
|
|
1699
|
-
/**
|
|
1700
|
-
* Creates a new CDC event.
|
|
1701
|
-
*
|
|
1702
|
-
* @description
|
|
1703
|
-
* Factory function to create a properly structured CDC event with
|
|
1704
|
-
* automatically generated ID and timestamp.
|
|
1705
|
-
*
|
|
1706
|
-
* @param type - The event type
|
|
1707
|
-
* @param source - The event source
|
|
1708
|
-
* @param payload - Event payload data
|
|
1709
|
-
* @param options - Optional configuration
|
|
1710
|
-
* @param options.sequence - Custom sequence number (default: 0)
|
|
1711
|
-
* @returns A new CDCEvent
|
|
1712
|
-
*
|
|
1713
|
-
* @example
|
|
1714
|
-
* ```typescript
|
|
1715
|
-
* const event = createCDCEvent('COMMIT_CREATED', 'push', {
|
|
1716
|
-
* operation: 'commit-create',
|
|
1717
|
-
* sha: 'abc123...',
|
|
1718
|
-
* treeSha: 'def456...',
|
|
1719
|
-
* parentShas: ['parent1...']
|
|
1720
|
-
* })
|
|
1721
|
-
*
|
|
1722
|
-
* // With sequence number
|
|
1723
|
-
* const sequencedEvent = createCDCEvent('REF_UPDATED', 'push', {
|
|
1724
|
-
* operation: 'ref-update',
|
|
1725
|
-
* refName: 'refs/heads/main',
|
|
1726
|
-
* oldSha: 'old...',
|
|
1727
|
-
* newSha: 'new...'
|
|
1728
|
-
* }, { sequence: 42 })
|
|
1729
|
-
* ```
|
|
1730
|
-
*/
|
|
1731
|
-
export declare function createCDCEvent(type: CDCEventType, source: CDCEventSource, payload: CDCEventPayload, options?: {
|
|
1732
|
-
sequence?: number;
|
|
1733
|
-
}): CDCEvent;
|
|
1734
|
-
/**
|
|
1735
|
-
* Serializes a CDC event to bytes.
|
|
1736
|
-
*
|
|
1737
|
-
* @description
|
|
1738
|
-
* Converts a CDCEvent to a JSON-encoded Uint8Array for storage or
|
|
1739
|
-
* transmission. Handles Uint8Array payload data by converting to arrays.
|
|
1740
|
-
*
|
|
1741
|
-
* @param event - The CDC event to serialize
|
|
1742
|
-
* @returns The serialized event as a Uint8Array
|
|
1743
|
-
*
|
|
1744
|
-
* @example
|
|
1745
|
-
* ```typescript
|
|
1746
|
-
* const bytes = serializeEvent(event)
|
|
1747
|
-
* await r2.put(`events/${event.id}`, bytes)
|
|
1748
|
-
* ```
|
|
1749
|
-
*
|
|
1750
|
-
* @see {@link deserializeEvent} - Reverse operation
|
|
1751
|
-
*/
|
|
1752
|
-
export declare function serializeEvent(event: CDCEvent): Uint8Array;
|
|
1753
|
-
/**
|
|
1754
|
-
* Deserializes bytes to a CDC event.
|
|
1755
|
-
*
|
|
1756
|
-
* @description
|
|
1757
|
-
* Reconstructs a CDCEvent from JSON-encoded bytes. Handles Uint8Array
|
|
1758
|
-
* restoration for payload data that was converted to arrays during
|
|
1759
|
-
* serialization.
|
|
1760
|
-
*
|
|
1761
|
-
* @param bytes - The serialized event bytes
|
|
1762
|
-
* @returns The deserialized CDCEvent
|
|
1763
|
-
*
|
|
1764
|
-
* @example
|
|
1765
|
-
* ```typescript
|
|
1766
|
-
* const bytes = await r2.get(`events/${eventId}`)
|
|
1767
|
-
* const event = deserializeEvent(bytes)
|
|
1768
|
-
* console.log(`Event type: ${event.type}`)
|
|
1769
|
-
* ```
|
|
1770
|
-
*
|
|
1771
|
-
* @see {@link serializeEvent} - Reverse operation
|
|
1772
|
-
*/
|
|
1773
|
-
export declare function deserializeEvent(bytes: Uint8Array): CDCEvent;
|
|
1774
|
-
/**
|
|
1775
|
-
* Validates a CDC event.
|
|
1776
|
-
*
|
|
1777
|
-
* @description
|
|
1778
|
-
* Checks that an event has all required fields and valid values.
|
|
1779
|
-
* Throws a CDCError if validation fails.
|
|
1780
|
-
*
|
|
1781
|
-
* **Validation Rules:**
|
|
1782
|
-
* - Event must not be null/undefined
|
|
1783
|
-
* - Event ID must be a non-empty string
|
|
1784
|
-
* - Event type must be a valid CDCEventType
|
|
1785
|
-
* - Timestamp must be a non-negative number
|
|
1786
|
-
* - Sequence must be a non-negative number
|
|
1787
|
-
*
|
|
1788
|
-
* @param event - The CDC event to validate
|
|
1789
|
-
* @returns The validated event (for chaining)
|
|
1790
|
-
*
|
|
1791
|
-
* @throws {CDCError} VALIDATION_ERROR - If validation fails
|
|
1792
|
-
*
|
|
1793
|
-
* @example
|
|
1794
|
-
* ```typescript
|
|
1795
|
-
* try {
|
|
1796
|
-
* validateCDCEvent(event)
|
|
1797
|
-
* // Event is valid
|
|
1798
|
-
* } catch (error) {
|
|
1799
|
-
* if (error instanceof CDCError) {
|
|
1800
|
-
* console.log(`Invalid: ${error.message}`)
|
|
1801
|
-
* }
|
|
1802
|
-
* }
|
|
1803
|
-
* ```
|
|
1804
|
-
*/
|
|
1805
|
-
export declare function validateCDCEvent(event: CDCEvent): CDCEvent;
|
|
1806
|
-
/**
|
|
1807
|
-
* Starts a new pipeline with the given configuration.
|
|
1808
|
-
*
|
|
1809
|
-
* @description
|
|
1810
|
-
* Creates and starts a new CDCPipeline, registering it by ID for
|
|
1811
|
-
* later access. If a pipeline with the same ID already exists,
|
|
1812
|
-
* it will be replaced (the old pipeline is not automatically stopped).
|
|
1813
|
-
*
|
|
1814
|
-
* @param id - Unique identifier for the pipeline
|
|
1815
|
-
* @param config - Pipeline configuration
|
|
1816
|
-
* @returns The started pipeline instance
|
|
1817
|
-
*
|
|
1818
|
-
* @example
|
|
1819
|
-
* ```typescript
|
|
1820
|
-
* const pipeline = startPipeline('main', {
|
|
1821
|
-
* batchSize: 100,
|
|
1822
|
-
* flushIntervalMs: 5000,
|
|
1823
|
-
* maxRetries: 3,
|
|
1824
|
-
* parquetCompression: 'snappy',
|
|
1825
|
-
* outputPath: '/analytics',
|
|
1826
|
-
* schemaVersion: 1
|
|
1827
|
-
* })
|
|
1828
|
-
*
|
|
1829
|
-
* // Register handlers
|
|
1830
|
-
* pipeline.onOutput((output) => console.log(`Batch: ${output.batchId}`))
|
|
1831
|
-
* ```
|
|
1832
|
-
*/
|
|
1833
|
-
export declare function startPipeline(id: string, config: CDCPipelineConfig): CDCPipeline;
|
|
1834
|
-
/**
|
|
1835
|
-
* Stops a pipeline by ID.
|
|
1836
|
-
*
|
|
1837
|
-
* @description
|
|
1838
|
-
* Stops the pipeline identified by the given ID, flushing any pending
|
|
1839
|
-
* events and removing it from the registry.
|
|
1840
|
-
*
|
|
1841
|
-
* @param id - Pipeline identifier
|
|
1842
|
-
* @returns Promise resolving to stop result (flushedCount is 0 if pipeline not found)
|
|
1843
|
-
*
|
|
1844
|
-
* @example
|
|
1845
|
-
* ```typescript
|
|
1846
|
-
* const result = await stopPipeline('main')
|
|
1847
|
-
* console.log(`Flushed ${result.flushedCount} events on shutdown`)
|
|
1848
|
-
* ```
|
|
1849
|
-
*/
|
|
1850
|
-
export declare function stopPipeline(id: string): Promise<StopResult>;
|
|
1851
|
-
/**
|
|
1852
|
-
* Flushes a pipeline by ID.
|
|
1853
|
-
*
|
|
1854
|
-
* @description
|
|
1855
|
-
* Forces an immediate flush of all pending events in the pipeline.
|
|
1856
|
-
* No-op if pipeline not found.
|
|
1857
|
-
*
|
|
1858
|
-
* @param id - Pipeline identifier
|
|
1859
|
-
*
|
|
1860
|
-
* @example
|
|
1861
|
-
* ```typescript
|
|
1862
|
-
* await flushPipeline('main')
|
|
1863
|
-
* console.log('All pending events flushed')
|
|
1864
|
-
* ```
|
|
1865
|
-
*/
|
|
1866
|
-
export declare function flushPipeline(id: string): Promise<void>;
|
|
1867
|
-
/**
|
|
1868
|
-
* Gets metrics for a pipeline by ID.
|
|
1869
|
-
*
|
|
1870
|
-
* @description
|
|
1871
|
-
* Returns a copy of the current metrics for the specified pipeline.
|
|
1872
|
-
* Returns null if the pipeline is not found.
|
|
1873
|
-
*
|
|
1874
|
-
* @param id - Pipeline identifier
|
|
1875
|
-
* @returns Pipeline metrics or null if not found
|
|
1876
|
-
*
|
|
1877
|
-
* @example
|
|
1878
|
-
* ```typescript
|
|
1879
|
-
* const metrics = getPipelineMetrics('main')
|
|
1880
|
-
* if (metrics) {
|
|
1881
|
-
* console.log(`Events processed: ${metrics.eventsProcessed}`)
|
|
1882
|
-
* console.log(`Errors: ${metrics.errors}`)
|
|
1883
|
-
* }
|
|
1884
|
-
* ```
|
|
1885
|
-
*/
|
|
1886
|
-
export declare function getPipelineMetrics(id: string): PipelineMetrics | null;
|
|
1887
|
-
export {};
|
|
1888
|
-
//# sourceMappingURL=cdc-pipeline.d.ts.map
|