@pattern-stack/codegen 0.12.2 → 0.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +66 -0
- package/README.md +44 -0
- package/dist/runtime/subsystems/index.d.ts +6 -2
- package/dist/runtime/subsystems/index.js +174 -1
- package/dist/runtime/subsystems/index.js.map +1 -1
- package/dist/runtime/subsystems/integration/detection-config.schema.d.ts +110 -1
- package/dist/runtime/subsystems/integration/detection-config.schema.js +25 -2
- package/dist/runtime/subsystems/integration/detection-config.schema.js.map +1 -1
- package/dist/runtime/subsystems/integration/incremental-read.d.ts +248 -0
- package/dist/runtime/subsystems/integration/incremental-read.js +149 -0
- package/dist/runtime/subsystems/integration/incremental-read.js.map +1 -0
- package/dist/runtime/subsystems/integration/index.d.ts +2 -1
- package/dist/runtime/subsystems/integration/index.js +172 -2
- package/dist/runtime/subsystems/integration/index.js.map +1 -1
- package/dist/runtime/subsystems/jobs/job-worker.module.d.ts +1 -1
- package/dist/src/cli/index.js +642 -35
- package/dist/src/cli/index.js.map +1 -1
- package/dist/src/index.d.ts +78 -0
- package/dist/src/index.js +11 -1
- package/dist/src/index.js.map +1 -1
- package/package.json +1 -1
- package/runtime/subsystems/index.ts +35 -0
- package/runtime/subsystems/integration/detection-config.schema.ts +55 -0
- package/runtime/subsystems/integration/incremental-read.ts +379 -0
- package/runtime/subsystems/integration/index.ts +16 -0
|
@@ -102,8 +102,45 @@ declare const CursorStrategySchema: z.ZodDiscriminatedUnion<"kind", [z.ZodObject
|
|
|
102
102
|
}, {
|
|
103
103
|
field: string;
|
|
104
104
|
kind: "eventId";
|
|
105
|
+
}>, z.ZodObject<{
|
|
106
|
+
kind: z.ZodLiteral<"historyId">;
|
|
107
|
+
field: z.ZodString;
|
|
108
|
+
}, "strip", z.ZodTypeAny, {
|
|
109
|
+
field: string;
|
|
110
|
+
kind: "historyId";
|
|
111
|
+
}, {
|
|
112
|
+
field: string;
|
|
113
|
+
kind: "historyId";
|
|
114
|
+
}>, z.ZodObject<{
|
|
115
|
+
kind: z.ZodLiteral<"syncToken">;
|
|
116
|
+
field: z.ZodString;
|
|
117
|
+
}, "strip", z.ZodTypeAny, {
|
|
118
|
+
field: string;
|
|
119
|
+
kind: "syncToken";
|
|
120
|
+
}, {
|
|
121
|
+
field: string;
|
|
122
|
+
kind: "syncToken";
|
|
105
123
|
}>]>;
|
|
106
124
|
type CursorStrategy = z.infer<typeof CursorStrategySchema>;
|
|
125
|
+
/**
|
|
126
|
+
* Whether a cursor strategy is *divisible* — a property of the strategy, not
|
|
127
|
+
* the read primitive. Divisible cursors are sortable/monotonic watermarks whose
|
|
128
|
+
* value is meaningful AS OF any single record (HubSpot `systemModstamp`, a
|
|
129
|
+
* `timestamp` field, a Salesforce CDC `replayId`); the read primitive may
|
|
130
|
+
* checkpoint per-ref mid-walk, so a crash resumes from the last delivered ref.
|
|
131
|
+
*
|
|
132
|
+
* Atomic cursors are opaque vendor tokens (Gmail `historyId`, Calendar
|
|
133
|
+
* `syncToken`, a generic `eventId`) whose next value only exists at end-of-walk.
|
|
134
|
+
* The primitive must withhold per-ref cursors and emit the token only at a safe
|
|
135
|
+
* boundary, so an interrupted run never persists an unresumable mid-walk token
|
|
136
|
+
* (it resumes all-or-nothing from the prior token — see `IncrementalReadBase`).
|
|
137
|
+
*
|
|
138
|
+
* `eventId` is classified atomic conservatively: a generic opaque id is treated
|
|
139
|
+
* all-or-nothing unless a concrete strategy proves it monotonically resumable.
|
|
140
|
+
*/
|
|
141
|
+
declare const CURSOR_DIVISIBILITY: Readonly<Record<CursorStrategy['kind'], boolean>>;
|
|
142
|
+
/** Predicate form of {@link CURSOR_DIVISIBILITY}. */
|
|
143
|
+
declare function isDivisibleCursor(kind: CursorStrategy['kind']): boolean;
|
|
107
144
|
/**
|
|
108
145
|
* Poll-mode block. `provenance: 'cdc'` opts the poll primitive into stamping
|
|
109
146
|
* `Change<T>.source = 'cdc'` and populating `dedupKey` from the cursor's
|
|
@@ -146,6 +183,24 @@ declare const PollDetectionSchema: z.ZodObject<{
|
|
|
146
183
|
}, {
|
|
147
184
|
field: string;
|
|
148
185
|
kind: "eventId";
|
|
186
|
+
}>, z.ZodObject<{
|
|
187
|
+
kind: z.ZodLiteral<"historyId">;
|
|
188
|
+
field: z.ZodString;
|
|
189
|
+
}, "strip", z.ZodTypeAny, {
|
|
190
|
+
field: string;
|
|
191
|
+
kind: "historyId";
|
|
192
|
+
}, {
|
|
193
|
+
field: string;
|
|
194
|
+
kind: "historyId";
|
|
195
|
+
}>, z.ZodObject<{
|
|
196
|
+
kind: z.ZodLiteral<"syncToken">;
|
|
197
|
+
field: z.ZodString;
|
|
198
|
+
}, "strip", z.ZodTypeAny, {
|
|
199
|
+
field: string;
|
|
200
|
+
kind: "syncToken";
|
|
201
|
+
}, {
|
|
202
|
+
field: string;
|
|
203
|
+
kind: "syncToken";
|
|
149
204
|
}>]>;
|
|
150
205
|
provenance: z.ZodOptional<z.ZodEnum<["poll", "cdc"]>>;
|
|
151
206
|
}, "strip", z.ZodTypeAny, {
|
|
@@ -161,6 +216,12 @@ declare const PollDetectionSchema: z.ZodObject<{
|
|
|
161
216
|
} | {
|
|
162
217
|
field: string;
|
|
163
218
|
kind: "eventId";
|
|
219
|
+
} | {
|
|
220
|
+
field: string;
|
|
221
|
+
kind: "historyId";
|
|
222
|
+
} | {
|
|
223
|
+
field: string;
|
|
224
|
+
kind: "syncToken";
|
|
164
225
|
};
|
|
165
226
|
provenance?: "poll" | "cdc" | undefined;
|
|
166
227
|
}, {
|
|
@@ -176,6 +237,12 @@ declare const PollDetectionSchema: z.ZodObject<{
|
|
|
176
237
|
} | {
|
|
177
238
|
field: string;
|
|
178
239
|
kind: "eventId";
|
|
240
|
+
} | {
|
|
241
|
+
field: string;
|
|
242
|
+
kind: "historyId";
|
|
243
|
+
} | {
|
|
244
|
+
field: string;
|
|
245
|
+
kind: "syncToken";
|
|
179
246
|
};
|
|
180
247
|
provenance?: "poll" | "cdc" | undefined;
|
|
181
248
|
}>;
|
|
@@ -239,6 +306,24 @@ declare const DetectionConfigSchema: z.ZodDiscriminatedUnion<"mode", [z.ZodObjec
|
|
|
239
306
|
}, {
|
|
240
307
|
field: string;
|
|
241
308
|
kind: "eventId";
|
|
309
|
+
}>, z.ZodObject<{
|
|
310
|
+
kind: z.ZodLiteral<"historyId">;
|
|
311
|
+
field: z.ZodString;
|
|
312
|
+
}, "strip", z.ZodTypeAny, {
|
|
313
|
+
field: string;
|
|
314
|
+
kind: "historyId";
|
|
315
|
+
}, {
|
|
316
|
+
field: string;
|
|
317
|
+
kind: "historyId";
|
|
318
|
+
}>, z.ZodObject<{
|
|
319
|
+
kind: z.ZodLiteral<"syncToken">;
|
|
320
|
+
field: z.ZodString;
|
|
321
|
+
}, "strip", z.ZodTypeAny, {
|
|
322
|
+
field: string;
|
|
323
|
+
kind: "syncToken";
|
|
324
|
+
}, {
|
|
325
|
+
field: string;
|
|
326
|
+
kind: "syncToken";
|
|
242
327
|
}>]>;
|
|
243
328
|
provenance: z.ZodOptional<z.ZodEnum<["poll", "cdc"]>>;
|
|
244
329
|
}, "strip", z.ZodTypeAny, {
|
|
@@ -254,6 +339,12 @@ declare const DetectionConfigSchema: z.ZodDiscriminatedUnion<"mode", [z.ZodObjec
|
|
|
254
339
|
} | {
|
|
255
340
|
field: string;
|
|
256
341
|
kind: "eventId";
|
|
342
|
+
} | {
|
|
343
|
+
field: string;
|
|
344
|
+
kind: "historyId";
|
|
345
|
+
} | {
|
|
346
|
+
field: string;
|
|
347
|
+
kind: "syncToken";
|
|
257
348
|
};
|
|
258
349
|
provenance?: "poll" | "cdc" | undefined;
|
|
259
350
|
}, {
|
|
@@ -269,6 +360,12 @@ declare const DetectionConfigSchema: z.ZodDiscriminatedUnion<"mode", [z.ZodObjec
|
|
|
269
360
|
} | {
|
|
270
361
|
field: string;
|
|
271
362
|
kind: "eventId";
|
|
363
|
+
} | {
|
|
364
|
+
field: string;
|
|
365
|
+
kind: "historyId";
|
|
366
|
+
} | {
|
|
367
|
+
field: string;
|
|
368
|
+
kind: "syncToken";
|
|
272
369
|
};
|
|
273
370
|
provenance?: "poll" | "cdc" | undefined;
|
|
274
371
|
}>;
|
|
@@ -312,6 +409,12 @@ declare const DetectionConfigSchema: z.ZodDiscriminatedUnion<"mode", [z.ZodObjec
|
|
|
312
409
|
} | {
|
|
313
410
|
field: string;
|
|
314
411
|
kind: "eventId";
|
|
412
|
+
} | {
|
|
413
|
+
field: string;
|
|
414
|
+
kind: "historyId";
|
|
415
|
+
} | {
|
|
416
|
+
field: string;
|
|
417
|
+
kind: "syncToken";
|
|
315
418
|
};
|
|
316
419
|
provenance?: "poll" | "cdc" | undefined;
|
|
317
420
|
};
|
|
@@ -340,6 +443,12 @@ declare const DetectionConfigSchema: z.ZodDiscriminatedUnion<"mode", [z.ZodObjec
|
|
|
340
443
|
} | {
|
|
341
444
|
field: string;
|
|
342
445
|
kind: "eventId";
|
|
446
|
+
} | {
|
|
447
|
+
field: string;
|
|
448
|
+
kind: "historyId";
|
|
449
|
+
} | {
|
|
450
|
+
field: string;
|
|
451
|
+
kind: "syncToken";
|
|
343
452
|
};
|
|
344
453
|
provenance?: "poll" | "cdc" | undefined;
|
|
345
454
|
};
|
|
@@ -422,4 +531,4 @@ declare const DetectionConfigSchema: z.ZodDiscriminatedUnion<"mode", [z.ZodObjec
|
|
|
422
531
|
}>]>;
|
|
423
532
|
type DetectionConfig = z.infer<typeof DetectionConfigSchema>;
|
|
424
533
|
|
|
425
|
-
export { type CursorStrategy, CursorStrategySchema, type DetectionConfig, DetectionConfigSchema, type FieldMapping, FieldMappingSchema, type PollDetection, PollDetectionSchema, type ResolvedFilter, ResolvedFilterSchema, type WebhookDetection, WebhookDetectionSchema };
|
|
534
|
+
export { CURSOR_DIVISIBILITY, type CursorStrategy, CursorStrategySchema, type DetectionConfig, DetectionConfigSchema, type FieldMapping, FieldMappingSchema, type PollDetection, PollDetectionSchema, type ResolvedFilter, ResolvedFilterSchema, type WebhookDetection, WebhookDetectionSchema, isDivisibleCursor };
|
|
@@ -26,12 +26,33 @@ var EventIdCursorSchema = z.object({
|
|
|
26
26
|
kind: z.literal("eventId"),
|
|
27
27
|
field: z.string().min(1)
|
|
28
28
|
});
|
|
29
|
+
var HistoryIdCursorSchema = z.object({
|
|
30
|
+
kind: z.literal("historyId"),
|
|
31
|
+
field: z.string().min(1)
|
|
32
|
+
});
|
|
33
|
+
var SyncTokenCursorSchema = z.object({
|
|
34
|
+
kind: z.literal("syncToken"),
|
|
35
|
+
field: z.string().min(1)
|
|
36
|
+
});
|
|
29
37
|
var CursorStrategySchema = z.discriminatedUnion("kind", [
|
|
30
38
|
SystemModstampCursorSchema,
|
|
31
39
|
ReplayIdCursorSchema,
|
|
32
40
|
TimestampCursorSchema,
|
|
33
|
-
EventIdCursorSchema
|
|
41
|
+
EventIdCursorSchema,
|
|
42
|
+
HistoryIdCursorSchema,
|
|
43
|
+
SyncTokenCursorSchema
|
|
34
44
|
]);
|
|
45
|
+
var CURSOR_DIVISIBILITY = {
|
|
46
|
+
systemModstamp: true,
|
|
47
|
+
timestamp: true,
|
|
48
|
+
replayId: true,
|
|
49
|
+
eventId: false,
|
|
50
|
+
historyId: false,
|
|
51
|
+
syncToken: false
|
|
52
|
+
};
|
|
53
|
+
function isDivisibleCursor(kind) {
|
|
54
|
+
return CURSOR_DIVISIBILITY[kind];
|
|
55
|
+
}
|
|
35
56
|
var PollDetectionSchema = z.object({
|
|
36
57
|
cursor: CursorStrategySchema,
|
|
37
58
|
provenance: z.enum(["poll", "cdc"]).optional()
|
|
@@ -56,11 +77,13 @@ var DetectionConfigSchema = z.discriminatedUnion("mode", [
|
|
|
56
77
|
WebhookModeSchema
|
|
57
78
|
]);
|
|
58
79
|
export {
|
|
80
|
+
CURSOR_DIVISIBILITY,
|
|
59
81
|
CursorStrategySchema,
|
|
60
82
|
DetectionConfigSchema,
|
|
61
83
|
FieldMappingSchema,
|
|
62
84
|
PollDetectionSchema,
|
|
63
85
|
ResolvedFilterSchema,
|
|
64
|
-
WebhookDetectionSchema
|
|
86
|
+
WebhookDetectionSchema,
|
|
87
|
+
isDivisibleCursor
|
|
65
88
|
};
|
|
66
89
|
//# sourceMappingURL=detection-config.schema.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../../runtime/subsystems/integration/detection-config.schema.ts"],"sourcesContent":["/**\n * Integration subsystem — DetectionConfig schema (#226-1)\n *\n * Canonical Zod schema for per-entity integration detection config. The schema is\n * the single source of truth for filter/mapping shape and is consumed by:\n *\n * 1. Runtime primitives — `PollChangeSource<T>`, `WebhookChangeSource<T>`\n * (#226-3, #226-4) accept a parsed `DetectionConfig` at construction.\n * 2. Codegen — `src/schema/entity-definition.schema.ts` (#226-6) imports\n * this schema so per-entity YAML `detection:` blocks validate against\n * the same shape the runtime enforces.\n *\n * Locked decisions (see ADR-033 + decision memo Q1–Q6):\n * - Filter vocabulary is flat AND of `{ field, op, value }` triples; richer\n * boolean expressions (OR / NOT / nested) are deferred per epic open Q3.\n * - Cursor strategy is a tagged union over the four shapes the three modes\n * need (`systemModstamp`, `replayId`, `timestamp`, `eventId`). Each\n * strategy types its cursor internally; the orchestrator persists what\n * the iterator last yielded (integration skill rule 2).\n * - `mode: 'poll'` may opt into `provenance: 'cdc'` so Stripe-style event\n * endpoints (mechanically a poll, semantically CDC) reuse the poll\n * primitive while emitting `Change<T>.source = 'cdc'`. Long-lived\n * streaming CDC (SFDC Pub-Sub, Debezium) is a separate primitive\n * deferred to #226-8.\n * - `webhook` mode requires `eventIdField` so `WebhookChangeSource<T>`\n * can populate `Change<T>.dedupKey` from the inbound staging row.\n */\nimport { z } from 'zod';\n\n// ============================================================================\n// Field mapping — provider field → canonical target\n// ============================================================================\n\n/**\n * Maps a single provider field onto the canonical record. 
`transform` is an\n * opt-in tag the adapter callback may inspect (`date-iso`, `decimal-string`,\n * etc.); the schema does not enumerate transforms — adapters interpret them.\n */\nexport const FieldMappingSchema = z.object({\n source: z.string().min(1),\n target: z.string().min(1),\n transform: z.string().min(1).optional(),\n});\n\nexport type FieldMapping = z.infer<typeof FieldMappingSchema>;\n\n// ============================================================================\n// Resolved filter — flat-AND triple\n// ============================================================================\n\n/**\n * A single resolved filter clause applied at fetch time. `value` is `unknown`\n * to admit primitives, arrays (for `in` / `nin`), and dates as ISO strings —\n * adapters interpret per provider.\n */\nexport const ResolvedFilterSchema = z.object({\n field: z.string().min(1),\n op: z.enum(['eq', 'neq', 'in', 'nin', 'gt', 'gte', 'lt', 'lte']),\n value: z.unknown(),\n});\n\nexport type ResolvedFilter = z.infer<typeof ResolvedFilterSchema>;\n\n// ============================================================================\n// Cursor strategy — tagged union over the four shapes the modes need\n// ============================================================================\n\nconst SystemModstampCursorSchema = z.object({\n kind: z.literal('systemModstamp'),\n field: z.string().min(1),\n});\n\nconst ReplayIdCursorSchema = z.object({\n kind: z.literal('replayId'),\n field: z.string().min(1),\n});\n\nconst TimestampCursorSchema = z.object({\n kind: z.literal('timestamp'),\n field: z.string().min(1),\n});\n\nconst EventIdCursorSchema = z.object({\n kind: z.literal('eventId'),\n field: z.string().min(1),\n});\n\nexport const CursorStrategySchema = z.discriminatedUnion('kind', [\n SystemModstampCursorSchema,\n ReplayIdCursorSchema,\n TimestampCursorSchema,\n EventIdCursorSchema,\n]);\n\nexport type CursorStrategy = z.infer<typeof CursorStrategySchema>;\n\n// 
============================================================================\n// Mode-specific blocks\n// ============================================================================\n\n/**\n * Poll-mode block. `provenance: 'cdc'` opts the poll primitive into stamping\n * `Change<T>.source = 'cdc'` and populating `dedupKey` from the cursor's\n * `field` — used for Stripe-style event endpoints. Defaults to `'poll'`.\n */\nexport const PollDetectionSchema = z.object({\n cursor: CursorStrategySchema,\n provenance: z.enum(['poll', 'cdc']).optional(),\n});\n\nexport type PollDetection = z.infer<typeof PollDetectionSchema>;\n\n/**\n * Webhook-mode block. `eventIdField` names the column in the consumer-owned\n * inbound staging row that `WebhookChangeSource<T>` reads to set\n * `Change<T>.dedupKey`.\n */\nexport const WebhookDetectionSchema = z.object({\n eventIdField: z.string().min(1),\n});\n\nexport type WebhookDetection = z.infer<typeof WebhookDetectionSchema>;\n\n// ============================================================================\n// DetectionConfig — top-level discriminated union over `mode`\n// ============================================================================\n\nconst PollModeSchema = z.object({\n mode: z.literal('poll'),\n poll: PollDetectionSchema,\n mapping: z.array(FieldMappingSchema).min(1),\n filters: z.array(ResolvedFilterSchema).default([]),\n});\n\nconst WebhookModeSchema = z.object({\n mode: z.literal('webhook'),\n webhook: WebhookDetectionSchema,\n mapping: z.array(FieldMappingSchema).min(1),\n filters: z.array(ResolvedFilterSchema).default([]),\n});\n\n/**\n * Top-level detection config. Discriminated on `mode` so the relevant\n * mode-block (poll/webhook) is structurally required for that mode. 
CDC as a\n * long-lived streaming primitive is deferred (#226-8); CDC-as-provenance\n * (Stripe-style event endpoints) is expressed via `mode: 'poll'` with\n * `poll.provenance: 'cdc'`.\n */\nexport const DetectionConfigSchema = z.discriminatedUnion('mode', [\n PollModeSchema,\n WebhookModeSchema,\n]);\n\nexport type DetectionConfig = z.infer<typeof DetectionConfigSchema>;\n"],"mappings":";AA2BA,SAAS,SAAS;AAWX,IAAM,qBAAqB,EAAE,OAAO;AAAA,EACzC,QAAQ,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACxB,QAAQ,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACxB,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS;AACxC,CAAC;AAaM,IAAM,uBAAuB,EAAE,OAAO;AAAA,EAC3C,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACvB,IAAI,EAAE,KAAK,CAAC,MAAM,OAAO,MAAM,OAAO,MAAM,OAAO,MAAM,KAAK,CAAC;AAAA,EAC/D,OAAO,EAAE,QAAQ;AACnB,CAAC;AAQD,IAAM,6BAA6B,EAAE,OAAO;AAAA,EAC1C,MAAM,EAAE,QAAQ,gBAAgB;AAAA,EAChC,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC;AACzB,CAAC;AAED,IAAM,uBAAuB,EAAE,OAAO;AAAA,EACpC,MAAM,EAAE,QAAQ,UAAU;AAAA,EAC1B,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC;AACzB,CAAC;AAED,IAAM,wBAAwB,EAAE,OAAO;AAAA,EACrC,MAAM,EAAE,QAAQ,WAAW;AAAA,EAC3B,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC;AACzB,CAAC;AAED,IAAM,sBAAsB,EAAE,OAAO;AAAA,EACnC,MAAM,EAAE,QAAQ,SAAS;AAAA,EACzB,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC;AACzB,CAAC;AAEM,IAAM,uBAAuB,EAAE,mBAAmB,QAAQ;AAAA,EAC/D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAaM,IAAM,sBAAsB,EAAE,OAAO;AAAA,EAC1C,QAAQ;AAAA,EACR,YAAY,EAAE,KAAK,CAAC,QAAQ,KAAK,CAAC,EAAE,SAAS;AAC/C,CAAC;AASM,IAAM,yBAAyB,EAAE,OAAO;AAAA,EAC7C,cAAc,EAAE,OAAO,EAAE,IAAI,CAAC;AAChC,CAAC;AAQD,IAAM,iBAAiB,EAAE,OAAO;AAAA,EAC9B,MAAM,EAAE,QAAQ,MAAM;AAAA,EACtB,MAAM;AAAA,EACN,SAAS,EAAE,MAAM,kBAAkB,EAAE,IAAI,CAAC;AAAA,EAC1C,SAAS,EAAE,MAAM,oBAAoB,EAAE,QAAQ,CAAC,CAAC;AACnD,CAAC;AAED,IAAM,oBAAoB,EAAE,OAAO;AAAA,EACjC,MAAM,EAAE,QAAQ,SAAS;AAAA,EACzB,SAAS;AAAA,EACT,SAAS,EAAE,MAAM,kBAAkB,EAAE,IAAI,CAAC;AAAA,EAC1C,SAAS,EAAE,MAAM,oBAAoB,EAAE,QAAQ,CAAC,CAAC;AACnD,CAAC;AASM,IAAM,wBAAwB,EAAE,mBAAmB,QAAQ;AAAA,EAChE;AAAA,EACA;AACF,CAAC;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../../../../runtime/subsystems/integration/detection-config.schema.ts"],"sourcesContent":["/**\n * Integration subsystem — DetectionConfig schema (#226-1)\n *\n * Canonical Zod schema for per-entity integration detection config. The schema is\n * the single source of truth for filter/mapping shape and is consumed by:\n *\n * 1. Runtime primitives — `PollChangeSource<T>`, `WebhookChangeSource<T>`\n * (#226-3, #226-4) accept a parsed `DetectionConfig` at construction.\n * 2. Codegen — `src/schema/entity-definition.schema.ts` (#226-6) imports\n * this schema so per-entity YAML `detection:` blocks validate against\n * the same shape the runtime enforces.\n *\n * Locked decisions (see ADR-033 + decision memo Q1–Q6):\n * - Filter vocabulary is flat AND of `{ field, op, value }` triples; richer\n * boolean expressions (OR / NOT / nested) are deferred per epic open Q3.\n * - Cursor strategy is a tagged union over the four shapes the three modes\n * need (`systemModstamp`, `replayId`, `timestamp`, `eventId`). Each\n * strategy types its cursor internally; the orchestrator persists what\n * the iterator last yielded (integration skill rule 2).\n * - `mode: 'poll'` may opt into `provenance: 'cdc'` so Stripe-style event\n * endpoints (mechanically a poll, semantically CDC) reuse the poll\n * primitive while emitting `Change<T>.source = 'cdc'`. Long-lived\n * streaming CDC (SFDC Pub-Sub, Debezium) is a separate primitive\n * deferred to #226-8.\n * - `webhook` mode requires `eventIdField` so `WebhookChangeSource<T>`\n * can populate `Change<T>.dedupKey` from the inbound staging row.\n */\nimport { z } from 'zod';\n\n// ============================================================================\n// Field mapping — provider field → canonical target\n// ============================================================================\n\n/**\n * Maps a single provider field onto the canonical record. 
`transform` is an\n * opt-in tag the adapter callback may inspect (`date-iso`, `decimal-string`,\n * etc.); the schema does not enumerate transforms — adapters interpret them.\n */\nexport const FieldMappingSchema = z.object({\n source: z.string().min(1),\n target: z.string().min(1),\n transform: z.string().min(1).optional(),\n});\n\nexport type FieldMapping = z.infer<typeof FieldMappingSchema>;\n\n// ============================================================================\n// Resolved filter — flat-AND triple\n// ============================================================================\n\n/**\n * A single resolved filter clause applied at fetch time. `value` is `unknown`\n * to admit primitives, arrays (for `in` / `nin`), and dates as ISO strings —\n * adapters interpret per provider.\n */\nexport const ResolvedFilterSchema = z.object({\n field: z.string().min(1),\n op: z.enum(['eq', 'neq', 'in', 'nin', 'gt', 'gte', 'lt', 'lte']),\n value: z.unknown(),\n});\n\nexport type ResolvedFilter = z.infer<typeof ResolvedFilterSchema>;\n\n// ============================================================================\n// Cursor strategy — tagged union over the four shapes the modes need\n// ============================================================================\n\nconst SystemModstampCursorSchema = z.object({\n kind: z.literal('systemModstamp'),\n field: z.string().min(1),\n});\n\nconst ReplayIdCursorSchema = z.object({\n kind: z.literal('replayId'),\n field: z.string().min(1),\n});\n\nconst TimestampCursorSchema = z.object({\n kind: z.literal('timestamp'),\n field: z.string().min(1),\n});\n\nconst EventIdCursorSchema = z.object({\n kind: z.literal('eventId'),\n field: z.string().min(1),\n});\n\n/**\n * Gmail `historyId` (RFC-0003 §3) — an opaque, atomic vendor token. 
The next\n * watermark only exists at end-of-walk; there is no resumable mid-walk value.\n * `field` is metadata for codegen/adapters (the response key the token lives on).\n */\nconst HistoryIdCursorSchema = z.object({\n kind: z.literal('historyId'),\n field: z.string().min(1),\n});\n\n/**\n * Google Calendar `syncToken` (RFC-0003 §3) — an opaque, atomic sync token,\n * same divisibility profile as `historyId`.\n */\nconst SyncTokenCursorSchema = z.object({\n kind: z.literal('syncToken'),\n field: z.string().min(1),\n});\n\nexport const CursorStrategySchema = z.discriminatedUnion('kind', [\n SystemModstampCursorSchema,\n ReplayIdCursorSchema,\n TimestampCursorSchema,\n EventIdCursorSchema,\n HistoryIdCursorSchema,\n SyncTokenCursorSchema,\n]);\n\nexport type CursorStrategy = z.infer<typeof CursorStrategySchema>;\n\n// ============================================================================\n// Cursor divisibility (RFC-0003 §3)\n// ============================================================================\n\n/**\n * Whether a cursor strategy is *divisible* — a property of the strategy, not\n * the read primitive. 
Divisible cursors are sortable/monotonic watermarks whose\n * value is meaningful AS OF any single record (HubSpot `systemModstamp`, a\n * `timestamp` field, a Salesforce CDC `replayId`); the read primitive may\n * checkpoint per-ref mid-walk, so a crash resumes from the last delivered ref.\n *\n * Atomic cursors are opaque vendor tokens (Gmail `historyId`, Calendar\n * `syncToken`, a generic `eventId`) whose next value only exists at end-of-walk.\n * The primitive must withhold per-ref cursors and emit the token only at a safe\n * boundary, so an interrupted run never persists an unresumable mid-walk token\n * (it resumes all-or-nothing from the prior token — see `IncrementalReadBase`).\n *\n * `eventId` is classified atomic conservatively: a generic opaque id is treated\n * all-or-nothing unless a concrete strategy proves it monotonically resumable.\n */\nexport const CURSOR_DIVISIBILITY: Readonly<Record<CursorStrategy['kind'], boolean>> = {\n systemModstamp: true,\n timestamp: true,\n replayId: true,\n eventId: false,\n historyId: false,\n syncToken: false,\n};\n\n/** Predicate form of {@link CURSOR_DIVISIBILITY}. */\nexport function isDivisibleCursor(kind: CursorStrategy['kind']): boolean {\n return CURSOR_DIVISIBILITY[kind];\n}\n\n// ============================================================================\n// Mode-specific blocks\n// ============================================================================\n\n/**\n * Poll-mode block. `provenance: 'cdc'` opts the poll primitive into stamping\n * `Change<T>.source = 'cdc'` and populating `dedupKey` from the cursor's\n * `field` — used for Stripe-style event endpoints. Defaults to `'poll'`.\n */\nexport const PollDetectionSchema = z.object({\n cursor: CursorStrategySchema,\n provenance: z.enum(['poll', 'cdc']).optional(),\n});\n\nexport type PollDetection = z.infer<typeof PollDetectionSchema>;\n\n/**\n * Webhook-mode block. 
`eventIdField` names the column in the consumer-owned\n * inbound staging row that `WebhookChangeSource<T>` reads to set\n * `Change<T>.dedupKey`.\n */\nexport const WebhookDetectionSchema = z.object({\n eventIdField: z.string().min(1),\n});\n\nexport type WebhookDetection = z.infer<typeof WebhookDetectionSchema>;\n\n// ============================================================================\n// DetectionConfig — top-level discriminated union over `mode`\n// ============================================================================\n\nconst PollModeSchema = z.object({\n mode: z.literal('poll'),\n poll: PollDetectionSchema,\n mapping: z.array(FieldMappingSchema).min(1),\n filters: z.array(ResolvedFilterSchema).default([]),\n});\n\nconst WebhookModeSchema = z.object({\n mode: z.literal('webhook'),\n webhook: WebhookDetectionSchema,\n mapping: z.array(FieldMappingSchema).min(1),\n filters: z.array(ResolvedFilterSchema).default([]),\n});\n\n/**\n * Top-level detection config. Discriminated on `mode` so the relevant\n * mode-block (poll/webhook) is structurally required for that mode. 
CDC as a\n * long-lived streaming primitive is deferred (#226-8); CDC-as-provenance\n * (Stripe-style event endpoints) is expressed via `mode: 'poll'` with\n * `poll.provenance: 'cdc'`.\n */\nexport const DetectionConfigSchema = z.discriminatedUnion('mode', [\n PollModeSchema,\n WebhookModeSchema,\n]);\n\nexport type DetectionConfig = z.infer<typeof DetectionConfigSchema>;\n"],"mappings":";AA2BA,SAAS,SAAS;AAWX,IAAM,qBAAqB,EAAE,OAAO;AAAA,EACzC,QAAQ,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACxB,QAAQ,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACxB,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS;AACxC,CAAC;AAaM,IAAM,uBAAuB,EAAE,OAAO;AAAA,EAC3C,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACvB,IAAI,EAAE,KAAK,CAAC,MAAM,OAAO,MAAM,OAAO,MAAM,OAAO,MAAM,KAAK,CAAC;AAAA,EAC/D,OAAO,EAAE,QAAQ;AACnB,CAAC;AAQD,IAAM,6BAA6B,EAAE,OAAO;AAAA,EAC1C,MAAM,EAAE,QAAQ,gBAAgB;AAAA,EAChC,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC;AACzB,CAAC;AAED,IAAM,uBAAuB,EAAE,OAAO;AAAA,EACpC,MAAM,EAAE,QAAQ,UAAU;AAAA,EAC1B,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC;AACzB,CAAC;AAED,IAAM,wBAAwB,EAAE,OAAO;AAAA,EACrC,MAAM,EAAE,QAAQ,WAAW;AAAA,EAC3B,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC;AACzB,CAAC;AAED,IAAM,sBAAsB,EAAE,OAAO;AAAA,EACnC,MAAM,EAAE,QAAQ,SAAS;AAAA,EACzB,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC;AACzB,CAAC;AAOD,IAAM,wBAAwB,EAAE,OAAO;AAAA,EACrC,MAAM,EAAE,QAAQ,WAAW;AAAA,EAC3B,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC;AACzB,CAAC;AAMD,IAAM,wBAAwB,EAAE,OAAO;AAAA,EACrC,MAAM,EAAE,QAAQ,WAAW;AAAA,EAC3B,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC;AACzB,CAAC;AAEM,IAAM,uBAAuB,EAAE,mBAAmB,QAAQ;AAAA,EAC/D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAwBM,IAAM,sBAAyE;AAAA,EACpF,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,UAAU;AAAA,EACV,SAAS;AAAA,EACT,WAAW;AAAA,EACX,WAAW;AACb;AAGO,SAAS,kBAAkB,MAAuC;AACvE,SAAO,oBAAoB,IAAI;AACjC;AAWO,IAAM,sBAAsB,EAAE,OAAO;AAAA,EAC1C,QAAQ;AAAA,EACR,YAAY,EAAE,KAAK,CAAC,QAAQ,KAAK,CAAC,EAAE,SAAS;AAC/C,CAAC;AASM,IAAM,yBAAyB,EAAE,OAAO;AAAA,EAC7C,cAAc,EAAE,OAAO,EAAE,IAAI,CAAC;AAChC,CAAC;AAQD,IAAM,iBAAiB,EAAE,OAAO;AAAA,EAC9B,MAAM,EAAE,QAAQ,MAAM;AAAA,EACtB,MAAM;AAAA,EACN,SAAS,EAAE,MAAM,kBAAkB,EAAE,IAAI
,CAAC;AAAA,EAC1C,SAAS,EAAE,MAAM,oBAAoB,EAAE,QAAQ,CAAC,CAAC;AACnD,CAAC;AAED,IAAM,oBAAoB,EAAE,OAAO;AAAA,EACjC,MAAM,EAAE,QAAQ,SAAS;AAAA,EACzB,SAAS;AAAA,EACT,SAAS,EAAE,MAAM,kBAAkB,EAAE,IAAI,CAAC;AAAA,EAC1C,SAAS,EAAE,MAAM,oBAAoB,EAAE,QAAQ,CAAC,CAAC;AACnD,CAAC;AASM,IAAM,wBAAwB,EAAE,mBAAmB,QAAQ;AAAA,EAChE;AAAA,EACA;AACF,CAAC;","names":[]}
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
import { IntegrationSubscriptionView, IChangeSource, ChangeSource, Change } from './integration-change-source.protocol.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Integration subsystem — `IncrementalRead<T, F>` + `RandomRead<T>` capability
|
|
5
|
+
* and the providing `IncrementalReadBase<T, F, M>` (RFC-0003 R1).
|
|
6
|
+
*
|
|
7
|
+
* The universal read primitive. Where `IChangeSource.listChanges` is the
|
|
8
|
+
* *transport* contract (stream `Change<T>`, orchestrator owns cursor lifecycle),
|
|
9
|
+
* this base owns *how the body that produces those changes is written* — the
|
|
10
|
+
* level the bare `changeSources = {}` author-seam left unstructured.
|
|
11
|
+
*
|
|
12
|
+
* The read decomposes into two composable verbs the adapter supplies:
|
|
13
|
+
*
|
|
14
|
+
* - `enumerate(mode, filter) → AsyncIterable<Ref<M>[]>` — the cheap delta /
|
|
15
|
+
* backfill walk; streams pages of lightweight refs (id + per-ref cursor +
|
|
16
|
+
* filterable metadata). LAZY: pull-driven so hydrate backpressures it.
|
|
17
|
+
* - `hydrate(ids) → Map<id, raw>` — the expensive fetch-by-id, batched; where
|
|
18
|
+
* bounded concurrency / a vendor `/batch` endpoint lives. Keyed and
|
|
19
|
+
* miss-tolerant (a mid-run 404 cannot shift alignment).
|
|
20
|
+
* - `toCanonical(raw) → T | null` — provider payload → canonical record.
|
|
21
|
+
*
|
|
22
|
+
* The base PROVIDES the orchestration: drain enumerate, **filter before
|
|
23
|
+
* hydrate** (structural — an adapter physically cannot hydrate-then-discard),
|
|
24
|
+
* keyed pairing, per-ref cursor emission, and the `IChangeSource.listChanges`
|
|
25
|
+
* adaptation. It also provides `RandomRead.get()` for free as
|
|
26
|
+
* `toCanonical ∘ hydrate([id])` — so every incremental adapter is a
|
|
27
|
+
* single-record reader (the "list cheaply, fill on click" query-surface need)
|
|
28
|
+
* without extra code.
|
|
29
|
+
*
|
|
30
|
+
* The shape generalizes dealbrain's proven HubSpot `listSince` (streams, pushes
|
|
31
|
+
* the filter server-side, carries a per-record cursor) to vendors whose list
|
|
32
|
+
* returns id-stubs (Gmail) or nested resources (Meet). Calendar-style
|
|
33
|
+
* full-object lists override `hydrate` as a passthrough.
|
|
34
|
+
*
|
|
35
|
+
* See RFC-0003 (Track D round-3), ADR-033 (`detection:` config), and
|
|
36
|
+
* `poll-change-source.ts` (the sibling primitive this composes beside).
|
|
37
|
+
*/
|
|
38
|
+
|
|
39
|
+
/**
 * The walk strategy for one read, expressed as data rather than as separate
 * verbs (swe-brain ADR-0003: mode ≠ capability). The single `read()` verb
 * dispatches on `kind`:
 *
 * - `delta`     — incremental walk resuming from a persisted `cursor`.
 * - `full`      — cursorless backfill, optionally bounded by `since`.
 * - `reconcile` — gap-repair: re-fetch the `knownIds` the cursor skipped (the
 *                 repair pass for the silent-tail-skip + #414-style
 *                 multi-provider divergence).
 */
type ReadMode =
  | { readonly kind: 'delta'; readonly cursor: unknown }
  | { readonly kind: 'full'; readonly since?: Date }
  | { readonly kind: 'reconcile'; readonly knownIds: readonly string[] };
|
|
59
|
+
/**
 * One cheap entry produced by the enumerate pass.
 *
 * Carries the record's identity (`externalId`), the cursor position AS OF this
 * entry (`cursor`), and the metadata (`meta`) that can be filtered or displayed
 * on without hydrating. Whether `cursor` may be checkpointed mid-walk or must
 * be withheld until a safe boundary is decided by
 * `IncrementalReadBase.cursorDivisible` (R2).
 */
interface Ref<M = Record<string, unknown>> {
  readonly externalId: string;
  readonly cursor: unknown;
  readonly meta: M;
}
|
|
70
|
+
/**
 * What a caller hands to `read()`: the walk `mode`, an optional filter whose
 * shape is chosen by the adapter (`F`), and an optional page size.
 */
interface ReadRequest<F = unknown> {
  readonly mode: ReadMode;
  readonly filter?: F;
  readonly pageSize?: number;
}
|
|
76
|
+
/**
 * Per-run context that `listChanges` threads into the vendor read body (R5).
 *
 * It carries the `subscription` framing the run, so `enumerate` / `hydrate`
 * can resolve **per-connection credentials** (and raw-landing keys) from
 * `subscription.externalRef`. This closes the gap a multi-account consumer
 * surfaced: a singleton change source cannot hold connection-scoped auth, and
 * before R5 the base forwarded the subscription only into `filterFor`, never
 * into the fetch.
 *
 * Everything here is optional (the core contract): a direct `read()` / `get()`
 * call — the query surface's "fill one record on click" — may omit it. An
 * adapter needing per-connection auth reads `ctx?.subscription?.externalRef`
 * and asserts its presence; an adapter with provider-level auth ignores it.
 */
interface ReadContext {
  /**
   * The subscription framing this run. Its `externalRef` is the upstream scope /
   * connection id from which the adapter resolves credentials and raw-landing
   * keys.
   */
  readonly subscription?: IntegrationSubscriptionView;
}
|
|
95
|
+
/**
 * The envelope yielded by `read()`: the canonical `record`, the `raw` vendor
 * payload it was derived from, the originating `externalId`, and the per-ref
 * `cursor`.
 *
 * This is NOT the runtime's transport envelope `Change<T>`
 * (operation/externalId/cursor/source). The relationship runs one way only:
 * `listChanges()` adapts `read()` → `Change<T>`, dropping `raw` and stamping
 * `operation`. `read()` keeps `raw` and `externalId` so that a query surface
 * can re-project without a second fetch.
 */
interface SourcedRecord<T> {
  readonly externalId: string;
  readonly record: T;
  readonly raw: unknown;
  readonly cursor: unknown;
}
|
|
111
|
+
/**
 * The universal read capability: a single public verb that streams
 * `SourcedRecord`s. Filtering, hydration, and cursor emission are handled by
 * the providing base, not by callers of this interface.
 */
interface IncrementalRead<T, F = unknown> {
  read(
    req: ReadRequest<F>,
    ctx?: ReadContext,
  ): AsyncIterable<SourcedRecord<T>>;
}
|
|
118
|
+
/**
 * Read one record by its external id — the "fill on click" atom.
 *
 * `IncrementalReadBase` provides it for free by composing `hydrate` with
 * `toCanonical`. It is declared as a capability of its own so a consumer can
 * depend on single-record reads without pulling in the streaming surface.
 */
interface RandomRead<T> {
  get(
    id: string,
    ctx?: ReadContext,
  ): Promise<T | null>;
}
|
|
127
|
+
/**
 * Apply `fn` to every entry of `ids`, keeping at most `limit` calls in flight
 * at once, and collect the results in a map keyed by id.
 *
 * This is the workhorse for building a batched `hydrate` on top of a
 * single-id fetch without paying serial N+1 latency.
 *
 * @param ids   External ids to process.
 * @param fn    Async per-id operation producing the value stored for that id.
 * @param limit Maximum number of concurrent in-flight `fn` calls.
 * @returns A map from each id to the value `fn` resolved with.
 */
declare function mapConcurrent<R>(
  ids: readonly string[],
  fn: (id: string) => Promise<R>,
  limit: number,
): Promise<Map<string, R>>;
|
|
133
|
+
/**
 * The base class that PROVIDES the read capability.
 *
 * A subclass implements exactly three vendor methods — `enumerate`, `hydrate`,
 * `toCanonical` — and in return receives a streaming, filter-before-hydrate,
 * miss-tolerant `IncrementalRead<T, F>` together with `IChangeSource<T>` and
 * `RandomRead<T>`.
 *
 * Type parameters:
 * - `T` — the canonical record.
 * - `F` — the adapter-typed filter.
 * - `M` — per-ref metadata; defaults to an untyped bag, surface packages supply
 *         a domain-specific `M`.
 */
declare abstract class IncrementalReadBase<T, F = unknown, M = Record<string, unknown>>
  implements IncrementalRead<T, F>, IChangeSource<T>, RandomRead<T>
{
  /** Label shown in run logs, for example `'google-mail-email'`. */
  abstract readonly label: string;

  /**
   * Declares whether the vendor evaluates the request predicate server-side.
   * The base does not enforce it; the value is surfaced into the emission
   * manifest (R3) so the falsifier suite (R4) can record which adapters filter
   * post-hydrate. `false` is the honest floor (e.g. Gmail without `q=`) and is
   * handled through `matchesRecord`.
   */
  protected readonly filterPushdown: boolean;

  /** Upper bound on concurrent in-flight calls for a `hydrate` built on `mapConcurrent`. */
  protected readonly hydrateConcurrency: number;

  /** Provenance written to `Change<T>.source` by `listChanges`. */
  protected readonly changeSource: ChangeSource;

  /**
   * Declares whether this source's cursor strategy is divisible (RFC-0003 §3).
   *
   * `true` (the default — sortable watermarks such as `systemModstamp`,
   * `timestamp`, `replayId`): `listChanges` emits every record's per-ref
   * cursor, so the orchestrator may checkpoint mid-walk and a crash resumes
   * from the last delivered ref.
   *
   * `false` (atomic opaque tokens — Gmail `historyId`, Calendar `syncToken`):
   * `listChanges` WITHHOLDS per-ref cursors and attaches the end-of-walk token
   * to the final record only, so the orchestrator's persist-last-yielded
   * lifecycle can never store an unresumable mid-walk token. The price is
   * blast radius: an interrupted atomic run resumes all-or-nothing from the
   * previously persisted token. For atomic *backfills* that radius is the
   * entire enumerate walk — bound it with `ReadRequest.pageSize` (smaller
   * pages ⇒ shorter walks per run). Per-page atomic checkpointing is a future
   * refinement; R2 gates at end-of-walk.
   *
   * Codegen (R3) derives the value from the strategy kind via
   * `isDivisibleCursor`.
   */
  protected readonly cursorDivisible: boolean;

  /**
   * The cheap walk: streams pages of refs. It is LAZY so that `hydrate`
   * backpressures it — one page is hydrated before the next is pulled.
   * Mode dispatch happens here: `delta` resumes from `mode.cursor`, `full`
   * walks from the top, and `reconcile` re-fetches `mode.knownIds`.
   *
   * `pageSize` (taken from `ReadRequest`) is the vendor page size the adapter
   * is asked to use, and doubles as the atomic-cursor backfill blast-radius
   * bound (see `cursorDivisible`). Treat it as a hint; vendors that cap page
   * size clamp it.
   *
   * `ctx?.subscription` (R5) carries the run's subscription, letting a
   * per-connection adapter resolve credentials / upstream scope from
   * `externalRef`. It is absent on a direct `read()` made without a run
   * subscription.
   */
  protected abstract enumerate(
    mode: ReadMode,
    filter?: F,
    pageSize?: number,
    ctx?: ReadContext,
  ): AsyncIterable<Ref<M>[]>;

  /**
   * Fetch the raw payloads for `ids` and return them keyed by id.
   *
   * MUST be miss-tolerant: an id that 404s mid-run is omitted (or mapped to
   * `null`) instead of throwing or shifting alignment. The usual
   * implementation is `mapConcurrent(ids, (id) => this.fetchOne(id),
   * this.hydrateConcurrency)`; replace it with a real `/batch` call, or with a
   * passthrough for full-object lists, where the vendor allows.
   *
   * `ctx?.subscription` (R5) carries the run's subscription for per-connection
   * credential resolution (this is where the vendor call happens) and is the
   * natural place to land raw payloads keyed by `subscription.id`.
   */
  protected abstract hydrate(
    ids: string[],
    ctx?: ReadContext,
  ): Promise<Map<string, unknown>>;

  /** Convert a provider payload into a canonical record; `null` drops the record. */
  protected abstract toCanonical(raw: unknown): T | null;

  /** Predicate over the cheap ref, run before hydration (preferred — it avoids the fetch). */
  protected matchesRef(_ref: Ref<M>, _filter?: F): boolean;

  /** Predicate over the canonical record, run after hydration (the no-pushdown floor). */
  protected matchesRecord(_record: T, _filter?: F): boolean;

  /**
   * Produce the filter for a subscription when adapting to `listChanges`,
   * which takes no filter argument of its own. The default is no filter;
   * codegen wiring (R3) overrides this to thread `DetectionConfig.filters`.
   */
  protected filterFor(_subscription: IntegrationSubscriptionView): F | undefined;

  /**
   * Stream canonical records for a request. The ref filter runs BEFORE hydrate
   * (structurally: a kept ref is hydrated, a rejected ref never is), so an
   * adapter cannot hydrate-then-discard. A hydrate miss (record deleted
   * mid-run) is skipped, never fabricated.
   */
  read(
    req: ReadRequest<F>,
    ctx?: ReadContext,
  ): AsyncIterable<SourcedRecord<T>>;

  /**
   * `RandomRead<T>`: single-record read provided for free as
   * `toCanonical ∘ hydrate([id])`. It reuses the adapter's batched fetch and
   * its miss tolerance, and resolves to `null` for a missing or undecodable
   * record.
   */
  get(id: string, ctx?: ReadContext): Promise<T | null>;

  /**
   * `IChangeSource<T>` adaptation. Translates the orchestrator's by-value
   * cursor into a `ReadMode` (`null` → `full` backfill, anything else →
   * `delta`), streams `read()`, and stamps each `SourcedRecord` into a
   * `Change<T>`. Every record surfaces as `'updated'`; the orchestrator's diff
   * stage classifies create-vs-update, and deletes arrive as tombstone refs
   * (`toCanonical` may flag them).
   *
   * Cursor emission follows `cursorDivisible` (RFC-0003 §3). Divisible: each
   * record carries its own per-ref cursor. Atomic: per-ref cursors are
   * withheld (`undefined`, which the orchestrator skips persisting) and the
   * end-of-walk token rides only on the final record — so a mid-walk crash
   * never persists an unresumable token. When an atomic run yields no
   * surviving records, no cursor is persisted and the next run re-reads the
   * same (empty) delta — a bounded inefficiency, never data loss.
   */
  listChanges(
    subscription: IntegrationSubscriptionView,
    cursor: unknown | null,
  ): AsyncIterable<Change<T>>;

  /** Stamps a `SourcedRecord` into a `Change<T>` using an explicitly supplied emitted cursor. */
  private toChange;
}
|
|
247
|
+
|
|
248
|
+
export { type IncrementalRead, IncrementalReadBase, type RandomRead, type ReadContext, type ReadMode, type ReadRequest, type Ref, type SourcedRecord, mapConcurrent };
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
// runtime/subsystems/integration/incremental-read.ts
|
|
2
|
+
/**
 * Apply `fn` to every id with at most `limit` calls in flight at once and
 * collect the resolved values in a `Map` keyed by id.
 *
 * Work is pulled from a shared index by `width` workers, so a slow call only
 * stalls its own worker. If an id occurs more than once, `fn` runs for each
 * occurrence and the map keeps whichever result is written last.
 *
 * @param ids   Ids to process; an empty list resolves to an empty map.
 * @param fn    Async per-id operation. A rejection rejects the returned promise.
 * @param limit Maximum concurrent `fn` calls. Clamped to `[1, ids.length]`;
 *              a NaN / non-numeric value degrades to a serial walk.
 * @returns A promise for the id → result map.
 */
async function mapConcurrent(ids, fn, limit) {
  const out = /* @__PURE__ */ new Map();
  if (ids.length === 0) return out;
  // A NaN / non-numeric `limit` used to flow through Math.min/Math.max into
  // `Array.from({ length: NaN })`, which builds ZERO workers — the call then
  // resolved to an empty map and silently dropped every id. Fall back to one
  // worker instead so every id is still processed.
  const requested = Math.floor(Number(limit));
  const width = Number.isNaN(requested)
    ? 1
    : Math.max(1, Math.min(requested, ids.length));
  let next = 0;
  const worker = async () => {
    // `next++` runs synchronously before the await, so each index is claimed
    // by exactly one worker.
    while (next < ids.length) {
      const id = ids[next++];
      out.set(id, await fn(id));
    }
  };
  await Promise.all(Array.from({ length: width }, worker));
  return out;
}
|
|
17
|
+
var IncrementalReadBase = class {
|
|
18
|
+
/**
|
|
19
|
+
* Whether the vendor takes the request predicate server-side. Declared, not
|
|
20
|
+
* enforced here — surfaced into the emission manifest (R3) so the falsifier
|
|
21
|
+
* suite (R4) can record which adapters filter post-hydrate. `false` is the
|
|
22
|
+
* honest floor (e.g. Gmail without `q=`), handled via `matchesRecord`.
|
|
23
|
+
*/
|
|
24
|
+
filterPushdown = false;
|
|
25
|
+
/** Max concurrent in-flight calls for a `mapConcurrent`-built `hydrate`. */
|
|
26
|
+
hydrateConcurrency = 10;
|
|
27
|
+
/** `Change<T>.source` provenance stamped by `listChanges`. */
|
|
28
|
+
changeSource = "poll";
|
|
29
|
+
/**
|
|
30
|
+
* Whether this source's cursor strategy is divisible (RFC-0003 §3). When
|
|
31
|
+
* `true` (default — sortable watermarks like `systemModstamp`/`timestamp`/
|
|
32
|
+
* `replayId`), `listChanges` emits each record's per-ref cursor, so the
|
|
33
|
+
* orchestrator may checkpoint mid-walk and a crash resumes from the last
|
|
34
|
+
* delivered ref.
|
|
35
|
+
*
|
|
36
|
+
* When `false` (atomic opaque tokens — Gmail `historyId`, Calendar
|
|
37
|
+
* `syncToken`), `listChanges` WITHHOLDS per-ref cursors and emits the
|
|
38
|
+
* end-of-walk token only on the final record, so the orchestrator's
|
|
39
|
+
* persist-last-yielded lifecycle can never persist an unresumable mid-walk
|
|
40
|
+
* token. The cost is blast-radius: an interrupted atomic run resumes
|
|
41
|
+
* all-or-nothing from the prior persisted token. For atomic *backfills* that
|
|
42
|
+
* radius is the whole enumerate walk — bound it with `ReadRequest.pageSize`
|
|
43
|
+
* (smaller pages ⇒ shorter walks per run). Per-page atomic checkpointing is a
|
|
44
|
+
* future refinement; R2 gates at end-of-walk.
|
|
45
|
+
*
|
|
46
|
+
* Codegen (R3) sets this from the strategy kind via `isDivisibleCursor`.
|
|
47
|
+
*/
|
|
48
|
+
cursorDivisible = true;
|
|
49
|
+
// ---- Optional filter hooks — exactly one is live per `filterPushdown` ----
|
|
50
|
+
/** Pre-hydrate predicate over the cheap ref (preferred — avoids hydration). */
|
|
51
|
+
matchesRef(_ref, _filter) {
|
|
52
|
+
return true;
|
|
53
|
+
}
|
|
54
|
+
/** Post-hydrate predicate over the canonical record (the no-pushdown floor). */
|
|
55
|
+
matchesRecord(_record, _filter) {
|
|
56
|
+
return true;
|
|
57
|
+
}
|
|
58
|
+
/**
|
|
59
|
+
* Resolve the filter for a subscription when adapting to `listChanges`
|
|
60
|
+
* (which has no filter argument). Defaults to none; codegen wiring (R3)
|
|
61
|
+
* overrides this to thread `DetectionConfig.filters`.
|
|
62
|
+
*/
|
|
63
|
+
filterFor(_subscription) {
|
|
64
|
+
return void 0;
|
|
65
|
+
}
|
|
66
|
+
// ---- PROVIDED by the base ----
|
|
67
|
+
/**
|
|
68
|
+
* Stream canonical records for a request. Filter is applied BEFORE hydrate
|
|
69
|
+
* (structural: a kept ref is hydrated, a rejected one never is), so an
|
|
70
|
+
* adapter cannot hydrate-then-discard. A hydrate miss (deleted mid-run) is
|
|
71
|
+
* skipped, never fabricated.
|
|
72
|
+
*/
|
|
73
|
+
async *read(req, ctx) {
|
|
74
|
+
for await (const refPage of this.enumerate(req.mode, req.filter, req.pageSize, ctx)) {
|
|
75
|
+
const kept = refPage.filter((ref) => this.matchesRef(ref, req.filter));
|
|
76
|
+
if (kept.length === 0) continue;
|
|
77
|
+
const raws = await this.hydrate(
|
|
78
|
+
kept.map((ref) => ref.externalId),
|
|
79
|
+
ctx
|
|
80
|
+
);
|
|
81
|
+
for (const ref of kept) {
|
|
82
|
+
const raw = raws.get(ref.externalId);
|
|
83
|
+
if (raw === void 0 || raw === null) continue;
|
|
84
|
+
const record = this.toCanonical(raw);
|
|
85
|
+
if (record !== null && this.matchesRecord(record, req.filter)) {
|
|
86
|
+
yield { externalId: ref.externalId, record, raw, cursor: ref.cursor };
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
/**
|
|
92
|
+
* `RandomRead<T>` — single-record read, provided for free as
|
|
93
|
+
* `toCanonical ∘ hydrate([id])`. Reuses the adapter's batched fetch + miss
|
|
94
|
+
* tolerance; returns `null` for a missing or undecodable record.
|
|
95
|
+
*/
|
|
96
|
+
async get(id, ctx) {
|
|
97
|
+
const raws = await this.hydrate([id], ctx);
|
|
98
|
+
const raw = raws.get(id);
|
|
99
|
+
if (raw === void 0 || raw === null) return null;
|
|
100
|
+
return this.toCanonical(raw);
|
|
101
|
+
}
|
|
102
|
+
/**
|
|
103
|
+
* `IChangeSource<T>` adaptation. Maps the orchestrator's by-value cursor to a
|
|
104
|
+
* `ReadMode` (`null` → `full` backfill, else `delta`), streams `read()`, and
|
|
105
|
+
* stamps each `SourcedRecord` into a `Change<T>`. All records surface as
|
|
106
|
+
* `'updated'`; the orchestrator's diff stage classifies create-vs-update and
|
|
107
|
+
* deletes arrive as tombstone refs (`toCanonical` may flag them).
|
|
108
|
+
*
|
|
109
|
+
* Cursor emission honors `cursorDivisible` (RFC-0003 §3). Divisible: each
|
|
110
|
+
* record carries its own per-ref cursor. Atomic: per-ref cursors are withheld
|
|
111
|
+
* (`undefined`, which the orchestrator skips persisting) and the end-of-walk
|
|
112
|
+
* token rides only on the final record — so a mid-walk crash never persists
|
|
113
|
+
* an unresumable token. If an atomic run yields no surviving records, no
|
|
114
|
+
* cursor is persisted and the next run re-reads the same (empty) delta — a
|
|
115
|
+
* bounded inefficiency, never data loss.
|
|
116
|
+
*/
|
|
117
|
+
async *listChanges(subscription, cursor) {
|
|
118
|
+
const mode = cursor === null || cursor === void 0 ? { kind: "full" } : { kind: "delta", cursor };
|
|
119
|
+
const filter = this.filterFor(subscription);
|
|
120
|
+
const stream = this.read({ mode, filter }, { subscription });
|
|
121
|
+
if (this.cursorDivisible) {
|
|
122
|
+
for await (const sourced of stream) {
|
|
123
|
+
yield this.toChange(sourced, sourced.cursor);
|
|
124
|
+
}
|
|
125
|
+
return;
|
|
126
|
+
}
|
|
127
|
+
let prev = null;
|
|
128
|
+
for await (const sourced of stream) {
|
|
129
|
+
if (prev !== null) yield this.toChange(prev, void 0);
|
|
130
|
+
prev = sourced;
|
|
131
|
+
}
|
|
132
|
+
if (prev !== null) yield this.toChange(prev, prev.cursor);
|
|
133
|
+
}
|
|
134
|
+
/** Stamp a `SourcedRecord` into a `Change<T>` with an explicit emitted cursor. */
|
|
135
|
+
toChange(sourced, cursor) {
|
|
136
|
+
return {
|
|
137
|
+
externalId: sourced.externalId,
|
|
138
|
+
operation: "updated",
|
|
139
|
+
record: sourced.record,
|
|
140
|
+
cursor,
|
|
141
|
+
source: this.changeSource
|
|
142
|
+
};
|
|
143
|
+
}
|
|
144
|
+
};
|
|
145
|
+
export {
|
|
146
|
+
IncrementalReadBase,
|
|
147
|
+
mapConcurrent
|
|
148
|
+
};
|
|
149
|
+
//# sourceMappingURL=incremental-read.js.map
|