@pattern-stack/codegen 0.12.2 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pattern-stack/codegen",
3
- "version": "0.12.2",
3
+ "version": "0.13.0",
4
4
  "description": "Entity-driven code generation for full-stack TypeScript applications",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -63,8 +63,42 @@ export {
63
63
  export type {
64
64
  IEntityChangeSourceRegistry,
65
65
  IChangeSource,
66
+ IntegrationSubscriptionView,
66
67
  } from './integration';
67
68
 
69
+ // Integration — IncrementalRead read primitive (RFC-0003 R1). Re-exported here
70
+ // so surface packages can author enumerate/hydrate adapters across the package
71
+ // boundary via @pattern-stack/codegen/subsystems. ResolvedFilter rides along:
72
+ // the R3 read-primitive scaffold imports it for its static `detection.filters`
73
+ // const and the `F = ResolvedFilter[]` type parameter.
74
+ export {
75
+ CURSOR_DIVISIBILITY,
76
+ IncrementalReadBase,
77
+ isDivisibleCursor,
78
+ mapConcurrent,
79
+ } from './integration';
80
+ export type {
81
+ IncrementalRead,
82
+ RandomRead,
83
+ ReadMode,
84
+ ReadRequest,
85
+ Ref,
86
+ ResolvedFilter,
87
+ SourcedRecord,
88
+ } from './integration';
89
+
90
+ // Integration — assembly emission (RFC-0002). The generated per-entity sink
91
+ // imports `IIntegrationSink`; the generated per-entity assembly module imports
92
+ // `ExecuteIntegrationUseCase` + `INTEGRATION_CHANGE_SOURCE` + `INTEGRATION_SINK`
93
+ // — all from `@pattern-stack/codegen/subsystems`. Forwarded here so the emitted
94
+ // `src/integrations/**` tree resolves them across the package boundary.
95
+ export {
96
+ ExecuteIntegrationUseCase,
97
+ INTEGRATION_CHANGE_SOURCE,
98
+ INTEGRATION_SINK,
99
+ } from './integration';
100
+ export type { IIntegrationSink } from './integration';
101
+
68
102
  // Auth
69
103
  export {
70
104
  ENCRYPTION_KEY,
@@ -85,15 +85,70 @@ const EventIdCursorSchema = z.object({
85
85
  field: z.string().min(1),
86
86
  });
87
87
 
/**
 * Cursor strategy for Gmail's `historyId` (RFC-0003 §3).
 *
 * The token is opaque and atomic: the next watermark is only known once the
 * walk has finished, so there is nothing to resume from part-way through.
 * `field` names the response key the token is read from; it is metadata for
 * codegen and adapters and must be a non-empty string.
 */
const HistoryIdCursorSchema = z.object({ kind: z.literal('historyId'), field: z.string().min(1) });
97
+
/**
 * Cursor strategy for Google Calendar's `syncToken` (RFC-0003 §3).
 *
 * An opaque, atomic sync token with the same divisibility profile as
 * `historyId`. `field` must be a non-empty string.
 */
const SyncTokenCursorSchema = z.object({ kind: z.literal('syncToken'), field: z.string().min(1) });
106
+
/**
 * Every supported cursor strategy, discriminated on the `kind` literal.
 * The divisible kinds are listed first, followed by the atomic/opaque ones.
 */
export const CursorStrategySchema = z.discriminatedUnion('kind', [
  // Sortable / monotonic watermarks.
  SystemModstampCursorSchema,
  ReplayIdCursorSchema,
  TimestampCursorSchema,
  // Opaque identifiers and vendor tokens.
  EventIdCursorSchema,
  HistoryIdCursorSchema,
  SyncTokenCursorSchema,
]);

/** Static type of a validated cursor strategy, inferred from the schema. */
export type CursorStrategy = z.infer<typeof CursorStrategySchema>;
96
117
 
118
+ // ============================================================================
119
+ // Cursor divisibility (RFC-0003 §3)
120
+ // ============================================================================
121
+
/**
 * Whether a cursor strategy is *divisible* — a property of the strategy, not
 * of the read primitive.
 *
 * Divisible cursors are sortable/monotonic watermarks whose value is
 * meaningful AS OF any single record (HubSpot `systemModstamp`, a `timestamp`
 * field, a Salesforce CDC `replayId`). The read primitive may checkpoint
 * per-ref mid-walk, so a crash resumes from the last delivered ref.
 *
 * Atomic cursors are opaque vendor tokens (Gmail `historyId`, Calendar
 * `syncToken`, a generic `eventId`) whose next value only exists at
 * end-of-walk. The primitive must withhold per-ref cursors and emit the token
 * only at a safe boundary, so an interrupted run never persists an
 * unresumable mid-walk token (it resumes all-or-nothing from the prior token —
 * see `IncrementalReadBase`).
 *
 * `eventId` is classified atomic conservatively: a generic opaque id is
 * treated all-or-nothing unless a concrete strategy proves it monotonically
 * resumable.
 *
 * The table is frozen: `Readonly<…>` is erased at compile time, and this
 * object decides whether mid-walk cursors may be persisted, so a stray runtime
 * write (plain-JS consumer, cast) must fail loudly instead of silently
 * changing checkpoint semantics. The explicit type argument keeps the literal
 * checked for both missing and misspelled keys.
 */
export const CURSOR_DIVISIBILITY: Readonly<Record<CursorStrategy['kind'], boolean>> =
  Object.freeze<Record<CursorStrategy['kind'], boolean>>({
    systemModstamp: true,
    timestamp: true,
    replayId: true,
    eventId: false,
    historyId: false,
    syncToken: false,
  });
146
+
/**
 * Predicate form of {@link CURSOR_DIVISIBILITY}.
 *
 * @param kind - The cursor strategy discriminator.
 * @returns `true` only when the table explicitly marks `kind` as divisible.
 *
 * The strict comparison matters for callers that bypass the type system: an
 * unknown kind would otherwise yield `undefined`, and a key such as
 * `'constructor'` would yield a truthy inherited member. Anything not
 * explicitly divisible is reported as atomic, which is the conservative
 * (withhold-the-cursor) classification.
 */
export function isDivisibleCursor(kind: CursorStrategy['kind']): boolean {
  return CURSOR_DIVISIBILITY[kind] === true;
}
151
+
97
152
  // ============================================================================
98
153
  // Mode-specific blocks
99
154
  // ============================================================================
@@ -0,0 +1,345 @@
1
+ /**
2
+ * Integration subsystem — `IncrementalRead<T, F>` + `RandomRead<T>` capability
3
+ * and the providing `IncrementalReadBase<T, F, M>` (RFC-0003 R1).
4
+ *
5
+ * The universal read primitive. Where `IChangeSource.listChanges` is the
6
+ * *transport* contract (stream `Change<T>`, orchestrator owns cursor lifecycle),
7
+ * this base owns *how the body that produces those changes is written* — the
8
+ * level the bare `changeSources = {}` author-seam left unstructured.
9
+ *
10
+ * The read decomposes into two composable verbs plus a mapper, all supplied by the adapter:
11
+ *
12
+ * - `enumerate(mode, filter, pageSize) → AsyncIterable<Ref<M>[]>` — the cheap delta /
13
+ * backfill walk; streams pages of lightweight refs (id + per-ref cursor +
14
+ * filterable metadata). LAZY: pull-driven so hydrate backpressures it.
15
+ * - `hydrate(ids) → Map<id, raw>` — the expensive fetch-by-id, batched; where
16
+ * bounded concurrency / a vendor `/batch` endpoint lives. Keyed and
17
+ * miss-tolerant (a mid-run 404 cannot shift alignment).
18
+ * - `toCanonical(raw) → T | null` — provider payload → canonical record.
19
+ *
20
+ * The base PROVIDES the orchestration: drain enumerate, **filter before
21
+ * hydrate** (structural — an adapter physically cannot hydrate-then-discard),
22
+ * keyed pairing, per-ref cursor emission, and the `IChangeSource.listChanges`
23
+ * adaptation. It also provides `RandomRead.get()` for free as
24
+ * `toCanonical ∘ hydrate([id])` — so every incremental adapter is a
25
+ * single-record reader (the "list cheaply, fill on click" query-surface need)
26
+ * without extra code.
27
+ *
28
+ * The shape generalizes dealbrain's proven HubSpot `listSince` (streams, pushes
29
+ * the filter server-side, carries a per-record cursor) to vendors whose list
30
+ * returns id-stubs (Gmail) or nested resources (Meet). Calendar-style
31
+ * full-object lists override `hydrate` as a passthrough.
32
+ *
33
+ * See RFC-0003 (Track D round-3), ADR-033 (`detection:` config), and
34
+ * `poll-change-source.ts` (the sibling primitive this composes beside).
35
+ */
36
+
37
+ import type {
38
+ Change,
39
+ ChangeSource,
40
+ IChangeSource,
41
+ IntegrationSubscriptionView,
42
+ } from './integration-change-source.protocol';
43
+
44
+ // ============================================================================
45
+ // Capability shapes
46
+ // ============================================================================
47
+
/**
 * The way a read walks the upstream system. A mode is a value handed to the
 * single `read()` verb, which dispatches on `kind` (swe-brain ADR-0003:
 * mode ≠ capability).
 *
 * - `delta`     — incremental walk resuming from a persisted `cursor`.
 * - `full`      — cursorless backfill, optionally bounded by `since`.
 * - `reconcile` — gap repair: re-fetch the `knownIds` the cursor skipped
 *                 (the repair pass for the silent-tail-skip and #414-style
 *                 multi-provider divergence).
 */
export type ReadMode =
  | Readonly<{ kind: 'delta'; cursor: unknown }>
  | Readonly<{ kind: 'full'; since?: Date }>
  | Readonly<{ kind: 'reconcile'; knownIds: readonly string[] }>;
62
+
/**
 * Lightweight reference produced by the enumerate pass.
 *
 * @typeParam M - Shape of the per-ref metadata; defaults to an untyped bag.
 */
export interface Ref<M = Record<string, unknown>> {
  /** Identity of the upstream record. */
  readonly externalId: string;

  /**
   * Position AS OF this ref. Whether it may be checkpointed mid-walk or must
   * be withheld until a safe boundary is governed by
   * `IncrementalReadBase.cursorDivisible` (R2).
   */
  readonly cursor: unknown;

  /** Metadata cheap enough to filter or display on without hydrating. */
  readonly meta: M;
}
74
+
/**
 * Input to a read.
 *
 * @typeParam F - Adapter-defined filter type.
 */
export interface ReadRequest<F = unknown> {
  /** How the upstream is walked (delta / full / reconcile). */
  readonly mode: ReadMode;

  /** Optional adapter-typed filter. */
  readonly filter?: F;

  /** Optional page size. */
  readonly pageSize?: number;
}
81
+
/**
 * What `read()` yields for each surviving record.
 *
 * This is not the runtime's transport envelope `Change<T>`
 * (operation/externalId/cursor/source). The relationship is one-directional:
 * `listChanges()` adapts `read()` output into `Change<T>`, dropping `raw` and
 * stamping `operation`. `read()` itself keeps `raw` and `externalId` so a
 * query surface can re-project without fetching a second time.
 */
export interface SourcedRecord<T> {
  /** External id the record originated from. */
  readonly externalId: string;

  /** The canonical record. */
  readonly record: T;

  /** The raw vendor payload the canonical record was derived from. */
  readonly raw: unknown;

  /** The per-ref cursor. */
  readonly cursor: unknown;
}
98
+
/**
 * The universal read capability: a single public, streaming verb.
 *
 * Filtering, hydration and cursor emission are left to the providing base.
 *
 * @typeParam T - Canonical record type.
 * @typeParam F - Adapter-defined filter type.
 */
export interface IncrementalRead<T, F = unknown> {
  /** Stream the records selected by `req`. */
  read(req: ReadRequest<F>): AsyncIterable<SourcedRecord<T>>;
}
106
+
/**
 * Read one record by its external id — the "fill on click" atom.
 *
 * `IncrementalReadBase` provides it for free by composing `hydrate` with
 * `toCanonical`. It is a capability of its own so that a consumer can depend
 * on single-record reads without pulling in the streaming surface.
 */
export interface RandomRead<T> {
  /** Resolve the record for `id`, or `null`. */
  get(id: string): Promise<T | null>;
}
116
+
117
+ // ============================================================================
118
+ // Bounded-parallel map helper
119
+ // ============================================================================
120
+
/**
 * Map `ids` through `fn` with at most `limit` calls in flight, collecting the
 * results keyed by id. The workhorse for writing a batched `hydrate` over a
 * single-id fetch without serial N+1 latency.
 *
 * @param ids - Ids to process. A repeated id is fetched once per occurrence
 *   and the map keeps whichever result is stored last.
 * @param fn - Fetch for a single id.
 * @param limit - Maximum concurrent calls. Clamped to `[1, ids.length]`;
 *   fractional values are floored and `NaN` is treated as `1`.
 * @returns A map from id to the value `fn` resolved with.
 * @throws Rejects with the first error raised by `fn`. Once a call has
 *   failed no further ids are dispatched; calls already in flight are left to
 *   settle and their outcome is ignored.
 */
export async function mapConcurrent<R>(
  ids: readonly string[],
  fn: (id: string) => Promise<R>,
  limit: number,
): Promise<Map<string, R>> {
  const out = new Map<string, R>();
  if (ids.length === 0) return out;

  // NaN survives Math.min/Math.max and would size the worker pool to zero,
  // silently returning an empty map; fall back to serial execution instead.
  const requested = Number.isNaN(limit) ? 1 : Math.floor(limit);
  const width = Math.max(1, Math.min(requested, ids.length));

  let next = 0;
  let failed = false;

  // Each worker pulls the next unclaimed index until the queue is drained or
  // a sibling has failed. `next++` is safe: there is no await between the
  // bounds check and the claim.
  const worker = async (): Promise<void> => {
    while (!failed && next < ids.length) {
      const id = ids[next++]!;
      try {
        out.set(id, await fn(id));
      } catch (err) {
        // The overall result is already a rejection, so stop issuing calls
        // whose results would only be thrown away.
        failed = true;
        throw err;
      }
    }
  };

  await Promise.all(Array.from({ length: width }, () => worker()));
  return out;
}
145
+
146
+ // ============================================================================
147
+ // IncrementalReadBase
148
+ // ============================================================================
149
+
/**
 * Providing base for the read capability.
 *
 * A subclass implements three vendor methods — `enumerate`, `hydrate` and
 * `toCanonical` — and inherits a streaming, filter-before-hydrate,
 * miss-tolerant implementation of `IncrementalRead<T, F>`, `IChangeSource<T>`
 * and `RandomRead<T>`.
 *
 * @typeParam T - Canonical record.
 * @typeParam F - Adapter-typed filter.
 * @typeParam M - Per-ref metadata; defaults to an untyped bag (surface
 *   packages supply a domain-specific `M`).
 */
export abstract class IncrementalReadBase<T, F = unknown, M = Record<string, unknown>>
  implements IncrementalRead<T, F>, IChangeSource<T>, RandomRead<T>
{
  /** Human-readable label for run logs, e.g. `'google-mail-email'`. */
  abstract readonly label: string;

  /**
   * Declares whether the vendor evaluates the request predicate server-side.
   * This class never branches on it: it is declared, not enforced, and is
   * surfaced into the emission manifest (R3) so the falsifier suite (R4) can
   * record which adapters filter post-hydrate. `false` is the honest floor
   * (e.g. Gmail without `q=`), covered by `matchesRecord`.
   */
  protected readonly filterPushdown: boolean = false;

  /**
   * Concurrency ceiling intended for a `hydrate` written on top of
   * `mapConcurrent`. The base itself does not read it.
   */
  protected readonly hydrateConcurrency: number = 10;

  /** Provenance written to `Change<T>.source` by `listChanges`. */
  protected readonly changeSource: ChangeSource = 'poll';

  /**
   * Whether this source's cursor strategy is divisible (RFC-0003 §3).
   *
   * `true` (the default; sortable watermarks such as `systemModstamp`,
   * `timestamp`, `replayId`): `listChanges` emits every record with its own
   * per-ref cursor, so the orchestrator may checkpoint mid-walk and a crash
   * resumes from the last delivered ref.
   *
   * `false` (atomic opaque tokens such as Gmail `historyId` or Calendar
   * `syncToken`): `listChanges` WITHHOLDS per-ref cursors and attaches the
   * end-of-walk token to the final record only, so the orchestrator's
   * persist-last-yielded lifecycle can never store an unresumable mid-walk
   * token. The price is blast radius: an interrupted atomic run resumes
   * all-or-nothing from the previously persisted token. For atomic
   * *backfills* that radius is the whole enumerate walk — bound it with
   * `ReadRequest.pageSize` (smaller pages ⇒ shorter walks per run). Per-page
   * atomic checkpointing is a future refinement; R2 gates at end-of-walk.
   *
   * Codegen (R3) derives this from the strategy kind via `isDivisibleCursor`.
   */
  protected readonly cursorDivisible: boolean = true;

  // ---------------------------------------------------------------------
  // Vendor seam — implemented by the adapter
  // ---------------------------------------------------------------------

  /**
   * The cheap walk: stream pages of refs. It should be LAZY so that `hydrate`
   * applies backpressure — `read()` hydrates one page before pulling the
   * next. Mode dispatch belongs here: `delta` resumes from `mode.cursor`,
   * `full` walks from the top, `reconcile` re-fetches `mode.knownIds`.
   *
   * @param mode - How to walk the upstream.
   * @param filter - Adapter-typed filter, when one was supplied.
   * @param pageSize - Requested vendor page size from `ReadRequest`; also the
   *   blast-radius bound for atomic-cursor backfills (see `cursorDivisible`).
   *   Treat it as a hint; vendors that cap page size clamp it.
   */
  protected abstract enumerate(
    mode: ReadMode,
    filter?: F,
    pageSize?: number,
  ): AsyncIterable<Ref<M>[]>;

  /**
   * Fetch the raw payloads for `ids`, keyed by id. MUST tolerate misses: an
   * id that 404s mid-run is omitted (or mapped to `null`) rather than causing
   * a throw or shifting alignment. The usual implementation is
   * `mapConcurrent(ids, (id) => this.fetchOne(id), this.hydrateConcurrency)`;
   * replace it with a real `/batch` call, or with a passthrough for
   * full-object lists, where the vendor allows.
   */
  protected abstract hydrate(ids: string[]): Promise<Map<string, unknown>>;

  /** Convert a provider payload to the canonical record; `null` drops it. */
  protected abstract toCanonical(raw: unknown): T | null;

  // ---------------------------------------------------------------------
  // Optional hooks — `read()` always calls both predicates; each accepts
  // everything until an adapter overrides it.
  // ---------------------------------------------------------------------

  /** Predicate over the cheap ref, run before hydration (preferred: a rejected ref is never hydrated). */
  protected matchesRef(_ref: Ref<M>, _filter?: F): boolean {
    return true;
  }

  /** Predicate over the canonical record, run after hydration (the no-pushdown floor). */
  protected matchesRecord(_record: T, _filter?: F): boolean {
    return true;
  }

  /**
   * Pick the filter to use for a subscription when serving `listChanges`,
   * which has no filter argument of its own. The default is no filter;
   * codegen wiring (R3) overrides this to thread `DetectionConfig.filters`.
   */
  protected filterFor(_subscription: IntegrationSubscriptionView): F | undefined {
    return undefined;
  }

  // ---------------------------------------------------------------------
  // Orchestration — provided by the base
  // ---------------------------------------------------------------------

  /**
   * Stream canonical records for `req`.
   *
   * Each enumerated page is narrowed with `matchesRef` BEFORE `hydrate` is
   * called, so a rejected ref is never hydrated and a page with no survivors
   * triggers no hydrate call at all. An id missing from the hydrate result
   * (or mapped to `null`/`undefined`) is skipped, never fabricated. Records
   * for which `toCanonical` returns `null` or `matchesRecord` returns `false`
   * are dropped as well.
   */
  async *read(req: ReadRequest<F>): AsyncIterable<SourcedRecord<T>> {
    const { mode, filter, pageSize } = req;
    const keep = (ref: Ref<M>): boolean => this.matchesRef(ref, filter);

    for await (const page of this.enumerate(mode, filter, pageSize)) {
      const survivors = page.filter(keep);
      if (survivors.length === 0) continue;

      const payloads = await this.hydrate(survivors.map(({ externalId }) => externalId));

      for (const { externalId, cursor } of survivors) {
        const raw = payloads.get(externalId);
        if (raw == null) continue; // deleted mid-run → skip

        const record = this.toCanonical(raw);
        if (record === null) continue;
        if (!this.matchesRecord(record, filter)) continue;

        yield { externalId, record, raw, cursor };
      }
    }
  }

  /**
   * `RandomRead<T>`: read a single record as `toCanonical ∘ hydrate([id])`,
   * reusing the adapter's batched fetch and its miss tolerance.
   *
   * @returns The canonical record, or `null` when the id is absent from the
   *   hydrate result, is mapped to `null`/`undefined`, or `toCanonical`
   *   rejects the payload.
   */
  async get(id: string): Promise<T | null> {
    const raw = (await this.hydrate([id])).get(id);
    return raw == null ? null : this.toCanonical(raw);
  }

  /**
   * `IChangeSource<T>` adaptation.
   *
   * The orchestrator's by-value cursor selects the `ReadMode`: a `null` (or
   * `undefined`) cursor means a `full` backfill, anything else a `delta`
   * resuming from it. The filter comes from `filterFor(subscription)`. Every
   * record is stamped `'updated'`; the orchestrator's diff stage classifies
   * create-vs-update, and deletes arrive as tombstone refs (`toCanonical` may
   * flag them).
   *
   * Cursor emission follows `cursorDivisible` (RFC-0003 §3):
   *
   * - Divisible: each change carries its own per-ref cursor.
   * - Atomic: a one-record lookahead emits every change except the last with
   *   an `undefined` cursor (which the orchestrator skips persisting); the
   *   last change carries its record's cursor. Contract: an atomic adapter
   *   stamps the single, shared end-of-walk token onto all of its refs, so
   *   whichever record survives last carries it, and a mid-walk crash can
   *   never persist an unresumable token. When no record survives, nothing
   *   is emitted, no cursor is persisted, and the next run re-reads the same
   *   delta — a bounded inefficiency, never data loss.
   */
  async *listChanges(
    subscription: IntegrationSubscriptionView,
    cursor: unknown | null,
  ): AsyncIterable<Change<T>> {
    const mode: ReadMode = cursor == null ? { kind: 'full' } : { kind: 'delta', cursor };
    const records = this.read({ mode, filter: this.filterFor(subscription) });

    if (!this.cursorDivisible) {
      // Hold each record back by one step so that only the final one is
      // emitted with a real cursor.
      let held: SourcedRecord<T> | undefined;
      for await (const current of records) {
        if (held !== undefined) yield this.toChange(held, undefined);
        held = current;
      }
      if (held !== undefined) yield this.toChange(held, held.cursor);
      return;
    }

    for await (const current of records) {
      yield this.toChange(current, current.cursor);
    }
  }

  /** Build the `Change<T>` for a sourced record, using the explicitly supplied cursor. */
  private toChange(sourced: SourcedRecord<T>, cursor: unknown): Change<T> {
    const { externalId, record } = sourced;
    return { externalId, operation: 'updated', record, cursor, source: this.changeSource };
  }
}
@@ -55,9 +55,11 @@ export { MemoryEntityChangeSourceRegistry } from './entity-change-source-registr
55
55
  // DetectionConfig (#226-1) — Zod schema + inferred types; canonical source
56
56
  // of filter/mapping shape consumed by primitives + codegen YAML validator
57
57
  export {
58
+ CURSOR_DIVISIBILITY,
58
59
  CursorStrategySchema,
59
60
  DetectionConfigSchema,
60
61
  FieldMappingSchema,
62
+ isDivisibleCursor,
61
63
  PollDetectionSchema,
62
64
  ResolvedFilterSchema,
63
65
  WebhookDetectionSchema,
@@ -92,6 +94,19 @@ export {
92
94
  type PollFetchContext,
93
95
  } from './poll-change-source';
94
96
 
97
+ // IncrementalRead primitive (RFC-0003 R1) — enumerate/hydrate read capability;
98
+ // the providing base emits a streaming, filter-before-hydrate IChangeSource<T>.
99
+ export {
100
+ IncrementalReadBase,
101
+ mapConcurrent,
102
+ type IncrementalRead,
103
+ type RandomRead,
104
+ type ReadMode,
105
+ type ReadRequest,
106
+ type Ref,
107
+ type SourcedRecord,
108
+ } from './incremental-read';
109
+
95
110
  // Webhook primitive (#226-4) — generic webhook-mode IChangeSource<T>
96
111
  // driven by a consumer-owned inbound staging queue iterator
97
112
  export {