@indigoai-us/hq-cloud 5.26.0 → 5.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +34 -0
- package/dist/bin/sync-runner.d.ts +38 -0
- package/dist/bin/sync-runner.d.ts.map +1 -1
- package/dist/bin/sync-runner.js +75 -1
- package/dist/bin/sync-runner.js.map +1 -1
- package/dist/index.d.ts +4 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -1
- package/dist/index.js.map +1 -1
- package/dist/sync/feature-flags.d.ts +136 -0
- package/dist/sync/feature-flags.d.ts.map +1 -0
- package/dist/sync/feature-flags.js +160 -0
- package/dist/sync/feature-flags.js.map +1 -0
- package/dist/sync/feature-flags.test.d.ts +24 -0
- package/dist/sync/feature-flags.test.d.ts.map +1 -0
- package/dist/sync/feature-flags.test.js +330 -0
- package/dist/sync/feature-flags.test.js.map +1 -0
- package/dist/sync/index.d.ts +10 -2
- package/dist/sync/index.d.ts.map +1 -1
- package/dist/sync/index.js +5 -1
- package/dist/sync/index.js.map +1 -1
- package/dist/sync/logger.d.ts +61 -0
- package/dist/sync/logger.d.ts.map +1 -0
- package/dist/sync/logger.js +51 -0
- package/dist/sync/logger.js.map +1 -0
- package/dist/sync/logger.test.d.ts +19 -0
- package/dist/sync/logger.test.d.ts.map +1 -0
- package/dist/sync/logger.test.js +199 -0
- package/dist/sync/logger.test.js.map +1 -0
- package/dist/sync/metrics.d.ts +89 -0
- package/dist/sync/metrics.d.ts.map +1 -0
- package/dist/sync/metrics.js +105 -0
- package/dist/sync/metrics.js.map +1 -0
- package/dist/sync/metrics.test.d.ts +19 -0
- package/dist/sync/metrics.test.d.ts.map +1 -0
- package/dist/sync/metrics.test.js +280 -0
- package/dist/sync/metrics.test.js.map +1 -0
- package/dist/sync/push-receiver.d.ts +442 -0
- package/dist/sync/push-receiver.d.ts.map +1 -0
- package/dist/sync/push-receiver.js +782 -0
- package/dist/sync/push-receiver.js.map +1 -0
- package/dist/sync/push-receiver.test.d.ts +25 -0
- package/dist/sync/push-receiver.test.d.ts.map +1 -0
- package/dist/sync/push-receiver.test.js +477 -0
- package/dist/sync/push-receiver.test.js.map +1 -0
- package/dist/sync/push-transport.d.ts +84 -1
- package/dist/sync/push-transport.d.ts.map +1 -1
- package/dist/sync/push-transport.js +84 -0
- package/dist/sync/push-transport.js.map +1 -1
- package/dist/watcher.d.ts +113 -2
- package/dist/watcher.d.ts.map +1 -1
- package/dist/watcher.js +204 -25
- package/dist/watcher.js.map +1 -1
- package/package.json +9 -5
- package/src/bin/sync-runner.ts +102 -1
- package/src/index.ts +21 -0
- package/src/sync/feature-flags.test.ts +392 -0
- package/src/sync/feature-flags.ts +229 -0
- package/src/sync/index.ts +57 -2
- package/src/sync/logger.test.ts +241 -0
- package/src/sync/logger.ts +79 -0
- package/src/sync/metrics.test.ts +380 -0
- package/src/sync/metrics.ts +158 -0
- package/src/sync/push-receiver.test.ts +545 -0
- package/src/sync/push-receiver.ts +1077 -0
- package/src/sync/push-transport.ts +148 -1
- package/src/watcher.ts +299 -17
- package/test/e2e/sync/cross-tenant-isolation.test.ts +502 -0
- package/test/e2e/watcher-real-chokidar.test.ts +105 -0
|
@@ -0,0 +1,1077 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PushReceiver — inbound subscription seam for the hq-cloud watcher daemon
|
|
3
|
+
* (project event-driven-sync-menubar US-009).
|
|
4
|
+
*
|
|
5
|
+
* Mirrors {@link PushTransport} (`./push-transport.ts`) but for the opposite
|
|
6
|
+
* direction of travel: where the transport SHIPS local file changes out to the
|
|
7
|
+
* cloud, the receiver SUBSCRIBES to the tenant fanout and triggers an
|
|
8
|
+
* immediate, TARGETED local pull the moment a peer device of the same tenant
|
|
9
|
+
* publishes a change. Together they form the event-driven primary path; the
|
|
10
|
+
* existing `--poll-remote-ms` poll in `runRunnerWithLoop` is the safety net
|
|
11
|
+
* behind both.
|
|
12
|
+
*
|
|
13
|
+
* Transport: SNS → per-client SQS (US-000 decision)
|
|
14
|
+
* ─────────────────────────────────────────────────
|
|
15
|
+
* Per the US-000 transport investigation (companies/indigo/projects/
|
|
16
|
+
* event-driven-sync-menubar/references.md): reuse PR #112's SNS publish +
|
|
17
|
+
* DynamoDB catch-up log, and build the client RECEIVE side as a per-client
|
|
18
|
+
* SQS queue subscribed to `sync-push-{tenantId}`. The receiver long-polls its
|
|
19
|
+
* own queue, decodes each message body as a {@link PushEvent}, dedupes by
|
|
20
|
+
* `sequenceNumber` per `relativePath`, and bridges into the existing sync
|
|
21
|
+
* engine via an injected {@link SyncEngineFn} (→ targeted `runRunner` pull).
|
|
22
|
+
*
|
|
23
|
+
* The live queue is NOT provisioned yet (the server SQS-provisioning Lambda is
|
|
24
|
+
* an unbuilt follow-up — see references.md "Open items handed to the plan").
|
|
25
|
+
* So this module ships:
|
|
26
|
+
* - {@link SqsClientLike} — the narrow SQS surface the receiver depends on
|
|
27
|
+
* (`receiveMessage` / `deleteMessage`). Production callers pass an
|
|
28
|
+
* `@aws-sdk/client-sqs` `SQSClient` adapted to this shape; unit tests inject
|
|
29
|
+
* a fake. NO real AWS is required to exercise the receiver.
|
|
30
|
+
* - {@link SqsPushReceiver} — the real receiver. Long-polls the queue,
|
|
31
|
+
* dispatches each event through the shared dedupe path, deletes the message
|
|
32
|
+
* on successful handoff, and reconnects on transient `receiveMessage`
|
|
33
|
+
* failures with exponential backoff. SQS's own 14-day retention buffers
|
|
34
|
+
* messages while the device is offline → reconnect-replay is "free": on
|
|
35
|
+
* reconnect the poll loop simply resumes and the retained messages are
|
|
36
|
+
* redelivered, then dedupe skips anything already processed.
|
|
37
|
+
* - {@link NoopPushReceiver} — the dormant default. Flips `connected` on
|
|
38
|
+
* start, opens no subscription. Wired when the daemon runs without a real
|
|
39
|
+
* queue (or when the feature flag is OFF).
|
|
40
|
+
* - {@link createPushReceiver} — factory the daemon uses; returns the noop
|
|
41
|
+
* unless an SQS client + queue URL are supplied.
|
|
42
|
+
*
|
|
43
|
+
* Lifecycle (mirrors PushTransport)
|
|
44
|
+
* ─────────────────────────────────
|
|
45
|
+
* - `start()` opens the subscription (begins the long-poll loop). Awaited
|
|
46
|
+
* AFTER the watcher starts so a synthetic event can't race a half-built
|
|
47
|
+
* daemon. When the feature flag is OFF, `start()` is a no-op and `connected`
|
|
48
|
+
* stays false — NO queue is polled (dormant; AC#4).
|
|
49
|
+
* - On each received message: validate with {@link decodePushEvent} (defense
|
|
50
|
+
* in depth at the wire boundary), dedupe by `relativePath` against the
|
|
51
|
+
* highest `sequenceNumber` seen for that path, then call the injected
|
|
52
|
+
* {@link SyncEngineFn}. The sync engine is an injected seam — this story
|
|
53
|
+
* does NOT re-implement download logic; it bridges to `runRunner` pull.
|
|
54
|
+
* - `dispose()` aborts in-flight via AbortController, stops the poll loop,
|
|
55
|
+
* awaits the in-flight sync up to a drain deadline, then disconnects.
|
|
56
|
+
*
|
|
57
|
+
* Dedupe (AC#3)
|
|
58
|
+
* ─────────────
|
|
59
|
+
* A per-`relativePath` map of the highest `sequenceNumber` already passed to
|
|
60
|
+
* `syncFn`. An incoming event with `sequenceNumber <= seen` is skipped. SQS
|
|
61
|
+
* at-least-once delivery + reconnect-replay means the SAME event can arrive
|
|
62
|
+
* twice; dedupe makes that idempotent.
|
|
63
|
+
*
|
|
64
|
+
* Disconnect / reconnect with catch-up replay (AC#3/#4)
|
|
65
|
+
* ─────────────────────────────────────────────────────
|
|
66
|
+
* `receiveMessage` failures (network blip, throttling) are caught; the loop
|
|
67
|
+
* backs off (exponential + jitter, capped) and resumes. Because the per-client
|
|
68
|
+
* SQS queue retains undelivered messages for 14 days, anything published while
|
|
69
|
+
* the device was offline/disconnected is redelivered when the poll resumes —
|
|
70
|
+
* catch-up replay with no server round-trip. Redelivered-but-already-processed
|
|
71
|
+
* events are absorbed by the dedupe path. The in-memory fake's
|
|
72
|
+
* `simulateDisconnect()` / `simulateReconnect()` model exactly this buffering.
|
|
73
|
+
*
|
|
74
|
+
* Feature flag (AC#4)
|
|
75
|
+
* ───────────────────
|
|
76
|
+
* Gated by the per-tenant {@link EventDrivenPushFlagProvider} (US-008's
|
|
77
|
+
* feature-flags.ts) — honors `HQ_SYNC_EVENT_DRIVEN_PUSH_ENABLED_TENANTS`
|
|
78
|
+
* (per-tenant) + the legacy global `HQ_SYNC_EVENT_DRIVEN_PUSH_ENABLED=true`.
|
|
79
|
+
* Default DISABLED. Resolution precedence: explicit `enabled` > injected
|
|
80
|
+
* `flagProvider.isEnabled(tenantId)` > default env-driven provider.
|
|
81
|
+
*
|
|
82
|
+
* Cross-tenant isolation (US-010)
|
|
83
|
+
* ───────────────────────────────
|
|
84
|
+
* Each receiver instance binds exactly ONE `tenantId` and polls exactly ONE
|
|
85
|
+
* queue URL (its own tenant's per-client queue). Isolation is enforced at the
|
|
86
|
+
* subscription boundary — the receiver never reads another tenant's queue, and
|
|
87
|
+
* never filters cross-tenant data post-hoc.
|
|
88
|
+
*
|
|
89
|
+
* @see ./push-transport.ts (the outbound seam this mirrors)
|
|
90
|
+
* @see ./feature-flags.ts (the per-tenant flag provider — US-008)
|
|
91
|
+
* @see ../bin/sync-runner.ts (the wiring site — runRunnerWithLoop)
|
|
92
|
+
* @see companies/indigo/projects/event-driven-sync-menubar/references.md (US-000)
|
|
93
|
+
*
|
|
94
|
+
* Adapted from indigoai-us/hq-pro PR #112 (src/sync/push-receiver.ts) into
|
|
95
|
+
* @indigoai-us/hq-cloud (Path B). The hq-pro source shipped only Noop +
|
|
96
|
+
* InMemory receivers (the production SQS path was deferred there); this module
|
|
97
|
+
* builds the real SQS receiver behind the same lifecycle/dedupe/flag seam.
|
|
98
|
+
*/
|
|
99
|
+
|
|
100
|
+
import { decodePushEvent, type PushEvent } from "./push-event.js";
|
|
101
|
+
import {
|
|
102
|
+
defaultFlagProvider,
|
|
103
|
+
type EventDrivenPushFlagProvider,
|
|
104
|
+
} from "./feature-flags.js";
|
|
105
|
+
import {
|
|
106
|
+
publishSyncLatencyMetric,
|
|
107
|
+
type SyncLatencyMetric,
|
|
108
|
+
} from "./metrics.js";
|
|
109
|
+
|
|
110
|
+
// ─── Constants ─────────────────────────────────────────────────────────────
|
|
111
|
+
|
|
112
|
+
/**
 * Upper bound (ms) on how long `dispose()` keeps waiting for an in-flight
 * `syncFn` once its abort signal has fired. After that the sync is abandoned;
 * the poll/cadence safety net is expected to re-pull on its next tick.
 */
export const DEFAULT_RECEIVER_DISPOSE_DRAIN_MS = 5000;

/** Default SQS long-poll wait, in seconds. 20 is the maximum SQS accepts. */
export const DEFAULT_WAIT_TIME_SECONDS = 20;

/** Default batch size per `receiveMessage` call. 10 is the maximum SQS accepts. */
export const DEFAULT_MAX_MESSAGES = 10;

/** First reconnect backoff delay (ms) after a failed `receiveMessage`. */
export const DEFAULT_RECONNECT_INITIAL_MS = 250;
/** Ceiling (ms) for the reconnect backoff delay. */
export const DEFAULT_RECONNECT_MAX_MS = 30000;
|
|
127
|
+
|
|
128
|
+
// ─── Narrow SQS surface (the injectable transport seam) ──────────────────────
|
|
129
|
+
|
|
130
|
+
/**
 * A single SQS message, reduced to the fields the receiver actually reads.
 *
 * The shape is a structural subset of the AWS SDK `Message` type, so an SDK
 * message can be passed through unchanged and tests can write plain object
 * literals. Every field is optional, mirroring the SDK.
 */
export interface SqsMessageLike {
  /** Message payload: JSON text that should decode into a {@link PushEvent}. */
  readonly Body?: string;
  /** Opaque token required to delete the message once it has been handled. */
  readonly ReceiptHandle?: string;
  /** SQS-assigned message id; used only in diagnostic log output. */
  readonly MessageId?: string;
}
|
|
143
|
+
|
|
144
|
+
/**
 * Minimal SQS client contract consumed by the receiver.
 *
 * The AWS SDK `SQSClient` exposes a single `send(command)` method and therefore
 * does NOT satisfy this interface directly; production code is expected to
 * wrap it in a thin adapter (the original notes name one,
 * `sqsClientFromAwsSdk`, which is not part of this section). The seam is kept
 * deliberately small so unit tests can supply a hand-written fake with no AWS
 * dependency.
 */
export interface SqsClientLike {
  /**
   * Long-poll the queue and resolve with zero or more messages.
   *
   * Implementations MUST react to `signal` promptly (resolve or reject on
   * abort); otherwise `dispose()` can be held up by an in-flight long-poll
   * of up to `waitTimeSeconds`.
   */
  receiveMessage(args: {
    queueUrl: string;
    maxMessages: number;
    waitTimeSeconds: number;
    signal: AbortSignal;
  }): Promise<{ messages: SqsMessageLike[] }>;

  /** Remove a handled message from the queue so SQS does not redeliver it. */
  deleteMessage(args: {
    queueUrl: string;
    receiptHandle: string;
  }): Promise<void>;
}
|
|
170
|
+
|
|
171
|
+
// ─── Public types ──────────────────────────────────────────────────────────
|
|
172
|
+
|
|
173
|
+
/**
 * Argument object passed to {@link SyncEngineFn} for each dispatched event.
 *
 * - `event`: the decoded {@link PushEvent} that survived dedupe. Its
 *   `relativePath` identifies what to pull; `sequenceNumber` is carried for
 *   observability.
 * - `signal`: aborted when the receiver is disposed. `dispose()` fires the
 *   abort first and only then waits (up to its drain deadline) for the sync
 *   to settle, so a cooperative sync fn should check `signal.aborted` between
 *   stages and return early.
 */
export interface PushReceiverContext {
  readonly event: PushEvent;
  readonly signal: AbortSignal;
}
|
|
186
|
+
|
|
187
|
+
/**
 * Callback that performs the actual pull for a received event.
 *
 * The receiver itself never fetches anything: it only hands the event (and
 * thereby its `relativePath`) to this function. In production it is meant to
 * bridge to a targeted `runRunner` pull for the affected company/path; tests
 * substitute a recording fake.
 *
 * The receiver's contract is that a rejection from this function is caught
 * and logged rather than propagated, so a faulty sync fn cannot stop the
 * receiver. Recovery for a failed pull is left to the poll/cadence safety net.
 */
export type SyncEngineFn = (ctx: PushReceiverContext) => Promise<void>;
|
|
198
|
+
|
|
199
|
+
/**
 * Seam for publishing the sync-latency metric (US-011).
 *
 * Intended to be called on the receive-success path with the measured
 * "saved on device A → visible on device B" latency. When no override is
 * supplied the receiver falls back to {@link publishSyncLatencyMetric}; tests
 * inject a spy so no real AWS call is made. Publishing is best-effort: a
 * failure here must never take the receive loop down.
 */
export type PublishMetricFn = (metric: SyncLatencyMetric) => Promise<void>;
|
|
208
|
+
|
|
209
|
+
/**
 * Smallest structured-logging surface the receiver needs: one method per
 * level, each taking a bag of fields plus an optional human-readable message.
 * When no logger is supplied the receiver uses a silent no-op implementation.
 */
export interface ReceiverLogger {
  debug(obj: Record<string, unknown>, msg?: string): void;
  info(obj: Record<string, unknown>, msg?: string): void;
  warn(obj: Record<string, unknown>, msg?: string): void;
  error(obj: Record<string, unknown>, msg?: string): void;
}
|
|
216
|
+
|
|
217
|
+
/** Silent {@link ReceiverLogger}: every level discards its input. */
const NOOP_LOGGER: ReceiverLogger = {
  info(): void {
    /* intentionally silent */
  },
  warn(): void {
    /* intentionally silent */
  },
  error(): void {
    /* intentionally silent */
  },
  debug(): void {
    /* intentionally silent */
  },
};
|
|
223
|
+
|
|
224
|
+
/**
 * Lifecycle handle for an inbound push subscription. The shape intentionally
 * parallels {@link PushTransport} so the daemon can wire both directions the
 * same way.
 */
export interface PushReceiver {
  /** Advisory flag: whether the subscription is currently believed to be open. */
  readonly connected: boolean;
  /** Begin the subscription / poll loop. Does nothing when the feature flag is OFF. */
  start(): Promise<void>;
  /**
   * Tear the receiver down: stop polling, abort and drain in-flight work,
   * disconnect. Safe to call more than once.
   */
  dispose(): Promise<void>;
}
|
|
236
|
+
|
|
237
|
+
// ─── Noop default ─────────────────────────────────────────────────────────
|
|
238
|
+
|
|
239
|
+
/**
 * Inert {@link PushReceiver} used when there is no real queue to poll (or the
 * feature flag is OFF at the factory). It only tracks a boolean: `start()`
 * sets `connected` to true and `dispose()` sets it back to false. It opens no
 * subscription and never delivers events. Counterpart of
 * {@link NoopPushTransport}.
 */
export class NoopPushReceiver implements PushReceiver {
  /** Backing field for the `connected` getter. */
  private subscribed = false;

  get connected(): boolean {
    return this.subscribed;
  }

  /** Mark the receiver as connected; performs no I/O. */
  async start(): Promise<void> {
    this.subscribed = true;
  }

  /** Mark the receiver as disconnected; performs no I/O. */
  async dispose(): Promise<void> {
    this.subscribed = false;
  }
}
|
|
259
|
+
|
|
260
|
+
// ─── Feature-flag resolution ─────────────────────────────────────────────────
|
|
261
|
+
|
|
262
|
+
/**
 * Decide whether event-driven push is enabled for a tenant.
 *
 * Sources are consulted in this order and the first one that applies wins:
 *   1. `explicit`: a caller-supplied `true`/`false` is returned as-is.
 *   2. `flagProvider`: an injected per-tenant provider is asked via
 *      `isEnabled(tenantId)`.
 *   3. Otherwise the provider built by `defaultFlagProvider` from `env`
 *      (falling back to `process.env`) is asked.
 *
 * @param args.explicit     Hard override; `undefined` means "not set".
 * @param args.flagProvider Injected provider; `undefined` means "use default".
 * @param args.tenantId     Tenant whose flag is being resolved.
 * @param args.env          Environment snapshot for the default provider.
 * @returns Whether the receiver should run for this tenant.
 */
function resolveEnabled(args: {
  explicit: boolean | undefined;
  flagProvider: EventDrivenPushFlagProvider | undefined;
  tenantId: string;
  env: Record<string, string | undefined> | undefined;
}): boolean {
  const { explicit, flagProvider, tenantId, env } = args;

  if (explicit !== undefined) {
    return explicit;
  }

  // The default provider is only constructed when no provider was injected.
  const provider =
    flagProvider !== undefined
      ? flagProvider
      : defaultFlagProvider(env ?? process.env);
  return provider.isEnabled(tenantId);
}
|
|
280
|
+
|
|
281
|
+
// ─── SQS receiver ─────────────────────────────────────────────────────────
|
|
282
|
+
|
|
283
|
+
/**
 * Construction options for {@link SqsPushReceiver}.
 */
export interface SqsPushReceiverOptions {
  /**
   * The single tenant this receiver serves. Isolation between tenants comes
   * from the subscription boundary (the queue belongs to this tenant), not
   * from filtering messages after the fact. (US-010)
   */
  tenantId: string;
  /**
   * URL of the caller's own per-tenant SQS queue, created server-side by the
   * provisioning Lambda and subscribed to `sync-push-{tenantId}`. This is the
   * only URL the receiver polls.
   */
  queueUrl: string;
  /** SQS client to poll with: a fake in tests, an SDK adapter in production. */
  sqs: SqsClientLike;
  /**
   * Performs the targeted pull for each event. The receiver only calls it;
   * failures are logged and kept away from the poll loop.
   */
  syncFn: SyncEngineFn;
  /** Structured logger. Omit for a silent receiver. */
  logger?: ReceiverLogger;
  /**
   * Explicit feature-flag value. When set it overrides both `flagProvider`
   * and the environment. If the resolved flag is false, `start()` does
   * nothing, `connected` remains false and the queue is never polled. (AC#4)
   */
  enabled?: boolean;
  /** Per-tenant flag provider (US-008); used only when `enabled` is unset. */
  flagProvider?: EventDrivenPushFlagProvider;
  /** Environment snapshot used for flag resolution. Defaults to `process.env`. */
  env?: Record<string, string | undefined>;
  /** Long-poll wait in seconds. Defaults to {@link DEFAULT_WAIT_TIME_SECONDS}. */
  waitTimeSeconds?: number;
  /** Batch size per receive call. Defaults to {@link DEFAULT_MAX_MESSAGES}. */
  maxMessages?: number;
  /**
   * How long `dispose()` waits for an in-flight `syncFn` after aborting it.
   * Defaults to {@link DEFAULT_RECEIVER_DISPOSE_DRAIN_MS}.
   */
  disposeDrainMs?: number;
  /**
   * Backoff tuning for retries after a failed receive. Defaults:
   * {@link DEFAULT_RECONNECT_INITIAL_MS}, {@link DEFAULT_RECONNECT_MAX_MS},
   * jitter enabled.
   */
  reconnect?: {
    initialMs?: number;
    maxMs?: number;
    jitter?: boolean;
  };
  /**
   * Sleep implementation used between retries; tests inject a fast or
   * abortable version. The default is timer-based and resolves early when the
   * signal aborts.
   */
  sleep?: (ms: number, signal: AbortSignal) => Promise<void>;
  /**
   * Best-effort latency-metric publisher (US-011), called on the
   * receive-success path with the measured save→visible latency. Defaults to
   * {@link publishSyncLatencyMetric}; tests inject a spy. (AC#1/#3)
   */
  publishMetric?: PublishMetricFn;
  /**
   * Clock used for latency measurement and metric timestamps. Defaults to
   * `() => Date.now()`; tests inject a fake clock to pin the latency value.
   */
  now?: () => number;
}
|
|
347
|
+
|
|
348
|
+
/**
|
|
349
|
+
* Real client `PushReceiver` backed by a per-tenant SQS queue.
|
|
350
|
+
*
|
|
351
|
+
* Poll loop: long-poll `receiveMessage` → for each message, decode + dedupe +
|
|
352
|
+
* dispatch through `syncFn`, then `deleteMessage` on successful handoff. A
|
|
353
|
+
* `receiveMessage` rejection is treated as a transient disconnect: log, back
|
|
354
|
+
* off, resume. SQS retention covers offline catch-up; dedupe covers redelivery.
|
|
355
|
+
*/
|
|
356
|
+
export class SqsPushReceiver implements PushReceiver {
|
|
357
|
+
private readonly tenantId: string;
|
|
358
|
+
private readonly queueUrl: string;
|
|
359
|
+
private readonly sqs: SqsClientLike;
|
|
360
|
+
private readonly syncFn: SyncEngineFn;
|
|
361
|
+
private readonly logger: ReceiverLogger;
|
|
362
|
+
private readonly enabled: boolean;
|
|
363
|
+
private readonly waitTimeSeconds: number;
|
|
364
|
+
private readonly maxMessages: number;
|
|
365
|
+
private readonly disposeDrainMs: number;
|
|
366
|
+
private readonly reconnectInitialMs: number;
|
|
367
|
+
private readonly reconnectMaxMs: number;
|
|
368
|
+
private readonly reconnectJitter: boolean;
|
|
369
|
+
private readonly sleep: (ms: number, signal: AbortSignal) => Promise<void>;
|
|
370
|
+
private readonly publishMetric: PublishMetricFn;
|
|
371
|
+
private readonly now: () => number;
|
|
372
|
+
|
|
373
|
+
private _connected = false;
|
|
374
|
+
private disposed = false;
|
|
375
|
+
private disposing = false;
|
|
376
|
+
private disposePromise: Promise<void> | null = null;
|
|
377
|
+
|
|
378
|
+
/** Abort signal shared by the poll loop + in-flight sync; fired on dispose. */
|
|
379
|
+
private loopAbort: AbortController | null = null;
|
|
380
|
+
/** The running poll loop promise; awaited (best-effort) during dispose. */
|
|
381
|
+
private loopPromise: Promise<void> | null = null;
|
|
382
|
+
/** AbortController for the in-flight syncFn; refreshed each dispatch. */
|
|
383
|
+
private inFlightAbort: AbortController | null = null;
|
|
384
|
+
private inFlightSync: Promise<void> | null = null;
|
|
385
|
+
|
|
386
|
+
/** Per-path highest sequence number already PROCESSED by syncFn. */
|
|
387
|
+
private readonly seenSequencePerPath = new Map<string, number>();
|
|
388
|
+
|
|
389
|
+
private _processedCount = 0;
|
|
390
|
+
private _dedupedCount = 0;
|
|
391
|
+
private _decodeFailureCount = 0;
|
|
392
|
+
private _receiveErrorCount = 0;
|
|
393
|
+
|
|
394
|
+
/**
 * Validate the options, resolve the feature flag once, and capture every
 * tunable with its default. Construction performs no I/O and starts no
 * polling; that happens in `start()`.
 *
 * @param opts Receiver configuration; see {@link SqsPushReceiverOptions}.
 * @throws Error when `tenantId` or `queueUrl` is missing/blank, when `sqs`
 *   lacks `receiveMessage`/`deleteMessage`, or when `syncFn` is not a function.
 */
constructor(opts: SqsPushReceiverOptions) {
  if (!opts.tenantId || opts.tenantId.trim() === "") {
    throw new Error("SqsPushReceiver: tenantId is required");
  }
  if (!opts.queueUrl || opts.queueUrl.trim() === "") {
    throw new Error("SqsPushReceiver: queueUrl is required");
  }
  // Fail fast on a missing client or sync fn. Without this check the
  // resulting TypeError surfaces inside the poll loop, where it is mistaken
  // for a transient receive failure and retried with backoff forever.
  if (
    typeof opts.sqs?.receiveMessage !== "function" ||
    typeof opts.sqs?.deleteMessage !== "function"
  ) {
    throw new Error(
      "SqsPushReceiver: sqs client with receiveMessage/deleteMessage is required",
    );
  }
  if (typeof opts.syncFn !== "function") {
    throw new Error("SqsPushReceiver: syncFn is required");
  }

  this.tenantId = opts.tenantId;
  this.queueUrl = opts.queueUrl;
  this.sqs = opts.sqs;
  this.syncFn = opts.syncFn;
  this.logger = opts.logger ?? NOOP_LOGGER;

  // The flag is resolved exactly once, at construction time.
  this.enabled = resolveEnabled({
    explicit: opts.enabled,
    flagProvider: opts.flagProvider,
    tenantId: opts.tenantId,
    env: opts.env,
  });

  this.waitTimeSeconds = opts.waitTimeSeconds ?? DEFAULT_WAIT_TIME_SECONDS;
  this.maxMessages = opts.maxMessages ?? DEFAULT_MAX_MESSAGES;
  this.disposeDrainMs =
    opts.disposeDrainMs ?? DEFAULT_RECEIVER_DISPOSE_DRAIN_MS;

  this.reconnectInitialMs =
    opts.reconnect?.initialMs ?? DEFAULT_RECONNECT_INITIAL_MS;
  this.reconnectMaxMs = opts.reconnect?.maxMs ?? DEFAULT_RECONNECT_MAX_MS;
  this.reconnectJitter = opts.reconnect?.jitter ?? true;

  this.sleep = opts.sleep ?? defaultSleep;
  this.publishMetric =
    opts.publishMetric ?? ((m) => publishSyncLatencyMetric(m, { logger: undefined }));
  this.now = opts.now ?? (() => Date.now());
}
|
|
425
|
+
|
|
426
|
+
// ─── PushReceiver surface ────────────────────────────────────────────────
|
|
427
|
+
|
|
428
|
+
/** Advisory view of the subscription state maintained by start/poll/dispose. */
get connected(): boolean { return this._connected; }
|
|
431
|
+
|
|
432
|
+
/**
 * Open the subscription by launching the long-poll loop.
 *
 * Returns without doing anything when the receiver is already disposed, when
 * the feature flag resolved to OFF (logged at info), or when a loop has
 * already been started (logged at debug). The loop itself is not awaited:
 * this method resolves as soon as the loop has been kicked off.
 */
async start(): Promise<void> {
  // A disposed receiver is never revived.
  if (this.disposed) {
    return;
  }

  const { tenantId } = this;

  if (!this.enabled) {
    this.logger.info(
      {
        event: "receiver.start.disabled",
        tenantId,
        reason: "feature flag off",
      },
      "push receiver disabled by feature flag",
    );
    return;
  }

  // An existing controller means start() already ran; stay idempotent.
  if (this.loopAbort !== null) {
    this.logger.debug(
      { event: "receiver.start.noop", tenantId },
      "push receiver already started",
    );
    return;
  }

  const controller = new AbortController();
  this.loopAbort = controller;
  this._connected = true;
  this.logger.info(
    { event: "receiver.start", tenantId, queueUrl: this.queueUrl },
    "push receiver subscribed (sqs long-poll)",
  );

  // Deliberately not awaited: the loop runs until dispose() aborts it.
  this.loopPromise = this.pollLoop(controller.signal);
}
|
|
463
|
+
|
|
464
|
+
/**
 * Tear the receiver down. Idempotent: once disposed it returns immediately,
 * and concurrent calls share the same in-progress promise.
 *
 * Sequence:
 *   1. Abort the poll-loop signal and the in-flight sync signal.
 *   2. Wait for the in-flight `syncFn` (if any) to settle, but no longer than
 *      `disposeDrainMs`.
 *   3. If the sync settled (or there was none), wait for the poll loop to
 *      observe the abort and exit. If the sync did NOT settle in time, the
 *      loop is skipped: it is blocked behind that same sync, so awaiting it
 *      would defeat the drain deadline and hang forever on a stuck `syncFn`.
 *   4. Mark the receiver disconnected and disposed, and log the totals.
 */
async dispose(): Promise<void> {
  if (this.disposed) return;
  if (this.disposePromise !== null) return this.disposePromise;
  this.disposing = true;

  this.disposePromise = (async () => {
    // Stop the poll loop + abort any in-flight long-poll / syncFn.
    try {
      this.loopAbort?.abort();
    } catch {
      /* AbortController.abort never throws on Node; defensive. */
    }
    try {
      this.inFlightAbort?.abort();
    } catch {
      /* defensive */
    }

    // True when there is no in-flight sync or it settled within the deadline.
    let syncSettled = true;
    if (this.inFlightSync !== null) {
      let drainTimer: ReturnType<typeof setTimeout> | undefined;
      const drainDeadline = new Promise<boolean>((resolve) => {
        drainTimer = setTimeout(() => resolve(false), this.disposeDrainMs);
        // Unref so a hung syncFn can't keep the process alive past exit.
        (drainTimer as { unref?: () => void }).unref?.();
      });
      try {
        syncSettled = await Promise.race([
          // Fulfilled and rejected both count as "settled".
          this.inFlightSync.then(
            () => true,
            () => true,
          ),
          drainDeadline,
        ]);
      } finally {
        // Do not leave the deadline timer pending once the race is decided.
        if (drainTimer !== undefined) clearTimeout(drainTimer);
      }
    }

    // Best-effort: let the poll loop observe the abort and exit. Skipped when
    // the sync was abandoned, because the loop is still awaiting that sync.
    if (syncSettled && this.loopPromise !== null) {
      await this.loopPromise.catch(() => undefined);
    }

    this._connected = false;
    this.disposed = true;
    this.logger.info(
      {
        event: "receiver.stop",
        tenantId: this.tenantId,
        processed: this._processedCount,
        deduped: this._dedupedCount,
      },
      "push receiver stopped",
    );
  })();

  return this.disposePromise;
}
|
|
514
|
+
|
|
515
|
+
// ─── Observability ─────────────────────────────────────────────────────────
|
|
516
|
+
|
|
517
|
+
/** Number of events that got past dedupe and whose `syncFn` call succeeded. */
get processedCount(): number { return this._processedCount; }

/** Number of events discarded by the dedupe check. */
get dedupedCount(): number { return this._dedupedCount; }

/** Number of messages dropped because their body failed to decode. */
get decodeFailureCount(): number { return this._decodeFailureCount; }

/** Number of `receiveMessage` failures the loop has recovered from. */
get receiveErrorCount(): number { return this._receiveErrorCount; }
|
|
533
|
+
|
|
534
|
+
// ─── Internals ───────────────────────────────────────────────────────────
|
|
535
|
+
|
|
536
|
+
/**
 * The long-poll loop. Runs until `signal` aborts (i.e. until `dispose()`).
 *
 * Each iteration long-polls the queue and hands every returned message to
 * `handleMessage`, one at a time. A rejected `receiveMessage` is treated as a
 * transient disconnect: the error is counted and logged, `connected` drops to
 * false, and the loop sleeps for a backoff delay before retrying. Any
 * successful receive, including an empty one, resets the backoff and marks
 * the receiver connected again.
 *
 * The loop is started un-awaited by `start()`, so it must never reject: a
 * rejecting `sleep` and a throwing `handleMessage` are both contained here.
 *
 * @param signal Abort signal that ends the loop.
 */
private async pollLoop(signal: AbortSignal): Promise<void> {
  let attempt = 0;
  while (!signal.aborted) {
    let received: { messages: SqsMessageLike[] };
    try {
      received = await this.sqs.receiveMessage({
        queueUrl: this.queueUrl,
        maxMessages: this.maxMessages,
        waitTimeSeconds: this.waitTimeSeconds,
        signal,
      });
    } catch (err) {
      if (signal.aborted) return; // dispose-driven abort — clean exit
      this._receiveErrorCount += 1;
      this._connected = false;
      const backoff = this.computeBackoff(attempt);
      attempt += 1;
      this.logger.warn(
        {
          event: "receiver.receive.failed",
          tenantId: this.tenantId,
          attempt,
          backoffMs: backoff,
          err: { message: (err as Error)?.message },
        },
        "push receiver receiveMessage failed; backing off (catch-up replay on resume)",
      );
      try {
        await this.sleep(backoff, signal);
      } catch {
        // An injected sleep may reject on abort; the loop condition decides
        // whether to continue, so the rejection itself is irrelevant.
      }
      continue;
    }

    // The receive may have resolved because of the abort; do not touch state
    // or start new work while the receiver is shutting down.
    if (signal.aborted) return;

    attempt = 0; // success → reset backoff
    // A completed poll proves the queue is reachable even when it is empty,
    // so recover the flag here rather than only when messages arrive.
    this._connected = true;

    // Tolerate an adapter that omits the array when the queue is empty.
    for (const msg of received.messages ?? []) {
      if (signal.aborted) return;
      try {
        await this.handleMessage(msg);
      } catch (err) {
        // Keep the loop alive: one bad message must not end polling.
        this.logger.error(
          {
            event: "receiver.message.failed",
            tenantId: this.tenantId,
            messageId: msg.MessageId,
            err: { message: err instanceof Error ? err.message : String(err) },
          },
          "push receiver failed to handle message; continuing",
        );
      }
    }
  }
}
|
|
584
|
+
|
|
585
|
+
/**
|
|
586
|
+
* Decode → dedupe → dispatch → delete a single SQS message. Decode failures
|
|
587
|
+
* and syncFn throws are logged + absorbed (never crash the loop). The message
|
|
588
|
+
* is deleted only after a successful handoff so an unprocessed message stays
|
|
589
|
+
* on the queue for redelivery (the dedupe path makes redelivery idempotent).
|
|
590
|
+
*/
|
|
591
|
+
private async handleMessage(msg: SqsMessageLike): Promise<void> {
  // Nothing is decoded, dispatched, or deleted once disposal has begun.
  if (this.disposing || this.disposed) return;

  let decoded: PushEvent;
  try {
    // A missing body is decoded as the empty string.
    decoded = decodePushEvent(msg.Body ?? "");
  } catch (err) {
    this._decodeFailureCount += 1;
    this.logger.warn(
      {
        event: "receiver.decode.failed",
        tenantId: this.tenantId,
        messageId: msg.MessageId,
        err: { message: (err as Error).message },
      },
      "push receiver dropped event: decode failed",
    );
    // An undecodable message is deleted so it cannot redeliver forever; it
    // carries no actionable path. (Defense in depth.)
    await this.safeDelete(msg);
    return;
  }

  // `dispatch` reports true when the event was deduped or when syncFn settled
  // (including a logged throw). It reports false only when disposal started,
  // in which case the message is left on the queue.
  const accounted = await this.dispatch(decoded);
  if (!accounted) return;

  await this.safeDelete(msg);
}
|
|
623
|
+
|
|
624
|
+
/**
|
|
625
|
+
* Dedupe + invoke `syncFn`. Returns true once the event has been accounted
|
|
626
|
+
* for (deduped, or syncFn settled) so the caller can delete the message.
|
|
627
|
+
* Stores the in-flight promise so `dispose()` can drain it.
|
|
628
|
+
*/
|
|
629
|
+
private async dispatch(event: PushEvent): Promise<boolean> {
  if (this.disposing || this.disposed) return false;

  // Correlation fields shared by every log line / metric for this event. Read
  // lazily so each emission sees the values at the moment it is produced.
  const correlation = () => ({
    tenantId: this.tenantId,
    relativePath: event.relativePath,
    sequenceNumber: event.sequenceNumber,
  });

  const lastSeen = this.seenSequencePerPath.get(event.relativePath);
  if (lastSeen !== undefined && event.sequenceNumber <= lastSeen) {
    // Already accounted for (same or newer sequence seen for this path).
    this._dedupedCount += 1;
    this.logger.debug(
      { event: "receiver.event.deduped", ...correlation(), seen: lastSeen },
      "push receiver deduped event",
    );
    return true;
  }

  // The sequence is recorded BEFORE syncFn runs so that a back-to-back event
  // for the same path is deduped against it. Consequence: a throwing syncFn
  // still advances the counter; the safety-net poll recovers from that.
  this.seenSequencePerPath.set(event.relativePath, event.sequenceNumber);

  const abort = new AbortController();
  this.inFlightAbort = abort;
  const ctx: PushReceiverContext = { event, signal: abort.signal };

  // Filled in after the task promise exists; lets the task's `finally` check
  // whether it is still the registered in-flight sync without referencing its
  // own (not yet initialised) binding.
  let registered: Promise<void> | null = null;
  const startedAtMs = this.now();

  const task: Promise<void> = (async () => {
    try {
      this.logger.debug(
        { event: "receiver.sync.start", ...correlation() },
        "push receiver invoking sync engine",
      );
      await this.syncFn(ctx);
      this._processedCount += 1;
      this.logger.debug(
        { event: "receiver.sync.completed", ...correlation() },
        "push receiver sync completed",
      );

      // US-011: third link of the correlated log chain + latency metric.
      // Latency runs from the event's own timestamp to now; when that
      // timestamp does not parse, the local syncFn duration is used instead.
      // Only reached on success, so failed syncs never contribute a datum.
      const finishedAtMs = this.now();
      const savedAtMs = Date.parse(event.eventTimestamp);
      const baselineMs = Number.isFinite(savedAtMs) ? savedAtMs : startedAtMs;
      const latencySeconds = Math.max(0, finishedAtMs - baselineMs) / 1000;

      this.logger.info(
        { event: "fanout.receive", ...correlation(), latencySeconds },
        "push receiver fanout-receive (round-trip complete)",
      );

      // Fire-and-forget: the metric publish is deliberately not awaited so it
      // cannot delay the message delete / next poll. emitLatencyMetric
      // swallows its own errors.
      void this.emitLatencyMetric({
        ...correlation(),
        latencySeconds,
        timestamp: new Date(finishedAtMs),
      });
    } catch (err) {
      // A misbehaving sync engine must never crash the receiver loop: log and
      // fall through. The safety-net poll covers the failed pull.
      const failure = err as NodeJS.ErrnoException;
      this.logger.error(
        {
          event: "receiver.sync.failed",
          ...correlation(),
          err: { message: failure?.message, code: failure?.code },
        },
        "push receiver sync engine threw",
      );
    } finally {
      // Clear the in-flight slots only if they still point at this run.
      if (this.inFlightAbort === abort) this.inFlightAbort = null;
      if (this.inFlightSync === registered) this.inFlightSync = null;
    }
  })();

  registered = task;
  this.inFlightSync = task; // lets dispose() drain this sync
  await task;
  return true;
}
|
|
741
|
+
|
|
742
|
+
/** Delete a message, swallowing transport errors (redelivery is harmless). */
|
|
743
|
+
private async safeDelete(msg: SqsMessageLike): Promise<void> {
  const receiptHandle = msg.ReceiptHandle;
  // Without a receipt handle there is nothing to delete against.
  if (!receiptHandle) return;

  try {
    await this.sqs.deleteMessage({ queueUrl: this.queueUrl, receiptHandle });
  } catch (err) {
    // Delete failures are logged and absorbed, never rethrown.
    this.logger.warn(
      {
        event: "receiver.delete.failed",
        tenantId: this.tenantId,
        messageId: msg.MessageId,
        err: { message: (err as Error)?.message },
      },
      "push receiver failed to delete message (will redeliver; dedupe absorbs)",
    );
  }
}
|
|
762
|
+
|
|
763
|
+
/**
|
|
764
|
+
* Publish one best-effort latency datum (US-011). Awaits `publishMetric` and
|
|
765
|
+
* swallows any rejection so a metric-backend outage can never reach the poll
|
|
766
|
+
* loop. Called fire-and-forget (`void`) off the dispatch critical path.
|
|
767
|
+
*/
|
|
768
|
+
private async emitLatencyMetric(metric: SyncLatencyMetric): Promise<void> {
  try {
    await this.publishMetric(metric);
  } catch (publishError) {
    // Publishing is best-effort: a failure is downgraded to a warning and
    // never propagates to the caller.
    const { tenantId, sequenceNumber } = metric;
    this.logger.warn(
      {
        event: "receiver.metric.failed",
        tenantId,
        sequenceNumber,
        err: { message: (publishError as Error)?.message },
      },
      "push receiver failed to publish latency metric (ignored)",
    );
  }
}
|
|
783
|
+
|
|
784
|
+
/** Exponential backoff (capped) with optional full-jitter. */
|
|
785
|
+
private computeBackoff(attempt: number): number {
  // initial * 2^attempt, clamped to the configured ceiling.
  const uncappedMs = this.reconnectInitialMs * Math.pow(2, attempt);
  const cappedMs = Math.min(this.reconnectMaxMs, uncappedMs);

  // With jitter enabled, pick a uniformly random whole number of ms in
  // [0, cappedMs); otherwise return the capped delay unchanged.
  return this.reconnectJitter
    ? Math.floor(Math.random() * cappedMs)
    : cappedMs;
}
|
|
793
|
+
}
|
|
794
|
+
|
|
795
|
+
// ─── In-memory receiver (unit-test transport analogue) ───────────────────────
|
|
796
|
+
|
|
797
|
+
/**
|
|
798
|
+
* A tiny in-process fanout the {@link InMemoryPushReceiver} subscribes against.
|
|
799
|
+
* Models SNS publish + the per-client SQS queue's disconnect buffering so unit
|
|
800
|
+
* tests can drive the receive path without AWS. `publish` raw strings (the
|
|
801
|
+
* wire form) so decode-failure paths are testable too.
|
|
802
|
+
*/
|
|
803
|
+
export class InMemoryFanout {
  /** Currently registered handlers (a Set, so a handler is held at most once). */
  private readonly subscribers = new Set<(raw: string) => void>();

  /**
   * Register `handler` for every subsequently published body.
   *
   * @returns a function that removes `handler` again.
   */
  subscribe(handler: (raw: string) => void): () => void {
    const registry = this.subscribers;
    registry.add(handler);
    return () => registry.delete(handler);
  }

  /** Publish a raw (already-encoded) message body to all subscribers. */
  publish(raw: string): void {
    // Deliver to a snapshot so handlers that subscribe/unsubscribe while the
    // publish is in progress do not affect this delivery round.
    const recipients = Array.from(this.subscribers);
    for (let i = 0; i < recipients.length; i += 1) {
      recipients[i](raw);
    }
  }
}
|
|
816
|
+
|
|
817
|
+
/** Options for {@link InMemoryPushReceiver}. */
|
|
818
|
+
export interface InMemoryPushReceiverOptions {
  // ── Required wiring ──────────────────────────────────────────────────────

  /** Tenant identifier; attached to log lines and passed to flag resolution. */
  tenantId: string;
  /** The in-process fanout the receiver subscribes to on `start()`. */
  fanout: InMemoryFanout;
  /** Sync engine callback invoked once per non-deduplicated event. */
  syncFn: SyncEngineFn;

  // ── Optional behaviour ───────────────────────────────────────────────────

  /** Log sink; the receiver falls back to `NOOP_LOGGER` when omitted. */
  logger?: ReceiverLogger;
  /** Forwarded to `resolveEnabled` as the `explicit` setting. */
  enabled?: boolean;
  /** Forwarded to `resolveEnabled` as the flag provider. */
  flagProvider?: EventDrivenPushFlagProvider;
  /** Forwarded to `resolveEnabled` as the environment map. */
  env?: Record<string, string | undefined>;
  /**
   * How long (ms) `dispose()` waits for an in-flight sync before giving up.
   * Falls back to `DEFAULT_RECEIVER_DISPOSE_DRAIN_MS` when omitted.
   */
  disposeDrainMs?: number;
}
|
|
828
|
+
|
|
829
|
+
/**
|
|
830
|
+
* In-memory receiver paired with {@link InMemoryFanout}. Powers the unit
|
|
831
|
+
* tests for dedupe, reconnect-replay, flag gating, and dispose-drain WITHOUT
|
|
832
|
+
* any AWS SDK. The dedupe / dispatch / dispose semantics are identical to
|
|
833
|
+
* {@link SqsPushReceiver} (shared design); the disconnect buffer is the
|
|
834
|
+
* in-process analogue of the per-client SQS queue's 14-day retention.
|
|
835
|
+
*/
|
|
836
|
+
export class InMemoryPushReceiver implements PushReceiver {
  private readonly tenantId: string;
  private readonly fanout: InMemoryFanout;
  private readonly syncFn: SyncEngineFn;
  private readonly logger: ReceiverLogger;
  private readonly enabled: boolean;
  private readonly disposeDrainMs: number;

  private _connected = false;
  private disposed = false;
  private disposing = false;
  private disposePromise: Promise<void> | null = null;
  /** Non-null while subscribed to the fanout; doubles as the "started" marker. */
  private unsubscribe: (() => void) | null = null;

  /** True between `simulateDisconnect()` and `simulateReconnect()`. */
  private disconnectedFlag = false;
  /** Decoded events that arrived while disconnected, in arrival order. */
  private readonly pendingDuringDisconnect: PushEvent[] = [];
  /** Highest sequence number dispatched so far, per relative path. */
  private readonly seenSequencePerPath = new Map<string, number>();

  /**
   * Every sync that is currently running, paired with the controller whose
   * signal was handed to `syncFn`. Events are dispatched without awaiting the
   * previous one, so several syncs can be in flight at once; tracking all of
   * them (rather than only the most recent) lets `dispose()` abort and drain
   * every one.
   */
  private readonly inFlight = new Set<{
    controller: AbortController;
    settled: Promise<void>;
  }>();

  private _processedCount = 0;
  private _dedupedCount = 0;
  private _decodeFailureCount = 0;

  constructor(opts: InMemoryPushReceiverOptions) {
    this.tenantId = opts.tenantId;
    this.fanout = opts.fanout;
    this.syncFn = opts.syncFn;
    this.logger = opts.logger ?? NOOP_LOGGER;
    this.disposeDrainMs =
      opts.disposeDrainMs ?? DEFAULT_RECEIVER_DISPOSE_DRAIN_MS;
    this.enabled = resolveEnabled({
      explicit: opts.enabled,
      flagProvider: opts.flagProvider,
      tenantId: opts.tenantId,
      env: opts.env,
    });
  }

  /** Whether the receiver currently considers itself connected. */
  get connected(): boolean {
    return this._connected;
  }

  /**
   * Subscribe to the fanout. No-op when already disposed, when disabled by
   * the resolved feature flag, or when already started.
   */
  async start(): Promise<void> {
    if (this.disposed) return;
    if (!this.enabled) {
      this.logger.info(
        { event: "receiver.start.disabled", tenantId: this.tenantId },
        "push receiver disabled by feature flag",
      );
      return;
    }
    if (this.unsubscribe !== null) return; // double-start no-op

    this.unsubscribe = this.fanout.subscribe((raw) => {
      let validated: PushEvent;
      try {
        validated = decodePushEvent(raw);
      } catch (err) {
        // Undecodable bodies are counted, logged, and dropped.
        this._decodeFailureCount += 1;
        this.logger.warn(
          {
            event: "receiver.decode.failed",
            tenantId: this.tenantId,
            err: { message: (err as Error).message },
          },
          "push receiver dropped event: decode failed",
        );
        return;
      }
      if (this.disconnectedFlag) {
        // Buffer until simulateReconnect() replays the backlog.
        this.pendingDuringDisconnect.push(validated);
        return;
      }
      void this.dispatch(validated);
    });
    this._connected = true;
    this.logger.info(
      { event: "receiver.start", tenantId: this.tenantId },
      "push receiver subscribed (in-memory)",
    );
  }

  /**
   * Stop the receiver: abort every in-flight sync, wait up to
   * `disposeDrainMs` for all of them to settle, unsubscribe from the fanout,
   * and mark the receiver disposed. Idempotent — concurrent and repeated calls
   * share the same promise.
   */
  async dispose(): Promise<void> {
    if (this.disposed) return;
    if (this.disposePromise !== null) return this.disposePromise;
    this.disposing = true;
    this.disposePromise = (async () => {
      // Snapshot first: entries remove themselves from the set as they settle.
      const running = [...this.inFlight];
      for (const { controller } of running) {
        try {
          controller.abort();
        } catch {
          /* defensive */
        }
      }
      if (running.length > 0) {
        let timer: ReturnType<typeof setTimeout> | undefined;
        const drainDeadline = new Promise<void>((resolve) => {
          timer = setTimeout(resolve, this.disposeDrainMs);
          // Don't let the drain timer keep the process alive.
          (timer as { unref?: () => void }).unref?.();
        });
        try {
          await Promise.race([
            Promise.all(
              running.map((entry) => entry.settled.catch(() => undefined)),
            ),
            drainDeadline,
          ]);
        } finally {
          // Release the timer as soon as the race is decided.
          clearTimeout(timer);
        }
      }
      if (this.unsubscribe !== null) {
        try {
          this.unsubscribe();
        } catch {
          /* defensive */
        }
        this.unsubscribe = null;
      }
      this._connected = false;
      this.disposed = true;
      this.logger.info(
        {
          event: "receiver.stop",
          tenantId: this.tenantId,
          processed: this._processedCount,
          deduped: this._dedupedCount,
        },
        "push receiver stopped",
      );
    })();
    return this.disposePromise;
  }

  // ─── Test hooks (model the SQS retention buffer) ────────────────────────────

  /** Number of events for which `syncFn` completed without throwing. */
  get processedCount(): number {
    return this._processedCount;
  }
  /** Number of events skipped by the per-path sequence check. */
  get dedupedCount(): number {
    return this._dedupedCount;
  }
  /** Number of raw bodies that failed to decode. */
  get decodeFailureCount(): number {
    return this._decodeFailureCount;
  }
  /** Number of events currently buffered by a simulated disconnect. */
  get bufferedCount(): number {
    return this.pendingDuringDisconnect.length;
  }

  /** Emulate a network blip — events buffer instead of dispatching. */
  simulateDisconnect(): void {
    // Only meaningful once started (and therefore enabled).
    if (!this.enabled || this.unsubscribe === null) return;
    this.disconnectedFlag = true;
    this._connected = false;
  }

  /** Emulate reconnect — drain the buffer through the same dedupe path. */
  simulateReconnect(): void {
    if (!this.disconnectedFlag) return;
    this.disconnectedFlag = false;
    this._connected = true;
    // splice(0) empties the buffer and hands back its contents in order.
    const queued = this.pendingDuringDisconnect.splice(0);
    for (const evt of queued) void this.dispatch(evt);
  }

  /**
   * Dedupe by per-path sequence number, then invoke `syncFn`. The returned
   * promise never rejects: a `syncFn` failure is logged and absorbed. The run
   * is registered in `inFlight` for its whole lifetime so `dispose()` can
   * abort and drain it.
   */
  private dispatch(event: PushEvent): Promise<void> {
    if (this.disposing || this.disposed) return Promise.resolve();

    const seen = this.seenSequencePerPath.get(event.relativePath);
    if (seen !== undefined && event.sequenceNumber <= seen) {
      this._dedupedCount += 1;
      return Promise.resolve();
    }
    // Record before invoking syncFn so a back-to-back event for the same path
    // is checked against this sequence.
    this.seenSequencePerPath.set(event.relativePath, event.sequenceNumber);

    const controller = new AbortController();
    const ctx: PushReceiverContext = { event, signal: controller.signal };

    // Register before starting the run: the run's `finally` may execute
    // synchronously (syncFn throwing before its first await) and must be able
    // to find and remove its own entry.
    const entry = { controller, settled: Promise.resolve() };
    this.inFlight.add(entry);
    entry.settled = (async () => {
      try {
        await this.syncFn(ctx);
        this._processedCount += 1;
      } catch (err) {
        this.logger.error(
          {
            event: "receiver.sync.failed",
            tenantId: this.tenantId,
            relativePath: event.relativePath,
            err: { message: (err as Error)?.message },
          },
          "push receiver sync engine threw",
        );
      } finally {
        this.inFlight.delete(entry);
      }
    })();
    return entry.settled;
  }
}
|
|
1032
|
+
|
|
1033
|
+
// ─── Factory ───────────────────────────────────────────────────────────────
|
|
1034
|
+
|
|
1035
|
+
/**
|
|
1036
|
+
* Build a PushReceiver. The daemon defaults to the noop so wiring is
|
|
1037
|
+
* regression-safe — production deployments wire the SQS impl explicitly once
|
|
1038
|
+
* the server provisioning Lambda mints a queue URL. Mirrors PushTransport's
|
|
1039
|
+
* noop-default opt-in posture.
|
|
1040
|
+
*/
|
|
1041
|
+
export type CreatePushReceiverOptions =
  // Explicit opt-out: always yields the noop receiver.
  | { kind: "noop" }
  // SQS wiring; the `kind` tag is optional for this variant.
  | ({ kind?: "sqs" } & SqsPushReceiverOptions);
|
|
1044
|
+
|
|
1045
|
+
export function createPushReceiver(
  opts: CreatePushReceiverOptions,
): PushReceiver {
  // An explicit "noop" kind wins regardless of anything else on the object.
  if ("kind" in opts && opts.kind === "noop") {
    return new NoopPushReceiver();
  }
  // The SQS receiver is built only when both the queue URL and the SQS client
  // are present; any other shape falls back to the noop receiver.
  return "queueUrl" in opts && "sqs" in opts
    ? new SqsPushReceiver(opts)
    : new NoopPushReceiver();
}
|
|
1056
|
+
|
|
1057
|
+
// ─── Helpers ───────────────────────────────────────────────────────────────
|
|
1058
|
+
|
|
1059
|
+
/**
|
|
1060
|
+
* Default sleep that resolves after `ms` OR promptly on abort (so a dispose
|
|
1061
|
+
* during a backoff wait doesn't block). Never rejects.
|
|
1062
|
+
*/
|
|
1063
|
+
function defaultSleep(ms: number, signal: AbortSignal): Promise<void> {
  // Already aborted: nothing to wait for.
  if (signal.aborted) return Promise.resolve();

  return new Promise<void>((wake) => {
    // Abort path: cancel the pending timer and resolve straight away.
    const wakeEarly = (): void => {
      clearTimeout(timer);
      wake();
    };

    // Timer path: detach the abort listener, then resolve.
    const timer = setTimeout(() => {
      signal.removeEventListener("abort", wakeEarly);
      wake();
    }, ms);
    // Where timers expose unref(), don't let this one keep the process alive.
    (timer as { unref?: () => void }).unref?.();

    signal.addEventListener("abort", wakeEarly, { once: true });
  });
}
|