@indigoai-us/hq-cloud 5.26.0 → 5.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/.github/workflows/ci.yml +34 -0
  2. package/dist/bin/sync-runner.d.ts +38 -0
  3. package/dist/bin/sync-runner.d.ts.map +1 -1
  4. package/dist/bin/sync-runner.js +75 -1
  5. package/dist/bin/sync-runner.js.map +1 -1
  6. package/dist/index.d.ts +4 -2
  7. package/dist/index.d.ts.map +1 -1
  8. package/dist/index.js +4 -1
  9. package/dist/index.js.map +1 -1
  10. package/dist/sync/feature-flags.d.ts +136 -0
  11. package/dist/sync/feature-flags.d.ts.map +1 -0
  12. package/dist/sync/feature-flags.js +160 -0
  13. package/dist/sync/feature-flags.js.map +1 -0
  14. package/dist/sync/feature-flags.test.d.ts +24 -0
  15. package/dist/sync/feature-flags.test.d.ts.map +1 -0
  16. package/dist/sync/feature-flags.test.js +330 -0
  17. package/dist/sync/feature-flags.test.js.map +1 -0
  18. package/dist/sync/index.d.ts +10 -2
  19. package/dist/sync/index.d.ts.map +1 -1
  20. package/dist/sync/index.js +5 -1
  21. package/dist/sync/index.js.map +1 -1
  22. package/dist/sync/logger.d.ts +61 -0
  23. package/dist/sync/logger.d.ts.map +1 -0
  24. package/dist/sync/logger.js +51 -0
  25. package/dist/sync/logger.js.map +1 -0
  26. package/dist/sync/logger.test.d.ts +19 -0
  27. package/dist/sync/logger.test.d.ts.map +1 -0
  28. package/dist/sync/logger.test.js +199 -0
  29. package/dist/sync/logger.test.js.map +1 -0
  30. package/dist/sync/metrics.d.ts +89 -0
  31. package/dist/sync/metrics.d.ts.map +1 -0
  32. package/dist/sync/metrics.js +105 -0
  33. package/dist/sync/metrics.js.map +1 -0
  34. package/dist/sync/metrics.test.d.ts +19 -0
  35. package/dist/sync/metrics.test.d.ts.map +1 -0
  36. package/dist/sync/metrics.test.js +280 -0
  37. package/dist/sync/metrics.test.js.map +1 -0
  38. package/dist/sync/push-receiver.d.ts +442 -0
  39. package/dist/sync/push-receiver.d.ts.map +1 -0
  40. package/dist/sync/push-receiver.js +782 -0
  41. package/dist/sync/push-receiver.js.map +1 -0
  42. package/dist/sync/push-receiver.test.d.ts +25 -0
  43. package/dist/sync/push-receiver.test.d.ts.map +1 -0
  44. package/dist/sync/push-receiver.test.js +477 -0
  45. package/dist/sync/push-receiver.test.js.map +1 -0
  46. package/dist/sync/push-transport.d.ts +84 -1
  47. package/dist/sync/push-transport.d.ts.map +1 -1
  48. package/dist/sync/push-transport.js +84 -0
  49. package/dist/sync/push-transport.js.map +1 -1
  50. package/dist/watcher.d.ts +127 -11
  51. package/dist/watcher.d.ts.map +1 -1
  52. package/dist/watcher.js +294 -57
  53. package/dist/watcher.js.map +1 -1
  54. package/package.json +9 -5
  55. package/src/bin/sync-runner.ts +102 -1
  56. package/src/index.ts +21 -0
  57. package/src/sync/feature-flags.test.ts +392 -0
  58. package/src/sync/feature-flags.ts +229 -0
  59. package/src/sync/index.ts +57 -2
  60. package/src/sync/logger.test.ts +241 -0
  61. package/src/sync/logger.ts +79 -0
  62. package/src/sync/metrics.test.ts +380 -0
  63. package/src/sync/metrics.ts +158 -0
  64. package/src/sync/push-receiver.test.ts +545 -0
  65. package/src/sync/push-receiver.ts +1077 -0
  66. package/src/sync/push-transport.ts +148 -1
  67. package/src/watcher.ts +408 -51
  68. package/test/e2e/sync/cross-tenant-isolation.test.ts +502 -0
  69. package/test/e2e/watcher-real-chokidar.test.ts +105 -0
  70. package/test/e2e/watcher-recursive-backend.test.ts +115 -0
@@ -0,0 +1,380 @@
1
+ /**
2
+ * US-011 — unit tests for `src/sync/metrics.ts` + the receive-success-path
3
+ * metric emission wired into {@link SqsPushReceiver}.
4
+ *
5
+ * Mirrors the hq-pro PR #112 metrics test:
6
+ * - mock the CloudWatchClient via `aws-sdk-client-mock`
7
+ * - inject the mocked client via the `_setSyncCloudWatchClient` test seam
8
+ * - assert namespace, metric name, unit, dimensions, value
9
+ * - assert SDK errors are CAUGHT (the receive loop must never crash on a
10
+ * CloudWatch outage)
11
+ *
12
+ * Plus the US-011-specific receiver wiring:
13
+ * - on the receive-SUCCESS path, the receiver publishes exactly one latency
14
+ * datum carrying the event's tenantId + sequenceNumber (mocked publish seam
15
+ * — no real AWS)
16
+ * - a failing publish seam does NOT crash the receiver loop
17
+ */
18
+
19
+ import { Writable } from "node:stream";
20
+
21
+ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
22
+ import { mockClient } from "aws-sdk-client-mock";
23
+ import {
24
+ CloudWatchClient,
25
+ PutMetricDataCommand,
26
+ } from "@aws-sdk/client-cloudwatch";
27
+
28
+ import {
29
+ SYNC_LATENCY_METRIC_NAME,
30
+ SYNC_METRIC_NAMESPACE,
31
+ _setSyncCloudWatchClient,
32
+ publishSyncLatencyMetric,
33
+ type SyncLatencyMetric,
34
+ } from "./metrics.js";
35
+ import { createLogger } from "./logger.js";
36
+ import { encodePushEvent, type PushEvent } from "./push-event.js";
37
+ import {
38
+ SqsPushReceiver,
39
+ type PublishMetricFn,
40
+ type SqsClientLike,
41
+ type SqsMessageLike,
42
+ } from "./push-receiver.js";
43
+
44
+ // ── Helpers ─────────────────────────────────────────────────────────────────
45
+
/**
 * Build a {@link SyncLatencyMetric} fixture.
 *
 * Every field has a fixed default; any field present in `overrides` replaces
 * the corresponding default.
 */
function makeMetric(overrides: Partial<SyncLatencyMetric> = {}): SyncLatencyMetric {
  const defaults: SyncLatencyMetric = {
    tenantId: "tenant-A",
    relativePath: "docs/overview.md",
    sequenceNumber: 42,
    latencySeconds: 1.234,
    timestamp: new Date("2026-05-19T12:00:00.000Z"),
  };
  return { ...defaults, ...overrides };
}
56
+
/**
 * Create an in-memory sink for newline-delimited JSON output.
 *
 * `stream` accumulates everything written to it (non-string chunks are decoded
 * as UTF-8). `lines()` splits the accumulated text on "\n", drops empty
 * segments, and returns the remaining segments parsed with `JSON.parse`.
 */
function captureStream(): { stream: Writable; lines: () => unknown[] } {
  let captured = "";

  const stream = new Writable({
    write(chunk, _encoding, done) {
      captured +=
        typeof chunk === "string" ? chunk : (chunk as Buffer).toString("utf8");
      done();
    },
  });

  const lines = (): unknown[] => {
    const parsed: unknown[] = [];
    for (const segment of captured.split("\n")) {
      if (segment.length > 0) {
        parsed.push(JSON.parse(segment) as unknown);
      }
    }
    return parsed;
  };

  return { stream, lines };
}
77
+
/** Tenant id shared by the push-event and receiver fixtures in this file. */
const TENANT = "tenant-indigo";

/** Queue URL passed to every receiver constructed in this file. */
const QUEUE_URL = "https://sqs.us-east-1.amazonaws.com/123456789012/sync-push-indigo-deviceB";
81
+
/**
 * Build a {@link PushEvent} fixture whose origin is `device-A` in
 * {@link TENANT}. Any field present in `overrides` replaces the default.
 */
function makeEvent(overrides: Partial<PushEvent> = {}): PushEvent {
  // The fixture uses one instant for both the file mtime and the event time.
  const occurredAt = "2026-05-21T12:34:56.000Z";
  const defaults: PushEvent = {
    relativePath: "companies/indigo/notes.md",
    contentHash:
      "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
    mtime: occurredAt,
    originDeviceId: "device-A",
    originTenantId: TENANT,
    sequenceNumber: 7,
    eventTimestamp: occurredAt,
  };
  return { ...defaults, ...overrides };
}
95
+
/**
 * Minimal fake SQS client for driving a receiver in tests.
 *
 * The first `receiveMessage` call returns the messages given to the
 * constructor. Every later call idles for about 5 ms, or until the caller's
 * signal aborts, and then returns an empty batch. Receipt handles passed to
 * `deleteMessage` are recorded in `deleted`.
 */
class OneBatchSqs implements SqsClientLike {
  /** The not-yet-delivered batch; `null` once it has been handed out. */
  private batch: SqsMessageLike[] | null;
  /** Receipt handles passed to `deleteMessage`, in call order. */
  readonly deleted: string[] = [];

  constructor(messages: SqsMessageLike[]) {
    this.batch = messages;
  }

  /**
   * Deliver the constructor batch once, then behave like an empty long poll.
   *
   * @param args - Only `args.signal` is consulted; the other fields are ignored.
   * @returns The constructor batch on the first call, otherwise `{ messages: [] }`.
   */
  async receiveMessage(args: {
    queueUrl: string;
    maxMessages: number;
    waitTimeSeconds: number;
    signal: AbortSignal;
  }): Promise<{ messages: SqsMessageLike[] }> {
    if (this.batch) {
      const delivered = this.batch;
      this.batch = null;
      return { messages: delivered };
    }
    return new Promise((resolve) => {
      if (args.signal.aborted) return resolve({ messages: [] });

      const onAbort = (): void => {
        clearTimeout(timer);
        resolve({ messages: [] });
      };
      const timer = setTimeout(() => {
        // Detach the abort listener when the idle poll finishes on its own.
        // The same signal is passed on every poll, so leaving it attached
        // would add one more listener to that signal per idle poll.
        args.signal.removeEventListener("abort", onAbort);
        resolve({ messages: [] });
      }, 5);
      // Do not let the idle timer keep the process alive on its own.
      (timer as { unref?: () => void }).unref?.();
      args.signal.addEventListener("abort", onAbort, { once: true });
    });
  }

  /** Record the receipt handle; nothing is actually deleted. */
  async deleteMessage(args: {
    queueUrl: string;
    receiptHandle: string;
  }): Promise<void> {
    this.deleted.push(args.receiptHandle);
  }
}
138
+
/**
 * Poll `predicate` about every 5 ms until it returns true.
 *
 * @param predicate - Condition to wait for; checked once before any waiting.
 * @param timeoutMs - Budget in milliseconds (default 1000).
 * @throws Error("until() timed out") when the predicate is still false after
 *   more than `timeoutMs` milliseconds have elapsed.
 */
async function until(
  predicate: () => boolean,
  timeoutMs = 1000,
): Promise<void> {
  const startedAt = Date.now();
  for (;;) {
    if (predicate()) return;
    const elapsed = Date.now() - startedAt;
    if (elapsed > timeoutMs) {
      throw new Error("until() timed out");
    }
    await new Promise((wake) => setTimeout(wake, 5));
  }
}
152
+
153
+ // ── publishSyncLatencyMetric (CloudWatch contract) ───────────────────────────
154
+
/**
 * aws-sdk-client-mock stub for `CloudWatchClient`. It is reset and injected as
 * the metrics module's singleton before every test in the suite below.
 */
const cwMock = mockClient(CloudWatchClient);

describe("publishSyncLatencyMetric", () => {
  /** The first datum of the first PutMetricData call recorded on `cwMock`. */
  const firstDatum = () =>
    cwMock.commandCalls(PutMetricDataCommand)[0]!.args[0].input.MetricData![0];

  beforeEach(() => {
    cwMock.reset();
    _setSyncCloudWatchClient(cwMock as unknown as CloudWatchClient);
  });

  afterEach(() => {
    // Undo any `vi.spyOn` spy even when a test failed part-way, so a silenced
    // `console.error` cannot leak into later tests.
    vi.restoreAllMocks();
  });

  it("sends a single PutMetricData with the documented namespace + metric name", async () => {
    cwMock.on(PutMetricDataCommand).resolves({});

    await publishSyncLatencyMetric(makeMetric());

    const calls = cwMock.commandCalls(PutMetricDataCommand);
    expect(calls).toHaveLength(1);

    const input = calls[0]!.args[0].input;
    expect(input.Namespace).toBe(SYNC_METRIC_NAMESPACE);
    expect(input.Namespace).toBe("HQPro/Sync");
    expect(input.MetricData).toHaveLength(1);
    expect(input.MetricData![0].MetricName).toBe(SYNC_LATENCY_METRIC_NAME);
    expect(input.MetricData![0].MetricName).toBe(
      "hq-cloud.sync.p95_latency_seconds",
    );
  });

  it("uses Unit=Seconds and stamps the supplied timestamp", async () => {
    cwMock.on(PutMetricDataCommand).resolves({});

    const ts = new Date("2026-05-19T07:00:00.000Z");
    await publishSyncLatencyMetric(makeMetric({ timestamp: ts }));

    const datum = firstDatum();
    expect(datum.Unit).toBe("Seconds");
    expect(datum.Timestamp).toEqual(ts);
  });

  it("publishes the observed latencySeconds value verbatim", async () => {
    cwMock.on(PutMetricDataCommand).resolves({});

    await publishSyncLatencyMetric(makeMetric({ latencySeconds: 2.71828 }));

    expect(firstDatum().Value).toBe(2.71828);
  });

  it("attaches a `TenantId` dimension carrying the tenantId", async () => {
    cwMock.on(PutMetricDataCommand).resolves({});

    await publishSyncLatencyMetric(makeMetric({ tenantId: "tenant-prs_xyz" }));

    expect(firstDatum().Dimensions).toEqual([
      { Name: "TenantId", Value: "tenant-prs_xyz" },
    ]);
  });

  it("does not throw when the CloudWatch SDK rejects (sync loop must not crash)", async () => {
    cwMock
      .on(PutMetricDataCommand)
      .rejects(new Error("CloudWatch unavailable"));
    // Restored by the suite's afterEach, including when an assertion fails.
    const consoleSpy = vi.spyOn(console, "error").mockImplementation(() => {});

    await expect(
      publishSyncLatencyMetric(makeMetric()),
    ).resolves.toBeUndefined();

    expect(consoleSpy).toHaveBeenCalledWith(
      "Failed to publish sync latency metric to CloudWatch:",
      "CloudWatch unavailable",
    );
  });

  it("routes SDK failures through the supplied pino logger when provided", async () => {
    cwMock.on(PutMetricDataCommand).rejects(new Error("ThrottlingException"));
    const { stream, lines } = captureStream();
    const logger = createLogger({
      component: "metrics-test",
      destination: stream,
      level: "debug",
    });

    await publishSyncLatencyMetric(
      makeMetric({ sequenceNumber: 99, relativePath: "a.md" }),
      { logger },
    );

    const failureLine = (lines() as Array<{ event?: string }>).find(
      (l) => l.event === "sync.metric.publish_failed",
    );
    expect(failureLine).toBeDefined();
    expect(failureLine).toMatchObject({
      event: "sync.metric.publish_failed",
      tenantId: "tenant-A",
      relativePath: "a.md",
      sequenceNumber: 99,
    });
  });

  it("accepts a per-call client override (does not call the singleton)", async () => {
    cwMock
      .on(PutMetricDataCommand)
      .rejects(new Error("singleton should not be used"));

    const overrideMock = mockClient(CloudWatchClient);
    try {
      overrideMock.on(PutMetricDataCommand).resolves({});

      await publishSyncLatencyMetric(makeMetric(), {
        client: overrideMock as unknown as CloudWatchClient,
      });

      expect(overrideMock.commandCalls(PutMetricDataCommand)).toHaveLength(1);
      expect(cwMock.commandCalls(PutMetricDataCommand)).toHaveLength(0);
    } finally {
      // Always remove the extra stub, even when an expectation above fails.
      overrideMock.restore();
    }
  });
});
277
+
278
+ // ── Receive-success-path metric emission (US-011 AC: "metric emission unit-
279
+ // tested on the receive success path") ──────────────────────────────────
280
+
/**
 * Receiver wiring: drives an {@link SqsPushReceiver} with a one-batch fake SQS
 * client and an injected `publishMetric` seam, so no test here touches AWS.
 */
describe("SqsPushReceiver metric emission on the receive-success path", () => {
  let receiver: SqsPushReceiver | undefined;

  afterEach(async () => {
    if (receiver) {
      await receiver.dispose();
      receiver = undefined;
    }
  });

  it("publishes exactly one latency datum per processed event (mocked publish, no real AWS)", async () => {
    const published: SyncLatencyMetric[] = [];
    const publishMetric: PublishMetricFn = async (m) => {
      published.push(m);
    };
    const event = makeEvent({ sequenceNumber: 7 });

    receiver = new SqsPushReceiver({
      tenantId: TENANT,
      queueUrl: QUEUE_URL,
      sqs: new OneBatchSqs([{ Body: encodePushEvent(event), ReceiptHandle: "rh-7" }]),
      syncFn: async () => {
        /* successful pull */
      },
      enabled: true,
      publishMetric,
      // Fake clock: the first reading equals the event timestamp and every
      // later reading is 1500 ms after the previous one, so time never goes
      // backwards. The assertions below only require a finite, non-negative
      // latency.
      now: (() => {
        let t = Date.parse(event.eventTimestamp);
        return () => {
          const cur = t;
          t += 1500;
          return cur;
        };
      })(),
    });

    await receiver.start();
    await until(() => published.length >= 1);

    expect(published).toHaveLength(1);
    expect(published[0]).toMatchObject({
      tenantId: TENANT,
      relativePath: event.relativePath,
      sequenceNumber: 7,
    });
    expect(published[0]!.latencySeconds).toBeGreaterThanOrEqual(0);
    expect(Number.isFinite(published[0]!.latencySeconds)).toBe(true);
  });

  it("does NOT publish a metric when the syncFn throws (failures don't skew p95)", async () => {
    const published: SyncLatencyMetric[] = [];
    const event = makeEvent({ sequenceNumber: 8 });
    let attempted = false;

    receiver = new SqsPushReceiver({
      tenantId: TENANT,
      queueUrl: QUEUE_URL,
      sqs: new OneBatchSqs([{ Body: encodePushEvent(event), ReceiptHandle: "rh-8" }]),
      syncFn: async () => {
        attempted = true;
        throw new Error("pull failed");
      },
      enabled: true,
      publishMetric: async (m) => {
        published.push(m);
      },
    });

    await receiver.start();
    // First make sure the message actually reached syncFn; without this the
    // negative assertions below could pass simply because nothing ran yet.
    await until(() => attempted);
    // Then give the loop a few ticks in which it could (wrongly) publish.
    await new Promise((r) => setTimeout(r, 50));

    expect(published).toHaveLength(0);
    expect(receiver.processedCount).toBe(0);
  });

  it("a throwing publish seam does not crash the receiver loop", async () => {
    const event = makeEvent({ sequenceNumber: 9 });
    let synced = false;
    let publishAttempted = false;

    receiver = new SqsPushReceiver({
      tenantId: TENANT,
      queueUrl: QUEUE_URL,
      sqs: new OneBatchSqs([{ Body: encodePushEvent(event), ReceiptHandle: "rh-9" }]),
      syncFn: async () => {
        synced = true;
      },
      enabled: true,
      publishMetric: async () => {
        publishAttempted = true;
        throw new Error("metric backend down");
      },
    });

    await receiver.start();
    // Wait for the throwing seam itself, not just the sync, so the assertions
    // below are made after the failure has actually happened.
    await until(() => synced && publishAttempted);

    // The sync still completed and the receiver is still alive (connected).
    expect(receiver.processedCount).toBe(1);
    expect(receiver.connected).toBe(true);
  });
});
@@ -0,0 +1,158 @@
1
+ /**
2
+ * Sync pipeline metrics — CloudWatch custom metrics for the event-driven push
3
+ * receive loop (project event-driven-sync-menubar US-011).
4
+ *
5
+ * Publishes one `hq-cloud.sync.p95_latency_seconds` datum per successfully
6
+ * processed push event to the `HQPro/Sync` namespace, with a `TenantId`
7
+ * dimension so the dashboard widget can be filtered per tenant. CloudWatch
8
+ * aggregates p50/p95/p99 across the time window via the dashboard's
9
+ * `Statistics` setting; the receive-loop side just emits one raw value per
10
+ * event.
11
+ *
12
+ * Best-effort emission
13
+ * ────────────────────
14
+ * - module-level singleton CloudWatchClient with a `_setSyncCloudWatchClient`
15
+ * test seam
16
+ * - `publishSyncLatencyMetric` catches all errors and NEVER throws — a
17
+ * CloudWatch outage MUST NOT crash the sync receive loop. The cadence
18
+ * safety net still picks up missed work and metric blanks are recoverable;
19
+ * a crashed loop is not.
20
+ * - explicit `Unit` + `Timestamp` + `Dimensions` per datum
21
+ *
22
+ * Why latency (and not also "events received" / "events failed")?
23
+ * ───────────────────────────────────────────────────────────────
24
+ * US-011 AC#1 calls for the p95 latency metric specifically. The receive-loop's
25
+ * existing `processedCount` / log lines cover the count/failure dimensions for
26
+ * now; widening to additional metric names happens in a follow-up if the
27
+ * operator dashboard grows.
28
+ *
29
+ * Adapted from indigoai-us/hq-pro PR #112 (src/sync/metrics.ts) into
30
+ * @indigoai-us/hq-cloud (Path B).
31
+ */
32
+
33
+ import {
34
+ CloudWatchClient,
35
+ PutMetricDataCommand,
36
+ type MetricDatum,
37
+ } from "@aws-sdk/client-cloudwatch";
38
+ import type { Logger } from "pino";
39
+
40
+ // ── Constants ──────────────────────────────────────────────────────────────
41
+
/**
 * Namespace under which every sync-pipeline datum is published to CloudWatch.
 * Kept identical to the server-side namespace (hq-pro PR #112) so that one
 * dashboard and alarm also cover the client path.
 */
export const SYNC_METRIC_NAMESPACE = "HQPro/Sync";
47
+
/**
 * Name of the per-event sync latency metric, measured in seconds.
 *
 * Despite the `p95` in the name, each datum is one raw observation: the
 * percentile is computed by the dashboard widget (`Statistics: ["p95"]`) over
 * its time window. The name mirrors the PRD's alarm threshold (`p95 > 10s`).
 */
export const SYNC_LATENCY_METRIC_NAME = "hq-cloud.sync.p95_latency_seconds";
56
+
57
+ // ── Types ──────────────────────────────────────────────────────────────────
58
+
/**
 * A single latency observation for one successfully processed push event.
 *
 * Observations are published on the success path only, so failed syncs do not
 * drag p95 toward infinity.
 */
export interface SyncLatencyMetric {
  /** Tenant the event belongs to; published as the `TenantId` dimension. */
  tenantId: string;
  /**
   * Path of the synced file. Deliberately NOT a CloudWatch dimension
   * (cardinality explosion); it only appears in the failure log so operators
   * can correlate a missed datum with the 3-log chain.
   */
  relativePath: string;
  /** Event sequence number; failure-log only, for the same reason as `relativePath`. */
  sequenceNumber: number;
  /**
   * Wall-clock duration in seconds from save-on-A to visible-on-B (or, on the
   * receive loop, the duration of `syncFn(ctx)`).
   */
  latencySeconds: number;
  /** Timestamp stamped onto the published datum. */
  timestamp: Date;
}
77
+
78
+ // ── Client ─────────────────────────────────────────────────────────────────
79
+
/** Module-level client shared by every publish that does not supply its own. */
let _cwClient: CloudWatchClient | undefined;

/**
 * Return the shared CloudWatch client, constructing it with an empty
 * configuration object on first use.
 */
function getCloudWatchClient(): CloudWatchClient {
  _cwClient ??= new CloudWatchClient({});
  return _cwClient;
}
88
+
/**
 * Replace the module-level CloudWatch client (test seam).
 *
 * Pass `undefined` to clear a previously injected client; the next publish
 * that falls back to the module singleton then lazily constructs a fresh
 * default client again.
 *
 * @param client - Client to install, or `undefined` to reset.
 * @internal
 */
export function _setSyncCloudWatchClient(
  client: CloudWatchClient | undefined,
): void {
  _cwClient = client;
}
96
+
97
+ // ── Publish ────────────────────────────────────────────────────────────────
98
+
/** Optional collaborators for {@link publishSyncLatencyMetric}. */
export interface PublishSyncLatencyMetricOptions {
  /**
   * CloudWatch client to send through instead of the module singleton
   * (intended for tests).
   */
  client?: CloudWatchClient;
  /**
   * pino logger that receives a warning when publishing fails. When omitted,
   * the failure is written with `console.error` instead.
   */
  logger?: Logger;
}
105
+
/**
 * Send one latency observation to CloudWatch as a single `PutMetricData` call.
 *
 * Emission is best-effort: whatever the SDK call throws is caught and logged,
 * and the returned promise still resolves. A CloudWatch outage must not take
 * down the sync receive loop; a gap in the metric is recoverable, a crashed
 * loop is not.
 *
 * The datum carries exactly one dimension, `TenantId`. `relativePath` and
 * `sequenceNumber` are kept out of the dimensions on purpose (cardinality) and
 * only appear in the failure log, where they let an operator tie a missed
 * datum back to its 3-log chain entry.
 *
 * @param metric - The observation to publish.
 * @param opts - Optional client override and failure logger.
 */
export async function publishSyncLatencyMetric(
  metric: SyncLatencyMetric,
  opts: PublishSyncLatencyMetricOptions = {},
): Promise<void> {
  const { tenantId, relativePath, sequenceNumber } = metric;

  const datum: MetricDatum = {
    MetricName: SYNC_LATENCY_METRIC_NAME,
    Value: metric.latencySeconds,
    Unit: "Seconds",
    Timestamp: metric.timestamp,
    Dimensions: [{ Name: "TenantId", Value: tenantId }],
  };

  try {
    // Resolving the client stays inside the try so that a failure while
    // creating the default client is handled like any other publish failure.
    const client = opts.client ?? getCloudWatchClient();
    const command = new PutMetricDataCommand({
      Namespace: SYNC_METRIC_NAMESPACE,
      MetricData: [datum],
    });
    await client.send(command);
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    const { logger } = opts;

    if (!logger) {
      console.error(
        "Failed to publish sync latency metric to CloudWatch:",
        message,
      );
      return;
    }

    logger.warn(
      {
        event: "sync.metric.publish_failed",
        tenantId,
        relativePath,
        sequenceNumber,
        err: { message },
      },
      "failed to publish sync latency metric to CloudWatch",
    );
  }
}