@indigoai-us/hq-cloud 5.26.0 → 5.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +34 -0
- package/dist/bin/sync-runner.d.ts +38 -0
- package/dist/bin/sync-runner.d.ts.map +1 -1
- package/dist/bin/sync-runner.js +75 -1
- package/dist/bin/sync-runner.js.map +1 -1
- package/dist/index.d.ts +4 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -1
- package/dist/index.js.map +1 -1
- package/dist/sync/feature-flags.d.ts +136 -0
- package/dist/sync/feature-flags.d.ts.map +1 -0
- package/dist/sync/feature-flags.js +160 -0
- package/dist/sync/feature-flags.js.map +1 -0
- package/dist/sync/feature-flags.test.d.ts +24 -0
- package/dist/sync/feature-flags.test.d.ts.map +1 -0
- package/dist/sync/feature-flags.test.js +330 -0
- package/dist/sync/feature-flags.test.js.map +1 -0
- package/dist/sync/index.d.ts +10 -2
- package/dist/sync/index.d.ts.map +1 -1
- package/dist/sync/index.js +5 -1
- package/dist/sync/index.js.map +1 -1
- package/dist/sync/logger.d.ts +61 -0
- package/dist/sync/logger.d.ts.map +1 -0
- package/dist/sync/logger.js +51 -0
- package/dist/sync/logger.js.map +1 -0
- package/dist/sync/logger.test.d.ts +19 -0
- package/dist/sync/logger.test.d.ts.map +1 -0
- package/dist/sync/logger.test.js +199 -0
- package/dist/sync/logger.test.js.map +1 -0
- package/dist/sync/metrics.d.ts +89 -0
- package/dist/sync/metrics.d.ts.map +1 -0
- package/dist/sync/metrics.js +105 -0
- package/dist/sync/metrics.js.map +1 -0
- package/dist/sync/metrics.test.d.ts +19 -0
- package/dist/sync/metrics.test.d.ts.map +1 -0
- package/dist/sync/metrics.test.js +280 -0
- package/dist/sync/metrics.test.js.map +1 -0
- package/dist/sync/push-receiver.d.ts +442 -0
- package/dist/sync/push-receiver.d.ts.map +1 -0
- package/dist/sync/push-receiver.js +782 -0
- package/dist/sync/push-receiver.js.map +1 -0
- package/dist/sync/push-receiver.test.d.ts +25 -0
- package/dist/sync/push-receiver.test.d.ts.map +1 -0
- package/dist/sync/push-receiver.test.js +477 -0
- package/dist/sync/push-receiver.test.js.map +1 -0
- package/dist/sync/push-transport.d.ts +84 -1
- package/dist/sync/push-transport.d.ts.map +1 -1
- package/dist/sync/push-transport.js +84 -0
- package/dist/sync/push-transport.js.map +1 -1
- package/dist/watcher.d.ts +127 -11
- package/dist/watcher.d.ts.map +1 -1
- package/dist/watcher.js +294 -57
- package/dist/watcher.js.map +1 -1
- package/package.json +9 -5
- package/src/bin/sync-runner.ts +102 -1
- package/src/index.ts +21 -0
- package/src/sync/feature-flags.test.ts +392 -0
- package/src/sync/feature-flags.ts +229 -0
- package/src/sync/index.ts +57 -2
- package/src/sync/logger.test.ts +241 -0
- package/src/sync/logger.ts +79 -0
- package/src/sync/metrics.test.ts +380 -0
- package/src/sync/metrics.ts +158 -0
- package/src/sync/push-receiver.test.ts +545 -0
- package/src/sync/push-receiver.ts +1077 -0
- package/src/sync/push-transport.ts +148 -1
- package/src/watcher.ts +408 -51
- package/test/e2e/sync/cross-tenant-isolation.test.ts +502 -0
- package/test/e2e/watcher-real-chokidar.test.ts +105 -0
- package/test/e2e/watcher-recursive-backend.test.ts +115 -0
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* US-011 — unit tests for `src/sync/metrics.ts` + the receive-success-path
|
|
3
|
+
* metric emission wired into {@link SqsPushReceiver}.
|
|
4
|
+
*
|
|
5
|
+
* Mirrors the hq-pro PR #112 metrics test:
|
|
6
|
+
* - mock the CloudWatchClient via `aws-sdk-client-mock`
|
|
7
|
+
* - inject the mocked client via the `_setSyncCloudWatchClient` test seam
|
|
8
|
+
* - assert namespace, metric name, unit, dimensions, value
|
|
9
|
+
* - assert SDK errors are CAUGHT (the receive loop must never crash on a
|
|
10
|
+
* CloudWatch outage)
|
|
11
|
+
*
|
|
12
|
+
* Plus the US-011-specific receiver wiring:
|
|
13
|
+
* - on the receive-SUCCESS path, the receiver publishes exactly one latency
|
|
14
|
+
* datum carrying the event's tenantId + sequenceNumber (mocked publish seam
|
|
15
|
+
* — no real AWS)
|
|
16
|
+
* - a failing publish seam does NOT crash the receiver loop
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { Writable } from "node:stream";
|
|
20
|
+
|
|
21
|
+
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
|
22
|
+
import { mockClient } from "aws-sdk-client-mock";
|
|
23
|
+
import {
|
|
24
|
+
CloudWatchClient,
|
|
25
|
+
PutMetricDataCommand,
|
|
26
|
+
} from "@aws-sdk/client-cloudwatch";
|
|
27
|
+
|
|
28
|
+
import {
|
|
29
|
+
SYNC_LATENCY_METRIC_NAME,
|
|
30
|
+
SYNC_METRIC_NAMESPACE,
|
|
31
|
+
_setSyncCloudWatchClient,
|
|
32
|
+
publishSyncLatencyMetric,
|
|
33
|
+
type SyncLatencyMetric,
|
|
34
|
+
} from "./metrics.js";
|
|
35
|
+
import { createLogger } from "./logger.js";
|
|
36
|
+
import { encodePushEvent, type PushEvent } from "./push-event.js";
|
|
37
|
+
import {
|
|
38
|
+
SqsPushReceiver,
|
|
39
|
+
type PublishMetricFn,
|
|
40
|
+
type SqsClientLike,
|
|
41
|
+
type SqsMessageLike,
|
|
42
|
+
} from "./push-receiver.js";
|
|
43
|
+
|
|
44
|
+
// ── Helpers ─────────────────────────────────────────────────────────────────
|
|
45
|
+
|
|
46
|
+
/**
 * Build a {@link SyncLatencyMetric} fixture for the CloudWatch contract tests.
 *
 * @param overrides Fields that replace the canned defaults.
 * @returns The default metric with `overrides` applied on top.
 */
function makeMetric(overrides: Partial<SyncLatencyMetric> = {}): SyncLatencyMetric {
  const defaults: SyncLatencyMetric = {
    tenantId: "tenant-A",
    relativePath: "docs/overview.md",
    sequenceNumber: 42,
    latencySeconds: 1.234,
    timestamp: new Date("2026-05-19T12:00:00.000Z"),
  };
  return { ...defaults, ...overrides };
}
|
|
56
|
+
|
|
57
|
+
/**
 * Create an in-memory writable sink for newline-delimited JSON log output.
 *
 * @returns `stream` — the Writable to hand to a logger; `lines` — parses
 *   everything written so far, one JSON value per non-empty line.
 */
function captureStream(): { stream: Writable; lines: () => unknown[] } {
  const chunks: string[] = [];

  const stream = new Writable({
    write(chunk, _enc, cb) {
      // Chunks may arrive as strings or Buffers; normalise to UTF-8 text.
      const text =
        typeof chunk === "string" ? chunk : (chunk as Buffer).toString("utf8");
      chunks.push(text);
      cb();
    },
  });

  // Re-joins all chunks on every call so a line split across writes still parses.
  function lines(): unknown[] {
    const parsed: unknown[] = [];
    for (const line of chunks.join("").split("\n")) {
      if (line.length === 0) continue;
      parsed.push(JSON.parse(line) as unknown);
    }
    return parsed;
  }

  return { stream, lines };
}
|
|
77
|
+
|
|
78
|
+
// Tenant id shared by the receiver fixtures: the default `originTenantId`
// produced by makeEvent() and the `tenantId` handed to SqsPushReceiver.
const TENANT = "tenant-indigo";
// Queue URL handed to SqsPushReceiver as `queueUrl`. The OneBatchSqs fake
// never reads it, so no real queue is contacted.
const QUEUE_URL =
  "https://sqs.us-east-1.amazonaws.com/123456789012/sync-push-indigo-deviceB";
|
|
81
|
+
|
|
82
|
+
/**
 * Build a {@link PushEvent} fixture for the receiver tests.
 *
 * @param overrides Fields that replace the canned defaults.
 * @returns The default event (tenant `TENANT`, sequence 7) with `overrides`
 *   applied on top.
 */
function makeEvent(overrides: Partial<PushEvent> = {}): PushEvent {
  const defaults: PushEvent = {
    relativePath: "companies/indigo/notes.md",
    contentHash:
      "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
    mtime: "2026-05-21T12:34:56.000Z",
    originDeviceId: "device-A",
    originTenantId: TENANT,
    sequenceNumber: 7,
    eventTimestamp: "2026-05-21T12:34:56.000Z",
  };
  return { ...defaults, ...overrides };
}
|
|
95
|
+
|
|
96
|
+
/**
 * Minimal fake SQS client: hands out one pre-loaded batch, then idles.
 *
 * - The first `receiveMessage` call returns the constructor-supplied messages.
 * - Every later call resolves with an empty batch after 5 ms, or as soon as
 *   the caller's `signal` aborts, whichever comes first.
 * - `deleteMessage` only records the receipt handle in {@link deleted}.
 */
class OneBatchSqs implements SqsClientLike {
  /** The batch still to be delivered; `null` once it has been handed out. */
  private batch: SqsMessageLike[] | null;
  /** Receipt handles passed to `deleteMessage`, in call order. */
  readonly deleted: string[] = [];

  constructor(messages: SqsMessageLike[]) {
    this.batch = messages;
  }

  /**
   * Return the pre-loaded batch once; afterwards simulate an empty long poll.
   *
   * @param args Only `signal` is read; the other fields are ignored.
   * @returns The pre-loaded messages on the first call, `[]` afterwards.
   */
  async receiveMessage(args: {
    queueUrl: string;
    maxMessages: number;
    waitTimeSeconds: number;
    signal: AbortSignal;
  }): Promise<{ messages: SqsMessageLike[] }> {
    if (this.batch) {
      const b = this.batch;
      this.batch = null;
      return { messages: b };
    }
    return new Promise((resolve) => {
      const { signal } = args;
      if (signal.aborted) {
        resolve({ messages: [] });
        return;
      }

      const onAbort = (): void => {
        clearTimeout(timer);
        resolve({ messages: [] });
      };

      const timer = setTimeout(() => {
        // Detach the abort listener once the idle poll completes. Otherwise
        // every idle poll would leave one more listener on the signal if the
        // caller reuses the same signal across polls.
        signal.removeEventListener("abort", onAbort);
        resolve({ messages: [] });
      }, 5);
      // Do not let the idle timer keep the process alive.
      (timer as { unref?: () => void }).unref?.();

      signal.addEventListener("abort", onAbort, { once: true });
    });
  }

  /** Record the receipt handle; nothing is actually deleted. */
  async deleteMessage(args: {
    queueUrl: string;
    receiptHandle: string;
  }): Promise<void> {
    this.deleted.push(args.receiptHandle);
  }
}
|
|
138
|
+
|
|
139
|
+
/**
 * Poll `predicate` every 5 ms until it returns true.
 *
 * The predicate is checked before the deadline on each pass, so a predicate
 * that is already true resolves without sleeping.
 *
 * @param predicate Condition to wait for.
 * @param timeoutMs Maximum time to keep polling (default 1000 ms).
 * @throws Error `"until() timed out"` when the deadline passes first.
 */
async function until(
  predicate: () => boolean,
  timeoutMs = 1000,
): Promise<void> {
  const startedAt = Date.now();
  for (;;) {
    if (predicate()) {
      return;
    }
    const elapsed = Date.now() - startedAt;
    if (elapsed > timeoutMs) {
      throw new Error("until() timed out");
    }
    await new Promise((wake) => setTimeout(wake, 5));
  }
}
|
|
152
|
+
|
|
153
|
+
// ── publishSyncLatencyMetric (CloudWatch contract) ───────────────────────────
|
|
154
|
+
|
|
155
|
+
// Shared CloudWatch mock; reset and re-injected as the module singleton
// before every test in the suite below.
const cwMock = mockClient(CloudWatchClient);

/**
 * CloudWatch contract tests for `publishSyncLatencyMetric`: namespace, metric
 * name, unit, timestamp, value, dimensions, and the never-throws guarantee.
 */
describe("publishSyncLatencyMetric", () => {
  beforeEach(() => {
    cwMock.reset();
    _setSyncCloudWatchClient(cwMock as unknown as CloudWatchClient);
  });

  it("sends a single PutMetricData with the documented namespace + metric name", async () => {
    cwMock.on(PutMetricDataCommand).resolves({});

    await publishSyncLatencyMetric(makeMetric());

    const calls = cwMock.commandCalls(PutMetricDataCommand);
    expect(calls).toHaveLength(1);

    const input = calls[0]!.args[0].input;
    // Assert both the exported constant and the literal so an accidental
    // rename of the constant's value is caught here.
    expect(input.Namespace).toBe(SYNC_METRIC_NAMESPACE);
    expect(input.Namespace).toBe("HQPro/Sync");
    expect(input.MetricData).toHaveLength(1);
    expect(input.MetricData![0].MetricName).toBe(SYNC_LATENCY_METRIC_NAME);
    expect(input.MetricData![0].MetricName).toBe(
      "hq-cloud.sync.p95_latency_seconds",
    );
  });

  it("uses Unit=Seconds and stamps the supplied timestamp", async () => {
    cwMock.on(PutMetricDataCommand).resolves({});

    const ts = new Date("2026-05-19T07:00:00.000Z");
    await publishSyncLatencyMetric(makeMetric({ timestamp: ts }));

    const datum = cwMock.commandCalls(PutMetricDataCommand)[0]!.args[0].input
      .MetricData![0];
    expect(datum.Unit).toBe("Seconds");
    expect(datum.Timestamp).toEqual(ts);
  });

  it("publishes the observed latencySeconds value verbatim", async () => {
    cwMock.on(PutMetricDataCommand).resolves({});

    await publishSyncLatencyMetric(makeMetric({ latencySeconds: 2.71828 }));

    const datum = cwMock.commandCalls(PutMetricDataCommand)[0]!.args[0].input
      .MetricData![0];
    expect(datum.Value).toBe(2.71828);
  });

  it("attaches a `TenantId` dimension carrying the tenantId", async () => {
    cwMock.on(PutMetricDataCommand).resolves({});

    await publishSyncLatencyMetric(makeMetric({ tenantId: "tenant-prs_xyz" }));

    const datum = cwMock.commandCalls(PutMetricDataCommand)[0]!.args[0].input
      .MetricData![0];
    expect(datum.Dimensions).toEqual([
      { Name: "TenantId", Value: "tenant-prs_xyz" },
    ]);
  });

  it("does not throw when the CloudWatch SDK rejects (sync loop must not crash)", async () => {
    cwMock
      .on(PutMetricDataCommand)
      .rejects(new Error("CloudWatch unavailable"));
    const consoleSpy = vi.spyOn(console, "error").mockImplementation(() => {});

    // Restore in `finally` so a failing assertion cannot leave console.error
    // silenced for the tests that run afterwards.
    try {
      await expect(
        publishSyncLatencyMetric(makeMetric()),
      ).resolves.toBeUndefined();

      expect(consoleSpy).toHaveBeenCalledWith(
        "Failed to publish sync latency metric to CloudWatch:",
        "CloudWatch unavailable",
      );
    } finally {
      consoleSpy.mockRestore();
    }
  });

  it("routes SDK failures through the supplied pino logger when provided", async () => {
    cwMock.on(PutMetricDataCommand).rejects(new Error("ThrottlingException"));
    const { stream, lines } = captureStream();
    const logger = createLogger({
      component: "metrics-test",
      destination: stream,
      level: "debug",
    });

    await publishSyncLatencyMetric(
      makeMetric({ sequenceNumber: 99, relativePath: "a.md" }),
      { logger },
    );

    const failureLine = (lines() as Array<{ event?: string }>).find(
      (l) => l.event === "sync.metric.publish_failed",
    );
    expect(failureLine).toBeDefined();
    expect(failureLine).toMatchObject({
      event: "sync.metric.publish_failed",
      tenantId: "tenant-A",
      relativePath: "a.md",
      sequenceNumber: 99,
    });
  });

  it("accepts a per-call client override (does not call the singleton)", async () => {
    // If the singleton were used, this rejection would surface as zero
    // calls on the override and one call here.
    cwMock
      .on(PutMetricDataCommand)
      .rejects(new Error("singleton should not be used"));

    const overrideMock = mockClient(CloudWatchClient);

    // Restore in `finally` so a failing assertion cannot leave the override
    // mock installed for later tests.
    try {
      overrideMock.on(PutMetricDataCommand).resolves({});

      await publishSyncLatencyMetric(makeMetric(), {
        client: overrideMock as unknown as CloudWatchClient,
      });

      expect(overrideMock.commandCalls(PutMetricDataCommand)).toHaveLength(1);
      expect(cwMock.commandCalls(PutMetricDataCommand)).toHaveLength(0);
    } finally {
      overrideMock.restore();
    }
  });
});
|
|
277
|
+
|
|
278
|
+
// ── Receive-success-path metric emission (US-011 AC: "metric emission unit-
|
|
279
|
+
// tested on the receive success path") ──────────────────────────────────
|
|
280
|
+
|
|
281
|
+
/**
 * Receiver wiring tests: each case builds an SqsPushReceiver over the
 * OneBatchSqs fake with an injected `publishMetric` seam, so no CloudWatch
 * client is involved.
 */
describe("SqsPushReceiver metric emission on the receive-success path", () => {
  // Held at suite scope so afterEach can dispose whichever receiver a test started.
  let receiver: SqsPushReceiver | undefined;

  afterEach(async () => {
    if (receiver) {
      await receiver.dispose();
      receiver = undefined;
    }
  });

  it("publishes exactly one latency datum per processed event (mocked publish, no real AWS)", async () => {
    const published: SyncLatencyMetric[] = [];
    const publishMetric: PublishMetricFn = async (m) => {
      published.push(m);
    };
    const event = makeEvent({ sequenceNumber: 7 });

    receiver = new SqsPushReceiver({
      tenantId: TENANT,
      queueUrl: QUEUE_URL,
      sqs: new OneBatchSqs([{ Body: encodePushEvent(event), ReceiptHandle: "rh-7" }]),
      syncFn: async () => {
        /* successful pull */
      },
      enabled: true,
      publishMetric,
      // Fake clock: start measured at T, success at T+1500ms → 1.5s latency.
      // Each call returns the current value and then advances it by 1500 ms.
      // NOTE(review): the assertions below only check "non-negative and
      // finite", not 1.5 — presumably because how often the receiver calls
      // `now()` is an implementation detail; confirm before tightening.
      now: (() => {
        let t = Date.parse(event.eventTimestamp);
        return () => {
          const cur = t;
          t += 1500;
          return cur;
        };
      })(),
    });

    await receiver.start();
    await until(() => published.length >= 1);

    expect(published).toHaveLength(1);
    expect(published[0]).toMatchObject({
      tenantId: TENANT,
      relativePath: event.relativePath,
      sequenceNumber: 7,
    });
    expect(published[0]!.latencySeconds).toBeGreaterThanOrEqual(0);
    expect(Number.isFinite(published[0]!.latencySeconds)).toBe(true);
  });

  it("does NOT publish a metric when the syncFn throws (failures don't skew p95)", async () => {
    const published: SyncLatencyMetric[] = [];
    const event = makeEvent({ sequenceNumber: 8 });

    receiver = new SqsPushReceiver({
      tenantId: TENANT,
      queueUrl: QUEUE_URL,
      sqs: new OneBatchSqs([{ Body: encodePushEvent(event), ReceiptHandle: "rh-8" }]),
      syncFn: async () => {
        throw new Error("pull failed");
      },
      enabled: true,
      publishMetric: async (m) => {
        published.push(m);
      },
    });

    await receiver.start();
    // Give the loop a few ticks to process the message + (not) publish.
    // A fixed wait is used because there is no positive event to poll for
    // when asserting that nothing was published.
    await new Promise((r) => setTimeout(r, 50));

    expect(published).toHaveLength(0);
    expect(receiver.processedCount).toBe(0);
  });

  it("a throwing publish seam does not crash the receiver loop", async () => {
    const event = makeEvent({ sequenceNumber: 9 });
    let synced = false;

    receiver = new SqsPushReceiver({
      tenantId: TENANT,
      queueUrl: QUEUE_URL,
      sqs: new OneBatchSqs([{ Body: encodePushEvent(event), ReceiptHandle: "rh-9" }]),
      syncFn: async () => {
        synced = true;
      },
      enabled: true,
      publishMetric: async () => {
        throw new Error("metric backend down");
      },
    });

    await receiver.start();
    await until(() => synced);

    // The sync still completed and the receiver is still alive (connected).
    expect(receiver.processedCount).toBe(1);
    expect(receiver.connected).toBe(true);
  });
});
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sync pipeline metrics — CloudWatch custom metrics for the event-driven push
|
|
3
|
+
* receive loop (project event-driven-sync-menubar US-011).
|
|
4
|
+
*
|
|
5
|
+
* Publishes one `hq-cloud.sync.p95_latency_seconds` datum per successfully
|
|
6
|
+
* processed push event to the `HQPro/Sync` namespace, with a `TenantId`
|
|
7
|
+
* dimension so the dashboard widget can be filtered per tenant. CloudWatch
|
|
8
|
+
* aggregates p50/p95/p99 across the time window via the dashboard's
|
|
9
|
+
* `Statistics` setting; the receive-loop side just emits one raw value per
|
|
10
|
+
* event.
|
|
11
|
+
*
|
|
12
|
+
* Best-effort emission
|
|
13
|
+
* ────────────────────
|
|
14
|
+
* - module-level singleton CloudWatchClient with a `_setSyncCloudWatchClient`
|
|
15
|
+
* test seam
|
|
16
|
+
* - `publishSyncLatencyMetric` catches all errors and NEVER throws — a
|
|
17
|
+
* CloudWatch outage MUST NOT crash the sync receive loop. The cadence
|
|
18
|
+
* safety net still picks up missed work and metric blanks are recoverable;
|
|
19
|
+
* a crashed loop is not.
|
|
20
|
+
* - explicit `Unit` + `Timestamp` + `Dimensions` per datum
|
|
21
|
+
*
|
|
22
|
+
* Why latency (and not also "events received" / "events failed")?
|
|
23
|
+
* ───────────────────────────────────────────────────────────────
|
|
24
|
+
* US-011 AC#1 calls for the p95 latency metric specifically. The receive-loop's
|
|
25
|
+
* existing `processedCount` / log lines cover the count/failure dimensions for
|
|
26
|
+
* now; widening to additional metric names happens in a follow-up if the
|
|
27
|
+
* operator dashboard grows.
|
|
28
|
+
*
|
|
29
|
+
* Adapted from indigoai-us/hq-pro PR #112 (src/sync/metrics.ts) into
|
|
30
|
+
* @indigoai-us/hq-cloud (Path B).
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
import {
|
|
34
|
+
CloudWatchClient,
|
|
35
|
+
PutMetricDataCommand,
|
|
36
|
+
type MetricDatum,
|
|
37
|
+
} from "@aws-sdk/client-cloudwatch";
|
|
38
|
+
import type { Logger } from "pino";
|
|
39
|
+
|
|
40
|
+
// ── Constants ──────────────────────────────────────────────────────────────
|
|
41
|
+
|
|
42
|
+
/**
 * CloudWatch metric namespace for the sync pipeline. Matches the server-side
 * namespace (hq-pro PR #112) so the dashboard + alarm cover the client path.
 *
 * The unit tests assert the literal `"HQPro/Sync"` as well as this constant,
 * so changing the value requires updating the tests too.
 */
export const SYNC_METRIC_NAMESPACE = "HQPro/Sync";

/**
 * Metric name for per-event sync latency in seconds. The dashboard widget
 * applies `Statistics: ["p95"]` to aggregate across the time window — the
 * receive loop just emits one raw `Seconds` value per processed event.
 *
 * Name chosen to match the PRD's alarm threshold (`p95 > 10s`).
 *
 * Despite the `p95` in the name, each datum published under it is a single
 * raw observation, not a pre-computed percentile. The unit tests also assert
 * this literal.
 */
export const SYNC_LATENCY_METRIC_NAME = "hq-cloud.sync.p95_latency_seconds";
|
|
56
|
+
|
|
57
|
+
// ── Types ──────────────────────────────────────────────────────────────────
|
|
58
|
+
|
|
59
|
+
/**
 * One latency observation. `latencySeconds` is the wall-clock duration from
 * save-on-A to visible-on-B (or, on the receive loop, the `syncFn(ctx)`
 * duration); we ONLY publish on the success path so failed syncs don't skew
 * p95 toward infinity.
 *
 * `relativePath` and `sequenceNumber` are NOT used as CloudWatch dimensions
 * (cardinality explosion) — they're captured here for the optional debug log
 * emitted on failure so operators can correlate back to the 3-log chain when
 * investigating a spike.
 */
export interface SyncLatencyMetric {
  /** Published as the value of the datum's single `TenantId` dimension. */
  tenantId: string;
  /** Relative path carried by the push event; appears only in the failure log. */
  relativePath: string;
  /** Sequence number carried by the push event; appears only in the failure log. */
  sequenceNumber: number;
  /** Sent as the datum `Value` with `Unit: "Seconds"`. */
  latencySeconds: number;
  /** Sent as the datum `Timestamp`. */
  timestamp: Date;
}
|
|
77
|
+
|
|
78
|
+
// ── Client ─────────────────────────────────────────────────────────────────
|
|
79
|
+
|
|
80
|
+
// Module-level CloudWatch client, created on first use and replaceable
// through `_setSyncCloudWatchClient`.
let _cwClient: CloudWatchClient | undefined;

/**
 * Return the shared CloudWatch client, constructing it with default
 * configuration the first time it is requested.
 */
function getCloudWatchClient(): CloudWatchClient {
  _cwClient ??= new CloudWatchClient({});
  return _cwClient;
}

/**
 * Test seam: swap the shared CloudWatch client for `client`. Every later
 * `publishSyncLatencyMetric` call without a per-call `client` override uses
 * the instance supplied here.
 * @internal
 */
export function _setSyncCloudWatchClient(client: CloudWatchClient): void {
  _cwClient = client;
}
|
|
96
|
+
|
|
97
|
+
// ── Publish ────────────────────────────────────────────────────────────────
|
|
98
|
+
|
|
99
|
+
/**
 * Optional collaborators for `publishSyncLatencyMetric`. Both fields may be
 * omitted; the function defaults `opts` to `{}`.
 */
export interface PublishSyncLatencyMetricOptions {
  /** Override the CloudWatch client (tests). Defaults to the module singleton. */
  client?: CloudWatchClient;
  /**
   * Optional pino logger for emission failures. When omitted, failures are
   * written with `console.error` instead.
   */
  logger?: Logger;
}
|
|
105
|
+
|
|
106
|
+
/**
 * Report a failed or skipped metric emission without throwing.
 *
 * Uses the supplied pino logger (structured `sync.metric.publish_failed`
 * warning) when present; otherwise falls back to `console.error`.
 */
function reportSyncMetricPublishFailure(
  metric: SyncLatencyMetric,
  message: string,
  logger: Logger | undefined,
): void {
  if (logger) {
    logger.warn(
      {
        event: "sync.metric.publish_failed",
        tenantId: metric.tenantId,
        relativePath: metric.relativePath,
        sequenceNumber: metric.sequenceNumber,
        err: { message },
      },
      "failed to publish sync latency metric to CloudWatch",
    );
    return;
  }
  console.error(
    "Failed to publish sync latency metric to CloudWatch:",
    message,
  );
}

/**
 * Publish a single latency datum to CloudWatch.
 *
 * Best-effort: any error from the SDK is CAUGHT and logged. A CloudWatch outage
 * MUST NOT crash the sync receive loop — the cadence safety net still picks up
 * missed work, and metric blanks are recoverable; a crashed loop is not.
 *
 * Dimension: `TenantId` only. The `relativePath`/`sequenceNumber` fields on the
 * input are intentionally NOT promoted to dimensions (cardinality), but they
 * ride along on the failure log so an operator can correlate a missed datum to
 * its 3-log chain entry.
 *
 * A non-finite `latencySeconds` (NaN, ±Infinity) is reported through the same
 * failure path and is NOT sent: CloudWatch does not accept such values, so the
 * request could only fail after a wasted network round-trip.
 *
 * @param metric The observation to publish.
 * @param opts Optional client override and failure logger.
 * @returns Resolves once the datum was sent, skipped, or the failure was
 *   logged; never rejects on SDK errors.
 */
export async function publishSyncLatencyMetric(
  metric: SyncLatencyMetric,
  opts: PublishSyncLatencyMetricOptions = {},
): Promise<void> {
  if (!Number.isFinite(metric.latencySeconds)) {
    reportSyncMetricPublishFailure(
      metric,
      `latencySeconds is not a finite number: ${String(metric.latencySeconds)}`,
      opts.logger,
    );
    return;
  }

  const datum: MetricDatum = {
    MetricName: SYNC_LATENCY_METRIC_NAME,
    Value: metric.latencySeconds,
    Unit: "Seconds",
    Timestamp: metric.timestamp,
    Dimensions: [{ Name: "TenantId", Value: metric.tenantId }],
  };

  try {
    // Resolved inside the try so a failure to construct the singleton client
    // is handled like any other emission failure.
    const client = opts.client ?? getCloudWatchClient();
    await client.send(
      new PutMetricDataCommand({
        Namespace: SYNC_METRIC_NAMESPACE,
        MetricData: [datum],
      }),
    );
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    reportSyncMetricPublishFailure(metric, message, opts.logger);
  }
}
|