@pattern-stack/codegen 0.4.4 → 0.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/runtime/subsystems/index.d.ts +7 -0
- package/dist/runtime/subsystems/index.js +905 -208
- package/dist/runtime/subsystems/index.js.map +1 -1
- package/dist/runtime/subsystems/observability/index.d.ts +10 -0
- package/dist/runtime/subsystems/observability/index.js +895 -0
- package/dist/runtime/subsystems/observability/index.js.map +1 -0
- package/dist/runtime/subsystems/observability/observability.drizzle-backend.d.ts +15 -0
- package/dist/runtime/subsystems/observability/observability.drizzle-backend.js +465 -0
- package/dist/runtime/subsystems/observability/observability.drizzle-backend.js.map +1 -0
- package/dist/runtime/subsystems/observability/observability.memory-backend.d.ts +28 -0
- package/dist/runtime/subsystems/observability/observability.memory-backend.js +75 -0
- package/dist/runtime/subsystems/observability/observability.memory-backend.js.map +1 -0
- package/dist/runtime/subsystems/observability/observability.module.d.ts +56 -0
- package/dist/runtime/subsystems/observability/observability.module.js +887 -0
- package/dist/runtime/subsystems/observability/observability.module.js.map +1 -0
- package/dist/runtime/subsystems/observability/observability.protocol.d.ts +155 -0
- package/dist/runtime/subsystems/observability/observability.protocol.js +1 -0
- package/dist/runtime/subsystems/observability/observability.protocol.js.map +1 -0
- package/dist/runtime/subsystems/observability/observability.tokens.d.ts +19 -0
- package/dist/runtime/subsystems/observability/observability.tokens.js +8 -0
- package/dist/runtime/subsystems/observability/observability.tokens.js.map +1 -0
- package/dist/runtime/subsystems/observability/reporters/bridge-metrics.reporter.d.ts +79 -0
- package/dist/runtime/subsystems/observability/reporters/bridge-metrics.reporter.js +425 -0
- package/dist/runtime/subsystems/observability/reporters/bridge-metrics.reporter.js.map +1 -0
- package/dist/runtime/subsystems/sync/sync-audit.schema.d.ts +4 -4
- package/package.json +6 -1
- package/runtime/subsystems/index.ts +23 -0
- package/runtime/subsystems/observability/index.ts +35 -0
- package/runtime/subsystems/observability/observability.drizzle-backend.ts +223 -0
- package/runtime/subsystems/observability/observability.memory-backend.ts +111 -0
- package/runtime/subsystems/observability/observability.module.ts +115 -0
- package/runtime/subsystems/observability/observability.protocol.ts +167 -0
- package/runtime/subsystems/observability/observability.tokens.ts +18 -0
- package/runtime/subsystems/observability/reporters/bridge-metrics.reporter.ts +222 -0
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DrizzleObservabilityService — production backend for IObservabilityService.
|
|
3
|
+
*
|
|
4
|
+
* Pure read-only SQL over framework-owned tables:
|
|
5
|
+
* - `job_run` (jobs subsystem)
|
|
6
|
+
* - `bridge_delivery` (bridge subsystem)
|
|
7
|
+
* - `domain_events` (events subsystem)
|
|
8
|
+
* - `sync_runs` (sync subsystem)
|
|
9
|
+
* - `sync_subscriptions` (sync subsystem)
|
|
10
|
+
*
|
|
11
|
+
* No new tables, no background loops, no lifecycle hooks. This is a query
|
|
12
|
+
* facade — each call hits the DB and returns. Rate-limit / dashboard-cadence
|
|
13
|
+
* coordination is the caller's responsibility.
|
|
14
|
+
*
|
|
15
|
+
* # Error behavior
|
|
16
|
+
*
|
|
17
|
+
* Methods throw on DB failure (consistent with the rest of the ADR-008
|
|
18
|
+
* family's write-ish backends). Dashboards and `/dev/status` endpoints are
|
|
19
|
+
* expected to handle the error surface — returning an empty snapshot on a
|
|
20
|
+
* transient DB blip would silently hide "Postgres is down" from operators,
|
|
21
|
+
* which is the opposite of what observability is for.
|
|
22
|
+
*
|
|
23
|
+
* # Drizzle-specific extensions (documented per CLAUDE.md core/extensions)
|
|
24
|
+
*
|
|
25
|
+
* Extensions MAY be added to this class that leverage Postgres-specific
|
|
26
|
+
* capability (e.g. `pg_stat_activity` sampling, advisory-lock inspection).
|
|
27
|
+
* Consumers opting into extensions accept backend-specific coupling; the
|
|
28
|
+
* core five methods below stay backend-portable.
|
|
29
|
+
*/
|
|
30
|
+
import { Inject, Injectable } from '@nestjs/common';
|
|
31
|
+
import { desc, eq, sql } from 'drizzle-orm';
|
|
32
|
+
|
|
33
|
+
import { DRIZZLE } from '../../constants/tokens';
|
|
34
|
+
import type { DrizzleClient } from '../../types/drizzle';
|
|
35
|
+
import { bridgeDelivery } from '../bridge/bridge-delivery.schema';
|
|
36
|
+
import { jobRuns } from '../jobs/job-orchestration.schema';
|
|
37
|
+
import { syncRuns, syncSubscriptions } from '../sync/sync-audit.schema';
|
|
38
|
+
import type {
|
|
39
|
+
CursorSnapshot,
|
|
40
|
+
IObservabilityService,
|
|
41
|
+
JobRunFailure,
|
|
42
|
+
PoolDepth,
|
|
43
|
+
StatusHistogram,
|
|
44
|
+
SyncRunSummary,
|
|
45
|
+
} from './observability.protocol';
|
|
46
|
+
|
|
47
|
+
/**
 * Drizzle-backed implementation of `IObservabilityService`.
 *
 * Every method issues a single read-only query through the injected
 * `DRIZZLE` client and returns the mapped rows. Query errors are not
 * caught here — they propagate to the caller.
 */
@Injectable()
export class DrizzleObservabilityService implements IObservabilityService {
  constructor(@Inject(DRIZZLE) private readonly db: DrizzleClient) {}

  /**
   * Pending/running counts and p95 claimed age per pool.
   *
   * Only pools with at least one `pending` or `running` row are returned
   * (the `WHERE` clause removes everything else before grouping).
   *
   * @returns one entry per active pool, ordered by pool name
   */
  async getPoolDepths(): Promise<PoolDepth[]> {
    // Raw SQL: Drizzle's builder drops AS-aliases on bare `sql<>` columns,
    // which the pg driver then can't map back by name. Raw execute with
    // explicit aliases keeps the result shape deterministic.
    //
    // The p95 is rounded but deliberately NOT cast to `int`: a run that has
    // been claimed for more than ~24.8 days exceeds the int4 range in
    // milliseconds, and the cast would make the whole query fail with
    // "integer out of range" — precisely when a stuck worker needs to be
    // visible. Double precision holds integral values exactly up to 2^53.
    const result = await this.db.execute(sql`
      SELECT
        pool AS name,
        COUNT(*) FILTER (WHERE status = 'pending')::int AS pending,
        COUNT(*) FILTER (WHERE status = 'running')::int AS running,
        round(
          (percentile_cont(0.95) WITHIN GROUP (
            ORDER BY EXTRACT(EPOCH FROM (now() - claimed_at)) * 1000
          ) FILTER (WHERE status = 'running' AND claimed_at IS NOT NULL))::double precision
        ) AS claimed_age_p95_ms
      FROM job_run
      WHERE status IN ('pending','running')
      GROUP BY pool
      ORDER BY pool
    `);

    const rows = extractRows<{
      name: string;
      pending: number;
      running: number;
      // Drivers may surface the value as a number or as a numeric string.
      claimed_age_p95_ms: number | string | null;
    }>(result);

    return rows.map((r) => ({
      name: r.name,
      pending: r.pending,
      running: r.running,
      claimedAgeP95Ms:
        r.claimed_age_p95_ms === null ? null : Number(r.claimed_age_p95_ms),
    }));
  }

  /**
   * Most recent `sync_runs` rows, newest `startedAt` first.
   *
   * @param limit cap on rows returned
   * @param integrationId when provided, restricts to runs whose
   *   subscription belongs to this integration
   */
  async getRecentSyncRuns(
    limit: number,
    integrationId?: string,
  ): Promise<SyncRunSummary[]> {
    // Join to sync_subscriptions to recover integration/adapter/domain so
    // callers don't re-hydrate the subscription row themselves. Upstream
    // sync_runs owns only subscription_id; the enrichment columns live on
    // the subscription side.
    const base = this.db
      .select({
        id: syncRuns.id,
        subscriptionId: syncRuns.subscriptionId,
        integrationId: syncSubscriptions.integrationId,
        adapter: syncSubscriptions.adapter,
        domain: syncSubscriptions.domain,
        direction: syncRuns.direction,
        action: syncRuns.action,
        status: syncRuns.status,
        recordsFound: syncRuns.recordsFound,
        recordsProcessed: syncRuns.recordsProcessed,
        durationMs: syncRuns.durationMs,
        error: syncRuns.error,
        startedAt: syncRuns.startedAt,
        completedAt: syncRuns.completedAt,
      })
      .from(syncRuns)
      .innerJoin(
        syncSubscriptions,
        eq(syncRuns.subscriptionId, syncSubscriptions.id),
      );

    const filtered =
      integrationId !== undefined
        ? base.where(eq(syncSubscriptions.integrationId, integrationId))
        : base;

    const rows = await filtered.orderBy(desc(syncRuns.startedAt)).limit(limit);

    return rows.map((r) => ({
      id: r.id,
      subscriptionId: r.subscriptionId,
      integrationId: r.integrationId,
      adapter: r.adapter,
      domain: r.domain,
      direction: r.direction,
      action: r.action,
      status: r.status,
      recordsFound: r.recordsFound,
      recordsProcessed: r.recordsProcessed,
      durationMs: r.durationMs,
      error: r.error,
      startedAt: r.startedAt,
      completedAt: r.completedAt,
    }));
  }

  /**
   * Row counts of `bridge_delivery` grouped by status over a trailing window.
   *
   * @param windowHours trailing window size in hours; fractional values
   *   (e.g. `0.5`) are accepted
   * @returns flat `status → count` map; statuses with no rows are absent
   */
  async getBridgeDeliveryHistogram(
    windowHours: number,
  ): Promise<StatusHistogram> {
    // Window on COALESCE(delivered_at, attempted_at) so terminal skipped/
    // failed rows (which never get delivered_at) are counted alongside
    // delivered rows. The histogram is a flat Record<status, count>.
    //
    // The window is built as `<hours> * interval '1 hour'` rather than
    // `make_interval(hours => …)`: make_interval's `hours` argument is an
    // integer, so a fractional window would be rejected by Postgres.
    const result = await this.db.execute(sql`
      SELECT status, COUNT(*)::int AS count
      FROM bridge_delivery
      WHERE COALESCE(delivered_at, attempted_at)
            > now() - (${windowHours}::double precision * interval '1 hour')
      GROUP BY status
    `);

    const rows = extractRows<{ status: string; count: number }>(result);
    const hist: StatusHistogram = {};
    for (const r of rows) hist[r.status] = r.count;
    return hist;
  }

  /**
   * Most recent `job_run` rows with `status = 'failed'`, newest first.
   *
   * @param limit cap on rows returned
   */
  async getRecentFailedJobs(limit: number): Promise<JobRunFailure[]> {
    const rows = await this.db
      .select({
        id: jobRuns.id,
        jobType: jobRuns.jobType,
        pool: jobRuns.pool,
        status: jobRuns.status,
        error: jobRuns.error,
        startedAt: jobRuns.startedAt,
        finishedAt: jobRuns.finishedAt,
        attempts: jobRuns.attempts,
      })
      .from(jobRuns)
      .where(eq(jobRuns.status, 'failed'))
      // Postgres sorts NULLs first under a plain DESC, which would float
      // rows without a finish timestamp above genuinely recent failures.
      // NULLS LAST keeps "newest-first" meaningful and matches the memory
      // backend, which treats a missing finishedAt as the oldest value.
      .orderBy(sql`${jobRuns.finishedAt} DESC NULLS LAST`)
      .limit(limit);

    return rows.map((r) => ({
      id: r.id,
      jobType: r.jobType,
      pool: r.pool,
      status: r.status,
      error: r.error,
      startedAt: r.startedAt,
      finishedAt: r.finishedAt,
      attempts: r.attempts,
    }));
  }

  /**
   * Cursor state for every enabled `sync_subscriptions` row, ordered by
   * integration id and then domain. The cursor payload is passed through
   * untouched.
   */
  async getCursors(): Promise<CursorSnapshot[]> {
    const rows = await this.db
      .select({
        id: syncSubscriptions.id,
        integrationId: syncSubscriptions.integrationId,
        adapter: syncSubscriptions.adapter,
        domain: syncSubscriptions.domain,
        cursor: syncSubscriptions.cursor,
        lastSyncAt: syncSubscriptions.lastSyncAt,
      })
      .from(syncSubscriptions)
      .where(eq(syncSubscriptions.enabled, true))
      .orderBy(syncSubscriptions.integrationId, syncSubscriptions.domain);

    return rows.map((r) => ({
      subscriptionId: r.id,
      integrationId: r.integrationId,
      adapter: r.adapter,
      domain: r.domain,
      lastCursor: r.cursor,
      lastSyncAt: r.lastSyncAt,
    }));
  }
}
|
|
213
|
+
|
|
214
|
+
/**
 * Normalize `db.execute()` return shape. `node-postgres` returns `{ rows: [] }`
 * while some pg-compatible drivers return the row array directly.
 *
 * Any other shape — including `null`, `undefined`, and primitives — yields an
 * empty array instead of throwing.
 *
 * @param result raw value returned by the driver
 * @returns the row array found on `result.rows`, else `result` itself when it
 *   is an array, else `[]`
 */
function extractRows<T>(result: unknown): T[] {
  // Guard first: reading `.rows` off null/undefined would throw a TypeError.
  if (typeof result !== 'object' || result === null) return [];

  // `{ rows: [...] }` takes precedence over a bare array.
  const { rows } = result as { rows?: unknown };
  if (Array.isArray(rows)) return rows as T[];

  return Array.isArray(result) ? (result as T[]) : [];
}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MemoryObservabilityService — in-memory test backend for
|
|
3
|
+
* IObservabilityService.
|
|
4
|
+
*
|
|
5
|
+
* Stores snapshot data set by the test harness and returns it verbatim.
|
|
6
|
+
* This is deliberately NOT a replay-from-events simulator — the point of
|
|
7
|
+
* the memory backend is to let tests assert "when the subsystem returns
|
|
8
|
+
* X, does the caller render Y correctly?" without standing up Postgres.
|
|
9
|
+
*
|
|
10
|
+
* Tests populate the backend via the `seed*` methods, then exercise the
|
|
11
|
+
* protocol reads. Each seed method replaces (not merges) its slice, which
|
|
12
|
+
* keeps the mental model simple: the backend is a fixture holder.
|
|
13
|
+
*
|
|
14
|
+
* No lifecycle hooks (no background work to manage).
|
|
15
|
+
*/
|
|
16
|
+
import { Injectable } from '@nestjs/common';
|
|
17
|
+
import type {
|
|
18
|
+
CursorSnapshot,
|
|
19
|
+
IObservabilityService,
|
|
20
|
+
JobRunFailure,
|
|
21
|
+
PoolDepth,
|
|
22
|
+
StatusHistogram,
|
|
23
|
+
SyncRunSummary,
|
|
24
|
+
} from './observability.protocol';
|
|
25
|
+
|
|
26
|
+
/**
 * Fixture-holding implementation of `IObservabilityService`.
 *
 * Each read returns a copy of whatever the matching `seed*` call last
 * stored; nothing is computed beyond filtering, sorting and truncating.
 */
@Injectable()
export class MemoryObservabilityService implements IObservabilityService {
  private pools: PoolDepth[] = [];
  private syncRuns: SyncRunSummary[] = [];
  private bridgeHistogram: StatusHistogram = {};
  private failedJobs: JobRunFailure[] = [];
  private cursors: CursorSnapshot[] = [];

  // ─── Core contract ─────────────────────────────────────────────────────

  /** Returns a shallow copy of the seeded pool depths. */
  async getPoolDepths(): Promise<PoolDepth[]> {
    return Array.from(this.pools);
  }

  /**
   * Seeded sync runs, newest `startedAt` first, truncated to `limit`.
   *
   * @param limit cap on rows returned
   * @param integrationId when provided, keeps only runs with this integration id
   */
  async getRecentSyncRuns(
    limit: number,
    integrationId?: string,
  ): Promise<SyncRunSummary[]> {
    let candidates = this.syncRuns;
    if (integrationId !== undefined) {
      candidates = candidates.filter(
        (run) => run.integrationId === integrationId,
      );
    }

    // Sort a copy so the seeded fixture order is never disturbed.
    const newestFirst = candidates.slice();
    newestFirst.sort(
      (left, right) => right.startedAt.getTime() - left.startedAt.getTime(),
    );
    return newestFirst.slice(0, limit);
  }

  /** Returns a shallow copy of the seeded histogram. */
  async getBridgeDeliveryHistogram(
    _windowHours: number,
  ): Promise<StatusHistogram> {
    // Memory backend ignores the window — tests that care about windowing
    // should seed the histogram for the window they're simulating.
    return Object.assign({}, this.bridgeHistogram);
  }

  /**
   * Seeded failed jobs, newest `finishedAt` first, truncated to `limit`.
   * A job without `finishedAt` sorts as if it finished at epoch 0.
   */
  async getRecentFailedJobs(limit: number): Promise<JobRunFailure[]> {
    const finishedMs = (job: JobRunFailure): number =>
      job.finishedAt?.getTime() ?? 0;

    const newestFirst = this.failedJobs.slice();
    newestFirst.sort((left, right) => finishedMs(right) - finishedMs(left));
    return newestFirst.slice(0, limit);
  }

  /** Returns a shallow copy of the seeded cursor snapshots. */
  async getCursors(): Promise<CursorSnapshot[]> {
    return Array.from(this.cursors);
  }

  // ─── Test seams ────────────────────────────────────────────────────────

  /** Replace the pool-depth slice. */
  seedPools(pools: PoolDepth[]): void {
    this.pools = Array.from(pools);
  }

  /** Replace the sync-run slice. */
  seedSyncRuns(runs: SyncRunSummary[]): void {
    this.syncRuns = Array.from(runs);
  }

  /** Replace the bridge-delivery histogram. */
  seedBridgeHistogram(hist: StatusHistogram): void {
    this.bridgeHistogram = Object.assign({}, hist);
  }

  /** Replace the failed-jobs slice. */
  seedFailedJobs(jobs: JobRunFailure[]): void {
    this.failedJobs = Array.from(jobs);
  }

  /** Replace the cursor slice. */
  seedCursors(cursors: CursorSnapshot[]): void {
    this.cursors = Array.from(cursors);
  }

  /** Reset every slice — for afterEach hooks. */
  reset(): void {
    this.cursors = [];
    this.failedJobs = [];
    this.bridgeHistogram = {};
    this.syncRuns = [];
    this.pools = [];
  }
}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ObservabilityModule — DynamicModule factory for the observability
|
|
3
|
+
* subsystem (ADR-008, 5th subsystem).
|
|
4
|
+
*
|
|
5
|
+
* Usage in AppModule:
|
|
6
|
+
* ```typescript
|
|
7
|
+
* ObservabilityModule.forRoot({
|
|
8
|
+
* backend: 'drizzle',
|
|
9
|
+
* reporters: { bridgeMetrics: true }, // optional — requires bridge subsystem
|
|
10
|
+
* })
|
|
11
|
+
* ```
|
|
12
|
+
*
|
|
13
|
+
* Usage in tests:
|
|
14
|
+
* ```typescript
|
|
15
|
+
* ObservabilityModule.forRoot({ backend: 'memory' })
|
|
16
|
+
* ```
|
|
17
|
+
*
|
|
18
|
+
* `global: true` means any module that needs `IObservabilityService` can
|
|
19
|
+
* inject `OBSERVABILITY` without importing this module. Register once in
|
|
20
|
+
* AppModule.
|
|
21
|
+
*
|
|
22
|
+
* The drizzle backend requires `DRIZZLE` to be provided globally (e.g.,
|
|
23
|
+
* via DatabaseModule). The memory backend has no dependencies.
|
|
24
|
+
*
|
|
25
|
+
* # Reporters
|
|
26
|
+
*
|
|
27
|
+
* Reporters are orthogonal to backends — they compose on top of either
|
|
28
|
+
* drizzle or memory. The `reporters.bridgeMetrics` flag enables the
|
|
29
|
+
* `BridgeMetricsReporter` sampler. Gated because the reporter imports the
|
|
30
|
+
* bridge + events schemas; consumers without the bridge subsystem should
|
|
31
|
+
* leave it off (the default).
|
|
32
|
+
*
|
|
33
|
+
* `ScheduleModule.forRoot()` is imported conditionally — only when a
|
|
34
|
+
* reporter that needs it is enabled. Keeps the module dependency-light
|
|
35
|
+
* for consumers that only want the read surface.
|
|
36
|
+
*/
|
|
37
|
+
import { type DynamicModule, Module } from '@nestjs/common';
|
|
38
|
+
|
|
39
|
+
import { DrizzleObservabilityService } from './observability.drizzle-backend';
|
|
40
|
+
import { MemoryObservabilityService } from './observability.memory-backend';
|
|
41
|
+
import {
|
|
42
|
+
OBSERVABILITY,
|
|
43
|
+
OBSERVABILITY_REPORTERS,
|
|
44
|
+
} from './observability.tokens';
|
|
45
|
+
|
|
46
|
+
/** Per-reporter opt-in switches accepted by `ObservabilityModule.forRoot`. */
export interface ObservabilityReporterOptions {
  /**
   * When `true`, the module registers `BridgeMetricsReporter` (a periodic
   * log sampler over `bridge_delivery`). The reporter pulls in the bridge
   * subsystem's schemas transitively, so leave this unset unless that
   * subsystem is present. Defaults to `false`.
   */
  bridgeMetrics?: boolean;
}
|
|
54
|
+
|
|
55
|
+
/** Options accepted by `ObservabilityModule.forRoot`. */
export interface ObservabilityModuleOptions {
  /** Which `IObservabilityService` implementation to register. */
  backend: 'drizzle' | 'memory';
  /** Optional reporters to register alongside the service. */
  reporters?: ObservabilityReporterOptions;
}
|
|
59
|
+
|
|
60
|
+
@Module({})
export class ObservabilityModule {
  /**
   * Build the global dynamic module for the observability subsystem.
   *
   * Registers the selected backend class, aliases it under the
   * `OBSERVABILITY` token, and publishes the reporter options under
   * `OBSERVABILITY_REPORTERS`. When `reporters.bridgeMetrics` is `true`,
   * `BridgeMetricsReporter` and `ScheduleModule.forRoot()` are added too.
   *
   * @param options backend selection and reporter switches; defaults to
   *   the drizzle backend with no reporters
   * @returns a `global: true` dynamic module exporting `OBSERVABILITY`
   *   (and `BridgeMetricsReporter` when enabled)
   * @throws Error when `options.backend` is neither `'drizzle'` nor
   *   `'memory'`
   */
  static forRoot(
    options: ObservabilityModuleOptions = { backend: 'drizzle' },
  ): DynamicModule {
    let ConcreteClass:
      | typeof DrizzleObservabilityService
      | typeof MemoryObservabilityService;

    switch (options.backend) {
      case 'drizzle':
        ConcreteClass = DrizzleObservabilityService;
        break;
      case 'memory':
        ConcreteClass = MemoryObservabilityService;
        break;
      default: {
        // Fail fast: a typo or missing value must not silently select the
        // in-memory fixture backend, which returns only seeded test data.
        const unknownBackend: never = options.backend;
        throw new Error(
          `ObservabilityModule.forRoot: unknown backend "${String(
            unknownBackend,
          )}" (expected 'drizzle' or 'memory')`,
        );
      }
    }

    const wantsBridgeMetrics = options.reporters?.bridgeMetrics === true;

    const providers: DynamicModule['providers'] = [
      // Register the concrete class as the canonical instance.
      ConcreteClass,
      // OBSERVABILITY token points at the same instance — no duplicate.
      { provide: OBSERVABILITY, useExisting: ConcreteClass },
      // Expose the resolved reporter config for introspection / tests.
      {
        provide: OBSERVABILITY_REPORTERS,
        useValue: options.reporters ?? {},
      },
    ];

    const exports: DynamicModule['exports'] = [OBSERVABILITY];
    const imports: DynamicModule['imports'] = [];

    if (wantsBridgeMetrics) {
      // Lazy-require keeps the reporter file (and its @nestjs/schedule +
      // bridge schema imports) off the hot path for consumers who don't
      // enable the reporter.
      // eslint-disable-next-line @typescript-eslint/no-var-requires
      const { BridgeMetricsReporter } = require('./reporters/bridge-metrics.reporter');
      providers.push(BridgeMetricsReporter);
      exports.push(BridgeMetricsReporter);

      // ScheduleModule is a PEER dep (optional) — only resolved when a
      // reporter that uses it is enabled. Lazy-require avoids a parse-time
      // failure for consumers that haven't installed @nestjs/schedule and
      // don't need reporters.
      // eslint-disable-next-line @typescript-eslint/no-var-requires
      const { ScheduleModule } = require('@nestjs/schedule');
      imports.push(ScheduleModule.forRoot());
    }

    return {
      module: ObservabilityModule,
      global: true,
      imports,
      providers,
      exports,
    };
  }
}
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* IObservabilityService — core contract for the observability subsystem
|
|
3
|
+
* (ADR-008, 5th subsystem in the infrastructure family alongside events,
|
|
4
|
+
* jobs, cache, storage).
|
|
5
|
+
*
|
|
6
|
+
* The contract is a **read-only reflection surface** over framework-owned
|
|
7
|
+
* tables (`job_run`, `bridge_delivery`, `domain_events`, `sync_runs`,
|
|
8
|
+
* `sync_subscriptions`). The subsystem itself owns no tables — it's a query
|
|
9
|
+
* facade over state the other subsystems already persist.
|
|
10
|
+
*
|
|
11
|
+
* # Core + extensions (per CLAUDE.md "Backend swappability")
|
|
12
|
+
*
|
|
13
|
+
* The five methods below are the **core contract** — every backend MUST
|
|
14
|
+
* implement them. App code that calls only these is portable across
|
|
15
|
+
* backends (drizzle / memory / future OpenTelemetry exporter / etc).
|
|
16
|
+
*
|
|
17
|
+
* Backend-specific capabilities (e.g. Postgres `pg_stat_activity` sampling,
|
|
18
|
+
* an OTel span exporter, a Prometheus scrape endpoint) are exposed as
|
|
19
|
+
* **extensions** on the concrete backend class, not lifted into this
|
|
20
|
+
* interface. Consumers opting into extensions accept backend-specific
|
|
21
|
+
* coupling — that's the whole point; the core contract is what guarantees
|
|
22
|
+
* portability.
|
|
23
|
+
*
|
|
24
|
+
* # The five core methods
|
|
25
|
+
*
|
|
26
|
+
* Finalized against two concrete consumers in `dealbrain-v2`:
|
|
27
|
+
* - `BridgeMetricsReporter` (60s sampler over `bridge_delivery`)
|
|
28
|
+
* - `StackStatusService` (on-demand `GET /dev/status` snapshot).
|
|
29
|
+
*
|
|
30
|
+
* Every distinct SQL query those two files run is covered by one of these
|
|
31
|
+
* five methods (or relocated entirely — see `reporters/`).
|
|
32
|
+
*/
|
|
33
|
+
/** Core read-only contract every observability backend implements. */
export interface IObservabilityService {
  /**
   * Snapshot of active job pools.
   *
   * A pool appears only if it currently has at least one pending or
   * running `job_run`; idle pools are left out, since this reports
   * activity rather than configuration.
   *
   * `claimedAgeP95Ms` is the 95th percentile of `(now - claimed_at)` in
   * milliseconds across the pool's running runs, and `null` when the pool
   * has none. A large value points at stuck workers.
   */
  getPoolDepths(): Promise<PoolDepth[]>;

  /**
   * Latest `sync_runs`, ordered most-recent-first.
   *
   * With `integrationId` the result is restricted to that integration;
   * without it, the N most recent runs across all integrations are
   * returned. To get "last N per integration", call this once per
   * integration id — a per-group LATERAL variant is intentionally kept out
   * of the core because it is a Postgres-ism that does not port cleanly to
   * the memory backend or hypothetical OTel/Redis backends.
   *
   * @param limit cap on rows returned
   * @param integrationId optional integration filter
   */
  getRecentSyncRuns(limit: number, integrationId?: string): Promise<SyncRunSummary[]>;

  /**
   * `bridge_delivery` row counts per status within a trailing window.
   *
   * Rows are placed in the window by `COALESCE(delivered_at, attempted_at)`,
   * which lets terminal `skipped` / `failed` rows be counted together with
   * `delivered` ones. Rows that are still `pending` when the query runs
   * show up under `'pending'` if they fall inside the window.
   *
   * @param windowHours trailing window size; typical values are 1h
   *                    (dashboards) or 24h (daily summary).
   */
  getBridgeDeliveryHistogram(windowHours: number): Promise<StatusHistogram>;

  /**
   * Latest `job_run` rows whose `status` is `'failed'`, newest-first.
   *
   * Meant for on-demand operational drill-down (a dashboard panel or a
   * `/dev/status` endpoint). For structured alerting, subscribe to job
   * events through the jobs subsystem instead of polling this method.
   */
  getRecentFailedJobs(limit: number): Promise<JobRunFailure[]>;

  /**
   * Cursor state for each enabled `sync_subscriptions` row.
   *
   * The cursor payload is returned verbatim and left untyped here so the
   * surface works across adapter shapes; strategies type it internally
   * (poll: `{ systemModstamp }`, cdc: `{ replayId }`, webhook: `{ ts }`).
   */
  getCursors(): Promise<CursorSnapshot[]>;
}
|
|
98
|
+
|
|
99
|
+
// ─── Return shapes ───────────────────────────────────────────────────────
|
|
100
|
+
|
|
101
|
+
/** Activity snapshot for a single job pool. */
export interface PoolDepth {
  /** Name of the pool, as stored in `job_run.pool`. */
  name: string;
  /** Number of runs with `status = 'pending'`. */
  pending: number;
  /** Number of runs with `status = 'running'`. */
  running: number;
  /**
   * 95th percentile of `now - claimed_at`, in milliseconds, across the
   * pool's running runs; `null` when the pool has no running runs.
   */
  claimedAgeP95Ms: number | null;
}
|
|
114
|
+
|
|
115
|
+
/** One `sync_runs` row enriched with labels from its subscription. */
export interface SyncRunSummary {
  id: string;
  /** Owning subscription (FK → `sync_subscriptions.id`). */
  subscriptionId: string;
  /**
   * Integration id, obtained by joining `sync_subscriptions`, so a
   * consumer can tell which integration ran without loading the
   * subscription itself.
   */
  integrationId: string | null;
  /** Subscription's adapter label (e.g. `'salesforce'`). */
  adapter: string | null;
  /** Subscription's domain label (e.g. `'opportunity'`). */
  domain: string | null;
  direction: string;
  action: string;
  status: string;
  recordsFound: number;
  recordsProcessed: number;
  durationMs: number | null;
  error: string | null;
  startedAt: Date;
  completedAt: Date | null;
}
|
|
139
|
+
|
|
140
|
+
/**
 * Bridge-delivery row counts keyed by status. The keys are drawn from
 * `'pending' | 'delivered' | 'skipped' | 'failed'`; a status with no rows
 * in the window has no entry.
 */
export type StatusHistogram = Record<string, number>;
|
|
146
|
+
|
|
147
|
+
/** A failed `job_run` row as returned by `getRecentFailedJobs`. */
export interface JobRunFailure {
  id: string;
  jobType: string;
  pool: string;
  status: string;
  /** Error payload from the row; left untyped. */
  error: unknown;
  startedAt: Date | null;
  finishedAt: Date | null;
  attempts: number;
}
|
|
157
|
+
|
|
158
|
+
/** Cursor state of one `sync_subscriptions` row. */
export interface CursorSnapshot {
  /** Value of `sync_subscriptions.id`. */
  subscriptionId: string;
  integrationId: string;
  adapter: string;
  domain: string;
  /** Opaque cursor payload; null until the first successful run advances it. */
  lastCursor: unknown;
  lastSyncAt: Date | null;
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
 * DI token under which the observability service is provided
 * (ADR-008, 5th subsystem).
 *
 * ```typescript
 * constructor(@Inject(OBSERVABILITY) private readonly obs: IObservabilityService) {}
 * ```
 *
 * A `Symbol()` is used, per ADR-008, so the token cannot collide with
 * any other token.
 */
export const OBSERVABILITY: unique symbol = Symbol('OBSERVABILITY');
|
|
11
|
+
|
|
12
|
+
/**
 * DI token for the reporter options the observability module was
 * configured with (an empty object when none were passed). The
 * `bridgeMetrics` entry is the opt-in for the `BridgeMetricsReporter`
 * sampler; consumers without the bridge subsystem leave it `false` (the
 * default) so the module doesn't import the reporter's bridge-schema deps.
 */
export const OBSERVABILITY_REPORTERS: unique symbol = Symbol('OBSERVABILITY_REPORTERS');
|