@xeonr/upload-pool-sdk 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/job-context.d.ts +19 -1
- package/dist/job-context.d.ts.map +1 -1
- package/dist/job-context.js +231 -140
- package/dist/job-context.js.map +1 -1
- package/dist/pool.d.ts +23 -0
- package/dist/pool.d.ts.map +1 -1
- package/dist/pool.js +152 -40
- package/dist/pool.js.map +1 -1
- package/dist/rpc-clients.d.ts.map +1 -1
- package/dist/rpc-clients.js +139 -28
- package/dist/rpc-clients.js.map +1 -1
- package/dist/tracing.d.ts +107 -0
- package/dist/tracing.d.ts.map +1 -0
- package/dist/tracing.js +270 -0
- package/dist/tracing.js.map +1 -0
- package/dist/types.d.ts +8 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +8 -1
- package/src/job-context.ts +330 -150
- package/src/pool.ts +188 -49
- package/src/rpc-clients.ts +168 -28
- package/src/tracing.ts +333 -0
- package/src/types.ts +8 -0
package/src/pool.ts
CHANGED
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
import { hostname } from "node:os";
|
|
13
13
|
import { randomBytes } from "node:crypto";
|
|
14
14
|
import { create } from "@bufbuild/protobuf";
|
|
15
|
+
import { SpanStatusCode } from "@opentelemetry/api";
|
|
15
16
|
import {
|
|
16
17
|
AcceptJobRequestSchema,
|
|
17
18
|
CompleteJobRequestSchema,
|
|
@@ -22,6 +23,13 @@ import { SseClient } from "./sse-client.js";
|
|
|
22
23
|
import { createJobContext, type JobEnvelope } from "./job-context.js";
|
|
23
24
|
import { NonRetryableError } from "./errors.js";
|
|
24
25
|
import { JsonLogger, type Logger } from "./logger.js";
|
|
26
|
+
import {
|
|
27
|
+
initTracing,
|
|
28
|
+
shutdownTracing,
|
|
29
|
+
recordSpanError,
|
|
30
|
+
stampJobAttributes,
|
|
31
|
+
type TracingHandle,
|
|
32
|
+
} from "./tracing.js";
|
|
25
33
|
import type {
|
|
26
34
|
JobContext,
|
|
27
35
|
JobHandler,
|
|
@@ -33,9 +41,13 @@ export class Pool {
|
|
|
33
41
|
private readonly rpc: RpcClients;
|
|
34
42
|
private readonly sse: SseClient;
|
|
35
43
|
private readonly logger: Logger;
|
|
44
|
+
private readonly tracing: TracingHandle;
|
|
36
45
|
private inFlight = 0;
|
|
37
46
|
private readonly workerId: string;
|
|
38
47
|
private readonly capabilities: string[];
|
|
48
|
+
private runningPromise: Promise<void> | null = null;
|
|
49
|
+
private runningResolve: (() => void) | null = null;
|
|
50
|
+
private keepAliveInterval: NodeJS.Timeout | null = null;
|
|
39
51
|
|
|
40
52
|
constructor(config: PoolConfig) {
|
|
41
53
|
this.config = {
|
|
@@ -47,6 +59,19 @@ export class Pool {
|
|
|
47
59
|
this.logger = (config.logger ?? new JsonLogger()).child({
|
|
48
60
|
workerId: this.workerId,
|
|
49
61
|
});
|
|
62
|
+
|
|
63
|
+
// Initialise tracing *before* RPC clients so the interceptor in
|
|
64
|
+
// rpc-clients picks up an active TracerProvider on its first call.
|
|
65
|
+
// Spans go to pipeline-api's /v1/traces receiver; see tracing.ts
|
|
66
|
+
// for the isolation rationale wrt the host app's OTel pipeline.
|
|
67
|
+
this.tracing = initTracing({
|
|
68
|
+
pipelineEndpoint: config.pipelineEndpoint,
|
|
69
|
+
poolToken: config.token,
|
|
70
|
+
workerId: this.workerId,
|
|
71
|
+
sdkVersion: SDK_VERSION,
|
|
72
|
+
enabled: config.tracing?.enabled,
|
|
73
|
+
});
|
|
74
|
+
|
|
50
75
|
this.rpc = createRpcClients({
|
|
51
76
|
apiEndpoint: config.apiEndpoint,
|
|
52
77
|
pipelineEndpoint: config.pipelineEndpoint,
|
|
@@ -82,14 +107,58 @@ export class Pool {
|
|
|
82
107
|
}
|
|
83
108
|
}
|
|
84
109
|
|
|
110
|
+
/**
 * Start the pool and hand back a promise that settles only when `stop()`
 * runs.
 *
 * Why the promise stays pending:
 *
 *   - The documented worker entrypoint is a top-level
 *     `await pool.start()`. Were this to resolve right after kicking off
 *     the SSE client, the script body would finish and Node could exit,
 *     because the SSE connection's socket is not guaranteed to keep the
 *     event loop alive on its own.
 *   - A 60 s interval is armed alongside the pending promise so the loop
 *     stays open even when every socket has been unrefed. Each tick also
 *     emits a `sdk.heartbeat` debug log carrying the in-flight job count.
 *
 * Calling `start()` again while already running logs a warning and
 * returns the same pending promise; the SSE client is not restarted.
 *
 * Hosts that embed the pool in a larger application (something else
 * already holds the event loop) may simply not await the result.
 *
 * @returns A promise resolved by `stop()`.
 */
async start(): Promise<void> {
  const existing = this.runningPromise;
  if (existing !== null) {
    this.logger.warn("sdk.start.already_running");
    return existing;
  }

  this.logger.info("sdk.start");
  this.sse.start();

  // Stash the resolver so stop() can release whoever awaits start().
  const running = new Promise<void>((release) => {
    this.runningResolve = release;
  });
  this.runningPromise = running;

  // Keep-alive timer; doubles as a liveness tick for log dashboards.
  this.keepAliveInterval = setInterval(
    () => this.logger.debug("sdk.heartbeat", { inFlight: this.inFlight }),
    60_000,
  );

  return running;
}
|
|
89
146
|
|
|
90
147
|
/**
 * Stop the pool: halt the SSE client, cancel the keep-alive timer, flush
 * buffered spans, and release the promise returned by `start()`.
 *
 * The release step runs in a `finally` so that a failing span flush can
 * never leave `start()` callers pending forever or leave the pool stuck
 * in the "already running" state. The flush error itself still
 * propagates to the caller of `stop()`.
 *
 * In-flight jobs are not awaited here; the current count is only logged.
 *
 * @throws Whatever `shutdownTracing()` rejects with.
 */
async stop(): Promise<void> {
  this.logger.info("sdk.stop", { inFlight: this.inFlight });
  this.sse.stop();

  if (this.keepAliveInterval) {
    clearInterval(this.keepAliveInterval);
    this.keepAliveInterval = null;
  }

  try {
    // Flush any buffered spans before exit so the final job's traces
    // land in the admin UI even on a clean SIGTERM.
    await shutdownTracing();
  } finally {
    // Always unblock `await pool.start()` and reset state, even when the
    // flush above rejected.
    if (this.runningResolve) {
      this.runningResolve();
      this.runningResolve = null;
    }
    this.runningPromise = null;
  }
}
|
|
94
163
|
|
|
95
164
|
private async handleDispatch(envelope: JobEnvelope): Promise<void> {
|
|
@@ -110,61 +179,131 @@ export class Pool {
|
|
|
110
179
|
return;
|
|
111
180
|
}
|
|
112
181
|
this.inFlight++;
|
|
113
|
-
jobLogger.info("job.dispatched", {
|
|
114
|
-
filename: envelope.filename,
|
|
115
|
-
mimeType: envelope.mimeType,
|
|
116
|
-
inFlight: this.inFlight,
|
|
117
|
-
});
|
|
118
182
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
183
|
+
// Root the entire job under iq.job, parented to the dispatching
|
|
184
|
+
// activity's span via the traceparent the pipeline-worker stamped
|
|
185
|
+
// onto the envelope. All ctx.* + rpc.* spans created inside the
|
|
186
|
+
// handler will nest under this via OTel's AsyncLocalStorage-backed
|
|
187
|
+
// context manager.
|
|
188
|
+
const parentCtx = this.tracing.contextFromEnvelope(
|
|
189
|
+
envelope.traceContext ?? "",
|
|
190
|
+
envelope.wfRunId ?? "",
|
|
191
|
+
);
|
|
122
192
|
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
193
|
+
await this.tracing.tracer.startActiveSpan(
|
|
194
|
+
"iq.job",
|
|
195
|
+
{
|
|
196
|
+
attributes: {
|
|
197
|
+
"iq.filename": envelope.filename,
|
|
198
|
+
"iq.mime_type": envelope.mimeType,
|
|
199
|
+
},
|
|
200
|
+
},
|
|
201
|
+
parentCtx,
|
|
202
|
+
async (rootSpan) => {
|
|
203
|
+
stampJobAttributes(rootSpan, {
|
|
204
|
+
runId: envelope.wfRunId ?? "",
|
|
127
205
|
jobId: envelope.jobId,
|
|
206
|
+
uploadId: envelope.uploadId,
|
|
207
|
+
urn: envelope.contentTypeContext.urn,
|
|
128
208
|
workerId: this.workerId,
|
|
129
|
-
|
|
130
|
-
}),
|
|
131
|
-
);
|
|
132
|
-
jobLogger.info("job.accepted");
|
|
209
|
+
});
|
|
133
210
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
211
|
+
jobLogger.info("job.dispatched", {
|
|
212
|
+
filename: envelope.filename,
|
|
213
|
+
mimeType: envelope.mimeType,
|
|
214
|
+
inFlight: this.inFlight,
|
|
137
215
|
});
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
jobLogger,
|
|
142
|
-
new NonRetryableError(`no handler for URN ${envelope.contentTypeContext.urn}`),
|
|
143
|
-
);
|
|
144
|
-
return;
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
await handler(ctx);
|
|
148
|
-
|
|
149
|
-
await this.rpc.integrationQueue.completeJob(
|
|
150
|
-
create(CompleteJobRequestSchema, {
|
|
151
|
-
jobId: envelope.jobId,
|
|
216
|
+
|
|
217
|
+
const handler = this.resolveHandler(envelope.contentTypeContext.urn);
|
|
218
|
+
const ctx = createJobContext(envelope, this.rpc, jobLogger, {
|
|
152
219
|
workerId: this.workerId,
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
220
|
+
});
|
|
221
|
+
const startedAt = Date.now();
|
|
222
|
+
|
|
223
|
+
try {
|
|
224
|
+
// AcceptJob — clears the pipeline's accept-timeout.
|
|
225
|
+
await this.rpc.integrationQueue.acceptJob(
|
|
226
|
+
create(AcceptJobRequestSchema, {
|
|
227
|
+
jobId: envelope.jobId,
|
|
228
|
+
workerId: this.workerId,
|
|
229
|
+
queueToken: this.config.token,
|
|
230
|
+
}),
|
|
231
|
+
);
|
|
232
|
+
jobLogger.info("job.accepted");
|
|
233
|
+
|
|
234
|
+
if (!handler) {
|
|
235
|
+
jobLogger.warn("job.unhandled", {
|
|
236
|
+
availableHandlers: this.capabilities,
|
|
237
|
+
});
|
|
238
|
+
await this.reportError(
|
|
239
|
+
envelope.jobId,
|
|
240
|
+
ctx,
|
|
241
|
+
jobLogger,
|
|
242
|
+
new NonRetryableError(
|
|
243
|
+
`no handler for URN ${envelope.contentTypeContext.urn}`,
|
|
244
|
+
),
|
|
245
|
+
);
|
|
246
|
+
rootSpan.setStatus({
|
|
247
|
+
code: SpanStatusCode.ERROR,
|
|
248
|
+
message: "no handler",
|
|
249
|
+
});
|
|
250
|
+
return;
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
// iq.handler wraps the customer's handler function so its
|
|
254
|
+
// own latency is visible as a single row (excluding the
|
|
255
|
+
// accept/complete bookends). Children appear nested.
|
|
256
|
+
await this.tracing.tracer.startActiveSpan(
|
|
257
|
+
"iq.handler",
|
|
258
|
+
async (handlerSpan) => {
|
|
259
|
+
stampJobAttributes(handlerSpan, {
|
|
260
|
+
runId: envelope.wfRunId ?? "",
|
|
261
|
+
jobId: envelope.jobId,
|
|
262
|
+
uploadId: envelope.uploadId,
|
|
263
|
+
urn: envelope.contentTypeContext.urn,
|
|
264
|
+
workerId: this.workerId,
|
|
265
|
+
});
|
|
266
|
+
try {
|
|
267
|
+
await handler(ctx);
|
|
268
|
+
handlerSpan.setStatus({ code: SpanStatusCode.OK });
|
|
269
|
+
} catch (err) {
|
|
270
|
+
recordSpanError(handlerSpan, err);
|
|
271
|
+
throw err;
|
|
272
|
+
} finally {
|
|
273
|
+
handlerSpan.end();
|
|
274
|
+
}
|
|
275
|
+
},
|
|
276
|
+
);
|
|
277
|
+
|
|
278
|
+
await this.rpc.integrationQueue.completeJob(
|
|
279
|
+
create(CompleteJobRequestSchema, {
|
|
280
|
+
jobId: envelope.jobId,
|
|
281
|
+
workerId: this.workerId,
|
|
282
|
+
queueToken: this.config.token,
|
|
283
|
+
}),
|
|
284
|
+
);
|
|
285
|
+
jobLogger.info("job.completed", {
|
|
286
|
+
durationMs: Date.now() - startedAt,
|
|
287
|
+
});
|
|
288
|
+
rootSpan.setStatus({ code: SpanStatusCode.OK });
|
|
289
|
+
} catch (err) {
|
|
290
|
+
jobLogger.error("job.failed", {
|
|
291
|
+
err,
|
|
292
|
+
durationMs: Date.now() - startedAt,
|
|
293
|
+
});
|
|
294
|
+
recordSpanError(rootSpan, err);
|
|
295
|
+
await this.reportError(
|
|
296
|
+
envelope.jobId,
|
|
297
|
+
ctx,
|
|
298
|
+
jobLogger,
|
|
299
|
+
err as Error,
|
|
300
|
+
);
|
|
301
|
+
} finally {
|
|
302
|
+
rootSpan.end();
|
|
303
|
+
this.inFlight--;
|
|
304
|
+
}
|
|
305
|
+
},
|
|
306
|
+
);
|
|
168
307
|
}
|
|
169
308
|
|
|
170
309
|
private resolveHandler(urn: string): JobHandler | undefined {
|
|
@@ -197,4 +336,4 @@ export class Pool {
|
|
|
197
336
|
}
|
|
198
337
|
}
|
|
199
338
|
|
|
200
|
-
const SDK_VERSION = "1.
|
|
339
|
+
// Version string passed to initTracing as `sdkVersion`. Keep in sync with
// the published package version (this release is 1.4.0).
const SDK_VERSION = "1.4.0";
|
package/src/rpc-clients.ts
CHANGED
|
@@ -13,10 +13,20 @@
|
|
|
13
13
|
* pipeline-api) so we keep them on separate transports and don't try to
|
|
14
14
|
* share a baseUrl.
|
|
15
15
|
*
|
|
16
|
-
* Each request is wrapped in a logging interceptor
|
|
17
|
-
* `
|
|
18
|
-
*
|
|
19
|
-
*
|
|
16
|
+
* Each request is wrapped in a tracing-and-logging interceptor:
|
|
17
|
+
* - emits one `iq.callback.<Method>` span per call, child of the
|
|
18
|
+
* active job span (so the admin UI sees a row per RPC under the
|
|
19
|
+
* dispatching parseUpload run)
|
|
20
|
+
* - injects W3C `traceparent` on outbound headers *for the pipeline
|
|
21
|
+
* transport only* — calls to `apiEndpoint` ship without traceparent
|
|
22
|
+
* so upl-im-api's own OTel pipeline doesn't pick up our trace_id
|
|
23
|
+
* and conflate the two backends
|
|
24
|
+
* - captures sanitised request/response bodies as span events (4 KB
|
|
25
|
+
* cap, keys matching token/secret/auth/... redacted), so debugging
|
|
26
|
+
* a worker callback failure doesn't require correlating with
|
|
27
|
+
* server-side logs
|
|
28
|
+
* - logs `rpc.request` / `rpc.response` / `rpc.error` at info/error
|
|
29
|
+
* for the legacy log-only consumer surface that predates tracing
|
|
20
30
|
*/
|
|
21
31
|
import { createConnectTransport } from "@connectrpc/connect-node";
|
|
22
32
|
import {
|
|
@@ -26,41 +36,167 @@ import {
|
|
|
26
36
|
ConnectError,
|
|
27
37
|
Code,
|
|
28
38
|
} from "@connectrpc/connect";
|
|
39
|
+
import {
|
|
40
|
+
context as otelContext,
|
|
41
|
+
SpanStatusCode,
|
|
42
|
+
trace,
|
|
43
|
+
} from "@opentelemetry/api";
|
|
29
44
|
import { InternalUploadsService } from "./protocol/uplim/api/v1/uploads_pb.js";
|
|
30
45
|
import { IntegrationQueueService } from "./protocol/uplim/workflow/v1/integration_queue_pb.js";
|
|
31
46
|
import type { Logger } from "./logger.js";
|
|
47
|
+
import {
|
|
48
|
+
SPAN_ATTR,
|
|
49
|
+
getTracingHandle,
|
|
50
|
+
recordSpanError,
|
|
51
|
+
sanitizeRpcBody,
|
|
52
|
+
spanContextToTraceparent,
|
|
53
|
+
} from "./tracing.js";
|
|
32
54
|
|
|
33
55
|
/**
 * The pair of typed Connect clients handed out by `createRpcClients`.
 * Each client is bound to its own transport.
 */
export interface RpcClients {
  /** Client for `IntegrationQueueService`, created on the pipeline transport. */
  integrationQueue: Client<typeof IntegrationQueueService>;
  /** Client for `InternalUploadsService`, created on the API transport. */
  internalUploads: Client<typeof InternalUploadsService>;
}
|
|
37
59
|
|
|
38
|
-
|
|
60
|
+
/** Which backend a transport talks to; controls traceparent injection. */
type RpcTarget = "api" | "pipeline";

/**
 * Build a Connect interceptor that logs every RPC and, when a tracing
 * handle is available, wraps the call in an `iq.callback.<Method>` span.
 *
 * Behaviour per call:
 *   - For the `"pipeline"` target only, a W3C `traceparent` header derived
 *     from the caller's active span is set on the outbound request.
 *   - `rpc.request` / `rpc.response` are logged at debug, `rpc.error` at
 *     error, each with method, target and (for response/error) duration.
 *   - With tracing enabled, the span records method/service/target,
 *     duration and result code, sanitised request/response bodies as span
 *     events, and the job-scoped attributes copied from the caller's span.
 *   - Without a tracing handle, only the logging path runs.
 *
 * Errors from `next` are always rethrown unchanged.
 *
 * @param logger Logger used for the `rpc.*` log lines.
 * @param target Backend this interceptor is attached to.
 * @returns The interceptor to install on a Connect transport.
 */
function tracingLoggingInterceptor(
  logger: Logger,
  target: RpcTarget,
): Interceptor {
  return (next) => async (req) => {
    const handle = getTracingHandle();
    const method = `${req.service.typeName}/${req.method.name}`;

    // Capture the caller's span *before* opening the RPC span. Inside
    // startActiveSpan's callback the active span is the new RPC span
    // itself, so asking for the active span there would never yield the
    // parent whose job attributes we want to inherit.
    const parentSpan = trace.getActiveSpan();

    // Inject W3C traceparent on the pipeline target *only*. upl-im-api
    // runs its own OTel pipeline against a different backend; passing
    // our trace_id over would graft our worker activity into its trees.
    if (target === "pipeline" && parentSpan) {
      req.header.set(
        "traceparent",
        spanContextToTraceparent(parentSpan.spanContext()),
      );
    }

    const startedAt = Date.now();
    logger.debug("rpc.request", { method, target });

    // When tracing is uninitialised (e.g. tests, dry-runs), fall through
    // to a logging-only path so we don't crash on a null tracer.
    if (!handle) {
      try {
        const res = await next(req);
        logger.debug("rpc.response", {
          method,
          target,
          durationMs: Date.now() - startedAt,
        });
        return res;
      } catch (err) {
        const code =
          err instanceof ConnectError ? Code[err.code] : undefined;
        const message = err instanceof Error ? err.message : String(err);
        logger.error("rpc.error", {
          method,
          target,
          code,
          message,
          durationMs: Date.now() - startedAt,
        });
        throw err;
      }
    }

    return handle.tracer.startActiveSpan(
      `iq.callback.${req.method.name}`,
      async (span) => {
        span.setAttribute(SPAN_ATTR.RPC_METHOD, method);
        span.setAttribute(SPAN_ATTR.RPC_SERVICE, req.service.typeName);
        span.setAttribute(SPAN_ATTR.RPC_TARGET, target);

        // Inherit job-scoped attributes from the caller's span (stamped
        // by Pool.handleDispatch via stampJobAttributes). Without them
        // the span lacks the run id and is dropped by the receiver.
        // `attributes` is not part of the public Span interface, hence
        // the structural cast; spans that don't expose it are skipped.
        const parentAttrs = (parentSpan as unknown as {
          attributes?: Record<string, unknown>;
        })?.attributes;
        if (parentAttrs) {
          for (const key of [
            SPAN_ATTR.RUN_ID,
            SPAN_ATTR.JOB_ID,
            SPAN_ATTR.UPLOAD_ID,
            SPAN_ATTR.URN,
            SPAN_ATTR.WORKER_ID,
          ]) {
            const v = parentAttrs[key];
            if (typeof v === "string" && v.length > 0) {
              span.setAttribute(key, v);
            }
          }
        }

        // Body capture: request first, response on success, error
        // payload on failure. Sanitising is delegated to sanitizeRpcBody.
        try {
          span.addEvent("rpc.request", {
            body: sanitizeRpcBody(req.message),
          });
        } catch {
          /* never let body capture crash the RPC */
        }

        try {
          const res = await next(req);
          const durationMs = Date.now() - startedAt;
          span.setAttribute(SPAN_ATTR.RPC_DURATION_MS, durationMs);
          span.setAttribute(SPAN_ATTR.RPC_CODE, "ok");
          try {
            span.addEvent("rpc.response", {
              body: sanitizeRpcBody(res.message),
            });
          } catch {
            /* body capture is best-effort */
          }
          span.setStatus({ code: SpanStatusCode.OK });
          logger.debug("rpc.response", {
            method,
            target,
            durationMs,
          });
          return res;
        } catch (err) {
          const durationMs = Date.now() - startedAt;
          const code =
            err instanceof ConnectError ? Code[err.code] : "internal";
          const message =
            err instanceof Error ? err.message : String(err);
          span.setAttribute(SPAN_ATTR.RPC_DURATION_MS, durationMs);
          span.setAttribute(SPAN_ATTR.RPC_CODE, String(code));
          span.addEvent("rpc.error", { code: String(code), message });
          recordSpanError(span, err);
          logger.error("rpc.error", {
            method,
            target,
            code,
            message,
            durationMs,
          });
          throw err;
        } finally {
          span.end();
        }
      },
    );
  };
}
|
|
66
202
|
|
|
@@ -76,15 +212,19 @@ export function createRpcClients(config: RpcClientsConfig): RpcClients {
|
|
|
76
212
|
const apiTransport = createConnectTransport({
|
|
77
213
|
baseUrl: config.apiEndpoint,
|
|
78
214
|
httpVersion: "1.1",
|
|
79
|
-
interceptors: [
|
|
215
|
+
interceptors: [tracingLoggingInterceptor(rpcLogger, "api")],
|
|
80
216
|
});
|
|
81
217
|
const pipelineTransport = createConnectTransport({
|
|
82
218
|
baseUrl: config.pipelineEndpoint,
|
|
83
219
|
httpVersion: "1.1",
|
|
84
|
-
interceptors: [
|
|
220
|
+
interceptors: [tracingLoggingInterceptor(rpcLogger, "pipeline")],
|
|
85
221
|
});
|
|
86
222
|
return {
|
|
87
223
|
internalUploads: createClient(InternalUploadsService, apiTransport),
|
|
88
224
|
integrationQueue: createClient(IntegrationQueueService, pipelineTransport),
|
|
89
225
|
};
|
|
90
226
|
}
|
|
227
|
+
|
|
228
|
+
// keep otelContext import alive for future extensions that may want to
|
|
229
|
+
// detach span scopes around streaming responses.
|
|
230
|
+
void otelContext;
|