@xeonr/upload-pool-sdk 1.3.0 → 1.4.1

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -7,6 +7,7 @@
7
7
  */
8
8
  import { writeFile } from "node:fs/promises";
9
9
  import { create } from "@bufbuild/protobuf";
10
+ import { SpanStatusCode } from "@opentelemetry/api";
10
11
  import {
11
12
  ConfirmMetaUploadRequestSchema,
12
13
  InternalUpdateUploadRequestSchema,
@@ -18,6 +19,12 @@ import {
18
19
  import { UploadMetadataSchema } from "./protocol/uplim/api/v1/metadata_pb.js";
19
20
  import type { RpcClients } from "./rpc-clients.js";
20
21
  import type { Logger } from "./logger.js";
22
+ import {
23
+ SPAN_ATTR,
24
+ getTracingHandle,
25
+ recordSpanError,
26
+ stampJobAttributes,
27
+ } from "./tracing.js";
21
28
  import type {
22
29
  FolderContext,
23
30
  JobContext,
@@ -50,13 +57,80 @@ export interface JobEnvelope {
50
57
  config?: string; // base64-encoded
51
58
  };
52
59
  folderContext?: FolderContext;
60
+ /**
61
+ * W3C traceparent string forwarded by pipeline-worker. The SDK parses
62
+ * this and uses it as the parent SpanContext for the iq.job root
63
+ * span so worker callbacks nest under the dispatching parseUpload
64
+ * trace. Optional — may be absent for legacy direct-dispatch paths.
65
+ */
66
+ traceContext?: string;
67
+ /**
68
+ * The pipeline workflow run id (workflow_runs.run_id) that owns this
69
+ * job. The SDK stamps it on every span as the `pipeline.run_id`
70
+ * attribute so pipeline-api's trace-store receiver associates the
71
+ * span with the right run. Optional in payload; spans without it
72
+ * are dropped by the receiver.
73
+ */
74
+ wfRunId?: string;
75
+ }
76
+
77
+ /**
78
+ * Wrap an async fn in an `iq.ctx.*` span stamped with the job's identifiers.
79
+ * Used for every ctx.* method so the admin UI sees a row per handler
80
+ * callback with its own latency and (where applicable) request/response
81
+ * body on span events.
82
+ *
83
+ * No-op when tracing is uninitialised (e.g. in tests): the inner fn runs
84
+ * directly and we lose the span, no exception.
85
+ */
86
+ function withCtxSpan<T>(
87
+ name: string,
88
+ _envelope: JobEnvelope,
89
+ _workerId: string | undefined,
90
+ fn: () => Promise<T>,
91
+ attrs?: Record<string, string | number | boolean>,
92
+ ): Promise<T> {
93
+ const handle = getTracingHandle();
94
+ if (!handle) {
95
+ return fn();
96
+ }
97
+ return handle.tracer.startActiveSpan(name, async (span) => {
98
+ // Job-scoped attrs (runId, jobId, ...) pulled from the active
99
+ // OTel context — set once by Pool.handleDispatch via
100
+ // withJobContext. The receiver requires pipeline.run_id or the
101
+ // span is dropped, so this stamp is load-bearing.
102
+ stampJobAttributes(span);
103
+ if (attrs) {
104
+ for (const [k, v] of Object.entries(attrs)) {
105
+ span.setAttribute(k, v);
106
+ }
107
+ }
108
+ const startedAt = Date.now();
109
+ try {
110
+ const result = await fn();
111
+ span.setAttribute("ctx.duration_ms", Date.now() - startedAt);
112
+ span.setStatus({ code: SpanStatusCode.OK });
113
+ return result;
114
+ } catch (err) {
115
+ recordSpanError(span, err);
116
+ throw err;
117
+ } finally {
118
+ span.end();
119
+ }
120
+ });
121
+ }
122
+
123
+ export interface CreateJobContextOpts {
124
+ workerId?: string;
53
125
  }
54
126
 
55
127
  export function createJobContext(
56
128
  envelope: JobEnvelope,
57
129
  rpc: RpcClients,
58
130
  logger: Logger,
131
+ opts: CreateJobContextOpts = {},
59
132
  ): JobContext {
133
+ const workerId = opts.workerId;
60
134
  const ctx: JobContext = {
61
135
  jobId: envelope.jobId,
62
136
  uploadId: envelope.uploadId,
@@ -73,196 +147,300 @@ export function createJobContext(
73
147
  sourceUrl: envelope.sourceUrl,
74
148
 
75
149
  async download() {
76
- logger.debug("source.download.start");
77
- const resp = await fetch(envelope.sourceUrl);
78
- if (!resp.ok || !resp.body) {
79
- logger.error("source.download.failed", {
80
- status: resp.status,
81
- statusText: resp.statusText,
82
- });
83
- throw new Error(`source download failed: ${resp.status} ${resp.statusText}`);
84
- }
85
- return resp.body;
150
+ return withCtxSpan("iq.ctx.download", envelope, workerId, async () => {
151
+ logger.debug("source.download.start");
152
+ const resp = await fetch(envelope.sourceUrl);
153
+ if (!resp.ok || !resp.body) {
154
+ logger.error("source.download.failed", {
155
+ status: resp.status,
156
+ statusText: resp.statusText,
157
+ });
158
+ throw new Error(
159
+ `source download failed: ${resp.status} ${resp.statusText}`,
160
+ );
161
+ }
162
+ return resp.body;
163
+ }, { "ctx.mode": "stream" });
86
164
  },
87
165
 
88
166
  async downloadBuffer() {
89
- logger.debug("source.download.start", { mode: "buffer" });
90
- const resp = await fetch(envelope.sourceUrl);
91
- if (!resp.ok) {
92
- logger.error("source.download.failed", {
93
- status: resp.status,
94
- statusText: resp.statusText,
95
- });
96
- throw new Error(`source download failed: ${resp.status} ${resp.statusText}`);
97
- }
98
- const buf = await resp.arrayBuffer();
99
- logger.debug("source.download.complete", { sizeBytes: buf.byteLength });
100
- return new Uint8Array(buf);
167
+ return withCtxSpan("iq.ctx.download", envelope, workerId, async () => {
168
+ logger.debug("source.download.start", { mode: "buffer" });
169
+ const resp = await fetch(envelope.sourceUrl);
170
+ if (!resp.ok) {
171
+ logger.error("source.download.failed", {
172
+ status: resp.status,
173
+ statusText: resp.statusText,
174
+ });
175
+ throw new Error(
176
+ `source download failed: ${resp.status} ${resp.statusText}`,
177
+ );
178
+ }
179
+ const buf = await resp.arrayBuffer();
180
+ logger.debug("source.download.complete", { sizeBytes: buf.byteLength });
181
+ return new Uint8Array(buf);
182
+ }, { "ctx.mode": "buffer" });
101
183
  },
102
184
 
103
185
  async downloadToFile(path: string) {
104
- const data = await ctx.downloadBuffer();
105
- await writeFile(path, data);
106
- logger.debug("source.download.to_file", { path, sizeBytes: data.byteLength });
186
+ return withCtxSpan(
187
+ "iq.ctx.download_to_file",
188
+ envelope,
189
+ workerId,
190
+ async () => {
191
+ const data = await ctx.downloadBuffer();
192
+ await writeFile(path, data);
193
+ logger.debug("source.download.to_file", {
194
+ path,
195
+ sizeBytes: data.byteLength,
196
+ });
197
+ },
198
+ );
107
199
  },
108
200
 
109
201
  async uploadMeta(opts: UploadMetaOpts) {
110
- logger.debug("meta.upload.start", {
111
- type: opts.type,
112
- instance: opts.instance,
113
- filename: opts.filename,
114
- });
115
- const protoType = META_TYPE_MAP[opts.type];
116
- const metaType = create(MetaTypeSchema, {
117
- type: protoType,
118
- n: opts.instance,
119
- filename: opts.filename,
120
- });
202
+ return withCtxSpan(
203
+ "iq.ctx.upload_meta",
204
+ envelope,
205
+ workerId,
206
+ async () => {
207
+ logger.debug("meta.upload.start", {
208
+ type: opts.type,
209
+ instance: opts.instance,
210
+ filename: opts.filename,
211
+ });
212
+ const protoType = META_TYPE_MAP[opts.type];
213
+ const metaType = create(MetaTypeSchema, {
214
+ type: protoType,
215
+ n: opts.instance,
216
+ filename: opts.filename,
217
+ });
121
218
 
122
- // 1. RequestMetaUpload → presigned URL.
123
- const requested = await rpc.internalUploads.requestMetaUpload(
124
- create(RequestMetaUploadRequestSchema, {
125
- updateToken: envelope.updateToken,
126
- metaType: [metaType],
127
- }),
128
- );
219
+ // 1. RequestMetaUpload → presigned URL.
220
+ const requested = await rpc.internalUploads.requestMetaUpload(
221
+ create(RequestMetaUploadRequestSchema, {
222
+ updateToken: envelope.updateToken,
223
+ metaType: [metaType],
224
+ }),
225
+ );
129
226
 
130
- const meta = requested.metaUploads[0];
131
- if (!meta) {
132
- throw new Error("RequestMetaUpload returned no meta_uploads");
133
- }
227
+ const meta = requested.metaUploads[0];
228
+ if (!meta) {
229
+ throw new Error("RequestMetaUpload returned no meta_uploads");
230
+ }
134
231
 
135
- // 2. S3 PUT.
136
- const contentType = opts.contentType ?? inferContentType(opts.type);
137
- const body = opts.data instanceof Uint8Array
138
- ? opts.data
139
- : opts.data;
140
- const putResp = await fetch(meta.uploadUrl, {
141
- method: "PUT",
142
- body: body as BodyInit,
143
- headers: { "Content-Type": contentType },
144
- duplex: "half",
145
- } as RequestInit & { duplex?: "half" });
146
- if (!putResp.ok) {
147
- throw new Error(`meta upload PUT failed: ${putResp.status} ${putResp.statusText}`);
148
- }
232
+ // 2. S3 PUT — own span so the upload's bytes/duration are
233
+ // visible separately from the API round-trips.
234
+ const contentType =
235
+ opts.contentType ?? inferContentType(opts.type);
236
+ const body =
237
+ opts.data instanceof Uint8Array ? opts.data : opts.data;
238
+ await withCtxSpan(
239
+ "iq.s3.put",
240
+ envelope,
241
+ workerId,
242
+ async () => {
243
+ const handle = getTracingHandle();
244
+ const span = handle?.tracer
245
+ ? undefined
246
+ : undefined;
247
+ void span;
248
+ const putResp = await fetch(meta.uploadUrl, {
249
+ method: "PUT",
250
+ body: body as BodyInit,
251
+ headers: { "Content-Type": contentType },
252
+ duplex: "half",
253
+ } as RequestInit & { duplex?: "half" });
254
+ if (!putResp.ok) {
255
+ throw new Error(
256
+ `meta upload PUT failed: ${putResp.status} ${putResp.statusText}`,
257
+ );
258
+ }
259
+ },
260
+ {
261
+ [SPAN_ATTR.S3_URL_ORIGIN]: safeOrigin(meta.uploadUrl),
262
+ [SPAN_ATTR.S3_BYTES]:
263
+ body instanceof Uint8Array ? body.byteLength : 0,
264
+ "s3.content_type": contentType,
265
+ "s3.meta_type": opts.type,
266
+ },
267
+ );
149
268
 
150
- // 3. ConfirmMetaUpload.
151
- await rpc.internalUploads.confirmMetaUpload(
152
- create(ConfirmMetaUploadRequestSchema, {
153
- updateToken: envelope.updateToken,
154
- metadataId: [meta.metadataId],
155
- }),
269
+ // 3. ConfirmMetaUpload.
270
+ await rpc.internalUploads.confirmMetaUpload(
271
+ create(ConfirmMetaUploadRequestSchema, {
272
+ updateToken: envelope.updateToken,
273
+ metadataId: [meta.metadataId],
274
+ }),
275
+ );
276
+ logger.info("meta.upload.complete", {
277
+ type: opts.type,
278
+ metadataId: meta.metadataId,
279
+ });
280
+ },
281
+ { "ctx.meta_type": opts.type },
156
282
  );
157
- logger.info("meta.upload.complete", {
158
- type: opts.type,
159
- metadataId: meta.metadataId,
160
- });
161
283
  },
162
284
 
163
285
  async setMetadata(metadata: UploadMetadataPartial | UploadMetadataProto) {
164
- // Distinguish a pre-built proto from a partial: protos carry the
165
- // @bufbuild/protobuf $typeName brand.
166
- const isProto = (metadata as UploadMetadataProto).$typeName !== undefined;
167
- let protoMetadata: ReturnType<typeof create<typeof UploadMetadataSchema>>;
168
- let thumbnailVersion: number | undefined;
169
- if (isProto) {
170
- protoMetadata = metadata as ReturnType<typeof create<typeof UploadMetadataSchema>>;
171
- } else {
172
- const partial = metadata as UploadMetadataPartial;
173
- protoMetadata = create(UploadMetadataSchema, {});
174
- // Server-side proto has typed sub-messages; the SDK's
175
- // UploadMetadataPartial is intentionally loose so workers can
176
- // populate just what they computed. Cast into the proto via
177
- // the JSON path — fields that don't map are ignored.
178
- Object.assign(protoMetadata as object, partial);
179
- thumbnailVersion = partial.thumbnailGenerationVersion;
180
- }
286
+ return withCtxSpan(
287
+ "iq.ctx.set_metadata",
288
+ envelope,
289
+ workerId,
290
+ async () => {
291
+ // Distinguish a pre-built proto from a partial: protos carry
292
+ // the @bufbuild/protobuf $typeName brand.
293
+ const isProto =
294
+ (metadata as UploadMetadataProto).$typeName !== undefined;
295
+ let protoMetadata: ReturnType<
296
+ typeof create<typeof UploadMetadataSchema>
297
+ >;
298
+ let thumbnailVersion: number | undefined;
299
+ if (isProto) {
300
+ protoMetadata = metadata as ReturnType<
301
+ typeof create<typeof UploadMetadataSchema>
302
+ >;
303
+ } else {
304
+ const partial = metadata as UploadMetadataPartial;
305
+ protoMetadata = create(UploadMetadataSchema, {});
306
+ // Server-side proto has typed sub-messages; the SDK's
307
+ // UploadMetadataPartial is intentionally loose so workers
308
+ // can populate just what they computed. Cast into the
309
+ // proto via the JSON path — fields that don't map are
310
+ // ignored.
311
+ Object.assign(protoMetadata as object, partial);
312
+ thumbnailVersion = partial.thumbnailGenerationVersion;
313
+ }
181
314
 
182
- await rpc.internalUploads.updateUpload(
183
- create(InternalUpdateUploadRequestSchema, {
184
- updateToken: envelope.updateToken,
185
- metadata: protoMetadata,
186
- thumbnailGenerationVersion: thumbnailVersion,
187
- }),
315
+ await rpc.internalUploads.updateUpload(
316
+ create(InternalUpdateUploadRequestSchema, {
317
+ updateToken: envelope.updateToken,
318
+ metadata: protoMetadata,
319
+ thumbnailGenerationVersion: thumbnailVersion,
320
+ }),
321
+ );
322
+ logger.debug("metadata.set", { thumbnailVersion });
323
+ },
188
324
  );
189
- logger.debug("metadata.set", { thumbnailVersion });
190
325
  },
191
326
 
192
327
  async markHasThumbnail(version: number) {
193
- await rpc.internalUploads.updateUpload(
194
- create(InternalUpdateUploadRequestSchema, {
195
- updateToken: envelope.updateToken,
196
- hasThumbnail: true,
197
- thumbnailGenerationVersion: version,
198
- }),
328
+ return withCtxSpan(
329
+ "iq.ctx.mark_has_thumbnail",
330
+ envelope,
331
+ workerId,
332
+ async () => {
333
+ await rpc.internalUploads.updateUpload(
334
+ create(InternalUpdateUploadRequestSchema, {
335
+ updateToken: envelope.updateToken,
336
+ hasThumbnail: true,
337
+ thumbnailGenerationVersion: version,
338
+ }),
339
+ );
340
+ logger.info("thumbnail.marked", { version });
341
+ },
342
+ { "ctx.thumbnail_version": version },
199
343
  );
200
- logger.info("thumbnail.marked", { version });
201
344
  },
202
345
 
203
346
  async setDescription(text: string, tags?: string[]) {
204
- await rpc.internalUploads.updateUpload(
205
- create(InternalUpdateUploadRequestSchema, {
206
- updateToken: envelope.updateToken,
207
- aiMetadata: create(InternalUpdateUploadRequest_AiMetadataSchema, {
208
- description: text,
209
- generatedTags: tags ?? [],
210
- }),
211
- }),
347
+ return withCtxSpan(
348
+ "iq.ctx.set_description",
349
+ envelope,
350
+ workerId,
351
+ async () => {
352
+ await rpc.internalUploads.updateUpload(
353
+ create(InternalUpdateUploadRequestSchema, {
354
+ updateToken: envelope.updateToken,
355
+ aiMetadata: create(
356
+ InternalUpdateUploadRequest_AiMetadataSchema,
357
+ {
358
+ description: text,
359
+ generatedTags: tags ?? [],
360
+ },
361
+ ),
362
+ }),
363
+ );
364
+ logger.info("description.set", {
365
+ length: text.length,
366
+ tags: tags?.length ?? 0,
367
+ });
368
+ },
369
+ {
370
+ "ctx.description_length": text.length,
371
+ "ctx.tag_count": tags?.length ?? 0,
372
+ },
212
373
  );
213
- logger.info("description.set", {
214
- length: text.length,
215
- tags: tags?.length ?? 0,
216
- });
217
374
  },
218
375
 
219
376
  async withPresignedUploads<T>(
220
377
  opts: PresignedUploadRequest[],
221
378
  callback: (handles: import("./types.js").MetaUploadHandle[]) => Promise<T>,
222
379
  ): Promise<T> {
223
- logger.debug("presigned.upload.request", {
224
- count: opts.length,
225
- types: opts.map((o) => o.type),
226
- });
227
- const metaTypes = opts.map((opt) => create(MetaTypeSchema, {
228
- type: META_TYPE_MAP[opt.type],
229
- n: opt.instance,
230
- filename: opt.filename,
231
- }));
232
- const requested = await rpc.internalUploads.requestMetaUpload(
233
- create(RequestMetaUploadRequestSchema, {
234
- updateToken: envelope.updateToken,
235
- metaType: metaTypes,
236
- }),
380
+ return withCtxSpan(
381
+ "iq.ctx.with_presigned_uploads",
382
+ envelope,
383
+ workerId,
384
+ async () => {
385
+ logger.debug("presigned.upload.request", {
386
+ count: opts.length,
387
+ types: opts.map((o) => o.type),
388
+ });
389
+ const metaTypes = opts.map((opt) =>
390
+ create(MetaTypeSchema, {
391
+ type: META_TYPE_MAP[opt.type],
392
+ n: opt.instance,
393
+ filename: opt.filename,
394
+ }),
395
+ );
396
+ const requested = await rpc.internalUploads.requestMetaUpload(
397
+ create(RequestMetaUploadRequestSchema, {
398
+ updateToken: envelope.updateToken,
399
+ metaType: metaTypes,
400
+ }),
401
+ );
402
+ if (requested.metaUploads.length !== opts.length) {
403
+ throw new Error(
404
+ `RequestMetaUpload returned ${requested.metaUploads.length} URLs, expected ${opts.length}`,
405
+ );
406
+ }
407
+ const handles = requested.metaUploads.map((m) => ({
408
+ uploadUrl: m.uploadUrl,
409
+ downloadUrl: m.downloadUrl,
410
+ metadataId: m.metadataId,
411
+ }));
412
+ const result = await callback(handles);
413
+ // Confirm only after the callback resolves cleanly — on
414
+ // throw, the rows stay unfinalised and will be cleaned up
415
+ // by the next confirm sweep or expire naturally.
416
+ await rpc.internalUploads.confirmMetaUpload(
417
+ create(ConfirmMetaUploadRequestSchema, {
418
+ updateToken: envelope.updateToken,
419
+ metadataId: requested.metaUploads.map((m) => m.metadataId),
420
+ }),
421
+ );
422
+ logger.info("presigned.upload.complete", {
423
+ count: handles.length,
424
+ });
425
+ return result;
426
+ },
427
+ { "ctx.upload_count": opts.length },
237
428
  );
238
- if (requested.metaUploads.length !== opts.length) {
239
- throw new Error(
240
- `RequestMetaUpload returned ${requested.metaUploads.length} URLs, expected ${opts.length}`,
241
- );
242
- }
243
- const handles = requested.metaUploads.map((m) => ({
244
- uploadUrl: m.uploadUrl,
245
- downloadUrl: m.downloadUrl,
246
- metadataId: m.metadataId,
247
- }));
248
- const result = await callback(handles);
249
- // Confirm only after the callback resolves cleanly — on throw,
250
- // the rows stay unfinalised and will be cleaned up by the next
251
- // confirm sweep or expire naturally.
252
- await rpc.internalUploads.confirmMetaUpload(
253
- create(ConfirmMetaUploadRequestSchema, {
254
- updateToken: envelope.updateToken,
255
- metadataId: requested.metaUploads.map((m) => m.metadataId),
256
- }),
257
- );
258
- logger.info("presigned.upload.complete", { count: handles.length });
259
- return result;
260
429
  },
261
430
  };
262
431
 
263
432
  return ctx;
264
433
  }
265
434
 
435
+ function safeOrigin(url: string): string {
436
+ try {
437
+ const u = new URL(url);
438
+ return `${u.protocol}//${u.host}${u.pathname}`;
439
+ } catch {
440
+ return "<malformed>";
441
+ }
442
+ }
443
+
266
444
  function inferContentType(metaType: UploadMetaOpts["type"]): string {
267
445
  switch (metaType) {
268
446
  case "THUMBNAIL_LIGHT":