@xeonr/upload-pool-sdk 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,7 @@
7
7
  */
8
8
  import { writeFile } from "node:fs/promises";
9
9
  import { create } from "@bufbuild/protobuf";
10
+ import { SpanStatusCode } from "@opentelemetry/api";
10
11
  import {
11
12
  ConfirmMetaUploadRequestSchema,
12
13
  InternalUpdateUploadRequestSchema,
@@ -18,6 +19,12 @@ import {
18
19
  import { UploadMetadataSchema } from "./protocol/uplim/api/v1/metadata_pb.js";
19
20
  import type { RpcClients } from "./rpc-clients.js";
20
21
  import type { Logger } from "./logger.js";
22
+ import {
23
+ SPAN_ATTR,
24
+ getTracingHandle,
25
+ recordSpanError,
26
+ stampJobAttributes,
27
+ } from "./tracing.js";
21
28
  import type {
22
29
  FolderContext,
23
30
  JobContext,
@@ -50,13 +57,82 @@ export interface JobEnvelope {
50
57
  config?: string; // base64-encoded
51
58
  };
52
59
  folderContext?: FolderContext;
60
+ /**
61
+ * W3C traceparent string forwarded by pipeline-worker. The SDK parses
62
+ * this and uses it as the parent SpanContext for the iq.job root
63
+ * span so worker callbacks nest under the dispatching parseUpload
64
+ * trace. Optional — may be absent for legacy direct-dispatch paths.
65
+ */
66
+ traceContext?: string;
67
+ /**
68
+ * The pipeline workflow run id (workflow_runs.run_id) that owns this
69
+ * job. The SDK stamps it on every span as the `pipeline.run_id`
70
+ * attribute so pipeline-api's trace-store receiver associates the
71
+ * span with the right run. Optional in payload; spans without it
72
+ * are dropped by the receiver.
73
+ */
74
+ wfRunId?: string;
75
+ }
76
+
77
/**
 * Run `fn` inside a span called `name`, stamped with the job's identifiers
 * (run id, job id, upload id, content-type URN and worker id).
 *
 * When `getTracingHandle()` returns nothing, `fn` is invoked directly and no
 * span is created.
 *
 * The span always carries `ctx.duration_ms`, on success and on failure, so
 * failed callbacks report their latency too.
 *
 * @param name - Span name, e.g. `iq.ctx.download`.
 * @param envelope - Job envelope supplying the identifiers stamped on the span.
 * @param workerId - Optional worker identifier forwarded to the stamped attributes.
 * @param fn - The async work to run inside the span.
 * @param attrs - Extra span attributes, applied after the job attributes.
 * @returns Whatever `fn` resolves to.
 * @throws Re-throws any error raised by `fn` after recording it on the span.
 */
function withCtxSpan<T>(
  name: string,
  envelope: JobEnvelope,
  workerId: string | undefined,
  fn: () => Promise<T>,
  attrs?: Record<string, string | number | boolean>,
): Promise<T> {
  const handle = getTracingHandle();
  if (!handle) {
    return fn();
  }
  return handle.tracer.startActiveSpan(name, async (span) => {
    stampJobAttributes(span, {
      // An absent run id is stamped as the empty string.
      runId: envelope.wfRunId ?? "",
      jobId: envelope.jobId,
      uploadId: envelope.uploadId,
      urn: envelope.contentTypeContext.urn,
      workerId,
    });
    if (attrs) {
      for (const [key, value] of Object.entries(attrs)) {
        span.setAttribute(key, value);
      }
    }
    const startedAt = Date.now();
    try {
      const result = await fn();
      span.setStatus({ code: SpanStatusCode.OK });
      return result;
    } catch (err) {
      recordSpanError(span, err);
      throw err;
    } finally {
      // Set before end(): attributes written after a span ends are dropped.
      span.setAttribute("ctx.duration_ms", Date.now() - startedAt);
      span.end();
    }
  });
}
124
+
125
/** Optional settings accepted by `createJobContext`. */
export interface CreateJobContextOpts {
  /**
   * Identifier of the worker running the job. When set, it is forwarded to
   * the attribute stamping of every span opened by the context's methods.
   */
  workerId?: string;
}
54
128
 
55
129
  export function createJobContext(
56
130
  envelope: JobEnvelope,
57
131
  rpc: RpcClients,
58
132
  logger: Logger,
133
+ opts: CreateJobContextOpts = {},
59
134
  ): JobContext {
135
+ const workerId = opts.workerId;
60
136
  const ctx: JobContext = {
61
137
  jobId: envelope.jobId,
62
138
  uploadId: envelope.uploadId,
@@ -73,196 +149,300 @@ export function createJobContext(
73
149
  sourceUrl: envelope.sourceUrl,
74
150
 
75
151
  async download() {
76
- logger.debug("source.download.start");
77
- const resp = await fetch(envelope.sourceUrl);
78
- if (!resp.ok || !resp.body) {
79
- logger.error("source.download.failed", {
80
- status: resp.status,
81
- statusText: resp.statusText,
82
- });
83
- throw new Error(`source download failed: ${resp.status} ${resp.statusText}`);
84
- }
85
- return resp.body;
152
+ return withCtxSpan("iq.ctx.download", envelope, workerId, async () => {
153
+ logger.debug("source.download.start");
154
+ const resp = await fetch(envelope.sourceUrl);
155
+ if (!resp.ok || !resp.body) {
156
+ logger.error("source.download.failed", {
157
+ status: resp.status,
158
+ statusText: resp.statusText,
159
+ });
160
+ throw new Error(
161
+ `source download failed: ${resp.status} ${resp.statusText}`,
162
+ );
163
+ }
164
+ return resp.body;
165
+ }, { "ctx.mode": "stream" });
86
166
  },
87
167
 
88
168
  async downloadBuffer() {
89
- logger.debug("source.download.start", { mode: "buffer" });
90
- const resp = await fetch(envelope.sourceUrl);
91
- if (!resp.ok) {
92
- logger.error("source.download.failed", {
93
- status: resp.status,
94
- statusText: resp.statusText,
95
- });
96
- throw new Error(`source download failed: ${resp.status} ${resp.statusText}`);
97
- }
98
- const buf = await resp.arrayBuffer();
99
- logger.debug("source.download.complete", { sizeBytes: buf.byteLength });
100
- return new Uint8Array(buf);
169
+ return withCtxSpan("iq.ctx.download", envelope, workerId, async () => {
170
+ logger.debug("source.download.start", { mode: "buffer" });
171
+ const resp = await fetch(envelope.sourceUrl);
172
+ if (!resp.ok) {
173
+ logger.error("source.download.failed", {
174
+ status: resp.status,
175
+ statusText: resp.statusText,
176
+ });
177
+ throw new Error(
178
+ `source download failed: ${resp.status} ${resp.statusText}`,
179
+ );
180
+ }
181
+ const buf = await resp.arrayBuffer();
182
+ logger.debug("source.download.complete", { sizeBytes: buf.byteLength });
183
+ return new Uint8Array(buf);
184
+ }, { "ctx.mode": "buffer" });
101
185
  },
102
186
 
103
187
  async downloadToFile(path: string) {
104
- const data = await ctx.downloadBuffer();
105
- await writeFile(path, data);
106
- logger.debug("source.download.to_file", { path, sizeBytes: data.byteLength });
188
+ return withCtxSpan(
189
+ "iq.ctx.download_to_file",
190
+ envelope,
191
+ workerId,
192
+ async () => {
193
+ const data = await ctx.downloadBuffer();
194
+ await writeFile(path, data);
195
+ logger.debug("source.download.to_file", {
196
+ path,
197
+ sizeBytes: data.byteLength,
198
+ });
199
+ },
200
+ );
107
201
  },
108
202
 
109
203
  async uploadMeta(opts: UploadMetaOpts) {
110
- logger.debug("meta.upload.start", {
111
- type: opts.type,
112
- instance: opts.instance,
113
- filename: opts.filename,
114
- });
115
- const protoType = META_TYPE_MAP[opts.type];
116
- const metaType = create(MetaTypeSchema, {
117
- type: protoType,
118
- n: opts.instance,
119
- filename: opts.filename,
120
- });
204
+ return withCtxSpan(
205
+ "iq.ctx.upload_meta",
206
+ envelope,
207
+ workerId,
208
+ async () => {
209
+ logger.debug("meta.upload.start", {
210
+ type: opts.type,
211
+ instance: opts.instance,
212
+ filename: opts.filename,
213
+ });
214
+ const protoType = META_TYPE_MAP[opts.type];
215
+ const metaType = create(MetaTypeSchema, {
216
+ type: protoType,
217
+ n: opts.instance,
218
+ filename: opts.filename,
219
+ });
121
220
 
122
- // 1. RequestMetaUpload → presigned URL.
123
- const requested = await rpc.internalUploads.requestMetaUpload(
124
- create(RequestMetaUploadRequestSchema, {
125
- updateToken: envelope.updateToken,
126
- metaType: [metaType],
127
- }),
128
- );
221
+ // 1. RequestMetaUpload → presigned URL.
222
+ const requested = await rpc.internalUploads.requestMetaUpload(
223
+ create(RequestMetaUploadRequestSchema, {
224
+ updateToken: envelope.updateToken,
225
+ metaType: [metaType],
226
+ }),
227
+ );
129
228
 
130
- const meta = requested.metaUploads[0];
131
- if (!meta) {
132
- throw new Error("RequestMetaUpload returned no meta_uploads");
133
- }
229
+ const meta = requested.metaUploads[0];
230
+ if (!meta) {
231
+ throw new Error("RequestMetaUpload returned no meta_uploads");
232
+ }
134
233
 
135
- // 2. S3 PUT.
136
- const contentType = opts.contentType ?? inferContentType(opts.type);
137
- const body = opts.data instanceof Uint8Array
138
- ? opts.data
139
- : opts.data;
140
- const putResp = await fetch(meta.uploadUrl, {
141
- method: "PUT",
142
- body: body as BodyInit,
143
- headers: { "Content-Type": contentType },
144
- duplex: "half",
145
- } as RequestInit & { duplex?: "half" });
146
- if (!putResp.ok) {
147
- throw new Error(`meta upload PUT failed: ${putResp.status} ${putResp.statusText}`);
148
- }
234
+ // 2. S3 PUT — own span so the upload's bytes/duration are
235
+ // visible separately from the API round-trips.
236
+ const contentType =
237
+ opts.contentType ?? inferContentType(opts.type);
238
+ const body =
239
+ opts.data instanceof Uint8Array ? opts.data : opts.data;
240
+ await withCtxSpan(
241
+ "iq.s3.put",
242
+ envelope,
243
+ workerId,
244
+ async () => {
245
+ const handle = getTracingHandle();
246
+ const span = handle?.tracer
247
+ ? undefined
248
+ : undefined;
249
+ void span;
250
+ const putResp = await fetch(meta.uploadUrl, {
251
+ method: "PUT",
252
+ body: body as BodyInit,
253
+ headers: { "Content-Type": contentType },
254
+ duplex: "half",
255
+ } as RequestInit & { duplex?: "half" });
256
+ if (!putResp.ok) {
257
+ throw new Error(
258
+ `meta upload PUT failed: ${putResp.status} ${putResp.statusText}`,
259
+ );
260
+ }
261
+ },
262
+ {
263
+ [SPAN_ATTR.S3_URL_ORIGIN]: safeOrigin(meta.uploadUrl),
264
+ [SPAN_ATTR.S3_BYTES]:
265
+ body instanceof Uint8Array ? body.byteLength : 0,
266
+ "s3.content_type": contentType,
267
+ "s3.meta_type": opts.type,
268
+ },
269
+ );
149
270
 
150
- // 3. ConfirmMetaUpload.
151
- await rpc.internalUploads.confirmMetaUpload(
152
- create(ConfirmMetaUploadRequestSchema, {
153
- updateToken: envelope.updateToken,
154
- metadataId: [meta.metadataId],
155
- }),
271
+ // 3. ConfirmMetaUpload.
272
+ await rpc.internalUploads.confirmMetaUpload(
273
+ create(ConfirmMetaUploadRequestSchema, {
274
+ updateToken: envelope.updateToken,
275
+ metadataId: [meta.metadataId],
276
+ }),
277
+ );
278
+ logger.info("meta.upload.complete", {
279
+ type: opts.type,
280
+ metadataId: meta.metadataId,
281
+ });
282
+ },
283
+ { "ctx.meta_type": opts.type },
156
284
  );
157
- logger.info("meta.upload.complete", {
158
- type: opts.type,
159
- metadataId: meta.metadataId,
160
- });
161
285
  },
162
286
 
163
287
  async setMetadata(metadata: UploadMetadataPartial | UploadMetadataProto) {
164
- // Distinguish a pre-built proto from a partial: protos carry the
165
- // @bufbuild/protobuf $typeName brand.
166
- const isProto = (metadata as UploadMetadataProto).$typeName !== undefined;
167
- let protoMetadata: ReturnType<typeof create<typeof UploadMetadataSchema>>;
168
- let thumbnailVersion: number | undefined;
169
- if (isProto) {
170
- protoMetadata = metadata as ReturnType<typeof create<typeof UploadMetadataSchema>>;
171
- } else {
172
- const partial = metadata as UploadMetadataPartial;
173
- protoMetadata = create(UploadMetadataSchema, {});
174
- // Server-side proto has typed sub-messages; the SDK's
175
- // UploadMetadataPartial is intentionally loose so workers can
176
- // populate just what they computed. Cast into the proto via
177
- // the JSON path — fields that don't map are ignored.
178
- Object.assign(protoMetadata as object, partial);
179
- thumbnailVersion = partial.thumbnailGenerationVersion;
180
- }
288
+ return withCtxSpan(
289
+ "iq.ctx.set_metadata",
290
+ envelope,
291
+ workerId,
292
+ async () => {
293
+ // Distinguish a pre-built proto from a partial: protos carry
294
+ // the @bufbuild/protobuf $typeName brand.
295
+ const isProto =
296
+ (metadata as UploadMetadataProto).$typeName !== undefined;
297
+ let protoMetadata: ReturnType<
298
+ typeof create<typeof UploadMetadataSchema>
299
+ >;
300
+ let thumbnailVersion: number | undefined;
301
+ if (isProto) {
302
+ protoMetadata = metadata as ReturnType<
303
+ typeof create<typeof UploadMetadataSchema>
304
+ >;
305
+ } else {
306
+ const partial = metadata as UploadMetadataPartial;
307
+ protoMetadata = create(UploadMetadataSchema, {});
308
+ // Server-side proto has typed sub-messages; the SDK's
309
+ // UploadMetadataPartial is intentionally loose so workers
310
+ // can populate just what they computed. Cast into the
311
+ // proto via the JSON path — fields that don't map are
312
+ // ignored.
313
+ Object.assign(protoMetadata as object, partial);
314
+ thumbnailVersion = partial.thumbnailGenerationVersion;
315
+ }
181
316
 
182
- await rpc.internalUploads.updateUpload(
183
- create(InternalUpdateUploadRequestSchema, {
184
- updateToken: envelope.updateToken,
185
- metadata: protoMetadata,
186
- thumbnailGenerationVersion: thumbnailVersion,
187
- }),
317
+ await rpc.internalUploads.updateUpload(
318
+ create(InternalUpdateUploadRequestSchema, {
319
+ updateToken: envelope.updateToken,
320
+ metadata: protoMetadata,
321
+ thumbnailGenerationVersion: thumbnailVersion,
322
+ }),
323
+ );
324
+ logger.debug("metadata.set", { thumbnailVersion });
325
+ },
188
326
  );
189
- logger.debug("metadata.set", { thumbnailVersion });
190
327
  },
191
328
 
192
329
  async markHasThumbnail(version: number) {
193
- await rpc.internalUploads.updateUpload(
194
- create(InternalUpdateUploadRequestSchema, {
195
- updateToken: envelope.updateToken,
196
- hasThumbnail: true,
197
- thumbnailGenerationVersion: version,
198
- }),
330
+ return withCtxSpan(
331
+ "iq.ctx.mark_has_thumbnail",
332
+ envelope,
333
+ workerId,
334
+ async () => {
335
+ await rpc.internalUploads.updateUpload(
336
+ create(InternalUpdateUploadRequestSchema, {
337
+ updateToken: envelope.updateToken,
338
+ hasThumbnail: true,
339
+ thumbnailGenerationVersion: version,
340
+ }),
341
+ );
342
+ logger.info("thumbnail.marked", { version });
343
+ },
344
+ { "ctx.thumbnail_version": version },
199
345
  );
200
- logger.info("thumbnail.marked", { version });
201
346
  },
202
347
 
203
348
  async setDescription(text: string, tags?: string[]) {
204
- await rpc.internalUploads.updateUpload(
205
- create(InternalUpdateUploadRequestSchema, {
206
- updateToken: envelope.updateToken,
207
- aiMetadata: create(InternalUpdateUploadRequest_AiMetadataSchema, {
208
- description: text,
209
- generatedTags: tags ?? [],
210
- }),
211
- }),
349
+ return withCtxSpan(
350
+ "iq.ctx.set_description",
351
+ envelope,
352
+ workerId,
353
+ async () => {
354
+ await rpc.internalUploads.updateUpload(
355
+ create(InternalUpdateUploadRequestSchema, {
356
+ updateToken: envelope.updateToken,
357
+ aiMetadata: create(
358
+ InternalUpdateUploadRequest_AiMetadataSchema,
359
+ {
360
+ description: text,
361
+ generatedTags: tags ?? [],
362
+ },
363
+ ),
364
+ }),
365
+ );
366
+ logger.info("description.set", {
367
+ length: text.length,
368
+ tags: tags?.length ?? 0,
369
+ });
370
+ },
371
+ {
372
+ "ctx.description_length": text.length,
373
+ "ctx.tag_count": tags?.length ?? 0,
374
+ },
212
375
  );
213
- logger.info("description.set", {
214
- length: text.length,
215
- tags: tags?.length ?? 0,
216
- });
217
376
  },
218
377
 
219
378
  async withPresignedUploads<T>(
220
379
  opts: PresignedUploadRequest[],
221
380
  callback: (handles: import("./types.js").MetaUploadHandle[]) => Promise<T>,
222
381
  ): Promise<T> {
223
- logger.debug("presigned.upload.request", {
224
- count: opts.length,
225
- types: opts.map((o) => o.type),
226
- });
227
- const metaTypes = opts.map((opt) => create(MetaTypeSchema, {
228
- type: META_TYPE_MAP[opt.type],
229
- n: opt.instance,
230
- filename: opt.filename,
231
- }));
232
- const requested = await rpc.internalUploads.requestMetaUpload(
233
- create(RequestMetaUploadRequestSchema, {
234
- updateToken: envelope.updateToken,
235
- metaType: metaTypes,
236
- }),
382
+ return withCtxSpan(
383
+ "iq.ctx.with_presigned_uploads",
384
+ envelope,
385
+ workerId,
386
+ async () => {
387
+ logger.debug("presigned.upload.request", {
388
+ count: opts.length,
389
+ types: opts.map((o) => o.type),
390
+ });
391
+ const metaTypes = opts.map((opt) =>
392
+ create(MetaTypeSchema, {
393
+ type: META_TYPE_MAP[opt.type],
394
+ n: opt.instance,
395
+ filename: opt.filename,
396
+ }),
397
+ );
398
+ const requested = await rpc.internalUploads.requestMetaUpload(
399
+ create(RequestMetaUploadRequestSchema, {
400
+ updateToken: envelope.updateToken,
401
+ metaType: metaTypes,
402
+ }),
403
+ );
404
+ if (requested.metaUploads.length !== opts.length) {
405
+ throw new Error(
406
+ `RequestMetaUpload returned ${requested.metaUploads.length} URLs, expected ${opts.length}`,
407
+ );
408
+ }
409
+ const handles = requested.metaUploads.map((m) => ({
410
+ uploadUrl: m.uploadUrl,
411
+ downloadUrl: m.downloadUrl,
412
+ metadataId: m.metadataId,
413
+ }));
414
+ const result = await callback(handles);
415
+ // Confirm only after the callback resolves cleanly — on
416
+ // throw, the rows stay unfinalised and will be cleaned up
417
+ // by the next confirm sweep or expire naturally.
418
+ await rpc.internalUploads.confirmMetaUpload(
419
+ create(ConfirmMetaUploadRequestSchema, {
420
+ updateToken: envelope.updateToken,
421
+ metadataId: requested.metaUploads.map((m) => m.metadataId),
422
+ }),
423
+ );
424
+ logger.info("presigned.upload.complete", {
425
+ count: handles.length,
426
+ });
427
+ return result;
428
+ },
429
+ { "ctx.upload_count": opts.length },
237
430
  );
238
- if (requested.metaUploads.length !== opts.length) {
239
- throw new Error(
240
- `RequestMetaUpload returned ${requested.metaUploads.length} URLs, expected ${opts.length}`,
241
- );
242
- }
243
- const handles = requested.metaUploads.map((m) => ({
244
- uploadUrl: m.uploadUrl,
245
- downloadUrl: m.downloadUrl,
246
- metadataId: m.metadataId,
247
- }));
248
- const result = await callback(handles);
249
- // Confirm only after the callback resolves cleanly — on throw,
250
- // the rows stay unfinalised and will be cleaned up by the next
251
- // confirm sweep or expire naturally.
252
- await rpc.internalUploads.confirmMetaUpload(
253
- create(ConfirmMetaUploadRequestSchema, {
254
- updateToken: envelope.updateToken,
255
- metadataId: requested.metaUploads.map((m) => m.metadataId),
256
- }),
257
- );
258
- logger.info("presigned.upload.complete", { count: handles.length });
259
- return result;
260
431
  },
261
432
  };
262
433
 
263
434
  return ctx;
264
435
  }
265
436
 
437
/**
 * Reduce a URL to its origin, `scheme://host[:port]`, so it can be recorded
 * as a span attribute without the object path or the query string.
 *
 * @param url - Absolute URL to reduce.
 * @returns The scheme and host of `url`, or `"<malformed>"` when it cannot
 *   be parsed.
 */
function safeOrigin(url: string): string {
  try {
    const { protocol, host } = new URL(url);
    // Assembled by hand rather than read from `URL.origin`, which is the
    // string "null" for non-special schemes.
    return `${protocol}//${host}`;
  } catch {
    return "<malformed>";
  }
}
445
+
266
446
  function inferContentType(metaType: UploadMetaOpts["type"]): string {
267
447
  switch (metaType) {
268
448
  case "THUMBNAIL_LIGHT":