@xeonr/upload-pool-sdk 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/pool.ts CHANGED
@@ -3,6 +3,11 @@
3
3
  * to the pool's job stream, dispatches job:dispatch events to the right
4
4
  * handler keyed by content type URN, and orchestrates accept/complete/
5
5
  * report-error against IntegrationQueueService.
6
+ *
7
+ * Every transition emits a structured log line via the injected Logger
8
+ * so a worker's k8s logs surface SSE connection state, dispatched jobs,
9
+ * handler outcomes, and RPC errors without callers needing to wire up
10
+ * their own onError hook.
6
11
  */
7
12
  import { hostname } from "node:os";
8
13
  import { randomBytes } from "node:crypto";
@@ -16,6 +21,7 @@ import { createRpcClients, type RpcClients } from "./rpc-clients.js";
16
21
  import { SseClient } from "./sse-client.js";
17
22
  import { createJobContext, type JobEnvelope } from "./job-context.js";
18
23
  import { NonRetryableError } from "./errors.js";
24
+ import { JsonLogger, type Logger } from "./logger.js";
19
25
  import type {
20
26
  JobContext,
21
27
  JobHandler,
@@ -26,6 +32,7 @@ export class Pool {
26
32
  private readonly config: PoolConfig;
27
33
  private readonly rpc: RpcClients;
28
34
  private readonly sse: SseClient;
35
+ private readonly logger: Logger;
29
36
  private inFlight = 0;
30
37
  private readonly workerId: string;
31
38
  private readonly capabilities: string[];
@@ -33,46 +40,85 @@ export class Pool {
33
40
  constructor(config: PoolConfig) {
34
41
  this.config = {
35
42
  concurrency: 1,
36
- onError: defaultErrorHandler,
37
43
  ...config,
38
44
  };
39
45
  this.workerId = config.workerId ?? `${hostname()}-${randomBytes(4).toString("hex")}`;
40
46
  this.capabilities = Object.keys(config.handlers);
41
- this.rpc = createRpcClients(config.endpoint);
47
+ this.logger = (config.logger ?? new JsonLogger()).child({
48
+ workerId: this.workerId,
49
+ });
50
+ this.rpc = createRpcClients({
51
+ apiEndpoint: config.apiEndpoint,
52
+ pipelineEndpoint: config.pipelineEndpoint,
53
+ logger: this.logger,
54
+ });
42
55
  this.sse = new SseClient({
43
- endpoint: config.endpoint,
56
+ endpoint: config.pipelineEndpoint,
44
57
  token: config.token,
45
58
  workerId: this.workerId,
46
59
  capabilities: this.capabilities,
60
+ logger: this.logger.child({ component: "sse" }),
47
61
  onConnected: () => {
48
- /* lifecycle: future event emitter hook */
62
+ /* logged inside sse-client; callers can hook lifecycle here later */
49
63
  },
50
64
  onDisconnected: () => {
51
- /* lifecycle: future event emitter hook */
65
+ /* logged inside sse-client */
52
66
  },
53
67
  onJobDispatch: (payload) => this.handleDispatch(payload as JobEnvelope),
54
68
  });
69
+
70
+ this.logger.info("sdk.boot", {
71
+ apiEndpoint: config.apiEndpoint,
72
+ pipelineEndpoint: config.pipelineEndpoint,
73
+ capabilities: this.capabilities,
74
+ concurrency: this.config.concurrency,
75
+ version: SDK_VERSION,
76
+ });
77
+
78
+ if (this.capabilities.length === 0) {
79
+ this.logger.warn("sdk.boot.no_handlers", {
80
+ note: "no handlers registered — worker will not receive any jobs",
81
+ });
82
+ }
55
83
  }
56
84
 
57
85
  async start(): Promise<void> {
86
+ this.logger.info("sdk.start");
58
87
  this.sse.start();
59
88
  }
60
89
 
61
90
  async stop(): Promise<void> {
91
+ this.logger.info("sdk.stop", { inFlight: this.inFlight });
62
92
  this.sse.stop();
63
93
  }
64
94
 
65
95
  private async handleDispatch(envelope: JobEnvelope): Promise<void> {
96
+ const jobLogger = this.logger.child({
97
+ jobId: envelope.jobId,
98
+ uploadId: envelope.uploadId,
99
+ urn: envelope.contentTypeContext.urn,
100
+ });
101
+
66
102
  if (this.inFlight >= (this.config.concurrency ?? 1)) {
67
103
  // Pipeline only dispatches to idle workers (zero in-flight), so
68
104
  // this branch is defensive — if it ever fires we log a warning,
68
104
  // drop the dispatch, and let the pipeline timeout requeue.
106
+ jobLogger.warn("job.dispatched.dropped_at_capacity", {
107
+ inFlight: this.inFlight,
108
+ concurrency: this.config.concurrency,
109
+ });
70
110
  return;
71
111
  }
72
112
  this.inFlight++;
113
+ jobLogger.info("job.dispatched", {
114
+ filename: envelope.filename,
115
+ mimeType: envelope.mimeType,
116
+ inFlight: this.inFlight,
117
+ });
73
118
 
74
119
  const handler = this.resolveHandler(envelope.contentTypeContext.urn);
75
- const ctx = createJobContext(envelope, this.rpc);
120
+ const ctx = createJobContext(envelope, this.rpc, jobLogger);
121
+ const startedAt = Date.now();
76
122
 
77
123
  try {
78
124
  // AcceptJob — clears the pipeline's accept-timeout.
@@ -83,11 +129,16 @@ export class Pool {
83
129
  queueToken: this.config.token,
84
130
  }),
85
131
  );
132
+ jobLogger.info("job.accepted");
86
133
 
87
134
  if (!handler) {
135
+ jobLogger.warn("job.unhandled", {
136
+ availableHandlers: this.capabilities,
137
+ });
88
138
  await this.reportError(
89
139
  envelope.jobId,
90
140
  ctx,
141
+ jobLogger,
91
142
  new NonRetryableError(`no handler for URN ${envelope.contentTypeContext.urn}`),
92
143
  );
93
144
  return;
@@ -102,8 +153,15 @@ export class Pool {
102
153
  queueToken: this.config.token,
103
154
  }),
104
155
  );
156
+ jobLogger.info("job.completed", {
157
+ durationMs: Date.now() - startedAt,
158
+ });
105
159
  } catch (err) {
106
- await this.reportError(envelope.jobId, ctx, err as Error);
160
+ jobLogger.error("job.failed", {
161
+ err,
162
+ durationMs: Date.now() - startedAt,
163
+ });
164
+ await this.reportError(envelope.jobId, ctx, jobLogger, err as Error);
107
165
  } finally {
108
166
  this.inFlight--;
109
167
  }
@@ -116,6 +174,7 @@ export class Pool {
116
174
  private async reportError(
117
175
  jobId: string,
118
176
  ctx: JobContext,
177
+ jobLogger: Logger,
119
178
  err: Error,
120
179
  ): Promise<void> {
121
180
  const retry = !(err instanceof NonRetryableError);
@@ -130,14 +189,12 @@ export class Pool {
130
189
  retry,
131
190
  }),
132
191
  );
192
+ jobLogger.info("job.error_reported", { retry });
133
193
  } catch (rptErr) {
194
+ jobLogger.error("job.error_report_failed", { err: rptErr });
134
195
  this.config.onError?.(rptErr as Error, ctx);
135
196
  }
136
197
  }
137
198
  }
138
199
 
139
- function defaultErrorHandler(err: Error, ctx?: JobContext): void {
140
- const where = ctx ? `job ${ctx.jobId} (${ctx.contentType.urn})` : "pool";
141
- // eslint-disable-next-line no-console
142
- console.error(`[@xeonr/upload-pool-sdk] ${where}: ${err.message}`);
143
- }
200
+ const SDK_VERSION = "1.2.0";
@@ -1,29 +1,90 @@
1
1
  /**
2
2
  * ConnectRPC clients for the two services the SDK talks to:
3
- * - InternalUploadsService — per-upload callbacks (UpdateUpload,
4
- * RequestMetaUpload, ConfirmMetaUpload, GetProcessingContext).
5
- * Auth: the per-job update_token from the job envelope, passed
6
- * as the in-band `updateToken` field on each request.
7
- * - IntegrationQueueService — accept / complete / report-error.
8
- * Auth: the pool token, passed as `queueToken` on each request.
3
+ * - InternalUploadsService (`apiEndpoint`) — per-upload callbacks
4
+ * (UpdateUpload, RequestMetaUpload, ConfirmMetaUpload,
5
+ * GetProcessingContext). Auth: the per-job `update_token` from the
6
+ * job envelope, passed as the in-band `updateToken` field on each
7
+ * request.
8
+ * - IntegrationQueueService (`pipelineEndpoint`) — accept / complete /
9
+ * report-error. Auth: the pool token, passed as `queueToken` on each
10
+ * request.
11
+ *
12
+ * The two services live on different processes (Go uploads-api vs Node
13
+ * pipeline-api) so we keep them on separate transports and don't try to
14
+ * share a baseUrl.
15
+ *
16
+ * Each request is wrapped in a logging interceptor that emits one
17
+ * `rpc.request` line on dispatch and one of `rpc.response` /
18
+ * `rpc.error` on completion. Latency and connect-error codes are
19
+ * captured so failed RPCs are diagnosable from worker logs alone.
9
20
  */
10
21
  import { createConnectTransport } from "@connectrpc/connect-node";
11
- import { createClient, type Client } from "@connectrpc/connect";
22
+ import {
23
+ createClient,
24
+ type Client,
25
+ type Interceptor,
26
+ ConnectError,
27
+ Code,
28
+ } from "@connectrpc/connect";
12
29
  import { InternalUploadsService } from "./protocol/uplim/api/v1/uploads_pb.js";
13
30
  import { IntegrationQueueService } from "./protocol/uplim/workflow/v1/integration_queue_pb.js";
31
+ import type { Logger } from "./logger.js";
14
32
 
15
33
  export interface RpcClients {
16
34
  internalUploads: Client<typeof InternalUploadsService>;
17
35
  integrationQueue: Client<typeof IntegrationQueueService>;
18
36
  }
19
37
 
20
- export function createRpcClients(endpoint: string): RpcClients {
21
- const transport = createConnectTransport({
22
- baseUrl: endpoint,
38
+ function loggingInterceptor(logger: Logger, target: string): Interceptor {
39
+ return (next) => async (req) => {
40
+ const startedAt = Date.now();
41
+ const method = `${req.service.typeName}/${req.method.name}`;
42
+ logger.debug("rpc.request", { method, target });
43
+ try {
44
+ const res = await next(req);
45
+ logger.debug("rpc.response", {
46
+ method,
47
+ target,
48
+ durationMs: Date.now() - startedAt,
49
+ });
50
+ return res;
51
+ } catch (err) {
52
+ const code =
53
+ err instanceof ConnectError ? Code[err.code] : undefined;
54
+ const message = err instanceof Error ? err.message : String(err);
55
+ logger.error("rpc.error", {
56
+ method,
57
+ target,
58
+ code,
59
+ message,
60
+ durationMs: Date.now() - startedAt,
61
+ });
62
+ throw err;
63
+ }
64
+ };
65
+ }
66
+
67
+ export interface RpcClientsConfig {
68
+ apiEndpoint: string;
69
+ pipelineEndpoint: string;
70
+ logger: Logger;
71
+ }
72
+
73
+ export function createRpcClients(config: RpcClientsConfig): RpcClients {
74
+ const rpcLogger = config.logger.child({ component: "rpc" });
75
+
76
+ const apiTransport = createConnectTransport({
77
+ baseUrl: config.apiEndpoint,
78
+ httpVersion: "1.1",
79
+ interceptors: [loggingInterceptor(rpcLogger, "api")],
80
+ });
81
+ const pipelineTransport = createConnectTransport({
82
+ baseUrl: config.pipelineEndpoint,
23
83
  httpVersion: "1.1",
84
+ interceptors: [loggingInterceptor(rpcLogger, "pipeline")],
24
85
  });
25
86
  return {
26
- internalUploads: createClient(InternalUploadsService, transport),
27
- integrationQueue: createClient(IntegrationQueueService, transport),
87
+ internalUploads: createClient(InternalUploadsService, apiTransport),
88
+ integrationQueue: createClient(IntegrationQueueService, pipelineTransport),
28
89
  };
29
90
  }
package/src/sse-client.ts CHANGED
@@ -5,14 +5,21 @@
5
5
  *
6
6
  * Reconnect strategy: exponential backoff (1s, 2s, 4s, 8s, capped at 30s).
7
7
  * On `job:dispatch` events the SDK invokes the registered `onJobDispatch` callback.
8
+ *
9
+ * All lifecycle transitions emit structured logs via the injected
10
+ * `Logger` so connection failures are visible in the worker's k8s logs
11
+ * (otherwise a worker that can't connect at all looks identical to one
12
+ * that's idle).
8
13
  */
9
14
  import { EventSource } from "eventsource";
15
+ import type { Logger } from "./logger.js";
10
16
 
11
17
  export interface SseClientConfig {
12
18
  endpoint: string;
13
19
  token: string;
14
20
  workerId: string;
15
21
  capabilities: string[];
22
+ logger: Logger;
16
23
  onConnected: () => void;
17
24
  onDisconnected: (reason: string) => void;
18
25
  onJobDispatch: (payload: unknown) => void;
@@ -25,6 +32,8 @@ export class SseClient {
25
32
  private es: EventSource | null = null;
26
33
  private stopped = false;
27
34
  private reconnectDelay = RECONNECT_INITIAL_MS;
35
+ private connectAttempt = 0;
36
+ private connectedAt: number | null = null;
28
37
 
29
38
  constructor(private readonly config: SseClientConfig) {}
30
39
 
@@ -39,11 +48,13 @@ export class SseClient {
39
48
  this.es.close();
40
49
  this.es = null;
41
50
  }
51
+ this.config.logger.info("sse.stopped");
42
52
  }
43
53
 
44
54
  private connect(): void {
45
55
  if (this.stopped) return;
46
56
 
57
+ this.connectAttempt++;
47
58
  const url = new URL(`${this.config.endpoint}/queue/connect`);
48
59
  url.searchParams.set("queueToken", this.config.token);
49
60
  url.searchParams.set("workerId", this.config.workerId);
@@ -51,31 +62,84 @@ export class SseClient {
51
62
  url.searchParams.set("capabilities", this.config.capabilities.join(","));
52
63
  }
53
64
 
65
+ // Redact the token in the logged URL — the queueToken param is the
66
+ // pool secret, no reason to bake it into telemetry.
67
+ const loggedUrl = new URL(url.toString());
68
+ loggedUrl.searchParams.set("queueToken", "[redacted]");
69
+
70
+ this.config.logger.info("sse.connect.start", {
71
+ url: loggedUrl.toString(),
72
+ attempt: this.connectAttempt,
73
+ capabilities: this.config.capabilities,
74
+ });
75
+
54
76
  this.es = new EventSource(url.toString());
55
77
 
78
+ this.es.addEventListener("open", () => {
79
+ this.config.logger.debug("sse.open");
80
+ });
81
+
56
82
  this.es.addEventListener("connected", () => {
57
83
  this.reconnectDelay = RECONNECT_INITIAL_MS;
84
+ this.connectedAt = Date.now();
85
+ this.config.logger.info("sse.connect.opened", {
86
+ attempt: this.connectAttempt,
87
+ });
58
88
  this.config.onConnected();
59
89
  });
60
90
 
61
91
  this.es.addEventListener("job:dispatch", (event) => {
62
92
  try {
63
93
  const data = JSON.parse((event as MessageEvent).data);
94
+ this.config.logger.debug("sse.event.job_dispatch", {
95
+ jobId: (data as { jobId?: string }).jobId,
96
+ });
64
97
  this.config.onJobDispatch(data);
65
98
  } catch (err) {
66
- this.config.onDisconnected(`bad job payload: ${(err as Error).message}`);
99
+ const reason = `bad job payload: ${(err as Error).message}`;
100
+ this.config.logger.error("sse.event.parse_error", {
101
+ err,
102
+ rawData: (event as MessageEvent).data,
103
+ });
104
+ this.config.onDisconnected(reason);
67
105
  }
68
106
  });
69
107
 
70
- // heartbeat events are no-ops receipt alone keeps the connection alive.
108
+ // heartbeat events are no-ops at the dispatch layer — receipt alone
109
+ // keeps the connection alive. We log at debug so noisy keep-alive
110
+ // traffic doesn't drown the info stream.
111
+ this.es.addEventListener("heartbeat", () => {
112
+ this.config.logger.debug("sse.event.heartbeat");
113
+ });
71
114
 
72
- this.es.onerror = () => {
115
+ this.es.onerror = (err: unknown) => {
73
116
  if (this.stopped) return;
74
- this.config.onDisconnected("sse error");
117
+ const errMessage =
118
+ (err as { message?: string; type?: string; status?: number })?.message ??
119
+ (err as { type?: string })?.type ??
120
+ "unknown";
121
+ const status = (err as { status?: number; code?: number })?.status ??
122
+ (err as { code?: number })?.code;
123
+ const wasConnected = this.connectedAt !== null;
124
+ const uptimeMs = wasConnected ? Date.now() - this.connectedAt! : null;
125
+
126
+ this.config.logger.warn("sse.disconnected", {
127
+ message: errMessage,
128
+ status,
129
+ wasConnected,
130
+ uptimeMs,
131
+ attempt: this.connectAttempt,
132
+ nextReconnectMs: this.reconnectDelay,
133
+ });
134
+
135
+ this.config.onDisconnected(`sse error: ${errMessage}`);
136
+ this.connectedAt = null;
137
+
75
138
  if (this.es) {
76
139
  this.es.close();
77
140
  this.es = null;
78
141
  }
142
+
79
143
  setTimeout(() => this.connect(), this.reconnectDelay);
80
144
  this.reconnectDelay = Math.min(this.reconnectDelay * 2, RECONNECT_MAX_MS);
81
145
  };
package/src/types.ts CHANGED
@@ -17,16 +17,47 @@
17
17
  export interface PoolConfig {
18
18
  /** Pool token, prefixed with "tpq_". From the pool's creation response. */
19
19
  token: string;
20
- /** API endpoint base URL, e.g. "https://api.upl.im". */
21
- endpoint: string;
20
+ /**
21
+ * Public uploads-api base URL, e.g. "https://uploads-api.xeonr.dev".
22
+ * Hosts `InternalUploadsService` (RequestMetaUpload, ConfirmMetaUpload,
23
+ * UpdateUpload, GetProcessingContext).
24
+ *
25
+ * Use the public https URL even when running inside the same k8s cluster
26
+ * as the API — keeps third-party workers and first-party workers on a
27
+ * single code path (TLS, normal ingress, no service-mesh assumptions).
28
+ */
29
+ apiEndpoint: string;
30
+ /**
31
+ * Public pipeline-api base URL, e.g. "https://uploads-pipeline-api.xeonr.dev".
32
+ * Hosts the SSE `/queue/connect` endpoint and `IntegrationQueueService`
33
+ * (AcceptJob, CompleteJob, ReportError).
34
+ *
35
+ * Distinct from `apiEndpoint` because the pipeline runs as a separate
36
+ * Node service; conflating the two would silently route worker traffic
37
+ * to the wrong process.
38
+ */
39
+ pipelineEndpoint: string;
22
40
  /** Optional worker identifier. Defaults to hostname + random suffix. */
23
41
  workerId?: string;
24
42
  /** Handlers keyed by content type URN (e.g. "default:image"). */
25
43
  handlers: Record<string, JobHandler>;
26
44
  /** Fallback handler if URN has no specific entry in `handlers`. Optional. */
27
45
  onUnhandled?: JobHandler;
28
- /** Lifecycle / error hook. Defaults to console-based logging. */
46
+ /**
47
+ * Lifecycle / error hook. Receives every handler exception alongside
48
+ * the job context. The SDK also logs every failure via `logger` —
49
+ * `onError` is for hooks that need to fan out to external systems
50
+ * (sentry, pagerduty). Optional.
51
+ */
29
52
  onError?: (err: Error, ctx?: JobContext) => void;
53
+ /**
54
+ * Structured logger. Defaults to a JSON-line logger that writes to
55
+ * stdout (`info`/`debug`) and stderr (`warn`/`error`), honoring
56
+ * `UPL_LOG_LEVEL`. Pass `noopLogger` from this package for silence,
57
+ * or supply your own implementing the `Logger` interface to bridge
58
+ * to pino/winston.
59
+ */
60
+ logger?: import("./logger.js").Logger;
30
61
  /** Max concurrent in-flight jobs. Defaults to 1. */
31
62
  concurrency?: number;
32
63
  }