@qlever-llc/trellis 0.10.10 → 0.10.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. package/esm/client.d.ts +2 -0
  2. package/esm/client.d.ts.map +1 -1
  3. package/esm/client.js +2 -0
  4. package/esm/client_connect.d.ts +3 -2
  5. package/esm/client_connect.d.ts.map +1 -1
  6. package/esm/client_connect.js +4 -1
  7. package/esm/errors/TrellisError.d.ts +3 -3
  8. package/esm/errors/TrellisError.js +3 -3
  9. package/esm/server/health.d.ts.map +1 -1
  10. package/esm/server/health.js +34 -3
  11. package/esm/server/internal_jobs/job-manager.d.ts.map +1 -1
  12. package/esm/server/internal_jobs/job-manager.js +32 -1
  13. package/esm/server/runtime.d.ts +3 -0
  14. package/esm/server/runtime.d.ts.map +1 -1
  15. package/esm/server/service.d.ts +15 -0
  16. package/esm/server/service.d.ts.map +1 -1
  17. package/esm/server/service.js +41 -3
  18. package/esm/server.d.ts.map +1 -1
  19. package/esm/server.js +99 -10
  20. package/esm/service/deno.d.ts +1 -1
  21. package/esm/service/deno.d.ts.map +1 -1
  22. package/esm/service/mod.d.ts +1 -1
  23. package/esm/service/mod.d.ts.map +1 -1
  24. package/esm/service/node.d.ts +1 -1
  25. package/esm/service/node.d.ts.map +1 -1
  26. package/esm/service/outbox_inbox.d.ts.map +1 -1
  27. package/esm/service/outbox_inbox.js +14 -0
  28. package/esm/telemetry/core.d.ts.map +1 -1
  29. package/esm/telemetry/core.js +1 -1
  30. package/esm/telemetry/env.d.ts.map +1 -1
  31. package/esm/telemetry/env.js +6 -1
  32. package/esm/telemetry/init.d.ts +3 -0
  33. package/esm/telemetry/init.d.ts.map +1 -0
  34. package/esm/telemetry/init.js +7 -0
  35. package/esm/telemetry/metrics.d.ts +34 -0
  36. package/esm/telemetry/metrics.d.ts.map +1 -0
  37. package/esm/telemetry/metrics.js +181 -0
  38. package/esm/telemetry/mod.d.ts +3 -0
  39. package/esm/telemetry/mod.d.ts.map +1 -1
  40. package/esm/telemetry/mod.js +2 -0
  41. package/esm/telemetry/runtime.d.ts +2 -0
  42. package/esm/telemetry/runtime.d.ts.map +1 -0
  43. package/esm/telemetry/runtime.js +134 -0
  44. package/esm/telemetry.d.ts +3 -0
  45. package/esm/telemetry.d.ts.map +1 -0
  46. package/esm/telemetry.js +2 -0
  47. package/esm/transfer.d.ts.map +1 -1
  48. package/esm/transfer.js +27 -16
  49. package/esm/trellis.d.ts +28 -4
  50. package/esm/trellis.d.ts.map +1 -1
  51. package/esm/trellis.js +575 -80
  52. package/package.json +7 -5
  53. package/script/client.d.ts +2 -0
  54. package/script/client.d.ts.map +1 -1
  55. package/script/client.js +2 -0
  56. package/script/client_connect.d.ts +3 -2
  57. package/script/client_connect.d.ts.map +1 -1
  58. package/script/client_connect.js +4 -1
  59. package/script/errors/TrellisError.d.ts +3 -3
  60. package/script/errors/TrellisError.js +3 -3
  61. package/script/server/health.d.ts.map +1 -1
  62. package/script/server/health.js +34 -3
  63. package/script/server/internal_jobs/job-manager.d.ts.map +1 -1
  64. package/script/server/internal_jobs/job-manager.js +32 -1
  65. package/script/server/runtime.d.ts +3 -0
  66. package/script/server/runtime.d.ts.map +1 -1
  67. package/script/server/service.d.ts +15 -0
  68. package/script/server/service.d.ts.map +1 -1
  69. package/script/server/service.js +40 -2
  70. package/script/server.d.ts.map +1 -1
  71. package/script/server.js +98 -9
  72. package/script/service/deno.d.ts +1 -1
  73. package/script/service/deno.d.ts.map +1 -1
  74. package/script/service/mod.d.ts +1 -1
  75. package/script/service/mod.d.ts.map +1 -1
  76. package/script/service/node.d.ts +1 -1
  77. package/script/service/node.d.ts.map +1 -1
  78. package/script/service/outbox_inbox.d.ts.map +1 -1
  79. package/script/service/outbox_inbox.js +14 -0
  80. package/script/telemetry/core.d.ts.map +1 -1
  81. package/script/telemetry/core.js +1 -1
  82. package/script/telemetry/env.d.ts.map +1 -1
  83. package/script/telemetry/env.js +6 -1
  84. package/script/telemetry/init.d.ts +3 -0
  85. package/script/telemetry/init.d.ts.map +1 -0
  86. package/script/telemetry/init.js +10 -0
  87. package/script/telemetry/metrics.d.ts +34 -0
  88. package/script/telemetry/metrics.d.ts.map +1 -0
  89. package/script/telemetry/metrics.js +186 -0
  90. package/script/telemetry/mod.d.ts +3 -0
  91. package/script/telemetry/mod.d.ts.map +1 -1
  92. package/script/telemetry/mod.js +7 -1
  93. package/script/telemetry/runtime.d.ts +2 -0
  94. package/script/telemetry/runtime.d.ts.map +1 -0
  95. package/script/telemetry/runtime.js +137 -0
  96. package/script/telemetry.d.ts +3 -0
  97. package/script/telemetry.d.ts.map +1 -0
  98. package/script/telemetry.js +18 -0
  99. package/script/transfer.d.ts.map +1 -1
  100. package/script/transfer.js +28 -17
  101. package/script/trellis.d.ts +28 -4
  102. package/script/trellis.d.ts.map +1 -1
  103. package/script/trellis.js +606 -110
  104. package/src/client.ts +4 -0
  105. package/src/client_connect.ts +11 -9
  106. package/src/errors/TrellisError.ts +4 -4
  107. package/src/server/health.ts +41 -3
  108. package/src/server/internal_jobs/job-manager.ts +35 -5
  109. package/src/server/runtime.ts +4 -0
  110. package/src/server/service.ts +75 -3
  111. package/src/server.ts +124 -14
  112. package/src/service/deno.ts +1 -0
  113. package/src/service/mod.ts +1 -0
  114. package/src/service/node.ts +1 -0
  115. package/src/service/outbox_inbox.ts +14 -0
  116. package/src/telemetry/core.ts +1 -1
  117. package/src/telemetry/env.ts +5 -1
  118. package/src/telemetry/init.ts +8 -0
  119. package/src/telemetry/metrics.ts +294 -0
  120. package/src/telemetry/mod.ts +7 -0
  121. package/src/telemetry/runtime.ts +218 -0
  122. package/src/telemetry.ts +2 -0
  123. package/src/transfer.ts +69 -30
  124. package/src/trellis.ts +652 -141
  125. package/esm/tracing.d.ts +0 -5
  126. package/esm/tracing.d.ts.map +0 -1
  127. package/esm/tracing.js +0 -8
  128. package/script/tracing.d.ts +0 -5
  129. package/script/tracing.d.ts.map +0 -1
  130. package/script/tracing.js +0 -27
  131. package/src/tracing.ts +0 -28
package/src/client.ts CHANGED
@@ -50,6 +50,8 @@ type ClientContractManifest = {
50
50
  };
51
51
 
52
52
  type ClientContractShape = {
53
+ CONTRACT_ID?: string;
54
+ CONTRACT_DIGEST?: string;
53
55
  CONTRACT: ClientContractManifest;
54
56
  API: {
55
57
  owned?: TrellisAPI;
@@ -120,6 +122,8 @@ export function createClient<
120
122
  noResponderRetry: opts?.noResponderRetry,
121
123
  api,
122
124
  state: contract[CONTRACT_STATE_METADATA],
125
+ contractId: contract.CONTRACT_ID,
126
+ contractDigest: contract.CONTRACT_DIGEST,
123
127
  },
124
128
  );
125
129
  }
package/src/client_connect.ts CHANGED
@@ -224,13 +224,12 @@ const ClientTransportsSchema = Type.Object({
224
224
  type ClientConnectDeps = {
225
225
  loadTransport(): Promise<RuntimeTransport>;
226
226
  now(): number;
227
- setInterval?: (
228
- handler: () => void,
229
- ms: number,
230
- ) => ReturnType<typeof globalThis.setInterval>;
231
- clearInterval?: (id: ReturnType<typeof globalThis.setInterval>) => void;
227
+ setInterval?: (handler: () => void, ms: number) => IntervalHandle;
228
+ clearInterval?: (id: IntervalHandle) => void;
232
229
  };
233
230
 
231
+ type IntervalHandle = ReturnType<typeof globalThis.setInterval> | number;
232
+
234
233
  const ClientBootstrapReadySchema = Type.Object({
235
234
  status: Type.Literal("ready"),
236
235
  serverNow: Type.Integer(),
@@ -800,11 +799,14 @@ async function createRuntimeUserAuthenticator(args: {
800
799
  ((
801
800
  handler: () => void,
802
801
  ms: number,
803
- ): ReturnType<typeof globalThis.setInterval> =>
804
- globalThis.setInterval(handler, ms));
802
+ ): IntervalHandle => globalThis.setInterval(handler, ms));
805
803
  const clearRefreshInterval = args.deps.clearInterval ??
806
- ((id: ReturnType<typeof globalThis.setInterval>) =>
807
- globalThis.clearInterval(id));
804
+ ((id: IntervalHandle) => {
805
+ const clearIntervalFn = globalThis.clearInterval as (
806
+ id: IntervalHandle,
807
+ ) => void;
808
+ clearIntervalFn(id);
809
+ });
808
810
  const refreshIntervalId = setRefreshInterval(() => {
809
811
  void refresh();
810
812
  }, 10_000);
package/src/errors/TrellisError.ts CHANGED
@@ -1,20 +1,20 @@
1
1
  /**
2
2
  * Base class for all Trellis-specific errors.
3
- * Extends BaseError and relies on the traceId getter being configured via initTracing.
3
+ * Extends BaseError and relies on the traceId getter being configured via initTelemetry.
4
4
  */
5
5
  import { BaseError, type BaseErrorSchema } from "@qlever-llc/result";
6
6
 
7
7
  /**
8
8
  * Abstract base class for Trellis errors.
9
- * Trellis errors automatically include traceId when initTracing() has been called
9
+ * Trellis errors automatically include traceId when initTelemetry() has been called
10
10
  * and a span is active in the current context.
11
11
  *
12
- * The traceId integration is configured by the tracing module's initTracing() function,
12
+ * The traceId integration is configured by the telemetry module's initTelemetry() function,
13
13
  * which sets up BaseError.traceIdGetter to retrieve the traceId from the active span.
14
14
  */
15
15
  export abstract class TrellisError<
16
16
  TData extends BaseErrorSchema = BaseErrorSchema,
17
17
  > extends BaseError<TData> {
18
18
  // TrellisError inherits getTraceId() from BaseError which uses the static traceIdGetter.
19
- // The traceIdGetter is configured by initTracing() in the tracing module.
19
+ // The traceIdGetter is configured by initTelemetry() in the telemetry module.
20
20
  }
package/src/server/health.ts CHANGED
@@ -104,6 +104,25 @@ function summarizeHealthChecks(
104
104
  return `${failedCount} check${failedCount === 1 ? "" : "s"} failing`;
105
105
  }
106
106
 
107
+ function annotateServiceHealthCheck(
108
+ result: HealthCheckResult,
109
+ metadata: { service: string; contractId: string; contractDigest: string },
110
+ ): HealthCheckResult {
111
+ if (result.status !== "failed") {
112
+ return result;
113
+ }
114
+
115
+ return {
116
+ ...result,
117
+ info: {
118
+ ...(result.info ?? {}),
119
+ service: metadata.service,
120
+ contractId: metadata.contractId,
121
+ contractDigest: metadata.contractDigest,
122
+ },
123
+ };
124
+ }
125
+
107
126
  function normalizeLegacyHealthCheck(
108
127
  check: HealthCheckFn,
109
128
  ): ServiceHealthCheckFn {
@@ -114,6 +133,10 @@ function normalizeLegacyHealthCheck(
114
133
  status: "failed",
115
134
  error: result.error.message,
116
135
  summary: result.error.message,
136
+ info: {
137
+ errorType: result.error.name,
138
+ errorId: result.error.id,
139
+ },
117
140
  };
118
141
  }
119
142
 
@@ -206,6 +229,9 @@ export async function runServiceHealthCheck(
206
229
  status: "failed",
207
230
  error: message,
208
231
  summary: message,
232
+ info: {
233
+ errorType: error instanceof Error ? error.name : typeof error,
234
+ },
209
235
  latencyMs,
210
236
  };
211
237
  }
@@ -350,16 +376,28 @@ export class ServiceHealth {
350
376
  }
351
377
 
352
378
  async checks(): Promise<HealthCheckResult[]> {
353
- return await Promise.all(
379
+ const results = await Promise.all(
354
380
  Array.from(this.#checks.entries()).map(([name, check]) =>
355
381
  runServiceHealthCheck(name, check)
356
382
  ),
357
383
  );
384
+ return results.map((result) =>
385
+ annotateServiceHealthCheck(result, {
386
+ service: this.serviceName,
387
+ contractId: this.contractId,
388
+ contractDigest: this.contractDigest,
389
+ })
390
+ );
358
391
  }
359
392
 
360
393
  async response(): Promise<HealthResponse> {
361
- const checks = Object.fromEntries(this.#checks.entries());
362
- return await runAllServiceHealthChecks(this.serviceName, checks);
394
+ const results = await this.checks();
395
+ return {
396
+ status: summarizeHealthStatus(results),
397
+ service: this.serviceName,
398
+ timestamp: new Date().toISOString(),
399
+ checks: results,
400
+ };
363
401
  }
364
402
 
365
403
  async heartbeat(): Promise<Omit<HealthHeartbeat, "header">> {
package/src/server/internal_jobs/job-manager.ts CHANGED
@@ -4,6 +4,7 @@ import {
4
4
  createMapCarrier,
5
5
  injectTraceContext,
6
6
  } from "../../telemetry/carrier.js";
7
+ import { recordTrellisError } from "../../telemetry/mod.js";
7
8
 
8
9
  import {
9
10
  ActiveJob,
@@ -103,11 +104,22 @@ export class JobManager<TPayload = unknown, TResult = unknown> {
103
104
  ): Promise<void> {
104
105
  const binding = this.#getQueueBinding(type);
105
106
  const headers = headersFromJobContext(event.context);
106
- await this.#context.nc.publish(
107
- `${binding.publishPrefix}.${jobId}.${event.eventType}`,
108
- new TextEncoder().encode(JSON.stringify(event)),
109
- { headers },
110
- );
107
+ try {
108
+ await this.#context.nc.publish(
109
+ `${binding.publishPrefix}.${jobId}.${event.eventType}`,
110
+ new TextEncoder().encode(JSON.stringify(event)),
111
+ { headers },
112
+ );
113
+ } catch (error) {
114
+ recordTrellisError(error, {
115
+ surface: "job",
116
+ direction: "worker",
117
+ operation: type,
118
+ phase: "publish",
119
+ messagingSystem: "nats",
120
+ });
121
+ throw error;
122
+ }
111
123
  }
112
124
 
113
125
  async create(
@@ -247,6 +259,12 @@ export class JobManager<TPayload = unknown, TResult = unknown> {
247
259
  if (error instanceof JobProcessError) {
248
260
  const detail = error.message;
249
261
  if (error.kind === "retryable") {
262
+ recordTrellisError(error, {
263
+ surface: "job",
264
+ direction: "worker",
265
+ operation: job.type,
266
+ phase: "handler_result",
267
+ });
250
268
  await this.#publishJobEvent(job.type, job.id, {
251
269
  jobId: job.id,
252
270
  service: job.service,
@@ -262,6 +280,12 @@ export class JobManager<TPayload = unknown, TResult = unknown> {
262
280
  return { outcome: "retry", tries, error: detail };
263
281
  }
264
282
 
283
+ recordTrellisError(error, {
284
+ surface: "job",
285
+ direction: "worker",
286
+ operation: job.type,
287
+ phase: "handler_result",
288
+ });
265
289
  await this.#publishJobEvent(job.type, job.id, {
266
290
  jobId: job.id,
267
291
  service: job.service,
@@ -277,6 +301,12 @@ export class JobManager<TPayload = unknown, TResult = unknown> {
277
301
  return { outcome: "failed", tries, error: detail };
278
302
  }
279
303
 
304
+ recordTrellisError(error, {
305
+ surface: "job",
306
+ direction: "worker",
307
+ operation: job.type,
308
+ phase: "runtime",
309
+ });
280
310
  throw error;
281
311
  }
282
312
  }
package/src/server/runtime.ts CHANGED
@@ -20,8 +20,12 @@ export type NatsCredsAuthenticatorFn = (creds: Uint8Array) => unknown;
20
20
 
21
21
  export type ReadFileSyncFn = (path: string) => Uint8Array;
22
22
 
23
+ /** Initializes telemetry for a service runtime. */
24
+ export type InitTelemetryFn = (serviceName: string) => void;
25
+
23
26
  export type TrellisServiceRuntimeDeps = {
24
27
  connect: NatsConnectFn;
25
28
  credsAuthenticator?: NatsCredsAuthenticatorFn;
26
29
  readFileSync?: ReadFileSyncFn;
30
+ initTelemetry?: InitTelemetryFn;
27
31
  };
package/src/server/service.ts CHANGED
@@ -77,7 +77,10 @@ import type {
77
77
  RpcHandlerContext,
78
78
  RpcHandlerErrorOf,
79
79
  } from "../trellis.js";
80
- import { createTrellisInternal } from "../trellis.js";
80
+ import {
81
+ annotateHandlerBoundaryError,
82
+ createTrellisInternal,
83
+ } from "../trellis.js";
81
84
  import type {
82
85
  NatsConnectFn,
83
86
  NatsConnectOpts,
@@ -125,6 +128,7 @@ import {
125
128
  observeNatsTrellisConnection,
126
129
  type TrellisConnection,
127
130
  } from "../connection.js";
131
+ import { initTelemetry } from "../telemetry/init.js";
128
132
 
129
133
  type ExtraNatsConnectOpts = Omit<
130
134
  NatsConnectOpts,
@@ -428,6 +432,7 @@ async function loadDefaultServiceRuntimeDeps(): Promise<
428
432
  > {
429
433
  const transport = await loadDefaultRuntimeTransport();
430
434
  return {
435
+ initTelemetry,
431
436
  connect: (
432
437
  { servers, token, authenticator, inboxPrefix, ...extraOptions },
433
438
  ) =>
@@ -441,6 +446,12 @@ async function loadDefaultServiceRuntimeDeps(): Promise<
441
446
  };
442
447
  }
443
448
 
449
+ function automaticTelemetryEnabled(
450
+ telemetry: TrellisServiceConnectTelemetryOpts | undefined,
451
+ ): boolean {
452
+ return telemetry !== false && telemetry?.enabled !== false;
453
+ }
454
+
444
455
  const ServiceBootstrapReadySchema = Type.Object({
445
456
  status: Type.Literal("ready"),
446
457
  serverNow: Type.Integer(),
@@ -868,9 +879,20 @@ export type TrellisServiceConnectOpts<
868
879
  contract: ServiceContract<TOwnedApi, TTrellisApi>;
869
880
  name: string;
870
881
  sessionKeySeed: string;
882
+ /**
883
+ * Controls automatic telemetry initialization for this service connection.
884
+ * Enabled by default; pass `false` or `{ enabled: false }` to disable it.
885
+ */
886
+ telemetry?: TrellisServiceConnectTelemetryOpts;
871
887
  server?: TrellisServiceServerOpts;
872
888
  };
873
889
 
890
+ /** Controls automatic telemetry initialization for `TrellisService.connect()`. */
891
+ export type TrellisServiceConnectTelemetryOpts = false | {
892
+ /** Whether automatic telemetry initialization is enabled. Defaults to `true`. */
893
+ enabled?: boolean;
894
+ };
895
+
874
896
  type ServiceKvFacade<TKv extends ContractKvMetadata> = {
875
897
  [K in keyof TKv]: TKv[K]["required"] extends false
876
898
  ? TypedKV<TKv[K]["schema"]> | undefined
@@ -1757,6 +1779,11 @@ export type TrellisServiceConnectArgs<
1757
1779
  contract: TContract;
1758
1780
  name: string;
1759
1781
  sessionKeySeed: string;
1782
+ /**
1783
+ * Controls automatic telemetry initialization for this service connection.
1784
+ * Enabled by default; pass `false` or `{ enabled: false }` to disable it.
1785
+ */
1786
+ telemetry?: TrellisServiceConnectTelemetryOpts;
1760
1787
  server?: TrellisServiceServerOpts;
1761
1788
  };
1762
1789
 
@@ -1818,6 +1845,8 @@ export async function createConnectedService<
1818
1845
  stream: args.server.stream,
1819
1846
  noResponderRetry: args.server.noResponderRetry,
1820
1847
  api: runtimeApi,
1848
+ contractId: args.contractId,
1849
+ contractDigest: args.contractDigest,
1821
1850
  connection,
1822
1851
  transferSupport: {
1823
1852
  openOperationTransfer: (transferArgs) =>
@@ -1837,6 +1866,8 @@ export async function createConnectedService<
1837
1866
  stream: args.server.stream,
1838
1867
  noResponderRetry: args.server.noResponderRetry,
1839
1868
  api: runtimeApi,
1869
+ contractId: args.contractId,
1870
+ contractDigest: args.contractDigest,
1840
1871
  eventConsumers: {
1841
1872
  metadata: args.contractEventConsumers,
1842
1873
  bindings: args.bindings.eventConsumers,
@@ -2009,6 +2040,14 @@ function toUnexpectedError(cause: unknown): UnexpectedError {
2009
2040
  : new UnexpectedError({ cause });
2010
2041
  }
2011
2042
 
2043
+ function serializeJobHandlerError(error: BaseError): string {
2044
+ try {
2045
+ return JSON.stringify(error.toSerializable());
2046
+ } catch {
2047
+ return error.message;
2048
+ }
2049
+ }
2050
+
2012
2051
  function okVoid(): Result<void, never> {
2013
2052
  return Result.ok(undefined);
2014
2053
  }
@@ -2403,6 +2442,8 @@ function createJobsFacade<
2403
2442
  TKv extends ContractKvMetadata = ContractKvMetadata,
2404
2443
  >(args: {
2405
2444
  serviceName: string;
2445
+ contractId?: string;
2446
+ contractDigest?: string;
2406
2447
  nc: NatsConnection;
2407
2448
  contractJobs: TJobs;
2408
2449
  client: Trellis<TTrellisApi, TKv, TJobs>;
@@ -2566,9 +2607,35 @@ function createJobsFacade<
2566
2607
  },
2567
2608
  );
2568
2609
 
2569
- const handled = (await handler(publicJob)).take();
2610
+ const jobErrorContext = {
2611
+ jobType: queueType,
2612
+ requestId: job.context().requestId,
2613
+ service: args.serviceName,
2614
+ contractId: args.contractId,
2615
+ contractDigest: args.contractDigest,
2616
+ traceId: job.context().traceId,
2617
+ };
2618
+
2619
+ let handled: unknown | Result<never, BaseError>;
2620
+ try {
2621
+ handled = (await handler(publicJob)).take();
2622
+ } catch (cause) {
2623
+ const annotatedError = annotateHandlerBoundaryError(
2624
+ cause,
2625
+ jobErrorContext,
2626
+ );
2627
+ throw InternalJobProcessError.failed(
2628
+ serializeJobHandlerError(annotatedError),
2629
+ );
2630
+ }
2570
2631
  if (isErr(handled)) {
2571
- throw InternalJobProcessError.failed(handled.error.message);
2632
+ const annotatedError = annotateHandlerBoundaryError(
2633
+ handled.error,
2634
+ jobErrorContext,
2635
+ );
2636
+ throw InternalJobProcessError.failed(
2637
+ serializeJobHandlerError(annotatedError),
2638
+ );
2572
2639
  }
2573
2640
  return handled;
2574
2641
  },
@@ -2756,6 +2823,8 @@ export class TrellisService<
2756
2823
  this.#operationTransfer = operationTransfer;
2757
2824
  const jobs = createJobsFacade<TJobs, TTrellisApi, TKv>({
2758
2825
  serviceName: name,
2826
+ contractId: health.contractId,
2827
+ contractDigest: health.contractDigest,
2759
2828
  nc,
2760
2829
  contractJobs,
2761
2830
  client: handlerTrellis,
@@ -3097,6 +3166,9 @@ export class TrellisService<
3097
3166
  ...(await loadDefaultServiceRuntimeDeps()),
3098
3167
  ...deps,
3099
3168
  } satisfies TrellisServiceRuntimeDeps;
3169
+ if (automaticTelemetryEnabled(args.telemetry)) {
3170
+ runtimeDeps.initTelemetry?.(args.name);
3171
+ }
3100
3172
  const auth = await createAuth({ sessionKeySeed: args.sessionKeySeed });
3101
3173
  const bootstrapLog = resolveServiceLogger(args.server?.log);
3102
3174
  const bootstrap = await fetchServiceBootstrapInfo({
package/src/server.ts CHANGED
@@ -25,8 +25,13 @@ import {
25
25
  import { RemoteError } from "./errors/RemoteError.js";
26
26
  import type { LoggerLike } from "./globals.js";
27
27
  import { serverLogger } from "./server_logger.js";
28
+ import {
29
+ recordTrellisError,
30
+ type TrellisErrorMetricAttributes,
31
+ } from "./telemetry/mod.js";
28
32
  import {
29
33
  type AcceptedOperation,
34
+ annotateHandlerBoundaryError,
30
35
  type AnyTrellisAPI,
31
36
  type AuthRequestsValidateResponse,
32
37
  base64urlDecode,
@@ -199,6 +204,38 @@ function asOptionalStringRecordPointerValue(
199
204
  return ok(Object.fromEntries(entries) as Record<string, string>);
200
205
  }
201
206
 
207
+ function traceIdFromTraceparent(
208
+ traceparent: string | undefined,
209
+ ): string | undefined {
210
+ const [version, traceId, parentId, flags, extra] = traceparent?.split("-") ??
211
+ [];
212
+ if (
213
+ extra !== undefined ||
214
+ !/^[0-9a-f]{2}$/u.test(version ?? "") ||
215
+ version === "ff" ||
216
+ !/^[0-9a-f]{32}$/u.test(traceId ?? "") ||
217
+ traceId === "00000000000000000000000000000000" ||
218
+ !/^[0-9a-f]{16}$/u.test(parentId ?? "") ||
219
+ parentId === "0000000000000000" ||
220
+ !/^[0-9a-f]{2}$/u.test(flags ?? "")
221
+ ) {
222
+ return undefined;
223
+ }
224
+ return traceId;
225
+ }
226
+
227
+ function recordOperationServerError(
228
+ error: unknown,
229
+ attributes: TrellisErrorMetricAttributes,
230
+ ): void {
231
+ recordTrellisError(error, {
232
+ messagingSystem: "nats",
233
+ surface: "operation",
234
+ direction: "server",
235
+ ...attributes,
236
+ });
237
+ }
238
+
202
239
  export class TrellisServiceRuntime extends Trellis<TrellisAPI, TrellisMode> {
203
240
  #version?: string;
204
241
  #log: LoggerLike;
@@ -243,7 +280,7 @@ export class TrellisServiceRuntime extends Trellis<TrellisAPI, TrellisMode> {
243
280
  }),
244
281
  fail: (operationId, error) =>
245
282
  this.#applyOperationUpdate(operationId, "failed", {
246
- patch: { error: { type: error.name, message: error.message } },
283
+ patch: { error: error.toSerializable() },
247
284
  event: { type: "failed" },
248
285
  }),
249
286
  cancel: (operationId) =>
@@ -506,7 +543,7 @@ export class TrellisServiceRuntime extends Trellis<TrellisAPI, TrellisMode> {
506
543
  }),
507
544
  fail: (error: BaseError) =>
508
545
  this.#applyControlledOperationUpdate(runtime, ctx, "failed", {
509
- patch: { error: { type: error.name, message: error.message } },
546
+ patch: { error: error.toSerializable() },
510
547
  event: { type: "failed" },
511
548
  }),
512
549
  cancel: () => {
@@ -916,6 +953,10 @@ export class TrellisServiceRuntime extends Trellis<TrellisAPI, TrellisMode> {
916
953
  const trellisError = error instanceof BaseError
917
954
  ? error
918
955
  : new UnexpectedError({ cause: error });
956
+ recordOperationServerError(trellisError, {
957
+ operation,
958
+ phase: "control",
959
+ });
919
960
  msg.respond(JSON.stringify({
920
961
  kind: "error",
921
962
  error: trellisError.toSerializable(),
@@ -1210,7 +1251,10 @@ export class TrellisServiceRuntime extends Trellis<TrellisAPI, TrellisMode> {
1210
1251
  runtime.waiters.clear();
1211
1252
  };
1212
1253
 
1213
- const makeOperation = (runtime: RuntimeOperationRecord) => {
1254
+ const makeOperation = (
1255
+ runtime: RuntimeOperationRecord,
1256
+ context: { requestId?: string; traceId?: string },
1257
+ ) => {
1214
1258
  const ensureActive = () => {
1215
1259
  if (runtime.terminal) {
1216
1260
  return err(
@@ -1291,11 +1335,19 @@ export class TrellisServiceRuntime extends Trellis<TrellisAPI, TrellisMode> {
1291
1335
  AsyncResult.from((async () => {
1292
1336
  const active = ensureActive();
1293
1337
  if (active) return active;
1338
+ const annotatedError = annotateHandlerBoundaryError(error, {
1339
+ operation: String(operation),
1340
+ requestId: context.requestId,
1341
+ service: this.name,
1342
+ contractId: this.contractId,
1343
+ contractDigest: this.contractDigest,
1344
+ traceId: context.traceId,
1345
+ });
1294
1346
  const snapshot = buildRuntimeOperationSnapshot(
1295
1347
  runtime,
1296
1348
  "failed",
1297
1349
  {
1298
- error: { type: error.name, message: error.message },
1350
+ error: annotatedError.toSerializable(),
1299
1351
  completedAt: now(),
1300
1352
  },
1301
1353
  );
@@ -1507,6 +1559,10 @@ export class TrellisServiceRuntime extends Trellis<TrellisAPI, TrellisMode> {
1507
1559
  const validated = await authenticate(msg, true);
1508
1560
  const value = validated.take();
1509
1561
  if (isErr(value)) {
1562
+ recordOperationServerError(value.error, {
1563
+ operation: String(operation),
1564
+ phase: "start",
1565
+ });
1510
1566
  this.respondWithError(msg, value.error);
1511
1567
  continue;
1512
1568
  }
@@ -1514,15 +1570,20 @@ export class TrellisServiceRuntime extends Trellis<TrellisAPI, TrellisMode> {
1514
1570
  let transferSession: RuntimeOperationTransferSession | undefined;
1515
1571
  if (ctx.transfer) {
1516
1572
  if (!this.#transferSupport) {
1573
+ const error = new UnexpectedError({
1574
+ cause: new Error(
1575
+ `Operation '${
1576
+ String(operation)
1577
+ }' declared transfer support but no runtime transfer support is configured`,
1578
+ ),
1579
+ });
1580
+ recordOperationServerError(error, {
1581
+ operation: String(operation),
1582
+ phase: "start",
1583
+ });
1517
1584
  this.respondWithError(
1518
1585
  msg,
1519
- new UnexpectedError({
1520
- cause: new Error(
1521
- `Operation '${
1522
- String(operation)
1523
- }' declared transfer support but no runtime transfer support is configured`,
1524
- ),
1525
- }),
1586
+ error,
1526
1587
  );
1527
1588
  continue;
1528
1589
  }
@@ -1534,6 +1595,10 @@ export class TrellisServiceRuntime extends Trellis<TrellisAPI, TrellisMode> {
1534
1595
  "key",
1535
1596
  ).take();
1536
1597
  if (isErr(key)) {
1598
+ recordOperationServerError(key.error, {
1599
+ operation: String(operation),
1600
+ phase: "start",
1601
+ });
1537
1602
  this.respondWithError(msg, key.error);
1538
1603
  continue;
1539
1604
  }
@@ -1543,6 +1608,10 @@ export class TrellisServiceRuntime extends Trellis<TrellisAPI, TrellisMode> {
1543
1608
  ctx.transfer.contentType,
1544
1609
  ).take();
1545
1610
  if (isErr(contentType)) {
1611
+ recordOperationServerError(contentType.error, {
1612
+ operation: String(operation),
1613
+ phase: "start",
1614
+ });
1546
1615
  this.respondWithError(msg, contentType.error);
1547
1616
  continue;
1548
1617
  }
@@ -1552,6 +1621,10 @@ export class TrellisServiceRuntime extends Trellis<TrellisAPI, TrellisMode> {
1552
1621
  ctx.transfer.metadata,
1553
1622
  ).take();
1554
1623
  if (isErr(metadata)) {
1624
+ recordOperationServerError(metadata.error, {
1625
+ operation: String(operation),
1626
+ phase: "start",
1627
+ });
1555
1628
  this.respondWithError(msg, metadata.error);
1556
1629
  continue;
1557
1630
  }
@@ -1569,6 +1642,10 @@ export class TrellisServiceRuntime extends Trellis<TrellisAPI, TrellisMode> {
1569
1642
  ...(metadata !== undefined ? { metadata } : {}),
1570
1643
  }).take();
1571
1644
  if (isErr(openedTransferValue)) {
1645
+ recordOperationServerError(openedTransferValue.error, {
1646
+ operation: String(operation),
1647
+ phase: "start",
1648
+ });
1572
1649
  this.respondWithError(msg, openedTransferValue.error);
1573
1650
  continue;
1574
1651
  }
@@ -1636,7 +1713,13 @@ export class TrellisServiceRuntime extends Trellis<TrellisAPI, TrellisMode> {
1636
1713
  msg.respond(JSON.stringify(accepted));
1637
1714
 
1638
1715
  void (async () => {
1639
- const op = makeOperation(runtime);
1716
+ const operationContext = {
1717
+ requestId: msg.headers?.get("request-id"),
1718
+ traceId: traceIdFromTraceparent(
1719
+ msg.headers?.get("traceparent"),
1720
+ ),
1721
+ };
1722
+ const op = makeOperation(runtime, operationContext);
1640
1723
  try {
1641
1724
  const handlerResult: unknown = await handler(
1642
1725
  transferSession
@@ -1656,7 +1739,22 @@ export class TrellisServiceRuntime extends Trellis<TrellisAPI, TrellisMode> {
1656
1739
  ? handlerResult.take()
1657
1740
  : handlerResult;
1658
1741
  if (isErr(handlerOutcome)) {
1659
- await op.fail(handlerOutcome.error);
1742
+ const error = annotateHandlerBoundaryError(
1743
+ handlerOutcome.error,
1744
+ {
1745
+ operation: String(operation),
1746
+ requestId: operationContext.requestId,
1747
+ service: this.name,
1748
+ contractId: this.contractId,
1749
+ contractDigest: this.contractDigest,
1750
+ traceId: operationContext.traceId,
1751
+ },
1752
+ );
1753
+ recordOperationServerError(error, {
1754
+ operation: String(operation),
1755
+ phase: "handler_result",
1756
+ });
1757
+ await op.fail(error);
1660
1758
  return;
1661
1759
  }
1662
1760
 
@@ -1676,7 +1774,19 @@ export class TrellisServiceRuntime extends Trellis<TrellisAPI, TrellisMode> {
1676
1774
  await op.complete(handlerOutcome);
1677
1775
  }
1678
1776
  } catch (cause) {
1679
- await op.fail(new UnexpectedError({ cause }));
1777
+ const error = annotateHandlerBoundaryError(cause, {
1778
+ operation: String(operation),
1779
+ requestId: operationContext.requestId,
1780
+ service: this.name,
1781
+ contractId: this.contractId,
1782
+ contractDigest: this.contractDigest,
1783
+ traceId: operationContext.traceId,
1784
+ });
1785
+ recordOperationServerError(error, {
1786
+ operation: String(operation),
1787
+ phase: "handler_throw",
1788
+ });
1789
+ await op.fail(error);
1680
1790
  }
1681
1791
  })();
1682
1792
  }
package/src/service/deno.ts CHANGED
@@ -17,4 +17,5 @@ export {
17
17
  type Trellis,
18
18
  TrellisService,
19
19
  type TrellisServiceConnectOpts,
20
+ type TrellisServiceConnectTelemetryOpts,
20
21
  } from "../server/service.js";
package/src/service/mod.ts CHANGED
@@ -54,6 +54,7 @@ export {
54
54
  type Trellis,
55
55
  TrellisService,
56
56
  type TrellisServiceConnectOpts,
57
+ type TrellisServiceConnectTelemetryOpts,
57
58
  } from "../server/service.js";
58
59
  export {
59
60
  createPostgresOutboxSchema,