@infinitedusky/indusk-mcp 1.18.2 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,7 @@
12
12
  * throws, same behavior. The evaluator never fails because of OTel.
13
13
  */
14
14
  import { type Attributes, type Span, type Tracer } from "@opentelemetry/api";
15
+ import { type Logger } from "@opentelemetry/api-logs";
15
16
  export interface EvalOtelConfig {
16
17
  enabled: boolean;
17
18
  endpoint: string | null;
@@ -56,9 +57,30 @@ export declare function initEvalOtel(projectRoot: string): Tracer;
56
57
  */
57
58
  export declare function withSpan<T>(tracer: Tracer, name: string, attrs: Attributes | undefined, fn: (span: Span) => Promise<T> | T): Promise<T>;
58
59
  /**
59
- * Flush and shut down the active provider. Call this before `process.exit()`
60
- * in detached processes so batched spans are not lost. No-op if no provider
61
- * is active.
60
+ * Initialize the OTel logs pipeline alongside traces. Returns a Logger —
61
+ * real when enabled + endpoint set, no-op otherwise. Shares the same
62
+ * config gating + Dash0 dataset routing as `initEvalOtel`. Safe to call
63
+ * multiple times.
64
+ *
65
+ * Log records emitted via `getEvalLogger().emit(...)` automatically
66
+ * correlate with the active span via trace_id / span_id.
67
+ */
68
+ export declare function initEvalOtelLogs(projectRoot: string): Logger;
69
+ /**
70
+ * Accessor for the eval logger. Always safe to call — returns a no-op
71
+ * logger when logs aren't initialized.
72
+ */
73
+ export declare function getEvalLogger(): Logger;
74
+ /**
75
+ * Emit an info-severity log record with an arbitrary body. Shorthand for
76
+ * `getEvalLogger().emit(...)`. When called inside an active span, the
77
+ * SDK attaches trace_id + span_id automatically.
78
+ */
79
+ export declare function logEvalContent(name: string, body: string | Record<string, unknown>, attributes?: Record<string, string | number | boolean>): void;
80
+ /**
81
+ * Flush and shut down the active providers (traces + logs). Call this
82
+ * before `process.exit()` in detached processes so batched signals are
83
+ * not lost. No-op if neither provider is active.
62
84
  */
63
85
  export declare function shutdownEvalOtel(): Promise<void>;
64
86
  /**
@@ -14,8 +14,11 @@
14
14
  import { appendFileSync, existsSync, mkdirSync, readFileSync } from "node:fs";
15
15
  import { join, resolve } from "node:path";
16
16
  import { SpanStatusCode, trace } from "@opentelemetry/api";
17
+ import { logs, SeverityNumber } from "@opentelemetry/api-logs";
18
+ import { OTLPLogExporter } from "@opentelemetry/exporter-logs-otlp-http";
17
19
  import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
18
20
  import { resourceFromAttributes } from "@opentelemetry/resources";
21
+ import { BatchLogRecordProcessor, LoggerProvider } from "@opentelemetry/sdk-logs";
19
22
  import { BatchSpanProcessor } from "@opentelemetry/sdk-trace-base";
20
23
  import { NodeTracerProvider } from "@opentelemetry/sdk-trace-node";
21
24
  import { ATTR_SERVICE_NAME } from "@opentelemetry/semantic-conventions";
@@ -96,7 +99,9 @@ function rewriteDatasetInEnvHeaders(target) {
96
99
  const rewritten = current.replace(/Dash0-Dataset=[^,]*/g, `Dash0-Dataset=${target}`);
97
100
  process.env.OTEL_EXPORTER_OTLP_HEADERS = rewritten;
98
101
  }
102
+ const LOGGER_NAME = "@infinitedusky/indusk-mcp/eval";
99
103
  let activeProvider = null;
104
+ let activeLoggerProvider = null;
100
105
  /**
101
106
  * Initialize OTel tracing for the evaluator if enabled + endpoint set.
102
107
  * Returns a Tracer — real when enabled, no-op when not.
@@ -187,22 +192,105 @@ export async function withSpan(tracer, name, attrs, fn) {
187
192
  });
188
193
  }
189
194
  /**
190
- * Flush and shut down the active provider. Call this before `process.exit()`
191
- * in detached processes so batched spans are not lost. No-op if no provider
192
- * is active.
195
+ * Initialize the OTel logs pipeline alongside traces. Returns a Logger
196
+ * real when enabled + endpoint set, no-op otherwise. Shares the same
197
+ * config gating + Dash0 dataset routing as `initEvalOtel`. Safe to call
198
+ * multiple times.
199
+ *
200
+ * Log records emitted via `getEvalLogger().emit(...)` automatically
201
+ * correlate with the active span via trace_id / span_id.
202
+ */
203
+ export function initEvalOtelLogs(projectRoot) {
204
+ const { enabled, endpoint, dataset } = isEvalOtelEnabled(projectRoot);
205
+ if (!enabled)
206
+ return logs.getLogger(LOGGER_NAME);
207
+ if (!endpoint) {
208
+ syslog(projectRoot, "eval.otel.logs — endpoint unset; falling back to no-op logger");
209
+ return logs.getLogger(LOGGER_NAME);
210
+ }
211
+ if (activeLoggerProvider)
212
+ return logs.getLogger(LOGGER_NAME);
213
+ rewriteDatasetInEnvHeaders(dataset);
214
+ const headers = { "Dash0-Dataset": dataset };
215
+ if (process.env.DASH0_API_TOKEN) {
216
+ headers.Authorization = `Bearer ${process.env.DASH0_API_TOKEN}`;
217
+ }
218
+ try {
219
+ const exporter = new OTLPLogExporter({
220
+ url: endpoint.endsWith("/v1/logs") ? endpoint : `${endpoint.replace(/\/$/, "")}/v1/logs`,
221
+ headers,
222
+ });
223
+ const provider = new LoggerProvider({
224
+ resource: resourceFromAttributes({ [ATTR_SERVICE_NAME]: SERVICE_NAME }),
225
+ processors: [new BatchLogRecordProcessor(exporter)],
226
+ });
227
+ // setGlobalLoggerProvider returns false if one is already registered
228
+ // (e.g., a test's InMemoryLogRecordExporter provider). Respect that —
229
+ // only retain ownership (and tear down at shutdown) if we actually
230
+ // registered ours.
231
+ const accepted = logs.setGlobalLoggerProvider(provider);
232
+ if (accepted) {
233
+ activeLoggerProvider = provider;
234
+ syslog(projectRoot, `eval.otel.logs initialized — endpoint: ${endpoint}, dataset: ${dataset}`);
235
+ }
236
+ else {
237
+ syslog(projectRoot, "eval.otel.logs — global provider already set; using existing");
238
+ // Fire-and-forget shutdown of the unused provider
239
+ void provider.shutdown().catch(() => { });
240
+ }
241
+ }
242
+ catch (err) {
243
+ const message = err instanceof Error ? err.message : String(err);
244
+ syslog(projectRoot, `eval.otel.logs init failed — falling back to no-op: ${message}`);
245
+ }
246
+ return logs.getLogger(LOGGER_NAME);
247
+ }
248
/**
 * Look up the eval logger by its fixed instrumentation name.
 *
 * Always safe to call — when no logs pipeline has been initialized the
 * OTel API hands back a no-op logger.
 *
 * @returns The logger registered under `LOGGER_NAME`.
 */
export function getEvalLogger() {
    const evalLogger = logs.getLogger(LOGGER_NAME);
    return evalLogger;
}
255
+ /**
256
+ * Emit an info-severity log record with an arbitrary body. Shorthand for
257
+ * `getEvalLogger().emit(...)`. When called inside an active span, the
258
+ * SDK attaches trace_id + span_id automatically.
259
+ */
260
+ export function logEvalContent(name, body, attributes) {
261
+ // AnyValue requires plain primitives/arrays/records — stringify objects so
262
+ // Dash0 ingests the content as a single searchable log body rather than a
263
+ // nested structure.
264
+ const bodyText = typeof body === "string" ? body : JSON.stringify(body);
265
+ getEvalLogger().emit({
266
+ severityNumber: SeverityNumber.INFO,
267
+ severityText: "INFO",
268
+ body: bodyText,
269
+ attributes: { "eval.event": name, ...(attributes ?? {}) },
270
+ });
271
+ }
272
+ /**
273
+ * Flush and shut down the active providers (traces + logs). Call this
274
+ * before `process.exit()` in detached processes so batched signals are
275
+ * not lost. No-op if neither provider is active.
193
276
  */
194
277
  export async function shutdownEvalOtel() {
195
- if (!activeProvider)
196
- return;
278
+ const tasks = [];
279
+ if (activeProvider) {
280
+ tasks.push(activeProvider.forceFlush().then(() => activeProvider?.shutdown()));
281
+ }
282
+ if (activeLoggerProvider) {
283
+ tasks.push(activeLoggerProvider.forceFlush().then(() => activeLoggerProvider?.shutdown()));
284
+ }
197
285
  try {
198
- await activeProvider.forceFlush();
199
- await activeProvider.shutdown();
286
+ await Promise.all(tasks);
200
287
  }
201
288
  catch {
202
289
  // shutdown is best-effort
203
290
  }
204
291
  finally {
205
292
  activeProvider = null;
293
+ activeLoggerProvider = null;
206
294
  }
207
295
  }
208
296
  /**
@@ -210,12 +298,17 @@ export async function shutdownEvalOtel() {
210
298
  * starts fresh. Not part of the public API.
211
299
  */
212
300
  export function __resetEvalOtelForTests() {
213
- // Tear down any provider left over from a previous test. This un-registers
214
- // from the global OTel API, so `trace.getTracer()` falls back to the no-op
215
- // tracer until a new provider is registered.
301
+ // Tear down any providers left over from a previous test. This
302
+ // un-registers from the global OTel API so `trace.getTracer()` /
303
+ // `logs.getLogger()` fall back to no-op until re-registered.
216
304
  if (activeProvider) {
217
305
  void activeProvider.shutdown().catch(() => { });
218
306
  }
307
+ if (activeLoggerProvider) {
308
+ void activeLoggerProvider.shutdown().catch(() => { });
309
+ }
219
310
  activeProvider = null;
311
+ activeLoggerProvider = null;
220
312
  trace.disable();
313
+ logs.disable();
221
314
  }
@@ -13,7 +13,7 @@ import { getProjectGroupId } from "../config.js";
13
13
  import { readUnprocessedHighlights } from "../highlights/highlights.js";
14
14
  import { ingestScorecard } from "./findings.js";
15
15
  import { EvalLogWriter } from "./log-writer.js";
16
- import { initEvalOtel, shutdownEvalOtel, withSpan } from "./otel.js";
16
+ import { initEvalOtel, initEvalOtelLogs, logEvalContent, shutdownEvalOtel, withSpan, } from "./otel.js";
17
17
  import { buildEvaluatorPrompt } from "./prompt-builder.js";
18
18
  import { V1_RUBRIC } from "./rubric.js";
19
19
  function getSessionPath(projectRoot) {
@@ -112,6 +112,7 @@ async function spawnClaude(args, prompt, cwd) {
112
112
  */
113
113
  export async function runPersistentEval(opts) {
114
114
  const tracer = initEvalOtel(opts.projectRoot);
115
+ initEvalOtelLogs(opts.projectRoot);
115
116
  const source = process.env.INDUSK_EVAL_SOURCE ?? "commit";
116
117
  const projectGroup = getProjectGroupId(opts.projectRoot);
117
118
  // Peek at the highlights queue before spawning — gives us observability
@@ -135,7 +136,17 @@ export async function runPersistentEval(opts) {
135
136
  const session = await withSpan(tracer, "eval.read_session", undefined, () => readSession(opts.projectRoot));
136
137
  rootSpan.setAttribute("resumed", session !== null);
137
138
  try {
138
- const { args, prompt } = await withSpan(tracer, "eval.build_prompt", { resumed: session !== null }, () => {
139
+ const { args, prompt } = await withSpan(tracer, "eval.build_prompt", { resumed: session !== null }, (span) => {
140
+ const built = buildArgsAndPrompt();
141
+ span.setAttribute("prompt.length", built.prompt.length);
142
+ span.setAttribute("prompt.kind", session ? "resume" : "full");
143
+ logEvalContent("prompt", built.prompt, {
144
+ "prompt.length": built.prompt.length,
145
+ "prompt.kind": session ? "resume" : "full",
146
+ });
147
+ return built;
148
+ });
149
+ function buildArgsAndPrompt() {
139
150
  if (session) {
140
151
  const resumePrompt = `Evaluate a new commit. Change ID: ${opts.changeId}
141
152
 
@@ -175,16 +186,24 @@ Output ONLY the JSON scorecard as before — no commentary.`;
175
186
  projectGroup,
176
187
  }),
177
188
  };
178
- });
189
+ }
179
190
  const claudeResult = await withSpan(tracer, "eval.spawn_claude", {
180
191
  "args.resumed": session !== null,
181
192
  "args.model": session ? "(resumed)" : "opus",
182
193
  }, async (span) => {
183
194
  const spawned = await spawnClaude(args, prompt, opts.projectRoot);
184
195
  span.setAttribute("exit.code", spawned.code ?? -1);
196
+ span.setAttribute("stdout.length", spawned.stdout.length);
185
197
  if (spawned.code !== 0) {
186
198
  span.setAttribute("exit.stderr_tail", spawned.stderr.slice(-500));
199
+ logEvalContent("claude.error", spawned.stderr, {
200
+ "exit.code": spawned.code ?? -1,
201
+ });
187
202
  }
203
+ logEvalContent("claude.stdout", spawned.stdout, {
204
+ "stdout.length": spawned.stdout.length,
205
+ "exit.code": spawned.code ?? -1,
206
+ });
188
207
  return spawned;
189
208
  });
190
209
  if (claudeResult.code !== 0) {
@@ -210,6 +229,26 @@ Output ONLY the JSON scorecard as before — no commentary.`;
210
229
  if (parsed.usage)
211
230
  scorecard.usage = parsed.usage;
212
231
  scorecard.telemetryPosted = false;
232
+ // Carry scorecard-level content onto the root span for at-a-glance debugging in Dash0
233
+ rootSpan.setAttribute("scorecard.status", "ok");
234
+ rootSpan.setAttribute("scorecard.question_count", scorecard.questions?.length ?? 0);
235
+ if (scorecard.summary) {
236
+ rootSpan.setAttribute("scorecard.summary", scorecard.summary.slice(0, 500));
237
+ }
238
+ if (scorecard.usage) {
239
+ rootSpan.setAttribute("scorecard.cost_usd", scorecard.usage.costUsd);
240
+ rootSpan.setAttribute("scorecard.duration_ms", scorecard.usage.durationMs);
241
+ rootSpan.setAttribute("scorecard.input_tokens", scorecard.usage.inputTokens);
242
+ rootSpan.setAttribute("scorecard.output_tokens", scorecard.usage.outputTokens);
243
+ }
244
+ const answerCounts = { yes: 0, no: 0, partial: 0 };
245
+ for (const q of scorecard.questions ?? []) {
246
+ if (q.answer in answerCounts)
247
+ answerCounts[q.answer]++;
248
+ }
249
+ rootSpan.setAttribute("scorecard.answers.yes", answerCounts.yes);
250
+ rootSpan.setAttribute("scorecard.answers.no", answerCounts.no);
251
+ rootSpan.setAttribute("scorecard.answers.partial", answerCounts.partial);
213
252
  await withSpan(tracer, "eval.update_session", undefined, () => {
214
253
  const newSession = {
215
254
  sessionId: parsed.sessionId ?? session?.sessionId ?? "unknown",
@@ -222,17 +261,28 @@ Output ONLY the JSON scorecard as before — no commentary.`;
222
261
  await withSpan(tracer, "eval.write_scorecard", undefined, async () => {
223
262
  await logWriter.append(scorecard);
224
263
  ingestScorecard(opts.projectRoot, scorecard);
264
+ logEvalContent("scorecard", JSON.stringify(scorecard), {
265
+ "scorecard.question_count": scorecard.questions?.length ?? 0,
266
+ "scorecard.summary_length": scorecard.summary?.length ?? 0,
267
+ });
225
268
  });
226
269
  return scorecard;
227
270
  }
228
271
  catch (err) {
272
+ const msg = err instanceof Error ? err.message : String(err);
273
+ const stack = err instanceof Error ? (err.stack ?? "") : "";
274
+ rootSpan.setAttribute("scorecard.status", "error");
275
+ rootSpan.setAttribute("error.message", msg.slice(0, 500));
276
+ logEvalContent("error", stack || msg, {
277
+ "error.message": msg.slice(0, 500),
278
+ });
229
279
  const errorEntry = {
230
280
  version: 1,
231
281
  timestamp: new Date().toISOString(),
232
282
  mode: opts.mode,
233
283
  changeId: opts.changeId,
234
284
  error: true,
235
- message: err instanceof Error ? err.message : String(err),
285
+ message: msg,
236
286
  };
237
287
  await logWriter.append(errorEntry);
238
288
  return errorEntry;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@infinitedusky/indusk-mcp",
3
- "version": "1.18.2",
3
+ "version": "1.19.0",
4
4
  "description": "InDusk development system — skills, MCP tools, and CLI for structured AI-assisted development",
5
5
  "type": "module",
6
6
  "files": [
@@ -29,8 +29,11 @@
29
29
  "dependencies": {
30
30
  "@modelcontextprotocol/sdk": "^1.12.1",
31
31
  "@opentelemetry/api": "^1.9.0",
32
+ "@opentelemetry/api-logs": "^0.214.0",
33
+ "@opentelemetry/exporter-logs-otlp-http": "^0.214.0",
32
34
  "@opentelemetry/exporter-trace-otlp-http": "^0.214.0",
33
35
  "@opentelemetry/resources": "^2.6.0",
36
+ "@opentelemetry/sdk-logs": "^0.214.0",
34
37
  "@opentelemetry/sdk-trace-base": "^2.6.0",
35
38
  "@opentelemetry/sdk-trace-node": "^2.6.0",
36
39
  "@opentelemetry/semantic-conventions": "^1.40.0",