@infinitedusky/indusk-mcp 1.18.2 → 1.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/lib/eval/otel.d.ts +25 -3
- package/dist/lib/eval/otel.js +103 -10
- package/dist/lib/eval/persistent-evaluator.js +54 -4
- package/package.json +4 -1
package/dist/lib/eval/otel.d.ts
CHANGED
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
* throws, same behavior. The evaluator never fails because of OTel.
|
|
13
13
|
*/
|
|
14
14
|
import { type Attributes, type Span, type Tracer } from "@opentelemetry/api";
|
|
15
|
+
import { type Logger } from "@opentelemetry/api-logs";
|
|
15
16
|
export interface EvalOtelConfig {
|
|
16
17
|
enabled: boolean;
|
|
17
18
|
endpoint: string | null;
|
|
@@ -56,9 +57,30 @@ export declare function initEvalOtel(projectRoot: string): Tracer;
|
|
|
56
57
|
*/
|
|
57
58
|
export declare function withSpan<T>(tracer: Tracer, name: string, attrs: Attributes | undefined, fn: (span: Span) => Promise<T> | T): Promise<T>;
|
|
58
59
|
/**
|
|
59
|
-
*
|
|
60
|
-
*
|
|
61
|
-
*
|
|
60
|
+
* Initialize the OTel logs pipeline alongside traces. Returns a Logger —
|
|
61
|
+
* real when enabled + endpoint set, no-op otherwise. Shares the same
|
|
62
|
+
* config gating + Dash0 dataset routing as `initEvalOtel`. Safe to call
|
|
63
|
+
* multiple times.
|
|
64
|
+
*
|
|
65
|
+
* Log records emitted via `getEvalLogger().emit(...)` automatically
|
|
66
|
+
* correlate with the active span via trace_id / span_id.
|
|
67
|
+
*/
|
|
68
|
+
export declare function initEvalOtelLogs(projectRoot: string): Logger;
|
|
69
|
+
/**
|
|
70
|
+
* Accessor for the eval logger. Always safe to call — returns a no-op
|
|
71
|
+
* logger when logs aren't initialized.
|
|
72
|
+
*/
|
|
73
|
+
export declare function getEvalLogger(): Logger;
|
|
74
|
+
/**
|
|
75
|
+
* Emit an info-severity log record with an arbitrary body. Shorthand for
|
|
76
|
+
* `getEvalLogger().emit(...)`. When called inside an active span, the
|
|
77
|
+
* SDK attaches trace_id + span_id automatically.
|
|
78
|
+
*/
|
|
79
|
+
export declare function logEvalContent(name: string, body: string | Record<string, unknown>, attributes?: Record<string, string | number | boolean>): void;
|
|
80
|
+
/**
|
|
81
|
+
* Flush and shut down the active providers (traces + logs). Call this
|
|
82
|
+
* before `process.exit()` in detached processes so batched signals are
|
|
83
|
+
* not lost. No-op if neither provider is active.
|
|
62
84
|
*/
|
|
63
85
|
export declare function shutdownEvalOtel(): Promise<void>;
|
|
64
86
|
/**
|
package/dist/lib/eval/otel.js
CHANGED
|
@@ -14,8 +14,11 @@
|
|
|
14
14
|
import { appendFileSync, existsSync, mkdirSync, readFileSync } from "node:fs";
|
|
15
15
|
import { join, resolve } from "node:path";
|
|
16
16
|
import { SpanStatusCode, trace } from "@opentelemetry/api";
|
|
17
|
+
import { logs, SeverityNumber } from "@opentelemetry/api-logs";
|
|
18
|
+
import { OTLPLogExporter } from "@opentelemetry/exporter-logs-otlp-http";
|
|
17
19
|
import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
|
|
18
20
|
import { resourceFromAttributes } from "@opentelemetry/resources";
|
|
21
|
+
import { BatchLogRecordProcessor, LoggerProvider } from "@opentelemetry/sdk-logs";
|
|
19
22
|
import { BatchSpanProcessor } from "@opentelemetry/sdk-trace-base";
|
|
20
23
|
import { NodeTracerProvider } from "@opentelemetry/sdk-trace-node";
|
|
21
24
|
import { ATTR_SERVICE_NAME } from "@opentelemetry/semantic-conventions";
|
|
@@ -96,7 +99,9 @@ function rewriteDatasetInEnvHeaders(target) {
|
|
|
96
99
|
const rewritten = current.replace(/Dash0-Dataset=[^,]*/g, `Dash0-Dataset=${target}`);
|
|
97
100
|
process.env.OTEL_EXPORTER_OTLP_HEADERS = rewritten;
|
|
98
101
|
}
|
|
102
|
+
const LOGGER_NAME = "@infinitedusky/indusk-mcp/eval";
|
|
99
103
|
let activeProvider = null;
|
|
104
|
+
let activeLoggerProvider = null;
|
|
100
105
|
/**
|
|
101
106
|
* Initialize OTel tracing for the evaluator if enabled + endpoint set.
|
|
102
107
|
* Returns a Tracer — real when enabled, no-op when not.
|
|
@@ -187,22 +192,105 @@ export async function withSpan(tracer, name, attrs, fn) {
|
|
|
187
192
|
});
|
|
188
193
|
}
|
|
189
194
|
/**
|
|
190
|
-
*
|
|
191
|
-
*
|
|
192
|
-
*
|
|
195
|
+
* Initialize the OTel logs pipeline alongside traces. Returns a Logger —
|
|
196
|
+
* real when enabled + endpoint set, no-op otherwise. Shares the same
|
|
197
|
+
* config gating + Dash0 dataset routing as `initEvalOtel`. Safe to call
|
|
198
|
+
* multiple times.
|
|
199
|
+
*
|
|
200
|
+
* Log records emitted via `getEvalLogger().emit(...)` automatically
|
|
201
|
+
* correlate with the active span via trace_id / span_id.
|
|
202
|
+
*/
|
|
203
|
+
export function initEvalOtelLogs(projectRoot) {
|
|
204
|
+
const { enabled, endpoint, dataset } = isEvalOtelEnabled(projectRoot);
|
|
205
|
+
if (!enabled)
|
|
206
|
+
return logs.getLogger(LOGGER_NAME);
|
|
207
|
+
if (!endpoint) {
|
|
208
|
+
syslog(projectRoot, "eval.otel.logs — endpoint unset; falling back to no-op logger");
|
|
209
|
+
return logs.getLogger(LOGGER_NAME);
|
|
210
|
+
}
|
|
211
|
+
if (activeLoggerProvider)
|
|
212
|
+
return logs.getLogger(LOGGER_NAME);
|
|
213
|
+
rewriteDatasetInEnvHeaders(dataset);
|
|
214
|
+
const headers = { "Dash0-Dataset": dataset };
|
|
215
|
+
if (process.env.DASH0_API_TOKEN) {
|
|
216
|
+
headers.Authorization = `Bearer ${process.env.DASH0_API_TOKEN}`;
|
|
217
|
+
}
|
|
218
|
+
try {
|
|
219
|
+
const exporter = new OTLPLogExporter({
|
|
220
|
+
url: endpoint.endsWith("/v1/logs") ? endpoint : `${endpoint.replace(/\/$/, "")}/v1/logs`,
|
|
221
|
+
headers,
|
|
222
|
+
});
|
|
223
|
+
const provider = new LoggerProvider({
|
|
224
|
+
resource: resourceFromAttributes({ [ATTR_SERVICE_NAME]: SERVICE_NAME }),
|
|
225
|
+
processors: [new BatchLogRecordProcessor(exporter)],
|
|
226
|
+
});
|
|
227
|
+
// setGlobalLoggerProvider returns false if one is already registered
|
|
228
|
+
// (e.g., a test's InMemoryLogRecordExporter provider). Respect that —
|
|
229
|
+
// only retain ownership (and tear down at shutdown) if we actually
|
|
230
|
+
// registered ours.
|
|
231
|
+
const accepted = logs.setGlobalLoggerProvider(provider);
|
|
232
|
+
if (accepted) {
|
|
233
|
+
activeLoggerProvider = provider;
|
|
234
|
+
syslog(projectRoot, `eval.otel.logs initialized — endpoint: ${endpoint}, dataset: ${dataset}`);
|
|
235
|
+
}
|
|
236
|
+
else {
|
|
237
|
+
syslog(projectRoot, "eval.otel.logs — global provider already set; using existing");
|
|
238
|
+
// Fire-and-forget shutdown of the unused provider
|
|
239
|
+
void provider.shutdown().catch(() => { });
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
catch (err) {
|
|
243
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
244
|
+
syslog(projectRoot, `eval.otel.logs init failed — falling back to no-op: ${message}`);
|
|
245
|
+
}
|
|
246
|
+
return logs.getLogger(LOGGER_NAME);
|
|
247
|
+
}
|
|
248
|
+
/**
|
|
249
|
+
* Accessor for the eval logger. Always safe to call — returns a no-op
|
|
250
|
+
* logger when logs aren't initialized.
|
|
251
|
+
*/
|
|
252
|
+
export function getEvalLogger() {
|
|
253
|
+
return logs.getLogger(LOGGER_NAME);
|
|
254
|
+
}
|
|
255
|
+
/**
|
|
256
|
+
* Emit an info-severity log record with an arbitrary body. Shorthand for
|
|
257
|
+
* `getEvalLogger().emit(...)`. When called inside an active span, the
|
|
258
|
+
* SDK attaches trace_id + span_id automatically.
|
|
259
|
+
*/
|
|
260
|
+
export function logEvalContent(name, body, attributes) {
|
|
261
|
+
// AnyValue requires plain primitives/arrays/records — stringify objects so
|
|
262
|
+
// Dash0 ingests the content as a single searchable log body rather than a
|
|
263
|
+
// nested structure.
|
|
264
|
+
const bodyText = typeof body === "string" ? body : JSON.stringify(body);
|
|
265
|
+
getEvalLogger().emit({
|
|
266
|
+
severityNumber: SeverityNumber.INFO,
|
|
267
|
+
severityText: "INFO",
|
|
268
|
+
body: bodyText,
|
|
269
|
+
attributes: { "eval.event": name, ...(attributes ?? {}) },
|
|
270
|
+
});
|
|
271
|
+
}
|
|
272
|
+
/**
|
|
273
|
+
* Flush and shut down the active providers (traces + logs). Call this
|
|
274
|
+
* before `process.exit()` in detached processes so batched signals are
|
|
275
|
+
* not lost. No-op if neither provider is active.
|
|
193
276
|
*/
|
|
194
277
|
export async function shutdownEvalOtel() {
|
|
195
|
-
|
|
196
|
-
|
|
278
|
+
const tasks = [];
|
|
279
|
+
if (activeProvider) {
|
|
280
|
+
tasks.push(activeProvider.forceFlush().then(() => activeProvider?.shutdown()));
|
|
281
|
+
}
|
|
282
|
+
if (activeLoggerProvider) {
|
|
283
|
+
tasks.push(activeLoggerProvider.forceFlush().then(() => activeLoggerProvider?.shutdown()));
|
|
284
|
+
}
|
|
197
285
|
try {
|
|
198
|
-
await
|
|
199
|
-
await activeProvider.shutdown();
|
|
286
|
+
await Promise.all(tasks);
|
|
200
287
|
}
|
|
201
288
|
catch {
|
|
202
289
|
// shutdown is best-effort
|
|
203
290
|
}
|
|
204
291
|
finally {
|
|
205
292
|
activeProvider = null;
|
|
293
|
+
activeLoggerProvider = null;
|
|
206
294
|
}
|
|
207
295
|
}
|
|
208
296
|
/**
|
|
@@ -210,12 +298,17 @@ export async function shutdownEvalOtel() {
|
|
|
210
298
|
* starts fresh. Not part of the public API.
|
|
211
299
|
*/
|
|
212
300
|
export function __resetEvalOtelForTests() {
|
|
213
|
-
// Tear down any
|
|
214
|
-
// from the global OTel API
|
|
215
|
-
//
|
|
301
|
+
// Tear down any providers left over from a previous test. This
|
|
302
|
+
// un-registers from the global OTel API so `trace.getTracer()` /
|
|
303
|
+
// `logs.getLogger()` fall back to no-op until re-registered.
|
|
216
304
|
if (activeProvider) {
|
|
217
305
|
void activeProvider.shutdown().catch(() => { });
|
|
218
306
|
}
|
|
307
|
+
if (activeLoggerProvider) {
|
|
308
|
+
void activeLoggerProvider.shutdown().catch(() => { });
|
|
309
|
+
}
|
|
219
310
|
activeProvider = null;
|
|
311
|
+
activeLoggerProvider = null;
|
|
220
312
|
trace.disable();
|
|
313
|
+
logs.disable();
|
|
221
314
|
}
|
package/dist/lib/eval/persistent-evaluator.js
CHANGED
|
@@ -13,7 +13,7 @@ import { getProjectGroupId } from "../config.js";
|
|
|
13
13
|
import { readUnprocessedHighlights } from "../highlights/highlights.js";
|
|
14
14
|
import { ingestScorecard } from "./findings.js";
|
|
15
15
|
import { EvalLogWriter } from "./log-writer.js";
|
|
16
|
-
import { initEvalOtel, shutdownEvalOtel, withSpan } from "./otel.js";
|
|
16
|
+
import { initEvalOtel, initEvalOtelLogs, logEvalContent, shutdownEvalOtel, withSpan, } from "./otel.js";
|
|
17
17
|
import { buildEvaluatorPrompt } from "./prompt-builder.js";
|
|
18
18
|
import { V1_RUBRIC } from "./rubric.js";
|
|
19
19
|
function getSessionPath(projectRoot) {
|
|
@@ -112,6 +112,7 @@ async function spawnClaude(args, prompt, cwd) {
|
|
|
112
112
|
*/
|
|
113
113
|
export async function runPersistentEval(opts) {
|
|
114
114
|
const tracer = initEvalOtel(opts.projectRoot);
|
|
115
|
+
initEvalOtelLogs(opts.projectRoot);
|
|
115
116
|
const source = process.env.INDUSK_EVAL_SOURCE ?? "commit";
|
|
116
117
|
const projectGroup = getProjectGroupId(opts.projectRoot);
|
|
117
118
|
// Peek at the highlights queue before spawning — gives us observability
|
|
@@ -135,7 +136,17 @@ export async function runPersistentEval(opts) {
|
|
|
135
136
|
const session = await withSpan(tracer, "eval.read_session", undefined, () => readSession(opts.projectRoot));
|
|
136
137
|
rootSpan.setAttribute("resumed", session !== null);
|
|
137
138
|
try {
|
|
138
|
-
const { args, prompt } = await withSpan(tracer, "eval.build_prompt", { resumed: session !== null }, () => {
|
|
139
|
+
const { args, prompt } = await withSpan(tracer, "eval.build_prompt", { resumed: session !== null }, (span) => {
|
|
140
|
+
const built = buildArgsAndPrompt();
|
|
141
|
+
span.setAttribute("prompt.length", built.prompt.length);
|
|
142
|
+
span.setAttribute("prompt.kind", session ? "resume" : "full");
|
|
143
|
+
logEvalContent("prompt", built.prompt, {
|
|
144
|
+
"prompt.length": built.prompt.length,
|
|
145
|
+
"prompt.kind": session ? "resume" : "full",
|
|
146
|
+
});
|
|
147
|
+
return built;
|
|
148
|
+
});
|
|
149
|
+
function buildArgsAndPrompt() {
|
|
139
150
|
if (session) {
|
|
140
151
|
const resumePrompt = `Evaluate a new commit. Change ID: ${opts.changeId}
|
|
141
152
|
|
|
@@ -175,16 +186,24 @@ Output ONLY the JSON scorecard as before — no commentary.`;
|
|
|
175
186
|
projectGroup,
|
|
176
187
|
}),
|
|
177
188
|
};
|
|
178
|
-
}
|
|
189
|
+
}
|
|
179
190
|
const claudeResult = await withSpan(tracer, "eval.spawn_claude", {
|
|
180
191
|
"args.resumed": session !== null,
|
|
181
192
|
"args.model": session ? "(resumed)" : "opus",
|
|
182
193
|
}, async (span) => {
|
|
183
194
|
const spawned = await spawnClaude(args, prompt, opts.projectRoot);
|
|
184
195
|
span.setAttribute("exit.code", spawned.code ?? -1);
|
|
196
|
+
span.setAttribute("stdout.length", spawned.stdout.length);
|
|
185
197
|
if (spawned.code !== 0) {
|
|
186
198
|
span.setAttribute("exit.stderr_tail", spawned.stderr.slice(-500));
|
|
199
|
+
logEvalContent("claude.error", spawned.stderr, {
|
|
200
|
+
"exit.code": spawned.code ?? -1,
|
|
201
|
+
});
|
|
187
202
|
}
|
|
203
|
+
logEvalContent("claude.stdout", spawned.stdout, {
|
|
204
|
+
"stdout.length": spawned.stdout.length,
|
|
205
|
+
"exit.code": spawned.code ?? -1,
|
|
206
|
+
});
|
|
188
207
|
return spawned;
|
|
189
208
|
});
|
|
190
209
|
if (claudeResult.code !== 0) {
|
|
@@ -210,6 +229,26 @@ Output ONLY the JSON scorecard as before — no commentary.`;
|
|
|
210
229
|
if (parsed.usage)
|
|
211
230
|
scorecard.usage = parsed.usage;
|
|
212
231
|
scorecard.telemetryPosted = false;
|
|
232
|
+
// Carry scorecard-level content onto the root span for at-a-glance debugging in Dash0
|
|
233
|
+
rootSpan.setAttribute("scorecard.status", "ok");
|
|
234
|
+
rootSpan.setAttribute("scorecard.question_count", scorecard.questions?.length ?? 0);
|
|
235
|
+
if (scorecard.summary) {
|
|
236
|
+
rootSpan.setAttribute("scorecard.summary", scorecard.summary.slice(0, 500));
|
|
237
|
+
}
|
|
238
|
+
if (scorecard.usage) {
|
|
239
|
+
rootSpan.setAttribute("scorecard.cost_usd", scorecard.usage.costUsd);
|
|
240
|
+
rootSpan.setAttribute("scorecard.duration_ms", scorecard.usage.durationMs);
|
|
241
|
+
rootSpan.setAttribute("scorecard.input_tokens", scorecard.usage.inputTokens);
|
|
242
|
+
rootSpan.setAttribute("scorecard.output_tokens", scorecard.usage.outputTokens);
|
|
243
|
+
}
|
|
244
|
+
const answerCounts = { yes: 0, no: 0, partial: 0 };
|
|
245
|
+
for (const q of scorecard.questions ?? []) {
|
|
246
|
+
if (q.answer in answerCounts)
|
|
247
|
+
answerCounts[q.answer]++;
|
|
248
|
+
}
|
|
249
|
+
rootSpan.setAttribute("scorecard.answers.yes", answerCounts.yes);
|
|
250
|
+
rootSpan.setAttribute("scorecard.answers.no", answerCounts.no);
|
|
251
|
+
rootSpan.setAttribute("scorecard.answers.partial", answerCounts.partial);
|
|
213
252
|
await withSpan(tracer, "eval.update_session", undefined, () => {
|
|
214
253
|
const newSession = {
|
|
215
254
|
sessionId: parsed.sessionId ?? session?.sessionId ?? "unknown",
|
|
@@ -222,17 +261,28 @@ Output ONLY the JSON scorecard as before — no commentary.`;
|
|
|
222
261
|
await withSpan(tracer, "eval.write_scorecard", undefined, async () => {
|
|
223
262
|
await logWriter.append(scorecard);
|
|
224
263
|
ingestScorecard(opts.projectRoot, scorecard);
|
|
264
|
+
logEvalContent("scorecard", JSON.stringify(scorecard), {
|
|
265
|
+
"scorecard.question_count": scorecard.questions?.length ?? 0,
|
|
266
|
+
"scorecard.summary_length": scorecard.summary?.length ?? 0,
|
|
267
|
+
});
|
|
225
268
|
});
|
|
226
269
|
return scorecard;
|
|
227
270
|
}
|
|
228
271
|
catch (err) {
|
|
272
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
273
|
+
const stack = err instanceof Error ? (err.stack ?? "") : "";
|
|
274
|
+
rootSpan.setAttribute("scorecard.status", "error");
|
|
275
|
+
rootSpan.setAttribute("error.message", msg.slice(0, 500));
|
|
276
|
+
logEvalContent("error", stack || msg, {
|
|
277
|
+
"error.message": msg.slice(0, 500),
|
|
278
|
+
});
|
|
229
279
|
const errorEntry = {
|
|
230
280
|
version: 1,
|
|
231
281
|
timestamp: new Date().toISOString(),
|
|
232
282
|
mode: opts.mode,
|
|
233
283
|
changeId: opts.changeId,
|
|
234
284
|
error: true,
|
|
235
|
-
message:
|
|
285
|
+
message: msg,
|
|
236
286
|
};
|
|
237
287
|
await logWriter.append(errorEntry);
|
|
238
288
|
return errorEntry;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@infinitedusky/indusk-mcp",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.19.0",
|
|
4
4
|
"description": "InDusk development system — skills, MCP tools, and CLI for structured AI-assisted development",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"files": [
|
|
@@ -29,8 +29,11 @@
|
|
|
29
29
|
"dependencies": {
|
|
30
30
|
"@modelcontextprotocol/sdk": "^1.12.1",
|
|
31
31
|
"@opentelemetry/api": "^1.9.0",
|
|
32
|
+
"@opentelemetry/api-logs": "^0.214.0",
|
|
33
|
+
"@opentelemetry/exporter-logs-otlp-http": "^0.214.0",
|
|
32
34
|
"@opentelemetry/exporter-trace-otlp-http": "^0.214.0",
|
|
33
35
|
"@opentelemetry/resources": "^2.6.0",
|
|
36
|
+
"@opentelemetry/sdk-logs": "^0.214.0",
|
|
34
37
|
"@opentelemetry/sdk-trace-base": "^2.6.0",
|
|
35
38
|
"@opentelemetry/sdk-trace-node": "^2.6.0",
|
|
36
39
|
"@opentelemetry/semantic-conventions": "^1.40.0",
|