@infinitedusky/indusk-mcp 1.18.1 → 1.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/lib/eval/otel.d.ts +36 -7
- package/dist/lib/eval/otel.js +141 -16
- package/dist/lib/eval/persistent-evaluator.js +54 -4
- package/package.json +4 -1
package/dist/lib/eval/otel.d.ts
CHANGED
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
* throws, same behavior. The evaluator never fails because of OTel.
|
|
13
13
|
*/
|
|
14
14
|
import { type Attributes, type Span, type Tracer } from "@opentelemetry/api";
|
|
15
|
+
import { type Logger } from "@opentelemetry/api-logs";
|
|
15
16
|
export interface EvalOtelConfig {
|
|
16
17
|
enabled: boolean;
|
|
17
18
|
endpoint: string | null;
|
|
@@ -19,14 +20,21 @@ export interface EvalOtelConfig {
|
|
|
19
20
|
}
|
|
20
21
|
/**
|
|
21
22
|
* Pure predicate — reads `.indusk/config.json` `eval.otel.{enabled,dataset}` and
|
|
22
|
-
* the `INDUSK_EVAL_OTEL` / `INDUSK_EVAL_OTEL_DATASET` / `
|
|
23
|
-
* env vars. Does not init anything or touch the network.
|
|
23
|
+
* the `INDUSK_EVAL_OTEL` / `INDUSK_EVAL_OTEL_DATASET` / `EVAL_AGENT_DATASET` /
|
|
24
|
+
* `OTEL_EXPORTER_OTLP_ENDPOINT` env vars. Does not init anything or touch the network.
|
|
24
25
|
*
|
|
25
26
|
* Resolution:
|
|
26
27
|
* - `enabled`: `INDUSK_EVAL_OTEL=1` (truthy) wins, else config `eval.otel.enabled`, else false.
|
|
27
28
|
* - `endpoint`: `OTEL_EXPORTER_OTLP_ENDPOINT` (null if unset).
|
|
28
|
-
* - `dataset
|
|
29
|
-
*
|
|
29
|
+
* - `dataset` (priority, highest → lowest):
|
|
30
|
+
* 1. `INDUSK_EVAL_OTEL_DATASET` env var (explicit per-invocation override)
|
|
31
|
+
* 2. `EVAL_AGENT_DATASET` env var (composable.env convention — see env/components/dash0.env)
|
|
32
|
+
* 3. `.indusk/config.json` `eval.otel.dataset`
|
|
33
|
+
* 4. `"agent"` default
|
|
34
|
+
*
|
|
35
|
+
* Sent as the `Dash0-Dataset` header on every OTLP export. Also rewritten into
|
|
36
|
+
* `OTEL_EXPORTER_OTLP_HEADERS` if present there (env headers beat constructor
|
|
37
|
+
* headers per OTel spec — so we fix the env header at the source).
|
|
30
38
|
*/
|
|
31
39
|
export declare function isEvalOtelEnabled(projectRoot: string): EvalOtelConfig;
|
|
32
40
|
/**
|
|
@@ -49,9 +57,30 @@ export declare function initEvalOtel(projectRoot: string): Tracer;
|
|
|
49
57
|
*/
|
|
50
58
|
export declare function withSpan<T>(tracer: Tracer, name: string, attrs: Attributes | undefined, fn: (span: Span) => Promise<T> | T): Promise<T>;
|
|
51
59
|
/**
|
|
52
|
-
*
|
|
53
|
-
*
|
|
54
|
-
*
|
|
60
|
+
* Initialize the OTel logs pipeline alongside traces. Returns a Logger —
|
|
61
|
+
* real when enabled + endpoint set, no-op otherwise. Shares the same
|
|
62
|
+
* config gating + Dash0 dataset routing as `initEvalOtel`. Safe to call
|
|
63
|
+
* multiple times.
|
|
64
|
+
*
|
|
65
|
+
* Log records emitted via `getEvalLogger().emit(...)` automatically
|
|
66
|
+
* correlate with the active span via trace_id / span_id.
|
|
67
|
+
*/
|
|
68
|
+
export declare function initEvalOtelLogs(projectRoot: string): Logger;
|
|
69
|
+
/**
|
|
70
|
+
* Accessor for the eval logger. Always safe to call — returns a no-op
|
|
71
|
+
* logger when logs aren't initialized.
|
|
72
|
+
*/
|
|
73
|
+
export declare function getEvalLogger(): Logger;
|
|
74
|
+
/**
|
|
75
|
+
* Emit an info-severity log record with an arbitrary body. Shorthand for
|
|
76
|
+
* `getEvalLogger().emit(...)`. When called inside an active span, the
|
|
77
|
+
* SDK attaches trace_id + span_id automatically.
|
|
78
|
+
*/
|
|
79
|
+
export declare function logEvalContent(name: string, body: string | Record<string, unknown>, attributes?: Record<string, string | number | boolean>): void;
|
|
80
|
+
/**
|
|
81
|
+
* Flush and shut down the active providers (traces + logs). Call this
|
|
82
|
+
* before `process.exit()` in detached processes so batched signals are
|
|
83
|
+
* not lost. No-op if neither provider is active.
|
|
55
84
|
*/
|
|
56
85
|
export declare function shutdownEvalOtel(): Promise<void>;
|
|
57
86
|
/**
|
package/dist/lib/eval/otel.js
CHANGED
|
@@ -14,8 +14,11 @@
|
|
|
14
14
|
import { appendFileSync, existsSync, mkdirSync, readFileSync } from "node:fs";
|
|
15
15
|
import { join, resolve } from "node:path";
|
|
16
16
|
import { SpanStatusCode, trace } from "@opentelemetry/api";
|
|
17
|
+
import { logs, SeverityNumber } from "@opentelemetry/api-logs";
|
|
18
|
+
import { OTLPLogExporter } from "@opentelemetry/exporter-logs-otlp-http";
|
|
17
19
|
import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
|
|
18
20
|
import { resourceFromAttributes } from "@opentelemetry/resources";
|
|
21
|
+
import { BatchLogRecordProcessor, LoggerProvider } from "@opentelemetry/sdk-logs";
|
|
19
22
|
import { BatchSpanProcessor } from "@opentelemetry/sdk-trace-base";
|
|
20
23
|
import { NodeTracerProvider } from "@opentelemetry/sdk-trace-node";
|
|
21
24
|
import { ATTR_SERVICE_NAME } from "@opentelemetry/semantic-conventions";
|
|
@@ -34,19 +37,27 @@ function syslog(projectRoot, msg) {
|
|
|
34
37
|
const DEFAULT_DATASET = "agent";
|
|
35
38
|
/**
|
|
36
39
|
* Pure predicate — reads `.indusk/config.json` `eval.otel.{enabled,dataset}` and
|
|
37
|
-
* the `INDUSK_EVAL_OTEL` / `INDUSK_EVAL_OTEL_DATASET` / `
|
|
38
|
-
* env vars. Does not init anything or touch the network.
|
|
40
|
+
* the `INDUSK_EVAL_OTEL` / `INDUSK_EVAL_OTEL_DATASET` / `EVAL_AGENT_DATASET` /
|
|
41
|
+
* `OTEL_EXPORTER_OTLP_ENDPOINT` env vars. Does not init anything or touch the network.
|
|
39
42
|
*
|
|
40
43
|
* Resolution:
|
|
41
44
|
* - `enabled`: `INDUSK_EVAL_OTEL=1` (truthy) wins, else config `eval.otel.enabled`, else false.
|
|
42
45
|
* - `endpoint`: `OTEL_EXPORTER_OTLP_ENDPOINT` (null if unset).
|
|
43
|
-
* - `dataset
|
|
44
|
-
*
|
|
46
|
+
* - `dataset` (priority, highest → lowest):
|
|
47
|
+
* 1. `INDUSK_EVAL_OTEL_DATASET` env var (explicit per-invocation override)
|
|
48
|
+
* 2. `EVAL_AGENT_DATASET` env var (composable.env convention — see env/components/dash0.env)
|
|
49
|
+
* 3. `.indusk/config.json` `eval.otel.dataset`
|
|
50
|
+
* 4. `"agent"` default
|
|
51
|
+
*
|
|
52
|
+
* Sent as the `Dash0-Dataset` header on every OTLP export. Also rewritten into
|
|
53
|
+
* `OTEL_EXPORTER_OTLP_HEADERS` if present there (env headers beat constructor
|
|
54
|
+
* headers per OTel spec — so we fix the env header at the source).
|
|
45
55
|
*/
|
|
46
56
|
export function isEvalOtelEnabled(projectRoot) {
|
|
47
57
|
const envFlag = process.env.INDUSK_EVAL_OTEL;
|
|
48
58
|
const endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT ?? null;
|
|
49
|
-
const
|
|
59
|
+
const explicitDataset = process.env.INDUSK_EVAL_OTEL_DATASET;
|
|
60
|
+
const composableDataset = process.env.EVAL_AGENT_DATASET;
|
|
50
61
|
let configEnabled = false;
|
|
51
62
|
let configDataset;
|
|
52
63
|
const configPath = join(projectRoot, ".indusk", "config.json");
|
|
@@ -63,14 +74,34 @@ export function isEvalOtelEnabled(projectRoot) {
|
|
|
63
74
|
}
|
|
64
75
|
}
|
|
65
76
|
const envForcesEnabled = envFlag !== undefined && envFlag !== "" && envFlag !== "0" && envFlag.toLowerCase() !== "false";
|
|
66
|
-
const dataset =
|
|
77
|
+
const dataset = (explicitDataset && explicitDataset !== "" && explicitDataset) ||
|
|
78
|
+
(composableDataset && composableDataset !== "" && composableDataset) ||
|
|
79
|
+
configDataset ||
|
|
80
|
+
DEFAULT_DATASET;
|
|
67
81
|
return {
|
|
68
82
|
enabled: envForcesEnabled || configEnabled,
|
|
69
83
|
endpoint,
|
|
70
84
|
dataset,
|
|
71
85
|
};
|
|
72
86
|
}
|
|
87
|
+
/**
|
|
88
|
+
* Rewrite the `Dash0-Dataset=<old>` entry in `OTEL_EXPORTER_OTLP_HEADERS` to
|
|
89
|
+
* `Dash0-Dataset=<target>`. OTel spec says env-set headers override constructor
|
|
90
|
+
* headers, so we have to fix the env directly for routing to work when the user's
|
|
91
|
+
* shell already sets `OTEL_EXPORTER_OTLP_HEADERS` via composable.env.
|
|
92
|
+
*
|
|
93
|
+
* No-op if the env var is unset or doesn't contain `Dash0-Dataset=`.
|
|
94
|
+
*/
|
|
95
|
+
function rewriteDatasetInEnvHeaders(target) {
|
|
96
|
+
const current = process.env.OTEL_EXPORTER_OTLP_HEADERS;
|
|
97
|
+
if (!current || !current.includes("Dash0-Dataset="))
|
|
98
|
+
return;
|
|
99
|
+
const rewritten = current.replace(/Dash0-Dataset=[^,]*/g, `Dash0-Dataset=${target}`);
|
|
100
|
+
process.env.OTEL_EXPORTER_OTLP_HEADERS = rewritten;
|
|
101
|
+
}
|
|
102
|
+
const LOGGER_NAME = "@infinitedusky/indusk-mcp/eval";
|
|
73
103
|
let activeProvider = null;
|
|
104
|
+
let activeLoggerProvider = null;
|
|
74
105
|
/**
|
|
75
106
|
* Initialize OTel tracing for the evaluator if enabled + endpoint set.
|
|
76
107
|
* Returns a Tracer — real when enabled, no-op when not.
|
|
@@ -92,6 +123,12 @@ export function initEvalOtel(projectRoot) {
|
|
|
92
123
|
if (activeProvider) {
|
|
93
124
|
return trace.getTracer(TRACER_NAME);
|
|
94
125
|
}
|
|
126
|
+
// Ensure env-set OTEL_EXPORTER_OTLP_HEADERS routes to the eval agent's
|
|
127
|
+
// dataset. Env headers beat constructor headers per OTel spec — so if the
|
|
128
|
+
// user's shell (composable.env) already set Dash0-Dataset for project
|
|
129
|
+
// telemetry, we rewrite it in-place to the eval agent dataset before the
|
|
130
|
+
// exporter reads it.
|
|
131
|
+
rewriteDatasetInEnvHeaders(dataset);
|
|
95
132
|
// Build exporter headers. We pass Authorization and Dash0-Dataset in the
|
|
96
133
|
// constructor rather than relying on OTEL_EXPORTER_OTLP_HEADERS env parsing,
|
|
97
134
|
// because the OTel SDK's env parser has proven unreliable for tokens with
|
|
@@ -155,22 +192,105 @@ export async function withSpan(tracer, name, attrs, fn) {
|
|
|
155
192
|
});
|
|
156
193
|
}
|
|
157
194
|
/**
|
|
158
|
-
*
|
|
159
|
-
*
|
|
160
|
-
*
|
|
195
|
+
* Initialize the OTel logs pipeline alongside traces. Returns a Logger —
|
|
196
|
+
* real when enabled + endpoint set, no-op otherwise. Shares the same
|
|
197
|
+
* config gating + Dash0 dataset routing as `initEvalOtel`. Safe to call
|
|
198
|
+
* multiple times.
|
|
199
|
+
*
|
|
200
|
+
* Log records emitted via `getEvalLogger().emit(...)` automatically
|
|
201
|
+
* correlate with the active span via trace_id / span_id.
|
|
202
|
+
*/
|
|
203
|
+
export function initEvalOtelLogs(projectRoot) {
|
|
204
|
+
const { enabled, endpoint, dataset } = isEvalOtelEnabled(projectRoot);
|
|
205
|
+
if (!enabled)
|
|
206
|
+
return logs.getLogger(LOGGER_NAME);
|
|
207
|
+
if (!endpoint) {
|
|
208
|
+
syslog(projectRoot, "eval.otel.logs — endpoint unset; falling back to no-op logger");
|
|
209
|
+
return logs.getLogger(LOGGER_NAME);
|
|
210
|
+
}
|
|
211
|
+
if (activeLoggerProvider)
|
|
212
|
+
return logs.getLogger(LOGGER_NAME);
|
|
213
|
+
rewriteDatasetInEnvHeaders(dataset);
|
|
214
|
+
const headers = { "Dash0-Dataset": dataset };
|
|
215
|
+
if (process.env.DASH0_API_TOKEN) {
|
|
216
|
+
headers.Authorization = `Bearer ${process.env.DASH0_API_TOKEN}`;
|
|
217
|
+
}
|
|
218
|
+
try {
|
|
219
|
+
const exporter = new OTLPLogExporter({
|
|
220
|
+
url: endpoint.endsWith("/v1/logs") ? endpoint : `${endpoint.replace(/\/$/, "")}/v1/logs`,
|
|
221
|
+
headers,
|
|
222
|
+
});
|
|
223
|
+
const provider = new LoggerProvider({
|
|
224
|
+
resource: resourceFromAttributes({ [ATTR_SERVICE_NAME]: SERVICE_NAME }),
|
|
225
|
+
processors: [new BatchLogRecordProcessor(exporter)],
|
|
226
|
+
});
|
|
227
|
+
// setGlobalLoggerProvider returns false if one is already registered
|
|
228
|
+
// (e.g., a test's InMemoryLogRecordExporter provider). Respect that —
|
|
229
|
+
// only retain ownership (and tear down at shutdown) if we actually
|
|
230
|
+
// registered ours.
|
|
231
|
+
const accepted = logs.setGlobalLoggerProvider(provider);
|
|
232
|
+
if (accepted) {
|
|
233
|
+
activeLoggerProvider = provider;
|
|
234
|
+
syslog(projectRoot, `eval.otel.logs initialized — endpoint: ${endpoint}, dataset: ${dataset}`);
|
|
235
|
+
}
|
|
236
|
+
else {
|
|
237
|
+
syslog(projectRoot, "eval.otel.logs — global provider already set; using existing");
|
|
238
|
+
// Fire-and-forget shutdown of the unused provider
|
|
239
|
+
void provider.shutdown().catch(() => { });
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
catch (err) {
|
|
243
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
244
|
+
syslog(projectRoot, `eval.otel.logs init failed — falling back to no-op: ${message}`);
|
|
245
|
+
}
|
|
246
|
+
return logs.getLogger(LOGGER_NAME);
|
|
247
|
+
}
|
|
248
|
+
/**
|
|
249
|
+
* Accessor for the eval logger. Always safe to call — returns a no-op
|
|
250
|
+
* logger when logs aren't initialized.
|
|
251
|
+
*/
|
|
252
|
+
export function getEvalLogger() {
|
|
253
|
+
return logs.getLogger(LOGGER_NAME);
|
|
254
|
+
}
|
|
255
|
+
/**
|
|
256
|
+
* Emit an info-severity log record with an arbitrary body. Shorthand for
|
|
257
|
+
* `getEvalLogger().emit(...)`. When called inside an active span, the
|
|
258
|
+
* SDK attaches trace_id + span_id automatically.
|
|
259
|
+
*/
|
|
260
|
+
export function logEvalContent(name, body, attributes) {
|
|
261
|
+
// AnyValue requires plain primitives/arrays/records — stringify objects so
|
|
262
|
+
// Dash0 ingests the content as a single searchable log body rather than a
|
|
263
|
+
// nested structure.
|
|
264
|
+
const bodyText = typeof body === "string" ? body : JSON.stringify(body);
|
|
265
|
+
getEvalLogger().emit({
|
|
266
|
+
severityNumber: SeverityNumber.INFO,
|
|
267
|
+
severityText: "INFO",
|
|
268
|
+
body: bodyText,
|
|
269
|
+
attributes: { "eval.event": name, ...(attributes ?? {}) },
|
|
270
|
+
});
|
|
271
|
+
}
|
|
272
|
+
/**
|
|
273
|
+
* Flush and shut down the active providers (traces + logs). Call this
|
|
274
|
+
* before `process.exit()` in detached processes so batched signals are
|
|
275
|
+
* not lost. No-op if neither provider is active.
|
|
161
276
|
*/
|
|
162
277
|
export async function shutdownEvalOtel() {
|
|
163
|
-
|
|
164
|
-
|
|
278
|
+
const tasks = [];
|
|
279
|
+
if (activeProvider) {
|
|
280
|
+
tasks.push(activeProvider.forceFlush().then(() => activeProvider?.shutdown()));
|
|
281
|
+
}
|
|
282
|
+
if (activeLoggerProvider) {
|
|
283
|
+
tasks.push(activeLoggerProvider.forceFlush().then(() => activeLoggerProvider?.shutdown()));
|
|
284
|
+
}
|
|
165
285
|
try {
|
|
166
|
-
await
|
|
167
|
-
await activeProvider.shutdown();
|
|
286
|
+
await Promise.all(tasks);
|
|
168
287
|
}
|
|
169
288
|
catch {
|
|
170
289
|
// shutdown is best-effort
|
|
171
290
|
}
|
|
172
291
|
finally {
|
|
173
292
|
activeProvider = null;
|
|
293
|
+
activeLoggerProvider = null;
|
|
174
294
|
}
|
|
175
295
|
}
|
|
176
296
|
/**
|
|
@@ -178,12 +298,17 @@ export async function shutdownEvalOtel() {
|
|
|
178
298
|
* starts fresh. Not part of the public API.
|
|
179
299
|
*/
|
|
180
300
|
export function __resetEvalOtelForTests() {
|
|
181
|
-
// Tear down any
|
|
182
|
-
// from the global OTel API
|
|
183
|
-
//
|
|
301
|
+
// Tear down any providers left over from a previous test. This
|
|
302
|
+
// un-registers from the global OTel API so `trace.getTracer()` /
|
|
303
|
+
// `logs.getLogger()` fall back to no-op until re-registered.
|
|
184
304
|
if (activeProvider) {
|
|
185
305
|
void activeProvider.shutdown().catch(() => { });
|
|
186
306
|
}
|
|
307
|
+
if (activeLoggerProvider) {
|
|
308
|
+
void activeLoggerProvider.shutdown().catch(() => { });
|
|
309
|
+
}
|
|
187
310
|
activeProvider = null;
|
|
311
|
+
activeLoggerProvider = null;
|
|
188
312
|
trace.disable();
|
|
313
|
+
logs.disable();
|
|
189
314
|
}
|
|
package/dist/lib/eval/persistent-evaluator.js
CHANGED
|
@@ -13,7 +13,7 @@ import { getProjectGroupId } from "../config.js";
|
|
|
13
13
|
import { readUnprocessedHighlights } from "../highlights/highlights.js";
|
|
14
14
|
import { ingestScorecard } from "./findings.js";
|
|
15
15
|
import { EvalLogWriter } from "./log-writer.js";
|
|
16
|
-
import { initEvalOtel, shutdownEvalOtel, withSpan } from "./otel.js";
|
|
16
|
+
import { initEvalOtel, initEvalOtelLogs, logEvalContent, shutdownEvalOtel, withSpan, } from "./otel.js";
|
|
17
17
|
import { buildEvaluatorPrompt } from "./prompt-builder.js";
|
|
18
18
|
import { V1_RUBRIC } from "./rubric.js";
|
|
19
19
|
function getSessionPath(projectRoot) {
|
|
@@ -112,6 +112,7 @@ async function spawnClaude(args, prompt, cwd) {
|
|
|
112
112
|
*/
|
|
113
113
|
export async function runPersistentEval(opts) {
|
|
114
114
|
const tracer = initEvalOtel(opts.projectRoot);
|
|
115
|
+
initEvalOtelLogs(opts.projectRoot);
|
|
115
116
|
const source = process.env.INDUSK_EVAL_SOURCE ?? "commit";
|
|
116
117
|
const projectGroup = getProjectGroupId(opts.projectRoot);
|
|
117
118
|
// Peek at the highlights queue before spawning — gives us observability
|
|
@@ -135,7 +136,17 @@ export async function runPersistentEval(opts) {
|
|
|
135
136
|
const session = await withSpan(tracer, "eval.read_session", undefined, () => readSession(opts.projectRoot));
|
|
136
137
|
rootSpan.setAttribute("resumed", session !== null);
|
|
137
138
|
try {
|
|
138
|
-
const { args, prompt } = await withSpan(tracer, "eval.build_prompt", { resumed: session !== null }, () => {
|
|
139
|
+
const { args, prompt } = await withSpan(tracer, "eval.build_prompt", { resumed: session !== null }, (span) => {
|
|
140
|
+
const built = buildArgsAndPrompt();
|
|
141
|
+
span.setAttribute("prompt.length", built.prompt.length);
|
|
142
|
+
span.setAttribute("prompt.kind", session ? "resume" : "full");
|
|
143
|
+
logEvalContent("prompt", built.prompt, {
|
|
144
|
+
"prompt.length": built.prompt.length,
|
|
145
|
+
"prompt.kind": session ? "resume" : "full",
|
|
146
|
+
});
|
|
147
|
+
return built;
|
|
148
|
+
});
|
|
149
|
+
function buildArgsAndPrompt() {
|
|
139
150
|
if (session) {
|
|
140
151
|
const resumePrompt = `Evaluate a new commit. Change ID: ${opts.changeId}
|
|
141
152
|
|
|
@@ -175,16 +186,24 @@ Output ONLY the JSON scorecard as before — no commentary.`;
|
|
|
175
186
|
projectGroup,
|
|
176
187
|
}),
|
|
177
188
|
};
|
|
178
|
-
}
|
|
189
|
+
}
|
|
179
190
|
const claudeResult = await withSpan(tracer, "eval.spawn_claude", {
|
|
180
191
|
"args.resumed": session !== null,
|
|
181
192
|
"args.model": session ? "(resumed)" : "opus",
|
|
182
193
|
}, async (span) => {
|
|
183
194
|
const spawned = await spawnClaude(args, prompt, opts.projectRoot);
|
|
184
195
|
span.setAttribute("exit.code", spawned.code ?? -1);
|
|
196
|
+
span.setAttribute("stdout.length", spawned.stdout.length);
|
|
185
197
|
if (spawned.code !== 0) {
|
|
186
198
|
span.setAttribute("exit.stderr_tail", spawned.stderr.slice(-500));
|
|
199
|
+
logEvalContent("claude.error", spawned.stderr, {
|
|
200
|
+
"exit.code": spawned.code ?? -1,
|
|
201
|
+
});
|
|
187
202
|
}
|
|
203
|
+
logEvalContent("claude.stdout", spawned.stdout, {
|
|
204
|
+
"stdout.length": spawned.stdout.length,
|
|
205
|
+
"exit.code": spawned.code ?? -1,
|
|
206
|
+
});
|
|
188
207
|
return spawned;
|
|
189
208
|
});
|
|
190
209
|
if (claudeResult.code !== 0) {
|
|
@@ -210,6 +229,26 @@ Output ONLY the JSON scorecard as before — no commentary.`;
|
|
|
210
229
|
if (parsed.usage)
|
|
211
230
|
scorecard.usage = parsed.usage;
|
|
212
231
|
scorecard.telemetryPosted = false;
|
|
232
|
+
// Carry scorecard-level content onto the root span for at-a-glance debugging in Dash0
|
|
233
|
+
rootSpan.setAttribute("scorecard.status", "ok");
|
|
234
|
+
rootSpan.setAttribute("scorecard.question_count", scorecard.questions?.length ?? 0);
|
|
235
|
+
if (scorecard.summary) {
|
|
236
|
+
rootSpan.setAttribute("scorecard.summary", scorecard.summary.slice(0, 500));
|
|
237
|
+
}
|
|
238
|
+
if (scorecard.usage) {
|
|
239
|
+
rootSpan.setAttribute("scorecard.cost_usd", scorecard.usage.costUsd);
|
|
240
|
+
rootSpan.setAttribute("scorecard.duration_ms", scorecard.usage.durationMs);
|
|
241
|
+
rootSpan.setAttribute("scorecard.input_tokens", scorecard.usage.inputTokens);
|
|
242
|
+
rootSpan.setAttribute("scorecard.output_tokens", scorecard.usage.outputTokens);
|
|
243
|
+
}
|
|
244
|
+
const answerCounts = { yes: 0, no: 0, partial: 0 };
|
|
245
|
+
for (const q of scorecard.questions ?? []) {
|
|
246
|
+
if (q.answer in answerCounts)
|
|
247
|
+
answerCounts[q.answer]++;
|
|
248
|
+
}
|
|
249
|
+
rootSpan.setAttribute("scorecard.answers.yes", answerCounts.yes);
|
|
250
|
+
rootSpan.setAttribute("scorecard.answers.no", answerCounts.no);
|
|
251
|
+
rootSpan.setAttribute("scorecard.answers.partial", answerCounts.partial);
|
|
213
252
|
await withSpan(tracer, "eval.update_session", undefined, () => {
|
|
214
253
|
const newSession = {
|
|
215
254
|
sessionId: parsed.sessionId ?? session?.sessionId ?? "unknown",
|
|
@@ -222,17 +261,28 @@ Output ONLY the JSON scorecard as before — no commentary.`;
|
|
|
222
261
|
await withSpan(tracer, "eval.write_scorecard", undefined, async () => {
|
|
223
262
|
await logWriter.append(scorecard);
|
|
224
263
|
ingestScorecard(opts.projectRoot, scorecard);
|
|
264
|
+
logEvalContent("scorecard", JSON.stringify(scorecard), {
|
|
265
|
+
"scorecard.question_count": scorecard.questions?.length ?? 0,
|
|
266
|
+
"scorecard.summary_length": scorecard.summary?.length ?? 0,
|
|
267
|
+
});
|
|
225
268
|
});
|
|
226
269
|
return scorecard;
|
|
227
270
|
}
|
|
228
271
|
catch (err) {
|
|
272
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
273
|
+
const stack = err instanceof Error ? (err.stack ?? "") : "";
|
|
274
|
+
rootSpan.setAttribute("scorecard.status", "error");
|
|
275
|
+
rootSpan.setAttribute("error.message", msg.slice(0, 500));
|
|
276
|
+
logEvalContent("error", stack || msg, {
|
|
277
|
+
"error.message": msg.slice(0, 500),
|
|
278
|
+
});
|
|
229
279
|
const errorEntry = {
|
|
230
280
|
version: 1,
|
|
231
281
|
timestamp: new Date().toISOString(),
|
|
232
282
|
mode: opts.mode,
|
|
233
283
|
changeId: opts.changeId,
|
|
234
284
|
error: true,
|
|
235
|
-
message:
|
|
285
|
+
message: msg,
|
|
236
286
|
};
|
|
237
287
|
await logWriter.append(errorEntry);
|
|
238
288
|
return errorEntry;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@infinitedusky/indusk-mcp",
|
|
3
|
-
"version": "1.18.1",
|
|
3
|
+
"version": "1.19.0",
|
|
4
4
|
"description": "InDusk development system — skills, MCP tools, and CLI for structured AI-assisted development",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"files": [
|
|
@@ -29,8 +29,11 @@
|
|
|
29
29
|
"dependencies": {
|
|
30
30
|
"@modelcontextprotocol/sdk": "^1.12.1",
|
|
31
31
|
"@opentelemetry/api": "^1.9.0",
|
|
32
|
+
"@opentelemetry/api-logs": "^0.214.0",
|
|
33
|
+
"@opentelemetry/exporter-logs-otlp-http": "^0.214.0",
|
|
32
34
|
"@opentelemetry/exporter-trace-otlp-http": "^0.214.0",
|
|
33
35
|
"@opentelemetry/resources": "^2.6.0",
|
|
36
|
+
"@opentelemetry/sdk-logs": "^0.214.0",
|
|
34
37
|
"@opentelemetry/sdk-trace-base": "^2.6.0",
|
|
35
38
|
"@opentelemetry/sdk-trace-node": "^2.6.0",
|
|
36
39
|
"@opentelemetry/semantic-conventions": "^1.40.0",
|