@cat-factory/executor-harness 1.31.10 → 1.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/job.ts CHANGED
@@ -1,4 +1,4 @@
1
- import type { PiRunStats } from './pi.js'
1
+ import type { HarnessCallMetric, PiRunStats } from './pi.js'
2
2
  import type { HarnessKind } from './pi-workspace.js'
3
3
  import type { FailureCause } from './failure.js'
4
4
 
@@ -146,7 +146,10 @@ function parseHarnessAuth(o: Record<string, unknown>): HarnessAuthFields {
146
146
  * `..` segment) — the agent's cwd is built from this, so a hostile value must never
147
147
  * point outside the cloned repo.
148
148
  */
149
- function sanitizeServiceDirectory(value: unknown): string | undefined {
149
+ function sanitizeServiceDirectory(
150
+ value: unknown,
151
+ field = 'repo.serviceDirectory',
152
+ ): string | undefined {
150
153
  if (typeof value !== 'string') return undefined
151
154
  const normalized = value
152
155
  .trim()
@@ -156,7 +159,7 @@ function sanitizeServiceDirectory(value: unknown): string | undefined {
156
159
  const segments = normalized.split('/').filter((s) => s !== '' && s !== '.')
157
160
  if (segments.length === 0) return undefined
158
161
  if (segments.some((s) => s === '..')) {
159
- throw new Error("Invalid job: 'repo.serviceDirectory' must be a path inside the repo")
162
+ throw new Error(`Invalid job: '${field}' must be a path inside the repo`)
160
163
  }
161
164
  return segments.join('/')
162
165
  }
@@ -290,6 +293,12 @@ export interface ServiceInfraSpec {
290
293
  */
291
294
  export interface FrontendInfraSpec {
292
295
  kind: 'frontend'
296
+ /**
297
+ * The frontend app's subdirectory within the checkout (a monorepo frontend). Absent ⇒ the
298
+ * checkout root. When set, install/build/serve run there and `outputDir`/`wiremockMappingsPath`
299
+ * are resolved relative to it.
300
+ */
301
+ directory?: string
293
302
  /** Package manager for install/build. Default `pnpm`. */
294
303
  packageManager?: 'pnpm' | 'npm' | 'yarn'
295
304
  /** Explicit install command, overriding the one derived from `packageManager`. */
@@ -520,6 +529,12 @@ export interface AgentResult {
520
529
  */
521
530
  failureCause?: FailureCause
522
531
  usage?: { inputTokens: number; outputTokens: number }
532
+ /**
533
+ * Per-model-call telemetry from a subscription harness's CLI stream (absent for the
534
+ * proxy-metered Pi harness). The backend records these into `llm_call_metrics`. See
535
+ * {@link HarnessCallMetric}.
536
+ */
537
+ callMetrics?: HarnessCallMetric[]
523
538
  }
524
539
 
525
540
  /** Parse the coding-mode bootstrap spec, or undefined when absent. Validates the target. */
@@ -659,8 +674,13 @@ function parseFrontendInfraSpec(o: Record<string, unknown>): FrontendInfraSpec {
659
674
  }
660
675
  const servePort = port(o.servePort)
661
676
  const wiremockPort = port(o.wiremockPort)
677
+ // The app's monorepo subdirectory becomes the install/build/serve cwd, so it goes through the
678
+ // same escape-guard as `repo.serviceDirectory` — strip slashes and reject any `..` segment so a
679
+ // hostile value can't point the stand-up outside the cloned repo.
680
+ const directory = sanitizeServiceDirectory(o.directory, 'frontend.directory')
662
681
  return {
663
682
  kind: 'frontend',
683
+ ...(directory ? { directory } : {}),
664
684
  ...(packageManager ? { packageManager } : {}),
665
685
  ...(typeof o.install === 'string' && o.install ? { install: o.install } : {}),
666
686
  ...(typeof o.buildScript === 'string' && o.buildScript ? { buildScript: o.buildScript } : {}),
package/src/pi.ts CHANGED
@@ -414,6 +414,38 @@ export interface RunDiagnostics {
414
414
  finalAnswerEmpty: boolean
415
415
  }
416
416
 
417
+ /**
418
+ * One model call captured from a subscription harness's CLI event stream, shaped so
419
+ * the backend can record it into the same `llm_call_metrics` telemetry the LLM proxy
420
+ * writes for the Pi harness. The subscription harnesses (Claude Code / Codex) talk
421
+ * DIRECT to the vendor and never touch the proxy, so this is the only place their
422
+ * per-call bodies are observable. Claude Code's `stream-json --verbose` is a near-
423
+ * verbatim Anthropic Messages stream, so its calls carry full request/response
424
+ * bodies; Codex's `exec --json` only surfaces flat assistant text + per-turn tokens,
425
+ * so its rows are honestly thinner (no request transcript, no tool/command bodies).
426
+ */
427
+ export interface HarnessCallMetric {
428
+ /** The vendor model that served this call (from the CLI event), when reported. */
429
+ model?: string
430
+ /**
431
+ * The full request as an OpenAI-style chat array (`[{role, content}, …]`),
432
+ * JSON-stringified — the growing history as of this call. Matches the proxy's
433
+ * `promptText` shape so the telemetry chain delta-compresses + renders identically.
434
+ */
435
+ promptText: string
436
+ /** Number of messages encoded in {@link promptText} (the telemetry chain messageCount). */
437
+ messageCount: number
438
+ /** The assistant's response text, as a plain string (`''` for a tool-only turn). */
439
+ responseText: string
440
+ /** The reasoning/thinking trace, as a plain string (`''` when none). */
441
+ reasoningText: string
442
+ inputTokens: number
443
+ cachedInputTokens: number
444
+ outputTokens: number
445
+ /** The provider finish/stop reason when the CLI reports one (else null). */
446
+ finishReason: string | null
447
+ }
448
+
417
449
  /** Pi's assistant summary plus {@link PiRunStats} describing what it did. */
418
450
  export interface PiRunOutcome {
419
451
  summary: string
@@ -432,6 +464,14 @@ export interface PiRunOutcome {
432
464
  * (usage-aware rotation) and telemetry. Absent for the proxy-metered Pi harness.
433
465
  */
434
466
  usage?: { inputTokens: number; outputTokens: number }
467
+ /**
468
+ * Per-model-call telemetry lifted from a subscription harness's CLI event stream
469
+ * (Claude Code / Codex), which the backend records into `llm_call_metrics` — the
470
+ * proxy-bypassing analogue of the per-call rows the LLM proxy writes for Pi. Absent
471
+ * for the proxy-metered Pi harness (the proxy is its metering point). See
472
+ * {@link HarnessCallMetric}.
473
+ */
474
+ callMetrics?: HarnessCallMetric[]
435
475
  /** Output-quality signals (truncation / empty final answer); see {@link RunDiagnostics}. */
436
476
  diagnostics?: RunDiagnostics
437
477
  }