npm - @cat-factory/executor-harness - Versions diffs - 1.31.10 → 1.32.0 - Mend

@cat-factory/executor-harness 1.31.10 → 1.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/src/job.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import type { PiRunStats } from './pi.js'
+import type { HarnessCallMetric, PiRunStats } from './pi.js'
 import type { HarnessKind } from './pi-workspace.js'
 import type { FailureCause } from './failure.js'
@@ -146,7 +146,10 @@ function parseHarnessAuth(o: Record<string, unknown>): HarnessAuthFields {
  * `..` segment) — the agent's cwd is built from this, so a hostile value must never
  * point outside the cloned repo.
  */
-function sanitizeServiceDirectory(value: unknown): string | undefined {
+function sanitizeServiceDirectory(
+  value: unknown,
+  field = 'repo.serviceDirectory',
+): string | undefined {
   if (typeof value !== 'string') return undefined
   const normalized = value
     .trim()
@@ -156,7 +159,7 @@ function sanitizeServiceDirectory(value: unknown): string | undefined {
   const segments = normalized.split('/').filter((s) => s !== '' && s !== '.')
   if (segments.length === 0) return undefined
   if (segments.some((s) => s === '..')) {
-    throw new Error("Invalid job: 'repo.serviceDirectory' must be a path inside the repo")
+    throw new Error(`Invalid job: '${field}' must be a path inside the repo`)
   }
   return segments.join('/')
 }
@@ -290,6 +293,12 @@ export interface ServiceInfraSpec {
  */
 export interface FrontendInfraSpec {
   kind: 'frontend'
+  /**
+   * The frontend app's subdirectory within the checkout (a monorepo frontend). Absent ⇒ the
+   * checkout root. When set, install/build/serve run there and `outputDir`/`wiremockMappingsPath`
+   * are resolved relative to it.
+   */
+  directory?: string
   /** Package manager for install/build. Default `pnpm`. */
   packageManager?: 'pnpm' | 'npm' | 'yarn'
   /** Explicit install command, overriding the one derived from `packageManager`. */
@@ -520,6 +529,12 @@ export interface AgentResult {
    */
   failureCause?: FailureCause
   usage?: { inputTokens: number; outputTokens: number }
+  /**
+   * Per-model-call telemetry from a subscription harness's CLI stream (absent for the
+   * proxy-metered Pi harness). The backend records these into `llm_call_metrics`. See
+   * {@link HarnessCallMetric}.
+   */
+  callMetrics?: HarnessCallMetric[]
 }
 /** Parse the coding-mode bootstrap spec, or undefined when absent. Validates the target. */
@@ -659,8 +674,13 @@ function parseFrontendInfraSpec(o: Record<string, unknown>): FrontendInfraSpec {
   }
   const servePort = port(o.servePort)
   const wiremockPort = port(o.wiremockPort)
+  // The app's monorepo subdirectory becomes the install/build/serve cwd, so it goes through the
+  // same escape-guard as `repo.serviceDirectory` — strip slashes and reject any `..` segment so a
+  // hostile value can't point the stand-up outside the cloned repo.
+  const directory = sanitizeServiceDirectory(o.directory, 'frontend.directory')
   return {
     kind: 'frontend',
+    ...(directory ? { directory } : {}),
     ...(packageManager ? { packageManager } : {}),
     ...(typeof o.install === 'string' && o.install ? { install: o.install } : {}),
     ...(typeof o.buildScript === 'string' && o.buildScript ? { buildScript: o.buildScript } : {}),

package/src/pi.ts CHANGED Viewed

@@ -414,6 +414,38 @@ export interface RunDiagnostics {
   finalAnswerEmpty: boolean
 }
+/**
+ * One model call captured from a subscription harness's CLI event stream, shaped so
+ * the backend can record it into the same `llm_call_metrics` telemetry the LLM proxy
+ * writes for the Pi harness. The subscription harnesses (Claude Code / Codex) talk
+ * DIRECT to the vendor and never touch the proxy, so this is the only place their
+ * per-call bodies are observable. Claude Code's `stream-json --verbose` is a near-
+ * verbatim Anthropic Messages stream, so its calls carry full request/response
+ * bodies; Codex's `exec --json` only surfaces flat assistant text + per-turn tokens,
+ * so its rows are honestly thinner (no request transcript, no tool/command bodies).
+ */
+export interface HarnessCallMetric {
+  /** The vendor model that served this call (from the CLI event), when reported. */
+  model?: string
+  /**
+   * The full request as an OpenAI-style chat array (`[{role, content}, …]`),
+   * JSON-stringified — the growing history as of this call. Matches the proxy's
+   * `promptText` shape so the telemetry chain delta-compresses + renders identically.
+   */
+  promptText: string
+  /** Number of messages encoded in {@link promptText} (the telemetry chain messageCount). */
+  messageCount: number
+  /** The assistant's response text, as a plain string (`''` for a tool-only turn). */
+  responseText: string
+  /** The reasoning/thinking trace, as a plain string (`''` when none). */
+  reasoningText: string
+  inputTokens: number
+  cachedInputTokens: number
+  outputTokens: number
+  /** The provider finish/stop reason when the CLI reports one (else null). */
+  finishReason: string | null
+}
 /** Pi's assistant summary plus {@link PiRunStats} describing what it did. */
 export interface PiRunOutcome {
   summary: string
@@ -432,6 +464,14 @@ export interface PiRunOutcome {
    * (usage-aware rotation) and telemetry. Absent for the proxy-metered Pi harness.
    */
   usage?: { inputTokens: number; outputTokens: number }
+  /**
+   * Per-model-call telemetry lifted from a subscription harness's CLI event stream
+   * (Claude Code / Codex), which the backend records into `llm_call_metrics` — the
+   * proxy-bypassing analogue of the per-call rows the LLM proxy writes for Pi. Absent
+   * for the proxy-metered Pi harness (the proxy is its metering point). See
+   * {@link HarnessCallMetric}.
+   */
+  callMetrics?: HarnessCallMetric[]
   /** Output-quality signals (truncation / empty final answer); see {@link RunDiagnostics}. */
   diagnostics?: RunDiagnostics
 }