@poncho-ai/harness 0.51.1 → 0.52.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- > @poncho-ai/harness@0.51.1 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
2
+ > @poncho-ai/harness@0.52.1 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
3
3
  > node scripts/embed-docs.js && tsup src/index.ts --format esm --dts
4
4
 
5
5
  [embed-docs] Generated poncho-docs.ts with 4 topics
@@ -8,9 +8,9 @@
8
8
  CLI tsup v8.5.1
9
9
  CLI Target: es2022
10
10
  ESM Build start
11
- ESM dist/index.js 535.75 KB
11
+ ESM dist/index.js 536.24 KB
12
12
  ESM dist/isolate-F2PPSUL6.js 53.82 KB
13
- ESM ⚡️ Build success in 234ms
13
+ ESM ⚡️ Build success in 235ms
14
14
  DTS Build start
15
- DTS ⚡️ Build success in 7701ms
16
- DTS dist/index.d.ts 91.75 KB
15
+ DTS ⚡️ Build success in 8126ms
16
+ DTS dist/index.d.ts 92.40 KB
package/CHANGELOG.md CHANGED
@@ -1,5 +1,24 @@
1
1
  # @poncho-ai/harness
2
2
 
3
+ ## 0.52.1
4
+
5
+ ### Patch Changes
6
+
7
+ - [`0e8fff1`](https://github.com/cesr/poncho-ai/commit/0e8fff12aed9d5efe1821ed3560ead48a16113c1) Thanks [@cesr](https://github.com/cesr)! - Only send `temperature` to the model when the agent explicitly sets one. The harness previously defaulted to `temperature: 0.2` and always passed it to `streamText`; models that removed sampling params (Fable 5, Opus 4.7+) reject such requests with a 400 ("`temperature` is deprecated for this model"). `temperature` is now omitted from the request when undefined — the same treatment `maxTokens` already had — and `defaultAgentDefinition` no longer hard-codes a `temperature` line into the generated frontmatter (pass `temperature` explicitly to set one).
8
+
9
+ ## 0.52.0
10
+
11
+ ### Minor Changes
12
+
13
+ - [`d8453b4`](https://github.com/cesr/poncho-ai/commit/d8453b4f2360a1734e448960fe52f6c450cdf842) Thanks [@cesr](https://github.com/cesr)! - harness: propagate `suppressTelemetry` to subagents.
14
+
15
+ A telemetry-off run (e.g. incognito) now suppresses telemetry for the subagents it spawns too, not just the parent turn. The parent run's `suppressTelemetry` is exposed on `ToolContext`, captured by `spawn_subagent` into the new `SubagentManager.spawn({ suppressTelemetry })` option, stored on the subagent conversation's `subagentMeta`, and read back by the orchestrator's `runSubagent` / continuation so the child run (and its re-runs) emit no `invoke_agent` / `execute_tool` / AI-SDK spans.
16
+
17
+ ### Patch Changes
18
+
19
+ - Updated dependencies [[`d8453b4`](https://github.com/cesr/poncho-ai/commit/d8453b4f2360a1734e448960fe52f6c450cdf842)]:
20
+ - @poncho-ai/sdk@1.14.0
21
+
3
22
  ## 0.51.1
4
23
 
5
24
  ### Patch Changes
package/dist/index.d.ts CHANGED
@@ -195,6 +195,10 @@ interface Conversation {
195
195
  status: "running" | "completed" | "error" | "stopped";
196
196
  result?: _poncho_ai_sdk.RunResult;
197
197
  error?: _poncho_ai_sdk.AgentFailure;
198
+ /** Inherited from the parent run at spawn time — when true, this
199
+ * subagent's runs emit no telemetry (e.g. spawned from an incognito
200
+ * turn). Read by the orchestrator's runSubagent / continuation. */
201
+ suppressTelemetry?: boolean;
198
202
  };
199
203
  channelMeta?: {
200
204
  platform: string;
@@ -733,7 +737,12 @@ interface DefaultAgentDefinitionOptions {
733
737
  modelProvider?: "anthropic" | "openai" | "openai-codex";
734
738
  /** Model name. Default: "claude-opus-4-5". */
735
739
  modelName?: string;
736
- /** Sampling temperature. Default: 0.2. */
740
+ /**
741
+ * Sampling temperature. When unset, it is omitted from the generated
742
+ * frontmatter entirely and the harness sends no temperature (provider
743
+ * default). Newer models (Fable 5, Opus 4.7+) reject `temperature` — leave
744
+ * this unset for them.
745
+ */
737
746
  temperature?: number;
738
747
  /** Max tool-call steps per run. Default: 20. */
739
748
  maxSteps?: number;
@@ -1178,6 +1187,9 @@ interface SubagentManager {
1178
1187
  parentConversationId: string;
1179
1188
  ownerId: string;
1180
1189
  tenantId?: string | null;
1190
+ /** Inherit the parent run's telemetry choice — when true, the subagent
1191
+ * run (and its re-runs) emit no telemetry. */
1192
+ suppressTelemetry?: boolean;
1181
1193
  }): Promise<SubagentSpawnResult>;
1182
1194
  sendMessage(subagentId: string, message: string): Promise<SubagentSpawnResult>;
1183
1195
  stop(subagentId: string): Promise<void>;
package/dist/index.js CHANGED
@@ -588,7 +588,8 @@ var defaultAgentDefinition = (opts = {}) => {
588
588
  const description = opts.description ?? DEFAULT_AGENT_DESCRIPTION;
589
589
  const modelProvider = opts.modelProvider ?? DEFAULT_MODEL_PROVIDER;
590
590
  const modelName = opts.modelName ?? DEFAULT_MODEL_NAME;
591
- const temperature = opts.temperature ?? DEFAULT_TEMPERATURE;
591
+ const temperatureLine = opts.temperature !== void 0 ? `
592
+ temperature: ${opts.temperature}` : "";
592
593
  const maxSteps = opts.maxSteps ?? DEFAULT_MAX_STEPS;
593
594
  const timeout = opts.timeout ?? DEFAULT_TIMEOUT;
594
595
  return `---
@@ -597,8 +598,7 @@ id: ${id}
597
598
  description: ${description}
598
599
  model:
599
600
  provider: ${modelProvider}
600
- name: ${modelName}
601
- temperature: ${temperature}
601
+ name: ${modelName}${temperatureLine}
602
602
  limits:
603
603
  maxSteps: ${maxSteps}
604
604
  timeout: ${timeout}
@@ -8299,7 +8299,8 @@ var createSubagentTools = (manager) => [
8299
8299
  task: task.trim(),
8300
8300
  parentConversationId: conversationId,
8301
8301
  ownerId,
8302
- tenantId: context.tenantId
8302
+ tenantId: context.tenantId,
8303
+ suppressTelemetry: context.suppressTelemetry
8303
8304
  });
8304
8305
  return { subagentId, status: "running" };
8305
8306
  }
@@ -10831,7 +10832,7 @@ ${textContent}` };
10831
10832
  cachedCoreMessages = [...cachedCoreMessages, ...newCoreMessages];
10832
10833
  convertedUpTo = messages.length;
10833
10834
  const coreMessages = cachedCoreMessages;
10834
- const temperature = agent.frontmatter.model?.temperature ?? 0.2;
10835
+ const temperature = agent.frontmatter.model?.temperature;
10835
10836
  const maxTokens = agent.frontmatter.model?.maxTokens;
10836
10837
  const cachedMessages = skipTailCache ? coreMessages : addPromptCacheBreakpoints(
10837
10838
  coreMessages,
@@ -10859,7 +10860,7 @@ ${textContent}` };
10859
10860
  ...useStaticCache ? {} : { system: systemPrompt },
10860
10861
  messages: messagesForStep,
10861
10862
  tools: toolsForStep,
10862
- temperature,
10863
+ ...typeof temperature === "number" ? { temperature } : {},
10863
10864
  abortSignal: input.abortSignal,
10864
10865
  ...typeof maxTokens === "number" ? { maxTokens } : {},
10865
10866
  experimental_telemetry: {
@@ -11132,6 +11133,7 @@ ${textContent}` };
11132
11133
  abortSignal: input.abortSignal,
11133
11134
  conversationId: input.conversationId,
11134
11135
  tenantId: input.tenantId,
11136
+ suppressTelemetry: input.suppressTelemetry,
11135
11137
  vfs: this.bashManager ? this.createVfsAccess(input.tenantId ?? "__default__") : void 0
11136
11138
  };
11137
11139
  const toolResultsForModel = [];
@@ -12903,7 +12905,9 @@ var AgentOrchestrator = class {
12903
12905
  __ownerId: ownerId
12904
12906
  }, conversation),
12905
12907
  messages: harnessMessages,
12906
- abortSignal: childAbortController.signal
12908
+ abortSignal: childAbortController.signal,
12909
+ // Inherit the parent run's telemetry choice (e.g. incognito).
12910
+ suppressTelemetry: conversation.subagentMeta?.suppressTelemetry
12907
12911
  })) {
12908
12912
  if (event.type === "run:started") {
12909
12913
  latestRunId = event.runId;
@@ -13357,7 +13361,9 @@ ${resultBody}`,
13357
13361
  __ownerId: ownerId
13358
13362
  }, conversation),
13359
13363
  messages: continuationMessages,
13360
- abortSignal: childAbortController.signal
13364
+ abortSignal: childAbortController.signal,
13365
+ // Inherit the parent run's telemetry choice (e.g. incognito).
13366
+ suppressTelemetry: conversation.subagentMeta?.suppressTelemetry
13361
13367
  })) {
13362
13368
  if (event.type === "run:started") {
13363
13369
  const active = this.activeConversationRuns.get(conversationId);
@@ -13527,7 +13533,7 @@ ${resultBody}`,
13527
13533
  opts.tenantId ?? null,
13528
13534
  {
13529
13535
  parentConversationId: opts.parentConversationId,
13530
- subagentMeta: { task: opts.task, status: "running" },
13536
+ subagentMeta: { task: opts.task, status: "running", suppressTelemetry: opts.suppressTelemetry },
13531
13537
  messages: [{ role: "user", content: opts.task }]
13532
13538
  }
13533
13539
  );
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/harness",
3
- "version": "0.51.1",
3
+ "version": "0.52.1",
4
4
  "description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
5
5
  "repository": {
6
6
  "type": "git",
@@ -34,7 +34,7 @@
34
34
  "mustache": "^4.2.0",
35
35
  "yaml": "^2.4.0",
36
36
  "zod": "^3.22.0",
37
- "@poncho-ai/sdk": "1.13.0"
37
+ "@poncho-ai/sdk": "1.14.0"
38
38
  },
39
39
  "peerDependencies": {
40
40
  "esbuild": ">=0.17.0",
@@ -26,7 +26,12 @@ export interface DefaultAgentDefinitionOptions {
26
26
  modelProvider?: "anthropic" | "openai" | "openai-codex";
27
27
  /** Model name. Default: "claude-opus-4-5". */
28
28
  modelName?: string;
29
- /** Sampling temperature. Default: 0.2. */
29
+ /**
30
+ * Sampling temperature. When unset, it is omitted from the generated
31
+ * frontmatter entirely and the harness sends no temperature (provider
32
+ * default). Newer models (Fable 5, Opus 4.7+) reject `temperature` — leave
33
+ * this unset for them.
34
+ */
30
35
  temperature?: number;
31
36
  /** Max tool-call steps per run. Default: 20. */
32
37
  maxSteps?: number;
@@ -55,7 +60,10 @@ export const defaultAgentDefinition = (
55
60
  const description = opts.description ?? DEFAULT_AGENT_DESCRIPTION;
56
61
  const modelProvider = opts.modelProvider ?? DEFAULT_MODEL_PROVIDER;
57
62
  const modelName = opts.modelName ?? DEFAULT_MODEL_NAME;
58
- const temperature = opts.temperature ?? DEFAULT_TEMPERATURE;
63
+ // Opt-in: only emit a `temperature:` line when explicitly provided, so the
64
+ // harness sends no temperature otherwise (newer models reject it).
65
+ const temperatureLine =
66
+ opts.temperature !== undefined ? `\n temperature: ${opts.temperature}` : "";
59
67
  const maxSteps = opts.maxSteps ?? DEFAULT_MAX_STEPS;
60
68
  const timeout = opts.timeout ?? DEFAULT_TIMEOUT;
61
69
 
@@ -65,8 +73,7 @@ id: ${id}
65
73
  description: ${description}
66
74
  model:
67
75
  provider: ${modelProvider}
68
- name: ${modelName}
69
- temperature: ${temperature}
76
+ name: ${modelName}${temperatureLine}
70
77
  limits:
71
78
  maxSteps: ${maxSteps}
72
79
  timeout: ${timeout}
package/src/harness.ts CHANGED
@@ -2907,7 +2907,12 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
2907
2907
  convertedUpTo = messages.length;
2908
2908
  const coreMessages = cachedCoreMessages;
2909
2909
 
2910
- const temperature = agent.frontmatter.model?.temperature ?? 0.2;
2910
+ // Only send temperature when the agent explicitly set one. Newer
2911
+ // models (Fable 5, Opus 4.7+) removed sampling params entirely and
2912
+ // return a 400 ("`temperature` is deprecated for this model") on any
2913
+ // value — forcing a default here broke them. Treated like maxTokens
2914
+ // below: omitted from the request when undefined.
2915
+ const temperature = agent.frontmatter.model?.temperature;
2911
2916
  const maxTokens = agent.frontmatter.model?.maxTokens;
2912
2917
  // Place the tail breakpoint before any untruncated tool-result so
2913
2918
  // we cache only the stable prefix when prior-run tool results are
@@ -2971,7 +2976,7 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
2971
2976
  ...(useStaticCache ? {} : { system: systemPrompt }),
2972
2977
  messages: messagesForStep,
2973
2978
  tools: toolsForStep,
2974
- temperature,
2979
+ ...(typeof temperature === "number" ? { temperature } : {}),
2975
2980
  abortSignal: input.abortSignal,
2976
2981
  ...(typeof maxTokens === "number" ? { maxTokens } : {}),
2977
2982
  experimental_telemetry: {
@@ -3285,6 +3290,7 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
3285
3290
  abortSignal: input.abortSignal,
3286
3291
  conversationId: input.conversationId,
3287
3292
  tenantId: input.tenantId,
3293
+ suppressTelemetry: input.suppressTelemetry,
3288
3294
  vfs: this.bashManager
3289
3295
  ? this.createVfsAccess(input.tenantId ?? "__default__")
3290
3296
  : undefined,
@@ -825,6 +825,8 @@ export class AgentOrchestrator {
825
825
  }, conversation),
826
826
  messages: harnessMessages,
827
827
  abortSignal: childAbortController.signal,
828
+ // Inherit the parent run's telemetry choice (e.g. incognito).
829
+ suppressTelemetry: conversation.subagentMeta?.suppressTelemetry,
828
830
  })) {
829
831
  if (event.type === "run:started") {
830
832
  latestRunId = event.runId;
@@ -1350,6 +1352,8 @@ export class AgentOrchestrator {
1350
1352
  }, conversation),
1351
1353
  messages: continuationMessages,
1352
1354
  abortSignal: childAbortController.signal,
1355
+ // Inherit the parent run's telemetry choice (e.g. incognito).
1356
+ suppressTelemetry: conversation.subagentMeta?.suppressTelemetry,
1353
1357
  })) {
1354
1358
  if (event.type === "run:started") {
1355
1359
  const active = this.activeConversationRuns.get(conversationId);
@@ -1530,7 +1534,7 @@ export class AgentOrchestrator {
1530
1534
  opts.tenantId ?? null,
1531
1535
  {
1532
1536
  parentConversationId: opts.parentConversationId,
1533
- subagentMeta: { task: opts.task, status: "running" },
1537
+ subagentMeta: { task: opts.task, status: "running", suppressTelemetry: opts.suppressTelemetry },
1534
1538
  messages: [{ role: "user", content: opts.task }],
1535
1539
  },
1536
1540
  );
package/src/state.ts CHANGED
@@ -75,6 +75,10 @@ export interface Conversation {
75
75
  status: "running" | "completed" | "error" | "stopped";
76
76
  result?: import("@poncho-ai/sdk").RunResult;
77
77
  error?: import("@poncho-ai/sdk").AgentFailure;
78
+ /** Inherited from the parent run at spawn time — when true, this
79
+ * subagent's runs emit no telemetry (e.g. spawned from an incognito
80
+ * turn). Read by the orchestrator's runSubagent / continuation. */
81
+ suppressTelemetry?: boolean;
78
82
  };
79
83
  channelMeta?: {
80
84
  platform: string;
@@ -37,6 +37,9 @@ export interface SubagentManager {
37
37
  parentConversationId: string;
38
38
  ownerId: string;
39
39
  tenantId?: string | null;
40
+ /** Inherit the parent run's telemetry choice — when true, the subagent
41
+ * run (and its re-runs) emit no telemetry. */
42
+ suppressTelemetry?: boolean;
40
43
  }): Promise<SubagentSpawnResult>;
41
44
 
42
45
  sendMessage(subagentId: string, message: string): Promise<SubagentSpawnResult>;
@@ -45,6 +45,7 @@ export const createSubagentTools = (
45
45
  parentConversationId: conversationId,
46
46
  ownerId,
47
47
  tenantId: context.tenantId,
48
+ suppressTelemetry: context.suppressTelemetry,
48
49
  });
49
50
  return { subagentId, status: "running" };
50
51
  },