@geravant/sinain 1.8.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,6 +18,7 @@ import json
18
18
  import logging
19
19
  import os
20
20
  import time
21
+ import uuid
21
22
  from abc import ABC, abstractmethod
22
23
  from typing import TYPE_CHECKING, Optional
23
24
 
@@ -27,14 +28,23 @@ if TYPE_CHECKING:
27
28
  logger = logging.getLogger("sinain.vision")
28
29
 
29
30
 
31
class VisionResult:
    """Result of a vision call: description text plus optional cost info.

    Providers return this from ``describe()`` instead of a bare string so
    that billing metadata can travel together with the text.
    """

    __slots__ = ("text", "cost")

    def __init__(self, text: Optional[str], cost: Optional[dict] = None):
        """Store the call outcome.

        Args:
            text: Description returned by the backend; ``None`` on failure.
            cost: Optional billing info with the keys ``cost``, ``tokens_in``,
                ``tokens_out``, ``model`` and ``cost_id``; ``None`` when the
                backend reported no cost.
        """
        self.text = text
        self.cost = cost  # {cost, tokens_in, tokens_out, model, cost_id}

    def __repr__(self) -> str:
        # Without this, logging a result prints only an opaque object address.
        return f"{type(self).__name__}(text={self.text!r}, cost={self.cost!r})"
38
+
39
+
30
40
  class VisionProvider(ABC):
31
41
  """Abstract base for vision inference backends."""
32
42
 
33
43
  name: str = "unknown"
34
44
 
35
45
  @abstractmethod
36
- def describe(self, image: "Image.Image", prompt: Optional[str] = None) -> Optional[str]:
37
- """Describe image content. Returns None on failure."""
46
+ def describe(self, image: "Image.Image", prompt: Optional[str] = None) -> VisionResult:
47
+ """Describe image content. Returns VisionResult (text may be None on failure)."""
38
48
  ...
39
49
 
40
50
  @abstractmethod
@@ -53,8 +63,8 @@ class OllamaVisionProvider(VisionProvider):
53
63
  timeout=timeout, max_tokens=max_tokens)
54
64
  self.name = f"ollama ({model})"
55
65
 
56
- def describe(self, image: "Image.Image", prompt: Optional[str] = None) -> Optional[str]:
57
- return self._client.describe(image, prompt)
66
+ def describe(self, image: "Image.Image", prompt: Optional[str] = None) -> VisionResult:
67
+ return VisionResult(self._client.describe(image, prompt))
58
68
 
59
69
  def is_available(self) -> bool:
60
70
  return self._client.is_available()
@@ -73,9 +83,9 @@ class OpenRouterVisionProvider(VisionProvider):
73
83
  self._max_tokens = max_tokens
74
84
  self.name = f"openrouter ({model})"
75
85
 
76
- def describe(self, image: "Image.Image", prompt: Optional[str] = None) -> Optional[str]:
86
+ def describe(self, image: "Image.Image", prompt: Optional[str] = None) -> VisionResult:
77
87
  if not self._api_key:
78
- return None
88
+ return VisionResult(None)
79
89
 
80
90
  try:
81
91
  import requests
@@ -83,7 +93,7 @@ class OpenRouterVisionProvider(VisionProvider):
83
93
  # Encode image
84
94
  img_b64 = self._encode(image)
85
95
  if not img_b64:
86
- return None
96
+ return VisionResult(None)
87
97
 
88
98
  prompt_text = prompt or "Describe what's on this screen concisely (2-3 sentences)."
89
99
 
@@ -112,13 +122,23 @@ class OpenRouterVisionProvider(VisionProvider):
112
122
  resp.raise_for_status()
113
123
  data = resp.json()
114
124
  content = data["choices"][0]["message"]["content"].strip()
115
- logger.debug("openrouter vision: model=%s tokens=%s",
116
- self._model, data.get("usage", {}).get("total_tokens", "?"))
117
- return content if content else None
125
+ usage = data.get("usage", {})
126
+ logger.debug("openrouter vision: model=%s tokens=%s cost=%s",
127
+ self._model, usage.get("total_tokens", "?"), usage.get("cost", "?"))
128
+ cost_info = None
129
+ if usage.get("cost") is not None:
130
+ cost_info = {
131
+ "cost": usage["cost"],
132
+ "tokens_in": usage.get("prompt_tokens", 0),
133
+ "tokens_out": usage.get("completion_tokens", 0),
134
+ "model": self._model,
135
+ "cost_id": uuid.uuid4().hex[:16],
136
+ }
137
+ return VisionResult(content if content else None, cost_info)
118
138
 
119
139
  except Exception as e:
120
140
  logger.debug("openrouter vision failed: %s", e)
121
- return None
141
+ return VisionResult(None)
122
142
 
123
143
  def is_available(self) -> bool:
124
144
  return bool(self._api_key)
@@ -0,0 +1,23 @@
1
+ # sinain-agent configuration
2
+ # Copy to .env and customize: cp .env.example .env
3
+
4
+ # ── Agent ──
5
+ SINAIN_AGENT=claude # claude | codex | junie | goose | aider | <custom command>
6
+ # MCP agents (claude, codex, junie, goose) call sinain tools directly
7
+ # Pipe agents (aider, custom) receive escalation text on stdin
8
+
9
+ # ── Core connection ──
10
+ SINAIN_CORE_URL=http://localhost:9500
11
+
12
+ # ── Timing ──
13
+ SINAIN_POLL_INTERVAL=5 # seconds between escalation polls
14
+ SINAIN_HEARTBEAT_INTERVAL=900 # seconds between heartbeat ticks (15 min)
15
+
16
+ # ── Workspace ──
17
+ SINAIN_WORKSPACE=~/.openclaw/workspace # knowledge files, curation scripts, playbook
18
+
19
+ # ── Tool permissions (Claude only) ──
20
+ # Tools auto-approved without prompting (space-separated).
21
+ # Default: auto-derived from MCP config server names (e.g. mcp__sinain).
22
+ # Format: mcp__<server> (all tools) | mcp__<server>__<tool> (specific) | Bash(pattern)
23
+ # SINAIN_ALLOWED_TOOLS=mcp__sinain mcp__github Bash(git *)
@@ -5,9 +5,7 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
5
5
 
6
6
  # Load .env as fallback — does NOT override vars already in the environment
7
7
  # (e.g. vars set by the launcher from ~/.sinain/.env)
8
- # Load project root .env (single config for all subsystems)
9
- ENV_FILE="$SCRIPT_DIR/../.env"
10
- if [ -f "$ENV_FILE" ]; then
8
+ if [ -f "$SCRIPT_DIR/.env" ]; then
11
9
  while IFS='=' read -r key val; do
12
10
  # Skip comments and blank lines
13
11
  [[ -z "$key" || "$key" =~ ^[[:space:]]*# ]] && continue
@@ -21,7 +19,7 @@ if [ -f "$ENV_FILE" ]; then
21
19
  if [ -z "${!key+x}" ]; then
22
20
  export "$key=$val"
23
21
  fi
24
- done < "$ENV_FILE"
22
+ done < "$SCRIPT_DIR/.env"
25
23
  fi
26
24
 
27
25
  MCP_CONFIG="${MCP_CONFIG:-$SCRIPT_DIR/mcp-config.json}"
@@ -30,8 +28,6 @@ POLL_INTERVAL="${SINAIN_POLL_INTERVAL:-2}"
30
28
  HEARTBEAT_INTERVAL="${SINAIN_HEARTBEAT_INTERVAL:-900}" # 15 minutes
31
29
  AGENT="${SINAIN_AGENT:-claude}"
32
30
  WORKSPACE="${SINAIN_WORKSPACE:-$HOME/.openclaw/workspace}"
33
- AGENT_MAX_TURNS="${SINAIN_AGENT_MAX_TURNS:-5}"
34
- SPAWN_MAX_TURNS="${SINAIN_SPAWN_MAX_TURNS:-25}"
35
31
 
36
32
  # Build allowed tools list for Claude's --allowedTools flag.
37
33
  # SINAIN_ALLOWED_TOOLS in .env overrides; otherwise auto-derive from MCP config.
@@ -68,16 +64,16 @@ invoke_agent() {
68
64
  local prompt="$1"
69
65
  case "$AGENT" in
70
66
  claude)
71
- local turns="${2:-$AGENT_MAX_TURNS}"
72
67
  claude --enable-auto-mode \
73
68
  --mcp-config "$MCP_CONFIG" \
74
69
  ${ALLOWED_TOOLS:+--allowedTools $ALLOWED_TOOLS} \
75
- --max-turns "$turns" --output-format text \
70
+ --max-turns 5 --output-format text \
76
71
  -p "$prompt"
77
72
  ;;
78
73
  codex)
79
74
  codex exec -s danger-full-access \
80
75
  --dangerously-bypass-approvals-and-sandbox \
76
+ --skip-git-repo-check \
81
77
  "$prompt"
82
78
  ;;
83
79
  junie)
@@ -93,10 +89,9 @@ invoke_agent() {
93
89
  fi
94
90
  ;;
95
91
  goose)
96
- local turns="${2:-$AGENT_MAX_TURNS}"
97
92
  GOOSE_MODE=auto goose run --text "$prompt" \
98
93
  --output-format text \
99
- --max-turns "$turns"
94
+ --max-turns 10
100
95
  ;;
101
96
  aider)
102
97
  # No MCP support — signal pipe mode
@@ -271,8 +266,8 @@ while true; do
271
266
  # MCP path: agent runs task with sinain tools available
272
267
  SPAWN_PROMPT="You have a background task to complete. Task: $SPAWN_TASK
273
268
 
274
- Complete this task thoroughly. Use sinain_get_knowledge and sinain_knowledge_query if you need context from past sessions. Use web search, file operations, and code execution as needed. Create end-to-end artifacts. Summarize your findings concisely."
275
- SPAWN_RESULT=$(invoke_agent "$SPAWN_PROMPT" "$SPAWN_MAX_TURNS" || echo "ERROR: agent invocation failed")
269
+ Complete this task thoroughly. Use sinain_get_knowledge and sinain_knowledge_query if you need context from past sessions. Summarize your findings concisely."
270
+ SPAWN_RESULT=$(invoke_agent "$SPAWN_PROMPT" || echo "ERROR: agent invocation failed")
276
271
  else
277
272
  # Pipe path: agent gets task text directly
278
273
  SPAWN_RESULT=$(invoke_pipe "Background task: $SPAWN_TASK" || echo "No output")
@@ -56,12 +56,13 @@ You produce outputs as JSON.
56
56
  Respond ONLY with valid JSON. No markdown, no code fences, no explanation.
57
57
  Your entire response must be parseable by JSON.parse().
58
58
 
59
- {"hud":"...","digest":"...","record":{"command":"start"|"stop","label":"..."}}
59
+ {"hud":"...","digest":"...","record":{"command":"start"|"stop","label":"..."},"task":"..."}
60
60
 
61
61
  Output fields:
62
62
  - "hud" (required): max 60 words describing what user is doing NOW
63
63
  - "digest" (required): 5-8 sentences with detailed activity description
64
64
  - "record" (optional): control recording — {"command":"start","label":"Meeting name"} or {"command":"stop"}
65
+ - "task" (optional): natural language instruction to spawn a background task
65
66
 
66
67
  When to use "record":
67
68
  - START when user begins a meeting, call, lecture, YouTube video, or important audio content
@@ -69,7 +70,24 @@ When to use "record":
69
70
  - Provide descriptive labels like "Team standup", "Client call", "YouTube: [video title from OCR]"
70
71
  - For YouTube/video content: extract video title from screen OCR for the label
71
72
 
72
- Do NOT set a "task" field — background tasks are spawned by user commands only.
73
+ When to use "task":
74
+ - User explicitly asks for research, lookup, or action
75
+ - Something needs external search or processing that isn't a real-time response
76
+ - Example: "Search for React 19 migration guide", "Find docs for this API"
77
+
78
+ When to spawn "task" for video content:
79
+ - If user watches a YouTube video for 2+ minutes AND no task has been spawned for this video yet, spawn: "Summarize YouTube video: [title or URL from OCR]"
80
+ - ONLY spawn ONCE per video - do not repeat spawn for the same video in subsequent ticks
81
+ - Extract video title or URL from screen OCR to include in the task
82
+
83
+ When to spawn "task" for coding problems:
84
+ - If user is actively working on a coding problem/challenge for 1+ minutes:
85
+ - Spawn: "Solve coding problem: [problem description/title from OCR]"
86
+ - This includes LeetCode, HackerRank, interviews, coding assessments, or any visible coding challenge
87
+ - Look for problem signals: "Input:", "Output:", "Example", "Constraints:", problem titles, test cases
88
+ - Include as much context as possible from the screen OCR (problem description, examples, constraints)
89
+ - ONLY spawn ONCE per distinct problem - do not repeat for the same problem
90
+ - The spawned task should provide a complete solution with code and explanation
73
91
 
74
92
  Audio sources: [\ud83d\udd0a]=system/speaker audio, [\ud83c\udf99]=microphone (user's voice).
75
93
  Treat [\ud83c\udf99] as direct user speech. Treat [\ud83d\udd0a] as external audio.
@@ -324,6 +342,7 @@ async function callModel(
324
342
  try {
325
343
  const jsonStr = raw.replace(/^```\w*\s*\n?/, "").replace(/\n?\s*```\s*$/, "").trim();
326
344
  const parsed = JSON.parse(jsonStr);
345
+ const apiCost = typeof data.usage?.cost === "number" ? data.usage.cost : undefined;
327
346
  return {
328
347
  hud: parsed.hud || "\u2014",
329
348
  digest: parsed.digest || "\u2014",
@@ -334,10 +353,12 @@ async function callModel(
334
353
  tokensOut: data.usage?.completion_tokens || 0,
335
354
  model,
336
355
  parsedOk: true,
356
+ cost: apiCost,
337
357
  };
338
358
  } catch {
339
359
  // Second chance: extract embedded JSON object
340
360
  const match = raw.match(/\{[\s\S]*\}/);
361
+ const apiCost = typeof data.usage?.cost === "number" ? data.usage.cost : undefined;
341
362
  if (match) {
342
363
  try {
343
364
  const parsed = JSON.parse(match[0]);
@@ -352,6 +373,7 @@ async function callModel(
352
373
  tokensOut: data.usage?.completion_tokens || 0,
353
374
  model,
354
375
  parsedOk: true,
376
+ cost: apiCost,
355
377
  };
356
378
  }
357
379
  } catch { /* fall through */ }
@@ -367,6 +389,7 @@ async function callModel(
367
389
  tokensOut: data.usage?.completion_tokens || 0,
368
390
  model,
369
391
  parsedOk: false,
392
+ cost: apiCost,
370
393
  };
371
394
  }
372
395
  } finally {
@@ -4,6 +4,7 @@ import type { FeedBuffer } from "../buffers/feed-buffer.js";
4
4
  import type { SenseBuffer } from "../buffers/sense-buffer.js";
5
5
  import type { AgentConfig, AgentEntry, ContextWindow, EscalationMode, ContextRichness, RecorderStatus, SenseEvent, FeedbackRecord } from "../types.js";
6
6
  import type { Profiler } from "../profiler.js";
7
+ import type { CostTracker } from "../cost/tracker.js";
7
8
  import { buildContextWindow, RICHNESS_PRESETS } from "./context-window.js";
8
9
  import { analyzeContext } from "./analyzer.js";
9
10
  import { writeSituationMd } from "./situation-writer.js";
@@ -40,6 +41,8 @@ export interface AgentLoopDeps {
40
41
  getKnowledgeDocPath?: () => string | null;
41
42
  /** Optional: feedback store for startup recap context. */
42
43
  feedbackStore?: { queryRecent(n: number): FeedbackRecord[] };
44
+ /** Optional: cost tracker for LLM cost accumulation. */
45
+ costTracker?: CostTracker;
43
46
  }
44
47
 
45
48
  export interface TraceContext {
@@ -317,6 +320,17 @@ export class AgentLoop extends EventEmitter {
317
320
  this.deps.profiler?.gauge("agent.parseSuccesses", this.stats.parseSuccesses);
318
321
  this.deps.profiler?.gauge("agent.parseFailures", this.stats.parseFailures);
319
322
 
323
+ if (typeof result.cost === "number" && result.cost > 0) {
324
+ this.deps.costTracker?.record({
325
+ source: "analyzer",
326
+ model: usedModel,
327
+ cost: result.cost,
328
+ tokensIn,
329
+ tokensOut,
330
+ ts: Date.now(),
331
+ });
332
+ }
333
+
320
334
  // Build entry
321
335
  const entry: AgentEntry = {
322
336
  ...result,
@@ -375,12 +389,13 @@ export class AgentLoop extends EventEmitter {
375
389
 
376
390
  // Finish trace
377
391
  const costPerToken = { in: 0.075 / 1_000_000, out: 0.3 / 1_000_000 };
392
+ const estimatedCost = tokensIn * costPerToken.in + tokensOut * costPerToken.out;
378
393
  traceCtx?.finish({
379
394
  totalLatencyMs: Date.now() - entry.ts + latencyMs,
380
395
  llmLatencyMs: latencyMs,
381
396
  llmInputTokens: tokensIn,
382
397
  llmOutputTokens: tokensOut,
383
- llmCost: tokensIn * costPerToken.in + tokensOut * costPerToken.out,
398
+ llmCost: result.cost ?? estimatedCost,
384
399
  escalated: false, // Updated by escalator
385
400
  escalationScore: 0,
386
401
  contextScreenEvents: contextWindow.screenCount,
@@ -477,6 +492,16 @@ export class AgentLoop extends EventEmitter {
477
492
  };
478
493
 
479
494
  const result = await analyzeContext(recapWindow, this.deps.agentConfig, null);
495
+ if (typeof result.cost === "number" && result.cost > 0) {
496
+ this.deps.costTracker?.record({
497
+ source: "analyzer",
498
+ model: result.model,
499
+ cost: result.cost,
500
+ tokensIn: result.tokensIn,
501
+ tokensOut: result.tokensOut,
502
+ ts: Date.now(),
503
+ });
504
+ }
480
505
  if (result?.hud && result.hud !== "—" && result.hud !== "Idle") {
481
506
  this.deps.onHudUpdate(result.hud);
482
507
  log(TAG, `recap tick (${Date.now() - startTs}ms, ${result.tokensIn}in+${result.tokensOut}out tok) hud="${result.hud}"`);
@@ -1,6 +1,7 @@
1
1
  import { EventEmitter } from "node:events";
2
2
  import type { TranscriptionConfig, AudioChunk, TranscriptResult } from "../types.js";
3
3
  import type { Profiler } from "../profiler.js";
4
+ import type { CostTracker } from "../cost/tracker.js";
4
5
  import { LocalTranscriptionBackend } from "./transcription-local.js";
5
6
  import { log, warn, error, debug } from "../log.js";
6
7
 
@@ -41,7 +42,10 @@ export class TranscriptionService extends EventEmitter {
41
42
  private dropCount: number = 0;
42
43
  private totalCalls: number = 0;
43
44
 
45
+ private costTracker: CostTracker | null = null;
46
+
44
47
  setProfiler(p: Profiler): void { this.profiler = p; }
48
+ setCostTracker(ct: CostTracker): void { this.costTracker = ct; }
45
49
 
46
50
  constructor(config: TranscriptionConfig) {
47
51
  super();
@@ -219,7 +223,7 @@ export class TranscriptionService extends EventEmitter {
219
223
 
220
224
  const data = await response.json() as {
221
225
  choices?: Array<{ message?: { content?: string } }>;
222
- usage?: { prompt_tokens?: number; completion_tokens?: number };
226
+ usage?: { prompt_tokens?: number; completion_tokens?: number; cost?: number };
223
227
  };
224
228
 
225
229
  const text = data.choices?.[0]?.message?.content?.trim();
@@ -231,6 +235,21 @@ export class TranscriptionService extends EventEmitter {
231
235
  this.profiler?.timerRecord("transcription.call", elapsed);
232
236
  this.totalAudioDurationMs += chunk.durationMs;
233
237
 
238
+ // Track tokens and cost before any early returns — the API call is already billed
239
+ if (data.usage) {
240
+ this.totalTokensConsumed += (data.usage.prompt_tokens || 0) + (data.usage.completion_tokens || 0);
241
+ }
242
+ if (typeof data.usage?.cost === "number" && data.usage.cost > 0) {
243
+ this.costTracker?.record({
244
+ source: "transcription",
245
+ model: this.config.geminiModel,
246
+ cost: data.usage.cost,
247
+ tokensIn: data.usage?.prompt_tokens || 0,
248
+ tokensOut: data.usage?.completion_tokens || 0,
249
+ ts: Date.now(),
250
+ });
251
+ }
252
+
234
253
  if (!text) {
235
254
  warn(TAG, `OpenRouter returned empty transcript (${elapsed}ms)`);
236
255
  return;
@@ -248,10 +267,6 @@ export class TranscriptionService extends EventEmitter {
248
267
 
249
268
  log(TAG, `transcript (${elapsed}ms): "${text.slice(0, 100)}${text.length > 100 ? "..." : ""}"`);
250
269
 
251
- if (data.usage) {
252
- this.totalTokensConsumed += (data.usage.prompt_tokens || 0) + (data.usage.completion_tokens || 0);
253
- }
254
-
255
270
  const result: TranscriptResult = {
256
271
  text,
257
272
  source: "openrouter",
@@ -11,10 +11,10 @@ const __dirname = dirname(fileURLToPath(import.meta.url));
11
11
  export let loadedEnvPath: string | undefined;
12
12
 
13
13
  function loadDotEnv(): void {
14
- // Try project root .env first, then sinain-core/.env fallback
14
+ // Try sinain-core/.env first, then project root .env
15
15
  const candidates = [
16
- resolve(__dirname, "..", "..", ".env"),
17
16
  resolve(__dirname, "..", ".env"),
17
+ resolve(__dirname, "..", "..", ".env"),
18
18
  ];
19
19
  for (const envPath of candidates) {
20
20
  if (!existsSync(envPath)) continue;
@@ -252,6 +252,7 @@ export function loadConfig(): CoreConfig {
252
252
  situationMdPath,
253
253
  traceEnabled: boolEnv("TRACE_ENABLED", true),
254
254
  traceDir: resolvePath(env("TRACE_DIR", resolve(sinainDataDir(), "traces"))),
255
+ costDisplayEnabled: boolEnv("COST_DISPLAY_ENABLED", false),
255
256
  learningConfig,
256
257
  traitConfig,
257
258
  privacyConfig,
@@ -0,0 +1,64 @@
1
+ import type { CostEntry, CostSnapshot } from "../types.js";
2
+ import { log } from "../log.js";
3
+
4
+ const TAG = "cost";
5
+
6
/**
 * Accumulates LLM API spend for the lifetime of this tracker instance.
 *
 * Every accepted entry is added to the running total and to the per-source
 * and per-model breakdowns, and the update callback receives a fresh snapshot.
 */
export class CostTracker {
  private totalCost = 0;
  private costBySource = new Map<string, number>();
  private costByModel = new Map<string, number>();
  private callCount = 0;
  private startedAt = Date.now();
  private timer: ReturnType<typeof setInterval> | null = null;
  private onCostUpdate: (snapshot: CostSnapshot) => void;

  /**
   * @param onCostUpdate Invoked synchronously with a new snapshot after each
   *   accepted `record()` call.
   */
  constructor(onCostUpdate: (snapshot: CostSnapshot) => void) {
    this.onCostUpdate = onCostUpdate;
  }

  /**
   * Add one billed call to the totals and notify the update callback.
   * Entries whose cost is not a finite positive number are ignored.
   */
  record(entry: CostEntry): void {
    // NaN slips past a plain `<= 0` comparison and would permanently corrupt
    // every accumulated total, so reject non-finite values explicitly.
    if (!Number.isFinite(entry.cost) || entry.cost <= 0) return;

    this.totalCost += entry.cost;
    this.callCount++;
    this.costBySource.set(
      entry.source,
      (this.costBySource.get(entry.source) || 0) + entry.cost,
    );
    this.costByModel.set(
      entry.model,
      (this.costByModel.get(entry.model) || 0) + entry.cost,
    );
    this.onCostUpdate(this.getSnapshot());
  }

  /** Return a plain-object copy of the current totals and breakdowns. */
  getSnapshot(): CostSnapshot {
    return {
      totalCost: this.totalCost,
      costBySource: Object.fromEntries(this.costBySource),
      costByModel: Object.fromEntries(this.costByModel),
      callCount: this.callCount,
      startedAt: this.startedAt,
    };
  }

  /**
   * Log a cost summary every `intervalMs` milliseconds.
   * Ticks are silent until at least one call has been recorded.
   * Calling this again replaces the previous schedule.
   */
  startPeriodicLog(intervalMs: number): void {
    // Clear any running interval first; otherwise the old handle is
    // overwritten and can never be cancelled by stop().
    this.stop();
    this.timer = setInterval(() => {
      if (this.callCount === 0) return;
      const elapsed = ((Date.now() - this.startedAt) / 60_000).toFixed(1);
      const sources = [...this.costBySource.entries()]
        .map(([k, v]) => `${k}=$${v.toFixed(6)}`)
        .join(" ");
      const models = [...this.costByModel.entries()]
        .map(([k, v]) => `${k}=$${v.toFixed(6)}`)
        .join(" ");
      log(TAG, `$${this.totalCost.toFixed(6)} total (${this.callCount} calls, ${elapsed} min) | ${sources} | ${models}`);
    }, intervalMs);
  }

  /** Cancel the periodic log if one is running; safe to call repeatedly. */
  stop(): void {
    if (this.timer) {
      clearInterval(this.timer);
      this.timer = null;
    }
  }
}
@@ -13,14 +13,6 @@ import { isCodingContext, buildEscalationMessage, fetchKnowledgeFacts } from "./
13
13
  import { loadPendingTasks, savePendingTasks, type PendingTaskEntry } from "../util/task-store.js";
14
14
  import { log, warn, error } from "../log.js";
15
15
 
16
- /** Context passed to spawn subagents so they can act on the user's current situation. */
17
- export interface SpawnContext {
18
- currentApp?: string;
19
- digest?: string;
20
- recentAudio?: string;
21
- recentScreen?: string;
22
- }
23
-
24
16
  export interface HttpPendingEscalation {
25
17
  id: string;
26
18
  message: string;
@@ -473,7 +465,7 @@ ${recentLines.join("\n")}`;
473
465
  * Creates a unique child session key and sends the task directly to the gateway
474
466
  * agent RPC — bypassing the main session to avoid dedup/NO_REPLY issues.
475
467
  */
476
- async dispatchSpawnTask(task: string, label?: string, context?: SpawnContext): Promise<void> {
468
+ async dispatchSpawnTask(task: string, label?: string): Promise<void> {
477
469
  // Prevent sibling spawn RPCs from piling up (independent from escalation queue)
478
470
  if (this.spawnInFlight) {
479
471
  log(TAG, `spawn-task skipped — spawn RPC already in-flight`);
@@ -493,12 +485,9 @@ ${recentLines.join("\n")}`;
493
485
  this.lastSpawnFingerprint = fingerprint;
494
486
  this.lastSpawnTs = now;
495
487
 
496
- // Truncate label to gateway's 64-char limit
497
- const safeLabel = label?.slice(0, 64);
498
-
499
488
  const taskId = `spawn-${Date.now()}`;
500
489
  const startedAt = Date.now();
501
- const labelStr = safeLabel ? ` (label: "${safeLabel}")` : "";
490
+ const labelStr = label ? ` (label: "${label}")` : "";
502
491
  const idemKey = `spawn-task-${Date.now()}`;
503
492
 
504
493
  // Generate a unique child session key — bypasses the main agent entirely
@@ -509,11 +498,11 @@ ${recentLines.join("\n")}`;
509
498
  log(TAG, `dispatching spawn-task${labelStr} → child=${childSessionKey}: "${task.slice(0, 80)}..."`);
510
499
 
511
500
  // ★ Broadcast "spawned" BEFORE the RPC — TSK tab shows ··· immediately
512
- this.broadcastTaskEvent(taskId, "spawned", safeLabel, startedAt);
501
+ this.broadcastTaskEvent(taskId, "spawned", label, startedAt);
513
502
 
514
503
  if (!this.wsClient.isConnected) {
515
504
  // No OpenClaw gateway — queue for bare agent HTTP polling
516
- this.spawnHttpPending = { id: taskId, task, label: safeLabel || "background-task", ts: startedAt };
505
+ this.spawnHttpPending = { id: taskId, task, label: label || "background-task", ts: startedAt };
517
506
  const preview = task.length > 60 ? task.slice(0, 60) + "…" : task;
518
507
  this.deps.feedBuffer.push(`🔧 Task queued for agent: ${preview}`, "normal", "system", "stream");
519
508
  this.deps.wsHandler.broadcast(`🔧 Task queued for agent: ${preview}`, "normal");
@@ -521,10 +510,6 @@ ${recentLines.join("\n")}`;
521
510
  return;
522
511
  }
523
512
 
524
- // Dynamic timeout: scale with task length (long transcripts need more time)
525
- // Base 30s + 1s per 200 chars, min 45s, max 180s
526
- const timeoutMs = Math.min(180_000, Math.max(45_000, Math.ceil(task.length / 200) * 1000 + 30_000));
527
-
528
513
  // ★ Set spawnInFlight BEFORE first await — cleared in finally regardless of outcome.
529
514
  // Dedicated lane flag: never touches the escalation queue so regular escalations
530
515
  // continue unblocked while this spawn RPC is pending.
@@ -535,11 +520,11 @@ ${recentLines.join("\n")}`;
535
520
  message: task,
536
521
  sessionKey: childSessionKey,
537
522
  lane: "subagent",
538
- extraSystemPrompt: this.buildChildSystemPrompt(context),
523
+ extraSystemPrompt: this.buildChildSystemPrompt(task, label),
539
524
  deliver: false,
540
525
  idempotencyKey: idemKey,
541
- label: safeLabel || undefined,
542
- }, timeoutMs, { expectFinal: true });
526
+ label: label || undefined,
527
+ }, 45_000, { expectFinal: true });
543
528
 
544
529
  log(TAG, `spawn-task RPC response: ${JSON.stringify(result).slice(0, 500)}`);
545
530
  this.stats.totalSpawnResponses++;
@@ -551,15 +536,15 @@ ${recentLines.join("\n")}`;
551
536
  if (Array.isArray(payloads) && payloads.length > 0) {
552
537
  const output = payloads.map((pl: any) => pl.text || "").join("\n").trim();
553
538
  if (output) {
554
- this.pushResponse(`${safeLabel || "Background task"}:\n${output}`);
555
- this.broadcastTaskEvent(taskId, "completed", safeLabel, startedAt, output);
539
+ this.pushResponse(`${label || "Background task"}:\n${output}`);
540
+ this.broadcastTaskEvent(taskId, "completed", label, startedAt, output);
556
541
  } else {
557
542
  log(TAG, `spawn-task: ${payloads.length} payloads but empty text, trying chat.history`);
558
543
  const historyText = await this.fetchChildResult(childSessionKey);
559
- this.broadcastTaskEvent(taskId, "completed", safeLabel, startedAt,
544
+ this.broadcastTaskEvent(taskId, "completed", label, startedAt,
560
545
  historyText || "task completed (no output)");
561
546
  if (historyText) {
562
- this.pushResponse(`${safeLabel || "Background task"}:\n${historyText}`);
547
+ this.pushResponse(`${label || "Background task"}:\n${historyText}`);
563
548
  }
564
549
  }
565
550
  } else {
@@ -567,10 +552,10 @@ ${recentLines.join("\n")}`;
567
552
  log(TAG, `spawn-task: no payloads, fetching chat.history for child=${childSessionKey}`);
568
553
  const historyText = await this.fetchChildResult(childSessionKey);
569
554
  if (historyText) {
570
- this.pushResponse(`${safeLabel || "Background task"}:\n${historyText}`);
571
- this.broadcastTaskEvent(taskId, "completed", safeLabel, startedAt, historyText);
555
+ this.pushResponse(`${label || "Background task"}:\n${historyText}`);
556
+ this.broadcastTaskEvent(taskId, "completed", label, startedAt, historyText);
572
557
  } else {
573
- this.broadcastTaskEvent(taskId, "completed", safeLabel, startedAt,
558
+ this.broadcastTaskEvent(taskId, "completed", label, startedAt,
574
559
  "task completed (no output captured)");
575
560
  }
576
561
  }
@@ -579,7 +564,7 @@ ${recentLines.join("\n")}`;
579
564
  this.pendingSpawnTasks.set(taskId, {
580
565
  runId,
581
566
  childSessionKey,
582
- label: safeLabel,
567
+ label,
583
568
  startedAt,
584
569
  pollingEmitted: false,
585
570
  });
@@ -590,43 +575,30 @@ ${recentLines.join("\n")}`;
590
575
  savePendingTasks(this.pendingSpawnTasks);
591
576
  } catch (err: any) {
592
577
  error(TAG, `spawn-task failed: ${err.message}`);
593
- this.broadcastTaskEvent(taskId, "failed", safeLabel, startedAt);
578
+ this.broadcastTaskEvent(taskId, "failed", label, startedAt);
594
579
  } finally {
595
580
  this.spawnInFlight = false;
596
581
  }
597
582
  }
598
583
 
599
- /** Build a context-rich system prompt for the child subagent. */
600
- private buildChildSystemPrompt(context?: SpawnContext): string {
601
- const parts = [
602
- "# Background Agent",
584
+ /** Build a focused system prompt for the child subagent. */
585
+ private buildChildSystemPrompt(task: string, label?: string): string {
586
+ return [
587
+ "# Subagent Context",
588
+ "",
589
+ "You are a **subagent** spawned for a specific task.",
603
590
  "",
604
- "You are a background agent spawned by the user to complete a specific task.",
605
- "You have full tool access: file operations, web search, code execution.",
606
- "Create end-to-end valuable artifacts summaries, code files, emails, analysis docs.",
591
+ "## Your Role",
592
+ `- Task: ${task.replace(/\s+/g, " ").trim().slice(0, 500)}`,
593
+ "- Complete this task. That's your entire purpose.",
607
594
  "",
608
595
  "## Rules",
609
- "1. Complete the task fully actually do it, don't just describe what you'd do",
610
- "2. Use your tools: search the web, write files, run code as needed",
611
- "3. Your final message is shown in a small overlay — keep it concise (1-3 sentences + key links/paths)",
612
- "4. For substantial output, write to a file and report the path",
613
- ];
614
-
615
- if (context?.currentApp || context?.digest) {
616
- parts.push("", "## User Context");
617
- if (context.currentApp) parts.push(`- Current app: ${context.currentApp}`);
618
- if (context.digest) parts.push(`- Situation: ${context.digest.slice(0, 500)}`);
619
- }
620
-
621
- if (context?.recentScreen) {
622
- parts.push("", "## Recent Screen (OCR, last ~60s)", context.recentScreen);
623
- }
624
-
625
- if (context?.recentAudio) {
626
- parts.push("", "## Recent Audio (last ~60s)", context.recentAudio);
627
- }
628
-
629
- return parts.join("\n");
596
+ "1. Stay focused — do your assigned task, nothing else",
597
+ "2. Your final message will be reported to the requester",
598
+ "3. Be concise but informative",
599
+ "",
600
+ label ? `Label: ${label}` : "",
601
+ ].filter(Boolean).join("\n");
630
602
  }
631
603
 
632
604
  /** Fetch the latest assistant reply from a child session's chat history. */