@geravant/sinain 1.8.0 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +4 -6
- package/cli.js +16 -2
- package/config-shared.js +469 -0
- package/config.js +152 -0
- package/launcher.js +7 -1
- package/onboard.js +345 -0
- package/package.json +8 -2
- package/sense_client/__main__.py +8 -4
- package/sense_client/gate.py +1 -0
- package/sense_client/ocr.py +52 -22
- package/sense_client/sender.py +2 -0
- package/sense_client/vision.py +31 -11
- package/sinain-agent/.env.example +23 -0
- package/sinain-agent/run.sh +7 -12
- package/sinain-core/src/agent/analyzer.ts +25 -2
- package/sinain-core/src/agent/loop.ts +26 -1
- package/sinain-core/src/audio/transcription.ts +20 -5
- package/sinain-core/src/config.ts +3 -2
- package/sinain-core/src/cost/tracker.ts +64 -0
- package/sinain-core/src/escalation/escalator.ts +31 -59
- package/sinain-core/src/index.ts +41 -45
- package/sinain-core/src/overlay/commands.ts +12 -9
- package/sinain-core/src/overlay/ws-handler.ts +27 -3
- package/sinain-core/src/server.ts +41 -0
- package/sinain-core/src/types.ts +33 -1
package/sense_client/vision.py
CHANGED
|
@@ -18,6 +18,7 @@ import json
|
|
|
18
18
|
import logging
|
|
19
19
|
import os
|
|
20
20
|
import time
|
|
21
|
+
import uuid
|
|
21
22
|
from abc import ABC, abstractmethod
|
|
22
23
|
from typing import TYPE_CHECKING, Optional
|
|
23
24
|
|
|
@@ -27,14 +28,23 @@ if TYPE_CHECKING:
|
|
|
27
28
|
logger = logging.getLogger("sinain.vision")
|
|
28
29
|
|
|
29
30
|
|
|
31
|
+
class VisionResult:
|
|
32
|
+
"""Result of a vision call: text + optional cost info."""
|
|
33
|
+
__slots__ = ("text", "cost")
|
|
34
|
+
|
|
35
|
+
def __init__(self, text: Optional[str], cost: Optional[dict] = None):
|
|
36
|
+
self.text = text
|
|
37
|
+
self.cost = cost # {cost, tokens_in, tokens_out, model, cost_id}
|
|
38
|
+
|
|
39
|
+
|
|
30
40
|
class VisionProvider(ABC):
|
|
31
41
|
"""Abstract base for vision inference backends."""
|
|
32
42
|
|
|
33
43
|
name: str = "unknown"
|
|
34
44
|
|
|
35
45
|
@abstractmethod
|
|
36
|
-
def describe(self, image: "Image.Image", prompt: Optional[str] = None) -> Optional[str]:
|
|
37
|
-
"""Describe image content. Returns None on failure."""
|
|
46
|
+
def describe(self, image: "Image.Image", prompt: Optional[str] = None) -> VisionResult:
|
|
47
|
+
"""Describe image content. Returns VisionResult (text may be None on failure)."""
|
|
38
48
|
...
|
|
39
49
|
|
|
40
50
|
@abstractmethod
|
|
@@ -53,8 +63,8 @@ class OllamaVisionProvider(VisionProvider):
|
|
|
53
63
|
timeout=timeout, max_tokens=max_tokens)
|
|
54
64
|
self.name = f"ollama ({model})"
|
|
55
65
|
|
|
56
|
-
def describe(self, image: "Image.Image", prompt: Optional[str] = None) -> Optional[str]:
|
|
57
|
-
return self._client.describe(image, prompt)
|
|
66
|
+
def describe(self, image: "Image.Image", prompt: Optional[str] = None) -> VisionResult:
|
|
67
|
+
return VisionResult(self._client.describe(image, prompt))
|
|
58
68
|
|
|
59
69
|
def is_available(self) -> bool:
|
|
60
70
|
return self._client.is_available()
|
|
@@ -73,9 +83,9 @@ class OpenRouterVisionProvider(VisionProvider):
|
|
|
73
83
|
self._max_tokens = max_tokens
|
|
74
84
|
self.name = f"openrouter ({model})"
|
|
75
85
|
|
|
76
|
-
def describe(self, image: "Image.Image", prompt: Optional[str] = None) -> Optional[str]:
|
|
86
|
+
def describe(self, image: "Image.Image", prompt: Optional[str] = None) -> VisionResult:
|
|
77
87
|
if not self._api_key:
|
|
78
|
-
return None
|
|
88
|
+
return VisionResult(None)
|
|
79
89
|
|
|
80
90
|
try:
|
|
81
91
|
import requests
|
|
@@ -83,7 +93,7 @@ class OpenRouterVisionProvider(VisionProvider):
|
|
|
83
93
|
# Encode image
|
|
84
94
|
img_b64 = self._encode(image)
|
|
85
95
|
if not img_b64:
|
|
86
|
-
return None
|
|
96
|
+
return VisionResult(None)
|
|
87
97
|
|
|
88
98
|
prompt_text = prompt or "Describe what's on this screen concisely (2-3 sentences)."
|
|
89
99
|
|
|
@@ -112,13 +122,23 @@ class OpenRouterVisionProvider(VisionProvider):
|
|
|
112
122
|
resp.raise_for_status()
|
|
113
123
|
data = resp.json()
|
|
114
124
|
content = data["choices"][0]["message"]["content"].strip()
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
125
|
+
usage = data.get("usage", {})
|
|
126
|
+
logger.debug("openrouter vision: model=%s tokens=%s cost=%s",
|
|
127
|
+
self._model, usage.get("total_tokens", "?"), usage.get("cost", "?"))
|
|
128
|
+
cost_info = None
|
|
129
|
+
if usage.get("cost") is not None:
|
|
130
|
+
cost_info = {
|
|
131
|
+
"cost": usage["cost"],
|
|
132
|
+
"tokens_in": usage.get("prompt_tokens", 0),
|
|
133
|
+
"tokens_out": usage.get("completion_tokens", 0),
|
|
134
|
+
"model": self._model,
|
|
135
|
+
"cost_id": uuid.uuid4().hex[:16],
|
|
136
|
+
}
|
|
137
|
+
return VisionResult(content if content else None, cost_info)
|
|
118
138
|
|
|
119
139
|
except Exception as e:
|
|
120
140
|
logger.debug("openrouter vision failed: %s", e)
|
|
121
|
-
return None
|
|
141
|
+
return VisionResult(None)
|
|
122
142
|
|
|
123
143
|
def is_available(self) -> bool:
|
|
124
144
|
return bool(self._api_key)
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# sinain-agent configuration
|
|
2
|
+
# Copy to .env and customize: cp .env.example .env
|
|
3
|
+
|
|
4
|
+
# ── Agent ──
|
|
5
|
+
SINAIN_AGENT=claude # claude | codex | junie | goose | aider | <custom command>
|
|
6
|
+
# MCP agents (claude, codex, junie, goose) call sinain tools directly
|
|
7
|
+
# Pipe agents (aider, custom) receive escalation text on stdin
|
|
8
|
+
|
|
9
|
+
# ── Core connection ──
|
|
10
|
+
SINAIN_CORE_URL=http://localhost:9500
|
|
11
|
+
|
|
12
|
+
# ── Timing ──
|
|
13
|
+
SINAIN_POLL_INTERVAL=5 # seconds between escalation polls
|
|
14
|
+
SINAIN_HEARTBEAT_INTERVAL=900 # seconds between heartbeat ticks (15 min)
|
|
15
|
+
|
|
16
|
+
# ── Workspace ──
|
|
17
|
+
SINAIN_WORKSPACE=~/.openclaw/workspace # knowledge files, curation scripts, playbook
|
|
18
|
+
|
|
19
|
+
# ── Tool permissions (Claude only) ──
|
|
20
|
+
# Tools auto-approved without prompting (space-separated).
|
|
21
|
+
# Default: auto-derived from MCP config server names (e.g. mcp__sinain).
|
|
22
|
+
# Format: mcp__<server> (all tools) | mcp__<server>__<tool> (specific) | Bash(pattern)
|
|
23
|
+
# SINAIN_ALLOWED_TOOLS=mcp__sinain mcp__github Bash(git *)
|
package/sinain-agent/run.sh
CHANGED
|
@@ -5,9 +5,7 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
|
5
5
|
|
|
6
6
|
# Load .env as fallback — does NOT override vars already in the environment
|
|
7
7
|
# (e.g. vars set by the launcher from ~/.sinain/.env)
|
|
8
|
-
|
|
9
|
-
ENV_FILE="$SCRIPT_DIR/../.env"
|
|
10
|
-
if [ -f "$ENV_FILE" ]; then
|
|
8
|
+
if [ -f "$SCRIPT_DIR/.env" ]; then
|
|
11
9
|
while IFS='=' read -r key val; do
|
|
12
10
|
# Skip comments and blank lines
|
|
13
11
|
[[ -z "$key" || "$key" =~ ^[[:space:]]*# ]] && continue
|
|
@@ -21,7 +19,7 @@ if [ -f "$ENV_FILE" ]; then
|
|
|
21
19
|
if [ -z "${!key+x}" ]; then
|
|
22
20
|
export "$key=$val"
|
|
23
21
|
fi
|
|
24
|
-
done < "$ENV_FILE"
|
|
22
|
+
done < "$SCRIPT_DIR/.env"
|
|
25
23
|
fi
|
|
26
24
|
|
|
27
25
|
MCP_CONFIG="${MCP_CONFIG:-$SCRIPT_DIR/mcp-config.json}"
|
|
@@ -30,8 +28,6 @@ POLL_INTERVAL="${SINAIN_POLL_INTERVAL:-2}"
|
|
|
30
28
|
HEARTBEAT_INTERVAL="${SINAIN_HEARTBEAT_INTERVAL:-900}" # 15 minutes
|
|
31
29
|
AGENT="${SINAIN_AGENT:-claude}"
|
|
32
30
|
WORKSPACE="${SINAIN_WORKSPACE:-$HOME/.openclaw/workspace}"
|
|
33
|
-
AGENT_MAX_TURNS="${SINAIN_AGENT_MAX_TURNS:-5}"
|
|
34
|
-
SPAWN_MAX_TURNS="${SINAIN_SPAWN_MAX_TURNS:-25}"
|
|
35
31
|
|
|
36
32
|
# Build allowed tools list for Claude's --allowedTools flag.
|
|
37
33
|
# SINAIN_ALLOWED_TOOLS in .env overrides; otherwise auto-derive from MCP config.
|
|
@@ -68,16 +64,16 @@ invoke_agent() {
|
|
|
68
64
|
local prompt="$1"
|
|
69
65
|
case "$AGENT" in
|
|
70
66
|
claude)
|
|
71
|
-
local turns="${2:-$AGENT_MAX_TURNS}"
|
|
72
67
|
claude --enable-auto-mode \
|
|
73
68
|
--mcp-config "$MCP_CONFIG" \
|
|
74
69
|
${ALLOWED_TOOLS:+--allowedTools $ALLOWED_TOOLS} \
|
|
75
|
-
--max-turns
|
|
70
|
+
--max-turns 5 --output-format text \
|
|
76
71
|
-p "$prompt"
|
|
77
72
|
;;
|
|
78
73
|
codex)
|
|
79
74
|
codex exec -s danger-full-access \
|
|
80
75
|
--dangerously-bypass-approvals-and-sandbox \
|
|
76
|
+
--skip-git-repo-check \
|
|
81
77
|
"$prompt"
|
|
82
78
|
;;
|
|
83
79
|
junie)
|
|
@@ -93,10 +89,9 @@ invoke_agent() {
|
|
|
93
89
|
fi
|
|
94
90
|
;;
|
|
95
91
|
goose)
|
|
96
|
-
local turns="${2:-$AGENT_MAX_TURNS}"
|
|
97
92
|
GOOSE_MODE=auto goose run --text "$prompt" \
|
|
98
93
|
--output-format text \
|
|
99
|
-
--max-turns
|
|
94
|
+
--max-turns 10
|
|
100
95
|
;;
|
|
101
96
|
aider)
|
|
102
97
|
# No MCP support — signal pipe mode
|
|
@@ -271,8 +266,8 @@ while true; do
|
|
|
271
266
|
# MCP path: agent runs task with sinain tools available
|
|
272
267
|
SPAWN_PROMPT="You have a background task to complete. Task: $SPAWN_TASK
|
|
273
268
|
|
|
274
|
-
Complete this task thoroughly. Use sinain_get_knowledge and sinain_knowledge_query if you need context from past sessions.
|
|
275
|
-
SPAWN_RESULT=$(invoke_agent "$SPAWN_PROMPT"
|
|
269
|
+
Complete this task thoroughly. Use sinain_get_knowledge and sinain_knowledge_query if you need context from past sessions. Summarize your findings concisely."
|
|
270
|
+
SPAWN_RESULT=$(invoke_agent "$SPAWN_PROMPT" || echo "ERROR: agent invocation failed")
|
|
276
271
|
else
|
|
277
272
|
# Pipe path: agent gets task text directly
|
|
278
273
|
SPAWN_RESULT=$(invoke_pipe "Background task: $SPAWN_TASK" || echo "No output")
|
|
@@ -56,12 +56,13 @@ You produce outputs as JSON.
|
|
|
56
56
|
Respond ONLY with valid JSON. No markdown, no code fences, no explanation.
|
|
57
57
|
Your entire response must be parseable by JSON.parse().
|
|
58
58
|
|
|
59
|
-
{"hud":"...","digest":"...","record":{"command":"start"|"stop","label":"..."}}
|
|
59
|
+
{"hud":"...","digest":"...","record":{"command":"start"|"stop","label":"..."},"task":"..."}
|
|
60
60
|
|
|
61
61
|
Output fields:
|
|
62
62
|
- "hud" (required): max 60 words describing what user is doing NOW
|
|
63
63
|
- "digest" (required): 5-8 sentences with detailed activity description
|
|
64
64
|
- "record" (optional): control recording — {"command":"start","label":"Meeting name"} or {"command":"stop"}
|
|
65
|
+
- "task" (optional): natural language instruction to spawn a background task
|
|
65
66
|
|
|
66
67
|
When to use "record":
|
|
67
68
|
- START when user begins a meeting, call, lecture, YouTube video, or important audio content
|
|
@@ -69,7 +70,24 @@ When to use "record":
|
|
|
69
70
|
- Provide descriptive labels like "Team standup", "Client call", "YouTube: [video title from OCR]"
|
|
70
71
|
- For YouTube/video content: extract video title from screen OCR for the label
|
|
71
72
|
|
|
72
|
-
|
|
73
|
+
When to use "task":
|
|
74
|
+
- User explicitly asks for research, lookup, or action
|
|
75
|
+
- Something needs external search or processing that isn't a real-time response
|
|
76
|
+
- Example: "Search for React 19 migration guide", "Find docs for this API"
|
|
77
|
+
|
|
78
|
+
When to spawn "task" for video content:
|
|
79
|
+
- If user watches a YouTube video for 2+ minutes AND no task has been spawned for this video yet, spawn: "Summarize YouTube video: [title or URL from OCR]"
|
|
80
|
+
- ONLY spawn ONCE per video - do not repeat spawn for the same video in subsequent ticks
|
|
81
|
+
- Extract video title or URL from screen OCR to include in the task
|
|
82
|
+
|
|
83
|
+
When to spawn "task" for coding problems:
|
|
84
|
+
- If user is actively working on a coding problem/challenge for 1+ minutes:
|
|
85
|
+
- Spawn: "Solve coding problem: [problem description/title from OCR]"
|
|
86
|
+
- This includes LeetCode, HackerRank, interviews, coding assessments, or any visible coding challenge
|
|
87
|
+
- Look for problem signals: "Input:", "Output:", "Example", "Constraints:", problem titles, test cases
|
|
88
|
+
- Include as much context as possible from the screen OCR (problem description, examples, constraints)
|
|
89
|
+
- ONLY spawn ONCE per distinct problem - do not repeat for the same problem
|
|
90
|
+
- The spawned task should provide a complete solution with code and explanation
|
|
73
91
|
|
|
74
92
|
Audio sources: [\ud83d\udd0a]=system/speaker audio, [\ud83c\udf99]=microphone (user's voice).
|
|
75
93
|
Treat [\ud83c\udf99] as direct user speech. Treat [\ud83d\udd0a] as external audio.
|
|
@@ -324,6 +342,7 @@ async function callModel(
|
|
|
324
342
|
try {
|
|
325
343
|
const jsonStr = raw.replace(/^```\w*\s*\n?/, "").replace(/\n?\s*```\s*$/, "").trim();
|
|
326
344
|
const parsed = JSON.parse(jsonStr);
|
|
345
|
+
const apiCost = typeof data.usage?.cost === "number" ? data.usage.cost : undefined;
|
|
327
346
|
return {
|
|
328
347
|
hud: parsed.hud || "\u2014",
|
|
329
348
|
digest: parsed.digest || "\u2014",
|
|
@@ -334,10 +353,12 @@ async function callModel(
|
|
|
334
353
|
tokensOut: data.usage?.completion_tokens || 0,
|
|
335
354
|
model,
|
|
336
355
|
parsedOk: true,
|
|
356
|
+
cost: apiCost,
|
|
337
357
|
};
|
|
338
358
|
} catch {
|
|
339
359
|
// Second chance: extract embedded JSON object
|
|
340
360
|
const match = raw.match(/\{[\s\S]*\}/);
|
|
361
|
+
const apiCost = typeof data.usage?.cost === "number" ? data.usage.cost : undefined;
|
|
341
362
|
if (match) {
|
|
342
363
|
try {
|
|
343
364
|
const parsed = JSON.parse(match[0]);
|
|
@@ -352,6 +373,7 @@ async function callModel(
|
|
|
352
373
|
tokensOut: data.usage?.completion_tokens || 0,
|
|
353
374
|
model,
|
|
354
375
|
parsedOk: true,
|
|
376
|
+
cost: apiCost,
|
|
355
377
|
};
|
|
356
378
|
}
|
|
357
379
|
} catch { /* fall through */ }
|
|
@@ -367,6 +389,7 @@ async function callModel(
|
|
|
367
389
|
tokensOut: data.usage?.completion_tokens || 0,
|
|
368
390
|
model,
|
|
369
391
|
parsedOk: false,
|
|
392
|
+
cost: apiCost,
|
|
370
393
|
};
|
|
371
394
|
}
|
|
372
395
|
} finally {
|
|
@@ -4,6 +4,7 @@ import type { FeedBuffer } from "../buffers/feed-buffer.js";
|
|
|
4
4
|
import type { SenseBuffer } from "../buffers/sense-buffer.js";
|
|
5
5
|
import type { AgentConfig, AgentEntry, ContextWindow, EscalationMode, ContextRichness, RecorderStatus, SenseEvent, FeedbackRecord } from "../types.js";
|
|
6
6
|
import type { Profiler } from "../profiler.js";
|
|
7
|
+
import type { CostTracker } from "../cost/tracker.js";
|
|
7
8
|
import { buildContextWindow, RICHNESS_PRESETS } from "./context-window.js";
|
|
8
9
|
import { analyzeContext } from "./analyzer.js";
|
|
9
10
|
import { writeSituationMd } from "./situation-writer.js";
|
|
@@ -40,6 +41,8 @@ export interface AgentLoopDeps {
|
|
|
40
41
|
getKnowledgeDocPath?: () => string | null;
|
|
41
42
|
/** Optional: feedback store for startup recap context. */
|
|
42
43
|
feedbackStore?: { queryRecent(n: number): FeedbackRecord[] };
|
|
44
|
+
/** Optional: cost tracker for LLM cost accumulation. */
|
|
45
|
+
costTracker?: CostTracker;
|
|
43
46
|
}
|
|
44
47
|
|
|
45
48
|
export interface TraceContext {
|
|
@@ -317,6 +320,17 @@ export class AgentLoop extends EventEmitter {
|
|
|
317
320
|
this.deps.profiler?.gauge("agent.parseSuccesses", this.stats.parseSuccesses);
|
|
318
321
|
this.deps.profiler?.gauge("agent.parseFailures", this.stats.parseFailures);
|
|
319
322
|
|
|
323
|
+
if (typeof result.cost === "number" && result.cost > 0) {
|
|
324
|
+
this.deps.costTracker?.record({
|
|
325
|
+
source: "analyzer",
|
|
326
|
+
model: usedModel,
|
|
327
|
+
cost: result.cost,
|
|
328
|
+
tokensIn,
|
|
329
|
+
tokensOut,
|
|
330
|
+
ts: Date.now(),
|
|
331
|
+
});
|
|
332
|
+
}
|
|
333
|
+
|
|
320
334
|
// Build entry
|
|
321
335
|
const entry: AgentEntry = {
|
|
322
336
|
...result,
|
|
@@ -375,12 +389,13 @@ export class AgentLoop extends EventEmitter {
|
|
|
375
389
|
|
|
376
390
|
// Finish trace
|
|
377
391
|
const costPerToken = { in: 0.075 / 1_000_000, out: 0.3 / 1_000_000 };
|
|
392
|
+
const estimatedCost = tokensIn * costPerToken.in + tokensOut * costPerToken.out;
|
|
378
393
|
traceCtx?.finish({
|
|
379
394
|
totalLatencyMs: Date.now() - entry.ts + latencyMs,
|
|
380
395
|
llmLatencyMs: latencyMs,
|
|
381
396
|
llmInputTokens: tokensIn,
|
|
382
397
|
llmOutputTokens: tokensOut,
|
|
383
|
-
llmCost: tokensIn * costPerToken.in + tokensOut * costPerToken.out,
|
|
398
|
+
llmCost: result.cost ?? estimatedCost,
|
|
384
399
|
escalated: false, // Updated by escalator
|
|
385
400
|
escalationScore: 0,
|
|
386
401
|
contextScreenEvents: contextWindow.screenCount,
|
|
@@ -477,6 +492,16 @@ export class AgentLoop extends EventEmitter {
|
|
|
477
492
|
};
|
|
478
493
|
|
|
479
494
|
const result = await analyzeContext(recapWindow, this.deps.agentConfig, null);
|
|
495
|
+
if (typeof result.cost === "number" && result.cost > 0) {
|
|
496
|
+
this.deps.costTracker?.record({
|
|
497
|
+
source: "analyzer",
|
|
498
|
+
model: result.model,
|
|
499
|
+
cost: result.cost,
|
|
500
|
+
tokensIn: result.tokensIn,
|
|
501
|
+
tokensOut: result.tokensOut,
|
|
502
|
+
ts: Date.now(),
|
|
503
|
+
});
|
|
504
|
+
}
|
|
480
505
|
if (result?.hud && result.hud !== "—" && result.hud !== "Idle") {
|
|
481
506
|
this.deps.onHudUpdate(result.hud);
|
|
482
507
|
log(TAG, `recap tick (${Date.now() - startTs}ms, ${result.tokensIn}in+${result.tokensOut}out tok) hud="${result.hud}"`);
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { EventEmitter } from "node:events";
|
|
2
2
|
import type { TranscriptionConfig, AudioChunk, TranscriptResult } from "../types.js";
|
|
3
3
|
import type { Profiler } from "../profiler.js";
|
|
4
|
+
import type { CostTracker } from "../cost/tracker.js";
|
|
4
5
|
import { LocalTranscriptionBackend } from "./transcription-local.js";
|
|
5
6
|
import { log, warn, error, debug } from "../log.js";
|
|
6
7
|
|
|
@@ -41,7 +42,10 @@ export class TranscriptionService extends EventEmitter {
|
|
|
41
42
|
private dropCount: number = 0;
|
|
42
43
|
private totalCalls: number = 0;
|
|
43
44
|
|
|
45
|
+
private costTracker: CostTracker | null = null;
|
|
46
|
+
|
|
44
47
|
setProfiler(p: Profiler): void { this.profiler = p; }
|
|
48
|
+
setCostTracker(ct: CostTracker): void { this.costTracker = ct; }
|
|
45
49
|
|
|
46
50
|
constructor(config: TranscriptionConfig) {
|
|
47
51
|
super();
|
|
@@ -219,7 +223,7 @@ export class TranscriptionService extends EventEmitter {
|
|
|
219
223
|
|
|
220
224
|
const data = await response.json() as {
|
|
221
225
|
choices?: Array<{ message?: { content?: string } }>;
|
|
222
|
-
usage?: { prompt_tokens?: number; completion_tokens?: number };
|
|
226
|
+
usage?: { prompt_tokens?: number; completion_tokens?: number; cost?: number };
|
|
223
227
|
};
|
|
224
228
|
|
|
225
229
|
const text = data.choices?.[0]?.message?.content?.trim();
|
|
@@ -231,6 +235,21 @@ export class TranscriptionService extends EventEmitter {
|
|
|
231
235
|
this.profiler?.timerRecord("transcription.call", elapsed);
|
|
232
236
|
this.totalAudioDurationMs += chunk.durationMs;
|
|
233
237
|
|
|
238
|
+
// Track tokens and cost before any early returns — the API call is already billed
|
|
239
|
+
if (data.usage) {
|
|
240
|
+
this.totalTokensConsumed += (data.usage.prompt_tokens || 0) + (data.usage.completion_tokens || 0);
|
|
241
|
+
}
|
|
242
|
+
if (typeof data.usage?.cost === "number" && data.usage.cost > 0) {
|
|
243
|
+
this.costTracker?.record({
|
|
244
|
+
source: "transcription",
|
|
245
|
+
model: this.config.geminiModel,
|
|
246
|
+
cost: data.usage.cost,
|
|
247
|
+
tokensIn: data.usage?.prompt_tokens || 0,
|
|
248
|
+
tokensOut: data.usage?.completion_tokens || 0,
|
|
249
|
+
ts: Date.now(),
|
|
250
|
+
});
|
|
251
|
+
}
|
|
252
|
+
|
|
234
253
|
if (!text) {
|
|
235
254
|
warn(TAG, `OpenRouter returned empty transcript (${elapsed}ms)`);
|
|
236
255
|
return;
|
|
@@ -248,10 +267,6 @@ export class TranscriptionService extends EventEmitter {
|
|
|
248
267
|
|
|
249
268
|
log(TAG, `transcript (${elapsed}ms): "${text.slice(0, 100)}${text.length > 100 ? "..." : ""}"`);
|
|
250
269
|
|
|
251
|
-
if (data.usage) {
|
|
252
|
-
this.totalTokensConsumed += (data.usage.prompt_tokens || 0) + (data.usage.completion_tokens || 0);
|
|
253
|
-
}
|
|
254
|
-
|
|
255
270
|
const result: TranscriptResult = {
|
|
256
271
|
text,
|
|
257
272
|
source: "openrouter",
|
|
@@ -11,10 +11,10 @@ const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
|
11
11
|
export let loadedEnvPath: string | undefined;
|
|
12
12
|
|
|
13
13
|
function loadDotEnv(): void {
|
|
14
|
-
// Try
|
|
14
|
+
// Try sinain-core/.env first, then project root .env
|
|
15
15
|
const candidates = [
|
|
16
|
-
resolve(__dirname, "..", "..", ".env"),
|
|
17
16
|
resolve(__dirname, "..", ".env"),
|
|
17
|
+
resolve(__dirname, "..", "..", ".env"),
|
|
18
18
|
];
|
|
19
19
|
for (const envPath of candidates) {
|
|
20
20
|
if (!existsSync(envPath)) continue;
|
|
@@ -252,6 +252,7 @@ export function loadConfig(): CoreConfig {
|
|
|
252
252
|
situationMdPath,
|
|
253
253
|
traceEnabled: boolEnv("TRACE_ENABLED", true),
|
|
254
254
|
traceDir: resolvePath(env("TRACE_DIR", resolve(sinainDataDir(), "traces"))),
|
|
255
|
+
costDisplayEnabled: boolEnv("COST_DISPLAY_ENABLED", false),
|
|
255
256
|
learningConfig,
|
|
256
257
|
traitConfig,
|
|
257
258
|
privacyConfig,
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import type { CostEntry, CostSnapshot } from "../types.js";
|
|
2
|
+
import { log } from "../log.js";
|
|
3
|
+
|
|
4
|
+
const TAG = "cost";
|
|
5
|
+
|
|
6
|
+
export class CostTracker {
|
|
7
|
+
private totalCost = 0;
|
|
8
|
+
private costBySource = new Map<string, number>();
|
|
9
|
+
private costByModel = new Map<string, number>();
|
|
10
|
+
private callCount = 0;
|
|
11
|
+
private startedAt = Date.now();
|
|
12
|
+
private timer: ReturnType<typeof setInterval> | null = null;
|
|
13
|
+
private onCostUpdate: (snapshot: CostSnapshot) => void;
|
|
14
|
+
|
|
15
|
+
constructor(onCostUpdate: (snapshot: CostSnapshot) => void) {
|
|
16
|
+
this.onCostUpdate = onCostUpdate;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
record(entry: CostEntry): void {
|
|
20
|
+
if (entry.cost <= 0) return;
|
|
21
|
+
this.totalCost += entry.cost;
|
|
22
|
+
this.callCount++;
|
|
23
|
+
this.costBySource.set(
|
|
24
|
+
entry.source,
|
|
25
|
+
(this.costBySource.get(entry.source) || 0) + entry.cost,
|
|
26
|
+
);
|
|
27
|
+
this.costByModel.set(
|
|
28
|
+
entry.model,
|
|
29
|
+
(this.costByModel.get(entry.model) || 0) + entry.cost,
|
|
30
|
+
);
|
|
31
|
+
this.onCostUpdate(this.getSnapshot());
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
getSnapshot(): CostSnapshot {
|
|
35
|
+
return {
|
|
36
|
+
totalCost: this.totalCost,
|
|
37
|
+
costBySource: Object.fromEntries(this.costBySource),
|
|
38
|
+
costByModel: Object.fromEntries(this.costByModel),
|
|
39
|
+
callCount: this.callCount,
|
|
40
|
+
startedAt: this.startedAt,
|
|
41
|
+
};
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
startPeriodicLog(intervalMs: number): void {
|
|
45
|
+
this.timer = setInterval(() => {
|
|
46
|
+
if (this.callCount === 0) return;
|
|
47
|
+
const elapsed = ((Date.now() - this.startedAt) / 60_000).toFixed(1);
|
|
48
|
+
const sources = [...this.costBySource.entries()]
|
|
49
|
+
.map(([k, v]) => `${k}=$${v.toFixed(6)}`)
|
|
50
|
+
.join(" ");
|
|
51
|
+
const models = [...this.costByModel.entries()]
|
|
52
|
+
.map(([k, v]) => `${k}=$${v.toFixed(6)}`)
|
|
53
|
+
.join(" ");
|
|
54
|
+
log(TAG, `$${this.totalCost.toFixed(6)} total (${this.callCount} calls, ${elapsed} min) | ${sources} | ${models}`);
|
|
55
|
+
}, intervalMs);
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
stop(): void {
|
|
59
|
+
if (this.timer) {
|
|
60
|
+
clearInterval(this.timer);
|
|
61
|
+
this.timer = null;
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
}
|
|
@@ -13,14 +13,6 @@ import { isCodingContext, buildEscalationMessage, fetchKnowledgeFacts } from "./
|
|
|
13
13
|
import { loadPendingTasks, savePendingTasks, type PendingTaskEntry } from "../util/task-store.js";
|
|
14
14
|
import { log, warn, error } from "../log.js";
|
|
15
15
|
|
|
16
|
-
/** Context passed to spawn subagents so they can act on the user's current situation. */
|
|
17
|
-
export interface SpawnContext {
|
|
18
|
-
currentApp?: string;
|
|
19
|
-
digest?: string;
|
|
20
|
-
recentAudio?: string;
|
|
21
|
-
recentScreen?: string;
|
|
22
|
-
}
|
|
23
|
-
|
|
24
16
|
export interface HttpPendingEscalation {
|
|
25
17
|
id: string;
|
|
26
18
|
message: string;
|
|
@@ -473,7 +465,7 @@ ${recentLines.join("\n")}`;
|
|
|
473
465
|
* Creates a unique child session key and sends the task directly to the gateway
|
|
474
466
|
* agent RPC — bypassing the main session to avoid dedup/NO_REPLY issues.
|
|
475
467
|
*/
|
|
476
|
-
async dispatchSpawnTask(task: string, label?: string
|
|
468
|
+
async dispatchSpawnTask(task: string, label?: string): Promise<void> {
|
|
477
469
|
// Prevent sibling spawn RPCs from piling up (independent from escalation queue)
|
|
478
470
|
if (this.spawnInFlight) {
|
|
479
471
|
log(TAG, `spawn-task skipped — spawn RPC already in-flight`);
|
|
@@ -493,12 +485,9 @@ ${recentLines.join("\n")}`;
|
|
|
493
485
|
this.lastSpawnFingerprint = fingerprint;
|
|
494
486
|
this.lastSpawnTs = now;
|
|
495
487
|
|
|
496
|
-
// Truncate label to gateway's 64-char limit
|
|
497
|
-
const safeLabel = label?.slice(0, 64);
|
|
498
|
-
|
|
499
488
|
const taskId = `spawn-${Date.now()}`;
|
|
500
489
|
const startedAt = Date.now();
|
|
501
|
-
const labelStr =
|
|
490
|
+
const labelStr = label ? ` (label: "${label}")` : "";
|
|
502
491
|
const idemKey = `spawn-task-${Date.now()}`;
|
|
503
492
|
|
|
504
493
|
// Generate a unique child session key — bypasses the main agent entirely
|
|
@@ -509,11 +498,11 @@ ${recentLines.join("\n")}`;
|
|
|
509
498
|
log(TAG, `dispatching spawn-task${labelStr} → child=${childSessionKey}: "${task.slice(0, 80)}..."`);
|
|
510
499
|
|
|
511
500
|
// ★ Broadcast "spawned" BEFORE the RPC — TSK tab shows ··· immediately
|
|
512
|
-
this.broadcastTaskEvent(taskId, "spawned",
|
|
501
|
+
this.broadcastTaskEvent(taskId, "spawned", label, startedAt);
|
|
513
502
|
|
|
514
503
|
if (!this.wsClient.isConnected) {
|
|
515
504
|
// No OpenClaw gateway — queue for bare agent HTTP polling
|
|
516
|
-
this.spawnHttpPending = { id: taskId, task, label:
|
|
505
|
+
this.spawnHttpPending = { id: taskId, task, label: label || "background-task", ts: startedAt };
|
|
517
506
|
const preview = task.length > 60 ? task.slice(0, 60) + "…" : task;
|
|
518
507
|
this.deps.feedBuffer.push(`🔧 Task queued for agent: ${preview}`, "normal", "system", "stream");
|
|
519
508
|
this.deps.wsHandler.broadcast(`🔧 Task queued for agent: ${preview}`, "normal");
|
|
@@ -521,10 +510,6 @@ ${recentLines.join("\n")}`;
|
|
|
521
510
|
return;
|
|
522
511
|
}
|
|
523
512
|
|
|
524
|
-
// Dynamic timeout: scale with task length (long transcripts need more time)
|
|
525
|
-
// Base 30s + 1s per 200 chars, min 45s, max 180s
|
|
526
|
-
const timeoutMs = Math.min(180_000, Math.max(45_000, Math.ceil(task.length / 200) * 1000 + 30_000));
|
|
527
|
-
|
|
528
513
|
// ★ Set spawnInFlight BEFORE first await — cleared in finally regardless of outcome.
|
|
529
514
|
// Dedicated lane flag: never touches the escalation queue so regular escalations
|
|
530
515
|
// continue unblocked while this spawn RPC is pending.
|
|
@@ -535,11 +520,11 @@ ${recentLines.join("\n")}`;
|
|
|
535
520
|
message: task,
|
|
536
521
|
sessionKey: childSessionKey,
|
|
537
522
|
lane: "subagent",
|
|
538
|
-
extraSystemPrompt: this.buildChildSystemPrompt(
|
|
523
|
+
extraSystemPrompt: this.buildChildSystemPrompt(task, label),
|
|
539
524
|
deliver: false,
|
|
540
525
|
idempotencyKey: idemKey,
|
|
541
|
-
label:
|
|
542
|
-
},
|
|
526
|
+
label: label || undefined,
|
|
527
|
+
}, 45_000, { expectFinal: true });
|
|
543
528
|
|
|
544
529
|
log(TAG, `spawn-task RPC response: ${JSON.stringify(result).slice(0, 500)}`);
|
|
545
530
|
this.stats.totalSpawnResponses++;
|
|
@@ -551,15 +536,15 @@ ${recentLines.join("\n")}`;
|
|
|
551
536
|
if (Array.isArray(payloads) && payloads.length > 0) {
|
|
552
537
|
const output = payloads.map((pl: any) => pl.text || "").join("\n").trim();
|
|
553
538
|
if (output) {
|
|
554
|
-
this.pushResponse(`${
|
|
555
|
-
this.broadcastTaskEvent(taskId, "completed",
|
|
539
|
+
this.pushResponse(`${label || "Background task"}:\n${output}`);
|
|
540
|
+
this.broadcastTaskEvent(taskId, "completed", label, startedAt, output);
|
|
556
541
|
} else {
|
|
557
542
|
log(TAG, `spawn-task: ${payloads.length} payloads but empty text, trying chat.history`);
|
|
558
543
|
const historyText = await this.fetchChildResult(childSessionKey);
|
|
559
|
-
this.broadcastTaskEvent(taskId, "completed",
|
|
544
|
+
this.broadcastTaskEvent(taskId, "completed", label, startedAt,
|
|
560
545
|
historyText || "task completed (no output)");
|
|
561
546
|
if (historyText) {
|
|
562
|
-
this.pushResponse(`${
|
|
547
|
+
this.pushResponse(`${label || "Background task"}:\n${historyText}`);
|
|
563
548
|
}
|
|
564
549
|
}
|
|
565
550
|
} else {
|
|
@@ -567,10 +552,10 @@ ${recentLines.join("\n")}`;
|
|
|
567
552
|
log(TAG, `spawn-task: no payloads, fetching chat.history for child=${childSessionKey}`);
|
|
568
553
|
const historyText = await this.fetchChildResult(childSessionKey);
|
|
569
554
|
if (historyText) {
|
|
570
|
-
this.pushResponse(`${
|
|
571
|
-
this.broadcastTaskEvent(taskId, "completed",
|
|
555
|
+
this.pushResponse(`${label || "Background task"}:\n${historyText}`);
|
|
556
|
+
this.broadcastTaskEvent(taskId, "completed", label, startedAt, historyText);
|
|
572
557
|
} else {
|
|
573
|
-
this.broadcastTaskEvent(taskId, "completed",
|
|
558
|
+
this.broadcastTaskEvent(taskId, "completed", label, startedAt,
|
|
574
559
|
"task completed (no output captured)");
|
|
575
560
|
}
|
|
576
561
|
}
|
|
@@ -579,7 +564,7 @@ ${recentLines.join("\n")}`;
|
|
|
579
564
|
this.pendingSpawnTasks.set(taskId, {
|
|
580
565
|
runId,
|
|
581
566
|
childSessionKey,
|
|
582
|
-
label
|
|
567
|
+
label,
|
|
583
568
|
startedAt,
|
|
584
569
|
pollingEmitted: false,
|
|
585
570
|
});
|
|
@@ -590,43 +575,30 @@ ${recentLines.join("\n")}`;
|
|
|
590
575
|
savePendingTasks(this.pendingSpawnTasks);
|
|
591
576
|
} catch (err: any) {
|
|
592
577
|
error(TAG, `spawn-task failed: ${err.message}`);
|
|
593
|
-
this.broadcastTaskEvent(taskId, "failed",
|
|
578
|
+
this.broadcastTaskEvent(taskId, "failed", label, startedAt);
|
|
594
579
|
} finally {
|
|
595
580
|
this.spawnInFlight = false;
|
|
596
581
|
}
|
|
597
582
|
}
|
|
598
583
|
|
|
599
|
-
/** Build a
|
|
600
|
-
private buildChildSystemPrompt(
|
|
601
|
-
|
|
602
|
-
"#
|
|
584
|
+
/** Build a focused system prompt for the child subagent. */
|
|
585
|
+
private buildChildSystemPrompt(task: string, label?: string): string {
|
|
586
|
+
return [
|
|
587
|
+
"# Subagent Context",
|
|
588
|
+
"",
|
|
589
|
+
"You are a **subagent** spawned for a specific task.",
|
|
603
590
|
"",
|
|
604
|
-
"
|
|
605
|
-
|
|
606
|
-
"
|
|
591
|
+
"## Your Role",
|
|
592
|
+
`- Task: ${task.replace(/\s+/g, " ").trim().slice(0, 500)}`,
|
|
593
|
+
"- Complete this task. That's your entire purpose.",
|
|
607
594
|
"",
|
|
608
595
|
"## Rules",
|
|
609
|
-
"1.
|
|
610
|
-
"2.
|
|
611
|
-
"3.
|
|
612
|
-
"
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
if (context?.currentApp || context?.digest) {
|
|
616
|
-
parts.push("", "## User Context");
|
|
617
|
-
if (context.currentApp) parts.push(`- Current app: ${context.currentApp}`);
|
|
618
|
-
if (context.digest) parts.push(`- Situation: ${context.digest.slice(0, 500)}`);
|
|
619
|
-
}
|
|
620
|
-
|
|
621
|
-
if (context?.recentScreen) {
|
|
622
|
-
parts.push("", "## Recent Screen (OCR, last ~60s)", context.recentScreen);
|
|
623
|
-
}
|
|
624
|
-
|
|
625
|
-
if (context?.recentAudio) {
|
|
626
|
-
parts.push("", "## Recent Audio (last ~60s)", context.recentAudio);
|
|
627
|
-
}
|
|
628
|
-
|
|
629
|
-
return parts.join("\n");
|
|
596
|
+
"1. Stay focused — do your assigned task, nothing else",
|
|
597
|
+
"2. Your final message will be reported to the requester",
|
|
598
|
+
"3. Be concise but informative",
|
|
599
|
+
"",
|
|
600
|
+
label ? `Label: ${label}` : "",
|
|
601
|
+
].filter(Boolean).join("\n");
|
|
630
602
|
}
|
|
631
603
|
|
|
632
604
|
/** Fetch the latest assistant reply from a child session's chat history. */
|