@zhijiewang/openharness 1.4.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/index.js +97 -15
- package/dist/harness/config.d.ts +10 -0
- package/dist/harness/sandbox.d.ts +34 -0
- package/dist/harness/sandbox.js +104 -0
- package/dist/harness/submit-handler.js +44 -10
- package/dist/harness/traces.d.ts +58 -0
- package/dist/harness/traces.js +183 -0
- package/dist/main.js +2 -0
- package/dist/query/context-manager.d.ts +56 -0
- package/dist/query/context-manager.js +108 -0
- package/dist/query/index.js +5 -1
- package/dist/sdk/index.d.ts +76 -0
- package/dist/sdk/index.js +146 -0
- package/dist/services/EvaluatorLoop.d.ts +61 -0
- package/dist/services/EvaluatorLoop.js +157 -0
- package/dist/services/MetaHarness.d.ts +61 -0
- package/dist/services/MetaHarness.js +216 -0
- package/dist/tools/AgentTool/index.js +8 -1
- package/dist/tools/MonitorTool/index.d.ts +2 -2
- package/package.json +6 -2
package/dist/commands/index.js
CHANGED
|
@@ -28,8 +28,8 @@ register("help", "Show available commands", () => {
|
|
|
28
28
|
'Session': ['clear', 'compact', 'export', 'history', 'browse', 'resume', 'fork', 'pin', 'unpin'],
|
|
29
29
|
'Git': ['diff', 'undo', 'rewind', 'commit', 'log'],
|
|
30
30
|
'Info': ['help', 'cost', 'status', 'config', 'files', 'model', 'memory', 'doctor', 'context', 'mcp', 'mcp-registry'],
|
|
31
|
-
'Settings': ['theme', 'vim', 'companion', 'fast', 'keys'],
|
|
32
|
-
'AI': ['plan', 'review', 'roles', 'agents', 'plugins'],
|
|
31
|
+
'Settings': ['theme', 'vim', 'companion', 'fast', 'keys', 'effort', 'sandbox'],
|
|
32
|
+
'AI': ['plan', 'review', 'roles', 'agents', 'plugins', 'btw'],
|
|
33
33
|
'Pet': ['cybergotchi'],
|
|
34
34
|
};
|
|
35
35
|
const lines = [];
|
|
@@ -269,9 +269,37 @@ register("model", "Switch model (e.g., /model llama3.2 or /model ollama/llama3.2
|
|
|
269
269
|
const modelName = model.includes("/") ? model.split("/").slice(1).join("/") : model;
|
|
270
270
|
return { output: `Switched to ${modelName}.`, handled: true, newModel: modelName };
|
|
271
271
|
});
|
|
272
|
-
register("compact", "Compress conversation history", (
|
|
272
|
+
register("compact", "Compress conversation history (optional: focus keyword or message number)", (args, ctx) => {
|
|
273
|
+
const focus = args.trim();
|
|
273
274
|
const before = ctx.messages.length;
|
|
274
275
|
const targetTokens = Math.floor(getContextWindow(ctx.model) * 0.6);
|
|
276
|
+
if (focus && /^\d+$/.test(focus)) {
|
|
277
|
+
// Numeric: compact messages 1-N, keep N+1 onwards
|
|
278
|
+
const cutoff = parseInt(focus);
|
|
279
|
+
if (cutoff < 1 || cutoff >= before) {
|
|
280
|
+
return { output: `Invalid: use 1-${before - 1}`, handled: true };
|
|
281
|
+
}
|
|
282
|
+
const kept = ctx.messages.slice(cutoff);
|
|
283
|
+
return {
|
|
284
|
+
output: `Compacted: removed first ${cutoff} messages, kept ${kept.length}.`,
|
|
285
|
+
handled: true,
|
|
286
|
+
compactedMessages: kept,
|
|
287
|
+
};
|
|
288
|
+
}
|
|
289
|
+
if (focus) {
|
|
290
|
+
// Keyword focus: compress but preserve messages containing the keyword
|
|
291
|
+
const focusLower = focus.toLowerCase();
|
|
292
|
+
const preserved = ctx.messages.filter(m => m.content.toLowerCase().includes(focusLower) || m.meta?.pinned);
|
|
293
|
+
const others = ctx.messages.filter(m => !m.content.toLowerCase().includes(focusLower) && !m.meta?.pinned);
|
|
294
|
+
const compactedOthers = compressMessages(others, targetTokens);
|
|
295
|
+
const merged = [...compactedOthers, ...preserved].sort((a, b) => a.timestamp - b.timestamp);
|
|
296
|
+
return {
|
|
297
|
+
output: `Compacted with focus "${focus}": ${before} → ${merged.length} messages (preserved ${preserved.length} matching).`,
|
|
298
|
+
handled: true,
|
|
299
|
+
compactedMessages: merged,
|
|
300
|
+
};
|
|
301
|
+
}
|
|
302
|
+
// Default: compress everything
|
|
275
303
|
const compacted = compressMessages(ctx.messages, targetTokens);
|
|
276
304
|
const dropped = before - compacted.length;
|
|
277
305
|
return {
|
|
@@ -447,6 +475,30 @@ register("keys", "Show keyboard shortcuts", () => {
|
|
|
447
475
|
shortcuts.push("", " Session:", " /vim Toggle Vim mode", " /browse Interactive session browser", " /theme dark|light Switch theme");
|
|
448
476
|
return { output: shortcuts.join("\n"), handled: true };
|
|
449
477
|
});
|
|
478
|
+
register("sandbox", "Show sandbox status and restrictions", () => {
    // NOTE(review): `require` is only defined in CommonJS scope. The harness
    // modules shown alongside this file use ES module syntax, so confirm that
    // `require` is actually available here — otherwise this handler throws.
    const sandboxModule = require('../harness/sandbox.js');
    const report = sandboxModule.sandboxStatus();
    // Append a pointer to where the restrictions are configured.
    return {
        output: `${report}\n\nConfigure in .oh/config.yaml under sandbox:`,
        handled: true,
    };
});
|
|
482
|
+
register("effort", "Set reasoning effort level (low/medium/high/max)", (args) => {
    const levels = ['low', 'medium', 'high', 'max'];
    const requested = args.trim().toLowerCase();
    // Anything outside the known levels (including no argument) shows usage.
    const isKnown = levels.includes(requested);
    if (isKnown) {
        // NOTE(review): the result carries only a message; nothing visible in
        // this handler stores the level — confirm the caller applies it.
        return { output: `Effort level set to: ${requested}`, handled: true };
    }
    const usage = `Usage: /effort <${levels.join('|')}>\n\nlow — fast, minimal reasoning\nmedium — balanced (default)\nhigh — thorough reasoning\nmax — maximum depth (Opus only)`;
    return { output: usage, handled: true };
});
|
|
490
|
+
register("btw", "Ask a side question (ephemeral, no tools, not saved to history)", (args) => {
    const question = args.trim();
    // Without a question there is nothing to forward: show usage and stop.
    if (question.length === 0) {
        return { output: "Usage: /btw <your question>", handled: true };
    }
    // The result is returned with handled: false and a prompt prefix that asks
    // for a brief, tool-free answer.
    const preamble = '[Side question — answer briefly without using any tools. This is ephemeral and not part of the main conversation.]';
    return {
        output: `[btw] ${question}`,
        handled: false,
        prependToPrompt: `${preamble}\n\n${question}`,
    };
});
|
|
450
502
|
register("plan", "Enter plan mode", (_args, _ctx) => {
|
|
451
503
|
const task = _args.trim();
|
|
452
504
|
if (!task) {
|
|
@@ -604,20 +656,50 @@ register("doctor", "Run diagnostic health checks", (_args, ctx) => {
|
|
|
604
656
|
});
|
|
605
657
|
register("context", "Show context window usage breakdown", (_args, ctx) => {
    const ctxWindow = getContextWindow(ctx.model);
    // Categorize messages by role. Token counts are a rough estimate of one
    // token per four characters of message content.
    let userTokens = 0;
    let assistantTokens = 0;
    let toolTokens = 0;
    let systemTokens = 0;
    for (const msg of ctx.messages) {
        const tokens = Math.ceil((msg.content?.length ?? 0) / 4);
        switch (msg.role) {
            case 'user':
                userTokens += tokens;
                break;
            case 'assistant':
                assistantTokens += tokens;
                break;
            case 'tool':
                toolTokens += tokens;
                break;
            case 'system':
                systemTokens += tokens;
                break;
        }
    }
    const totalTokens = userTokens + assistantTokens + toolTokens + systemTokens;
    // A non-positive or non-finite window would turn every ratio into
    // NaN/Infinity, so treat it as "no usage information".
    const hasWindow = Number.isFinite(ctxWindow) && ctxWindow > 0;
    const freeTokens = hasWindow ? Math.max(0, ctxWindow - totalTokens) : 0;
    const usage = hasWindow ? totalTokens / ctxWindow : 0;
    // Visual bar (30 chars wide). The filled part is clamped to the bar width:
    // when the estimate exceeds the window, an unclamped value would make the
    // second repeat() count negative and throw a RangeError.
    const barWidth = 30;
    const filled = Math.min(barWidth, Math.max(0, Math.round(usage * barWidth)));
    const bar = '\u2588'.repeat(filled) + '\u2591'.repeat(barWidth - filled);
    const pct = (n) => `${((hasWindow ? n / ctxWindow : 0) * 100).toFixed(1)}%`;
    const pad = (s, n) => s.padEnd(n);
    const lines = [
        `Context Window (${ctxWindow.toLocaleString()} tokens):`,
        '',
        ` ${pad('User messages:', 20)} ${userTokens.toLocaleString().padStart(8)} tokens (${pct(userTokens)})`,
        ` ${pad('Assistant:', 20)} ${assistantTokens.toLocaleString().padStart(8)} tokens (${pct(assistantTokens)})`,
        ` ${pad('Tool results:', 20)} ${toolTokens.toLocaleString().padStart(8)} tokens (${pct(toolTokens)})`,
        ` ${pad('System/info:', 20)} ${systemTokens.toLocaleString().padStart(8)} tokens (${pct(systemTokens)})`,
        '',
        ` ${pad('Total used:', 20)} ${totalTokens.toLocaleString().padStart(8)} tokens (${pct(totalTokens)})`,
        ` ${pad('Free:', 20)} ${freeTokens.toLocaleString().padStart(8)} tokens (${pct(freeTokens)})`,
        '',
        ` ${bar} ${Math.round(usage * 100)}%`,
        '',
        ` Messages: ${ctx.messages.length} | Compress at: ${Math.round(ctxWindow * 0.8).toLocaleString()} (80%)`,
    ];
    return { output: lines.join('\n'), handled: true };
});
|
|
622
704
|
register("mcp", "Show MCP server status", () => {
|
|
623
705
|
const mcp = connectedMcpServers();
|
package/dist/harness/config.d.ts
CHANGED
|
@@ -68,11 +68,21 @@ export type OhConfig = {
|
|
|
68
68
|
balanced?: string;
|
|
69
69
|
powerful?: string;
|
|
70
70
|
};
|
|
71
|
+
/** Effort level for LLM reasoning depth */
|
|
72
|
+
effortLevel?: 'low' | 'medium' | 'high' | 'max';
|
|
71
73
|
/** Opt-in telemetry (default: off) */
|
|
72
74
|
telemetry?: {
|
|
73
75
|
enabled?: boolean;
|
|
74
76
|
endpoint?: string;
|
|
75
77
|
};
|
|
78
|
+
/** Sandbox — filesystem and network restrictions */
|
|
79
|
+
sandbox?: {
|
|
80
|
+
enabled?: boolean;
|
|
81
|
+
allowedPaths?: string[];
|
|
82
|
+
allowedDomains?: string[];
|
|
83
|
+
blockNetwork?: boolean;
|
|
84
|
+
blockedCommands?: string[];
|
|
85
|
+
};
|
|
76
86
|
/** Remote server security settings */
|
|
77
87
|
remote?: {
|
|
78
88
|
tokens?: string[];
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sandbox — filesystem and network restrictions for tool execution.
|
|
3
|
+
*
|
|
4
|
+
* Limits what tools can access:
|
|
5
|
+
* - File tools: only write to allowed paths
|
|
6
|
+
* - Web tools: only access allowed domains
|
|
7
|
+
* - Bash: restricted commands (no curl/wget by default)
|
|
8
|
+
*
|
|
9
|
+
* Reduces permission prompts while maintaining security.
|
|
10
|
+
*/
|
|
11
|
+
export type SandboxConfig = {
    /** Master switch; while false, every sandbox check allows the operation. */
    enabled: boolean;
    /**
     * Directories tools can write to. Entries are resolved against the current
     * working directory and matched by containment (a path is allowed when it
     * lies inside one of them); no glob expansion is performed by the check.
     */
    allowedPaths: string[];
    /**
     * Domains WebFetch/WebSearch can access. A listed domain also covers its
     * subdomains; an empty list means all domains are allowed.
     */
    allowedDomains: string[];
    /** Block all network access (takes precedence over allowedDomains) */
    blockNetwork: boolean;
    /** Commands blocked in Bash (default: curl, wget) */
    blockedCommands: string[];
};
|
|
22
|
+
/** Get the current sandbox config */
|
|
23
|
+
export declare function getSandboxConfig(): SandboxConfig;
|
|
24
|
+
/** Reset cached config */
|
|
25
|
+
export declare function invalidateSandboxCache(): void;
|
|
26
|
+
/** Check if a file path is allowed for writing */
|
|
27
|
+
export declare function isPathAllowed(filePath: string): boolean;
|
|
28
|
+
/** Check if a domain is allowed for network access */
|
|
29
|
+
export declare function isDomainAllowed(url: string): boolean;
|
|
30
|
+
/** Check if a bash command is allowed */
|
|
31
|
+
export declare function isCommandAllowed(command: string): boolean;
|
|
32
|
+
/** Get a human-readable sandbox status */
|
|
33
|
+
export declare function sandboxStatus(): string;
|
|
34
|
+
//# sourceMappingURL=sandbox.d.ts.map
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sandbox — filesystem and network restrictions for tool execution.
|
|
3
|
+
*
|
|
4
|
+
* Limits what tools can access:
|
|
5
|
+
* - File tools: only write to allowed paths
|
|
6
|
+
* - Web tools: only access allowed domains
|
|
7
|
+
* - Bash: restricted commands (no curl/wget by default)
|
|
8
|
+
*
|
|
9
|
+
* Reduces permission prompts while maintaining security.
|
|
10
|
+
*/
|
|
11
|
+
import { isAbsolute, relative, resolve, sep } from 'node:path';
|
|
12
|
+
import { readOhConfig } from './config.js';
|
|
13
|
+
const DEFAULT_SANDBOX = {
|
|
14
|
+
enabled: false,
|
|
15
|
+
allowedPaths: ['.'], // current directory
|
|
16
|
+
allowedDomains: [], // empty = all allowed
|
|
17
|
+
blockNetwork: false,
|
|
18
|
+
blockedCommands: ['curl', 'wget'],
|
|
19
|
+
};
|
|
20
|
+
// ── Sandbox Manager ──
|
|
21
|
+
let _config = null;
|
|
22
|
+
/**
 * Get the current sandbox config.
 *
 * The result is cached in `_config` after the first call. User settings from
 * `readOhConfig().sandbox` are layered over `DEFAULT_SANDBOX`; a setting that
 * is missing, null, or (for list fields) not an array falls back to its
 * default instead of replacing it, so later `.length` / `.join` calls on the
 * list fields cannot fail. The returned object and its arrays are fresh
 * copies, so mutating them never alters `DEFAULT_SANDBOX`.
 *
 * @returns {{enabled: boolean, allowedPaths: string[], allowedDomains: string[], blockNetwork: boolean, blockedCommands: string[]}}
 */
export function getSandboxConfig() {
    if (_config)
        return _config;
    const overrides = readOhConfig()?.sandbox ?? {};
    // Copy the chosen list so neither the defaults nor the parsed config object
    // are shared with callers.
    const listOrDefault = (value, fallback) => [...(Array.isArray(value) ? value : fallback)];
    _config = {
        ...DEFAULT_SANDBOX,
        ...overrides,
        enabled: overrides.enabled ?? DEFAULT_SANDBOX.enabled,
        allowedPaths: listOrDefault(overrides.allowedPaths, DEFAULT_SANDBOX.allowedPaths),
        allowedDomains: listOrDefault(overrides.allowedDomains, DEFAULT_SANDBOX.allowedDomains),
        blockNetwork: overrides.blockNetwork ?? DEFAULT_SANDBOX.blockNetwork,
        blockedCommands: listOrDefault(overrides.blockedCommands, DEFAULT_SANDBOX.blockedCommands),
    };
    return _config;
}
|
|
38
|
+
/**
 * Reset cached config.
 *
 * Clears the module-level cache so that the next getSandboxConfig() call
 * rebuilds the config from readOhConfig() instead of returning the cached one.
 */
export function invalidateSandboxCache() {
    _config = null;
}
|
|
42
|
+
/**
 * Check if a file path is allowed for writing.
 *
 * Always true while the sandbox is disabled. Otherwise the path (resolved
 * against the current working directory) must lie inside at least one of the
 * configured `allowedPaths` directories, or be that directory itself.
 *
 * NOTE(review): the comparison is purely lexical; symbolic links are not
 * resolved here — confirm whether callers need realpath-based checks.
 *
 * @param {string} filePath - Absolute or cwd-relative path to test.
 * @returns {boolean} True when writing to the path is permitted.
 */
export function isPathAllowed(filePath) {
    const config = getSandboxConfig();
    if (!config.enabled)
        return true;
    const cwd = process.cwd();
    const target = resolve(cwd, filePath);
    return config.allowedPaths.some((allowed) => {
        const rel = relative(resolve(cwd, allowed), target);
        // The target escapes the allowed directory when the relative path climbs
        // out of it ('..' exactly, or a leading '../' segment) or is absolute
        // (different drive/root on Windows). A plain startsWith('..') test would
        // also reject legitimate names such as '..cache'.
        const escapes = rel === '..' || rel.startsWith(`..${sep}`) || isAbsolute(rel);
        return !escapes;
    });
}
|
|
58
|
+
/**
 * Check if a domain is allowed for network access.
 *
 * Returns true when the sandbox is disabled or no domain list is configured,
 * false when all network access is blocked. Otherwise the URL's hostname must
 * equal a listed domain or be a subdomain of one (case-insensitive). A URL
 * that cannot be parsed is rejected.
 */
export function isDomainAllowed(url) {
    const { enabled, blockNetwork, allowedDomains } = getSandboxConfig();
    if (!enabled) {
        return true;
    }
    if (blockNetwork) {
        return false;
    }
    if (allowedDomains.length === 0) {
        return true;
    }
    try {
        const host = new URL(url).hostname.toLowerCase();
        for (const entry of allowedDomains) {
            const domain = entry.toLowerCase();
            const isExact = host === domain;
            const isSubdomain = host.endsWith('.' + domain);
            if (isExact || isSubdomain) {
                return true;
            }
        }
        return false;
    }
    catch {
        return false;
    }
}
|
|
75
|
+
/**
 * Check if a bash command is allowed.
 *
 * Always true while the sandbox is disabled. Otherwise the command line is
 * split on shell control characters (`;`, `|`, `&`, newlines, parentheses,
 * braces, backticks) and the program name of every segment is compared,
 * case-insensitively, with `blockedCommands`. Leading `VAR=value` assignments
 * and common wrapper commands (sudo, env, command, exec, nohup, time) are
 * skipped, and a path prefix is stripped, so `sudo curl`, `/usr/bin/curl` and
 * `ls && curl` are all rejected when `curl` is blocked.
 *
 * This is a heuristic, not a shell parser: separators inside quoted strings
 * are also treated as separators (which can only make the check stricter),
 * and wrapper options such as `sudo -u user` are not interpreted.
 *
 * @param {string} command - The full command line about to be executed.
 * @returns {boolean} False when any segment invokes a blocked command.
 */
export function isCommandAllowed(command) {
    const config = getSandboxConfig();
    if (!config.enabled)
        return true;
    const blocked = new Set(config.blockedCommands.map((name) => String(name).toLowerCase()));
    if (blocked.size === 0)
        return true;
    const wrappers = new Set(['sudo', 'env', 'command', 'exec', 'nohup', 'time']);
    const assignment = /^[A-Za-z_][A-Za-z0-9_]*=/;
    for (const segment of command.split(/[;|&\n(){}`]/)) {
        const words = segment.trim().split(/\s+/).filter(Boolean);
        // Skip environment assignments and wrapper commands to reach the
        // program that is actually executed in this segment.
        let index = 0;
        while (index < words.length
            && (assignment.test(words[index]) || wrappers.has(words[index].toLowerCase()))) {
            index++;
        }
        const word = words[index]?.toLowerCase();
        if (!word)
            continue;
        // Compare both the word as written and its basename (path stripped).
        const baseName = word.split(/[\\/]/).pop();
        if (blocked.has(word) || blocked.has(baseName))
            return false;
    }
    return true;
}
|
|
83
|
+
/**
 * Get a human-readable sandbox status.
 *
 * Yields a single line when the sandbox is disabled; otherwise a multi-line
 * report listing allowed paths, the network policy and any blocked commands.
 */
export function sandboxStatus() {
    const { enabled, allowedPaths, allowedDomains, blockNetwork, blockedCommands } = getSandboxConfig();
    if (!enabled) {
        return 'Sandbox: disabled';
    }
    const pathList = allowedPaths.join(', ') || 'none';
    // Exactly one network line is shown: blocked, restricted to domains, or open.
    let networkLine;
    if (blockNetwork) {
        networkLine = ' Network: blocked';
    }
    else if (allowedDomains.length > 0) {
        networkLine = ` Allowed domains: ${allowedDomains.join(', ')}`;
    }
    else {
        networkLine = ' Network: unrestricted';
    }
    const report = ['Sandbox: enabled', ` Allowed paths: ${pathList}`, networkLine];
    if (blockedCommands.length > 0) {
        report.push(` Blocked commands: ${blockedCommands.join(', ')}`);
    }
    return report.join('\n');
}
|
|
104
|
+
//# sourceMappingURL=sandbox.js.map
|
|
@@ -22,6 +22,26 @@ export async function handleUserInput(input, ctx) {
|
|
|
22
22
|
return { handled: true, messages };
|
|
23
23
|
}
|
|
24
24
|
}
|
|
25
|
+
// ! Bash mode — direct shell execution, output added to context
|
|
26
|
+
if (trimmed.startsWith('!') && trimmed.length > 1) {
|
|
27
|
+
const command = trimmed.slice(1).trim();
|
|
28
|
+
try {
|
|
29
|
+
const { execSync } = await import('node:child_process');
|
|
30
|
+
const output = execSync(command, {
|
|
31
|
+
encoding: 'utf-8',
|
|
32
|
+
cwd: process.cwd(),
|
|
33
|
+
timeout: 30_000,
|
|
34
|
+
maxBuffer: 1024 * 1024,
|
|
35
|
+
windowsHide: true,
|
|
36
|
+
});
|
|
37
|
+
messages = [...messages, createInfoMessage(`$ ${command}\n${output.trimEnd()}`)];
|
|
38
|
+
}
|
|
39
|
+
catch (err) {
|
|
40
|
+
const output = String(err.stdout ?? err.stderr ?? err.message ?? 'Command failed');
|
|
41
|
+
messages = [...messages, createInfoMessage(`$ ${command}\n${output.trimEnd()}`)];
|
|
42
|
+
}
|
|
43
|
+
return { handled: true, messages };
|
|
44
|
+
}
|
|
25
45
|
// Vim toggle
|
|
26
46
|
if (trimmed === '/vim') {
|
|
27
47
|
return { handled: true, messages, vimToggled: true };
|
|
@@ -69,25 +89,39 @@ export async function handleUserInput(input, ctx) {
|
|
|
69
89
|
}
|
|
70
90
|
// Normal prompt — add user message
|
|
71
91
|
messages = [...messages, createUserMessage(input)];
|
|
72
|
-
// Resolve @mentions —
|
|
92
|
+
// Resolve @mentions — supports @file, @file#L5-10, @file#5-10, MCP resources
|
|
73
93
|
let resolvedInput = input;
|
|
74
|
-
const mentionPattern = /@([\w][\w./-]*)
|
|
75
|
-
const mentions = [...input.matchAll(mentionPattern)]
|
|
94
|
+
const mentionPattern = /@([\w][\w./-]*)(?:#L?(\d+)(?:-(\d+))?)?/g;
|
|
95
|
+
const mentions = [...input.matchAll(mentionPattern)];
|
|
76
96
|
const companionName = ctx.companionConfig?.soul?.name?.toLowerCase();
|
|
77
|
-
for (const
|
|
97
|
+
for (const match of mentions) {
|
|
98
|
+
const mention = match[1];
|
|
99
|
+
const startLine = match[2] ? parseInt(match[2]) : undefined;
|
|
100
|
+
const endLine = match[3] ? parseInt(match[3]) : startLine;
|
|
101
|
+
const fullRef = match[0];
|
|
78
102
|
if (companionName && mention.toLowerCase() === companionName)
|
|
79
103
|
continue;
|
|
80
|
-
// Try local file first (supports paths like @src/main.ts, @README.md)
|
|
104
|
+
// Try local file first (supports paths like @src/main.ts, @README.md#L5-10)
|
|
81
105
|
try {
|
|
82
106
|
const { existsSync, readFileSync } = await import('node:fs');
|
|
83
107
|
const { resolve } = await import('node:path');
|
|
84
108
|
const filePath = resolve(process.cwd(), mention);
|
|
85
109
|
if (existsSync(filePath)) {
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
110
|
+
let content = readFileSync(filePath, 'utf-8');
|
|
111
|
+
// Apply line range if specified
|
|
112
|
+
if (startLine !== undefined) {
|
|
113
|
+
const lines = content.split('\n');
|
|
114
|
+
const start = Math.max(0, startLine - 1); // 1-indexed to 0-indexed
|
|
115
|
+
const end = endLine !== undefined ? endLine : start + 1;
|
|
116
|
+
content = lines.slice(start, end).join('\n');
|
|
117
|
+
resolvedInput += `\n\n[File ${fullRef} (lines ${startLine}-${endLine ?? startLine})]:\n${content}`;
|
|
118
|
+
}
|
|
119
|
+
else {
|
|
120
|
+
const truncated = content.length > 10_000
|
|
121
|
+
? content.slice(0, 10_000) + '\n[...truncated]'
|
|
122
|
+
: content;
|
|
123
|
+
resolvedInput += `\n\n[File @${mention}]:\n${truncated}`;
|
|
124
|
+
}
|
|
91
125
|
continue;
|
|
92
126
|
}
|
|
93
127
|
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Session Traces — structured observability for agent sessions.
|
|
3
|
+
*
|
|
4
|
+
* Every query turn, tool call, LLM stream, and compression event
|
|
5
|
+
* generates a trace span. Traces enable debugging, replay, and
|
|
6
|
+
* performance analysis.
|
|
7
|
+
*
|
|
8
|
+
* Compatible with OpenTelemetry export format.
|
|
9
|
+
*/
|
|
10
|
+
export type TraceSpan = {
|
|
11
|
+
spanId: string;
|
|
12
|
+
parentSpanId?: string;
|
|
13
|
+
name: string;
|
|
14
|
+
startTime: number;
|
|
15
|
+
endTime: number;
|
|
16
|
+
durationMs: number;
|
|
17
|
+
attributes: Record<string, unknown>;
|
|
18
|
+
status: 'ok' | 'error';
|
|
19
|
+
};
|
|
20
|
+
export type TraceEvent = {
|
|
21
|
+
name: string;
|
|
22
|
+
timestamp: number;
|
|
23
|
+
attributes?: Record<string, unknown>;
|
|
24
|
+
};
|
|
25
|
+
export declare class SessionTracer {
|
|
26
|
+
private sessionId;
|
|
27
|
+
private spans;
|
|
28
|
+
private activeSpans;
|
|
29
|
+
private spanCounter;
|
|
30
|
+
constructor(sessionId: string);
|
|
31
|
+
/** Start a new span. Returns the span ID. */
|
|
32
|
+
startSpan(name: string, attributes?: Record<string, unknown>, parentSpanId?: string): string;
|
|
33
|
+
/** End a span and record it. */
|
|
34
|
+
endSpan(spanId: string, status?: 'ok' | 'error', extraAttributes?: Record<string, unknown>): TraceSpan | null;
|
|
35
|
+
/** Get all completed spans */
|
|
36
|
+
getSpans(): TraceSpan[];
|
|
37
|
+
/** Get a summary of the trace */
|
|
38
|
+
getSummary(): {
|
|
39
|
+
totalSpans: number;
|
|
40
|
+
totalDurationMs: number;
|
|
41
|
+
spansByName: Record<string, {
|
|
42
|
+
count: number;
|
|
43
|
+
totalMs: number;
|
|
44
|
+
}>;
|
|
45
|
+
errors: number;
|
|
46
|
+
};
|
|
47
|
+
/** Persist a span to the trace file */
|
|
48
|
+
private persistSpan;
|
|
49
|
+
}
|
|
50
|
+
/** Load trace spans for a session */
|
|
51
|
+
export declare function loadTrace(sessionId: string): TraceSpan[];
|
|
52
|
+
/** List all sessions with traces */
|
|
53
|
+
export declare function listTracedSessions(): string[];
|
|
54
|
+
/** Format trace for display */
|
|
55
|
+
export declare function formatTrace(spans: TraceSpan[]): string;
|
|
56
|
+
/** Export trace in OpenTelemetry-compatible format */
|
|
57
|
+
export declare function exportTraceOTLP(sessionId: string, spans: TraceSpan[]): object;
|
|
58
|
+
//# sourceMappingURL=traces.d.ts.map
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Session Traces — structured observability for agent sessions.
|
|
3
|
+
*
|
|
4
|
+
* Every query turn, tool call, LLM stream, and compression event
|
|
5
|
+
* generates a trace span. Traces enable debugging, replay, and
|
|
6
|
+
* performance analysis.
|
|
7
|
+
*
|
|
8
|
+
* Compatible with OpenTelemetry export format.
|
|
9
|
+
*/
|
|
10
|
+
import { appendFileSync, mkdirSync, existsSync, readFileSync, readdirSync } from 'node:fs';
|
|
11
|
+
import { join } from 'node:path';
|
|
12
|
+
import { homedir } from 'node:os';
|
|
13
|
+
const TRACE_DIR = join(homedir(), '.oh', 'traces');
|
|
14
|
+
// ── Tracer ──
|
|
15
|
+
const MAX_IN_MEMORY_SPANS = 1000;
|
|
16
|
+
/**
 * Records timed spans for one session.
 *
 * Spans are opened with startSpan(), closed with endSpan(), kept in memory
 * (capped at MAX_IN_MEMORY_SPANS, oldest dropped first) and appended as one
 * JSON line each to `<TRACE_DIR>/<sessionId>.jsonl`.
 */
export class SessionTracer {
    sessionId;
    spans = [];
    activeSpans = new Map();
    spanCounter = 0;
    constructor(sessionId) {
        this.sessionId = sessionId;
    }
    /** Open a span and return its id (`span-<n>`, numbered per tracer instance). */
    startSpan(name, attributes = {}, parentSpanId) {
        this.spanCounter += 1;
        const spanId = `span-${this.spanCounter}`;
        const pending = { name, startTime: Date.now(), parentSpanId, attributes };
        this.activeSpans.set(spanId, pending);
        return spanId;
    }
    /**
     * Close an open span, record it and persist it.
     * Returns the completed span, or null when the id is not currently open.
     */
    endSpan(spanId, status = 'ok', extraAttributes) {
        const pending = this.activeSpans.get(spanId);
        if (!pending) {
            return null;
        }
        this.activeSpans.delete(spanId);
        const finishedAt = Date.now();
        const span = {
            spanId,
            parentSpanId: pending.parentSpanId,
            name: pending.name,
            startTime: pending.startTime,
            endTime: finishedAt,
            durationMs: finishedAt - pending.startTime,
            attributes: { ...pending.attributes, ...extraAttributes },
            status,
        };
        this.spans.push(span);
        // Keep only the newest spans in memory; every span is still written to disk.
        const overflow = this.spans.length - MAX_IN_MEMORY_SPANS;
        if (overflow > 0) {
            this.spans = this.spans.slice(overflow);
        }
        this.persistSpan(span);
        return span;
    }
    /** Return a shallow copy of the completed spans held in memory. */
    getSpans() {
        return this.spans.slice();
    }
    /**
     * Summarize the in-memory spans: count, wall-clock extent (earliest start
     * to latest end), per-name count/duration totals and number of errors.
     */
    getSummary() {
        const spansByName = {};
        let errors = 0;
        let earliest = Infinity;
        let latest = 0;
        for (const { name, durationMs, status, startTime, endTime } of this.spans) {
            const bucket = spansByName[name] ?? { count: 0, totalMs: 0 };
            bucket.count += 1;
            bucket.totalMs += durationMs;
            spansByName[name] = bucket;
            if (status === 'error') {
                errors += 1;
            }
            if (startTime < earliest) {
                earliest = startTime;
            }
            if (endTime > latest) {
                latest = endTime;
            }
        }
        const totalDurationMs = latest > earliest ? latest - earliest : 0;
        return {
            totalSpans: this.spans.length,
            totalDurationMs,
            spansByName,
            errors,
        };
    }
    /** Append one span as a JSON line to this session's trace file. */
    persistSpan(span) {
        try {
            mkdirSync(TRACE_DIR, { recursive: true });
            const target = join(TRACE_DIR, `${this.sessionId}.jsonl`);
            const record = JSON.stringify(span) + '\n';
            appendFileSync(target, record);
        }
        catch { /* never crash on tracing failure */ }
    }
}
|
|
94
|
+
// ── Trace Loading ──
|
|
95
|
+
/**
 * Load trace spans for a session.
 *
 * Reads `<TRACE_DIR>/<sessionId>.jsonl` and parses one span per line. A
 * missing or unreadable file yields an empty array. Blank lines and lines that
 * are not valid JSON (for example a line cut short by an interrupted write)
 * are skipped individually, so one bad line no longer discards the whole
 * trace.
 *
 * @param {string} sessionId - Session whose trace file should be read.
 * @returns {object[]} Parsed spans in file order.
 */
export function loadTrace(sessionId) {
    const file = join(TRACE_DIR, `${sessionId}.jsonl`);
    let raw;
    try {
        raw = readFileSync(file, 'utf-8');
    }
    catch {
        // Missing or unreadable trace file: nothing recorded for this session.
        return [];
    }
    const spans = [];
    for (const line of raw.split('\n')) {
        if (line.trim() === '')
            continue;
        try {
            spans.push(JSON.parse(line));
        }
        catch {
            // Skip only the malformed line; keep every span that parses.
        }
    }
    return spans;
}
|
|
110
|
+
/**
 * List all sessions with traces.
 *
 * Returns the session id for every `*.jsonl` file in TRACE_DIR, or an empty
 * array when the directory does not exist. Only the trailing `.jsonl` is
 * removed, so an id that itself contains ".jsonl" is returned intact.
 *
 * @returns {string[]} Session ids derived from the trace file names.
 */
export function listTracedSessions() {
    if (!existsSync(TRACE_DIR))
        return [];
    const suffix = '.jsonl';
    return readdirSync(TRACE_DIR)
        .filter(f => f.endsWith(suffix))
        .map(f => f.slice(0, -suffix.length));
}
|
|
118
|
+
/**
 * Format trace for display.
 *
 * Renders the spans as an indented tree (children under their parent)
 * followed by a summary line. A span is treated as a root when it has no
 * parent, when it names itself as parent, or when its parent is not part of
 * `spans`, so spans whose parent is missing are still shown instead of being
 * silently dropped.
 *
 * The summary's millisecond figure is the sum of all span durations; nested
 * spans are therefore counted in addition to their parents.
 *
 * @param {Array<object>} spans - Completed spans (as produced by SessionTracer).
 * @returns {string} Multi-line, human-readable rendering.
 */
export function formatTrace(spans) {
    if (spans.length === 0)
        return 'No trace spans recorded.';
    const lines = [`Trace (${spans.length} spans):\n`];
    // Group by parent for tree display
    const knownIds = new Set(spans.map(s => s.spanId));
    const roots = [];
    const children = new Map();
    for (const s of spans) {
        const hasUsableParent = Boolean(s.parentSpanId)
            && s.parentSpanId !== s.spanId
            && knownIds.has(s.parentSpanId);
        if (!hasUsableParent) {
            roots.push(s);
            continue;
        }
        const list = children.get(s.parentSpanId) ?? [];
        list.push(s);
        children.set(s.parentSpanId, list);
    }
    function renderSpan(span, indent) {
        const status = span.status === 'error' ? '✗' : '✓';
        const pad = ' '.repeat(indent);
        // Attribute values are truncated to 30 characters to keep lines short.
        const attrs = Object.entries(span.attributes ?? {})
            .filter(([, v]) => v !== undefined)
            .map(([k, v]) => `${k}=${String(v).slice(0, 30)}`)
            .join(' ');
        lines.push(`${pad}${status} ${span.name} (${span.durationMs}ms) ${attrs}`);
        const kids = children.get(span.spanId) ?? [];
        for (const kid of kids)
            renderSpan(kid, indent + 1);
    }
    for (const root of roots)
        renderSpan(root, 0);
    // Summary
    const totalMs = spans.reduce((sum, s) => sum + s.durationMs, 0);
    const errors = spans.filter(s => s.status === 'error').length;
    lines.push('');
    lines.push(`Total: ${spans.length} spans, ${totalMs}ms, ${errors} errors`);
    return lines.join('\n');
}
|
|
154
|
+
/**
 * Export trace in OpenTelemetry-compatible format.
 *
 * Builds an object with a single resource (`service.name`, `session.id`) and a
 * single scope containing every span. Identifiers are emitted as fixed-width
 * lowercase hex (32 characters for the trace id, 16 for span ids), derived by
 * hex-encoding the UTF-8 bytes of the original id, keeping the trailing bytes
 * and left-padding with zeros. Keeping the tail preserves the distinguishing
 * counter of ids like `span-1` / `span-10`, which right-padding with '0' would
 * collapse into the same value. Ids longer than the available width can still
 * collide if they share the same tail.
 *
 * Attribute values are stringified; status is 1 for 'ok' and 2 otherwise.
 *
 * @param {string} sessionId - Session id; also the source of the trace id.
 * @param {Array<object>} spans - Completed spans to export.
 * @returns {object} Object with a top-level `resourceSpans` array.
 */
export function exportTraceOTLP(sessionId, spans) {
    const toHexId = (value, hexLength) => Buffer.from(String(value), 'utf-8')
        .toString('hex')
        .slice(-hexLength)
        .padStart(hexLength, '0');
    // Every span of a session shares one trace id, so compute it once.
    const traceId = toHexId(sessionId, 32);
    return {
        resourceSpans: [{
                resource: {
                    attributes: [
                        { key: 'service.name', value: { stringValue: 'openharness' } },
                        { key: 'session.id', value: { stringValue: sessionId } },
                    ],
                },
                scopeSpans: [{
                        scope: { name: 'openharness.agent' },
                        spans: spans.map(s => ({
                            traceId,
                            spanId: toHexId(s.spanId, 16),
                            // Root spans have no parent; leave the field undefined for them.
                            parentSpanId: s.parentSpanId == null ? undefined : toHexId(s.parentSpanId, 16),
                            name: s.name,
                            // Millisecond timestamps converted to nanoseconds.
                            startTimeUnixNano: s.startTime * 1_000_000,
                            endTimeUnixNano: s.endTime * 1_000_000,
                            attributes: Object.entries(s.attributes ?? {}).map(([k, v]) => ({
                                key: k,
                                value: { stringValue: String(v) },
                            })),
                            status: { code: s.status === 'ok' ? 1 : 2 },
                        })),
                    }],
            }],
    };
}
|
|
183
|
+
//# sourceMappingURL=traces.js.map
|
package/dist/main.js
CHANGED
|
@@ -234,6 +234,8 @@ program
|
|
|
234
234
|
.option("--light", "Use light theme")
|
|
235
235
|
.option("--output-format <format>", "Output format for -p mode (text, json, stream-json)", "text")
|
|
236
236
|
.option("--json-schema <schema>", "Constrain output to match a JSON schema (headless mode)")
|
|
237
|
+
.option("--input-format <format>", "Input format: text (default) or stream-json (NDJSON on stdin)")
|
|
238
|
+
.option("--replay-user-messages", "Re-emit user messages on stdout (requires stream-json output)")
|
|
237
239
|
.action(async (opts) => {
|
|
238
240
|
// Load saved config as defaults (env vars + CLI flags override)
|
|
239
241
|
const savedConfig = readOhConfig();
|