osborn 0.5.2 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +9 -0
- package/.claude/skills/markdown-to-pdf/SKILL.md +29 -0
- package/.claude/skills/pdf-to-markdown/SKILL.md +28 -0
- package/.claude/skills/playwright-browser/SKILL.md +75 -0
- package/.claude/skills/youtube-transcript/SKILL.md +24 -0
- package/dist/claude-llm.d.ts +29 -1
- package/dist/claude-llm.js +346 -79
- package/dist/config.d.ts +6 -2
- package/dist/config.js +6 -1
- package/dist/fast-brain.d.ts +124 -12
- package/dist/fast-brain.js +1361 -96
- package/dist/index-3-2-26-legacy.d.ts +1 -0
- package/dist/index-3-2-26-legacy.js +2233 -0
- package/dist/index.js +889 -394
- package/dist/jsonl-search.d.ts +66 -0
- package/dist/jsonl-search.js +274 -0
- package/dist/leagcyprompts2.d.ts +0 -0
- package/dist/leagcyprompts2.js +573 -0
- package/dist/pipeline-direct-llm.d.ts +77 -0
- package/dist/pipeline-direct-llm.js +216 -0
- package/dist/pipeline-fastbrain.d.ts +45 -0
- package/dist/pipeline-fastbrain.js +367 -0
- package/dist/prompts-2-25-26.d.ts +0 -0
- package/dist/prompts-2-25-26.js +518 -0
- package/dist/prompts-3-2-26.d.ts +78 -0
- package/dist/prompts-3-2-26.js +1319 -0
- package/dist/prompts.d.ts +83 -8
- package/dist/prompts.js +1990 -374
- package/dist/session-access.d.ts +60 -2
- package/dist/session-access.js +172 -2
- package/dist/summary-index.d.ts +87 -0
- package/dist/summary-index.js +570 -0
- package/dist/turn-detector-shim.d.ts +24 -0
- package/dist/turn-detector-shim.js +83 -0
- package/dist/voice-io.d.ts +9 -3
- package/dist/voice-io.js +39 -20
- package/package.json +18 -11
|
@@ -0,0 +1,570 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* summary-index.ts — Builds a compact searchable summary of Claude JSONL sessions
|
|
3
|
+
*
|
|
4
|
+
* Instead of ripgrepping 80MB raw JSONL, we extract one-line summaries per message
|
|
5
|
+
* into a ~1MB plain text file. Ripgrep searches this in <5ms.
|
|
6
|
+
*
|
|
7
|
+
 * Format: {lineNum}|{byteOffset}|{timestamp}|{source}|{msgType}|{summary}
|
|
8
|
+
*
|
|
9
|
+
* No LLM calls — pure heuristic extraction:
|
|
10
|
+
* tool_use → tool name + key params (file path, command, query)
|
|
11
|
+
 * tool_result → tool name + first 400 chars of output
|
|
12
|
+
* user → raw text (already short from voice)
|
|
13
|
+
* assistant → first 500 chars of text
|
|
14
|
+
*
|
|
15
|
+
* Per-session index stored at: .osborn/sessions/{sessionId}/.index/search-index.txt
|
|
16
|
+
*/
|
|
17
|
+
import { readFileSync, writeFileSync, appendFileSync, existsSync, statSync, openSync, readSync, closeSync, mkdirSync } from 'fs';
|
|
18
|
+
import { join, basename, dirname } from 'path';
|
|
19
|
+
import { getSessionPaths, getSessionSubAgents } from './session-access.js';
|
|
20
|
+
// ============================================================
|
|
21
|
+
// SUMMARY EXTRACTION (pure heuristic — no LLM)
|
|
22
|
+
// ============================================================
|
|
23
|
+
/**
 * Build a single index entry for one parsed JSONL record.
 *
 * Returns the first indexable piece of the record, or null when the record
 * carries nothing worth indexing (meta records, queue/snapshot/system/progress
 * records, empty content) or when its shape is unexpected.
 *
 * @param {object} raw - Parsed JSONL record.
 * @param {number} lineNum - 1-based line number of the record in its file.
 * @param {number} byteOffset - Byte offset of the record's line in its file.
 * @param {string} source - Source tag ('main' or 'agent-xxxxxxxx').
 * @returns {{lineNum: number, byteOffset: number, timestamp: string, source: string, msgType: string, summary: string} | null}
 */
function extractSummary(raw, lineNum, byteOffset, source) {
    try {
        // Skip non-indexable types
        if (raw.isMeta)
            return null;
        const type = raw.type;
        if (!type)
            return null;
        if (type === 'queue-operation' || type === 'file-history-snapshot' || type === 'system')
            return null;
        const ts = raw.timestamp ? new Date(raw.timestamp).toISOString().substring(0, 19) : '';
        const makeEntry = (msgType, summary) => ({ lineNum, byteOffset, timestamp: ts, source, msgType, summary });
        // ── user message (regular text or tool_result wrapper) ──
        if (type === 'user') {
            const rawContent = raw.message?.content;
            // Message content may be a bare string instead of a block array;
            // treat it as a single text block so plain prompts are indexed too.
            const content = typeof rawContent === 'string'
                ? [{ type: 'text', text: rawContent }]
                : rawContent;
            if (!Array.isArray(content))
                return null;
            // tool_result (user-type wrapper)
            if (content[0]?.type === 'tool_result') {
                const tr = content[0];
                let resultText = '';
                if (typeof tr.content === 'string') {
                    resultText = tr.content;
                }
                else if (Array.isArray(tr.content)) {
                    resultText = tr.content
                        .filter((b) => b?.type === 'text')
                        .map((b) => b.text)
                        .join(' ');
                }
                // Resolve tool name from toolUseResult if available
                const toolName = raw.toolUseResult?.name || '';
                const label = toolName || 'tool_result';
                return makeEntry('tool_result', `${label}: ${clean(resultText, 400)}`);
            }
            // Regular user text
            const texts = [];
            for (const block of content) {
                if (block?.type === 'text' && block.text)
                    texts.push(block.text);
            }
            if (texts.length === 0)
                return null;
            return makeEntry('user', clean(texts.join(' '), 500));
        }
        // ── assistant message ──
        if (type === 'assistant') {
            const content = raw.message?.content;
            if (!Array.isArray(content))
                return null;
            // Only the first indexable block is wanted here, so stop as soon as
            // one is found; extractAllSummaries covers the multi-block case.
            for (const block of content) {
                if (block?.type === 'text' && block.text?.trim())
                    return makeEntry('assistant', clean(block.text, 500));
                if (block?.type === 'thinking' && block.thinking?.trim())
                    return makeEntry('thinking', clean(block.thinking, 500));
                if (block?.type === 'tool_use')
                    return makeEntry('tool_use', summarizeTool(block.name, block.input));
            }
            return null;
        }
        // progress and every other record type are not indexed
        return null;
    }
    catch {
        // Best-effort extraction: a malformed record is skipped, not fatal.
        return null;
    }
}
|
|
93
|
+
/**
 * Collect every index entry contained in one parsed JSONL record.
 *
 * A user record yields at most one entry (delegated to extractSummary); an
 * assistant record yields one entry per non-empty text block, per non-empty
 * thinking block and per tool_use block, in block order. Any other record,
 * or any record that throws while being inspected, yields an empty array.
 */
function extractAllSummaries(raw, lineNum, byteOffset, source) {
    const skippedTypes = ['queue-operation', 'file-history-snapshot', 'system', 'progress'];
    try {
        if (raw.isMeta) {
            return [];
        }
        const recordType = raw.type;
        if (!recordType || skippedTypes.includes(recordType)) {
            return [];
        }
        const stamp = raw.timestamp
            ? new Date(raw.timestamp).toISOString().substring(0, 19)
            : '';
        switch (recordType) {
            case 'user': {
                const only = extractSummary(raw, lineNum, byteOffset, source);
                return only ? [only] : [];
            }
            case 'assistant': {
                const blocks = raw.message?.content;
                if (!Array.isArray(blocks)) {
                    return [];
                }
                const collected = [];
                const add = (msgType, summary) => {
                    collected.push({ lineNum, byteOffset, timestamp: stamp, source, msgType, summary });
                };
                blocks.forEach((block) => {
                    const blockType = block?.type;
                    if (blockType === 'text' && block.text?.trim()) {
                        add('assistant', clean(block.text, 500));
                    }
                    if (blockType === 'thinking' && block.thinking?.trim()) {
                        add('thinking', clean(block.thinking, 500));
                    }
                    if (blockType === 'tool_use') {
                        add('tool_use', summarizeTool(block.name, block.input));
                    }
                });
                return collected;
            }
            default:
                return [];
        }
    }
    catch {
        return [];
    }
}
|
|
135
|
+
/**
 * Summarize a tool_use block into a compact one-liner.
 *
 * The result ends up as one line of the pipe-delimited search index, so every
 * interpolated field is passed through clean(): a newline inside a pattern,
 * query, URL or path would otherwise split the index entry across lines.
 *
 * @param {string} name - Tool name from the tool_use block.
 * @param {object} [input] - Tool input; when missing, only the name is returned.
 * @returns {string} Single-line summary.
 */
function summarizeTool(name, input) {
    if (!input)
        return name;
    // Whitespace-normalize a field; uncapped unless a length is given.
    const flat = (value, maxLen = Infinity) => clean(value || '', maxLen);
    switch (name) {
        case 'Read':
            return `Read path="${flat(input.file_path)}"${input.offset ? ` offset=${input.offset}` : ''}`;
        case 'Write':
            return `Write path="${flat(input.file_path)}" ${flat(input.content, 100)}`;
        case 'Edit':
            return `Edit path="${flat(input.file_path)}" old="${flat(input.old_string, 60)}"`;
        case 'Grep':
            return `Grep pattern="${flat(input.pattern)}" path="${flat(input.path)}"`;
        case 'Glob':
            return `Glob pattern="${flat(input.pattern)}"${input.path ? ` path="${flat(input.path)}"` : ''}`;
        case 'Bash':
            return `Bash cmd="${flat(input.command, 200)}"`;
        case 'WebSearch':
            return `WebSearch query="${flat(input.query)}"`;
        case 'WebFetch':
            return `WebFetch url="${flat(input.url)}"`;
        case 'Task':
            return `Task prompt="${flat(input.prompt || input.description, 200)}"${input.agentId ? ` agentId=${flat(input.agentId)}` : ''}`;
        case 'TodoWrite':
            return `TodoWrite ${clean(JSON.stringify(input.todos || []), 200)}`;
        default:
            return `${name} ${clean(JSON.stringify(input), 300)}`;
    }
}
/**
 * Clean text: collapse all whitespace runs (including newlines and tabs) to a
 * single space, trim, and cap the length.
 *
 * @param {*} text - Text to clean; null/undefined become '', other non-strings are stringified.
 * @param {number} maxLen - Maximum number of UTF-16 code units to keep.
 * @returns {string} Single-line text of at most maxLen code units.
 */
function clean(text, maxLen) {
    if (text == null)
        return '';
    const str = typeof text === 'string' ? text : String(text);
    // \s already covers \n, \r and \t, so one pass is enough.
    return str.replace(/\s+/g, ' ').trim().substring(0, maxLen);
}
|
|
168
|
+
/**
 * Serialize an index entry as one pipe-delimited line:
 * lineNum|byteOffset|timestamp|source|msgType|summary
 * The summary is the last column, so it may itself contain '|'.
 */
function formatLine(entry) {
    const columns = [
        entry.lineNum,
        entry.byteOffset,
        entry.timestamp,
        entry.source,
        entry.msgType,
        entry.summary,
    ];
    return columns.map((column) => `${column}`).join('|');
}
|
|
172
|
+
// ============================================================
|
|
173
|
+
// BUILD INDEX (cold start / catch-up)
|
|
174
|
+
// ============================================================
|
|
175
|
+
/**
 * Build or resume a summary index for a session.
 * Reads main JSONL + all sub-agent JSONLs, extracts summaries and appends them
 * to the session's search-index.txt.
 * If an existing index with metadata exists, resumes from the last byte offset
 * of every file (main and sub-agents alike).
 *
 * @param {string} sessionId - Session whose JSONL files are indexed.
 * @param {string} workingDir - Passed to getSessionPaths / getSessionSubAgents.
 * @param {string} sessionBaseDir - Directory that contains `.osborn/sessions/...`.
 * @param {(message: string) => void} [onProgress] - Optional progress callback.
 * @returns {object} Index state (paths, main offsets, sub-agent offsets, toolUseIdMap).
 */
export function buildSummaryIndex(sessionId, workingDir, sessionBaseDir, onProgress) {
    // getSessionPaths and getSessionSubAgents imported at top level
    const paths = getSessionPaths(sessionId, workingDir);
    if (!paths.exists) {
        onProgress?.('No session files found');
        return emptyState(sessionId, sessionBaseDir, paths.conversation);
    }
    const workspace = join(sessionBaseDir, '.osborn', 'sessions', sessionId, '.index');
    mkdirSync(workspace, { recursive: true });
    const indexPath = join(workspace, 'search-index.txt');
    const metaPath = join(workspace, 'search-index-meta.json');
    // Check for existing metadata (resume from last offset)
    const state = loadOrCreateState(indexPath, metaPath, paths.conversation);
    const startMs = Date.now();
    // ── Index main JSONL ──
    const mainEntries = indexFile(paths.conversation, 'main', state.main.byteOffset);
    if (mainEntries.lines.length > 0) {
        appendFileSync(indexPath, mainEntries.lines.join('\n') + '\n');
        state.main.byteOffset = mainEntries.newByteOffset;
        state.main.lineCount += mainEntries.linesProcessed;
        state.main.indexLineCount += mainEntries.lines.length;
        // Collect tool_use_id → name mappings
        for (const [id, name] of mainEntries.toolMap) {
            state.toolUseIdMap.set(id, name);
        }
    }
    onProgress?.(`Main JSONL: ${mainEntries.lines.length} entries in ${Date.now() - startMs}ms`);
    // ── Discover and index sub-agents (both paths: subagents/ dir + project-level agent-*.jsonl) ──
    const subAgentMs = Date.now();
    let subAgentEntries = 0;
    // Keys handled during THIS run. state.subAgents cannot be used for the
    // "already indexed from Path 1" check because it is pre-populated from the
    // persisted metadata on resume, which would make Path 2 skip every
    // previously seen agent and never pick up its new content.
    const handledThisRun = new Set();
    // Index one sub-agent file from its last known offset and record progress.
    const indexSubAgent = (agentFile, agentKey, sourceTag) => {
        handledThisRun.add(agentKey);
        const existing = state.subAgents.get(agentKey);
        const result = indexFile(agentFile, sourceTag, existing?.byteOffset || 0);
        if (result.lines.length === 0)
            return;
        appendFileSync(indexPath, result.lines.join('\n') + '\n');
        subAgentEntries += result.lines.length;
        state.subAgents.set(agentKey, {
            jsonlPath: agentFile,
            byteOffset: result.newByteOffset,
            lineCount: (existing?.lineCount || 0) + result.linesProcessed,
            indexLineCount: (existing?.indexLineCount || 0) + result.lines.length,
        });
    };
    // Path 1: Session subdirectory subagents/ (discovered by getSessionPaths)
    for (const agentFile of paths.subagents) {
        const fileName = basename(agentFile, '.jsonl');
        const agentKey = fileName.replace('agent-', '').substring(0, 12);
        indexSubAgent(agentFile, agentKey, `agent-${agentKey.substring(0, 8)}`);
    }
    // Path 2: Project-level agent-*.jsonl (discovered by getSessionSubAgents)
    const agents = getSessionSubAgents(sessionId, workingDir);
    for (const agent of agents) {
        if (!agent.agentFileExists)
            continue;
        const agentKey = agent.agentId.substring(0, 12);
        if (handledThisRun.has(agentKey))
            continue; // already indexed from Path 1 in this run
        indexSubAgent(agent.agentFile, agentKey, `agent-${agent.agentId.substring(0, 8)}`);
    }
    onProgress?.(`Sub-agents: ${agents.length} found, ${subAgentEntries} entries in ${Date.now() - subAgentMs}ms`);
    // Trim toolUseIdMap to last 100
    if (state.toolUseIdMap.size > 100) {
        const entries = [...state.toolUseIdMap.entries()];
        state.toolUseIdMap = new Map(entries.slice(-100));
    }
    // Save metadata
    saveMeta(state, sessionId, metaPath);
    const totalMs = Date.now() - startMs;
    const indexSize = existsSync(indexPath) ? statSync(indexPath).size : 0;
    onProgress?.(`Index complete: ${state.main.indexLineCount + subAgentEntries} total entries, ${(indexSize / 1024).toFixed(0)}KB, ${totalMs}ms`);
    return state;
}
|
|
265
|
+
/**
 * Index a single JSONL file from a byte offset. Returns formatted lines + new offset.
 *
 * Lines are located by scanning the raw bytes for '\n', so every recorded
 * byteOffset is an exact file position. A trailing fragment with no newline
 * is only consumed when it already parses as JSON; otherwise it is assumed to
 * be a record that is still being written and newByteOffset stops in front of
 * it, so the next pass re-reads it once it is complete.
 *
 * @param {string} filePath - JSONL file to read.
 * @param {string} source - Source tag stored on every entry.
 * @param {number} fromByteOffset - Byte position to start reading from.
 * @returns {{lines: string[], linesProcessed: number, newByteOffset: number, toolMap: Map<string, string>}}
 */
function indexFile(filePath, source, fromByteOffset) {
    const nothingNew = () => ({ lines: [], linesProcessed: 0, newByteOffset: fromByteOffset, toolMap: new Map() });
    if (!existsSync(filePath)) {
        return nothingNew();
    }
    const fileSize = statSync(filePath).size;
    if (fromByteOffset >= fileSize) {
        return nothingNew();
    }
    // Read from offset
    const buf = Buffer.alloc(fileSize - fromByteOffset);
    const fd = openSync(filePath, 'r');
    let bytesRead = 0;
    try {
        bytesRead = readSync(fd, buf, 0, buf.length, fromByteOffset);
    }
    finally {
        closeSync(fd);
    }
    const outputLines = [];
    const toolMap = new Map();
    let lineNum = fromByteOffset === 0 ? 1 : countLines(filePath, fromByteOffset) + 1;
    let linesProcessed = 0;
    let cursor = 0; // bytes of buf that have been fully handled
    while (cursor < bytesRead) {
        const newlineAt = buf.indexOf(0x0a, cursor);
        const terminated = newlineAt !== -1 && newlineAt < bytesRead;
        const lineEnd = terminated ? newlineAt : bytesRead;
        const rawLine = buf.toString('utf-8', cursor, lineEnd);
        const lineByteOffset = fromByteOffset + cursor;
        const isBlank = rawLine.trim() === '';
        let obj;
        let parsed = false;
        if (!isBlank) {
            try {
                obj = JSON.parse(rawLine);
                parsed = true;
            }
            catch {
                // Unparseable line: skipped below when terminated, retried when not
            }
        }
        if (!terminated && !parsed) {
            // Incomplete tail (writer is mid-line): leave it for the next pass.
            break;
        }
        if (!isBlank) {
            linesProcessed++;
            if (parsed) {
                try {
                    // Track tool_use_id → tool_name for resolving tool_result entries
                    if (obj.type === 'assistant' && Array.isArray(obj.message?.content)) {
                        for (const block of obj.message.content) {
                            if (block?.type === 'tool_use' && block.id && block.name) {
                                toolMap.set(block.id, block.name);
                            }
                        }
                    }
                    // Extract all summaries — byteOffset enables instant targeted reads later
                    for (const entry of extractAllSummaries(obj, lineNum, lineByteOffset, source)) {
                        outputLines.push(formatLine(entry));
                    }
                }
                catch {
                    // Skip records with an unexpected shape (e.g. a bare `null`)
                }
            }
        }
        lineNum++;
        cursor = terminated ? newlineAt + 1 : bytesRead;
    }
    return {
        lines: outputLines,
        linesProcessed,
        newByteOffset: fromByteOffset + cursor,
        toolMap,
    };
}
|
|
322
|
+
/**
 * Count newline bytes in a file's first `upToBytes` bytes (for line number
 * tracking on resume).
 *
 * The range is scanned in 1MB chunks so memory stays bounded while the count
 * still covers the whole prefix, not just its last megabyte.
 *
 * @param {string} filePath - File to scan.
 * @param {number} upToBytes - Exclusive end of the byte range, starting at 0.
 * @returns {number} Number of '\n' bytes in [0, upToBytes).
 */
function countLines(filePath, upToBytes) {
    const CHUNK_BYTES = 1024 * 1024;
    if (!(upToBytes > 0))
        return 0;
    const buf = Buffer.alloc(Math.min(upToBytes, CHUNK_BYTES));
    const fd = openSync(filePath, 'r');
    let count = 0;
    try {
        let position = 0;
        while (position < upToBytes) {
            const wanted = Math.min(buf.length, upToBytes - position);
            const bytesRead = readSync(fd, buf, 0, wanted, position);
            if (bytesRead === 0)
                break; // file is shorter than upToBytes
            for (let i = 0; i < bytesRead; i++) {
                if (buf[i] === 10)
                    count++; // newline
            }
            position += bytesRead;
        }
    }
    finally {
        closeSync(fd);
    }
    return count;
}
|
|
335
|
+
// ============================================================
|
|
336
|
+
// INCREMENTAL WATCHER
|
|
337
|
+
// ============================================================
|
|
338
|
+
/**
 * Start a polling updater that keeps the summary index current.
 * Every 10 seconds it indexes whatever was appended to the main JSONL and to
 * each sub-agent JSONL, appends the new entries to the index file and, when
 * anything was added, persists the metadata.
 * Polling is used instead of fs.watch to avoid races with concurrent writers.
 *
 * Returns a handle with `stop()` (halts polling and saves metadata once more)
 * and the shared `state` object.
 */
export function startIndexWatcher(sessionId, workingDir, sessionBaseDir, state) {
    let stopped = false;
    const persist = () => saveMeta(state, sessionId, state.metaPath);
    const appendToIndex = (lines) => appendFileSync(state.indexPath, lines.join('\n') + '\n');
    // Index new main-JSONL content; returns how many entries were added.
    const pollMain = () => {
        const fresh = indexFile(state.main.jsonlPath, 'main', state.main.byteOffset);
        const added = fresh.lines.length;
        if (added === 0) {
            return 0;
        }
        appendToIndex(fresh.lines);
        state.main.byteOffset = fresh.newByteOffset;
        state.main.lineCount += fresh.linesProcessed;
        state.main.indexLineCount += added;
        fresh.toolMap.forEach((toolName, toolUseId) => {
            state.toolUseIdMap.set(toolUseId, toolName);
        });
        return added;
    };
    // Sub-agent files from both discovery paths, without duplicates.
    const listSubAgentFiles = () => {
        const files = [...getSessionPaths(sessionId, workingDir).subagents];
        for (const agent of getSessionSubAgents(sessionId, workingDir)) {
            if (agent.agentFileExists && !files.includes(agent.agentFile)) {
                files.push(agent.agentFile);
            }
        }
        return files;
    };
    // Index new content of one sub-agent file; returns how many entries were added.
    const pollSubAgent = (agentFile) => {
        if (!existsSync(agentFile)) {
            return 0;
        }
        const agentKey = basename(agentFile, '.jsonl').replace('agent-', '').substring(0, 12);
        const sourceTag = `agent-${agentKey.substring(0, 8)}`;
        const known = state.subAgents.get(agentKey);
        const fresh = indexFile(agentFile, sourceTag, known?.byteOffset || 0);
        const added = fresh.lines.length;
        if (added === 0) {
            return 0;
        }
        appendToIndex(fresh.lines);
        state.subAgents.set(agentKey, {
            jsonlPath: agentFile,
            byteOffset: fresh.newByteOffset,
            lineCount: (known?.lineCount || 0) + fresh.linesProcessed,
            indexLineCount: (known?.indexLineCount || 0) + added,
        });
        return added;
    };
    const tick = () => {
        if (stopped) {
            return;
        }
        try {
            // 1. Main JSONL first, 2. then every known or newly discovered sub-agent
            let newEntries = pollMain();
            for (const agentFile of listSubAgentFiles()) {
                newEntries += pollSubAgent(agentFile);
            }
            if (newEntries > 0) {
                console.log(`🔍 [index] +${newEntries} entries`);
                persist();
            }
        }
        catch (err) {
            console.error('🔍 [index] Poll error:', err?.message);
        }
    };
    const pollInterval = setInterval(tick, 10_000);
    return {
        stop() {
            stopped = true;
            clearInterval(pollInterval);
            persist();
        },
        state,
    };
}
|
|
409
|
+
// ============================================================
|
|
410
|
+
// STATE MANAGEMENT
|
|
411
|
+
// ============================================================
|
|
412
|
+
/**
 * Create a blank index state for a session: index/meta paths under
 * `<sessionBaseDir>/.osborn/sessions/<sessionId>/.index`, zeroed main-file
 * counters and empty sub-agent / tool-use maps. Nothing is written to disk.
 */
function emptyState(sessionId, sessionBaseDir, mainJsonlPath) {
    const indexDir = join(sessionBaseDir, '.osborn', 'sessions', sessionId, '.index');
    const indexPath = join(indexDir, 'search-index.txt');
    const metaPath = join(indexDir, 'search-index-meta.json');
    const main = {
        jsonlPath: mainJsonlPath,
        byteOffset: 0,
        lineCount: 0,
        indexLineCount: 0,
    };
    return { indexPath, metaPath, main, subAgents: new Map(), toolUseIdMap: new Map() };
}
|
|
422
|
+
/**
 * Load the persisted index state, or start a fresh one.
 *
 * The persisted state is reused only when the metadata file parses, the main
 * JSONL it refers to still exists, and the recorded byte offset does not
 * exceed that file's current size (a smaller file means it was replaced or
 * corrupted). In every other case the index file is truncated to empty and a
 * zeroed state is returned so the index is rebuilt from scratch.
 */
function loadOrCreateState(indexPath, metaPath, mainJsonlPath) {
    const tryResume = () => {
        if (!existsSync(metaPath)) {
            return null;
        }
        try {
            const saved = JSON.parse(readFileSync(metaPath, 'utf-8'));
            const trackedFile = saved.main.jsonlPath;
            if (!existsSync(trackedFile)) {
                return null;
            }
            const sizeNow = statSync(trackedFile).size;
            const offsetStillValid = saved.main.byteOffset <= sizeNow;
            if (!offsetStillValid) {
                return null;
            }
            return {
                indexPath,
                metaPath,
                main: saved.main,
                subAgents: new Map(Object.entries(saved.subAgents || {})),
                toolUseIdMap: new Map(Object.entries(saved.toolUseIdMap || {})),
            };
        }
        catch {
            return null;
        }
    };
    const resumed = tryResume();
    if (resumed) {
        return resumed;
    }
    // Fresh state: make sure the index file starts out empty
    writeFileSync(indexPath, '');
    const main = { jsonlPath: mainJsonlPath, byteOffset: 0, lineCount: 0, indexLineCount: 0 };
    return { indexPath, metaPath, main, subAgents: new Map(), toolUseIdMap: new Map() };
}
|
|
453
|
+
/**
 * Persist the index state as JSON metadata (best effort).
 *
 * `createdAt` is carried over from an existing metadata file so it keeps
 * meaning "first save"; only `updatedAt` moves on every call. The tool-use map
 * is capped to its 100 most recent entries. A failed write is logged and
 * otherwise ignored, so indexing itself is never interrupted by it.
 *
 * @param {object} state - Index state (main, subAgents Map, toolUseIdMap Map).
 * @param {string} sessionId - Session id recorded in the metadata.
 * @param {string} metaPath - Destination metadata file.
 */
function saveMeta(state, sessionId, metaPath) {
    const now = new Date().toISOString();
    let createdAt = now;
    try {
        if (existsSync(metaPath)) {
            const previous = JSON.parse(readFileSync(metaPath, 'utf-8'));
            if (typeof previous?.createdAt === 'string')
                createdAt = previous.createdAt;
        }
    }
    catch {
        // Unreadable previous metadata: treat this as the first save
    }
    const meta = {
        version: 1,
        sessionId,
        createdAt,
        updatedAt: now,
        main: state.main,
        subAgents: Object.fromEntries(state.subAgents),
        toolUseIdMap: Object.fromEntries([...state.toolUseIdMap.entries()].slice(-100)),
    };
    try {
        writeFileSync(metaPath, JSON.stringify(meta, null, 2));
    }
    catch (err) {
        console.error('🔍 [index] Failed to save index metadata:', err?.message);
    }
}
|
|
468
|
+
// ============================================================
|
|
469
|
+
// PUBLIC: Check if index exists for a session
|
|
470
|
+
// ============================================================
|
|
471
|
+
/**
 * Locate a session's search index.
 * Returns the path of `search-index.txt` when it exists and is non-empty,
 * otherwise null.
 */
export function getIndexPath(sessionId, sessionBaseDir) {
    const candidate = join(sessionBaseDir, '.osborn', 'sessions', sessionId, '.index', 'search-index.txt');
    if (!existsSync(candidate)) {
        return null;
    }
    const hasContent = statSync(candidate).size > 0;
    return hasContent ? candidate : null;
}
|
|
475
|
+
// ============================================================
|
|
476
|
+
// PUBLIC: Read full clean text from raw JSONL by byte offsets
|
|
477
|
+
// ============================================================
|
|
478
|
+
/**
 * Given index search results (with byte offsets + source tags),
 * read FULL content from raw JSONL via targeted reads.
 * No readFileSync of the whole file. Strips JSON noise, returns clean text.
 *
 * Each record is read in 100KB chunks until its terminating newline (or EOF),
 * so records longer than one chunk are still parsed. Results that point at the
 * same record (same source + byteOffset) produce a single output item, because
 * the extracted text covers the whole record anyway.
 *
 * @param {Array<{source: string, lineNum: number, byteOffset: number}>} results - Index hits.
 * @param {string} sessionId - Session the hits belong to.
 * @param {string} workingDir - Passed to getSessionPaths.
 * @param {string} sessionBaseDir - Accepted for interface compatibility; not read here.
 * @param {number} [maxCharsPerResult=2000] - Cap on the text returned per record.
 * @returns {string[]} One `[source Lnn] text` item per readable record, grouped by source.
 */
export function readFullContent(results, sessionId, workingDir, sessionBaseDir, maxCharsPerResult = 2000) {
    const CHUNK_BYTES = 100 * 1024;
    const paths = getSessionPaths(sessionId, workingDir);
    const output = [];
    // Group results by source file, keeping the first hit per byte offset
    const bySource = new Map();
    for (const r of results) {
        let refs = bySource.get(r.source);
        if (!refs) {
            refs = new Map();
            bySource.set(r.source, refs);
        }
        if (!refs.has(r.byteOffset))
            refs.set(r.byteOffset, { lineNum: r.lineNum, byteOffset: r.byteOffset });
    }
    for (const [source, refs] of bySource) {
        // Resolve source to file path
        let filePath;
        if (source === 'main') {
            filePath = paths.conversation;
        }
        else {
            const agentPrefix = source.replace('agent-', '');
            filePath = paths.subagents.find(f => basename(f).includes(agentPrefix))
                || join(dirname(paths.conversation), `agent-${agentPrefix}.jsonl`);
        }
        if (!existsSync(filePath))
            continue;
        const fileSize = statSync(filePath).size;
        const fd = openSync(filePath, 'r');
        try {
            for (const ref of refs.values()) {
                if (ref.byteOffset >= fileSize)
                    continue;
                // Collect raw bytes up to the first newline; decoding happens once
                // at the end so multi-byte characters split across chunks survive.
                const pieces = [];
                let position = ref.byteOffset;
                while (position < fileSize) {
                    const chunk = Buffer.alloc(Math.min(CHUNK_BYTES, fileSize - position));
                    const bytesRead = readSync(fd, chunk, 0, chunk.length, position);
                    if (bytesRead === 0)
                        break;
                    const newlineAt = chunk.indexOf(0x0a);
                    if (newlineAt !== -1 && newlineAt < bytesRead) {
                        pieces.push(chunk.subarray(0, newlineAt));
                        break;
                    }
                    pieces.push(chunk.subarray(0, bytesRead));
                    position += bytesRead;
                }
                const jsonLine = Buffer.concat(pieces).toString('utf-8');
                try {
                    const obj = JSON.parse(jsonLine);
                    const cleanText = extractCleanText(obj, maxCharsPerResult);
                    if (cleanText) {
                        output.push(`[${source} L${ref.lineNum}] ${cleanText}`);
                    }
                }
                catch {
                    // Stale offset or malformed record: skip this result
                }
            }
        }
        finally {
            closeSync(fd);
        }
    }
    return output;
}
|
|
538
|
+
/**
 * Extract clean human-readable text from a parsed JSONL object — no JSON noise.
 *
 * user records contribute their text blocks and tool_result payloads (a plain
 * string `content` is taken as the text itself); assistant records contribute
 * text, `[thinking] …` and one-line tool_use summaries. Parts are joined with
 * newlines, trimmed and cut to maxChars. Other record types yield ''.
 *
 * @param {object} obj - Parsed JSONL record.
 * @param {number} maxChars - Maximum length of the returned text.
 * @returns {string} Extracted text, possibly empty.
 */
function extractCleanText(obj, maxChars) {
    const parts = [];
    const content = obj.message?.content;
    if (obj.type === 'user') {
        if (typeof content === 'string') {
            // Message content may be a bare string instead of a block array
            if (content)
                parts.push(content);
        }
        else if (Array.isArray(content)) {
            for (const block of content) {
                if (block?.type === 'text' && block.text) {
                    parts.push(block.text);
                }
                if (block?.type === 'tool_result') {
                    let resultText = '';
                    if (typeof block.content === 'string') {
                        resultText = block.content;
                    }
                    else if (Array.isArray(block.content)) {
                        resultText = block.content
                            .filter((b) => b?.type === 'text')
                            .map((b) => b.text)
                            .join('\n');
                    }
                    if (resultText)
                        parts.push(resultText);
                }
            }
        }
    }
    if (obj.type === 'assistant' && Array.isArray(content)) {
        for (const block of content) {
            if (block?.type === 'text' && block.text)
                parts.push(block.text);
            if (block?.type === 'thinking' && block.thinking)
                parts.push(`[thinking] ${block.thinking}`);
            if (block?.type === 'tool_use') {
                parts.push(summarizeTool(block.name, block.input));
            }
        }
    }
    const joined = parts.join('\n').trim();
    return joined.substring(0, maxChars);
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* turn-detector-shim.ts — LiveKit Cloud turn detection without JobContext
|
|
3
|
+
*
|
|
4
|
+
* The official MultilingualModel crashes without LiveKit's worker framework
|
|
5
|
+
* because it calls getJobContext(). This shim implements the same _TurnDetector
|
|
6
|
+
* interface but makes the remote HTTP call directly — no worker framework needed.
|
|
7
|
+
*
|
|
8
|
+
* On LiveKit Cloud (LIVEKIT_REMOTE_EOT_URL set): HTTP call to inference gateway.
|
|
9
|
+
* Without the URL: Returns 1.0 (always end of turn — STT endpointing handles it).
|
|
10
|
+
*/
|
|
11
|
+
import type { llm } from '@livekit/agents';
|
|
12
|
+
/**
 * Implements _TurnDetector interface for LiveKit Cloud remote inference
 * without requiring JobContext / worker framework.
 *
 * The remote endpoint is taken from the LIVEKIT_REMOTE_EOT_URL environment
 * variable when the instance is constructed.
 */
export declare class CloudTurnDetector {
    #private;
    /** Model identifier reported by this detector. */
    readonly model = "lk_end_of_utterance_multilingual";
    /** Provider identifier reported by this detector. */
    readonly provider = "livekit";
    /** Reads LIVEKIT_REMOTE_EOT_URL and logs which mode (remote / fallback) is active. */
    constructor();
    /** Always resolves to undefined; the language argument is ignored. */
    unlikelyThreshold(_language?: string): Promise<number | undefined>;
    /** Always resolves to true; the language argument is ignored. */
    supportsLanguage(_language?: string): Promise<boolean>;
    /**
     * Resolves to the end-of-turn probability returned by the remote endpoint.
     * Resolves to 1.0 when no remote URL is configured, when the request fails
     * or times out, or when the response carries no non-negative numeric
     * `probability`. The `_timeout` argument is not used by the implementation.
     */
    predictEndOfTurn(chatCtx: llm.ChatContext, _timeout?: number): Promise<number>;
}
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* turn-detector-shim.ts — LiveKit Cloud turn detection without JobContext
|
|
3
|
+
*
|
|
4
|
+
* The official MultilingualModel crashes without LiveKit's worker framework
|
|
5
|
+
* because it calls getJobContext(). This shim implements the same _TurnDetector
|
|
6
|
+
* interface but makes the remote HTTP call directly — no worker framework needed.
|
|
7
|
+
*
|
|
8
|
+
* On LiveKit Cloud (LIVEKIT_REMOTE_EOT_URL set): HTTP call to inference gateway.
|
|
9
|
+
* Without the URL: Returns 1.0 (always end of turn — STT endpointing handles it).
|
|
10
|
+
*/
|
|
11
|
+
import { log } from '@livekit/agents';
|
|
12
|
+
const REMOTE_INFERENCE_TIMEOUT = 2000;
|
|
13
|
+
const MAX_HISTORY_TURNS = 15;
|
|
14
|
+
/**
 * Turn detector that talks to the LiveKit Cloud end-of-turn endpoint directly
 * over HTTP, so it works without JobContext / the worker framework.
 * When LIVEKIT_REMOTE_EOT_URL is not set, every prediction is 1.0.
 */
export class CloudTurnDetector {
    #remoteUrl;
    #logger = log();
    model = 'lk_end_of_utterance_multilingual';
    provider = 'livekit';
    constructor() {
        this.#remoteUrl = process.env.LIVEKIT_REMOTE_EOT_URL;
        const banner = this.#remoteUrl
            ? `🧠 Turn detector: LiveKit Cloud remote inference`
            : '🧠 Turn detector: No LIVEKIT_REMOTE_EOT_URL — STT endpointing fallback';
        console.log(banner);
    }
    /** No custom threshold: resolves to undefined so the framework uses its defaults. */
    async unlikelyThreshold(_language) {
        return undefined;
    }
    /** Every language is reported as supported. */
    async supportsLanguage(_language) {
        return true;
    }
    /**
     * Ask the remote endpoint for the end-of-turn probability of the chat
     * context. Any failure (no URL, non-OK response, unusable payload,
     * timeout, thrown error) resolves to 1.0, i.e. "end of turn".
     */
    async predictEndOfTurn(chatCtx, _timeout) {
        const endOfTurn = 1.0;
        const baseUrl = this.#remoteUrl;
        if (!baseUrl) {
            // No remote URL = always end of turn (STT handles it)
            return endOfTurn;
        }
        try {
            const history = chatCtx
                .copy({
                    excludeFunctionCall: true,
                    excludeInstructions: true,
                    excludeEmptyMessage: true,
                })
                .truncate(MAX_HISTORY_TURNS);
            const serialized = history.toJSON({
                excludeImage: true,
                excludeAudio: true,
                excludeTimestamp: true,
            });
            // Dummy IDs — LiveKit Cloud uses these for routing/logging, not auth
            const payload = {
                ...serialized,
                jobId: `osborn-${Date.now()}`,
                workerId: 'osborn-direct',
            };
            const agentId = process.env.LIVEKIT_AGENT_ID;
            if (agentId) {
                payload.agentId = agentId;
            }
            const response = await fetch(`${baseUrl}/eot/multi`, {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify(payload),
                signal: AbortSignal.timeout(REMOTE_INFERENCE_TIMEOUT),
            });
            if (!response.ok) {
                return endOfTurn;
            }
            const { probability } = await response.json();
            const usable = typeof probability === 'number' && probability >= 0;
            return usable ? probability : endOfTurn;
        }
        catch {
            // Timeout/error — default to end of turn
            return endOfTurn;
        }
    }
}
|