osborn 0.5.2 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/.claude/settings.local.json +9 -0
  2. package/.claude/skills/markdown-to-pdf/SKILL.md +29 -0
  3. package/.claude/skills/pdf-to-markdown/SKILL.md +28 -0
  4. package/.claude/skills/playwright-browser/SKILL.md +75 -0
  5. package/.claude/skills/youtube-transcript/SKILL.md +24 -0
  6. package/dist/claude-llm.d.ts +29 -1
  7. package/dist/claude-llm.js +346 -79
  8. package/dist/config.d.ts +6 -2
  9. package/dist/config.js +6 -1
  10. package/dist/fast-brain.d.ts +124 -12
  11. package/dist/fast-brain.js +1361 -96
  12. package/dist/index-3-2-26-legacy.d.ts +1 -0
  13. package/dist/index-3-2-26-legacy.js +2233 -0
  14. package/dist/index.js +889 -394
  15. package/dist/jsonl-search.d.ts +66 -0
  16. package/dist/jsonl-search.js +274 -0
  17. package/dist/leagcyprompts2.d.ts +0 -0
  18. package/dist/leagcyprompts2.js +573 -0
  19. package/dist/pipeline-direct-llm.d.ts +77 -0
  20. package/dist/pipeline-direct-llm.js +216 -0
  21. package/dist/pipeline-fastbrain.d.ts +45 -0
  22. package/dist/pipeline-fastbrain.js +367 -0
  23. package/dist/prompts-2-25-26.d.ts +0 -0
  24. package/dist/prompts-2-25-26.js +518 -0
  25. package/dist/prompts-3-2-26.d.ts +78 -0
  26. package/dist/prompts-3-2-26.js +1319 -0
  27. package/dist/prompts.d.ts +83 -8
  28. package/dist/prompts.js +1990 -374
  29. package/dist/session-access.d.ts +60 -2
  30. package/dist/session-access.js +172 -2
  31. package/dist/summary-index.d.ts +87 -0
  32. package/dist/summary-index.js +570 -0
  33. package/dist/turn-detector-shim.d.ts +24 -0
  34. package/dist/turn-detector-shim.js +83 -0
  35. package/dist/voice-io.d.ts +9 -3
  36. package/dist/voice-io.js +39 -20
  37. package/package.json +18 -11
@@ -0,0 +1,570 @@
1
+ /**
2
+ * summary-index.ts — Builds a compact searchable summary of Claude JSONL sessions
3
+ *
4
+ * Instead of ripgrepping 80MB raw JSONL, we extract one-line summaries per message
5
+ * into a ~1MB plain text file. Ripgrep searches this in <5ms.
6
+ *
7
+ * Format: {lineNum}|{byteOffset}|{timestamp}|{source}|{msgType}|{summary}
8
+ *
9
+ * No LLM calls — pure heuristic extraction:
10
+ * tool_use → tool name + key params (file path, command, query)
11
+ * tool_result → tool name (when known) + first 400 chars of output
12
+ * user → raw text (already short from voice)
13
+ * assistant → first 500 chars of text
14
+ *
15
+ * Per-session index stored at: .osborn/sessions/{sessionId}/.index/search-index.txt
16
+ */
17
+ import { readFileSync, writeFileSync, appendFileSync, existsSync, statSync, openSync, readSync, closeSync, mkdirSync } from 'fs';
18
+ import { join, basename, dirname } from 'path';
19
+ import { getSessionPaths, getSessionSubAgents } from './session-access.js';
20
+ // ============================================================
21
+ // SUMMARY EXTRACTION (pure heuristic — no LLM)
22
+ // ============================================================
23
/**
 * Build a single index entry for one parsed JSONL record (pure heuristic, no LLM).
 *
 * - `user` records: a leading `tool_result` block becomes a `tool_result`
 *   entry; otherwise the message text becomes a `user` entry.
 * - `assistant` records: only the FIRST text / thinking / tool_use block is
 *   returned; use extractAllSummaries() to obtain every block.
 * - Meta records and all other record types are not indexed.
 *
 * @param {object} raw - One parsed JSONL record.
 * @param {number} lineNum - Line number of the record within its JSONL file.
 * @param {number} byteOffset - Byte offset of the start of that line.
 * @param {string} source - Source tag, e.g. 'main' or 'agent-xxxxxxxx'.
 * @returns {object|null} `{ lineNum, byteOffset, timestamp, source, msgType, summary }`,
 *   or null when the record is not indexable or extraction throws.
 */
function extractSummary(raw, lineNum, byteOffset, source) {
  try {
    if (raw.isMeta) {
      return null;
    }
    const type = raw.type;
    // Only user and assistant records are indexable; queue-operation,
    // file-history-snapshot, system, progress and unknown types are skipped.
    if (type !== 'user' && type !== 'assistant') {
      return null;
    }
    const ts = raw.timestamp ? new Date(raw.timestamp).toISOString().substring(0, 19) : '';
    const toEntry = (msgType, summary) => ({ lineNum, byteOffset, timestamp: ts, source, msgType, summary });
    const content = raw.message?.content;

    if (type === 'user') {
      // A message's `content` may be a bare string instead of an array of
      // blocks; index it rather than silently dropping the user's prompt.
      if (typeof content === 'string') {
        return content.trim() ? toEntry('user', clean(content, 500)) : null;
      }
      if (!Array.isArray(content)) {
        return null;
      }
      // tool_result (delivered inside a user-type wrapper)
      if (content[0]?.type === 'tool_result') {
        const tr = content[0];
        let resultText = '';
        if (typeof tr.content === 'string') {
          resultText = tr.content;
        } else if (Array.isArray(tr.content)) {
          resultText = tr.content.filter((b) => b.type === 'text').map((b) => b.text).join(' ');
        }
        // Prefer the tool name from toolUseResult when the record carries one.
        const label = raw.toolUseResult?.name || 'tool_result';
        return toEntry('tool_result', `${label}: ${clean(resultText, 400)}`);
      }
      // Regular user text
      const texts = content
        .filter((block) => block?.type === 'text' && block.text)
        .map((block) => block.text);
      return texts.length === 0 ? null : toEntry('user', clean(texts.join(' '), 500));
    }

    // assistant: return on the first indexable block, so a malformed later
    // block cannot suppress a valid first entry.
    if (!Array.isArray(content)) {
      return null;
    }
    for (const block of content) {
      if (block?.type === 'text' && block.text?.trim()) {
        return toEntry('assistant', clean(block.text, 500));
      }
      if (block?.type === 'thinking' && block.thinking?.trim()) {
        return toEntry('thinking', clean(block.thinking, 500));
      }
      if (block?.type === 'tool_use') {
        return toEntry('tool_use', summarizeTool(block.name, block.input));
      }
    }
    return null;
  } catch {
    // Best effort: a record that cannot be summarized is simply not indexed.
    return null;
  }
}
93
/**
 * Collect every index entry contained in one parsed JSONL record.
 *
 * A user record yields at most one entry (delegated to extractSummary). An
 * assistant record yields one entry per non-empty text block, per non-empty
 * thinking block and per tool_use block, in block order. Meta records, other
 * record types and records that fail to extract yield an empty array.
 *
 * @param {object} raw - One parsed JSONL record.
 * @param {number} lineNum - Line number of the record within its JSONL file.
 * @param {number} byteOffset - Byte offset of the start of that line.
 * @param {string} source - Source tag, e.g. 'main' or 'agent-xxxxxxxx'.
 * @returns {object[]} Entries shaped `{ lineNum, byteOffset, timestamp, source, msgType, summary }`.
 */
function extractAllSummaries(raw, lineNum, byteOffset, source) {
  try {
    if (raw.isMeta) {
      return [];
    }
    const recordType = raw.type;
    if (recordType !== 'user' && recordType !== 'assistant') {
      return [];
    }
    const stamp = raw.timestamp ? new Date(raw.timestamp).toISOString().substring(0, 19) : '';

    if (recordType === 'user') {
      const only = extractSummary(raw, lineNum, byteOffset, source);
      return only ? [only] : [];
    }

    // assistant record: may carry several blocks
    const blocks = raw.message?.content;
    if (!Array.isArray(blocks)) {
      return [];
    }
    const build = (msgType, summary) => ({ lineNum, byteOffset, timestamp: stamp, source, msgType, summary });
    return blocks.flatMap((block) => {
      switch (block?.type) {
        case 'text':
          return block.text?.trim() ? [build('assistant', clean(block.text, 500))] : [];
        case 'thinking':
          return block.thinking?.trim() ? [build('thinking', clean(block.thinking, 500))] : [];
        case 'tool_use':
          return [build('tool_use', summarizeTool(block.name, block.input))];
        default:
          return [];
      }
    });
  } catch {
    return [];
  }
}
135
/**
 * One formatter per known tool name. Each receives the tool_use `input`
 * object and returns the compact one-line summary for that tool.
 */
const TOOL_SUMMARY_FORMATTERS = new Map([
  ['Read', (input) => {
    const offsetPart = input.offset ? ` offset=${input.offset}` : '';
    return `Read path="${input.file_path || ''}"${offsetPart}`;
  }],
  ['Write', (input) => `Write path="${input.file_path || ''}" ${clean(input.content || '', 100)}`],
  ['Edit', (input) => `Edit path="${input.file_path || ''}" old="${clean(input.old_string || '', 60)}"`],
  ['Grep', (input) => `Grep pattern="${input.pattern || ''}" path="${input.path || ''}"`],
  ['Glob', (input) => {
    const pathPart = input.path ? ` path="${input.path}"` : '';
    return `Glob pattern="${input.pattern || ''}"${pathPart}`;
  }],
  ['Bash', (input) => `Bash cmd="${clean(input.command || '', 200)}"`],
  ['WebSearch', (input) => `WebSearch query="${input.query || ''}"`],
  ['WebFetch', (input) => `WebFetch url="${input.url || ''}"`],
  ['Task', (input) => {
    const agentPart = input.agentId ? ` agentId=${input.agentId}` : '';
    return `Task prompt="${clean(input.prompt || input.description || '', 200)}"${agentPart}`;
  }],
  ['TodoWrite', (input) => `TodoWrite ${clean(JSON.stringify(input.todos || []), 200)}`],
]);

/**
 * Summarize a tool_use block into a compact one-liner.
 *
 * @param {string} name - Tool name from the tool_use block.
 * @param {object} [input] - Tool input; when missing, only the name is returned.
 * @returns {string} Tool name followed by its key parameters. Unknown tools
 *   fall back to the name plus the first 300 chars of the JSON-encoded input.
 */
function summarizeTool(name, input) {
  if (!input) {
    return name;
  }
  const formatter = TOOL_SUMMARY_FORMATTERS.get(name);
  if (formatter) {
    return formatter(input);
  }
  return `${name} ${clean(JSON.stringify(input), 300)}`;
}
164
/**
 * Clean text for a one-line index entry: collapse every run of whitespace
 * (including newlines and tabs) into a single space, trim, and cap the length.
 *
 * Non-string input is tolerated so that one unexpected value (for example a
 * structured tool input) cannot make the caller discard the whole record:
 * null/undefined become '', anything else is JSON-encoded (falling back to
 * String() when it cannot be encoded).
 *
 * @param {*} text - Value to clean; normally a string.
 * @param {number} maxLen - Maximum length of the returned string.
 * @returns {string} The cleaned, length-capped text.
 */
function clean(text, maxLen) {
  let value;
  if (typeof text === 'string') {
    value = text;
  } else if (text == null) {
    value = '';
  } else {
    try {
      value = JSON.stringify(text) ?? String(text);
    } catch {
      // Cyclic structures / BigInt cannot be JSON-encoded.
      value = String(text);
    }
  }
  // \s already covers \n, \r and \t, so a single pass is enough.
  return value.replace(/\s+/g, ' ').trim().substring(0, maxLen);
}
168
/**
 * Serialize an index entry as one pipe-delimited line:
 * lineNum|byteOffset|timestamp|source|msgType|summary
 *
 * The summary is the last field, so it may itself contain '|' characters.
 *
 * @param {object} entry - Entry with the six fields listed above.
 * @returns {string} The formatted index line (no trailing newline).
 */
function formatLine(entry) {
  const { lineNum, byteOffset, timestamp, source, msgType, summary } = entry;
  const fields = [lineNum, byteOffset, timestamp, source, msgType, summary];
  return fields.map((field) => `${field}`).join('|');
}
172
+ // ============================================================
173
+ // BUILD INDEX (cold start / catch-up)
174
+ // ============================================================
175
/**
 * Build or resume the summary index for a session.
 *
 * Indexes the main JSONL plus every sub-agent JSONL (both the session's
 * subagents/ directory and project-level agent files), appending new entries
 * to search-index.txt and saving offsets to search-index-meta.json. When
 * usable metadata exists, each file is resumed from its saved byte offset.
 *
 * @param {string} sessionId - Session to index.
 * @param {string} workingDir - Working directory used to locate the session's JSONL files.
 * @param {string} sessionBaseDir - Directory under which `.osborn/sessions/{sessionId}/.index` is created.
 * @param {(message: string) => void} [onProgress] - Optional progress callback.
 * @returns {object} The index state (paths, per-file offsets/counters, tool id map).
 */
export function buildSummaryIndex(sessionId, workingDir, sessionBaseDir, onProgress) {
  const paths = getSessionPaths(sessionId, workingDir);
  if (!paths.exists) {
    onProgress?.('No session files found');
    return emptyState(sessionId, sessionBaseDir, paths.conversation);
  }
  const workspace = join(sessionBaseDir, '.osborn', 'sessions', sessionId, '.index');
  mkdirSync(workspace, { recursive: true });
  const indexPath = join(workspace, 'search-index.txt');
  const metaPath = join(workspace, 'search-index-meta.json');
  // Resume from saved offsets when metadata is usable, otherwise start fresh.
  const state = loadOrCreateState(indexPath, metaPath, paths.conversation);
  const startMs = Date.now();

  // ── Index main JSONL ──
  const mainEntries = indexFile(paths.conversation, 'main', state.main.byteOffset);
  if (mainEntries.lines.length > 0) {
    appendFileSync(indexPath, mainEntries.lines.join('\n') + '\n');
    state.main.byteOffset = mainEntries.newByteOffset;
    state.main.lineCount += mainEntries.linesProcessed;
    state.main.indexLineCount += mainEntries.lines.length;
    // Collect tool_use_id → name mappings
    for (const [id, name] of mainEntries.toolMap) {
      state.toolUseIdMap.set(id, name);
    }
  }
  onProgress?.(`Main JSONL: ${mainEntries.lines.length} entries in ${Date.now() - startMs}ms`);

  // ── Discover and index sub-agents ──
  const subAgentMs = Date.now();
  let subAgentEntries = 0;

  // Index one sub-agent file from its saved offset; returns the number of new index lines.
  const indexSubAgent = (agentKey, sourceTag, agentFile) => {
    const existing = state.subAgents.get(agentKey);
    const result = indexFile(agentFile, sourceTag, existing?.byteOffset || 0);
    if (result.lines.length === 0) {
      return 0;
    }
    appendFileSync(indexPath, result.lines.join('\n') + '\n');
    state.subAgents.set(agentKey, {
      jsonlPath: agentFile,
      byteOffset: result.newByteOffset,
      lineCount: (existing?.lineCount || 0) + result.linesProcessed,
      indexLineCount: (existing?.indexLineCount || 0) + result.lines.length,
    });
    return result.lines.length;
  };

  // Keys handled during THIS run. Checking state.subAgents instead would also
  // match agents restored from metadata, so project-level agents would never
  // be caught up after a resume.
  const handledThisRun = new Set();

  // Path 1: session subdirectory subagents/ (discovered by getSessionPaths)
  for (const agentFile of paths.subagents) {
    const agentKey = basename(agentFile, '.jsonl').replace('agent-', '').substring(0, 12);
    handledThisRun.add(agentKey);
    subAgentEntries += indexSubAgent(agentKey, `agent-${agentKey.substring(0, 8)}`, agentFile);
  }

  // Path 2: project-level agent-*.jsonl (discovered by getSessionSubAgents)
  const agents = getSessionSubAgents(sessionId, workingDir);
  for (const agent of agents) {
    if (!agent.agentFileExists) {
      continue;
    }
    const agentKey = agent.agentId.substring(0, 12);
    if (handledThisRun.has(agentKey)) {
      continue; // already indexed from Path 1 in this run
    }
    handledThisRun.add(agentKey);
    subAgentEntries += indexSubAgent(agentKey, `agent-${agent.agentId.substring(0, 8)}`, agent.agentFile);
  }
  onProgress?.(`Sub-agents: ${agents.length} found, ${subAgentEntries} entries in ${Date.now() - subAgentMs}ms`);

  // Trim toolUseIdMap to last 100
  if (state.toolUseIdMap.size > 100) {
    state.toolUseIdMap = new Map([...state.toolUseIdMap.entries()].slice(-100));
  }

  // Save metadata
  saveMeta(state, sessionId, metaPath);

  const totalMs = Date.now() - startMs;
  const indexSize = existsSync(indexPath) ? statSync(indexPath).size : 0;
  // Report cumulative totals for main AND sub-agents (not cumulative main + this run's sub-agents).
  let totalEntries = state.main.indexLineCount;
  for (const agentState of state.subAgents.values()) {
    totalEntries += agentState.indexLineCount || 0;
  }
  onProgress?.(`Index complete: ${totalEntries} total entries, ${(indexSize / 1024).toFixed(0)}KB, ${totalMs}ms`);
  return state;
}
265
/**
 * Index a single JSONL file starting at a byte offset.
 *
 * Only complete records are consumed. A trailing fragment without a newline
 * that does not parse as JSON (a record still being written) is left in place:
 * `newByteOffset` stops at the start of that fragment so a later call can pick
 * it up once it is complete. Complete lines that do not parse are skipped.
 *
 * @param {string} filePath - JSONL file to read.
 * @param {string} source - Source tag stored on every entry ('main' or 'agent-xxxxxxxx').
 * @param {number} fromByteOffset - Byte offset to start reading from (a line start).
 * @returns {{ lines: string[], linesProcessed: number, newByteOffset: number, toolMap: Map<string, string> }}
 *   Formatted index lines, the number of non-empty lines consumed, the offset
 *   to resume from, and tool_use id → tool name pairs seen in this range.
 */
function indexFile(filePath, source, fromByteOffset) {
  const nothingNew = () => ({ lines: [], linesProcessed: 0, newByteOffset: fromByteOffset, toolMap: new Map() });
  if (!existsSync(filePath)) {
    return nothingNew();
  }
  const fileSize = statSync(filePath).size;
  if (fromByteOffset >= fileSize) {
    return nothingNew();
  }

  // Read everything after the offset. readSync may return fewer bytes than
  // requested, so loop until the buffer is full or EOF is reached.
  const buf = Buffer.alloc(fileSize - fromByteOffset);
  let bytesRead = 0;
  const fd = openSync(filePath, 'r');
  try {
    while (bytesRead < buf.length) {
      const n = readSync(fd, buf, bytesRead, buf.length - bytesRead, fromByteOffset + bytesRead);
      if (n === 0) {
        break;
      }
      bytesRead += n;
    }
  } finally {
    closeSync(fd);
  }
  const data = buf.subarray(0, bytesRead);

  const outputLines = [];
  const toolMap = new Map();
  let lineNum = fromByteOffset === 0 ? 1 : countLines(filePath, fromByteOffset) + 1;
  let linesProcessed = 0;
  let consumed = 0; // bytes of `data` belonging to fully handled lines
  let cursor = 0;

  // Split on newline BYTES so every line's offset is exact even when the
  // content contains multi-byte or invalid UTF-8 sequences.
  while (cursor < data.length) {
    const newlineAt = data.indexOf(10, cursor);
    const terminated = newlineAt !== -1;
    const lineEnd = terminated ? newlineAt : data.length;
    const lineByteOffset = fromByteOffset + cursor;
    const rawLine = data.toString('utf-8', cursor, lineEnd);
    cursor = terminated ? lineEnd + 1 : lineEnd;

    if (!rawLine.trim()) {
      if (!terminated) {
        break; // trailing whitespace without newline: leave for the next call
      }
      consumed = cursor;
      lineNum++;
      continue;
    }

    let obj;
    let parsed = false;
    try {
      obj = JSON.parse(rawLine);
      parsed = true;
    } catch {
      // Either a corrupt line or a record that is still being written.
    }
    if (!parsed && !terminated) {
      break; // partial trailing record: do not advance past it
    }

    linesProcessed++;
    consumed = cursor;
    if (parsed) {
      try {
        // Track tool_use_id → tool_name for resolving tool_result entries
        if (obj.type === 'assistant' && Array.isArray(obj.message?.content)) {
          for (const block of obj.message.content) {
            if (block?.type === 'tool_use' && block.id && block.name) {
              toolMap.set(block.id, block.name);
            }
          }
        }
        // byteOffset enables targeted reads of the raw record later
        for (const entry of extractAllSummaries(obj, lineNum, lineByteOffset, source)) {
          outputLines.push(formatLine(entry));
        }
      } catch {
        // Skip records with an unexpected shape (e.g. a bare `null` line).
      }
    }
    lineNum++;
  }

  return {
    lines: outputLines,
    linesProcessed,
    newByteOffset: fromByteOffset + consumed,
    toolMap,
  };
}
322
/**
 * Count the newline bytes in the first `upToBytes` bytes of a file (used to
 * recover the current line number when indexing resumes from a byte offset).
 *
 * The whole prefix is scanned in chunks of at most 1 MB, so memory stays
 * bounded while the count remains exact for offsets beyond 1 MB.
 *
 * @param {string} filePath - File to scan.
 * @param {number} upToBytes - Number of leading bytes to scan.
 * @returns {number} Number of '\n' bytes found in that prefix.
 */
function countLines(filePath, upToBytes) {
  const CHUNK_BYTES = 1024 * 1024;
  if (!(upToBytes > 0)) {
    return 0;
  }
  const buf = Buffer.alloc(Math.min(upToBytes, CHUNK_BYTES));
  const fd = openSync(filePath, 'r');
  let count = 0;
  try {
    let position = 0;
    while (position < upToBytes) {
      const wanted = Math.min(buf.length, upToBytes - position);
      const bytesRead = readSync(fd, buf, 0, wanted, position);
      if (bytesRead === 0) {
        break; // file is shorter than upToBytes
      }
      for (let i = 0; i < bytesRead; i++) {
        if (buf[i] === 10) {
          count++; // newline
        }
      }
      position += bytesRead;
    }
  } finally {
    closeSync(fd);
  }
  return count;
}
335
+ // ============================================================
336
+ // INCREMENTAL WATCHER
337
+ // ============================================================
338
/**
 * Start a poll-based incremental index updater (one pass every 10 seconds).
 *
 * Each pass indexes new content of the main JSONL, then of every sub-agent
 * JSONL (session subagents/ files plus project-level agent files), appends
 * the new entries to the index file and saves the metadata when anything was
 * added. Polling is used instead of fs.watch to avoid race conditions with
 * concurrent writers.
 *
 * @param {string} sessionId - Session being indexed.
 * @param {string} workingDir - Working directory used to locate the session's JSONL files.
 * @param {string} sessionBaseDir - Base directory of the session workspace (not read by this function).
 * @param {object} state - Index state returned by buildSummaryIndex; updated in place.
 * @returns {{ stop: () => void, state: object }} `stop()` ends polling and saves the metadata once more.
 */
export function startIndexWatcher(sessionId, workingDir, sessionBaseDir, state) {
  let stopped = false;

  // Append freshly formatted lines to the index file.
  const appendToIndex = (lines) => appendFileSync(state.indexPath, lines.join('\n') + '\n');

  const pollOnce = () => {
    if (stopped) {
      return;
    }
    try {
      let added = 0;

      // 1. Main JSONL
      const mainDelta = indexFile(state.main.jsonlPath, 'main', state.main.byteOffset);
      const mainCount = mainDelta.lines.length;
      if (mainCount > 0) {
        appendToIndex(mainDelta.lines);
        state.main.byteOffset = mainDelta.newByteOffset;
        state.main.lineCount += mainDelta.linesProcessed;
        state.main.indexLineCount += mainCount;
        added += mainCount;
        mainDelta.toolMap.forEach((toolName, toolUseId) => {
          state.toolUseIdMap.set(toolUseId, toolName);
        });
      }

      // 2. Sub-agents: files from the session directory first, then any
      //    project-level agent file not already listed.
      const { subagents } = getSessionPaths(sessionId, workingDir);
      const candidates = [...subagents];
      for (const agent of getSessionSubAgents(sessionId, workingDir)) {
        if (!agent.agentFileExists || candidates.includes(agent.agentFile)) {
          continue;
        }
        candidates.push(agent.agentFile);
      }

      for (const agentFile of candidates) {
        if (!existsSync(agentFile)) {
          continue;
        }
        const agentKey = basename(agentFile, '.jsonl').replace('agent-', '').substring(0, 12);
        const previous = state.subAgents.get(agentKey);
        const delta = indexFile(agentFile, `agent-${agentKey.substring(0, 8)}`, previous?.byteOffset || 0);
        const deltaCount = delta.lines.length;
        if (deltaCount === 0) {
          continue;
        }
        appendToIndex(delta.lines);
        added += deltaCount;
        state.subAgents.set(agentKey, {
          jsonlPath: agentFile,
          byteOffset: delta.newByteOffset,
          lineCount: (previous?.lineCount || 0) + delta.linesProcessed,
          indexLineCount: (previous?.indexLineCount || 0) + deltaCount,
        });
      }

      if (added > 0) {
        console.log(`🔍 [index] +${added} entries`);
        saveMeta(state, sessionId, state.metaPath);
      }
    } catch (err) {
      console.error('🔍 [index] Poll error:', err?.message);
    }
  };

  const pollInterval = setInterval(pollOnce, 10_000);

  return {
    stop() {
      stopped = true;
      clearInterval(pollInterval);
      saveMeta(state, sessionId, state.metaPath);
    },
    state,
  };
}
409
+ // ============================================================
410
+ // STATE MANAGEMENT
411
+ // ============================================================
412
/**
 * Create a blank index state for a session without touching the filesystem.
 *
 * @param {string} sessionId - Session the state belongs to.
 * @param {string} sessionBaseDir - Directory under which `.osborn/sessions/{sessionId}/.index` lives.
 * @param {string} mainJsonlPath - Path of the session's main JSONL file.
 * @returns {object} State with index/meta paths, zeroed main counters and empty maps.
 */
function emptyState(sessionId, sessionBaseDir, mainJsonlPath) {
  const inIndexDir = (fileName) => join(sessionBaseDir, '.osborn', 'sessions', sessionId, '.index', fileName);
  const main = { jsonlPath: mainJsonlPath, byteOffset: 0, lineCount: 0, indexLineCount: 0 };
  return {
    indexPath: inIndexDir('search-index.txt'),
    metaPath: inIndexDir('search-index-meta.json'),
    main,
    subAgents: new Map(),
    toolUseIdMap: new Map(),
  };
}
422
/**
 * Load the saved index state for resuming, or create a fresh one.
 *
 * Resuming appends to the existing index file, so it is only done when the
 * metadata file AND the index file both exist, the recorded main JSONL still
 * exists, and it has not shrunk below the saved byte offset. In every other
 * case (including unreadable metadata) the index file is truncated and a
 * zeroed state is returned so the index is rebuilt from scratch.
 *
 * @param {string} indexPath - Path of search-index.txt.
 * @param {string} metaPath - Path of search-index-meta.json.
 * @param {string} mainJsonlPath - Path of the session's main JSONL file (used for a fresh state).
 * @returns {object} State with index/meta paths, main counters, sub-agent map and tool id map.
 */
function loadOrCreateState(indexPath, metaPath, mainJsonlPath) {
  // Without the index file the saved offsets would make us append to an index
  // that is missing every earlier entry, so require both files.
  if (existsSync(metaPath) && existsSync(indexPath)) {
    try {
      const meta = JSON.parse(readFileSync(metaPath, 'utf-8'));
      // Validate main file hasn't shrunk (file corruption/replacement)
      if (existsSync(meta.main.jsonlPath)) {
        const currentSize = statSync(meta.main.jsonlPath).size;
        if (meta.main.byteOffset <= currentSize) {
          return {
            indexPath,
            metaPath,
            main: meta.main,
            subAgents: new Map(Object.entries(meta.subAgents || {})),
            toolUseIdMap: new Map(Object.entries(meta.toolUseIdMap || {})),
          };
        }
      }
    } catch {
      // Corrupt or unreadable metadata: fall through and rebuild from scratch.
    }
  }
  // Fresh state: make sure the index file starts empty.
  writeFileSync(indexPath, '');
  return {
    indexPath,
    metaPath,
    main: { jsonlPath: mainJsonlPath, byteOffset: 0, lineCount: 0, indexLineCount: 0 },
    subAgents: new Map(),
    toolUseIdMap: new Map(),
  };
}
453
/**
 * Persist the index state as JSON metadata (best effort — never throws).
 *
 * `createdAt` is carried over from an existing metadata file so it keeps
 * recording when the metadata was first written; `updatedAt` is always the
 * time of this save. Only the last 100 tool_use id mappings are stored.
 *
 * @param {object} state - Index state (`main`, `subAgents` Map, `toolUseIdMap` Map).
 * @param {string} sessionId - Session id recorded in the metadata.
 * @param {string} metaPath - Destination path of the metadata file.
 * @returns {void}
 */
function saveMeta(state, sessionId, metaPath) {
  const now = new Date().toISOString();
  let createdAt = now;
  try {
    if (existsSync(metaPath)) {
      const previous = JSON.parse(readFileSync(metaPath, 'utf-8'));
      if (typeof previous?.createdAt === 'string') {
        createdAt = previous.createdAt;
      }
    }
  } catch {
    // Unreadable previous metadata: treat this save as the first one.
  }
  const meta = {
    version: 1,
    sessionId,
    createdAt,
    updatedAt: now,
    main: state.main,
    subAgents: Object.fromEntries(state.subAgents),
    toolUseIdMap: Object.fromEntries([...state.toolUseIdMap.entries()].slice(-100)),
  };
  try {
    writeFileSync(metaPath, JSON.stringify(meta, null, 2));
  } catch (err) {
    // Still best effort, but surface the failure instead of hiding it.
    console.error('🔍 [index] Failed to save metadata:', err?.message);
  }
}
468
+ // ============================================================
469
+ // PUBLIC: Check if index exists for a session
470
+ // ============================================================
471
/**
 * Locate the search index of a session.
 *
 * @param {string} sessionId - Session to look up.
 * @param {string} sessionBaseDir - Directory under which `.osborn/sessions/{sessionId}/.index` lives.
 * @returns {string|null} Path of search-index.txt when it exists and is non-empty, otherwise null.
 */
export function getIndexPath(sessionId, sessionBaseDir) {
  const candidate = join(sessionBaseDir, '.osborn', 'sessions', sessionId, '.index', 'search-index.txt');
  if (!existsSync(candidate)) {
    return null;
  }
  const { size } = statSync(candidate);
  if (size > 0) {
    return candidate;
  }
  return null;
}
475
+ // ============================================================
476
+ // PUBLIC: Read full clean text from raw JSONL by byte offset
477
+ // ============================================================
478
/** Chunk size used when reading one JSONL line starting at a byte offset. */
const FULL_CONTENT_READ_CHUNK_BYTES = 64 * 1024;

/**
 * Read the single line that starts at `startOffset`, however long it is.
 * Chunks are read until a newline byte or the end of the file is reached, and
 * the bytes are decoded only once so multi-byte characters are never split.
 *
 * @param {number} fd - Open file descriptor.
 * @param {number} startOffset - Byte offset of the start of the line.
 * @param {number} fileSize - Size of the file in bytes.
 * @returns {string} The line's text without its trailing newline.
 */
function readJsonlLineAt(fd, startOffset, fileSize) {
  const chunks = [];
  let position = startOffset;
  while (position < fileSize) {
    const chunk = Buffer.alloc(Math.min(FULL_CONTENT_READ_CHUNK_BYTES, fileSize - position));
    const bytesRead = readSync(fd, chunk, 0, chunk.length, position);
    if (bytesRead === 0) {
      break;
    }
    const filled = chunk.subarray(0, bytesRead);
    const newlineAt = filled.indexOf(10);
    if (newlineAt !== -1) {
      chunks.push(filled.subarray(0, newlineAt));
      break;
    }
    chunks.push(filled);
    position += bytesRead;
  }
  return Buffer.concat(chunks).toString('utf-8');
}

/**
 * Given index search results (with byte offsets + source tags), read the FULL
 * record for each result from the raw JSONL via targeted reads at the saved
 * byte offset — the whole file is never loaded. JSON noise is stripped and
 * clean text is returned. Results that cannot be read or parsed are skipped.
 *
 * @param {Array<{ source: string, lineNum: number, byteOffset: number }>} results - Index hits.
 * @param {string} sessionId - Session the hits belong to.
 * @param {string} workingDir - Working directory used to locate the session's JSONL files.
 * @param {string} sessionBaseDir - Base directory of the session workspace (not read by this function).
 * @param {number} [maxCharsPerResult=2000] - Maximum characters of clean text per result.
 * @returns {string[]} One `[source L<lineNum>] <text>` string per readable result, grouped by source.
 */
export function readFullContent(results, sessionId, workingDir, sessionBaseDir, maxCharsPerResult = 2000) {
  const paths = getSessionPaths(sessionId, workingDir);
  const output = [];

  // Group results by source file so each file is opened only once.
  const bySource = new Map();
  for (const r of results) {
    if (!bySource.has(r.source)) {
      bySource.set(r.source, []);
    }
    bySource.get(r.source).push({ lineNum: r.lineNum, byteOffset: r.byteOffset });
  }

  for (const [source, refs] of bySource) {
    // Resolve source tag to file path
    let filePath;
    if (source === 'main') {
      filePath = paths.conversation;
    } else {
      // NOTE(review): the tag carries only a prefix of the agent id, so the
      // fallback file name presumably matches only ids of that length — confirm.
      const agentPrefix = source.replace('agent-', '');
      filePath = paths.subagents.find((f) => basename(f).includes(agentPrefix))
        || join(dirname(paths.conversation), `agent-${agentPrefix}.jsonl`);
    }
    if (!existsSync(filePath)) {
      continue;
    }
    const fileSize = statSync(filePath).size;
    const fd = openSync(filePath, 'r');
    try {
      for (const ref of refs) {
        const offset = ref.byteOffset;
        if (!Number.isInteger(offset) || offset < 0 || offset >= fileSize) {
          continue;
        }
        try {
          // Read exactly one JSONL record; no fixed size cap, so very large
          // records (e.g. big tool results) are not dropped.
          const obj = JSON.parse(readJsonlLineAt(fd, offset, fileSize));
          const cleanText = extractCleanText(obj, maxCharsPerResult);
          if (cleanText) {
            output.push(`[${source} L${ref.lineNum}] ${cleanText}`);
          }
        } catch {
          // Unreadable or unparseable record: skip this result.
        }
      }
    } finally {
      closeSync(fd);
    }
  }
  return output;
}
538
/**
 * Turn a parsed JSONL record into plain readable text (no JSON noise).
 *
 * User records contribute their text blocks and the text of tool_result
 * blocks; assistant records contribute text blocks, thinking blocks (prefixed
 * with "[thinking] ") and a one-line summary per tool_use block. Parts are
 * joined with newlines, trimmed and cut to `maxChars`.
 *
 * @param {object} obj - Parsed JSONL record.
 * @param {number} maxChars - Maximum length of the returned text.
 * @returns {string} Clean text; '' when the record has nothing readable.
 */
function extractCleanText(obj, maxChars) {
  const pieces = [];
  const role = obj.type;

  // Flatten a tool_result's content (string or array of blocks) to text.
  const toolResultText = (content) => {
    if (typeof content === 'string') {
      return content;
    }
    if (Array.isArray(content)) {
      return content.filter((b) => b.type === 'text').map((b) => b.text).join('\n');
    }
    return '';
  };

  if (role === 'user' && Array.isArray(obj.message?.content)) {
    for (const block of obj.message.content) {
      switch (block?.type) {
        case 'text':
          if (block.text) {
            pieces.push(block.text);
          }
          break;
        case 'tool_result': {
          const body = toolResultText(block.content);
          if (body) {
            pieces.push(body);
          }
          break;
        }
        default:
          break;
      }
    }
  }

  if (role === 'assistant' && Array.isArray(obj.message?.content)) {
    for (const block of obj.message.content) {
      switch (block?.type) {
        case 'text':
          if (block.text) {
            pieces.push(block.text);
          }
          break;
        case 'thinking':
          if (block.thinking) {
            pieces.push(`[thinking] ${block.thinking}`);
          }
          break;
        case 'tool_use':
          pieces.push(summarizeTool(block.name, block.input));
          break;
        default:
          break;
      }
    }
  }

  const combined = pieces.join('\n').trim();
  return combined.substring(0, maxChars);
}
@@ -0,0 +1,24 @@
1
+ /**
2
+ * turn-detector-shim.ts — LiveKit Cloud turn detection without JobContext
3
+ *
4
+ * The official MultilingualModel crashes without LiveKit's worker framework
5
+ * because it calls getJobContext(). This shim implements the same _TurnDetector
6
+ * interface but makes the remote HTTP call directly — no worker framework needed.
7
+ *
8
+ * On LiveKit Cloud (LIVEKIT_REMOTE_EOT_URL set): HTTP call to inference gateway.
9
+ * Without the URL: Returns 1.0 (always end of turn — STT endpointing handles it).
10
+ */
11
+ import type { llm } from '@livekit/agents';
12
/**
 * Implements _TurnDetector interface for LiveKit Cloud remote inference
 * without requiring JobContext / worker framework.
 */
export declare class CloudTurnDetector {
    #private;
    /** Identifier of the end-of-utterance model this detector reports. */
    readonly model = "lk_end_of_utterance_multilingual";
    /** Provider name this detector reports. */
    readonly provider = "livekit";
    constructor();
    /**
     * Threshold for the given language.
     * @param _language Optional language code.
     * @returns A promise resolving to a number, or undefined when no threshold is supplied.
     */
    unlikelyThreshold(_language?: string): Promise<number | undefined>;
    /**
     * Whether the detector supports the given language.
     * @param _language Optional language code.
     * @returns A promise resolving to a boolean.
     */
    supportsLanguage(_language?: string): Promise<boolean>;
    /**
     * Predict the end-of-turn value for the conversation so far.
     * @param chatCtx Chat context holding the conversation.
     * @param _timeout Optional timeout value.
     * @returns A promise resolving to a number.
     */
    predictEndOfTurn(chatCtx: llm.ChatContext, _timeout?: number): Promise<number>;
}
@@ -0,0 +1,83 @@
1
+ /**
2
+ * turn-detector-shim.ts — LiveKit Cloud turn detection without JobContext
3
+ *
4
+ * The official MultilingualModel crashes without LiveKit's worker framework
5
+ * because it calls getJobContext(). This shim implements the same _TurnDetector
6
+ * interface but makes the remote HTTP call directly — no worker framework needed.
7
+ *
8
+ * On LiveKit Cloud (LIVEKIT_REMOTE_EOT_URL set): HTTP call to inference gateway.
9
+ * Without the URL: Returns 1.0 (always end of turn — STT endpointing handles it).
10
+ */
11
+ import { log } from '@livekit/agents';
12
/** Abort the remote end-of-turn request after this many milliseconds. */
const REMOTE_INFERENCE_TIMEOUT = 2000;
/** Number of most recent chat items sent to the remote model. */
const MAX_HISTORY_TURNS = 15;
/** Probability reported whenever remote inference is unavailable or fails. */
const END_OF_TURN_FALLBACK = 1.0;

/**
 * Turn detector for LiveKit Cloud remote inference that works without a
 * JobContext / worker framework: it calls the remote end-of-turn endpoint
 * directly over HTTP. When LIVEKIT_REMOTE_EOT_URL is not set, every
 * prediction is 1.0 (always end of turn — STT endpointing handles it).
 */
export class CloudTurnDetector {
  #remoteUrl;
  #logger = log();
  model = 'lk_end_of_utterance_multilingual';
  provider = 'livekit';

  /** Reads LIVEKIT_REMOTE_EOT_URL and logs which mode is active. */
  constructor() {
    this.#remoteUrl = process.env.LIVEKIT_REMOTE_EOT_URL;
    const banner = this.#remoteUrl
      ? `🧠 Turn detector: LiveKit Cloud remote inference`
      : '🧠 Turn detector: No LIVEKIT_REMOTE_EOT_URL — STT endpointing fallback';
    console.log(banner);
  }

  /**
   * @param {string} [_language] - Ignored.
   * @returns {Promise<undefined>} Always undefined, so the framework uses its defaults.
   */
  async unlikelyThreshold(_language) {
    return undefined;
  }

  /**
   * @param {string} [_language] - Ignored.
   * @returns {Promise<boolean>} Always true (the multilingual model is treated as supporting every language).
   */
  async supportsLanguage(_language) {
    return true;
  }

  /**
   * Ask the remote endpoint for the end-of-turn probability of the conversation.
   *
   * @param {object} chatCtx - Chat context; copied, filtered and truncated before sending.
   * @param {number} [_timeout] - Ignored; REMOTE_INFERENCE_TIMEOUT is used instead.
   * @returns {Promise<number>} The remote probability when it is a non-negative
   *   number; otherwise 1.0 (no URL configured, HTTP failure, timeout, bad payload).
   */
  async predictEndOfTurn(chatCtx, _timeout) {
    if (!this.#remoteUrl) {
      return END_OF_TURN_FALLBACK; // No remote URL = always end of turn (STT handles it)
    }
    try {
      const history = chatCtx
        .copy({
          excludeFunctionCall: true,
          excludeInstructions: true,
          excludeEmptyMessage: true,
        })
        .truncate(MAX_HISTORY_TURNS);
      const serialized = history.toJSON({
        excludeImage: true,
        excludeAudio: true,
        excludeTimestamp: true,
      });
      const agentId = process.env.LIVEKIT_AGENT_ID;
      const request = {
        ...serialized,
        // Dummy IDs — LiveKit Cloud uses these for routing/logging, not auth
        jobId: `osborn-${Date.now()}`,
        workerId: 'osborn-direct',
        ...(agentId ? { agentId } : {}),
      };
      const resp = await fetch(`${this.#remoteUrl}/eot/multi`, {
        method: 'POST',
        body: JSON.stringify(request),
        headers: { 'Content-Type': 'application/json' },
        signal: AbortSignal.timeout(REMOTE_INFERENCE_TIMEOUT),
      });
      if (!resp.ok) {
        return END_OF_TURN_FALLBACK; // Failed — default to end of turn
      }
      const { probability } = await resp.json();
      const usable = typeof probability === 'number' && probability >= 0;
      return usable ? probability : END_OF_TURN_FALLBACK;
    } catch {
      return END_OF_TURN_FALLBACK; // Timeout/error — default to end of turn
    }
  }
}