osborn 0.5.3 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/.claude/settings.local.json +9 -0
  2. package/.claude/skills/markdown-to-pdf/SKILL.md +29 -0
  3. package/.claude/skills/pdf-to-markdown/SKILL.md +28 -0
  4. package/.claude/skills/playwright-browser/SKILL.md +90 -0
  5. package/.claude/skills/shadcn/SKILL.md +232 -0
  6. package/.claude/skills/shadcn/image.png +0 -0
  7. package/.claude/skills/youtube-transcript/SKILL.md +24 -0
  8. package/.dockerignore +13 -0
  9. package/Dockerfile +103 -0
  10. package/deploy.sh +70 -0
  11. package/dist/claude-auth.d.ts +60 -0
  12. package/dist/claude-auth.js +334 -0
  13. package/dist/claude-llm.d.ts +51 -2
  14. package/dist/claude-llm.js +619 -86
  15. package/dist/config.d.ts +5 -1
  16. package/dist/config.js +4 -1
  17. package/dist/fast-brain.d.ts +70 -16
  18. package/dist/fast-brain.js +662 -99
  19. package/dist/index-3-2-26-legacy.d.ts +1 -0
  20. package/dist/index-3-2-26-legacy.js +2233 -0
  21. package/dist/index.js +979 -429
  22. package/dist/jsonl-search.d.ts +66 -0
  23. package/dist/jsonl-search.js +274 -0
  24. package/dist/leagcyprompts2.d.ts +0 -0
  25. package/dist/leagcyprompts2.js +573 -0
  26. package/dist/pipeline-direct-llm.d.ts +77 -0
  27. package/dist/pipeline-direct-llm.js +221 -0
  28. package/dist/pipeline-fastbrain.d.ts +45 -0
  29. package/dist/pipeline-fastbrain.js +373 -0
  30. package/dist/prompts-2-25-26.d.ts +0 -0
  31. package/dist/prompts-2-25-26.js +518 -0
  32. package/dist/prompts-3-2-26.d.ts +78 -0
  33. package/dist/prompts-3-2-26.js +1319 -0
  34. package/dist/prompts.d.ts +83 -12
  35. package/dist/prompts.js +2064 -587
  36. package/dist/recall-client.d.ts +33 -0
  37. package/dist/recall-client.js +101 -0
  38. package/dist/session-access.d.ts +24 -0
  39. package/dist/session-access.js +74 -0
  40. package/dist/summary-index.d.ts +87 -0
  41. package/dist/summary-index.js +570 -0
  42. package/dist/turn-detector-shim.d.ts +24 -0
  43. package/dist/turn-detector-shim.js +83 -0
  44. package/dist/voice-io.d.ts +15 -5
  45. package/dist/voice-io.js +52 -20
  46. package/fly.toml +30 -0
  47. package/package.json +18 -13
@@ -0,0 +1,77 @@
1
+ /**
2
+ * Pipeline Direct LLM — Wraps ClaudeLLM with parallel Gemini fast brain
3
+ *
4
+ * In pipeline mode, every user message fires two tracks simultaneously:
5
+ * Track A: Claude SDK (unchanged) — speaks the answer via TTS
6
+ * Track B: Gemini fast brain (new) — searches JSONL memory, sends result to UI only
7
+ *
8
+ * Phase 1 (current): Gemini is silent — results go to frontend panel for monitoring
9
+ * Phase 2 (future): Gemini speaks first, Claude suppressed when Gemini has HIGH confidence
10
+ */
11
+ import { llm, type APIConnectOptions } from '@livekit/agents';
12
+ import { type ClaudeLLMOptions } from './claude-llm.js';
13
+ import { EventEmitter } from 'events';
14
+ export interface InterruptionContext { // snapshot handed to chat() when the user cuts the agent off mid-speech
15
+ spokenText: string; // quoted in the enriched prompt under "What the user heard before cutoff"
16
+ recentMessages: string; // quoted under "Your recent messages"; an empty string is replaced by a placeholder line
17
+ }
18
+ export interface PipelineDirectOptions extends ClaudeLLMOptions { // the same object is also passed to the wrapped ClaudeLLM constructor
19
+ onFastBrainResult?: (result: FastBrainPanelResult) => void; // invoked once per non-superseded turn after the fast brain returns
20
+ getChatHistory?: () => { // read when a fast-brain turn starts and again by getRecentUserMessages; defaults to [] when absent
21
+ role: string; // entries with role === 'user' are the ones returned by getRecentUserMessages
22
+ content: string;
23
+ }[];
24
+ getResearchContext?: () => string | undefined; // forwarded to the fast brain as `researchContext`
25
+ /** Returns pending interruption context and clears it (consumed once). null = no pending interruption. */
26
+ getAndConsumeInterruptionContext?: () => InterruptionContext | null; // called on every chat(), even when the user text is empty
27
+ }
28
+ export interface FastBrainPanelResult { // payload delivered to PipelineDirectOptions.onFastBrainResult
29
+ question: string; // the user text extracted from the chat context for this turn
30
+ answer: string; // copied from the fast brain result's `script`
31
+ type: string; // copied from the fast brain result's `type`
32
+ elapsedMs: number; // wall-clock ms from the start of the fast-brain turn (includes any wait for a session ID)
33
+ timestamp: number; // Date.now() at the moment the result is reported
34
+ toolsUsed: string[]; // copied from the fast brain result's `toolsUsed`
35
+ }
36
+ export declare class PipelineDirectLLM extends llm.LLM { // wraps a ClaudeLLM and adds a background fast-brain call per chat()
37
+ #private;
38
+ constructor(opts: PipelineDirectOptions); // opts are stored and also used to construct the inner ClaudeLLM
39
+ /** Stop the index watcher (call on disconnect/session switch) */
40
+ stopIndexWatcher(): void; // no-op when no watcher is active
41
+ get events(): EventEmitter; // the inner ClaudeLLM's emitter (proxied)
42
+ get sessionId(): string | null; // proxied from the inner ClaudeLLM
43
+ get model(): string; // proxied from the inner ClaudeLLM
44
+ get isResumingSession(): boolean; // proxied from the inner ClaudeLLM
45
+ label(): string; // always 'pipeline-direct'
46
+ setResumeSessionId(id: string | null): void; // proxied
47
+ setContinueSession(e: boolean): void; // proxied
48
+ resetForSessionSwitch(): void; // stops the index watcher, clears the index-building flag, then resets the inner ClaudeLLM
49
+ respondToPermission(allow: boolean, msg?: string): void; // proxied
50
+ hasPendingPermission(): boolean; // proxied
51
+ getPendingPermission(): { // proxied; NOTE(review): confirm whether the inner call can return null when nothing is pending
52
+ toolName: string;
53
+ input: any;
54
+ };
55
+ getMcpServers(): Record<string, import("@anthropic-ai/claude-agent-sdk").McpServerConfig>; // proxied
56
+ setMcpServers(s: any): void; // proxied
57
+ interruptAgent(): Promise<boolean>; // delegates to ClaudeLLM.interruptQuery()
58
+ abortAgent(): void; // delegates to ClaudeLLM.abortQuery()
59
+ rewindAgent(checkpointId?: string): Promise<boolean>; // delegates to ClaudeLLM.rewindToCheckpoint()
60
+ hasActiveAgent(): boolean; // delegates to ClaudeLLM.hasActiveQuery()
61
+ /** Send a new prompt to Claude via direct chat() — event listeners stay attached */
62
+ sendPrompt(prompt: string): void; // builds a one-message ChatContext; bypasses the fast brain and discards the returned stream
63
+ enableMcpServer(k: string, c: any): void; // proxied
64
+ disableMcpServer(k: string): void; // proxied
65
+ getLatestCheckpoint(): string; // proxied
66
+ getFirstCheckpoint(): string; // proxied
67
+ getCheckpoints(): string[]; // proxied
68
+ clearCheckpoints(): void; // proxied
69
+ hasCheckpoints(): boolean; // proxied
70
+ chat({ chatCtx, toolCtx, connOptions, abortController, }: { // starts Claude, then fires the fast brain in the background when the user text is non-blank
71
+ chatCtx: llm.ChatContext; // the last user message may be rewritten in place when interruption context is pending
72
+ toolCtx?: llm.ToolContext; // forwarded to ClaudeLLM.chat()
73
+ connOptions?: APIConnectOptions; // defaults to DEFAULT_API_CONNECT_OPTIONS in the implementation
74
+ abortController?: AbortController; // forwarded to ClaudeLLM.chat()
75
+ }): llm.LLMStream; // the stream returned by the inner ClaudeLLM.chat()
76
+ }
77
+ export declare function createPipelineDirectLLM(opts: PipelineDirectOptions): PipelineDirectLLM; // factory: returns `new PipelineDirectLLM(opts)`
@@ -0,0 +1,221 @@
1
+ /**
2
+ * Pipeline Direct LLM — Wraps ClaudeLLM with parallel Gemini fast brain
3
+ *
4
+ * In pipeline mode, every user message fires two tracks simultaneously:
5
+ * Track A: Claude SDK (unchanged) — speaks the answer via TTS
6
+ * Track B: Gemini fast brain (new) — searches JSONL memory, sends result to UI only
7
+ *
8
+ * Phase 1 (current): Gemini is silent — results go to frontend panel for monitoring
9
+ * Phase 2 (future): Gemini speaks first, Claude suppressed when Gemini has HIGH confidence
10
+ */
11
+ import { llm, DEFAULT_API_CONNECT_OPTIONS } from '@livekit/agents';
12
+ import { ClaudeLLM } from './claude-llm.js';
13
+ import { askPipelineFastBrain } from './pipeline-fastbrain.js';
14
+ import { buildSummaryIndex, startIndexWatcher } from './summary-index.js';
15
/**
 * LLM adapter that wraps a ClaudeLLM and, for every user turn, additionally
 * fires the pipeline fast brain in the background.
 *
 * - Track A: the wrapped ClaudeLLM produces the stream returned from chat().
 * - Track B: askPipelineFastBrain() runs without blocking chat(); its result is
 *   reported through `opts.onFastBrainResult` only.
 *
 * Everything that is not chat()/fast-brain related is a thin proxy to the
 * wrapped ClaudeLLM.
 */
export class PipelineDirectLLM extends llm.LLM {
    /** Upper bound (ms) a turn waits for the SDK to announce a session ID. */
    static #SESSION_ID_TIMEOUT_MS = 15000;
    /** Placeholder session ID used when no real ID arrived in time. */
    static #PENDING_SESSION_ID = 'pending';
    #claudeLLM;
    #opts;
    /** AbortController of the most recent fast-brain turn, or null when idle. */
    #turnAbort = null;
    #indexWatcher = null;
    #indexBuilding = false;
    #chatCallCount = 0;
    /**
     * @param opts PipelineDirectOptions; also handed unchanged to the ClaudeLLM constructor.
     */
    constructor(opts) {
        super();
        this.#claudeLLM = new ClaudeLLM(opts);
        this.#opts = opts;
    }
    /** Stop the index watcher (call on disconnect/session switch) */
    stopIndexWatcher() {
        if (this.#indexWatcher) {
            this.#indexWatcher.stop();
            this.#indexWatcher = null;
        }
    }
    // Proxy all properties
    get events() { return this.#claudeLLM.events; }
    get sessionId() { return this.#claudeLLM.sessionId; }
    get model() { return this.#claudeLLM.model; }
    get isResumingSession() { return this.#claudeLLM.isResumingSession; }
    label() { return 'pipeline-direct'; }
    // Proxy all methods
    setResumeSessionId(id) { this.#claudeLLM.setResumeSessionId(id); }
    setContinueSession(e) { this.#claudeLLM.setContinueSession(e); }
    /**
     * Reset state for a session switch.
     *
     * Any in-flight fast-brain turn is aborted first so it can neither report a
     * result for the previous session nor start an index watcher after this
     * method has stopped the current one.
     */
    resetForSessionSwitch() {
        if (this.#turnAbort) {
            this.#turnAbort.abort();
            this.#turnAbort = null;
        }
        this.stopIndexWatcher();
        this.#indexBuilding = false;
        this.#claudeLLM.resetForSessionSwitch();
    }
    respondToPermission(allow, msg) { this.#claudeLLM.respondToPermission(allow, msg); }
    hasPendingPermission() { return this.#claudeLLM.hasPendingPermission(); }
    getPendingPermission() { return this.#claudeLLM.getPendingPermission(); }
    getMcpServers() { return this.#claudeLLM.getMcpServers(); }
    setMcpServers(s) { this.#claudeLLM.setMcpServers(s); }
    // Agent control — proxied to ClaudeLLM for fast brain access
    async interruptAgent() { return this.#claudeLLM.interruptQuery(); }
    abortAgent() { this.#claudeLLM.abortQuery(); }
    async rewindAgent(checkpointId) { return this.#claudeLLM.rewindToCheckpoint(checkpointId); }
    hasActiveAgent() { return this.#claudeLLM.hasActiveQuery(); }
    /** Send a new prompt to Claude via direct chat() — event listeners stay attached */
    sendPrompt(prompt) {
        console.log(`📋 [pipeline] Sending prompt to Claude (${prompt.length} chars)`);
        this.#dispatchPrompt(prompt);
    }
    enableMcpServer(k, c) { this.#claudeLLM.enableMcpServer(k, c); }
    disableMcpServer(k) { this.#claudeLLM.disableMcpServer(k); }
    getLatestCheckpoint() { return this.#claudeLLM.getLatestCheckpoint(); }
    getFirstCheckpoint() { return this.#claudeLLM.getFirstCheckpoint(); }
    getCheckpoints() { return this.#claudeLLM.getCheckpoints(); }
    clearCheckpoints() { this.#claudeLLM.clearCheckpoints(); }
    hasCheckpoints() { return this.#claudeLLM.hasCheckpoints(); }
    /**
     * Start a Claude turn and, when the user said something, a background
     * fast-brain turn.
     *
     * If interruption context is pending (and the user text is non-blank), the
     * running Claude query is interrupted and the last user message in
     * `chatCtx` is replaced IN PLACE with a context-enriched version.
     *
     * @returns the stream produced by the wrapped ClaudeLLM.chat()
     */
    chat({ chatCtx, toolCtx, connOptions = DEFAULT_API_CONNECT_OPTIONS, abortController, }) {
        const callN = ++this.#chatCallCount;
        // Extract user text for fast brain
        const lastUserMessage = PipelineDirectLLM.#findLastUserMessage(chatCtx);
        const userText = PipelineDirectLLM.#extractText(lastUserMessage);
        console.log(`📥 [pipeline] chat() call #${callN}: "${userText.substring(0, 60)}"`);
        // Check for pending interruption context — enrich user message if interrupted.
        // The context is consumed even when userText is blank (consume-once contract).
        const interruptCtx = this.#opts.getAndConsumeInterruptionContext?.();
        if (interruptCtx && userText.trim()) {
            console.log(`🔇 [pipeline] Enriching user message with interruption context`);
            // Interrupt Claude's current work before sending enriched message.
            // Best-effort: a failure is logged, never thrown into chat().
            this.#claudeLLM.interruptQuery().catch((err) => {
                console.warn('⚠️ [pipeline] interruptQuery failed:', err?.message);
            });
            const enrichedMessage = [
                `[INTERRUPTED] The user interrupted your response mid-speech.`,
                ``,
                `What the user heard before cutoff:`,
                `"${interruptCtx.spokenText}"`,
                ``,
                `Your recent messages (full untruncated — you wrote these):`,
                interruptCtx.recentMessages || '(no recent messages found)',
                ``,
                `User's message: "${userText}"`,
                ``,
                `RESPOND with speech first, then act:`,
                `- ALWAYS reply with at least one spoken sentence before doing any tool calls`,
                `- If it's a quick side question, answer it then continue where you left off`,
                `- If they want to change direction, acknowledge and follow their lead`,
                `- Clarify when asked to or the question requires going over what you just said`,
                `- Reference unspoken content naturally if relevant`,
            ].join('\n');
            // Non-blank userText implies lastUserMessage exists.
            lastUserMessage.content = [enrichedMessage];
        }
        // Fire Claude
        const claudeStream = this.#claudeLLM.chat({ chatCtx, toolCtx, connOptions, abortController });
        // Fire pipeline fast brain in background — no await, no blocking
        if (userText.trim()) {
            this.#firePipelineFastBrain(userText).catch((err) => {
                console.error('❌ [pipeline] Fast brain error:', err?.message);
            });
        }
        return claudeStream;
    }
    /** Return the most recent item with type 'message' and role 'user', or undefined. */
    static #findLastUserMessage(chatCtx) {
        for (let i = chatCtx.items.length - 1; i >= 0; i--) {
            const item = chatCtx.items[i];
            if (item.type === 'message' && item.role === 'user') {
                return item;
            }
        }
        return undefined;
    }
    /** Join the string parts of a message's content with newlines; '' when there are none. */
    static #extractText(message) {
        if (!message)
            return '';
        if (typeof message.content === 'string')
            return message.content;
        if (Array.isArray(message.content)) {
            return message.content.filter((c) => typeof c === 'string').join('\n');
        }
        return '';
    }
    /** Send `prompt` to Claude as a fresh single-message chat; the returned stream is not consumed here. */
    #dispatchPrompt(prompt) {
        const chatCtx = new llm.ChatContext();
        chatCtx.addMessage({ role: 'user', content: prompt });
        this.#claudeLLM.chat({ chatCtx });
    }
    /**
     * Resolve with the session ID once the SDK emits `session_id`.
     *
     * Falls back to the current ID (or the 'pending' placeholder) when the
     * safety timeout elapses or the turn is aborted. The listener, the timer
     * and the abort hook are all released on whichever path settles first.
     */
    #waitForSessionId(signal) {
        return new Promise((resolve) => {
            const events = this.#claudeLLM.events;
            const fallbackId = () => this.#claudeLLM.sessionId || PipelineDirectLLM.#PENDING_SESSION_ID;
            let timer = null;
            const settle = (value) => {
                clearTimeout(timer);
                events.removeListener('session_id', onSessionId);
                signal.removeEventListener('abort', onGiveUp);
                resolve(value);
            };
            const onSessionId = (data) => settle(data?.sessionId ?? fallbackId());
            const onGiveUp = () => settle(fallbackId());
            if (signal.aborted) {
                resolve(fallbackId());
                return;
            }
            events.once('session_id', onSessionId);
            signal.addEventListener('abort', onGiveUp, { once: true });
            // Safety timeout — don't wait forever
            timer = setTimeout(onGiveUp, PipelineDirectLLM.#SESSION_ID_TIMEOUT_MS);
        });
    }
    /**
     * Build the summary index and start its watcher once per session.
     * Skipped while a watcher exists, a build is in progress, or no real
     * session ID is known. Failures are logged and leave the watcher unset.
     */
    #ensureSummaryIndex(sessionId, workingDir, sessionBaseDir) {
        if (this.#indexWatcher || this.#indexBuilding || sessionId === PipelineDirectLLM.#PENDING_SESSION_ID) {
            return;
        }
        this.#indexBuilding = true;
        try {
            const startBuild = Date.now();
            const state = buildSummaryIndex(sessionId, workingDir, sessionBaseDir, (msg) => console.log(`🔍 [index] ${msg}`));
            this.#indexWatcher = startIndexWatcher(sessionId, workingDir, sessionBaseDir, state);
            console.log(`🔍 [index] Built + watching in ${Date.now() - startBuild}ms`);
        }
        catch (err) {
            console.error('🔍 [index] Build failed:', err?.message);
        }
        finally {
            this.#indexBuilding = false;
        }
    }
    /** Callbacks that give the fast brain control over the wrapped Claude agent. */
    #buildAgentControl() {
        return {
            interrupt: () => this.#claudeLLM.interruptQuery(),
            abort: () => this.#claudeLLM.abortQuery(),
            hasActiveAgent: () => this.#claudeLLM.hasActiveQuery(),
            getRecentUserMessages: (count) => {
                const history = this.#opts.getChatHistory?.() || [];
                return history
                    .filter(t => t.role === 'user')
                    .slice(-count)
                    .map(t => t.content);
            },
            sendPrompt: (prompt) => {
                // Direct call to ClaudeLLM.chat() — event listeners (tts_say, tool_use, etc.) still attached
                // skipTTSQueue mode: tts_say events → index.ts → session.say() — works independently
                console.log(`🧠⚡ [control] Sending new prompt to Claude (${prompt.length} chars)`);
                this.#dispatchPrompt(prompt);
            },
        };
    }
    /**
     * Run one fast-brain turn for `userText` and report its result.
     *
     * Starting a turn aborts the previous one. A superseded turn stops at the
     * next checkpoint: after the session-ID wait (before any index build or
     * fast-brain call) and after the fast-brain call (before reporting).
     */
    async #firePipelineFastBrain(userText) {
        // Abort stale turn
        if (this.#turnAbort)
            this.#turnAbort.abort();
        const turn = new AbortController();
        this.#turnAbort = turn;
        const signal = turn.signal;
        const startMs = Date.now();
        try {
            // Wait for SDK to assign session ID — listen for event instead of polling
            // Large sessions (22MB+) can take 10-15s for SDK to replay JSONL
            const sessionId = this.#claudeLLM.sessionId || await this.#waitForSessionId(signal);
            if (signal.aborted)
                return;
            const workingDir = this.#opts.workingDirectory || process.cwd();
            const sessionBaseDir = this.#opts.sessionBaseDir || workingDir;
            // Build summary index on first question
            this.#ensureSummaryIndex(sessionId, workingDir, sessionBaseDir);
            console.log(`🧠⚡ [pipeline] Fast brain: "${userText.substring(0, 60)}"`);
            const result = await askPipelineFastBrain(workingDir, sessionId, userText, {
                chatHistory: this.#opts.getChatHistory?.() || [],
                researchContext: this.#opts.getResearchContext?.(),
                sessionBaseDir,
                agentControl: this.#buildAgentControl(),
            });
            if (signal.aborted)
                return;
            const elapsedMs = Date.now() - startMs;
            console.log(`🧠⚡ [pipeline] ${result.type} in ${elapsedMs}ms [${result.toolsUsed.join(',')}]: "${result.script.substring(0, 80)}"`);
            this.#opts.onFastBrainResult?.({
                question: userText,
                answer: result.script,
                type: result.type,
                elapsedMs,
                timestamp: Date.now(),
                toolsUsed: result.toolsUsed,
            });
        }
        catch (err) {
            if (err?.name === 'AbortError')
                return;
            console.error('❌ [pipeline] Fast brain error:', err?.message);
        }
        finally {
            // Only clear the slot if a newer turn has not replaced it.
            if (this.#turnAbort === turn)
                this.#turnAbort = null;
        }
    }
}
219
/**
 * Factory for PipelineDirectLLM.
 *
 * @param opts options forwarded unchanged to the PipelineDirectLLM constructor
 * @returns a newly constructed PipelineDirectLLM
 */
export function createPipelineDirectLLM(opts) {
    const pipelineLLM = new PipelineDirectLLM(opts);
    return pipelineLLM;
}
@@ -0,0 +1,45 @@
1
+ /**
2
+ * pipeline-fastbrain.ts — Pipeline Fast Brain (Agent with AFC)
3
+ *
4
+ * Uses Gemini Flash as an AGENT with Automatic Function Calling (AFC).
5
+ * One generateContent() call handles everything:
6
+ * - Gemini decides IF it needs to search (skips for greetings/follow-ups)
7
+ * - Gemini decides WHAT to search (smart phrase selection)
8
+ * - Gemini can multi-step: search → not enough → refine → search again
9
+ * - AFC handles the tool loop internally (up to 3 rounds)
10
+ *
11
+ * Tools:
12
+ * search_session — ripgrep the summary index + read full content via byte offsets
13
+ *
14
+ * No separate phrase extraction call. No manual tool loop. One API invocation.
15
+ */
16
+ export interface PipelineFastBrainResult { // value resolved by askPipelineFastBrain()
17
+ script: string; // response text; PipelineDirectLLM reports it to the UI callback as `answer`
18
+ type: 'answer' | 'research_needed' | 'acknowledgment' | 'error'; // result category; 'error' is a value here, not a thrown exception
19
+ toolsUsed: string[]; // names of tools used for this result; PipelineDirectLLM logs them comma-joined
20
+ }
21
+ export interface PipelineFastBrainOptions { // optional fourth argument of askPipelineFastBrain(); every field is optional
22
+ chatHistory?: { // PipelineDirectLLM passes getChatHistory() here, or [] when that callback is absent
23
+ role: string;
24
+ content: string;
25
+ }[];
26
+ researchContext?: string; // PipelineDirectLLM passes getResearchContext() here
27
+ sessionBaseDir?: string; // PipelineDirectLLM passes opts.sessionBaseDir, falling back to the working directory
28
+ agentControl?: AgentControlCallbacks; // hooks that let the fast brain act on the main Claude agent
29
+ }
30
+ /** Clear the pipeline fast brain session (call on disconnect/reconnect) */
31
+ export declare function clearPipelineFastBrainSession(): void; // takes no arguments and returns nothing
32
+ /** No-op — kept for backward compatibility with index.ts import */
33
+ export declare function prewarmBM25Index(_sessionId: string, _workingDir: string): Promise<void>; // underscore-prefixed params: presumably ignored, consistent with the documented no-op
34
+ /**
35
+ * Callbacks that let the fast brain control the main Claude agent (interrupt, abort, query state, re-prompt).
36
+ * NOTE(review): the comment emitted here originally described a CallableTool factory (ripgrep search of the summary index + byte-offset full content reads from raw JSONL) that this declaration file does not export.
37
+ */
38
+ export interface AgentControlCallbacks {
39
+ interrupt: () => Promise<boolean>; // PipelineDirectLLM wires this to ClaudeLLM.interruptQuery()
40
+ abort: () => void; // PipelineDirectLLM wires this to ClaudeLLM.abortQuery()
41
+ hasActiveAgent: () => boolean; // PipelineDirectLLM wires this to ClaudeLLM.hasActiveQuery()
42
+ getRecentUserMessages: (count: number) => string[]; // last `count` chat-history entries with role 'user', contents only
43
+ sendPrompt: (prompt: string) => void; // starts a new single-message Claude chat; the resulting stream is not returned
44
+ }
45
+ export declare function askPipelineFastBrain(workingDir: string, sessionId: string, question: string, opts?: PipelineFastBrainOptions): Promise<PipelineFastBrainResult>; // PipelineDirectLLM may pass the placeholder 'pending' as sessionId when no SDK session ID arrived in time