osborn 0.5.3 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +9 -0
- package/.claude/skills/markdown-to-pdf/SKILL.md +29 -0
- package/.claude/skills/pdf-to-markdown/SKILL.md +28 -0
- package/.claude/skills/playwright-browser/SKILL.md +75 -0
- package/.claude/skills/youtube-transcript/SKILL.md +24 -0
- package/dist/claude-llm.d.ts +29 -1
- package/dist/claude-llm.js +334 -78
- package/dist/config.d.ts +5 -1
- package/dist/config.js +4 -1
- package/dist/fast-brain.d.ts +70 -16
- package/dist/fast-brain.js +662 -99
- package/dist/index-3-2-26-legacy.d.ts +1 -0
- package/dist/index-3-2-26-legacy.js +2233 -0
- package/dist/index.js +752 -423
- package/dist/jsonl-search.d.ts +66 -0
- package/dist/jsonl-search.js +274 -0
- package/dist/leagcyprompts2.d.ts +0 -0
- package/dist/leagcyprompts2.js +573 -0
- package/dist/pipeline-direct-llm.d.ts +77 -0
- package/dist/pipeline-direct-llm.js +216 -0
- package/dist/pipeline-fastbrain.d.ts +45 -0
- package/dist/pipeline-fastbrain.js +367 -0
- package/dist/prompts-2-25-26.d.ts +0 -0
- package/dist/prompts-2-25-26.js +518 -0
- package/dist/prompts-3-2-26.d.ts +78 -0
- package/dist/prompts-3-2-26.js +1319 -0
- package/dist/prompts.d.ts +83 -12
- package/dist/prompts.js +1991 -588
- package/dist/session-access.d.ts +24 -0
- package/dist/session-access.js +74 -0
- package/dist/summary-index.d.ts +87 -0
- package/dist/summary-index.js +570 -0
- package/dist/turn-detector-shim.d.ts +24 -0
- package/dist/turn-detector-shim.js +83 -0
- package/dist/voice-io.d.ts +9 -3
- package/dist/voice-io.js +39 -20
- package/package.json +13 -10
package/dist/voice-io.d.ts
CHANGED
|
@@ -17,7 +17,7 @@ export interface STTConfig {
|
|
|
17
17
|
language?: string;
|
|
18
18
|
}
|
|
19
19
|
export interface TTSConfig {
|
|
20
|
-
provider: 'gemini' | 'openai' | 'elevenlabs' | 'deepgram';
|
|
20
|
+
provider: 'gemini' | 'openai' | 'elevenlabs' | 'deepgram' | 'groq-orpheus';
|
|
21
21
|
voice?: string;
|
|
22
22
|
model?: string;
|
|
23
23
|
}
|
|
@@ -45,15 +45,21 @@ export declare function createTTS(config: TTSConfig): any;
|
|
|
45
45
|
*/
|
|
46
46
|
export declare function createVAD(): Promise<silero.VAD>;
|
|
47
47
|
/**
|
|
48
|
-
* Default voice I/O configuration
|
|
48
|
+
* Default voice I/O configuration (used by realtime mode fallback)
|
|
49
49
|
* Uses Deepgram STT (fast, accurate) + Deepgram TTS (fast, good)
|
|
50
50
|
*/
|
|
51
51
|
export declare const DEFAULT_VOICE_IO_CONFIG: VoiceIOConfig;
|
|
52
|
+
/**
|
|
53
|
+
* Direct mode voice config — centralized here for easy provider swapping.
|
|
54
|
+
* To switch providers: comment out the active line, uncomment the alternative.
|
|
55
|
+
*/
|
|
56
|
+
export declare const DIRECT_MODE_STT: STTConfig;
|
|
57
|
+
export declare const DIRECT_MODE_TTS: TTSConfig;
|
|
52
58
|
export interface RealtimeModelConfig {
|
|
53
59
|
provider: 'openai' | 'gemini';
|
|
54
60
|
openaiVoice?: 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';
|
|
55
61
|
openaiModel?: string;
|
|
56
|
-
geminiVoice?: '
|
|
62
|
+
geminiVoice?: 'Charon' | 'Puck' | 'Kore' | 'Fenrir' | 'Aoede';
|
|
57
63
|
geminiModel?: string;
|
|
58
64
|
instructions?: string;
|
|
59
65
|
}
|
package/dist/voice-io.js
CHANGED
|
@@ -20,6 +20,7 @@ export function createSTT(config) {
|
|
|
20
20
|
return new deepgram.STT({
|
|
21
21
|
model: (config.model || 'nova-3'),
|
|
22
22
|
language: config.language || 'en',
|
|
23
|
+
endpointing: 600, // Wait 600ms of silence before committing final transcript (default 25ms causes mid-sentence fragments)
|
|
23
24
|
});
|
|
24
25
|
case 'groq-whisper':
|
|
25
26
|
return openai.STT.withGroq({
|
|
@@ -55,7 +56,17 @@ export function createTTS(config) {
|
|
|
55
56
|
break;
|
|
56
57
|
case 'deepgram':
|
|
57
58
|
tts = new deepgram.TTS({
|
|
58
|
-
model: (config.model || 'aura-asteria-en'),
|
|
59
|
+
model: (config.model || 'aura-2-asteria-en'),
|
|
60
|
+
});
|
|
61
|
+
break;
|
|
62
|
+
case 'groq-orpheus':
|
|
63
|
+
// Groq Orpheus TTS via OpenAI-compatible API ($22/M chars)
|
|
64
|
+
// Voices: autumn, diana, hannah, austin, daniel, troy
|
|
65
|
+
tts = new openai.TTS({
|
|
66
|
+
model: config.model || 'canopylabs/orpheus-v1-english',
|
|
67
|
+
voice: config.voice || 'autumn',
|
|
68
|
+
apiKey: process.env.GROQ_API_KEY,
|
|
69
|
+
baseURL: 'https://api.groq.com/openai/v1',
|
|
59
70
|
});
|
|
60
71
|
break;
|
|
61
72
|
default:
|
|
@@ -64,7 +75,7 @@ export function createTTS(config) {
|
|
|
64
75
|
// Increase max listeners to prevent memory leak warnings
|
|
65
76
|
// TTS instances can have many concurrent listeners during active conversations
|
|
66
77
|
if (tts && typeof tts.setMaxListeners === 'function') {
|
|
67
|
-
tts.setMaxListeners(
|
|
78
|
+
tts.setMaxListeners(100);
|
|
68
79
|
}
|
|
69
80
|
return tts;
|
|
70
81
|
}
|
|
@@ -77,23 +88,17 @@ export function createTTS(config) {
|
|
|
77
88
|
* - False triggers from ambient noise
|
|
78
89
|
*/
|
|
79
90
|
export async function createVAD() {
|
|
91
|
+
// VAD now only handles interruption detection — turn detection moved to Deepgram STT (server-side).
|
|
92
|
+
// Lighter settings = less local CPU from ONNX inference.
|
|
80
93
|
return silero.VAD.load({
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
// Allows natural pauses mid-sentence without triggering STT
|
|
86
|
-
// (increased from 0.8s to reduce sentence splitting)
|
|
87
|
-
minSilenceDuration: 1.2,
|
|
88
|
-
// Add 0.2s padding to start of speech chunks for cleaner audio
|
|
89
|
-
prefixPaddingDuration: 0.2,
|
|
90
|
-
// Higher threshold = less sensitive to quiet sounds/noise
|
|
91
|
-
// Default is 0.5, using 0.65 to reduce false positives
|
|
92
|
-
activationThreshold: 0.65,
|
|
94
|
+
minSpeechDuration: 0.4, // 400ms — quick interruption detection
|
|
95
|
+
minSilenceDuration: 1.2, // 1200ms — responsive
|
|
96
|
+
prefixPaddingDuration: 0.1,
|
|
97
|
+
activationThreshold: 0.85, // well above the 0.5 default — less sensitive, used for interruptions only
|
|
93
98
|
});
|
|
94
99
|
}
|
|
95
100
|
/**
|
|
96
|
-
* Default voice I/O configuration
|
|
101
|
+
* Default voice I/O configuration (used by realtime mode fallback)
|
|
97
102
|
* Uses Deepgram STT (fast, accurate) + Deepgram TTS (fast, good)
|
|
98
103
|
*/
|
|
99
104
|
export const DEFAULT_VOICE_IO_CONFIG = {
|
|
@@ -104,9 +109,24 @@ export const DEFAULT_VOICE_IO_CONFIG = {
|
|
|
104
109
|
},
|
|
105
110
|
tts: {
|
|
106
111
|
provider: 'deepgram',
|
|
107
|
-
voice: 'aura-asteria-en',
|
|
112
|
+
voice: 'aura-2-asteria-en',
|
|
108
113
|
},
|
|
109
114
|
};
|
|
115
|
+
/**
|
|
116
|
+
* Direct mode voice config — centralized here for easy provider swapping.
|
|
117
|
+
* To switch providers: comment out the active line, uncomment the alternative.
|
|
118
|
+
*/
|
|
119
|
+
export const DIRECT_MODE_STT = {
|
|
120
|
+
// provider: 'groq-whisper', model: 'whisper-large-v3-turbo',
|
|
121
|
+
// provider: 'openai-whisper', model: 'whisper-1',
|
|
122
|
+
provider: 'deepgram', model: 'nova-3', language: 'en',
|
|
123
|
+
};
|
|
124
|
+
export const DIRECT_MODE_TTS = {
|
|
125
|
+
// provider: 'deepgram', model: 'aura-2-asteria-en', // WebSocket-based: handles TTS abort cleanly (no unrecoverable crash on interruption)
|
|
126
|
+
// provider: 'gemini', model: 'gemini-2.5-flash-preview-tts', voice: 'apollo',
|
|
127
|
+
provider: 'openai', model: 'tts-1', voice: 'fable', // HTTP streaming: throws APIUserAbortError on interrupt → unrecoverable session crash
|
|
128
|
+
// provider: 'groq-orpheus', model: 'canopylabs/orpheus-v1-english', voice: 'autumn', // $22/M chars — voices: autumn, diana, hannah, austin, daniel, troy
|
|
129
|
+
};
|
|
110
130
|
/**
|
|
111
131
|
* Create Realtime Model for native speech-to-speech
|
|
112
132
|
* Supports OpenAI Realtime API and Gemini Live API
|
|
@@ -116,11 +136,10 @@ export const DEFAULT_VOICE_IO_CONFIG = {
|
|
|
116
136
|
export function createRealtimeModel(config) {
|
|
117
137
|
if (config.provider === 'gemini') {
|
|
118
138
|
console.log('📱 Using Gemini Live API (realtime)');
|
|
119
|
-
//
|
|
120
|
-
// with tool calls. No newer model available yet — auto-recovery in index.ts handles this.
|
|
139
|
+
// Using 'latest' alias — 12-2025 had a known 1008 crash bug during interruptions + tool calls
|
|
121
140
|
return new google.beta.realtime.RealtimeModel({
|
|
122
|
-
model: config.geminiModel || 'gemini-2.5-flash-native-audio-
|
|
123
|
-
voice: config.geminiVoice || '
|
|
141
|
+
model: config.geminiModel || 'gemini-2.5-flash-native-audio-latest',
|
|
142
|
+
voice: config.geminiVoice || 'Charon',
|
|
124
143
|
// Gemini supports instructions at model level
|
|
125
144
|
instructions: config.instructions,
|
|
126
145
|
// Enable transcription so we get text of what the agent says
|
package/package.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "osborn",
|
|
3
|
-
"version": "0.5.
|
|
3
|
+
"version": "0.5.5",
|
|
4
4
|
"description": "Voice AI coding assistant - local agent that connects to Osborn frontend",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
7
|
-
"osborn": "
|
|
7
|
+
"osborn": "bin/cli.js"
|
|
8
8
|
},
|
|
9
9
|
"scripts": {
|
|
10
10
|
"dev": "tsx src/index.ts",
|
|
@@ -22,26 +22,29 @@
|
|
|
22
22
|
],
|
|
23
23
|
"repository": {
|
|
24
24
|
"type": "git",
|
|
25
|
-
"url": "https://github.com/Erriccc/osborn.git"
|
|
25
|
+
"url": "git+https://github.com/Erriccc/osborn.git"
|
|
26
26
|
},
|
|
27
27
|
"author": "Osborn Ojure",
|
|
28
28
|
"license": "MIT",
|
|
29
29
|
"dependencies": {
|
|
30
|
-
"@anthropic-ai/claude-agent-sdk": "^0.1.
|
|
30
|
+
"@anthropic-ai/claude-agent-sdk": "^0.1.76",
|
|
31
31
|
"@anthropic-ai/sdk": "^0.52.0",
|
|
32
32
|
"@google/genai": "^1.0.0",
|
|
33
|
-
"@livekit/agents": "^1.
|
|
34
|
-
"@livekit/agents-plugin-deepgram": "^1.
|
|
35
|
-
"@livekit/agents-plugin-elevenlabs": "^1.
|
|
36
|
-
"@livekit/agents-plugin-google": "^1.
|
|
37
|
-
"@livekit/agents-plugin-
|
|
38
|
-
"@livekit/agents-plugin-
|
|
33
|
+
"@livekit/agents": "^1.2.1",
|
|
34
|
+
"@livekit/agents-plugin-deepgram": "^1.2.1",
|
|
35
|
+
"@livekit/agents-plugin-elevenlabs": "^1.2.1",
|
|
36
|
+
"@livekit/agents-plugin-google": "^1.2.1",
|
|
37
|
+
"@livekit/agents-plugin-livekit": "^1.2.1",
|
|
38
|
+
"@livekit/agents-plugin-openai": "^1.2.1",
|
|
39
|
+
"@livekit/agents-plugin-silero": "^1.2.1",
|
|
39
40
|
"@livekit/rtc-node": "^0.13.24",
|
|
40
41
|
"@modelcontextprotocol/sdk": "^1.26.0",
|
|
41
42
|
"@openai/codex-sdk": "^0.77.0",
|
|
42
43
|
"@smithery/api": "^0.48.0",
|
|
44
|
+
"@vscode/ripgrep": "^1.17.1",
|
|
43
45
|
"dotenv": "^16.4.0",
|
|
44
46
|
"livekit-server-sdk": "^2.15.0",
|
|
47
|
+
"minisearch": "^7.2.0",
|
|
45
48
|
"node-pty": "^1.1.0",
|
|
46
49
|
"tsx": "^4.0.0",
|
|
47
50
|
"ws": "^8.19.0",
|