@andypai/orb 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +349 -0
- package/assets/orb-logo.svg +75 -0
- package/assets/orb-terminal-session.svg +72 -0
- package/assets/orb-wordmark.svg +77 -0
- package/package.json +76 -0
- package/prompts/anthropic.md +2 -0
- package/prompts/base.md +1 -0
- package/prompts/openai.md +7 -0
- package/prompts/voice.md +12 -0
- package/src/cli.ts +9 -0
- package/src/config.ts +270 -0
- package/src/index.ts +82 -0
- package/src/pipeline/adapters/anthropic.ts +111 -0
- package/src/pipeline/adapters/openai.ts +202 -0
- package/src/pipeline/adapters/types.ts +16 -0
- package/src/pipeline/adapters/utils.ts +131 -0
- package/src/pipeline/frames.ts +113 -0
- package/src/pipeline/observer.ts +36 -0
- package/src/pipeline/observers/metrics.ts +95 -0
- package/src/pipeline/pipeline.ts +43 -0
- package/src/pipeline/processor.ts +57 -0
- package/src/pipeline/processors/agent.ts +38 -0
- package/src/pipeline/processors/tts.ts +120 -0
- package/src/pipeline/task.ts +239 -0
- package/src/pipeline/transports/terminal-text.ts +24 -0
- package/src/pipeline/transports/types.ts +33 -0
- package/src/services/auth-utils.ts +149 -0
- package/src/services/global-config.ts +363 -0
- package/src/services/openai-auth.ts +18 -0
- package/src/services/prompts.ts +76 -0
- package/src/services/provider-defaults.ts +97 -0
- package/src/services/session.ts +204 -0
- package/src/services/streaming-tts.ts +483 -0
- package/src/services/tts.ts +309 -0
- package/src/setup.ts +234 -0
- package/src/types/index.ts +108 -0
- package/src/ui/App.tsx +142 -0
- package/src/ui/components/ActivityTimeline.tsx +60 -0
- package/src/ui/components/AsciiOrb.tsx +92 -0
- package/src/ui/components/ConversationRail.tsx +44 -0
- package/src/ui/components/Footer.tsx +61 -0
- package/src/ui/components/InputPrompt.tsx +88 -0
- package/src/ui/components/MicroOrb.tsx +25 -0
- package/src/ui/components/TTSErrorBanner.tsx +36 -0
- package/src/ui/components/TurnRow.tsx +71 -0
- package/src/ui/components/WelcomeSplash.tsx +78 -0
- package/src/ui/hooks/useAnimationFrame.ts +33 -0
- package/src/ui/hooks/useConversation.ts +195 -0
- package/src/ui/hooks/useKeyboardShortcuts.ts +57 -0
- package/src/ui/hooks/usePipeline.ts +83 -0
- package/src/ui/hooks/useTerminalSize.ts +37 -0
- package/src/ui/utils/markdown.ts +89 -0
- package/src/ui/utils/model-label.ts +20 -0
- package/src/ui/utils/text.ts +18 -0
- package/src/ui/utils/tool-format.ts +40 -0
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
import { promises as fs } from 'node:fs'
|
|
2
|
+
import path from 'node:path'
|
|
3
|
+
import os from 'node:os'
|
|
4
|
+
import crypto from 'node:crypto'
|
|
5
|
+
|
|
6
|
+
import type {
|
|
7
|
+
AgentSession,
|
|
8
|
+
AnthropicModel,
|
|
9
|
+
LlmProvider,
|
|
10
|
+
OpenAiSession,
|
|
11
|
+
SavedSession,
|
|
12
|
+
} from '../types'
|
|
13
|
+
|
|
14
|
+
// Current on-disk session schema version; bump when SavedSession changes shape.
const SESSION_VERSION = 2
// Session files live under ~/.orb/sessions (joined with os.homedir() in getSessionDir).
const SESSION_DIR = path.join('.orb', 'sessions')
// Session files untouched for longer than this are deleted by cleanupOldSessions.
const MAX_SESSION_AGE_DAYS = 30
|
|
17
|
+
|
|
18
|
+
function isFileNotFoundError(err: unknown): boolean {
|
|
19
|
+
return (err as { code?: string })?.code === 'ENOENT'
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
function getSessionDir(): string {
|
|
23
|
+
return path.join(os.homedir(), SESSION_DIR)
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
function sanitizeFilename(value: string): string {
|
|
27
|
+
const sanitized = value.trim().replace(/[^a-zA-Z0-9._-]+/g, '-')
|
|
28
|
+
return sanitized || 'project'
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
function hashProjectPath(projectPath: string): string {
|
|
32
|
+
return crypto.createHash('sha256').update(projectPath).digest('hex').slice(0, 12)
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
export function getSessionPath(projectPath: string): string {
|
|
36
|
+
const resolved = path.resolve(projectPath)
|
|
37
|
+
const base = sanitizeFilename(path.basename(resolved))
|
|
38
|
+
const hash = hashProjectPath(resolved)
|
|
39
|
+
return path.join(getSessionDir(), `${base}-${hash}.json`)
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/**
 * Legacy (version 1) on-disk session shape, from before multi-provider
 * support. Only read during migration in loadSession; new files are
 * written as version 2.
 */
interface SavedSessionV1 {
  version: 1
  // Absolute project path the session belongs to.
  projectPath: string
  // Anthropic session identifier (v1 predates the AgentSession wrapper).
  sessionId: string
  model: AnthropicModel
  // ISO-8601 timestamp of the last save.
  lastModified: string
  // Same history shape as the current format.
  history: SavedSession['history']
}
|
|
50
|
+
|
|
51
|
+
function isSavedSessionV1(value: unknown): value is SavedSessionV1 {
|
|
52
|
+
if (!value || typeof value !== 'object') return false
|
|
53
|
+
const session = value as SavedSessionV1
|
|
54
|
+
return (
|
|
55
|
+
session.version === 1 &&
|
|
56
|
+
typeof session.projectPath === 'string' &&
|
|
57
|
+
typeof session.sessionId === 'string' &&
|
|
58
|
+
typeof session.model === 'string' &&
|
|
59
|
+
typeof session.lastModified === 'string' &&
|
|
60
|
+
Array.isArray(session.history)
|
|
61
|
+
)
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
function isSavedSessionV2(value: unknown): value is SavedSession {
|
|
65
|
+
if (!value || typeof value !== 'object') return false
|
|
66
|
+
const session = value as SavedSession
|
|
67
|
+
return (
|
|
68
|
+
session.version === SESSION_VERSION &&
|
|
69
|
+
typeof session.projectPath === 'string' &&
|
|
70
|
+
typeof session.llmProvider === 'string' &&
|
|
71
|
+
typeof session.llmModel === 'string' &&
|
|
72
|
+
typeof session.lastModified === 'string' &&
|
|
73
|
+
Array.isArray(session.history)
|
|
74
|
+
)
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
function normalizeSessionProvider(provider: string): LlmProvider | undefined {
|
|
78
|
+
if (provider === 'anthropic' || provider === 'openai') return provider
|
|
79
|
+
return undefined
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
function isValidOpenAiSession(value: unknown): value is OpenAiSession {
|
|
83
|
+
if (!value || typeof value !== 'object') return false
|
|
84
|
+
const session = value as Partial<OpenAiSession>
|
|
85
|
+
return (
|
|
86
|
+
session.provider === 'openai' &&
|
|
87
|
+
typeof session.previousResponseId === 'string' &&
|
|
88
|
+
session.previousResponseId.trim().length > 0
|
|
89
|
+
)
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
function normalizeAgentSession(session?: AgentSession): AgentSession | undefined {
|
|
93
|
+
if (!session) return undefined
|
|
94
|
+
|
|
95
|
+
switch (session.provider) {
|
|
96
|
+
case 'anthropic':
|
|
97
|
+
return session.sessionId?.length > 0 ? session : undefined
|
|
98
|
+
case 'openai':
|
|
99
|
+
return isValidOpenAiSession(session) ? session : undefined
|
|
100
|
+
default:
|
|
101
|
+
return undefined
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
async function cleanupOldSessions(maxAgeDays = MAX_SESSION_AGE_DAYS): Promise<void> {
|
|
106
|
+
const sessionDir = getSessionDir()
|
|
107
|
+
const maxAgeMs = maxAgeDays * 24 * 60 * 60 * 1000
|
|
108
|
+
|
|
109
|
+
let filenames: string[]
|
|
110
|
+
try {
|
|
111
|
+
filenames = await fs.readdir(sessionDir)
|
|
112
|
+
} catch (err) {
|
|
113
|
+
if (isFileNotFoundError(err)) return
|
|
114
|
+
throw err
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
const now = Date.now()
|
|
118
|
+
|
|
119
|
+
await Promise.all(
|
|
120
|
+
filenames
|
|
121
|
+
.filter((name) => name.endsWith('.json'))
|
|
122
|
+
.map(async (name) => {
|
|
123
|
+
const filePath = path.join(sessionDir, name)
|
|
124
|
+
try {
|
|
125
|
+
const stats = await fs.stat(filePath)
|
|
126
|
+
if (stats.isFile() && now - stats.mtimeMs > maxAgeMs) {
|
|
127
|
+
await fs.unlink(filePath)
|
|
128
|
+
}
|
|
129
|
+
} catch {
|
|
130
|
+
// Ignore errors for individual files
|
|
131
|
+
}
|
|
132
|
+
}),
|
|
133
|
+
)
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
export async function loadSession(projectPath: string): Promise<SavedSession | null> {
|
|
137
|
+
const resolved = path.resolve(projectPath)
|
|
138
|
+
const sessionPath = getSessionPath(resolved)
|
|
139
|
+
const sessionFile = Bun.file(sessionPath)
|
|
140
|
+
|
|
141
|
+
void cleanupOldSessions().catch((err) => {
|
|
142
|
+
console.warn('Failed to clean up old sessions:', err)
|
|
143
|
+
})
|
|
144
|
+
|
|
145
|
+
try {
|
|
146
|
+
const parsed = (await sessionFile.json()) as unknown
|
|
147
|
+
if (isSavedSessionV2(parsed)) {
|
|
148
|
+
if (path.resolve(parsed.projectPath) !== resolved) {
|
|
149
|
+
return null
|
|
150
|
+
}
|
|
151
|
+
return {
|
|
152
|
+
...parsed,
|
|
153
|
+
llmProvider: normalizeSessionProvider(parsed.llmProvider) ?? 'anthropic',
|
|
154
|
+
agentSession: normalizeAgentSession(parsed.agentSession),
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
if (isSavedSessionV1(parsed)) {
|
|
159
|
+
if (path.resolve(parsed.projectPath) !== resolved) {
|
|
160
|
+
return null
|
|
161
|
+
}
|
|
162
|
+
const migrated: SavedSession = {
|
|
163
|
+
version: SESSION_VERSION,
|
|
164
|
+
projectPath: parsed.projectPath,
|
|
165
|
+
llmProvider: 'anthropic',
|
|
166
|
+
llmModel: parsed.model,
|
|
167
|
+
agentSession: parsed.sessionId
|
|
168
|
+
? { provider: 'anthropic', sessionId: parsed.sessionId }
|
|
169
|
+
: undefined,
|
|
170
|
+
lastModified: parsed.lastModified,
|
|
171
|
+
history: parsed.history,
|
|
172
|
+
}
|
|
173
|
+
return migrated
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
console.warn('Invalid session format, starting fresh.')
|
|
177
|
+
return null
|
|
178
|
+
} catch (err) {
|
|
179
|
+
if (isFileNotFoundError(err)) return null
|
|
180
|
+
console.warn('Failed to load session, starting fresh:', err)
|
|
181
|
+
return null
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
export async function saveSession(session: SavedSession): Promise<void> {
|
|
186
|
+
const resolved = path.resolve(session.projectPath)
|
|
187
|
+
const sessionPath = getSessionPath(resolved)
|
|
188
|
+
const sessionDir = path.dirname(sessionPath)
|
|
189
|
+
|
|
190
|
+
await fs.mkdir(sessionDir, { recursive: true })
|
|
191
|
+
|
|
192
|
+
const payload: SavedSession = {
|
|
193
|
+
...session,
|
|
194
|
+
version: SESSION_VERSION,
|
|
195
|
+
projectPath: resolved,
|
|
196
|
+
llmProvider: normalizeSessionProvider(session.llmProvider) ?? 'anthropic',
|
|
197
|
+
agentSession: normalizeAgentSession(session.agentSession),
|
|
198
|
+
lastModified: new Date().toISOString(),
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
const tempPath = `${sessionPath}.${process.pid}.${Date.now()}.tmp`
|
|
202
|
+
await Bun.write(tempPath, JSON.stringify(payload, null, 2))
|
|
203
|
+
await fs.rename(tempPath, sessionPath)
|
|
204
|
+
}
|
|
@@ -0,0 +1,483 @@
|
|
|
1
|
+
import { tmpdir } from 'node:os'
|
|
2
|
+
import { join } from 'node:path'
|
|
3
|
+
import { unlink } from 'node:fs/promises'
|
|
4
|
+
import { TTSError, type AppConfig } from '../types'
|
|
5
|
+
import {
|
|
6
|
+
cleanTextForSpeech,
|
|
7
|
+
generateAudio,
|
|
8
|
+
playAudio,
|
|
9
|
+
stopSpeaking,
|
|
10
|
+
wasPlaybackStopped,
|
|
11
|
+
resetPlaybackStoppedFlag,
|
|
12
|
+
} from './tts'
|
|
13
|
+
|
|
14
|
+
/** Optional lifecycle hooks for a streaming speech session. */
export interface StreamingSpeechCallbacks {
  // Fired once, just before the first audio chunk starts playing.
  onSpeakingStart?: () => void
  // Fired when all queued audio has finished (only if speaking ever started).
  onSpeakingEnd?: () => void
  // Fired for generation or playback failures.
  onError?: (error: TTSError) => void
}

/** Handle returned by createStreamingSpeechController. */
export interface StreamingSpeechController {
  // Append a chunk of streamed LLM text; speakable sentences are extracted incrementally.
  feedText(chunk: string): void
  // Signal that no more text will arrive; flushes any remaining buffered text.
  finalize(): void
  // Abort immediately: clears queues, aborts generation, stops playback.
  stop(): void
  // Resolves when all speech has finished (or immediately after stop); rejects on fatal TTS error.
  waitForCompletion(): Promise<void>
  // True while there is queued/in-flight work or speech has started and not been stopped.
  isActive(): boolean
}

/** A generated audio file waiting to be played, paired with its source sentence. */
interface QueuedAudio {
  path: string
  sentence: string
}
|
|
32
|
+
|
|
33
|
+
// Sentence-ending punctuation (optionally followed by closing quotes/brackets)
// then whitespace or end-of-string. Helpers below always re-instantiate these
// via `new RegExp(source, flags)`, so the shared `g`-flag lastIndex state is
// never mutated on the module-level constants themselves.
const STRONG_BOUNDARY = /[.!?]+["')\]]*(?:\s|$)/g
// Clause-level punctuation (comma/semicolon/colon) then whitespace or end.
const SOFT_BOUNDARY = /[,;:](?:\s|$)/g
|
|
35
|
+
|
|
36
|
+
function findLastMatchIndex(text: string, re: RegExp): number {
|
|
37
|
+
const flags = re.flags.includes('g') ? re.flags : `${re.flags}g`
|
|
38
|
+
const pattern = new RegExp(re.source, flags)
|
|
39
|
+
let lastIndex = -1
|
|
40
|
+
|
|
41
|
+
while (pattern.exec(text) !== null) {
|
|
42
|
+
lastIndex = pattern.lastIndex
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
return lastIndex
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
function findLastWhitespaceIndex(text: string): number {
|
|
49
|
+
const lastSpace = Math.max(text.lastIndexOf(' '), text.lastIndexOf('\t'), text.lastIndexOf('\n'))
|
|
50
|
+
return lastSpace >= 0 ? lastSpace + 1 : -1
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
function extractStrongChunks(text: string): { chunks: string[]; consumed: number } {
|
|
54
|
+
const chunks: string[] = []
|
|
55
|
+
const pattern = new RegExp(STRONG_BOUNDARY.source, STRONG_BOUNDARY.flags)
|
|
56
|
+
let lastIndex = 0
|
|
57
|
+
|
|
58
|
+
while (pattern.exec(text) !== null) {
|
|
59
|
+
const end = pattern.lastIndex
|
|
60
|
+
const slice = text.slice(lastIndex, end)
|
|
61
|
+
const trimmed = slice.trimEnd()
|
|
62
|
+
if (trimmed.trim()) {
|
|
63
|
+
chunks.push(trimmed)
|
|
64
|
+
}
|
|
65
|
+
lastIndex = end
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
return { chunks, consumed: lastIndex }
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
function extractChunkAtBoundary(
|
|
72
|
+
text: string,
|
|
73
|
+
boundary: number,
|
|
74
|
+
minLength: number,
|
|
75
|
+
forceFlush: boolean,
|
|
76
|
+
): { chunk: string | null; consumed: number } {
|
|
77
|
+
if (boundary <= 0) return { chunk: null, consumed: 0 }
|
|
78
|
+
|
|
79
|
+
const trimmed = text.slice(0, boundary).trimEnd()
|
|
80
|
+
const hasContent = trimmed.trim().length > 0
|
|
81
|
+
const meetsMinLength = forceFlush || minLength <= 0 || trimmed.trim().length >= minLength
|
|
82
|
+
|
|
83
|
+
if (!hasContent || !meetsMinLength) {
|
|
84
|
+
return { chunk: null, consumed: 0 }
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
return { chunk: trimmed, consumed: boundary }
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
/**
 * Create a controller that turns streamed LLM text into spoken audio.
 *
 * Text fed via feedText() accumulates in a buffer; complete sentences (and,
 * optionally, clause-sized chunks after a timeout) are cut off and pushed
 * through a two-stage pipeline: a generation queue (text -> audio file via
 * generateAudio) feeding a playback queue (playAudio). Both stages run one
 * item at a time and chain themselves from their own `finally` blocks.
 *
 * Timing knobs read from `config`: ttsMinChunkLength, ttsClauseBoundaries,
 * ttsMaxWaitMs (flush timer), ttsGraceWindowMs (extra wait when the buffer
 * ends at punctuation), ttsBufferSentences, ttsSpeed, ttsMode, ttsEnabled.
 */
export function createStreamingSpeechController(
  config: AppConfig,
  callbacks: StreamingSpeechCallbacks = {},
): StreamingSpeechController {
  // --- chunking state ---
  let textBuffer = ''           // all raw text received so far
  let processedOffset = 0       // chars of the CLEANED text already chunked
  let lastCleanedText = ''      // previous cleanTextForSpeech output, for offset reconciliation
  let finalized = false
  let stopped = false
  let speakingStarted = false
  let completed = false
  let lastFlushAt = Date.now()
  let maxWaitTimeout: ReturnType<typeof setTimeout> | null = null
  let graceTimeout: ReturnType<typeof setTimeout> | null = null
  let pendingGrace = false

  const sentenceQueue: string[] = []
  const audioQueue: QueuedAudio[] = []

  // --- pipeline state ---
  let isGenerating = false
  let isPlaying = false
  let generationAbortController: AbortController | null = null
  let completionResolve: (() => void) | null = null
  let completionReject: ((error: TTSError) => void) | null = null
  let completionPromise: Promise<void> | null = null
  let fatalError: TTSError | null = null

  // Cancel both pending timers and drop any pending grace window.
  function clearTimers(): void {
    if (maxWaitTimeout) {
      clearTimeout(maxWaitTimeout)
      maxWaitTimeout = null
    }
    if (graceTimeout) {
      clearTimeout(graceTimeout)
      graceTimeout = null
    }
    pendingGrace = false
  }

  // Best-effort removal of a temp audio file; missing files are ignored.
  async function cleanupAudioPath(path: string): Promise<void> {
    await unlink(path).catch(() => {})
  }

  // Drop all queued audio, deleting the backing temp files asynchronously.
  function clearAudioQueue(): void {
    for (const audio of audioQueue) {
      void cleanupAudioPath(audio.path)
    }
    audioQueue.length = 0
  }

  // Queue a chunk for generation and record the flush time (used by the
  // max-wait timer to decide when to force a flush).
  function enqueueChunk(chunk: string, now: number): void {
    if (!chunk.trim()) return
    sentenceQueue.push(chunk)
    lastFlushAt = now
  }

  // cleanTextForSpeech is not prefix-stable: new raw text can change earlier
  // cleaned output. Rewind processedOffset to the common prefix of the old
  // and new cleaned text so we never skip or re-speak characters.
  function reconcileProcessedOffset(cleanedText: string): void {
    if (!lastCleanedText) {
      lastCleanedText = cleanedText
      processedOffset = Math.min(processedOffset, cleanedText.length)
      return
    }

    if (cleanedText !== lastCleanedText) {
      const maxCheck = Math.min(processedOffset, cleanedText.length, lastCleanedText.length)
      let index = 0

      while (index < maxCheck && cleanedText[index] === lastCleanedText[index]) {
        index += 1
      }

      if (processedOffset > index) {
        processedOffset = index
      }
      lastCleanedText = cleanedText
    }

    if (processedOffset > cleanedText.length) {
      processedOffset = cleanedText.length
    }
  }

  // Cleaned text not yet cut into chunks.
  function getPendingText(cleanedText: string): string {
    reconcileProcessedOffset(cleanedText)
    return cleanedText.slice(processedOffset)
  }

  // Attempt one boundary cut; on success enqueue the chunk, advance the
  // offset, and return the refreshed pending text, else return `pending`.
  function tryExtractAtBoundary(
    cleanedText: string,
    pending: string,
    boundary: number,
    minLength: number,
    forceFlush: boolean,
    now: number,
  ): string {
    const result = extractChunkAtBoundary(pending, boundary, minLength, forceFlush)
    if (result.consumed > 0) {
      if (result.chunk) enqueueChunk(result.chunk, now)
      processedOffset += result.consumed
      return getPendingText(cleanedText)
    }
    return pending
  }

  // Core chunker. Order of preference: full sentences, then (if finalizing)
  // everything left, then clause boundaries (when enabled), then — under
  // forceFlush — the last whitespace, then a hard cut at maxChunkLength.
  // Returns whatever text is still pending.
  function extractChunksFromCleaned(
    cleanedText: string,
    options: { forceFlush: boolean; finalized: boolean; now: number },
  ): string {
    let pending = getPendingText(cleanedText)
    if (!pending.trim()) return pending

    const strong = extractStrongChunks(pending)
    if (strong.chunks.length > 0) {
      for (const chunk of strong.chunks) enqueueChunk(chunk, options.now)
      processedOffset += strong.consumed
      pending = getPendingText(cleanedText)
    }

    if (options.finalized) {
      if (pending.trim()) enqueueChunk(pending.trimEnd(), options.now)
      processedOffset = cleanedText.length
      return ''
    }

    if (!pending.trim()) return pending

    const { ttsMinChunkLength: minLength, ttsClauseBoundaries: allowClauses } = config

    if (allowClauses) {
      const softBoundary = findLastMatchIndex(pending, SOFT_BOUNDARY)
      pending = tryExtractAtBoundary(
        cleanedText,
        pending,
        softBoundary,
        minLength,
        options.forceFlush,
        options.now,
      )
    }

    if (!options.forceFlush || !pending.trim()) return pending

    const maxChunkLength = 200
    const wsBoundary = findLastWhitespaceIndex(pending)
    if (wsBoundary > 0) {
      pending = tryExtractAtBoundary(cleanedText, pending, wsBoundary, minLength, true, options.now)
    } else if (pending.length > 0) {
      // No whitespace boundary found - emit at max length or flush all if shorter
      const emitLength = Math.min(pending.length, maxChunkLength)
      enqueueChunk(pending.slice(0, emitLength), options.now)
      processedOffset += emitLength
      pending = pending.slice(emitLength)
    }

    return pending
  }

  // Convenience wrapper: clean the raw buffer, then chunk it.
  function extractChunks(options: { forceFlush: boolean; finalized: boolean }): string {
    const cleanedText = cleanTextForSpeech(textBuffer)
    const now = Date.now()
    return extractChunksFromCleaned(cleanedText, { ...options, now })
  }

  // Pending text ending in whitespace/punctuation (optionally a closing
  // quote/bracket) likely sits at a natural pause — worth a short extra wait
  // for more text before force-flushing mid-thought.
  function shouldGrace(pending: string): boolean {
    return /[\s.,!?;:]["')\]]?$/.test(pending)
  }

  // (Re)arm the max-wait flush timer relative to the last flush time.
  // No-op when stopped/finalized, the feature is disabled, or nothing is pending.
  function resetFlushTimers(pendingText: string): void {
    clearTimers()
    if (stopped || finalized) return
    if (config.ttsMaxWaitMs <= 0) return
    if (!pendingText.trim()) return

    const elapsed = Date.now() - lastFlushAt
    const delay = Math.max(config.ttsMaxWaitMs - elapsed, 0)
    maxWaitTimeout = setTimeout(handleMaxWait, delay)
  }

  // Max-wait timer fired: force-flush pending text, possibly after one
  // grace-window delay when the text ends at a natural pause.
  function handleMaxWait(): void {
    if (stopped || finalized) return

    const cleanedText = cleanTextForSpeech(textBuffer)
    const pendingText = getPendingText(cleanedText)
    if (!pendingText.trim()) {
      return
    }

    if (config.ttsGraceWindowMs > 0 && shouldGrace(pendingText) && !pendingGrace) {
      pendingGrace = true
      graceTimeout = setTimeout(() => {
        pendingGrace = false
        const remaining = extractChunks({ forceFlush: true, finalized: false })
        maybeStartGeneration()
        resetFlushTimers(remaining)
      }, config.ttsGraceWindowMs)
      return
    }

    const lastFlush = lastFlushAt
    const remaining = extractChunksFromCleaned(cleanedText, {
      forceFlush: true,
      finalized: false,
      now: Date.now(),
    })
    // If the forced flush enqueued nothing (lastFlushAt untouched) but text
    // remains, bump the flush time so the re-armed timer waits a full period
    // instead of firing again immediately.
    if (remaining.trim() && lastFlushAt === lastFlush) {
      lastFlushAt = Date.now()
    }
    maybeStartGeneration()
    resetFlushTimers(remaining)
  }

  // Generate audio for one queued sentence, then self-chain from `finally`.
  // `isGenerating` guarantees at most one generation is in flight.
  async function processGenerationQueue(): Promise<void> {
    if (isGenerating || stopped) return

    const sentence = sentenceQueue.shift()
    if (!sentence) {
      checkCompletion()
      return
    }

    isGenerating = true
    generationAbortController = new AbortController()
    // Unique temp path; extension depends on ttsMode ('generate' -> aiff, else wav).
    const audioPath = join(
      tmpdir(),
      `tts-stream-${Date.now()}-${Math.random().toString(36).slice(2, 8)}.${
        config.ttsMode === 'generate' ? 'aiff' : 'wav'
      }`,
    )

    try {
      await generateAudio(sentence, config, audioPath, generationAbortController.signal)
      if (!stopped) {
        audioQueue.push({ path: audioPath, sentence })
        // Fire-and-forget: playback runs concurrently with further generation.
        processPlaybackQueue()
      } else {
        await cleanupAudioPath(audioPath)
      }
    } catch (err) {
      const ttsError =
        err instanceof TTSError ? err : new TTSError(String(err), 'generation_failed')
      callbacks.onError?.(ttsError)
      fail(ttsError)
    } finally {
      generationAbortController = null
      isGenerating = false
      if (!stopped) {
        processGenerationQueue()
      }
    }
  }

  // Play one queued audio file, then self-chain from `finally`.
  // `isPlaying` guarantees at most one playback at a time.
  async function processPlaybackQueue(): Promise<void> {
    if (isPlaying || stopped) return

    const audio = audioQueue.shift()
    if (!audio) {
      checkCompletion()
      return
    }

    isPlaying = true
    resetPlaybackStoppedFlag()

    // Notify that speaking has started
    if (!speakingStarted) {
      speakingStarted = true
      callbacks.onSpeakingStart?.()
    }

    try {
      await playAudio(audio.path, config.ttsSpeed)
    } catch (err) {
      // Check if playback was stopped manually (not an error)
      if (!wasPlaybackStopped()) {
        const ttsError = err instanceof TTSError ? err : new TTSError(String(err), 'audio_playback')
        callbacks.onError?.(ttsError)
        fail(ttsError)
      }
    } finally {
      await cleanupAudioPath(audio.path)

      isPlaying = false
      if (!stopped) {
        processPlaybackQueue()
      }
    }
  }

  // Any sentence or audio still queued or in flight?
  function hasWorkRemaining(): boolean {
    return isGenerating || isPlaying || sentenceQueue.length > 0 || audioQueue.length > 0
  }

  // Successful completion: fire onSpeakingEnd (only if speech started) and
  // settle waitForCompletion().
  function markComplete(): void {
    if (completed) return
    completed = true
    if (speakingStarted) callbacks.onSpeakingEnd?.()
    completionResolve?.()
  }

  // Fatal failure: tear down both queues and reject waitForCompletion().
  // NOTE(review): unlike markComplete, this does not fire onSpeakingEnd —
  // callers get onError instead; confirm that is the intended contract.
  function fail(error: TTSError): void {
    if (completed) return
    fatalError = error
    stopped = true
    completed = true
    sentenceQueue.length = 0
    clearAudioQueue()
    completionReject?.(error)
  }

  // Called when either queue drains: complete only once finalized and idle.
  function checkCompletion(): void {
    if (finalized && !stopped && !hasWorkRemaining() && !completed) {
      markComplete()
    }
  }

  // Kick generation once enough sentences are buffered (ttsBufferSentences)
  // or we are finalizing and must drain whatever is left.
  function maybeStartGeneration(): void {
    if (stopped || isGenerating) return
    if (sentenceQueue.length >= config.ttsBufferSentences || finalized) {
      processGenerationQueue()
    }
  }

  return {
    // Append streamed text, chunk what's ready, and (re)arm the flush timer.
    feedText(chunk: string): void {
      if (stopped || !config.ttsEnabled) return
      textBuffer += chunk
      const remaining = extractChunks({ forceFlush: false, finalized: false })
      maybeStartGeneration()
      resetFlushTimers(remaining)
    },

    // No more text is coming: flush everything and let the pipeline drain.
    finalize(): void {
      if (stopped || finalized) return
      finalized = true
      clearTimers()

      if (!config.ttsEnabled) {
        completionResolve?.()
        return
      }

      // Extract any remaining chunks
      extractChunks({ forceFlush: true, finalized: true })
      processGenerationQueue()
      checkCompletion()
    },

    // Hard stop: drop queues, abort in-flight generation, halt playback,
    // and resolve (not reject) waitForCompletion.
    stop(): void {
      stopped = true
      completed = true
      clearTimers()
      sentenceQueue.length = 0

      clearAudioQueue()

      generationAbortController?.abort()
      generationAbortController = null

      // Stop current playback
      stopSpeaking()

      // Resolve completion promise
      completionResolve?.()
    },

    // Lazily create the completion promise; settles immediately when the
    // controller is already done, stopped, errored, or TTS is disabled.
    waitForCompletion(): Promise<void> {
      if (completionPromise) return completionPromise

      completionPromise = new Promise((resolve, reject) => {
        completionResolve = resolve
        completionReject = reject

        if (fatalError) {
          reject(fatalError)
          return
        }

        const alreadyDone = stopped || completed || !config.ttsEnabled
        const justFinished = finalized && !hasWorkRemaining()

        if (alreadyDone || justFinished) {
          markComplete()
          resolve()
        }
      })

      return completionPromise
    },

    isActive(): boolean {
      return !stopped && (hasWorkRemaining() || speakingStarted)
    },
  }
}
|