@andypai/orb 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/LICENSE +21 -0
  2. package/README.md +349 -0
  3. package/assets/orb-logo.svg +75 -0
  4. package/assets/orb-terminal-session.svg +72 -0
  5. package/assets/orb-wordmark.svg +77 -0
  6. package/package.json +76 -0
  7. package/prompts/anthropic.md +2 -0
  8. package/prompts/base.md +1 -0
  9. package/prompts/openai.md +7 -0
  10. package/prompts/voice.md +12 -0
  11. package/src/cli.ts +9 -0
  12. package/src/config.ts +270 -0
  13. package/src/index.ts +82 -0
  14. package/src/pipeline/adapters/anthropic.ts +111 -0
  15. package/src/pipeline/adapters/openai.ts +202 -0
  16. package/src/pipeline/adapters/types.ts +16 -0
  17. package/src/pipeline/adapters/utils.ts +131 -0
  18. package/src/pipeline/frames.ts +113 -0
  19. package/src/pipeline/observer.ts +36 -0
  20. package/src/pipeline/observers/metrics.ts +95 -0
  21. package/src/pipeline/pipeline.ts +43 -0
  22. package/src/pipeline/processor.ts +57 -0
  23. package/src/pipeline/processors/agent.ts +38 -0
  24. package/src/pipeline/processors/tts.ts +120 -0
  25. package/src/pipeline/task.ts +239 -0
  26. package/src/pipeline/transports/terminal-text.ts +24 -0
  27. package/src/pipeline/transports/types.ts +33 -0
  28. package/src/services/auth-utils.ts +149 -0
  29. package/src/services/global-config.ts +363 -0
  30. package/src/services/openai-auth.ts +18 -0
  31. package/src/services/prompts.ts +76 -0
  32. package/src/services/provider-defaults.ts +97 -0
  33. package/src/services/session.ts +204 -0
  34. package/src/services/streaming-tts.ts +483 -0
  35. package/src/services/tts.ts +309 -0
  36. package/src/setup.ts +234 -0
  37. package/src/types/index.ts +108 -0
  38. package/src/ui/App.tsx +142 -0
  39. package/src/ui/components/ActivityTimeline.tsx +60 -0
  40. package/src/ui/components/AsciiOrb.tsx +92 -0
  41. package/src/ui/components/ConversationRail.tsx +44 -0
  42. package/src/ui/components/Footer.tsx +61 -0
  43. package/src/ui/components/InputPrompt.tsx +88 -0
  44. package/src/ui/components/MicroOrb.tsx +25 -0
  45. package/src/ui/components/TTSErrorBanner.tsx +36 -0
  46. package/src/ui/components/TurnRow.tsx +71 -0
  47. package/src/ui/components/WelcomeSplash.tsx +78 -0
  48. package/src/ui/hooks/useAnimationFrame.ts +33 -0
  49. package/src/ui/hooks/useConversation.ts +195 -0
  50. package/src/ui/hooks/useKeyboardShortcuts.ts +57 -0
  51. package/src/ui/hooks/usePipeline.ts +83 -0
  52. package/src/ui/hooks/useTerminalSize.ts +37 -0
  53. package/src/ui/utils/markdown.ts +89 -0
  54. package/src/ui/utils/model-label.ts +20 -0
  55. package/src/ui/utils/text.ts +18 -0
  56. package/src/ui/utils/tool-format.ts +40 -0
@@ -0,0 +1,38 @@
1
+ import type { Frame } from '../frames'
2
+ import { createFrame } from '../frames'
3
+ import type { Processor } from '../processor'
4
+ import type { AgentAdapterConfig } from '../adapters/types'
5
+ import { createAnthropicAdapter } from '../adapters/anthropic'
6
+ import { createOpenAiAdapter } from '../adapters/openai'
7
+ import { isAbortError } from '../adapters/utils'
8
+
/**
 * Builds the agent stage of the pipeline.
 *
 * Each `user-text` frame is handed to an LLM adapter — the OpenAI adapter when
 * `appConfig.llmProvider` is `'openai'`, the Anthropic adapter otherwise — and
 * every frame the adapter streams back is re-emitted downstream. Frames of any
 * other kind are forwarded untouched.
 *
 * A failure while streaming is turned into a single `agent-error` frame unless
 * `isAbortError` recognises it, in which case it is dropped silently.
 */
export function createAgentProcessor(adapterConfig: AgentAdapterConfig): Processor {
  // The provider is read when a frame arrives, so a fresh adapter is built per user turn.
  const buildAdapter = () =>
    adapterConfig.appConfig.llmProvider === 'openai'
      ? createOpenAiAdapter(adapterConfig)
      : createAnthropicAdapter(adapterConfig)

  return async function* agentProcessor(upstream: AsyncIterable<Frame>): AsyncGenerator<Frame> {
    for await (const incoming of upstream) {
      if (incoming.kind === 'user-text') {
        const adapter = buildAdapter()

        try {
          yield* adapter.stream(incoming.text)
        } catch (cause) {
          if (isAbortError(cause)) continue

          const error = cause instanceof Error ? cause : new Error(String(cause))
          yield createFrame('agent-error', { error })
        }
      } else {
        yield incoming
      }
    }
  }
}
@@ -0,0 +1,120 @@
1
+ import type { Frame } from '../frames'
2
+ import { createFrame } from '../frames'
3
+ import type { Processor } from '../processor'
4
+ import type { AppConfig } from '../../types'
5
+ import { TTSError } from '../../types'
6
+ import {
7
+ createStreamingSpeechController,
8
+ type StreamingSpeechController,
9
+ } from '../../services/streaming-tts'
10
+ import { speak, stopSpeaking } from '../../services/tts'
11
+
/**
 * Handle to speech work that is still outstanding once the agent has
 * produced its final text.
 */
export interface TTSCompletionHandle {
  /** Resolves once the outstanding speech work has finished. */
  waitForCompletion(): Promise<void>

  /** Stops the outstanding speech work. */
  stop(): void
}
16
+
/**
 * Callback surface through which the TTS processor reports its outstanding
 * speech work to whoever is running the pipeline.
 */
export interface TTSRunControl {
  /**
   * Registers the completion handle for the current run.
   * `null` means there is no speech work to wait for.
   */
  setCompletion(handle: TTSCompletionHandle | null): void
}
20
+
/**
 * TTSProcessor: intercepts agent text frames to drive text-to-speech.
 *
 * TTS disabled (`appConfig.ttsEnabled` is falsy): reports a `null` completion
 * handle and forwards upstream unchanged.
 *
 * Streaming mode (`appConfig.ttsStreamingEnabled`): wraps a
 * StreamingSpeechController, feeds it every `agent-text-delta`, finalizes it on
 * `agent-text-complete`, and turns the controller callbacks into
 * `tts-speaking-start` / `tts-speaking-end` / `tts-error` frames. If the
 * controller is still active after finalizing, a completion handle is handed to
 * `runControl` and the controller is no longer stopped by this processor.
 *
 * Batch mode: on `agent-text-complete`, hands `runControl` a completion handle
 * whose `waitForCompletion` speaks that frame's full text.
 *
 * All upstream frames pass through to downstream (transport sees them for UI updates).
 *
 * @param appConfig  Application configuration; the TTS flags are read once per stream.
 * @param runControl Optional sink for the completion handle.
 * @returns A processor that forwards every upstream frame, interleaved with TTS frames.
 */
export function createTTSProcessor(appConfig: AppConfig, runControl?: TTSRunControl): Processor {
  return async function* ttsProcessor(upstream: AsyncIterable<Frame>): AsyncGenerator<Frame> {
    if (!appConfig.ttsEnabled) {
      runControl?.setCompletion(null)
      yield* upstream
      return
    }

    const useStreaming = appConfig.ttsStreamingEnabled
    let controller: StreamingSpeechController | null = null
    // Once ownership moves to the completion handle, the `finally` below must not stop playback.
    let controllerHandedOff = false
    // Controller callbacks cannot yield, so they queue frames here to be emitted from the loop.
    const pendingTTSFrames: Frame[] = []

    if (useStreaming) {
      controller = createStreamingSpeechController(appConfig, {
        onSpeakingStart: () => {
          pendingTTSFrames.push(createFrame('tts-speaking-start'))
        },
        onSpeakingEnd: () => {
          pendingTTSFrames.push(createFrame('tts-speaking-end'))
        },
        onError: (err: TTSError) => {
          pendingTTSFrames.push(
            createFrame('tts-error', {
              errorType: err.type,
              message: err.message,
            }),
          )
        },
      })
    }

    /** Emits every queued TTS frame in arrival order, emptying the queue. */
    function* drainPending(): Iterable<Frame> {
      let next = pendingTTSFrames.shift()
      while (next !== undefined) {
        yield next
        next = pendingTTSFrames.shift()
      }
    }

    try {
      for await (const frame of upstream) {
        // Feed text deltas to streaming TTS controller
        if (frame.kind === 'agent-text-delta' && controller) {
          controller.feedText(frame.delta)
        }

        // On agent completion, finalize TTS
        if (frame.kind === 'agent-text-complete') {
          if (controller) {
            // Streaming mode: finalize, forward the frame, then flush queued TTS frames
            controller.finalize()
            yield frame
            yield* drainPending()

            if (controller.isActive()) {
              const ctrl = controller
              controllerHandedOff = true
              runControl?.setCompletion({
                waitForCompletion: () => ctrl.waitForCompletion(),
                stop: () => ctrl.stop(),
              })
            }
            continue
          }

          // Batch mode: hand the synthesized playback work to the task layer.
          // Capture this frame's text so the handle never reads a later value.
          const textToSpeak = frame.text
          yield frame
          runControl?.setCompletion({
            waitForCompletion: () => speak(textToSpeak, appConfig),
            stop: () => stopSpeaking(),
          })
          continue
        }

        // Pass through all frames + drain any TTS side-effect frames
        yield frame
        yield* drainPending()
      }

      // Frames queued after the last in-loop drain (e.g. a late tts-error)
      // would otherwise be lost once upstream ends.
      yield* drainPending()
    } finally {
      if (!controllerHandedOff) {
        controller?.stop()
      }
    }
  }
}
@@ -0,0 +1,239 @@
1
+ import { TTSError, type AppState, type AgentSession, type AppConfig } from '../types'
2
+ import type { Frame } from './frames'
3
+ import { createFrame } from './frames'
4
+ import { singleFrame } from './processor'
5
+ import { createPipeline } from './pipeline'
6
+ import type { PipelineObserver } from './observer'
7
+ import { createAgentProcessor } from './processors/agent'
8
+ import { createTTSProcessor, type TTSCompletionHandle } from './processors/tts'
9
+ import type { Transport, OutboundFrame } from './transports/types'
10
+ import { isAbortError } from './adapters/utils'
11
+
/** State reported by a pipeline task; shares the application's `AppState` values. */
export type TaskState = AppState
13
+
/** Outcome of a single `PipelineTask.run` call. */
export interface RunResult {
  /** The entry id that was passed to `run`. */
  entryId: string

  /** Text of the `agent-text-complete` frame, or `''` when none was seen. */
  text: string

  /** Session reported by the agent during this run, if any. */
  session?: AgentSession

  /** Error reported by the agent or thrown by the pipeline, if any. */
  error?: Error

  /** `true` when the run was superseded by a newer run or by `cancel()`. */
  cancelled: boolean
}
21
+
/** Construction options for `createPipelineTask`. */
export interface PipelineTaskConfig {
  /** Initial application configuration; replaceable later through `updateConfig`. */
  appConfig: AppConfig

  /** Agent session to start from, if one already exists. */
  session?: AgentSession

  /** Destination for frames that should reach the UI. */
  transport: Transport

  /** Observers notified at run start and run end, and handed to the pipeline. */
  observers?: PipelineObserver[]
}
28
+
/** Callback invoked with the new state each time the task's state changes. */
type StateListener = (state: TaskState) => void
30
+
/** Public surface of the object returned by `createPipelineTask`. */
export interface PipelineTask {
  /** Current task state. */
  readonly state: TaskState

  /** Subscribes to state changes; the returned function removes the listener. */
  onStateChange(listener: StateListener): () => void

  /** Runs one query through the pipeline, superseding any run still in progress. */
  run(query: string, entryId: string): Promise<RunResult>

  /** Aborts the current run, stops pending speech, and returns the task to `idle`. */
  cancel(): void

  /** Replaces the configuration used when subsequent runs build their pipeline. */
  updateConfig(config: AppConfig): void
}
38
+
/**
 * Frame kinds that are forwarded to the transport.
 * Any kind not listed here stays internal to the task.
 */
const OUTBOUND_KINDS: ReadonlySet<Frame['kind']> = new Set<Frame['kind']>([
  // agent output
  'agent-text-delta',
  'agent-text-complete',
  // tool activity
  'tool-call-start',
  'tool-call-result',
  // agent failure
  'agent-error',
  // speech lifecycle
  'tts-speaking-start',
  'tts-speaking-end',
  'tts-error',
])
50
+
/**
 * Creates the task that drives one user query at a time through the
 * agent → TTS pipeline, tracks the resulting state, and forwards displayable
 * frames to the transport.
 *
 * Starting a new run or calling `cancel()` invalidates the run in progress:
 * its abort controller is aborted, its pending speech is stopped, and it stops
 * forwarding frames as soon as it notices it is stale.
 *
 * @param taskConfig Initial config, optional starting session, transport and observers.
 * @returns The task object; see `PipelineTask` for the individual members.
 */
export function createPipelineTask(taskConfig: PipelineTaskConfig): PipelineTask {
  let state: TaskState = 'idle'
  let config = taskConfig.appConfig
  let session: AgentSession | undefined = taskConfig.session
  const transport = taskConfig.transport
  const observers = taskConfig.observers ?? []
  const stateListeners = new Set<StateListener>()

  // Incremented by every run() and cancel(); a run is stale once this no longer equals its id.
  let runCounter = 0
  let currentAbort: AbortController | null = null
  let currentTtsCompletion: TTSCompletionHandle | null = null

  /** Updates the state and notifies listeners; a no-op when the state is unchanged. */
  function setState(next: TaskState): void {
    if (next === state) return
    state = next
    for (const listener of stateListeners) {
      listener(next)
    }
  }

  /** Narrows a frame to the subset that is routed to the transport. */
  function isOutboundFrame(frame: Frame): frame is OutboundFrame {
    return OUTBOUND_KINDS.has(frame.kind)
  }

  const task: PipelineTask = {
    get state() {
      return state
    },

    onStateChange(listener: StateListener): () => void {
      stateListeners.add(listener)
      return () => stateListeners.delete(listener)
    },

    async run(query: string, entryId: string): Promise<RunResult> {
      // Cancel any in-progress run
      if (currentAbort) {
        currentAbort.abort()
        currentAbort = null
      }
      if (currentTtsCompletion) {
        currentTtsCompletion.stop()
        currentTtsCompletion = null
      }

      const runId = ++runCounter
      const abortController = new AbortController()
      currentAbort = abortController

      setState('processing')

      let finalText = ''
      let finalSession: AgentSession | undefined
      let ttsCompletion: TTSCompletionHandle | null = null
      let error: Error | undefined

      // Notify observers
      for (const observer of observers) {
        observer.onRunStart?.(runId)
      }

      // Build pipeline: agent → tts
      const pipeline = createPipeline({
        processors: [
          createAgentProcessor({
            appConfig: config,
            session,
            abortController,
          }),
          createTTSProcessor(config, {
            setCompletion(handle) {
              ttsCompletion = handle
              currentTtsCompletion = handle
            },
          }),
        ],
        observers,
      })

      // Create frame source
      const source = singleFrame(createFrame('user-text', { text: query, entryId }))

      try {
        for await (const frame of pipeline(source)) {
          // Stale run check
          if (runId !== runCounter) break

          switch (frame.kind) {
            case 'agent-text-complete':
              finalText = frame.text
              if (frame.session) finalSession = frame.session
              break

            case 'agent-session':
              finalSession = frame.session
              break

            case 'agent-error':
              error = frame.error
              break

            case 'tts-speaking-start':
              // A repeated start while already in 'processing_speaking' must not
              // demote the state to 'speaking': the next tts-speaking-end would
              // then report 'idle' although this run is still consuming frames.
              setState(
                state === 'processing' || state === 'processing_speaking'
                  ? 'processing_speaking'
                  : 'speaking',
              )
              break

            case 'tts-speaking-end':
              if (state === 'processing_speaking') setState('processing')
              else if (state === 'speaking') setState('idle')
              break
          }

          // Route displayable frames to transport
          if (isOutboundFrame(frame)) {
            transport.sendOutbound(frame)
          }
        }
      } catch (err) {
        if (!isAbortError(err)) {
          error = err instanceof Error ? err : new Error(String(err))
        }
      }

      // Handle TTS pending work (speaking state after agent completes)
      if (ttsCompletion && runId === runCounter && !error) {
        // The cast is needed because `ttsCompletion` is only assigned inside the setCompletion closure.
        const completion = ttsCompletion as TTSCompletionHandle
        setState('speaking')
        try {
          await completion.waitForCompletion()
        } catch (err) {
          // Only TTSError failures of the still-current run are surfaced to the UI.
          if (runId === runCounter && err instanceof TTSError) {
            transport.sendOutbound(
              createFrame('tts-error', {
                errorType: err.type,
                message: err.message,
              }) as OutboundFrame,
            )
          }
        } finally {
          // Clear the shared handle only if a newer run has not replaced it.
          if (currentTtsCompletion === completion) {
            currentTtsCompletion = null
          }
        }
      }

      // Notify observers of run end
      for (const observer of observers) {
        observer.onRunEnd?.({
          runId,
          startTime: 0, // observers track their own startTime via onRunStart
          endTime: Date.now(),
          totalTextChars: 0,
          toolCallCount: 0,
          toolErrorCount: 0,
          ttsErrorCount: 0,
          frameCounts: {},
        })
      }

      // Final state transition
      const cancelled = runId !== runCounter
      if (!cancelled) {
        setState('idle')
        currentAbort = null

        // Update session for future runs
        if (finalSession) {
          session = finalSession
        }
      }

      return { entryId, text: finalText, session: finalSession, error, cancelled }
    },

    cancel(): void {
      runCounter++ // invalidate current run
      currentAbort?.abort()
      currentAbort = null
      currentTtsCompletion?.stop()
      currentTtsCompletion = null
      setState('idle')
    },

    updateConfig(newConfig: AppConfig): void {
      config = newConfig
    },
  }

  return task
}
@@ -0,0 +1,24 @@
1
+ import type { Transport, OutboundFrame } from './types'
2
+
type OutboundListener = (frame: OutboundFrame) => void

/**
 * Creates the in-memory transport used by the same-process terminal UI.
 *
 * Delivery is synchronous: `sendOutbound` calls every subscribed listener
 * before it returns (pipeline and React UI live in the same Bun process).
 */
export function createTerminalTextTransport(): Transport {
  const subscribers = new Set<OutboundListener>()

  // Registers a listener and hands back the function that removes it again.
  const onOutbound = (listener: OutboundListener): (() => void) => {
    subscribers.add(listener)
    return () => subscribers.delete(listener)
  }

  // Fans a frame out to every current subscriber, in subscription order.
  const sendOutbound = (frame: OutboundFrame): void => {
    subscribers.forEach((notify) => {
      notify(frame)
    })
  }

  return { onOutbound, sendOutbound }
}
@@ -0,0 +1,33 @@
1
+ import type {
2
+ AgentTextDeltaFrame,
3
+ AgentTextCompleteFrame,
4
+ ToolCallStartFrame,
5
+ ToolCallResultFrame,
6
+ AgentErrorFrame,
7
+ TTSSpeakingStartFrame,
8
+ TTSSpeakingEndFrame,
9
+ TTSErrorFrame,
10
+ } from '../frames'
11
+
/**
 * Frames the pipeline task sends to the UI: agent text, tool activity,
 * agent errors, and the TTS speaking/error lifecycle.
 */
export type OutboundFrame =
  // agent output
  | AgentTextDeltaFrame
  | AgentTextCompleteFrame
  // tool activity
  | ToolCallStartFrame
  | ToolCallResultFrame
  // agent failure
  | AgentErrorFrame
  // speech lifecycle
  | TTSSpeakingStartFrame
  | TTSSpeakingEndFrame
  | TTSErrorFrame
22
+
/**
 * Transport: the boundary between the pipeline system and the UI.
 * Frames travel outbound only: task → UI (agent/TTS frames for rendering).
 */
export interface Transport {
  /**
   * Subscribes to outbound frames (the UI listens here).
   * The returned function removes the subscription.
   */
  onOutbound(listener: (frame: OutboundFrame) => void): () => void

  /** Sends an outbound frame from the task side. */
  sendOutbound(frame: OutboundFrame): void
}
@@ -0,0 +1,149 @@
1
+ import { Buffer } from 'node:buffer'
2
+
/** Credentials extracted from a Codex auth file. */
export interface CodexTokens {
  /** Access token, trimmed of surrounding whitespace. */
  accessToken: string

  /** Refresh token, trimmed of surrounding whitespace. */
  refreshToken: string

  /** Expiry as a Unix timestamp in milliseconds. */
  expiresAt: number

  /** Account id decoded from the token claims, when one could be found. */
  accountId?: string
}
9
+
/** Property names that are checked first, in this order, on every object visited. */
const TOKEN_KEYS = [
  'access_token',
  'accessToken',
  'token',
  'id_token',
  'idToken',
  'session_token',
  'sessionToken',
]

/**
 * Depth-first token search behind `findToken`.
 *
 * @param value              Value to inspect.
 * @param bareStringsAllowed Whether a plain string reached here may be returned
 *                           as a token. It is true at the top level and below
 *                           keys whose name contains "token"; arrays inherit
 *                           the setting of the key that holds them.
 */
function searchForToken(value: unknown, bareStringsAllowed: boolean): string | null {
  if (!value) return null

  if (typeof value === 'string') {
    // A string under an unrelated key (e.g. a user name) is not a token.
    if (!bareStringsAllowed) return null
    return value.trim() || null
  }

  if (Array.isArray(value)) {
    for (const entry of value) {
      const found = searchForToken(entry, bareStringsAllowed)
      if (found) return found
    }
    return null
  }

  if (typeof value !== 'object') return null

  const record = value as Record<string, unknown>

  // Check well-known token keys first
  for (const key of TOKEN_KEYS) {
    const candidate = record[key]
    if (typeof candidate === 'string' && candidate.trim()) {
      return candidate.trim()
    }
  }

  // Then walk every property; only token-like key names may contribute bare strings.
  for (const [key, candidate] of Object.entries(record)) {
    if (!candidate) continue

    const found = searchForToken(candidate, key.toLowerCase().includes('token'))
    if (found) return found
  }

  return null
}

/**
 * Finds the first token-looking string inside an arbitrary parsed value.
 *
 * - A string input is returned trimmed (or `null` when blank).
 * - An array input yields the first token found among its entries.
 * - An object is searched for the well-known keys in `TOKEN_KEYS` first, then
 *   recursively; string properties count only when their key contains "token"
 *   (case-insensitive), so unrelated string fields are never returned.
 *
 * @param value Parsed data of unknown shape.
 * @returns The trimmed token, or `null` when none was found.
 */
export function findToken(value: unknown): string | null {
  return searchForToken(value, true)
}
63
+
/**
 * Reads and parses a JSON file through Bun's file API.
 *
 * @param path Location of the file to read.
 * @returns The parsed content, or `null` when the file does not exist or when
 *          reading or parsing fails for any reason.
 */
export async function readJsonFile(path: string): Promise<unknown | null> {
  try {
    const handle = Bun.file(path)
    const present = await handle.exists()
    if (!present) {
      return null
    }

    const parsed: unknown = await handle.json()
    return parsed
  } catch {
    // Best effort: every failure is reported as "no data".
    return null
  }
}
73
+
/**
 * Decodes the payload of one JWT-shaped token and looks for an account id.
 *
 * Locations checked, in order: the top-level `chatgpt_account_id` claim, the
 * same claim nested under `https://api.openai.com/auth`, and the `id` of the
 * first entry in `organizations`.
 *
 * @returns The account id, or `undefined` when the token is not a three-part
 *          token, cannot be decoded, or carries none of the claims above.
 */
function decodeAccountIdClaim(token: string): string | undefined {
  const parts = token.split('.')
  if (parts.length !== 3) return undefined

  const payloadPart = parts[1]
  if (!payloadPart) return undefined

  try {
    const claims: unknown = JSON.parse(Buffer.from(payloadPart, 'base64url').toString('utf8'))
    if (!claims || typeof claims !== 'object') return undefined
    const record = claims as Record<string, unknown>

    if (typeof record.chatgpt_account_id === 'string') {
      return record.chatgpt_account_id
    }

    const auth = record['https://api.openai.com/auth']
    if (auth && typeof auth === 'object') {
      const nested = (auth as Record<string, unknown>).chatgpt_account_id
      if (typeof nested === 'string') return nested
    }

    const orgs = record.organizations
    if (Array.isArray(orgs)) {
      const first: unknown = orgs[0]
      if (first && typeof first === 'object') {
        const orgId = (first as Record<string, unknown>).id
        // Only a non-empty string is a usable id; numbers or objects are ignored.
        if (typeof orgId === 'string' && orgId) return orgId
      }
    }
  } catch {
    return undefined
  }

  return undefined
}

/**
 * Extracts the account id from the claims of the given tokens.
 *
 * The `id_token` is tried first. When it is missing, is not a string, cannot
 * be decoded, or carries no account id, the `access_token` is tried next.
 *
 * @param tokens Raw token strings as stored in the auth file.
 * @returns The account id, or `undefined` when neither token yields one.
 */
export function extractAccountId(tokens: {
  id_token?: string
  access_token?: string
}): string | undefined {
  for (const token of [tokens.id_token, tokens.access_token]) {
    // The input may come from an unvalidated file, so guard the type at runtime.
    if (typeof token !== 'string' || !token) continue

    const accountId = decodeAccountIdClaim(token)
    if (accountId !== undefined) return accountId
  }

  return undefined
}
111
+
/**
 * Converts the parsed content of a Codex auth file into `CodexTokens`.
 *
 * The file must be an object with a `tokens` object holding non-blank string
 * `access_token` and `refresh_token` values.
 *
 * Expiry is estimated, not read: one hour after `last_refresh` when that field
 * is a parseable date string, otherwise five minutes from now so that callers
 * re-check soon. An unparseable `last_refresh` takes the five-minute path
 * instead of producing a `NaN` expiry.
 *
 * @param data Parsed JSON of unknown shape.
 * @returns The extracted tokens, or `null` when the required fields are missing.
 */
export function parseCodexAuthFile(data: unknown): CodexTokens | null {
  if (!data || typeof data !== 'object') return null

  const record = data as Record<string, unknown>
  const tokens = record.tokens
  if (!tokens || typeof tokens !== 'object') return null

  const tokenRecord = tokens as Record<string, unknown>
  const accessToken = tokenRecord.access_token
  const refreshToken = tokenRecord.refresh_token

  if (typeof accessToken !== 'string' || !accessToken.trim()) return null
  if (typeof refreshToken !== 'string' || !refreshToken.trim()) return null

  // Parse last_refresh to estimate expiry (tokens typically last 1 hour)
  const lastRefresh = record.last_refresh
  const refreshedAt =
    typeof lastRefresh === 'string' ? new Date(lastRefresh).getTime() : Number.NaN

  // getTime() is NaN for an invalid date; comparisons against NaN are always
  // false, so a NaN expiry must never leave this function.
  const expiresAt = Number.isNaN(refreshedAt)
    ? Date.now() + 5 * 60 * 1000 // unknown refresh time: force an early refresh check
    : refreshedAt + 3600 * 1000 // assume 1 hour expiry from last refresh

  // Hand over only string tokens so claim decoding never sees other types.
  const jwtSources: { id_token?: string; access_token?: string } = { access_token: accessToken }
  const idToken = tokenRecord.id_token
  if (typeof idToken === 'string') {
    jwtSources.id_token = idToken
  }

  return {
    accessToken: accessToken.trim(),
    refreshToken: refreshToken.trim(),
    expiresAt,
    accountId: extractAccountId(jwtSources),
  }
}
145
+
/**
 * Serializes `data` as JSON with two-space indentation and writes it to
 * `filePath` through Bun's write API.
 *
 * @param filePath Destination path.
 * @param data     Value to serialize.
 */
export async function writeJsonFile(filePath: string, data: unknown): Promise<void> {
  await Bun.write(filePath, JSON.stringify(data, null, 2))
}