@usejarvis/brain 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266)
  1. package/LICENSE +153 -0
  2. package/README.md +278 -0
  3. package/bin/jarvis.ts +413 -0
  4. package/package.json +74 -0
  5. package/scripts/ensure-bun.cjs +8 -0
  6. package/src/actions/README.md +421 -0
  7. package/src/actions/app-control/desktop-controller.test.ts +26 -0
  8. package/src/actions/app-control/desktop-controller.ts +438 -0
  9. package/src/actions/app-control/interface.ts +64 -0
  10. package/src/actions/app-control/linux.ts +273 -0
  11. package/src/actions/app-control/macos.ts +54 -0
  12. package/src/actions/app-control/sidecar-launcher.test.ts +23 -0
  13. package/src/actions/app-control/sidecar-launcher.ts +286 -0
  14. package/src/actions/app-control/windows.ts +44 -0
  15. package/src/actions/browser/cdp.ts +138 -0
  16. package/src/actions/browser/chrome-launcher.ts +252 -0
  17. package/src/actions/browser/session.ts +437 -0
  18. package/src/actions/browser/stealth.ts +49 -0
  19. package/src/actions/index.ts +20 -0
  20. package/src/actions/terminal/executor.ts +157 -0
  21. package/src/actions/terminal/wsl-bridge.ts +126 -0
  22. package/src/actions/test.ts +93 -0
  23. package/src/actions/tools/agents.ts +321 -0
  24. package/src/actions/tools/builtin.ts +846 -0
  25. package/src/actions/tools/commitments.ts +192 -0
  26. package/src/actions/tools/content.ts +217 -0
  27. package/src/actions/tools/delegate.ts +147 -0
  28. package/src/actions/tools/desktop.test.ts +55 -0
  29. package/src/actions/tools/desktop.ts +305 -0
  30. package/src/actions/tools/goals.ts +376 -0
  31. package/src/actions/tools/local-tools-guard.ts +20 -0
  32. package/src/actions/tools/registry.ts +171 -0
  33. package/src/actions/tools/research.ts +111 -0
  34. package/src/actions/tools/sidecar-list.ts +57 -0
  35. package/src/actions/tools/sidecar-route.ts +105 -0
  36. package/src/actions/tools/workflows.ts +216 -0
  37. package/src/agents/agent.ts +132 -0
  38. package/src/agents/delegation.ts +107 -0
  39. package/src/agents/hierarchy.ts +113 -0
  40. package/src/agents/index.ts +19 -0
  41. package/src/agents/messaging.ts +125 -0
  42. package/src/agents/orchestrator.ts +576 -0
  43. package/src/agents/role-discovery.ts +61 -0
  44. package/src/agents/sub-agent-runner.ts +307 -0
  45. package/src/agents/task-manager.ts +151 -0
  46. package/src/authority/approval-delivery.ts +59 -0
  47. package/src/authority/approval.ts +196 -0
  48. package/src/authority/audit.ts +158 -0
  49. package/src/authority/authority.test.ts +519 -0
  50. package/src/authority/deferred-executor.ts +103 -0
  51. package/src/authority/emergency.ts +66 -0
  52. package/src/authority/engine.ts +297 -0
  53. package/src/authority/index.ts +12 -0
  54. package/src/authority/learning.ts +111 -0
  55. package/src/authority/tool-action-map.ts +74 -0
  56. package/src/awareness/analytics.ts +466 -0
  57. package/src/awareness/awareness.test.ts +332 -0
  58. package/src/awareness/capture-engine.ts +305 -0
  59. package/src/awareness/context-graph.ts +130 -0
  60. package/src/awareness/context-tracker.ts +349 -0
  61. package/src/awareness/index.ts +25 -0
  62. package/src/awareness/intelligence.ts +321 -0
  63. package/src/awareness/ocr-engine.ts +88 -0
  64. package/src/awareness/service.ts +528 -0
  65. package/src/awareness/struggle-detector.ts +342 -0
  66. package/src/awareness/suggestion-engine.ts +476 -0
  67. package/src/awareness/types.ts +201 -0
  68. package/src/cli/autostart.ts +241 -0
  69. package/src/cli/deps.ts +449 -0
  70. package/src/cli/doctor.ts +230 -0
  71. package/src/cli/helpers.ts +401 -0
  72. package/src/cli/onboard.ts +580 -0
  73. package/src/comms/README.md +329 -0
  74. package/src/comms/auth-error.html +48 -0
  75. package/src/comms/channels/discord.ts +228 -0
  76. package/src/comms/channels/signal.ts +56 -0
  77. package/src/comms/channels/telegram.ts +316 -0
  78. package/src/comms/channels/whatsapp.ts +60 -0
  79. package/src/comms/channels.test.ts +173 -0
  80. package/src/comms/desktop-notify.ts +114 -0
  81. package/src/comms/example.ts +129 -0
  82. package/src/comms/index.ts +129 -0
  83. package/src/comms/streaming.ts +142 -0
  84. package/src/comms/voice.test.ts +152 -0
  85. package/src/comms/voice.ts +291 -0
  86. package/src/comms/websocket.test.ts +409 -0
  87. package/src/comms/websocket.ts +473 -0
  88. package/src/config/README.md +387 -0
  89. package/src/config/index.ts +6 -0
  90. package/src/config/loader.test.ts +137 -0
  91. package/src/config/loader.ts +142 -0
  92. package/src/config/types.ts +260 -0
  93. package/src/daemon/README.md +232 -0
  94. package/src/daemon/agent-service-interface.ts +9 -0
  95. package/src/daemon/agent-service.ts +600 -0
  96. package/src/daemon/api-routes.ts +2119 -0
  97. package/src/daemon/background-agent-service.ts +396 -0
  98. package/src/daemon/background-agent.test.ts +78 -0
  99. package/src/daemon/channel-service.ts +201 -0
  100. package/src/daemon/commitment-executor.ts +297 -0
  101. package/src/daemon/event-classifier.ts +239 -0
  102. package/src/daemon/event-coalescer.ts +123 -0
  103. package/src/daemon/event-reactor.ts +214 -0
  104. package/src/daemon/health.ts +220 -0
  105. package/src/daemon/index.ts +1004 -0
  106. package/src/daemon/llm-settings.ts +316 -0
  107. package/src/daemon/observer-service.ts +150 -0
  108. package/src/daemon/pid.ts +98 -0
  109. package/src/daemon/research-queue.ts +155 -0
  110. package/src/daemon/services.ts +175 -0
  111. package/src/daemon/ws-service.ts +788 -0
  112. package/src/goals/accountability.ts +240 -0
  113. package/src/goals/awareness-bridge.ts +185 -0
  114. package/src/goals/estimator.ts +185 -0
  115. package/src/goals/events.ts +28 -0
  116. package/src/goals/goals.test.ts +400 -0
  117. package/src/goals/integration.test.ts +329 -0
  118. package/src/goals/nl-builder.test.ts +220 -0
  119. package/src/goals/nl-builder.ts +256 -0
  120. package/src/goals/rhythm.test.ts +177 -0
  121. package/src/goals/rhythm.ts +275 -0
  122. package/src/goals/service.test.ts +135 -0
  123. package/src/goals/service.ts +348 -0
  124. package/src/goals/types.ts +106 -0
  125. package/src/goals/workflow-bridge.ts +96 -0
  126. package/src/integrations/google-api.ts +134 -0
  127. package/src/integrations/google-auth.ts +175 -0
  128. package/src/llm/README.md +291 -0
  129. package/src/llm/anthropic.ts +386 -0
  130. package/src/llm/gemini.ts +371 -0
  131. package/src/llm/index.ts +19 -0
  132. package/src/llm/manager.ts +153 -0
  133. package/src/llm/ollama.ts +307 -0
  134. package/src/llm/openai.ts +350 -0
  135. package/src/llm/provider.test.ts +231 -0
  136. package/src/llm/provider.ts +60 -0
  137. package/src/llm/test.ts +87 -0
  138. package/src/observers/README.md +278 -0
  139. package/src/observers/calendar.ts +113 -0
  140. package/src/observers/clipboard.ts +136 -0
  141. package/src/observers/email.ts +109 -0
  142. package/src/observers/example.ts +58 -0
  143. package/src/observers/file-watcher.ts +124 -0
  144. package/src/observers/index.ts +159 -0
  145. package/src/observers/notifications.ts +197 -0
  146. package/src/observers/observers.test.ts +203 -0
  147. package/src/observers/processes.ts +225 -0
  148. package/src/personality/README.md +61 -0
  149. package/src/personality/adapter.ts +196 -0
  150. package/src/personality/index.ts +20 -0
  151. package/src/personality/learner.ts +209 -0
  152. package/src/personality/model.ts +132 -0
  153. package/src/personality/personality.test.ts +236 -0
  154. package/src/roles/README.md +252 -0
  155. package/src/roles/authority.ts +119 -0
  156. package/src/roles/example-usage.ts +198 -0
  157. package/src/roles/index.ts +42 -0
  158. package/src/roles/loader.ts +143 -0
  159. package/src/roles/prompt-builder.ts +194 -0
  160. package/src/roles/test-multi.ts +102 -0
  161. package/src/roles/test-role.yaml +77 -0
  162. package/src/roles/test-utils.ts +93 -0
  163. package/src/roles/test.ts +106 -0
  164. package/src/roles/tool-guide.ts +190 -0
  165. package/src/roles/types.ts +36 -0
  166. package/src/roles/utils.ts +200 -0
  167. package/src/scripts/google-setup.ts +168 -0
  168. package/src/sidecar/connection.ts +179 -0
  169. package/src/sidecar/index.ts +6 -0
  170. package/src/sidecar/manager.ts +542 -0
  171. package/src/sidecar/protocol.ts +85 -0
  172. package/src/sidecar/rpc.ts +161 -0
  173. package/src/sidecar/scheduler.ts +136 -0
  174. package/src/sidecar/types.ts +112 -0
  175. package/src/sidecar/validator.ts +144 -0
  176. package/src/vault/README.md +110 -0
  177. package/src/vault/awareness.ts +341 -0
  178. package/src/vault/commitments.ts +299 -0
  179. package/src/vault/content-pipeline.ts +260 -0
  180. package/src/vault/conversations.ts +173 -0
  181. package/src/vault/entities.ts +180 -0
  182. package/src/vault/extractor.test.ts +356 -0
  183. package/src/vault/extractor.ts +345 -0
  184. package/src/vault/facts.ts +190 -0
  185. package/src/vault/goals.ts +477 -0
  186. package/src/vault/index.ts +87 -0
  187. package/src/vault/keychain.ts +99 -0
  188. package/src/vault/observations.ts +115 -0
  189. package/src/vault/relationships.ts +178 -0
  190. package/src/vault/retrieval.test.ts +126 -0
  191. package/src/vault/retrieval.ts +227 -0
  192. package/src/vault/schema.ts +658 -0
  193. package/src/vault/settings.ts +38 -0
  194. package/src/vault/vectors.ts +92 -0
  195. package/src/vault/workflows.ts +403 -0
  196. package/src/workflows/auto-suggest.ts +290 -0
  197. package/src/workflows/engine.ts +366 -0
  198. package/src/workflows/events.ts +24 -0
  199. package/src/workflows/executor.ts +207 -0
  200. package/src/workflows/nl-builder.ts +198 -0
  201. package/src/workflows/nodes/actions/agent-task.ts +73 -0
  202. package/src/workflows/nodes/actions/calendar-action.ts +85 -0
  203. package/src/workflows/nodes/actions/code-execution.ts +73 -0
  204. package/src/workflows/nodes/actions/discord.ts +77 -0
  205. package/src/workflows/nodes/actions/file-write.ts +73 -0
  206. package/src/workflows/nodes/actions/gmail.ts +69 -0
  207. package/src/workflows/nodes/actions/http-request.ts +117 -0
  208. package/src/workflows/nodes/actions/notification.ts +85 -0
  209. package/src/workflows/nodes/actions/run-tool.ts +55 -0
  210. package/src/workflows/nodes/actions/send-message.ts +82 -0
  211. package/src/workflows/nodes/actions/shell-command.ts +76 -0
  212. package/src/workflows/nodes/actions/telegram.ts +60 -0
  213. package/src/workflows/nodes/builtin.ts +119 -0
  214. package/src/workflows/nodes/error/error-handler.ts +37 -0
  215. package/src/workflows/nodes/error/fallback.ts +47 -0
  216. package/src/workflows/nodes/error/retry.ts +82 -0
  217. package/src/workflows/nodes/logic/delay.ts +42 -0
  218. package/src/workflows/nodes/logic/if-else.ts +41 -0
  219. package/src/workflows/nodes/logic/loop.ts +90 -0
  220. package/src/workflows/nodes/logic/merge.ts +38 -0
  221. package/src/workflows/nodes/logic/race.ts +40 -0
  222. package/src/workflows/nodes/logic/switch.ts +59 -0
  223. package/src/workflows/nodes/logic/template-render.ts +53 -0
  224. package/src/workflows/nodes/logic/variable-get.ts +37 -0
  225. package/src/workflows/nodes/logic/variable-set.ts +59 -0
  226. package/src/workflows/nodes/registry.ts +99 -0
  227. package/src/workflows/nodes/transform/aggregate.ts +99 -0
  228. package/src/workflows/nodes/transform/csv-parse.ts +70 -0
  229. package/src/workflows/nodes/transform/json-parse.ts +63 -0
  230. package/src/workflows/nodes/transform/map-filter.ts +84 -0
  231. package/src/workflows/nodes/transform/regex-match.ts +89 -0
  232. package/src/workflows/nodes/triggers/calendar.ts +33 -0
  233. package/src/workflows/nodes/triggers/clipboard.ts +32 -0
  234. package/src/workflows/nodes/triggers/cron.ts +40 -0
  235. package/src/workflows/nodes/triggers/email.ts +40 -0
  236. package/src/workflows/nodes/triggers/file-change.ts +45 -0
  237. package/src/workflows/nodes/triggers/git.ts +46 -0
  238. package/src/workflows/nodes/triggers/manual.ts +23 -0
  239. package/src/workflows/nodes/triggers/poll.ts +81 -0
  240. package/src/workflows/nodes/triggers/process.ts +44 -0
  241. package/src/workflows/nodes/triggers/screen-event.ts +37 -0
  242. package/src/workflows/nodes/triggers/webhook.ts +39 -0
  243. package/src/workflows/safe-eval.ts +139 -0
  244. package/src/workflows/template.ts +118 -0
  245. package/src/workflows/triggers/cron.ts +311 -0
  246. package/src/workflows/triggers/manager.ts +285 -0
  247. package/src/workflows/triggers/observer-bridge.ts +172 -0
  248. package/src/workflows/triggers/poller.ts +201 -0
  249. package/src/workflows/triggers/screen-condition.ts +218 -0
  250. package/src/workflows/triggers/triggers.test.ts +740 -0
  251. package/src/workflows/triggers/webhook.ts +191 -0
  252. package/src/workflows/types.ts +133 -0
  253. package/src/workflows/variables.ts +72 -0
  254. package/src/workflows/workflows.test.ts +383 -0
  255. package/src/workflows/yaml.ts +104 -0
  256. package/ui/dist/index-j75njzc1.css +1199 -0
  257. package/ui/dist/index-p2zh407q.js +80603 -0
  258. package/ui/dist/index.html +13 -0
  259. package/ui/public/openwakeword/models/embedding_model.onnx +0 -0
  260. package/ui/public/openwakeword/models/hey_jarvis_v0.1.onnx +0 -0
  261. package/ui/public/openwakeword/models/melspectrogram.onnx +0 -0
  262. package/ui/public/openwakeword/models/silero_vad.onnx +0 -0
  263. package/ui/public/ort/ort-wasm-simd-threaded.jsep.mjs +106 -0
  264. package/ui/public/ort/ort-wasm-simd-threaded.jsep.wasm +0 -0
  265. package/ui/public/ort/ort-wasm-simd-threaded.mjs +59 -0
  266. package/ui/public/ort/ort-wasm-simd-threaded.wasm +0 -0
@@ -0,0 +1,321 @@
1
+ /**
2
+ * Awareness Intelligence — Cloud Vision Analysis
3
+ *
4
+ * Escalates screenshots to LLM vision when local OCR detects errors,
5
+ * stuck states, or significant context changes. Rate-limited to avoid
6
+ * excessive API calls.
7
+ */
8
+
9
+ import type { LLMManager } from '../llm/manager.ts';
10
+ import type { ContentBlock } from '../llm/provider.ts';
11
+ import { guardImageSize } from '../llm/provider.ts';
12
+ import type { ScreenContext, AwarenessEvent } from './types.ts';
13
+
14
/**
 * Decides when a screen capture should be sent to an LLM vision model and
 * builds/sends the corresponding prompts.
 *
 * Every `analyze*` call stamps `lastCloudCallAt`, which `shouldEscalateToCloud`
 * uses to enforce a minimum gap (`cooldownMs`) between cloud calls.
 */
export class AwarenessIntelligence {
  /** Event types that warrant escalation on their own (once the cooldown has elapsed). */
  private static readonly ESCALATION_EVENT_TYPES: ReadonlySet<string> = new Set([
    'error_detected',
    'stuck_detected',
    'struggle_detected',
  ]);

  /** Category-specific instructions appended to the struggle prompt; `general` is the fallback. */
  private static readonly STRUGGLE_PROMPTS = {
    code_editor: `You are looking at a code editor. The user has been editing the same area repeatedly without making progress.

Analyze the visible code carefully:
1. Look for syntax errors (missing brackets, semicolons, typos in keywords)
2. Look for logic errors (wrong variable names, incorrect conditions, off-by-one)
3. Look for missing imports or undefined variables
4. Look for type errors if TypeScript/typed language
5. Check the error panel/terminal output if visible

Provide the SPECIFIC fix. Say exactly what line has the issue and what to change. If you can see an error message, explain what it means and how to fix it.`,

    terminal: `You are looking at a terminal/CLI. The user has been running commands that keep failing.

Analyze the terminal output:
1. Identify the exact error message
2. Determine if it's a wrong command, missing package, permission issue, or path problem
3. Provide the corrected command they should run
4. If it's a build/compile error, explain the root cause

Give the EXACT command to run. Start with the fix, not an explanation.`,

    browser: `You are looking at a web browser. The user seems to be struggling to accomplish something.

Analyze what's visible:
1. What is the user trying to do? (fill a form, find information, navigate, etc.)
2. Is there a UI element they might be missing?
3. Is there an error on the page?
4. Are they on the wrong page for what they need?

Guide them to the specific button, link, or action they need.`,

    creative_app: `You are looking at a creative application (design/art/video tool). The user seems to be looking for a feature or struggling with a technique.

Analyze the interface:
1. What tool/feature appears to be selected?
2. What is the user trying to create or modify?
3. Is there a more appropriate tool for what they're doing?
4. Are there keyboard shortcuts that would help?

Name the specific tool, menu item, or keyboard shortcut they need. Be precise about where it is in the interface.`,

    puzzle_game: `You are looking at a puzzle or game. The user has been stuck on this for a while.

Analyze the game state:
1. What type of puzzle is this?
2. What is the current state of the board/puzzle?
3. What moves are available?
4. What is the optimal next move or strategy?

Suggest the next 1-2 specific moves. Be precise about positions on the screen.`,

    general: `The user has been struggling with this application for a while without making progress.

Analyze the screen:
1. What is the user trying to accomplish?
2. What obstacle or confusion might they be facing?
3. What specific action should they take next?

Provide clear, actionable guidance.`,
  };

  private llm: LLMManager;
  private lastCloudCallAt = 0;
  private cooldownMs: number;

  /**
   * @param llm        LLM manager used for all chat/vision requests.
   * @param cooldownMs Minimum time between cloud calls, in milliseconds (default 30000).
   */
  constructor(llm: LLMManager, cooldownMs: number = 30000) {
    this.llm = llm;
    this.cooldownMs = cooldownMs;
  }

  /**
   * Determine if a capture warrants cloud vision analysis.
   *
   * Returns `false` while the cooldown since the last `analyze*` call is still
   * running. Otherwise returns `true` when any event is an error/stuck/struggle
   * detection, when the context is flagged as a significant change, or when
   * the trimmed OCR text is shorter than 20 characters.
   */
  shouldEscalateToCloud(context: ScreenContext, events: AwarenessEvent[]): boolean {
    // Rate limit: nothing escalates until the cooldown has elapsed.
    if (Date.now() - this.lastCloudCallAt < this.cooldownMs) {
      return false;
    }

    if (events.some(e => AwarenessIntelligence.ESCALATION_EVENT_TYPES.has(e.type))) {
      return true;
    }

    if (context.isSignificantChange) {
      return true;
    }

    // Very short/empty OCR is treated as an image-heavy screen that needs vision.
    return context.ocrText.trim().length < 20;
  }

  /**
   * General screen analysis — what is the user doing?
   *
   * @param imageBase64 Base64 image data, sent with media type `image/png`.
   * @param context     Screen context; the first 500 chars of its OCR text go into the prompt.
   * @returns The model's reply, or `''` if the chat call fails.
   */
  async analyzeGeneral(imageBase64: string, context: ScreenContext): Promise<string> {
    this.lastCloudCallAt = Date.now();

    const text = `Analyze this screenshot. The user is in "${context.appName}" (window: "${context.windowTitle}").
OCR extracted: "${context.ocrText.slice(0, 500)}"

Provide a concise analysis:
1. What is the user doing right now? (1 sentence)
2. Any errors or issues visible? (yes/no + detail if yes)
3. Any actionable suggestions? (1-2 if applicable)

Be brief and direct. No preamble.`;

    return this.runVisionAnalysis(imageBase64, text, 300, 'General');
  }

  /**
   * Delta-focused analysis — what changed between two captures?
   *
   * @param imageBase64 Base64 image data of the current capture.
   * @param current     Context of the current capture.
   * @param previous    Context of the previous capture, or `null` for the first capture.
   * @returns The model's reply, or `''` if the chat call fails.
   */
  async analyzeDelta(
    imageBase64: string,
    current: ScreenContext,
    previous: ScreenContext | null
  ): Promise<string> {
    this.lastCloudCallAt = Date.now();

    const previousInfo = previous
      ? `Previous: "${previous.appName}" — "${previous.windowTitle}"\nPrevious OCR: "${previous.ocrText.slice(0, 300)}"`
      : 'No previous context (first capture).';

    const text = `The user's screen changed. Analyze the delta.

Current: "${current.appName}" — "${current.windowTitle}"
Current OCR: "${current.ocrText.slice(0, 300)}"

${previousInfo}

What changed and why? Note any:
- Task transitions (starting/finishing something)
- Errors or problems that appeared
- Patterns worth learning (user habits)

Be concise. 2-3 sentences max.`;

    return this.runVisionAnalysis(imageBase64, text, 200, 'Delta');
  }

  /**
   * Deep struggle analysis — app-category-aware screenshot analysis.
   *
   * @param imageBase64 Base64 image data of the capture.
   * @param context     Screen context (app name and window title are used in the prompt).
   * @param appCategory Key selecting the category-specific instructions; unknown keys use `general`.
   * @param signals     Behavioral signals; only those with `score > 0.3` are listed in the prompt.
   * @param ocrPreview  OCR text quoted verbatim in the prompt.
   * @returns The model's reply, or `''` if the chat call fails.
   */
  async analyzeStruggle(
    imageBase64: string,
    context: ScreenContext,
    appCategory: string,
    signals: Array<{ name: string; score: number; detail: string }>,
    ocrPreview: string
  ): Promise<string> {
    this.lastCloudCallAt = Date.now();

    const prompt = this.buildStrugglePrompt(context, appCategory, signals, ocrPreview);
    return this.runVisionAnalysis(imageBase64, prompt, 600, 'Struggle');
  }

  /**
   * Send one image plus one text prompt as a single user message.
   * Chat failures are logged under `label` and turned into an empty string.
   */
  private async runVisionAnalysis(
    imageBase64: string,
    text: string,
    maxTokens: number,
    label: string
  ): Promise<string> {
    const imageBlock: ContentBlock = guardImageSize({
      type: 'image',
      source: { type: 'base64', media_type: 'image/png', data: imageBase64 },
    });
    const content: ContentBlock[] = [imageBlock, { type: 'text', text }];

    try {
      const response = await this.llm.chat(
        [{ role: 'user', content }],
        { max_tokens: maxTokens }
      );
      return response.content;
    } catch (err) {
      console.error(`[Intelligence] ${label} analysis failed:`, err instanceof Error ? err.message : err);
      return '';
    }
  }

  /**
   * Assemble the struggle prompt: shared preamble (app, window, signals, OCR),
   * then the category-specific instructions, then a fixed closing instruction.
   */
  private buildStrugglePrompt(
    context: ScreenContext,
    appCategory: string,
    signals: Array<{ name: string; score: number; detail: string }>,
    ocrPreview: string
  ): string {
    const signalSummary = signals
      .filter(s => s.score > 0.3)
      .map(s => `- ${s.name}: ${s.detail}`)
      .join('\n');

    const base = `The user appears to be struggling in "${context.appName}" (window: "${context.windowTitle}").
Behavioral signals detected:
${signalSummary}

OCR text from screen:
"${ocrPreview}"

`;

    const prompts = AwarenessIntelligence.STRUGGLE_PROMPTS;
    // Own-property check: a plain index lookup would resolve inherited keys such as
    // "constructor" or "toString" to functions and bypass the `general` fallback.
    const category: keyof typeof prompts = Object.prototype.hasOwnProperty.call(prompts, appCategory)
      ? (appCategory as keyof typeof prompts)
      : 'general';

    return base + prompts[category] +
      '\n\nBe concise but specific. No preamble. Start with the most important insight or fix.';
  }

  /**
   * Summarize an activity session for storage.
   *
   * Asks the model for a JSON object with `topic` and `summary`. If the reply
   * contains no parseable JSON object, the first 200 chars of the reply become
   * the summary. If the chat call itself fails, a summary is synthesized from
   * `apps` and `durationMinutes`.
   *
   * @param apps            Names of the apps used during the session.
   * @param captureCount    Number of captures in the session.
   * @param durationMinutes Session length in minutes.
   * @param sampleOcrTexts  OCR samples; at most 5 are used, each cut to 200 chars.
   */
  async summarizeSession(
    apps: string[],
    captureCount: number,
    durationMinutes: number,
    sampleOcrTexts: string[]
  ): Promise<{ topic: string; summary: string }> {
    const ocrSample = sampleOcrTexts
      .slice(0, 5)
      .map((t, i) => `[${i + 1}] ${t.slice(0, 200)}`)
      .join('\n');

    try {
      const response = await this.llm.chat(
        [{
          role: 'user',
          content: `Summarize this activity session:
- Apps used: ${apps.join(', ')}
- Duration: ${durationMinutes} minutes
- Captures: ${captureCount}

OCR samples from the session:
${ocrSample}

Respond in JSON: { "topic": "short topic (3-5 words)", "summary": "1-2 sentence summary" }`,
        }],
        { max_tokens: 150 }
      );

      try {
        // The model may wrap the JSON in extra text; grab the outermost {...} span.
        const jsonMatch = response.content.match(/\{[\s\S]*\}/);
        if (jsonMatch) {
          const parsed: unknown = JSON.parse(jsonMatch[0]);
          if (typeof parsed === 'object' && parsed !== null) {
            // Model output is untrusted: only accept non-empty strings for either field.
            const { topic, summary } = parsed as { topic?: unknown; summary?: unknown };
            return {
              topic: typeof topic === 'string' && topic ? topic : 'Unknown activity',
              summary: typeof summary === 'string' && summary ? summary : response.content,
            };
          }
        }
      } catch { /* parse failure, fall through */ }

      return { topic: 'Activity session', summary: response.content.slice(0, 200) };
    } catch (err) {
      console.error('[Intelligence] Session summary failed:', err instanceof Error ? err.message : err);
      return {
        topic: apps.length > 0 ? `${apps[0]} session` : 'Activity session',
        summary: `${durationMinutes}min session using ${apps.join(', ')}`,
      };
    }
  }
}
@@ -0,0 +1,88 @@
1
+ /**
2
+ * OCR Engine — Local Text Extraction via Tesseract.js
3
+ *
4
+ * Maintains a persistent Tesseract worker for fast repeated OCR.
5
+ * WASM-based, no native dependencies, runs in Bun.
6
+ */
7
+
8
+ import type { OCRResult } from './types.ts';
9
+
10
/** Minimal shape of the Tesseract worker that this engine relies on. */
interface OCRWorker {
  recognize(image: Buffer): Promise<{ data: { text?: string; confidence?: number } }>;
  terminate(): Promise<unknown>;
}

/**
 * Wraps a single long-lived Tesseract.js worker so repeated OCR calls reuse it.
 */
export class OCREngine {
  private worker: OCRWorker | null = null;
  private ready = false;
  /** In-flight initialization, shared by concurrent `initialize()` callers. */
  private initPromise: Promise<void> | null = null;

  /**
   * Initialize the Tesseract worker. Call once before extractText().
   * Loads the English language model (~2s on first run, cached after).
   *
   * Concurrent calls share one initialization: every caller's promise settles
   * only when the worker is actually ready (or rejects with the same error),
   * instead of later callers resolving early against a half-built engine.
   *
   * @throws Whatever the dynamic import or worker creation throws (after logging it).
   */
  initialize(): Promise<void> {
    if (this.ready) {
      return Promise.resolve();
    }

    let pending = this.initPromise;
    if (pending === null) {
      pending = this.startWorker().finally(() => {
        // Clear the slot so a failed init can be retried and shutdown() stops waiting.
        this.initPromise = null;
      });
      this.initPromise = pending;
    }
    return pending;
  }

  /** Import tesseract.js, create the `eng` worker and mark the engine ready. */
  private async startWorker(): Promise<void> {
    try {
      const Tesseract = await import('tesseract.js');
      this.worker = await Tesseract.createWorker('eng');
      this.ready = true;
      console.log('[OCREngine] Tesseract worker initialized (eng)');
    } catch (err) {
      console.error('[OCREngine] Failed to initialize:', err instanceof Error ? err.message : err);
      throw err;
    }
  }

  /**
   * Shut down the Tesseract worker.
   *
   * Waits for any in-flight initialization first, so a worker created after an
   * early shutdown() call is not left running. Termination errors are ignored.
   */
  async shutdown(): Promise<void> {
    if (this.initPromise) {
      try {
        await this.initPromise;
      } catch { /* init failure was already logged by startWorker() */ }
    }

    const worker = this.worker;
    if (!worker) {
      return;
    }

    // Flip state before terminating so extractText() cannot pick up a dying worker.
    this.worker = null;
    this.ready = false;
    try {
      await worker.terminate();
    } catch { /* ignore */ }
    console.log('[OCREngine] Worker terminated');
  }

  /**
   * Extract text from a PNG image buffer.
   *
   * @returns The extracted text, confidence score, and processing duration in
   *          milliseconds. A recognition failure is logged and reported as
   *          empty text with confidence 0 rather than thrown.
   * @throws Error if the engine has not been initialized.
   */
  async extractText(imageBuffer: Buffer): Promise<OCRResult> {
    if (!this.ready || !this.worker) {
      throw new Error('OCR engine not initialized. Call initialize() first.');
    }

    const start = performance.now();

    try {
      const result = await this.worker.recognize(imageBuffer);
      const durationMs = Math.round(performance.now() - start);

      if (durationMs > 500) {
        console.warn(`[OCREngine] Slow OCR: ${durationMs}ms`);
      }

      return {
        text: result.data.text ?? '',
        // `||` (not `??`) on purpose: also maps NaN to 0.
        confidence: result.data.confidence || 0,
        durationMs,
      };
    } catch (err) {
      const durationMs = Math.round(performance.now() - start);
      console.error('[OCREngine] Recognition failed:', err instanceof Error ? err.message : err);
      return {
        text: '',
        confidence: 0,
        durationMs,
      };
    }
  }

  /** Whether a worker has been created and not yet shut down. */
  isReady(): boolean {
    return this.ready;
  }
}