@geravant/sinain 1.0.18 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/README.md +10 -1
  2. package/cli.js +176 -0
  3. package/index.ts +163 -1257
  4. package/install.js +12 -2
  5. package/launcher.js +622 -0
  6. package/openclaw.plugin.json +4 -0
  7. package/pack-prepare.js +48 -0
  8. package/package.json +26 -5
  9. package/sense_client/README.md +82 -0
  10. package/sense_client/__init__.py +1 -0
  11. package/sense_client/__main__.py +462 -0
  12. package/sense_client/app_detector.py +54 -0
  13. package/sense_client/app_detector_win.py +83 -0
  14. package/sense_client/capture.py +215 -0
  15. package/sense_client/capture_win.py +88 -0
  16. package/sense_client/change_detector.py +86 -0
  17. package/sense_client/config.py +64 -0
  18. package/sense_client/gate.py +145 -0
  19. package/sense_client/ocr.py +347 -0
  20. package/sense_client/privacy.py +65 -0
  21. package/sense_client/requirements.txt +13 -0
  22. package/sense_client/roi_extractor.py +84 -0
  23. package/sense_client/sender.py +173 -0
  24. package/sense_client/tests/__init__.py +0 -0
  25. package/sense_client/tests/test_stream1_optimizations.py +234 -0
  26. package/setup-overlay.js +82 -0
  27. package/sinain-agent/.env.example +17 -0
  28. package/sinain-agent/CLAUDE.md +80 -0
  29. package/sinain-agent/mcp-config.json +12 -0
  30. package/sinain-agent/run.sh +248 -0
  31. package/sinain-core/.env.example +93 -0
  32. package/sinain-core/package-lock.json +552 -0
  33. package/sinain-core/package.json +21 -0
  34. package/sinain-core/src/agent/analyzer.ts +366 -0
  35. package/sinain-core/src/agent/context-window.ts +172 -0
  36. package/sinain-core/src/agent/loop.ts +404 -0
  37. package/sinain-core/src/agent/situation-writer.ts +187 -0
  38. package/sinain-core/src/agent/traits.ts +520 -0
  39. package/sinain-core/src/audio/capture-spawner-macos.ts +44 -0
  40. package/sinain-core/src/audio/capture-spawner-win.ts +37 -0
  41. package/sinain-core/src/audio/capture-spawner.ts +14 -0
  42. package/sinain-core/src/audio/pipeline.ts +335 -0
  43. package/sinain-core/src/audio/transcription-local.ts +141 -0
  44. package/sinain-core/src/audio/transcription.ts +278 -0
  45. package/sinain-core/src/buffers/feed-buffer.ts +71 -0
  46. package/sinain-core/src/buffers/sense-buffer.ts +425 -0
  47. package/sinain-core/src/config.ts +245 -0
  48. package/sinain-core/src/escalation/escalation-slot.ts +136 -0
  49. package/sinain-core/src/escalation/escalator.ts +812 -0
  50. package/sinain-core/src/escalation/message-builder.ts +323 -0
  51. package/sinain-core/src/escalation/openclaw-ws.ts +726 -0
  52. package/sinain-core/src/escalation/scorer.ts +166 -0
  53. package/sinain-core/src/index.ts +507 -0
  54. package/sinain-core/src/learning/feedback-store.ts +253 -0
  55. package/sinain-core/src/learning/signal-collector.ts +218 -0
  56. package/sinain-core/src/log.ts +24 -0
  57. package/sinain-core/src/overlay/commands.ts +126 -0
  58. package/sinain-core/src/overlay/ws-handler.ts +267 -0
  59. package/sinain-core/src/privacy/index.ts +18 -0
  60. package/sinain-core/src/privacy/presets.ts +40 -0
  61. package/sinain-core/src/privacy/redact.ts +92 -0
  62. package/sinain-core/src/profiler.ts +181 -0
  63. package/sinain-core/src/recorder.ts +186 -0
  64. package/sinain-core/src/server.ts +417 -0
  65. package/sinain-core/src/trace/trace-store.ts +73 -0
  66. package/sinain-core/src/trace/tracer.ts +94 -0
  67. package/sinain-core/src/types.ts +427 -0
  68. package/sinain-core/src/util/dedup.ts +48 -0
  69. package/sinain-core/src/util/task-store.ts +84 -0
  70. package/sinain-core/tsconfig.json +18 -0
  71. package/sinain-knowledge/adapters/generic/adapter.ts +103 -0
  72. package/sinain-knowledge/adapters/interface.ts +72 -0
  73. package/sinain-knowledge/adapters/openclaw/adapter.ts +223 -0
  74. package/sinain-knowledge/curation/engine.ts +493 -0
  75. package/sinain-knowledge/curation/resilience.ts +336 -0
  76. package/sinain-knowledge/data/git-store.ts +312 -0
  77. package/sinain-knowledge/data/schema.ts +89 -0
  78. package/sinain-knowledge/data/snapshot.ts +226 -0
  79. package/sinain-knowledge/data/store.ts +488 -0
  80. package/sinain-knowledge/deploy/cli.ts +214 -0
  81. package/sinain-knowledge/deploy/manifest.ts +80 -0
  82. package/sinain-knowledge/protocol/bindings/generic.md +5 -0
  83. package/sinain-knowledge/protocol/bindings/openclaw.md +5 -0
  84. package/sinain-knowledge/protocol/heartbeat.md +62 -0
  85. package/sinain-knowledge/protocol/renderer.ts +56 -0
  86. package/sinain-knowledge/protocol/skill.md +335 -0
  87. package/sinain-mcp-server/index.ts +337 -0
  88. package/sinain-mcp-server/package.json +19 -0
  89. package/sinain-mcp-server/tsconfig.json +15 -0
@@ -0,0 +1,366 @@
1
+ import type { AgentConfig, AgentResult, ContextWindow, RecorderStatus, RecordCommand } from "../types.js";
2
+ import { normalizeAppName } from "./context-window.js";
3
+ import { log, error } from "../log.js";
4
+ import { levelFor, applyLevel } from "../privacy/index.js";
5
+
6
+ const TAG = "agent";
7
+
8
+ /**
9
+ * Model-specific timeouts in milliseconds.
10
+ * Only increases timeouts for slow models to avoid false timeouts.
11
+ * Default 15s is kept for fast models.
12
+ */
13
+ const MODEL_TIMEOUTS: Record<string, number> = {
14
+ 'google/gemini-2.5-flash-lite': 15000,
15
+ 'google/gemini-2.5-flash': 15000,
16
+ 'google/gemini-2.0-flash': 15000,
17
+ 'anthropic/claude-3-opus': 60000,
18
+ 'anthropic/claude-3.5-sonnet': 30000,
19
+ 'anthropic/claude-3-haiku': 15000,
20
+ 'default': 15000,
21
+ };
22
+
23
+ /** Get timeout for a specific model. */
24
+ function getModelTimeout(model: string): number {
25
+ return MODEL_TIMEOUTS[model] ?? MODEL_TIMEOUTS['default'];
26
+ }
27
+
28
+ /** Message part for multimodal API calls. */
29
+ type ContentPart =
30
+ | { type: "text"; text: string }
31
+ | { type: "image_url"; image_url: { url: string; detail: "low" } };
32
+
33
+ /**
34
+ * Build recorder status section for the prompt.
35
+ */
36
+ function buildRecorderSection(status: RecorderStatus | null): string {
37
+ if (!status) return "";
38
+ if (!status.recording) return "\nRecorder: idle (not recording)";
39
+
40
+ const label = status.label ? ` "${status.label}"` : "";
41
+ const durationSec = Math.round(status.durationMs / 1000);
42
+ return `\nRecorder: RECORDING${label} (${durationSec}s, ${status.segments} segments)`;
43
+ }
44
+
45
/**
 * Static system prompt, hoisted to a module constant so it is allocated
 * once instead of ~3KB per tick.
 *
 * Contract it imposes on the model: respond with a single JSON object
 * containing required "hud"/"digest" fields plus optional "record" and
 * "task" fields (parsed downstream by parseRecord/parseTask). The
 * \ud83d\udd0a / \ud83c\udf99 escapes are the speaker/mic emoji used as
 * audio-source markers by the transcript formatter.
 *
 * NOTE: this is a runtime string — any edit here changes model behavior.
 */
const SYSTEM_PROMPT = `You are an AI monitoring a user's screen and audio in real-time.
You produce outputs as JSON.

Respond ONLY with valid JSON. No markdown, no code fences, no explanation.
Your entire response must be parseable by JSON.parse().

{"hud":"...","digest":"...","record":{"command":"start"|"stop","label":"..."},"task":"..."}

Output fields:
- "hud" (required): max 60 words describing what user is doing NOW
- "digest" (required): 5-8 sentences with detailed activity description
- "record" (optional): control recording — {"command":"start","label":"Meeting name"} or {"command":"stop"}
- "task" (optional): natural language instruction to spawn a background task

When to use "record":
- START when user begins a meeting, call, lecture, YouTube video, or important audio content
- STOP when the content ends or user navigates away
- Provide descriptive labels like "Team standup", "Client call", "YouTube: [video title from OCR]"
- For YouTube/video content: extract video title from screen OCR for the label

When to use "task":
- User explicitly asks for research, lookup, or action
- Something needs external search or processing that isn't a real-time response
- Example: "Search for React 19 migration guide", "Find docs for this API"

When to spawn "task" for video content:
- If user watches a YouTube video for 2+ minutes AND no task has been spawned for this video yet, spawn: "Summarize YouTube video: [title or URL from OCR]"
- ONLY spawn ONCE per video - do not repeat spawn for the same video in subsequent ticks
- Extract video title or URL from screen OCR to include in the task

When to spawn "task" for coding problems:
- If user is actively working on a coding problem/challenge for 1+ minutes:
- Spawn: "Solve coding problem: [problem description/title from OCR]"
- This includes LeetCode, HackerRank, interviews, coding assessments, or any visible coding challenge
- Look for problem signals: "Input:", "Output:", "Example", "Constraints:", problem titles, test cases
- Include as much context as possible from the screen OCR (problem description, examples, constraints)
- ONLY spawn ONCE per distinct problem - do not repeat for the same problem
- The spawned task should provide a complete solution with code and explanation

Audio sources: [\ud83d\udd0a]=system/speaker audio, [\ud83c\udf99]=microphone (user's voice).
Treat [\ud83c\udf99] as direct user speech. Treat [\ud83d\udd0a] as external audio.

Rules:
- "hud" is for a minimal overlay display. Example: "Editing hud-relay.mjs in IDEA"
- "digest" is for an AI assistant to understand the full situation and offer help.
- If nothing is happening, hud="Idle" and digest explains what was last seen.
- Include specific filenames, URLs, error messages, UI text from OCR in digest.
- Do NOT suggest actions in digest — just describe the situation factually.
- Only include "record" or "task" when genuinely appropriate — most responses won't have them.
- CRITICAL: Output ONLY the JSON object, nothing else.`;
100
+
101
/**
 * Build the dynamic user prompt (changes every tick).
 *
 * Assembles screen OCR lines, audio transcript lines, the app-switch
 * timeline, recorder state, and an optional screenshot note into one
 * text block for the LLM.
 *
 * Privacy: each data class (screen_ocr, window_titles, audio_transcript,
 * screen_images) is gated via levelFor()/applyLevel() for the "openrouter"
 * destination. If the privacy module throws (not yet initialized), the
 * catch branches fall back to ungated text — a deliberate best-effort
 * choice, not an error path.
 */
function buildUserPrompt(ctx: ContextWindow, recorderStatus: RecorderStatus | null = null): string {
  const now = Date.now();

  // Privacy gating: check levels for openrouter destination
  let screenLines: string;
  try {
    const ocrLevel = levelFor("screen_ocr", "openrouter");
    const titlesLevel = levelFor("window_titles", "openrouter");
    screenLines = ctx.screen
      .map(e => {
        const app = normalizeAppName(e.meta.app);
        // Age in seconds; a missing timestamp renders as "0s ago".
        const ago = Math.round((now - (e.ts || now)) / 1000);
        // Flatten newlines and cap OCR length per the richness preset,
        // BEFORE redaction so applyLevel sees the truncated text.
        const rawOcr = e.ocr ? e.ocr.replace(/\n/g, " ").slice(0, ctx.preset.maxOcrChars) : "(no text)";
        const ocr = e.ocr ? applyLevel(rawOcr, ocrLevel, "ocr") : "(no text)";
        const title = e.meta.windowTitle ? applyLevel(e.meta.windowTitle, titlesLevel, "titles") : "";
        const titlePart = title ? ` [${title}]` : "";
        // `ocr || "(no text)"` re-applies the fallback in case applyLevel
        // returned an empty string (redacted-to-nothing).
        return `[${ago}s ago] [${app}]${titlePart} ${ocr || "(no text)"}`;
      })
      .join("\n");
  } catch {
    // Privacy not yet initialized — use full text (no redaction, no titles)
    screenLines = ctx.screen
      .map(e => {
        const app = normalizeAppName(e.meta.app);
        const ago = Math.round((now - (e.ts || now)) / 1000);
        const ocr = e.ocr ? e.ocr.replace(/\n/g, " ").slice(0, ctx.preset.maxOcrChars) : "(no text)";
        return `[${ago}s ago] [${app}] ${ocr}`;
      })
      .join("\n");
  }

  // Audio transcript lines, redacted per audio_transcript level when possible.
  let audioLines: string;
  try {
    const audioLevel = levelFor("audio_transcript", "openrouter");
    audioLines = ctx.audio
      .map(e => {
        const ago = Math.round((now - (e.ts || now)) / 1000);
        const text = applyLevel(e.text.slice(0, ctx.preset.maxTranscriptChars), audioLevel, "audio");
        return `[${ago}s ago] ${text}`;
      })
      .join("\n");
  } catch {
    // Privacy not yet initialized — use full text
    audioLines = ctx.audio
      .map(e => {
        const ago = Math.round((now - (e.ts || now)) / 1000);
        return `[${ago}s ago] ${e.text.slice(0, ctx.preset.maxTranscriptChars)}`;
      })
      .join("\n");
  }

  // Arrow-joined app switch timeline, in whatever order ctx.appHistory
  // provides — presumably oldest-first; verify against buildContextWindow.
  const appSwitches = ctx.appHistory
    .map(a => normalizeAppName(a.app))
    .join(" \u2192 ");

  const recorderSection = buildRecorderSection(recorderStatus);

  // Gate images based on privacy level; only the textual note depends on
  // this here — the actual image payload is gated again in analyzeContext
  // and attached in callModel.
  let imagesForPrompt = ctx.images;
  try {
    const imgLevel = levelFor("screen_images", "openrouter");
    if (imgLevel === "none") {
      imagesForPrompt = [];
    }
  } catch { /* privacy not initialized, keep images */ }

  const hasImages = imagesForPrompt && imagesForPrompt.length > 0;
  const imageNote = hasImages ? `\n\nScreen screenshots (${imagesForPrompt!.length}) are attached below.` : "";

  return `Active app: ${normalizeAppName(ctx.currentApp)}
App history: ${appSwitches || "(none)"}${recorderSection}

Screen (OCR text, newest first):
${screenLines || "(no screen data)"}

Audio transcript (newest first, \ud83d\udd0a=system, \ud83c\udf99=mic):
${audioLines || "(silence)"}${imageNote}`;
}
182
+
183
+ /**
184
+ * Parse record command from LLM response.
185
+ */
186
+ function parseRecord(parsed: any): RecordCommand | undefined {
187
+ if (!parsed.record || typeof parsed.record !== "object") return undefined;
188
+ const cmd = parsed.record.command;
189
+ if (cmd !== "start" && cmd !== "stop") return undefined;
190
+ return {
191
+ command: cmd,
192
+ label: typeof parsed.record.label === "string" ? parsed.record.label : undefined,
193
+ };
194
+ }
195
+
196
+ /**
197
+ * Parse task from LLM response.
198
+ */
199
+ function parseTask(parsed: any): string | undefined {
200
+ if (typeof parsed.task !== "string" || !parsed.task.trim()) return undefined;
201
+ return parsed.task.trim();
202
+ }
203
+
204
/**
 * Call the LLM (OpenRouter) to analyze the context window.
 *
 * Tries each model in the chain (config.model, then fallbackModels) in
 * order, returning the first successful result. When images survive
 * privacy gating and a vision model is configured, that model is pushed
 * to the front of the chain.
 *
 * @param contextWindow    buffered screen/audio context to analyze
 * @param config           model chain, API key, and sampling settings
 * @param recorderStatus   current recorder state for the prompt, or null
 * @param traitSystemPrompt optional replacement for the default SYSTEM_PROMPT
 * @returns the first successful AgentResult
 * @throws the last model's error when every model in the chain fails
 */
export async function analyzeContext(
  contextWindow: ContextWindow,
  config: AgentConfig,
  recorderStatus: RecorderStatus | null = null,
  traitSystemPrompt?: string,
): Promise<AgentResult> {
  const userPrompt = buildUserPrompt(contextWindow, recorderStatus);
  // Apply privacy gating for images sent to OpenRouter (duplicates the
  // gating inside buildUserPrompt, which only controls the textual note).
  let images = contextWindow.images || [];
  try {
    const imgLevel = levelFor("screen_images", "openrouter");
    if (imgLevel === "none") {
      images = [];
    }
  } catch { /* privacy not initialized, keep images */ }
  const systemPrompt = traitSystemPrompt ?? SYSTEM_PROMPT;

  const models = [config.model, ...config.fallbackModels];

  // Auto-upgrade: use vision model when images are present
  if (images.length > 0 && config.visionModel) {
    // Insert vision model at the front if not already there
    if (!models.includes(config.visionModel)) {
      models.unshift(config.visionModel);
    }
  }

  let lastError: Error | null = null;

  // Fallback chain: a failure on one model (HTTP error, timeout abort)
  // is logged and the next model is tried.
  for (const model of models) {
    try {
      return await callModel(systemPrompt, userPrompt, images, model, config);
    } catch (err: any) {
      lastError = err;
      log(TAG, `model ${model} failed: ${err.message || err}, trying next...`);
    }
  }

  throw lastError || new Error("all models failed");
}
249
+
250
+ async function callModel(
251
+ systemPrompt: string,
252
+ userPrompt: string,
253
+ images: ContextWindow["images"],
254
+ model: string,
255
+ config: AgentConfig,
256
+ ): Promise<AgentResult> {
257
+ const start = Date.now();
258
+ const controller = new AbortController();
259
+ const timeoutMs = getModelTimeout(model);
260
+ const timeout = setTimeout(() => controller.abort(), timeoutMs);
261
+
262
+ try {
263
+ // Build user message content: text + optional images
264
+ let userContent: string | ContentPart[];
265
+ if (images && images.length > 0) {
266
+ const parts: ContentPart[] = [{ type: "text", text: userPrompt }];
267
+ for (const img of images) {
268
+ parts.push({
269
+ type: "image_url",
270
+ image_url: {
271
+ url: `data:image/jpeg;base64,${img.data}`,
272
+ detail: "low",
273
+ },
274
+ });
275
+ }
276
+ userContent = parts;
277
+ } else {
278
+ userContent = userPrompt;
279
+ }
280
+
281
+ const imageCount = images?.length || 0;
282
+
283
+ const response = await fetch("https://openrouter.ai/api/v1/chat/completions", {
284
+ method: "POST",
285
+ headers: {
286
+ "Authorization": `Bearer ${config.openrouterApiKey}`,
287
+ "Content-Type": "application/json",
288
+ },
289
+ body: JSON.stringify({
290
+ model,
291
+ messages: [
292
+ { role: "system", content: systemPrompt },
293
+ { role: "user", content: userContent },
294
+ ],
295
+ max_tokens: config.maxTokens,
296
+ temperature: config.temperature,
297
+ }),
298
+ signal: controller.signal,
299
+ });
300
+
301
+ if (!response.ok) {
302
+ const body = await response.text().catch(() => "");
303
+ throw new Error(`HTTP ${response.status}: ${body.slice(0, 200)}`);
304
+ }
305
+
306
+ const data = await response.json() as any;
307
+ const latencyMs = Date.now() - start;
308
+ const raw = data.choices?.[0]?.message?.content?.trim() || "";
309
+
310
+ if (imageCount > 0) {
311
+ log(TAG, `multimodal call: model=${model}, images=${imageCount}`);
312
+ }
313
+
314
+ // Parse JSON response — try direct parse, then extract embedded JSON, then fallback
315
+ try {
316
+ const jsonStr = raw.replace(/^```\w*\s*\n?/, "").replace(/\n?\s*```\s*$/, "").trim();
317
+ const parsed = JSON.parse(jsonStr);
318
+ return {
319
+ hud: parsed.hud || "\u2014",
320
+ digest: parsed.digest || "\u2014",
321
+ record: parseRecord(parsed),
322
+ task: parseTask(parsed),
323
+ latencyMs,
324
+ tokensIn: data.usage?.prompt_tokens || 0,
325
+ tokensOut: data.usage?.completion_tokens || 0,
326
+ model,
327
+ parsedOk: true,
328
+ };
329
+ } catch {
330
+ // Second chance: extract embedded JSON object
331
+ const match = raw.match(/\{[\s\S]*\}/);
332
+ if (match) {
333
+ try {
334
+ const parsed = JSON.parse(match[0]);
335
+ if (parsed.hud) {
336
+ return {
337
+ hud: parsed.hud,
338
+ digest: parsed.digest || "\u2014",
339
+ record: parseRecord(parsed),
340
+ task: parseTask(parsed),
341
+ latencyMs,
342
+ tokensIn: data.usage?.prompt_tokens || 0,
343
+ tokensOut: data.usage?.completion_tokens || 0,
344
+ model,
345
+ parsedOk: true,
346
+ };
347
+ }
348
+ } catch { /* fall through */ }
349
+ }
350
+
351
+ // Final fallback: use raw text
352
+ log(TAG, `JSON parse failed (model=${model}), raw: "${raw.slice(0, 120)}"`);
353
+ return {
354
+ hud: raw.slice(0, 160) || "\u2014",
355
+ digest: raw || "\u2014",
356
+ latencyMs,
357
+ tokensIn: data.usage?.prompt_tokens || 0,
358
+ tokensOut: data.usage?.completion_tokens || 0,
359
+ model,
360
+ parsedOk: false,
361
+ };
362
+ }
363
+ } finally {
364
+ clearTimeout(timeout);
365
+ }
366
+ }
@@ -0,0 +1,172 @@
1
+ import type { FeedBuffer } from "../buffers/feed-buffer.js";
2
+ import type { SenseBuffer } from "../buffers/sense-buffer.js";
3
+ import type { ContextWindow, ContextRichness, RichnessPreset } from "../types.js";
4
+
5
+ /**
6
+ * Track recently sent image hashes to avoid sending duplicates to vision model.
7
+ * Uses simple content hash: length + first 1000 chars.
8
+ */
9
+ const recentlySentImageHashes = new Set<string>();
10
+ const MAX_IMAGE_HASH_CACHE = 20;
11
+ let imageHashCacheOrder: string[] = [];
12
+
13
+ /**
14
+ * Richness presets — control how much context goes into agent analysis and escalation.
15
+ *
16
+ * lean: For selective mode. Minimal context, fast + cheap.
17
+ * standard: For focus mode. Moderate detail.
18
+ * rich: Full context. Maximum detail for thorough agent analysis.
19
+ */
20
+ export const RICHNESS_PRESETS: Record<ContextRichness, RichnessPreset> = {
21
+ lean: { maxScreenEvents: 10, maxAudioEntries: 5, maxOcrChars: 400, maxTranscriptChars: 400, maxImages: 0 },
22
+ standard: { maxScreenEvents: 20, maxAudioEntries: 10, maxOcrChars: 1000, maxTranscriptChars: 800, maxImages: 1 },
23
+ rich: { maxScreenEvents: 50, maxAudioEntries: 30, maxOcrChars: 4000, maxTranscriptChars: 2000, maxImages: 2 },
24
+ } as const;
25
+
26
+ /** App name normalization map (consistent display names). */
27
+ const APP_NAMES: Record<string, string> = {
28
+ "idea": "IntelliJ IDEA",
29
+ "code": "VS Code",
30
+ "code - insiders": "VS Code Insiders",
31
+ "webstorm": "WebStorm",
32
+ "pycharm": "PyCharm",
33
+ "datagrip": "DataGrip",
34
+ "google chrome": "Chrome",
35
+ "firefox": "Firefox",
36
+ "safari": "Safari",
37
+ "telegram lite": "Telegram",
38
+ "telegram": "Telegram",
39
+ "iterm2": "iTerm",
40
+ "terminal": "Terminal",
41
+ "finder": "Finder",
42
+ "audio midi setup": "Audio MIDI Setup",
43
+ };
44
+
45
+ export function normalizeAppName(app: string): string {
46
+ return APP_NAMES[app.toLowerCase()] || app;
47
+ }
48
+
49
+ /** Short app names for overlay feed (compact display). */
50
+ const APP_SHORT_NAMES: Record<string, string> = {
51
+ "IntelliJ IDEA": "IDEA",
52
+ "IntelliJ IDEA Ultimate": "IDEA",
53
+ "idea": "IDEA",
54
+ "Google Chrome": "Chrome",
55
+ "Visual Studio Code": "Code",
56
+ "Code - Insiders": "Code",
57
+ "iTerm2": "iTerm",
58
+ "Terminal": "Term",
59
+ "Telegram": "TG",
60
+ "WebStorm": "WS",
61
+ "PyCharm": "PyCharm",
62
+ "DataGrip": "DG",
63
+ "Finder": "Finder",
64
+ };
65
+
66
+ export function shortAppName(app: string): string {
67
+ if (APP_SHORT_NAMES[app]) return APP_SHORT_NAMES[app];
68
+ const lower = app.toLowerCase();
69
+ for (const [key, value] of Object.entries(APP_SHORT_NAMES)) {
70
+ if (key.toLowerCase() === lower) return value;
71
+ }
72
+ return app;
73
+ }
74
+
75
/**
 * Build a unified context window from in-process buffers.
 * Replaces both relay's buildContextWindow() and bridge's ContextManager.
 *
 * No HTTP round-trips — direct access to feed and sense buffers.
 *
 * Side effect: mutates the module-level image-hash cache, so a screenshot
 * already returned by a previous call within the cache bound is NOT
 * included again (images may come back undefined on repeat calls).
 *
 * @param feedBuffer  source of audio-transcript feed items
 * @param senseBuffer source of screen OCR/sense events and screenshots
 * @param richness    preset key selecting caps from RICHNESS_PRESETS
 * @param maxAgeMs    look-back window; older events are excluded
 */
export function buildContextWindow(
  feedBuffer: FeedBuffer,
  senseBuffer: SenseBuffer,
  richness: ContextRichness = "standard",
  maxAgeMs = 120_000,
): ContextWindow {
  const preset = RICHNESS_PRESETS[richness];
  const cutoff = Date.now() - maxAgeMs;

  // Audio: extract transcript text from feed items tagged as 'audio',
  // keeping only the newest maxAudioEntries.
  const audioItems = feedBuffer.queryBySource("audio", cutoff)
    .slice(-preset.maxAudioEntries);

  // Screen: get sense events within the time window
  const screenEvents = senseBuffer.queryByTime(cutoff);

  // Current app = app of the last event; assumes queryByTime returns
  // events in ascending time order — TODO confirm in SenseBuffer.
  const latestSense = screenEvents[screenEvents.length - 1];
  const currentApp = latestSense?.meta.app || "unknown";

  // Deduplicate OCR text (consecutive identical OCR is noise); OCR-less
  // "context" events are kept so app-switch information survives.
  const dedupedScreen = [];
  let lastOcr = "";
  for (const e of screenEvents) {
    if (e.ocr && e.ocr !== lastOcr) {
      dedupedScreen.push(e);
      lastOcr = e.ocr;
    } else if (!e.ocr && e.type === "context") {
      dedupedScreen.push(e);
    }
  }

  // App transition timeline
  const appHistory = senseBuffer.appHistory(cutoff);

  // Limit to preset maximums, then reverse so index 0 is the newest
  // event (recency weighting for prompt construction).
  const sortedScreen = dedupedScreen.slice(-preset.maxScreenEvents).reverse();

  // Timestamp of the freshest event across both buffers (0 when empty).
  const newestEventTs = Math.max(
    audioItems[audioItems.length - 1]?.ts || 0,
    screenEvents[screenEvents.length - 1]?.ts || 0
  );

  // Extract recent images for multimodal vision, deduplicated by content
  // hash against the module-level cache so an unchanged screen is not
  // re-sent to the vision model every tick.
  let images: { data: string; app: string; ts: number }[] | undefined;
  if (preset.maxImages > 0) {
    const rawImages = senseBuffer.recentImages(preset.maxImages);
    images = [];

    for (const e of rawImages) {
      if (!e.imageData) continue;

      // Cheap approximate content hash: length + first 1000 chars.
      const hash = `${e.imageData.length}:${e.imageData.slice(0, 1000)}`;

      // Skip if recently sent to vision model (avoid duplicate API calls)
      if (recentlySentImageHashes.has(hash)) {
        continue;
      }

      // Track this hash; evict the oldest entries beyond the cache bound.
      recentlySentImageHashes.add(hash);
      imageHashCacheOrder.push(hash);
      while (imageHashCacheOrder.length > MAX_IMAGE_HASH_CACHE) {
        const oldest = imageHashCacheOrder.shift()!;
        recentlySentImageHashes.delete(oldest);
      }

      images.push({
        data: e.imageData,
        app: e.meta.app || "unknown",
        ts: e.ts,
      });
    }

    // Every candidate was a duplicate — omit the field entirely so
    // callers can use a simple truthiness check.
    if (images.length === 0) images = undefined;
  }

  return {
    audio: audioItems,
    screen: sortedScreen,
    images,
    currentApp,
    appHistory,
    audioCount: audioItems.length,
    screenCount: screenEvents.length,
    windowMs: maxAgeMs,
    newestEventTs,
    preset,
  };
}