@geravant/sinain 1.0.18 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/README.md +10 -1
  2. package/cli.js +176 -0
  3. package/index.ts +163 -1257
  4. package/install.js +12 -2
  5. package/launcher.js +622 -0
  6. package/openclaw.plugin.json +4 -0
  7. package/pack-prepare.js +48 -0
  8. package/package.json +26 -5
  9. package/sense_client/README.md +82 -0
  10. package/sense_client/__init__.py +1 -0
  11. package/sense_client/__main__.py +462 -0
  12. package/sense_client/app_detector.py +54 -0
  13. package/sense_client/app_detector_win.py +83 -0
  14. package/sense_client/capture.py +215 -0
  15. package/sense_client/capture_win.py +88 -0
  16. package/sense_client/change_detector.py +86 -0
  17. package/sense_client/config.py +64 -0
  18. package/sense_client/gate.py +145 -0
  19. package/sense_client/ocr.py +347 -0
  20. package/sense_client/privacy.py +65 -0
  21. package/sense_client/requirements.txt +13 -0
  22. package/sense_client/roi_extractor.py +84 -0
  23. package/sense_client/sender.py +173 -0
  24. package/sense_client/tests/__init__.py +0 -0
  25. package/sense_client/tests/test_stream1_optimizations.py +234 -0
  26. package/setup-overlay.js +82 -0
  27. package/sinain-agent/.env.example +17 -0
  28. package/sinain-agent/CLAUDE.md +80 -0
  29. package/sinain-agent/mcp-config.json +12 -0
  30. package/sinain-agent/run.sh +248 -0
  31. package/sinain-core/.env.example +93 -0
  32. package/sinain-core/package-lock.json +552 -0
  33. package/sinain-core/package.json +21 -0
  34. package/sinain-core/src/agent/analyzer.ts +366 -0
  35. package/sinain-core/src/agent/context-window.ts +172 -0
  36. package/sinain-core/src/agent/loop.ts +404 -0
  37. package/sinain-core/src/agent/situation-writer.ts +187 -0
  38. package/sinain-core/src/agent/traits.ts +520 -0
  39. package/sinain-core/src/audio/capture-spawner-macos.ts +44 -0
  40. package/sinain-core/src/audio/capture-spawner-win.ts +37 -0
  41. package/sinain-core/src/audio/capture-spawner.ts +14 -0
  42. package/sinain-core/src/audio/pipeline.ts +335 -0
  43. package/sinain-core/src/audio/transcription-local.ts +141 -0
  44. package/sinain-core/src/audio/transcription.ts +278 -0
  45. package/sinain-core/src/buffers/feed-buffer.ts +71 -0
  46. package/sinain-core/src/buffers/sense-buffer.ts +425 -0
  47. package/sinain-core/src/config.ts +245 -0
  48. package/sinain-core/src/escalation/escalation-slot.ts +136 -0
  49. package/sinain-core/src/escalation/escalator.ts +812 -0
  50. package/sinain-core/src/escalation/message-builder.ts +323 -0
  51. package/sinain-core/src/escalation/openclaw-ws.ts +726 -0
  52. package/sinain-core/src/escalation/scorer.ts +166 -0
  53. package/sinain-core/src/index.ts +507 -0
  54. package/sinain-core/src/learning/feedback-store.ts +253 -0
  55. package/sinain-core/src/learning/signal-collector.ts +218 -0
  56. package/sinain-core/src/log.ts +24 -0
  57. package/sinain-core/src/overlay/commands.ts +126 -0
  58. package/sinain-core/src/overlay/ws-handler.ts +267 -0
  59. package/sinain-core/src/privacy/index.ts +18 -0
  60. package/sinain-core/src/privacy/presets.ts +40 -0
  61. package/sinain-core/src/privacy/redact.ts +92 -0
  62. package/sinain-core/src/profiler.ts +181 -0
  63. package/sinain-core/src/recorder.ts +186 -0
  64. package/sinain-core/src/server.ts +417 -0
  65. package/sinain-core/src/trace/trace-store.ts +73 -0
  66. package/sinain-core/src/trace/tracer.ts +94 -0
  67. package/sinain-core/src/types.ts +427 -0
  68. package/sinain-core/src/util/dedup.ts +48 -0
  69. package/sinain-core/src/util/task-store.ts +84 -0
  70. package/sinain-core/tsconfig.json +18 -0
  71. package/sinain-knowledge/adapters/generic/adapter.ts +103 -0
  72. package/sinain-knowledge/adapters/interface.ts +72 -0
  73. package/sinain-knowledge/adapters/openclaw/adapter.ts +223 -0
  74. package/sinain-knowledge/curation/engine.ts +493 -0
  75. package/sinain-knowledge/curation/resilience.ts +336 -0
  76. package/sinain-knowledge/data/git-store.ts +312 -0
  77. package/sinain-knowledge/data/schema.ts +89 -0
  78. package/sinain-knowledge/data/snapshot.ts +226 -0
  79. package/sinain-knowledge/data/store.ts +488 -0
  80. package/sinain-knowledge/deploy/cli.ts +214 -0
  81. package/sinain-knowledge/deploy/manifest.ts +80 -0
  82. package/sinain-knowledge/protocol/bindings/generic.md +5 -0
  83. package/sinain-knowledge/protocol/bindings/openclaw.md +5 -0
  84. package/sinain-knowledge/protocol/heartbeat.md +62 -0
  85. package/sinain-knowledge/protocol/renderer.ts +56 -0
  86. package/sinain-knowledge/protocol/skill.md +335 -0
  87. package/sinain-mcp-server/index.ts +337 -0
  88. package/sinain-mcp-server/package.json +19 -0
  89. package/sinain-mcp-server/tsconfig.json +15 -0
@@ -0,0 +1,48 @@
1
+ #!/usr/bin/env node
2
+ // prepack: replace symlinks with real copies so npm pack bundles the files.
3
+ // postpack: restore symlinks.
4
+
5
import fs from "fs";
import path from "path";
import { fileURLToPath } from "url";
7
+
8
// Directories that are looked up next to this script and, when they are
// symlinks, swapped for real copies while `npm pack` runs.
const LINKS = ["sinain-core", "sinain-mcp-server", "sinain-agent", "sense_client"];

// Directory containing this script. fileURLToPath() is used instead of
// URL#pathname because pathname keeps percent-escapes (e.g. "%20" for a
// space) and yields "/C:/..." on Windows, neither of which is a usable
// filesystem path.
const PKG_DIR = path.dirname(fileURLToPath(import.meta.url));

const action = process.argv[2]; // "pre" or "post"

if (action === "pre") {
  // prepack: replace each symlink with a filtered copy of its target.
  for (const name of LINKS) {
    const linkPath = path.join(PKG_DIR, name);
    // existsSync follows links, so missing entries and dangling symlinks are skipped.
    if (!fs.existsSync(linkPath)) continue;
    const stat = fs.lstatSync(linkPath);
    if (!stat.isSymbolicLink()) continue;
    // Resolve the target before unlinking; afterwards the link can no longer be resolved.
    const target = fs.realpathSync(linkPath);
    fs.unlinkSync(linkPath);
    copyDir(target, linkPath);
  }
  console.log("prepack: symlinks → copies");
} else if (action === "post") {
  // postpack: remove the copies and recreate the relative symlinks.
  for (const name of LINKS) {
    const linkPath = path.join(PKG_DIR, name);
    if (!fs.existsSync(linkPath)) continue;
    const stat = fs.lstatSync(linkPath);
    if (stat.isSymbolicLink()) continue; // already a symlink
    fs.rmSync(linkPath, { recursive: true, force: true });
    fs.symlinkSync(`../${name}`, linkPath);
  }
  console.log("postpack: copies → symlinks");
}
35
+
36
/**
 * Recursively copy the directory tree rooted at `src` into `dst`.
 *
 * `dst` is created if needed. Entries named "node_modules", "__pycache__",
 * ".pytest_cache", "dist" or ".env" are left out at every level.
 *
 * @param {string} src - Directory to read from.
 * @param {string} dst - Directory to write into.
 */
function copyDir(src, dst) {
  const excluded = ["node_modules", "__pycache__", ".pytest_cache", "dist", ".env"];

  fs.mkdirSync(dst, { recursive: true });

  fs.readdirSync(src, { withFileTypes: true })
    .filter((item) => !excluded.includes(item.name))
    .forEach((item) => {
      const from = path.join(src, item.name);
      const to = path.join(dst, item.name);
      if (!item.isDirectory()) {
        fs.copyFileSync(from, to);
        return;
      }
      copyDir(from, to);
    });
}
package/package.json CHANGED
@@ -1,19 +1,40 @@
1
1
  {
2
2
  "name": "@geravant/sinain",
3
- "version": "1.0.18",
4
- "description": "sinain OpenClaw plugin — AI overlay for macOS",
3
+ "version": "1.1.0",
4
+ "description": "sinain — AI overlay system for macOS (npx @geravant/sinain start)",
5
5
  "type": "module",
6
6
  "bin": {
7
- "sinain": "./install.js"
7
+ "sinain": "./cli.js",
8
+ "sinain-knowledge": "./sinain-knowledge/deploy/cli.ts"
8
9
  },
9
10
  "scripts": {
10
- "postinstall": "node install.js"
11
+ "prepack": "node pack-prepare.js pre",
12
+ "postpack": "node pack-prepare.js post",
13
+ "postinstall": "node cli.js install --if-openclaw"
11
14
  },
12
15
  "files": [
16
+ "cli.js",
17
+ "launcher.js",
18
+ "setup-overlay.js",
19
+ "pack-prepare.js",
20
+ "install.js",
13
21
  "index.ts",
14
22
  "openclaw.plugin.json",
15
- "install.js",
16
23
  "sinain-memory",
24
+ "sinain-knowledge",
25
+ "sinain-core/src",
26
+ "sinain-core/package.json",
27
+ "sinain-core/package-lock.json",
28
+ "sinain-core/tsconfig.json",
29
+ "sinain-core/.env.example",
30
+ "sinain-mcp-server/index.ts",
31
+ "sinain-mcp-server/package.json",
32
+ "sinain-mcp-server/tsconfig.json",
33
+ "sinain-agent/run.sh",
34
+ "sinain-agent/mcp-config.json",
35
+ "sinain-agent/.env.example",
36
+ "sinain-agent/CLAUDE.md",
37
+ "sense_client",
17
38
  "HEARTBEAT.md",
18
39
  "SKILL.md"
19
40
  ],
@@ -0,0 +1,82 @@
1
+ # sense_client
2
+
3
+ Screen capture and change detection pipeline for SinainHUD. Captures the screen via ScreenCaptureKit, detects meaningful changes, runs OCR, applies privacy filters, and sends observations to sinain-core.
4
+
5
+ ## Architecture
6
+
7
+ ```
8
+ SCKCapture (ScreenCaptureKit)
9
+     │
10
+     ▼
11
+ ChangeDetector (SSIM diff)
12
+     │
13
+     ▼
14
+ ROIExtractor (contour → regions of interest)
15
+     │
16
+     ▼
17
+ OCR (Tesseract via pytesseract)
18
+     │
19
+     ▼
20
+ Privacy filter (strip <private> tags + auto-redact secrets)
21
+     │
22
+     ▼
23
+ DecisionGate (cooldown + significance check)
24
+     │
25
+     ▼
26
+ SenseSender ──POST──► sinain-core :9500/sense
27
+ ```
28
+
29
+ ### Capture backends
30
+
31
+ | Backend | API | Notes |
32
+ |---------|-----|-------|
33
+ | `SCKCapture` (default) | ScreenCaptureKit | macOS 12.3+, async zero-copy, camera-safe |
34
+ | `ScreenKitCapture` | IPC file read | Reads `~/.sinain/capture/frame.jpg` from overlay |
35
+ | `ScreenCapture` | `CGDisplayCreateImage` | Legacy fallback, deprecated on macOS 15 |
36
+
37
+ ## Requirements
38
+
39
+ - macOS 12.3+ (for ScreenCaptureKit)
40
+ - Python 3.10+
41
+ - Tesseract OCR: `brew install tesseract`
42
+
43
+ ## Setup
44
+
45
+ ```bash
46
+ cd sense_client
47
+ pip install -r requirements.txt
48
+ ```
49
+
50
+ ## Running
51
+
52
+ ```bash
53
+ # From the sinain-hud repo root:
54
+ python -m sense_client
55
+
56
+ # With a custom config file:
57
+ python -m sense_client --config path/to/config.json
58
+ ```
59
+
60
+ On first run, macOS will prompt for Screen Recording permission.
61
+
62
+ ## Configuration
63
+
64
+ The pipeline reads from a JSON config file (passed via `--config`). All fields are optional — defaults are used for anything unspecified.
65
+
66
+ | Section | Key | Default | Description |
67
+ |---------|-----|---------|-------------|
68
+ | `capture` | `mode` | `screen` | Capture mode |
69
+ | `capture` | `target` | `0` | Display index |
70
+ | `capture` | `fps` | `2.0` | Frames per second |
71
+ | `capture` | `scale` | `0.5` | Downscale factor |
72
+ | `detection` | `ssimThreshold` | `0.92` | SSIM score below which a frame is "changed" |
73
+ | `detection` | `cooldownMs` | `5000` | Min ms between change events |
74
+ | `gate` | `minOcrChars` | `20` | Minimum OCR text length to pass gate |
75
+ | `gate` | `cooldownMs` | `5000` | Min ms between gated events |
76
+ | `relay` | `url` | `http://localhost:9500` | sinain-core endpoint |
77
+
78
+ ## Privacy
79
+
80
+ - **`<private>` tags**: any on-screen text wrapped in `<private>...</private>` is stripped before sending
81
+ - **Auto-redaction**: credit card numbers, API keys, bearer tokens, AWS keys, and passwords are automatically redacted from OCR output
82
+ - Server-side stripping provides an additional layer via the sinain-hud plugin
@@ -0,0 +1 @@
1
+ """Sinain sense_client — screen capture preprocessing pipeline."""
@@ -0,0 +1,462 @@
1
+ """Entry point: python -m sense_client"""
2
+
3
+ import io
4
+ import sys
5
+ import traceback
6
+
7
+ # Force UTF-8 stdout/stderr on Windows to prevent UnicodeEncodeError crashes
8
+ # when window titles contain non-cp1251 characters (e.g. Telegram's \u200e).
9
+ if sys.platform == "win32":
10
+ sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace", line_buffering=True)
11
+ sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8", errors="replace", line_buffering=True)
12
+
13
+ import argparse
14
+ import concurrent.futures
15
+ import json
16
+ import os
17
+ import time
18
+
19
+ import numpy as np
20
+ import requests as _requests
21
+ from skimage.metrics import structural_similarity
22
+
23
+ # Platform-specific memory reporting
24
+ if sys.platform != "win32":
25
+ import resource
26
+
27
+ from .capture import ScreenCapture, create_capture
28
+ from .change_detector import ChangeDetector
29
+ from .roi_extractor import ROIExtractor
30
+ from .ocr import OCRResult, create_ocr
31
+ from .gate import DecisionGate, SenseObservation
32
+ from .sender import SenseSender, package_full_frame, package_roi
33
+ from .app_detector import AppDetector
34
+ from .config import load_config
35
+ from .privacy import apply_privacy
36
+
37
# Default location of the JSON control file. It is the default value of the
# --control CLI option, and main() passes that path to is_enabled() on every
# frame to decide whether capture is paused.
if sys.platform == "win32":
    # Falls back to C:\Temp when the TEMP environment variable is not set.
    CONTROL_FILE = os.path.join(os.environ.get("TEMP", "C:\\Temp"), "sinain-sense-control.json")
else:
    CONTROL_FILE = "/tmp/sinain-sense-control.json"
41
+
42
+
43
def log(msg: str):
    """Print ``msg`` to stdout behind a ``[sense]`` tag, flushing immediately."""
    line = "[sense] {}".format(msg)
    print(line, flush=True)
45
+
46
+
47
+ def _gate_reason(gate, change, ocr, app_changed, window_changed):
48
+ """Diagnose why the gate dropped an event."""
49
+ now = time.time() * 1000
50
+ ocr_len = len(ocr.text) if ocr.text else 0
51
+
52
+ # Check cooldown
53
+ recent_app = (now - gate.last_app_change_ts) < 10000
54
+ effective_cd = gate.adaptive_cooldown_ms if recent_app else gate.cooldown_ms
55
+ elapsed = now - gate.last_send_ts
56
+ if elapsed < effective_cd:
57
+ return f"cooldown ({elapsed:.0f}ms < {effective_cd}ms)"
58
+ if change is None:
59
+ return "no_change"
60
+ if ocr_len < gate.min_ocr_chars:
61
+ return f"too_few_chars ({ocr_len} < {gate.min_ocr_chars})"
62
+ if ocr.text and gate._is_duplicate(ocr.text):
63
+ return "duplicate (similar to recent text)"
64
+ if ocr.text and not gate._ocr_quality_ok(ocr.text):
65
+ return "bad_quality (ocr noise)"
66
+ if change.ssim_score >= gate.major_change_threshold:
67
+ return f"no_visual (ssim={change.ssim_score:.3f} >= {gate.major_change_threshold})"
68
+ return f"unknown (ocr={ocr_len}, ssim={change.ssim_score:.3f})"
69
+
70
+
71
+ def _run_ocr(ocr, ocr_pool, rois) -> OCRResult:
72
+ """Run OCR on extracted ROIs (parallel if multiple). Returns best result."""
73
+ if not rois:
74
+ return OCRResult(text="", confidence=0, word_count=0)
75
+ if len(rois) == 1:
76
+ return ocr.extract(rois[0].image)
77
+ futures = [ocr_pool.submit(ocr.extract, roi.image) for roi in rois]
78
+ results = [f.result() for f in concurrent.futures.as_completed(futures)]
79
+ return max(results, key=lambda r: len(r.text))
80
+
81
+
82
def is_enabled(control_path: str) -> bool:
    """Report whether capture is enabled according to the JSON control file.

    The file is expected to hold an object such as ``{"enabled": false}``.
    Capture defaults to enabled whenever the file gives no usable answer: it
    is missing or unreadable, it is not valid UTF-8 JSON, its top-level value
    is not an object, or it has no ``"enabled"`` key.

    Args:
        control_path: Path to the control file.

    Returns:
        The truthiness of the ``"enabled"`` value, or ``True`` by default.
    """
    try:
        with open(control_path, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError covers missing/unreadable paths; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError. This runs once per
        # frame, so a bad control file must not take the capture loop down.
        return True
    if not isinstance(data, dict):
        # Valid JSON that is not an object (e.g. [] or null) has no "enabled" key.
        return True
    return bool(data.get("enabled", True))
90
+
91
+
92
+ def main():
93
+ parser = argparse.ArgumentParser(description="Sinain screen capture pipeline")
94
+ parser.add_argument("--config", default=None, help="Path to config JSON")
95
+ parser.add_argument("--control", default=CONTROL_FILE, help="Path to control file")
96
+ args = parser.parse_args()
97
+
98
+ config = load_config(args.config)
99
+
100
+ log("initializing capture...")
101
+ capture = create_capture(
102
+ mode=config["capture"]["mode"],
103
+ target=config["capture"]["target"],
104
+ fps=config["capture"]["fps"],
105
+ scale=config["capture"]["scale"],
106
+ )
107
+ detector = ChangeDetector(
108
+ threshold=config["detection"]["ssimThreshold"],
109
+ min_area=config["detection"]["minArea"],
110
+ )
111
+ extractor = ROIExtractor(
112
+ padding=config["detection"]["roiPadding"],
113
+ )
114
+ log("initializing OCR...")
115
+ ocr = create_ocr(config)
116
+ gate = DecisionGate(
117
+ min_ocr_chars=config["gate"]["minOcrChars"],
118
+ major_change_threshold=config["gate"]["majorChangeThreshold"],
119
+ cooldown_ms=config["gate"]["cooldownMs"],
120
+ adaptive_cooldown_ms=config["gate"].get("adaptiveCooldownMs", 2000),
121
+ context_cooldown_ms=config["gate"].get("contextCooldownMs", 10000),
122
+ )
123
+ sender = SenseSender(
124
+ url=config["relay"]["url"],
125
+ max_image_kb=config["relay"]["maxImageKB"],
126
+ send_thumbnails=config["relay"]["sendThumbnails"],
127
+ )
128
+ app_detector = AppDetector()
129
+ ocr_pool = concurrent.futures.ThreadPoolExecutor(max_workers=4)
130
+
131
+ # Adaptive SSIM threshold state
132
+ ssim_stable_threshold = config["detection"]["ssimThreshold"] # 0.92
133
+ ssim_sensitive_threshold = 0.85
134
+ last_app_change_time = 0.0
135
+
136
+ opt = config.get("optimization", {})
137
+ use_backpressure = opt.get("backpressure", False)
138
+ use_text_dedup = opt.get("textDedup", False)
139
+ use_shadow = opt.get("shadowValidation", False)
140
+
141
+ # Privacy matrix env vars (gate what leaves this process toward sinain-core/openrouter)
142
+ _privacy_ocr_openrouter = os.environ.get("PRIVACY_OCR_OPENROUTER", "full")
143
+ _privacy_images_openrouter = os.environ.get("PRIVACY_IMAGES_OPENROUTER", "full")
144
+
145
+ log("sense_client started")
146
+ log(f" relay: {config['relay']['url']}")
147
+ log(f" fps: {config['capture']['fps']}, scale: {config['capture']['scale']}")
148
+ log(f" ocr backend: {config['ocr'].get('backend', 'auto')}")
149
+ log(f" privacy: ocr_openrouter={_privacy_ocr_openrouter} images_openrouter={_privacy_images_openrouter}")
150
+ log(f" control: {args.control}")
151
+ if use_backpressure:
152
+ log(" optimization: backpressure ON")
153
+ if use_text_dedup:
154
+ log(" optimization: textDedup ON")
155
+ if use_shadow:
156
+ log(" optimization: shadowValidation ON")
157
+
158
+ events_sent = 0
159
+ events_failed = 0
160
+ events_gated = 0
161
+ ocr_errors = 0
162
+ ocr_skipped_backpressure = 0
163
+ shadow_divergences = 0
164
+ last_stats = time.time()
165
+ start_time = time.time()
166
+ event_latencies: list[float] = []
167
+ detect_times: list[float] = []
168
+ ocr_times: list[float] = []
169
+ send_times: list[float] = []
170
+
171
+ # Backpressure state: latest changed frame waiting for gate
172
+ pending_frame = None
173
+ pending_rois = None
174
+ pending_change = None
175
+
176
+ # Diagnostic state
177
+ _logged_first_ssim = False
178
+ _logged_first_frame = False
179
+ _last_heartbeat = time.time()
180
+
181
+ for frame, ts in capture.capture_loop():
182
+ # Check control file (pause/resume)
183
+ if not is_enabled(args.control):
184
+ time.sleep(1)
185
+ continue
186
+
187
+ # First-frame log
188
+ if not _logged_first_frame:
189
+ log(f"first frame: {frame.size[0]}x{frame.size[1]} (scale={config['capture']['scale']})")
190
+ _logged_first_frame = True
191
+
192
+ # 1. Check app/window change
193
+ app_changed, window_changed, app_name, window_title = app_detector.detect_change()
194
+
195
+ # Adaptive SSIM threshold
196
+ now_sec = time.time()
197
+ if app_changed:
198
+ last_app_change_time = now_sec
199
+ detector.set_threshold(ssim_sensitive_threshold)
200
+ log(f"SSIM threshold lowered to {ssim_sensitive_threshold} (app change)")
201
+ elif now_sec - last_app_change_time > 10.0 and detector.threshold != ssim_stable_threshold:
202
+ detector.set_threshold(ssim_stable_threshold)
203
+ log(f"SSIM threshold restored to {ssim_stable_threshold} (stable)")
204
+
205
+ # 2. Detect frame change
206
+ t0 = time.time()
207
+ change = detector.detect(frame)
208
+ detect_times.append((time.time() - t0) * 1000)
209
+ if len(detect_times) > 500: detect_times.clear()
210
+ if change is None and not app_changed and not window_changed:
211
+ # Log first SSIM so we can see the range
212
+ if not _logged_first_ssim and detector.prev_frame is not None:
213
+ gray = np.array(frame.convert("L"))
214
+ score = structural_similarity(detector.prev_frame, gray)
215
+ log(f"first ssim sample: {score:.4f} (threshold={detector.threshold})")
216
+ _logged_first_ssim = True
217
+ # Periodic heartbeat
218
+ if time.time() - _last_heartbeat >= 30:
219
+ log(f"heartbeat: {capture.stats_ok} frames, {events_sent} sent, "
220
+ f"{events_gated} gated, threshold={detector.threshold}")
221
+ _last_heartbeat = time.time()
222
+ continue
223
+
224
+ if change:
225
+ log(f"change detected: ssim={change.ssim_score:.4f} contours={len(change.contours)}")
226
+
227
+ # 3. Extract ROIs + stash as pending
228
+ rois = []
229
+ if change:
230
+ rois = extractor.extract(frame, change.contours)
231
+ if rois:
232
+ roi_sizes = [f"{r.bbox[2]}x{r.bbox[3]}" for r in rois]
233
+ log(f"rois: {len(rois)} regions ({', '.join(roi_sizes)})")
234
+ else:
235
+ log(f"rois: 0 (contours={len(change.contours)} all too small)")
236
+ if use_backpressure:
237
+ pending_frame = frame
238
+ pending_rois = rois
239
+ pending_change = change
240
+
241
+ # 4. Backpressure: check if gate is ready before running OCR
242
+ if use_backpressure:
243
+ if not gate.is_ready(app_changed, window_changed):
244
+ ocr_skipped_backpressure += 1
245
+ events_gated += 1
246
+ continue
247
+ # Gate is ready — OCR the latest pending frame
248
+ use_frame = pending_frame or frame
249
+ use_rois = pending_rois or rois
250
+ use_change = pending_change or change
251
+ else:
252
+ use_frame = frame
253
+ use_rois = rois
254
+ use_change = change
255
+
256
+ # 5. OCR on ROIs
257
+ t0 = time.time()
258
+ ocr_result = OCRResult(text="", confidence=0, word_count=0)
259
+ try:
260
+ ocr_result = _run_ocr(ocr, ocr_pool, use_rois)
261
+ except Exception as e:
262
+ ocr_errors += 1
263
+ log(f"OCR error: {e}")
264
+ ocr_times.append((time.time() - t0) * 1000)
265
+ if len(ocr_times) > 500: ocr_times.clear()
266
+
267
+ if ocr_result.text:
268
+ log(f"ocr: {len(ocr_result.text)} chars, {ocr_result.word_count} words")
269
+ else:
270
+ log(f"ocr: empty (rois={len(use_rois)})")
271
+
272
+ # Shadow validation: run baseline OCR on original frame for comparison
273
+ if use_shadow and use_backpressure and rois:
274
+ try:
275
+ baseline_result = _run_ocr(ocr, ocr_pool, rois)
276
+ if baseline_result.text != ocr_result.text:
277
+ shadow_divergences += 1
278
+ log(f"SHADOW DIVERGENCE: baseline={len(baseline_result.text)}chars "
279
+ f"optimized={len(ocr_result.text)}chars")
280
+ # Use baseline for actual sending (safety)
281
+ ocr_result = baseline_result
282
+ except Exception as e:
283
+ log(f"Shadow OCR error: {e}")
284
+
285
+ # Clear pending state after OCR
286
+ if use_backpressure:
287
+ pending_frame = pending_rois = pending_change = None
288
+
289
+ # 5b. Privacy filter — strip <private> tags and redact secrets
290
+ if ocr_result.text:
291
+ ocr_result = OCRResult(
292
+ text=apply_privacy(ocr_result.text),
293
+ confidence=ocr_result.confidence,
294
+ word_count=ocr_result.word_count,
295
+ )
296
+
297
+ # 5c. Privacy matrix: apply OCR gating for openrouter destination
298
+ if ocr_result.text and _privacy_ocr_openrouter != "full":
299
+ if _privacy_ocr_openrouter == "none":
300
+ ocr_result = OCRResult(text="", confidence=0, word_count=0)
301
+ elif _privacy_ocr_openrouter == "summary":
302
+ ocr_result = OCRResult(
303
+ text=f"[SCREEN: {len(ocr_result.text)} chars]",
304
+ confidence=ocr_result.confidence,
305
+ word_count=1,
306
+ )
307
+ # "redacted" is already handled by apply_privacy above
308
+
309
+ # 6. Decision gate
310
+ event = gate.classify(
311
+ change=use_change,
312
+ ocr=ocr_result,
313
+ app_changed=app_changed,
314
+ window_changed=window_changed,
315
+ )
316
+ if event is None:
317
+ reason = _gate_reason(gate, use_change, ocr_result, app_changed, window_changed)
318
+ log(f"gate dropped: {reason}")
319
+ events_gated += 1
320
+ continue
321
+
322
+ # 7. Package and send
323
+ event.meta.app = app_name
324
+ event.meta.window_title = window_title
325
+ event.meta.screen = config["capture"]["target"]
326
+
327
+ # 7b. Auto-populate structured observation from available context
328
+ facts = []
329
+ if app_name:
330
+ facts.append(f"app: {app_name}")
331
+ if window_title:
332
+ facts.append(f"window: {window_title}")
333
+ if use_change and use_change.ssim_score:
334
+ facts.append(f"ssim: {use_change.ssim_score:.3f}")
335
+ if ocr_result.text:
336
+ # Extract first meaningful line as subtitle
337
+ first_line = ocr_result.text.split("\n")[0][:120]
338
+ facts.append(f"ocr: {first_line}")
339
+
340
+ title = f"{event.type} in {app_name}" if app_name else f"{event.type} event"
341
+ subtitle = window_title[:80] if window_title else ""
342
+ event.observation = SenseObservation(
343
+ title=title, subtitle=subtitle, facts=facts,
344
+ )
345
+
346
+ # Send small thumbnail for ALL event types (agent uses vision)
347
+ # Privacy matrix: gate image sending based on PRIVACY_IMAGES_OPENROUTER
348
+ if _privacy_images_openrouter == "none":
349
+ pass # Skip image packaging entirely
350
+ elif event.type == "context":
351
+ event.roi = package_full_frame(use_frame)
352
+ elif use_rois:
353
+ event.roi = package_roi(use_rois[0])
354
+ else:
355
+ # Fallback: send full frame thumbnail for text-only events
356
+ event.roi = package_full_frame(use_frame)
357
+ # Diff images removed — agent doesn't use binary diff masks
358
+
359
+ t0 = time.time()
360
+ ok = sender.send(event)
361
+ send_times.append((time.time() - t0) * 1000)
362
+ if len(send_times) > 500: send_times.clear()
363
+ if ok:
364
+ events_sent += 1
365
+ send_latency = time.time() * 1000 - event.ts
366
+ event_latencies.append(send_latency)
367
+ if len(event_latencies) > 500: event_latencies.clear()
368
+ ssim = f"{use_change.ssim_score:.3f}" if use_change else "n/a"
369
+ ctx = f"app={app_name}"
370
+ if window_title:
371
+ ctx += f", win={window_title[:40]}"
372
+ log(f"-> {event.type} sent ({ctx}, ssim={ssim}, latency={send_latency:.0f}ms)")
373
+ else:
374
+ events_failed += 1
375
+ log(f"-> {event.type} FAILED to send")
376
+
377
+ # Periodic pipeline stats
378
+ now = time.time()
379
+ if now - last_stats >= 60:
380
+ latency_info = ""
381
+ if event_latencies:
382
+ sorted_lat = sorted(event_latencies)
383
+ p50 = sorted_lat[len(sorted_lat) // 2]
384
+ p95 = sorted_lat[int(len(sorted_lat) * 0.95)]
385
+ latency_info = f" latency_p50={p50:.0f}ms p95={p95:.0f}ms"
386
+ event_latencies.clear()
387
+
388
+ avg_detect = sum(detect_times) / len(detect_times) if detect_times else 0
389
+ avg_ocr = sum(ocr_times) / len(ocr_times) if ocr_times else 0
390
+ avg_send = sum(send_times) / len(send_times) if send_times else 0
391
+
392
+ bp_info = ""
393
+ if use_backpressure:
394
+ bp_info = f" ocrSkipped={ocr_skipped_backpressure}"
395
+ shadow_info = ""
396
+ if use_shadow:
397
+ shadow_info = f" shadowDiv={shadow_divergences}"
398
+
399
+ log(f"stats: captures={capture.stats_ok}ok/{capture.stats_fail}fail"
400
+ f" events={events_sent}sent/{events_failed}fail/{events_gated}gated"
401
+ f"{bp_info}{shadow_info}{latency_info}"
402
+ f" detect={avg_detect:.1f}ms ocr={avg_ocr:.1f}ms send={avg_send:.1f}ms")
403
+
404
+ # POST profiling snapshot to sinain-core
405
+ if sys.platform == "win32":
406
+ try:
407
+ import psutil
408
+ rss_mb = round(psutil.Process().memory_info().rss / 1048576, 1)
409
+ except Exception:
410
+ rss_mb = 0.0
411
+ else:
412
+ usage = resource.getrusage(resource.RUSAGE_SELF)
413
+ rss_mb = round(usage.ru_maxrss / 1048576, 1)
414
+ snapshot = {
415
+ "rssMb": rss_mb,
416
+ "uptimeS": round(now - start_time),
417
+ "ts": int(now * 1000),
418
+ "extra": {
419
+ "capturesOk": capture.stats_ok,
420
+ "capturesFail": capture.stats_fail,
421
+ "eventsSent": events_sent,
422
+ "eventsFailed": events_failed,
423
+ "eventsGated": events_gated,
424
+ "ocrErrors": ocr_errors,
425
+ "ocrSkippedBackpressure": ocr_skipped_backpressure,
426
+ "shadowDivergences": shadow_divergences,
427
+ "detectAvgMs": round(avg_detect, 1),
428
+ "ocrAvgMs": round(avg_ocr, 1),
429
+ "sendAvgMs": round(avg_send, 1),
430
+ },
431
+ }
432
+ try:
433
+ _requests.post(
434
+ f"{config['relay']['url']}/profiling/sense",
435
+ json=snapshot, timeout=2,
436
+ )
437
+ except Exception:
438
+ pass
439
+
440
+ detect_times.clear()
441
+ ocr_times.clear()
442
+ send_times.clear()
443
+ last_stats = now
444
+
445
+
446
if __name__ == "__main__":
    try:
        main()
    except Exception:
        crash_report = traceback.format_exc()
        print(f"[sense] CRASH:\n{crash_report}", file=sys.stderr, flush=True)
        # Best-effort crash report to sinain-core so the failure shows up in
        # health; any error while reporting is ignored and the original
        # exception is re-raised below.
        try:
            import requests as _req
            payload = {
                "crash": crash_report,
                "ts": int(__import__("time").time() * 1000),
            }
            _req.post(
                "http://localhost:9500/profiling/sense",
                json=payload,
                timeout=2,
            )
        except Exception:
            pass
        raise