@geravant/sinain 1.0.19 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -1
- package/cli.js +176 -0
- package/install.js +11 -2
- package/launcher.js +622 -0
- package/openclaw.plugin.json +4 -0
- package/pack-prepare.js +48 -0
- package/package.json +24 -5
- package/sense_client/README.md +82 -0
- package/sense_client/__init__.py +1 -0
- package/sense_client/__main__.py +462 -0
- package/sense_client/app_detector.py +54 -0
- package/sense_client/app_detector_win.py +83 -0
- package/sense_client/capture.py +215 -0
- package/sense_client/capture_win.py +88 -0
- package/sense_client/change_detector.py +86 -0
- package/sense_client/config.py +64 -0
- package/sense_client/gate.py +145 -0
- package/sense_client/ocr.py +347 -0
- package/sense_client/privacy.py +65 -0
- package/sense_client/requirements.txt +13 -0
- package/sense_client/roi_extractor.py +84 -0
- package/sense_client/sender.py +173 -0
- package/sense_client/tests/__init__.py +0 -0
- package/sense_client/tests/test_stream1_optimizations.py +234 -0
- package/setup-overlay.js +82 -0
- package/sinain-agent/.env.example +17 -0
- package/sinain-agent/CLAUDE.md +80 -0
- package/sinain-agent/mcp-config.json +12 -0
- package/sinain-agent/run.sh +248 -0
- package/sinain-core/.env.example +93 -0
- package/sinain-core/package-lock.json +552 -0
- package/sinain-core/package.json +21 -0
- package/sinain-core/src/agent/analyzer.ts +366 -0
- package/sinain-core/src/agent/context-window.ts +172 -0
- package/sinain-core/src/agent/loop.ts +404 -0
- package/sinain-core/src/agent/situation-writer.ts +187 -0
- package/sinain-core/src/agent/traits.ts +520 -0
- package/sinain-core/src/audio/capture-spawner-macos.ts +44 -0
- package/sinain-core/src/audio/capture-spawner-win.ts +37 -0
- package/sinain-core/src/audio/capture-spawner.ts +14 -0
- package/sinain-core/src/audio/pipeline.ts +335 -0
- package/sinain-core/src/audio/transcription-local.ts +141 -0
- package/sinain-core/src/audio/transcription.ts +278 -0
- package/sinain-core/src/buffers/feed-buffer.ts +71 -0
- package/sinain-core/src/buffers/sense-buffer.ts +425 -0
- package/sinain-core/src/config.ts +245 -0
- package/sinain-core/src/escalation/escalation-slot.ts +136 -0
- package/sinain-core/src/escalation/escalator.ts +812 -0
- package/sinain-core/src/escalation/message-builder.ts +323 -0
- package/sinain-core/src/escalation/openclaw-ws.ts +726 -0
- package/sinain-core/src/escalation/scorer.ts +166 -0
- package/sinain-core/src/index.ts +507 -0
- package/sinain-core/src/learning/feedback-store.ts +253 -0
- package/sinain-core/src/learning/signal-collector.ts +218 -0
- package/sinain-core/src/log.ts +24 -0
- package/sinain-core/src/overlay/commands.ts +126 -0
- package/sinain-core/src/overlay/ws-handler.ts +267 -0
- package/sinain-core/src/privacy/index.ts +18 -0
- package/sinain-core/src/privacy/presets.ts +40 -0
- package/sinain-core/src/privacy/redact.ts +92 -0
- package/sinain-core/src/profiler.ts +181 -0
- package/sinain-core/src/recorder.ts +186 -0
- package/sinain-core/src/server.ts +417 -0
- package/sinain-core/src/trace/trace-store.ts +73 -0
- package/sinain-core/src/trace/tracer.ts +94 -0
- package/sinain-core/src/types.ts +427 -0
- package/sinain-core/src/util/dedup.ts +48 -0
- package/sinain-core/src/util/task-store.ts +84 -0
- package/sinain-core/tsconfig.json +18 -0
- package/sinain-knowledge/data/git-store.ts +2 -0
- package/sinain-mcp-server/index.ts +337 -0
- package/sinain-mcp-server/package.json +19 -0
- package/sinain-mcp-server/tsconfig.json +15 -0
package/package.json
CHANGED
|
@@ -1,21 +1,40 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@geravant/sinain",
|
|
3
|
-
"version": "1.0
|
|
4
|
-
"description": "sinain
|
|
3
|
+
"version": "1.1.0",
|
|
4
|
+
"description": "sinain — AI overlay system for macOS (npx @geravant/sinain start)",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
7
|
-
"sinain": "./
|
|
7
|
+
"sinain": "./cli.js",
|
|
8
8
|
"sinain-knowledge": "./sinain-knowledge/deploy/cli.ts"
|
|
9
9
|
},
|
|
10
10
|
"scripts": {
|
|
11
|
-
"
|
|
11
|
+
"prepack": "node pack-prepare.js pre",
|
|
12
|
+
"postpack": "node pack-prepare.js post",
|
|
13
|
+
"postinstall": "node cli.js install --if-openclaw"
|
|
12
14
|
},
|
|
13
15
|
"files": [
|
|
16
|
+
"cli.js",
|
|
17
|
+
"launcher.js",
|
|
18
|
+
"setup-overlay.js",
|
|
19
|
+
"pack-prepare.js",
|
|
20
|
+
"install.js",
|
|
14
21
|
"index.ts",
|
|
15
22
|
"openclaw.plugin.json",
|
|
16
|
-
"install.js",
|
|
17
23
|
"sinain-memory",
|
|
18
24
|
"sinain-knowledge",
|
|
25
|
+
"sinain-core/src",
|
|
26
|
+
"sinain-core/package.json",
|
|
27
|
+
"sinain-core/package-lock.json",
|
|
28
|
+
"sinain-core/tsconfig.json",
|
|
29
|
+
"sinain-core/.env.example",
|
|
30
|
+
"sinain-mcp-server/index.ts",
|
|
31
|
+
"sinain-mcp-server/package.json",
|
|
32
|
+
"sinain-mcp-server/tsconfig.json",
|
|
33
|
+
"sinain-agent/run.sh",
|
|
34
|
+
"sinain-agent/mcp-config.json",
|
|
35
|
+
"sinain-agent/.env.example",
|
|
36
|
+
"sinain-agent/CLAUDE.md",
|
|
37
|
+
"sense_client",
|
|
19
38
|
"HEARTBEAT.md",
|
|
20
39
|
"SKILL.md"
|
|
21
40
|
],
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# sense_client
|
|
2
|
+
|
|
3
|
+
Screen capture and change detection pipeline for SinainHUD. Captures the screen via ScreenCaptureKit, detects meaningful changes, runs OCR, applies privacy filters, and sends observations to sinain-core.
|
|
4
|
+
|
|
5
|
+
## Architecture
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
SCKCapture (ScreenCaptureKit)
|
|
9
|
+
│
|
|
10
|
+
▼
|
|
11
|
+
ChangeDetector (SSIM diff)
|
|
12
|
+
│
|
|
13
|
+
▼
|
|
14
|
+
ROIExtractor (contour → regions of interest)
|
|
15
|
+
│
|
|
16
|
+
▼
|
|
17
|
+
OCR (Tesseract via pytesseract)
|
|
18
|
+
│
|
|
19
|
+
▼
|
|
20
|
+
Privacy filter (strip <private> tags + auto-redact secrets)
|
|
21
|
+
│
|
|
22
|
+
▼
|
|
23
|
+
DecisionGate (cooldown + significance check)
|
|
24
|
+
│
|
|
25
|
+
▼
|
|
26
|
+
SenseSender ──POST──► sinain-core :9500/sense
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
### Capture backends
|
|
30
|
+
|
|
31
|
+
| Backend | API | Notes |
|
|
32
|
+
|---------|-----|-------|
|
|
33
|
+
| `SCKCapture` (default) | ScreenCaptureKit | macOS 12.3+, async zero-copy, camera-safe |
|
|
34
|
+
| `ScreenKitCapture` | IPC file read | Reads `~/.sinain/capture/frame.jpg` from overlay |
|
|
35
|
+
| `ScreenCapture` | `CGDisplayCreateImage` | Legacy fallback, deprecated on macOS 15 |
|
|
36
|
+
|
|
37
|
+
## Requirements
|
|
38
|
+
|
|
39
|
+
- macOS 12.3+ (for ScreenCaptureKit)
|
|
40
|
+
- Python 3.10+
|
|
41
|
+
- Tesseract OCR: `brew install tesseract`
|
|
42
|
+
|
|
43
|
+
## Setup
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
cd sense_client
|
|
47
|
+
pip install -r requirements.txt
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Running
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
# From the sinain-hud repo root:
|
|
54
|
+
python -m sense_client
|
|
55
|
+
|
|
56
|
+
# With a custom config file:
|
|
57
|
+
python -m sense_client --config path/to/config.json
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
On first run, macOS will prompt for Screen Recording permission.
|
|
61
|
+
|
|
62
|
+
## Configuration
|
|
63
|
+
|
|
64
|
+
The pipeline reads from a JSON config file (passed via `--config`). All fields are optional — defaults are used for anything unspecified.
|
|
65
|
+
|
|
66
|
+
| Section | Key | Default | Description |
|
|
67
|
+
|---------|-----|---------|-------------|
|
|
68
|
+
| `capture` | `mode` | `screen` | Capture mode |
|
|
69
|
+
| `capture` | `target` | `0` | Display index |
|
|
70
|
+
| `capture` | `fps` | `2.0` | Frames per second |
|
|
71
|
+
| `capture` | `scale` | `0.5` | Downscale factor |
|
|
72
|
+
| `detection` | `ssimThreshold` | `0.92` | SSIM score below which a frame is "changed" |
|
|
73
|
+
| `detection` | `cooldownMs` | `5000` | Min ms between change events |
|
|
74
|
+
| `gate` | `minOcrChars` | `20` | Minimum OCR text length to pass gate |
|
|
75
|
+
| `gate` | `cooldownMs` | `5000` | Min ms between gated events |
|
|
76
|
+
| `relay` | `url` | `http://localhost:9500` | sinain-core endpoint |
|
|
77
|
+
|
|
78
|
+
## Privacy
|
|
79
|
+
|
|
80
|
+
- **`<private>` tags**: any on-screen text wrapped in `<private>...</private>` is stripped before sending
|
|
81
|
+
- **Auto-redaction**: credit card numbers, API keys, bearer tokens, AWS keys, and passwords are automatically redacted from OCR output
|
|
82
|
+
- Server-side stripping provides an additional layer via the sinain-hud plugin
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Sinain sense_client — screen capture preprocessing pipeline."""
|
|
@@ -0,0 +1,462 @@
|
|
|
1
|
+
"""Entry point: python -m sense_client"""
|
|
2
|
+
|
|
3
|
+
import io
|
|
4
|
+
import sys
|
|
5
|
+
import traceback
|
|
6
|
+
|
|
7
|
+
# Force UTF-8 stdout/stderr on Windows to prevent UnicodeEncodeError crashes
|
|
8
|
+
# when window titles contain non-cp1251 characters (e.g. Telegram's \u200e).
|
|
9
|
+
if sys.platform == "win32":
    # Re-wrap both standard streams as UTF-8 (undecodable characters are
    # replaced) so printing exotic window titles cannot raise
    # UnicodeEncodeError on Windows consoles.
    sys.stdout, sys.stderr = (
        io.TextIOWrapper(
            stream.buffer,
            encoding="utf-8",
            errors="replace",
            line_buffering=True,
        )
        for stream in (sys.stdout, sys.stderr)
    )
|
|
12
|
+
|
|
13
|
+
import argparse
|
|
14
|
+
import concurrent.futures
|
|
15
|
+
import json
|
|
16
|
+
import os
|
|
17
|
+
import time
|
|
18
|
+
|
|
19
|
+
import numpy as np
|
|
20
|
+
import requests as _requests
|
|
21
|
+
from skimage.metrics import structural_similarity
|
|
22
|
+
|
|
23
|
+
# Platform-specific memory reporting
|
|
24
|
+
if sys.platform != "win32":
|
|
25
|
+
import resource
|
|
26
|
+
|
|
27
|
+
from .capture import ScreenCapture, create_capture
|
|
28
|
+
from .change_detector import ChangeDetector
|
|
29
|
+
from .roi_extractor import ROIExtractor
|
|
30
|
+
from .ocr import OCRResult, create_ocr
|
|
31
|
+
from .gate import DecisionGate, SenseObservation
|
|
32
|
+
from .sender import SenseSender, package_full_frame, package_roi
|
|
33
|
+
from .app_detector import AppDetector
|
|
34
|
+
from .config import load_config
|
|
35
|
+
from .privacy import apply_privacy
|
|
36
|
+
|
|
37
|
+
# Default location of the JSON control file read by is_enabled().
# Windows: under %TEMP% (falling back to C:\Temp); elsewhere: fixed /tmp path.
CONTROL_FILE = (
    os.path.join(os.environ.get("TEMP", "C:\\Temp"), "sinain-sense-control.json")
    if sys.platform == "win32"
    else "/tmp/sinain-sense-control.json"
)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def log(msg: str):
    """Print *msg* to stdout with the ``[sense]`` prefix and flush at once."""
    line = f"[sense] {msg}"
    print(line, flush=True)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _gate_reason(gate, change, ocr, app_changed, window_changed):
    """Return a short human-readable reason why the gate dropped an event.

    The checks run in a fixed order and the first one that matches wins:
    cooldown, missing change, too little OCR text, duplicate text, noisy
    OCR, and finally an SSIM score at or above the major-change threshold.
    ``app_changed`` and ``window_changed`` are accepted but not consulted.
    """
    now_ms = time.time() * 1000
    text = ocr.text
    n_chars = len(text) if text else 0

    # An app change within the last 10 s selects the adaptive cooldown.
    if (now_ms - gate.last_app_change_ts) < 10000:
        cooldown = gate.adaptive_cooldown_ms
    else:
        cooldown = gate.cooldown_ms
    since_send = now_ms - gate.last_send_ts
    if since_send < cooldown:
        return f"cooldown ({since_send:.0f}ms < {cooldown}ms)"

    if change is None:
        return "no_change"

    if n_chars < gate.min_ocr_chars:
        return f"too_few_chars ({n_chars} < {gate.min_ocr_chars})"

    if text:
        if gate._is_duplicate(text):
            return "duplicate (similar to recent text)"
        if not gate._ocr_quality_ok(text):
            return "bad_quality (ocr noise)"

    ssim = change.ssim_score
    if ssim >= gate.major_change_threshold:
        return f"no_visual (ssim={ssim:.3f} >= {gate.major_change_threshold})"

    return f"unknown (ocr={n_chars}, ssim={ssim:.3f})"
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _run_ocr(ocr, ocr_pool, rois) -> OCRResult:
    """Run OCR on the extracted ROIs and return the result with the most text.

    Args:
        ocr: Object exposing ``extract(image)``.
        ocr_pool: Executor used to OCR several ROIs in parallel.
        rois: Sequence of ROI objects exposing an ``image`` attribute.

    Returns:
        An empty ``OCRResult`` when there are no ROIs, the single result when
        there is one ROI, otherwise the result whose ``text`` is longest.
        Ties go to the earliest ROI in ``rois``.

    Raises:
        Whatever ``ocr.extract`` raises, for any ROI.
    """
    if not rois:
        return OCRResult(text="", confidence=0, word_count=0)
    if len(rois) == 1:
        # No point paying the thread hand-off for a single region.
        return ocr.extract(rois[0].image)

    futures = [ocr_pool.submit(ocr.extract, roi.image) for roi in rois]
    # Collect in submission order rather than completion order: every future
    # is awaited either way, but this keeps the tie-break between equally
    # long texts deterministic instead of depending on thread timing.
    results = [future.result() for future in futures]
    return max(results, key=lambda r: len(r.text))
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def is_enabled(control_path: str) -> bool:
    """Report whether capture is enabled according to the JSON control file.

    The file is expected to hold a JSON object with an optional ``enabled``
    key. Capture defaults to enabled whenever the file cannot be used:
    missing, unreadable, not valid JSON/UTF-8, or not a JSON object.

    Args:
        control_path: Path to the control file.

    Returns:
        The truthiness of the ``enabled`` value, or True by default.
    """
    try:
        with open(control_path, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers
        # json.JSONDecodeError and UnicodeDecodeError (e.g. a half-written
        # file). This runs once per frame, so it must never raise.
        return True

    if not isinstance(data, dict):
        # Valid JSON but not an object (e.g. [] or "off"): nothing to read.
        return True
    return bool(data.get("enabled", True))
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _rss_mb() -> float:
    """Return this process's memory footprint in MiB for profiling snapshots.

    On Windows this is the current RSS reported by psutil (0.0 if psutil is
    unavailable or fails). Elsewhere it is the peak RSS from getrusage.
    """
    if sys.platform == "win32":
        try:
            import psutil
            return round(psutil.Process().memory_info().rss / 1048576, 1)
        except Exception:
            return 0.0
    usage = resource.getrusage(resource.RUSAGE_SELF)
    # ru_maxrss is reported in bytes on macOS but in kilobytes on Linux.
    divisor = 1048576 if sys.platform == "darwin" else 1024
    return round(usage.ru_maxrss / divisor, 1)


def main():
    """Run the capture -> detect -> OCR -> privacy -> gate -> send loop.

    Parses ``--config`` and ``--control``, builds the pipeline stages from the
    loaded config, then iterates over ``capture.capture_loop()``. Each frame
    is skipped while the control file disables capture; otherwise it goes
    through app/window change detection, SSIM change detection, ROI
    extraction, OCR, privacy filtering and the decision gate. Events that
    pass the gate are sent via ``SenseSender``.

    Note that the periodic stats/profiling block sits after the send step, so
    it only runs on iterations that reach a send attempt; iterations that
    ``continue`` earlier skip it.
    """
    parser = argparse.ArgumentParser(description="Sinain screen capture pipeline")
    parser.add_argument("--config", default=None, help="Path to config JSON")
    parser.add_argument("--control", default=CONTROL_FILE, help="Path to control file")
    args = parser.parse_args()

    config = load_config(args.config)

    log("initializing capture...")
    capture = create_capture(
        mode=config["capture"]["mode"],
        target=config["capture"]["target"],
        fps=config["capture"]["fps"],
        scale=config["capture"]["scale"],
    )
    detector = ChangeDetector(
        threshold=config["detection"]["ssimThreshold"],
        min_area=config["detection"]["minArea"],
    )
    extractor = ROIExtractor(
        padding=config["detection"]["roiPadding"],
    )
    log("initializing OCR...")
    ocr = create_ocr(config)
    gate = DecisionGate(
        min_ocr_chars=config["gate"]["minOcrChars"],
        major_change_threshold=config["gate"]["majorChangeThreshold"],
        cooldown_ms=config["gate"]["cooldownMs"],
        adaptive_cooldown_ms=config["gate"].get("adaptiveCooldownMs", 2000),
        context_cooldown_ms=config["gate"].get("contextCooldownMs", 10000),
    )
    sender = SenseSender(
        url=config["relay"]["url"],
        max_image_kb=config["relay"]["maxImageKB"],
        send_thumbnails=config["relay"]["sendThumbnails"],
    )
    app_detector = AppDetector()
    ocr_pool = concurrent.futures.ThreadPoolExecutor(max_workers=4)

    # Adaptive SSIM threshold state: the detector uses the sensitive value
    # for 10 s after an app change, then returns to the configured one.
    ssim_stable_threshold = config["detection"]["ssimThreshold"]
    ssim_sensitive_threshold = 0.85
    last_app_change_time = 0.0

    opt = config.get("optimization", {})
    use_backpressure = opt.get("backpressure", False)
    use_text_dedup = opt.get("textDedup", False)
    use_shadow = opt.get("shadowValidation", False)

    # Privacy matrix env vars (gate what leaves this process toward sinain-core/openrouter)
    _privacy_ocr_openrouter = os.environ.get("PRIVACY_OCR_OPENROUTER", "full")
    _privacy_images_openrouter = os.environ.get("PRIVACY_IMAGES_OPENROUTER", "full")

    log("sense_client started")
    log(f" relay: {config['relay']['url']}")
    log(f" fps: {config['capture']['fps']}, scale: {config['capture']['scale']}")
    log(f" ocr backend: {config['ocr'].get('backend', 'auto')}")
    log(f" privacy: ocr_openrouter={_privacy_ocr_openrouter} images_openrouter={_privacy_images_openrouter}")
    log(f" control: {args.control}")
    if use_backpressure:
        log(" optimization: backpressure ON")
    if use_text_dedup:
        log(" optimization: textDedup ON")
    if use_shadow:
        log(" optimization: shadowValidation ON")

    events_sent = 0
    events_failed = 0
    events_gated = 0
    ocr_errors = 0
    ocr_skipped_backpressure = 0
    shadow_divergences = 0
    last_stats = time.time()
    start_time = time.time()
    event_latencies: list[float] = []
    detect_times: list[float] = []
    ocr_times: list[float] = []
    send_times: list[float] = []

    # Backpressure state: latest changed frame waiting for gate
    pending_frame = None
    pending_rois = None
    pending_change = None

    # Diagnostic state
    _logged_first_ssim = False
    _logged_first_frame = False
    _last_heartbeat = time.time()

    for frame, ts in capture.capture_loop():
        # Check control file (pause/resume)
        if not is_enabled(args.control):
            time.sleep(1)
            continue

        # First-frame log
        if not _logged_first_frame:
            log(f"first frame: {frame.size[0]}x{frame.size[1]} (scale={config['capture']['scale']})")
            _logged_first_frame = True

        # 1. Check app/window change
        app_changed, window_changed, app_name, window_title = app_detector.detect_change()

        # Adaptive SSIM threshold
        now_sec = time.time()
        if app_changed:
            last_app_change_time = now_sec
            detector.set_threshold(ssim_sensitive_threshold)
            log(f"SSIM threshold lowered to {ssim_sensitive_threshold} (app change)")
        elif now_sec - last_app_change_time > 10.0 and detector.threshold != ssim_stable_threshold:
            detector.set_threshold(ssim_stable_threshold)
            log(f"SSIM threshold restored to {ssim_stable_threshold} (stable)")

        # 2. Detect frame change
        t0 = time.time()
        change = detector.detect(frame)
        detect_times.append((time.time() - t0) * 1000)
        if len(detect_times) > 500:
            # Bound memory if the stats block below is not reached for a while.
            detect_times.clear()
        if change is None and not app_changed and not window_changed:
            # Log first SSIM so we can see the range
            if not _logged_first_ssim and detector.prev_frame is not None:
                gray = np.array(frame.convert("L"))
                score = structural_similarity(detector.prev_frame, gray)
                log(f"first ssim sample: {score:.4f} (threshold={detector.threshold})")
                _logged_first_ssim = True
            # Periodic heartbeat
            if time.time() - _last_heartbeat >= 30:
                log(f"heartbeat: {capture.stats_ok} frames, {events_sent} sent, "
                    f"{events_gated} gated, threshold={detector.threshold}")
                _last_heartbeat = time.time()
            continue

        if change:
            log(f"change detected: ssim={change.ssim_score:.4f} contours={len(change.contours)}")

        # 3. Extract ROIs + stash as pending
        rois = []
        if change:
            rois = extractor.extract(frame, change.contours)
            if rois:
                roi_sizes = [f"{r.bbox[2]}x{r.bbox[3]}" for r in rois]
                log(f"rois: {len(rois)} regions ({', '.join(roi_sizes)})")
            else:
                log(f"rois: 0 (contours={len(change.contours)} all too small)")
            if use_backpressure:
                pending_frame = frame
                pending_rois = rois
                pending_change = change

        # 4. Backpressure: check if gate is ready before running OCR
        if use_backpressure:
            if not gate.is_ready(app_changed, window_changed):
                ocr_skipped_backpressure += 1
                events_gated += 1
                continue
            # Gate is ready — OCR the latest pending frame
            use_frame = pending_frame or frame
            use_rois = pending_rois or rois
            use_change = pending_change or change
        else:
            use_frame = frame
            use_rois = rois
            use_change = change

        # 5. OCR on ROIs
        t0 = time.time()
        ocr_result = OCRResult(text="", confidence=0, word_count=0)
        try:
            ocr_result = _run_ocr(ocr, ocr_pool, use_rois)
        except Exception as e:
            # An OCR failure degrades to an empty result rather than
            # stopping the pipeline; the count is reported in profiling.
            ocr_errors += 1
            log(f"OCR error: {e}")
        ocr_times.append((time.time() - t0) * 1000)
        if len(ocr_times) > 500:
            ocr_times.clear()

        if ocr_result.text:
            log(f"ocr: {len(ocr_result.text)} chars, {ocr_result.word_count} words")
        else:
            log(f"ocr: empty (rois={len(use_rois)})")

        # Shadow validation: run baseline OCR on original frame for comparison
        if use_shadow and use_backpressure and rois:
            try:
                baseline_result = _run_ocr(ocr, ocr_pool, rois)
                if baseline_result.text != ocr_result.text:
                    shadow_divergences += 1
                    log(f"SHADOW DIVERGENCE: baseline={len(baseline_result.text)}chars "
                        f"optimized={len(ocr_result.text)}chars")
                # Use baseline for actual sending (safety)
                ocr_result = baseline_result
            except Exception as e:
                log(f"Shadow OCR error: {e}")

        # Clear pending state after OCR
        if use_backpressure:
            pending_frame = pending_rois = pending_change = None

        # 5b. Privacy filter — strip <private> tags and redact secrets
        if ocr_result.text:
            ocr_result = OCRResult(
                text=apply_privacy(ocr_result.text),
                confidence=ocr_result.confidence,
                word_count=ocr_result.word_count,
            )

        # 5c. Privacy matrix: apply OCR gating for openrouter destination
        if ocr_result.text and _privacy_ocr_openrouter != "full":
            if _privacy_ocr_openrouter == "none":
                ocr_result = OCRResult(text="", confidence=0, word_count=0)
            elif _privacy_ocr_openrouter == "summary":
                ocr_result = OCRResult(
                    text=f"[SCREEN: {len(ocr_result.text)} chars]",
                    confidence=ocr_result.confidence,
                    word_count=1,
                )
            # "redacted" is already handled by apply_privacy above

        # 6. Decision gate
        event = gate.classify(
            change=use_change,
            ocr=ocr_result,
            app_changed=app_changed,
            window_changed=window_changed,
        )
        if event is None:
            reason = _gate_reason(gate, use_change, ocr_result, app_changed, window_changed)
            log(f"gate dropped: {reason}")
            events_gated += 1
            continue

        # 7. Package and send
        event.meta.app = app_name
        event.meta.window_title = window_title
        event.meta.screen = config["capture"]["target"]

        # 7b. Auto-populate structured observation from available context
        facts = []
        if app_name:
            facts.append(f"app: {app_name}")
        if window_title:
            facts.append(f"window: {window_title}")
        if use_change and use_change.ssim_score:
            facts.append(f"ssim: {use_change.ssim_score:.3f}")
        if ocr_result.text:
            # Extract first meaningful line as subtitle
            first_line = ocr_result.text.split("\n")[0][:120]
            facts.append(f"ocr: {first_line}")

        title = f"{event.type} in {app_name}" if app_name else f"{event.type} event"
        subtitle = window_title[:80] if window_title else ""
        event.observation = SenseObservation(
            title=title, subtitle=subtitle, facts=facts,
        )

        # Send small thumbnail for ALL event types (agent uses vision)
        # Privacy matrix: gate image sending based on PRIVACY_IMAGES_OPENROUTER
        if _privacy_images_openrouter == "none":
            pass  # Skip image packaging entirely
        elif event.type == "context":
            event.roi = package_full_frame(use_frame)
        elif use_rois:
            event.roi = package_roi(use_rois[0])
        else:
            # Fallback: send full frame thumbnail for text-only events
            event.roi = package_full_frame(use_frame)
        # Diff images removed — agent doesn't use binary diff masks

        t0 = time.time()
        ok = sender.send(event)
        send_times.append((time.time() - t0) * 1000)
        if len(send_times) > 500:
            send_times.clear()
        if ok:
            events_sent += 1
            send_latency = time.time() * 1000 - event.ts
            event_latencies.append(send_latency)
            if len(event_latencies) > 500:
                event_latencies.clear()
            ssim = f"{use_change.ssim_score:.3f}" if use_change else "n/a"
            ctx = f"app={app_name}"
            if window_title:
                ctx += f", win={window_title[:40]}"
            log(f"-> {event.type} sent ({ctx}, ssim={ssim}, latency={send_latency:.0f}ms)")
        else:
            events_failed += 1
            log(f"-> {event.type} FAILED to send")

        # Periodic pipeline stats
        now = time.time()
        if now - last_stats >= 60:
            latency_info = ""
            if event_latencies:
                sorted_lat = sorted(event_latencies)
                p50 = sorted_lat[len(sorted_lat) // 2]
                p95 = sorted_lat[int(len(sorted_lat) * 0.95)]
                latency_info = f" latency_p50={p50:.0f}ms p95={p95:.0f}ms"
                event_latencies.clear()

            avg_detect = sum(detect_times) / len(detect_times) if detect_times else 0
            avg_ocr = sum(ocr_times) / len(ocr_times) if ocr_times else 0
            avg_send = sum(send_times) / len(send_times) if send_times else 0

            bp_info = ""
            if use_backpressure:
                bp_info = f" ocrSkipped={ocr_skipped_backpressure}"
            shadow_info = ""
            if use_shadow:
                shadow_info = f" shadowDiv={shadow_divergences}"

            log(f"stats: captures={capture.stats_ok}ok/{capture.stats_fail}fail"
                f" events={events_sent}sent/{events_failed}fail/{events_gated}gated"
                f"{bp_info}{shadow_info}{latency_info}"
                f" detect={avg_detect:.1f}ms ocr={avg_ocr:.1f}ms send={avg_send:.1f}ms")

            # POST profiling snapshot to sinain-core
            rss_mb = _rss_mb()
            snapshot = {
                "rssMb": rss_mb,
                "uptimeS": round(now - start_time),
                "ts": int(now * 1000),
                "extra": {
                    "capturesOk": capture.stats_ok,
                    "capturesFail": capture.stats_fail,
                    "eventsSent": events_sent,
                    "eventsFailed": events_failed,
                    "eventsGated": events_gated,
                    "ocrErrors": ocr_errors,
                    "ocrSkippedBackpressure": ocr_skipped_backpressure,
                    "shadowDivergences": shadow_divergences,
                    "detectAvgMs": round(avg_detect, 1),
                    "ocrAvgMs": round(avg_ocr, 1),
                    "sendAvgMs": round(avg_send, 1),
                },
            }
            try:
                _requests.post(
                    f"{config['relay']['url']}/profiling/sense",
                    json=snapshot, timeout=2,
                )
            except Exception:
                # Profiling is best-effort; never let it interrupt capture.
                pass

            detect_times.clear()
            ocr_times.clear()
            send_times.clear()
            last_stats = now
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
if __name__ == "__main__":
    try:
        main()
    except Exception:
        crash_trace = traceback.format_exc()
        print(f"[sense] CRASH:\n{crash_trace}", file=sys.stderr, flush=True)
        # Report crash to sinain-core so it's visible in health.
        # Best-effort: a failure to report must not mask the original error.
        try:
            import requests as _req
            crash_payload = {
                "crash": crash_trace,
                "ts": int(__import__("time").time() * 1000),
            }
            _req.post(
                "http://localhost:9500/profiling/sense",
                json=crash_payload,
                timeout=2,
            )
        except Exception:
            pass
        # Re-raise so the process still exits with the original traceback.
        raise
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""Detect the frontmost application and window title (cross-platform)."""
|
|
2
|
+
|
|
3
|
+
import subprocess
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class MacAppDetector:
    """Detects the frontmost application and window title on macOS via AppleScript."""

    # AppleScript that returns "<app name>|||<front window title>"; the window
    # title is left empty when the process has no front window.
    _SCRIPT = (
        'tell application "System Events"\n'
        '  set appProc to first application process whose frontmost is true\n'
        '  set appName to name of appProc\n'
        '  set winTitle to ""\n'
        '  try\n'
        '    set winTitle to name of front window of appProc\n'
        '  end try\n'
        '  return appName & "|||" & winTitle\n'
        'end tell'
    )

    def __init__(self):
        # Last successfully observed app / window; "" until the first sample.
        self._last_app: str = ""
        self._last_window: str = ""

    def get_active_app(self) -> tuple[str, str]:
        """Returns (app_name, window_title) of the frontmost application.

        Returns ``("", "")`` when osascript cannot be run, times out after
        2 s, or produces no output.
        """
        try:
            result = subprocess.run(
                ["osascript", "-e", self._SCRIPT],
                capture_output=True, text=True, timeout=2,
            )
            parts = result.stdout.strip().split("|||", 1)
            app_name = parts[0].strip() if parts else ""
            window_title = parts[1].strip() if len(parts) > 1 else ""
            return app_name, window_title
        except Exception:
            # Best-effort probe: any failure is reported as "unknown".
            return "", ""

    def detect_change(self) -> tuple[bool, bool, str, str]:
        """Returns (app_changed, window_changed, app_name, window_title).

        A change is only reported relative to a previously observed non-empty
        value, so the first sample never counts as a change. A failed sample
        (empty app name) reports no change and leaves the remembered state
        untouched, so a transient osascript failure neither fires a spurious
        change nor hides the next real one.
        """
        app, window = self.get_active_app()
        if not app:
            return False, False, app, window

        app_changed = app != self._last_app and self._last_app != ""
        window_changed = window != self._last_window and self._last_window != ""
        self._last_app = app
        self._last_window = window
        return app_changed, window_changed, app, window
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def AppDetector():
    """Factory: build the app detector matching the current platform.

    Returns a ``WinAppDetector`` on win32 (imported lazily so the Windows
    module is only loaded there) and a ``MacAppDetector`` everywhere else.
    """
    if sys.platform != "win32":
        return MacAppDetector()

    from .app_detector_win import WinAppDetector
    return WinAppDetector()
|