@geravant/sinain 1.0.19 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/README.md +10 -1
  2. package/cli.js +176 -0
  3. package/install.js +11 -2
  4. package/launcher.js +622 -0
  5. package/openclaw.plugin.json +4 -0
  6. package/pack-prepare.js +48 -0
  7. package/package.json +24 -5
  8. package/sense_client/README.md +82 -0
  9. package/sense_client/__init__.py +1 -0
  10. package/sense_client/__main__.py +462 -0
  11. package/sense_client/app_detector.py +54 -0
  12. package/sense_client/app_detector_win.py +83 -0
  13. package/sense_client/capture.py +215 -0
  14. package/sense_client/capture_win.py +88 -0
  15. package/sense_client/change_detector.py +86 -0
  16. package/sense_client/config.py +64 -0
  17. package/sense_client/gate.py +145 -0
  18. package/sense_client/ocr.py +347 -0
  19. package/sense_client/privacy.py +65 -0
  20. package/sense_client/requirements.txt +13 -0
  21. package/sense_client/roi_extractor.py +84 -0
  22. package/sense_client/sender.py +173 -0
  23. package/sense_client/tests/__init__.py +0 -0
  24. package/sense_client/tests/test_stream1_optimizations.py +234 -0
  25. package/setup-overlay.js +82 -0
  26. package/sinain-agent/.env.example +17 -0
  27. package/sinain-agent/CLAUDE.md +80 -0
  28. package/sinain-agent/mcp-config.json +12 -0
  29. package/sinain-agent/run.sh +248 -0
  30. package/sinain-core/.env.example +93 -0
  31. package/sinain-core/package-lock.json +552 -0
  32. package/sinain-core/package.json +21 -0
  33. package/sinain-core/src/agent/analyzer.ts +366 -0
  34. package/sinain-core/src/agent/context-window.ts +172 -0
  35. package/sinain-core/src/agent/loop.ts +404 -0
  36. package/sinain-core/src/agent/situation-writer.ts +187 -0
  37. package/sinain-core/src/agent/traits.ts +520 -0
  38. package/sinain-core/src/audio/capture-spawner-macos.ts +44 -0
  39. package/sinain-core/src/audio/capture-spawner-win.ts +37 -0
  40. package/sinain-core/src/audio/capture-spawner.ts +14 -0
  41. package/sinain-core/src/audio/pipeline.ts +335 -0
  42. package/sinain-core/src/audio/transcription-local.ts +141 -0
  43. package/sinain-core/src/audio/transcription.ts +278 -0
  44. package/sinain-core/src/buffers/feed-buffer.ts +71 -0
  45. package/sinain-core/src/buffers/sense-buffer.ts +425 -0
  46. package/sinain-core/src/config.ts +245 -0
  47. package/sinain-core/src/escalation/escalation-slot.ts +136 -0
  48. package/sinain-core/src/escalation/escalator.ts +812 -0
  49. package/sinain-core/src/escalation/message-builder.ts +323 -0
  50. package/sinain-core/src/escalation/openclaw-ws.ts +726 -0
  51. package/sinain-core/src/escalation/scorer.ts +166 -0
  52. package/sinain-core/src/index.ts +507 -0
  53. package/sinain-core/src/learning/feedback-store.ts +253 -0
  54. package/sinain-core/src/learning/signal-collector.ts +218 -0
  55. package/sinain-core/src/log.ts +24 -0
  56. package/sinain-core/src/overlay/commands.ts +126 -0
  57. package/sinain-core/src/overlay/ws-handler.ts +267 -0
  58. package/sinain-core/src/privacy/index.ts +18 -0
  59. package/sinain-core/src/privacy/presets.ts +40 -0
  60. package/sinain-core/src/privacy/redact.ts +92 -0
  61. package/sinain-core/src/profiler.ts +181 -0
  62. package/sinain-core/src/recorder.ts +186 -0
  63. package/sinain-core/src/server.ts +417 -0
  64. package/sinain-core/src/trace/trace-store.ts +73 -0
  65. package/sinain-core/src/trace/tracer.ts +94 -0
  66. package/sinain-core/src/types.ts +427 -0
  67. package/sinain-core/src/util/dedup.ts +48 -0
  68. package/sinain-core/src/util/task-store.ts +84 -0
  69. package/sinain-core/tsconfig.json +18 -0
  70. package/sinain-knowledge/data/git-store.ts +2 -0
  71. package/sinain-mcp-server/index.ts +337 -0
  72. package/sinain-mcp-server/package.json +19 -0
  73. package/sinain-mcp-server/tsconfig.json +15 -0
@@ -0,0 +1,83 @@
1
+ """Detect the foreground application and window title on Windows."""
2
+ from __future__ import annotations
3
+
4
+ import ctypes
5
+ import ctypes.wintypes
6
+
7
+
8
class WinAppDetector:
    """Detects the foreground application and window title on Windows.

    Uses Win32 API: GetForegroundWindow, GetWindowTextW, and
    psutil/GetWindowThreadProcessId for process name resolution.
    """

    # Size (in characters) of the buffer handed to GetModuleBaseNameW.
    _NAME_BUF_CHARS = 260

    def __init__(self):
        # Values seen by the previous detect_change() call; "" means that
        # nothing has been observed yet (or the last lookup failed).
        self._last_app: str = ""
        self._last_window: str = ""
        self._user32 = ctypes.windll.user32
        self._kernel32 = ctypes.windll.kernel32

    @staticmethod
    def _strip_exe(name: str) -> str:
        """Return *name* without a trailing ".exe" (matched case-insensitively).

        Only the suffix is removed, so "my.exe.tool.exe" becomes
        "my.exe.tool" and "Code.EXE" becomes "Code". Both name-resolution
        paths use this helper so they report the same app name.
        """
        if name.lower().endswith(".exe"):
            return name[:-4]
        return name

    def get_active_app(self) -> tuple[str, str]:
        """Returns (app_name, window_title) of the foreground window.

        Best-effort: returns ("", "") when there is no foreground window or
        when any of the Win32 calls raises.
        """
        try:
            hwnd = self._user32.GetForegroundWindow()
            if not hwnd:
                return "", ""

            # Get window title (+1 leaves room for the terminating NUL).
            length = self._user32.GetWindowTextLengthW(hwnd)
            buf = ctypes.create_unicode_buffer(length + 1)
            self._user32.GetWindowTextW(hwnd, buf, length + 1)
            window_title = buf.value

            # Get process name via PID
            pid = ctypes.wintypes.DWORD()
            self._user32.GetWindowThreadProcessId(hwnd, ctypes.byref(pid))
            app_name = self._get_process_name(pid.value)

            return app_name, window_title
        except Exception:
            # Deliberate best-effort: detection must never break the caller.
            return "", ""

    def _get_process_name(self, pid: int) -> str:
        """Get process executable name (without ".exe") from PID.

        Tries psutil first and falls back to OpenProcess +
        GetModuleBaseNameW. Returns "" when neither approach works.
        """
        try:
            # Try psutil first (more reliable)
            import psutil
            return self._strip_exe(psutil.Process(pid).name())
        except Exception:
            # psutil missing, access denied, process gone, ... -> fall back.
            pass

        # Fallback: OpenProcess + GetModuleBaseNameW
        try:
            PROCESS_QUERY_INFORMATION = 0x0400
            PROCESS_VM_READ = 0x0010
            handle = self._kernel32.OpenProcess(
                PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, False, pid
            )
            if not handle:
                return ""

            try:
                psapi = ctypes.windll.psapi
                buf = ctypes.create_unicode_buffer(self._NAME_BUF_CHARS)
                psapi.GetModuleBaseNameW(handle, None, buf, self._NAME_BUF_CHARS)
                return self._strip_exe(buf.value)
            finally:
                # Always release the process handle, even if the lookup raised.
                self._kernel32.CloseHandle(handle)
        except Exception:
            return ""

    def detect_change(self) -> tuple[bool, bool, str, str]:
        """Returns (app_changed, window_changed, app_name, window_title).

        A change is only reported when the previous value was non-empty, so
        the very first observation never counts as a change.
        """
        app, window = self.get_active_app()
        app_changed = app != self._last_app and self._last_app != ""
        window_changed = window != self._last_window and self._last_window != ""
        self._last_app = app
        self._last_window = window
        return app_changed, window_changed, app, window
@@ -0,0 +1,215 @@
1
+ """Screen capture via IPC from sck-capture (preferred) or CoreGraphics fallback.
2
+
3
+ On Windows, uses mss (DXGI Desktop Duplication) via capture_win.py.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import json
8
+ import os
9
+ import sys
10
+ import time
11
+ from typing import Generator
12
+
13
+ from PIL import Image
14
+
15
+ # macOS-only imports — deferred so the module loads on Windows
16
+ if sys.platform == "darwin":
17
+ import Quartz
18
+
19
+
20
class ScreenCapture:
    """Captures screen frames via CGDisplayCreateImage (CoreGraphics/IOSurface).

    Uses Quartz CGDisplayCreateImage instead of the screencapture CLI.
    This avoids CoreMediaIO/ScreenCaptureKit, which blocks camera access
    for other apps (e.g. Google Meet) on macOS 14+.

    NOTE(review): create_capture() in this module warns that
    CGDisplayCreateImage itself may cause camera conflicts on macOS 14+;
    confirm which statement is current.
    """

    def __init__(self, mode: str = "screen", target: int = 0,
                 fps: float = 1, scale: float = 0.5):
        self.mode = mode
        self.target = target
        self.fps = fps
        self.scale = scale
        self.stats_ok = 0
        self.stats_fail = 0
        self._last_stats_time = time.time()
        self._stats_interval = 60  # log stats every 60s
        self._display_id = Quartz.CGMainDisplayID()

    def capture_frame(self) -> tuple[Image.Image, float]:
        """Returns (PIL Image, timestamp).

        Uses CGDisplayCreateImage for zero-subprocess, camera-safe capture.
        Downscales by self.scale factor before returning.

        Raises:
            RuntimeError: if CGDisplayCreateImage returns None.
            Exception: anything raised while converting the frame; every
                failure is counted in ``stats_fail`` before being re-raised.
        """
        ts = time.time()
        try:
            img = self._grab_display()
        except Exception:
            # Count *every* failure (not only the "returned None" case) so
            # the periodic stats and the permissions warning stay truthful.
            self.stats_fail += 1
            raise
        self.stats_ok += 1
        return img, ts

    def _grab_display(self) -> Image.Image:
        """Grab the display once and return it as a (possibly scaled) image."""
        cg_image = Quartz.CGDisplayCreateImage(self._display_id)
        if cg_image is None:
            raise RuntimeError("CGDisplayCreateImage returned None")

        try:
            width = Quartz.CGImageGetWidth(cg_image)
            height = Quartz.CGImageGetHeight(cg_image)
            bytes_per_row = Quartz.CGImageGetBytesPerRow(cg_image)

            # Get raw pixel data from CGImage
            data_provider = Quartz.CGImageGetDataProvider(cg_image)
            raw_data = Quartz.CGDataProviderCopyData(data_provider)
        finally:
            # Explicitly release CGImage and its IOSurface handle immediately.
            # At continuous capture rates, unreleased handles cause GPU/camera
            # contention because the camera shares IOSurface infrastructure.
            del cg_image

        # CGDisplayCreateImage returns BGRA (premultiplied alpha, 32Little);
        # bytes_per_row is passed as the stride because rows may be padded.
        img = Image.frombytes("RGBA", (width, height), raw_data,
                              "raw", "BGRA", bytes_per_row, 1)

        if self.scale != 1.0:
            # Clamp to 1px so a tiny scale factor cannot request a 0-sized image.
            new_w = max(1, int(width * self.scale))
            new_h = max(1, int(height * self.scale))
            img = img.resize((new_w, new_h), Image.LANCZOS)

        return img

    def capture_loop(self) -> Generator[tuple[Image.Image, float], None, None]:
        """Yields frames at self.fps rate.

        Capture errors are printed and skipped; the loop never terminates on
        its own.
        """
        interval = 1.0 / self.fps
        while True:
            start = time.time()
            try:
                yield self.capture_frame()
            except Exception as e:
                # capture_frame() has already recorded the failure.
                print(f"[capture] error: {e}")
            self._maybe_log_stats()
            # Sleep only for whatever is left of this frame's time budget.
            elapsed = time.time() - start
            sleep_time = interval - elapsed
            if sleep_time > 0:
                time.sleep(sleep_time)

    def _maybe_log_stats(self):
        """Print success/failure counters at most once per stats interval."""
        now = time.time()
        if now - self._last_stats_time >= self._stats_interval:
            total = self.stats_ok + self.stats_fail
            rate = (self.stats_ok / total * 100) if total > 0 else 0
            print(f"[capture] stats: {self.stats_ok} ok, {self.stats_fail} fail"
                  f" ({rate:.0f}% success, {total} total)")
            if self.stats_fail > 0 and self.stats_ok == 0:
                print("[capture] WARNING: all captures failing — check screen recording permissions")
            self._last_stats_time = now
102
+
103
class ScreenKitCapture:
    """Frame source backed by the JPEG that sck-capture drops in ~/.sinain/capture/."""

    FRAME_PATH = os.path.expanduser("~/.sinain/capture/frame.jpg")
    META_PATH = os.path.expanduser("~/.sinain/capture/meta.json")
    STALE_THRESHOLD = 1.0  # seconds

    def __init__(self, fps: float = 1, scale: float = 1.0, **kwargs):
        # Extra keyword arguments are accepted and ignored.
        self.fps = fps
        self.scale = scale
        self.stats_ok = 0
        self.stats_fail = 0
        self._last_frame_ts = 0.0
        self._stats_interval = 60
        self._last_stats_time = time.time()

    @classmethod
    def is_available(cls) -> bool:
        """Tell whether a frame file exists and was modified within STALE_THRESHOLD."""
        frame = cls.FRAME_PATH
        try:
            if os.path.exists(frame):
                age = time.time() - os.path.getmtime(frame)
                return age < cls.STALE_THRESHOLD
        except OSError:
            pass
        return False

    def _frame_timestamp(self, fallback: float) -> float:
        """Return the "timestamp" entry of the metadata file, else *fallback*."""
        meta_path = self.META_PATH
        if not os.path.exists(meta_path):
            return fallback
        try:
            with open(meta_path) as fh:
                payload = json.load(fh)
            return payload.get("timestamp", fallback)
        except (json.JSONDecodeError, OSError):
            # Unreadable or half-written metadata: keep the wall-clock value.
            return fallback

    def capture_frame(self) -> tuple[Image.Image, float] | None:
        """Load the most recent frame written to the IPC directory.

        Gives back (PIL Image, timestamp), or None when the frame file is
        missing, when its timestamp equals the previously returned one, or
        when reading fails (in which case the failure is counted and printed).
        """
        frame_path = self.FRAME_PATH
        try:
            if not os.path.exists(frame_path):
                return None

            # Prefer the writer's timestamp; fall back to "now".
            stamp = self._frame_timestamp(time.time())

            # Same timestamp as last time -> nothing new to hand out.
            if stamp == self._last_frame_ts:
                return None

            picture = Image.open(frame_path)
            picture.load()  # pull all pixel data in before the file is replaced

            factor = self.scale
            if factor != 1.0:
                target_size = (int(picture.width * factor),
                               int(picture.height * factor))
                picture = picture.resize(target_size, Image.LANCZOS)

            self._last_frame_ts = stamp
            self.stats_ok += 1
            return picture, stamp

        except Exception as e:
            self.stats_fail += 1
            print(f"[capture-screenkit] error: {e}")
            return None

    def capture_loop(self) -> Generator[tuple[Image.Image, float], None, None]:
        """Generate frames paced to self.fps; mirrors ScreenCapture.capture_loop."""
        period = 1.0 / self.fps
        while True:
            began = time.time()
            frame = self.capture_frame()
            if frame is not None:
                yield frame
            self._maybe_log_stats()
            remaining = period - (time.time() - began)
            if remaining > 0:
                time.sleep(remaining)

    def _maybe_log_stats(self):
        """Print the ok/fail counters once the stats interval has elapsed."""
        now = time.time()
        if now - self._last_stats_time < self._stats_interval:
            return
        total = self.stats_ok + self.stats_fail
        rate = (self.stats_ok / total * 100) if total > 0 else 0
        print(f"[capture-screenkit] stats: {self.stats_ok} ok, {self.stats_fail} fail"
              f" ({rate:.0f}% success, {total} total)")
        self._last_stats_time = now
192
+
193
+
194
def create_capture(mode: str = "screen", target: int = 0,
                   fps: float = 1, scale: float = 0.5):
    """Build the screen-capture backend appropriate for the current platform.

    Windows: WinScreenCapture (mss / DXGI Desktop Duplication).
    Elsewhere (macOS path): ScreenKitCapture when sck-capture is delivering
    fresh frames over IPC, otherwise the CoreGraphics-based ScreenCapture.
    """
    on_windows = sys.platform == "win32"
    if on_windows:
        # Imported lazily so mss is only needed on Windows.
        from .capture_win import WinScreenCapture
        print("[capture] Using mss (DXGI Desktop Duplication)")
        return WinScreenCapture(mode=mode, target=target, fps=fps, scale=scale)

    # macOS path
    if not ScreenKitCapture.is_available():
        # No fresh IPC frames: fall back to CGDisplayCreateImage
        # (legacy fallback for macOS < 13).
        print("[capture] Using CoreGraphics (CGDisplayCreateImage)")
        print("[capture] WARNING: CGDisplayCreateImage may cause camera conflicts on macOS 14+")
        return ScreenCapture(mode=mode, target=target, fps=fps, scale=scale)

    # Primary source: the Swift sck-capture binary writes JPEG frames and
    # already applies the scale, hence scale=1.0 here.
    print("[capture] Using ScreenCaptureKit (sck-capture IPC)")
    return ScreenKitCapture(fps=fps, scale=1.0)
@@ -0,0 +1,88 @@
1
+ """Screen capture on Windows via mss (DXGI Desktop Duplication)."""
2
+ from __future__ import annotations
3
+
4
+ import time
5
+ from typing import Generator
6
+
7
+ from PIL import Image
8
+
9
+ try:
10
+ import mss
11
+ except ImportError:
12
+ mss = None
13
+
14
+
15
class WinScreenCapture:
    """Captures screen frames on Windows using mss (DXGI Desktop Duplication).

    Same interface as ScreenCapture/ScreenKitCapture on macOS:
    capture_frame() -> (Image, float) and capture_loop() -> Generator.

    NOTE(review): confirm that the installed mss release really uses DXGI
    Desktop Duplication on Windows; the description is kept from the
    original author.
    """

    def __init__(self, mode: str = "screen", target: int = 0,
                 fps: float = 1, scale: float = 0.5):
        if mss is None:
            raise RuntimeError("mss library required for Windows capture: pip install mss")
        self.mode = mode
        # Zero-based display index: 0 selects mss monitor 1, 1 selects mss
        # monitor 2, and so on (see _monitor_index). mss index 0, the
        # combined virtual screen, is never selected.
        self.target = target
        self.fps = fps
        self.scale = scale
        self.stats_ok = 0
        self.stats_fail = 0
        self._last_stats_time = time.time()
        self._stats_interval = 60
        self._sct = mss.mss()

    def _monitor_index(self) -> int:
        """Translate self.target into an index into ``self._sct.monitors``.

        mss monitors: index 0 = all monitors combined, 1+ = individual.
        Negative or out-of-range targets fall back to monitor 1.
        """
        monitor_idx = self.target + 1 if self.target >= 0 else 1
        if monitor_idx >= len(self._sct.monitors):
            monitor_idx = 1  # fallback to primary
        return monitor_idx

    def capture_frame(self) -> tuple[Image.Image, float]:
        """Returns (PIL Image, timestamp).

        Uses mss for the grab. Downscales by self.scale factor.
        Any failure is counted in ``stats_fail`` and re-raised, so callers
        that bypass capture_loop() are accounted for as well.
        """
        ts = time.time()
        try:
            monitor = self._sct.monitors[self._monitor_index()]
            screenshot = self._sct.grab(monitor)

            # Build the PIL image from the RGB bytes mss exposes.
            img = Image.frombytes("RGB", screenshot.size, screenshot.rgb)

            if self.scale != 1.0:
                # Clamp to 1px so a tiny scale cannot request a 0-sized image.
                new_w = max(1, int(img.width * self.scale))
                new_h = max(1, int(img.height * self.scale))
                img = img.resize((new_w, new_h), Image.LANCZOS)
        except Exception:
            self.stats_fail += 1
            raise

        self.stats_ok += 1
        if self.stats_ok == 1:
            # One-time diagnostic so the effective resolution is visible in logs.
            print(f"[capture-win] first frame: {img.width}x{img.height} "
                  f"(monitor={monitor['width']}x{monitor['height']}, scale={self.scale})",
                  flush=True)
        return img, ts

    def capture_loop(self) -> Generator[tuple[Image.Image, float], None, None]:
        """Yields frames at self.fps rate; capture errors are printed and skipped."""
        interval = 1.0 / self.fps
        while True:
            start = time.time()
            try:
                yield self.capture_frame()
            except Exception as e:
                # capture_frame() has already recorded the failure.
                print(f"[capture-win] error: {e}", flush=True)
            self._maybe_log_stats()
            # Sleep only for whatever is left of this frame's time budget.
            elapsed = time.time() - start
            sleep_time = interval - elapsed
            if sleep_time > 0:
                time.sleep(sleep_time)

    def _maybe_log_stats(self):
        """Print success/failure counters at most once per stats interval."""
        now = time.time()
        if now - self._last_stats_time >= self._stats_interval:
            total = self.stats_ok + self.stats_fail
            rate = (self.stats_ok / total * 100) if total > 0 else 0
            print(f"[capture-win] stats: {self.stats_ok} ok, {self.stats_fail} fail"
                  f" ({rate:.0f}% success, {total} total)", flush=True)
            self._last_stats_time = now
@@ -0,0 +1,86 @@
1
+ """SSIM-based frame change detection."""
2
+ from __future__ import annotations
3
+
4
+ from dataclasses import dataclass
5
+
6
+ import numpy as np
7
+ from PIL import Image
8
+ from skimage.metrics import structural_similarity
9
+
10
+
11
@dataclass
class ChangeResult:
    """Outcome of comparing one frame against the stored reference frame."""

    # SSIM score of the comparison.
    ssim_score: float
    # Greyscale rendering of the per-pixel dissimilarity map.
    diff_image: Image.Image
    # One array of (y, x) coordinates per changed region.
    contours: list
    # Box enclosing every changed region, as (x, y, w, h).
    bbox: tuple[int, int, int, int]
17
+
18
+
19
class ChangeDetector:
    """SSIM-based frame change detection."""

    # Dissimilarity (0-255) above which a pixel counts as "changed".
    _PIXEL_DIFF_THRESHOLD = 30

    def __init__(self, threshold: float = 0.95, min_area: int = 100):
        # Frames scoring at or above `threshold` are treated as unchanged.
        self.threshold = threshold
        # Changed regions smaller than `min_area` pixels are ignored.
        self.min_area = min_area
        # Greyscale keyframe the next frame is compared against.
        self.prev_frame: np.ndarray | None = None

    def set_threshold(self, threshold: float) -> None:
        """Dynamically adjust the SSIM change threshold."""
        self.threshold = threshold

    @staticmethod
    def _dissimilarity_map(diff_map: np.ndarray) -> np.ndarray:
        """Convert a local-SSIM map into a uint8 dissimilarity image.

        Local SSIM values can be negative, which makes (1 - ssim) * 255
        exceed 255. The values are clipped before the cast so that the most
        strongly changed pixels saturate at 255 instead of going out of
        range for uint8 and possibly ending up below the mask threshold.
        """
        return np.clip((1.0 - diff_map) * 255, 0, 255).astype(np.uint8)

    @staticmethod
    def _merged_bbox(contours: list) -> tuple[int, int, int, int]:
        """Return the (x, y, w, h) box enclosing all (y, x) coordinate arrays.

        Coordinates are inclusive pixel indices, so the extent is
        max - min + 1; a single changed pixel yields a 1x1 box, not 0x0.
        """
        all_coords = np.vstack(contours)
        min_y, min_x = all_coords.min(axis=0)
        max_y, max_x = all_coords.max(axis=0)
        return (int(min_x), int(min_y),
                int(max_x - min_x) + 1, int(max_y - min_y) + 1)

    def detect(self, frame: Image.Image) -> ChangeResult | None:
        """Compare frame to previous. Returns ChangeResult if significant.

        Returns None for the first frame, when the frame size changed (the
        keyframe is simply reset), when the SSIM score is at or above the
        threshold, or when no changed region reaches ``min_area`` pixels.
        """
        gray = np.array(frame.convert("L"))

        if self.prev_frame is None:
            self.prev_frame = gray
            return None

        if gray.shape != self.prev_frame.shape:
            # Different resolution: frames are not comparable, start over.
            self.prev_frame = gray
            return None

        score, diff_map = structural_similarity(
            self.prev_frame, gray, full=True
        )

        if score >= self.threshold:
            return None

        # Keyframe update: only advance prev_frame when change IS detected.
        # This lets diffs accumulate against the last accepted keyframe,
        # which is essential at high FPS where consecutive frames differ by <1%.
        self.prev_frame = gray

        # Convert diff map to binary mask
        diff_binary = self._dissimilarity_map(diff_map)
        mask = diff_binary > self._PIXEL_DIFF_THRESHOLD

        # Find contours via connected components
        from skimage.measure import label, regionprops
        regions = regionprops(label(mask))

        # Filter by area
        contours = [region.coords for region in regions
                    if region.area >= self.min_area]
        if not contours:
            return None

        return ChangeResult(
            ssim_score=score,
            diff_image=Image.fromarray(diff_binary),
            contours=contours,
            bbox=self._merged_bbox(contours),
        )
@@ -0,0 +1,64 @@
1
+ """Configuration loader for sense_client."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import os
6
+ import sys
7
+
8
# Built-in configuration; load_config() overlays user values on a copy of this.
DEFAULTS = {
    "capture": {
        "mode": "screen",
        "target": 0,
        "fps": 2.0,
        "scale": 0.5,
    },
    "detection": {
        "ssimThreshold": 0.92,
        "minArea": 100,
        "roiPadding": 20,
        "cooldownMs": 5000,
    },
    "ocr": {
        "enabled": True,
        "backend": "auto",
        "languages": ["en", "ru"],
        "lang": "eng",
        "psm": 11,
        "minConfidence": 50,
    },
    "gate": {
        "minOcrChars": 20,
        "majorChangeThreshold": 0.85,
        "cooldownMs": 5000,
        "adaptiveCooldownMs": 2000,
        "contextCooldownMs": 10000,
    },
    "relay": {
        "url": "http://localhost:9500",
        "sendThumbnails": True,
        "maxImageKB": 500,
    },
    "optimization": {
        "backpressure": False,
        "textDedup": False,
        "visionRegionOfInterest": False,
        "shadowValidation": False,
    },
}


def load_config(path: str | None = None) -> dict:
    """Load config from JSON file, merge with defaults.

    The merge is one level deep: for a section that exists in DEFAULTS the
    user's keys override the default keys; unknown sections are copied
    as-is.

    The function never raises for a bad config file. A file that cannot be
    read or parsed, or whose top level is not a JSON object, yields the
    defaults; a known section given as a non-object is ignored. Each of
    these cases prints a warning to stderr. A missing or empty ``path``
    silently yields the defaults.

    Args:
        path: Location of the JSON config file, or None for defaults only.

    Returns:
        A fresh dict; mutating it does not affect DEFAULTS.
    """
    config = json.loads(json.dumps(DEFAULTS))  # deep copy
    if not path or not os.path.exists(path):
        return config

    try:
        # utf-8-sig: explicit encoding (the platform default is not always
        # UTF-8) that also tolerates a leading byte-order mark.
        with open(path, encoding="utf-8-sig") as f:
            user = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"[config] could not load {path}: {exc}; using defaults",
              file=sys.stderr)
        return config

    if not isinstance(user, dict):
        print(f"[config] {path}: top level must be a JSON object; using defaults",
              file=sys.stderr)
        return config

    for section, values in user.items():
        known = config.get(section)
        if not isinstance(known, dict):
            # Section unknown to DEFAULTS: take the user's value verbatim.
            config[section] = values
        elif isinstance(values, dict):
            known.update(values)
        else:
            # Replacing a whole default section with a scalar would break
            # every later lookup into it, so keep the defaults instead.
            print(f"[config] {path}: section '{section}' must be an object; ignored",
                  file=sys.stderr)
    return config
@@ -0,0 +1,145 @@
1
+ """Decision gate — classifies sense events and decides what to send."""
2
+ from __future__ import annotations
3
+
4
+ import difflib
5
+ import time
6
+ from collections import deque
7
+ from dataclasses import dataclass, field
8
+
9
+ from .change_detector import ChangeResult
10
+ from .ocr import OCRResult
11
+
12
+
13
@dataclass
class SenseMeta:
    """Context attached to a sense event; every field has a neutral default."""

    # SSIM score of the frame comparison that produced the event.
    ssim: float = 0.0
    # Name of the application.
    app: str = ""
    # Title of the window.
    window_title: str = ""
    # Screen index.
    screen: int = 0
19
+
20
+
21
@dataclass
class SenseObservation:
    """Structured description of an observation (claude-mem compatible schema).

    Per the original author's note: sense_client fills in `title` and
    `facts` from OCR/app context, while sinain-core's agent layer later adds
    `narrative` and `concepts`.
    """

    title: str = ""
    subtitle: str = ""
    # Each instance gets its own list (never a shared default).
    facts: list[str] = field(default_factory=list)
    narrative: str = ""
    concepts: list[str] = field(default_factory=list)
34
+
35
+
36
@dataclass
class SenseEvent:
    """One event produced by the decision gate; only `type` is mandatory."""

    # Event category: "text" | "visual" | "context"
    type: str
    # Event timestamp.
    ts: float = 0.0
    # OCR text carried by the event, if any.
    ocr: str = ""
    # Optional region-of-interest payload.
    roi: dict | None = None
    # Optional diff payload.
    diff: dict | None = None
    # Fresh SenseMeta / SenseObservation per event (never shared defaults).
    meta: SenseMeta = field(default_factory=SenseMeta)
    observation: SenseObservation = field(default_factory=SenseObservation)
45
+
46
+
47
class DecisionGate:
    """Classifies sense events and decides what to send.

    All timestamps and cooldowns handled by this class are in milliseconds
    (``time.time() * 1000``).
    """

    # For this long after an app/window switch the shorter adaptive cooldown
    # is used instead of the regular one.
    _RECENT_APP_CHANGE_MS = 10000
    # Similarity ratio above which OCR text counts as a repeat of recent text.
    _DUPLICATE_RATIO = 0.7

    def __init__(self, min_ocr_chars: int = 20,
                 major_change_threshold: float = 0.85,
                 cooldown_ms: int = 5000,
                 adaptive_cooldown_ms: int = 2000,
                 context_cooldown_ms: int = 10000):
        self.min_ocr_chars = min_ocr_chars
        self.major_change_threshold = major_change_threshold
        self.cooldown_ms = cooldown_ms
        self.adaptive_cooldown_ms = adaptive_cooldown_ms
        self.context_cooldown_ms = context_cooldown_ms
        self.last_send_ts: float = 0
        self.last_context_ts: float = 0
        self.last_app_change_ts: float = 0
        # Fuzzy dedup: ring buffer of last 5 OCR texts
        self._recent_texts: deque[str] = deque(maxlen=5)
        self._last_sent_text: str = ""

    def _effective_cooldown(self, now: float) -> float:
        """Return the cooldown (ms) that applies at time *now* (ms).

        Shared by is_ready() and classify() so both always agree.
        """
        recent = (now - self.last_app_change_ts) < self._RECENT_APP_CHANGE_MS
        return self.adaptive_cooldown_ms if recent else self.cooldown_ms

    def is_ready(self, app_changed: bool, window_changed: bool) -> bool:
        """Time-based readiness check without consuming OCR output.

        Used by backpressure scheduling to decide whether to run OCR at all.
        """
        if app_changed or window_changed:
            return True
        now = time.time() * 1000
        return now - self.last_send_ts >= self._effective_cooldown(now)

    def _is_duplicate(self, text: str) -> bool:
        """Check if text is too similar to any recently sent text."""
        if text == self._last_sent_text:
            return True
        limit = self._DUPLICATE_RATIO
        for prev in self._recent_texts:
            matcher = difflib.SequenceMatcher(None, prev, text)
            # real_quick_ratio() and quick_ratio() are cheap upper bounds on
            # ratio(); checking them first skips the expensive comparison
            # for clearly different texts without changing the outcome.
            if (matcher.real_quick_ratio() > limit
                    and matcher.quick_ratio() > limit
                    and matcher.ratio() > limit):
                return True
        return False

    @staticmethod
    def _ocr_quality_ok(text: str) -> bool:
        """Reject garbage OCR: >50% single-char tokens or <50% alphanumeric.

        The alphanumeric share is measured against non-whitespace characters
        only, so line breaks and tabs in multi-line OCR output do not count
        against otherwise clean text.
        """
        tokens = text.split()
        if not tokens:
            return False
        single_char = sum(1 for t in tokens if len(t) == 1)
        if single_char / len(tokens) > 0.5:
            return False
        # Tokens come from split(), so together they hold exactly the
        # non-whitespace characters; the sum is > 0 because tokens is non-empty.
        visible = sum(len(t) for t in tokens)
        alnum = sum(1 for ch in text if ch.isalnum())
        return alnum / visible >= 0.5

    def classify(self, change: ChangeResult | None,
                 ocr: OCRResult, app_changed: bool,
                 window_changed: bool = False) -> SenseEvent | None:
        """Returns SenseEvent to send, or None to drop.

        Order of decisions: context event on app/window change (subject to
        the context cooldown), then the send cooldown, then a "text" event
        when OCR text is long enough, clean and not a near-duplicate, then a
        "visual" event when the SSIM score is below the major-change
        threshold.
        """
        now = time.time() * 1000

        # Context events (app/window change) bypass normal cooldown
        if app_changed or window_changed:
            self.last_app_change_ts = now
            if now - self.last_context_ts >= self.context_cooldown_ms:
                self.last_context_ts = now
                self.last_send_ts = now
                return SenseEvent(type="context", ts=now)

        # Adaptive cooldown: shorter after a recent app switch
        if now - self.last_send_ts < self._effective_cooldown(now):
            return None

        if change is None:
            return None

        # OCR text sufficient -> text event
        if ocr.text and len(ocr.text) >= self.min_ocr_chars:
            # Cheap linear quality check first, costly fuzzy dedup second;
            # either failing drops the event, so the order is not observable.
            if not self._ocr_quality_ok(ocr.text):
                return None
            if self._is_duplicate(ocr.text):
                return None
            self._recent_texts.append(ocr.text)
            self._last_sent_text = ocr.text
            self.last_send_ts = now
            return SenseEvent(type="text", ts=now, ocr=ocr.text,
                              meta=SenseMeta(ssim=change.ssim_score))

        # Major visual change -> visual event
        if change.ssim_score < self.major_change_threshold:
            self.last_send_ts = now
            return SenseEvent(type="visual", ts=now, ocr=ocr.text,
                              meta=SenseMeta(ssim=change.ssim_score))

        return None