@geravant/sinain 1.0.18 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/README.md +10 -1
  2. package/cli.js +176 -0
  3. package/index.ts +163 -1257
  4. package/install.js +12 -2
  5. package/launcher.js +622 -0
  6. package/openclaw.plugin.json +4 -0
  7. package/pack-prepare.js +48 -0
  8. package/package.json +26 -5
  9. package/sense_client/README.md +82 -0
  10. package/sense_client/__init__.py +1 -0
  11. package/sense_client/__main__.py +462 -0
  12. package/sense_client/app_detector.py +54 -0
  13. package/sense_client/app_detector_win.py +83 -0
  14. package/sense_client/capture.py +215 -0
  15. package/sense_client/capture_win.py +88 -0
  16. package/sense_client/change_detector.py +86 -0
  17. package/sense_client/config.py +64 -0
  18. package/sense_client/gate.py +145 -0
  19. package/sense_client/ocr.py +347 -0
  20. package/sense_client/privacy.py +65 -0
  21. package/sense_client/requirements.txt +13 -0
  22. package/sense_client/roi_extractor.py +84 -0
  23. package/sense_client/sender.py +173 -0
  24. package/sense_client/tests/__init__.py +0 -0
  25. package/sense_client/tests/test_stream1_optimizations.py +234 -0
  26. package/setup-overlay.js +82 -0
  27. package/sinain-agent/.env.example +17 -0
  28. package/sinain-agent/CLAUDE.md +80 -0
  29. package/sinain-agent/mcp-config.json +12 -0
  30. package/sinain-agent/run.sh +248 -0
  31. package/sinain-core/.env.example +93 -0
  32. package/sinain-core/package-lock.json +552 -0
  33. package/sinain-core/package.json +21 -0
  34. package/sinain-core/src/agent/analyzer.ts +366 -0
  35. package/sinain-core/src/agent/context-window.ts +172 -0
  36. package/sinain-core/src/agent/loop.ts +404 -0
  37. package/sinain-core/src/agent/situation-writer.ts +187 -0
  38. package/sinain-core/src/agent/traits.ts +520 -0
  39. package/sinain-core/src/audio/capture-spawner-macos.ts +44 -0
  40. package/sinain-core/src/audio/capture-spawner-win.ts +37 -0
  41. package/sinain-core/src/audio/capture-spawner.ts +14 -0
  42. package/sinain-core/src/audio/pipeline.ts +335 -0
  43. package/sinain-core/src/audio/transcription-local.ts +141 -0
  44. package/sinain-core/src/audio/transcription.ts +278 -0
  45. package/sinain-core/src/buffers/feed-buffer.ts +71 -0
  46. package/sinain-core/src/buffers/sense-buffer.ts +425 -0
  47. package/sinain-core/src/config.ts +245 -0
  48. package/sinain-core/src/escalation/escalation-slot.ts +136 -0
  49. package/sinain-core/src/escalation/escalator.ts +812 -0
  50. package/sinain-core/src/escalation/message-builder.ts +323 -0
  51. package/sinain-core/src/escalation/openclaw-ws.ts +726 -0
  52. package/sinain-core/src/escalation/scorer.ts +166 -0
  53. package/sinain-core/src/index.ts +507 -0
  54. package/sinain-core/src/learning/feedback-store.ts +253 -0
  55. package/sinain-core/src/learning/signal-collector.ts +218 -0
  56. package/sinain-core/src/log.ts +24 -0
  57. package/sinain-core/src/overlay/commands.ts +126 -0
  58. package/sinain-core/src/overlay/ws-handler.ts +267 -0
  59. package/sinain-core/src/privacy/index.ts +18 -0
  60. package/sinain-core/src/privacy/presets.ts +40 -0
  61. package/sinain-core/src/privacy/redact.ts +92 -0
  62. package/sinain-core/src/profiler.ts +181 -0
  63. package/sinain-core/src/recorder.ts +186 -0
  64. package/sinain-core/src/server.ts +417 -0
  65. package/sinain-core/src/trace/trace-store.ts +73 -0
  66. package/sinain-core/src/trace/tracer.ts +94 -0
  67. package/sinain-core/src/types.ts +427 -0
  68. package/sinain-core/src/util/dedup.ts +48 -0
  69. package/sinain-core/src/util/task-store.ts +84 -0
  70. package/sinain-core/tsconfig.json +18 -0
  71. package/sinain-knowledge/adapters/generic/adapter.ts +103 -0
  72. package/sinain-knowledge/adapters/interface.ts +72 -0
  73. package/sinain-knowledge/adapters/openclaw/adapter.ts +223 -0
  74. package/sinain-knowledge/curation/engine.ts +493 -0
  75. package/sinain-knowledge/curation/resilience.ts +336 -0
  76. package/sinain-knowledge/data/git-store.ts +312 -0
  77. package/sinain-knowledge/data/schema.ts +89 -0
  78. package/sinain-knowledge/data/snapshot.ts +226 -0
  79. package/sinain-knowledge/data/store.ts +488 -0
  80. package/sinain-knowledge/deploy/cli.ts +214 -0
  81. package/sinain-knowledge/deploy/manifest.ts +80 -0
  82. package/sinain-knowledge/protocol/bindings/generic.md +5 -0
  83. package/sinain-knowledge/protocol/bindings/openclaw.md +5 -0
  84. package/sinain-knowledge/protocol/heartbeat.md +62 -0
  85. package/sinain-knowledge/protocol/renderer.ts +56 -0
  86. package/sinain-knowledge/protocol/skill.md +335 -0
  87. package/sinain-mcp-server/index.ts +337 -0
  88. package/sinain-mcp-server/package.json +19 -0
  89. package/sinain-mcp-server/tsconfig.json +15 -0
@@ -0,0 +1,54 @@
1
+ """Detect the frontmost application and window title (cross-platform)."""
2
+
3
+ import subprocess
4
+ import sys
5
+
6
+
7
class MacAppDetector:
    """Frontmost-application probe for macOS, backed by an AppleScript query."""

    def __init__(self):
        # Values observed by the most recent detect_change() call; both start
        # empty, which detect_change() treats as "nothing to compare against".
        self._last_app: str = ""
        self._last_window: str = ""

    def get_active_app(self) -> tuple[str, str]:
        """Ask System Events for the frontmost app and its front window name.

        Returns:
            ``(app_name, window_title)``. The title is empty when the script
            could not read a front window name, and any exception raised
            while running ``osascript`` yields ``("", "")``.
        """
        # The script prints "<app name>|||<window title>".
        script = (
            'tell application "System Events"\n'
            ' set appProc to first application process whose frontmost is true\n'
            ' set appName to name of appProc\n'
            ' set winTitle to ""\n'
            ' try\n'
            ' set winTitle to name of front window of appProc\n'
            ' end try\n'
            ' return appName & "|||" & winTitle\n'
            'end tell'
        )
        try:
            completed = subprocess.run(
                ["osascript", "-e", script],
                capture_output=True, text=True, timeout=2,
            )
            # Without a separator the whole output is the app name and the
            # window title stays empty.
            head, _, tail = completed.stdout.strip().partition("|||")
            return head.strip(), tail.strip()
        except Exception:
            return "", ""

    def detect_change(self) -> tuple[bool, bool, str, str]:
        """Poll the frontmost app and compare it with the previous poll.

        Returns:
            ``(app_changed, window_changed, app_name, window_title)``. A
            change flag is only raised when the previously stored value was
            non-empty, so the first poll never reports a change.
        """
        current_app, current_window = self.get_active_app()
        previous_app, previous_window = self._last_app, self._last_window
        self._last_app, self._last_window = current_app, current_window

        app_changed = previous_app != "" and current_app != previous_app
        window_changed = previous_window != "" and current_window != previous_window
        return app_changed, window_changed, current_app, current_window
47
+
48
+
49
def AppDetector():
    """Build the app detector that matches the running platform.

    On ``win32`` the Windows detector is imported at call time and returned;
    on every other platform a ``MacAppDetector`` is returned.
    """
    if sys.platform != "win32":
        return MacAppDetector()

    # Imported here rather than at module top, so the Windows module is only
    # loaded when this branch runs.
    from .app_detector_win import WinAppDetector
    return WinAppDetector()
@@ -0,0 +1,83 @@
1
+ """Detect the foreground application and window title on Windows."""
2
+ from __future__ import annotations
3
+
4
+ import ctypes
5
+ import ctypes.wintypes
6
+
7
+
8
class WinAppDetector:
    """Detects the foreground application and window title on Windows.

    Uses the Win32 API (GetForegroundWindow, GetWindowTextW,
    GetWindowThreadProcessId) and resolves the process name through psutil
    when it can be imported, falling back to OpenProcess +
    GetModuleBaseNameW otherwise.
    """

    def __init__(self):
        # Values observed by the most recent detect_change() call.
        self._last_app: str = ""
        self._last_window: str = ""
        self._user32 = ctypes.windll.user32
        self._kernel32 = ctypes.windll.kernel32

    @staticmethod
    def _strip_exe_suffix(name: str) -> str:
        """Drop one trailing ``.exe`` (any letter case) from *name*.

        Only the suffix is removed; ``.exe`` appearing elsewhere in the name
        is left alone.
        """
        if name.lower().endswith(".exe"):
            return name[:-4]
        return name

    def get_active_app(self) -> tuple[str, str]:
        """Return ``(app_name, window_title)`` of the foreground window.

        Returns ``("", "")`` when there is no foreground window or when any
        of the Win32 calls raises.
        """
        try:
            hwnd = self._user32.GetForegroundWindow()
            if not hwnd:
                return "", ""

            # Window title: size the buffer from the reported length (+1 for
            # the terminating NUL).
            length = self._user32.GetWindowTextLengthW(hwnd)
            buf = ctypes.create_unicode_buffer(length + 1)
            self._user32.GetWindowTextW(hwnd, buf, length + 1)
            window_title = buf.value

            # Process name via the PID that owns the window.
            pid = ctypes.wintypes.DWORD()
            self._user32.GetWindowThreadProcessId(hwnd, ctypes.byref(pid))
            app_name = self._get_process_name(pid.value)

            return app_name, window_title
        except Exception:
            return "", ""

    def _get_process_name(self, pid: int) -> str:
        """Return the executable name for *pid* without its ``.exe`` suffix.

        Returns ``""`` when neither psutil nor the Win32 fallback can
        resolve the name.
        """
        try:
            # Try psutil first (more reliable); it may not be installed.
            import psutil
            proc = psutil.Process(pid)
            # Same suffix handling as the fallback below, so both paths
            # report identical names for the same executable.
            return self._strip_exe_suffix(proc.name())
        except Exception:
            pass

        # Fallback: OpenProcess + GetModuleBaseNameW
        try:
            PROCESS_QUERY_INFORMATION = 0x0400
            PROCESS_VM_READ = 0x0010
            handle = self._kernel32.OpenProcess(
                PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, False, pid
            )
            if not handle:
                return ""

            try:
                psapi = ctypes.windll.psapi
                buf = ctypes.create_unicode_buffer(260)
                psapi.GetModuleBaseNameW(handle, None, buf, 260)
                return self._strip_exe_suffix(buf.value)
            finally:
                # Always release the process handle, even if the lookup raised.
                self._kernel32.CloseHandle(handle)
        except Exception:
            return ""

    def detect_change(self) -> tuple[bool, bool, str, str]:
        """Poll the foreground window and compare it with the previous poll.

        Returns:
            ``(app_changed, window_changed, app_name, window_title)``. A
            change flag is only raised when the previously stored value was
            non-empty, so the first poll never reports a change.
        """
        app, window = self.get_active_app()
        app_changed = app != self._last_app and self._last_app != ""
        window_changed = window != self._last_window and self._last_window != ""
        self._last_app = app
        self._last_window = window
        return app_changed, window_changed, app, window
@@ -0,0 +1,215 @@
1
+ """Screen capture via IPC from sck-capture (preferred) or CoreGraphics fallback.
2
+
3
+ On Windows, uses mss (DXGI Desktop Duplication) via capture_win.py.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import json
8
+ import os
9
+ import sys
10
+ import time
11
+ from typing import Generator
12
+
13
+ from PIL import Image
14
+
15
+ # macOS-only imports — deferred so the module loads on Windows
16
+ if sys.platform == "darwin":
17
+ import Quartz
18
+
19
+
20
class ScreenCapture:
    """Captures screen frames via CGDisplayCreateImage (CoreGraphics/IOSurface).

    Uses Quartz CGDisplayCreateImage instead of the screencapture CLI.
    This avoids CoreMediaIO/ScreenCaptureKit, which blocks camera access
    for other apps (e.g. Google Meet) on macOS 14+.
    """

    def __init__(self, mode: str = "screen", target: int = 0,
                 fps: float = 1, scale: float = 0.5):
        # mode/target are stored for interface parity with the other capture
        # backends; this class always captures the main display.
        self.mode = mode
        self.target = target
        self.fps = fps
        self.scale = scale
        self.stats_ok = 0
        self.stats_fail = 0
        self._last_stats_time = time.time()
        self._stats_interval = 60  # log stats every 60s
        self._display_id = Quartz.CGMainDisplayID()

    def capture_frame(self) -> tuple[Image.Image, float]:
        """Capture one frame and return ``(PIL Image, timestamp)``.

        Uses CGDisplayCreateImage for zero-subprocess, camera-safe capture
        and downscales by ``self.scale`` before returning. The timestamp is
        taken just before the capture starts.

        Raises:
            RuntimeError: if CGDisplayCreateImage returns None.
            Exception: any error raised while reading or converting the
                pixel data is re-raised unchanged.

        Every failure, not only the None case, is counted in
        ``stats_fail`` so the periodic stats reflect what actually happened.
        """
        ts = time.time()
        try:
            img = self._grab_image()
        except Exception:
            self.stats_fail += 1
            raise
        self.stats_ok += 1
        return img, ts

    def _grab_image(self) -> Image.Image:
        """Grab the main display and return it as a (possibly scaled) image."""
        cg_image = Quartz.CGDisplayCreateImage(self._display_id)
        if cg_image is None:
            raise RuntimeError("CGDisplayCreateImage returned None")

        try:
            width = Quartz.CGImageGetWidth(cg_image)
            height = Quartz.CGImageGetHeight(cg_image)
            bytes_per_row = Quartz.CGImageGetBytesPerRow(cg_image)

            # Get raw pixel data from CGImage
            data_provider = Quartz.CGImageGetDataProvider(cg_image)
            raw_data = Quartz.CGDataProviderCopyData(data_provider)
        finally:
            # Explicitly release CGImage and its IOSurface handle immediately.
            # At continuous capture rates, unreleased handles cause GPU/camera
            # contention because the camera shares IOSurface infrastructure.
            del cg_image

        # CGDisplayCreateImage returns BGRA (premultiplied alpha, 32Little);
        # bytes_per_row is passed as the stride because rows may be padded.
        img = Image.frombytes("RGBA", (width, height), raw_data,
                              "raw", "BGRA", bytes_per_row, 1)

        if self.scale != 1.0:
            new_w = int(width * self.scale)
            new_h = int(height * self.scale)
            img = img.resize((new_w, new_h), Image.LANCZOS)

        return img

    def capture_loop(self) -> Generator[tuple[Image.Image, float], None, None]:
        """Yield frames at roughly ``self.fps`` frames per second, forever.

        A failed capture is logged and skipped; the loop keeps running.
        """
        interval = 1.0 / self.fps
        while True:
            start = time.time()
            frame = None
            try:
                frame = self.capture_frame()
            except Exception as e:
                print(f"[capture] error: {e}")
            # Yield outside the try block so only capture errors are handled
            # here, never exceptions thrown into the generator by a consumer.
            if frame is not None:
                yield frame
            self._maybe_log_stats()
            elapsed = time.time() - start
            sleep_time = interval - elapsed
            if sleep_time > 0:
                time.sleep(sleep_time)

    def _maybe_log_stats(self):
        """Print capture counters once per ``_stats_interval`` seconds."""
        now = time.time()
        if now - self._last_stats_time >= self._stats_interval:
            total = self.stats_ok + self.stats_fail
            rate = (self.stats_ok / total * 100) if total > 0 else 0
            print(f"[capture] stats: {self.stats_ok} ok, {self.stats_fail} fail"
                  f" ({rate:.0f}% success, {total} total)")
            if self.stats_fail > 0 and self.stats_ok == 0:
                print("[capture] WARNING: all captures failing — check screen recording permissions")
            self._last_stats_time = now
102
+
103
class ScreenKitCapture:
    """Reads JPEG frames written by sck-capture via IPC (~/.sinain/capture/)."""

    FRAME_PATH = os.path.expanduser("~/.sinain/capture/frame.jpg")
    META_PATH = os.path.expanduser("~/.sinain/capture/meta.json")
    STALE_THRESHOLD = 1.0  # seconds; used by is_available() only

    def __init__(self, fps: float = 1, scale: float = 1.0, **kwargs):
        # Extra keyword arguments are accepted and ignored so this class can
        # be constructed with the same call as the other capture backends.
        self.fps = fps
        self.scale = scale
        self.stats_ok = 0
        self.stats_fail = 0
        # Identity of the last frame returned: the meta.json timestamp when
        # one was available, otherwise the frame file's mtime.
        self._last_frame_ts = 0.0
        self._last_stats_time = time.time()
        self._stats_interval = 60

    @classmethod
    def is_available(cls) -> bool:
        """Return True when the frame file exists and is newer than STALE_THRESHOLD."""
        try:
            age = time.time() - os.path.getmtime(cls.FRAME_PATH)
        except OSError:
            # Missing or unreadable frame file.
            return False
        return age < cls.STALE_THRESHOLD

    def capture_frame(self) -> tuple[Image.Image, float] | None:
        """Read the latest frame from IPC.

        Returns:
            ``(PIL Image, timestamp)``, or None when the frame file is
            missing, is the same frame that was returned last time, or
            cannot be decoded. The timestamp comes from meta.json when it
            carries one, otherwise it is the current time.

        Only decode errors are counted in ``stats_fail``; missing and
        duplicate frames are not.
        """
        try:
            try:
                frame_mtime = os.path.getmtime(self.FRAME_PATH)
            except OSError:
                return None  # no frame has been written yet

            # Without usable metadata the file's mtime identifies the frame.
            # Falling back to time.time() here would make every poll look
            # like a new frame and defeat the duplicate check below.
            ts = time.time()
            frame_key = frame_mtime
            try:
                with open(self.META_PATH, encoding="utf-8") as f:
                    meta = json.load(f)
                meta_ts = meta.get("timestamp")
                if meta_ts is not None:
                    ts = meta_ts
                    frame_key = meta_ts
            except (json.JSONDecodeError, OSError):
                pass  # metadata is optional

            # Skip duplicate frames
            if frame_key == self._last_frame_ts:
                return None

            img = Image.open(self.FRAME_PATH)
            img.load()  # Force full read before file can be overwritten

            if self.scale != 1.0:
                new_w = int(img.width * self.scale)
                new_h = int(img.height * self.scale)
                img = img.resize((new_w, new_h), Image.LANCZOS)

            self._last_frame_ts = frame_key
            self.stats_ok += 1
            return img, ts

        except Exception as e:
            self.stats_fail += 1
            print(f"[capture-screenkit] error: {e}")
            return None

    def capture_loop(self) -> Generator[tuple[Image.Image, float], None, None]:
        """Yield new frames, polling at ``self.fps``; same interface as ScreenCapture."""
        interval = 1.0 / self.fps
        while True:
            start = time.time()
            result = self.capture_frame()
            if result is not None:
                yield result
            self._maybe_log_stats()
            elapsed = time.time() - start
            sleep_time = interval - elapsed
            if sleep_time > 0:
                time.sleep(sleep_time)

    def _maybe_log_stats(self):
        """Print capture counters once per ``_stats_interval`` seconds."""
        now = time.time()
        if now - self._last_stats_time >= self._stats_interval:
            total = self.stats_ok + self.stats_fail
            rate = (self.stats_ok / total * 100) if total > 0 else 0
            print(f"[capture-screenkit] stats: {self.stats_ok} ok, {self.stats_fail} fail"
                  f" ({rate:.0f}% success, {total} total)")
            self._last_stats_time = now
192
+
193
+
194
def create_capture(mode: str = "screen", target: int = 0,
                   fps: float = 1, scale: float = 0.5):
    """Pick and construct the screen capture backend for this platform.

    Windows: WinScreenCapture (imported at call time).
    Otherwise: ScreenKitCapture when fresh sck-capture frames are available,
    else ScreenCapture (CoreGraphics).

    Each choice is announced on stdout before the backend is returned.
    """
    if sys.platform == "win32":
        from .capture_win import WinScreenCapture
        print("[capture] Using mss (DXGI Desktop Duplication)")
        return WinScreenCapture(mode=mode, target=target, fps=fps, scale=scale)

    # macOS path
    if not ScreenKitCapture.is_available():
        # No fresh IPC frames: fall back to CGDisplayCreateImage
        # (legacy fallback for macOS < 13).
        print("[capture] Using CoreGraphics (CGDisplayCreateImage)")
        print("[capture] WARNING: CGDisplayCreateImage may cause camera conflicts on macOS 14+")
        return ScreenCapture(mode=mode, target=target, fps=fps, scale=scale)

    # Primary path: the Swift sck-capture binary writes JPEG frames that are
    # read back over IPC. Scaling is handled by sck-capture, so scale is
    # pinned to 1.0 here and the caller's value is not forwarded.
    print("[capture] Using ScreenCaptureKit (sck-capture IPC)")
    return ScreenKitCapture(fps=fps, scale=1.0)
@@ -0,0 +1,88 @@
1
+ """Screen capture on Windows via mss (DXGI Desktop Duplication)."""
2
+ from __future__ import annotations
3
+
4
+ import time
5
+ from typing import Generator
6
+
7
+ from PIL import Image
8
+
9
+ try:
10
+ import mss
11
+ except ImportError:
12
+ mss = None
13
+
14
+
15
class WinScreenCapture:
    """Windows screen capture backend built on the mss library.

    Mirrors the macOS backends: capture_frame() returns (Image, float) and
    capture_loop() is a generator of such tuples.

    NOTE(review): the module describes mss as "DXGI Desktop Duplication";
    that is not verifiable from this file, so confirm against mss itself.
    """

    def __init__(self, mode: str = "screen", target: int = 0,
                 fps: float = 1, scale: float = 0.5):
        if mss is None:
            raise RuntimeError("mss library required for Windows capture: pip install mss")
        self.mode = mode
        # Zero-based monitor choice; capture_frame() maps it to mss index
        # target + 1, so 0 selects mss monitor 1.
        self.target = target
        self.fps = fps
        self.scale = scale
        self.stats_ok = 0
        self.stats_fail = 0
        self._last_stats_time = time.time()
        self._stats_interval = 60
        self._sct = mss.mss()

    def capture_frame(self) -> tuple[Image.Image, float]:
        """Grab one monitor and return ``(PIL Image, timestamp)``.

        The image is downscaled by ``self.scale``; the timestamp is taken
        just before the grab.
        """
        ts = time.time()

        # mss monitors: index 0 = all monitors combined, 1+ = individual.
        # Negative or out-of-range targets fall back to monitor 1.
        monitors = self._sct.monitors
        monitor_idx = 1 if self.target < 0 else self.target + 1
        if monitor_idx >= len(monitors):
            monitor_idx = 1

        monitor = monitors[monitor_idx]
        screenshot = self._sct.grab(monitor)

        # Build the PIL image from the screenshot's RGB byte string.
        img = Image.frombytes("RGB", screenshot.size, screenshot.rgb)

        if self.scale != 1.0:
            scaled_size = (int(img.width * self.scale), int(img.height * self.scale))
            img = img.resize(scaled_size, Image.LANCZOS)

        self.stats_ok += 1
        if self.stats_ok == 1:
            # One-time report of the effective frame geometry.
            print(f"[capture-win] first frame: {img.width}x{img.height} "
                  f"(monitor={monitor['width']}x{monitor['height']}, scale={self.scale})",
                  flush=True)
        return img, ts

    def capture_loop(self) -> Generator[tuple[Image.Image, float], None, None]:
        """Yield frames at roughly ``self.fps`` frames per second, forever.

        A failed capture is counted, logged and skipped.
        """
        interval = 1.0 / self.fps
        while True:
            start = time.time()
            try:
                yield self.capture_frame()
            except Exception as e:
                self.stats_fail += 1
                print(f"[capture-win] error: {e}", flush=True)
            self._maybe_log_stats()
            remaining = interval - (time.time() - start)
            if remaining > 0:
                time.sleep(remaining)

    def _maybe_log_stats(self):
        """Print capture counters once per ``_stats_interval`` seconds."""
        now = time.time()
        if now - self._last_stats_time < self._stats_interval:
            return
        total = self.stats_ok + self.stats_fail
        rate = (self.stats_ok / total * 100) if total > 0 else 0
        print(f"[capture-win] stats: {self.stats_ok} ok, {self.stats_fail} fail"
              f" ({rate:.0f}% success, {total} total)", flush=True)
        self._last_stats_time = now
@@ -0,0 +1,86 @@
1
+ """SSIM-based frame change detection."""
2
+ from __future__ import annotations
3
+
4
+ from dataclasses import dataclass
5
+
6
+ import numpy as np
7
+ from PIL import Image
8
+ from skimage.metrics import structural_similarity
9
+
10
+
11
@dataclass
class ChangeResult:
    """Outcome of comparing a frame against the stored keyframe."""

    # SSIM similarity between the keyframe and the new frame.
    ssim_score: float
    # Difference map rendered as an image.
    diff_image: Image.Image
    # One (y, x) coordinate array per changed region that passed the area filter.
    contours: list  # list of (y, x) coordinate arrays
    # Box enclosing all kept regions.
    bbox: tuple[int, int, int, int]  # (x, y, w, h)
17
+
18
+
19
class ChangeDetector:
    """SSIM-based frame change detection against a stored keyframe."""

    def __init__(self, threshold: float = 0.95, min_area: int = 100):
        # Frames scoring at or above `threshold` are treated as unchanged.
        self.threshold = threshold
        # Changed regions smaller than `min_area` pixels are ignored.
        self.min_area = min_area
        # Grayscale keyframe that new frames are compared against.
        self.prev_frame: np.ndarray | None = None

    def set_threshold(self, threshold: float) -> None:
        """Dynamically adjust the SSIM change threshold."""
        self.threshold = threshold

    @staticmethod
    def _diff_to_uint8(diff_map: np.ndarray) -> np.ndarray:
        """Turn a per-pixel SSIM map into a 0-255 dissimilarity image.

        SSIM values may be negative, which pushes ``(1 - ssim) * 255`` past
        255; the result is clipped so the uint8 cast cannot wrap around and
        turn the most-changed pixels into low values.
        """
        return np.clip((1.0 - diff_map) * 255, 0, 255).astype(np.uint8)

    @staticmethod
    def _merged_bbox(contours: list) -> tuple[int, int, int, int]:
        """Return the ``(x, y, w, h)`` box enclosing every (y, x) coordinate.

        Coordinates are inclusive pixel positions, so the extent is
        ``max - min + 1``; a single pixel yields a 1x1 box.
        """
        all_coords = np.vstack(contours)
        min_y, min_x = all_coords.min(axis=0)
        max_y, max_x = all_coords.max(axis=0)
        return (
            int(min_x),
            int(min_y),
            int(max_x - min_x) + 1,
            int(max_y - min_y) + 1,
        )

    def detect(self, frame: Image.Image) -> ChangeResult | None:
        """Compare *frame* to the keyframe and describe any significant change.

        Returns:
            A ChangeResult when the SSIM score is below the threshold and at
            least one changed region reaches ``min_area`` pixels; otherwise
            None. The first frame, and any frame whose size differs from
            the keyframe, becomes the new keyframe and returns None.
        """
        gray = np.array(frame.convert("L"))

        if self.prev_frame is None:
            self.prev_frame = gray
            return None

        if gray.shape != self.prev_frame.shape:
            # Sizes are not comparable; restart from this frame.
            self.prev_frame = gray
            return None

        score, diff_map = structural_similarity(
            self.prev_frame, gray, full=True
        )

        if score >= self.threshold:
            return None

        # Keyframe update: only advance prev_frame when change IS detected.
        # This lets diffs accumulate against the last accepted keyframe,
        # which is essential at high FPS where consecutive frames differ by <1%.
        self.prev_frame = gray

        # Convert diff map to binary mask
        diff_binary = self._diff_to_uint8(diff_map)
        mask = diff_binary > 30  # threshold for "changed" pixels

        # Find changed regions via connected components
        from skimage.measure import label, regionprops
        regions = regionprops(label(mask))

        # Filter by area
        contours = [
            region.coords for region in regions if region.area >= self.min_area
        ]
        if not contours:
            return None

        return ChangeResult(
            ssim_score=float(score),
            diff_image=Image.fromarray(diff_binary),
            contours=contours,
            bbox=self._merged_bbox(contours),
        )
@@ -0,0 +1,64 @@
1
+ """Configuration loader for sense_client."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import os
6
+ import sys
7
+
8
# Built-in configuration; load_config() deep-copies this and overlays the
# user's JSON file on top, section by section.
DEFAULTS = {
    "capture": {
        "mode": "screen",
        "target": 0,
        "fps": 2.0,
        "scale": 0.5,
    },
    "detection": {
        "ssimThreshold": 0.92,
        "minArea": 100,
        "roiPadding": 20,
        "cooldownMs": 5000,
    },
    "ocr": {
        "enabled": True,
        "backend": "auto",
        "languages": ["en", "ru"],
        "lang": "eng",
        "psm": 11,
        "minConfidence": 50,
    },
    "gate": {
        "minOcrChars": 20,
        "majorChangeThreshold": 0.85,
        "cooldownMs": 5000,
        "adaptiveCooldownMs": 2000,
        "contextCooldownMs": 10000,
    },
    "relay": {
        "url": "http://localhost:9500",
        "sendThumbnails": True,
        "maxImageKB": 500,
    },
    "optimization": {
        "backpressure": False,
        "textDedup": False,
        "visionRegionOfInterest": False,
        "shadowValidation": False,
    },
}


def load_config(path: str | None = None) -> dict:
    """Load config from a JSON file and merge it over the defaults.

    Args:
        path: Location of the JSON config file. When it is None, empty, or
            does not exist, the defaults are returned as-is.

    Returns:
        A fresh dict the caller may mutate freely. A user section that is a
        dict is merged key by key into the matching default section; any
        other value replaces or adds the section wholesale.

    A file that cannot be read, is not valid JSON, or whose top-level value
    is not an object is ignored with a warning on stderr, and the defaults
    are returned.
    """
    config = json.loads(json.dumps(DEFAULTS))  # deep copy
    if not path:
        return config

    try:
        with open(path, encoding="utf-8") as f:
            user = json.load(f)
    except FileNotFoundError:
        return config  # a missing file simply means "use defaults"
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"[config] ignoring {path}: {exc}", file=sys.stderr)
        return config

    if not isinstance(user, dict):
        print(f"[config] ignoring {path}: top-level JSON value must be an object",
              file=sys.stderr)
        return config

    for section, values in user.items():
        if isinstance(values, dict) and isinstance(config.get(section), dict):
            config[section].update(values)
        else:
            config[section] = values
    return config