@geravant/sinain 1.0.19 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -1
- package/cli.js +176 -0
- package/index.ts +4 -2
- package/install.js +89 -14
- package/launcher.js +622 -0
- package/openclaw.plugin.json +4 -0
- package/pack-prepare.js +48 -0
- package/package.json +24 -5
- package/sense_client/README.md +82 -0
- package/sense_client/__init__.py +1 -0
- package/sense_client/__main__.py +462 -0
- package/sense_client/app_detector.py +54 -0
- package/sense_client/app_detector_win.py +83 -0
- package/sense_client/capture.py +215 -0
- package/sense_client/capture_win.py +88 -0
- package/sense_client/change_detector.py +86 -0
- package/sense_client/config.py +64 -0
- package/sense_client/gate.py +145 -0
- package/sense_client/ocr.py +347 -0
- package/sense_client/privacy.py +65 -0
- package/sense_client/requirements.txt +13 -0
- package/sense_client/roi_extractor.py +84 -0
- package/sense_client/sender.py +173 -0
- package/sense_client/tests/__init__.py +0 -0
- package/sense_client/tests/test_stream1_optimizations.py +234 -0
- package/setup-overlay.js +82 -0
- package/sinain-agent/.env.example +17 -0
- package/sinain-agent/CLAUDE.md +87 -0
- package/sinain-agent/mcp-config.json +12 -0
- package/sinain-agent/run.sh +248 -0
- package/sinain-core/.env.example +93 -0
- package/sinain-core/package-lock.json +552 -0
- package/sinain-core/package.json +21 -0
- package/sinain-core/src/agent/analyzer.ts +366 -0
- package/sinain-core/src/agent/context-window.ts +172 -0
- package/sinain-core/src/agent/loop.ts +404 -0
- package/sinain-core/src/agent/situation-writer.ts +187 -0
- package/sinain-core/src/agent/traits.ts +520 -0
- package/sinain-core/src/audio/capture-spawner-macos.ts +44 -0
- package/sinain-core/src/audio/capture-spawner-win.ts +37 -0
- package/sinain-core/src/audio/capture-spawner.ts +14 -0
- package/sinain-core/src/audio/pipeline.ts +335 -0
- package/sinain-core/src/audio/transcription-local.ts +141 -0
- package/sinain-core/src/audio/transcription.ts +278 -0
- package/sinain-core/src/buffers/feed-buffer.ts +71 -0
- package/sinain-core/src/buffers/sense-buffer.ts +425 -0
- package/sinain-core/src/config.ts +245 -0
- package/sinain-core/src/escalation/escalation-slot.ts +136 -0
- package/sinain-core/src/escalation/escalator.ts +828 -0
- package/sinain-core/src/escalation/message-builder.ts +370 -0
- package/sinain-core/src/escalation/openclaw-ws.ts +726 -0
- package/sinain-core/src/escalation/scorer.ts +166 -0
- package/sinain-core/src/index.ts +537 -0
- package/sinain-core/src/learning/feedback-store.ts +253 -0
- package/sinain-core/src/learning/signal-collector.ts +218 -0
- package/sinain-core/src/log.ts +24 -0
- package/sinain-core/src/overlay/commands.ts +126 -0
- package/sinain-core/src/overlay/ws-handler.ts +267 -0
- package/sinain-core/src/privacy/index.ts +18 -0
- package/sinain-core/src/privacy/presets.ts +40 -0
- package/sinain-core/src/privacy/redact.ts +92 -0
- package/sinain-core/src/profiler.ts +181 -0
- package/sinain-core/src/recorder.ts +186 -0
- package/sinain-core/src/server.ts +456 -0
- package/sinain-core/src/trace/trace-store.ts +73 -0
- package/sinain-core/src/trace/tracer.ts +94 -0
- package/sinain-core/src/types.ts +427 -0
- package/sinain-core/src/util/dedup.ts +48 -0
- package/sinain-core/src/util/task-store.ts +84 -0
- package/sinain-core/tsconfig.json +18 -0
- package/sinain-knowledge/curation/engine.ts +137 -24
- package/sinain-knowledge/data/git-store.ts +26 -0
- package/sinain-knowledge/data/store.ts +117 -0
- package/sinain-mcp-server/index.ts +417 -0
- package/sinain-mcp-server/package.json +19 -0
- package/sinain-mcp-server/tsconfig.json +15 -0
- package/sinain-memory/graph_query.py +185 -0
- package/sinain-memory/knowledge_integrator.py +450 -0
- package/sinain-memory/memory-config.json +3 -1
- package/sinain-memory/session_distiller.py +162 -0
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Detect the foreground application and window title on Windows."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import ctypes
|
|
5
|
+
import ctypes.wintypes
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class WinAppDetector:
    """Detects the foreground application and window title on Windows.

    Uses the Win32 API (GetForegroundWindow, GetWindowTextW,
    GetWindowThreadProcessId) to find the foreground window and the id of
    the process that owns it. The process name is resolved with psutil when
    it can be imported, otherwise with OpenProcess + GetModuleBaseNameW.
    """

    def __init__(self):
        # Values seen by the previous detect_change() call; "" means that
        # nothing has been observed yet (or the last lookup failed).
        self._last_app: str = ""
        self._last_window: str = ""
        self._user32 = ctypes.windll.user32
        self._kernel32 = ctypes.windll.kernel32

    @staticmethod
    def _strip_exe(name: str) -> str:
        """Return name without one trailing ".exe" (any letter case)."""
        # Suffix-only and case-insensitive, so "NOTEPAD.EXE" becomes
        # "NOTEPAD" and "my.exe.helper.exe" keeps its inner ".exe".
        if name.lower().endswith(".exe"):
            return name[:-4]
        return name

    def get_active_app(self) -> tuple[str, str]:
        """Return (app_name, window_title) of the foreground window.

        Returns ("", "") when there is no foreground window or when any of
        the Win32 calls raises.
        """
        try:
            hwnd = self._user32.GetForegroundWindow()
            if not hwnd:
                return "", ""

            # Size the buffer from the reported title length, plus one
            # character for the terminating NUL.
            length = self._user32.GetWindowTextLengthW(hwnd)
            buf = ctypes.create_unicode_buffer(length + 1)
            self._user32.GetWindowTextW(hwnd, buf, length + 1)
            window_title = buf.value

            # Resolve the owning process id, then its executable name.
            pid = ctypes.wintypes.DWORD()
            self._user32.GetWindowThreadProcessId(hwnd, ctypes.byref(pid))
            app_name = self._get_process_name(pid.value)

            return app_name, window_title
        except Exception:
            # Best effort: callers treat ("", "") as "unknown".
            return "", ""

    def _get_process_name(self, pid: int) -> str:
        """Return the executable name for pid without its ".exe" suffix.

        Returns "" when neither psutil nor the Win32 fallback can resolve
        the name.
        """
        try:
            # Try psutil first; it is imported lazily so a missing package
            # simply falls through to the Win32 path below.
            import psutil
            proc = psutil.Process(pid)
            return self._strip_exe(proc.name())
        except Exception:
            pass

        # Fallback: OpenProcess + GetModuleBaseNameW
        try:
            PROCESS_QUERY_INFORMATION = 0x0400
            PROCESS_VM_READ = 0x0010
            handle = self._kernel32.OpenProcess(
                PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, False, pid
            )
            if not handle:
                return ""

            try:
                psapi = ctypes.windll.psapi
                buf = ctypes.create_unicode_buffer(260)
                psapi.GetModuleBaseNameW(handle, None, buf, 260)
                return self._strip_exe(buf.value)
            finally:
                # Always release the process handle, even if the lookup raised.
                self._kernel32.CloseHandle(handle)
        except Exception:
            return ""

    def detect_change(self) -> tuple[bool, bool, str, str]:
        """Return (app_changed, window_changed, app_name, window_title).

        A change is only reported when the previously stored value was
        non-empty, so the very first observation never counts as a change.
        """
        app, window = self.get_active_app()
        app_changed = app != self._last_app and self._last_app != ""
        window_changed = window != self._last_window and self._last_window != ""
        self._last_app = app
        self._last_window = window
        return app_changed, window_changed, app, window
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
"""Screen capture via IPC from sck-capture (preferred) or CoreGraphics fallback.
|
|
2
|
+
|
|
3
|
+
On Windows, uses mss (DXGI Desktop Duplication) via capture_win.py.
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
import sys
|
|
10
|
+
import time
|
|
11
|
+
from typing import Generator
|
|
12
|
+
|
|
13
|
+
from PIL import Image
|
|
14
|
+
|
|
15
|
+
# macOS-only imports — deferred so the module loads on Windows
|
|
16
|
+
if sys.platform == "darwin":
|
|
17
|
+
import Quartz
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ScreenCapture:
    """Captures screen frames via Quartz CGDisplayCreateImage (CoreGraphics).

    Frames are grabbed in-process (no screencapture subprocess) from the
    main display. `mode` and `target` are stored for interface parity with
    the other capture backends but are not consulted by this class.

    Counters `stats_ok` / `stats_fail` are updated by capture_frame() and
    reported periodically by capture_loop().
    """

    def __init__(self, mode: str = "screen", target: int = 0,
                 fps: float = 1, scale: float = 0.5):
        self.mode = mode
        self.target = target
        self.fps = fps
        self.scale = scale
        self.stats_ok = 0
        self.stats_fail = 0
        self._last_stats_time = time.time()
        self._stats_interval = 60  # log stats every 60s
        self._display_id = Quartz.CGMainDisplayID()

    def capture_frame(self) -> tuple[Image.Image, float]:
        """Return (PIL Image, timestamp) for the main display.

        The timestamp is taken before the grab. The image is downscaled by
        `self.scale` unless the scale is exactly 1.0.

        Raises:
            RuntimeError: CGDisplayCreateImage returned None.
            Exception: anything raised while copying or converting the
                pixel data; re-raised unchanged.

        Every failure increments `stats_fail` exactly once, so the periodic
        stats (and the "all captures failing" warning) also reflect
        conversion errors, not only a None image.
        """
        ts = time.time()
        try:
            img = self._grab_image()
        except Exception:
            self.stats_fail += 1
            raise
        self.stats_ok += 1
        return img, ts

    def _grab_image(self) -> Image.Image:
        """Grab the display and convert it to a (possibly downscaled) image."""
        cg_image = Quartz.CGDisplayCreateImage(self._display_id)
        if cg_image is None:
            raise RuntimeError("CGDisplayCreateImage returned None")

        try:
            width = Quartz.CGImageGetWidth(cg_image)
            height = Quartz.CGImageGetHeight(cg_image)
            bytes_per_row = Quartz.CGImageGetBytesPerRow(cg_image)

            # Get raw pixel data from CGImage
            data_provider = Quartz.CGImageGetDataProvider(cg_image)
            raw_data = Quartz.CGDataProviderCopyData(data_provider)
        finally:
            # Drop our reference to the CGImage as soon as the bytes are
            # copied. The original author notes that handles left alive at
            # continuous capture rates cause GPU/camera contention.
            del cg_image

        # Pixel data is decoded as BGRA; bytes_per_row is passed as the
        # stride because rows may be padded beyond width * 4.
        img = Image.frombytes("RGBA", (width, height), raw_data,
                              "raw", "BGRA", bytes_per_row, 1)

        if self.scale != 1.0:
            new_w = int(width * self.scale)
            new_h = int(height * self.scale)
            img = img.resize((new_w, new_h), Image.LANCZOS)

        return img

    def capture_loop(self) -> Generator[tuple[Image.Image, float], None, None]:
        """Yield frames forever at roughly `self.fps` frames per second.

        Capture errors are printed and skipped. The yield sits outside the
        try block so only capture_frame() errors are reported as capture
        errors.
        """
        interval = 1.0 / self.fps
        while True:
            start = time.time()
            try:
                frame = self.capture_frame()
            except Exception as e:
                print(f"[capture] error: {e}")
            else:
                yield frame
            self._maybe_log_stats()
            # Sleep only for what is left of this frame's time slot.
            elapsed = time.time() - start
            sleep_time = interval - elapsed
            if sleep_time > 0:
                time.sleep(sleep_time)

    def _maybe_log_stats(self):
        """Print cumulative ok/fail counts once per `_stats_interval` seconds."""
        now = time.time()
        if now - self._last_stats_time >= self._stats_interval:
            total = self.stats_ok + self.stats_fail
            rate = (self.stats_ok / total * 100) if total > 0 else 0
            print(f"[capture] stats: {self.stats_ok} ok, {self.stats_fail} fail"
                  f" ({rate:.0f}% success, {total} total)")
            if self.stats_fail > 0 and self.stats_ok == 0:
                print("[capture] WARNING: all captures failing — check screen recording permissions")
            self._last_stats_time = now
|
|
102
|
+
|
|
103
|
+
class ScreenKitCapture:
    """Reads JPEG frames written by sck-capture via IPC (~/.sinain/capture/).

    The producer writes the latest frame to FRAME_PATH and, optionally, a
    JSON object with a "timestamp" entry to META_PATH. This class polls
    those files; it never captures the screen itself.
    """

    FRAME_PATH = os.path.expanduser("~/.sinain/capture/frame.jpg")
    META_PATH = os.path.expanduser("~/.sinain/capture/meta.json")
    STALE_THRESHOLD = 1.0  # seconds

    def __init__(self, fps: float = 1, scale: float = 1.0, **kwargs):
        # **kwargs is accepted and ignored so this class can be constructed
        # with the same keyword arguments as the other capture backends.
        self.fps = fps
        self.scale = scale
        self.stats_ok = 0
        self.stats_fail = 0
        self._last_frame_ts = 0.0  # timestamp of the last frame returned
        self._last_stats_time = time.time()
        self._stats_interval = 60

    @classmethod
    def is_available(cls) -> bool:
        """Return True when FRAME_PATH exists and is younger than STALE_THRESHOLD."""
        try:
            mtime = os.path.getmtime(cls.FRAME_PATH)
        except OSError:
            return False
        return (time.time() - mtime) < cls.STALE_THRESHOLD

    def _frame_timestamp(self, fallback: float) -> float:
        """Return the "timestamp" entry of META_PATH, or fallback.

        The fallback is used when the file is missing, unreadable, not
        valid JSON, not a JSON object, or has no "timestamp" entry.
        """
        try:
            with open(self.META_PATH) as f:
                meta = json.load(f)
        except (OSError, ValueError):
            return fallback
        if isinstance(meta, dict):
            return meta.get("timestamp", fallback)
        return fallback

    def capture_frame(self) -> tuple[Image.Image, float] | None:
        """Read the latest frame from IPC.

        Returns (PIL Image, timestamp), or None when there is no frame
        file, the frame has the same timestamp as the last one returned, or
        reading it failed (failures are printed and counted in
        `stats_fail`).

        The timestamp comes from META_PATH when available. Otherwise the
        frame file's modification time is used, so an unchanged frame is
        still recognised as a duplicate when no metadata is written.
        """
        try:
            try:
                mtime = os.path.getmtime(self.FRAME_PATH)
            except OSError:
                return None  # no frame written yet (or it vanished mid-poll)

            ts = self._frame_timestamp(mtime)

            # Skip duplicate frames
            if ts == self._last_frame_ts:
                return None

            img = Image.open(self.FRAME_PATH)
            img.load()  # Force full read before file can be overwritten

            if self.scale != 1.0:
                new_w = int(img.width * self.scale)
                new_h = int(img.height * self.scale)
                img = img.resize((new_w, new_h), Image.LANCZOS)

            self._last_frame_ts = ts
            self.stats_ok += 1
            return img, ts

        except Exception as e:
            self.stats_fail += 1
            print(f"[capture-screenkit] error: {e}")
            return None

    def capture_loop(self) -> Generator[tuple[Image.Image, float], None, None]:
        """Yield new frames, polling at `self.fps` polls per second.

        Polls that produce no new frame yield nothing.
        """
        interval = 1.0 / self.fps
        while True:
            start = time.time()
            result = self.capture_frame()
            if result is not None:
                yield result
            self._maybe_log_stats()
            # Sleep only for what is left of this poll's time slot.
            elapsed = time.time() - start
            sleep_time = interval - elapsed
            if sleep_time > 0:
                time.sleep(sleep_time)

    def _maybe_log_stats(self):
        """Print cumulative ok/fail counts once per `_stats_interval` seconds."""
        now = time.time()
        if now - self._last_stats_time >= self._stats_interval:
            total = self.stats_ok + self.stats_fail
            rate = (self.stats_ok / total * 100) if total > 0 else 0
            print(f"[capture-screenkit] stats: {self.stats_ok} ok, {self.stats_fail} fail"
                  f" ({rate:.0f}% success, {total} total)")
            self._last_stats_time = now
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def create_capture(mode: str = "screen", target: int = 0,
                   fps: float = 1, scale: float = 0.5):
    """Build the screen-capture backend for the current platform.

    * win32: WinScreenCapture from the sibling capture_win module, which is
      imported only on that branch.
    * otherwise: ScreenKitCapture when it reports fresh IPC frames (always
      constructed with scale=1.0; `mode`, `target` and `scale` are not
      forwarded), else the CoreGraphics-based ScreenCapture.

    A one-line notice naming the chosen backend is printed.
    """
    running_on_windows = sys.platform == "win32"
    if running_on_windows:
        from .capture_win import WinScreenCapture
        print("[capture] Using mss (DXGI Desktop Duplication)")
        return WinScreenCapture(mode=mode, target=target, fps=fps, scale=scale)

    # Non-Windows path: use the CoreGraphics grabber only when the
    # sck-capture IPC frames are missing or stale.
    if not ScreenKitCapture.is_available():
        print("[capture] Using CoreGraphics (CGDisplayCreateImage)")
        print("[capture] WARNING: CGDisplayCreateImage may cause camera conflicts on macOS 14+")
        return ScreenCapture(mode=mode, target=target, fps=fps, scale=scale)

    print("[capture] Using ScreenCaptureKit (sck-capture IPC)")
    # The original notes that scaling is handled by sck-capture, hence 1.0.
    return ScreenKitCapture(fps=fps, scale=1.0)
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""Screen capture on Windows via mss (DXGI Desktop Duplication)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import time
|
|
5
|
+
from typing import Generator
|
|
6
|
+
|
|
7
|
+
from PIL import Image
|
|
8
|
+
|
|
9
|
+
try:
|
|
10
|
+
import mss
|
|
11
|
+
except ImportError:
|
|
12
|
+
mss = None
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class WinScreenCapture:
    """Screen grabber for Windows built on the mss library.

    Exposes the same surface as the macOS backends: capture_frame()
    returns (image, timestamp) and capture_loop() is an endless generator
    of such pairs. `stats_ok` / `stats_fail` count successes and failures.
    """

    def __init__(self, mode: str = "screen", target: int = 0,
                 fps: float = 1, scale: float = 0.5):
        if mss is None:
            raise RuntimeError("mss library required for Windows capture: pip install mss")
        self.mode = mode
        # Zero-based display selector: capture_frame() maps target N to mss
        # monitor N + 1 (mss index 0 is the combined virtual screen), and
        # negative or out-of-range values to monitor 1.
        self.target = target
        self.fps = fps
        self.scale = scale
        self.stats_ok = 0
        self.stats_fail = 0
        self._last_stats_time = time.time()
        self._stats_interval = 60
        self._sct = mss.mss()

    def capture_frame(self) -> tuple[Image.Image, float]:
        """Grab one frame and return (PIL Image, timestamp).

        The timestamp is taken before the grab. The image is resized by
        `self.scale` unless the scale is exactly 1.0. The first successful
        frame prints its dimensions.
        """
        ts = time.time()

        # mss monitors: index 0 = all monitors combined, 1+ = individual
        chosen = 1 if self.target < 0 else self.target + 1
        if chosen >= len(self._sct.monitors):
            chosen = 1  # fallback to primary

        monitor = self._sct.monitors[chosen]
        shot = self._sct.grab(monitor)

        # Build the PIL image from the screenshot's RGB byte view.
        img = Image.frombytes("RGB", shot.size, shot.rgb)

        if self.scale != 1.0:
            scaled_size = (int(img.width * self.scale), int(img.height * self.scale))
            img = img.resize(scaled_size, Image.LANCZOS)

        self.stats_ok += 1
        first_frame = self.stats_ok == 1
        if first_frame:
            print(f"[capture-win] first frame: {img.width}x{img.height} "
                  f"(monitor={monitor['width']}x{monitor['height']}, scale={self.scale})",
                  flush=True)
        return img, ts

    def capture_loop(self) -> Generator[tuple[Image.Image, float], None, None]:
        """Yield frames forever, pacing iterations to `self.fps` per second.

        Errors are counted in `stats_fail`, printed, and the loop goes on.
        """
        period = 1.0 / self.fps
        while True:
            began = time.time()
            try:
                yield self.capture_frame()
            except Exception as e:
                self.stats_fail += 1
                print(f"[capture-win] error: {e}", flush=True)
            self._maybe_log_stats()
            # Wait out whatever remains of this frame's time slot.
            remaining = period - (time.time() - began)
            if remaining > 0:
                time.sleep(remaining)

    def _maybe_log_stats(self):
        """Print cumulative counters once per `_stats_interval` seconds."""
        now = time.time()
        if now - self._last_stats_time < self._stats_interval:
            return
        total = self.stats_ok + self.stats_fail
        rate = (self.stats_ok / total * 100) if total > 0 else 0
        print(f"[capture-win] stats: {self.stats_ok} ok, {self.stats_fail} fail"
              f" ({rate:.0f}% success, {total} total)", flush=True)
        self._last_stats_time = now
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""SSIM-based frame change detection."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
from PIL import Image
|
|
8
|
+
from skimage.metrics import structural_similarity
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class ChangeResult:
    """Outcome of a ChangeDetector.detect() call that found a change."""

    # SSIM score between the stored keyframe and the new frame (lower means
    # more different).
    ssim_score: float
    # Grayscale image built from (1 - SSIM map) * 255; brighter pixels
    # differ more.
    diff_image: Image.Image
    contours: list  # list of (y, x) coordinate arrays
    # Merged box over all kept regions. w and h are computed as max - min of
    # the pixel coordinates.
    bbox: tuple[int, int, int, int]  # (x, y, w, h)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ChangeDetector:
    """Flags frames that differ enough from the last accepted keyframe.

    Similarity is measured with SSIM on grayscale versions of the frames.
    """

    def __init__(self, threshold: float = 0.95, min_area: int = 100):
        self.threshold = threshold   # SSIM at or above this means "no change"
        self.min_area = min_area     # smallest changed region (pixels) kept
        self.prev_frame: np.ndarray | None = None  # grayscale keyframe

    def set_threshold(self, threshold: float) -> None:
        """Replace the SSIM threshold used by subsequent detect() calls."""
        self.threshold = threshold

    def detect(self, frame: Image.Image) -> ChangeResult | None:
        """Compare `frame` with the stored keyframe.

        Returns a ChangeResult when the SSIM score is below the threshold
        and at least one changed region reaches `min_area`; otherwise None.
        The first frame, and any frame whose size differs from the
        keyframe, only (re)initialises the keyframe.
        """
        current = np.array(frame.convert("L"))
        reference = self.prev_frame

        # Nothing comparable yet: adopt this frame as the keyframe.
        if reference is None or current.shape != reference.shape:
            self.prev_frame = current
            return None

        score, diff_map = structural_similarity(reference, current, full=True)
        if score >= self.threshold:
            return None

        # The keyframe advances only once the score has dropped below the
        # threshold, so slow drift keeps accumulating against the same
        # reference instead of being absorbed frame by frame.
        self.prev_frame = current

        # Invert the similarity map into an 8-bit difference image.
        diff_binary = ((1.0 - diff_map) * 255).astype(np.uint8)

        # Connected components of the "changed" pixels (difference > 30).
        from skimage.measure import label, regionprops
        components = regionprops(label(diff_binary > 30))

        contours = [comp.coords for comp in components
                    if comp.area >= self.min_area]
        if not contours:
            return None

        # One bounding box around every kept region; coords are (row, col).
        stacked = np.vstack(contours)
        top, left = stacked.min(axis=0)
        bottom, right = stacked.max(axis=0)

        return ChangeResult(
            ssim_score=score,
            diff_image=Image.fromarray(diff_binary),
            contours=contours,
            bbox=(int(left), int(top), int(right - left), int(bottom - top)),
        )
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""Configuration loader for sense_client."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
# Built-in settings. load_config() deep-copies this mapping and overlays the
# user's JSON file on top of it, section by section.
DEFAULTS = {
    "capture": {
        "mode": "screen",
        "target": 0,
        "fps": 2.0,
        "scale": 0.5,
    },
    "detection": {
        "ssimThreshold": 0.92,
        "minArea": 100,
        "roiPadding": 20,
        "cooldownMs": 5000,
    },
    "ocr": {
        "enabled": True,
        "backend": "auto",
        "languages": ["en", "ru"],
        "lang": "eng",
        "psm": 11,
        "minConfidence": 50,
    },
    "gate": {
        "minOcrChars": 20,
        "majorChangeThreshold": 0.85,
        "cooldownMs": 5000,
        "adaptiveCooldownMs": 2000,
        "contextCooldownMs": 10000,
    },
    "relay": {
        "url": "http://localhost:9500",
        "sendThumbnails": True,
        "maxImageKB": 500,
    },
    "optimization": {
        "backpressure": False,
        "textDedup": False,
        "visionRegionOfInterest": False,
        "shadowValidation": False,
    },
}


def load_config(path: str | None = None) -> dict:
    """Load config from a JSON file and merge it over DEFAULTS.

    Args:
        path: JSON file holding an object of sections. None, an empty
            string or a missing file all mean "defaults only".

    Returns:
        A fresh dict (DEFAULTS itself is never mutated). For a section that
        exists in DEFAULTS and whose user value is an object, the user's
        keys are merged into the default section; any other user value
        replaces or adds the section as given.

    A file that cannot be read, is not valid JSON, or whose top level is
    not an object is ignored with a warning on stderr, and the defaults are
    returned. This function does not raise for bad user files.
    """
    config = json.loads(json.dumps(DEFAULTS))  # deep copy
    if not path:
        return config

    try:
        # JSON is UTF-8 by specification; "utf-8-sig" additionally accepts
        # a leading byte-order mark.
        with open(path, encoding="utf-8-sig") as f:
            user = json.load(f)
    except FileNotFoundError:
        return config  # no user file: silently use defaults
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"[config] ignoring {path}: {e}", file=sys.stderr)
        return config

    if not isinstance(user, dict):
        print(f"[config] ignoring {path}: top-level JSON value must be an object",
              file=sys.stderr)
        return config

    for section, values in user.items():
        if isinstance(values, dict) and isinstance(config.get(section), dict):
            config[section].update(values)
        else:
            config[section] = values
    return config
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"""Decision gate — classifies sense events and decides what to send."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import difflib
|
|
5
|
+
import time
|
|
6
|
+
from collections import deque
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
|
|
9
|
+
from .change_detector import ChangeResult
|
|
10
|
+
from .ocr import OCRResult
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class SenseMeta:
    """Metadata attached to a SenseEvent."""

    # SSIM score of the change that produced the event; DecisionGate.classify
    # fills it from ChangeResult.ssim_score for "text" and "visual" events.
    ssim: float = 0.0
    # The remaining fields are not set anywhere in this module.
    # NOTE(review): presumably filled in by the caller with the foreground
    # application, its window title and a screen index - confirm.
    app: str = ""
    window_title: str = ""
    screen: int = 0
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class SenseObservation:
    """Structured observation fields (claude-mem compatible schema).

    Per the original author's notes: sense_client sets `title` and `facts`
    from OCR/app context, and sinain-core's agent layer enriches the
    observation with `narrative` and `concepts`. Nothing in this module
    populates these fields; every field defaults to empty.
    """
    title: str = ""
    subtitle: str = ""
    # default_factory gives every instance its own list.
    facts: list[str] = field(default_factory=list)
    narrative: str = ""
    concepts: list[str] = field(default_factory=list)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class SenseEvent:
    """One event produced by DecisionGate.classify()."""

    type: str  # "text" | "visual" | "context"
    # Creation time; classify() passes time.time() * 1000, i.e. milliseconds.
    ts: float = 0.0
    # OCR text; classify() sets it for "text" and "visual" events.
    ocr: str = ""
    # Not set in this module.
    # NOTE(review): presumably region-of-interest and diff payloads attached
    # by the caller - confirm.
    roi: dict | None = None
    diff: dict | None = None
    meta: SenseMeta = field(default_factory=SenseMeta)
    observation: SenseObservation = field(default_factory=SenseObservation)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class DecisionGate:
    """Classifies sense events and decides what to send.

    All timestamps and cooldowns handled here are in milliseconds
    (time.time() * 1000).
    """

    # For this long after an app/window switch the shorter adaptive
    # cooldown is used instead of the normal one.
    _RECENT_SWITCH_WINDOW_MS = 10000
    # SequenceMatcher ratio above which two OCR texts count as duplicates.
    _DUPLICATE_RATIO = 0.7

    def __init__(self, min_ocr_chars: int = 20,
                 major_change_threshold: float = 0.85,
                 cooldown_ms: int = 5000,
                 adaptive_cooldown_ms: int = 2000,
                 context_cooldown_ms: int = 10000):
        self.min_ocr_chars = min_ocr_chars
        self.major_change_threshold = major_change_threshold
        self.cooldown_ms = cooldown_ms
        self.adaptive_cooldown_ms = adaptive_cooldown_ms
        self.context_cooldown_ms = context_cooldown_ms
        self.last_send_ts: float = 0
        self.last_context_ts: float = 0
        self.last_app_change_ts: float = 0
        # Fuzzy dedup: ring buffer of last 5 OCR texts
        self._recent_texts: deque[str] = deque(maxlen=5)
        self._last_sent_text: str = ""

    def _effective_cooldown(self, now: float) -> int:
        """Return the cooldown that applies at time `now` (milliseconds)."""
        recent_switch = (now - self.last_app_change_ts) < self._RECENT_SWITCH_WINDOW_MS
        return self.adaptive_cooldown_ms if recent_switch else self.cooldown_ms

    def is_ready(self, app_changed: bool, window_changed: bool) -> bool:
        """Time-based readiness check without consuming OCR output.

        Used by backpressure scheduling to decide whether to run OCR at
        all. Returns True on any app/window change, otherwise True once the
        effective cooldown since the last send has elapsed. Does not modify
        any state.
        """
        if app_changed or window_changed:
            return True
        now = time.time() * 1000
        return now - self.last_send_ts >= self._effective_cooldown(now)

    def _is_duplicate(self, text: str) -> bool:
        """Return True if `text` equals the last sent text or is more than
        70% similar (SequenceMatcher ratio) to any recently sent text."""
        if text == self._last_sent_text:
            return True
        if not self._recent_texts:
            return False
        # SequenceMatcher caches its analysis of the second sequence, so
        # keep `text` there and swap only the first sequence per comparison.
        matcher = difflib.SequenceMatcher(None, "", text)
        limit = self._DUPLICATE_RATIO
        for prev in self._recent_texts:
            matcher.set_seq1(prev)
            # real_quick_ratio() and quick_ratio() are cheap upper bounds
            # on ratio(), so the expensive ratio() runs only for candidates
            # that could still exceed the limit. The outcome is unchanged.
            if (matcher.real_quick_ratio() > limit
                    and matcher.quick_ratio() > limit
                    and matcher.ratio() > limit):
                return True
        return False

    @staticmethod
    def _ocr_quality_ok(text: str) -> bool:
        """Reject garbage OCR: >50% single-char tokens or <50% alphanumeric.

        The alphanumeric share is taken over non-whitespace characters
        only, so newlines and tabs between lines of clean text do not count
        against it. Text with no tokens is rejected.
        """
        tokens = text.split()
        if not tokens:
            return False
        single_char = sum(1 for t in tokens if len(t) == 1)
        if single_char / len(tokens) > 0.5:
            return False
        alnum = sum(1 for ch in text if ch.isalnum())
        # Tokens come from split(), so together they contain every
        # non-whitespace character and at least one character overall.
        total = sum(len(t) for t in tokens)
        if alnum / total < 0.5:
            return False
        return True

    def classify(self, change: ChangeResult | None,
                 ocr: OCRResult, app_changed: bool,
                 window_changed: bool = False) -> SenseEvent | None:
        """Return the SenseEvent to send, or None to drop this observation.

        Decision order:
          1. On an app/window change, emit a "context" event if the context
             cooldown has elapsed; otherwise continue with the checks below.
          2. Drop while the effective cooldown since the last send is
             running, or when `change` is None.
          3. With at least `min_ocr_chars` of OCR text: emit a "text" event
             unless the text is a duplicate or fails the quality check, in
             which case the observation is dropped.
          4. Otherwise emit a "visual" event when the SSIM score is below
             `major_change_threshold`.

        `ocr` is only read when `change` is not None.
        """
        now = time.time() * 1000

        # Context events (app/window change) bypass normal cooldown
        if app_changed or window_changed:
            self.last_app_change_ts = now
            if now - self.last_context_ts >= self.context_cooldown_ms:
                self.last_context_ts = now
                self.last_send_ts = now
                return SenseEvent(type="context", ts=now)

        # Shorter cooldown right after an app switch, normal one otherwise.
        if now - self.last_send_ts < self._effective_cooldown(now):
            return None

        if change is None:
            return None

        # OCR text sufficient -> text event
        if ocr.text and len(ocr.text) >= self.min_ocr_chars:
            if self._is_duplicate(ocr.text):
                return None
            if not self._ocr_quality_ok(ocr.text):
                return None
            self._recent_texts.append(ocr.text)
            self._last_sent_text = ocr.text
            self.last_send_ts = now
            return SenseEvent(type="text", ts=now, ocr=ocr.text,
                              meta=SenseMeta(ssim=change.ssim_score))

        # Major visual change -> visual event
        if change.ssim_score < self.major_change_threshold:
            self.last_send_ts = now
            return SenseEvent(type="visual", ts=now, ocr=ocr.text,
                              meta=SenseMeta(ssim=change.ssim_score))

        return None
|