@geravant/sinain 1.0.18 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -1
- package/cli.js +176 -0
- package/index.ts +163 -1257
- package/install.js +12 -2
- package/launcher.js +622 -0
- package/openclaw.plugin.json +4 -0
- package/pack-prepare.js +48 -0
- package/package.json +26 -5
- package/sense_client/README.md +82 -0
- package/sense_client/__init__.py +1 -0
- package/sense_client/__main__.py +462 -0
- package/sense_client/app_detector.py +54 -0
- package/sense_client/app_detector_win.py +83 -0
- package/sense_client/capture.py +215 -0
- package/sense_client/capture_win.py +88 -0
- package/sense_client/change_detector.py +86 -0
- package/sense_client/config.py +64 -0
- package/sense_client/gate.py +145 -0
- package/sense_client/ocr.py +347 -0
- package/sense_client/privacy.py +65 -0
- package/sense_client/requirements.txt +13 -0
- package/sense_client/roi_extractor.py +84 -0
- package/sense_client/sender.py +173 -0
- package/sense_client/tests/__init__.py +0 -0
- package/sense_client/tests/test_stream1_optimizations.py +234 -0
- package/setup-overlay.js +82 -0
- package/sinain-agent/.env.example +17 -0
- package/sinain-agent/CLAUDE.md +80 -0
- package/sinain-agent/mcp-config.json +12 -0
- package/sinain-agent/run.sh +248 -0
- package/sinain-core/.env.example +93 -0
- package/sinain-core/package-lock.json +552 -0
- package/sinain-core/package.json +21 -0
- package/sinain-core/src/agent/analyzer.ts +366 -0
- package/sinain-core/src/agent/context-window.ts +172 -0
- package/sinain-core/src/agent/loop.ts +404 -0
- package/sinain-core/src/agent/situation-writer.ts +187 -0
- package/sinain-core/src/agent/traits.ts +520 -0
- package/sinain-core/src/audio/capture-spawner-macos.ts +44 -0
- package/sinain-core/src/audio/capture-spawner-win.ts +37 -0
- package/sinain-core/src/audio/capture-spawner.ts +14 -0
- package/sinain-core/src/audio/pipeline.ts +335 -0
- package/sinain-core/src/audio/transcription-local.ts +141 -0
- package/sinain-core/src/audio/transcription.ts +278 -0
- package/sinain-core/src/buffers/feed-buffer.ts +71 -0
- package/sinain-core/src/buffers/sense-buffer.ts +425 -0
- package/sinain-core/src/config.ts +245 -0
- package/sinain-core/src/escalation/escalation-slot.ts +136 -0
- package/sinain-core/src/escalation/escalator.ts +812 -0
- package/sinain-core/src/escalation/message-builder.ts +323 -0
- package/sinain-core/src/escalation/openclaw-ws.ts +726 -0
- package/sinain-core/src/escalation/scorer.ts +166 -0
- package/sinain-core/src/index.ts +507 -0
- package/sinain-core/src/learning/feedback-store.ts +253 -0
- package/sinain-core/src/learning/signal-collector.ts +218 -0
- package/sinain-core/src/log.ts +24 -0
- package/sinain-core/src/overlay/commands.ts +126 -0
- package/sinain-core/src/overlay/ws-handler.ts +267 -0
- package/sinain-core/src/privacy/index.ts +18 -0
- package/sinain-core/src/privacy/presets.ts +40 -0
- package/sinain-core/src/privacy/redact.ts +92 -0
- package/sinain-core/src/profiler.ts +181 -0
- package/sinain-core/src/recorder.ts +186 -0
- package/sinain-core/src/server.ts +417 -0
- package/sinain-core/src/trace/trace-store.ts +73 -0
- package/sinain-core/src/trace/tracer.ts +94 -0
- package/sinain-core/src/types.ts +427 -0
- package/sinain-core/src/util/dedup.ts +48 -0
- package/sinain-core/src/util/task-store.ts +84 -0
- package/sinain-core/tsconfig.json +18 -0
- package/sinain-knowledge/adapters/generic/adapter.ts +103 -0
- package/sinain-knowledge/adapters/interface.ts +72 -0
- package/sinain-knowledge/adapters/openclaw/adapter.ts +223 -0
- package/sinain-knowledge/curation/engine.ts +493 -0
- package/sinain-knowledge/curation/resilience.ts +336 -0
- package/sinain-knowledge/data/git-store.ts +312 -0
- package/sinain-knowledge/data/schema.ts +89 -0
- package/sinain-knowledge/data/snapshot.ts +226 -0
- package/sinain-knowledge/data/store.ts +488 -0
- package/sinain-knowledge/deploy/cli.ts +214 -0
- package/sinain-knowledge/deploy/manifest.ts +80 -0
- package/sinain-knowledge/protocol/bindings/generic.md +5 -0
- package/sinain-knowledge/protocol/bindings/openclaw.md +5 -0
- package/sinain-knowledge/protocol/heartbeat.md +62 -0
- package/sinain-knowledge/protocol/renderer.ts +56 -0
- package/sinain-knowledge/protocol/skill.md +335 -0
- package/sinain-mcp-server/index.ts +337 -0
- package/sinain-mcp-server/package.json +19 -0
- package/sinain-mcp-server/tsconfig.json +15 -0
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""Detect the frontmost application and window title (cross-platform)."""
|
|
2
|
+
|
|
3
|
+
import subprocess
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class MacAppDetector:
    """Detect the frontmost application and window title on macOS.

    The lookup shells out to ``osascript`` and asks System Events for the
    frontmost application process. Every failure is reported as ``("", "")``
    instead of being raised, so polling callers never need to handle
    exceptions.
    """

    # Separator the AppleScript places between the app name and window title.
    _SEPARATOR = "|||"

    # AppleScript that prints "<app name>|||<front window title>". The window
    # lookup sits in its own ``try`` so an app without a front window still
    # yields its name with an empty title.
    _FRONTMOST_SCRIPT = (
        'tell application "System Events"\n'
        ' set appProc to first application process whose frontmost is true\n'
        ' set appName to name of appProc\n'
        ' set winTitle to ""\n'
        ' try\n'
        ' set winTitle to name of front window of appProc\n'
        ' end try\n'
        ' return appName & "|||" & winTitle\n'
        'end tell'
    )

    def __init__(self):
        # Last successfully observed state; "" means "nothing observed yet".
        self._last_app: str = ""
        self._last_window: str = ""

    def get_active_app(self) -> tuple[str, str]:
        """Return ``(app_name, window_title)`` of the frontmost application.

        Returns ``("", "")`` when ``osascript`` cannot be run, times out
        (2 second limit), or exits with a non-zero status.
        """
        try:
            result = subprocess.run(
                ["osascript", "-e", self._FRONTMOST_SCRIPT],
                capture_output=True, text=True, timeout=2,
            )
        except Exception:
            # Deliberate best-effort: a missing binary, a timeout or a
            # decoding problem must never break the polling loop.
            return "", ""

        if result.returncode != 0:
            return "", ""

        # partition() leaves the title empty when the separator is missing.
        app_name, _, window_title = result.stdout.strip().partition(self._SEPARATOR)
        return app_name.strip(), window_title.strip()

    def detect_change(self) -> tuple[bool, bool, str, str]:
        """Return ``(app_changed, window_changed, app_name, window_title)``.

        A change is only reported relative to a previously observed non-empty
        value, so the very first observation never counts as a change.

        When the probe fails (both values empty) the remembered state is left
        untouched and no change is reported. Otherwise a transient failure
        would be reported as a switch to "" and would also hide the next real
        switch, because the remembered value would have been reset to "".
        """
        app, window = self.get_active_app()
        if not app and not window:
            return False, False, app, window

        app_changed = self._last_app != "" and app != self._last_app
        window_changed = self._last_window != "" and window != self._last_window
        self._last_app = app
        self._last_window = window
        return app_changed, window_changed, app, window
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def AppDetector():
    """Build the app detector that matches the current platform.

    Returns a ``WinAppDetector`` when ``sys.platform`` is ``"win32"`` and a
    ``MacAppDetector`` on every other platform.
    """
    if sys.platform != "win32":
        return MacAppDetector()

    # Imported lazily so the Windows-only module is loaded only on Windows.
    from .app_detector_win import WinAppDetector

    return WinAppDetector()
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Detect the foreground application and window title on Windows."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import ctypes
|
|
5
|
+
import ctypes.wintypes
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class WinAppDetector:
    """Detects the foreground application and window title on Windows.

    Uses Win32 API: GetForegroundWindow, GetWindowTextW, and
    psutil/GetWindowThreadProcessId for process name resolution.
    """

    # Buffer size (in characters) used for the module base name lookup.
    _NAME_BUFFER_CHARS = 260

    def __init__(self):
        # Last successfully observed state; "" means "nothing observed yet".
        self._last_app: str = ""
        self._last_window: str = ""
        self._user32 = ctypes.windll.user32
        self._kernel32 = ctypes.windll.kernel32

    @staticmethod
    def _strip_exe_suffix(name: str) -> str:
        """Return *name* without a trailing ``.exe`` (case-insensitive).

        Only the suffix is removed; an ``.exe`` elsewhere in the name stays.
        """
        if name.lower().endswith(".exe"):
            return name[:-4]
        return name

    def get_active_app(self) -> tuple[str, str]:
        """Return ``(app_name, window_title)`` of the foreground window.

        Returns ``("", "")`` when there is no foreground window or any Win32
        call raises.
        """
        try:
            hwnd = self._user32.GetForegroundWindow()
            if not hwnd:
                return "", ""

            # Window title: size the buffer from the reported length (+1 for
            # the terminating NUL).
            length = self._user32.GetWindowTextLengthW(hwnd)
            buf = ctypes.create_unicode_buffer(length + 1)
            self._user32.GetWindowTextW(hwnd, buf, length + 1)
            window_title = buf.value

            # Process name, resolved from the PID that owns the window.
            pid = ctypes.wintypes.DWORD()
            self._user32.GetWindowThreadProcessId(hwnd, ctypes.byref(pid))
            app_name = self._get_process_name(pid.value)

            return app_name, window_title
        except Exception:
            # Deliberate best-effort: polling must never raise.
            return "", ""

    def _get_process_name(self, pid: int) -> str:
        """Return the executable name for *pid* without ``.exe``, or ``""``.

        psutil is tried first; when it is missing or fails, the lookup falls
        back to OpenProcess + GetModuleBaseNameW.
        """
        try:
            # Try psutil first (more reliable)
            import psutil
            return self._strip_exe_suffix(psutil.Process(pid).name())
        except Exception:
            # psutil missing, process gone, or access denied: use the fallback.
            pass

        # Fallback: OpenProcess + GetModuleBaseNameW
        try:
            PROCESS_QUERY_INFORMATION = 0x0400
            PROCESS_VM_READ = 0x0010
            handle = self._kernel32.OpenProcess(
                PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, False, pid
            )
            if not handle:
                return ""

            try:
                psapi = ctypes.windll.psapi
                buf = ctypes.create_unicode_buffer(self._NAME_BUFFER_CHARS)
                psapi.GetModuleBaseNameW(handle, None, buf, self._NAME_BUFFER_CHARS)
                return self._strip_exe_suffix(buf.value)
            finally:
                # Always release the process handle, even if the lookup raised.
                self._kernel32.CloseHandle(handle)
        except Exception:
            return ""

    def detect_change(self) -> tuple[bool, bool, str, str]:
        """Return ``(app_changed, window_changed, app_name, window_title)``.

        A change is only reported relative to a previously observed non-empty
        value, so the very first observation never counts as a change.

        When the probe yields nothing (both values empty, e.g. no foreground
        window) the remembered state is left untouched and no change is
        reported. Otherwise a transient gap would be reported as a switch to
        "" and would also hide the next real switch.
        """
        app, window = self.get_active_app()
        if not app and not window:
            return False, False, app, window

        app_changed = self._last_app != "" and app != self._last_app
        window_changed = self._last_window != "" and window != self._last_window
        self._last_app = app
        self._last_window = window
        return app_changed, window_changed, app, window
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
"""Screen capture via IPC from sck-capture (preferred) or CoreGraphics fallback.
|
|
2
|
+
|
|
3
|
+
On Windows, uses mss (DXGI Desktop Duplication) via capture_win.py.
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
import sys
|
|
10
|
+
import time
|
|
11
|
+
from typing import Generator
|
|
12
|
+
|
|
13
|
+
from PIL import Image
|
|
14
|
+
|
|
15
|
+
# macOS-only imports — deferred so the module loads on Windows
|
|
16
|
+
if sys.platform == "darwin":
|
|
17
|
+
import Quartz
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ScreenCapture:
    """Captures screen frames via CGDisplayCreateImage (CoreGraphics/IOSurface).

    Frames are grabbed in-process through Quartz instead of spawning the
    ``screencapture`` CLI. Only the main display is captured; ``mode`` and
    ``target`` are stored but not consulted by this backend.

    NOTE(review): the original rationale was that this avoids
    CoreMediaIO/ScreenCaptureKit blocking camera access for other apps on
    macOS 14+, yet ``create_capture`` in this module warns that
    CGDisplayCreateImage may itself cause camera conflicts on macOS 14+.
    Confirm which statement is current.
    """

    def __init__(self, mode: str = "screen", target: int = 0,
                 fps: float = 1, scale: float = 0.5):
        self.mode = mode
        self.target = target
        self.fps = fps
        self.scale = scale
        self.stats_ok = 0
        self.stats_fail = 0
        self._last_stats_time = time.time()
        self._stats_interval = 60  # log stats every 60s
        self._display_id = Quartz.CGMainDisplayID()

    def capture_frame(self) -> tuple[Image.Image, float]:
        """Return ``(PIL Image, timestamp)`` for the main display.

        The timestamp is taken just before the grab. The image is downscaled
        by ``self.scale`` unless the scale is exactly 1.0.

        Raises:
            RuntimeError: if CGDisplayCreateImage returns None.
            Exception: any error from Quartz or PIL is re-raised unchanged.

        Every failure increments ``stats_fail`` exactly once, so the periodic
        "all captures failing" warning reflects all error kinds, not just the
        None case.
        """
        ts = time.time()
        try:
            img = self._grab_scaled_image()
        except Exception:
            self.stats_fail += 1
            raise
        self.stats_ok += 1
        return img, ts

    def _grab_scaled_image(self) -> Image.Image:
        """Grab the display, convert it to a PIL image and apply the scale."""
        cg_image = Quartz.CGDisplayCreateImage(self._display_id)
        if cg_image is None:
            raise RuntimeError("CGDisplayCreateImage returned None")

        try:
            width = Quartz.CGImageGetWidth(cg_image)
            height = Quartz.CGImageGetHeight(cg_image)
            bytes_per_row = Quartz.CGImageGetBytesPerRow(cg_image)

            # Get raw pixel data from CGImage
            data_provider = Quartz.CGImageGetDataProvider(cg_image)
            raw_data = Quartz.CGDataProviderCopyData(data_provider)
        finally:
            # Explicitly release CGImage and its IOSurface handle immediately.
            # At continuous capture rates, unreleased handles cause GPU/camera
            # contention because the camera shares IOSurface infrastructure.
            del cg_image

        # CGDisplayCreateImage returns BGRA (premultiplied alpha, 32Little);
        # bytes_per_row is passed as the stride because rows may be padded.
        img = Image.frombytes("RGBA", (width, height), raw_data,
                              "raw", "BGRA", bytes_per_row, 1)

        if self.scale != 1.0:
            new_w = int(width * self.scale)
            new_h = int(height * self.scale)
            img = img.resize((new_w, new_h), Image.LANCZOS)
        return img

    def capture_loop(self) -> Generator[tuple[Image.Image, float], None, None]:
        """Yield frames at ``self.fps`` rate, forever.

        Capture errors are printed and skipped; the loop keeps running.

        Raises:
            ValueError: on first iteration if ``self.fps`` is not positive
                (previously a ZeroDivisionError for 0 and an unthrottled busy
                loop for negative values).
        """
        if self.fps <= 0:
            raise ValueError(f"fps must be positive, got {self.fps!r}")
        interval = 1.0 / self.fps
        while True:
            start = time.time()
            try:
                yield self.capture_frame()
            except Exception as e:
                # Failure is already counted inside capture_frame().
                print(f"[capture] error: {e}")
            self._maybe_log_stats()
            # Sleep only for what is left of the frame interval.
            sleep_time = interval - (time.time() - start)
            if sleep_time > 0:
                time.sleep(sleep_time)

    def _maybe_log_stats(self):
        """Print capture statistics once per ``_stats_interval`` seconds."""
        now = time.time()
        if now - self._last_stats_time >= self._stats_interval:
            total = self.stats_ok + self.stats_fail
            rate = (self.stats_ok / total * 100) if total > 0 else 0
            print(f"[capture] stats: {self.stats_ok} ok, {self.stats_fail} fail"
                  f" ({rate:.0f}% success, {total} total)")
            if self.stats_fail > 0 and self.stats_ok == 0:
                print("[capture] WARNING: all captures failing — check screen recording permissions")
            self._last_stats_time = now
|
|
102
|
+
|
|
103
|
+
class ScreenKitCapture:
    """Reads JPEG frames written by sck-capture via IPC (~/.sinain/capture/)."""

    FRAME_PATH = os.path.expanduser("~/.sinain/capture/frame.jpg")
    META_PATH = os.path.expanduser("~/.sinain/capture/meta.json")
    STALE_THRESHOLD = 1.0  # seconds

    def __init__(self, fps: float = 1, scale: float = 1.0, **kwargs):
        # Extra keyword arguments are accepted and ignored.
        self.fps = fps
        self.scale = scale
        self.stats_ok = 0
        self.stats_fail = 0
        # Timestamp of the last frame handed out; used to skip duplicates.
        self._last_frame_ts = 0.0
        self._last_stats_time = time.time()
        self._stats_interval = 60

    @classmethod
    def is_available(cls) -> bool:
        """Return True if the frame file exists and is newer than STALE_THRESHOLD."""
        try:
            mtime = os.path.getmtime(cls.FRAME_PATH)
        except OSError:
            # Missing or unreadable frame file: backend is not available.
            return False
        return (time.time() - mtime) < cls.STALE_THRESHOLD

    def _read_frame_timestamp(self) -> float:
        """Return the timestamp identifying the current frame.

        Prefers the numeric ``"timestamp"`` field of the metadata file. When
        the metadata is missing, unreadable, not a JSON object, or has no
        numeric timestamp, the frame file's modification time is used.

        The fallback used to be ``time.time()``, which differs on every call
        and therefore defeated the duplicate check entirely.

        Raises:
            OSError: if the fallback is needed and the frame file is gone.
        """
        try:
            with open(self.META_PATH, encoding="utf-8") as f:
                meta = json.load(f)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and decoding errors.
            meta = None

        if isinstance(meta, dict):
            stamp = meta.get("timestamp")
            if isinstance(stamp, (int, float)) and not isinstance(stamp, bool):
                return stamp
        return os.path.getmtime(self.FRAME_PATH)

    def capture_frame(self) -> tuple[Image.Image, float] | None:
        """Read the latest frame from IPC.

        Returns ``(PIL Image, timestamp)``, or None when the frame file is
        missing, the frame has the same timestamp as the previous one
        (duplicate), or reading fails. Read failures are printed and counted
        in ``stats_fail``; missing and duplicate frames are not counted.
        """
        try:
            if not os.path.exists(self.FRAME_PATH):
                return None

            ts = self._read_frame_timestamp()

            # Skip duplicate frames
            if ts == self._last_frame_ts:
                return None

            img = Image.open(self.FRAME_PATH)
            img.load()  # Force full read before file can be overwritten

            if self.scale != 1.0:
                new_w = int(img.width * self.scale)
                new_h = int(img.height * self.scale)
                img = img.resize((new_w, new_h), Image.LANCZOS)

            self._last_frame_ts = ts
            self.stats_ok += 1
            return img, ts

        except Exception as e:
            self.stats_fail += 1
            print(f"[capture-screenkit] error: {e}")
            return None

    def capture_loop(self) -> Generator[tuple[Image.Image, float], None, None]:
        """Yield frames at ``self.fps`` rate, same interface as ScreenCapture.

        Polls that produce no new frame yield nothing.

        Raises:
            ValueError: on first iteration if ``self.fps`` is not positive
                (previously a ZeroDivisionError for 0 and an unthrottled busy
                loop for negative values).
        """
        if self.fps <= 0:
            raise ValueError(f"fps must be positive, got {self.fps!r}")
        interval = 1.0 / self.fps
        while True:
            start = time.time()
            result = self.capture_frame()
            if result is not None:
                yield result
            self._maybe_log_stats()
            # Sleep only for what is left of the polling interval.
            sleep_time = interval - (time.time() - start)
            if sleep_time > 0:
                time.sleep(sleep_time)

    def _maybe_log_stats(self):
        """Print capture statistics once per ``_stats_interval`` seconds."""
        now = time.time()
        if now - self._last_stats_time >= self._stats_interval:
            total = self.stats_ok + self.stats_fail
            rate = (self.stats_ok / total * 100) if total > 0 else 0
            print(f"[capture-screenkit] stats: {self.stats_ok} ok, {self.stats_fail} fail"
                  f" ({rate:.0f}% success, {total} total)")
            self._last_stats_time = now
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def create_capture(mode: str = "screen", target: int = 0,
                   fps: float = 1, scale: float = 0.5):
    """Pick and construct the screen capture backend for this platform.

    Windows (``sys.platform == "win32"``): WinScreenCapture, imported lazily.
    Otherwise: ScreenKitCapture when fresh sck-capture frames are available
    at call time, else the CoreGraphics-based ScreenCapture.

    ``mode``, ``target`` and ``scale`` are forwarded to WinScreenCapture and
    ScreenCapture only; ScreenKitCapture always gets ``scale=1.0``.
    """
    on_windows = sys.platform == "win32"
    if on_windows:
        from .capture_win import WinScreenCapture
        print("[capture] Using mss (DXGI Desktop Duplication)")
        return WinScreenCapture(mode=mode, target=target, fps=fps, scale=scale)

    # macOS path. Fallback first: no fresh IPC frames means CoreGraphics
    # (legacy fallback for macOS < 13).
    if not ScreenKitCapture.is_available():
        print("[capture] Using CoreGraphics (CGDisplayCreateImage)")
        print("[capture] WARNING: CGDisplayCreateImage may cause camera conflicts on macOS 14+")
        return ScreenCapture(mode=mode, target=target, fps=fps, scale=scale)

    # Primary: IPC from sck-capture (Swift binary writes JPEG frames).
    print("[capture] Using ScreenCaptureKit (sck-capture IPC)")
    return ScreenKitCapture(fps=fps, scale=1.0)  # scale handled by sck-capture
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""Screen capture on Windows via mss (DXGI Desktop Duplication)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import time
|
|
5
|
+
from typing import Generator
|
|
6
|
+
|
|
7
|
+
from PIL import Image
|
|
8
|
+
|
|
9
|
+
try:
|
|
10
|
+
import mss
|
|
11
|
+
except ImportError:
|
|
12
|
+
mss = None
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class WinScreenCapture:
    """Captures screen frames on Windows using mss (DXGI Desktop Duplication).

    Same interface as ScreenCapture/ScreenKitCapture on macOS:
    capture_frame() -> (Image, float) and capture_loop() -> Generator.
    """

    def __init__(self, mode: str = "screen", target: int = 0,
                 fps: float = 1, scale: float = 0.5):
        if mss is None:
            raise RuntimeError("mss library required for Windows capture: pip install mss")
        self.mode = mode
        # Zero-based monitor number: 0 = first individual monitor, 1 = second,
        # and so on (see _monitor_index for the mapping to mss indices).
        self.target = target
        self.fps = fps
        self.scale = scale
        self.stats_ok = 0
        self.stats_fail = 0
        self._last_stats_time = time.time()
        self._stats_interval = 60
        self._sct = mss.mss()

    def _monitor_index(self) -> int:
        """Map ``self.target`` to an index into ``self._sct.monitors``.

        mss monitors: index 0 = all monitors combined, 1+ = individual. The
        target is therefore shifted by one; negative or out-of-range targets
        fall back to index 1.
        """
        index = self.target + 1 if self.target >= 0 else 1
        if index >= len(self._sct.monitors):
            index = 1  # fallback to primary
        return index

    def capture_frame(self) -> tuple[Image.Image, float]:
        """Return ``(PIL Image, timestamp)`` for the selected monitor.

        The timestamp is taken just before the grab. The image is downscaled
        by ``self.scale`` unless the scale is exactly 1.0.

        Any error from mss or PIL is re-raised unchanged after incrementing
        ``stats_fail``, so direct callers and capture_loop() share the same
        failure accounting.
        """
        ts = time.time()
        try:
            monitor = self._sct.monitors[self._monitor_index()]
            screenshot = self._sct.grab(monitor)

            # Build the PIL image from the screenshot's RGB byte view.
            img = Image.frombytes("RGB", screenshot.size, screenshot.rgb)

            if self.scale != 1.0:
                new_w = int(img.width * self.scale)
                new_h = int(img.height * self.scale)
                img = img.resize((new_w, new_h), Image.LANCZOS)
        except Exception:
            self.stats_fail += 1
            raise

        self.stats_ok += 1
        if self.stats_ok == 1:
            # One-time diagnostic with the effective output size.
            print(f"[capture-win] first frame: {img.width}x{img.height} "
                  f"(monitor={monitor['width']}x{monitor['height']}, scale={self.scale})",
                  flush=True)
        return img, ts

    def capture_loop(self) -> Generator[tuple[Image.Image, float], None, None]:
        """Yield frames at ``self.fps`` rate, forever.

        Capture errors are printed and skipped; the loop keeps running.

        Raises:
            ValueError: on first iteration if ``self.fps`` is not positive
                (previously a ZeroDivisionError for 0 and an unthrottled busy
                loop for negative values).
        """
        if self.fps <= 0:
            raise ValueError(f"fps must be positive, got {self.fps!r}")
        interval = 1.0 / self.fps
        while True:
            start = time.time()
            try:
                yield self.capture_frame()
            except Exception as e:
                # Failure is already counted inside capture_frame().
                print(f"[capture-win] error: {e}", flush=True)
            self._maybe_log_stats()
            # Sleep only for what is left of the frame interval.
            sleep_time = interval - (time.time() - start)
            if sleep_time > 0:
                time.sleep(sleep_time)

    def _maybe_log_stats(self):
        """Print capture statistics once per ``_stats_interval`` seconds."""
        now = time.time()
        if now - self._last_stats_time >= self._stats_interval:
            total = self.stats_ok + self.stats_fail
            rate = (self.stats_ok / total * 100) if total > 0 else 0
            print(f"[capture-win] stats: {self.stats_ok} ok, {self.stats_fail} fail"
                  f" ({rate:.0f}% success, {total} total)", flush=True)
            self._last_stats_time = now
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""SSIM-based frame change detection."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
from PIL import Image
|
|
8
|
+
from skimage.metrics import structural_similarity
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class ChangeResult:
    """Outcome of a frame comparison that found a significant change."""

    # SSIM score between the previous keyframe and the current frame.
    ssim_score: float
    # 8-bit image derived from the per-pixel SSIM map.
    diff_image: Image.Image
    # One (y, x) coordinate array per changed region that passed the area
    # filter.
    contours: list
    # (x, y, w, h) enclosing all kept regions; w and h are computed as
    # max - min of the region coordinates.
    bbox: tuple[int, int, int, int]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ChangeDetector:
    """SSIM-based frame change detection.

    Keeps the last accepted keyframe (as a grayscale array) and compares each
    new frame against it.
    """

    # Minimum 8-bit dissimilarity for a pixel to count as "changed".
    _PIXEL_DIFF_THRESHOLD = 30

    def __init__(self, threshold: float = 0.95, min_area: int = 100):
        # Frames scoring at or above this SSIM value are treated as unchanged.
        self.threshold = threshold
        # Changed regions smaller than this many pixels are ignored.
        self.min_area = min_area
        self.prev_frame: np.ndarray | None = None

    def set_threshold(self, threshold: float) -> None:
        """Dynamically adjust the SSIM change threshold."""
        self.threshold = threshold

    @staticmethod
    def _dissimilarity_to_uint8(diff_map: np.ndarray) -> np.ndarray:
        """Convert a per-pixel SSIM map into an 8-bit dissimilarity image.

        Local SSIM values can be negative, so ``(1 - ssim) * 255`` can exceed
        255. The value is clipped before the cast; without the clip the uint8
        conversion overflows and the most strongly changed pixels can end up
        with small values that fall below the "changed" threshold.
        """
        scaled = (1.0 - diff_map) * 255.0
        return np.clip(scaled, 0.0, 255.0).astype(np.uint8)

    def detect(self, frame: Image.Image) -> ChangeResult | None:
        """Compare *frame* to the stored keyframe.

        Returns a ChangeResult when the SSIM score is below the threshold and
        at least one changed region reaches ``min_area`` pixels; otherwise
        None. The first frame, and any frame whose size differs from the
        keyframe, only (re)initialises the keyframe and returns None.
        """
        gray = np.array(frame.convert("L"))

        # No keyframe yet, or the resolution changed: start over from here.
        if self.prev_frame is None or gray.shape != self.prev_frame.shape:
            self.prev_frame = gray
            return None

        score, diff_map = structural_similarity(
            self.prev_frame, gray, full=True
        )

        if score >= self.threshold:
            return None

        # Keyframe update: only advance prev_frame when change IS detected.
        # This lets diffs accumulate against the last accepted keyframe,
        # which is essential at high FPS where consecutive frames differ by <1%.
        self.prev_frame = gray

        # Convert diff map to binary mask
        diff_binary = self._dissimilarity_to_uint8(diff_map)
        mask = diff_binary > self._PIXEL_DIFF_THRESHOLD

        # Find changed regions via connected components (imported lazily, as
        # in the original, so the cost is only paid when a change is seen).
        from skimage.measure import label, regionprops
        regions = regionprops(label(mask))

        # Keep only regions large enough to matter.
        contours = [
            region.coords for region in regions if region.area >= self.min_area
        ]
        if not contours:
            return None

        # Merged bounding box over all kept regions; coords are (row, col).
        all_coords = np.vstack(contours)
        min_y, min_x = all_coords.min(axis=0)
        max_y, max_x = all_coords.max(axis=0)
        bbox = (int(min_x), int(min_y), int(max_x - min_x), int(max_y - min_y))

        return ChangeResult(
            ssim_score=score,
            diff_image=Image.fromarray(diff_binary),
            contours=contours,
            bbox=bbox,
        )
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""Configuration loader for sense_client."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
# Built-in configuration. load_config() deep-copies this mapping and merges
# the user's JSON file over it section by section.
DEFAULTS = {
    # Screen capture settings.
    "capture": {
        "mode": "screen",
        "target": 0,
        "fps": 2.0,
        "scale": 0.5,
    },
    # Frame change detection settings.
    "detection": {
        "ssimThreshold": 0.92,
        "minArea": 100,
        "roiPadding": 20,
        "cooldownMs": 5000,
    },
    # OCR settings.
    "ocr": {
        "enabled": True,
        "backend": "auto",
        "languages": ["en", "ru"],
        "lang": "eng",
        "psm": 11,
        "minConfidence": 50,
    },
    # Gating / cooldown settings.
    "gate": {
        "minOcrChars": 20,
        "majorChangeThreshold": 0.85,
        "cooldownMs": 5000,
        "adaptiveCooldownMs": 2000,
        "contextCooldownMs": 10000,
    },
    # Relay endpoint and payload limits.
    "relay": {
        "url": "http://localhost:9500",
        "sendThumbnails": True,
        "maxImageKB": 500,
    },
    # Optional optimizations, all off by default.
    "optimization": {
        "backpressure": False,
        "textDedup": False,
        "visionRegionOfInterest": False,
        "shadowValidation": False,
    },
}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def load_config(path: str | None = None) -> dict:
    """Load config from a JSON file and merge it over the defaults.

    Args:
        path: Path to a JSON config file, or None/"" to use defaults only.

    Returns:
        A fresh dict (a deep copy of DEFAULTS) with the user's values merged
        in. For a section that exists in the defaults and whose user value is
        an object, keys are merged into the default section; any other
        top-level entry replaces or adds the section as-is.

    The defaults are returned unchanged when the file is missing, cannot be
    read, is not valid JSON, or does not contain a JSON object at top level.
    """
    config = json.loads(json.dumps(DEFAULTS))  # deep copy
    if not path:
        return config

    try:
        # utf-8-sig reads plain UTF-8 and also tolerates a leading BOM; the
        # locale default encoding would mis-decode UTF-8 files on Windows.
        with open(path, encoding="utf-8-sig") as f:
            user = json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file or a directory.
        # ValueError: invalid JSON (JSONDecodeError) or undecodable bytes.
        return config

    if not isinstance(user, dict):
        # A top-level list/number/string has no sections to merge.
        return config

    for section, values in user.items():
        if section in config and isinstance(values, dict):
            config[section].update(values)
        else:
            config[section] = values
    return config
|