@geravant/sinain 1.0.18 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. package/README.md +10 -1
  2. package/cli.js +176 -0
  3. package/index.ts +163 -1257
  4. package/install.js +12 -2
  5. package/launcher.js +622 -0
  6. package/openclaw.plugin.json +4 -0
  7. package/pack-prepare.js +48 -0
  8. package/package.json +26 -5
  9. package/sense_client/README.md +82 -0
  10. package/sense_client/__init__.py +1 -0
  11. package/sense_client/__main__.py +462 -0
  12. package/sense_client/app_detector.py +54 -0
  13. package/sense_client/app_detector_win.py +83 -0
  14. package/sense_client/capture.py +215 -0
  15. package/sense_client/capture_win.py +88 -0
  16. package/sense_client/change_detector.py +86 -0
  17. package/sense_client/config.py +64 -0
  18. package/sense_client/gate.py +145 -0
  19. package/sense_client/ocr.py +347 -0
  20. package/sense_client/privacy.py +65 -0
  21. package/sense_client/requirements.txt +13 -0
  22. package/sense_client/roi_extractor.py +84 -0
  23. package/sense_client/sender.py +173 -0
  24. package/sense_client/tests/__init__.py +0 -0
  25. package/sense_client/tests/test_stream1_optimizations.py +234 -0
  26. package/setup-overlay.js +82 -0
  27. package/sinain-agent/.env.example +17 -0
  28. package/sinain-agent/CLAUDE.md +80 -0
  29. package/sinain-agent/mcp-config.json +12 -0
  30. package/sinain-agent/run.sh +248 -0
  31. package/sinain-core/.env.example +93 -0
  32. package/sinain-core/package-lock.json +552 -0
  33. package/sinain-core/package.json +21 -0
  34. package/sinain-core/src/agent/analyzer.ts +366 -0
  35. package/sinain-core/src/agent/context-window.ts +172 -0
  36. package/sinain-core/src/agent/loop.ts +404 -0
  37. package/sinain-core/src/agent/situation-writer.ts +187 -0
  38. package/sinain-core/src/agent/traits.ts +520 -0
  39. package/sinain-core/src/audio/capture-spawner-macos.ts +44 -0
  40. package/sinain-core/src/audio/capture-spawner-win.ts +37 -0
  41. package/sinain-core/src/audio/capture-spawner.ts +14 -0
  42. package/sinain-core/src/audio/pipeline.ts +335 -0
  43. package/sinain-core/src/audio/transcription-local.ts +141 -0
  44. package/sinain-core/src/audio/transcription.ts +278 -0
  45. package/sinain-core/src/buffers/feed-buffer.ts +71 -0
  46. package/sinain-core/src/buffers/sense-buffer.ts +425 -0
  47. package/sinain-core/src/config.ts +245 -0
  48. package/sinain-core/src/escalation/escalation-slot.ts +136 -0
  49. package/sinain-core/src/escalation/escalator.ts +812 -0
  50. package/sinain-core/src/escalation/message-builder.ts +323 -0
  51. package/sinain-core/src/escalation/openclaw-ws.ts +726 -0
  52. package/sinain-core/src/escalation/scorer.ts +166 -0
  53. package/sinain-core/src/index.ts +507 -0
  54. package/sinain-core/src/learning/feedback-store.ts +253 -0
  55. package/sinain-core/src/learning/signal-collector.ts +218 -0
  56. package/sinain-core/src/log.ts +24 -0
  57. package/sinain-core/src/overlay/commands.ts +126 -0
  58. package/sinain-core/src/overlay/ws-handler.ts +267 -0
  59. package/sinain-core/src/privacy/index.ts +18 -0
  60. package/sinain-core/src/privacy/presets.ts +40 -0
  61. package/sinain-core/src/privacy/redact.ts +92 -0
  62. package/sinain-core/src/profiler.ts +181 -0
  63. package/sinain-core/src/recorder.ts +186 -0
  64. package/sinain-core/src/server.ts +417 -0
  65. package/sinain-core/src/trace/trace-store.ts +73 -0
  66. package/sinain-core/src/trace/tracer.ts +94 -0
  67. package/sinain-core/src/types.ts +427 -0
  68. package/sinain-core/src/util/dedup.ts +48 -0
  69. package/sinain-core/src/util/task-store.ts +84 -0
  70. package/sinain-core/tsconfig.json +18 -0
  71. package/sinain-knowledge/adapters/generic/adapter.ts +103 -0
  72. package/sinain-knowledge/adapters/interface.ts +72 -0
  73. package/sinain-knowledge/adapters/openclaw/adapter.ts +223 -0
  74. package/sinain-knowledge/curation/engine.ts +493 -0
  75. package/sinain-knowledge/curation/resilience.ts +336 -0
  76. package/sinain-knowledge/data/git-store.ts +312 -0
  77. package/sinain-knowledge/data/schema.ts +89 -0
  78. package/sinain-knowledge/data/snapshot.ts +226 -0
  79. package/sinain-knowledge/data/store.ts +488 -0
  80. package/sinain-knowledge/deploy/cli.ts +214 -0
  81. package/sinain-knowledge/deploy/manifest.ts +80 -0
  82. package/sinain-knowledge/protocol/bindings/generic.md +5 -0
  83. package/sinain-knowledge/protocol/bindings/openclaw.md +5 -0
  84. package/sinain-knowledge/protocol/heartbeat.md +62 -0
  85. package/sinain-knowledge/protocol/renderer.ts +56 -0
  86. package/sinain-knowledge/protocol/skill.md +335 -0
  87. package/sinain-mcp-server/index.ts +337 -0
  88. package/sinain-mcp-server/package.json +19 -0
  89. package/sinain-mcp-server/tsconfig.json +15 -0
@@ -0,0 +1,145 @@
1
+ """Decision gate — classifies sense events and decides what to send."""
2
+ from __future__ import annotations
3
+
4
+ import difflib
5
+ import time
6
+ from collections import deque
7
+ from dataclasses import dataclass, field
8
+
9
+ from .change_detector import ChangeResult
10
+ from .ocr import OCRResult
11
+
12
+
13
@dataclass
class SenseMeta:
    """Capture-time metadata attached to a SenseEvent."""
    # SSIM score from the change detector (0.0 when not computed).
    ssim: float = 0.0
    # Foreground application name at capture time.
    app: str = ""
    # Title of the focused window.
    window_title: str = ""
    # Index of the screen the capture came from.
    screen: int = 0
19
+
20
+
21
@dataclass
class SenseObservation:
    """Structured observation fields (claude-mem compatible schema).

    Populated by sinain-core's agent layer, not by sense_client.
    sense_client sets `title` and `facts` from OCR/app context;
    sinain-core enriches with `narrative` and `concepts`.
    """
    # Short headline for the observation (set client-side).
    title: str = ""
    # Optional secondary headline.
    subtitle: str = ""
    # Bullet-style facts extracted from OCR/app context (set client-side).
    facts: list[str] = field(default_factory=list)
    # Free-text narrative (filled in downstream by sinain-core).
    narrative: str = ""
    # Concept tags (filled in downstream by sinain-core).
    concepts: list[str] = field(default_factory=list)
34
+
35
+
36
@dataclass
class SenseEvent:
    """A single event emitted by the decision gate."""
    type: str  # "text" | "visual" | "context"
    # Event timestamp in epoch milliseconds (set by DecisionGate.classify).
    ts: float = 0.0
    # OCR text for "text" events; "visual" events may carry it too.
    ocr: str = ""
    # Optional region-of-interest descriptor — schema defined by the capture side.
    roi: dict | None = None
    # Optional frame-diff descriptor — schema defined by the capture side.
    diff: dict | None = None
    # Capture metadata (SSIM, app, window, screen).
    meta: SenseMeta = field(default_factory=SenseMeta)
    # Structured observation, enriched downstream by sinain-core.
    observation: SenseObservation = field(default_factory=SenseObservation)
45
+
46
+
47
class DecisionGate:
    """Classifies sense events and decides what to send.

    Drops events that arrive inside a cooldown window, that duplicate
    recently sent OCR text, or that look like garbage OCR output.
    All timestamps are epoch milliseconds.
    """

    def __init__(self, min_ocr_chars: int = 20,
                 major_change_threshold: float = 0.85,
                 cooldown_ms: int = 5000,
                 adaptive_cooldown_ms: int = 2000,
                 context_cooldown_ms: int = 10000):
        self.min_ocr_chars = min_ocr_chars
        self.major_change_threshold = major_change_threshold
        self.cooldown_ms = cooldown_ms
        self.adaptive_cooldown_ms = adaptive_cooldown_ms
        self.context_cooldown_ms = context_cooldown_ms
        self.last_send_ts: float = 0
        self.last_context_ts: float = 0
        self.last_app_change_ts: float = 0
        # Fuzzy dedup: ring buffer of the last 5 OCR texts we sent.
        self._recent_texts: deque[str] = deque(maxlen=5)
        self._last_sent_text: str = ""

    def _effective_cooldown(self, now: float) -> float:
        """Shorter cooldown within 10s of an app switch, normal otherwise."""
        if (now - self.last_app_change_ts) < 10000:
            return self.adaptive_cooldown_ms
        return self.cooldown_ms

    def is_ready(self, app_changed: bool, window_changed: bool) -> bool:
        """Time-based readiness check without consuming OCR output.

        Used by backpressure scheduling to decide whether to run OCR at all.
        """
        if app_changed or window_changed:
            return True
        now = time.time() * 1000
        return now - self.last_send_ts >= self._effective_cooldown(now)

    def _is_duplicate(self, text: str) -> bool:
        """Check if text is too similar to any recently sent text."""
        if text == self._last_sent_text:
            return True
        return any(
            difflib.SequenceMatcher(None, seen, text).ratio() > 0.7
            for seen in self._recent_texts
        )

    @staticmethod
    def _ocr_quality_ok(text: str) -> bool:
        """Reject garbage OCR: >50% single-char tokens or <50% alphanumeric."""
        tokens = text.split()
        if not tokens:
            return False
        singles = sum(len(tok) == 1 for tok in tokens)
        # singles / len(tokens) > 0.5, kept in integer arithmetic.
        if 2 * singles > len(tokens):
            return False
        condensed = text.replace(" ", "")
        alnum = sum(ch.isalnum() for ch in condensed)
        # alnum / total < 0.5, kept in integer arithmetic.
        if condensed and 2 * alnum < len(condensed):
            return False
        return True

    def classify(self, change: ChangeResult | None,
                 ocr: OCRResult, app_changed: bool,
                 window_changed: bool = False) -> SenseEvent | None:
        """Returns SenseEvent to send, or None to drop."""
        now = time.time() * 1000

        # App/window switches bypass the normal cooldown; when the context
        # cooldown blocks a context event we still fall through below.
        if app_changed or window_changed:
            self.last_app_change_ts = now
            if now - self.last_context_ts >= self.context_cooldown_ms:
                self.last_context_ts = now
                self.last_send_ts = now
                return SenseEvent(type="context", ts=now)

        # Adaptive cooldown: 2s after a recent app switch, 5s otherwise.
        if now - self.last_send_ts < self._effective_cooldown(now):
            return None

        if change is None:
            return None

        # Enough OCR text -> text event (after dedup + quality filters).
        text = ocr.text
        if text and len(text) >= self.min_ocr_chars:
            if self._is_duplicate(text) or not self._ocr_quality_ok(text):
                return None
            self._recent_texts.append(text)
            self._last_sent_text = text
            self.last_send_ts = now
            return SenseEvent(type="text", ts=now, ocr=text,
                              meta=SenseMeta(ssim=change.ssim_score))

        # Big visual change without enough readable text -> visual event.
        if change.ssim_score < self.major_change_threshold:
            self.last_send_ts = now
            return SenseEvent(type="visual", ts=now, ocr=text,
                              meta=SenseMeta(ssim=change.ssim_score))

        return None
@@ -0,0 +1,347 @@
1
+ """OCR backends for UI text extraction: macOS Vision, Windows.Media.Ocr, and Tesseract."""
2
+ from __future__ import annotations
3
+
4
+ import io
5
+ import re
6
+ import sys
7
+ from dataclasses import dataclass
8
+
9
+ from PIL import Image
10
+
11
+ try:
12
+ import pytesseract
13
+ except ImportError:
14
+ pytesseract = None
15
+
16
+
17
@dataclass
class OCRResult:
    """Result of one OCR pass over a single image."""
    # Cleaned, recognized text ("" when OCR is disabled or failed).
    text: str
    # Mean confidence on a 0-100 scale (0 when nothing passed the filter).
    confidence: float
    # Number of words/lines' worth of tokens that passed the filter.
    word_count: int
22
+
23
+
24
class LocalOCR:
    """Tesseract OCR wrapper for UI text extraction.

    Becomes a no-op (empty OCRResult) when disabled or when pytesseract
    is not importable.
    """

    def __init__(self, lang: str = "eng", psm: int = 11,
                 min_confidence: int = 30, enabled: bool = True):
        self.lang = lang                      # Tesseract language code
        self.psm = psm                        # page segmentation mode
        self.min_confidence = min_confidence  # per-word cutoff, 0-100
        self.enabled = enabled

    def extract(self, image: Image.Image) -> OCRResult:
        """Returns extracted text with confidence."""
        if not self.enabled or pytesseract is None:
            return OCRResult(text="", confidence=0, word_count=0)

        try:
            data = pytesseract.image_to_data(
                image,
                lang=self.lang,
                config=f"--psm {self.psm}",
                output_type=pytesseract.Output.DICT,
            )
        except Exception as e:
            print(f"[ocr] error: {e}", flush=True)
            return OCRResult(text="", confidence=0, word_count=0)

        # Walk the parallel conf/text arrays, keeping confident words.
        kept_words: list[str] = []
        kept_confs: list[int] = []
        for raw_conf, raw_word in zip(data["conf"], data["text"]):
            try:
                conf = int(raw_conf)
            except (ValueError, TypeError):
                # Non-numeric confidence entries are skipped.
                continue
            if conf < self.min_confidence:
                continue
            word = raw_word.strip()
            if word:
                kept_words.append(word)
                kept_confs.append(conf)

        cleaned = self._clean(" ".join(kept_words))
        mean_conf = sum(kept_confs) / len(kept_confs) if kept_confs else 0

        return OCRResult(
            text=cleaned,
            confidence=mean_conf,
            word_count=len(kept_words),
        )

    @staticmethod
    def _clean(text: str) -> str:
        """Strip control chars, collapse whitespace, remove noise lines."""
        text = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f]", "", text)
        text = re.sub(r"[ \t]+", " ", text)
        # Keep only lines containing at least one alphanumeric character.
        keep = [ln.strip() for ln in text.split("\n")]
        keep = [ln for ln in keep if ln and re.search(r"[a-zA-Z0-9]", ln)]
        return "\n".join(keep)
85
+
86
+
87
class VisionOCR:
    """macOS Vision framework OCR using pyobjc.

    Probes for pyobjc + Vision at construction time; when unavailable,
    extract() degrades to returning an empty OCRResult.
    """

    def __init__(self, languages: list[str] | None = None,
                 min_confidence: float = 0.5, enabled: bool = True):
        # languages: recognition language codes handed to VNRecognizeTextRequest.
        self.languages = languages or ["en", "ru"]
        # min_confidence: per-line threshold on Vision's native 0.0-1.0 scale.
        self.min_confidence = min_confidence
        self.enabled = enabled
        self._available = False

        if not enabled:
            return

        # Availability probe: any import/load failure leaves _available False.
        try:
            import objc  # noqa: F401
            import Quartz  # noqa: F401
            from Foundation import NSURL, NSData  # noqa: F401
            objc.loadBundle('Vision', bundle_path='/System/Library/Frameworks/Vision.framework',
                            module_globals=globals())
            self._available = True
        except Exception as e:
            print(f"[ocr] Vision framework unavailable: {e}", flush=True)

    def extract(self, image: Image.Image) -> OCRResult:
        """Returns extracted text using macOS Vision framework.

        Never raises: errors are logged and an empty OCRResult is returned.
        """
        if not self.enabled or not self._available:
            return OCRResult(text="", confidence=0, word_count=0)

        try:
            return self._do_extract(image)
        except Exception as e:
            print(f"[ocr] Vision error: {e}", flush=True)
            return OCRResult(text="", confidence=0, word_count=0)

    def _do_extract(self, image: Image.Image) -> OCRResult:
        """Run one Vision text-recognition request over `image`."""
        import objc
        import Vision
        from Foundation import NSData
        import Quartz

        # Convert PIL Image to CGImage via PNG bytes
        buf = io.BytesIO()
        image.save(buf, format="PNG")
        png_data = buf.getvalue()

        ns_data = NSData.dataWithBytes_length_(png_data, len(png_data))
        ci_image = Quartz.CIImage.imageWithData_(ns_data)
        context = Quartz.CIContext.context()
        cg_image = context.createCGImage_fromRect_(ci_image, ci_image.extent())

        if cg_image is None:
            # CIContext could not rasterize the image data.
            return OCRResult(text="", confidence=0, word_count=0)

        # Create and configure request
        request = Vision.VNRecognizeTextRequest.alloc().init()
        request.setRecognitionLevel_(Vision.VNRequestTextRecognitionLevelAccurate)
        request.setRecognitionLanguages_(self.languages)
        request.setUsesLanguageCorrection_(True)

        # Execute
        handler = Vision.VNImageRequestHandler.alloc().initWithCGImage_options_(cg_image, None)
        # NOTE(review): pyobjc appears to map the NSError** out-param into a
        # returned tuple whose first element is the success flag — confirm.
        success = handler.performRequests_error_([request], objc.nil)
        if not success[0]:
            return OCRResult(text="", confidence=0, word_count=0)

        results = request.results()
        if not results:
            return OCRResult(text="", confidence=0, word_count=0)

        lines = []
        confidences = []
        word_count = 0

        for observation in results:
            # Only the single best candidate per recognized line is used.
            candidate = observation.topCandidates_(1)
            if not candidate:
                continue
            text = candidate[0].string()
            conf = candidate[0].confidence()

            if conf < self.min_confidence:
                continue
            if text and text.strip():
                lines.append(text.strip())
                confidences.append(conf)
                word_count += len(text.split())

        text = "\n".join(lines)
        text = self._clean(text)
        # Scale Vision's 0-1 confidence to the 0-100 range the other
        # backends report.
        avg_conf = (sum(confidences) / len(confidences) * 100) if confidences else 0

        return OCRResult(
            text=text,
            confidence=avg_conf,
            word_count=word_count,
        )

    @staticmethod
    def _clean(text: str) -> str:
        """Collapse whitespace, remove noise lines.

        Lines must contain at least one Latin or Cyrillic letter or digit
        to survive.
        """
        text = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f]", "", text)
        lines = text.split("\n")
        cleaned = []
        for line in lines:
            line = re.sub(r"[ \t]+", " ", line).strip()
            if line and re.search(r"[a-zA-Z0-9а-яА-ЯёЁ]", line):
                cleaned.append(line)
        return "\n".join(cleaned)
195
+
196
+
197
class WinOCR:
    """Windows.Media.Ocr backend via winrt-python (Windows 10+).

    Probes for winrt + language support at construction time; when
    unavailable, extract() degrades to returning an empty OCRResult.
    """

    def __init__(self, language: str = "en", min_confidence: float = 0.5,
                 enabled: bool = True):
        self.language = language
        # Stored for interface parity with the other backends; never applied
        # below — the recognition loop reads no per-line confidence.
        self.min_confidence = min_confidence
        self.enabled = enabled
        self._available = False
        self._engine = None

        if not enabled:
            return

        try:
            from winrt.windows.media.ocr import OcrEngine
            from winrt.windows.globalization import Language

            lang = Language(language)
            if OcrEngine.is_language_supported(lang):
                # try_create_from_language may still return None.
                self._engine = OcrEngine.try_create_from_language(lang)
                self._available = self._engine is not None
            else:
                print(f"[ocr] WinOCR: language '{language}' not supported", flush=True)
        except Exception as e:
            print(f"[ocr] WinOCR unavailable: {e}", flush=True)

    def extract(self, image: Image.Image) -> OCRResult:
        """Returns extracted text using Windows.Media.Ocr.

        Never raises: errors are logged and an empty OCRResult is returned.
        """
        if not self.enabled or not self._available:
            return OCRResult(text="", confidence=0, word_count=0)

        try:
            return self._do_extract(image)
        except Exception as e:
            print(f"[ocr] WinOCR error: {e}", flush=True)
            return OCRResult(text="", confidence=0, word_count=0)

    def _do_extract(self, image: Image.Image) -> OCRResult:
        """Bridge PIL -> WinRT SoftwareBitmap, then run the OCR engine."""
        import asyncio
        from winrt.windows.graphics.imaging import (
            SoftwareBitmap, BitmapPixelFormat, BitmapAlphaMode,
        )
        from winrt.windows.storage.streams import (
            InMemoryRandomAccessStream, DataWriter,
        )

        # Convert PIL to BMP bytes and load as SoftwareBitmap
        # NOTE(review): Pillow's BMP writer support for RGBA varies by
        # version — confirm this save path succeeds on the pinned Pillow.
        buf = io.BytesIO()
        image.convert("RGBA").save(buf, format="BMP")
        bmp_bytes = buf.getvalue()

        async def _run():
            stream = InMemoryRandomAccessStream()
            writer = DataWriter(stream)
            writer.write_bytes(bmp_bytes)
            await writer.store_async()
            # NOTE(review): the writer is never detached/flushed here;
            # presumably store_async commits the bytes — confirm.
            stream.seek(0)

            from winrt.windows.graphics.imaging import BitmapDecoder
            decoder = await BitmapDecoder.create_async(stream)
            bitmap = await decoder.get_software_bitmap_async()

            # Convert to supported pixel format if needed
            if bitmap.bitmap_pixel_format != BitmapPixelFormat.BGRA8:
                bitmap = SoftwareBitmap.convert(bitmap, BitmapPixelFormat.BGRA8,
                                                BitmapAlphaMode.PREMULTIPLIED)

            result = await self._engine.recognize_async(bitmap)
            return result

        # A throwaway event loop keeps this method synchronous for callers.
        loop = asyncio.new_event_loop()
        try:
            result = loop.run_until_complete(_run())
        finally:
            loop.close()

        lines = []
        word_count = 0
        for line in result.lines:
            text = line.text.strip()
            if text:
                lines.append(text)
                word_count += len(text.split())

        text = "\n".join(lines)
        text = self._clean(text)

        # Fixed placeholder confidence: no per-line confidence is read from
        # the API in this implementation.
        return OCRResult(text=text, confidence=80.0, word_count=word_count)

    @staticmethod
    def _clean(text: str) -> str:
        """Strip control chars; keep lines with Latin/Cyrillic letters or digits."""
        text = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f]", "", text)
        lines = text.split("\n")
        cleaned = []
        for line in lines:
            line = re.sub(r"[ \t]+", " ", line).strip()
            if line and re.search(r"[a-zA-Z0-9а-яА-ЯёЁ]", line):
                cleaned.append(line)
        return "\n".join(cleaned)
297
+
298
+
299
def create_ocr(config: dict):
    """Factory: create the best available OCR backend based on config + platform.

    config["ocr"] keys:
        backend: "auto" | "vision" | "tesseract" | "winocr"
        languages: list[str] (BCP-47 for Vision / WinOCR, e.g. ["en", "ru"])
        lang: str (Tesseract lang code, e.g. "eng")
        minConfidence: int (0-100 scale)
        enabled: bool
    """
    cfg = config.get("ocr", {})
    choice = cfg.get("backend", "auto")
    enabled = cfg.get("enabled", True)

    # Prefer the native Vision framework on macOS.
    if sys.platform == "darwin" and choice in ("auto", "vision"):
        candidate = VisionOCR(
            languages=cfg.get("languages", ["en", "ru"]),
            min_confidence=cfg.get("minConfidence", 50) / 100.0,
            enabled=enabled,
        )
        if candidate._available:
            print(f"[ocr] using Vision backend (languages={candidate.languages})", flush=True)
            return candidate
        if choice == "vision":
            print("[ocr] Vision requested but unavailable, falling back to Tesseract", flush=True)

    # Prefer Windows.Media.Ocr on Windows.
    if sys.platform == "win32" and choice in ("auto", "winocr"):
        langs = cfg.get("languages", ["en"])
        candidate = WinOCR(
            language=langs[0] if langs else "en",
            min_confidence=cfg.get("minConfidence", 50) / 100.0,
            enabled=enabled,
        )
        if candidate._available:
            print(f"[ocr] using WinOCR backend (language={candidate.language})", flush=True)
            return candidate
        if choice == "winocr":
            print("[ocr] WinOCR requested but unavailable, falling back to Tesseract", flush=True)

    # Cross-platform fallback: Tesseract.
    print("[ocr] using Tesseract backend", flush=True)
    return LocalOCR(
        lang=cfg.get("lang", "eng"),
        psm=cfg.get("psm", 11),
        min_confidence=cfg.get("minConfidence", 50),
        enabled=enabled,
    )
@@ -0,0 +1,65 @@
1
+ """Privacy filter — strips <private> tags and auto-redacts sensitive patterns from OCR text."""
2
+
3
+ import re
4
+
5
# Patterns that auto-redact without manual tagging.
# NOTE: applied sequentially in list order by redact_sensitive(), so the
# specific token rules above rewrite text before the generic secret/email/
# phone rules get to see it.
_REDACT_PATTERNS: list[tuple[re.Pattern, str]] = [
    # Credit card numbers (4 groups of 4 digits)
    (re.compile(r"\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b"), "[REDACTED:card]"),
    # API keys / tokens (long hex or base64 strings)
    (re.compile(r"\b(?:sk-|pk-|api[_-]?key[=:]\s*)[A-Za-z0-9_\-]{20,}\b"), "[REDACTED:apikey]"),
    # Bearer tokens
    (re.compile(r"Bearer\s+[A-Za-z0-9_\-\.]{20,}"), "[REDACTED:bearer]"),
    # AWS secret keys
    (re.compile(r"\b(?:AKIA|ASIA)[A-Z0-9]{16}\b"), "[REDACTED:awskey]"),
    # Passwords in assignment context
    (re.compile(r"(?:password|passwd|pwd)\s*[:=]\s*\S+", re.IGNORECASE), "[REDACTED:password]"),
    # GitHub personal access tokens
    (re.compile(r"\bghp_[A-Za-z0-9]{36}\b"), "[REDACTED:github_pat]"),
    # GitHub server tokens
    (re.compile(r"\bghs_[A-Za-z0-9]{36}\b"), "[REDACTED:github_srv]"),
    # Slack tokens
    (re.compile(r"\bxox[bpoa]-[0-9A-Za-z\-]+"), "[REDACTED:slack]"),
    # Google OAuth tokens
    (re.compile(r"\bya29\.[0-9A-Za-z\-_]+"), "[REDACTED:google_oauth]"),
    # JWT tokens (three base64url segments)
    (re.compile(r"\beyJ[A-Za-z0-9\-_]+\.[A-Za-z0-9\-_]+\.[A-Za-z0-9\-_]+"), "[REDACTED:jwt]"),
    # Generic secrets / keys in assignment context
    (re.compile(r"(?:secret|token|key)\s*[:=]\s*[A-Za-z0-9_\-\.]{10,}", re.IGNORECASE), "[REDACTED:secret]"),
    # Email addresses
    (re.compile(r"\b[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}\b"), "[REDACTED:email]"),
    # US phone numbers (deliberately broad: any 10-digit grouping matches)
    (re.compile(r"\+?1?\s?\(?\d{3}\)?[\s.\-]?\d{3}[\s.\-]?\d{4}\b"), "[REDACTED:phone]"),
    # SSN (XXX-XX-XXXX)
    (re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), "[REDACTED:ssn]"),
    # CVV codes
    (re.compile(r"\bCVV\s*[:=]?\s*\d{3,4}\b", re.IGNORECASE), "[REDACTED:cvv]"),
    # PIN codes in assignment context
    (re.compile(r"\bpin\s*[:=]\s*\d{4,8}\b", re.IGNORECASE), "[REDACTED:pin]"),
    # Private key headers
    (re.compile(r"-----BEGIN (?:RSA |EC |OPENSSH )?PRIVATE KEY-----"), "[REDACTED:privkey]"),
    # MRN (medical record numbers)
    (re.compile(r"\bMRN\s*[:=]?\s*\d{6,10}\b", re.IGNORECASE), "[REDACTED:mrn]"),
]
44
+
45
# Matches <private>...</private> blocks (including multiline)
_PRIVATE_TAG = re.compile(r"<private>.*?</private>", re.DOTALL)


def strip_private(text: str) -> str:
    """Remove every <private>...</private> block, then trim the result."""
    scrubbed = _PRIVATE_TAG.sub("", text)
    return scrubbed.strip()
52
+
53
+
54
def redact_sensitive(text: str) -> str:
    """Replace anything matching a known secret/PII pattern with its marker.

    Patterns are applied in _REDACT_PATTERNS list order.
    """
    redacted = text
    for matcher, marker in _REDACT_PATTERNS:
        redacted = matcher.sub(marker, redacted)
    return redacted
59
+
60
+
61
def apply_privacy(text: str) -> str:
    """Full privacy pipeline: strip private tags, then auto-redact."""
    return redact_sensitive(strip_private(text))
@@ -0,0 +1,13 @@
1
+ pillow>=10.0
2
+ scikit-image>=0.22
3
+ numpy>=1.24
4
+ pytesseract>=0.3
5
+ requests>=2.31
6
+ mss>=9.0; sys_platform == "win32"
7
+ psutil>=5.9; sys_platform == "win32"
8
+ winrt-Windows.Media.Ocr>=2.0; sys_platform == "win32"
9
+ winrt-Windows.Globalization>=2.0; sys_platform == "win32"
10
+ winrt-Windows.Graphics.Imaging>=2.0; sys_platform == "win32"
11
+ winrt-Windows.Storage.Streams>=2.0; sys_platform == "win32"
12
+ winrt-Windows.Foundation>=2.0; sys_platform == "win32"
13
+ winrt-Windows.Foundation.Collections>=2.0; sys_platform == "win32"
@@ -0,0 +1,84 @@
1
+ """Region of Interest extraction from changed regions."""
2
+
3
+ from dataclasses import dataclass
4
+
5
+ import numpy as np
6
+ from PIL import Image
7
+
8
+
9
@dataclass
class ROI:
    """One cropped region of interest cut out of a frame."""
    # Cropped sub-image of the source frame.
    image: Image.Image
    # Crop position/size in source-frame pixel coordinates.
    bbox: tuple[int, int, int, int]  # (x, y, w, h)
13
+
14
+
15
class ROIExtractor:
    """Extracts and crops changed regions from a frame."""

    def __init__(self, padding: int = 20, min_size: tuple[int, int] = (64, 64),
                 max_rois: int = 3):
        self.padding = padding    # pixels added around each merged box
        self.min_size = min_size  # (w, h) floor below which a crop is dropped
        self.max_rois = max_rois  # cap on crops returned per frame

    def extract(self, frame: Image.Image, contours: list) -> list[ROI]:
        """Returns list of ROI crops from frame based on contours."""
        if not contours:
            return []

        # Bounding box per contour; contour points are (row, col) pairs.
        raw_boxes = []
        for coords in contours:
            pts = np.array(coords)
            top, left = pts.min(axis=0)
            bottom, right = pts.max(axis=0)
            raw_boxes.append((int(left), int(top), int(right), int(bottom)))

        frame_w, frame_h = frame.size
        crops: list[ROI] = []
        for x1, y1, x2, y2 in self._merge_boxes(raw_boxes)[:self.max_rois]:
            # Pad the box and clamp it to the frame bounds.
            x1 = max(0, x1 - self.padding)
            y1 = max(0, y1 - self.padding)
            x2 = min(frame_w, x2 + self.padding)
            y2 = min(frame_h, y2 + self.padding)

            box_w, box_h = x2 - x1, y2 - y1
            if box_w < self.min_size[0] or box_h < self.min_size[1]:
                continue  # too small to be useful

            crops.append(ROI(image=frame.crop((x1, y1, x2, y2)),
                             bbox=(x1, y1, box_w, box_h)))

        return crops

    def _merge_boxes(self, boxes: list[tuple]) -> list[tuple]:
        """Merge overlapping or adjacent bounding boxes."""
        if not boxes:
            return []

        # Greedy left-to-right scan, merging each box into the most recent
        # cluster when it overlaps or sits within padding distance of it.
        merged: list[list] = []
        for x1, y1, x2, y2 in sorted(boxes, key=lambda b: b[0]):
            if merged:
                last = merged[-1]
                near_x = x1 <= last[2] + self.padding
                near_y = (y1 <= last[3] + self.padding and
                          y2 >= last[1] - self.padding)
                if near_x and near_y:
                    last[0] = min(last[0], x1)
                    last[1] = min(last[1], y1)
                    last[2] = max(last[2], x2)
                    last[3] = max(last[3], y2)
                    continue
            merged.append([x1, y1, x2, y2])

        # Largest area first.
        merged.sort(key=lambda b: (b[2] - b[0]) * (b[3] - b[1]), reverse=True)
        return [tuple(b) for b in merged]