@geravant/sinain 1.0.19 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -1
- package/cli.js +176 -0
- package/index.ts +4 -2
- package/install.js +89 -14
- package/launcher.js +622 -0
- package/openclaw.plugin.json +4 -0
- package/pack-prepare.js +48 -0
- package/package.json +24 -5
- package/sense_client/README.md +82 -0
- package/sense_client/__init__.py +1 -0
- package/sense_client/__main__.py +462 -0
- package/sense_client/app_detector.py +54 -0
- package/sense_client/app_detector_win.py +83 -0
- package/sense_client/capture.py +215 -0
- package/sense_client/capture_win.py +88 -0
- package/sense_client/change_detector.py +86 -0
- package/sense_client/config.py +64 -0
- package/sense_client/gate.py +145 -0
- package/sense_client/ocr.py +347 -0
- package/sense_client/privacy.py +65 -0
- package/sense_client/requirements.txt +13 -0
- package/sense_client/roi_extractor.py +84 -0
- package/sense_client/sender.py +173 -0
- package/sense_client/tests/__init__.py +0 -0
- package/sense_client/tests/test_stream1_optimizations.py +234 -0
- package/setup-overlay.js +82 -0
- package/sinain-agent/.env.example +17 -0
- package/sinain-agent/CLAUDE.md +87 -0
- package/sinain-agent/mcp-config.json +12 -0
- package/sinain-agent/run.sh +248 -0
- package/sinain-core/.env.example +93 -0
- package/sinain-core/package-lock.json +552 -0
- package/sinain-core/package.json +21 -0
- package/sinain-core/src/agent/analyzer.ts +366 -0
- package/sinain-core/src/agent/context-window.ts +172 -0
- package/sinain-core/src/agent/loop.ts +404 -0
- package/sinain-core/src/agent/situation-writer.ts +187 -0
- package/sinain-core/src/agent/traits.ts +520 -0
- package/sinain-core/src/audio/capture-spawner-macos.ts +44 -0
- package/sinain-core/src/audio/capture-spawner-win.ts +37 -0
- package/sinain-core/src/audio/capture-spawner.ts +14 -0
- package/sinain-core/src/audio/pipeline.ts +335 -0
- package/sinain-core/src/audio/transcription-local.ts +141 -0
- package/sinain-core/src/audio/transcription.ts +278 -0
- package/sinain-core/src/buffers/feed-buffer.ts +71 -0
- package/sinain-core/src/buffers/sense-buffer.ts +425 -0
- package/sinain-core/src/config.ts +245 -0
- package/sinain-core/src/escalation/escalation-slot.ts +136 -0
- package/sinain-core/src/escalation/escalator.ts +828 -0
- package/sinain-core/src/escalation/message-builder.ts +370 -0
- package/sinain-core/src/escalation/openclaw-ws.ts +726 -0
- package/sinain-core/src/escalation/scorer.ts +166 -0
- package/sinain-core/src/index.ts +537 -0
- package/sinain-core/src/learning/feedback-store.ts +253 -0
- package/sinain-core/src/learning/signal-collector.ts +218 -0
- package/sinain-core/src/log.ts +24 -0
- package/sinain-core/src/overlay/commands.ts +126 -0
- package/sinain-core/src/overlay/ws-handler.ts +267 -0
- package/sinain-core/src/privacy/index.ts +18 -0
- package/sinain-core/src/privacy/presets.ts +40 -0
- package/sinain-core/src/privacy/redact.ts +92 -0
- package/sinain-core/src/profiler.ts +181 -0
- package/sinain-core/src/recorder.ts +186 -0
- package/sinain-core/src/server.ts +456 -0
- package/sinain-core/src/trace/trace-store.ts +73 -0
- package/sinain-core/src/trace/tracer.ts +94 -0
- package/sinain-core/src/types.ts +427 -0
- package/sinain-core/src/util/dedup.ts +48 -0
- package/sinain-core/src/util/task-store.ts +84 -0
- package/sinain-core/tsconfig.json +18 -0
- package/sinain-knowledge/curation/engine.ts +137 -24
- package/sinain-knowledge/data/git-store.ts +26 -0
- package/sinain-knowledge/data/store.ts +117 -0
- package/sinain-mcp-server/index.ts +417 -0
- package/sinain-mcp-server/package.json +19 -0
- package/sinain-mcp-server/tsconfig.json +15 -0
- package/sinain-memory/graph_query.py +185 -0
- package/sinain-memory/knowledge_integrator.py +450 -0
- package/sinain-memory/memory-config.json +3 -1
- package/sinain-memory/session_distiller.py +162 -0
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
"""OCR backends for UI text extraction: macOS Vision, Windows.Media.Ocr, and Tesseract."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import io
|
|
5
|
+
import re
|
|
6
|
+
import sys
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
|
|
9
|
+
from PIL import Image
|
|
10
|
+
|
|
11
|
+
try:
|
|
12
|
+
import pytesseract
|
|
13
|
+
except ImportError:
|
|
14
|
+
pytesseract = None
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class OCRResult:
    """Result of one OCR pass over a single image."""
    text: str  # cleaned, whitespace-normalized extracted text ("" when disabled or failed)
    confidence: float  # average confidence on a 0-100 scale (0 when nothing recognized)
    word_count: int  # number of words kept after confidence filtering
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class LocalOCR:
    """Tesseract OCR wrapper for UI text extraction.

    Degrades gracefully: when pytesseract is missing or the backend is
    disabled, extract() returns an empty OCRResult instead of raising.
    """

    def __init__(self, lang: str = "eng", psm: int = 11,
                 min_confidence: int = 30, enabled: bool = True):
        # lang: Tesseract language code (e.g. "eng").
        # psm: Tesseract page segmentation mode; 11 = sparse text, suited to UI screenshots.
        # min_confidence: per-word cutoff on Tesseract's 0-100 scale.
        self.lang = lang
        self.psm = psm
        self.min_confidence = min_confidence
        self.enabled = enabled

    def extract(self, image: Image.Image) -> OCRResult:
        """Return extracted text with the average per-word confidence (0-100)."""
        if not self.enabled or pytesseract is None:
            return OCRResult(text="", confidence=0, word_count=0)

        try:
            data = pytesseract.image_to_data(
                image,
                lang=self.lang,
                config=f"--psm {self.psm}",
                output_type=pytesseract.Output.DICT,
            )
        except Exception as e:
            print(f"[ocr] error: {e}", flush=True)
            return OCRResult(text="", confidence=0, word_count=0)

        words = []
        confidences = []
        for i, conf in enumerate(data["conf"]):
            # Depending on the pytesseract/Tesseract version, "conf" entries may
            # be ints, floats, or strings such as "-1" or "96.0". int(float(...))
            # accepts all of these; plain int(...) raised ValueError on "96.0"
            # and silently dropped valid words.
            try:
                c = int(float(conf))
            except (ValueError, TypeError):
                continue
            if c >= self.min_confidence:
                word = data["text"][i].strip()
                if word:
                    words.append(word)
                    confidences.append(c)

        text = " ".join(words)
        text = self._clean(text)
        avg_conf = sum(confidences) / len(confidences) if confidences else 0

        return OCRResult(
            text=text,
            confidence=avg_conf,
            word_count=len(words),
        )

    @staticmethod
    def _clean(text: str) -> str:
        """Strip control chars, collapse whitespace, remove noise lines."""
        # Remove ASCII control characters except \t and \n.
        text = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f]", "", text)
        text = re.sub(r"[ \t]+", " ", text)
        lines = text.split("\n")
        cleaned = []
        for line in lines:
            line = line.strip()
            # Keep only lines containing at least one alphanumeric character.
            if line and re.search(r"[a-zA-Z0-9]", line):
                cleaned.append(line)
        return "\n".join(cleaned)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class VisionOCR:
    """macOS Vision framework OCR using pyobjc.

    Availability is probed once in __init__; when the Vision framework cannot
    be loaded, extract() returns empty results instead of raising.
    """

    def __init__(self, languages: list[str] | None = None,
                 min_confidence: float = 0.5, enabled: bool = True):
        # languages: BCP-47 recognition languages, in priority order.
        # min_confidence: per-line cutoff on Vision's 0.0-1.0 scale.
        self.languages = languages or ["en", "ru"]
        self.min_confidence = min_confidence
        self.enabled = enabled
        self._available = False

        if not enabled:
            return

        # Probe for pyobjc + the Vision framework; any failure leaves
        # _available False so extract() becomes a no-op.
        try:
            import objc  # noqa: F401
            import Quartz  # noqa: F401
            from Foundation import NSURL, NSData  # noqa: F401
            objc.loadBundle('Vision', bundle_path='/System/Library/Frameworks/Vision.framework',
                            module_globals=globals())
            self._available = True
        except Exception as e:
            print(f"[ocr] Vision framework unavailable: {e}", flush=True)

    def extract(self, image: Image.Image) -> OCRResult:
        """Returns extracted text using macOS Vision framework.

        Never raises: any bridge error is logged and an empty result returned.
        """
        if not self.enabled or not self._available:
            return OCRResult(text="", confidence=0, word_count=0)

        try:
            return self._do_extract(image)
        except Exception as e:
            print(f"[ocr] Vision error: {e}", flush=True)
            return OCRResult(text="", confidence=0, word_count=0)

    def _do_extract(self, image: Image.Image) -> OCRResult:
        # Imports deferred so this module can load on non-macOS platforms.
        import objc
        import Vision
        from Foundation import NSData
        import Quartz

        # Convert PIL Image to CGImage via PNG bytes
        buf = io.BytesIO()
        image.save(buf, format="PNG")
        png_data = buf.getvalue()

        ns_data = NSData.dataWithBytes_length_(png_data, len(png_data))
        ci_image = Quartz.CIImage.imageWithData_(ns_data)
        context = Quartz.CIContext.context()
        cg_image = context.createCGImage_fromRect_(ci_image, ci_image.extent())

        if cg_image is None:
            return OCRResult(text="", confidence=0, word_count=0)

        # Create and configure request
        request = Vision.VNRecognizeTextRequest.alloc().init()
        request.setRecognitionLevel_(Vision.VNRequestTextRecognitionLevelAccurate)
        request.setRecognitionLanguages_(self.languages)
        request.setUsesLanguageCorrection_(True)

        # Execute. pyobjc maps the ObjC out-error parameter to a tuple,
        # so success[0] carries the BOOL return value.
        handler = Vision.VNImageRequestHandler.alloc().initWithCGImage_options_(cg_image, None)
        success = handler.performRequests_error_([request], objc.nil)
        if not success[0]:
            return OCRResult(text="", confidence=0, word_count=0)

        results = request.results()
        if not results:
            return OCRResult(text="", confidence=0, word_count=0)

        lines = []
        confidences = []
        word_count = 0

        # One observation per recognized text line; keep only the top candidate.
        for observation in results:
            candidate = observation.topCandidates_(1)
            if not candidate:
                continue
            text = candidate[0].string()
            conf = candidate[0].confidence()

            if conf < self.min_confidence:
                continue
            if text and text.strip():
                lines.append(text.strip())
                confidences.append(conf)
                word_count += len(text.split())

        text = "\n".join(lines)
        text = self._clean(text)
        # Vision reports confidence in 0.0-1.0; scale to the 0-100 range
        # used by the other backends.
        avg_conf = (sum(confidences) / len(confidences) * 100) if confidences else 0

        return OCRResult(
            text=text,
            confidence=avg_conf,
            word_count=word_count,
        )

    @staticmethod
    def _clean(text: str) -> str:
        """Collapse whitespace, remove noise lines."""
        text = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f]", "", text)
        lines = text.split("\n")
        cleaned = []
        for line in lines:
            line = re.sub(r"[ \t]+", " ", line).strip()
            # Keep lines with at least one Latin or Cyrillic letter or digit.
            if line and re.search(r"[a-zA-Z0-9а-яА-ЯёЁ]", line):
                cleaned.append(line)
        return "\n".join(cleaned)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
class WinOCR:
    """Windows.Media.Ocr backend via winrt-python (Windows 10+).

    Engine creation is attempted once in __init__; on any failure extract()
    degrades to returning empty results.
    """

    def __init__(self, language: str = "en", min_confidence: float = 0.5,
                 enabled: bool = True):
        # language: BCP-47 tag for the OCR engine (first supported one wins).
        # min_confidence: stored for API symmetry with the other backends;
        # NOTE(review): not referenced by extract() below — Windows OCR results
        # are not filtered by confidence here.
        self.language = language
        self.min_confidence = min_confidence
        self.enabled = enabled
        self._available = False
        self._engine = None  # OcrEngine instance when creation succeeds

        if not enabled:
            return

        try:
            from winrt.windows.media.ocr import OcrEngine
            from winrt.windows.globalization import Language

            lang = Language(language)
            if OcrEngine.is_language_supported(lang):
                self._engine = OcrEngine.try_create_from_language(lang)
                # try_create_from_language may return None even for a
                # supported language.
                self._available = self._engine is not None
            else:
                print(f"[ocr] WinOCR: language '{language}' not supported", flush=True)
        except Exception as e:
            print(f"[ocr] WinOCR unavailable: {e}", flush=True)

    def extract(self, image: Image.Image) -> OCRResult:
        """Returns extracted text using Windows.Media.Ocr.

        Never raises: errors are logged and an empty result returned.
        """
        if not self.enabled or not self._available:
            return OCRResult(text="", confidence=0, word_count=0)

        try:
            return self._do_extract(image)
        except Exception as e:
            print(f"[ocr] WinOCR error: {e}", flush=True)
            return OCRResult(text="", confidence=0, word_count=0)

    def _do_extract(self, image: Image.Image) -> OCRResult:
        # Imports deferred so this module can load on non-Windows platforms.
        import asyncio
        from winrt.windows.graphics.imaging import (
            SoftwareBitmap, BitmapPixelFormat, BitmapAlphaMode,
        )
        from winrt.windows.storage.streams import (
            InMemoryRandomAccessStream, DataWriter,
        )

        # Convert PIL to BMP bytes and load as SoftwareBitmap
        buf = io.BytesIO()
        image.convert("RGBA").save(buf, format="BMP")
        bmp_bytes = buf.getvalue()

        async def _run():
            # Write the BMP bytes into an in-memory WinRT stream.
            stream = InMemoryRandomAccessStream()
            writer = DataWriter(stream)
            writer.write_bytes(bmp_bytes)
            await writer.store_async()
            stream.seek(0)

            from winrt.windows.graphics.imaging import BitmapDecoder
            decoder = await BitmapDecoder.create_async(stream)
            bitmap = await decoder.get_software_bitmap_async()

            # Convert to supported pixel format if needed
            if bitmap.bitmap_pixel_format != BitmapPixelFormat.BGRA8:
                bitmap = SoftwareBitmap.convert(bitmap, BitmapPixelFormat.BGRA8,
                                                BitmapAlphaMode.PREMULTIPLIED)

            result = await self._engine.recognize_async(bitmap)
            return result

        # A fresh event loop per call keeps this method usable from threads
        # that have no running loop.
        loop = asyncio.new_event_loop()
        try:
            result = loop.run_until_complete(_run())
        finally:
            loop.close()

        lines = []
        word_count = 0
        for line in result.lines:
            text = line.text.strip()
            if text:
                lines.append(text)
                word_count += len(text.split())

        text = "\n".join(lines)
        text = self._clean(text)

        # Windows.Media.Ocr exposes no confidence score, so a fixed
        # placeholder of 80.0 is reported to keep OCRResult comparable
        # across backends.
        return OCRResult(text=text, confidence=80.0, word_count=word_count)

    @staticmethod
    def _clean(text: str) -> str:
        """Strip control chars, collapse whitespace, remove noise lines."""
        text = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f]", "", text)
        lines = text.split("\n")
        cleaned = []
        for line in lines:
            line = re.sub(r"[ \t]+", " ", line).strip()
            # Keep lines with at least one Latin or Cyrillic letter or digit.
            if line and re.search(r"[a-zA-Z0-9а-яА-ЯёЁ]", line):
                cleaned.append(line)
        return "\n".join(cleaned)
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def create_ocr(config: dict):
    """Factory: create the best available OCR backend based on config + platform.

    config["ocr"] keys:
        backend: "auto" | "vision" | "tesseract" | "winocr"
        languages: list[str] (BCP-47 for Vision / WinOCR, e.g. ["en", "ru"])
        lang: str (Tesseract lang code, e.g. "eng")
        minConfidence: int (0-100 scale)
        enabled: bool
    """
    ocr_cfg = config.get("ocr", {})
    requested = ocr_cfg.get("backend", "auto")
    is_enabled = ocr_cfg.get("enabled", True)
    threshold = ocr_cfg.get("minConfidence", 50)

    # macOS: prefer the native Vision framework when config allows it.
    if sys.platform == "darwin" and requested in ("auto", "vision"):
        candidate = VisionOCR(
            languages=ocr_cfg.get("languages", ["en", "ru"]),
            min_confidence=threshold / 100.0,
            enabled=is_enabled,
        )
        if candidate._available:
            print(f"[ocr] using Vision backend (languages={candidate.languages})", flush=True)
            return candidate
        if requested == "vision":
            print("[ocr] Vision requested but unavailable, falling back to Tesseract", flush=True)

    # Windows: prefer Windows.Media.Ocr when config allows it.
    if sys.platform == "win32" and requested in ("auto", "winocr"):
        langs = ocr_cfg.get("languages", ["en"])
        candidate = WinOCR(
            language=langs[0] if langs else "en",
            min_confidence=threshold / 100.0,
            enabled=is_enabled,
        )
        if candidate._available:
            print(f"[ocr] using WinOCR backend (language={candidate.language})", flush=True)
            return candidate
        if requested == "winocr":
            print("[ocr] WinOCR requested but unavailable, falling back to Tesseract", flush=True)

    # Cross-platform fallback: Tesseract.
    print("[ocr] using Tesseract backend", flush=True)
    return LocalOCR(
        lang=ocr_cfg.get("lang", "eng"),
        psm=ocr_cfg.get("psm", 11),
        min_confidence=threshold,
        enabled=is_enabled,
    )
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Privacy filter — strips <private> tags and auto-redacts sensitive patterns from OCR text."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
# Patterns that auto-redact without manual tagging.
# Applied in order by redact_sensitive(); earlier patterns win when spans
# overlap (e.g. the card pattern runs before the generic secret pattern).
_REDACT_PATTERNS: list[tuple[re.Pattern, str]] = [
    # Credit card numbers (4 groups of 4 digits)
    (re.compile(r"\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b"), "[REDACTED:card]"),
    # API keys / tokens (long hex or base64 strings)
    (re.compile(r"\b(?:sk-|pk-|api[_-]?key[=:]\s*)[A-Za-z0-9_\-]{20,}\b"), "[REDACTED:apikey]"),
    # Bearer tokens
    (re.compile(r"Bearer\s+[A-Za-z0-9_\-\.]{20,}"), "[REDACTED:bearer]"),
    # AWS secret keys
    (re.compile(r"\b(?:AKIA|ASIA)[A-Z0-9]{16}\b"), "[REDACTED:awskey]"),
    # Passwords in assignment context
    (re.compile(r"(?:password|passwd|pwd)\s*[:=]\s*\S+", re.IGNORECASE), "[REDACTED:password]"),
    # GitHub personal access tokens
    (re.compile(r"\bghp_[A-Za-z0-9]{36}\b"), "[REDACTED:github_pat]"),
    # GitHub server tokens
    (re.compile(r"\bghs_[A-Za-z0-9]{36}\b"), "[REDACTED:github_srv]"),
    # Slack tokens
    (re.compile(r"\bxox[bpoa]-[0-9A-Za-z\-]+"), "[REDACTED:slack]"),
    # Google OAuth tokens
    (re.compile(r"\bya29\.[0-9A-Za-z\-_]+"), "[REDACTED:google_oauth]"),
    # JWT tokens (three base64url segments)
    (re.compile(r"\beyJ[A-Za-z0-9\-_]+\.[A-Za-z0-9\-_]+\.[A-Za-z0-9\-_]+"), "[REDACTED:jwt]"),
    # Generic secrets / keys in assignment context
    (re.compile(r"(?:secret|token|key)\s*[:=]\s*[A-Za-z0-9_\-\.]{10,}", re.IGNORECASE), "[REDACTED:secret]"),
    # Email addresses
    (re.compile(r"\b[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}\b"), "[REDACTED:email]"),
    # US phone numbers
    (re.compile(r"\+?1?\s?\(?\d{3}\)?[\s.\-]?\d{3}[\s.\-]?\d{4}\b"), "[REDACTED:phone]"),
    # SSN (XXX-XX-XXXX)
    (re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), "[REDACTED:ssn]"),
    # CVV codes
    (re.compile(r"\bCVV\s*[:=]?\s*\d{3,4}\b", re.IGNORECASE), "[REDACTED:cvv]"),
    # PIN codes in assignment context
    (re.compile(r"\bpin\s*[:=]\s*\d{4,8}\b", re.IGNORECASE), "[REDACTED:pin]"),
    # Private key headers
    (re.compile(r"-----BEGIN (?:RSA |EC |OPENSSH )?PRIVATE KEY-----"), "[REDACTED:privkey]"),
    # MRN (medical record numbers)
    (re.compile(r"\bMRN\s*[:=]?\s*\d{6,10}\b", re.IGNORECASE), "[REDACTED:mrn]"),
]

# Matches <private>...</private> blocks (including multiline)
_PRIVATE_TAG = re.compile(r"<private>.*?</private>", re.DOTALL)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def strip_private(text: str) -> str:
    """Drop every <private>...</private> block (multiline included), then trim."""
    scrubbed = re.sub(r"<private>.*?</private>", "", text, flags=re.DOTALL)
    return scrubbed.strip()
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def redact_sensitive(text: str) -> str:
    """Replace anything matching a known secret/PII pattern with its placeholder."""
    result = text
    for matcher, placeholder in _REDACT_PATTERNS:
        result = matcher.sub(placeholder, result)
    return result
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def apply_privacy(text: str) -> str:
    """Full privacy pipeline: strip private tags first, then auto-redact the rest."""
    return redact_sensitive(strip_private(text))
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
pillow>=10.0
|
|
2
|
+
scikit-image>=0.22
|
|
3
|
+
numpy>=1.24
|
|
4
|
+
pytesseract>=0.3
|
|
5
|
+
requests>=2.31
|
|
6
|
+
mss>=9.0; sys_platform == "win32"
|
|
7
|
+
psutil>=5.9; sys_platform == "win32"
|
|
8
|
+
winrt-Windows.Media.Ocr>=2.0; sys_platform == "win32"
|
|
9
|
+
winrt-Windows.Globalization>=2.0; sys_platform == "win32"
|
|
10
|
+
winrt-Windows.Graphics.Imaging>=2.0; sys_platform == "win32"
|
|
11
|
+
winrt-Windows.Storage.Streams>=2.0; sys_platform == "win32"
|
|
12
|
+
winrt-Windows.Foundation>=2.0; sys_platform == "win32"
|
|
13
|
+
winrt-Windows.Foundation.Collections>=2.0; sys_platform == "win32"
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Region of Interest extraction from changed regions."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
from PIL import Image
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class ROI:
    """A cropped region of interest taken from a full frame."""
    image: Image.Image  # the cropped sub-image
    bbox: tuple[int, int, int, int]  # (x, y, w, h) in full-frame pixel coordinates
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ROIExtractor:
    """Extracts and crops changed regions from a frame."""

    def __init__(self, padding: int = 20, min_size: tuple[int, int] = (64, 64),
                 max_rois: int = 3):
        # padding: pixels added around each merged box before cropping; also
        # used as the adjacency distance when merging boxes.
        # min_size: (width, height) floor below which a region is discarded.
        # max_rois: cap on the number of crops returned per frame.
        self.padding = padding
        self.min_size = min_size
        self.max_rois = max_rois

    def extract(self, frame: Image.Image, contours: list) -> list[ROI]:
        """Returns list of ROI crops from frame based on contours."""
        if not contours:
            return []

        # One bounding box per contour; contour points are (row, col) pairs,
        # so axis 0 of the min/max gives (y, x).
        boxes = []
        for coords in contours:
            pts = np.array(coords)
            top, left = pts.min(axis=0)
            bottom, right = pts.max(axis=0)
            boxes.append((int(left), int(top), int(right), int(bottom)))

        # Collapse overlapping/adjacent boxes into larger regions.
        merged = self._merge_boxes(boxes)

        frame_w, frame_h = frame.size
        crops: list[ROI] = []
        for x1, y1, x2, y2 in merged[:self.max_rois]:
            # Pad each box, then clamp it to the frame bounds.
            x1 = max(0, x1 - self.padding)
            y1 = max(0, y1 - self.padding)
            x2 = min(frame_w, x2 + self.padding)
            y2 = min(frame_h, y2 + self.padding)

            width = x2 - x1
            height = y2 - y1
            if width < self.min_size[0] or height < self.min_size[1]:
                continue  # too small to be useful

            crops.append(ROI(image=frame.crop((x1, y1, x2, y2)),
                             bbox=(x1, y1, width, height)))

        return crops

    def _merge_boxes(self, boxes: list[tuple]) -> list[tuple]:
        """Merge overlapping or adjacent bounding boxes."""
        if not boxes:
            return []

        # Sweep left-to-right, growing the most recent merged box whenever
        # the next one overlaps it (within `padding` pixels on both axes).
        ordered = sorted(boxes, key=lambda b: b[0])
        merged = [list(ordered[0])]

        for x1, y1, x2, y2 in ordered[1:]:
            last = merged[-1]
            near_horizontally = x1 <= last[2] + self.padding
            near_vertically = (y1 <= last[3] + self.padding and
                               y2 >= last[1] - self.padding)
            if near_horizontally and near_vertically:
                last[0] = min(last[0], x1)
                last[1] = min(last[1], y1)
                last[2] = max(last[2], x2)
                last[3] = max(last[3], y2)
            else:
                merged.append([x1, y1, x2, y2])

        # Largest regions first so max_rois keeps the most significant ones.
        merged.sort(key=lambda b: (b[2] - b[0]) * (b[3] - b[1]), reverse=True)
        return [tuple(b) for b in merged]
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
"""POST sense events to the relay server."""
|
|
2
|
+
|
|
3
|
+
import base64
|
|
4
|
+
import io
|
|
5
|
+
import time
|
|
6
|
+
|
|
7
|
+
import requests
|
|
8
|
+
from PIL import Image
|
|
9
|
+
|
|
10
|
+
from .gate import SenseEvent
|
|
11
|
+
|
|
12
|
+
# Retry config for /sense POST
_MAX_RETRIES = 3  # total attempts per event before giving up
_RETRY_BASE_DELAY_S = 1.0  # 1s, 2s, 4s (exponential)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class SenseSender:
    """POSTs sense events to the relay server with retry and backoff."""

    def __init__(self, url: str = "http://localhost:9500",
                 max_image_kb: int = 500, send_thumbnails: bool = True):
        # url: relay base URL; trailing slash is stripped so path joins are clean.
        # max_image_kb / send_thumbnails: stored configuration — not referenced
        # inside this class; presumably read by callers. TODO(review): confirm.
        self.url = url.rstrip("/")
        self.max_image_kb = max_image_kb
        self.send_thumbnails = send_thumbnails
        self._latencies: list[float] = []  # per-request latencies (ms) for periodic stats
        self._last_stats_ts: float = time.time()  # last time stats were logged
        self._consecutive_failures: int = 0  # full send() failures in a row

    def send(self, event: SenseEvent) -> bool:
        """POST /sense with JSON payload. Returns True on success.

        Retries up to _MAX_RETRIES times with exponential backoff; never raises.
        """
        # Base payload; optional fields are added only when present so the
        # relay does not receive null/empty keys.
        payload = {
            "type": event.type,
            "ts": event.ts,
            "ocr": event.ocr,
            "meta": {
                "ssim": event.meta.ssim,
                "app": event.meta.app,
                "windowTitle": event.meta.window_title,
                "screen": event.meta.screen,
            },
        }
        if event.roi:
            payload["roi"] = event.roi
        if event.diff:
            payload["diff"] = event.diff
        if event.observation and event.observation.title:
            payload["observation"] = {
                "title": event.observation.title,
                "subtitle": event.observation.subtitle,
                "facts": event.observation.facts,
                "narrative": event.observation.narrative,
                "concepts": event.observation.concepts,
            }

        for attempt in range(_MAX_RETRIES):
            try:
                start = time.time()
                resp = requests.post(
                    f"{self.url}/sense",
                    json=payload,
                    timeout=5,
                )
                # Latency is recorded for any completed HTTP exchange,
                # successful or not.
                elapsed_ms = (time.time() - start) * 1000
                self._latencies.append(elapsed_ms)
                self._maybe_log_stats()

                if resp.status_code == 200:
                    if self._consecutive_failures > 0:
                        print(f"[sender] reconnected after {self._consecutive_failures} failure(s)", flush=True)
                    self._consecutive_failures = 0
                    return True

                # Non-200 but not an exception — log and retry
                print(f"[sender] HTTP {resp.status_code} on attempt {attempt + 1}/{_MAX_RETRIES}", flush=True)

            except requests.exceptions.ConnectionError as e:
                print(f"[sender] connection error (attempt {attempt + 1}/{_MAX_RETRIES}): {e}", flush=True)
            except requests.exceptions.Timeout:
                print(f"[sender] timeout (attempt {attempt + 1}/{_MAX_RETRIES})", flush=True)
            except Exception as e:
                print(f"[sender] unexpected error (attempt {attempt + 1}/{_MAX_RETRIES}): {e}", flush=True)

            # Don't sleep after the last attempt
            if attempt < _MAX_RETRIES - 1:
                delay = _RETRY_BASE_DELAY_S * (2 ** attempt)
                print(f"[sender] retrying in {delay:.1f}s...", flush=True)
                time.sleep(delay)

        # All attempts exhausted; log sparsely (first failure, then every 10th)
        # to avoid flooding the console while the relay is down.
        self._consecutive_failures += 1
        if self._consecutive_failures == 1 or self._consecutive_failures % 10 == 0:
            print(f"[sender] all {_MAX_RETRIES} attempts failed (consecutive failures: {self._consecutive_failures})", flush=True)
        return False

    def _maybe_log_stats(self):
        """Log P50/P95 send latencies every 60s, then reset the window."""
        now = time.time()
        if now - self._last_stats_ts < 60:
            return
        if not self._latencies:
            return
        sorted_lat = sorted(self._latencies)
        p50 = sorted_lat[len(sorted_lat) // 2]
        # int(n * 0.95) is always a valid index for n >= 1.
        p95 = sorted_lat[int(len(sorted_lat) * 0.95)]
        print(f"[sender] relay latency: p50={p50:.0f}ms p95={p95:.0f}ms (n={len(sorted_lat)})", flush=True)
        self._latencies.clear()
        self._last_stats_ts = now
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def encode_image(img: Image.Image, max_kb: int, max_px: int = 0) -> str:
    """Encode PIL Image to base64 JPEG, reducing quality until under max_kb.

    Args:
        img: source image in any mode; modes JPEG cannot store are converted.
        max_kb: target upper bound on the encoded size, in kilobytes.
        max_px: if non-zero, downscale so the longest side is at most max_px.

    Returns:
        Base64-encoded JPEG bytes as a str (no data-URI prefix). The last-resort
        quality-20 encoding may still exceed max_kb.
    """
    if max_px:
        ratio = max_px / max(img.size)
        if ratio < 1:
            img = img.resize(
                (int(img.width * ratio), int(img.height * ratio)),
                Image.LANCZOS,
            )

    # JPEG cannot store alpha/palette modes (RGBA, LA, P, ...); converting only
    # RGBA (as before) let Image.save() raise OSError on the others. Keep L and
    # CMYK untouched since JPEG supports them natively.
    if img.mode not in ("RGB", "L", "CMYK"):
        img = img.convert("RGB")

    # Try high quality first — often fits
    max_bytes = max_kb * 1024
    buf = io.BytesIO()
    img.save(buf, format="JPEG", quality=85)
    if buf.tell() <= max_bytes:
        return base64.b64encode(buf.getvalue()).decode()

    # Binary search for the highest quality in [20, 80] that fits
    lo, hi = 20, 80
    best_buf = None
    while lo <= hi:
        mid = (lo + hi) // 2
        buf = io.BytesIO()
        img.save(buf, format="JPEG", quality=mid)
        if buf.tell() <= max_bytes:
            best_buf = buf
            lo = mid + 1
        else:
            hi = mid - 1

    if best_buf is not None:
        return base64.b64encode(best_buf.getvalue()).decode()

    # Last resort: return at lowest quality even if it exceeds the budget
    buf = io.BytesIO()
    img.save(buf, format="JPEG", quality=20)
    return base64.b64encode(buf.getvalue()).decode()
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def package_full_frame(frame: Image.Image, max_px: int = 384) -> dict:
    """Package a full frame as a small thumbnail for context events."""
    encoded = encode_image(frame, max_kb=200, max_px=max_px)
    full_bbox = [0, 0, frame.width, frame.height]
    return {"data": encoded, "bbox": full_bbox, "thumb": True}
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def package_roi(roi, thumb: bool = True) -> dict:
    """Package an ROI as a small thumbnail for text/visual events.

    Args:
        roi: object exposing `.image` (PIL Image) and `.bbox` (x, y, w, h).
        thumb: whether to flag the payload as a thumbnail.
    """
    return {
        "data": encode_image(roi.image, max_kb=60, max_px=384),
        "bbox": list(roi.bbox),
        # Bug fix: "thumb" was hard-coded to True, silently ignoring the
        # parameter; the default preserves existing caller behavior.
        "thumb": thumb,
    }
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def package_diff(diff_image: Image.Image) -> dict:
    """Package a diff image."""
    encoded = encode_image(diff_image, max_kb=200)
    return {"data": encoded}
|
|
File without changes
|