PyPI - openrunner-sdk - Versions diffs - 2.7.0__tar.gz → 2.8.0__tar.gz - Mend

openrunner-sdk 2.7.0tar.gz → 2.8.0tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (118) hide show

{openrunner_sdk-2.7.0 → openrunner_sdk-2.8.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: openrunner-sdk
-Version: 2.7.0
+Version: 2.8.0
 Summary: OpenRunner SDK - W&B-compatible ML experiment tracking client
 Project-URL: Homepage, https://github.com/jqueguiner/openrunner
 Project-URL: Repository, https://github.com/jqueguiner/openrunner
@@ -33,6 +33,7 @@ Requires-Dist: catboost>=1.2; extra == 'catboost'
 Provides-Extra: dev
 Requires-Dist: numpy>=1.24; extra == 'dev'
 Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
+Requires-Dist: pytest-cov>=5.0; extra == 'dev'
 Requires-Dist: pytest>=8.0; extra == 'dev'
 Provides-Extra: diffusers
 Requires-Dist: diffusers>=0.25; extra == 'diffusers'

{openrunner_sdk-2.7.0 → openrunner_sdk-2.8.0}/openrunner/__init__.py RENAMED Viewed

@@ -120,7 +120,7 @@ launch.from_run = _launch_from_run  # type: ignore[attr-defined]
 # openrunner.trace.patch_openai() syntax
 trace.patch_openai = _patch_openai  # type: ignore[attr-defined]
-__version__ = "2.7.0"
+__version__ = "2.8.0"
 logger = logging.getLogger("openrunner")

{openrunner_sdk-2.7.0 → openrunner_sdk-2.8.0}/openrunner/cli.py RENAMED Viewed

@@ -2391,13 +2391,18 @@ def session_setup() -> None:
 @click.option("--hours", "-h", default=24.0, help="Look back N hours (default: 24)")
 @click.option("--project", "-p", default=None, help="Target project (default: from config)")
 @click.option("--dry-run", is_flag=True, help="Show what would be synced without uploading")
-def session_sync(directory: str | None, hours: float, project: str | None, dry_run: bool) -> None:
+@click.option("--redact/--no-redact", default=None, help="Force redaction on/off (default: use config)")
+@click.option("--redact-mode", type=click.Choice(["regex", "ner"]), default=None, help="Redaction mode")
+@click.option("--public", "visibility", flag_value="public", help="Make session public")
+@click.option("--private", "visibility", flag_value="private", default=True, help="Keep session private (default)")
+def session_sync(directory: str | None, hours: float, project: str | None, dry_run: bool, redact: bool | None, redact_mode: str | None, visibility: str) -> None:
     """Sync AI sessions to OpenRunner.
     If DIRECTORY is given, scan that path for .jsonl/.json session files.
     Otherwise, scan default locations (~/.claude, ~/.codex, ~/.qwen-code).
     On first run, prompts for API key and project selection.
+    Redaction strips API keys, tokens, emails, passwords before upload.
     """
     from pathlib import Path
     from openrunner.session import discover_all_sessions, discover_in_directory, sync_all, get_session_config, interactive_setup
@@ -2427,7 +2432,7 @@ def session_sync(directory: str | None, hours: float, project: str | None, dry_r
         if dry_run:
             return
-    synced = sync_all(since_hours=hours, project=project, directory=Path(directory) if directory else None)
+    synced = sync_all(since_hours=hours, project=project, directory=Path(directory) if directory else None, redact=redact, redact_mode=redact_mode, visibility=visibility)
     if synced:
         click.echo(f"Synced {len(synced)} session(s) to OpenRunner.")
         for run_id in synced:

{openrunner_sdk-2.7.0 → openrunner_sdk-2.8.0}/openrunner/install_commands.py RENAMED Viewed

@@ -26,68 +26,90 @@ from pathlib import Path
 SYNC_SESSION_CMD = """---
 name: {prefix}sync-session
 description: Sync current coding session to OpenRunner as a research log
+argument-hint: "[org/project]"
+allowed-tools:
+  - Bash
+  - Read
+  - AskUserQuestion
 ---
-Sync the current session to OpenRunner. Run:
+Sync the current Claude Code session to OpenRunner.
+## Process
+### Step 1: Check configuration
+Run this to check if session config exists:
 ```bash
 python3 -c "
-import sys, os
+import json
 from pathlib import Path
+config_file = Path.home() / '.openrunner' / 'session_config.json'
+if config_file.exists():
+    config = json.loads(config_file.read_text())
+    if config.get('api_key') and config.get('project'):
+        print('CONFIGURED')
+        print(f'project={{config[\"project\"]}}')
+    else:
+        print('INCOMPLETE')
+else:
+    print('NOT_CONFIGURED')
+"
+```
-# Ensure openrunner is importable
-for p in [os.path.expanduser('~/.local/lib/python3.12/site-packages'),
-          os.path.expanduser('~/.local/lib/python3.11/site-packages'),
-          os.path.expanduser('~/.local/lib/python3.10/site-packages')]:
-    if os.path.isdir(p):
-        sys.path.insert(0, p)
+### Step 2a: If NOT_CONFIGURED or INCOMPLETE
-from openrunner.session import discover_in_directory, parse_claude_session, parse_generic_session, sync_session_to_openrunner
-# Find session dir for current tool
-session_dirs = [
-    Path.home() / '.claude' / 'projects',
-    Path.home() / '.codex' / 'sessions',
-    Path.home() / '.qwen-code' / 'sessions',
-    Path.home() / '.opencode' / 'sessions',
-]
-# Find most recent session file across all sources
-all_sessions = []
-for d in session_dirs:
-    if d.exists():
-        for f in d.rglob('*.jsonl'):
-            if f.stat().st_size > 100 and '.meta.' not in f.name:
-                all_sessions.append(f)
-        for f in d.rglob('*.json'):
-            if f.stat().st_size > 100 and '.meta.' not in f.name:
-                all_sessions.append(f)
-if not all_sessions:
-    print('No sessions found.')
-    sys.exit(1)
+Ask the user for their OpenRunner API key and base URL. Then list projects and ask them to pick one. Save to ~/.openrunner/session_config.json.
-latest = max(all_sessions, key=lambda f: f.stat().st_mtime)
-print(f'Syncing: {{latest.name}} ({{latest.stat().st_size // 1024}} KB)')
+### Step 2b: If CONFIGURED (or after setup)
-if latest.suffix == '.jsonl' and '.claude' in str(latest):
-    parsed = parse_claude_session(latest)
-else:
-    source = 'codex' if '.codex' in str(latest) else 'qwen' if '.qwen' in str(latest) else 'opencode'
-    parsed = parse_generic_session(latest, source)
+Sync the current session. Use $ARGUMENTS as project override if provided:
+```bash
+python3 -c "
+import sys, os
+from pathlib import Path
+for p in [os.path.expanduser(f'~/.local/lib/python3.{{v}}/site-packages') for v in (12,11,10)]:
+    if os.path.isdir(p): sys.path.insert(0, p)
+from openrunner.session import parse_claude_session, sync_session_to_openrunner, get_session_config
+cwd = Path.cwd()
+cwd_key = '-' + str(cwd).replace('/', '-').lstrip('-')
+project_dir = Path.home() / '.claude' / 'projects' / cwd_key
+if not project_dir.exists():
+    for d in (Path.home() / '.claude' / 'projects').iterdir():
+        if d.is_dir() and cwd_key in d.name:
+            project_dir = d
+            break
+sessions = sorted(
+    [f for f in project_dir.rglob('*.jsonl') if f.stat().st_size > 100 and '.meta.' not in f.name],
+    key=lambda f: f.stat().st_mtime, reverse=True,
+) if project_dir.exists() else []
+if not sessions:
+    print('No session files found.')
+    sys.exit(1)
+latest = sessions[0]
+print(f'Syncing: {{latest.name}} ({{latest.stat().st_size // 1024}} KB)')
+parsed = parse_claude_session(latest)
 print(f'  Messages: {{parsed[\"message_count\"]}} ({{parsed[\"user_message_count\"]}} user)')
 print(f'  Tokens: {{parsed.get(\"total_tokens\", 0):,}}')
-project = os.environ.get('OPENRUNNER_SESSION_PROJECT', 'research-sessions')
-run_id = sync_session_to_openrunner(parsed, project=project)
+project_override = '$ARGUMENTS'.strip() or None
+run_id = sync_session_to_openrunner(parsed, project=project_override)
 if run_id:
-    base = os.environ.get('OPENRUNNER_BASE_URL', 'https://openrun.gladia.io')
+    config = get_session_config()
+    base = config.get('base_url', 'https://openrun.gladia.io')
     print(f'Synced -> {{base}}/runs/{{run_id}}')
 else:
-    print('Failed. Run: openrunner login')
+    print('Sync failed. Run: openrunner session setup')
 "
 ```
+### Step 3: Report the run URL and stats to the user.
 """
 LOG_NOTE_CMD = """---

openrunner_sdk-2.8.0/openrunner/redact.py ADDED Viewed

@@ -0,0 +1,339 @@
+"""Session redaction — detect and mask PII/secrets before sync.
+Inspired by Dataiku's kiji-proxy (DeBERTa NER + synthetic replacement).
+Two modes:
+1. Regex-based (fast, no deps): API keys, tokens, passwords, IPs, emails, paths
+2. NER-based (accurate, needs transformers): full PII detection via DeBERTa
+Redaction can be configured at:
+- Client side: per-sync via `openrunner session sync --redact`
+- Organization level: org setting forces redaction for all members
+- User level: user setting in session_config.json
+"""
+from __future__ import annotations
+import hashlib
+import os
+import re
+from typing import Any
+# ---------------------------------------------------------------------------
+# Regex patterns for secrets and common PII
+# ---------------------------------------------------------------------------
+# Each entry is (label, compiled pattern). Capture group 1 of a pattern is the
+# span that gets replaced; the label selects the replacement format.
+SECRET_PATTERNS: list[tuple[str, re.Pattern]] = [
+    # API keys / tokens (generic patterns)
+    ("API_KEY", re.compile(r"\b(sk-[a-zA-Z0-9\-_]{20,})\b")),  # OpenAI
+    ("API_KEY", re.compile(r"\b(or_[a-zA-Z0-9_\-]{20,})\b")),  # OpenRunner
+    ("API_KEY", re.compile(r"\b(ghp_[a-zA-Z0-9]{36,})\b")),  # GitHub PAT
+    ("API_KEY", re.compile(r"\b(gho_[a-zA-Z0-9]{36,})\b")),  # GitHub OAuth
+    ("API_KEY", re.compile(r"\b(github_pat_[a-zA-Z0-9_]{40,})\b")),  # GitHub fine-grained
+    ("API_KEY", re.compile(r"\b(pypi-[a-zA-Z0-9_\-]{50,})\b")),  # PyPI
+    ("API_KEY", re.compile(r"\b(npm_[a-zA-Z0-9]{30,})\b")),  # npm
+    ("API_KEY", re.compile(r"\b(xox[bsapr]-[a-zA-Z0-9\-]{10,})\b")),  # Slack
+    ("API_KEY", re.compile(r"\b(AKIA[0-9A-Z]{16})\b")),  # AWS access key
+    # NOTE(review): the lookahead only fires when "secret" appears AFTER the
+    # 40-char value on the same line; `SECRET=<value>` is not covered here.
+    ("SECRET", re.compile(r"\b([a-zA-Z0-9/+=]{40})\b(?=.*(?:secret|SECRET))")),  # AWS secret
+    ("API_KEY", re.compile(r"\b(AIza[0-9A-Za-z_\-]{35})\b")),  # Google API
+    ("TOKEN", re.compile(r"\b(eyJ[a-zA-Z0-9_\-]{20,}\.[a-zA-Z0-9_\-]{20,}\.[a-zA-Z0-9_\-]{20,})\b")),  # JWT
+    # Passwords in config/env
+    ("PASSWORD", re.compile(r"(?i)(?:password|passwd|pwd)\s*[=:]\s*['\"]?([^\s'\"]{6,})['\"]?")),
+    # Connection strings. Accepts the long scheme spellings (postgresql://,
+    # rediss://) and an optional "+driver" suffix (mongodb+srv://,
+    # postgresql+psycopg2://) in addition to the short forms.
+    ("CONNECTION_STRING", re.compile(r"(?i)((?:postgres(?:ql)?|mysql|mariadb|mongodb|rediss?|amqps?)(?:\+[a-z0-9]+)?://[^\s'\"]+)")),
+    # Private keys (PEM blocks, including OPENSSH and ENCRYPTED headers)
+    ("PRIVATE_KEY", re.compile(r"(-----BEGIN (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----[^-]+-----END (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----)", re.DOTALL)),
+    # Email addresses
+    ("EMAIL", re.compile(r"\b([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})\b")),
+    # IP addresses (non-localhost, non-docker)
+    ("IP_ADDRESS", re.compile(r"\b((?:(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)\.){3}(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d))\b")),
+    # Home directory paths (contain username)
+    ("PATH", re.compile(r"(/(?:home|Users)/[a-zA-Z0-9._\-]+)")),
+]
+# IPs to NOT redact (internal/docker/localhost)
+SAFE_IPS = {"127.0.0.1", "0.0.0.0", "localhost", "172.17.0.1", "172.18.0.1"}
+SAFE_IP_PREFIXES = ("10.", "172.16.", "172.17.", "172.18.", "192.168.")
+# Emails to NOT redact
+SAFE_EMAILS = {"noreply@anthropic.com", "noreply@github.com"}
+def _is_safe_ip(ip: str) -> bool:
+    """Return True when *ip* is on the do-not-redact list.
+    An address is safe if it is listed verbatim in SAFE_IPS or begins with
+    one of the SAFE_IP_PREFIXES.
+    """
+    if ip in SAFE_IPS:
+        return True
+    # str.startswith accepts a tuple and tests each prefix in turn.
+    return ip.startswith(SAFE_IP_PREFIXES)
+def _generate_replacement(label: str, original: str) -> str:
+    """Build the placeholder that stands in for *original*.
+    The placeholder embeds a short SHA-256 digest of the original value, so
+    the same input always yields the same output.
+    Args:
+        label: Entity label (e.g. "API_KEY", "EMAIL"); selects the format.
+        original: The matched sensitive text.
+    Returns:
+        The replacement string; unknown labels get "[REDACTED:<label>]".
+    """
+    digest = hashlib.sha256(original.encode()).hexdigest()[:8]
+    # Labels whose placeholder is simply "<prefix><digest>".
+    prefixed = {
+        "API_KEY": "REDACTED_KEY_",
+        "SECRET": "REDACTED_SECRET_",
+        "TOKEN": "REDACTED_TOKEN_",
+        "PASSWORD": "REDACTED_PASS_",
+    }
+    if label in prefixed:
+        return prefixed[label] + digest
+    if label == "CONNECTION_STRING":
+        # Keep the protocol so the kind of store stays visible.
+        scheme = original.split("://")[0] if "://" in original else "db"
+        return f"{scheme}://REDACTED_{digest}"
+    if label == "PRIVATE_KEY":
+        return "-----BEGIN PRIVATE KEY-----\nREDACTED\n-----END PRIVATE KEY-----"
+    if label == "EMAIL":
+        # Only the local part is replaced; the domain is kept.
+        domain = original.split("@")[1] if "@" in original else "example.com"
+        return f"user_{digest[:4]}@{domain}"
+    if label == "IP_ADDRESS":
+        return f"x.x.x.{digest[:2]}"
+    if label == "PATH":
+        return f"/home/user_{digest[:4]}"
+    return f"[REDACTED:{label}]"
+# ---------------------------------------------------------------------------
+# Core redaction engine
+# ---------------------------------------------------------------------------
+class RedactionResult:
+    """Outcome of one redaction pass over a piece of text."""
+    def __init__(self, text: str, entities: list[dict], mapping: dict[str, str]):
+        # text:     the redacted output
+        # entities: one dict per replacement, [{label, start, end, original, replacement}]
+        # mapping:  original -> replacement (for restoration)
+        self.text, self.entities, self.mapping = text, entities, mapping
+    @property
+    def redacted_count(self) -> int:
+        """Number of entities recorded on this result."""
+        return len(self.entities)
+def redact_text(text: str, mode: str = "regex") -> RedactionResult:
+    """Redact sensitive content from *text* with the selected engine.
+    Args:
+        text: Input text to redact.
+        mode: "ner" selects the ML-based engine; any other value selects the
+            regex engine.
+    Returns:
+        RedactionResult holding the redacted text and its metadata.
+    """
+    engine = _redact_ner if mode == "ner" else _redact_regex
+    return engine(text)
+def _redact_regex(text: str) -> RedactionResult:
+    """Pattern-based redaction driven by SECRET_PATTERNS.
+    Collects every pattern hit, drops whitelisted and very short values,
+    resolves overlaps, then substitutes a placeholder for each surviving span.
+    """
+    found = []
+    for label, pattern in SECRET_PATTERNS:
+        for m in pattern.finditer(text):
+            # Prefer capture group 1 when the pattern has one, else the whole match.
+            grp = 1 if m.lastindex else 0
+            value = m.group(grp)
+            # Whitelisted values stay as they are.
+            if label == "IP_ADDRESS" and _is_safe_ip(value):
+                continue
+            if label == "EMAIL" and value.lower() in SAFE_EMAILS:
+                continue
+            # Very short hits are most likely false positives.
+            if len(value) < 6:
+                continue
+            found.append({
+                "label": label,
+                "start": m.start(grp),
+                "end": m.end(grp),
+                "original": value,
+                "replacement": _generate_replacement(label, value),
+            })
+    # Earliest start first; among equal starts the longest span first.
+    found.sort(key=lambda e: (e["start"], -(e["end"] - e["start"])))
+    kept = []
+    cursor = -1
+    for ent in found:
+        if ent["start"] < cursor:
+            continue  # overlaps a span that was already accepted
+        kept.append(ent)
+        cursor = ent["end"]
+    # The kept spans are sorted and disjoint, so the output can be stitched
+    # together in a single left-to-right pass.
+    pieces = []
+    pos = 0
+    for ent in kept:
+        pieces.append(text[pos:ent["start"]])
+        pieces.append(ent["replacement"])
+        pos = ent["end"]
+    pieces.append(text[pos:])
+    # Filled last-to-first, matching the order replacements were applied in before.
+    mapping = {ent["original"]: ent["replacement"] for ent in reversed(kept)}
+    return RedactionResult("".join(pieces), kept, mapping)
+def _redact_ner(text: str) -> RedactionResult:
+    """NER-based redaction using a token-classification pipeline.
+    Falls back to regex-only redaction when transformers is not installed,
+    when the model cannot be loaded, or when inference raises.
+    The model only sees the first 10,000 characters; the regex pass still
+    runs over the full text and its hits are merged in wherever they do not
+    overlap an NER entity.
+    """
+    try:
+        from transformers import pipeline
+    except ImportError:
+        return _redact_regex(text)
+    # Load model (cached in a module global after the first successful call)
+    global _ner_pipeline
+    if globals().get("_ner_pipeline") is None:
+        try:
+            _ner_pipeline = pipeline(
+                "token-classification",
+                model="DataikuNLP/kiji-pii-model-onnx",
+                aggregation_strategy="simple",
+            )
+        except Exception:
+            # Deliberate best-effort: fall back to regex if model load fails
+            return _redact_regex(text)
+    # Run NER
+    try:
+        ner_results = _ner_pipeline(text[:10000])  # Cap at 10k chars
+    except Exception:
+        return _redact_regex(text)
+    entities = []
+    for ent in ner_results:
+        if ent.get("score", 0) < 0.25:
+            continue
+        start, end = ent.get("start"), ent.get("end")
+        # Offsets are optional in the pipeline output; without them the span
+        # cannot be located in the text, so skip it. Empty spans are skipped too.
+        if start is None or end is None or end <= start:
+            continue
+        label = ent.get("entity_group", ent.get("entity", "UNKNOWN"))
+        # Use the exact source slice rather than the pipeline's "word", which
+        # may not match the input character-for-character.
+        original = text[start:end]
+        entities.append({
+            "label": label,
+            "start": start,
+            "end": end,
+            "original": original,
+            "replacement": _generate_replacement(label, original),
+        })
+    # Also run regex for secrets (NER won't catch API keys)
+    regex_result = _redact_regex(text)
+    # Merge: add regex entities that don't overlap with NER
+    ner_spans = [(e["start"], e["end"]) for e in entities]
+    for re_ent in regex_result.entities:
+        overlaps = any(
+            re_ent["start"] < span_end and re_ent["end"] > span_start
+            for span_start, span_end in ner_spans
+        )
+        if not overlaps:
+            entities.append(re_ent)
+    # Drop any remaining overlaps (earliest start wins, longest on ties);
+    # offset-based replacement is only safe on disjoint spans.
+    entities.sort(key=lambda e: (e["start"], -(e["end"] - e["start"])))
+    disjoint = []
+    last_end = -1
+    for e in entities:
+        if e["start"] >= last_end:
+            disjoint.append(e)
+            last_end = e["end"]
+    # Apply replacements (end-to-start to preserve offsets)
+    result_text = text
+    mapping = {}
+    for e in reversed(disjoint):
+        result_text = result_text[:e["start"]] + e["replacement"] + result_text[e["end"]:]
+        mapping[e["original"]] = e["replacement"]
+    return RedactionResult(result_text, disjoint, mapping)
+# ---------------------------------------------------------------------------
+# Session-level redaction
+# ---------------------------------------------------------------------------
+def redact_session(parsed: dict[str, Any], mode: str = "regex") -> dict[str, Any]:
+    """Return a redacted deep copy of a parsed session dict.
+    Redacts the "content" of every message, the "first_message" and
+    "summary" fields, and the home-directory user name in each
+    "files_touched" path. The input dict is not mutated.
+    A "_redaction" entry recording the mode and the number of redacted
+    entities is added to the copy.
+    """
+    import copy
+    result = copy.deepcopy(parsed)
+    total_redacted = 0
+    # Messages
+    # NOTE(review): assumes each message's "content" is a str — confirm against the parser.
+    for msg in result.get("messages", []):
+        if not msg.get("content"):
+            continue
+        outcome = redact_text(msg["content"], mode=mode)
+        msg["content"] = outcome.text
+        total_redacted += outcome.redacted_count
+    # Top-level free-text fields
+    for field in ("first_message", "summary"):
+        if not result.get(field):
+            continue
+        outcome = redact_text(result[field], mode=mode)
+        result[field] = outcome.text
+        total_redacted += outcome.redacted_count
+    # File paths: only the home-dir user name is replaced; these substitutions
+    # are not added to the entity count.
+    if result.get("files_touched"):
+        scrubbed = []
+        for path in result["files_touched"]:
+            scrubbed.append(re.sub(r"/(?:home|Users)/[^/]+", "/home/user", path))
+        result["files_touched"] = scrubbed
+    result["_redaction"] = {
+        "mode": mode,
+        "entities_redacted": total_redacted,
+    }
+    return result
+# ---------------------------------------------------------------------------
+# Redaction policy config
+# ---------------------------------------------------------------------------
+class RedactionPolicy:
+    """Redaction policy: determines if/how to redact based on config."""
+    def __init__(
+        self,
+        enabled: bool = False,
+        mode: str = "regex",  # "regex" or "ner"
+        force: bool = False,  # org-level forced redaction
+    ):
+        self.enabled = enabled
+        self.mode = mode
+        self.force = force
+    @classmethod
+    def from_config(cls, config: dict) -> "RedactionPolicy":
+        """Load policy from the "redaction" section of a session config.
+        A missing or null section, and a missing or null "mode", fall back
+        to the defaults (disabled, "regex", not forced).
+        """
+        # `or {}` also covers an explicit `"redaction": null` in the JSON
+        # file, which .get(key, {}) would hand back as None.
+        redaction = config.get("redaction") or {}
+        return cls(
+            enabled=redaction.get("enabled", False),
+            mode=redaction.get("mode") or "regex",
+            force=redaction.get("force", False),
+        )
+    @classmethod
+    def from_org_settings(cls, org_settings: dict) -> "RedactionPolicy":
+        """Load policy from organization-level settings.
+        When "force_session_redaction" is truthy the policy is enabled and
+        forced, using "redaction_mode" (default "regex"); otherwise the
+        policy is disabled.
+        """
+        if org_settings.get("force_session_redaction"):
+            return cls(enabled=True, mode=org_settings.get("redaction_mode") or "regex", force=True)
+        return cls(enabled=False)
+    def should_redact(self, user_choice: bool | None = None) -> bool:
+        """Determine if redaction should be applied.
+        Priority: org force > user explicit choice > config default.
+        Args:
+            user_choice: Explicit per-call choice, or None to defer to config.
+        """
+        if self.force:
+            return True
+        if user_choice is not None:
+            return user_choice
+        return self.enabled

{openrunner_sdk-2.7.0 → openrunner_sdk-2.8.0}/openrunner/run.py RENAMED Viewed

@@ -1188,15 +1188,7 @@ class Run:
         Returns:
             Path to the local artifact directory, or None on failure.
         """
-        if not self._client:
-            return None
-        return self._client.download_artifact(
-            run_id=self._run_id,
-            artifact_name=name,
-            dest_dir=dest_dir,
-            version=version,
-            alias=alias,
-        )
+        return self.use_artifact(name, version=version, alias=alias)
     def link_model(
         self,

openrunner-sdk 2.7.0__tar.gz → 2.8.0__tar.gz

openrunner-sdk 2.7.0tar.gz → 2.8.0tar.gz