openrunner-sdk 2.7.0__tar.gz → 2.7.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/PKG-INFO +1 -1
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/__init__.py +1 -1
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/cli.py +7 -2
- openrunner_sdk-2.7.1/openrunner/redact.py +339 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/run.py +1 -9
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/session.py +48 -2
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/pyproject.toml +1 -1
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/.gitignore +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/=6.0 +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/=8.1 +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/README.md +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/api_client.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/artifact.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/buffer.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/cache.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/config.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/cost.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/dataset.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/environment.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/evaluation.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/feedback.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/git_info.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/guardrails.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/install_commands.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/__init__.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/accelerate.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/anthropic_tracer.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/catboost.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/diffusers.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/fastai.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/forced_alignment.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/gladia.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/gymnasium.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/huggingface.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/hydra.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/ignite.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/jax.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/keras.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/langchain.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/lightgbm.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/lightning.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/llamaindex.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/openai_finetune.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/openai_tracer.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/optuna.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/pytorch.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/sb3.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/sklearn.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/tensorflow.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/trl.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/tts.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/ultralytics.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/voice_agent.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/whisper.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/xgboost.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/launch.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/media.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/migrate.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/model.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/offline.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/pii.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/plot.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/prompt.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/query_api.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/scorers.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/sender.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/settings.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/summary.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/sweep.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/system_metrics.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/tensorboard.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/trace.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/transcript_formatter.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/wal.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/wandb_compat/__init__.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/wandb_compat/_shim.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/wer.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/__init__.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/conftest.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_alert.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_aliases.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_api_client.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_artifact.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_buffer.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_cache.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_class_scorers.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_cli.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_config.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_evaluation.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_finish.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_git_info.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_init.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_integration_fastai.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_integration_huggingface.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_integration_keras.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_integration_langchain.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_integration_lightning.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_integration_pytorch.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_integration_sklearn.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_integration_xgboost.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_launch.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_log.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_log_code.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_media.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_migrate.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_offline.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_offline_sync.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_pii.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_plot.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_query_api.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_resume.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_sdk_features.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_sender.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_summary.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_sweep.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_system_metrics.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_trace.py +0 -0
- {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_wandb_compat.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: openrunner-sdk
|
|
3
|
-
Version: 2.7.0
|
|
3
|
+
Version: 2.7.1
|
|
4
4
|
Summary: OpenRunner SDK - W&B-compatible ML experiment tracking client
|
|
5
5
|
Project-URL: Homepage, https://github.com/jqueguiner/openrunner
|
|
6
6
|
Project-URL: Repository, https://github.com/jqueguiner/openrunner
|
|
@@ -120,7 +120,7 @@ launch.from_run = _launch_from_run # type: ignore[attr-defined]
|
|
|
120
120
|
# openrunner.trace.patch_openai() syntax
|
|
121
121
|
trace.patch_openai = _patch_openai # type: ignore[attr-defined]
|
|
122
122
|
|
|
123
|
-
__version__ = "2.7.0"
|
|
123
|
+
__version__ = "2.7.1"
|
|
124
124
|
|
|
125
125
|
logger = logging.getLogger("openrunner")
|
|
126
126
|
|
|
@@ -2391,13 +2391,18 @@ def session_setup() -> None:
|
|
|
2391
2391
|
@click.option("--hours", "-h", default=24.0, help="Look back N hours (default: 24)")
|
|
2392
2392
|
@click.option("--project", "-p", default=None, help="Target project (default: from config)")
|
|
2393
2393
|
@click.option("--dry-run", is_flag=True, help="Show what would be synced without uploading")
|
|
2394
|
-
def session_sync(directory: str | None, hours: float, project: str | None, dry_run: bool) -> None:
|
|
2394
|
+
@click.option("--redact/--no-redact", default=None, help="Force redaction on/off (default: use config)")
|
|
2395
|
+
@click.option("--redact-mode", type=click.Choice(["regex", "ner"]), default=None, help="Redaction mode")
|
|
2396
|
+
@click.option("--public", "visibility", flag_value="public", help="Make session public")
|
|
2397
|
+
@click.option("--private", "visibility", flag_value="private", default=True, help="Keep session private (default)")
|
|
2398
|
+
def session_sync(directory: str | None, hours: float, project: str | None, dry_run: bool, redact: bool | None, redact_mode: str | None, visibility: str) -> None:
|
|
2395
2399
|
"""Sync AI sessions to OpenRunner.
|
|
2396
2400
|
|
|
2397
2401
|
If DIRECTORY is given, scan that path for .jsonl/.json session files.
|
|
2398
2402
|
Otherwise, scan default locations (~/.claude, ~/.codex, ~/.qwen-code).
|
|
2399
2403
|
|
|
2400
2404
|
On first run, prompts for API key and project selection.
|
|
2405
|
+
Redaction strips API keys, tokens, emails, passwords before upload.
|
|
2401
2406
|
"""
|
|
2402
2407
|
from pathlib import Path
|
|
2403
2408
|
from openrunner.session import discover_all_sessions, discover_in_directory, sync_all, get_session_config, interactive_setup
|
|
@@ -2427,7 +2432,7 @@ def session_sync(directory: str | None, hours: float, project: str | None, dry_r
|
|
|
2427
2432
|
if dry_run:
|
|
2428
2433
|
return
|
|
2429
2434
|
|
|
2430
|
-
synced = sync_all(since_hours=hours, project=project, directory=Path(directory) if directory else None)
|
|
2435
|
+
synced = sync_all(since_hours=hours, project=project, directory=Path(directory) if directory else None, redact=redact, redact_mode=redact_mode, visibility=visibility)
|
|
2431
2436
|
if synced:
|
|
2432
2437
|
click.echo(f"Synced {len(synced)} session(s) to OpenRunner.")
|
|
2433
2438
|
for run_id in synced:
|
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
"""Session redaction — detect and mask PII/secrets before sync.
|
|
2
|
+
|
|
3
|
+
Inspired by Dataiku's kiji-proxy (DeBERTa NER + synthetic replacement).
|
|
4
|
+
|
|
5
|
+
Two modes:
|
|
6
|
+
1. Regex-based (fast, no deps): API keys, tokens, passwords, IPs, emails, paths
|
|
7
|
+
2. NER-based (accurate, needs transformers): full PII detection via DeBERTa
|
|
8
|
+
|
|
9
|
+
Redaction can be configured at:
|
|
10
|
+
- Client side: per-sync via `openrunner session sync --redact`
|
|
11
|
+
- Organization level: org setting forces redaction for all members
|
|
12
|
+
- User level: user setting in session_config.json
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import hashlib
|
|
18
|
+
import os
|
|
19
|
+
import re
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
# Regex patterns for secrets and common PII
|
|
24
|
+
# ---------------------------------------------------------------------------
|
|
25
|
+
|
|
26
|
+
SECRET_PATTERNS: list[tuple[str, re.Pattern]] = [
|
|
27
|
+
# API keys / tokens (generic patterns)
|
|
28
|
+
("API_KEY", re.compile(r"\b(sk-[a-zA-Z0-9\-_]{20,})\b")), # OpenAI
|
|
29
|
+
("API_KEY", re.compile(r"\b(or_[a-zA-Z0-9_\-]{20,})\b")), # OpenRunner
|
|
30
|
+
("API_KEY", re.compile(r"\b(ghp_[a-zA-Z0-9]{36,})\b")), # GitHub PAT
|
|
31
|
+
("API_KEY", re.compile(r"\b(gho_[a-zA-Z0-9]{36,})\b")), # GitHub OAuth
|
|
32
|
+
("API_KEY", re.compile(r"\b(github_pat_[a-zA-Z0-9_]{40,})\b")), # GitHub fine-grained
|
|
33
|
+
("API_KEY", re.compile(r"\b(pypi-[a-zA-Z0-9_\-]{50,})\b")), # PyPI
|
|
34
|
+
("API_KEY", re.compile(r"\b(npm_[a-zA-Z0-9]{30,})\b")), # npm
|
|
35
|
+
("API_KEY", re.compile(r"\b(xox[bsapr]-[a-zA-Z0-9\-]{10,})\b")), # Slack
|
|
36
|
+
("API_KEY", re.compile(r"\b(AKIA[0-9A-Z]{16})\b")), # AWS access key
|
|
37
|
+
("SECRET", re.compile(r"\b([a-zA-Z0-9/+=]{40})\b(?=.*(?:secret|SECRET))")), # AWS secret
|
|
38
|
+
("API_KEY", re.compile(r"\b(AIza[0-9A-Za-z_\-]{35})\b")), # Google API
|
|
39
|
+
("TOKEN", re.compile(r"\b(eyJ[a-zA-Z0-9_\-]{20,}\.[a-zA-Z0-9_\-]{20,}\.[a-zA-Z0-9_\-]{20,})\b")), # JWT
|
|
40
|
+
# Passwords in config/env
|
|
41
|
+
("PASSWORD", re.compile(r"(?i)(?:password|passwd|pwd)\s*[=:]\s*['\"]?([^\s'\"]{6,})['\"]?")),
|
|
42
|
+
# Connection strings
|
|
43
|
+
("CONNECTION_STRING", re.compile(r"(?i)((?:postgres|mysql|mongodb|redis)://[^\s'\"]+)")),
|
|
44
|
+
# Private keys
|
|
45
|
+
("PRIVATE_KEY", re.compile(r"(-----BEGIN (?:RSA |EC |DSA )?PRIVATE KEY-----[^-]+-----END (?:RSA |EC |DSA )?PRIVATE KEY-----)", re.DOTALL)),
|
|
46
|
+
# Email addresses
|
|
47
|
+
("EMAIL", re.compile(r"\b([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})\b")),
|
|
48
|
+
# IP addresses (non-localhost, non-docker)
|
|
49
|
+
("IP_ADDRESS", re.compile(r"\b((?:(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)\.){3}(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d))\b")),
|
|
50
|
+
# Home directory paths (contain username)
|
|
51
|
+
("PATH", re.compile(r"(/(?:home|Users)/[a-zA-Z0-9._\-]+)")),
|
|
52
|
+
]
|
|
53
|
+
|
|
54
|
+
# IPs to NOT redact (internal/docker/localhost)
|
|
55
|
+
SAFE_IPS = {"127.0.0.1", "0.0.0.0", "localhost", "172.17.0.1", "172.18.0.1"}
|
|
56
|
+
SAFE_IP_PREFIXES = ("10.", "172.16.", "172.17.", "172.18.", "192.168.")
|
|
57
|
+
|
|
58
|
+
# Emails to NOT redact
|
|
59
|
+
SAFE_EMAILS = {"noreply@anthropic.com", "noreply@github.com"}
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _is_safe_ip(ip: str) -> bool:
|
|
63
|
+
return ip in SAFE_IPS or any(ip.startswith(p) for p in SAFE_IP_PREFIXES)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _generate_replacement(label: str, original: str) -> str:
|
|
67
|
+
"""Generate a deterministic replacement (same input -> same output)."""
|
|
68
|
+
# Use hash to generate consistent replacement
|
|
69
|
+
h = hashlib.sha256(original.encode()).hexdigest()[:8]
|
|
70
|
+
|
|
71
|
+
if label == "API_KEY":
|
|
72
|
+
return f"REDACTED_KEY_{h}"
|
|
73
|
+
elif label == "SECRET":
|
|
74
|
+
return f"REDACTED_SECRET_{h}"
|
|
75
|
+
elif label == "TOKEN":
|
|
76
|
+
return f"REDACTED_TOKEN_{h}"
|
|
77
|
+
elif label == "PASSWORD":
|
|
78
|
+
return f"REDACTED_PASS_{h}"
|
|
79
|
+
elif label == "CONNECTION_STRING":
|
|
80
|
+
# Keep protocol, redact rest
|
|
81
|
+
proto = original.split("://")[0] if "://" in original else "db"
|
|
82
|
+
return f"{proto}://REDACTED_{h}"
|
|
83
|
+
elif label == "PRIVATE_KEY":
|
|
84
|
+
return "-----BEGIN PRIVATE KEY-----\nREDACTED\n-----END PRIVATE KEY-----"
|
|
85
|
+
elif label == "EMAIL":
|
|
86
|
+
domain = original.split("@")[1] if "@" in original else "example.com"
|
|
87
|
+
return f"user_{h[:4]}@{domain}"
|
|
88
|
+
elif label == "IP_ADDRESS":
|
|
89
|
+
return f"x.x.x.{h[:2]}"
|
|
90
|
+
elif label == "PATH":
|
|
91
|
+
return f"/home/user_{h[:4]}"
|
|
92
|
+
else:
|
|
93
|
+
return f"[REDACTED:{label}]"
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# ---------------------------------------------------------------------------
|
|
97
|
+
# Core redaction engine
|
|
98
|
+
# ---------------------------------------------------------------------------
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class RedactionResult:
|
|
102
|
+
"""Result of redacting text."""
|
|
103
|
+
|
|
104
|
+
def __init__(self, text: str, entities: list[dict], mapping: dict[str, str]):
|
|
105
|
+
self.text = text
|
|
106
|
+
self.entities = entities # [{label, start, end, original, replacement}]
|
|
107
|
+
self.mapping = mapping # original -> replacement (for restoration)
|
|
108
|
+
|
|
109
|
+
@property
|
|
110
|
+
def redacted_count(self) -> int:
|
|
111
|
+
return len(self.entities)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def redact_text(text: str, mode: str = "regex") -> RedactionResult:
|
|
115
|
+
"""Redact sensitive content from text.
|
|
116
|
+
|
|
117
|
+
Args:
|
|
118
|
+
text: Input text to redact.
|
|
119
|
+
mode: "regex" (fast, pattern-based) or "ner" (ML-based, needs transformers).
|
|
120
|
+
|
|
121
|
+
Returns:
|
|
122
|
+
RedactionResult with redacted text and metadata.
|
|
123
|
+
"""
|
|
124
|
+
if mode == "ner":
|
|
125
|
+
return _redact_ner(text)
|
|
126
|
+
return _redact_regex(text)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _redact_regex(text: str) -> RedactionResult:
|
|
130
|
+
"""Fast regex-based redaction for secrets and common PII."""
|
|
131
|
+
entities = []
|
|
132
|
+
|
|
133
|
+
for label, pattern in SECRET_PATTERNS:
|
|
134
|
+
for match in pattern.finditer(text):
|
|
135
|
+
original = match.group(1) if match.lastindex else match.group(0)
|
|
136
|
+
start = match.start(1) if match.lastindex else match.start(0)
|
|
137
|
+
end = match.end(1) if match.lastindex else match.end(0)
|
|
138
|
+
|
|
139
|
+
# Skip safe values
|
|
140
|
+
if label == "IP_ADDRESS" and _is_safe_ip(original):
|
|
141
|
+
continue
|
|
142
|
+
if label == "EMAIL" and original.lower() in SAFE_EMAILS:
|
|
143
|
+
continue
|
|
144
|
+
# Skip very short matches (likely false positives)
|
|
145
|
+
if len(original) < 6:
|
|
146
|
+
continue
|
|
147
|
+
|
|
148
|
+
entities.append({
|
|
149
|
+
"label": label,
|
|
150
|
+
"start": start,
|
|
151
|
+
"end": end,
|
|
152
|
+
"original": original,
|
|
153
|
+
"replacement": _generate_replacement(label, original),
|
|
154
|
+
})
|
|
155
|
+
|
|
156
|
+
# Deduplicate overlapping entities (keep longest)
|
|
157
|
+
entities.sort(key=lambda e: (e["start"], -(e["end"] - e["start"])))
|
|
158
|
+
deduped = []
|
|
159
|
+
last_end = -1
|
|
160
|
+
for e in entities:
|
|
161
|
+
if e["start"] >= last_end:
|
|
162
|
+
deduped.append(e)
|
|
163
|
+
last_end = e["end"]
|
|
164
|
+
|
|
165
|
+
# Apply replacements (end-to-start to preserve offsets)
|
|
166
|
+
result_text = text
|
|
167
|
+
mapping = {}
|
|
168
|
+
for e in reversed(deduped):
|
|
169
|
+
result_text = result_text[:e["start"]] + e["replacement"] + result_text[e["end"]:]
|
|
170
|
+
mapping[e["original"]] = e["replacement"]
|
|
171
|
+
|
|
172
|
+
return RedactionResult(result_text, deduped, mapping)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _redact_ner(text: str) -> RedactionResult:
|
|
176
|
+
"""NER-based redaction using DeBERTa model (DataikuNLP/kiji-pii-model).
|
|
177
|
+
|
|
178
|
+
Falls back to regex if transformers not installed.
|
|
179
|
+
"""
|
|
180
|
+
try:
|
|
181
|
+
from transformers import pipeline
|
|
182
|
+
except ImportError:
|
|
183
|
+
return _redact_regex(text)
|
|
184
|
+
|
|
185
|
+
# Load model (cached after first call)
|
|
186
|
+
global _ner_pipeline
|
|
187
|
+
if "_ner_pipeline" not in globals() or _ner_pipeline is None:
|
|
188
|
+
try:
|
|
189
|
+
_ner_pipeline = pipeline(
|
|
190
|
+
"token-classification",
|
|
191
|
+
model="DataikuNLP/kiji-pii-model-onnx",
|
|
192
|
+
aggregation_strategy="simple",
|
|
193
|
+
)
|
|
194
|
+
except Exception:
|
|
195
|
+
# Fall back to regex if model load fails
|
|
196
|
+
return _redact_regex(text)
|
|
197
|
+
|
|
198
|
+
# Run NER
|
|
199
|
+
try:
|
|
200
|
+
ner_results = _ner_pipeline(text[:10000]) # Cap at 10k chars
|
|
201
|
+
except Exception:
|
|
202
|
+
return _redact_regex(text)
|
|
203
|
+
|
|
204
|
+
entities = []
|
|
205
|
+
for ent in ner_results:
|
|
206
|
+
if ent.get("score", 0) < 0.25:
|
|
207
|
+
continue
|
|
208
|
+
label = ent.get("entity_group", ent.get("entity", "UNKNOWN"))
|
|
209
|
+
original = ent.get("word", "")
|
|
210
|
+
entities.append({
|
|
211
|
+
"label": label,
|
|
212
|
+
"start": ent["start"],
|
|
213
|
+
"end": ent["end"],
|
|
214
|
+
"original": original,
|
|
215
|
+
"replacement": _generate_replacement(label, original),
|
|
216
|
+
})
|
|
217
|
+
|
|
218
|
+
# Also run regex for secrets (NER won't catch API keys)
|
|
219
|
+
regex_result = _redact_regex(text)
|
|
220
|
+
# Merge: add regex entities that don't overlap with NER
|
|
221
|
+
for re_ent in regex_result.entities:
|
|
222
|
+
overlaps = any(
|
|
223
|
+
re_ent["start"] < e["end"] and re_ent["end"] > e["start"]
|
|
224
|
+
for e in entities
|
|
225
|
+
)
|
|
226
|
+
if not overlaps:
|
|
227
|
+
entities.append(re_ent)
|
|
228
|
+
|
|
229
|
+
entities.sort(key=lambda e: e["start"])
|
|
230
|
+
|
|
231
|
+
# Apply replacements
|
|
232
|
+
result_text = text
|
|
233
|
+
mapping = {}
|
|
234
|
+
for e in reversed(entities):
|
|
235
|
+
result_text = result_text[:e["start"]] + e["replacement"] + result_text[e["end"]:]
|
|
236
|
+
mapping[e["original"]] = e["replacement"]
|
|
237
|
+
|
|
238
|
+
return RedactionResult(result_text, entities, mapping)
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
# ---------------------------------------------------------------------------
|
|
242
|
+
# Session-level redaction
|
|
243
|
+
# ---------------------------------------------------------------------------
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def redact_session(parsed: dict[str, Any], mode: str = "regex") -> dict[str, Any]:
|
|
247
|
+
"""Redact a parsed session dict before sync.
|
|
248
|
+
|
|
249
|
+
Redacts:
|
|
250
|
+
- All message content (user + assistant)
|
|
251
|
+
- File paths (replace usernames)
|
|
252
|
+
- First message / summary
|
|
253
|
+
|
|
254
|
+
Returns a new dict (doesn't mutate input).
|
|
255
|
+
"""
|
|
256
|
+
import copy
|
|
257
|
+
result = copy.deepcopy(parsed)
|
|
258
|
+
|
|
259
|
+
total_redacted = 0
|
|
260
|
+
|
|
261
|
+
# Redact messages
|
|
262
|
+
for msg in result.get("messages", []):
|
|
263
|
+
if msg.get("content"):
|
|
264
|
+
r = redact_text(msg["content"], mode=mode)
|
|
265
|
+
msg["content"] = r.text
|
|
266
|
+
total_redacted += r.redacted_count
|
|
267
|
+
|
|
268
|
+
# Redact first_message
|
|
269
|
+
if result.get("first_message"):
|
|
270
|
+
r = redact_text(result["first_message"], mode=mode)
|
|
271
|
+
result["first_message"] = r.text
|
|
272
|
+
total_redacted += r.redacted_count
|
|
273
|
+
|
|
274
|
+
# Redact summary
|
|
275
|
+
if result.get("summary"):
|
|
276
|
+
r = redact_text(result["summary"], mode=mode)
|
|
277
|
+
result["summary"] = r.text
|
|
278
|
+
total_redacted += r.redacted_count
|
|
279
|
+
|
|
280
|
+
# Redact file paths (just home dir usernames)
|
|
281
|
+
if result.get("files_touched"):
|
|
282
|
+
result["files_touched"] = [
|
|
283
|
+
re.sub(r"/(?:home|Users)/[^/]+", "/home/user", f)
|
|
284
|
+
for f in result["files_touched"]
|
|
285
|
+
]
|
|
286
|
+
|
|
287
|
+
result["_redaction"] = {
|
|
288
|
+
"mode": mode,
|
|
289
|
+
"entities_redacted": total_redacted,
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
return result
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
# ---------------------------------------------------------------------------
|
|
296
|
+
# Redaction policy config
|
|
297
|
+
# ---------------------------------------------------------------------------
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
class RedactionPolicy:
|
|
301
|
+
"""Redaction policy: determines if/how to redact based on config."""
|
|
302
|
+
|
|
303
|
+
def __init__(
|
|
304
|
+
self,
|
|
305
|
+
enabled: bool = False,
|
|
306
|
+
mode: str = "regex", # "regex" or "ner"
|
|
307
|
+
force: bool = False, # org-level forced redaction
|
|
308
|
+
):
|
|
309
|
+
self.enabled = enabled
|
|
310
|
+
self.mode = mode
|
|
311
|
+
self.force = force
|
|
312
|
+
|
|
313
|
+
@classmethod
|
|
314
|
+
def from_config(cls, config: dict) -> "RedactionPolicy":
|
|
315
|
+
"""Load policy from session config or org settings."""
|
|
316
|
+
redaction = config.get("redaction", {})
|
|
317
|
+
return cls(
|
|
318
|
+
enabled=redaction.get("enabled", False),
|
|
319
|
+
mode=redaction.get("mode", "regex"),
|
|
320
|
+
force=redaction.get("force", False),
|
|
321
|
+
)
|
|
322
|
+
|
|
323
|
+
@classmethod
|
|
324
|
+
def from_org_settings(cls, org_settings: dict) -> "RedactionPolicy":
|
|
325
|
+
"""Load policy from organization-level settings."""
|
|
326
|
+
if org_settings.get("force_session_redaction"):
|
|
327
|
+
return cls(enabled=True, mode=org_settings.get("redaction_mode", "regex"), force=True)
|
|
328
|
+
return cls(enabled=False)
|
|
329
|
+
|
|
330
|
+
def should_redact(self, user_choice: bool | None = None) -> bool:
|
|
331
|
+
"""Determine if redaction should be applied.
|
|
332
|
+
|
|
333
|
+
Priority: org force > user explicit choice > config default.
|
|
334
|
+
"""
|
|
335
|
+
if self.force:
|
|
336
|
+
return True
|
|
337
|
+
if user_choice is not None:
|
|
338
|
+
return user_choice
|
|
339
|
+
return self.enabled
|
|
@@ -1188,15 +1188,7 @@ class Run:
|
|
|
1188
1188
|
Returns:
|
|
1189
1189
|
Path to the local artifact directory, or None on failure.
|
|
1190
1190
|
"""
|
|
1191
|
-
|
|
1192
|
-
return None
|
|
1193
|
-
return self._client.download_artifact(
|
|
1194
|
-
run_id=self._run_id,
|
|
1195
|
-
artifact_name=name,
|
|
1196
|
-
dest_dir=dest_dir,
|
|
1197
|
-
version=version,
|
|
1198
|
-
alias=alias,
|
|
1199
|
-
)
|
|
1191
|
+
return self.use_artifact(name, version=version, alias=alias)
|
|
1200
1192
|
|
|
1201
1193
|
def link_model(
|
|
1202
1194
|
self,
|
|
@@ -418,15 +418,52 @@ def sync_session_to_openrunner(
|
|
|
418
418
|
project: str | None = None,
|
|
419
419
|
api_key: str | None = None,
|
|
420
420
|
base_url: str | None = None,
|
|
421
|
+
redact: bool | None = None,
|
|
422
|
+
redact_mode: str | None = None,
|
|
423
|
+
visibility: str = "private",
|
|
421
424
|
) -> str | None:
|
|
422
425
|
"""Upload a parsed session to OpenRunner as a run with notes.
|
|
423
426
|
|
|
427
|
+
Args:
|
|
428
|
+
redact: Force redaction on/off. None = use config/org policy.
|
|
429
|
+
redact_mode: "regex" (fast) or "ner" (ML-based, needs transformers).
|
|
430
|
+
visibility: "public" or "private".
|
|
431
|
+
|
|
424
432
|
Returns the run ID on success, None on failure.
|
|
425
433
|
"""
|
|
426
434
|
from openrunner.api_client import APIClient
|
|
435
|
+
from openrunner.redact import RedactionPolicy, redact_session
|
|
427
436
|
|
|
428
437
|
# Load config (cascade: args > env > session_config > settings)
|
|
429
438
|
config = get_session_config()
|
|
439
|
+
|
|
440
|
+
# Apply redaction policy (local config + server org policy)
|
|
441
|
+
policy = RedactionPolicy.from_config(config)
|
|
442
|
+
|
|
443
|
+
# Fetch server-side policy if not explicitly overridden
|
|
444
|
+
if redact is None and api_key and base_url:
|
|
445
|
+
try:
|
|
446
|
+
client_check = APIClient(base_url=base_url, api_key=api_key)
|
|
447
|
+
resp = client_check._request("get", "/users/me/settings/redaction")
|
|
448
|
+
if resp.status_code == 200:
|
|
449
|
+
server_policy = resp.json().get("effective", {})
|
|
450
|
+
if server_policy.get("forced") or server_policy.get("enabled"):
|
|
451
|
+
policy = RedactionPolicy(
|
|
452
|
+
enabled=True,
|
|
453
|
+
mode=server_policy.get("mode", "regex"),
|
|
454
|
+
force=server_policy.get("forced", False),
|
|
455
|
+
)
|
|
456
|
+
client_check.close()
|
|
457
|
+
except Exception:
|
|
458
|
+
pass # Offline or server doesn't support — use local config
|
|
459
|
+
|
|
460
|
+
if policy.should_redact(redact):
|
|
461
|
+
mode = redact_mode or policy.mode
|
|
462
|
+
parsed = redact_session(parsed, mode=mode)
|
|
463
|
+
redaction_info = parsed.get("_redaction", {})
|
|
464
|
+
if redaction_info.get("entities_redacted", 0) > 0:
|
|
465
|
+
logger.info(f"Redacted {redaction_info['entities_redacted']} sensitive entities ({mode} mode)")
|
|
466
|
+
|
|
430
467
|
api_key = api_key or config.get("api_key")
|
|
431
468
|
base_url = base_url or config.get("base_url")
|
|
432
469
|
|
|
@@ -457,11 +494,17 @@ def sync_session_to_openrunner(
|
|
|
457
494
|
"tools_used": parsed.get("tools_used", []),
|
|
458
495
|
"total_tokens": parsed.get("total_tokens", 0),
|
|
459
496
|
},
|
|
460
|
-
"tags": [f"source:{source}", "ai-session"],
|
|
497
|
+
"tags": [f"source:{source}", "ai-session", f"visibility:{visibility}"],
|
|
461
498
|
"notes": _format_session_notes(parsed),
|
|
462
499
|
"state": "finished",
|
|
463
500
|
}
|
|
464
501
|
|
|
502
|
+
# Add redaction metadata if applied
|
|
503
|
+
if parsed.get("_redaction"):
|
|
504
|
+
run_data["config"]["redacted"] = True
|
|
505
|
+
run_data["config"]["redaction_mode"] = parsed["_redaction"]["mode"]
|
|
506
|
+
run_data["config"]["entities_redacted"] = parsed["_redaction"]["entities_redacted"]
|
|
507
|
+
|
|
465
508
|
result = client.create_run(run_data)
|
|
466
509
|
if not result:
|
|
467
510
|
logger.warning("Failed to create session run")
|
|
@@ -590,6 +633,9 @@ def sync_all(
|
|
|
590
633
|
project: str | None = None,
|
|
591
634
|
dry_run: bool = False,
|
|
592
635
|
directory: Path | None = None,
|
|
636
|
+
redact: bool | None = None,
|
|
637
|
+
redact_mode: str | None = None,
|
|
638
|
+
visibility: str = "private",
|
|
593
639
|
) -> list[str]:
|
|
594
640
|
"""Discover and sync all new sessions. Returns list of synced run IDs."""
|
|
595
641
|
state = _load_sync_state()
|
|
@@ -612,7 +658,7 @@ def sync_all(
|
|
|
612
658
|
parsed = parse_generic_session(session_info["path"], session_info["source"])
|
|
613
659
|
|
|
614
660
|
# Sync
|
|
615
|
-
run_id = sync_session_to_openrunner(parsed, project=project)
|
|
661
|
+
run_id = sync_session_to_openrunner(parsed, project=project, redact=redact, redact_mode=redact_mode, visibility=visibility)
|
|
616
662
|
if run_id:
|
|
617
663
|
state["synced"][h] = {
|
|
618
664
|
"run_id": run_id,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|