openrunner-sdk 2.7.0__tar.gz → 2.7.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/PKG-INFO +1 -1
  2. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/__init__.py +1 -1
  3. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/cli.py +7 -2
  4. openrunner_sdk-2.7.1/openrunner/redact.py +339 -0
  5. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/run.py +1 -9
  6. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/session.py +48 -2
  7. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/pyproject.toml +1 -1
  8. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/.gitignore +0 -0
  9. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/=6.0 +0 -0
  10. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/=8.1 +0 -0
  11. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/README.md +0 -0
  12. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/api_client.py +0 -0
  13. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/artifact.py +0 -0
  14. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/buffer.py +0 -0
  15. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/cache.py +0 -0
  16. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/config.py +0 -0
  17. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/cost.py +0 -0
  18. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/dataset.py +0 -0
  19. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/environment.py +0 -0
  20. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/evaluation.py +0 -0
  21. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/feedback.py +0 -0
  22. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/git_info.py +0 -0
  23. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/guardrails.py +0 -0
  24. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/install_commands.py +0 -0
  25. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/__init__.py +0 -0
  26. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/accelerate.py +0 -0
  27. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/anthropic_tracer.py +0 -0
  28. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/catboost.py +0 -0
  29. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/diffusers.py +0 -0
  30. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/fastai.py +0 -0
  31. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/forced_alignment.py +0 -0
  32. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/gladia.py +0 -0
  33. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/gymnasium.py +0 -0
  34. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/huggingface.py +0 -0
  35. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/hydra.py +0 -0
  36. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/ignite.py +0 -0
  37. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/jax.py +0 -0
  38. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/keras.py +0 -0
  39. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/langchain.py +0 -0
  40. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/lightgbm.py +0 -0
  41. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/lightning.py +0 -0
  42. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/llamaindex.py +0 -0
  43. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/openai_finetune.py +0 -0
  44. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/openai_tracer.py +0 -0
  45. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/optuna.py +0 -0
  46. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/pytorch.py +0 -0
  47. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/sb3.py +0 -0
  48. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/sklearn.py +0 -0
  49. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/tensorflow.py +0 -0
  50. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/trl.py +0 -0
  51. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/tts.py +0 -0
  52. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/ultralytics.py +0 -0
  53. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/voice_agent.py +0 -0
  54. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/whisper.py +0 -0
  55. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/integration/xgboost.py +0 -0
  56. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/launch.py +0 -0
  57. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/media.py +0 -0
  58. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/migrate.py +0 -0
  59. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/model.py +0 -0
  60. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/offline.py +0 -0
  61. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/pii.py +0 -0
  62. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/plot.py +0 -0
  63. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/prompt.py +0 -0
  64. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/query_api.py +0 -0
  65. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/scorers.py +0 -0
  66. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/sender.py +0 -0
  67. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/settings.py +0 -0
  68. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/summary.py +0 -0
  69. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/sweep.py +0 -0
  70. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/system_metrics.py +0 -0
  71. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/tensorboard.py +0 -0
  72. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/trace.py +0 -0
  73. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/transcript_formatter.py +0 -0
  74. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/wal.py +0 -0
  75. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/wandb_compat/__init__.py +0 -0
  76. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/wandb_compat/_shim.py +0 -0
  77. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/openrunner/wer.py +0 -0
  78. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/__init__.py +0 -0
  79. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/conftest.py +0 -0
  80. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_alert.py +0 -0
  81. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_aliases.py +0 -0
  82. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_api_client.py +0 -0
  83. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_artifact.py +0 -0
  84. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_buffer.py +0 -0
  85. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_cache.py +0 -0
  86. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_class_scorers.py +0 -0
  87. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_cli.py +0 -0
  88. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_config.py +0 -0
  89. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_evaluation.py +0 -0
  90. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_finish.py +0 -0
  91. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_git_info.py +0 -0
  92. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_init.py +0 -0
  93. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_integration_fastai.py +0 -0
  94. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_integration_huggingface.py +0 -0
  95. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_integration_keras.py +0 -0
  96. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_integration_langchain.py +0 -0
  97. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_integration_lightning.py +0 -0
  98. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_integration_pytorch.py +0 -0
  99. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_integration_sklearn.py +0 -0
  100. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_integration_xgboost.py +0 -0
  101. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_launch.py +0 -0
  102. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_log.py +0 -0
  103. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_log_code.py +0 -0
  104. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_media.py +0 -0
  105. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_migrate.py +0 -0
  106. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_offline.py +0 -0
  107. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_offline_sync.py +0 -0
  108. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_pii.py +0 -0
  109. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_plot.py +0 -0
  110. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_query_api.py +0 -0
  111. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_resume.py +0 -0
  112. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_sdk_features.py +0 -0
  113. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_sender.py +0 -0
  114. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_summary.py +0 -0
  115. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_sweep.py +0 -0
  116. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_system_metrics.py +0 -0
  117. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_trace.py +0 -0
  118. {openrunner_sdk-2.7.0 → openrunner_sdk-2.7.1}/tests/test_wandb_compat.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openrunner-sdk
3
- Version: 2.7.0
3
+ Version: 2.7.1
4
4
  Summary: OpenRunner SDK - W&B-compatible ML experiment tracking client
5
5
  Project-URL: Homepage, https://github.com/jqueguiner/openrunner
6
6
  Project-URL: Repository, https://github.com/jqueguiner/openrunner
@@ -120,7 +120,7 @@ launch.from_run = _launch_from_run # type: ignore[attr-defined]
120
120
  # openrunner.trace.patch_openai() syntax
121
121
  trace.patch_openai = _patch_openai # type: ignore[attr-defined]
122
122
 
123
- __version__ = "2.7.0"
123
+ __version__ = "2.7.1"
124
124
 
125
125
  logger = logging.getLogger("openrunner")
126
126
 
@@ -2391,13 +2391,18 @@ def session_setup() -> None:
2391
2391
  @click.option("--hours", "-h", default=24.0, help="Look back N hours (default: 24)")
2392
2392
  @click.option("--project", "-p", default=None, help="Target project (default: from config)")
2393
2393
  @click.option("--dry-run", is_flag=True, help="Show what would be synced without uploading")
2394
- def session_sync(directory: str | None, hours: float, project: str | None, dry_run: bool) -> None:
2394
+ @click.option("--redact/--no-redact", default=None, help="Force redaction on/off (default: use config)")
2395
+ @click.option("--redact-mode", type=click.Choice(["regex", "ner"]), default=None, help="Redaction mode")
2396
+ @click.option("--public", "visibility", flag_value="public", help="Make session public")
2397
+ @click.option("--private", "visibility", flag_value="private", default=True, help="Keep session private (default)")
2398
+ def session_sync(directory: str | None, hours: float, project: str | None, dry_run: bool, redact: bool | None, redact_mode: str | None, visibility: str) -> None:
2395
2399
  """Sync AI sessions to OpenRunner.
2396
2400
 
2397
2401
  If DIRECTORY is given, scan that path for .jsonl/.json session files.
2398
2402
  Otherwise, scan default locations (~/.claude, ~/.codex, ~/.qwen-code).
2399
2403
 
2400
2404
  On first run, prompts for API key and project selection.
2405
+ Redaction strips API keys, tokens, emails, passwords before upload.
2401
2406
  """
2402
2407
  from pathlib import Path
2403
2408
  from openrunner.session import discover_all_sessions, discover_in_directory, sync_all, get_session_config, interactive_setup
@@ -2427,7 +2432,7 @@ def session_sync(directory: str | None, hours: float, project: str | None, dry_r
2427
2432
  if dry_run:
2428
2433
  return
2429
2434
 
2430
- synced = sync_all(since_hours=hours, project=project, directory=Path(directory) if directory else None)
2435
+ synced = sync_all(since_hours=hours, project=project, directory=Path(directory) if directory else None, redact=redact, redact_mode=redact_mode, visibility=visibility)
2431
2436
  if synced:
2432
2437
  click.echo(f"Synced {len(synced)} session(s) to OpenRunner.")
2433
2438
  for run_id in synced:
@@ -0,0 +1,339 @@
1
+ """Session redaction — detect and mask PII/secrets before sync.
2
+
3
+ Inspired by Dataiku's kiji-proxy (DeBERTa NER + synthetic replacement).
4
+
5
+ Two modes:
6
+ 1. Regex-based (fast, no deps): API keys, tokens, passwords, IPs, emails, paths
7
+ 2. NER-based (accurate, needs transformers): full PII detection via DeBERTa
8
+
9
+ Redaction can be configured at:
10
+ - Client side: per-sync via `openrunner session sync --redact`
11
+ - Organization level: org setting forces redaction for all members
12
+ - User level: user setting in session_config.json
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import hashlib
18
+ import os
19
+ import re
20
+ from typing import Any
21
+
22
+ # ---------------------------------------------------------------------------
23
+ # Regex patterns for secrets and common PII
24
+ # ---------------------------------------------------------------------------
25
+
26
# Ordered list of (label, compiled pattern) pairs used by the regex redactor.
# Every pattern exposes the sensitive value as capture group 1; the label is
# what selects the replacement format for that value.
SECRET_PATTERNS: list[tuple[str, re.Pattern]] = [
    # API keys / tokens (generic patterns)
    ("API_KEY", re.compile(r"\b(sk-[a-zA-Z0-9\-_]{20,})\b")),  # OpenAI
    ("API_KEY", re.compile(r"\b(or_[a-zA-Z0-9_\-]{20,})\b")),  # OpenRunner
    ("API_KEY", re.compile(r"\b(ghp_[a-zA-Z0-9]{36,})\b")),  # GitHub PAT
    ("API_KEY", re.compile(r"\b(gho_[a-zA-Z0-9]{36,})\b")),  # GitHub OAuth
    ("API_KEY", re.compile(r"\b(github_pat_[a-zA-Z0-9_]{40,})\b")),  # GitHub fine-grained
    ("API_KEY", re.compile(r"\b(pypi-[a-zA-Z0-9_\-]{50,})\b")),  # PyPI
    ("API_KEY", re.compile(r"\b(npm_[a-zA-Z0-9]{30,})\b")),  # npm
    ("API_KEY", re.compile(r"\b(xox[bsapr]-[a-zA-Z0-9\-]{10,})\b")),  # Slack
    ("API_KEY", re.compile(r"\b(AKIA[0-9A-Z]{16})\b")),  # AWS access key
    # AWS secret, value first: a 40-char blob followed later on the same line
    # by the word "secret".
    ("SECRET", re.compile(r"\b([a-zA-Z0-9/+=]{40})\b(?=.*(?:secret|SECRET))")),
    # AWS secret, keyword first: `AWS_SECRET_ACCESS_KEY=<value>`,
    # `secret_key: "<value>"`, `"SecretAccessKey": "<value>"`. The pattern
    # above only looks for "secret" after the value, so it misses this form.
    ("SECRET", re.compile(
        r"(?i)secret[a-z0-9_]*['\"]?\s*[=:]\s*['\"]?"
        r"([a-zA-Z0-9/+=]{40})(?![a-zA-Z0-9/+=])"
    )),
    ("API_KEY", re.compile(r"\b(AIza[0-9A-Za-z_\-]{35})\b")),  # Google API
    ("TOKEN", re.compile(r"\b(eyJ[a-zA-Z0-9_\-]{20,}\.[a-zA-Z0-9_\-]{20,}\.[a-zA-Z0-9_\-]{20,})\b")),  # JWT
    # Passwords in config/env
    ("PASSWORD", re.compile(r"(?i)(?:password|passwd|pwd)\s*[=:]\s*['\"]?([^\s'\"]{6,})['\"]?")),
    # Connection strings
    ("CONNECTION_STRING", re.compile(r"(?i)((?:postgres|mysql|mongodb|redis)://[^\s'\"]+)")),
    # Private keys (PEM blocks, including OpenSSH and encrypted PKCS#8 headers)
    ("PRIVATE_KEY", re.compile(
        r"(-----BEGIN (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----"
        r"[^-]+"
        r"-----END (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----)",
        re.DOTALL,
    )),
    # Email addresses
    ("EMAIL", re.compile(r"\b([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})\b")),
    # IPv4 addresses (safe/internal ones are filtered out by the caller)
    ("IP_ADDRESS", re.compile(r"\b((?:(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)\.){3}(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d))\b")),
    # Home directory paths (contain username)
    ("PATH", re.compile(r"(/(?:home|Users)/[a-zA-Z0-9._\-]+)")),
]
53
+
54
+ # IPs to NOT redact (internal/docker/localhost)
55
+ SAFE_IPS = {"127.0.0.1", "0.0.0.0", "localhost", "172.17.0.1", "172.18.0.1"}
56
+ SAFE_IP_PREFIXES = ("10.", "172.16.", "172.17.", "172.18.", "192.168.")
57
+
58
+ # Emails to NOT redact
59
+ SAFE_EMAILS = {"noreply@anthropic.com", "noreply@github.com"}
60
+
61
+
62
+ def _is_safe_ip(ip: str) -> bool:
63
+ return ip in SAFE_IPS or any(ip.startswith(p) for p in SAFE_IP_PREFIXES)
64
+
65
+
66
+ def _generate_replacement(label: str, original: str) -> str:
67
+ """Generate a deterministic replacement (same input -> same output)."""
68
+ # Use hash to generate consistent replacement
69
+ h = hashlib.sha256(original.encode()).hexdigest()[:8]
70
+
71
+ if label == "API_KEY":
72
+ return f"REDACTED_KEY_{h}"
73
+ elif label == "SECRET":
74
+ return f"REDACTED_SECRET_{h}"
75
+ elif label == "TOKEN":
76
+ return f"REDACTED_TOKEN_{h}"
77
+ elif label == "PASSWORD":
78
+ return f"REDACTED_PASS_{h}"
79
+ elif label == "CONNECTION_STRING":
80
+ # Keep protocol, redact rest
81
+ proto = original.split("://")[0] if "://" in original else "db"
82
+ return f"{proto}://REDACTED_{h}"
83
+ elif label == "PRIVATE_KEY":
84
+ return "-----BEGIN PRIVATE KEY-----\nREDACTED\n-----END PRIVATE KEY-----"
85
+ elif label == "EMAIL":
86
+ domain = original.split("@")[1] if "@" in original else "example.com"
87
+ return f"user_{h[:4]}@{domain}"
88
+ elif label == "IP_ADDRESS":
89
+ return f"x.x.x.{h[:2]}"
90
+ elif label == "PATH":
91
+ return f"/home/user_{h[:4]}"
92
+ else:
93
+ return f"[REDACTED:{label}]"
94
+
95
+
96
+ # ---------------------------------------------------------------------------
97
+ # Core redaction engine
98
+ # ---------------------------------------------------------------------------
99
+
100
+
101
class RedactionResult:
    """Outcome of one redaction pass over a piece of text."""

    def __init__(self, text: str, entities: list[dict], mapping: dict[str, str]):
        # text: the text after replacements were applied.
        # entities: one dict per redacted span, shaped as
        #   {label, start, end, original, replacement}.
        # mapping: original value -> replacement (kept for restoration).
        self.text, self.entities, self.mapping = text, entities, mapping

    @property
    def redacted_count(self) -> int:
        """Number of entities recorded on this result."""
        return len(self.entities)
112
+
113
+
114
def redact_text(text: str, mode: str = "regex") -> RedactionResult:
    """Mask sensitive content in *text*.

    Args:
        text: Input text to redact.
        mode: ``"ner"`` selects the ML-based redactor (needs transformers);
            any other value selects the fast pattern-based redactor.

    Returns:
        RedactionResult carrying the redacted text and its metadata.
    """
    redactor = _redact_ner if mode == "ner" else _redact_regex
    return redactor(text)
127
+
128
+
129
def _redact_regex(text: str) -> RedactionResult:
    """Redact secrets and common PII from *text* using ``SECRET_PATTERNS``.

    Every pattern is scanned over the whole text. Safe IPs, allow-listed
    emails and values shorter than six characters are ignored. Where matches
    overlap, the one that starts first wins (the longest among equal starts).
    """
    candidates = []
    for label, pattern in SECRET_PATTERNS:
        for hit in pattern.finditer(text):
            # Prefer capture group 1 when the pattern defines one.
            group = 1 if hit.lastindex else 0
            value = hit.group(group)

            # Skip safe values
            if label == "IP_ADDRESS" and _is_safe_ip(value):
                continue
            if label == "EMAIL" and value.lower() in SAFE_EMAILS:
                continue
            # Skip very short matches (likely false positives)
            if len(value) < 6:
                continue

            candidates.append({
                "label": label,
                "start": hit.start(group),
                "end": hit.end(group),
                "original": value,
                "replacement": _generate_replacement(label, value),
            })

    # Order by start offset, longest first among equal starts, then keep only
    # spans that begin at or after the end of the previously kept span.
    candidates.sort(key=lambda ent: (ent["start"], ent["start"] - ent["end"]))
    kept = []
    boundary = -1
    for ent in candidates:
        if ent["start"] < boundary:
            continue
        kept.append(ent)
        boundary = ent["end"]

    # Kept spans are sorted and disjoint, so the output can be stitched
    # together front to back without disturbing any offsets.
    pieces = []
    cursor = 0
    for ent in kept:
        pieces.append(text[cursor:ent["start"]])
        pieces.append(ent["replacement"])
        cursor = ent["end"]
    pieces.append(text[cursor:])

    # Filled last-to-first so that, for a repeated original, the earliest
    # occurrence's replacement is the one that remains.
    mapping = {ent["original"]: ent["replacement"] for ent in reversed(kept)}

    return RedactionResult("".join(pieces), kept, mapping)
173
+
174
+
175
def _redact_ner(text: str) -> RedactionResult:
    """Redact PII with a token-classification model, plus regex for secrets.

    Loads the ``DataikuNLP/kiji-pii-model-onnx`` pipeline on first use and
    caches it in the module global ``_ner_pipeline``. Only the first 10,000
    characters are passed to the model; the regex pass covers the full text.

    Falls back to plain regex redaction when ``transformers`` is not
    installed, the model cannot be loaded, or inference raises.

    Returns:
        RedactionResult whose entities are the kept NER spans plus every
        regex span that does not overlap one of them, sorted by start offset.
    """
    try:
        from transformers import pipeline
    except ImportError:
        return _redact_regex(text)

    # Load model (cached after first call). The global may not exist yet, so
    # it is looked up through globals() rather than read directly.
    global _ner_pipeline
    if globals().get("_ner_pipeline") is None:
        try:
            _ner_pipeline = pipeline(
                "token-classification",
                model="DataikuNLP/kiji-pii-model-onnx",
                aggregation_strategy="simple",
            )
        except Exception:
            # Fall back to regex if model load fails
            return _redact_regex(text)

    # Run NER
    try:
        ner_results = _ner_pipeline(text[:10000])  # Cap at 10k chars
    except Exception:
        return _redact_regex(text)

    ner_entities = []
    for ent in ner_results:
        if ent.get("score", 0) < 0.25:
            continue
        start, end = ent.get("start"), ent.get("end")
        # Without usable character offsets the span cannot be replaced.
        if start is None or end is None or end <= start:
            continue
        label = ent.get("entity_group", ent.get("entity", "UNKNOWN"))
        # Use the exact source slice so `original`, the mapping key and the
        # replacement hash all describe the text that is actually replaced.
        # (The pipeline's "word" field is tokenizer output and may not be
        # byte-identical to the source substring.)
        original = text[start:end]
        ner_entities.append({
            "label": label,
            "start": start,
            "end": end,
            "original": original,
            "replacement": _generate_replacement(label, original),
        })

    # Drop NER spans that overlap an earlier kept span; splicing overlapping
    # ranges end-to-start would corrupt the output text.
    ner_entities.sort(key=lambda e: (e["start"], e["start"] - e["end"]))
    entities = []
    last_end = -1
    for ent in ner_entities:
        if ent["start"] >= last_end:
            entities.append(ent)
            last_end = ent["end"]
    ner_kept = list(entities)

    # Also run regex for secrets (NER won't catch API keys)
    regex_result = _redact_regex(text)
    # Merge: add regex entities that don't overlap with NER. Regex entities
    # are already disjoint from one another.
    for re_ent in regex_result.entities:
        overlaps = any(
            re_ent["start"] < e["end"] and re_ent["end"] > e["start"]
            for e in ner_kept
        )
        if not overlaps:
            entities.append(re_ent)

    entities.sort(key=lambda e: e["start"])

    # Apply replacements end-to-start so earlier offsets stay valid.
    result_text = text
    mapping = {}
    for e in reversed(entities):
        result_text = result_text[:e["start"]] + e["replacement"] + result_text[e["end"]:]
        mapping[e["original"]] = e["replacement"]

    return RedactionResult(result_text, entities, mapping)
239
+
240
+
241
+ # ---------------------------------------------------------------------------
242
+ # Session-level redaction
243
+ # ---------------------------------------------------------------------------
244
+
245
+
246
def _redact_nested(value: Any, mode: str) -> tuple[Any, int]:
    """Redact every string found in *value*, descending into lists and dicts.

    Returns the redacted value and the number of entities redacted. Values
    that are neither str, list nor dict are returned unchanged.
    """
    if isinstance(value, str):
        if not value:
            return value, 0
        outcome = redact_text(value, mode=mode)
        return outcome.text, outcome.redacted_count
    if isinstance(value, list):
        redacted_items = []
        total = 0
        for item in value:
            new_item, count = _redact_nested(item, mode)
            redacted_items.append(new_item)
            total += count
        return redacted_items, total
    if isinstance(value, dict):
        redacted_map = {}
        total = 0
        for key, item in value.items():
            redacted_map[key], count = _redact_nested(item, mode)
            total += count
        return redacted_map, total
    return value, 0


def redact_session(parsed: dict[str, Any], mode: str = "regex") -> dict[str, Any]:
    """Redact a parsed session dict before sync.

    Redacts:
    - All message content; structured content (lists/dicts of blocks) is
      walked and every string inside it is redacted
    - First message / summary
    - File paths in ``files_touched`` (home-directory usernames only)

    Args:
        parsed: Parsed session dict; it is deep-copied, never mutated.
        mode: Redaction mode passed through to ``redact_text``.

    Returns:
        A new dict with a ``_redaction`` entry recording the mode and the
        number of entities redacted in messages, first_message and summary.
    """
    import copy
    result = copy.deepcopy(parsed)

    total_redacted = 0

    # Redact messages. Content is not guaranteed to be a plain string, and the
    # regex engine raises TypeError on anything else, so containers are walked.
    for msg in result.get("messages", []):
        if msg.get("content"):
            msg["content"], count = _redact_nested(msg["content"], mode)
            total_redacted += count

    # Redact first_message and summary
    for field in ("first_message", "summary"):
        if result.get(field):
            result[field], count = _redact_nested(result[field], mode)
            total_redacted += count

    # Redact file paths (just home dir usernames); non-string entries are
    # passed through untouched rather than crashing re.sub.
    if result.get("files_touched"):
        result["files_touched"] = [
            re.sub(r"/(?:home|Users)/[^/]+", "/home/user", f) if isinstance(f, str) else f
            for f in result["files_touched"]
        ]

    result["_redaction"] = {
        "mode": mode,
        "entities_redacted": total_redacted,
    }

    return result
293
+
294
+
295
+ # ---------------------------------------------------------------------------
296
+ # Redaction policy config
297
+ # ---------------------------------------------------------------------------
298
+
299
+
300
class RedactionPolicy:
    """Redaction policy: determines if/how to redact based on config."""

    def __init__(
        self,
        enabled: bool = False,
        mode: str = "regex",  # "regex" or "ner"
        force: bool = False,  # org-level forced redaction
    ):
        self.enabled = enabled
        self.mode = mode
        self.force = force

    @classmethod
    def from_config(cls, config: dict) -> "RedactionPolicy":
        """Build a policy from the ``"redaction"`` section of *config*.

        A missing or null section yields the defaults (disabled, regex mode,
        not forced); a null ``mode`` also falls back to ``"regex"``.
        """
        # `or {}` covers the key being present with a null value, which a
        # plain `.get("redaction", {})` would return as None.
        redaction = config.get("redaction") or {}
        return cls(
            enabled=redaction.get("enabled", False),
            mode=redaction.get("mode") or "regex",
            force=redaction.get("force", False),
        )

    @classmethod
    def from_org_settings(cls, org_settings: dict) -> "RedactionPolicy":
        """Build a policy from organization-level settings.

        When ``force_session_redaction`` is truthy the policy is enabled and
        forced, using ``redaction_mode`` (default ``"regex"``); otherwise a
        disabled policy is returned.
        """
        if org_settings.get("force_session_redaction"):
            return cls(enabled=True, mode=org_settings.get("redaction_mode") or "regex", force=True)
        return cls(enabled=False)

    def should_redact(self, user_choice: bool | None = None) -> bool:
        """Determine if redaction should be applied.

        Priority: org force > user explicit choice > config default.

        Args:
            user_choice: Explicit on/off request, or None when not specified.
        """
        if self.force:
            return True
        if user_choice is not None:
            return user_choice
        return self.enabled
@@ -1188,15 +1188,7 @@ class Run:
1188
1188
  Returns:
1189
1189
  Path to the local artifact directory, or None on failure.
1190
1190
  """
1191
- if not self._client:
1192
- return None
1193
- return self._client.download_artifact(
1194
- run_id=self._run_id,
1195
- artifact_name=name,
1196
- dest_dir=dest_dir,
1197
- version=version,
1198
- alias=alias,
1199
- )
1191
+ return self.use_artifact(name, version=version, alias=alias)
1200
1192
 
1201
1193
  def link_model(
1202
1194
  self,
@@ -418,15 +418,52 @@ def sync_session_to_openrunner(
418
418
  project: str | None = None,
419
419
  api_key: str | None = None,
420
420
  base_url: str | None = None,
421
+ redact: bool | None = None,
422
+ redact_mode: str | None = None,
423
+ visibility: str = "private",
421
424
  ) -> str | None:
422
425
  """Upload a parsed session to OpenRunner as a run with notes.
423
426
 
427
+ Args:
428
+ redact: Force redaction on/off. None = use config/org policy.
429
+ redact_mode: "regex" (fast) or "ner" (ML-based, needs transformers).
430
+ visibility: "public" or "private".
431
+
424
432
  Returns the run ID on success, None on failure.
425
433
  """
426
434
  from openrunner.api_client import APIClient
435
+ from openrunner.redact import RedactionPolicy, redact_session
427
436
 
428
437
  # Load config (cascade: args > env > session_config > settings)
429
438
  config = get_session_config()
439
+
440
+ # Apply redaction policy (local config + server org policy)
441
+ policy = RedactionPolicy.from_config(config)
442
+
443
+ # Fetch server-side policy if not explicitly overridden
444
+ if redact is None and api_key and base_url:
445
+ try:
446
+ client_check = APIClient(base_url=base_url, api_key=api_key)
447
+ resp = client_check._request("get", "/users/me/settings/redaction")
448
+ if resp.status_code == 200:
449
+ server_policy = resp.json().get("effective", {})
450
+ if server_policy.get("forced") or server_policy.get("enabled"):
451
+ policy = RedactionPolicy(
452
+ enabled=True,
453
+ mode=server_policy.get("mode", "regex"),
454
+ force=server_policy.get("forced", False),
455
+ )
456
+ client_check.close()
457
+ except Exception:
458
+ pass # Offline or server doesn't support — use local config
459
+
460
+ if policy.should_redact(redact):
461
+ mode = redact_mode or policy.mode
462
+ parsed = redact_session(parsed, mode=mode)
463
+ redaction_info = parsed.get("_redaction", {})
464
+ if redaction_info.get("entities_redacted", 0) > 0:
465
+ logger.info(f"Redacted {redaction_info['entities_redacted']} sensitive entities ({mode} mode)")
466
+
430
467
  api_key = api_key or config.get("api_key")
431
468
  base_url = base_url or config.get("base_url")
432
469
 
@@ -457,11 +494,17 @@ def sync_session_to_openrunner(
457
494
  "tools_used": parsed.get("tools_used", []),
458
495
  "total_tokens": parsed.get("total_tokens", 0),
459
496
  },
460
- "tags": [f"source:{source}", "ai-session"],
497
+ "tags": [f"source:{source}", "ai-session", f"visibility:{visibility}"],
461
498
  "notes": _format_session_notes(parsed),
462
499
  "state": "finished",
463
500
  }
464
501
 
502
+ # Add redaction metadata if applied
503
+ if parsed.get("_redaction"):
504
+ run_data["config"]["redacted"] = True
505
+ run_data["config"]["redaction_mode"] = parsed["_redaction"]["mode"]
506
+ run_data["config"]["entities_redacted"] = parsed["_redaction"]["entities_redacted"]
507
+
465
508
  result = client.create_run(run_data)
466
509
  if not result:
467
510
  logger.warning("Failed to create session run")
@@ -590,6 +633,9 @@ def sync_all(
590
633
  project: str | None = None,
591
634
  dry_run: bool = False,
592
635
  directory: Path | None = None,
636
+ redact: bool | None = None,
637
+ redact_mode: str | None = None,
638
+ visibility: str = "private",
593
639
  ) -> list[str]:
594
640
  """Discover and sync all new sessions. Returns list of synced run IDs."""
595
641
  state = _load_sync_state()
@@ -612,7 +658,7 @@ def sync_all(
612
658
  parsed = parse_generic_session(session_info["path"], session_info["source"])
613
659
 
614
660
  # Sync
615
- run_id = sync_session_to_openrunner(parsed, project=project)
661
+ run_id = sync_session_to_openrunner(parsed, project=project, redact=redact, redact_mode=redact_mode, visibility=visibility)
616
662
  if run_id:
617
663
  state["synced"][h] = {
618
664
  "run_id": run_id,
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "openrunner-sdk"
3
- version = "2.7.0"
3
+ version = "2.7.1"
4
4
  description = "OpenRunner SDK - W&B-compatible ML experiment tracking client"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
File without changes
File without changes
File without changes