openrunner-sdk 2.4.0__tar.gz → 2.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/PKG-INFO +3 -1
  2. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/__init__.py +1 -0
  3. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/api_client.py +21 -11
  4. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/run.py +20 -7
  5. openrunner_sdk-2.4.2/openrunner/wer.py +232 -0
  6. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/pyproject.toml +2 -1
  7. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/.gitignore +0 -0
  8. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/=6.0 +0 -0
  9. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/=8.1 +0 -0
  10. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/README.md +0 -0
  11. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/artifact.py +0 -0
  12. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/buffer.py +0 -0
  13. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/cache.py +0 -0
  14. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/cli.py +0 -0
  15. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/config.py +0 -0
  16. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/cost.py +0 -0
  17. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/dataset.py +0 -0
  18. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/environment.py +0 -0
  19. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/evaluation.py +0 -0
  20. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/feedback.py +0 -0
  21. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/git_info.py +0 -0
  22. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/guardrails.py +0 -0
  23. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/__init__.py +0 -0
  24. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/accelerate.py +0 -0
  25. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/anthropic_tracer.py +0 -0
  26. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/catboost.py +0 -0
  27. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/diffusers.py +0 -0
  28. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/fastai.py +0 -0
  29. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/forced_alignment.py +0 -0
  30. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/gladia.py +0 -0
  31. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/gymnasium.py +0 -0
  32. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/huggingface.py +0 -0
  33. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/hydra.py +0 -0
  34. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/ignite.py +0 -0
  35. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/jax.py +0 -0
  36. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/keras.py +0 -0
  37. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/langchain.py +0 -0
  38. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/lightgbm.py +0 -0
  39. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/lightning.py +0 -0
  40. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/llamaindex.py +0 -0
  41. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/openai_finetune.py +0 -0
  42. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/openai_tracer.py +0 -0
  43. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/optuna.py +0 -0
  44. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/pytorch.py +0 -0
  45. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/sb3.py +0 -0
  46. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/sklearn.py +0 -0
  47. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/tensorflow.py +0 -0
  48. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/trl.py +0 -0
  49. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/tts.py +0 -0
  50. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/ultralytics.py +0 -0
  51. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/voice_agent.py +0 -0
  52. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/whisper.py +0 -0
  53. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/integration/xgboost.py +0 -0
  54. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/launch.py +0 -0
  55. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/media.py +0 -0
  56. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/migrate.py +0 -0
  57. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/model.py +0 -0
  58. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/offline.py +0 -0
  59. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/pii.py +0 -0
  60. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/plot.py +0 -0
  61. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/prompt.py +0 -0
  62. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/query_api.py +0 -0
  63. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/scorers.py +0 -0
  64. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/sender.py +0 -0
  65. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/settings.py +0 -0
  66. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/summary.py +0 -0
  67. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/sweep.py +0 -0
  68. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/system_metrics.py +0 -0
  69. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/tensorboard.py +0 -0
  70. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/trace.py +0 -0
  71. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/transcript_formatter.py +0 -0
  72. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/wal.py +0 -0
  73. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/wandb_compat/__init__.py +0 -0
  74. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/openrunner/wandb_compat/_shim.py +0 -0
  75. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/__init__.py +0 -0
  76. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/conftest.py +0 -0
  77. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_alert.py +0 -0
  78. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_aliases.py +0 -0
  79. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_api_client.py +0 -0
  80. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_artifact.py +0 -0
  81. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_buffer.py +0 -0
  82. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_cache.py +0 -0
  83. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_class_scorers.py +0 -0
  84. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_cli.py +0 -0
  85. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_config.py +0 -0
  86. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_evaluation.py +0 -0
  87. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_finish.py +0 -0
  88. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_git_info.py +0 -0
  89. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_init.py +0 -0
  90. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_integration_fastai.py +0 -0
  91. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_integration_huggingface.py +0 -0
  92. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_integration_keras.py +0 -0
  93. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_integration_langchain.py +0 -0
  94. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_integration_lightning.py +0 -0
  95. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_integration_pytorch.py +0 -0
  96. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_integration_sklearn.py +0 -0
  97. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_integration_xgboost.py +0 -0
  98. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_launch.py +0 -0
  99. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_log.py +0 -0
  100. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_log_code.py +0 -0
  101. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_media.py +0 -0
  102. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_migrate.py +0 -0
  103. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_offline.py +0 -0
  104. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_offline_sync.py +0 -0
  105. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_pii.py +0 -0
  106. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_plot.py +0 -0
  107. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_query_api.py +0 -0
  108. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_resume.py +0 -0
  109. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_sdk_features.py +0 -0
  110. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_sender.py +0 -0
  111. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_summary.py +0 -0
  112. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_sweep.py +0 -0
  113. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_system_metrics.py +0 -0
  114. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_trace.py +0 -0
  115. {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.2}/tests/test_wandb_compat.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openrunner-sdk
3
- Version: 2.4.0
3
+ Version: 2.4.2
4
4
  Summary: OpenRunner SDK - W&B-compatible ML experiment tracking client
5
5
  Project-URL: Homepage, https://github.com/jqueguiner/openrunner
6
6
  Project-URL: Repository, https://github.com/jqueguiner/openrunner
@@ -82,6 +82,8 @@ Requires-Dist: numpy>=1.24; extra == 'tts'
82
82
  Provides-Extra: ultralytics
83
83
  Requires-Dist: ultralytics>=8.0; extra == 'ultralytics'
84
84
  Provides-Extra: voice-agent
85
+ Provides-Extra: wer
86
+ Requires-Dist: num2words2>=0.1; extra == 'wer'
85
87
  Provides-Extra: whisper
86
88
  Requires-Dist: openai-whisper>=20231117; extra == 'whisper'
87
89
  Provides-Extra: xgboost
@@ -97,6 +97,7 @@ from openrunner.settings import SDKSettings
97
97
  from openrunner.summary import Summary
98
98
  from openrunner.sweep import agent, sweep
99
99
  from openrunner.evaluation import EvaluationLogger, Scorer, evaluate, scorer
100
+ from openrunner.wer import WERScorer, compute_wer, compute_wer_batch
100
101
  from openrunner.guardrails import (
101
102
  GuardrailCheckResult,
102
103
  GuardrailResult,
@@ -542,17 +542,27 @@ class APIClient:
542
542
  continue
543
543
 
544
544
  # Try presigned URL first, fall back to proxy
545
- data = self.download_file_from_presigned_url(url)
546
- if data is None and "localhost" in url:
547
- # Try proxy
548
- key = f.get("storage_key") or f.get("key")
549
- if key:
550
- try:
551
- resp = self._request("GET", f"/storage/download?key={key}")
552
- if resp.status_code == 200:
553
- data = resp.content
554
- except Exception:
555
- pass
545
+ if url.startswith("/"):
546
+ # Relative proxy URL — use authenticated client
547
+ api_path = url.replace("/api/v1/", "/", 1) if url.startswith("/api/v1/") else url
548
+ try:
549
+ resp = self._request("get", api_path)
550
+ if resp.status_code == 200:
551
+ data = resp.content
552
+ except Exception:
553
+ pass
554
+ else:
555
+ data = self.download_file_from_presigned_url(url)
556
+ if data is None:
557
+ # Try proxy with storage_key
558
+ key = f.get("storage_key") or f.get("key")
559
+ if key:
560
+ try:
561
+ resp = self._request("get", f"/storage/download?key={key}")
562
+ if resp.status_code == 200:
563
+ data = resp.content
564
+ except Exception:
565
+ pass
556
566
 
557
567
  if data:
558
568
  file_path = out_dir / fname
@@ -940,10 +940,11 @@ class Run:
940
940
  artifact_dir.mkdir(parents=True, exist_ok=True)
941
941
 
942
942
  for file_info in result.get("files", []):
943
- content_hash = file_info.get("content_hash", "")
944
- cached = self._artifact_cache.get(content_hash)
943
+ content_hash = file_info.get("content_hash") or ""
944
+ file_path = file_info.get("name") or file_info.get("path") or "file"
945
+ cached = self._artifact_cache.get(content_hash) if content_hash else None
945
946
 
946
- dest = artifact_dir / file_info["path"]
947
+ dest = artifact_dir / file_path
947
948
  dest.parent.mkdir(parents=True, exist_ok=True)
948
949
 
949
950
  if cached:
@@ -954,11 +955,23 @@ class Run:
954
955
  shutil.copy2(str(cached), str(dest))
955
956
  else:
956
957
  # Download and cache
957
- data = self._client.download_file_from_presigned_url(
958
- file_info["presigned_url"]
959
- )
958
+ url = file_info.get("download_url") or file_info.get("presigned_url", "")
959
+ data = None
960
+ if url.startswith("/"):
961
+ # Relative proxy URL — use authenticated client
962
+ # Strip /api/v1/ prefix since _request prepends it via base_url
963
+ api_path = url.replace("/api/v1/", "/", 1) if url.startswith("/api/v1/") else url
964
+ try:
965
+ resp = self._client._request("get", api_path)
966
+ if resp.status_code == 200:
967
+ data = resp.content
968
+ except Exception:
969
+ pass
970
+ else:
971
+ data = self._client.download_file_from_presigned_url(url)
960
972
  if data:
961
- self._artifact_cache.put(content_hash, data)
973
+ if content_hash:
974
+ self._artifact_cache.put(content_hash, data)
962
975
  dest.write_bytes(data)
963
976
 
964
977
  return artifact_dir
@@ -0,0 +1,232 @@
1
+ """Word Error Rate (WER) computation with num2words2 normalization.
2
+
3
+ Normalizes numbers to words before comparison so "50" vs "fifty" aren't
4
+ counted as substitution errors. Uses num2words2 (modern fork optimized
5
+ for LLM/AI/speech applications).
6
+
7
+ Install: pip install num2words2
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import re
13
+ from typing import Any
14
+
15
+
16
+ def _normalize_text(text: str, language: str = "en") -> str:
17
+ """Normalize text for WER: lowercase, expand numbers, strip punctuation."""
18
+ text = text.lower().strip()
19
+
20
+ # Expand numbers to words using num2words2
21
+ try:
22
+ from num2words2 import num2words
23
+
24
+ def _expand_number(match: re.Match) -> str:
25
+ num_str = match.group(0)
26
+ try:
27
+ # Handle decimals
28
+ if "." in num_str:
29
+ return num2words(float(num_str), lang=language)
30
+ # Handle integers
31
+ return num2words(int(num_str), lang=language)
32
+ except (ValueError, OverflowError):
33
+ return num_str
34
+
35
+ # Match numbers (integers, decimals, negatives)
36
+ text = re.sub(r"-?\d+\.?\d*", _expand_number, text)
37
+
38
+ except ImportError:
39
+ pass # num2words2 not installed — skip normalization
40
+
41
+ # Expand common currency symbols
42
+ text = re.sub(r"\$\s*", "dollars ", text)
43
+ text = re.sub(r"€\s*", "euros ", text)
44
+ text = re.sub(r"£\s*", "pounds ", text)
45
+ text = re.sub(r"%", " percent", text)
46
+
47
+ # Strip punctuation (keep hyphens inside words for compound words)
48
+ text = re.sub(r"[^\w\s-]", " ", text)
49
+ # Collapse whitespace
50
+ text = re.sub(r"\s+", " ", text).strip()
51
+
52
+ return text
53
+
54
+
55
+ def compute_wer(
56
+ reference: str,
57
+ hypothesis: str,
58
+ normalize: bool = True,
59
+ language: str = "en",
60
+ ) -> dict[str, Any]:
61
+ """Compute Word Error Rate between reference and hypothesis.
62
+
63
+ Args:
64
+ reference: Ground truth transcription
65
+ hypothesis: Model prediction
66
+ normalize: If True, expand numbers with num2words2 before comparing
67
+ language: Language code for num2words2 (en, es, fr, de, etc.)
68
+
69
+ Returns:
70
+ Dict with: wer, substitutions, insertions, deletions, ref_words, hyp_words
71
+ """
72
+ if normalize:
73
+ ref = _normalize_text(reference, language)
74
+ hyp = _normalize_text(hypothesis, language)
75
+ else:
76
+ ref = reference.lower().strip()
77
+ hyp = hypothesis.lower().strip()
78
+
79
+ ref_words = ref.split()
80
+ hyp_words = hyp.split()
81
+
82
+ # Edit distance DP
83
+ m, n = len(ref_words), len(hyp_words)
84
+ dp = [[0] * (n + 1) for _ in range(m + 1)]
85
+ for i in range(m + 1):
86
+ dp[i][0] = i
87
+ for j in range(n + 1):
88
+ dp[0][j] = j
89
+ for i in range(1, m + 1):
90
+ for j in range(1, n + 1):
91
+ if ref_words[i - 1] == hyp_words[j - 1]:
92
+ dp[i][j] = dp[i - 1][j - 1]
93
+ else:
94
+ dp[i][j] = 1 + min(
95
+ dp[i - 1][j - 1], # substitution
96
+ dp[i - 1][j], # deletion
97
+ dp[i][j - 1], # insertion
98
+ )
99
+
100
+ # Backtrace for error counts
101
+ subs, dels, ins = 0, 0, 0
102
+ i, j = m, n
103
+ while i > 0 or j > 0:
104
+ if i > 0 and j > 0 and ref_words[i - 1] == hyp_words[j - 1]:
105
+ i -= 1
106
+ j -= 1
107
+ elif i > 0 and j > 0 and dp[i][j] == dp[i - 1][j - 1] + 1:
108
+ subs += 1
109
+ i -= 1
110
+ j -= 1
111
+ elif i > 0 and dp[i][j] == dp[i - 1][j] + 1:
112
+ dels += 1
113
+ i -= 1
114
+ else:
115
+ ins += 1
116
+ j -= 1
117
+
118
+ total_errors = subs + dels + ins
119
+ wer_score = total_errors / max(len(ref_words), 1)
120
+
121
+ return {
122
+ "wer": round(wer_score, 4),
123
+ "substitutions": subs,
124
+ "insertions": ins,
125
+ "deletions": dels,
126
+ "errors": total_errors,
127
+ "ref_words": len(ref_words),
128
+ "hyp_words": len(hyp_words),
129
+ }
130
+
131
+
132
+ def compute_wer_batch(
133
+ references: list[str],
134
+ hypotheses: list[str],
135
+ normalize: bool = True,
136
+ language: str = "en",
137
+ ) -> dict[str, Any]:
138
+ """Compute WER across a batch of reference/hypothesis pairs.
139
+
140
+ Returns aggregate metrics + per-example breakdown.
141
+ """
142
+ total_errors = 0
143
+ total_ref_words = 0
144
+ total_subs = 0
145
+ total_ins = 0
146
+ total_dels = 0
147
+ examples = []
148
+
149
+ for ref, hyp in zip(references, hypotheses):
150
+ result = compute_wer(ref, hyp, normalize=normalize, language=language)
151
+ total_errors += result["errors"]
152
+ total_ref_words += result["ref_words"]
153
+ total_subs += result["substitutions"]
154
+ total_ins += result["insertions"]
155
+ total_dels += result["deletions"]
156
+ examples.append(result)
157
+
158
+ wer_score = total_errors / max(total_ref_words, 1)
159
+
160
+ return {
161
+ "wer": round(wer_score, 4),
162
+ "substitutions": total_subs,
163
+ "insertions": total_ins,
164
+ "deletions": total_dels,
165
+ "errors": total_errors,
166
+ "ref_words": total_ref_words,
167
+ "n_examples": len(references),
168
+ "examples": examples,
169
+ }
170
+
171
+
172
+ # ---------------------------------------------------------------------------
173
+ # Scorer integration for openrunner.evaluate()
174
+ # ---------------------------------------------------------------------------
175
+
176
+ from openrunner.evaluation import Scorer
177
+
178
+
179
+ class WERScorer(Scorer):
180
+ """Word Error Rate scorer for evaluation framework.
181
+
182
+ Uses num2words2 to normalize numbers before comparison.
183
+
184
+ Args:
185
+ normalize: Expand numbers to words (default True)
186
+ language: Language for number expansion (default "en")
187
+ ref_key: Key in the example dict for ground truth (default "expected")
188
+ hyp_key: Key in the output for hypothesis (default: uses output directly)
189
+
190
+ Example:
191
+ results = openrunner.evaluate(
192
+ model_fn=my_asr_model,
193
+ dataset=[{"input": audio, "expected": "fifty dollars"}],
194
+ scorers=[WERScorer(language="en")],
195
+ )
196
+ """
197
+
198
+ def __init__(
199
+ self,
200
+ normalize: bool = True,
201
+ language: str = "en",
202
+ ):
203
+ self.normalize = normalize
204
+ self.language = language
205
+
206
+ def score(self, output: Any, expected: Any, **kwargs) -> dict:
207
+ ref = str(expected) if expected else ""
208
+ hyp = str(output) if output else ""
209
+ result = compute_wer(ref, hyp, normalize=self.normalize, language=self.language)
210
+ return {
211
+ "wer": result["wer"],
212
+ "substitutions": result["substitutions"],
213
+ "insertions": result["insertions"],
214
+ "deletions": result["deletions"],
215
+ }
216
+
217
+ def summarize(self, scores: list[dict]) -> dict:
218
+ """Aggregate WER across all examples (corpus-level)."""
219
+ total_errors = sum(s["substitutions"] + s["insertions"] + s["deletions"] for s in scores)
220
+ # Approximate ref_words from individual WERs
221
+ total_ref = sum(
222
+ round((s["substitutions"] + s["insertions"] + s["deletions"]) / max(s["wer"], 1e-9))
223
+ if s["wer"] > 0 else 10
224
+ for s in scores
225
+ )
226
+ return {
227
+ "wer": round(total_errors / max(total_ref, 1), 4),
228
+ "mean_wer": round(sum(s["wer"] for s in scores) / max(len(scores), 1), 4),
229
+ "total_substitutions": sum(s["substitutions"] for s in scores),
230
+ "total_insertions": sum(s["insertions"] for s in scores),
231
+ "total_deletions": sum(s["deletions"] for s in scores),
232
+ }
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "openrunner-sdk"
3
- version = "2.4.0"
3
+ version = "2.4.2"
4
4
  description = "OpenRunner SDK - W&B-compatible ML experiment tracking client"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -30,6 +30,7 @@ Issues = "https://github.com/jqueguiner/openrunner/issues"
30
30
  openrunner = "openrunner.cli:main"
31
31
 
32
32
  [project.optional-dependencies]
33
+ wer = ["num2words2>=0.1"]
33
34
  gpu = ["nvidia-ml-py>=12.0"]
34
35
  pytorch = ["torch>=2.0"]
35
36
  huggingface = ["transformers>=4.30"]
File without changes
File without changes
File without changes