openrunner-sdk 2.4.0__tar.gz → 2.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/PKG-INFO +3 -1
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/__init__.py +1 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/run.py +18 -7
- openrunner_sdk-2.4.1/openrunner/wer.py +232 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/pyproject.toml +2 -1
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/.gitignore +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/=6.0 +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/=8.1 +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/README.md +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/api_client.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/artifact.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/buffer.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/cache.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/cli.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/config.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/cost.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/dataset.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/environment.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/evaluation.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/feedback.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/git_info.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/guardrails.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/__init__.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/accelerate.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/anthropic_tracer.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/catboost.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/diffusers.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/fastai.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/forced_alignment.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/gladia.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/gymnasium.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/huggingface.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/hydra.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/ignite.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/jax.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/keras.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/langchain.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/lightgbm.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/lightning.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/llamaindex.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/openai_finetune.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/openai_tracer.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/optuna.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/pytorch.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/sb3.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/sklearn.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/tensorflow.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/trl.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/tts.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/ultralytics.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/voice_agent.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/whisper.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/integration/xgboost.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/launch.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/media.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/migrate.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/model.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/offline.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/pii.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/plot.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/prompt.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/query_api.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/scorers.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/sender.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/settings.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/summary.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/sweep.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/system_metrics.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/tensorboard.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/trace.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/transcript_formatter.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/wal.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/wandb_compat/__init__.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/openrunner/wandb_compat/_shim.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/__init__.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/conftest.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_alert.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_aliases.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_api_client.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_artifact.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_buffer.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_cache.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_class_scorers.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_cli.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_config.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_evaluation.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_finish.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_git_info.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_init.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_integration_fastai.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_integration_huggingface.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_integration_keras.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_integration_langchain.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_integration_lightning.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_integration_pytorch.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_integration_sklearn.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_integration_xgboost.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_launch.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_log.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_log_code.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_media.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_migrate.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_offline.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_offline_sync.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_pii.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_plot.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_query_api.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_resume.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_sdk_features.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_sender.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_summary.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_sweep.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_system_metrics.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_trace.py +0 -0
- {openrunner_sdk-2.4.0 → openrunner_sdk-2.4.1}/tests/test_wandb_compat.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: openrunner-sdk
|
|
3
|
-
Version: 2.4.0
|
|
3
|
+
Version: 2.4.1
|
|
4
4
|
Summary: OpenRunner SDK - W&B-compatible ML experiment tracking client
|
|
5
5
|
Project-URL: Homepage, https://github.com/jqueguiner/openrunner
|
|
6
6
|
Project-URL: Repository, https://github.com/jqueguiner/openrunner
|
|
@@ -82,6 +82,8 @@ Requires-Dist: numpy>=1.24; extra == 'tts'
|
|
|
82
82
|
Provides-Extra: ultralytics
|
|
83
83
|
Requires-Dist: ultralytics>=8.0; extra == 'ultralytics'
|
|
84
84
|
Provides-Extra: voice-agent
|
|
85
|
+
Provides-Extra: wer
|
|
86
|
+
Requires-Dist: num2words2>=0.1; extra == 'wer'
|
|
85
87
|
Provides-Extra: whisper
|
|
86
88
|
Requires-Dist: openai-whisper>=20231117; extra == 'whisper'
|
|
87
89
|
Provides-Extra: xgboost
|
|
@@ -97,6 +97,7 @@ from openrunner.settings import SDKSettings
|
|
|
97
97
|
from openrunner.summary import Summary
|
|
98
98
|
from openrunner.sweep import agent, sweep
|
|
99
99
|
from openrunner.evaluation import EvaluationLogger, Scorer, evaluate, scorer
|
|
100
|
+
from openrunner.wer import WERScorer, compute_wer, compute_wer_batch
|
|
100
101
|
from openrunner.guardrails import (
|
|
101
102
|
GuardrailCheckResult,
|
|
102
103
|
GuardrailResult,
|
|
@@ -940,10 +940,11 @@ class Run:
|
|
|
940
940
|
artifact_dir.mkdir(parents=True, exist_ok=True)
|
|
941
941
|
|
|
942
942
|
for file_info in result.get("files", []):
|
|
943
|
-
content_hash = file_info.get("content_hash"
|
|
944
|
-
|
|
943
|
+
content_hash = file_info.get("content_hash") or ""
|
|
944
|
+
file_path = file_info.get("name") or file_info.get("path") or "file"
|
|
945
|
+
cached = self._artifact_cache.get(content_hash) if content_hash else None
|
|
945
946
|
|
|
946
|
-
dest = artifact_dir /
|
|
947
|
+
dest = artifact_dir / file_path
|
|
947
948
|
dest.parent.mkdir(parents=True, exist_ok=True)
|
|
948
949
|
|
|
949
950
|
if cached:
|
|
@@ -954,11 +955,21 @@ class Run:
|
|
|
954
955
|
shutil.copy2(str(cached), str(dest))
|
|
955
956
|
else:
|
|
956
957
|
# Download and cache
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
)
|
|
958
|
+
url = file_info.get("download_url") or file_info.get("presigned_url", "")
|
|
959
|
+
data = None
|
|
960
|
+
if url.startswith("/"):
|
|
961
|
+
# Relative proxy URL — use authenticated client
|
|
962
|
+
try:
|
|
963
|
+
resp = self._client._request("GET", url)
|
|
964
|
+
if resp.status_code == 200:
|
|
965
|
+
data = resp.content
|
|
966
|
+
except Exception:
|
|
967
|
+
pass
|
|
968
|
+
else:
|
|
969
|
+
data = self._client.download_file_from_presigned_url(url)
|
|
960
970
|
if data:
|
|
961
|
-
|
|
971
|
+
if content_hash:
|
|
972
|
+
self._artifact_cache.put(content_hash, data)
|
|
962
973
|
dest.write_bytes(data)
|
|
963
974
|
|
|
964
975
|
return artifact_dir
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
"""Word Error Rate (WER) computation with num2words2 normalization.
|
|
2
|
+
|
|
3
|
+
Normalizes numbers to words before comparison so "50" vs "fifty" aren't
|
|
4
|
+
counted as substitution errors. Uses num2words2 (modern fork optimized
|
|
5
|
+
for LLM/AI/speech applications).
|
|
6
|
+
|
|
7
|
+
Install: pip install num2words2
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import re
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _normalize_text(text: str, language: str = "en") -> str:
|
|
17
|
+
"""Normalize text for WER: lowercase, expand numbers, strip punctuation."""
|
|
18
|
+
text = text.lower().strip()
|
|
19
|
+
|
|
20
|
+
# Expand numbers to words using num2words2
|
|
21
|
+
try:
|
|
22
|
+
from num2words2 import num2words
|
|
23
|
+
|
|
24
|
+
def _expand_number(match: re.Match) -> str:
|
|
25
|
+
num_str = match.group(0)
|
|
26
|
+
try:
|
|
27
|
+
# Handle decimals
|
|
28
|
+
if "." in num_str:
|
|
29
|
+
return num2words(float(num_str), lang=language)
|
|
30
|
+
# Handle integers
|
|
31
|
+
return num2words(int(num_str), lang=language)
|
|
32
|
+
except (ValueError, OverflowError):
|
|
33
|
+
return num_str
|
|
34
|
+
|
|
35
|
+
# Match numbers (integers, decimals, negatives)
|
|
36
|
+
text = re.sub(r"-?\d+\.?\d*", _expand_number, text)
|
|
37
|
+
|
|
38
|
+
except ImportError:
|
|
39
|
+
pass # num2words2 not installed — skip normalization
|
|
40
|
+
|
|
41
|
+
# Expand common currency symbols
|
|
42
|
+
text = re.sub(r"\$\s*", "dollars ", text)
|
|
43
|
+
text = re.sub(r"€\s*", "euros ", text)
|
|
44
|
+
text = re.sub(r"£\s*", "pounds ", text)
|
|
45
|
+
text = re.sub(r"%", " percent", text)
|
|
46
|
+
|
|
47
|
+
# Strip punctuation (keep hyphens inside words for compound words)
|
|
48
|
+
text = re.sub(r"[^\w\s-]", " ", text)
|
|
49
|
+
# Collapse whitespace
|
|
50
|
+
text = re.sub(r"\s+", " ", text).strip()
|
|
51
|
+
|
|
52
|
+
return text
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def compute_wer(
|
|
56
|
+
reference: str,
|
|
57
|
+
hypothesis: str,
|
|
58
|
+
normalize: bool = True,
|
|
59
|
+
language: str = "en",
|
|
60
|
+
) -> dict[str, Any]:
|
|
61
|
+
"""Compute Word Error Rate between reference and hypothesis.
|
|
62
|
+
|
|
63
|
+
Args:
|
|
64
|
+
reference: Ground truth transcription
|
|
65
|
+
hypothesis: Model prediction
|
|
66
|
+
normalize: If True, expand numbers with num2words2 before comparing
|
|
67
|
+
language: Language code for num2words2 (en, es, fr, de, etc.)
|
|
68
|
+
|
|
69
|
+
Returns:
|
|
70
|
+
Dict with: wer, substitutions, insertions, deletions, ref_words, hyp_words
|
|
71
|
+
"""
|
|
72
|
+
if normalize:
|
|
73
|
+
ref = _normalize_text(reference, language)
|
|
74
|
+
hyp = _normalize_text(hypothesis, language)
|
|
75
|
+
else:
|
|
76
|
+
ref = reference.lower().strip()
|
|
77
|
+
hyp = hypothesis.lower().strip()
|
|
78
|
+
|
|
79
|
+
ref_words = ref.split()
|
|
80
|
+
hyp_words = hyp.split()
|
|
81
|
+
|
|
82
|
+
# Edit distance DP
|
|
83
|
+
m, n = len(ref_words), len(hyp_words)
|
|
84
|
+
dp = [[0] * (n + 1) for _ in range(m + 1)]
|
|
85
|
+
for i in range(m + 1):
|
|
86
|
+
dp[i][0] = i
|
|
87
|
+
for j in range(n + 1):
|
|
88
|
+
dp[0][j] = j
|
|
89
|
+
for i in range(1, m + 1):
|
|
90
|
+
for j in range(1, n + 1):
|
|
91
|
+
if ref_words[i - 1] == hyp_words[j - 1]:
|
|
92
|
+
dp[i][j] = dp[i - 1][j - 1]
|
|
93
|
+
else:
|
|
94
|
+
dp[i][j] = 1 + min(
|
|
95
|
+
dp[i - 1][j - 1], # substitution
|
|
96
|
+
dp[i - 1][j], # deletion
|
|
97
|
+
dp[i][j - 1], # insertion
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
# Backtrace for error counts
|
|
101
|
+
subs, dels, ins = 0, 0, 0
|
|
102
|
+
i, j = m, n
|
|
103
|
+
while i > 0 or j > 0:
|
|
104
|
+
if i > 0 and j > 0 and ref_words[i - 1] == hyp_words[j - 1]:
|
|
105
|
+
i -= 1
|
|
106
|
+
j -= 1
|
|
107
|
+
elif i > 0 and j > 0 and dp[i][j] == dp[i - 1][j - 1] + 1:
|
|
108
|
+
subs += 1
|
|
109
|
+
i -= 1
|
|
110
|
+
j -= 1
|
|
111
|
+
elif i > 0 and dp[i][j] == dp[i - 1][j] + 1:
|
|
112
|
+
dels += 1
|
|
113
|
+
i -= 1
|
|
114
|
+
else:
|
|
115
|
+
ins += 1
|
|
116
|
+
j -= 1
|
|
117
|
+
|
|
118
|
+
total_errors = subs + dels + ins
|
|
119
|
+
wer_score = total_errors / max(len(ref_words), 1)
|
|
120
|
+
|
|
121
|
+
return {
|
|
122
|
+
"wer": round(wer_score, 4),
|
|
123
|
+
"substitutions": subs,
|
|
124
|
+
"insertions": ins,
|
|
125
|
+
"deletions": dels,
|
|
126
|
+
"errors": total_errors,
|
|
127
|
+
"ref_words": len(ref_words),
|
|
128
|
+
"hyp_words": len(hyp_words),
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def compute_wer_batch(
|
|
133
|
+
references: list[str],
|
|
134
|
+
hypotheses: list[str],
|
|
135
|
+
normalize: bool = True,
|
|
136
|
+
language: str = "en",
|
|
137
|
+
) -> dict[str, Any]:
|
|
138
|
+
"""Compute WER across a batch of reference/hypothesis pairs.
|
|
139
|
+
|
|
140
|
+
Returns aggregate metrics + per-example breakdown.
|
|
141
|
+
"""
|
|
142
|
+
total_errors = 0
|
|
143
|
+
total_ref_words = 0
|
|
144
|
+
total_subs = 0
|
|
145
|
+
total_ins = 0
|
|
146
|
+
total_dels = 0
|
|
147
|
+
examples = []
|
|
148
|
+
|
|
149
|
+
for ref, hyp in zip(references, hypotheses):
|
|
150
|
+
result = compute_wer(ref, hyp, normalize=normalize, language=language)
|
|
151
|
+
total_errors += result["errors"]
|
|
152
|
+
total_ref_words += result["ref_words"]
|
|
153
|
+
total_subs += result["substitutions"]
|
|
154
|
+
total_ins += result["insertions"]
|
|
155
|
+
total_dels += result["deletions"]
|
|
156
|
+
examples.append(result)
|
|
157
|
+
|
|
158
|
+
wer_score = total_errors / max(total_ref_words, 1)
|
|
159
|
+
|
|
160
|
+
return {
|
|
161
|
+
"wer": round(wer_score, 4),
|
|
162
|
+
"substitutions": total_subs,
|
|
163
|
+
"insertions": total_ins,
|
|
164
|
+
"deletions": total_dels,
|
|
165
|
+
"errors": total_errors,
|
|
166
|
+
"ref_words": total_ref_words,
|
|
167
|
+
"n_examples": len(references),
|
|
168
|
+
"examples": examples,
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
# ---------------------------------------------------------------------------
|
|
173
|
+
# Scorer integration for openrunner.evaluate()
|
|
174
|
+
# ---------------------------------------------------------------------------
|
|
175
|
+
|
|
176
|
+
from openrunner.evaluation import Scorer
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
class WERScorer(Scorer):
|
|
180
|
+
"""Word Error Rate scorer for evaluation framework.
|
|
181
|
+
|
|
182
|
+
Uses num2words2 to normalize numbers before comparison.
|
|
183
|
+
|
|
184
|
+
Args:
|
|
185
|
+
normalize: Expand numbers to words (default True)
|
|
186
|
+
language: Language for number expansion (default "en")
|
|
187
|
+
ref_key: Key in the example dict for ground truth (default "expected")
|
|
188
|
+
hyp_key: Key in the output for hypothesis (default: uses output directly)
|
|
189
|
+
|
|
190
|
+
Example:
|
|
191
|
+
results = openrunner.evaluate(
|
|
192
|
+
model_fn=my_asr_model,
|
|
193
|
+
dataset=[{"input": audio, "expected": "fifty dollars"}],
|
|
194
|
+
scorers=[WERScorer(language="en")],
|
|
195
|
+
)
|
|
196
|
+
"""
|
|
197
|
+
|
|
198
|
+
def __init__(
|
|
199
|
+
self,
|
|
200
|
+
normalize: bool = True,
|
|
201
|
+
language: str = "en",
|
|
202
|
+
):
|
|
203
|
+
self.normalize = normalize
|
|
204
|
+
self.language = language
|
|
205
|
+
|
|
206
|
+
def score(self, output: Any, expected: Any, **kwargs) -> dict:
|
|
207
|
+
ref = str(expected) if expected else ""
|
|
208
|
+
hyp = str(output) if output else ""
|
|
209
|
+
result = compute_wer(ref, hyp, normalize=self.normalize, language=self.language)
|
|
210
|
+
return {
|
|
211
|
+
"wer": result["wer"],
|
|
212
|
+
"substitutions": result["substitutions"],
|
|
213
|
+
"insertions": result["insertions"],
|
|
214
|
+
"deletions": result["deletions"],
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
def summarize(self, scores: list[dict]) -> dict:
|
|
218
|
+
"""Aggregate WER across all examples (corpus-level)."""
|
|
219
|
+
total_errors = sum(s["substitutions"] + s["insertions"] + s["deletions"] for s in scores)
|
|
220
|
+
# Approximate ref_words from individual WERs
|
|
221
|
+
total_ref = sum(
|
|
222
|
+
round((s["substitutions"] + s["insertions"] + s["deletions"]) / max(s["wer"], 1e-9))
|
|
223
|
+
if s["wer"] > 0 else 10
|
|
224
|
+
for s in scores
|
|
225
|
+
)
|
|
226
|
+
return {
|
|
227
|
+
"wer": round(total_errors / max(total_ref, 1), 4),
|
|
228
|
+
"mean_wer": round(sum(s["wer"] for s in scores) / max(len(scores), 1), 4),
|
|
229
|
+
"total_substitutions": sum(s["substitutions"] for s in scores),
|
|
230
|
+
"total_insertions": sum(s["insertions"] for s in scores),
|
|
231
|
+
"total_deletions": sum(s["deletions"] for s in scores),
|
|
232
|
+
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "openrunner-sdk"
|
|
3
|
-
version = "2.4.0"
|
|
3
|
+
version = "2.4.1"
|
|
4
4
|
description = "OpenRunner SDK - W&B-compatible ML experiment tracking client"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = {text = "MIT"}
|
|
@@ -30,6 +30,7 @@ Issues = "https://github.com/jqueguiner/openrunner/issues"
|
|
|
30
30
|
openrunner = "openrunner.cli:main"
|
|
31
31
|
|
|
32
32
|
[project.optional-dependencies]
|
|
33
|
+
wer = ["num2words2>=0.1"]
|
|
33
34
|
gpu = ["nvidia-ml-py>=12.0"]
|
|
34
35
|
pytorch = ["torch>=2.0"]
|
|
35
36
|
huggingface = ["transformers>=4.30"]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|