dreadnode 1.11.0__tar.gz → 1.12.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. {dreadnode-1.11.0 → dreadnode-1.12.0}/PKG-INFO +2 -1
  2. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/__init__.py +8 -3
  3. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/data_types/image.py +2 -2
  4. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/integrations/transformers.py +9 -3
  5. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/main.py +53 -20
  6. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/metric.py +21 -16
  7. dreadnode-1.12.0/dreadnode/object.py +48 -0
  8. dreadnode-1.12.0/dreadnode/scorers/__init__.py +35 -0
  9. dreadnode-1.12.0/dreadnode/scorers/consistency.py +66 -0
  10. dreadnode-1.12.0/dreadnode/scorers/contains.py +185 -0
  11. dreadnode-1.12.0/dreadnode/scorers/length.py +140 -0
  12. dreadnode-1.12.0/dreadnode/scorers/pii.py +158 -0
  13. dreadnode-1.12.0/dreadnode/scorers/readability.py +60 -0
  14. dreadnode-1.12.0/dreadnode/scorers/rigging.py +76 -0
  15. dreadnode-1.12.0/dreadnode/scorers/sentiment.py +117 -0
  16. dreadnode-1.12.0/dreadnode/scorers/similarity.py +180 -0
  17. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/task.py +93 -7
  18. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/tracing/span.py +4 -5
  19. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/util.py +8 -2
  20. {dreadnode-1.11.0 → dreadnode-1.12.0}/pyproject.toml +3 -3
  21. dreadnode-1.11.0/dreadnode/object.py +0 -32
  22. {dreadnode-1.11.0 → dreadnode-1.12.0}/README.md +0 -0
  23. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/api/__init__.py +0 -0
  24. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/api/client.py +0 -0
  25. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/api/models.py +0 -0
  26. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/api/util.py +0 -0
  27. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/artifact/__init__.py +0 -0
  28. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/artifact/merger.py +0 -0
  29. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/artifact/storage.py +0 -0
  30. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/artifact/tree_builder.py +0 -0
  31. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/constants.py +0 -0
  32. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/convert.py +0 -0
  33. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/data_types/__init__.py +0 -0
  34. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/data_types/audio.py +0 -0
  35. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/data_types/base.py +0 -0
  36. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/data_types/object_3d.py +0 -0
  37. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/data_types/table.py +0 -0
  38. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/data_types/text.py +0 -0
  39. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/data_types/video.py +0 -0
  40. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/integrations/__init__.py +0 -0
  41. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/py.typed +0 -0
  42. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/serialization.py +0 -0
  43. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/tracing/__init__.py +0 -0
  44. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/tracing/constants.py +0 -0
  45. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/tracing/exporters.py +0 -0
  46. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/types.py +0 -0
  47. {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: dreadnode
3
- Version: 1.11.0
3
+ Version: 1.12.0
4
4
  Summary: Dreadnode SDK
5
5
  Author: Nick Landers
6
6
  Author-email: monoxgas@gmail.com
@@ -22,6 +22,7 @@ Requires-Dist: pandas (>=2.2.3,<3.0.0)
22
22
  Requires-Dist: pillow (>=11.2.1,<12.0.0) ; extra == "multimodal"
23
23
  Requires-Dist: pydantic (>=2.9.2,<3.0.0)
24
24
  Requires-Dist: python-ulid (>=3.0.0,<4.0.0)
25
+ Requires-Dist: rigging (>=3.1.1,<4.0.0)
25
26
  Requires-Dist: soundfile (>=0.13.1,<0.14.0) ; extra == "multimodal"
26
27
  Requires-Dist: transformers (>=4.41.0,<5.0.0) ; extra == "training"
27
28
  Project-URL: Repository, https://github.com/dreadnode/sdk
@@ -1,9 +1,9 @@
1
- from dreadnode import convert, data_types
2
- from dreadnode.data_types import Audio, Image, Object3D, Table, Video
1
+ from dreadnode import convert, data_types, scorers
2
+ from dreadnode.data_types import Audio, Code, Image, Markdown, Object3D, Table, Text, Video
3
3
  from dreadnode.main import DEFAULT_INSTANCE, Dreadnode
4
4
  from dreadnode.metric import Metric, MetricDict, Scorer
5
5
  from dreadnode.object import Object
6
- from dreadnode.task import Task
6
+ from dreadnode.task import Task, TaskInput
7
7
  from dreadnode.tracing.span import RunSpan, Span, TaskSpan
8
8
  from dreadnode.version import VERSION
9
9
 
@@ -36,8 +36,10 @@ __version__ = VERSION
36
36
  __all__ = [
37
37
  "DEFAULT_INSTANCE",
38
38
  "Audio",
39
+ "Code",
39
40
  "Dreadnode",
40
41
  "Image",
42
+ "Markdown",
41
43
  "Metric",
42
44
  "MetricDict",
43
45
  "Object",
@@ -48,7 +50,9 @@ __all__ = [
48
50
  "Span",
49
51
  "Table",
50
52
  "Task",
53
+ "TaskInput",
51
54
  "TaskSpan",
55
+ "Text",
52
56
  "Video",
53
57
  "__version__",
54
58
  "api",
@@ -68,6 +72,7 @@ __all__ = [
68
72
  "push_update",
69
73
  "run",
70
74
  "scorer",
75
+ "scorers",
71
76
  "shutdown",
72
77
  "span",
73
78
  "tag",
@@ -8,9 +8,9 @@ import numpy as np
8
8
  from dreadnode.data_types.base import DataType
9
9
 
10
10
  try:
11
- from PIL import Image as PILImage
11
+ from PIL import Image as PILImage # type: ignore[import-not-found,unused-ignore]
12
12
  except ImportError:
13
- PILImage = None # type: ignore[assignment]
13
+ PILImage = None # type: ignore[assignment,unused-ignore]
14
14
 
15
15
  ImageDataType = t.Any | np.ndarray[t.Any, t.Any]
16
16
  ImageDataOrPathType = str | Path | bytes | ImageDataType
@@ -12,8 +12,14 @@ if importlib.util.find_spec("transformers") is None:
12
12
 
13
13
  import typing as t
14
14
 
15
- from transformers.trainer_callback import TrainerCallback, TrainerControl, TrainerState
16
- from transformers.training_args import TrainingArguments
15
+ from transformers.trainer_callback import ( # type: ignore[import-not-found,unused-ignore]
16
+ TrainerCallback,
17
+ TrainerControl,
18
+ TrainerState,
19
+ )
20
+ from transformers.training_args import ( # type: ignore[import-not-found,unused-ignore]
21
+ TrainingArguments,
22
+ )
17
23
 
18
24
  import dreadnode as dn
19
25
 
@@ -40,7 +46,7 @@ def _clean_keys(data: dict[str, t.Any]) -> dict[str, t.Any]:
40
46
  return cleaned
41
47
 
42
48
 
43
- class DreadnodeCallback(TrainerCallback):
49
+ class DreadnodeCallback(TrainerCallback): # type: ignore[misc,unused-ignore]
44
50
  """
45
51
  An implementation of the `TrainerCallback` interface for Dreadnode.
46
52
 
@@ -634,9 +634,7 @@ class Dreadnode:
634
634
  attributes=_attributes,
635
635
  func=t.cast("t.Callable[P, R]", func),
636
636
  scorers=[
637
- scorer
638
- if isinstance(scorer, Scorer)
639
- else Scorer.from_callable(self._get_tracer(), scorer)
637
+ scorer if isinstance(scorer, Scorer) else Scorer.from_callable(scorer)
640
638
  for scorer in scorers or []
641
639
  ],
642
640
  tags=list(tags or []),
@@ -726,7 +724,6 @@ class Dreadnode:
726
724
 
727
725
  def make_scorer(func: ScorerCallable[T]) -> Scorer[T]:
728
726
  return Scorer.from_callable(
729
- self._get_tracer(),
730
727
  func,
731
728
  name=name,
732
729
  tags=tags,
@@ -860,7 +857,11 @@ class Dreadnode:
860
857
 
861
858
  target = (task or run) if to == "task-or-run" else run
862
859
  if target is None:
863
- raise RuntimeError("Tagging must be done within a run")
860
+ warn_at_user_stacklevel(
861
+ "tag() was called outside of a task or run.",
862
+ category=DreadnodeUsageWarning,
863
+ )
864
+ return
864
865
 
865
866
  target.add_tags(tag)
866
867
 
@@ -883,7 +884,11 @@ class Dreadnode:
883
884
  # do more work
884
885
  """
885
886
  if (run := current_run_span.get()) is None:
886
- raise RuntimeError("Run updates must be pushed within a run")
887
+ warn_at_user_stacklevel(
888
+ "push_update() was called outside of a run.",
889
+ category=DreadnodeUsageWarning,
890
+ )
891
+ return
887
892
 
888
893
  run.push_update(force=True)
889
894
 
@@ -934,7 +939,12 @@ class Dreadnode:
934
939
  **params: The parameters to log. Each parameter is a key-value pair.
935
940
  """
936
941
  if (run := current_run_span.get()) is None:
937
- raise RuntimeError("Parameters must be logged within a run")
942
+ warn_at_user_stacklevel(
943
+ "log_params() was called outside of a run.",
944
+ category=DreadnodeUsageWarning,
945
+ )
946
+ return
947
+
938
948
  run.log_params(**params)
939
949
 
940
950
  @t.overload
@@ -1085,13 +1095,6 @@ class Dreadnode:
1085
1095
  Returns:
1086
1096
  The logged metric object.
1087
1097
  """
1088
- task = current_task_span.get()
1089
- run = current_run_span.get()
1090
-
1091
- target = (task or run) if to == "task-or-run" else run
1092
- if target is None:
1093
- raise RuntimeError("log_metric() must be called within a run")
1094
-
1095
1098
  metric = (
1096
1099
  value
1097
1100
  if isinstance(value, Metric)
@@ -1102,6 +1105,18 @@ class Dreadnode:
1102
1105
  attributes or {},
1103
1106
  )
1104
1107
  )
1108
+
1109
+ task = current_task_span.get()
1110
+ run = current_run_span.get()
1111
+
1112
+ target = (task or run) if to == "task-or-run" else run
1113
+ if target is None:
1114
+ warn_at_user_stacklevel(
1115
+ "log_metric() was called outside of a task or run.",
1116
+ category=DreadnodeUsageWarning,
1117
+ )
1118
+ return metric
1119
+
1105
1120
  return target.log_metric(name, metric, origin=origin, mode=mode)
1106
1121
 
1107
1122
  @t.overload
@@ -1240,7 +1255,11 @@ class Dreadnode:
1240
1255
 
1241
1256
  target = (task or run) if to == "task-or-run" else run
1242
1257
  if target is None:
1243
- raise RuntimeError("log_metrics() must be called within a run")
1258
+ warn_at_user_stacklevel(
1259
+ "log_metrics() was called outside of a task or run.",
1260
+ category=DreadnodeUsageWarning,
1261
+ )
1262
+ return []
1244
1263
 
1245
1264
  logged_metrics: list[Metric] = []
1246
1265
 
@@ -1312,7 +1331,11 @@ class Dreadnode:
1312
1331
  local_uri: The local path to the file to upload.
1313
1332
  """
1314
1333
  if (run := current_run_span.get()) is None:
1315
- raise RuntimeError("log_artifact() must be called within a run")
1334
+ warn_at_user_stacklevel(
1335
+ "log_artifact() was called outside of a run.",
1336
+ category=DreadnodeUsageWarning,
1337
+ )
1338
+ return
1316
1339
 
1317
1340
  run.log_artifact(local_uri=local_uri)
1318
1341
 
@@ -1350,7 +1373,11 @@ class Dreadnode:
1350
1373
 
1351
1374
  target = (task or run) if to == "task-or-run" else run
1352
1375
  if target is None:
1353
- raise RuntimeError("log_inputs() must be called within a run")
1376
+ warn_at_user_stacklevel(
1377
+ "log_input() was called outside of a task or run.",
1378
+ category=DreadnodeUsageWarning,
1379
+ )
1380
+ return
1354
1381
 
1355
1382
  target.log_input(name, value, label=label, attributes=attributes)
1356
1383
 
@@ -1412,9 +1439,11 @@ class Dreadnode:
1412
1439
 
1413
1440
  target = (task or run) if to == "task-or-run" else run
1414
1441
  if target is None:
1415
- raise RuntimeError(
1416
- "log_output() must be called within a run or a task",
1442
+ warn_at_user_stacklevel(
1443
+ "log_output() was called outside of a task or run.",
1444
+ category=DreadnodeUsageWarning,
1417
1445
  )
1446
+ return
1418
1447
 
1419
1448
  target.log_output(name, value, label=label, attributes=attributes)
1420
1449
 
@@ -1461,7 +1490,11 @@ class Dreadnode:
1461
1490
  attributes: Additional attributes to attach to the link.
1462
1491
  """
1463
1492
  if (run := current_run_span.get()) is None:
1464
- raise RuntimeError("link() must be called within a run")
1493
+ warn_at_user_stacklevel(
1494
+ "link_objects() was called outside of a run.",
1495
+ category=DreadnodeUsageWarning,
1496
+ )
1497
+ return
1465
1498
 
1466
1499
  origin_hash = run.log_object(origin)
1467
1500
  link_hash = run.log_object(link)
@@ -6,7 +6,6 @@ from datetime import datetime, timezone
6
6
  import typing_extensions as te
7
7
  from logfire._internal.stack_info import warn_at_user_stacklevel
8
8
  from logfire._internal.utils import safe_repr
9
- from opentelemetry.trace import Tracer
10
9
 
11
10
  from dreadnode.types import JsonDict, JsonValue
12
11
 
@@ -73,7 +72,11 @@ class Metric:
73
72
  total = sum(value * weight for _, value, weight in values)
74
73
  weight = sum(weight for _, _, weight in values)
75
74
  score_attributes = {name: value for name, value, _ in values}
76
- return cls(value=total / weight, step=step, attributes={**attributes, **score_attributes})
75
+ return cls(
76
+ value=total / weight,
77
+ step=step,
78
+ attributes={**attributes, **score_attributes},
79
+ )
77
80
 
78
81
  def apply_mode(self, mode: MetricAggMode, others: "list[Metric]") -> "Metric":
79
82
  """
@@ -124,8 +127,6 @@ ScorerCallable = t.Callable[[T], t.Awaitable[ScorerResult]] | t.Callable[[T], Sc
124
127
 
125
128
  @dataclass
126
129
  class Scorer(t.Generic[T]):
127
- tracer: Tracer
128
-
129
130
  name: str
130
131
  "The name of the scorer, used for reporting metrics."
131
132
  tags: t.Sequence[str]
@@ -138,25 +139,27 @@ class Scorer(t.Generic[T]):
138
139
  "The step value to attach to metrics produced by this Scorer."
139
140
  auto_increment_step: bool = False
140
141
  "Whether to automatically increment the step for each time this scorer is called."
142
+ catch: bool = False
143
+ "Whether to catch exceptions in the scorer function and return a 0 Metric with error information."
141
144
 
142
145
  @classmethod
143
146
  def from_callable(
144
147
  cls,
145
- tracer: Tracer,
146
148
  func: "ScorerCallable[T] | Scorer[T]",
147
149
  *,
148
150
  name: str | None = None,
149
151
  tags: t.Sequence[str] | None = None,
152
+ catch: bool = False,
150
153
  **attributes: t.Any,
151
154
  ) -> "Scorer[T]":
152
155
  """
153
156
  Create a scorer from a callable function.
154
157
 
155
158
  Args:
156
- tracer: The tracer to use for reporting metrics.
157
159
  func: The function to call to get the metric.
158
160
  name: The name of the scorer, used for reporting metrics.
159
161
  tags: A list of tags to attach to the metric.
162
+ catch: Whether to catch exceptions in the scorer function and return a 0 Metric with error information.
160
163
  **attributes: A dictionary of attributes to attach to the metric.
161
164
 
162
165
  Returns:
@@ -177,11 +180,11 @@ class Scorer(t.Generic[T]):
177
180
  )
178
181
  name = name or func_name
179
182
  return cls(
180
- tracer=tracer,
181
183
  name=name,
182
184
  tags=tags or [],
183
185
  attributes=attributes or {},
184
186
  func=func,
187
+ catch=catch,
185
188
  )
186
189
 
187
190
  def __post_init__(self) -> None:
@@ -196,13 +199,13 @@ class Scorer(t.Generic[T]):
196
199
  A new Scorer.
197
200
  """
198
201
  return Scorer(
199
- tracer=self.tracer,
200
202
  name=self.name,
201
203
  tags=self.tags,
202
204
  attributes=self.attributes,
203
205
  func=self.func,
204
206
  step=self.step,
205
207
  auto_increment_step=self.auto_increment_step,
208
+ catch=self.catch,
206
209
  )
207
210
 
208
211
  async def __call__(self, object: T) -> Metric:
@@ -217,17 +220,19 @@ class Scorer(t.Generic[T]):
217
220
  Returns:
218
221
  A Metric object.
219
222
  """
220
- from dreadnode.tracing.span import Span
221
-
222
- with Span(
223
- name=self.name,
224
- tags=self.tags,
225
- attributes=self.attributes,
226
- tracer=self.tracer,
227
- ):
223
+ try:
228
224
  metric = self.func(object)
229
225
  if inspect.isawaitable(metric):
230
226
  metric = await metric
227
+ except Exception as exc:
228
+ if not self.catch:
229
+ raise
230
+
231
+ warn_at_user_stacklevel(
232
+ f"Error executing scorer {self.name!r} for object {object!r}: {exc}",
233
+ MetricWarning,
234
+ )
235
+ metric = Metric(value=0.0, step=self.step, attributes={"error": str(exc)})
231
236
 
232
237
  if not isinstance(metric, Metric):
233
238
  metric = Metric(
@@ -0,0 +1,48 @@
1
+ import typing as t
2
+ from dataclasses import dataclass
3
+
4
+ from pydantic import BaseModel, Field
5
+
6
+ from dreadnode.types import AnyDict
7
+
8
+
9
@dataclass
class ObjectRef:
    """
    Plain record grouping a name, a label, a hash, and optional attributes.

    All four fields are required at construction time; `attributes` may be
    passed as `None` when there is nothing extra to attach.
    """

    # Identifier fields - all plain strings.
    name: str
    label: str
    hash: str
    # Free-form attribute mapping, or None when absent.
    attributes: AnyDict | None
15
+
16
+
17
class ObjectUri(BaseModel):
    """
    Object record carrying a `uri` and `size`, discriminated by `type == "uri"`.

    The `value` property exposes the in-memory runtime value when one has been
    set, and otherwise falls back to the `uri` string.
    """

    hash: str
    schema_hash: str
    uri: str
    size: int
    type: t.Literal["uri"] = "uri"

    # During execution, we might want to dynamically pull a value
    # in its unserialized form, so we store it here.
    runtime_value: t.Any | None = Field(None, init=False, repr=False, exclude=True)

    @property
    def value(self) -> t.Any:
        """
        Return the runtime value if one was set, otherwise the `uri`.

        `None` is the "not set" sentinel. An explicit identity check is used
        (rather than `or`) so that falsy runtime values such as `0`, `""`,
        `False`, or an empty container are returned instead of being silently
        replaced by the URI.
        """
        if self.runtime_value is not None:
            return self.runtime_value
        return self.uri
31
+
32
+
33
class ObjectVal(BaseModel):
    """
    Object record carrying an inline value, discriminated by `type == "val"`.

    The `value` property exposes the in-memory runtime value when one has been
    set, and otherwise falls back to the stored `value_` field.
    """

    hash: str
    schema_hash: str
    # Stored as `value_` (aliased to "value") because the name `value` is
    # taken by the property defined below.
    value_: t.Any = Field(alias="value")
    type: t.Literal["val"] = "val"

    # During execution, we might want to dynamically pull a value
    # in its unserialized form, so we store it here.
    runtime_value: t.Any | None = Field(None, init=False, repr=False, exclude=True)

    @property
    def value(self) -> t.Any:
        """
        Return the runtime value if one was set, otherwise the stored value.

        `None` is the "not set" sentinel. An explicit identity check is used
        (rather than `or`) so that falsy runtime values such as `0`, `""`,
        `False`, or an empty container are returned instead of being silently
        replaced by the stored `value_`.
        """
        if self.runtime_value is not None:
            return self.runtime_value
        return self.value_
46
+
47
+
48
+ Object = ObjectUri | ObjectVal
@@ -0,0 +1,35 @@
1
+ from dreadnode.scorers.consistency import character_consistency
2
+ from dreadnode.scorers.contains import (
3
+ contains,
4
+ detect_ansi_escapes,
5
+ detect_refusal,
6
+ detect_sensitive_keywords,
7
+ detect_unsafe_shell_content,
8
+ )
9
+ from dreadnode.scorers.length import length_in_range, length_ratio, length_target
10
+ from dreadnode.scorers.pii import detect_pii, detect_pii_with_presidio
11
+ from dreadnode.scorers.readability import readability
12
+ from dreadnode.scorers.rigging import wrap_chat
13
+ from dreadnode.scorers.sentiment import sentiment, sentiment_with_perspective
14
+ from dreadnode.scorers.similarity import bleu, semantic_similarity, similarity
15
+
16
+ __all__ = [
17
+ "bleu",
18
+ "character_consistency",
19
+ "contains",
20
+ "detect_ansi_escapes",
21
+ "detect_pii",
22
+ "detect_pii_with_presidio",
23
+ "detect_refusal",
24
+ "detect_sensitive_keywords",
25
+ "detect_unsafe_shell_content",
26
+ "length_in_range",
27
+ "length_ratio",
28
+ "length_target",
29
+ "readability",
30
+ "semantic_similarity",
31
+ "sentiment",
32
+ "sentiment_with_perspective",
33
+ "similarity",
34
+ "wrap_chat",
35
+ ]
@@ -0,0 +1,66 @@
1
+ import re
2
+ import typing as t
3
+
4
+ from dreadnode.metric import Metric, Scorer
5
+ from dreadnode.task import TaskInput
6
+ from dreadnode.util import clean_str
7
+
8
+ if t.TYPE_CHECKING:
9
+ from dreadnode.types import JsonDict
10
+
11
+
12
def character_consistency(
    reference: str | TaskInput,
    *,
    max_ratio_diff: float = 2.0,
    name: str | None = None,
) -> "Scorer[t.Any]":
    """
    Scores character type consistency between the data and a reference text.

    It compares the ratio of letters, numbers, and symbols in both texts.
    A score of 1.0 indicates identical distributions.

    The per-type scores are combined with equal weights, and the rounded
    per-type ratio differences are attached to the resulting metric's
    attributes as `<type>_ratio_diff`.

    Args:
        reference: The reference text (e.g., the prompt) or a TaskInput.
        max_ratio_diff: The denominator for normalizing ratio differences. Must be positive.
        name: Name of the scorer.

    Raises:
        ValueError: If `max_ratio_diff` is not greater than zero.
    """
    # Reject a zero/negative denominator up front: zero would raise
    # ZeroDivisionError on every evaluation, and a negative value would
    # yield scores above 1.0.
    if max_ratio_diff <= 0:
        raise ValueError(f"max_ratio_diff must be greater than 0, got {max_ratio_diff!r}")

    # Compiled once per scorer rather than on every evaluation.
    # NOTE: "letters" is ASCII-only while "symbols" excludes every `\w`
    # character, so underscores and non-ASCII letters land in no bucket.
    char_patterns = {
        "letters": re.compile(r"[a-zA-Z]"),
        "numbers": re.compile(r"\d"),
        "symbols": re.compile(r"[^\w\s]"),
    }

    def _analyze_text(text: str) -> dict[str, int]:
        # Count the occurrences of each character class in the text.
        return {
            char_type: len(pattern.findall(text)) for char_type, pattern in char_patterns.items()
        }

    def evaluate(data: t.Any) -> Metric:
        candidate_text = str(data)
        # A TaskInput reference is resolved at scoring time, not at construction.
        reference_text = str(reference.resolve()) if isinstance(reference, TaskInput) else reference

        candidate_chars = _analyze_text(candidate_text)
        reference_chars = _analyze_text(reference_text)

        candidate_total = sum(candidate_chars.values())
        reference_total = sum(reference_chars.values())

        # Without any counted characters on either side the ratios are undefined.
        if reference_total == 0 or candidate_total == 0:
            return Metric(value=0.0, attributes={"error": "Reference or candidate text is empty."})

        scores: dict[str, float] = {}
        metadata: JsonDict = {}
        for char_type in char_patterns:
            ref_ratio = reference_chars[char_type] / reference_total
            cand_ratio = candidate_chars[char_type] / candidate_total
            diff = abs(ref_ratio - cand_ratio)
            scores[char_type] = max(0.0, 1.0 - (diff / max_ratio_diff))
            metadata[f"{char_type}_ratio_diff"] = round(diff, 4)

        metric = Metric.from_many([(char_type, score, 1.0) for char_type, score in scores.items()])
        # Surface the ratio differences alongside the per-type scores instead
        # of discarding them.
        metric.attributes.update(metadata)
        return metric

    if name is None:
        ref_name = reference.name if isinstance(reference, TaskInput) else "static_text"
        name = f"char_consistency_{clean_str(ref_name)}"

    return Scorer.from_callable(evaluate, name=name)