dreadnode 1.11.0__tar.gz → 1.12.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dreadnode-1.11.0 → dreadnode-1.12.0}/PKG-INFO +2 -1
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/__init__.py +8 -3
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/data_types/image.py +2 -2
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/integrations/transformers.py +9 -3
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/main.py +53 -20
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/metric.py +21 -16
- dreadnode-1.12.0/dreadnode/object.py +48 -0
- dreadnode-1.12.0/dreadnode/scorers/__init__.py +35 -0
- dreadnode-1.12.0/dreadnode/scorers/consistency.py +66 -0
- dreadnode-1.12.0/dreadnode/scorers/contains.py +185 -0
- dreadnode-1.12.0/dreadnode/scorers/length.py +140 -0
- dreadnode-1.12.0/dreadnode/scorers/pii.py +158 -0
- dreadnode-1.12.0/dreadnode/scorers/readability.py +60 -0
- dreadnode-1.12.0/dreadnode/scorers/rigging.py +76 -0
- dreadnode-1.12.0/dreadnode/scorers/sentiment.py +117 -0
- dreadnode-1.12.0/dreadnode/scorers/similarity.py +180 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/task.py +93 -7
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/tracing/span.py +4 -5
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/util.py +8 -2
- {dreadnode-1.11.0 → dreadnode-1.12.0}/pyproject.toml +3 -3
- dreadnode-1.11.0/dreadnode/object.py +0 -32
- {dreadnode-1.11.0 → dreadnode-1.12.0}/README.md +0 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/api/__init__.py +0 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/api/client.py +0 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/api/models.py +0 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/api/util.py +0 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/artifact/__init__.py +0 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/artifact/merger.py +0 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/artifact/storage.py +0 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/artifact/tree_builder.py +0 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/constants.py +0 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/convert.py +0 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/data_types/__init__.py +0 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/data_types/audio.py +0 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/data_types/base.py +0 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/data_types/object_3d.py +0 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/data_types/table.py +0 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/data_types/text.py +0 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/data_types/video.py +0 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/integrations/__init__.py +0 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/py.typed +0 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/serialization.py +0 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/tracing/__init__.py +0 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/tracing/constants.py +0 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/tracing/exporters.py +0 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/types.py +0 -0
- {dreadnode-1.11.0 → dreadnode-1.12.0}/dreadnode/version.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: dreadnode
|
|
3
|
-
Version: 1.11.0
|
|
3
|
+
Version: 1.12.0
|
|
4
4
|
Summary: Dreadnode SDK
|
|
5
5
|
Author: Nick Landers
|
|
6
6
|
Author-email: monoxgas@gmail.com
|
|
@@ -22,6 +22,7 @@ Requires-Dist: pandas (>=2.2.3,<3.0.0)
|
|
|
22
22
|
Requires-Dist: pillow (>=11.2.1,<12.0.0) ; extra == "multimodal"
|
|
23
23
|
Requires-Dist: pydantic (>=2.9.2,<3.0.0)
|
|
24
24
|
Requires-Dist: python-ulid (>=3.0.0,<4.0.0)
|
|
25
|
+
Requires-Dist: rigging (>=3.1.1,<4.0.0)
|
|
25
26
|
Requires-Dist: soundfile (>=0.13.1,<0.14.0) ; extra == "multimodal"
|
|
26
27
|
Requires-Dist: transformers (>=4.41.0,<5.0.0) ; extra == "training"
|
|
27
28
|
Project-URL: Repository, https://github.com/dreadnode/sdk
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
from dreadnode import convert, data_types
|
|
2
|
-
from dreadnode.data_types import Audio, Image, Object3D, Table, Video
|
|
1
|
+
from dreadnode import convert, data_types, scorers
|
|
2
|
+
from dreadnode.data_types import Audio, Code, Image, Markdown, Object3D, Table, Text, Video
|
|
3
3
|
from dreadnode.main import DEFAULT_INSTANCE, Dreadnode
|
|
4
4
|
from dreadnode.metric import Metric, MetricDict, Scorer
|
|
5
5
|
from dreadnode.object import Object
|
|
6
|
-
from dreadnode.task import Task
|
|
6
|
+
from dreadnode.task import Task, TaskInput
|
|
7
7
|
from dreadnode.tracing.span import RunSpan, Span, TaskSpan
|
|
8
8
|
from dreadnode.version import VERSION
|
|
9
9
|
|
|
@@ -36,8 +36,10 @@ __version__ = VERSION
|
|
|
36
36
|
__all__ = [
|
|
37
37
|
"DEFAULT_INSTANCE",
|
|
38
38
|
"Audio",
|
|
39
|
+
"Code",
|
|
39
40
|
"Dreadnode",
|
|
40
41
|
"Image",
|
|
42
|
+
"Markdown",
|
|
41
43
|
"Metric",
|
|
42
44
|
"MetricDict",
|
|
43
45
|
"Object",
|
|
@@ -48,7 +50,9 @@ __all__ = [
|
|
|
48
50
|
"Span",
|
|
49
51
|
"Table",
|
|
50
52
|
"Task",
|
|
53
|
+
"TaskInput",
|
|
51
54
|
"TaskSpan",
|
|
55
|
+
"Text",
|
|
52
56
|
"Video",
|
|
53
57
|
"__version__",
|
|
54
58
|
"api",
|
|
@@ -68,6 +72,7 @@ __all__ = [
|
|
|
68
72
|
"push_update",
|
|
69
73
|
"run",
|
|
70
74
|
"scorer",
|
|
75
|
+
"scorers",
|
|
71
76
|
"shutdown",
|
|
72
77
|
"span",
|
|
73
78
|
"tag",
|
|
@@ -8,9 +8,9 @@ import numpy as np
|
|
|
8
8
|
from dreadnode.data_types.base import DataType
|
|
9
9
|
|
|
10
10
|
try:
|
|
11
|
-
from PIL import Image as PILImage
|
|
11
|
+
from PIL import Image as PILImage # type: ignore[import-not-found,unused-ignore]
|
|
12
12
|
except ImportError:
|
|
13
|
-
PILImage = None # type: ignore[assignment]
|
|
13
|
+
PILImage = None # type: ignore[assignment,unused-ignore]
|
|
14
14
|
|
|
15
15
|
ImageDataType = t.Any | np.ndarray[t.Any, t.Any]
|
|
16
16
|
ImageDataOrPathType = str | Path | bytes | ImageDataType
|
|
@@ -12,8 +12,14 @@ if importlib.util.find_spec("transformers") is None:
|
|
|
12
12
|
|
|
13
13
|
import typing as t
|
|
14
14
|
|
|
15
|
-
from transformers.trainer_callback import TrainerCallback, TrainerControl, TrainerState
|
|
16
|
-
|
|
15
|
+
from transformers.trainer_callback import ( # type: ignore[import-not-found,unused-ignore]
|
|
16
|
+
TrainerCallback,
|
|
17
|
+
TrainerControl,
|
|
18
|
+
TrainerState,
|
|
19
|
+
)
|
|
20
|
+
from transformers.training_args import ( # type: ignore[import-not-found,unused-ignore]
|
|
21
|
+
TrainingArguments,
|
|
22
|
+
)
|
|
17
23
|
|
|
18
24
|
import dreadnode as dn
|
|
19
25
|
|
|
@@ -40,7 +46,7 @@ def _clean_keys(data: dict[str, t.Any]) -> dict[str, t.Any]:
|
|
|
40
46
|
return cleaned
|
|
41
47
|
|
|
42
48
|
|
|
43
|
-
class DreadnodeCallback(TrainerCallback):
|
|
49
|
+
class DreadnodeCallback(TrainerCallback): # type: ignore[misc,unused-ignore]
|
|
44
50
|
"""
|
|
45
51
|
An implementation of the `TrainerCallback` interface for Dreadnode.
|
|
46
52
|
|
|
@@ -634,9 +634,7 @@ class Dreadnode:
|
|
|
634
634
|
attributes=_attributes,
|
|
635
635
|
func=t.cast("t.Callable[P, R]", func),
|
|
636
636
|
scorers=[
|
|
637
|
-
scorer
|
|
638
|
-
if isinstance(scorer, Scorer)
|
|
639
|
-
else Scorer.from_callable(self._get_tracer(), scorer)
|
|
637
|
+
scorer if isinstance(scorer, Scorer) else Scorer.from_callable(scorer)
|
|
640
638
|
for scorer in scorers or []
|
|
641
639
|
],
|
|
642
640
|
tags=list(tags or []),
|
|
@@ -726,7 +724,6 @@ class Dreadnode:
|
|
|
726
724
|
|
|
727
725
|
def make_scorer(func: ScorerCallable[T]) -> Scorer[T]:
|
|
728
726
|
return Scorer.from_callable(
|
|
729
|
-
self._get_tracer(),
|
|
730
727
|
func,
|
|
731
728
|
name=name,
|
|
732
729
|
tags=tags,
|
|
@@ -860,7 +857,11 @@ class Dreadnode:
|
|
|
860
857
|
|
|
861
858
|
target = (task or run) if to == "task-or-run" else run
|
|
862
859
|
if target is None:
|
|
863
|
-
|
|
860
|
+
warn_at_user_stacklevel(
|
|
861
|
+
"tag() was called outside of a task or run.",
|
|
862
|
+
category=DreadnodeUsageWarning,
|
|
863
|
+
)
|
|
864
|
+
return
|
|
864
865
|
|
|
865
866
|
target.add_tags(tag)
|
|
866
867
|
|
|
@@ -883,7 +884,11 @@ class Dreadnode:
|
|
|
883
884
|
# do more work
|
|
884
885
|
"""
|
|
885
886
|
if (run := current_run_span.get()) is None:
|
|
886
|
-
|
|
887
|
+
warn_at_user_stacklevel(
|
|
888
|
+
"push_update() was called outside of a run.",
|
|
889
|
+
category=DreadnodeUsageWarning,
|
|
890
|
+
)
|
|
891
|
+
return
|
|
887
892
|
|
|
888
893
|
run.push_update(force=True)
|
|
889
894
|
|
|
@@ -934,7 +939,12 @@ class Dreadnode:
|
|
|
934
939
|
**params: The parameters to log. Each parameter is a key-value pair.
|
|
935
940
|
"""
|
|
936
941
|
if (run := current_run_span.get()) is None:
|
|
937
|
-
|
|
942
|
+
warn_at_user_stacklevel(
|
|
943
|
+
"log_params() was called outside of a run.",
|
|
944
|
+
category=DreadnodeUsageWarning,
|
|
945
|
+
)
|
|
946
|
+
return
|
|
947
|
+
|
|
938
948
|
run.log_params(**params)
|
|
939
949
|
|
|
940
950
|
@t.overload
|
|
@@ -1085,13 +1095,6 @@ class Dreadnode:
|
|
|
1085
1095
|
Returns:
|
|
1086
1096
|
The logged metric object.
|
|
1087
1097
|
"""
|
|
1088
|
-
task = current_task_span.get()
|
|
1089
|
-
run = current_run_span.get()
|
|
1090
|
-
|
|
1091
|
-
target = (task or run) if to == "task-or-run" else run
|
|
1092
|
-
if target is None:
|
|
1093
|
-
raise RuntimeError("log_metric() must be called within a run")
|
|
1094
|
-
|
|
1095
1098
|
metric = (
|
|
1096
1099
|
value
|
|
1097
1100
|
if isinstance(value, Metric)
|
|
@@ -1102,6 +1105,18 @@ class Dreadnode:
|
|
|
1102
1105
|
attributes or {},
|
|
1103
1106
|
)
|
|
1104
1107
|
)
|
|
1108
|
+
|
|
1109
|
+
task = current_task_span.get()
|
|
1110
|
+
run = current_run_span.get()
|
|
1111
|
+
|
|
1112
|
+
target = (task or run) if to == "task-or-run" else run
|
|
1113
|
+
if target is None:
|
|
1114
|
+
warn_at_user_stacklevel(
|
|
1115
|
+
"log_metric() was called outside of a task or run.",
|
|
1116
|
+
category=DreadnodeUsageWarning,
|
|
1117
|
+
)
|
|
1118
|
+
return metric
|
|
1119
|
+
|
|
1105
1120
|
return target.log_metric(name, metric, origin=origin, mode=mode)
|
|
1106
1121
|
|
|
1107
1122
|
@t.overload
|
|
@@ -1240,7 +1255,11 @@ class Dreadnode:
|
|
|
1240
1255
|
|
|
1241
1256
|
target = (task or run) if to == "task-or-run" else run
|
|
1242
1257
|
if target is None:
|
|
1243
|
-
|
|
1258
|
+
warn_at_user_stacklevel(
|
|
1259
|
+
"log_metrics() was called outside of a task or run.",
|
|
1260
|
+
category=DreadnodeUsageWarning,
|
|
1261
|
+
)
|
|
1262
|
+
return []
|
|
1244
1263
|
|
|
1245
1264
|
logged_metrics: list[Metric] = []
|
|
1246
1265
|
|
|
@@ -1312,7 +1331,11 @@ class Dreadnode:
|
|
|
1312
1331
|
local_uri: The local path to the file to upload.
|
|
1313
1332
|
"""
|
|
1314
1333
|
if (run := current_run_span.get()) is None:
|
|
1315
|
-
|
|
1334
|
+
warn_at_user_stacklevel(
|
|
1335
|
+
"log_artifact() was called outside of a run.",
|
|
1336
|
+
category=DreadnodeUsageWarning,
|
|
1337
|
+
)
|
|
1338
|
+
return
|
|
1316
1339
|
|
|
1317
1340
|
run.log_artifact(local_uri=local_uri)
|
|
1318
1341
|
|
|
@@ -1350,7 +1373,11 @@ class Dreadnode:
|
|
|
1350
1373
|
|
|
1351
1374
|
target = (task or run) if to == "task-or-run" else run
|
|
1352
1375
|
if target is None:
|
|
1353
|
-
|
|
1376
|
+
warn_at_user_stacklevel(
|
|
1377
|
+
"log_input() was called outside of a task or run.",
|
|
1378
|
+
category=DreadnodeUsageWarning,
|
|
1379
|
+
)
|
|
1380
|
+
return
|
|
1354
1381
|
|
|
1355
1382
|
target.log_input(name, value, label=label, attributes=attributes)
|
|
1356
1383
|
|
|
@@ -1412,9 +1439,11 @@ class Dreadnode:
|
|
|
1412
1439
|
|
|
1413
1440
|
target = (task or run) if to == "task-or-run" else run
|
|
1414
1441
|
if target is None:
|
|
1415
|
-
|
|
1416
|
-
"log_output()
|
|
1442
|
+
warn_at_user_stacklevel(
|
|
1443
|
+
"log_output() was called outside of a task or run.",
|
|
1444
|
+
category=DreadnodeUsageWarning,
|
|
1417
1445
|
)
|
|
1446
|
+
return
|
|
1418
1447
|
|
|
1419
1448
|
target.log_output(name, value, label=label, attributes=attributes)
|
|
1420
1449
|
|
|
@@ -1461,7 +1490,11 @@ class Dreadnode:
|
|
|
1461
1490
|
attributes: Additional attributes to attach to the link.
|
|
1462
1491
|
"""
|
|
1463
1492
|
if (run := current_run_span.get()) is None:
|
|
1464
|
-
|
|
1493
|
+
warn_at_user_stacklevel(
|
|
1494
|
+
"link_objects() was called outside of a run.",
|
|
1495
|
+
category=DreadnodeUsageWarning,
|
|
1496
|
+
)
|
|
1497
|
+
return
|
|
1465
1498
|
|
|
1466
1499
|
origin_hash = run.log_object(origin)
|
|
1467
1500
|
link_hash = run.log_object(link)
|
|
@@ -6,7 +6,6 @@ from datetime import datetime, timezone
|
|
|
6
6
|
import typing_extensions as te
|
|
7
7
|
from logfire._internal.stack_info import warn_at_user_stacklevel
|
|
8
8
|
from logfire._internal.utils import safe_repr
|
|
9
|
-
from opentelemetry.trace import Tracer
|
|
10
9
|
|
|
11
10
|
from dreadnode.types import JsonDict, JsonValue
|
|
12
11
|
|
|
@@ -73,7 +72,11 @@ class Metric:
|
|
|
73
72
|
total = sum(value * weight for _, value, weight in values)
|
|
74
73
|
weight = sum(weight for _, _, weight in values)
|
|
75
74
|
score_attributes = {name: value for name, value, _ in values}
|
|
76
|
-
return cls(
|
|
75
|
+
return cls(
|
|
76
|
+
value=total / weight,
|
|
77
|
+
step=step,
|
|
78
|
+
attributes={**attributes, **score_attributes},
|
|
79
|
+
)
|
|
77
80
|
|
|
78
81
|
def apply_mode(self, mode: MetricAggMode, others: "list[Metric]") -> "Metric":
|
|
79
82
|
"""
|
|
@@ -124,8 +127,6 @@ ScorerCallable = t.Callable[[T], t.Awaitable[ScorerResult]] | t.Callable[[T], Sc
|
|
|
124
127
|
|
|
125
128
|
@dataclass
|
|
126
129
|
class Scorer(t.Generic[T]):
|
|
127
|
-
tracer: Tracer
|
|
128
|
-
|
|
129
130
|
name: str
|
|
130
131
|
"The name of the scorer, used for reporting metrics."
|
|
131
132
|
tags: t.Sequence[str]
|
|
@@ -138,25 +139,27 @@ class Scorer(t.Generic[T]):
|
|
|
138
139
|
"The step value to attach to metrics produced by this Scorer."
|
|
139
140
|
auto_increment_step: bool = False
|
|
140
141
|
"Whether to automatically increment the step for each time this scorer is called."
|
|
142
|
+
catch: bool = False
|
|
143
|
+
"Whether to catch exceptions in the scorer function and return a 0 Metric with error information."
|
|
141
144
|
|
|
142
145
|
@classmethod
|
|
143
146
|
def from_callable(
|
|
144
147
|
cls,
|
|
145
|
-
tracer: Tracer,
|
|
146
148
|
func: "ScorerCallable[T] | Scorer[T]",
|
|
147
149
|
*,
|
|
148
150
|
name: str | None = None,
|
|
149
151
|
tags: t.Sequence[str] | None = None,
|
|
152
|
+
catch: bool = False,
|
|
150
153
|
**attributes: t.Any,
|
|
151
154
|
) -> "Scorer[T]":
|
|
152
155
|
"""
|
|
153
156
|
Create a scorer from a callable function.
|
|
154
157
|
|
|
155
158
|
Args:
|
|
156
|
-
tracer: The tracer to use for reporting metrics.
|
|
157
159
|
func: The function to call to get the metric.
|
|
158
160
|
name: The name of the scorer, used for reporting metrics.
|
|
159
161
|
tags: A list of tags to attach to the metric.
|
|
162
|
+
catch: Whether to catch exceptions in the scorer function and return a 0 Metric with error information.
|
|
160
163
|
**attributes: A dictionary of attributes to attach to the metric.
|
|
161
164
|
|
|
162
165
|
Returns:
|
|
@@ -177,11 +180,11 @@ class Scorer(t.Generic[T]):
|
|
|
177
180
|
)
|
|
178
181
|
name = name or func_name
|
|
179
182
|
return cls(
|
|
180
|
-
tracer=tracer,
|
|
181
183
|
name=name,
|
|
182
184
|
tags=tags or [],
|
|
183
185
|
attributes=attributes or {},
|
|
184
186
|
func=func,
|
|
187
|
+
catch=catch,
|
|
185
188
|
)
|
|
186
189
|
|
|
187
190
|
def __post_init__(self) -> None:
|
|
@@ -196,13 +199,13 @@ class Scorer(t.Generic[T]):
|
|
|
196
199
|
A new Scorer.
|
|
197
200
|
"""
|
|
198
201
|
return Scorer(
|
|
199
|
-
tracer=self.tracer,
|
|
200
202
|
name=self.name,
|
|
201
203
|
tags=self.tags,
|
|
202
204
|
attributes=self.attributes,
|
|
203
205
|
func=self.func,
|
|
204
206
|
step=self.step,
|
|
205
207
|
auto_increment_step=self.auto_increment_step,
|
|
208
|
+
catch=self.catch,
|
|
206
209
|
)
|
|
207
210
|
|
|
208
211
|
async def __call__(self, object: T) -> Metric:
|
|
@@ -217,17 +220,19 @@ class Scorer(t.Generic[T]):
|
|
|
217
220
|
Returns:
|
|
218
221
|
A Metric object.
|
|
219
222
|
"""
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
with Span(
|
|
223
|
-
name=self.name,
|
|
224
|
-
tags=self.tags,
|
|
225
|
-
attributes=self.attributes,
|
|
226
|
-
tracer=self.tracer,
|
|
227
|
-
):
|
|
223
|
+
try:
|
|
228
224
|
metric = self.func(object)
|
|
229
225
|
if inspect.isawaitable(metric):
|
|
230
226
|
metric = await metric
|
|
227
|
+
except Exception as exc:
|
|
228
|
+
if not self.catch:
|
|
229
|
+
raise
|
|
230
|
+
|
|
231
|
+
warn_at_user_stacklevel(
|
|
232
|
+
f"Error executing scorer {self.name!r} for object {object!r}: {exc}",
|
|
233
|
+
MetricWarning,
|
|
234
|
+
)
|
|
235
|
+
metric = Metric(value=0.0, step=self.step, attributes={"error": str(exc)})
|
|
231
236
|
|
|
232
237
|
if not isinstance(metric, Metric):
|
|
233
238
|
metric = Metric(
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import typing as t
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel, Field
|
|
5
|
+
|
|
6
|
+
from dreadnode.types import AnyDict
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
|
|
10
|
+
class ObjectRef:
|
|
11
|
+
name: str
|
|
12
|
+
label: str
|
|
13
|
+
hash: str
|
|
14
|
+
attributes: AnyDict | None
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ObjectUri(BaseModel):
|
|
18
|
+
hash: str
|
|
19
|
+
schema_hash: str
|
|
20
|
+
uri: str
|
|
21
|
+
size: int
|
|
22
|
+
type: t.Literal["uri"] = "uri"
|
|
23
|
+
|
|
24
|
+
# During execution, we might want to dynamically pull a value
|
|
25
|
+
# in it's unserialized form, so we store it here.
|
|
26
|
+
runtime_value: t.Any | None = Field(None, init=False, repr=False, exclude=True)
|
|
27
|
+
|
|
28
|
+
@property
|
|
29
|
+
def value(self) -> t.Any:
|
|
30
|
+
return self.runtime_value or self.uri
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class ObjectVal(BaseModel):
|
|
34
|
+
hash: str
|
|
35
|
+
schema_hash: str
|
|
36
|
+
value_: t.Any = Field(alias="value")
|
|
37
|
+
type: t.Literal["val"] = "val"
|
|
38
|
+
|
|
39
|
+
# During execution, we might want to dynamically pull a value
|
|
40
|
+
# in it's unserialized form, so we store it here.
|
|
41
|
+
runtime_value: t.Any | None = Field(None, init=False, repr=False, exclude=True)
|
|
42
|
+
|
|
43
|
+
@property
|
|
44
|
+
def value(self) -> t.Any:
|
|
45
|
+
return self.runtime_value or self.value_
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
Object = ObjectUri | ObjectVal
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from dreadnode.scorers.consistency import character_consistency
|
|
2
|
+
from dreadnode.scorers.contains import (
|
|
3
|
+
contains,
|
|
4
|
+
detect_ansi_escapes,
|
|
5
|
+
detect_refusal,
|
|
6
|
+
detect_sensitive_keywords,
|
|
7
|
+
detect_unsafe_shell_content,
|
|
8
|
+
)
|
|
9
|
+
from dreadnode.scorers.length import length_in_range, length_ratio, length_target
|
|
10
|
+
from dreadnode.scorers.pii import detect_pii, detect_pii_with_presidio
|
|
11
|
+
from dreadnode.scorers.readability import readability
|
|
12
|
+
from dreadnode.scorers.rigging import wrap_chat
|
|
13
|
+
from dreadnode.scorers.sentiment import sentiment, sentiment_with_perspective
|
|
14
|
+
from dreadnode.scorers.similarity import bleu, semantic_similarity, similarity
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"bleu",
|
|
18
|
+
"character_consistency",
|
|
19
|
+
"contains",
|
|
20
|
+
"detect_ansi_escapes",
|
|
21
|
+
"detect_pii",
|
|
22
|
+
"detect_pii_with_presidio",
|
|
23
|
+
"detect_refusal",
|
|
24
|
+
"detect_sensitive_keywords",
|
|
25
|
+
"detect_unsafe_shell_content",
|
|
26
|
+
"length_in_range",
|
|
27
|
+
"length_ratio",
|
|
28
|
+
"length_target",
|
|
29
|
+
"readability",
|
|
30
|
+
"semantic_similarity",
|
|
31
|
+
"sentiment",
|
|
32
|
+
"sentiment_with_perspective",
|
|
33
|
+
"similarity",
|
|
34
|
+
"wrap_chat",
|
|
35
|
+
]
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import typing as t
|
|
3
|
+
|
|
4
|
+
from dreadnode.metric import Metric, Scorer
|
|
5
|
+
from dreadnode.task import TaskInput
|
|
6
|
+
from dreadnode.util import clean_str
|
|
7
|
+
|
|
8
|
+
if t.TYPE_CHECKING:
|
|
9
|
+
from dreadnode.types import JsonDict
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def character_consistency(
|
|
13
|
+
reference: str | TaskInput,
|
|
14
|
+
*,
|
|
15
|
+
max_ratio_diff: float = 2.0,
|
|
16
|
+
name: str | None = None,
|
|
17
|
+
) -> "Scorer[t.Any]":
|
|
18
|
+
"""
|
|
19
|
+
Scores character type consistency between the data and a reference text.
|
|
20
|
+
|
|
21
|
+
It compares the ratio of letters, numbers, and symbols in both texts.
|
|
22
|
+
A score of 1.0 indicates identical distributions.
|
|
23
|
+
|
|
24
|
+
Args:
|
|
25
|
+
reference: The reference text (e.g., the prompt) or a TaskInput.
|
|
26
|
+
max_ratio_diff: The denominator for normalizing ratio differences.
|
|
27
|
+
name: Name of the scorer.
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
def _analyze_text(text: str) -> dict[str, int]:
|
|
31
|
+
return {
|
|
32
|
+
"letters": len(re.findall(r"[a-zA-Z]", text)),
|
|
33
|
+
"numbers": len(re.findall(r"\d", text)),
|
|
34
|
+
"symbols": len(re.findall(r"[^\w\s]", text)),
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
def evaluate(data: t.Any) -> Metric:
|
|
38
|
+
candidate_text = str(data)
|
|
39
|
+
reference_text = str(reference.resolve()) if isinstance(reference, TaskInput) else reference
|
|
40
|
+
|
|
41
|
+
candidate_chars = _analyze_text(candidate_text)
|
|
42
|
+
reference_chars = _analyze_text(reference_text)
|
|
43
|
+
|
|
44
|
+
candidate_total = sum(candidate_chars.values())
|
|
45
|
+
reference_total = sum(reference_chars.values())
|
|
46
|
+
|
|
47
|
+
if reference_total == 0 or candidate_total == 0:
|
|
48
|
+
return Metric(value=0.0, attributes={"error": "Reference or candidate text is empty."})
|
|
49
|
+
|
|
50
|
+
scores: dict[str, float] = {}
|
|
51
|
+
metadata: JsonDict = {}
|
|
52
|
+
for char_type in ["letters", "numbers", "symbols"]:
|
|
53
|
+
ref_ratio = reference_chars[char_type] / reference_total
|
|
54
|
+
cand_ratio = candidate_chars[char_type] / candidate_total
|
|
55
|
+
diff = abs(ref_ratio - cand_ratio)
|
|
56
|
+
score = max(0.0, 1.0 - (diff / max_ratio_diff))
|
|
57
|
+
scores[char_type] = score
|
|
58
|
+
metadata[f"{char_type}_ratio_diff"] = round(diff, 4)
|
|
59
|
+
|
|
60
|
+
return Metric.from_many([(name, score, 1.0) for name, score in scores.items()])
|
|
61
|
+
|
|
62
|
+
if name is None:
|
|
63
|
+
ref_name = reference.name if isinstance(reference, TaskInput) else "static_text"
|
|
64
|
+
name = f"char_consistency_{clean_str(ref_name)}"
|
|
65
|
+
|
|
66
|
+
return Scorer.from_callable(evaluate, name=name)
|