dyff-schema 0.22.0__tar.gz → 0.24.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dyff_schema-0.22.0/dyff_schema.egg-info → dyff_schema-0.24.0}/PKG-INFO +1 -1
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/adapters.py +320 -54
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/platform.py +52 -6
- {dyff_schema-0.22.0 → dyff_schema-0.24.0/dyff_schema.egg-info}/PKG-INFO +1 -1
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff_schema.egg-info/SOURCES.txt +1 -0
- dyff_schema-0.24.0/tests/test_adapters.py +368 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/.gitignore +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/.gitlab-ci.yml +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/.licenserc.yaml +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/.pre-commit-config.yaml +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/.prettierignore +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/.secrets.baseline +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/CODE_OF_CONDUCT.md +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/LICENSE +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/NOTICE +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/README.md +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/__init__.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/adapters.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/annotations.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/base.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/copydoc.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/dataset/__init__.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/dataset/arrow.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/dataset/binary.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/dataset/classification.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/dataset/embedding.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/dataset/text.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/dataset/vision.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/errors.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/ids.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/io/__init__.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/io/vllm.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/platform.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/py.typed +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/quantity.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/requests.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/test.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/__init__.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/__init__.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/base.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/dataset/__init__.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/dataset/arrow.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/dataset/binary.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/dataset/classification.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/dataset/embedding.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/dataset/text.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/dataset/vision.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/io/__init__.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/io/vllm.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/requests.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/test.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/version.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/version.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff_schema.egg-info/dependency_links.txt +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff_schema.egg-info/requires.txt +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff_schema.egg-info/top_level.txt +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/makefile +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/pyproject.toml +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/setup.cfg +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/tests/test_import.py +0 -0
--- dyff_schema-0.22.0/dyff/schema/v0/r1/adapters.py
+++ dyff_schema-0.24.0/dyff/schema/v0/r1/adapters.py
@@ -6,14 +6,21 @@ from __future__ import annotations
 
 import functools
 import json
-from typing import Any, Callable, Iterable, NamedTuple, Protocol, Type
+import operator
+import re
+from typing import Any, Callable, Iterable, Literal, NamedTuple, Protocol, Type
 
 import jsonpath_ng as jsonpath
 from jsonpath_ng.exceptions import JSONPathError
+from jsonpath_ng.ext.parser import parse as jsonpath_parse_ext
 
 from dyff.schema.platform import SchemaAdapter
 
 
+def _json_deep_copy(data):
+    return json.loads(json.dumps(data))
+
+
 def map_structure(fn, data):
     """Given a JSON data structure ``data``, create a new data structure instance with
     the same shape as ``data`` by applying ``fn`` to each "leaf" value in the nested
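The new _json_deep_copy helper clones a JSON-compatible structure by serializing it and parsing it back. A minimal sketch of the guarantee this provides (the function body is copied from the diff; the usage around it is invented for illustration):

    import json

    def _json_deep_copy(data):
        return json.loads(json.dumps(data))

    original = {"object": {"id": "abc123"}, "list": [1, 2]}
    copy = _json_deep_copy(original)
    copy["object"]["id"] = "changed"
    assert original["object"]["id"] == "abc123"  # nested containers are independent copies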
@@ -70,90 +77,346 @@ class Adapter(Protocol):
         raise NotImplementedError()
 
 
+class _Literal:
+    def __init__(self, value):
+        self.value = value
+
+    def __call__(self, x):
+        return self.value
+
+
+class _Func_findall:
+    def __init__(self, *, pattern: str, flags: int = 0):
+        self.pattern = pattern
+        self.flags = flags
+
+    def __call__(self, x) -> list[str]:
+        return re.findall(self.pattern, x, self.flags)
+
+
+class _Func_join:
+    def __init__(self, *, separator: str = ""):
+        self._separator = separator
+
+    def __call__(self, x: list[str]) -> str:
+        return self._separator.join(x)
+
+
+class _Func_list:
+    def __call__(self, x) -> list:
+        return list(x)
+
+
+class _Func_reduce:
+    def __call__(self, x):
+        return functools.reduce(operator.add, x)
+
+
+class _Func_search:
+    def __init__(
+        self,
+        *,
+        pattern: str,
+        flags: int = 0,
+        group: int = 0,
+        default: str | None = None,
+    ):
+        self.pattern = pattern
+        self.flags = flags
+        self.group = group
+        self.default = default
+
+    def __call__(self, x) -> str | None:
+        m = re.search(self.pattern, x, self.flags)
+        return self.default if m is None else m.group(self.group)
+
+
+class _Func_split:
+    def __init__(self, *, pattern: str, maxsplit: int = 0, flags: int = 0):
+        self.pattern = pattern
+        self.maxsplit = maxsplit
+        self.flags = flags
+
+    def __call__(self, x) -> list[str]:
+        return re.split(self.pattern, x, self.maxsplit, self.flags)
+
+
+class _Func_sub:
+    def __init__(self, *, pattern: str, repl: str, count: int = 0, flags: int = 0):
+        self.pattern = pattern
+        self.repl = repl
+        self.count = count
+        self.flags = flags
+
+    def __call__(self, x) -> str:
+        return re.sub(self.pattern, self.repl, x, self.count, self.flags)
+
+
+class _Value_jsonpath:
+    def __init__(self, expr, *, kind: Literal["scalar", "list"] = "scalar"):
+        self._expr: jsonpath.JSONPath = jsonpath.parse(expr)
+        self._kind = kind
+
+    def __call__(self, x):
+        results = self._expr.find(x)
+        if self._kind == "list":
+            return [result.value for result in results]
+        elif self._kind == "scalar":
+            if len(results) == 0:
+                raise ValueError(f"no match for '{self._expr}' in '{x}'")
+            elif len(results) > 1:
+                raise ValueError(f"multiple results for '{self._expr}' in '{x}'")
+            return results[0].value
+        else:
+            raise AssertionError(f"kind {self._kind}")
+
+
+class _Value_list:
+    def __init__(self, exprs: list[Callable]):
+        self._exprs = exprs
+
+    def __call__(self, x) -> list:
+        return [e(x) for e in self._exprs]
+
+
+def _maybe_value_expr(expr: dict) -> Callable | None:
+    kinds = ["$literal", "$scalar", "$list"]
+    maybe_exprs = {k: expr.get(k) for k in kinds}
+    just_exprs = [k for k in kinds if maybe_exprs[k] is not None]
+    if len(just_exprs) == 0:
+        return None
+    if len(just_exprs) > 1:
+        raise ValueError(f"must specify exactly one of {kinds}: got {just_exprs}")
+
+    # remove sigil
+    kind: Literal["literal", "scalar", "list"] = just_exprs[0][1:]  # type: ignore
+    value = maybe_exprs[just_exprs[0]]
+    if kind == "literal":
+        return _Literal(value)
+
+    op: Callable = _Literal(value)
+    if isinstance(value, str):
+        if value.startswith("$"):
+            if value.startswith("$$"):
+                # Literal string -- remove "escape" character
+                op = _Literal(value[1:])
+            else:
+                op = _Value_jsonpath(value, kind=kind)
+    elif kind == "list" and isinstance(value, list):
+        exprs = [_maybe_value_expr(e) for e in value]
+        if any(e is None for e in exprs):
+            raise ValueError("$list elements must be value expressions")
+        op = _Value_list(exprs)  # type: ignore
+    if isinstance(op, _Literal) and kind != "literal":
+        raise ValueError("must use $literal when providing a literal value")
+    return op
+
+
+class _LeafExpression:
+    FUNCTIONS = {
+        "findall": _Func_findall,
+        "join": _Func_join,
+        "list": _Func_list,
+        "reduce": _Func_reduce,
+        "search": _Func_search,
+        "split": _Func_split,
+        "sub": _Func_sub,
+    }
+
+    def __init__(self, pipeline: dict | list[dict]):
+        if isinstance(pipeline, dict):
+            pipeline = [pipeline]
+
+        self._compiled_pipeline: list[Callable] = []
+        for step in pipeline:
+            if (value_op := _maybe_value_expr(step)) is not None:
+                self._compiled_pipeline.append(value_op)
+            elif (func := step.pop("$func", None)) is not None:
+                self._compiled_pipeline.append(_LeafExpression.FUNCTIONS[func](**step))
+            else:
+                raise ValueError(f"invalid $compute step: {step}")
+
+    def __call__(self, x):
+        output = None
+        for i, step in enumerate(self._compiled_pipeline):
+            if i == 0:
+                output = step(x)
+            else:
+                output = step(output)
+        return output
+
+
 class TransformJSON:
-    """
-
+    """Create a new JSON structure where the "leaf" values are populated by the results
+    of transformation functions applied to the input.
+
+    The "value" for each leaf can be::
+
+        1. A JSON literal value, or
+        2. The result of a jsonpath query on the input structure, or
+        3. The result of a computation pipeline starting from (1) or (2).
 
-
-
+    To distinguish the specifications of leaf values from the specification of
+    the output structure, we apply the following rules::
 
-
+        1. Composite values (``list`` and ``dict``) specify the structure of
+           the output.
+        2. Scalar values are output as-is, unless they are strings containing
+           JSONPath queries.
+        3. JSONPath queries are strings beginning with '$'. They are replaced
+           by the result of the query.
+        4. A ``dict`` containing the special key ``"$compute"`` introduces a
+           "compute context", which computes a leaf value from the input data.
+           Descendants of this key have "compute context semantics", which are
+           different from the "normal" semantics.
+
+    For example, if the ``configuration`` is::
 
         {
             "id": "$.object.id",
             "name": "literal",
             "children": {"left": "$.list[0]", "right": "$.list[1]"}
+            "characters": {
+                "letters": {
+                    "$compute": [
+                        {"$scalar": "$.object.id"},
+                        {
+                            "$func": "sub",
+                            "pattern": "[0-9]",
+                            "repl": "",
+                        },
+                        {"$func": "list"}
+                    ]
+                }
+            }
         }
 
     and the data is::
 
         {
-            "object": {"id":
+            "object": {"id": "abc123", "name": "spam"},
             "list": [1, 2]
         }
 
-    Then applying the
+    Then applying the transformation to the data will result in the new structure::
 
         {
-            "id":
+            "id": "abc123",
             "name": "literal",
-            "children: {"left": 1, "right": 2}
+            "children": {"left": 1, "right": 2},
+            "characters": {
+                "letters": ["a", "b", "c"]
+            }
         }
 
-
-
-
-
-
-
-
+    The ``.characters.letters`` field was derived by::
+
+        1. Extracting the value of the ``.object.id`` field in the input
+        2. Applying ``re.sub(r"[0-9]", "", _)`` to the result of (1)
+        3. Applying ``list(_)`` to the result of (2)
+
+    Notice that descendants of the ``$compute`` key no longer describe the
+    structure of the output, but instead describe steps of the computation.
+    The value of ``"$compute"`` can be either an object or a list of objects.
+    A list is interpreted as a "pipeline" where each step is applied to the
+    output of the previous step.
+
+    Implicit queries
+    ================
+
+    Outside of the ``$compute`` context, string values that start with a ``$``
+    character are interpreted as jsonpath queries. Queries in this context must
+    return **exactly one value**, otherwise a ``ValueError`` will be raised.
+    This is because when multiple values are returned, there's no way to
+    distinguish a scalar-valued query that found 1 scalar from a list-valued
+    query that found a list with 1 element. In the ``$compute`` context, you
+    can specify which semantics you want.
+
+    If you need a literal string that starts with the '$' character, escape it
+    with a second '$', e.g., "$$PATH" will appear as the literal string "$PATH"
+    in the output. This works for both keys and values, e.g.,
+    ``{"$$key": "$$value"}`` outputs ``{"$key": "$value"}``. All keys that
+    begin with ``$`` are reserved, and you must always escape them.
+
+    The $compute context
+    ====================
+
+    A ``$compute`` context is introduced by a ``dict`` that contains the key
+    ``{"$compute": ...}``. Semantics in the ``$compute`` context are different
+    from semantics in the "normal" context.
+
+    $literal vs. $scalar vs. $list
+    ------------------------------
+
+    Inside a ``$compute`` context, we distinguish explicitly between literal
+    values, jsonpath queries that return scalars, and jsonpath queries that
+    return lists. You specify which semantics you intend by using
+    ``{"$literal": [1, 2]}``, ``{"$scalar": "$.foo"}``, or ``{"$list": "$.foo[*]"}``.
+    Items with ``$literal`` semantics are **never** interpreted as jsonpath
+    queries, even if they start with ``$``. In the ``$literal`` context, you
+    **should not** escape the leading ``$`` character.
+
+    A ``$scalar`` query has the same semantics as a jsonpath query outside
+    of the ``$compute`` context, i.e., it must return exactly 1 item.
+    A ``$list`` query will return a list, which can be empty. Scalar-valued
+    queries in a ``$list`` context will return a list with 1 element.
+
+    $func
+    -----
+
+    You use blocks with a ``$func`` key to specify computation steps. The
+    available functions are: ``findall``, ``join``, ``list``, ``reduce``,
+    ``search``, ``split``, ``sub``. These behave the same way as the
+    corresponding functions from the Python standard library::
+
+        * ``findall``, ``search``, ``split``, and ``sub`` are from the
+          ``re`` module.
+        * ``reduce`` uses the ``+`` operator with no starting value; it will
+          raise an error if called on an empty list.
+
+    All of these functions take named parameters with the same names and
+    semantics as their parameters in Python.
     """
 
     def __init__(self, configuration: dict):
-        """
-        Parameters:
-          ``output_structure``: A JSON object where all the "leaf" values
-            are strings containing jsonpath queries.
-        """
         if configuration != json.loads(json.dumps(configuration)):
             raise ValueError("configuration is not valid JSON")
-        self.
-
-
-
-
-
-
-
-
-
-
-
+        self.configuration = configuration
+        self._transformation = self._compile(self.configuration)
+
+    def _compile(self, x) -> Callable | list | dict:
+        if isinstance(x, dict):
+            if (compute := x.get("$compute")) is not None:
+                if len(x) != 1:
+                    raise ValueError("$compute must be the only key in the dict")
+                return _LeafExpression(compute)
+            else:
+                # Escape '$' in dict keys
+                d: dict[str, Any] = {}
+                for k, v in x.items():
+                    if k.startswith("$"):
+                        if k.startswith("$$"):
+                            k = k[1:]
+                        else:
+                            raise ValueError(
+                                f"dict key '{k}': keys beginning with '$' are reserved; use '$$' to escape"
+                            )
+                    d[k] = self._compile(v)
+                return d
+        elif isinstance(x, list):
+            return [self._compile(y) for y in x]
+        elif isinstance(x, str):
             if x.startswith("$"):
                 if x.startswith("$$"):
                     # Literal string -- remove "escape" character
-                    return x[1:]
+                    return _Literal(x[1:])
                 else:
-                    return
-        return x
+                    return _Value_jsonpath(x, kind="scalar")
+        return _Literal(x)
 
     def __call__(self, stream: Iterable[dict]) -> Iterable[dict]:
-        def query(data, expr):
-            if not isinstance(expr, jsonpath.JSONPath):
-                # Literal
-                return expr
-            results = expr.find(data)
-            if len(results) == 0:
-                raise ValueError(f"no match for {expr}")
-            elif len(results) > 1:
-                raise ValueError(f"multiple results for {expr}")
-            return results[0].value
-
         for item in stream:
-            transformed = map_structure(
-                lambda expr: query(item, expr), self._expressions
-            )
-            yield transformed
+            yield map_structure(lambda compute: compute(item), self._transformation)
 
 
 class EmbedIndex:
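A minimal usage sketch of the new $compute pipeline, mirroring the class docstring and the new tests (the printed output is shown as a comment):

    from dyff.schema.adapters import TransformJSON

    transformer = TransformJSON(
        {
            "id": "$.object.id",
            "characters": {
                "letters": {
                    "$compute": [
                        # Start from the value extracted by the $scalar query ...
                        {"$scalar": "$.object.id"},
                        # ... remove the digits ...
                        {"$func": "sub", "pattern": "[0-9]", "repl": ""},
                        # ... then split the remaining string into characters.
                        {"$func": "list"},
                    ]
                }
            },
        }
    )
    data = {"object": {"id": "abc123", "name": "spam"}, "list": [1, 2]}
    print(list(transformer([data])))
    # [{'id': 'abc123', 'characters': {'letters': ['a', 'b', 'c']}}]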
@@ -573,13 +836,16 @@ def _test():
     print(list(transformer([data])))
 
     transformer = TransformJSON(
-        {
+        {
+            "id": "$.object.id",
+            "children": {"left": "$.list[0]", "right": "$.list[1]"},
+        }
     )
     print(
         list(
             transformer(
                 [
-                    {"object": {"id":
+                    {"object": {"id": "abc123", "name": "spam"}, "list": [1, 2]},
                 ]
             )
         )
--- dyff_schema-0.22.0/dyff/schema/v0/r1/platform.py
+++ dyff_schema-0.24.0/dyff/schema/v0/r1/platform.py
@@ -1226,6 +1226,7 @@ class InferenceServiceRunner(DyffSchemaBaseModel):
 
     nodes: int = pydantic.Field(
         default=1,
+        ge=1,
         description="Number of nodes. The resource specs apply to *each node*.",
     )
 
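The added ge=1 constraint means pydantic now rejects non-positive node counts at validation time. A standalone sketch of the effect (RunnerSketch is a stand-in model, not the dyff class; the field definition is copied from the diff):

    import pydantic

    class RunnerSketch(pydantic.BaseModel):
        nodes: int = pydantic.Field(
            default=1,
            ge=1,
            description="Number of nodes. The resource specs apply to *each node*.",
        )

    RunnerSketch(nodes=2)  # accepted
    try:
        RunnerSketch(nodes=0)  # rejected: 0 violates the ge=1 constraint
    except pydantic.ValidationError as error:
        print(error)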
@@ -1258,6 +1259,7 @@ class InferenceServiceBase(DyffSchemaBaseModel):
         description="Configuration of the Builder used to build the service.",
     )
 
+    # FIXME: (DYFF-261) .runner should be required
     runner: Optional[InferenceServiceRunner] = pydantic.Field(
         default=None, description="Configuration of the Runner used to run the service."
     )
@@ -1316,12 +1318,6 @@ class InferenceSessionBase(DyffSchemaBaseModel):
         " Note that some accelerator types may not be available in non-spot pods.",
     )
 
-    nodes: int = pydantic.Field(
-        default=1,
-        ge=1,
-        description="Number of nodes. The resource specs apply to *each node*.",
-    )
-
 
 class InferenceSessionSpec(InferenceSessionBase):
     inferenceService: ForeignInferenceService = pydantic.Field(
@@ -1374,6 +1370,50 @@ class TaskSchema(DyffSchemaBaseModel):
     objective: str
 
 
+class EvaluationClientConfiguration(DyffSchemaBaseModel):
+    badRequestPolicy: Literal["Abort", "Skip"] = pydantic.Field(
+        default="Abort",
+        description="What to do if an inference call raises a 400 Bad Request"
+        " or a similar error that indicates a problem with the input instance."
+        " Abort (default): the evaluation fails immediately."
+        " Skip: output None for the bad instance and continue.",
+    )
+
+    transientErrorRetryLimit: int = pydantic.Field(
+        default=120,
+        description="How many times to retry transient errors before the"
+        " evaluation fails. The count is reset after a successful inference."
+        " Note that transient errors often occur during inference service"
+        " startup. The maximum time that the evaluation will wait for a"
+        " service (re)start is (retry limit) * (retry delay).",
+    )
+
+    transientErrorRetryDelaySeconds: int = pydantic.Field(
+        default=30,
+        description="How long to wait before retrying a transient error."
+        " Note that transient errors often occur during inference service"
+        " startup. The maximum time that the evaluation will wait for a"
+        " service (re)start is (retry limit) * (retry delay).",
+    )
+
+    duplicateOutputPolicy: Literal["Deduplicate", "Error", "Ignore"] = pydantic.Field(
+        default="Deduplicate",
+        description="What to do if there are duplicate outputs."
+        " Deduplicate (default): output only one of the duplicates, chosen"
+        " arbitrarily. Error: the evaluation fails. Ignore: duplicates are"
+        " retained in the output."
+        " Setting this to Error is discouraged because duplicates can"
+        " arise in normal operation if the client restarts due to"
+        " a transient failure.",
+    )
+
+    missingOutputPolicy: Literal["Error", "Ignore"] = pydantic.Field(
+        default="Error",
+        description="What to do if there are missing outputs."
+        " Error (default): the evaluation fails. Ignore: no error.",
+    )
+
+
 class EvaluationBase(DyffSchemaBaseModel):
     dataset: str = pydantic.Field(description="The Dataset to evaluate on.")
 
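Per the field descriptions, the maximum time the client waits for a service (re)start is (retry limit) * (retry delay), so the defaults allow 120 * 30 s = 3600 s, i.e., one hour. A usage sketch (assuming the new class is re-exported from dyff.schema.platform like the other platform types):

    from dyff.schema.platform import EvaluationClientConfiguration

    config = EvaluationClientConfiguration()  # all defaults
    assert config.transientErrorRetryLimit * config.transientErrorRetryDelaySeconds == 3600

    # Skip bad input instances, and give up on transient errors after ~5 minutes.
    lenient = EvaluationClientConfiguration(
        badRequestPolicy="Skip",
        transientErrorRetryLimit=10,
        transientErrorRetryDelaySeconds=30,
    )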
@@ -1381,11 +1421,17 @@ class EvaluationBase(DyffSchemaBaseModel):
         default=1, description="Number of replications to run."
     )
 
+    # TODO: This should be in the client config object
     workersPerReplica: Optional[int] = pydantic.Field(
         default=None,
         description="Number of data workers per inference service replica.",
     )
 
+    client: EvaluationClientConfiguration = pydantic.Field(
+        default_factory=EvaluationClientConfiguration,
+        description="Configuration for the evaluation client.",
+    )
+
 
 class Evaluation(DyffEntity, EvaluationBase):
     """A description of how to run an InferenceService on a Dataset to obtain a set of
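A sketch of where the new client block sits in an evaluation specification (values are hypothetical and other required Evaluation fields are elided):

    evaluation_spec = {
        "dataset": "<dataset-id>",
        "workersPerReplica": 4,
        "client": {  # new in 0.24.0; defaults apply when omitted
            "badRequestPolicy": "Skip",
            "missingOutputPolicy": "Ignore",
        },
    }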
--- /dev/null
+++ dyff_schema-0.24.0/tests/test_adapters.py
@@ -0,0 +1,368 @@
+# SPDX-FileCopyrightText: 2024 UL Research Institutes
+# SPDX-License-Identifier: Apache-2.0
+
+import pytest
+
+from dyff.schema.adapters import TransformJSON
+
+
+class Test_TransformJSON:
+    def test_literal(self):
+        transformer = TransformJSON({"literal": "literal"})
+        input = {"input": 42}
+        result = list(transformer([input]))
+        assert result == [{"literal": "literal"}]
+
+    def test_jsonpath(self):
+        transformer = TransformJSON({"jsonpath": "$.input"})
+        input = {"input": 42}
+        result = list(transformer([input]))
+        assert result == [{"jsonpath": 42}]
+
+    def test_jsonpath_escape(self):
+        transformer = TransformJSON({"jsonpath": "$$.input"})
+        input = {"input": 42}
+        result = list(transformer([input]))
+        assert result == [{"jsonpath": "$.input"}]
+
+    def test_multiple_inputs(self):
+        transformer = TransformJSON({"jsonpath": "$.input"})
+        inputs = [{"input": 42}, {"input": 314}]
+        result = list(transformer(inputs))
+        assert result == [{"jsonpath": 42}, {"jsonpath": 314}]
+
+    def test_key_sigil_escape(self):
+        transformer = TransformJSON({"$$escaped": "literal"})
+        input = {"input": 42}
+        result = list(transformer([input]))
+        assert result == [{"$escaped": "literal"}]
+
+    def test_error_key_sigil(self):
+        with pytest.raises(ValueError):
+            TransformJSON({"$invalid": "$$.input"})
+
+    def test_structure_object(self):
+        transformer = TransformJSON({"out1": {"out2": "$.level1.level2.value"}})
+        input = {"level1": {"level2": {"value": 42}}}
+        result = list(transformer([input]))
+        assert result == [{"out1": {"out2": 42}}]
+
+    def test_structure_list(self):
+        transformer = TransformJSON({"out1": ["$.level1.level2.value1", "literal"]})
+        input = {"level1": {"level2": {"value1": 42, "value2": "foobar"}}}
+        result = list(transformer([input]))
+        assert result == [{"out1": [42, "literal"]}]
+
+    def test_jsonpath_error_no_results(self):
+        transformer = TransformJSON({"output": "$.level1[*].nothing"})
+        input = {"level1": [{"level2": 0}, {"level2": 1}]}
+        with pytest.raises(ValueError):
+            list(transformer([input]))
+
+    def test_jsonpath_error_multiple_results(self):
+        transformer = TransformJSON({"output": "$.level1[*].level2"})
+        input = {"level1": [{"level2": 0}, {"level2": 1}]}
+        with pytest.raises(ValueError):
+            list(transformer([input]))
+
+    def test_compute_literal(self):
+        transformer = TransformJSON({"output": {"$compute": {"$literal": "literal"}}})
+        input = {"level1": 42}
+        result = list(transformer([input]))
+        assert result == [{"output": "literal"}]
+
+    def test_error_compute_scalar_literal(self):
+        with pytest.raises(ValueError):
+            TransformJSON({"output": {"$compute": {"$scalar": "literal"}}})
+
+    def test_error_compute_list_literal(self):
+        with pytest.raises(ValueError):
+            TransformJSON({"output": {"$compute": {"$list": "literal"}}})
+
+    def test_compute_scalar_query(self):
+        transformer = TransformJSON({"output": {"$compute": {"$scalar": "$.level1"}}})
+        input = {"level1": 42}
+        result = list(transformer([input]))
+        assert result == [{"output": 42}]
+
+    def test_compute_scalar_query_list(self):
+        transformer = TransformJSON({"output": {"$compute": {"$scalar": "$.level1"}}})
+        input = {"level1": [{"level2": 0}, {"level2": 1}]}
+        result = list(transformer([input]))
+        assert result == [{"output": [{"level2": 0}, {"level2": 1}]}]
+
+    def test_compute_list_query_list(self):
+        transformer = TransformJSON({"output": {"$compute": {"$list": "$.level1"}}})
+        input = {"level1": [{"level2": 0}, {"level2": 1}]}
+        result = list(transformer([input]))
+        assert result == [{"output": [[{"level2": 0}, {"level2": 1}]]}]
+
+    def test_compute_list_query(self):
+        transformer = TransformJSON(
+            {"output": {"$compute": {"$list": "$.level1[*].level2"}}}
+        )
+        input = {"level1": [{"level2": 0}, {"level2": 1}]}
+        result = list(transformer([input]))
+        assert result == [{"output": [0, 1]}]
+
+    def test_compute_list_empty(self):
+        transformer = TransformJSON(
+            {"output": {"$compute": {"$list": "$.level1[*].nothing"}}}
+        )
+        input = {"level1": [{"level2": 0}, {"level2": 1}]}
+        result = list(transformer([input]))
+        assert result == [{"output": []}]
+
+    def test_compute_list_single(self):
+        transformer = TransformJSON(
+            {"output": {"$compute": {"$list": "$.level1[0].level2"}}}
+        )
+        input = {"level1": [{"level2": 0}, {"level2": 1}]}
+        result = list(transformer([input]))
+        assert result == [{"output": [0]}]
+
+    def test_compute_func_findall(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {"$scalar": "$.input"},
+                        {"$func": "findall", "pattern": r"[a-c]"},
+                    ]
+                }
+            }
+        )
+        input = {"input": "abc123"}
+        result = list(transformer([input]))
+        assert result == [{"output": ["a", "b", "c"]}]
+
+    def test_compute_func_findall_nothing(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {"$scalar": "$.input"},
+                        {"$func": "findall", "pattern": r"nothing"},
+                    ]
+                }
+            }
+        )
+        input = {"input": "abc123"}
+        result = list(transformer([input]))
+        assert result == [{"output": []}]
+
+    def test_compute_func_join(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {"$scalar": "$.input"},
+                        {"$func": "join", "separator": ","},
+                    ]
+                }
+            }
+        )
+        input = {"input": ["a", "b", "c"]}
+        result = list(transformer([input]))
+        assert result == [{"output": "a,b,c"}]
+
+    def test_compute_func_list(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {"$scalar": "$.input"},
+                        {"$func": "list"},
+                    ]
+                }
+            }
+        )
+        input = {"input": "abc123"}
+        result = list(transformer([input]))
+        assert result == [{"output": ["a", "b", "c", "1", "2", "3"]}]
+
+    def test_compute_search(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {"$scalar": "$.input"},
+                        {"$func": "search", "pattern": r"[1-3]+"},
+                    ]
+                }
+            }
+        )
+        input = {"input": "abc123"}
+        result = list(transformer([input]))
+        assert result == [{"output": "123"}]
+
+    def test_compute_search_group(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {"$scalar": "$.input"},
+                        {
+                            "$func": "search",
+                            "pattern": r"[1-3]([1-3])[1-3]",
+                            "group": 1,
+                        },
+                    ]
+                }
+            }
+        )
+        input = {"input": "abc123"}
+        result = list(transformer([input]))
+        assert result == [{"output": "2"}]
+
+    def test_compute_split(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {"$scalar": "$.input"},
+                        {"$func": "split", "pattern": r","},
+                    ]
+                }
+            }
+        )
+        input = {"input": "ab,cd,ef,gh"}
+        result = list(transformer([input]))
+        assert result == [{"output": ["ab", "cd", "ef", "gh"]}]
+
+    def test_compute_split_maxsplit(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {"$scalar": "$.input"},
+                        {"$func": "split", "pattern": r",", "maxsplit": 2},
+                    ]
+                }
+            }
+        )
+        input = {"input": "ab,cd,ef,gh"}
+        result = list(transformer([input]))
+        assert result == [{"output": ["ab", "cd", "ef,gh"]}]
+
+    def test_compute_sub(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {"$scalar": "$.input"},
+                        {"$func": "sub", "pattern": r"[1-3]", "repl": "6"},
+                    ]
+                }
+            }
+        )
+        input = {"input": "abc123"}
+        result = list(transformer([input]))
+        assert result == [{"output": "abc666"}]
+
+    def test_compute_sub_count(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {"$scalar": "$.input"},
+                        {"$func": "sub", "pattern": r"[1-3]", "repl": "6", "count": 2},
+                    ]
+                }
+            }
+        )
+        input = {"input": "abc123"}
+        result = list(transformer([input]))
+        assert result == [{"output": "abc663"}]
+
+    def test_compute_reduce_lists(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {"$literal": [[1, 2], [3, 4]]},
+                        {"$func": "reduce"},
+                    ]
+                }
+            }
+        )
+        input = {"input": ["a", "b"]}
+        result = list(transformer([input]))
+        assert result == [{"output": [1, 2, 3, 4]}]
+
+    def test_compute_reduce_strings(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {"$literal": ["foo", "bar"]},
+                        {"$func": "reduce"},
+                    ]
+                }
+            }
+        )
+        input = {"input": "prompt"}
+        result = list(transformer([input]))
+        assert result == [{"output": "foobar"}]
+
+    def test_list_multiple_lists(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {
+                            "$list": [
+                                {"$literal": [1, 2]},
+                                {"$list": "$.aux[*]"},
+                            ],
+                        },
+                    ]
+                }
+            }
+        )
+        input = {"input": "prompt", "aux": ["foo", "bar"]}
+        result = list(transformer([input]))
+        assert result == [{"output": [[1, 2], ["foo", "bar"]]}]
+
+    def test_list_multiple_lists_nested(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {
+                            "$list": [
+                                {"$literal": [1, 2]},
+                                {
+                                    "$list": [
+                                        {"$scalar": "$.aux[0]"},
+                                        {"$scalar": "$.aux[1]"},
+                                    ]
+                                },
+                            ],
+                        },
+                    ]
+                }
+            }
+        )
+        input = {"input": "prompt", "aux": ["foo", "bar"]}
+        result = list(transformer([input]))
+        assert result == [{"output": [[1, 2], ["foo", "bar"]]}]
+
+    def test_list_multiple_strings(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {
+                            "$list": [
+                                {"$scalar": "$.input"},
+                                {"$literal": "<think>\n"},
+                            ],
+                        },
+                    ]
+                }
+            }
+        )
+        input = {"input": "prompt"}
+        result = list(transformer([input]))
+        assert result == [{"output": ["prompt", "<think>\n"]}]