dyff-schema 0.22.0__tar.gz → 0.24.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dyff_schema-0.22.0/dyff_schema.egg-info → dyff_schema-0.24.0}/PKG-INFO +1 -1
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/adapters.py +320 -54
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/platform.py +52 -6
- {dyff_schema-0.22.0 → dyff_schema-0.24.0/dyff_schema.egg-info}/PKG-INFO +1 -1
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff_schema.egg-info/SOURCES.txt +1 -0
- dyff_schema-0.24.0/tests/test_adapters.py +368 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/.gitignore +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/.gitlab-ci.yml +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/.licenserc.yaml +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/.pre-commit-config.yaml +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/.prettierignore +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/.secrets.baseline +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/CODE_OF_CONDUCT.md +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/LICENSE +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/NOTICE +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/README.md +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/__init__.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/adapters.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/annotations.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/base.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/copydoc.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/dataset/__init__.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/dataset/arrow.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/dataset/binary.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/dataset/classification.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/dataset/embedding.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/dataset/text.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/dataset/vision.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/errors.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/ids.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/io/__init__.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/io/vllm.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/platform.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/py.typed +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/quantity.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/requests.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/test.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/__init__.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/__init__.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/base.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/dataset/__init__.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/dataset/arrow.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/dataset/binary.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/dataset/classification.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/dataset/embedding.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/dataset/text.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/dataset/vision.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/io/__init__.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/io/vllm.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/requests.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/test.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/v0/r1/version.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff/schema/version.py +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff_schema.egg-info/dependency_links.txt +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff_schema.egg-info/requires.txt +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/dyff_schema.egg-info/top_level.txt +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/makefile +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/pyproject.toml +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/setup.cfg +0 -0
- {dyff_schema-0.22.0 → dyff_schema-0.24.0}/tests/test_import.py +0 -0
--- dyff_schema-0.22.0/dyff/schema/v0/r1/adapters.py
+++ dyff_schema-0.24.0/dyff/schema/v0/r1/adapters.py
@@ -6,14 +6,21 @@ from __future__ import annotations
 
 import functools
 import json
-from typing import Any, Callable, Iterable, NamedTuple, Protocol, Type
+import operator
+import re
+from typing import Any, Callable, Iterable, Literal, NamedTuple, Protocol, Type
 
 import jsonpath_ng as jsonpath
 from jsonpath_ng.exceptions import JSONPathError
+from jsonpath_ng.ext.parser import parse as jsonpath_parse_ext
 
 from dyff.schema.platform import SchemaAdapter
 
 
+def _json_deep_copy(data):
+    return json.loads(json.dumps(data))
+
+
 def map_structure(fn, data):
     """Given a JSON data structure ``data``, create a new data structure instance with
     the same shape as ``data`` by applying ``fn`` to each "leaf" value in the nested
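The new _json_deep_copy helper clones a JSON-compatible structure by serializing it and parsing it back. A minimal sketch of the guarantee this provides (the function body is copied from the diff; the usage around it is invented for illustration):

    import json

    def _json_deep_copy(data):
        return json.loads(json.dumps(data))

    original = {"object": {"id": "abc123"}, "list": [1, 2]}
    copy = _json_deep_copy(original)
    copy["object"]["id"] = "changed"
    assert original["object"]["id"] == "abc123"  # nested containers are independent copies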
@@ -70,90 +77,346 @@ class Adapter(Protocol):
         raise NotImplementedError()
 
 
+class _Literal:
+    def __init__(self, value):
+        self.value = value
+
+    def __call__(self, x):
+        return self.value
+
+
+class _Func_findall:
+    def __init__(self, *, pattern: str, flags: int = 0):
+        self.pattern = pattern
+        self.flags = flags
+
+    def __call__(self, x) -> list[str]:
+        return re.findall(self.pattern, x, self.flags)
+
+
+class _Func_join:
+    def __init__(self, *, separator: str = ""):
+        self._separator = separator
+
+    def __call__(self, x: list[str]) -> str:
+        return self._separator.join(x)
+
+
+class _Func_list:
+    def __call__(self, x) -> list:
+        return list(x)
+
+
+class _Func_reduce:
+    def __call__(self, x):
+        return functools.reduce(operator.add, x)
+
+
+class _Func_search:
+    def __init__(
+        self,
+        *,
+        pattern: str,
+        flags: int = 0,
+        group: int = 0,
+        default: str | None = None,
+    ):
+        self.pattern = pattern
+        self.flags = flags
+        self.group = group
+        self.default = default
+
+    def __call__(self, x) -> str | None:
+        m = re.search(self.pattern, x, self.flags)
+        return self.default if m is None else m.group(self.group)
+
+
+class _Func_split:
+    def __init__(self, *, pattern: str, maxsplit: int = 0, flags: int = 0):
+        self.pattern = pattern
+        self.maxsplit = maxsplit
+        self.flags = flags
+
+    def __call__(self, x) -> list[str]:
+        return re.split(self.pattern, x, self.maxsplit, self.flags)
+
+
+class _Func_sub:
+    def __init__(self, *, pattern: str, repl: str, count: int = 0, flags: int = 0):
+        self.pattern = pattern
+        self.repl = repl
+        self.count = count
+        self.flags = flags
+
+    def __call__(self, x) -> str:
+        return re.sub(self.pattern, self.repl, x, self.count, self.flags)
+
+
+class _Value_jsonpath:
+    def __init__(self, expr, *, kind: Literal["scalar", "list"] = "scalar"):
+        self._expr: jsonpath.JSONPath = jsonpath.parse(expr)
+        self._kind = kind
+
+    def __call__(self, x):
+        results = self._expr.find(x)
+        if self._kind == "list":
+            return [result.value for result in results]
+        elif self._kind == "scalar":
+            if len(results) == 0:
+                raise ValueError(f"no match for '{self._expr}' in '{x}'")
+            elif len(results) > 1:
+                raise ValueError(f"multiple results for '{self._expr}' in '{x}'")
+            return results[0].value
+        else:
+            raise AssertionError(f"kind {self._kind}")
+
+
+class _Value_list:
+    def __init__(self, exprs: list[Callable]):
+        self._exprs = exprs
+
+    def __call__(self, x) -> list:
+        return [e(x) for e in self._exprs]
+
+
+def _maybe_value_expr(expr: dict) -> Callable | None:
+    kinds = ["$literal", "$scalar", "$list"]
+    maybe_exprs = {k: expr.get(k) for k in kinds}
+    just_exprs = [k for k in kinds if maybe_exprs[k] is not None]
+    if len(just_exprs) == 0:
+        return None
+    if len(just_exprs) > 1:
+        raise ValueError(f"must specify exactly one of {kinds}: got {just_exprs}")
+
+    # remove sigil
+    kind: Literal["literal", "scalar", "list"] = just_exprs[0][1:]  # type: ignore
+    value = maybe_exprs[just_exprs[0]]
+    if kind == "literal":
+        return _Literal(value)
+
+    op: Callable = _Literal(value)
+    if isinstance(value, str):
+        if value.startswith("$"):
+            if value.startswith("$$"):
+                # Literal string -- remove "escape" character
+                op = _Literal(value[1:])
+            else:
+                op = _Value_jsonpath(value, kind=kind)
+    elif kind == "list" and isinstance(value, list):
+        exprs = [_maybe_value_expr(e) for e in value]
+        if any(e is None for e in exprs):
+            raise ValueError("$list elements must be value expressions")
+        op = _Value_list(exprs)  # type: ignore
+    if isinstance(op, _Literal) and kind != "literal":
+        raise ValueError("must use $literal when providing a literal value")
+    return op
+
+
+class _LeafExpression:
+    FUNCTIONS = {
+        "findall": _Func_findall,
+        "join": _Func_join,
+        "list": _Func_list,
+        "reduce": _Func_reduce,
+        "search": _Func_search,
+        "split": _Func_split,
+        "sub": _Func_sub,
+    }
+
+    def __init__(self, pipeline: dict | list[dict]):
+        if isinstance(pipeline, dict):
+            pipeline = [pipeline]
+
+        self._compiled_pipeline: list[Callable] = []
+        for step in pipeline:
+            if (value_op := _maybe_value_expr(step)) is not None:
+                self._compiled_pipeline.append(value_op)
+            elif (func := step.pop("$func", None)) is not None:
+                self._compiled_pipeline.append(_LeafExpression.FUNCTIONS[func](**step))
+            else:
+                raise ValueError(f"invalid $compute step: {step}")
+
+    def __call__(self, x):
+        output = None
+        for i, step in enumerate(self._compiled_pipeline):
+            if i == 0:
+                output = step(x)
+            else:
+                output = step(output)
+        return output
+
+
 class TransformJSON:
-    """
-
+    """Create a new JSON structure where the "leaf" values are populated by the results
+    of transformation functions applied to the input.
+
+    The "value" for each leaf can be::
+
+        1. A JSON literal value, or
+        2. The result of a jsonpath query on the input structure, or
+        3. The result of a computation pipeline starting from (1) or (2).
 
-
-
+    To distinguish the specifications of leaf values from the specification of
+    the output structure, we apply the following rules::
 
-
+        1. Composite values (``list`` and ``dict``) specify the structure of
+           the output.
+        2. Scalar values are output as-is, unless they are strings containing
+           JSONPath queries.
+        3. JSONPath queries are strings beginning with '$'. They are replaced
+           by the result of the query.
+        4. A ``dict`` containing the special key ``"$compute"`` introduces a
+           "compute context", which computes a leaf value from the input data.
+           Descendants of this key have "compute context semantics", which are
+           different from the "normal" semantics.
+
+    For example, if the ``configuration`` is::
 
         {
             "id": "$.object.id",
             "name": "literal",
             "children": {"left": "$.list[0]", "right": "$.list[1]"}
+            "characters": {
+                "letters": {
+                    "$compute": [
+                        {"$scalar": "$.object.id"},
+                        {
+                            "$func": "sub",
+                            "pattern": "[0-9]",
+                            "repl": "",
+                        },
+                        {"$func": "list"}
+                    ]
+                }
+            }
         }
 
     and the data is::
 
         {
-            "object": {"id":
+            "object": {"id": "abc123", "name": "spam"},
             "list": [1, 2]
         }
 
-    Then applying the
+    Then applying the transformation to the data will result in the new structure::
 
         {
-            "id":
+            "id": "abc123",
             "name": "literal",
-            "children: {"left": 1, "right": 2}
+            "children": {"left": 1, "right": 2},
+            "characters": {
+                "letters": ["a", "b", "c"]
+            }
         }
 
-
-
-
-
-
-
-
+    The ``.characters.letters`` field was derived by::
+
+        1. Extracting the value of the ``.object.id`` field in the input
+        2. Applying ``re.sub(r"[0-9]", "", _)`` to the result of (1)
+        3. Applying ``list(_)`` to the result of (2)
+
+    Notice that descendants of the ``$compute`` key no longer describe the
+    structure of the output, but instead describe steps of the computation.
+    The value of ``"$compute"`` can be either an object or a list of objects.
+    A list is interpreted as a "pipeline" where each step is applied to the
+    output of the previous step.
+
+    Implicit queries
+    ================
+
+    Outside of the ``$compute`` context, string values that start with a ``$``
+    character are interpreted as jsonpath queries. Queries in this context must
+    return **exactly one value**, otherwise a ``ValueError`` will be raised.
+    This is because when multiple values are returned, there's no way to
+    distinguish a scalar-valued query that found 1 scalar from a list-valued
+    query that found a list with 1 element. In the ``$compute`` context, you
+    can specify which semantics you want.
+
+    If you need a literal string that starts with the '$' character, escape it
+    with a second '$', e.g., "$$PATH" will appear as the literal string "$PATH"
+    in the output. This works for both keys and values, e.g.,
+    ``{"$$key": "$$value"}`` outputs ``{"$key": "$value"}``. All keys that
+    begin with ``$`` are reserved, and you must always escape them.
+
+    The $compute context
+    ====================
+
+    A ``$compute`` context is introduced by a ``dict`` that contains the key
+    ``{"$compute": ...}``. Semantics in the ``$compute`` context are different
+    from semantics in the "normal" context.
+
+    $literal vs. $scalar vs. $list
+    ------------------------------
+
+    Inside a ``$compute`` context, we distinguish explicitly between literal
+    values, jsonpath queries that return scalars, and jsonpath queries that
+    return lists. You specify which semantics you intend by using
+    ``{"$literal": [1, 2]}``, ``{"$scalar": "$.foo"}``, or ``{"$list": "$.foo[*]"}``.
+    Items with ``$literal`` semantics are **never** interpreted as jsonpath
+    queries, even if they start with ``$``. In the ``$literal`` context, you
+    **should not** escape the leading ``$`` character.
+
+    A ``$scalar`` query has the same semantics as a jsonpath query outside
+    of the ``$compute`` context, i.e., it must return exactly 1 item.
+    A ``$list`` query will return a list, which can be empty. Scalar-valued
+    queries in a ``$list`` context will return a list with 1 element.
+
+    $func
+    -----
+
+    You use blocks with a ``$func`` key to specify computation steps. The
+    available functions are: ``findall``, ``join``, ``list``, ``reduce``,
+    ``search``, ``split``, ``sub``. These behave the same way as the
+    corresponding functions from the Python standard library::
+
+        * ``findall``, ``search``, ``split``, and ``sub`` are from the
+          ``re`` module.
+        * ``reduce`` uses the ``+`` operator with no starting value; it will
+          raise an error if called on an empty list.
+
+    All of these functions take named parameters with the same names and
+    semantics as their parameters in Python.
     """
 
     def __init__(self, configuration: dict):
-        """
-        Parameters:
-          ``output_structure``: A JSON object where all the "leaf" values
-            are strings containing jsonpath queries.
-        """
         if configuration != json.loads(json.dumps(configuration)):
             raise ValueError("configuration is not valid JSON")
-        self.
-
-
-
-
-
-
-
-
-
-
-
+        self.configuration = configuration
+        self._transformation = self._compile(self.configuration)
+
+    def _compile(self, x) -> Callable | list | dict:
+        if isinstance(x, dict):
+            if (compute := x.get("$compute")) is not None:
+                if len(x) != 1:
+                    raise ValueError("$compute must be the only key in the dict")
+                return _LeafExpression(compute)
+            else:
+                # Escape '$' in dict keys
+                d: dict[str, Any] = {}
+                for k, v in x.items():
+                    if k.startswith("$"):
+                        if k.startswith("$$"):
+                            k = k[1:]
+                        else:
+                            raise ValueError(
+                                f"dict key '{k}': keys beginning with '$' are reserved; use '$$' to escape"
+                            )
+                    d[k] = self._compile(v)
+                return d
+        elif isinstance(x, list):
+            return [self._compile(y) for y in x]
+        elif isinstance(x, str):
             if x.startswith("$"):
                 if x.startswith("$$"):
                     # Literal string -- remove "escape" character
-                    return x[1:]
+                    return _Literal(x[1:])
                 else:
-                    return
-        return x
+                    return _Value_jsonpath(x, kind="scalar")
+        return _Literal(x)
 
     def __call__(self, stream: Iterable[dict]) -> Iterable[dict]:
-        def query(data, expr):
-            if not isinstance(expr, jsonpath.JSONPath):
-                # Literal
-                return expr
-            results = expr.find(data)
-            if len(results) == 0:
-                raise ValueError(f"no match for {expr}")
-            elif len(results) > 1:
-                raise ValueError(f"multiple results for {expr}")
-            return results[0].value
-
         for item in stream:
-            transformed = map_structure(
-                lambda expr: query(item, expr), self._expressions
-            )
-            yield transformed
+            yield map_structure(lambda compute: compute(item), self._transformation)
 
 
 class EmbedIndex:
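A minimal usage sketch of the new $compute pipeline, mirroring the class docstring and the new tests (the printed output is shown as a comment):

    from dyff.schema.adapters import TransformJSON

    transformer = TransformJSON(
        {
            "id": "$.object.id",
            "characters": {
                "letters": {
                    "$compute": [
                        # Start from the value extracted by the $scalar query ...
                        {"$scalar": "$.object.id"},
                        # ... remove the digits ...
                        {"$func": "sub", "pattern": "[0-9]", "repl": ""},
                        # ... then split the remaining string into characters.
                        {"$func": "list"},
                    ]
                }
            },
        }
    )
    data = {"object": {"id": "abc123", "name": "spam"}, "list": [1, 2]}
    print(list(transformer([data])))
    # [{'id': 'abc123', 'characters': {'letters': ['a', 'b', 'c']}}]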
@@ -573,13 +836,16 @@ def _test():
     print(list(transformer([data])))
 
     transformer = TransformJSON(
-        {
+        {
+            "id": "$.object.id",
+            "children": {"left": "$.list[0]", "right": "$.list[1]"},
+        }
     )
     print(
         list(
             transformer(
                 [
-                    {"object": {"id":
+                    {"object": {"id": "abc123", "name": "spam"}, "list": [1, 2]},
                 ]
             )
         )
--- dyff_schema-0.22.0/dyff/schema/v0/r1/platform.py
+++ dyff_schema-0.24.0/dyff/schema/v0/r1/platform.py
@@ -1226,6 +1226,7 @@ class InferenceServiceRunner(DyffSchemaBaseModel):
 
     nodes: int = pydantic.Field(
         default=1,
+        ge=1,
         description="Number of nodes. The resource specs apply to *each node*.",
     )
 
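The added ge=1 constraint means pydantic now rejects non-positive node counts at validation time. A standalone sketch of the effect (RunnerSketch is a stand-in model, not the dyff class; the field definition is copied from the diff):

    import pydantic

    class RunnerSketch(pydantic.BaseModel):
        nodes: int = pydantic.Field(
            default=1,
            ge=1,
            description="Number of nodes. The resource specs apply to *each node*.",
        )

    RunnerSketch(nodes=2)  # accepted
    try:
        RunnerSketch(nodes=0)  # rejected: 0 violates the ge=1 constraint
    except pydantic.ValidationError as error:
        print(error)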
@@ -1258,6 +1259,7 @@ class InferenceServiceBase(DyffSchemaBaseModel):
         description="Configuration of the Builder used to build the service.",
     )
 
+    # FIXME: (DYFF-261) .runner should be required
     runner: Optional[InferenceServiceRunner] = pydantic.Field(
         default=None, description="Configuration of the Runner used to run the service."
     )
@@ -1316,12 +1318,6 @@ class InferenceSessionBase(DyffSchemaBaseModel):
         " Note that some accelerator types may not be available in non-spot pods.",
     )
 
-    nodes: int = pydantic.Field(
-        default=1,
-        ge=1,
-        description="Number of nodes. The resource specs apply to *each node*.",
-    )
-
 
 class InferenceSessionSpec(InferenceSessionBase):
     inferenceService: ForeignInferenceService = pydantic.Field(
@@ -1374,6 +1370,50 @@ class TaskSchema(DyffSchemaBaseModel):
     objective: str
 
 
+class EvaluationClientConfiguration(DyffSchemaBaseModel):
+    badRequestPolicy: Literal["Abort", "Skip"] = pydantic.Field(
+        default="Abort",
+        description="What to do if an inference call raises a 400 Bad Request"
+        " or a similar error that indicates a problem with the input instance."
+        " Abort (default): the evaluation fails immediately."
+        " Skip: output None for the bad instance and continue.",
+    )
+
+    transientErrorRetryLimit: int = pydantic.Field(
+        default=120,
+        description="How many times to retry transient errors before the"
+        " evaluation fails. The count is reset after a successful inference."
+        " Note that transient errors often occur during inference service"
+        " startup. The maximum time that the evaluation will wait for a"
+        " service (re)start is (retry limit) * (retry delay).",
+    )
+
+    transientErrorRetryDelaySeconds: int = pydantic.Field(
+        default=30,
+        description="How long to wait before retrying a transient error."
+        " Note that transient errors often occur during inference service"
+        " startup. The maximum time that the evaluation will wait for a"
+        " service (re)start is (retry limit) * (retry delay).",
+    )
+
+    duplicateOutputPolicy: Literal["Deduplicate", "Error", "Ignore"] = pydantic.Field(
+        default="Deduplicate",
+        description="What to do if there are duplicate outputs."
+        " Deduplicate (default): output only one of the duplicates, chosen"
+        " arbitrarily. Error: the evaluation fails. Ignore: duplicates are"
+        " retained in the output."
+        " Setting this to Error is discouraged because duplicates can"
+        " arise in normal operation if the client restarts due to"
+        " a transient failure.",
+    )
+
+    missingOutputPolicy: Literal["Error", "Ignore"] = pydantic.Field(
+        default="Error",
+        description="What to do if there are missing outputs."
+        " Error (default): the evaluation fails. Ignore: no error.",
+    )
+
+
 class EvaluationBase(DyffSchemaBaseModel):
     dataset: str = pydantic.Field(description="The Dataset to evaluate on.")
 
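Per the field descriptions, the maximum time the client waits for a service (re)start is (retry limit) * (retry delay), so the defaults allow 120 * 30 s = 3600 s, i.e., one hour. A usage sketch (assuming the new class is re-exported from dyff.schema.platform like the other platform types):

    from dyff.schema.platform import EvaluationClientConfiguration

    config = EvaluationClientConfiguration()  # all defaults
    assert config.transientErrorRetryLimit * config.transientErrorRetryDelaySeconds == 3600

    # Skip bad input instances, and give up on transient errors after ~5 minutes.
    lenient = EvaluationClientConfiguration(
        badRequestPolicy="Skip",
        transientErrorRetryLimit=10,
        transientErrorRetryDelaySeconds=30,
    )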
@@ -1381,11 +1421,17 @@ class EvaluationBase(DyffSchemaBaseModel):
         default=1, description="Number of replications to run."
     )
 
+    # TODO: This should be in the client config object
     workersPerReplica: Optional[int] = pydantic.Field(
         default=None,
         description="Number of data workers per inference service replica.",
     )
 
+    client: EvaluationClientConfiguration = pydantic.Field(
+        default_factory=EvaluationClientConfiguration,
+        description="Configuration for the evaluation client.",
+    )
+
 
 class Evaluation(DyffEntity, EvaluationBase):
     """A description of how to run an InferenceService on a Dataset to obtain a set of
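A sketch of where the new client block sits in an evaluation specification (values are hypothetical and other required Evaluation fields are elided):

    evaluation_spec = {
        "dataset": "<dataset-id>",
        "workersPerReplica": 4,
        "client": {  # new in 0.24.0; defaults apply when omitted
            "badRequestPolicy": "Skip",
            "missingOutputPolicy": "Ignore",
        },
    }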
--- /dev/null
+++ dyff_schema-0.24.0/tests/test_adapters.py
@@ -0,0 +1,368 @@
+# SPDX-FileCopyrightText: 2024 UL Research Institutes
+# SPDX-License-Identifier: Apache-2.0
+
+import pytest
+
+from dyff.schema.adapters import TransformJSON
+
+
+class Test_TransformJSON:
+    def test_literal(self):
+        transformer = TransformJSON({"literal": "literal"})
+        input = {"input": 42}
+        result = list(transformer([input]))
+        assert result == [{"literal": "literal"}]
+
+    def test_jsonpath(self):
+        transformer = TransformJSON({"jsonpath": "$.input"})
+        input = {"input": 42}
+        result = list(transformer([input]))
+        assert result == [{"jsonpath": 42}]
+
+    def test_jsonpath_escape(self):
+        transformer = TransformJSON({"jsonpath": "$$.input"})
+        input = {"input": 42}
+        result = list(transformer([input]))
+        assert result == [{"jsonpath": "$.input"}]
+
+    def test_multiple_inputs(self):
+        transformer = TransformJSON({"jsonpath": "$.input"})
+        inputs = [{"input": 42}, {"input": 314}]
+        result = list(transformer(inputs))
+        assert result == [{"jsonpath": 42}, {"jsonpath": 314}]
+
+    def test_key_sigil_escape(self):
+        transformer = TransformJSON({"$$escaped": "literal"})
+        input = {"input": 42}
+        result = list(transformer([input]))
+        assert result == [{"$escaped": "literal"}]
+
+    def test_error_key_sigil(self):
+        with pytest.raises(ValueError):
+            TransformJSON({"$invalid": "$$.input"})
+
+    def test_structure_object(self):
+        transformer = TransformJSON({"out1": {"out2": "$.level1.level2.value"}})
+        input = {"level1": {"level2": {"value": 42}}}
+        result = list(transformer([input]))
+        assert result == [{"out1": {"out2": 42}}]
+
+    def test_structure_list(self):
+        transformer = TransformJSON({"out1": ["$.level1.level2.value1", "literal"]})
+        input = {"level1": {"level2": {"value1": 42, "value2": "foobar"}}}
+        result = list(transformer([input]))
+        assert result == [{"out1": [42, "literal"]}]
+
+    def test_jsonpath_error_no_results(self):
+        transformer = TransformJSON({"output": "$.level1[*].nothing"})
+        input = {"level1": [{"level2": 0}, {"level2": 1}]}
+        with pytest.raises(ValueError):
+            list(transformer([input]))
+
+    def test_jsonpath_error_multiple_results(self):
+        transformer = TransformJSON({"output": "$.level1[*].level2"})
+        input = {"level1": [{"level2": 0}, {"level2": 1}]}
+        with pytest.raises(ValueError):
+            list(transformer([input]))
+
+    def test_compute_literal(self):
+        transformer = TransformJSON({"output": {"$compute": {"$literal": "literal"}}})
+        input = {"level1": 42}
+        result = list(transformer([input]))
+        assert result == [{"output": "literal"}]
+
+    def test_error_compute_scalar_literal(self):
+        with pytest.raises(ValueError):
+            TransformJSON({"output": {"$compute": {"$scalar": "literal"}}})
+
+    def test_error_compute_list_literal(self):
+        with pytest.raises(ValueError):
+            TransformJSON({"output": {"$compute": {"$list": "literal"}}})
+
+    def test_compute_scalar_query(self):
+        transformer = TransformJSON({"output": {"$compute": {"$scalar": "$.level1"}}})
+        input = {"level1": 42}
+        result = list(transformer([input]))
+        assert result == [{"output": 42}]
+
+    def test_compute_scalar_query_list(self):
+        transformer = TransformJSON({"output": {"$compute": {"$scalar": "$.level1"}}})
+        input = {"level1": [{"level2": 0}, {"level2": 1}]}
+        result = list(transformer([input]))
+        assert result == [{"output": [{"level2": 0}, {"level2": 1}]}]
+
+    def test_compute_list_query_list(self):
+        transformer = TransformJSON({"output": {"$compute": {"$list": "$.level1"}}})
+        input = {"level1": [{"level2": 0}, {"level2": 1}]}
+        result = list(transformer([input]))
+        assert result == [{"output": [[{"level2": 0}, {"level2": 1}]]}]
+
+    def test_compute_list_query(self):
+        transformer = TransformJSON(
+            {"output": {"$compute": {"$list": "$.level1[*].level2"}}}
+        )
+        input = {"level1": [{"level2": 0}, {"level2": 1}]}
+        result = list(transformer([input]))
+        assert result == [{"output": [0, 1]}]
+
+    def test_compute_list_empty(self):
+        transformer = TransformJSON(
+            {"output": {"$compute": {"$list": "$.level1[*].nothing"}}}
+        )
+        input = {"level1": [{"level2": 0}, {"level2": 1}]}
+        result = list(transformer([input]))
+        assert result == [{"output": []}]
+
+    def test_compute_list_single(self):
+        transformer = TransformJSON(
+            {"output": {"$compute": {"$list": "$.level1[0].level2"}}}
+        )
+        input = {"level1": [{"level2": 0}, {"level2": 1}]}
+        result = list(transformer([input]))
+        assert result == [{"output": [0]}]
+
+    def test_compute_func_findall(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {"$scalar": "$.input"},
+                        {"$func": "findall", "pattern": r"[a-c]"},
+                    ]
+                }
+            }
+        )
+        input = {"input": "abc123"}
+        result = list(transformer([input]))
+        assert result == [{"output": ["a", "b", "c"]}]
+
+    def test_compute_func_findall_nothing(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {"$scalar": "$.input"},
+                        {"$func": "findall", "pattern": r"nothing"},
+                    ]
+                }
+            }
+        )
+        input = {"input": "abc123"}
+        result = list(transformer([input]))
+        assert result == [{"output": []}]
+
+    def test_compute_func_join(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {"$scalar": "$.input"},
+                        {"$func": "join", "separator": ","},
+                    ]
+                }
+            }
+        )
+        input = {"input": ["a", "b", "c"]}
+        result = list(transformer([input]))
+        assert result == [{"output": "a,b,c"}]
+
+    def test_compute_func_list(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {"$scalar": "$.input"},
+                        {"$func": "list"},
+                    ]
+                }
+            }
+        )
+        input = {"input": "abc123"}
+        result = list(transformer([input]))
+        assert result == [{"output": ["a", "b", "c", "1", "2", "3"]}]
+
+    def test_compute_search(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {"$scalar": "$.input"},
+                        {"$func": "search", "pattern": r"[1-3]+"},
+                    ]
+                }
+            }
+        )
+        input = {"input": "abc123"}
+        result = list(transformer([input]))
+        assert result == [{"output": "123"}]
+
+    def test_compute_search_group(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {"$scalar": "$.input"},
+                        {
+                            "$func": "search",
+                            "pattern": r"[1-3]([1-3])[1-3]",
+                            "group": 1,
+                        },
+                    ]
+                }
+            }
+        )
+        input = {"input": "abc123"}
+        result = list(transformer([input]))
+        assert result == [{"output": "2"}]
+
+    def test_compute_split(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {"$scalar": "$.input"},
+                        {"$func": "split", "pattern": r","},
+                    ]
+                }
+            }
+        )
+        input = {"input": "ab,cd,ef,gh"}
+        result = list(transformer([input]))
+        assert result == [{"output": ["ab", "cd", "ef", "gh"]}]
+
+    def test_compute_split_maxsplit(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {"$scalar": "$.input"},
+                        {"$func": "split", "pattern": r",", "maxsplit": 2},
+                    ]
+                }
+            }
+        )
+        input = {"input": "ab,cd,ef,gh"}
+        result = list(transformer([input]))
+        assert result == [{"output": ["ab", "cd", "ef,gh"]}]
+
+    def test_compute_sub(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {"$scalar": "$.input"},
+                        {"$func": "sub", "pattern": r"[1-3]", "repl": "6"},
+                    ]
+                }
+            }
+        )
+        input = {"input": "abc123"}
+        result = list(transformer([input]))
+        assert result == [{"output": "abc666"}]
+
+    def test_compute_sub_count(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {"$scalar": "$.input"},
+                        {"$func": "sub", "pattern": r"[1-3]", "repl": "6", "count": 2},
+                    ]
+                }
+            }
+        )
+        input = {"input": "abc123"}
+        result = list(transformer([input]))
+        assert result == [{"output": "abc663"}]
+
+    def test_compute_reduce_lists(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {"$literal": [[1, 2], [3, 4]]},
+                        {"$func": "reduce"},
+                    ]
+                }
+            }
+        )
+        input = {"input": ["a", "b"]}
+        result = list(transformer([input]))
+        assert result == [{"output": [1, 2, 3, 4]}]
+
+    def test_compute_reduce_strings(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {"$literal": ["foo", "bar"]},
+                        {"$func": "reduce"},
+                    ]
+                }
+            }
+        )
+        input = {"input": "prompt"}
+        result = list(transformer([input]))
+        assert result == [{"output": "foobar"}]
+
+    def test_list_multiple_lists(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {
+                            "$list": [
+                                {"$literal": [1, 2]},
+                                {"$list": "$.aux[*]"},
+                            ],
+                        },
+                    ]
+                }
+            }
+        )
+        input = {"input": "prompt", "aux": ["foo", "bar"]}
+        result = list(transformer([input]))
+        assert result == [{"output": [[1, 2], ["foo", "bar"]]}]
+
+    def test_list_multiple_lists_nested(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {
+                            "$list": [
+                                {"$literal": [1, 2]},
+                                {
+                                    "$list": [
+                                        {"$scalar": "$.aux[0]"},
+                                        {"$scalar": "$.aux[1]"},
+                                    ]
+                                },
+                            ],
+                        },
+                    ]
+                }
+            }
+        )
+        input = {"input": "prompt", "aux": ["foo", "bar"]}
+        result = list(transformer([input]))
+        assert result == [{"output": [[1, 2], ["foo", "bar"]]}]
+
+    def test_list_multiple_strings(self):
+        transformer = TransformJSON(
+            {
+                "output": {
+                    "$compute": [
+                        {
+                            "$list": [
+                                {"$scalar": "$.input"},
+                                {"$literal": "<think>\n"},
+                            ],
+                        },
+                    ]
+                }
+            }
+        )
+        input = {"input": "prompt"}
+        result = list(transformer([input]))
+        assert result == [{"output": ["prompt", "<think>\n"]}]