datachain 0.7.0__py3-none-any.whl → 0.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic.
- datachain/__init__.py +0 -3
- datachain/catalog/catalog.py +8 -6
- datachain/cli.py +1 -1
- datachain/client/fsspec.py +9 -9
- datachain/data_storage/schema.py +2 -2
- datachain/data_storage/sqlite.py +5 -4
- datachain/data_storage/warehouse.py +18 -18
- datachain/func/__init__.py +49 -0
- datachain/{lib/func → func}/aggregate.py +13 -11
- datachain/func/array.py +176 -0
- datachain/func/base.py +23 -0
- datachain/func/conditional.py +81 -0
- datachain/func/func.py +384 -0
- datachain/func/path.py +110 -0
- datachain/func/random.py +23 -0
- datachain/func/string.py +154 -0
- datachain/func/window.py +49 -0
- datachain/lib/arrow.py +24 -12
- datachain/lib/data_model.py +25 -9
- datachain/lib/dataset_info.py +2 -2
- datachain/lib/dc.py +94 -56
- datachain/lib/hf.py +1 -1
- datachain/lib/signal_schema.py +1 -1
- datachain/lib/utils.py +1 -0
- datachain/lib/webdataset_laion.py +5 -5
- datachain/model/__init__.py +6 -0
- datachain/model/bbox.py +102 -0
- datachain/model/pose.py +88 -0
- datachain/model/segment.py +47 -0
- datachain/model/ultralytics/__init__.py +27 -0
- datachain/model/ultralytics/bbox.py +147 -0
- datachain/model/ultralytics/pose.py +113 -0
- datachain/model/ultralytics/segment.py +91 -0
- datachain/nodes_fetcher.py +2 -2
- datachain/query/dataset.py +57 -34
- datachain/sql/__init__.py +0 -2
- datachain/sql/functions/__init__.py +0 -26
- datachain/sql/selectable.py +11 -5
- datachain/sql/sqlite/base.py +11 -2
- datachain/toolkit/split.py +6 -2
- {datachain-0.7.0.dist-info → datachain-0.7.2.dist-info}/METADATA +72 -71
- {datachain-0.7.0.dist-info → datachain-0.7.2.dist-info}/RECORD +46 -35
- {datachain-0.7.0.dist-info → datachain-0.7.2.dist-info}/WHEEL +1 -1
- datachain/lib/func/__init__.py +0 -32
- datachain/lib/func/func.py +0 -152
- datachain/lib/models/__init__.py +0 -5
- datachain/lib/models/bbox.py +0 -45
- datachain/lib/models/pose.py +0 -37
- datachain/lib/models/yolo.py +0 -39
- {datachain-0.7.0.dist-info → datachain-0.7.2.dist-info}/LICENSE +0 -0
- {datachain-0.7.0.dist-info → datachain-0.7.2.dist-info}/entry_points.txt +0 -0
- {datachain-0.7.0.dist-info → datachain-0.7.2.dist-info}/top_level.txt +0 -0
datachain/func/window.py
ADDED
```diff
@@ -0,0 +1,49 @@
+from dataclasses import dataclass
+
+from datachain.query.schema import ColumnMeta
+
+
+@dataclass
+class Window:
+    """Represents a window specification for SQL window functions."""
+
+    partition_by: str
+    order_by: str
+    desc: bool = False
+
+
+def window(partition_by: str, order_by: str, desc: bool = False) -> Window:
+    """
+    Defines a window specification for SQL window functions.
+
+    The `window` function specifies how to partition and order the result set
+    for the associated window function. It is used to define the scope of the rows
+    that the window function will operate on.
+
+    Args:
+        partition_by (str): The column name by which to partition the result set.
+            Rows with the same value in the partition column
+            will be grouped together for the window function.
+        order_by (str): The column name by which to order the rows
+            within each partition. This determines the sequence in which
+            the window function is applied.
+        desc (bool, optional): If True, the rows will be ordered in descending order.
+            Defaults to False, which orders the rows
+            in ascending order.
+
+    Returns:
+        Window: A Window object representing the window specification.
+
+    Example:
+        ```py
+        window = func.window(partition_by="signal.category", order_by="created_at")
+        dc.mutate(
+            row_number=func.row_number().over(window),
+        )
+        ```
+    """
+    return Window(
+        ColumnMeta.to_db_name(partition_by),
+        ColumnMeta.to_db_name(order_by),
+        desc,
+    )
```
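For context, a minimal usage sketch following the docstring example above (`dc` is an assumed existing chain, and the signal names are illustrative):

```py
from datachain import func

# Number rows within each category, ordered by creation time.
window = func.window(partition_by="signal.category", order_by="created_at")
dc = dc.mutate(row_number=func.row_number().over(window))
```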
datachain/lib/arrow.py
CHANGED
```diff
@@ -116,31 +116,43 @@ def infer_schema(chain: "DataChain", **kwargs) -> pa.Schema:
     return pa.unify_schemas(schemas)
 
 
-def schema_to_output(
-
+def schema_to_output(
+    schema: pa.Schema, col_names: Optional[Sequence[str]] = None
+) -> tuple[dict[str, type], list[str]]:
+    """
+    Generate UDF output schema from pyarrow schema.
+    Returns a tuple of output schema and original column names (since they may be
+    normalized in the output dict).
+    """
+    signal_schema = _get_datachain_schema(schema)
+    if signal_schema:
+        return signal_schema.values, list(signal_schema.values)
+
     if col_names and (len(schema) != len(col_names)):
         raise ValueError(
             "Error generating output from Arrow schema - "
             f"Schema has {len(schema)} columns but got {len(col_names)} column names."
         )
     if not col_names:
-        col_names = schema.names
-
-
-
+        col_names = schema.names or []
+
+    normalized_col_dict = normalize_col_names(col_names)
+    col_names = list(normalized_col_dict)
+
     hf_schema = _get_hf_schema(schema)
     if hf_schema:
         return {
-            column: hf_type for hf_type, column in zip(hf_schema[1].values(),
-        }
+            column: hf_type for hf_type, column in zip(hf_schema[1].values(), col_names)
+        }, list(normalized_col_dict.values())
+
     output = {}
-    for field, column in zip(schema,
-        dtype = arrow_type_mapper(field.type, column)
+    for field, column in zip(schema, col_names):
+        dtype = arrow_type_mapper(field.type, column)
         if field.nullable and not ModelStore.is_pydantic(dtype):
             dtype = Optional[dtype]  # type: ignore[assignment]
         output[column] = dtype
-
+
+    return output, list(normalized_col_dict.values())
 
 
 def arrow_type_mapper(col_type: pa.DataType, column: str = "") -> type:  # noqa: PLR0911
```
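A short sketch of the new return contract (column names below are illustrative): `schema_to_output` now also returns the original names, since keys in the output dict may be normalized.

```py
import pyarrow as pa

from datachain.lib.arrow import schema_to_output

schema = pa.schema([("My Col", pa.int64()), ("other-col", pa.string())])
output, original_names = schema_to_output(schema)
# `output` maps normalized (db-safe) column names to Python types;
# `original_names` keeps the source spellings in the same order.
```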
datachain/lib/data_model.py
CHANGED
```diff
@@ -1,8 +1,8 @@
 from collections.abc import Sequence
 from datetime import datetime
-from typing import ClassVar, Union, get_args, get_origin
+from typing import ClassVar, Optional, Union, get_args, get_origin
 
-from pydantic import BaseModel, Field, create_model
+from pydantic import AliasChoices, BaseModel, Field, create_model
 
 from datachain.lib.model_store import ModelStore
 from datachain.lib.utils import normalize_col_names
@@ -60,17 +60,33 @@ def is_chain_type(t: type) -> bool:
     return False
 
 
-def dict_to_data_model(
-
-
-
-
+def dict_to_data_model(
+    name: str,
+    data_dict: dict[str, DataType],
+    original_names: Optional[list[str]] = None,
+) -> type[BaseModel]:
+    if not original_names:
+        # Gets a map of a normalized_name -> original_name
+        columns = normalize_col_names(list(data_dict))
+        data_dict = dict(zip(columns.keys(), data_dict.values()))
+        original_names = list(columns.values())
 
     fields = {
-
+        name: (
+            anno,
+            Field(
+                validation_alias=AliasChoices(name, original_names[idx] or name),
+                default=None,
+            ),
+        )
+        for idx, (name, anno) in enumerate(data_dict.items())
     }
+
+    class _DataModelStrict(BaseModel, extra="forbid"):
+        pass
+
     return create_model(
         name,
-        __base__=
+        __base__=_DataModelStrict,
         **fields,
     )  # type: ignore[call-overload]
```
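A hedged sketch of the new alias behavior: normalized names become the model fields, while pydantic's `AliasChoices` keeps the original spellings valid at validation time (names below are illustrative; the exact normalized forms depend on `normalize_col_names`):

```py
from datachain.lib.data_model import dict_to_data_model

# Keys are normalized internally; original spellings become validation aliases.
Model = dict_to_data_model("Example", {"my col": str, "count": int})

# Input using the original column name still validates via the alias.
row = Model.model_validate({"my col": "a", "count": 1})
```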
datachain/lib/dataset_info.py
CHANGED
```diff
@@ -23,8 +23,8 @@ class DatasetInfo(DataModel):
     finished_at: Optional[datetime] = Field(default=None)
     num_objects: Optional[int] = Field(default=None)
     size: Optional[int] = Field(default=None)
-    params: dict[str, str] = Field(default=
-    metrics: dict[str, Any] = Field(default=
+    params: dict[str, str] = Field(default={})
+    metrics: dict[str, Any] = Field(default={})
     error_message: str = Field(default="")
     error_stack: str = Field(default="")
```
datachain/lib/dc.py
CHANGED
```diff
@@ -28,13 +28,14 @@ from sqlalchemy.sql.sqltypes import NullType
 from datachain.client import Client
 from datachain.client.local import FileClient
 from datachain.dataset import DatasetRecord
+from datachain.func.base import Function
+from datachain.func.func import Func
 from datachain.lib.convert.python_to_sql import python_to_sql
 from datachain.lib.convert.values_to_tuples import values_to_tuples
 from datachain.lib.data_model import DataModel, DataType, DataValue, dict_to_data_model
 from datachain.lib.dataset_info import DatasetInfo
 from datachain.lib.file import ArrowRow, File, get_file_type
 from datachain.lib.file import ExportPlacement as FileExportPlacement
-from datachain.lib.func import Func
 from datachain.lib.listing import (
     list_bucket,
     ls,
@@ -112,9 +113,29 @@ class DatasetFromValuesError(DataChainParamsError):  # noqa: D101
         super().__init__(f"Dataset{name} from values error: {msg}")
 
 
-def _get_merge_error_str(col: Union[str, sqlalchemy.ColumnElement]) -> str:
+MergeColType = Union[str, Function, sqlalchemy.ColumnElement]
+
+
+def _validate_merge_on(
+    on: Union[MergeColType, Sequence[MergeColType]],
+    ds: "DataChain",
+) -> Sequence[MergeColType]:
+    if isinstance(on, (str, sqlalchemy.ColumnElement)):
+        return [on]
+    if isinstance(on, Function):
+        return [on.get_column(table=ds._query.table)]
+    if isinstance(on, Sequence):
+        return [
+            c.get_column(table=ds._query.table) if isinstance(c, Function) else c
+            for c in on
+        ]
+
+
+def _get_merge_error_str(col: MergeColType) -> str:
     if isinstance(col, str):
         return col
+    if isinstance(col, Function):
+        return f"{col.name}()"
     if isinstance(col, sqlalchemy.Column):
         return col.name.replace(DEFAULT_DELIMITER, ".")
     if isinstance(col, sqlalchemy.ColumnElement) and hasattr(col, "name"):
@@ -125,11 +146,13 @@ def _get_merge_error_str(col: Union[str, sqlalchemy.ColumnElement]) -> str:
 class DatasetMergeError(DataChainParamsError):  # noqa: D101
     def __init__(  # noqa: D107
         self,
-        on:
-        right_on: Optional[
+        on: Union[MergeColType, Sequence[MergeColType]],
+        right_on: Optional[Union[MergeColType, Sequence[MergeColType]]],
         msg: str,
     ):
-        def _get_str(
+        def _get_str(
+            on: Union[MergeColType, Sequence[MergeColType]],
+        ) -> str:
             if not isinstance(on, Sequence):
                 return str(on)  # type: ignore[unreachable]
             return ", ".join([_get_merge_error_str(col) for col in on])
@@ -348,6 +371,9 @@ class DataChain:
             enable all available CPUs (default=1)
         workers : number of distributed workers. Only for Studio mode. (default=1)
         min_task_size : minimum number of tasks (default=1)
+        prefetch: number of workers to use for downloading files in advance.
+                  This is enabled by default and uses 2 workers.
+                  To disable prefetching, set it to 0.
 
     Example:
         ```py
```
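The new `prefetch` option appears in the settings docstring above; a minimal sketch, assuming it is passed through `DataChain.settings` like the neighboring options (`chain` is an assumed existing chain):

```py
# Prefetching is on by default (2 workers); set it to 0 to download
# files strictly on demand instead.
chain = chain.settings(prefetch=0)
```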
```diff
@@ -648,6 +674,7 @@
         col: str,
         model_name: Optional[str] = None,
         object_name: Optional[str] = None,
+        schema_sample_size: int = 1,
     ) -> "DataChain":
         """Explodes a column containing JSON objects (dict or str DataChain type) into
         individual columns based on the schema of the JSON. Schema is inferred from
@@ -659,6 +686,9 @@
             automatically.
             object_name: optional generated object column name. By default generates the
                 name automatically.
+            schema_sample_size: the number of rows to use for inferring the schema of
+                the JSON (in case some fields are optional and it's not enough to
+                analyze a single row).
 
         Returns:
             DataChain: A new DataChain instance with the new set of columns.
@@ -669,21 +699,22 @@
 
         from datachain.lib.arrow import schema_to_output
 
-
-
+        json_values = list(self.limit(schema_sample_size).collect(col))
+        json_dicts = [
             json.loads(json_value) if isinstance(json_value, str) else json_value
-
+            for json_value in json_values
+        ]
 
-        if not isinstance(json_dict, dict):
+        if any(not isinstance(json_dict, dict) for json_dict in json_dicts):
             raise TypeError(f"Column {col} should be a string or dict type with JSON")
 
-        schema = pa.Table.from_pylist(
-        output = schema_to_output(schema, None)
+        schema = pa.Table.from_pylist(json_dicts).schema
+        output, original_names = schema_to_output(schema, None)
 
         if not model_name:
             model_name = f"{col.title()}ExplodedModel"
 
-        model = dict_to_data_model(model_name, output)
+        model = dict_to_data_model(model_name, output, original_names)
 
         def json_to_model(json_value: Union[str, dict]):
             json_dict = (
```
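A short sketch of the new `schema_sample_size` knob, assuming this hunk belongs to the `explode` method described in its docstring (chain and column names are illustrative): sampling more rows helps when some JSON fields are optional and absent from the first row.

```py
# Infer the exploded schema from the first 10 rows instead of just one.
exploded = chain.explode("meta", schema_sample_size=10)
```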
```diff
@@ -776,7 +807,7 @@
         ```py
         uri = "gs://datachain-demo/coco2017/annotations_captions/"
         chain = DataChain.from_storage(uri)
-        chain = chain.
+        chain = chain.print_json_schema()
         chain.save()
         ```
         """
```
```diff
@@ -1119,7 +1150,7 @@
     def group_by(
         self,
         *,
-        partition_by: Union[str, Sequence[str]],
+        partition_by: Union[str, Func, Sequence[Union[str, Func]]],
         **kwargs: Func,
     ) -> "Self":
         """Group rows by specified set of signals and return new signals
@@ -1136,36 +1167,47 @@
             )
             ```
         """
-        if isinstance(partition_by, str):
+        if isinstance(partition_by, (str, Func)):
             partition_by = [partition_by]
         if not partition_by:
             raise ValueError("At least one column should be provided for partition_by")
 
-        if not kwargs:
-            raise ValueError("At least one column should be provided for group_by")
-        for col_name, func in kwargs.items():
-            if not isinstance(func, Func):
-                raise DataChainColumnError(
-                    col_name,
-                    f"Column {col_name} has type {type(func)} but expected Func object",
-                )
-
         partition_by_columns: list[Column] = []
         signal_columns: list[Column] = []
         schema_fields: dict[str, DataType] = {}
 
         # validate partition_by columns and add them to the schema
-        for
-
-
-
-
+        for col in partition_by:
+            if isinstance(col, str):
+                col_db_name = ColumnMeta.to_db_name(col)
+                col_type = self.signals_schema.get_column_type(col_db_name)
+                column = Column(col_db_name, python_to_sql(col_type))
+            elif isinstance(col, Function):
+                column = col.get_column(self.signals_schema)
+                col_db_name = column.name
+                col_type = column.type.python_type
+            else:
+                raise DataChainColumnError(
+                    col,
+                    (
+                        f"partition_by column {col} has type {type(col)}"
+                        " but expected str or Function"
+                    ),
+                )
+            partition_by_columns.append(column)
             schema_fields[col_db_name] = col_type
 
         # validate signal columns and add them to the schema
+        if not kwargs:
+            raise ValueError("At least one column should be provided for group_by")
         for col_name, func in kwargs.items():
-
-
+            if not isinstance(func, Func):
+                raise DataChainColumnError(
+                    col_name,
+                    f"Column {col_name} has type {type(func)} but expected Func object",
+                )
+            column = func.get_column(self.signals_schema, label=col_name)
+            signal_columns.append(column)
             schema_fields[col_name] = func.get_result_type(self.signals_schema)
 
         return self._evolve(
```
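With `partition_by` now accepting `Func` objects, grouping can key on a computed value. A hedged sketch using helpers from the new `datachain.func` package added in this release (`chain` and the signal names are illustrative):

```py
from datachain import func

grouped = chain.group_by(
    cnt=func.count(),
    total=func.sum("file.size"),
    partition_by=func.path.file_ext("file.path"),
)
```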
```diff
@@ -1413,25 +1455,16 @@
     def merge(
         self,
         right_ds: "DataChain",
-        on: Union[
-            str,
-            sqlalchemy.ColumnElement,
-            Sequence[Union[str, sqlalchemy.ColumnElement]],
-        ],
-        right_on: Union[
-            str,
-            sqlalchemy.ColumnElement,
-            Sequence[Union[str, sqlalchemy.ColumnElement]],
-            None,
-        ] = None,
+        on: Union[MergeColType, Sequence[MergeColType]],
+        right_on: Optional[Union[MergeColType, Sequence[MergeColType]]] = None,
         inner=False,
         rname="right_",
     ) -> "Self":
         """Merge two chains based on the specified criteria.
 
         Parameters:
-            right_ds
-            on
+            right_ds: Chain to join with.
+            on: Predicate or list of Predicates to join on. If both chains have the
                 same predicates then this predicate is enough for the join. Otherwise,
                 `right_on` parameter has to specify the predicates for the other chain.
             right_on: Optional predicate or list of Predicates
@@ -1448,23 +1481,24 @@
         if on is None:
             raise DatasetMergeError(["None"], None, "'on' must be specified")
 
-
-
-        elif not isinstance(on, Sequence):
+        on = _validate_merge_on(on, self)
+        if not on:
             raise DatasetMergeError(
                 on,
                 right_on,
-
+                (
+                    "'on' must be 'str', 'Func' or 'Sequence' object "
+                    f"but got type '{type(on)}'"
+                ),
             )
 
         if right_on is not None:
-
-
-            elif not isinstance(right_on, Sequence):
+            right_on = _validate_merge_on(right_on, right_ds)
+            if not right_on:
                 raise DatasetMergeError(
                     on,
                     right_on,
-                    "'right_on' must be 'str' or 'Sequence' object"
+                    "'right_on' must be 'str', 'Func' or 'Sequence' object"
                     f" but got type '{type(right_on)}'",
                 )
 
@@ -1480,10 +1514,12 @@
 
         def _resolve(
             ds: DataChain,
-            col: Union[str, sqlalchemy.ColumnElement],
+            col: Union[str, Function, sqlalchemy.ColumnElement],
             side: Union[str, None],
         ):
             try:
+                if isinstance(col, Function):
+                    return ds.c(col.get_column())
                 return ds.c(col) if isinstance(col, (str, C)) else col
             except ValueError:
                 if side:
```
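Since `on`/`right_on` now accept the widened `MergeColType`, a join key can be computed with a `Func`. A hedged sketch (the chains and signal names are illustrative):

```py
from datachain import func

# Join two chains on the file stem rather than the full path.
merged = images.merge(
    annotations,
    on=func.path.file_stem("file.path"),
    right_on=func.path.file_stem("file.path"),
)
```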
```diff
@@ -1834,13 +1870,14 @@
         if col_names or not output:
             try:
                 schema = infer_schema(self, **kwargs)
-                output = schema_to_output(schema, col_names)
+                output, _ = schema_to_output(schema, col_names)
             except ValueError as e:
                 raise DatasetPrepareError(self.name, e) from e
 
         if isinstance(output, dict):
             model_name = model_name or object_name or ""
             model = dict_to_data_model(model_name, output)
+            output = model
         else:
             model = output  # type: ignore[assignment]
@@ -1851,6 +1888,7 @@
                 name: info.annotation  # type: ignore[misc]
                 for name, info in output.model_fields.items()
             }
+
         if source:
             output = {"source": ArrowRow} | output  # type: ignore[assignment,operator]
         return self.gen(
```
```diff
@@ -2389,9 +2427,9 @@
             dc.filter(C("file.name").glob("*.jpg"))
             ```
 
-        Using `datachain.
+        Using `datachain.func`
             ```py
-            from datachain.
+            from datachain.func import string
             dc.filter(string.length(C("file.name")) > 5)
             ```
 
```
datachain/lib/hf.py
CHANGED
```diff
@@ -98,7 +98,7 @@ class HFGenerator(Generator):
         with tqdm(desc=desc, unit=" rows") as pbar:
             for row in ds:
                 output_dict = {}
-                if split:
+                if split and "split" in self.output_schema.model_fields:
                     output_dict["split"] = split
                 for name, feat in ds.features.items():
                     anno = self.output_schema.model_fields[name].annotation
```
datachain/lib/signal_schema.py
CHANGED
```diff
@@ -23,12 +23,12 @@ from pydantic import BaseModel, create_model
 from sqlalchemy import ColumnElement
 from typing_extensions import Literal as LiteralEx
 
+from datachain.func.func import Func
 from datachain.lib.convert.python_to_sql import python_to_sql
 from datachain.lib.convert.sql_to_python import sql_to_python
 from datachain.lib.convert.unflatten import unflatten_to_json_pos
 from datachain.lib.data_model import DataModel, DataType, DataValue
 from datachain.lib.file import File
-from datachain.lib.func import Func
 from datachain.lib.model_store import ModelStore
 from datachain.lib.utils import DataChainParamsError
 from datachain.query.schema import DEFAULT_DELIMITER, Column
```
datachain/lib/utils.py
CHANGED
```diff
@@ -33,6 +33,7 @@ class DataChainColumnError(DataChainParamsError):
 
 
 def normalize_col_names(col_names: Sequence[str]) -> dict[str, str]:
+    """Returns normalized_name -> original_name dict."""
    gen_col_counter = 0
    new_col_names = {}
    org_col_names = set(col_names)
```
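For reference, a small sketch of the documented contract (the exact normalization rules are defined by the implementation, so no specific normalized forms are assumed here):

```py
from datachain.lib.utils import normalize_col_names

# Keys are normalized (db-safe) names, values are the original spellings.
mapping = normalize_col_names(["My Col", "count"])
for normalized, original in mapping.items():
    print(normalized, "<-", original)
```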
datachain/lib/webdataset_laion.py
CHANGED
```diff
@@ -49,11 +49,11 @@ class WDSLaion(WDSBasic):
 class LaionMeta(BaseModel):
     file: File
     index: Optional[int] = Field(default=None)
-    b32_img: list[float] = Field(default=
-    b32_txt: list[float] = Field(default=
-    l14_img: list[float] = Field(default=
-    l14_txt: list[float] = Field(default=
-    dedup: list[float] = Field(default=
+    b32_img: list[float] = Field(default=[])
+    b32_txt: list[float] = Field(default=[])
+    l14_img: list[float] = Field(default=[])
+    l14_txt: list[float] = Field(default=[])
+    dedup: list[float] = Field(default=[])
 
 
 def process_laion_meta(file: File) -> Iterator[LaionMeta]:
```
datachain/model/bbox.py
ADDED
```diff
@@ -0,0 +1,102 @@
+from pydantic import Field
+
+from datachain.lib.data_model import DataModel
+
+
+class BBox(DataModel):
+    """
+    A data model for representing bounding box.
+
+    Attributes:
+        title (str): The title of the bounding box.
+        coords (list[int]): The coordinates of the bounding box.
+
+    The bounding box is defined by two points:
+        - (x1, y1): The top-left corner of the box.
+        - (x2, y2): The bottom-right corner of the box.
+    """
+
+    title: str = Field(default="")
+    coords: list[int] = Field(default=[])
+
+    @staticmethod
+    def from_list(coords: list[float], title: str = "") -> "BBox":
+        assert len(coords) == 4, "Bounding box must be a list of 4 coordinates."
+        assert all(
+            isinstance(value, (int, float)) for value in coords
+        ), "Bounding box coordinates must be floats or integers."
+        return BBox(
+            title=title,
+            coords=[round(c) for c in coords],
+        )
+
+    @staticmethod
+    def from_dict(coords: dict[str, float], title: str = "") -> "BBox":
+        assert isinstance(coords, dict) and set(coords) == {
+            "x1",
+            "y1",
+            "x2",
+            "y2",
+        }, "Bounding box must be a dictionary with keys 'x1', 'y1', 'x2' and 'y2'."
+        return BBox.from_list(
+            [coords["x1"], coords["y1"], coords["x2"], coords["y2"]],
+            title=title,
+        )
+
+
+class OBBox(DataModel):
+    """
+    A data model for representing oriented bounding boxes.
+
+    Attributes:
+        title (str): The title of the oriented bounding box.
+        coords (list[int]): The coordinates of the oriented bounding box.
+
+    The oriented bounding box is defined by four points:
+        - (x1, y1): The first corner of the box.
+        - (x2, y2): The second corner of the box.
+        - (x3, y3): The third corner of the box.
+        - (x4, y4): The fourth corner of the box.
+    """
+
+    title: str = Field(default="")
+    coords: list[int] = Field(default=[])
+
+    @staticmethod
+    def from_list(coords: list[float], title: str = "") -> "OBBox":
+        assert (
+            len(coords) == 8
+        ), "Oriented bounding box must be a list of 8 coordinates."
+        assert all(
+            isinstance(value, (int, float)) for value in coords
+        ), "Oriented bounding box coordinates must be floats or integers."
+        return OBBox(
+            title=title,
+            coords=[round(c) for c in coords],
+        )
+
+    @staticmethod
+    def from_dict(coords: dict[str, float], title: str = "") -> "OBBox":
+        assert isinstance(coords, dict) and set(coords) == {
+            "x1",
+            "y1",
+            "x2",
+            "y2",
+            "x3",
+            "y3",
+            "x4",
+            "y4",
+        }, "Oriented bounding box must be a dictionary with coordinates."
+        return OBBox.from_list(
+            [
+                coords["x1"],
+                coords["y1"],
+                coords["x2"],
+                coords["y2"],
+                coords["x3"],
+                coords["y3"],
+                coords["x4"],
+                coords["y4"],
+            ],
+            title=title,
+        )
```