PyPI - datachain - Versions diffs - 0.7.0__py3-none-any.whl → 0.7.2__py3-none-any.whl - Mend

datachain 0.7.0__py3-none-any.whl → 0.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of datachain might be problematic. Click here for more details.

Files changed (52) hide show

datachain/__init__.py +0 -3
datachain/catalog/catalog.py +8 -6
datachain/cli.py +1 -1
datachain/client/fsspec.py +9 -9
datachain/data_storage/schema.py +2 -2
datachain/data_storage/sqlite.py +5 -4
datachain/data_storage/warehouse.py +18 -18
datachain/func/__init__.py +49 -0
datachain/{lib/func → func}/aggregate.py +13 -11
datachain/func/array.py +176 -0
datachain/func/base.py +23 -0
datachain/func/conditional.py +81 -0
datachain/func/func.py +384 -0
datachain/func/path.py +110 -0
datachain/func/random.py +23 -0
datachain/func/string.py +154 -0
datachain/func/window.py +49 -0
datachain/lib/arrow.py +24 -12
datachain/lib/data_model.py +25 -9
datachain/lib/dataset_info.py +2 -2
datachain/lib/dc.py +94 -56
datachain/lib/hf.py +1 -1
datachain/lib/signal_schema.py +1 -1
datachain/lib/utils.py +1 -0
datachain/lib/webdataset_laion.py +5 -5
datachain/model/__init__.py +6 -0
datachain/model/bbox.py +102 -0
datachain/model/pose.py +88 -0
datachain/model/segment.py +47 -0
datachain/model/ultralytics/__init__.py +27 -0
datachain/model/ultralytics/bbox.py +147 -0
datachain/model/ultralytics/pose.py +113 -0
datachain/model/ultralytics/segment.py +91 -0
datachain/nodes_fetcher.py +2 -2
datachain/query/dataset.py +57 -34
datachain/sql/__init__.py +0 -2
datachain/sql/functions/__init__.py +0 -26
datachain/sql/selectable.py +11 -5
datachain/sql/sqlite/base.py +11 -2
datachain/toolkit/split.py +6 -2
{datachain-0.7.0.dist-info → datachain-0.7.2.dist-info}/METADATA +72 -71
{datachain-0.7.0.dist-info → datachain-0.7.2.dist-info}/RECORD +46 -35
{datachain-0.7.0.dist-info → datachain-0.7.2.dist-info}/WHEEL +1 -1
datachain/lib/func/__init__.py +0 -32
datachain/lib/func/func.py +0 -152
datachain/lib/models/__init__.py +0 -5
datachain/lib/models/bbox.py +0 -45
datachain/lib/models/pose.py +0 -37
datachain/lib/models/yolo.py +0 -39
{datachain-0.7.0.dist-info → datachain-0.7.2.dist-info}/LICENSE +0 -0
{datachain-0.7.0.dist-info → datachain-0.7.2.dist-info}/entry_points.txt +0 -0
{datachain-0.7.0.dist-info → datachain-0.7.2.dist-info}/top_level.txt +0 -0

datachain/model/pose.py ADDED Viewed

@@ -0,0 +1,88 @@
+from pydantic import Field
+from datachain.lib.data_model import DataModel
+class Pose(DataModel):
+    """
+    Pose keypoints stored as two parallel lists of integer coordinates.
+    Attributes:
+        x (list[int]): X-coordinate of every keypoint.
+        y (list[int]): Y-coordinate of every keypoint.
+    Position ``i`` in both lists describes the same keypoint (body part).
+    """
+    x: list[int] = Field(default=[])
+    y: list[int] = Field(default=[])
+    @staticmethod
+    def from_list(points: list[list[float]]) -> "Pose":
+        """
+        Build a pose from ``[xs, ys]``.
+        Both inner lists must hold exactly 17 int/float values; every value
+        is passed through ``round()`` before being stored. Any violation
+        raises ``AssertionError``.
+        """
+        assert len(points) == 2, "Pose must be a list of 2 lists: x and y coordinates."
+        xs, ys = points
+        x_count, y_count = len(xs), len(ys)
+        assert (
+            x_count == y_count == 17
+        ), "Pose x and y coordinates must have the same length of 17."
+        numeric = (int, float)
+        assert all(
+            isinstance(item, numeric) for axis in (xs, ys) for item in axis
+        ), "Pose coordinates must be floats or integers."
+        return Pose(x=list(map(round, xs)), y=list(map(round, ys)))
+    @staticmethod
+    def from_dict(points: dict[str, list[float]]) -> "Pose":
+        """
+        Build a pose from a dict whose keys are exactly 'x' and 'y'.
+        The values are validated and converted by ``from_list``.
+        """
+        has_expected_keys = isinstance(points, dict) and set(points) == {"x", "y"}
+        assert has_expected_keys, "Pose must be a dict with keys 'x' and 'y'."
+        return Pose.from_list([points["x"], points["y"]])
+class Pose3D(DataModel):
+    """
+    Pose keypoints with an additional per-keypoint visibility value.
+    Attributes:
+        x (list[int]): X-coordinate of every keypoint.
+        y (list[int]): Y-coordinate of every keypoint.
+        visible (list[float]): Visibility value of every keypoint.
+    Position ``i`` in all three lists describes the same keypoint (body part).
+    """
+    x: list[int] = Field(default=[])
+    y: list[int] = Field(default=[])
+    visible: list[float] = Field(default=[])
+    @staticmethod
+    def from_list(points: list[list[float]]) -> "Pose3D":
+        """
+        Build a 3D pose from ``[xs, ys, visibility]``.
+        All three inner lists must hold exactly 17 int/float values. The x and
+        y values are passed through ``round()``; visibility values are stored
+        unchanged. Any violation raises ``AssertionError``.
+        """
+        assert (
+            len(points) == 3
+        ), "Pose3D must be a list of 3 lists: x, y coordinates and visible."
+        xs, ys, visibility = points
+        assert (
+            len(xs) == len(ys) == len(visibility) == 17
+        ), "Pose3D x, y coordinates and visible must have the same length of 17."
+        numeric = (int, float)
+        assert all(
+            isinstance(item, numeric)
+            for axis in (xs, ys, visibility)
+            for item in axis
+        ), "Pose3D coordinates must be floats or integers."
+        return Pose3D(
+            x=list(map(round, xs)),
+            y=list(map(round, ys)),
+            visible=visibility,
+        )
+    @staticmethod
+    def from_dict(points: dict[str, list[float]]) -> "Pose3D":
+        """
+        Build a 3D pose from a dict whose keys are exactly 'x', 'y', 'visible'.
+        The values are validated and converted by ``from_list``.
+        """
+        has_expected_keys = isinstance(points, dict) and set(points) == {
+            "x",
+            "y",
+            "visible",
+        }
+        assert (
+            has_expected_keys
+        ), "Pose3D must be a dict with keys 'x', 'y' and 'visible'."
+        return Pose3D.from_list([points["x"], points["y"], points["visible"]])

datachain/model/segment.py ADDED Viewed

@@ -0,0 +1,47 @@
+from pydantic import Field
+from datachain.lib.data_model import DataModel
+class Segment(DataModel):
+    """
+    A titled segment stored as two parallel lists of integer coordinates.
+    Attributes:
+        title (str): Title of the segment.
+        x (list[int]): X-coordinate of every point.
+        y (list[int]): Y-coordinate of every point.
+    Position ``i`` in both lists describes the same point of the segment.
+    """
+    title: str = Field(default="")
+    x: list[int] = Field(default=[])
+    y: list[int] = Field(default=[])
+    @staticmethod
+    def from_list(points: list[list[float]], title: str = "") -> "Segment":
+        """
+        Build a segment from ``[xs, ys]`` and an optional title.
+        Both inner lists must have the same length and hold only int/float
+        values; every value is passed through ``round()`` before being stored.
+        Any violation raises ``AssertionError``.
+        """
+        assert (
+            len(points) == 2
+        ), "Segment must be a list of 2 lists: x and y coordinates."
+        xs, ys = points
+        x_count, y_count = len(xs), len(ys)
+        assert (
+            x_count == y_count
+        ), "Segment x and y coordinates must have the same length."
+        numeric = (int, float)
+        assert all(
+            isinstance(item, numeric) for axis in (xs, ys) for item in axis
+        ), "Segment coordinates must be floats or integers."
+        return Segment(title=title, x=list(map(round, xs)), y=list(map(round, ys)))
+    @staticmethod
+    def from_dict(points: dict[str, list[float]], title: str = "") -> "Segment":
+        """
+        Build a segment from a dict whose keys are exactly 'x' and 'y'.
+        The values are validated and converted by ``from_list``.
+        """
+        has_expected_keys = isinstance(points, dict) and set(points) == {"x", "y"}
+        assert has_expected_keys, "Segment must be a dict with keys 'x' and 'y'."
+        return Segment.from_list([points["x"], points["y"]], title=title)

datachain/model/ultralytics/__init__.py ADDED Viewed

@@ -0,0 +1,27 @@
+"""
+This module contains data models for the results produced by YOLO models.
+YOLO stands for "You Only Look Once", a family of object detection models that
+are designed to be fast and accurate. The models are trained to detect objects
+in images by dividing the image into a grid and predicting the bounding boxes
+and class probabilities for each grid cell.
+More information about YOLO can be found here:
+- https://pjreddie.com/darknet/yolo/
+- https://docs.ultralytics.com/
+"""
+from .bbox import YoloBBox, YoloBBoxes, YoloOBBox, YoloOBBoxes
+from .pose import YoloPose, YoloPoses
+from .segment import YoloSegment, YoloSegments
+# Public API of this package: the single-result and multi-result data models
+# imported above from .bbox, .pose and .segment.
+__all__ = [
+    "YoloBBox",
+    "YoloBBoxes",
+    "YoloOBBox",
+    "YoloOBBoxes",
+    "YoloPose",
+    "YoloPoses",
+    "YoloSegment",
+    "YoloSegments",
+]

datachain/model/ultralytics/bbox.py ADDED Viewed

@@ -0,0 +1,147 @@
+from typing import TYPE_CHECKING
+from pydantic import Field
+from datachain.lib.data_model import DataModel
+from datachain.model.bbox import BBox, OBBox
+if TYPE_CHECKING:
+    from ultralytics.engine.results import Results
+class YoloBBox(DataModel):
+    """
+    One bounding-box detection taken from a YOLO result.
+    Attributes:
+        cls: Class id of the detected object (-1 when unset).
+        name: Name of the detected object.
+        confidence: Confidence score of the detection.
+        box: Bounding box of the detected object.
+    """
+    cls: int = Field(default=-1)
+    name: str = Field(default="")
+    confidence: float = Field(default=0)
+    box: BBox
+    @staticmethod
+    def from_result(result: "Results") -> "YoloBBox":
+        """
+        Build a ``YoloBBox`` from the first entry of ``result.summary()``.
+        An empty summary yields a model with default fields and an empty
+        ``BBox``. An entry without a "box" key also gets an empty ``BBox``.
+        """
+        detections = result.summary()
+        if not detections:
+            return YoloBBox(box=BBox())
+        first = detections[0]
+        label = first.get("name", "")
+        if "box" in first:
+            bbox = BBox.from_dict(first["box"], title=label)
+        else:
+            bbox = BBox()
+        return YoloBBox(
+            cls=first["class"],
+            name=label,
+            confidence=first["confidence"],
+            box=bbox,
+        )
+class YoloBBoxes(DataModel):
+    """
+    All bounding-box detections from a list of YOLO results, column-wise.
+    Attributes:
+        cls: Class ids of the detected objects.
+        name: Names of the detected objects.
+        confidence: Confidence scores of the detections.
+        box: Bounding boxes of the detected objects.
+    Index ``i`` of every list refers to the same detection.
+    """
+    cls: list[int]
+    name: list[str]
+    confidence: list[float]
+    box: list[BBox]
+    @staticmethod
+    def from_results(results: list["Results"]) -> "YoloBBoxes":
+        """
+        Flatten every ``summary()`` entry of every result into parallel lists.
+        """
+        class_ids, labels, scores, boxes = [], [], [], []
+        # Lazily walk all summary entries in result order.
+        for entry in (item for res in results for item in res.summary()):
+            label = entry.get("name", "")
+            class_ids.append(entry["class"])
+            labels.append(label)
+            scores.append(entry["confidence"])
+            boxes.append(BBox.from_dict(entry.get("box", {}), title=label))
+        return YoloBBoxes(cls=class_ids, name=labels, confidence=scores, box=boxes)
+class YoloOBBox(DataModel):
+    """
+    One oriented-bounding-box detection taken from a YOLO result.
+    Attributes:
+        cls: Class id of the detected object (-1 when unset).
+        name: Name of the detected object.
+        confidence: Confidence score of the detection.
+        box: Oriented bounding box of the detected object.
+    """
+    cls: int = Field(default=-1)
+    name: str = Field(default="")
+    confidence: float = Field(default=0)
+    box: OBBox
+    @staticmethod
+    def from_result(result: "Results") -> "YoloOBBox":
+        """
+        Build a ``YoloOBBox`` from the first entry of ``result.summary()``.
+        An empty summary yields a model with default fields and an empty
+        ``OBBox``. An entry without a "box" key also gets an empty ``OBBox``.
+        """
+        detections = result.summary()
+        if not detections:
+            return YoloOBBox(box=OBBox())
+        first = detections[0]
+        label = first.get("name", "")
+        if "box" in first:
+            obbox = OBBox.from_dict(first["box"], title=label)
+        else:
+            obbox = OBBox()
+        return YoloOBBox(
+            cls=first["class"],
+            name=label,
+            confidence=first["confidence"],
+            box=obbox,
+        )
+class YoloOBBoxes(DataModel):
+    """
+    All oriented-bounding-box detections from a list of YOLO results.
+    Attributes:
+        cls: Class ids of the detected objects.
+        name: Names of the detected objects.
+        confidence: Confidence scores of the detections.
+        box: Oriented bounding boxes of the detected objects.
+    Index ``i`` of every list refers to the same detection.
+    """
+    cls: list[int]
+    name: list[str]
+    confidence: list[float]
+    box: list[OBBox]
+    @staticmethod
+    def from_results(results: list["Results"]) -> "YoloOBBoxes":
+        """
+        Flatten every ``summary()`` entry of every result into parallel lists.
+        """
+        class_ids, labels, scores, boxes = [], [], [], []
+        # Lazily walk all summary entries in result order.
+        for entry in (item for res in results for item in res.summary()):
+            label = entry.get("name", "")
+            class_ids.append(entry["class"])
+            labels.append(label)
+            scores.append(entry["confidence"])
+            boxes.append(OBBox.from_dict(entry.get("box", {}), title=label))
+        return YoloOBBoxes(cls=class_ids, name=labels, confidence=scores, box=boxes)

datachain/model/ultralytics/pose.py ADDED Viewed

@@ -0,0 +1,113 @@
+from typing import TYPE_CHECKING
+from pydantic import Field
+from datachain.lib.data_model import DataModel
+from datachain.model.bbox import BBox
+from datachain.model.pose import Pose3D
+if TYPE_CHECKING:
+    from ultralytics.engine.results import Results
+class YoloPoseBodyPart:
+    """
+    Named integer indices (0-16) for the 17 body parts of a YOLO pose.
+    This is a plain class with int attributes, not an ``enum.Enum``.
+    """
+    # NOTE(review): presumably these index into the x / y / visible lists of
+    # Pose3D, which requires exactly 17 entries -- confirm against callers.
+    # Head
+    nose = 0
+    left_eye = 1
+    right_eye = 2
+    left_ear = 3
+    right_ear = 4
+    # Arms
+    left_shoulder = 5
+    right_shoulder = 6
+    left_elbow = 7
+    right_elbow = 8
+    left_wrist = 9
+    right_wrist = 10
+    # Legs
+    left_hip = 11
+    right_hip = 12
+    left_knee = 13
+    right_knee = 14
+    left_ankle = 15
+    right_ankle = 16
+class YoloPose(DataModel):
+    """
+    One pose detection taken from a YOLO result.
+    Attributes:
+        cls: Class id of the pose (-1 when unset).
+        name: Name of the pose.
+        confidence: Confidence score of the pose.
+        box: Bounding box of the pose.
+        pose: The 3D pose keypoints.
+    """
+    cls: int = Field(default=-1)
+    name: str = Field(default="")
+    confidence: float = Field(default=0)
+    box: BBox
+    pose: Pose3D
+    @staticmethod
+    def from_result(result: "Results") -> "YoloPose":
+        """
+        Build a ``YoloPose`` from the first entry of ``result.summary()``.
+        An empty summary yields default fields with an empty ``BBox`` and
+        ``Pose3D``. A missing "box" or "keypoints" key in the entry falls back
+        to the corresponding empty model.
+        """
+        detections = result.summary()
+        if not detections:
+            return YoloPose(box=BBox(), pose=Pose3D())
+        first = detections[0]
+        label = first.get("name", "")
+        if "box" in first:
+            bbox = BBox.from_dict(first["box"], title=label)
+        else:
+            bbox = BBox()
+        if "keypoints" in first:
+            keypoints = Pose3D.from_dict(first["keypoints"])
+        else:
+            keypoints = Pose3D()
+        return YoloPose(
+            cls=first["class"],
+            name=label,
+            confidence=first["confidence"],
+            box=bbox,
+            pose=keypoints,
+        )
+class YoloPoses(DataModel):
+    """
+    A data model for a list of YOLO pose keypoints.
+    Attributes:
+        cls: The classes of the poses.
+        name: The names of the poses.
+        confidence: The confidence scores of the poses.
+        box: The bounding boxes of the poses.
+        pose: The 3D pose keypoints of the poses.
+    Index ``i`` of every list refers to the same detection.
+    """
+    cls: list[int]
+    name: list[str]
+    confidence: list[float]
+    box: list[BBox]
+    pose: list[Pose3D]
+    @staticmethod
+    def from_results(results: list["Results"]) -> "YoloPoses":
+        """
+        Flatten every ``summary()`` entry of every result into parallel lists.
+        An entry without a "keypoints" key contributes an empty ``Pose3D``,
+        matching what ``YoloPose.from_result`` does for a single result.
+        """
+        cls, names, confidence, box, pose = [], [], [], [], []
+        for r in results:
+            for s in r.summary():
+                name = s.get("name", "")
+                cls.append(s["class"])
+                names.append(name)
+                confidence.append(s["confidence"])
+                # NOTE(review): BBox.from_dict is defined outside this view;
+                # confirm it accepts the empty-dict default used here.
+                box.append(BBox.from_dict(s.get("box", {}), title=name))
+                # Pose3D.from_dict rejects any dict whose keys are not exactly
+                # x / y / visible, so an empty-dict default would always fail.
+                pose.append(
+                    Pose3D.from_dict(s["keypoints"])
+                    if "keypoints" in s
+                    else Pose3D()
+                )
+        return YoloPoses(
+            cls=cls,
+            name=names,
+            confidence=confidence,
+            box=box,
+            pose=pose,
+        )

datachain/model/ultralytics/segment.py ADDED Viewed

@@ -0,0 +1,91 @@
+from typing import TYPE_CHECKING
+from pydantic import Field
+from datachain.lib.data_model import DataModel
+from datachain.model.bbox import BBox
+from datachain.model.segment import Segment
+if TYPE_CHECKING:
+    from ultralytics.engine.results import Results
+class YoloSegment(DataModel):
+    """
+    A data model for a single YOLO segment.
+    Attributes:
+        cls (int): The class of the segment.
+        name (str): The name of the segment.
+        confidence (float): The confidence of the segment.
+        box (BBox): The bounding box of the segment.
+        segment (Segment): The x/y points of the segment.
+    """
+    cls: int = Field(default=-1)
+    name: str = Field(default="")
+    confidence: float = Field(default=0)
+    box: BBox
+    segment: Segment
+    @staticmethod
+    def from_result(result: "Results") -> "YoloSegment":
+        """
+        Build a ``YoloSegment`` from the first entry of ``result.summary()``.
+        An empty summary yields default fields with an empty ``BBox`` and
+        ``Segment``. A missing "name" key falls back to "", and a missing
+        "box" or "segments" key falls back to the corresponding empty model.
+        """
+        summary = result.summary()
+        if not summary:
+            return YoloSegment(box=BBox(), segment=Segment())
+        name = summary[0].get("name", "")
+        box = (
+            BBox.from_dict(summary[0]["box"], title=name)
+            if "box" in summary[0]
+            else BBox()
+        )
+        segment = (
+            Segment.from_dict(summary[0]["segments"], title=name)
+            if "segments" in summary[0]
+            else Segment()
+        )
+        return YoloSegment(
+            cls=summary[0]["class"],
+            # Reuse the defaulted name; indexing summary[0]["name"] again
+            # would raise KeyError when the key is absent.
+            name=name,
+            confidence=summary[0]["confidence"],
+            box=box,
+            segment=segment,
+        )
+class YoloSegments(DataModel):
+    """
+    A data model for a list of YOLO segments.
+    Attributes:
+        cls (list[int]): The classes of the segments.
+        name (list[str]): The names of the segments.
+        confidence (list[float]): The confidences of the segments.
+        box (list[BBox]): The bounding boxes of the segments.
+        segment (list[Segment]): The x/y points of each segment.
+    Index ``i`` of every list refers to the same detection.
+    """
+    cls: list[int]
+    name: list[str]
+    confidence: list[float]
+    box: list[BBox]
+    segment: list[Segment]
+    @staticmethod
+    def from_results(results: list["Results"]) -> "YoloSegments":
+        """
+        Flatten every ``summary()`` entry of every result into parallel lists.
+        An entry without a "segments" key contributes an empty ``Segment``,
+        matching what ``YoloSegment.from_result`` does for a single result.
+        """
+        cls, names, confidence, box, segment = [], [], [], [], []
+        for r in results:
+            for s in r.summary():
+                name = s.get("name", "")
+                cls.append(s["class"])
+                names.append(name)
+                confidence.append(s["confidence"])
+                # NOTE(review): BBox.from_dict is defined outside this view;
+                # confirm it accepts the empty-dict default used here.
+                box.append(BBox.from_dict(s.get("box", {}), title=name))
+                # Segment.from_dict rejects any dict whose keys are not exactly
+                # x / y, so an empty-dict default would always fail.
+                segment.append(
+                    Segment.from_dict(s["segments"], title=name)
+                    if "segments" in s
+                    else Segment()
+                )
+        return YoloSegments(
+            cls=cls,
+            name=names,
+            confidence=confidence,
+            box=box,
+            segment=segment,
+        )

datachain/nodes_fetcher.py CHANGED Viewed

@@ -2,12 +2,12 @@ import logging
 from collections.abc import Iterable
 from typing import TYPE_CHECKING
-from datachain.node import Node
 from datachain.nodes_thread_pool import NodesThreadPool
 if TYPE_CHECKING:
     from datachain.cache import DataChainCache
     from datachain.client.fsspec import Client
+    from datachain.node import Node
 logger = logging.getLogger("datachain")
@@ -22,7 +22,7 @@ class NodesFetcher(NodesThreadPool):
         for task in done:
             task.result()
-    def do_task(self, chunk: Iterable[Node]) -> None:
+    def do_task(self, chunk: Iterable["Node"]) -> None:
         from fsspec import Callback
         class _CB(Callback):

datachain 0.7.0__py3-none-any.whl → 0.7.2__py3-none-any.whl

Potentially problematic release.

datachain 0.7.0__py3-none-any.whl → 0.7.2__py3-none-any.whl