datachain 0.32.3__py3-none-any.whl → 0.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

datachain/checkpoint.py ADDED
@@ -0,0 +1,44 @@
+import uuid
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Union
+
+
+@dataclass
+class Checkpoint:
+    """
+    Represents a checkpoint within a job run.
+
+    A checkpoint marks a successfully completed stage of execution. In the event
+    of a failure, the job can resume from the most recent checkpoint rather than
+    starting over from the beginning.
+
+    Checkpoints can also be created in a "partial" mode, which indicates that the
+    work at this stage was only partially completed. For example, if a failure
+    occurs halfway through running a UDF, already computed results can still be
+    saved, allowing the job to resume from that partially completed state on
+    restart.
+    """
+
+    id: str
+    job_id: str
+    hash: str
+    partial: bool
+    created_at: datetime
+
+    @classmethod
+    def parse(
+        cls,
+        id: Union[str, uuid.UUID],
+        job_id: str,
+        _hash: str,
+        partial: bool,
+        created_at: datetime,
+    ) -> "Checkpoint":
+        return cls(
+            str(id),
+            job_id,
+            _hash,
+            bool(partial),
+            created_at,
+        )
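
The Checkpoint.parse classmethod normalizes raw metastore row values: a uuid.UUID id is coerced to str and partial to bool. A minimal sketch with hypothetical row values, in the (id, job_id, hash, partial, created_at) order the class declares:

    import uuid
    from datetime import datetime, timezone

    from datachain.checkpoint import Checkpoint

    # Hypothetical row, e.g. as read back from the checkpoints table.
    row = (
        uuid.uuid4(),                # UUID id is coerced to str by parse()
        "job-123",                   # hypothetical job id
        "abc123",                    # hypothetical hash value
        1,                           # truthy value is coerced to bool
        datetime.now(timezone.utc),
    )
    cp = Checkpoint.parse(*row)
    assert isinstance(cp.id, str) and cp.partial is True
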
datachain/client/fsspec.py CHANGED
@@ -93,10 +93,11 @@ class Client(ABC):
         self.uri = self.get_uri(self.name)
 
     @staticmethod
-    def get_implementation(url: Union[str, os.PathLike[str]]) -> type["Client"]:
+    def get_implementation(url: Union[str, os.PathLike[str]]) -> type["Client"]:  # noqa: PLR0911
         from .azure import AzureClient
         from .gcs import GCSClient
         from .hf import HfClient
+        from .http import HTTPClient, HTTPSClient
         from .local import FileClient
         from .s3 import ClientS3
 
@@ -114,6 +115,10 @@ class Client(ABC):
             return FileClient
         if protocol == HfClient.protocol:
            return HfClient
+        if protocol == HTTPClient.protocol:
+            return HTTPClient
+        if protocol == HTTPSClient.protocol:
+            return HTTPSClient
 
         raise NotImplementedError(f"Unsupported protocol: {protocol}")
 
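
With these hunks, Client.get_implementation resolves http:// and https:// URLs to the new clients alongside the existing azure/gcs/hf/file/s3 protocols. A sketch of the dispatch, assuming Client is re-exported from datachain.client (otherwise import it from datachain.client.fsspec):

    from datachain.client import Client

    cls = Client.get_implementation("https://example.com/data/file.csv")
    assert cls.__name__ == "HTTPSClient"
    assert cls.protocol == "https"
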
datachain/client/http.py ADDED
@@ -0,0 +1,157 @@
+from datetime import datetime, timezone
+from typing import TYPE_CHECKING, Any, ClassVar, Optional, cast
+from urllib.parse import urlparse
+
+from fsspec.implementations.http import HTTPFileSystem
+
+from datachain.dataset import StorageURI
+from datachain.lib.file import File
+
+from .fsspec import Client
+
+if TYPE_CHECKING:
+    from datachain.cache import Cache
+
+
+class HTTPClient(Client):
+    FS_CLASS = HTTPFileSystem
+    PREFIX: ClassVar[str] = "http://"
+    protocol: ClassVar[str] = "http"
+
+    @classmethod
+    def create_fs(cls, **kwargs) -> HTTPFileSystem:
+        # Configure HTTPFileSystem options
+        kwargs.setdefault("simple_links", True)
+        kwargs.setdefault("same_scheme", True)
+        kwargs.setdefault("cache_type", "bytes")
+
+        kwargs.pop("version_aware", None)
+
+        fs = cls.FS_CLASS(**kwargs)
+        fs.invalidate_cache()
+        return cast("HTTPFileSystem", fs)
+
+    @classmethod
+    def from_name(
+        cls,
+        name: str,
+        cache: "Cache",
+        kwargs: dict[str, Any],
+    ) -> "HTTPClient":
+        parsed = urlparse(name)
+
+        if parsed.scheme:
+            name = parsed.netloc + parsed.path
+
+        return cls(name, kwargs, cache)
+
+    @classmethod
+    def split_url(cls, url: str) -> tuple[str, str]:
+        """Split HTTP/HTTPS URL into domain (bucket equivalent) and path."""
+        parsed = urlparse(url)
+        domain = parsed.netloc
+        path = parsed.path.lstrip("/")
+
+        if parsed.query:
+            path += f"?{parsed.query}"
+        if parsed.fragment:
+            path += f"#{parsed.fragment}"
+
+        return domain, path
+
+    @classmethod
+    def get_uri(cls, name: str) -> "StorageURI":
+        if not name.startswith(("http://", "https://")):
+            return StorageURI(f"{cls.PREFIX}{name}")
+        return StorageURI(name)
+
+    @classmethod
+    def is_root_url(cls, url: str) -> bool:
+        parsed = urlparse(url)
+        return parsed.path in ("", "/") and not parsed.query and not parsed.fragment
+
+    def get_full_path(self, rel_path: str, version_id: Optional[str] = None) -> str:
+        if self.name.startswith(("http://", "https://")):
+            base_url = self.name
+        else:
+            if rel_path and "/" in rel_path:
+                first_part = rel_path.split("/")[0]
+                if "." in first_part and not first_part.startswith("."):
+                    return f"{self.protocol}://{rel_path}"
+
+            base_url = f"{self.protocol}://{self.name}"
+
+        if rel_path:
+            if not base_url.endswith("/") and not rel_path.startswith("/"):
+                base_url += "/"
+            full_url = base_url + rel_path
+        else:
+            full_url = base_url
+
+        return full_url
+
+    def url(self, path: str, expires: int = 3600, **kwargs) -> str:
+        """
+        Generate URL for the given path.
+        Note: HTTP URLs don't support signed/expiring URLs.
+        """
+        return self.get_full_path(path, kwargs.pop("version_id", None))
+
+    def info_to_file(self, v: dict[str, Any], path: str) -> File:
+        etag = v.get("ETag", "").strip('"')
+        last_modified = v.get("last_modified")
+        if last_modified:
+            if isinstance(last_modified, str):
+                try:
+                    from email.utils import parsedate_to_datetime
+
+                    last_modified = parsedate_to_datetime(last_modified)
+                except (ValueError, TypeError):
+                    last_modified = datetime.now(timezone.utc)
+            elif isinstance(last_modified, (int, float)):
+                last_modified = datetime.fromtimestamp(last_modified, timezone.utc)
+        else:
+            last_modified = datetime.now(timezone.utc)
+
+        return File(
+            source=self.uri,
+            path=path,
+            size=v.get("size", 0),
+            etag=etag,
+            version="",
+            is_latest=True,
+            last_modified=last_modified,
+        )
+
+    def upload(self, data: bytes, path: str) -> "File":
+        raise NotImplementedError(
+            "HTTP/HTTPS client is read-only. Upload operations are not supported."
+        )
+
+    def get_file_info(self, path: str, version_id: Optional[str] = None) -> "File":
+        info = self.fs.info(self.get_full_path(path))
+        return self.info_to_file(info, path)
+
+    def open_object(self, file: "File", use_cache: bool = True, cb=None):
+        from datachain.client.fileslice import FileWrapper
+
+        if use_cache and (cache_path := self.cache.get_path(file)):
+            return open(cache_path, mode="rb")
+
+        assert not file.location
+        return FileWrapper(
+            self.fs.open(self.get_full_path(file.get_path_normalized())),
+            cb or (lambda x: None),
+        )
+
+    async def get_file(self, lpath, rpath, callback, version_id: Optional[str] = None):
+        return await self.fs._get_file(lpath, rpath, callback=callback)
+
+    async def _fetch_dir(self, prefix: str, pbar, result_queue) -> set[str]:
+        full_url = self.get_full_path(prefix)
+        raise NotImplementedError(f"Cannot download file from {full_url}")
+
+
+class HTTPSClient(HTTPClient):
+    protocol = "https"
+    PREFIX = "https://"
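
In this client the domain stands in for the bucket, query strings and fragments are folded into the path, and get_uri prepends the scheme only when it is missing. A sketch of what the classmethods above imply:

    from datachain.client.http import HTTPSClient

    domain, path = HTTPSClient.split_url("https://example.com/data/file.csv?v=2#top")
    # domain == "example.com"
    # path == "data/file.csv?v=2#top"

    HTTPSClient.get_uri("example.com/data")          # -> "https://example.com/data"
    HTTPSClient.is_root_url("https://example.com/")  # -> True
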
datachain/data_storage/metastore.py CHANGED
@@ -13,6 +13,7 @@ from uuid import uuid4
 from sqlalchemy import (
     JSON,
     BigInteger,
+    Boolean,
     Column,
     DateTime,
     ForeignKey,
@@ -24,6 +25,7 @@ from sqlalchemy import (
 )
 from sqlalchemy.sql import func as f
 
+from datachain.checkpoint import Checkpoint
 from datachain.data_storage import JobQueryType, JobStatus
 from datachain.data_storage.serializer import Serializable
 from datachain.dataset import (
@@ -36,6 +38,7 @@ from datachain.dataset import (
     StorageURI,
 )
 from datachain.error import (
+    CheckpointNotFoundError,
     DatasetNotFoundError,
     DatasetVersionNotFoundError,
     NamespaceDeleteNotAllowedError,
@@ -75,6 +78,7 @@ class AbstractMetastore(ABC, Serializable):
     dataset_list_version_class: type[DatasetListVersion] = DatasetListVersion
     dependency_class: type[DatasetDependency] = DatasetDependency
     job_class: type[Job] = Job
+    checkpoint_class: type[Checkpoint] = Checkpoint
 
     def __init__(
         self,
@@ -431,6 +435,35 @@ class AbstractMetastore(ABC, Serializable):
     def get_job_status(self, job_id: str) -> Optional[JobStatus]:
         """Returns the status of the given job."""
 
+    #
+    # Checkpoints
+    #
+
+    @abstractmethod
+    def list_checkpoints(self, job_id: str, conn=None) -> Iterator["Checkpoint"]:
+        """Returns all checkpoints related to some job"""
+
+    @abstractmethod
+    def get_checkpoint_by_id(self, checkpoint_id: str, conn=None) -> Checkpoint:
+        """Gets single checkpoint by id"""
+
+    def find_checkpoint(
+        self, job_id: str, _hash: str, partial: bool = False, conn=None
+    ) -> Optional[Checkpoint]:
+        """
+        Tries to find checkpoint for a job with specific hash and optionally partial
+        """
+
+    @abstractmethod
+    def create_checkpoint(
+        self,
+        job_id: str,
+        _hash: str,
+        partial: bool = False,
+        conn: Optional[Any] = None,
+    ) -> Checkpoint:
+        """Creates new checkpoint"""
+
 
 class AbstractDBMetastore(AbstractMetastore):
     """
@@ -446,6 +479,7 @@ class AbstractDBMetastore(AbstractMetastore):
     DATASET_VERSION_TABLE = "datasets_versions"
     DATASET_DEPENDENCY_TABLE = "datasets_dependencies"
     JOBS_TABLE = "jobs"
+    CHECKPOINTS_TABLE = "checkpoints"
 
     db: "DatabaseEngine"
 
@@ -1663,3 +1697,106 @@ class AbstractDBMetastore(AbstractMetastore):
         if not results:
             return None
         return results[0][0]
+
+    #
+    # Checkpoints
+    #
+
+    @staticmethod
+    def _checkpoints_columns() -> "list[SchemaItem]":
+        return [
+            Column(
+                "id",
+                Text,
+                default=uuid4,
+                primary_key=True,
+                nullable=False,
+            ),
+            Column("job_id", Text, nullable=True),
+            Column("hash", Text, nullable=False),
+            Column("partial", Boolean, default=False),
+            Column("created_at", DateTime(timezone=True), nullable=False),
+            UniqueConstraint("job_id", "hash"),
+        ]
+
+    @cached_property
+    def _checkpoints_fields(self) -> list[str]:
+        return [c.name for c in self._checkpoints_columns() if c.name]  # type: ignore[attr-defined]
+
+    @cached_property
+    def _checkpoints(self) -> "Table":
+        return Table(
+            self.CHECKPOINTS_TABLE,
+            self.db.metadata,
+            *self._checkpoints_columns(),
+        )
+
+    @abstractmethod
+    def _checkpoints_insert(self) -> "Insert": ...
+
+    def _checkpoints_select(self, *columns) -> "Select":
+        if not columns:
+            return self._checkpoints.select()
+        return select(*columns)
+
+    def _checkpoints_delete(self) -> "Delete":
+        return self._checkpoints.delete()
+
+    def _checkpoints_query(self):
+        return self._checkpoints_select(
+            *[getattr(self._checkpoints.c, f) for f in self._checkpoints_fields]
+        )
+
+    def create_checkpoint(
+        self,
+        job_id: str,
+        _hash: str,
+        partial: bool = False,
+        conn: Optional[Any] = None,
+    ) -> Checkpoint:
+        """
+        Creates a new job query step.
+        """
+        checkpoint_id = str(uuid4())
+        self.db.execute(
+            self._checkpoints_insert().values(
+                id=checkpoint_id,
+                job_id=job_id,
+                hash=_hash,
+                partial=partial,
+                created_at=datetime.now(timezone.utc),
+            ),
+            conn=conn,
+        )
+        return self.get_checkpoint_by_id(checkpoint_id)
+
+    def list_checkpoints(self, job_id: str, conn=None) -> Iterator["Checkpoint"]:
+        """List checkpoints by job id."""
+        query = self._checkpoints_query().where(self._checkpoints.c.job_id == job_id)
+        rows = list(self.db.execute(query, conn=conn))
+
+        yield from [self.checkpoint_class.parse(*r) for r in rows]
+
+    def get_checkpoint_by_id(self, checkpoint_id: str, conn=None) -> Checkpoint:
+        """Returns the checkpoint with the given ID."""
+        ch = self._checkpoints
+        query = self._checkpoints_select(ch).where(ch.c.id == checkpoint_id)
+        rows = list(self.db.execute(query, conn=conn))
+        if not rows:
+            raise CheckpointNotFoundError(f"Checkpoint {checkpoint_id} not found")
+        return self.checkpoint_class.parse(*rows[0])
+
+    def find_checkpoint(
+        self, job_id: str, _hash: str, partial: bool = False, conn=None
+    ) -> Optional[Checkpoint]:
+        """
+        Tries to find checkpoint for a job with specific hash and optionally partial
+        """
+        ch = self._checkpoints
+        query = self._checkpoints_select(ch).where(
+            ch.c.job_id == job_id, ch.c.hash == _hash, ch.c.partial == partial
+        )
+        rows = list(self.db.execute(query, conn=conn))
+        if not rows:
+            return None
+        return self.checkpoint_class.parse(*rows[0])
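
Taken together, these methods give concrete metastores a small checkpoint API: create_checkpoint inserts and re-reads the row, find_checkpoint matches on (job_id, hash, partial) and returns None on a miss, and get_checkpoint_by_id raises CheckpointNotFoundError. A hedged usage sketch; the get_catalog helper is assumed from datachain internals, and the job id and hash values are hypothetical:

    from datachain.catalog import get_catalog  # assumed available, as used internally
    from datachain.error import CheckpointNotFoundError

    metastore = get_catalog().metastore  # a concrete AbstractDBMetastore

    cp = metastore.create_checkpoint("job-123", _hash="abc123", partial=True)

    found = metastore.find_checkpoint("job-123", "abc123", partial=True)
    assert found is not None and found.id == cp.id

    for checkpoint in metastore.list_checkpoints("job-123"):
        print(checkpoint.hash, checkpoint.partial, checkpoint.created_at)

    try:
        metastore.get_checkpoint_by_id("no-such-id")
    except CheckpointNotFoundError:
        pass  # raised when no row has this id
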
datachain/data_storage/schema.py CHANGED
@@ -51,7 +51,7 @@ def dedup_columns(columns: Iterable[sa.Column]) -> list[sa.Column]:
     """
     c_set: dict[str, sa.Column] = {}
     for c in columns:
-        if (ec := c_set.get(c.name, None)) is not None:
+        if (ec := c_set.get(c.name)) is not None:
             if str(ec.type) != str(c.type):
                 raise ValueError(
                     f"conflicting types for column {c.name}:{c.type!s} and {ec.type!s}"

datachain/data_storage/sqlite.py CHANGED
@@ -459,6 +459,8 @@ class SQLiteMetastore(AbstractDBMetastore):
         self.default_table_names.append(self._datasets_dependencies.name)
         self.db.create_table(self._jobs, if_not_exists=True)
         self.default_table_names.append(self._jobs.name)
+        self.db.create_table(self._checkpoints, if_not_exists=True)
+        self.default_table_names.append(self._checkpoints.name)
 
     def _init_namespaces_projects(self) -> None:
         """
@@ -543,6 +545,12 @@ class SQLiteMetastore(AbstractDBMetastore):
     def _jobs_insert(self) -> "Insert":
         return sqlite.insert(self._jobs)
 
+    #
+    # Checkpoints
+    #
+    def _checkpoints_insert(self) -> "Insert":
+        return sqlite.insert(self._checkpoints)
+
     #
     # Namespaces
     #

datachain/error.py CHANGED
@@ -97,3 +97,7 @@ class TableMissingError(DataChainError):
 
 class OutdatedDatabaseSchemaError(DataChainError):
     pass
+
+
+class CheckpointNotFoundError(NotFoundError):
+    pass

datachain/lib/dc/datachain.py CHANGED
@@ -1,4 +1,5 @@
 import copy
+import hashlib
 import os
 import os.path
 import sys
@@ -18,6 +19,7 @@ from typing import (
     cast,
     overload,
 )
+from uuid import uuid4
 
 import sqlalchemy
 import ujson as json
@@ -665,7 +667,7 @@ class DataChain:
             name, namespace=namespace_name, project=project_name, **kwargs
         )
 
-        return self._evolve(
+        result = self._evolve(
             query=self._query.save(
                 name=name,
                 version=version,
@@ -678,6 +680,16 @@ class DataChain:
             )
         )
 
+        if job_id := os.getenv("DATACHAIN_JOB_ID"):
+            catalog.metastore.create_checkpoint(
+                job_id,  # type: ignore[arg-type]
+                _hash=hashlib.sha256(  # TODO this will be replaced with self.hash()
+                    str(uuid4()).encode()
+                ).hexdigest(),
+            )
+
+        return result
+
     def apply(self, func, *args, **kwargs):
         """Apply any function to the chain.
 
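
The effect of this hunk: when DATACHAIN_JOB_ID is set in the environment (typically by a job runner), each save() also records a checkpoint for that job; until the TODO'd self.hash() lands, the hash is a random SHA-256 placeholder, so resume-by-hash is not yet meaningful. A sketch of the observable behavior, with a hypothetical job id and assuming the documented read_values API:

    import os
    import datachain as dc

    os.environ["DATACHAIN_JOB_ID"] = "job-123"  # hypothetical; normally set by the runner

    chain = dc.read_values(num=[1, 2, 3])
    chain.save("my-dataset")  # saves the dataset and creates a checkpoint row for job-123
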
datachain-0.33.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datachain
-Version: 0.32.3
+Version: 0.33.0
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License-Expression: Apache-2.0
@@ -86,6 +86,7 @@ Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgres"
 Provides-Extra: tests
 Requires-Dist: datachain[audio,hf,postgres,remote,torch,vector,video]; extra == "tests"
 Requires-Dist: pytest<9,>=8; extra == "tests"
+Requires-Dist: pytest-asyncio; extra == "tests"
 Requires-Dist: pytest-sugar>=0.9.6; extra == "tests"
 Requires-Dist: pytest-cov>=4.1.0; extra == "tests"
 Requires-Dist: pytest-mock>=3.12.0; extra == "tests"

datachain-0.33.0.dist-info/RECORD CHANGED
@@ -2,10 +2,11 @@ datachain/__init__.py,sha256=BRqfLPoBRRycnndaxyba-i4ZrZCJl0As2pwV9RiNBr8,1822
 datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
 datachain/asyn.py,sha256=RH_jFwJcTXxhEFomaI9yL6S3Onau6NZ6FSKfKFGtrJE,9689
 datachain/cache.py,sha256=ESVRaCJXEThMIfGEFVHx6wJPOZA7FYk9V6WxjyuqUBY,3626
+datachain/checkpoint.py,sha256=Ar6SnnDMN3fr5ZZm3Xpdbj2f9buhqeApad-B1Lyrr4Y,1152
 datachain/config.py,sha256=g8qbNV0vW2VEKpX-dGZ9pAn0DAz6G2ZFcr7SAV3PoSM,4272
 datachain/dataset.py,sha256=eX7xGa3EUpAccBZWpkgDmYV6_FjGuhjkMLFHpjl6lVI,25256
 datachain/delta.py,sha256=X5Lw6GQ8MAYNl2YIExNvl0tPIkylQEWwnCw0We7NtHM,10693
-datachain/error.py,sha256=comKx1JCdjsBpxabrOWaiRP0aHBspBDZl1mkKFnBSq0,1739
+datachain/error.py,sha256=WR1MoO9BPI0hO1FVKVTS0hgyxxumywtDnSY7Sv1oE1c,1796
 datachain/job.py,sha256=x5PB6d5sqx00hePNNkirESlOVAvnmkEM5ygUgQmAhsk,1262
 datachain/listing.py,sha256=aqayl5St3D9PwdwM6nR1STkpLSw-S3U8pudO9PWi3N8,7241
 datachain/namespace.py,sha256=sgIF90KEaC_VlMFivDIJiFz8RUsTftMxW4kOUTyxo3A,2356
@@ -41,18 +42,19 @@ datachain/cli/parser/utils.py,sha256=rETdD-9Hq9A4OolgfT7jQw4aoawtbfmkdtH6E7nkhpI
 datachain/client/__init__.py,sha256=1kDpCPoibMXi1gExR4lTLc5pi-k6M5TANiwtXkPoLhU,49
 datachain/client/azure.py,sha256=7yyAgANHfu9Kfh187MKNTT1guvu9Q-WYsi4vYoY3aew,3270
 datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
-datachain/client/fsspec.py,sha256=sChjxu931QgU2-n9MdXlmOrhGAiAckXoDVZTxKcNv6M,14336
+datachain/client/fsspec.py,sha256=urt-b9Osay-S4LmwyXUKyYp-JHUBlFewoUvYNP7W_Jw,14553
 datachain/client/gcs.py,sha256=8hcFhEHp8qGRsJoyfCoawfuwb1Et-MSkyQoM9AnNuXI,5204
 datachain/client/hf.py,sha256=n5xJZdvNLS-SqokxuBCIPfGbhIeC_XfLm_BNYtEVvg4,2677
+datachain/client/http.py,sha256=oU4nxaOa3xNXkxprDjjIS5fufgRJS0eNHTau3FUC6sg,5171
 datachain/client/local.py,sha256=0J52Wzvw25hSucVlzBvLuMRAZwrAHZAYDvD1mNBqf4c,4607
 datachain/client/s3.py,sha256=6DNVGLg-woPS1DVlYVX2rIlunNblsuxyOnI1rSzhW3k,7515
 datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
 datachain/data_storage/db_engine.py,sha256=n8ojCbvVMPY2e3SG8fUaaD0b9GkVfpl_Naa_6EiHfWg,3788
 datachain/data_storage/job.py,sha256=ZkeXCNUj_VCkoKYx29hqB4AcfVUielnRjY-GYUcUxt4,426
-datachain/data_storage/metastore.py,sha256=SrcMeHAjzwTbX8A3WEZ3zzQzVW1n7uamrGDtQXqucyE,55810
-datachain/data_storage/schema.py,sha256=o3JbURKXRg3IJyIVA4QjHHkn6byRuz7avbydU2FlvNY,9897
+datachain/data_storage/metastore.py,sha256=TgLYAKraH1WsmteaAqO5TW2VzNZZM4_SASgcBlDzdr8,60218
+datachain/data_storage/schema.py,sha256=DmxxXjNIsXib9gj5jcrb1CVjGzHf7HZLOehs1RmuiMA,9891
 datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
-datachain/data_storage/sqlite.py,sha256=1fIeIhmB3O8oQVzP8dDKap0KUIgI0n2TdBQSyv0R8J4,30345
+datachain/data_storage/sqlite.py,sha256=Z6KlFk7hWoXBbjzxfk2NuIBecqP86AJzp5iEE2W4yw0,30603
 datachain/data_storage/warehouse.py,sha256=7jc69CtWdfQlc_9WbJ5l6yQooarpLFBrDk4fY-svi_0,32783
 datachain/diff/__init__.py,sha256=-OFZzgOplqO84iWgGY7kfe60NXaWR9JRIh9T-uJboAM,9668
 datachain/fs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -104,7 +106,7 @@ datachain/lib/convert/values_to_tuples.py,sha256=j5yZMrVUH6W7b-7yUvdCTGI7JCUAYUO
 datachain/lib/dc/__init__.py,sha256=UrUzmDH6YyVl8fxM5iXTSFtl5DZTUzEYm1MaazK4vdQ,900
 datachain/lib/dc/csv.py,sha256=wUsDPpLD4lts92yn0gejZHqTv8qQBbv8JYRwiIepj0o,4471
 datachain/lib/dc/database.py,sha256=sTpos1rE4BS5BTzzixykhWIO2JxVYKH1GTRncdpu4dU,14716
-datachain/lib/dc/datachain.py,sha256=pDgUmvmf0ENngFepoD0AkxxqiqNIgoRueejfojyuURQ,100458
+datachain/lib/dc/datachain.py,sha256=1LvKFKqAWw8TMw2bdpfG6LfOCMMgBS6bluBp0lCX0s4,100845
 datachain/lib/dc/datasets.py,sha256=pVRcrVEPVPHMf8sLqqhjXbilB3QuUqKE-byvZ-XlJNE,15347
 datachain/lib/dc/hf.py,sha256=B7pubDQTDmth9uILXyhpQNtOAT3UOLjR-peU__tpypk,2884
 datachain/lib/dc/json.py,sha256=-vJ-pUpp2JxK4_vOfznE09FIoEOrvCwoIZSLxM6pjmY,2742
@@ -161,9 +163,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
 datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
 datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.32.3.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
-datachain-0.32.3.dist-info/METADATA,sha256=MJCn0xaCu7eOuQl8AXKTFX4HTvPqtBPY93rCvcUcoBg,13607
-datachain-0.32.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-datachain-0.32.3.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
-datachain-0.32.3.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
-datachain-0.32.3.dist-info/RECORD,,
+datachain-0.33.0.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.33.0.dist-info/METADATA,sha256=UGH-boSaU6Kaz6RIsQItwQe4Auzl6L4oHSeeNCKZ7pw,13655
+datachain-0.33.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+datachain-0.33.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.33.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.33.0.dist-info/RECORD,,