datachain 0.3.13__py3-none-any.whl → 0.3.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of datachain might be problematic.

datachain/asyn.py CHANGED
@@ -1,14 +1,8 @@
  import asyncio
- from collections.abc import Awaitable, Coroutine, Iterable
+ from collections.abc import AsyncIterable, Awaitable, Coroutine, Iterable, Iterator
  from concurrent.futures import ThreadPoolExecutor
  from heapq import heappop, heappush
- from typing import (
- Any,
- Callable,
- Generic,
- Optional,
- TypeVar,
- )
+ from typing import Any, Callable, Generic, Optional, TypeVar

  from fsspec.asyn import get_loop

@@ -16,6 +10,7 @@ ASYNC_WORKERS = 20

  InputT = TypeVar("InputT", contravariant=True) # noqa: PLC0105
  ResultT = TypeVar("ResultT", covariant=True) # noqa: PLC0105
+ T = TypeVar("T")


  class AsyncMapper(Generic[InputT, ResultT]):
@@ -226,7 +221,7 @@ class OrderedMapper(AsyncMapper[InputT, ResultT]):
  self._push_result(self._next_yield, None)


- def iter_over_async(ait, loop):
+ def iter_over_async(ait: AsyncIterable[T], loop) -> Iterator[T]:
  """Wrap an asynchronous iterator into a synchronous one"""
  ait = ait.__aiter__()

datachain/catalog/catalog.py CHANGED
@@ -1390,12 +1390,12 @@ class Catalog:
  dataset = self.get_dataset(name)
  return self.warehouse.dataset_table_export_file_names(dataset, version)

- def dataset_stats(self, name: str, version: int) -> DatasetStats:
+ def dataset_stats(self, name: str, version: Optional[int]) -> DatasetStats:
  """
  Returns tuple with dataset stats: total number of rows and total dataset size.
  """
  dataset = self.get_dataset(name)
- dataset_version = dataset.get_version(version)
+ dataset_version = dataset.get_version(version or dataset.latest_version)
  return DatasetStats(
  num_objects=dataset_version.num_objects,
  size=dataset_version.size,
datachain/client/azure.py CHANGED
@@ -4,7 +4,6 @@ from adlfs import AzureBlobFileSystem
  from tqdm import tqdm

  from datachain.lib.file import File
- from datachain.node import Entry

  from .fsspec import DELIMITER, Client, ResultQueue

@@ -14,17 +13,6 @@ class AzureClient(Client):
  PREFIX = "az://"
  protocol = "az"

- def convert_info(self, v: dict[str, Any], path: str) -> Entry:
- version_id = v.get("version_id")
- return Entry.from_file(
- path=path,
- etag=v.get("etag", "").strip('"'),
- version=version_id or "",
- is_latest=version_id is None or bool(v.get("is_current_version")),
- last_modified=v["last_modified"],
- size=v.get("size", ""),
- )
-
  def info_to_file(self, v: dict[str, Any], path: str) -> File:
  version_id = v.get("version_id")
  return File(
@@ -57,7 +45,7 @@ class AzureClient(Client):
  continue
  info = (await self.fs._details([b]))[0]
  entries.append(
- self.convert_info(info, self.rel_path(info["name"]))
+ self.info_to_file(info, self.rel_path(info["name"]))
  )
  if entries:
  await result_queue.put(entries)
datachain/client/fsspec.py CHANGED
@@ -29,7 +29,7 @@ from tqdm import tqdm
  from datachain.cache import DataChainCache, UniqueId
  from datachain.client.fileslice import FileSlice, FileWrapper
  from datachain.error import ClientError as DataChainClientError
- from datachain.node import Entry
+ from datachain.lib.file import File
  from datachain.nodes_fetcher import NodesFetcher
  from datachain.nodes_thread_pool import NodeChunk
  from datachain.storage import StorageURI
@@ -45,7 +45,7 @@ DELIMITER = "/" # Path delimiter.

  DATA_SOURCE_URI_PATTERN = re.compile(r"^[\w]+:\/\/.*$")

- ResultQueue = asyncio.Queue[Optional[Sequence[Entry]]]
+ ResultQueue = asyncio.Queue[Optional[Sequence[File]]]


  def _is_win_local_path(uri: str) -> bool:
@@ -188,7 +188,7 @@ class Client(ABC):

  async def get_current_etag(self, uid: UniqueId) -> str:
  info = await self.fs._info(self.get_full_path(uid.path))
- return self.convert_info(info, "").etag
+ return self.info_to_file(info, "").etag

  async def get_size(self, path: str) -> int:
  return await self.fs._size(path)
@@ -198,7 +198,7 @@ class Client(ABC):

  async def scandir(
  self, start_prefix: str, method: str = "default"
- ) -> AsyncIterator[Sequence[Entry]]:
+ ) -> AsyncIterator[Sequence[File]]:
  try:
  impl = getattr(self, f"_fetch_{method}")
  except AttributeError:
@@ -264,7 +264,7 @@ class Client(ABC):
  ) -> None:
  await self._fetch_nested(start_prefix, result_queue)

- async def _fetch_dir(self, prefix, pbar, result_queue) -> set[str]:
+ async def _fetch_dir(self, prefix, pbar, result_queue: ResultQueue) -> set[str]:
  path = f"{self.name}/{prefix}"
  infos = await self.ls_dir(path)
  files = []
@@ -277,7 +277,7 @@ class Client(ABC):
  if info["type"] == "directory":
  subdirs.add(subprefix)
  else:
- files.append(self.convert_info(info, subprefix))
+ files.append(self.info_to_file(info, subprefix))
  if files:
  await result_queue.put(files)
  found_count = len(subdirs) + len(files)
@@ -303,7 +303,7 @@ class Client(ABC):
  return f"{self.PREFIX}{self.name}/{rel_path}"

  @abstractmethod
- def convert_info(self, v: dict[str, Any], parent: str) -> Entry: ...
+ def info_to_file(self, v: dict[str, Any], parent: str) -> File: ...

  def fetch_nodes(
  self,
datachain/client/gcs.py CHANGED
@@ -10,7 +10,6 @@ from gcsfs import GCSFileSystem
  from tqdm import tqdm

  from datachain.lib.file import File
- from datachain.node import Entry

  from .fsspec import DELIMITER, Client, ResultQueue

@@ -108,19 +107,9 @@ class GCSClient(Client):
  finally:
  await page_queue.put(None)

- def _entry_from_dict(self, d: dict[str, Any]) -> Entry:
+ def _entry_from_dict(self, d: dict[str, Any]) -> File:
  info = self.fs._process_object(self.name, d)
- return self.convert_info(info, self.rel_path(info["name"]))
-
- def convert_info(self, v: dict[str, Any], path: str) -> Entry:
- return Entry.from_file(
- path=path,
- etag=v.get("etag", ""),
- version=v.get("generation", ""),
- is_latest=not v.get("timeDeleted"),
- last_modified=self.parse_timestamp(v["updated"]),
- size=v.get("size", ""),
- )
+ return self.info_to_file(info, self.rel_path(info["name"]))

  def info_to_file(self, v: dict[str, Any], path: str) -> File:
  return File(
datachain/client/hf.py CHANGED
@@ -5,7 +5,6 @@ from typing import Any, cast
  from huggingface_hub import HfFileSystem

  from datachain.lib.file import File
- from datachain.node import Entry

  from .fsspec import Client

@@ -22,15 +21,6 @@ class HfClient(Client):

  return cast(HfFileSystem, super().create_fs(**kwargs))

- def convert_info(self, v: dict[str, Any], path: str) -> Entry:
- return Entry.from_file(
- path=path,
- size=v["size"],
- version=v["last_commit"].oid,
- etag=v.get("blob_id", ""),
- last_modified=v["last_commit"].date,
- )
-
  def info_to_file(self, v: dict[str, Any], path: str) -> File:
  return File(
  path=path,
datachain/client/local.py CHANGED
@@ -7,8 +7,8 @@ from urllib.parse import urlparse

  from fsspec.implementations.local import LocalFileSystem

+ from datachain.cache import UniqueId
  from datachain.lib.file import File
- from datachain.node import Entry
  from datachain.storage import StorageURI

  from .fsspec import Client
@@ -114,9 +114,9 @@ class FileClient(Client):
  use_symlinks=use_symlinks,
  )

- async def get_current_etag(self, uid) -> str:
+ async def get_current_etag(self, uid: UniqueId) -> str:
  info = self.fs.info(self.get_full_path(uid.path))
- return self.convert_info(info, "").etag
+ return self.info_to_file(info, "").etag

  async def get_size(self, path: str) -> int:
  return self.fs.size(path)
@@ -136,15 +136,6 @@ class FileClient(Client):
  full_path += "/"
  return full_path

- def convert_info(self, v: dict[str, Any], path: str) -> Entry:
- return Entry.from_file(
- path=path,
- etag=v["mtime"].hex(),
- is_latest=True,
- last_modified=datetime.fromtimestamp(v["mtime"], timezone.utc),
- size=v.get("size", ""),
- )
-
  def info_to_file(self, v: dict[str, Any], path: str) -> File:
  return File(
  source=self.uri,
datachain/client/s3.py CHANGED
@@ -1,12 +1,11 @@
  import asyncio
- from typing import Any, cast
+ from typing import Any, Optional, cast

  from botocore.exceptions import NoCredentialsError
  from s3fs import S3FileSystem
  from tqdm import tqdm

  from datachain.lib.file import File
- from datachain.node import Entry

  from .fsspec import DELIMITER, Client, ResultQueue

@@ -111,8 +110,9 @@ class ClientS3(Client):
  ) -> None:
  await self._fetch_flat(start_prefix, result_queue)

- def _entry_from_boto(self, v, bucket, versions=False):
- return Entry.from_file(
+ def _entry_from_boto(self, v, bucket, versions=False) -> File:
+ return File(
+ source=self.uri,
  path=v["Key"],
  etag=v.get("ETag", "").strip('"'),
  version=ClientS3.clean_s3_version(v.get("VersionId", "")),
@@ -125,8 +125,8 @@ class ClientS3(Client):
  self,
  prefix,
  pbar,
- result_queue,
- ):
+ result_queue: ResultQueue,
+ ) -> set[str]:
  if prefix:
  prefix = prefix.lstrip(DELIMITER) + DELIMITER
  files = []
@@ -141,7 +141,7 @@ class ClientS3(Client):
  if info["type"] == "directory":
  subdirs.add(subprefix)
  else:
- files.append(self.convert_info(info, subprefix))
+ files.append(self.info_to_file(info, subprefix))
  pbar.update()
  found = True
  if not found:
@@ -152,18 +152,8 @@ class ClientS3(Client):
  return subdirs

  @staticmethod
- def clean_s3_version(ver):
- return ver if ver != "null" else ""
-
- def convert_info(self, v: dict[str, Any], path: str) -> Entry:
- return Entry.from_file(
- path=path,
- etag=v.get("ETag", "").strip('"'),
- version=ClientS3.clean_s3_version(v.get("VersionId", "")),
- is_latest=v.get("IsLatest", True),
- last_modified=v.get("LastModified", ""),
- size=v["size"],
- )
+ def clean_s3_version(ver: Optional[str]) -> str:
+ return ver if (ver is not None and ver != "null") else ""

  def info_to_file(self, v: dict[str, Any], path: str) -> File:
  return File(
datachain/data_storage/sqlite.py CHANGED
@@ -43,6 +43,8 @@ if TYPE_CHECKING:
  from sqlalchemy.sql.elements import ColumnElement
  from sqlalchemy.types import TypeEngine

+ from datachain.lib.file import File
+

  logger = logging.getLogger("datachain")

@@ -58,6 +60,10 @@ quote_schema = sqlite_dialect.identifier_preparer.quote_schema
  quote = sqlite_dialect.identifier_preparer.quote


+ def _get_in_memory_uri():
+ return "file::memory:?cache=shared"
+
+
  def get_retry_sleep_sec(retry_count: int) -> int:
  return RETRY_START_SEC * (RETRY_FACTOR**retry_count)

@@ -119,7 +125,7 @@ class SQLiteDatabaseEngine(DatabaseEngine):
  if db_file == ":memory:":
  # Enable multithreaded usage of the same in-memory db
  db = sqlite3.connect(
- "file::memory:?cache=shared", uri=True, detect_types=DETECT_TYPES
+ _get_in_memory_uri(), uri=True, detect_types=DETECT_TYPES
  )
  else:
  db = sqlite3.connect(
@@ -704,6 +710,9 @@ class SQLiteWarehouse(AbstractWarehouse):

  self.db.execute(insert_query)

+ def prepare_entries(self, entries: "Iterable[File]") -> Iterable[dict[str, Any]]:
+ return (e.model_dump() for e in entries)
+
  def insert_rows(self, table: Table, rows: Iterable[dict[str, Any]]) -> None:
  rows = list(rows)
  if not rows:
datachain/data_storage/warehouse.py CHANGED
@@ -20,7 +20,7 @@ from datachain.client import Client
  from datachain.data_storage.schema import convert_rows_custom_column_types
  from datachain.data_storage.serializer import Serializable
  from datachain.dataset import DatasetRecord
- from datachain.node import DirType, DirTypeGroup, Entry, Node, NodeWithPath, get_path
+ from datachain.node import DirType, DirTypeGroup, Node, NodeWithPath, get_path
  from datachain.sql.functions import path as pathfunc
  from datachain.sql.types import Int, SQLType
  from datachain.storage import StorageURI
@@ -34,6 +34,7 @@ if TYPE_CHECKING:
  from datachain.data_storage import AbstractIDGenerator, schema
  from datachain.data_storage.db_engine import DatabaseEngine
  from datachain.data_storage.schema import DataTable
+ from datachain.lib.file import File

  try:
  import numpy as np
@@ -401,25 +402,18 @@ class AbstractWarehouse(ABC, Serializable):
  expressions: tuple[_ColumnsClauseArgument[Any], ...] = (
  sa.func.count(table.c.sys__id),
  )
- if "file__size" in table.columns:
- expressions = (*expressions, sa.func.sum(table.c.file__size))
- elif "size" in table.columns:
- expressions = (*expressions, sa.func.sum(table.c.size))
+ size_columns = [
+ c for c in table.columns if c.name == "size" or c.name.endswith("__size")
+ ]
+ if size_columns:
+ expressions = (*expressions, sa.func.sum(sum(size_columns)))
  query = select(*expressions)
  ((nrows, *rest),) = self.db.execute(query)
- return nrows, rest[0] if rest else None
-
- def prepare_entries(
- self, uri: str, entries: Iterable[Entry]
- ) -> list[dict[str, Any]]:
- """
- Prepares bucket listing entry (row) for inserting into database
- """
-
- def _prepare_entry(entry: Entry):
- return attrs.asdict(entry) | {"source": uri}
+ return nrows, rest[0] if rest else 0

- return [_prepare_entry(e) for e in entries]
+ @abstractmethod
+ def prepare_entries(self, entries: "Iterable[File]") -> Iterable[dict[str, Any]]:
+ """Convert File entries so they can be passed on to `insert_rows()`"""

  @abstractmethod
  def insert_rows(self, table: Table, rows: Iterable[dict[str, Any]]) -> None:
datachain/lib/listing.py CHANGED
@@ -30,8 +30,7 @@ def list_bucket(uri: str, client_config=None) -> Callable:
  config = client_config or {}
  client, path = Client.parse_url(uri, None, **config) # type: ignore[arg-type]
  for entries in iter_over_async(client.scandir(path.rstrip("/")), get_loop()):
- for entry in entries:
- yield entry.to_file(client.uri)
+ yield from entries

  return list_func

datachain/lib/model_store.py CHANGED
@@ -1,6 +1,6 @@
  import inspect
  import logging
- from typing import ClassVar, Optional
+ from typing import Any, ClassVar, Optional

  from pydantic import BaseModel

@@ -69,7 +69,7 @@ class ModelStore:
  del cls.store[fr.__name__][version]

  @staticmethod
- def is_pydantic(val):
+ def is_pydantic(val: Any) -> bool:
  return (
  not hasattr(val, "__origin__")
  and inspect.isclass(val)
datachain/lib/pytorch.py CHANGED
@@ -7,6 +7,7 @@ from torch import float32
  from torch.distributed import get_rank, get_world_size
  from torch.utils.data import IterableDataset, get_worker_info
  from torchvision.transforms import v2
+ from tqdm import tqdm

  from datachain.catalog import Catalog, get_catalog
  from datachain.lib.dc import DataChain
@@ -93,33 +94,38 @@ class PytorchDataset(IterableDataset):
  if self.num_samples > 0:
  ds = ds.sample(self.num_samples)
  ds = ds.chunk(total_rank, total_workers)
- for row_features in ds.collect():
- row = []
- for fr in row_features:
- if hasattr(fr, "read"):
- row.append(fr.read()) # type: ignore[unreachable]
- else:
- row.append(fr)
- # Apply transforms
- if self.transform:
- try:
- if isinstance(self.transform, v2.Transform):
- row = self.transform(row)
+ desc = f"Parsed PyTorch dataset for rank={total_rank} worker"
+ with tqdm(desc=desc, unit=" rows") as pbar:
+ for row_features in ds.collect():
+ row = []
+ for fr in row_features:
+ if hasattr(fr, "read"):
+ row.append(fr.read()) # type: ignore[unreachable]
+ else:
+ row.append(fr)
+ # Apply transforms
+ if self.transform:
+ try:
+ if isinstance(self.transform, v2.Transform):
+ row = self.transform(row)
+ for i, val in enumerate(row):
+ if isinstance(val, Image.Image):
+ row[i] = self.transform(val)
+ except ValueError:
+ logger.warning(
+ "Skipping transform due to unsupported data types."
+ )
+ self.transform = None
+ if self.tokenizer:
  for i, val in enumerate(row):
- if isinstance(val, Image.Image):
- row[i] = self.transform(val)
- except ValueError:
- logger.warning("Skipping transform due to unsupported data types.")
- self.transform = None
- if self.tokenizer:
- for i, val in enumerate(row):
- if isinstance(val, str) or (
- isinstance(val, list) and isinstance(val[0], str)
- ):
- row[i] = convert_text(
- val, self.tokenizer, self.tokenizer_kwargs
- ).squeeze(0) # type: ignore[union-attr]
- yield row
+ if isinstance(val, str) or (
+ isinstance(val, list) and isinstance(val[0], str)
+ ):
+ row[i] = convert_text(
+ val, self.tokenizer, self.tokenizer_kwargs
+ ).squeeze(0) # type: ignore[union-attr]
+ yield row
+ pbar.update(1)

  @staticmethod
  def get_rank_and_workers() -> tuple[int, int]:
datachain/lib/signal_schema.py CHANGED
@@ -4,11 +4,14 @@ from collections.abc import Iterator, Sequence
  from dataclasses import dataclass
  from datetime import datetime
  from inspect import isclass
- from typing import (
+ from typing import ( # noqa: UP035
  TYPE_CHECKING,
  Annotated,
  Any,
  Callable,
+ Dict,
+ Final,
+ List,
  Literal,
  Optional,
  Union,
@@ -42,8 +45,13 @@ NAMES_TO_TYPES = {
  "dict": dict,
  "bytes": bytes,
  "datetime": datetime,
- "Literal": Literal,
+ "Final": Final,
  "Union": Union,
+ "Optional": Optional,
+ "List": list,
+ "Dict": dict,
+ "Literal": Any,
+ "Any": Any,
  }


@@ -146,35 +154,11 @@ class SignalSchema:
  return SignalSchema(signals)

  @staticmethod
- def _get_name_original_type(fr_type: type) -> tuple[str, type]:
- """Returns the name of and the original type for the given type,
- based on whether the type is Optional or not."""
- orig = get_origin(fr_type)
- args = get_args(fr_type)
- # Check if fr_type is Optional
- if orig == Union and len(args) == 2 and (type(None) in args):
- fr_type = args[0]
- orig = get_origin(fr_type)
- if orig in (Literal, LiteralEx):
- # Literal has no __name__ in Python 3.9
- type_name = "Literal"
- elif orig == Union:
- # Union also has no __name__ in Python 3.9
- type_name = "Union"
- else:
- type_name = str(fr_type.__name__) # type: ignore[union-attr]
- return type_name, fr_type
-
- @staticmethod
- def serialize_custom_model_fields(
- name: str, fr: type, custom_types: dict[str, Any]
+ def _serialize_custom_model_fields(
+ version_name: str, fr: type[BaseModel], custom_types: dict[str, Any]
  ) -> str:
  """This serializes any custom type information to the provided custom_types
- dict, and returns the name of the type provided."""
- if hasattr(fr, "__origin__") or not issubclass(fr, BaseModel):
- # Don't store non-feature types.
- return name
- version_name = ModelStore.get_name(fr)
+ dict, and returns the name of the type serialized."""
  if version_name in custom_types:
  # This type is already stored in custom_types.
  return version_name
@@ -183,37 +167,102 @@ class SignalSchema:
  field_type = info.annotation
  # All fields should be typed.
  assert field_type
- field_type_name, field_type = SignalSchema._get_name_original_type(
- field_type
- )
- # Serialize this type to custom_types if it is a custom type as well.
- fields[field_name] = SignalSchema.serialize_custom_model_fields(
- field_type_name, field_type, custom_types
- )
+ fields[field_name] = SignalSchema._serialize_type(field_type, custom_types)
  custom_types[version_name] = fields
  return version_name

+ @staticmethod
+ def _serialize_type(fr: type, custom_types: dict[str, Any]) -> str:
+ """Serialize a given type to a string, including automatic ModelStore
+ registration, and save this type and subtypes to custom_types as well."""
+ subtypes: list[Any] = []
+ type_name = SignalSchema._type_to_str(fr, subtypes)
+ # Iterate over all subtypes (includes the input type).
+ for st in subtypes:
+ if st is None or not ModelStore.is_pydantic(st):
+ continue
+ # Register and save feature types.
+ ModelStore.register(st)
+ st_version_name = ModelStore.get_name(st)
+ if st is fr:
+ # If the main type is Pydantic, then use the ModelStore version name.
+ type_name = st_version_name
+ # Save this type to custom_types.
+ SignalSchema._serialize_custom_model_fields(
+ st_version_name, st, custom_types
+ )
+ return type_name
+
  def serialize(self) -> dict[str, Any]:
  signals: dict[str, Any] = {}
  custom_types: dict[str, Any] = {}
  for name, fr_type in self.values.items():
- if (fr := ModelStore.to_pydantic(fr_type)) is not None:
- ModelStore.register(fr)
- signals[name] = ModelStore.get_name(fr)
- type_name, fr_type = SignalSchema._get_name_original_type(fr)
- else:
- type_name, fr_type = SignalSchema._get_name_original_type(fr_type)
- signals[name] = type_name
- self.serialize_custom_model_fields(type_name, fr_type, custom_types)
+ signals[name] = self._serialize_type(fr_type, custom_types)
  if custom_types:
  signals["_custom_types"] = custom_types
  return signals

  @staticmethod
- def _resolve_type(type_name: str, custom_types: dict[str, Any]) -> Optional[type]:
+ def _split_subtypes(type_name: str) -> list[str]:
+ """This splits a list of subtypes, including proper square bracket handling."""
+ start = 0
+ depth = 0
+ subtypes = []
+ for i, c in enumerate(type_name):
+ if c == "[":
+ depth += 1
+ elif c == "]":
+ if depth == 0:
+ raise TypeError(
+ "Extra closing square bracket when parsing subtype list"
+ )
+ depth -= 1
+ elif c == "," and depth == 0:
+ subtypes.append(type_name[start:i].strip())
+ start = i + 1
+ if depth > 0:
+ raise TypeError("Unclosed square bracket when parsing subtype list")
+ subtypes.append(type_name[start:].strip())
+ return subtypes
+
+ @staticmethod
+ def _resolve_type(type_name: str, custom_types: dict[str, Any]) -> Optional[type]: # noqa: PLR0911
  """Convert a string-based type back into a python type."""
+ type_name = type_name.strip()
+ if not type_name:
+ raise TypeError("Type cannot be empty")
+ if type_name == "NoneType":
+ return None
+
+ bracket_idx = type_name.find("[")
+ subtypes: Optional[tuple[Optional[type], ...]] = None
+ if bracket_idx > -1:
+ if bracket_idx == 0:
+ raise TypeError("Type cannot start with '['")
+ close_bracket_idx = type_name.rfind("]")
+ if close_bracket_idx == -1:
+ raise TypeError("Unclosed square bracket when parsing type")
+ if close_bracket_idx < bracket_idx:
+ raise TypeError("Square brackets are out of order when parsing type")
+ if close_bracket_idx == bracket_idx + 1:
+ raise TypeError("Empty square brackets when parsing type")
+ subtype_names = SignalSchema._split_subtypes(
+ type_name[bracket_idx + 1 : close_bracket_idx]
+ )
+ # Types like Union require the parameters to be a tuple of types.
+ subtypes = tuple(
+ SignalSchema._resolve_type(st, custom_types) for st in subtype_names
+ )
+ type_name = type_name[:bracket_idx].strip()
+
  fr = NAMES_TO_TYPES.get(type_name)
  if fr:
+ if subtypes:
+ if len(subtypes) == 1:
+ # Types like Optional require there to be only one argument.
+ return fr[subtypes[0]] # type: ignore[index]
+ # Other types like Union require the parameters to be a tuple of types.
+ return fr[subtypes] # type: ignore[index]
  return fr # type: ignore[return-value]

  model_name, version = ModelStore.parse_name_version(type_name)
@@ -228,7 +277,14 @@ class SignalSchema:
  for field_name, field_type_str in fields.items()
  }
  return create_feature_model(type_name, fields)
- return None
+ # This can occur if a third-party or custom type is used, which is not available
+ # when deserializing.
+ warnings.warn(
+ f"Could not resolve type: '{type_name}'.",
+ SignalSchemaWarning,
+ stacklevel=2,
+ )
+ return Any # type: ignore[return-value]

  @staticmethod
  def deserialize(schema: dict[str, Any]) -> "SignalSchema":
@@ -242,9 +298,14 @@ class SignalSchema:
  # This entry is used as a lookup for custom types,
  # and is not an actual field.
  continue
+ if not isinstance(type_name, str):
+ raise SignalSchemaError(
+ f"cannot deserialize '{type_name}': "
+ "serialized types must be a string"
+ )
  try:
  fr = SignalSchema._resolve_type(type_name, custom_types)
- if fr is None:
+ if fr is Any:
  # Skip if the type is not found, so all data can be displayed.
  warnings.warn(
  f"In signal '{signal}': "
@@ -258,7 +319,7 @@ class SignalSchema:
  raise SignalSchemaError(
  f"cannot deserialize '{signal}': {err}"
  ) from err
- signals[signal] = fr
+ signals[signal] = fr # type: ignore[assignment]

  return SignalSchema(signals)

@@ -509,31 +570,58 @@ class SignalSchema:
  return self.values.pop(name)

  @staticmethod
- def _type_to_str(type_): # noqa: PLR0911
+ def _type_to_str(type_: Optional[type], subtypes: Optional[list] = None) -> str: # noqa: PLR0911
+ """Convert a type to a string-based representation."""
+ if type_ is None:
+ return "NoneType"
+
  origin = get_origin(type_)

  if origin == Union:
  args = get_args(type_)
- formatted_types = ", ".join(SignalSchema._type_to_str(arg) for arg in args)
+ formatted_types = ", ".join(
+ SignalSchema._type_to_str(arg, subtypes) for arg in args
+ )
  return f"Union[{formatted_types}]"
  if origin == Optional:
  args = get_args(type_)
- type_str = SignalSchema._type_to_str(args[0])
+ type_str = SignalSchema._type_to_str(args[0], subtypes)
  return f"Optional[{type_str}]"
- if origin is list:
+ if origin in (list, List): # noqa: UP006
  args = get_args(type_)
- type_str = SignalSchema._type_to_str(args[0])
+ type_str = SignalSchema._type_to_str(args[0], subtypes)
  return f"list[{type_str}]"
- if origin is dict:
+ if origin in (dict, Dict): # noqa: UP006
  args = get_args(type_)
- type_str = SignalSchema._type_to_str(args[0]) if len(args) > 0 else ""
- vals = f", {SignalSchema._type_to_str(args[1])}" if len(args) > 1 else ""
+ type_str = (
+ SignalSchema._type_to_str(args[0], subtypes) if len(args) > 0 else ""
+ )
+ vals = (
+ f", {SignalSchema._type_to_str(args[1], subtypes)}"
+ if len(args) > 1
+ else ""
+ )
  return f"dict[{type_str}{vals}]"
  if origin == Annotated:
  args = get_args(type_)
- return SignalSchema._type_to_str(args[0])
- if origin in (Literal, LiteralEx):
+ return SignalSchema._type_to_str(args[0], subtypes)
+ if origin in (Literal, LiteralEx) or type_ in (Literal, LiteralEx):
  return "Literal"
+ if Any in (origin, type_):
+ return "Any"
+ if Final in (origin, type_):
+ return "Final"
+ if subtypes is not None:
+ # Include this type in the list of all subtypes, if requested.
+ subtypes.append(type_)
+ if not hasattr(type_, "__name__"):
+ # This can happen for some third-party or custom types, mostly on Python 3.9
+ warnings.warn(
+ f"Unable to determine name of type '{type_}'.",
+ SignalSchemaWarning,
+ stacklevel=2,
+ )
+ return "Any"
  return type_.__name__

  @staticmethod
datachain/listing.py CHANGED
@@ -9,7 +9,8 @@ from sqlalchemy import Column
  from sqlalchemy.sql import func
  from tqdm import tqdm

- from datachain.node import DirType, Entry, Node, NodeWithPath
+ from datachain.lib.file import File
+ from datachain.node import DirType, Node, NodeWithPath
  from datachain.sql.functions import path as pathfunc
  from datachain.utils import suffix_to_number

@@ -80,16 +81,13 @@ class Listing:
  finally:
  fetch_listing.insert_entries_done()

- def insert_entry(self, entry: Entry) -> None:
- self.warehouse.insert_rows(
- self.dataset_rows.get_table(),
- self.warehouse.prepare_entries(self.client.uri, [entry]),
- )
+ def insert_entry(self, entry: File) -> None:
+ self.insert_entries([entry])

- def insert_entries(self, entries: Iterable[Entry]) -> None:
+ def insert_entries(self, entries: Iterable[File]) -> None:
  self.warehouse.insert_rows(
  self.dataset_rows.get_table(),
- self.warehouse.prepare_entries(self.client.uri, entries),
+ self.warehouse.prepare_entries(entries),
  )

  def insert_entries_done(self) -> None:
datachain/node.py CHANGED
@@ -4,7 +4,6 @@ from typing import TYPE_CHECKING, Any, Optional
  import attrs

  from datachain.cache import UniqueId
- from datachain.lib.file import File
  from datachain.storage import StorageURI
  from datachain.utils import TIME_ZERO, time_to_str

@@ -139,48 +138,6 @@ class Node:
  return split[0]


- @attrs.define
- class Entry:
- path: str = ""
- etag: str = ""
- version: str = ""
- is_latest: bool = True
- last_modified: Optional[datetime] = None
- size: int = 0
- location: Optional[str] = None
-
- @classmethod
- def from_file(cls, path: str, **kwargs) -> "Entry":
- return cls(path=path, **kwargs)
-
- @property
- def full_path(self) -> str:
- return self.path
-
- @property
- def name(self):
- return self.path.rsplit("/", 1)[-1]
-
- @property
- def parent(self):
- split = self.path.rsplit("/", 1)
- if len(split) <= 1:
- return ""
- return split[0]
-
- def to_file(self, source: str) -> File:
- return File(
- source=source,
- path=self.path,
- size=self.size,
- version=self.version,
- etag=self.etag,
- is_latest=self.is_latest,
- last_modified=self.last_modified,
- location=self.location,
- )
-
-
  def get_path(parent: str, name: str):
  return f"{parent}/{name}" if parent else name

datachain-0.3.13.dist-info/METADATA → datachain-0.3.14.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: datachain
- Version: 0.3.13
+ Version: 0.3.14
  Summary: Wrangle unstructured AI data at scale
  Author-email: Dmitry Petrov <support@dvc.org>
  License: Apache-2.0
datachain-0.3.13.dist-info/RECORD → datachain-0.3.14.dist-info/RECORD RENAMED
@@ -1,6 +1,6 @@
  datachain/__init__.py,sha256=GeyhE-5LgfJav2OKYGaieP2lBvf2Gm-ihj7thnK9zjI,800
  datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
- datachain/asyn.py,sha256=biF8M8fQujtj5xs0VLi8S16eBtzG6kceWlO_NILbCsg,8197
+ datachain/asyn.py,sha256=Lg3Ck1PQLjQziMx9KU4atzbEnJXTE0924WMYkhgWtGU,8247
  datachain/cache.py,sha256=WP-ktH_bRn3w2g1JOOQ7rCPsZyR4OM6K1Kb7yZsSSns,4056
  datachain/cli.py,sha256=alMjnoBUBLvBSMBR51N09rA_aUEdHJwyxSRogF7VbbA,30891
  datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
@@ -8,8 +8,8 @@ datachain/config.py,sha256=PfC7W5yO6HFO6-iMB4YB-0RR88LPiGmD6sS_SfVbGso,1979
  datachain/dataset.py,sha256=EcYjhHg1dxxPbDwSuIxc-mDRDo3v_pYf79fMy4re1oA,14740
  datachain/error.py,sha256=OnZ8OaBtDdTZPy8XQiy29SAjqdQArQeorYbP5ju7ldc,1199
  datachain/job.py,sha256=Jt4sNutMHJReaGsj3r3scueN5aESLGfhimAa8pUP7Is,1271
- datachain/listing.py,sha256=LgL0lV10AzD1v52ajSaJKFnyiq4hNXwQiqaGySWGQsw,8290
- datachain/node.py,sha256=gacKxUPLgJ1ul6LJWz7nylYjUWPbyUY5cqaBFDOnO9E,5756
+ datachain/listing.py,sha256=vfjOlcb98A7xkGGKWEYON6l7lfrOqNv6kldmdVnlJn4,8178
+ datachain/node.py,sha256=2pF3Y9oYzElfiUBcw2LIv7LNNt--V4E-K021zjv0b0I,4748
  datachain/nodes_fetcher.py,sha256=kca19yvu11JxoVY1t4_ydp1FmchiV88GnNicNBQ9NIA,831
  datachain/nodes_thread_pool.py,sha256=ZyzBvUImIPmi4WlKC2SW2msA0UhtembbTdcs2nx29A0,3191
  datachain/progress.py,sha256=7_8FtJs770ITK9sMq-Lt4k4k18QmYl4yIG_kCoWID3o,4559
@@ -17,17 +17,17 @@ datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
  datachain/utils.py,sha256=Z9-lPNvrrAh_VWpzVBJ7L5-Oy_Oo1V0ZW7G0MVDyPK4,13065
  datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
- datachain/catalog/catalog.py,sha256=hhLciKHD0dVwniFzUsYORQ72WpnM40QYT0ydoyx1Kvw,69308
+ datachain/catalog/catalog.py,sha256=7yl_WMGS6CfOc_G2MCbVVkdAfAlcZb2gC_PvXzBnoJ0,69344
  datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
  datachain/catalog/loader.py,sha256=-6VelNfXUdgUnwInVyA8g86Boxv2xqhTh9xNS-Zlwig,8242
  datachain/client/__init__.py,sha256=T4wiYL9KIM0ZZ_UqIyzV8_ufzYlewmizlV4iymHNluE,86
- datachain/client/azure.py,sha256=LXSahE0Z6r4dXqpBkKnq3J5fg7N7ymC1lSn-1SoILGc,2687
+ datachain/client/azure.py,sha256=ffxs26zm6KLAL1aUWJm-vtzuZP3LSNha7UDGXynMBKo,2234
  datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
- datachain/client/fsspec.py,sha256=Hy3-4HRV-3MozOybqAnF-qL0EoMYFHynpTG_YZphjZE,13298
- datachain/client/gcs.py,sha256=P_E3mhzhXR9mJ_wc3AYZuczzwOJ0-D3J5qhJXeSU-xk,4518
- datachain/client/hf.py,sha256=R-F6Ks6aVM9wSNkIXOkOnZFwsJlfdRwJjymRa78RLjM,1246
- datachain/client/local.py,sha256=H8TNY8pi2kA8y9_f_1XLUjJF66f229qC_b2y4xGkzdU,5300
- datachain/client/s3.py,sha256=zs41EvYW1bS_pUxnkCnJILzUJpL2V1jvvVKSN4BKYcc,6326
+ datachain/client/fsspec.py,sha256=S93K9bS76MGcLYgWKVZiPVivbMElJ9Fq1w67I8BCR-g,13311
+ datachain/client/gcs.py,sha256=cnTIr5GS6dbYOEYfqehhyQu3dr6XNjPHSg5U3FkivUk,4124
+ datachain/client/hf.py,sha256=k24bpa6FEKNQn9zhoNC9kCigDwFSqobLsCnN_Nuzwh4,922
+ datachain/client/local.py,sha256=LTyISV4oNSOPUdsai5eNZYCGXNCn8rNGuAI0bdgbtnU,5006
+ datachain/client/s3.py,sha256=CVHBUZ1Ic2Q3370nl-Bbe69phuWjFlrVv9dTJKBpRT0,6019
  datachain/data_storage/__init__.py,sha256=cEOJpyu1JDZtfUupYucCDNFI6e5Wmp_Oyzq6rZv32Y8,398
  datachain/data_storage/db_engine.py,sha256=81Ol1of9TTTzD97ORajCnP366Xz2mEJt6C-kTUCaru4,3406
  datachain/data_storage/id_generator.py,sha256=lCEoU0BM37Ai2aRpSbwo5oQT0GqZnSpYwwvizathRMQ,4292
@@ -35,8 +35,8 @@ datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s
  datachain/data_storage/metastore.py,sha256=cHN0xmbUvChyayHHZm3Vqxr87jFqojPSlGBqhTPStlE,54519
  datachain/data_storage/schema.py,sha256=AGbjyEir5UmRZXI3m0jChZogUh5wd8csj6-YlUWaAxQ,8383
  datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
- datachain/data_storage/sqlite.py,sha256=Z4B2KDL4C8Uio2aLMxaKv0t2MoOtCV3bSqWg4X9mTFg,28048
- datachain/data_storage/warehouse.py,sha256=s5hhVUWrlEopE6eGOqzXHeNtRapK30G8gj0Vkt_HHFQ,32649
+ datachain/data_storage/sqlite.py,sha256=yooLHQXrpoqDguGlF0SGcCiMU1T82OEc4wr1ra8eBHo,28285
+ datachain/data_storage/warehouse.py,sha256=Pq6Nt3fyz1WFv6Mdtv2ZUr0_GFCNbafbtS4PdibblUg,32507
  datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datachain/lib/arrow.py,sha256=dV17oGiknqEW55ogGK_9T0ycNFwd2z-EFOW0AQiR6TU,5840
  datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
@@ -46,13 +46,13 @@ datachain/lib/dc.py,sha256=C-sfWRinV8pDK2P6UHLbScOahTlTiVQpoxUUdVllF2k,68710
  datachain/lib/file.py,sha256=rXmyzUFgnLQ4J3CyOCcg-guhzAz4x9Ug595FbNn4Y2E,11398
  datachain/lib/hf.py,sha256=ZiMvgy3DYiklGKZv-w7gevrHOgn3bGfpTlpDPOHCNqs,5336
  datachain/lib/image.py,sha256=AMXYwQsmarZjRbPCZY3M1jDsM2WAB_b3cTY4uOIuXNU,2675
- datachain/lib/listing.py,sha256=S9Xn_Saxu4xk3K_01VexkfMZW0INQiATlidt2bzgWKY,3938
+ datachain/lib/listing.py,sha256=mt-dsYfYFMPHN3zXnkohBHuueY-4tiNGPkcDYkKB0lY,3887
  datachain/lib/listing_info.py,sha256=sr5KzCXlCxlPuRmy_pVadD4miLpp5y0btvyaIPcluwI,996
  datachain/lib/meta_formats.py,sha256=3f-0vpMTesagS9iMd3y9-u9r-7g0eqYsxmK4fVfNWlw,6635
- datachain/lib/model_store.py,sha256=xcrQ69-jcQs716U4UFOSoSKM7EvFIWqxlPhIcE4X7oI,2497
- datachain/lib/pytorch.py,sha256=vK3GbWCy7kunN7ubul6w1hrWmJLja56uTCiMG_7XVQA,5623
+ datachain/lib/model_store.py,sha256=DNIv8Y6Jtk1_idNLzIpsThOsdW2BMAudyUCbPUcgcxk,2515
+ datachain/lib/pytorch.py,sha256=8LNyFaBrx8zws--MEsFg5g3pb8oLnaQAUlgGvtjKxX4,5960
  datachain/lib/settings.py,sha256=39thOpYJw-zPirzeNO6pmRC2vPrQvt4eBsw1xLWDFsw,2344
- datachain/lib/signal_schema.py,sha256=hqQLwUmt3w8RLa96MtubK9N2CBXqqTPrUkSRXc0ktt4,20275
+ datachain/lib/signal_schema.py,sha256=vb4yCC90_pEngiu9Irc02kCPyqBxkrFDL4TKr7UMY5U,23808
  datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
  datachain/lib/udf.py,sha256=nG7DDuPgZ5ZuijwvDoCq-OZMxlDM8vFNzyxMmik0Y1c,11716
  datachain/lib/udf_signature.py,sha256=gMStcEeYJka5M6cg50Z9orC6y6HzCAJ3MkFqqn1fjZg,7137
@@ -96,9 +96,9 @@ datachain/sql/sqlite/base.py,sha256=WLPHBhZbXbiqPoRV1VgDrXJqku4UuvJpBhYeQ0k5rI8,
  datachain/sql/sqlite/types.py,sha256=yzvp0sXSEoEYXs6zaYC_2YubarQoZH-MiUNXcpuEP4s,1573
  datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
- datachain-0.3.13.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
- datachain-0.3.13.dist-info/METADATA,sha256=pzMOR9LYuLR26Wifk4GPS9Wi1mmqCC5CIBZyA-X5_oo,17073
- datachain-0.3.13.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
- datachain-0.3.13.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
- datachain-0.3.13.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
- datachain-0.3.13.dist-info/RECORD,,
+ datachain-0.3.14.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+ datachain-0.3.14.dist-info/METADATA,sha256=bItmxEsx2MEsJ78Mu1yjO-PX-RkDuWHMESoPuGiJgxw,17073
+ datachain-0.3.14.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
+ datachain-0.3.14.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+ datachain-0.3.14.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+ datachain-0.3.14.dist-info/RECORD,,