datachain: datachain-0.18.7-py3-none-any.whl → datachain-0.18.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

datachain/cache.py CHANGED
@@ -76,9 +76,9 @@ class Cache:
76
76
  async def download(
77
77
  self, file: "File", client: "Client", callback: Optional[Callback] = None
78
78
  ) -> None:
79
- from_path = f"{file.source}/{file.path}"
80
79
  from dvc_objects.fs.utils import tmp_fname
81
80
 
81
+ from_path = file.get_uri()
82
82
  odb_fs = self.odb.fs
83
83
  tmp_info = odb_fs.join(self.odb.tmp_dir, tmp_fname()) # type: ignore[arg-type]
84
84
  size = file.size
@@ -207,13 +207,14 @@ class Client(ABC):
207
207
  )
208
208
 
209
209
  async def get_current_etag(self, file: "File") -> str:
210
+ file_path = file.get_path_normalized()
210
211
  kwargs = {}
211
212
  if self._is_version_aware():
212
213
  kwargs["version_id"] = file.version
213
214
  info = await self.fs._info(
214
- self.get_full_path(file.path, file.version), **kwargs
215
+ self.get_full_path(file_path, file.version), **kwargs
215
216
  )
216
- return self.info_to_file(info, file.path).etag
217
+ return self.info_to_file(info, file_path).etag
217
218
 
218
219
  def get_file_info(self, path: str, version_id: Optional[str] = None) -> "File":
219
220
  info = self.fs.info(self.get_full_path(path, version_id), version_id=version_id)
@@ -385,7 +386,8 @@ class Client(ABC):
385
386
  return open(cache_path, mode="rb")
386
387
  assert not file.location
387
388
  return FileWrapper(
388
- self.fs.open(self.get_full_path(file.path, file.version)), cb
389
+ self.fs.open(self.get_full_path(file.get_path_normalized(), file.version)),
390
+ cb,
389
391
  ) # type: ignore[return-value]
390
392
 
391
393
  def upload(self, data: bytes, path: str) -> "File":
datachain/client/local.py CHANGED
@@ -99,7 +99,7 @@ class FileClient(Client):
99
99
  )
100
100
 
101
101
  async def get_current_etag(self, file: "File") -> str:
102
- info = self.fs.info(self.get_full_path(file.path))
102
+ info = self.fs.info(self.get_full_path(file.get_path_normalized()))
103
103
  return self.info_to_file(info, "").etag
104
104
 
105
105
  async def get_size(self, path: str, version_id: Optional[str] = None) -> int:
@@ -138,8 +138,8 @@ class FileClient(Client):
138
138
  if not self.use_symlinks:
139
139
  super().fetch_nodes(nodes, shared_progress_bar)
140
140
 
141
- def do_instantiate_object(self, uid, dst):
141
+ def do_instantiate_object(self, file: File, dst: str) -> None:
142
142
  if self.use_symlinks:
143
- os.symlink(Path(self.name, uid.path), dst)
143
+ os.symlink(Path(self.name, file.path), dst)
144
144
  else:
145
- super().do_instantiate_object(uid, dst)
145
+ super().do_instantiate_object(file, dst)
datachain/lib/arrow.py CHANGED
@@ -76,7 +76,7 @@ class ArrowGenerator(Generator):
76
76
  fs_path = file.path
77
77
  fs = ReferenceFileSystem({fs_path: [cache_path]})
78
78
  else:
79
- fs, fs_path = file.get_fs(), file.get_path()
79
+ fs, fs_path = file.get_fs(), file.get_fs_path()
80
80
 
81
81
  kwargs = self.kwargs
82
82
  if format := kwargs.get("format"):
@@ -161,7 +161,7 @@ def infer_schema(chain: "DataChain", **kwargs) -> pa.Schema:
161
161
 
162
162
  schemas = []
163
163
  for file in chain.collect("file"):
164
- ds = dataset(file.get_path(), filesystem=file.get_fs(), **kwargs) # type: ignore[union-attr]
164
+ ds = dataset(file.get_fs_path(), filesystem=file.get_fs(), **kwargs) # type: ignore[union-attr]
165
165
  schemas.append(ds.schema)
166
166
  if not schemas:
167
167
  raise ValueError(
datachain/lib/file.py CHANGED
@@ -5,13 +5,14 @@ import json
5
5
  import logging
6
6
  import os
7
7
  import posixpath
8
+ import warnings
8
9
  from abc import ABC, abstractmethod
9
10
  from collections.abc import Iterator
10
11
  from contextlib import contextmanager
11
12
  from datetime import datetime
12
13
  from functools import partial
13
14
  from io import BytesIO
14
- from pathlib import Path, PurePosixPath
15
+ from pathlib import Path, PurePath, PurePosixPath
15
16
  from typing import TYPE_CHECKING, Any, ClassVar, Literal, Optional, Union
16
17
  from urllib.parse import unquote, urlparse
17
18
  from urllib.request import url2pathname
@@ -69,7 +70,7 @@ class FileExporter(NodesThreadPool):
69
70
  for task in done:
70
71
  task.result()
71
72
 
72
- def do_task(self, file):
73
+ def do_task(self, file: "File"):
73
74
  file.export(
74
75
  self.output,
75
76
  self.placement,
@@ -81,14 +82,28 @@ class FileExporter(NodesThreadPool):
81
82
 
82
83
 
83
84
  class VFileError(DataChainError):
84
- def __init__(self, file: "File", message: str, vtype: str = ""):
85
+ def __init__(self, message: str, source: str, path: str, vtype: str = ""):
86
+ self.message = message
87
+ self.source = source
88
+ self.path = path
89
+ self.vtype = vtype
90
+
85
91
  type_ = f" of vtype '{vtype}'" if vtype else ""
86
- super().__init__(f"Error in v-file '{file.path}'{type_}: {message}")
92
+ super().__init__(f"Error in v-file '{source}/{path}'{type_}: {message}")
93
+
94
+ def __reduce__(self):
95
+ return self.__class__, (self.message, self.source, self.path, self.vtype)
87
96
 
88
97
 
89
98
  class FileError(DataChainError):
90
- def __init__(self, file: "File", message: str):
91
- super().__init__(f"Error in file {file.get_uri()}: {message}")
99
+ def __init__(self, message: str, source: str, path: str):
100
+ self.message = message
101
+ self.source = source
102
+ self.path = path
103
+ super().__init__(f"Error in file '{source}/{path}': {message}")
104
+
105
+ def __reduce__(self):
106
+ return self.__class__, (self.message, self.source, self.path)
92
107
 
93
108
 
94
109
  class VFile(ABC):
@@ -114,18 +129,20 @@ class TarVFile(VFile):
114
129
  def open(cls, file: "File", location: list[dict]):
115
130
  """Stream file from tar archive based on location in archive."""
116
131
  if len(location) > 1:
117
- raise VFileError(file, "multiple 'location's are not supported yet")
132
+ raise VFileError(
133
+ "multiple 'location's are not supported yet", file.source, file.path
134
+ )
118
135
 
119
136
  loc = location[0]
120
137
 
121
138
  if (offset := loc.get("offset", None)) is None:
122
- raise VFileError(file, "'offset' is not specified")
139
+ raise VFileError("'offset' is not specified", file.source, file.path)
123
140
 
124
141
  if (size := loc.get("size", None)) is None:
125
- raise VFileError(file, "'size' is not specified")
142
+ raise VFileError("'size' is not specified", file.source, file.path)
126
143
 
127
144
  if (parent := loc.get("parent", None)) is None:
128
- raise VFileError(file, "'parent' is not specified")
145
+ raise VFileError("'parent' is not specified", file.source, file.path)
129
146
 
130
147
  tar_file = File(**parent)
131
148
  tar_file._set_stream(file._catalog)
@@ -145,14 +162,18 @@ class VFileRegistry:
145
162
  @classmethod
146
163
  def resolve(cls, file: "File", location: list[dict]):
147
164
  if len(location) == 0:
148
- raise VFileError(file, "'location' must not be list of JSONs")
165
+ raise VFileError(
166
+ "'location' must not be list of JSONs", file.source, file.path
167
+ )
149
168
 
150
169
  if not (vtype := location[0].get("vtype", "")):
151
- raise VFileError(file, "vtype is not specified")
170
+ raise VFileError("vtype is not specified", file.source, file.path)
152
171
 
153
172
  reader = cls._vtype_readers.get(vtype, None)
154
173
  if not reader:
155
- raise VFileError(file, "reader not registered", vtype)
174
+ raise VFileError(
175
+ "reader not registered", file.source, file.path, vtype=vtype
176
+ )
156
177
 
157
178
  return reader.open(file, location)
158
179
 
@@ -236,8 +257,8 @@ class File(DataModel):
236
257
 
237
258
  @field_validator("path", mode="before")
238
259
  @classmethod
239
- def validate_path(cls, path):
240
- return Path(path).as_posix() if path else ""
260
+ def validate_path(cls, path: str) -> str:
261
+ return PurePath(path).as_posix() if path else ""
241
262
 
242
263
  def model_dump_custom(self):
243
264
  res = self.model_dump()
@@ -299,11 +320,11 @@ class File(DataModel):
299
320
  return cls(**{key: row[key] for key in cls._datachain_column_types})
300
321
 
301
322
  @property
302
- def name(self):
323
+ def name(self) -> str:
303
324
  return PurePosixPath(self.path).name
304
325
 
305
326
  @property
306
- def parent(self):
327
+ def parent(self) -> str:
307
328
  return str(PurePosixPath(self.path).parent)
308
329
 
309
330
  @contextmanager
@@ -346,7 +367,7 @@ class File(DataModel):
346
367
 
347
368
  client.upload(self.read(), destination)
348
369
 
349
- def _symlink_to(self, destination: str):
370
+ def _symlink_to(self, destination: str) -> None:
350
371
  if self.location:
351
372
  raise OSError(errno.ENOTSUP, "Symlinking virtual file is not supported")
352
373
 
@@ -355,7 +376,7 @@ class File(DataModel):
355
376
  source = self.get_local_path()
356
377
  assert source, "File was not cached"
357
378
  elif self.source.startswith("file://"):
358
- source = self.get_path()
379
+ source = self.get_fs_path()
359
380
  else:
360
381
  raise OSError(errno.EXDEV, "can't link across filesystems")
361
382
 
@@ -432,27 +453,62 @@ class File(DataModel):
432
453
 
433
454
  def get_file_ext(self):
434
455
  """Returns last part of file name without `.`."""
435
- return PurePosixPath(self.path).suffix.strip(".")
456
+ return PurePosixPath(self.path).suffix.lstrip(".")
436
457
 
437
458
  def get_file_stem(self):
438
459
  """Returns file name without extension."""
439
460
  return PurePosixPath(self.path).stem
440
461
 
441
462
  def get_full_name(self):
442
- """Returns name with parent directories."""
463
+ """
464
+ [DEPRECATED] Use `file.path` directly instead.
465
+
466
+ Returns name with parent directories.
467
+ """
468
+ warnings.warn(
469
+ "file.get_full_name() is deprecated and will be removed "
470
+ "in a future version. Use `file.path` directly.",
471
+ DeprecationWarning,
472
+ stacklevel=2,
473
+ )
443
474
  return self.path
444
475
 
445
- def get_uri(self):
476
+ def get_path_normalized(self) -> str:
477
+ if not self.path:
478
+ raise FileError("path must not be empty", self.source, self.path)
479
+
480
+ if self.path.endswith("/"):
481
+ raise FileError("path must not be a directory", self.source, self.path)
482
+
483
+ normpath = os.path.normpath(self.path)
484
+ normpath = PurePath(normpath).as_posix()
485
+
486
+ if normpath == ".":
487
+ raise FileError("path must not be a directory", self.source, self.path)
488
+
489
+ if any(part == ".." for part in PurePath(normpath).parts):
490
+ raise FileError("path must not contain '..'", self.source, self.path)
491
+
492
+ return normpath
493
+
494
+ def get_uri(self) -> str:
446
495
  """Returns file URI."""
447
- return f"{self.source}/{self.get_full_name()}"
496
+ return f"{self.source}/{self.get_path_normalized()}"
497
+
498
+ def get_fs_path(self) -> str:
499
+ """
500
+ Returns file path with respect to the filescheme.
448
501
 
449
- def get_path(self) -> str:
450
- """Returns file path."""
502
+ If `normalize` is True, the path is normalized to remove any redundant
503
+ separators and up-level references.
504
+
505
+ If the file scheme is "file", the path is converted to a local file path
506
+ using `url2pathname`. Otherwise, the original path with scheme is returned.
507
+ """
451
508
  path = unquote(self.get_uri())
452
- source = urlparse(self.source)
453
- if source.scheme == "file":
454
- path = urlparse(path).path
455
- path = url2pathname(path)
509
+ path_parsed = urlparse(path)
510
+ if path_parsed.scheme == "file":
511
+ path = url2pathname(path_parsed.path)
456
512
  return path
457
513
 
458
514
  def get_destination_path(
@@ -467,7 +523,7 @@ class File(DataModel):
467
523
  elif placement == "etag":
468
524
  path = f"{self.etag}{self.get_file_suffix()}"
469
525
  elif placement == "fullpath":
470
- path = unquote(self.get_full_name())
526
+ path = unquote(self.get_path_normalized())
471
527
  source = urlparse(self.source)
472
528
  if source.scheme and source.scheme != "file":
473
529
  path = posixpath.join(source.netloc, path)
@@ -505,8 +561,9 @@ class File(DataModel):
505
561
  ) from e
506
562
 
507
563
  try:
508
- info = client.fs.info(client.get_full_path(self.path))
509
- converted_info = client.info_to_file(info, self.path)
564
+ normalized_path = self.get_path_normalized()
565
+ info = client.fs.info(client.get_full_path(normalized_path))
566
+ converted_info = client.info_to_file(info, normalized_path)
510
567
  return type(self)(
511
568
  path=self.path,
512
569
  source=self.source,
@@ -517,8 +574,17 @@ class File(DataModel):
517
574
  last_modified=converted_info.last_modified,
518
575
  location=self.location,
519
576
  )
577
+ except FileError as e:
578
+ logger.warning(
579
+ "File error when resolving %s/%s: %s", self.source, self.path, str(e)
580
+ )
520
581
  except (FileNotFoundError, PermissionError, OSError) as e:
521
- logger.warning("File system error when resolving %s: %s", self.path, str(e))
582
+ logger.warning(
583
+ "File system error when resolving %s/%s: %s",
584
+ self.source,
585
+ self.path,
586
+ str(e),
587
+ )
522
588
 
523
589
  return type(self)(
524
590
  path=self.path,
@@ -534,6 +600,8 @@ class File(DataModel):
534
600
 
535
601
  def resolve(file: File) -> File:
536
602
  """
603
+ [DEPRECATED] Use `file.resolve()` directly instead.
604
+
537
605
  Resolve a File object by checking its existence and updating its metadata.
538
606
 
539
607
  This function is a wrapper around the File.resolve() method, designed to be
@@ -549,6 +617,12 @@ def resolve(file: File) -> File:
549
617
  RuntimeError: If the file's catalog is not set or if
550
618
  the file source protocol is unsupported.
551
619
  """
620
+ warnings.warn(
621
+ "resolve() is deprecated and will be removed "
622
+ "in a future version. Use file.resolve() directly.",
623
+ DeprecationWarning,
624
+ stacklevel=2,
625
+ )
552
626
  return file.resolve()
553
627
 
554
628
 
@@ -896,7 +970,7 @@ class ArrowRow(DataModel):
896
970
  ds = dataset(path, **self.kwargs)
897
971
 
898
972
  else:
899
- path = self.file.get_path()
973
+ path = self.file.get_fs_path()
900
974
  ds = dataset(path, filesystem=self.file.get_fs(), **self.kwargs)
901
975
 
902
976
  return ds.take([self.index]).to_reader()
datachain/lib/image.py CHANGED
@@ -19,7 +19,7 @@ def image_info(file: Union[File, ImageFile]) -> Image:
19
19
  try:
20
20
  img = file.as_image_file().read()
21
21
  except Exception as exc:
22
- raise FileError(file, "unable to open image file") from exc
22
+ raise FileError("unable to open image file", file.source, file.path) from exc
23
23
 
24
24
  return Image(
25
25
  width=img.width,
datachain/lib/tar.py CHANGED
@@ -6,12 +6,11 @@ from datachain.lib.file import File, TarVFile
6
6
 
7
7
 
8
8
  def build_tar_member(parent: File, info: tarfile.TarInfo) -> File:
9
- new_parent = parent.get_full_name()
10
9
  etag_string = "-".join([parent.etag, info.name, str(info.mtime)])
11
10
  etag = hashlib.md5(etag_string.encode(), usedforsecurity=False).hexdigest()
12
11
  return File(
13
12
  source=parent.source,
14
- path=f"{new_parent}/{info.name}",
13
+ path=f"{parent.name}/{info.name}",
15
14
  version=parent.version,
16
15
  size=info.size,
17
16
  etag=etag,
datachain/lib/utils.py CHANGED
@@ -18,13 +18,11 @@ class AbstractUDF(ABC):
18
18
 
19
19
 
20
20
  class DataChainError(Exception):
21
- def __init__(self, message):
22
- super().__init__(message)
21
+ pass
23
22
 
24
23
 
25
24
  class DataChainParamsError(DataChainError):
26
- def __init__(self, message):
27
- super().__init__(message)
25
+ pass
28
26
 
29
27
 
30
28
  class DataChainColumnError(DataChainParamsError):
datachain/lib/video.py CHANGED
@@ -34,21 +34,27 @@ def video_info(file: Union[File, VideoFile]) -> Video:
34
34
  file.ensure_cached()
35
35
  file_path = file.get_local_path()
36
36
  if not file_path:
37
- raise FileError(file, "unable to download video file")
37
+ raise FileError("unable to download video file", file.source, file.path)
38
38
 
39
39
  try:
40
40
  probe = ffmpeg.probe(file_path)
41
41
  except Exception as exc:
42
- raise FileError(file, "unable to extract metadata from video file") from exc
42
+ raise FileError(
43
+ "unable to extract metadata from video file", file.source, file.path
44
+ ) from exc
43
45
 
44
46
  all_streams = probe.get("streams")
45
47
  video_format = probe.get("format")
46
48
  if not all_streams or not video_format:
47
- raise FileError(file, "unable to extract metadata from video file")
49
+ raise FileError(
50
+ "unable to extract metadata from video file", file.source, file.path
51
+ )
48
52
 
49
53
  video_streams = [s for s in all_streams if s["codec_type"] == "video"]
50
54
  if len(video_streams) == 0:
51
- raise FileError(file, "unable to extract metadata from video file")
55
+ raise FileError(
56
+ "unable to extract metadata from video file", file.source, file.path
57
+ )
52
58
 
53
59
  video_stream = video_streams[0]
54
60
 
@@ -35,7 +35,7 @@ warnings.filterwarnings(
35
35
 
36
36
  class WDSError(DataChainError):
37
37
  def __init__(self, tar_stream, message: str):
38
- super().__init__(f"WebDataset error '{tar_stream.get_full_name()}': {message}")
38
+ super().__init__(f"WebDataset error '{tar_stream.name}': {message}")
39
39
 
40
40
 
41
41
  class CoreFileDuplicationError(WDSError):
@@ -1348,7 +1348,7 @@ class DatasetQuery:
1348
1348
 
1349
1349
  async def get_params(row: Sequence) -> tuple:
1350
1350
  row_dict = RowDict(zip(query_fields, row))
1351
- return tuple(
1351
+ return tuple( # noqa: C409
1352
1352
  [
1353
1353
  await p.get_value_async(
1354
1354
  self.catalog, row_dict, mapper, **kwargs
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.18.7
3
+ Version: 0.18.8
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -94,7 +94,7 @@ Requires-Dist: scipy; extra == "tests"
94
94
  Requires-Dist: ultralytics; extra == "tests"
95
95
  Provides-Extra: dev
96
96
  Requires-Dist: datachain[docs,tests]; extra == "dev"
97
- Requires-Dist: mypy==1.15.0; extra == "dev"
97
+ Requires-Dist: mypy==1.16.0; extra == "dev"
98
98
  Requires-Dist: types-python-dateutil; extra == "dev"
99
99
  Requires-Dist: types-pytz; extra == "dev"
100
100
  Requires-Dist: types-PyYAML; extra == "dev"
@@ -1,7 +1,7 @@
1
1
  datachain/__init__.py,sha256=Dx_Dw6AuvC_CZtXxfRv0Z-ND6ieC4Cz-tZkMW-Rvmz4,1496
2
2
  datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
3
3
  datachain/asyn.py,sha256=RH_jFwJcTXxhEFomaI9yL6S3Onau6NZ6FSKfKFGtrJE,9689
4
- datachain/cache.py,sha256=yQblPhOh_Mq74Ma7xT1CL1idLJ0HgrQxpGVYvRy_9Eg,3623
4
+ datachain/cache.py,sha256=3GWMvF2LMpz2l5lWbtbpmzSB-92eGCCtujeWlFa3r14,3609
5
5
  datachain/config.py,sha256=g8qbNV0vW2VEKpX-dGZ9pAn0DAz6G2ZFcr7SAV3PoSM,4272
6
6
  datachain/dataset.py,sha256=XUZ-kSBL1y6juFqlSWXXbattGS1E53lXpyhc0Ip1_AA,20527
7
7
  datachain/delta.py,sha256=q-ritPMxgsTh53qJYd2N1TqZ3Inxc7GJ9JED9rE-Z1M,3994
@@ -39,10 +39,10 @@ datachain/cli/parser/utils.py,sha256=rETdD-9Hq9A4OolgfT7jQw4aoawtbfmkdtH6E7nkhpI
39
39
  datachain/client/__init__.py,sha256=1kDpCPoibMXi1gExR4lTLc5pi-k6M5TANiwtXkPoLhU,49
40
40
  datachain/client/azure.py,sha256=7yyAgANHfu9Kfh187MKNTT1guvu9Q-WYsi4vYoY3aew,3270
41
41
  datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
42
- datachain/client/fsspec.py,sha256=c8oRBUMo31k8bMB_mIA60PDfna4nYTdslzHqmqL2Uvg,13918
42
+ datachain/client/fsspec.py,sha256=SSKhvl7x2IzECYUsJ_4hYxvy46AiU0wpsfPduE9alFI,13995
43
43
  datachain/client/gcs.py,sha256=8hcFhEHp8qGRsJoyfCoawfuwb1Et-MSkyQoM9AnNuXI,5204
44
44
  datachain/client/hf.py,sha256=posnI5WOKOMG1yY_ZiV9Orcd24QsUPKZlOXgJVLxxrM,1558
45
- datachain/client/local.py,sha256=cGoCYflribzexiOe-Y1qbaE2fJRh-_EgQrfCSa0yK_E,4568
45
+ datachain/client/local.py,sha256=0J52Wzvw25hSucVlzBvLuMRAZwrAHZAYDvD1mNBqf4c,4607
46
46
  datachain/client/s3.py,sha256=6DNVGLg-woPS1DVlYVX2rIlunNblsuxyOnI1rSzhW3k,7515
47
47
  datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
48
48
  datachain/data_storage/db_engine.py,sha256=n8ojCbvVMPY2e3SG8fUaaD0b9GkVfpl_Naa_6EiHfWg,3788
@@ -68,13 +68,13 @@ datachain/func/random.py,sha256=t7jwXsI8-hy0qAdvjAntgzy-AHtTAfozlZ1CpKR-QZE,458
68
68
  datachain/func/string.py,sha256=X9u4ip97U63RCaKRhMddoze7HgPiY3LbPRn9G06UWWo,7311
69
69
  datachain/func/window.py,sha256=ImyRpc1QI8QUSPO7KdD60e_DPVo7Ja0G5kcm6BlyMcw,1584
70
70
  datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
71
- datachain/lib/arrow.py,sha256=mFO_6wRqzpEzBhXf7Xn1aeLUvaiHcC6XQ-8as9sbcgY,10253
71
+ datachain/lib/arrow.py,sha256=K8djofgt4HEgxnkwqZZChccAqeIQ_1D2urGyqti-1-4,10259
72
72
  datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
73
73
  datachain/lib/data_model.py,sha256=ZwBXELtqROEdLL4DmxTipnwUZmhQvMz_UVDzyf7nQ9Y,2899
74
74
  datachain/lib/dataset_info.py,sha256=d-jz6zeDU5DEgYtyeSF5nK0MU-40FV5km_iOCh4pXzo,3179
75
- datachain/lib/file.py,sha256=0oFm1MWU7AatXplxRj-6Xbjjb6A_AvM_awwk9mYb0hc,30466
75
+ datachain/lib/file.py,sha256=-Y0ccgfQt-2jOnNhOH5j5fTQpCsS9z2ja97umDUHbmA,33054
76
76
  datachain/lib/hf.py,sha256=gjxuStZBlKtNk3-4yYSlWZDv9zBGblOdvEy_Lwap5hA,5882
77
- datachain/lib/image.py,sha256=butvUY_33PVEYPKX2nVCPeJjJVcBaptZwsE9REQsTS8,3247
77
+ datachain/lib/image.py,sha256=erWvZW5M3emnbl6_fGAOPyKm-1EKbt3vOdWPfe3Oo7U,3265
78
78
  datachain/lib/listing.py,sha256=5_GoATtIwCtd1JMqlorPB_vQDxndOQZpiWjNOG3NMw4,7007
79
79
  datachain/lib/listing_info.py,sha256=9ua40Hw0aiQByUw3oAEeNzMavJYfW0Uhe8YdCTK-m_g,1110
80
80
  datachain/lib/meta_formats.py,sha256=Epydbdch1g4CojK8wd_ePzmwmljC4fVWlJtZ16jsX-A,6349
@@ -82,13 +82,13 @@ datachain/lib/model_store.py,sha256=DNIv8Y6Jtk1_idNLzIpsThOsdW2BMAudyUCbPUcgcxk,
82
82
  datachain/lib/pytorch.py,sha256=elrmJ4YUDC2LZ9yXM1KwImVBOYIBJf6k0ZR7eSe6Aao,7712
83
83
  datachain/lib/settings.py,sha256=ZELRCTLbi5vzRPiDX6cQ9LLg9TefJ_A05gIGni0lll8,2535
84
84
  datachain/lib/signal_schema.py,sha256=Zhg8qThFDf9eoNWFH6KGeYB-sIGys7A_ybq2CUBG7Dg,36127
85
- datachain/lib/tar.py,sha256=3WIzao6yD5fbLqXLTt9GhPGNonbFIs_fDRu-9vgLgsA,1038
85
+ datachain/lib/tar.py,sha256=k8RFnF72H1jxbMghQQbmoGL-UsA1im8gRLXBM1GJAYI,999
86
86
  datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
87
87
  datachain/lib/udf.py,sha256=FWqA476ygdk4MU-0qehYKxvnt8Tekh21Cyf3RgddD1k,16674
88
88
  datachain/lib/udf_signature.py,sha256=2EtsOPDNSPqcOlYwqbCdy6RF5MldI-7smii8aLy8p7Y,7543
89
- datachain/lib/utils.py,sha256=QrjVs_oLRXEotOPUYurBJypBFi_ReTJmxcnJeH4j2Uk,1596
90
- datachain/lib/video.py,sha256=suH_8Mi8VYk4-IVb1vjSduF_njs64ji1WGKHxDLnGYw,6629
91
- datachain/lib/webdataset.py,sha256=o7SHk5HOUWsZ5Ln04xOM04eQqiBHiJNO7xLgyVBrwo8,6924
89
+ datachain/lib/utils.py,sha256=rG2y7NwTqZOuomZZRmrA-Q-ANM_j1cToQYqDJoOeGyU,1480
90
+ datachain/lib/video.py,sha256=u6fLJWj5G6QqsVkpfHnKGklBNpG3BRRg6v3izngnNcU,6767
91
+ datachain/lib/webdataset.py,sha256=hZWar13LoZ1TAidFW_sl9rUO-KtMJQY3OFmbnPkJw_A,6913
92
92
  datachain/lib/webdataset_laion.py,sha256=xvT6m_r5y0KbOx14BUe7UC5mOgrktJq53Mh-H0EVlUE,2525
93
93
  datachain/lib/convert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
94
94
  datachain/lib/convert/flatten.py,sha256=IZFiUYbgXSxXhPSG5Cqf5IjnJ4ZDZKXMr4o_yCR1NY4,1505
@@ -121,7 +121,7 @@ datachain/model/ultralytics/pose.py,sha256=pBlmt63Qe68FKmexHimUGlNbNOoOlMHXG4fzX
121
121
  datachain/model/ultralytics/segment.py,sha256=63bDCj43E6iZ0hFI5J6uQfksdCmjEp6sEm1XzVaE8pw,2986
122
122
  datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
123
123
  datachain/query/batch.py,sha256=-goxLpE0EUvaDHu66rstj53UnfHpYfBUGux8GSpJ93k,4306
124
- datachain/query/dataset.py,sha256=3c3MAiIl7ZnCii_0dZA-Om73ornNMSKkna32JX3H05E,60587
124
+ datachain/query/dataset.py,sha256=dI51zOU1Drev65f6SPn4mvRdwRXs4SOW5STMm3WYd7A,60601
125
125
  datachain/query/dispatch.py,sha256=A0nPxn6mEN5d9dDo6S8m16Ji_9IvJLXrgF2kqXdi4fs,15546
126
126
  datachain/query/metrics.py,sha256=DOK5HdNVaRugYPjl8qnBONvTkwjMloLqAr7Mi3TjCO0,858
127
127
  datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
@@ -153,9 +153,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
153
153
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
154
154
  datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
155
155
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
156
- datachain-0.18.7.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
157
- datachain-0.18.7.dist-info/METADATA,sha256=OXGuP0EbV6ZC57NPhtyse2-6OP2pDKbhJkmcDfHp1mU,11319
158
- datachain-0.18.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
159
- datachain-0.18.7.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
160
- datachain-0.18.7.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
161
- datachain-0.18.7.dist-info/RECORD,,
156
+ datachain-0.18.8.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
157
+ datachain-0.18.8.dist-info/METADATA,sha256=7_EQNrTrI5u-hjaGNfJOamf3LW-qljTmCuELCFkA2yE,11319
158
+ datachain-0.18.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
159
+ datachain-0.18.8.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
160
+ datachain-0.18.8.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
161
+ datachain-0.18.8.dist-info/RECORD,,