datachain 0.2.18__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


datachain/cache.py CHANGED
@@ -24,8 +24,7 @@ sha256 = partial(hashlib.sha256, usedforsecurity=False)
 @attrs.frozen
 class UniqueId:
     storage: "StorageURI"
-    parent: str
-    name: str
+    path: str
     size: int
     etag: str
     version: str = ""
@@ -34,10 +33,6 @@ class UniqueId:
     location: Optional[str] = None
     last_modified: datetime = TIME_ZERO

-    @property
-    def path(self) -> str:
-        return f"{self.parent}/{self.name}" if self.parent else self.name
-
     def get_parsed_location(self) -> Optional[dict]:
         if not self.location:
             return None
@@ -53,10 +48,10 @@ class UniqueId:
         return loc_stack[0]

     def get_hash(self) -> str:
-        etag = f"{self.vtype}{self.location}" if self.vtype else self.etag
-        return sha256(
-            f"{self.storage}/{self.parent}/{self.name}/{self.version}/{etag}".encode()
-        ).hexdigest()
+        fingerprint = f"{self.storage}/{self.path}/{self.version}/{self.etag}"
+        if self.location:
+            fingerprint += f"/{self.location}"
+        return sha256(fingerprint.encode()).hexdigest()


 def try_scandir(path):
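
For reference, a minimal sketch of the new single-field cache key (the constructor call and values below are illustrative; per the diff, version defaults to "" and location, when set, is appended as an extra fingerprint segment):

from datachain.cache import UniqueId

uid = UniqueId(
    "s3://ldb-public",       # storage
    "animals/dogs/dog.jpg",  # path (replaces the old parent + name pair)
    12345,                   # size
    "abc123",                # etag
)
# sha256 of "s3://ldb-public/animals/dogs/dog.jpg//abc123"
print(uid.get_hash())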
datachain/catalog/catalog.py CHANGED
@@ -529,21 +529,16 @@ def find_column_to_str(  # noqa: PLR0911
     if column == "du":
         return str(
             src.listing.du(
-                {
-                    f: row[field_lookup[f]]
-                    for f in ["dir_type", "size", "parent", "name"]
-                }
+                {f: row[field_lookup[f]] for f in ["dir_type", "size", "path"]}
             )[0]
         )
     if column == "name":
-        return row[field_lookup["name"]] or ""
+        return posixpath.basename(row[field_lookup["path"]]) or ""
     if column == "owner":
         return row[field_lookup["owner_name"]] or ""
     if column == "path":
         is_dir = row[field_lookup["dir_type"]] == DirType.DIR
-        parent = row[field_lookup["parent"]]
-        name = row[field_lookup["name"]]
-        path = f"{parent}/{name}" if parent else name
+        path = row[field_lookup["path"]]
         if is_dir and path:
             full_path = path + "/"
         else:
@@ -724,8 +719,7 @@ class Catalog:
         columns = [
             Column("vtype", String),
            Column("dir_type", Int),
-            Column("parent", String),
-            Column("name", String),
+            Column("path", String),
            Column("etag", String),
            Column("version", String),
            Column("is_latest", Boolean),
@@ -1623,8 +1617,7 @@ class Catalog:
         Example output:
             {
                 "source": "s3://ldb-public",
-                "parent": "animals/dogs",
-                "name": "dog.jpg",
+                "path": "animals/dogs/dog.jpg",
                 ...
             }
         """
@@ -1675,8 +1668,7 @@ class Catalog:
     def _get_row_uid(self, row: RowDict) -> UniqueId:
         return UniqueId(
             row["source"],
-            row["parent"],
-            row["name"],
+            row["path"],
             row["size"],
             row["etag"],
             row["version"],
@@ -2308,16 +2300,14 @@ class Catalog:
         if column == "du":
             field_set.add("dir_type")
             field_set.add("size")
-            field_set.add("parent")
-            field_set.add("name")
+            field_set.add("path")
         elif column == "name":
-            field_set.add("name")
+            field_set.add("path")
         elif column == "owner":
             field_set.add("owner_name")
         elif column == "path":
             field_set.add("dir_type")
-            field_set.add("parent")
-            field_set.add("name")
+            field_set.add("path")
         elif column == "size":
             field_set.add("size")
         elif column == "type":
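
As the hunks above show, the ls-style "name" column is no longer stored; it is derived from "path" at display time, e.g.:

import posixpath

row_path = "animals/dogs/dog.jpg"
assert posixpath.basename(row_path) == "dog.jpg"  # value shown in the "name" column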
datachain/client/azure.py CHANGED
@@ -1,4 +1,3 @@
-import posixpath
 from typing import Any

 from adlfs import AzureBlobFileSystem
@@ -14,16 +13,10 @@ class AzureClient(Client):
     PREFIX = "az://"
     protocol = "az"

-    def convert_info(self, v: dict[str, Any], parent: str) -> Entry:
+    def convert_info(self, v: dict[str, Any], path: str) -> Entry:
         version_id = v.get("version_id")
-        name = v.get("name", "").split(DELIMITER)[-1]
-        if version_id:
-            version_suffix = f"?versionid={version_id}"
-            if name.endswith(version_suffix):
-                name = name[: -len(version_suffix)]
         return Entry.from_file(
-            parent=parent,
-            name=name,
+            path=path,
             etag=v.get("etag", "").strip('"'),
             version=version_id or "",
             is_latest=version_id is None or bool(v.get("is_current_version")),
@@ -50,9 +43,9 @@ class AzureClient(Client):
                 if not self._is_valid_key(b["name"]):
                     continue
                 info = (await self.fs._details([b]))[0]
-                full_path = info["name"]
-                parent = posixpath.dirname(self.rel_path(full_path))
-                entries.append(self.convert_info(info, parent))
+                entries.append(
+                    self.convert_info(info, self.rel_path(info["name"]))
+                )
             if entries:
                 await result_queue.put(entries)
                 pbar.update(len(entries))
datachain/client/fsspec.py CHANGED
@@ -277,7 +277,7 @@ class Client(ABC):
             if info["type"] == "directory":
                 subdirs.add(subprefix)
             else:
-                files.append(self.convert_info(info, prefix))
+                files.append(self.convert_info(info, subprefix))
         if files:
             await result_queue.put(files)
         found_count = len(subdirs) + len(files)
@@ -360,12 +360,11 @@ class Client(ABC):

         parent_uid = UniqueId(
             parent["source"],
-            parent["parent"],
-            parent["name"],
-            parent["etag"],
+            parent["path"],
             parent["size"],
-            parent["vtype"],
-            parent["location"],
+            parent["etag"],
+            vtype=parent["vtype"],
+            location=parent["location"],
         )
         f = self.open_object(parent_uid, use_cache=use_cache)
         return FileSlice(f, offset, size, posixpath.basename(uid.path))
datachain/client/gcs.py CHANGED
@@ -1,7 +1,6 @@
 import asyncio
 import json
 import os
-import posixpath
 from collections.abc import Iterable
 from datetime import datetime
 from typing import Any, Optional, cast
@@ -110,20 +109,11 @@ class GCSClient(Client):

     def _entry_from_dict(self, d: dict[str, Any]) -> Entry:
         info = self.fs._process_object(self.name, d)
-        full_path = info["name"]
-        subprefix = self.rel_path(full_path)
-        parent = posixpath.dirname(subprefix)
-        return self.convert_info(info, parent)
-
-    def convert_info(self, v: dict[str, Any], parent: str) -> Entry:
-        name = v.get("name", "").split(DELIMITER)[-1]
-        if "generation" in v:
-            gen = f"#{v['generation']}"
-            if name.endswith(gen):
-                name = name[: -len(gen)]
+        return self.convert_info(info, self.rel_path(info["name"]))
+
+    def convert_info(self, v: dict[str, Any], path: str) -> Entry:
         return Entry.from_file(
-            parent=parent,
-            name=name,
+            path=path,
             etag=v.get("etag", ""),
             version=v.get("generation", ""),
             is_latest=not v.get("timeDeleted"),
datachain/client/local.py CHANGED
@@ -140,11 +140,9 @@ class FileClient(Client):
             full_path += "/"
         return full_path

-    def convert_info(self, v: dict[str, Any], parent: str) -> Entry:
-        name = posixpath.basename(v["name"])
+    def convert_info(self, v: dict[str, Any], path: str) -> Entry:
         return Entry.from_file(
-            parent=parent,
-            name=name,
+            path=path,
             etag=v["mtime"].hex(),
             is_latest=True,
             last_modified=datetime.fromtimestamp(v["mtime"], timezone.utc),
datachain/client/s3.py CHANGED
@@ -1,5 +1,4 @@
 import asyncio
-import posixpath
 from typing import Any, cast

 from botocore.exceptions import NoCredentialsError
@@ -112,10 +111,8 @@ class ClientS3(Client):
             await self._fetch_flat(start_prefix, result_queue)

     def _entry_from_boto(self, v, bucket, versions=False):
-        parent, name = posixpath.split(v["Key"])
         return Entry.from_file(
-            parent=parent,
-            name=name,
+            path=v["Key"],
             etag=v.get("ETag", "").strip('"'),
             version=ClientS3.clean_s3_version(v.get("VersionId", "")),
             is_latest=v.get("IsLatest", True),
@@ -145,7 +142,7 @@ class ClientS3(Client):
                 if info["type"] == "directory":
                     subdirs.add(subprefix)
                 else:
-                    files.append(self.convert_info(info, prefix.rstrip("/")))
+                    files.append(self.convert_info(info, subprefix))
                 pbar.update()
                 found = True
         if not found:
@@ -159,10 +156,9 @@ class ClientS3(Client):
     def clean_s3_version(ver):
         return ver if ver != "null" else ""

-    def convert_info(self, v: dict[str, Any], parent: str) -> Entry:
+    def convert_info(self, v: dict[str, Any], path: str) -> Entry:
         return Entry.from_file(
-            parent=parent,
-            name=v.get("Key", "").split(DELIMITER)[-1],
+            path=path,
             etag=v.get("ETag", "").strip('"'),
             version=ClientS3.clean_s3_version(v.get("VersionId", "")),
             is_latest=v.get("IsLatest", True),
datachain/data_storage/schema.py CHANGED
@@ -80,8 +80,7 @@ class DirExpansion:
             q.c.vtype,
             (q.c.dir_type == DirType.DIR).label("is_dir"),
             q.c.source,
-            q.c.parent,
-            q.c.name,
+            q.c.path,
             q.c.version,
             q.c.location,
         )
@@ -94,36 +93,29 @@ class DirExpansion:
                 q.c.vtype,
                 q.c.is_dir,
                 q.c.source,
-                q.c.parent,
-                q.c.name,
+                q.c.path,
                 q.c.version,
                 f.max(q.c.location).label("location"),
             )
             .select_from(q)
-            .group_by(
-                q.c.source, q.c.parent, q.c.name, q.c.vtype, q.c.is_dir, q.c.version
-            )
-            .order_by(
-                q.c.source, q.c.parent, q.c.name, q.c.vtype, q.c.is_dir, q.c.version
-            )
+            .group_by(q.c.source, q.c.path, q.c.vtype, q.c.is_dir, q.c.version)
+            .order_by(q.c.source, q.c.path, q.c.vtype, q.c.is_dir, q.c.version)
         )

     @classmethod
     def query(cls, q):
         q = cls.base_select(q).cte(recursive=True)
-        parent_parent = path.parent(q.c.parent)
-        parent_name = path.name(q.c.parent)
+        parent = path.parent(q.c.path)
         q = q.union_all(
             sa.select(
                 sa.literal(-1).label("sys__id"),
                 sa.literal("").label("vtype"),
                 true().label("is_dir"),
                 q.c.source,
-                parent_parent.label("parent"),
-                parent_name.label("name"),
+                parent.label("path"),
                 sa.literal("").label("version"),
                 null().label("location"),
-            ).where((parent_name != "") | (parent_parent != ""))
+            ).where(parent != "")
         )
         return cls.apply_group_by(q)

datachain/data_storage/warehouse.py CHANGED
@@ -19,6 +19,7 @@ from datachain.client import Client
 from datachain.data_storage.serializer import Serializable
 from datachain.dataset import DatasetRecord, RowDict
 from datachain.node import DirType, DirTypeGroup, Entry, Node, NodeWithPath, get_path
+from datachain.sql.functions import path as pathfunc
 from datachain.sql.types import Int, SQLType
 from datachain.storage import StorageURI
 from datachain.utils import sql_escape_like
@@ -373,9 +374,7 @@ class AbstractWarehouse(ABC, Serializable):

         else:
             parent = self.get_node_by_path(dr, path.lstrip("/").rstrip("/*"))
-            select_query = select_query.where(
-                (dr.c.parent == parent.path) | (self.path_expr(dr) == path)
-            )
+            select_query = select_query.where(pathfunc.parent(dr.c.path) == parent.path)
         return select_query

     def rename_dataset_table(
@@ -532,8 +531,8 @@ class AbstractWarehouse(ABC, Serializable):
             dr,
             parent_path,
             type="dir",
-            conds=[sa.Column("parent") == parent_path],
-            order_by=["source", "parent", "name"],
+            conds=[pathfunc.parent(sa.Column("path")) == parent_path],
+            order_by=["source", "path"],
         )
         return self.get_nodes(query)

@@ -556,7 +555,7 @@ class AbstractWarehouse(ABC, Serializable):
                 & ~self.instr(relpath, "/")
                 & (self.path_expr(de) != dirpath)
             )
-            .order_by(de.c.source, de.c.parent, de.c.name, de.c.version)
+            .order_by(de.c.source, de.c.path, de.c.version)
         )

     def _get_node_by_path_list(
@@ -572,8 +571,8 @@ class AbstractWarehouse(ABC, Serializable):
         ).subquery()
         query = self.expand_query(de, dr)

-        q = query.where((de.c.parent == parent) & (de.c.name == name)).order_by(
-            de.c.source, de.c.parent, de.c.name, de.c.version
+        q = query.where(de.c.path == get_path(parent, name)).order_by(
+            de.c.source, de.c.path, de.c.version
         )
         row = next(self.dataset_rows_select(q), None)
         if not row:
@@ -636,8 +635,7 @@ class AbstractWarehouse(ABC, Serializable):
             case((de.c.is_dir == true(), DirType.DIR), else_=dr.c.dir_type).label(
                 "dir_type"
             ),
-            de.c.parent,
-            de.c.name,
+            de.c.path,
             with_default(dr.c.etag),
             de.c.version,
             with_default(dr.c.is_latest),
@@ -670,7 +668,7 @@ class AbstractWarehouse(ABC, Serializable):
             .where(
                 dr.c.is_latest == true(),
                 dr.c.dir_type != DirType.DIR,
-                (dr.c.parent + "/").startswith(path),
+                dr.c.path.startswith(path),
             )
             .exists()
         )
@@ -678,8 +676,7 @@ class AbstractWarehouse(ABC, Serializable):
         if not row:
             raise FileNotFoundError(f"Unable to resolve path {path}")
         path = path.removesuffix("/")
-        parent, name = path.rsplit("/", 1) if "/" in path else ("", path)
-        return Node.from_dir(parent, name)
+        return Node.from_dir(path)

     def expand_path(self, dataset_rows: "DataTable", path: str) -> list[Node]:
         """Simulates Unix-like shell expansion"""
@@ -703,18 +700,21 @@ class AbstractWarehouse(ABC, Serializable):
         de = dr.dataset_dir_expansion(
             dr.select().where(dr.c.is_latest == true()).subquery()
         ).subquery()
-        where_cond = de.c.parent == parent_path
+        where_cond = pathfunc.parent(de.c.path) == parent_path
         if parent_path == "":
             # Exclude the root dir
-            where_cond = where_cond & (de.c.name != "")
+            where_cond = where_cond & (de.c.path != "")
         inner_query = self.expand_query(de, dr).where(where_cond).subquery()
+
+        def field_to_expr(f):
+            if f == "name":
+                return pathfunc.name(inner_query.c.path)
+            return getattr(inner_query.c, f)
+
         return self.db.execute(
-            sa.select(*(getattr(inner_query.c, f) for f in fields))
-            .select_from(inner_query)
-            .order_by(
+            select(*(field_to_expr(f) for f in fields)).order_by(
                 inner_query.c.source,
-                inner_query.c.parent,
-                inner_query.c.name,
+                inner_query.c.path,
                 inner_query.c.version,
             )
         )
@@ -727,21 +727,20 @@ class AbstractWarehouse(ABC, Serializable):
         """
         dr = dataset_rows
         dirpath = f"{parent_path}/"
-        relpath = func.substr(self.path_expr(dr), len(dirpath) + 1)

         def field_to_expr(f):
             if f == "name":
-                return relpath
+                return pathfunc.name(dr.c.path)
             return getattr(dr.c, f)

         q = (
             select(*(field_to_expr(f) for f in fields))
             .where(
                 self.path_expr(dr).like(f"{sql_escape_like(dirpath)}%"),
-                ~self.instr(relpath, "/"),
+                ~self.instr(pathfunc.name(dr.c.path), "/"),
                 dr.c.is_latest == true(),
             )
-            .order_by(dr.c.source, dr.c.parent, dr.c.name, dr.c.version, dr.c.etag)
+            .order_by(dr.c.source, dr.c.path, dr.c.version, dr.c.etag)
         )
         return self.db.execute(q)

@@ -758,7 +757,7 @@ class AbstractWarehouse(ABC, Serializable):
         if isinstance(node, dict):
             is_dir = node.get("is_dir", node["dir_type"] in DirTypeGroup.SUBOBJ_DIR)
             node_size = node["size"]
-            path = get_path(node["parent"], node["name"])
+            path = node["path"]
         else:
             is_dir = node.is_container
             node_size = node.size
@@ -790,7 +789,7 @@ class AbstractWarehouse(ABC, Serializable):
         return results[0] or 0, 0

     def path_expr(self, t):
-        return case((t.c.parent == "", t.c.name), else_=t.c.parent + "/" + t.c.name)
+        return t.c.path

     def _find_query(
         self,
@@ -947,11 +946,7 @@ class AbstractWarehouse(ABC, Serializable):
         tq = target_query.alias("target_query")

         source_target_join = sa.join(
-            sq,
-            tq,
-            (sq.c.source == tq.c.source)
-            & (sq.c.parent == tq.c.parent)
-            & (sq.c.name == tq.c.name),
+            sq, tq, (sq.c.source == tq.c.source) & (sq.c.path == tq.c.path)
         )

         return (
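
A brief sketch of the query pattern the warehouse now relies on; pathfunc.parent and pathfunc.name are the SQL helpers imported above, while the standalone column object here is for illustration only:

import sqlalchemy as sa
from datachain.sql.functions import path as pathfunc

path_col = sa.column("path")
# direct children of a directory: compare the parent of "path" ...
children_cond = pathfunc.parent(path_col) == "animals/dogs"
# ... and the old "name" column becomes a basename expression
name_expr = pathfunc.name(path_col)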
datachain/lib/dc.py CHANGED
@@ -49,6 +49,7 @@ from datachain.query.dataset import (
     detach,
 )
 from datachain.query.schema import Column, DatasetRow
+from datachain.sql.functions import path as pathfunc
 from datachain.utils import inside_notebook

 if TYPE_CHECKING:
@@ -202,7 +203,7 @@ class DataChain(DatasetQuery):

     DEFAULT_FILE_RECORD: ClassVar[dict] = {
         "source": "",
-        "name": "",
+        "path": "",
         "vtype": "",
         "size": 0,
     }
@@ -1586,10 +1587,11 @@ class DataChain(DatasetQuery):
         use_cache: bool = True,
     ) -> None:
         """Method that exports all files from chain to some folder."""
-        if placement == "filename":
-            print("Checking if file names are unique")
-            if self.distinct(f"{signal}.name").count() != self.count():
-                raise ValueError("Files with the same name found")
+        if placement == "filename" and (
+            super().distinct(pathfunc.name(C(f"{signal}__path"))).count()
+            != self.count()
+        ):
+            raise ValueError("Files with the same name found")

         for file in self.collect(signal):
             file.export(output, placement, use_cache)  # type: ignore[union-attr]
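
Hypothetical usage of the updated check (the chain construction and method name follow datachain's public API but are illustrative here): with placement="filename", the uniqueness test now counts distinct basenames of file.path in SQL rather than reading a stored name field:

from datachain.lib.dc import DataChain

chain = DataChain.from_storage("s3://ldb-public/animals/")
# Raises ValueError("Files with the same name found") if two objects share
# a basename, e.g. "cats/1.jpg" and "dogs/1.jpg".
chain.export_files("./output", placement="filename")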
datachain/lib/file.py CHANGED
@@ -6,7 +6,7 @@ from abc import ABC, abstractmethod
 from contextlib import contextmanager
 from datetime import datetime
 from io import BytesIO
-from pathlib import Path
+from pathlib import Path, PurePosixPath
 from typing import TYPE_CHECKING, Any, ClassVar, Literal, Optional, Union
 from urllib.parse import unquote, urlparse
 from urllib.request import url2pathname
@@ -111,8 +111,7 @@ class File(DataModel):
     """`DataModel` for reading binary files."""

     source: str = Field(default="")
-    parent: str = Field(default="")
-    name: str
+    path: str
     size: int = Field(default=0)
     version: str = Field(default="")
     etag: str = Field(default="")
@@ -123,8 +122,7 @@ class File(DataModel):

     _datachain_column_types: ClassVar[dict[str, Any]] = {
         "source": String,
-        "parent": String,
-        "name": String,
+        "path": String,
         "size": Int,
         "version": String,
         "etag": String,
@@ -136,8 +134,7 @@ class File(DataModel):

     _unique_id_keys: ClassVar[list[str]] = [
         "source",
-        "parent",
-        "name",
+        "path",
         "size",
         "etag",
         "version",
@@ -168,11 +165,9 @@ class File(DataModel):
     def validate_location(cls, v):
         return File._validate_dict(v)

-    @field_validator("parent", mode="before")
+    @field_validator("path", mode="before")
     @classmethod
     def validate_path(cls, path):
-        if path == "":
-            return ""
         return Path(path).as_posix()

     def model_dump_custom(self):
@@ -185,6 +180,14 @@ class File(DataModel):
         self._catalog = None
         self._caching_enabled = False

+    @property
+    def name(self):
+        return PurePosixPath(self.path).name
+
+    @property
+    def parent(self):
+        return str(PurePosixPath(self.path).parent)
+
     @contextmanager
     def open(self, mode: Literal["rb", "r"] = "rb"):
         """Open the file and return a file object."""
@@ -261,19 +264,19 @@ class File(DataModel):

     def get_file_suffix(self):
         """Returns last part of file name with `.`."""
-        return Path(self.name).suffix
+        return PurePosixPath(self.path).suffix

     def get_file_ext(self):
         """Returns last part of file name without `.`."""
-        return Path(self.name).suffix.strip(".")
+        return PurePosixPath(self.path).suffix.strip(".")

     def get_file_stem(self):
         """Returns file name without extension."""
-        return Path(self.name).stem
+        return PurePosixPath(self.path).stem

     def get_full_name(self):
         """Returns name with parent directories."""
-        return (Path(self.parent) / self.name).as_posix()
+        return self.path

     def get_uri(self):
         """Returns file URI."""
@@ -355,8 +358,7 @@ def get_file(type_: Literal["binary", "text", "image"] = "binary"):

     def get_file_type(
         source: str,
-        parent: str,
-        name: str,
+        path: str,
         size: int,
         version: str,
         etag: str,
@@ -367,8 +369,7 @@ def get_file(type_: Literal["binary", "text", "image"] = "binary"):
     ) -> file:  # type: ignore[valid-type]
         return file(
             source=source,
-            parent=parent,
-            name=name,
+            path=path,
             size=size,
             version=version,
             etag=etag,
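
With this change, name and parent become derived accessors on File; a small sketch based on the properties above:

from datachain.lib.file import File

f = File(path="animals/dogs/dog.jpg")
assert f.name == "dog.jpg"            # PurePosixPath(f.path).name
assert f.parent == "animals/dogs"     # str(PurePosixPath(f.path).parent)
assert f.get_file_ext() == "jpg"
assert f.get_full_name() == f.path    # the full name is now just the path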
datachain/lib/webdataset.py CHANGED
@@ -119,7 +119,7 @@ class Builder:
         return self._tar.extractfile(item).read().decode(self._encoding)

     def add(self, file: tarfile.TarInfo):
-        fstream = File(name=file.name)
+        fstream = File(path=file.name)
         ext = fstream.get_file_ext()
         stem = fstream.get_file_stem()

@@ -176,9 +176,8 @@ class Builder:
         )
         etag = hashlib.md5(etag_string.encode(), usedforsecurity=False).hexdigest()
         return File(
-            name=core_file.name,
             source=self._tar_stream.source,
-            parent=new_parent,
+            path=f"{new_parent}/{core_file.name}",
             version=self._tar_stream.version,
             size=core_file.size,
             etag=etag,
datachain/listing.py CHANGED
@@ -5,11 +5,12 @@ from itertools import zip_longest
 from typing import TYPE_CHECKING, Optional

 from fsspec.asyn import get_loop, sync
-from sqlalchemy import Column, case
+from sqlalchemy import Column
 from sqlalchemy.sql import func
 from tqdm import tqdm

 from datachain.node import DirType, Entry, Node, NodeWithPath
+from datachain.sql.functions import path as pathfunc
 from datachain.utils import suffix_to_number

 if TYPE_CHECKING:
@@ -129,7 +130,7 @@ class Listing:
         dir_path = []
         if not copy_dir_contents:
             dir_path.append(node.name)
-        subtree_nodes = src.find(sort=["parent", "name"])
+        subtree_nodes = src.find(sort=["path"])
         all_nodes.extend(
             NodeWithPath(n.n, path=dir_path + n.path) for n in subtree_nodes
         )
@@ -148,8 +149,7 @@ class Listing:
         elif from_dataset:
             node_path = [
                 src.listing.client.name,
-                node.parent,
-                node.name,
+                node.path,
             ]
         else:
             node_path = [node.name]
@@ -201,25 +201,19 @@ class Listing:
         dr = self.dataset_rows
         conds = []
         if names:
-            f = Column("name").op("GLOB")
-            conds.extend(f(name) for name in names)
+            for name in names:
+                conds.append(pathfunc.name(Column("path")).op("GLOB")(name))
         if inames:
-            f = func.lower(Column("name")).op("GLOB")
-            conds.extend(f(iname.lower()) for iname in inames)
+            for iname in inames:
+                conds.append(
+                    func.lower(pathfunc.name(Column("path"))).op("GLOB")(iname.lower())
+                )
         if paths:
-            node_path = case(
-                (Column("parent") == "", Column("name")),
-                else_=Column("parent") + "/" + Column("name"),
-            )
-            f = node_path.op("GLOB")
-            conds.extend(f(path) for path in paths)
+            for path in paths:
+                conds.append(Column("path").op("GLOB")(path))
         if ipaths:
-            node_path = case(
-                (Column("parent") == "", Column("name")),
-                else_=Column("parent") + "/" + Column("name"),
-            )
-            f = func.lower(node_path).op("GLOB")
-            conds.extend(f(ipath.lower()) for ipath in ipaths)
+            for ipath in ipaths:
+                conds.append(func.lower(Column("path")).op("GLOB")(ipath.lower()))

         if size is not None:
             size_limit = suffix_to_number(size)
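
The same find() filters, expressed against the single "path" column (a sketch; the GLOB patterns are examples):

from sqlalchemy import Column, func
from datachain.sql.functions import path as pathfunc

conds = []
# --name '*.jpg': GLOB against the basename of "path"
conds.append(pathfunc.name(Column("path")).op("GLOB")("*.jpg"))
# --ipath 'ANIMALS/*': case-insensitive GLOB against the whole path
conds.append(func.lower(Column("path")).op("GLOB")("ANIMALS/*".lower()))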
datachain/node.py CHANGED
@@ -50,8 +50,7 @@ class Node:
     sys__rand: int = -1
     vtype: str = ""
     dir_type: Optional[int] = None
-    parent: str = ""
-    name: str = ""
+    path: str = ""
     etag: str = ""
     version: Optional[str] = None
     is_latest: bool = True
@@ -62,10 +61,6 @@ class Node:
     location: Optional[str] = None
     source: StorageURI = StorageURI("")

-    @property
-    def path(self) -> str:
-        return f"{self.parent}/{self.name}" if self.parent else self.name
-
     @property
     def is_dir(self) -> bool:
         return self.dir_type == DirType.DIR
@@ -107,13 +102,12 @@ class Node:
             return self.path + "/"
         return self.path

-    def as_uid(self, storage: Optional[StorageURI] = None):
+    def as_uid(self, storage: Optional[StorageURI] = None) -> UniqueId:
         if storage is None:
             storage = self.source
         return UniqueId(
             storage=storage,
-            parent=self.parent,
-            name=self.name,
+            path=self.path,
             size=self.size,
             version=self.version or "",
             etag=self.etag,
@@ -129,20 +123,30 @@ class Node:
         return cls(**kw)

     @classmethod
-    def from_dir(cls, parent, name, **kwargs) -> "Node":
-        return cls(sys__id=-1, dir_type=DirType.DIR, parent=parent, name=name, **kwargs)
+    def from_dir(cls, path, **kwargs) -> "Node":
+        return cls(sys__id=-1, dir_type=DirType.DIR, path=path, **kwargs)

     @classmethod
     def root(cls) -> "Node":
         return cls(sys__id=-1, dir_type=DirType.DIR)

+    @property
+    def name(self):
+        return self.path.rsplit("/", 1)[-1]
+
+    @property
+    def parent(self):
+        split = self.path.rsplit("/", 1)
+        if len(split) <= 1:
+            return ""
+        return split[0]
+

 @attrs.define
 class Entry:
     vtype: str = ""
     dir_type: Optional[int] = None
-    parent: str = ""
-    name: str = ""
+    path: str = ""
     etag: str = ""
     version: str = ""
     is_latest: bool = True
@@ -157,27 +161,34 @@ class Entry:
         return self.dir_type == DirType.DIR

     @classmethod
-    def from_dir(cls, parent: str, name: str, **kwargs) -> "Entry":
-        return cls(dir_type=DirType.DIR, parent=parent, name=name, **kwargs)
+    def from_dir(cls, path: str, **kwargs) -> "Entry":
+        return cls(dir_type=DirType.DIR, path=path, **kwargs)

     @classmethod
-    def from_file(cls, parent: str, name: str, **kwargs) -> "Entry":
-        return cls(dir_type=DirType.FILE, parent=parent, name=name, **kwargs)
+    def from_file(cls, path: str, **kwargs) -> "Entry":
+        return cls(dir_type=DirType.FILE, path=path, **kwargs)

     @classmethod
     def root(cls):
         return cls(dir_type=DirType.DIR)

-    @property
-    def path(self) -> str:
-        return f"{self.parent}/{self.name}" if self.parent else self.name
-
     @property
     def full_path(self) -> str:
         if self.is_dir and self.path:
             return self.path + "/"
         return self.path

+    @property
+    def name(self):
+        return self.path.rsplit("/", 1)[-1]
+
+    @property
+    def parent(self):
+        split = self.path.rsplit("/", 1)
+        if len(split) <= 1:
+            return ""
+        return split[0]
+

 def get_path(parent: str, name: str):
     return f"{parent}/{name}" if parent else name
datachain/query/builtins.py CHANGED
@@ -20,8 +20,7 @@ def load_tar(raw):
 @udf(
     (
         C.source,
-        C.name,
-        C.parent,
+        C.path,
         C.size,
         C.vtype,
         C.dir_type,
@@ -37,8 +36,7 @@ def load_tar(raw):
 )
 def index_tar(
     source,
-    name,
-    parent,
+    parent_path,
     size,
     vtype,
     dir_type,
@@ -52,9 +50,8 @@ def index_tar(
 ):
     # generate original tar files as well, along with subobjects
     yield DatasetRow.create(
-        name,
         source=source,
-        parent=parent,
+        path=parent_path,
         size=size,
         vtype=vtype,
         dir_type=dir_type,
@@ -66,15 +63,12 @@ def index_tar(
         etag=etag,
     )

-    parent_path = name if not parent else f"{parent}/{name}"
     for info in tar_entries:
         if info.isfile():
             full_path = f"{parent_path}/{info.name}"
-            parent_dir, subobject_name = full_path.rsplit("/", 1)
             yield DatasetRow.create(
-                subobject_name,
                 source=source,
-                parent=parent_dir,
+                path=full_path,
                 size=info.size,
                 vtype="tar",
                 location={
@@ -83,8 +77,7 @@ def index_tar(
                     "size": info.size,
                     "parent": {
                         "source": source,
-                        "parent": parent,
-                        "name": name,
+                        "path": parent_path,
                         "version": version,
                         "size": size,
                         "etag": etag,
datachain/query/dataset.py CHANGED
@@ -307,7 +307,7 @@ class Subtract(DatasetDiffOperation):
 class Changed(DatasetDiffOperation):
     """
     Calculates rows that are changed in a source query compared to target query
-    Changed means it has same source + parent + name but different last_modified
+    Changed means it has same source + path but different last_modified
     Example:
         >>> ds = DatasetQuery(name="dogs_cats")  # some older dataset with embeddings
         >>> ds_updated = (
@@ -1526,7 +1526,7 @@ class DatasetQuery:

     @detach
     def subtract(self, dq: "DatasetQuery") -> "Self":
-        return self._subtract(dq, on=["source", "parent", "name"])
+        return self._subtract(dq, on=["source", "path"])

     @detach
     def _subtract(self, dq: "DatasetQuery", on: Sequence[str]) -> "Self":
datachain/query/schema.py CHANGED
@@ -215,8 +215,7 @@ def normalize_param(param: UDFParamSpec) -> UDFParameter:
 class DatasetRow:
     schema: ClassVar[dict[str, type[SQLType]]] = {
         "source": String,
-        "parent": String,
-        "name": String,
+        "path": String,
         "size": Int64,
         "location": JSON,
         "vtype": String,
@@ -231,9 +230,8 @@ class DatasetRow:

     @staticmethod
     def create(
-        name: str,
+        path: str,
         source: str = "",
-        parent: str = "",
         size: int = 0,
         location: Optional[dict[str, Any]] = None,
         vtype: str = "",
@@ -245,7 +243,6 @@ class DatasetRow:
         version: str = "",
         etag: str = "",
     ) -> tuple[
-        str,
         str,
         str,
         int,
@@ -267,8 +264,7 @@ class DatasetRow:

         return (  # type: ignore [return-value]
             source,
-            parent,
-            name,
+            path,
             size,
             location,
             vtype,
{datachain-0.2.18.dist-info → datachain-0.3.0.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datachain
-Version: 0.2.18
+Version: 0.3.0
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License: Apache-2.0
{datachain-0.2.18.dist-info → datachain-0.3.0.dist-info}/RECORD RENAMED
@@ -1,15 +1,15 @@
 datachain/__init__.py,sha256=GeyhE-5LgfJav2OKYGaieP2lBvf2Gm-ihj7thnK9zjI,800
 datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
 datachain/asyn.py,sha256=CKCFQJ0CbB3r04S7mUTXxriKzPnOvdUaVPXjM8vCtJw,7644
-datachain/cache.py,sha256=N6PCEFJlWRpq7f_zeBNoaURFCJFAV7ibsLJqyiMHbBg,4207
+datachain/cache.py,sha256=wznC2pge6RhlPTaJfBVGjmBc6bxWCPThu4aTFMltvFU,4076
 datachain/cli.py,sha256=DbmI1sXs7-KCQz6RdLE_JAp3XO3yrTSRJ71LdUzx-XE,33099
 datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
 datachain/config.py,sha256=PfC7W5yO6HFO6-iMB4YB-0RR88LPiGmD6sS_SfVbGso,1979
 datachain/dataset.py,sha256=MZezyuJWNj_3PEtzr0epPMNyWAOTrhTSPI5FmemV6L4,14470
 datachain/error.py,sha256=GY9KYTmb7GHXn2gGHV9X-PBhgwLj3i7VpK7tGHtAoGM,1279
 datachain/job.py,sha256=bk25bIqClhgRPzlXAhxpTtDeewibQe5l3S8Cf7db0gM,1229
-datachain/listing.py,sha256=JEhi5WOSV2LUqRQgt0-fdmJ8Zb5fNpNFzBQcuTtx63o,8555
-datachain/node.py,sha256=LwzSOSM9SbPLI5RvYDsiEkk7d5rbMX8huzM_m7uWKx4,5917
+datachain/listing.py,sha256=keLkvPfumDA3gijeIiinH5yGWe71qCxgF5HqqP5AeH4,8299
+datachain/node.py,sha256=frxZWoEvqUvk9pyXmVaeiNCs3W-xjC_sENmUD11V06Q,6006
 datachain/nodes_fetcher.py,sha256=kca19yvu11JxoVY1t4_ydp1FmchiV88GnNicNBQ9NIA,831
 datachain/nodes_thread_pool.py,sha256=ZyzBvUImIPmi4WlKC2SW2msA0UhtembbTdcs2nx29A0,3191
 datachain/progress.py,sha256=7_8FtJs770ITK9sMq-Lt4k4k18QmYl4yIG_kCoWID3o,4559
@@ -17,33 +17,33 @@ datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
 datachain/utils.py,sha256=kgH5NPj47eC_KrFTd6ZS206lKVhnJVFt5XsqkK6ppTc,12483
 datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
-datachain/catalog/catalog.py,sha256=z0tclel0kNdSzJojNRRnRVhgt-K7ElO3CeuurlwQMGI,80612
+datachain/catalog/catalog.py,sha256=BJ8ZP9mleUbN5Y4CoYJ94R_tnnsA9sHdZq2RBGwVN5Y,80291
 datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
 datachain/catalog/loader.py,sha256=GJ8zhEYkC7TuaPzCsjJQ4LtTdECu-wwYzC12MikPOMQ,7307
 datachain/catalog/subclass.py,sha256=B5R0qxeTYEyVAAPM1RutBPSoXZc8L5mVVZeSGXki9Sw,2096
 datachain/client/__init__.py,sha256=T4wiYL9KIM0ZZ_UqIyzV8_ufzYlewmizlV4iymHNluE,86
-datachain/client/azure.py,sha256=rxvF5erntGD32Y3DYK_TUCsyV2ALfuWWTnE8IWGwKEo,2542
+datachain/client/azure.py,sha256=3RfDTAI_TszDy9WazHQd3bI3sS2wDFrNXfNqCDewZgE,2214
 datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
-datachain/client/fsspec.py,sha256=F1Iyyw0iTrp2wQTFeignGtaHpm5Rg_cvbKaIzBX5aSc,13390
-datachain/client/gcs.py,sha256=ucX8e6JrqlFY-f80zkv084vxnKdtxpO32QJ-RG8Nv1s,4454
-datachain/client/local.py,sha256=NQVkLTJQ-a7Udavqbh_4uT-IejfZQYn10j22owz9sis,5150
-datachain/client/s3.py,sha256=TmW4f7VUM5CMZjSmgyFQFKeMUGrXt2SLoLEbLOUleiU,6296
+datachain/client/fsspec.py,sha256=VrssoNenXsFxznr-Xx1haZPlXU-dr-WHdxmdbgFI_UA,13378
+datachain/client/gcs.py,sha256=Mt77W_l8_fK61gLm4mmxNmENuOM0ETwxdiFp4S8d-_w,4105
+datachain/client/local.py,sha256=yhC-pMKdprJ-rMGwPpBmPkdkG5riIIKkVSe6kNpyCok,5076
+datachain/client/s3.py,sha256=GfRZZzNPQPRsYjoef8bbsLbanJPUlCbyGTTK8ojzp8A,6136
 datachain/data_storage/__init__.py,sha256=cEOJpyu1JDZtfUupYucCDNFI6e5Wmp_Oyzq6rZv32Y8,398
 datachain/data_storage/db_engine.py,sha256=81Ol1of9TTTzD97ORajCnP366Xz2mEJt6C-kTUCaru4,3406
 datachain/data_storage/id_generator.py,sha256=lCEoU0BM37Ai2aRpSbwo5oQT0GqZnSpYwwvizathRMQ,4292
 datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
 datachain/data_storage/metastore.py,sha256=nxcY6nwyEmQWMAo33sNGO-FgUFQs2amBGGnZz2ftEz0,55362
-datachain/data_storage/schema.py,sha256=FQvt5MUMSnI5ZAE7Nthae4aaJpt8JC4nH8KiWDuhJkk,8135
+datachain/data_storage/schema.py,sha256=Idi-29fckvZozzvkyz3nTR2FOIajPlSuPdIEO7SMvXM,7863
 datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
 datachain/data_storage/sqlite.py,sha256=0r6L_a2hdGRoR_gl06v1qWhEFOS_Q31aldHyk07Yx-M,26857
-datachain/data_storage/warehouse.py,sha256=G79jsQwA6anYPWoiBXngwPyx-uP7yGIWqhZGc4TL5mY,33591
+datachain/data_storage/warehouse.py,sha256=eEZvzYwpqwzzLXqHWjB6l4tRsIHifIr8VWI5STm53LE,33310
 datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/lib/arrow.py,sha256=R8wDUDEa-5hYjI3HW9cqvOYYJpeeah5lbhFIL3gkmcE,4915
 datachain/lib/clip.py,sha256=16u4b_y2Y15nUS2UN_8ximMo6r_-_4IQpmct2ol-e-g,5730
 datachain/lib/data_model.py,sha256=qfTtQNncS5pt9SvXdMEa5kClniaT6XBGBfO7onEz2TI,1632
 datachain/lib/dataset_info.py,sha256=lONGr71ozo1DS4CQEhnpKORaU4qFb6Ketv8Xm8CVm2U,2188
-datachain/lib/dc.py,sha256=F2DrvBLxsLDHY7wDVzMFj_-IRscDxb_STTRMqd0gmyw,57971
-datachain/lib/file.py,sha256=MCklths3w9SgQTR0LACnDohfGdEc3t30XD0qNq1oTlI,12000
+datachain/lib/dc.py,sha256=bU45N7vBlxSyS6bpe0ShQ1c0DpXKFVfNcFcvbBrE1Ag,58011
+datachain/lib/file.py,sha256=ZHpdilDPYCob8uqtwUPtBvBNxVvQRq4AC_0IGg5m-G4,12003
 datachain/lib/image.py,sha256=TgYhRhzd4nkytfFMeykQkPyzqb5Le_-tU81unVMPn4Q,2328
 datachain/lib/meta_formats.py,sha256=jlSYWRUeDMjun_YCsQ2JxyaDJpEpokzHDPmKUAoCXnU,7034
 datachain/lib/model_store.py,sha256=c4USXsBBjrGH8VOh4seIgOiav-qHOwdoixtxfLgU63c,2409
@@ -55,7 +55,7 @@ datachain/lib/udf.py,sha256=IjuDt2B8E3xEHhcJnaK_ZhmivdrOYPXz5uf7ylpktws,11815
 datachain/lib/udf_signature.py,sha256=gMStcEeYJka5M6cg50Z9orC6y6HzCAJ3MkFqqn1fjZg,7137
 datachain/lib/utils.py,sha256=5-kJlAZE0D9nXXweAjo7-SP_AWGo28feaDByONYaooQ,463
 datachain/lib/vfile.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/lib/webdataset.py,sha256=nIa6ubv94CwnATeeSdE7f_F9Zkz9LuBTfbXvFg3_-Ak,8295
+datachain/lib/webdataset.py,sha256=SsjCKLSKEkHRRfeTHQhjoGqNPqIWw_SCWQcUwgUWWP0,8282
 datachain/lib/webdataset_laion.py,sha256=PQP6tQmUP7Xu9fPuAGK1JDBYA6T5UufYMUTGaxgspJA,2118
 datachain/lib/convert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/lib/convert/flatten.py,sha256=YMoC00BqEy3zSpvCp6Q0DfxihuPmgjUJj1g2cesWGPs,1790
@@ -65,12 +65,12 @@ datachain/lib/convert/unflatten.py,sha256=Ogvh_5wg2f38_At_1lN0D_e2uZOOpYEvwvB2xd
 datachain/lib/convert/values_to_tuples.py,sha256=aVoHWMOUGLAiS6_BBwKJqVIne91VffOW6-dWyNE7oHg,3715
 datachain/query/__init__.py,sha256=tv-spkjUCYamMN9ys_90scYrZ8kJ7C7d1MTYVmxGtk4,325
 datachain/query/batch.py,sha256=j-_ZcuQra2Ro3Wj4crtqQCg-7xuv-p84hr4QHdvT7as,3479
-datachain/query/builtins.py,sha256=ZKNs49t8Oa_OaboCBIEqtXZt7c1Qe9OR_C_HpoDriIU,2781
-datachain/query/dataset.py,sha256=-AGkz3-K_b-2YBJCMqQz-Qq7FKzMcScPty_77S0AQtE,59938
+datachain/query/builtins.py,sha256=EmKPYsoQ46zwdyOn54MuCzvYFmfsBn5F8zyF7UBUfrc,2550
+datachain/query/dataset.py,sha256=nfRRz6mkUz0tcD084rx-ps4PUWnZr5JQlIlRUF-PpSc,59919
 datachain/query/dispatch.py,sha256=oGX9ZuoKWPB_EyqAZD_eULcO3OejY44_keSmFS6SHT0,13315
 datachain/query/metrics.py,sha256=vsECqbZfoSDBnvC3GQlziKXmISVYDLgHP1fMPEOtKyo,640
 datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
-datachain/query/schema.py,sha256=hAvux_GxUmuG_PwtnKkkizld9f0Gvt2JBzbu3m74fvE,7840
+datachain/query/schema.py,sha256=O3mTM5DRjvRAJCI7O9mR8wOdFJbgI1jIjvtfl5YvjI4,7755
 datachain/query/session.py,sha256=qTzkXgwMJdJhal3rVt3hdv3x1EXT1IHuXcwkC-Ex0As,4111
 datachain/query/udf.py,sha256=c0IOTkcedpOQEmX-Idlrrl1__1IecNXL0N9oUO9Dtkg,7755
 datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -92,9 +92,9 @@ datachain/sql/sqlite/base.py,sha256=LBYmXqXsVF30fbcnR55evCZHbPDCzMdGk_ogPLps63s,
 datachain/sql/sqlite/types.py,sha256=yzvp0sXSEoEYXs6zaYC_2YubarQoZH-MiUNXcpuEP4s,1573
 datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.2.18.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
-datachain-0.2.18.dist-info/METADATA,sha256=_wZgyu8nS5Ut_kQcIc_n9979rQcvv8fPuSIHbyCGhX0,17269
-datachain-0.2.18.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
-datachain-0.2.18.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
-datachain-0.2.18.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
-datachain-0.2.18.dist-info/RECORD,,
+datachain-0.3.0.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.3.0.dist-info/METADATA,sha256=x0jqtxoQE9ynjAAKFeyrz0rvyuv_E2e0D6UuhU3Yu_I,17268
+datachain-0.3.0.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+datachain-0.3.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.3.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.3.0.dist-info/RECORD,,