datachain 0.24.2__py3-none-any.whl → 0.24.3__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

@@ -1098,9 +1098,18 @@ class Catalog:
1098
1098
  ) -> DatasetRecord:
1099
1099
  from datachain.lib.listing import is_listing_dataset
1100
1100
 
1101
+ project = project or self.metastore.default_project
1102
+
1101
1103
  if is_listing_dataset(name):
1102
1104
  project = self.metastore.listing_project
1103
- return self.metastore.get_dataset(name, project.id if project else None)
1105
+
1106
+ try:
1107
+ return self.metastore.get_dataset(name, project.id if project else None)
1108
+ except DatasetNotFoundError:
1109
+ raise DatasetNotFoundError(
1110
+ f"Dataset {name} not found in namespace {project.namespace.name}"
1111
+ f" and project {project.name}"
1112
+ ) from None
1104
1113
 
1105
1114
  def get_dataset_with_remote_fallback(
1106
1115
  self,
@@ -1124,7 +1133,7 @@ class Catalog:
1124
1133
  raise DatasetNotFoundError(
1125
1134
  f"Dataset {name}"
1126
1135
  + (f" version {version} " if version else " ")
1127
- + "not found"
1136
+ + f"not found in namespace {namespace_name} and project {project_name}"
1128
1137
  )
1129
1138
 
1130
1139
  if pull_dataset:
@@ -1194,14 +1194,16 @@ class AbstractDBMetastore(AbstractMetastore):
1194
1194
  Gets a single dataset in project by dataset name.
1195
1195
  """
1196
1196
  project_id = project_id or self.default_project.id
1197
+
1197
1198
  d = self._datasets
1198
1199
  query = self._base_dataset_query()
1199
1200
  query = query.where(d.c.name == name, d.c.project_id == project_id) # type: ignore [attr-defined]
1200
1201
  ds = self._parse_dataset(self.db.execute(query, conn=conn))
1201
1202
  if not ds:
1202
1203
  raise DatasetNotFoundError(
1203
- f"Dataset {name} not found in project {project_id}"
1204
+ f"Dataset {name} not found in project with id {project_id}"
1204
1205
  )
1206
+
1205
1207
  return ds
1206
1208
 
1207
1209
  def remove_dataset_version(
@@ -774,7 +774,15 @@ class SQLiteWarehouse(AbstractWarehouse):
774
774
  query: Select,
775
775
  progress_cb: Optional[Callable[[int], None]] = None,
776
776
  ) -> None:
777
- if len(query._group_by_clause) > 0:
777
+ col_id = (
778
+ query.selected_columns.sys__id
779
+ if "sys__id" in query.selected_columns
780
+ else None
781
+ )
782
+
783
+ # If there is no sys__id column, we cannot copy the table in batches,
784
+ # and we need to copy all rows at once. Same if there is a group by clause.
785
+ if col_id is None or len(query._group_by_clause) > 0:
778
786
  select_q = query.with_only_columns(
779
787
  *[c for c in query.selected_columns if c.name != "sys__id"]
780
788
  )
@@ -782,12 +790,7 @@ class SQLiteWarehouse(AbstractWarehouse):
782
790
  self.db.execute(q)
783
791
  return
784
792
 
785
- if "sys__id" in query.selected_columns:
786
- col_id = query.selected_columns.sys__id
787
- else:
788
- col_id = sqlalchemy.column("sys__id")
789
793
  select_ids = query.with_only_columns(col_id)
790
-
791
794
  ids = self.db.execute(select_ids).fetchall()
792
795
 
793
796
  select_q = (
datachain/listing.py CHANGED
@@ -65,10 +65,17 @@ class Listing:
65
65
 
66
66
  @cached_property
67
67
  def dataset(self) -> "DatasetRecord":
68
+ from datachain.error import DatasetNotFoundError
69
+
68
70
  assert self.dataset_name
69
- return self.metastore.get_dataset(
70
- self.dataset_name, self.metastore.listing_project.id
71
- )
71
+ project = self.metastore.listing_project
72
+ try:
73
+ return self.metastore.get_dataset(self.dataset_name, project.id)
74
+ except DatasetNotFoundError:
75
+ raise DatasetNotFoundError(
76
+ f"Dataset {self.dataset_name} not found in namespace"
77
+ f" {project.namespace.name} and project {project.name}"
78
+ ) from None
72
79
 
73
80
  @cached_property
74
81
  def dataset_rows(self):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.24.2
3
+ Version: 0.24.3
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -7,7 +7,7 @@ datachain/dataset.py,sha256=wDrukmkDnYP0X8bAGY-7O1NDE3DWCFqrH8VVDpXM9Ok,25263
7
7
  datachain/delta.py,sha256=fTEhCedseUsHuH_Ek52NXFhFPyFD_6MioEH5sCilNgo,9897
8
8
  datachain/error.py,sha256=OWwWMkzZYJrkcoEDGhJHMf7SfKvxcsOLRF94mjPf29I,1609
9
9
  datachain/job.py,sha256=x5PB6d5sqx00hePNNkirESlOVAvnmkEM5ygUgQmAhsk,1262
10
- datachain/listing.py,sha256=T4bCgdCRuFW7bsPUG2PSl5om2nfJL6fzB84m7mCO8cA,7136
10
+ datachain/listing.py,sha256=GuUlj3oYJQOIbLYr3IqNCSeuiU5pfOtRAQQl9uBEFQU,7458
11
11
  datachain/namespace.py,sha256=r7YwpXFc7LdMS2jhyLMTrAHffALFG2bAIiYUSuZa5Rc,1786
12
12
  datachain/node.py,sha256=KWDT0ClYXB7FYI-QOvzAa-UDkLJErUI2eWm5FBteYuU,5577
13
13
  datachain/nodes_fetcher.py,sha256=_wgaKyqEjkqdwJ_Hj6D8vUYz7hnU7g6xhm0H6ZnYxmE,1095
@@ -21,7 +21,7 @@ datachain/studio.py,sha256=bLok-eJNFRHQScEyAyA_Fas52dmijd5r-73KudWxV4k,13337
21
21
  datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
22
22
  datachain/utils.py,sha256=DNqOi-Ydb7InyWvD9m7_yailxz6-YGpZzh00biQaHNo,15305
23
23
  datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
24
- datachain/catalog/catalog.py,sha256=z4GbRMHeW0YA20Sjh7QuPy1Rj4RkX547WN9Pp5wAD6o,65277
24
+ datachain/catalog/catalog.py,sha256=7N4QmG6mpj8WaWcJSp7LijFQlVHbz_B6DLUnMKXmx6k,65644
25
25
  datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
26
26
  datachain/catalog/loader.py,sha256=UXjYD6BNRoupPvkiz3-b04jepXhtLHCA4gzKFnXxOtQ,5987
27
27
  datachain/cli/__init__.py,sha256=WvBqnwjG8Wp9xGCn-4eqfoZ3n7Sj1HJemCi4MayJh_c,8221
@@ -49,10 +49,10 @@ datachain/client/s3.py,sha256=6DNVGLg-woPS1DVlYVX2rIlunNblsuxyOnI1rSzhW3k,7515
49
49
  datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
50
50
  datachain/data_storage/db_engine.py,sha256=n8ojCbvVMPY2e3SG8fUaaD0b9GkVfpl_Naa_6EiHfWg,3788
51
51
  datachain/data_storage/job.py,sha256=9r0OGwh22bHNIvLHqg8_-eJSP1YYB-BN5HOla5TdCxw,402
52
- datachain/data_storage/metastore.py,sha256=9mWYOKK3AoHeKPGFm-WBfPrmnYHhwYeXx5MOueKTe7I,52657
52
+ datachain/data_storage/metastore.py,sha256=af7TsMHbANXmWKIu-LzQmsZpNxM6-hhzZfInWx7MQXI,52667
53
53
  datachain/data_storage/schema.py,sha256=o3JbURKXRg3IJyIVA4QjHHkn6byRuz7avbydU2FlvNY,9897
54
54
  datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
55
- datachain/data_storage/sqlite.py,sha256=tT_soVi6l_pFSKaDktA1t4qW_vmPvXnvYSf4TZTKZYk,30067
55
+ datachain/data_storage/sqlite.py,sha256=TTQjdDXUaZSr3MEaxZjDhsVIkIJqxFNA-sD25TO3m_4,30228
56
56
  datachain/data_storage/warehouse.py,sha256=2Bp2fXfcm-acwYjDWqVzGjoIQSAR4L56GPNtPcaT2gU,32418
57
57
  datachain/diff/__init__.py,sha256=-OFZzgOplqO84iWgGY7kfe60NXaWR9JRIh9T-uJboAM,9668
58
58
  datachain/fs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -157,9 +157,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
157
157
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
158
158
  datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
159
159
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
160
- datachain-0.24.2.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
161
- datachain-0.24.2.dist-info/METADATA,sha256=66Dz51BXpod8ZZG-pcmodALbszVZjNJXykMIrYRwXdA,13281
162
- datachain-0.24.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
163
- datachain-0.24.2.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
164
- datachain-0.24.2.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
165
- datachain-0.24.2.dist-info/RECORD,,
160
+ datachain-0.24.3.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
161
+ datachain-0.24.3.dist-info/METADATA,sha256=qlqP_kAxEtkEJvV-ZzEPikT2OwW9S7Yqx4ly-iIOMBY,13281
162
+ datachain-0.24.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
163
+ datachain-0.24.3.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
164
+ datachain-0.24.3.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
165
+ datachain-0.24.3.dist-info/RECORD,,