datachain 0.24.2__py3-none-any.whl → 0.24.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/catalog/catalog.py +11 -2
- datachain/data_storage/metastore.py +3 -1
- datachain/data_storage/sqlite.py +9 -6
- datachain/listing.py +10 -3
- {datachain-0.24.2.dist-info → datachain-0.24.3.dist-info}/METADATA +1 -1
- {datachain-0.24.2.dist-info → datachain-0.24.3.dist-info}/RECORD +10 -10
- {datachain-0.24.2.dist-info → datachain-0.24.3.dist-info}/WHEEL +0 -0
- {datachain-0.24.2.dist-info → datachain-0.24.3.dist-info}/entry_points.txt +0 -0
- {datachain-0.24.2.dist-info → datachain-0.24.3.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.24.2.dist-info → datachain-0.24.3.dist-info}/top_level.txt +0 -0
datachain/catalog/catalog.py
CHANGED
|
@@ -1098,9 +1098,18 @@ class Catalog:
|
|
|
1098
1098
|
) -> DatasetRecord:
|
|
1099
1099
|
from datachain.lib.listing import is_listing_dataset
|
|
1100
1100
|
|
|
1101
|
+
project = project or self.metastore.default_project
|
|
1102
|
+
|
|
1101
1103
|
if is_listing_dataset(name):
|
|
1102
1104
|
project = self.metastore.listing_project
|
|
1103
|
-
|
|
1105
|
+
|
|
1106
|
+
try:
|
|
1107
|
+
return self.metastore.get_dataset(name, project.id if project else None)
|
|
1108
|
+
except DatasetNotFoundError:
|
|
1109
|
+
raise DatasetNotFoundError(
|
|
1110
|
+
f"Dataset {name} not found in namespace {project.namespace.name}"
|
|
1111
|
+
f" and project {project.name}"
|
|
1112
|
+
) from None
|
|
1104
1113
|
|
|
1105
1114
|
def get_dataset_with_remote_fallback(
|
|
1106
1115
|
self,
|
|
@@ -1124,7 +1133,7 @@ class Catalog:
|
|
|
1124
1133
|
raise DatasetNotFoundError(
|
|
1125
1134
|
f"Dataset {name}"
|
|
1126
1135
|
+ (f" version {version} " if version else " ")
|
|
1127
|
-
+ "not found"
|
|
1136
|
+
+ f"not found in namespace {namespace_name} and project {project_name}"
|
|
1128
1137
|
)
|
|
1129
1138
|
|
|
1130
1139
|
if pull_dataset:
|
|
datachain/data_storage/metastore.py
CHANGED
|
@@ -1194,14 +1194,16 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1194
1194
|
Gets a single dataset in project by dataset name.
|
|
1195
1195
|
"""
|
|
1196
1196
|
project_id = project_id or self.default_project.id
|
|
1197
|
+
|
|
1197
1198
|
d = self._datasets
|
|
1198
1199
|
query = self._base_dataset_query()
|
|
1199
1200
|
query = query.where(d.c.name == name, d.c.project_id == project_id) # type: ignore [attr-defined]
|
|
1200
1201
|
ds = self._parse_dataset(self.db.execute(query, conn=conn))
|
|
1201
1202
|
if not ds:
|
|
1202
1203
|
raise DatasetNotFoundError(
|
|
1203
|
-
f"Dataset {name} not found in project {project_id}"
|
|
1204
|
+
f"Dataset {name} not found in project with id {project_id}"
|
|
1204
1205
|
)
|
|
1206
|
+
|
|
1205
1207
|
return ds
|
|
1206
1208
|
|
|
1207
1209
|
def remove_dataset_version(
|
datachain/data_storage/sqlite.py
CHANGED
|
@@ -774,7 +774,15 @@ class SQLiteWarehouse(AbstractWarehouse):
|
|
|
774
774
|
query: Select,
|
|
775
775
|
progress_cb: Optional[Callable[[int], None]] = None,
|
|
776
776
|
) -> None:
|
|
777
|
-
|
|
777
|
+
col_id = (
|
|
778
|
+
query.selected_columns.sys__id
|
|
779
|
+
if "sys__id" in query.selected_columns
|
|
780
|
+
else None
|
|
781
|
+
)
|
|
782
|
+
|
|
783
|
+
# If there is no sys__id column, we cannot copy the table in batches,
|
|
784
|
+
# and we need to copy all rows at once. Same if there is a group by clause.
|
|
785
|
+
if col_id is None or len(query._group_by_clause) > 0:
|
|
778
786
|
select_q = query.with_only_columns(
|
|
779
787
|
*[c for c in query.selected_columns if c.name != "sys__id"]
|
|
780
788
|
)
|
|
@@ -782,12 +790,7 @@ class SQLiteWarehouse(AbstractWarehouse):
|
|
|
782
790
|
self.db.execute(q)
|
|
783
791
|
return
|
|
784
792
|
|
|
785
|
-
if "sys__id" in query.selected_columns:
|
|
786
|
-
col_id = query.selected_columns.sys__id
|
|
787
|
-
else:
|
|
788
|
-
col_id = sqlalchemy.column("sys__id")
|
|
789
793
|
select_ids = query.with_only_columns(col_id)
|
|
790
|
-
|
|
791
794
|
ids = self.db.execute(select_ids).fetchall()
|
|
792
795
|
|
|
793
796
|
select_q = (
|
datachain/listing.py
CHANGED
|
@@ -65,10 +65,17 @@ class Listing:
|
|
|
65
65
|
|
|
66
66
|
@cached_property
|
|
67
67
|
def dataset(self) -> "DatasetRecord":
|
|
68
|
+
from datachain.error import DatasetNotFoundError
|
|
69
|
+
|
|
68
70
|
assert self.dataset_name
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
71
|
+
project = self.metastore.listing_project
|
|
72
|
+
try:
|
|
73
|
+
return self.metastore.get_dataset(self.dataset_name, project.id)
|
|
74
|
+
except DatasetNotFoundError:
|
|
75
|
+
raise DatasetNotFoundError(
|
|
76
|
+
f"Dataset {self.dataset_name} not found in namespace"
|
|
77
|
+
f" {project.namespace.name} and project {project.name}"
|
|
78
|
+
) from None
|
|
72
79
|
|
|
73
80
|
@cached_property
|
|
74
81
|
def dataset_rows(self):
|
|
{datachain-0.24.2.dist-info → datachain-0.24.3.dist-info}/RECORD
CHANGED
|
@@ -7,7 +7,7 @@ datachain/dataset.py,sha256=wDrukmkDnYP0X8bAGY-7O1NDE3DWCFqrH8VVDpXM9Ok,25263
|
|
|
7
7
|
datachain/delta.py,sha256=fTEhCedseUsHuH_Ek52NXFhFPyFD_6MioEH5sCilNgo,9897
|
|
8
8
|
datachain/error.py,sha256=OWwWMkzZYJrkcoEDGhJHMf7SfKvxcsOLRF94mjPf29I,1609
|
|
9
9
|
datachain/job.py,sha256=x5PB6d5sqx00hePNNkirESlOVAvnmkEM5ygUgQmAhsk,1262
|
|
10
|
-
datachain/listing.py,sha256=
|
|
10
|
+
datachain/listing.py,sha256=GuUlj3oYJQOIbLYr3IqNCSeuiU5pfOtRAQQl9uBEFQU,7458
|
|
11
11
|
datachain/namespace.py,sha256=r7YwpXFc7LdMS2jhyLMTrAHffALFG2bAIiYUSuZa5Rc,1786
|
|
12
12
|
datachain/node.py,sha256=KWDT0ClYXB7FYI-QOvzAa-UDkLJErUI2eWm5FBteYuU,5577
|
|
13
13
|
datachain/nodes_fetcher.py,sha256=_wgaKyqEjkqdwJ_Hj6D8vUYz7hnU7g6xhm0H6ZnYxmE,1095
|
|
@@ -21,7 +21,7 @@ datachain/studio.py,sha256=bLok-eJNFRHQScEyAyA_Fas52dmijd5r-73KudWxV4k,13337
|
|
|
21
21
|
datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
|
|
22
22
|
datachain/utils.py,sha256=DNqOi-Ydb7InyWvD9m7_yailxz6-YGpZzh00biQaHNo,15305
|
|
23
23
|
datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
|
|
24
|
-
datachain/catalog/catalog.py,sha256=
|
|
24
|
+
datachain/catalog/catalog.py,sha256=7N4QmG6mpj8WaWcJSp7LijFQlVHbz_B6DLUnMKXmx6k,65644
|
|
25
25
|
datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
|
|
26
26
|
datachain/catalog/loader.py,sha256=UXjYD6BNRoupPvkiz3-b04jepXhtLHCA4gzKFnXxOtQ,5987
|
|
27
27
|
datachain/cli/__init__.py,sha256=WvBqnwjG8Wp9xGCn-4eqfoZ3n7Sj1HJemCi4MayJh_c,8221
|
|
@@ -49,10 +49,10 @@ datachain/client/s3.py,sha256=6DNVGLg-woPS1DVlYVX2rIlunNblsuxyOnI1rSzhW3k,7515
|
|
|
49
49
|
datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
|
|
50
50
|
datachain/data_storage/db_engine.py,sha256=n8ojCbvVMPY2e3SG8fUaaD0b9GkVfpl_Naa_6EiHfWg,3788
|
|
51
51
|
datachain/data_storage/job.py,sha256=9r0OGwh22bHNIvLHqg8_-eJSP1YYB-BN5HOla5TdCxw,402
|
|
52
|
-
datachain/data_storage/metastore.py,sha256=
|
|
52
|
+
datachain/data_storage/metastore.py,sha256=af7TsMHbANXmWKIu-LzQmsZpNxM6-hhzZfInWx7MQXI,52667
|
|
53
53
|
datachain/data_storage/schema.py,sha256=o3JbURKXRg3IJyIVA4QjHHkn6byRuz7avbydU2FlvNY,9897
|
|
54
54
|
datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
|
|
55
|
-
datachain/data_storage/sqlite.py,sha256=
|
|
55
|
+
datachain/data_storage/sqlite.py,sha256=TTQjdDXUaZSr3MEaxZjDhsVIkIJqxFNA-sD25TO3m_4,30228
|
|
56
56
|
datachain/data_storage/warehouse.py,sha256=2Bp2fXfcm-acwYjDWqVzGjoIQSAR4L56GPNtPcaT2gU,32418
|
|
57
57
|
datachain/diff/__init__.py,sha256=-OFZzgOplqO84iWgGY7kfe60NXaWR9JRIh9T-uJboAM,9668
|
|
58
58
|
datachain/fs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -157,9 +157,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
|
|
|
157
157
|
datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
|
|
158
158
|
datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
|
|
159
159
|
datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
|
|
160
|
-
datachain-0.24.
|
|
161
|
-
datachain-0.24.
|
|
162
|
-
datachain-0.24.
|
|
163
|
-
datachain-0.24.
|
|
164
|
-
datachain-0.24.
|
|
165
|
-
datachain-0.24.
|
|
160
|
+
datachain-0.24.3.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
161
|
+
datachain-0.24.3.dist-info/METADATA,sha256=qlqP_kAxEtkEJvV-ZzEPikT2OwW9S7Yqx4ly-iIOMBY,13281
|
|
162
|
+
datachain-0.24.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
163
|
+
datachain-0.24.3.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
164
|
+
datachain-0.24.3.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
165
|
+
datachain-0.24.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|