datachain 0.24.1__py3-none-any.whl → 0.24.2__py3-none-any.whl
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/data_storage/warehouse.py +6 -4
- datachain/query/dataset.py +11 -10
- {datachain-0.24.1.dist-info → datachain-0.24.2.dist-info}/METADATA +1 -1
- {datachain-0.24.1.dist-info → datachain-0.24.2.dist-info}/RECORD +8 -8
- {datachain-0.24.1.dist-info → datachain-0.24.2.dist-info}/WHEEL +0 -0
- {datachain-0.24.1.dist-info → datachain-0.24.2.dist-info}/entry_points.txt +0 -0
- {datachain-0.24.1.dist-info → datachain-0.24.2.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.24.1.dist-info → datachain-0.24.2.dist-info}/top_level.txt +0 -0
|
@@ -218,7 +218,7 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
218
218
|
limit = query._limit
|
|
219
219
|
paginated_query = query.limit(page_size)
|
|
220
220
|
|
|
221
|
-
offset = 0
|
|
221
|
+
offset = query._offset or 0
|
|
222
222
|
num_yielded = 0
|
|
223
223
|
|
|
224
224
|
# Ensure we're using a thread-local connection
|
|
@@ -234,13 +234,13 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
234
234
|
# Cursor results are not thread-safe, so we convert them to a list
|
|
235
235
|
results = list(wh.dataset_rows_select(paginated_query.offset(offset)))
|
|
236
236
|
|
|
237
|
-
processed =
|
|
237
|
+
processed = 0
|
|
238
238
|
for row in results:
|
|
239
|
-
processed
|
|
239
|
+
processed += 1
|
|
240
240
|
yield row
|
|
241
241
|
num_yielded += 1
|
|
242
242
|
|
|
243
|
-
if
|
|
243
|
+
if processed < page_size:
|
|
244
244
|
break # no more results
|
|
245
245
|
offset += page_size
|
|
246
246
|
|
|
@@ -343,6 +343,8 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
343
343
|
if (id_col := get_query_id_column(query)) is None:
|
|
344
344
|
raise RuntimeError("sys__id column not found in query")
|
|
345
345
|
|
|
346
|
+
query = query._clone().offset(None).limit(None).order_by(None)
|
|
347
|
+
|
|
346
348
|
if is_batched:
|
|
347
349
|
for batch in ids:
|
|
348
350
|
yield list(self.dataset_rows_select(query.where(id_col.in_(batch))))
|
datachain/query/dataset.py
CHANGED
|
@@ -11,6 +11,7 @@ from collections.abc import Generator, Iterable, Iterator, Sequence
|
|
|
11
11
|
from copy import copy
|
|
12
12
|
from functools import wraps
|
|
13
13
|
from secrets import token_hex
|
|
14
|
+
from types import GeneratorType
|
|
14
15
|
from typing import (
|
|
15
16
|
TYPE_CHECKING,
|
|
16
17
|
Any,
|
|
@@ -557,8 +558,8 @@ class UDFStep(Step, ABC):
|
|
|
557
558
|
"""
|
|
558
559
|
assert self.partition_by is not None
|
|
559
560
|
|
|
560
|
-
if isinstance(self.partition_by,
|
|
561
|
-
list_partition_by = self.partition_by
|
|
561
|
+
if isinstance(self.partition_by, (list, tuple, GeneratorType)):
|
|
562
|
+
list_partition_by = list(self.partition_by)
|
|
562
563
|
else:
|
|
563
564
|
list_partition_by = [self.partition_by]
|
|
564
565
|
|
|
@@ -575,7 +576,10 @@ class UDFStep(Step, ABC):
|
|
|
575
576
|
f.dense_rank().over(order_by=partition_by).label(PARTITION_COLUMN_ID),
|
|
576
577
|
]
|
|
577
578
|
self.catalog.warehouse.db.execute(
|
|
578
|
-
tbl.insert().from_select(
|
|
579
|
+
tbl.insert().from_select(
|
|
580
|
+
cols,
|
|
581
|
+
query.offset(None).limit(None).with_only_columns(*cols),
|
|
582
|
+
)
|
|
579
583
|
)
|
|
580
584
|
|
|
581
585
|
return tbl
|
|
@@ -601,13 +605,10 @@ class UDFStep(Step, ABC):
|
|
|
601
605
|
if self.partition_by is not None:
|
|
602
606
|
partition_tbl = self.create_partitions_table(query)
|
|
603
607
|
temp_tables.append(partition_tbl.name)
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
.outerjoin(partition_tbl, partition_tbl.c.sys__id == subq.c.sys__id)
|
|
609
|
-
.add_columns(*partition_columns())
|
|
610
|
-
)
|
|
608
|
+
query = query.outerjoin(
|
|
609
|
+
partition_tbl,
|
|
610
|
+
partition_tbl.c.sys__id == query.selected_columns.sys__id,
|
|
611
|
+
).add_columns(*partition_columns())
|
|
611
612
|
|
|
612
613
|
query, tables = self.process_input_query(query)
|
|
613
614
|
temp_tables.extend(t.name for t in tables)
|
|
@@ -53,7 +53,7 @@ datachain/data_storage/metastore.py,sha256=9mWYOKK3AoHeKPGFm-WBfPrmnYHhwYeXx5MOu
|
|
|
53
53
|
datachain/data_storage/schema.py,sha256=o3JbURKXRg3IJyIVA4QjHHkn6byRuz7avbydU2FlvNY,9897
|
|
54
54
|
datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
|
|
55
55
|
datachain/data_storage/sqlite.py,sha256=tT_soVi6l_pFSKaDktA1t4qW_vmPvXnvYSf4TZTKZYk,30067
|
|
56
|
-
datachain/data_storage/warehouse.py,sha256=
|
|
56
|
+
datachain/data_storage/warehouse.py,sha256=2Bp2fXfcm-acwYjDWqVzGjoIQSAR4L56GPNtPcaT2gU,32418
|
|
57
57
|
datachain/diff/__init__.py,sha256=-OFZzgOplqO84iWgGY7kfe60NXaWR9JRIh9T-uJboAM,9668
|
|
58
58
|
datachain/fs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
59
59
|
datachain/fs/reference.py,sha256=A8McpXF0CqbXPqanXuvpKu50YLB3a2ZXA3YAPxtBXSM,914
|
|
@@ -125,7 +125,7 @@ datachain/model/ultralytics/pose.py,sha256=pBlmt63Qe68FKmexHimUGlNbNOoOlMHXG4fzX
|
|
|
125
125
|
datachain/model/ultralytics/segment.py,sha256=63bDCj43E6iZ0hFI5J6uQfksdCmjEp6sEm1XzVaE8pw,2986
|
|
126
126
|
datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
|
|
127
127
|
datachain/query/batch.py,sha256=-goxLpE0EUvaDHu66rstj53UnfHpYfBUGux8GSpJ93k,4306
|
|
128
|
-
datachain/query/dataset.py,sha256=
|
|
128
|
+
datachain/query/dataset.py,sha256=mKee4PkQHYPT96utPjM1DocURU4TghAR7AHtYkzdqwY,61292
|
|
129
129
|
datachain/query/dispatch.py,sha256=A0nPxn6mEN5d9dDo6S8m16Ji_9IvJLXrgF2kqXdi4fs,15546
|
|
130
130
|
datachain/query/metrics.py,sha256=DOK5HdNVaRugYPjl8qnBONvTkwjMloLqAr7Mi3TjCO0,858
|
|
131
131
|
datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
|
|
@@ -157,9 +157,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
|
|
|
157
157
|
datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
|
|
158
158
|
datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
|
|
159
159
|
datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
|
|
160
|
-
datachain-0.24.
|
|
161
|
-
datachain-0.24.
|
|
162
|
-
datachain-0.24.
|
|
163
|
-
datachain-0.24.
|
|
164
|
-
datachain-0.24.
|
|
165
|
-
datachain-0.24.
|
|
160
|
+
datachain-0.24.2.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
161
|
+
datachain-0.24.2.dist-info/METADATA,sha256=66Dz51BXpod8ZZG-pcmodALbszVZjNJXykMIrYRwXdA,13281
|
|
162
|
+
datachain-0.24.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
163
|
+
datachain-0.24.2.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
164
|
+
datachain-0.24.2.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
165
|
+
datachain-0.24.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|