datachain 0.2.15__py3-none-any.whl → 0.2.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/catalog/catalog.py +5 -7
- datachain/cli.py +1 -1
- datachain/data_storage/metastore.py +2 -2
- datachain/data_storage/warehouse.py +4 -6
- datachain/lib/dc.py +8 -20
- datachain/lib/signal_schema.py +4 -1
- datachain/query/dataset.py +5 -2
- {datachain-0.2.15.dist-info → datachain-0.2.16.dist-info}/METADATA +1 -1
- {datachain-0.2.15.dist-info → datachain-0.2.16.dist-info}/RECORD +13 -13
- {datachain-0.2.15.dist-info → datachain-0.2.16.dist-info}/LICENSE +0 -0
- {datachain-0.2.15.dist-info → datachain-0.2.16.dist-info}/WHEEL +0 -0
- {datachain-0.2.15.dist-info → datachain-0.2.16.dist-info}/entry_points.txt +0 -0
- {datachain-0.2.15.dist-info → datachain-0.2.16.dist-info}/top_level.txt +0 -0
datachain/catalog/catalog.py
CHANGED
|
@@ -1217,16 +1217,14 @@ class Catalog:
|
|
|
1217
1217
|
def get_temp_table_names(self) -> list[str]:
|
|
1218
1218
|
return self.warehouse.get_temp_table_names()
|
|
1219
1219
|
|
|
1220
|
-
def
|
|
1220
|
+
def cleanup_tables(self, names: Iterable[str]) -> None:
|
|
1221
1221
|
"""
|
|
1222
|
-
Drop tables
|
|
1222
|
+
Drop tables passed.
|
|
1223
1223
|
|
|
1224
|
-
This should be implemented
|
|
1225
|
-
|
|
1226
|
-
needed. When running the same `DatasetQuery` multiple times we
|
|
1227
|
-
may use the same temporary table names.
|
|
1224
|
+
This should be implemented to ensure that the provided tables
|
|
1225
|
+
are cleaned up as soon as they are no longer needed.
|
|
1228
1226
|
"""
|
|
1229
|
-
self.warehouse.
|
|
1227
|
+
self.warehouse.cleanup_tables(names)
|
|
1230
1228
|
self.id_generator.delete_uris(names)
|
|
1231
1229
|
|
|
1232
1230
|
def create_dataset_from_sources(
|
datachain/cli.py
CHANGED
|
@@ -910,7 +910,7 @@ def garbage_collect(catalog: "Catalog"):
|
|
|
910
910
|
print("Nothing to clean up.")
|
|
911
911
|
else:
|
|
912
912
|
print(f"Garbage collecting {len(temp_tables)} tables.")
|
|
913
|
-
catalog.
|
|
913
|
+
catalog.cleanup_tables(temp_tables)
|
|
914
914
|
|
|
915
915
|
|
|
916
916
|
def completion(shell: str) -> str:
|
|
@@ -97,7 +97,7 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
97
97
|
def close(self) -> None:
|
|
98
98
|
"""Closes any active database or HTTP connections."""
|
|
99
99
|
|
|
100
|
-
def
|
|
100
|
+
def cleanup_tables(self, temp_table_names: list[str]) -> None:
|
|
101
101
|
"""Cleanup temp tables."""
|
|
102
102
|
|
|
103
103
|
def cleanup_for_tests(self) -> None:
|
|
@@ -457,7 +457,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
457
457
|
"""Closes any active database connections."""
|
|
458
458
|
self.db.close()
|
|
459
459
|
|
|
460
|
-
def
|
|
460
|
+
def cleanup_tables(self, temp_table_names: list[str]) -> None:
|
|
461
461
|
"""Cleanup temp tables."""
|
|
462
462
|
self.id_generator.delete_uris(temp_table_names)
|
|
463
463
|
|
|
@@ -915,14 +915,12 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
915
915
|
if self.is_temp_table_name(t)
|
|
916
916
|
]
|
|
917
917
|
|
|
918
|
-
def
|
|
918
|
+
def cleanup_tables(self, names: Iterable[str]) -> None:
|
|
919
919
|
"""
|
|
920
|
-
Drop tables
|
|
920
|
+
Drop tables passed.
|
|
921
921
|
|
|
922
|
-
This should be implemented
|
|
923
|
-
|
|
924
|
-
needed. When running the same `DatasetQuery` multiple times we
|
|
925
|
-
may use the same temporary table names.
|
|
922
|
+
This should be implemented to ensure that the provided tables
|
|
923
|
+
are cleaned up as soon as they are no longer needed.
|
|
926
924
|
"""
|
|
927
925
|
for name in names:
|
|
928
926
|
self.db.drop_table(Table(name, self.db.metadata), if_exists=True)
|
datachain/lib/dc.py
CHANGED
|
@@ -193,8 +193,6 @@ class DataChain(DatasetQuery):
|
|
|
193
193
|
```
|
|
194
194
|
"""
|
|
195
195
|
|
|
196
|
-
max_row_count: Optional[int] = None
|
|
197
|
-
|
|
198
196
|
DEFAULT_FILE_RECORD: ClassVar[dict] = {
|
|
199
197
|
"source": "",
|
|
200
198
|
"name": "",
|
|
@@ -1124,7 +1122,7 @@ class DataChain(DatasetQuery):
|
|
|
1124
1122
|
def _func_fr() -> Iterator[tuple_type]: # type: ignore[valid-type]
|
|
1125
1123
|
yield from tuples
|
|
1126
1124
|
|
|
1127
|
-
chain = DataChain.
|
|
1125
|
+
chain = DataChain.from_records(DataChain.DEFAULT_FILE_RECORD, session=session)
|
|
1128
1126
|
if object_name:
|
|
1129
1127
|
output = {object_name: DataChain._dict_to_data_model(object_name, output)} # type: ignore[arg-type]
|
|
1130
1128
|
return chain.gen(_func_fr, output=output)
|
|
@@ -1441,13 +1439,14 @@ class DataChain(DatasetQuery):
|
|
|
1441
1439
|
)
|
|
1442
1440
|
|
|
1443
1441
|
@classmethod
|
|
1444
|
-
def
|
|
1442
|
+
def from_records(
|
|
1445
1443
|
cls,
|
|
1446
1444
|
to_insert: Optional[Union[dict, list[dict]]],
|
|
1447
1445
|
session: Optional[Session] = None,
|
|
1448
1446
|
) -> "DataChain":
|
|
1449
|
-
"""Create
|
|
1450
|
-
generating a
|
|
1447
|
+
"""Create a DataChain from the provided records. This method can be used for
|
|
1448
|
+
programmatically generating a chain, as opposed to reading data from storage
|
|
1449
|
+
or other sources.
|
|
1451
1450
|
|
|
1452
1451
|
Parameters:
|
|
1453
1452
|
to_insert : records (or a single record) to insert. Each record is
|
|
@@ -1455,8 +1454,8 @@ class DataChain(DatasetQuery):
|
|
|
1455
1454
|
|
|
1456
1455
|
Example:
|
|
1457
1456
|
```py
|
|
1458
|
-
empty = DataChain.
|
|
1459
|
-
single_record = DataChain.
|
|
1457
|
+
empty = DataChain.from_records()
|
|
1458
|
+
single_record = DataChain.from_records(DataChain.DEFAULT_FILE_RECORD)
|
|
1460
1459
|
```
|
|
1461
1460
|
"""
|
|
1462
1461
|
session = Session.get(session)
|
|
@@ -1602,18 +1601,7 @@ class DataChain(DatasetQuery):
|
|
|
1602
1601
|
@detach
|
|
1603
1602
|
def limit(self, n: int) -> "Self":
|
|
1604
1603
|
"""Return the first n rows of the chain."""
|
|
1605
|
-
|
|
1606
|
-
|
|
1607
|
-
if self.max_row_count is None:
|
|
1608
|
-
self.max_row_count = n
|
|
1609
|
-
return super().limit(n)
|
|
1610
|
-
|
|
1611
|
-
limit = min(n, self.max_row_count)
|
|
1612
|
-
if limit == self.max_row_count:
|
|
1613
|
-
return self
|
|
1614
|
-
|
|
1615
|
-
self.max_row_count = limit
|
|
1616
|
-
return super().limit(self.max_row_count)
|
|
1604
|
+
return super().limit(n)
|
|
1617
1605
|
|
|
1618
1606
|
@detach
|
|
1619
1607
|
def offset(self, offset: int) -> "Self":
|
datachain/lib/signal_schema.py
CHANGED
|
@@ -243,8 +243,11 @@ class SignalSchema:
|
|
|
243
243
|
curr_type = None
|
|
244
244
|
i = 0
|
|
245
245
|
while curr_tree is not None and i < len(path):
|
|
246
|
-
if val := curr_tree.get(path[i]
|
|
246
|
+
if val := curr_tree.get(path[i]):
|
|
247
247
|
curr_type, curr_tree = val
|
|
248
|
+
elif i == 0 and len(path) > 1 and (val := curr_tree.get(".".join(path))):
|
|
249
|
+
curr_type, curr_tree = val
|
|
250
|
+
break
|
|
248
251
|
else:
|
|
249
252
|
curr_type = None
|
|
250
253
|
i += 1
|
datachain/query/dataset.py
CHANGED
|
@@ -1201,10 +1201,10 @@ class DatasetQuery:
|
|
|
1201
1201
|
# implementations, as errors may close or render unusable the existing
|
|
1202
1202
|
# connections.
|
|
1203
1203
|
metastore = self.catalog.metastore.clone(use_new_connection=True)
|
|
1204
|
-
metastore.
|
|
1204
|
+
metastore.cleanup_tables(self.temp_table_names)
|
|
1205
1205
|
metastore.close()
|
|
1206
1206
|
warehouse = self.catalog.warehouse.clone(use_new_connection=True)
|
|
1207
|
-
warehouse.
|
|
1207
|
+
warehouse.cleanup_tables(self.temp_table_names)
|
|
1208
1208
|
warehouse.close()
|
|
1209
1209
|
self.temp_table_names = []
|
|
1210
1210
|
|
|
@@ -1383,6 +1383,9 @@ class DatasetQuery:
|
|
|
1383
1383
|
@detach
|
|
1384
1384
|
def limit(self, n: int) -> "Self":
|
|
1385
1385
|
query = self.clone(new_table=False)
|
|
1386
|
+
for step in query.steps:
|
|
1387
|
+
if isinstance(step, SQLLimit) and step.n < n:
|
|
1388
|
+
return query
|
|
1386
1389
|
query.steps.append(SQLLimit(n))
|
|
1387
1390
|
return query
|
|
1388
1391
|
|
|
@@ -2,7 +2,7 @@ datachain/__init__.py,sha256=GeyhE-5LgfJav2OKYGaieP2lBvf2Gm-ihj7thnK9zjI,800
|
|
|
2
2
|
datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
|
|
3
3
|
datachain/asyn.py,sha256=CKCFQJ0CbB3r04S7mUTXxriKzPnOvdUaVPXjM8vCtJw,7644
|
|
4
4
|
datachain/cache.py,sha256=N6PCEFJlWRpq7f_zeBNoaURFCJFAV7ibsLJqyiMHbBg,4207
|
|
5
|
-
datachain/cli.py,sha256=
|
|
5
|
+
datachain/cli.py,sha256=DbmI1sXs7-KCQz6RdLE_JAp3XO3yrTSRJ71LdUzx-XE,33099
|
|
6
6
|
datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
|
|
7
7
|
datachain/config.py,sha256=PfC7W5yO6HFO6-iMB4YB-0RR88LPiGmD6sS_SfVbGso,1979
|
|
8
8
|
datachain/dataset.py,sha256=MZezyuJWNj_3PEtzr0epPMNyWAOTrhTSPI5FmemV6L4,14470
|
|
@@ -17,7 +17,7 @@ datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
17
17
|
datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
|
|
18
18
|
datachain/utils.py,sha256=kgH5NPj47eC_KrFTd6ZS206lKVhnJVFt5XsqkK6ppTc,12483
|
|
19
19
|
datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
|
|
20
|
-
datachain/catalog/catalog.py,sha256=
|
|
20
|
+
datachain/catalog/catalog.py,sha256=LZo9pIgi_HOUWpxX1c7RMt5OnrlDHXx2YpL5oP8X0kk,80397
|
|
21
21
|
datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
|
|
22
22
|
datachain/catalog/loader.py,sha256=GJ8zhEYkC7TuaPzCsjJQ4LtTdECu-wwYzC12MikPOMQ,7307
|
|
23
23
|
datachain/catalog/subclass.py,sha256=B5R0qxeTYEyVAAPM1RutBPSoXZc8L5mVVZeSGXki9Sw,2096
|
|
@@ -32,24 +32,24 @@ datachain/data_storage/__init__.py,sha256=cEOJpyu1JDZtfUupYucCDNFI6e5Wmp_Oyzq6rZ
|
|
|
32
32
|
datachain/data_storage/db_engine.py,sha256=rgBuqJ-M1j5QyqiUQuJRewctuvRRj8LBDL54-aPEFxE,3287
|
|
33
33
|
datachain/data_storage/id_generator.py,sha256=VlDALKijggegAnNMJwuMETJgnLoPYxpkrkld5DNTPQw,3839
|
|
34
34
|
datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
|
|
35
|
-
datachain/data_storage/metastore.py,sha256=
|
|
35
|
+
datachain/data_storage/metastore.py,sha256=ody-hWyrisGuNlzy24bc7QBqPXWIg64NcucIhZYronk,54842
|
|
36
36
|
datachain/data_storage/schema.py,sha256=FQvt5MUMSnI5ZAE7Nthae4aaJpt8JC4nH8KiWDuhJkk,8135
|
|
37
37
|
datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
|
|
38
38
|
datachain/data_storage/sqlite.py,sha256=w0d_cZ2u9LpQYFFXll22mnxHaxPOoJdHlsKAZmONQpA,25605
|
|
39
|
-
datachain/data_storage/warehouse.py,sha256=
|
|
39
|
+
datachain/data_storage/warehouse.py,sha256=3iD946WXgGxohZ5lagmwydFZr7j7RceZW423QXU_7_U,33120
|
|
40
40
|
datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
41
41
|
datachain/lib/arrow.py,sha256=9C5AVH6tLo9hwzav-1tLLnmWP-3_SReYCOfcOC54pu0,4437
|
|
42
42
|
datachain/lib/clip.py,sha256=16u4b_y2Y15nUS2UN_8ximMo6r_-_4IQpmct2ol-e-g,5730
|
|
43
43
|
datachain/lib/data_model.py,sha256=qfTtQNncS5pt9SvXdMEa5kClniaT6XBGBfO7onEz2TI,1632
|
|
44
44
|
datachain/lib/dataset_info.py,sha256=lONGr71ozo1DS4CQEhnpKORaU4qFb6Ketv8Xm8CVm2U,2188
|
|
45
|
-
datachain/lib/dc.py,sha256=
|
|
45
|
+
datachain/lib/dc.py,sha256=6RtwA7MC3hosxi9RBgpOXjkv46SdN99g9N_u4mCDUUo,56071
|
|
46
46
|
datachain/lib/file.py,sha256=n9GBmZ1CjzDjHkbUBsUrs8JOJrAoh3MV2Cc8hBkex20,11957
|
|
47
47
|
datachain/lib/image.py,sha256=TgYhRhzd4nkytfFMeykQkPyzqb5Le_-tU81unVMPn4Q,2328
|
|
48
48
|
datachain/lib/meta_formats.py,sha256=jlSYWRUeDMjun_YCsQ2JxyaDJpEpokzHDPmKUAoCXnU,7034
|
|
49
49
|
datachain/lib/model_store.py,sha256=c4USXsBBjrGH8VOh4seIgOiav-qHOwdoixtxfLgU63c,2409
|
|
50
50
|
datachain/lib/pytorch.py,sha256=9PsypKseyKfIimTmTQOgb-pbNXgeeAHLdlWx0qRPULY,5660
|
|
51
51
|
datachain/lib/settings.py,sha256=39thOpYJw-zPirzeNO6pmRC2vPrQvt4eBsw1xLWDFsw,2344
|
|
52
|
-
datachain/lib/signal_schema.py,sha256=
|
|
52
|
+
datachain/lib/signal_schema.py,sha256=XQTINSN_FJK76Jn8qd03g6J0cum58knP8U7Iuw-zKyU,14704
|
|
53
53
|
datachain/lib/text.py,sha256=dVe2Ilc_gW2EV0kun0UwegiCkapWcd20cef7CgINWHU,1083
|
|
54
54
|
datachain/lib/udf.py,sha256=IjuDt2B8E3xEHhcJnaK_ZhmivdrOYPXz5uf7ylpktws,11815
|
|
55
55
|
datachain/lib/udf_signature.py,sha256=gMStcEeYJka5M6cg50Z9orC6y6HzCAJ3MkFqqn1fjZg,7137
|
|
@@ -66,7 +66,7 @@ datachain/lib/convert/values_to_tuples.py,sha256=aVoHWMOUGLAiS6_BBwKJqVIne91VffO
|
|
|
66
66
|
datachain/query/__init__.py,sha256=tv-spkjUCYamMN9ys_90scYrZ8kJ7C7d1MTYVmxGtk4,325
|
|
67
67
|
datachain/query/batch.py,sha256=j-_ZcuQra2Ro3Wj4crtqQCg-7xuv-p84hr4QHdvT7as,3479
|
|
68
68
|
datachain/query/builtins.py,sha256=ZKNs49t8Oa_OaboCBIEqtXZt7c1Qe9OR_C_HpoDriIU,2781
|
|
69
|
-
datachain/query/dataset.py,sha256=
|
|
69
|
+
datachain/query/dataset.py,sha256=iTz3c5nJ-WmoQ5zcvKGT9ly6xVKJtD_fk76LA7zecWk,60164
|
|
70
70
|
datachain/query/dispatch.py,sha256=oGX9ZuoKWPB_EyqAZD_eULcO3OejY44_keSmFS6SHT0,13315
|
|
71
71
|
datachain/query/metrics.py,sha256=vsECqbZfoSDBnvC3GQlziKXmISVYDLgHP1fMPEOtKyo,640
|
|
72
72
|
datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
|
|
@@ -92,9 +92,9 @@ datachain/sql/sqlite/base.py,sha256=Jb1csbIARjEvwbylnvgNA7ChozSyoL3CQzOGBUf8QAw,
|
|
|
92
92
|
datachain/sql/sqlite/types.py,sha256=yzvp0sXSEoEYXs6zaYC_2YubarQoZH-MiUNXcpuEP4s,1573
|
|
93
93
|
datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
|
|
94
94
|
datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
|
|
95
|
-
datachain-0.2.
|
|
96
|
-
datachain-0.2.
|
|
97
|
-
datachain-0.2.
|
|
98
|
-
datachain-0.2.
|
|
99
|
-
datachain-0.2.
|
|
100
|
-
datachain-0.2.
|
|
95
|
+
datachain-0.2.16.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
96
|
+
datachain-0.2.16.dist-info/METADATA,sha256=1f326fK-ZnS0nPvETuUj9PaI4R5SatpGVDIsQiJ0OvM,14577
|
|
97
|
+
datachain-0.2.16.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
98
|
+
datachain-0.2.16.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
99
|
+
datachain-0.2.16.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
100
|
+
datachain-0.2.16.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|