datachain-0.3.6-py3-none-any.whl → datachain-0.3.7-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

@@ -50,7 +50,7 @@ def convert_rows_custom_column_types(
50
50
  columns: "ColumnCollection[str, ColumnElement[Any]]",
51
51
  rows: Iterator[tuple[Any, ...]],
52
52
  dialect: "Dialect",
53
- ):
53
+ ) -> Iterator[tuple[Any, ...]]:
54
54
  """
55
55
  This function converts values of rows columns based on their types which are
56
56
  defined in columns. We are only converting column values for which types are
@@ -27,10 +27,7 @@ import datachain.sql.sqlite
27
27
  from datachain.data_storage import AbstractDBMetastore, AbstractWarehouse
28
28
  from datachain.data_storage.db_engine import DatabaseEngine
29
29
  from datachain.data_storage.id_generator import AbstractDBIDGenerator
30
- from datachain.data_storage.schema import (
31
- DefaultSchema,
32
- convert_rows_custom_column_types,
33
- )
30
+ from datachain.data_storage.schema import DefaultSchema
34
31
  from datachain.dataset import DatasetRecord
35
32
  from datachain.error import DataChainError
36
33
  from datachain.sql.sqlite import create_user_defined_sql_functions, sqlite_dialect
@@ -651,12 +648,6 @@ class SQLiteWarehouse(AbstractWarehouse):
651
648
  self.db.create_table(table, if_not_exists=if_not_exists)
652
649
  return table
653
650
 
654
- def dataset_rows_select(self, select_query: Select, **kwargs):
655
- rows = self.db.execute(select_query, **kwargs)
656
- yield from convert_rows_custom_column_types(
657
- select_query.selected_columns, rows, sqlite_dialect
658
- )
659
-
660
651
  def get_dataset_sources(
661
652
  self, dataset: DatasetRecord, version: int
662
653
  ) -> list[StorageURI]:
@@ -17,6 +17,7 @@ from sqlalchemy.sql.expression import true
17
17
  from tqdm import tqdm
18
18
 
19
19
  from datachain.client import Client
20
+ from datachain.data_storage.schema import convert_rows_custom_column_types
20
21
  from datachain.data_storage.serializer import Serializable
21
22
  from datachain.dataset import DatasetRecord
22
23
  from datachain.node import DirType, DirTypeGroup, Entry, Node, NodeWithPath, get_path
@@ -226,7 +227,7 @@ class AbstractWarehouse(ABC, Serializable):
226
227
  if limit < page_size:
227
228
  paginated_query = paginated_query.limit(None).limit(limit)
228
229
 
229
- results = self.db.execute(paginated_query.offset(offset))
230
+ results = self.dataset_rows_select(paginated_query.offset(offset))
230
231
 
231
232
  processed = False
232
233
  for row in results:
@@ -309,12 +310,18 @@ class AbstractWarehouse(ABC, Serializable):
309
310
  Merge results should not contain duplicates.
310
311
  """
311
312
 
312
- @abstractmethod
313
- def dataset_rows_select(self, select_query: sa.sql.selectable.Select, **kwargs):
313
+ def dataset_rows_select(
314
+ self,
315
+ query: sa.sql.selectable.Select,
316
+ **kwargs,
317
+ ) -> Iterator[tuple[Any, ...]]:
314
318
  """
315
- Method for fetching dataset rows from database. This is abstract since
316
- in some DBs we need to use special settings
319
+ Fetch dataset rows from database.
317
320
  """
321
+ rows = self.db.execute(query, **kwargs)
322
+ yield from convert_rows_custom_column_types(
323
+ query.selected_columns, rows, self.db.dialect
324
+ )
318
325
 
319
326
  @abstractmethod
320
327
  def get_dataset_sources(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.3.6
3
+ Version: 0.3.7
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -33,10 +33,10 @@ datachain/data_storage/db_engine.py,sha256=81Ol1of9TTTzD97ORajCnP366Xz2mEJt6C-kT
33
33
  datachain/data_storage/id_generator.py,sha256=lCEoU0BM37Ai2aRpSbwo5oQT0GqZnSpYwwvizathRMQ,4292
34
34
  datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
35
35
  datachain/data_storage/metastore.py,sha256=nxcY6nwyEmQWMAo33sNGO-FgUFQs2amBGGnZz2ftEz0,55362
36
- datachain/data_storage/schema.py,sha256=GwJIHkjhrnBxJAV1WvCMM8jiJN5h79LXDyzMmUDtRw0,8523
36
+ datachain/data_storage/schema.py,sha256=JKpSEz8igpwZ9zkpRPYVXZxEpiXuLKEs2WNhH0KqM6U,8552
37
37
  datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
38
- datachain/data_storage/sqlite.py,sha256=WJh_r9nlYZ_luTpEs-5StwUxScksIarJTNtdFUEj-4g,28613
39
- datachain/data_storage/warehouse.py,sha256=tyJJDxFae6XWgLmOoG0B_MJ_Z_UEMoW_wJb96zzwTtA,33471
38
+ datachain/data_storage/sqlite.py,sha256=jLgkvikYkENQUO_ykoNFfsBc2ofZXwFHLMa1nyWP3aw,28316
39
+ datachain/data_storage/warehouse.py,sha256=cvlfa-nyIxqrrpSRtCdeVjlTwhn7rcIoWjOq91HhItU,33668
40
40
  datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
41
  datachain/lib/arrow.py,sha256=D8N7zCppRdc5sTYT1hNIbROc-sKA_8FN5J_m-KjD3Us,4929
42
42
  datachain/lib/clip.py,sha256=16u4b_y2Y15nUS2UN_8ximMo6r_-_4IQpmct2ol-e-g,5730
@@ -94,9 +94,9 @@ datachain/sql/sqlite/base.py,sha256=WLPHBhZbXbiqPoRV1VgDrXJqku4UuvJpBhYeQ0k5rI8,
94
94
  datachain/sql/sqlite/types.py,sha256=yzvp0sXSEoEYXs6zaYC_2YubarQoZH-MiUNXcpuEP4s,1573
95
95
  datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
96
96
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
97
- datachain-0.3.6.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
98
- datachain-0.3.6.dist-info/METADATA,sha256=s4YRBs53Pf05kFs33Sqp6E3ehEEXxtmfg8OhcrRCUp4,16719
99
- datachain-0.3.6.dist-info/WHEEL,sha256=Mdi9PDNwEZptOjTlUcAth7XJDFtKrHYaQMPulZeBCiQ,91
100
- datachain-0.3.6.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
101
- datachain-0.3.6.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
102
- datachain-0.3.6.dist-info/RECORD,,
97
+ datachain-0.3.7.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
98
+ datachain-0.3.7.dist-info/METADATA,sha256=m7NZ31iEMCD2xOF8HZNp8YvGu05TmF_3UiZQQPUVmmc,16719
99
+ datachain-0.3.7.dist-info/WHEEL,sha256=Mdi9PDNwEZptOjTlUcAth7XJDFtKrHYaQMPulZeBCiQ,91
100
+ datachain-0.3.7.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
101
+ datachain-0.3.7.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
102
+ datachain-0.3.7.dist-info/RECORD,,