datachain 0.37.2__py3-none-any.whl → 0.37.4__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

@@ -29,7 +29,6 @@ from sqlalchemy.sql.selectable import Select
29
29
  from tqdm.auto import tqdm
30
30
 
31
31
  import datachain.sql.sqlite
32
- from datachain import semver
33
32
  from datachain.data_storage import AbstractDBMetastore, AbstractWarehouse
34
33
  from datachain.data_storage.db_engine import DatabaseEngine
35
34
  from datachain.data_storage.schema import DefaultSchema
@@ -692,61 +691,6 @@ class SQLiteWarehouse(AbstractWarehouse):
692
691
  for row in self.db.execute(query, cursor=cur)
693
692
  ]
694
693
 
695
- def merge_dataset_rows(
696
- self,
697
- src: DatasetRecord,
698
- dst: DatasetRecord,
699
- src_version: str,
700
- dst_version: str,
701
- ) -> None:
702
- dst_empty = False
703
-
704
- if not self.db.has_table(self.dataset_table_name(src, src_version)):
705
- # source table doesn't exist, nothing to do
706
- return
707
-
708
- src_dr = self.dataset_rows(src, src_version).table
709
-
710
- if not self.db.has_table(self.dataset_table_name(dst, dst_version)):
711
- # destination table doesn't exist, create it
712
- self.create_dataset_rows_table(
713
- self.dataset_table_name(dst, dst_version),
714
- columns=src_dr.columns,
715
- )
716
- dst_empty = True
717
-
718
- dst_dr = self.dataset_rows(dst, dst_version).table
719
- merge_fields = [c.name for c in src_dr.columns if c.name != "sys__id"]
720
- select_src = select(*(getattr(src_dr.columns, f) for f in merge_fields))
721
-
722
- if dst_empty:
723
- # we don't need union, but just select from source to destination
724
- insert_query = sqlite.insert(dst_dr).from_select(merge_fields, select_src)
725
- else:
726
- dst_version_latest = None
727
- # find the previous version of the destination dataset
728
- dst_previous_versions = [
729
- v.version
730
- for v in dst.versions # type: ignore [union-attr]
731
- if semver.compare(v.version, dst_version) == -1
732
- ]
733
- if dst_previous_versions:
734
- dst_version_latest = max(dst_previous_versions)
735
-
736
- dst_dr_latest = self.dataset_rows(dst, dst_version_latest).table
737
-
738
- select_dst_latest = select(
739
- *(getattr(dst_dr_latest.c, f) for f in merge_fields)
740
- )
741
- union_query = sqlalchemy.union(select_src, select_dst_latest)
742
- insert_query = (
743
- sqlite.insert(dst_dr)
744
- .from_select(merge_fields, union_query)
745
- .prefix_with("OR IGNORE")
746
- )
747
-
748
- self.db.execute(insert_query)
749
-
750
694
  def prepare_entries(self, entries: "Iterable[File]") -> Iterable[dict[str, Any]]:
751
695
  return (e.model_dump() for e in entries)
752
696
 
@@ -371,21 +371,6 @@ class AbstractWarehouse(ABC, Serializable):
371
371
  table = sa.Table(table_name, self.db.metadata)
372
372
  self.db.drop_table(table, if_exists=if_exists)
373
373
 
374
- @abstractmethod
375
- def merge_dataset_rows(
376
- self,
377
- src: "DatasetRecord",
378
- dst: "DatasetRecord",
379
- src_version: str,
380
- dst_version: str,
381
- ) -> None:
382
- """
383
- Merges source dataset rows and current latest destination dataset rows
384
- into a new rows table created for new destination dataset version.
385
- Note that table for new destination version must be created upfront.
386
- Merge results should not contain duplicates.
387
- """
388
-
389
374
  def dataset_rows_select(
390
375
  self,
391
376
  query: sa.Select,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.37.2
3
+ Version: 0.37.4
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -57,8 +57,8 @@ datachain/data_storage/job.py,sha256=NGFhXg0C0zRFTaF6ccjXZJT4xI4_gUr1WcxTLK6WYDE
57
57
  datachain/data_storage/metastore.py,sha256=DFyTkKLJN5-nFXXc7ln_rGj-FLctj0nrhXJxuyprZSk,64661
58
58
  datachain/data_storage/schema.py,sha256=3fAgiE11TIDYCW7EbTdiOm61SErRitvsLr7YPnUlVm0,9801
59
59
  datachain/data_storage/serializer.py,sha256=oL8i8smyAeVUyDepk8Xhf3lFOGOEHMoZjA5GdFzvfGI,3862
60
- datachain/data_storage/sqlite.py,sha256=pee99RewNQh5kVxGpD2sf9V5VloM4xwn8oeEhquU1rs,31756
61
- datachain/data_storage/warehouse.py,sha256=nuGT27visvAi7jr7ZAZF-wmFe0ZEFD8qaTheINX_7RM,35269
60
+ datachain/data_storage/sqlite.py,sha256=o9TR6N27JB52M9rRXdM9uwdBektGucWtJi9UnmLGh0A,29669
61
+ datachain/data_storage/warehouse.py,sha256=Zhf_HzhiEpsI0IuinAK-sF4ZMH66rV_ZDSOx-UFHv5o,34771
62
62
  datachain/diff/__init__.py,sha256=lGrygGzdWSSYJ1DgX4h2q_ko5QINEW8PKfxOwE9ZFnI,9394
63
63
  datachain/fs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
64
64
  datachain/fs/reference.py,sha256=A8McpXF0CqbXPqanXuvpKu50YLB3a2ZXA3YAPxtBXSM,914
@@ -165,9 +165,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
165
165
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
166
166
  datachain/toolkit/split.py,sha256=xQzzmvQRKsPteDKbpgOxd4r971BnFaK33mcOl0FuGeI,2883
167
167
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
168
- datachain-0.37.2.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
169
- datachain-0.37.2.dist-info/METADATA,sha256=TysOdteujF1qCj3q8AwEJicm-CqwufUUZapZo0ADMNQ,13762
170
- datachain-0.37.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
171
- datachain-0.37.2.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
172
- datachain-0.37.2.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
173
- datachain-0.37.2.dist-info/RECORD,,
168
+ datachain-0.37.4.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
169
+ datachain-0.37.4.dist-info/METADATA,sha256=00fv6-Lg7pJ2yBxYEaRtnMDF4C_Y5SUp5tsPwS8hUXk,13762
170
+ datachain-0.37.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
171
+ datachain-0.37.4.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
172
+ datachain-0.37.4.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
173
+ datachain-0.37.4.dist-info/RECORD,,