datachain 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

datachain/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.1.11'
16
- __version_tuple__ = version_tuple = (0, 1, 11)
15
+ __version__ = version = '0.1.13'
16
+ __version_tuple__ = version_tuple = (0, 1, 13)
datachain/lib/dc.py CHANGED
@@ -492,16 +492,43 @@ class DataChain(DatasetQuery):
492
492
  chain.signals_schema = new_schema
493
493
  return chain
494
494
 
495
- def get_values(self) -> Iterator[list]:
496
- """Iterate over rows, getting feature values and applying reader calls."""
497
- for features in self.iterate():
498
- yield [fr.get_value() if isinstance(fr, Feature) else fr for fr in features]
495
+ def get_values(self, *cols: str) -> Iterator[list]:
496
+ """Iterate over rows, getting feature values and applying reader calls.
497
+ If columns are specified - limit them to specified columns.
498
+ """
499
+ for features in self.iterate(*cols):
500
+ yield [fr.get_value() if isinstance(fr, Feature) else fr for fr in features] # type: ignore[union-attr,call-arg]
501
+
502
+ def get_one_value(self, col: str) -> Iterator:
503
+ for item in self.get_values(col):
504
+ yield item[0]
499
505
 
500
- def iterate(self) -> Iterator[Sequence[Feature]]:
501
- db_signals = self.signals_schema.db_signals()
506
+ def iterate(self, *cols: str) -> Iterator[list[FeatureType]]:
507
+ """Iterate over rows. If columns are specified - limit them to specified
508
+ columns.
509
+ """
510
+ chain = self.select(*cols) if cols else self
511
+
512
+ db_signals = chain.signals_schema.db_signals()
502
513
  with super().select(*db_signals).as_iterable() as rows_iter:
503
514
  for row in rows_iter:
504
- yield self.signals_schema.row_to_features(row, self.session.catalog)
515
+ yield chain.signals_schema.row_to_features(row, chain.session.catalog)
516
+
517
+ def iterate_one(self, col: str) -> Iterator[FeatureType]:
518
+ for item in self.iterate(col):
519
+ yield item[0]
520
+
521
+ def collect(self, *cols: str) -> list[list[FeatureType]]:
522
+ return list(self.iterate(*cols))
523
+
524
+ def collect_one(self, col: str) -> list[FeatureType]:
525
+ return list(self.iterate_one(col))
526
+
527
+ def collect_values(self, *cols: str) -> list[list]:
528
+ return list(self.get_values(*cols))
529
+
530
+ def collect_one_value(self, col: str) -> list:
531
+ return list(self.get_one_value(col))
505
532
 
506
533
  def to_pytorch(self, **kwargs):
507
534
  """Convert to pytorch dataset format."""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.1.11
3
+ Version: 0.1.13
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -1,6 +1,6 @@
1
1
  datachain/__init__.py,sha256=9a0qX6tqyA9KC3ahLmGarqlRTZJXhM7HijAWpfUaOnQ,102
2
2
  datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
3
- datachain/_version.py,sha256=HreDwlLXV189L3kiBj3huM_kqWD1usijlC8LN1YXcCM,413
3
+ datachain/_version.py,sha256=S22EPqqZRb53L2H7sobVA3TUXv9skvkYd-YtLuHuV6M,413
4
4
  datachain/asyn.py,sha256=opARBVZJxTKU3EGYd-8gcpNXoshuCfVz_b0ut3oxC50,7641
5
5
  datachain/cache.py,sha256=FaPWrqWznPffmskTb1pdPkt2jAMMf__9FC2zEnP0vDU,4022
6
6
  datachain/cli.py,sha256=1mBozBJS9Nq-EeahxwyKH8ef64E2v93o0CAEzxjcbkY,32209
@@ -40,7 +40,7 @@ datachain/data_storage/warehouse.py,sha256=sQLOrv6DH8UcWH1aqlg3YJKmaHr696XkVafBx
40
40
  datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
41
  datachain/lib/cached_stream.py,sha256=BQI6gpJ2y7_-jqQo_0VB9ntbkOVISvj9wlDwGDQbqw8,3537
42
42
  datachain/lib/claude.py,sha256=iAauA1zNVNONpLzUo1t0QN5PZ5Ot6cZkfib7Ka_c638,1969
43
- datachain/lib/dc.py,sha256=kyuSg-l7HciqFaunqPx41WKyAeuJ2H2tpWJplCXhZJc,26086
43
+ datachain/lib/dc.py,sha256=MAy1Bsxknaz2aduZ28ffuq88x8Ja8QHA59CsyyiUlZE,27048
44
44
  datachain/lib/feature.py,sha256=C5lxQ_Ef4rL0-mef4A4EeoqB0rcNZ0ExRE26ehx20RM,14196
45
45
  datachain/lib/feature_registry.py,sha256=hg_S_9JPEYaQ-8PI64mU0sEhSJ-rcrKtwQk5TPBotEw,1570
46
46
  datachain/lib/feature_utils.py,sha256=6wbKZ2xq08b751EFBRJy1OZLqWYd_gxq9A_Em_aMFk4,4713
@@ -92,9 +92,9 @@ datachain/sql/sqlite/__init__.py,sha256=TAdJX0Bg28XdqPO-QwUVKy8rg78cgMileHvMNot7
92
92
  datachain/sql/sqlite/base.py,sha256=XVxn4pB-N4pPfiby5uVvfH7feNzRKlBNzsc5eyKPvhI,10965
93
93
  datachain/sql/sqlite/types.py,sha256=oP93nLfTBaYnN0z_4Dsv-HZm8j9rrUf1esMM-z3JLbg,1754
94
94
  datachain/sql/sqlite/vector.py,sha256=stBeEW6fbVbILmAtV4khjXdJIGT13HkRWJeCoqIOk50,315
95
- datachain-0.1.11.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
96
- datachain-0.1.11.dist-info/METADATA,sha256=BFTmlt8_vtCHF80AHQcIQkE9YMCigp7k1jcAZV1D7j4,13972
97
- datachain-0.1.11.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
98
- datachain-0.1.11.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
99
- datachain-0.1.11.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
100
- datachain-0.1.11.dist-info/RECORD,,
95
+ datachain-0.1.13.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
96
+ datachain-0.1.13.dist-info/METADATA,sha256=aqjqnY-YxqDJZhpkKaPQ35QZkehWOcsGIdqNzdLRw-0,13972
97
+ datachain-0.1.13.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
98
+ datachain-0.1.13.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
99
+ datachain-0.1.13.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
100
+ datachain-0.1.13.dist-info/RECORD,,