datachain 0.7.1__py3-none-any.whl → 0.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (46)
  1. datachain/__init__.py +0 -2
  2. datachain/catalog/catalog.py +12 -9
  3. datachain/cli.py +109 -9
  4. datachain/client/fsspec.py +9 -9
  5. datachain/data_storage/metastore.py +63 -11
  6. datachain/data_storage/schema.py +2 -2
  7. datachain/data_storage/sqlite.py +5 -4
  8. datachain/data_storage/warehouse.py +18 -18
  9. datachain/dataset.py +142 -14
  10. datachain/func/__init__.py +49 -0
  11. datachain/{lib/func → func}/aggregate.py +13 -11
  12. datachain/func/array.py +176 -0
  13. datachain/func/base.py +23 -0
  14. datachain/func/conditional.py +81 -0
  15. datachain/func/func.py +384 -0
  16. datachain/func/path.py +110 -0
  17. datachain/func/random.py +23 -0
  18. datachain/func/string.py +154 -0
  19. datachain/func/window.py +49 -0
  20. datachain/lib/arrow.py +24 -12
  21. datachain/lib/data_model.py +25 -9
  22. datachain/lib/dataset_info.py +9 -5
  23. datachain/lib/dc.py +94 -56
  24. datachain/lib/hf.py +1 -1
  25. datachain/lib/signal_schema.py +1 -1
  26. datachain/lib/utils.py +1 -0
  27. datachain/lib/webdataset_laion.py +5 -5
  28. datachain/model/bbox.py +2 -2
  29. datachain/model/pose.py +5 -5
  30. datachain/model/segment.py +2 -2
  31. datachain/nodes_fetcher.py +2 -2
  32. datachain/query/dataset.py +57 -34
  33. datachain/remote/studio.py +40 -8
  34. datachain/sql/__init__.py +0 -2
  35. datachain/sql/functions/__init__.py +0 -26
  36. datachain/sql/selectable.py +11 -5
  37. datachain/sql/sqlite/base.py +11 -2
  38. datachain/studio.py +29 -0
  39. {datachain-0.7.1.dist-info → datachain-0.7.3.dist-info}/METADATA +2 -2
  40. {datachain-0.7.1.dist-info → datachain-0.7.3.dist-info}/RECORD +44 -37
  41. datachain/lib/func/__init__.py +0 -32
  42. datachain/lib/func/func.py +0 -152
  43. {datachain-0.7.1.dist-info → datachain-0.7.3.dist-info}/LICENSE +0 -0
  44. {datachain-0.7.1.dist-info → datachain-0.7.3.dist-info}/WHEEL +0 -0
  45. {datachain-0.7.1.dist-info → datachain-0.7.3.dist-info}/entry_points.txt +0 -0
  46. {datachain-0.7.1.dist-info → datachain-0.7.3.dist-info}/top_level.txt +0 -0
@@ -43,9 +43,10 @@ from datachain.data_storage.schema import (
43
43
  )
44
44
  from datachain.dataset import DatasetStatus, RowDict
45
45
  from datachain.error import DatasetNotFoundError, QueryScriptCancelError
46
+ from datachain.func.base import Function
46
47
  from datachain.lib.udf import UDFAdapter
47
48
  from datachain.progress import CombinedDownloadCallback
48
- from datachain.sql.functions import rand
49
+ from datachain.sql.functions.random import rand
49
50
  from datachain.utils import (
50
51
  batched,
51
52
  determine_processes,
@@ -65,15 +66,16 @@ if TYPE_CHECKING:
65
66
  from datachain.catalog import Catalog
66
67
  from datachain.data_storage import AbstractWarehouse
67
68
  from datachain.dataset import DatasetRecord
68
-
69
- from .udf import UDFResult
69
+ from datachain.lib.udf import UDFResult
70
70
 
71
71
  P = ParamSpec("P")
72
72
 
73
73
 
74
74
  INSERT_BATCH_SIZE = 10000
75
75
 
76
- PartitionByType = Union[ColumnElement, Sequence[ColumnElement]]
76
+ PartitionByType = Union[
77
+ Function, ColumnElement, Sequence[Union[Function, ColumnElement]]
78
+ ]
77
79
  JoinPredicateType = Union[str, ColumnClause, ColumnElement]
78
80
  DatasetDependencyType = tuple[str, int]
79
81
 
@@ -457,18 +459,15 @@ class UDFStep(Step, ABC):
457
459
  # Run the UDFDispatcher in another process to avoid needing
458
460
  # if __name__ == '__main__': in user scripts
459
461
  exec_cmd = get_datachain_executable()
462
+ cmd = [*exec_cmd, "internal-run-udf"]
460
463
  envs = dict(os.environ)
461
464
  envs.update({"PYTHONPATH": os.getcwd()})
462
465
  process_data = filtered_cloudpickle_dumps(udf_info)
463
- result = subprocess.run( # noqa: S603
464
- [*exec_cmd, "internal-run-udf"],
465
- input=process_data,
466
- check=False,
467
- env=envs,
468
- )
469
- if result.returncode != 0:
470
- raise RuntimeError("UDF Execution Failed!")
471
466
 
467
+ with subprocess.Popen(cmd, env=envs, stdin=subprocess.PIPE) as process: # noqa: S603
468
+ process.communicate(process_data)
469
+ if process.poll():
470
+ raise RuntimeError("UDF Execution Failed!")
472
471
  else:
473
472
  # Otherwise process single-threaded (faster for smaller UDFs)
474
473
  warehouse = self.catalog.warehouse
@@ -520,13 +519,17 @@ class UDFStep(Step, ABC):
520
519
  else:
521
520
  list_partition_by = [self.partition_by]
522
521
 
522
+ partition_by = [
523
+ p.get_column() if isinstance(p, Function) else p for p in list_partition_by
524
+ ]
525
+
523
526
  # create table with partitions
524
527
  tbl = self.catalog.warehouse.create_udf_table(partition_columns())
525
528
 
526
529
  # fill table with partitions
527
530
  cols = [
528
531
  query.selected_columns.sys__id,
529
- f.dense_rank().over(order_by=list_partition_by).label(PARTITION_COLUMN_ID),
532
+ f.dense_rank().over(order_by=partition_by).label(PARTITION_COLUMN_ID),
530
533
  ]
531
534
  self.catalog.warehouse.db.execute(
532
535
  tbl.insert().from_select(cols, query.with_only_columns(*cols))
@@ -683,6 +686,12 @@ class SQLClause(Step, ABC):
683
686
 
684
687
  return step_result(q, new_query.selected_columns)
685
688
 
689
+ def parse_cols(
690
+ self,
691
+ cols: Sequence[Union[Function, ColumnElement]],
692
+ ) -> tuple[ColumnElement, ...]:
693
+ return tuple(c.get_column() if isinstance(c, Function) else c for c in cols)
694
+
686
695
  @abstractmethod
687
696
  def apply_sql_clause(self, query):
688
697
  pass
@@ -690,12 +699,14 @@ class SQLClause(Step, ABC):
690
699
 
691
700
  @frozen
692
701
  class SQLSelect(SQLClause):
693
- args: tuple[Union[str, ColumnElement], ...]
702
+ args: tuple[Union[Function, ColumnElement], ...]
694
703
 
695
704
  def apply_sql_clause(self, query) -> Select:
696
705
  subquery = query.subquery()
697
-
698
- args = [subquery.c[str(c)] if isinstance(c, (str, C)) else c for c in self.args]
706
+ args = [
707
+ subquery.c[str(c)] if isinstance(c, (str, C)) else c
708
+ for c in self.parse_cols(self.args)
709
+ ]
699
710
  if not args:
700
711
  args = subquery.c
701
712
 
@@ -704,22 +715,25 @@ class SQLSelect(SQLClause):
704
715
 
705
716
  @frozen
706
717
  class SQLSelectExcept(SQLClause):
707
- args: tuple[str, ...]
718
+ args: tuple[Union[Function, ColumnElement], ...]
708
719
 
709
720
  def apply_sql_clause(self, query: Select) -> Select:
710
721
  subquery = query.subquery()
711
- names = set(self.args)
712
- args = [c for c in subquery.c if c.name not in names]
722
+ args = [c for c in subquery.c if c.name not in set(self.parse_cols(self.args))]
713
723
  return sqlalchemy.select(*args).select_from(subquery)
714
724
 
715
725
 
716
726
  @frozen
717
727
  class SQLMutate(SQLClause):
718
- args: tuple[ColumnElement, ...]
728
+ args: tuple[Union[Function, ColumnElement], ...]
719
729
 
720
730
  def apply_sql_clause(self, query: Select) -> Select:
721
731
  original_subquery = query.subquery()
722
- to_mutate = {c.name for c in self.args}
732
+ args = [
733
+ original_subquery.c[str(c)] if isinstance(c, (str, C)) else c
734
+ for c in self.parse_cols(self.args)
735
+ ]
736
+ to_mutate = {c.name for c in args}
723
737
 
724
738
  prefix = f"mutate{token_hex(8)}_"
725
739
  cols = [
@@ -729,9 +743,7 @@ class SQLMutate(SQLClause):
729
743
  # this is needed for new column to be used in clauses
730
744
  # like ORDER BY, otherwise new column is not recognized
731
745
  subquery = (
732
- sqlalchemy.select(*cols, *self.args)
733
- .select_from(original_subquery)
734
- .subquery()
746
+ sqlalchemy.select(*cols, *args).select_from(original_subquery).subquery()
735
747
  )
736
748
 
737
749
  return sqlalchemy.select(*subquery.c).select_from(subquery)
@@ -739,21 +751,24 @@ class SQLMutate(SQLClause):
739
751
 
740
752
  @frozen
741
753
  class SQLFilter(SQLClause):
742
- expressions: tuple[ColumnElement, ...]
754
+ expressions: tuple[Union[Function, ColumnElement], ...]
743
755
 
744
756
  def __and__(self, other):
745
- return self.__class__(self.expressions + other)
757
+ expressions = self.parse_cols(self.expressions)
758
+ return self.__class__(expressions + other)
746
759
 
747
760
  def apply_sql_clause(self, query: Select) -> Select:
748
- return query.filter(*self.expressions)
761
+ expressions = self.parse_cols(self.expressions)
762
+ return query.filter(*expressions)
749
763
 
750
764
 
751
765
  @frozen
752
766
  class SQLOrderBy(SQLClause):
753
- args: tuple[ColumnElement, ...]
767
+ args: tuple[Union[Function, ColumnElement], ...]
754
768
 
755
769
  def apply_sql_clause(self, query: Select) -> Select:
756
- return query.order_by(*self.args)
770
+ args = self.parse_cols(self.args)
771
+ return query.order_by(*args)
757
772
 
758
773
 
759
774
  @frozen
@@ -948,8 +963,8 @@ class SQLJoin(Step):
948
963
 
949
964
  @frozen
950
965
  class SQLGroupBy(SQLClause):
951
- cols: Sequence[Union[str, ColumnElement]]
952
- group_by: Sequence[Union[str, ColumnElement]]
966
+ cols: Sequence[Union[str, Function, ColumnElement]]
967
+ group_by: Sequence[Union[str, Function, ColumnElement]]
953
968
 
954
969
  def apply_sql_clause(self, query) -> Select:
955
970
  if not self.cols:
@@ -959,12 +974,20 @@ class SQLGroupBy(SQLClause):
959
974
 
960
975
  subquery = query.subquery()
961
976
 
977
+ group_by = [
978
+ c.get_column() if isinstance(c, Function) else c for c in self.group_by
979
+ ]
980
+
962
981
  cols = [
963
- subquery.c[str(c)] if isinstance(c, (str, C)) else c
964
- for c in [*self.group_by, *self.cols]
982
+ c.get_column()
983
+ if isinstance(c, Function)
984
+ else subquery.c[str(c)]
985
+ if isinstance(c, (str, C))
986
+ else c
987
+ for c in (*group_by, *self.cols)
965
988
  ]
966
989
 
967
- return sqlalchemy.select(*cols).select_from(subquery).group_by(*self.group_by)
990
+ return sqlalchemy.select(*cols).select_from(subquery).group_by(*group_by)
968
991
 
969
992
 
970
993
  def _validate_columns(
@@ -178,17 +178,9 @@ class StudioClient:
178
178
  data = {}
179
179
 
180
180
  if not ok:
181
- logger.error(
182
- "Got bad response from Studio, content is %s",
183
- response.content.decode("utf-8"),
184
- )
185
181
  if response.status_code == 403:
186
182
  message = f"Not authorized for the team {self.team}"
187
183
  else:
188
- logger.error(
189
- "Got bad response from Studio, content is %s",
190
- response.content.decode("utf-8"),
191
- )
192
184
  message = data.get("message", "")
193
185
  else:
194
186
  message = ""
@@ -230,6 +222,46 @@ class StudioClient:
230
222
  def ls_datasets(self) -> Response[LsData]:
231
223
  return self._send_request("datachain/ls-datasets", {})
232
224
 
225
+ def edit_dataset(
226
+ self,
227
+ name: str,
228
+ new_name: Optional[str] = None,
229
+ description: Optional[str] = None,
230
+ labels: Optional[list[str]] = None,
231
+ ) -> Response[DatasetInfoData]:
232
+ body = {
233
+ "dataset_name": name,
234
+ }
235
+
236
+ if new_name is not None:
237
+ body["new_name"] = new_name
238
+
239
+ if description is not None:
240
+ body["description"] = description
241
+
242
+ if labels is not None:
243
+ body["labels"] = labels # type: ignore[assignment]
244
+
245
+ return self._send_request(
246
+ "datachain/edit-dataset",
247
+ body,
248
+ )
249
+
250
+ def rm_dataset(
251
+ self,
252
+ name: str,
253
+ version: Optional[int] = None,
254
+ force: Optional[bool] = False,
255
+ ) -> Response[DatasetInfoData]:
256
+ return self._send_request(
257
+ "datachain/rm-dataset",
258
+ {
259
+ "dataset_name": name,
260
+ "version": version,
261
+ "force": force,
262
+ },
263
+ )
264
+
233
265
  def dataset_info(self, name: str) -> Response[DatasetInfoData]:
234
266
  def _parse_dataset_info(dataset_info):
235
267
  _parse_dates(dataset_info, ["created_at", "finished_at"])
datachain/sql/__init__.py CHANGED
@@ -1,13 +1,11 @@
1
1
  from sqlalchemy.sql.elements import literal
2
2
  from sqlalchemy.sql.expression import column
3
3
 
4
- from . import functions
5
4
  from .default import setup as default_setup
6
5
  from .selectable import select, values
7
6
 
8
7
  __all__ = [
9
8
  "column",
10
- "functions",
11
9
  "literal",
12
10
  "select",
13
11
  "values",
@@ -1,26 +0,0 @@
1
- from sqlalchemy.sql.expression import func
2
-
3
- from . import array, path, string
4
- from .aggregate import avg
5
- from .conditional import greatest, least
6
- from .random import rand
7
-
8
- count = func.count
9
- sum = func.sum
10
- min = func.min
11
- max = func.max
12
-
13
- __all__ = [
14
- "array",
15
- "avg",
16
- "count",
17
- "func",
18
- "greatest",
19
- "least",
20
- "max",
21
- "min",
22
- "path",
23
- "rand",
24
- "string",
25
- "sum",
26
- ]
@@ -9,7 +9,9 @@ class Values(selectable.Values):
9
9
  columns = [expression.column(f"c{i}") for i in range(1, num_columns + 1)]
10
10
  else:
11
11
  columns = [
12
- expression.column(c) if isinstance(c, str) else c for c in columns
12
+ process_column_expression(c)
13
+ for c in columns
14
+ # expression.column(c) if isinstance(c, str) else c for c in columns
13
15
  ]
14
16
  super().__init__(*columns, **kwargs)
15
17
  self._data += tuple(data)
@@ -19,13 +21,17 @@ def values(data, columns=None, **kwargs) -> Values:
19
21
  return Values(data, columns=columns, **kwargs)
20
22
 
21
23
 
22
- def process_column_expressions(columns):
23
- return [expression.column(c) if isinstance(c, str) else c for c in columns]
24
+ def process_column_expression(col):
25
+ if hasattr(col, "get_column"):
26
+ return col.get_column()
27
+ if isinstance(col, str):
28
+ return expression.column(col)
29
+ return col
24
30
 
25
31
 
26
32
  def select(*columns, **kwargs) -> "expression.Select":
27
- columns = process_column_expressions(columns)
28
- return expression.select(*columns, **kwargs)
33
+ columns_processed = [process_column_expression(c) for c in columns]
34
+ return expression.select(*columns_processed, **kwargs)
29
35
 
30
36
 
31
37
  def base_values_compiler(column_name_func, element, compiler, **kwargs):
@@ -1,6 +1,7 @@
1
1
  import logging
2
2
  import re
3
3
  import sqlite3
4
+ import warnings
4
5
  from collections.abc import Iterable
5
6
  from datetime import MAXYEAR, MINYEAR, datetime, timezone
6
7
  from types import MappingProxyType
@@ -418,14 +419,22 @@ def compile_collect(element, compiler, **kwargs):
418
419
  return compiler.process(func.json_group_array(*element.clauses.clauses), **kwargs)
419
420
 
420
421
 
421
- def load_usearch_extension(conn) -> bool:
422
+ def load_usearch_extension(conn: sqlite3.Connection) -> bool:
422
423
  try:
423
424
  # usearch is part of the vector optional dependencies
424
425
  # we use the extension's cosine and euclidean distance functions
425
426
  from usearch import sqlite_path
426
427
 
427
428
  conn.enable_load_extension(True)
428
- conn.load_extension(sqlite_path())
429
+
430
+ with warnings.catch_warnings():
431
+ # usearch binary is not available for Windows, see: https://github.com/unum-cloud/usearch/issues/427.
432
+ # and, sometimes fail to download the binary in other platforms
433
+ # triggering UserWarning.
434
+
435
+ warnings.filterwarnings("ignore", category=UserWarning, module="usearch")
436
+ conn.load_extension(sqlite_path())
437
+
429
438
  conn.enable_load_extension(False)
430
439
  return True
431
440
 
datachain/studio.py CHANGED
@@ -130,6 +130,35 @@ def list_datasets(team: Optional[str] = None):
130
130
  yield (name, version)
131
131
 
132
132
 
133
+ def edit_studio_dataset(
134
+ team_name: Optional[str],
135
+ name: str,
136
+ new_name: Optional[str] = None,
137
+ description: Optional[str] = None,
138
+ labels: Optional[list[str]] = None,
139
+ ):
140
+ client = StudioClient(team=team_name)
141
+ response = client.edit_dataset(name, new_name, description, labels)
142
+ if not response.ok:
143
+ raise_remote_error(response.message)
144
+
145
+ print(f"Dataset {name} updated")
146
+
147
+
148
+ def remove_studio_dataset(
149
+ team_name: Optional[str],
150
+ name: str,
151
+ version: Optional[int] = None,
152
+ force: Optional[bool] = False,
153
+ ):
154
+ client = StudioClient(team=team_name)
155
+ response = client.rm_dataset(name, version, force)
156
+ if not response.ok:
157
+ raise_remote_error(response.message)
158
+
159
+ print(f"Dataset {name} removed")
160
+
161
+
133
162
  def save_config(hostname, token):
134
163
  config = Config(ConfigLevel.GLOBAL)
135
164
  with config.edit() as conf:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.7.1
3
+ Version: 0.7.3
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -139,7 +139,7 @@ Key Features
139
139
  ============
140
140
 
141
141
  📂 **Multimodal Dataset Versioning.**
142
- - Version unstructured data without redundant data copies, by supporitng
142
+ - Version unstructured data without redundant data copies, by supporting
143
143
  references to S3, GCP, Azure, and local file systems.
144
144
  - Multimodal data support: images, video, text, PDFs, JSONs, CSVs, parquet, etc.
145
145
  - Unite files and metadata together into persistent, versioned, columnar datasets.
@@ -1,30 +1,30 @@
1
- datachain/__init__.py,sha256=OGzc8xZWtwqxiiutjU4AxCRPY0lrX_csgERiTrq4G0o,908
1
+ datachain/__init__.py,sha256=ofPJ6B-d-ybSDRrE7J6wqF_ZRAB2W9U8l-eeuBtqPLg,865
2
2
  datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
3
3
  datachain/asyn.py,sha256=5aKrjnUxk0mtnZeFKNJd1DCE0MsnSoyJBZkr0y9H_a0,9313
4
4
  datachain/cache.py,sha256=s0YHN7qurmQv-eC265TjeureK84TebWWAnL07cxchZQ,2997
5
- datachain/cli.py,sha256=hdVt_HJumQVgtaBAtBVJm-uPyYVogMXNVLmRcZyWHgk,36677
5
+ datachain/cli.py,sha256=1hiBClE1kbRyx0DK3uX5KMVa0ktbsG6TsFSNvoT2xxs,39399
6
6
  datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
7
7
  datachain/config.py,sha256=g8qbNV0vW2VEKpX-dGZ9pAn0DAz6G2ZFcr7SAV3PoSM,4272
8
- datachain/dataset.py,sha256=0IN-5y723y-bnFlieKtOFZLCjwX_yplFo3q0DV7LRPw,14821
8
+ datachain/dataset.py,sha256=-9uPdOn1uWkGucouhsFVGRIuFdWkCdUrhV0U9f6Ihgc,18218
9
9
  datachain/error.py,sha256=bxAAL32lSeMgzsQDEHbGTGORj-mPzzpCRvWDPueJNN4,1092
10
10
  datachain/job.py,sha256=Jt4sNutMHJReaGsj3r3scueN5aESLGfhimAa8pUP7Is,1271
11
11
  datachain/listing.py,sha256=TgKg25ZWAP5enzKgw2_2GUPJVdnQUh6uySHB5SJrUY4,7773
12
12
  datachain/node.py,sha256=o8Sqy92QkzzcLK6XmIFLyDSE6Rw6kUTmGRhEmfLFdhg,5211
13
- datachain/nodes_fetcher.py,sha256=F-73-h19HHNGtHFBGKk7p3mc0ALm4a9zGnzhtuUjnp4,1107
13
+ datachain/nodes_fetcher.py,sha256=ILMzUW5o4_6lUOVrLDC9gJPCXfcgKnMG68plrc7dAOA,1113
14
14
  datachain/nodes_thread_pool.py,sha256=uPo-xl8zG5m9YgODjPFBpbcqqHjI-dcxH87yAbj_qco,3192
15
15
  datachain/progress.py,sha256=5KotcvvzAUL_RF0GEj4JY0IB1lyImnmHxe89YkT1XO4,4330
16
16
  datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
- datachain/studio.py,sha256=6kxF7VxPAbh9D7_Bk8_SghS5OXrwUwSpDaw19eNCTP4,4083
17
+ datachain/studio.py,sha256=w41vgVPrBfJ02XQOaDccLbh-1uSAfq9cAgOmkYUqExE,4845
18
18
  datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
19
19
  datachain/utils.py,sha256=-mSFowjIidJ4_sMXInvNHLn4rK_QnHuIlLuH1_lMGmI,13897
20
20
  datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
21
- datachain/catalog/catalog.py,sha256=J1nUWLI4RYCvvR6fB4neQBtB7V-CTh4PM71irhNmJc4,57817
21
+ datachain/catalog/catalog.py,sha256=l_HAxor5i_F03VvbmMuwhi4INhsmNrqubyydPhXWo2Y,57980
22
22
  datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
23
23
  datachain/catalog/loader.py,sha256=-6VelNfXUdgUnwInVyA8g86Boxv2xqhTh9xNS-Zlwig,8242
24
24
  datachain/client/__init__.py,sha256=T4wiYL9KIM0ZZ_UqIyzV8_ufzYlewmizlV4iymHNluE,86
25
25
  datachain/client/azure.py,sha256=ffxs26zm6KLAL1aUWJm-vtzuZP3LSNha7UDGXynMBKo,2234
26
26
  datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
27
- datachain/client/fsspec.py,sha256=Ai5m7alkAnv-RWXuLbZ95SKEPaQ3Pyk5ujDy50JDX5w,12692
27
+ datachain/client/fsspec.py,sha256=KDGLhJMnive73hI8GABeP_aQZv1w5M_6rxz6KRRxaHI,12712
28
28
  datachain/client/gcs.py,sha256=cnTIr5GS6dbYOEYfqehhyQu3dr6XNjPHSg5U3FkivUk,4124
29
29
  datachain/client/hf.py,sha256=XeVJVbiNViZCpn3sfb90Fr8SYO3BdLmfE3hOWMoqInE,951
30
30
  datachain/client/local.py,sha256=vwbgCwZ7IqY2voj2l7tLJjgov7Dp--fEUvUwUBsMbls,4457
@@ -33,19 +33,29 @@ datachain/data_storage/__init__.py,sha256=cEOJpyu1JDZtfUupYucCDNFI6e5Wmp_Oyzq6rZ
33
33
  datachain/data_storage/db_engine.py,sha256=81Ol1of9TTTzD97ORajCnP366Xz2mEJt6C-kTUCaru4,3406
34
34
  datachain/data_storage/id_generator.py,sha256=lCEoU0BM37Ai2aRpSbwo5oQT0GqZnSpYwwvizathRMQ,4292
35
35
  datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
36
- datachain/data_storage/metastore.py,sha256=5b7o_CSHC2djottebYn-Hq5q0yaSLOKPIRCnaVRvjsU,36056
37
- datachain/data_storage/schema.py,sha256=scANMQqozita3HjEtq7eupMgh6yYkrZHoXtfuL2RoQg,9879
36
+ datachain/data_storage/metastore.py,sha256=S9pkbAi7yJlU_CTuhB-eTZgzZgkPMhJ5Br90AVLDXsQ,37922
37
+ datachain/data_storage/schema.py,sha256=-QVlRvD0dfu-ZFUxylEoSnLJLnleMEjVlcAb2OGu-AY,9895
38
38
  datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
39
- datachain/data_storage/sqlite.py,sha256=CspRUlYsIcubgzvcQxTACnmcuKESSLZcqCl0dcrtRiA,27471
40
- datachain/data_storage/warehouse.py,sha256=yXNU0U3exzR1E6dqbYYmL4RhXWsbYWVdZ3jONGcVniY,30914
39
+ datachain/data_storage/sqlite.py,sha256=nF-2B-n8YZh9cJlZv4XnbahAJDW6pvrp1h9L-140M7A,27538
40
+ datachain/data_storage/warehouse.py,sha256=kFLhYEFkpsfl65Lr1c4t4HJt3nO1Ez_QQ76aQNN30fc,30966
41
+ datachain/func/__init__.py,sha256=4VUt5BaLdBAl_BnAku0Jb8plqd7kDOiYrQTMG3pN0c4,794
42
+ datachain/func/aggregate.py,sha256=7_IPrIwb2XSs3zG4iOr1eTvzn6kNVe2mkzvNzjusDHk,10942
43
+ datachain/func/array.py,sha256=zHDNWuWLA7HVa9FEvQeHhVi00_xqenyleTqcLwkXWBI,5477
44
+ datachain/func/base.py,sha256=wA0sBQAVyN9LPxoo7Ox83peS0zUVnyuKxukwAcjGLfY,534
45
+ datachain/func/conditional.py,sha256=mQroxsoExpBW84Zm5dAYP4OpBblWmzfnF2qJq9rba54,2223
46
+ datachain/func/func.py,sha256=9wqdxxisoDL0w8qKGQmL6sNdgJeIOzotEUPlxu9t2IQ,12326
47
+ datachain/func/path.py,sha256=mqN_mfkwv44z2II7DMTp_fGGw95hmTCNls_TOFNpr4k,3155
48
+ datachain/func/random.py,sha256=pENOLj9rSmWfGCnOsUIaCsVC5486zQb66qfQvXaz9Z4,452
49
+ datachain/func/string.py,sha256=NQzaXXYu7yb72HPADy4WrFlcgvTS77L9x7-qvCKJtnk,4522
50
+ datachain/func/window.py,sha256=0MB1yjpVbwOrl_WNLZ8V3jkJz3o0XlYinpAcZQJuxiA,1688
41
51
  datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
- datachain/lib/arrow.py,sha256=-hu9tic79a01SY2UBqkA3U6wUr6tnE3T3q5q_BnO93A,9156
52
+ datachain/lib/arrow.py,sha256=b5efxAUaNNYVwtXVJqj07D3zf5KC-BPlLCxKEZbEG6w,9429
43
53
  datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
44
- datachain/lib/data_model.py,sha256=dau4AlZBhOFvF7pEKMeqCeRkcFFg5KFvTBWW_2CdH5g,2371
45
- datachain/lib/dataset_info.py,sha256=q0EW9tj5jXGSD9Lzct9zbH4P1lfIGd_cIWqhnMxv7Q0,2464
46
- datachain/lib/dc.py,sha256=u0RQJPG0zwxsoYS-4wrbDBPuLYZajwIi1YX37khKfkI,87942
54
+ datachain/lib/data_model.py,sha256=zS4lmXHVBXc9ntcyea2a1CRLXGSAN_0glXcF88CohgY,2685
55
+ datachain/lib/dataset_info.py,sha256=IjdF1E0TQNOq9YyynfWiCFTeZpbyGfyJvxgJY4YN810,2493
56
+ datachain/lib/dc.py,sha256=J7liATKQBJCkeHanVLr0s3d1t5wxiiiSJuSbuxKBbLg,89527
47
57
  datachain/lib/file.py,sha256=-XMkL6ED1sE7TMhWoMRTEuOXswZJw8X6AEmJDONFP74,15019
48
- datachain/lib/hf.py,sha256=BW2NPpqxkpPwkSaGlppT8Rbs8zPpyYC-tR6htY08c-0,5817
58
+ datachain/lib/hf.py,sha256=a-zFpDmZIR4r8dlNNTjfpAKSnuJ9xyRXlgcdENiXt3E,5864
49
59
  datachain/lib/image.py,sha256=AMXYwQsmarZjRbPCZY3M1jDsM2WAB_b3cTY4uOIuXNU,2675
50
60
  datachain/lib/listing.py,sha256=cVkCp7TRVpcZKSx-Bbk9t51bQI9Mw0o86W6ZPhAsuzM,3667
51
61
  datachain/lib/listing_info.py,sha256=9ua40Hw0aiQByUw3oAEeNzMavJYfW0Uhe8YdCTK-m_g,1110
@@ -53,35 +63,32 @@ datachain/lib/meta_formats.py,sha256=anK2bDVbaeCCh0yvKUBaW2MVos3zRgdaSV8uSduzPcU
53
63
  datachain/lib/model_store.py,sha256=DNIv8Y6Jtk1_idNLzIpsThOsdW2BMAudyUCbPUcgcxk,2515
54
64
  datachain/lib/pytorch.py,sha256=W-ARi2xH1f1DUkVfRuerW-YWYgSaJASmNCxtz2lrJGI,6072
55
65
  datachain/lib/settings.py,sha256=ZELRCTLbi5vzRPiDX6cQ9LLg9TefJ_A05gIGni0lll8,2535
56
- datachain/lib/signal_schema.py,sha256=xwkE5bxJxUhZTjrA6jqN87XbSXPikCbL6eOPL9WyrKM,24556
66
+ datachain/lib/signal_schema.py,sha256=_uh19nCKhiD9ua8oIN1Q8R9iYv1BZAuqTJCLYVmyW8k,24557
57
67
  datachain/lib/tar.py,sha256=3WIzao6yD5fbLqXLTt9GhPGNonbFIs_fDRu-9vgLgsA,1038
58
68
  datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
59
69
  datachain/lib/udf.py,sha256=-j0krjNAELTqRI0dB1N65AmawtcIY5vN---AuUcW8Us,13637
60
70
  datachain/lib/udf_signature.py,sha256=GXw24A-Olna6DWCdgy2bC-gZh_gLGPQ-KvjuI6pUjC0,7281
61
- datachain/lib/utils.py,sha256=6NwgWLl5JrgtD4rsSFEe-yR2ntEwJMJEtAZ3FIxK3fg,1529
71
+ datachain/lib/utils.py,sha256=om-MCiyYwvPHtFq3V2rBKrRDNkio9XXofj7RsUIlHKU,1586
62
72
  datachain/lib/vfile.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
63
73
  datachain/lib/webdataset.py,sha256=o7SHk5HOUWsZ5Ln04xOM04eQqiBHiJNO7xLgyVBrwo8,6924
64
- datachain/lib/webdataset_laion.py,sha256=aGMWeFmeYNK75ewO9JTA11iB1i3QtTzUfenQA5jajfo,2535
74
+ datachain/lib/webdataset_laion.py,sha256=xvT6m_r5y0KbOx14BUe7UC5mOgrktJq53Mh-H0EVlUE,2525
65
75
  datachain/lib/convert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
66
76
  datachain/lib/convert/flatten.py,sha256=Uebc5CeqCsacp-nr6IG9i6OGuUavXqdqnoGctZBk3RQ,1384
67
77
  datachain/lib/convert/python_to_sql.py,sha256=40SAOdoOgikZRhn8iomCPDRoxC3RFxjJLivEAA9MHDU,2880
68
78
  datachain/lib/convert/sql_to_python.py,sha256=XXCBYDQFUXJIBNWkjEP944cnCfJ8GF2Tji0DLF3A_zQ,315
69
79
  datachain/lib/convert/unflatten.py,sha256=Ogvh_5wg2f38_At_1lN0D_e2uZOOpYEvwvB2xdq56Tw,2012
70
80
  datachain/lib/convert/values_to_tuples.py,sha256=varRCnSMT_pZmHznrd2Yi05qXLLz_v9YH_pOCpHSkdc,3921
71
- datachain/lib/func/__init__.py,sha256=wlAKhGV0QDg9y7reSwoUF8Vicfqh_YOUNIXLzxICGz4,403
72
- datachain/lib/func/aggregate.py,sha256=H1ziFQdaK9zvnxvttfnEzkkyGvEEmMAvmgCsBV6nfm8,10917
73
- datachain/lib/func/func.py,sha256=HAJZ_tpiRG2R-et7pr0WnoyNZYtpbPn3_HBuL3RQpbU,4800
74
81
  datachain/model/__init__.py,sha256=R9faX5OHV1xh2EW-g2MPedwbtEqt3LodJRyluB-QylI,189
75
- datachain/model/bbox.py,sha256=LLtzc8OiL-cxqqlPWXA4MFTo8HRS3GW2gOxA0Sf_cxI,3158
76
- datachain/model/pose.py,sha256=0URrnS99Ugq0yspCXC2z-hgpybEA5tWLJXpxqVLnAlI,3088
77
- datachain/model/segment.py,sha256=dMxtm-05fNseEoEKpZj9iDN7fwGK1udyAreN-V-cRks,1597
82
+ datachain/model/bbox.py,sha256=1Li1G3RdiQwLOAc2Mak2nQU0bcvdH-lXmXtA984CUWM,3154
83
+ datachain/model/pose.py,sha256=q9NgB8h66aKnYnLi7Pyf9bU-F_90W4cbvtSO3-_hkdk,3078
84
+ datachain/model/segment.py,sha256=iRWf0KieXfSM1eGD9Y7THx8L_EMB79Sk8WVebs3xSbQ,1593
78
85
  datachain/model/ultralytics/__init__.py,sha256=EvcNX9qUyxKXXlKCPpsXeRrabyXk5E9EkN-tyiYkfS4,750
79
86
  datachain/model/ultralytics/bbox.py,sha256=OZ9XBdyMOYc401P-RhfSN9QaYvMpnx2Phu9ptaJgZBY,4316
80
87
  datachain/model/ultralytics/pose.py,sha256=71KBTcoST2wcEtsyGXqLVpvUtqbp9gwZGA15pEPtX5A,2959
81
88
  datachain/model/ultralytics/segment.py,sha256=Z1ab0tZRJubSYNH4KkFlzhYeGNTfAyC71KmkQcToHDQ,2760
82
89
  datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
83
90
  datachain/query/batch.py,sha256=5fEhORFe7li12SdYddaSK3LyqksMfCHhwN1_A6TfsA4,3485
84
- datachain/query/dataset.py,sha256=sQny-ZemB2HueC4mPg-7qSaqUD85MMO-DQyVVP8K1CA,53765
91
+ datachain/query/dataset.py,sha256=bQVG4WnJfBQpvnxouIdDlsJF2gB8V4lDp4Zu9JeZ-rc,54771
85
92
  datachain/query/dispatch.py,sha256=wjjTWw6sFQbB9SKRh78VbfvwSMgJXCfqJklS3-9KnCU,12025
86
93
  datachain/query/metrics.py,sha256=r5b0ygYhokbXp8Mg3kCH8iFSRw0jxzyeBe-C-J_bKFc,938
87
94
  datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
@@ -89,14 +96,14 @@ datachain/query/queue.py,sha256=waqM_KzavU8C-G95-4211Nd4GXna_u2747Chgwtgz2w,3839
89
96
  datachain/query/schema.py,sha256=b_KnVy6B26Ol4nYG0LqNNpeQ1QYPk95YRGUjXfdaQWs,6606
90
97
  datachain/query/session.py,sha256=50SOdLNCjqHHKI-L4xGXyzTVxzMWfANqKqjeYre-c2k,5959
91
98
  datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
92
- datachain/remote/studio.py,sha256=g88kHdlRhmruiWwoIxq_JJoymZUrtMAL937NWQyWyXI,9209
93
- datachain/sql/__init__.py,sha256=A2djrbQwSMUZZEIKGnm-mnRA-NDSbiDJNpAmmwGNyIo,303
94
- datachain/sql/selectable.py,sha256=fBM-wS1TUA42kVEAAiwqGtibIevyZAEritwt8PZGyLQ,1589
99
+ datachain/remote/studio.py,sha256=z9DTDqfdWKT8MC23wRDTOHvI8hc_OySS1Ce3F617gjA,9906
100
+ datachain/sql/__init__.py,sha256=6SQRdbljO3d2hx3EAVXEZrHQKv5jth0Jh98PogT59No,262
101
+ datachain/sql/selectable.py,sha256=cTc60qVoAwqqss0Vop8Lt5Z-ROnM1XrQmL_GLjRxhXs,1765
95
102
  datachain/sql/types.py,sha256=ASSPkmM5EzdRindqj2O7WHLXq8VHAgFYedG8lYfGvVI,14045
96
103
  datachain/sql/utils.py,sha256=rzlJw08etivdrcuQPqNVvVWhuVSyUPUQEEc6DOhu258,818
97
104
  datachain/sql/default/__init__.py,sha256=XQ2cEZpzWiABqjV-6yYHUBGI9vN_UHxbxZENESmVAWw,45
98
105
  datachain/sql/default/base.py,sha256=QD-31C6JnyOXzogyDx90sUhm7QvgXIYpeHEASH84igU,628
99
- datachain/sql/functions/__init__.py,sha256=-vIkU0AqwOW5FX6P89xYl-uBIUdt46CEnCtshmN85gM,400
106
+ datachain/sql/functions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
100
107
  datachain/sql/functions/aggregate.py,sha256=3AQdA8YHPFdtCEfwZKQXTT8SlQWdG9gD5PBtGN3Odqs,944
101
108
  datachain/sql/functions/array.py,sha256=rvH27SWN9gdh_mFnp0GIiXuCrNW6n8ZbY4I_JUS-_e0,1140
102
109
  datachain/sql/functions/conditional.py,sha256=q7YUKfunXeEldXaxgT-p5pUTcOEVU_tcQ2BJlquTRPs,207
@@ -104,15 +111,15 @@ datachain/sql/functions/path.py,sha256=zixpERotTFP6LZ7I4TiGtyRA8kXOoZmH1yzH9oRW0
104
111
  datachain/sql/functions/random.py,sha256=vBwEEj98VH4LjWixUCygQ5Bz1mv1nohsCG0-ZTELlVg,271
105
112
  datachain/sql/functions/string.py,sha256=DYgiw8XSk7ge7GXvyRI1zbaMruIizNeI-puOjriQGZQ,1148
106
113
  datachain/sql/sqlite/__init__.py,sha256=TAdJX0Bg28XdqPO-QwUVKy8rg78cgMileHvMNot7d04,166
107
- datachain/sql/sqlite/base.py,sha256=aHSZVvh4XSVkvZ07h3jMoRlHI4sWD8y3SnmGs9xMG9Y,14375
114
+ datachain/sql/sqlite/base.py,sha256=X4iEynOAqqvqz8lmgUKvURleKO6aguULgG8RoufKrSk,14772
108
115
  datachain/sql/sqlite/types.py,sha256=lPXS1XbkmUtlkkiRxy_A_UzsgpPv2VSkXYOD4zIHM4w,1734
109
116
  datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
110
117
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
111
118
  datachain/toolkit/split.py,sha256=ZgDcrNiKiPXZmKD591_1z9qRIXitu5zwAsoVPB7ykiU,2508
112
119
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
113
- datachain-0.7.1.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
114
- datachain-0.7.1.dist-info/METADATA,sha256=9ICI9nDBKNq39JJR2q_RxuYBCFkUD4o81T2FEO8LKDU,18006
115
- datachain-0.7.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
116
- datachain-0.7.1.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
117
- datachain-0.7.1.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
118
- datachain-0.7.1.dist-info/RECORD,,
120
+ datachain-0.7.3.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
121
+ datachain-0.7.3.dist-info/METADATA,sha256=E1-nP4rZghwCV5kSS09620YEJdwaTAiVpI5DmmRnZy0,18006
122
+ datachain-0.7.3.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
123
+ datachain-0.7.3.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
124
+ datachain-0.7.3.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
125
+ datachain-0.7.3.dist-info/RECORD,,
@@ -1,32 +0,0 @@
1
- from .aggregate import (
2
- any_value,
3
- avg,
4
- collect,
5
- concat,
6
- count,
7
- dense_rank,
8
- first,
9
- max,
10
- min,
11
- rank,
12
- row_number,
13
- sum,
14
- )
15
- from .func import Func, window
16
-
17
- __all__ = [
18
- "Func",
19
- "any_value",
20
- "avg",
21
- "collect",
22
- "concat",
23
- "count",
24
- "dense_rank",
25
- "first",
26
- "max",
27
- "min",
28
- "rank",
29
- "row_number",
30
- "sum",
31
- "window",
32
- ]