datachain-0.7.0-py3-none-any.whl → datachain-0.7.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (52)
  1. datachain/__init__.py +0 -3
  2. datachain/catalog/catalog.py +8 -6
  3. datachain/cli.py +1 -1
  4. datachain/client/fsspec.py +9 -9
  5. datachain/data_storage/schema.py +2 -2
  6. datachain/data_storage/sqlite.py +5 -4
  7. datachain/data_storage/warehouse.py +18 -18
  8. datachain/func/__init__.py +49 -0
  9. datachain/{lib/func → func}/aggregate.py +13 -11
  10. datachain/func/array.py +176 -0
  11. datachain/func/base.py +23 -0
  12. datachain/func/conditional.py +81 -0
  13. datachain/func/func.py +384 -0
  14. datachain/func/path.py +110 -0
  15. datachain/func/random.py +23 -0
  16. datachain/func/string.py +154 -0
  17. datachain/func/window.py +49 -0
  18. datachain/lib/arrow.py +24 -12
  19. datachain/lib/data_model.py +25 -9
  20. datachain/lib/dataset_info.py +2 -2
  21. datachain/lib/dc.py +94 -56
  22. datachain/lib/hf.py +1 -1
  23. datachain/lib/signal_schema.py +1 -1
  24. datachain/lib/utils.py +1 -0
  25. datachain/lib/webdataset_laion.py +5 -5
  26. datachain/model/__init__.py +6 -0
  27. datachain/model/bbox.py +102 -0
  28. datachain/model/pose.py +88 -0
  29. datachain/model/segment.py +47 -0
  30. datachain/model/ultralytics/__init__.py +27 -0
  31. datachain/model/ultralytics/bbox.py +147 -0
  32. datachain/model/ultralytics/pose.py +113 -0
  33. datachain/model/ultralytics/segment.py +91 -0
  34. datachain/nodes_fetcher.py +2 -2
  35. datachain/query/dataset.py +57 -34
  36. datachain/sql/__init__.py +0 -2
  37. datachain/sql/functions/__init__.py +0 -26
  38. datachain/sql/selectable.py +11 -5
  39. datachain/sql/sqlite/base.py +11 -2
  40. datachain/toolkit/split.py +6 -2
  41. {datachain-0.7.0.dist-info → datachain-0.7.2.dist-info}/METADATA +72 -71
  42. {datachain-0.7.0.dist-info → datachain-0.7.2.dist-info}/RECORD +46 -35
  43. {datachain-0.7.0.dist-info → datachain-0.7.2.dist-info}/WHEEL +1 -1
  44. datachain/lib/func/__init__.py +0 -32
  45. datachain/lib/func/func.py +0 -152
  46. datachain/lib/models/__init__.py +0 -5
  47. datachain/lib/models/bbox.py +0 -45
  48. datachain/lib/models/pose.py +0 -37
  49. datachain/lib/models/yolo.py +0 -39
  50. {datachain-0.7.0.dist-info → datachain-0.7.2.dist-info}/LICENSE +0 -0
  51. {datachain-0.7.0.dist-info → datachain-0.7.2.dist-info}/entry_points.txt +0 -0
  52. {datachain-0.7.0.dist-info → datachain-0.7.2.dist-info}/top_level.txt +0 -0
datachain/query/dataset.py CHANGED
@@ -43,9 +43,10 @@ from datachain.data_storage.schema import (
43
43
  )
44
44
  from datachain.dataset import DatasetStatus, RowDict
45
45
  from datachain.error import DatasetNotFoundError, QueryScriptCancelError
46
+ from datachain.func.base import Function
46
47
  from datachain.lib.udf import UDFAdapter
47
48
  from datachain.progress import CombinedDownloadCallback
48
- from datachain.sql.functions import rand
49
+ from datachain.sql.functions.random import rand
49
50
  from datachain.utils import (
50
51
  batched,
51
52
  determine_processes,
@@ -65,15 +66,16 @@ if TYPE_CHECKING:
65
66
  from datachain.catalog import Catalog
66
67
  from datachain.data_storage import AbstractWarehouse
67
68
  from datachain.dataset import DatasetRecord
68
-
69
- from .udf import UDFResult
69
+ from datachain.lib.udf import UDFResult
70
70
 
71
71
  P = ParamSpec("P")
72
72
 
73
73
 
74
74
  INSERT_BATCH_SIZE = 10000
75
75
 
76
- PartitionByType = Union[ColumnElement, Sequence[ColumnElement]]
76
+ PartitionByType = Union[
77
+ Function, ColumnElement, Sequence[Union[Function, ColumnElement]]
78
+ ]
77
79
  JoinPredicateType = Union[str, ColumnClause, ColumnElement]
78
80
  DatasetDependencyType = tuple[str, int]
79
81
 
@@ -457,18 +459,15 @@ class UDFStep(Step, ABC):
457
459
  # Run the UDFDispatcher in another process to avoid needing
458
460
  # if __name__ == '__main__': in user scripts
459
461
  exec_cmd = get_datachain_executable()
462
+ cmd = [*exec_cmd, "internal-run-udf"]
460
463
  envs = dict(os.environ)
461
464
  envs.update({"PYTHONPATH": os.getcwd()})
462
465
  process_data = filtered_cloudpickle_dumps(udf_info)
463
- result = subprocess.run( # noqa: S603
464
- [*exec_cmd, "internal-run-udf"],
465
- input=process_data,
466
- check=False,
467
- env=envs,
468
- )
469
- if result.returncode != 0:
470
- raise RuntimeError("UDF Execution Failed!")
471
466
 
467
+ with subprocess.Popen(cmd, env=envs, stdin=subprocess.PIPE) as process: # noqa: S603
468
+ process.communicate(process_data)
469
+ if process.poll():
470
+ raise RuntimeError("UDF Execution Failed!")
472
471
  else:
473
472
  # Otherwise process single-threaded (faster for smaller UDFs)
474
473
  warehouse = self.catalog.warehouse
@@ -520,13 +519,17 @@ class UDFStep(Step, ABC):
520
519
  else:
521
520
  list_partition_by = [self.partition_by]
522
521
 
522
+ partition_by = [
523
+ p.get_column() if isinstance(p, Function) else p for p in list_partition_by
524
+ ]
525
+
523
526
  # create table with partitions
524
527
  tbl = self.catalog.warehouse.create_udf_table(partition_columns())
525
528
 
526
529
  # fill table with partitions
527
530
  cols = [
528
531
  query.selected_columns.sys__id,
529
- f.dense_rank().over(order_by=list_partition_by).label(PARTITION_COLUMN_ID),
532
+ f.dense_rank().over(order_by=partition_by).label(PARTITION_COLUMN_ID),
530
533
  ]
531
534
  self.catalog.warehouse.db.execute(
532
535
  tbl.insert().from_select(cols, query.with_only_columns(*cols))
@@ -683,6 +686,12 @@ class SQLClause(Step, ABC):
683
686
 
684
687
  return step_result(q, new_query.selected_columns)
685
688
 
689
+ def parse_cols(
690
+ self,
691
+ cols: Sequence[Union[Function, ColumnElement]],
692
+ ) -> tuple[ColumnElement, ...]:
693
+ return tuple(c.get_column() if isinstance(c, Function) else c for c in cols)
694
+
686
695
  @abstractmethod
687
696
  def apply_sql_clause(self, query):
688
697
  pass
@@ -690,12 +699,14 @@ class SQLClause(Step, ABC):
690
699
 
691
700
  @frozen
692
701
  class SQLSelect(SQLClause):
693
- args: tuple[Union[str, ColumnElement], ...]
702
+ args: tuple[Union[Function, ColumnElement], ...]
694
703
 
695
704
  def apply_sql_clause(self, query) -> Select:
696
705
  subquery = query.subquery()
697
-
698
- args = [subquery.c[str(c)] if isinstance(c, (str, C)) else c for c in self.args]
706
+ args = [
707
+ subquery.c[str(c)] if isinstance(c, (str, C)) else c
708
+ for c in self.parse_cols(self.args)
709
+ ]
699
710
  if not args:
700
711
  args = subquery.c
701
712
 
@@ -704,22 +715,25 @@ class SQLSelect(SQLClause):
704
715
 
705
716
  @frozen
706
717
  class SQLSelectExcept(SQLClause):
707
- args: tuple[str, ...]
718
+ args: tuple[Union[Function, ColumnElement], ...]
708
719
 
709
720
  def apply_sql_clause(self, query: Select) -> Select:
710
721
  subquery = query.subquery()
711
- names = set(self.args)
712
- args = [c for c in subquery.c if c.name not in names]
722
+ args = [c for c in subquery.c if c.name not in set(self.parse_cols(self.args))]
713
723
  return sqlalchemy.select(*args).select_from(subquery)
714
724
 
715
725
 
716
726
  @frozen
717
727
  class SQLMutate(SQLClause):
718
- args: tuple[ColumnElement, ...]
728
+ args: tuple[Union[Function, ColumnElement], ...]
719
729
 
720
730
  def apply_sql_clause(self, query: Select) -> Select:
721
731
  original_subquery = query.subquery()
722
- to_mutate = {c.name for c in self.args}
732
+ args = [
733
+ original_subquery.c[str(c)] if isinstance(c, (str, C)) else c
734
+ for c in self.parse_cols(self.args)
735
+ ]
736
+ to_mutate = {c.name for c in args}
723
737
 
724
738
  prefix = f"mutate{token_hex(8)}_"
725
739
  cols = [
@@ -729,9 +743,7 @@ class SQLMutate(SQLClause):
729
743
  # this is needed for new column to be used in clauses
730
744
  # like ORDER BY, otherwise new column is not recognized
731
745
  subquery = (
732
- sqlalchemy.select(*cols, *self.args)
733
- .select_from(original_subquery)
734
- .subquery()
746
+ sqlalchemy.select(*cols, *args).select_from(original_subquery).subquery()
735
747
  )
736
748
 
737
749
  return sqlalchemy.select(*subquery.c).select_from(subquery)
@@ -739,21 +751,24 @@ class SQLMutate(SQLClause):
739
751
 
740
752
  @frozen
741
753
  class SQLFilter(SQLClause):
742
- expressions: tuple[ColumnElement, ...]
754
+ expressions: tuple[Union[Function, ColumnElement], ...]
743
755
 
744
756
  def __and__(self, other):
745
- return self.__class__(self.expressions + other)
757
+ expressions = self.parse_cols(self.expressions)
758
+ return self.__class__(expressions + other)
746
759
 
747
760
  def apply_sql_clause(self, query: Select) -> Select:
748
- return query.filter(*self.expressions)
761
+ expressions = self.parse_cols(self.expressions)
762
+ return query.filter(*expressions)
749
763
 
750
764
 
751
765
  @frozen
752
766
  class SQLOrderBy(SQLClause):
753
- args: tuple[ColumnElement, ...]
767
+ args: tuple[Union[Function, ColumnElement], ...]
754
768
 
755
769
  def apply_sql_clause(self, query: Select) -> Select:
756
- return query.order_by(*self.args)
770
+ args = self.parse_cols(self.args)
771
+ return query.order_by(*args)
757
772
 
758
773
 
759
774
  @frozen
@@ -948,8 +963,8 @@ class SQLJoin(Step):
948
963
 
949
964
  @frozen
950
965
  class SQLGroupBy(SQLClause):
951
- cols: Sequence[Union[str, ColumnElement]]
952
- group_by: Sequence[Union[str, ColumnElement]]
966
+ cols: Sequence[Union[str, Function, ColumnElement]]
967
+ group_by: Sequence[Union[str, Function, ColumnElement]]
953
968
 
954
969
  def apply_sql_clause(self, query) -> Select:
955
970
  if not self.cols:
@@ -959,12 +974,20 @@ class SQLGroupBy(SQLClause):
959
974
 
960
975
  subquery = query.subquery()
961
976
 
977
+ group_by = [
978
+ c.get_column() if isinstance(c, Function) else c for c in self.group_by
979
+ ]
980
+
962
981
  cols = [
963
- subquery.c[str(c)] if isinstance(c, (str, C)) else c
964
- for c in [*self.group_by, *self.cols]
982
+ c.get_column()
983
+ if isinstance(c, Function)
984
+ else subquery.c[str(c)]
985
+ if isinstance(c, (str, C))
986
+ else c
987
+ for c in (*group_by, *self.cols)
965
988
  ]
966
989
 
967
- return sqlalchemy.select(*cols).select_from(subquery).group_by(*self.group_by)
990
+ return sqlalchemy.select(*cols).select_from(subquery).group_by(*group_by)
968
991
 
969
992
 
970
993
  def _validate_columns(
datachain/sql/__init__.py CHANGED
@@ -1,13 +1,11 @@
1
1
  from sqlalchemy.sql.elements import literal
2
2
  from sqlalchemy.sql.expression import column
3
3
 
4
- from . import functions
5
4
  from .default import setup as default_setup
6
5
  from .selectable import select, values
7
6
 
8
7
  __all__ = [
9
8
  "column",
10
- "functions",
11
9
  "literal",
12
10
  "select",
13
11
  "values",
datachain/sql/functions/__init__.py CHANGED
@@ -1,26 +0,0 @@
1
- from sqlalchemy.sql.expression import func
2
-
3
- from . import array, path, string
4
- from .aggregate import avg
5
- from .conditional import greatest, least
6
- from .random import rand
7
-
8
- count = func.count
9
- sum = func.sum
10
- min = func.min
11
- max = func.max
12
-
13
- __all__ = [
14
- "array",
15
- "avg",
16
- "count",
17
- "func",
18
- "greatest",
19
- "least",
20
- "max",
21
- "min",
22
- "path",
23
- "rand",
24
- "string",
25
- "sum",
26
- ]
datachain/sql/selectable.py CHANGED
@@ -9,7 +9,9 @@ class Values(selectable.Values):
9
9
  columns = [expression.column(f"c{i}") for i in range(1, num_columns + 1)]
10
10
  else:
11
11
  columns = [
12
- expression.column(c) if isinstance(c, str) else c for c in columns
12
+ process_column_expression(c)
13
+ for c in columns
14
+ # expression.column(c) if isinstance(c, str) else c for c in columns
13
15
  ]
14
16
  super().__init__(*columns, **kwargs)
15
17
  self._data += tuple(data)
@@ -19,13 +21,17 @@ def values(data, columns=None, **kwargs) -> Values:
19
21
  return Values(data, columns=columns, **kwargs)
20
22
 
21
23
 
22
- def process_column_expressions(columns):
23
- return [expression.column(c) if isinstance(c, str) else c for c in columns]
24
+ def process_column_expression(col):
25
+ if hasattr(col, "get_column"):
26
+ return col.get_column()
27
+ if isinstance(col, str):
28
+ return expression.column(col)
29
+ return col
24
30
 
25
31
 
26
32
  def select(*columns, **kwargs) -> "expression.Select":
27
- columns = process_column_expressions(columns)
28
- return expression.select(*columns, **kwargs)
33
+ columns_processed = [process_column_expression(c) for c in columns]
34
+ return expression.select(*columns_processed, **kwargs)
29
35
 
30
36
 
31
37
  def base_values_compiler(column_name_func, element, compiler, **kwargs):
datachain/sql/sqlite/base.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import logging
2
2
  import re
3
3
  import sqlite3
4
+ import warnings
4
5
  from collections.abc import Iterable
5
6
  from datetime import MAXYEAR, MINYEAR, datetime, timezone
6
7
  from types import MappingProxyType
@@ -418,14 +419,22 @@ def compile_collect(element, compiler, **kwargs):
418
419
  return compiler.process(func.json_group_array(*element.clauses.clauses), **kwargs)
419
420
 
420
421
 
421
- def load_usearch_extension(conn) -> bool:
422
+ def load_usearch_extension(conn: sqlite3.Connection) -> bool:
422
423
  try:
423
424
  # usearch is part of the vector optional dependencies
424
425
  # we use the extension's cosine and euclidean distance functions
425
426
  from usearch import sqlite_path
426
427
 
427
428
  conn.enable_load_extension(True)
428
- conn.load_extension(sqlite_path())
429
+
430
+ with warnings.catch_warnings():
431
+ # usearch binary is not available for Windows, see: https://github.com/unum-cloud/usearch/issues/427.
432
+ # and, sometimes fail to download the binary in other platforms
433
+ # triggering UserWarning.
434
+
435
+ warnings.filterwarnings("ignore", category=UserWarning, module="usearch")
436
+ conn.load_extension(sqlite_path())
437
+
429
438
  conn.enable_load_extension(False)
430
439
  return True
431
440
 
datachain/toolkit/split.py CHANGED
@@ -58,10 +58,14 @@ def train_test_split(dc: DataChain, weights: list[float]) -> list[DataChain]:
58
58
 
59
59
  weights_normalized = [weight / sum(weights) for weight in weights]
60
60
 
61
+ resolution = 2**31 - 1 # Maximum positive value for a 32-bit signed integer.
62
+
61
63
  return [
62
64
  dc.filter(
63
- C("sys__rand") % 1000 >= round(sum(weights_normalized[:index]) * 1000),
64
- C("sys__rand") % 1000 < round(sum(weights_normalized[: index + 1]) * 1000),
65
+ C("sys__rand") % resolution
66
+ >= round(sum(weights_normalized[:index]) * resolution),
67
+ C("sys__rand") % resolution
68
+ < round(sum(weights_normalized[: index + 1]) * resolution),
65
69
  )
66
70
  for index, _ in enumerate(weights_normalized)
67
71
  ]
{datachain-0.7.0.dist-info → datachain-0.7.2.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.7.0
3
+ Version: 0.7.2
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -19,85 +19,86 @@ License-File: LICENSE
19
19
  Requires-Dist: pyyaml
20
20
  Requires-Dist: tomlkit
21
21
  Requires-Dist: tqdm
22
- Requires-Dist: numpy <3,>=1
23
- Requires-Dist: pandas >=2.0.0
22
+ Requires-Dist: numpy<3,>=1
23
+ Requires-Dist: pandas>=2.0.0
24
24
  Requires-Dist: pyarrow
25
25
  Requires-Dist: typing-extensions
26
- Requires-Dist: python-dateutil >=2
27
- Requires-Dist: attrs >=21.3.0
28
- Requires-Dist: s3fs >=2024.2.0
29
- Requires-Dist: gcsfs >=2024.2.0
30
- Requires-Dist: adlfs >=2024.2.0
31
- Requires-Dist: dvc-data <4,>=3.10
32
- Requires-Dist: dvc-objects <6,>=4
33
- Requires-Dist: shtab <2,>=1.3.4
34
- Requires-Dist: sqlalchemy >=2
35
- Requires-Dist: multiprocess ==0.70.16
26
+ Requires-Dist: python-dateutil>=2
27
+ Requires-Dist: attrs>=21.3.0
28
+ Requires-Dist: s3fs>=2024.2.0
29
+ Requires-Dist: gcsfs>=2024.2.0
30
+ Requires-Dist: adlfs>=2024.2.0
31
+ Requires-Dist: dvc-data<4,>=3.10
32
+ Requires-Dist: dvc-objects<6,>=4
33
+ Requires-Dist: shtab<2,>=1.3.4
34
+ Requires-Dist: sqlalchemy>=2
35
+ Requires-Dist: multiprocess==0.70.16
36
36
  Requires-Dist: cloudpickle
37
- Requires-Dist: orjson >=3.10.5
38
- Requires-Dist: pydantic <3,>=2
39
- Requires-Dist: jmespath >=1.0
40
- Requires-Dist: datamodel-code-generator >=0.25
41
- Requires-Dist: Pillow <12,>=10.0.0
42
- Requires-Dist: msgpack <2,>=1.0.4
37
+ Requires-Dist: orjson>=3.10.5
38
+ Requires-Dist: pydantic<3,>=2
39
+ Requires-Dist: jmespath>=1.0
40
+ Requires-Dist: datamodel-code-generator>=0.25
41
+ Requires-Dist: Pillow<12,>=10.0.0
42
+ Requires-Dist: msgpack<2,>=1.0.4
43
43
  Requires-Dist: psutil
44
- Requires-Dist: huggingface-hub
45
- Requires-Dist: iterative-telemetry >=0.0.9
44
+ Requires-Dist: huggingface_hub
45
+ Requires-Dist: iterative-telemetry>=0.0.9
46
46
  Requires-Dist: platformdirs
47
- Requires-Dist: dvc-studio-client <1,>=0.21
47
+ Requires-Dist: dvc-studio-client<1,>=0.21
48
48
  Requires-Dist: tabulate
49
- Provides-Extra: dev
50
- Requires-Dist: datachain[docs,tests] ; extra == 'dev'
51
- Requires-Dist: mypy ==1.13.0 ; extra == 'dev'
52
- Requires-Dist: types-python-dateutil ; extra == 'dev'
53
- Requires-Dist: types-pytz ; extra == 'dev'
54
- Requires-Dist: types-PyYAML ; extra == 'dev'
55
- Requires-Dist: types-requests ; extra == 'dev'
56
- Requires-Dist: types-tabulate ; extra == 'dev'
57
49
  Provides-Extra: docs
58
- Requires-Dist: mkdocs >=1.5.2 ; extra == 'docs'
59
- Requires-Dist: mkdocs-gen-files >=0.5.0 ; extra == 'docs'
60
- Requires-Dist: mkdocs-material >=9.3.1 ; extra == 'docs'
61
- Requires-Dist: mkdocs-section-index >=0.3.6 ; extra == 'docs'
62
- Requires-Dist: mkdocstrings-python >=1.6.3 ; extra == 'docs'
63
- Requires-Dist: mkdocs-literate-nav >=0.6.1 ; extra == 'docs'
64
- Provides-Extra: examples
65
- Requires-Dist: datachain[tests] ; extra == 'examples'
66
- Requires-Dist: numpy <2,>=1 ; extra == 'examples'
67
- Requires-Dist: defusedxml ; extra == 'examples'
68
- Requires-Dist: accelerate ; extra == 'examples'
69
- Requires-Dist: unstructured[embed-huggingface,pdf] <0.16.0 ; extra == 'examples'
70
- Requires-Dist: pdfplumber ==0.11.4 ; extra == 'examples'
71
- Requires-Dist: huggingface-hub[hf_transfer] ; extra == 'examples'
72
- Requires-Dist: onnx ==1.16.1 ; extra == 'examples'
73
- Provides-Extra: hf
74
- Requires-Dist: numba >=0.60.0 ; extra == 'hf'
75
- Requires-Dist: datasets[audio,vision] >=2.21.0 ; extra == 'hf'
76
- Provides-Extra: remote
77
- Requires-Dist: lz4 ; extra == 'remote'
78
- Requires-Dist: requests >=2.22.0 ; extra == 'remote'
79
- Provides-Extra: tests
80
- Requires-Dist: datachain[hf,remote,torch,vector] ; extra == 'tests'
81
- Requires-Dist: pytest <9,>=8 ; extra == 'tests'
82
- Requires-Dist: pytest-sugar >=0.9.6 ; extra == 'tests'
83
- Requires-Dist: pytest-cov >=4.1.0 ; extra == 'tests'
84
- Requires-Dist: pytest-mock >=3.12.0 ; extra == 'tests'
85
- Requires-Dist: pytest-servers[all] >=0.5.8 ; extra == 'tests'
86
- Requires-Dist: pytest-benchmark[histogram] ; extra == 'tests'
87
- Requires-Dist: pytest-xdist >=3.3.1 ; extra == 'tests'
88
- Requires-Dist: virtualenv ; extra == 'tests'
89
- Requires-Dist: dulwich ; extra == 'tests'
90
- Requires-Dist: hypothesis ; extra == 'tests'
91
- Requires-Dist: open-clip-torch ; extra == 'tests'
92
- Requires-Dist: aiotools >=1.7.0 ; extra == 'tests'
93
- Requires-Dist: requests-mock ; extra == 'tests'
94
- Requires-Dist: scipy ; extra == 'tests'
50
+ Requires-Dist: mkdocs>=1.5.2; extra == "docs"
51
+ Requires-Dist: mkdocs-gen-files>=0.5.0; extra == "docs"
52
+ Requires-Dist: mkdocs-material>=9.3.1; extra == "docs"
53
+ Requires-Dist: mkdocs-section-index>=0.3.6; extra == "docs"
54
+ Requires-Dist: mkdocstrings-python>=1.6.3; extra == "docs"
55
+ Requires-Dist: mkdocs-literate-nav>=0.6.1; extra == "docs"
95
56
  Provides-Extra: torch
96
- Requires-Dist: torch >=2.1.0 ; extra == 'torch'
97
- Requires-Dist: torchvision ; extra == 'torch'
98
- Requires-Dist: transformers >=4.36.0 ; extra == 'torch'
57
+ Requires-Dist: torch>=2.1.0; extra == "torch"
58
+ Requires-Dist: torchvision; extra == "torch"
59
+ Requires-Dist: transformers>=4.36.0; extra == "torch"
60
+ Provides-Extra: remote
61
+ Requires-Dist: lz4; extra == "remote"
62
+ Requires-Dist: requests>=2.22.0; extra == "remote"
99
63
  Provides-Extra: vector
100
- Requires-Dist: usearch ; extra == 'vector'
64
+ Requires-Dist: usearch; extra == "vector"
65
+ Provides-Extra: hf
66
+ Requires-Dist: numba>=0.60.0; extra == "hf"
67
+ Requires-Dist: datasets[audio,vision]>=2.21.0; extra == "hf"
68
+ Provides-Extra: tests
69
+ Requires-Dist: datachain[hf,remote,torch,vector]; extra == "tests"
70
+ Requires-Dist: pytest<9,>=8; extra == "tests"
71
+ Requires-Dist: pytest-sugar>=0.9.6; extra == "tests"
72
+ Requires-Dist: pytest-cov>=4.1.0; extra == "tests"
73
+ Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
74
+ Requires-Dist: pytest-servers[all]>=0.5.8; extra == "tests"
75
+ Requires-Dist: pytest-benchmark[histogram]; extra == "tests"
76
+ Requires-Dist: pytest-xdist>=3.3.1; extra == "tests"
77
+ Requires-Dist: virtualenv; extra == "tests"
78
+ Requires-Dist: dulwich; extra == "tests"
79
+ Requires-Dist: hypothesis; extra == "tests"
80
+ Requires-Dist: open_clip_torch; extra == "tests"
81
+ Requires-Dist: aiotools>=1.7.0; extra == "tests"
82
+ Requires-Dist: requests-mock; extra == "tests"
83
+ Requires-Dist: scipy; extra == "tests"
84
+ Provides-Extra: dev
85
+ Requires-Dist: datachain[docs,tests]; extra == "dev"
86
+ Requires-Dist: mypy==1.13.0; extra == "dev"
87
+ Requires-Dist: types-python-dateutil; extra == "dev"
88
+ Requires-Dist: types-pytz; extra == "dev"
89
+ Requires-Dist: types-PyYAML; extra == "dev"
90
+ Requires-Dist: types-requests; extra == "dev"
91
+ Requires-Dist: types-tabulate; extra == "dev"
92
+ Provides-Extra: examples
93
+ Requires-Dist: datachain[tests]; extra == "examples"
94
+ Requires-Dist: numpy<2,>=1; extra == "examples"
95
+ Requires-Dist: defusedxml; extra == "examples"
96
+ Requires-Dist: accelerate; extra == "examples"
97
+ Requires-Dist: unstructured[embed-huggingface,pdf]<0.16.0; extra == "examples"
98
+ Requires-Dist: pdfplumber==0.11.4; extra == "examples"
99
+ Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
100
+ Requires-Dist: onnx==1.16.1; extra == "examples"
101
+ Requires-Dist: ultralytics==8.3.29; extra == "examples"
101
102
 
102
103
  ================
103
104
  |logo| DataChain