datachain 0.7.0__py3-none-any.whl → 0.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/__init__.py +0 -3
- datachain/catalog/catalog.py +8 -6
- datachain/cli.py +1 -1
- datachain/client/fsspec.py +9 -9
- datachain/data_storage/schema.py +2 -2
- datachain/data_storage/sqlite.py +5 -4
- datachain/data_storage/warehouse.py +18 -18
- datachain/func/__init__.py +49 -0
- datachain/{lib/func → func}/aggregate.py +13 -11
- datachain/func/array.py +176 -0
- datachain/func/base.py +23 -0
- datachain/func/conditional.py +81 -0
- datachain/func/func.py +384 -0
- datachain/func/path.py +110 -0
- datachain/func/random.py +23 -0
- datachain/func/string.py +154 -0
- datachain/func/window.py +49 -0
- datachain/lib/arrow.py +24 -12
- datachain/lib/data_model.py +25 -9
- datachain/lib/dataset_info.py +2 -2
- datachain/lib/dc.py +94 -56
- datachain/lib/hf.py +1 -1
- datachain/lib/signal_schema.py +1 -1
- datachain/lib/utils.py +1 -0
- datachain/lib/webdataset_laion.py +5 -5
- datachain/model/__init__.py +6 -0
- datachain/model/bbox.py +102 -0
- datachain/model/pose.py +88 -0
- datachain/model/segment.py +47 -0
- datachain/model/ultralytics/__init__.py +27 -0
- datachain/model/ultralytics/bbox.py +147 -0
- datachain/model/ultralytics/pose.py +113 -0
- datachain/model/ultralytics/segment.py +91 -0
- datachain/nodes_fetcher.py +2 -2
- datachain/query/dataset.py +57 -34
- datachain/sql/__init__.py +0 -2
- datachain/sql/functions/__init__.py +0 -26
- datachain/sql/selectable.py +11 -5
- datachain/sql/sqlite/base.py +11 -2
- datachain/toolkit/split.py +6 -2
- {datachain-0.7.0.dist-info → datachain-0.7.2.dist-info}/METADATA +72 -71
- {datachain-0.7.0.dist-info → datachain-0.7.2.dist-info}/RECORD +46 -35
- {datachain-0.7.0.dist-info → datachain-0.7.2.dist-info}/WHEEL +1 -1
- datachain/lib/func/__init__.py +0 -32
- datachain/lib/func/func.py +0 -152
- datachain/lib/models/__init__.py +0 -5
- datachain/lib/models/bbox.py +0 -45
- datachain/lib/models/pose.py +0 -37
- datachain/lib/models/yolo.py +0 -39
- {datachain-0.7.0.dist-info → datachain-0.7.2.dist-info}/LICENSE +0 -0
- {datachain-0.7.0.dist-info → datachain-0.7.2.dist-info}/entry_points.txt +0 -0
- {datachain-0.7.0.dist-info → datachain-0.7.2.dist-info}/top_level.txt +0 -0
datachain/query/dataset.py
CHANGED
|
@@ -43,9 +43,10 @@ from datachain.data_storage.schema import (
|
|
|
43
43
|
)
|
|
44
44
|
from datachain.dataset import DatasetStatus, RowDict
|
|
45
45
|
from datachain.error import DatasetNotFoundError, QueryScriptCancelError
|
|
46
|
+
from datachain.func.base import Function
|
|
46
47
|
from datachain.lib.udf import UDFAdapter
|
|
47
48
|
from datachain.progress import CombinedDownloadCallback
|
|
48
|
-
from datachain.sql.functions import rand
|
|
49
|
+
from datachain.sql.functions.random import rand
|
|
49
50
|
from datachain.utils import (
|
|
50
51
|
batched,
|
|
51
52
|
determine_processes,
|
|
@@ -65,15 +66,16 @@ if TYPE_CHECKING:
|
|
|
65
66
|
from datachain.catalog import Catalog
|
|
66
67
|
from datachain.data_storage import AbstractWarehouse
|
|
67
68
|
from datachain.dataset import DatasetRecord
|
|
68
|
-
|
|
69
|
-
from .udf import UDFResult
|
|
69
|
+
from datachain.lib.udf import UDFResult
|
|
70
70
|
|
|
71
71
|
P = ParamSpec("P")
|
|
72
72
|
|
|
73
73
|
|
|
74
74
|
INSERT_BATCH_SIZE = 10000
|
|
75
75
|
|
|
76
|
-
PartitionByType = Union[
|
|
76
|
+
PartitionByType = Union[
|
|
77
|
+
Function, ColumnElement, Sequence[Union[Function, ColumnElement]]
|
|
78
|
+
]
|
|
77
79
|
JoinPredicateType = Union[str, ColumnClause, ColumnElement]
|
|
78
80
|
DatasetDependencyType = tuple[str, int]
|
|
79
81
|
|
|
@@ -457,18 +459,15 @@ class UDFStep(Step, ABC):
|
|
|
457
459
|
# Run the UDFDispatcher in another process to avoid needing
|
|
458
460
|
# if __name__ == '__main__': in user scripts
|
|
459
461
|
exec_cmd = get_datachain_executable()
|
|
462
|
+
cmd = [*exec_cmd, "internal-run-udf"]
|
|
460
463
|
envs = dict(os.environ)
|
|
461
464
|
envs.update({"PYTHONPATH": os.getcwd()})
|
|
462
465
|
process_data = filtered_cloudpickle_dumps(udf_info)
|
|
463
|
-
result = subprocess.run( # noqa: S603
|
|
464
|
-
[*exec_cmd, "internal-run-udf"],
|
|
465
|
-
input=process_data,
|
|
466
|
-
check=False,
|
|
467
|
-
env=envs,
|
|
468
|
-
)
|
|
469
|
-
if result.returncode != 0:
|
|
470
|
-
raise RuntimeError("UDF Execution Failed!")
|
|
471
466
|
|
|
467
|
+
with subprocess.Popen(cmd, env=envs, stdin=subprocess.PIPE) as process: # noqa: S603
|
|
468
|
+
process.communicate(process_data)
|
|
469
|
+
if process.poll():
|
|
470
|
+
raise RuntimeError("UDF Execution Failed!")
|
|
472
471
|
else:
|
|
473
472
|
# Otherwise process single-threaded (faster for smaller UDFs)
|
|
474
473
|
warehouse = self.catalog.warehouse
|
|
@@ -520,13 +519,17 @@ class UDFStep(Step, ABC):
|
|
|
520
519
|
else:
|
|
521
520
|
list_partition_by = [self.partition_by]
|
|
522
521
|
|
|
522
|
+
partition_by = [
|
|
523
|
+
p.get_column() if isinstance(p, Function) else p for p in list_partition_by
|
|
524
|
+
]
|
|
525
|
+
|
|
523
526
|
# create table with partitions
|
|
524
527
|
tbl = self.catalog.warehouse.create_udf_table(partition_columns())
|
|
525
528
|
|
|
526
529
|
# fill table with partitions
|
|
527
530
|
cols = [
|
|
528
531
|
query.selected_columns.sys__id,
|
|
529
|
-
f.dense_rank().over(order_by=
|
|
532
|
+
f.dense_rank().over(order_by=partition_by).label(PARTITION_COLUMN_ID),
|
|
530
533
|
]
|
|
531
534
|
self.catalog.warehouse.db.execute(
|
|
532
535
|
tbl.insert().from_select(cols, query.with_only_columns(*cols))
|
|
@@ -683,6 +686,12 @@ class SQLClause(Step, ABC):
|
|
|
683
686
|
|
|
684
687
|
return step_result(q, new_query.selected_columns)
|
|
685
688
|
|
|
689
|
+
def parse_cols(
|
|
690
|
+
self,
|
|
691
|
+
cols: Sequence[Union[Function, ColumnElement]],
|
|
692
|
+
) -> tuple[ColumnElement, ...]:
|
|
693
|
+
return tuple(c.get_column() if isinstance(c, Function) else c for c in cols)
|
|
694
|
+
|
|
686
695
|
@abstractmethod
|
|
687
696
|
def apply_sql_clause(self, query):
|
|
688
697
|
pass
|
|
@@ -690,12 +699,14 @@ class SQLClause(Step, ABC):
|
|
|
690
699
|
|
|
691
700
|
@frozen
|
|
692
701
|
class SQLSelect(SQLClause):
|
|
693
|
-
args: tuple[Union[
|
|
702
|
+
args: tuple[Union[Function, ColumnElement], ...]
|
|
694
703
|
|
|
695
704
|
def apply_sql_clause(self, query) -> Select:
|
|
696
705
|
subquery = query.subquery()
|
|
697
|
-
|
|
698
|
-
|
|
706
|
+
args = [
|
|
707
|
+
subquery.c[str(c)] if isinstance(c, (str, C)) else c
|
|
708
|
+
for c in self.parse_cols(self.args)
|
|
709
|
+
]
|
|
699
710
|
if not args:
|
|
700
711
|
args = subquery.c
|
|
701
712
|
|
|
@@ -704,22 +715,25 @@ class SQLSelect(SQLClause):
|
|
|
704
715
|
|
|
705
716
|
@frozen
|
|
706
717
|
class SQLSelectExcept(SQLClause):
|
|
707
|
-
args: tuple[
|
|
718
|
+
args: tuple[Union[Function, ColumnElement], ...]
|
|
708
719
|
|
|
709
720
|
def apply_sql_clause(self, query: Select) -> Select:
|
|
710
721
|
subquery = query.subquery()
|
|
711
|
-
|
|
712
|
-
args = [c for c in subquery.c if c.name not in names]
|
|
722
|
+
args = [c for c in subquery.c if c.name not in set(self.parse_cols(self.args))]
|
|
713
723
|
return sqlalchemy.select(*args).select_from(subquery)
|
|
714
724
|
|
|
715
725
|
|
|
716
726
|
@frozen
|
|
717
727
|
class SQLMutate(SQLClause):
|
|
718
|
-
args: tuple[ColumnElement, ...]
|
|
728
|
+
args: tuple[Union[Function, ColumnElement], ...]
|
|
719
729
|
|
|
720
730
|
def apply_sql_clause(self, query: Select) -> Select:
|
|
721
731
|
original_subquery = query.subquery()
|
|
722
|
-
|
|
732
|
+
args = [
|
|
733
|
+
original_subquery.c[str(c)] if isinstance(c, (str, C)) else c
|
|
734
|
+
for c in self.parse_cols(self.args)
|
|
735
|
+
]
|
|
736
|
+
to_mutate = {c.name for c in args}
|
|
723
737
|
|
|
724
738
|
prefix = f"mutate{token_hex(8)}_"
|
|
725
739
|
cols = [
|
|
@@ -729,9 +743,7 @@ class SQLMutate(SQLClause):
|
|
|
729
743
|
# this is needed for new column to be used in clauses
|
|
730
744
|
# like ORDER BY, otherwise new column is not recognized
|
|
731
745
|
subquery = (
|
|
732
|
-
sqlalchemy.select(*cols, *
|
|
733
|
-
.select_from(original_subquery)
|
|
734
|
-
.subquery()
|
|
746
|
+
sqlalchemy.select(*cols, *args).select_from(original_subquery).subquery()
|
|
735
747
|
)
|
|
736
748
|
|
|
737
749
|
return sqlalchemy.select(*subquery.c).select_from(subquery)
|
|
@@ -739,21 +751,24 @@ class SQLMutate(SQLClause):
|
|
|
739
751
|
|
|
740
752
|
@frozen
|
|
741
753
|
class SQLFilter(SQLClause):
|
|
742
|
-
expressions: tuple[ColumnElement, ...]
|
|
754
|
+
expressions: tuple[Union[Function, ColumnElement], ...]
|
|
743
755
|
|
|
744
756
|
def __and__(self, other):
|
|
745
|
-
|
|
757
|
+
expressions = self.parse_cols(self.expressions)
|
|
758
|
+
return self.__class__(expressions + other)
|
|
746
759
|
|
|
747
760
|
def apply_sql_clause(self, query: Select) -> Select:
|
|
748
|
-
|
|
761
|
+
expressions = self.parse_cols(self.expressions)
|
|
762
|
+
return query.filter(*expressions)
|
|
749
763
|
|
|
750
764
|
|
|
751
765
|
@frozen
|
|
752
766
|
class SQLOrderBy(SQLClause):
|
|
753
|
-
args: tuple[ColumnElement, ...]
|
|
767
|
+
args: tuple[Union[Function, ColumnElement], ...]
|
|
754
768
|
|
|
755
769
|
def apply_sql_clause(self, query: Select) -> Select:
|
|
756
|
-
|
|
770
|
+
args = self.parse_cols(self.args)
|
|
771
|
+
return query.order_by(*args)
|
|
757
772
|
|
|
758
773
|
|
|
759
774
|
@frozen
|
|
@@ -948,8 +963,8 @@ class SQLJoin(Step):
|
|
|
948
963
|
|
|
949
964
|
@frozen
|
|
950
965
|
class SQLGroupBy(SQLClause):
|
|
951
|
-
cols: Sequence[Union[str, ColumnElement]]
|
|
952
|
-
group_by: Sequence[Union[str, ColumnElement]]
|
|
966
|
+
cols: Sequence[Union[str, Function, ColumnElement]]
|
|
967
|
+
group_by: Sequence[Union[str, Function, ColumnElement]]
|
|
953
968
|
|
|
954
969
|
def apply_sql_clause(self, query) -> Select:
|
|
955
970
|
if not self.cols:
|
|
@@ -959,12 +974,20 @@ class SQLGroupBy(SQLClause):
|
|
|
959
974
|
|
|
960
975
|
subquery = query.subquery()
|
|
961
976
|
|
|
977
|
+
group_by = [
|
|
978
|
+
c.get_column() if isinstance(c, Function) else c for c in self.group_by
|
|
979
|
+
]
|
|
980
|
+
|
|
962
981
|
cols = [
|
|
963
|
-
|
|
964
|
-
|
|
982
|
+
c.get_column()
|
|
983
|
+
if isinstance(c, Function)
|
|
984
|
+
else subquery.c[str(c)]
|
|
985
|
+
if isinstance(c, (str, C))
|
|
986
|
+
else c
|
|
987
|
+
for c in (*group_by, *self.cols)
|
|
965
988
|
]
|
|
966
989
|
|
|
967
|
-
return sqlalchemy.select(*cols).select_from(subquery).group_by(*
|
|
990
|
+
return sqlalchemy.select(*cols).select_from(subquery).group_by(*group_by)
|
|
968
991
|
|
|
969
992
|
|
|
970
993
|
def _validate_columns(
|
datachain/sql/__init__.py
CHANGED
|
@@ -1,13 +1,11 @@
|
|
|
1
1
|
from sqlalchemy.sql.elements import literal
|
|
2
2
|
from sqlalchemy.sql.expression import column
|
|
3
3
|
|
|
4
|
-
from . import functions
|
|
5
4
|
from .default import setup as default_setup
|
|
6
5
|
from .selectable import select, values
|
|
7
6
|
|
|
8
7
|
__all__ = [
|
|
9
8
|
"column",
|
|
10
|
-
"functions",
|
|
11
9
|
"literal",
|
|
12
10
|
"select",
|
|
13
11
|
"values",
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
from sqlalchemy.sql.expression import func
|
|
2
|
-
|
|
3
|
-
from . import array, path, string
|
|
4
|
-
from .aggregate import avg
|
|
5
|
-
from .conditional import greatest, least
|
|
6
|
-
from .random import rand
|
|
7
|
-
|
|
8
|
-
count = func.count
|
|
9
|
-
sum = func.sum
|
|
10
|
-
min = func.min
|
|
11
|
-
max = func.max
|
|
12
|
-
|
|
13
|
-
__all__ = [
|
|
14
|
-
"array",
|
|
15
|
-
"avg",
|
|
16
|
-
"count",
|
|
17
|
-
"func",
|
|
18
|
-
"greatest",
|
|
19
|
-
"least",
|
|
20
|
-
"max",
|
|
21
|
-
"min",
|
|
22
|
-
"path",
|
|
23
|
-
"rand",
|
|
24
|
-
"string",
|
|
25
|
-
"sum",
|
|
26
|
-
]
|
datachain/sql/selectable.py
CHANGED
|
@@ -9,7 +9,9 @@ class Values(selectable.Values):
|
|
|
9
9
|
columns = [expression.column(f"c{i}") for i in range(1, num_columns + 1)]
|
|
10
10
|
else:
|
|
11
11
|
columns = [
|
|
12
|
-
|
|
12
|
+
process_column_expression(c)
|
|
13
|
+
for c in columns
|
|
14
|
+
# expression.column(c) if isinstance(c, str) else c for c in columns
|
|
13
15
|
]
|
|
14
16
|
super().__init__(*columns, **kwargs)
|
|
15
17
|
self._data += tuple(data)
|
|
@@ -19,13 +21,17 @@ def values(data, columns=None, **kwargs) -> Values:
|
|
|
19
21
|
return Values(data, columns=columns, **kwargs)
|
|
20
22
|
|
|
21
23
|
|
|
22
|
-
def
|
|
23
|
-
|
|
24
|
+
def process_column_expression(col):
|
|
25
|
+
if hasattr(col, "get_column"):
|
|
26
|
+
return col.get_column()
|
|
27
|
+
if isinstance(col, str):
|
|
28
|
+
return expression.column(col)
|
|
29
|
+
return col
|
|
24
30
|
|
|
25
31
|
|
|
26
32
|
def select(*columns, **kwargs) -> "expression.Select":
|
|
27
|
-
|
|
28
|
-
return expression.select(*
|
|
33
|
+
columns_processed = [process_column_expression(c) for c in columns]
|
|
34
|
+
return expression.select(*columns_processed, **kwargs)
|
|
29
35
|
|
|
30
36
|
|
|
31
37
|
def base_values_compiler(column_name_func, element, compiler, **kwargs):
|
datachain/sql/sqlite/base.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import re
|
|
3
3
|
import sqlite3
|
|
4
|
+
import warnings
|
|
4
5
|
from collections.abc import Iterable
|
|
5
6
|
from datetime import MAXYEAR, MINYEAR, datetime, timezone
|
|
6
7
|
from types import MappingProxyType
|
|
@@ -418,14 +419,22 @@ def compile_collect(element, compiler, **kwargs):
|
|
|
418
419
|
return compiler.process(func.json_group_array(*element.clauses.clauses), **kwargs)
|
|
419
420
|
|
|
420
421
|
|
|
421
|
-
def load_usearch_extension(conn) -> bool:
|
|
422
|
+
def load_usearch_extension(conn: sqlite3.Connection) -> bool:
|
|
422
423
|
try:
|
|
423
424
|
# usearch is part of the vector optional dependencies
|
|
424
425
|
# we use the extension's cosine and euclidean distance functions
|
|
425
426
|
from usearch import sqlite_path
|
|
426
427
|
|
|
427
428
|
conn.enable_load_extension(True)
|
|
428
|
-
|
|
429
|
+
|
|
430
|
+
with warnings.catch_warnings():
|
|
431
|
+
# usearch binary is not available for Windows, see: https://github.com/unum-cloud/usearch/issues/427.
|
|
432
|
+
# and, sometimes fail to download the binary in other platforms
|
|
433
|
+
# triggering UserWarning.
|
|
434
|
+
|
|
435
|
+
warnings.filterwarnings("ignore", category=UserWarning, module="usearch")
|
|
436
|
+
conn.load_extension(sqlite_path())
|
|
437
|
+
|
|
429
438
|
conn.enable_load_extension(False)
|
|
430
439
|
return True
|
|
431
440
|
|
datachain/toolkit/split.py
CHANGED
|
@@ -58,10 +58,14 @@ def train_test_split(dc: DataChain, weights: list[float]) -> list[DataChain]:
|
|
|
58
58
|
|
|
59
59
|
weights_normalized = [weight / sum(weights) for weight in weights]
|
|
60
60
|
|
|
61
|
+
resolution = 2**31 - 1 # Maximum positive value for a 32-bit signed integer.
|
|
62
|
+
|
|
61
63
|
return [
|
|
62
64
|
dc.filter(
|
|
63
|
-
C("sys__rand") %
|
|
64
|
-
|
|
65
|
+
C("sys__rand") % resolution
|
|
66
|
+
>= round(sum(weights_normalized[:index]) * resolution),
|
|
67
|
+
C("sys__rand") % resolution
|
|
68
|
+
< round(sum(weights_normalized[: index + 1]) * resolution),
|
|
65
69
|
)
|
|
66
70
|
for index, _ in enumerate(weights_normalized)
|
|
67
71
|
]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.7.
|
|
3
|
+
Version: 0.7.2
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -19,85 +19,86 @@ License-File: LICENSE
|
|
|
19
19
|
Requires-Dist: pyyaml
|
|
20
20
|
Requires-Dist: tomlkit
|
|
21
21
|
Requires-Dist: tqdm
|
|
22
|
-
Requires-Dist: numpy
|
|
23
|
-
Requires-Dist: pandas
|
|
22
|
+
Requires-Dist: numpy<3,>=1
|
|
23
|
+
Requires-Dist: pandas>=2.0.0
|
|
24
24
|
Requires-Dist: pyarrow
|
|
25
25
|
Requires-Dist: typing-extensions
|
|
26
|
-
Requires-Dist: python-dateutil
|
|
27
|
-
Requires-Dist: attrs
|
|
28
|
-
Requires-Dist: s3fs
|
|
29
|
-
Requires-Dist: gcsfs
|
|
30
|
-
Requires-Dist: adlfs
|
|
31
|
-
Requires-Dist: dvc-data
|
|
32
|
-
Requires-Dist: dvc-objects
|
|
33
|
-
Requires-Dist: shtab
|
|
34
|
-
Requires-Dist: sqlalchemy
|
|
35
|
-
Requires-Dist: multiprocess
|
|
26
|
+
Requires-Dist: python-dateutil>=2
|
|
27
|
+
Requires-Dist: attrs>=21.3.0
|
|
28
|
+
Requires-Dist: s3fs>=2024.2.0
|
|
29
|
+
Requires-Dist: gcsfs>=2024.2.0
|
|
30
|
+
Requires-Dist: adlfs>=2024.2.0
|
|
31
|
+
Requires-Dist: dvc-data<4,>=3.10
|
|
32
|
+
Requires-Dist: dvc-objects<6,>=4
|
|
33
|
+
Requires-Dist: shtab<2,>=1.3.4
|
|
34
|
+
Requires-Dist: sqlalchemy>=2
|
|
35
|
+
Requires-Dist: multiprocess==0.70.16
|
|
36
36
|
Requires-Dist: cloudpickle
|
|
37
|
-
Requires-Dist: orjson
|
|
38
|
-
Requires-Dist: pydantic
|
|
39
|
-
Requires-Dist: jmespath
|
|
40
|
-
Requires-Dist: datamodel-code-generator
|
|
41
|
-
Requires-Dist: Pillow
|
|
42
|
-
Requires-Dist: msgpack
|
|
37
|
+
Requires-Dist: orjson>=3.10.5
|
|
38
|
+
Requires-Dist: pydantic<3,>=2
|
|
39
|
+
Requires-Dist: jmespath>=1.0
|
|
40
|
+
Requires-Dist: datamodel-code-generator>=0.25
|
|
41
|
+
Requires-Dist: Pillow<12,>=10.0.0
|
|
42
|
+
Requires-Dist: msgpack<2,>=1.0.4
|
|
43
43
|
Requires-Dist: psutil
|
|
44
|
-
Requires-Dist:
|
|
45
|
-
Requires-Dist: iterative-telemetry
|
|
44
|
+
Requires-Dist: huggingface_hub
|
|
45
|
+
Requires-Dist: iterative-telemetry>=0.0.9
|
|
46
46
|
Requires-Dist: platformdirs
|
|
47
|
-
Requires-Dist: dvc-studio-client
|
|
47
|
+
Requires-Dist: dvc-studio-client<1,>=0.21
|
|
48
48
|
Requires-Dist: tabulate
|
|
49
|
-
Provides-Extra: dev
|
|
50
|
-
Requires-Dist: datachain[docs,tests] ; extra == 'dev'
|
|
51
|
-
Requires-Dist: mypy ==1.13.0 ; extra == 'dev'
|
|
52
|
-
Requires-Dist: types-python-dateutil ; extra == 'dev'
|
|
53
|
-
Requires-Dist: types-pytz ; extra == 'dev'
|
|
54
|
-
Requires-Dist: types-PyYAML ; extra == 'dev'
|
|
55
|
-
Requires-Dist: types-requests ; extra == 'dev'
|
|
56
|
-
Requires-Dist: types-tabulate ; extra == 'dev'
|
|
57
49
|
Provides-Extra: docs
|
|
58
|
-
Requires-Dist: mkdocs
|
|
59
|
-
Requires-Dist: mkdocs-gen-files
|
|
60
|
-
Requires-Dist: mkdocs-material
|
|
61
|
-
Requires-Dist: mkdocs-section-index
|
|
62
|
-
Requires-Dist: mkdocstrings-python
|
|
63
|
-
Requires-Dist: mkdocs-literate-nav
|
|
64
|
-
Provides-Extra: examples
|
|
65
|
-
Requires-Dist: datachain[tests] ; extra == 'examples'
|
|
66
|
-
Requires-Dist: numpy <2,>=1 ; extra == 'examples'
|
|
67
|
-
Requires-Dist: defusedxml ; extra == 'examples'
|
|
68
|
-
Requires-Dist: accelerate ; extra == 'examples'
|
|
69
|
-
Requires-Dist: unstructured[embed-huggingface,pdf] <0.16.0 ; extra == 'examples'
|
|
70
|
-
Requires-Dist: pdfplumber ==0.11.4 ; extra == 'examples'
|
|
71
|
-
Requires-Dist: huggingface-hub[hf_transfer] ; extra == 'examples'
|
|
72
|
-
Requires-Dist: onnx ==1.16.1 ; extra == 'examples'
|
|
73
|
-
Provides-Extra: hf
|
|
74
|
-
Requires-Dist: numba >=0.60.0 ; extra == 'hf'
|
|
75
|
-
Requires-Dist: datasets[audio,vision] >=2.21.0 ; extra == 'hf'
|
|
76
|
-
Provides-Extra: remote
|
|
77
|
-
Requires-Dist: lz4 ; extra == 'remote'
|
|
78
|
-
Requires-Dist: requests >=2.22.0 ; extra == 'remote'
|
|
79
|
-
Provides-Extra: tests
|
|
80
|
-
Requires-Dist: datachain[hf,remote,torch,vector] ; extra == 'tests'
|
|
81
|
-
Requires-Dist: pytest <9,>=8 ; extra == 'tests'
|
|
82
|
-
Requires-Dist: pytest-sugar >=0.9.6 ; extra == 'tests'
|
|
83
|
-
Requires-Dist: pytest-cov >=4.1.0 ; extra == 'tests'
|
|
84
|
-
Requires-Dist: pytest-mock >=3.12.0 ; extra == 'tests'
|
|
85
|
-
Requires-Dist: pytest-servers[all] >=0.5.8 ; extra == 'tests'
|
|
86
|
-
Requires-Dist: pytest-benchmark[histogram] ; extra == 'tests'
|
|
87
|
-
Requires-Dist: pytest-xdist >=3.3.1 ; extra == 'tests'
|
|
88
|
-
Requires-Dist: virtualenv ; extra == 'tests'
|
|
89
|
-
Requires-Dist: dulwich ; extra == 'tests'
|
|
90
|
-
Requires-Dist: hypothesis ; extra == 'tests'
|
|
91
|
-
Requires-Dist: open-clip-torch ; extra == 'tests'
|
|
92
|
-
Requires-Dist: aiotools >=1.7.0 ; extra == 'tests'
|
|
93
|
-
Requires-Dist: requests-mock ; extra == 'tests'
|
|
94
|
-
Requires-Dist: scipy ; extra == 'tests'
|
|
50
|
+
Requires-Dist: mkdocs>=1.5.2; extra == "docs"
|
|
51
|
+
Requires-Dist: mkdocs-gen-files>=0.5.0; extra == "docs"
|
|
52
|
+
Requires-Dist: mkdocs-material>=9.3.1; extra == "docs"
|
|
53
|
+
Requires-Dist: mkdocs-section-index>=0.3.6; extra == "docs"
|
|
54
|
+
Requires-Dist: mkdocstrings-python>=1.6.3; extra == "docs"
|
|
55
|
+
Requires-Dist: mkdocs-literate-nav>=0.6.1; extra == "docs"
|
|
95
56
|
Provides-Extra: torch
|
|
96
|
-
Requires-Dist: torch
|
|
97
|
-
Requires-Dist: torchvision
|
|
98
|
-
Requires-Dist: transformers
|
|
57
|
+
Requires-Dist: torch>=2.1.0; extra == "torch"
|
|
58
|
+
Requires-Dist: torchvision; extra == "torch"
|
|
59
|
+
Requires-Dist: transformers>=4.36.0; extra == "torch"
|
|
60
|
+
Provides-Extra: remote
|
|
61
|
+
Requires-Dist: lz4; extra == "remote"
|
|
62
|
+
Requires-Dist: requests>=2.22.0; extra == "remote"
|
|
99
63
|
Provides-Extra: vector
|
|
100
|
-
Requires-Dist: usearch
|
|
64
|
+
Requires-Dist: usearch; extra == "vector"
|
|
65
|
+
Provides-Extra: hf
|
|
66
|
+
Requires-Dist: numba>=0.60.0; extra == "hf"
|
|
67
|
+
Requires-Dist: datasets[audio,vision]>=2.21.0; extra == "hf"
|
|
68
|
+
Provides-Extra: tests
|
|
69
|
+
Requires-Dist: datachain[hf,remote,torch,vector]; extra == "tests"
|
|
70
|
+
Requires-Dist: pytest<9,>=8; extra == "tests"
|
|
71
|
+
Requires-Dist: pytest-sugar>=0.9.6; extra == "tests"
|
|
72
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "tests"
|
|
73
|
+
Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
|
|
74
|
+
Requires-Dist: pytest-servers[all]>=0.5.8; extra == "tests"
|
|
75
|
+
Requires-Dist: pytest-benchmark[histogram]; extra == "tests"
|
|
76
|
+
Requires-Dist: pytest-xdist>=3.3.1; extra == "tests"
|
|
77
|
+
Requires-Dist: virtualenv; extra == "tests"
|
|
78
|
+
Requires-Dist: dulwich; extra == "tests"
|
|
79
|
+
Requires-Dist: hypothesis; extra == "tests"
|
|
80
|
+
Requires-Dist: open_clip_torch; extra == "tests"
|
|
81
|
+
Requires-Dist: aiotools>=1.7.0; extra == "tests"
|
|
82
|
+
Requires-Dist: requests-mock; extra == "tests"
|
|
83
|
+
Requires-Dist: scipy; extra == "tests"
|
|
84
|
+
Provides-Extra: dev
|
|
85
|
+
Requires-Dist: datachain[docs,tests]; extra == "dev"
|
|
86
|
+
Requires-Dist: mypy==1.13.0; extra == "dev"
|
|
87
|
+
Requires-Dist: types-python-dateutil; extra == "dev"
|
|
88
|
+
Requires-Dist: types-pytz; extra == "dev"
|
|
89
|
+
Requires-Dist: types-PyYAML; extra == "dev"
|
|
90
|
+
Requires-Dist: types-requests; extra == "dev"
|
|
91
|
+
Requires-Dist: types-tabulate; extra == "dev"
|
|
92
|
+
Provides-Extra: examples
|
|
93
|
+
Requires-Dist: datachain[tests]; extra == "examples"
|
|
94
|
+
Requires-Dist: numpy<2,>=1; extra == "examples"
|
|
95
|
+
Requires-Dist: defusedxml; extra == "examples"
|
|
96
|
+
Requires-Dist: accelerate; extra == "examples"
|
|
97
|
+
Requires-Dist: unstructured[embed-huggingface,pdf]<0.16.0; extra == "examples"
|
|
98
|
+
Requires-Dist: pdfplumber==0.11.4; extra == "examples"
|
|
99
|
+
Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
|
|
100
|
+
Requires-Dist: onnx==1.16.1; extra == "examples"
|
|
101
|
+
Requires-Dist: ultralytics==8.3.29; extra == "examples"
|
|
101
102
|
|
|
102
103
|
================
|
|
103
104
|
|logo| DataChain
|