datachain 0.7.1__py3-none-any.whl → 0.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- datachain/__init__.py +0 -2
- datachain/catalog/catalog.py +12 -9
- datachain/cli.py +109 -9
- datachain/client/fsspec.py +9 -9
- datachain/data_storage/metastore.py +63 -11
- datachain/data_storage/schema.py +2 -2
- datachain/data_storage/sqlite.py +5 -4
- datachain/data_storage/warehouse.py +18 -18
- datachain/dataset.py +142 -14
- datachain/func/__init__.py +49 -0
- datachain/{lib/func → func}/aggregate.py +13 -11
- datachain/func/array.py +176 -0
- datachain/func/base.py +23 -0
- datachain/func/conditional.py +81 -0
- datachain/func/func.py +384 -0
- datachain/func/path.py +110 -0
- datachain/func/random.py +23 -0
- datachain/func/string.py +154 -0
- datachain/func/window.py +49 -0
- datachain/lib/arrow.py +24 -12
- datachain/lib/data_model.py +25 -9
- datachain/lib/dataset_info.py +9 -5
- datachain/lib/dc.py +94 -56
- datachain/lib/hf.py +1 -1
- datachain/lib/signal_schema.py +1 -1
- datachain/lib/utils.py +1 -0
- datachain/lib/webdataset_laion.py +5 -5
- datachain/model/bbox.py +2 -2
- datachain/model/pose.py +5 -5
- datachain/model/segment.py +2 -2
- datachain/nodes_fetcher.py +2 -2
- datachain/query/dataset.py +57 -34
- datachain/remote/studio.py +40 -8
- datachain/sql/__init__.py +0 -2
- datachain/sql/functions/__init__.py +0 -26
- datachain/sql/selectable.py +11 -5
- datachain/sql/sqlite/base.py +11 -2
- datachain/studio.py +29 -0
- {datachain-0.7.1.dist-info → datachain-0.7.3.dist-info}/METADATA +2 -2
- {datachain-0.7.1.dist-info → datachain-0.7.3.dist-info}/RECORD +44 -37
- datachain/lib/func/__init__.py +0 -32
- datachain/lib/func/func.py +0 -152
- {datachain-0.7.1.dist-info → datachain-0.7.3.dist-info}/LICENSE +0 -0
- {datachain-0.7.1.dist-info → datachain-0.7.3.dist-info}/WHEEL +0 -0
- {datachain-0.7.1.dist-info → datachain-0.7.3.dist-info}/entry_points.txt +0 -0
- {datachain-0.7.1.dist-info → datachain-0.7.3.dist-info}/top_level.txt +0 -0
datachain/query/dataset.py
CHANGED

@@ -43,9 +43,10 @@ from datachain.data_storage.schema import (
 )
 from datachain.dataset import DatasetStatus, RowDict
 from datachain.error import DatasetNotFoundError, QueryScriptCancelError
+from datachain.func.base import Function
 from datachain.lib.udf import UDFAdapter
 from datachain.progress import CombinedDownloadCallback
-from datachain.sql.functions import rand
+from datachain.sql.functions.random import rand
 from datachain.utils import (
     batched,
     determine_processes,
@@ -65,15 +66,16 @@ if TYPE_CHECKING:
     from datachain.catalog import Catalog
     from datachain.data_storage import AbstractWarehouse
     from datachain.dataset import DatasetRecord
-
-    from .udf import UDFResult
+    from datachain.lib.udf import UDFResult

 P = ParamSpec("P")


 INSERT_BATCH_SIZE = 10000

-PartitionByType = Union[
+PartitionByType = Union[
+    Function, ColumnElement, Sequence[Union[Function, ColumnElement]]
+]
 JoinPredicateType = Union[str, ColumnClause, ColumnElement]
 DatasetDependencyType = tuple[str, int]

@@ -457,18 +459,15 @@ class UDFStep(Step, ABC):
            # Run the UDFDispatcher in another process to avoid needing
            # if __name__ == '__main__': in user scripts
            exec_cmd = get_datachain_executable()
+           cmd = [*exec_cmd, "internal-run-udf"]
            envs = dict(os.environ)
            envs.update({"PYTHONPATH": os.getcwd()})
            process_data = filtered_cloudpickle_dumps(udf_info)
-           result = subprocess.run(  # noqa: S603
-               [*exec_cmd, "internal-run-udf"],
-               input=process_data,
-               check=False,
-               env=envs,
-           )
-           if result.returncode != 0:
-               raise RuntimeError("UDF Execution Failed!")

+           with subprocess.Popen(cmd, env=envs, stdin=subprocess.PIPE) as process:  # noqa: S603
+               process.communicate(process_data)
+               if process.poll():
+                   raise RuntimeError("UDF Execution Failed!")
        else:
            # Otherwise process single-threaded (faster for smaller UDFs)
            warehouse = self.catalog.warehouse
@@ -520,13 +519,17 @@ class UDFStep(Step, ABC):
        else:
            list_partition_by = [self.partition_by]

+       partition_by = [
+           p.get_column() if isinstance(p, Function) else p for p in list_partition_by
+       ]
+
        # create table with partitions
        tbl = self.catalog.warehouse.create_udf_table(partition_columns())

        # fill table with partitions
        cols = [
            query.selected_columns.sys__id,
-           f.dense_rank().over(order_by=
+           f.dense_rank().over(order_by=partition_by).label(PARTITION_COLUMN_ID),
        ]
        self.catalog.warehouse.db.execute(
            tbl.insert().from_select(cols, query.with_only_columns(*cols))
@@ -683,6 +686,12 @@ class SQLClause(Step, ABC):

        return step_result(q, new_query.selected_columns)

+   def parse_cols(
+       self,
+       cols: Sequence[Union[Function, ColumnElement]],
+   ) -> tuple[ColumnElement, ...]:
+       return tuple(c.get_column() if isinstance(c, Function) else c for c in cols)
+
    @abstractmethod
    def apply_sql_clause(self, query):
        pass
@@ -690,12 +699,14 @@ class SQLClause(Step, ABC):

 @frozen
 class SQLSelect(SQLClause):
-   args: tuple[Union[
+   args: tuple[Union[Function, ColumnElement], ...]

    def apply_sql_clause(self, query) -> Select:
        subquery = query.subquery()
-
-
+       args = [
+           subquery.c[str(c)] if isinstance(c, (str, C)) else c
+           for c in self.parse_cols(self.args)
+       ]
        if not args:
            args = subquery.c

@@ -704,22 +715,25 @@ class SQLSelect(SQLClause):

 @frozen
 class SQLSelectExcept(SQLClause):
-   args: tuple[
+   args: tuple[Union[Function, ColumnElement], ...]

    def apply_sql_clause(self, query: Select) -> Select:
        subquery = query.subquery()
-
-       args = [c for c in subquery.c if c.name not in names]
+       args = [c for c in subquery.c if c.name not in set(self.parse_cols(self.args))]
        return sqlalchemy.select(*args).select_from(subquery)


 @frozen
 class SQLMutate(SQLClause):
-   args: tuple[ColumnElement, ...]
+   args: tuple[Union[Function, ColumnElement], ...]

    def apply_sql_clause(self, query: Select) -> Select:
        original_subquery = query.subquery()
-
+       args = [
+           original_subquery.c[str(c)] if isinstance(c, (str, C)) else c
+           for c in self.parse_cols(self.args)
+       ]
+       to_mutate = {c.name for c in args}

        prefix = f"mutate{token_hex(8)}_"
        cols = [
@@ -729,9 +743,7 @@ class SQLMutate(SQLClause):
        # this is needed for new column to be used in clauses
        # like ORDER BY, otherwise new column is not recognized
        subquery = (
-           sqlalchemy.select(*cols, *
-           .select_from(original_subquery)
-           .subquery()
+           sqlalchemy.select(*cols, *args).select_from(original_subquery).subquery()
        )

        return sqlalchemy.select(*subquery.c).select_from(subquery)
@@ -739,21 +751,24 @@ class SQLMutate(SQLClause):

 @frozen
 class SQLFilter(SQLClause):
-   expressions: tuple[ColumnElement, ...]
+   expressions: tuple[Union[Function, ColumnElement], ...]

    def __and__(self, other):
-
+       expressions = self.parse_cols(self.expressions)
+       return self.__class__(expressions + other)

    def apply_sql_clause(self, query: Select) -> Select:
-
+       expressions = self.parse_cols(self.expressions)
+       return query.filter(*expressions)


 @frozen
 class SQLOrderBy(SQLClause):
-   args: tuple[ColumnElement, ...]
+   args: tuple[Union[Function, ColumnElement], ...]

    def apply_sql_clause(self, query: Select) -> Select:
-
+       args = self.parse_cols(self.args)
+       return query.order_by(*args)


 @frozen
@@ -948,8 +963,8 @@ class SQLJoin(Step):

 @frozen
 class SQLGroupBy(SQLClause):
-   cols: Sequence[Union[str, ColumnElement]]
-   group_by: Sequence[Union[str, ColumnElement]]
+   cols: Sequence[Union[str, Function, ColumnElement]]
+   group_by: Sequence[Union[str, Function, ColumnElement]]

    def apply_sql_clause(self, query) -> Select:
        if not self.cols:
@@ -959,12 +974,20 @@ class SQLGroupBy(SQLClause):

        subquery = query.subquery()

+       group_by = [
+           c.get_column() if isinstance(c, Function) else c for c in self.group_by
+       ]
+
        cols = [
-
-
+           c.get_column()
+           if isinstance(c, Function)
+           else subquery.c[str(c)]
+           if isinstance(c, (str, C))
+           else c
+           for c in (*group_by, *self.cols)
        ]

-       return sqlalchemy.select(*cols).select_from(subquery).group_by(*
+       return sqlalchemy.select(*cols).select_from(subquery).group_by(*group_by)


    def _validate_columns(
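The UDF subprocess change above swaps a buffered subprocess.run call for a streaming Popen pipe. A minimal sketch of the same pattern outside datachain, with `cat` standing in for the `internal-run-udf` entry point and a byte string standing in for the cloudpickled `udf_info`:

```python
import subprocess

payload = b"stand-in for filtered_cloudpickle_dumps(udf_info)"
cmd = ["cat"]  # stand-in for [*exec_cmd, "internal-run-udf"]; assumes a Unix-like system

# Popen with stdin=PIPE streams the payload to the child and waits for it,
# mirroring the new UDFStep code path; a non-zero exit code is treated as failure.
with subprocess.Popen(cmd, stdin=subprocess.PIPE) as process:
    process.communicate(payload)
    if process.poll():
        raise RuntimeError("child process failed")
```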
datachain/remote/studio.py
CHANGED

@@ -178,17 +178,9 @@ class StudioClient:
        data = {}

        if not ok:
-           logger.error(
-               "Got bad response from Studio, content is %s",
-               response.content.decode("utf-8"),
-           )
            if response.status_code == 403:
                message = f"Not authorized for the team {self.team}"
            else:
-               logger.error(
-                   "Got bad response from Studio, content is %s",
-                   response.content.decode("utf-8"),
-               )
                message = data.get("message", "")
        else:
            message = ""
@@ -230,6 +222,46 @@ class StudioClient:
    def ls_datasets(self) -> Response[LsData]:
        return self._send_request("datachain/ls-datasets", {})

+   def edit_dataset(
+       self,
+       name: str,
+       new_name: Optional[str] = None,
+       description: Optional[str] = None,
+       labels: Optional[list[str]] = None,
+   ) -> Response[DatasetInfoData]:
+       body = {
+           "dataset_name": name,
+       }
+
+       if new_name is not None:
+           body["new_name"] = new_name
+
+       if description is not None:
+           body["description"] = description
+
+       if labels is not None:
+           body["labels"] = labels  # type: ignore[assignment]
+
+       return self._send_request(
+           "datachain/edit-dataset",
+           body,
+       )
+
+   def rm_dataset(
+       self,
+       name: str,
+       version: Optional[int] = None,
+       force: Optional[bool] = False,
+   ) -> Response[DatasetInfoData]:
+       return self._send_request(
+           "datachain/rm-dataset",
+           {
+               "dataset_name": name,
+               "version": version,
+               "force": force,
+           },
+       )
+
    def dataset_info(self, name: str) -> Response[DatasetInfoData]:
        def _parse_dataset_info(dataset_info):
            _parse_dates(dataset_info, ["created_at", "finished_at"])
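The two new client methods mirror the request/response shape of the existing ls_datasets and dataset_info calls. A hedged usage sketch, assuming Studio credentials are already configured and using a hypothetical team and dataset name:

```python
from datachain.remote.studio import StudioClient

client = StudioClient(team="my-team")  # hypothetical team

# Rename a dataset and update its description via the new edit-dataset endpoint.
resp = client.edit_dataset("cats", new_name="cats-v2", description="re-labeled")
if not resp.ok:
    raise RuntimeError(resp.message)

# Remove a single version; version and force are passed straight through
# to the rm-dataset endpoint.
resp = client.rm_dataset("cats-v2", version=1)
```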
datachain/sql/__init__.py
CHANGED

@@ -1,13 +1,11 @@
 from sqlalchemy.sql.elements import literal
 from sqlalchemy.sql.expression import column

-from . import functions
 from .default import setup as default_setup
 from .selectable import select, values

 __all__ = [
     "column",
-    "functions",
     "literal",
     "select",
     "values",
datachain/sql/functions/__init__.py
CHANGED

@@ -1,26 +0,0 @@
-from sqlalchemy.sql.expression import func
-
-from . import array, path, string
-from .aggregate import avg
-from .conditional import greatest, least
-from .random import rand
-
-count = func.count
-sum = func.sum
-min = func.min
-max = func.max
-
-__all__ = [
-    "array",
-    "avg",
-    "count",
-    "func",
-    "greatest",
-    "least",
-    "max",
-    "min",
-    "path",
-    "rand",
-    "string",
-    "sum",
-]
datachain/sql/selectable.py
CHANGED

@@ -9,7 +9,9 @@ class Values(selectable.Values):
            columns = [expression.column(f"c{i}") for i in range(1, num_columns + 1)]
        else:
            columns = [
-
+               process_column_expression(c)
+               for c in columns
+               # expression.column(c) if isinstance(c, str) else c for c in columns
            ]
        super().__init__(*columns, **kwargs)
        self._data += tuple(data)
@@ -19,13 +21,17 @@ def values(data, columns=None, **kwargs) -> Values:
    return Values(data, columns=columns, **kwargs)


-def
-
+def process_column_expression(col):
+    if hasattr(col, "get_column"):
+        return col.get_column()
+    if isinstance(col, str):
+        return expression.column(col)
+    return col


 def select(*columns, **kwargs) -> "expression.Select":
-
-    return expression.select(*
+    columns_processed = [process_column_expression(c) for c in columns]
+    return expression.select(*columns_processed, **kwargs)


 def base_values_compiler(column_name_func, element, compiler, **kwargs):
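process_column_expression is the single place where select() and Values now normalize their inputs: anything exposing get_column() (such as the new datachain.func objects) is lowered to a SQLAlchemy ColumnElement, plain strings become column() clauses, and everything else passes through. A small sketch of that dispatch, with a stand-in class for the get_column() case:

```python
from sqlalchemy.sql import expression

from datachain.sql.selectable import process_column_expression


class FuncLike:
    """Stand-in for any object exposing get_column(), e.g. a datachain.func function."""

    def get_column(self):
        return expression.column("computed")


print(process_column_expression("name"))      # column('name') built from the string
print(process_column_expression(FuncLike()))  # whatever get_column() returned
age = expression.column("age")
print(process_column_expression(age) is age)  # True: ColumnElements pass through
```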
datachain/sql/sqlite/base.py
CHANGED

@@ -1,6 +1,7 @@
 import logging
 import re
 import sqlite3
+import warnings
 from collections.abc import Iterable
 from datetime import MAXYEAR, MINYEAR, datetime, timezone
 from types import MappingProxyType
@@ -418,14 +419,22 @@ def compile_collect(element, compiler, **kwargs):
    return compiler.process(func.json_group_array(*element.clauses.clauses), **kwargs)


-def load_usearch_extension(conn) -> bool:
+def load_usearch_extension(conn: sqlite3.Connection) -> bool:
    try:
        # usearch is part of the vector optional dependencies
        # we use the extension's cosine and euclidean distance functions
        from usearch import sqlite_path

        conn.enable_load_extension(True)
-
+
+       with warnings.catch_warnings():
+           # usearch binary is not available for Windows, see: https://github.com/unum-cloud/usearch/issues/427.
+           # and, sometimes fail to download the binary in other platforms
+           # triggering UserWarning.
+
+           warnings.filterwarnings("ignore", category=UserWarning, module="usearch")
+           conn.load_extension(sqlite_path())
+
        conn.enable_load_extension(False)
        return True

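For context, a minimal sketch of calling the helper; it only assumes what the signature above shows (a sqlite3.Connection in, a bool out indicating whether the usearch extension was loaded):

```python
import sqlite3

from datachain.sql.sqlite.base import load_usearch_extension

conn = sqlite3.connect(":memory:")
if load_usearch_extension(conn):
    print("usearch cosine/euclidean distance functions are available")
else:
    print("usearch extension not loaded")
```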
datachain/studio.py
CHANGED

@@ -130,6 +130,35 @@ def list_datasets(team: Optional[str] = None):
        yield (name, version)


+def edit_studio_dataset(
+    team_name: Optional[str],
+    name: str,
+    new_name: Optional[str] = None,
+    description: Optional[str] = None,
+    labels: Optional[list[str]] = None,
+):
+    client = StudioClient(team=team_name)
+    response = client.edit_dataset(name, new_name, description, labels)
+    if not response.ok:
+        raise_remote_error(response.message)
+
+    print(f"Dataset {name} updated")
+
+
+def remove_studio_dataset(
+    team_name: Optional[str],
+    name: str,
+    version: Optional[int] = None,
+    force: Optional[bool] = False,
+):
+    client = StudioClient(team=team_name)
+    response = client.rm_dataset(name, version, force)
+    if not response.ok:
+        raise_remote_error(response.message)
+
+    print(f"Dataset {name} removed")
+
+
 def save_config(hostname, token):
    config = Config(ConfigLevel.GLOBAL)
    with config.edit() as conf:
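The wrappers above are thin: they build a StudioClient, call the new endpoints, and print a confirmation. A hedged sketch of calling them directly (dataset names are hypothetical; passing team_name=None assumes the client falls back to its default team resolution):

```python
from datachain.studio import edit_studio_dataset, remove_studio_dataset

# Attach labels and a description to a dataset in the default team.
edit_studio_dataset(None, "clothes", description="curated subset", labels=["prod"])

# Remove version 2 of the dataset; force is forwarded to the Studio API as-is.
remove_studio_dataset(None, "clothes", version=2)
```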
{datachain-0.7.1.dist-info → datachain-0.7.3.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datachain
-Version: 0.7.1
+Version: 0.7.3
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License: Apache-2.0
@@ -139,7 +139,7 @@ Key Features
 ============

 📂 **Multimodal Dataset Versioning.**
-   - Version unstructured data without redundant data copies, by
+   - Version unstructured data without redundant data copies, by supporting
     references to S3, GCP, Azure, and local file systems.
   - Multimodal data support: images, video, text, PDFs, JSONs, CSVs, parquet, etc.
   - Unite files and metadata together into persistent, versioned, columnar datasets.
{datachain-0.7.1.dist-info → datachain-0.7.3.dist-info}/RECORD
CHANGED

@@ -1,30 +1,30 @@
-datachain/__init__.py,sha256=
+datachain/__init__.py,sha256=ofPJ6B-d-ybSDRrE7J6wqF_ZRAB2W9U8l-eeuBtqPLg,865
 datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
 datachain/asyn.py,sha256=5aKrjnUxk0mtnZeFKNJd1DCE0MsnSoyJBZkr0y9H_a0,9313
 datachain/cache.py,sha256=s0YHN7qurmQv-eC265TjeureK84TebWWAnL07cxchZQ,2997
-datachain/cli.py,sha256=
+datachain/cli.py,sha256=1hiBClE1kbRyx0DK3uX5KMVa0ktbsG6TsFSNvoT2xxs,39399
 datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
 datachain/config.py,sha256=g8qbNV0vW2VEKpX-dGZ9pAn0DAz6G2ZFcr7SAV3PoSM,4272
-datachain/dataset.py,sha256
+datachain/dataset.py,sha256=-9uPdOn1uWkGucouhsFVGRIuFdWkCdUrhV0U9f6Ihgc,18218
 datachain/error.py,sha256=bxAAL32lSeMgzsQDEHbGTGORj-mPzzpCRvWDPueJNN4,1092
 datachain/job.py,sha256=Jt4sNutMHJReaGsj3r3scueN5aESLGfhimAa8pUP7Is,1271
 datachain/listing.py,sha256=TgKg25ZWAP5enzKgw2_2GUPJVdnQUh6uySHB5SJrUY4,7773
 datachain/node.py,sha256=o8Sqy92QkzzcLK6XmIFLyDSE6Rw6kUTmGRhEmfLFdhg,5211
-datachain/nodes_fetcher.py,sha256=
+datachain/nodes_fetcher.py,sha256=ILMzUW5o4_6lUOVrLDC9gJPCXfcgKnMG68plrc7dAOA,1113
 datachain/nodes_thread_pool.py,sha256=uPo-xl8zG5m9YgODjPFBpbcqqHjI-dcxH87yAbj_qco,3192
 datachain/progress.py,sha256=5KotcvvzAUL_RF0GEj4JY0IB1lyImnmHxe89YkT1XO4,4330
 datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/studio.py,sha256=
+datachain/studio.py,sha256=w41vgVPrBfJ02XQOaDccLbh-1uSAfq9cAgOmkYUqExE,4845
 datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
 datachain/utils.py,sha256=-mSFowjIidJ4_sMXInvNHLn4rK_QnHuIlLuH1_lMGmI,13897
 datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
-datachain/catalog/catalog.py,sha256=
+datachain/catalog/catalog.py,sha256=l_HAxor5i_F03VvbmMuwhi4INhsmNrqubyydPhXWo2Y,57980
 datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
 datachain/catalog/loader.py,sha256=-6VelNfXUdgUnwInVyA8g86Boxv2xqhTh9xNS-Zlwig,8242
 datachain/client/__init__.py,sha256=T4wiYL9KIM0ZZ_UqIyzV8_ufzYlewmizlV4iymHNluE,86
 datachain/client/azure.py,sha256=ffxs26zm6KLAL1aUWJm-vtzuZP3LSNha7UDGXynMBKo,2234
 datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
-datachain/client/fsspec.py,sha256=
+datachain/client/fsspec.py,sha256=KDGLhJMnive73hI8GABeP_aQZv1w5M_6rxz6KRRxaHI,12712
 datachain/client/gcs.py,sha256=cnTIr5GS6dbYOEYfqehhyQu3dr6XNjPHSg5U3FkivUk,4124
 datachain/client/hf.py,sha256=XeVJVbiNViZCpn3sfb90Fr8SYO3BdLmfE3hOWMoqInE,951
 datachain/client/local.py,sha256=vwbgCwZ7IqY2voj2l7tLJjgov7Dp--fEUvUwUBsMbls,4457
@@ -33,19 +33,29 @@ datachain/data_storage/__init__.py,sha256=cEOJpyu1JDZtfUupYucCDNFI6e5Wmp_Oyzq6rZ
 datachain/data_storage/db_engine.py,sha256=81Ol1of9TTTzD97ORajCnP366Xz2mEJt6C-kTUCaru4,3406
 datachain/data_storage/id_generator.py,sha256=lCEoU0BM37Ai2aRpSbwo5oQT0GqZnSpYwwvizathRMQ,4292
 datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
-datachain/data_storage/metastore.py,sha256=
-datachain/data_storage/schema.py,sha256
+datachain/data_storage/metastore.py,sha256=S9pkbAi7yJlU_CTuhB-eTZgzZgkPMhJ5Br90AVLDXsQ,37922
+datachain/data_storage/schema.py,sha256=-QVlRvD0dfu-ZFUxylEoSnLJLnleMEjVlcAb2OGu-AY,9895
 datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
-datachain/data_storage/sqlite.py,sha256=
-datachain/data_storage/warehouse.py,sha256=
+datachain/data_storage/sqlite.py,sha256=nF-2B-n8YZh9cJlZv4XnbahAJDW6pvrp1h9L-140M7A,27538
+datachain/data_storage/warehouse.py,sha256=kFLhYEFkpsfl65Lr1c4t4HJt3nO1Ez_QQ76aQNN30fc,30966
+datachain/func/__init__.py,sha256=4VUt5BaLdBAl_BnAku0Jb8plqd7kDOiYrQTMG3pN0c4,794
+datachain/func/aggregate.py,sha256=7_IPrIwb2XSs3zG4iOr1eTvzn6kNVe2mkzvNzjusDHk,10942
+datachain/func/array.py,sha256=zHDNWuWLA7HVa9FEvQeHhVi00_xqenyleTqcLwkXWBI,5477
+datachain/func/base.py,sha256=wA0sBQAVyN9LPxoo7Ox83peS0zUVnyuKxukwAcjGLfY,534
+datachain/func/conditional.py,sha256=mQroxsoExpBW84Zm5dAYP4OpBblWmzfnF2qJq9rba54,2223
+datachain/func/func.py,sha256=9wqdxxisoDL0w8qKGQmL6sNdgJeIOzotEUPlxu9t2IQ,12326
+datachain/func/path.py,sha256=mqN_mfkwv44z2II7DMTp_fGGw95hmTCNls_TOFNpr4k,3155
+datachain/func/random.py,sha256=pENOLj9rSmWfGCnOsUIaCsVC5486zQb66qfQvXaz9Z4,452
+datachain/func/string.py,sha256=NQzaXXYu7yb72HPADy4WrFlcgvTS77L9x7-qvCKJtnk,4522
+datachain/func/window.py,sha256=0MB1yjpVbwOrl_WNLZ8V3jkJz3o0XlYinpAcZQJuxiA,1688
 datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/lib/arrow.py,sha256
+datachain/lib/arrow.py,sha256=b5efxAUaNNYVwtXVJqj07D3zf5KC-BPlLCxKEZbEG6w,9429
 datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
-datachain/lib/data_model.py,sha256=
-datachain/lib/dataset_info.py,sha256=
-datachain/lib/dc.py,sha256=
+datachain/lib/data_model.py,sha256=zS4lmXHVBXc9ntcyea2a1CRLXGSAN_0glXcF88CohgY,2685
+datachain/lib/dataset_info.py,sha256=IjdF1E0TQNOq9YyynfWiCFTeZpbyGfyJvxgJY4YN810,2493
+datachain/lib/dc.py,sha256=J7liATKQBJCkeHanVLr0s3d1t5wxiiiSJuSbuxKBbLg,89527
 datachain/lib/file.py,sha256=-XMkL6ED1sE7TMhWoMRTEuOXswZJw8X6AEmJDONFP74,15019
-datachain/lib/hf.py,sha256=
+datachain/lib/hf.py,sha256=a-zFpDmZIR4r8dlNNTjfpAKSnuJ9xyRXlgcdENiXt3E,5864
 datachain/lib/image.py,sha256=AMXYwQsmarZjRbPCZY3M1jDsM2WAB_b3cTY4uOIuXNU,2675
 datachain/lib/listing.py,sha256=cVkCp7TRVpcZKSx-Bbk9t51bQI9Mw0o86W6ZPhAsuzM,3667
 datachain/lib/listing_info.py,sha256=9ua40Hw0aiQByUw3oAEeNzMavJYfW0Uhe8YdCTK-m_g,1110
@@ -53,35 +63,32 @@ datachain/lib/meta_formats.py,sha256=anK2bDVbaeCCh0yvKUBaW2MVos3zRgdaSV8uSduzPcU
 datachain/lib/model_store.py,sha256=DNIv8Y6Jtk1_idNLzIpsThOsdW2BMAudyUCbPUcgcxk,2515
 datachain/lib/pytorch.py,sha256=W-ARi2xH1f1DUkVfRuerW-YWYgSaJASmNCxtz2lrJGI,6072
 datachain/lib/settings.py,sha256=ZELRCTLbi5vzRPiDX6cQ9LLg9TefJ_A05gIGni0lll8,2535
-datachain/lib/signal_schema.py,sha256=
+datachain/lib/signal_schema.py,sha256=_uh19nCKhiD9ua8oIN1Q8R9iYv1BZAuqTJCLYVmyW8k,24557
 datachain/lib/tar.py,sha256=3WIzao6yD5fbLqXLTt9GhPGNonbFIs_fDRu-9vgLgsA,1038
 datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
 datachain/lib/udf.py,sha256=-j0krjNAELTqRI0dB1N65AmawtcIY5vN---AuUcW8Us,13637
 datachain/lib/udf_signature.py,sha256=GXw24A-Olna6DWCdgy2bC-gZh_gLGPQ-KvjuI6pUjC0,7281
-datachain/lib/utils.py,sha256=
+datachain/lib/utils.py,sha256=om-MCiyYwvPHtFq3V2rBKrRDNkio9XXofj7RsUIlHKU,1586
 datachain/lib/vfile.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/lib/webdataset.py,sha256=o7SHk5HOUWsZ5Ln04xOM04eQqiBHiJNO7xLgyVBrwo8,6924
-datachain/lib/webdataset_laion.py,sha256=
+datachain/lib/webdataset_laion.py,sha256=xvT6m_r5y0KbOx14BUe7UC5mOgrktJq53Mh-H0EVlUE,2525
 datachain/lib/convert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/lib/convert/flatten.py,sha256=Uebc5CeqCsacp-nr6IG9i6OGuUavXqdqnoGctZBk3RQ,1384
 datachain/lib/convert/python_to_sql.py,sha256=40SAOdoOgikZRhn8iomCPDRoxC3RFxjJLivEAA9MHDU,2880
 datachain/lib/convert/sql_to_python.py,sha256=XXCBYDQFUXJIBNWkjEP944cnCfJ8GF2Tji0DLF3A_zQ,315
 datachain/lib/convert/unflatten.py,sha256=Ogvh_5wg2f38_At_1lN0D_e2uZOOpYEvwvB2xdq56Tw,2012
 datachain/lib/convert/values_to_tuples.py,sha256=varRCnSMT_pZmHznrd2Yi05qXLLz_v9YH_pOCpHSkdc,3921
-datachain/lib/func/__init__.py,sha256=wlAKhGV0QDg9y7reSwoUF8Vicfqh_YOUNIXLzxICGz4,403
-datachain/lib/func/aggregate.py,sha256=H1ziFQdaK9zvnxvttfnEzkkyGvEEmMAvmgCsBV6nfm8,10917
-datachain/lib/func/func.py,sha256=HAJZ_tpiRG2R-et7pr0WnoyNZYtpbPn3_HBuL3RQpbU,4800
 datachain/model/__init__.py,sha256=R9faX5OHV1xh2EW-g2MPedwbtEqt3LodJRyluB-QylI,189
-datachain/model/bbox.py,sha256=
-datachain/model/pose.py,sha256=
-datachain/model/segment.py,sha256=
+datachain/model/bbox.py,sha256=1Li1G3RdiQwLOAc2Mak2nQU0bcvdH-lXmXtA984CUWM,3154
+datachain/model/pose.py,sha256=q9NgB8h66aKnYnLi7Pyf9bU-F_90W4cbvtSO3-_hkdk,3078
+datachain/model/segment.py,sha256=iRWf0KieXfSM1eGD9Y7THx8L_EMB79Sk8WVebs3xSbQ,1593
 datachain/model/ultralytics/__init__.py,sha256=EvcNX9qUyxKXXlKCPpsXeRrabyXk5E9EkN-tyiYkfS4,750
 datachain/model/ultralytics/bbox.py,sha256=OZ9XBdyMOYc401P-RhfSN9QaYvMpnx2Phu9ptaJgZBY,4316
 datachain/model/ultralytics/pose.py,sha256=71KBTcoST2wcEtsyGXqLVpvUtqbp9gwZGA15pEPtX5A,2959
 datachain/model/ultralytics/segment.py,sha256=Z1ab0tZRJubSYNH4KkFlzhYeGNTfAyC71KmkQcToHDQ,2760
 datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
 datachain/query/batch.py,sha256=5fEhORFe7li12SdYddaSK3LyqksMfCHhwN1_A6TfsA4,3485
-datachain/query/dataset.py,sha256=
+datachain/query/dataset.py,sha256=bQVG4WnJfBQpvnxouIdDlsJF2gB8V4lDp4Zu9JeZ-rc,54771
 datachain/query/dispatch.py,sha256=wjjTWw6sFQbB9SKRh78VbfvwSMgJXCfqJklS3-9KnCU,12025
 datachain/query/metrics.py,sha256=r5b0ygYhokbXp8Mg3kCH8iFSRw0jxzyeBe-C-J_bKFc,938
 datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
@@ -89,14 +96,14 @@ datachain/query/queue.py,sha256=waqM_KzavU8C-G95-4211Nd4GXna_u2747Chgwtgz2w,3839
 datachain/query/schema.py,sha256=b_KnVy6B26Ol4nYG0LqNNpeQ1QYPk95YRGUjXfdaQWs,6606
 datachain/query/session.py,sha256=50SOdLNCjqHHKI-L4xGXyzTVxzMWfANqKqjeYre-c2k,5959
 datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/remote/studio.py,sha256=
-datachain/sql/__init__.py,sha256=
-datachain/sql/selectable.py,sha256=
+datachain/remote/studio.py,sha256=z9DTDqfdWKT8MC23wRDTOHvI8hc_OySS1Ce3F617gjA,9906
+datachain/sql/__init__.py,sha256=6SQRdbljO3d2hx3EAVXEZrHQKv5jth0Jh98PogT59No,262
+datachain/sql/selectable.py,sha256=cTc60qVoAwqqss0Vop8Lt5Z-ROnM1XrQmL_GLjRxhXs,1765
 datachain/sql/types.py,sha256=ASSPkmM5EzdRindqj2O7WHLXq8VHAgFYedG8lYfGvVI,14045
 datachain/sql/utils.py,sha256=rzlJw08etivdrcuQPqNVvVWhuVSyUPUQEEc6DOhu258,818
 datachain/sql/default/__init__.py,sha256=XQ2cEZpzWiABqjV-6yYHUBGI9vN_UHxbxZENESmVAWw,45
 datachain/sql/default/base.py,sha256=QD-31C6JnyOXzogyDx90sUhm7QvgXIYpeHEASH84igU,628
-datachain/sql/functions/__init__.py,sha256
+datachain/sql/functions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/sql/functions/aggregate.py,sha256=3AQdA8YHPFdtCEfwZKQXTT8SlQWdG9gD5PBtGN3Odqs,944
 datachain/sql/functions/array.py,sha256=rvH27SWN9gdh_mFnp0GIiXuCrNW6n8ZbY4I_JUS-_e0,1140
 datachain/sql/functions/conditional.py,sha256=q7YUKfunXeEldXaxgT-p5pUTcOEVU_tcQ2BJlquTRPs,207
@@ -104,15 +111,15 @@ datachain/sql/functions/path.py,sha256=zixpERotTFP6LZ7I4TiGtyRA8kXOoZmH1yzH9oRW0
 datachain/sql/functions/random.py,sha256=vBwEEj98VH4LjWixUCygQ5Bz1mv1nohsCG0-ZTELlVg,271
 datachain/sql/functions/string.py,sha256=DYgiw8XSk7ge7GXvyRI1zbaMruIizNeI-puOjriQGZQ,1148
 datachain/sql/sqlite/__init__.py,sha256=TAdJX0Bg28XdqPO-QwUVKy8rg78cgMileHvMNot7d04,166
-datachain/sql/sqlite/base.py,sha256=
+datachain/sql/sqlite/base.py,sha256=X4iEynOAqqvqz8lmgUKvURleKO6aguULgG8RoufKrSk,14772
 datachain/sql/sqlite/types.py,sha256=lPXS1XbkmUtlkkiRxy_A_UzsgpPv2VSkXYOD4zIHM4w,1734
 datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
 datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
 datachain/toolkit/split.py,sha256=ZgDcrNiKiPXZmKD591_1z9qRIXitu5zwAsoVPB7ykiU,2508
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.7.
-datachain-0.7.
-datachain-0.7.
-datachain-0.7.
-datachain-0.7.
-datachain-0.7.
+datachain-0.7.3.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.7.3.dist-info/METADATA,sha256=E1-nP4rZghwCV5kSS09620YEJdwaTAiVpI5DmmRnZy0,18006
+datachain-0.7.3.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+datachain-0.7.3.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.7.3.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.7.3.dist-info/RECORD,,
datachain/lib/func/__init__.py
DELETED

@@ -1,32 +0,0 @@
-from .aggregate import (
-    any_value,
-    avg,
-    collect,
-    concat,
-    count,
-    dense_rank,
-    first,
-    max,
-    min,
-    rank,
-    row_number,
-    sum,
-)
-from .func import Func, window
-
-__all__ = [
-    "Func",
-    "any_value",
-    "avg",
-    "collect",
-    "concat",
-    "count",
-    "dense_rank",
-    "first",
-    "max",
-    "min",
-    "rank",
-    "row_number",
-    "sum",
-    "window",
-]