fugue 0.9.0.dev4__tar.gz → 0.9.2.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/PKG-INFO +1 -1
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/dataframe/function_wrapper.py +102 -18
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/extensions/transformer/convert.py +4 -4
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue.egg-info/PKG-INFO +1 -1
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue.egg-info/requires.txt +1 -1
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_ray/_utils/io.py +22 -15
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_spark/_utils/misc.py +1 -1
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_test/builtin_suite.py +36 -1
- fugue-0.9.2.dev1/fugue_version/__init__.py +1 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/setup.py +1 -1
- fugue-0.9.0.dev4/fugue_version/__init__.py +0 -1
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/LICENSE +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/README.md +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/_utils/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/_utils/display.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/_utils/exception.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/_utils/interfaceless.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/_utils/io.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/_utils/misc.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/_utils/registry.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/api.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/bag/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/bag/array_bag.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/bag/bag.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/collections/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/collections/partition.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/collections/sql.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/collections/yielded.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/column/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/column/expressions.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/column/functions.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/column/sql.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/constants.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/dataframe/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/dataframe/api.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/dataframe/array_dataframe.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/dataframe/arrow_dataframe.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/dataframe/dataframe.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/dataframe/dataframe_iterable_dataframe.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/dataframe/dataframes.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/dataframe/iterable_dataframe.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/dataframe/pandas_dataframe.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/dataframe/utils.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/dataset/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/dataset/api.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/dataset/dataset.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/dev.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/exceptions.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/execution/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/execution/api.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/execution/execution_engine.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/execution/factory.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/execution/native_execution_engine.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/extensions/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/extensions/_builtins/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/extensions/_builtins/creators.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/extensions/_builtins/outputters.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/extensions/_builtins/processors.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/extensions/_utils.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/extensions/context.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/extensions/creator/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/extensions/creator/convert.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/extensions/creator/creator.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/extensions/outputter/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/extensions/outputter/convert.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/extensions/outputter/outputter.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/extensions/processor/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/extensions/processor/convert.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/extensions/processor/processor.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/extensions/transformer/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/extensions/transformer/constants.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/extensions/transformer/transformer.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/plugins.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/py.typed +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/registry.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/rpc/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/rpc/base.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/rpc/flask.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/sql/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/sql/_utils.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/sql/_visitors.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/sql/api.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/sql/workflow.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/test/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/test/pandas_tester.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/test/plugins.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/workflow/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/workflow/_checkpoint.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/workflow/_tasks.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/workflow/_workflow_context.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/workflow/api.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/workflow/input.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/workflow/module.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue/workflow/workflow.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue.egg-info/SOURCES.txt +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue.egg-info/dependency_links.txt +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue.egg-info/entry_points.txt +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue.egg-info/top_level.txt +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_contrib/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_contrib/contrib.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_contrib/seaborn/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_contrib/viz/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_contrib/viz/_ext.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_dask/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_dask/_constants.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_dask/_io.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_dask/_utils.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_dask/dataframe.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_dask/execution_engine.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_dask/registry.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_dask/tester.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_duckdb/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_duckdb/_io.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_duckdb/_utils.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_duckdb/dask.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_duckdb/dataframe.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_duckdb/execution_engine.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_duckdb/registry.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_duckdb/tester.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_ibis/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_ibis/_compat.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_ibis/_utils.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_ibis/dataframe.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_ibis/execution_engine.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_notebook/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_notebook/env.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_notebook/nbextension/README.md +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_notebook/nbextension/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_notebook/nbextension/description.yaml +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_notebook/nbextension/main.js +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_polars/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_polars/_utils.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_polars/polars_dataframe.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_polars/registry.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_ray/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_ray/_constants.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_ray/_utils/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_ray/_utils/cluster.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_ray/_utils/dataframe.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_ray/dataframe.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_ray/execution_engine.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_ray/registry.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_ray/tester.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_spark/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_spark/_constants.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_spark/_utils/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_spark/_utils/convert.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_spark/_utils/io.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_spark/_utils/partition.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_spark/dataframe.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_spark/execution_engine.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_spark/registry.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_spark/tester.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_sql/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_sql/exceptions.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_test/__init__.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_test/bag_suite.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_test/dataframe_suite.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_test/execution_suite.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/fugue_test/fixtures.py +0 -0
- {fugue-0.9.0.dev4 → fugue-0.9.2.dev1}/setup.cfg +0 -0
|
@@ -20,6 +20,7 @@ from triad.collections.function_wrapper import (
|
|
|
20
20
|
PositionalParam,
|
|
21
21
|
function_wrapper,
|
|
22
22
|
)
|
|
23
|
+
from triad.utils.convert import compare_annotations
|
|
23
24
|
from triad.utils.iter import EmptyAwareIterable, make_empty_aware
|
|
24
25
|
|
|
25
26
|
from ..constants import FUGUE_ENTRYPOINT
|
|
@@ -37,6 +38,14 @@ from .iterable_dataframe import IterableDataFrame
|
|
|
37
38
|
from .pandas_dataframe import PandasDataFrame
|
|
38
39
|
|
|
39
40
|
|
|
41
|
+
def _compare_iter(tp: Any) -> Any:
|
|
42
|
+
return lambda x: compare_annotations(
|
|
43
|
+
x, Iterable[tp] # type:ignore
|
|
44
|
+
) or compare_annotations(
|
|
45
|
+
x, Iterator[tp] # type:ignore
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
|
|
40
49
|
@function_wrapper(FUGUE_ENTRYPOINT)
|
|
41
50
|
class DataFrameFunctionWrapper(FunctionWrapper):
|
|
42
51
|
@property
|
|
@@ -71,6 +80,7 @@ class DataFrameFunctionWrapper(FunctionWrapper):
|
|
|
71
80
|
p.update(kwargs)
|
|
72
81
|
has_kw = False
|
|
73
82
|
rargs: Dict[str, Any] = {}
|
|
83
|
+
row_param_info: Any = None
|
|
74
84
|
for k, v in self._params.items():
|
|
75
85
|
if isinstance(v, (PositionalParam, KeywordParam)):
|
|
76
86
|
if isinstance(v, KeywordParam):
|
|
@@ -81,7 +91,14 @@ class DataFrameFunctionWrapper(FunctionWrapper):
|
|
|
81
91
|
isinstance(p[k], DataFrame),
|
|
82
92
|
lambda: TypeError(f"{p[k]} is not a DataFrame"),
|
|
83
93
|
)
|
|
84
|
-
|
|
94
|
+
if v.is_per_row:
|
|
95
|
+
assert_or_throw(
|
|
96
|
+
row_param_info is None,
|
|
97
|
+
lambda: ValueError("only one row parameter is allowed"),
|
|
98
|
+
)
|
|
99
|
+
row_param_info = (k, v, p[k])
|
|
100
|
+
else:
|
|
101
|
+
rargs[k] = v.to_input_data(p[k], ctx=ctx)
|
|
85
102
|
else:
|
|
86
103
|
rargs[k] = p[k] # TODO: should we do auto type conversion?
|
|
87
104
|
del p[k]
|
|
@@ -91,12 +108,38 @@ class DataFrameFunctionWrapper(FunctionWrapper):
|
|
|
91
108
|
rargs.update(p)
|
|
92
109
|
elif not ignore_unknown and len(p) > 0:
|
|
93
110
|
raise ValueError(f"{p} are not acceptable parameters")
|
|
111
|
+
if row_param_info is None:
|
|
112
|
+
return self._run_func(rargs, output, output_schema, ctx, raw=False)
|
|
113
|
+
else: # input contains row parameter
|
|
114
|
+
|
|
115
|
+
def _dfs() -> Iterable[Any]:
|
|
116
|
+
k, v, df = row_param_info
|
|
117
|
+
for row in v.to_input_rows(df, ctx):
|
|
118
|
+
rargs[k] = None
|
|
119
|
+
_rargs = rargs.copy()
|
|
120
|
+
_rargs[k] = row
|
|
121
|
+
yield self._run_func(_rargs, output, output_schema, ctx, raw=True)
|
|
122
|
+
|
|
123
|
+
if not output:
|
|
124
|
+
sum(1 for _ in _dfs())
|
|
125
|
+
return
|
|
126
|
+
else:
|
|
127
|
+
return self._rt.iterable_to_output_df(_dfs(), output_schema, ctx)
|
|
128
|
+
|
|
129
|
+
def _run_func(
|
|
130
|
+
self,
|
|
131
|
+
rargs: Dict[str, Any],
|
|
132
|
+
output: bool,
|
|
133
|
+
output_schema: Any,
|
|
134
|
+
ctx: Any,
|
|
135
|
+
raw: bool,
|
|
136
|
+
) -> Any:
|
|
94
137
|
rt = self._func(**rargs)
|
|
95
138
|
if not output:
|
|
96
139
|
if isinstance(self._rt, _DataFrameParamBase):
|
|
97
140
|
self._rt.count(rt)
|
|
98
141
|
return
|
|
99
|
-
if isinstance(self._rt, _DataFrameParamBase):
|
|
142
|
+
if not raw and isinstance(self._rt, _DataFrameParamBase):
|
|
100
143
|
return self._rt.to_output_df(rt, output_schema, ctx=ctx)
|
|
101
144
|
return rt
|
|
102
145
|
|
|
@@ -136,14 +179,30 @@ class _DataFrameParamBase(AnnotatedParam):
|
|
|
136
179
|
super().__init__(param)
|
|
137
180
|
assert_or_throw(self.required, lambda: TypeError(f"{self} must be required"))
|
|
138
181
|
|
|
182
|
+
@property
|
|
183
|
+
def is_per_row(self) -> bool:
|
|
184
|
+
return False
|
|
185
|
+
|
|
139
186
|
def to_input_data(self, df: DataFrame, ctx: Any) -> Any: # pragma: no cover
|
|
140
187
|
raise NotImplementedError
|
|
141
188
|
|
|
189
|
+
def to_input_rows(
|
|
190
|
+
self,
|
|
191
|
+
df: DataFrame,
|
|
192
|
+
ctx: Any,
|
|
193
|
+
) -> Iterable[Any]:
|
|
194
|
+
raise NotImplementedError # pragma: no cover
|
|
195
|
+
|
|
142
196
|
def to_output_df(
|
|
143
197
|
self, df: Any, schema: Any, ctx: Any
|
|
144
198
|
) -> DataFrame: # pragma: no cover
|
|
145
199
|
raise NotImplementedError
|
|
146
200
|
|
|
201
|
+
def iterable_to_output_df(
|
|
202
|
+
self, dfs: Iterable[Any], schema: Any, ctx: Any
|
|
203
|
+
) -> DataFrame: # pragma: no cover
|
|
204
|
+
raise NotImplementedError
|
|
205
|
+
|
|
147
206
|
def count(self, df: Any) -> int: # pragma: no cover
|
|
148
207
|
raise NotImplementedError
|
|
149
208
|
|
|
@@ -173,6 +232,34 @@ class DataFrameParam(_DataFrameParamBase):
|
|
|
173
232
|
return sum(1 for _ in df.as_array_iterable())
|
|
174
233
|
|
|
175
234
|
|
|
235
|
+
@fugue_annotated_param(DataFrame, "r", child_can_reuse_code=True)
|
|
236
|
+
class RowParam(_DataFrameParamBase):
|
|
237
|
+
@property
|
|
238
|
+
def is_per_row(self) -> bool:
|
|
239
|
+
return True
|
|
240
|
+
|
|
241
|
+
def count(self, df: Any) -> int:
|
|
242
|
+
return 1
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
@fugue_annotated_param(Dict[str, Any])
|
|
246
|
+
class DictParam(RowParam):
|
|
247
|
+
def to_input_rows(self, df: DataFrame, ctx: Any) -> Iterable[Any]:
|
|
248
|
+
yield from df.as_dict_iterable()
|
|
249
|
+
|
|
250
|
+
def to_output_df(self, output: Dict[str, Any], schema: Any, ctx: Any) -> DataFrame:
|
|
251
|
+
return ArrayDataFrame([list(output.values())], schema)
|
|
252
|
+
|
|
253
|
+
def iterable_to_output_df(
|
|
254
|
+
self, dfs: Iterable[Dict[str, Any]], schema: Any, ctx: Any
|
|
255
|
+
) -> DataFrame: # pragma: no cover
|
|
256
|
+
params: Dict[str, Any] = {}
|
|
257
|
+
if schema is not None:
|
|
258
|
+
params["schema"] = Schema(schema).pa_schema
|
|
259
|
+
adf = pa.Table.from_pylist(list(dfs), **params)
|
|
260
|
+
return ArrowDataFrame(adf)
|
|
261
|
+
|
|
262
|
+
|
|
176
263
|
@fugue_annotated_param(AnyDataFrame)
|
|
177
264
|
class _AnyDataFrameParam(DataFrameParam):
|
|
178
265
|
def to_output_df(self, output: AnyDataFrame, schema: Any, ctx: Any) -> DataFrame:
|
|
@@ -198,6 +285,15 @@ class LocalDataFrameParam(DataFrameParam):
|
|
|
198
285
|
)
|
|
199
286
|
return output
|
|
200
287
|
|
|
288
|
+
def iterable_to_output_df(
|
|
289
|
+
self, dfs: Iterable[Any], schema: Any, ctx: Any
|
|
290
|
+
) -> DataFrame: # pragma: no cover
|
|
291
|
+
def _dfs() -> Iterable[DataFrame]:
|
|
292
|
+
for df in dfs:
|
|
293
|
+
yield self.to_output_df(df, schema, ctx)
|
|
294
|
+
|
|
295
|
+
return LocalDataFrameIterableDataFrame(_dfs(), schema=schema)
|
|
296
|
+
|
|
201
297
|
def count(self, df: LocalDataFrame) -> int:
|
|
202
298
|
if df.is_bounded:
|
|
203
299
|
return df.count()
|
|
@@ -228,10 +324,7 @@ class _ListListParam(_LocalNoSchemaDataFrameParam):
|
|
|
228
324
|
return len(df)
|
|
229
325
|
|
|
230
326
|
|
|
231
|
-
@fugue_annotated_param(
|
|
232
|
-
Iterable[List[Any]],
|
|
233
|
-
matcher=lambda x: x == Iterable[List[Any]] or x == Iterator[List[Any]],
|
|
234
|
-
)
|
|
327
|
+
@fugue_annotated_param(Iterable[List[Any]], matcher=_compare_iter(List[Any]))
|
|
235
328
|
class _IterableListParam(_LocalNoSchemaDataFrameParam):
|
|
236
329
|
@no_type_check
|
|
237
330
|
def to_input_data(self, df: DataFrame, ctx: Any) -> Iterable[List[Any]]:
|
|
@@ -288,10 +381,7 @@ class _ListDictParam(_LocalNoSchemaDataFrameParam):
|
|
|
288
381
|
return len(df)
|
|
289
382
|
|
|
290
383
|
|
|
291
|
-
@fugue_annotated_param(
|
|
292
|
-
Iterable[Dict[str, Any]],
|
|
293
|
-
matcher=lambda x: x == Iterable[Dict[str, Any]] or x == Iterator[Dict[str, Any]],
|
|
294
|
-
)
|
|
384
|
+
@fugue_annotated_param(Iterable[Dict[str, Any]], matcher=_compare_iter(Dict[str, Any]))
|
|
295
385
|
class _IterableDictParam(_LocalNoSchemaDataFrameParam):
|
|
296
386
|
@no_type_check
|
|
297
387
|
def to_input_data(self, df: DataFrame, ctx: Any) -> Iterable[Dict[str, Any]]:
|
|
@@ -360,10 +450,7 @@ class _PandasParam(LocalDataFrameParam):
|
|
|
360
450
|
return "pandas"
|
|
361
451
|
|
|
362
452
|
|
|
363
|
-
@fugue_annotated_param(
|
|
364
|
-
Iterable[pd.DataFrame],
|
|
365
|
-
matcher=lambda x: x == Iterable[pd.DataFrame] or x == Iterator[pd.DataFrame],
|
|
366
|
-
)
|
|
453
|
+
@fugue_annotated_param(Iterable[pd.DataFrame], matcher=_compare_iter(pd.DataFrame))
|
|
367
454
|
class _IterablePandasParam(LocalDataFrameParam):
|
|
368
455
|
@no_type_check
|
|
369
456
|
def to_input_data(self, df: DataFrame, ctx: Any) -> Iterable[pd.DataFrame]:
|
|
@@ -419,10 +506,7 @@ class _PyArrowTableParam(LocalDataFrameParam):
|
|
|
419
506
|
return "pyarrow"
|
|
420
507
|
|
|
421
508
|
|
|
422
|
-
@fugue_annotated_param(
|
|
423
|
-
Iterable[pa.Table],
|
|
424
|
-
matcher=lambda x: x == Iterable[pa.Table] or x == Iterator[pa.Table],
|
|
425
|
-
)
|
|
509
|
+
@fugue_annotated_param(Iterable[pa.Table], matcher=_compare_iter(pa.Table))
|
|
426
510
|
class _IterableArrowParam(LocalDataFrameParam):
|
|
427
511
|
@no_type_check
|
|
428
512
|
def to_input_data(self, df: DataFrame, ctx: Any) -> Iterable[pa.Table]:
|
|
@@ -375,7 +375,7 @@ class _FuncAsTransformer(Transformer):
|
|
|
375
375
|
assert_arg_not_none(schema, "schema")
|
|
376
376
|
tr = _FuncAsTransformer()
|
|
377
377
|
tr._wrapper = DataFrameFunctionWrapper( # type: ignore
|
|
378
|
-
func, "^[lspq][fF]?x*z?$", "^[lspq]$"
|
|
378
|
+
func, "^[lspqr][fF]?x*z?$", "^[lspqr]$"
|
|
379
379
|
)
|
|
380
380
|
tr._output_schema_arg = schema # type: ignore
|
|
381
381
|
tr._validation_rules = validation_rules # type: ignore
|
|
@@ -410,7 +410,7 @@ class _FuncAsOutputTransformer(_FuncAsTransformer):
|
|
|
410
410
|
validation_rules.update(parse_validation_rules_from_comment(func))
|
|
411
411
|
tr = _FuncAsOutputTransformer()
|
|
412
412
|
tr._wrapper = DataFrameFunctionWrapper( # type: ignore
|
|
413
|
-
func, "^[lspq][fF]?x*z?$", "^[lspnq]$"
|
|
413
|
+
func, "^[lspqr][fF]?x*z?$", "^[lspnqr]$"
|
|
414
414
|
)
|
|
415
415
|
tr._output_schema_arg = None # type: ignore
|
|
416
416
|
tr._validation_rules = validation_rules # type: ignore
|
|
@@ -503,7 +503,7 @@ class _FuncAsCoTransformer(CoTransformer):
|
|
|
503
503
|
assert_arg_not_none(schema, "schema")
|
|
504
504
|
tr = _FuncAsCoTransformer()
|
|
505
505
|
tr._wrapper = DataFrameFunctionWrapper( # type: ignore
|
|
506
|
-
func, "^(c|[lspq]+)[fF]?x*z?$", "^[lspq]$"
|
|
506
|
+
func, "^(c|[lspq]+)[fF]?x*z?$", "^[lspqr]$"
|
|
507
507
|
)
|
|
508
508
|
tr._dfs_input = tr._wrapper.input_code[0] == "c" # type: ignore
|
|
509
509
|
tr._output_schema_arg = schema # type: ignore
|
|
@@ -562,7 +562,7 @@ class _FuncAsOutputCoTransformer(_FuncAsCoTransformer):
|
|
|
562
562
|
|
|
563
563
|
tr = _FuncAsOutputCoTransformer()
|
|
564
564
|
tr._wrapper = DataFrameFunctionWrapper( # type: ignore
|
|
565
|
-
func, "^(c|[lspq]+)[fF]?x*z?$", "^[lspnq]$"
|
|
565
|
+
func, "^(c|[lspq]+)[fF]?x*z?$", "^[lspnqr]$"
|
|
566
566
|
)
|
|
567
567
|
tr._dfs_input = tr._wrapper.input_code[0] == "c" # type: ignore
|
|
568
568
|
tr._output_schema_arg = None # type: ignore
|
|
@@ -7,7 +7,7 @@ import ray.data as rd
|
|
|
7
7
|
from packaging import version
|
|
8
8
|
from pyarrow import csv as pacsv
|
|
9
9
|
from pyarrow import json as pajson
|
|
10
|
-
|
|
10
|
+
|
|
11
11
|
from triad.collections import Schema
|
|
12
12
|
from triad.collections.dict import ParamDict
|
|
13
13
|
from triad.utils.assertion import assert_or_throw
|
|
@@ -21,6 +21,27 @@ from fugue_ray.dataframe import RayDataFrame
|
|
|
21
21
|
|
|
22
22
|
from .._constants import RAY_VERSION
|
|
23
23
|
|
|
24
|
+
try:
|
|
25
|
+
from ray.data.datasource import FileExtensionFilter
|
|
26
|
+
|
|
27
|
+
class _FileFiler(FileExtensionFilter): # pragma: no cover
|
|
28
|
+
def __init__(
|
|
29
|
+
self, file_extensions: Union[str, List[str]], exclude: Iterable[str]
|
|
30
|
+
):
|
|
31
|
+
super().__init__(file_extensions, allow_if_no_extension=True)
|
|
32
|
+
self._exclude = set(exclude)
|
|
33
|
+
|
|
34
|
+
def _is_valid(self, path: str) -> bool:
|
|
35
|
+
return pathlib.Path(
|
|
36
|
+
path
|
|
37
|
+
).name not in self._exclude and self._file_has_extension(path)
|
|
38
|
+
|
|
39
|
+
def __call__(self, paths: List[str]) -> List[str]:
|
|
40
|
+
return [path for path in paths if self._is_valid(path)]
|
|
41
|
+
|
|
42
|
+
except ImportError: # pragma: no cover
|
|
43
|
+
pass # ray >=2.10
|
|
44
|
+
|
|
24
45
|
|
|
25
46
|
class RayIO(object):
|
|
26
47
|
def __init__(self, engine: ExecutionEngine):
|
|
@@ -248,17 +269,3 @@ class RayIO(object):
|
|
|
248
269
|
|
|
249
270
|
def _remote_args(self) -> Dict[str, Any]:
|
|
250
271
|
return {"num_cpus": 1}
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
class _FileFiler(FileExtensionFilter): # pragma: no cover
|
|
254
|
-
def __init__(self, file_extensions: Union[str, List[str]], exclude: Iterable[str]):
|
|
255
|
-
super().__init__(file_extensions, allow_if_no_extension=True)
|
|
256
|
-
self._exclude = set(exclude)
|
|
257
|
-
|
|
258
|
-
def _is_valid(self, path: str) -> bool:
|
|
259
|
-
return pathlib.Path(
|
|
260
|
-
path
|
|
261
|
-
).name not in self._exclude and self._file_has_extension(path)
|
|
262
|
-
|
|
263
|
-
def __call__(self, paths: List[str]) -> List[str]:
|
|
264
|
-
return [path for path in paths if self._is_valid(path)]
|
|
@@ -3,7 +3,7 @@ from typing import Any
|
|
|
3
3
|
try:
|
|
4
4
|
from pyspark.sql.connect.session import SparkSession as SparkConnectSession
|
|
5
5
|
from pyspark.sql.connect.dataframe import DataFrame as SparkConnectDataFrame
|
|
6
|
-
except
|
|
6
|
+
except Exception: # pragma: no cover
|
|
7
7
|
SparkConnectSession = None
|
|
8
8
|
SparkConnectDataFrame = None
|
|
9
9
|
import pyspark.sql as ps
|
|
@@ -486,6 +486,23 @@ class BuiltInTests(object):
|
|
|
486
486
|
dag.df([], "a:int,b:int").assert_eq(b)
|
|
487
487
|
dag.run(self.engine)
|
|
488
488
|
|
|
489
|
+
def test_transform_row_wise(self):
|
|
490
|
+
def t1(row: Dict[str, Any]) -> Dict[str, Any]:
|
|
491
|
+
row["b"] = 1
|
|
492
|
+
return row
|
|
493
|
+
|
|
494
|
+
def t2(rows: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
495
|
+
return rows[0]
|
|
496
|
+
|
|
497
|
+
with fa.engine_context(self.engine):
|
|
498
|
+
a = pd.DataFrame([[3, 4], [1, 2], [3, 5]], columns=["a", "b"])
|
|
499
|
+
b = fa.transform(a, t1, schema="*")
|
|
500
|
+
assert sorted(fa.as_array(b)) == [[1, 1], [3, 1], [3, 1]]
|
|
501
|
+
b = fa.transform(
|
|
502
|
+
a, t2, schema="*", partition={"by": "a", "presort": "b"}
|
|
503
|
+
)
|
|
504
|
+
assert sorted(fa.as_array(b)) == [[1, 2], [3, 4]]
|
|
505
|
+
|
|
489
506
|
def test_transform_binary(self):
|
|
490
507
|
with FugueWorkflow() as dag:
|
|
491
508
|
a = dag.df([[1, pickle.dumps([0, "a"])]], "a:int,b:bytes")
|
|
@@ -548,6 +565,8 @@ class BuiltInTests(object):
|
|
|
548
565
|
e = dag.df([[1, 2, 1, 10]], "a:int,ct1:int,ct2:int,x:int")
|
|
549
566
|
e.assert_eq(c)
|
|
550
567
|
|
|
568
|
+
a.zip(b).transform(mock_co_tf1_d, params=dict(p=10)).assert_eq(e)
|
|
569
|
+
|
|
551
570
|
# interfaceless
|
|
552
571
|
c = dag.transform(
|
|
553
572
|
a.zip(b),
|
|
@@ -676,6 +695,13 @@ class BuiltInTests(object):
|
|
|
676
695
|
incr()
|
|
677
696
|
yield pa.Table.from_pandas(df)
|
|
678
697
|
|
|
698
|
+
def t11(row: Dict[str, Any]) -> Dict[str, Any]:
|
|
699
|
+
incr()
|
|
700
|
+
return row
|
|
701
|
+
|
|
702
|
+
def t12(row: Dict[str, Any]) -> None:
|
|
703
|
+
incr()
|
|
704
|
+
|
|
679
705
|
with FugueWorkflow() as dag:
|
|
680
706
|
a = dag.df([[1, 2], [3, 4]], "a:double,b:int")
|
|
681
707
|
a.out_transform(t1) # +2
|
|
@@ -688,6 +714,8 @@ class BuiltInTests(object):
|
|
|
688
714
|
a.out_transform(t8, ignore_errors=[NotImplementedError]) # +1
|
|
689
715
|
a.out_transform(t9) # +1
|
|
690
716
|
a.out_transform(t10) # +1
|
|
717
|
+
a.out_transform(t11) # +2
|
|
718
|
+
a.out_transform(t12) # +2
|
|
691
719
|
raises(FugueWorkflowCompileValidationError, lambda: a.out_transform(t2))
|
|
692
720
|
raises(FugueWorkflowCompileValidationError, lambda: a.out_transform(t3))
|
|
693
721
|
raises(FugueWorkflowCompileValidationError, lambda: a.out_transform(t4))
|
|
@@ -695,7 +723,7 @@ class BuiltInTests(object):
|
|
|
695
723
|
raises(FugueWorkflowCompileValidationError, lambda: a.out_transform(T7))
|
|
696
724
|
dag.run(self.engine)
|
|
697
725
|
|
|
698
|
-
assert 13 <= incr()
|
|
726
|
+
assert 17 <= incr()
|
|
699
727
|
|
|
700
728
|
def test_out_cotransform(self): # noqa: C901
|
|
701
729
|
tmpdir = str(self.tmpdir)
|
|
@@ -2001,6 +2029,13 @@ def mock_co_tf1(
|
|
|
2001
2029
|
return [[df1[0]["a"], len(df1), len(df2), p]]
|
|
2002
2030
|
|
|
2003
2031
|
|
|
2032
|
+
@cotransformer(lambda dfs, **kwargs: "a:int,ct1:int,ct2:int,x:int")
|
|
2033
|
+
def mock_co_tf1_d(
|
|
2034
|
+
df1: List[Dict[str, Any]], df2: List[List[Any]], p=1
|
|
2035
|
+
) -> Dict[str, Any]:
|
|
2036
|
+
return dict(a=df1[0]["a"], ct1=len(df1), ct2=len(df2), x=p)
|
|
2037
|
+
|
|
2038
|
+
|
|
2004
2039
|
def mock_co_tf2(dfs: DataFrames, p=1) -> List[List[Any]]:
|
|
2005
2040
|
return [[dfs[0].peek_dict()["a"], dfs[0].count(), dfs[1].count(), p]]
|
|
2006
2041
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.9.2"
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.9.0"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|