fugue 0.8.2.dev1__py3-none-any.whl → 0.8.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fugue/__init__.py +9 -5
- fugue/_utils/interfaceless.py +1 -558
- fugue/_utils/io.py +2 -91
- fugue/_utils/registry.py +3 -2
- fugue/api.py +1 -0
- fugue/bag/bag.py +8 -4
- fugue/collections/__init__.py +0 -7
- fugue/collections/partition.py +21 -9
- fugue/constants.py +3 -1
- fugue/dataframe/__init__.py +7 -8
- fugue/dataframe/arrow_dataframe.py +1 -2
- fugue/dataframe/dataframe.py +17 -18
- fugue/dataframe/dataframe_iterable_dataframe.py +22 -6
- fugue/dataframe/function_wrapper.py +432 -0
- fugue/dataframe/iterable_dataframe.py +3 -0
- fugue/dataframe/utils.py +11 -79
- fugue/dataset/api.py +0 -4
- fugue/dev.py +47 -0
- fugue/execution/__init__.py +1 -5
- fugue/execution/api.py +36 -14
- fugue/execution/execution_engine.py +30 -4
- fugue/execution/factory.py +0 -6
- fugue/execution/native_execution_engine.py +44 -67
- fugue/extensions/_builtins/creators.py +4 -2
- fugue/extensions/_builtins/outputters.py +4 -3
- fugue/extensions/_builtins/processors.py +3 -3
- fugue/extensions/creator/convert.py +5 -2
- fugue/extensions/outputter/convert.py +2 -2
- fugue/extensions/processor/convert.py +3 -2
- fugue/extensions/transformer/convert.py +22 -9
- fugue/extensions/transformer/transformer.py +15 -1
- fugue/plugins.py +2 -0
- fugue/registry.py +0 -39
- fugue/sql/_utils.py +1 -1
- fugue/workflow/_checkpoint.py +1 -1
- fugue/workflow/api.py +13 -13
- fugue/workflow/module.py +30 -37
- fugue/workflow/workflow.py +6 -0
- {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/METADATA +37 -23
- {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/RECORD +112 -101
- {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/WHEEL +1 -1
- {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/entry_points.txt +2 -1
- {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/top_level.txt +1 -0
- fugue_contrib/contrib.py +1 -0
- fugue_contrib/viz/_ext.py +7 -1
- fugue_dask/_io.py +0 -13
- fugue_dask/_utils.py +10 -4
- fugue_dask/dataframe.py +1 -2
- fugue_dask/execution_engine.py +45 -18
- fugue_dask/registry.py +8 -33
- fugue_duckdb/_io.py +8 -2
- fugue_duckdb/_utils.py +7 -2
- fugue_duckdb/dask.py +1 -1
- fugue_duckdb/dataframe.py +23 -19
- fugue_duckdb/execution_engine.py +19 -22
- fugue_duckdb/registry.py +11 -34
- fugue_ibis/dataframe.py +6 -10
- fugue_ibis/execution_engine.py +7 -1
- fugue_notebook/env.py +5 -10
- fugue_polars/__init__.py +2 -0
- fugue_polars/_utils.py +8 -0
- fugue_polars/polars_dataframe.py +234 -0
- fugue_polars/registry.py +86 -0
- fugue_ray/_constants.py +10 -1
- fugue_ray/_utils/dataframe.py +36 -9
- fugue_ray/_utils/io.py +2 -4
- fugue_ray/dataframe.py +16 -12
- fugue_ray/execution_engine.py +53 -32
- fugue_ray/registry.py +8 -32
- fugue_spark/_utils/convert.py +22 -11
- fugue_spark/_utils/io.py +0 -13
- fugue_spark/_utils/misc.py +27 -0
- fugue_spark/_utils/partition.py +11 -18
- fugue_spark/dataframe.py +26 -22
- fugue_spark/execution_engine.py +136 -54
- fugue_spark/registry.py +29 -78
- fugue_test/builtin_suite.py +36 -14
- fugue_test/dataframe_suite.py +9 -5
- fugue_test/execution_suite.py +100 -122
- fugue_version/__init__.py +1 -1
- tests/fugue/bag/test_array_bag.py +0 -9
- tests/fugue/collections/test_partition.py +10 -3
- tests/fugue/dataframe/test_function_wrapper.py +293 -0
- tests/fugue/dataframe/test_utils.py +2 -34
- tests/fugue/execution/test_factory.py +7 -9
- tests/fugue/execution/test_naive_execution_engine.py +35 -80
- tests/fugue/extensions/test_utils.py +12 -7
- tests/fugue/extensions/transformer/test_convert_cotransformer.py +1 -0
- tests/fugue/extensions/transformer/test_convert_output_cotransformer.py +1 -0
- tests/fugue/extensions/transformer/test_convert_transformer.py +2 -0
- tests/fugue/sql/test_workflow.py +1 -1
- tests/fugue/sql/test_workflow_parse.py +3 -5
- tests/fugue/utils/test_interfaceless.py +1 -325
- tests/fugue/utils/test_io.py +0 -80
- tests/fugue_dask/test_execution_engine.py +48 -0
- tests/fugue_dask/test_io.py +0 -55
- tests/fugue_duckdb/test_dataframe.py +2 -2
- tests/fugue_duckdb/test_execution_engine.py +16 -1
- tests/fugue_duckdb/test_utils.py +1 -1
- tests/fugue_ibis/test_dataframe.py +6 -3
- tests/fugue_polars/__init__.py +0 -0
- tests/fugue_polars/test_api.py +13 -0
- tests/fugue_polars/test_dataframe.py +82 -0
- tests/fugue_polars/test_transform.py +100 -0
- tests/fugue_ray/test_execution_engine.py +40 -4
- tests/fugue_spark/test_dataframe.py +0 -8
- tests/fugue_spark/test_execution_engine.py +50 -11
- tests/fugue_spark/test_importless.py +4 -4
- tests/fugue_spark/test_spark_connect.py +82 -0
- tests/fugue_spark/utils/test_convert.py +6 -8
- tests/fugue_spark/utils/test_io.py +0 -17
- fugue/_utils/register.py +0 -3
- fugue_test/_utils.py +0 -13
- {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/LICENSE +0 -0
fugue/__init__.py
CHANGED
|
@@ -7,7 +7,7 @@ from fugue.bag.array_bag import ArrayBag
|
|
|
7
7
|
from fugue.bag.bag import Bag, BagDisplay
|
|
8
8
|
from fugue.collections.partition import PartitionCursor, PartitionSpec
|
|
9
9
|
from fugue.collections.sql import StructuredRawSQL, TempTableName
|
|
10
|
-
from fugue.collections.yielded import
|
|
10
|
+
from fugue.collections.yielded import PhysicalYielded, Yielded
|
|
11
11
|
from fugue.constants import register_global_conf
|
|
12
12
|
from fugue.dataframe.array_dataframe import ArrayDataFrame
|
|
13
13
|
from fugue.dataframe.arrow_dataframe import ArrowDataFrame
|
|
@@ -18,11 +18,14 @@ from fugue.dataframe.dataframe import (
|
|
|
18
18
|
LocalBoundedDataFrame,
|
|
19
19
|
LocalDataFrame,
|
|
20
20
|
)
|
|
21
|
-
from fugue.dataframe.dataframe_iterable_dataframe import LocalDataFrameIterableDataFrame
|
|
21
|
+
from fugue.dataframe.dataframe_iterable_dataframe import (
|
|
22
|
+
IterableArrowDataFrame,
|
|
23
|
+
IterablePandasDataFrame,
|
|
24
|
+
LocalDataFrameIterableDataFrame,
|
|
25
|
+
)
|
|
22
26
|
from fugue.dataframe.dataframes import DataFrames
|
|
23
27
|
from fugue.dataframe.iterable_dataframe import IterableDataFrame
|
|
24
28
|
from fugue.dataframe.pandas_dataframe import PandasDataFrame
|
|
25
|
-
from fugue.dataframe.utils import to_local_bounded_df, to_local_df
|
|
26
29
|
from fugue.dataset import (
|
|
27
30
|
AnyDataset,
|
|
28
31
|
Dataset,
|
|
@@ -32,8 +35,8 @@ from fugue.dataset import (
|
|
|
32
35
|
)
|
|
33
36
|
from fugue.execution.execution_engine import (
|
|
34
37
|
AnyExecutionEngine,
|
|
35
|
-
ExecutionEngine,
|
|
36
38
|
EngineFacet,
|
|
39
|
+
ExecutionEngine,
|
|
37
40
|
MapEngine,
|
|
38
41
|
SQLEngine,
|
|
39
42
|
)
|
|
@@ -50,7 +53,6 @@ from fugue.execution.native_execution_engine import (
|
|
|
50
53
|
NativeExecutionEngine,
|
|
51
54
|
PandasMapEngine,
|
|
52
55
|
QPDPandasEngine,
|
|
53
|
-
SqliteEngine,
|
|
54
56
|
)
|
|
55
57
|
from fugue.extensions.creator import Creator, creator, register_creator
|
|
56
58
|
from fugue.extensions.outputter import Outputter, outputter, register_outputter
|
|
@@ -84,4 +86,6 @@ from fugue.workflow.module import module
|
|
|
84
86
|
from fugue.workflow.workflow import FugueWorkflow, WorkflowDataFrame, WorkflowDataFrames
|
|
85
87
|
from fugue_version import __version__
|
|
86
88
|
|
|
89
|
+
from .dev import *
|
|
90
|
+
|
|
87
91
|
_register()
|
fugue/_utils/interfaceless.py
CHANGED
|
@@ -1,36 +1,7 @@
|
|
|
1
|
-
import copy
|
|
2
1
|
import inspect
|
|
3
|
-
import re
|
|
4
|
-
from typing import (
|
|
5
|
-
Any,
|
|
6
|
-
Callable,
|
|
7
|
-
Dict,
|
|
8
|
-
Iterable,
|
|
9
|
-
List,
|
|
10
|
-
Optional,
|
|
11
|
-
Tuple,
|
|
12
|
-
Type,
|
|
13
|
-
get_type_hints,
|
|
14
|
-
)
|
|
2
|
+
from typing import Callable, Optional
|
|
15
3
|
|
|
16
|
-
import pandas as pd
|
|
17
|
-
from fugue.dataframe import (
|
|
18
|
-
ArrayDataFrame,
|
|
19
|
-
DataFrame,
|
|
20
|
-
IterableDataFrame,
|
|
21
|
-
LocalDataFrame,
|
|
22
|
-
LocalDataFrameIterableDataFrame,
|
|
23
|
-
PandasDataFrame,
|
|
24
|
-
)
|
|
25
|
-
from fugue.dataframe.dataframes import DataFrames
|
|
26
|
-
from fugue.dataframe.utils import to_local_df
|
|
27
|
-
from fugue.exceptions import FugueWorkflowRuntimeError
|
|
28
|
-
from triad import IndexedOrderedDict
|
|
29
|
-
from triad.collections import Schema
|
|
30
4
|
from triad.utils.assertion import assert_or_throw
|
|
31
|
-
from triad.utils.convert import get_full_type_path, to_type
|
|
32
|
-
from triad.utils.hash import to_uuid
|
|
33
|
-
from triad.utils.iter import EmptyAwareIterable, make_empty_aware
|
|
34
5
|
|
|
35
6
|
_COMMENT_SCHEMA_ANNOTATION = "schema"
|
|
36
7
|
|
|
@@ -100,531 +71,3 @@ def is_class_method(func: Callable) -> bool:
|
|
|
100
71
|
sig = inspect.signature(func)
|
|
101
72
|
# TODO: this is not the best way
|
|
102
73
|
return "self" in sig.parameters
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
class AnnotationConverter:
|
|
106
|
-
def check(self, annotation: Any) -> bool: # pragma: no cover
|
|
107
|
-
raise NotImplementedError
|
|
108
|
-
|
|
109
|
-
def convert(
|
|
110
|
-
self, param: Optional[inspect.Parameter]
|
|
111
|
-
) -> "_FuncParam": # pragma: no cover
|
|
112
|
-
raise NotImplementedError
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
class SimpleAnnotationConverter(AnnotationConverter):
|
|
116
|
-
def __init__(
|
|
117
|
-
self,
|
|
118
|
-
expected_annotation,
|
|
119
|
-
converter: Callable[[Optional[inspect.Parameter]], "_FuncParam"],
|
|
120
|
-
) -> None:
|
|
121
|
-
self._expected = expected_annotation
|
|
122
|
-
self._converter = converter
|
|
123
|
-
|
|
124
|
-
def check(self, annotation: Any) -> bool:
|
|
125
|
-
return annotation == self._expected
|
|
126
|
-
|
|
127
|
-
def convert(self, param: Optional[inspect.Parameter]) -> "_FuncParam":
|
|
128
|
-
return self._converter(param)
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
_ANNOTATION_CONVERTERS: List[Tuple[float, AnnotationConverter]] = []
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
def register_annotation_converter(
|
|
135
|
-
priority: float, converter: AnnotationConverter
|
|
136
|
-
) -> None:
|
|
137
|
-
"""Register a new annotation for Fugue's interfaceless system
|
|
138
|
-
|
|
139
|
-
:param priority: priority number, smaller means higher priority for checking
|
|
140
|
-
:param converter: a new converter
|
|
141
|
-
|
|
142
|
-
.. admonition:: New Since
|
|
143
|
-
:class: hint
|
|
144
|
-
|
|
145
|
-
**0.6.0**
|
|
146
|
-
|
|
147
|
-
.. note::
|
|
148
|
-
|
|
149
|
-
This is not ready for public use yet, the interface is subjected to change
|
|
150
|
-
|
|
151
|
-
"""
|
|
152
|
-
_ANNOTATION_CONVERTERS.append((priority, converter))
|
|
153
|
-
_ANNOTATION_CONVERTERS.sort(key=lambda x: x[0])
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
class FunctionWrapper(object):
|
|
157
|
-
def __init__(
|
|
158
|
-
self,
|
|
159
|
-
func: Callable,
|
|
160
|
-
params_re: str = ".*",
|
|
161
|
-
return_re: str = ".*",
|
|
162
|
-
):
|
|
163
|
-
self._class_method, self._params, self._rt = self._parse_function(
|
|
164
|
-
func, params_re, return_re
|
|
165
|
-
)
|
|
166
|
-
self._func = func
|
|
167
|
-
|
|
168
|
-
def __deepcopy__(self, memo: Any) -> Any:
|
|
169
|
-
return copy.copy(self)
|
|
170
|
-
|
|
171
|
-
def __call__(self, *args: Any, **kwargs: Any) -> Any:
|
|
172
|
-
return self._func(*args, **kwargs)
|
|
173
|
-
|
|
174
|
-
def __uuid__(self) -> str:
|
|
175
|
-
return to_uuid(get_full_type_path(self._func), self._params, self._rt)
|
|
176
|
-
|
|
177
|
-
@property
|
|
178
|
-
def input_code(self) -> str:
|
|
179
|
-
return "".join(x.code for x in self._params.values())
|
|
180
|
-
|
|
181
|
-
@property
|
|
182
|
-
def need_output_schema(self) -> Optional[bool]:
|
|
183
|
-
return (
|
|
184
|
-
self._rt.need_schema()
|
|
185
|
-
if isinstance(self._rt, _DataFrameParamBase)
|
|
186
|
-
else False
|
|
187
|
-
)
|
|
188
|
-
|
|
189
|
-
def run( # noqa: C901
|
|
190
|
-
self,
|
|
191
|
-
args: List[Any],
|
|
192
|
-
kwargs: Dict[str, Any],
|
|
193
|
-
ignore_unknown: bool = False,
|
|
194
|
-
output_schema: Any = None,
|
|
195
|
-
output: bool = True,
|
|
196
|
-
ctx: Any = None,
|
|
197
|
-
) -> Any:
|
|
198
|
-
p: Dict[str, Any] = {}
|
|
199
|
-
for i in range(len(args)):
|
|
200
|
-
p[self._params.get_key_by_index(i)] = args[i]
|
|
201
|
-
p.update(kwargs)
|
|
202
|
-
has_kw = False
|
|
203
|
-
rargs: Dict[str, Any] = {}
|
|
204
|
-
for k, v in self._params.items():
|
|
205
|
-
if isinstance(v, (_PositionalParam, _KeywordParam)):
|
|
206
|
-
if isinstance(v, _KeywordParam):
|
|
207
|
-
has_kw = True
|
|
208
|
-
elif k in p:
|
|
209
|
-
if isinstance(v, _DataFrameParamBase):
|
|
210
|
-
assert_or_throw(
|
|
211
|
-
isinstance(p[k], DataFrame),
|
|
212
|
-
lambda: TypeError(f"{p[k]} is not a DataFrame"),
|
|
213
|
-
)
|
|
214
|
-
rargs[k] = v.to_input_data(p[k], ctx=ctx)
|
|
215
|
-
else:
|
|
216
|
-
rargs[k] = p[k] # TODO: should we do auto type conversion?
|
|
217
|
-
del p[k]
|
|
218
|
-
elif v.required:
|
|
219
|
-
raise ValueError(f"{k} is required by not given")
|
|
220
|
-
if has_kw:
|
|
221
|
-
rargs.update(p)
|
|
222
|
-
elif not ignore_unknown and len(p) > 0:
|
|
223
|
-
raise ValueError(f"{p} are not acceptable parameters")
|
|
224
|
-
rt = self._func(**rargs)
|
|
225
|
-
if not output:
|
|
226
|
-
if isinstance(self._rt, _DataFrameParamBase):
|
|
227
|
-
self._rt.count(rt)
|
|
228
|
-
return
|
|
229
|
-
if isinstance(self._rt, _DataFrameParamBase):
|
|
230
|
-
return self._rt.to_output_df(rt, output_schema, ctx=ctx)
|
|
231
|
-
return rt
|
|
232
|
-
|
|
233
|
-
def _parse_function(
|
|
234
|
-
self, func: Callable, params_re: str = ".*", return_re: str = ".*"
|
|
235
|
-
) -> Tuple[bool, IndexedOrderedDict[str, "_FuncParam"], "_FuncParam"]:
|
|
236
|
-
sig = inspect.signature(func)
|
|
237
|
-
annotations = get_type_hints(func)
|
|
238
|
-
res: IndexedOrderedDict[str, "_FuncParam"] = IndexedOrderedDict()
|
|
239
|
-
class_method = False
|
|
240
|
-
for k, w in sig.parameters.items():
|
|
241
|
-
if k == "self":
|
|
242
|
-
res[k] = _SelfParam(w)
|
|
243
|
-
class_method = True
|
|
244
|
-
else:
|
|
245
|
-
anno = annotations.get(k, w.annotation)
|
|
246
|
-
res[k] = self._parse_param(anno, w)
|
|
247
|
-
anno = annotations.get("return", sig.return_annotation)
|
|
248
|
-
rt = self._parse_param(anno, None, none_as_other=False)
|
|
249
|
-
params_str = "".join(x.code for x in res.values())
|
|
250
|
-
assert_or_throw(
|
|
251
|
-
re.match(params_re, params_str),
|
|
252
|
-
lambda: TypeError(f"Input types not valid {res} for {func}"),
|
|
253
|
-
)
|
|
254
|
-
assert_or_throw(
|
|
255
|
-
re.match(return_re, rt.code),
|
|
256
|
-
lambda: TypeError(f"Return type not valid {rt} for {func}"),
|
|
257
|
-
)
|
|
258
|
-
return class_method, res, rt
|
|
259
|
-
|
|
260
|
-
def _parse_param( # noqa: C901
|
|
261
|
-
self,
|
|
262
|
-
annotation: Any,
|
|
263
|
-
param: Optional[inspect.Parameter],
|
|
264
|
-
none_as_other: bool = True,
|
|
265
|
-
) -> "_FuncParam":
|
|
266
|
-
import fugue._utils.register # pylint: disable=W0611 # noqa: F401
|
|
267
|
-
|
|
268
|
-
if annotation == type(None): # noqa: E721
|
|
269
|
-
return _NoneParam(param)
|
|
270
|
-
if annotation == inspect.Parameter.empty:
|
|
271
|
-
if param is not None and param.kind == param.VAR_POSITIONAL:
|
|
272
|
-
return _PositionalParam(param)
|
|
273
|
-
if param is not None and param.kind == param.VAR_KEYWORD:
|
|
274
|
-
return _KeywordParam(param)
|
|
275
|
-
return _OtherParam(param) if none_as_other else _NoneParam(param)
|
|
276
|
-
if (
|
|
277
|
-
annotation == Callable
|
|
278
|
-
or annotation == callable # pylint: disable=comparison-with-callable
|
|
279
|
-
or str(annotation).startswith("typing.Callable")
|
|
280
|
-
):
|
|
281
|
-
return _CallableParam(param)
|
|
282
|
-
if (
|
|
283
|
-
annotation == Optional[Callable]
|
|
284
|
-
or annotation == Optional[callable]
|
|
285
|
-
or str(annotation).startswith("typing.Union[typing.Callable") # 3.8-
|
|
286
|
-
or str(annotation).startswith("typing.Optional[typing.Callable") # 3.9+
|
|
287
|
-
):
|
|
288
|
-
return _OptionalCallableParam(param)
|
|
289
|
-
for _, c in _ANNOTATION_CONVERTERS:
|
|
290
|
-
if c.check(annotation):
|
|
291
|
-
return c.convert(param)
|
|
292
|
-
if annotation == to_type("fugue.execution.ExecutionEngine"):
|
|
293
|
-
# to prevent cyclic import
|
|
294
|
-
return ExecutionEngineParam(param, "ExecutionEngine", annotation)
|
|
295
|
-
if annotation == DataFrames:
|
|
296
|
-
return _DataFramesParam(param)
|
|
297
|
-
if annotation == LocalDataFrame:
|
|
298
|
-
return _LocalDataFrameParam(param)
|
|
299
|
-
if annotation == DataFrame:
|
|
300
|
-
return DataFrameParam(param)
|
|
301
|
-
if annotation == pd.DataFrame:
|
|
302
|
-
return _PandasParam(param)
|
|
303
|
-
if annotation == List[List[Any]]:
|
|
304
|
-
return _ListListParam(param)
|
|
305
|
-
if annotation == Iterable[List[Any]]:
|
|
306
|
-
return _IterableListParam(param)
|
|
307
|
-
if annotation == EmptyAwareIterable[List[Any]]:
|
|
308
|
-
return _EmptyAwareIterableListParam(param)
|
|
309
|
-
if annotation == List[Dict[str, Any]]:
|
|
310
|
-
return _ListDictParam(param)
|
|
311
|
-
if annotation == Iterable[Dict[str, Any]]:
|
|
312
|
-
return _IterableDictParam(param)
|
|
313
|
-
if annotation == EmptyAwareIterable[Dict[str, Any]]:
|
|
314
|
-
return _EmptyAwareIterableDictParam(param)
|
|
315
|
-
if annotation == Iterable[pd.DataFrame]:
|
|
316
|
-
return _IterablePandasParam(param)
|
|
317
|
-
if param is not None and param.kind == param.VAR_POSITIONAL:
|
|
318
|
-
return _PositionalParam(param)
|
|
319
|
-
if param is not None and param.kind == param.VAR_KEYWORD:
|
|
320
|
-
return _KeywordParam(param)
|
|
321
|
-
return _OtherParam(param)
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
class _FuncParam(object):
|
|
325
|
-
def __init__(self, param: Optional[inspect.Parameter], annotation: Any, code: str):
|
|
326
|
-
if param is not None:
|
|
327
|
-
self.required = param.default == inspect.Parameter.empty
|
|
328
|
-
self.default = param.default
|
|
329
|
-
else:
|
|
330
|
-
self.required, self.default = True, None
|
|
331
|
-
self.code = code
|
|
332
|
-
self.annotation = annotation
|
|
333
|
-
|
|
334
|
-
def __repr__(self) -> str:
|
|
335
|
-
return str(self.annotation)
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
class _CallableParam(_FuncParam):
|
|
339
|
-
def __init__(self, param: Optional[inspect.Parameter]):
|
|
340
|
-
super().__init__(param, "Callable", "F")
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
class _OptionalCallableParam(_FuncParam):
|
|
344
|
-
def __init__(self, param: Optional[inspect.Parameter]):
|
|
345
|
-
super().__init__(param, "Callable", "f")
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
class ExecutionEngineParam(_FuncParam):
|
|
349
|
-
def __init__(
|
|
350
|
-
self,
|
|
351
|
-
param: Optional[inspect.Parameter],
|
|
352
|
-
annotation: str,
|
|
353
|
-
engine_type: Type,
|
|
354
|
-
):
|
|
355
|
-
super().__init__(param, annotation, "e")
|
|
356
|
-
self._type = engine_type
|
|
357
|
-
|
|
358
|
-
def to_input(self, engine: Any) -> Any: # pragma: no cover
|
|
359
|
-
assert_or_throw(
|
|
360
|
-
isinstance(engine, self._type),
|
|
361
|
-
FugueWorkflowRuntimeError(f"{engine} is not of type {self._type}"),
|
|
362
|
-
)
|
|
363
|
-
return engine
|
|
364
|
-
|
|
365
|
-
def __uuid__(self) -> str:
|
|
366
|
-
return to_uuid(self.code, self.annotation, self._type)
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
class _DataFramesParam(_FuncParam):
|
|
370
|
-
def __init__(self, param: Optional[inspect.Parameter]):
|
|
371
|
-
super().__init__(param, "DataFrames", "c")
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
class _DataFrameParamBase(_FuncParam):
|
|
375
|
-
def __init__(self, param: Optional[inspect.Parameter], annotation: Any, code: str):
|
|
376
|
-
super().__init__(param, annotation, code)
|
|
377
|
-
assert_or_throw(self.required, lambda: TypeError(f"{self} must be required"))
|
|
378
|
-
|
|
379
|
-
def to_input_data(self, df: DataFrame, ctx: Any) -> Any: # pragma: no cover
|
|
380
|
-
raise NotImplementedError
|
|
381
|
-
|
|
382
|
-
def to_output_df(
|
|
383
|
-
self, df: Any, schema: Any, ctx: Any
|
|
384
|
-
) -> DataFrame: # pragma: no cover
|
|
385
|
-
raise NotImplementedError
|
|
386
|
-
|
|
387
|
-
def count(self, df: Any) -> int: # pragma: no cover
|
|
388
|
-
raise NotImplementedError
|
|
389
|
-
|
|
390
|
-
def need_schema(self) -> Optional[bool]:
|
|
391
|
-
return False
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
class DataFrameParam(_DataFrameParamBase):
|
|
395
|
-
def __init__(
|
|
396
|
-
self, param: Optional[inspect.Parameter], annotation: str = "DataFrame"
|
|
397
|
-
):
|
|
398
|
-
super().__init__(param, annotation=annotation, code="d")
|
|
399
|
-
|
|
400
|
-
def to_input_data(self, df: DataFrame, ctx: Any) -> Any:
|
|
401
|
-
return df
|
|
402
|
-
|
|
403
|
-
def to_output_df(self, output: Any, schema: Any, ctx: Any) -> DataFrame:
|
|
404
|
-
assert_or_throw(
|
|
405
|
-
schema is None or output.schema == schema,
|
|
406
|
-
lambda: f"Output schema mismatch {output.schema} vs {schema}",
|
|
407
|
-
)
|
|
408
|
-
return output
|
|
409
|
-
|
|
410
|
-
def count(self, df: Any) -> int:
|
|
411
|
-
if df.is_bounded:
|
|
412
|
-
return df.count()
|
|
413
|
-
else:
|
|
414
|
-
return sum(1 for _ in df.as_array_iterable())
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
class _LocalDataFrameParam(_DataFrameParamBase):
|
|
418
|
-
def __init__(self, param: Optional[inspect.Parameter]):
|
|
419
|
-
super().__init__(param, "LocalDataFrame", "l")
|
|
420
|
-
|
|
421
|
-
def to_input_data(self, df: DataFrame, ctx: Any) -> LocalDataFrame:
|
|
422
|
-
return to_local_df(df)
|
|
423
|
-
|
|
424
|
-
def to_output_df(self, output: LocalDataFrame, schema: Any, ctx: Any) -> DataFrame:
|
|
425
|
-
assert_or_throw(
|
|
426
|
-
schema is None or output.schema == schema,
|
|
427
|
-
lambda: f"Output schema mismatch {output.schema} vs {schema}",
|
|
428
|
-
)
|
|
429
|
-
return output
|
|
430
|
-
|
|
431
|
-
def count(self, df: LocalDataFrame) -> int:
|
|
432
|
-
if df.is_bounded:
|
|
433
|
-
return df.count()
|
|
434
|
-
else:
|
|
435
|
-
return sum(1 for _ in df.as_array_iterable())
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
class _ListListParam(_DataFrameParamBase):
|
|
439
|
-
def __init__(self, param: Optional[inspect.Parameter]):
|
|
440
|
-
super().__init__(param, "List[List[Any]]", "s")
|
|
441
|
-
|
|
442
|
-
def to_input_data(self, df: DataFrame, ctx: Any) -> List[List[Any]]:
|
|
443
|
-
return df.as_array(type_safe=True)
|
|
444
|
-
|
|
445
|
-
def to_output_df(self, output: List[List[Any]], schema: Any, ctx: Any) -> DataFrame:
|
|
446
|
-
return ArrayDataFrame(output, schema)
|
|
447
|
-
|
|
448
|
-
def count(self, df: List[List[Any]]) -> int:
|
|
449
|
-
return len(df)
|
|
450
|
-
|
|
451
|
-
def need_schema(self) -> Optional[bool]:
|
|
452
|
-
return True
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
class _IterableListParam(_DataFrameParamBase):
|
|
456
|
-
def __init__(self, param: Optional[inspect.Parameter]):
|
|
457
|
-
super().__init__(param, "Iterable[List[Any]]", "s")
|
|
458
|
-
|
|
459
|
-
def to_input_data(self, df: DataFrame, ctx: Any) -> Iterable[List[Any]]:
|
|
460
|
-
return df.as_array_iterable(type_safe=True)
|
|
461
|
-
|
|
462
|
-
def to_output_df(
|
|
463
|
-
self, output: Iterable[List[Any]], schema: Any, ctx: Any
|
|
464
|
-
) -> DataFrame:
|
|
465
|
-
return IterableDataFrame(output, schema)
|
|
466
|
-
|
|
467
|
-
def count(self, df: Iterable[List[Any]]) -> int:
|
|
468
|
-
return sum(1 for _ in df)
|
|
469
|
-
|
|
470
|
-
def need_schema(self) -> Optional[bool]: # pragma: no cover
|
|
471
|
-
return True
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
class _EmptyAwareIterableListParam(_DataFrameParamBase):
|
|
475
|
-
def __init__(self, param: Optional[inspect.Parameter]):
|
|
476
|
-
super().__init__(param, "EmptyAwareIterable[List[Any]]", "s")
|
|
477
|
-
|
|
478
|
-
def to_input_data(self, df: DataFrame, ctx: Any) -> EmptyAwareIterable[List[Any]]:
|
|
479
|
-
return make_empty_aware(df.as_array_iterable(type_safe=True))
|
|
480
|
-
|
|
481
|
-
def to_output_df(
|
|
482
|
-
self, output: EmptyAwareIterable[List[Any]], schema: Any, ctx: Any
|
|
483
|
-
) -> DataFrame:
|
|
484
|
-
return IterableDataFrame(output, schema)
|
|
485
|
-
|
|
486
|
-
def count(self, df: EmptyAwareIterable[List[Any]]) -> int:
|
|
487
|
-
return sum(1 for _ in df)
|
|
488
|
-
|
|
489
|
-
def need_schema(self) -> Optional[bool]: # pragma: no cover
|
|
490
|
-
return True
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
class _ListDictParam(_DataFrameParamBase):
|
|
494
|
-
def __init__(self, param: Optional[inspect.Parameter]):
|
|
495
|
-
super().__init__(param, "List[Dict[str,Any]]", "s")
|
|
496
|
-
|
|
497
|
-
def to_input_data(self, df: DataFrame, ctx: Any) -> List[Dict[str, Any]]:
|
|
498
|
-
return list(to_local_df(df).as_dict_iterable())
|
|
499
|
-
|
|
500
|
-
def to_output_df(
|
|
501
|
-
self, output: List[Dict[str, Any]], schema: Any, ctx: Any
|
|
502
|
-
) -> DataFrame:
|
|
503
|
-
schema = schema if isinstance(schema, Schema) else Schema(schema)
|
|
504
|
-
|
|
505
|
-
def get_all() -> Iterable[List[Any]]:
|
|
506
|
-
for row in output:
|
|
507
|
-
yield [row[x] for x in schema.names]
|
|
508
|
-
|
|
509
|
-
return IterableDataFrame(get_all(), schema)
|
|
510
|
-
|
|
511
|
-
def count(self, df: List[Dict[str, Any]]) -> int:
|
|
512
|
-
return len(df)
|
|
513
|
-
|
|
514
|
-
def need_schema(self) -> Optional[bool]: # pragma: no cover
|
|
515
|
-
return True
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
class _IterableDictParam(_DataFrameParamBase):
|
|
519
|
-
def __init__(self, param: Optional[inspect.Parameter]):
|
|
520
|
-
super().__init__(param, "Iterable[Dict[str,Any]]", "s")
|
|
521
|
-
|
|
522
|
-
def to_input_data(self, df: DataFrame, ctx: Any) -> Iterable[Dict[str, Any]]:
|
|
523
|
-
return df.as_dict_iterable()
|
|
524
|
-
|
|
525
|
-
def to_output_df(
|
|
526
|
-
self, output: Iterable[Dict[str, Any]], schema: Any, ctx: Any
|
|
527
|
-
) -> DataFrame:
|
|
528
|
-
schema = schema if isinstance(schema, Schema) else Schema(schema)
|
|
529
|
-
|
|
530
|
-
def get_all() -> Iterable[List[Any]]:
|
|
531
|
-
for row in output:
|
|
532
|
-
yield [row[x] for x in schema.names]
|
|
533
|
-
|
|
534
|
-
return IterableDataFrame(get_all(), schema)
|
|
535
|
-
|
|
536
|
-
def count(self, df: Iterable[Dict[str, Any]]) -> int:
|
|
537
|
-
return sum(1 for _ in df)
|
|
538
|
-
|
|
539
|
-
def need_schema(self) -> Optional[bool]: # pragma: no cover
|
|
540
|
-
return True
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
class _EmptyAwareIterableDictParam(_DataFrameParamBase):
|
|
544
|
-
def __init__(self, param: Optional[inspect.Parameter]):
|
|
545
|
-
super().__init__(param, "EmptyAwareIterable[Dict[str,Any]]", "s")
|
|
546
|
-
|
|
547
|
-
def to_input_data(
|
|
548
|
-
self, df: DataFrame, ctx: Any
|
|
549
|
-
) -> EmptyAwareIterable[Dict[str, Any]]:
|
|
550
|
-
return make_empty_aware(df.as_dict_iterable())
|
|
551
|
-
|
|
552
|
-
def to_output_df(
|
|
553
|
-
self, output: EmptyAwareIterable[Dict[str, Any]], schema: Any, ctx: Any
|
|
554
|
-
) -> DataFrame:
|
|
555
|
-
schema = schema if isinstance(schema, Schema) else Schema(schema)
|
|
556
|
-
|
|
557
|
-
def get_all() -> Iterable[List[Any]]:
|
|
558
|
-
for row in output:
|
|
559
|
-
yield [row[x] for x in schema.names]
|
|
560
|
-
|
|
561
|
-
return IterableDataFrame(get_all(), schema)
|
|
562
|
-
|
|
563
|
-
def count(self, df: EmptyAwareIterable[Dict[str, Any]]) -> int:
|
|
564
|
-
return sum(1 for _ in df)
|
|
565
|
-
|
|
566
|
-
def need_schema(self) -> Optional[bool]: # pragma: no cover
|
|
567
|
-
return True
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
class _PandasParam(_DataFrameParamBase):
|
|
571
|
-
def __init__(self, param: Optional[inspect.Parameter]):
|
|
572
|
-
super().__init__(param, "pd.DataFrame", "p")
|
|
573
|
-
|
|
574
|
-
def to_input_data(self, df: DataFrame, ctx: Any) -> pd.DataFrame:
|
|
575
|
-
return df.as_pandas()
|
|
576
|
-
|
|
577
|
-
def to_output_df(self, output: pd.DataFrame, schema: Any, ctx: Any) -> DataFrame:
|
|
578
|
-
return PandasDataFrame(output, schema)
|
|
579
|
-
|
|
580
|
-
def count(self, df: pd.DataFrame) -> int:
|
|
581
|
-
return df.shape[0]
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
class _IterablePandasParam(_DataFrameParamBase):
|
|
585
|
-
def __init__(self, param: Optional[inspect.Parameter]):
|
|
586
|
-
super().__init__(param, "Iterable[pd.DataFrame]", "q")
|
|
587
|
-
|
|
588
|
-
def to_input_data(self, df: DataFrame, ctx: Any) -> Iterable[pd.DataFrame]:
|
|
589
|
-
if not isinstance(df, LocalDataFrameIterableDataFrame):
|
|
590
|
-
yield df.as_pandas()
|
|
591
|
-
else:
|
|
592
|
-
for sub in df.native:
|
|
593
|
-
yield sub.as_pandas()
|
|
594
|
-
|
|
595
|
-
def to_output_df(
|
|
596
|
-
self, output: Iterable[pd.DataFrame], schema: Any, ctx: Any
|
|
597
|
-
) -> DataFrame:
|
|
598
|
-
def dfs():
|
|
599
|
-
for df in output:
|
|
600
|
-
yield PandasDataFrame(df, schema)
|
|
601
|
-
|
|
602
|
-
return LocalDataFrameIterableDataFrame(dfs())
|
|
603
|
-
|
|
604
|
-
def count(self, df: Iterable[pd.DataFrame]) -> int:
|
|
605
|
-
return sum(_.shape[0] for _ in df)
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
class _NoneParam(_FuncParam):
|
|
609
|
-
def __init__(self, param: Optional[inspect.Parameter]):
|
|
610
|
-
super().__init__(param, "NoneType", "n")
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
class _SelfParam(_FuncParam):
|
|
614
|
-
def __init__(self, param: Optional[inspect.Parameter]):
|
|
615
|
-
super().__init__(param, "[Self]", "0")
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
class _OtherParam(_FuncParam):
|
|
619
|
-
def __init__(self, param: Optional[inspect.Parameter]):
|
|
620
|
-
super().__init__(param, "[Other]", "x")
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
class _PositionalParam(_FuncParam):
|
|
624
|
-
def __init__(self, param: Optional[inspect.Parameter]):
|
|
625
|
-
super().__init__(param, "[Positional]", "y")
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
class _KeywordParam(_FuncParam):
|
|
629
|
-
def __init__(self, param: Optional[inspect.Parameter]):
|
|
630
|
-
super().__init__(param, "[Keyword]", "z")
|