fugue 0.8.2.dev1__py3-none-any.whl → 0.8.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. fugue/__init__.py +9 -5
  2. fugue/_utils/interfaceless.py +1 -558
  3. fugue/_utils/io.py +2 -91
  4. fugue/_utils/registry.py +3 -2
  5. fugue/api.py +1 -0
  6. fugue/bag/bag.py +8 -4
  7. fugue/collections/__init__.py +0 -7
  8. fugue/collections/partition.py +21 -9
  9. fugue/constants.py +3 -1
  10. fugue/dataframe/__init__.py +7 -8
  11. fugue/dataframe/arrow_dataframe.py +1 -2
  12. fugue/dataframe/dataframe.py +17 -18
  13. fugue/dataframe/dataframe_iterable_dataframe.py +22 -6
  14. fugue/dataframe/function_wrapper.py +432 -0
  15. fugue/dataframe/iterable_dataframe.py +3 -0
  16. fugue/dataframe/utils.py +11 -79
  17. fugue/dataset/api.py +0 -4
  18. fugue/dev.py +47 -0
  19. fugue/execution/__init__.py +1 -5
  20. fugue/execution/api.py +36 -14
  21. fugue/execution/execution_engine.py +30 -4
  22. fugue/execution/factory.py +0 -6
  23. fugue/execution/native_execution_engine.py +44 -67
  24. fugue/extensions/_builtins/creators.py +4 -2
  25. fugue/extensions/_builtins/outputters.py +4 -3
  26. fugue/extensions/_builtins/processors.py +3 -3
  27. fugue/extensions/creator/convert.py +5 -2
  28. fugue/extensions/outputter/convert.py +2 -2
  29. fugue/extensions/processor/convert.py +3 -2
  30. fugue/extensions/transformer/convert.py +22 -9
  31. fugue/extensions/transformer/transformer.py +15 -1
  32. fugue/plugins.py +2 -0
  33. fugue/registry.py +0 -39
  34. fugue/sql/_utils.py +1 -1
  35. fugue/workflow/_checkpoint.py +1 -1
  36. fugue/workflow/api.py +13 -13
  37. fugue/workflow/module.py +30 -37
  38. fugue/workflow/workflow.py +6 -0
  39. {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/METADATA +37 -23
  40. {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/RECORD +112 -101
  41. {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/WHEEL +1 -1
  42. {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/entry_points.txt +2 -1
  43. {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/top_level.txt +1 -0
  44. fugue_contrib/contrib.py +1 -0
  45. fugue_contrib/viz/_ext.py +7 -1
  46. fugue_dask/_io.py +0 -13
  47. fugue_dask/_utils.py +10 -4
  48. fugue_dask/dataframe.py +1 -2
  49. fugue_dask/execution_engine.py +45 -18
  50. fugue_dask/registry.py +8 -33
  51. fugue_duckdb/_io.py +8 -2
  52. fugue_duckdb/_utils.py +7 -2
  53. fugue_duckdb/dask.py +1 -1
  54. fugue_duckdb/dataframe.py +23 -19
  55. fugue_duckdb/execution_engine.py +19 -22
  56. fugue_duckdb/registry.py +11 -34
  57. fugue_ibis/dataframe.py +6 -10
  58. fugue_ibis/execution_engine.py +7 -1
  59. fugue_notebook/env.py +5 -10
  60. fugue_polars/__init__.py +2 -0
  61. fugue_polars/_utils.py +8 -0
  62. fugue_polars/polars_dataframe.py +234 -0
  63. fugue_polars/registry.py +86 -0
  64. fugue_ray/_constants.py +10 -1
  65. fugue_ray/_utils/dataframe.py +36 -9
  66. fugue_ray/_utils/io.py +2 -4
  67. fugue_ray/dataframe.py +16 -12
  68. fugue_ray/execution_engine.py +53 -32
  69. fugue_ray/registry.py +8 -32
  70. fugue_spark/_utils/convert.py +22 -11
  71. fugue_spark/_utils/io.py +0 -13
  72. fugue_spark/_utils/misc.py +27 -0
  73. fugue_spark/_utils/partition.py +11 -18
  74. fugue_spark/dataframe.py +26 -22
  75. fugue_spark/execution_engine.py +136 -54
  76. fugue_spark/registry.py +29 -78
  77. fugue_test/builtin_suite.py +36 -14
  78. fugue_test/dataframe_suite.py +9 -5
  79. fugue_test/execution_suite.py +100 -122
  80. fugue_version/__init__.py +1 -1
  81. tests/fugue/bag/test_array_bag.py +0 -9
  82. tests/fugue/collections/test_partition.py +10 -3
  83. tests/fugue/dataframe/test_function_wrapper.py +293 -0
  84. tests/fugue/dataframe/test_utils.py +2 -34
  85. tests/fugue/execution/test_factory.py +7 -9
  86. tests/fugue/execution/test_naive_execution_engine.py +35 -80
  87. tests/fugue/extensions/test_utils.py +12 -7
  88. tests/fugue/extensions/transformer/test_convert_cotransformer.py +1 -0
  89. tests/fugue/extensions/transformer/test_convert_output_cotransformer.py +1 -0
  90. tests/fugue/extensions/transformer/test_convert_transformer.py +2 -0
  91. tests/fugue/sql/test_workflow.py +1 -1
  92. tests/fugue/sql/test_workflow_parse.py +3 -5
  93. tests/fugue/utils/test_interfaceless.py +1 -325
  94. tests/fugue/utils/test_io.py +0 -80
  95. tests/fugue_dask/test_execution_engine.py +48 -0
  96. tests/fugue_dask/test_io.py +0 -55
  97. tests/fugue_duckdb/test_dataframe.py +2 -2
  98. tests/fugue_duckdb/test_execution_engine.py +16 -1
  99. tests/fugue_duckdb/test_utils.py +1 -1
  100. tests/fugue_ibis/test_dataframe.py +6 -3
  101. tests/fugue_polars/__init__.py +0 -0
  102. tests/fugue_polars/test_api.py +13 -0
  103. tests/fugue_polars/test_dataframe.py +82 -0
  104. tests/fugue_polars/test_transform.py +100 -0
  105. tests/fugue_ray/test_execution_engine.py +40 -4
  106. tests/fugue_spark/test_dataframe.py +0 -8
  107. tests/fugue_spark/test_execution_engine.py +50 -11
  108. tests/fugue_spark/test_importless.py +4 -4
  109. tests/fugue_spark/test_spark_connect.py +82 -0
  110. tests/fugue_spark/utils/test_convert.py +6 -8
  111. tests/fugue_spark/utils/test_io.py +0 -17
  112. fugue/_utils/register.py +0 -3
  113. fugue_test/_utils.py +0 -13
  114. {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/LICENSE +0 -0
fugue/__init__.py CHANGED
@@ -7,7 +7,7 @@ from fugue.bag.array_bag import ArrayBag
7
7
  from fugue.bag.bag import Bag, BagDisplay
8
8
  from fugue.collections.partition import PartitionCursor, PartitionSpec
9
9
  from fugue.collections.sql import StructuredRawSQL, TempTableName
10
- from fugue.collections.yielded import Yielded, PhysicalYielded
10
+ from fugue.collections.yielded import PhysicalYielded, Yielded
11
11
  from fugue.constants import register_global_conf
12
12
  from fugue.dataframe.array_dataframe import ArrayDataFrame
13
13
  from fugue.dataframe.arrow_dataframe import ArrowDataFrame
@@ -18,11 +18,14 @@ from fugue.dataframe.dataframe import (
18
18
  LocalBoundedDataFrame,
19
19
  LocalDataFrame,
20
20
  )
21
- from fugue.dataframe.dataframe_iterable_dataframe import LocalDataFrameIterableDataFrame
21
+ from fugue.dataframe.dataframe_iterable_dataframe import (
22
+ IterableArrowDataFrame,
23
+ IterablePandasDataFrame,
24
+ LocalDataFrameIterableDataFrame,
25
+ )
22
26
  from fugue.dataframe.dataframes import DataFrames
23
27
  from fugue.dataframe.iterable_dataframe import IterableDataFrame
24
28
  from fugue.dataframe.pandas_dataframe import PandasDataFrame
25
- from fugue.dataframe.utils import to_local_bounded_df, to_local_df
26
29
  from fugue.dataset import (
27
30
  AnyDataset,
28
31
  Dataset,
@@ -32,8 +35,8 @@ from fugue.dataset import (
32
35
  )
33
36
  from fugue.execution.execution_engine import (
34
37
  AnyExecutionEngine,
35
- ExecutionEngine,
36
38
  EngineFacet,
39
+ ExecutionEngine,
37
40
  MapEngine,
38
41
  SQLEngine,
39
42
  )
@@ -50,7 +53,6 @@ from fugue.execution.native_execution_engine import (
50
53
  NativeExecutionEngine,
51
54
  PandasMapEngine,
52
55
  QPDPandasEngine,
53
- SqliteEngine,
54
56
  )
55
57
  from fugue.extensions.creator import Creator, creator, register_creator
56
58
  from fugue.extensions.outputter import Outputter, outputter, register_outputter
@@ -84,4 +86,6 @@ from fugue.workflow.module import module
84
86
  from fugue.workflow.workflow import FugueWorkflow, WorkflowDataFrame, WorkflowDataFrames
85
87
  from fugue_version import __version__
86
88
 
89
+ from .dev import *
90
+
87
91
  _register()
fugue/_utils/interfaceless.py CHANGED
@@ -1,36 +1,7 @@
1
- import copy
2
1
  import inspect
3
- import re
4
- from typing import (
5
- Any,
6
- Callable,
7
- Dict,
8
- Iterable,
9
- List,
10
- Optional,
11
- Tuple,
12
- Type,
13
- get_type_hints,
14
- )
2
+ from typing import Callable, Optional
15
3
 
16
- import pandas as pd
17
- from fugue.dataframe import (
18
- ArrayDataFrame,
19
- DataFrame,
20
- IterableDataFrame,
21
- LocalDataFrame,
22
- LocalDataFrameIterableDataFrame,
23
- PandasDataFrame,
24
- )
25
- from fugue.dataframe.dataframes import DataFrames
26
- from fugue.dataframe.utils import to_local_df
27
- from fugue.exceptions import FugueWorkflowRuntimeError
28
- from triad import IndexedOrderedDict
29
- from triad.collections import Schema
30
4
  from triad.utils.assertion import assert_or_throw
31
- from triad.utils.convert import get_full_type_path, to_type
32
- from triad.utils.hash import to_uuid
33
- from triad.utils.iter import EmptyAwareIterable, make_empty_aware
34
5
 
35
6
  _COMMENT_SCHEMA_ANNOTATION = "schema"
36
7
 
@@ -100,531 +71,3 @@ def is_class_method(func: Callable) -> bool:
100
71
  sig = inspect.signature(func)
101
72
  # TODO: this is not the best way
102
73
  return "self" in sig.parameters
103
-
104
-
105
- class AnnotationConverter:
106
- def check(self, annotation: Any) -> bool: # pragma: no cover
107
- raise NotImplementedError
108
-
109
- def convert(
110
- self, param: Optional[inspect.Parameter]
111
- ) -> "_FuncParam": # pragma: no cover
112
- raise NotImplementedError
113
-
114
-
115
- class SimpleAnnotationConverter(AnnotationConverter):
116
- def __init__(
117
- self,
118
- expected_annotation,
119
- converter: Callable[[Optional[inspect.Parameter]], "_FuncParam"],
120
- ) -> None:
121
- self._expected = expected_annotation
122
- self._converter = converter
123
-
124
- def check(self, annotation: Any) -> bool:
125
- return annotation == self._expected
126
-
127
- def convert(self, param: Optional[inspect.Parameter]) -> "_FuncParam":
128
- return self._converter(param)
129
-
130
-
131
- _ANNOTATION_CONVERTERS: List[Tuple[float, AnnotationConverter]] = []
132
-
133
-
134
- def register_annotation_converter(
135
- priority: float, converter: AnnotationConverter
136
- ) -> None:
137
- """Register a new annotation for Fugue's interfaceless system
138
-
139
- :param priority: priority number, smaller means higher priority for checking
140
- :param converter: a new converter
141
-
142
- .. admonition:: New Since
143
- :class: hint
144
-
145
- **0.6.0**
146
-
147
- .. note::
148
-
149
- This is not ready for public use yet, the interface is subjected to change
150
-
151
- """
152
- _ANNOTATION_CONVERTERS.append((priority, converter))
153
- _ANNOTATION_CONVERTERS.sort(key=lambda x: x[0])
154
-
155
-
156
- class FunctionWrapper(object):
157
- def __init__(
158
- self,
159
- func: Callable,
160
- params_re: str = ".*",
161
- return_re: str = ".*",
162
- ):
163
- self._class_method, self._params, self._rt = self._parse_function(
164
- func, params_re, return_re
165
- )
166
- self._func = func
167
-
168
- def __deepcopy__(self, memo: Any) -> Any:
169
- return copy.copy(self)
170
-
171
- def __call__(self, *args: Any, **kwargs: Any) -> Any:
172
- return self._func(*args, **kwargs)
173
-
174
- def __uuid__(self) -> str:
175
- return to_uuid(get_full_type_path(self._func), self._params, self._rt)
176
-
177
- @property
178
- def input_code(self) -> str:
179
- return "".join(x.code for x in self._params.values())
180
-
181
- @property
182
- def need_output_schema(self) -> Optional[bool]:
183
- return (
184
- self._rt.need_schema()
185
- if isinstance(self._rt, _DataFrameParamBase)
186
- else False
187
- )
188
-
189
- def run( # noqa: C901
190
- self,
191
- args: List[Any],
192
- kwargs: Dict[str, Any],
193
- ignore_unknown: bool = False,
194
- output_schema: Any = None,
195
- output: bool = True,
196
- ctx: Any = None,
197
- ) -> Any:
198
- p: Dict[str, Any] = {}
199
- for i in range(len(args)):
200
- p[self._params.get_key_by_index(i)] = args[i]
201
- p.update(kwargs)
202
- has_kw = False
203
- rargs: Dict[str, Any] = {}
204
- for k, v in self._params.items():
205
- if isinstance(v, (_PositionalParam, _KeywordParam)):
206
- if isinstance(v, _KeywordParam):
207
- has_kw = True
208
- elif k in p:
209
- if isinstance(v, _DataFrameParamBase):
210
- assert_or_throw(
211
- isinstance(p[k], DataFrame),
212
- lambda: TypeError(f"{p[k]} is not a DataFrame"),
213
- )
214
- rargs[k] = v.to_input_data(p[k], ctx=ctx)
215
- else:
216
- rargs[k] = p[k] # TODO: should we do auto type conversion?
217
- del p[k]
218
- elif v.required:
219
- raise ValueError(f"{k} is required by not given")
220
- if has_kw:
221
- rargs.update(p)
222
- elif not ignore_unknown and len(p) > 0:
223
- raise ValueError(f"{p} are not acceptable parameters")
224
- rt = self._func(**rargs)
225
- if not output:
226
- if isinstance(self._rt, _DataFrameParamBase):
227
- self._rt.count(rt)
228
- return
229
- if isinstance(self._rt, _DataFrameParamBase):
230
- return self._rt.to_output_df(rt, output_schema, ctx=ctx)
231
- return rt
232
-
233
- def _parse_function(
234
- self, func: Callable, params_re: str = ".*", return_re: str = ".*"
235
- ) -> Tuple[bool, IndexedOrderedDict[str, "_FuncParam"], "_FuncParam"]:
236
- sig = inspect.signature(func)
237
- annotations = get_type_hints(func)
238
- res: IndexedOrderedDict[str, "_FuncParam"] = IndexedOrderedDict()
239
- class_method = False
240
- for k, w in sig.parameters.items():
241
- if k == "self":
242
- res[k] = _SelfParam(w)
243
- class_method = True
244
- else:
245
- anno = annotations.get(k, w.annotation)
246
- res[k] = self._parse_param(anno, w)
247
- anno = annotations.get("return", sig.return_annotation)
248
- rt = self._parse_param(anno, None, none_as_other=False)
249
- params_str = "".join(x.code for x in res.values())
250
- assert_or_throw(
251
- re.match(params_re, params_str),
252
- lambda: TypeError(f"Input types not valid {res} for {func}"),
253
- )
254
- assert_or_throw(
255
- re.match(return_re, rt.code),
256
- lambda: TypeError(f"Return type not valid {rt} for {func}"),
257
- )
258
- return class_method, res, rt
259
-
260
- def _parse_param( # noqa: C901
261
- self,
262
- annotation: Any,
263
- param: Optional[inspect.Parameter],
264
- none_as_other: bool = True,
265
- ) -> "_FuncParam":
266
- import fugue._utils.register # pylint: disable=W0611 # noqa: F401
267
-
268
- if annotation == type(None): # noqa: E721
269
- return _NoneParam(param)
270
- if annotation == inspect.Parameter.empty:
271
- if param is not None and param.kind == param.VAR_POSITIONAL:
272
- return _PositionalParam(param)
273
- if param is not None and param.kind == param.VAR_KEYWORD:
274
- return _KeywordParam(param)
275
- return _OtherParam(param) if none_as_other else _NoneParam(param)
276
- if (
277
- annotation == Callable
278
- or annotation == callable # pylint: disable=comparison-with-callable
279
- or str(annotation).startswith("typing.Callable")
280
- ):
281
- return _CallableParam(param)
282
- if (
283
- annotation == Optional[Callable]
284
- or annotation == Optional[callable]
285
- or str(annotation).startswith("typing.Union[typing.Callable") # 3.8-
286
- or str(annotation).startswith("typing.Optional[typing.Callable") # 3.9+
287
- ):
288
- return _OptionalCallableParam(param)
289
- for _, c in _ANNOTATION_CONVERTERS:
290
- if c.check(annotation):
291
- return c.convert(param)
292
- if annotation == to_type("fugue.execution.ExecutionEngine"):
293
- # to prevent cyclic import
294
- return ExecutionEngineParam(param, "ExecutionEngine", annotation)
295
- if annotation == DataFrames:
296
- return _DataFramesParam(param)
297
- if annotation == LocalDataFrame:
298
- return _LocalDataFrameParam(param)
299
- if annotation == DataFrame:
300
- return DataFrameParam(param)
301
- if annotation == pd.DataFrame:
302
- return _PandasParam(param)
303
- if annotation == List[List[Any]]:
304
- return _ListListParam(param)
305
- if annotation == Iterable[List[Any]]:
306
- return _IterableListParam(param)
307
- if annotation == EmptyAwareIterable[List[Any]]:
308
- return _EmptyAwareIterableListParam(param)
309
- if annotation == List[Dict[str, Any]]:
310
- return _ListDictParam(param)
311
- if annotation == Iterable[Dict[str, Any]]:
312
- return _IterableDictParam(param)
313
- if annotation == EmptyAwareIterable[Dict[str, Any]]:
314
- return _EmptyAwareIterableDictParam(param)
315
- if annotation == Iterable[pd.DataFrame]:
316
- return _IterablePandasParam(param)
317
- if param is not None and param.kind == param.VAR_POSITIONAL:
318
- return _PositionalParam(param)
319
- if param is not None and param.kind == param.VAR_KEYWORD:
320
- return _KeywordParam(param)
321
- return _OtherParam(param)
322
-
323
-
324
- class _FuncParam(object):
325
- def __init__(self, param: Optional[inspect.Parameter], annotation: Any, code: str):
326
- if param is not None:
327
- self.required = param.default == inspect.Parameter.empty
328
- self.default = param.default
329
- else:
330
- self.required, self.default = True, None
331
- self.code = code
332
- self.annotation = annotation
333
-
334
- def __repr__(self) -> str:
335
- return str(self.annotation)
336
-
337
-
338
- class _CallableParam(_FuncParam):
339
- def __init__(self, param: Optional[inspect.Parameter]):
340
- super().__init__(param, "Callable", "F")
341
-
342
-
343
- class _OptionalCallableParam(_FuncParam):
344
- def __init__(self, param: Optional[inspect.Parameter]):
345
- super().__init__(param, "Callable", "f")
346
-
347
-
348
- class ExecutionEngineParam(_FuncParam):
349
- def __init__(
350
- self,
351
- param: Optional[inspect.Parameter],
352
- annotation: str,
353
- engine_type: Type,
354
- ):
355
- super().__init__(param, annotation, "e")
356
- self._type = engine_type
357
-
358
- def to_input(self, engine: Any) -> Any: # pragma: no cover
359
- assert_or_throw(
360
- isinstance(engine, self._type),
361
- FugueWorkflowRuntimeError(f"{engine} is not of type {self._type}"),
362
- )
363
- return engine
364
-
365
- def __uuid__(self) -> str:
366
- return to_uuid(self.code, self.annotation, self._type)
367
-
368
-
369
- class _DataFramesParam(_FuncParam):
370
- def __init__(self, param: Optional[inspect.Parameter]):
371
- super().__init__(param, "DataFrames", "c")
372
-
373
-
374
- class _DataFrameParamBase(_FuncParam):
375
- def __init__(self, param: Optional[inspect.Parameter], annotation: Any, code: str):
376
- super().__init__(param, annotation, code)
377
- assert_or_throw(self.required, lambda: TypeError(f"{self} must be required"))
378
-
379
- def to_input_data(self, df: DataFrame, ctx: Any) -> Any: # pragma: no cover
380
- raise NotImplementedError
381
-
382
- def to_output_df(
383
- self, df: Any, schema: Any, ctx: Any
384
- ) -> DataFrame: # pragma: no cover
385
- raise NotImplementedError
386
-
387
- def count(self, df: Any) -> int: # pragma: no cover
388
- raise NotImplementedError
389
-
390
- def need_schema(self) -> Optional[bool]:
391
- return False
392
-
393
-
394
- class DataFrameParam(_DataFrameParamBase):
395
- def __init__(
396
- self, param: Optional[inspect.Parameter], annotation: str = "DataFrame"
397
- ):
398
- super().__init__(param, annotation=annotation, code="d")
399
-
400
- def to_input_data(self, df: DataFrame, ctx: Any) -> Any:
401
- return df
402
-
403
- def to_output_df(self, output: Any, schema: Any, ctx: Any) -> DataFrame:
404
- assert_or_throw(
405
- schema is None or output.schema == schema,
406
- lambda: f"Output schema mismatch {output.schema} vs {schema}",
407
- )
408
- return output
409
-
410
- def count(self, df: Any) -> int:
411
- if df.is_bounded:
412
- return df.count()
413
- else:
414
- return sum(1 for _ in df.as_array_iterable())
415
-
416
-
417
- class _LocalDataFrameParam(_DataFrameParamBase):
418
- def __init__(self, param: Optional[inspect.Parameter]):
419
- super().__init__(param, "LocalDataFrame", "l")
420
-
421
- def to_input_data(self, df: DataFrame, ctx: Any) -> LocalDataFrame:
422
- return to_local_df(df)
423
-
424
- def to_output_df(self, output: LocalDataFrame, schema: Any, ctx: Any) -> DataFrame:
425
- assert_or_throw(
426
- schema is None or output.schema == schema,
427
- lambda: f"Output schema mismatch {output.schema} vs {schema}",
428
- )
429
- return output
430
-
431
- def count(self, df: LocalDataFrame) -> int:
432
- if df.is_bounded:
433
- return df.count()
434
- else:
435
- return sum(1 for _ in df.as_array_iterable())
436
-
437
-
438
- class _ListListParam(_DataFrameParamBase):
439
- def __init__(self, param: Optional[inspect.Parameter]):
440
- super().__init__(param, "List[List[Any]]", "s")
441
-
442
- def to_input_data(self, df: DataFrame, ctx: Any) -> List[List[Any]]:
443
- return df.as_array(type_safe=True)
444
-
445
- def to_output_df(self, output: List[List[Any]], schema: Any, ctx: Any) -> DataFrame:
446
- return ArrayDataFrame(output, schema)
447
-
448
- def count(self, df: List[List[Any]]) -> int:
449
- return len(df)
450
-
451
- def need_schema(self) -> Optional[bool]:
452
- return True
453
-
454
-
455
- class _IterableListParam(_DataFrameParamBase):
456
- def __init__(self, param: Optional[inspect.Parameter]):
457
- super().__init__(param, "Iterable[List[Any]]", "s")
458
-
459
- def to_input_data(self, df: DataFrame, ctx: Any) -> Iterable[List[Any]]:
460
- return df.as_array_iterable(type_safe=True)
461
-
462
- def to_output_df(
463
- self, output: Iterable[List[Any]], schema: Any, ctx: Any
464
- ) -> DataFrame:
465
- return IterableDataFrame(output, schema)
466
-
467
- def count(self, df: Iterable[List[Any]]) -> int:
468
- return sum(1 for _ in df)
469
-
470
- def need_schema(self) -> Optional[bool]: # pragma: no cover
471
- return True
472
-
473
-
474
- class _EmptyAwareIterableListParam(_DataFrameParamBase):
475
- def __init__(self, param: Optional[inspect.Parameter]):
476
- super().__init__(param, "EmptyAwareIterable[List[Any]]", "s")
477
-
478
- def to_input_data(self, df: DataFrame, ctx: Any) -> EmptyAwareIterable[List[Any]]:
479
- return make_empty_aware(df.as_array_iterable(type_safe=True))
480
-
481
- def to_output_df(
482
- self, output: EmptyAwareIterable[List[Any]], schema: Any, ctx: Any
483
- ) -> DataFrame:
484
- return IterableDataFrame(output, schema)
485
-
486
- def count(self, df: EmptyAwareIterable[List[Any]]) -> int:
487
- return sum(1 for _ in df)
488
-
489
- def need_schema(self) -> Optional[bool]: # pragma: no cover
490
- return True
491
-
492
-
493
- class _ListDictParam(_DataFrameParamBase):
494
- def __init__(self, param: Optional[inspect.Parameter]):
495
- super().__init__(param, "List[Dict[str,Any]]", "s")
496
-
497
- def to_input_data(self, df: DataFrame, ctx: Any) -> List[Dict[str, Any]]:
498
- return list(to_local_df(df).as_dict_iterable())
499
-
500
- def to_output_df(
501
- self, output: List[Dict[str, Any]], schema: Any, ctx: Any
502
- ) -> DataFrame:
503
- schema = schema if isinstance(schema, Schema) else Schema(schema)
504
-
505
- def get_all() -> Iterable[List[Any]]:
506
- for row in output:
507
- yield [row[x] for x in schema.names]
508
-
509
- return IterableDataFrame(get_all(), schema)
510
-
511
- def count(self, df: List[Dict[str, Any]]) -> int:
512
- return len(df)
513
-
514
- def need_schema(self) -> Optional[bool]: # pragma: no cover
515
- return True
516
-
517
-
518
- class _IterableDictParam(_DataFrameParamBase):
519
- def __init__(self, param: Optional[inspect.Parameter]):
520
- super().__init__(param, "Iterable[Dict[str,Any]]", "s")
521
-
522
- def to_input_data(self, df: DataFrame, ctx: Any) -> Iterable[Dict[str, Any]]:
523
- return df.as_dict_iterable()
524
-
525
- def to_output_df(
526
- self, output: Iterable[Dict[str, Any]], schema: Any, ctx: Any
527
- ) -> DataFrame:
528
- schema = schema if isinstance(schema, Schema) else Schema(schema)
529
-
530
- def get_all() -> Iterable[List[Any]]:
531
- for row in output:
532
- yield [row[x] for x in schema.names]
533
-
534
- return IterableDataFrame(get_all(), schema)
535
-
536
- def count(self, df: Iterable[Dict[str, Any]]) -> int:
537
- return sum(1 for _ in df)
538
-
539
- def need_schema(self) -> Optional[bool]: # pragma: no cover
540
- return True
541
-
542
-
543
- class _EmptyAwareIterableDictParam(_DataFrameParamBase):
544
- def __init__(self, param: Optional[inspect.Parameter]):
545
- super().__init__(param, "EmptyAwareIterable[Dict[str,Any]]", "s")
546
-
547
- def to_input_data(
548
- self, df: DataFrame, ctx: Any
549
- ) -> EmptyAwareIterable[Dict[str, Any]]:
550
- return make_empty_aware(df.as_dict_iterable())
551
-
552
- def to_output_df(
553
- self, output: EmptyAwareIterable[Dict[str, Any]], schema: Any, ctx: Any
554
- ) -> DataFrame:
555
- schema = schema if isinstance(schema, Schema) else Schema(schema)
556
-
557
- def get_all() -> Iterable[List[Any]]:
558
- for row in output:
559
- yield [row[x] for x in schema.names]
560
-
561
- return IterableDataFrame(get_all(), schema)
562
-
563
- def count(self, df: EmptyAwareIterable[Dict[str, Any]]) -> int:
564
- return sum(1 for _ in df)
565
-
566
- def need_schema(self) -> Optional[bool]: # pragma: no cover
567
- return True
568
-
569
-
570
- class _PandasParam(_DataFrameParamBase):
571
- def __init__(self, param: Optional[inspect.Parameter]):
572
- super().__init__(param, "pd.DataFrame", "p")
573
-
574
- def to_input_data(self, df: DataFrame, ctx: Any) -> pd.DataFrame:
575
- return df.as_pandas()
576
-
577
- def to_output_df(self, output: pd.DataFrame, schema: Any, ctx: Any) -> DataFrame:
578
- return PandasDataFrame(output, schema)
579
-
580
- def count(self, df: pd.DataFrame) -> int:
581
- return df.shape[0]
582
-
583
-
584
- class _IterablePandasParam(_DataFrameParamBase):
585
- def __init__(self, param: Optional[inspect.Parameter]):
586
- super().__init__(param, "Iterable[pd.DataFrame]", "q")
587
-
588
- def to_input_data(self, df: DataFrame, ctx: Any) -> Iterable[pd.DataFrame]:
589
- if not isinstance(df, LocalDataFrameIterableDataFrame):
590
- yield df.as_pandas()
591
- else:
592
- for sub in df.native:
593
- yield sub.as_pandas()
594
-
595
- def to_output_df(
596
- self, output: Iterable[pd.DataFrame], schema: Any, ctx: Any
597
- ) -> DataFrame:
598
- def dfs():
599
- for df in output:
600
- yield PandasDataFrame(df, schema)
601
-
602
- return LocalDataFrameIterableDataFrame(dfs())
603
-
604
- def count(self, df: Iterable[pd.DataFrame]) -> int:
605
- return sum(_.shape[0] for _ in df)
606
-
607
-
608
- class _NoneParam(_FuncParam):
609
- def __init__(self, param: Optional[inspect.Parameter]):
610
- super().__init__(param, "NoneType", "n")
611
-
612
-
613
- class _SelfParam(_FuncParam):
614
- def __init__(self, param: Optional[inspect.Parameter]):
615
- super().__init__(param, "[Self]", "0")
616
-
617
-
618
- class _OtherParam(_FuncParam):
619
- def __init__(self, param: Optional[inspect.Parameter]):
620
- super().__init__(param, "[Other]", "x")
621
-
622
-
623
- class _PositionalParam(_FuncParam):
624
- def __init__(self, param: Optional[inspect.Parameter]):
625
- super().__init__(param, "[Positional]", "y")
626
-
627
-
628
- class _KeywordParam(_FuncParam):
629
- def __init__(self, param: Optional[inspect.Parameter]):
630
- super().__init__(param, "[Keyword]", "z")