fugue 0.9.6__py3-none-any.whl → 0.9.7.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fugue/__init__.py +4 -1
- fugue/dataframe/dataframe.py +1 -3
- fugue/dataframe/function_wrapper.py +63 -19
- fugue/dataframe/utils.py +3 -3
- fugue/extensions/_builtins/outputters.py +1 -1
- fugue/extensions/_builtins/processors.py +1 -1
- fugue/extensions/processor/convert.py +1 -0
- fugue/extensions/transformer/convert.py +4 -1
- fugue/sql/_utils.py +0 -1
- fugue/sql/_visitors.py +0 -1
- fugue/workflow/_tasks.py +3 -1
- fugue/workflow/workflow.py +3 -3
- {fugue-0.9.6.dist-info → fugue-0.9.7.dev0.dist-info}/METADATA +13 -26
- {fugue-0.9.6.dist-info → fugue-0.9.7.dev0.dist-info}/RECORD +29 -30
- {fugue-0.9.6.dist-info → fugue-0.9.7.dev0.dist-info}/WHEEL +1 -1
- fugue-0.9.7.dev0.dist-info/entry_points.txt +11 -0
- {fugue-0.9.6.dist-info → fugue-0.9.7.dev0.dist-info}/top_level.txt +0 -1
- fugue_dask/__init__.py +3 -1
- fugue_dask/_utils.py +31 -34
- fugue_duckdb/execution_engine.py +7 -3
- fugue_ibis/_utils.py +2 -2
- fugue_notebook/__init__.py +2 -1
- fugue_ray/_utils/io.py +2 -1
- fugue_spark/__init__.py +3 -1
- fugue_spark/_utils/convert.py +4 -5
- fugue_spark/dataframe.py +1 -3
- fugue_sql/__init__.py +3 -1
- fugue_test/builtin_suite.py +1 -2
- fugue-0.9.6.dist-info/entry_points.txt +0 -11
- fugue_version/__init__.py +0 -1
- {fugue-0.9.6.dist-info → fugue-0.9.7.dev0.dist-info}/licenses/LICENSE +0 -0
fugue/__init__.py
CHANGED
|
@@ -1,4 +1,8 @@
|
|
|
1
1
|
# flake8: noqa
|
|
2
|
+
from importlib.metadata import version
|
|
3
|
+
|
|
4
|
+
__version__ = version("fugue")
|
|
5
|
+
|
|
2
6
|
from triad.collections import Schema
|
|
3
7
|
|
|
4
8
|
from fugue.api import out_transform, transform
|
|
@@ -83,7 +87,6 @@ from fugue.sql.workflow import FugueSQLWorkflow
|
|
|
83
87
|
from fugue.workflow._workflow_context import FugueWorkflowContext
|
|
84
88
|
from fugue.workflow.module import module
|
|
85
89
|
from fugue.workflow.workflow import FugueWorkflow, WorkflowDataFrame, WorkflowDataFrames
|
|
86
|
-
from fugue_version import __version__
|
|
87
90
|
|
|
88
91
|
from .dev import *
|
|
89
92
|
|
fugue/dataframe/dataframe.py
CHANGED
|
@@ -59,9 +59,7 @@ class DataFrame(Dataset):
|
|
|
59
59
|
assert isinstance(self._schema, Schema)
|
|
60
60
|
return self._schema # type: ignore
|
|
61
61
|
with self._lazy_schema_lock:
|
|
62
|
-
self._schema = _input_schema(
|
|
63
|
-
self._schema()
|
|
64
|
-
).assert_not_empty() # type: ignore
|
|
62
|
+
self._schema = _input_schema(self._schema()).assert_not_empty() # type: ignore
|
|
65
63
|
self._schema.set_readonly()
|
|
66
64
|
self._schema_discovered = True
|
|
67
65
|
return self._schema
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import inspect
|
|
2
|
+
from collections.abc import Callable as AbcCallable
|
|
2
3
|
from typing import (
|
|
3
4
|
Any,
|
|
4
5
|
Callable,
|
|
@@ -7,6 +8,9 @@ from typing import (
|
|
|
7
8
|
Iterator,
|
|
8
9
|
List,
|
|
9
10
|
Optional,
|
|
11
|
+
Union,
|
|
12
|
+
get_args,
|
|
13
|
+
get_origin,
|
|
10
14
|
no_type_check,
|
|
11
15
|
)
|
|
12
16
|
|
|
@@ -39,13 +43,66 @@ from .pandas_dataframe import PandasDataFrame
|
|
|
39
43
|
|
|
40
44
|
|
|
41
45
|
def _compare_iter(tp: Any) -> Any:
|
|
42
|
-
return lambda x:
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
+
return lambda x: (
|
|
47
|
+
compare_annotations(
|
|
48
|
+
x,
|
|
49
|
+
Iterable[tp], # type:ignore
|
|
50
|
+
)
|
|
51
|
+
or compare_annotations(
|
|
52
|
+
x,
|
|
53
|
+
Iterator[tp], # type:ignore
|
|
54
|
+
)
|
|
46
55
|
)
|
|
47
56
|
|
|
48
57
|
|
|
58
|
+
def _is_optional(annotation) -> bool:
|
|
59
|
+
origin = get_origin(annotation)
|
|
60
|
+
|
|
61
|
+
# Check if it's a Union type
|
|
62
|
+
if origin is Union:
|
|
63
|
+
args = get_args(annotation)
|
|
64
|
+
# Optional[T] is Union[T, None]
|
|
65
|
+
return type(None) in args
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _is_required_callable(annotation) -> bool:
|
|
69
|
+
"""Check if annotation is a required (non-optional) Callable type."""
|
|
70
|
+
if _is_optional(annotation):
|
|
71
|
+
return False
|
|
72
|
+
|
|
73
|
+
# Check direct equality
|
|
74
|
+
if annotation == Callable or annotation == callable: # pylint: disable=comparison-with-callable
|
|
75
|
+
return True
|
|
76
|
+
|
|
77
|
+
# Check if it's a generic Callable like Callable[[int], str]
|
|
78
|
+
origin = get_origin(annotation)
|
|
79
|
+
return origin is AbcCallable or origin is type(Callable)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _is_optional_callable(annotation) -> bool:
|
|
83
|
+
"""Check if annotation is an optional Callable type (Optional[Callable] or Callable | None)."""
|
|
84
|
+
if not _is_optional(annotation):
|
|
85
|
+
return False
|
|
86
|
+
|
|
87
|
+
# Get the non-None types from the Union
|
|
88
|
+
args = get_args(annotation)
|
|
89
|
+
non_none_types = [arg for arg in args if arg is not type(None)]
|
|
90
|
+
|
|
91
|
+
# Should have exactly one non-None type, and it should be Callable
|
|
92
|
+
if len(non_none_types) != 1: # pragma: no cover
|
|
93
|
+
return False
|
|
94
|
+
|
|
95
|
+
inner_type = non_none_types[0]
|
|
96
|
+
|
|
97
|
+
# Check if the inner type is Callable
|
|
98
|
+
if inner_type == Callable or inner_type == callable: # pylint: disable=comparison-with-callable
|
|
99
|
+
return True
|
|
100
|
+
|
|
101
|
+
# Check if it's a generic Callable like Callable[[int], str]
|
|
102
|
+
origin = get_origin(inner_type)
|
|
103
|
+
return origin is AbcCallable or origin is type(Callable)
|
|
104
|
+
|
|
105
|
+
|
|
49
106
|
@function_wrapper(FUGUE_ENTRYPOINT)
|
|
50
107
|
class DataFrameFunctionWrapper(FunctionWrapper):
|
|
51
108
|
@property
|
|
@@ -154,12 +211,7 @@ fugue_annotated_param = DataFrameFunctionWrapper.annotated_param
|
|
|
154
211
|
@fugue_annotated_param(
|
|
155
212
|
"Callable",
|
|
156
213
|
"F",
|
|
157
|
-
|
|
158
|
-
annotation == Callable
|
|
159
|
-
or annotation == callable # pylint: disable=comparison-with-callable
|
|
160
|
-
or str(annotation).startswith("typing.Callable")
|
|
161
|
-
or str(annotation).startswith("collections.abc.Callable")
|
|
162
|
-
),
|
|
214
|
+
_is_required_callable,
|
|
163
215
|
)
|
|
164
216
|
class _CallableParam(AnnotatedParam):
|
|
165
217
|
pass
|
|
@@ -168,15 +220,7 @@ class _CallableParam(AnnotatedParam):
|
|
|
168
220
|
@fugue_annotated_param(
|
|
169
221
|
"Callable",
|
|
170
222
|
"f",
|
|
171
|
-
|
|
172
|
-
annotation == Optional[Callable]
|
|
173
|
-
or annotation == Optional[callable]
|
|
174
|
-
or str(annotation).startswith("typing.Union[typing.Callable") # 3.8-
|
|
175
|
-
or str(annotation).startswith("typing.Optional[typing.Callable") # 3.9+
|
|
176
|
-
or str(annotation).startswith(
|
|
177
|
-
"typing.Optional[collections.abc.Callable]"
|
|
178
|
-
) # 3.9+
|
|
179
|
-
),
|
|
223
|
+
_is_optional_callable,
|
|
180
224
|
)
|
|
181
225
|
class _OptionalCallableParam(AnnotatedParam):
|
|
182
226
|
pass
|
fugue/dataframe/utils.py
CHANGED
|
@@ -61,9 +61,9 @@ def _df_eq(
|
|
|
61
61
|
else:
|
|
62
62
|
df2 = as_fugue_df(data).as_local_bounded()
|
|
63
63
|
try:
|
|
64
|
-
assert (
|
|
65
|
-
df1.count()
|
|
66
|
-
)
|
|
64
|
+
assert df1.count() == df2.count(), (
|
|
65
|
+
f"count mismatch {df1.count()}, {df2.count()}"
|
|
66
|
+
)
|
|
67
67
|
assert not check_schema or df.schema.is_like(
|
|
68
68
|
df2.schema, equal_groups=equal_type_groups
|
|
69
69
|
), f"schema mismatch {df.schema.pa_schema}, {df2.schema.pa_schema}"
|
|
@@ -136,6 +136,7 @@ def processor(
|
|
|
136
136
|
Please read
|
|
137
137
|
:doc:`Processor Tutorial <tutorial:tutorials/extensions/processor>`
|
|
138
138
|
"""
|
|
139
|
+
|
|
139
140
|
# TODO: validation of schema if without * should be done at compile time
|
|
140
141
|
def deco(func: Callable) -> "_FuncAsProcessor":
|
|
141
142
|
return _FuncAsProcessor.from_func(
|
|
@@ -546,7 +546,10 @@ class _FuncAsOutputCoTransformer(_FuncAsCoTransformer):
|
|
|
546
546
|
p = dict(dfs)
|
|
547
547
|
p.update(self.params)
|
|
548
548
|
self._wrapper.run(
|
|
549
|
-
[] + cb,
|
|
549
|
+
[] + cb,
|
|
550
|
+
p,
|
|
551
|
+
ignore_unknown=False,
|
|
552
|
+
output=False, # type: ignore
|
|
550
553
|
)
|
|
551
554
|
return ArrayDataFrame([], OUTPUT_TRANSFORMER_DUMMY_SCHEMA)
|
|
552
555
|
|
fugue/sql/_utils.py
CHANGED
fugue/sql/_visitors.py
CHANGED
fugue/workflow/_tasks.py
CHANGED
|
@@ -204,7 +204,9 @@ class FugueTask(TaskSpec, ABC):
|
|
|
204
204
|
|
|
205
205
|
# add caller traceback
|
|
206
206
|
ctb = modify_traceback(
|
|
207
|
-
sys.exc_info()[2].tb_next,
|
|
207
|
+
sys.exc_info()[2].tb_next,
|
|
208
|
+
None,
|
|
209
|
+
self._traceback, # type: ignore
|
|
208
210
|
)
|
|
209
211
|
if ctb is None: # pragma: no cover
|
|
210
212
|
raise
|
fugue/workflow/workflow.py
CHANGED
|
@@ -1468,7 +1468,8 @@ class WorkflowDataFrames(DataFrames):
|
|
|
1468
1468
|
super().__setitem__(key, value, *args, **kwds)
|
|
1469
1469
|
|
|
1470
1470
|
def __getitem__( # pylint: disable=W0235
|
|
1471
|
-
self,
|
|
1471
|
+
self,
|
|
1472
|
+
key: Union[str, int], # type: ignore
|
|
1472
1473
|
) -> WorkflowDataFrame:
|
|
1473
1474
|
return super().__getitem__(key) # type: ignore
|
|
1474
1475
|
|
|
@@ -1791,8 +1792,7 @@ class FugueWorkflow:
|
|
|
1791
1792
|
)
|
|
1792
1793
|
)
|
|
1793
1794
|
raise FugueWorkflowCompileError(
|
|
1794
|
-
f"Input data of type {type(data)} can't "
|
|
1795
|
-
"be converted to WorkflowDataFrame"
|
|
1795
|
+
f"Input data of type {type(data)} can't be converted to WorkflowDataFrame"
|
|
1796
1796
|
)
|
|
1797
1797
|
|
|
1798
1798
|
def df(
|
|
@@ -1,27 +1,26 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fugue
|
|
3
|
-
Version: 0.9.
|
|
3
|
+
Version: 0.9.7.dev0
|
|
4
4
|
Summary: An abstraction layer for distributed computation
|
|
5
|
-
|
|
6
|
-
Author: The Fugue Development Team
|
|
7
|
-
Author-email: hello@fugue.ai
|
|
5
|
+
Author-email: The Fugue Development Team <hello@fugue.ai>
|
|
8
6
|
License: Apache-2.0
|
|
9
|
-
|
|
7
|
+
Project-URL: Homepage, http://github.com/fugue-project/fugue
|
|
8
|
+
Project-URL: Repository, http://github.com/fugue-project/fugue
|
|
9
|
+
Keywords: distributed,spark,dask,ray,sql,dsl,domain specific language
|
|
10
10
|
Classifier: Development Status :: 5 - Production/Stable
|
|
11
11
|
Classifier: Intended Audience :: Developers
|
|
12
12
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
13
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
14
13
|
Classifier: Programming Language :: Python :: 3
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
17
14
|
Classifier: Programming Language :: Python :: 3.10
|
|
18
15
|
Classifier: Programming Language :: Python :: 3.11
|
|
19
16
|
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
20
19
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
21
|
-
Requires-Python: >=3.
|
|
20
|
+
Requires-Python: >=3.10
|
|
22
21
|
Description-Content-Type: text/markdown
|
|
23
22
|
License-File: LICENSE
|
|
24
|
-
Requires-Dist: triad>=1.0.
|
|
23
|
+
Requires-Dist: triad>=1.0.1
|
|
25
24
|
Requires-Dist: adagio>=0.2.6
|
|
26
25
|
Requires-Dist: pandas<3
|
|
27
26
|
Provides-Extra: sql
|
|
@@ -39,7 +38,7 @@ Requires-Dist: dask[dataframe,distributed]>=2024.4.0; extra == "dask"
|
|
|
39
38
|
Requires-Dist: pyarrow>=7.0.0; extra == "dask"
|
|
40
39
|
Requires-Dist: pandas>=2.0.2; extra == "dask"
|
|
41
40
|
Provides-Extra: ray
|
|
42
|
-
Requires-Dist: ray[data]>=2.30.0; extra == "ray"
|
|
41
|
+
Requires-Dist: ray[data]>=2.30.0; python_version < "3.14" and extra == "ray"
|
|
43
42
|
Requires-Dist: duckdb>=0.5.0; extra == "ray"
|
|
44
43
|
Requires-Dist: pyarrow>=7.0.0; extra == "ray"
|
|
45
44
|
Requires-Dist: pandas; extra == "ray"
|
|
@@ -70,7 +69,7 @@ Requires-Dist: jinja2; extra == "all"
|
|
|
70
69
|
Requires-Dist: pyspark>=3.1.1; extra == "all"
|
|
71
70
|
Requires-Dist: dask[dataframe,distributed]>=2024.4.0; extra == "all"
|
|
72
71
|
Requires-Dist: dask-sql; extra == "all"
|
|
73
|
-
Requires-Dist: ray[data]>=2.30.0; extra == "all"
|
|
72
|
+
Requires-Dist: ray[data]>=2.30.0; python_version < "3.14" and extra == "all"
|
|
74
73
|
Requires-Dist: notebook; extra == "all"
|
|
75
74
|
Requires-Dist: jupyterlab; extra == "all"
|
|
76
75
|
Requires-Dist: ipython>=7.10.0; extra == "all"
|
|
@@ -79,26 +78,14 @@ Requires-Dist: pyarrow>=6.0.1; extra == "all"
|
|
|
79
78
|
Requires-Dist: pandas>=2.0.2; extra == "all"
|
|
80
79
|
Requires-Dist: ibis-framework[duckdb,pandas]; extra == "all"
|
|
81
80
|
Requires-Dist: polars; extra == "all"
|
|
82
|
-
Dynamic: author
|
|
83
|
-
Dynamic: author-email
|
|
84
|
-
Dynamic: classifier
|
|
85
|
-
Dynamic: description
|
|
86
|
-
Dynamic: description-content-type
|
|
87
|
-
Dynamic: home-page
|
|
88
|
-
Dynamic: keywords
|
|
89
|
-
Dynamic: license
|
|
90
81
|
Dynamic: license-file
|
|
91
|
-
Dynamic: provides-extra
|
|
92
|
-
Dynamic: requires-dist
|
|
93
|
-
Dynamic: requires-python
|
|
94
|
-
Dynamic: summary
|
|
95
82
|
|
|
96
|
-
#
|
|
83
|
+
# <img src="./images/logo.svg" width="200">
|
|
97
84
|
|
|
98
85
|
[](https://pypi.python.org/pypi/fugue/)
|
|
99
86
|
[](https://pypi.python.org/pypi/fugue/)
|
|
100
87
|
[](https://pypi.python.org/pypi/fugue/)
|
|
101
|
-
[](https://codecov.io/gh/fugue-project/fugue)
|
|
102
89
|
[](https://www.codacy.com/gh/fugue-project/fugue/dashboard?utm_source=github.com&utm_medium=referral&utm_content=fugue-project/fugue&utm_campaign=Badge_Grade)
|
|
103
90
|
[](https://pepy.tech/project/fugue)
|
|
104
91
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
fugue/__init__.py,sha256=
|
|
1
|
+
fugue/__init__.py,sha256=asKV-hQ4hXj28c8zwjZQHj_JlomElOTRRaO8tx29sYw,2739
|
|
2
2
|
fugue/api.py,sha256=dLUrigFhDMB5x7cvlWSK8EyaY2o0AmhgPr0VRtfzSz0,1254
|
|
3
3
|
fugue/constants.py,sha256=crd0VqX8WtBcjSUNwZDi2LDIEkhUMWOlSn73H8JI9ds,3385
|
|
4
4
|
fugue/dev.py,sha256=GQCkezBBl4V0lVDWhGtUQKqomiCxgR9dMhfqj9C8cS8,1369
|
|
@@ -28,13 +28,13 @@ fugue/dataframe/__init__.py,sha256=zm7TbsaJLIvfm7zymWm2LGcuJd3nxfGsFnQiyrSnenM,6
|
|
|
28
28
|
fugue/dataframe/api.py,sha256=aWBvMaiSUxOvdQMfe79zHShWuPfLcgiWggC9HvVxvSE,11017
|
|
29
29
|
fugue/dataframe/array_dataframe.py,sha256=4scWnmQ6sjy1A6o7IYdRc0VVutBEfcJrA1f9wkph4Kg,4440
|
|
30
30
|
fugue/dataframe/arrow_dataframe.py,sha256=r5zcZBX_N6XO5dmixBkTCPgLcMmgDF022piZvrwRp_c,11485
|
|
31
|
-
fugue/dataframe/dataframe.py,sha256=
|
|
31
|
+
fugue/dataframe/dataframe.py,sha256=B9-oKUWMNXvKjAEchwJt9gzDbeIYMFWsLdt_YmJUCQg,16934
|
|
32
32
|
fugue/dataframe/dataframe_iterable_dataframe.py,sha256=lx71KfaI4lsVKI-79buc-idaeT20JEMBOq21SQcAiY8,7259
|
|
33
33
|
fugue/dataframe/dataframes.py,sha256=tBSpHsENgbcdOJ0Jgst6PTKbjG7_uoFJch96oTlaQIs,4160
|
|
34
|
-
fugue/dataframe/function_wrapper.py,sha256=
|
|
34
|
+
fugue/dataframe/function_wrapper.py,sha256=HbilDPKVuRMvOBqMB_083xQf7rVw4Gx_hxzmHOLREbY,19495
|
|
35
35
|
fugue/dataframe/iterable_dataframe.py,sha256=TcOoNKa4jNbHbvAZ0XAhtMmGcioygIHPxI9budDtenQ,4758
|
|
36
36
|
fugue/dataframe/pandas_dataframe.py,sha256=0L0wYCGhD2BpQbruoT07Ox9iQM5YLHLNrcgzudc-yKs,11633
|
|
37
|
-
fugue/dataframe/utils.py,sha256=
|
|
37
|
+
fugue/dataframe/utils.py,sha256=LXauFKUiydr2Y_cLE6zdPwXUyw78Gm1cJCzdmEXERrI,10488
|
|
38
38
|
fugue/dataset/__init__.py,sha256=5f2CAJ4xst6Z2o9Q2e2twfDOGUw8ZJoE2ild4JEU2pg,112
|
|
39
39
|
fugue/dataset/api.py,sha256=DacI4L2w5NJ-eZ6nFxNMqmReEnb0WUXswbjVp7BeErk,2794
|
|
40
40
|
fugue/dataset/dataset.py,sha256=jWXZqy3msMPFFkhas2PYJEX55ZAI3gk3Txq5f4-Qya4,4759
|
|
@@ -48,8 +48,8 @@ fugue/extensions/_utils.py,sha256=Bi3pYKy2Z6fG6_5BpwIWldxetassXpB4Zp8QamWB-wg,51
|
|
|
48
48
|
fugue/extensions/context.py,sha256=c_y2UttzzIFoQTOCV42VCdj2nqah33xYuBjbKNIOpx8,4262
|
|
49
49
|
fugue/extensions/_builtins/__init__.py,sha256=OAUjZJP-QI8VpJxxEEZJFFGir4PmTyLMmWQ3VCHtIGk,545
|
|
50
50
|
fugue/extensions/_builtins/creators.py,sha256=ad9snV4oN7F9o50Iaa9T4tw5J6rXBUgOHOINKzDqoEQ,1825
|
|
51
|
-
fugue/extensions/_builtins/outputters.py,sha256=
|
|
52
|
-
fugue/extensions/_builtins/processors.py,sha256=
|
|
51
|
+
fugue/extensions/_builtins/outputters.py,sha256=n_9cqv6JGQNKw_5j_4kXo5xcMnt6Hz5YB4t1wfsvU5A,6940
|
|
52
|
+
fugue/extensions/_builtins/processors.py,sha256=4L_AicO6XBbH2Sif1-Z0BGunCN4OFSIIzSyGSuqjHgQ,14546
|
|
53
53
|
fugue/extensions/creator/__init__.py,sha256=7qmrb0eRKbGSEvrb6045-5hkmjH-nT1GJo1qYubX158,188
|
|
54
54
|
fugue/extensions/creator/convert.py,sha256=66ei5x0K0a8nWQ-kNwZXmmf1VeR_6XuqSe9rZ64mYpI,7297
|
|
55
55
|
fugue/extensions/creator/creator.py,sha256=ET9yprY4mvw0kkfWtW4aQEvKv1VR-FiWT4SThvwgAyo,1541
|
|
@@ -57,18 +57,18 @@ fugue/extensions/outputter/__init__.py,sha256=j7jHSOB2xfnJlg9BdL9UHwAML4A77eI6kI
|
|
|
57
57
|
fugue/extensions/outputter/convert.py,sha256=dPyNELy6Sruv5_NJlMSPPIfBknTlRVZo0zCWAz5U16o,6939
|
|
58
58
|
fugue/extensions/outputter/outputter.py,sha256=n2Do4NKX7_uKkUQRrSjJAl6CYoifb9_zEaXTZIF_ZJQ,1619
|
|
59
59
|
fugue/extensions/processor/__init__.py,sha256=8ws8WSnLGCfTdsigoU0_xpaPY54vpo0V2O3Bw3W9cSg,204
|
|
60
|
-
fugue/extensions/processor/convert.py,sha256=
|
|
60
|
+
fugue/extensions/processor/convert.py,sha256=dtknMhHeAoXEeNIMZ0m-ySJofdb5JKL2eHY-pgBWOVQ,8303
|
|
61
61
|
fugue/extensions/processor/processor.py,sha256=czhQlQgMpAXXoLVAX9Q0TFUMYEEhsgufTammxcKSmOY,1665
|
|
62
62
|
fugue/extensions/transformer/__init__.py,sha256=VD6d-8xW1Yl8fUPj43cBWNR9pCOlYD9xWyGIHAlHwvI,456
|
|
63
63
|
fugue/extensions/transformer/constants.py,sha256=76DfpoTOGQ8gp5XtCs_xznfbr_H015-prXpHWSqMNDU,59
|
|
64
|
-
fugue/extensions/transformer/convert.py,sha256=
|
|
64
|
+
fugue/extensions/transformer/convert.py,sha256=QRAp1S2rm8uDJt69Wm7yzszJyD83IzFZEURrNCNOMuQ,23433
|
|
65
65
|
fugue/extensions/transformer/transformer.py,sha256=zhOUgyv5-DPxYd1CP_98WeEw-zUgwknRnPW_6di-q3g,9098
|
|
66
66
|
fugue/rpc/__init__.py,sha256=3GzUl4QZQuCChjD7eaTJW8tnTwfke6ZY9r9g5nCeBZ8,167
|
|
67
67
|
fugue/rpc/base.py,sha256=3Fq5SvwLZqw9NXru3r32WuJKBGFr9bl7nFgy6e9boGo,8470
|
|
68
68
|
fugue/rpc/flask.py,sha256=VzJEND8Pqatf6pYYT9LDXeO1JDMmYAOY0lm8XUncKbA,4807
|
|
69
69
|
fugue/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
70
|
-
fugue/sql/_utils.py,sha256=
|
|
71
|
-
fugue/sql/_visitors.py,sha256=
|
|
70
|
+
fugue/sql/_utils.py,sha256=IOXhwDNnjtQI2uxhSjnEzMIFpe3k6B3T1RikPRYdf-M,2082
|
|
71
|
+
fugue/sql/_visitors.py,sha256=5m8msl86T1SYW5EyeX_NLVLTPUE_pCdRKt-dgMV1bYI,33843
|
|
72
72
|
fugue/sql/api.py,sha256=l2I9CAy_W2oFFTct9fDPLyXF0LiDxQhMx5O8jBHTAxU,10050
|
|
73
73
|
fugue/sql/workflow.py,sha256=S1pOhp0b0t6johFAJWmj6xUB7Ti5LQgNABpAzmLGjrQ,3010
|
|
74
74
|
fugue/test/__init__.py,sha256=hvVrNbJYkWI_6otpILneyTjUafxURaA4obK6AoDyCUw,250
|
|
@@ -76,23 +76,23 @@ fugue/test/pandas_tester.py,sha256=_w6rFqlzZKjBtmFf-08a4C97W5xtqGw5XorLhj6Zyoo,6
|
|
|
76
76
|
fugue/test/plugins.py,sha256=GLZia5GCmy0eBVGNbIqTbX7Ou3euf2SY4litKgdigwY,12318
|
|
77
77
|
fugue/workflow/__init__.py,sha256=tXM_KYO8Q358W6qAVlwhIQIaYNRDgZtTubrIEX4QMgM,229
|
|
78
78
|
fugue/workflow/_checkpoint.py,sha256=tt5Iv7c5ZStC0MD1inItksQ0GuK0ViniA3nvrgym-5c,5681
|
|
79
|
-
fugue/workflow/_tasks.py,sha256=
|
|
79
|
+
fugue/workflow/_tasks.py,sha256=Qezr3Gb-I2WifTjE2vC_YiAaXx6IDZKT3ka3NHqI2tU,11859
|
|
80
80
|
fugue/workflow/_workflow_context.py,sha256=Wmp6n0lSrh2Gpslb5EaSX6BQNniKsvKn6SlhVkQ6ui0,2504
|
|
81
81
|
fugue/workflow/api.py,sha256=uQoxPSCZ91-ST4vwuPWG7qioRGW4eo-Sgi3DdwtSL4k,12495
|
|
82
82
|
fugue/workflow/input.py,sha256=V_zLDNzndmQuYJAPXtdK4n-vOp7LrimGIf_wQtwf2mc,321
|
|
83
83
|
fugue/workflow/module.py,sha256=ajyqgMwX6hFMZY9xp4Bp1Q-Zdta0p5f_W_n_SNrc4LE,5547
|
|
84
|
-
fugue/workflow/workflow.py,sha256
|
|
85
|
-
fugue-0.9.
|
|
84
|
+
fugue/workflow/workflow.py,sha256=laQXwUEhrmCxYWVownMgjLLbcmc7BiNb2ajZfQ4FgZU,88213
|
|
85
|
+
fugue-0.9.7.dev0.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
|
|
86
86
|
fugue_contrib/__init__.py,sha256=QJioX-r2AiU7Pvt24M-k2c4vNq29qpK-3WNUde7ucck,222
|
|
87
87
|
fugue_contrib/contrib.py,sha256=3B--6oIVBMZ-GwjIOXwZqYqkloH7Cxfq1I8vkwl2yPk,267
|
|
88
88
|
fugue_contrib/seaborn/__init__.py,sha256=NuVv8EI4Om4gHcHwYO8ddextLQqw24vDj8qJio3E1MU,1405
|
|
89
89
|
fugue_contrib/viz/__init__.py,sha256=osgZx63Br-yMZImyEfYf9MVzJNM2Cqqke_-WsuDmG5M,1273
|
|
90
90
|
fugue_contrib/viz/_ext.py,sha256=Lu_DlS5DcmrFz27fHcKTCkhKyknVWcfS5kzZVVuO9xM,1345
|
|
91
|
-
fugue_dask/__init__.py,sha256=
|
|
91
|
+
fugue_dask/__init__.py,sha256=OPgP3PUI1-RrAw9oNevlFI7c7friM5JcTVRgm74iFhA,194
|
|
92
92
|
fugue_dask/_constants.py,sha256=35UmTVITk21GhRyRlbJOwPPdQsytM_p_2NytOXEay18,510
|
|
93
93
|
fugue_dask/_dask_sql_wrapper.py,sha256=lj38gJIOdoMV9W44gpwzLjUEtPVsQNKjRWuEkfI7-PM,2618
|
|
94
94
|
fugue_dask/_io.py,sha256=pl4F7mbVgP7Rwh1FFG7xfOz2TBZRUj1l3lLvDY4jOf4,6020
|
|
95
|
-
fugue_dask/_utils.py,sha256=
|
|
95
|
+
fugue_dask/_utils.py,sha256=Z15myyjGbYpdHWZLmabuwi1SG40RNwfTmgjRWREHBLI,8864
|
|
96
96
|
fugue_dask/dataframe.py,sha256=4Dvckpc4mlld2WsEFTTemxoA1zYK8Cn6jMKxUxYQCEE,13491
|
|
97
97
|
fugue_dask/execution_engine.py,sha256=mFN_IurhdBEu8C5OreqpGSRdTbTBqSpzJO2dMQzEF-o,21264
|
|
98
98
|
fugue_dask/registry.py,sha256=jepWKH55VWNIWV3pOF5vpCl2OpO0rI1IULx5GM2Gk6w,2274
|
|
@@ -102,15 +102,15 @@ fugue_duckdb/_io.py,sha256=vnd8m8C6XeMCBJBbAdA5h695NMfsduQrvONyS0HcEFA,8475
|
|
|
102
102
|
fugue_duckdb/_utils.py,sha256=ElKbHUyn5fWSPGXsK57iqMzcqKtCf0c8pBVBYGe5Ql4,5020
|
|
103
103
|
fugue_duckdb/dask.py,sha256=agoLzeB7Swxj2kVWfmXFbWD1NS2lbbTlnrjSkR8kKWY,5014
|
|
104
104
|
fugue_duckdb/dataframe.py,sha256=LAPoPOad9hgGhjyhlMGMfrnhkyBKe06Xzn6eP1hkl-w,8504
|
|
105
|
-
fugue_duckdb/execution_engine.py,sha256
|
|
105
|
+
fugue_duckdb/execution_engine.py,sha256=-nuRZQ64SPCWehY54hX9h30jz_8UWQpvXBRKFqD6pwI,20453
|
|
106
106
|
fugue_duckdb/registry.py,sha256=9_41KO42kXqcjF4yParQ5JGyg5TckcbhH-Q2IlGpSho,3987
|
|
107
107
|
fugue_duckdb/tester.py,sha256=MzTkv3sdOwOjI59LRrSGGl4w59Njv3OArTU5kSRL-P0,1526
|
|
108
108
|
fugue_ibis/__init__.py,sha256=z7TkK7M2_0p9XO6jQATNDgT0aHXn5k69Ttz2ga-eQG8,190
|
|
109
109
|
fugue_ibis/_compat.py,sha256=zKdTaTfuC02eUIzZPkcd7oObnVBi_X5mQjQf7SDme3Y,246
|
|
110
|
-
fugue_ibis/_utils.py,sha256=
|
|
110
|
+
fugue_ibis/_utils.py,sha256=3e_ir6eigJO99_YJRHzRElsM-YXciUi15MhfBCDZ3sU,6609
|
|
111
111
|
fugue_ibis/dataframe.py,sha256=k4Q6qBLBIADF5YhbvaDplXO7OkMZSHuf_Wg5o-AusEI,7796
|
|
112
112
|
fugue_ibis/execution_engine.py,sha256=jRnp1m1wuTicS29A-WA043f8QwdoK8b9rwPXvTkm8r8,18751
|
|
113
|
-
fugue_notebook/__init__.py,sha256=
|
|
113
|
+
fugue_notebook/__init__.py,sha256=4VQQJweSb7ZKAtMtPsoyBPDO5K2MrS4xyHyR5d8pWbc,4358
|
|
114
114
|
fugue_notebook/env.py,sha256=TYiTxYPFi-BVJJY49jDsvw9mddhK8WrifeRxBke30I8,4773
|
|
115
115
|
fugue_notebook/nbextension/README.md,sha256=QLnr957YeGfwzy2r4c4qbZPaXyCbyGrKPvcqSBQYSnU,123
|
|
116
116
|
fugue_notebook/nbextension/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -129,29 +129,28 @@ fugue_ray/tester.py,sha256=oTA_xOzvQhJU3ohc4hsVpZc0zv4bwJn1c8a9u8kcuIs,537
|
|
|
129
129
|
fugue_ray/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
130
130
|
fugue_ray/_utils/cluster.py,sha256=3T3Gyra6lAHlzktta-Ro35j6YZQfH6fNrj2hC5ATF9k,621
|
|
131
131
|
fugue_ray/_utils/dataframe.py,sha256=5c4duGV--mdLkKrbJRgjDWvVcp9BegA3yX16pmYDYLE,3954
|
|
132
|
-
fugue_ray/_utils/io.py,sha256=
|
|
133
|
-
fugue_spark/__init__.py,sha256=
|
|
132
|
+
fugue_ray/_utils/io.py,sha256=jt4A4bQXCbNGvuJblL41TjZO9wscIJcwMlp2VfrbXx8,10155
|
|
133
|
+
fugue_spark/__init__.py,sha256=mFhe1g4wdr6LeTkXJGe7G2yZ2mympB8hbSkWye9L9sA,198
|
|
134
134
|
fugue_spark/_constants.py,sha256=K2uLQfjvMxXk75K-7_Wn47Alpwq5rW57BtECAUrOeqA,177
|
|
135
|
-
fugue_spark/dataframe.py,sha256=
|
|
135
|
+
fugue_spark/dataframe.py,sha256=_gLP9rLVVCb5ILccvYMFDpCAdTAzedecqHKYGZEX0DE,11984
|
|
136
136
|
fugue_spark/execution_engine.py,sha256=YBMtNxCvpy77xICFSg9PHMa6feNoYhWEZe8MmxznX4U,33048
|
|
137
137
|
fugue_spark/registry.py,sha256=_NmiV2cOooYK0YmqATEnNkPEMT9suUMtuecw2NNbIIk,4530
|
|
138
138
|
fugue_spark/tester.py,sha256=VX003yGNlBukaZTQSN-w7XvgSk4rqxrWQIzno0dWrXg,2481
|
|
139
139
|
fugue_spark/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
140
|
-
fugue_spark/_utils/convert.py,sha256=
|
|
140
|
+
fugue_spark/_utils/convert.py,sha256=c5YaJ6kfXd0DnFGqjlR8WGG5TyNKMPVaV2HzdfY-ix0,10201
|
|
141
141
|
fugue_spark/_utils/io.py,sha256=OdUezKpB29Lx9aUS2k9x0xUAGZrmgMZyQYGPEeHk7rQ,5574
|
|
142
142
|
fugue_spark/_utils/misc.py,sha256=lUo19PPwcYGSYj3L10stqf8eRW7lnwPoJ9Q3kUwvwY0,1206
|
|
143
143
|
fugue_spark/_utils/partition.py,sha256=iaesyO5f4uXhj1W-p91cD5ecPiGlu0bzh8gl2ce2Uvg,3618
|
|
144
|
-
fugue_sql/__init__.py,sha256=
|
|
144
|
+
fugue_sql/__init__.py,sha256=pzg-LiHE8n6j0vWysB3Gw_fyx5fHEw7Joja6T137Rrs,320
|
|
145
145
|
fugue_sql/exceptions.py,sha256=ltS0MC8gMnVVrJbQiOZ0kRUWvVQ2LTx33dCW3ugqtb0,260
|
|
146
146
|
fugue_test/__init__.py,sha256=xoQuVobhU64uyODRdnzf6MSWe9lw5khkhpJ2atvADoc,2315
|
|
147
147
|
fugue_test/bag_suite.py,sha256=WbDCFjuAHYoJh4GXSPiSJxOoOwE1VMtYpJ3lQrsUK-Y,2483
|
|
148
|
-
fugue_test/builtin_suite.py,sha256=
|
|
148
|
+
fugue_test/builtin_suite.py,sha256=n79zKh__qdtF67zBi3TCBoY5NC52JIzCWtlFPUkgsHc,79376
|
|
149
149
|
fugue_test/dataframe_suite.py,sha256=7ym4sshDUly6004cq1UlppqDVtbwxD6CKxR4Lu70i0s,18994
|
|
150
150
|
fugue_test/execution_suite.py,sha256=wUiGdb8wLRd13JXo7Lo19vPOLh7t1C-L2NPLeU0k-uE,48736
|
|
151
151
|
fugue_test/fixtures.py,sha256=8Pev-mxRZOWwTFlsGjcSZ0iIs78zyWbp5tq4KG1wyvk,1432
|
|
152
|
-
|
|
153
|
-
fugue-0.9.
|
|
154
|
-
fugue-0.9.
|
|
155
|
-
fugue-0.9.
|
|
156
|
-
fugue-0.9.
|
|
157
|
-
fugue-0.9.6.dist-info/RECORD,,
|
|
152
|
+
fugue-0.9.7.dev0.dist-info/METADATA,sha256=OwSSeQA85bd68u6E1S2mV-nP1REAWoZH8QTEg0i__Bo,18441
|
|
153
|
+
fugue-0.9.7.dev0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
154
|
+
fugue-0.9.7.dev0.dist-info/entry_points.txt,sha256=nlvV6bg8ZNdUE8WJrgHWkNBMiRnPaTWQA8Y7xfqSQVA,255
|
|
155
|
+
fugue-0.9.7.dev0.dist-info/top_level.txt,sha256=OCVNEONKPoAH4r8hUUSnvRHtZVnDu38FH44y3CRxtB8,126
|
|
156
|
+
fugue-0.9.7.dev0.dist-info/RECORD,,
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
[fugue.plugins]
|
|
2
|
+
dask = fugue_dask.registry
|
|
3
|
+
duckdb = fugue_duckdb.registry
|
|
4
|
+
ibis = fugue_ibis
|
|
5
|
+
polars = fugue_polars.registry
|
|
6
|
+
ray = fugue_ray.registry
|
|
7
|
+
spark = fugue_spark.registry
|
|
8
|
+
|
|
9
|
+
[pytest11]
|
|
10
|
+
fugue_test = fugue_test
|
|
11
|
+
fugue_test_fixtures = fugue_test.fixtures
|
fugue_dask/__init__.py
CHANGED
fugue_dask/_utils.py
CHANGED
|
@@ -55,8 +55,7 @@ def hash_repartition(df: dd.DataFrame, num: int, cols: List[Any]) -> dd.DataFram
|
|
|
55
55
|
if num == 1:
|
|
56
56
|
return df.repartition(npartitions=1)
|
|
57
57
|
df = df.reset_index(drop=True).clear_divisions()
|
|
58
|
-
|
|
59
|
-
return _postprocess(idf, ct, num)
|
|
58
|
+
return _add_hash_index(df, num, cols)
|
|
60
59
|
|
|
61
60
|
|
|
62
61
|
def even_repartition(df: dd.DataFrame, num: int, cols: List[Any]) -> dd.DataFrame:
|
|
@@ -81,13 +80,9 @@ def even_repartition(df: dd.DataFrame, num: int, cols: List[Any]) -> dd.DataFram
|
|
|
81
80
|
return df
|
|
82
81
|
df = df.reset_index(drop=True).clear_divisions()
|
|
83
82
|
if len(cols) == 0:
|
|
84
|
-
|
|
83
|
+
return _add_continuous_index(df, num=num)
|
|
85
84
|
else:
|
|
86
|
-
|
|
87
|
-
# when cols are set and num is not set, we use the number of groups
|
|
88
|
-
if num <= 0:
|
|
89
|
-
num = ct
|
|
90
|
-
return _postprocess(idf, ct, num)
|
|
85
|
+
return _add_group_index(df, cols, shuffle=False, num=num)
|
|
91
86
|
|
|
92
87
|
|
|
93
88
|
def rand_repartition(
|
|
@@ -114,25 +109,30 @@ def rand_repartition(
|
|
|
114
109
|
return df.repartition(npartitions=1)
|
|
115
110
|
df = df.reset_index(drop=True).clear_divisions()
|
|
116
111
|
if len(cols) == 0:
|
|
117
|
-
|
|
112
|
+
return _add_random_index(df, num=num, seed=seed)
|
|
118
113
|
else:
|
|
119
|
-
|
|
120
|
-
# when cols are set and num is not set, we use the number of groups
|
|
121
|
-
return _postprocess(idf, ct, num)
|
|
114
|
+
return _add_group_index(df, cols, shuffle=True, num=num, seed=seed)
|
|
122
115
|
|
|
123
116
|
|
|
124
|
-
def
|
|
125
|
-
|
|
117
|
+
def _safe_set_index(df: dd.DataFrame, key_ct: int, num_partitions: int) -> dd.DataFrame:
|
|
118
|
+
if num_partitions <= 0:
|
|
119
|
+
num_partitions = key_ct
|
|
120
|
+
parts = min(key_ct, num_partitions)
|
|
126
121
|
if parts <= 1:
|
|
127
|
-
return
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
122
|
+
return df.set_index(
|
|
123
|
+
_FUGUE_DASK_TEMP_IDX_COLUMN, drop=True, sort=True, npartitions=1
|
|
124
|
+
)
|
|
125
|
+
divisions = np.arange(key_ct, step=int(math.ceil(key_ct / parts))).tolist()
|
|
126
|
+
# divisions.append(ct - 1)
|
|
127
|
+
divisions.append(key_ct)
|
|
128
|
+
return df.set_index(
|
|
129
|
+
_FUGUE_DASK_TEMP_IDX_COLUMN, drop=True, sort=True, divisions=divisions
|
|
130
|
+
)
|
|
131
131
|
|
|
132
132
|
|
|
133
133
|
def _add_group_index(
|
|
134
|
-
df: dd.DataFrame, cols: List[str], shuffle: bool, seed: Any = None
|
|
135
|
-
) ->
|
|
134
|
+
df: dd.DataFrame, cols: List[str], shuffle: bool, num: int, seed: Any = None
|
|
135
|
+
) -> dd.DataFrame:
|
|
136
136
|
keys = df[cols].drop_duplicates().compute()
|
|
137
137
|
if shuffle:
|
|
138
138
|
keys = keys.sample(frac=1, random_state=seed)
|
|
@@ -140,12 +140,10 @@ def _add_group_index(
|
|
|
140
140
|
**{_FUGUE_DASK_TEMP_IDX_COLUMN: pd.Series(range(len(keys)), dtype=int)}
|
|
141
141
|
)
|
|
142
142
|
df = df.merge(dd.from_pandas(keys, npartitions=1), on=cols, broadcast=True)
|
|
143
|
-
return df
|
|
143
|
+
return _safe_set_index(df, len(keys), num)
|
|
144
144
|
|
|
145
145
|
|
|
146
|
-
def _add_hash_index(
|
|
147
|
-
df: dd.DataFrame, num: int, cols: List[str]
|
|
148
|
-
) -> Tuple[dd.DataFrame, int]:
|
|
146
|
+
def _add_hash_index(df: dd.DataFrame, num: int, cols: List[str]) -> dd.DataFrame:
|
|
149
147
|
if len(cols) == 0:
|
|
150
148
|
cols = list(df.columns)
|
|
151
149
|
|
|
@@ -165,13 +163,13 @@ def _add_hash_index(
|
|
|
165
163
|
orig_schema = list(df.dtypes.to_dict().items())
|
|
166
164
|
idf = df.map_partitions(
|
|
167
165
|
_add_hash, meta=orig_schema + [(_FUGUE_DASK_TEMP_IDX_COLUMN, int)]
|
|
168
|
-
)
|
|
169
|
-
return idf, num
|
|
166
|
+
)
|
|
167
|
+
return _safe_set_index(idf, num, num)
|
|
170
168
|
|
|
171
169
|
|
|
172
170
|
def _add_random_index(
|
|
173
171
|
df: dd.DataFrame, num: int, seed: Any = None
|
|
174
|
-
) ->
|
|
172
|
+
) -> dd.DataFrame: # pragma: no cover
|
|
175
173
|
def _add_rand(df: pd.DataFrame) -> pd.DataFrame:
|
|
176
174
|
if len(df) == 0:
|
|
177
175
|
return df.assign(**{_FUGUE_DASK_TEMP_IDX_COLUMN: pd.Series(dtype=int)})
|
|
@@ -184,11 +182,11 @@ def _add_random_index(
|
|
|
184
182
|
orig_schema = list(df.dtypes.to_dict().items())
|
|
185
183
|
idf = df.map_partitions(
|
|
186
184
|
_add_rand, meta=orig_schema + [(_FUGUE_DASK_TEMP_IDX_COLUMN, int)]
|
|
187
|
-
)
|
|
188
|
-
return idf, num
|
|
185
|
+
)
|
|
186
|
+
return _safe_set_index(idf, num, num)
|
|
189
187
|
|
|
190
188
|
|
|
191
|
-
def _add_continuous_index(df: dd.DataFrame) ->
|
|
189
|
+
def _add_continuous_index(df: dd.DataFrame, num: int) -> dd.DataFrame:
|
|
192
190
|
def _get_info(
|
|
193
191
|
df: pd.DataFrame, partition_info: Any
|
|
194
192
|
) -> pd.DataFrame: # pragma: no cover
|
|
@@ -216,8 +214,7 @@ def _add_continuous_index(df: dd.DataFrame) -> Tuple[dd.DataFrame, int]:
|
|
|
216
214
|
idf = df.map_partitions(
|
|
217
215
|
_add_index, meta=orig_schema + [(_FUGUE_DASK_TEMP_IDX_COLUMN, int)]
|
|
218
216
|
)
|
|
219
|
-
|
|
220
|
-
return idf, counts[-1]
|
|
217
|
+
return _safe_set_index(idf, counts[-1], num)
|
|
221
218
|
|
|
222
219
|
|
|
223
220
|
class DaskUtils(PandasLikeUtils[dd.DataFrame, dd.Series]):
|
|
@@ -255,7 +252,7 @@ class DaskUtils(PandasLikeUtils[dd.DataFrame, dd.Series]):
|
|
|
255
252
|
schema: pa.Schema,
|
|
256
253
|
use_extension_types: bool = True,
|
|
257
254
|
use_arrow_dtype: bool = False,
|
|
258
|
-
**kwargs: Any
|
|
255
|
+
**kwargs: Any,
|
|
259
256
|
) -> DataFrame:
|
|
260
257
|
output_dtypes = to_pandas_dtype(
|
|
261
258
|
schema,
|
|
@@ -268,7 +265,7 @@ class DaskUtils(PandasLikeUtils[dd.DataFrame, dd.Series]):
|
|
|
268
265
|
use_extension_types=use_extension_types,
|
|
269
266
|
use_arrow_dtype=use_arrow_dtype,
|
|
270
267
|
meta=output_dtypes,
|
|
271
|
-
**kwargs
|
|
268
|
+
**kwargs,
|
|
272
269
|
)
|
|
273
270
|
|
|
274
271
|
|
fugue_duckdb/execution_engine.py
CHANGED
|
@@ -64,7 +64,9 @@ class DuckDBEngine(SQLEngine):
|
|
|
64
64
|
if isinstance(self.execution_engine, DuckExecutionEngine):
|
|
65
65
|
con = self.execution_engine.connection
|
|
66
66
|
tdf: DuckDataFrame = _to_duck_df(
|
|
67
|
-
self.execution_engine,
|
|
67
|
+
self.execution_engine,
|
|
68
|
+
df,
|
|
69
|
+
create_view=False, # type: ignore
|
|
68
70
|
)
|
|
69
71
|
et = self._get_table(table)
|
|
70
72
|
if et is not None:
|
|
@@ -96,7 +98,9 @@ class DuckDBEngine(SQLEngine):
|
|
|
96
98
|
name_map: Dict[str, str] = {}
|
|
97
99
|
for k, v in dfs.items():
|
|
98
100
|
tdf: DuckDataFrame = _to_duck_df(
|
|
99
|
-
self.execution_engine,
|
|
101
|
+
self.execution_engine,
|
|
102
|
+
v,
|
|
103
|
+
create_view=True, # type: ignore
|
|
100
104
|
)
|
|
101
105
|
name_map[k] = tdf.alias
|
|
102
106
|
query = statement.construct(name_map, dialect=self.dialect, log=self.log)
|
|
@@ -415,7 +419,7 @@ class DuckExecutionEngine(ExecutionEngine):
|
|
|
415
419
|
)
|
|
416
420
|
tb = TempTableName()
|
|
417
421
|
if frac is not None:
|
|
418
|
-
sql = f"SELECT * FROM {tb} USING SAMPLE bernoulli({frac*100} PERCENT)"
|
|
422
|
+
sql = f"SELECT * FROM {tb} USING SAMPLE bernoulli({frac * 100} PERCENT)"
|
|
419
423
|
else:
|
|
420
424
|
sql = f"SELECT * FROM {tb} USING SAMPLE reservoir({n} ROWS)"
|
|
421
425
|
if seed is not None:
|
fugue_ibis/_utils.py
CHANGED
|
@@ -149,7 +149,7 @@ class LazyIbisAttr(LazyIbisObject):
|
|
|
149
149
|
self._super_lazy_internal_objs["parent"],
|
|
150
150
|
self._super_lazy_internal_objs["name"],
|
|
151
151
|
*args,
|
|
152
|
-
**kwargs
|
|
152
|
+
**kwargs,
|
|
153
153
|
)
|
|
154
154
|
|
|
155
155
|
|
|
@@ -196,7 +196,7 @@ def _materialize(obj: Any, context: Dict[int, Any]) -> Any:
|
|
|
196
196
|
)
|
|
197
197
|
v = f(
|
|
198
198
|
*_materialize(obj._super_lazy_internal_objs["args"], context),
|
|
199
|
-
**_materialize(obj._super_lazy_internal_objs["kwargs"], context)
|
|
199
|
+
**_materialize(obj._super_lazy_internal_objs["kwargs"], context),
|
|
200
200
|
)
|
|
201
201
|
elif isinstance(obj, LazyIbisAttr):
|
|
202
202
|
v = getattr(
|
fugue_notebook/__init__.py
CHANGED
fugue_ray/_utils/io.py
CHANGED
|
@@ -147,7 +147,8 @@ class RayIO(object):
|
|
|
147
147
|
f"prepartitioning by keys {by} is not supported by ray, will ignore"
|
|
148
148
|
)
|
|
149
149
|
return self._engine.repartition(
|
|
150
|
-
rdf,
|
|
150
|
+
rdf,
|
|
151
|
+
partition_spec=partition_spec, # type: ignore
|
|
151
152
|
)
|
|
152
153
|
|
|
153
154
|
def _load_parquet(
|
fugue_spark/__init__.py
CHANGED
fugue_spark/_utils/convert.py
CHANGED
|
@@ -81,8 +81,9 @@ def to_cast_expression(
|
|
|
81
81
|
name_match or allow_name_mismatch,
|
|
82
82
|
lambda: ValueError(f"schema name mismatch: {schema1}, {schema2}"),
|
|
83
83
|
)
|
|
84
|
-
n1, n2 =
|
|
85
|
-
|
|
84
|
+
n1, n2 = (
|
|
85
|
+
quote_name(schema1[i].name, quote="`"),
|
|
86
|
+
quote_name(schema2[i].name, quote="`"),
|
|
86
87
|
)
|
|
87
88
|
if schema1[i].dataType != schema2[i].dataType:
|
|
88
89
|
type2 = schema2[i].dataType.simpleString()
|
|
@@ -90,9 +91,7 @@ def to_cast_expression(
|
|
|
90
91
|
schema2[i].dataType, (pt.StringType, pt.IntegralType)
|
|
91
92
|
):
|
|
92
93
|
expr.append(
|
|
93
|
-
f"CAST(IF(isnan({n1}) OR {n1} IS NULL"
|
|
94
|
-
f", NULL, {n1})"
|
|
95
|
-
f" AS {type2}) {n2}"
|
|
94
|
+
f"CAST(IF(isnan({n1}) OR {n1} IS NULL, NULL, {n1}) AS {type2}) {n2}"
|
|
96
95
|
)
|
|
97
96
|
else:
|
|
98
97
|
expr.append(f"CAST({n1} AS {type2}) {n2}")
|
fugue_spark/dataframe.py
CHANGED
|
@@ -178,9 +178,7 @@ class SparkDataFrame(DataFrame):
|
|
|
178
178
|
self, n: int, columns: Optional[List[str]] = None
|
|
179
179
|
) -> LocalBoundedDataFrame:
|
|
180
180
|
sdf = self._select_columns(columns)
|
|
181
|
-
return SparkDataFrame(
|
|
182
|
-
sdf.native.limit(n), sdf.schema
|
|
183
|
-
).as_local() # type: ignore
|
|
181
|
+
return SparkDataFrame(sdf.native.limit(n), sdf.schema).as_local() # type: ignore
|
|
184
182
|
|
|
185
183
|
@property
|
|
186
184
|
def _first(self) -> Optional[List[Any]]:
|
fugue_sql/__init__.py
CHANGED
fugue_test/builtin_suite.py
CHANGED
|
@@ -53,9 +53,8 @@ from fugue import (
|
|
|
53
53
|
register_transformer,
|
|
54
54
|
transformer,
|
|
55
55
|
)
|
|
56
|
-
from fugue.column import col
|
|
56
|
+
from fugue.column import col, lit
|
|
57
57
|
from fugue.column import functions as ff
|
|
58
|
-
from fugue.column import lit
|
|
59
58
|
from fugue.exceptions import (
|
|
60
59
|
FugueInterfacelessError,
|
|
61
60
|
FugueWorkflowCompileError,
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
[fugue.plugins]
|
|
2
|
-
dask = fugue_dask.registry[dask]
|
|
3
|
-
duckdb = fugue_duckdb.registry[duckdb]
|
|
4
|
-
ibis = fugue_ibis[ibis]
|
|
5
|
-
polars = fugue_polars.registry[polars]
|
|
6
|
-
ray = fugue_ray.registry[ray]
|
|
7
|
-
spark = fugue_spark.registry[spark]
|
|
8
|
-
|
|
9
|
-
[pytest11]
|
|
10
|
-
fugue_test = fugue_test
|
|
11
|
-
fugue_test_fixtures = fugue_test.fixtures
|
fugue_version/__init__.py
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.9.6"
|
|
File without changes
|