fugue 0.8.2.dev1__py3-none-any.whl → 0.8.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fugue/__init__.py +9 -5
- fugue/_utils/interfaceless.py +1 -558
- fugue/_utils/io.py +2 -91
- fugue/_utils/registry.py +3 -2
- fugue/api.py +1 -0
- fugue/bag/bag.py +8 -4
- fugue/collections/__init__.py +0 -7
- fugue/collections/partition.py +21 -9
- fugue/constants.py +3 -1
- fugue/dataframe/__init__.py +7 -8
- fugue/dataframe/arrow_dataframe.py +1 -2
- fugue/dataframe/dataframe.py +17 -18
- fugue/dataframe/dataframe_iterable_dataframe.py +22 -6
- fugue/dataframe/function_wrapper.py +432 -0
- fugue/dataframe/iterable_dataframe.py +3 -0
- fugue/dataframe/utils.py +11 -79
- fugue/dataset/api.py +0 -4
- fugue/dev.py +47 -0
- fugue/execution/__init__.py +1 -5
- fugue/execution/api.py +36 -14
- fugue/execution/execution_engine.py +30 -4
- fugue/execution/factory.py +0 -6
- fugue/execution/native_execution_engine.py +44 -67
- fugue/extensions/_builtins/creators.py +4 -2
- fugue/extensions/_builtins/outputters.py +4 -3
- fugue/extensions/_builtins/processors.py +3 -3
- fugue/extensions/creator/convert.py +5 -2
- fugue/extensions/outputter/convert.py +2 -2
- fugue/extensions/processor/convert.py +3 -2
- fugue/extensions/transformer/convert.py +22 -9
- fugue/extensions/transformer/transformer.py +15 -1
- fugue/plugins.py +2 -0
- fugue/registry.py +0 -39
- fugue/sql/_utils.py +1 -1
- fugue/workflow/_checkpoint.py +1 -1
- fugue/workflow/api.py +13 -13
- fugue/workflow/module.py +30 -37
- fugue/workflow/workflow.py +6 -0
- {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/METADATA +37 -23
- {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/RECORD +112 -101
- {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/WHEEL +1 -1
- {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/entry_points.txt +2 -1
- {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/top_level.txt +1 -0
- fugue_contrib/contrib.py +1 -0
- fugue_contrib/viz/_ext.py +7 -1
- fugue_dask/_io.py +0 -13
- fugue_dask/_utils.py +10 -4
- fugue_dask/dataframe.py +1 -2
- fugue_dask/execution_engine.py +45 -18
- fugue_dask/registry.py +8 -33
- fugue_duckdb/_io.py +8 -2
- fugue_duckdb/_utils.py +7 -2
- fugue_duckdb/dask.py +1 -1
- fugue_duckdb/dataframe.py +23 -19
- fugue_duckdb/execution_engine.py +19 -22
- fugue_duckdb/registry.py +11 -34
- fugue_ibis/dataframe.py +6 -10
- fugue_ibis/execution_engine.py +7 -1
- fugue_notebook/env.py +5 -10
- fugue_polars/__init__.py +2 -0
- fugue_polars/_utils.py +8 -0
- fugue_polars/polars_dataframe.py +234 -0
- fugue_polars/registry.py +86 -0
- fugue_ray/_constants.py +10 -1
- fugue_ray/_utils/dataframe.py +36 -9
- fugue_ray/_utils/io.py +2 -4
- fugue_ray/dataframe.py +16 -12
- fugue_ray/execution_engine.py +53 -32
- fugue_ray/registry.py +8 -32
- fugue_spark/_utils/convert.py +22 -11
- fugue_spark/_utils/io.py +0 -13
- fugue_spark/_utils/misc.py +27 -0
- fugue_spark/_utils/partition.py +11 -18
- fugue_spark/dataframe.py +26 -22
- fugue_spark/execution_engine.py +136 -54
- fugue_spark/registry.py +29 -78
- fugue_test/builtin_suite.py +36 -14
- fugue_test/dataframe_suite.py +9 -5
- fugue_test/execution_suite.py +100 -122
- fugue_version/__init__.py +1 -1
- tests/fugue/bag/test_array_bag.py +0 -9
- tests/fugue/collections/test_partition.py +10 -3
- tests/fugue/dataframe/test_function_wrapper.py +293 -0
- tests/fugue/dataframe/test_utils.py +2 -34
- tests/fugue/execution/test_factory.py +7 -9
- tests/fugue/execution/test_naive_execution_engine.py +35 -80
- tests/fugue/extensions/test_utils.py +12 -7
- tests/fugue/extensions/transformer/test_convert_cotransformer.py +1 -0
- tests/fugue/extensions/transformer/test_convert_output_cotransformer.py +1 -0
- tests/fugue/extensions/transformer/test_convert_transformer.py +2 -0
- tests/fugue/sql/test_workflow.py +1 -1
- tests/fugue/sql/test_workflow_parse.py +3 -5
- tests/fugue/utils/test_interfaceless.py +1 -325
- tests/fugue/utils/test_io.py +0 -80
- tests/fugue_dask/test_execution_engine.py +48 -0
- tests/fugue_dask/test_io.py +0 -55
- tests/fugue_duckdb/test_dataframe.py +2 -2
- tests/fugue_duckdb/test_execution_engine.py +16 -1
- tests/fugue_duckdb/test_utils.py +1 -1
- tests/fugue_ibis/test_dataframe.py +6 -3
- tests/fugue_polars/__init__.py +0 -0
- tests/fugue_polars/test_api.py +13 -0
- tests/fugue_polars/test_dataframe.py +82 -0
- tests/fugue_polars/test_transform.py +100 -0
- tests/fugue_ray/test_execution_engine.py +40 -4
- tests/fugue_spark/test_dataframe.py +0 -8
- tests/fugue_spark/test_execution_engine.py +50 -11
- tests/fugue_spark/test_importless.py +4 -4
- tests/fugue_spark/test_spark_connect.py +82 -0
- tests/fugue_spark/utils/test_convert.py +6 -8
- tests/fugue_spark/utils/test_io.py +0 -17
- fugue/_utils/register.py +0 -3
- fugue_test/_utils.py +0 -13
- {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/LICENSE +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any, Optional
|
|
2
2
|
|
|
3
3
|
from fugue.dataframe import DataFrame, DataFrames, LocalDataFrame, ArrayDataFrame
|
|
4
4
|
from fugue.extensions.context import ExtensionContext
|
|
@@ -47,6 +47,13 @@ class Transformer(ExtensionContext):
|
|
|
47
47
|
"""
|
|
48
48
|
raise NotImplementedError
|
|
49
49
|
|
|
50
|
+
def get_format_hint(self) -> Optional[str]:
|
|
51
|
+
"""Get the transformer's preferred data format, for example it can be
|
|
52
|
+
``pandas``, ``pyarrow`` or None. This is to help the execution engine
|
|
53
|
+
use the most efficient way to execute the logic.
|
|
54
|
+
"""
|
|
55
|
+
return None
|
|
56
|
+
|
|
50
57
|
def on_init(self, df: DataFrame) -> None: # pragma: no cover
|
|
51
58
|
"""Callback for initializing
|
|
52
59
|
:ref:`physical partition that contains one or multiple logical partitions
|
|
@@ -147,6 +154,13 @@ class CoTransformer(ExtensionContext):
|
|
|
147
154
|
"""
|
|
148
155
|
raise NotImplementedError
|
|
149
156
|
|
|
157
|
+
def get_format_hint(self) -> Optional[str]: # pragma: no cover
|
|
158
|
+
"""Get the transformer's preferred data format, for example it can be
|
|
159
|
+
``pandas``, ``pyarrow`` or None. This is to help the execution engine
|
|
160
|
+
use the most efficient way to execute the logic.
|
|
161
|
+
"""
|
|
162
|
+
return None
|
|
163
|
+
|
|
150
164
|
def on_init(self, dfs: DataFrames) -> None: # pragma: no cover
|
|
151
165
|
"""Callback for initializing
|
|
152
166
|
:ref:`physical partition that contains one or multiple logical partitions
|
fugue/plugins.py
CHANGED
|
@@ -9,6 +9,7 @@ from fugue.dataframe import (
|
|
|
9
9
|
as_dict_iterable,
|
|
10
10
|
as_pandas,
|
|
11
11
|
drop_columns,
|
|
12
|
+
fugue_annotated_param,
|
|
12
13
|
get_column_names,
|
|
13
14
|
get_schema,
|
|
14
15
|
head,
|
|
@@ -29,6 +30,7 @@ from fugue.dataset import (
|
|
|
29
30
|
is_empty,
|
|
30
31
|
is_local,
|
|
31
32
|
)
|
|
33
|
+
from fugue.execution.api import as_fugue_engine_df
|
|
32
34
|
from fugue.execution.factory import (
|
|
33
35
|
infer_execution_engine,
|
|
34
36
|
parse_execution_engine,
|
fugue/registry.py
CHANGED
|
@@ -1,19 +1,7 @@
|
|
|
1
|
-
import inspect
|
|
2
|
-
from typing import Any, Optional
|
|
3
|
-
|
|
4
|
-
import pyarrow as pa
|
|
5
|
-
|
|
6
|
-
from fugue._utils.interfaceless import (
|
|
7
|
-
DataFrameParam,
|
|
8
|
-
SimpleAnnotationConverter,
|
|
9
|
-
register_annotation_converter,
|
|
10
|
-
)
|
|
11
|
-
from fugue.dataframe import ArrowDataFrame, DataFrame
|
|
12
1
|
from fugue.execution.factory import register_execution_engine, register_sql_engine
|
|
13
2
|
from fugue.execution.native_execution_engine import (
|
|
14
3
|
NativeExecutionEngine,
|
|
15
4
|
QPDPandasEngine,
|
|
16
|
-
SqliteEngine,
|
|
17
5
|
)
|
|
18
6
|
|
|
19
7
|
|
|
@@ -27,7 +15,6 @@ def _register() -> None:
|
|
|
27
15
|
>>> import fugue
|
|
28
16
|
"""
|
|
29
17
|
_register_engines()
|
|
30
|
-
_register_annotation_converters()
|
|
31
18
|
|
|
32
19
|
|
|
33
20
|
def _register_engines() -> None:
|
|
@@ -37,35 +24,9 @@ def _register_engines() -> None:
|
|
|
37
24
|
register_execution_engine(
|
|
38
25
|
"pandas", lambda conf: NativeExecutionEngine(conf), on_dup="ignore"
|
|
39
26
|
)
|
|
40
|
-
register_sql_engine("sqlite", lambda engine: SqliteEngine(engine), on_dup="ignore")
|
|
41
27
|
register_sql_engine(
|
|
42
28
|
"qpdpandas", lambda engine: QPDPandasEngine(engine), on_dup="ignore"
|
|
43
29
|
)
|
|
44
30
|
register_sql_engine(
|
|
45
31
|
"qpd_pandas", lambda engine: QPDPandasEngine(engine), on_dup="ignore"
|
|
46
32
|
)
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
def _register_annotation_converters() -> None:
|
|
50
|
-
register_annotation_converter(
|
|
51
|
-
0.8,
|
|
52
|
-
SimpleAnnotationConverter(
|
|
53
|
-
pa.Table,
|
|
54
|
-
lambda param: _PyArrowTableParam(param),
|
|
55
|
-
),
|
|
56
|
-
)
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
class _PyArrowTableParam(DataFrameParam):
|
|
60
|
-
def __init__(self, param: Optional[inspect.Parameter]):
|
|
61
|
-
super().__init__(param, annotation="Table")
|
|
62
|
-
|
|
63
|
-
def to_input_data(self, df: DataFrame, ctx: Any) -> Any:
|
|
64
|
-
return df.as_arrow()
|
|
65
|
-
|
|
66
|
-
def to_output_df(self, output: Any, schema: Any, ctx: Any) -> DataFrame:
|
|
67
|
-
assert isinstance(output, pa.Table)
|
|
68
|
-
return ArrowDataFrame(output, schema=schema)
|
|
69
|
-
|
|
70
|
-
def count(self, df: Any) -> int: # pragma: no cover
|
|
71
|
-
return df.count()
|
fugue/sql/_utils.py
CHANGED
|
@@ -5,7 +5,7 @@ import jinja2
|
|
|
5
5
|
from jinja2 import Template
|
|
6
6
|
from triad import assert_or_throw
|
|
7
7
|
|
|
8
|
-
from ..collections import Yielded
|
|
8
|
+
from ..collections.yielded import Yielded
|
|
9
9
|
from ..exceptions import FugueSQLError
|
|
10
10
|
from ..workflow.workflow import FugueWorkflow, WorkflowDataFrame
|
|
11
11
|
|
fugue/workflow/_checkpoint.py
CHANGED
|
@@ -166,7 +166,7 @@ class CheckpointPath(object):
|
|
|
166
166
|
|
|
167
167
|
def get_table_name(self, obj_id: str, permanent: bool) -> str:
|
|
168
168
|
path = self._path if permanent else self._temp_path
|
|
169
|
-
return to_uuid(path, obj_id)[:5]
|
|
169
|
+
return "temp_" + to_uuid(path, obj_id)[:5]
|
|
170
170
|
|
|
171
171
|
def temp_file_exists(self, path: str) -> bool:
|
|
172
172
|
try:
|
fugue/workflow/api.py
CHANGED
|
@@ -49,11 +49,11 @@ def transform( # noqa: C901
|
|
|
49
49
|
) -> Any:
|
|
50
50
|
"""Transform this dataframe using transformer. It's a wrapper of
|
|
51
51
|
:meth:`~fugue.workflow.workflow.FugueWorkflow.transform` and
|
|
52
|
-
:meth:`~fugue.workflow.workflow.FugueWorkflow.run`. It let you do
|
|
53
|
-
basic dataframe transformation without using
|
|
52
|
+
:meth:`~fugue.workflow.workflow.FugueWorkflow.run`. It will let you do
|
|
53
|
+
the basic dataframe transformation without using
|
|
54
54
|
:class:`~fugue.workflow.workflow.FugueWorkflow` and
|
|
55
|
-
:class:`~fugue.dataframe.dataframe.DataFrame`.
|
|
56
|
-
|
|
55
|
+
:class:`~fugue.dataframe.dataframe.DataFrame`. Also, only native
|
|
56
|
+
types are accepted for both input and output.
|
|
57
57
|
|
|
58
58
|
Please read |TransformerTutorial|
|
|
59
59
|
|
|
@@ -80,8 +80,8 @@ def transform( # noqa: C901
|
|
|
80
80
|
:param engine_conf: |ParamsLikeObject|, defaults to None
|
|
81
81
|
:param as_fugue: If true, the function will always return
|
|
82
82
|
a ``FugueDataFrame``, otherwise, if ``df`` is in native dataframe types such
|
|
83
|
-
as pandas dataframe, then the output will also in its native format.
|
|
84
|
-
to False
|
|
83
|
+
as pandas dataframe, then the output will also be returned in its native format.
|
|
84
|
+
Defaults to False
|
|
85
85
|
:param persist: Whether to persist(materialize) the dataframe before returning
|
|
86
86
|
:param as_local: If true, the result will be converted to a ``LocalDataFrame``
|
|
87
87
|
:param save_path: Whether to save the output to a file (see the note)
|
|
@@ -109,7 +109,7 @@ def transform( # noqa: C901
|
|
|
109
109
|
|
|
110
110
|
* When `save_path` is None and `checkpoint` is False, then the output will
|
|
111
111
|
not be saved into a file. The return will be a dataframe.
|
|
112
|
-
* When `save_path` is None and `checkpoint` is True, then the output
|
|
112
|
+
* When `save_path` is None and `checkpoint` is True, then the output is
|
|
113
113
|
saved into the path set by `fugue.workflow.checkpoint.path`, the name will
|
|
114
114
|
be randomly chosen, and it is NOT a deterministic checkpoint, so if you run
|
|
115
115
|
multiple times, the output will be saved into different files. The return
|
|
@@ -196,21 +196,21 @@ def out_transform(
|
|
|
196
196
|
) -> None:
|
|
197
197
|
"""Transform this dataframe using transformer. It's a wrapper of
|
|
198
198
|
:meth:`~fugue.workflow.workflow.FugueWorkflow.out_transform` and
|
|
199
|
-
:meth:`~fugue.workflow.workflow.FugueWorkflow.run`. It let you do the
|
|
199
|
+
:meth:`~fugue.workflow.workflow.FugueWorkflow.run`. It will let you do the
|
|
200
200
|
basic dataframe transformation without using
|
|
201
201
|
:class:`~fugue.workflow.workflow.FugueWorkflow` and
|
|
202
|
-
:class:`~fugue.dataframe.dataframe.DataFrame`.
|
|
203
|
-
|
|
202
|
+
:class:`~fugue.dataframe.dataframe.DataFrame`. Only native types are
|
|
203
|
+
accepted for both input and output.
|
|
204
204
|
|
|
205
205
|
Please read |TransformerTutorial|
|
|
206
206
|
|
|
207
207
|
:param df: |DataFrameLikeObject| or :class:`~fugue.workflow.yielded.Yielded`
|
|
208
208
|
or a path string to a parquet file
|
|
209
209
|
:param using: transformer-like object, can't be a string expression
|
|
210
|
-
:param params: |ParamsLikeObject| to run the processor, defaults to None
|
|
210
|
+
:param params: |ParamsLikeObject| to run the processor, defaults to None
|
|
211
211
|
The transformer will be able to access this value from
|
|
212
212
|
:meth:`~fugue.extensions.context.ExtensionContext.params`
|
|
213
|
-
:param partition: |PartitionLikeObject|, defaults to None
|
|
213
|
+
:param partition: |PartitionLikeObject|, defaults to None
|
|
214
214
|
:param callback: |RPCHandlerLikeObject|, defaults to None
|
|
215
215
|
:param ignore_errors: list of exception types the transformer can ignore,
|
|
216
216
|
defaults to None (empty list)
|
|
@@ -225,7 +225,7 @@ def out_transform(
|
|
|
225
225
|
|
|
226
226
|
.. note::
|
|
227
227
|
|
|
228
|
-
This function can only take parquet file paths in `df`.
|
|
228
|
+
This function can only take parquet file paths in `df`. CSV and JSON file
|
|
229
229
|
formats are disallowed.
|
|
230
230
|
|
|
231
231
|
This transformation is guaranteed to execute immediately (eager)
|
fugue/workflow/module.py
CHANGED
|
@@ -2,12 +2,18 @@ import copy
|
|
|
2
2
|
import inspect
|
|
3
3
|
from typing import Any, Callable, Dict, Iterable, Optional
|
|
4
4
|
|
|
5
|
-
from
|
|
6
|
-
from
|
|
7
|
-
|
|
5
|
+
from triad import extension_method
|
|
6
|
+
from triad.collections.function_wrapper import (
|
|
7
|
+
AnnotatedParam,
|
|
8
|
+
FunctionWrapper,
|
|
9
|
+
function_wrapper,
|
|
10
|
+
)
|
|
8
11
|
from triad.utils.assertion import assert_or_throw
|
|
9
12
|
from triad.utils.convert import get_caller_global_local_vars, to_function
|
|
10
|
-
|
|
13
|
+
|
|
14
|
+
from fugue.constants import FUGUE_ENTRYPOINT
|
|
15
|
+
from fugue.exceptions import FugueInterfacelessError
|
|
16
|
+
from fugue.workflow.workflow import FugueWorkflow, WorkflowDataFrame, WorkflowDataFrames
|
|
11
17
|
|
|
12
18
|
|
|
13
19
|
def module(
|
|
@@ -50,23 +56,9 @@ def _to_module(
|
|
|
50
56
|
raise FugueInterfacelessError(f"{obj} is not a valid module", exp)
|
|
51
57
|
|
|
52
58
|
|
|
53
|
-
|
|
54
|
-
def __init__(self, param: Optional[inspect.Parameter]):
|
|
55
|
-
super().__init__(param, "FugueWorkflow", "w")
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
class _WorkflowDataFrameParam(_FuncParam):
|
|
59
|
-
def __init__(self, param: Optional[inspect.Parameter]):
|
|
60
|
-
super().__init__(param, "WorkflowDataFrame", "v")
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
class _WorkflowDataFramesParam(_FuncParam):
|
|
64
|
-
def __init__(self, param: Optional[inspect.Parameter]):
|
|
65
|
-
super().__init__(param, "WorkflowDataFrame", "u")
|
|
66
|
-
|
|
67
|
-
|
|
59
|
+
@function_wrapper(FUGUE_ENTRYPOINT)
|
|
68
60
|
class _ModuleFunctionWrapper(FunctionWrapper):
|
|
69
|
-
def __init__(
|
|
61
|
+
def __init__( # pylint: disable-all
|
|
70
62
|
self,
|
|
71
63
|
func: Callable,
|
|
72
64
|
params_re: str = "^(w?(u|v+)|w(u?|v*))x*z?$",
|
|
@@ -153,20 +145,21 @@ class _ModuleFunctionWrapper(FunctionWrapper):
|
|
|
153
145
|
wf = v.workflow
|
|
154
146
|
return wf
|
|
155
147
|
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
148
|
+
|
|
149
|
+
@_ModuleFunctionWrapper.annotated_param(
|
|
150
|
+
FugueWorkflow,
|
|
151
|
+
"w",
|
|
152
|
+
matcher=lambda x: inspect.isclass(x) and issubclass(x, FugueWorkflow),
|
|
153
|
+
)
|
|
154
|
+
class _FugueWorkflowParam(AnnotatedParam):
|
|
155
|
+
pass
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
@_ModuleFunctionWrapper.annotated_param(WorkflowDataFrame, "v")
|
|
159
|
+
class _WorkflowDataFrameParam(AnnotatedParam):
|
|
160
|
+
pass
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
@_ModuleFunctionWrapper.annotated_param(WorkflowDataFrames, "u")
|
|
164
|
+
class _WorkflowDataFramesParam(AnnotatedParam):
|
|
165
|
+
pass
|
fugue/workflow/workflow.py
CHANGED
|
@@ -1348,6 +1348,12 @@ class WorkflowDataFrame(DataFrame):
|
|
|
1348
1348
|
"""
|
|
1349
1349
|
raise NotImplementedError("WorkflowDataFrame does not support this method")
|
|
1350
1350
|
|
|
1351
|
+
def as_local_bounded(self) -> DataFrame: # type: ignore # pragma: no cover
|
|
1352
|
+
"""
|
|
1353
|
+
:raises NotImplementedError: don't call this method
|
|
1354
|
+
"""
|
|
1355
|
+
raise NotImplementedError("WorkflowDataFrame does not support this method")
|
|
1356
|
+
|
|
1351
1357
|
@property
|
|
1352
1358
|
def is_bounded(self) -> bool: # pragma: no cover
|
|
1353
1359
|
"""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: fugue
|
|
3
|
-
Version: 0.8.2.dev1
|
|
3
|
+
Version: 0.8.4
|
|
4
4
|
Summary: An abstraction layer for distributed computation
|
|
5
5
|
Home-page: http://github.com/fugue-project/fugue
|
|
6
6
|
Author: The Fugue Development Team
|
|
@@ -20,17 +20,16 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
20
20
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
21
21
|
Requires-Python: >=3.7
|
|
22
22
|
Description-Content-Type: text/markdown
|
|
23
|
-
Requires-Dist: triad (>=0.8.
|
|
23
|
+
Requires-Dist: triad (>=0.8.8)
|
|
24
24
|
Requires-Dist: adagio (>=0.2.4)
|
|
25
|
-
Requires-Dist: qpd (>=0.4.
|
|
26
|
-
Requires-Dist: fugue-sql-antlr (>=0.1.
|
|
27
|
-
Requires-Dist: sqlalchemy
|
|
28
|
-
Requires-Dist: sqlglot
|
|
25
|
+
Requires-Dist: qpd (>=0.4.1)
|
|
26
|
+
Requires-Dist: fugue-sql-antlr (>=0.1.6)
|
|
29
27
|
Requires-Dist: pyarrow (>=0.15.1)
|
|
30
|
-
Requires-Dist: pandas (>=1.0
|
|
28
|
+
Requires-Dist: pandas (>=1.2.0)
|
|
29
|
+
Requires-Dist: sqlglot
|
|
31
30
|
Requires-Dist: jinja2
|
|
32
31
|
Provides-Extra: all
|
|
33
|
-
Requires-Dist: fugue-sql-antlr[cpp] (>=0.1.
|
|
32
|
+
Requires-Dist: fugue-sql-antlr[cpp] (>=0.1.6) ; extra == 'all'
|
|
34
33
|
Requires-Dist: pyspark ; extra == 'all'
|
|
35
34
|
Requires-Dist: ray[data] (>=2.0.0) ; extra == 'all'
|
|
36
35
|
Requires-Dist: qpd[dask] (>=0.4.0) ; extra == 'all'
|
|
@@ -39,14 +38,15 @@ Requires-Dist: jupyterlab ; extra == 'all'
|
|
|
39
38
|
Requires-Dist: ipython (>=7.10.0) ; extra == 'all'
|
|
40
39
|
Requires-Dist: duckdb (>=0.5.0) ; extra == 'all'
|
|
41
40
|
Requires-Dist: pyarrow (>=6.0.1) ; extra == 'all'
|
|
41
|
+
Requires-Dist: polars ; extra == 'all'
|
|
42
42
|
Requires-Dist: dask[dataframe,distributed] ; (python_version < "3.8") and extra == 'all'
|
|
43
43
|
Requires-Dist: ibis-framework (>=2.1.1) ; (python_version < "3.8") and extra == 'all'
|
|
44
44
|
Requires-Dist: dask[dataframe,distributed] (>=2022.9.0) ; (python_version >= "3.8") and extra == 'all'
|
|
45
45
|
Requires-Dist: ibis-framework (>=3.2.0) ; (python_version >= "3.8") and extra == 'all'
|
|
46
46
|
Provides-Extra: cpp_sql_parser
|
|
47
|
-
Requires-Dist: fugue-sql-antlr[cpp] (>=0.1.
|
|
47
|
+
Requires-Dist: fugue-sql-antlr[cpp] (>=0.1.6) ; extra == 'cpp_sql_parser'
|
|
48
48
|
Provides-Extra: dask
|
|
49
|
-
Requires-Dist: qpd[dask] (>=0.4.
|
|
49
|
+
Requires-Dist: qpd[dask] (>=0.4.1) ; extra == 'dask'
|
|
50
50
|
Requires-Dist: dask[dataframe,distributed] ; (python_version < "3.8") and extra == 'dask'
|
|
51
51
|
Requires-Dist: dask[dataframe,distributed] (>=2022.9.0) ; (python_version >= "3.8") and extra == 'dask'
|
|
52
52
|
Provides-Extra: duckdb
|
|
@@ -60,6 +60,8 @@ Provides-Extra: notebook
|
|
|
60
60
|
Requires-Dist: notebook ; extra == 'notebook'
|
|
61
61
|
Requires-Dist: jupyterlab ; extra == 'notebook'
|
|
62
62
|
Requires-Dist: ipython (>=7.10.0) ; extra == 'notebook'
|
|
63
|
+
Provides-Extra: polars
|
|
64
|
+
Requires-Dist: polars ; extra == 'polars'
|
|
63
65
|
Provides-Extra: ray
|
|
64
66
|
Requires-Dist: ray[data] (>=2.0.0) ; extra == 'ray'
|
|
65
67
|
Requires-Dist: duckdb (>=0.5.0) ; extra == 'ray'
|
|
@@ -76,9 +78,9 @@ Requires-Dist: pyspark ; extra == 'spark'
|
|
|
76
78
|
[](https://www.codacy.com/gh/fugue-project/fugue/dashboard?utm_source=github.com&utm_medium=referral&utm_content=fugue-project/fugue&utm_campaign=Badge_Grade)
|
|
77
79
|
[](https://pepy.tech/project/fugue)
|
|
78
80
|
|
|
79
|
-
| Tutorials
|
|
80
|
-
|
|
|
81
|
-
| [](https://fugue-tutorials.readthedocs.io/) | [](https://fugue.readthedocs.org)
|
|
81
|
+
| Tutorials | API Documentation | Chat with us on slack! |
|
|
82
|
+
| --------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------ |
|
|
83
|
+
| [](https://fugue-tutorials.readthedocs.io/) | [](https://fugue.readthedocs.org) | [](http://slack.fugue.ai) |
|
|
82
84
|
|
|
83
85
|
|
|
84
86
|
**Fugue is a unified interface for distributed computing that lets users execute Python, Pandas, and SQL code on Spark, Dask, and Ray with minimal rewrites**.
|
|
@@ -217,13 +219,14 @@ It also has the following installation extras:
|
|
|
217
219
|
* **dask**: to support Dask as the ExecutionEngine.
|
|
218
220
|
* **ray**: to support Ray as the ExecutionEngine.
|
|
219
221
|
* **duckdb**: to support DuckDB as the ExecutionEngine, read [details](https://fugue-tutorials.readthedocs.io/tutorials/integrations/backends/duckdb.html).
|
|
222
|
+
* **polars**: to support Polars DataFrames and extensions using Polars.
|
|
220
223
|
* **ibis**: to enable Ibis for Fugue workflows, read [details](https://fugue-tutorials.readthedocs.io/tutorials/integrations/backends/ibis.html).
|
|
221
224
|
* **cpp_sql_parser**: to enable the CPP antlr parser for Fugue SQL. It can be 50+ times faster than the pure Python parser. For the main Python versions and platforms, there are already pre-built binaries, but for the rest, a C++ compiler is needed to build it on the fly.
|
|
222
225
|
|
|
223
226
|
For example, a common use case is:
|
|
224
227
|
|
|
225
228
|
```bash
|
|
226
|
-
pip install fugue[duckdb,spark]
|
|
229
|
+
pip install "fugue[duckdb,spark]"
|
|
227
230
|
```
|
|
228
231
|
|
|
229
232
|
Note that if you have already installed Spark or DuckDB independently, Fugue is able to automatically use them without installing the extras.
|
|
@@ -270,6 +273,7 @@ By being an abstraction layer, Fugue can be used with a lot of other open-source
|
|
|
270
273
|
Python backends:
|
|
271
274
|
|
|
272
275
|
* [Pandas](https://github.com/pandas-dev/pandas)
|
|
276
|
+
* [Polars](https://www.pola.rs) (DataFrames only)
|
|
273
277
|
* [Spark](https://github.com/apache/spark)
|
|
274
278
|
* [Dask](https://github.com/dask/dask)
|
|
275
279
|
* [Ray](http://github.com/ray-project/ray)
|
|
@@ -281,6 +285,7 @@ FugueSQL backends:
|
|
|
281
285
|
* [Duckdb](https://github.com/duckdb/duckdb) - in-process SQL OLAP database management
|
|
282
286
|
* [dask-sql](https://github.com/dask-contrib/dask-sql) - SQL interface for Dask
|
|
283
287
|
* SparkSQL
|
|
288
|
+
* BigQuery
|
|
284
289
|
|
|
285
290
|
|
|
286
291
|
Fugue is available as a backend or can integrate with the following projects:
|
|
@@ -291,23 +296,36 @@ Fugue is available as a backend or can integrate with the following projects:
|
|
|
291
296
|
* [Prefect](https://fugue-tutorials.readthedocs.io/tutorials/integrations/ecosystem/prefect.html) - workflow orchestration
|
|
292
297
|
* [Pandera](https://fugue-tutorials.readthedocs.io/tutorials/integrations/ecosystem/pandera.html) - data validation
|
|
293
298
|
|
|
299
|
+
Registered 3rd party extensions (mainly for Fugue SQL) include:
|
|
294
300
|
|
|
295
|
-
|
|
301
|
+
* [Pandas plot](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.plot.html) - visualize data using matplotlib or plotly
|
|
302
|
+
* [Seaborn](https://seaborn.pydata.org/api.html) - visualize data using seaborn
|
|
303
|
+
* [WhyLogs](https://whylogs.readthedocs.io/en/latest/examples/integrations/Fugue_Profiling.html?highlight=fugue) - visualize data profiling
|
|
304
|
+
* [Vizzu](https://github.com/vizzuhq/ipyvizzu) - visualize data using ipyvizzu
|
|
296
305
|
|
|
297
|
-
|
|
306
|
+
## Community and Contributing
|
|
307
|
+
|
|
308
|
+
Feel free to message us on [Slack](http://slack.fugue.ai). We also have [contributing instructions](CONTRIBUTING.md).
|
|
298
309
|
|
|
299
310
|
### Case Studies
|
|
300
311
|
|
|
301
312
|
* [How LyftLearn Democratizes Distributed Compute through Kubernetes Spark and Fugue](https://eng.lyft.com/how-lyftlearn-democratizes-distributed-compute-through-kubernetes-spark-and-fugue-c0875b97c3d9)
|
|
302
313
|
* [Clobotics - Large Scale Image Processing with Spark through Fugue](https://medium.com/fugue-project/large-scale-image-processing-with-spark-through-fugue-e510b9813da8)
|
|
303
314
|
|
|
315
|
+
### Mentioned Uses
|
|
316
|
+
|
|
317
|
+
* [Productionizing Data Science at Interos, Inc. (LinkedIn post by Anthony Holten)](https://www.linkedin.com/posts/anthony-holten_pandas-spark-dask-activity-7022628193983459328-QvcF)
|
|
318
|
+
|
|
319
|
+
* [Multiple Time Series Forecasting with Fugue & Nixtla at Bain & Company (LinkedIn post by Fahad Akbar)](https://www.linkedin.com/posts/fahadakbar_fugue-datascience-forecasting-activity-7041119034813124608-u08q?utm_source=share&utm_medium=member_desktop)
|
|
320
|
+
|
|
321
|
+
## Further Resources
|
|
322
|
+
|
|
323
|
+
View some of our latest conference presentations and content. For a more complete list, check the [Content](https://fugue-tutorials.readthedocs.io/tutorials/resources/content.html) page in the tutorials.
|
|
324
|
+
|
|
304
325
|
### Blogs
|
|
305
326
|
|
|
306
327
|
* [Why Pandas-like Interfaces are Sub-optimal for Distributed Computing](https://towardsdatascience.com/why-pandas-like-interfaces-are-sub-optimal-for-distributed-computing-322dacbce43)
|
|
307
|
-
* [Interoperable Python and SQL in Jupyter Notebooks (Towards Data Science)](https://towardsdatascience.com/interoperable-python-and-sql-in-jupyter-notebooks-86245e711352)
|
|
308
|
-
* [Introducing Fugue - Reducing PySpark Developer Friction](https://towardsdatascience.com/introducing-fugue-reducing-pyspark-developer-friction-a702230455de)
|
|
309
328
|
* [Introducing FugueSQL — SQL for Pandas, Spark, and Dask DataFrames (Towards Data Science by Khuyen Tran)](https://towardsdatascience.com/introducing-fuguesql-sql-for-pandas-spark-and-dask-dataframes-63d461a16b27)
|
|
310
|
-
* [Using Pandera on Spark for Data Validation through Fugue (Towards Data Science)](https://towardsdatascience.com/using-pandera-on-spark-for-data-validation-through-fugue-72956f274793)
|
|
311
329
|
|
|
312
330
|
### Conferences
|
|
313
331
|
|
|
@@ -317,7 +335,3 @@ View some of our latest conferences presentations and content. For a more comple
|
|
|
317
335
|
* [FugueSQL - The Enhanced SQL Interface for Pandas, Spark, and Dask DataFrames (PyData Global)](https://www.youtube.com/watch?v=OBpnGYjNBBI)
|
|
318
336
|
* [Distributed Hybrid Parameter Tuning](https://www.youtube.com/watch?v=_GBjqskD8Qk)
|
|
319
337
|
|
|
320
|
-
## Community and Contributing
|
|
321
|
-
|
|
322
|
-
Feel free to message us on [Slack](http://slack.fugue.ai). We also have [contributing instructions](CONTRIBUTING.md).
|
|
323
|
-
|