fugue 0.8.2.dev1__py3-none-any.whl → 0.8.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. fugue/__init__.py +9 -5
  2. fugue/_utils/interfaceless.py +1 -558
  3. fugue/_utils/io.py +2 -91
  4. fugue/_utils/registry.py +3 -2
  5. fugue/api.py +1 -0
  6. fugue/bag/bag.py +8 -4
  7. fugue/collections/__init__.py +0 -7
  8. fugue/collections/partition.py +21 -9
  9. fugue/constants.py +3 -1
  10. fugue/dataframe/__init__.py +7 -8
  11. fugue/dataframe/arrow_dataframe.py +1 -2
  12. fugue/dataframe/dataframe.py +17 -18
  13. fugue/dataframe/dataframe_iterable_dataframe.py +22 -6
  14. fugue/dataframe/function_wrapper.py +432 -0
  15. fugue/dataframe/iterable_dataframe.py +3 -0
  16. fugue/dataframe/utils.py +11 -79
  17. fugue/dataset/api.py +0 -4
  18. fugue/dev.py +47 -0
  19. fugue/execution/__init__.py +1 -5
  20. fugue/execution/api.py +36 -14
  21. fugue/execution/execution_engine.py +30 -4
  22. fugue/execution/factory.py +0 -6
  23. fugue/execution/native_execution_engine.py +44 -67
  24. fugue/extensions/_builtins/creators.py +4 -2
  25. fugue/extensions/_builtins/outputters.py +4 -3
  26. fugue/extensions/_builtins/processors.py +3 -3
  27. fugue/extensions/creator/convert.py +5 -2
  28. fugue/extensions/outputter/convert.py +2 -2
  29. fugue/extensions/processor/convert.py +3 -2
  30. fugue/extensions/transformer/convert.py +22 -9
  31. fugue/extensions/transformer/transformer.py +15 -1
  32. fugue/plugins.py +2 -0
  33. fugue/registry.py +0 -39
  34. fugue/sql/_utils.py +1 -1
  35. fugue/workflow/_checkpoint.py +1 -1
  36. fugue/workflow/api.py +13 -13
  37. fugue/workflow/module.py +30 -37
  38. fugue/workflow/workflow.py +6 -0
  39. {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/METADATA +37 -23
  40. {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/RECORD +112 -101
  41. {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/WHEEL +1 -1
  42. {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/entry_points.txt +2 -1
  43. {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/top_level.txt +1 -0
  44. fugue_contrib/contrib.py +1 -0
  45. fugue_contrib/viz/_ext.py +7 -1
  46. fugue_dask/_io.py +0 -13
  47. fugue_dask/_utils.py +10 -4
  48. fugue_dask/dataframe.py +1 -2
  49. fugue_dask/execution_engine.py +45 -18
  50. fugue_dask/registry.py +8 -33
  51. fugue_duckdb/_io.py +8 -2
  52. fugue_duckdb/_utils.py +7 -2
  53. fugue_duckdb/dask.py +1 -1
  54. fugue_duckdb/dataframe.py +23 -19
  55. fugue_duckdb/execution_engine.py +19 -22
  56. fugue_duckdb/registry.py +11 -34
  57. fugue_ibis/dataframe.py +6 -10
  58. fugue_ibis/execution_engine.py +7 -1
  59. fugue_notebook/env.py +5 -10
  60. fugue_polars/__init__.py +2 -0
  61. fugue_polars/_utils.py +8 -0
  62. fugue_polars/polars_dataframe.py +234 -0
  63. fugue_polars/registry.py +86 -0
  64. fugue_ray/_constants.py +10 -1
  65. fugue_ray/_utils/dataframe.py +36 -9
  66. fugue_ray/_utils/io.py +2 -4
  67. fugue_ray/dataframe.py +16 -12
  68. fugue_ray/execution_engine.py +53 -32
  69. fugue_ray/registry.py +8 -32
  70. fugue_spark/_utils/convert.py +22 -11
  71. fugue_spark/_utils/io.py +0 -13
  72. fugue_spark/_utils/misc.py +27 -0
  73. fugue_spark/_utils/partition.py +11 -18
  74. fugue_spark/dataframe.py +26 -22
  75. fugue_spark/execution_engine.py +136 -54
  76. fugue_spark/registry.py +29 -78
  77. fugue_test/builtin_suite.py +36 -14
  78. fugue_test/dataframe_suite.py +9 -5
  79. fugue_test/execution_suite.py +100 -122
  80. fugue_version/__init__.py +1 -1
  81. tests/fugue/bag/test_array_bag.py +0 -9
  82. tests/fugue/collections/test_partition.py +10 -3
  83. tests/fugue/dataframe/test_function_wrapper.py +293 -0
  84. tests/fugue/dataframe/test_utils.py +2 -34
  85. tests/fugue/execution/test_factory.py +7 -9
  86. tests/fugue/execution/test_naive_execution_engine.py +35 -80
  87. tests/fugue/extensions/test_utils.py +12 -7
  88. tests/fugue/extensions/transformer/test_convert_cotransformer.py +1 -0
  89. tests/fugue/extensions/transformer/test_convert_output_cotransformer.py +1 -0
  90. tests/fugue/extensions/transformer/test_convert_transformer.py +2 -0
  91. tests/fugue/sql/test_workflow.py +1 -1
  92. tests/fugue/sql/test_workflow_parse.py +3 -5
  93. tests/fugue/utils/test_interfaceless.py +1 -325
  94. tests/fugue/utils/test_io.py +0 -80
  95. tests/fugue_dask/test_execution_engine.py +48 -0
  96. tests/fugue_dask/test_io.py +0 -55
  97. tests/fugue_duckdb/test_dataframe.py +2 -2
  98. tests/fugue_duckdb/test_execution_engine.py +16 -1
  99. tests/fugue_duckdb/test_utils.py +1 -1
  100. tests/fugue_ibis/test_dataframe.py +6 -3
  101. tests/fugue_polars/__init__.py +0 -0
  102. tests/fugue_polars/test_api.py +13 -0
  103. tests/fugue_polars/test_dataframe.py +82 -0
  104. tests/fugue_polars/test_transform.py +100 -0
  105. tests/fugue_ray/test_execution_engine.py +40 -4
  106. tests/fugue_spark/test_dataframe.py +0 -8
  107. tests/fugue_spark/test_execution_engine.py +50 -11
  108. tests/fugue_spark/test_importless.py +4 -4
  109. tests/fugue_spark/test_spark_connect.py +82 -0
  110. tests/fugue_spark/utils/test_convert.py +6 -8
  111. tests/fugue_spark/utils/test_io.py +0 -17
  112. fugue/_utils/register.py +0 -3
  113. fugue_test/_utils.py +0 -13
  114. {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/LICENSE +0 -0
@@ -1,4 +1,4 @@
1
- from typing import Any
1
+ from typing import Any, Optional
2
2
 
3
3
  from fugue.dataframe import DataFrame, DataFrames, LocalDataFrame, ArrayDataFrame
4
4
  from fugue.extensions.context import ExtensionContext
@@ -47,6 +47,13 @@ class Transformer(ExtensionContext):
47
47
  """
48
48
  raise NotImplementedError
49
49
 
50
+ def get_format_hint(self) -> Optional[str]:
51
+ """Get the transformer's preferred data format, for example it can be
52
+ ``pandas``, ``pyarrow`` and None. This is to help the execution engine
53
+ use the most efficient way to execute the logic.
54
+ """
55
+ return None
56
+
50
57
  def on_init(self, df: DataFrame) -> None: # pragma: no cover
51
58
  """Callback for initializing
52
59
  :ref:`physical partition that contains one or multiple logical partitions
@@ -147,6 +154,13 @@ class CoTransformer(ExtensionContext):
147
154
  """
148
155
  raise NotImplementedError
149
156
 
157
+ def get_format_hint(self) -> Optional[str]: # pragma: no cover
158
+ """Get the transformer's preferred data format, for example it can be
159
+ ``pandas``, ``pyarrow`` and None. This is to help the execution engine
160
+ use the most efficient way to execute the logic.
161
+ """
162
+ return None
163
+
150
164
  def on_init(self, dfs: DataFrames) -> None: # pragma: no cover
151
165
  """Callback for initializing
152
166
  :ref:`physical partition that contains one or multiple logical partitions
fugue/plugins.py CHANGED
@@ -9,6 +9,7 @@ from fugue.dataframe import (
9
9
  as_dict_iterable,
10
10
  as_pandas,
11
11
  drop_columns,
12
+ fugue_annotated_param,
12
13
  get_column_names,
13
14
  get_schema,
14
15
  head,
@@ -29,6 +30,7 @@ from fugue.dataset import (
29
30
  is_empty,
30
31
  is_local,
31
32
  )
33
+ from fugue.execution.api import as_fugue_engine_df
32
34
  from fugue.execution.factory import (
33
35
  infer_execution_engine,
34
36
  parse_execution_engine,
fugue/registry.py CHANGED
@@ -1,19 +1,7 @@
1
- import inspect
2
- from typing import Any, Optional
3
-
4
- import pyarrow as pa
5
-
6
- from fugue._utils.interfaceless import (
7
- DataFrameParam,
8
- SimpleAnnotationConverter,
9
- register_annotation_converter,
10
- )
11
- from fugue.dataframe import ArrowDataFrame, DataFrame
12
1
  from fugue.execution.factory import register_execution_engine, register_sql_engine
13
2
  from fugue.execution.native_execution_engine import (
14
3
  NativeExecutionEngine,
15
4
  QPDPandasEngine,
16
- SqliteEngine,
17
5
  )
18
6
 
19
7
 
@@ -27,7 +15,6 @@ def _register() -> None:
27
15
  >>> import fugue
28
16
  """
29
17
  _register_engines()
30
- _register_annotation_converters()
31
18
 
32
19
 
33
20
  def _register_engines() -> None:
@@ -37,35 +24,9 @@ def _register_engines() -> None:
37
24
  register_execution_engine(
38
25
  "pandas", lambda conf: NativeExecutionEngine(conf), on_dup="ignore"
39
26
  )
40
- register_sql_engine("sqlite", lambda engine: SqliteEngine(engine), on_dup="ignore")
41
27
  register_sql_engine(
42
28
  "qpdpandas", lambda engine: QPDPandasEngine(engine), on_dup="ignore"
43
29
  )
44
30
  register_sql_engine(
45
31
  "qpd_pandas", lambda engine: QPDPandasEngine(engine), on_dup="ignore"
46
32
  )
47
-
48
-
49
- def _register_annotation_converters() -> None:
50
- register_annotation_converter(
51
- 0.8,
52
- SimpleAnnotationConverter(
53
- pa.Table,
54
- lambda param: _PyArrowTableParam(param),
55
- ),
56
- )
57
-
58
-
59
- class _PyArrowTableParam(DataFrameParam):
60
- def __init__(self, param: Optional[inspect.Parameter]):
61
- super().__init__(param, annotation="Table")
62
-
63
- def to_input_data(self, df: DataFrame, ctx: Any) -> Any:
64
- return df.as_arrow()
65
-
66
- def to_output_df(self, output: Any, schema: Any, ctx: Any) -> DataFrame:
67
- assert isinstance(output, pa.Table)
68
- return ArrowDataFrame(output, schema=schema)
69
-
70
- def count(self, df: Any) -> int: # pragma: no cover
71
- return df.count()
fugue/sql/_utils.py CHANGED
@@ -5,7 +5,7 @@ import jinja2
5
5
  from jinja2 import Template
6
6
  from triad import assert_or_throw
7
7
 
8
- from ..collections import Yielded
8
+ from ..collections.yielded import Yielded
9
9
  from ..exceptions import FugueSQLError
10
10
  from ..workflow.workflow import FugueWorkflow, WorkflowDataFrame
11
11
 
@@ -166,7 +166,7 @@ class CheckpointPath(object):
166
166
 
167
167
  def get_table_name(self, obj_id: str, permanent: bool) -> str:
168
168
  path = self._path if permanent else self._temp_path
169
- return to_uuid(path, obj_id)[:5]
169
+ return "temp_" + to_uuid(path, obj_id)[:5]
170
170
 
171
171
  def temp_file_exists(self, path: str) -> bool:
172
172
  try:
fugue/workflow/api.py CHANGED
@@ -49,11 +49,11 @@ def transform( # noqa: C901
49
49
  ) -> Any:
50
50
  """Transform this dataframe using transformer. It's a wrapper of
51
51
  :meth:`~fugue.workflow.workflow.FugueWorkflow.transform` and
52
- :meth:`~fugue.workflow.workflow.FugueWorkflow.run`. It let you do the
53
- basic dataframe transformation without using
52
+ :meth:`~fugue.workflow.workflow.FugueWorkflow.run`. It will let you do
53
+ the basic dataframe transformation without using
54
54
  :class:`~fugue.workflow.workflow.FugueWorkflow` and
55
- :class:`~fugue.dataframe.dataframe.DataFrame`. Both input and output
56
- can be native types only.
55
+ :class:`~fugue.dataframe.dataframe.DataFrame`. Also, only native
56
+ types are accepted for both input and output.
57
57
 
58
58
  Please read |TransformerTutorial|
59
59
 
@@ -80,8 +80,8 @@ def transform( # noqa: C901
80
80
  :param engine_conf: |ParamsLikeObject|, defaults to None
81
81
  :param as_fugue: If true, the function will always return
82
82
  a ``FugueDataFrame``, otherwise, if ``df`` is in native dataframe types such
83
- as pandas dataframe, then the output will also in its native format. Defaults
84
- to False
83
+ as pandas dataframe, then the output will also return in its native format.
84
+ Defaults to False
85
85
  :param persist: Whether to persist(materialize) the dataframe before returning
86
86
  :param as_local: If true, the result will be converted to a ``LocalDataFrame``
87
87
  :param save_path: Whether to save the output to a file (see the note)
@@ -109,7 +109,7 @@ def transform( # noqa: C901
109
109
 
110
110
  * When `save_path` is None and `checkpoint` is False, then the output will
111
111
  not be saved into a file. The return will be a dataframe.
112
- * When `save_path` is None and `checkpoint` is True, then the output will be
112
+ * When `save_path` is None and `checkpoint` is True, then the output is
113
113
  saved into the path set by `fugue.workflow.checkpoint.path`, the name will
114
114
  be randomly chosen, and it is NOT a deterministic checkpoint, so if you run
115
115
  multiple times, the output will be saved into different files. The return
@@ -196,21 +196,21 @@ def out_transform(
196
196
  ) -> None:
197
197
  """Transform this dataframe using transformer. It's a wrapper of
198
198
  :meth:`~fugue.workflow.workflow.FugueWorkflow.out_transform` and
199
- :meth:`~fugue.workflow.workflow.FugueWorkflow.run`. It let you do the
199
+ :meth:`~fugue.workflow.workflow.FugueWorkflow.run`. It will let you do the
200
200
  basic dataframe transformation without using
201
201
  :class:`~fugue.workflow.workflow.FugueWorkflow` and
202
- :class:`~fugue.dataframe.dataframe.DataFrame`. The input can be native
203
- type only
202
+ :class:`~fugue.dataframe.dataframe.DataFrame`. Only native types are
203
+ accepted for both input and output.
204
204
 
205
205
  Please read |TransformerTutorial|
206
206
 
207
207
  :param df: |DataFrameLikeObject| or :class:`~fugue.workflow.yielded.Yielded`
208
208
  or a path string to a parquet file
209
209
  :param using: transformer-like object, can't be a string expression
210
- :param params: |ParamsLikeObject| to run the processor, defaults to None.
210
+ :param params: |ParamsLikeObject| to run the processor, defaults to None
211
211
  The transformer will be able to access this value from
212
212
  :meth:`~fugue.extensions.context.ExtensionContext.params`
213
- :param partition: |PartitionLikeObject|, defaults to None.
213
+ :param partition: |PartitionLikeObject|, defaults to None
214
214
  :param callback: |RPCHandlerLikeObject|, defaults to None
215
215
  :param ignore_errors: list of exception types the transformer can ignore,
216
216
  defaults to None (empty list)
@@ -225,7 +225,7 @@ def out_transform(
225
225
 
226
226
  .. note::
227
227
 
228
- This function can only take parquet file paths in `df`. Csv and other file
228
+ This function can only take parquet file paths in `df`. CSV and JSON file
229
229
  formats are disallowed.
230
230
 
231
231
  This transformation is guaranteed to execute immediately (eager)
fugue/workflow/module.py CHANGED
@@ -2,12 +2,18 @@ import copy
2
2
  import inspect
3
3
  from typing import Any, Callable, Dict, Iterable, Optional
4
4
 
5
- from fugue._utils.interfaceless import FunctionWrapper, _FuncParam
6
- from fugue.exceptions import FugueInterfacelessError
7
- from fugue.workflow.workflow import FugueWorkflow, WorkflowDataFrame, WorkflowDataFrames
5
+ from triad import extension_method
6
+ from triad.collections.function_wrapper import (
7
+ AnnotatedParam,
8
+ FunctionWrapper,
9
+ function_wrapper,
10
+ )
8
11
  from triad.utils.assertion import assert_or_throw
9
12
  from triad.utils.convert import get_caller_global_local_vars, to_function
10
- from triad import extension_method
13
+
14
+ from fugue.constants import FUGUE_ENTRYPOINT
15
+ from fugue.exceptions import FugueInterfacelessError
16
+ from fugue.workflow.workflow import FugueWorkflow, WorkflowDataFrame, WorkflowDataFrames
11
17
 
12
18
 
13
19
  def module(
@@ -50,23 +56,9 @@ def _to_module(
50
56
  raise FugueInterfacelessError(f"{obj} is not a valid module", exp)
51
57
 
52
58
 
53
- class _FugueWorkflowParam(_FuncParam):
54
- def __init__(self, param: Optional[inspect.Parameter]):
55
- super().__init__(param, "FugueWorkflow", "w")
56
-
57
-
58
- class _WorkflowDataFrameParam(_FuncParam):
59
- def __init__(self, param: Optional[inspect.Parameter]):
60
- super().__init__(param, "WorkflowDataFrame", "v")
61
-
62
-
63
- class _WorkflowDataFramesParam(_FuncParam):
64
- def __init__(self, param: Optional[inspect.Parameter]):
65
- super().__init__(param, "WorkflowDataFrame", "u")
66
-
67
-
59
+ @function_wrapper(FUGUE_ENTRYPOINT)
68
60
  class _ModuleFunctionWrapper(FunctionWrapper):
69
- def __init__(
61
+ def __init__( # pylint: disable-all
70
62
  self,
71
63
  func: Callable,
72
64
  params_re: str = "^(w?(u|v+)|w(u?|v*))x*z?$",
@@ -153,20 +145,21 @@ class _ModuleFunctionWrapper(FunctionWrapper):
153
145
  wf = v.workflow
154
146
  return wf
155
147
 
156
- def _parse_param(
157
- self,
158
- annotation: Any,
159
- param: Optional[inspect.Parameter],
160
- none_as_other: bool = True,
161
- ) -> _FuncParam:
162
- if issubclass(annotation, FugueWorkflow):
163
- # to prevent cyclic import
164
- return _FugueWorkflowParam(param)
165
- elif annotation == WorkflowDataFrame:
166
- # to prevent cyclic import
167
- return _WorkflowDataFrameParam(param)
168
- elif annotation == WorkflowDataFrames:
169
- # to prevent cyclic import
170
- return _WorkflowDataFramesParam(param)
171
- else:
172
- return super()._parse_param(annotation, param, none_as_other)
148
+
149
+ @_ModuleFunctionWrapper.annotated_param(
150
+ FugueWorkflow,
151
+ "w",
152
+ matcher=lambda x: inspect.isclass(x) and issubclass(x, FugueWorkflow),
153
+ )
154
+ class _FugueWorkflowParam(AnnotatedParam):
155
+ pass
156
+
157
+
158
+ @_ModuleFunctionWrapper.annotated_param(WorkflowDataFrame, "v")
159
+ class _WorkflowDataFrameParam(AnnotatedParam):
160
+ pass
161
+
162
+
163
+ @_ModuleFunctionWrapper.annotated_param(WorkflowDataFrames, "u")
164
+ class _WorkflowDataFramesParam(AnnotatedParam):
165
+ pass
@@ -1348,6 +1348,12 @@ class WorkflowDataFrame(DataFrame):
1348
1348
  """
1349
1349
  raise NotImplementedError("WorkflowDataFrame does not support this method")
1350
1350
 
1351
+ def as_local_bounded(self) -> DataFrame: # type: ignore # pragma: no cover
1352
+ """
1353
+ :raises NotImplementedError: don't call this method
1354
+ """
1355
+ raise NotImplementedError("WorkflowDataFrame does not support this method")
1356
+
1351
1357
  @property
1352
1358
  def is_bounded(self) -> bool: # pragma: no cover
1353
1359
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: fugue
3
- Version: 0.8.2.dev1
3
+ Version: 0.8.4
4
4
  Summary: An abstraction layer for distributed computation
5
5
  Home-page: http://github.com/fugue-project/fugue
6
6
  Author: The Fugue Development Team
@@ -20,17 +20,16 @@ Classifier: Programming Language :: Python :: 3.10
20
20
  Classifier: Programming Language :: Python :: 3 :: Only
21
21
  Requires-Python: >=3.7
22
22
  Description-Content-Type: text/markdown
23
- Requires-Dist: triad (>=0.8.1)
23
+ Requires-Dist: triad (>=0.8.8)
24
24
  Requires-Dist: adagio (>=0.2.4)
25
- Requires-Dist: qpd (>=0.4.0)
26
- Requires-Dist: fugue-sql-antlr (>=0.1.5)
27
- Requires-Dist: sqlalchemy
28
- Requires-Dist: sqlglot
25
+ Requires-Dist: qpd (>=0.4.1)
26
+ Requires-Dist: fugue-sql-antlr (>=0.1.6)
29
27
  Requires-Dist: pyarrow (>=0.15.1)
30
- Requires-Dist: pandas (>=1.0.2)
28
+ Requires-Dist: pandas (>=1.2.0)
29
+ Requires-Dist: sqlglot
31
30
  Requires-Dist: jinja2
32
31
  Provides-Extra: all
33
- Requires-Dist: fugue-sql-antlr[cpp] (>=0.1.5) ; extra == 'all'
32
+ Requires-Dist: fugue-sql-antlr[cpp] (>=0.1.6) ; extra == 'all'
34
33
  Requires-Dist: pyspark ; extra == 'all'
35
34
  Requires-Dist: ray[data] (>=2.0.0) ; extra == 'all'
36
35
  Requires-Dist: qpd[dask] (>=0.4.0) ; extra == 'all'
@@ -39,14 +38,15 @@ Requires-Dist: jupyterlab ; extra == 'all'
39
38
  Requires-Dist: ipython (>=7.10.0) ; extra == 'all'
40
39
  Requires-Dist: duckdb (>=0.5.0) ; extra == 'all'
41
40
  Requires-Dist: pyarrow (>=6.0.1) ; extra == 'all'
41
+ Requires-Dist: polars ; extra == 'all'
42
42
  Requires-Dist: dask[dataframe,distributed] ; (python_version < "3.8") and extra == 'all'
43
43
  Requires-Dist: ibis-framework (>=2.1.1) ; (python_version < "3.8") and extra == 'all'
44
44
  Requires-Dist: dask[dataframe,distributed] (>=2022.9.0) ; (python_version >= "3.8") and extra == 'all'
45
45
  Requires-Dist: ibis-framework (>=3.2.0) ; (python_version >= "3.8") and extra == 'all'
46
46
  Provides-Extra: cpp_sql_parser
47
- Requires-Dist: fugue-sql-antlr[cpp] (>=0.1.5) ; extra == 'cpp_sql_parser'
47
+ Requires-Dist: fugue-sql-antlr[cpp] (>=0.1.6) ; extra == 'cpp_sql_parser'
48
48
  Provides-Extra: dask
49
- Requires-Dist: qpd[dask] (>=0.4.0) ; extra == 'dask'
49
+ Requires-Dist: qpd[dask] (>=0.4.1) ; extra == 'dask'
50
50
  Requires-Dist: dask[dataframe,distributed] ; (python_version < "3.8") and extra == 'dask'
51
51
  Requires-Dist: dask[dataframe,distributed] (>=2022.9.0) ; (python_version >= "3.8") and extra == 'dask'
52
52
  Provides-Extra: duckdb
@@ -60,6 +60,8 @@ Provides-Extra: notebook
60
60
  Requires-Dist: notebook ; extra == 'notebook'
61
61
  Requires-Dist: jupyterlab ; extra == 'notebook'
62
62
  Requires-Dist: ipython (>=7.10.0) ; extra == 'notebook'
63
+ Provides-Extra: polars
64
+ Requires-Dist: polars ; extra == 'polars'
63
65
  Provides-Extra: ray
64
66
  Requires-Dist: ray[data] (>=2.0.0) ; extra == 'ray'
65
67
  Requires-Dist: duckdb (>=0.5.0) ; extra == 'ray'
@@ -76,9 +78,9 @@ Requires-Dist: pyspark ; extra == 'spark'
76
78
  [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fa5f2f53e6f48aaa1218a89f4808b91)](https://www.codacy.com/gh/fugue-project/fugue/dashboard?utm_source=github.com&utm_medium=referral&utm_content=fugue-project/fugue&utm_campaign=Badge_Grade)
77
79
  [![Downloads](https://pepy.tech/badge/fugue)](https://pepy.tech/project/fugue)
78
80
 
79
- | Tutorials | API Documentation | Chat with us on slack! |
80
- | --- | --- | --- |
81
- | [![Jupyter Book Badge](https://jupyterbook.org/badge.svg)](https://fugue-tutorials.readthedocs.io/) | [![Doc](https://readthedocs.org/projects/fugue/badge)](https://fugue.readthedocs.org) | [![Slack Status](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](http://slack.fugue.ai) |
81
+ | Tutorials | API Documentation | Chat with us on slack! |
82
+ | --------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------ |
83
+ | [![Jupyter Book Badge](https://jupyterbook.org/badge.svg)](https://fugue-tutorials.readthedocs.io/) | [![Doc](https://readthedocs.org/projects/fugue/badge)](https://fugue.readthedocs.org) | [![Slack Status](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](http://slack.fugue.ai) |
82
84
 
83
85
 
84
86
  **Fugue is a unified interface for distributed computing that lets users execute Python, Pandas, and SQL code on Spark, Dask, and Ray with minimal rewrites**.
@@ -217,13 +219,14 @@ It also has the following installation extras:
217
219
  * **dask**: to support Dask as the ExecutionEngine.
218
220
  * **ray**: to support Ray as the ExecutionEngine.
219
221
  * **duckdb**: to support DuckDB as the ExecutionEngine, read [details](https://fugue-tutorials.readthedocs.io/tutorials/integrations/backends/duckdb.html).
222
+ * **polars**: to support Polars DataFrames and extensions using Polars.
220
223
  * **ibis**: to enable Ibis for Fugue workflows, read [details](https://fugue-tutorials.readthedocs.io/tutorials/integrations/backends/ibis.html).
221
224
  * **cpp_sql_parser**: to enable the CPP antlr parser for Fugue SQL. It can be 50+ times faster than the pure Python parser. For the main Python versions and platforms, there is already pre-built binaries, but for the remaining, it needs a C++ compiler to build on the fly.
222
225
 
223
226
  For example a common use case is:
224
227
 
225
228
  ```bash
226
- pip install fugue[duckdb,spark]
229
+ pip install "fugue[duckdb,spark]"
227
230
  ```
228
231
 
229
232
  Note if you already installed Spark or DuckDB independently, Fugue is able to automatically use them without installing the extras.
@@ -270,6 +273,7 @@ By being an abstraction layer, Fugue can be used with a lot of other open-source
270
273
  Python backends:
271
274
 
272
275
  * [Pandas](https://github.com/pandas-dev/pandas)
276
+ * [Polars](https://www.pola.rs) (DataFrames only)
273
277
  * [Spark](https://github.com/apache/spark)
274
278
  * [Dask](https://github.com/dask/dask)
275
279
  * [Ray](http://github.com/ray-project/ray)
@@ -281,6 +285,7 @@ FugueSQL backends:
281
285
  * [Duckdb](https://github.com/duckdb/duckdb) - in-process SQL OLAP database management
282
286
  * [dask-sql](https://github.com/dask-contrib/dask-sql) - SQL interface for Dask
283
287
  * SparkSQL
288
+ * BigQuery
284
289
 
285
290
 
286
291
  Fugue is available as a backend or can integrate with the following projects:
@@ -291,23 +296,36 @@ Fugue is available as a backend or can integrate with the following projects:
291
296
  * [Prefect](https://fugue-tutorials.readthedocs.io/tutorials/integrations/ecosystem/prefect.html) - workflow orchestration
292
297
  * [Pandera](https://fugue-tutorials.readthedocs.io/tutorials/integrations/ecosystem/pandera.html) - data validation
293
298
 
299
+ Registered 3rd party extensions (majorly for Fugue SQL) include:
294
300
 
295
- ## Further Resources
301
+ * [Pandas plot](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.plot.html) - visualize data using matplotlib or plotly
302
+ * [Seaborn](https://seaborn.pydata.org/api.html) - visualize data using seaborn
303
+ * [WhyLogs](https://whylogs.readthedocs.io/en/latest/examples/integrations/Fugue_Profiling.html?highlight=fugue) - visualize data profiling
304
+ * [Vizzu](https://github.com/vizzuhq/ipyvizzu) - visualize data using ipyvizzu
296
305
 
297
- View some of our latest conferences presentations and content. For a more complete list, check the [Content](https://fugue-tutorials.readthedocs.io/tutorials/resources/content.html) page in the tutorials.
306
+ ## Community and Contributing
307
+
308
+ Feel free to message us on [Slack](http://slack.fugue.ai). We also have [contributing instructions](CONTRIBUTING.md).
298
309
 
299
310
  ### Case Studies
300
311
 
301
312
  * [How LyftLearn Democratizes Distributed Compute through Kubernetes Spark and Fugue](https://eng.lyft.com/how-lyftlearn-democratizes-distributed-compute-through-kubernetes-spark-and-fugue-c0875b97c3d9)
302
313
  * [Clobotics - Large Scale Image Processing with Spark through Fugue](https://medium.com/fugue-project/large-scale-image-processing-with-spark-through-fugue-e510b9813da8)
303
314
 
315
+ ### Mentioned Uses
316
+
317
+ * [Productionizing Data Science at Interos, Inc. (LinkedIn post by Anthony Holten)](https://www.linkedin.com/posts/anthony-holten_pandas-spark-dask-activity-7022628193983459328-QvcF)
318
+
319
+ * [Multiple Time Series Forecasting with Fugue & Nixtla at Bain & Company(LinkedIn post by Fahad Akbar)](https://www.linkedin.com/posts/fahadakbar_fugue-datascience-forecasting-activity-7041119034813124608-u08q?utm_source=share&utm_medium=member_desktop)
320
+
321
+ ## Further Resources
322
+
323
+ View some of our latest conferences presentations and content. For a more complete list, check the [Content](https://fugue-tutorials.readthedocs.io/tutorials/resources/content.html) page in the tutorials.
324
+
304
325
  ### Blogs
305
326
 
306
327
  * [Why Pandas-like Interfaces are Sub-optimal for Distributed Computing](https://towardsdatascience.com/why-pandas-like-interfaces-are-sub-optimal-for-distributed-computing-322dacbce43)
307
- * [Interoperable Python and SQL in Jupyter Notebooks (Towards Data Science)](https://towardsdatascience.com/interoperable-python-and-sql-in-jupyter-notebooks-86245e711352)
308
- * [Introducing Fugue - Reducing PySpark Developer Friction](https://towardsdatascience.com/introducing-fugue-reducing-pyspark-developer-friction-a702230455de)
309
328
  * [Introducing FugueSQL — SQL for Pandas, Spark, and Dask DataFrames (Towards Data Science by Khuyen Tran)](https://towardsdatascience.com/introducing-fuguesql-sql-for-pandas-spark-and-dask-dataframes-63d461a16b27)
310
- * [Using Pandera on Spark for Data Validation through Fugue (Towards Data Science)](https://towardsdatascience.com/using-pandera-on-spark-for-data-validation-through-fugue-72956f274793)
311
329
 
312
330
  ### Conferences
313
331
 
@@ -317,7 +335,3 @@ View some of our latest conferences presentations and content. For a more comple
317
335
  * [FugueSQL - The Enhanced SQL Interface for Pandas, Spark, and Dask DataFrames (PyData Global)](https://www.youtube.com/watch?v=OBpnGYjNBBI)
318
336
  * [Distributed Hybrid Parameter Tuning](https://www.youtube.com/watch?v=_GBjqskD8Qk)
319
337
 
320
- ## Community and Contributing
321
-
322
- Feel free to message us on [Slack](http://slack.fugue.ai). We also have [contributing instructions](CONTRIBUTING.md).
323
-