fugue 0.8.7.dev7__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (56)
  1. fugue/collections/sql.py +1 -1
  2. fugue/dataframe/utils.py +4 -18
  3. fugue/test/__init__.py +11 -0
  4. fugue/test/pandas_tester.py +24 -0
  5. fugue/test/plugins.py +393 -0
  6. {fugue-0.8.7.dev7.dist-info → fugue-0.9.0.dist-info}/METADATA +24 -15
  7. {fugue-0.8.7.dev7.dist-info → fugue-0.9.0.dist-info}/RECORD +38 -47
  8. {fugue-0.8.7.dev7.dist-info → fugue-0.9.0.dist-info}/WHEEL +1 -1
  9. fugue-0.9.0.dist-info/entry_points.txt +12 -0
  10. fugue_dask/_io.py +8 -5
  11. fugue_dask/_utils.py +4 -4
  12. fugue_dask/execution_engine.py +11 -0
  13. fugue_dask/registry.py +2 -0
  14. fugue_dask/tester.py +24 -0
  15. fugue_duckdb/__init__.py +0 -5
  16. fugue_duckdb/_io.py +1 -0
  17. fugue_duckdb/registry.py +30 -2
  18. fugue_duckdb/tester.py +49 -0
  19. fugue_ibis/__init__.py +0 -3
  20. fugue_ibis/dataframe.py +2 -2
  21. fugue_ibis/execution_engine.py +14 -7
  22. fugue_ray/_constants.py +3 -4
  23. fugue_ray/_utils/dataframe.py +10 -21
  24. fugue_ray/_utils/io.py +38 -9
  25. fugue_ray/execution_engine.py +1 -2
  26. fugue_ray/registry.py +1 -0
  27. fugue_ray/tester.py +22 -0
  28. fugue_spark/execution_engine.py +5 -5
  29. fugue_spark/registry.py +13 -1
  30. fugue_spark/tester.py +78 -0
  31. fugue_test/__init__.py +82 -0
  32. fugue_test/builtin_suite.py +26 -43
  33. fugue_test/dataframe_suite.py +5 -14
  34. fugue_test/execution_suite.py +170 -143
  35. fugue_test/fixtures.py +61 -0
  36. fugue_version/__init__.py +1 -1
  37. fugue-0.8.7.dev7.dist-info/entry_points.txt +0 -17
  38. fugue_dask/ibis_engine.py +0 -62
  39. fugue_duckdb/ibis_engine.py +0 -56
  40. fugue_ibis/execution/__init__.py +0 -0
  41. fugue_ibis/execution/ibis_engine.py +0 -49
  42. fugue_ibis/execution/pandas_backend.py +0 -54
  43. fugue_ibis/extensions.py +0 -203
  44. fugue_spark/ibis_engine.py +0 -45
  45. fugue_test/ibis_suite.py +0 -92
  46. fugue_test/plugins/__init__.py +0 -0
  47. fugue_test/plugins/dask/__init__.py +0 -2
  48. fugue_test/plugins/dask/fixtures.py +0 -12
  49. fugue_test/plugins/duckdb/__init__.py +0 -2
  50. fugue_test/plugins/duckdb/fixtures.py +0 -9
  51. fugue_test/plugins/misc/__init__.py +0 -2
  52. fugue_test/plugins/misc/fixtures.py +0 -18
  53. fugue_test/plugins/ray/__init__.py +0 -2
  54. fugue_test/plugins/ray/fixtures.py +0 -9
  55. {fugue-0.8.7.dev7.dist-info → fugue-0.9.0.dist-info}/LICENSE +0 -0
  56. {fugue-0.8.7.dev7.dist-info → fugue-0.9.0.dist-info}/top_level.txt +0 -0
fugue_test/fixtures.py ADDED
@@ -0,0 +1,61 @@
+ import pytest
+
+ _DEFAULT_SCOPE = "module"
+
+
+ @pytest.fixture(scope=_DEFAULT_SCOPE)
+ def pandas_session():
+     yield "pandas"
+
+
+ @pytest.fixture(scope=_DEFAULT_SCOPE)
+ def native_session():
+     yield "native"
+
+
+ @pytest.fixture(scope=_DEFAULT_SCOPE)
+ def dask_session():
+     from fugue_dask.tester import DaskTestBackend
+
+     with DaskTestBackend.generate_session_fixture() as session:
+         yield session
+
+
+ @pytest.fixture(scope=_DEFAULT_SCOPE)
+ def duckdb_session():
+     from fugue_duckdb.tester import DuckDBTestBackend
+
+     with DuckDBTestBackend.generate_session_fixture() as session:
+         yield session
+
+
+ @pytest.fixture(scope=_DEFAULT_SCOPE)
+ def duckdask_session():
+     from fugue_duckdb.tester import DuckDaskTestBackend
+
+     with DuckDaskTestBackend.generate_session_fixture() as session:
+         yield session
+
+
+ @pytest.fixture(scope=_DEFAULT_SCOPE)
+ def ray_session():
+     from fugue_ray.tester import RayTestBackend
+
+     with RayTestBackend.generate_session_fixture() as session:
+         yield session
+
+
+ @pytest.fixture(scope=_DEFAULT_SCOPE)
+ def spark_session():
+     from fugue_spark.tester import SparkTestBackend
+
+     with SparkTestBackend.generate_session_fixture() as session:
+         yield session
+
+
+ @pytest.fixture(scope=_DEFAULT_SCOPE)
+ def sparkconnect_session():
+     from fugue_spark.tester import SparkConnectTestBackend
+
+     with SparkConnectTestBackend.generate_session_fixture() as session:
+         yield session
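
These session fixtures are ordinary pytest fixtures, so once the plugin is installed a downstream test can depend on them by name. A minimal sketch of consuming one (the test body, dataframe values, and use of fugue.api.transform are illustrative, not part of this diff):

    import pandas as pd
    import fugue.api as fa

    def add_one(df: pd.DataFrame) -> pd.DataFrame:
        return df.assign(b=df["a"] + 1)

    def test_add_one(pandas_session):
        # pandas_session yields the engine specifier "pandas" (see the fixture above)
        result = fa.transform(
            pd.DataFrame({"a": [0, 1]}),
            add_one,
            schema="*,b:long",
            engine=pandas_session,
        )
        assert result["b"].tolist() == [1, 2]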
fugue_version/__init__.py CHANGED
@@ -1 +1 @@
- __version__ = "0.8.7"
+ __version__ = "0.9.0"
fugue-0.8.7.dev7.dist-info/entry_points.txt DELETED
@@ -1,17 +0,0 @@
- [fugue.plugins]
- dask = fugue_dask.registry [dask]
- dask_ibis = fugue_dask.ibis_engine [dask,ibis]
- duckdb = fugue_duckdb.registry [duckdb]
- duckdb_ibis = fugue_duckdb.ibis_engine [ibis,duckdb]
- ibis = fugue_ibis [ibis]
- polars = fugue_polars.registry [polars]
- ray = fugue_ray.registry [ray]
- spark = fugue_spark.registry [spark]
- spark_ibis = fugue_spark.ibis_engine [spark,ibis]
-
- [pytest11]
- fugue_test_dask = fugue_test.plugins.dask [dask]
- fugue_test_duckdb = fugue_test.plugins.duckdb [duckdb]
- fugue_test_misc = fugue_test.plugins.misc
- fugue_test_ray = fugue_test.plugins.ray [ray]
-
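
Both groups in this removed file are standard Python entry points: the [fugue.plugins] group is scanned by Fugue's plugin loader, and [pytest11] is the hook pytest uses to auto-load plugins. A minimal sketch of that discovery mechanism using only the standard library (not Fugue's actual loader code):

    from importlib.metadata import entry_points

    # Enumerate and load every plugin registered under the "fugue.plugins"
    # group, mirroring what a host application does with entry_points.txt.
    # The group= keyword requires Python 3.10+; on older versions
    # entry_points() returns a dict keyed by group name instead.
    for ep in entry_points(group="fugue.plugins"):
        print(ep.name, "->", ep.value)  # e.g. duckdb -> fugue_duckdb.registry
        plugin_module = ep.load()       # imports the module and returns it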
fugue_dask/ibis_engine.py DELETED
@@ -1,62 +0,0 @@
- from typing import Any, Callable
-
- import dask.dataframe as dd
- import ibis
- from ibis.backends.dask import Backend
- from triad.utils.assertion import assert_or_throw
-
- from fugue import DataFrame, DataFrames, ExecutionEngine
- from fugue_dask.dataframe import DaskDataFrame
- from fugue_dask.execution_engine import DaskExecutionEngine
- from fugue_ibis import IbisTable
- from fugue_ibis._utils import to_ibis_schema, to_schema
- from fugue_ibis.execution.ibis_engine import IbisEngine, parse_ibis_engine
-
-
- class DaskIbisEngine(IbisEngine):
-     def __init__(self, execution_engine: ExecutionEngine) -> None:
-         assert_or_throw(
-             isinstance(execution_engine, DaskExecutionEngine),
-             lambda: ValueError(
-                 f"DaskIbisEngine must use DaskExecutionEngine ({execution_engine})"
-             ),
-         )
-         super().__init__(execution_engine)
-
-     def select(
-         self, dfs: DataFrames, ibis_func: Callable[[ibis.BaseBackend], IbisTable]
-     ) -> DataFrame:
-         pdfs = {
-             k: self.execution_engine.to_df(v).native  # type: ignore
-             for k, v in dfs.items()
-         }
-         be = _BackendWrapper().connect(pdfs)
-         be.set_schemas(dfs)
-         expr = ibis_func(be)
-         schema = to_schema(expr.schema())
-         result = expr.compile()
-         assert_or_throw(
-             isinstance(result, dd.DataFrame),
-             lambda: ValueError(f"result must be a Dask DataFrame ({type(result)})"),
-         )
-         return DaskDataFrame(result, schema=schema)
-
-
- @parse_ibis_engine.candidate(
-     lambda obj, *args, **kwargs: isinstance(obj, DaskExecutionEngine)
- )
- def _to_dask_ibis_engine(obj: Any, engine: ExecutionEngine) -> IbisEngine:
-     return DaskIbisEngine(engine)
-
-
- class _BackendWrapper(Backend):
-     def set_schemas(self, dfs: DataFrames) -> None:
-         self._schemas = {k: to_ibis_schema(v.schema) for k, v in dfs.items()}
-
-     def table(self, name: str, schema: Any = None):
-         return super().table(
-             name,
-             schema=self._schemas[name]
-             if schema is None and name in self._schemas
-             else schema,
-         )
fugue_duckdb/ibis_engine.py DELETED
@@ -1,56 +0,0 @@
- from typing import Any, Callable, Dict, Optional, Tuple
-
- import ibis
- from ibis.backends.pandas import Backend
-
- from fugue import DataFrame, DataFrames, ExecutionEngine
- from fugue.collections.sql import StructuredRawSQL, TempTableName
- from fugue_ibis import IbisTable
- from fugue_ibis._utils import to_ibis_schema
- from fugue_ibis.execution.ibis_engine import IbisEngine, parse_ibis_engine
-
- from .execution_engine import DuckDBEngine, DuckExecutionEngine
-
-
- class DuckDBIbisEngine(IbisEngine):
-     def select(
-         self, dfs: DataFrames, ibis_func: Callable[[ibis.BaseBackend], IbisTable]
-     ) -> DataFrame:
-         be = _BackendWrapper().connect({})
-         be.set_schemas(dfs)
-         expr = ibis_func(be)
-         sql = StructuredRawSQL.from_expr(
-             str(
-                 ibis.postgres.compile(expr).compile(
-                     compile_kwargs={"literal_binds": True}
-                 )
-             ),
-             prefix='"<tmpdf:',
-             suffix='>"',
-             dialect="postgres",
-         )
-
-         engine = DuckDBEngine(self.execution_engine)
-         _dfs = DataFrames({be._name_map[k][0].key: v for k, v in dfs.items()})
-         return engine.select(_dfs, sql)
-
-
- @parse_ibis_engine.candidate(
-     lambda obj, *args, **kwargs: isinstance(obj, DuckExecutionEngine)
-     or (isinstance(obj, str) and obj in ["duck", "duckdb"])
- )
- def _to_duck_ibis_engine(obj: Any, engine: ExecutionEngine) -> Optional[IbisEngine]:
-     return DuckDBIbisEngine(engine)
-
-
- class _BackendWrapper(Backend):
-     def set_schemas(self, dfs: DataFrames) -> None:
-         self._schemas = {k: to_ibis_schema(v.schema) for k, v in dfs.items()}
-         self._name_map: Dict[str, Tuple[TempTableName, IbisTable]] = {}
-
-     def table(self, name: str, schema: Any = None) -> IbisTable:
-         if name not in self._name_map:
-             tn = TempTableName()
-             tb = ibis.table(self._schemas[name], name=(str(tn)))
-             self._name_map[name] = (tn, tb)
-         return self._name_map[name][1]
fugue_ibis/execution/__init__.py DELETED
File without changes
fugue_ibis/execution/ibis_engine.py DELETED
@@ -1,49 +0,0 @@
- from abc import abstractmethod
- from typing import Any, Callable
-
- import ibis
-
- from fugue import AnyDataFrame, DataFrame, DataFrames, EngineFacet, ExecutionEngine
- from fugue._utils.registry import fugue_plugin
-
- from .._compat import IbisTable
-
-
- @fugue_plugin
- def parse_ibis_engine(obj: Any, engine: ExecutionEngine) -> "IbisEngine":
-     if isinstance(obj, IbisEngine):
-         return obj
-     raise NotImplementedError(
-         f"Ibis execution engine can't be parsed from {obj}."
-         " You may need to register a parser for it."
-     )
-
-
- class IbisEngine(EngineFacet):
-     """The abstract base class for different ibis execution implementations.
-
-     :param execution_engine: the execution engine this ibis engine will run on
-     """
-
-     @property
-     def is_distributed(self) -> bool:  # pragma: no cover
-         return self.execution_engine.is_distributed
-
-     def to_df(self, df: AnyDataFrame, schema: Any = None) -> DataFrame:
-         raise NotImplementedError  # pragma: no cover
-
-     @abstractmethod
-     def select(
-         self, dfs: DataFrames, ibis_func: Callable[[ibis.BaseBackend], IbisTable]
-     ) -> DataFrame:  # pragma: no cover
-         """Execute the ibis select expression.
-
-         :param dfs: a collection of dataframes that must have keys
-         :param ibis_func: the ibis compute function
-         :return: result of the ibis function
-
-         .. note::
-
-             This interface is experimental, so it is subject to change.
-         """
-         raise NotImplementedError
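
parse_ibis_engine above is a conditional-dispatch plugin: @fugue_plugin makes the function extensible, and each backend attaches a predicate-guarded override through .candidate (as seen in the dask, duckdb, and spark files in this diff). A self-contained toy version of that dispatch pattern, not Fugue's actual implementation in fugue._utils.registry:

    from typing import Any, Callable, List, Tuple

    def dispatch_plugin(default: Callable) -> Callable:
        # Wrap "default" so predicate-guarded candidates can override it;
        # the first candidate whose matcher returns True wins.
        candidates: List[Tuple[Callable[..., bool], Callable]] = []

        def run(*args: Any, **kwargs: Any) -> Any:
            for matches, func in candidates:
                if matches(*args, **kwargs):
                    return func(*args, **kwargs)
            return default(*args, **kwargs)

        def candidate(matcher: Callable[..., bool]) -> Callable:
            def register(func: Callable) -> Callable:
                candidates.append((matcher, func))
                return func
            return register

        run.candidate = candidate  # type: ignore[attr-defined]
        return run

    @dispatch_plugin
    def parse_engine(obj: Any) -> str:
        raise NotImplementedError(f"can't parse {obj}")

    @parse_engine.candidate(lambda obj: isinstance(obj, str) and obj == "duckdb")
    def _parse_duckdb(obj: Any) -> str:
        return "DuckDBIbisEngine"

    assert parse_engine("duckdb") == "DuckDBIbisEngine"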
fugue_ibis/execution/pandas_backend.py DELETED
@@ -1,54 +0,0 @@
- from typing import Any, Callable
-
- import ibis
- import pandas as pd
- from ibis.backends.pandas import Backend
- from triad.utils.assertion import assert_or_throw
-
- from fugue import (
-     DataFrame,
-     DataFrames,
-     ExecutionEngine,
-     NativeExecutionEngine,
-     PandasDataFrame,
- )
- from fugue_ibis._utils import to_ibis_schema, to_schema
-
- from .._compat import IbisTable
- from .ibis_engine import IbisEngine, parse_ibis_engine
-
-
- class PandasIbisEngine(IbisEngine):
-     def select(
-         self, dfs: DataFrames, ibis_func: Callable[[ibis.BaseBackend], IbisTable]
-     ) -> DataFrame:  # pragma: no cover
-         pdfs = {k: v.as_pandas() for k, v in dfs.items()}
-         be = _BackendWrapper().connect(pdfs)
-         be.set_schemas(dfs)
-         expr = ibis_func(be)
-         schema = to_schema(expr.schema())
-         result = expr.execute()
-         assert_or_throw(
-             isinstance(result, pd.DataFrame), "result must be a pandas DataFrame"
-         )
-         return PandasDataFrame(result, schema=schema)
-
-
- @parse_ibis_engine.candidate(
-     lambda obj, *args, **kwargs: isinstance(obj, NativeExecutionEngine)
- )
- def _pd_to_ibis_engine(obj: Any, engine: ExecutionEngine) -> IbisEngine:
-     return PandasIbisEngine(engine)
-
-
- class _BackendWrapper(Backend):
-     def set_schemas(self, dfs: DataFrames) -> None:
-         self._schemas = {k: to_ibis_schema(v.schema) for k, v in dfs.items()}
-
-     def table(self, name: str, schema: Any = None):
-         return super().table(
-             name,
-             schema=self._schemas[name]
-             if schema is None and name in self._schemas
-             else schema,
-         )
fugue_ibis/extensions.py DELETED
@@ -1,203 +0,0 @@
- from typing import Any, Callable, Dict
-
- import ibis
- from fugue import DataFrame, DataFrames, Processor, WorkflowDataFrame
- from fugue.exceptions import FugueWorkflowCompileError
- from fugue.workflow.workflow import WorkflowDataFrames
- from triad import assert_or_throw, extension_method
-
- from ._utils import LazyIbisObject, _materialize
- from .execution.ibis_engine import parse_ibis_engine
-
- from ._compat import IbisTable
-
-
- def run_ibis(
-     ibis_func: Callable[[ibis.BaseBackend], IbisTable],
-     ibis_engine: Any = None,
-     **dfs: WorkflowDataFrame,
- ) -> WorkflowDataFrame:
-     """Run an ibis workflow wrapped in ``ibis_func``
-
-     :param ibis_func: the function taking in an ibis backend, and returning
-         an Ibis TableExpr
-     :param ibis_engine: an object that together with |ExecutionEngine|
-         can determine :class:`~fugue_ibis.execution.ibis_engine.IbisEngine`,
-         defaults to None
-     :param dfs: dataframes in the same workflow
-     :return: the output workflow dataframe
-
-     .. admonition:: Examples
-
-         .. code-block:: python
-
-             from fugue import FugueWorkflow
-             from fugue_ibis import run_ibis
-
-             def func(backend):
-                 t = backend.table("tb")
-                 return t.mutate(b=t.a+1)
-
-             dag = FugueWorkflow()
-             df = dag.df([[0]], "a:int")
-             result = run_ibis(func, tb=df)
-             result.show()
-     """
-     wdfs = WorkflowDataFrames(**dfs)
-     return wdfs.workflow.process(
-         wdfs,
-         using=_IbisProcessor,
-         params=dict(ibis_func=ibis_func, ibis_engine=ibis_engine),
-     )
-
-
- @extension_method
- def as_ibis(df: WorkflowDataFrame) -> IbisTable:
-     """Convert the Fugue workflow dataframe to an ibis table for ibis
-     operations.
-
-     :param df: the Fugue workflow dataframe
-     :return: the object representing the ibis table
-
-     .. admonition:: Examples
-
-         .. code-block:: python
-
-             # non-magical approach
-             from fugue import FugueWorkflow
-             from fugue_ibis import as_ibis, as_fugue
-
-             dag = FugueWorkflow()
-             df1 = dag.df([[0]], "a:int")
-             df2 = dag.df([[1]], "a:int")
-             idf1 = as_ibis(df1)
-             idf2 = as_ibis(df2)
-             idf3 = idf1.union(idf2)
-             result = idf3.mutate(b=idf3.a+1)
-             as_fugue(result).show()
-
-         .. code-block:: python
-
-             # magical approach
-             from fugue import FugueWorkflow
-             import fugue_ibis  # must import
-
-             dag = FugueWorkflow()
-             idf1 = dag.df([[0]], "a:int").as_ibis()
-             idf2 = dag.df([[1]], "a:int").as_ibis()
-             idf3 = idf1.union(idf2)
-             result = idf3.mutate(b=idf3.a+1).as_fugue()
-             result.show()
-
-     .. note::
-
-         The magic is that when importing ``fugue_ibis``, the functions
-         ``as_ibis`` and ``as_fugue`` are added to the corresponding classes
-         so you can use them as if they were part of the original classes.
-
-         This is an idea similar to patching. Ibis uses this programming model
-         a lot. Fugue provides this as an option.
-
-     .. note::
-
-         The returned object is not really a ``TableExpr``, it's a 'super lazy'
-         object that will be translated into ``TableExpr`` at run time.
-         This is because to compile an ibis execution graph, the input schemas
-         must be known. However, in Fugue, this is not always true. For example
-         if the previous step is to pivot a table, then the output schema can only
-         be known at runtime. So in order to be a part of Fugue, we need to be
-         able to construct ibis expressions before knowing the input schemas.
-     """
-     return LazyIbisObject(df)  # type: ignore
-
-
- @extension_method(class_type=LazyIbisObject)
- def as_fugue(
-     expr: IbisTable,
-     ibis_engine: Any = None,
- ) -> WorkflowDataFrame:
-     """Convert a lazy ibis object to Fugue workflow dataframe
-
-     :param expr: the actual instance should be LazyIbisObject
-     :return: the Fugue workflow dataframe
-
-     .. admonition:: Examples
-
-         .. code-block:: python
-
-             # non-magical approach
-             from fugue import FugueWorkflow
-             from fugue_ibis import as_ibis, as_fugue
-
-             dag = FugueWorkflow()
-             df1 = dag.df([[0]], "a:int")
-             df2 = dag.df([[1]], "a:int")
-             idf1 = as_ibis(df1)
-             idf2 = as_ibis(df2)
-             idf3 = idf1.union(idf2)
-             result = idf3.mutate(b=idf3.a+1)
-             as_fugue(result).show()
-
-         .. code-block:: python
-
-             # magical approach
-             from fugue import FugueWorkflow
-             import fugue_ibis  # must import
-
-             dag = FugueWorkflow()
-             idf1 = dag.df([[0]], "a:int").as_ibis()
-             idf2 = dag.df([[1]], "a:int").as_ibis()
-             idf3 = idf1.union(idf2)
-             result = idf3.mutate(b=idf3.a+1).as_fugue()
-             result.show()
-
-     .. note::
-
-         The magic is that when importing ``fugue_ibis``, the functions
-         ``as_ibis`` and ``as_fugue`` are added to the corresponding classes
-         so you can use them as if they were part of the original classes.
-
-         This is an idea similar to patching. Ibis uses this programming model
-         a lot. Fugue provides this as an option.
-
-     .. note::
-
-         The returned object is not really a ``TableExpr``, it's a 'super lazy'
-         object that will be translated into ``TableExpr`` at run time.
-         This is because to compile an ibis execution graph, the input schemas
-         must be known. However, in Fugue, this is not always true. For example
-         if the previous step is to pivot a table, then the output schema can only
-         be known at runtime. So in order to be a part of Fugue, we need to be
-         able to construct ibis expressions before knowing the input schemas.
-     """
-
-     def _func(
-         be: ibis.BaseBackend,
-         lazy_expr: LazyIbisObject,
-         ctx: Dict[int, Any],
-     ) -> IbisTable:
-         return _materialize(
-             lazy_expr, {k: be.table(f"_{id(v)}") for k, v in ctx.items()}
-         )
-
-     assert_or_throw(
-         isinstance(expr, LazyIbisObject),
-         FugueWorkflowCompileError("expr must be a LazyIbisObject"),
-     )
-     _lazy_expr: LazyIbisObject = expr  # type: ignore
-     _ctx = _lazy_expr._super_lazy_internal_ctx
-     _dfs = {f"_{id(v)}": v for _, v in _ctx.items()}
-     return run_ibis(
-         lambda be: _func(be, _lazy_expr, _ctx), ibis_engine=ibis_engine, **_dfs
-     )
-
-
- class _IbisProcessor(Processor):
-     def process(self, dfs: DataFrames) -> DataFrame:
-         ibis_func = self.params.get_or_throw("ibis_func", Callable)
-         ibis_engine = self.params.get_or_none("ibis_engine", object)
-         ie = parse_ibis_engine(
-             self.execution_engine if ibis_engine is None else ibis_engine,
-             self.execution_engine,
-         )
-         return ie.select(dfs, ibis_func)
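
The key to the as_fugue path above is that LazyIbisObject records operations at graph-construction time and _materialize replays them against real tables once schemas are known at run time. A toy record-and-replay proxy that illustrates the idea (this is not the actual LazyIbisObject implementation):

    from typing import Any, Callable, List, Optional

    class LazyProxy:
        # Record attribute accesses and calls; replay() applies the recorded
        # chain of operations to a concrete object later.
        def __init__(self, ops: Optional[List[Callable[[Any], Any]]] = None):
            self._ops: List[Callable[[Any], Any]] = ops or []

        def __getattr__(self, name: str) -> "LazyProxy":
            return LazyProxy(self._ops + [lambda obj: getattr(obj, name)])

        def __call__(self, *args: Any, **kwargs: Any) -> "LazyProxy":
            return LazyProxy(self._ops + [lambda obj: obj(*args, **kwargs)])

        def replay(self, obj: Any) -> Any:
            for op in self._ops:
                obj = op(obj)
            return obj

    expr = LazyProxy().upper()   # nothing executes yet
    print(expr.replay("fugue"))  # replays on a real object -> "FUGUE"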
fugue_spark/ibis_engine.py DELETED
@@ -1,45 +0,0 @@
- from typing import Any, Callable
-
- import ibis
- from pyspark.sql import DataFrame as PySparkDataFrame
- from triad.utils.assertion import assert_or_throw
-
- from fugue import DataFrame, DataFrames, ExecutionEngine
- from fugue_ibis import IbisTable
- from fugue_ibis._utils import to_schema
- from fugue_ibis.execution.ibis_engine import IbisEngine, parse_ibis_engine
- from fugue_spark.dataframe import SparkDataFrame
- from fugue_spark.execution_engine import SparkExecutionEngine
-
-
- class SparkIbisEngine(IbisEngine):
-     def __init__(self, execution_engine: ExecutionEngine) -> None:
-         assert_or_throw(
-             isinstance(execution_engine, SparkExecutionEngine),
-             lambda: ValueError(
-                 f"SparkIbisEngine must use SparkExecutionEngine ({execution_engine})"
-             ),
-         )
-         super().__init__(execution_engine)
-
-     def select(
-         self, dfs: DataFrames, ibis_func: Callable[[ibis.BaseBackend], IbisTable]
-     ) -> DataFrame:
-         for k, v in dfs.items():
-             self.execution_engine.register(v, k)  # type: ignore
-         con = ibis.pyspark.connect(self.execution_engine.spark_session)  # type: ignore
-         expr = ibis_func(con)
-         schema = to_schema(expr.schema())
-         result = expr.compile()
-         assert_or_throw(
-             isinstance(result, PySparkDataFrame),
-             lambda: ValueError(f"result must be a PySpark DataFrame ({type(result)})"),
-         )
-         return SparkDataFrame(result, schema=schema)
-
-
- @parse_ibis_engine.candidate(
-     lambda obj, *args, **kwargs: isinstance(obj, SparkExecutionEngine)
- )
- def _spark_to_ibis_engine(obj: Any, engine: ExecutionEngine) -> IbisEngine:
-     return SparkIbisEngine(engine)
fugue_test/ibis_suite.py DELETED
@@ -1,92 +0,0 @@
- # pylint: disable-all
- from unittest import TestCase
-
- import ibis
- from fugue import ExecutionEngine, FugueWorkflow, register_default_sql_engine
- from fugue_ibis import IbisEngine, as_fugue, as_ibis, run_ibis
-
-
- class IbisTests(object):
-     """Ibis test suite.
-     Any new engine from :class:`~fugue_ibis.execution.ibis_engine.IbisEngine`
-     should also pass this test suite.
-     """
-
-     class Tests(TestCase):
-         @classmethod
-         def setUpClass(cls):
-             register_default_sql_engine(lambda engine: engine.sql_engine)
-             cls._engine = cls.make_engine(cls)
-             cls._ibis_engine = cls.make_ibis_engine(cls)
-
-         @property
-         def engine(self) -> ExecutionEngine:
-             return self._engine  # type: ignore
-
-         @property
-         def ibis_engine(self) -> ExecutionEngine:
-             return self._ibis_engine  # type: ignore
-
-         @classmethod
-         def tearDownClass(cls):
-             cls._engine.stop()
-
-         def make_engine(self) -> ExecutionEngine:  # pragma: no cover
-             raise NotImplementedError
-
-         def make_ibis_engine(self) -> IbisEngine:  # pragma: no cover
-             raise NotImplementedError
-
-         def test_run_ibis(self):
-             def _test1(con: ibis.BaseBackend) -> ibis.Expr:
-                 tb = con.table("a")
-                 return tb
-
-             def _test2(con: ibis.BaseBackend) -> ibis.Expr:
-                 tb = con.table("a")
-                 return tb.mutate(c=tb.a + tb.b)
-
-             dag = FugueWorkflow()
-             df = dag.df([[0, 1], [2, 3]], "a:long,b:long")
-             res = run_ibis(_test1, ibis_engine=self.ibis_engine, a=df)
-             res.assert_eq(df)
-             df = dag.df([[0, 1], [2, 3]], "a:long,b:long")
-             res = run_ibis(_test2, ibis_engine=self.ibis_engine, a=df)
-             df2 = dag.df([[0, 1, 1], [2, 3, 5]], "a:long,b:long,c:long")
-             res.assert_eq(df2)
-             dag.run(self.engine)
-
-         def test_run_as_ibis(self):
-             dag = FugueWorkflow()
-             df = dag.df([[0, 1], [2, 3]], "a:long,b:long")
-             idf = as_ibis(df)
-             res = as_fugue(idf)
-             res.assert_eq(df)
-             dag.run(self.engine)
-
-             dag = FugueWorkflow()
-             df1 = dag.df([[0, 1], [2, 3]], "a:long,b:long")
-             df2 = dag.df([[0, ["x"]], [3, ["y"]]], "a:long,c:[str]")
-             idf1 = as_ibis(df1)
-             idf2 = as_ibis(df2)
-             idf = idf1.inner_join(idf2, idf1.a == idf2.a)[idf1, idf2.c]
-             res = as_fugue(idf)
-             expected = dag.df([[0, 1, ["x"]]], "a:long,b:long,c:[str]")
-             res.assert_eq(expected, check_order=True, check_schema=True)
-             dag.run(self.engine)
-
-             dag = FugueWorkflow()
-             idf1 = dag.df([[0, 1], [2, 3]], "a:long,b:long").as_ibis()
-             idf2 = dag.df([[0, ["x"]], [3, ["y"]]], "a:long,c:[str]").as_ibis()
-             res = idf1.inner_join(idf2, idf1.a == idf2.a)[idf1, idf2.c].as_fugue()
-             expected = dag.df([[0, 1, ["x"]]], "a:long,b:long,c:[str]")
-             res.assert_eq(expected, check_order=True, check_schema=True)
-             dag.run(self.engine)
-
-         def test_literal(self):
-             dag = FugueWorkflow()
-             idf1 = dag.df([[0, 1], [2, 3]], "a:long,b:long").as_ibis()
-             res = idf1.mutate(c=idf1.b + 10).as_fugue()
-             expected = dag.df([[0, 1, 11], [2, 3, 13]], "a:long,b:long,c:long")
-             res.assert_eq(expected, check_order=True, check_schema=True)
-             dag.run(self.engine)
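
Before this removal, a backend opted into the suite by subclassing the inner Tests class and implementing the two factory methods. A minimal sketch against the native engine (illustrative of the pre-0.9.0 usage; the concrete subclasses lived in each backend's own test code, not in this wheel):

    from fugue import NativeExecutionEngine
    from fugue_ibis.execution.pandas_backend import PandasIbisEngine
    from fugue_test.ibis_suite import IbisTests

    class NativeIbisTests(IbisTests.Tests):
        def make_engine(self) -> NativeExecutionEngine:
            # the suite calls this once per class in setUpClass
            return NativeExecutionEngine()

        def make_ibis_engine(self) -> PandasIbisEngine:
            # self._engine is already set when this runs (see setUpClass above)
            return PandasIbisEngine(self._engine)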
fugue_test/plugins/__init__.py DELETED
File without changes
fugue_test/plugins/dask/__init__.py DELETED
@@ -1,2 +0,0 @@
- # flake8: noqa
- from .fixtures import fugue_dask_client
fugue_test/plugins/dask/fixtures.py DELETED
@@ -1,12 +0,0 @@
- import pytest
-
-
- @pytest.fixture(scope="session")
- def fugue_dask_client():
-     from dask.distributed import Client
-     import dask
-
-     with Client(processes=True, n_workers=3, threads_per_worker=1) as client:
-         dask.config.set({"dataframe.shuffle.method": "tasks"})
-         dask.config.set({"dataframe.convert-string": False})
-         yield client
fugue_test/plugins/duckdb/__init__.py DELETED
@@ -1,2 +0,0 @@
- # flake8: noqa
- from .fixtures import fugue_duckdb_connection
fugue_test/plugins/duckdb/fixtures.py DELETED
@@ -1,9 +0,0 @@
- import pytest
-
-
- @pytest.fixture(scope="session")
- def fugue_duckdb_connection():
-     import duckdb
-
-     with duckdb.connect() as connection:
-         yield connection
fugue_test/plugins/misc/__init__.py DELETED
@@ -1,2 +0,0 @@
- # flake8: noqa
- from .fixtures import tmp_mem_dir