fugue 0.9.2.dev1__py3-none-any.whl → 0.9.2.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fugue/dataframe/function_wrapper.py +4 -0
- {fugue-0.9.2.dev1.dist-info → fugue-0.9.2.dev2.dist-info}/METADATA +50 -51
- {fugue-0.9.2.dev1.dist-info → fugue-0.9.2.dev2.dist-info}/RECORD +11 -10
- {fugue-0.9.2.dev1.dist-info → fugue-0.9.2.dev2.dist-info}/WHEEL +1 -1
- fugue-0.9.2.dev2.dist-info/entry_points.txt +11 -0
- fugue_dask/_dask_sql_wrapper.py +76 -0
- fugue_dask/_utils.py +1 -1
- fugue_dask/execution_engine.py +5 -9
- fugue_ibis/execution_engine.py +7 -6
- fugue-0.9.2.dev1.dist-info/entry_points.txt +0 -12
- {fugue-0.9.2.dev1.dist-info → fugue-0.9.2.dev2.dist-info}/LICENSE +0 -0
- {fugue-0.9.2.dev1.dist-info → fugue-0.9.2.dev2.dist-info}/top_level.txt +0 -0
|
@@ -154,6 +154,7 @@ fugue_annotated_param = DataFrameFunctionWrapper.annotated_param
|
|
|
154
154
|
annotation == Callable
|
|
155
155
|
or annotation == callable # pylint: disable=comparison-with-callable
|
|
156
156
|
or str(annotation).startswith("typing.Callable")
|
|
157
|
+
or str(annotation).startswith("collections.abc.Callable")
|
|
157
158
|
),
|
|
158
159
|
)
|
|
159
160
|
class _CallableParam(AnnotatedParam):
|
|
@@ -168,6 +169,9 @@ class _CallableParam(AnnotatedParam):
|
|
|
168
169
|
or annotation == Optional[callable]
|
|
169
170
|
or str(annotation).startswith("typing.Union[typing.Callable") # 3.8-
|
|
170
171
|
or str(annotation).startswith("typing.Optional[typing.Callable") # 3.9+
|
|
172
|
+
or str(annotation).startswith(
|
|
173
|
+
"typing.Optional[collections.abc.Callable]"
|
|
174
|
+
) # 3.9+
|
|
171
175
|
),
|
|
172
176
|
)
|
|
173
177
|
class _OptionalCallableParam(AnnotatedParam):
|
|
@@ -1,13 +1,12 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: fugue
|
|
3
|
-
Version: 0.9.2.
|
|
3
|
+
Version: 0.9.2.dev2
|
|
4
4
|
Summary: An abstraction layer for distributed computation
|
|
5
5
|
Home-page: http://github.com/fugue-project/fugue
|
|
6
6
|
Author: The Fugue Development Team
|
|
7
7
|
Author-email: hello@fugue.ai
|
|
8
8
|
License: Apache-2.0
|
|
9
9
|
Keywords: distributed spark dask ray sql dsl domain specific language
|
|
10
|
-
Platform: UNKNOWN
|
|
11
10
|
Classifier: Development Status :: 5 - Production/Stable
|
|
12
11
|
Classifier: Intended Audience :: Developers
|
|
13
12
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
@@ -17,67 +16,68 @@ Classifier: Programming Language :: Python :: 3.8
|
|
|
17
16
|
Classifier: Programming Language :: Python :: 3.9
|
|
18
17
|
Classifier: Programming Language :: Python :: 3.10
|
|
19
18
|
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
20
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
21
21
|
Requires-Python: >=3.8
|
|
22
22
|
Description-Content-Type: text/markdown
|
|
23
|
-
|
|
24
|
-
Requires-Dist:
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: triad>=0.9.7
|
|
25
|
+
Requires-Dist: adagio>=0.2.6
|
|
25
26
|
Provides-Extra: all
|
|
26
|
-
Requires-Dist: qpd
|
|
27
|
-
Requires-Dist: fugue-sql-antlr
|
|
28
|
-
Requires-Dist: sqlglot
|
|
29
|
-
Requires-Dist: jinja2
|
|
30
|
-
Requires-Dist: pyspark
|
|
31
|
-
Requires-Dist: dask[dataframe,distributed]
|
|
32
|
-
Requires-Dist: dask-sql
|
|
33
|
-
Requires-Dist: ray[data]
|
|
34
|
-
Requires-Dist: notebook
|
|
35
|
-
Requires-Dist: jupyterlab
|
|
36
|
-
Requires-Dist: ipython
|
|
37
|
-
Requires-Dist: duckdb
|
|
38
|
-
Requires-Dist: pyarrow
|
|
39
|
-
Requires-Dist: pandas
|
|
40
|
-
Requires-Dist: ibis-framework
|
|
41
|
-
Requires-Dist: polars
|
|
27
|
+
Requires-Dist: qpd>=0.4.4; extra == "all"
|
|
28
|
+
Requires-Dist: fugue-sql-antlr>=0.2.0; extra == "all"
|
|
29
|
+
Requires-Dist: sqlglot; extra == "all"
|
|
30
|
+
Requires-Dist: jinja2; extra == "all"
|
|
31
|
+
Requires-Dist: pyspark>=3.1.1; extra == "all"
|
|
32
|
+
Requires-Dist: dask[dataframe,distributed]>=2023.5.0; extra == "all"
|
|
33
|
+
Requires-Dist: dask-sql; extra == "all"
|
|
34
|
+
Requires-Dist: ray[data]>=2.5.0; extra == "all"
|
|
35
|
+
Requires-Dist: notebook; extra == "all"
|
|
36
|
+
Requires-Dist: jupyterlab; extra == "all"
|
|
37
|
+
Requires-Dist: ipython>=7.10.0; extra == "all"
|
|
38
|
+
Requires-Dist: duckdb>=0.5.0; extra == "all"
|
|
39
|
+
Requires-Dist: pyarrow>=6.0.1; extra == "all"
|
|
40
|
+
Requires-Dist: pandas<2.2,>=2.0.2; extra == "all"
|
|
41
|
+
Requires-Dist: ibis-framework[duckdb,pandas]; extra == "all"
|
|
42
|
+
Requires-Dist: polars; extra == "all"
|
|
42
43
|
Provides-Extra: cpp_sql_parser
|
|
43
|
-
Requires-Dist: fugue-sql-antlr[cpp]
|
|
44
|
+
Requires-Dist: fugue-sql-antlr[cpp]>=0.2.0; extra == "cpp-sql-parser"
|
|
44
45
|
Provides-Extra: dask
|
|
45
|
-
Requires-Dist: dask[dataframe,distributed]
|
|
46
|
-
Requires-Dist: pyarrow
|
|
47
|
-
Requires-Dist: pandas
|
|
48
|
-
Requires-Dist: dask[dataframe,distributed] >=2024.4.0 ; (python_version >= "3.11.9") and extra == 'dask'
|
|
46
|
+
Requires-Dist: dask[dataframe,distributed]>=2024.4.0; extra == "dask"
|
|
47
|
+
Requires-Dist: pyarrow>=7.0.0; extra == "dask"
|
|
48
|
+
Requires-Dist: pandas>=2.0.2; extra == "dask"
|
|
49
49
|
Provides-Extra: duckdb
|
|
50
|
-
Requires-Dist: qpd
|
|
51
|
-
Requires-Dist: fugue-sql-antlr
|
|
52
|
-
Requires-Dist: sqlglot
|
|
53
|
-
Requires-Dist: jinja2
|
|
54
|
-
Requires-Dist: duckdb
|
|
55
|
-
Requires-Dist: numpy
|
|
50
|
+
Requires-Dist: qpd>=0.4.4; extra == "duckdb"
|
|
51
|
+
Requires-Dist: fugue-sql-antlr>=0.2.0; extra == "duckdb"
|
|
52
|
+
Requires-Dist: sqlglot; extra == "duckdb"
|
|
53
|
+
Requires-Dist: jinja2; extra == "duckdb"
|
|
54
|
+
Requires-Dist: duckdb>=0.5.0; extra == "duckdb"
|
|
55
|
+
Requires-Dist: numpy; extra == "duckdb"
|
|
56
56
|
Provides-Extra: ibis
|
|
57
|
-
Requires-Dist: qpd
|
|
58
|
-
Requires-Dist: fugue-sql-antlr
|
|
59
|
-
Requires-Dist: sqlglot
|
|
60
|
-
Requires-Dist: jinja2
|
|
61
|
-
Requires-Dist: ibis-framework
|
|
62
|
-
Requires-Dist: pandas
|
|
57
|
+
Requires-Dist: qpd>=0.4.4; extra == "ibis"
|
|
58
|
+
Requires-Dist: fugue-sql-antlr>=0.2.0; extra == "ibis"
|
|
59
|
+
Requires-Dist: sqlglot; extra == "ibis"
|
|
60
|
+
Requires-Dist: jinja2; extra == "ibis"
|
|
61
|
+
Requires-Dist: ibis-framework[pandas]; extra == "ibis"
|
|
62
|
+
Requires-Dist: pandas<2.2; extra == "ibis"
|
|
63
63
|
Provides-Extra: notebook
|
|
64
|
-
Requires-Dist: notebook
|
|
65
|
-
Requires-Dist: jupyterlab
|
|
66
|
-
Requires-Dist: ipython
|
|
64
|
+
Requires-Dist: notebook; extra == "notebook"
|
|
65
|
+
Requires-Dist: jupyterlab; extra == "notebook"
|
|
66
|
+
Requires-Dist: ipython>=7.10.0; extra == "notebook"
|
|
67
67
|
Provides-Extra: polars
|
|
68
|
-
Requires-Dist: polars
|
|
68
|
+
Requires-Dist: polars; extra == "polars"
|
|
69
69
|
Provides-Extra: ray
|
|
70
|
-
Requires-Dist: ray[data]
|
|
71
|
-
Requires-Dist: duckdb
|
|
72
|
-
Requires-Dist: pyarrow
|
|
73
|
-
Requires-Dist: pandas
|
|
70
|
+
Requires-Dist: ray[data]>=2.5.0; extra == "ray"
|
|
71
|
+
Requires-Dist: duckdb>=0.5.0; extra == "ray"
|
|
72
|
+
Requires-Dist: pyarrow>=7.0.0; extra == "ray"
|
|
73
|
+
Requires-Dist: pandas<2.2; extra == "ray"
|
|
74
74
|
Provides-Extra: spark
|
|
75
|
-
Requires-Dist: pyspark
|
|
75
|
+
Requires-Dist: pyspark>=3.1.1; extra == "spark"
|
|
76
76
|
Provides-Extra: sql
|
|
77
|
-
Requires-Dist: qpd
|
|
78
|
-
Requires-Dist: fugue-sql-antlr
|
|
79
|
-
Requires-Dist: sqlglot
|
|
80
|
-
Requires-Dist: jinja2
|
|
77
|
+
Requires-Dist: qpd>=0.4.4; extra == "sql"
|
|
78
|
+
Requires-Dist: fugue-sql-antlr>=0.2.0; extra == "sql"
|
|
79
|
+
Requires-Dist: sqlglot; extra == "sql"
|
|
80
|
+
Requires-Dist: jinja2; extra == "sql"
|
|
81
81
|
|
|
82
82
|
# Fugue
|
|
83
83
|
|
|
@@ -355,4 +355,3 @@ View some of our latest conferences presentations and content. For a more comple
|
|
|
355
355
|
* [Large Scale Data Validation with Spark and Dask (PyCon US)](https://www.youtube.com/watch?v=2AdvBgjO_3Q)
|
|
356
356
|
* [FugueSQL - The Enhanced SQL Interface for Pandas, Spark, and Dask DataFrames (PyData Global)](https://www.youtube.com/watch?v=OBpnGYjNBBI)
|
|
357
357
|
* [Distributed Hybrid Parameter Tuning](https://www.youtube.com/watch?v=_GBjqskD8Qk)
|
|
358
|
-
|
|
@@ -31,7 +31,7 @@ fugue/dataframe/arrow_dataframe.py,sha256=r5zcZBX_N6XO5dmixBkTCPgLcMmgDF022piZvr
|
|
|
31
31
|
fugue/dataframe/dataframe.py,sha256=xmyG85i14A6LDRkNmPt29oYq7PJsq668s1QvFHK8PV4,16964
|
|
32
32
|
fugue/dataframe/dataframe_iterable_dataframe.py,sha256=lx71KfaI4lsVKI-79buc-idaeT20JEMBOq21SQcAiY8,7259
|
|
33
33
|
fugue/dataframe/dataframes.py,sha256=tBSpHsENgbcdOJ0Jgst6PTKbjG7_uoFJch96oTlaQIs,4160
|
|
34
|
-
fugue/dataframe/function_wrapper.py,sha256=
|
|
34
|
+
fugue/dataframe/function_wrapper.py,sha256=1CjI4UXHffomylK0_u0CGL1dPv_sSXTN22S5grD10_w,17889
|
|
35
35
|
fugue/dataframe/iterable_dataframe.py,sha256=TcOoNKa4jNbHbvAZ0XAhtMmGcioygIHPxI9budDtenQ,4758
|
|
36
36
|
fugue/dataframe/pandas_dataframe.py,sha256=0L0wYCGhD2BpQbruoT07Ox9iQM5YLHLNrcgzudc-yKs,11633
|
|
37
37
|
fugue/dataframe/utils.py,sha256=bA_otOJt9oju1yq5gtn21L_GDT_pUgNc6luYuBIhbUQ,10488
|
|
@@ -89,10 +89,11 @@ fugue_contrib/viz/__init__.py,sha256=osgZx63Br-yMZImyEfYf9MVzJNM2Cqqke_-WsuDmG5M
|
|
|
89
89
|
fugue_contrib/viz/_ext.py,sha256=Lu_DlS5DcmrFz27fHcKTCkhKyknVWcfS5kzZVVuO9xM,1345
|
|
90
90
|
fugue_dask/__init__.py,sha256=2CcJ0AsN-k_f7dZ-yAyYpaICfUMPfH3l0FvUJSBzTr0,161
|
|
91
91
|
fugue_dask/_constants.py,sha256=35UmTVITk21GhRyRlbJOwPPdQsytM_p_2NytOXEay18,510
|
|
92
|
+
fugue_dask/_dask_sql_wrapper.py,sha256=lj38gJIOdoMV9W44gpwzLjUEtPVsQNKjRWuEkfI7-PM,2618
|
|
92
93
|
fugue_dask/_io.py,sha256=pl4F7mbVgP7Rwh1FFG7xfOz2TBZRUj1l3lLvDY4jOf4,6020
|
|
93
|
-
fugue_dask/_utils.py,sha256=
|
|
94
|
+
fugue_dask/_utils.py,sha256=dGUkhOoXQqgGQH_BY6aeYFo9UIWUAyo8YjwtdB7QD4s,8951
|
|
94
95
|
fugue_dask/dataframe.py,sha256=MuG9TqCND7qI66lPvxzuomfE7yA4sW7DjrvbyvE6XEU,13471
|
|
95
|
-
fugue_dask/execution_engine.py,sha256=
|
|
96
|
+
fugue_dask/execution_engine.py,sha256=Em9pN6cw5w5DGLcjV6oKQKQeLLblc9DZ0DkvxKVFxQQ,21167
|
|
96
97
|
fugue_dask/registry.py,sha256=jepWKH55VWNIWV3pOF5vpCl2OpO0rI1IULx5GM2Gk6w,2274
|
|
97
98
|
fugue_dask/tester.py,sha256=E7BZjgFpJgrHsLMKzvSO5im5OwocYcratjzulJSQZl0,718
|
|
98
99
|
fugue_duckdb/__init__.py,sha256=ZzhmAWbROR1YL9Kmlt7OlwkgPZzFhsSdwLV2pFmAqGI,268
|
|
@@ -107,7 +108,7 @@ fugue_ibis/__init__.py,sha256=z7TkK7M2_0p9XO6jQATNDgT0aHXn5k69Ttz2ga-eQG8,190
|
|
|
107
108
|
fugue_ibis/_compat.py,sha256=zKdTaTfuC02eUIzZPkcd7oObnVBi_X5mQjQf7SDme3Y,246
|
|
108
109
|
fugue_ibis/_utils.py,sha256=BUL5swA5FE4eQu0t5Z17hZVu9a2MFfxlFH6Ymy9xifg,6607
|
|
109
110
|
fugue_ibis/dataframe.py,sha256=k4Q6qBLBIADF5YhbvaDplXO7OkMZSHuf_Wg5o-AusEI,7796
|
|
110
|
-
fugue_ibis/execution_engine.py,sha256=
|
|
111
|
+
fugue_ibis/execution_engine.py,sha256=jRnp1m1wuTicS29A-WA043f8QwdoK8b9rwPXvTkm8r8,18751
|
|
111
112
|
fugue_notebook/__init__.py,sha256=9r_-2uxu1lBeZ8GgpYCKom_OZy2soIOYZajg7JDO-HY,4326
|
|
112
113
|
fugue_notebook/env.py,sha256=TYiTxYPFi-BVJJY49jDsvw9mddhK8WrifeRxBke30I8,4773
|
|
113
114
|
fugue_notebook/nbextension/README.md,sha256=QLnr957YeGfwzy2r4c4qbZPaXyCbyGrKPvcqSBQYSnU,123
|
|
@@ -148,9 +149,9 @@ fugue_test/dataframe_suite.py,sha256=7ym4sshDUly6004cq1UlppqDVtbwxD6CKxR4Lu70i0s
|
|
|
148
149
|
fugue_test/execution_suite.py,sha256=jcSSoKqTGbeWzTxkyYU-8i2zJAjzuXn7BqE8ul-JjIc,48646
|
|
149
150
|
fugue_test/fixtures.py,sha256=8Pev-mxRZOWwTFlsGjcSZ0iIs78zyWbp5tq4KG1wyvk,1432
|
|
150
151
|
fugue_version/__init__.py,sha256=gqT-BGoeEItda9fICQDvLbxEjWRIBhFJxPxxKvmHLUo,22
|
|
151
|
-
fugue-0.9.2.
|
|
152
|
-
fugue-0.9.2.
|
|
153
|
-
fugue-0.9.2.
|
|
154
|
-
fugue-0.9.2.
|
|
155
|
-
fugue-0.9.2.
|
|
156
|
-
fugue-0.9.2.
|
|
152
|
+
fugue-0.9.2.dev2.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
|
|
153
|
+
fugue-0.9.2.dev2.dist-info/METADATA,sha256=eR5mL6Tf1RGa_-Do5Dmzy4ZkbcbKf-FzW4qA0cAW1Ec,18283
|
|
154
|
+
fugue-0.9.2.dev2.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
|
155
|
+
fugue-0.9.2.dev2.dist-info/entry_points.txt,sha256=2Vxp1qew_tswacA8m0RzIliLlFOQMlzezvSXPugM_KA,295
|
|
156
|
+
fugue-0.9.2.dev2.dist-info/top_level.txt,sha256=y1eCfzGdQ1_RkgcShcfbvXs-bopD3DwJcIOxP9EFXno,140
|
|
157
|
+
fugue-0.9.2.dev2.dist-info/RECORD,,
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
[fugue.plugins]
|
|
2
|
+
dask = fugue_dask.registry[dask]
|
|
3
|
+
duckdb = fugue_duckdb.registry[duckdb]
|
|
4
|
+
ibis = fugue_ibis[ibis]
|
|
5
|
+
polars = fugue_polars.registry[polars]
|
|
6
|
+
ray = fugue_ray.registry[ray]
|
|
7
|
+
spark = fugue_spark.registry[spark]
|
|
8
|
+
|
|
9
|
+
[pytest11]
|
|
10
|
+
fugue_test = fugue_test
|
|
11
|
+
fugue_test_fixtures = fugue_test.fixtures
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
from typing import Any, Optional
|
|
2
|
+
|
|
3
|
+
import dask.dataframe as dd
|
|
4
|
+
|
|
5
|
+
try:
|
|
6
|
+
from dask.dataframe.dask_expr.io.parquet import ReadParquet
|
|
7
|
+
|
|
8
|
+
HAS_DASK_EXPR = True # newer dask
|
|
9
|
+
except ImportError: # pragma: no cover
|
|
10
|
+
HAS_DASK_EXPR = False # older dask
|
|
11
|
+
|
|
12
|
+
if not HAS_DASK_EXPR: # pragma: no cover
|
|
13
|
+
try:
|
|
14
|
+
from dask_sql import Context as ContextWrapper # pylint: disable-all
|
|
15
|
+
except ImportError: # pragma: no cover
|
|
16
|
+
raise ImportError(
|
|
17
|
+
"dask-sql is not installed. Please install it with `pip install dask-sql`"
|
|
18
|
+
)
|
|
19
|
+
else:
|
|
20
|
+
from triad.utils.assertion import assert_or_throw
|
|
21
|
+
|
|
22
|
+
try:
|
|
23
|
+
from dask_sql import Context
|
|
24
|
+
from dask_sql.datacontainer import Statistics
|
|
25
|
+
from dask_sql.input_utils import InputUtil
|
|
26
|
+
except ImportError: # pragma: no cover
|
|
27
|
+
raise ImportError(
|
|
28
|
+
"dask-sql is not installed. Please install it with `pip install dask-sql`"
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
class ContextWrapper(Context): # type: ignore
|
|
32
|
+
def create_table(
|
|
33
|
+
self,
|
|
34
|
+
table_name: str,
|
|
35
|
+
input_table: dd.DataFrame,
|
|
36
|
+
format: Optional[str] = None, # noqa
|
|
37
|
+
persist: bool = False,
|
|
38
|
+
schema_name: Optional[str] = None,
|
|
39
|
+
statistics: Optional[Statistics] = None,
|
|
40
|
+
gpu: bool = False,
|
|
41
|
+
**kwargs: Any,
|
|
42
|
+
) -> None: # pragma: no cover
|
|
43
|
+
assert_or_throw(
|
|
44
|
+
isinstance(input_table, dd.DataFrame),
|
|
45
|
+
lambda: ValueError(
|
|
46
|
+
f"input_table must be a dask dataframe, but got {type(input_table)}"
|
|
47
|
+
),
|
|
48
|
+
)
|
|
49
|
+
assert_or_throw(
|
|
50
|
+
dd._dask_expr_enabled(), lambda: ValueError("Dask expr must be enabled")
|
|
51
|
+
)
|
|
52
|
+
schema_name = schema_name or self.schema_name
|
|
53
|
+
|
|
54
|
+
dc = InputUtil.to_dc(
|
|
55
|
+
input_table,
|
|
56
|
+
table_name=table_name,
|
|
57
|
+
format=format,
|
|
58
|
+
persist=persist,
|
|
59
|
+
gpu=gpu,
|
|
60
|
+
**kwargs,
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
dask_filepath = None
|
|
64
|
+
operations = input_table.find_operations(ReadParquet)
|
|
65
|
+
for op in operations:
|
|
66
|
+
dask_filepath = op._args[0]
|
|
67
|
+
|
|
68
|
+
dc.filepath = dask_filepath
|
|
69
|
+
self.schema[schema_name].filepaths[table_name.lower()] = dask_filepath
|
|
70
|
+
|
|
71
|
+
if not statistics:
|
|
72
|
+
statistics = Statistics(float("nan"))
|
|
73
|
+
dc.statistics = statistics
|
|
74
|
+
|
|
75
|
+
self.schema[schema_name].tables[table_name.lower()] = dc
|
|
76
|
+
self.schema[schema_name].statistics[table_name.lower()] = statistics
|
fugue_dask/_utils.py
CHANGED
|
@@ -5,7 +5,7 @@ import dask.dataframe as dd
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
import pandas as pd
|
|
7
7
|
import pyarrow as pa
|
|
8
|
-
from dask.dataframe
|
|
8
|
+
from dask.dataframe import DataFrame
|
|
9
9
|
from dask.delayed import delayed
|
|
10
10
|
from dask.distributed import Client, get_client
|
|
11
11
|
from triad.utils.pandas_like import PD_UTILS, PandasLikeUtils
|
fugue_dask/execution_engine.py
CHANGED
|
@@ -9,9 +9,10 @@ from triad.collections import Schema
|
|
|
9
9
|
from triad.collections.dict import IndexedOrderedDict, ParamDict
|
|
10
10
|
from triad.utils.assertion import assert_or_throw
|
|
11
11
|
from triad.utils.hash import to_uuid
|
|
12
|
+
from triad.utils.io import makedirs
|
|
12
13
|
from triad.utils.pandas_like import PandasUtils
|
|
13
14
|
from triad.utils.threading import RunOnce
|
|
14
|
-
|
|
15
|
+
|
|
15
16
|
from fugue import StructuredRawSQL
|
|
16
17
|
from fugue.collections.partition import (
|
|
17
18
|
PartitionCursor,
|
|
@@ -61,14 +62,9 @@ class DaskSQLEngine(SQLEngine):
|
|
|
61
62
|
return True
|
|
62
63
|
|
|
63
64
|
def select(self, dfs: DataFrames, statement: StructuredRawSQL) -> DataFrame:
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
raise ImportError(
|
|
68
|
-
"dask-sql is not installed. "
|
|
69
|
-
"Please install it with `pip install dask-sql`"
|
|
70
|
-
)
|
|
71
|
-
ctx = Context()
|
|
65
|
+
from ._dask_sql_wrapper import ContextWrapper
|
|
66
|
+
|
|
67
|
+
ctx = ContextWrapper()
|
|
72
68
|
_dfs: Dict[str, dd.DataFrame] = {k: self._to_safe_df(v) for k, v in dfs.items()}
|
|
73
69
|
sql = statement.construct(dialect=self.dialect, log=self.log)
|
|
74
70
|
res = ctx.sql(
|
fugue_ibis/execution_engine.py
CHANGED
|
@@ -92,7 +92,8 @@ class IbisSQLEngine(SQLEngine):
|
|
|
92
92
|
_df2 = self.to_df(df2)
|
|
93
93
|
key_schema, end_schema = get_join_schemas(_df1, _df2, how=how, on=on)
|
|
94
94
|
on_fields = [_df1.native[k] == _df2.native[k] for k in key_schema]
|
|
95
|
-
|
|
95
|
+
version = int(ibis.__version__.split(".")[0])
|
|
96
|
+
if version < 6: # pragma: no cover
|
|
96
97
|
suffixes: Dict[str, Any] = dict(suffixes=("", _JOIN_RIGHT_SUFFIX))
|
|
97
98
|
else:
|
|
98
99
|
# breaking change in ibis 6.0
|
|
@@ -113,7 +114,7 @@ class IbisSQLEngine(SQLEngine):
|
|
|
113
114
|
cols.append(
|
|
114
115
|
ibis.coalesce(tb[k], tb[k + _JOIN_RIGHT_SUFFIX]).name(k)
|
|
115
116
|
)
|
|
116
|
-
tb = tb
|
|
117
|
+
tb = tb.select(*cols)
|
|
117
118
|
elif how.lower() in ["semi", "left_semi"]:
|
|
118
119
|
tb = _df1.native.semi_join(_df2.native, on_fields, **suffixes)
|
|
119
120
|
elif how.lower() in ["anti", "left_anti"]:
|
|
@@ -153,7 +154,7 @@ class IbisSQLEngine(SQLEngine):
|
|
|
153
154
|
self,
|
|
154
155
|
df: DataFrame,
|
|
155
156
|
how: str = "any",
|
|
156
|
-
thresh: int = None,
|
|
157
|
+
thresh: Optional[int] = None,
|
|
157
158
|
subset: Optional[List[str]] = None,
|
|
158
159
|
) -> DataFrame:
|
|
159
160
|
schema = df.schema
|
|
@@ -161,7 +162,7 @@ class IbisSQLEngine(SQLEngine):
|
|
|
161
162
|
schema = schema.extract(subset)
|
|
162
163
|
_df = self.to_df(df)
|
|
163
164
|
if thresh is None:
|
|
164
|
-
tb = _df.native.
|
|
165
|
+
tb = _df.native.drop_null(subset, how=how)
|
|
165
166
|
return self.to_df(tb, df.schema)
|
|
166
167
|
assert_or_throw(
|
|
167
168
|
how == "any", ValueError("when thresh is set, how must be 'any'")
|
|
@@ -204,7 +205,7 @@ class IbisSQLEngine(SQLEngine):
|
|
|
204
205
|
ibis.coalesce(tb[f], ibis.literal(vd[f])).name(f) if f in names else tb[f]
|
|
205
206
|
for f in df.columns
|
|
206
207
|
]
|
|
207
|
-
return self.to_df(tb
|
|
208
|
+
return self.to_df(tb.select(cols), schema=df.schema)
|
|
208
209
|
|
|
209
210
|
def take(
|
|
210
211
|
self,
|
|
@@ -241,7 +242,7 @@ class IbisSQLEngine(SQLEngine):
|
|
|
241
242
|
f") WHERE __fugue_take_param<={n}"
|
|
242
243
|
)
|
|
243
244
|
tb = self.query_to_table(sql, {tbn: idf})
|
|
244
|
-
return self.to_df(tb
|
|
245
|
+
return self.to_df(tb.select(*df.columns), schema=df.schema)
|
|
245
246
|
|
|
246
247
|
sorts: List[str] = []
|
|
247
248
|
for k, v in _presort.items():
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
[fugue.plugins]
|
|
2
|
-
dask = fugue_dask.registry [dask]
|
|
3
|
-
duckdb = fugue_duckdb.registry [duckdb]
|
|
4
|
-
ibis = fugue_ibis [ibis]
|
|
5
|
-
polars = fugue_polars.registry [polars]
|
|
6
|
-
ray = fugue_ray.registry [ray]
|
|
7
|
-
spark = fugue_spark.registry [spark]
|
|
8
|
-
|
|
9
|
-
[pytest11]
|
|
10
|
-
fugue_test = fugue_test
|
|
11
|
-
fugue_test_fixtures = fugue_test.fixtures
|
|
12
|
-
|
|
File without changes
|
|
File without changes
|