fugue-0.9.1-py3-none-any.whl → fugue-0.9.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fugue/_utils/io.py +14 -2
- fugue/dataframe/function_wrapper.py +99 -2
- fugue/extensions/transformer/convert.py +4 -4
- {fugue-0.9.1.dist-info → fugue-0.9.2.dist-info}/METADATA +70 -58
- {fugue-0.9.1.dist-info → fugue-0.9.2.dist-info}/RECORD +21 -20
- {fugue-0.9.1.dist-info → fugue-0.9.2.dist-info}/WHEEL +1 -1
- fugue-0.9.2.dist-info/entry_points.txt +11 -0
- fugue_dask/_dask_sql_wrapper.py +76 -0
- fugue_dask/_utils.py +9 -5
- fugue_dask/dataframe.py +1 -1
- fugue_dask/execution_engine.py +8 -11
- fugue_duckdb/dataframe.py +5 -5
- fugue_duckdb/execution_engine.py +1 -1
- fugue_ibis/execution_engine.py +7 -6
- fugue_ray/_utils/io.py +23 -16
- fugue_spark/_utils/convert.py +18 -12
- fugue_test/builtin_suite.py +38 -1
- fugue_test/execution_suite.py +2 -0
- fugue_version/__init__.py +1 -1
- fugue-0.9.1.dist-info/entry_points.txt +0 -12
- {fugue-0.9.1.dist-info → fugue-0.9.2.dist-info/licenses}/LICENSE +0 -0
- {fugue-0.9.1.dist-info → fugue-0.9.2.dist-info}/top_level.txt +0 -0
fugue/_utils/io.py
CHANGED
@@ -20,6 +20,10 @@ class FileParser(object):
         self._has_glob = "*" in path or "?" in path
         self._raw_path = path
         self._fs, self._fs_path = url_to_fs(path)
+        if not self._has_glob and self._fs.isdir(self._fs_path):
+            self._is_dir = True
+        else:
+            self._is_dir = False
         if not self.is_local:
             self._path = self._fs.unstrip_protocol(self._fs_path)
         else:
@@ -43,11 +47,15 @@ class FileParser(object):
         return self

     @property
-    def has_glob(self):
+    def is_dir(self) -> bool:
+        return self._is_dir
+
+    @property
+    def has_glob(self) -> bool:
         return self._has_glob

     @property
-    def is_local(self):
+    def is_local(self) -> bool:
         return isinstance(self._fs, LocalFileSystem)

     def join(self, path: str, format_hint: Optional[str] = None) -> "FileParser":
@@ -65,6 +73,10 @@ class FileParser(object):
     def path(self) -> str:
         return self._path

+    def as_dir_path(self) -> str:
+        assert_or_throw(self.is_dir, f"{self.raw_path} is not a directory")
+        return self.path + self._fs.sep
+
     @property
     def raw_path(self) -> str:
         return self._raw_path
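The new `is_dir` flag and `as_dir_path()` let consumers (see the Ray loader change further down) hand directories to readers with an explicit trailing separator. A minimal standalone sketch of the detection logic, assuming fsspec is installed; `looks_like_dir` is an illustrative name, not Fugue API:

```python
from fsspec.core import url_to_fs

def looks_like_dir(path: str) -> bool:
    # Mirrors FileParser.__init__: glob patterns are never treated as directories.
    if "*" in path or "?" in path:
        return False
    fs, fs_path = url_to_fs(path)
    return fs.isdir(fs_path)

print(looks_like_dir("/tmp"))        # True on most systems
print(looks_like_dir("/tmp/*.csv"))  # False: a glob, not a directory
```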
fugue/dataframe/function_wrapper.py
CHANGED
@@ -80,6 +80,7 @@ class DataFrameFunctionWrapper(FunctionWrapper):
         p.update(kwargs)
         has_kw = False
         rargs: Dict[str, Any] = {}
+        row_param_info: Any = None
         for k, v in self._params.items():
             if isinstance(v, (PositionalParam, KeywordParam)):
                 if isinstance(v, KeywordParam):
@@ -90,7 +91,16 @@ class DataFrameFunctionWrapper(FunctionWrapper):
                     isinstance(p[k], DataFrame),
                     lambda: TypeError(f"{p[k]} is not a DataFrame"),
                 )
-                rargs[k] = v.to_input_data(p[k], ctx=ctx)
+                if v.is_per_row:  # pragma: no cover
+                    # TODO: this branch is used only if row annotations
+                    # are allowed as input
+                    assert_or_throw(
+                        row_param_info is None,
+                        lambda: ValueError("only one row parameter is allowed"),
+                    )
+                    row_param_info = (k, v, p[k])
+                else:
+                    rargs[k] = v.to_input_data(p[k], ctx=ctx)
             else:
                 rargs[k] = p[k]  # TODO: should we do auto type conversion?
             del p[k]
@@ -100,12 +110,40 @@ class DataFrameFunctionWrapper(FunctionWrapper):
             rargs.update(p)
         elif not ignore_unknown and len(p) > 0:
             raise ValueError(f"{p} are not acceptable parameters")
+        if row_param_info is None:
+            return self._run_func(rargs, output, output_schema, ctx, raw=False)
+        else:  # pragma: no cover
+            # input contains row parameter
+            # TODO: this branch is used only if row annotations are allowed as input
+
+            def _dfs() -> Iterable[Any]:
+                k, v, df = row_param_info
+                for row in v.to_input_rows(df, ctx):
+                    rargs[k] = None
+                    _rargs = rargs.copy()
+                    _rargs[k] = row
+                    yield self._run_func(_rargs, output, output_schema, ctx, raw=True)
+
+            if not output:
+                sum(1 for _ in _dfs())
+                return
+            else:
+                return self._rt.iterable_to_output_df(_dfs(), output_schema, ctx)
+
+    def _run_func(
+        self,
+        rargs: Dict[str, Any],
+        output: bool,
+        output_schema: Any,
+        ctx: Any,
+        raw: bool,
+    ) -> Any:
         rt = self._func(**rargs)
         if not output:
             if isinstance(self._rt, _DataFrameParamBase):
                 self._rt.count(rt)
             return
-        if isinstance(self._rt, _DataFrameParamBase):
+        if not raw and isinstance(self._rt, _DataFrameParamBase):
             return self._rt.to_output_df(rt, output_schema, ctx=ctx)
         return rt

@@ -120,6 +158,7 @@ fugue_annotated_param = DataFrameFunctionWrapper.annotated_param
         annotation == Callable
         or annotation == callable  # pylint: disable=comparison-with-callable
        or str(annotation).startswith("typing.Callable")
+        or str(annotation).startswith("collections.abc.Callable")
     ),
 )
 class _CallableParam(AnnotatedParam):
@@ -134,6 +173,9 @@ class _CallableParam(AnnotatedParam):
         or annotation == Optional[callable]
         or str(annotation).startswith("typing.Union[typing.Callable")  # 3.8-
         or str(annotation).startswith("typing.Optional[typing.Callable")  # 3.9+
+        or str(annotation).startswith(
+            "typing.Optional[collections.abc.Callable]"
+        )  # 3.9+
     ),
 )
 class _OptionalCallableParam(AnnotatedParam):
@@ -145,14 +187,30 @@ class _DataFrameParamBase(AnnotatedParam):
         super().__init__(param)
         assert_or_throw(self.required, lambda: TypeError(f"{self} must be required"))

+    @property
+    def is_per_row(self) -> bool:
+        return False
+
     def to_input_data(self, df: DataFrame, ctx: Any) -> Any:  # pragma: no cover
         raise NotImplementedError

+    def to_input_rows(
+        self,
+        df: DataFrame,
+        ctx: Any,
+    ) -> Iterable[Any]:
+        raise NotImplementedError  # pragma: no cover
+
     def to_output_df(
         self, df: Any, schema: Any, ctx: Any
     ) -> DataFrame:  # pragma: no cover
         raise NotImplementedError

+    def iterable_to_output_df(
+        self, dfs: Iterable[Any], schema: Any, ctx: Any
+    ) -> DataFrame:  # pragma: no cover
+        raise NotImplementedError
+
     def count(self, df: Any) -> int:  # pragma: no cover
         raise NotImplementedError

@@ -182,6 +240,36 @@ class DataFrameParam(_DataFrameParamBase):
         return sum(1 for _ in df.as_array_iterable())


+@fugue_annotated_param(DataFrame, "r", child_can_reuse_code=True)
+class RowParam(_DataFrameParamBase):  # pragma: no cover
+    # TODO: this class is used only if row annotations are allowed as input
+    @property
+    def is_per_row(self) -> bool:
+        return True
+
+    def count(self, df: Any) -> int:
+        return 1
+
+
+@fugue_annotated_param(Dict[str, Any])
+class DictParam(RowParam):  # pragma: no cover
+    # TODO: this class is used only if row annotations are allowed as input
+    def to_input_rows(self, df: DataFrame, ctx: Any) -> Iterable[Any]:
+        yield from df.as_dict_iterable()
+
+    def to_output_df(self, output: Dict[str, Any], schema: Any, ctx: Any) -> DataFrame:
+        return ArrayDataFrame([list(output.values())], schema)
+
+    def iterable_to_output_df(
+        self, dfs: Iterable[Dict[str, Any]], schema: Any, ctx: Any
+    ) -> DataFrame:  # pragma: no cover
+        params: Dict[str, Any] = {}
+        if schema is not None:
+            params["schema"] = Schema(schema).pa_schema
+        adf = pa.Table.from_pylist(list(dfs), **params)
+        return ArrowDataFrame(adf)
+
+
 @fugue_annotated_param(AnyDataFrame)
 class _AnyDataFrameParam(DataFrameParam):
     def to_output_df(self, output: AnyDataFrame, schema: Any, ctx: Any) -> DataFrame:
@@ -207,6 +295,15 @@ class LocalDataFrameParam(DataFrameParam):
         )
         return output

+    def iterable_to_output_df(
+        self, dfs: Iterable[Any], schema: Any, ctx: Any
+    ) -> DataFrame:  # pragma: no cover
+        def _dfs() -> Iterable[DataFrame]:
+            for df in dfs:
+                yield self.to_output_df(df, schema, ctx)
+
+        return LocalDataFrameIterableDataFrame(_dfs(), schema=schema)
+
     def count(self, df: LocalDataFrame) -> int:
         if df.is_bounded:
             return df.count()
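The `RowParam`/`DictParam` machinery added here is fenced off with `pragma: no cover` and TODOs: it only activates once row annotations are accepted as input. The idea is that a parameter annotated as a row makes the wrapper invoke the function once per input row and then assemble the yielded rows into one output dataframe. A hedged, framework-free sketch of that dispatch loop (all names illustrative):

```python
from typing import Any, Callable, Dict, Iterable, List

def run_per_row(
    func: Callable[[Dict[str, Any]], Dict[str, Any]],
    rows: List[Dict[str, Any]],
) -> Iterable[Dict[str, Any]]:
    # Mirrors _dfs() in DataFrameFunctionWrapper.run: one call per row; the
    # real code then feeds the results to iterable_to_output_df.
    for row in rows:
        yield func(row)

def add_flag(row: Dict[str, Any]) -> Dict[str, Any]:
    row["b"] = 1
    return row

print(list(run_per_row(add_flag, [{"a": 3, "b": 4}, {"a": 1, "b": 2}])))
# [{'a': 3, 'b': 1}, {'a': 1, 'b': 1}]
```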
fugue/extensions/transformer/convert.py
CHANGED
@@ -375,7 +375,7 @@ class _FuncAsTransformer(Transformer):
         assert_arg_not_none(schema, "schema")
         tr = _FuncAsTransformer()
         tr._wrapper = DataFrameFunctionWrapper(  # type: ignore
-            func, "^[lspq][fF]?x*z?$", "^[lspq]$"
+            func, "^[lspq][fF]?x*z?$", "^[lspqr]$"
         )
         tr._output_schema_arg = schema  # type: ignore
         tr._validation_rules = validation_rules  # type: ignore
@@ -410,7 +410,7 @@ class _FuncAsOutputTransformer(_FuncAsTransformer):
         validation_rules.update(parse_validation_rules_from_comment(func))
         tr = _FuncAsOutputTransformer()
         tr._wrapper = DataFrameFunctionWrapper(  # type: ignore
-            func, "^[lspq][fF]?x*z?$", "^[lspnq]$"
+            func, "^[lspq][fF]?x*z?$", "^[lspnqr]$"
         )
         tr._output_schema_arg = None  # type: ignore
         tr._validation_rules = validation_rules  # type: ignore
@@ -503,7 +503,7 @@ class _FuncAsCoTransformer(CoTransformer):
         assert_arg_not_none(schema, "schema")
         tr = _FuncAsCoTransformer()
         tr._wrapper = DataFrameFunctionWrapper(  # type: ignore
-            func, "^(c|[lspq]+)[fF]?x*z?$", "^[lspq]$"
+            func, "^(c|[lspq]+)[fF]?x*z?$", "^[lspqr]$"
         )
         tr._dfs_input = tr._wrapper.input_code[0] == "c"  # type: ignore
         tr._output_schema_arg = schema  # type: ignore
@@ -562,7 +562,7 @@ class _FuncAsOutputCoTransformer(_FuncAsCoTransformer):

         tr = _FuncAsOutputCoTransformer()
         tr._wrapper = DataFrameFunctionWrapper(  # type: ignore
-            func, "^(c|[lspq]+)[fF]?x*z?$", "^[lspnq]$"
+            func, "^(c|[lspq]+)[fF]?x*z?$", "^[lspnqr]$"
         )
         tr._dfs_input = tr._wrapper.input_code[0] == "c"  # type: ignore
         tr._output_schema_arg = None  # type: ignore
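The only change across these four constructors is the output-annotation regex: code `r` (row-style output) joins the accepted single-letter codes, alongside `n` (no output) in the two output-transformer variants. A small sketch of how such signature-code validation behaves; the letter meanings are internal to Fugue, so treat the comments as assumptions:

```python
import re

# Each parameter/return annotation is mapped to a one-letter code internally;
# the wrapper accepts a function when the concatenated codes match the patterns.
input_pattern = re.compile(r"^[lspq][fF]?x*z?$")
output_pattern = re.compile(r"^[lspqr]$")  # "r" is new in 0.9.2

assert input_pattern.match("lx")       # dataframe-style input plus one extra param
assert output_pattern.match("r")       # row-style output now accepted
assert not output_pattern.match("rr")  # still exactly one output code
```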
{fugue-0.9.1.dist-info → fugue-0.9.2.dist-info}/METADATA
CHANGED
@@ -1,13 +1,12 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.4
 Name: fugue
-Version: 0.9.1
+Version: 0.9.2
 Summary: An abstraction layer for distributed computation
 Home-page: http://github.com/fugue-project/fugue
 Author: The Fugue Development Team
 Author-email: hello@fugue.ai
 License: Apache-2.0
 Keywords: distributed spark dask ray sql dsl domain specific language
-Platform: UNKNOWN
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Developers
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
@@ -17,67 +16,81 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
-
-Requires-Dist:
-
-
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-
-Requires-Dist:
-
-Requires-Dist:
-Requires-Dist: jupyterlab ; extra == 'all'
-Requires-Dist: ipython >=7.10.0 ; extra == 'all'
-Requires-Dist: duckdb >=0.5.0 ; extra == 'all'
-Requires-Dist: pyarrow >=6.0.1 ; extra == 'all'
-Requires-Dist: pandas <2.2,>=2.0.2 ; extra == 'all'
-Requires-Dist: ibis-framework ; extra == 'all'
-Requires-Dist: polars ; extra == 'all'
-Provides-Extra: cpp_sql_parser
-Requires-Dist: fugue-sql-antlr[cpp] >=0.2.0 ; extra == 'cpp_sql_parser'
+License-File: LICENSE
+Requires-Dist: triad>=1.0.0
+Requires-Dist: adagio>=0.2.6
+Provides-Extra: sql
+Requires-Dist: qpd>=0.4.4; extra == "sql"
+Requires-Dist: fugue-sql-antlr>=0.2.0; extra == "sql"
+Requires-Dist: sqlglot; extra == "sql"
+Requires-Dist: jinja2; extra == "sql"
+Provides-Extra: cpp-sql-parser
+Requires-Dist: fugue-sql-antlr[cpp]>=0.2.0; extra == "cpp-sql-parser"
+Provides-Extra: spark
+Requires-Dist: pyspark>=3.1.1; extra == "spark"
 Provides-Extra: dask
-Requires-Dist: dask[dataframe,distributed]
-Requires-Dist: pyarrow
-Requires-Dist: pandas
-
+Requires-Dist: dask[dataframe,distributed]>=2024.4.0; extra == "dask"
+Requires-Dist: pyarrow>=7.0.0; extra == "dask"
+Requires-Dist: pandas>=2.0.2; extra == "dask"
+Provides-Extra: ray
+Requires-Dist: ray[data]>=2.30.0; extra == "ray"
+Requires-Dist: duckdb>=0.5.0; extra == "ray"
+Requires-Dist: pyarrow>=7.0.0; extra == "ray"
+Requires-Dist: pandas<2.2; extra == "ray"
 Provides-Extra: duckdb
-Requires-Dist: qpd
-Requires-Dist: fugue-sql-antlr
-Requires-Dist: sqlglot
-Requires-Dist: jinja2
-Requires-Dist: duckdb
-Requires-Dist: numpy
+Requires-Dist: qpd>=0.4.4; extra == "duckdb"
+Requires-Dist: fugue-sql-antlr>=0.2.0; extra == "duckdb"
+Requires-Dist: sqlglot; extra == "duckdb"
+Requires-Dist: jinja2; extra == "duckdb"
+Requires-Dist: duckdb>=0.5.0; extra == "duckdb"
+Requires-Dist: numpy; extra == "duckdb"
+Provides-Extra: polars
+Requires-Dist: polars; extra == "polars"
 Provides-Extra: ibis
-Requires-Dist: qpd
-Requires-Dist: fugue-sql-antlr
-Requires-Dist: sqlglot
-Requires-Dist: jinja2
-Requires-Dist: ibis-framework
-Requires-Dist: pandas
+Requires-Dist: qpd>=0.4.4; extra == "ibis"
+Requires-Dist: fugue-sql-antlr>=0.2.0; extra == "ibis"
+Requires-Dist: sqlglot; extra == "ibis"
+Requires-Dist: jinja2; extra == "ibis"
+Requires-Dist: ibis-framework[pandas]; extra == "ibis"
+Requires-Dist: pandas<2.2; extra == "ibis"
 Provides-Extra: notebook
-Requires-Dist: notebook
-Requires-Dist: jupyterlab
-Requires-Dist: ipython
-Provides-Extra:
-Requires-Dist:
-
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-
-Requires-Dist:
-
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
+Requires-Dist: notebook; extra == "notebook"
+Requires-Dist: jupyterlab; extra == "notebook"
+Requires-Dist: ipython>=7.10.0; extra == "notebook"
+Provides-Extra: all
+Requires-Dist: qpd>=0.4.4; extra == "all"
+Requires-Dist: fugue-sql-antlr>=0.2.0; extra == "all"
+Requires-Dist: sqlglot; extra == "all"
+Requires-Dist: jinja2; extra == "all"
+Requires-Dist: pyspark>=3.1.1; extra == "all"
+Requires-Dist: dask[dataframe,distributed]>=2024.4.0; extra == "all"
+Requires-Dist: dask-sql; extra == "all"
+Requires-Dist: ray[data]>=2.30.0; extra == "all"
+Requires-Dist: notebook; extra == "all"
+Requires-Dist: jupyterlab; extra == "all"
+Requires-Dist: ipython>=7.10.0; extra == "all"
+Requires-Dist: duckdb>=0.5.0; extra == "all"
+Requires-Dist: pyarrow>=6.0.1; extra == "all"
+Requires-Dist: pandas<2.2,>=2.0.2; extra == "all"
+Requires-Dist: ibis-framework[duckdb,pandas]; extra == "all"
+Requires-Dist: polars; extra == "all"
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: keywords
+Dynamic: license
+Dynamic: license-file
+Dynamic: provides-extra
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary

 # Fugue

@@ -355,4 +368,3 @@ View some of our latest conferences presentations and content. For a more comple
 * [Large Scale Data Validation with Spark and Dask (PyCon US)](https://www.youtube.com/watch?v=2AdvBgjO_3Q)
 * [FugueSQL - The Enhanced SQL Interface for Pandas, Spark, and Dask DataFrames (PyData Global)](https://www.youtube.com/watch?v=OBpnGYjNBBI)
 * [Distributed Hybrid Parameter Tuning](https://www.youtube.com/watch?v=_GBjqskD8Qk)
-
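Note: several removed `Requires-Dist:` lines above lost their values in extraction and are left as-is. Beyond the version bump, the METADATA churn comes from a newer build toolchain: Metadata-Version 2.4 with `Dynamic:` and `License-File` fields, version-pinned `Requires-Dist` markers in the normalized `; extra == "..."` form, and PEP 685 extra names (`cpp_sql_parser` becomes `cpp-sql-parser`). A sketch of inspecting this at runtime with only the standard library, assuming fugue 0.9.2 is installed:

```python
from importlib.metadata import metadata, version

print(version("fugue"))                      # "0.9.2"
md = metadata("fugue")                       # an email.message.Message
print(md["Metadata-Version"])                # "2.4"
print(sorted(md.get_all("Provides-Extra")))  # includes the PEP 685 name "cpp-sql-parser"
```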
{fugue-0.9.1.dist-info → fugue-0.9.2.dist-info}/RECORD
CHANGED
@@ -10,7 +10,7 @@ fugue/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fugue/_utils/display.py,sha256=JV8oDA7efHm1wceZulCBOY5dMvjbWHvIm6ASisKfoWY,3164
 fugue/_utils/exception.py,sha256=SFIjwjV4CIEovp3P9k7ePNOFB12A5D8hDdhtfFUeM5Y,2247
 fugue/_utils/interfaceless.py,sha256=wI0H6L4W_1uQjh9tpjgT9HzN-fbrrtXXHC1x6Q_rrPg,2203
-fugue/_utils/io.py,sha256=
+fugue/_utils/io.py,sha256=5twd99LBzHtIMT67il1qwnEUa5n13WZmVKNd1shO4No,9649
 fugue/_utils/misc.py,sha256=_huy0eylmRTEFoReGR2M4rbAI8m79hFcfY5bDceVEXU,887
 fugue/_utils/registry.py,sha256=lrbzTdUEVnW6paBGDj-Yb-aTIbP5mjCqrXuRU9_N6os,316
 fugue/bag/__init__.py,sha256=0Q0_rnrEThrTx2U-1xGNyAg95idp_xcnywymIcW4Xck,46
@@ -31,7 +31,7 @@ fugue/dataframe/arrow_dataframe.py,sha256=r5zcZBX_N6XO5dmixBkTCPgLcMmgDF022piZvr
 fugue/dataframe/dataframe.py,sha256=xmyG85i14A6LDRkNmPt29oYq7PJsq668s1QvFHK8PV4,16964
 fugue/dataframe/dataframe_iterable_dataframe.py,sha256=lx71KfaI4lsVKI-79buc-idaeT20JEMBOq21SQcAiY8,7259
 fugue/dataframe/dataframes.py,sha256=tBSpHsENgbcdOJ0Jgst6PTKbjG7_uoFJch96oTlaQIs,4160
-fugue/dataframe/function_wrapper.py,sha256=
+fugue/dataframe/function_wrapper.py,sha256=7Sb6XrWTD_swtHJbHDWZRxHvFNWkERynnCDzLM0wSbo,18340
 fugue/dataframe/iterable_dataframe.py,sha256=TcOoNKa4jNbHbvAZ0XAhtMmGcioygIHPxI9budDtenQ,4758
 fugue/dataframe/pandas_dataframe.py,sha256=0L0wYCGhD2BpQbruoT07Ox9iQM5YLHLNrcgzudc-yKs,11633
 fugue/dataframe/utils.py,sha256=bA_otOJt9oju1yq5gtn21L_GDT_pUgNc6luYuBIhbUQ,10488
@@ -61,7 +61,7 @@ fugue/extensions/processor/convert.py,sha256=zG0lMtHGwY5TsqK4eplbMdlTg7J_PD3HbI0
 fugue/extensions/processor/processor.py,sha256=czhQlQgMpAXXoLVAX9Q0TFUMYEEhsgufTammxcKSmOY,1665
 fugue/extensions/transformer/__init__.py,sha256=VD6d-8xW1Yl8fUPj43cBWNR9pCOlYD9xWyGIHAlHwvI,456
 fugue/extensions/transformer/constants.py,sha256=76DfpoTOGQ8gp5XtCs_xznfbr_H015-prXpHWSqMNDU,59
-fugue/extensions/transformer/convert.py,sha256=
+fugue/extensions/transformer/convert.py,sha256=zDDIpZawMnHFarjZNZAyiw1jfyXGuPjnvgQk9jpYLak,23384
 fugue/extensions/transformer/transformer.py,sha256=zhOUgyv5-DPxYd1CP_98WeEw-zUgwknRnPW_6di-q3g,9098
 fugue/rpc/__init__.py,sha256=3GzUl4QZQuCChjD7eaTJW8tnTwfke6ZY9r9g5nCeBZ8,167
 fugue/rpc/base.py,sha256=3Fq5SvwLZqw9NXru3r32WuJKBGFr9bl7nFgy6e9boGo,8470
@@ -82,6 +82,7 @@ fugue/workflow/api.py,sha256=uQoxPSCZ91-ST4vwuPWG7qioRGW4eo-Sgi3DdwtSL4k,12495
 fugue/workflow/input.py,sha256=V_zLDNzndmQuYJAPXtdK4n-vOp7LrimGIf_wQtwf2mc,321
 fugue/workflow/module.py,sha256=ajyqgMwX6hFMZY9xp4Bp1Q-Zdta0p5f_W_n_SNrc4LE,5547
 fugue/workflow/workflow.py,sha256=-SFCXkyxgXbS6DpQGSBox4d3Ws3psIlB6PnraJLSu9Y,88219
+fugue-0.9.2.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
 fugue_contrib/__init__.py,sha256=QJioX-r2AiU7Pvt24M-k2c4vNq29qpK-3WNUde7ucck,222
 fugue_contrib/contrib.py,sha256=3B--6oIVBMZ-GwjIOXwZqYqkloH7Cxfq1I8vkwl2yPk,267
 fugue_contrib/seaborn/__init__.py,sha256=NuVv8EI4Om4gHcHwYO8ddextLQqw24vDj8qJio3E1MU,1405
@@ -89,25 +90,26 @@ fugue_contrib/viz/__init__.py,sha256=osgZx63Br-yMZImyEfYf9MVzJNM2Cqqke_-WsuDmG5M
 fugue_contrib/viz/_ext.py,sha256=Lu_DlS5DcmrFz27fHcKTCkhKyknVWcfS5kzZVVuO9xM,1345
 fugue_dask/__init__.py,sha256=2CcJ0AsN-k_f7dZ-yAyYpaICfUMPfH3l0FvUJSBzTr0,161
 fugue_dask/_constants.py,sha256=35UmTVITk21GhRyRlbJOwPPdQsytM_p_2NytOXEay18,510
+fugue_dask/_dask_sql_wrapper.py,sha256=lj38gJIOdoMV9W44gpwzLjUEtPVsQNKjRWuEkfI7-PM,2618
 fugue_dask/_io.py,sha256=pl4F7mbVgP7Rwh1FFG7xfOz2TBZRUj1l3lLvDY4jOf4,6020
-fugue_dask/_utils.py,sha256=
-fugue_dask/dataframe.py,sha256=
-fugue_dask/execution_engine.py,sha256=
+fugue_dask/_utils.py,sha256=0R0pCh4B47kQsAS_o0QGaosIqVcZnSakm6pfMB7fSXs,9059
+fugue_dask/dataframe.py,sha256=4Dvckpc4mlld2WsEFTTemxoA1zYK8Cn6jMKxUxYQCEE,13491
+fugue_dask/execution_engine.py,sha256=mFN_IurhdBEu8C5OreqpGSRdTbTBqSpzJO2dMQzEF-o,21264
 fugue_dask/registry.py,sha256=jepWKH55VWNIWV3pOF5vpCl2OpO0rI1IULx5GM2Gk6w,2274
 fugue_dask/tester.py,sha256=E7BZjgFpJgrHsLMKzvSO5im5OwocYcratjzulJSQZl0,718
 fugue_duckdb/__init__.py,sha256=ZzhmAWbROR1YL9Kmlt7OlwkgPZzFhsSdwLV2pFmAqGI,268
 fugue_duckdb/_io.py,sha256=vnd8m8C6XeMCBJBbAdA5h695NMfsduQrvONyS0HcEFA,8475
 fugue_duckdb/_utils.py,sha256=ElKbHUyn5fWSPGXsK57iqMzcqKtCf0c8pBVBYGe5Ql4,5020
 fugue_duckdb/dask.py,sha256=agoLzeB7Swxj2kVWfmXFbWD1NS2lbbTlnrjSkR8kKWY,5014
-fugue_duckdb/dataframe.py,sha256=
-fugue_duckdb/execution_engine.py,sha256=
+fugue_duckdb/dataframe.py,sha256=LAPoPOad9hgGhjyhlMGMfrnhkyBKe06Xzn6eP1hkl-w,8504
+fugue_duckdb/execution_engine.py,sha256=3f5hbWcX1y9mAtfFixrri-snYxVIQAf4HOgo9fHbDwQ,20385
 fugue_duckdb/registry.py,sha256=9_41KO42kXqcjF4yParQ5JGyg5TckcbhH-Q2IlGpSho,3987
 fugue_duckdb/tester.py,sha256=MzTkv3sdOwOjI59LRrSGGl4w59Njv3OArTU5kSRL-P0,1526
 fugue_ibis/__init__.py,sha256=z7TkK7M2_0p9XO6jQATNDgT0aHXn5k69Ttz2ga-eQG8,190
 fugue_ibis/_compat.py,sha256=zKdTaTfuC02eUIzZPkcd7oObnVBi_X5mQjQf7SDme3Y,246
 fugue_ibis/_utils.py,sha256=BUL5swA5FE4eQu0t5Z17hZVu9a2MFfxlFH6Ymy9xifg,6607
 fugue_ibis/dataframe.py,sha256=k4Q6qBLBIADF5YhbvaDplXO7OkMZSHuf_Wg5o-AusEI,7796
-fugue_ibis/execution_engine.py,sha256=
+fugue_ibis/execution_engine.py,sha256=jRnp1m1wuTicS29A-WA043f8QwdoK8b9rwPXvTkm8r8,18751
 fugue_notebook/__init__.py,sha256=9r_-2uxu1lBeZ8GgpYCKom_OZy2soIOYZajg7JDO-HY,4326
 fugue_notebook/env.py,sha256=TYiTxYPFi-BVJJY49jDsvw9mddhK8WrifeRxBke30I8,4773
 fugue_notebook/nbextension/README.md,sha256=QLnr957YeGfwzy2r4c4qbZPaXyCbyGrKPvcqSBQYSnU,123
@@ -127,7 +129,7 @@ fugue_ray/tester.py,sha256=oTA_xOzvQhJU3ohc4hsVpZc0zv4bwJn1c8a9u8kcuIs,537
 fugue_ray/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fugue_ray/_utils/cluster.py,sha256=3T3Gyra6lAHlzktta-Ro35j6YZQfH6fNrj2hC5ATF9k,621
 fugue_ray/_utils/dataframe.py,sha256=5c4duGV--mdLkKrbJRgjDWvVcp9BegA3yX16pmYDYLE,3954
-fugue_ray/_utils/io.py,sha256=
+fugue_ray/_utils/io.py,sha256=y7TFtdKcqDtMw2e1u012rT8Ay0ChvAT2uJL4pCypABM,9963
 fugue_spark/__init__.py,sha256=rvrMpFs9socMgyH_58gLbnAqmirBf5oidXoO4cekW6U,165
 fugue_spark/_constants.py,sha256=K2uLQfjvMxXk75K-7_Wn47Alpwq5rW57BtECAUrOeqA,177
 fugue_spark/dataframe.py,sha256=lYa8FizM3p_lsKYFR49FazkVZMJKyi2LABKTpP5YBLo,12006
@@ -135,7 +137,7 @@ fugue_spark/execution_engine.py,sha256=YBMtNxCvpy77xICFSg9PHMa6feNoYhWEZe8MmxznX
 fugue_spark/registry.py,sha256=_NmiV2cOooYK0YmqATEnNkPEMT9suUMtuecw2NNbIIk,4530
 fugue_spark/tester.py,sha256=VX003yGNlBukaZTQSN-w7XvgSk4rqxrWQIzno0dWrXg,2481
 fugue_spark/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-fugue_spark/_utils/convert.py,sha256=
+fugue_spark/_utils/convert.py,sha256=J3HtbuzomTYTN6A11iuvsC1h2C7o3fQBW5U360xGDhE,10234
 fugue_spark/_utils/io.py,sha256=OdUezKpB29Lx9aUS2k9x0xUAGZrmgMZyQYGPEeHk7rQ,5574
 fugue_spark/_utils/misc.py,sha256=9LsbBp6nOEhqXFLr8oWTc3VKzKk-vuVyixlRoquGnEs,858
 fugue_spark/_utils/partition.py,sha256=iaesyO5f4uXhj1W-p91cD5ecPiGlu0bzh8gl2ce2Uvg,3618
@@ -143,14 +145,13 @@ fugue_sql/__init__.py,sha256=Cmr7w0Efr7PzoXdQzdJfc4Dgqd69qKqcHZZodENq7EU,287
 fugue_sql/exceptions.py,sha256=ltS0MC8gMnVVrJbQiOZ0kRUWvVQ2LTx33dCW3ugqtb0,260
 fugue_test/__init__.py,sha256=xoQuVobhU64uyODRdnzf6MSWe9lw5khkhpJ2atvADoc,2315
 fugue_test/bag_suite.py,sha256=WbDCFjuAHYoJh4GXSPiSJxOoOwE1VMtYpJ3lQrsUK-Y,2483
-fugue_test/builtin_suite.py,sha256=
+fugue_test/builtin_suite.py,sha256=jP3xiq2vRZNNGzoSRjcUfrUk8NVg31SU0kpJaEvP25E,79400
 fugue_test/dataframe_suite.py,sha256=7ym4sshDUly6004cq1UlppqDVtbwxD6CKxR4Lu70i0s,18994
-fugue_test/execution_suite.py,sha256=
+fugue_test/execution_suite.py,sha256=wUiGdb8wLRd13JXo7Lo19vPOLh7t1C-L2NPLeU0k-uE,48736
 fugue_test/fixtures.py,sha256=8Pev-mxRZOWwTFlsGjcSZ0iIs78zyWbp5tq4KG1wyvk,1432
-fugue_version/__init__.py,sha256=
-fugue-0.9.
-fugue-0.9.
-fugue-0.9.
-fugue-0.9.
-fugue-0.9.
-fugue-0.9.1.dist-info/RECORD,,
+fugue_version/__init__.py,sha256=gqT-BGoeEItda9fICQDvLbxEjWRIBhFJxPxxKvmHLUo,22
+fugue-0.9.2.dist-info/METADATA,sha256=zmzlL5Fw-t0qTfcRb5jCeJb_IUJ7HVYRT0SpgcN3ncI,18558
+fugue-0.9.2.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
+fugue-0.9.2.dist-info/entry_points.txt,sha256=2Vxp1qew_tswacA8m0RzIliLlFOQMlzezvSXPugM_KA,295
+fugue-0.9.2.dist-info/top_level.txt,sha256=y1eCfzGdQ1_RkgcShcfbvXs-bopD3DwJcIOxP9EFXno,140
+fugue-0.9.2.dist-info/RECORD,,
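Several removed entries above lost their hashes (and the old dist-info names) in extraction and are kept truncated. For reference when reading the `sha256=` values: per the wheel spec, each RECORD entry carries the urlsafe base64 of the file's SHA-256 digest with `=` padding stripped, plus the byte size. A sketch of recomputing one:

```python
import base64
import hashlib

def record_hash(data: bytes) -> str:
    digest = hashlib.sha256(data).digest()
    return base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")

# If fugue_version/__init__.py is exactly this 22-byte file (an assumption
# about the exact bytes), the output should match its RECORD entry above.
content = b'__version__ = "0.9.2"\n'
print(record_hash(content), len(content))
```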
fugue-0.9.2.dist-info/entry_points.txt
ADDED
@@ -0,0 +1,11 @@
+[fugue.plugins]
+dask = fugue_dask.registry[dask]
+duckdb = fugue_duckdb.registry[duckdb]
+ibis = fugue_ibis[ibis]
+polars = fugue_polars.registry[polars]
+ray = fugue_ray.registry[ray]
+spark = fugue_spark.registry[spark]
+
+[pytest11]
+fugue_test = fugue_test
+fugue_test_fixtures = fugue_test.fixtures
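The content matches the deleted 0.9.1 file at the bottom of this diff except that the space before each extras bracket is gone (`fugue_dask.registry [dask]` becomes `fugue_dask.registry[dask]`), the normalized form newer packaging tools emit. Consumers can discover these plugins through the standard entry-point API; a sketch using the Python 3.10+ selection interface:

```python
from importlib.metadata import entry_points

for ep in entry_points(group="fugue.plugins"):
    print(ep.name, "->", ep.value)  # e.g. "dask -> fugue_dask.registry[dask]"
```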
fugue_dask/_dask_sql_wrapper.py
ADDED
@@ -0,0 +1,76 @@
+from typing import Any, Optional
+
+import dask.dataframe as dd
+
+try:
+    from dask.dataframe.dask_expr.io.parquet import ReadParquet
+
+    HAS_DASK_EXPR = True  # newer dask
+except ImportError:  # pragma: no cover
+    HAS_DASK_EXPR = False  # older dask
+
+if not HAS_DASK_EXPR:  # pragma: no cover
+    try:
+        from dask_sql import Context as ContextWrapper  # pylint: disable-all
+    except ImportError:  # pragma: no cover
+        raise ImportError(
+            "dask-sql is not installed. Please install it with `pip install dask-sql`"
+        )
+else:
+    from triad.utils.assertion import assert_or_throw
+
+    try:
+        from dask_sql import Context
+        from dask_sql.datacontainer import Statistics
+        from dask_sql.input_utils import InputUtil
+    except ImportError:  # pragma: no cover
+        raise ImportError(
+            "dask-sql is not installed. Please install it with `pip install dask-sql`"
+        )
+
+    class ContextWrapper(Context):  # type: ignore
+        def create_table(
+            self,
+            table_name: str,
+            input_table: dd.DataFrame,
+            format: Optional[str] = None,  # noqa
+            persist: bool = False,
+            schema_name: Optional[str] = None,
+            statistics: Optional[Statistics] = None,
+            gpu: bool = False,
+            **kwargs: Any,
+        ) -> None:  # pragma: no cover
+            assert_or_throw(
+                isinstance(input_table, dd.DataFrame),
+                lambda: ValueError(
+                    f"input_table must be a dask dataframe, but got {type(input_table)}"
+                ),
+            )
+            assert_or_throw(
+                dd._dask_expr_enabled(), lambda: ValueError("Dask expr must be enabled")
+            )
+            schema_name = schema_name or self.schema_name
+
+            dc = InputUtil.to_dc(
+                input_table,
+                table_name=table_name,
+                format=format,
+                persist=persist,
+                gpu=gpu,
+                **kwargs,
+            )
+
+            dask_filepath = None
+            operations = input_table.find_operations(ReadParquet)
+            for op in operations:
+                dask_filepath = op._args[0]
+
+            dc.filepath = dask_filepath
+            self.schema[schema_name].filepaths[table_name.lower()] = dask_filepath
+
+            if not statistics:
+                statistics = Statistics(float("nan"))
+            dc.statistics = statistics
+
+            self.schema[schema_name].tables[table_name.lower()] = dc
+            self.schema[schema_name].statistics[table_name.lower()] = statistics
fugue_dask/_utils.py
CHANGED
@@ -5,7 +5,7 @@ import dask.dataframe as dd
 import numpy as np
 import pandas as pd
 import pyarrow as pa
-from dask.dataframe.core import DataFrame
+from dask.dataframe import DataFrame
 from dask.delayed import delayed
 from dask.distributed import Client, get_client
 from triad.utils.pandas_like import PD_UTILS, PandasLikeUtils
@@ -149,7 +149,7 @@ def _add_hash_index(
     if len(cols) == 0:
         cols = list(df.columns)

-    def _add_hash(df: pd.DataFrame) -> pd.DataFrame:
+    def _add_hash(df: pd.DataFrame) -> pd.DataFrame:  # pragma: no cover
         if len(df) == 0:
             return df.assign(**{_FUGUE_DASK_TEMP_IDX_COLUMN: pd.Series(dtype=int)})
         return df.assign(
@@ -171,7 +171,7 @@ def _add_hash_index(

 def _add_random_index(
     df: dd.DataFrame, num: int, seed: Any = None
-) -> Tuple[dd.DataFrame, int]:
+) -> Tuple[dd.DataFrame, int]:  # pragma: no cover
     def _add_rand(df: pd.DataFrame) -> pd.DataFrame:
         if len(df) == 0:
             return df.assign(**{_FUGUE_DASK_TEMP_IDX_COLUMN: pd.Series(dtype=int)})
@@ -189,7 +189,9 @@ def _add_random_index(


 def _add_continuous_index(df: dd.DataFrame) -> Tuple[dd.DataFrame, int]:
-    def _get_info(df: pd.DataFrame, partition_info: Any) -> pd.DataFrame:
+    def _get_info(
+        df: pd.DataFrame, partition_info: Any
+    ) -> pd.DataFrame:  # pragma: no cover
         return pd.DataFrame(dict(no=[partition_info["number"]], ct=[len(df)]))

     pinfo = (
@@ -200,7 +202,9 @@ def _add_continuous_index(df: dd.DataFrame) -> Tuple[dd.DataFrame, int]:
     counts = pinfo.sort_values("no").ct.cumsum().tolist()
     starts = [0] + counts[0:-1]

-    def _add_index(df: pd.DataFrame, partition_info: Any) -> pd.DataFrame:
+    def _add_index(
+        df: pd.DataFrame, partition_info: Any
+    ) -> pd.DataFrame:  # pragma: no cover
         return df.assign(
             **{
                 _FUGUE_DASK_TEMP_IDX_COLUMN: np.arange(len(df))
fugue_dask/dataframe.py
CHANGED
@@ -379,7 +379,7 @@ def _to_array_chunks(
     assert_or_throw(columns is None or len(columns) > 0, ValueError("empty columns"))
     _df = df if columns is None or len(columns) == 0 else df[columns]

-    def _to_list(pdf: pd.DataFrame) -> List[Any]:
+    def _to_list(pdf: pd.DataFrame) -> List[Any]:  # pragma: no cover
         return list(
             PD_UTILS.as_array_iterable(
                 pdf,
fugue_dask/execution_engine.py
CHANGED
@@ -9,9 +9,10 @@ from triad.collections import Schema
 from triad.collections.dict import IndexedOrderedDict, ParamDict
 from triad.utils.assertion import assert_or_throw
 from triad.utils.hash import to_uuid
+from triad.utils.io import makedirs
 from triad.utils.pandas_like import PandasUtils
 from triad.utils.threading import RunOnce
-
+
 from fugue import StructuredRawSQL
 from fugue.collections.partition import (
     PartitionCursor,
@@ -61,14 +62,9 @@ class DaskSQLEngine(SQLEngine):
         return True

     def select(self, dfs: DataFrames, statement: StructuredRawSQL) -> DataFrame:
-        try:
-            from dask_sql import Context
-        except ImportError:
-            raise ImportError(
-                "dask-sql is not installed. "
-                "Please install it with `pip install dask-sql`"
-            )
-        ctx = Context()
+        from ._dask_sql_wrapper import ContextWrapper
+
+        ctx = ContextWrapper()
         _dfs: Dict[str, dd.DataFrame] = {k: self._to_safe_df(v) for k, v in dfs.items()}
         sql = statement.construct(dialect=self.dialect, log=self.log)
         res = ctx.sql(
@@ -102,7 +98,8 @@ class DaskMapEngine(MapEngine):
         partition_spec: PartitionSpec,
         on_init: Optional[Callable[[int, DataFrame], Any]] = None,
         map_func_format_hint: Optional[str] = None,
-    ) -> DataFrame:
+    ) -> DataFrame:  # pragma: no cover
+        # It is well tested but not captured by coverage
         presort = partition_spec.get_sorts(
             df.schema, with_partition_keys=partition_spec.algo == "coarse"
         )
@@ -475,7 +472,7 @@ class DaskExecutionEngine(ExecutionEngine):
         # Use presort over partition_spec.presort if possible
         _presort: IndexedOrderedDict = presort or partition_spec.presort

-        def _partition_take(partition, n, presort):
+        def _partition_take(partition, n, presort):  # pragma: no cover
             assert_or_throw(
                 partition.shape[1] == len(meta),
                 FugueBug("hitting the dask bug where partition keys are lost"),
fugue_duckdb/dataframe.py
CHANGED
@@ -165,7 +165,7 @@ def _duck_as_local(df: DuckDBPyRelation) -> DuckDBPyRelation:

 @as_arrow.candidate(lambda df: isinstance(df, DuckDBPyRelation))
 def _duck_as_arrow(df: DuckDBPyRelation) -> pa.Table:
-    _df = df.arrow()
+    _df = df.fetch_arrow_table()
     _df = replace_types_in_table(_df, LARGE_TYPES_REPLACEMENT, recursive=True)
     return _df

@@ -216,7 +216,7 @@ def _drop_duckdb_columns(df: DuckDBPyRelation, columns: List[str]) -> DuckDBPyRe
 def _duck_as_array(
     df: DuckDBPyRelation, columns: Optional[List[str]] = None, type_safe: bool = False
 ) -> List[Any]:
-    return pa_table_as_array(df.arrow(), columns=columns)
+    return pa_table_as_array(df.fetch_arrow_table(), columns=columns)


 @as_array_iterable.candidate(
@@ -225,14 +225,14 @@ def _duck_as_array(
 def _duck_as_array_iterable(
     df: DuckDBPyRelation, columns: Optional[List[str]] = None, type_safe: bool = False
 ) -> Iterable[Any]:
-    yield from pa_table_as_array_iterable(df.arrow(), columns=columns)
+    yield from pa_table_as_array_iterable(df.fetch_arrow_table(), columns=columns)


 @as_dicts.candidate(lambda df, *args, **kwargs: isinstance(df, DuckDBPyRelation))
 def _duck_as_dicts(
     df: DuckDBPyRelation, columns: Optional[List[str]] = None
 ) -> List[Dict[str, Any]]:
-    return pa_table_as_dicts(df.arrow(), columns=columns)
+    return pa_table_as_dicts(df.fetch_arrow_table(), columns=columns)


 @as_dict_iterable.candidate(
@@ -241,7 +241,7 @@ def _duck_as_dicts(
 def _duck_as_dict_iterable(
     df: DuckDBPyRelation, columns: Optional[List[str]] = None
 ) -> Iterable[Dict[str, Any]]:
-    yield from pa_table_as_dict_iterable(df.arrow(), columns=columns)
+    yield from pa_table_as_dict_iterable(df.fetch_arrow_table(), columns=columns)


 def _assert_no_missing(df: DuckDBPyRelation, columns: Iterable[Any]) -> None:
fugue_duckdb/execution_engine.py
CHANGED
@@ -108,7 +108,7 @@ class DuckDBEngine(SQLEngine):
         try:
             for k, v in dfs.items():
                 duckdb.from_arrow(v.as_arrow(), connection=conn).create_view(k)
-            return ArrowDataFrame(_duck_as_arrow(conn.
+            return ArrowDataFrame(_duck_as_arrow(conn.sql(statement)))
         finally:
             conn.close()

fugue_ibis/execution_engine.py
CHANGED
@@ -92,7 +92,8 @@ class IbisSQLEngine(SQLEngine):
         _df2 = self.to_df(df2)
         key_schema, end_schema = get_join_schemas(_df1, _df2, how=how, on=on)
         on_fields = [_df1.native[k] == _df2.native[k] for k in key_schema]
-
+        version = int(ibis.__version__.split(".")[0])
+        if version < 6:  # pragma: no cover
             suffixes: Dict[str, Any] = dict(suffixes=("", _JOIN_RIGHT_SUFFIX))
         else:
             # breaking change in ibis 6.0
@@ -113,7 +114,7 @@ class IbisSQLEngine(SQLEngine):
                 cols.append(
                     ibis.coalesce(tb[k], tb[k + _JOIN_RIGHT_SUFFIX]).name(k)
                 )
-            tb = tb[cols]
+            tb = tb.select(*cols)
         elif how.lower() in ["semi", "left_semi"]:
             tb = _df1.native.semi_join(_df2.native, on_fields, **suffixes)
         elif how.lower() in ["anti", "left_anti"]:
@@ -153,7 +154,7 @@ class IbisSQLEngine(SQLEngine):
         self,
         df: DataFrame,
         how: str = "any",
-        thresh: int = None,
+        thresh: Optional[int] = None,
         subset: Optional[List[str]] = None,
     ) -> DataFrame:
         schema = df.schema
@@ -161,7 +162,7 @@ class IbisSQLEngine(SQLEngine):
             schema = schema.extract(subset)
         _df = self.to_df(df)
         if thresh is None:
-            tb = _df.native.dropna(subset, how=how)
+            tb = _df.native.drop_null(subset, how=how)
             return self.to_df(tb, df.schema)
         assert_or_throw(
             how == "any", ValueError("when thresh is set, how must be 'any'")
@@ -204,7 +205,7 @@ class IbisSQLEngine(SQLEngine):
             ibis.coalesce(tb[f], ibis.literal(vd[f])).name(f) if f in names else tb[f]
             for f in df.columns
         ]
-        return self.to_df(tb[cols], schema=df.schema)
+        return self.to_df(tb.select(cols), schema=df.schema)

     def take(
         self,
@@ -241,7 +242,7 @@ class IbisSQLEngine(SQLEngine):
                 f") WHERE __fugue_take_param<={n}"
             )
             tb = self.query_to_table(sql, {tbn: idf})
-            return self.to_df(tb[df.columns], schema=df.schema)
+            return self.to_df(tb.select(*df.columns), schema=df.schema)

         sorts: List[str] = []
         for k, v in _presort.items():
fugue_ray/_utils/io.py
CHANGED
@@ -7,7 +7,7 @@ import ray.data as rd
 from packaging import version
 from pyarrow import csv as pacsv
 from pyarrow import json as pajson
-
+
 from triad.collections import Schema
 from triad.collections.dict import ParamDict
 from triad.utils.assertion import assert_or_throw
@@ -21,6 +21,27 @@ from fugue_ray.dataframe import RayDataFrame

 from .._constants import RAY_VERSION

+try:
+    from ray.data.datasource import FileExtensionFilter
+
+    class _FileFiler(FileExtensionFilter):  # pragma: no cover
+        def __init__(
+            self, file_extensions: Union[str, List[str]], exclude: Iterable[str]
+        ):
+            super().__init__(file_extensions, allow_if_no_extension=True)
+            self._exclude = set(exclude)
+
+        def _is_valid(self, path: str) -> bool:
+            return pathlib.Path(
+                path
+            ).name not in self._exclude and self._file_has_extension(path)
+
+        def __call__(self, paths: List[str]) -> List[str]:
+            return [path for path in paths if self._is_valid(path)]
+
+except ImportError:  # pragma: no cover
+    pass  # ray >=2.10
+

 class RayIO(object):
     def __init__(self, engine: ExecutionEngine):
@@ -53,7 +74,7 @@ class RayIO(object):
             len(fmts) == 1, NotImplementedError("can't support multiple formats")
         )
         fmt = fmts[0]
-        files = [f.path for f in fp]
+        files = [f.as_dir_path() if f.is_dir else f.path for f in fp]
         return self._loads[fmt](files, columns, **kwargs)

     def save_df(
@@ -248,17 +269,3 @@ class RayIO(object):

     def _remote_args(self) -> Dict[str, Any]:
         return {"num_cpus": 1}
-
-
-class _FileFiler(FileExtensionFilter):  # pragma: no cover
-    def __init__(self, file_extensions: Union[str, List[str]], exclude: Iterable[str]):
-        super().__init__(file_extensions, allow_if_no_extension=True)
-        self._exclude = set(exclude)
-
-    def _is_valid(self, path: str) -> bool:
-        return pathlib.Path(
-            path
-        ).name not in self._exclude and self._file_has_extension(path)
-
-    def __call__(self, paths: List[str]) -> List[str]:
-        return [path for path in paths if self._is_valid(path)]
fugue_spark/_utils/convert.py
CHANGED
@@ -174,20 +174,26 @@ def pd_to_spark_df(


 def to_pandas(df: ps.DataFrame) -> pd.DataFrame:
-    if version.parse(pd.__version__) < version.parse("2.0.0") or not any(
-        isinstance(x.dataType, (pt.TimestampType, TimestampNTZType))
-        for x in df.schema.fields
-    ):
-        return df.toPandas()
-    else:  # pragma: no cover
+    def _to_df() -> pd.DataFrame:
+        if version.parse(pd.__version__) < version.parse("2.0.0") or not any(
+            isinstance(x.dataType, (pt.TimestampType, TimestampNTZType))
+            for x in df.schema.fields
+        ):
+            return df.toPandas()
+        else:  # pragma: no cover
+
+            def serialize(dfs):
+                for df in dfs:
+                    data = pickle.dumps(df)
+                    yield pd.DataFrame([[data]], columns=["data"])

-        def serialize(dfs):
-            for df in dfs:
-                data = pickle.dumps(df)
-                yield pd.DataFrame([[data]], columns=["data"])
+            sdf = df.mapInPandas(serialize, schema="data binary")
+            return pd.concat(pickle.loads(x.data) for x in sdf.collect())

-        sdf = df.mapInPandas(serialize, schema="data binary")
-        return pd.concat(pickle.loads(x.data) for x in sdf.collect())
+    pdf = _to_df()
+    if hasattr(pdf, "attrs") and "metrics" in pdf.attrs:  # pragma: no cover
+        del pdf.attrs["metrics"]
+    return pdf


 def to_arrow(df: ps.DataFrame) -> pa.Table:
fugue_test/builtin_suite.py
CHANGED
@@ -486,6 +486,25 @@ class BuiltInTests(object):
             dag.df([], "a:int,b:int").assert_eq(b)
             dag.run(self.engine)

+    def _test_transform_row_wise(self):  # pragma: no cover
+        # TODO: currently disabled because we don't support Dict[str, Any]
+        # as dataframe input
+        def t1(row: Dict[str, Any]) -> Dict[str, Any]:
+            row["b"] = 1
+            return row
+
+        def t2(rows: List[Dict[str, Any]]) -> Dict[str, Any]:
+            return rows[0]
+
+        with fa.engine_context(self.engine):
+            a = pd.DataFrame([[3, 4], [1, 2], [3, 5]], columns=["a", "b"])
+            b = fa.transform(a, t1, schema="*")
+            assert sorted(fa.as_array(b)) == [[1, 1], [3, 1], [3, 1]]
+            b = fa.transform(
+                a, t2, schema="*", partition={"by": "a", "presort": "b"}
+            )
+            assert sorted(fa.as_array(b)) == [[1, 2], [3, 4]]
+
     def test_transform_binary(self):
         with FugueWorkflow() as dag:
             a = dag.df([[1, pickle.dumps([0, "a"])]], "a:int,b:bytes")
@@ -548,6 +567,8 @@ class BuiltInTests(object):
             e = dag.df([[1, 2, 1, 10]], "a:int,ct1:int,ct2:int,x:int")
             e.assert_eq(c)

+            a.zip(b).transform(mock_co_tf1_d, params=dict(p=10)).assert_eq(e)
+
             # interfaceless
             c = dag.transform(
                 a.zip(b),
@@ -676,6 +697,13 @@ class BuiltInTests(object):
             incr()
             yield pa.Table.from_pandas(df)

+        def t11(row: list[dict[str, Any]]) -> dict[str, Any]:
+            incr()
+            return row[0]
+
+        def t12(row: list[dict[str, Any]]) -> None:
+            incr()
+
         with FugueWorkflow() as dag:
             a = dag.df([[1, 2], [3, 4]], "a:double,b:int")
             a.out_transform(t1)  # +2
@@ -688,6 +716,8 @@ class BuiltInTests(object):
             a.out_transform(t8, ignore_errors=[NotImplementedError])  # +1
             a.out_transform(t9)  # +1
             a.out_transform(t10)  # +1
+            a.out_transform(t11)  # +2
+            a.out_transform(t12)  # +2
             raises(FugueWorkflowCompileValidationError, lambda: a.out_transform(t2))
             raises(FugueWorkflowCompileValidationError, lambda: a.out_transform(t3))
             raises(FugueWorkflowCompileValidationError, lambda: a.out_transform(t4))
@@ -695,7 +725,7 @@ class BuiltInTests(object):
             raises(FugueWorkflowCompileValidationError, lambda: a.out_transform(T7))
             dag.run(self.engine)

-        assert 13 <= incr()
+        assert 17 <= incr()

     def test_out_cotransform(self):  # noqa: C901
         tmpdir = str(self.tmpdir)
@@ -2001,6 +2031,13 @@ def mock_co_tf1(
     return [[df1[0]["a"], len(df1), len(df2), p]]


+@cotransformer(lambda dfs, **kwargs: "a:int,ct1:int,ct2:int,x:int")
+def mock_co_tf1_d(
+    df1: List[Dict[str, Any]], df2: List[List[Any]], p=1
+) -> Dict[str, Any]:
+    return dict(a=df1[0]["a"], ct1=len(df1), ct2=len(df2), x=p)
+
+
 def mock_co_tf2(dfs: DataFrames, p=1) -> List[List[Any]]:
     return [[dfs[0].peek_dict()["a"], dfs[0].count(), dfs[1].count(), p]]
fugue_test/execution_suite.py
CHANGED
@@ -9,6 +9,7 @@ except ImportError:  # pragma: no cover
 import copy
 import os
 import pickle
+import sys
 from datetime import datetime

 import pandas as pd
@@ -1194,6 +1195,7 @@ class ExecutionEngineTests(object):
         )
         self.df_eq(c, [[1.1, 6.1], [7.1, 2.1]], "a:double,c:double", throw=True)

+    @pytest.mark.skipif(sys.platform == "win32", reason="skip on Windows")
     def test_load_csv_folder(self):
         native = NativeExecutionEngine()
         a = ArrayDataFrame([[6.1, 1.1]], "c:double,a:double")
fugue_version/__init__.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.9.1"
+__version__ = "0.9.2"
fugue-0.9.1.dist-info/entry_points.txt
DELETED
@@ -1,12 +0,0 @@
-[fugue.plugins]
-dask = fugue_dask.registry [dask]
-duckdb = fugue_duckdb.registry [duckdb]
-ibis = fugue_ibis [ibis]
-polars = fugue_polars.registry [polars]
-ray = fugue_ray.registry [ray]
-spark = fugue_spark.registry [spark]
-
-[pytest11]
-fugue_test = fugue_test
-fugue_test_fixtures = fugue_test.fixtures
-
{fugue-0.9.1.dist-info → fugue-0.9.2.dist-info/licenses}/LICENSE
RENAMED
File without changes
{fugue-0.9.1.dist-info → fugue-0.9.2.dist-info}/top_level.txt
RENAMED
File without changes