fugue 0.9.2.dev1__py3-none-any.whl → 0.9.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fugue/_utils/io.py CHANGED
@@ -20,6 +20,10 @@ class FileParser(object):
         self._has_glob = "*" in path or "?" in path
         self._raw_path = path
         self._fs, self._fs_path = url_to_fs(path)
+        if not self._has_glob and self._fs.isdir(self._fs_path):
+            self._is_dir = True
+        else:
+            self._is_dir = False
         if not self.is_local:
             self._path = self._fs.unstrip_protocol(self._fs_path)
         else:
@@ -43,11 +47,15 @@ class FileParser(object):
         return self

     @property
-    def has_glob(self):
+    def is_dir(self) -> bool:
+        return self._is_dir
+
+    @property
+    def has_glob(self) -> bool:
         return self._has_glob

     @property
-    def is_local(self):
+    def is_local(self) -> bool:
         return isinstance(self._fs, LocalFileSystem)

     def join(self, path: str, format_hint: Optional[str] = None) -> "FileParser":
@@ -65,6 +73,10 @@ class FileParser(object):
     def path(self) -> str:
         return self._path

+    def as_dir_path(self) -> str:
+        assert_or_throw(self.is_dir, f"{self.raw_path} is not a directory")
+        return self.path + self._fs.sep
+
     @property
     def raw_path(self) -> str:
         return self._raw_path
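
The new is_dir/as_dir_path pair lets callers distinguish directory inputs from plain files. A minimal sketch of the intended behavior (FileParser is an internal helper, and the local "data" directory here is an illustrative assumption):

    from fugue._utils.io import FileParser

    fp = FileParser("data")       # a path with no glob characters
    if fp.is_dir:                 # new in 0.9.3: detected via the filesystem's isdir()
        print(fp.as_dir_path())   # the resolved path plus the fs separator, e.g. ".../data/"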
fugue/dataframe/function_wrapper.py CHANGED
@@ -91,7 +91,9 @@ class DataFrameFunctionWrapper(FunctionWrapper):
                     isinstance(p[k], DataFrame),
                     lambda: TypeError(f"{p[k]} is not a DataFrame"),
                 )
-                if v.is_per_row:
+                if v.is_per_row:  # pragma: no cover
+                    # TODO: this branch is used only if row annotations
+                    # are allowed as input
                     assert_or_throw(
                         row_param_info is None,
                         lambda: ValueError("only one row parameter is allowed"),
@@ -110,7 +112,9 @@ class DataFrameFunctionWrapper(FunctionWrapper):
             raise ValueError(f"{p} are not acceptable parameters")
         if row_param_info is None:
             return self._run_func(rargs, output, output_schema, ctx, raw=False)
-        else:  # input contains row parameter
+        else:  # pragma: no cover
+            # input contains row parameter
+            # TODO: this branch is used only if row annotations are allowed as input

             def _dfs() -> Iterable[Any]:
                 k, v, df = row_param_info
@@ -154,6 +158,7 @@ fugue_annotated_param = DataFrameFunctionWrapper.annotated_param
         annotation == Callable
         or annotation == callable  # pylint: disable=comparison-with-callable
         or str(annotation).startswith("typing.Callable")
+        or str(annotation).startswith("collections.abc.Callable")
     ),
 )
 class _CallableParam(AnnotatedParam):
@@ -168,6 +173,9 @@ class _CallableParam(AnnotatedParam):
         or annotation == Optional[callable]
         or str(annotation).startswith("typing.Union[typing.Callable")  # 3.8-
         or str(annotation).startswith("typing.Optional[typing.Callable")  # 3.9+
+        or str(annotation).startswith(
+            "typing.Optional[collections.abc.Callable]"
+        )  # 3.9+
     ),
 )
 class _OptionalCallableParam(AnnotatedParam):
@@ -233,7 +241,8 @@ class DataFrameParam(_DataFrameParamBase):


 @fugue_annotated_param(DataFrame, "r", child_can_reuse_code=True)
-class RowParam(_DataFrameParamBase):
+class RowParam(_DataFrameParamBase):  # pragma: no cover
+    # TODO: this class is used only if row annotations are allowed as input
     @property
     def is_per_row(self) -> bool:
         return True
@@ -243,7 +252,8 @@ class RowParam(_DataFrameParamBase):


 @fugue_annotated_param(Dict[str, Any])
-class DictParam(RowParam):
+class DictParam(RowParam):  # pragma: no cover
+    # TODO: this class is used only if row annotations are allowed as input
     def to_input_rows(self, df: DataFrame, ctx: Any) -> Iterable[Any]:
         yield from df.as_dict_iterable()

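The new match on "collections.abc.Callable" covers PEP 585 annotations, whose string form differs from typing.Callable on Python 3.9+. A quick runnable check of that rendering, independent of Fugue:

    from collections.abc import Callable
    import typing

    def f(cb: Callable[[int], int]) -> int:
        return cb(1)

    hint = typing.get_type_hints(f)["cb"]
    print(str(hint))  # "collections.abc.Callable[[int], int]" on Python 3.9+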
fugue/extensions/transformer/convert.py CHANGED
@@ -375,7 +375,7 @@ class _FuncAsTransformer(Transformer):
         assert_arg_not_none(schema, "schema")
         tr = _FuncAsTransformer()
         tr._wrapper = DataFrameFunctionWrapper(  # type: ignore
-            func, "^[lspqr][fF]?x*z?$", "^[lspqr]$"
+            func, "^[lspq][fF]?x*z?$", "^[lspqr]$"
         )
         tr._output_schema_arg = schema  # type: ignore
         tr._validation_rules = validation_rules  # type: ignore
@@ -410,7 +410,7 @@ class _FuncAsOutputTransformer(_FuncAsTransformer):
         validation_rules.update(parse_validation_rules_from_comment(func))
         tr = _FuncAsOutputTransformer()
         tr._wrapper = DataFrameFunctionWrapper(  # type: ignore
-            func, "^[lspq][fF]?x*z?$", "^[lspnqr]$"
+            func, "^[lspq][fF]?x*z?$", "^[lspnqr]$"
         )
         tr._output_schema_arg = None  # type: ignore
         tr._validation_rules = validation_rules  # type: ignore
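
Dropping "r" from the input patterns is the flip side of the disabled RowParam above: per-row annotations are no longer accepted as transformer inputs, while "r" stays valid for outputs. A tiny check of the tightened pattern (assuming, per the TODO notes in the diff, that each letter encodes one accepted parameter kind and "r" is the row kind):

    import re

    old, new = "^[lspqr][fF]?x*z?$", "^[lspq][fF]?x*z?$"
    print(bool(re.match(old, "r")), bool(re.match(new, "r")))  # True False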
fugue/rpc/flask.py CHANGED
@@ -1,6 +1,5 @@
-import base64
 import logging
-import cloudpickle
+import json
 from threading import Thread
 from typing import Any, Optional, Tuple, Dict, List

@@ -60,6 +59,7 @@ class FlaskRPCServer(RPCServer):
             -1.0 if timeout is None else to_timedelta(timeout).total_seconds()
         )
         self._server: Optional[FlaskRPCServer._Thread] = None
+        self._log = logging.getLogger()

     def make_client(self, handler: Any) -> RPCClient:
         """Add ``handler`` and correspondent :class:`~.FlaskRPCClient`
@@ -77,6 +77,14 @@ class FlaskRPCServer(RPCServer):

     def start_server(self) -> None:
         """Start Flask RPC server"""
+        msg = (
+            "Starting RPC server on %s:%s. "
+            "This server has no authentication and relies on network isolation. "
+            "Ensure proper VPC/firewall configuration in production. "
+            "See https://fugue-tutorials.readthedocs.io/tutorials/resources/"
+            "security.html"
+        )
+        self._log.warning(msg, self._host, self._port)
         app = Flask("FlaskRPCServer")
         app.route("/invoke", methods=["POST"])(self._invoke)
         self._server = FlaskRPCServer._Thread(app, self._host, self._port)
@@ -122,10 +130,10 @@ class FlaskRPCClient(RPCClient):


 def _encode(*args: Any, **kwargs: Any) -> str:
-    data = base64.b64encode(cloudpickle.dumps(dict(args=args, kwargs=kwargs)))
-    return data.decode("ascii")
+    data = json.dumps(dict(args=args, kwargs=kwargs))
+    return data


 def _decode(data: str) -> Tuple[List[Any], Dict[str, Any]]:
-    data = cloudpickle.loads(base64.b64decode(data.encode("ascii")))
+    data = json.loads(data)
     return data["args"], data["kwargs"]  # type: ignore
fugue-0.9.2.dev1.dist-info/METADATA → fugue-0.9.3.dist-info/METADATA CHANGED
@@ -1,13 +1,12 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.4
 Name: fugue
-Version: 0.9.2.dev1
+Version: 0.9.3
 Summary: An abstraction layer for distributed computation
 Home-page: http://github.com/fugue-project/fugue
 Author: The Fugue Development Team
 Author-email: hello@fugue.ai
 License: Apache-2.0
 Keywords: distributed spark dask ray sql dsl domain specific language
-Platform: UNKNOWN
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Developers
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
@@ -17,67 +16,81 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
-Requires-Dist: triad >=0.9.7
-Requires-Dist: adagio >=0.2.4
-Provides-Extra: all
-Requires-Dist: qpd >=0.4.4 ; extra == 'all'
-Requires-Dist: fugue-sql-antlr >=0.2.0 ; extra == 'all'
-Requires-Dist: sqlglot ; extra == 'all'
-Requires-Dist: jinja2 ; extra == 'all'
-Requires-Dist: pyspark >=3.1.1 ; extra == 'all'
-Requires-Dist: dask[dataframe,distributed] >=2023.5.0 ; extra == 'all'
-Requires-Dist: dask-sql ; extra == 'all'
-Requires-Dist: ray[data] >=2.5.0 ; extra == 'all'
-Requires-Dist: notebook ; extra == 'all'
-Requires-Dist: jupyterlab ; extra == 'all'
-Requires-Dist: ipython >=7.10.0 ; extra == 'all'
-Requires-Dist: duckdb >=0.5.0 ; extra == 'all'
-Requires-Dist: pyarrow >=6.0.1 ; extra == 'all'
-Requires-Dist: pandas <2.2,>=2.0.2 ; extra == 'all'
-Requires-Dist: ibis-framework ; extra == 'all'
-Requires-Dist: polars ; extra == 'all'
-Provides-Extra: cpp_sql_parser
-Requires-Dist: fugue-sql-antlr[cpp] >=0.2.0 ; extra == 'cpp_sql_parser'
+License-File: LICENSE
+Requires-Dist: triad>=1.0.0
+Requires-Dist: adagio>=0.2.6
+Provides-Extra: sql
+Requires-Dist: qpd>=0.4.4; extra == "sql"
+Requires-Dist: fugue-sql-antlr>=0.2.0; extra == "sql"
+Requires-Dist: sqlglot<28; extra == "sql"
+Requires-Dist: jinja2; extra == "sql"
+Provides-Extra: cpp-sql-parser
+Requires-Dist: fugue-sql-antlr[cpp]>=0.2.0; extra == "cpp-sql-parser"
+Provides-Extra: spark
+Requires-Dist: pyspark>=3.1.1; extra == "spark"
 Provides-Extra: dask
-Requires-Dist: dask[dataframe,distributed] >=2023.5.0 ; extra == 'dask'
-Requires-Dist: pyarrow >=7.0.0 ; extra == 'dask'
-Requires-Dist: pandas >=2.0.2 ; extra == 'dask'
-Requires-Dist: dask[dataframe,distributed] >=2024.4.0 ; (python_version >= "3.11.9") and extra == 'dask'
+Requires-Dist: dask[dataframe,distributed]>=2024.4.0; extra == "dask"
+Requires-Dist: pyarrow>=7.0.0; extra == "dask"
+Requires-Dist: pandas>=2.0.2; extra == "dask"
+Provides-Extra: ray
+Requires-Dist: ray[data]>=2.30.0; extra == "ray"
+Requires-Dist: duckdb>=0.5.0; extra == "ray"
+Requires-Dist: pyarrow>=7.0.0; extra == "ray"
+Requires-Dist: pandas<2.2; extra == "ray"
 Provides-Extra: duckdb
-Requires-Dist: qpd >=0.4.4 ; extra == 'duckdb'
-Requires-Dist: fugue-sql-antlr >=0.2.0 ; extra == 'duckdb'
-Requires-Dist: sqlglot ; extra == 'duckdb'
-Requires-Dist: jinja2 ; extra == 'duckdb'
-Requires-Dist: duckdb >=0.5.0 ; extra == 'duckdb'
-Requires-Dist: numpy ; extra == 'duckdb'
+Requires-Dist: qpd>=0.4.4; extra == "duckdb"
+Requires-Dist: fugue-sql-antlr>=0.2.0; extra == "duckdb"
+Requires-Dist: sqlglot<28; extra == "duckdb"
+Requires-Dist: jinja2; extra == "duckdb"
+Requires-Dist: duckdb>=0.5.0; extra == "duckdb"
+Requires-Dist: numpy; extra == "duckdb"
+Provides-Extra: polars
+Requires-Dist: polars; extra == "polars"
 Provides-Extra: ibis
-Requires-Dist: qpd >=0.4.4 ; extra == 'ibis'
-Requires-Dist: fugue-sql-antlr >=0.2.0 ; extra == 'ibis'
-Requires-Dist: sqlglot ; extra == 'ibis'
-Requires-Dist: jinja2 ; extra == 'ibis'
-Requires-Dist: ibis-framework ; extra == 'ibis'
-Requires-Dist: pandas <2.2 ; extra == 'ibis'
+Requires-Dist: qpd>=0.4.4; extra == "ibis"
+Requires-Dist: fugue-sql-antlr>=0.2.0; extra == "ibis"
+Requires-Dist: sqlglot<28; extra == "ibis"
+Requires-Dist: jinja2; extra == "ibis"
+Requires-Dist: ibis-framework[pandas]; extra == "ibis"
+Requires-Dist: pandas<2.2; extra == "ibis"
 Provides-Extra: notebook
-Requires-Dist: notebook ; extra == 'notebook'
-Requires-Dist: jupyterlab ; extra == 'notebook'
-Requires-Dist: ipython >=7.10.0 ; extra == 'notebook'
-Provides-Extra: polars
-Requires-Dist: polars ; extra == 'polars'
-Provides-Extra: ray
-Requires-Dist: ray[data] >=2.5.0 ; extra == 'ray'
-Requires-Dist: duckdb >=0.5.0 ; extra == 'ray'
-Requires-Dist: pyarrow >=7.0.0 ; extra == 'ray'
-Requires-Dist: pandas <2.2 ; extra == 'ray'
-Provides-Extra: spark
-Requires-Dist: pyspark >=3.1.1 ; extra == 'spark'
-Provides-Extra: sql
-Requires-Dist: qpd >=0.4.4 ; extra == 'sql'
-Requires-Dist: fugue-sql-antlr >=0.2.0 ; extra == 'sql'
-Requires-Dist: sqlglot ; extra == 'sql'
-Requires-Dist: jinja2 ; extra == 'sql'
+Requires-Dist: notebook; extra == "notebook"
+Requires-Dist: jupyterlab; extra == "notebook"
+Requires-Dist: ipython>=7.10.0; extra == "notebook"
+Provides-Extra: all
+Requires-Dist: qpd>=0.4.4; extra == "all"
+Requires-Dist: fugue-sql-antlr>=0.2.0; extra == "all"
+Requires-Dist: sqlglot<28; extra == "all"
+Requires-Dist: jinja2; extra == "all"
+Requires-Dist: pyspark>=3.1.1; extra == "all"
+Requires-Dist: dask[dataframe,distributed]>=2024.4.0; extra == "all"
+Requires-Dist: dask-sql; extra == "all"
+Requires-Dist: ray[data]>=2.30.0; extra == "all"
+Requires-Dist: notebook; extra == "all"
+Requires-Dist: jupyterlab; extra == "all"
+Requires-Dist: ipython>=7.10.0; extra == "all"
+Requires-Dist: duckdb>=0.5.0; extra == "all"
+Requires-Dist: pyarrow>=6.0.1; extra == "all"
+Requires-Dist: pandas<2.2,>=2.0.2; extra == "all"
+Requires-Dist: ibis-framework[duckdb,pandas]; extra == "all"
+Requires-Dist: polars; extra == "all"
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: keywords
+Dynamic: license
+Dynamic: license-file
+Dynamic: provides-extra
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary

 # Fugue

@@ -355,4 +368,3 @@ View some of our latest conferences presentations and content. For a more comple
 * [Large Scale Data Validation with Spark and Dask (PyCon US)](https://www.youtube.com/watch?v=2AdvBgjO_3Q)
 * [FugueSQL - The Enhanced SQL Interface for Pandas, Spark, and Dask DataFrames (PyData Global)](https://www.youtube.com/watch?v=OBpnGYjNBBI)
 * [Distributed Hybrid Parameter Tuning](https://www.youtube.com/watch?v=_GBjqskD8Qk)
-
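
After installing the new wheel, the reorganized extras can be inspected with the standard metadata API (a sketch; it requires fugue 0.9.3 in the current environment):

    from importlib.metadata import metadata

    md = metadata("fugue")
    print(md["Version"])                 # 0.9.3
    print(md.get_all("Provides-Extra"))  # ['sql', 'cpp-sql-parser', 'spark', ...]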
fugue-0.9.2.dev1.dist-info/RECORD → fugue-0.9.3.dist-info/RECORD CHANGED
@@ -10,7 +10,7 @@ fugue/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fugue/_utils/display.py,sha256=JV8oDA7efHm1wceZulCBOY5dMvjbWHvIm6ASisKfoWY,3164
 fugue/_utils/exception.py,sha256=SFIjwjV4CIEovp3P9k7ePNOFB12A5D8hDdhtfFUeM5Y,2247
 fugue/_utils/interfaceless.py,sha256=wI0H6L4W_1uQjh9tpjgT9HzN-fbrrtXXHC1x6Q_rrPg,2203
-fugue/_utils/io.py,sha256=adrtj6Dq0ti426DNlkliApbTkp8b3bfBysAiE5MVQVc,9265
+fugue/_utils/io.py,sha256=5twd99LBzHtIMT67il1qwnEUa5n13WZmVKNd1shO4No,9649
 fugue/_utils/misc.py,sha256=_huy0eylmRTEFoReGR2M4rbAI8m79hFcfY5bDceVEXU,887
 fugue/_utils/registry.py,sha256=lrbzTdUEVnW6paBGDj-Yb-aTIbP5mjCqrXuRU9_N6os,316
 fugue/bag/__init__.py,sha256=0Q0_rnrEThrTx2U-1xGNyAg95idp_xcnywymIcW4Xck,46
@@ -31,7 +31,7 @@ fugue/dataframe/arrow_dataframe.py,sha256=r5zcZBX_N6XO5dmixBkTCPgLcMmgDF022piZvr
 fugue/dataframe/dataframe.py,sha256=xmyG85i14A6LDRkNmPt29oYq7PJsq668s1QvFHK8PV4,16964
 fugue/dataframe/dataframe_iterable_dataframe.py,sha256=lx71KfaI4lsVKI-79buc-idaeT20JEMBOq21SQcAiY8,7259
 fugue/dataframe/dataframes.py,sha256=tBSpHsENgbcdOJ0Jgst6PTKbjG7_uoFJch96oTlaQIs,4160
-fugue/dataframe/function_wrapper.py,sha256=cG-0ICf-WxgxmEJIdU2jx4GzYhVfP69AyzjTjNDpXGE,17710
+fugue/dataframe/function_wrapper.py,sha256=7Sb6XrWTD_swtHJbHDWZRxHvFNWkERynnCDzLM0wSbo,18340
 fugue/dataframe/iterable_dataframe.py,sha256=TcOoNKa4jNbHbvAZ0XAhtMmGcioygIHPxI9budDtenQ,4758
 fugue/dataframe/pandas_dataframe.py,sha256=0L0wYCGhD2BpQbruoT07Ox9iQM5YLHLNrcgzudc-yKs,11633
 fugue/dataframe/utils.py,sha256=bA_otOJt9oju1yq5gtn21L_GDT_pUgNc6luYuBIhbUQ,10488
@@ -61,11 +61,11 @@ fugue/extensions/processor/convert.py,sha256=zG0lMtHGwY5TsqK4eplbMdlTg7J_PD3HbI0
 fugue/extensions/processor/processor.py,sha256=czhQlQgMpAXXoLVAX9Q0TFUMYEEhsgufTammxcKSmOY,1665
 fugue/extensions/transformer/__init__.py,sha256=VD6d-8xW1Yl8fUPj43cBWNR9pCOlYD9xWyGIHAlHwvI,456
 fugue/extensions/transformer/constants.py,sha256=76DfpoTOGQ8gp5XtCs_xznfbr_H015-prXpHWSqMNDU,59
-fugue/extensions/transformer/convert.py,sha256=SU_KvzZp_nV8oCxZGx7qDsdCE0CJ--8UAp5m8z4d4HY,23386
+fugue/extensions/transformer/convert.py,sha256=zDDIpZawMnHFarjZNZAyiw1jfyXGuPjnvgQk9jpYLak,23384
 fugue/extensions/transformer/transformer.py,sha256=zhOUgyv5-DPxYd1CP_98WeEw-zUgwknRnPW_6di-q3g,9098
 fugue/rpc/__init__.py,sha256=3GzUl4QZQuCChjD7eaTJW8tnTwfke6ZY9r9g5nCeBZ8,167
 fugue/rpc/base.py,sha256=3Fq5SvwLZqw9NXru3r32WuJKBGFr9bl7nFgy6e9boGo,8470
-fugue/rpc/flask.py,sha256=1oD0dE4Q-0PKeZ7RG3c0pfwyx21dUavfXWORu_gV7mg,4485
+fugue/rpc/flask.py,sha256=VzJEND8Pqatf6pYYT9LDXeO1JDMmYAOY0lm8XUncKbA,4807
 fugue/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fugue/sql/_utils.py,sha256=khpjGeFCVlaqf2JIYvS4TVTJO3fe5-8bEsvy6AIP_5Q,2083
 fugue/sql/_visitors.py,sha256=2pc0J-AHJAiIexsKgNjcgrCGOyhC3_7rzonSgtjy--k,33844
@@ -82,6 +82,7 @@ fugue/workflow/api.py,sha256=uQoxPSCZ91-ST4vwuPWG7qioRGW4eo-Sgi3DdwtSL4k,12495
 fugue/workflow/input.py,sha256=V_zLDNzndmQuYJAPXtdK4n-vOp7LrimGIf_wQtwf2mc,321
 fugue/workflow/module.py,sha256=ajyqgMwX6hFMZY9xp4Bp1Q-Zdta0p5f_W_n_SNrc4LE,5547
 fugue/workflow/workflow.py,sha256=-SFCXkyxgXbS6DpQGSBox4d3Ws3psIlB6PnraJLSu9Y,88219
+fugue-0.9.3.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
 fugue_contrib/__init__.py,sha256=QJioX-r2AiU7Pvt24M-k2c4vNq29qpK-3WNUde7ucck,222
 fugue_contrib/contrib.py,sha256=3B--6oIVBMZ-GwjIOXwZqYqkloH7Cxfq1I8vkwl2yPk,267
 fugue_contrib/seaborn/__init__.py,sha256=NuVv8EI4Om4gHcHwYO8ddextLQqw24vDj8qJio3E1MU,1405
@@ -89,25 +90,26 @@ fugue_contrib/viz/__init__.py,sha256=osgZx63Br-yMZImyEfYf9MVzJNM2Cqqke_-WsuDmG5M
 fugue_contrib/viz/_ext.py,sha256=Lu_DlS5DcmrFz27fHcKTCkhKyknVWcfS5kzZVVuO9xM,1345
 fugue_dask/__init__.py,sha256=2CcJ0AsN-k_f7dZ-yAyYpaICfUMPfH3l0FvUJSBzTr0,161
 fugue_dask/_constants.py,sha256=35UmTVITk21GhRyRlbJOwPPdQsytM_p_2NytOXEay18,510
+fugue_dask/_dask_sql_wrapper.py,sha256=lj38gJIOdoMV9W44gpwzLjUEtPVsQNKjRWuEkfI7-PM,2618
 fugue_dask/_io.py,sha256=pl4F7mbVgP7Rwh1FFG7xfOz2TBZRUj1l3lLvDY4jOf4,6020
-fugue_dask/_utils.py,sha256=1uplEqvpCDZDp2YdwJxa6cuGScpgG9VvN3057J02bys,8956
-fugue_dask/dataframe.py,sha256=MuG9TqCND7qI66lPvxzuomfE7yA4sW7DjrvbyvE6XEU,13471
-fugue_dask/execution_engine.py,sha256=60IiwYRBVhN-pX3v6i9BZ8Pa4bcSh5UoklvCScM_XAM,21361
+fugue_dask/_utils.py,sha256=0R0pCh4B47kQsAS_o0QGaosIqVcZnSakm6pfMB7fSXs,9059
+fugue_dask/dataframe.py,sha256=4Dvckpc4mlld2WsEFTTemxoA1zYK8Cn6jMKxUxYQCEE,13491
+fugue_dask/execution_engine.py,sha256=mFN_IurhdBEu8C5OreqpGSRdTbTBqSpzJO2dMQzEF-o,21264
 fugue_dask/registry.py,sha256=jepWKH55VWNIWV3pOF5vpCl2OpO0rI1IULx5GM2Gk6w,2274
 fugue_dask/tester.py,sha256=E7BZjgFpJgrHsLMKzvSO5im5OwocYcratjzulJSQZl0,718
 fugue_duckdb/__init__.py,sha256=ZzhmAWbROR1YL9Kmlt7OlwkgPZzFhsSdwLV2pFmAqGI,268
 fugue_duckdb/_io.py,sha256=vnd8m8C6XeMCBJBbAdA5h695NMfsduQrvONyS0HcEFA,8475
 fugue_duckdb/_utils.py,sha256=ElKbHUyn5fWSPGXsK57iqMzcqKtCf0c8pBVBYGe5Ql4,5020
 fugue_duckdb/dask.py,sha256=agoLzeB7Swxj2kVWfmXFbWD1NS2lbbTlnrjSkR8kKWY,5014
-fugue_duckdb/dataframe.py,sha256=LRfTv7Y46wMM_IDYSP1R-5OXuHuBg8GHjPGFFt8u7l0,8444
-fugue_duckdb/execution_engine.py,sha256=IZDmSAtOMJGvulTStxjTmsqJyI5QRNyxBgSMlFMSrBI,20389
+fugue_duckdb/dataframe.py,sha256=LAPoPOad9hgGhjyhlMGMfrnhkyBKe06Xzn6eP1hkl-w,8504
+fugue_duckdb/execution_engine.py,sha256=3f5hbWcX1y9mAtfFixrri-snYxVIQAf4HOgo9fHbDwQ,20385
 fugue_duckdb/registry.py,sha256=9_41KO42kXqcjF4yParQ5JGyg5TckcbhH-Q2IlGpSho,3987
 fugue_duckdb/tester.py,sha256=MzTkv3sdOwOjI59LRrSGGl4w59Njv3OArTU5kSRL-P0,1526
 fugue_ibis/__init__.py,sha256=z7TkK7M2_0p9XO6jQATNDgT0aHXn5k69Ttz2ga-eQG8,190
 fugue_ibis/_compat.py,sha256=zKdTaTfuC02eUIzZPkcd7oObnVBi_X5mQjQf7SDme3Y,246
 fugue_ibis/_utils.py,sha256=BUL5swA5FE4eQu0t5Z17hZVu9a2MFfxlFH6Ymy9xifg,6607
 fugue_ibis/dataframe.py,sha256=k4Q6qBLBIADF5YhbvaDplXO7OkMZSHuf_Wg5o-AusEI,7796
-fugue_ibis/execution_engine.py,sha256=5I-ou5xPdomVu-srdvidvP8f7wDYbGrCV_lGffZa_ac,18679
+fugue_ibis/execution_engine.py,sha256=jRnp1m1wuTicS29A-WA043f8QwdoK8b9rwPXvTkm8r8,18751
 fugue_notebook/__init__.py,sha256=9r_-2uxu1lBeZ8GgpYCKom_OZy2soIOYZajg7JDO-HY,4326
 fugue_notebook/env.py,sha256=TYiTxYPFi-BVJJY49jDsvw9mddhK8WrifeRxBke30I8,4773
 fugue_notebook/nbextension/README.md,sha256=QLnr957YeGfwzy2r4c4qbZPaXyCbyGrKPvcqSBQYSnU,123
@@ -127,7 +129,7 @@ fugue_ray/tester.py,sha256=oTA_xOzvQhJU3ohc4hsVpZc0zv4bwJn1c8a9u8kcuIs,537
 fugue_ray/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fugue_ray/_utils/cluster.py,sha256=3T3Gyra6lAHlzktta-Ro35j6YZQfH6fNrj2hC5ATF9k,621
 fugue_ray/_utils/dataframe.py,sha256=5c4duGV--mdLkKrbJRgjDWvVcp9BegA3yX16pmYDYLE,3954
-fugue_ray/_utils/io.py,sha256=Dz0WuQrh_8Ix7jU5viFIA6caJcfxV4ew0ruBZLQbD1s,9930
+fugue_ray/_utils/io.py,sha256=JZdL7pdpk1DUIj77NJSzU_EZOW4cN7oNjwGy2w-LRTw,10142
 fugue_spark/__init__.py,sha256=rvrMpFs9socMgyH_58gLbnAqmirBf5oidXoO4cekW6U,165
 fugue_spark/_constants.py,sha256=K2uLQfjvMxXk75K-7_Wn47Alpwq5rW57BtECAUrOeqA,177
 fugue_spark/dataframe.py,sha256=lYa8FizM3p_lsKYFR49FazkVZMJKyi2LABKTpP5YBLo,12006
@@ -135,7 +137,7 @@ fugue_spark/execution_engine.py,sha256=YBMtNxCvpy77xICFSg9PHMa6feNoYhWEZe8MmxznX
 fugue_spark/registry.py,sha256=_NmiV2cOooYK0YmqATEnNkPEMT9suUMtuecw2NNbIIk,4530
 fugue_spark/tester.py,sha256=VX003yGNlBukaZTQSN-w7XvgSk4rqxrWQIzno0dWrXg,2481
 fugue_spark/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-fugue_spark/_utils/convert.py,sha256=eRWkDYA4UO-FQu-2y4O80WEdawx7X_rIrWg55AlOiRc,10007
+fugue_spark/_utils/convert.py,sha256=J3HtbuzomTYTN6A11iuvsC1h2C7o3fQBW5U360xGDhE,10234
 fugue_spark/_utils/io.py,sha256=OdUezKpB29Lx9aUS2k9x0xUAGZrmgMZyQYGPEeHk7rQ,5574
 fugue_spark/_utils/misc.py,sha256=9LsbBp6nOEhqXFLr8oWTc3VKzKk-vuVyixlRoquGnEs,858
 fugue_spark/_utils/partition.py,sha256=iaesyO5f4uXhj1W-p91cD5ecPiGlu0bzh8gl2ce2Uvg,3618
@@ -143,14 +145,13 @@ fugue_sql/__init__.py,sha256=Cmr7w0Efr7PzoXdQzdJfc4Dgqd69qKqcHZZodENq7EU,287
 fugue_sql/exceptions.py,sha256=ltS0MC8gMnVVrJbQiOZ0kRUWvVQ2LTx33dCW3ugqtb0,260
 fugue_test/__init__.py,sha256=xoQuVobhU64uyODRdnzf6MSWe9lw5khkhpJ2atvADoc,2315
 fugue_test/bag_suite.py,sha256=WbDCFjuAHYoJh4GXSPiSJxOoOwE1VMtYpJ3lQrsUK-Y,2483
-fugue_test/builtin_suite.py,sha256=BpGwa66cAUuuc7ULOsPP3ax8IKQtNIPoSmlUFgqUKQk,79252
+fugue_test/builtin_suite.py,sha256=jP3xiq2vRZNNGzoSRjcUfrUk8NVg31SU0kpJaEvP25E,79400
 fugue_test/dataframe_suite.py,sha256=7ym4sshDUly6004cq1UlppqDVtbwxD6CKxR4Lu70i0s,18994
-fugue_test/execution_suite.py,sha256=jcSSoKqTGbeWzTxkyYU-8i2zJAjzuXn7BqE8ul-JjIc,48646
+fugue_test/execution_suite.py,sha256=wUiGdb8wLRd13JXo7Lo19vPOLh7t1C-L2NPLeU0k-uE,48736
 fugue_test/fixtures.py,sha256=8Pev-mxRZOWwTFlsGjcSZ0iIs78zyWbp5tq4KG1wyvk,1432
-fugue_version/__init__.py,sha256=gqT-BGoeEItda9fICQDvLbxEjWRIBhFJxPxxKvmHLUo,22
-fugue-0.9.2.dev1.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
-fugue-0.9.2.dev1.dist-info/METADATA,sha256=0ML_xHPma0CaGuGtZn45doFbLu2UtzB-VlPX4koFLYg,18385
-fugue-0.9.2.dev1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-fugue-0.9.2.dev1.dist-info/entry_points.txt,sha256=kiRuUkKOnnHFvlWpYSfVUZiXJW3hOez6gjYoOhGht3Q,302
-fugue-0.9.2.dev1.dist-info/top_level.txt,sha256=y1eCfzGdQ1_RkgcShcfbvXs-bopD3DwJcIOxP9EFXno,140
-fugue-0.9.2.dev1.dist-info/RECORD,,
+fugue_version/__init__.py,sha256=xKd3pzbczuMsdB08eLAOqZDUd_q1IRxwZ_ccAFL4c4A,22
+fugue-0.9.3.dist-info/METADATA,sha256=AXA7npC7pohZQCKiAqe6M5Zoq2K--K4SisIvBi_l1Tc,18570
+fugue-0.9.3.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
+fugue-0.9.3.dist-info/entry_points.txt,sha256=2Vxp1qew_tswacA8m0RzIliLlFOQMlzezvSXPugM_KA,295
+fugue-0.9.3.dist-info/top_level.txt,sha256=y1eCfzGdQ1_RkgcShcfbvXs-bopD3DwJcIOxP9EFXno,140
+fugue-0.9.3.dist-info/RECORD,,
fugue-0.9.2.dev1.dist-info/WHEEL → fugue-0.9.3.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.43.0)
+Generator: setuptools (79.0.1)
 Root-Is-Purelib: true
 Tag: py3-none-any

fugue-0.9.3.dist-info/entry_points.txt ADDED
@@ -0,0 +1,11 @@
+[fugue.plugins]
+dask = fugue_dask.registry[dask]
+duckdb = fugue_duckdb.registry[duckdb]
+ibis = fugue_ibis[ibis]
+polars = fugue_polars.registry[polars]
+ray = fugue_ray.registry[ray]
+spark = fugue_spark.registry[spark]
+
+[pytest11]
+fugue_test = fugue_test
+fugue_test_fixtures = fugue_test.fixtures
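
These registrations are discovered through the standard entry-points mechanism; a sketch of how a runtime could list them (uses the Python 3.10+ selection API of importlib.metadata):

    from importlib.metadata import entry_points

    for ep in entry_points(group="fugue.plugins"):
        print(ep.name, "->", ep.value)  # e.g. dask -> fugue_dask.registry[dask]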
fugue_dask/_dask_sql_wrapper.py ADDED
@@ -0,0 +1,76 @@
+from typing import Any, Optional
+
+import dask.dataframe as dd
+
+try:
+    from dask.dataframe.dask_expr.io.parquet import ReadParquet
+
+    HAS_DASK_EXPR = True  # newer dask
+except ImportError:  # pragma: no cover
+    HAS_DASK_EXPR = False  # older dask
+
+if not HAS_DASK_EXPR:  # pragma: no cover
+    try:
+        from dask_sql import Context as ContextWrapper  # pylint: disable-all
+    except ImportError:  # pragma: no cover
+        raise ImportError(
+            "dask-sql is not installed. Please install it with `pip install dask-sql`"
+        )
+else:
+    from triad.utils.assertion import assert_or_throw
+
+    try:
+        from dask_sql import Context
+        from dask_sql.datacontainer import Statistics
+        from dask_sql.input_utils import InputUtil
+    except ImportError:  # pragma: no cover
+        raise ImportError(
+            "dask-sql is not installed. Please install it with `pip install dask-sql`"
+        )
+
+    class ContextWrapper(Context):  # type: ignore
+        def create_table(
+            self,
+            table_name: str,
+            input_table: dd.DataFrame,
+            format: Optional[str] = None,  # noqa
+            persist: bool = False,
+            schema_name: Optional[str] = None,
+            statistics: Optional[Statistics] = None,
+            gpu: bool = False,
+            **kwargs: Any,
+        ) -> None:  # pragma: no cover
+            assert_or_throw(
+                isinstance(input_table, dd.DataFrame),
+                lambda: ValueError(
+                    f"input_table must be a dask dataframe, but got {type(input_table)}"
+                ),
+            )
+            assert_or_throw(
+                dd._dask_expr_enabled(), lambda: ValueError("Dask expr must be enabled")
+            )
+            schema_name = schema_name or self.schema_name
+
+            dc = InputUtil.to_dc(
+                input_table,
+                table_name=table_name,
+                format=format,
+                persist=persist,
+                gpu=gpu,
+                **kwargs,
+            )
+
+            dask_filepath = None
+            operations = input_table.find_operations(ReadParquet)
+            for op in operations:
+                dask_filepath = op._args[0]
+
+            dc.filepath = dask_filepath
+            self.schema[schema_name].filepaths[table_name.lower()] = dask_filepath
+
+            if not statistics:
+                statistics = Statistics(float("nan"))
+            dc.statistics = statistics
+
+            self.schema[schema_name].tables[table_name.lower()] = dc
+            self.schema[schema_name].statistics[table_name.lower()] = statistics
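
The wrapper gives the dask SQL path a single import point: on older dask it is dask_sql.Context itself, and on dask-expr builds it overrides create_table so dask-sql can still locate parquet file paths. The engine-side usage (confirmed by the execution_engine diff below) reduces to a sketch like:

    from fugue_dask._dask_sql_wrapper import ContextWrapper

    ctx = ContextWrapper()
    # ctx.create_table(name, dask_df) for each input, then ctx.sql(statement)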
fugue_dask/_utils.py CHANGED
@@ -5,7 +5,7 @@ import dask.dataframe as dd
 import numpy as np
 import pandas as pd
 import pyarrow as pa
-from dask.dataframe.core import DataFrame
+from dask.dataframe import DataFrame
 from dask.delayed import delayed
 from dask.distributed import Client, get_client
 from triad.utils.pandas_like import PD_UTILS, PandasLikeUtils
@@ -149,7 +149,7 @@ def _add_hash_index(
     if len(cols) == 0:
         cols = list(df.columns)

-    def _add_hash(df: pd.DataFrame) -> pd.DataFrame:
+    def _add_hash(df: pd.DataFrame) -> pd.DataFrame:  # pragma: no cover
         if len(df) == 0:
             return df.assign(**{_FUGUE_DASK_TEMP_IDX_COLUMN: pd.Series(dtype=int)})
         return df.assign(
@@ -171,7 +171,7 @@ def _add_hash_index(

 def _add_random_index(
     df: dd.DataFrame, num: int, seed: Any = None
-) -> Tuple[dd.DataFrame, int]:
+) -> Tuple[dd.DataFrame, int]:  # pragma: no cover
     def _add_rand(df: pd.DataFrame) -> pd.DataFrame:
         if len(df) == 0:
             return df.assign(**{_FUGUE_DASK_TEMP_IDX_COLUMN: pd.Series(dtype=int)})
@@ -189,7 +189,9 @@ def _add_random_index(


 def _add_continuous_index(df: dd.DataFrame) -> Tuple[dd.DataFrame, int]:
-    def _get_info(df: pd.DataFrame, partition_info: Any) -> pd.DataFrame:
+    def _get_info(
+        df: pd.DataFrame, partition_info: Any
+    ) -> pd.DataFrame:  # pragma: no cover
         return pd.DataFrame(dict(no=[partition_info["number"]], ct=[len(df)]))

     pinfo = (
@@ -200,7 +202,9 @@ def _add_continuous_index(df: dd.DataFrame) -> Tuple[dd.DataFrame, int]:
     counts = pinfo.sort_values("no").ct.cumsum().tolist()
     starts = [0] + counts[0:-1]

-    def _add_index(df: pd.DataFrame, partition_info: Any) -> pd.DataFrame:
+    def _add_index(
+        df: pd.DataFrame, partition_info: Any
+    ) -> pd.DataFrame:  # pragma: no cover
         return df.assign(
             **{
                 _FUGUE_DASK_TEMP_IDX_COLUMN: np.arange(len(df))
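
The new "# pragma: no cover" markers all land on functions that dask ships to worker tasks (via map_partitions and similar), where the main process's coverage tracer does not follow; the code is exercised, just not observed. A minimal illustration of the pattern (assumes dask[dataframe] and pandas are installed):

    import dask.dataframe as dd
    import pandas as pd

    def _add_one(pdf: pd.DataFrame) -> pd.DataFrame:  # pragma: no cover
        # runs inside worker tasks, outside the driver's coverage trace
        return pdf.assign(b=pdf["a"] + 1)

    ddf = dd.from_pandas(pd.DataFrame({"a": [1, 2, 3]}), npartitions=2)
    print(ddf.map_partitions(_add_one).compute())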
fugue_dask/dataframe.py CHANGED
@@ -379,7 +379,7 @@ def _to_array_chunks(
     assert_or_throw(columns is None or len(columns) > 0, ValueError("empty columns"))
     _df = df if columns is None or len(columns) == 0 else df[columns]

-    def _to_list(pdf: pd.DataFrame) -> List[Any]:
+    def _to_list(pdf: pd.DataFrame) -> List[Any]:  # pragma: no cover
         return list(
             PD_UTILS.as_array_iterable(
                 pdf,
fugue_dask/execution_engine.py CHANGED
@@ -9,9 +9,10 @@ from triad.collections import Schema
 from triad.collections.dict import IndexedOrderedDict, ParamDict
 from triad.utils.assertion import assert_or_throw
 from triad.utils.hash import to_uuid
+from triad.utils.io import makedirs
 from triad.utils.pandas_like import PandasUtils
 from triad.utils.threading import RunOnce
-from triad.utils.io import makedirs
+
 from fugue import StructuredRawSQL
 from fugue.collections.partition import (
     PartitionCursor,
@@ -61,14 +62,9 @@ class DaskSQLEngine(SQLEngine):
         return True

     def select(self, dfs: DataFrames, statement: StructuredRawSQL) -> DataFrame:
-        try:
-            from dask_sql import Context
-        except ImportError:  # pragma: no cover
-            raise ImportError(
-                "dask-sql is not installed. "
-                "Please install it with `pip install dask-sql`"
-            )
-        ctx = Context()
+        from ._dask_sql_wrapper import ContextWrapper
+
+        ctx = ContextWrapper()
         _dfs: Dict[str, dd.DataFrame] = {k: self._to_safe_df(v) for k, v in dfs.items()}
         sql = statement.construct(dialect=self.dialect, log=self.log)
         res = ctx.sql(
@@ -102,7 +98,8 @@ class DaskMapEngine(MapEngine):
         partition_spec: PartitionSpec,
         on_init: Optional[Callable[[int, DataFrame], Any]] = None,
         map_func_format_hint: Optional[str] = None,
-    ) -> DataFrame:
+    ) -> DataFrame:  # pragma: no cover
+        # It is well tested but not captured by coverage
         presort = partition_spec.get_sorts(
             df.schema, with_partition_keys=partition_spec.algo == "coarse"
         )
@@ -475,7 +472,7 @@ class DaskExecutionEngine(ExecutionEngine):
         # Use presort over partition_spec.presort if possible
         _presort: IndexedOrderedDict = presort or partition_spec.presort

-        def _partition_take(partition, n, presort):
+        def _partition_take(partition, n, presort):  # pragma: no cover
             assert_or_throw(
                 partition.shape[1] == len(meta),
                 FugueBug("hitting the dask bug where partition keys are lost"),
fugue_duckdb/dataframe.py CHANGED
@@ -165,7 +165,7 @@ def _duck_as_local(df: DuckDBPyRelation) -> DuckDBPyRelation:

 @as_arrow.candidate(lambda df: isinstance(df, DuckDBPyRelation))
 def _duck_as_arrow(df: DuckDBPyRelation) -> pa.Table:
-    _df = df.arrow()
+    _df = df.fetch_arrow_table()
     _df = replace_types_in_table(_df, LARGE_TYPES_REPLACEMENT, recursive=True)
     return _df

@@ -216,7 +216,7 @@ def _drop_duckdb_columns(df: DuckDBPyRelation, columns: List[str]) -> DuckDBPyRe
 def _duck_as_array(
     df: DuckDBPyRelation, columns: Optional[List[str]] = None, type_safe: bool = False
 ) -> List[Any]:
-    return pa_table_as_array(df.arrow(), columns=columns)
+    return pa_table_as_array(df.fetch_arrow_table(), columns=columns)


 @as_array_iterable.candidate(
@@ -225,14 +225,14 @@ def _duck_as_array(
 def _duck_as_array_iterable(
     df: DuckDBPyRelation, columns: Optional[List[str]] = None, type_safe: bool = False
 ) -> Iterable[Any]:
-    yield from pa_table_as_array_iterable(df.arrow(), columns=columns)
+    yield from pa_table_as_array_iterable(df.fetch_arrow_table(), columns=columns)


 @as_dicts.candidate(lambda df, *args, **kwargs: isinstance(df, DuckDBPyRelation))
 def _duck_as_dicts(
     df: DuckDBPyRelation, columns: Optional[List[str]] = None
 ) -> List[Dict[str, Any]]:
-    return pa_table_as_dicts(df.arrow(), columns=columns)
+    return pa_table_as_dicts(df.fetch_arrow_table(), columns=columns)


 @as_dict_iterable.candidate(
@@ -241,7 +241,7 @@ def _duck_as_dicts(
 def _duck_as_dict_iterable(
     df: DuckDBPyRelation, columns: Optional[List[str]] = None
 ) -> Iterable[Dict[str, Any]]:
-    yield from pa_table_as_dict_iterable(df.arrow(), columns=columns)
+    yield from pa_table_as_dict_iterable(df.fetch_arrow_table(), columns=columns)


 def _assert_no_missing(df: DuckDBPyRelation, columns: Iterable[Any]) -> None:
fugue_duckdb/execution_engine.py CHANGED
@@ -108,7 +108,7 @@ class DuckDBEngine(SQLEngine):
         try:
             for k, v in dfs.items():
                 duckdb.from_arrow(v.as_arrow(), connection=conn).create_view(k)
-            return ArrowDataFrame(_duck_as_arrow(conn.execute(statement)))
+            return ArrowDataFrame(_duck_as_arrow(conn.sql(statement)))
         finally:
             conn.close()

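Both duckdb changes move to the relation-based API: conn.sql() returns a DuckDBPyRelation (conn.execute() returns the connection itself), and fetch_arrow_table() is the explicit replacement for the .arrow() shorthand. A runnable sketch (assumes the duckdb package is installed):

    import duckdb

    conn = duckdb.connect()
    rel = conn.sql("SELECT 1 AS a, 'x' AS b")  # DuckDBPyRelation
    tbl = rel.fetch_arrow_table()              # pyarrow.Table
    print(tbl.schema)
    conn.close()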
fugue_ibis/execution_engine.py CHANGED
@@ -92,7 +92,8 @@ class IbisSQLEngine(SQLEngine):
         _df2 = self.to_df(df2)
         key_schema, end_schema = get_join_schemas(_df1, _df2, how=how, on=on)
         on_fields = [_df1.native[k] == _df2.native[k] for k in key_schema]
-        if ibis.__version__ < "6":  # pragma: no cover
+        version = int(ibis.__version__.split(".")[0])
+        if version < 6:  # pragma: no cover
             suffixes: Dict[str, Any] = dict(suffixes=("", _JOIN_RIGHT_SUFFIX))
         else:
             # breaking change in ibis 6.0
@@ -113,7 +114,7 @@ class IbisSQLEngine(SQLEngine):
                 cols.append(
                     ibis.coalesce(tb[k], tb[k + _JOIN_RIGHT_SUFFIX]).name(k)
                 )
-            tb = tb[cols]
+            tb = tb.select(*cols)
         elif how.lower() in ["semi", "left_semi"]:
             tb = _df1.native.semi_join(_df2.native, on_fields, **suffixes)
         elif how.lower() in ["anti", "left_anti"]:
@@ -153,7 +154,7 @@ class IbisSQLEngine(SQLEngine):
         self,
         df: DataFrame,
         how: str = "any",
-        thresh: int = None,
+        thresh: Optional[int] = None,
         subset: Optional[List[str]] = None,
     ) -> DataFrame:
         schema = df.schema
@@ -161,7 +162,7 @@ class IbisSQLEngine(SQLEngine):
             schema = schema.extract(subset)
         _df = self.to_df(df)
         if thresh is None:
-            tb = _df.native.dropna(subset=subset, how=how)
+            tb = _df.native.drop_null(subset, how=how)
             return self.to_df(tb, df.schema)
         assert_or_throw(
             how == "any", ValueError("when thresh is set, how must be 'any'")
@@ -204,7 +205,7 @@ class IbisSQLEngine(SQLEngine):
             ibis.coalesce(tb[f], ibis.literal(vd[f])).name(f) if f in names else tb[f]
             for f in df.columns
         ]
-        return self.to_df(tb[cols], schema=df.schema)
+        return self.to_df(tb.select(cols), schema=df.schema)

     def take(
         self,
@@ -241,7 +242,7 @@ class IbisSQLEngine(SQLEngine):
             f") WHERE __fugue_take_param<={n}"
         )
         tb = self.query_to_table(sql, {tbn: idf})
-        return self.to_df(tb.select(*df.columns), schema=df.schema)
+        return self.to_df(tb.select(*df.columns), schema=df.schema)

         sorts: List[str] = []
         for k, v in _presort.items():
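
Besides the ibis API modernizations (tb[cols] to tb.select(), dropna to drop_null), the version guard fix matters once ibis reaches double-digit majors: comparing version strings lexicographically misorders them, which the integer parse avoids. A quick demonstration:

    print("10.0.0" < "6")                   # True  -- lexicographic, wrong
    print(int("10.0.0".split(".")[0]) < 6)  # False -- numeric major, correct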
fugue_ray/_utils/io.py CHANGED
@@ -74,7 +74,7 @@ class RayIO(object):
             len(fmts) == 1, NotImplementedError("can't support multiple formats")
         )
         fmt = fmts[0]
-        files = [f.path for f in fp]
+        files = [f.as_dir_path() if f.is_dir else f.path for f in fp]
         return self._loads[fmt](files, columns, **kwargs)

     def save_df(
@@ -153,6 +153,10 @@ class RayIO(object):
     def _load_parquet(
         self, p: List[str], columns: Any = None, **kwargs: Any
     ) -> DataFrame:
+        # in 2.52.0 the default changes to ["parquet"]
+        if "file_extensions" not in kwargs:
+            kwargs = kwargs.copy()
+            kwargs["file_extensions"] = None
         sdf = rd.read_parquet(p, ray_remote_args=self._remote_args(), **kwargs)
         if columns is None:
             return RayDataFrame(sdf)
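
Per the comment in the diff, Ray 2.52.0 changed read_parquet's default file_extensions to ["parquet"], which would silently skip files written without that suffix; passing None restores the old match-everything behavior while still honoring an explicit caller value. The guard in isolation:

    from typing import Any, Dict

    def _with_ext_default(kwargs: Dict[str, Any]) -> Dict[str, Any]:
        if "file_extensions" not in kwargs:
            kwargs = kwargs.copy()  # don't mutate the caller's dict
            kwargs["file_extensions"] = None
        return kwargs

    print(_with_ext_default({}))                           # {'file_extensions': None}
    print(_with_ext_default({"file_extensions": ["pq"]}))  # unchanged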
fugue_spark/_utils/convert.py CHANGED
@@ -174,20 +174,26 @@ def pd_to_spark_df(


 def to_pandas(df: ps.DataFrame) -> pd.DataFrame:
-    if version.parse(pd.__version__) < version.parse("2.0.0") or not any(
-        isinstance(x.dataType, (pt.TimestampType, TimestampNTZType))
-        for x in df.schema.fields
-    ):
-        return df.toPandas()
-    else:  # pragma: no cover
+    def _to_df() -> pd.DataFrame:
+        if version.parse(pd.__version__) < version.parse("2.0.0") or not any(
+            isinstance(x.dataType, (pt.TimestampType, TimestampNTZType))
+            for x in df.schema.fields
+        ):
+            return df.toPandas()
+        else:  # pragma: no cover
+
+            def serialize(dfs):
+                for df in dfs:
+                    data = pickle.dumps(df)
+                    yield pd.DataFrame([[data]], columns=["data"])

-        def serialize(dfs):
-            for df in dfs:
-                data = pickle.dumps(df)
-                yield pd.DataFrame([[data]], columns=["data"])
+            sdf = df.mapInPandas(serialize, schema="data binary")
+            return pd.concat(pickle.loads(x.data) for x in sdf.collect())

-        sdf = df.mapInPandas(serialize, schema="data binary")
-        return pd.concat(pickle.loads(x.data) for x in sdf.collect())
+    pdf = _to_df()
+    if hasattr(pdf, "attrs") and "metrics" in pdf.attrs:  # pragma: no cover
+        del pdf.attrs["metrics"]
+    return pdf


 def to_arrow(df: ps.DataFrame) -> pa.Table:
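
The refactor wraps the old conversion in _to_df() so that, whichever path produced the pandas frame, the new code can strip the "metrics" entry from DataFrame.attrs (the diff only shows the guard and cleanup; presumably newer PySpark attaches such metrics via toPandas, which would otherwise leak engine-specific state into comparisons and serialization). The cleanup in isolation:

    import pandas as pd

    pdf = pd.DataFrame({"a": [1]})
    pdf.attrs["metrics"] = {"numOutputRows": 1}  # simulating what toPandas() may attach
    if hasattr(pdf, "attrs") and "metrics" in pdf.attrs:
        del pdf.attrs["metrics"]
    print(pdf.attrs)  # {}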
fugue_test/builtin_suite.py CHANGED
@@ -486,7 +486,9 @@ class BuiltInTests(object):
             dag.df([], "a:int,b:int").assert_eq(b)
             dag.run(self.engine)

-    def test_transform_row_wise(self):
+    def _test_transform_row_wise(self):  # pragma: no cover
+        # TODO: currently disabled because we don't support Dict[str, Any]
+        # as dataframe input
         def t1(row: Dict[str, Any]) -> Dict[str, Any]:
             row["b"] = 1
             return row
@@ -695,11 +697,11 @@ class BuiltInTests(object):
             incr()
             yield pa.Table.from_pandas(df)

-        def t11(row: Dict[str, Any]) -> Dict[str, Any]:
+        def t11(row: list[dict[str, Any]]) -> dict[str, Any]:
             incr()
-            return row
+            return row[0]

-        def t12(row: Dict[str, Any]) -> None:
+        def t12(row: list[dict[str, Any]]) -> None:
             incr()

         with FugueWorkflow() as dag:
fugue_test/execution_suite.py CHANGED
@@ -9,6 +9,7 @@ except ImportError:  # pragma: no cover
 import copy
 import os
 import pickle
+import sys
 from datetime import datetime

 import pandas as pd
@@ -1194,6 +1195,7 @@ class ExecutionEngineTests(object):
         )
         self.df_eq(c, [[1.1, 6.1], [7.1, 2.1]], "a:double,c:double", throw=True)

+    @pytest.mark.skipif(sys.platform == "win32", reason="skip on Windows")
     def test_load_csv_folder(self):
         native = NativeExecutionEngine()
         a = ArrayDataFrame([[6.1, 1.1]], "c:double,a:double")
fugue_version/__init__.py CHANGED
@@ -1 +1 @@
-__version__ = "0.9.2"
+__version__ = "0.9.3"
fugue-0.9.2.dev1.dist-info/entry_points.txt REMOVED
@@ -1,12 +0,0 @@
-[fugue.plugins]
-dask = fugue_dask.registry [dask]
-duckdb = fugue_duckdb.registry [duckdb]
-ibis = fugue_ibis [ibis]
-polars = fugue_polars.registry [polars]
-ray = fugue_ray.registry [ray]
-spark = fugue_spark.registry [spark]
-
-[pytest11]
-fugue_test = fugue_test
-fugue_test_fixtures = fugue_test.fixtures
-