fugue 0.9.2.dev2__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registries.
fugue/_utils/io.py CHANGED
@@ -20,6 +20,10 @@ class FileParser(object):
20
20
  self._has_glob = "*" in path or "?" in path
21
21
  self._raw_path = path
22
22
  self._fs, self._fs_path = url_to_fs(path)
23
+ if not self._has_glob and self._fs.isdir(self._fs_path):
24
+ self._is_dir = True
25
+ else:
26
+ self._is_dir = False
23
27
  if not self.is_local:
24
28
  self._path = self._fs.unstrip_protocol(self._fs_path)
25
29
  else:
@@ -43,11 +47,15 @@ class FileParser(object):
43
47
  return self
44
48
 
45
49
  @property
46
- def has_glob(self):
50
+ def is_dir(self) -> bool:
51
+ return self._is_dir
52
+
53
+ @property
54
+ def has_glob(self) -> bool:
47
55
  return self._has_glob
48
56
 
49
57
  @property
50
- def is_local(self):
58
+ def is_local(self) -> bool:
51
59
  return isinstance(self._fs, LocalFileSystem)
52
60
 
53
61
  def join(self, path: str, format_hint: Optional[str] = None) -> "FileParser":
@@ -65,6 +73,10 @@ class FileParser(object):
65
73
  def path(self) -> str:
66
74
  return self._path
67
75
 
76
+ def as_dir_path(self) -> str:
77
+ assert_or_throw(self.is_dir, f"{self.raw_path} is not a directory")
78
+ return self.path + self._fs.sep
79
+
68
80
  @property
69
81
  def raw_path(self) -> str:
70
82
  return self._raw_path
@@ -91,7 +91,9 @@ class DataFrameFunctionWrapper(FunctionWrapper):
91
91
  isinstance(p[k], DataFrame),
92
92
  lambda: TypeError(f"{p[k]} is not a DataFrame"),
93
93
  )
94
- if v.is_per_row:
94
+ if v.is_per_row: # pragma: no cover
95
+ # TODO: this branch is used only if row annotations
96
+ # are allowed as input
95
97
  assert_or_throw(
96
98
  row_param_info is None,
97
99
  lambda: ValueError("only one row parameter is allowed"),
@@ -110,7 +112,9 @@ class DataFrameFunctionWrapper(FunctionWrapper):
110
112
  raise ValueError(f"{p} are not acceptable parameters")
111
113
  if row_param_info is None:
112
114
  return self._run_func(rargs, output, output_schema, ctx, raw=False)
113
- else: # input contains row parameter
115
+ else: # pragma: no cover
116
+ # input contains row parameter
117
+ # TODO: this branch is used only if row annotations are allowed as input
114
118
 
115
119
  def _dfs() -> Iterable[Any]:
116
120
  k, v, df = row_param_info
@@ -237,7 +241,8 @@ class DataFrameParam(_DataFrameParamBase):
237
241
 
238
242
 
239
243
  @fugue_annotated_param(DataFrame, "r", child_can_reuse_code=True)
240
- class RowParam(_DataFrameParamBase):
244
+ class RowParam(_DataFrameParamBase): # pragma: no cover
245
+ # TODO: this class is used only if row annotations are allowed as input
241
246
  @property
242
247
  def is_per_row(self) -> bool:
243
248
  return True
@@ -247,7 +252,8 @@ class RowParam(_DataFrameParamBase):
247
252
 
248
253
 
249
254
  @fugue_annotated_param(Dict[str, Any])
250
- class DictParam(RowParam):
255
+ class DictParam(RowParam): # pragma: no cover
256
+ # TODO: this class is used only if row annotations are allowed as input
251
257
  def to_input_rows(self, df: DataFrame, ctx: Any) -> Iterable[Any]:
252
258
  yield from df.as_dict_iterable()
253
259
 
@@ -375,7 +375,7 @@ class _FuncAsTransformer(Transformer):
375
375
  assert_arg_not_none(schema, "schema")
376
376
  tr = _FuncAsTransformer()
377
377
  tr._wrapper = DataFrameFunctionWrapper( # type: ignore
378
- func, "^[lspqr][fF]?x*z?$", "^[lspqr]$"
378
+ func, "^[lspq][fF]?x*z?$", "^[lspqr]$"
379
379
  )
380
380
  tr._output_schema_arg = schema # type: ignore
381
381
  tr._validation_rules = validation_rules # type: ignore
@@ -410,7 +410,7 @@ class _FuncAsOutputTransformer(_FuncAsTransformer):
410
410
  validation_rules.update(parse_validation_rules_from_comment(func))
411
411
  tr = _FuncAsOutputTransformer()
412
412
  tr._wrapper = DataFrameFunctionWrapper( # type: ignore
413
- func, "^[lspqr][fF]?x*z?$", "^[lspnqr]$"
413
+ func, "^[lspq][fF]?x*z?$", "^[lspnqr]$"
414
414
  )
415
415
  tr._output_schema_arg = None # type: ignore
416
416
  tr._validation_rules = validation_rules # type: ignore
fugue/rpc/flask.py CHANGED
@@ -1,6 +1,5 @@
1
- import base64
2
1
  import logging
3
- import cloudpickle
2
+ import json
4
3
  from threading import Thread
5
4
  from typing import Any, Optional, Tuple, Dict, List
6
5
 
@@ -60,6 +59,7 @@ class FlaskRPCServer(RPCServer):
60
59
  -1.0 if timeout is None else to_timedelta(timeout).total_seconds()
61
60
  )
62
61
  self._server: Optional[FlaskRPCServer._Thread] = None
62
+ self._log = logging.getLogger()
63
63
 
64
64
  def make_client(self, handler: Any) -> RPCClient:
65
65
  """Add ``handler`` and correspondent :class:`~.FlaskRPCClient`
@@ -77,6 +77,14 @@ class FlaskRPCServer(RPCServer):
77
77
 
78
78
  def start_server(self) -> None:
79
79
  """Start Flask RPC server"""
80
+ msg = (
81
+ "Starting RPC server on %s:%s. "
82
+ "This server has no authentication and relies on network isolation. "
83
+ "Ensure proper VPC/firewall configuration in production. "
84
+ "See https://fugue-tutorials.readthedocs.io/tutorials/resources/"
85
+ "security.html"
86
+ )
87
+ self._log.warning(msg, self._host, self._port)
80
88
  app = Flask("FlaskRPCServer")
81
89
  app.route("/invoke", methods=["POST"])(self._invoke)
82
90
  self._server = FlaskRPCServer._Thread(app, self._host, self._port)
@@ -122,10 +130,10 @@ class FlaskRPCClient(RPCClient):
122
130
 
123
131
 
124
132
  def _encode(*args: Any, **kwargs: Any) -> str:
125
- data = base64.b64encode(cloudpickle.dumps(dict(args=args, kwargs=kwargs)))
126
- return data.decode("ascii")
133
+ data = json.dumps(dict(args=args, kwargs=kwargs))
134
+ return data
127
135
 
128
136
 
129
137
  def _decode(data: str) -> Tuple[List[Any], Dict[str, Any]]:
130
- data = cloudpickle.loads(base64.b64decode(data.encode("ascii")))
138
+ data = json.loads(data)
131
139
  return data["args"], data["kwargs"] # type: ignore
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: fugue
3
- Version: 0.9.2.dev2
3
+ Version: 0.9.4
4
4
  Summary: An abstraction layer for distributed computation
5
5
  Home-page: http://github.com/fugue-project/fugue
6
6
  Author: The Fugue Development Team
@@ -21,63 +21,75 @@ Classifier: Programming Language :: Python :: 3 :: Only
21
21
  Requires-Python: >=3.8
22
22
  Description-Content-Type: text/markdown
23
23
  License-File: LICENSE
24
- Requires-Dist: triad>=0.9.7
24
+ Requires-Dist: triad>=1.0.0
25
25
  Requires-Dist: adagio>=0.2.6
26
- Provides-Extra: all
27
- Requires-Dist: qpd>=0.4.4; extra == "all"
28
- Requires-Dist: fugue-sql-antlr>=0.2.0; extra == "all"
29
- Requires-Dist: sqlglot; extra == "all"
30
- Requires-Dist: jinja2; extra == "all"
31
- Requires-Dist: pyspark>=3.1.1; extra == "all"
32
- Requires-Dist: dask[dataframe,distributed]>=2023.5.0; extra == "all"
33
- Requires-Dist: dask-sql; extra == "all"
34
- Requires-Dist: ray[data]>=2.5.0; extra == "all"
35
- Requires-Dist: notebook; extra == "all"
36
- Requires-Dist: jupyterlab; extra == "all"
37
- Requires-Dist: ipython>=7.10.0; extra == "all"
38
- Requires-Dist: duckdb>=0.5.0; extra == "all"
39
- Requires-Dist: pyarrow>=6.0.1; extra == "all"
40
- Requires-Dist: pandas<2.2,>=2.0.2; extra == "all"
41
- Requires-Dist: ibis-framework[duckdb,pandas]; extra == "all"
42
- Requires-Dist: polars; extra == "all"
43
- Provides-Extra: cpp_sql_parser
26
+ Provides-Extra: sql
27
+ Requires-Dist: qpd>=0.4.4; extra == "sql"
28
+ Requires-Dist: fugue-sql-antlr>=0.2.0; extra == "sql"
29
+ Requires-Dist: sqlglot<28; extra == "sql"
30
+ Requires-Dist: jinja2; extra == "sql"
31
+ Provides-Extra: cpp-sql-parser
44
32
  Requires-Dist: fugue-sql-antlr[cpp]>=0.2.0; extra == "cpp-sql-parser"
33
+ Provides-Extra: spark
34
+ Requires-Dist: pyspark>=3.1.1; extra == "spark"
45
35
  Provides-Extra: dask
46
36
  Requires-Dist: dask[dataframe,distributed]>=2024.4.0; extra == "dask"
47
37
  Requires-Dist: pyarrow>=7.0.0; extra == "dask"
48
38
  Requires-Dist: pandas>=2.0.2; extra == "dask"
39
+ Provides-Extra: ray
40
+ Requires-Dist: ray[data]>=2.30.0; extra == "ray"
41
+ Requires-Dist: duckdb>=0.5.0; extra == "ray"
42
+ Requires-Dist: pyarrow>=7.0.0; extra == "ray"
43
+ Requires-Dist: pandas; extra == "ray"
49
44
  Provides-Extra: duckdb
50
45
  Requires-Dist: qpd>=0.4.4; extra == "duckdb"
51
46
  Requires-Dist: fugue-sql-antlr>=0.2.0; extra == "duckdb"
52
- Requires-Dist: sqlglot; extra == "duckdb"
47
+ Requires-Dist: sqlglot<28; extra == "duckdb"
53
48
  Requires-Dist: jinja2; extra == "duckdb"
54
49
  Requires-Dist: duckdb>=0.5.0; extra == "duckdb"
55
50
  Requires-Dist: numpy; extra == "duckdb"
51
+ Provides-Extra: polars
52
+ Requires-Dist: polars; extra == "polars"
56
53
  Provides-Extra: ibis
57
54
  Requires-Dist: qpd>=0.4.4; extra == "ibis"
58
55
  Requires-Dist: fugue-sql-antlr>=0.2.0; extra == "ibis"
59
- Requires-Dist: sqlglot; extra == "ibis"
56
+ Requires-Dist: sqlglot<28; extra == "ibis"
60
57
  Requires-Dist: jinja2; extra == "ibis"
61
58
  Requires-Dist: ibis-framework[pandas]; extra == "ibis"
62
- Requires-Dist: pandas<2.2; extra == "ibis"
63
59
  Provides-Extra: notebook
64
60
  Requires-Dist: notebook; extra == "notebook"
65
61
  Requires-Dist: jupyterlab; extra == "notebook"
66
62
  Requires-Dist: ipython>=7.10.0; extra == "notebook"
67
- Provides-Extra: polars
68
- Requires-Dist: polars; extra == "polars"
69
- Provides-Extra: ray
70
- Requires-Dist: ray[data]>=2.5.0; extra == "ray"
71
- Requires-Dist: duckdb>=0.5.0; extra == "ray"
72
- Requires-Dist: pyarrow>=7.0.0; extra == "ray"
73
- Requires-Dist: pandas<2.2; extra == "ray"
74
- Provides-Extra: spark
75
- Requires-Dist: pyspark>=3.1.1; extra == "spark"
76
- Provides-Extra: sql
77
- Requires-Dist: qpd>=0.4.4; extra == "sql"
78
- Requires-Dist: fugue-sql-antlr>=0.2.0; extra == "sql"
79
- Requires-Dist: sqlglot; extra == "sql"
80
- Requires-Dist: jinja2; extra == "sql"
63
+ Provides-Extra: all
64
+ Requires-Dist: qpd>=0.4.4; extra == "all"
65
+ Requires-Dist: fugue-sql-antlr>=0.2.0; extra == "all"
66
+ Requires-Dist: sqlglot<28; extra == "all"
67
+ Requires-Dist: jinja2; extra == "all"
68
+ Requires-Dist: pyspark>=3.1.1; extra == "all"
69
+ Requires-Dist: dask[dataframe,distributed]>=2024.4.0; extra == "all"
70
+ Requires-Dist: dask-sql; extra == "all"
71
+ Requires-Dist: ray[data]>=2.30.0; extra == "all"
72
+ Requires-Dist: notebook; extra == "all"
73
+ Requires-Dist: jupyterlab; extra == "all"
74
+ Requires-Dist: ipython>=7.10.0; extra == "all"
75
+ Requires-Dist: duckdb>=0.5.0; extra == "all"
76
+ Requires-Dist: pyarrow>=6.0.1; extra == "all"
77
+ Requires-Dist: pandas>=2.0.2; extra == "all"
78
+ Requires-Dist: ibis-framework[duckdb,pandas]; extra == "all"
79
+ Requires-Dist: polars; extra == "all"
80
+ Dynamic: author
81
+ Dynamic: author-email
82
+ Dynamic: classifier
83
+ Dynamic: description
84
+ Dynamic: description-content-type
85
+ Dynamic: home-page
86
+ Dynamic: keywords
87
+ Dynamic: license
88
+ Dynamic: license-file
89
+ Dynamic: provides-extra
90
+ Dynamic: requires-dist
91
+ Dynamic: requires-python
92
+ Dynamic: summary
81
93
 
82
94
  # Fugue
83
95
 
@@ -10,7 +10,7 @@ fugue/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  fugue/_utils/display.py,sha256=JV8oDA7efHm1wceZulCBOY5dMvjbWHvIm6ASisKfoWY,3164
11
11
  fugue/_utils/exception.py,sha256=SFIjwjV4CIEovp3P9k7ePNOFB12A5D8hDdhtfFUeM5Y,2247
12
12
  fugue/_utils/interfaceless.py,sha256=wI0H6L4W_1uQjh9tpjgT9HzN-fbrrtXXHC1x6Q_rrPg,2203
13
- fugue/_utils/io.py,sha256=adrtj6Dq0ti426DNlkliApbTkp8b3bfBysAiE5MVQVc,9265
13
+ fugue/_utils/io.py,sha256=5twd99LBzHtIMT67il1qwnEUa5n13WZmVKNd1shO4No,9649
14
14
  fugue/_utils/misc.py,sha256=_huy0eylmRTEFoReGR2M4rbAI8m79hFcfY5bDceVEXU,887
15
15
  fugue/_utils/registry.py,sha256=lrbzTdUEVnW6paBGDj-Yb-aTIbP5mjCqrXuRU9_N6os,316
16
16
  fugue/bag/__init__.py,sha256=0Q0_rnrEThrTx2U-1xGNyAg95idp_xcnywymIcW4Xck,46
@@ -31,7 +31,7 @@ fugue/dataframe/arrow_dataframe.py,sha256=r5zcZBX_N6XO5dmixBkTCPgLcMmgDF022piZvr
31
31
  fugue/dataframe/dataframe.py,sha256=xmyG85i14A6LDRkNmPt29oYq7PJsq668s1QvFHK8PV4,16964
32
32
  fugue/dataframe/dataframe_iterable_dataframe.py,sha256=lx71KfaI4lsVKI-79buc-idaeT20JEMBOq21SQcAiY8,7259
33
33
  fugue/dataframe/dataframes.py,sha256=tBSpHsENgbcdOJ0Jgst6PTKbjG7_uoFJch96oTlaQIs,4160
34
- fugue/dataframe/function_wrapper.py,sha256=1CjI4UXHffomylK0_u0CGL1dPv_sSXTN22S5grD10_w,17889
34
+ fugue/dataframe/function_wrapper.py,sha256=7Sb6XrWTD_swtHJbHDWZRxHvFNWkERynnCDzLM0wSbo,18340
35
35
  fugue/dataframe/iterable_dataframe.py,sha256=TcOoNKa4jNbHbvAZ0XAhtMmGcioygIHPxI9budDtenQ,4758
36
36
  fugue/dataframe/pandas_dataframe.py,sha256=0L0wYCGhD2BpQbruoT07Ox9iQM5YLHLNrcgzudc-yKs,11633
37
37
  fugue/dataframe/utils.py,sha256=bA_otOJt9oju1yq5gtn21L_GDT_pUgNc6luYuBIhbUQ,10488
@@ -61,11 +61,11 @@ fugue/extensions/processor/convert.py,sha256=zG0lMtHGwY5TsqK4eplbMdlTg7J_PD3HbI0
61
61
  fugue/extensions/processor/processor.py,sha256=czhQlQgMpAXXoLVAX9Q0TFUMYEEhsgufTammxcKSmOY,1665
62
62
  fugue/extensions/transformer/__init__.py,sha256=VD6d-8xW1Yl8fUPj43cBWNR9pCOlYD9xWyGIHAlHwvI,456
63
63
  fugue/extensions/transformer/constants.py,sha256=76DfpoTOGQ8gp5XtCs_xznfbr_H015-prXpHWSqMNDU,59
64
- fugue/extensions/transformer/convert.py,sha256=SU_KvzZp_nV8oCxZGx7qDsdCE0CJ--8UAp5m8z4d4HY,23386
64
+ fugue/extensions/transformer/convert.py,sha256=zDDIpZawMnHFarjZNZAyiw1jfyXGuPjnvgQk9jpYLak,23384
65
65
  fugue/extensions/transformer/transformer.py,sha256=zhOUgyv5-DPxYd1CP_98WeEw-zUgwknRnPW_6di-q3g,9098
66
66
  fugue/rpc/__init__.py,sha256=3GzUl4QZQuCChjD7eaTJW8tnTwfke6ZY9r9g5nCeBZ8,167
67
67
  fugue/rpc/base.py,sha256=3Fq5SvwLZqw9NXru3r32WuJKBGFr9bl7nFgy6e9boGo,8470
68
- fugue/rpc/flask.py,sha256=1oD0dE4Q-0PKeZ7RG3c0pfwyx21dUavfXWORu_gV7mg,4485
68
+ fugue/rpc/flask.py,sha256=VzJEND8Pqatf6pYYT9LDXeO1JDMmYAOY0lm8XUncKbA,4807
69
69
  fugue/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
70
70
  fugue/sql/_utils.py,sha256=khpjGeFCVlaqf2JIYvS4TVTJO3fe5-8bEsvy6AIP_5Q,2083
71
71
  fugue/sql/_visitors.py,sha256=2pc0J-AHJAiIexsKgNjcgrCGOyhC3_7rzonSgtjy--k,33844
@@ -82,6 +82,7 @@ fugue/workflow/api.py,sha256=uQoxPSCZ91-ST4vwuPWG7qioRGW4eo-Sgi3DdwtSL4k,12495
82
82
  fugue/workflow/input.py,sha256=V_zLDNzndmQuYJAPXtdK4n-vOp7LrimGIf_wQtwf2mc,321
83
83
  fugue/workflow/module.py,sha256=ajyqgMwX6hFMZY9xp4Bp1Q-Zdta0p5f_W_n_SNrc4LE,5547
84
84
  fugue/workflow/workflow.py,sha256=-SFCXkyxgXbS6DpQGSBox4d3Ws3psIlB6PnraJLSu9Y,88219
85
+ fugue-0.9.4.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
85
86
  fugue_contrib/__init__.py,sha256=QJioX-r2AiU7Pvt24M-k2c4vNq29qpK-3WNUde7ucck,222
86
87
  fugue_contrib/contrib.py,sha256=3B--6oIVBMZ-GwjIOXwZqYqkloH7Cxfq1I8vkwl2yPk,267
87
88
  fugue_contrib/seaborn/__init__.py,sha256=NuVv8EI4Om4gHcHwYO8ddextLQqw24vDj8qJio3E1MU,1405
@@ -91,17 +92,17 @@ fugue_dask/__init__.py,sha256=2CcJ0AsN-k_f7dZ-yAyYpaICfUMPfH3l0FvUJSBzTr0,161
91
92
  fugue_dask/_constants.py,sha256=35UmTVITk21GhRyRlbJOwPPdQsytM_p_2NytOXEay18,510
92
93
  fugue_dask/_dask_sql_wrapper.py,sha256=lj38gJIOdoMV9W44gpwzLjUEtPVsQNKjRWuEkfI7-PM,2618
93
94
  fugue_dask/_io.py,sha256=pl4F7mbVgP7Rwh1FFG7xfOz2TBZRUj1l3lLvDY4jOf4,6020
94
- fugue_dask/_utils.py,sha256=dGUkhOoXQqgGQH_BY6aeYFo9UIWUAyo8YjwtdB7QD4s,8951
95
- fugue_dask/dataframe.py,sha256=MuG9TqCND7qI66lPvxzuomfE7yA4sW7DjrvbyvE6XEU,13471
96
- fugue_dask/execution_engine.py,sha256=Em9pN6cw5w5DGLcjV6oKQKQeLLblc9DZ0DkvxKVFxQQ,21167
95
+ fugue_dask/_utils.py,sha256=0R0pCh4B47kQsAS_o0QGaosIqVcZnSakm6pfMB7fSXs,9059
96
+ fugue_dask/dataframe.py,sha256=4Dvckpc4mlld2WsEFTTemxoA1zYK8Cn6jMKxUxYQCEE,13491
97
+ fugue_dask/execution_engine.py,sha256=mFN_IurhdBEu8C5OreqpGSRdTbTBqSpzJO2dMQzEF-o,21264
97
98
  fugue_dask/registry.py,sha256=jepWKH55VWNIWV3pOF5vpCl2OpO0rI1IULx5GM2Gk6w,2274
98
99
  fugue_dask/tester.py,sha256=E7BZjgFpJgrHsLMKzvSO5im5OwocYcratjzulJSQZl0,718
99
100
  fugue_duckdb/__init__.py,sha256=ZzhmAWbROR1YL9Kmlt7OlwkgPZzFhsSdwLV2pFmAqGI,268
100
101
  fugue_duckdb/_io.py,sha256=vnd8m8C6XeMCBJBbAdA5h695NMfsduQrvONyS0HcEFA,8475
101
102
  fugue_duckdb/_utils.py,sha256=ElKbHUyn5fWSPGXsK57iqMzcqKtCf0c8pBVBYGe5Ql4,5020
102
103
  fugue_duckdb/dask.py,sha256=agoLzeB7Swxj2kVWfmXFbWD1NS2lbbTlnrjSkR8kKWY,5014
103
- fugue_duckdb/dataframe.py,sha256=LRfTv7Y46wMM_IDYSP1R-5OXuHuBg8GHjPGFFt8u7l0,8444
104
- fugue_duckdb/execution_engine.py,sha256=IZDmSAtOMJGvulTStxjTmsqJyI5QRNyxBgSMlFMSrBI,20389
104
+ fugue_duckdb/dataframe.py,sha256=LAPoPOad9hgGhjyhlMGMfrnhkyBKe06Xzn6eP1hkl-w,8504
105
+ fugue_duckdb/execution_engine.py,sha256=3f5hbWcX1y9mAtfFixrri-snYxVIQAf4HOgo9fHbDwQ,20385
105
106
  fugue_duckdb/registry.py,sha256=9_41KO42kXqcjF4yParQ5JGyg5TckcbhH-Q2IlGpSho,3987
106
107
  fugue_duckdb/tester.py,sha256=MzTkv3sdOwOjI59LRrSGGl4w59Njv3OArTU5kSRL-P0,1526
107
108
  fugue_ibis/__init__.py,sha256=z7TkK7M2_0p9XO6jQATNDgT0aHXn5k69Ttz2ga-eQG8,190
@@ -128,7 +129,7 @@ fugue_ray/tester.py,sha256=oTA_xOzvQhJU3ohc4hsVpZc0zv4bwJn1c8a9u8kcuIs,537
128
129
  fugue_ray/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
129
130
  fugue_ray/_utils/cluster.py,sha256=3T3Gyra6lAHlzktta-Ro35j6YZQfH6fNrj2hC5ATF9k,621
130
131
  fugue_ray/_utils/dataframe.py,sha256=5c4duGV--mdLkKrbJRgjDWvVcp9BegA3yX16pmYDYLE,3954
131
- fugue_ray/_utils/io.py,sha256=Dz0WuQrh_8Ix7jU5viFIA6caJcfxV4ew0ruBZLQbD1s,9930
132
+ fugue_ray/_utils/io.py,sha256=JZdL7pdpk1DUIj77NJSzU_EZOW4cN7oNjwGy2w-LRTw,10142
132
133
  fugue_spark/__init__.py,sha256=rvrMpFs9socMgyH_58gLbnAqmirBf5oidXoO4cekW6U,165
133
134
  fugue_spark/_constants.py,sha256=K2uLQfjvMxXk75K-7_Wn47Alpwq5rW57BtECAUrOeqA,177
134
135
  fugue_spark/dataframe.py,sha256=lYa8FizM3p_lsKYFR49FazkVZMJKyi2LABKTpP5YBLo,12006
@@ -136,7 +137,7 @@ fugue_spark/execution_engine.py,sha256=YBMtNxCvpy77xICFSg9PHMa6feNoYhWEZe8MmxznX
136
137
  fugue_spark/registry.py,sha256=_NmiV2cOooYK0YmqATEnNkPEMT9suUMtuecw2NNbIIk,4530
137
138
  fugue_spark/tester.py,sha256=VX003yGNlBukaZTQSN-w7XvgSk4rqxrWQIzno0dWrXg,2481
138
139
  fugue_spark/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
139
- fugue_spark/_utils/convert.py,sha256=eRWkDYA4UO-FQu-2y4O80WEdawx7X_rIrWg55AlOiRc,10007
140
+ fugue_spark/_utils/convert.py,sha256=J3HtbuzomTYTN6A11iuvsC1h2C7o3fQBW5U360xGDhE,10234
140
141
  fugue_spark/_utils/io.py,sha256=OdUezKpB29Lx9aUS2k9x0xUAGZrmgMZyQYGPEeHk7rQ,5574
141
142
  fugue_spark/_utils/misc.py,sha256=9LsbBp6nOEhqXFLr8oWTc3VKzKk-vuVyixlRoquGnEs,858
142
143
  fugue_spark/_utils/partition.py,sha256=iaesyO5f4uXhj1W-p91cD5ecPiGlu0bzh8gl2ce2Uvg,3618
@@ -144,14 +145,13 @@ fugue_sql/__init__.py,sha256=Cmr7w0Efr7PzoXdQzdJfc4Dgqd69qKqcHZZodENq7EU,287
144
145
  fugue_sql/exceptions.py,sha256=ltS0MC8gMnVVrJbQiOZ0kRUWvVQ2LTx33dCW3ugqtb0,260
145
146
  fugue_test/__init__.py,sha256=xoQuVobhU64uyODRdnzf6MSWe9lw5khkhpJ2atvADoc,2315
146
147
  fugue_test/bag_suite.py,sha256=WbDCFjuAHYoJh4GXSPiSJxOoOwE1VMtYpJ3lQrsUK-Y,2483
147
- fugue_test/builtin_suite.py,sha256=BpGwa66cAUuuc7ULOsPP3ax8IKQtNIPoSmlUFgqUKQk,79252
148
+ fugue_test/builtin_suite.py,sha256=jP3xiq2vRZNNGzoSRjcUfrUk8NVg31SU0kpJaEvP25E,79400
148
149
  fugue_test/dataframe_suite.py,sha256=7ym4sshDUly6004cq1UlppqDVtbwxD6CKxR4Lu70i0s,18994
149
- fugue_test/execution_suite.py,sha256=jcSSoKqTGbeWzTxkyYU-8i2zJAjzuXn7BqE8ul-JjIc,48646
150
+ fugue_test/execution_suite.py,sha256=wUiGdb8wLRd13JXo7Lo19vPOLh7t1C-L2NPLeU0k-uE,48736
150
151
  fugue_test/fixtures.py,sha256=8Pev-mxRZOWwTFlsGjcSZ0iIs78zyWbp5tq4KG1wyvk,1432
151
- fugue_version/__init__.py,sha256=gqT-BGoeEItda9fICQDvLbxEjWRIBhFJxPxxKvmHLUo,22
152
- fugue-0.9.2.dev2.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
153
- fugue-0.9.2.dev2.dist-info/METADATA,sha256=eR5mL6Tf1RGa_-Do5Dmzy4ZkbcbKf-FzW4qA0cAW1Ec,18283
154
- fugue-0.9.2.dev2.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
155
- fugue-0.9.2.dev2.dist-info/entry_points.txt,sha256=2Vxp1qew_tswacA8m0RzIliLlFOQMlzezvSXPugM_KA,295
156
- fugue-0.9.2.dev2.dist-info/top_level.txt,sha256=y1eCfzGdQ1_RkgcShcfbvXs-bopD3DwJcIOxP9EFXno,140
157
- fugue-0.9.2.dev2.dist-info/RECORD,,
152
+ fugue_version/__init__.py,sha256=e56AvHfJCtG2ZwwINqsxINVbehWdKxMYgIDbjd7P-II,22
153
+ fugue-0.9.4.dist-info/METADATA,sha256=1S17OQrlR3NfUz8lrjDp7HHAfY3fCgnik5r0tty7Yuw,18518
154
+ fugue-0.9.4.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
155
+ fugue-0.9.4.dist-info/entry_points.txt,sha256=2Vxp1qew_tswacA8m0RzIliLlFOQMlzezvSXPugM_KA,295
156
+ fugue-0.9.4.dist-info/top_level.txt,sha256=y1eCfzGdQ1_RkgcShcfbvXs-bopD3DwJcIOxP9EFXno,140
157
+ fugue-0.9.4.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.45.1)
2
+ Generator: setuptools (79.0.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
fugue_dask/_utils.py CHANGED
@@ -149,7 +149,7 @@ def _add_hash_index(
149
149
  if len(cols) == 0:
150
150
  cols = list(df.columns)
151
151
 
152
- def _add_hash(df: pd.DataFrame) -> pd.DataFrame:
152
+ def _add_hash(df: pd.DataFrame) -> pd.DataFrame: # pragma: no cover
153
153
  if len(df) == 0:
154
154
  return df.assign(**{_FUGUE_DASK_TEMP_IDX_COLUMN: pd.Series(dtype=int)})
155
155
  return df.assign(
@@ -171,7 +171,7 @@ def _add_hash_index(
171
171
 
172
172
  def _add_random_index(
173
173
  df: dd.DataFrame, num: int, seed: Any = None
174
- ) -> Tuple[dd.DataFrame, int]:
174
+ ) -> Tuple[dd.DataFrame, int]: # pragma: no cover
175
175
  def _add_rand(df: pd.DataFrame) -> pd.DataFrame:
176
176
  if len(df) == 0:
177
177
  return df.assign(**{_FUGUE_DASK_TEMP_IDX_COLUMN: pd.Series(dtype=int)})
@@ -189,7 +189,9 @@ def _add_random_index(
189
189
 
190
190
 
191
191
  def _add_continuous_index(df: dd.DataFrame) -> Tuple[dd.DataFrame, int]:
192
- def _get_info(df: pd.DataFrame, partition_info: Any) -> pd.DataFrame:
192
+ def _get_info(
193
+ df: pd.DataFrame, partition_info: Any
194
+ ) -> pd.DataFrame: # pragma: no cover
193
195
  return pd.DataFrame(dict(no=[partition_info["number"]], ct=[len(df)]))
194
196
 
195
197
  pinfo = (
@@ -200,7 +202,9 @@ def _add_continuous_index(df: dd.DataFrame) -> Tuple[dd.DataFrame, int]:
200
202
  counts = pinfo.sort_values("no").ct.cumsum().tolist()
201
203
  starts = [0] + counts[0:-1]
202
204
 
203
- def _add_index(df: pd.DataFrame, partition_info: Any) -> pd.DataFrame:
205
+ def _add_index(
206
+ df: pd.DataFrame, partition_info: Any
207
+ ) -> pd.DataFrame: # pragma: no cover
204
208
  return df.assign(
205
209
  **{
206
210
  _FUGUE_DASK_TEMP_IDX_COLUMN: np.arange(len(df))
fugue_dask/dataframe.py CHANGED
@@ -379,7 +379,7 @@ def _to_array_chunks(
379
379
  assert_or_throw(columns is None or len(columns) > 0, ValueError("empty columns"))
380
380
  _df = df if columns is None or len(columns) == 0 else df[columns]
381
381
 
382
- def _to_list(pdf: pd.DataFrame) -> List[Any]:
382
+ def _to_list(pdf: pd.DataFrame) -> List[Any]: # pragma: no cover
383
383
  return list(
384
384
  PD_UTILS.as_array_iterable(
385
385
  pdf,
@@ -98,7 +98,8 @@ class DaskMapEngine(MapEngine):
98
98
  partition_spec: PartitionSpec,
99
99
  on_init: Optional[Callable[[int, DataFrame], Any]] = None,
100
100
  map_func_format_hint: Optional[str] = None,
101
- ) -> DataFrame:
101
+ ) -> DataFrame: # pragma: no cover
102
+ # It is well tested but not captured by coverage
102
103
  presort = partition_spec.get_sorts(
103
104
  df.schema, with_partition_keys=partition_spec.algo == "coarse"
104
105
  )
@@ -471,7 +472,7 @@ class DaskExecutionEngine(ExecutionEngine):
471
472
  # Use presort over partition_spec.presort if possible
472
473
  _presort: IndexedOrderedDict = presort or partition_spec.presort
473
474
 
474
- def _partition_take(partition, n, presort):
475
+ def _partition_take(partition, n, presort): # pragma: no cover
475
476
  assert_or_throw(
476
477
  partition.shape[1] == len(meta),
477
478
  FugueBug("hitting the dask bug where partition keys are lost"),
fugue_duckdb/dataframe.py CHANGED
@@ -165,7 +165,7 @@ def _duck_as_local(df: DuckDBPyRelation) -> DuckDBPyRelation:
165
165
 
166
166
  @as_arrow.candidate(lambda df: isinstance(df, DuckDBPyRelation))
167
167
  def _duck_as_arrow(df: DuckDBPyRelation) -> pa.Table:
168
- _df = df.arrow()
168
+ _df = df.fetch_arrow_table()
169
169
  _df = replace_types_in_table(_df, LARGE_TYPES_REPLACEMENT, recursive=True)
170
170
  return _df
171
171
 
@@ -216,7 +216,7 @@ def _drop_duckdb_columns(df: DuckDBPyRelation, columns: List[str]) -> DuckDBPyRe
216
216
  def _duck_as_array(
217
217
  df: DuckDBPyRelation, columns: Optional[List[str]] = None, type_safe: bool = False
218
218
  ) -> List[Any]:
219
- return pa_table_as_array(df.arrow(), columns=columns)
219
+ return pa_table_as_array(df.fetch_arrow_table(), columns=columns)
220
220
 
221
221
 
222
222
  @as_array_iterable.candidate(
@@ -225,14 +225,14 @@ def _duck_as_array(
225
225
  def _duck_as_array_iterable(
226
226
  df: DuckDBPyRelation, columns: Optional[List[str]] = None, type_safe: bool = False
227
227
  ) -> Iterable[Any]:
228
- yield from pa_table_as_array_iterable(df.arrow(), columns=columns)
228
+ yield from pa_table_as_array_iterable(df.fetch_arrow_table(), columns=columns)
229
229
 
230
230
 
231
231
  @as_dicts.candidate(lambda df, *args, **kwargs: isinstance(df, DuckDBPyRelation))
232
232
  def _duck_as_dicts(
233
233
  df: DuckDBPyRelation, columns: Optional[List[str]] = None
234
234
  ) -> List[Dict[str, Any]]:
235
- return pa_table_as_dicts(df.arrow(), columns=columns)
235
+ return pa_table_as_dicts(df.fetch_arrow_table(), columns=columns)
236
236
 
237
237
 
238
238
  @as_dict_iterable.candidate(
@@ -241,7 +241,7 @@ def _duck_as_dicts(
241
241
  def _duck_as_dict_iterable(
242
242
  df: DuckDBPyRelation, columns: Optional[List[str]] = None
243
243
  ) -> Iterable[Dict[str, Any]]:
244
- yield from pa_table_as_dict_iterable(df.arrow(), columns=columns)
244
+ yield from pa_table_as_dict_iterable(df.fetch_arrow_table(), columns=columns)
245
245
 
246
246
 
247
247
  def _assert_no_missing(df: DuckDBPyRelation, columns: Iterable[Any]) -> None:
@@ -108,7 +108,7 @@ class DuckDBEngine(SQLEngine):
108
108
  try:
109
109
  for k, v in dfs.items():
110
110
  duckdb.from_arrow(v.as_arrow(), connection=conn).create_view(k)
111
- return ArrowDataFrame(_duck_as_arrow(conn.execute(statement)))
111
+ return ArrowDataFrame(_duck_as_arrow(conn.sql(statement)))
112
112
  finally:
113
113
  conn.close()
114
114
 
fugue_ray/_utils/io.py CHANGED
@@ -74,7 +74,7 @@ class RayIO(object):
74
74
  len(fmts) == 1, NotImplementedError("can't support multiple formats")
75
75
  )
76
76
  fmt = fmts[0]
77
- files = [f.path for f in fp]
77
+ files = [f.as_dir_path() if f.is_dir else f.path for f in fp]
78
78
  return self._loads[fmt](files, columns, **kwargs)
79
79
 
80
80
  def save_df(
@@ -153,6 +153,10 @@ class RayIO(object):
153
153
  def _load_parquet(
154
154
  self, p: List[str], columns: Any = None, **kwargs: Any
155
155
  ) -> DataFrame:
156
+ # in 2.52.0 the default changes to ["parquet"]
157
+ if "file_extensions" not in kwargs:
158
+ kwargs = kwargs.copy()
159
+ kwargs["file_extensions"] = None
156
160
  sdf = rd.read_parquet(p, ray_remote_args=self._remote_args(), **kwargs)
157
161
  if columns is None:
158
162
  return RayDataFrame(sdf)
@@ -174,20 +174,26 @@ def pd_to_spark_df(
174
174
 
175
175
 
176
176
  def to_pandas(df: ps.DataFrame) -> pd.DataFrame:
177
- if version.parse(pd.__version__) < version.parse("2.0.0") or not any(
178
- isinstance(x.dataType, (pt.TimestampType, TimestampNTZType))
179
- for x in df.schema.fields
180
- ):
181
- return df.toPandas()
182
- else: # pragma: no cover
177
+ def _to_df() -> pd.DataFrame:
178
+ if version.parse(pd.__version__) < version.parse("2.0.0") or not any(
179
+ isinstance(x.dataType, (pt.TimestampType, TimestampNTZType))
180
+ for x in df.schema.fields
181
+ ):
182
+ return df.toPandas()
183
+ else: # pragma: no cover
184
+
185
+ def serialize(dfs):
186
+ for df in dfs:
187
+ data = pickle.dumps(df)
188
+ yield pd.DataFrame([[data]], columns=["data"])
183
189
 
184
- def serialize(dfs):
185
- for df in dfs:
186
- data = pickle.dumps(df)
187
- yield pd.DataFrame([[data]], columns=["data"])
190
+ sdf = df.mapInPandas(serialize, schema="data binary")
191
+ return pd.concat(pickle.loads(x.data) for x in sdf.collect())
188
192
 
189
- sdf = df.mapInPandas(serialize, schema="data binary")
190
- return pd.concat(pickle.loads(x.data) for x in sdf.collect())
193
+ pdf = _to_df()
194
+ if hasattr(pdf, "attrs") and "metrics" in pdf.attrs: # pragma: no cover
195
+ del pdf.attrs["metrics"]
196
+ return pdf
191
197
 
192
198
 
193
199
  def to_arrow(df: ps.DataFrame) -> pa.Table:
@@ -486,7 +486,9 @@ class BuiltInTests(object):
486
486
  dag.df([], "a:int,b:int").assert_eq(b)
487
487
  dag.run(self.engine)
488
488
 
489
- def test_transform_row_wise(self):
489
+ def _test_transform_row_wise(self): # pragma: no cover
490
+ # TODO: currently disabled because we don't support Dict[str, Any]
491
+ # as dataframe input
490
492
  def t1(row: Dict[str, Any]) -> Dict[str, Any]:
491
493
  row["b"] = 1
492
494
  return row
@@ -695,11 +697,11 @@ class BuiltInTests(object):
695
697
  incr()
696
698
  yield pa.Table.from_pandas(df)
697
699
 
698
- def t11(row: Dict[str, Any]) -> Dict[str, Any]:
700
+ def t11(row: list[dict[str, Any]]) -> dict[str, Any]:
699
701
  incr()
700
- return row
702
+ return row[0]
701
703
 
702
- def t12(row: Dict[str, Any]) -> None:
704
+ def t12(row: list[dict[str, Any]]) -> None:
703
705
  incr()
704
706
 
705
707
  with FugueWorkflow() as dag:
@@ -9,6 +9,7 @@ except ImportError: # pragma: no cover
9
9
  import copy
10
10
  import os
11
11
  import pickle
12
+ import sys
12
13
  from datetime import datetime
13
14
 
14
15
  import pandas as pd
@@ -1194,6 +1195,7 @@ class ExecutionEngineTests(object):
1194
1195
  )
1195
1196
  self.df_eq(c, [[1.1, 6.1], [7.1, 2.1]], "a:double,c:double", throw=True)
1196
1197
 
1198
+ @pytest.mark.skipif(sys.platform == "win32", reason="skip on Windows")
1197
1199
  def test_load_csv_folder(self):
1198
1200
  native = NativeExecutionEngine()
1199
1201
  a = ArrayDataFrame([[6.1, 1.1]], "c:double,a:double")
fugue_version/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.9.2"
1
+ __version__ = "0.9.4"