fugue 0.9.5__py3-none-any.whl → 0.9.7.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fugue/__init__.py CHANGED
@@ -1,4 +1,8 @@
1
1
  # flake8: noqa
2
+ from importlib.metadata import version
3
+
4
+ __version__ = version("fugue")
5
+
2
6
  from triad.collections import Schema
3
7
 
4
8
  from fugue.api import out_transform, transform
@@ -83,7 +87,6 @@ from fugue.sql.workflow import FugueSQLWorkflow
83
87
  from fugue.workflow._workflow_context import FugueWorkflowContext
84
88
  from fugue.workflow.module import module
85
89
  from fugue.workflow.workflow import FugueWorkflow, WorkflowDataFrame, WorkflowDataFrames
86
- from fugue_version import __version__
87
90
 
88
91
  from .dev import *
89
92
 
@@ -59,9 +59,7 @@ class DataFrame(Dataset):
59
59
  assert isinstance(self._schema, Schema)
60
60
  return self._schema # type: ignore
61
61
  with self._lazy_schema_lock:
62
- self._schema = _input_schema(
63
- self._schema()
64
- ).assert_not_empty() # type: ignore
62
+ self._schema = _input_schema(self._schema()).assert_not_empty() # type: ignore
65
63
  self._schema.set_readonly()
66
64
  self._schema_discovered = True
67
65
  return self._schema
@@ -1,4 +1,5 @@
1
1
  import inspect
2
+ from collections.abc import Callable as AbcCallable
2
3
  from typing import (
3
4
  Any,
4
5
  Callable,
@@ -7,6 +8,9 @@ from typing import (
7
8
  Iterator,
8
9
  List,
9
10
  Optional,
11
+ Union,
12
+ get_args,
13
+ get_origin,
10
14
  no_type_check,
11
15
  )
12
16
 
@@ -39,13 +43,66 @@ from .pandas_dataframe import PandasDataFrame
39
43
 
40
44
 
41
45
  def _compare_iter(tp: Any) -> Any:
42
- return lambda x: compare_annotations(
43
- x, Iterable[tp] # type:ignore
44
- ) or compare_annotations(
45
- x, Iterator[tp] # type:ignore
46
+ return lambda x: (
47
+ compare_annotations(
48
+ x,
49
+ Iterable[tp], # type:ignore
50
+ )
51
+ or compare_annotations(
52
+ x,
53
+ Iterator[tp], # type:ignore
54
+ )
46
55
  )
47
56
 
48
57
 
58
+ def _is_optional(annotation) -> bool:
59
+ origin = get_origin(annotation)
60
+
61
+ # Check if it's a Union type
62
+ if origin is Union:
63
+ args = get_args(annotation)
64
+ # Optional[T] is Union[T, None]
65
+ return type(None) in args
66
+
67
+
68
+ def _is_required_callable(annotation) -> bool:
69
+ """Check if annotation is a required (non-optional) Callable type."""
70
+ if _is_optional(annotation):
71
+ return False
72
+
73
+ # Check direct equality
74
+ if annotation == Callable or annotation == callable: # pylint: disable=comparison-with-callable
75
+ return True
76
+
77
+ # Check if it's a generic Callable like Callable[[int], str]
78
+ origin = get_origin(annotation)
79
+ return origin is AbcCallable or origin is type(Callable)
80
+
81
+
82
+ def _is_optional_callable(annotation) -> bool:
83
+ """Check if annotation is an optional Callable type (Optional[Callable] or Callable | None)."""
84
+ if not _is_optional(annotation):
85
+ return False
86
+
87
+ # Get the non-None types from the Union
88
+ args = get_args(annotation)
89
+ non_none_types = [arg for arg in args if arg is not type(None)]
90
+
91
+ # Should have exactly one non-None type, and it should be Callable
92
+ if len(non_none_types) != 1: # pragma: no cover
93
+ return False
94
+
95
+ inner_type = non_none_types[0]
96
+
97
+ # Check if the inner type is Callable
98
+ if inner_type == Callable or inner_type == callable: # pylint: disable=comparison-with-callable
99
+ return True
100
+
101
+ # Check if it's a generic Callable like Callable[[int], str]
102
+ origin = get_origin(inner_type)
103
+ return origin is AbcCallable or origin is type(Callable)
104
+
105
+
49
106
  @function_wrapper(FUGUE_ENTRYPOINT)
50
107
  class DataFrameFunctionWrapper(FunctionWrapper):
51
108
  @property
@@ -154,12 +211,7 @@ fugue_annotated_param = DataFrameFunctionWrapper.annotated_param
154
211
  @fugue_annotated_param(
155
212
  "Callable",
156
213
  "F",
157
- lambda annotation: (
158
- annotation == Callable
159
- or annotation == callable # pylint: disable=comparison-with-callable
160
- or str(annotation).startswith("typing.Callable")
161
- or str(annotation).startswith("collections.abc.Callable")
162
- ),
214
+ _is_required_callable,
163
215
  )
164
216
  class _CallableParam(AnnotatedParam):
165
217
  pass
@@ -168,15 +220,7 @@ class _CallableParam(AnnotatedParam):
168
220
  @fugue_annotated_param(
169
221
  "Callable",
170
222
  "f",
171
- lambda annotation: (
172
- annotation == Optional[Callable]
173
- or annotation == Optional[callable]
174
- or str(annotation).startswith("typing.Union[typing.Callable") # 3.8-
175
- or str(annotation).startswith("typing.Optional[typing.Callable") # 3.9+
176
- or str(annotation).startswith(
177
- "typing.Optional[collections.abc.Callable]"
178
- ) # 3.9+
179
- ),
223
+ _is_optional_callable,
180
224
  )
181
225
  class _OptionalCallableParam(AnnotatedParam):
182
226
  pass
fugue/dataframe/utils.py CHANGED
@@ -61,9 +61,9 @@ def _df_eq(
61
61
  else:
62
62
  df2 = as_fugue_df(data).as_local_bounded()
63
63
  try:
64
- assert (
65
- df1.count() == df2.count()
66
- ), f"count mismatch {df1.count()}, {df2.count()}"
64
+ assert df1.count() == df2.count(), (
65
+ f"count mismatch {df1.count()}, {df2.count()}"
66
+ )
67
67
  assert not check_schema or df.schema.is_like(
68
68
  df2.schema, equal_groups=equal_type_groups
69
69
  ), f"schema mismatch {df.schema.pa_schema}, {df2.schema.pa_schema}"
@@ -67,7 +67,7 @@ class Save(Outputter):
67
67
  mode=mode,
68
68
  partition_spec=partition_spec,
69
69
  force_single=force_single,
70
- **kwargs
70
+ **kwargs,
71
71
  )
72
72
 
73
73
 
@@ -314,7 +314,7 @@ class SaveAndUse(Processor):
314
314
  mode=mode,
315
315
  partition_spec=partition_spec,
316
316
  force_single=force_single,
317
- **kwargs
317
+ **kwargs,
318
318
  )
319
319
  return self.execution_engine.load_df(path=path, format_hint=format_hint)
320
320
 
@@ -136,6 +136,7 @@ def processor(
136
136
  Please read
137
137
  :doc:`Processor Tutorial <tutorial:tutorials/extensions/processor>`
138
138
  """
139
+
139
140
  # TODO: validation of schema if without * should be done at compile time
140
141
  def deco(func: Callable) -> "_FuncAsProcessor":
141
142
  return _FuncAsProcessor.from_func(
@@ -546,7 +546,10 @@ class _FuncAsOutputCoTransformer(_FuncAsCoTransformer):
546
546
  p = dict(dfs)
547
547
  p.update(self.params)
548
548
  self._wrapper.run(
549
- [] + cb, p, ignore_unknown=False, output=False # type: ignore
549
+ [] + cb,
550
+ p,
551
+ ignore_unknown=False,
552
+ output=False, # type: ignore
550
553
  )
551
554
  return ArrayDataFrame([], OUTPUT_TRANSFORMER_DUMMY_SCHEMA)
552
555
 
fugue/sql/_utils.py CHANGED
@@ -35,7 +35,6 @@ def fill_sql_template(sql: str, params: Dict[str, Any]):
35
35
  template = Template(new_sql)
36
36
 
37
37
  except jinja2.exceptions.TemplateSyntaxError:
38
-
39
38
  template = Template(sql)
40
39
 
41
40
  return template.render(**params)
fugue/sql/_visitors.py CHANGED
@@ -820,7 +820,6 @@ class _Extensions(_VisitorBase):
820
820
 
821
821
  def visitSetOperation(self, ctx: fp.SetOperationContext) -> Iterable[Any]:
822
822
  def get_sub(_ctx: Tree) -> List[Any]:
823
-
824
823
  sub = list(
825
824
  self.visitFugueTerm(_ctx)
826
825
  if isinstance(_ctx, fp.FugueTermContext)
fugue/workflow/_tasks.py CHANGED
@@ -204,7 +204,9 @@ class FugueTask(TaskSpec, ABC):
204
204
 
205
205
  # add caller traceback
206
206
  ctb = modify_traceback(
207
- sys.exc_info()[2].tb_next, None, self._traceback # type: ignore
207
+ sys.exc_info()[2].tb_next,
208
+ None,
209
+ self._traceback, # type: ignore
208
210
  )
209
211
  if ctb is None: # pragma: no cover
210
212
  raise
@@ -1468,7 +1468,8 @@ class WorkflowDataFrames(DataFrames):
1468
1468
  super().__setitem__(key, value, *args, **kwds)
1469
1469
 
1470
1470
  def __getitem__( # pylint: disable=W0235
1471
- self, key: Union[str, int] # type: ignore
1471
+ self,
1472
+ key: Union[str, int], # type: ignore
1472
1473
  ) -> WorkflowDataFrame:
1473
1474
  return super().__getitem__(key) # type: ignore
1474
1475
 
@@ -1791,8 +1792,7 @@ class FugueWorkflow:
1791
1792
  )
1792
1793
  )
1793
1794
  raise FugueWorkflowCompileError(
1794
- f"Input data of type {type(data)} can't "
1795
- "be converted to WorkflowDataFrame"
1795
+ f"Input data of type {type(data)} can't be converted to WorkflowDataFrame"
1796
1796
  )
1797
1797
 
1798
1798
  def df(
@@ -1,27 +1,26 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fugue
3
- Version: 0.9.5
3
+ Version: 0.9.7.dev0
4
4
  Summary: An abstraction layer for distributed computation
5
- Home-page: http://github.com/fugue-project/fugue
6
- Author: The Fugue Development Team
7
- Author-email: hello@fugue.ai
5
+ Author-email: The Fugue Development Team <hello@fugue.ai>
8
6
  License: Apache-2.0
9
- Keywords: distributed spark dask ray sql dsl domain specific language
7
+ Project-URL: Homepage, http://github.com/fugue-project/fugue
8
+ Project-URL: Repository, http://github.com/fugue-project/fugue
9
+ Keywords: distributed,spark,dask,ray,sql,dsl,domain specific language
10
10
  Classifier: Development Status :: 5 - Production/Stable
11
11
  Classifier: Intended Audience :: Developers
12
12
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
13
- Classifier: License :: OSI Approved :: Apache Software License
14
13
  Classifier: Programming Language :: Python :: 3
15
- Classifier: Programming Language :: Python :: 3.8
16
- Classifier: Programming Language :: Python :: 3.9
17
14
  Classifier: Programming Language :: Python :: 3.10
18
15
  Classifier: Programming Language :: Python :: 3.11
19
16
  Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Programming Language :: Python :: 3.14
20
19
  Classifier: Programming Language :: Python :: 3 :: Only
21
- Requires-Python: >=3.8
20
+ Requires-Python: >=3.10
22
21
  Description-Content-Type: text/markdown
23
22
  License-File: LICENSE
24
- Requires-Dist: triad>=1.0.0
23
+ Requires-Dist: triad>=1.0.1
25
24
  Requires-Dist: adagio>=0.2.6
26
25
  Requires-Dist: pandas<3
27
26
  Provides-Extra: sql
@@ -39,7 +38,7 @@ Requires-Dist: dask[dataframe,distributed]>=2024.4.0; extra == "dask"
39
38
  Requires-Dist: pyarrow>=7.0.0; extra == "dask"
40
39
  Requires-Dist: pandas>=2.0.2; extra == "dask"
41
40
  Provides-Extra: ray
42
- Requires-Dist: ray[data]>=2.30.0; extra == "ray"
41
+ Requires-Dist: ray[data]>=2.30.0; python_version < "3.14" and extra == "ray"
43
42
  Requires-Dist: duckdb>=0.5.0; extra == "ray"
44
43
  Requires-Dist: pyarrow>=7.0.0; extra == "ray"
45
44
  Requires-Dist: pandas; extra == "ray"
@@ -70,7 +69,7 @@ Requires-Dist: jinja2; extra == "all"
70
69
  Requires-Dist: pyspark>=3.1.1; extra == "all"
71
70
  Requires-Dist: dask[dataframe,distributed]>=2024.4.0; extra == "all"
72
71
  Requires-Dist: dask-sql; extra == "all"
73
- Requires-Dist: ray[data]>=2.30.0; extra == "all"
72
+ Requires-Dist: ray[data]>=2.30.0; python_version < "3.14" and extra == "all"
74
73
  Requires-Dist: notebook; extra == "all"
75
74
  Requires-Dist: jupyterlab; extra == "all"
76
75
  Requires-Dist: ipython>=7.10.0; extra == "all"
@@ -79,26 +78,14 @@ Requires-Dist: pyarrow>=6.0.1; extra == "all"
79
78
  Requires-Dist: pandas>=2.0.2; extra == "all"
80
79
  Requires-Dist: ibis-framework[duckdb,pandas]; extra == "all"
81
80
  Requires-Dist: polars; extra == "all"
82
- Dynamic: author
83
- Dynamic: author-email
84
- Dynamic: classifier
85
- Dynamic: description
86
- Dynamic: description-content-type
87
- Dynamic: home-page
88
- Dynamic: keywords
89
- Dynamic: license
90
81
  Dynamic: license-file
91
- Dynamic: provides-extra
92
- Dynamic: requires-dist
93
- Dynamic: requires-python
94
- Dynamic: summary
95
82
 
96
- # Fugue
83
+ # <img src="./images/logo.svg" width="200">
97
84
 
98
85
  [![PyPI version](https://badge.fury.io/py/fugue.svg)](https://pypi.python.org/pypi/fugue/)
99
86
  [![PyPI pyversions](https://img.shields.io/pypi/pyversions/fugue.svg)](https://pypi.python.org/pypi/fugue/)
100
87
  [![PyPI license](https://img.shields.io/pypi/l/fugue.svg)](https://pypi.python.org/pypi/fugue/)
101
- [![codecov](https://codecov.io/gh/fugue-project/fugue/branch/master/graph/badge.svg?token=ZO9YD5N3IA)](https://codecov.io/gh/fugue-project/fugue)
88
+ [![codecov](https://codecov.io/gh/fugue-project/fugue/branch/main/graph/badge.svg?token=ZO9YD5N3IA)](https://codecov.io/gh/fugue-project/fugue)
102
89
  [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fa5f2f53e6f48aaa1218a89f4808b91)](https://www.codacy.com/gh/fugue-project/fugue/dashboard?utm_source=github.com&utm_medium=referral&utm_content=fugue-project/fugue&utm_campaign=Badge_Grade)
103
90
  [![Downloads](https://static.pepy.tech/badge/fugue)](https://pepy.tech/project/fugue)
104
91
 
@@ -1,4 +1,4 @@
1
- fugue/__init__.py,sha256=LKkBEPEAMLG-Yuzqt0IgoIDEfNf9a1zUffNKb83D_l8,2705
1
+ fugue/__init__.py,sha256=asKV-hQ4hXj28c8zwjZQHj_JlomElOTRRaO8tx29sYw,2739
2
2
  fugue/api.py,sha256=dLUrigFhDMB5x7cvlWSK8EyaY2o0AmhgPr0VRtfzSz0,1254
3
3
  fugue/constants.py,sha256=crd0VqX8WtBcjSUNwZDi2LDIEkhUMWOlSn73H8JI9ds,3385
4
4
  fugue/dev.py,sha256=GQCkezBBl4V0lVDWhGtUQKqomiCxgR9dMhfqj9C8cS8,1369
@@ -28,13 +28,13 @@ fugue/dataframe/__init__.py,sha256=zm7TbsaJLIvfm7zymWm2LGcuJd3nxfGsFnQiyrSnenM,6
28
28
  fugue/dataframe/api.py,sha256=aWBvMaiSUxOvdQMfe79zHShWuPfLcgiWggC9HvVxvSE,11017
29
29
  fugue/dataframe/array_dataframe.py,sha256=4scWnmQ6sjy1A6o7IYdRc0VVutBEfcJrA1f9wkph4Kg,4440
30
30
  fugue/dataframe/arrow_dataframe.py,sha256=r5zcZBX_N6XO5dmixBkTCPgLcMmgDF022piZvrwRp_c,11485
31
- fugue/dataframe/dataframe.py,sha256=xmyG85i14A6LDRkNmPt29oYq7PJsq668s1QvFHK8PV4,16964
31
+ fugue/dataframe/dataframe.py,sha256=B9-oKUWMNXvKjAEchwJt9gzDbeIYMFWsLdt_YmJUCQg,16934
32
32
  fugue/dataframe/dataframe_iterable_dataframe.py,sha256=lx71KfaI4lsVKI-79buc-idaeT20JEMBOq21SQcAiY8,7259
33
33
  fugue/dataframe/dataframes.py,sha256=tBSpHsENgbcdOJ0Jgst6PTKbjG7_uoFJch96oTlaQIs,4160
34
- fugue/dataframe/function_wrapper.py,sha256=7Sb6XrWTD_swtHJbHDWZRxHvFNWkERynnCDzLM0wSbo,18340
34
+ fugue/dataframe/function_wrapper.py,sha256=HbilDPKVuRMvOBqMB_083xQf7rVw4Gx_hxzmHOLREbY,19495
35
35
  fugue/dataframe/iterable_dataframe.py,sha256=TcOoNKa4jNbHbvAZ0XAhtMmGcioygIHPxI9budDtenQ,4758
36
36
  fugue/dataframe/pandas_dataframe.py,sha256=0L0wYCGhD2BpQbruoT07Ox9iQM5YLHLNrcgzudc-yKs,11633
37
- fugue/dataframe/utils.py,sha256=bA_otOJt9oju1yq5gtn21L_GDT_pUgNc6luYuBIhbUQ,10488
37
+ fugue/dataframe/utils.py,sha256=LXauFKUiydr2Y_cLE6zdPwXUyw78Gm1cJCzdmEXERrI,10488
38
38
  fugue/dataset/__init__.py,sha256=5f2CAJ4xst6Z2o9Q2e2twfDOGUw8ZJoE2ild4JEU2pg,112
39
39
  fugue/dataset/api.py,sha256=DacI4L2w5NJ-eZ6nFxNMqmReEnb0WUXswbjVp7BeErk,2794
40
40
  fugue/dataset/dataset.py,sha256=jWXZqy3msMPFFkhas2PYJEX55ZAI3gk3Txq5f4-Qya4,4759
@@ -48,8 +48,8 @@ fugue/extensions/_utils.py,sha256=Bi3pYKy2Z6fG6_5BpwIWldxetassXpB4Zp8QamWB-wg,51
48
48
  fugue/extensions/context.py,sha256=c_y2UttzzIFoQTOCV42VCdj2nqah33xYuBjbKNIOpx8,4262
49
49
  fugue/extensions/_builtins/__init__.py,sha256=OAUjZJP-QI8VpJxxEEZJFFGir4PmTyLMmWQ3VCHtIGk,545
50
50
  fugue/extensions/_builtins/creators.py,sha256=ad9snV4oN7F9o50Iaa9T4tw5J6rXBUgOHOINKzDqoEQ,1825
51
- fugue/extensions/_builtins/outputters.py,sha256=vMkzarf39bBk37efVa1nQ2qFaFauejsOEG6c_0fC9kE,6939
52
- fugue/extensions/_builtins/processors.py,sha256=JuwIhyvGiO_kZ_xU8SCCy5Zk0wOXfC_pSrxraM24izU,14545
51
+ fugue/extensions/_builtins/outputters.py,sha256=n_9cqv6JGQNKw_5j_4kXo5xcMnt6Hz5YB4t1wfsvU5A,6940
52
+ fugue/extensions/_builtins/processors.py,sha256=4L_AicO6XBbH2Sif1-Z0BGunCN4OFSIIzSyGSuqjHgQ,14546
53
53
  fugue/extensions/creator/__init__.py,sha256=7qmrb0eRKbGSEvrb6045-5hkmjH-nT1GJo1qYubX158,188
54
54
  fugue/extensions/creator/convert.py,sha256=66ei5x0K0a8nWQ-kNwZXmmf1VeR_6XuqSe9rZ64mYpI,7297
55
55
  fugue/extensions/creator/creator.py,sha256=ET9yprY4mvw0kkfWtW4aQEvKv1VR-FiWT4SThvwgAyo,1541
@@ -57,18 +57,18 @@ fugue/extensions/outputter/__init__.py,sha256=j7jHSOB2xfnJlg9BdL9UHwAML4A77eI6kI
57
57
  fugue/extensions/outputter/convert.py,sha256=dPyNELy6Sruv5_NJlMSPPIfBknTlRVZo0zCWAz5U16o,6939
58
58
  fugue/extensions/outputter/outputter.py,sha256=n2Do4NKX7_uKkUQRrSjJAl6CYoifb9_zEaXTZIF_ZJQ,1619
59
59
  fugue/extensions/processor/__init__.py,sha256=8ws8WSnLGCfTdsigoU0_xpaPY54vpo0V2O3Bw3W9cSg,204
60
- fugue/extensions/processor/convert.py,sha256=zG0lMtHGwY5TsqK4eplbMdlTg7J_PD3HbI0jdWcv5yw,8302
60
+ fugue/extensions/processor/convert.py,sha256=dtknMhHeAoXEeNIMZ0m-ySJofdb5JKL2eHY-pgBWOVQ,8303
61
61
  fugue/extensions/processor/processor.py,sha256=czhQlQgMpAXXoLVAX9Q0TFUMYEEhsgufTammxcKSmOY,1665
62
62
  fugue/extensions/transformer/__init__.py,sha256=VD6d-8xW1Yl8fUPj43cBWNR9pCOlYD9xWyGIHAlHwvI,456
63
63
  fugue/extensions/transformer/constants.py,sha256=76DfpoTOGQ8gp5XtCs_xznfbr_H015-prXpHWSqMNDU,59
64
- fugue/extensions/transformer/convert.py,sha256=zDDIpZawMnHFarjZNZAyiw1jfyXGuPjnvgQk9jpYLak,23384
64
+ fugue/extensions/transformer/convert.py,sha256=QRAp1S2rm8uDJt69Wm7yzszJyD83IzFZEURrNCNOMuQ,23433
65
65
  fugue/extensions/transformer/transformer.py,sha256=zhOUgyv5-DPxYd1CP_98WeEw-zUgwknRnPW_6di-q3g,9098
66
66
  fugue/rpc/__init__.py,sha256=3GzUl4QZQuCChjD7eaTJW8tnTwfke6ZY9r9g5nCeBZ8,167
67
67
  fugue/rpc/base.py,sha256=3Fq5SvwLZqw9NXru3r32WuJKBGFr9bl7nFgy6e9boGo,8470
68
68
  fugue/rpc/flask.py,sha256=VzJEND8Pqatf6pYYT9LDXeO1JDMmYAOY0lm8XUncKbA,4807
69
69
  fugue/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
70
- fugue/sql/_utils.py,sha256=khpjGeFCVlaqf2JIYvS4TVTJO3fe5-8bEsvy6AIP_5Q,2083
71
- fugue/sql/_visitors.py,sha256=2pc0J-AHJAiIexsKgNjcgrCGOyhC3_7rzonSgtjy--k,33844
70
+ fugue/sql/_utils.py,sha256=IOXhwDNnjtQI2uxhSjnEzMIFpe3k6B3T1RikPRYdf-M,2082
71
+ fugue/sql/_visitors.py,sha256=5m8msl86T1SYW5EyeX_NLVLTPUE_pCdRKt-dgMV1bYI,33843
72
72
  fugue/sql/api.py,sha256=l2I9CAy_W2oFFTct9fDPLyXF0LiDxQhMx5O8jBHTAxU,10050
73
73
  fugue/sql/workflow.py,sha256=S1pOhp0b0t6johFAJWmj6xUB7Ti5LQgNABpAzmLGjrQ,3010
74
74
  fugue/test/__init__.py,sha256=hvVrNbJYkWI_6otpILneyTjUafxURaA4obK6AoDyCUw,250
@@ -76,23 +76,23 @@ fugue/test/pandas_tester.py,sha256=_w6rFqlzZKjBtmFf-08a4C97W5xtqGw5XorLhj6Zyoo,6
76
76
  fugue/test/plugins.py,sha256=GLZia5GCmy0eBVGNbIqTbX7Ou3euf2SY4litKgdigwY,12318
77
77
  fugue/workflow/__init__.py,sha256=tXM_KYO8Q358W6qAVlwhIQIaYNRDgZtTubrIEX4QMgM,229
78
78
  fugue/workflow/_checkpoint.py,sha256=tt5Iv7c5ZStC0MD1inItksQ0GuK0ViniA3nvrgym-5c,5681
79
- fugue/workflow/_tasks.py,sha256=Zq_jXJO_VaF8DrWUuBiwO2Y3OVuhsiOQdzP4VBsp7Fo,11826
79
+ fugue/workflow/_tasks.py,sha256=Qezr3Gb-I2WifTjE2vC_YiAaXx6IDZKT3ka3NHqI2tU,11859
80
80
  fugue/workflow/_workflow_context.py,sha256=Wmp6n0lSrh2Gpslb5EaSX6BQNniKsvKn6SlhVkQ6ui0,2504
81
81
  fugue/workflow/api.py,sha256=uQoxPSCZ91-ST4vwuPWG7qioRGW4eo-Sgi3DdwtSL4k,12495
82
82
  fugue/workflow/input.py,sha256=V_zLDNzndmQuYJAPXtdK4n-vOp7LrimGIf_wQtwf2mc,321
83
83
  fugue/workflow/module.py,sha256=ajyqgMwX6hFMZY9xp4Bp1Q-Zdta0p5f_W_n_SNrc4LE,5547
84
- fugue/workflow/workflow.py,sha256=-SFCXkyxgXbS6DpQGSBox4d3Ws3psIlB6PnraJLSu9Y,88219
85
- fugue-0.9.5.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
84
+ fugue/workflow/workflow.py,sha256=laQXwUEhrmCxYWVownMgjLLbcmc7BiNb2ajZfQ4FgZU,88213
85
+ fugue-0.9.7.dev0.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
86
86
  fugue_contrib/__init__.py,sha256=QJioX-r2AiU7Pvt24M-k2c4vNq29qpK-3WNUde7ucck,222
87
87
  fugue_contrib/contrib.py,sha256=3B--6oIVBMZ-GwjIOXwZqYqkloH7Cxfq1I8vkwl2yPk,267
88
88
  fugue_contrib/seaborn/__init__.py,sha256=NuVv8EI4Om4gHcHwYO8ddextLQqw24vDj8qJio3E1MU,1405
89
89
  fugue_contrib/viz/__init__.py,sha256=osgZx63Br-yMZImyEfYf9MVzJNM2Cqqke_-WsuDmG5M,1273
90
90
  fugue_contrib/viz/_ext.py,sha256=Lu_DlS5DcmrFz27fHcKTCkhKyknVWcfS5kzZVVuO9xM,1345
91
- fugue_dask/__init__.py,sha256=2CcJ0AsN-k_f7dZ-yAyYpaICfUMPfH3l0FvUJSBzTr0,161
91
+ fugue_dask/__init__.py,sha256=OPgP3PUI1-RrAw9oNevlFI7c7friM5JcTVRgm74iFhA,194
92
92
  fugue_dask/_constants.py,sha256=35UmTVITk21GhRyRlbJOwPPdQsytM_p_2NytOXEay18,510
93
93
  fugue_dask/_dask_sql_wrapper.py,sha256=lj38gJIOdoMV9W44gpwzLjUEtPVsQNKjRWuEkfI7-PM,2618
94
94
  fugue_dask/_io.py,sha256=pl4F7mbVgP7Rwh1FFG7xfOz2TBZRUj1l3lLvDY4jOf4,6020
95
- fugue_dask/_utils.py,sha256=0R0pCh4B47kQsAS_o0QGaosIqVcZnSakm6pfMB7fSXs,9059
95
+ fugue_dask/_utils.py,sha256=Z15myyjGbYpdHWZLmabuwi1SG40RNwfTmgjRWREHBLI,8864
96
96
  fugue_dask/dataframe.py,sha256=4Dvckpc4mlld2WsEFTTemxoA1zYK8Cn6jMKxUxYQCEE,13491
97
97
  fugue_dask/execution_engine.py,sha256=mFN_IurhdBEu8C5OreqpGSRdTbTBqSpzJO2dMQzEF-o,21264
98
98
  fugue_dask/registry.py,sha256=jepWKH55VWNIWV3pOF5vpCl2OpO0rI1IULx5GM2Gk6w,2274
@@ -102,15 +102,15 @@ fugue_duckdb/_io.py,sha256=vnd8m8C6XeMCBJBbAdA5h695NMfsduQrvONyS0HcEFA,8475
102
102
  fugue_duckdb/_utils.py,sha256=ElKbHUyn5fWSPGXsK57iqMzcqKtCf0c8pBVBYGe5Ql4,5020
103
103
  fugue_duckdb/dask.py,sha256=agoLzeB7Swxj2kVWfmXFbWD1NS2lbbTlnrjSkR8kKWY,5014
104
104
  fugue_duckdb/dataframe.py,sha256=LAPoPOad9hgGhjyhlMGMfrnhkyBKe06Xzn6eP1hkl-w,8504
105
- fugue_duckdb/execution_engine.py,sha256=3f5hbWcX1y9mAtfFixrri-snYxVIQAf4HOgo9fHbDwQ,20385
105
+ fugue_duckdb/execution_engine.py,sha256=-nuRZQ64SPCWehY54hX9h30jz_8UWQpvXBRKFqD6pwI,20453
106
106
  fugue_duckdb/registry.py,sha256=9_41KO42kXqcjF4yParQ5JGyg5TckcbhH-Q2IlGpSho,3987
107
107
  fugue_duckdb/tester.py,sha256=MzTkv3sdOwOjI59LRrSGGl4w59Njv3OArTU5kSRL-P0,1526
108
108
  fugue_ibis/__init__.py,sha256=z7TkK7M2_0p9XO6jQATNDgT0aHXn5k69Ttz2ga-eQG8,190
109
109
  fugue_ibis/_compat.py,sha256=zKdTaTfuC02eUIzZPkcd7oObnVBi_X5mQjQf7SDme3Y,246
110
- fugue_ibis/_utils.py,sha256=BUL5swA5FE4eQu0t5Z17hZVu9a2MFfxlFH6Ymy9xifg,6607
110
+ fugue_ibis/_utils.py,sha256=3e_ir6eigJO99_YJRHzRElsM-YXciUi15MhfBCDZ3sU,6609
111
111
  fugue_ibis/dataframe.py,sha256=k4Q6qBLBIADF5YhbvaDplXO7OkMZSHuf_Wg5o-AusEI,7796
112
112
  fugue_ibis/execution_engine.py,sha256=jRnp1m1wuTicS29A-WA043f8QwdoK8b9rwPXvTkm8r8,18751
113
- fugue_notebook/__init__.py,sha256=9r_-2uxu1lBeZ8GgpYCKom_OZy2soIOYZajg7JDO-HY,4326
113
+ fugue_notebook/__init__.py,sha256=4VQQJweSb7ZKAtMtPsoyBPDO5K2MrS4xyHyR5d8pWbc,4358
114
114
  fugue_notebook/env.py,sha256=TYiTxYPFi-BVJJY49jDsvw9mddhK8WrifeRxBke30I8,4773
115
115
  fugue_notebook/nbextension/README.md,sha256=QLnr957YeGfwzy2r4c4qbZPaXyCbyGrKPvcqSBQYSnU,123
116
116
  fugue_notebook/nbextension/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -129,29 +129,28 @@ fugue_ray/tester.py,sha256=oTA_xOzvQhJU3ohc4hsVpZc0zv4bwJn1c8a9u8kcuIs,537
129
129
  fugue_ray/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
130
130
  fugue_ray/_utils/cluster.py,sha256=3T3Gyra6lAHlzktta-Ro35j6YZQfH6fNrj2hC5ATF9k,621
131
131
  fugue_ray/_utils/dataframe.py,sha256=5c4duGV--mdLkKrbJRgjDWvVcp9BegA3yX16pmYDYLE,3954
132
- fugue_ray/_utils/io.py,sha256=JZdL7pdpk1DUIj77NJSzU_EZOW4cN7oNjwGy2w-LRTw,10142
133
- fugue_spark/__init__.py,sha256=rvrMpFs9socMgyH_58gLbnAqmirBf5oidXoO4cekW6U,165
132
+ fugue_ray/_utils/io.py,sha256=jt4A4bQXCbNGvuJblL41TjZO9wscIJcwMlp2VfrbXx8,10155
133
+ fugue_spark/__init__.py,sha256=mFhe1g4wdr6LeTkXJGe7G2yZ2mympB8hbSkWye9L9sA,198
134
134
  fugue_spark/_constants.py,sha256=K2uLQfjvMxXk75K-7_Wn47Alpwq5rW57BtECAUrOeqA,177
135
- fugue_spark/dataframe.py,sha256=lYa8FizM3p_lsKYFR49FazkVZMJKyi2LABKTpP5YBLo,12006
135
+ fugue_spark/dataframe.py,sha256=_gLP9rLVVCb5ILccvYMFDpCAdTAzedecqHKYGZEX0DE,11984
136
136
  fugue_spark/execution_engine.py,sha256=YBMtNxCvpy77xICFSg9PHMa6feNoYhWEZe8MmxznX4U,33048
137
137
  fugue_spark/registry.py,sha256=_NmiV2cOooYK0YmqATEnNkPEMT9suUMtuecw2NNbIIk,4530
138
138
  fugue_spark/tester.py,sha256=VX003yGNlBukaZTQSN-w7XvgSk4rqxrWQIzno0dWrXg,2481
139
139
  fugue_spark/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
140
- fugue_spark/_utils/convert.py,sha256=J3HtbuzomTYTN6A11iuvsC1h2C7o3fQBW5U360xGDhE,10234
140
+ fugue_spark/_utils/convert.py,sha256=c5YaJ6kfXd0DnFGqjlR8WGG5TyNKMPVaV2HzdfY-ix0,10201
141
141
  fugue_spark/_utils/io.py,sha256=OdUezKpB29Lx9aUS2k9x0xUAGZrmgMZyQYGPEeHk7rQ,5574
142
- fugue_spark/_utils/misc.py,sha256=yLZVsz09gxGxfUkAGdqWAtmjSWiF5IInQtlN8z7c8hY,1228
142
+ fugue_spark/_utils/misc.py,sha256=lUo19PPwcYGSYj3L10stqf8eRW7lnwPoJ9Q3kUwvwY0,1206
143
143
  fugue_spark/_utils/partition.py,sha256=iaesyO5f4uXhj1W-p91cD5ecPiGlu0bzh8gl2ce2Uvg,3618
144
- fugue_sql/__init__.py,sha256=Cmr7w0Efr7PzoXdQzdJfc4Dgqd69qKqcHZZodENq7EU,287
144
+ fugue_sql/__init__.py,sha256=pzg-LiHE8n6j0vWysB3Gw_fyx5fHEw7Joja6T137Rrs,320
145
145
  fugue_sql/exceptions.py,sha256=ltS0MC8gMnVVrJbQiOZ0kRUWvVQ2LTx33dCW3ugqtb0,260
146
146
  fugue_test/__init__.py,sha256=xoQuVobhU64uyODRdnzf6MSWe9lw5khkhpJ2atvADoc,2315
147
147
  fugue_test/bag_suite.py,sha256=WbDCFjuAHYoJh4GXSPiSJxOoOwE1VMtYpJ3lQrsUK-Y,2483
148
- fugue_test/builtin_suite.py,sha256=jP3xiq2vRZNNGzoSRjcUfrUk8NVg31SU0kpJaEvP25E,79400
148
+ fugue_test/builtin_suite.py,sha256=n79zKh__qdtF67zBi3TCBoY5NC52JIzCWtlFPUkgsHc,79376
149
149
  fugue_test/dataframe_suite.py,sha256=7ym4sshDUly6004cq1UlppqDVtbwxD6CKxR4Lu70i0s,18994
150
150
  fugue_test/execution_suite.py,sha256=wUiGdb8wLRd13JXo7Lo19vPOLh7t1C-L2NPLeU0k-uE,48736
151
151
  fugue_test/fixtures.py,sha256=8Pev-mxRZOWwTFlsGjcSZ0iIs78zyWbp5tq4KG1wyvk,1432
152
- fugue_version/__init__.py,sha256=ORAtCCI2THBDcdzIbh6oBsoshDvkkmXUWpmO4Q5McAk,22
153
- fugue-0.9.5.dist-info/METADATA,sha256=zWWQ_tI_bPidg9zi3s4BL4FfC2OD45Qm-PuVERmTGrA,18593
154
- fugue-0.9.5.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
155
- fugue-0.9.5.dist-info/entry_points.txt,sha256=2Vxp1qew_tswacA8m0RzIliLlFOQMlzezvSXPugM_KA,295
156
- fugue-0.9.5.dist-info/top_level.txt,sha256=y1eCfzGdQ1_RkgcShcfbvXs-bopD3DwJcIOxP9EFXno,140
157
- fugue-0.9.5.dist-info/RECORD,,
152
+ fugue-0.9.7.dev0.dist-info/METADATA,sha256=OwSSeQA85bd68u6E1S2mV-nP1REAWoZH8QTEg0i__Bo,18441
153
+ fugue-0.9.7.dev0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
154
+ fugue-0.9.7.dev0.dist-info/entry_points.txt,sha256=nlvV6bg8ZNdUE8WJrgHWkNBMiRnPaTWQA8Y7xfqSQVA,255
155
+ fugue-0.9.7.dev0.dist-info/top_level.txt,sha256=OCVNEONKPoAH4r8hUUSnvRHtZVnDu38FH44y3CRxtB8,126
156
+ fugue-0.9.7.dev0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (79.0.1)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -0,0 +1,11 @@
1
+ [fugue.plugins]
2
+ dask = fugue_dask.registry
3
+ duckdb = fugue_duckdb.registry
4
+ ibis = fugue_ibis
5
+ polars = fugue_polars.registry
6
+ ray = fugue_ray.registry
7
+ spark = fugue_spark.registry
8
+
9
+ [pytest11]
10
+ fugue_test = fugue_test
11
+ fugue_test_fixtures = fugue_test.fixtures
@@ -9,4 +9,3 @@ fugue_ray
9
9
  fugue_spark
10
10
  fugue_sql
11
11
  fugue_test
12
- fugue_version
fugue_dask/__init__.py CHANGED
@@ -1,5 +1,7 @@
1
1
  # flake8: noqa
2
- from fugue_version import __version__
2
+ from importlib.metadata import version
3
+
4
+ __version__ = version("fugue")
3
5
 
4
6
  from fugue_dask.dataframe import DaskDataFrame
5
7
  from fugue_dask.execution_engine import DaskExecutionEngine
fugue_dask/_utils.py CHANGED
@@ -55,8 +55,7 @@ def hash_repartition(df: dd.DataFrame, num: int, cols: List[Any]) -> dd.DataFram
55
55
  if num == 1:
56
56
  return df.repartition(npartitions=1)
57
57
  df = df.reset_index(drop=True).clear_divisions()
58
- idf, ct = _add_hash_index(df, num, cols)
59
- return _postprocess(idf, ct, num)
58
+ return _add_hash_index(df, num, cols)
60
59
 
61
60
 
62
61
  def even_repartition(df: dd.DataFrame, num: int, cols: List[Any]) -> dd.DataFrame:
@@ -81,13 +80,9 @@ def even_repartition(df: dd.DataFrame, num: int, cols: List[Any]) -> dd.DataFram
81
80
  return df
82
81
  df = df.reset_index(drop=True).clear_divisions()
83
82
  if len(cols) == 0:
84
- idf, ct = _add_continuous_index(df)
83
+ return _add_continuous_index(df, num=num)
85
84
  else:
86
- idf, ct = _add_group_index(df, cols, shuffle=False)
87
- # when cols are set and num is not set, we use the number of groups
88
- if num <= 0:
89
- num = ct
90
- return _postprocess(idf, ct, num)
85
+ return _add_group_index(df, cols, shuffle=False, num=num)
91
86
 
92
87
 
93
88
  def rand_repartition(
@@ -114,25 +109,30 @@ def rand_repartition(
114
109
  return df.repartition(npartitions=1)
115
110
  df = df.reset_index(drop=True).clear_divisions()
116
111
  if len(cols) == 0:
117
- idf, ct = _add_random_index(df, num=num, seed=seed)
112
+ return _add_random_index(df, num=num, seed=seed)
118
113
  else:
119
- idf, ct = _add_group_index(df, cols, shuffle=True, seed=seed)
120
- # when cols are set and num is not set, we use the number of groups
121
- return _postprocess(idf, ct, num)
114
+ return _add_group_index(df, cols, shuffle=True, num=num, seed=seed)
122
115
 
123
116
 
124
- def _postprocess(idf: dd.DataFrame, ct: int, num: int) -> dd.DataFrame:
125
- parts = min(ct, num)
117
+ def _safe_set_index(df: dd.DataFrame, key_ct: int, num_partitions: int) -> dd.DataFrame:
118
+ if num_partitions <= 0:
119
+ num_partitions = key_ct
120
+ parts = min(key_ct, num_partitions)
126
121
  if parts <= 1:
127
- return idf.repartition(npartitions=1)
128
- divisions = list(np.arange(ct, step=math.ceil(ct / parts)))
129
- divisions.append(ct - 1)
130
- return idf.repartition(divisions=divisions, force=True)
122
+ return df.set_index(
123
+ _FUGUE_DASK_TEMP_IDX_COLUMN, drop=True, sort=True, npartitions=1
124
+ )
125
+ divisions = np.arange(key_ct, step=int(math.ceil(key_ct / parts))).tolist()
126
+ # divisions.append(ct - 1)
127
+ divisions.append(key_ct)
128
+ return df.set_index(
129
+ _FUGUE_DASK_TEMP_IDX_COLUMN, drop=True, sort=True, divisions=divisions
130
+ )
131
131
 
132
132
 
133
133
  def _add_group_index(
134
- df: dd.DataFrame, cols: List[str], shuffle: bool, seed: Any = None
135
- ) -> Tuple[dd.DataFrame, int]:
134
+ df: dd.DataFrame, cols: List[str], shuffle: bool, num: int, seed: Any = None
135
+ ) -> dd.DataFrame:
136
136
  keys = df[cols].drop_duplicates().compute()
137
137
  if shuffle:
138
138
  keys = keys.sample(frac=1, random_state=seed)
@@ -140,12 +140,10 @@ def _add_group_index(
140
140
  **{_FUGUE_DASK_TEMP_IDX_COLUMN: pd.Series(range(len(keys)), dtype=int)}
141
141
  )
142
142
  df = df.merge(dd.from_pandas(keys, npartitions=1), on=cols, broadcast=True)
143
- return df.set_index(_FUGUE_DASK_TEMP_IDX_COLUMN, drop=True), len(keys)
143
+ return _safe_set_index(df, len(keys), num)
144
144
 
145
145
 
146
- def _add_hash_index(
147
- df: dd.DataFrame, num: int, cols: List[str]
148
- ) -> Tuple[dd.DataFrame, int]:
146
+ def _add_hash_index(df: dd.DataFrame, num: int, cols: List[str]) -> dd.DataFrame:
149
147
  if len(cols) == 0:
150
148
  cols = list(df.columns)
151
149
 
@@ -165,13 +163,13 @@ def _add_hash_index(
165
163
  orig_schema = list(df.dtypes.to_dict().items())
166
164
  idf = df.map_partitions(
167
165
  _add_hash, meta=orig_schema + [(_FUGUE_DASK_TEMP_IDX_COLUMN, int)]
168
- ).set_index(_FUGUE_DASK_TEMP_IDX_COLUMN, drop=True)
169
- return idf, num
166
+ )
167
+ return _safe_set_index(idf, num, num)
170
168
 
171
169
 
172
170
  def _add_random_index(
173
171
  df: dd.DataFrame, num: int, seed: Any = None
174
- ) -> Tuple[dd.DataFrame, int]: # pragma: no cover
172
+ ) -> dd.DataFrame: # pragma: no cover
175
173
  def _add_rand(df: pd.DataFrame) -> pd.DataFrame:
176
174
  if len(df) == 0:
177
175
  return df.assign(**{_FUGUE_DASK_TEMP_IDX_COLUMN: pd.Series(dtype=int)})
@@ -184,11 +182,11 @@ def _add_random_index(
184
182
  orig_schema = list(df.dtypes.to_dict().items())
185
183
  idf = df.map_partitions(
186
184
  _add_rand, meta=orig_schema + [(_FUGUE_DASK_TEMP_IDX_COLUMN, int)]
187
- ).set_index(_FUGUE_DASK_TEMP_IDX_COLUMN, drop=True)
188
- return idf, num
185
+ )
186
+ return _safe_set_index(idf, num, num)
189
187
 
190
188
 
191
- def _add_continuous_index(df: dd.DataFrame) -> Tuple[dd.DataFrame, int]:
189
+ def _add_continuous_index(df: dd.DataFrame, num: int) -> dd.DataFrame:
192
190
  def _get_info(
193
191
  df: pd.DataFrame, partition_info: Any
194
192
  ) -> pd.DataFrame: # pragma: no cover
@@ -216,8 +214,7 @@ def _add_continuous_index(df: dd.DataFrame) -> Tuple[dd.DataFrame, int]:
216
214
  idf = df.map_partitions(
217
215
  _add_index, meta=orig_schema + [(_FUGUE_DASK_TEMP_IDX_COLUMN, int)]
218
216
  )
219
- idf = idf.set_index(_FUGUE_DASK_TEMP_IDX_COLUMN, drop=True)
220
- return idf, counts[-1]
217
+ return _safe_set_index(idf, counts[-1], num)
221
218
 
222
219
 
223
220
  class DaskUtils(PandasLikeUtils[dd.DataFrame, dd.Series]):
@@ -255,7 +252,7 @@ class DaskUtils(PandasLikeUtils[dd.DataFrame, dd.Series]):
255
252
  schema: pa.Schema,
256
253
  use_extension_types: bool = True,
257
254
  use_arrow_dtype: bool = False,
258
- **kwargs: Any
255
+ **kwargs: Any,
259
256
  ) -> DataFrame:
260
257
  output_dtypes = to_pandas_dtype(
261
258
  schema,
@@ -268,7 +265,7 @@ class DaskUtils(PandasLikeUtils[dd.DataFrame, dd.Series]):
268
265
  use_extension_types=use_extension_types,
269
266
  use_arrow_dtype=use_arrow_dtype,
270
267
  meta=output_dtypes,
271
- **kwargs
268
+ **kwargs,
272
269
  )
273
270
 
274
271
 
@@ -64,7 +64,9 @@ class DuckDBEngine(SQLEngine):
64
64
  if isinstance(self.execution_engine, DuckExecutionEngine):
65
65
  con = self.execution_engine.connection
66
66
  tdf: DuckDataFrame = _to_duck_df(
67
- self.execution_engine, df, create_view=False # type: ignore
67
+ self.execution_engine,
68
+ df,
69
+ create_view=False, # type: ignore
68
70
  )
69
71
  et = self._get_table(table)
70
72
  if et is not None:
@@ -96,7 +98,9 @@ class DuckDBEngine(SQLEngine):
96
98
  name_map: Dict[str, str] = {}
97
99
  for k, v in dfs.items():
98
100
  tdf: DuckDataFrame = _to_duck_df(
99
- self.execution_engine, v, create_view=True # type: ignore
101
+ self.execution_engine,
102
+ v,
103
+ create_view=True, # type: ignore
100
104
  )
101
105
  name_map[k] = tdf.alias
102
106
  query = statement.construct(name_map, dialect=self.dialect, log=self.log)
@@ -415,7 +419,7 @@ class DuckExecutionEngine(ExecutionEngine):
415
419
  )
416
420
  tb = TempTableName()
417
421
  if frac is not None:
418
- sql = f"SELECT * FROM {tb} USING SAMPLE bernoulli({frac*100} PERCENT)"
422
+ sql = f"SELECT * FROM {tb} USING SAMPLE bernoulli({frac * 100} PERCENT)"
419
423
  else:
420
424
  sql = f"SELECT * FROM {tb} USING SAMPLE reservoir({n} ROWS)"
421
425
  if seed is not None:
fugue_ibis/_utils.py CHANGED
@@ -149,7 +149,7 @@ class LazyIbisAttr(LazyIbisObject):
149
149
  self._super_lazy_internal_objs["parent"],
150
150
  self._super_lazy_internal_objs["name"],
151
151
  *args,
152
- **kwargs
152
+ **kwargs,
153
153
  )
154
154
 
155
155
 
@@ -196,7 +196,7 @@ def _materialize(obj: Any, context: Dict[int, Any]) -> Any:
196
196
  )
197
197
  v = f(
198
198
  *_materialize(obj._super_lazy_internal_objs["args"], context),
199
- **_materialize(obj._super_lazy_internal_objs["kwargs"], context)
199
+ **_materialize(obj._super_lazy_internal_objs["kwargs"], context),
200
200
  )
201
201
  elif isinstance(obj, LazyIbisAttr):
202
202
  v = getattr(
@@ -1,7 +1,8 @@
1
1
  # flake8: noqa
2
2
  from typing import Any
3
+ from importlib.metadata import version
3
4
 
4
- from fugue_version import __version__
5
+ __version__ = version("fugue")
5
6
  from IPython import get_ipython
6
7
  from IPython.display import Javascript, display
7
8
 
fugue_ray/_utils/io.py CHANGED
@@ -147,7 +147,8 @@ class RayIO(object):
147
147
  f"prepartitioning by keys {by} is not supported by ray, will ignore"
148
148
  )
149
149
  return self._engine.repartition(
150
- rdf, partition_spec=partition_spec # type: ignore
150
+ rdf,
151
+ partition_spec=partition_spec, # type: ignore
151
152
  )
152
153
 
153
154
  def _load_parquet(
fugue_spark/__init__.py CHANGED
@@ -1,5 +1,7 @@
1
1
  # flake8: noqa
2
- from fugue_version import __version__
2
+ from importlib.metadata import version
3
+
4
+ __version__ = version("fugue")
3
5
 
4
6
  from fugue_spark.dataframe import SparkDataFrame
5
7
  from fugue_spark.execution_engine import SparkExecutionEngine
@@ -81,8 +81,9 @@ def to_cast_expression(
81
81
  name_match or allow_name_mismatch,
82
82
  lambda: ValueError(f"schema name mismatch: {schema1}, {schema2}"),
83
83
  )
84
- n1, n2 = quote_name(schema1[i].name, quote="`"), quote_name(
85
- schema2[i].name, quote="`"
84
+ n1, n2 = (
85
+ quote_name(schema1[i].name, quote="`"),
86
+ quote_name(schema2[i].name, quote="`"),
86
87
  )
87
88
  if schema1[i].dataType != schema2[i].dataType:
88
89
  type2 = schema2[i].dataType.simpleString()
@@ -90,9 +91,7 @@ def to_cast_expression(
90
91
  schema2[i].dataType, (pt.StringType, pt.IntegralType)
91
92
  ):
92
93
  expr.append(
93
- f"CAST(IF(isnan({n1}) OR {n1} IS NULL"
94
- f", NULL, {n1})"
95
- f" AS {type2}) {n2}"
94
+ f"CAST(IF(isnan({n1}) OR {n1} IS NULL, NULL, {n1}) AS {type2}) {n2}"
96
95
  )
97
96
  else:
98
97
  expr.append(f"CAST({n1} AS {type2}) {n2}")
@@ -1,8 +1,9 @@
1
1
  import pyspark.sql as ps
2
+ import pyspark
2
3
  from typing import Any
3
- from importlib.metadata import version
4
4
 
5
- SPARK_VERSION = version("pyspark").split(".")
5
+
6
+ SPARK_VERSION = pyspark.__version__.split(".")
6
7
 
7
8
  try:
8
9
  if int(SPARK_VERSION[0]) >= 4:
fugue_spark/dataframe.py CHANGED
@@ -178,9 +178,7 @@ class SparkDataFrame(DataFrame):
178
178
  self, n: int, columns: Optional[List[str]] = None
179
179
  ) -> LocalBoundedDataFrame:
180
180
  sdf = self._select_columns(columns)
181
- return SparkDataFrame(
182
- sdf.native.limit(n), sdf.schema
183
- ).as_local() # type: ignore
181
+ return SparkDataFrame(sdf.native.limit(n), sdf.schema).as_local() # type: ignore
184
182
 
185
183
  @property
186
184
  def _first(self) -> Optional[List[Any]]:
fugue_sql/__init__.py CHANGED
@@ -1,6 +1,8 @@
1
1
  # flake8: noqa
2
2
  # TODO: This folder is to be deprecated
3
- from fugue_version import __version__
3
+ from importlib.metadata import version
4
+
5
+ __version__ = version("fugue")
4
6
 
5
7
  import warnings
6
8
  from fugue import FugueSQLWorkflow, fsql
@@ -53,9 +53,8 @@ from fugue import (
53
53
  register_transformer,
54
54
  transformer,
55
55
  )
56
- from fugue.column import col
56
+ from fugue.column import col, lit
57
57
  from fugue.column import functions as ff
58
- from fugue.column import lit
59
58
  from fugue.exceptions import (
60
59
  FugueInterfacelessError,
61
60
  FugueWorkflowCompileError,
@@ -1,11 +0,0 @@
1
- [fugue.plugins]
2
- dask = fugue_dask.registry[dask]
3
- duckdb = fugue_duckdb.registry[duckdb]
4
- ibis = fugue_ibis[ibis]
5
- polars = fugue_polars.registry[polars]
6
- ray = fugue_ray.registry[ray]
7
- spark = fugue_spark.registry[spark]
8
-
9
- [pytest11]
10
- fugue_test = fugue_test
11
- fugue_test_fixtures = fugue_test.fixtures
fugue_version/__init__.py DELETED
@@ -1 +0,0 @@
1
- __version__ = "0.9.5"