fugue 0.9.5__tar.gz → 0.9.7.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fugue-0.9.5 → fugue-0.9.7.dev0}/PKG-INFO +13 -26
- {fugue-0.9.5 → fugue-0.9.7.dev0}/README.md +1 -1
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/__init__.py +4 -1
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/dataframe/dataframe.py +1 -3
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/dataframe/function_wrapper.py +63 -19
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/dataframe/utils.py +3 -3
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/extensions/_builtins/outputters.py +1 -1
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/extensions/_builtins/processors.py +1 -1
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/extensions/processor/convert.py +1 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/extensions/transformer/convert.py +4 -1
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/sql/_utils.py +0 -1
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/sql/_visitors.py +0 -1
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/workflow/_tasks.py +3 -1
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/workflow/workflow.py +3 -3
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue.egg-info/PKG-INFO +13 -26
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue.egg-info/SOURCES.txt +2 -4
- fugue-0.9.7.dev0/fugue.egg-info/entry_points.txt +11 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue.egg-info/requires.txt +7 -3
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue.egg-info/top_level.txt +0 -1
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_dask/__init__.py +3 -1
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_dask/_utils.py +31 -34
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_duckdb/execution_engine.py +7 -3
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_ibis/_utils.py +2 -2
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_notebook/__init__.py +2 -1
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_ray/_utils/io.py +2 -1
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_spark/__init__.py +3 -1
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_spark/_utils/convert.py +4 -5
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_spark/_utils/misc.py +3 -2
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_spark/dataframe.py +1 -3
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_sql/__init__.py +3 -1
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_test/builtin_suite.py +1 -2
- fugue-0.9.7.dev0/pyproject.toml +224 -0
- fugue-0.9.7.dev0/setup.cfg +4 -0
- fugue-0.9.5/fugue.egg-info/entry_points.txt +0 -11
- fugue-0.9.5/fugue_version/__init__.py +0 -1
- fugue-0.9.5/setup.cfg +0 -58
- fugue-0.9.5/setup.py +0 -114
- {fugue-0.9.5 → fugue-0.9.7.dev0}/LICENSE +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/_utils/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/_utils/display.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/_utils/exception.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/_utils/interfaceless.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/_utils/io.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/_utils/misc.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/_utils/registry.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/api.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/bag/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/bag/array_bag.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/bag/bag.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/collections/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/collections/partition.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/collections/sql.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/collections/yielded.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/column/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/column/expressions.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/column/functions.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/column/sql.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/constants.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/dataframe/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/dataframe/api.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/dataframe/array_dataframe.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/dataframe/arrow_dataframe.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/dataframe/dataframe_iterable_dataframe.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/dataframe/dataframes.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/dataframe/iterable_dataframe.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/dataframe/pandas_dataframe.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/dataset/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/dataset/api.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/dataset/dataset.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/dev.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/exceptions.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/execution/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/execution/api.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/execution/execution_engine.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/execution/factory.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/execution/native_execution_engine.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/extensions/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/extensions/_builtins/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/extensions/_builtins/creators.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/extensions/_utils.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/extensions/context.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/extensions/creator/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/extensions/creator/convert.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/extensions/creator/creator.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/extensions/outputter/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/extensions/outputter/convert.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/extensions/outputter/outputter.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/extensions/processor/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/extensions/processor/processor.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/extensions/transformer/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/extensions/transformer/constants.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/extensions/transformer/transformer.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/plugins.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/py.typed +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/registry.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/rpc/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/rpc/base.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/rpc/flask.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/sql/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/sql/api.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/sql/workflow.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/test/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/test/pandas_tester.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/test/plugins.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/workflow/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/workflow/_checkpoint.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/workflow/_workflow_context.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/workflow/api.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/workflow/input.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue/workflow/module.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue.egg-info/dependency_links.txt +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_contrib/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_contrib/contrib.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_contrib/seaborn/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_contrib/viz/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_contrib/viz/_ext.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_dask/_constants.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_dask/_dask_sql_wrapper.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_dask/_io.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_dask/dataframe.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_dask/execution_engine.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_dask/registry.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_dask/tester.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_duckdb/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_duckdb/_io.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_duckdb/_utils.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_duckdb/dask.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_duckdb/dataframe.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_duckdb/registry.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_duckdb/tester.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_ibis/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_ibis/_compat.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_ibis/dataframe.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_ibis/execution_engine.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_notebook/env.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_notebook/nbextension/README.md +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_notebook/nbextension/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_notebook/nbextension/description.yaml +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_notebook/nbextension/main.js +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_polars/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_polars/_utils.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_polars/polars_dataframe.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_polars/registry.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_ray/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_ray/_constants.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_ray/_utils/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_ray/_utils/cluster.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_ray/_utils/dataframe.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_ray/dataframe.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_ray/execution_engine.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_ray/registry.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_ray/tester.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_spark/_constants.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_spark/_utils/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_spark/_utils/io.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_spark/_utils/partition.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_spark/execution_engine.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_spark/registry.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_spark/tester.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_sql/exceptions.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_test/__init__.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_test/bag_suite.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_test/dataframe_suite.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_test/execution_suite.py +0 -0
- {fugue-0.9.5 → fugue-0.9.7.dev0}/fugue_test/fixtures.py +0 -0
|
@@ -1,27 +1,26 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fugue
|
|
3
|
-
Version: 0.9.5
|
|
3
|
+
Version: 0.9.7.dev0
|
|
4
4
|
Summary: An abstraction layer for distributed computation
|
|
5
|
-
|
|
6
|
-
Author: The Fugue Development Team
|
|
7
|
-
Author-email: hello@fugue.ai
|
|
5
|
+
Author-email: The Fugue Development Team <hello@fugue.ai>
|
|
8
6
|
License: Apache-2.0
|
|
9
|
-
|
|
7
|
+
Project-URL: Homepage, http://github.com/fugue-project/fugue
|
|
8
|
+
Project-URL: Repository, http://github.com/fugue-project/fugue
|
|
9
|
+
Keywords: distributed,spark,dask,ray,sql,dsl,domain specific language
|
|
10
10
|
Classifier: Development Status :: 5 - Production/Stable
|
|
11
11
|
Classifier: Intended Audience :: Developers
|
|
12
12
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
13
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
14
13
|
Classifier: Programming Language :: Python :: 3
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
17
14
|
Classifier: Programming Language :: Python :: 3.10
|
|
18
15
|
Classifier: Programming Language :: Python :: 3.11
|
|
19
16
|
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
20
19
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
21
|
-
Requires-Python: >=3.
|
|
20
|
+
Requires-Python: >=3.10
|
|
22
21
|
Description-Content-Type: text/markdown
|
|
23
22
|
License-File: LICENSE
|
|
24
|
-
Requires-Dist: triad>=1.0.
|
|
23
|
+
Requires-Dist: triad>=1.0.1
|
|
25
24
|
Requires-Dist: adagio>=0.2.6
|
|
26
25
|
Requires-Dist: pandas<3
|
|
27
26
|
Provides-Extra: sql
|
|
@@ -39,7 +38,7 @@ Requires-Dist: dask[dataframe,distributed]>=2024.4.0; extra == "dask"
|
|
|
39
38
|
Requires-Dist: pyarrow>=7.0.0; extra == "dask"
|
|
40
39
|
Requires-Dist: pandas>=2.0.2; extra == "dask"
|
|
41
40
|
Provides-Extra: ray
|
|
42
|
-
Requires-Dist: ray[data]>=2.30.0; extra == "ray"
|
|
41
|
+
Requires-Dist: ray[data]>=2.30.0; python_version < "3.14" and extra == "ray"
|
|
43
42
|
Requires-Dist: duckdb>=0.5.0; extra == "ray"
|
|
44
43
|
Requires-Dist: pyarrow>=7.0.0; extra == "ray"
|
|
45
44
|
Requires-Dist: pandas; extra == "ray"
|
|
@@ -70,7 +69,7 @@ Requires-Dist: jinja2; extra == "all"
|
|
|
70
69
|
Requires-Dist: pyspark>=3.1.1; extra == "all"
|
|
71
70
|
Requires-Dist: dask[dataframe,distributed]>=2024.4.0; extra == "all"
|
|
72
71
|
Requires-Dist: dask-sql; extra == "all"
|
|
73
|
-
Requires-Dist: ray[data]>=2.30.0; extra == "all"
|
|
72
|
+
Requires-Dist: ray[data]>=2.30.0; python_version < "3.14" and extra == "all"
|
|
74
73
|
Requires-Dist: notebook; extra == "all"
|
|
75
74
|
Requires-Dist: jupyterlab; extra == "all"
|
|
76
75
|
Requires-Dist: ipython>=7.10.0; extra == "all"
|
|
@@ -79,26 +78,14 @@ Requires-Dist: pyarrow>=6.0.1; extra == "all"
|
|
|
79
78
|
Requires-Dist: pandas>=2.0.2; extra == "all"
|
|
80
79
|
Requires-Dist: ibis-framework[duckdb,pandas]; extra == "all"
|
|
81
80
|
Requires-Dist: polars; extra == "all"
|
|
82
|
-
Dynamic: author
|
|
83
|
-
Dynamic: author-email
|
|
84
|
-
Dynamic: classifier
|
|
85
|
-
Dynamic: description
|
|
86
|
-
Dynamic: description-content-type
|
|
87
|
-
Dynamic: home-page
|
|
88
|
-
Dynamic: keywords
|
|
89
|
-
Dynamic: license
|
|
90
81
|
Dynamic: license-file
|
|
91
|
-
Dynamic: provides-extra
|
|
92
|
-
Dynamic: requires-dist
|
|
93
|
-
Dynamic: requires-python
|
|
94
|
-
Dynamic: summary
|
|
95
82
|
|
|
96
|
-
#
|
|
83
|
+
# <img src="./images/logo.svg" width="200">
|
|
97
84
|
|
|
98
85
|
[](https://pypi.python.org/pypi/fugue/)
|
|
99
86
|
[](https://pypi.python.org/pypi/fugue/)
|
|
100
87
|
[](https://pypi.python.org/pypi/fugue/)
|
|
101
|
-
[](https://codecov.io/gh/fugue-project/fugue)
|
|
102
89
|
[](https://www.codacy.com/gh/fugue-project/fugue/dashboard?utm_source=github.com&utm_medium=referral&utm_content=fugue-project/fugue&utm_campaign=Badge_Grade)
|
|
103
90
|
[](https://pepy.tech/project/fugue)
|
|
104
91
|
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
[](https://pypi.python.org/pypi/fugue/)
|
|
4
4
|
[](https://pypi.python.org/pypi/fugue/)
|
|
5
5
|
[](https://pypi.python.org/pypi/fugue/)
|
|
6
|
-
[](https://codecov.io/gh/fugue-project/fugue)
|
|
7
7
|
[](https://www.codacy.com/gh/fugue-project/fugue/dashboard?utm_source=github.com&utm_medium=referral&utm_content=fugue-project/fugue&utm_campaign=Badge_Grade)
|
|
8
8
|
[](https://pepy.tech/project/fugue)
|
|
9
9
|
|
|
@@ -1,4 +1,8 @@
|
|
|
1
1
|
# flake8: noqa
|
|
2
|
+
from importlib.metadata import version
|
|
3
|
+
|
|
4
|
+
__version__ = version("fugue")
|
|
5
|
+
|
|
2
6
|
from triad.collections import Schema
|
|
3
7
|
|
|
4
8
|
from fugue.api import out_transform, transform
|
|
@@ -83,7 +87,6 @@ from fugue.sql.workflow import FugueSQLWorkflow
|
|
|
83
87
|
from fugue.workflow._workflow_context import FugueWorkflowContext
|
|
84
88
|
from fugue.workflow.module import module
|
|
85
89
|
from fugue.workflow.workflow import FugueWorkflow, WorkflowDataFrame, WorkflowDataFrames
|
|
86
|
-
from fugue_version import __version__
|
|
87
90
|
|
|
88
91
|
from .dev import *
|
|
89
92
|
|
|
@@ -59,9 +59,7 @@ class DataFrame(Dataset):
|
|
|
59
59
|
assert isinstance(self._schema, Schema)
|
|
60
60
|
return self._schema # type: ignore
|
|
61
61
|
with self._lazy_schema_lock:
|
|
62
|
-
self._schema = _input_schema(
|
|
63
|
-
self._schema()
|
|
64
|
-
).assert_not_empty() # type: ignore
|
|
62
|
+
self._schema = _input_schema(self._schema()).assert_not_empty() # type: ignore
|
|
65
63
|
self._schema.set_readonly()
|
|
66
64
|
self._schema_discovered = True
|
|
67
65
|
return self._schema
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import inspect
|
|
2
|
+
from collections.abc import Callable as AbcCallable
|
|
2
3
|
from typing import (
|
|
3
4
|
Any,
|
|
4
5
|
Callable,
|
|
@@ -7,6 +8,9 @@ from typing import (
|
|
|
7
8
|
Iterator,
|
|
8
9
|
List,
|
|
9
10
|
Optional,
|
|
11
|
+
Union,
|
|
12
|
+
get_args,
|
|
13
|
+
get_origin,
|
|
10
14
|
no_type_check,
|
|
11
15
|
)
|
|
12
16
|
|
|
@@ -39,13 +43,66 @@ from .pandas_dataframe import PandasDataFrame
|
|
|
39
43
|
|
|
40
44
|
|
|
41
45
|
def _compare_iter(tp: Any) -> Any:
|
|
42
|
-
return lambda x:
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
+
return lambda x: (
|
|
47
|
+
compare_annotations(
|
|
48
|
+
x,
|
|
49
|
+
Iterable[tp], # type:ignore
|
|
50
|
+
)
|
|
51
|
+
or compare_annotations(
|
|
52
|
+
x,
|
|
53
|
+
Iterator[tp], # type:ignore
|
|
54
|
+
)
|
|
46
55
|
)
|
|
47
56
|
|
|
48
57
|
|
|
58
|
+
def _is_optional(annotation) -> bool:
|
|
59
|
+
origin = get_origin(annotation)
|
|
60
|
+
|
|
61
|
+
# Check if it's a Union type
|
|
62
|
+
if origin is Union:
|
|
63
|
+
args = get_args(annotation)
|
|
64
|
+
# Optional[T] is Union[T, None]
|
|
65
|
+
return type(None) in args
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _is_required_callable(annotation) -> bool:
|
|
69
|
+
"""Check if annotation is a required (non-optional) Callable type."""
|
|
70
|
+
if _is_optional(annotation):
|
|
71
|
+
return False
|
|
72
|
+
|
|
73
|
+
# Check direct equality
|
|
74
|
+
if annotation == Callable or annotation == callable: # pylint: disable=comparison-with-callable
|
|
75
|
+
return True
|
|
76
|
+
|
|
77
|
+
# Check if it's a generic Callable like Callable[[int], str]
|
|
78
|
+
origin = get_origin(annotation)
|
|
79
|
+
return origin is AbcCallable or origin is type(Callable)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _is_optional_callable(annotation) -> bool:
|
|
83
|
+
"""Check if annotation is an optional Callable type (Optional[Callable] or Callable | None)."""
|
|
84
|
+
if not _is_optional(annotation):
|
|
85
|
+
return False
|
|
86
|
+
|
|
87
|
+
# Get the non-None types from the Union
|
|
88
|
+
args = get_args(annotation)
|
|
89
|
+
non_none_types = [arg for arg in args if arg is not type(None)]
|
|
90
|
+
|
|
91
|
+
# Should have exactly one non-None type, and it should be Callable
|
|
92
|
+
if len(non_none_types) != 1: # pragma: no cover
|
|
93
|
+
return False
|
|
94
|
+
|
|
95
|
+
inner_type = non_none_types[0]
|
|
96
|
+
|
|
97
|
+
# Check if the inner type is Callable
|
|
98
|
+
if inner_type == Callable or inner_type == callable: # pylint: disable=comparison-with-callable
|
|
99
|
+
return True
|
|
100
|
+
|
|
101
|
+
# Check if it's a generic Callable like Callable[[int], str]
|
|
102
|
+
origin = get_origin(inner_type)
|
|
103
|
+
return origin is AbcCallable or origin is type(Callable)
|
|
104
|
+
|
|
105
|
+
|
|
49
106
|
@function_wrapper(FUGUE_ENTRYPOINT)
|
|
50
107
|
class DataFrameFunctionWrapper(FunctionWrapper):
|
|
51
108
|
@property
|
|
@@ -154,12 +211,7 @@ fugue_annotated_param = DataFrameFunctionWrapper.annotated_param
|
|
|
154
211
|
@fugue_annotated_param(
|
|
155
212
|
"Callable",
|
|
156
213
|
"F",
|
|
157
|
-
|
|
158
|
-
annotation == Callable
|
|
159
|
-
or annotation == callable # pylint: disable=comparison-with-callable
|
|
160
|
-
or str(annotation).startswith("typing.Callable")
|
|
161
|
-
or str(annotation).startswith("collections.abc.Callable")
|
|
162
|
-
),
|
|
214
|
+
_is_required_callable,
|
|
163
215
|
)
|
|
164
216
|
class _CallableParam(AnnotatedParam):
|
|
165
217
|
pass
|
|
@@ -168,15 +220,7 @@ class _CallableParam(AnnotatedParam):
|
|
|
168
220
|
@fugue_annotated_param(
|
|
169
221
|
"Callable",
|
|
170
222
|
"f",
|
|
171
|
-
|
|
172
|
-
annotation == Optional[Callable]
|
|
173
|
-
or annotation == Optional[callable]
|
|
174
|
-
or str(annotation).startswith("typing.Union[typing.Callable") # 3.8-
|
|
175
|
-
or str(annotation).startswith("typing.Optional[typing.Callable") # 3.9+
|
|
176
|
-
or str(annotation).startswith(
|
|
177
|
-
"typing.Optional[collections.abc.Callable]"
|
|
178
|
-
) # 3.9+
|
|
179
|
-
),
|
|
223
|
+
_is_optional_callable,
|
|
180
224
|
)
|
|
181
225
|
class _OptionalCallableParam(AnnotatedParam):
|
|
182
226
|
pass
|
|
@@ -61,9 +61,9 @@ def _df_eq(
|
|
|
61
61
|
else:
|
|
62
62
|
df2 = as_fugue_df(data).as_local_bounded()
|
|
63
63
|
try:
|
|
64
|
-
assert (
|
|
65
|
-
df1.count()
|
|
66
|
-
)
|
|
64
|
+
assert df1.count() == df2.count(), (
|
|
65
|
+
f"count mismatch {df1.count()}, {df2.count()}"
|
|
66
|
+
)
|
|
67
67
|
assert not check_schema or df.schema.is_like(
|
|
68
68
|
df2.schema, equal_groups=equal_type_groups
|
|
69
69
|
), f"schema mismatch {df.schema.pa_schema}, {df2.schema.pa_schema}"
|
|
@@ -136,6 +136,7 @@ def processor(
|
|
|
136
136
|
Please read
|
|
137
137
|
:doc:`Processor Tutorial <tutorial:tutorials/extensions/processor>`
|
|
138
138
|
"""
|
|
139
|
+
|
|
139
140
|
# TODO: validation of schema if without * should be done at compile time
|
|
140
141
|
def deco(func: Callable) -> "_FuncAsProcessor":
|
|
141
142
|
return _FuncAsProcessor.from_func(
|
|
@@ -546,7 +546,10 @@ class _FuncAsOutputCoTransformer(_FuncAsCoTransformer):
|
|
|
546
546
|
p = dict(dfs)
|
|
547
547
|
p.update(self.params)
|
|
548
548
|
self._wrapper.run(
|
|
549
|
-
[] + cb,
|
|
549
|
+
[] + cb,
|
|
550
|
+
p,
|
|
551
|
+
ignore_unknown=False,
|
|
552
|
+
output=False, # type: ignore
|
|
550
553
|
)
|
|
551
554
|
return ArrayDataFrame([], OUTPUT_TRANSFORMER_DUMMY_SCHEMA)
|
|
552
555
|
|
|
@@ -204,7 +204,9 @@ class FugueTask(TaskSpec, ABC):
|
|
|
204
204
|
|
|
205
205
|
# add caller traceback
|
|
206
206
|
ctb = modify_traceback(
|
|
207
|
-
sys.exc_info()[2].tb_next,
|
|
207
|
+
sys.exc_info()[2].tb_next,
|
|
208
|
+
None,
|
|
209
|
+
self._traceback, # type: ignore
|
|
208
210
|
)
|
|
209
211
|
if ctb is None: # pragma: no cover
|
|
210
212
|
raise
|
|
@@ -1468,7 +1468,8 @@ class WorkflowDataFrames(DataFrames):
|
|
|
1468
1468
|
super().__setitem__(key, value, *args, **kwds)
|
|
1469
1469
|
|
|
1470
1470
|
def __getitem__( # pylint: disable=W0235
|
|
1471
|
-
self,
|
|
1471
|
+
self,
|
|
1472
|
+
key: Union[str, int], # type: ignore
|
|
1472
1473
|
) -> WorkflowDataFrame:
|
|
1473
1474
|
return super().__getitem__(key) # type: ignore
|
|
1474
1475
|
|
|
@@ -1791,8 +1792,7 @@ class FugueWorkflow:
|
|
|
1791
1792
|
)
|
|
1792
1793
|
)
|
|
1793
1794
|
raise FugueWorkflowCompileError(
|
|
1794
|
-
f"Input data of type {type(data)} can't "
|
|
1795
|
-
"be converted to WorkflowDataFrame"
|
|
1795
|
+
f"Input data of type {type(data)} can't be converted to WorkflowDataFrame"
|
|
1796
1796
|
)
|
|
1797
1797
|
|
|
1798
1798
|
def df(
|
|
@@ -1,27 +1,26 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fugue
|
|
3
|
-
Version: 0.9.5
|
|
3
|
+
Version: 0.9.7.dev0
|
|
4
4
|
Summary: An abstraction layer for distributed computation
|
|
5
|
-
|
|
6
|
-
Author: The Fugue Development Team
|
|
7
|
-
Author-email: hello@fugue.ai
|
|
5
|
+
Author-email: The Fugue Development Team <hello@fugue.ai>
|
|
8
6
|
License: Apache-2.0
|
|
9
|
-
|
|
7
|
+
Project-URL: Homepage, http://github.com/fugue-project/fugue
|
|
8
|
+
Project-URL: Repository, http://github.com/fugue-project/fugue
|
|
9
|
+
Keywords: distributed,spark,dask,ray,sql,dsl,domain specific language
|
|
10
10
|
Classifier: Development Status :: 5 - Production/Stable
|
|
11
11
|
Classifier: Intended Audience :: Developers
|
|
12
12
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
13
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
14
13
|
Classifier: Programming Language :: Python :: 3
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
17
14
|
Classifier: Programming Language :: Python :: 3.10
|
|
18
15
|
Classifier: Programming Language :: Python :: 3.11
|
|
19
16
|
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
20
19
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
21
|
-
Requires-Python: >=3.
|
|
20
|
+
Requires-Python: >=3.10
|
|
22
21
|
Description-Content-Type: text/markdown
|
|
23
22
|
License-File: LICENSE
|
|
24
|
-
Requires-Dist: triad>=1.0.
|
|
23
|
+
Requires-Dist: triad>=1.0.1
|
|
25
24
|
Requires-Dist: adagio>=0.2.6
|
|
26
25
|
Requires-Dist: pandas<3
|
|
27
26
|
Provides-Extra: sql
|
|
@@ -39,7 +38,7 @@ Requires-Dist: dask[dataframe,distributed]>=2024.4.0; extra == "dask"
|
|
|
39
38
|
Requires-Dist: pyarrow>=7.0.0; extra == "dask"
|
|
40
39
|
Requires-Dist: pandas>=2.0.2; extra == "dask"
|
|
41
40
|
Provides-Extra: ray
|
|
42
|
-
Requires-Dist: ray[data]>=2.30.0; extra == "ray"
|
|
41
|
+
Requires-Dist: ray[data]>=2.30.0; python_version < "3.14" and extra == "ray"
|
|
43
42
|
Requires-Dist: duckdb>=0.5.0; extra == "ray"
|
|
44
43
|
Requires-Dist: pyarrow>=7.0.0; extra == "ray"
|
|
45
44
|
Requires-Dist: pandas; extra == "ray"
|
|
@@ -70,7 +69,7 @@ Requires-Dist: jinja2; extra == "all"
|
|
|
70
69
|
Requires-Dist: pyspark>=3.1.1; extra == "all"
|
|
71
70
|
Requires-Dist: dask[dataframe,distributed]>=2024.4.0; extra == "all"
|
|
72
71
|
Requires-Dist: dask-sql; extra == "all"
|
|
73
|
-
Requires-Dist: ray[data]>=2.30.0; extra == "all"
|
|
72
|
+
Requires-Dist: ray[data]>=2.30.0; python_version < "3.14" and extra == "all"
|
|
74
73
|
Requires-Dist: notebook; extra == "all"
|
|
75
74
|
Requires-Dist: jupyterlab; extra == "all"
|
|
76
75
|
Requires-Dist: ipython>=7.10.0; extra == "all"
|
|
@@ -79,26 +78,14 @@ Requires-Dist: pyarrow>=6.0.1; extra == "all"
|
|
|
79
78
|
Requires-Dist: pandas>=2.0.2; extra == "all"
|
|
80
79
|
Requires-Dist: ibis-framework[duckdb,pandas]; extra == "all"
|
|
81
80
|
Requires-Dist: polars; extra == "all"
|
|
82
|
-
Dynamic: author
|
|
83
|
-
Dynamic: author-email
|
|
84
|
-
Dynamic: classifier
|
|
85
|
-
Dynamic: description
|
|
86
|
-
Dynamic: description-content-type
|
|
87
|
-
Dynamic: home-page
|
|
88
|
-
Dynamic: keywords
|
|
89
|
-
Dynamic: license
|
|
90
81
|
Dynamic: license-file
|
|
91
|
-
Dynamic: provides-extra
|
|
92
|
-
Dynamic: requires-dist
|
|
93
|
-
Dynamic: requires-python
|
|
94
|
-
Dynamic: summary
|
|
95
82
|
|
|
96
|
-
#
|
|
83
|
+
# <img src="./images/logo.svg" width="200">
|
|
97
84
|
|
|
98
85
|
[](https://pypi.python.org/pypi/fugue/)
|
|
99
86
|
[](https://pypi.python.org/pypi/fugue/)
|
|
100
87
|
[](https://pypi.python.org/pypi/fugue/)
|
|
101
|
-
[](https://codecov.io/gh/fugue-project/fugue)
|
|
102
89
|
[](https://www.codacy.com/gh/fugue-project/fugue/dashboard?utm_source=github.com&utm_medium=referral&utm_content=fugue-project/fugue&utm_campaign=Badge_Grade)
|
|
103
90
|
[](https://pepy.tech/project/fugue)
|
|
104
91
|
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
LICENSE
|
|
2
2
|
README.md
|
|
3
|
-
|
|
4
|
-
setup.py
|
|
3
|
+
pyproject.toml
|
|
5
4
|
fugue/__init__.py
|
|
6
5
|
fugue/api.py
|
|
7
6
|
fugue/constants.py
|
|
@@ -157,5 +156,4 @@ fugue_test/bag_suite.py
|
|
|
157
156
|
fugue_test/builtin_suite.py
|
|
158
157
|
fugue_test/dataframe_suite.py
|
|
159
158
|
fugue_test/execution_suite.py
|
|
160
|
-
fugue_test/fixtures.py
|
|
161
|
-
fugue_version/__init__.py
|
|
159
|
+
fugue_test/fixtures.py
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
[fugue.plugins]
|
|
2
|
+
dask = fugue_dask.registry
|
|
3
|
+
duckdb = fugue_duckdb.registry
|
|
4
|
+
ibis = fugue_ibis
|
|
5
|
+
polars = fugue_polars.registry
|
|
6
|
+
ray = fugue_ray.registry
|
|
7
|
+
spark = fugue_spark.registry
|
|
8
|
+
|
|
9
|
+
[pytest11]
|
|
10
|
+
fugue_test = fugue_test
|
|
11
|
+
fugue_test_fixtures = fugue_test.fixtures
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
triad>=1.0.
|
|
1
|
+
triad>=1.0.1
|
|
2
2
|
adagio>=0.2.6
|
|
3
3
|
pandas<3
|
|
4
4
|
|
|
@@ -10,7 +10,6 @@ jinja2
|
|
|
10
10
|
pyspark>=3.1.1
|
|
11
11
|
dask[dataframe,distributed]>=2024.4.0
|
|
12
12
|
dask-sql
|
|
13
|
-
ray[data]>=2.30.0
|
|
14
13
|
notebook
|
|
15
14
|
jupyterlab
|
|
16
15
|
ipython>=7.10.0
|
|
@@ -20,6 +19,9 @@ pandas>=2.0.2
|
|
|
20
19
|
ibis-framework[duckdb,pandas]
|
|
21
20
|
polars
|
|
22
21
|
|
|
22
|
+
[all:python_version < "3.14"]
|
|
23
|
+
ray[data]>=2.30.0
|
|
24
|
+
|
|
23
25
|
[cpp_sql_parser]
|
|
24
26
|
fugue-sql-antlr[cpp]>=0.2.0
|
|
25
27
|
|
|
@@ -52,11 +54,13 @@ ipython>=7.10.0
|
|
|
52
54
|
polars
|
|
53
55
|
|
|
54
56
|
[ray]
|
|
55
|
-
ray[data]>=2.30.0
|
|
56
57
|
duckdb>=0.5.0
|
|
57
58
|
pyarrow>=7.0.0
|
|
58
59
|
pandas
|
|
59
60
|
|
|
61
|
+
[ray:python_version < "3.14"]
|
|
62
|
+
ray[data]>=2.30.0
|
|
63
|
+
|
|
60
64
|
[spark]
|
|
61
65
|
pyspark>=3.1.1
|
|
62
66
|
zstandard>=0.25.0
|
|
@@ -55,8 +55,7 @@ def hash_repartition(df: dd.DataFrame, num: int, cols: List[Any]) -> dd.DataFram
|
|
|
55
55
|
if num == 1:
|
|
56
56
|
return df.repartition(npartitions=1)
|
|
57
57
|
df = df.reset_index(drop=True).clear_divisions()
|
|
58
|
-
|
|
59
|
-
return _postprocess(idf, ct, num)
|
|
58
|
+
return _add_hash_index(df, num, cols)
|
|
60
59
|
|
|
61
60
|
|
|
62
61
|
def even_repartition(df: dd.DataFrame, num: int, cols: List[Any]) -> dd.DataFrame:
|
|
@@ -81,13 +80,9 @@ def even_repartition(df: dd.DataFrame, num: int, cols: List[Any]) -> dd.DataFram
|
|
|
81
80
|
return df
|
|
82
81
|
df = df.reset_index(drop=True).clear_divisions()
|
|
83
82
|
if len(cols) == 0:
|
|
84
|
-
|
|
83
|
+
return _add_continuous_index(df, num=num)
|
|
85
84
|
else:
|
|
86
|
-
|
|
87
|
-
# when cols are set and num is not set, we use the number of groups
|
|
88
|
-
if num <= 0:
|
|
89
|
-
num = ct
|
|
90
|
-
return _postprocess(idf, ct, num)
|
|
85
|
+
return _add_group_index(df, cols, shuffle=False, num=num)
|
|
91
86
|
|
|
92
87
|
|
|
93
88
|
def rand_repartition(
|
|
@@ -114,25 +109,30 @@ def rand_repartition(
|
|
|
114
109
|
return df.repartition(npartitions=1)
|
|
115
110
|
df = df.reset_index(drop=True).clear_divisions()
|
|
116
111
|
if len(cols) == 0:
|
|
117
|
-
|
|
112
|
+
return _add_random_index(df, num=num, seed=seed)
|
|
118
113
|
else:
|
|
119
|
-
|
|
120
|
-
# when cols are set and num is not set, we use the number of groups
|
|
121
|
-
return _postprocess(idf, ct, num)
|
|
114
|
+
return _add_group_index(df, cols, shuffle=True, num=num, seed=seed)
|
|
122
115
|
|
|
123
116
|
|
|
124
|
-
def
|
|
125
|
-
|
|
117
|
+
def _safe_set_index(df: dd.DataFrame, key_ct: int, num_partitions: int) -> dd.DataFrame:
|
|
118
|
+
if num_partitions <= 0:
|
|
119
|
+
num_partitions = key_ct
|
|
120
|
+
parts = min(key_ct, num_partitions)
|
|
126
121
|
if parts <= 1:
|
|
127
|
-
return
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
122
|
+
return df.set_index(
|
|
123
|
+
_FUGUE_DASK_TEMP_IDX_COLUMN, drop=True, sort=True, npartitions=1
|
|
124
|
+
)
|
|
125
|
+
divisions = np.arange(key_ct, step=int(math.ceil(key_ct / parts))).tolist()
|
|
126
|
+
# divisions.append(ct - 1)
|
|
127
|
+
divisions.append(key_ct)
|
|
128
|
+
return df.set_index(
|
|
129
|
+
_FUGUE_DASK_TEMP_IDX_COLUMN, drop=True, sort=True, divisions=divisions
|
|
130
|
+
)
|
|
131
131
|
|
|
132
132
|
|
|
133
133
|
def _add_group_index(
|
|
134
|
-
df: dd.DataFrame, cols: List[str], shuffle: bool, seed: Any = None
|
|
135
|
-
) ->
|
|
134
|
+
df: dd.DataFrame, cols: List[str], shuffle: bool, num: int, seed: Any = None
|
|
135
|
+
) -> dd.DataFrame:
|
|
136
136
|
keys = df[cols].drop_duplicates().compute()
|
|
137
137
|
if shuffle:
|
|
138
138
|
keys = keys.sample(frac=1, random_state=seed)
|
|
@@ -140,12 +140,10 @@ def _add_group_index(
|
|
|
140
140
|
**{_FUGUE_DASK_TEMP_IDX_COLUMN: pd.Series(range(len(keys)), dtype=int)}
|
|
141
141
|
)
|
|
142
142
|
df = df.merge(dd.from_pandas(keys, npartitions=1), on=cols, broadcast=True)
|
|
143
|
-
return df
|
|
143
|
+
return _safe_set_index(df, len(keys), num)
|
|
144
144
|
|
|
145
145
|
|
|
146
|
-
def _add_hash_index(
|
|
147
|
-
df: dd.DataFrame, num: int, cols: List[str]
|
|
148
|
-
) -> Tuple[dd.DataFrame, int]:
|
|
146
|
+
def _add_hash_index(df: dd.DataFrame, num: int, cols: List[str]) -> dd.DataFrame:
|
|
149
147
|
if len(cols) == 0:
|
|
150
148
|
cols = list(df.columns)
|
|
151
149
|
|
|
@@ -165,13 +163,13 @@ def _add_hash_index(
|
|
|
165
163
|
orig_schema = list(df.dtypes.to_dict().items())
|
|
166
164
|
idf = df.map_partitions(
|
|
167
165
|
_add_hash, meta=orig_schema + [(_FUGUE_DASK_TEMP_IDX_COLUMN, int)]
|
|
168
|
-
)
|
|
169
|
-
return idf, num
|
|
166
|
+
)
|
|
167
|
+
return _safe_set_index(idf, num, num)
|
|
170
168
|
|
|
171
169
|
|
|
172
170
|
def _add_random_index(
|
|
173
171
|
df: dd.DataFrame, num: int, seed: Any = None
|
|
174
|
-
) ->
|
|
172
|
+
) -> dd.DataFrame: # pragma: no cover
|
|
175
173
|
def _add_rand(df: pd.DataFrame) -> pd.DataFrame:
|
|
176
174
|
if len(df) == 0:
|
|
177
175
|
return df.assign(**{_FUGUE_DASK_TEMP_IDX_COLUMN: pd.Series(dtype=int)})
|
|
@@ -184,11 +182,11 @@ def _add_random_index(
|
|
|
184
182
|
orig_schema = list(df.dtypes.to_dict().items())
|
|
185
183
|
idf = df.map_partitions(
|
|
186
184
|
_add_rand, meta=orig_schema + [(_FUGUE_DASK_TEMP_IDX_COLUMN, int)]
|
|
187
|
-
)
|
|
188
|
-
return idf, num
|
|
185
|
+
)
|
|
186
|
+
return _safe_set_index(idf, num, num)
|
|
189
187
|
|
|
190
188
|
|
|
191
|
-
def _add_continuous_index(df: dd.DataFrame) ->
|
|
189
|
+
def _add_continuous_index(df: dd.DataFrame, num: int) -> dd.DataFrame:
|
|
192
190
|
def _get_info(
|
|
193
191
|
df: pd.DataFrame, partition_info: Any
|
|
194
192
|
) -> pd.DataFrame: # pragma: no cover
|
|
@@ -216,8 +214,7 @@ def _add_continuous_index(df: dd.DataFrame) -> Tuple[dd.DataFrame, int]:
|
|
|
216
214
|
idf = df.map_partitions(
|
|
217
215
|
_add_index, meta=orig_schema + [(_FUGUE_DASK_TEMP_IDX_COLUMN, int)]
|
|
218
216
|
)
|
|
219
|
-
|
|
220
|
-
return idf, counts[-1]
|
|
217
|
+
return _safe_set_index(idf, counts[-1], num)
|
|
221
218
|
|
|
222
219
|
|
|
223
220
|
class DaskUtils(PandasLikeUtils[dd.DataFrame, dd.Series]):
|
|
@@ -255,7 +252,7 @@ class DaskUtils(PandasLikeUtils[dd.DataFrame, dd.Series]):
|
|
|
255
252
|
schema: pa.Schema,
|
|
256
253
|
use_extension_types: bool = True,
|
|
257
254
|
use_arrow_dtype: bool = False,
|
|
258
|
-
**kwargs: Any
|
|
255
|
+
**kwargs: Any,
|
|
259
256
|
) -> DataFrame:
|
|
260
257
|
output_dtypes = to_pandas_dtype(
|
|
261
258
|
schema,
|
|
@@ -268,7 +265,7 @@ class DaskUtils(PandasLikeUtils[dd.DataFrame, dd.Series]):
|
|
|
268
265
|
use_extension_types=use_extension_types,
|
|
269
266
|
use_arrow_dtype=use_arrow_dtype,
|
|
270
267
|
meta=output_dtypes,
|
|
271
|
-
**kwargs
|
|
268
|
+
**kwargs,
|
|
272
269
|
)
|
|
273
270
|
|
|
274
271
|
|