fugue 0.9.6__tar.gz → 0.9.7.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165) hide show
  1. {fugue-0.9.6 → fugue-0.9.7.dev0}/PKG-INFO +13 -26
  2. {fugue-0.9.6 → fugue-0.9.7.dev0}/README.md +1 -1
  3. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/__init__.py +4 -1
  4. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/dataframe/dataframe.py +1 -3
  5. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/dataframe/function_wrapper.py +63 -19
  6. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/dataframe/utils.py +3 -3
  7. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/extensions/_builtins/outputters.py +1 -1
  8. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/extensions/_builtins/processors.py +1 -1
  9. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/extensions/processor/convert.py +1 -0
  10. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/extensions/transformer/convert.py +4 -1
  11. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/sql/_utils.py +0 -1
  12. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/sql/_visitors.py +0 -1
  13. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/workflow/_tasks.py +3 -1
  14. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/workflow/workflow.py +3 -3
  15. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue.egg-info/PKG-INFO +13 -26
  16. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue.egg-info/SOURCES.txt +2 -4
  17. fugue-0.9.7.dev0/fugue.egg-info/entry_points.txt +11 -0
  18. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue.egg-info/requires.txt +7 -3
  19. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue.egg-info/top_level.txt +0 -1
  20. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_dask/__init__.py +3 -1
  21. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_dask/_utils.py +31 -34
  22. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_duckdb/execution_engine.py +7 -3
  23. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_ibis/_utils.py +2 -2
  24. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_notebook/__init__.py +2 -1
  25. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_ray/_utils/io.py +2 -1
  26. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_spark/__init__.py +3 -1
  27. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_spark/_utils/convert.py +4 -5
  28. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_spark/dataframe.py +1 -3
  29. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_sql/__init__.py +3 -1
  30. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_test/builtin_suite.py +1 -2
  31. fugue-0.9.7.dev0/pyproject.toml +224 -0
  32. fugue-0.9.7.dev0/setup.cfg +4 -0
  33. fugue-0.9.6/fugue.egg-info/entry_points.txt +0 -11
  34. fugue-0.9.6/fugue_version/__init__.py +0 -1
  35. fugue-0.9.6/setup.cfg +0 -58
  36. fugue-0.9.6/setup.py +0 -114
  37. {fugue-0.9.6 → fugue-0.9.7.dev0}/LICENSE +0 -0
  38. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/_utils/__init__.py +0 -0
  39. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/_utils/display.py +0 -0
  40. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/_utils/exception.py +0 -0
  41. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/_utils/interfaceless.py +0 -0
  42. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/_utils/io.py +0 -0
  43. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/_utils/misc.py +0 -0
  44. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/_utils/registry.py +0 -0
  45. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/api.py +0 -0
  46. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/bag/__init__.py +0 -0
  47. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/bag/array_bag.py +0 -0
  48. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/bag/bag.py +0 -0
  49. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/collections/__init__.py +0 -0
  50. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/collections/partition.py +0 -0
  51. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/collections/sql.py +0 -0
  52. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/collections/yielded.py +0 -0
  53. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/column/__init__.py +0 -0
  54. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/column/expressions.py +0 -0
  55. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/column/functions.py +0 -0
  56. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/column/sql.py +0 -0
  57. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/constants.py +0 -0
  58. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/dataframe/__init__.py +0 -0
  59. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/dataframe/api.py +0 -0
  60. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/dataframe/array_dataframe.py +0 -0
  61. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/dataframe/arrow_dataframe.py +0 -0
  62. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/dataframe/dataframe_iterable_dataframe.py +0 -0
  63. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/dataframe/dataframes.py +0 -0
  64. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/dataframe/iterable_dataframe.py +0 -0
  65. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/dataframe/pandas_dataframe.py +0 -0
  66. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/dataset/__init__.py +0 -0
  67. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/dataset/api.py +0 -0
  68. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/dataset/dataset.py +0 -0
  69. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/dev.py +0 -0
  70. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/exceptions.py +0 -0
  71. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/execution/__init__.py +0 -0
  72. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/execution/api.py +0 -0
  73. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/execution/execution_engine.py +0 -0
  74. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/execution/factory.py +0 -0
  75. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/execution/native_execution_engine.py +0 -0
  76. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/extensions/__init__.py +0 -0
  77. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/extensions/_builtins/__init__.py +0 -0
  78. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/extensions/_builtins/creators.py +0 -0
  79. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/extensions/_utils.py +0 -0
  80. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/extensions/context.py +0 -0
  81. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/extensions/creator/__init__.py +0 -0
  82. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/extensions/creator/convert.py +0 -0
  83. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/extensions/creator/creator.py +0 -0
  84. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/extensions/outputter/__init__.py +0 -0
  85. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/extensions/outputter/convert.py +0 -0
  86. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/extensions/outputter/outputter.py +0 -0
  87. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/extensions/processor/__init__.py +0 -0
  88. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/extensions/processor/processor.py +0 -0
  89. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/extensions/transformer/__init__.py +0 -0
  90. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/extensions/transformer/constants.py +0 -0
  91. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/extensions/transformer/transformer.py +0 -0
  92. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/plugins.py +0 -0
  93. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/py.typed +0 -0
  94. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/registry.py +0 -0
  95. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/rpc/__init__.py +0 -0
  96. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/rpc/base.py +0 -0
  97. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/rpc/flask.py +0 -0
  98. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/sql/__init__.py +0 -0
  99. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/sql/api.py +0 -0
  100. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/sql/workflow.py +0 -0
  101. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/test/__init__.py +0 -0
  102. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/test/pandas_tester.py +0 -0
  103. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/test/plugins.py +0 -0
  104. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/workflow/__init__.py +0 -0
  105. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/workflow/_checkpoint.py +0 -0
  106. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/workflow/_workflow_context.py +0 -0
  107. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/workflow/api.py +0 -0
  108. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/workflow/input.py +0 -0
  109. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue/workflow/module.py +0 -0
  110. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue.egg-info/dependency_links.txt +0 -0
  111. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_contrib/__init__.py +0 -0
  112. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_contrib/contrib.py +0 -0
  113. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_contrib/seaborn/__init__.py +0 -0
  114. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_contrib/viz/__init__.py +0 -0
  115. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_contrib/viz/_ext.py +0 -0
  116. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_dask/_constants.py +0 -0
  117. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_dask/_dask_sql_wrapper.py +0 -0
  118. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_dask/_io.py +0 -0
  119. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_dask/dataframe.py +0 -0
  120. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_dask/execution_engine.py +0 -0
  121. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_dask/registry.py +0 -0
  122. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_dask/tester.py +0 -0
  123. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_duckdb/__init__.py +0 -0
  124. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_duckdb/_io.py +0 -0
  125. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_duckdb/_utils.py +0 -0
  126. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_duckdb/dask.py +0 -0
  127. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_duckdb/dataframe.py +0 -0
  128. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_duckdb/registry.py +0 -0
  129. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_duckdb/tester.py +0 -0
  130. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_ibis/__init__.py +0 -0
  131. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_ibis/_compat.py +0 -0
  132. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_ibis/dataframe.py +0 -0
  133. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_ibis/execution_engine.py +0 -0
  134. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_notebook/env.py +0 -0
  135. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_notebook/nbextension/README.md +0 -0
  136. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_notebook/nbextension/__init__.py +0 -0
  137. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_notebook/nbextension/description.yaml +0 -0
  138. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_notebook/nbextension/main.js +0 -0
  139. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_polars/__init__.py +0 -0
  140. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_polars/_utils.py +0 -0
  141. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_polars/polars_dataframe.py +0 -0
  142. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_polars/registry.py +0 -0
  143. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_ray/__init__.py +0 -0
  144. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_ray/_constants.py +0 -0
  145. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_ray/_utils/__init__.py +0 -0
  146. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_ray/_utils/cluster.py +0 -0
  147. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_ray/_utils/dataframe.py +0 -0
  148. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_ray/dataframe.py +0 -0
  149. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_ray/execution_engine.py +0 -0
  150. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_ray/registry.py +0 -0
  151. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_ray/tester.py +0 -0
  152. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_spark/_constants.py +0 -0
  153. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_spark/_utils/__init__.py +0 -0
  154. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_spark/_utils/io.py +0 -0
  155. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_spark/_utils/misc.py +0 -0
  156. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_spark/_utils/partition.py +0 -0
  157. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_spark/execution_engine.py +0 -0
  158. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_spark/registry.py +0 -0
  159. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_spark/tester.py +0 -0
  160. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_sql/exceptions.py +0 -0
  161. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_test/__init__.py +0 -0
  162. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_test/bag_suite.py +0 -0
  163. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_test/dataframe_suite.py +0 -0
  164. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_test/execution_suite.py +0 -0
  165. {fugue-0.9.6 → fugue-0.9.7.dev0}/fugue_test/fixtures.py +0 -0
@@ -1,27 +1,26 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fugue
3
- Version: 0.9.6
3
+ Version: 0.9.7.dev0
4
4
  Summary: An abstraction layer for distributed computation
5
- Home-page: http://github.com/fugue-project/fugue
6
- Author: The Fugue Development Team
7
- Author-email: hello@fugue.ai
5
+ Author-email: The Fugue Development Team <hello@fugue.ai>
8
6
  License: Apache-2.0
9
- Keywords: distributed spark dask ray sql dsl domain specific language
7
+ Project-URL: Homepage, http://github.com/fugue-project/fugue
8
+ Project-URL: Repository, http://github.com/fugue-project/fugue
9
+ Keywords: distributed,spark,dask,ray,sql,dsl,domain specific language
10
10
  Classifier: Development Status :: 5 - Production/Stable
11
11
  Classifier: Intended Audience :: Developers
12
12
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
13
- Classifier: License :: OSI Approved :: Apache Software License
14
13
  Classifier: Programming Language :: Python :: 3
15
- Classifier: Programming Language :: Python :: 3.8
16
- Classifier: Programming Language :: Python :: 3.9
17
14
  Classifier: Programming Language :: Python :: 3.10
18
15
  Classifier: Programming Language :: Python :: 3.11
19
16
  Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Programming Language :: Python :: 3.14
20
19
  Classifier: Programming Language :: Python :: 3 :: Only
21
- Requires-Python: >=3.8
20
+ Requires-Python: >=3.10
22
21
  Description-Content-Type: text/markdown
23
22
  License-File: LICENSE
24
- Requires-Dist: triad>=1.0.0
23
+ Requires-Dist: triad>=1.0.1
25
24
  Requires-Dist: adagio>=0.2.6
26
25
  Requires-Dist: pandas<3
27
26
  Provides-Extra: sql
@@ -39,7 +38,7 @@ Requires-Dist: dask[dataframe,distributed]>=2024.4.0; extra == "dask"
39
38
  Requires-Dist: pyarrow>=7.0.0; extra == "dask"
40
39
  Requires-Dist: pandas>=2.0.2; extra == "dask"
41
40
  Provides-Extra: ray
42
- Requires-Dist: ray[data]>=2.30.0; extra == "ray"
41
+ Requires-Dist: ray[data]>=2.30.0; python_version < "3.14" and extra == "ray"
43
42
  Requires-Dist: duckdb>=0.5.0; extra == "ray"
44
43
  Requires-Dist: pyarrow>=7.0.0; extra == "ray"
45
44
  Requires-Dist: pandas; extra == "ray"
@@ -70,7 +69,7 @@ Requires-Dist: jinja2; extra == "all"
70
69
  Requires-Dist: pyspark>=3.1.1; extra == "all"
71
70
  Requires-Dist: dask[dataframe,distributed]>=2024.4.0; extra == "all"
72
71
  Requires-Dist: dask-sql; extra == "all"
73
- Requires-Dist: ray[data]>=2.30.0; extra == "all"
72
+ Requires-Dist: ray[data]>=2.30.0; python_version < "3.14" and extra == "all"
74
73
  Requires-Dist: notebook; extra == "all"
75
74
  Requires-Dist: jupyterlab; extra == "all"
76
75
  Requires-Dist: ipython>=7.10.0; extra == "all"
@@ -79,26 +78,14 @@ Requires-Dist: pyarrow>=6.0.1; extra == "all"
79
78
  Requires-Dist: pandas>=2.0.2; extra == "all"
80
79
  Requires-Dist: ibis-framework[duckdb,pandas]; extra == "all"
81
80
  Requires-Dist: polars; extra == "all"
82
- Dynamic: author
83
- Dynamic: author-email
84
- Dynamic: classifier
85
- Dynamic: description
86
- Dynamic: description-content-type
87
- Dynamic: home-page
88
- Dynamic: keywords
89
- Dynamic: license
90
81
  Dynamic: license-file
91
- Dynamic: provides-extra
92
- Dynamic: requires-dist
93
- Dynamic: requires-python
94
- Dynamic: summary
95
82
 
96
- # Fugue
83
+ # <img src="./images/logo.svg" width="200">
97
84
 
98
85
  [![PyPI version](https://badge.fury.io/py/fugue.svg)](https://pypi.python.org/pypi/fugue/)
99
86
  [![PyPI pyversions](https://img.shields.io/pypi/pyversions/fugue.svg)](https://pypi.python.org/pypi/fugue/)
100
87
  [![PyPI license](https://img.shields.io/pypi/l/fugue.svg)](https://pypi.python.org/pypi/fugue/)
101
- [![codecov](https://codecov.io/gh/fugue-project/fugue/branch/master/graph/badge.svg?token=ZO9YD5N3IA)](https://codecov.io/gh/fugue-project/fugue)
88
+ [![codecov](https://codecov.io/gh/fugue-project/fugue/branch/main/graph/badge.svg?token=ZO9YD5N3IA)](https://codecov.io/gh/fugue-project/fugue)
102
89
  [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fa5f2f53e6f48aaa1218a89f4808b91)](https://www.codacy.com/gh/fugue-project/fugue/dashboard?utm_source=github.com&utm_medium=referral&utm_content=fugue-project/fugue&utm_campaign=Badge_Grade)
103
90
  [![Downloads](https://static.pepy.tech/badge/fugue)](https://pepy.tech/project/fugue)
104
91
 
@@ -3,7 +3,7 @@
3
3
  [![PyPI version](https://badge.fury.io/py/fugue.svg)](https://pypi.python.org/pypi/fugue/)
4
4
  [![PyPI pyversions](https://img.shields.io/pypi/pyversions/fugue.svg)](https://pypi.python.org/pypi/fugue/)
5
5
  [![PyPI license](https://img.shields.io/pypi/l/fugue.svg)](https://pypi.python.org/pypi/fugue/)
6
- [![codecov](https://codecov.io/gh/fugue-project/fugue/branch/master/graph/badge.svg?token=ZO9YD5N3IA)](https://codecov.io/gh/fugue-project/fugue)
6
+ [![codecov](https://codecov.io/gh/fugue-project/fugue/branch/main/graph/badge.svg?token=ZO9YD5N3IA)](https://codecov.io/gh/fugue-project/fugue)
7
7
  [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fa5f2f53e6f48aaa1218a89f4808b91)](https://www.codacy.com/gh/fugue-project/fugue/dashboard?utm_source=github.com&utm_medium=referral&utm_content=fugue-project/fugue&utm_campaign=Badge_Grade)
8
8
  [![Downloads](https://static.pepy.tech/badge/fugue)](https://pepy.tech/project/fugue)
9
9
 
@@ -1,4 +1,8 @@
1
1
  # flake8: noqa
2
+ from importlib.metadata import version
3
+
4
+ __version__ = version("fugue")
5
+
2
6
  from triad.collections import Schema
3
7
 
4
8
  from fugue.api import out_transform, transform
@@ -83,7 +87,6 @@ from fugue.sql.workflow import FugueSQLWorkflow
83
87
  from fugue.workflow._workflow_context import FugueWorkflowContext
84
88
  from fugue.workflow.module import module
85
89
  from fugue.workflow.workflow import FugueWorkflow, WorkflowDataFrame, WorkflowDataFrames
86
- from fugue_version import __version__
87
90
 
88
91
  from .dev import *
89
92
 
@@ -59,9 +59,7 @@ class DataFrame(Dataset):
59
59
  assert isinstance(self._schema, Schema)
60
60
  return self._schema # type: ignore
61
61
  with self._lazy_schema_lock:
62
- self._schema = _input_schema(
63
- self._schema()
64
- ).assert_not_empty() # type: ignore
62
+ self._schema = _input_schema(self._schema()).assert_not_empty() # type: ignore
65
63
  self._schema.set_readonly()
66
64
  self._schema_discovered = True
67
65
  return self._schema
@@ -1,4 +1,5 @@
1
1
  import inspect
2
+ from collections.abc import Callable as AbcCallable
2
3
  from typing import (
3
4
  Any,
4
5
  Callable,
@@ -7,6 +8,9 @@ from typing import (
7
8
  Iterator,
8
9
  List,
9
10
  Optional,
11
+ Union,
12
+ get_args,
13
+ get_origin,
10
14
  no_type_check,
11
15
  )
12
16
 
@@ -39,13 +43,66 @@ from .pandas_dataframe import PandasDataFrame
39
43
 
40
44
 
41
45
  def _compare_iter(tp: Any) -> Any:
42
- return lambda x: compare_annotations(
43
- x, Iterable[tp] # type:ignore
44
- ) or compare_annotations(
45
- x, Iterator[tp] # type:ignore
46
+ return lambda x: (
47
+ compare_annotations(
48
+ x,
49
+ Iterable[tp], # type:ignore
50
+ )
51
+ or compare_annotations(
52
+ x,
53
+ Iterator[tp], # type:ignore
54
+ )
46
55
  )
47
56
 
48
57
 
58
+ def _is_optional(annotation) -> bool:
59
+ origin = get_origin(annotation)
60
+
61
+ # Check if it's a Union type
62
+ if origin is Union:
63
+ args = get_args(annotation)
64
+ # Optional[T] is Union[T, None]
65
+ return type(None) in args
66
+
67
+
68
+ def _is_required_callable(annotation) -> bool:
69
+ """Check if annotation is a required (non-optional) Callable type."""
70
+ if _is_optional(annotation):
71
+ return False
72
+
73
+ # Check direct equality
74
+ if annotation == Callable or annotation == callable: # pylint: disable=comparison-with-callable
75
+ return True
76
+
77
+ # Check if it's a generic Callable like Callable[[int], str]
78
+ origin = get_origin(annotation)
79
+ return origin is AbcCallable or origin is type(Callable)
80
+
81
+
82
+ def _is_optional_callable(annotation) -> bool:
83
+ """Check if annotation is an optional Callable type (Optional[Callable] or Callable | None)."""
84
+ if not _is_optional(annotation):
85
+ return False
86
+
87
+ # Get the non-None types from the Union
88
+ args = get_args(annotation)
89
+ non_none_types = [arg for arg in args if arg is not type(None)]
90
+
91
+ # Should have exactly one non-None type, and it should be Callable
92
+ if len(non_none_types) != 1: # pragma: no cover
93
+ return False
94
+
95
+ inner_type = non_none_types[0]
96
+
97
+ # Check if the inner type is Callable
98
+ if inner_type == Callable or inner_type == callable: # pylint: disable=comparison-with-callable
99
+ return True
100
+
101
+ # Check if it's a generic Callable like Callable[[int], str]
102
+ origin = get_origin(inner_type)
103
+ return origin is AbcCallable or origin is type(Callable)
104
+
105
+
49
106
  @function_wrapper(FUGUE_ENTRYPOINT)
50
107
  class DataFrameFunctionWrapper(FunctionWrapper):
51
108
  @property
@@ -154,12 +211,7 @@ fugue_annotated_param = DataFrameFunctionWrapper.annotated_param
154
211
  @fugue_annotated_param(
155
212
  "Callable",
156
213
  "F",
157
- lambda annotation: (
158
- annotation == Callable
159
- or annotation == callable # pylint: disable=comparison-with-callable
160
- or str(annotation).startswith("typing.Callable")
161
- or str(annotation).startswith("collections.abc.Callable")
162
- ),
214
+ _is_required_callable,
163
215
  )
164
216
  class _CallableParam(AnnotatedParam):
165
217
  pass
@@ -168,15 +220,7 @@ class _CallableParam(AnnotatedParam):
168
220
  @fugue_annotated_param(
169
221
  "Callable",
170
222
  "f",
171
- lambda annotation: (
172
- annotation == Optional[Callable]
173
- or annotation == Optional[callable]
174
- or str(annotation).startswith("typing.Union[typing.Callable") # 3.8-
175
- or str(annotation).startswith("typing.Optional[typing.Callable") # 3.9+
176
- or str(annotation).startswith(
177
- "typing.Optional[collections.abc.Callable]"
178
- ) # 3.9+
179
- ),
223
+ _is_optional_callable,
180
224
  )
181
225
  class _OptionalCallableParam(AnnotatedParam):
182
226
  pass
@@ -61,9 +61,9 @@ def _df_eq(
61
61
  else:
62
62
  df2 = as_fugue_df(data).as_local_bounded()
63
63
  try:
64
- assert (
65
- df1.count() == df2.count()
66
- ), f"count mismatch {df1.count()}, {df2.count()}"
64
+ assert df1.count() == df2.count(), (
65
+ f"count mismatch {df1.count()}, {df2.count()}"
66
+ )
67
67
  assert not check_schema or df.schema.is_like(
68
68
  df2.schema, equal_groups=equal_type_groups
69
69
  ), f"schema mismatch {df.schema.pa_schema}, {df2.schema.pa_schema}"
@@ -67,7 +67,7 @@ class Save(Outputter):
67
67
  mode=mode,
68
68
  partition_spec=partition_spec,
69
69
  force_single=force_single,
70
- **kwargs
70
+ **kwargs,
71
71
  )
72
72
 
73
73
 
@@ -314,7 +314,7 @@ class SaveAndUse(Processor):
314
314
  mode=mode,
315
315
  partition_spec=partition_spec,
316
316
  force_single=force_single,
317
- **kwargs
317
+ **kwargs,
318
318
  )
319
319
  return self.execution_engine.load_df(path=path, format_hint=format_hint)
320
320
 
@@ -136,6 +136,7 @@ def processor(
136
136
  Please read
137
137
  :doc:`Processor Tutorial <tutorial:tutorials/extensions/processor>`
138
138
  """
139
+
139
140
  # TODO: validation of schema if without * should be done at compile time
140
141
  def deco(func: Callable) -> "_FuncAsProcessor":
141
142
  return _FuncAsProcessor.from_func(
@@ -546,7 +546,10 @@ class _FuncAsOutputCoTransformer(_FuncAsCoTransformer):
546
546
  p = dict(dfs)
547
547
  p.update(self.params)
548
548
  self._wrapper.run(
549
- [] + cb, p, ignore_unknown=False, output=False # type: ignore
549
+ [] + cb,
550
+ p,
551
+ ignore_unknown=False,
552
+ output=False, # type: ignore
550
553
  )
551
554
  return ArrayDataFrame([], OUTPUT_TRANSFORMER_DUMMY_SCHEMA)
552
555
 
@@ -35,7 +35,6 @@ def fill_sql_template(sql: str, params: Dict[str, Any]):
35
35
  template = Template(new_sql)
36
36
 
37
37
  except jinja2.exceptions.TemplateSyntaxError:
38
-
39
38
  template = Template(sql)
40
39
 
41
40
  return template.render(**params)
@@ -820,7 +820,6 @@ class _Extensions(_VisitorBase):
820
820
 
821
821
  def visitSetOperation(self, ctx: fp.SetOperationContext) -> Iterable[Any]:
822
822
  def get_sub(_ctx: Tree) -> List[Any]:
823
-
824
823
  sub = list(
825
824
  self.visitFugueTerm(_ctx)
826
825
  if isinstance(_ctx, fp.FugueTermContext)
@@ -204,7 +204,9 @@ class FugueTask(TaskSpec, ABC):
204
204
 
205
205
  # add caller traceback
206
206
  ctb = modify_traceback(
207
- sys.exc_info()[2].tb_next, None, self._traceback # type: ignore
207
+ sys.exc_info()[2].tb_next,
208
+ None,
209
+ self._traceback, # type: ignore
208
210
  )
209
211
  if ctb is None: # pragma: no cover
210
212
  raise
@@ -1468,7 +1468,8 @@ class WorkflowDataFrames(DataFrames):
1468
1468
  super().__setitem__(key, value, *args, **kwds)
1469
1469
 
1470
1470
  def __getitem__( # pylint: disable=W0235
1471
- self, key: Union[str, int] # type: ignore
1471
+ self,
1472
+ key: Union[str, int], # type: ignore
1472
1473
  ) -> WorkflowDataFrame:
1473
1474
  return super().__getitem__(key) # type: ignore
1474
1475
 
@@ -1791,8 +1792,7 @@ class FugueWorkflow:
1791
1792
  )
1792
1793
  )
1793
1794
  raise FugueWorkflowCompileError(
1794
- f"Input data of type {type(data)} can't "
1795
- "be converted to WorkflowDataFrame"
1795
+ f"Input data of type {type(data)} can't be converted to WorkflowDataFrame"
1796
1796
  )
1797
1797
 
1798
1798
  def df(
@@ -1,27 +1,26 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fugue
3
- Version: 0.9.6
3
+ Version: 0.9.7.dev0
4
4
  Summary: An abstraction layer for distributed computation
5
- Home-page: http://github.com/fugue-project/fugue
6
- Author: The Fugue Development Team
7
- Author-email: hello@fugue.ai
5
+ Author-email: The Fugue Development Team <hello@fugue.ai>
8
6
  License: Apache-2.0
9
- Keywords: distributed spark dask ray sql dsl domain specific language
7
+ Project-URL: Homepage, http://github.com/fugue-project/fugue
8
+ Project-URL: Repository, http://github.com/fugue-project/fugue
9
+ Keywords: distributed,spark,dask,ray,sql,dsl,domain specific language
10
10
  Classifier: Development Status :: 5 - Production/Stable
11
11
  Classifier: Intended Audience :: Developers
12
12
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
13
- Classifier: License :: OSI Approved :: Apache Software License
14
13
  Classifier: Programming Language :: Python :: 3
15
- Classifier: Programming Language :: Python :: 3.8
16
- Classifier: Programming Language :: Python :: 3.9
17
14
  Classifier: Programming Language :: Python :: 3.10
18
15
  Classifier: Programming Language :: Python :: 3.11
19
16
  Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Programming Language :: Python :: 3.14
20
19
  Classifier: Programming Language :: Python :: 3 :: Only
21
- Requires-Python: >=3.8
20
+ Requires-Python: >=3.10
22
21
  Description-Content-Type: text/markdown
23
22
  License-File: LICENSE
24
- Requires-Dist: triad>=1.0.0
23
+ Requires-Dist: triad>=1.0.1
25
24
  Requires-Dist: adagio>=0.2.6
26
25
  Requires-Dist: pandas<3
27
26
  Provides-Extra: sql
@@ -39,7 +38,7 @@ Requires-Dist: dask[dataframe,distributed]>=2024.4.0; extra == "dask"
39
38
  Requires-Dist: pyarrow>=7.0.0; extra == "dask"
40
39
  Requires-Dist: pandas>=2.0.2; extra == "dask"
41
40
  Provides-Extra: ray
42
- Requires-Dist: ray[data]>=2.30.0; extra == "ray"
41
+ Requires-Dist: ray[data]>=2.30.0; python_version < "3.14" and extra == "ray"
43
42
  Requires-Dist: duckdb>=0.5.0; extra == "ray"
44
43
  Requires-Dist: pyarrow>=7.0.0; extra == "ray"
45
44
  Requires-Dist: pandas; extra == "ray"
@@ -70,7 +69,7 @@ Requires-Dist: jinja2; extra == "all"
70
69
  Requires-Dist: pyspark>=3.1.1; extra == "all"
71
70
  Requires-Dist: dask[dataframe,distributed]>=2024.4.0; extra == "all"
72
71
  Requires-Dist: dask-sql; extra == "all"
73
- Requires-Dist: ray[data]>=2.30.0; extra == "all"
72
+ Requires-Dist: ray[data]>=2.30.0; python_version < "3.14" and extra == "all"
74
73
  Requires-Dist: notebook; extra == "all"
75
74
  Requires-Dist: jupyterlab; extra == "all"
76
75
  Requires-Dist: ipython>=7.10.0; extra == "all"
@@ -79,26 +78,14 @@ Requires-Dist: pyarrow>=6.0.1; extra == "all"
79
78
  Requires-Dist: pandas>=2.0.2; extra == "all"
80
79
  Requires-Dist: ibis-framework[duckdb,pandas]; extra == "all"
81
80
  Requires-Dist: polars; extra == "all"
82
- Dynamic: author
83
- Dynamic: author-email
84
- Dynamic: classifier
85
- Dynamic: description
86
- Dynamic: description-content-type
87
- Dynamic: home-page
88
- Dynamic: keywords
89
- Dynamic: license
90
81
  Dynamic: license-file
91
- Dynamic: provides-extra
92
- Dynamic: requires-dist
93
- Dynamic: requires-python
94
- Dynamic: summary
95
82
 
96
- # Fugue
83
+ # <img src="./images/logo.svg" width="200">
97
84
 
98
85
  [![PyPI version](https://badge.fury.io/py/fugue.svg)](https://pypi.python.org/pypi/fugue/)
99
86
  [![PyPI pyversions](https://img.shields.io/pypi/pyversions/fugue.svg)](https://pypi.python.org/pypi/fugue/)
100
87
  [![PyPI license](https://img.shields.io/pypi/l/fugue.svg)](https://pypi.python.org/pypi/fugue/)
101
- [![codecov](https://codecov.io/gh/fugue-project/fugue/branch/master/graph/badge.svg?token=ZO9YD5N3IA)](https://codecov.io/gh/fugue-project/fugue)
88
+ [![codecov](https://codecov.io/gh/fugue-project/fugue/branch/main/graph/badge.svg?token=ZO9YD5N3IA)](https://codecov.io/gh/fugue-project/fugue)
102
89
  [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fa5f2f53e6f48aaa1218a89f4808b91)](https://www.codacy.com/gh/fugue-project/fugue/dashboard?utm_source=github.com&utm_medium=referral&utm_content=fugue-project/fugue&utm_campaign=Badge_Grade)
103
90
  [![Downloads](https://static.pepy.tech/badge/fugue)](https://pepy.tech/project/fugue)
104
91
 
@@ -1,7 +1,6 @@
1
1
  LICENSE
2
2
  README.md
3
- setup.cfg
4
- setup.py
3
+ pyproject.toml
5
4
  fugue/__init__.py
6
5
  fugue/api.py
7
6
  fugue/constants.py
@@ -157,5 +156,4 @@ fugue_test/bag_suite.py
157
156
  fugue_test/builtin_suite.py
158
157
  fugue_test/dataframe_suite.py
159
158
  fugue_test/execution_suite.py
160
- fugue_test/fixtures.py
161
- fugue_version/__init__.py
159
+ fugue_test/fixtures.py
@@ -0,0 +1,11 @@
1
+ [fugue.plugins]
2
+ dask = fugue_dask.registry
3
+ duckdb = fugue_duckdb.registry
4
+ ibis = fugue_ibis
5
+ polars = fugue_polars.registry
6
+ ray = fugue_ray.registry
7
+ spark = fugue_spark.registry
8
+
9
+ [pytest11]
10
+ fugue_test = fugue_test
11
+ fugue_test_fixtures = fugue_test.fixtures
@@ -1,4 +1,4 @@
1
- triad>=1.0.0
1
+ triad>=1.0.1
2
2
  adagio>=0.2.6
3
3
  pandas<3
4
4
 
@@ -10,7 +10,6 @@ jinja2
10
10
  pyspark>=3.1.1
11
11
  dask[dataframe,distributed]>=2024.4.0
12
12
  dask-sql
13
- ray[data]>=2.30.0
14
13
  notebook
15
14
  jupyterlab
16
15
  ipython>=7.10.0
@@ -20,6 +19,9 @@ pandas>=2.0.2
20
19
  ibis-framework[duckdb,pandas]
21
20
  polars
22
21
 
22
+ [all:python_version < "3.14"]
23
+ ray[data]>=2.30.0
24
+
23
25
  [cpp_sql_parser]
24
26
  fugue-sql-antlr[cpp]>=0.2.0
25
27
 
@@ -52,11 +54,13 @@ ipython>=7.10.0
52
54
  polars
53
55
 
54
56
  [ray]
55
- ray[data]>=2.30.0
56
57
  duckdb>=0.5.0
57
58
  pyarrow>=7.0.0
58
59
  pandas
59
60
 
61
+ [ray:python_version < "3.14"]
62
+ ray[data]>=2.30.0
63
+
60
64
  [spark]
61
65
  pyspark>=3.1.1
62
66
  zstandard>=0.25.0
@@ -9,4 +9,3 @@ fugue_ray
9
9
  fugue_spark
10
10
  fugue_sql
11
11
  fugue_test
12
- fugue_version
@@ -1,5 +1,7 @@
1
1
  # flake8: noqa
2
- from fugue_version import __version__
2
+ from importlib.metadata import version
3
+
4
+ __version__ = version("fugue")
3
5
 
4
6
  from fugue_dask.dataframe import DaskDataFrame
5
7
  from fugue_dask.execution_engine import DaskExecutionEngine
@@ -55,8 +55,7 @@ def hash_repartition(df: dd.DataFrame, num: int, cols: List[Any]) -> dd.DataFram
55
55
  if num == 1:
56
56
  return df.repartition(npartitions=1)
57
57
  df = df.reset_index(drop=True).clear_divisions()
58
- idf, ct = _add_hash_index(df, num, cols)
59
- return _postprocess(idf, ct, num)
58
+ return _add_hash_index(df, num, cols)
60
59
 
61
60
 
62
61
  def even_repartition(df: dd.DataFrame, num: int, cols: List[Any]) -> dd.DataFrame:
@@ -81,13 +80,9 @@ def even_repartition(df: dd.DataFrame, num: int, cols: List[Any]) -> dd.DataFram
81
80
  return df
82
81
  df = df.reset_index(drop=True).clear_divisions()
83
82
  if len(cols) == 0:
84
- idf, ct = _add_continuous_index(df)
83
+ return _add_continuous_index(df, num=num)
85
84
  else:
86
- idf, ct = _add_group_index(df, cols, shuffle=False)
87
- # when cols are set and num is not set, we use the number of groups
88
- if num <= 0:
89
- num = ct
90
- return _postprocess(idf, ct, num)
85
+ return _add_group_index(df, cols, shuffle=False, num=num)
91
86
 
92
87
 
93
88
  def rand_repartition(
@@ -114,25 +109,30 @@ def rand_repartition(
114
109
  return df.repartition(npartitions=1)
115
110
  df = df.reset_index(drop=True).clear_divisions()
116
111
  if len(cols) == 0:
117
- idf, ct = _add_random_index(df, num=num, seed=seed)
112
+ return _add_random_index(df, num=num, seed=seed)
118
113
  else:
119
- idf, ct = _add_group_index(df, cols, shuffle=True, seed=seed)
120
- # when cols are set and num is not set, we use the number of groups
121
- return _postprocess(idf, ct, num)
114
+ return _add_group_index(df, cols, shuffle=True, num=num, seed=seed)
122
115
 
123
116
 
124
- def _postprocess(idf: dd.DataFrame, ct: int, num: int) -> dd.DataFrame:
125
- parts = min(ct, num)
117
+ def _safe_set_index(df: dd.DataFrame, key_ct: int, num_partitions: int) -> dd.DataFrame:
118
+ if num_partitions <= 0:
119
+ num_partitions = key_ct
120
+ parts = min(key_ct, num_partitions)
126
121
  if parts <= 1:
127
- return idf.repartition(npartitions=1)
128
- divisions = list(np.arange(ct, step=math.ceil(ct / parts)))
129
- divisions.append(ct - 1)
130
- return idf.repartition(divisions=divisions, force=True)
122
+ return df.set_index(
123
+ _FUGUE_DASK_TEMP_IDX_COLUMN, drop=True, sort=True, npartitions=1
124
+ )
125
+ divisions = np.arange(key_ct, step=int(math.ceil(key_ct / parts))).tolist()
126
+ # divisions.append(ct - 1)
127
+ divisions.append(key_ct)
128
+ return df.set_index(
129
+ _FUGUE_DASK_TEMP_IDX_COLUMN, drop=True, sort=True, divisions=divisions
130
+ )
131
131
 
132
132
 
133
133
  def _add_group_index(
134
- df: dd.DataFrame, cols: List[str], shuffle: bool, seed: Any = None
135
- ) -> Tuple[dd.DataFrame, int]:
134
+ df: dd.DataFrame, cols: List[str], shuffle: bool, num: int, seed: Any = None
135
+ ) -> dd.DataFrame:
136
136
  keys = df[cols].drop_duplicates().compute()
137
137
  if shuffle:
138
138
  keys = keys.sample(frac=1, random_state=seed)
@@ -140,12 +140,10 @@ def _add_group_index(
140
140
  **{_FUGUE_DASK_TEMP_IDX_COLUMN: pd.Series(range(len(keys)), dtype=int)}
141
141
  )
142
142
  df = df.merge(dd.from_pandas(keys, npartitions=1), on=cols, broadcast=True)
143
- return df.set_index(_FUGUE_DASK_TEMP_IDX_COLUMN, drop=True), len(keys)
143
+ return _safe_set_index(df, len(keys), num)
144
144
 
145
145
 
146
- def _add_hash_index(
147
- df: dd.DataFrame, num: int, cols: List[str]
148
- ) -> Tuple[dd.DataFrame, int]:
146
+ def _add_hash_index(df: dd.DataFrame, num: int, cols: List[str]) -> dd.DataFrame:
149
147
  if len(cols) == 0:
150
148
  cols = list(df.columns)
151
149
 
@@ -165,13 +163,13 @@ def _add_hash_index(
165
163
  orig_schema = list(df.dtypes.to_dict().items())
166
164
  idf = df.map_partitions(
167
165
  _add_hash, meta=orig_schema + [(_FUGUE_DASK_TEMP_IDX_COLUMN, int)]
168
- ).set_index(_FUGUE_DASK_TEMP_IDX_COLUMN, drop=True)
169
- return idf, num
166
+ )
167
+ return _safe_set_index(idf, num, num)
170
168
 
171
169
 
172
170
  def _add_random_index(
173
171
  df: dd.DataFrame, num: int, seed: Any = None
174
- ) -> Tuple[dd.DataFrame, int]: # pragma: no cover
172
+ ) -> dd.DataFrame: # pragma: no cover
175
173
  def _add_rand(df: pd.DataFrame) -> pd.DataFrame:
176
174
  if len(df) == 0:
177
175
  return df.assign(**{_FUGUE_DASK_TEMP_IDX_COLUMN: pd.Series(dtype=int)})
@@ -184,11 +182,11 @@ def _add_random_index(
184
182
  orig_schema = list(df.dtypes.to_dict().items())
185
183
  idf = df.map_partitions(
186
184
  _add_rand, meta=orig_schema + [(_FUGUE_DASK_TEMP_IDX_COLUMN, int)]
187
- ).set_index(_FUGUE_DASK_TEMP_IDX_COLUMN, drop=True)
188
- return idf, num
185
+ )
186
+ return _safe_set_index(idf, num, num)
189
187
 
190
188
 
191
- def _add_continuous_index(df: dd.DataFrame) -> Tuple[dd.DataFrame, int]:
189
+ def _add_continuous_index(df: dd.DataFrame, num: int) -> dd.DataFrame:
192
190
  def _get_info(
193
191
  df: pd.DataFrame, partition_info: Any
194
192
  ) -> pd.DataFrame: # pragma: no cover
@@ -216,8 +214,7 @@ def _add_continuous_index(df: dd.DataFrame) -> Tuple[dd.DataFrame, int]:
216
214
  idf = df.map_partitions(
217
215
  _add_index, meta=orig_schema + [(_FUGUE_DASK_TEMP_IDX_COLUMN, int)]
218
216
  )
219
- idf = idf.set_index(_FUGUE_DASK_TEMP_IDX_COLUMN, drop=True)
220
- return idf, counts[-1]
217
+ return _safe_set_index(idf, counts[-1], num)
221
218
 
222
219
 
223
220
  class DaskUtils(PandasLikeUtils[dd.DataFrame, dd.Series]):
@@ -255,7 +252,7 @@ class DaskUtils(PandasLikeUtils[dd.DataFrame, dd.Series]):
255
252
  schema: pa.Schema,
256
253
  use_extension_types: bool = True,
257
254
  use_arrow_dtype: bool = False,
258
- **kwargs: Any
255
+ **kwargs: Any,
259
256
  ) -> DataFrame:
260
257
  output_dtypes = to_pandas_dtype(
261
258
  schema,
@@ -268,7 +265,7 @@ class DaskUtils(PandasLikeUtils[dd.DataFrame, dd.Series]):
268
265
  use_extension_types=use_extension_types,
269
266
  use_arrow_dtype=use_arrow_dtype,
270
267
  meta=output_dtypes,
271
- **kwargs
268
+ **kwargs,
272
269
  )
273
270
 
274
271