fugue 0.9.0.dev4.tar.gz → 0.9.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166)
  1. fugue-0.9.2/PKG-INFO +370 -0
  2. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/_utils/io.py +14 -2
  3. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/dataframe/function_wrapper.py +112 -18
  4. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/extensions/transformer/convert.py +4 -4
  5. fugue-0.9.2/fugue.egg-info/PKG-INFO +370 -0
  6. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue.egg-info/SOURCES.txt +1 -0
  7. fugue-0.9.2/fugue.egg-info/entry_points.txt +11 -0
  8. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue.egg-info/requires.txt +8 -11
  9. fugue-0.9.2/fugue_dask/_dask_sql_wrapper.py +76 -0
  10. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_dask/_utils.py +9 -5
  11. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_dask/dataframe.py +1 -1
  12. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_dask/execution_engine.py +8 -11
  13. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_duckdb/dataframe.py +5 -5
  14. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_duckdb/execution_engine.py +1 -1
  15. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_ibis/execution_engine.py +7 -6
  16. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_ray/_utils/io.py +23 -16
  17. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_spark/_utils/convert.py +18 -12
  18. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_spark/_utils/misc.py +1 -1
  19. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_test/builtin_suite.py +38 -1
  20. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_test/execution_suite.py +2 -0
  21. fugue-0.9.2/fugue_version/__init__.py +1 -0
  22. {fugue-0.9.0.dev4 → fugue-0.9.2}/setup.cfg +1 -1
  23. {fugue-0.9.0.dev4 → fugue-0.9.2}/setup.py +10 -10
  24. fugue-0.9.0.dev4/PKG-INFO +0 -308
  25. fugue-0.9.0.dev4/fugue.egg-info/PKG-INFO +0 -308
  26. fugue-0.9.0.dev4/fugue.egg-info/entry_points.txt +0 -12
  27. fugue-0.9.0.dev4/fugue_version/__init__.py +0 -1
  28. {fugue-0.9.0.dev4 → fugue-0.9.2}/LICENSE +0 -0
  29. {fugue-0.9.0.dev4 → fugue-0.9.2}/README.md +0 -0
  30. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/__init__.py +0 -0
  31. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/_utils/__init__.py +0 -0
  32. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/_utils/display.py +0 -0
  33. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/_utils/exception.py +0 -0
  34. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/_utils/interfaceless.py +0 -0
  35. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/_utils/misc.py +0 -0
  36. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/_utils/registry.py +0 -0
  37. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/api.py +0 -0
  38. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/bag/__init__.py +0 -0
  39. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/bag/array_bag.py +0 -0
  40. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/bag/bag.py +0 -0
  41. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/collections/__init__.py +0 -0
  42. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/collections/partition.py +0 -0
  43. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/collections/sql.py +0 -0
  44. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/collections/yielded.py +0 -0
  45. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/column/__init__.py +0 -0
  46. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/column/expressions.py +0 -0
  47. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/column/functions.py +0 -0
  48. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/column/sql.py +0 -0
  49. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/constants.py +0 -0
  50. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/dataframe/__init__.py +0 -0
  51. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/dataframe/api.py +0 -0
  52. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/dataframe/array_dataframe.py +0 -0
  53. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/dataframe/arrow_dataframe.py +0 -0
  54. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/dataframe/dataframe.py +0 -0
  55. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/dataframe/dataframe_iterable_dataframe.py +0 -0
  56. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/dataframe/dataframes.py +0 -0
  57. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/dataframe/iterable_dataframe.py +0 -0
  58. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/dataframe/pandas_dataframe.py +0 -0
  59. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/dataframe/utils.py +0 -0
  60. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/dataset/__init__.py +0 -0
  61. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/dataset/api.py +0 -0
  62. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/dataset/dataset.py +0 -0
  63. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/dev.py +0 -0
  64. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/exceptions.py +0 -0
  65. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/execution/__init__.py +0 -0
  66. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/execution/api.py +0 -0
  67. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/execution/execution_engine.py +0 -0
  68. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/execution/factory.py +0 -0
  69. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/execution/native_execution_engine.py +0 -0
  70. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/extensions/__init__.py +0 -0
  71. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/extensions/_builtins/__init__.py +0 -0
  72. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/extensions/_builtins/creators.py +0 -0
  73. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/extensions/_builtins/outputters.py +0 -0
  74. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/extensions/_builtins/processors.py +0 -0
  75. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/extensions/_utils.py +0 -0
  76. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/extensions/context.py +0 -0
  77. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/extensions/creator/__init__.py +0 -0
  78. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/extensions/creator/convert.py +0 -0
  79. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/extensions/creator/creator.py +0 -0
  80. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/extensions/outputter/__init__.py +0 -0
  81. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/extensions/outputter/convert.py +0 -0
  82. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/extensions/outputter/outputter.py +0 -0
  83. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/extensions/processor/__init__.py +0 -0
  84. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/extensions/processor/convert.py +0 -0
  85. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/extensions/processor/processor.py +0 -0
  86. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/extensions/transformer/__init__.py +0 -0
  87. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/extensions/transformer/constants.py +0 -0
  88. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/extensions/transformer/transformer.py +0 -0
  89. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/plugins.py +0 -0
  90. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/py.typed +0 -0
  91. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/registry.py +0 -0
  92. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/rpc/__init__.py +0 -0
  93. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/rpc/base.py +0 -0
  94. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/rpc/flask.py +0 -0
  95. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/sql/__init__.py +0 -0
  96. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/sql/_utils.py +0 -0
  97. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/sql/_visitors.py +0 -0
  98. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/sql/api.py +0 -0
  99. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/sql/workflow.py +0 -0
  100. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/test/__init__.py +0 -0
  101. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/test/pandas_tester.py +0 -0
  102. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/test/plugins.py +0 -0
  103. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/workflow/__init__.py +0 -0
  104. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/workflow/_checkpoint.py +0 -0
  105. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/workflow/_tasks.py +0 -0
  106. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/workflow/_workflow_context.py +0 -0
  107. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/workflow/api.py +0 -0
  108. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/workflow/input.py +0 -0
  109. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/workflow/module.py +0 -0
  110. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/workflow/workflow.py +0 -0
  111. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue.egg-info/dependency_links.txt +0 -0
  112. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue.egg-info/top_level.txt +0 -0
  113. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_contrib/__init__.py +0 -0
  114. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_contrib/contrib.py +0 -0
  115. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_contrib/seaborn/__init__.py +0 -0
  116. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_contrib/viz/__init__.py +0 -0
  117. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_contrib/viz/_ext.py +0 -0
  118. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_dask/__init__.py +0 -0
  119. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_dask/_constants.py +0 -0
  120. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_dask/_io.py +0 -0
  121. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_dask/registry.py +0 -0
  122. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_dask/tester.py +0 -0
  123. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_duckdb/__init__.py +0 -0
  124. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_duckdb/_io.py +0 -0
  125. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_duckdb/_utils.py +0 -0
  126. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_duckdb/dask.py +0 -0
  127. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_duckdb/registry.py +0 -0
  128. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_duckdb/tester.py +0 -0
  129. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_ibis/__init__.py +0 -0
  130. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_ibis/_compat.py +0 -0
  131. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_ibis/_utils.py +0 -0
  132. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_ibis/dataframe.py +0 -0
  133. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_notebook/__init__.py +0 -0
  134. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_notebook/env.py +0 -0
  135. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_notebook/nbextension/README.md +0 -0
  136. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_notebook/nbextension/__init__.py +0 -0
  137. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_notebook/nbextension/description.yaml +0 -0
  138. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_notebook/nbextension/main.js +0 -0
  139. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_polars/__init__.py +0 -0
  140. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_polars/_utils.py +0 -0
  141. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_polars/polars_dataframe.py +0 -0
  142. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_polars/registry.py +0 -0
  143. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_ray/__init__.py +0 -0
  144. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_ray/_constants.py +0 -0
  145. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_ray/_utils/__init__.py +0 -0
  146. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_ray/_utils/cluster.py +0 -0
  147. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_ray/_utils/dataframe.py +0 -0
  148. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_ray/dataframe.py +0 -0
  149. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_ray/execution_engine.py +0 -0
  150. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_ray/registry.py +0 -0
  151. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_ray/tester.py +0 -0
  152. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_spark/__init__.py +0 -0
  153. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_spark/_constants.py +0 -0
  154. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_spark/_utils/__init__.py +0 -0
  155. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_spark/_utils/io.py +0 -0
  156. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_spark/_utils/partition.py +0 -0
  157. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_spark/dataframe.py +0 -0
  158. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_spark/execution_engine.py +0 -0
  159. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_spark/registry.py +0 -0
  160. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_spark/tester.py +0 -0
  161. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_sql/__init__.py +0 -0
  162. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_sql/exceptions.py +0 -0
  163. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_test/__init__.py +0 -0
  164. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_test/bag_suite.py +0 -0
  165. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_test/dataframe_suite.py +0 -0
  166. {fugue-0.9.0.dev4 → fugue-0.9.2}/fugue_test/fixtures.py +0 -0
fugue-0.9.2/PKG-INFO ADDED
@@ -0,0 +1,370 @@
Metadata-Version: 2.4
Name: fugue
Version: 0.9.2
Summary: An abstraction layer for distributed computation
Home-page: http://github.com/fugue-project/fugue
Author: The Fugue Development Team
Author-email: hello@fugue.ai
License: Apache-2.0
Keywords: distributed spark dask ray sql dsl domain specific language
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: License :: OSI Approved :: Apache Software License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3 :: Only
Requires-Python: >=3.8
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: triad>=1.0.0
Requires-Dist: adagio>=0.2.6
Provides-Extra: sql
Requires-Dist: qpd>=0.4.4; extra == "sql"
Requires-Dist: fugue-sql-antlr>=0.2.0; extra == "sql"
Requires-Dist: sqlglot; extra == "sql"
Requires-Dist: jinja2; extra == "sql"
Provides-Extra: cpp-sql-parser
Requires-Dist: fugue-sql-antlr[cpp]>=0.2.0; extra == "cpp-sql-parser"
Provides-Extra: spark
Requires-Dist: pyspark>=3.1.1; extra == "spark"
Provides-Extra: dask
Requires-Dist: dask[dataframe,distributed]>=2024.4.0; extra == "dask"
Requires-Dist: pyarrow>=7.0.0; extra == "dask"
Requires-Dist: pandas>=2.0.2; extra == "dask"
Provides-Extra: ray
Requires-Dist: ray[data]>=2.30.0; extra == "ray"
Requires-Dist: duckdb>=0.5.0; extra == "ray"
Requires-Dist: pyarrow>=7.0.0; extra == "ray"
Requires-Dist: pandas<2.2; extra == "ray"
Provides-Extra: duckdb
Requires-Dist: qpd>=0.4.4; extra == "duckdb"
Requires-Dist: fugue-sql-antlr>=0.2.0; extra == "duckdb"
Requires-Dist: sqlglot; extra == "duckdb"
Requires-Dist: jinja2; extra == "duckdb"
Requires-Dist: duckdb>=0.5.0; extra == "duckdb"
Requires-Dist: numpy; extra == "duckdb"
Provides-Extra: polars
Requires-Dist: polars; extra == "polars"
Provides-Extra: ibis
Requires-Dist: qpd>=0.4.4; extra == "ibis"
Requires-Dist: fugue-sql-antlr>=0.2.0; extra == "ibis"
Requires-Dist: sqlglot; extra == "ibis"
Requires-Dist: jinja2; extra == "ibis"
Requires-Dist: ibis-framework[pandas]; extra == "ibis"
Requires-Dist: pandas<2.2; extra == "ibis"
Provides-Extra: notebook
Requires-Dist: notebook; extra == "notebook"
Requires-Dist: jupyterlab; extra == "notebook"
Requires-Dist: ipython>=7.10.0; extra == "notebook"
Provides-Extra: all
Requires-Dist: qpd>=0.4.4; extra == "all"
Requires-Dist: fugue-sql-antlr>=0.2.0; extra == "all"
Requires-Dist: sqlglot; extra == "all"
Requires-Dist: jinja2; extra == "all"
Requires-Dist: pyspark>=3.1.1; extra == "all"
Requires-Dist: dask[dataframe,distributed]>=2024.4.0; extra == "all"
Requires-Dist: dask-sql; extra == "all"
Requires-Dist: ray[data]>=2.30.0; extra == "all"
Requires-Dist: notebook; extra == "all"
Requires-Dist: jupyterlab; extra == "all"
Requires-Dist: ipython>=7.10.0; extra == "all"
Requires-Dist: duckdb>=0.5.0; extra == "all"
Requires-Dist: pyarrow>=6.0.1; extra == "all"
Requires-Dist: pandas<2.2,>=2.0.2; extra == "all"
Requires-Dist: ibis-framework[duckdb,pandas]; extra == "all"
Requires-Dist: polars; extra == "all"
Dynamic: author
Dynamic: author-email
Dynamic: classifier
Dynamic: description
Dynamic: description-content-type
Dynamic: home-page
Dynamic: keywords
Dynamic: license
Dynamic: license-file
Dynamic: provides-extra
Dynamic: requires-dist
Dynamic: requires-python
Dynamic: summary

# Fugue

[![PyPI version](https://badge.fury.io/py/fugue.svg)](https://pypi.python.org/pypi/fugue/)
[![PyPI pyversions](https://img.shields.io/pypi/pyversions/fugue.svg)](https://pypi.python.org/pypi/fugue/)
[![PyPI license](https://img.shields.io/pypi/l/fugue.svg)](https://pypi.python.org/pypi/fugue/)
[![codecov](https://codecov.io/gh/fugue-project/fugue/branch/master/graph/badge.svg?token=ZO9YD5N3IA)](https://codecov.io/gh/fugue-project/fugue)
[![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fa5f2f53e6f48aaa1218a89f4808b91)](https://www.codacy.com/gh/fugue-project/fugue/dashboard?utm_source=github.com&utm_medium=referral&utm_content=fugue-project/fugue&utm_campaign=Badge_Grade)
[![Downloads](https://static.pepy.tech/badge/fugue)](https://pepy.tech/project/fugue)

| Tutorials | API Documentation | Chat with us on slack! |
| --- | --- | --- |
| [![Jupyter Book Badge](https://jupyterbook.org/badge.svg)](https://fugue-tutorials.readthedocs.io/) | [![Doc](https://readthedocs.org/projects/fugue/badge)](https://fugue.readthedocs.org) | [![Slack Status](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](http://slack.fugue.ai) |


**Fugue is a unified interface for distributed computing that lets users execute Python, Pandas, and SQL code on Spark, Dask, and Ray with minimal rewrites.**

Fugue is most commonly used for:

* **Parallelizing or scaling existing Python and Pandas code** by bringing it to Spark, Dask, or Ray with minimal rewrites.
* Using [FugueSQL](https://fugue-tutorials.readthedocs.io/tutorials/quick_look/ten_minutes_sql.html) to **define end-to-end workflows** on top of Pandas, Spark, and Dask DataFrames. FugueSQL is an enhanced SQL interface that can invoke Python code.

To see how Fugue compares to other frameworks like dbt, Arrow, Ibis, and PySpark Pandas, see the [comparisons](https://fugue-tutorials.readthedocs.io/#how-does-fugue-compare-to).

## [Fugue API](https://fugue-tutorials.readthedocs.io/tutorials/quick_look/ten_minutes.html)

The Fugue API is a collection of functions that are capable of running on Pandas, Spark, Dask, and Ray. The simplest way to use Fugue is the [`transform()` function](https://fugue-tutorials.readthedocs.io/tutorials/beginner/transform.html). It lets users parallelize the execution of a single function by bringing it to Spark, Dask, or Ray. In the example below, the `map_letter_to_food()` function takes in a mapping and applies it to a column. This is just Pandas and Python so far (without Fugue).

```python
import pandas as pd
from typing import Dict

input_df = pd.DataFrame({"id": [0, 1, 2], "value": ["A", "B", "C"]})
map_dict = {"A": "Apple", "B": "Banana", "C": "Carrot"}

def map_letter_to_food(df: pd.DataFrame, mapping: Dict[str, str]) -> pd.DataFrame:
    df["value"] = df["value"].map(mapping)
    return df
```

Now, the `map_letter_to_food()` function is brought to the Spark execution engine by invoking Fugue's `transform()` function. The output `schema` and `params` are passed to the `transform()` call. The `schema` is needed because explicit schemas are a requirement for distributed frameworks. A schema of `"*"` below means all input columns are in the output.

```python
from pyspark.sql import SparkSession
from fugue import transform

spark = SparkSession.builder.getOrCreate()
sdf = spark.createDataFrame(input_df)

out = transform(sdf,
                map_letter_to_food,
                schema="*",
                params=dict(mapping=map_dict),
                )
# out is a Spark DataFrame
out.show()
```
```rst
+---+------+
| id| value|
+---+------+
|  0| Apple|
|  1|Banana|
|  2|Carrot|
+---+------+
```

<details>
<summary>PySpark equivalent of Fugue transform()</summary>

```python
from typing import Iterator, Union
import pandas as pd
from pyspark.sql.types import StructType
from pyspark.sql import DataFrame, SparkSession

spark_session = SparkSession.builder.getOrCreate()

def mapping_wrapper(dfs: Iterator[pd.DataFrame], mapping):
    for df in dfs:
        yield map_letter_to_food(df, mapping)

def run_map_letter_to_food(input_df: Union[DataFrame, pd.DataFrame], mapping):
    # conversion
    if isinstance(input_df, pd.DataFrame):
        sdf = spark_session.createDataFrame(input_df.copy())
    else:
        sdf = input_df.copy()

    schema = StructType(list(sdf.schema.fields))
    return sdf.mapInPandas(lambda dfs: mapping_wrapper(dfs, mapping),
                           schema=schema)

result = run_map_letter_to_food(input_df, map_dict)
result.show()
```
</details>

This syntax is simpler, cleaner, and more maintainable than the PySpark equivalent. At the same time, no edits were made to the original Pandas-based function to bring it to Spark, so it remains usable on Pandas DataFrames. Fugue `transform()` also supports Dask and Ray as execution engines alongside the default Pandas-based engine.
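
As a minimal sketch (assuming the `dask` extra is installed), the same call can target Dask just by passing an `engine` argument; the function itself stays untouched:

```python
from fugue import transform

# Same function and schema as above; only the engine changes.
# With engine="dask", the returned object is a Dask DataFrame.
dask_out = transform(
    input_df,
    map_letter_to_food,
    schema="*",
    params=dict(mapping=map_dict),
    engine="dask",
)
print(dask_out.compute())
```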

The Fugue API has a broader collection of functions that are also compatible with Spark, Dask, and Ray. For example, `load()` and `save()` can be used to create an end-to-end workflow compatible with Spark, Dask, and Ray. For the full list of functions, see the [Top Level API](https://fugue.readthedocs.io/en/latest/top_api.html).

```python
import fugue.api as fa

def run(engine=None):
    with fa.engine_context(engine):
        df = fa.load("/path/to/file.parquet")
        out = fa.transform(df, map_letter_to_food, schema="*")
        fa.save(out, "/path/to/output_file.parquet")

run()                # runs on Pandas
run(engine="spark")  # runs on Spark
run(engine="dask")   # runs on Dask
```

All functions underneath the context will run on the specified backend. This makes it easy to toggle between local and distributed execution.

## [FugueSQL](https://fugue-tutorials.readthedocs.io/tutorials/fugue_sql/index.html)

FugueSQL is a SQL-based language capable of expressing end-to-end data workflows on top of Pandas, Spark, and Dask. The `map_letter_to_food()` function above is used in the SQL expression below. This is how to use a Python-defined function along with the standard SQL `SELECT` statement.

```python
from fugue.api import fugue_sql
import json

query = """
    SELECT id, value
      FROM input_df
    TRANSFORM USING map_letter_to_food(mapping={{mapping}}) SCHEMA *
    """
map_dict_str = json.dumps(map_dict)

# returns a Pandas DataFrame
fugue_sql(query, mapping=map_dict_str)

# returns a Spark DataFrame
fugue_sql(query, mapping=map_dict_str, engine="spark")
```

## Installation

Fugue can be installed through pip or conda. For example:

```bash
pip install fugue
```

In order to use Fugue SQL, it is strongly recommended to install the `sql` extra:

```bash
pip install "fugue[sql]"
```

It also has the following installation extras:

* **sql**: to support Fugue SQL. Without this extra, the non-SQL part still works. Before Fugue 0.9.0, this extra was included in Fugue's core dependencies, so it did not need to be installed explicitly. **For 0.9.0+, it is required if you want to use Fugue SQL.**
* **spark**: to support Spark as the [ExecutionEngine](https://fugue-tutorials.readthedocs.io/tutorials/advanced/execution_engine.html).
* **dask**: to support Dask as the ExecutionEngine.
* **ray**: to support Ray as the ExecutionEngine.
* **duckdb**: to support DuckDB as the ExecutionEngine, read [details](https://fugue-tutorials.readthedocs.io/tutorials/integrations/backends/duckdb.html).
* **polars**: to support Polars DataFrames and extensions using Polars.
* **ibis**: to enable Ibis for Fugue workflows, read [details](https://fugue-tutorials.readthedocs.io/tutorials/integrations/backends/ibis.html).
* **cpp_sql_parser**: to enable the C++ antlr parser for Fugue SQL. It can be 50+ times faster than the pure Python parser. Pre-built binaries exist for the main Python versions and platforms; for the rest, a C++ compiler is needed to build it on the fly.

For example, a common use case is:

```bash
pip install "fugue[duckdb,spark]"
```

Note that if you have already installed Spark or DuckDB independently, Fugue can automatically use them without installing the extras.

## [Getting Started](https://fugue-tutorials.readthedocs.io/)

The best way to get started with Fugue is to work through the 10 minute tutorials:

* [Fugue API in 10 minutes](https://fugue-tutorials.readthedocs.io/tutorials/quick_look/ten_minutes.html)
* [FugueSQL in 10 minutes](https://fugue-tutorials.readthedocs.io/tutorials/quick_look/ten_minutes_sql.html)

For the top level API, see:

* [Fugue Top Level API](https://fugue.readthedocs.io/en/latest/top_api.html)

The [tutorials](https://fugue-tutorials.readthedocs.io/) can also be run in an interactive notebook environment through binder or Docker:

### Using binder

[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/fugue-project/tutorials/master)

**Note that it runs slowly on binder** because the machine on binder isn't powerful enough for a distributed framework such as Spark. Parallel executions can become sequential, so some of the performance comparison examples will not give you the correct numbers.

### Using Docker

Alternatively, you should get decent performance by running this Docker image on your own machine:

```bash
docker run -p 8888:8888 fugueproject/tutorials:latest
```


## Jupyter Notebook Extension

There is an accompanying [notebook extension](https://pypi.org/project/fugue-jupyter/) for FugueSQL that lets users use the `%%fsql` cell magic. The extension also provides syntax highlighting for FugueSQL cells. It works for both the classic notebook and Jupyter Lab. More details can be found in the [installation instructions](https://github.com/fugue-project/fugue-jupyter#install).
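
As a quick sketch of what that looks like in practice (assuming the extension is installed and enabled per the linked instructions), a cell starting with the magic runs as FugueSQL, and an engine such as `spark` can be passed on the magic line:

```sql
%%fsql spark
-- runs this FugueSQL cell on the Spark engine
SELECT id, value FROM input_df
PRINT
```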

![FugueSQL gif](https://miro.medium.com/max/700/1*6091-RcrOPyifJTLjo0anA.gif)


## Ecosystem

As an abstraction layer, Fugue can be used seamlessly with many other open-source projects.

Python backends:

* [Pandas](https://github.com/pandas-dev/pandas)
* [Polars](https://www.pola.rs) (DataFrames only)
* [Spark](https://github.com/apache/spark)
* [Dask](https://github.com/dask/dask)
* [Ray](http://github.com/ray-project/ray)
* [Ibis](https://github.com/ibis-project/ibis/)

FugueSQL backends:

* Pandas - FugueSQL can run on Pandas
* [Duckdb](https://github.com/duckdb/duckdb) - in-process SQL OLAP database management
* [dask-sql](https://github.com/dask-contrib/dask-sql) - SQL interface for Dask
* SparkSQL
* [BigQuery](https://fugue-tutorials.readthedocs.io/tutorials/integrations/warehouses/bigquery.html)
* Trino


Fugue is available as a backend or can integrate with the following projects:

* [WhyLogs](https://whylogs.readthedocs.io/en/latest/examples/integrations/Fugue_Profiling.html?highlight=fugue) - data profiling
* [PyCaret](https://fugue-tutorials.readthedocs.io/tutorials/integrations/ecosystem/pycaret.html) - low code machine learning
* [Nixtla](https://fugue-tutorials.readthedocs.io/tutorials/integrations/ecosystem/nixtla.html) - time series modeling
* [Prefect](https://fugue-tutorials.readthedocs.io/tutorials/integrations/ecosystem/prefect.html) - workflow orchestration
* [Pandera](https://fugue-tutorials.readthedocs.io/tutorials/integrations/ecosystem/pandera.html) - data validation
* [Datacompy (by Capital One)](https://fugue-tutorials.readthedocs.io/tutorials/integrations/ecosystem/datacompy.html) - comparing DataFrames

Registered third-party extensions (mainly for Fugue SQL) include:

* [Pandas plot](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.plot.html) - visualize data using matplotlib or plotly
* [Seaborn](https://seaborn.pydata.org/api.html) - visualize data using seaborn
* [WhyLogs](https://whylogs.readthedocs.io/en/latest/examples/integrations/Fugue_Profiling.html?highlight=fugue) - visualize data profiling
* [Vizzu](https://github.com/vizzuhq/ipyvizzu) - visualize data using ipyvizzu

## Community and Contributing

Feel free to message us on [Slack](http://slack.fugue.ai). We also have [contributing instructions](CONTRIBUTING.md).

### Case Studies

* [How LyftLearn Democratizes Distributed Compute through Kubernetes Spark and Fugue](https://eng.lyft.com/how-lyftlearn-democratizes-distributed-compute-through-kubernetes-spark-and-fugue-c0875b97c3d9)
* [Clobotics - Large Scale Image Processing with Spark through Fugue](https://medium.com/fugue-project/large-scale-image-processing-with-spark-through-fugue-e510b9813da8)
* [Architecture for a data lake REST API using Delta Lake, Fugue & Spark (article by bitsofinfo)](https://bitsofinfo.wordpress.com/2023/08/14/data-lake-rest-api-delta-lake-fugue-spark)

### Mentioned Uses

* [Productionizing Data Science at Interos, Inc. (LinkedIn post by Anthony Holten)](https://www.linkedin.com/posts/anthony-holten_pandas-spark-dask-activity-7022628193983459328-QvcF)
* [Multiple Time Series Forecasting with Fugue & Nixtla at Bain & Company (LinkedIn post by Fahad Akbar)](https://www.linkedin.com/posts/fahadakbar_fugue-datascience-forecasting-activity-7041119034813124608-u08q?utm_source=share&utm_medium=member_desktop)

## Further Resources

View some of our latest conference presentations and content. For a more complete list, check the [Content](https://fugue-tutorials.readthedocs.io/tutorials/resources/content.html) page in the tutorials.

### Blogs

* [Why Pandas-like Interfaces are Sub-optimal for Distributed Computing](https://towardsdatascience.com/why-pandas-like-interfaces-are-sub-optimal-for-distributed-computing-322dacbce43)
* [Introducing FugueSQL — SQL for Pandas, Spark, and Dask DataFrames (Towards Data Science by Khuyen Tran)](https://towardsdatascience.com/introducing-fuguesql-sql-for-pandas-spark-and-dask-dataframes-63d461a16b27)

### Conferences

* [Distributed Machine Learning at Lyft](https://www.youtube.com/watch?v=_IVyIOV0LgY)
* [Comparing the Different Ways to Scale Python and Pandas Code](https://www.youtube.com/watch?v=b3ae0m_XTys)
* [Large Scale Data Validation with Spark and Dask (PyCon US)](https://www.youtube.com/watch?v=2AdvBgjO_3Q)
* [FugueSQL - The Enhanced SQL Interface for Pandas, Spark, and Dask DataFrames (PyData Global)](https://www.youtube.com/watch?v=OBpnGYjNBBI)
* [Distributed Hybrid Parameter Tuning](https://www.youtube.com/watch?v=_GBjqskD8Qk)

{fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/_utils/io.py CHANGED
@@ -20,6 +20,10 @@ class FileParser(object):
         self._has_glob = "*" in path or "?" in path
         self._raw_path = path
         self._fs, self._fs_path = url_to_fs(path)
+        if not self._has_glob and self._fs.isdir(self._fs_path):
+            self._is_dir = True
+        else:
+            self._is_dir = False
         if not self.is_local:
             self._path = self._fs.unstrip_protocol(self._fs_path)
         else:
@@ -43,11 +47,15 @@ class FileParser(object):
         return self
 
     @property
-    def has_glob(self):
+    def is_dir(self) -> bool:
+        return self._is_dir
+
+    @property
+    def has_glob(self) -> bool:
         return self._has_glob
 
     @property
-    def is_local(self):
+    def is_local(self) -> bool:
         return isinstance(self._fs, LocalFileSystem)
 
     def join(self, path: str, format_hint: Optional[str] = None) -> "FileParser":
@@ -65,6 +73,10 @@ class FileParser(object):
     def path(self) -> str:
         return self._path
 
+    def as_dir_path(self) -> str:
+        assert_or_throw(self.is_dir, f"{self.raw_path} is not a directory")
+        return self.path + self._fs.sep
+
     @property
     def raw_path(self) -> str:
         return self._raw_path

{fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/dataframe/function_wrapper.py CHANGED
@@ -20,6 +20,7 @@ from triad.collections.function_wrapper import (
     PositionalParam,
     function_wrapper,
 )
+from triad.utils.convert import compare_annotations
 from triad.utils.iter import EmptyAwareIterable, make_empty_aware
 
 from ..constants import FUGUE_ENTRYPOINT
@@ -37,6 +38,14 @@ from .iterable_dataframe import IterableDataFrame
 from .pandas_dataframe import PandasDataFrame
 
 
+def _compare_iter(tp: Any) -> Any:
+    return lambda x: compare_annotations(
+        x, Iterable[tp]  # type:ignore
+    ) or compare_annotations(
+        x, Iterator[tp]  # type:ignore
+    )
+
+
 @function_wrapper(FUGUE_ENTRYPOINT)
 class DataFrameFunctionWrapper(FunctionWrapper):
     @property
@@ -71,6 +80,7 @@ class DataFrameFunctionWrapper(FunctionWrapper):
         p.update(kwargs)
         has_kw = False
         rargs: Dict[str, Any] = {}
+        row_param_info: Any = None
         for k, v in self._params.items():
             if isinstance(v, (PositionalParam, KeywordParam)):
                 if isinstance(v, KeywordParam):
@@ -81,7 +91,16 @@ class DataFrameFunctionWrapper(FunctionWrapper):
                         isinstance(p[k], DataFrame),
                         lambda: TypeError(f"{p[k]} is not a DataFrame"),
                     )
-                    rargs[k] = v.to_input_data(p[k], ctx=ctx)
+                    if v.is_per_row:  # pragma: no cover
+                        # TODO: this branch is used only if row annotations
+                        # are allowed as input
+                        assert_or_throw(
+                            row_param_info is None,
+                            lambda: ValueError("only one row parameter is allowed"),
+                        )
+                        row_param_info = (k, v, p[k])
+                    else:
+                        rargs[k] = v.to_input_data(p[k], ctx=ctx)
                 else:
                     rargs[k] = p[k]  # TODO: should we do auto type conversion?
                 del p[k]
@@ -91,12 +110,40 @@ class DataFrameFunctionWrapper(FunctionWrapper):
             rargs.update(p)
         elif not ignore_unknown and len(p) > 0:
             raise ValueError(f"{p} are not acceptable parameters")
+        if row_param_info is None:
+            return self._run_func(rargs, output, output_schema, ctx, raw=False)
+        else:  # pragma: no cover
+            # input contains row parameter
+            # TODO: this branch is used only if row annotations are allowed as input
+
+            def _dfs() -> Iterable[Any]:
+                k, v, df = row_param_info
+                for row in v.to_input_rows(df, ctx):
+                    rargs[k] = None
+                    _rargs = rargs.copy()
+                    _rargs[k] = row
+                    yield self._run_func(_rargs, output, output_schema, ctx, raw=True)
+
+            if not output:
+                sum(1 for _ in _dfs())
+                return
+            else:
+                return self._rt.iterable_to_output_df(_dfs(), output_schema, ctx)
+
+    def _run_func(
+        self,
+        rargs: Dict[str, Any],
+        output: bool,
+        output_schema: Any,
+        ctx: Any,
+        raw: bool,
+    ) -> Any:
         rt = self._func(**rargs)
         if not output:
             if isinstance(self._rt, _DataFrameParamBase):
                 self._rt.count(rt)
             return
-        if isinstance(self._rt, _DataFrameParamBase):
+        if not raw and isinstance(self._rt, _DataFrameParamBase):
            return self._rt.to_output_df(rt, output_schema, ctx=ctx)
         return rt
 
@@ -111,6 +158,7 @@ fugue_annotated_param = DataFrameFunctionWrapper.annotated_param
         annotation == Callable
         or annotation == callable  # pylint: disable=comparison-with-callable
         or str(annotation).startswith("typing.Callable")
+        or str(annotation).startswith("collections.abc.Callable")
     ),
 )
 class _CallableParam(AnnotatedParam):
@@ -125,6 +173,9 @@ class _CallableParam(AnnotatedParam):
         or annotation == Optional[callable]
         or str(annotation).startswith("typing.Union[typing.Callable")  # 3.8-
         or str(annotation).startswith("typing.Optional[typing.Callable")  # 3.9+
+        or str(annotation).startswith(
+            "typing.Optional[collections.abc.Callable]"
+        )  # 3.9+
     ),
 )
 class _OptionalCallableParam(AnnotatedParam):
@@ -136,14 +187,30 @@ class _DataFrameParamBase(AnnotatedParam):
         super().__init__(param)
         assert_or_throw(self.required, lambda: TypeError(f"{self} must be required"))
 
+    @property
+    def is_per_row(self) -> bool:
+        return False
+
     def to_input_data(self, df: DataFrame, ctx: Any) -> Any:  # pragma: no cover
         raise NotImplementedError
 
+    def to_input_rows(
+        self,
+        df: DataFrame,
+        ctx: Any,
+    ) -> Iterable[Any]:
+        raise NotImplementedError  # pragma: no cover
+
     def to_output_df(
         self, df: Any, schema: Any, ctx: Any
     ) -> DataFrame:  # pragma: no cover
         raise NotImplementedError
 
+    def iterable_to_output_df(
+        self, dfs: Iterable[Any], schema: Any, ctx: Any
+    ) -> DataFrame:  # pragma: no cover
+        raise NotImplementedError
+
     def count(self, df: Any) -> int:  # pragma: no cover
         raise NotImplementedError
 
@@ -173,6 +240,36 @@ class DataFrameParam(_DataFrameParamBase):
         return sum(1 for _ in df.as_array_iterable())
 
 
+@fugue_annotated_param(DataFrame, "r", child_can_reuse_code=True)
+class RowParam(_DataFrameParamBase):  # pragma: no cover
+    # TODO: this class is used only if row annotations are allowed as input
+    @property
+    def is_per_row(self) -> bool:
+        return True
+
+    def count(self, df: Any) -> int:
+        return 1
+
+
+@fugue_annotated_param(Dict[str, Any])
+class DictParam(RowParam):  # pragma: no cover
+    # TODO: this class is used only if row annotations are allowed as input
+    def to_input_rows(self, df: DataFrame, ctx: Any) -> Iterable[Any]:
+        yield from df.as_dict_iterable()
+
+    def to_output_df(self, output: Dict[str, Any], schema: Any, ctx: Any) -> DataFrame:
+        return ArrayDataFrame([list(output.values())], schema)
+
+    def iterable_to_output_df(
+        self, dfs: Iterable[Dict[str, Any]], schema: Any, ctx: Any
+    ) -> DataFrame:  # pragma: no cover
+        params: Dict[str, Any] = {}
+        if schema is not None:
+            params["schema"] = Schema(schema).pa_schema
+        adf = pa.Table.from_pylist(list(dfs), **params)
+        return ArrowDataFrame(adf)
+
+
 @fugue_annotated_param(AnyDataFrame)
 class _AnyDataFrameParam(DataFrameParam):
     def to_output_df(self, output: AnyDataFrame, schema: Any, ctx: Any) -> DataFrame:
@@ -198,6 +295,15 @@ class LocalDataFrameParam(DataFrameParam):
         )
         return output
 
+    def iterable_to_output_df(
+        self, dfs: Iterable[Any], schema: Any, ctx: Any
+    ) -> DataFrame:  # pragma: no cover
+        def _dfs() -> Iterable[DataFrame]:
+            for df in dfs:
+                yield self.to_output_df(df, schema, ctx)
+
+        return LocalDataFrameIterableDataFrame(_dfs(), schema=schema)
+
     def count(self, df: LocalDataFrame) -> int:
         if df.is_bounded:
             return df.count()
@@ -228,10 +334,7 @@ class _ListListParam(_LocalNoSchemaDataFrameParam):
         return len(df)
 
 
-@fugue_annotated_param(
-    Iterable[List[Any]],
-    matcher=lambda x: x == Iterable[List[Any]] or x == Iterator[List[Any]],
-)
+@fugue_annotated_param(Iterable[List[Any]], matcher=_compare_iter(List[Any]))
 class _IterableListParam(_LocalNoSchemaDataFrameParam):
     @no_type_check
     def to_input_data(self, df: DataFrame, ctx: Any) -> Iterable[List[Any]]:
@@ -288,10 +391,7 @@ class _ListDictParam(_LocalNoSchemaDataFrameParam):
         return len(df)
 
 
-@fugue_annotated_param(
-    Iterable[Dict[str, Any]],
-    matcher=lambda x: x == Iterable[Dict[str, Any]] or x == Iterator[Dict[str, Any]],
-)
+@fugue_annotated_param(Iterable[Dict[str, Any]], matcher=_compare_iter(Dict[str, Any]))
 class _IterableDictParam(_LocalNoSchemaDataFrameParam):
     @no_type_check
     def to_input_data(self, df: DataFrame, ctx: Any) -> Iterable[Dict[str, Any]]:
@@ -360,10 +460,7 @@ class _PandasParam(LocalDataFrameParam):
         return "pandas"
 
 
-@fugue_annotated_param(
-    Iterable[pd.DataFrame],
-    matcher=lambda x: x == Iterable[pd.DataFrame] or x == Iterator[pd.DataFrame],
-)
+@fugue_annotated_param(Iterable[pd.DataFrame], matcher=_compare_iter(pd.DataFrame))
 class _IterablePandasParam(LocalDataFrameParam):
     @no_type_check
     def to_input_data(self, df: DataFrame, ctx: Any) -> Iterable[pd.DataFrame]:
@@ -419,10 +516,7 @@ class _PyArrowTableParam(LocalDataFrameParam):
         return "pyarrow"
 
 
-@fugue_annotated_param(
-    Iterable[pa.Table],
-    matcher=lambda x: x == Iterable[pa.Table] or x == Iterator[pa.Table],
-)
+@fugue_annotated_param(Iterable[pa.Table], matcher=_compare_iter(pa.Table))
 class _IterableArrowParam(LocalDataFrameParam):
     @no_type_check
     def to_input_data(self, df: DataFrame, ctx: Any) -> Iterable[pa.Table]:

{fugue-0.9.0.dev4 → fugue-0.9.2}/fugue/extensions/transformer/convert.py CHANGED
@@ -375,7 +375,7 @@ class _FuncAsTransformer(Transformer):
         assert_arg_not_none(schema, "schema")
         tr = _FuncAsTransformer()
         tr._wrapper = DataFrameFunctionWrapper(  # type: ignore
-            func, "^[lspq][fF]?x*z?$", "^[lspq]$"
+            func, "^[lspq][fF]?x*z?$", "^[lspqr]$"
         )
         tr._output_schema_arg = schema  # type: ignore
         tr._validation_rules = validation_rules  # type: ignore
@@ -410,7 +410,7 @@ class _FuncAsOutputTransformer(_FuncAsTransformer):
         validation_rules.update(parse_validation_rules_from_comment(func))
         tr = _FuncAsOutputTransformer()
         tr._wrapper = DataFrameFunctionWrapper(  # type: ignore
-            func, "^[lspq][fF]?x*z?$", "^[lspnq]$"
+            func, "^[lspq][fF]?x*z?$", "^[lspnqr]$"
         )
         tr._output_schema_arg = None  # type: ignore
         tr._validation_rules = validation_rules  # type: ignore
@@ -503,7 +503,7 @@ class _FuncAsCoTransformer(CoTransformer):
         assert_arg_not_none(schema, "schema")
         tr = _FuncAsCoTransformer()
         tr._wrapper = DataFrameFunctionWrapper(  # type: ignore
-            func, "^(c|[lspq]+)[fF]?x*z?$", "^[lspq]$"
+            func, "^(c|[lspq]+)[fF]?x*z?$", "^[lspqr]$"
        )
         tr._dfs_input = tr._wrapper.input_code[0] == "c"  # type: ignore
         tr._output_schema_arg = schema  # type: ignore
@@ -562,7 +562,7 @@ class _FuncAsOutputCoTransformer(_FuncAsCoTransformer):
 
         tr = _FuncAsOutputCoTransformer()
         tr._wrapper = DataFrameFunctionWrapper(  # type: ignore
-            func, "^(c|[lspq]+)[fF]?x*z?$", "^[lspnq]$"
+            func, "^(c|[lspq]+)[fF]?x*z?$", "^[lspnqr]$"
         )
         tr._dfs_input = tr._wrapper.input_code[0] == "c"  # type: ignore
         tr._output_schema_arg = None  # type: ignore
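
Taken together with the function_wrapper.py changes above (which register `Dict[str, Any]` under the new output code `r`), these widened output patterns let a transformer return a single row as a plain dict. A minimal sketch of what this appears to enable, written against the public `transform()` API rather than copied from the package's own tests:

```python
from typing import Any, Dict
import pandas as pd
from fugue import transform

df = pd.DataFrame({"id": [0, 0, 1], "v": [1.0, 2.0, 3.0]})

# Sketch (assumption based on this diff): with output code "r", a transformer
# may return one row per partition as Dict[str, Any]; DictParam turns the dict
# into a one-row dataframe, with values ordered to match the schema.
def summarize(pdf: pd.DataFrame) -> Dict[str, Any]:
    return {"id": int(pdf["id"].iloc[0]), "total": float(pdf["v"].sum())}

result = transform(df, summarize, schema="id:int,total:double", partition={"by": "id"})
print(result)
```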