fugue 0.9.1__tar.gz → 0.9.2.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. {fugue-0.9.1 → fugue-0.9.2.dev1}/PKG-INFO +1 -1
  2. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/dataframe/function_wrapper.py +89 -2
  3. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/extensions/transformer/convert.py +4 -4
  4. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue.egg-info/PKG-INFO +1 -1
  5. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_ray/_utils/io.py +22 -15
  6. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_test/builtin_suite.py +36 -1
  7. fugue-0.9.2.dev1/fugue_version/__init__.py +1 -0
  8. fugue-0.9.1/fugue_version/__init__.py +0 -1
  9. {fugue-0.9.1 → fugue-0.9.2.dev1}/LICENSE +0 -0
  10. {fugue-0.9.1 → fugue-0.9.2.dev1}/README.md +0 -0
  11. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/__init__.py +0 -0
  12. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/_utils/__init__.py +0 -0
  13. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/_utils/display.py +0 -0
  14. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/_utils/exception.py +0 -0
  15. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/_utils/interfaceless.py +0 -0
  16. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/_utils/io.py +0 -0
  17. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/_utils/misc.py +0 -0
  18. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/_utils/registry.py +0 -0
  19. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/api.py +0 -0
  20. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/bag/__init__.py +0 -0
  21. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/bag/array_bag.py +0 -0
  22. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/bag/bag.py +0 -0
  23. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/collections/__init__.py +0 -0
  24. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/collections/partition.py +0 -0
  25. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/collections/sql.py +0 -0
  26. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/collections/yielded.py +0 -0
  27. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/column/__init__.py +0 -0
  28. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/column/expressions.py +0 -0
  29. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/column/functions.py +0 -0
  30. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/column/sql.py +0 -0
  31. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/constants.py +0 -0
  32. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/dataframe/__init__.py +0 -0
  33. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/dataframe/api.py +0 -0
  34. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/dataframe/array_dataframe.py +0 -0
  35. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/dataframe/arrow_dataframe.py +0 -0
  36. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/dataframe/dataframe.py +0 -0
  37. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/dataframe/dataframe_iterable_dataframe.py +0 -0
  38. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/dataframe/dataframes.py +0 -0
  39. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/dataframe/iterable_dataframe.py +0 -0
  40. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/dataframe/pandas_dataframe.py +0 -0
  41. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/dataframe/utils.py +0 -0
  42. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/dataset/__init__.py +0 -0
  43. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/dataset/api.py +0 -0
  44. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/dataset/dataset.py +0 -0
  45. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/dev.py +0 -0
  46. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/exceptions.py +0 -0
  47. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/execution/__init__.py +0 -0
  48. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/execution/api.py +0 -0
  49. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/execution/execution_engine.py +0 -0
  50. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/execution/factory.py +0 -0
  51. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/execution/native_execution_engine.py +0 -0
  52. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/extensions/__init__.py +0 -0
  53. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/extensions/_builtins/__init__.py +0 -0
  54. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/extensions/_builtins/creators.py +0 -0
  55. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/extensions/_builtins/outputters.py +0 -0
  56. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/extensions/_builtins/processors.py +0 -0
  57. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/extensions/_utils.py +0 -0
  58. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/extensions/context.py +0 -0
  59. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/extensions/creator/__init__.py +0 -0
  60. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/extensions/creator/convert.py +0 -0
  61. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/extensions/creator/creator.py +0 -0
  62. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/extensions/outputter/__init__.py +0 -0
  63. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/extensions/outputter/convert.py +0 -0
  64. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/extensions/outputter/outputter.py +0 -0
  65. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/extensions/processor/__init__.py +0 -0
  66. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/extensions/processor/convert.py +0 -0
  67. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/extensions/processor/processor.py +0 -0
  68. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/extensions/transformer/__init__.py +0 -0
  69. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/extensions/transformer/constants.py +0 -0
  70. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/extensions/transformer/transformer.py +0 -0
  71. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/plugins.py +0 -0
  72. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/py.typed +0 -0
  73. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/registry.py +0 -0
  74. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/rpc/__init__.py +0 -0
  75. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/rpc/base.py +0 -0
  76. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/rpc/flask.py +0 -0
  77. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/sql/__init__.py +0 -0
  78. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/sql/_utils.py +0 -0
  79. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/sql/_visitors.py +0 -0
  80. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/sql/api.py +0 -0
  81. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/sql/workflow.py +0 -0
  82. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/test/__init__.py +0 -0
  83. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/test/pandas_tester.py +0 -0
  84. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/test/plugins.py +0 -0
  85. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/workflow/__init__.py +0 -0
  86. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/workflow/_checkpoint.py +0 -0
  87. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/workflow/_tasks.py +0 -0
  88. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/workflow/_workflow_context.py +0 -0
  89. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/workflow/api.py +0 -0
  90. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/workflow/input.py +0 -0
  91. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/workflow/module.py +0 -0
  92. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue/workflow/workflow.py +0 -0
  93. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue.egg-info/SOURCES.txt +0 -0
  94. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue.egg-info/dependency_links.txt +0 -0
  95. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue.egg-info/entry_points.txt +0 -0
  96. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue.egg-info/requires.txt +0 -0
  97. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue.egg-info/top_level.txt +0 -0
  98. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_contrib/__init__.py +0 -0
  99. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_contrib/contrib.py +0 -0
  100. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_contrib/seaborn/__init__.py +0 -0
  101. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_contrib/viz/__init__.py +0 -0
  102. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_contrib/viz/_ext.py +0 -0
  103. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_dask/__init__.py +0 -0
  104. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_dask/_constants.py +0 -0
  105. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_dask/_io.py +0 -0
  106. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_dask/_utils.py +0 -0
  107. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_dask/dataframe.py +0 -0
  108. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_dask/execution_engine.py +0 -0
  109. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_dask/registry.py +0 -0
  110. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_dask/tester.py +0 -0
  111. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_duckdb/__init__.py +0 -0
  112. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_duckdb/_io.py +0 -0
  113. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_duckdb/_utils.py +0 -0
  114. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_duckdb/dask.py +0 -0
  115. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_duckdb/dataframe.py +0 -0
  116. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_duckdb/execution_engine.py +0 -0
  117. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_duckdb/registry.py +0 -0
  118. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_duckdb/tester.py +0 -0
  119. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_ibis/__init__.py +0 -0
  120. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_ibis/_compat.py +0 -0
  121. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_ibis/_utils.py +0 -0
  122. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_ibis/dataframe.py +0 -0
  123. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_ibis/execution_engine.py +0 -0
  124. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_notebook/__init__.py +0 -0
  125. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_notebook/env.py +0 -0
  126. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_notebook/nbextension/README.md +0 -0
  127. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_notebook/nbextension/__init__.py +0 -0
  128. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_notebook/nbextension/description.yaml +0 -0
  129. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_notebook/nbextension/main.js +0 -0
  130. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_polars/__init__.py +0 -0
  131. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_polars/_utils.py +0 -0
  132. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_polars/polars_dataframe.py +0 -0
  133. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_polars/registry.py +0 -0
  134. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_ray/__init__.py +0 -0
  135. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_ray/_constants.py +0 -0
  136. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_ray/_utils/__init__.py +0 -0
  137. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_ray/_utils/cluster.py +0 -0
  138. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_ray/_utils/dataframe.py +0 -0
  139. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_ray/dataframe.py +0 -0
  140. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_ray/execution_engine.py +0 -0
  141. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_ray/registry.py +0 -0
  142. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_ray/tester.py +0 -0
  143. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_spark/__init__.py +0 -0
  144. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_spark/_constants.py +0 -0
  145. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_spark/_utils/__init__.py +0 -0
  146. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_spark/_utils/convert.py +0 -0
  147. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_spark/_utils/io.py +0 -0
  148. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_spark/_utils/misc.py +0 -0
  149. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_spark/_utils/partition.py +0 -0
  150. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_spark/dataframe.py +0 -0
  151. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_spark/execution_engine.py +0 -0
  152. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_spark/registry.py +0 -0
  153. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_spark/tester.py +0 -0
  154. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_sql/__init__.py +0 -0
  155. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_sql/exceptions.py +0 -0
  156. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_test/__init__.py +0 -0
  157. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_test/bag_suite.py +0 -0
  158. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_test/dataframe_suite.py +0 -0
  159. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_test/execution_suite.py +0 -0
  160. {fugue-0.9.1 → fugue-0.9.2.dev1}/fugue_test/fixtures.py +0 -0
  161. {fugue-0.9.1 → fugue-0.9.2.dev1}/setup.cfg +0 -0
  162. {fugue-0.9.1 → fugue-0.9.2.dev1}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: fugue
3
- Version: 0.9.1
3
+ Version: 0.9.2.dev1
4
4
  Summary: An abstraction layer for distributed computation
5
5
  Home-page: http://github.com/fugue-project/fugue
6
6
  Author: The Fugue Development Team
@@ -80,6 +80,7 @@ class DataFrameFunctionWrapper(FunctionWrapper):
80
80
  p.update(kwargs)
81
81
  has_kw = False
82
82
  rargs: Dict[str, Any] = {}
83
+ row_param_info: Any = None
83
84
  for k, v in self._params.items():
84
85
  if isinstance(v, (PositionalParam, KeywordParam)):
85
86
  if isinstance(v, KeywordParam):
@@ -90,7 +91,14 @@ class DataFrameFunctionWrapper(FunctionWrapper):
90
91
  isinstance(p[k], DataFrame),
91
92
  lambda: TypeError(f"{p[k]} is not a DataFrame"),
92
93
  )
93
- rargs[k] = v.to_input_data(p[k], ctx=ctx)
94
+ if v.is_per_row:
95
+ assert_or_throw(
96
+ row_param_info is None,
97
+ lambda: ValueError("only one row parameter is allowed"),
98
+ )
99
+ row_param_info = (k, v, p[k])
100
+ else:
101
+ rargs[k] = v.to_input_data(p[k], ctx=ctx)
94
102
  else:
95
103
  rargs[k] = p[k] # TODO: should we do auto type conversion?
96
104
  del p[k]
@@ -100,12 +108,38 @@ class DataFrameFunctionWrapper(FunctionWrapper):
100
108
  rargs.update(p)
101
109
  elif not ignore_unknown and len(p) > 0:
102
110
  raise ValueError(f"{p} are not acceptable parameters")
111
+ if row_param_info is None:
112
+ return self._run_func(rargs, output, output_schema, ctx, raw=False)
113
+ else: # input contains row parameter
114
+
115
+ def _dfs() -> Iterable[Any]:
116
+ k, v, df = row_param_info
117
+ for row in v.to_input_rows(df, ctx):
118
+ rargs[k] = None
119
+ _rargs = rargs.copy()
120
+ _rargs[k] = row
121
+ yield self._run_func(_rargs, output, output_schema, ctx, raw=True)
122
+
123
+ if not output:
124
+ sum(1 for _ in _dfs())
125
+ return
126
+ else:
127
+ return self._rt.iterable_to_output_df(_dfs(), output_schema, ctx)
128
+
129
+ def _run_func(
130
+ self,
131
+ rargs: Dict[str, Any],
132
+ output: bool,
133
+ output_schema: Any,
134
+ ctx: Any,
135
+ raw: bool,
136
+ ) -> Any:
103
137
  rt = self._func(**rargs)
104
138
  if not output:
105
139
  if isinstance(self._rt, _DataFrameParamBase):
106
140
  self._rt.count(rt)
107
141
  return
108
- if isinstance(self._rt, _DataFrameParamBase):
142
+ if not raw and isinstance(self._rt, _DataFrameParamBase):
109
143
  return self._rt.to_output_df(rt, output_schema, ctx=ctx)
110
144
  return rt
111
145
 
@@ -145,14 +179,30 @@ class _DataFrameParamBase(AnnotatedParam):
145
179
  super().__init__(param)
146
180
  assert_or_throw(self.required, lambda: TypeError(f"{self} must be required"))
147
181
 
182
+ @property
183
+ def is_per_row(self) -> bool:
184
+ return False
185
+
148
186
  def to_input_data(self, df: DataFrame, ctx: Any) -> Any: # pragma: no cover
149
187
  raise NotImplementedError
150
188
 
189
+ def to_input_rows(
190
+ self,
191
+ df: DataFrame,
192
+ ctx: Any,
193
+ ) -> Iterable[Any]:
194
+ raise NotImplementedError # pragma: no cover
195
+
151
196
  def to_output_df(
152
197
  self, df: Any, schema: Any, ctx: Any
153
198
  ) -> DataFrame: # pragma: no cover
154
199
  raise NotImplementedError
155
200
 
201
+ def iterable_to_output_df(
202
+ self, dfs: Iterable[Any], schema: Any, ctx: Any
203
+ ) -> DataFrame: # pragma: no cover
204
+ raise NotImplementedError
205
+
156
206
  def count(self, df: Any) -> int: # pragma: no cover
157
207
  raise NotImplementedError
158
208
 
@@ -182,6 +232,34 @@ class DataFrameParam(_DataFrameParamBase):
182
232
  return sum(1 for _ in df.as_array_iterable())
183
233
 
184
234
 
235
+ @fugue_annotated_param(DataFrame, "r", child_can_reuse_code=True)
236
+ class RowParam(_DataFrameParamBase):
237
+ @property
238
+ def is_per_row(self) -> bool:
239
+ return True
240
+
241
+ def count(self, df: Any) -> int:
242
+ return 1
243
+
244
+
245
+ @fugue_annotated_param(Dict[str, Any])
246
+ class DictParam(RowParam):
247
+ def to_input_rows(self, df: DataFrame, ctx: Any) -> Iterable[Any]:
248
+ yield from df.as_dict_iterable()
249
+
250
+ def to_output_df(self, output: Dict[str, Any], schema: Any, ctx: Any) -> DataFrame:
251
+ return ArrayDataFrame([list(output.values())], schema)
252
+
253
+ def iterable_to_output_df(
254
+ self, dfs: Iterable[Dict[str, Any]], schema: Any, ctx: Any
255
+ ) -> DataFrame: # pragma: no cover
256
+ params: Dict[str, Any] = {}
257
+ if schema is not None:
258
+ params["schema"] = Schema(schema).pa_schema
259
+ adf = pa.Table.from_pylist(list(dfs), **params)
260
+ return ArrowDataFrame(adf)
261
+
262
+
185
263
  @fugue_annotated_param(AnyDataFrame)
186
264
  class _AnyDataFrameParam(DataFrameParam):
187
265
  def to_output_df(self, output: AnyDataFrame, schema: Any, ctx: Any) -> DataFrame:
@@ -207,6 +285,15 @@ class LocalDataFrameParam(DataFrameParam):
207
285
  )
208
286
  return output
209
287
 
288
+ def iterable_to_output_df(
289
+ self, dfs: Iterable[Any], schema: Any, ctx: Any
290
+ ) -> DataFrame: # pragma: no cover
291
+ def _dfs() -> Iterable[DataFrame]:
292
+ for df in dfs:
293
+ yield self.to_output_df(df, schema, ctx)
294
+
295
+ return LocalDataFrameIterableDataFrame(_dfs(), schema=schema)
296
+
210
297
  def count(self, df: LocalDataFrame) -> int:
211
298
  if df.is_bounded:
212
299
  return df.count()
@@ -375,7 +375,7 @@ class _FuncAsTransformer(Transformer):
375
375
  assert_arg_not_none(schema, "schema")
376
376
  tr = _FuncAsTransformer()
377
377
  tr._wrapper = DataFrameFunctionWrapper( # type: ignore
378
- func, "^[lspq][fF]?x*z?$", "^[lspq]$"
378
+ func, "^[lspqr][fF]?x*z?$", "^[lspqr]$"
379
379
  )
380
380
  tr._output_schema_arg = schema # type: ignore
381
381
  tr._validation_rules = validation_rules # type: ignore
@@ -410,7 +410,7 @@ class _FuncAsOutputTransformer(_FuncAsTransformer):
410
410
  validation_rules.update(parse_validation_rules_from_comment(func))
411
411
  tr = _FuncAsOutputTransformer()
412
412
  tr._wrapper = DataFrameFunctionWrapper( # type: ignore
413
- func, "^[lspq][fF]?x*z?$", "^[lspnq]$"
413
+ func, "^[lspqr][fF]?x*z?$", "^[lspnqr]$"
414
414
  )
415
415
  tr._output_schema_arg = None # type: ignore
416
416
  tr._validation_rules = validation_rules # type: ignore
@@ -503,7 +503,7 @@ class _FuncAsCoTransformer(CoTransformer):
503
503
  assert_arg_not_none(schema, "schema")
504
504
  tr = _FuncAsCoTransformer()
505
505
  tr._wrapper = DataFrameFunctionWrapper( # type: ignore
506
- func, "^(c|[lspq]+)[fF]?x*z?$", "^[lspq]$"
506
+ func, "^(c|[lspq]+)[fF]?x*z?$", "^[lspqr]$"
507
507
  )
508
508
  tr._dfs_input = tr._wrapper.input_code[0] == "c" # type: ignore
509
509
  tr._output_schema_arg = schema # type: ignore
@@ -562,7 +562,7 @@ class _FuncAsOutputCoTransformer(_FuncAsCoTransformer):
562
562
 
563
563
  tr = _FuncAsOutputCoTransformer()
564
564
  tr._wrapper = DataFrameFunctionWrapper( # type: ignore
565
- func, "^(c|[lspq]+)[fF]?x*z?$", "^[lspnq]$"
565
+ func, "^(c|[lspq]+)[fF]?x*z?$", "^[lspnqr]$"
566
566
  )
567
567
  tr._dfs_input = tr._wrapper.input_code[0] == "c" # type: ignore
568
568
  tr._output_schema_arg = None # type: ignore
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: fugue
3
- Version: 0.9.1
3
+ Version: 0.9.2.dev1
4
4
  Summary: An abstraction layer for distributed computation
5
5
  Home-page: http://github.com/fugue-project/fugue
6
6
  Author: The Fugue Development Team
@@ -7,7 +7,7 @@ import ray.data as rd
7
7
  from packaging import version
8
8
  from pyarrow import csv as pacsv
9
9
  from pyarrow import json as pajson
10
- from ray.data.datasource import FileExtensionFilter
10
+
11
11
  from triad.collections import Schema
12
12
  from triad.collections.dict import ParamDict
13
13
  from triad.utils.assertion import assert_or_throw
@@ -21,6 +21,27 @@ from fugue_ray.dataframe import RayDataFrame
21
21
 
22
22
  from .._constants import RAY_VERSION
23
23
 
24
+ try:
25
+ from ray.data.datasource import FileExtensionFilter
26
+
27
+ class _FileFiler(FileExtensionFilter): # pragma: no cover
28
+ def __init__(
29
+ self, file_extensions: Union[str, List[str]], exclude: Iterable[str]
30
+ ):
31
+ super().__init__(file_extensions, allow_if_no_extension=True)
32
+ self._exclude = set(exclude)
33
+
34
+ def _is_valid(self, path: str) -> bool:
35
+ return pathlib.Path(
36
+ path
37
+ ).name not in self._exclude and self._file_has_extension(path)
38
+
39
+ def __call__(self, paths: List[str]) -> List[str]:
40
+ return [path for path in paths if self._is_valid(path)]
41
+
42
+ except ImportError: # pragma: no cover
43
+ pass # ray >=2.10
44
+
24
45
 
25
46
  class RayIO(object):
26
47
  def __init__(self, engine: ExecutionEngine):
@@ -248,17 +269,3 @@ class RayIO(object):
248
269
 
249
270
  def _remote_args(self) -> Dict[str, Any]:
250
271
  return {"num_cpus": 1}
251
-
252
-
253
- class _FileFiler(FileExtensionFilter): # pragma: no cover
254
- def __init__(self, file_extensions: Union[str, List[str]], exclude: Iterable[str]):
255
- super().__init__(file_extensions, allow_if_no_extension=True)
256
- self._exclude = set(exclude)
257
-
258
- def _is_valid(self, path: str) -> bool:
259
- return pathlib.Path(
260
- path
261
- ).name not in self._exclude and self._file_has_extension(path)
262
-
263
- def __call__(self, paths: List[str]) -> List[str]:
264
- return [path for path in paths if self._is_valid(path)]
@@ -486,6 +486,23 @@ class BuiltInTests(object):
486
486
  dag.df([], "a:int,b:int").assert_eq(b)
487
487
  dag.run(self.engine)
488
488
 
489
+ def test_transform_row_wise(self):
490
+ def t1(row: Dict[str, Any]) -> Dict[str, Any]:
491
+ row["b"] = 1
492
+ return row
493
+
494
+ def t2(rows: List[Dict[str, Any]]) -> Dict[str, Any]:
495
+ return rows[0]
496
+
497
+ with fa.engine_context(self.engine):
498
+ a = pd.DataFrame([[3, 4], [1, 2], [3, 5]], columns=["a", "b"])
499
+ b = fa.transform(a, t1, schema="*")
500
+ assert sorted(fa.as_array(b)) == [[1, 1], [3, 1], [3, 1]]
501
+ b = fa.transform(
502
+ a, t2, schema="*", partition={"by": "a", "presort": "b"}
503
+ )
504
+ assert sorted(fa.as_array(b)) == [[1, 2], [3, 4]]
505
+
489
506
  def test_transform_binary(self):
490
507
  with FugueWorkflow() as dag:
491
508
  a = dag.df([[1, pickle.dumps([0, "a"])]], "a:int,b:bytes")
@@ -548,6 +565,8 @@ class BuiltInTests(object):
548
565
  e = dag.df([[1, 2, 1, 10]], "a:int,ct1:int,ct2:int,x:int")
549
566
  e.assert_eq(c)
550
567
 
568
+ a.zip(b).transform(mock_co_tf1_d, params=dict(p=10)).assert_eq(e)
569
+
551
570
  # interfaceless
552
571
  c = dag.transform(
553
572
  a.zip(b),
@@ -676,6 +695,13 @@ class BuiltInTests(object):
676
695
  incr()
677
696
  yield pa.Table.from_pandas(df)
678
697
 
698
+ def t11(row: Dict[str, Any]) -> Dict[str, Any]:
699
+ incr()
700
+ return row
701
+
702
+ def t12(row: Dict[str, Any]) -> None:
703
+ incr()
704
+
679
705
  with FugueWorkflow() as dag:
680
706
  a = dag.df([[1, 2], [3, 4]], "a:double,b:int")
681
707
  a.out_transform(t1) # +2
@@ -688,6 +714,8 @@ class BuiltInTests(object):
688
714
  a.out_transform(t8, ignore_errors=[NotImplementedError]) # +1
689
715
  a.out_transform(t9) # +1
690
716
  a.out_transform(t10) # +1
717
+ a.out_transform(t11) # +2
718
+ a.out_transform(t12) # +2
691
719
  raises(FugueWorkflowCompileValidationError, lambda: a.out_transform(t2))
692
720
  raises(FugueWorkflowCompileValidationError, lambda: a.out_transform(t3))
693
721
  raises(FugueWorkflowCompileValidationError, lambda: a.out_transform(t4))
@@ -695,7 +723,7 @@ class BuiltInTests(object):
695
723
  raises(FugueWorkflowCompileValidationError, lambda: a.out_transform(T7))
696
724
  dag.run(self.engine)
697
725
 
698
- assert 13 <= incr()
726
+ assert 17 <= incr()
699
727
 
700
728
  def test_out_cotransform(self): # noqa: C901
701
729
  tmpdir = str(self.tmpdir)
@@ -2001,6 +2029,13 @@ def mock_co_tf1(
2001
2029
  return [[df1[0]["a"], len(df1), len(df2), p]]
2002
2030
 
2003
2031
 
2032
+ @cotransformer(lambda dfs, **kwargs: "a:int,ct1:int,ct2:int,x:int")
2033
+ def mock_co_tf1_d(
2034
+ df1: List[Dict[str, Any]], df2: List[List[Any]], p=1
2035
+ ) -> Dict[str, Any]:
2036
+ return dict(a=df1[0]["a"], ct1=len(df1), ct2=len(df2), x=p)
2037
+
2038
+
2004
2039
  def mock_co_tf2(dfs: DataFrames, p=1) -> List[List[Any]]:
2005
2040
  return [[dfs[0].peek_dict()["a"], dfs[0].count(), dfs[1].count(), p]]
2006
2041
 
@@ -0,0 +1 @@
1
+ __version__ = "0.9.2"
@@ -1 +0,0 @@
1
- __version__ = "0.9.1"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes