fugue-0.8.7.dev5-py3-none-any.whl → fugue-0.8.7.dev6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fugue/api.py CHANGED
@@ -6,6 +6,7 @@ from .dataframe.api import (
      as_array_iterable,
      as_arrow,
      as_dict_iterable,
+     as_dicts,
      as_fugue_df,
      as_pandas,
      drop_columns,
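
The export above makes `as_dicts` available at the top level of `fugue.api`. A minimal usage sketch, not part of the diff itself: the call pattern is inferred from the signature added in `fugue/dataframe/api.py` below, and the sample data is invented.

```python
# Hedged sketch: assumes fugue 0.8.7.dev6, where fugue.api.as_dicts is exported.
import pandas as pd
import fugue.api as fa

df = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]})

# Convert any supported dataframe to a list of python dicts.
rows = fa.as_dicts(df)                   # expected: [{"a": 1, "b": "x"}, {"a": 2, "b": "y"}]

# Restrict the output to a subset of columns.
subset = fa.as_dicts(df, columns=["a"])  # expected: [{"a": 1}, {"a": 2}]
print(rows, subset)
```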
fugue/dataframe/api.py CHANGED
@@ -116,15 +116,32 @@ def as_array_iterable(
      return as_fugue_df(df).as_array_iterable(columns=columns, type_safe=type_safe)


+ @fugue_plugin
+ def as_dicts(
+     df: AnyDataFrame, columns: Optional[List[str]] = None
+ ) -> List[Dict[str, Any]]:
+     """Convert any dataframe to a list of python dicts
+
+     :param df: the object that can be recognized as a dataframe by Fugue
+     :param columns: columns to extract, defaults to None
+     :return: a list of python dicts
+
+     .. note::
+
+         The default implementation enforces ``type_safe`` True
+     """
+     return as_fugue_df(df).as_dicts(columns=columns)
+
+
  @fugue_plugin
  def as_dict_iterable(
      df: AnyDataFrame, columns: Optional[List[str]] = None
  ) -> Iterable[Dict[str, Any]]:
-     """Convert any dataframe to iterable of native python dicts
+     """Convert any dataframe to iterable of python dicts

      :param df: the object that can be recognized as a dataframe by Fugue
      :param columns: columns to extract, defaults to None
-     :return: iterable of native python dicts
+     :return: iterable of python dicts

      .. note::

fugue/dataframe/arrow_dataframe.py CHANGED
@@ -21,6 +21,10 @@ from fugue.exceptions import FugueDataFrameOperationError

  from .api import (
      alter_columns,
+     as_array,
+     as_array_iterable,
+     as_dict_iterable,
+     as_dicts,
      as_pandas,
      drop_columns,
      get_column_names,
@@ -30,6 +34,12 @@ from .api import (
      select_columns,
  )
  from .dataframe import DataFrame, LocalBoundedDataFrame, _input_schema
+ from .utils import (
+     pa_table_as_array,
+     pa_table_as_array_iterable,
+     pa_table_as_dict_iterable,
+     pa_table_as_dicts,
+ )


  class ArrowDataFrame(LocalBoundedDataFrame):
@@ -174,21 +184,20 @@ class ArrowDataFrame(LocalBoundedDataFrame):
      def as_array(
          self, columns: Optional[List[str]] = None, type_safe: bool = False
      ) -> List[Any]:
-         return list(self.as_array_iterable(columns, type_safe=type_safe))
+         return pa_table_as_array(self.native, columns=columns)
+
+     def as_dicts(self, columns: Optional[List[str]] = None) -> List[Dict[str, Any]]:
+         return pa_table_as_dicts(self.native, columns=columns)

      def as_array_iterable(
          self, columns: Optional[List[str]] = None, type_safe: bool = False
      ) -> Iterable[Any]:
-         if self.empty:
-             return
-         if columns is not None:
-             for x in self[columns].as_array_iterable(type_safe=type_safe):
-                 yield x
-         else:
-             d = self.native.to_pydict()
-             cols = [d[n] for n in self.columns]
-             for arr in zip(*cols):
-                 yield list(arr)
+         yield from pa_table_as_array_iterable(self.native, columns=columns)
+
+     def as_dict_iterable(
+         self, columns: Optional[List[str]] = None
+     ) -> Iterable[Dict[str, Any]]:
+         yield from pa_table_as_dict_iterable(self.native, columns=columns)


  @as_local.candidate(lambda df: isinstance(df, pa.Table))
@@ -212,6 +221,34 @@ def _pa_table_as_pandas(df: pa.Table) -> pd.DataFrame:
      )


+ @as_array.candidate(lambda df, *args, **kwargs: isinstance(df, pa.Table))
+ def _pa_table_as_array(
+     df: pa.Table, columns: Optional[List[str]] = None, type_safe: bool = False
+ ) -> List[Any]:
+     return pa_table_as_array(df, columns=columns)
+
+
+ @as_array_iterable.candidate(lambda df, *args, **kwargs: isinstance(df, pa.Table))
+ def _pa_table_as_array_iterable(
+     df: pa.Table, columns: Optional[List[str]] = None, type_safe: bool = False
+ ) -> Iterable[Any]:
+     yield from pa_table_as_array_iterable(df, columns=columns)
+
+
+ @as_dicts.candidate(lambda df, *args, **kwargs: isinstance(df, pa.Table))
+ def _pa_table_as_dicts(
+     df: pa.Table, columns: Optional[List[str]] = None
+ ) -> List[Dict[str, Any]]:
+     return pa_table_as_dicts(df, columns=columns)
+
+
+ @as_dict_iterable.candidate(lambda df, *args, **kwargs: isinstance(df, pa.Table))
+ def _pa_table_as_dict_iterable(
+     df: pa.Table, columns: Optional[List[str]] = None
+ ) -> Iterable[Dict[str, Any]]:
+     yield from pa_table_as_dict_iterable(df, columns=columns)
+
+
  @alter_columns.candidate(lambda df, *args, **kwargs: isinstance(df, pa.Table))
  def _pa_table_alter_columns(
      df: pa.Table, columns: Any, as_fugue: bool = False
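
With the `.candidate` registrations above, the top-level functions should accept a raw `pa.Table` without wrapping it first. A small sketch; the expected outputs are inferred from the logic in this diff, not taken from its tests:

```python
import pyarrow as pa
import fugue.api as fa

tbl = pa.table({"a": [1, 2, 3], "b": ["x", "y", "z"]})

print(fa.as_array(tbl))                 # e.g. [[1, "x"], [2, "y"], [3, "z"]]
print(fa.as_dicts(tbl, columns=["a"]))  # e.g. [{"a": 1}, {"a": 2}, {"a": 3}]
```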
fugue/dataframe/dataframe.py CHANGED
@@ -237,13 +237,31 @@ class DataFrame(Dataset):
          """
          raise NotImplementedError

+     def as_dicts(self, columns: Optional[List[str]] = None) -> List[Dict[str, Any]]:
+         """Convert to a list of python dicts
+
+         :param columns: columns to extract, defaults to None
+         :return: a list of python dicts
+
+         .. note::
+
+             The default implementation enforces ``type_safe`` True
+         """
+         if columns is None:
+             columns = self.columns
+         idx = range(len(columns))
+         return [
+             {columns[i]: x[i] for i in idx}
+             for x in self.as_array(columns, type_safe=True)
+         ]
+
      def as_dict_iterable(
          self, columns: Optional[List[str]] = None
      ) -> Iterable[Dict[str, Any]]:
-         """Convert to iterable of native python dicts
+         """Convert to iterable of python dicts

          :param columns: columns to extract, defaults to None
-         :return: iterable of native python dicts
+         :return: iterable of python dicts

          .. note::

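
The default `DataFrame.as_dicts` above simply zips the column names with each row returned by `as_array(..., type_safe=True)`. A standalone illustration of that construction in plain Python, with invented sample rows:

```python
# Mirror of the dict-building logic in the default as_dicts implementation.
columns = ["a", "b"]
rows = [[1, "x"], [2, "y"]]  # what as_array(columns, type_safe=True) might return
idx = range(len(columns))
dicts = [{columns[i]: row[i] for i in idx} for row in rows]
assert dicts == [{"a": 1, "b": "x"}, {"a": 2, "b": "y"}]
```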
fugue/dataframe/function_wrapper.py CHANGED
@@ -269,7 +269,7 @@ class _EmptyAwareIterableListParam(_LocalNoSchemaDataFrameParam):
  class _ListDictParam(_LocalNoSchemaDataFrameParam):
      @no_type_check
      def to_input_data(self, df: DataFrame, ctx: Any) -> List[Dict[str, Any]]:
-         return list(df.as_local().as_dict_iterable())
+         return df.as_local().as_dicts()

      @no_type_check
      def to_output_df(
fugue/dataframe/iterable_dataframe.py CHANGED
@@ -105,6 +105,9 @@ class IterableDataFrame(LocalUnboundedDataFrame):
      ) -> List[Any]:
          return list(self.as_array_iterable(columns, type_safe=type_safe))

+     def as_dicts(self, columns: Optional[List[str]] = None) -> List[Dict[str, Any]]:
+         return list(self.as_dict_iterable(columns))
+
      def as_array_iterable(
          self, columns: Optional[List[str]] = None, type_safe: bool = False
      ) -> Iterable[Any]:
fugue/dataframe/pandas_dataframe.py CHANGED
@@ -1,8 +1,11 @@
  from typing import Any, Dict, Iterable, List, Optional, Tuple

  import pandas as pd
+ import pyarrow as pa
+ from triad import assert_or_throw
  from triad.collections.schema import Schema
  from triad.utils.pandas_like import PD_UTILS
+ from triad.utils.pyarrow import pa_batch_to_dicts

  from fugue.dataset.api import (
      as_fugue_dataset,
@@ -17,6 +20,10 @@ from fugue.dataset.api import (
  from fugue.exceptions import FugueDataFrameOperationError

  from .api import (
+     as_array,
+     as_array_iterable,
+     as_dict_iterable,
+     as_dicts,
      drop_columns,
      get_column_names,
      get_schema,
@@ -134,6 +141,9 @@ class PandasDataFrame(LocalBoundedDataFrame):
              return self
          return PandasDataFrame(self.native, new_schema)

+     def as_arrow(self, type_safe: bool = False) -> pa.Table:
+         return PD_UTILS.as_arrow(self.native, schema=self.schema.pa_schema)
+
      def as_array(
          self, columns: Optional[List[str]] = None, type_safe: bool = False
      ) -> List[Any]:
@@ -150,6 +160,18 @@ class PandasDataFrame(LocalBoundedDataFrame):
          ):
              yield row

+     def as_dicts(self, columns: Optional[List[str]] = None) -> List[Dict[str, Any]]:
+         res: List[Dict[str, Any]] = []
+         for block in _to_dicts(self.native, columns, self.schema):
+             res += block
+         return res
+
+     def as_dict_iterable(
+         self, columns: Optional[List[str]] = None
+     ) -> Iterable[Dict[str, Any]]:
+         for block in _to_dicts(self.native, columns, self.schema):
+             yield from block
+
      def head(
          self, n: int, columns: Optional[List[str]] = None
      ) -> LocalBoundedDataFrame:
@@ -272,6 +294,43 @@ def _pd_head(
      return _adjust_df(df.head(n), as_fugue=as_fugue)


+ @as_array.candidate(lambda df, *args, **kwargs: isinstance(df, pd.DataFrame))
+ def _pd_as_array(
+     df: pd.DataFrame, columns: Optional[List[str]] = None, type_safe: bool = False
+ ) -> List[Any]:
+     return list(_pd_as_array_iterable(df, columns, type_safe=type_safe))
+
+
+ @as_array_iterable.candidate(lambda df, *args, **kwargs: isinstance(df, pd.DataFrame))
+ def _pd_as_array_iterable(
+     df: pd.DataFrame, columns: Optional[List[str]] = None, type_safe: bool = False
+ ) -> Iterable[Any]:
+     for row in PD_UTILS.as_array_iterable(
+         df,
+         columns=columns,
+         type_safe=type_safe,
+     ):
+         yield row
+
+
+ @as_dicts.candidate(lambda df, *args, **kwargs: isinstance(df, pd.DataFrame))
+ def _pd_as_dicts(
+     df: pd.DataFrame, columns: Optional[List[str]] = None
+ ) -> List[Dict[str, Any]]:
+     res: List[Dict[str, Any]] = []
+     for block in _to_dicts(df, columns):
+         res += block
+     return res
+
+
+ @as_dict_iterable.candidate(lambda df, *args, **kwargs: isinstance(df, pd.DataFrame))
+ def _pd_as_dict_iterable(
+     df: pa.Table, columns: Optional[List[str]] = None
+ ) -> Iterable[Dict[str, Any]]:
+     for block in _to_dicts(df, columns):
+         yield from block
+
+
  def _adjust_df(res: pd.DataFrame, as_fugue: bool):
      return res if not as_fugue else PandasDataFrame(res)

@@ -280,3 +339,17 @@ def _assert_no_missing(df: pd.DataFrame, columns: Iterable[Any]) -> None:
      missing = [x for x in columns if x not in df.columns]
      if len(missing) > 0:
          raise FugueDataFrameOperationError("found nonexistent columns: {missing}")
+
+
+ def _to_dicts(
+     df: pd.DataFrame,
+     columns: Optional[List[str]] = None,
+     schema: Optional[Schema] = None,
+ ) -> Iterable[List[Dict[str, Any]]]:
+     cols = list(df.columns) if columns is None else columns
+     assert_or_throw(len(cols) > 0, ValueError("columns cannot be empty"))
+     pa_schema = schema.extract(cols).pa_schema if schema is not None else None
+     adf = PD_UTILS.as_arrow(df[cols], schema=pa_schema)
+     for batch in adf.to_batches():
+         if batch.num_rows > 0:
+             yield pa_batch_to_dicts(batch)
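
The new `_to_dicts` helper converts the pandas frame to Arrow via `PD_UTILS.as_arrow` and hands each non-empty record batch to triad's `pa_batch_to_dicts`. A rough standalone equivalent using only public pandas/pyarrow calls; `to_dicts_sketch` is a made-up name, and `RecordBatch.to_pylist` (which needs a reasonably recent pyarrow) stands in for `pa_batch_to_dicts`:

```python
from typing import Any, Dict, Iterable, List, Optional

import pandas as pd
import pyarrow as pa


def to_dicts_sketch(
    df: pd.DataFrame, columns: Optional[List[str]] = None
) -> Iterable[List[Dict[str, Any]]]:
    cols = list(df.columns) if columns is None else columns
    table = pa.Table.from_pandas(df[cols], preserve_index=False)
    for batch in table.to_batches():
        if batch.num_rows > 0:
            yield batch.to_pylist()  # one list of row dicts per non-empty batch


pdf = pd.DataFrame({"a": [1, 2], "b": [0.5, 1.5]})
rows = [r for block in to_dicts_sketch(pdf) for r in block]
print(rows)  # e.g. [{"a": 1, "b": 0.5}, {"a": 2, "b": 1.5}]
```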
fugue/dataframe/utils.py CHANGED
@@ -1,15 +1,16 @@
  import os
  import pickle
- from typing import Any, Iterable, Optional, Tuple
+ from typing import Any, Iterable, Optional, Tuple, List, Dict

  import pandas as pd
  import pyarrow as pa
  from fs import open_fs
- from triad import FileSystem, Schema
+ from triad import FileSystem, Schema, assert_or_throw
  from triad.collections.schema import SchemaError
  from triad.exceptions import InvalidOperationError
  from triad.utils.assertion import assert_arg_not_none
  from triad.utils.assertion import assert_or_throw as aot
+ from triad.utils.pyarrow import pa_batch_to_dicts

  from .api import as_fugue_df, get_column_names, normalize_column_names, rename
  from .dataframe import DataFrame, LocalBoundedDataFrame
@@ -250,3 +251,68 @@ def get_join_schemas(
      else:
          aot(len(on) > 0, SchemaError("join on columns must be specified"))
      return cm, (df1.schema.union(schema2))
+
+
+ def pa_table_as_array_iterable(
+     df: pa.Table, columns: Optional[List[str]] = None
+ ) -> Iterable[List[List[Any]]]:
+     """Convert a pyarrow table to an iterable of list
+
+     :param df: pyarrow table
+     :param columns: if not None, only these columns will be returned, defaults to None
+     :return: an iterable of list
+     """
+     assert_or_throw(columns is None or len(columns) > 0, ValueError("empty columns"))
+     _df = df if columns is None or len(columns) == 0 else df.select(columns)
+     for batch in _df.to_batches():
+         for x in zip(*batch.to_pydict().values()):
+             yield list(x)
+
+
+ def pa_table_as_array(
+     df: pa.Table, columns: Optional[List[str]] = None
+ ) -> List[List[List[Any]]]:
+     """Convert a pyarrow table to a list of list
+
+     :param df: pyarrow table
+     :param columns: if not None, only these columns will be returned, defaults to None
+     :return: a list of list
+     """
+     return list(pa_table_as_array_iterable(df, columns=columns))
+
+
+ def pa_table_as_dict_iterable(
+     df: pa.Table, columns: Optional[List[str]] = None
+ ) -> Iterable[Dict[str, Any]]:
+     """Convert a pyarrow table to an iterable of dict
+
+     :param df: pyarrow table
+     :param columns: if not None, only these columns will be returned, defaults to None
+     :return: an iterable of dict
+     """
+     for ck in _pa_table_as_dicts_chunks(df, columns=columns):
+         yield from ck
+
+
+ def pa_table_as_dicts(
+     df: pa.Table, columns: Optional[List[str]] = None
+ ) -> List[Dict[str, Any]]:
+     """Convert a pyarrow table to a list of dict
+
+     :param df: pyarrow table
+     :param columns: if not None, only these columns will be returned, defaults to None
+     :return: a list of dict
+     """
+     res: List[Dict[str, Any]] = []
+     for ck in _pa_table_as_dicts_chunks(df, columns=columns):
+         res += ck
+     return res
+
+
+ def _pa_table_as_dicts_chunks(
+     df: pa.Table, columns: Optional[List[str]] = None
+ ) -> Iterable[List[Dict[str, Any]]]:
+     assert_or_throw(columns is None or len(columns) > 0, ValueError("empty columns"))
+     _df = df if columns is None or len(columns) == 0 else df.select(columns)
+     for batch in _df.to_batches():
+         yield pa_batch_to_dicts(batch)
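
A quick, hedged check of the new utility functions; it assumes fugue 0.8.7.dev6 is installed, and the expected values follow from the zip/`pa_batch_to_dicts` logic above rather than from tests in this diff:

```python
import pyarrow as pa

from fugue.dataframe.utils import pa_table_as_array, pa_table_as_dicts

tbl = pa.table({"a": [1, 2], "b": ["x", "y"]})
print(pa_table_as_array(tbl))                 # e.g. [[1, "x"], [2, "y"]]
print(pa_table_as_dicts(tbl, columns=["b"]))  # e.g. [{"b": "x"}, {"b": "y"}]
```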
fugue/execution/execution_engine.py CHANGED
@@ -1323,7 +1323,7 @@ class _Comap:
          self._on_init(partition_no, empty_dfs)

      def run(self, cursor: PartitionCursor, df: LocalDataFrame) -> LocalDataFrame:
-         data = list(df.as_dict_iterable())
+         data = df.as_dicts()
          if self.how == "inner":
              if len(data) < self.dfs_count:
                  return ArrayDataFrame([], self.output_schema)
fugue/plugins.py CHANGED
@@ -7,6 +7,7 @@ from fugue.dataframe import (
7
7
  as_array_iterable,
8
8
  as_arrow,
9
9
  as_dict_iterable,
10
+ as_dicts,
10
11
  as_pandas,
11
12
  drop_columns,
12
13
  fugue_annotated_param,
fugue-0.8.7.dev5.dist-info/METADATA → fugue-0.8.7.dev6.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: fugue
- Version: 0.8.7.dev5
+ Version: 0.8.7.dev6
  Summary: An abstraction layer for distributed computation
  Home-page: http://github.com/fugue-project/fugue
  Author: The Fugue Development Team
@@ -19,7 +19,7 @@ Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3 :: Only
  Requires-Python: >=3.8
  Description-Content-Type: text/markdown
- Requires-Dist: triad ==0.9.2.dev3
+ Requires-Dist: triad ==0.9.2.dev5
  Requires-Dist: adagio >=0.2.4
  Requires-Dist: qpd >=0.4.4
  Requires-Dist: fugue-sql-antlr >=0.1.6
@@ -32,7 +32,7 @@ Requires-Dist: fugue-sql-antlr[cpp] >=0.1.6 ; extra == 'all'
  Requires-Dist: pyspark >=3.1.1 ; extra == 'all'
  Requires-Dist: dask[dataframe,distributed] >=2023.5.0 ; extra == 'all'
  Requires-Dist: dask-sql ; extra == 'all'
- Requires-Dist: ray[data] >=2.1.0 ; extra == 'all'
+ Requires-Dist: ray[data] >=2.4.0 ; extra == 'all'
  Requires-Dist: notebook ; extra == 'all'
  Requires-Dist: jupyterlab ; extra == 'all'
  Requires-Dist: ipython >=7.10.0 ; extra == 'all'
@@ -59,7 +59,7 @@ Requires-Dist: ipython >=7.10.0 ; extra == 'notebook'
  Provides-Extra: polars
  Requires-Dist: polars ; extra == 'polars'
  Provides-Extra: ray
- Requires-Dist: ray[data] >=2.1.0 ; extra == 'ray'
+ Requires-Dist: ray[data] >=2.4.0 ; extra == 'ray'
  Requires-Dist: duckdb >=0.5.0 ; extra == 'ray'
  Requires-Dist: pyarrow >=6.0.1 ; extra == 'ray'
  Provides-Extra: spark
fugue-0.8.7.dev5.dist-info/RECORD → fugue-0.8.7.dev6.dist-info/RECORD CHANGED
@@ -1,9 +1,9 @@
  fugue/__init__.py,sha256=xT5zuNZfRkjbA8a-uTT5oLK6hLGuezGZLWYBl6eS5J4,2749
- fugue/api.py,sha256=6_d3vYwJGAX7tW7NMhHB_NAX4aPsfzK2L06Zr2V78Ks,1240
+ fugue/api.py,sha256=dLUrigFhDMB5x7cvlWSK8EyaY2o0AmhgPr0VRtfzSz0,1254
  fugue/constants.py,sha256=crd0VqX8WtBcjSUNwZDi2LDIEkhUMWOlSn73H8JI9ds,3385
  fugue/dev.py,sha256=GQCkezBBl4V0lVDWhGtUQKqomiCxgR9dMhfqj9C8cS8,1369
  fugue/exceptions.py,sha256=ylP8gkZL8ao_ZLinNYKv16FPyO_n7c29dN-4QChUxi0,1544
- fugue/plugins.py,sha256=SJ-jqs04StHIHJ65lgdGP0IDopVIGBDpmzHHllNK8p0,998
+ fugue/plugins.py,sha256=kao-H5z-cRbujBKW1QC9IHUOBKxXMhpVQ6saIE7cXm8,1012
  fugue/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  fugue/registry.py,sha256=SNULGv08f37fRO-cIxFDmnVcod7ref2fNLSK6G7nVnI,868
  fugue/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -25,22 +25,22 @@ fugue/column/expressions.py,sha256=fdGX9oPCqJBuROFZqrOYVcwkjghdXT9ngaSTG5tW_i8,2
  fugue/column/functions.py,sha256=ygLyn2gp5lTdGbYqJXeGeMmRNhbm4-vfJvAY_Zt0pb0,9774
  fugue/column/sql.py,sha256=s_qTtHgnvRFqjhCWr7s595PTrHM-Pr9zHUQfU5xcTVA,17391
  fugue/dataframe/__init__.py,sha256=zm7TbsaJLIvfm7zymWm2LGcuJd3nxfGsFnQiyrSnenM,678
- fugue/dataframe/api.py,sha256=KEbZbXCnaUgfwGF7iZODZsCtJTL4-reQS4qVbigFrps,10554
+ fugue/dataframe/api.py,sha256=aWBvMaiSUxOvdQMfe79zHShWuPfLcgiWggC9HvVxvSE,11017
  fugue/dataframe/array_dataframe.py,sha256=4scWnmQ6sjy1A6o7IYdRc0VVutBEfcJrA1f9wkph4Kg,4440
- fugue/dataframe/arrow_dataframe.py,sha256=mJzrYBGs9mEMsHgxmnhDdiLUiOkcOs3YBAzHs75KFsI,10202
- fugue/dataframe/dataframe.py,sha256=a7jhYUaovN7w8vcJ-OU2AMfkfqxpvFF06cYWFqIJWqM,16418
+ fugue/dataframe/arrow_dataframe.py,sha256=r5zcZBX_N6XO5dmixBkTCPgLcMmgDF022piZvrwRp_c,11485
+ fugue/dataframe/dataframe.py,sha256=xmyG85i14A6LDRkNmPt29oYq7PJsq668s1QvFHK8PV4,16964
  fugue/dataframe/dataframe_iterable_dataframe.py,sha256=lx71KfaI4lsVKI-79buc-idaeT20JEMBOq21SQcAiY8,7259
  fugue/dataframe/dataframes.py,sha256=tBSpHsENgbcdOJ0Jgst6PTKbjG7_uoFJch96oTlaQIs,4160
- fugue/dataframe/function_wrapper.py,sha256=r6H1SQWaag2eSbJ50327t_bt7MZunbOMOl9OcOcQW2E,14827
- fugue/dataframe/iterable_dataframe.py,sha256=9g2BAF9A6QPbo63Si-trFq_9nPVqAD9vSePRCV71AfY,4620
- fugue/dataframe/pandas_dataframe.py,sha256=JNkr24h5gir1Msttx3lNfzFjwMqjHbjDswNynpCiizo,9158
- fugue/dataframe/utils.py,sha256=Oid7L9-V-NjKnwnkN8Jg85E2OPMWkjkjNI5OoeKTnbs,9132
+ fugue/dataframe/function_wrapper.py,sha256=V1eQMOn27UroEYT7_YiwoEF0RjZYIM0zkD3vfaMAQFs,14813
+ fugue/dataframe/iterable_dataframe.py,sha256=TcOoNKa4jNbHbvAZ0XAhtMmGcioygIHPxI9budDtenQ,4758
+ fugue/dataframe/pandas_dataframe.py,sha256=0L0wYCGhD2BpQbruoT07Ox9iQM5YLHLNrcgzudc-yKs,11633
+ fugue/dataframe/utils.py,sha256=VS1qLCr-9NEcEjaK-219rADJadDf6EfzYZCGRUpn1fY,11405
  fugue/dataset/__init__.py,sha256=5f2CAJ4xst6Z2o9Q2e2twfDOGUw8ZJoE2ild4JEU2pg,112
  fugue/dataset/api.py,sha256=DacI4L2w5NJ-eZ6nFxNMqmReEnb0WUXswbjVp7BeErk,2794
  fugue/dataset/dataset.py,sha256=jWXZqy3msMPFFkhas2PYJEX55ZAI3gk3Txq5f4-Qya4,4759
  fugue/execution/__init__.py,sha256=iZGxAznZz9piM3k4gp0tln97MDIBxdliLyNbD-0Zc48,427
  fugue/execution/api.py,sha256=KsFOLGdWQMlXmlQ5JRgRsbUeB64qzTVHxSEaunjiojo,39818
- fugue/execution/execution_engine.py,sha256=n-mw0k0QtK8FQgP4w4_NrWJbg0XvrR4sFn70tSaOi0I,47735
+ fugue/execution/execution_engine.py,sha256=G_SsTmcuDcy6_azi_88lGzsOodiizu0JdWxebxgbqRg,47721
  fugue/execution/factory.py,sha256=5ICzfNh2QqqABuVyYLijY5-7LZgfRqczlaZN32p78bE,21003
  fugue/execution/native_execution_engine.py,sha256=Mm9BVC3dEMS3IWRZe4YvGKp6_mmW7dLmoLMK5HgAPcs,14408
  fugue/extensions/__init__.py,sha256=y-uLKd6mZ8sZ_8-OdW6ELoBO_9IfC0gDmEbE_rMCvOA,599
@@ -87,8 +87,8 @@ fugue_contrib/viz/_ext.py,sha256=Lu_DlS5DcmrFz27fHcKTCkhKyknVWcfS5kzZVVuO9xM,134
  fugue_dask/__init__.py,sha256=2CcJ0AsN-k_f7dZ-yAyYpaICfUMPfH3l0FvUJSBzTr0,161
  fugue_dask/_constants.py,sha256=35UmTVITk21GhRyRlbJOwPPdQsytM_p_2NytOXEay18,510
  fugue_dask/_io.py,sha256=9G516yM6zQvSC5_JA6qHb3LwBDmhWcxK5sjFHrQ81zo,6012
- fugue_dask/_utils.py,sha256=uFoJAL95rmnBgieU2hPyqxFZGvR6ZJgPRMq5TAJqIBI,8520
- fugue_dask/dataframe.py,sha256=TdKjxhoQpsU5CvBTgO2c5Zo_4LfyelR0IK8bPgjAxcg,10218
+ fugue_dask/_utils.py,sha256=n70N3wPPMz13Jh0GWJM3Je-TCYpU36yGP_YCwIHqUrc,8908
+ fugue_dask/dataframe.py,sha256=MuG9TqCND7qI66lPvxzuomfE7yA4sW7DjrvbyvE6XEU,13471
  fugue_dask/execution_engine.py,sha256=XJp6wrdkaNh5pOpwt-Hjoa2sxgCOgusFRWrcqoCcaNM,21153
  fugue_dask/ibis_engine.py,sha256=kQdaG_KlZZ2AjtYETNCdTJOgtwI_eH0aGzLaAiIBbRI,2120
  fugue_dask/registry.py,sha256=7UTg_eie7zKlHYKMCyOo0TNn5y2TiIjE8kiS2PruHFc,2200
@@ -96,14 +96,14 @@ fugue_duckdb/__init__.py,sha256=nSNv-fxBAKD6W23EbMeV4dVRIaSTqr9DzQUWuVOES8s,379
  fugue_duckdb/_io.py,sha256=Sq228unVnroYTq4GX-Wnv22SLHC9Ji-aWgiqrfdu81w,8880
  fugue_duckdb/_utils.py,sha256=ElKbHUyn5fWSPGXsK57iqMzcqKtCf0c8pBVBYGe5Ql4,5020
  fugue_duckdb/dask.py,sha256=agoLzeB7Swxj2kVWfmXFbWD1NS2lbbTlnrjSkR8kKWY,5014
- fugue_duckdb/dataframe.py,sha256=vNZF2BC1sJpW3P5TVFTpU6C1Ddam81jPC_4i8kBuEpo,6512
+ fugue_duckdb/dataframe.py,sha256=LRfTv7Y46wMM_IDYSP1R-5OXuHuBg8GHjPGFFt8u7l0,8444
  fugue_duckdb/execution_engine.py,sha256=fkkQb4Eh0m7SwKrTplVk2oQalLkNoj3CW0R12g01ofk,20536
  fugue_duckdb/ibis_engine.py,sha256=MrypeABozqwetKOpqtrmWvCJX2QPfBXhbSEhvK9vqmI,1990
  fugue_duckdb/registry.py,sha256=Dj0Tng1cXVT6Q7t-KxOky2k1dD9xSBjYGQmI26UgZPo,3095
  fugue_ibis/__init__.py,sha256=PcUt66KlLyGGicad7asq5j2U567_fhR0HzvWQBhV1VM,362
  fugue_ibis/_compat.py,sha256=zKdTaTfuC02eUIzZPkcd7oObnVBi_X5mQjQf7SDme3Y,246
  fugue_ibis/_utils.py,sha256=BUL5swA5FE4eQu0t5Z17hZVu9a2MFfxlFH6Ymy9xifg,6607
- fugue_ibis/dataframe.py,sha256=olGfVYY9n5wwPOZojS30Fs3XEOMlenCzX8fuR2WPaq4,7295
+ fugue_ibis/dataframe.py,sha256=0Fb1vJjwEeffgoUCDfDGIMuSFaPgUJqcB-JqJOAALfs,7789
  fugue_ibis/execution_engine.py,sha256=p5zy0IBXiJgLi67RBHCRcHgZsaJMANdNSpUxz0k_6C0,18453
  fugue_ibis/extensions.py,sha256=H8l-SPfoqLuUoILtOuL2nccOpoL83zHeSoIhoqjtWQM,6905
  fugue_ibis/execution/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -117,11 +117,11 @@ fugue_notebook/nbextension/description.yaml,sha256=CsXgx9CSLbAlO4Z1kvX9ejYA_TImP
  fugue_notebook/nbextension/main.js,sha256=Px2tQuBCNGEZOEBKsnfVruFEg-AxK7Tj0dY84ktub_U,3709
  fugue_polars/__init__.py,sha256=NDkjlbLhHPTjUaCAw6mAwIqeK3HSeh-z88s9dqmwheQ,61
  fugue_polars/_utils.py,sha256=7rGGWgB1-VqFwh4PcBLYk_5VNjd8FNOS4TDFyDVz2sg,159
- fugue_polars/polars_dataframe.py,sha256=Ll4ZUuRhAETWtmSf87KsdUCqZPiexFqy4FiPkvWQkN0,7348
+ fugue_polars/polars_dataframe.py,sha256=8LQ0IB-JFFdjW2ltDzq8DfIbUC_jjjDr1YM29usJag0,8831
  fugue_polars/registry.py,sha256=gd6qQ-OxYtTAQFyvYbLDPXmSvCR-LW6n5K5ylgMY_7A,2950
  fugue_ray/__init__.py,sha256=HzEHfG2mpc0ugf3nf1Pdy15Bhg35K6maZpYejn1aoyI,119
  fugue_ray/_constants.py,sha256=vu5l1w-Wi-2V_nm0HLXKOYhh5HdWRCc5yQktO2XzhOg,569
- fugue_ray/dataframe.py,sha256=vyVShPnNtMef_KBsVP3iTHcssA_fm33-Y077c7S3J-A,10612
+ fugue_ray/dataframe.py,sha256=7asw2qf9vm6vLBSzqghm9pUcNAppJOz5CkT7XyR0S5g,12514
  fugue_ray/execution_engine.py,sha256=NT_mnacijp1zskFbtganUwA3JNRPU-FNNvJswA6U_Yg,12607
  fugue_ray/registry.py,sha256=xJRAhbwNrg695EwghQDnVtTKi4YkqZ0_61BD4OAblSA,1685
  fugue_ray/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -130,7 +130,7 @@ fugue_ray/_utils/dataframe.py,sha256=_EadzS4rPom1A_cF0pqoPlwrNYZTfTwcyyu86_fFsqU
  fugue_ray/_utils/io.py,sha256=SFTU4qXubGEmO5IGZA5yHy8Hu4b9aFZ9-eTU4Qs-NsQ,8757
  fugue_spark/__init__.py,sha256=rvrMpFs9socMgyH_58gLbnAqmirBf5oidXoO4cekW6U,165
  fugue_spark/_constants.py,sha256=K2uLQfjvMxXk75K-7_Wn47Alpwq5rW57BtECAUrOeqA,177
- fugue_spark/dataframe.py,sha256=HJHMDVLaT-7QZ8mhMcvpLDRiKuFjtw4XtLm1N2QskKs,9704
+ fugue_spark/dataframe.py,sha256=lYa8FizM3p_lsKYFR49FazkVZMJKyi2LABKTpP5YBLo,12006
  fugue_spark/execution_engine.py,sha256=rqgY9U1bpjh0GFNyNkuPcI7iV0xeipadURhNIir4w08,33147
  fugue_spark/ibis_engine.py,sha256=Yl5xxwROo1idcD2hFaylaI1IpmBUgbvOZRWtcrE0Zjo,1697
  fugue_spark/registry.py,sha256=kyIMk6dAiKRSKCHawQKyXu9DhZ24T6j3gL57TiOAZ8c,4162
@@ -144,7 +144,7 @@ fugue_sql/exceptions.py,sha256=ltS0MC8gMnVVrJbQiOZ0kRUWvVQ2LTx33dCW3ugqtb0,260
  fugue_test/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  fugue_test/bag_suite.py,sha256=WbDCFjuAHYoJh4GXSPiSJxOoOwE1VMtYpJ3lQrsUK-Y,2483
  fugue_test/builtin_suite.py,sha256=o8aMZTKa74nKBmcUTTBbliTJMtNbsXE9SPKZopS504o,78400
- fugue_test/dataframe_suite.py,sha256=mOr_x94H-Ylp0lJ-KBwHXJu-Q-qesqY3PzJxR9LI_Ko,18323
+ fugue_test/dataframe_suite.py,sha256=LgB931CkASbGOrRQ9j92DGk9wPb__FoNusOk-HeqU9E,19165
  fugue_test/execution_suite.py,sha256=FI6UmwBvdoT1jkJRBqJT_Q0IDehFryvv00UL6jjxyAk,47689
  fugue_test/ibis_suite.py,sha256=Dk4AHVD00RcFsNm9VvJ4_4LOyFdGX30OnAtpO2SPruE,3529
  fugue_test/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -155,9 +155,9 @@ fugue_test/plugins/duckdb/fixtures.py,sha256=UxQbIMRbSrTZ3pgCmKZgd5wd1YvnVrqLSUP
  fugue_test/plugins/ray/__init__.py,sha256=nyKGW6xgTXtMhSs7yjgFNKO7mVboCNg63Bvdf39fO_I,55
  fugue_test/plugins/ray/fixtures.py,sha256=hZkvuo0AcD63XJl5JUroc9tm2LWHUPszg2zzY6FCSao,141
  fugue_version/__init__.py,sha256=vTwvdJOZi8jZb9U-Em7-d50qNDNPS2z51IXqRoojeNM,22
- fugue-0.8.7.dev5.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
- fugue-0.8.7.dev5.dist-info/METADATA,sha256=yQYxW_TTsinAtSyHUUfLVxrFlyc9x5FU-lfRH-77wfA,17860
- fugue-0.8.7.dev5.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
- fugue-0.8.7.dev5.dist-info/entry_points.txt,sha256=F4V76epxLiTYZgyacpmxJzNgfGqy2mUnIIG-PMlvBo8,536
- fugue-0.8.7.dev5.dist-info/top_level.txt,sha256=y1eCfzGdQ1_RkgcShcfbvXs-bopD3DwJcIOxP9EFXno,140
- fugue-0.8.7.dev5.dist-info/RECORD,,
+ fugue-0.8.7.dev6.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+ fugue-0.8.7.dev6.dist-info/METADATA,sha256=0i4ibczIy_wEMtZ6vFvaCw40x5KmuQa6OsuBVWUTQyk,17860
+ fugue-0.8.7.dev6.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
+ fugue-0.8.7.dev6.dist-info/entry_points.txt,sha256=N_BIIy3lSvF6Z32QE0yXTucgdHrPbUrOwH1zj7bZ0ow,536
+ fugue-0.8.7.dev6.dist-info/top_level.txt,sha256=y1eCfzGdQ1_RkgcShcfbvXs-bopD3DwJcIOxP9EFXno,140
+ fugue-0.8.7.dev6.dist-info/RECORD,,
fugue-0.8.7.dev5.dist-info/entry_points.txt → fugue-0.8.7.dev6.dist-info/entry_points.txt CHANGED
@@ -7,7 +7,7 @@ ibis = fugue_ibis [ibis]
  polars = fugue_polars.registry [polars]
  ray = fugue_ray.registry [ray]
  spark = fugue_spark.registry [spark]
- spark_ibis = fugue_spark.ibis_engine [ibis,spark]
+ spark_ibis = fugue_spark.ibis_engine [spark,ibis]

  [pytest11]
  fugue_test_dask = fugue_test.plugins.dask [dask]
fugue_dask/_utils.py CHANGED
@@ -1,13 +1,14 @@
  import math
- from typing import Any, List, Optional, Tuple
+ from typing import Any, Callable, List, Optional, Tuple, TypeVar

  import dask.dataframe as dd
  import numpy as np
  import pandas as pd
  import pyarrow as pa
  from dask.dataframe.core import DataFrame
+ from dask.delayed import delayed
  from dask.distributed import Client, get_client
- from triad.utils.pandas_like import PandasLikeUtils, PD_UTILS
+ from triad.utils.pandas_like import PD_UTILS, PandasLikeUtils
  from triad.utils.pyarrow import to_pandas_dtype

  import fugue.api as fa
@@ -16,6 +17,7 @@ from fugue.constants import FUGUE_CONF_DEFAULT_PARTITIONS
  from ._constants import FUGUE_DASK_CONF_DEFAULT_PARTITIONS

  _FUGUE_DASK_TEMP_IDX_COLUMN = "_fugue_dask_temp_index"
+ T = TypeVar("T")


  def get_default_partitions() -> int:
@@ -28,6 +30,17 @@ def get_default_partitions() -> int:
      return n if n > 0 else fa.get_current_parallelism() * 2


+ def collect(df: dd.DataFrame, func: Callable[[pd.DataFrame], T]) -> Tuple[T]:
+     """Compute each partition in parallel and collect the results
+
+     :param df: dask dataframe
+     :return: the collected result
+     """
+     dfs = df.to_delayed()
+     objs = [delayed(func)(df) for df in dfs]
+     return dd.compute(*objs)
+
+
  def hash_repartition(df: dd.DataFrame, num: int, cols: List[Any]) -> dd.DataFrame:
      """Repartition the dataframe by hashing the given columns
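
The new `collect` helper maps a function over each partition with `dask.delayed` and gathers the results with `dd.compute`. A usage sketch; it assumes a working dask installation, and the partition count and data are invented:

```python
import dask.dataframe as dd
import pandas as pd

from fugue_dask._utils import collect  # added in this release

ddf = dd.from_pandas(pd.DataFrame({"a": range(10)}), npartitions=3)

# One result per partition, computed in parallel.
partition_sums = collect(ddf, lambda pdf: int(pdf["a"].sum()))
print(sum(partition_sums))  # 45, however the rows are split across partitions
```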