fugue 0.8.2.dev1__py3-none-any.whl → 0.8.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. fugue/__init__.py +9 -5
  2. fugue/_utils/interfaceless.py +1 -558
  3. fugue/_utils/io.py +2 -91
  4. fugue/_utils/registry.py +3 -2
  5. fugue/api.py +1 -0
  6. fugue/bag/bag.py +8 -4
  7. fugue/collections/__init__.py +0 -7
  8. fugue/collections/partition.py +21 -9
  9. fugue/constants.py +3 -1
  10. fugue/dataframe/__init__.py +7 -8
  11. fugue/dataframe/arrow_dataframe.py +1 -2
  12. fugue/dataframe/dataframe.py +17 -18
  13. fugue/dataframe/dataframe_iterable_dataframe.py +22 -6
  14. fugue/dataframe/function_wrapper.py +432 -0
  15. fugue/dataframe/iterable_dataframe.py +3 -0
  16. fugue/dataframe/utils.py +11 -79
  17. fugue/dataset/api.py +0 -4
  18. fugue/dev.py +47 -0
  19. fugue/execution/__init__.py +1 -5
  20. fugue/execution/api.py +36 -14
  21. fugue/execution/execution_engine.py +30 -4
  22. fugue/execution/factory.py +0 -6
  23. fugue/execution/native_execution_engine.py +44 -67
  24. fugue/extensions/_builtins/creators.py +4 -2
  25. fugue/extensions/_builtins/outputters.py +4 -3
  26. fugue/extensions/_builtins/processors.py +3 -3
  27. fugue/extensions/creator/convert.py +5 -2
  28. fugue/extensions/outputter/convert.py +2 -2
  29. fugue/extensions/processor/convert.py +3 -2
  30. fugue/extensions/transformer/convert.py +22 -9
  31. fugue/extensions/transformer/transformer.py +15 -1
  32. fugue/plugins.py +2 -0
  33. fugue/registry.py +0 -39
  34. fugue/sql/_utils.py +1 -1
  35. fugue/workflow/_checkpoint.py +1 -1
  36. fugue/workflow/api.py +13 -13
  37. fugue/workflow/module.py +30 -37
  38. fugue/workflow/workflow.py +6 -0
  39. {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/METADATA +37 -23
  40. {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/RECORD +112 -101
  41. {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/WHEEL +1 -1
  42. {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/entry_points.txt +2 -1
  43. {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/top_level.txt +1 -0
  44. fugue_contrib/contrib.py +1 -0
  45. fugue_contrib/viz/_ext.py +7 -1
  46. fugue_dask/_io.py +0 -13
  47. fugue_dask/_utils.py +10 -4
  48. fugue_dask/dataframe.py +1 -2
  49. fugue_dask/execution_engine.py +45 -18
  50. fugue_dask/registry.py +8 -33
  51. fugue_duckdb/_io.py +8 -2
  52. fugue_duckdb/_utils.py +7 -2
  53. fugue_duckdb/dask.py +1 -1
  54. fugue_duckdb/dataframe.py +23 -19
  55. fugue_duckdb/execution_engine.py +19 -22
  56. fugue_duckdb/registry.py +11 -34
  57. fugue_ibis/dataframe.py +6 -10
  58. fugue_ibis/execution_engine.py +7 -1
  59. fugue_notebook/env.py +5 -10
  60. fugue_polars/__init__.py +2 -0
  61. fugue_polars/_utils.py +8 -0
  62. fugue_polars/polars_dataframe.py +234 -0
  63. fugue_polars/registry.py +86 -0
  64. fugue_ray/_constants.py +10 -1
  65. fugue_ray/_utils/dataframe.py +36 -9
  66. fugue_ray/_utils/io.py +2 -4
  67. fugue_ray/dataframe.py +16 -12
  68. fugue_ray/execution_engine.py +53 -32
  69. fugue_ray/registry.py +8 -32
  70. fugue_spark/_utils/convert.py +22 -11
  71. fugue_spark/_utils/io.py +0 -13
  72. fugue_spark/_utils/misc.py +27 -0
  73. fugue_spark/_utils/partition.py +11 -18
  74. fugue_spark/dataframe.py +26 -22
  75. fugue_spark/execution_engine.py +136 -54
  76. fugue_spark/registry.py +29 -78
  77. fugue_test/builtin_suite.py +36 -14
  78. fugue_test/dataframe_suite.py +9 -5
  79. fugue_test/execution_suite.py +100 -122
  80. fugue_version/__init__.py +1 -1
  81. tests/fugue/bag/test_array_bag.py +0 -9
  82. tests/fugue/collections/test_partition.py +10 -3
  83. tests/fugue/dataframe/test_function_wrapper.py +293 -0
  84. tests/fugue/dataframe/test_utils.py +2 -34
  85. tests/fugue/execution/test_factory.py +7 -9
  86. tests/fugue/execution/test_naive_execution_engine.py +35 -80
  87. tests/fugue/extensions/test_utils.py +12 -7
  88. tests/fugue/extensions/transformer/test_convert_cotransformer.py +1 -0
  89. tests/fugue/extensions/transformer/test_convert_output_cotransformer.py +1 -0
  90. tests/fugue/extensions/transformer/test_convert_transformer.py +2 -0
  91. tests/fugue/sql/test_workflow.py +1 -1
  92. tests/fugue/sql/test_workflow_parse.py +3 -5
  93. tests/fugue/utils/test_interfaceless.py +1 -325
  94. tests/fugue/utils/test_io.py +0 -80
  95. tests/fugue_dask/test_execution_engine.py +48 -0
  96. tests/fugue_dask/test_io.py +0 -55
  97. tests/fugue_duckdb/test_dataframe.py +2 -2
  98. tests/fugue_duckdb/test_execution_engine.py +16 -1
  99. tests/fugue_duckdb/test_utils.py +1 -1
  100. tests/fugue_ibis/test_dataframe.py +6 -3
  101. tests/fugue_polars/__init__.py +0 -0
  102. tests/fugue_polars/test_api.py +13 -0
  103. tests/fugue_polars/test_dataframe.py +82 -0
  104. tests/fugue_polars/test_transform.py +100 -0
  105. tests/fugue_ray/test_execution_engine.py +40 -4
  106. tests/fugue_spark/test_dataframe.py +0 -8
  107. tests/fugue_spark/test_execution_engine.py +50 -11
  108. tests/fugue_spark/test_importless.py +4 -4
  109. tests/fugue_spark/test_spark_connect.py +82 -0
  110. tests/fugue_spark/utils/test_convert.py +6 -8
  111. tests/fugue_spark/utils/test_io.py +0 -17
  112. fugue/_utils/register.py +0 -3
  113. fugue_test/_utils.py +0 -13
  114. {fugue-0.8.2.dev1.dist-info → fugue-0.8.4.dist-info}/LICENSE +0 -0
fugue/_utils/io.py CHANGED
@@ -5,13 +5,13 @@ from urllib.parse import urlparse
 
 import fs as pfs
 import pandas as pd
-from fs.errors import FileExpected
-from fugue.dataframe import LocalBoundedDataFrame, LocalDataFrame, PandasDataFrame
 from triad.collections.dict import ParamDict
 from triad.collections.fs import FileSystem
 from triad.collections.schema import Schema
 from triad.utils.assertion import assert_or_throw
 
+from fugue.dataframe import LocalBoundedDataFrame, LocalDataFrame, PandasDataFrame
+
 
 class FileParser(object):
     def __init__(self, path: str, format_hint: Optional[str] = None):
@@ -271,111 +271,22 @@ def _load_json(
     return pdf[schema.names], schema
 
 
-def _save_avro(df: LocalDataFrame, p: FileParser, **kwargs: Any):
-    """Save pandas dataframe as avro.
-    If providing your own schema, the usage of schema argument is preferred
-
-    :param schema: Avro Schema determines dtypes saved
-    """
-    import pandavro as pdx
-
-    kw = ParamDict(kwargs)
-
-    # pandavro defaults
-    schema = None
-    append = False
-    times_as_micros = True
-
-    if "schema" in kw:
-        schema = kw["schema"]
-        del kw["schema"]
-
-    if "append" in kw:
-        append = kw["append"]  # default is overwrite (False) instead of append (True)
-        del kw["append"]
-
-    if "times_as_micros" in kw:
-        times_as_micros = kw["times_as_micros"]
-        del kw["times_as_micros"]
-
-    pdf = df.as_pandas()
-    pdx.to_avro(
-        p.uri, pdf, schema=schema, append=append, times_as_micros=times_as_micros, **kw
-    )
-
-
-def _load_avro(
-    p: FileParser, columns: Any = None, **kwargs: Any
-) -> Tuple[pd.DataFrame, Any]:
-    path = p.uri
-    try:
-        pdf = _load_single_avro(path, **kwargs)
-    except (IsADirectoryError, PermissionError, FileExpected):
-        fs = FileSystem()
-        pdf = pd.concat(
-            [
-                _load_single_avro(
-                    pfs.path.combine(path, pfs.path.basename(x.path)), **kwargs
-                )
-                for x in fs.opendir(path).glob("*.avro")
-            ]
-        )
-
-    if columns is None:
-        return pdf, None
-    if isinstance(columns, list):  # column names
-        return pdf[columns], None
-
-    schema = Schema(columns)
-
-    # Return created DataFrame
-    return pdf[schema.names], schema
-
-
-def _load_single_avro(path: str, **kwargs: Any) -> pd.DataFrame:
-    from fastavro import reader
-
-    kw = ParamDict(kwargs)
-    process_record = None
-    if "process_record" in kw:
-        process_record = kw["process_record"]
-        del kw["process_record"]
-
-    fs = FileSystem()
-    with fs.openbin(path) as fp:
-        # Configure Avro reader
-        avro_reader = reader(fp)
-        # Load records in memory
-        if process_record:
-            records = [process_record(r) for r in avro_reader]
-        else:
-            records = list(avro_reader)
-
-        # Populate pandas.DataFrame with records
-        return pd.DataFrame.from_records(records)
-
-
 _FORMAT_MAP: Dict[str, str] = {
     ".csv": "csv",
     ".csv.gz": "csv",
     ".parquet": "parquet",
     ".json": "json",
     ".json.gz": "json",
-    ".avro": "avro",
-    ".avro.gz": "avro",
 }
 
 _FORMAT_LOAD: Dict[str, Callable[..., Tuple[pd.DataFrame, Any]]] = {
     "csv": _load_csv,
     "parquet": _load_parquet,
     "json": _load_json,
-    "avro": _load_avro,
 }
 
 _FORMAT_SAVE: Dict[str, Callable] = {
     "csv": _save_csv,
     "parquet": _save_parquet,
     "json": _save_json,
-    "avro": _save_avro,
 }
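
Note: this hunk removes Avro from the core IO maps, so `.avro` paths no longer resolve to a built-in loader or saver. A minimal migration sketch, assuming `pandavro` (the library the deleted helpers wrapped) is still installed; the file path is illustrative:

# Hypothetical migration after the built-in Avro IO removal: read with
# pandavro directly, then hand the plain pandas DataFrame to Fugue.
import pandavro as pdx
import fugue.api as fa

pdf = pdx.from_avro("/tmp/data.avro")  # plain pandas DataFrame
fa.show(pdf)  # Fugue's functional API accepts pandas DataFrames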
fugue/_utils/registry.py CHANGED
@@ -1,9 +1,10 @@
 from typing import Callable
+
 from triad import conditional_dispatcher
 from triad.utils.dispatcher import ConditionalDispatcher
 
-_FUGUE_ENTRYPOINT = "fugue.plugins"
+from ..constants import FUGUE_ENTRYPOINT
 
 
 def fugue_plugin(func: Callable) -> ConditionalDispatcher:
-    return conditional_dispatcher(entry_point=_FUGUE_ENTRYPOINT)(func)  # type: ignore
+    return conditional_dispatcher(entry_point=FUGUE_ENTRYPOINT)(func)  # type: ignore
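
`fugue_plugin` now reads the entry-point name from the shared `FUGUE_ENTRYPOINT` constant instead of a module-private string. A hedged sketch of the conditional-dispatch pattern it builds on (the function names below are illustrative, not Fugue APIs):

from fugue._utils.registry import fugue_plugin

@fugue_plugin
def pretty_name(obj) -> str:  # fallback body when no candidate matches
    raise NotImplementedError(f"no pretty_name for {type(obj)}")

# plugins register candidates that claim the inputs they understand
@pretty_name.candidate(lambda obj: isinstance(obj, dict))
def _dict_name(obj: dict) -> str:
    return f"dict[{len(obj)}]"

assert pretty_name({"a": 1}) == "dict[1]"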
fugue/api.py CHANGED
@@ -34,6 +34,7 @@ from .dataset.api import (
 from .execution.api import (
     aggregate,
     anti_join,
+    as_fugue_engine_df,
     assign,
     broadcast,
     clear_global_engine,
fugue/bag/bag.py CHANGED
@@ -9,9 +9,13 @@ class Bag(Dataset):
     unordered objects.
     """
 
-    @abstractmethod
     def as_local(self) -> "LocalBag":  # pragma: no cover
         """Convert this bag to a :class:`.LocalBag`"""
+        return self.as_local_bounded()
+
+    @abstractmethod
+    def as_local_bounded(self) -> "LocalBoundedBag":  # pragma: no cover
+        """Convert this bag to a :class:`.LocalBoundedBag`"""
         raise NotImplementedError
 
     @abstractmethod
@@ -50,9 +54,6 @@ class LocalBag(Bag):
     def is_local(self) -> bool:
         return True
 
-    def as_local(self) -> "LocalBag":
-        return self
-
     @property
     def num_partitions(self) -> int:
         return 1
@@ -63,6 +64,9 @@ class LocalBoundedBag(LocalBag):
     def is_bounded(self) -> bool:
         return True
 
+    def as_local_bounded(self) -> "LocalBoundedBag":
+        return self
+
 
 class BagDisplay(DatasetDisplay):
     """:class:`~.Bag` plain display class"""
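
This refactor inverts the template: `as_local` gains a default body that delegates to a new abstract `as_local_bounded`, so concrete bags implement only the bounded conversion; the same pattern is applied to `DataFrame` further below. A condensed illustration of the pattern (illustrative only, not Fugue code):

from abc import ABC, abstractmethod

class Dataset(ABC):
    def as_local(self) -> "Dataset":
        # template method: the default local form is the bounded one
        return self.as_local_bounded()

    @abstractmethod
    def as_local_bounded(self) -> "Dataset":
        ...

class BoundedDataset(Dataset):
    def as_local_bounded(self) -> "Dataset":
        return self  # already local and bounded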
fugue/collections/__init__.py CHANGED
@@ -1,7 +0,0 @@
-# flake8: noqa
-from fugue.collections.partition import (
-    BagPartitionCursor,
-    PartitionCursor,
-    PartitionSpec,
-)
-from fugue.collections.yielded import Yielded, PhysicalYielded
fugue/collections/partition.py CHANGED
@@ -98,7 +98,7 @@ class PartitionSpec(object):
 
     Partition consists of these specs:
 
-    * **algo**: can be one of ``hash`` (default), ``rand`` and ``even``
+    * **algo**: can be one of ``hash`` (default), ``rand``, ``even`` or ``coarse``
     * **num** or **num_partitions**: number of physical partitions, it can be an
       expression or an integer, e.g. ``(ROWCOUNT+4) / 3``
     * **by** or **partition_by**: keys to partition on
@@ -208,7 +208,9 @@ class PartitionSpec(object):
 
     @property
     def algo(self) -> str:
-        """Get algo of the spec, one of ``hash`` (default), ``rand`` and ``even``"""
+        """Get algo of the spec, one of ``hash`` (default),
+        ``rand``, ``even`` or ``coarse``
+        """
         return self._algo if self._algo != "" else "hash"
 
     @property
@@ -258,11 +260,14 @@ class PartitionSpec(object):
         """Get deterministic unique id of this object"""
         return to_uuid(self.jsondict)
 
-    def get_sorts(self, schema: Schema) -> IndexedOrderedDict[str, bool]:
+    def get_sorts(
+        self, schema: Schema, with_partition_keys: bool = True
+    ) -> IndexedOrderedDict[str, bool]:
         """Get keys for sorting in a partition, it's the combination of partition
         keys plus the presort keys
 
         :param schema: the dataframe schema this partition spec operates on
+        :param with_partition_keys: whether to include the partition keys
         :return: an ordered dictionary of key, order pairs
 
         .. admonition:: Examples
@@ -272,9 +277,10 @@ class PartitionSpec(object):
             >>> assert p.get_sorts(schema) == {"a":True, "b":True, "c": False}
         """
         d: IndexedOrderedDict[str, bool] = IndexedOrderedDict()
-        for p in self.partition_by:
-            aot(p in schema, lambda: KeyError(f"{p} not in {schema}"))
-            d[p] = True
+        if with_partition_keys:
+            for p in self.partition_by:
+                aot(p in schema, lambda: KeyError(f"{p} not in {schema}"))
+                d[p] = True
         for p, v in self.presort.items():
             aot(p in schema, lambda: KeyError(f"{p} not in {schema}"))
             d[p] = v
@@ -348,7 +354,7 @@ class DatasetPartitionCursor:
         """reset the cursor to a row (which should be the first row of a
         new logical partition)
 
-        :param item: an item of the dataset
+        :param item: an item of the dataset, or a function generating the item
         :param partition_no: logical partition number
         :param slice_no: slice number inside the logical partition (to be deprecated)
         """
@@ -359,6 +365,8 @@ class DatasetPartitionCursor:
     @property
     def item(self) -> Any:
         """Get current item"""
+        if callable(self._item):
+            self._item = self._item()
         return self._item
 
     @property
@@ -417,11 +425,15 @@ class PartitionCursor(DatasetPartitionCursor):
         """reset the cursor to a row (which should be the first row of a
         new logical partition)
 
-        :param row: list-like row data
+        :param row: list-like row data, or a function generating a list-like row
         :param partition_no: logical partition number
         :param slice_no: slice number inside the logical partition (to be deprecated)
         """
-        super().set(list(row), partition_no=partition_no, slice_no=slice_no)
+        super().set(
+            list(row) if not callable(row) else lambda: list(row()),
+            partition_no=partition_no,
+            slice_no=slice_no,
+        )
 
     @property
     def row(self) -> List[Any]:
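
A hedged example of the widened `PartitionSpec` surface: ``coarse`` joins the accepted ``algo`` values, and `get_sorts` can now drop the partition keys. The schema literal and the dict-equality checks are assumptions for illustration:

from triad import Schema
from fugue.collections.partition import PartitionSpec

spec = PartitionSpec(algo="coarse", by=["a"], presort="b DESC")
schema = Schema("a:int,b:int,c:str")
# partition keys are included (ascending) ahead of the presort keys by default
assert spec.get_sorts(schema) == {"a": True, "b": False}
# the new flag returns only the presort keys
assert spec.get_sorts(schema, with_partition_keys=False) == {"b": False}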
fugue/constants.py CHANGED
@@ -2,7 +2,9 @@ from typing import Any, Dict
 from triad import ParamDict
 
 KEYWORD_ROWCOUNT = "ROWCOUNT"
-KEYWORD_CORECOUNT = "CORECOUNT"
+KEYWORD_PARALLELISM = "CONCURRENCY"
+
+FUGUE_ENTRYPOINT = "fugue.plugins"
 
 FUGUE_SQL_DEFAULT_DIALECT = "spark"
 
fugue/dataframe/__init__.py CHANGED
@@ -9,14 +9,13 @@ from .dataframe import (
     LocalDataFrame,
     YieldedDataFrame,
 )
-from .dataframe_iterable_dataframe import LocalDataFrameIterableDataFrame
+from .dataframe_iterable_dataframe import (
+    IterableArrowDataFrame,
+    IterablePandasDataFrame,
+    LocalDataFrameIterableDataFrame,
+)
 from .dataframes import DataFrames
+from .function_wrapper import DataFrameFunctionWrapper, fugue_annotated_param
 from .iterable_dataframe import IterableDataFrame
 from .pandas_dataframe import PandasDataFrame
-from .utils import (
-    get_column_names,
-    normalize_dataframe_column_names,
-    rename,
-    to_local_bounded_df,
-    to_local_df,
-)
+from .utils import get_column_names, normalize_dataframe_column_names, rename
fugue/dataframe/arrow_dataframe.py CHANGED
@@ -49,7 +49,6 @@ class ArrowDataFrame(LocalBoundedDataFrame):
         self,
         df: Any = None,
         schema: Any = None,
-        pandas_df_wrapper: bool = False,
     ):
         if df is None:
             schema = _input_schema(schema).assert_not_empty()
@@ -142,7 +141,7 @@ class ArrowDataFrame(LocalBoundedDataFrame):
         return self.native.shape[0]
 
     def as_pandas(self) -> pd.DataFrame:
-        return self.native.to_pandas()
+        return self.native.to_pandas(use_threads=False, date_as_object=False)
 
     def head(
         self, n: int, columns: Optional[List[str]] = None
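
The new flags make the Arrow-to-pandas conversion single-threaded (deterministic) and map date columns to `datetime64[ns]` instead of Python `date` objects. A standalone pyarrow sketch of the effect (not Fugue code):

import datetime
import pyarrow as pa

tbl = pa.table({"d": [datetime.date(2023, 1, 1)]})
# with date_as_object=False, a date32 column becomes datetime64[ns]
# rather than a column of Python date objects (dtype "object")
pdf = tbl.to_pandas(use_threads=False, date_as_object=False)
assert str(pdf["d"].dtype) == "datetime64[ns]"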
fugue/dataframe/dataframe.py CHANGED
@@ -85,9 +85,13 @@ class DataFrame(Dataset):
         """
         raise NotImplementedError
 
-    @abstractmethod
     def as_local(self) -> "LocalDataFrame":  # pragma: no cover
         """Convert this dataframe to a :class:`.LocalDataFrame`"""
+        return self.as_local_bounded()
+
+    @abstractmethod
+    def as_local_bounded(self) -> "LocalBoundedDataFrame":  # pragma: no cover
+        """Convert this dataframe to a :class:`.LocalBoundedDataFrame`"""
         raise NotImplementedError
 
     @abstractmethod
@@ -317,10 +321,6 @@ class LocalDataFrame(DataFrame):
         """Always True because it's a LocalDataFrame"""
         return True
 
-    def as_local(self) -> "LocalDataFrame":
-        """Always return self, because it's a LocalDataFrame"""
-        return self
-
     @property
     def num_partitions(self) -> int:  # pragma: no cover
         """Always 1 because it's a LocalDataFrame"""
@@ -346,6 +346,10 @@ class LocalBoundedDataFrame(LocalDataFrame):
         """Always True because it's a bounded dataframe"""
         return True
 
+    def as_local_bounded(self) -> "LocalBoundedDataFrame":
+        """Always return self, because it's a bounded dataframe"""
+        return self
+
 
 class LocalUnboundedDataFrame(LocalDataFrame):
     """Base class of all local unbounded dataframes. Read
@@ -367,6 +371,9 @@ class LocalUnboundedDataFrame(LocalDataFrame):
         """Always False because it's an unbounded dataframe"""
         return False
 
+    def as_local(self) -> "LocalDataFrame":
+        return self
+
     def count(self) -> int:
         """
         :raises InvalidOperationError: You can't count an unbounded dataframe
@@ -458,22 +465,14 @@ def _get_dataframe_display(ds: DataFrame):
     return DataFrameDisplay(ds)
 
 
-@as_local.candidate(lambda df: isinstance(df, DataFrame) and not df.is_local)
-def _df_to_local(df: DataFrame) -> DataFrame:
+@as_local.candidate(lambda df: isinstance(df, DataFrame))
+def _df_to_local(df: DataFrame) -> LocalDataFrame:
     return df.as_local()
 
 
-@as_local_bounded.candidate(
-    lambda df: isinstance(df, DataFrame) and not (df.is_local and df.is_bounded),
-    priority=0.9,
-)
-def _df_to_local_bounded(df: DataFrame) -> DataFrame:
-    res: DataFrame = df.as_local()
-    if not res.is_bounded:
-        res = as_fugue_df(res.as_array(), schema=df.schema)
-    if res is not df and df.has_metadata:
-        res.reset_metadata(df.metadata)
-    return res
+@as_local_bounded.candidate(lambda df: isinstance(df, DataFrame))
+def _df_to_local_bounded(df: DataFrame) -> LocalBoundedDataFrame:
+    return df.as_local_bounded()
 
 
 def _get_schema_change(
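
With `as_local_bounded` on the base class, the functional dispatchers collapse to plain delegation for every `DataFrame`. A hedged usage sketch, assuming `fugue.api` re-exports the `as_local_bounded` dispatcher as the imports above suggest:

import pandas as pd
import fugue.api as fa
from fugue import PandasDataFrame

df = PandasDataFrame(pd.DataFrame({"a": [1, 2]}), schema="a:long")
# a PandasDataFrame is already local and bounded, so it comes back unchanged
assert fa.as_local_bounded(df) is df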
fugue/dataframe/dataframe_iterable_dataframe.py CHANGED
@@ -2,16 +2,20 @@ from typing import Any, Dict, Iterable, List, Optional
 
 import pandas as pd
 import pyarrow as pa
-from fugue.dataframe.array_dataframe import ArrayDataFrame
-from fugue.dataframe.dataframe import (
+from triad import Schema, assert_or_throw
+from triad.utils.iter import EmptyAwareIterable, make_empty_aware
+
+from fugue.exceptions import FugueDataFrameInitError
+
+from .array_dataframe import ArrayDataFrame
+from .arrow_dataframe import ArrowDataFrame
+from .dataframe import (
     DataFrame,
+    LocalBoundedDataFrame,
     LocalDataFrame,
     LocalUnboundedDataFrame,
-    LocalBoundedDataFrame,
 )
-from fugue.exceptions import FugueDataFrameInitError
-from triad import Schema, assert_or_throw
-from triad.utils.iter import EmptyAwareIterable, make_empty_aware
+from .pandas_dataframe import PandasDataFrame
 
 
 class LocalDataFrameIterableDataFrame(LocalUnboundedDataFrame):
@@ -142,6 +146,9 @@ class LocalDataFrameIterableDataFrame(LocalUnboundedDataFrame):
 
         return LocalDataFrameIterableDataFrame(_transform())
 
+    def as_local_bounded(self) -> "LocalBoundedDataFrame":
+        return ArrowDataFrame(self.as_arrow())
+
     def as_array(
         self, columns: Optional[List[str]] = None, type_safe: bool = False
     ) -> List[Any]:
@@ -190,3 +197,12 @@ class LocalDataFrameIterableDataFrame(LocalUnboundedDataFrame):
             yield df._drop_cols(cols)
 
         return LocalDataFrameIterableDataFrame(_transform())
+
+
+class IterablePandasDataFrame(LocalDataFrameIterableDataFrame):
+    def as_local_bounded(self) -> "LocalBoundedDataFrame":
+        return PandasDataFrame(self.as_pandas(), schema=self.schema)
+
+
+class IterableArrowDataFrame(LocalDataFrameIterableDataFrame):
+    pass
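
A sketch of what the new subclasses change (constructor usage inherited from `LocalDataFrameIterableDataFrame`; the sample data is illustrative): a stream of pandas-backed chunks now collapses to a `PandasDataFrame` instead of an `ArrowDataFrame` when made bounded:

import pandas as pd
from fugue import PandasDataFrame
from fugue.dataframe.dataframe_iterable_dataframe import IterablePandasDataFrame

def chunks():
    # an iterable of local dataframes sharing one schema
    yield PandasDataFrame(pd.DataFrame({"a": [1]}), schema="a:long")
    yield PandasDataFrame(pd.DataFrame({"a": [2]}), schema="a:long")

idf = IterablePandasDataFrame(chunks())
bounded = idf.as_local_bounded()  # a PandasDataFrame holding both rows
assert bounded.count() == 2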