fugue 0.8.7.dev7__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. fugue/collections/sql.py +1 -1
  2. fugue/dataframe/utils.py +4 -18
  3. fugue/test/__init__.py +11 -0
  4. fugue/test/pandas_tester.py +24 -0
  5. fugue/test/plugins.py +393 -0
  6. {fugue-0.8.7.dev7.dist-info → fugue-0.9.0.dist-info}/METADATA +24 -15
  7. {fugue-0.8.7.dev7.dist-info → fugue-0.9.0.dist-info}/RECORD +38 -47
  8. {fugue-0.8.7.dev7.dist-info → fugue-0.9.0.dist-info}/WHEEL +1 -1
  9. fugue-0.9.0.dist-info/entry_points.txt +12 -0
  10. fugue_dask/_io.py +8 -5
  11. fugue_dask/_utils.py +4 -4
  12. fugue_dask/execution_engine.py +11 -0
  13. fugue_dask/registry.py +2 -0
  14. fugue_dask/tester.py +24 -0
  15. fugue_duckdb/__init__.py +0 -5
  16. fugue_duckdb/_io.py +1 -0
  17. fugue_duckdb/registry.py +30 -2
  18. fugue_duckdb/tester.py +49 -0
  19. fugue_ibis/__init__.py +0 -3
  20. fugue_ibis/dataframe.py +2 -2
  21. fugue_ibis/execution_engine.py +14 -7
  22. fugue_ray/_constants.py +3 -4
  23. fugue_ray/_utils/dataframe.py +10 -21
  24. fugue_ray/_utils/io.py +38 -9
  25. fugue_ray/execution_engine.py +1 -2
  26. fugue_ray/registry.py +1 -0
  27. fugue_ray/tester.py +22 -0
  28. fugue_spark/execution_engine.py +5 -5
  29. fugue_spark/registry.py +13 -1
  30. fugue_spark/tester.py +78 -0
  31. fugue_test/__init__.py +82 -0
  32. fugue_test/builtin_suite.py +26 -43
  33. fugue_test/dataframe_suite.py +5 -14
  34. fugue_test/execution_suite.py +170 -143
  35. fugue_test/fixtures.py +61 -0
  36. fugue_version/__init__.py +1 -1
  37. fugue-0.8.7.dev7.dist-info/entry_points.txt +0 -17
  38. fugue_dask/ibis_engine.py +0 -62
  39. fugue_duckdb/ibis_engine.py +0 -56
  40. fugue_ibis/execution/__init__.py +0 -0
  41. fugue_ibis/execution/ibis_engine.py +0 -49
  42. fugue_ibis/execution/pandas_backend.py +0 -54
  43. fugue_ibis/extensions.py +0 -203
  44. fugue_spark/ibis_engine.py +0 -45
  45. fugue_test/ibis_suite.py +0 -92
  46. fugue_test/plugins/__init__.py +0 -0
  47. fugue_test/plugins/dask/__init__.py +0 -2
  48. fugue_test/plugins/dask/fixtures.py +0 -12
  49. fugue_test/plugins/duckdb/__init__.py +0 -2
  50. fugue_test/plugins/duckdb/fixtures.py +0 -9
  51. fugue_test/plugins/misc/__init__.py +0 -2
  52. fugue_test/plugins/misc/fixtures.py +0 -18
  53. fugue_test/plugins/ray/__init__.py +0 -2
  54. fugue_test/plugins/ray/fixtures.py +0 -9
  55. {fugue-0.8.7.dev7.dist-info → fugue-0.9.0.dist-info}/LICENSE +0 -0
  56. {fugue-0.8.7.dev7.dist-info → fugue-0.9.0.dist-info}/top_level.txt +0 -0
fugue_ray/_utils/io.py CHANGED
@@ -4,13 +4,14 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Union
 
 import pyarrow as pa
 import ray.data as rd
+from packaging import version
 from pyarrow import csv as pacsv
 from pyarrow import json as pajson
 from ray.data.datasource import FileExtensionFilter
 from triad.collections import Schema
 from triad.collections.dict import ParamDict
 from triad.utils.assertion import assert_or_throw
-from triad.utils.io import exists, makedirs, rm
+from triad.utils.io import exists, makedirs, rm, isfile
 
 from fugue import ExecutionEngine
 from fugue._utils.io import FileParser, save_df
@@ -18,6 +19,8 @@ from fugue.collections.partition import PartitionSpec
 from fugue.dataframe import DataFrame
 from fugue_ray.dataframe import RayDataFrame
 
+from .._constants import RAY_VERSION
+
 
 class RayIO(object):
     def __init__(self, engine: ExecutionEngine):
@@ -148,6 +151,18 @@ class RayIO(object):
         if infer_schema and columns is not None and not isinstance(columns, list):
             raise ValueError("can't set columns as a schema when infer schema is true")
 
+        if RAY_VERSION >= version.parse("2.10"):
+            if len(p) == 1 and isfile(p[0]):  # TODO: very hacky
+                params: Dict[str, Any] = {}
+            else:
+                params = {"file_extensions": ["csv"]}
+        else:  # pragma: no cover
+            params = {
+                "partition_filter": _FileFiler(
+                    file_extensions=["csv"], exclude=["_SUCCESS"]
+                ),
+            }
+
         def _read_csv(to_str: bool) -> RayDataFrame:
             res = rd.read_csv(
                 p,
@@ -155,9 +170,7 @@
                 read_options=pacsv.ReadOptions(**read_options),
                 parse_options=pacsv.ParseOptions(**parse_options),
                 convert_options=pacsv.ConvertOptions(**convert_options),
-                partition_filter=_FileFiler(
-                    file_extensions=["csv"], exclude=["_SUCCESS"]
-                ),
+                **params,
             )
             if to_str:
                 _schema = res.schema(fetch_if_missing=True)
@@ -195,16 +208,32 @@
         read_options: Dict[str, Any] = {"use_threads": False}
         parse_options: Dict[str, Any] = {}
 
-        def _read_json() -> RayDataFrame:
+        def _read_json() -> RayDataFrame:  # pragma: no cover
+            if RAY_VERSION >= version.parse("2.10"):
+                if len(p) == 1 and isfile(p[0]):  # TODO: very hacky
+                    params: Dict[str, Any] = {"file_extensions": None}
+                else:
+                    params = {"file_extensions": ["json"]}
+            elif RAY_VERSION >= version.parse("2.9"):  # pragma: no cover
+                params = {
+                    "file_extensions": None,
+                    "partition_filter": _FileFiler(
+                        file_extensions=["json"], exclude=["_SUCCESS"]
+                    ),
+                }
+            else:  # pragma: no cover
+                params = {
+                    "partition_filter": _FileFiler(
+                        file_extensions=["json"], exclude=["_SUCCESS"]
+                    ),
+                }
             return RayDataFrame(
                 rd.read_json(
                     p,
                     ray_remote_args=self._remote_args(),
                     read_options=pajson.ReadOptions(**read_options),
                     parse_options=pajson.ParseOptions(**parse_options),
-                    partition_filter=_FileFiler(
-                        file_extensions=["json"], exclude=["_SUCCESS"]
-                    ),
+                    **params,
                 )
             )
 
@@ -221,7 +250,7 @@
         return {"num_cpus": 1}
 
 
-class _FileFiler(FileExtensionFilter):
+class _FileFiler(FileExtensionFilter):  # pragma: no cover
    def __init__(self, file_extensions: Union[str, List[str]], exclude: Iterable[str]):
        super().__init__(file_extensions, allow_if_no_extension=True)
        self._exclude = set(exclude)
fugue_ray/execution_engine.py CHANGED
@@ -191,8 +191,7 @@ class RayMapEngine(MapEngine):
             mb_args["batch_size"] = self.conf.get_or_throw(
                 FUGUE_RAY_DEFAULT_BATCH_SIZE, int
             )
-        if ray.__version__ >= "2.3":
-            mb_args["zero_copy_batch"] = self.conf.get(FUGUE_RAY_ZERO_COPY, True)
+        mb_args["zero_copy_batch"] = self.conf.get(FUGUE_RAY_ZERO_COPY, True)
         sdf = rdf.native.map_batches(
             _udf,
             batch_format="pyarrow",
fugue_ray/registry.py CHANGED
@@ -14,6 +14,7 @@ from fugue.plugins import as_fugue_dataset, infer_execution_engine
 
 from .dataframe import RayDataFrame
 from .execution_engine import RayExecutionEngine
+from .tester import RayTestBackend  # noqa: F401  # pylint: disable-all
 
 
 @infer_execution_engine.candidate(
fugue_ray/tester.py ADDED
@@ -0,0 +1,22 @@
+from contextlib import contextmanager
+from typing import Any, Dict, Iterator
+
+import ray
+
+import fugue.test as ft
+
+
+@ft.fugue_test_backend
+class RayTestBackend(ft.FugueTestBackend):
+    name = "ray"
+    default_session_conf = {"num_cpus": 2}
+    default_fugue_conf = {
+        "fugue.ray.zero_copy": True,
+        "fugue.ray.default.batch_size": 10000,
+    }
+
+    @classmethod
+    @contextmanager
+    def session_context(cls, session_conf: Dict[str, Any]) -> Iterator[Any]:
+        with ray.init(**session_conf):
+            yield "ray"
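A brief sketch (not part of the diff) of what this backend does when a test session starts, using only what is shown above: the session conf is passed straight to ray.init, and the yielded session value is the string "ray".

```python
# Illustration only: the effective behavior of RayTestBackend.session_context
# when called with its default_session_conf shown above.
import ray

with ray.init(num_cpus=2):  # default_session_conf = {"num_cpus": 2}
    pass  # tests run against this local Ray instance; the backend yields the string "ray"
```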
fugue_spark/execution_engine.py CHANGED
@@ -492,11 +492,6 @@ class SparkExecutionEngine(ExecutionEngine):
         res.reset_metadata(df.metadata)
         return res
 
-    def register(self, df: DataFrame, name: str) -> SparkDataFrame:
-        sdf = self._to_spark_df(df)
-        sdf.native.createOrReplaceTempView(name)
-        return sdf
-
     def join(
         self,
         df1: DataFrame,
@@ -679,6 +674,11 @@ class SparkExecutionEngine(ExecutionEngine):
 
         # If partition exists
         else:
+            if len(_presort.keys()) == 0 and n == 1:
+                return self._to_spark_df(
+                    d.dropDuplicates(subset=partition_spec.partition_by), df.schema
+                )
+
             w = Window.partitionBy([col(x) for x in partition_spec.partition_by])
 
             if len(_presort.keys()) > 0:
fugue_spark/registry.py CHANGED
@@ -19,6 +19,13 @@ from fugue_spark.dataframe import SparkDataFrame
 from fugue_spark.execution_engine import SparkExecutionEngine
 
 from ._utils.misc import SparkConnectDataFrame, SparkConnectSession, is_spark_dataframe
+from .tester import SparkTestBackend  # noqa: F401  # pylint: disable-all
+
+try:
+    from .tester import SparkConnectTestBackend  # noqa: F401  # pylint: disable-all
+except ImportError:  # pragma: no cover
+    pass
+
 
 _is_sparksql = namespace_candidate("sparksql", lambda x: isinstance(x, str))
 
@@ -31,7 +38,12 @@ _is_sparksql = namespace_candidate("sparksql", lambda x: isinstance(x, str))
     )
     or any(_is_sparksql(obj) for obj in objs)
 )
-def _infer_spark_client(obj: Any) -> Any:
+def _infer_spark_client(objs: Any) -> Any:
+    obj = objs[0]
+    if isinstance(obj, SparkDataFrame):
+        obj = obj.native
+    if hasattr(obj, "sparkSession"):
+        return obj.sparkSession
    return SparkSession.builder.getOrCreate()
 
 
fugue_spark/tester.py ADDED
@@ -0,0 +1,78 @@
+from contextlib import contextmanager
+from typing import Any, Dict, Iterator
+
+from pyspark.sql import SparkSession
+
+import fugue.test as ft
+
+from ._utils.misc import SparkConnectSession
+
+
+@ft.fugue_test_backend
+class SparkTestBackend(ft.FugueTestBackend):
+    name = "spark"
+    default_session_conf = {
+        "spark.app.name": "fugue-test-spark",
+        "spark.master": "local[*]",
+        "spark.default.parallelism": 4,
+        "spark.dynamicAllocation.enabled": "false",
+        "spark.executor.cores": 4,
+        "spark.executor.instances": 1,
+        "spark.io.compression.codec": "lz4",
+        "spark.rdd.compress": "false",
+        "spark.sql.shuffle.partitions": 4,
+        "spark.shuffle.compress": "false",
+        "spark.sql.catalogImplementation": "in-memory",
+        "spark.sql.execution.arrow.pyspark.enabled": True,
+        "spark.sql.adaptive.enabled": False,
+    }
+
+    @classmethod
+    def transform_session_conf(cls, conf: Dict[str, Any]) -> Dict[str, Any]:
+        return ft.extract_conf(conf, "spark.", remove_prefix=False)
+
+    @classmethod
+    @contextmanager
+    def session_context(cls, session_conf: Dict[str, Any]) -> Iterator[Any]:
+        with _create_session(session_conf).getOrCreate() as spark:
+            yield spark
+
+
+if SparkConnectSession is not None:
+
+    @ft.fugue_test_backend
+    class SparkConnectTestBackend(SparkTestBackend):
+        name = "sparkconnect"
+        default_session_conf = {
+            "spark.default.parallelism": 4,
+            "spark.sql.shuffle.partitions": 4,
+            "spark.sql.execution.arrow.pyspark.enabled": True,
+            "spark.sql.adaptive.enabled": False,
+        }
+
+        @classmethod
+        def transform_session_conf(
+            cls, conf: Dict[str, Any]
+        ) -> Dict[str, Any]:  # pragma: no cover
+            # replace sparkconnect. with spark.
+            return {
+                "spark." + k: v
+                for k, v in ft.extract_conf(
+                    conf, cls.name + ".", remove_prefix=True
+                ).items()
+            }
+
+        @classmethod
+        @contextmanager
+        def session_context(
+            cls, session_conf: Dict[str, Any]
+        ) -> Iterator[Any]:  # pragma: no cover
+            spark = _create_session(session_conf).remote("sc://localhost").getOrCreate()
+            yield spark
+
+
+def _create_session(conf: Dict[str, Any]) -> Any:
+    sb = SparkSession.builder
+    for k, v in conf.items():
+        sb = sb.config(k, v)
+    return sb
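A small sketch (not part of the diff) of how the conf-prefix handling above is intended to behave, assuming ft.extract_conf keeps the entries whose keys start with the given prefix and optionally strips it: SparkTestBackend keeps `spark.*` keys as-is, while SparkConnectTestBackend strips `sparkconnect.` and re-adds `spark.`. The keys and values below are placeholders for illustration.

```python
# Hypothetical user conf; keys are placeholders, not defaults of the package.
conf = {
    "spark.sql.shuffle.partitions": 4,
    "sparkconnect.sql.shuffle.partitions": 8,
    "dask.processes": True,
}

# SparkTestBackend.transform_session_conf(conf) would keep the "spark."-prefixed
# entries unchanged: {"spark.sql.shuffle.partitions": 4}
#
# SparkConnectTestBackend.transform_session_conf(conf) would strip "sparkconnect."
# and re-prefix with "spark.": {"spark.sql.shuffle.partitions": 8}
```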
fugue_test/__init__.py CHANGED
@@ -0,0 +1,82 @@
+from typing import Any, Dict, Tuple
+
+import pyarrow as pa
+import pytest
+from triad.utils.pyarrow import to_pa_datatype
+
+_FUGUE_TEST_CONF_NAME = "fugue_test_conf"
+
+
+def pytest_addoption(parser: Any):  # pragma: no cover
+    parser.addini(
+        _FUGUE_TEST_CONF_NAME,
+        help="Configs for fugue testing execution engines",
+        type="linelist",
+    )
+
+
+def pytest_configure(config: Any):
+    from fugue.test.plugins import _set_global_conf
+
+    options = config.getini(_FUGUE_TEST_CONF_NAME)
+    conf: Dict[str, Any] = {}
+    if options:
+        for line in options:
+            line = line.strip()
+            if not line.startswith("#"):
+                k, v = _parse_line(line)
+                conf[k] = v
+    _set_global_conf(conf)
+
+
+def pytest_report_header(config, start_path):
+    from fugue.test.plugins import _get_all_ini_conf
+
+    header_lines = []
+    header_lines.append("Fugue tests will be initialized with options:")
+    for k, v in _get_all_ini_conf().items():
+        header_lines.append(f"\t{k} = {v}")
+    return "\n".join(header_lines)
+
+
+def _parse_line(line: str) -> Tuple[str, Any]:
+    try:
+        kv = line.split("=", 1)
+        if len(kv) == 1:
+            raise ValueError()
+        kt = kv[0].split(":", 1)
+        if len(kt) == 1:
+            tp = pa.string()
+        else:
+            tp = to_pa_datatype(kt[1].strip())
+        key = kt[0].strip()
+        if key == "":
+            raise ValueError()
+        value = pa.compute.cast([kv[1].strip()], tp).to_pylist()[0]
+        return key, value
+    except Exception:
+        raise ValueError(
+            f"Invalid config line: {line}, it must be in format: key[:type]=value"
+        )
+
+
+@pytest.fixture(scope="class")
+def backend_context(request: Any):
+    from fugue.test.plugins import _make_backend_context, _parse_backend
+
+    c, _ = _parse_backend(request.param)
+    session = request.getfixturevalue(c + "_session")
+    with _make_backend_context(request.param, session) as ctx:
+        yield ctx
+
+
+@pytest.fixture(scope="class")
+def _class_backend_context(request, backend_context):
+    from fugue.test.plugins import FugueTestContext
+
+    request.cls._test_context = FugueTestContext(
+        engine=backend_context.engine,
+        session=backend_context.session,
+        name=backend_context.name,
+    )
+    yield
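The pytest hooks above read a linelist ini option named fugue_test_conf, one `key[:type]=value` entry per line (lines starting with `#` are skipped, and the type defaults to string). A hedged usage sketch, assuming the Arrow cast semantics of _parse_line shown above; the option keys and values are placeholders, not package defaults.

```python
# Illustration only: how individual `fugue_test_conf` ini lines are parsed.
# In pytest.ini / setup.cfg / pyproject this is a multi-line option, e.g.
#   fugue_test_conf =
#       fugue.dummy.option=hello
#       spark.sql.shuffle.partitions:int=4
from fugue_test import _parse_line

assert _parse_line("fugue.dummy.option=hello") == ("fugue.dummy.option", "hello")
assert _parse_line("spark.sql.shuffle.partitions:int=4") == (
    "spark.sql.shuffle.partitions",
    4,
)
```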
fugue_test/builtin_suite.py CHANGED
@@ -10,9 +10,8 @@ import datetime
 import os
 import pickle
 from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional
-from unittest import TestCase
 from uuid import uuid4
-from triad.utils.io import write_text, join
+
 
 import numpy as np
 import pandas as pd
 import pyarrow as pa
@@ -20,8 +19,10 @@ import pytest
 from fsspec.implementations.local import LocalFileSystem
 from pytest import raises
 from triad import SerializableRLock
+from triad.utils.io import join, write_text
 
 import fugue.api as fa
+import fugue.test as ft
 from fugue import (
     AnyDataFrame,
     ArrayDataFrame,
@@ -46,7 +47,6 @@ from fugue import (
     outputter,
     processor,
     register_creator,
-    register_default_sql_engine,
     register_output_transformer,
     register_outputter,
     register_processor,
@@ -56,7 +56,6 @@ from fugue import (
 from fugue.column import col
 from fugue.column import functions as ff
 from fugue.column import lit
-from fugue.dataframe.utils import _df_eq as df_eq
 from fugue.exceptions import (
     FugueInterfacelessError,
     FugueWorkflowCompileError,
@@ -78,26 +77,10 @@ class BuiltInTests(object):
     add correspondent tests here
     """
 
-    class Tests(TestCase):
-        @classmethod
-        def setUpClass(cls):
-            register_default_sql_engine(lambda engine: engine.sql_engine)
-            cls._engine = cls.make_engine(cls)
-
-        @property
-        def engine(self) -> ExecutionEngine:
-            return self._engine  # type: ignore
-
-        @classmethod
-        def tearDownClass(cls):
-            cls._engine.stop()
-
-        def make_engine(self) -> ExecutionEngine:  # pragma: no cover
-            raise NotImplementedError
-
+    class Tests(ft.FugueTestSuite):
         def test_workflows(self):
             a = FugueWorkflow().df([[0]], "a:int")
-            df_eq(a.compute(self.engine), [[0]], "a:int")
+            self.df_eq(a.compute(self.engine), [[0]], "a:int")
 
         def test_create_show(self):
             with FugueWorkflow() as dag:
@@ -1706,7 +1689,7 @@
                """,
                x=sdf3,
            ).run()
-            df_eq(
+            self.df_eq(
                res["res"],
                [[3, 4, 13]],
                schema="a:long,b:int,c:long",
@@ -1739,9 +1722,9 @@
            df1 = pd.DataFrame([[0, 1], [2, 3]], columns=["a b", " "])
            df2 = pd.DataFrame([[0, 10], [20, 3]], columns=["a b", "d"])
            r = fa.inner_join(df1, df2, as_fugue=True)
-            df_eq(r, [[0, 1, 10]], "`a b`:long,` `:long,d:long", throw=True)
+            self.df_eq(r, [[0, 1, 10]], "`a b`:long,` `:long,d:long", throw=True)
            r = fa.transform(r, tr)
-            df_eq(
+            self.df_eq(
                r,
                [[0, 1, 10, 2]],
                "`a b`:long,` `:long,d:long,`c *`:long",
@@ -1755,7 +1738,7 @@
                col("d"),
                col("c *").cast(int),
            )
-            df_eq(
+            self.df_eq(
                r,
                [[0, 1, 10, 2]],
                "`a b `:long,`x y`:long,d:long,`c *`:long",
@@ -1764,13 +1747,13 @@
            r = fa.rename(r, {"a b ": "a b"})
            fa.save(r, f_csv, header=True, force_single=True)
            fa.save(r, f_parquet)
-            df_eq(
+            self.df_eq(
                fa.load(f_parquet, columns=["x y", "d", "c *"], as_fugue=True),
                [[1, 10, 2]],
                "`x y`:long,d:long,`c *`:long",
                throw=True,
            )
-            df_eq(
+            self.df_eq(
                fa.load(
                    f_csv,
                    header=True,
@@ -1782,7 +1765,7 @@
                "d:str,`c *`:str",
                throw=True,
            )
-            df_eq(
+            self.df_eq(
                fa.load(
                    f_csv,
                    header=True,
@@ -1802,14 +1785,14 @@
                """,
                as_fugue=True,
            )
-            df_eq(r, [[0, 1, 10]], "`a b`:long,` `:long,d:long", throw=True)
+            self.df_eq(r, [[0, 1, 10]], "`a b`:long,` `:long,d:long", throw=True)
            r = fa.fugue_sql(
                """
                TRANSFORM r USING tr SCHEMA *,`c *`:long
                """,
                as_fugue=True,
            )
-            df_eq(
+            self.df_eq(
                r,
                [[0, 1, 10, 2]],
                "`a b`:long,` `:long,d:long,`c *`:long",
@@ -1821,7 +1804,7 @@
                """,
                as_fugue=True,
            )
-            df_eq(
+            self.df_eq(
                r,
                [[0, 1, 10, 2]],
                "`a b`:long,` `:long,d:long,`c *`:long",
@@ -1842,19 +1825,19 @@
                f_parquet=f_parquet,
                f_csv=f_csv,
            ).run()
-            df_eq(
+            self.df_eq(
                res["r1"],
                [[1, 10, 2]],
                "`x y`:long,d:long,`c *`:long",
                throw=True,
            )
-            df_eq(
+            self.df_eq(
                res["r2"],
                [["1", "10", "2"]],
                "`x y`:str,d:str,`c *`:str",
                throw=True,
            )
-            df_eq(
+            self.df_eq(
                res["r3"],
                [[0, 1, 10, 2]],
                "`a b`:long,`x y`:long,d:long,`c *`:long",
@@ -1875,13 +1858,13 @@ def mock_processor(df1: List[List[Any]], df2: List[List[Any]]) -> DataFrame:
 
 
 def mock_processor2(e: ExecutionEngine, dfs: DataFrames) -> DataFrame:
-    assert "test" in e.conf
+    assert "fugue.test" in e.conf
     return ArrayDataFrame([[sum(s.count() for s in dfs.values())]], "a:int")
 
 
 class MockProcessor3(Processor):
     def process(self, dfs):
-        assert "test" in self.workflow_conf
+        assert "fugue.test" in self.workflow_conf
        return ArrayDataFrame([[sum(s.count() for s in dfs.values())]], "a:int")
 
 
@@ -1915,11 +1898,11 @@ class MockOutputter4(Outputter):
 
 class MockTransform1(Transformer):
     def get_output_schema(self, df: DataFrame) -> Any:
-        assert "test" in self.workflow_conf
+        assert "fugue.test" in self.workflow_conf
        return [df.schema, "ct:int,p:int"]
 
    def on_init(self, df: DataFrame) -> None:
-        assert "test" in self.workflow_conf
+        assert "fugue.test" in self.workflow_conf
        self.pn = self.cursor.physical_partition_no
        self.ks = self.key_schema
        if "on_init_called" not in self.__dict__:
@@ -1929,7 +1912,7 @@ class MockTransform1(Transformer):
 
    def transform(self, df: LocalDataFrame) -> LocalDataFrame:
        assert 1 == self.on_init_called
-        assert "test" in self.workflow_conf
+        assert "fugue.test" in self.workflow_conf
        pdf = df.as_pandas()
        pdf["p"] = self.params.get("p", 1)
        pdf["ct"] = pdf.shape[0]
@@ -1971,7 +1954,7 @@ def mock_tf3(df: Iterable[Dict[str, Any]]) -> Iterable[Dict[str, Any]]:
 
 
 class MockCoTransform1(CoTransformer):
    def get_output_schema(self, dfs: DataFrames) -> Any:
-        assert "test" in self.workflow_conf
+        assert "fugue.test" in self.workflow_conf
        assert 2 == len(dfs)
        if self.params.get("named", False):
            assert dfs.has_key
@@ -1980,7 +1963,7 @@ class MockCoTransform1(CoTransformer):
        return [self.key_schema, "ct1:int,ct2:int,p:int"]
 
    def on_init(self, dfs: DataFrames) -> None:
-        assert "test" in self.workflow_conf
+        assert "fugue.test" in self.workflow_conf
        assert 2 == len(dfs)
        if self.params.get("named", False):
            assert dfs.has_key
@@ -1995,7 +1978,7 @@ class MockCoTransform1(CoTransformer):
 
    def transform(self, dfs: DataFrames) -> LocalDataFrame:
        assert 1 == self.on_init_called
-        assert "test" in self.workflow_conf
+        assert "fugue.test" in self.workflow_conf
        assert 2 == len(dfs)
        if self.params.get("named", False):
            assert dfs.has_key
fugue_test/dataframe_suite.py CHANGED
@@ -2,15 +2,14 @@
 
 from datetime import date, datetime
 from typing import Any
-from unittest import TestCase
 
 import numpy as np
 import pandas as pd
 from pytest import raises
 
 import fugue.api as fi
+import fugue.test as ft
 from fugue.dataframe import ArrowDataFrame, DataFrame
-from fugue.dataframe.utils import _df_eq as df_eq
 from fugue.exceptions import FugueDataFrameOperationError, FugueDatasetEmptyError
 
 
@@ -19,15 +18,7 @@ class DataFrameTests(object):
     All new DataFrame types should pass this test suite.
     """
 
-    class Tests(TestCase):
-        @classmethod
-        def setUpClass(cls):
-            pass
-
-        @classmethod
-        def tearDownClass(cls):
-            pass
-
+    class Tests(ft.FugueTestSuite):
        def df(self, data: Any = None, schema: Any = None) -> Any:  # pragma: no cover
            raise NotImplementedError
@@ -129,7 +120,7 @@
            assert [[1]] == fi.as_array(df, type_safe=True)
 
            df = self.df([["a", 1, 2]], "a:str,b:int,c:int")
-            df_eq(
+            self.df_eq(
                fi.as_fugue_df(fi.select_columns(df, ["c", "a"])),
                [[2, "a"]],
                "a:str,c:int",
@@ -140,13 +131,13 @@
            df = self.df(data, "a:str,b:int")
            df2 = fi.rename(df, columns=dict(a="aa"))
            assert fi.get_schema(df) == "a:str,b:int"
-            df_eq(fi.as_fugue_df(df2), data, "aa:str,b:int", throw=True)
+            self.df_eq(fi.as_fugue_df(df2), data, "aa:str,b:int", throw=True)
 
            for data in [[["a", 1]], []]:
                df = self.df(data, "a:str,b:int")
                df3 = fi.rename(df, columns={})
                assert fi.get_schema(df3) == "a:str,b:int"
-                df_eq(fi.as_fugue_df(df3), data, "a:str,b:int", throw=True)
+                self.df_eq(fi.as_fugue_df(df3), data, "a:str,b:int", throw=True)
 
        def test_rename_invalid(self):
            df = self.df([["a", 1]], "a:str,b:int")