fugue 0.9.0.dev2__tar.gz → 0.9.0.dev4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/PKG-INFO +1 -1
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/collections/sql.py +1 -1
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataframe/utils.py +4 -18
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/test/plugins.py +11 -1
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue.egg-info/PKG-INFO +1 -1
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue.egg-info/requires.txt +12 -7
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_dask/_io.py +8 -5
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_dask/_utils.py +4 -4
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_duckdb/_io.py +1 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ibis/execution_engine.py +14 -7
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/_constants.py +3 -4
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/_utils/dataframe.py +10 -21
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/_utils/io.py +36 -13
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/execution_engine.py +1 -2
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_test/builtin_suite.py +14 -15
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_test/dataframe_suite.py +3 -4
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_test/execution_suite.py +130 -123
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/setup.cfg +1 -1
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/setup.py +12 -6
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/LICENSE +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/README.md +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/_utils/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/_utils/display.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/_utils/exception.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/_utils/interfaceless.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/_utils/io.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/_utils/misc.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/_utils/registry.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/api.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/bag/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/bag/array_bag.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/bag/bag.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/collections/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/collections/partition.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/collections/yielded.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/column/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/column/expressions.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/column/functions.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/column/sql.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/constants.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataframe/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataframe/api.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataframe/array_dataframe.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataframe/arrow_dataframe.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataframe/dataframe.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataframe/dataframe_iterable_dataframe.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataframe/dataframes.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataframe/function_wrapper.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataframe/iterable_dataframe.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataframe/pandas_dataframe.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataset/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataset/api.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataset/dataset.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dev.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/exceptions.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/execution/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/execution/api.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/execution/execution_engine.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/execution/factory.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/execution/native_execution_engine.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/_builtins/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/_builtins/creators.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/_builtins/outputters.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/_builtins/processors.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/_utils.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/context.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/creator/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/creator/convert.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/creator/creator.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/outputter/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/outputter/convert.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/outputter/outputter.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/processor/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/processor/convert.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/processor/processor.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/transformer/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/transformer/constants.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/transformer/convert.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/transformer/transformer.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/plugins.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/py.typed +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/registry.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/rpc/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/rpc/base.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/rpc/flask.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/sql/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/sql/_utils.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/sql/_visitors.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/sql/api.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/sql/workflow.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/test/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/test/pandas_tester.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/workflow/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/workflow/_checkpoint.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/workflow/_tasks.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/workflow/_workflow_context.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/workflow/api.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/workflow/input.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/workflow/module.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/workflow/workflow.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue.egg-info/SOURCES.txt +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue.egg-info/dependency_links.txt +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue.egg-info/entry_points.txt +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue.egg-info/top_level.txt +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_contrib/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_contrib/contrib.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_contrib/seaborn/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_contrib/viz/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_contrib/viz/_ext.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_dask/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_dask/_constants.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_dask/dataframe.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_dask/execution_engine.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_dask/registry.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_dask/tester.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_duckdb/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_duckdb/_utils.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_duckdb/dask.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_duckdb/dataframe.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_duckdb/execution_engine.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_duckdb/registry.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_duckdb/tester.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ibis/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ibis/_compat.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ibis/_utils.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ibis/dataframe.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_notebook/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_notebook/env.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_notebook/nbextension/README.md +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_notebook/nbextension/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_notebook/nbextension/description.yaml +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_notebook/nbextension/main.js +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_polars/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_polars/_utils.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_polars/polars_dataframe.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_polars/registry.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/_utils/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/_utils/cluster.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/dataframe.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/registry.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/tester.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_spark/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_spark/_constants.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_spark/_utils/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_spark/_utils/convert.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_spark/_utils/io.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_spark/_utils/misc.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_spark/_utils/partition.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_spark/dataframe.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_spark/execution_engine.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_spark/registry.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_spark/tester.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_sql/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_sql/exceptions.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_test/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_test/bag_suite.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_test/fixtures.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_version/__init__.py +0 -0
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/collections/sql.py

```diff
@@ -15,7 +15,7 @@ class TempTableName:
     """Generating a temporary, random and globaly unique table name"""

     def __init__(self):
-        self.key = "_" + str(uuid4())[:5]
+        self.key = "_" + str(uuid4())[:5].upper()

     def __repr__(self) -> str:
         return _TEMP_TABLE_EXPR_PREFIX + self.key + _TEMP_TABLE_EXPR_SUFFIX
```
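Generated table keys are now uppercased, matching similar changes in `fugue_ibis` below (likely for case-insensitive SQL backends, though the diff does not state the motivation). A minimal sketch of the new behavior:

```python
# The first 5 characters of a UUID string are hex digits, so uppercasing
# affects only the letter digits, e.g. "_3f2a1" becomes "_3F2A1".
from uuid import uuid4

key = "_" + str(uuid4())[:5].upper()
assert key.startswith("_") and key == key.upper()
```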
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataframe/utils.py

```diff
@@ -21,22 +21,6 @@ normalize_dataframe_column_names = normalize_column_names
 rename_dataframe_column_names = rename


-def _pa_type_eq(t1: pa.DataType, t2: pa.DataType) -> bool:
-    # should ignore the name difference of list
-    # e.g. list<item: string> == list<l: string>
-    if pa.types.is_list(t1) and pa.types.is_list(t2):  # pragma: no cover
-        return _pa_type_eq(t1.value_type, t2.value_type)
-    return t1 == t2
-
-
-def _schema_eq(s1: Schema, s2: Schema) -> bool:
-    if s1 == s2:
-        return True
-    return s1.names == s2.names and all(
-        _pa_type_eq(f1.type, f2.type) for f1, f2 in zip(s1.fields, s2.fields)
-    )
-
-
 def _df_eq(
     df: DataFrame,
     data: Any,
@@ -46,6 +30,7 @@ def _df_eq(
     check_schema: bool = True,
     check_content: bool = True,
     no_pandas: bool = False,
+    equal_type_groups: Optional[List[List[Any]]] = None,
     throw=False,
 ) -> bool:
     """Compare if two dataframes are equal. Is for internal, unit test
@@ -66,6 +51,7 @@ def _df_eq(
     :param no_pandas: if true, it will compare the string representations of the
         dataframes, otherwise, it will convert both to pandas dataframe to compare,
         defaults to False
+    :param equal_type_groups: the groups to treat as equal types, defaults to None.
     :param throw: if to throw error if not equal, defaults to False
     :return: if they equal
     """
@@ -78,8 +64,8 @@ def _df_eq(
     assert (
         df1.count() == df2.count()
     ), f"count mismatch {df1.count()}, {df2.count()}"
-    assert not check_schema or
-
+    assert not check_schema or df.schema.is_like(
+        df2.schema, equal_groups=equal_type_groups
     ), f"schema mismatch {df.schema.pa_schema}, {df2.schema.pa_schema}"
     if not check_content:
         return True
```
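The hand-rolled `_pa_type_eq`/`_schema_eq` helpers are replaced by triad's `Schema.is_like`, and the new `equal_type_groups` argument lets callers declare sets of types that should compare as equal. A self-contained sketch of the idea; expressing groups as plain pyarrow types is an assumption made here for illustration:

```python
# Minimal illustration of "equal type groups": two types compare equal
# if they are identical or belong to the same declared group.
import pyarrow as pa

def types_equal(t1: pa.DataType, t2: pa.DataType, equal_groups=None) -> bool:
    if t1 == t2:
        return True
    for group in equal_groups or []:
        if t1 in group and t2 in group:
            return True
    return False

groups = [[pa.int32(), pa.int64()]]
assert types_equal(pa.int32(), pa.int64(), groups)   # tolerated difference
assert not types_equal(pa.int32(), pa.string(), groups)  # still unequal
```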
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/test/plugins.py

```diff
@@ -2,7 +2,7 @@ from contextlib import contextmanager
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Dict, Iterator, List, Optional, Tuple, Type
-
+from fugue.dataframe.utils import _df_eq
 from triad import assert_or_throw, run_once
 from triad.utils.entry_points import load_entry_point

@@ -160,6 +160,7 @@ class FugueTestSuite:

     backend: Any
     tmp_path: Path
+    equal_type_groups: Any = None

     __test__ = False
     _test_context: Any = None
@@ -180,6 +181,15 @@ class FugueTestSuite:
         """The engine object inside the ``FugueTestContext``"""
         return self.context.engine

+    def get_equal_type_groups(self) -> Optional[List[List[Any]]]:
+        return None  # pragma: no cover
+
+    def df_eq(self, *args: Any, **kwargs: Any) -> bool:
+        """A wrapper of :func:`~fugue.dataframe.utils.df_eq`"""
+        if "equal_type_groups" not in kwargs:
+            kwargs["equal_type_groups"] = self.equal_type_groups
+        return _df_eq(*args, **kwargs)
+

 def fugue_test_suite(backend: Any, mark_test: Optional[bool] = None) -> Any:
     def deco(cls: Type["FugueTestSuite"]) -> Type["FugueTestSuite"]:
```
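With the new `df_eq` wrapper, a backend test suite can declare its type tolerance once instead of threading `equal_type_groups` through every assertion. A hypothetical suite (the backend name and group format are illustrative assumptions, not from this diff):

```python
import fugue.test as ft

@ft.fugue_test_suite("pandas", mark_test=True)  # backend name is illustrative
class MyBackendTests(ft.FugueTestSuite):
    # assumption: groups declared as type names; consult triad's
    # Schema.is_like for the exact accepted format
    equal_type_groups = [["int", "long"]]

    def test_roundtrip(self):
        # every self.df_eq call applies the suite-level groups by default,
        # so an int/long mismatch from this backend no longer fails
        ...
```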
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue.egg-info/requires.txt

```diff
@@ -1,4 +1,4 @@
-triad>=0.9.
+triad>=0.9.6
 adagio>=0.2.4

 [all]
@@ -9,14 +9,14 @@ jinja2
 pyspark>=3.1.1
 dask[dataframe,distributed]>=2023.5.0
 dask-sql
-ray[data]>=2.
+ray[data]>=2.5.0
 notebook
 jupyterlab
 ipython>=7.10.0
 duckdb>=0.5.0
 pyarrow>=6.0.1
-pandas
-ibis-framework
+pandas<2.2,>=2.0.2
+ibis-framework
 polars

 [cpp_sql_parser]
@@ -27,6 +27,9 @@ dask[dataframe,distributed]>=2023.5.0
 pyarrow>=7.0.0
 pandas>=2.0.2

+[dask:python_version >= "3.11.9"]
+dask[dataframe,distributed]>=2024.4.0
+
 [duckdb]
 qpd>=0.4.4
 fugue-sql-antlr>=0.2.0
@@ -40,7 +43,8 @@ qpd>=0.4.4
 fugue-sql-antlr>=0.2.0
 sqlglot
 jinja2
-ibis-framework
+ibis-framework
+pandas<2.2

 [notebook]
 notebook
@@ -51,9 +55,10 @@ ipython>=7.10.0
 polars

 [ray]
-ray[data]>=2.
+ray[data]>=2.5.0
 duckdb>=0.5.0
-pyarrow>=
+pyarrow>=7.0.0
+pandas<2.2

 [spark]
 pyspark>=3.1.1
```
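Note the new environment-marker section `[dask:python_version >= "3.11.9"]`, which raises the dask floor only on recent Pythons. In setup.py terms this is produced by a marker on the extra's requirement; a sketch of the shape (the package's actual setup.py diff is not shown here):

```python
# Hypothetical extras_require entry; setuptools writes the marker-gated
# requirement into requires.txt as [dask:python_version >= "3.11.9"].
extras_require = {
    "dask": [
        "dask[dataframe,distributed]>=2023.5.0",
        'dask[dataframe,distributed]>=2024.4.0; python_version >= "3.11.9"',
    ],
}
```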
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_dask/_io.py

```diff
@@ -6,7 +6,7 @@ from fsspec import AbstractFileSystem
 from triad.collections.dict import ParamDict
 from triad.collections.schema import Schema
 from triad.utils.assertion import assert_or_throw
-from triad.utils.io import join, makedirs, url_to_fs
+from triad.utils.io import isfile, join, makedirs, url_to_fs

 from fugue._utils.io import FileParser, _get_single_files
 from fugue_dask.dataframe import DaskDataFrame
@@ -100,9 +100,11 @@ def _save_csv(df: DaskDataFrame, p: FileParser, **kwargs: Any) -> None:


 def _safe_load_csv(path: str, **kwargs: Any) -> dd.DataFrame:
+    if not isfile(path):
+        return dd.read_csv(join(path, "*.csv"), **kwargs)
     try:
         return dd.read_csv(path, **kwargs)
-    except (IsADirectoryError, PermissionError):
+    except (IsADirectoryError, PermissionError):  # pragma: no cover
         return dd.read_csv(join(path, "*.csv"), **kwargs)


@@ -148,11 +150,12 @@ def _save_json(df: DaskDataFrame, p: FileParser, **kwargs: Any) -> None:


 def _safe_load_json(path: str, **kwargs: Any) -> dd.DataFrame:
+    if not isfile(path):
+        return dd.read_json(join(path, "*.json"), **kwargs)
     try:
         return dd.read_json(path, **kwargs)
-    except (IsADirectoryError, PermissionError):
-
-        return x
+    except (IsADirectoryError, PermissionError):  # pragma: no cover
+        return dd.read_json(join(path, "*.json"), **kwargs)


 def _load_json(
```
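Both loaders now test `isfile` up front instead of relying on the exception path; the try/except survives only as a fallback. A standalone version of the pattern (function name is illustrative):

```python
import dask.dataframe as dd
from triad.utils.io import isfile, join

def load_csv_any(path: str, **kwargs) -> dd.DataFrame:
    # A directory of part files: glob it directly rather than waiting for
    # IsADirectoryError/PermissionError, which not every filesystem raises.
    if not isfile(path):
        return dd.read_csv(join(path, "*.csv"), **kwargs)
    return dd.read_csv(path, **kwargs)
```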
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_dask/_utils.py

```diff
@@ -53,7 +53,7 @@ def hash_repartition(df: dd.DataFrame, num: int, cols: List[Any]) -> dd.DataFrame
     if num < 1:
         return df
     if num == 1:
-        return df.repartition(1)
+        return df.repartition(npartitions=1)
     df = df.reset_index(drop=True).clear_divisions()
     idf, ct = _add_hash_index(df, num, cols)
     return _postprocess(idf, ct, num)
@@ -76,7 +76,7 @@ def even_repartition(df: dd.DataFrame, num: int, cols: List[Any]) -> dd.DataFrame
     the number of partitions will be the number of groups.
     """
     if num == 1:
-        return df.repartition(1)
+        return df.repartition(npartitions=1)
     if len(cols) == 0 and num <= 0:
         return df
     df = df.reset_index(drop=True).clear_divisions()
@@ -111,7 +111,7 @@ def rand_repartition(
     if num < 1:
         return df
     if num == 1:
-        return df.repartition(1)
+        return df.repartition(npartitions=1)
     df = df.reset_index(drop=True).clear_divisions()
     if len(cols) == 0:
         idf, ct = _add_random_index(df, num=num, seed=seed)
@@ -124,7 +124,7 @@ def rand_repartition(
 def _postprocess(idf: dd.DataFrame, ct: int, num: int) -> dd.DataFrame:
     parts = min(ct, num)
     if parts <= 1:
-        return idf.repartition(1)
+        return idf.repartition(npartitions=1)
     divisions = list(np.arange(ct, step=math.ceil(ct / parts)))
     divisions.append(ct - 1)
     return idf.repartition(divisions=divisions, force=True)
```
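All four call sites switch from positional `repartition(1)` to the explicit keyword form. In classic `dask.dataframe` the first positional parameter of `repartition` is `divisions`, not `npartitions`, so the keyword is both unambiguous and required by newer dask releases:

```python
import pandas as pd
import dask.dataframe as dd

ddf = dd.from_pandas(pd.DataFrame({"a": range(10)}), npartitions=4)
# explicit keyword: works across dask versions, avoids being parsed as divisions
single = ddf.repartition(npartitions=1)
assert single.npartitions == 1
```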
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ibis/execution_engine.py

```diff
@@ -23,8 +23,8 @@ from ._compat import IbisTable
 from ._utils import to_ibis_schema
 from .dataframe import IbisDataFrame

-_JOIN_RIGHT_SUFFIX = "_ibis_y__"
-_GEN_TABLE_NAMES = (f"_fugue_temp_table_{i:d}" for i in itertools.count())
+_JOIN_RIGHT_SUFFIX = "_ibis_y__".upper()
+_GEN_TABLE_NAMES = (f"_fugue_temp_table_{i:d}".upper() for i in itertools.count())


 class IbisSQLEngine(SQLEngine):
@@ -92,11 +92,11 @@ class IbisSQLEngine(SQLEngine):
         _df2 = self.to_df(df2)
         key_schema, end_schema = get_join_schemas(_df1, _df2, how=how, on=on)
         on_fields = [_df1.native[k] == _df2.native[k] for k in key_schema]
-        if ibis.__version__ < "6":
+        if ibis.__version__ < "6":  # pragma: no cover
             suffixes: Dict[str, Any] = dict(suffixes=("", _JOIN_RIGHT_SUFFIX))
-        else:
+        else:
             # breaking change in ibis 6.0
-            suffixes = dict(lname="", rname=_JOIN_RIGHT_SUFFIX)
+            suffixes = dict(lname="", rname="{name}" + _JOIN_RIGHT_SUFFIX)
         if how.lower() == "cross":
             tb = _df1.native.cross_join(_df2.native, **suffixes)
         elif how.lower() == "right_outer":
```
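In ibis ≥ 6, `lname`/`rname` are format templates rather than plain suffixes, so the right-hand side must keep a `{name}` placeholder to preserve the original column name. A standalone illustration of the string semantics (not ibis code itself):

```python
# How the rname template expands for a clashing right-side column.
_JOIN_RIGHT_SUFFIX = "_ibis_y__".upper()
rname = "{name}" + _JOIN_RIGHT_SUFFIX
assert rname.format(name="city") == "city_IBIS_Y__"
```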
```diff
@@ -224,7 +224,7 @@ class IbisSQLEngine(SQLEngine):
             _presort = parse_presort_exp(presort)
         else:
             _presort = partition_spec.presort
-        tbn = "
+        tbn = "_TEMP"
         idf = self.to_df(df)

         if len(_presort) == 0:
@@ -233,9 +233,10 @@ class IbisSQLEngine(SQLEngine):
             pcols = ", ".join(
                 self.encode_column_name(x) for x in partition_spec.partition_by
             )
+            dummy_order_by = self._dummy_window_order_by()
             sql = (
                 f"SELECT * FROM ("
-                f"SELECT *, ROW_NUMBER() OVER (PARTITION BY {pcols}) "
+                f"SELECT *, ROW_NUMBER() OVER (PARTITION BY {pcols} {dummy_order_by}) "
                 f"AS __fugue_take_param FROM {tbn}"
                 f") WHERE __fugue_take_param<={n}"
             )
@@ -290,6 +291,12 @@ class IbisSQLEngine(SQLEngine):
     def load_table(self, table: str, **kwargs: Any) -> DataFrame:
         return self.to_df(self.backend.table(table))

+    def _dummy_window_order_by(self) -> str:
+        """Return a dummy window order by clause, this is required for
+        some SQL backends when there is no real order by clause in window
+        """
+        return ""
+

 class IbisMapEngine(MapEngine):
     """IbisExecutionEngine's MapEngine, it is a wrapper of the map engine
```
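The new `_dummy_window_order_by` hook exists because some SQL dialects reject `ROW_NUMBER() OVER (PARTITION BY ...)` without an `ORDER BY`. The base class returns an empty string; a dialect-specific engine could override it, for example (hypothetical subclass, shown only to illustrate the hook):

```python
from fugue_ibis.execution_engine import IbisSQLEngine

class StrictWindowEngine(IbisSQLEngine):  # hypothetical backend engine
    def _dummy_window_order_by(self) -> str:
        # A constant expression satisfies dialects that insist on an
        # ORDER BY inside a window, without imposing a real ordering.
        return "ORDER BY (SELECT 1)"
```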
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/_constants.py

```diff
@@ -1,6 +1,7 @@
 from typing import Any, Dict

 import ray
+from packaging import version

 FUGUE_RAY_CONF_SHUFFLE_PARTITIONS = "fugue.ray.shuffle.partitions"
 FUGUE_RAY_DEFAULT_PARTITIONS = "fugue.ray.default.partitions"
@@ -12,8 +13,6 @@ FUGUE_RAY_DEFAULT_CONF: Dict[str, Any] = {
     FUGUE_RAY_DEFAULT_PARTITIONS: 0,
     FUGUE_RAY_ZERO_COPY: True,
 }
+RAY_VERSION = version.parse(ray.__version__)

-
-    _ZERO_COPY: Dict[str, Any] = {"zero_copy_batch": True}
-else:  # pragma: no cover
-    _ZERO_COPY = {}
+_ZERO_COPY: Dict[str, Any] = {"zero_copy_batch": True}
```
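`RAY_VERSION` replaces the old string-based gate (the removed `if`/`else` around `_ZERO_COPY`); `packaging.version` compares release segments numerically, which lexicographic string comparison gets wrong:

```python
from packaging import version

assert version.parse("2.10.0") >= version.parse("2.10")
# string comparison wrongly claims 2.9.3 >= 2.10 (because "9" > "1")
assert "2.9.3" >= "2.10"
assert not (version.parse("2.9.3") >= version.parse("2.10"))
```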
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/_utils/dataframe.py

```diff
@@ -3,7 +3,6 @@ from typing import Any, Dict, List, Optional, Tuple

 import pandas as pd
 import pyarrow as pa
-import ray
 import ray.data as rd
 from triad import Schema

@@ -31,31 +30,21 @@ def get_dataset_format(df: rd.Dataset) -> Tuple[Optional[str], rd.Dataset]:
     df = materialize(df)
     if df.count() == 0:
         return None, df
-
-
-
-
-
-
-    else:
-        schema = df.schema(fetch_if_missing=True)
-        if schema is None:  # pragma: no cover
-            return None, df
-        if isinstance(schema.base_schema, pa.Schema):
-            return "arrow", df
-        return "pandas", df
+    schema = df.schema(fetch_if_missing=True)
+    if schema is None:  # pragma: no cover
+        return None, df
+    if isinstance(schema.base_schema, pa.Schema):
+        return "arrow", df
+    return "pandas", df


 def to_schema(schema: Any) -> Schema:  # pragma: no cover
     if isinstance(schema, pa.Schema):
         return Schema(schema)
-    if
-    if isinstance(schema,
-
-
-    ):
-        return Schema(schema.base_schema)
-        return Schema(list(zip(schema.names, schema.types)))
+    if isinstance(schema, rd.Schema):
+        if hasattr(schema, "base_schema") and isinstance(schema.base_schema, pa.Schema):
+            return Schema(schema.base_schema)
+        return Schema(list(zip(schema.names, schema.types)))
     raise ValueError(f"{schema} is not supported")
```
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/_utils/io.py

```diff
@@ -3,15 +3,15 @@ import pathlib
 from typing import Any, Callable, Dict, Iterable, List, Optional, Union

 import pyarrow as pa
-import ray
 import ray.data as rd
+from packaging import version
 from pyarrow import csv as pacsv
 from pyarrow import json as pajson
 from ray.data.datasource import FileExtensionFilter
 from triad.collections import Schema
 from triad.collections.dict import ParamDict
 from triad.utils.assertion import assert_or_throw
-from triad.utils.io import exists, makedirs, rm
+from triad.utils.io import exists, makedirs, rm, isfile

 from fugue import ExecutionEngine
 from fugue._utils.io import FileParser, save_df
@@ -19,6 +19,8 @@ from fugue.collections.partition import PartitionSpec
 from fugue.dataframe import DataFrame
 from fugue_ray.dataframe import RayDataFrame

+from .._constants import RAY_VERSION
+

 class RayIO(object):
     def __init__(self, engine: ExecutionEngine):
@@ -149,6 +151,18 @@ class RayIO(object):
         if infer_schema and columns is not None and not isinstance(columns, list):
             raise ValueError("can't set columns as a schema when infer schema is true")

+        if RAY_VERSION >= version.parse("2.10"):
+            if len(p) == 1 and isfile(p[0]):  # TODO: very hacky
+                params: Dict[str, Any] = {}
+            else:
+                params = {"file_extensions": ["csv"]}
+        else:  # pragma: no cover
+            params = {
+                "partition_filter": _FileFiler(
+                    file_extensions=["csv"], exclude=["_SUCCESS"]
+                ),
+            }
+
         def _read_csv(to_str: bool) -> RayDataFrame:
             res = rd.read_csv(
                 p,
@@ -156,9 +170,7 @@ class RayIO(object):
                 read_options=pacsv.ReadOptions(**read_options),
                 parse_options=pacsv.ParseOptions(**parse_options),
                 convert_options=pacsv.ConvertOptions(**convert_options),
-
-                    file_extensions=["csv"], exclude=["_SUCCESS"]
-                ),
+                **params,
             )
             if to_str:
                 _schema = res.schema(fetch_if_missing=True)
@@ -196,20 +208,31 @@ class RayIO(object):
         read_options: Dict[str, Any] = {"use_threads": False}
         parse_options: Dict[str, Any] = {}

-        def _read_json() -> RayDataFrame:
-            if
-
+        def _read_json() -> RayDataFrame:  # pragma: no cover
+            if RAY_VERSION >= version.parse("2.10"):
+                if len(p) == 1 and isfile(p[0]):  # TODO: very hacky
+                    params: Dict[str, Any] = {"file_extensions": None}
+                else:
+                    params = {"file_extensions": ["json"]}
+            elif RAY_VERSION >= version.parse("2.9"):  # pragma: no cover
+                params = {
+                    "file_extensions": None,
+                    "partition_filter": _FileFiler(
+                        file_extensions=["json"], exclude=["_SUCCESS"]
+                    ),
+                }
             else:  # pragma: no cover
-                params = {
+                params = {
+                    "partition_filter": _FileFiler(
+                        file_extensions=["json"], exclude=["_SUCCESS"]
+                    ),
+                }
             return RayDataFrame(
                 rd.read_json(
                     p,
                     ray_remote_args=self._remote_args(),
                     read_options=pajson.ReadOptions(**read_options),
                     parse_options=pajson.ParseOptions(**parse_options),
-                    partition_filter=_FileFiler(
-                        file_extensions=["json"], exclude=["_SUCCESS"]
-                    ),
                     **params,
                 )
             )
@@ -227,7 +250,7 @@ class RayIO(object):
         return {"num_cpus": 1}


-class _FileFiler(FileExtensionFilter):
+class _FileFiler(FileExtensionFilter):  # pragma: no cover
     def __init__(self, file_extensions: Union[str, List[str]], exclude: Iterable[str]):
         super().__init__(file_extensions, allow_if_no_extension=True)
         self._exclude = set(exclude)
```
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/execution_engine.py

```diff
@@ -191,8 +191,7 @@ class RayMapEngine(MapEngine):
             mb_args["batch_size"] = self.conf.get_or_throw(
                 FUGUE_RAY_DEFAULT_BATCH_SIZE, int
             )
-
-            mb_args["zero_copy_batch"] = self.conf.get(FUGUE_RAY_ZERO_COPY, True)
+        mb_args["zero_copy_batch"] = self.conf.get(FUGUE_RAY_ZERO_COPY, True)
         sdf = rdf.native.map_batches(
             _udf,
             batch_format="pyarrow",
```
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_test/builtin_suite.py

```diff
@@ -56,7 +56,6 @@ from fugue import (
 from fugue.column import col
 from fugue.column import functions as ff
 from fugue.column import lit
-from fugue.dataframe.utils import _df_eq as df_eq
 from fugue.exceptions import (
     FugueInterfacelessError,
     FugueWorkflowCompileError,
@@ -81,7 +80,7 @@ class BuiltInTests(object):
     class Tests(ft.FugueTestSuite):
         def test_workflows(self):
            a = FugueWorkflow().df([[0]], "a:int")
-            df_eq(a.compute(self.engine), [[0]], "a:int")
+            self.df_eq(a.compute(self.engine), [[0]], "a:int")

         def test_create_show(self):
             with FugueWorkflow() as dag:
@@ -1690,7 +1689,7 @@ class BuiltInTests(object):
                """,
                x=sdf3,
            ).run()
-            df_eq(
+            self.df_eq(
                res["res"],
                [[3, 4, 13]],
                schema="a:long,b:int,c:long",
@@ -1723,9 +1722,9 @@ class BuiltInTests(object):
            df1 = pd.DataFrame([[0, 1], [2, 3]], columns=["a b", " "])
            df2 = pd.DataFrame([[0, 10], [20, 3]], columns=["a b", "d"])
            r = fa.inner_join(df1, df2, as_fugue=True)
-            df_eq(r, [[0, 1, 10]], "`a b`:long,` `:long,d:long", throw=True)
+            self.df_eq(r, [[0, 1, 10]], "`a b`:long,` `:long,d:long", throw=True)
            r = fa.transform(r, tr)
-            df_eq(
+            self.df_eq(
                r,
                [[0, 1, 10, 2]],
                "`a b`:long,` `:long,d:long,`c *`:long",
@@ -1739,7 +1738,7 @@ class BuiltInTests(object):
                col("d"),
                col("c *").cast(int),
            )
-            df_eq(
+            self.df_eq(
                r,
                [[0, 1, 10, 2]],
                "`a b `:long,`x y`:long,d:long,`c *`:long",
@@ -1748,13 +1747,13 @@ class BuiltInTests(object):
            r = fa.rename(r, {"a b ": "a b"})
            fa.save(r, f_csv, header=True, force_single=True)
            fa.save(r, f_parquet)
-            df_eq(
+            self.df_eq(
                fa.load(f_parquet, columns=["x y", "d", "c *"], as_fugue=True),
                [[1, 10, 2]],
                "`x y`:long,d:long,`c *`:long",
                throw=True,
            )
-            df_eq(
+            self.df_eq(
                fa.load(
                    f_csv,
                    header=True,
@@ -1766,7 +1765,7 @@ class BuiltInTests(object):
                "d:str,`c *`:str",
                throw=True,
            )
-            df_eq(
+            self.df_eq(
                fa.load(
                    f_csv,
                    header=True,
@@ -1786,14 +1785,14 @@ class BuiltInTests(object):
                """,
                as_fugue=True,
            )
-            df_eq(r, [[0, 1, 10]], "`a b`:long,` `:long,d:long", throw=True)
+            self.df_eq(r, [[0, 1, 10]], "`a b`:long,` `:long,d:long", throw=True)
            r = fa.fugue_sql(
                """
                TRANSFORM r USING tr SCHEMA *,`c *`:long
                """,
                as_fugue=True,
            )
-            df_eq(
+            self.df_eq(
                r,
                [[0, 1, 10, 2]],
                "`a b`:long,` `:long,d:long,`c *`:long",
@@ -1805,7 +1804,7 @@ class BuiltInTests(object):
                """,
                as_fugue=True,
            )
-            df_eq(
+            self.df_eq(
                r,
                [[0, 1, 10, 2]],
                "`a b`:long,` `:long,d:long,`c *`:long",
@@ -1826,19 +1825,19 @@ class BuiltInTests(object):
                f_parquet=f_parquet,
                f_csv=f_csv,
            ).run()
-            df_eq(
+            self.df_eq(
                res["r1"],
                [[1, 10, 2]],
                "`x y`:long,d:long,`c *`:long",
                throw=True,
            )
-            df_eq(
+            self.df_eq(
                res["r2"],
                [["1", "10", "2"]],
                "`x y`:str,d:str,`c *`:str",
                throw=True,
            )
-            df_eq(
+            self.df_eq(
                res["r3"],
                [[0, 1, 10, 2]],
                "`a b`:long,`x y`:long,d:long,`c *`:long",
```
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_test/dataframe_suite.py

```diff
@@ -10,7 +10,6 @@ from pytest import raises
 import fugue.api as fi
 import fugue.test as ft
 from fugue.dataframe import ArrowDataFrame, DataFrame
-from fugue.dataframe.utils import _df_eq as df_eq
 from fugue.exceptions import FugueDataFrameOperationError, FugueDatasetEmptyError


@@ -121,7 +120,7 @@ class DataFrameTests(object):
            assert [[1]] == fi.as_array(df, type_safe=True)

            df = self.df([["a", 1, 2]], "a:str,b:int,c:int")
-            df_eq(
+            self.df_eq(
                fi.as_fugue_df(fi.select_columns(df, ["c", "a"])),
                [[2, "a"]],
                "a:str,c:int",
@@ -132,13 +131,13 @@ class DataFrameTests(object):
                df = self.df(data, "a:str,b:int")
                df2 = fi.rename(df, columns=dict(a="aa"))
                assert fi.get_schema(df) == "a:str,b:int"
-                df_eq(fi.as_fugue_df(df2), data, "aa:str,b:int", throw=True)
+                self.df_eq(fi.as_fugue_df(df2), data, "aa:str,b:int", throw=True)

            for data in [[["a", 1]], []]:
                df = self.df(data, "a:str,b:int")
                df3 = fi.rename(df, columns={})
                assert fi.get_schema(df3) == "a:str,b:int"
-                df_eq(fi.as_fugue_df(df3), data, "a:str,b:int", throw=True)
+                self.df_eq(fi.as_fugue_df(df3), data, "a:str,b:int", throw=True)

        def test_rename_invalid(self):
            df = self.df([["a", 1]], "a:str,b:int")
```