fugue 0.9.0.dev2__tar.gz → 0.9.0.dev4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/PKG-INFO +1 -1
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/collections/sql.py +1 -1
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataframe/utils.py +4 -18
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/test/plugins.py +11 -1
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue.egg-info/PKG-INFO +1 -1
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue.egg-info/requires.txt +12 -7
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_dask/_io.py +8 -5
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_dask/_utils.py +4 -4
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_duckdb/_io.py +1 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ibis/execution_engine.py +14 -7
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/_constants.py +3 -4
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/_utils/dataframe.py +10 -21
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/_utils/io.py +36 -13
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/execution_engine.py +1 -2
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_test/builtin_suite.py +14 -15
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_test/dataframe_suite.py +3 -4
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_test/execution_suite.py +130 -123
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/setup.cfg +1 -1
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/setup.py +12 -6
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/LICENSE +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/README.md +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/_utils/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/_utils/display.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/_utils/exception.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/_utils/interfaceless.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/_utils/io.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/_utils/misc.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/_utils/registry.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/api.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/bag/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/bag/array_bag.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/bag/bag.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/collections/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/collections/partition.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/collections/yielded.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/column/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/column/expressions.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/column/functions.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/column/sql.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/constants.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataframe/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataframe/api.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataframe/array_dataframe.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataframe/arrow_dataframe.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataframe/dataframe.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataframe/dataframe_iterable_dataframe.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataframe/dataframes.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataframe/function_wrapper.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataframe/iterable_dataframe.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataframe/pandas_dataframe.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataset/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataset/api.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataset/dataset.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dev.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/exceptions.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/execution/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/execution/api.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/execution/execution_engine.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/execution/factory.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/execution/native_execution_engine.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/_builtins/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/_builtins/creators.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/_builtins/outputters.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/_builtins/processors.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/_utils.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/context.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/creator/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/creator/convert.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/creator/creator.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/outputter/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/outputter/convert.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/outputter/outputter.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/processor/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/processor/convert.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/processor/processor.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/transformer/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/transformer/constants.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/transformer/convert.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/extensions/transformer/transformer.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/plugins.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/py.typed +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/registry.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/rpc/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/rpc/base.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/rpc/flask.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/sql/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/sql/_utils.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/sql/_visitors.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/sql/api.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/sql/workflow.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/test/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/test/pandas_tester.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/workflow/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/workflow/_checkpoint.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/workflow/_tasks.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/workflow/_workflow_context.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/workflow/api.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/workflow/input.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/workflow/module.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/workflow/workflow.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue.egg-info/SOURCES.txt +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue.egg-info/dependency_links.txt +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue.egg-info/entry_points.txt +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue.egg-info/top_level.txt +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_contrib/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_contrib/contrib.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_contrib/seaborn/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_contrib/viz/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_contrib/viz/_ext.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_dask/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_dask/_constants.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_dask/dataframe.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_dask/execution_engine.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_dask/registry.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_dask/tester.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_duckdb/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_duckdb/_utils.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_duckdb/dask.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_duckdb/dataframe.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_duckdb/execution_engine.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_duckdb/registry.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_duckdb/tester.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ibis/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ibis/_compat.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ibis/_utils.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ibis/dataframe.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_notebook/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_notebook/env.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_notebook/nbextension/README.md +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_notebook/nbextension/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_notebook/nbextension/description.yaml +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_notebook/nbextension/main.js +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_polars/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_polars/_utils.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_polars/polars_dataframe.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_polars/registry.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/_utils/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/_utils/cluster.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/dataframe.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/registry.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/tester.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_spark/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_spark/_constants.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_spark/_utils/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_spark/_utils/convert.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_spark/_utils/io.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_spark/_utils/misc.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_spark/_utils/partition.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_spark/dataframe.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_spark/execution_engine.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_spark/registry.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_spark/tester.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_sql/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_sql/exceptions.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_test/__init__.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_test/bag_suite.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_test/fixtures.py +0 -0
- {fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_version/__init__.py +0 -0
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/collections/sql.py

```diff
@@ -15,7 +15,7 @@ class TempTableName:
     """Generating a temporary, random and globaly unique table name"""

     def __init__(self):
-        self.key = "_" + str(uuid4())[:5]
+        self.key = "_" + str(uuid4())[:5].upper()

     def __repr__(self) -> str:
         return _TEMP_TABLE_EXPR_PREFIX + self.key + _TEMP_TABLE_EXPR_SUFFIX
```
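Generated table keys are now uppercased, matching similar changes in `fugue_ibis` below (likely for case-insensitive SQL backends, though the diff does not state the motivation). A minimal sketch of the new behavior:

```python
# The first 5 characters of a UUID string are hex digits, so uppercasing
# affects only the letter digits, e.g. "_3f2a1" becomes "_3F2A1".
from uuid import uuid4

key = "_" + str(uuid4())[:5].upper()
assert key.startswith("_") and key == key.upper()
```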
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/dataframe/utils.py

```diff
@@ -21,22 +21,6 @@ normalize_dataframe_column_names = normalize_column_names
 rename_dataframe_column_names = rename


-def _pa_type_eq(t1: pa.DataType, t2: pa.DataType) -> bool:
-    # should ignore the name difference of list
-    # e.g. list<item: string> == list<l: string>
-    if pa.types.is_list(t1) and pa.types.is_list(t2):  # pragma: no cover
-        return _pa_type_eq(t1.value_type, t2.value_type)
-    return t1 == t2
-
-
-def _schema_eq(s1: Schema, s2: Schema) -> bool:
-    if s1 == s2:
-        return True
-    return s1.names == s2.names and all(
-        _pa_type_eq(f1.type, f2.type) for f1, f2 in zip(s1.fields, s2.fields)
-    )
-
-
 def _df_eq(
     df: DataFrame,
     data: Any,
@@ -46,6 +30,7 @@ def _df_eq(
     check_schema: bool = True,
     check_content: bool = True,
     no_pandas: bool = False,
+    equal_type_groups: Optional[List[List[Any]]] = None,
     throw=False,
 ) -> bool:
     """Compare if two dataframes are equal. Is for internal, unit test
@@ -66,6 +51,7 @@ def _df_eq(
     :param no_pandas: if true, it will compare the string representations of the
         dataframes, otherwise, it will convert both to pandas dataframe to compare,
         defaults to False
+    :param equal_type_groups: the groups to treat as equal types, defaults to None.
     :param throw: if to throw error if not equal, defaults to False
     :return: if they equal
     """
@@ -78,8 +64,8 @@ def _df_eq(
     assert (
         df1.count() == df2.count()
     ), f"count mismatch {df1.count()}, {df2.count()}"
-    assert not check_schema or
-
+    assert not check_schema or df.schema.is_like(
+        df2.schema, equal_groups=equal_type_groups
     ), f"schema mismatch {df.schema.pa_schema}, {df2.schema.pa_schema}"
     if not check_content:
         return True
```
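The hand-rolled `_pa_type_eq`/`_schema_eq` helpers are replaced by triad's `Schema.is_like`, and the new `equal_type_groups` argument lets callers declare sets of types that should compare as equal. A self-contained sketch of the idea; expressing groups as plain pyarrow types is an assumption made here for illustration:

```python
# Minimal illustration of "equal type groups": two types compare equal
# if they are identical or belong to the same declared group.
import pyarrow as pa

def types_equal(t1: pa.DataType, t2: pa.DataType, equal_groups=None) -> bool:
    if t1 == t2:
        return True
    for group in equal_groups or []:
        if t1 in group and t2 in group:
            return True
    return False

groups = [[pa.int32(), pa.int64()]]
assert types_equal(pa.int32(), pa.int64(), groups)   # tolerated difference
assert not types_equal(pa.int32(), pa.string(), groups)  # still unequal
```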
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue/test/plugins.py

```diff
@@ -2,7 +2,7 @@ from contextlib import contextmanager
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Dict, Iterator, List, Optional, Tuple, Type
-
+from fugue.dataframe.utils import _df_eq
 from triad import assert_or_throw, run_once
 from triad.utils.entry_points import load_entry_point

@@ -160,6 +160,7 @@ class FugueTestSuite:

     backend: Any
     tmp_path: Path
+    equal_type_groups: Any = None

     __test__ = False
     _test_context: Any = None
@@ -180,6 +181,15 @@ class FugueTestSuite:
         """The engine object inside the ``FugueTestContext``"""
         return self.context.engine

+    def get_equal_type_groups(self) -> Optional[List[List[Any]]]:
+        return None  # pragma: no cover
+
+    def df_eq(self, *args: Any, **kwargs: Any) -> bool:
+        """A wrapper of :func:`~fugue.dataframe.utils.df_eq`"""
+        if "equal_type_groups" not in kwargs:
+            kwargs["equal_type_groups"] = self.equal_type_groups
+        return _df_eq(*args, **kwargs)
+

 def fugue_test_suite(backend: Any, mark_test: Optional[bool] = None) -> Any:
     def deco(cls: Type["FugueTestSuite"]) -> Type["FugueTestSuite"]:
```
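With the new `df_eq` wrapper, a backend test suite can declare its type tolerance once instead of threading `equal_type_groups` through every assertion. A hypothetical suite (the backend name and group format are illustrative assumptions, not from this diff):

```python
import fugue.test as ft

@ft.fugue_test_suite("pandas", mark_test=True)  # backend name is illustrative
class MyBackendTests(ft.FugueTestSuite):
    # assumption: groups declared as type names; consult triad's
    # Schema.is_like for the exact accepted format
    equal_type_groups = [["int", "long"]]

    def test_roundtrip(self):
        # every self.df_eq call applies the suite-level groups by default,
        # so an int/long mismatch from this backend no longer fails
        ...
```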
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue.egg-info/requires.txt

```diff
@@ -1,4 +1,4 @@
-triad>=0.9.
+triad>=0.9.6
 adagio>=0.2.4

 [all]
@@ -9,14 +9,14 @@ jinja2
 pyspark>=3.1.1
 dask[dataframe,distributed]>=2023.5.0
 dask-sql
-ray[data]>=2.
+ray[data]>=2.5.0
 notebook
 jupyterlab
 ipython>=7.10.0
 duckdb>=0.5.0
 pyarrow>=6.0.1
-pandas
-ibis-framework
+pandas<2.2,>=2.0.2
+ibis-framework
 polars

 [cpp_sql_parser]
@@ -27,6 +27,9 @@ dask[dataframe,distributed]>=2023.5.0
 pyarrow>=7.0.0
 pandas>=2.0.2

+[dask:python_version >= "3.11.9"]
+dask[dataframe,distributed]>=2024.4.0
+
 [duckdb]
 qpd>=0.4.4
 fugue-sql-antlr>=0.2.0
@@ -40,7 +43,8 @@ qpd>=0.4.4
 fugue-sql-antlr>=0.2.0
 sqlglot
 jinja2
-ibis-framework
+ibis-framework
+pandas<2.2

 [notebook]
 notebook
@@ -51,9 +55,10 @@ ipython>=7.10.0
 polars

 [ray]
-ray[data]>=2.
+ray[data]>=2.5.0
 duckdb>=0.5.0
-pyarrow>=
+pyarrow>=7.0.0
+pandas<2.2

 [spark]
 pyspark>=3.1.1
```
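Note the new environment-marker section `[dask:python_version >= "3.11.9"]`, which raises the dask floor only on recent Pythons. In setup.py terms this is produced by a marker on the extra's requirement; a sketch of the shape (the package's actual setup.py diff is not shown here):

```python
# Hypothetical extras_require entry; setuptools writes the marker-gated
# requirement into requires.txt as [dask:python_version >= "3.11.9"].
extras_require = {
    "dask": [
        "dask[dataframe,distributed]>=2023.5.0",
        'dask[dataframe,distributed]>=2024.4.0; python_version >= "3.11.9"',
    ],
}
```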
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_dask/_io.py

```diff
@@ -6,7 +6,7 @@ from fsspec import AbstractFileSystem
 from triad.collections.dict import ParamDict
 from triad.collections.schema import Schema
 from triad.utils.assertion import assert_or_throw
-from triad.utils.io import join, makedirs, url_to_fs
+from triad.utils.io import isfile, join, makedirs, url_to_fs

 from fugue._utils.io import FileParser, _get_single_files
 from fugue_dask.dataframe import DaskDataFrame
@@ -100,9 +100,11 @@ def _save_csv(df: DaskDataFrame, p: FileParser, **kwargs: Any) -> None:


 def _safe_load_csv(path: str, **kwargs: Any) -> dd.DataFrame:
+    if not isfile(path):
+        return dd.read_csv(join(path, "*.csv"), **kwargs)
     try:
         return dd.read_csv(path, **kwargs)
-    except (IsADirectoryError, PermissionError):
+    except (IsADirectoryError, PermissionError):  # pragma: no cover
         return dd.read_csv(join(path, "*.csv"), **kwargs)


@@ -148,11 +150,12 @@ def _save_json(df: DaskDataFrame, p: FileParser, **kwargs: Any) -> None:


 def _safe_load_json(path: str, **kwargs: Any) -> dd.DataFrame:
+    if not isfile(path):
+        return dd.read_json(join(path, "*.json"), **kwargs)
     try:
         return dd.read_json(path, **kwargs)
-    except (IsADirectoryError, PermissionError):
-
-        return x
+    except (IsADirectoryError, PermissionError):  # pragma: no cover
+        return dd.read_json(join(path, "*.json"), **kwargs)


 def _load_json(
```
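Both loaders now test `isfile` up front instead of relying on the exception path; the try/except survives only as a fallback. A standalone version of the pattern (function name is illustrative):

```python
import dask.dataframe as dd
from triad.utils.io import isfile, join

def load_csv_any(path: str, **kwargs) -> dd.DataFrame:
    # A directory of part files: glob it directly rather than waiting for
    # IsADirectoryError/PermissionError, which not every filesystem raises.
    if not isfile(path):
        return dd.read_csv(join(path, "*.csv"), **kwargs)
    return dd.read_csv(path, **kwargs)
```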
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_dask/_utils.py

```diff
@@ -53,7 +53,7 @@ def hash_repartition(df: dd.DataFrame, num: int, cols: List[Any]) -> dd.DataFrame
     if num < 1:
         return df
     if num == 1:
-        return df.repartition(1)
+        return df.repartition(npartitions=1)
     df = df.reset_index(drop=True).clear_divisions()
     idf, ct = _add_hash_index(df, num, cols)
     return _postprocess(idf, ct, num)
@@ -76,7 +76,7 @@ def even_repartition(df: dd.DataFrame, num: int, cols: List[Any]) -> dd.DataFrame
     the number of partitions will be the number of groups.
     """
     if num == 1:
-        return df.repartition(1)
+        return df.repartition(npartitions=1)
     if len(cols) == 0 and num <= 0:
         return df
     df = df.reset_index(drop=True).clear_divisions()
@@ -111,7 +111,7 @@ def rand_repartition(
     if num < 1:
         return df
     if num == 1:
-        return df.repartition(1)
+        return df.repartition(npartitions=1)
     df = df.reset_index(drop=True).clear_divisions()
     if len(cols) == 0:
         idf, ct = _add_random_index(df, num=num, seed=seed)
@@ -124,7 +124,7 @@ def rand_repartition(
 def _postprocess(idf: dd.DataFrame, ct: int, num: int) -> dd.DataFrame:
     parts = min(ct, num)
     if parts <= 1:
-        return idf.repartition(1)
+        return idf.repartition(npartitions=1)
     divisions = list(np.arange(ct, step=math.ceil(ct / parts)))
     divisions.append(ct - 1)
     return idf.repartition(divisions=divisions, force=True)
```
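All four call sites switch from positional `repartition(1)` to the explicit keyword form. In classic `dask.dataframe` the first positional parameter of `repartition` is `divisions`, not `npartitions`, so the keyword is both unambiguous and required by newer dask releases:

```python
import pandas as pd
import dask.dataframe as dd

ddf = dd.from_pandas(pd.DataFrame({"a": range(10)}), npartitions=4)
# explicit keyword: works across dask versions, avoids being parsed as divisions
single = ddf.repartition(npartitions=1)
assert single.npartitions == 1
```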
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ibis/execution_engine.py

```diff
@@ -23,8 +23,8 @@ from ._compat import IbisTable
 from ._utils import to_ibis_schema
 from .dataframe import IbisDataFrame

-_JOIN_RIGHT_SUFFIX = "_ibis_y__"
-_GEN_TABLE_NAMES = (f"_fugue_temp_table_{i:d}" for i in itertools.count())
+_JOIN_RIGHT_SUFFIX = "_ibis_y__".upper()
+_GEN_TABLE_NAMES = (f"_fugue_temp_table_{i:d}".upper() for i in itertools.count())


 class IbisSQLEngine(SQLEngine):
@@ -92,11 +92,11 @@ class IbisSQLEngine(SQLEngine):
         _df2 = self.to_df(df2)
         key_schema, end_schema = get_join_schemas(_df1, _df2, how=how, on=on)
         on_fields = [_df1.native[k] == _df2.native[k] for k in key_schema]
-        if ibis.__version__ < "6":
+        if ibis.__version__ < "6":  # pragma: no cover
             suffixes: Dict[str, Any] = dict(suffixes=("", _JOIN_RIGHT_SUFFIX))
-        else:
+        else:
             # breaking change in ibis 6.0
-            suffixes = dict(lname="", rname=_JOIN_RIGHT_SUFFIX)
+            suffixes = dict(lname="", rname="{name}" + _JOIN_RIGHT_SUFFIX)
         if how.lower() == "cross":
             tb = _df1.native.cross_join(_df2.native, **suffixes)
         elif how.lower() == "right_outer":
```
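In ibis ≥ 6, `lname`/`rname` are format templates rather than plain suffixes, so the right-hand side must keep a `{name}` placeholder to preserve the original column name. A standalone illustration of the string semantics (not ibis code itself):

```python
# How the rname template expands for a clashing right-side column.
_JOIN_RIGHT_SUFFIX = "_ibis_y__".upper()
rname = "{name}" + _JOIN_RIGHT_SUFFIX
assert rname.format(name="city") == "city_IBIS_Y__"
```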
```diff
@@ -224,7 +224,7 @@ class IbisSQLEngine(SQLEngine):
             _presort = parse_presort_exp(presort)
         else:
             _presort = partition_spec.presort
-        tbn = "
+        tbn = "_TEMP"
         idf = self.to_df(df)

         if len(_presort) == 0:
@@ -233,9 +233,10 @@ class IbisSQLEngine(SQLEngine):
             pcols = ", ".join(
                 self.encode_column_name(x) for x in partition_spec.partition_by
             )
+            dummy_order_by = self._dummy_window_order_by()
             sql = (
                 f"SELECT * FROM ("
-                f"SELECT *, ROW_NUMBER() OVER (PARTITION BY {pcols}) "
+                f"SELECT *, ROW_NUMBER() OVER (PARTITION BY {pcols} {dummy_order_by}) "
                 f"AS __fugue_take_param FROM {tbn}"
                 f") WHERE __fugue_take_param<={n}"
             )
@@ -290,6 +291,12 @@ class IbisSQLEngine(SQLEngine):
     def load_table(self, table: str, **kwargs: Any) -> DataFrame:
         return self.to_df(self.backend.table(table))

+    def _dummy_window_order_by(self) -> str:
+        """Return a dummy window order by clause, this is required for
+        some SQL backends when there is no real order by clause in window
+        """
+        return ""
+

 class IbisMapEngine(MapEngine):
     """IbisExecutionEngine's MapEngine, it is a wrapper of the map engine
```
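The new `_dummy_window_order_by` hook exists because some SQL dialects reject `ROW_NUMBER() OVER (PARTITION BY ...)` without an `ORDER BY`. The base class returns an empty string; a dialect-specific engine could override it, for example (hypothetical subclass, shown only to illustrate the hook):

```python
from fugue_ibis.execution_engine import IbisSQLEngine

class StrictWindowEngine(IbisSQLEngine):  # hypothetical backend engine
    def _dummy_window_order_by(self) -> str:
        # A constant expression satisfies dialects that insist on an
        # ORDER BY inside a window, without imposing a real ordering.
        return "ORDER BY (SELECT 1)"
```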
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/_constants.py

```diff
@@ -1,6 +1,7 @@
 from typing import Any, Dict

 import ray
+from packaging import version

 FUGUE_RAY_CONF_SHUFFLE_PARTITIONS = "fugue.ray.shuffle.partitions"
 FUGUE_RAY_DEFAULT_PARTITIONS = "fugue.ray.default.partitions"
@@ -12,8 +13,6 @@ FUGUE_RAY_DEFAULT_CONF: Dict[str, Any] = {
     FUGUE_RAY_DEFAULT_PARTITIONS: 0,
     FUGUE_RAY_ZERO_COPY: True,
 }
+RAY_VERSION = version.parse(ray.__version__)

-
-    _ZERO_COPY: Dict[str, Any] = {"zero_copy_batch": True}
-else:  # pragma: no cover
-    _ZERO_COPY = {}
+_ZERO_COPY: Dict[str, Any] = {"zero_copy_batch": True}
```
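`RAY_VERSION` replaces the old string-based gate (the removed `if`/`else` around `_ZERO_COPY`); `packaging.version` compares release segments numerically, which lexicographic string comparison gets wrong:

```python
from packaging import version

assert version.parse("2.10.0") >= version.parse("2.10")
# string comparison wrongly claims 2.9.3 >= 2.10 (because "9" > "1")
assert "2.9.3" >= "2.10"
assert not (version.parse("2.9.3") >= version.parse("2.10"))
```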
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/_utils/dataframe.py

```diff
@@ -3,7 +3,6 @@ from typing import Any, Dict, List, Optional, Tuple

 import pandas as pd
 import pyarrow as pa
-import ray
 import ray.data as rd
 from triad import Schema

@@ -31,31 +30,21 @@ def get_dataset_format(df: rd.Dataset) -> Tuple[Optional[str], rd.Dataset]:
     df = materialize(df)
     if df.count() == 0:
         return None, df
-
-
-
-
-
-
-    else:
-        schema = df.schema(fetch_if_missing=True)
-        if schema is None:  # pragma: no cover
-            return None, df
-        if isinstance(schema.base_schema, pa.Schema):
-            return "arrow", df
-        return "pandas", df
+    schema = df.schema(fetch_if_missing=True)
+    if schema is None:  # pragma: no cover
+        return None, df
+    if isinstance(schema.base_schema, pa.Schema):
+        return "arrow", df
+    return "pandas", df


 def to_schema(schema: Any) -> Schema:  # pragma: no cover
     if isinstance(schema, pa.Schema):
         return Schema(schema)
-    if
-    if isinstance(schema,
-
-
-    ):
-        return Schema(schema.base_schema)
-        return Schema(list(zip(schema.names, schema.types)))
+    if isinstance(schema, rd.Schema):
+        if hasattr(schema, "base_schema") and isinstance(schema.base_schema, pa.Schema):
+            return Schema(schema.base_schema)
+        return Schema(list(zip(schema.names, schema.types)))
     raise ValueError(f"{schema} is not supported")
```
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/_utils/io.py

```diff
@@ -3,15 +3,15 @@ import pathlib
 from typing import Any, Callable, Dict, Iterable, List, Optional, Union

 import pyarrow as pa
-import ray
 import ray.data as rd
+from packaging import version
 from pyarrow import csv as pacsv
 from pyarrow import json as pajson
 from ray.data.datasource import FileExtensionFilter
 from triad.collections import Schema
 from triad.collections.dict import ParamDict
 from triad.utils.assertion import assert_or_throw
-from triad.utils.io import exists, makedirs, rm
+from triad.utils.io import exists, makedirs, rm, isfile

 from fugue import ExecutionEngine
 from fugue._utils.io import FileParser, save_df
@@ -19,6 +19,8 @@ from fugue.collections.partition import PartitionSpec
 from fugue.dataframe import DataFrame
 from fugue_ray.dataframe import RayDataFrame

+from .._constants import RAY_VERSION
+

 class RayIO(object):
     def __init__(self, engine: ExecutionEngine):
@@ -149,6 +151,18 @@ class RayIO(object):
         if infer_schema and columns is not None and not isinstance(columns, list):
             raise ValueError("can't set columns as a schema when infer schema is true")

+        if RAY_VERSION >= version.parse("2.10"):
+            if len(p) == 1 and isfile(p[0]):  # TODO: very hacky
+                params: Dict[str, Any] = {}
+            else:
+                params = {"file_extensions": ["csv"]}
+        else:  # pragma: no cover
+            params = {
+                "partition_filter": _FileFiler(
+                    file_extensions=["csv"], exclude=["_SUCCESS"]
+                ),
+            }
+
         def _read_csv(to_str: bool) -> RayDataFrame:
             res = rd.read_csv(
                 p,
@@ -156,9 +170,7 @@ class RayIO(object):
                 read_options=pacsv.ReadOptions(**read_options),
                 parse_options=pacsv.ParseOptions(**parse_options),
                 convert_options=pacsv.ConvertOptions(**convert_options),
-
-                    file_extensions=["csv"], exclude=["_SUCCESS"]
-                ),
+                **params,
             )
             if to_str:
                 _schema = res.schema(fetch_if_missing=True)
@@ -196,20 +208,31 @@ class RayIO(object):
         read_options: Dict[str, Any] = {"use_threads": False}
         parse_options: Dict[str, Any] = {}

-        def _read_json() -> RayDataFrame:
-            if
-
+        def _read_json() -> RayDataFrame:  # pragma: no cover
+            if RAY_VERSION >= version.parse("2.10"):
+                if len(p) == 1 and isfile(p[0]):  # TODO: very hacky
+                    params: Dict[str, Any] = {"file_extensions": None}
+                else:
+                    params = {"file_extensions": ["json"]}
+            elif RAY_VERSION >= version.parse("2.9"):  # pragma: no cover
+                params = {
+                    "file_extensions": None,
+                    "partition_filter": _FileFiler(
+                        file_extensions=["json"], exclude=["_SUCCESS"]
+                    ),
+                }
             else:  # pragma: no cover
-                params = {
+                params = {
+                    "partition_filter": _FileFiler(
+                        file_extensions=["json"], exclude=["_SUCCESS"]
+                    ),
+                }
             return RayDataFrame(
                 rd.read_json(
                     p,
                     ray_remote_args=self._remote_args(),
                     read_options=pajson.ReadOptions(**read_options),
                     parse_options=pajson.ParseOptions(**parse_options),
-                    partition_filter=_FileFiler(
-                        file_extensions=["json"], exclude=["_SUCCESS"]
-                    ),
                     **params,
                 )
             )
@@ -227,7 +250,7 @@ class RayIO(object):
         return {"num_cpus": 1}


-class _FileFiler(FileExtensionFilter):
+class _FileFiler(FileExtensionFilter):  # pragma: no cover
     def __init__(self, file_extensions: Union[str, List[str]], exclude: Iterable[str]):
         super().__init__(file_extensions, allow_if_no_extension=True)
         self._exclude = set(exclude)
```
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_ray/execution_engine.py

```diff
@@ -191,8 +191,7 @@ class RayMapEngine(MapEngine):
             mb_args["batch_size"] = self.conf.get_or_throw(
                 FUGUE_RAY_DEFAULT_BATCH_SIZE, int
             )
-
-            mb_args["zero_copy_batch"] = self.conf.get(FUGUE_RAY_ZERO_COPY, True)
+        mb_args["zero_copy_batch"] = self.conf.get(FUGUE_RAY_ZERO_COPY, True)
         sdf = rdf.native.map_batches(
             _udf,
             batch_format="pyarrow",
```
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_test/builtin_suite.py

```diff
@@ -56,7 +56,6 @@ from fugue import (
 from fugue.column import col
 from fugue.column import functions as ff
 from fugue.column import lit
-from fugue.dataframe.utils import _df_eq as df_eq
 from fugue.exceptions import (
     FugueInterfacelessError,
     FugueWorkflowCompileError,
@@ -81,7 +80,7 @@ class BuiltInTests(object):
     class Tests(ft.FugueTestSuite):
         def test_workflows(self):
            a = FugueWorkflow().df([[0]], "a:int")
-            df_eq(a.compute(self.engine), [[0]], "a:int")
+            self.df_eq(a.compute(self.engine), [[0]], "a:int")

         def test_create_show(self):
             with FugueWorkflow() as dag:
@@ -1690,7 +1689,7 @@ class BuiltInTests(object):
                """,
                x=sdf3,
            ).run()
-            df_eq(
+            self.df_eq(
                res["res"],
                [[3, 4, 13]],
                schema="a:long,b:int,c:long",
@@ -1723,9 +1722,9 @@ class BuiltInTests(object):
            df1 = pd.DataFrame([[0, 1], [2, 3]], columns=["a b", " "])
            df2 = pd.DataFrame([[0, 10], [20, 3]], columns=["a b", "d"])
            r = fa.inner_join(df1, df2, as_fugue=True)
-            df_eq(r, [[0, 1, 10]], "`a b`:long,` `:long,d:long", throw=True)
+            self.df_eq(r, [[0, 1, 10]], "`a b`:long,` `:long,d:long", throw=True)
            r = fa.transform(r, tr)
-            df_eq(
+            self.df_eq(
                r,
                [[0, 1, 10, 2]],
                "`a b`:long,` `:long,d:long,`c *`:long",
@@ -1739,7 +1738,7 @@ class BuiltInTests(object):
                col("d"),
                col("c *").cast(int),
            )
-            df_eq(
+            self.df_eq(
                r,
                [[0, 1, 10, 2]],
                "`a b `:long,`x y`:long,d:long,`c *`:long",
@@ -1748,13 +1747,13 @@ class BuiltInTests(object):
            r = fa.rename(r, {"a b ": "a b"})
            fa.save(r, f_csv, header=True, force_single=True)
            fa.save(r, f_parquet)
-            df_eq(
+            self.df_eq(
                fa.load(f_parquet, columns=["x y", "d", "c *"], as_fugue=True),
                [[1, 10, 2]],
                "`x y`:long,d:long,`c *`:long",
                throw=True,
            )
-            df_eq(
+            self.df_eq(
                fa.load(
                    f_csv,
                    header=True,
@@ -1766,7 +1765,7 @@ class BuiltInTests(object):
                "d:str,`c *`:str",
                throw=True,
            )
-            df_eq(
+            self.df_eq(
                fa.load(
                    f_csv,
                    header=True,
@@ -1786,14 +1785,14 @@ class BuiltInTests(object):
                """,
                as_fugue=True,
            )
-            df_eq(r, [[0, 1, 10]], "`a b`:long,` `:long,d:long", throw=True)
+            self.df_eq(r, [[0, 1, 10]], "`a b`:long,` `:long,d:long", throw=True)
            r = fa.fugue_sql(
                """
                TRANSFORM r USING tr SCHEMA *,`c *`:long
                """,
                as_fugue=True,
            )
-            df_eq(
+            self.df_eq(
                r,
                [[0, 1, 10, 2]],
                "`a b`:long,` `:long,d:long,`c *`:long",
@@ -1805,7 +1804,7 @@ class BuiltInTests(object):
                """,
                as_fugue=True,
            )
-            df_eq(
+            self.df_eq(
                r,
                [[0, 1, 10, 2]],
                "`a b`:long,` `:long,d:long,`c *`:long",
@@ -1826,19 +1825,19 @@ class BuiltInTests(object):
                f_parquet=f_parquet,
                f_csv=f_csv,
            ).run()
-            df_eq(
+            self.df_eq(
                res["r1"],
                [[1, 10, 2]],
                "`x y`:long,d:long,`c *`:long",
                throw=True,
            )
-            df_eq(
+            self.df_eq(
                res["r2"],
                [["1", "10", "2"]],
                "`x y`:str,d:str,`c *`:str",
                throw=True,
            )
-            df_eq(
+            self.df_eq(
                res["r3"],
                [[0, 1, 10, 2]],
                "`a b`:long,`x y`:long,d:long,`c *`:long",
```
{fugue-0.9.0.dev2 → fugue-0.9.0.dev4}/fugue_test/dataframe_suite.py

```diff
@@ -10,7 +10,6 @@ from pytest import raises
 import fugue.api as fi
 import fugue.test as ft
 from fugue.dataframe import ArrowDataFrame, DataFrame
-from fugue.dataframe.utils import _df_eq as df_eq
 from fugue.exceptions import FugueDataFrameOperationError, FugueDatasetEmptyError


@@ -121,7 +120,7 @@ class DataFrameTests(object):
            assert [[1]] == fi.as_array(df, type_safe=True)

            df = self.df([["a", 1, 2]], "a:str,b:int,c:int")
-            df_eq(
+            self.df_eq(
                fi.as_fugue_df(fi.select_columns(df, ["c", "a"])),
                [[2, "a"]],
                "a:str,c:int",
@@ -132,13 +131,13 @@ class DataFrameTests(object):
                df = self.df(data, "a:str,b:int")
                df2 = fi.rename(df, columns=dict(a="aa"))
                assert fi.get_schema(df) == "a:str,b:int"
-                df_eq(fi.as_fugue_df(df2), data, "aa:str,b:int", throw=True)
+                self.df_eq(fi.as_fugue_df(df2), data, "aa:str,b:int", throw=True)

            for data in [[["a", 1]], []]:
                df = self.df(data, "a:str,b:int")
                df3 = fi.rename(df, columns={})
                assert fi.get_schema(df3) == "a:str,b:int"
-                df_eq(fi.as_fugue_df(df3), data, "a:str,b:int", throw=True)
+                self.df_eq(fi.as_fugue_df(df3), data, "a:str,b:int", throw=True)

        def test_rename_invalid(self):
            df = self.df([["a", 1]], "a:str,b:int")
```