fugue 0.8.7.dev6__py3-none-any.whl → 0.8.7.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fugue_spark/_utils/io.py CHANGED
@@ -4,7 +4,6 @@ import pyspark.sql as ps
 from pyspark.sql import SparkSession
 from triad.collections import Schema
 from triad.collections.dict import ParamDict
-from triad.collections.fs import FileSystem
 from triad.utils.assertion import assert_or_throw

 from fugue._utils.io import FileParser, save_df
@@ -16,9 +15,8 @@ from .convert import to_schema, to_spark_schema


 class SparkIO(object):
-    def __init__(self, spark_session: SparkSession, fs: FileSystem):
+    def __init__(self, spark_session: SparkSession):
         self._session = spark_session
-        self._fs = fs
         self._loads: Dict[str, Callable[..., DataFrame]] = {
             "csv": self._load_csv,
             "parquet": self._load_parquet,
@@ -41,7 +39,7 @@ class SparkIO(object):
             len(fmts) == 1, NotImplementedError("can't support multiple formats")
         )
         fmt = fmts[0]
-        files = [f.uri for f in fp]
+        files = [f.path for f in fp]
         return self._loads[fmt](files, columns, **kwargs)

     def save_df(
@@ -64,7 +62,7 @@ class SparkIO(object):
             ldf = df.as_local()
             if isinstance(ldf, PandasDataFrame) and hasattr(ldf.native, "attrs"):
                 ldf.native.attrs = {}  # pragma: no cover
-            save_df(ldf, uri, format_hint=format_hint, mode=mode, fs=self._fs, **kwargs)
+            save_df(ldf, uri, format_hint=format_hint, mode=mode, **kwargs)

     def _get_writer(
         self, sdf: ps.DataFrame, partition_spec: PartitionSpec
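Illustrative note (not part of the diff): after this change, constructing SparkIO requires only a SparkSession. A minimal sketch follows; the import path is taken from the file header above, and the session setup is assumed.

    # illustrative sketch, not from the package source
    from pyspark.sql import SparkSession
    from fugue_spark._utils.io import SparkIO

    spark = SparkSession.builder.getOrCreate()
    io = SparkIO(spark)  # previously: SparkIO(spark, FileSystem())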
@@ -11,7 +11,7 @@ from pyspark.rdd import RDD
 from pyspark.sql import SparkSession
 from pyspark.sql.functions import broadcast, col, lit, row_number
 from pyspark.sql.window import Window
-from triad import FileSystem, IndexedOrderedDict, ParamDict, Schema, SerializableRLock
+from triad import IndexedOrderedDict, ParamDict, Schema, SerializableRLock
 from triad.utils.assertion import assert_arg_not_none, assert_or_throw
 from triad.utils.hash import to_uuid
 from triad.utils.iter import EmptyAwareIterable
@@ -360,13 +360,12 @@ class SparkExecutionEngine(ExecutionEngine):
         cf.update(ParamDict(conf))
         super().__init__(cf)
         self._lock = SerializableRLock()
-        self._fs = FileSystem()
         self._log = logging.getLogger()
         self._broadcast_func = RunOnce(
             self._broadcast, lambda *args, **kwargs: id(args[0])
         )
         self._persist_func = RunOnce(self._persist, lambda *args, **kwargs: id(args[0]))
-        self._io = SparkIO(self.spark_session, self.fs)
+        self._io = SparkIO(self.spark_session)
         self._registered_dfs: Dict[str, SparkDataFrame] = {}

     def __repr__(self) -> str:
@@ -395,10 +394,6 @@ class SparkExecutionEngine(ExecutionEngine):
     def log(self) -> logging.Logger:
         return self._log

-    @property
-    def fs(self) -> FileSystem:
-        return self._fs
-
     def create_default_sql_engine(self) -> SQLEngine:
         return SparkSQLEngine(self)

@@ -12,11 +12,12 @@ import pickle
 from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional
 from unittest import TestCase
 from uuid import uuid4
-
+from triad.utils.io import write_text, join
 import numpy as np
 import pandas as pd
 import pyarrow as pa
 import pytest
+from fsspec.implementations.local import LocalFileSystem
 from pytest import raises
 from triad import SerializableRLock

@@ -28,7 +29,6 @@ from fugue import (
     DataFrame,
     DataFrames,
     ExecutionEngine,
-    FileSystem,
     FugueWorkflow,
     LocalDataFrame,
     OutputCoTransformer,
@@ -65,6 +65,8 @@ from fugue.exceptions import (
     FugueWorkflowRuntimeValidationError,
 )

+_LOCAL_FS = LocalFileSystem(auto_mkdir=True)
+

 class BuiltInTests(object):
     """Workflow level general test suite. It is a more general end to end
@@ -633,9 +635,8 @@ class BuiltInTests(object):
         tmpdir = str(self.tmpdir)

         def incr():
-            fs = FileSystem(auto_close=False).makedirs(tmpdir, recreate=True)
-            fs.writetext(str(uuid4()) + ".txt", "")
-            return fs.glob("*.txt").count().files
+            write_text(join(tmpdir, str(uuid4()) + ".txt"), "")
+            return len(_LOCAL_FS.glob(join(tmpdir, "*.txt")))

         def t1(df: Iterable[Dict[str, Any]]) -> Iterable[Dict[str, Any]]:
             for row in df:
@@ -717,9 +718,8 @@ class BuiltInTests(object):
         tmpdir = str(self.tmpdir)

         def incr():
-            fs = FileSystem(auto_close=False).makedirs(tmpdir, recreate=True)
-            fs.writetext(str(uuid4()) + ".txt", "")
-            return fs.glob("*.tx" "t").count().files
+            write_text(join(tmpdir, str(uuid4()) + ".txt"), "")
+            return len(_LOCAL_FS.glob(join(tmpdir, "*.txt")))

         def t1(
             df: Iterable[Dict[str, Any]], df2: pd.DataFrame
@@ -1348,7 +1348,7 @@ class BuiltInTests(object):
             b.partition(num=3).save(path, fmt="parquet", single=True)
             b.save(path2, header=True)
         dag.run(self.engine)
-        assert FileSystem().isfile(path)
+        assert _LOCAL_FS.isfile(path)
         with FugueWorkflow() as dag:
             a = dag.load(path, fmt="parquet", columns=["a", "c"])
             a.assert_eq(dag.df([[1, 6], [7, 2]], "a:long,c:int"))
@@ -1359,9 +1359,9 @@
             b = dag.df([[6, 1], [2, 7]], "c:int,a:long")
             b.partition(by="c").save(path3, fmt="parquet", single=False)
         dag.run(self.engine)
-        assert FileSystem().isdir(path3)
-        assert FileSystem().isdir(os.path.join(path3, "c=6"))
-        assert FileSystem().isdir(os.path.join(path3, "c=2"))
+        assert _LOCAL_FS.isdir(path3)
+        assert _LOCAL_FS.isdir(os.path.join(path3, "c=6"))
+        assert _LOCAL_FS.isdir(os.path.join(path3, "c=2"))
         # TODO: in test below, once issue #288 is fixed, use dag.load
         # instead of pd.read_parquet
         pdf = pd.read_parquet(path3).sort_values("a").reset_index(drop=True)
@@ -15,8 +15,8 @@ from unittest import TestCase
 import pandas as pd
 import pytest
 from pytest import raises
-from triad.collections.fs import FileSystem
 from triad.exceptions import InvalidOperationError
+from triad.utils.io import isfile, makedirs, touch

 import fugue.api as fa
 import fugue.column.functions as ff
@@ -62,7 +62,6 @@ class ExecutionEngineTests(object):
     def test_init(self):
         print(self.engine)
         assert self.engine.log is not None
-        assert self.engine.fs is not None
         assert copy.copy(self.engine) is self.engine
         assert copy.deepcopy(self.engine) is self.engine

@@ -985,17 +984,16 @@ class ExecutionEngineTests(object):
         df_eq(res, [[1, "z1"]], "a:int,v:str", throw=True)

     @pytest.fixture(autouse=True)
-    def init_tmpdir(self, tmpdir):
+    def init_tmpdir(self, tmpdir, tmp_mem_dir):
         self.tmpdir = tmpdir

     def test_save_single_and_load_parquet(self):
-        e = self.engine
         b = ArrayDataFrame([[6, 1], [2, 7]], "c:int,a:long")
         path = os.path.join(self.tmpdir, "a", "b")
-        e.fs.makedirs(path, recreate=True)
+        makedirs(path, exist_ok=True)
         # over write folder with single file
         fa.save(b, path, format_hint="parquet", force_single=True)
-        assert e.fs.isfile(path)
+        assert isfile(path)
         c = fa.load(path, format_hint="parquet", columns=["a", "c"], as_fugue=True)
         df_eq(c, [[1, 6], [7, 2]], "a:long,c:int", throw=True)

@@ -1019,7 +1017,7 @@
         path = os.path.join(self.tmpdir, "a", "b")
         fa.save(a, os.path.join(path, "a.parquet"), engine=native)
         fa.save(b, os.path.join(path, "b.parquet"), engine=native)
-        FileSystem().touch(os.path.join(path, "_SUCCESS"))
+        touch(os.path.join(path, "_SUCCESS"))
         c = fa.load(path, format_hint="parquet", columns=["a", "c"], as_fugue=True)
         df_eq(c, [[1, 6], [7, 2], [8, 4]], "a:long,c:int", throw=True)

@@ -1038,13 +1036,12 @@
         df_eq(c, [[1, 6], [7, 2], [8, 4]], "a:long,c:int", throw=True)

     def test_save_single_and_load_csv(self):
-        e = self.engine
         b = ArrayDataFrame([[6.1, 1.1], [2.1, 7.1]], "c:double,a:double")
         path = os.path.join(self.tmpdir, "a", "b")
-        e.fs.makedirs(path, recreate=True)
+        makedirs(path, exist_ok=True)
         # over write folder with single file
         fa.save(b, path, format_hint="csv", header=True, force_single=True)
-        assert e.fs.isfile(path)
+        assert isfile(path)
         c = fa.load(
             path, format_hint="csv", header=True, infer_schema=False, as_fugue=True
         )
@@ -1099,13 +1096,12 @@
         df_eq(c, [["1.1", "60.1"], ["7.1", "20.1"]], "a:str,c:str", throw=True)

     def test_save_single_and_load_csv_no_header(self):
-        e = self.engine
         b = ArrayDataFrame([[6.1, 1.1], [2.1, 7.1]], "c:double,a:double")
         path = os.path.join(self.tmpdir, "a", "b")
-        e.fs.makedirs(path, recreate=True)
+        makedirs(path, exist_ok=True)
         # over write folder with single file
         fa.save(b, path, format_hint="csv", header=False, force_single=True)
-        assert e.fs.isfile(path)
+        assert isfile(path)

         with raises(ValueError):
             c = fa.load(
@@ -1190,7 +1186,7 @@
             header=True,
             engine=native,
         )
-        FileSystem().touch(os.path.join(path, "_SUCCESS"))
+        touch(os.path.join(path, "_SUCCESS"))
         c = fa.load(
             path,
             format_hint="csv",
@@ -1204,13 +1200,12 @@
         )

     def test_save_single_and_load_json(self):
-        e = self.engine
         b = ArrayDataFrame([[6, 1], [2, 7]], "c:int,a:long")
         path = os.path.join(self.tmpdir, "a", "b")
-        e.fs.makedirs(path, recreate=True)
+        makedirs(path, exist_ok=True)
         # over write folder with single file
         fa.save(b, path, format_hint="json", force_single=True)
-        assert e.fs.isfile(path)
+        assert isfile(path)
         c = fa.load(path, format_hint="json", columns=["a", "c"], as_fugue=True)
         df_eq(c, [[1, 6], [7, 2]], "a:long,c:long", throw=True)

@@ -1241,7 +1236,7 @@
         path = os.path.join(self.tmpdir, "a", "b")
         fa.save(a, os.path.join(path, "a.json"), format_hint="json", engine=native)
         fa.save(b, os.path.join(path, "b.json"), format_hint="json", engine=native)
-        FileSystem().touch(os.path.join(path, "_SUCCESS"))
+        touch(os.path.join(path, "_SUCCESS"))
         c = fa.load(path, format_hint="json", columns=["a", "c"], as_fugue=True)
         df_eq(c, [[1, 6], [7, 2], [8, 4], [4, 3]], "a:long,c:long", throw=True)

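Side note (not part of the diff): the updated suites use plain functions from triad.utils.io together with an fsspec LocalFileSystem in place of triad's FileSystem class. A short illustrative sketch of that pattern, restricted to calls that appear in the hunks above; the temporary directory path is hypothetical.

    # illustrative sketch, not from the package source
    import os
    from uuid import uuid4

    from fsspec.implementations.local import LocalFileSystem
    from triad.utils.io import join, makedirs, touch, write_text

    fs = LocalFileSystem(auto_mkdir=True)
    tmpdir = "/tmp/fugue_demo"  # hypothetical directory

    makedirs(tmpdir, exist_ok=True)                      # create the directory if missing
    write_text(join(tmpdir, str(uuid4()) + ".txt"), "")  # write an empty text file
    touch(os.path.join(tmpdir, "_SUCCESS"))              # create an empty marker file
    print(len(fs.glob(join(tmpdir, "*.txt"))))           # count the matching files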
@@ -0,0 +1,2 @@
+# flake8: noqa
+from .fixtures import tmp_mem_dir
@@ -0,0 +1,18 @@
+import uuid
+
+import pytest
+from triad.utils.io import makedirs, rm
+
+
+@pytest.fixture
+def tmp_mem_dir():
+    uuid_str = str(uuid.uuid4())[:5]
+    path = "memory://test_" + uuid_str
+    makedirs(path)
+    try:
+        yield path
+    finally:
+        try:
+            rm(path, recursive=True)
+        except Exception:  # pragma: no cover
+            pass
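Usage note (not part of the diff): the new tmp_mem_dir fixture yields a unique memory:// path and removes it after the test. A hypothetical usage sketch, assuming the fixture is registered (for example via the plugin import added above) and that write_text/isfile from triad.utils.io accept memory:// URLs just as the fixture's own makedirs/rm calls do.

    # hypothetical usage sketch, not from the package source
    from triad.utils.io import isfile, join, write_text

    def test_write_to_memory_fs(tmp_mem_dir):
        path = join(tmp_mem_dir, "hello.txt")  # file inside the in-memory temp dir
        write_text(path, "hello")
        assert isfile(path)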