fugue 0.8.7.dev5__py3-none-any.whl → 0.8.7.dev7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fugue/__init__.py +0 -1
- fugue/_utils/io.py +84 -89
- fugue/api.py +1 -0
- fugue/dataframe/api.py +19 -2
- fugue/dataframe/arrow_dataframe.py +48 -11
- fugue/dataframe/dataframe.py +20 -2
- fugue/dataframe/function_wrapper.py +1 -1
- fugue/dataframe/iterable_dataframe.py +3 -0
- fugue/dataframe/pandas_dataframe.py +73 -0
- fugue/dataframe/utils.py +78 -25
- fugue/execution/execution_engine.py +1 -8
- fugue/execution/native_execution_engine.py +5 -11
- fugue/plugins.py +1 -0
- fugue/workflow/_checkpoint.py +9 -9
- {fugue-0.8.7.dev5.dist-info → fugue-0.8.7.dev7.dist-info}/METADATA +4 -4
- {fugue-0.8.7.dev5.dist-info → fugue-0.8.7.dev7.dist-info}/RECORD +40 -38
- {fugue-0.8.7.dev5.dist-info → fugue-0.8.7.dev7.dist-info}/WHEEL +1 -1
- {fugue-0.8.7.dev5.dist-info → fugue-0.8.7.dev7.dist-info}/entry_points.txt +3 -2
- fugue_dask/_io.py +22 -29
- fugue_dask/_utils.py +15 -2
- fugue_dask/dataframe.py +105 -18
- fugue_dask/execution_engine.py +5 -12
- fugue_duckdb/_io.py +21 -37
- fugue_duckdb/dataframe.py +87 -29
- fugue_duckdb/execution_engine.py +2 -7
- fugue_ibis/dataframe.py +13 -0
- fugue_ibis/execution_engine.py +1 -5
- fugue_polars/polars_dataframe.py +53 -16
- fugue_ray/_utils/io.py +15 -17
- fugue_ray/dataframe.py +71 -19
- fugue_spark/_utils/io.py +3 -5
- fugue_spark/dataframe.py +69 -13
- fugue_spark/execution_engine.py +2 -7
- fugue_test/builtin_suite.py +12 -12
- fugue_test/dataframe_suite.py +14 -0
- fugue_test/execution_suite.py +13 -18
- fugue_test/plugins/misc/__init__.py +2 -0
- fugue_test/plugins/misc/fixtures.py +18 -0
- {fugue-0.8.7.dev5.dist-info → fugue-0.8.7.dev7.dist-info}/LICENSE +0 -0
- {fugue-0.8.7.dev5.dist-info → fugue-0.8.7.dev7.dist-info}/top_level.txt +0 -0
fugue_spark/dataframe.py
CHANGED
@@ -14,9 +14,14 @@ from fugue.dataframe import (
     IterableDataFrame,
     LocalBoundedDataFrame,
 )
+from fugue.dataframe.utils import pa_table_as_array, pa_table_as_dicts
 from fugue.exceptions import FugueDataFrameOperationError
 from fugue.plugins import (
+    as_array,
+    as_array_iterable,
     as_arrow,
+    as_dict_iterable,
+    as_dicts,
     as_local_bounded,
     as_pandas,
     count,
@@ -152,23 +157,22 @@ class SparkDataFrame(DataFrame):
     def as_array(
         self, columns: Optional[List[str]] = None, type_safe: bool = False
     ) -> List[Any]:
-        ...
-        return sdf.as_local().as_array(type_safe=type_safe)
+        return _spark_as_array(self.native, columns=columns, type_safe=type_safe)

     def as_array_iterable(
         self, columns: Optional[List[str]] = None, type_safe: bool = False
     ) -> Iterable[Any]:
-        ...
+        yield from _spark_as_array_iterable(
+            self.native, columns=columns, type_safe=type_safe
+        )
+
+    def as_dicts(self, columns: Optional[List[str]] = None) -> List[Dict[str, Any]]:
+        return _spark_as_dicts(self.native, columns=columns)
+
+    def as_dict_iterable(
+        self, columns: Optional[List[str]] = None
+    ) -> Iterable[Dict[str, Any]]:
+        yield from _spark_as_dict_iterable(self.native, columns=columns)

     def head(
         self, n: int, columns: Optional[List[str]] = None
@@ -288,6 +292,58 @@ def _spark_df_head(
     return SparkDataFrame(res).as_local() if as_fugue else to_pandas(res)


+@as_array.candidate(lambda df, *args, **kwargs: is_spark_dataframe(df))
+def _spark_as_array(
+    df: ps.DataFrame, columns: Optional[List[str]] = None, type_safe: bool = False
+) -> List[Any]:
+    assert_or_throw(columns is None or len(columns) > 0, ValueError("empty columns"))
+    _df = df if columns is None or len(columns) == 0 else df[columns]
+    return pa_table_as_array(to_arrow(_df), columns)
+
+
+@as_array_iterable.candidate(lambda df, *args, **kwargs: is_spark_dataframe(df))
+def _spark_as_array_iterable(
+    df: ps.DataFrame, columns: Optional[List[str]] = None, type_safe: bool = False
+) -> Iterable[Any]:
+    if is_spark_connect(df):  # pragma: no cover
+        yield from _spark_as_array(df, columns, type_safe=type_safe)
+    else:
+        assert_or_throw(
+            columns is None or len(columns) > 0, ValueError("empty columns")
+        )
+        _df = df if columns is None or len(columns) == 0 else df[columns]
+        if not type_safe:
+            for row in to_type_safe_input(
+                _df.rdd.toLocalIterator(), to_schema(_df.schema)
+            ):
+                yield list(row)
+        else:
+            tdf = IterableDataFrame(
+                _spark_as_array_iterable(_df, type_safe=False), to_schema(_df.schema)
+            )
+            yield from tdf.as_array_iterable(type_safe=True)
+
+
+@as_dicts.candidate(lambda df, *args, **kwargs: is_spark_dataframe(df))
+def _spark_as_dicts(
+    df: ps.DataFrame, columns: Optional[List[str]] = None, type_safe: bool = False
+) -> List[Dict[str, Any]]:
+    assert_or_throw(columns is None or len(columns) > 0, ValueError("empty columns"))
+    _df = df if columns is None or len(columns) == 0 else df[columns]
+    return pa_table_as_dicts(to_arrow(_df), columns)
+
+
+@as_dict_iterable.candidate(lambda df, *args, **kwargs: is_spark_dataframe(df))
+def _spark_as_dict_iterable(
+    df: ps.DataFrame, columns: Optional[List[str]] = None, type_safe: bool = False
+) -> Iterable[Dict[str, Any]]:
+    assert_or_throw(columns is None or len(columns) > 0, ValueError("empty columns"))
+    _df = df if columns is None or len(columns) == 0 else df[columns]
+    cols = list(_df.columns)
+    for row in _spark_as_array_iterable(_df, type_safe=type_safe):
+        yield dict(zip(cols, row))
+
+
 def _rename_spark_dataframe(df: ps.DataFrame, names: Dict[str, Any]) -> ps.DataFrame:
     cols: List[ps.Column] = []
     for f in df.schema:
fugue_spark/execution_engine.py
CHANGED
@@ -11,7 +11,7 @@ from pyspark.rdd import RDD
 from pyspark.sql import SparkSession
 from pyspark.sql.functions import broadcast, col, lit, row_number
 from pyspark.sql.window import Window
-from triad import
+from triad import IndexedOrderedDict, ParamDict, Schema, SerializableRLock
 from triad.utils.assertion import assert_arg_not_none, assert_or_throw
 from triad.utils.hash import to_uuid
 from triad.utils.iter import EmptyAwareIterable
@@ -360,13 +360,12 @@ class SparkExecutionEngine(ExecutionEngine):
         cf.update(ParamDict(conf))
         super().__init__(cf)
         self._lock = SerializableRLock()
-        self._fs = FileSystem()
         self._log = logging.getLogger()
         self._broadcast_func = RunOnce(
             self._broadcast, lambda *args, **kwargs: id(args[0])
         )
         self._persist_func = RunOnce(self._persist, lambda *args, **kwargs: id(args[0]))
-        self._io = SparkIO(self.spark_session
+        self._io = SparkIO(self.spark_session)
         self._registered_dfs: Dict[str, SparkDataFrame] = {}

     def __repr__(self) -> str:
@@ -395,10 +394,6 @@ class SparkExecutionEngine(ExecutionEngine):
     def log(self) -> logging.Logger:
         return self._log

-    @property
-    def fs(self) -> FileSystem:
-        return self._fs
-
     def create_default_sql_engine(self) -> SQLEngine:
         return SparkSQLEngine(self)

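The fs property and its triad FileSystem backing disappear from SparkExecutionEngine here, and the test suites below make the same move to triad.utils.io helpers and fsspec. A hedged migration sketch for user code that previously reached for engine.fs (hypothetical paths; only helpers imported elsewhere in this diff are used):

    from triad.utils.io import isfile, makedirs, touch

    # 0.8.7.dev5-style (hypothetical user code):
    #     engine.fs.makedirs(path, recreate=True)
    #     engine.fs.touch(path + "/_SUCCESS")
    # 0.8.7.dev7-style: call the module-level helpers directly
    path = "/tmp/fugue_demo/output"          # hypothetical local path
    makedirs(path, exist_ok=True)
    touch(path + "/_SUCCESS")
    assert isfile(path + "/_SUCCESS")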
fugue_test/builtin_suite.py
CHANGED
@@ -12,11 +12,12 @@ import pickle
 from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional
 from unittest import TestCase
 from uuid import uuid4
-...
+from triad.utils.io import write_text, join
 import numpy as np
 import pandas as pd
 import pyarrow as pa
 import pytest
+from fsspec.implementations.local import LocalFileSystem
 from pytest import raises
 from triad import SerializableRLock

@@ -28,7 +29,6 @@ from fugue import (
     DataFrame,
     DataFrames,
     ExecutionEngine,
-    FileSystem,
     FugueWorkflow,
     LocalDataFrame,
     OutputCoTransformer,
@@ -65,6 +65,8 @@ from fugue.exceptions import (
     FugueWorkflowRuntimeValidationError,
 )

+_LOCAL_FS = LocalFileSystem(auto_mkdir=True)
+

 class BuiltInTests(object):
     """Workflow level general test suite. It is a more general end to end
@@ -633,9 +635,8 @@ class BuiltInTests(object):
         tmpdir = str(self.tmpdir)

         def incr():
-            ...
-            return fs.glob("*.txt").count().files
+            write_text(join(tmpdir, str(uuid4()) + ".txt"), "")
+            return len(_LOCAL_FS.glob(join(tmpdir, "*.txt")))

         def t1(df: Iterable[Dict[str, Any]]) -> Iterable[Dict[str, Any]]:
             for row in df:
@@ -717,9 +718,8 @@ class BuiltInTests(object):
         tmpdir = str(self.tmpdir)

         def incr():
-            ...
-            return fs.glob("*.tx" "t").count().files
+            write_text(join(tmpdir, str(uuid4()) + ".txt"), "")
+            return len(_LOCAL_FS.glob(join(tmpdir, "*.txt")))

         def t1(
             df: Iterable[Dict[str, Any]], df2: pd.DataFrame
@@ -1348,7 +1348,7 @@ class BuiltInTests(object):
            b.partition(num=3).save(path, fmt="parquet", single=True)
            b.save(path2, header=True)
        dag.run(self.engine)
-        assert
+        assert _LOCAL_FS.isfile(path)
        with FugueWorkflow() as dag:
            a = dag.load(path, fmt="parquet", columns=["a", "c"])
            a.assert_eq(dag.df([[1, 6], [7, 2]], "a:long,c:int"))
@@ -1359,9 +1359,9 @@ class BuiltInTests(object):
            b = dag.df([[6, 1], [2, 7]], "c:int,a:long")
            b.partition(by="c").save(path3, fmt="parquet", single=False)
        dag.run(self.engine)
-        assert
-        assert
-        assert
+        assert _LOCAL_FS.isdir(path3)
+        assert _LOCAL_FS.isdir(os.path.join(path3, "c=6"))
+        assert _LOCAL_FS.isdir(os.path.join(path3, "c=2"))
        # TODO: in test below, once issue #288 is fixed, use dag.load
        # instead of pd.read_parquet
        pdf = pd.read_parquet(path3).sort_values("a").reset_index(drop=True)
fugue_test/dataframe_suite.py
CHANGED
@@ -208,8 +208,22 @@ class DataFrameTests(object):
     def test_as_dict_iterable(self):
         df = self.df([[pd.NaT, 1]], "a:datetime,b:int")
         assert [dict(a=None, b=1)] == list(fi.as_dict_iterable(df))
+        df = self.df([[pd.NaT, 1]], "a:datetime,b:int")
+        assert [dict(b=1)] == list(fi.as_dict_iterable(df, ["b"]))
         df = self.df([[pd.Timestamp("2020-01-01"), 1]], "a:datetime,b:int")
         assert [dict(a=datetime(2020, 1, 1), b=1)] == list(fi.as_dict_iterable(df))
+        df = self.df([[pd.Timestamp("2020-01-01"), 1]], "a:datetime,b:int")
+        assert [dict(b=1)] == list(fi.as_dict_iterable(df, ["b"]))
+
+    def test_as_dicts(self):
+        df = self.df([[pd.NaT, 1]], "a:datetime,b:int")
+        assert [dict(a=None, b=1)] == fi.as_dicts(df)
+        df = self.df([[pd.NaT, 1]], "a:datetime,b:int")
+        assert [dict(b=1)] == fi.as_dicts(df, ["b"])
+        df = self.df([[pd.Timestamp("2020-01-01"), 1]], "a:datetime,b:int")
+        assert [dict(a=datetime(2020, 1, 1), b=1)] == fi.as_dicts(df)
+        df = self.df([[pd.Timestamp("2020-01-01"), 1]], "a:datetime,b:int")
+        assert [dict(b=1)] == fi.as_dicts(df, ["b"])

     def test_list_type(self):
         data = [[[30, 40]]]
fugue_test/execution_suite.py
CHANGED
@@ -15,8 +15,8 @@ from unittest import TestCase
 import pandas as pd
 import pytest
 from pytest import raises
-from triad.collections.fs import FileSystem
 from triad.exceptions import InvalidOperationError
+from triad.utils.io import isfile, makedirs, touch

 import fugue.api as fa
 import fugue.column.functions as ff
@@ -62,7 +62,6 @@ class ExecutionEngineTests(object):
     def test_init(self):
         print(self.engine)
         assert self.engine.log is not None
-        assert self.engine.fs is not None
         assert copy.copy(self.engine) is self.engine
         assert copy.deepcopy(self.engine) is self.engine

@@ -985,17 +984,16 @@ class ExecutionEngineTests(object):
         df_eq(res, [[1, "z1"]], "a:int,v:str", throw=True)

     @pytest.fixture(autouse=True)
-    def init_tmpdir(self, tmpdir):
+    def init_tmpdir(self, tmpdir, tmp_mem_dir):
         self.tmpdir = tmpdir

     def test_save_single_and_load_parquet(self):
-        e = self.engine
         b = ArrayDataFrame([[6, 1], [2, 7]], "c:int,a:long")
         path = os.path.join(self.tmpdir, "a", "b")
-        ...
+        makedirs(path, exist_ok=True)
         # over write folder with single file
         fa.save(b, path, format_hint="parquet", force_single=True)
-        assert
+        assert isfile(path)
         c = fa.load(path, format_hint="parquet", columns=["a", "c"], as_fugue=True)
         df_eq(c, [[1, 6], [7, 2]], "a:long,c:int", throw=True)

@@ -1019,7 +1017,7 @@ class ExecutionEngineTests(object):
         path = os.path.join(self.tmpdir, "a", "b")
         fa.save(a, os.path.join(path, "a.parquet"), engine=native)
         fa.save(b, os.path.join(path, "b.parquet"), engine=native)
-        ...
+        touch(os.path.join(path, "_SUCCESS"))
         c = fa.load(path, format_hint="parquet", columns=["a", "c"], as_fugue=True)
         df_eq(c, [[1, 6], [7, 2], [8, 4]], "a:long,c:int", throw=True)

@@ -1038,13 +1036,12 @@ class ExecutionEngineTests(object):
         df_eq(c, [[1, 6], [7, 2], [8, 4]], "a:long,c:int", throw=True)

     def test_save_single_and_load_csv(self):
-        e = self.engine
         b = ArrayDataFrame([[6.1, 1.1], [2.1, 7.1]], "c:double,a:double")
         path = os.path.join(self.tmpdir, "a", "b")
-        ...
+        makedirs(path, exist_ok=True)
         # over write folder with single file
         fa.save(b, path, format_hint="csv", header=True, force_single=True)
-        assert
+        assert isfile(path)
         c = fa.load(
             path, format_hint="csv", header=True, infer_schema=False, as_fugue=True
         )
@@ -1099,13 +1096,12 @@ class ExecutionEngineTests(object):
         df_eq(c, [["1.1", "60.1"], ["7.1", "20.1"]], "a:str,c:str", throw=True)

     def test_save_single_and_load_csv_no_header(self):
-        e = self.engine
         b = ArrayDataFrame([[6.1, 1.1], [2.1, 7.1]], "c:double,a:double")
         path = os.path.join(self.tmpdir, "a", "b")
-        ...
+        makedirs(path, exist_ok=True)
         # over write folder with single file
         fa.save(b, path, format_hint="csv", header=False, force_single=True)
-        assert
+        assert isfile(path)

         with raises(ValueError):
             c = fa.load(
@@ -1190,7 +1186,7 @@ class ExecutionEngineTests(object):
             header=True,
             engine=native,
         )
-        ...
+        touch(os.path.join(path, "_SUCCESS"))
         c = fa.load(
             path,
             format_hint="csv",
@@ -1204,13 +1200,12 @@ class ExecutionEngineTests(object):
         )

     def test_save_single_and_load_json(self):
-        e = self.engine
         b = ArrayDataFrame([[6, 1], [2, 7]], "c:int,a:long")
         path = os.path.join(self.tmpdir, "a", "b")
-        ...
+        makedirs(path, exist_ok=True)
         # over write folder with single file
         fa.save(b, path, format_hint="json", force_single=True)
-        assert
+        assert isfile(path)
         c = fa.load(path, format_hint="json", columns=["a", "c"], as_fugue=True)
         df_eq(c, [[1, 6], [7, 2]], "a:long,c:long", throw=True)

@@ -1241,7 +1236,7 @@ class ExecutionEngineTests(object):
         path = os.path.join(self.tmpdir, "a", "b")
         fa.save(a, os.path.join(path, "a.json"), format_hint="json", engine=native)
         fa.save(b, os.path.join(path, "b.json"), format_hint="json", engine=native)
-        ...
+        touch(os.path.join(path, "_SUCCESS"))
         c = fa.load(path, format_hint="json", columns=["a", "c"], as_fugue=True)
         df_eq(c, [[1, 6], [7, 2], [8, 4], [4, 3]], "a:long,c:long", throw=True)

fugue_test/plugins/misc/fixtures.py
ADDED

@@ -0,0 +1,18 @@
+import uuid
+
+import pytest
+from triad.utils.io import makedirs, rm
+
+
+@pytest.fixture
+def tmp_mem_dir():
+    uuid_str = str(uuid.uuid4())[:5]
+    path = "memory://test_" + uuid_str
+    makedirs(path)
+    try:
+        yield path
+    finally:
+        try:
+            rm(path, recursive=True)
+        except Exception:  # pragma: no cover
+            pass
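This new fixture backs the extra tmp_mem_dir parameter that init_tmpdir now requests in fugue_test/execution_suite.py above. A minimal sketch of using it from a standalone test, assuming the fixture is exposed through fugue_test's pytest plugin registration (the entry_points.txt change above suggests a new plugin module) and that the triad.utils.io helpers accept memory:// URLs, as the fixture itself does with makedirs and rm:

    # test_mem_dir_demo.py -- hypothetical test module
    from triad.utils.io import isfile, join, write_text


    def test_write_to_memory_fs(tmp_mem_dir):
        # tmp_mem_dir is a fresh "memory://test_xxxxx" directory from the fixture
        f = join(tmp_mem_dir, "hello.txt")
        write_text(f, "hello")
        assert isfile(f)
        # teardown removes the whole directory via rm(path, recursive=True)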