fugue 0.9.0.dev2__py3-none-any.whl → 0.9.0.dev4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fugue/collections/sql.py +1 -1
- fugue/dataframe/utils.py +4 -18
- fugue/test/plugins.py +11 -1
- {fugue-0.9.0.dev2.dist-info → fugue-0.9.0.dev4.dist-info}/METADATA +11 -8
- {fugue-0.9.0.dev2.dist-info → fugue-0.9.0.dev4.dist-info}/RECORD +20 -20
- {fugue-0.9.0.dev2.dist-info → fugue-0.9.0.dev4.dist-info}/WHEEL +1 -1
- fugue_dask/_io.py +8 -5
- fugue_dask/_utils.py +4 -4
- fugue_duckdb/_io.py +1 -0
- fugue_ibis/execution_engine.py +14 -7
- fugue_ray/_constants.py +3 -4
- fugue_ray/_utils/dataframe.py +10 -21
- fugue_ray/_utils/io.py +36 -13
- fugue_ray/execution_engine.py +1 -2
- fugue_test/builtin_suite.py +14 -15
- fugue_test/dataframe_suite.py +3 -4
- fugue_test/execution_suite.py +130 -123
- {fugue-0.9.0.dev2.dist-info → fugue-0.9.0.dev4.dist-info}/LICENSE +0 -0
- {fugue-0.9.0.dev2.dist-info → fugue-0.9.0.dev4.dist-info}/entry_points.txt +0 -0
- {fugue-0.9.0.dev2.dist-info → fugue-0.9.0.dev4.dist-info}/top_level.txt +0 -0
fugue_test/execution_suite.py
CHANGED
|
@@ -28,7 +28,6 @@ from fugue import (
|
|
|
28
28
|
PartitionSpec,
|
|
29
29
|
)
|
|
30
30
|
from fugue.column import all_cols, col, lit
|
|
31
|
-
from fugue.dataframe.utils import _df_eq as df_eq
|
|
32
31
|
from fugue.execution.native_execution_engine import NativeExecutionEngine
|
|
33
32
|
|
|
34
33
|
|
|
@@ -56,19 +55,19 @@ class ExecutionEngineTests(object):
|
|
|
56
55
|
)
|
|
57
56
|
# all engines should accept these types of inputs
|
|
58
57
|
# should take fugue.DataFrame
|
|
59
|
-
df_eq(o, fa.as_fugue_engine_df(e, o), throw=True)
|
|
58
|
+
self.df_eq(o, fa.as_fugue_engine_df(e, o), throw=True)
|
|
60
59
|
# should take array, schema
|
|
61
|
-
df_eq(
|
|
60
|
+
self.df_eq(
|
|
62
61
|
o,
|
|
63
62
|
fa.as_fugue_engine_df(e, [[1.1, 2.2], [3.3, 4.4]], "a:double,b:double"),
|
|
64
63
|
throw=True,
|
|
65
64
|
)
|
|
66
65
|
# should take pandas dataframe
|
|
67
66
|
pdf = pd.DataFrame([[1.1, 2.2], [3.3, 4.4]], columns=["a", "b"])
|
|
68
|
-
df_eq(o, fa.as_fugue_engine_df(e, pdf), throw=True)
|
|
67
|
+
self.df_eq(o, fa.as_fugue_engine_df(e, pdf), throw=True)
|
|
69
68
|
|
|
70
69
|
# should convert string to datetime in to_df
|
|
71
|
-
df_eq(
|
|
70
|
+
self.df_eq(
|
|
72
71
|
fa.as_fugue_engine_df(e, [["2020-01-01"]], "a:datetime"),
|
|
73
72
|
[[datetime(2020, 1, 1)]],
|
|
74
73
|
"a:datetime",
|
|
@@ -79,7 +78,7 @@ class ExecutionEngineTests(object):
|
|
|
79
78
|
o = ArrayDataFrame([], "a:double,b:str")
|
|
80
79
|
pdf = pd.DataFrame([[0.1, "a"]], columns=["a", "b"])
|
|
81
80
|
pdf = pdf[pdf.a < 0]
|
|
82
|
-
df_eq(o, fa.as_fugue_engine_df(e, pdf), throw=True)
|
|
81
|
+
self.df_eq(o, fa.as_fugue_engine_df(e, pdf), throw=True)
|
|
83
82
|
|
|
84
83
|
@pytest.mark.skipif(not HAS_QPD, reason="qpd not working")
|
|
85
84
|
def test_filter(self):
|
|
@@ -88,11 +87,11 @@ class ExecutionEngineTests(object):
|
|
|
88
87
|
"a:double,b:int",
|
|
89
88
|
)
|
|
90
89
|
b = fa.filter(a, col("a").not_null())
|
|
91
|
-
df_eq(b, [[1, 2], [3, 4]], "a:double,b:int", throw=True)
|
|
90
|
+
self.df_eq(b, [[1, 2], [3, 4]], "a:double,b:int", throw=True)
|
|
92
91
|
c = fa.filter(a, col("a").not_null() & (col("b") < 3))
|
|
93
|
-
df_eq(c, [[1, 2]], "a:double,b:int", throw=True)
|
|
92
|
+
self.df_eq(c, [[1, 2]], "a:double,b:int", throw=True)
|
|
94
93
|
c = fa.filter(a, col("a") + col("b") == 3)
|
|
95
|
-
df_eq(c, [[1, 2]], "a:double,b:int", throw=True)
|
|
94
|
+
self.df_eq(c, [[1, 2]], "a:double,b:int", throw=True)
|
|
96
95
|
|
|
97
96
|
@pytest.mark.skipif(not HAS_QPD, reason="qpd not working")
|
|
98
97
|
def test_select(self):
|
|
@@ -102,7 +101,7 @@ class ExecutionEngineTests(object):
|
|
|
102
101
|
|
|
103
102
|
# simple
|
|
104
103
|
b = fa.select(a, col("b"), (col("b") + 1).alias("c").cast(str))
|
|
105
|
-
df_eq(
|
|
104
|
+
self.df_eq(
|
|
106
105
|
b,
|
|
107
106
|
[[2, "3"], [2, "3"], [1, "2"], [4, "5"], [4, "5"]],
|
|
108
107
|
"b:int,c:str",
|
|
@@ -113,7 +112,7 @@ class ExecutionEngineTests(object):
|
|
|
113
112
|
b = fa.select(
|
|
114
113
|
a, col("b"), (col("b") + 1).alias("c").cast(str), distinct=True
|
|
115
114
|
)
|
|
116
|
-
df_eq(
|
|
115
|
+
self.df_eq(
|
|
117
116
|
b,
|
|
118
117
|
[[2, "3"], [1, "2"], [4, "5"]],
|
|
119
118
|
"b:int,c:str",
|
|
@@ -122,11 +121,11 @@ class ExecutionEngineTests(object):
|
|
|
122
121
|
|
|
123
122
|
# wildcard
|
|
124
123
|
b = fa.select(a, all_cols(), where=col("a") + col("b") == 3)
|
|
125
|
-
df_eq(b, [[1, 2]], "a:double,b:int", throw=True)
|
|
124
|
+
self.df_eq(b, [[1, 2]], "a:double,b:int", throw=True)
|
|
126
125
|
|
|
127
126
|
# aggregation
|
|
128
127
|
b = fa.select(a, col("a"), ff.sum(col("b")).cast(float).alias("b"))
|
|
129
|
-
df_eq(b, [[1, 2], [3, 4], [None, 7]], "a:double,b:double", throw=True)
|
|
128
|
+
self.df_eq(b, [[1, 2], [3, 4], [None, 7]], "a:double,b:double", throw=True)
|
|
130
129
|
|
|
131
130
|
# having
|
|
132
131
|
# https://github.com/fugue-project/fugue/issues/222
|
|
@@ -137,7 +136,7 @@ class ExecutionEngineTests(object):
|
|
|
137
136
|
col_b.cast(float).alias("c"),
|
|
138
137
|
having=(col_b >= 7) | (col("a") == 1),
|
|
139
138
|
)
|
|
140
|
-
df_eq(b, [[1, 2], [None, 7]], "a:double,c:double", throw=True)
|
|
139
|
+
self.df_eq(b, [[1, 2], [None, 7]], "a:double,c:double", throw=True)
|
|
141
140
|
|
|
142
141
|
# literal + alias inference
|
|
143
142
|
# https://github.com/fugue-project/fugue/issues/222
|
|
@@ -149,7 +148,7 @@ class ExecutionEngineTests(object):
|
|
|
149
148
|
col_b.cast(float).alias("c"),
|
|
150
149
|
having=(col_b >= 7) | (col("a") == 1),
|
|
151
150
|
)
|
|
152
|
-
df_eq(
|
|
151
|
+
self.df_eq(
|
|
153
152
|
b, [[1, "1", 2], [None, "1", 7]], "a:double,o:str,c:double", throw=True
|
|
154
153
|
)
|
|
155
154
|
|
|
@@ -160,7 +159,7 @@ class ExecutionEngineTests(object):
|
|
|
160
159
|
)
|
|
161
160
|
|
|
162
161
|
b = fa.assign(a, x=1, b=col("b").cast(str), c=(col("b") + 1).cast(int))
|
|
163
|
-
df_eq(
|
|
162
|
+
self.df_eq(
|
|
164
163
|
b,
|
|
165
164
|
[
|
|
166
165
|
[1, "2", 1, 3],
|
|
@@ -184,7 +183,7 @@ class ExecutionEngineTests(object):
|
|
|
184
183
|
b=ff.max(col("b")),
|
|
185
184
|
c=(ff.max(col("b")) * 2).cast("int32").alias("c"),
|
|
186
185
|
)
|
|
187
|
-
df_eq(b, [[4, 8]], "b:int,c:int", throw=True)
|
|
186
|
+
self.df_eq(b, [[4, 8]], "b:int,c:int", throw=True)
|
|
188
187
|
|
|
189
188
|
b = fa.aggregate(
|
|
190
189
|
a,
|
|
@@ -192,7 +191,7 @@ class ExecutionEngineTests(object):
|
|
|
192
191
|
b=ff.max(col("b")),
|
|
193
192
|
c=(ff.max(col("b")) * 2).cast("int32").alias("c"),
|
|
194
193
|
)
|
|
195
|
-
df_eq(
|
|
194
|
+
self.df_eq(
|
|
196
195
|
b,
|
|
197
196
|
[[None, 4, 8], [1, 2, 4], [3, 4, 8]],
|
|
198
197
|
"a:double,b:int,c:int",
|
|
@@ -221,17 +220,17 @@ class ExecutionEngineTests(object):
|
|
|
221
220
|
a = fa.as_fugue_engine_df(e, o)
|
|
222
221
|
# no partition
|
|
223
222
|
c = e.map_engine.map_dataframe(a, noop, a.schema, PartitionSpec())
|
|
224
|
-
df_eq(c, o, throw=True)
|
|
223
|
+
self.df_eq(c, o, throw=True)
|
|
225
224
|
# with key partition
|
|
226
225
|
c = e.map_engine.map_dataframe(
|
|
227
226
|
a, noop, a.schema, PartitionSpec(by=["a"], presort="b")
|
|
228
227
|
)
|
|
229
|
-
df_eq(c, o, throw=True)
|
|
228
|
+
self.df_eq(c, o, throw=True)
|
|
230
229
|
# select top
|
|
231
230
|
c = e.map_engine.map_dataframe(
|
|
232
231
|
a, select_top, a.schema, PartitionSpec(by=["a"], presort="b")
|
|
233
232
|
)
|
|
234
|
-
df_eq(c, [[None, 1], [1, 2], [3, 4]], "a:double,b:int", throw=True)
|
|
233
|
+
self.df_eq(c, [[None, 1], [1, 2], [3, 4]], "a:double,b:int", throw=True)
|
|
235
234
|
# select top with another order
|
|
236
235
|
c = e.map_engine.map_dataframe(
|
|
237
236
|
a,
|
|
@@ -239,7 +238,7 @@ class ExecutionEngineTests(object):
|
|
|
239
238
|
a.schema,
|
|
240
239
|
PartitionSpec(partition_by=["a"], presort="b DESC"),
|
|
241
240
|
)
|
|
242
|
-
df_eq(
|
|
241
|
+
self.df_eq(
|
|
243
242
|
c,
|
|
244
243
|
[[None, 4], [1, 2], [3, 4]],
|
|
245
244
|
"a:double,b:int",
|
|
@@ -253,7 +252,7 @@ class ExecutionEngineTests(object):
|
|
|
253
252
|
PartitionSpec(partition_by=["a"], presort="b DESC", num_partitions=3),
|
|
254
253
|
on_init=on_init,
|
|
255
254
|
)
|
|
256
|
-
df_eq(c, [[None, 4], [1, 2], [3, 4]], "a:double,b:int", throw=True)
|
|
255
|
+
self.df_eq(c, [[None, 4], [1, 2], [3, 4]], "a:double,b:int", throw=True)
|
|
257
256
|
|
|
258
257
|
def test_map_with_special_values(self):
|
|
259
258
|
def with_nat(cursor, data):
|
|
@@ -270,7 +269,7 @@ class ExecutionEngineTests(object):
|
|
|
270
269
|
c = e.map_engine.map_dataframe(
|
|
271
270
|
o, select_top, o.schema, PartitionSpec(by=["a", "b"], presort="c")
|
|
272
271
|
)
|
|
273
|
-
df_eq(
|
|
272
|
+
self.df_eq(
|
|
274
273
|
c,
|
|
275
274
|
[[1, None, 0], [None, None, 2]],
|
|
276
275
|
"a:double,b:double,c:int",
|
|
@@ -291,7 +290,7 @@ class ExecutionEngineTests(object):
|
|
|
291
290
|
c = e.map_engine.map_dataframe(
|
|
292
291
|
o, select_top, o.schema, PartitionSpec(by=["a", "c"], presort="b DESC")
|
|
293
292
|
)
|
|
294
|
-
df_eq(
|
|
293
|
+
self.df_eq(
|
|
295
294
|
c,
|
|
296
295
|
[[None, 4, None], [dt, 5, 1]],
|
|
297
296
|
"a:datetime,b:int,c:double",
|
|
@@ -300,7 +299,7 @@ class ExecutionEngineTests(object):
|
|
|
300
299
|
d = e.map_engine.map_dataframe(
|
|
301
300
|
c, with_nat, "a:datetime,b:int,c:double,nat:datetime", PartitionSpec()
|
|
302
301
|
)
|
|
303
|
-
df_eq(
|
|
302
|
+
self.df_eq(
|
|
304
303
|
d,
|
|
305
304
|
[[None, 4, None, None], [dt, 5, 1, None]],
|
|
306
305
|
"a:datetime,b:int,c:double,nat:datetime",
|
|
@@ -311,7 +310,7 @@ class ExecutionEngineTests(object):
|
|
|
311
310
|
c = e.map_engine.map_dataframe(
|
|
312
311
|
o, select_top, o.schema, PartitionSpec(by=["a"])
|
|
313
312
|
)
|
|
314
|
-
df_eq(c, o, check_order=True, throw=True)
|
|
313
|
+
self.df_eq(c, o, check_order=True, throw=True)
|
|
315
314
|
|
|
316
315
|
def test_map_with_dict_col(self):
|
|
317
316
|
e = self.engine
|
|
@@ -321,7 +320,7 @@ class ExecutionEngineTests(object):
|
|
|
321
320
|
c = e.map_engine.map_dataframe(
|
|
322
321
|
o, select_top, o.schema, PartitionSpec(by=["a"])
|
|
323
322
|
)
|
|
324
|
-
df_eq(c, o, no_pandas=True, check_order=True, throw=True)
|
|
323
|
+
self.df_eq(c, o, no_pandas=True, check_order=True, throw=True)
|
|
325
324
|
|
|
326
325
|
# input has dict, output doesn't
|
|
327
326
|
def mp2(cursor, data):
|
|
@@ -330,7 +329,7 @@ class ExecutionEngineTests(object):
|
|
|
330
329
|
c = e.map_engine.map_dataframe(
|
|
331
330
|
o, mp2, "a:datetime", PartitionSpec(by=["a"])
|
|
332
331
|
)
|
|
333
|
-
df_eq(
|
|
332
|
+
self.df_eq(
|
|
334
333
|
c,
|
|
335
334
|
PandasDataFrame([[dt]], "a:datetime"),
|
|
336
335
|
no_pandas=True,
|
|
@@ -345,7 +344,7 @@ class ExecutionEngineTests(object):
|
|
|
345
344
|
c = e.map_engine.map_dataframe(
|
|
346
345
|
c, mp3, "a:datetime,b:{a:long}", PartitionSpec(by=["a"])
|
|
347
346
|
)
|
|
348
|
-
df_eq(c, o, no_pandas=True, check_order=True, throw=True)
|
|
347
|
+
self.df_eq(c, o, no_pandas=True, check_order=True, throw=True)
|
|
349
348
|
|
|
350
349
|
def test_map_with_binary(self):
|
|
351
350
|
e = self.engine
|
|
@@ -361,7 +360,7 @@ class ExecutionEngineTests(object):
|
|
|
361
360
|
],
|
|
362
361
|
"a:bytes",
|
|
363
362
|
)
|
|
364
|
-
df_eq(expected, c, no_pandas=True, check_order=False, throw=True)
|
|
363
|
+
self.df_eq(expected, c, no_pandas=True, check_order=False, throw=True)
|
|
365
364
|
|
|
366
365
|
def test_join_multiple(self):
|
|
367
366
|
e = self.engine
|
|
@@ -369,7 +368,7 @@ class ExecutionEngineTests(object):
|
|
|
369
368
|
b = fa.as_fugue_engine_df(e, [[1, 20], [3, 40]], "a:int,c:int")
|
|
370
369
|
c = fa.as_fugue_engine_df(e, [[1, 200], [3, 400]], "a:int,d:int")
|
|
371
370
|
d = fa.inner_join(a, b, c)
|
|
372
|
-
df_eq(
|
|
371
|
+
self.df_eq(
|
|
373
372
|
d,
|
|
374
373
|
[[1, 2, 20, 200], [3, 4, 40, 400]],
|
|
375
374
|
"a:int,b:int,c:int,d:int",
|
|
@@ -381,7 +380,7 @@ class ExecutionEngineTests(object):
|
|
|
381
380
|
a = fa.as_fugue_engine_df(e, [[1, 2], [3, 4]], "a:int,b:int")
|
|
382
381
|
b = fa.as_fugue_engine_df(e, [[6], [7]], "c:int")
|
|
383
382
|
c = fa.join(a, b, how="Cross")
|
|
384
|
-
df_eq(
|
|
383
|
+
self.df_eq(
|
|
385
384
|
c,
|
|
386
385
|
[[1, 2, 6], [1, 2, 7], [3, 4, 6], [3, 4, 7]],
|
|
387
386
|
"a:int,b:int,c:int",
|
|
@@ -390,26 +389,26 @@ class ExecutionEngineTests(object):
|
|
|
390
389
|
|
|
391
390
|
b = fa.as_fugue_engine_df(e, [], "c:int")
|
|
392
391
|
c = fa.cross_join(a, b)
|
|
393
|
-
df_eq(c, [], "a:int,b:int,c:int", throw=True)
|
|
392
|
+
self.df_eq(c, [], "a:int,b:int,c:int", throw=True)
|
|
394
393
|
|
|
395
394
|
a = fa.as_fugue_engine_df(e, [], "a:int,b:int")
|
|
396
395
|
b = fa.as_fugue_engine_df(e, [], "c:int")
|
|
397
396
|
c = fa.join(a, b, how="Cross")
|
|
398
|
-
df_eq(c, [], "a:int,b:int,c:int", throw=True)
|
|
397
|
+
self.df_eq(c, [], "a:int,b:int,c:int", throw=True)
|
|
399
398
|
|
|
400
399
|
def test__join_inner(self):
|
|
401
400
|
e = self.engine
|
|
402
401
|
a = fa.as_fugue_engine_df(e, [[1, 2], [3, 4]], "a:int,b:int")
|
|
403
402
|
b = fa.as_fugue_engine_df(e, [[6, 1], [2, 7]], "c:int,a:int")
|
|
404
403
|
c = fa.join(a, b, how="INNER", on=["a"])
|
|
405
|
-
df_eq(c, [[1, 2, 6]], "a:int,b:int,c:int", throw=True)
|
|
404
|
+
self.df_eq(c, [[1, 2, 6]], "a:int,b:int,c:int", throw=True)
|
|
406
405
|
c = fa.inner_join(b, a)
|
|
407
|
-
df_eq(c, [[6, 1, 2]], "c:int,a:int,b:int", throw=True)
|
|
406
|
+
self.df_eq(c, [[6, 1, 2]], "c:int,a:int,b:int", throw=True)
|
|
408
407
|
|
|
409
408
|
a = fa.as_fugue_engine_df(e, [], "a:int,b:int")
|
|
410
409
|
b = fa.as_fugue_engine_df(e, [], "c:int,a:int")
|
|
411
410
|
c = fa.join(a, b, how="INNER", on=["a"])
|
|
412
|
-
df_eq(c, [], "a:int,b:int,c:int", throw=True)
|
|
411
|
+
self.df_eq(c, [], "a:int,b:int,c:int", throw=True)
|
|
413
412
|
|
|
414
413
|
def test__join_outer(self):
|
|
415
414
|
e = self.engine
|
|
@@ -417,34 +416,38 @@ class ExecutionEngineTests(object):
|
|
|
417
416
|
a = fa.as_fugue_engine_df(e, [], "a:int,b:int")
|
|
418
417
|
b = fa.as_fugue_engine_df(e, [], "c:str,a:int")
|
|
419
418
|
c = fa.left_outer_join(a, b)
|
|
420
|
-
df_eq(c, [], "a:int,b:int,c:str", throw=True)
|
|
419
|
+
self.df_eq(c, [], "a:int,b:int,c:str", throw=True)
|
|
421
420
|
|
|
422
421
|
a = fa.as_fugue_engine_df(e, [], "a:int,b:str")
|
|
423
422
|
b = fa.as_fugue_engine_df(e, [], "c:int,a:int")
|
|
424
423
|
c = fa.right_outer_join(a, b)
|
|
425
|
-
df_eq(c, [], "a:int,b:str,c:int", throw=True)
|
|
424
|
+
self.df_eq(c, [], "a:int,b:str,c:int", throw=True)
|
|
426
425
|
|
|
427
426
|
a = fa.as_fugue_engine_df(e, [], "a:int,b:str")
|
|
428
427
|
b = fa.as_fugue_engine_df(e, [], "c:str,a:int")
|
|
429
428
|
c = fa.full_outer_join(a, b)
|
|
430
|
-
df_eq(c, [], "a:int,b:str,c:str", throw=True)
|
|
429
|
+
self.df_eq(c, [], "a:int,b:str,c:str", throw=True)
|
|
431
430
|
|
|
432
431
|
a = fa.as_fugue_engine_df(e, [[1, "2"], [3, "4"]], "a:int,b:str")
|
|
433
432
|
b = fa.as_fugue_engine_df(e, [["6", 1], ["2", 7]], "c:str,a:int")
|
|
434
433
|
c = fa.join(a, b, how="left_OUTER", on=["a"])
|
|
435
|
-
df_eq(
|
|
434
|
+
self.df_eq(
|
|
435
|
+
c, [[1, "2", "6"], [3, "4", None]], "a:int,b:str,c:str", throw=True
|
|
436
|
+
)
|
|
436
437
|
c = fa.join(b, a, how="left_outer", on=["a"])
|
|
437
|
-
df_eq(
|
|
438
|
+
self.df_eq(
|
|
439
|
+
c, [["6", 1, "2"], ["2", 7, None]], "c:str,a:int,b:str", throw=True
|
|
440
|
+
)
|
|
438
441
|
|
|
439
442
|
a = fa.as_fugue_engine_df(e, [[1, "2"], [3, "4"]], "a:int,b:str")
|
|
440
443
|
b = fa.as_fugue_engine_df(e, [[6, 1], [2, 7]], "c:double,a:int")
|
|
441
444
|
c = fa.join(a, b, how="left_OUTER", on=["a"])
|
|
442
|
-
df_eq(
|
|
445
|
+
self.df_eq(
|
|
443
446
|
c, [[1, "2", 6.0], [3, "4", None]], "a:int,b:str,c:double", throw=True
|
|
444
447
|
)
|
|
445
448
|
c = fa.join(b, a, how="left_outer", on=["a"])
|
|
446
449
|
# assert c.as_pandas().values.tolist()[1][2] is None
|
|
447
|
-
df_eq(
|
|
450
|
+
self.df_eq(
|
|
448
451
|
c, [[6.0, 1, "2"], [2.0, 7, None]], "c:double,a:int,b:str", throw=True
|
|
449
452
|
)
|
|
450
453
|
|
|
@@ -452,10 +455,12 @@ class ExecutionEngineTests(object):
|
|
|
452
455
|
b = fa.as_fugue_engine_df(e, [["6", 1], ["2", 7]], "c:str,a:int")
|
|
453
456
|
c = fa.join(a, b, how="right_outer", on=["a"])
|
|
454
457
|
# assert c.as_pandas().values.tolist()[1][1] is None
|
|
455
|
-
df_eq(
|
|
458
|
+
self.df_eq(
|
|
459
|
+
c, [[1, "2", "6"], [7, None, "2"]], "a:int,b:str,c:str", throw=True
|
|
460
|
+
)
|
|
456
461
|
|
|
457
462
|
c = fa.join(a, b, how="full_outer", on=["a"])
|
|
458
|
-
df_eq(
|
|
463
|
+
self.df_eq(
|
|
459
464
|
c,
|
|
460
465
|
[[1, "2", "6"], [3, "4", None], [7, None, "2"]],
|
|
461
466
|
"a:int,b:str,c:str",
|
|
@@ -468,21 +473,23 @@ class ExecutionEngineTests(object):
|
|
|
468
473
|
a = fa.as_fugue_engine_df(e, [[1, "2"], [3, "4"]], "a:int,b:str")
|
|
469
474
|
b = fa.as_fugue_engine_df(e, [[6, 1], [2, 7]], "c:int,a:int")
|
|
470
475
|
c = fa.join(a, b, how="left_OUTER", on=["a"])
|
|
471
|
-
df_eq(
|
|
476
|
+
self.df_eq(
|
|
472
477
|
c,
|
|
473
478
|
[[1, "2", 6], [3, "4", None]],
|
|
474
479
|
"a:int,b:str,c:int",
|
|
475
480
|
throw=True,
|
|
476
481
|
)
|
|
477
482
|
c = fa.join(b, a, how="left_outer", on=["a"])
|
|
478
|
-
df_eq(c, [[6, 1, "2"], [2, 7, None]], "c:int,a:int,b:str", throw=True)
|
|
483
|
+
self.df_eq(c, [[6, 1, "2"], [2, 7, None]], "c:int,a:int,b:str", throw=True)
|
|
479
484
|
|
|
480
485
|
a = fa.as_fugue_engine_df(e, [[1, "2"], [3, "4"]], "a:int,b:str")
|
|
481
486
|
b = fa.as_fugue_engine_df(e, [[True, 1], [False, 7]], "c:bool,a:int")
|
|
482
487
|
c = fa.join(a, b, how="left_OUTER", on=["a"])
|
|
483
|
-
df_eq(
|
|
488
|
+
self.df_eq(
|
|
489
|
+
c, [[1, "2", True], [3, "4", None]], "a:int,b:str,c:bool", throw=True
|
|
490
|
+
)
|
|
484
491
|
c = fa.join(b, a, how="left_outer", on=["a"])
|
|
485
|
-
df_eq(
|
|
492
|
+
self.df_eq(
|
|
486
493
|
c, [[True, 1, "2"], [False, 7, None]], "c:bool,a:int,b:str", throw=True
|
|
487
494
|
)
|
|
488
495
|
|
|
@@ -491,36 +498,36 @@ class ExecutionEngineTests(object):
|
|
|
491
498
|
a = fa.as_fugue_engine_df(e, [[1, 2], [3, 4]], "a:int,b:int")
|
|
492
499
|
b = fa.as_fugue_engine_df(e, [[6, 1], [2, 7]], "c:int,a:int")
|
|
493
500
|
c = fa.join(a, b, how="semi", on=["a"])
|
|
494
|
-
df_eq(c, [[1, 2]], "a:int,b:int", throw=True)
|
|
501
|
+
self.df_eq(c, [[1, 2]], "a:int,b:int", throw=True)
|
|
495
502
|
c = fa.semi_join(b, a)
|
|
496
|
-
df_eq(c, [[6, 1]], "c:int,a:int", throw=True)
|
|
503
|
+
self.df_eq(c, [[6, 1]], "c:int,a:int", throw=True)
|
|
497
504
|
|
|
498
505
|
b = fa.as_fugue_engine_df(e, [], "c:int,a:int")
|
|
499
506
|
c = fa.join(a, b, how="semi", on=["a"])
|
|
500
|
-
df_eq(c, [], "a:int,b:int", throw=True)
|
|
507
|
+
self.df_eq(c, [], "a:int,b:int", throw=True)
|
|
501
508
|
|
|
502
509
|
a = fa.as_fugue_engine_df(e, [], "a:int,b:int")
|
|
503
510
|
b = fa.as_fugue_engine_df(e, [], "c:int,a:int")
|
|
504
511
|
c = fa.join(a, b, how="semi", on=["a"])
|
|
505
|
-
df_eq(c, [], "a:int,b:int", throw=True)
|
|
512
|
+
self.df_eq(c, [], "a:int,b:int", throw=True)
|
|
506
513
|
|
|
507
514
|
def test__join_anti(self):
|
|
508
515
|
e = self.engine
|
|
509
516
|
a = fa.as_fugue_engine_df(e, [[1, 2], [3, 4]], "a:int,b:int")
|
|
510
517
|
b = fa.as_fugue_engine_df(e, [[6, 1], [2, 7]], "c:int,a:int")
|
|
511
518
|
c = fa.join(a, b, how="anti", on=["a"])
|
|
512
|
-
df_eq(c, [[3, 4]], "a:int,b:int", throw=True)
|
|
519
|
+
self.df_eq(c, [[3, 4]], "a:int,b:int", throw=True)
|
|
513
520
|
c = fa.anti_join(b, a)
|
|
514
|
-
df_eq(c, [[2, 7]], "c:int,a:int", throw=True)
|
|
521
|
+
self.df_eq(c, [[2, 7]], "c:int,a:int", throw=True)
|
|
515
522
|
|
|
516
523
|
b = fa.as_fugue_engine_df(e, [], "c:int,a:int")
|
|
517
524
|
c = fa.join(a, b, how="anti", on=["a"])
|
|
518
|
-
df_eq(c, [[1, 2], [3, 4]], "a:int,b:int", throw=True)
|
|
525
|
+
self.df_eq(c, [[1, 2], [3, 4]], "a:int,b:int", throw=True)
|
|
519
526
|
|
|
520
527
|
a = fa.as_fugue_engine_df(e, [], "a:int,b:int")
|
|
521
528
|
b = fa.as_fugue_engine_df(e, [], "c:int,a:int")
|
|
522
529
|
c = fa.join(a, b, how="anti", on=["a"])
|
|
523
|
-
df_eq(c, [], "a:int,b:int", throw=True)
|
|
530
|
+
self.df_eq(c, [], "a:int,b:int", throw=True)
|
|
524
531
|
|
|
525
532
|
def test__join_with_null_keys(self):
|
|
526
533
|
# SQL will not match null values
|
|
@@ -532,7 +539,7 @@ class ExecutionEngineTests(object):
|
|
|
532
539
|
e, [[1, 2, 33], [4, None, 63]], "a:double,b:double,d:int"
|
|
533
540
|
)
|
|
534
541
|
c = fa.join(a, b, how="INNER")
|
|
535
|
-
df_eq(c, [[1, 2, 3, 33]], "a:double,b:double,c:int,d:int", throw=True)
|
|
542
|
+
self.df_eq(c, [[1, 2, 3, 33]], "a:double,b:double,c:int,d:int", throw=True)
|
|
536
543
|
|
|
537
544
|
def test_union(self):
|
|
538
545
|
e = self.engine
|
|
@@ -543,21 +550,21 @@ class ExecutionEngineTests(object):
|
|
|
543
550
|
e, [[1, 2, 33], [4, None, 6]], "a:double,b:double,c:int"
|
|
544
551
|
)
|
|
545
552
|
c = fa.union(a, b)
|
|
546
|
-
df_eq(
|
|
553
|
+
self.df_eq(
|
|
547
554
|
c,
|
|
548
555
|
[[1, 2, 3], [4, None, 6], [1, 2, 33]],
|
|
549
556
|
"a:double,b:double,c:int",
|
|
550
557
|
throw=True,
|
|
551
558
|
)
|
|
552
559
|
c = fa.union(a, b, distinct=False)
|
|
553
|
-
df_eq(
|
|
560
|
+
self.df_eq(
|
|
554
561
|
c,
|
|
555
562
|
[[1, 2, 3], [4, None, 6], [1, 2, 33], [4, None, 6]],
|
|
556
563
|
"a:double,b:double,c:int",
|
|
557
564
|
throw=True,
|
|
558
565
|
)
|
|
559
566
|
d = fa.union(a, b, c, distinct=False)
|
|
560
|
-
df_eq(
|
|
567
|
+
self.df_eq(
|
|
561
568
|
d,
|
|
562
569
|
[
|
|
563
570
|
[1, 2, 3],
|
|
@@ -582,7 +589,7 @@ class ExecutionEngineTests(object):
|
|
|
582
589
|
e, [[1, 2, 33], [4, None, 6]], "a:double,b:double,c:int"
|
|
583
590
|
)
|
|
584
591
|
c = fa.subtract(a, b)
|
|
585
|
-
df_eq(
|
|
592
|
+
self.df_eq(
|
|
586
593
|
c,
|
|
587
594
|
[[1, 2, 3]],
|
|
588
595
|
"a:double,b:double,c:int",
|
|
@@ -591,7 +598,7 @@ class ExecutionEngineTests(object):
|
|
|
591
598
|
x = fa.as_fugue_engine_df(e, [[1, 2, 33]], "a:double,b:double,c:int")
|
|
592
599
|
y = fa.as_fugue_engine_df(e, [[4, None, 6]], "a:double,b:double,c:int")
|
|
593
600
|
z = fa.subtract(a, x, y)
|
|
594
|
-
df_eq(
|
|
601
|
+
self.df_eq(
|
|
595
602
|
z,
|
|
596
603
|
[[1, 2, 3]],
|
|
597
604
|
"a:double,b:double,c:int",
|
|
@@ -599,7 +606,7 @@ class ExecutionEngineTests(object):
|
|
|
599
606
|
)
|
|
600
607
|
# TODO: EXCEPT ALL is not implemented (QPD issue)
|
|
601
608
|
# c = fa.subtract(a, b, distinct=False)
|
|
602
|
-
# df_eq(
|
|
609
|
+
# self.df_eq(
|
|
603
610
|
# c,
|
|
604
611
|
# [[1, 2, 3], [1, 2, 3]],
|
|
605
612
|
# "a:double,b:double,c:int",
|
|
@@ -617,7 +624,7 @@ class ExecutionEngineTests(object):
|
|
|
617
624
|
"a:double,b:double,c:int",
|
|
618
625
|
)
|
|
619
626
|
c = fa.intersect(a, b)
|
|
620
|
-
df_eq(
|
|
627
|
+
self.df_eq(
|
|
621
628
|
c,
|
|
622
629
|
[[4, None, 6]],
|
|
623
630
|
"a:double,b:double,c:int",
|
|
@@ -634,7 +641,7 @@ class ExecutionEngineTests(object):
|
|
|
634
641
|
"a:double,b:double,c:int",
|
|
635
642
|
)
|
|
636
643
|
z = fa.intersect(a, x, y)
|
|
637
|
-
df_eq(
|
|
644
|
+
self.df_eq(
|
|
638
645
|
z,
|
|
639
646
|
[],
|
|
640
647
|
"a:double,b:double,c:int",
|
|
@@ -642,7 +649,7 @@ class ExecutionEngineTests(object):
|
|
|
642
649
|
)
|
|
643
650
|
# TODO: INTERSECT ALL is not implemented (QPD issue)
|
|
644
651
|
# c = fa.intersect(a, b, distinct=False)
|
|
645
|
-
# df_eq(
|
|
652
|
+
# self.df_eq(
|
|
646
653
|
# c,
|
|
647
654
|
# [[4, None, 6], [4, None, 6]],
|
|
648
655
|
# "a:double,b:double,c:int",
|
|
@@ -655,7 +662,7 @@ class ExecutionEngineTests(object):
|
|
|
655
662
|
e, [[4, None, 6], [1, 2, 3], [4, None, 6]], "a:double,b:double,c:int"
|
|
656
663
|
)
|
|
657
664
|
c = fa.distinct(a)
|
|
658
|
-
df_eq(
|
|
665
|
+
self.df_eq(
|
|
659
666
|
c,
|
|
660
667
|
[[4, None, 6], [1, 2, 3]],
|
|
661
668
|
"a:double,b:double,c:int",
|
|
@@ -674,25 +681,25 @@ class ExecutionEngineTests(object):
|
|
|
674
681
|
f = fa.dropna(a, how="any", thresh=2)
|
|
675
682
|
g = fa.dropna(a, how="any", subset=["a", "c"])
|
|
676
683
|
h = fa.dropna(a, how="any", thresh=1, subset=["a", "c"])
|
|
677
|
-
df_eq(
|
|
684
|
+
self.df_eq(
|
|
678
685
|
c,
|
|
679
686
|
[[1, 2, 3]],
|
|
680
687
|
"a:double,b:double,c:double",
|
|
681
688
|
throw=True,
|
|
682
689
|
)
|
|
683
|
-
df_eq(
|
|
690
|
+
self.df_eq(
|
|
684
691
|
d,
|
|
685
692
|
[[4, None, 6], [1, 2, 3], [4, None, None]],
|
|
686
693
|
"a:double,b:double,c:double",
|
|
687
694
|
throw=True,
|
|
688
695
|
)
|
|
689
|
-
df_eq(
|
|
696
|
+
self.df_eq(
|
|
690
697
|
f, [[4, None, 6], [1, 2, 3]], "a:double,b:double,c:double", throw=True
|
|
691
698
|
)
|
|
692
|
-
df_eq(
|
|
699
|
+
self.df_eq(
|
|
693
700
|
g, [[4, None, 6], [1, 2, 3]], "a:double,b:double,c:double", throw=True
|
|
694
701
|
)
|
|
695
|
-
df_eq(
|
|
702
|
+
self.df_eq(
|
|
696
703
|
h,
|
|
697
704
|
[[4, None, 6], [1, 2, 3], [4, None, None]],
|
|
698
705
|
"a:double,b:double,c:double",
|
|
@@ -710,25 +717,25 @@ class ExecutionEngineTests(object):
|
|
|
710
717
|
d = fa.fillna(a, {"b": 99, "c": -99})
|
|
711
718
|
f = fa.fillna(a, value=-99, subset=["c"])
|
|
712
719
|
g = fa.fillna(a, {"b": 99, "c": -99}, subset=["c"]) # subset ignored
|
|
713
|
-
df_eq(
|
|
720
|
+
self.df_eq(
|
|
714
721
|
c,
|
|
715
722
|
[[4, 1, 6], [1, 2, 3], [4, 1, 1]],
|
|
716
723
|
"a:double,b:double,c:double",
|
|
717
724
|
throw=True,
|
|
718
725
|
)
|
|
719
|
-
df_eq(
|
|
726
|
+
self.df_eq(
|
|
720
727
|
d,
|
|
721
728
|
[[4, 99, 6], [1, 2, 3], [4, 99, -99]],
|
|
722
729
|
"a:double,b:double,c:double",
|
|
723
730
|
throw=True,
|
|
724
731
|
)
|
|
725
|
-
df_eq(
|
|
732
|
+
self.df_eq(
|
|
726
733
|
f,
|
|
727
734
|
[[4, None, 6], [1, 2, 3], [4, None, -99]],
|
|
728
735
|
"a:double,b:double,c:double",
|
|
729
736
|
throw=True,
|
|
730
737
|
)
|
|
731
|
-
df_eq(g, d, throw=True)
|
|
738
|
+
self.df_eq(g, d, throw=True)
|
|
732
739
|
raises(ValueError, lambda: fa.fillna(a, {"b": None, c: "99"}))
|
|
733
740
|
raises(ValueError, lambda: fa.fillna(a, None))
|
|
734
741
|
# raises(ValueError, lambda: fa.fillna(a, ["b"]))
|
|
@@ -747,9 +754,9 @@ class ExecutionEngineTests(object):
|
|
|
747
754
|
h = fa.sample(a, frac=0.8, seed=1)
|
|
748
755
|
h2 = fa.sample(a, frac=0.8, seed=1)
|
|
749
756
|
i = fa.sample(a, frac=0.8, seed=2)
|
|
750
|
-
assert not df_eq(f, g, throw=False)
|
|
751
|
-
df_eq(h, h2, throw=True)
|
|
752
|
-
assert not df_eq(h, i, throw=False)
|
|
757
|
+
assert not self.df_eq(f, g, throw=False)
|
|
758
|
+
self.df_eq(h, h2, throw=True)
|
|
759
|
+
assert not self.df_eq(h, i, throw=False)
|
|
753
760
|
assert abs(len(i.as_array()) - 80) < 10
|
|
754
761
|
|
|
755
762
|
def test_take(self):
|
|
@@ -774,37 +781,37 @@ class ExecutionEngineTests(object):
|
|
|
774
781
|
f = fa.take(a, n=1, presort=None, partition=ps2)
|
|
775
782
|
g = fa.take(a, n=2, presort="a desc", na_position="last")
|
|
776
783
|
h = fa.take(a, n=2, presort="a", na_position="first")
|
|
777
|
-
df_eq(
|
|
784
|
+
self.df_eq(
|
|
778
785
|
b,
|
|
779
786
|
[[None, 4, 2]],
|
|
780
787
|
"a:str,b:int,c:long",
|
|
781
788
|
throw=True,
|
|
782
789
|
)
|
|
783
|
-
df_eq(
|
|
790
|
+
self.df_eq(
|
|
784
791
|
c,
|
|
785
792
|
[[None, 4, 2], [None, 2, 1]],
|
|
786
793
|
"a:str,b:int,c:long",
|
|
787
794
|
throw=True,
|
|
788
795
|
)
|
|
789
|
-
df_eq(
|
|
796
|
+
self.df_eq(
|
|
790
797
|
d,
|
|
791
798
|
[["a", 3, 4], ["b", 2, 2], [None, 4, 2]],
|
|
792
799
|
"a:str,b:int,c:long",
|
|
793
800
|
throw=True,
|
|
794
801
|
)
|
|
795
|
-
df_eq(
|
|
802
|
+
self.df_eq(
|
|
796
803
|
f,
|
|
797
804
|
[["a", 2, 3], ["a", 3, 4], ["b", 1, 2], [None, 2, 1]],
|
|
798
805
|
"a:str,b:int,c:long",
|
|
799
806
|
throw=True,
|
|
800
807
|
)
|
|
801
|
-
df_eq(
|
|
808
|
+
self.df_eq(
|
|
802
809
|
g,
|
|
803
810
|
[["b", 1, 2], ["b", 2, 2]],
|
|
804
811
|
"a:str,b:int,c:long",
|
|
805
812
|
throw=True,
|
|
806
813
|
)
|
|
807
|
-
df_eq(
|
|
814
|
+
self.df_eq(
|
|
808
815
|
h,
|
|
809
816
|
[
|
|
810
817
|
[None, 4, 2],
|
|
@@ -823,7 +830,7 @@ class ExecutionEngineTests(object):
|
|
|
823
830
|
"a:str,b:int,c:long",
|
|
824
831
|
)
|
|
825
832
|
i = fa.take(a, n=1, partition="a", presort=None)
|
|
826
|
-
case1 = df_eq(
|
|
833
|
+
case1 = self.df_eq(
|
|
827
834
|
i,
|
|
828
835
|
[
|
|
829
836
|
["a", 2, 3],
|
|
@@ -832,7 +839,7 @@ class ExecutionEngineTests(object):
|
|
|
832
839
|
"a:str,b:int,c:long",
|
|
833
840
|
throw=False,
|
|
834
841
|
)
|
|
835
|
-
case2 = df_eq(
|
|
842
|
+
case2 = self.df_eq(
|
|
836
843
|
i,
|
|
837
844
|
[
|
|
838
845
|
["a", 2, 3],
|
|
@@ -843,7 +850,7 @@ class ExecutionEngineTests(object):
|
|
|
843
850
|
)
|
|
844
851
|
assert case1 or case2
|
|
845
852
|
j = fa.take(a, n=2, partition="a", presort=None)
|
|
846
|
-
df_eq(
|
|
853
|
+
self.df_eq(
|
|
847
854
|
j,
|
|
848
855
|
[
|
|
849
856
|
["a", 2, 3],
|
|
@@ -864,9 +871,9 @@ class ExecutionEngineTests(object):
|
|
|
864
871
|
d = fa.sample(a, n=90, seed=1)
|
|
865
872
|
d2 = fa.sample(a, n=90, seed=1)
|
|
866
873
|
e = fa.sample(a, n=90, seed=2)
|
|
867
|
-
assert not df_eq(b, c, throw=False)
|
|
868
|
-
df_eq(d, d2, throw=True)
|
|
869
|
-
assert not df_eq(d, e, throw=False)
|
|
874
|
+
assert not self.df_eq(b, c, throw=False)
|
|
875
|
+
self.df_eq(d, d2, throw=True)
|
|
876
|
+
assert not self.df_eq(d, e, throw=False)
|
|
870
877
|
assert abs(len(e.as_array()) - 90) < 2
|
|
871
878
|
|
|
872
879
|
def test_comap(self):
|
|
@@ -922,11 +929,11 @@ class ExecutionEngineTests(object):
|
|
|
922
929
|
PartitionSpec(),
|
|
923
930
|
on_init=on_init,
|
|
924
931
|
)
|
|
925
|
-
df_eq(res, [[1, "_02,_11"]], "a:int,v:str", throw=True)
|
|
932
|
+
self.df_eq(res, [[1, "_02,_11"]], "a:int,v:str", throw=True)
|
|
926
933
|
|
|
927
934
|
# for outer joins, the NULL will be filled with empty dataframe
|
|
928
935
|
res = e.comap(z2, comap, "a:int,v:str", PartitionSpec())
|
|
929
|
-
df_eq(
|
|
936
|
+
self.df_eq(
|
|
930
937
|
res,
|
|
931
938
|
[[1, "_02,_11"], [3, "_01,_10"]],
|
|
932
939
|
"a:int,v:str",
|
|
@@ -934,7 +941,7 @@ class ExecutionEngineTests(object):
|
|
|
934
941
|
)
|
|
935
942
|
|
|
936
943
|
res = e.comap(z3, comap, "a:int,v:str", PartitionSpec())
|
|
937
|
-
df_eq(
|
|
944
|
+
self.df_eq(
|
|
938
945
|
res,
|
|
939
946
|
[[1, "_01,_12"], [3, "_00,_11"]],
|
|
940
947
|
"a:int,v:str",
|
|
@@ -942,10 +949,10 @@ class ExecutionEngineTests(object):
|
|
|
942
949
|
)
|
|
943
950
|
|
|
944
951
|
res = e.comap(z4, comap, "v:str", PartitionSpec())
|
|
945
|
-
df_eq(res, [["_03,_12"]], "v:str", throw=True)
|
|
952
|
+
self.df_eq(res, [["_03,_12"]], "v:str", throw=True)
|
|
946
953
|
|
|
947
954
|
res = e.comap(z5, comap, "a:int,v:str", PartitionSpec())
|
|
948
|
-
df_eq(
|
|
955
|
+
self.df_eq(
|
|
949
956
|
res,
|
|
950
957
|
[[1, "_02,_11"], [3, "_01,_10"], [7, "_00,_11"]],
|
|
951
958
|
"a:int,v:str",
|
|
@@ -983,7 +990,7 @@ class ExecutionEngineTests(object):
|
|
|
983
990
|
PartitionSpec(),
|
|
984
991
|
on_init=on_init,
|
|
985
992
|
)
|
|
986
|
-
df_eq(res, [[1, "x2,y1"]], "a:int,v:str", throw=True)
|
|
993
|
+
self.df_eq(res, [[1, "x2,y1"]], "a:int,v:str", throw=True)
|
|
987
994
|
|
|
988
995
|
res = e.comap(
|
|
989
996
|
z2,
|
|
@@ -992,7 +999,7 @@ class ExecutionEngineTests(object):
|
|
|
992
999
|
PartitionSpec(),
|
|
993
1000
|
on_init=on_init,
|
|
994
1001
|
)
|
|
995
|
-
df_eq(res, [[1, "x2,y1,z1"]], "a:int,v:str", throw=True)
|
|
1002
|
+
self.df_eq(res, [[1, "x2,y1,z1"]], "a:int,v:str", throw=True)
|
|
996
1003
|
|
|
997
1004
|
res = e.comap(
|
|
998
1005
|
z3,
|
|
@@ -1001,7 +1008,7 @@ class ExecutionEngineTests(object):
|
|
|
1001
1008
|
PartitionSpec(),
|
|
1002
1009
|
on_init=on_init,
|
|
1003
1010
|
)
|
|
1004
|
-
df_eq(res, [[1, "z1"]], "a:int,v:str", throw=True)
|
|
1011
|
+
self.df_eq(res, [[1, "z1"]], "a:int,v:str", throw=True)
|
|
1005
1012
|
|
|
1006
1013
|
@pytest.fixture(autouse=True)
|
|
1007
1014
|
def init_tmpdir(self, tmpdir):
|
|
@@ -1015,20 +1022,20 @@ class ExecutionEngineTests(object):
|
|
|
1015
1022
|
fa.save(b, path, format_hint="parquet", force_single=True)
|
|
1016
1023
|
assert isfile(path)
|
|
1017
1024
|
c = fa.load(path, format_hint="parquet", columns=["a", "c"], as_fugue=True)
|
|
1018
|
-
df_eq(c, [[1, 6], [7, 2]], "a:long,c:int", throw=True)
|
|
1025
|
+
self.df_eq(c, [[1, 6], [7, 2]], "a:long,c:int", throw=True)
|
|
1019
1026
|
|
|
1020
1027
|
# overwirte single with folder (if applicable)
|
|
1021
1028
|
b = ArrayDataFrame([[60, 1], [20, 7]], "c:int,a:long")
|
|
1022
1029
|
fa.save(b, path, format_hint="parquet", mode="overwrite")
|
|
1023
1030
|
c = fa.load(path, format_hint="parquet", columns=["a", "c"], as_fugue=True)
|
|
1024
|
-
df_eq(c, [[1, 60], [7, 20]], "a:long,c:int", throw=True)
|
|
1031
|
+
self.df_eq(c, [[1, 60], [7, 20]], "a:long,c:int", throw=True)
|
|
1025
1032
|
|
|
1026
1033
|
def test_save_and_load_parquet(self):
|
|
1027
1034
|
b = ArrayDataFrame([[6, 1], [2, 7]], "c:int,a:long")
|
|
1028
1035
|
path = os.path.join(self.tmpdir, "a", "b")
|
|
1029
1036
|
fa.save(b, path, format_hint="parquet")
|
|
1030
1037
|
c = fa.load(path, format_hint="parquet", columns=["a", "c"], as_fugue=True)
|
|
1031
|
-
df_eq(c, [[1, 6], [7, 2]], "a:long,c:int", throw=True)
|
|
1038
|
+
self.df_eq(c, [[1, 6], [7, 2]], "a:long,c:int", throw=True)
|
|
1032
1039
|
|
|
1033
1040
|
def test_load_parquet_folder(self):
|
|
1034
1041
|
native = NativeExecutionEngine()
|
|
@@ -1039,7 +1046,7 @@ class ExecutionEngineTests(object):
|
|
|
1039
1046
|
fa.save(b, os.path.join(path, "b.parquet"), engine=native)
|
|
1040
1047
|
touch(os.path.join(path, "_SUCCESS"))
|
|
1041
1048
|
c = fa.load(path, format_hint="parquet", columns=["a", "c"], as_fugue=True)
|
|
1042
|
-
df_eq(c, [[1, 6], [7, 2], [8, 4]], "a:long,c:int", throw=True)
|
|
1049
|
+
self.df_eq(c, [[1, 6], [7, 2], [8, 4]], "a:long,c:int", throw=True)
|
|
1043
1050
|
|
|
1044
1051
|
def test_load_parquet_files(self):
|
|
1045
1052
|
native = NativeExecutionEngine()
|
|
@@ -1053,7 +1060,7 @@ class ExecutionEngineTests(object):
|
|
|
1053
1060
|
c = fa.load(
|
|
1054
1061
|
[f1, f2], format_hint="parquet", columns=["a", "c"], as_fugue=True
|
|
1055
1062
|
)
|
|
1056
|
-
df_eq(c, [[1, 6], [7, 2], [8, 4]], "a:long,c:int", throw=True)
|
|
1063
|
+
self.df_eq(c, [[1, 6], [7, 2], [8, 4]], "a:long,c:int", throw=True)
|
|
1057
1064
|
|
|
1058
1065
|
def test_save_single_and_load_csv(self):
|
|
1059
1066
|
b = ArrayDataFrame([[6.1, 1.1], [2.1, 7.1]], "c:double,a:double")
|
|
@@ -1065,12 +1072,12 @@ class ExecutionEngineTests(object):
|
|
|
1065
1072
|
c = fa.load(
|
|
1066
1073
|
path, format_hint="csv", header=True, infer_schema=False, as_fugue=True
|
|
1067
1074
|
)
|
|
1068
|
-
df_eq(c, [["6.1", "1.1"], ["2.1", "7.1"]], "c:str,a:str", throw=True)
|
|
1075
|
+
self.df_eq(c, [["6.1", "1.1"], ["2.1", "7.1"]], "c:str,a:str", throw=True)
|
|
1069
1076
|
|
|
1070
1077
|
c = fa.load(
|
|
1071
1078
|
path, format_hint="csv", header=True, infer_schema=True, as_fugue=True
|
|
1072
1079
|
)
|
|
1073
|
-
df_eq(c, [[6.1, 1.1], [2.1, 7.1]], "c:double,a:double", throw=True)
|
|
1080
|
+
self.df_eq(c, [[6.1, 1.1], [2.1, 7.1]], "c:double,a:double", throw=True)
|
|
1074
1081
|
|
|
1075
1082
|
with raises(ValueError):
|
|
1076
1083
|
c = fa.load(
|
|
@@ -1090,7 +1097,7 @@ class ExecutionEngineTests(object):
|
|
|
1090
1097
|
columns=["a", "c"],
|
|
1091
1098
|
as_fugue=True,
|
|
1092
1099
|
)
|
|
1093
|
-
df_eq(c, [["1.1", "6.1"], ["7.1", "2.1"]], "a:str,c:str", throw=True)
|
|
1100
|
+
self.df_eq(c, [["1.1", "6.1"], ["7.1", "2.1"]], "a:str,c:str", throw=True)
|
|
1094
1101
|
|
|
1095
1102
|
c = fa.load(
|
|
1096
1103
|
path,
|
|
@@ -1100,7 +1107,7 @@ class ExecutionEngineTests(object):
|
|
|
1100
1107
|
columns="a:double,c:double",
|
|
1101
1108
|
as_fugue=True,
|
|
1102
1109
|
)
|
|
1103
|
-
df_eq(c, [[1.1, 6.1], [7.1, 2.1]], "a:double,c:double", throw=True)
|
|
1110
|
+
self.df_eq(c, [[1.1, 6.1], [7.1, 2.1]], "a:double,c:double", throw=True)
|
|
1104
1111
|
|
|
1105
1112
|
# overwirte single with folder (if applicable)
|
|
1106
1113
|
b = ArrayDataFrame([[60.1, 1.1], [20.1, 7.1]], "c:double,a:double")
|
|
@@ -1113,7 +1120,7 @@ class ExecutionEngineTests(object):
|
|
|
1113
1120
|
columns=["a", "c"],
|
|
1114
1121
|
as_fugue=True,
|
|
1115
1122
|
)
|
|
1116
|
-
df_eq(c, [["1.1", "60.1"], ["7.1", "20.1"]], "a:str,c:str", throw=True)
|
|
1123
|
+
self.df_eq(c, [["1.1", "60.1"], ["7.1", "20.1"]], "a:str,c:str", throw=True)
|
|
1117
1124
|
|
|
1118
1125
|
def test_save_single_and_load_csv_no_header(self):
|
|
1119
1126
|
b = ArrayDataFrame([[6.1, 1.1], [2.1, 7.1]], "c:double,a:double")
|
|
@@ -1129,7 +1136,7 @@ class ExecutionEngineTests(object):
|
|
|
1129
1136
|
format_hint="csv",
|
|
1130
1137
|
header=False,
|
|
1131
1138
|
infer_schema=False,
|
|
1132
|
-
as_fugue=True
|
|
1139
|
+
as_fugue=True,
|
|
1133
1140
|
# when header is False, must set columns
|
|
1134
1141
|
)
|
|
1135
1142
|
|
|
@@ -1141,7 +1148,7 @@ class ExecutionEngineTests(object):
|
|
|
1141
1148
|
columns=["c", "a"],
|
|
1142
1149
|
as_fugue=True,
|
|
1143
1150
|
)
|
|
1144
|
-
df_eq(c, [["6.1", "1.1"], ["2.1", "7.1"]], "c:str,a:str", throw=True)
|
|
1151
|
+
self.df_eq(c, [["6.1", "1.1"], ["2.1", "7.1"]], "c:str,a:str", throw=True)
|
|
1145
1152
|
|
|
1146
1153
|
c = fa.load(
|
|
1147
1154
|
path,
|
|
@@ -1151,7 +1158,7 @@ class ExecutionEngineTests(object):
|
|
|
1151
1158
|
columns=["c", "a"],
|
|
1152
1159
|
as_fugue=True,
|
|
1153
1160
|
)
|
|
1154
|
-
df_eq(c, [[6.1, 1.1], [2.1, 7.1]], "c:double,a:double", throw=True)
|
|
1161
|
+
self.df_eq(c, [[6.1, 1.1], [2.1, 7.1]], "c:double,a:double", throw=True)
|
|
1155
1162
|
|
|
1156
1163
|
with raises(ValueError):
|
|
1157
1164
|
c = fa.load(
|
|
@@ -1171,7 +1178,7 @@ class ExecutionEngineTests(object):
|
|
|
1171
1178
|
columns="c:double,a:str",
|
|
1172
1179
|
as_fugue=True,
|
|
1173
1180
|
)
|
|
1174
|
-
df_eq(c, [[6.1, "1.1"], [2.1, "7.1"]], "c:double,a:str", throw=True)
|
|
1181
|
+
self.df_eq(c, [[6.1, "1.1"], [2.1, "7.1"]], "c:double,a:str", throw=True)
|
|
1175
1182
|
|
|
1176
1183
|
def test_save_and_load_csv(self):
|
|
1177
1184
|
b = ArrayDataFrame([[6.1, 1.1], [2.1, 7.1]], "c:double,a:double")
|
|
@@ -1185,7 +1192,7 @@ class ExecutionEngineTests(object):
|
|
|
1185
1192
|
columns=["a", "c"],
|
|
1186
1193
|
as_fugue=True,
|
|
1187
1194
|
)
|
|
1188
|
-
df_eq(c, [[1.1, 6.1], [7.1, 2.1]], "a:double,c:double", throw=True)
|
|
1195
|
+
self.df_eq(c, [[1.1, 6.1], [7.1, 2.1]], "a:double,c:double", throw=True)
|
|
1189
1196
|
|
|
1190
1197
|
def test_load_csv_folder(self):
|
|
1191
1198
|
native = NativeExecutionEngine()
|
|
@@ -1215,7 +1222,7 @@ class ExecutionEngineTests(object):
|
|
|
1215
1222
|
columns=["a", "c"],
|
|
1216
1223
|
as_fugue=True,
|
|
1217
1224
|
)
|
|
1218
|
-
df_eq(
|
|
1225
|
+
self.df_eq(
|
|
1219
1226
|
c, [[1.1, 6.1], [7.1, 2.1], [8.1, 4.1]], "a:double,c:double", throw=True
|
|
1220
1227
|
)
|
|
1221
1228
|
|
|
@@ -1227,13 +1234,13 @@ class ExecutionEngineTests(object):
|
|
|
1227
1234
|
fa.save(b, path, format_hint="json", force_single=True)
|
|
1228
1235
|
assert isfile(path)
|
|
1229
1236
|
c = fa.load(path, format_hint="json", columns=["a", "c"], as_fugue=True)
|
|
1230
|
-
df_eq(c, [[1, 6], [7, 2]], "a:long,c:long", throw=True)
|
|
1237
|
+
self.df_eq(c, [[1, 6], [7, 2]], "a:long,c:long", throw=True)
|
|
1231
1238
|
|
|
1232
1239
|
# overwirte single with folder (if applicable)
|
|
1233
1240
|
b = ArrayDataFrame([[60, 1], [20, 7]], "c:long,a:long")
|
|
1234
1241
|
fa.save(b, path, format_hint="json", mode="overwrite")
|
|
1235
1242
|
c = fa.load(path, format_hint="json", columns=["a", "c"], as_fugue=True)
|
|
1236
|
-
df_eq(c, [[1, 60], [7, 20]], "a:long,c:long", throw=True)
|
|
1243
|
+
self.df_eq(c, [[1, 60], [7, 20]], "a:long,c:long", throw=True)
|
|
1237
1244
|
|
|
1238
1245
|
def test_save_and_load_json(self):
|
|
1239
1246
|
e = self.engine
|
|
@@ -1245,7 +1252,7 @@ class ExecutionEngineTests(object):
|
|
|
1245
1252
|
format_hint="json",
|
|
1246
1253
|
)
|
|
1247
1254
|
c = fa.load(path, format_hint="json", columns=["a", "c"], as_fugue=True)
|
|
1248
|
-
df_eq(
|
|
1255
|
+
self.df_eq(
|
|
1249
1256
|
c, [[1, 6], [7, 2], [4, 3], [8, 4], [7, 6]], "a:long,c:long", throw=True
|
|
1250
1257
|
)
|
|
1251
1258
|
|
|
@@ -1258,7 +1265,7 @@ class ExecutionEngineTests(object):
|
|
|
1258
1265
|
fa.save(b, os.path.join(path, "b.json"), format_hint="json", engine=native)
|
|
1259
1266
|
touch(os.path.join(path, "_SUCCESS"))
|
|
1260
1267
|
c = fa.load(path, format_hint="json", columns=["a", "c"], as_fugue=True)
|
|
1261
|
-
df_eq(c, [[1, 6], [7, 2], [8, 4], [4, 3]], "a:long,c:long", throw=True)
|
|
1268
|
+
self.df_eq(c, [[1, 6], [7, 2], [8, 4], [4, 3]], "a:long,c:long", throw=True)
|
|
1262
1269
|
|
|
1263
1270
|
def test_engine_api(self):
|
|
1264
1271
|
# complimentary tests not covered by the other tests
|
|
@@ -1271,7 +1278,7 @@ class ExecutionEngineTests(object):
|
|
|
1271
1278
|
assert fa.is_df(df3) and not isinstance(df3, DataFrame)
|
|
1272
1279
|
df4 = fa.union(df1, df2, as_fugue=True)
|
|
1273
1280
|
assert isinstance(df4, DataFrame)
|
|
1274
|
-
df_eq(df4, fa.as_pandas(df3), throw=True)
|
|
1281
|
+
self.df_eq(df4, fa.as_pandas(df3), throw=True)
|
|
1275
1282
|
|
|
1276
1283
|
|
|
1277
1284
|
def select_top(cursor, data):
|