fugue 0.8.7.dev8__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fugue/collections/sql.py +1 -1
- fugue/dataframe/utils.py +4 -18
- fugue/test/__init__.py +11 -0
- fugue/test/pandas_tester.py +24 -0
- fugue/test/plugins.py +393 -0
- {fugue-0.8.7.dev8.dist-info → fugue-0.9.0.dist-info}/METADATA +24 -15
- {fugue-0.8.7.dev8.dist-info → fugue-0.9.0.dist-info}/RECORD +38 -47
- {fugue-0.8.7.dev8.dist-info → fugue-0.9.0.dist-info}/WHEEL +1 -1
- fugue-0.9.0.dist-info/entry_points.txt +12 -0
- fugue_dask/_io.py +8 -5
- fugue_dask/_utils.py +4 -4
- fugue_dask/execution_engine.py +11 -0
- fugue_dask/registry.py +2 -0
- fugue_dask/tester.py +24 -0
- fugue_duckdb/__init__.py +0 -5
- fugue_duckdb/_io.py +1 -0
- fugue_duckdb/registry.py +30 -2
- fugue_duckdb/tester.py +49 -0
- fugue_ibis/__init__.py +0 -3
- fugue_ibis/dataframe.py +2 -2
- fugue_ibis/execution_engine.py +14 -7
- fugue_ray/_constants.py +3 -4
- fugue_ray/_utils/dataframe.py +10 -21
- fugue_ray/_utils/io.py +38 -9
- fugue_ray/execution_engine.py +1 -2
- fugue_ray/registry.py +1 -0
- fugue_ray/tester.py +22 -0
- fugue_spark/execution_engine.py +5 -5
- fugue_spark/registry.py +13 -1
- fugue_spark/tester.py +78 -0
- fugue_test/__init__.py +82 -0
- fugue_test/builtin_suite.py +26 -43
- fugue_test/dataframe_suite.py +5 -14
- fugue_test/execution_suite.py +170 -143
- fugue_test/fixtures.py +61 -0
- fugue_version/__init__.py +1 -1
- fugue-0.8.7.dev8.dist-info/entry_points.txt +0 -17
- fugue_dask/ibis_engine.py +0 -62
- fugue_duckdb/ibis_engine.py +0 -56
- fugue_ibis/execution/__init__.py +0 -0
- fugue_ibis/execution/ibis_engine.py +0 -49
- fugue_ibis/execution/pandas_backend.py +0 -54
- fugue_ibis/extensions.py +0 -203
- fugue_spark/ibis_engine.py +0 -45
- fugue_test/ibis_suite.py +0 -92
- fugue_test/plugins/__init__.py +0 -0
- fugue_test/plugins/dask/__init__.py +0 -2
- fugue_test/plugins/dask/fixtures.py +0 -12
- fugue_test/plugins/duckdb/__init__.py +0 -2
- fugue_test/plugins/duckdb/fixtures.py +0 -9
- fugue_test/plugins/misc/__init__.py +0 -2
- fugue_test/plugins/misc/fixtures.py +0 -18
- fugue_test/plugins/ray/__init__.py +0 -2
- fugue_test/plugins/ray/fixtures.py +0 -9
- {fugue-0.8.7.dev8.dist-info → fugue-0.9.0.dist-info}/LICENSE +0 -0
- {fugue-0.8.7.dev8.dist-info → fugue-0.9.0.dist-info}/top_level.txt +0 -0
fugue_test/execution_suite.py
CHANGED
|
@@ -10,7 +10,6 @@ import copy
|
|
|
10
10
|
import os
|
|
11
11
|
import pickle
|
|
12
12
|
from datetime import datetime
|
|
13
|
-
from unittest import TestCase
|
|
14
13
|
|
|
15
14
|
import pandas as pd
|
|
16
15
|
import pytest
|
|
@@ -20,17 +19,15 @@ from triad.utils.io import isfile, makedirs, touch
|
|
|
20
19
|
|
|
21
20
|
import fugue.api as fa
|
|
22
21
|
import fugue.column.functions as ff
|
|
22
|
+
import fugue.test as ft
|
|
23
23
|
from fugue import (
|
|
24
24
|
ArrayDataFrame,
|
|
25
25
|
DataFrame,
|
|
26
26
|
DataFrames,
|
|
27
|
-
ExecutionEngine,
|
|
28
27
|
PandasDataFrame,
|
|
29
28
|
PartitionSpec,
|
|
30
|
-
register_default_sql_engine,
|
|
31
29
|
)
|
|
32
30
|
from fugue.column import all_cols, col, lit
|
|
33
|
-
from fugue.dataframe.utils import _df_eq as df_eq
|
|
34
31
|
from fugue.execution.native_execution_engine import NativeExecutionEngine
|
|
35
32
|
|
|
36
33
|
|
|
@@ -40,25 +37,7 @@ class ExecutionEngineTests(object):
|
|
|
40
37
|
should pass this test suite.
|
|
41
38
|
"""
|
|
42
39
|
|
|
43
|
-
class Tests(
|
|
44
|
-
@classmethod
|
|
45
|
-
def setUpClass(cls):
|
|
46
|
-
register_default_sql_engine(lambda engine: engine.sql_engine)
|
|
47
|
-
cls._engine = cls.make_engine(cls)
|
|
48
|
-
fa.set_global_engine(cls._engine)
|
|
49
|
-
|
|
50
|
-
@property
|
|
51
|
-
def engine(self) -> ExecutionEngine:
|
|
52
|
-
return self._engine # type: ignore
|
|
53
|
-
|
|
54
|
-
@classmethod
|
|
55
|
-
def tearDownClass(cls):
|
|
56
|
-
fa.clear_global_engine()
|
|
57
|
-
cls._engine.stop()
|
|
58
|
-
|
|
59
|
-
def make_engine(self) -> ExecutionEngine: # pragma: no cover
|
|
60
|
-
raise NotImplementedError
|
|
61
|
-
|
|
40
|
+
class Tests(ft.FugueTestSuite):
|
|
62
41
|
def test_init(self):
|
|
63
42
|
print(self.engine)
|
|
64
43
|
assert self.engine.log is not None
|
|
@@ -76,19 +55,19 @@ class ExecutionEngineTests(object):
|
|
|
76
55
|
)
|
|
77
56
|
# all engines should accept these types of inputs
|
|
78
57
|
# should take fugue.DataFrame
|
|
79
|
-
df_eq(o, fa.as_fugue_engine_df(e, o), throw=True)
|
|
58
|
+
self.df_eq(o, fa.as_fugue_engine_df(e, o), throw=True)
|
|
80
59
|
# should take array, shema
|
|
81
|
-
df_eq(
|
|
60
|
+
self.df_eq(
|
|
82
61
|
o,
|
|
83
62
|
fa.as_fugue_engine_df(e, [[1.1, 2.2], [3.3, 4.4]], "a:double,b:double"),
|
|
84
63
|
throw=True,
|
|
85
64
|
)
|
|
86
65
|
# should take pandas dataframe
|
|
87
66
|
pdf = pd.DataFrame([[1.1, 2.2], [3.3, 4.4]], columns=["a", "b"])
|
|
88
|
-
df_eq(o, fa.as_fugue_engine_df(e, pdf), throw=True)
|
|
67
|
+
self.df_eq(o, fa.as_fugue_engine_df(e, pdf), throw=True)
|
|
89
68
|
|
|
90
69
|
# should convert string to datetime in to_df
|
|
91
|
-
df_eq(
|
|
70
|
+
self.df_eq(
|
|
92
71
|
fa.as_fugue_engine_df(e, [["2020-01-01"]], "a:datetime"),
|
|
93
72
|
[[datetime(2020, 1, 1)]],
|
|
94
73
|
"a:datetime",
|
|
@@ -99,7 +78,7 @@ class ExecutionEngineTests(object):
|
|
|
99
78
|
o = ArrayDataFrame([], "a:double,b:str")
|
|
100
79
|
pdf = pd.DataFrame([[0.1, "a"]], columns=["a", "b"])
|
|
101
80
|
pdf = pdf[pdf.a < 0]
|
|
102
|
-
df_eq(o, fa.as_fugue_engine_df(e, pdf), throw=True)
|
|
81
|
+
self.df_eq(o, fa.as_fugue_engine_df(e, pdf), throw=True)
|
|
103
82
|
|
|
104
83
|
@pytest.mark.skipif(not HAS_QPD, reason="qpd not working")
|
|
105
84
|
def test_filter(self):
|
|
@@ -108,11 +87,11 @@ class ExecutionEngineTests(object):
|
|
|
108
87
|
"a:double,b:int",
|
|
109
88
|
)
|
|
110
89
|
b = fa.filter(a, col("a").not_null())
|
|
111
|
-
df_eq(b, [[1, 2], [3, 4]], "a:double,b:int", throw=True)
|
|
90
|
+
self.df_eq(b, [[1, 2], [3, 4]], "a:double,b:int", throw=True)
|
|
112
91
|
c = fa.filter(a, col("a").not_null() & (col("b") < 3))
|
|
113
|
-
df_eq(c, [[1, 2]], "a:double,b:int", throw=True)
|
|
92
|
+
self.df_eq(c, [[1, 2]], "a:double,b:int", throw=True)
|
|
114
93
|
c = fa.filter(a, col("a") + col("b") == 3)
|
|
115
|
-
df_eq(c, [[1, 2]], "a:double,b:int", throw=True)
|
|
94
|
+
self.df_eq(c, [[1, 2]], "a:double,b:int", throw=True)
|
|
116
95
|
|
|
117
96
|
@pytest.mark.skipif(not HAS_QPD, reason="qpd not working")
|
|
118
97
|
def test_select(self):
|
|
@@ -122,7 +101,7 @@ class ExecutionEngineTests(object):
|
|
|
122
101
|
|
|
123
102
|
# simple
|
|
124
103
|
b = fa.select(a, col("b"), (col("b") + 1).alias("c").cast(str))
|
|
125
|
-
df_eq(
|
|
104
|
+
self.df_eq(
|
|
126
105
|
b,
|
|
127
106
|
[[2, "3"], [2, "3"], [1, "2"], [4, "5"], [4, "5"]],
|
|
128
107
|
"b:int,c:str",
|
|
@@ -133,7 +112,7 @@ class ExecutionEngineTests(object):
|
|
|
133
112
|
b = fa.select(
|
|
134
113
|
a, col("b"), (col("b") + 1).alias("c").cast(str), distinct=True
|
|
135
114
|
)
|
|
136
|
-
df_eq(
|
|
115
|
+
self.df_eq(
|
|
137
116
|
b,
|
|
138
117
|
[[2, "3"], [1, "2"], [4, "5"]],
|
|
139
118
|
"b:int,c:str",
|
|
@@ -142,11 +121,11 @@ class ExecutionEngineTests(object):
|
|
|
142
121
|
|
|
143
122
|
# wildcard
|
|
144
123
|
b = fa.select(a, all_cols(), where=col("a") + col("b") == 3)
|
|
145
|
-
df_eq(b, [[1, 2]], "a:double,b:int", throw=True)
|
|
124
|
+
self.df_eq(b, [[1, 2]], "a:double,b:int", throw=True)
|
|
146
125
|
|
|
147
126
|
# aggregation
|
|
148
127
|
b = fa.select(a, col("a"), ff.sum(col("b")).cast(float).alias("b"))
|
|
149
|
-
df_eq(b, [[1, 2], [3, 4], [None, 7]], "a:double,b:double", throw=True)
|
|
128
|
+
self.df_eq(b, [[1, 2], [3, 4], [None, 7]], "a:double,b:double", throw=True)
|
|
150
129
|
|
|
151
130
|
# having
|
|
152
131
|
# https://github.com/fugue-project/fugue/issues/222
|
|
@@ -157,7 +136,7 @@ class ExecutionEngineTests(object):
|
|
|
157
136
|
col_b.cast(float).alias("c"),
|
|
158
137
|
having=(col_b >= 7) | (col("a") == 1),
|
|
159
138
|
)
|
|
160
|
-
df_eq(b, [[1, 2], [None, 7]], "a:double,c:double", throw=True)
|
|
139
|
+
self.df_eq(b, [[1, 2], [None, 7]], "a:double,c:double", throw=True)
|
|
161
140
|
|
|
162
141
|
# literal + alias inference
|
|
163
142
|
# https://github.com/fugue-project/fugue/issues/222
|
|
@@ -169,7 +148,7 @@ class ExecutionEngineTests(object):
|
|
|
169
148
|
col_b.cast(float).alias("c"),
|
|
170
149
|
having=(col_b >= 7) | (col("a") == 1),
|
|
171
150
|
)
|
|
172
|
-
df_eq(
|
|
151
|
+
self.df_eq(
|
|
173
152
|
b, [[1, "1", 2], [None, "1", 7]], "a:double,o:str,c:double", throw=True
|
|
174
153
|
)
|
|
175
154
|
|
|
@@ -180,7 +159,7 @@ class ExecutionEngineTests(object):
|
|
|
180
159
|
)
|
|
181
160
|
|
|
182
161
|
b = fa.assign(a, x=1, b=col("b").cast(str), c=(col("b") + 1).cast(int))
|
|
183
|
-
df_eq(
|
|
162
|
+
self.df_eq(
|
|
184
163
|
b,
|
|
185
164
|
[
|
|
186
165
|
[1, "2", 1, 3],
|
|
@@ -204,7 +183,7 @@ class ExecutionEngineTests(object):
|
|
|
204
183
|
b=ff.max(col("b")),
|
|
205
184
|
c=(ff.max(col("b")) * 2).cast("int32").alias("c"),
|
|
206
185
|
)
|
|
207
|
-
df_eq(b, [[4, 8]], "b:int,c:int", throw=True)
|
|
186
|
+
self.df_eq(b, [[4, 8]], "b:int,c:int", throw=True)
|
|
208
187
|
|
|
209
188
|
b = fa.aggregate(
|
|
210
189
|
a,
|
|
@@ -212,7 +191,7 @@ class ExecutionEngineTests(object):
|
|
|
212
191
|
b=ff.max(col("b")),
|
|
213
192
|
c=(ff.max(col("b")) * 2).cast("int32").alias("c"),
|
|
214
193
|
)
|
|
215
|
-
df_eq(
|
|
194
|
+
self.df_eq(
|
|
216
195
|
b,
|
|
217
196
|
[[None, 4, 8], [1, 2, 4], [3, 4, 8]],
|
|
218
197
|
"a:double,b:int,c:int",
|
|
@@ -241,17 +220,17 @@ class ExecutionEngineTests(object):
|
|
|
241
220
|
a = fa.as_fugue_engine_df(e, o)
|
|
242
221
|
# no partition
|
|
243
222
|
c = e.map_engine.map_dataframe(a, noop, a.schema, PartitionSpec())
|
|
244
|
-
df_eq(c, o, throw=True)
|
|
223
|
+
self.df_eq(c, o, throw=True)
|
|
245
224
|
# with key partition
|
|
246
225
|
c = e.map_engine.map_dataframe(
|
|
247
226
|
a, noop, a.schema, PartitionSpec(by=["a"], presort="b")
|
|
248
227
|
)
|
|
249
|
-
df_eq(c, o, throw=True)
|
|
228
|
+
self.df_eq(c, o, throw=True)
|
|
250
229
|
# select top
|
|
251
230
|
c = e.map_engine.map_dataframe(
|
|
252
231
|
a, select_top, a.schema, PartitionSpec(by=["a"], presort="b")
|
|
253
232
|
)
|
|
254
|
-
df_eq(c, [[None, 1], [1, 2], [3, 4]], "a:double,b:int", throw=True)
|
|
233
|
+
self.df_eq(c, [[None, 1], [1, 2], [3, 4]], "a:double,b:int", throw=True)
|
|
255
234
|
# select top with another order
|
|
256
235
|
c = e.map_engine.map_dataframe(
|
|
257
236
|
a,
|
|
@@ -259,7 +238,7 @@ class ExecutionEngineTests(object):
|
|
|
259
238
|
a.schema,
|
|
260
239
|
PartitionSpec(partition_by=["a"], presort="b DESC"),
|
|
261
240
|
)
|
|
262
|
-
df_eq(
|
|
241
|
+
self.df_eq(
|
|
263
242
|
c,
|
|
264
243
|
[[None, 4], [1, 2], [3, 4]],
|
|
265
244
|
"a:double,b:int",
|
|
@@ -273,7 +252,7 @@ class ExecutionEngineTests(object):
|
|
|
273
252
|
PartitionSpec(partition_by=["a"], presort="b DESC", num_partitions=3),
|
|
274
253
|
on_init=on_init,
|
|
275
254
|
)
|
|
276
|
-
df_eq(c, [[None, 4], [1, 2], [3, 4]], "a:double,b:int", throw=True)
|
|
255
|
+
self.df_eq(c, [[None, 4], [1, 2], [3, 4]], "a:double,b:int", throw=True)
|
|
277
256
|
|
|
278
257
|
def test_map_with_special_values(self):
|
|
279
258
|
def with_nat(cursor, data):
|
|
@@ -290,7 +269,7 @@ class ExecutionEngineTests(object):
|
|
|
290
269
|
c = e.map_engine.map_dataframe(
|
|
291
270
|
o, select_top, o.schema, PartitionSpec(by=["a", "b"], presort="c")
|
|
292
271
|
)
|
|
293
|
-
df_eq(
|
|
272
|
+
self.df_eq(
|
|
294
273
|
c,
|
|
295
274
|
[[1, None, 0], [None, None, 2]],
|
|
296
275
|
"a:double,b:double,c:int",
|
|
@@ -311,7 +290,7 @@ class ExecutionEngineTests(object):
|
|
|
311
290
|
c = e.map_engine.map_dataframe(
|
|
312
291
|
o, select_top, o.schema, PartitionSpec(by=["a", "c"], presort="b DESC")
|
|
313
292
|
)
|
|
314
|
-
df_eq(
|
|
293
|
+
self.df_eq(
|
|
315
294
|
c,
|
|
316
295
|
[[None, 4, None], [dt, 5, 1]],
|
|
317
296
|
"a:datetime,b:int,c:double",
|
|
@@ -320,7 +299,7 @@ class ExecutionEngineTests(object):
|
|
|
320
299
|
d = e.map_engine.map_dataframe(
|
|
321
300
|
c, with_nat, "a:datetime,b:int,c:double,nat:datetime", PartitionSpec()
|
|
322
301
|
)
|
|
323
|
-
df_eq(
|
|
302
|
+
self.df_eq(
|
|
324
303
|
d,
|
|
325
304
|
[[None, 4, None, None], [dt, 5, 1, None]],
|
|
326
305
|
"a:datetime,b:int,c:double,nat:datetime",
|
|
@@ -331,7 +310,7 @@ class ExecutionEngineTests(object):
|
|
|
331
310
|
c = e.map_engine.map_dataframe(
|
|
332
311
|
o, select_top, o.schema, PartitionSpec(by=["a"])
|
|
333
312
|
)
|
|
334
|
-
df_eq(c, o, check_order=True, throw=True)
|
|
313
|
+
self.df_eq(c, o, check_order=True, throw=True)
|
|
335
314
|
|
|
336
315
|
def test_map_with_dict_col(self):
|
|
337
316
|
e = self.engine
|
|
@@ -341,7 +320,7 @@ class ExecutionEngineTests(object):
|
|
|
341
320
|
c = e.map_engine.map_dataframe(
|
|
342
321
|
o, select_top, o.schema, PartitionSpec(by=["a"])
|
|
343
322
|
)
|
|
344
|
-
df_eq(c, o, no_pandas=True, check_order=True, throw=True)
|
|
323
|
+
self.df_eq(c, o, no_pandas=True, check_order=True, throw=True)
|
|
345
324
|
|
|
346
325
|
# input has dict, output doesn't
|
|
347
326
|
def mp2(cursor, data):
|
|
@@ -350,7 +329,7 @@ class ExecutionEngineTests(object):
|
|
|
350
329
|
c = e.map_engine.map_dataframe(
|
|
351
330
|
o, mp2, "a:datetime", PartitionSpec(by=["a"])
|
|
352
331
|
)
|
|
353
|
-
df_eq(
|
|
332
|
+
self.df_eq(
|
|
354
333
|
c,
|
|
355
334
|
PandasDataFrame([[dt]], "a:datetime"),
|
|
356
335
|
no_pandas=True,
|
|
@@ -365,7 +344,7 @@ class ExecutionEngineTests(object):
|
|
|
365
344
|
c = e.map_engine.map_dataframe(
|
|
366
345
|
c, mp3, "a:datetime,b:{a:long}", PartitionSpec(by=["a"])
|
|
367
346
|
)
|
|
368
|
-
df_eq(c, o, no_pandas=True, check_order=True, throw=True)
|
|
347
|
+
self.df_eq(c, o, no_pandas=True, check_order=True, throw=True)
|
|
369
348
|
|
|
370
349
|
def test_map_with_binary(self):
|
|
371
350
|
e = self.engine
|
|
@@ -381,7 +360,7 @@ class ExecutionEngineTests(object):
|
|
|
381
360
|
],
|
|
382
361
|
"a:bytes",
|
|
383
362
|
)
|
|
384
|
-
df_eq(expected, c, no_pandas=True, check_order=True, throw=True)
|
|
363
|
+
self.df_eq(expected, c, no_pandas=True, check_order=False, throw=True)
|
|
385
364
|
|
|
386
365
|
def test_join_multiple(self):
|
|
387
366
|
e = self.engine
|
|
@@ -389,7 +368,7 @@ class ExecutionEngineTests(object):
|
|
|
389
368
|
b = fa.as_fugue_engine_df(e, [[1, 20], [3, 40]], "a:int,c:int")
|
|
390
369
|
c = fa.as_fugue_engine_df(e, [[1, 200], [3, 400]], "a:int,d:int")
|
|
391
370
|
d = fa.inner_join(a, b, c)
|
|
392
|
-
df_eq(
|
|
371
|
+
self.df_eq(
|
|
393
372
|
d,
|
|
394
373
|
[[1, 2, 20, 200], [3, 4, 40, 400]],
|
|
395
374
|
"a:int,b:int,c:int,d:int",
|
|
@@ -401,7 +380,7 @@ class ExecutionEngineTests(object):
|
|
|
401
380
|
a = fa.as_fugue_engine_df(e, [[1, 2], [3, 4]], "a:int,b:int")
|
|
402
381
|
b = fa.as_fugue_engine_df(e, [[6], [7]], "c:int")
|
|
403
382
|
c = fa.join(a, b, how="Cross")
|
|
404
|
-
df_eq(
|
|
383
|
+
self.df_eq(
|
|
405
384
|
c,
|
|
406
385
|
[[1, 2, 6], [1, 2, 7], [3, 4, 6], [3, 4, 7]],
|
|
407
386
|
"a:int,b:int,c:int",
|
|
@@ -410,26 +389,26 @@ class ExecutionEngineTests(object):
|
|
|
410
389
|
|
|
411
390
|
b = fa.as_fugue_engine_df(e, [], "c:int")
|
|
412
391
|
c = fa.cross_join(a, b)
|
|
413
|
-
df_eq(c, [], "a:int,b:int,c:int", throw=True)
|
|
392
|
+
self.df_eq(c, [], "a:int,b:int,c:int", throw=True)
|
|
414
393
|
|
|
415
394
|
a = fa.as_fugue_engine_df(e, [], "a:int,b:int")
|
|
416
395
|
b = fa.as_fugue_engine_df(e, [], "c:int")
|
|
417
396
|
c = fa.join(a, b, how="Cross")
|
|
418
|
-
df_eq(c, [], "a:int,b:int,c:int", throw=True)
|
|
397
|
+
self.df_eq(c, [], "a:int,b:int,c:int", throw=True)
|
|
419
398
|
|
|
420
399
|
def test__join_inner(self):
|
|
421
400
|
e = self.engine
|
|
422
401
|
a = fa.as_fugue_engine_df(e, [[1, 2], [3, 4]], "a:int,b:int")
|
|
423
402
|
b = fa.as_fugue_engine_df(e, [[6, 1], [2, 7]], "c:int,a:int")
|
|
424
403
|
c = fa.join(a, b, how="INNER", on=["a"])
|
|
425
|
-
df_eq(c, [[1, 2, 6]], "a:int,b:int,c:int", throw=True)
|
|
404
|
+
self.df_eq(c, [[1, 2, 6]], "a:int,b:int,c:int", throw=True)
|
|
426
405
|
c = fa.inner_join(b, a)
|
|
427
|
-
df_eq(c, [[6, 1, 2]], "c:int,a:int,b:int", throw=True)
|
|
406
|
+
self.df_eq(c, [[6, 1, 2]], "c:int,a:int,b:int", throw=True)
|
|
428
407
|
|
|
429
408
|
a = fa.as_fugue_engine_df(e, [], "a:int,b:int")
|
|
430
409
|
b = fa.as_fugue_engine_df(e, [], "c:int,a:int")
|
|
431
410
|
c = fa.join(a, b, how="INNER", on=["a"])
|
|
432
|
-
df_eq(c, [], "a:int,b:int,c:int", throw=True)
|
|
411
|
+
self.df_eq(c, [], "a:int,b:int,c:int", throw=True)
|
|
433
412
|
|
|
434
413
|
def test__join_outer(self):
|
|
435
414
|
e = self.engine
|
|
@@ -437,34 +416,38 @@ class ExecutionEngineTests(object):
|
|
|
437
416
|
a = fa.as_fugue_engine_df(e, [], "a:int,b:int")
|
|
438
417
|
b = fa.as_fugue_engine_df(e, [], "c:str,a:int")
|
|
439
418
|
c = fa.left_outer_join(a, b)
|
|
440
|
-
df_eq(c, [], "a:int,b:int,c:str", throw=True)
|
|
419
|
+
self.df_eq(c, [], "a:int,b:int,c:str", throw=True)
|
|
441
420
|
|
|
442
421
|
a = fa.as_fugue_engine_df(e, [], "a:int,b:str")
|
|
443
422
|
b = fa.as_fugue_engine_df(e, [], "c:int,a:int")
|
|
444
423
|
c = fa.right_outer_join(a, b)
|
|
445
|
-
df_eq(c, [], "a:int,b:str,c:int", throw=True)
|
|
424
|
+
self.df_eq(c, [], "a:int,b:str,c:int", throw=True)
|
|
446
425
|
|
|
447
426
|
a = fa.as_fugue_engine_df(e, [], "a:int,b:str")
|
|
448
427
|
b = fa.as_fugue_engine_df(e, [], "c:str,a:int")
|
|
449
428
|
c = fa.full_outer_join(a, b)
|
|
450
|
-
df_eq(c, [], "a:int,b:str,c:str", throw=True)
|
|
429
|
+
self.df_eq(c, [], "a:int,b:str,c:str", throw=True)
|
|
451
430
|
|
|
452
431
|
a = fa.as_fugue_engine_df(e, [[1, "2"], [3, "4"]], "a:int,b:str")
|
|
453
432
|
b = fa.as_fugue_engine_df(e, [["6", 1], ["2", 7]], "c:str,a:int")
|
|
454
433
|
c = fa.join(a, b, how="left_OUTER", on=["a"])
|
|
455
|
-
df_eq(
|
|
434
|
+
self.df_eq(
|
|
435
|
+
c, [[1, "2", "6"], [3, "4", None]], "a:int,b:str,c:str", throw=True
|
|
436
|
+
)
|
|
456
437
|
c = fa.join(b, a, how="left_outer", on=["a"])
|
|
457
|
-
df_eq(
|
|
438
|
+
self.df_eq(
|
|
439
|
+
c, [["6", 1, "2"], ["2", 7, None]], "c:str,a:int,b:str", throw=True
|
|
440
|
+
)
|
|
458
441
|
|
|
459
442
|
a = fa.as_fugue_engine_df(e, [[1, "2"], [3, "4"]], "a:int,b:str")
|
|
460
443
|
b = fa.as_fugue_engine_df(e, [[6, 1], [2, 7]], "c:double,a:int")
|
|
461
444
|
c = fa.join(a, b, how="left_OUTER", on=["a"])
|
|
462
|
-
df_eq(
|
|
445
|
+
self.df_eq(
|
|
463
446
|
c, [[1, "2", 6.0], [3, "4", None]], "a:int,b:str,c:double", throw=True
|
|
464
447
|
)
|
|
465
448
|
c = fa.join(b, a, how="left_outer", on=["a"])
|
|
466
449
|
# assert c.as_pandas().values.tolist()[1][2] is None
|
|
467
|
-
df_eq(
|
|
450
|
+
self.df_eq(
|
|
468
451
|
c, [[6.0, 1, "2"], [2.0, 7, None]], "c:double,a:int,b:str", throw=True
|
|
469
452
|
)
|
|
470
453
|
|
|
@@ -472,10 +455,12 @@ class ExecutionEngineTests(object):
|
|
|
472
455
|
b = fa.as_fugue_engine_df(e, [["6", 1], ["2", 7]], "c:str,a:int")
|
|
473
456
|
c = fa.join(a, b, how="right_outer", on=["a"])
|
|
474
457
|
# assert c.as_pandas().values.tolist()[1][1] is None
|
|
475
|
-
df_eq(
|
|
458
|
+
self.df_eq(
|
|
459
|
+
c, [[1, "2", "6"], [7, None, "2"]], "a:int,b:str,c:str", throw=True
|
|
460
|
+
)
|
|
476
461
|
|
|
477
462
|
c = fa.join(a, b, how="full_outer", on=["a"])
|
|
478
|
-
df_eq(
|
|
463
|
+
self.df_eq(
|
|
479
464
|
c,
|
|
480
465
|
[[1, "2", "6"], [3, "4", None], [7, None, "2"]],
|
|
481
466
|
"a:int,b:str,c:str",
|
|
@@ -488,21 +473,23 @@ class ExecutionEngineTests(object):
|
|
|
488
473
|
a = fa.as_fugue_engine_df(e, [[1, "2"], [3, "4"]], "a:int,b:str")
|
|
489
474
|
b = fa.as_fugue_engine_df(e, [[6, 1], [2, 7]], "c:int,a:int")
|
|
490
475
|
c = fa.join(a, b, how="left_OUTER", on=["a"])
|
|
491
|
-
df_eq(
|
|
476
|
+
self.df_eq(
|
|
492
477
|
c,
|
|
493
478
|
[[1, "2", 6], [3, "4", None]],
|
|
494
479
|
"a:int,b:str,c:int",
|
|
495
480
|
throw=True,
|
|
496
481
|
)
|
|
497
482
|
c = fa.join(b, a, how="left_outer", on=["a"])
|
|
498
|
-
df_eq(c, [[6, 1, "2"], [2, 7, None]], "c:int,a:int,b:str", throw=True)
|
|
483
|
+
self.df_eq(c, [[6, 1, "2"], [2, 7, None]], "c:int,a:int,b:str", throw=True)
|
|
499
484
|
|
|
500
485
|
a = fa.as_fugue_engine_df(e, [[1, "2"], [3, "4"]], "a:int,b:str")
|
|
501
486
|
b = fa.as_fugue_engine_df(e, [[True, 1], [False, 7]], "c:bool,a:int")
|
|
502
487
|
c = fa.join(a, b, how="left_OUTER", on=["a"])
|
|
503
|
-
df_eq(
|
|
488
|
+
self.df_eq(
|
|
489
|
+
c, [[1, "2", True], [3, "4", None]], "a:int,b:str,c:bool", throw=True
|
|
490
|
+
)
|
|
504
491
|
c = fa.join(b, a, how="left_outer", on=["a"])
|
|
505
|
-
df_eq(
|
|
492
|
+
self.df_eq(
|
|
506
493
|
c, [[True, 1, "2"], [False, 7, None]], "c:bool,a:int,b:str", throw=True
|
|
507
494
|
)
|
|
508
495
|
|
|
@@ -511,36 +498,36 @@ class ExecutionEngineTests(object):
|
|
|
511
498
|
a = fa.as_fugue_engine_df(e, [[1, 2], [3, 4]], "a:int,b:int")
|
|
512
499
|
b = fa.as_fugue_engine_df(e, [[6, 1], [2, 7]], "c:int,a:int")
|
|
513
500
|
c = fa.join(a, b, how="semi", on=["a"])
|
|
514
|
-
df_eq(c, [[1, 2]], "a:int,b:int", throw=True)
|
|
501
|
+
self.df_eq(c, [[1, 2]], "a:int,b:int", throw=True)
|
|
515
502
|
c = fa.semi_join(b, a)
|
|
516
|
-
df_eq(c, [[6, 1]], "c:int,a:int", throw=True)
|
|
503
|
+
self.df_eq(c, [[6, 1]], "c:int,a:int", throw=True)
|
|
517
504
|
|
|
518
505
|
b = fa.as_fugue_engine_df(e, [], "c:int,a:int")
|
|
519
506
|
c = fa.join(a, b, how="semi", on=["a"])
|
|
520
|
-
df_eq(c, [], "a:int,b:int", throw=True)
|
|
507
|
+
self.df_eq(c, [], "a:int,b:int", throw=True)
|
|
521
508
|
|
|
522
509
|
a = fa.as_fugue_engine_df(e, [], "a:int,b:int")
|
|
523
510
|
b = fa.as_fugue_engine_df(e, [], "c:int,a:int")
|
|
524
511
|
c = fa.join(a, b, how="semi", on=["a"])
|
|
525
|
-
df_eq(c, [], "a:int,b:int", throw=True)
|
|
512
|
+
self.df_eq(c, [], "a:int,b:int", throw=True)
|
|
526
513
|
|
|
527
514
|
def test__join_anti(self):
|
|
528
515
|
e = self.engine
|
|
529
516
|
a = fa.as_fugue_engine_df(e, [[1, 2], [3, 4]], "a:int,b:int")
|
|
530
517
|
b = fa.as_fugue_engine_df(e, [[6, 1], [2, 7]], "c:int,a:int")
|
|
531
518
|
c = fa.join(a, b, how="anti", on=["a"])
|
|
532
|
-
df_eq(c, [[3, 4]], "a:int,b:int", throw=True)
|
|
519
|
+
self.df_eq(c, [[3, 4]], "a:int,b:int", throw=True)
|
|
533
520
|
c = fa.anti_join(b, a)
|
|
534
|
-
df_eq(c, [[2, 7]], "c:int,a:int", throw=True)
|
|
521
|
+
self.df_eq(c, [[2, 7]], "c:int,a:int", throw=True)
|
|
535
522
|
|
|
536
523
|
b = fa.as_fugue_engine_df(e, [], "c:int,a:int")
|
|
537
524
|
c = fa.join(a, b, how="anti", on=["a"])
|
|
538
|
-
df_eq(c, [[1, 2], [3, 4]], "a:int,b:int", throw=True)
|
|
525
|
+
self.df_eq(c, [[1, 2], [3, 4]], "a:int,b:int", throw=True)
|
|
539
526
|
|
|
540
527
|
a = fa.as_fugue_engine_df(e, [], "a:int,b:int")
|
|
541
528
|
b = fa.as_fugue_engine_df(e, [], "c:int,a:int")
|
|
542
529
|
c = fa.join(a, b, how="anti", on=["a"])
|
|
543
|
-
df_eq(c, [], "a:int,b:int", throw=True)
|
|
530
|
+
self.df_eq(c, [], "a:int,b:int", throw=True)
|
|
544
531
|
|
|
545
532
|
def test__join_with_null_keys(self):
|
|
546
533
|
# SQL will not match null values
|
|
@@ -552,7 +539,7 @@ class ExecutionEngineTests(object):
|
|
|
552
539
|
e, [[1, 2, 33], [4, None, 63]], "a:double,b:double,d:int"
|
|
553
540
|
)
|
|
554
541
|
c = fa.join(a, b, how="INNER")
|
|
555
|
-
df_eq(c, [[1, 2, 3, 33]], "a:double,b:double,c:int,d:int", throw=True)
|
|
542
|
+
self.df_eq(c, [[1, 2, 3, 33]], "a:double,b:double,c:int,d:int", throw=True)
|
|
556
543
|
|
|
557
544
|
def test_union(self):
|
|
558
545
|
e = self.engine
|
|
@@ -563,21 +550,21 @@ class ExecutionEngineTests(object):
|
|
|
563
550
|
e, [[1, 2, 33], [4, None, 6]], "a:double,b:double,c:int"
|
|
564
551
|
)
|
|
565
552
|
c = fa.union(a, b)
|
|
566
|
-
df_eq(
|
|
553
|
+
self.df_eq(
|
|
567
554
|
c,
|
|
568
555
|
[[1, 2, 3], [4, None, 6], [1, 2, 33]],
|
|
569
556
|
"a:double,b:double,c:int",
|
|
570
557
|
throw=True,
|
|
571
558
|
)
|
|
572
559
|
c = fa.union(a, b, distinct=False)
|
|
573
|
-
df_eq(
|
|
560
|
+
self.df_eq(
|
|
574
561
|
c,
|
|
575
562
|
[[1, 2, 3], [4, None, 6], [1, 2, 33], [4, None, 6]],
|
|
576
563
|
"a:double,b:double,c:int",
|
|
577
564
|
throw=True,
|
|
578
565
|
)
|
|
579
566
|
d = fa.union(a, b, c, distinct=False)
|
|
580
|
-
df_eq(
|
|
567
|
+
self.df_eq(
|
|
581
568
|
d,
|
|
582
569
|
[
|
|
583
570
|
[1, 2, 3],
|
|
@@ -602,7 +589,7 @@ class ExecutionEngineTests(object):
|
|
|
602
589
|
e, [[1, 2, 33], [4, None, 6]], "a:double,b:double,c:int"
|
|
603
590
|
)
|
|
604
591
|
c = fa.subtract(a, b)
|
|
605
|
-
df_eq(
|
|
592
|
+
self.df_eq(
|
|
606
593
|
c,
|
|
607
594
|
[[1, 2, 3]],
|
|
608
595
|
"a:double,b:double,c:int",
|
|
@@ -611,7 +598,7 @@ class ExecutionEngineTests(object):
|
|
|
611
598
|
x = fa.as_fugue_engine_df(e, [[1, 2, 33]], "a:double,b:double,c:int")
|
|
612
599
|
y = fa.as_fugue_engine_df(e, [[4, None, 6]], "a:double,b:double,c:int")
|
|
613
600
|
z = fa.subtract(a, x, y)
|
|
614
|
-
df_eq(
|
|
601
|
+
self.df_eq(
|
|
615
602
|
z,
|
|
616
603
|
[[1, 2, 3]],
|
|
617
604
|
"a:double,b:double,c:int",
|
|
@@ -619,7 +606,7 @@ class ExecutionEngineTests(object):
|
|
|
619
606
|
)
|
|
620
607
|
# TODO: EXCEPT ALL is not implemented (QPD issue)
|
|
621
608
|
# c = fa.subtract(a, b, distinct=False)
|
|
622
|
-
# df_eq(
|
|
609
|
+
# self.df_eq(
|
|
623
610
|
# c,
|
|
624
611
|
# [[1, 2, 3], [1, 2, 3]],
|
|
625
612
|
# "a:double,b:double,c:int",
|
|
@@ -637,7 +624,7 @@ class ExecutionEngineTests(object):
|
|
|
637
624
|
"a:double,b:double,c:int",
|
|
638
625
|
)
|
|
639
626
|
c = fa.intersect(a, b)
|
|
640
|
-
df_eq(
|
|
627
|
+
self.df_eq(
|
|
641
628
|
c,
|
|
642
629
|
[[4, None, 6]],
|
|
643
630
|
"a:double,b:double,c:int",
|
|
@@ -654,7 +641,7 @@ class ExecutionEngineTests(object):
|
|
|
654
641
|
"a:double,b:double,c:int",
|
|
655
642
|
)
|
|
656
643
|
z = fa.intersect(a, x, y)
|
|
657
|
-
df_eq(
|
|
644
|
+
self.df_eq(
|
|
658
645
|
z,
|
|
659
646
|
[],
|
|
660
647
|
"a:double,b:double,c:int",
|
|
@@ -662,7 +649,7 @@ class ExecutionEngineTests(object):
|
|
|
662
649
|
)
|
|
663
650
|
# TODO: INTERSECT ALL is not implemented (QPD issue)
|
|
664
651
|
# c = fa.intersect(a, b, distinct=False)
|
|
665
|
-
# df_eq(
|
|
652
|
+
# self.df_eq(
|
|
666
653
|
# c,
|
|
667
654
|
# [[4, None, 6], [4, None, 6]],
|
|
668
655
|
# "a:double,b:double,c:int",
|
|
@@ -675,7 +662,7 @@ class ExecutionEngineTests(object):
|
|
|
675
662
|
e, [[4, None, 6], [1, 2, 3], [4, None, 6]], "a:double,b:double,c:int"
|
|
676
663
|
)
|
|
677
664
|
c = fa.distinct(a)
|
|
678
|
-
df_eq(
|
|
665
|
+
self.df_eq(
|
|
679
666
|
c,
|
|
680
667
|
[[4, None, 6], [1, 2, 3]],
|
|
681
668
|
"a:double,b:double,c:int",
|
|
@@ -694,25 +681,25 @@ class ExecutionEngineTests(object):
|
|
|
694
681
|
f = fa.dropna(a, how="any", thresh=2)
|
|
695
682
|
g = fa.dropna(a, how="any", subset=["a", "c"])
|
|
696
683
|
h = fa.dropna(a, how="any", thresh=1, subset=["a", "c"])
|
|
697
|
-
df_eq(
|
|
684
|
+
self.df_eq(
|
|
698
685
|
c,
|
|
699
686
|
[[1, 2, 3]],
|
|
700
687
|
"a:double,b:double,c:double",
|
|
701
688
|
throw=True,
|
|
702
689
|
)
|
|
703
|
-
df_eq(
|
|
690
|
+
self.df_eq(
|
|
704
691
|
d,
|
|
705
692
|
[[4, None, 6], [1, 2, 3], [4, None, None]],
|
|
706
693
|
"a:double,b:double,c:double",
|
|
707
694
|
throw=True,
|
|
708
695
|
)
|
|
709
|
-
df_eq(
|
|
696
|
+
self.df_eq(
|
|
710
697
|
f, [[4, None, 6], [1, 2, 3]], "a:double,b:double,c:double", throw=True
|
|
711
698
|
)
|
|
712
|
-
df_eq(
|
|
699
|
+
self.df_eq(
|
|
713
700
|
g, [[4, None, 6], [1, 2, 3]], "a:double,b:double,c:double", throw=True
|
|
714
701
|
)
|
|
715
|
-
df_eq(
|
|
702
|
+
self.df_eq(
|
|
716
703
|
h,
|
|
717
704
|
[[4, None, 6], [1, 2, 3], [4, None, None]],
|
|
718
705
|
"a:double,b:double,c:double",
|
|
@@ -730,25 +717,25 @@ class ExecutionEngineTests(object):
|
|
|
730
717
|
d = fa.fillna(a, {"b": 99, "c": -99})
|
|
731
718
|
f = fa.fillna(a, value=-99, subset=["c"])
|
|
732
719
|
g = fa.fillna(a, {"b": 99, "c": -99}, subset=["c"]) # subset ignored
|
|
733
|
-
df_eq(
|
|
720
|
+
self.df_eq(
|
|
734
721
|
c,
|
|
735
722
|
[[4, 1, 6], [1, 2, 3], [4, 1, 1]],
|
|
736
723
|
"a:double,b:double,c:double",
|
|
737
724
|
throw=True,
|
|
738
725
|
)
|
|
739
|
-
df_eq(
|
|
726
|
+
self.df_eq(
|
|
740
727
|
d,
|
|
741
728
|
[[4, 99, 6], [1, 2, 3], [4, 99, -99]],
|
|
742
729
|
"a:double,b:double,c:double",
|
|
743
730
|
throw=True,
|
|
744
731
|
)
|
|
745
|
-
df_eq(
|
|
732
|
+
self.df_eq(
|
|
746
733
|
f,
|
|
747
734
|
[[4, None, 6], [1, 2, 3], [4, None, -99]],
|
|
748
735
|
"a:double,b:double,c:double",
|
|
749
736
|
throw=True,
|
|
750
737
|
)
|
|
751
|
-
df_eq(g, d, throw=True)
|
|
738
|
+
self.df_eq(g, d, throw=True)
|
|
752
739
|
raises(ValueError, lambda: fa.fillna(a, {"b": None, c: "99"}))
|
|
753
740
|
raises(ValueError, lambda: fa.fillna(a, None))
|
|
754
741
|
# raises(ValueError, lambda: fa.fillna(a, ["b"]))
|
|
@@ -767,9 +754,9 @@ class ExecutionEngineTests(object):
|
|
|
767
754
|
h = fa.sample(a, frac=0.8, seed=1)
|
|
768
755
|
h2 = fa.sample(a, frac=0.8, seed=1)
|
|
769
756
|
i = fa.sample(a, frac=0.8, seed=2)
|
|
770
|
-
assert not df_eq(f, g, throw=False)
|
|
771
|
-
df_eq(h, h2, throw=True)
|
|
772
|
-
assert not df_eq(h, i, throw=False)
|
|
757
|
+
assert not self.df_eq(f, g, throw=False)
|
|
758
|
+
self.df_eq(h, h2, throw=True)
|
|
759
|
+
assert not self.df_eq(h, i, throw=False)
|
|
773
760
|
assert abs(len(i.as_array()) - 80) < 10
|
|
774
761
|
|
|
775
762
|
def test_take(self):
|
|
@@ -794,37 +781,37 @@ class ExecutionEngineTests(object):
|
|
|
794
781
|
f = fa.take(a, n=1, presort=None, partition=ps2)
|
|
795
782
|
g = fa.take(a, n=2, presort="a desc", na_position="last")
|
|
796
783
|
h = fa.take(a, n=2, presort="a", na_position="first")
|
|
797
|
-
df_eq(
|
|
784
|
+
self.df_eq(
|
|
798
785
|
b,
|
|
799
786
|
[[None, 4, 2]],
|
|
800
787
|
"a:str,b:int,c:long",
|
|
801
788
|
throw=True,
|
|
802
789
|
)
|
|
803
|
-
df_eq(
|
|
790
|
+
self.df_eq(
|
|
804
791
|
c,
|
|
805
792
|
[[None, 4, 2], [None, 2, 1]],
|
|
806
793
|
"a:str,b:int,c:long",
|
|
807
794
|
throw=True,
|
|
808
795
|
)
|
|
809
|
-
df_eq(
|
|
796
|
+
self.df_eq(
|
|
810
797
|
d,
|
|
811
798
|
[["a", 3, 4], ["b", 2, 2], [None, 4, 2]],
|
|
812
799
|
"a:str,b:int,c:long",
|
|
813
800
|
throw=True,
|
|
814
801
|
)
|
|
815
|
-
df_eq(
|
|
802
|
+
self.df_eq(
|
|
816
803
|
f,
|
|
817
804
|
[["a", 2, 3], ["a", 3, 4], ["b", 1, 2], [None, 2, 1]],
|
|
818
805
|
"a:str,b:int,c:long",
|
|
819
806
|
throw=True,
|
|
820
807
|
)
|
|
821
|
-
df_eq(
|
|
808
|
+
self.df_eq(
|
|
822
809
|
g,
|
|
823
810
|
[["b", 1, 2], ["b", 2, 2]],
|
|
824
811
|
"a:str,b:int,c:long",
|
|
825
812
|
throw=True,
|
|
826
813
|
)
|
|
827
|
-
df_eq(
|
|
814
|
+
self.df_eq(
|
|
828
815
|
h,
|
|
829
816
|
[
|
|
830
817
|
[None, 4, 2],
|
|
@@ -833,6 +820,46 @@ class ExecutionEngineTests(object):
|
|
|
833
820
|
"a:str,b:int,c:long",
|
|
834
821
|
throw=True,
|
|
835
822
|
)
|
|
823
|
+
a = fa.as_fugue_engine_df(
|
|
824
|
+
e,
|
|
825
|
+
[
|
|
826
|
+
["a", 2, 3],
|
|
827
|
+
[None, 4, 2],
|
|
828
|
+
[None, 2, 1],
|
|
829
|
+
],
|
|
830
|
+
"a:str,b:int,c:long",
|
|
831
|
+
)
|
|
832
|
+
i = fa.take(a, n=1, partition="a", presort=None)
|
|
833
|
+
case1 = self.df_eq(
|
|
834
|
+
i,
|
|
835
|
+
[
|
|
836
|
+
["a", 2, 3],
|
|
837
|
+
[None, 4, 2],
|
|
838
|
+
],
|
|
839
|
+
"a:str,b:int,c:long",
|
|
840
|
+
throw=False,
|
|
841
|
+
)
|
|
842
|
+
case2 = self.df_eq(
|
|
843
|
+
i,
|
|
844
|
+
[
|
|
845
|
+
["a", 2, 3],
|
|
846
|
+
[None, 2, 1],
|
|
847
|
+
],
|
|
848
|
+
"a:str,b:int,c:long",
|
|
849
|
+
throw=False,
|
|
850
|
+
)
|
|
851
|
+
assert case1 or case2
|
|
852
|
+
j = fa.take(a, n=2, partition="a", presort=None)
|
|
853
|
+
self.df_eq(
|
|
854
|
+
j,
|
|
855
|
+
[
|
|
856
|
+
["a", 2, 3],
|
|
857
|
+
[None, 4, 2],
|
|
858
|
+
[None, 2, 1],
|
|
859
|
+
],
|
|
860
|
+
"a:str,b:int,c:long",
|
|
861
|
+
throw=True,
|
|
862
|
+
)
|
|
836
863
|
raises(ValueError, lambda: fa.take(a, n=0.5, presort=None))
|
|
837
864
|
|
|
838
865
|
def test_sample_n(self):
|
|
@@ -844,9 +871,9 @@ class ExecutionEngineTests(object):
|
|
|
844
871
|
d = fa.sample(a, n=90, seed=1)
|
|
845
872
|
d2 = fa.sample(a, n=90, seed=1)
|
|
846
873
|
e = fa.sample(a, n=90, seed=2)
|
|
847
|
-
assert not df_eq(b, c, throw=False)
|
|
848
|
-
df_eq(d, d2, throw=True)
|
|
849
|
-
assert not df_eq(d, e, throw=False)
|
|
874
|
+
assert not self.df_eq(b, c, throw=False)
|
|
875
|
+
self.df_eq(d, d2, throw=True)
|
|
876
|
+
assert not self.df_eq(d, e, throw=False)
|
|
850
877
|
assert abs(len(e.as_array()) - 90) < 2
|
|
851
878
|
|
|
852
879
|
def test_comap(self):
|
|
@@ -902,11 +929,11 @@ class ExecutionEngineTests(object):
|
|
|
902
929
|
PartitionSpec(),
|
|
903
930
|
on_init=on_init,
|
|
904
931
|
)
|
|
905
|
-
df_eq(res, [[1, "_02,_11"]], "a:int,v:str", throw=True)
|
|
932
|
+
self.df_eq(res, [[1, "_02,_11"]], "a:int,v:str", throw=True)
|
|
906
933
|
|
|
907
934
|
# for outer joins, the NULL will be filled with empty dataframe
|
|
908
935
|
res = e.comap(z2, comap, "a:int,v:str", PartitionSpec())
|
|
909
|
-
df_eq(
|
|
936
|
+
self.df_eq(
|
|
910
937
|
res,
|
|
911
938
|
[[1, "_02,_11"], [3, "_01,_10"]],
|
|
912
939
|
"a:int,v:str",
|
|
@@ -914,7 +941,7 @@ class ExecutionEngineTests(object):
|
|
|
914
941
|
)
|
|
915
942
|
|
|
916
943
|
res = e.comap(z3, comap, "a:int,v:str", PartitionSpec())
|
|
917
|
-
df_eq(
|
|
944
|
+
self.df_eq(
|
|
918
945
|
res,
|
|
919
946
|
[[1, "_01,_12"], [3, "_00,_11"]],
|
|
920
947
|
"a:int,v:str",
|
|
@@ -922,10 +949,10 @@ class ExecutionEngineTests(object):
|
|
|
922
949
|
)
|
|
923
950
|
|
|
924
951
|
res = e.comap(z4, comap, "v:str", PartitionSpec())
|
|
925
|
-
df_eq(res, [["_03,_12"]], "v:str", throw=True)
|
|
952
|
+
self.df_eq(res, [["_03,_12"]], "v:str", throw=True)
|
|
926
953
|
|
|
927
954
|
res = e.comap(z5, comap, "a:int,v:str", PartitionSpec())
|
|
928
|
-
df_eq(
|
|
955
|
+
self.df_eq(
|
|
929
956
|
res,
|
|
930
957
|
[[1, "_02,_11"], [3, "_01,_10"], [7, "_00,_11"]],
|
|
931
958
|
"a:int,v:str",
|
|
@@ -963,7 +990,7 @@ class ExecutionEngineTests(object):
|
|
|
963
990
|
PartitionSpec(),
|
|
964
991
|
on_init=on_init,
|
|
965
992
|
)
|
|
966
|
-
df_eq(res, [[1, "x2,y1"]], "a:int,v:str", throw=True)
|
|
993
|
+
self.df_eq(res, [[1, "x2,y1"]], "a:int,v:str", throw=True)
|
|
967
994
|
|
|
968
995
|
res = e.comap(
|
|
969
996
|
z2,
|
|
@@ -972,7 +999,7 @@ class ExecutionEngineTests(object):
|
|
|
972
999
|
PartitionSpec(),
|
|
973
1000
|
on_init=on_init,
|
|
974
1001
|
)
|
|
975
|
-
df_eq(res, [[1, "x2,y1,z1"]], "a:int,v:str", throw=True)
|
|
1002
|
+
self.df_eq(res, [[1, "x2,y1,z1"]], "a:int,v:str", throw=True)
|
|
976
1003
|
|
|
977
1004
|
res = e.comap(
|
|
978
1005
|
z3,
|
|
@@ -981,10 +1008,10 @@ class ExecutionEngineTests(object):
|
|
|
981
1008
|
PartitionSpec(),
|
|
982
1009
|
on_init=on_init,
|
|
983
1010
|
)
|
|
984
|
-
df_eq(res, [[1, "z1"]], "a:int,v:str", throw=True)
|
|
1011
|
+
self.df_eq(res, [[1, "z1"]], "a:int,v:str", throw=True)
|
|
985
1012
|
|
|
986
1013
|
@pytest.fixture(autouse=True)
|
|
987
|
-
def init_tmpdir(self, tmpdir
|
|
1014
|
+
def init_tmpdir(self, tmpdir):
|
|
988
1015
|
self.tmpdir = tmpdir
|
|
989
1016
|
|
|
990
1017
|
def test_save_single_and_load_parquet(self):
|
|
@@ -995,20 +1022,20 @@ class ExecutionEngineTests(object):
|
|
|
995
1022
|
fa.save(b, path, format_hint="parquet", force_single=True)
|
|
996
1023
|
assert isfile(path)
|
|
997
1024
|
c = fa.load(path, format_hint="parquet", columns=["a", "c"], as_fugue=True)
|
|
998
|
-
df_eq(c, [[1, 6], [7, 2]], "a:long,c:int", throw=True)
|
|
1025
|
+
self.df_eq(c, [[1, 6], [7, 2]], "a:long,c:int", throw=True)
|
|
999
1026
|
|
|
1000
1027
|
# overwirte single with folder (if applicable)
|
|
1001
1028
|
b = ArrayDataFrame([[60, 1], [20, 7]], "c:int,a:long")
|
|
1002
1029
|
fa.save(b, path, format_hint="parquet", mode="overwrite")
|
|
1003
1030
|
c = fa.load(path, format_hint="parquet", columns=["a", "c"], as_fugue=True)
|
|
1004
|
-
df_eq(c, [[1, 60], [7, 20]], "a:long,c:int", throw=True)
|
|
1031
|
+
self.df_eq(c, [[1, 60], [7, 20]], "a:long,c:int", throw=True)
|
|
1005
1032
|
|
|
1006
1033
|
def test_save_and_load_parquet(self):
|
|
1007
1034
|
b = ArrayDataFrame([[6, 1], [2, 7]], "c:int,a:long")
|
|
1008
1035
|
path = os.path.join(self.tmpdir, "a", "b")
|
|
1009
1036
|
fa.save(b, path, format_hint="parquet")
|
|
1010
1037
|
c = fa.load(path, format_hint="parquet", columns=["a", "c"], as_fugue=True)
|
|
1011
|
-
df_eq(c, [[1, 6], [7, 2]], "a:long,c:int", throw=True)
|
|
1038
|
+
self.df_eq(c, [[1, 6], [7, 2]], "a:long,c:int", throw=True)
|
|
1012
1039
|
|
|
1013
1040
|
def test_load_parquet_folder(self):
|
|
1014
1041
|
native = NativeExecutionEngine()
|
|
@@ -1019,7 +1046,7 @@ class ExecutionEngineTests(object):
|
|
|
1019
1046
|
fa.save(b, os.path.join(path, "b.parquet"), engine=native)
|
|
1020
1047
|
touch(os.path.join(path, "_SUCCESS"))
|
|
1021
1048
|
c = fa.load(path, format_hint="parquet", columns=["a", "c"], as_fugue=True)
|
|
1022
|
-
df_eq(c, [[1, 6], [7, 2], [8, 4]], "a:long,c:int", throw=True)
|
|
1049
|
+
self.df_eq(c, [[1, 6], [7, 2], [8, 4]], "a:long,c:int", throw=True)
|
|
1023
1050
|
|
|
1024
1051
|
def test_load_parquet_files(self):
|
|
1025
1052
|
native = NativeExecutionEngine()
|
|
@@ -1033,7 +1060,7 @@ class ExecutionEngineTests(object):
|
|
|
1033
1060
|
c = fa.load(
|
|
1034
1061
|
[f1, f2], format_hint="parquet", columns=["a", "c"], as_fugue=True
|
|
1035
1062
|
)
|
|
1036
|
-
df_eq(c, [[1, 6], [7, 2], [8, 4]], "a:long,c:int", throw=True)
|
|
1063
|
+
self.df_eq(c, [[1, 6], [7, 2], [8, 4]], "a:long,c:int", throw=True)
|
|
1037
1064
|
|
|
1038
1065
|
def test_save_single_and_load_csv(self):
|
|
1039
1066
|
b = ArrayDataFrame([[6.1, 1.1], [2.1, 7.1]], "c:double,a:double")
|
|
@@ -1045,12 +1072,12 @@ class ExecutionEngineTests(object):
|
|
|
1045
1072
|
c = fa.load(
|
|
1046
1073
|
path, format_hint="csv", header=True, infer_schema=False, as_fugue=True
|
|
1047
1074
|
)
|
|
1048
|
-
df_eq(c, [["6.1", "1.1"], ["2.1", "7.1"]], "c:str,a:str", throw=True)
|
|
1075
|
+
self.df_eq(c, [["6.1", "1.1"], ["2.1", "7.1"]], "c:str,a:str", throw=True)
|
|
1049
1076
|
|
|
1050
1077
|
c = fa.load(
|
|
1051
1078
|
path, format_hint="csv", header=True, infer_schema=True, as_fugue=True
|
|
1052
1079
|
)
|
|
1053
|
-
df_eq(c, [[6.1, 1.1], [2.1, 7.1]], "c:double,a:double", throw=True)
|
|
1080
|
+
self.df_eq(c, [[6.1, 1.1], [2.1, 7.1]], "c:double,a:double", throw=True)
|
|
1054
1081
|
|
|
1055
1082
|
with raises(ValueError):
|
|
1056
1083
|
c = fa.load(
|
|
@@ -1070,7 +1097,7 @@ class ExecutionEngineTests(object):
|
|
|
1070
1097
|
columns=["a", "c"],
|
|
1071
1098
|
as_fugue=True,
|
|
1072
1099
|
)
|
|
1073
|
-
df_eq(c, [["1.1", "6.1"], ["7.1", "2.1"]], "a:str,c:str", throw=True)
|
|
1100
|
+
self.df_eq(c, [["1.1", "6.1"], ["7.1", "2.1"]], "a:str,c:str", throw=True)
|
|
1074
1101
|
|
|
1075
1102
|
c = fa.load(
|
|
1076
1103
|
path,
|
|
@@ -1080,7 +1107,7 @@ class ExecutionEngineTests(object):
|
|
|
1080
1107
|
columns="a:double,c:double",
|
|
1081
1108
|
as_fugue=True,
|
|
1082
1109
|
)
|
|
1083
|
-
df_eq(c, [[1.1, 6.1], [7.1, 2.1]], "a:double,c:double", throw=True)
|
|
1110
|
+
self.df_eq(c, [[1.1, 6.1], [7.1, 2.1]], "a:double,c:double", throw=True)
|
|
1084
1111
|
|
|
1085
1112
|
# overwirte single with folder (if applicable)
|
|
1086
1113
|
b = ArrayDataFrame([[60.1, 1.1], [20.1, 7.1]], "c:double,a:double")
|
|
@@ -1093,7 +1120,7 @@ class ExecutionEngineTests(object):
|
|
|
1093
1120
|
columns=["a", "c"],
|
|
1094
1121
|
as_fugue=True,
|
|
1095
1122
|
)
|
|
1096
|
-
df_eq(c, [["1.1", "60.1"], ["7.1", "20.1"]], "a:str,c:str", throw=True)
|
|
1123
|
+
self.df_eq(c, [["1.1", "60.1"], ["7.1", "20.1"]], "a:str,c:str", throw=True)
|
|
1097
1124
|
|
|
1098
1125
|
def test_save_single_and_load_csv_no_header(self):
|
|
1099
1126
|
b = ArrayDataFrame([[6.1, 1.1], [2.1, 7.1]], "c:double,a:double")
|
|
@@ -1109,7 +1136,7 @@ class ExecutionEngineTests(object):
|
|
|
1109
1136
|
format_hint="csv",
|
|
1110
1137
|
header=False,
|
|
1111
1138
|
infer_schema=False,
|
|
1112
|
-
as_fugue=True
|
|
1139
|
+
as_fugue=True,
|
|
1113
1140
|
# when header is False, must set columns
|
|
1114
1141
|
)
|
|
1115
1142
|
|
|
@@ -1121,7 +1148,7 @@ class ExecutionEngineTests(object):
|
|
|
1121
1148
|
columns=["c", "a"],
|
|
1122
1149
|
as_fugue=True,
|
|
1123
1150
|
)
|
|
1124
|
-
df_eq(c, [["6.1", "1.1"], ["2.1", "7.1"]], "c:str,a:str", throw=True)
|
|
1151
|
+
self.df_eq(c, [["6.1", "1.1"], ["2.1", "7.1"]], "c:str,a:str", throw=True)
|
|
1125
1152
|
|
|
1126
1153
|
c = fa.load(
|
|
1127
1154
|
path,
|
|
@@ -1131,7 +1158,7 @@ class ExecutionEngineTests(object):
|
|
|
1131
1158
|
columns=["c", "a"],
|
|
1132
1159
|
as_fugue=True,
|
|
1133
1160
|
)
|
|
1134
|
-
df_eq(c, [[6.1, 1.1], [2.1, 7.1]], "c:double,a:double", throw=True)
|
|
1161
|
+
self.df_eq(c, [[6.1, 1.1], [2.1, 7.1]], "c:double,a:double", throw=True)
|
|
1135
1162
|
|
|
1136
1163
|
with raises(ValueError):
|
|
1137
1164
|
c = fa.load(
|
|
@@ -1151,7 +1178,7 @@ class ExecutionEngineTests(object):
|
|
|
1151
1178
|
columns="c:double,a:str",
|
|
1152
1179
|
as_fugue=True,
|
|
1153
1180
|
)
|
|
1154
|
-
df_eq(c, [[6.1, "1.1"], [2.1, "7.1"]], "c:double,a:str", throw=True)
|
|
1181
|
+
self.df_eq(c, [[6.1, "1.1"], [2.1, "7.1"]], "c:double,a:str", throw=True)
|
|
1155
1182
|
|
|
1156
1183
|
def test_save_and_load_csv(self):
|
|
1157
1184
|
b = ArrayDataFrame([[6.1, 1.1], [2.1, 7.1]], "c:double,a:double")
|
|
@@ -1165,7 +1192,7 @@ class ExecutionEngineTests(object):
|
|
|
1165
1192
|
columns=["a", "c"],
|
|
1166
1193
|
as_fugue=True,
|
|
1167
1194
|
)
|
|
1168
|
-
df_eq(c, [[1.1, 6.1], [7.1, 2.1]], "a:double,c:double", throw=True)
|
|
1195
|
+
self.df_eq(c, [[1.1, 6.1], [7.1, 2.1]], "a:double,c:double", throw=True)
|
|
1169
1196
|
|
|
1170
1197
|
def test_load_csv_folder(self):
|
|
1171
1198
|
native = NativeExecutionEngine()
|
|
@@ -1195,7 +1222,7 @@ class ExecutionEngineTests(object):
|
|
|
1195
1222
|
columns=["a", "c"],
|
|
1196
1223
|
as_fugue=True,
|
|
1197
1224
|
)
|
|
1198
|
-
df_eq(
|
|
1225
|
+
self.df_eq(
|
|
1199
1226
|
c, [[1.1, 6.1], [7.1, 2.1], [8.1, 4.1]], "a:double,c:double", throw=True
|
|
1200
1227
|
)
|
|
1201
1228
|
|
|
@@ -1207,13 +1234,13 @@ class ExecutionEngineTests(object):
|
|
|
1207
1234
|
fa.save(b, path, format_hint="json", force_single=True)
|
|
1208
1235
|
assert isfile(path)
|
|
1209
1236
|
c = fa.load(path, format_hint="json", columns=["a", "c"], as_fugue=True)
|
|
1210
|
-
df_eq(c, [[1, 6], [7, 2]], "a:long,c:long", throw=True)
|
|
1237
|
+
self.df_eq(c, [[1, 6], [7, 2]], "a:long,c:long", throw=True)
|
|
1211
1238
|
|
|
1212
1239
|
# overwirte single with folder (if applicable)
|
|
1213
1240
|
b = ArrayDataFrame([[60, 1], [20, 7]], "c:long,a:long")
|
|
1214
1241
|
fa.save(b, path, format_hint="json", mode="overwrite")
|
|
1215
1242
|
c = fa.load(path, format_hint="json", columns=["a", "c"], as_fugue=True)
|
|
1216
|
-
df_eq(c, [[1, 60], [7, 20]], "a:long,c:long", throw=True)
|
|
1243
|
+
self.df_eq(c, [[1, 60], [7, 20]], "a:long,c:long", throw=True)
|
|
1217
1244
|
|
|
1218
1245
|
def test_save_and_load_json(self):
|
|
1219
1246
|
e = self.engine
|
|
@@ -1225,7 +1252,7 @@ class ExecutionEngineTests(object):
|
|
|
1225
1252
|
format_hint="json",
|
|
1226
1253
|
)
|
|
1227
1254
|
c = fa.load(path, format_hint="json", columns=["a", "c"], as_fugue=True)
|
|
1228
|
-
df_eq(
|
|
1255
|
+
self.df_eq(
|
|
1229
1256
|
c, [[1, 6], [7, 2], [4, 3], [8, 4], [7, 6]], "a:long,c:long", throw=True
|
|
1230
1257
|
)
|
|
1231
1258
|
|
|
@@ -1238,7 +1265,7 @@ class ExecutionEngineTests(object):
|
|
|
1238
1265
|
fa.save(b, os.path.join(path, "b.json"), format_hint="json", engine=native)
|
|
1239
1266
|
touch(os.path.join(path, "_SUCCESS"))
|
|
1240
1267
|
c = fa.load(path, format_hint="json", columns=["a", "c"], as_fugue=True)
|
|
1241
|
-
df_eq(c, [[1, 6], [7, 2], [8, 4], [4, 3]], "a:long,c:long", throw=True)
|
|
1268
|
+
self.df_eq(c, [[1, 6], [7, 2], [8, 4], [4, 3]], "a:long,c:long", throw=True)
|
|
1242
1269
|
|
|
1243
1270
|
def test_engine_api(self):
|
|
1244
1271
|
# complimentary tests not covered by the other tests
|
|
@@ -1251,7 +1278,7 @@ class ExecutionEngineTests(object):
|
|
|
1251
1278
|
assert fa.is_df(df3) and not isinstance(df3, DataFrame)
|
|
1252
1279
|
df4 = fa.union(df1, df2, as_fugue=True)
|
|
1253
1280
|
assert isinstance(df4, DataFrame)
|
|
1254
|
-
df_eq(df4, fa.as_pandas(df3), throw=True)
|
|
1281
|
+
self.df_eq(df4, fa.as_pandas(df3), throw=True)
|
|
1255
1282
|
|
|
1256
1283
|
|
|
1257
1284
|
def select_top(cursor, data):
|