fugue 0.8.7.dev7__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fugue/collections/sql.py +1 -1
- fugue/dataframe/utils.py +4 -18
- fugue/test/__init__.py +11 -0
- fugue/test/pandas_tester.py +24 -0
- fugue/test/plugins.py +393 -0
- {fugue-0.8.7.dev7.dist-info → fugue-0.9.0.dist-info}/METADATA +24 -15
- {fugue-0.8.7.dev7.dist-info → fugue-0.9.0.dist-info}/RECORD +38 -47
- {fugue-0.8.7.dev7.dist-info → fugue-0.9.0.dist-info}/WHEEL +1 -1
- fugue-0.9.0.dist-info/entry_points.txt +12 -0
- fugue_dask/_io.py +8 -5
- fugue_dask/_utils.py +4 -4
- fugue_dask/execution_engine.py +11 -0
- fugue_dask/registry.py +2 -0
- fugue_dask/tester.py +24 -0
- fugue_duckdb/__init__.py +0 -5
- fugue_duckdb/_io.py +1 -0
- fugue_duckdb/registry.py +30 -2
- fugue_duckdb/tester.py +49 -0
- fugue_ibis/__init__.py +0 -3
- fugue_ibis/dataframe.py +2 -2
- fugue_ibis/execution_engine.py +14 -7
- fugue_ray/_constants.py +3 -4
- fugue_ray/_utils/dataframe.py +10 -21
- fugue_ray/_utils/io.py +38 -9
- fugue_ray/execution_engine.py +1 -2
- fugue_ray/registry.py +1 -0
- fugue_ray/tester.py +22 -0
- fugue_spark/execution_engine.py +5 -5
- fugue_spark/registry.py +13 -1
- fugue_spark/tester.py +78 -0
- fugue_test/__init__.py +82 -0
- fugue_test/builtin_suite.py +26 -43
- fugue_test/dataframe_suite.py +5 -14
- fugue_test/execution_suite.py +170 -143
- fugue_test/fixtures.py +61 -0
- fugue_version/__init__.py +1 -1
- fugue-0.8.7.dev7.dist-info/entry_points.txt +0 -17
- fugue_dask/ibis_engine.py +0 -62
- fugue_duckdb/ibis_engine.py +0 -56
- fugue_ibis/execution/__init__.py +0 -0
- fugue_ibis/execution/ibis_engine.py +0 -49
- fugue_ibis/execution/pandas_backend.py +0 -54
- fugue_ibis/extensions.py +0 -203
- fugue_spark/ibis_engine.py +0 -45
- fugue_test/ibis_suite.py +0 -92
- fugue_test/plugins/__init__.py +0 -0
- fugue_test/plugins/dask/__init__.py +0 -2
- fugue_test/plugins/dask/fixtures.py +0 -12
- fugue_test/plugins/duckdb/__init__.py +0 -2
- fugue_test/plugins/duckdb/fixtures.py +0 -9
- fugue_test/plugins/misc/__init__.py +0 -2
- fugue_test/plugins/misc/fixtures.py +0 -18
- fugue_test/plugins/ray/__init__.py +0 -2
- fugue_test/plugins/ray/fixtures.py +0 -9
- {fugue-0.8.7.dev7.dist-info → fugue-0.9.0.dist-info}/LICENSE +0 -0
- {fugue-0.8.7.dev7.dist-info → fugue-0.9.0.dist-info}/top_level.txt +0 -0
|
@@ -18,7 +18,7 @@ fugue/bag/array_bag.py,sha256=b0UdDPmZpEAI3R0SBbZVOLVLAwMQnBCFeYDEpFWen14,1111
|
|
|
18
18
|
fugue/bag/bag.py,sha256=sNBAzPmEh5fEm8ME8NEEOOre6l58ri6oouVBWwafqTc,3018
|
|
19
19
|
fugue/collections/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
20
|
fugue/collections/partition.py,sha256=rPkU-3y6E598Q7wvE-jTSbSwWh3fzIVxdwPpbQvWS-M,17257
|
|
21
|
-
fugue/collections/sql.py,sha256=
|
|
21
|
+
fugue/collections/sql.py,sha256=3MjnuQMPuUMq55n-EypikkRqcpOCZtOjp7S2fs7ujAA,4955
|
|
22
22
|
fugue/collections/yielded.py,sha256=KAvCXAZpeuErGww7Y217_F7M2zv9G5hfdl2AWiO7wEM,2040
|
|
23
23
|
fugue/column/__init__.py,sha256=aoZwwzyJtNL-duLxzU2sNGoaKikWd-yesbigE_Wj29s,208
|
|
24
24
|
fugue/column/expressions.py,sha256=fdGX9oPCqJBuROFZqrOYVcwkjghdXT9ngaSTG5tW_i8,26544
|
|
@@ -34,7 +34,7 @@ fugue/dataframe/dataframes.py,sha256=tBSpHsENgbcdOJ0Jgst6PTKbjG7_uoFJch96oTlaQIs
|
|
|
34
34
|
fugue/dataframe/function_wrapper.py,sha256=V1eQMOn27UroEYT7_YiwoEF0RjZYIM0zkD3vfaMAQFs,14813
|
|
35
35
|
fugue/dataframe/iterable_dataframe.py,sha256=TcOoNKa4jNbHbvAZ0XAhtMmGcioygIHPxI9budDtenQ,4758
|
|
36
36
|
fugue/dataframe/pandas_dataframe.py,sha256=0L0wYCGhD2BpQbruoT07Ox9iQM5YLHLNrcgzudc-yKs,11633
|
|
37
|
-
fugue/dataframe/utils.py,sha256=
|
|
37
|
+
fugue/dataframe/utils.py,sha256=bA_otOJt9oju1yq5gtn21L_GDT_pUgNc6luYuBIhbUQ,10488
|
|
38
38
|
fugue/dataset/__init__.py,sha256=5f2CAJ4xst6Z2o9Q2e2twfDOGUw8ZJoE2ild4JEU2pg,112
|
|
39
39
|
fugue/dataset/api.py,sha256=DacI4L2w5NJ-eZ6nFxNMqmReEnb0WUXswbjVp7BeErk,2794
|
|
40
40
|
fugue/dataset/dataset.py,sha256=jWXZqy3msMPFFkhas2PYJEX55ZAI3gk3Txq5f4-Qya4,4759
|
|
@@ -71,6 +71,9 @@ fugue/sql/_utils.py,sha256=khpjGeFCVlaqf2JIYvS4TVTJO3fe5-8bEsvy6AIP_5Q,2083
|
|
|
71
71
|
fugue/sql/_visitors.py,sha256=2pc0J-AHJAiIexsKgNjcgrCGOyhC3_7rzonSgtjy--k,33844
|
|
72
72
|
fugue/sql/api.py,sha256=l2I9CAy_W2oFFTct9fDPLyXF0LiDxQhMx5O8jBHTAxU,10050
|
|
73
73
|
fugue/sql/workflow.py,sha256=S1pOhp0b0t6johFAJWmj6xUB7Ti5LQgNABpAzmLGjrQ,3010
|
|
74
|
+
fugue/test/__init__.py,sha256=hvVrNbJYkWI_6otpILneyTjUafxURaA4obK6AoDyCUw,250
|
|
75
|
+
fugue/test/pandas_tester.py,sha256=_w6rFqlzZKjBtmFf-08a4C97W5xtqGw5XorLhj6Zyoo,622
|
|
76
|
+
fugue/test/plugins.py,sha256=GLZia5GCmy0eBVGNbIqTbX7Ou3euf2SY4litKgdigwY,12318
|
|
74
77
|
fugue/workflow/__init__.py,sha256=tXM_KYO8Q358W6qAVlwhIQIaYNRDgZtTubrIEX4QMgM,229
|
|
75
78
|
fugue/workflow/_checkpoint.py,sha256=tt5Iv7c5ZStC0MD1inItksQ0GuK0ViniA3nvrgym-5c,5681
|
|
76
79
|
fugue/workflow/_tasks.py,sha256=Zq_jXJO_VaF8DrWUuBiwO2Y3OVuhsiOQdzP4VBsp7Fo,11826
|
|
@@ -86,29 +89,25 @@ fugue_contrib/viz/__init__.py,sha256=osgZx63Br-yMZImyEfYf9MVzJNM2Cqqke_-WsuDmG5M
|
|
|
86
89
|
fugue_contrib/viz/_ext.py,sha256=Lu_DlS5DcmrFz27fHcKTCkhKyknVWcfS5kzZVVuO9xM,1345
|
|
87
90
|
fugue_dask/__init__.py,sha256=2CcJ0AsN-k_f7dZ-yAyYpaICfUMPfH3l0FvUJSBzTr0,161
|
|
88
91
|
fugue_dask/_constants.py,sha256=35UmTVITk21GhRyRlbJOwPPdQsytM_p_2NytOXEay18,510
|
|
89
|
-
fugue_dask/_io.py,sha256=
|
|
90
|
-
fugue_dask/_utils.py,sha256=
|
|
92
|
+
fugue_dask/_io.py,sha256=pl4F7mbVgP7Rwh1FFG7xfOz2TBZRUj1l3lLvDY4jOf4,6020
|
|
93
|
+
fugue_dask/_utils.py,sha256=1uplEqvpCDZDp2YdwJxa6cuGScpgG9VvN3057J02bys,8956
|
|
91
94
|
fugue_dask/dataframe.py,sha256=MuG9TqCND7qI66lPvxzuomfE7yA4sW7DjrvbyvE6XEU,13471
|
|
92
|
-
fugue_dask/execution_engine.py,sha256=
|
|
93
|
-
fugue_dask/
|
|
94
|
-
fugue_dask/
|
|
95
|
-
fugue_duckdb/__init__.py,sha256=
|
|
96
|
-
fugue_duckdb/_io.py,sha256=
|
|
95
|
+
fugue_dask/execution_engine.py,sha256=60IiwYRBVhN-pX3v6i9BZ8Pa4bcSh5UoklvCScM_XAM,21361
|
|
96
|
+
fugue_dask/registry.py,sha256=jepWKH55VWNIWV3pOF5vpCl2OpO0rI1IULx5GM2Gk6w,2274
|
|
97
|
+
fugue_dask/tester.py,sha256=E7BZjgFpJgrHsLMKzvSO5im5OwocYcratjzulJSQZl0,718
|
|
98
|
+
fugue_duckdb/__init__.py,sha256=ZzhmAWbROR1YL9Kmlt7OlwkgPZzFhsSdwLV2pFmAqGI,268
|
|
99
|
+
fugue_duckdb/_io.py,sha256=vnd8m8C6XeMCBJBbAdA5h695NMfsduQrvONyS0HcEFA,8475
|
|
97
100
|
fugue_duckdb/_utils.py,sha256=ElKbHUyn5fWSPGXsK57iqMzcqKtCf0c8pBVBYGe5Ql4,5020
|
|
98
101
|
fugue_duckdb/dask.py,sha256=agoLzeB7Swxj2kVWfmXFbWD1NS2lbbTlnrjSkR8kKWY,5014
|
|
99
102
|
fugue_duckdb/dataframe.py,sha256=LRfTv7Y46wMM_IDYSP1R-5OXuHuBg8GHjPGFFt8u7l0,8444
|
|
100
103
|
fugue_duckdb/execution_engine.py,sha256=IZDmSAtOMJGvulTStxjTmsqJyI5QRNyxBgSMlFMSrBI,20389
|
|
101
|
-
fugue_duckdb/
|
|
102
|
-
fugue_duckdb/
|
|
103
|
-
fugue_ibis/__init__.py,sha256=
|
|
104
|
+
fugue_duckdb/registry.py,sha256=9_41KO42kXqcjF4yParQ5JGyg5TckcbhH-Q2IlGpSho,3987
|
|
105
|
+
fugue_duckdb/tester.py,sha256=MzTkv3sdOwOjI59LRrSGGl4w59Njv3OArTU5kSRL-P0,1526
|
|
106
|
+
fugue_ibis/__init__.py,sha256=z7TkK7M2_0p9XO6jQATNDgT0aHXn5k69Ttz2ga-eQG8,190
|
|
104
107
|
fugue_ibis/_compat.py,sha256=zKdTaTfuC02eUIzZPkcd7oObnVBi_X5mQjQf7SDme3Y,246
|
|
105
108
|
fugue_ibis/_utils.py,sha256=BUL5swA5FE4eQu0t5Z17hZVu9a2MFfxlFH6Ymy9xifg,6607
|
|
106
|
-
fugue_ibis/dataframe.py,sha256=
|
|
107
|
-
fugue_ibis/execution_engine.py,sha256=
|
|
108
|
-
fugue_ibis/extensions.py,sha256=H8l-SPfoqLuUoILtOuL2nccOpoL83zHeSoIhoqjtWQM,6905
|
|
109
|
-
fugue_ibis/execution/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
110
|
-
fugue_ibis/execution/ibis_engine.py,sha256=-HdPnIFWD83n5WITdzJiu4attH7GOcO041wkT5Y5ChA,1499
|
|
111
|
-
fugue_ibis/execution/pandas_backend.py,sha256=r6Pfs06FXrWukSbK2uz5gsCDG6a4wrk4sWIJ9acSuNU,1615
|
|
109
|
+
fugue_ibis/dataframe.py,sha256=k4Q6qBLBIADF5YhbvaDplXO7OkMZSHuf_Wg5o-AusEI,7796
|
|
110
|
+
fugue_ibis/execution_engine.py,sha256=5I-ou5xPdomVu-srdvidvP8f7wDYbGrCV_lGffZa_ac,18679
|
|
112
111
|
fugue_notebook/__init__.py,sha256=9r_-2uxu1lBeZ8GgpYCKom_OZy2soIOYZajg7JDO-HY,4326
|
|
113
112
|
fugue_notebook/env.py,sha256=TYiTxYPFi-BVJJY49jDsvw9mddhK8WrifeRxBke30I8,4773
|
|
114
113
|
fugue_notebook/nbextension/README.md,sha256=QLnr957YeGfwzy2r4c4qbZPaXyCbyGrKPvcqSBQYSnU,123
|
|
@@ -120,20 +119,21 @@ fugue_polars/_utils.py,sha256=7rGGWgB1-VqFwh4PcBLYk_5VNjd8FNOS4TDFyDVz2sg,159
|
|
|
120
119
|
fugue_polars/polars_dataframe.py,sha256=8LQ0IB-JFFdjW2ltDzq8DfIbUC_jjjDr1YM29usJag0,8831
|
|
121
120
|
fugue_polars/registry.py,sha256=gd6qQ-OxYtTAQFyvYbLDPXmSvCR-LW6n5K5ylgMY_7A,2950
|
|
122
121
|
fugue_ray/__init__.py,sha256=HzEHfG2mpc0ugf3nf1Pdy15Bhg35K6maZpYejn1aoyI,119
|
|
123
|
-
fugue_ray/_constants.py,sha256=
|
|
122
|
+
fugue_ray/_constants.py,sha256=RHlaVKyjQnwdbo5mFO_GBtQZcz5GvWcCbkOkLfVTQ1A,565
|
|
124
123
|
fugue_ray/dataframe.py,sha256=7asw2qf9vm6vLBSzqghm9pUcNAppJOz5CkT7XyR0S5g,12514
|
|
125
|
-
fugue_ray/execution_engine.py,sha256=
|
|
126
|
-
fugue_ray/registry.py,sha256=
|
|
124
|
+
fugue_ray/execution_engine.py,sha256=PZlWbmdCwTPfZJhN2I-44JW7so8NVCFFumaKIhJLfoI,12566
|
|
125
|
+
fugue_ray/registry.py,sha256=TS-HWy2IUozp6_A0vqc8_ZdVUT_Z9yVjG6e1gbbgy2A,1757
|
|
126
|
+
fugue_ray/tester.py,sha256=oTA_xOzvQhJU3ohc4hsVpZc0zv4bwJn1c8a9u8kcuIs,537
|
|
127
127
|
fugue_ray/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
128
128
|
fugue_ray/_utils/cluster.py,sha256=3T3Gyra6lAHlzktta-Ro35j6YZQfH6fNrj2hC5ATF9k,621
|
|
129
|
-
fugue_ray/_utils/dataframe.py,sha256=
|
|
130
|
-
fugue_ray/_utils/io.py,sha256=
|
|
129
|
+
fugue_ray/_utils/dataframe.py,sha256=5c4duGV--mdLkKrbJRgjDWvVcp9BegA3yX16pmYDYLE,3954
|
|
130
|
+
fugue_ray/_utils/io.py,sha256=3hFNDeBuh4bfCud40ZsGrGZLSvCSuxL_1VlqCTnn6RA,9794
|
|
131
131
|
fugue_spark/__init__.py,sha256=rvrMpFs9socMgyH_58gLbnAqmirBf5oidXoO4cekW6U,165
|
|
132
132
|
fugue_spark/_constants.py,sha256=K2uLQfjvMxXk75K-7_Wn47Alpwq5rW57BtECAUrOeqA,177
|
|
133
133
|
fugue_spark/dataframe.py,sha256=lYa8FizM3p_lsKYFR49FazkVZMJKyi2LABKTpP5YBLo,12006
|
|
134
|
-
fugue_spark/execution_engine.py,sha256=
|
|
135
|
-
fugue_spark/
|
|
136
|
-
fugue_spark/
|
|
134
|
+
fugue_spark/execution_engine.py,sha256=YBMtNxCvpy77xICFSg9PHMa6feNoYhWEZe8MmxznX4U,33048
|
|
135
|
+
fugue_spark/registry.py,sha256=_NmiV2cOooYK0YmqATEnNkPEMT9suUMtuecw2NNbIIk,4530
|
|
136
|
+
fugue_spark/tester.py,sha256=VX003yGNlBukaZTQSN-w7XvgSk4rqxrWQIzno0dWrXg,2481
|
|
137
137
|
fugue_spark/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
138
138
|
fugue_spark/_utils/convert.py,sha256=eRWkDYA4UO-FQu-2y4O80WEdawx7X_rIrWg55AlOiRc,10007
|
|
139
139
|
fugue_spark/_utils/io.py,sha256=OdUezKpB29Lx9aUS2k9x0xUAGZrmgMZyQYGPEeHk7rQ,5574
|
|
@@ -141,25 +141,16 @@ fugue_spark/_utils/misc.py,sha256=o8dZmXOHnA7D_ps37vgGXTPTiSEG9LQzPKq7l-MG-qM,86
|
|
|
141
141
|
fugue_spark/_utils/partition.py,sha256=iaesyO5f4uXhj1W-p91cD5ecPiGlu0bzh8gl2ce2Uvg,3618
|
|
142
142
|
fugue_sql/__init__.py,sha256=Cmr7w0Efr7PzoXdQzdJfc4Dgqd69qKqcHZZodENq7EU,287
|
|
143
143
|
fugue_sql/exceptions.py,sha256=ltS0MC8gMnVVrJbQiOZ0kRUWvVQ2LTx33dCW3ugqtb0,260
|
|
144
|
-
fugue_test/__init__.py,sha256=
|
|
144
|
+
fugue_test/__init__.py,sha256=xoQuVobhU64uyODRdnzf6MSWe9lw5khkhpJ2atvADoc,2315
|
|
145
145
|
fugue_test/bag_suite.py,sha256=WbDCFjuAHYoJh4GXSPiSJxOoOwE1VMtYpJ3lQrsUK-Y,2483
|
|
146
|
-
fugue_test/builtin_suite.py,sha256=
|
|
147
|
-
fugue_test/dataframe_suite.py,sha256=
|
|
148
|
-
fugue_test/execution_suite.py,sha256=
|
|
149
|
-
fugue_test/
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
fugue_test/plugins/ray/__init__.py,sha256=nyKGW6xgTXtMhSs7yjgFNKO7mVboCNg63Bvdf39fO_I,55
|
|
158
|
-
fugue_test/plugins/ray/fixtures.py,sha256=hZkvuo0AcD63XJl5JUroc9tm2LWHUPszg2zzY6FCSao,141
|
|
159
|
-
fugue_version/__init__.py,sha256=vTwvdJOZi8jZb9U-Em7-d50qNDNPS2z51IXqRoojeNM,22
|
|
160
|
-
fugue-0.8.7.dev7.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
|
|
161
|
-
fugue-0.8.7.dev7.dist-info/METADATA,sha256=nSp1i8apniEEe6U09_5RA8K89P40c7M5Gn9l6ofLTHQ,17860
|
|
162
|
-
fugue-0.8.7.dev7.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
|
|
163
|
-
fugue-0.8.7.dev7.dist-info/entry_points.txt,sha256=Xrl3ISyVKAFIPn1klqeGsL9DinzoYqfqBkOT4qAVBNA,578
|
|
164
|
-
fugue-0.8.7.dev7.dist-info/top_level.txt,sha256=y1eCfzGdQ1_RkgcShcfbvXs-bopD3DwJcIOxP9EFXno,140
|
|
165
|
-
fugue-0.8.7.dev7.dist-info/RECORD,,
|
|
146
|
+
fugue_test/builtin_suite.py,sha256=cOkZG6w1RHhWWxtjQhZClZQaGT6haNd576BoUmNC_cA,77960
|
|
147
|
+
fugue_test/dataframe_suite.py,sha256=7ym4sshDUly6004cq1UlppqDVtbwxD6CKxR4Lu70i0s,18994
|
|
148
|
+
fugue_test/execution_suite.py,sha256=jcSSoKqTGbeWzTxkyYU-8i2zJAjzuXn7BqE8ul-JjIc,48646
|
|
149
|
+
fugue_test/fixtures.py,sha256=8Pev-mxRZOWwTFlsGjcSZ0iIs78zyWbp5tq4KG1wyvk,1432
|
|
150
|
+
fugue_version/__init__.py,sha256=H9NWRZb7NbeRRPLP_V1fARmLNXranorVM-OOY-8_2ug,22
|
|
151
|
+
fugue-0.9.0.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
|
|
152
|
+
fugue-0.9.0.dist-info/METADATA,sha256=Ce9dp1NghY5AbNc4MuMD1VeGtRsrisvAhwgVajZY-ZM,18380
|
|
153
|
+
fugue-0.9.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
154
|
+
fugue-0.9.0.dist-info/entry_points.txt,sha256=kiRuUkKOnnHFvlWpYSfVUZiXJW3hOez6gjYoOhGht3Q,302
|
|
155
|
+
fugue-0.9.0.dist-info/top_level.txt,sha256=y1eCfzGdQ1_RkgcShcfbvXs-bopD3DwJcIOxP9EFXno,140
|
|
156
|
+
fugue-0.9.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
[fugue.plugins]
|
|
2
|
+
dask = fugue_dask.registry [dask]
|
|
3
|
+
duckdb = fugue_duckdb.registry [duckdb]
|
|
4
|
+
ibis = fugue_ibis [ibis]
|
|
5
|
+
polars = fugue_polars.registry [polars]
|
|
6
|
+
ray = fugue_ray.registry [ray]
|
|
7
|
+
spark = fugue_spark.registry [spark]
|
|
8
|
+
|
|
9
|
+
[pytest11]
|
|
10
|
+
fugue_test = fugue_test
|
|
11
|
+
fugue_test_fixtures = fugue_test.fixtures
|
|
12
|
+
|
fugue_dask/_io.py
CHANGED
|
@@ -6,7 +6,7 @@ from fsspec import AbstractFileSystem
|
|
|
6
6
|
from triad.collections.dict import ParamDict
|
|
7
7
|
from triad.collections.schema import Schema
|
|
8
8
|
from triad.utils.assertion import assert_or_throw
|
|
9
|
-
from triad.utils.io import join, makedirs, url_to_fs
|
|
9
|
+
from triad.utils.io import isfile, join, makedirs, url_to_fs
|
|
10
10
|
|
|
11
11
|
from fugue._utils.io import FileParser, _get_single_files
|
|
12
12
|
from fugue_dask.dataframe import DaskDataFrame
|
|
@@ -100,9 +100,11 @@ def _save_csv(df: DaskDataFrame, p: FileParser, **kwargs: Any) -> None:
|
|
|
100
100
|
|
|
101
101
|
|
|
102
102
|
def _safe_load_csv(path: str, **kwargs: Any) -> dd.DataFrame:
|
|
103
|
+
if not isfile(path):
|
|
104
|
+
return dd.read_csv(join(path, "*.csv"), **kwargs)
|
|
103
105
|
try:
|
|
104
106
|
return dd.read_csv(path, **kwargs)
|
|
105
|
-
except (IsADirectoryError, PermissionError):
|
|
107
|
+
except (IsADirectoryError, PermissionError): # pragma: no cover
|
|
106
108
|
return dd.read_csv(join(path, "*.csv"), **kwargs)
|
|
107
109
|
|
|
108
110
|
|
|
@@ -148,11 +150,12 @@ def _save_json(df: DaskDataFrame, p: FileParser, **kwargs: Any) -> None:
|
|
|
148
150
|
|
|
149
151
|
|
|
150
152
|
def _safe_load_json(path: str, **kwargs: Any) -> dd.DataFrame:
|
|
153
|
+
if not isfile(path):
|
|
154
|
+
return dd.read_json(join(path, "*.json"), **kwargs)
|
|
151
155
|
try:
|
|
152
156
|
return dd.read_json(path, **kwargs)
|
|
153
|
-
except (IsADirectoryError, PermissionError):
|
|
154
|
-
|
|
155
|
-
return x
|
|
157
|
+
except (IsADirectoryError, PermissionError): # pragma: no cover
|
|
158
|
+
return dd.read_json(join(path, "*.json"), **kwargs)
|
|
156
159
|
|
|
157
160
|
|
|
158
161
|
def _load_json(
|
fugue_dask/_utils.py
CHANGED
|
@@ -53,7 +53,7 @@ def hash_repartition(df: dd.DataFrame, num: int, cols: List[Any]) -> dd.DataFram
|
|
|
53
53
|
if num < 1:
|
|
54
54
|
return df
|
|
55
55
|
if num == 1:
|
|
56
|
-
return df.repartition(1)
|
|
56
|
+
return df.repartition(npartitions=1)
|
|
57
57
|
df = df.reset_index(drop=True).clear_divisions()
|
|
58
58
|
idf, ct = _add_hash_index(df, num, cols)
|
|
59
59
|
return _postprocess(idf, ct, num)
|
|
@@ -76,7 +76,7 @@ def even_repartition(df: dd.DataFrame, num: int, cols: List[Any]) -> dd.DataFram
|
|
|
76
76
|
the number of partitions will be the number of groups.
|
|
77
77
|
"""
|
|
78
78
|
if num == 1:
|
|
79
|
-
return df.repartition(1)
|
|
79
|
+
return df.repartition(npartitions=1)
|
|
80
80
|
if len(cols) == 0 and num <= 0:
|
|
81
81
|
return df
|
|
82
82
|
df = df.reset_index(drop=True).clear_divisions()
|
|
@@ -111,7 +111,7 @@ def rand_repartition(
|
|
|
111
111
|
if num < 1:
|
|
112
112
|
return df
|
|
113
113
|
if num == 1:
|
|
114
|
-
return df.repartition(1)
|
|
114
|
+
return df.repartition(npartitions=1)
|
|
115
115
|
df = df.reset_index(drop=True).clear_divisions()
|
|
116
116
|
if len(cols) == 0:
|
|
117
117
|
idf, ct = _add_random_index(df, num=num, seed=seed)
|
|
@@ -124,7 +124,7 @@ def rand_repartition(
|
|
|
124
124
|
def _postprocess(idf: dd.DataFrame, ct: int, num: int) -> dd.DataFrame:
|
|
125
125
|
parts = min(ct, num)
|
|
126
126
|
if parts <= 1:
|
|
127
|
-
return idf.repartition(1)
|
|
127
|
+
return idf.repartition(npartitions=1)
|
|
128
128
|
divisions = list(np.arange(ct, step=math.ceil(ct / parts)))
|
|
129
129
|
divisions.append(ct - 1)
|
|
130
130
|
return idf.repartition(divisions=divisions, force=True)
|
fugue_dask/execution_engine.py
CHANGED
|
@@ -506,6 +506,17 @@ class DaskExecutionEngine(ExecutionEngine):
|
|
|
506
506
|
).head(n)
|
|
507
507
|
|
|
508
508
|
else:
|
|
509
|
+
if len(_presort.keys()) == 0 and n == 1:
|
|
510
|
+
return DaskDataFrame(
|
|
511
|
+
d.drop_duplicates(
|
|
512
|
+
subset=partition_spec.partition_by,
|
|
513
|
+
ignore_index=True,
|
|
514
|
+
keep="first",
|
|
515
|
+
),
|
|
516
|
+
df.schema,
|
|
517
|
+
type_safe=False,
|
|
518
|
+
)
|
|
519
|
+
|
|
509
520
|
d = (
|
|
510
521
|
d.groupby(partition_spec.partition_by, dropna=False)
|
|
511
522
|
.apply(_partition_take, n=n, presort=_presort, meta=meta)
|
fugue_dask/registry.py
CHANGED
|
@@ -19,6 +19,8 @@ from fugue_dask._utils import DASK_UTILS
|
|
|
19
19
|
from fugue_dask.dataframe import DaskDataFrame
|
|
20
20
|
from fugue_dask.execution_engine import DaskExecutionEngine
|
|
21
21
|
|
|
22
|
+
from .tester import DaskTestBackend # noqa: F401 # pylint: disable-all
|
|
23
|
+
|
|
22
24
|
|
|
23
25
|
@infer_execution_engine.candidate(
|
|
24
26
|
lambda objs: is_pandas_or(objs, (dd.DataFrame, DaskDataFrame))
|
fugue_dask/tester.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from contextlib import contextmanager
|
|
2
|
+
from typing import Any, Dict, Iterator
|
|
3
|
+
|
|
4
|
+
import dask
|
|
5
|
+
from dask.distributed import Client
|
|
6
|
+
|
|
7
|
+
import fugue.test as ft
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@ft.fugue_test_backend
|
|
11
|
+
class DaskTestBackend(ft.FugueTestBackend):
|
|
12
|
+
name = "dask"
|
|
13
|
+
|
|
14
|
+
@classmethod
|
|
15
|
+
def transform_session_conf(cls, conf: Dict[str, Any]) -> Dict[str, Any]:
|
|
16
|
+
return ft.extract_conf(conf, "dask.", remove_prefix=True)
|
|
17
|
+
|
|
18
|
+
@classmethod
|
|
19
|
+
@contextmanager
|
|
20
|
+
def session_context(cls, session_conf: Dict[str, Any]) -> Iterator[Any]:
|
|
21
|
+
with Client(**session_conf) as client:
|
|
22
|
+
dask.config.set({"dataframe.shuffle.method": "tasks"})
|
|
23
|
+
dask.config.set({"dataframe.convert-string": False})
|
|
24
|
+
yield client
|
fugue_duckdb/__init__.py
CHANGED
fugue_duckdb/_io.py
CHANGED
fugue_duckdb/registry.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any, Tuple
|
|
2
2
|
|
|
3
3
|
from duckdb import DuckDBPyConnection, DuckDBPyRelation
|
|
4
4
|
from triad import run_at_def
|
|
@@ -15,10 +15,17 @@ from fugue.dev import (
|
|
|
15
15
|
fugue_annotated_param,
|
|
16
16
|
is_pandas_or,
|
|
17
17
|
)
|
|
18
|
-
from fugue.plugins import infer_execution_engine
|
|
18
|
+
from fugue.plugins import infer_execution_engine, parse_execution_engine
|
|
19
19
|
from fugue_duckdb.dataframe import DuckDataFrame
|
|
20
20
|
from fugue_duckdb.execution_engine import DuckDBEngine, DuckExecutionEngine
|
|
21
21
|
|
|
22
|
+
from .tester import DuckDBTestBackend # noqa: F401 # pylint: disable-all
|
|
23
|
+
|
|
24
|
+
try:
|
|
25
|
+
from .tester import DuckDaskTestBackend # noqa: F401 # pylint: disable-all
|
|
26
|
+
except ImportError: # pragma: no cover
|
|
27
|
+
pass
|
|
28
|
+
|
|
22
29
|
|
|
23
30
|
@infer_execution_engine.candidate(
|
|
24
31
|
lambda objs: is_pandas_or(objs, (DuckDBPyRelation, DuckDataFrame))
|
|
@@ -67,6 +74,27 @@ def _register_engines() -> None:
|
|
|
67
74
|
register_sql_engine("duckdb", lambda engine: DuckDBEngine(engine))
|
|
68
75
|
|
|
69
76
|
|
|
77
|
+
try:
|
|
78
|
+
from fugue_duckdb.dask import DuckDaskExecutionEngine
|
|
79
|
+
from dask.distributed import Client
|
|
80
|
+
|
|
81
|
+
@parse_execution_engine.candidate(
|
|
82
|
+
lambda engine, conf, **kwargs: isinstance(engine, list)
|
|
83
|
+
and len(engine) == 2
|
|
84
|
+
and isinstance(engine[0], DuckDBPyConnection)
|
|
85
|
+
and isinstance(engine[1], Client),
|
|
86
|
+
)
|
|
87
|
+
def _parse_duck_dask_client(
|
|
88
|
+
engine: Tuple[DuckDBPyConnection, Client], conf: Any, **kwargs: Any
|
|
89
|
+
) -> DuckDaskExecutionEngine:
|
|
90
|
+
return DuckDaskExecutionEngine(
|
|
91
|
+
connection=engine[0], dask_client=engine[1], conf=conf
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
except Exception: # pragma: no cover
|
|
95
|
+
pass
|
|
96
|
+
|
|
97
|
+
|
|
70
98
|
@fugue_annotated_param(DuckExecutionEngine)
|
|
71
99
|
class _DuckExecutionEngineParam(ExecutionEngineParam):
|
|
72
100
|
pass
|
fugue_duckdb/tester.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
from contextlib import contextmanager
|
|
2
|
+
from typing import Any, Dict, Iterator
|
|
3
|
+
|
|
4
|
+
import duckdb
|
|
5
|
+
|
|
6
|
+
import fugue.test as ft
|
|
7
|
+
|
|
8
|
+
try:
|
|
9
|
+
import dask.distributed as dd
|
|
10
|
+
import dask
|
|
11
|
+
|
|
12
|
+
_HAS_DASK = True
|
|
13
|
+
except ImportError: # pragma: no cover
|
|
14
|
+
_HAS_DASK = False
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@ft.fugue_test_backend
|
|
18
|
+
class DuckDBTestBackend(ft.FugueTestBackend):
|
|
19
|
+
name = "duckdb"
|
|
20
|
+
|
|
21
|
+
@classmethod
|
|
22
|
+
@contextmanager
|
|
23
|
+
def session_context(cls, session_conf: Dict[str, Any]) -> Iterator[Any]:
|
|
24
|
+
with duckdb.connect(config=session_conf) as conn:
|
|
25
|
+
yield conn
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
if _HAS_DASK:
|
|
29
|
+
|
|
30
|
+
@ft.fugue_test_backend
|
|
31
|
+
class DuckDaskTestBackend(ft.FugueTestBackend):
|
|
32
|
+
name = "duckdask"
|
|
33
|
+
|
|
34
|
+
@classmethod
|
|
35
|
+
def transform_session_conf(cls, conf: Dict[str, Any]) -> Dict[str, Any]:
|
|
36
|
+
res = ft.extract_conf(conf, "duck.", remove_prefix=False)
|
|
37
|
+
res.update(ft.extract_conf(conf, "dask.", remove_prefix=False))
|
|
38
|
+
return res
|
|
39
|
+
|
|
40
|
+
@classmethod
|
|
41
|
+
@contextmanager
|
|
42
|
+
def session_context(cls, session_conf: Dict[str, Any]) -> Iterator[Any]:
|
|
43
|
+
duck_conf = ft.extract_conf(session_conf, "duck.", remove_prefix=True)
|
|
44
|
+
dask_conf = ft.extract_conf(session_conf, "dask.", remove_prefix=True)
|
|
45
|
+
with dd.Client(**dask_conf) as client:
|
|
46
|
+
dask.config.set({"dataframe.shuffle.method": "tasks"})
|
|
47
|
+
dask.config.set({"dataframe.convert-string": False})
|
|
48
|
+
with duckdb.connect(config=duck_conf) as conn:
|
|
49
|
+
yield [conn, client]
|
fugue_ibis/__init__.py
CHANGED
|
@@ -3,7 +3,4 @@ from triad import run_at_def
|
|
|
3
3
|
|
|
4
4
|
from ._compat import IbisSchema, IbisTable
|
|
5
5
|
from .dataframe import IbisDataFrame
|
|
6
|
-
from .execution.ibis_engine import IbisEngine, parse_ibis_engine
|
|
7
|
-
from .execution.pandas_backend import PandasIbisEngine
|
|
8
6
|
from .execution_engine import IbisExecutionEngine, IbisSQLEngine
|
|
9
|
-
from .extensions import as_fugue, as_ibis, run_ibis
|
fugue_ibis/dataframe.py
CHANGED
|
@@ -78,7 +78,7 @@ class IbisDataFrame(DataFrame):
|
|
|
78
78
|
|
|
79
79
|
@property
|
|
80
80
|
def columns(self) -> List[str]:
|
|
81
|
-
return self._table
|
|
81
|
+
return _get_ibis_columns(self._table)
|
|
82
82
|
|
|
83
83
|
def peek_array(self) -> List[Any]:
|
|
84
84
|
res = self._to_local_df(self._table.head(1)).as_array()
|
|
@@ -202,7 +202,7 @@ def _drop_ibis_columns(df: IbisTable, columns: List[str]) -> IbisTable:
|
|
|
202
202
|
|
|
203
203
|
|
|
204
204
|
@rename.candidate(lambda df, *args, **kwargs: isinstance(df, IbisTable))
|
|
205
|
-
def
|
|
205
|
+
def _rename_ibis_table(df: IbisTable, columns: Dict[str, Any]) -> IbisTable:
|
|
206
206
|
_assert_no_missing(df, columns.keys())
|
|
207
207
|
old_names = df.columns
|
|
208
208
|
new_names = [columns.get(name, name) for name in old_names]
|
fugue_ibis/execution_engine.py
CHANGED
|
@@ -23,8 +23,8 @@ from ._compat import IbisTable
|
|
|
23
23
|
from ._utils import to_ibis_schema
|
|
24
24
|
from .dataframe import IbisDataFrame
|
|
25
25
|
|
|
26
|
-
_JOIN_RIGHT_SUFFIX = "_ibis_y__"
|
|
27
|
-
_GEN_TABLE_NAMES = (f"_fugue_temp_table_{i:d}" for i in itertools.count())
|
|
26
|
+
_JOIN_RIGHT_SUFFIX = "_ibis_y__".upper()
|
|
27
|
+
_GEN_TABLE_NAMES = (f"_fugue_temp_table_{i:d}".upper() for i in itertools.count())
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
class IbisSQLEngine(SQLEngine):
|
|
@@ -92,11 +92,11 @@ class IbisSQLEngine(SQLEngine):
|
|
|
92
92
|
_df2 = self.to_df(df2)
|
|
93
93
|
key_schema, end_schema = get_join_schemas(_df1, _df2, how=how, on=on)
|
|
94
94
|
on_fields = [_df1.native[k] == _df2.native[k] for k in key_schema]
|
|
95
|
-
if ibis.__version__ < "6":
|
|
95
|
+
if ibis.__version__ < "6": # pragma: no cover
|
|
96
96
|
suffixes: Dict[str, Any] = dict(suffixes=("", _JOIN_RIGHT_SUFFIX))
|
|
97
|
-
else:
|
|
97
|
+
else:
|
|
98
98
|
# breaking change in ibis 6.0
|
|
99
|
-
suffixes = dict(lname="", rname=_JOIN_RIGHT_SUFFIX)
|
|
99
|
+
suffixes = dict(lname="", rname="{name}" + _JOIN_RIGHT_SUFFIX)
|
|
100
100
|
if how.lower() == "cross":
|
|
101
101
|
tb = _df1.native.cross_join(_df2.native, **suffixes)
|
|
102
102
|
elif how.lower() == "right_outer":
|
|
@@ -224,7 +224,7 @@ class IbisSQLEngine(SQLEngine):
|
|
|
224
224
|
_presort = parse_presort_exp(presort)
|
|
225
225
|
else:
|
|
226
226
|
_presort = partition_spec.presort
|
|
227
|
-
tbn = "
|
|
227
|
+
tbn = "_TEMP"
|
|
228
228
|
idf = self.to_df(df)
|
|
229
229
|
|
|
230
230
|
if len(_presort) == 0:
|
|
@@ -233,9 +233,10 @@ class IbisSQLEngine(SQLEngine):
|
|
|
233
233
|
pcols = ", ".join(
|
|
234
234
|
self.encode_column_name(x) for x in partition_spec.partition_by
|
|
235
235
|
)
|
|
236
|
+
dummy_order_by = self._dummy_window_order_by()
|
|
236
237
|
sql = (
|
|
237
238
|
f"SELECT * FROM ("
|
|
238
|
-
f"SELECT *, ROW_NUMBER() OVER (PARTITION BY {pcols}) "
|
|
239
|
+
f"SELECT *, ROW_NUMBER() OVER (PARTITION BY {pcols} {dummy_order_by}) "
|
|
239
240
|
f"AS __fugue_take_param FROM {tbn}"
|
|
240
241
|
f") WHERE __fugue_take_param<={n}"
|
|
241
242
|
)
|
|
@@ -290,6 +291,12 @@ class IbisSQLEngine(SQLEngine):
|
|
|
290
291
|
def load_table(self, table: str, **kwargs: Any) -> DataFrame:
|
|
291
292
|
return self.to_df(self.backend.table(table))
|
|
292
293
|
|
|
294
|
+
def _dummy_window_order_by(self) -> str:
|
|
295
|
+
"""Return a dummy window order by clause, this is required for
|
|
296
|
+
some SQL backends when there is no real order by clause in window
|
|
297
|
+
"""
|
|
298
|
+
return ""
|
|
299
|
+
|
|
293
300
|
|
|
294
301
|
class IbisMapEngine(MapEngine):
|
|
295
302
|
"""IbisExecutionEngine's MapEngine, it is a wrapper of the map engine
|
fugue_ray/_constants.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from typing import Any, Dict
|
|
2
2
|
|
|
3
3
|
import ray
|
|
4
|
+
from packaging import version
|
|
4
5
|
|
|
5
6
|
FUGUE_RAY_CONF_SHUFFLE_PARTITIONS = "fugue.ray.shuffle.partitions"
|
|
6
7
|
FUGUE_RAY_DEFAULT_PARTITIONS = "fugue.ray.default.partitions"
|
|
@@ -12,8 +13,6 @@ FUGUE_RAY_DEFAULT_CONF: Dict[str, Any] = {
|
|
|
12
13
|
FUGUE_RAY_DEFAULT_PARTITIONS: 0,
|
|
13
14
|
FUGUE_RAY_ZERO_COPY: True,
|
|
14
15
|
}
|
|
16
|
+
RAY_VERSION = version.parse(ray.__version__)
|
|
15
17
|
|
|
16
|
-
|
|
17
|
-
_ZERO_COPY: Dict[str, Any] = {"zero_copy_batch": True}
|
|
18
|
-
else: # pragma: no cover
|
|
19
|
-
_ZERO_COPY = {}
|
|
18
|
+
_ZERO_COPY: Dict[str, Any] = {"zero_copy_batch": True}
|
fugue_ray/_utils/dataframe.py
CHANGED
|
@@ -3,7 +3,6 @@ from typing import Any, Dict, List, Optional, Tuple
|
|
|
3
3
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
import pyarrow as pa
|
|
6
|
-
import ray
|
|
7
6
|
import ray.data as rd
|
|
8
7
|
from triad import Schema
|
|
9
8
|
|
|
@@ -31,31 +30,21 @@ def get_dataset_format(df: rd.Dataset) -> Tuple[Optional[str], rd.Dataset]:
|
|
|
31
30
|
df = materialize(df)
|
|
32
31
|
if df.count() == 0:
|
|
33
32
|
return None, df
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
else:
|
|
41
|
-
schema = df.schema(fetch_if_missing=True)
|
|
42
|
-
if schema is None: # pragma: no cover
|
|
43
|
-
return None, df
|
|
44
|
-
if isinstance(schema.base_schema, pa.Schema):
|
|
45
|
-
return "arrow", df
|
|
46
|
-
return "pandas", df
|
|
33
|
+
schema = df.schema(fetch_if_missing=True)
|
|
34
|
+
if schema is None: # pragma: no cover
|
|
35
|
+
return None, df
|
|
36
|
+
if isinstance(schema.base_schema, pa.Schema):
|
|
37
|
+
return "arrow", df
|
|
38
|
+
return "pandas", df
|
|
47
39
|
|
|
48
40
|
|
|
49
41
|
def to_schema(schema: Any) -> Schema: # pragma: no cover
|
|
50
42
|
if isinstance(schema, pa.Schema):
|
|
51
43
|
return Schema(schema)
|
|
52
|
-
if
|
|
53
|
-
if isinstance(schema,
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
):
|
|
57
|
-
return Schema(schema.base_schema)
|
|
58
|
-
return Schema(list(zip(schema.names, schema.types)))
|
|
44
|
+
if isinstance(schema, rd.Schema):
|
|
45
|
+
if hasattr(schema, "base_schema") and isinstance(schema.base_schema, pa.Schema):
|
|
46
|
+
return Schema(schema.base_schema)
|
|
47
|
+
return Schema(list(zip(schema.names, schema.types)))
|
|
59
48
|
raise ValueError(f"{schema} is not supported")
|
|
60
49
|
|
|
61
50
|
|