fugue 0.8.7.dev6__py3-none-any.whl → 0.8.7.dev7__py3-none-any.whl

This diff shows the content of publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions exactly as they appear in the public registry.

{fugue-0.8.7.dev6.dist-info → fugue-0.8.7.dev7.dist-info}/RECORD RENAMED
@@ -1,4 +1,4 @@
- fugue/__init__.py,sha256=xT5zuNZfRkjbA8a-uTT5oLK6hLGuezGZLWYBl6eS5J4,2749
+ fugue/__init__.py,sha256=LKkBEPEAMLG-Yuzqt0IgoIDEfNf9a1zUffNKb83D_l8,2705
  fugue/api.py,sha256=dLUrigFhDMB5x7cvlWSK8EyaY2o0AmhgPr0VRtfzSz0,1254
  fugue/constants.py,sha256=crd0VqX8WtBcjSUNwZDi2LDIEkhUMWOlSn73H8JI9ds,3385
  fugue/dev.py,sha256=GQCkezBBl4V0lVDWhGtUQKqomiCxgR9dMhfqj9C8cS8,1369
@@ -10,7 +10,7 @@ fugue/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  fugue/_utils/display.py,sha256=JV8oDA7efHm1wceZulCBOY5dMvjbWHvIm6ASisKfoWY,3164
  fugue/_utils/exception.py,sha256=SFIjwjV4CIEovp3P9k7ePNOFB12A5D8hDdhtfFUeM5Y,2247
  fugue/_utils/interfaceless.py,sha256=wI0H6L4W_1uQjh9tpjgT9HzN-fbrrtXXHC1x6Q_rrPg,2203
- fugue/_utils/io.py,sha256=qDwqgY389GhCHV-7EvuiysJVbHxhquuEva9IlOYsmDw,9271
+ fugue/_utils/io.py,sha256=adrtj6Dq0ti426DNlkliApbTkp8b3bfBysAiE5MVQVc,9265
  fugue/_utils/misc.py,sha256=_huy0eylmRTEFoReGR2M4rbAI8m79hFcfY5bDceVEXU,887
  fugue/_utils/registry.py,sha256=lrbzTdUEVnW6paBGDj-Yb-aTIbP5mjCqrXuRU9_N6os,316
  fugue/bag/__init__.py,sha256=0Q0_rnrEThrTx2U-1xGNyAg95idp_xcnywymIcW4Xck,46
@@ -34,15 +34,15 @@ fugue/dataframe/dataframes.py,sha256=tBSpHsENgbcdOJ0Jgst6PTKbjG7_uoFJch96oTlaQIs
  fugue/dataframe/function_wrapper.py,sha256=V1eQMOn27UroEYT7_YiwoEF0RjZYIM0zkD3vfaMAQFs,14813
  fugue/dataframe/iterable_dataframe.py,sha256=TcOoNKa4jNbHbvAZ0XAhtMmGcioygIHPxI9budDtenQ,4758
  fugue/dataframe/pandas_dataframe.py,sha256=0L0wYCGhD2BpQbruoT07Ox9iQM5YLHLNrcgzudc-yKs,11633
- fugue/dataframe/utils.py,sha256=VS1qLCr-9NEcEjaK-219rADJadDf6EfzYZCGRUpn1fY,11405
+ fugue/dataframe/utils.py,sha256=shN1eHYTnPhb38BHEpLlCdLSzX_qpoQ3-fsDgu1hCzQ,10840
  fugue/dataset/__init__.py,sha256=5f2CAJ4xst6Z2o9Q2e2twfDOGUw8ZJoE2ild4JEU2pg,112
  fugue/dataset/api.py,sha256=DacI4L2w5NJ-eZ6nFxNMqmReEnb0WUXswbjVp7BeErk,2794
  fugue/dataset/dataset.py,sha256=jWXZqy3msMPFFkhas2PYJEX55ZAI3gk3Txq5f4-Qya4,4759
  fugue/execution/__init__.py,sha256=iZGxAznZz9piM3k4gp0tln97MDIBxdliLyNbD-0Zc48,427
  fugue/execution/api.py,sha256=KsFOLGdWQMlXmlQ5JRgRsbUeB64qzTVHxSEaunjiojo,39818
- fugue/execution/execution_engine.py,sha256=G_SsTmcuDcy6_azi_88lGzsOodiizu0JdWxebxgbqRg,47721
+ fugue/execution/execution_engine.py,sha256=5lIlebgPK7q-Gf4bWt1t_Anq3MjPaJBpGWN9bbry1B4,47506
  fugue/execution/factory.py,sha256=5ICzfNh2QqqABuVyYLijY5-7LZgfRqczlaZN32p78bE,21003
- fugue/execution/native_execution_engine.py,sha256=Mm9BVC3dEMS3IWRZe4YvGKp6_mmW7dLmoLMK5HgAPcs,14408
+ fugue/execution/native_execution_engine.py,sha256=lbKd3uGh00cSTkIM8l-u8jmsMxFzV2PSUeJgudayxKs,14236
  fugue/extensions/__init__.py,sha256=y-uLKd6mZ8sZ_8-OdW6ELoBO_9IfC0gDmEbE_rMCvOA,599
  fugue/extensions/_utils.py,sha256=Bi3pYKy2Z6fG6_5BpwIWldxetassXpB4Zp8QamWB-wg,5173
  fugue/extensions/context.py,sha256=c_y2UttzzIFoQTOCV42VCdj2nqah33xYuBjbKNIOpx8,4262
@@ -72,7 +72,7 @@ fugue/sql/_visitors.py,sha256=2pc0J-AHJAiIexsKgNjcgrCGOyhC3_7rzonSgtjy--k,33844
  fugue/sql/api.py,sha256=l2I9CAy_W2oFFTct9fDPLyXF0LiDxQhMx5O8jBHTAxU,10050
  fugue/sql/workflow.py,sha256=S1pOhp0b0t6johFAJWmj6xUB7Ti5LQgNABpAzmLGjrQ,3010
  fugue/workflow/__init__.py,sha256=tXM_KYO8Q358W6qAVlwhIQIaYNRDgZtTubrIEX4QMgM,229
- fugue/workflow/_checkpoint.py,sha256=MTMyNCdWHf5UK8bRepfR2u8y3cEhO1RYIYq558ZlXzA,5715
+ fugue/workflow/_checkpoint.py,sha256=tt5Iv7c5ZStC0MD1inItksQ0GuK0ViniA3nvrgym-5c,5681
  fugue/workflow/_tasks.py,sha256=Zq_jXJO_VaF8DrWUuBiwO2Y3OVuhsiOQdzP4VBsp7Fo,11826
  fugue/workflow/_workflow_context.py,sha256=Wmp6n0lSrh2Gpslb5EaSX6BQNniKsvKn6SlhVkQ6ui0,2504
  fugue/workflow/api.py,sha256=uQoxPSCZ91-ST4vwuPWG7qioRGW4eo-Sgi3DdwtSL4k,12495
@@ -86,25 +86,25 @@ fugue_contrib/viz/__init__.py,sha256=osgZx63Br-yMZImyEfYf9MVzJNM2Cqqke_-WsuDmG5M
  fugue_contrib/viz/_ext.py,sha256=Lu_DlS5DcmrFz27fHcKTCkhKyknVWcfS5kzZVVuO9xM,1345
  fugue_dask/__init__.py,sha256=2CcJ0AsN-k_f7dZ-yAyYpaICfUMPfH3l0FvUJSBzTr0,161
  fugue_dask/_constants.py,sha256=35UmTVITk21GhRyRlbJOwPPdQsytM_p_2NytOXEay18,510
- fugue_dask/_io.py,sha256=9G516yM6zQvSC5_JA6qHb3LwBDmhWcxK5sjFHrQ81zo,6012
+ fugue_dask/_io.py,sha256=HmL3Q2lRSptX1-GwiB3MN2VpjTRfmVKD8TDZkhS4x5c,5818
  fugue_dask/_utils.py,sha256=n70N3wPPMz13Jh0GWJM3Je-TCYpU36yGP_YCwIHqUrc,8908
  fugue_dask/dataframe.py,sha256=MuG9TqCND7qI66lPvxzuomfE7yA4sW7DjrvbyvE6XEU,13471
- fugue_dask/execution_engine.py,sha256=XJp6wrdkaNh5pOpwt-Hjoa2sxgCOgusFRWrcqoCcaNM,21153
+ fugue_dask/execution_engine.py,sha256=PAClUP9lCdn2Aajt2AsoFOsgO-95WcdRDKkjNSbVbzA,20980
  fugue_dask/ibis_engine.py,sha256=kQdaG_KlZZ2AjtYETNCdTJOgtwI_eH0aGzLaAiIBbRI,2120
  fugue_dask/registry.py,sha256=7UTg_eie7zKlHYKMCyOo0TNn5y2TiIjE8kiS2PruHFc,2200
  fugue_duckdb/__init__.py,sha256=nSNv-fxBAKD6W23EbMeV4dVRIaSTqr9DzQUWuVOES8s,379
- fugue_duckdb/_io.py,sha256=Sq228unVnroYTq4GX-Wnv22SLHC9Ji-aWgiqrfdu81w,8880
+ fugue_duckdb/_io.py,sha256=E35_GoD1uGuuAMOY4H8E2j-UazdAgTmLp4lLWqJrNsE,8437
  fugue_duckdb/_utils.py,sha256=ElKbHUyn5fWSPGXsK57iqMzcqKtCf0c8pBVBYGe5Ql4,5020
  fugue_duckdb/dask.py,sha256=agoLzeB7Swxj2kVWfmXFbWD1NS2lbbTlnrjSkR8kKWY,5014
  fugue_duckdb/dataframe.py,sha256=LRfTv7Y46wMM_IDYSP1R-5OXuHuBg8GHjPGFFt8u7l0,8444
- fugue_duckdb/execution_engine.py,sha256=fkkQb4Eh0m7SwKrTplVk2oQalLkNoj3CW0R12g01ofk,20536
+ fugue_duckdb/execution_engine.py,sha256=IZDmSAtOMJGvulTStxjTmsqJyI5QRNyxBgSMlFMSrBI,20389
  fugue_duckdb/ibis_engine.py,sha256=MrypeABozqwetKOpqtrmWvCJX2QPfBXhbSEhvK9vqmI,1990
  fugue_duckdb/registry.py,sha256=Dj0Tng1cXVT6Q7t-KxOky2k1dD9xSBjYGQmI26UgZPo,3095
  fugue_ibis/__init__.py,sha256=PcUt66KlLyGGicad7asq5j2U567_fhR0HzvWQBhV1VM,362
  fugue_ibis/_compat.py,sha256=zKdTaTfuC02eUIzZPkcd7oObnVBi_X5mQjQf7SDme3Y,246
  fugue_ibis/_utils.py,sha256=BUL5swA5FE4eQu0t5Z17hZVu9a2MFfxlFH6Ymy9xifg,6607
  fugue_ibis/dataframe.py,sha256=0Fb1vJjwEeffgoUCDfDGIMuSFaPgUJqcB-JqJOAALfs,7789
- fugue_ibis/execution_engine.py,sha256=p5zy0IBXiJgLi67RBHCRcHgZsaJMANdNSpUxz0k_6C0,18453
+ fugue_ibis/execution_engine.py,sha256=0GIjjMmitCKhjasAKFiFUCCUBNdxAiU0b61RsmFyhIk,18355
  fugue_ibis/extensions.py,sha256=H8l-SPfoqLuUoILtOuL2nccOpoL83zHeSoIhoqjtWQM,6905
  fugue_ibis/execution/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  fugue_ibis/execution/ibis_engine.py,sha256=-HdPnIFWD83n5WITdzJiu4attH7GOcO041wkT5Y5ChA,1499
@@ -127,37 +127,39 @@ fugue_ray/registry.py,sha256=xJRAhbwNrg695EwghQDnVtTKi4YkqZ0_61BD4OAblSA,1685
  fugue_ray/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  fugue_ray/_utils/cluster.py,sha256=3T3Gyra6lAHlzktta-Ro35j6YZQfH6fNrj2hC5ATF9k,621
  fugue_ray/_utils/dataframe.py,sha256=_EadzS4rPom1A_cF0pqoPlwrNYZTfTwcyyu86_fFsqU,4400
- fugue_ray/_utils/io.py,sha256=SFTU4qXubGEmO5IGZA5yHy8Hu4b9aFZ9-eTU4Qs-NsQ,8757
+ fugue_ray/_utils/io.py,sha256=4FfPS2DMeIHvbzGoJ_iPvwwVr7lZHXRoJZxceNZ4EHQ,8647
  fugue_spark/__init__.py,sha256=rvrMpFs9socMgyH_58gLbnAqmirBf5oidXoO4cekW6U,165
  fugue_spark/_constants.py,sha256=K2uLQfjvMxXk75K-7_Wn47Alpwq5rW57BtECAUrOeqA,177
  fugue_spark/dataframe.py,sha256=lYa8FizM3p_lsKYFR49FazkVZMJKyi2LABKTpP5YBLo,12006
- fugue_spark/execution_engine.py,sha256=rqgY9U1bpjh0GFNyNkuPcI7iV0xeipadURhNIir4w08,33147
+ fugue_spark/execution_engine.py,sha256=KPmBtH4zioXdWsvnPow4fOPQh8Yj0cn6yCJyKdbT544,33023
  fugue_spark/ibis_engine.py,sha256=Yl5xxwROo1idcD2hFaylaI1IpmBUgbvOZRWtcrE0Zjo,1697
  fugue_spark/registry.py,sha256=kyIMk6dAiKRSKCHawQKyXu9DhZ24T6j3gL57TiOAZ8c,4162
  fugue_spark/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  fugue_spark/_utils/convert.py,sha256=eRWkDYA4UO-FQu-2y4O80WEdawx7X_rIrWg55AlOiRc,10007
- fugue_spark/_utils/io.py,sha256=0ndQ70YlirPwGKjh5IDN6IdJxD26BnPpMonRob4dxII,5668
+ fugue_spark/_utils/io.py,sha256=OdUezKpB29Lx9aUS2k9x0xUAGZrmgMZyQYGPEeHk7rQ,5574
  fugue_spark/_utils/misc.py,sha256=o8dZmXOHnA7D_ps37vgGXTPTiSEG9LQzPKq7l-MG-qM,860
  fugue_spark/_utils/partition.py,sha256=iaesyO5f4uXhj1W-p91cD5ecPiGlu0bzh8gl2ce2Uvg,3618
  fugue_sql/__init__.py,sha256=Cmr7w0Efr7PzoXdQzdJfc4Dgqd69qKqcHZZodENq7EU,287
  fugue_sql/exceptions.py,sha256=ltS0MC8gMnVVrJbQiOZ0kRUWvVQ2LTx33dCW3ugqtb0,260
  fugue_test/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  fugue_test/bag_suite.py,sha256=WbDCFjuAHYoJh4GXSPiSJxOoOwE1VMtYpJ3lQrsUK-Y,2483
- fugue_test/builtin_suite.py,sha256=o8aMZTKa74nKBmcUTTBbliTJMtNbsXE9SPKZopS504o,78400
+ fugue_test/builtin_suite.py,sha256=3uSY484Jl2985UoJravD4C-SlKBH0WwTWFobp4Pqgzg,78399
  fugue_test/dataframe_suite.py,sha256=LgB931CkASbGOrRQ9j92DGk9wPb__FoNusOk-HeqU9E,19165
- fugue_test/execution_suite.py,sha256=FI6UmwBvdoT1jkJRBqJT_Q0IDehFryvv00UL6jjxyAk,47689
+ fugue_test/execution_suite.py,sha256=ClZUYt2R560LN4DZM_OP9cA5jaHmz3-u_BC3A0C24fQ,47472
  fugue_test/ibis_suite.py,sha256=Dk4AHVD00RcFsNm9VvJ4_4LOyFdGX30OnAtpO2SPruE,3529
  fugue_test/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  fugue_test/plugins/dask/__init__.py,sha256=LQVyNgGRvZrKUrNNV1z1X1GyIL3qJoxvNjFfpFzNVCc,55
  fugue_test/plugins/dask/fixtures.py,sha256=uf5gkO30L5-LvxpEpBjG4_bNUrpkemHvyVPxDHgMSGM,354
  fugue_test/plugins/duckdb/__init__.py,sha256=WXtNYQpbO0JScPpIA3QREv8cwOZP2GDOgGOtJKgpTVM,61
  fugue_test/plugins/duckdb/fixtures.py,sha256=UxQbIMRbSrTZ3pgCmKZgd5wd1YvnVrqLSUPaO8UYrSE,165
+ fugue_test/plugins/misc/__init__.py,sha256=0SZvyo0xlw5NDbJly4yjaNDqL9M4D2Jsg33HCWE40q8,49
+ fugue_test/plugins/misc/fixtures.py,sha256=GrD9WTTtcIDCWLHn-ToVv8pUiUCGCSczgs9bodWKo7c,353
  fugue_test/plugins/ray/__init__.py,sha256=nyKGW6xgTXtMhSs7yjgFNKO7mVboCNg63Bvdf39fO_I,55
  fugue_test/plugins/ray/fixtures.py,sha256=hZkvuo0AcD63XJl5JUroc9tm2LWHUPszg2zzY6FCSao,141
  fugue_version/__init__.py,sha256=vTwvdJOZi8jZb9U-Em7-d50qNDNPS2z51IXqRoojeNM,22
- fugue-0.8.7.dev6.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
- fugue-0.8.7.dev6.dist-info/METADATA,sha256=0i4ibczIy_wEMtZ6vFvaCw40x5KmuQa6OsuBVWUTQyk,17860
- fugue-0.8.7.dev6.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
- fugue-0.8.7.dev6.dist-info/entry_points.txt,sha256=N_BIIy3lSvF6Z32QE0yXTucgdHrPbUrOwH1zj7bZ0ow,536
- fugue-0.8.7.dev6.dist-info/top_level.txt,sha256=y1eCfzGdQ1_RkgcShcfbvXs-bopD3DwJcIOxP9EFXno,140
- fugue-0.8.7.dev6.dist-info/RECORD,,
+ fugue-0.8.7.dev7.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+ fugue-0.8.7.dev7.dist-info/METADATA,sha256=nSp1i8apniEEe6U09_5RA8K89P40c7M5Gn9l6ofLTHQ,17860
+ fugue-0.8.7.dev7.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
+ fugue-0.8.7.dev7.dist-info/entry_points.txt,sha256=Xrl3ISyVKAFIPn1klqeGsL9DinzoYqfqBkOT4qAVBNA,578
+ fugue-0.8.7.dev7.dist-info/top_level.txt,sha256=y1eCfzGdQ1_RkgcShcfbvXs-bopD3DwJcIOxP9EFXno,140
+ fugue-0.8.7.dev7.dist-info/RECORD,,
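
Each RECORD row is `path,sha256=<digest>,<size>`, where the digest is the urlsafe-base64 SHA-256 of the file with the trailing `=` padding stripped (per the wheel spec). A minimal sketch of recomputing one entry from a locally downloaded copy of the new wheel (the local file name is an assumption):

    import base64
    import hashlib
    import zipfile

    # Hypothetical local copy of the dev7 wheel being diffed.
    with zipfile.ZipFile("fugue-0.8.7.dev7-py3-none-any.whl") as z:
        data = z.read("fugue/__init__.py")

    # RECORD stores the digest as urlsafe base64 without "=" padding.
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    print(f"fugue/__init__.py,sha256={digest.decode()},{len(data)}")
    # Should reproduce the dev7 entry above:
    # fugue/__init__.py,sha256=LKkBEPEAMLG-Yuzqt0IgoIDEfNf9a1zUffNKb83D_l8,2705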

{fugue-0.8.7.dev6.dist-info → fugue-0.8.7.dev7.dist-info}/WHEEL RENAMED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: bdist_wheel (0.41.2)
+ Generator: bdist_wheel (0.41.3)
  Root-Is-Purelib: true
  Tag: py3-none-any

{fugue-0.8.7.dev6.dist-info → fugue-0.8.7.dev7.dist-info}/entry_points.txt RENAMED
@@ -2,7 +2,7 @@
  dask = fugue_dask.registry [dask]
  dask_ibis = fugue_dask.ibis_engine [dask,ibis]
  duckdb = fugue_duckdb.registry [duckdb]
- duckdb_ibis = fugue_duckdb.ibis_engine [duckdb,ibis]
+ duckdb_ibis = fugue_duckdb.ibis_engine [ibis,duckdb]
  ibis = fugue_ibis [ibis]
  polars = fugue_polars.registry [polars]
  ray = fugue_ray.registry [ray]
@@ -12,5 +12,6 @@ spark_ibis = fugue_spark.ibis_engine [spark,ibis]
  [pytest11]
  fugue_test_dask = fugue_test.plugins.dask [dask]
  fugue_test_duckdb = fugue_test.plugins.duckdb [duckdb]
+ fugue_test_misc = fugue_test.plugins.misc
  fugue_test_ray = fugue_test.plugins.ray [ray]
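Two things change here: the `duckdb_ibis` extras list is merely reordered (`[duckdb,ibis]` → `[ibis,duckdb]`, no semantic effect), and a new `[pytest11]` entry registers `fugue_test.plugins.misc` as a pytest plugin with no extra requirement, so it loads whenever the package is installed. A quick way to confirm the registration (sketch; assumes Python 3.10+ for the `group=` keyword):

    from importlib.metadata import entry_points

    # List the pytest plugins contributed by installed distributions.
    for ep in entry_points(group="pytest11"):
        if ep.name.startswith("fugue_test"):
            print(ep.name, "->", ep.value)
    # With 0.8.7.dev7 installed, the output should include:
    # fugue_test_misc -> fugue_test.plugins.misc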
fugue_dask/_io.py CHANGED
@@ -1,13 +1,12 @@
  from typing import Any, Callable, Dict, List, Optional, Tuple, Union

- import fsspec
- import fs as pfs
  import pandas as pd
  from dask import dataframe as dd
+ from fsspec import AbstractFileSystem
  from triad.collections.dict import ParamDict
- from triad.collections.fs import FileSystem
  from triad.collections.schema import Schema
  from triad.utils.assertion import assert_or_throw
+ from triad.utils.io import join, makedirs, url_to_fs

  from fugue._utils.io import FileParser, _get_single_files
  from fugue_dask.dataframe import DaskDataFrame
@@ -19,7 +18,7 @@ def load_df(
  uri: Union[str, List[str]],
  format_hint: Optional[str] = None,
  columns: Any = None,
- fs: Optional[FileSystem] = None,
+ fs: Optional[AbstractFileSystem] = None,
  **kwargs: Any,
  ) -> DaskDataFrame:
  if isinstance(uri, str):
@@ -39,7 +38,7 @@ def save_df(
  uri: str,
  format_hint: Optional[str] = None,
  mode: str = "overwrite",
- fs: Optional[FileSystem] = None,
+ fs: Optional[AbstractFileSystem] = None,
  **kwargs: Any,
  ) -> None:
  assert_or_throw(
@@ -48,16 +47,13 @@
  )
  p = FileParser(uri, format_hint).assert_no_glob()
  if fs is None:
- fs = FileSystem()
+ fs, _ = url_to_fs(uri)
  if fs.exists(uri):
  assert_or_throw(mode == "overwrite", FileExistsError(uri))
  try:
- fs.remove(uri)
- except Exception:
- try:
- fs.removetree(uri)
- except Exception: # pragma: no cover
- pass
+ fs.rm(uri, recursive=True)
+ except Exception: # pragma: no cover
+ pass
  _FORMAT_SAVE[p.file_format](df, p, **kwargs)


@@ -67,7 +63,7 @@ def _save_parquet(df: DaskDataFrame, p: FileParser, **kwargs: Any) -> None:
  "write_index": False,
  **kwargs,
  }
- DASK_UTILS.to_parquet_friendly(df.native).to_parquet(p.uri, **params)
+ DASK_UTILS.to_parquet_friendly(df.native).to_parquet(p.path, **params)


  def _load_parquet(
@@ -80,27 +76,26 @@ def _load_parquet(
  if pd.__version__ >= "1.5":
  dtype_backend = "pyarrow"
  if columns is None:
- pdf = dd.read_parquet(p.uri, dtype_backend=dtype_backend, **params)
+ pdf = dd.read_parquet(p.path, dtype_backend=dtype_backend, **params)
  schema = Schema(pdf.head(1))
  return pdf, schema
  if isinstance(columns, list): # column names
  pdf = dd.read_parquet(
- p.uri, columns=columns, dtype_backend=dtype_backend, **params
+ p.path, columns=columns, dtype_backend=dtype_backend, **params
  )
  schema = Schema(pdf.head(1))
  return pdf, schema
  schema = Schema(columns)
  pdf = dd.read_parquet(
- p.uri, columns=schema.names, dtype_backend=dtype_backend, **params
+ p.path, columns=schema.names, dtype_backend=dtype_backend, **params
  )
  return pdf, schema


  def _save_csv(df: DaskDataFrame, p: FileParser, **kwargs: Any) -> None:
- fs, path = fsspec.core.url_to_fs(p.uri)
- fs.makedirs(path, exist_ok=True)
+ makedirs(p.path, exist_ok=True)
  df.native.to_csv(
- pfs.path.combine(p.uri, "*.csv"), **{"index": False, "header": False, **kwargs}
+ p.join("*.csv").path, **{"index": False, "header": False, **kwargs}
  )


@@ -108,7 +103,7 @@ def _safe_load_csv(path: str, **kwargs: Any) -> dd.DataFrame:
  try:
  return dd.read_csv(path, **kwargs)
  except (IsADirectoryError, PermissionError):
- return dd.read_csv(pfs.path.combine(path, "*.csv"), **kwargs)
+ return dd.read_csv(join(path, "*.csv"), **kwargs)


  def _load_csv( # noqa: C901
@@ -127,7 +122,7 @@ def _load_csv( # noqa: C901
  header = kw["header"]
  del kw["header"]
  if str(header) in ["True", "0"]:
- pdf = _safe_load_csv(p.uri, **{"header": 0, **kw})
+ pdf = _safe_load_csv(p.path, **{"header": 0, **kw})
  if columns is None:
  return pdf, None
  if isinstance(columns, list): # column names
@@ -138,34 +133,32 @@ def _load_csv( # noqa: C901
  if columns is None:
  raise ValueError("columns must be set if without header")
  if isinstance(columns, list): # column names
- pdf = _safe_load_csv(p.uri, **{"header": None, "names": columns, **kw})
+ pdf = _safe_load_csv(p.path, **{"header": None, "names": columns, **kw})
  return pdf, None
  schema = Schema(columns)
- pdf = _safe_load_csv(p.uri, **{"header": None, "names": schema.names, **kw})
+ pdf = _safe_load_csv(p.path, **{"header": None, "names": schema.names, **kw})
  return pdf, schema
  else:
  raise NotImplementedError(f"{header} is not supported")


  def _save_json(df: DaskDataFrame, p: FileParser, **kwargs: Any) -> None:
- fs, path = fsspec.core.url_to_fs(p.uri)
- fs.makedirs(path, exist_ok=True)
- df.native.to_json(pfs.path.combine(p.uri, "*.json"), **kwargs)
+ makedirs(p.path, exist_ok=True)
+ df.native.to_json(p.join("*.json").path, **kwargs)


  def _safe_load_json(path: str, **kwargs: Any) -> dd.DataFrame:
  try:
  return dd.read_json(path, **kwargs)
  except (IsADirectoryError, PermissionError):
- x = dd.read_json(pfs.path.combine(path, "*.json"), **kwargs)
- print(x.compute())
+ x = dd.read_json(join(path, "*.json"), **kwargs)
  return x


  def _load_json(
  p: FileParser, columns: Any = None, **kwargs: Any
  ) -> Tuple[dd.DataFrame, Any]:
- pdf = _safe_load_json(p.uri, **kwargs).reset_index(drop=True)
+ pdf = _safe_load_json(p.path, **kwargs).reset_index(drop=True)
  if columns is None:
  return pdf, None
  if isinstance(columns, list): # column names
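
The theme of this release is replacing `triad.collections.fs.FileSystem` (a pyfilesystem2 wrapper) and the `fsspec`/`fs as pfs` imports with fsspec-backed helpers: filesystems are now resolved from the URI via `triad.utils.io.url_to_fs`, and `FileParser` exposes `.path` and `.join(...)` where `.uri` and path-combining helpers were used before. A condensed, standalone sketch of the new overwrite logic above (an approximation, not the exact fugue code):

    from triad.utils.io import url_to_fs

    def remove_for_overwrite(uri: str) -> None:
        # url_to_fs resolves "s3://bucket/x" or "/tmp/x" into an fsspec
        # AbstractFileSystem plus the path component.
        fs, _ = url_to_fs(uri)
        if fs.exists(uri):
            # fsspec's rm(recursive=True) handles files and directories alike,
            # replacing the old remove()/removetree() fallback chain.
            try:
                fs.rm(uri, recursive=True)
            except Exception:  # best effort, mirroring the code above
                pass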

fugue_dask/execution_engine.py CHANGED
@@ -7,18 +7,17 @@ import pandas as pd
  from distributed import Client
  from triad.collections import Schema
  from triad.collections.dict import IndexedOrderedDict, ParamDict
- from triad.collections.fs import FileSystem
  from triad.utils.assertion import assert_or_throw
  from triad.utils.hash import to_uuid
  from triad.utils.pandas_like import PandasUtils
  from triad.utils.threading import RunOnce
+ from triad.utils.io import makedirs
  from fugue import StructuredRawSQL
  from fugue.collections.partition import (
  PartitionCursor,
  PartitionSpec,
  parse_presort_exp,
  )
- from fugue.exceptions import FugueBug
  from fugue.constants import KEYWORD_PARALLELISM, KEYWORD_ROWCOUNT
  from fugue.dataframe import (
  AnyDataFrame,
@@ -28,6 +27,7 @@ from fugue.dataframe import (
  PandasDataFrame,
  )
  from fugue.dataframe.utils import get_join_schemas
+ from fugue.exceptions import FugueBug
  from fugue.execution.execution_engine import ExecutionEngine, MapEngine, SQLEngine
  from fugue.execution.native_execution_engine import NativeExecutionEngine
  from fugue_dask._constants import FUGUE_DASK_DEFAULT_CONF
@@ -206,7 +206,6 @@ class DaskExecutionEngine(ExecutionEngine):
  p = ParamDict(FUGUE_DASK_DEFAULT_CONF)
  p.update(ParamDict(conf))
  super().__init__(p)
- self._fs = FileSystem()
  self._log = logging.getLogger()
  self._client = DASK_UTILS.get_or_create_client(dask_client)
  self._native = NativeExecutionEngine(conf=conf)
@@ -227,10 +226,6 @@ class DaskExecutionEngine(ExecutionEngine):
  def log(self) -> logging.Logger:
  return self._log

- @property
- def fs(self) -> FileSystem:
- return self._fs
-
  def create_default_sql_engine(self) -> SQLEngine:
  return DaskSQLEngine(self)

@@ -527,9 +522,7 @@ class DaskExecutionEngine(ExecutionEngine):
  **kwargs: Any,
  ) -> DaskDataFrame:
  return self.to_df(
- load_df(
- path, format_hint=format_hint, columns=columns, fs=self.fs, **kwargs
- )
+ load_df(path, format_hint=format_hint, columns=columns, **kwargs)
  )

  def save_df(
@@ -556,9 +549,9 @@ class DaskExecutionEngine(ExecutionEngine):
  else:
  if not partition_spec.empty:
  kwargs["partition_on"] = partition_spec.partition_by
- self.fs.makedirs(os.path.dirname(path), recreate=True)
+ makedirs(os.path.dirname(path), exist_ok=True)
  df = self.to_df(df)
- save_df(df, path, format_hint=format_hint, mode=mode, fs=self.fs, **kwargs)
+ save_df(df, path, format_hint=format_hint, mode=mode, **kwargs)


  def to_dask_engine_df(df: Any, schema: Any = None) -> DaskDataFrame:
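
With the `fs` property gone from the engine, directory creation goes through the module-level helper, trading pyfilesystem2's `recreate=True` keyword for the os-style `exist_ok=True`. A minimal sketch of the pattern (the path is hypothetical):

    import os

    from triad.utils.io import makedirs

    def ensure_parent_dir(path: str) -> None:
        # dev6: FileSystem().makedirs(os.path.dirname(path), recreate=True)
        # dev7: same effect via the fsspec-backed helper, for local paths
        # and remote URIs alike.
        makedirs(os.path.dirname(path), exist_ok=True)

    ensure_parent_dir("/tmp/fugue_demo/out.parquet")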
fugue_duckdb/_io.py CHANGED
@@ -3,9 +3,9 @@ from typing import Any, Iterable, List, Optional, Union

  from duckdb import DuckDBPyConnection
  from triad import ParamDict, Schema
- from triad.collections.fs import FileSystem
- from triad.utils.assertion import assert_or_throw

+ from triad.utils.assertion import assert_or_throw
+ from triad.utils.io import isdir, makedirs, rm, exists
  from fugue._utils.io import FileParser, load_df, save_df
  from fugue.collections.sql import TempTableName
  from fugue.dataframe import ArrowDataFrame, LocalBoundedDataFrame
@@ -18,26 +18,17 @@ from fugue_duckdb._utils import (
  from fugue_duckdb.dataframe import DuckDataFrame


- def _get_single_files(
- fp: Iterable[FileParser], fs: FileSystem, fmt: str
- ) -> Iterable[FileParser]:
- def _isdir(d: str) -> bool:
- try:
- return fs.isdir(d)
- except Exception: # pragma: no cover
- return False
-
+ def _get_files(fp: Iterable[FileParser], fmt: str) -> Iterable[FileParser]:
  for f in fp:
- if f.glob_pattern == "" and _isdir(f.uri):
- yield f.with_glob("*." + fmt, fmt)
+ if not f.has_glob and isdir(f.path):
+ yield from f.join("*." + fmt, fmt).find_all()
  else:
  yield f


  class DuckDBIO:
- def __init__(self, fs: FileSystem, con: DuckDBPyConnection) -> None:
+ def __init__(self, con: DuckDBPyConnection) -> None:
  self._con = con
- self._fs = fs
  self._format_load = {"csv": self._load_csv, "parquet": self._load_parquet}
  self._format_save = {"csv": self._save_csv, "parquet": self._save_parquet}

@@ -55,11 +46,9 @@ class DuckDBIO:
  else:
  fp = [FileParser(u, format_hint) for u in uri]
  if fp[0].file_format not in self._format_load:
- return load_df(
- uri, format_hint=format_hint, columns=columns, fs=self._fs, **kwargs
- )
+ return load_df(uri, format_hint=format_hint, columns=columns, **kwargs)
  dfs: List[DuckDataFrame] = []
- for f in _get_single_files(fp, self._fs, fp[0].file_format):
+ for f in _get_files(fp, fp[0].file_format):
  df = self._format_load[f.file_format](f, columns, **kwargs)
  dfs.append(df)
  rel = dfs[0].native
@@ -83,26 +72,20 @@ class DuckDBIO:
  )
  p = FileParser(uri, format_hint).assert_no_glob()
  if (p.file_format not in self._format_save) or ("partition_cols" in kwargs):
- self._fs.makedirs(os.path.dirname(uri), recreate=True)
+ makedirs(os.path.dirname(uri), exist_ok=True)
  ldf = ArrowDataFrame(df.as_arrow())
- return save_df(
- ldf, uri=uri, format_hint=format_hint, mode=mode, fs=self._fs, **kwargs
- )
- fs = self._fs
- if fs.exists(uri):
+ return save_df(ldf, uri=uri, format_hint=format_hint, mode=mode, **kwargs)
+ if exists(uri):
  assert_or_throw(mode == "overwrite", FileExistsError(uri))
  try:
- fs.remove(uri)
- except Exception:
- try:
- fs.removetree(uri)
- except Exception: # pragma: no cover
- pass
- if not fs.exists(p.parent):
- fs.makedirs(p.parent, recreate=True)
+ rm(uri, recursive=True)
+ except Exception: # pragma: no cover
+ pass
+ p.make_parent_dirs()
  self._format_save[p.file_format](df, p, **kwargs)

  def _save_csv(self, df: DuckDataFrame, p: FileParser, **kwargs: Any):
+ p.assert_no_glob()
  dn = TempTableName()
  df.native.create_view(dn.key)
  kw = ParamDict({k.lower(): v for k, v in kwargs.items()})
@@ -111,7 +94,7 @@ class DuckDBIO:
  for k, v in kw.items():
  params.append(f"{k.upper()} " + encode_value_to_expr(v))
  pm = ", ".join(params)
- query = f"COPY {dn.key} TO {encode_value_to_expr(p.uri)} WITH ({pm})"
+ query = f"COPY {dn.key} TO {encode_value_to_expr(p.path)} WITH ({pm})"
  self._con.execute(query)

  def _load_csv( # noqa: C901
@@ -125,7 +108,7 @@ class DuckDBIO:
  ValueError("when csv has no header, columns must be specified"),
  )
  kw.pop("auto_detect", None)
- params: List[str] = [encode_value_to_expr(p.uri_with_glob)]
+ params: List[str] = [encode_value_to_expr(p.path)]
  kw["header"] = 1 if header else 0
  kw["auto_detect"] = 1 if infer_schema else 0
  if infer_schema:
@@ -188,6 +171,7 @@ class DuckDBIO:
  return DuckDataFrame(self._con.from_query(query))

  def _save_parquet(self, df: DuckDataFrame, p: FileParser, **kwargs: Any):
+ p.assert_no_glob()
  dn = TempTableName()
  df.native.create_view(dn.key)
  kw = ParamDict({k.lower(): v for k, v in kwargs.items()})
@@ -196,7 +180,7 @@ class DuckDBIO:
  for k, v in kw.items():
  params.append(f"{k.upper()} " + encode_value_to_expr(v))
  pm = ", ".join(params)
- query = f"COPY {dn.key} TO {encode_value_to_expr(p.uri)}"
+ query = f"COPY {dn.key} TO {encode_value_to_expr(p.path)}"
  if len(params) > 0:
  query += f" WITH ({pm})"
  self._con.execute(query)
@@ -205,7 +189,7 @@ class DuckDBIO:
  self, p: FileParser, columns: Any = None, **kwargs: Any
  ) -> DuckDataFrame:
  kw = ParamDict({k.lower(): v for k, v in kwargs.items()})
- params: List[str] = [encode_value_to_expr(p.uri_with_glob)]
+ params: List[str] = [encode_value_to_expr(p.path)]
  if isinstance(columns, list):
  cols = ", ".join(encode_column_names(columns))
  else:
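
The old `_get_single_files` needed a `FileSystem` handle just to probe for directories; `_get_files` asks `triad.utils.io.isdir` directly and, rather than tagging the directory with a glob pattern, expands it into the matching files via `FileParser.join(...).find_all()`. A rough standalone equivalent using fsspec directly (a hypothetical helper; the real code goes through `FileParser`):

    from typing import List

    import fsspec
    from triad.utils.io import isdir

    def expand_dir(path: str, fmt: str) -> List[str]:
        # A directory without a glob becomes the explicit list of
        # "<dir>/*.<fmt>" matches; anything else passes through unchanged.
        if isdir(path):
            fs, p = fsspec.core.url_to_fs(path)
            return fs.glob(p.rstrip("/") + "/*." + fmt)
        return [path]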

fugue_duckdb/execution_engine.py CHANGED
@@ -4,7 +4,6 @@ from typing import Any, Dict, Iterable, List, Optional, Union
  import duckdb
  from duckdb import DuckDBPyConnection, DuckDBPyRelation
  from triad import SerializableRLock
- from triad.collections.fs import FileSystem
  from triad.utils.assertion import assert_or_throw
  from triad.utils.schema import quote_name

@@ -195,10 +194,6 @@ class DuckExecutionEngine(ExecutionEngine):
  def log(self) -> logging.Logger:
  return self._native_engine.log

- @property
- def fs(self) -> FileSystem:
- return self._native_engine.fs
-
  def create_default_sql_engine(self) -> SQLEngine:
  return DuckDBEngine(self)

@@ -488,7 +483,7 @@ class DuckExecutionEngine(ExecutionEngine):
  columns: Any = None,
  **kwargs: Any,
  ) -> LocalBoundedDataFrame:
- dio = DuckDBIO(self.fs, self.connection)
+ dio = DuckDBIO(self.connection)
  return dio.load_df(path, format_hint, columns, **kwargs)

  def save_df(
@@ -504,7 +499,7 @@ class DuckExecutionEngine(ExecutionEngine):
  partition_spec = partition_spec or PartitionSpec()
  if not partition_spec.empty and not force_single:
  kwargs["partition_cols"] = partition_spec.partition_by
- dio = DuckDBIO(self.fs, self.connection)
+ dio = DuckDBIO(self.connection)
  dio.save_df(_to_duck_df(self, df), path, format_hint, mode, **kwargs)

  def convert_yield_dataframe(self, df: DataFrame, as_local: bool) -> DataFrame:
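
Because filesystems are now derived from the URI at the point of use, `DuckDBIO` no longer takes a `FileSystem` argument and the engine constructs it from the connection alone. A minimal usage sketch (constructing this internal helper directly is unusual, and the path is hypothetical):

    import duckdb

    from fugue_duckdb._io import DuckDBIO

    con = duckdb.connect()
    dio = DuckDBIO(con)  # the dev6 signature was DuckDBIO(fs, con)
    # df = dio.load_df("/tmp/data.parquet")  # hypothetical local file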

fugue_ibis/execution_engine.py CHANGED
@@ -5,7 +5,7 @@ from typing import Any, Callable, Dict, List, Optional, Type

  import ibis
  from ibis import BaseBackend
- from triad import FileSystem, assert_or_throw
+ from triad import assert_or_throw

  from fugue import StructuredRawSQL
  from fugue.bag import Bag, LocalBag
@@ -375,10 +375,6 @@ class IbisExecutionEngine(ExecutionEngine):
  def log(self) -> logging.Logger:
  return self.non_ibis_engine.log

- @property
- def fs(self) -> FileSystem:
- return self.non_ibis_engine.fs
-
  def get_current_parallelism(self) -> int:
  return self.non_ibis_engine.get_current_parallelism()
fugue_ray/_utils/io.py CHANGED
@@ -4,23 +4,24 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Union

  import pyarrow as pa
  import ray.data as rd
- from fugue import ExecutionEngine
- from fugue._utils.io import FileParser, save_df
- from fugue.collections.partition import PartitionSpec
- from fugue.dataframe import DataFrame
- from fugue_ray.dataframe import RayDataFrame
  from pyarrow import csv as pacsv
  from pyarrow import json as pajson
  from ray.data.datasource import FileExtensionFilter
  from triad.collections import Schema
  from triad.collections.dict import ParamDict
  from triad.utils.assertion import assert_or_throw
+ from triad.utils.io import exists, makedirs, rm
+
+ from fugue import ExecutionEngine
+ from fugue._utils.io import FileParser, save_df
+ from fugue.collections.partition import PartitionSpec
+ from fugue.dataframe import DataFrame
+ from fugue_ray.dataframe import RayDataFrame


  class RayIO(object):
  def __init__(self, engine: ExecutionEngine):
  self._engine = engine
- self._fs = engine.fs
  self._logger = engine.log
  self._loads: Dict[str, Callable[..., DataFrame]] = {
  "csv": self._load_csv,
@@ -49,7 +50,7 @@ class RayIO(object):
  len(fmts) == 1, NotImplementedError("can't support multiple formats")
  )
  fmt = fmts[0]
- files = [f.uri for f in fp]
+ files = [f.path for f in fp]
  return self._loads[fmt](files, columns, **kwargs)

  def save_df(
@@ -63,24 +64,21 @@ class RayIO(object):
  **kwargs: Any,
  ) -> None:
  partition_spec = partition_spec or PartitionSpec()
- if self._fs.exists(uri):
+ if exists(uri):
  assert_or_throw(mode == "overwrite", FileExistsError(uri))
  try:
- self._fs.remove(uri)
- except Exception:
- try:
- self._fs.removetree(uri)
- except Exception: # pragma: no cover
- pass
+ rm(uri, recursive=True)
+ except Exception: # pragma: no cover
+ pass
  p = FileParser(uri, format_hint)
  if not force_single:
  df = self._prepartition(df, partition_spec=partition_spec)

- self._saves[p.file_format](df=df, uri=p.uri, **kwargs)
+ self._saves[p.file_format](df=df, uri=p.path, **kwargs)
  else:
  ldf = df.as_local()
- self._fs.makedirs(os.path.dirname(uri), recreate=True)
- save_df(ldf, uri, format_hint=format_hint, mode=mode, fs=self._fs, **kwargs)
+ makedirs(os.path.dirname(uri), exist_ok=True)
+ save_df(ldf, uri, format_hint=format_hint, mode=mode, **kwargs)

  def _save_parquet(
  self,