maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (443)
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp37-win32.pyd +0 -0
  3. maxframe/_utils.pyx +14 -1
  4. maxframe/codegen/core.py +9 -8
  5. maxframe/codegen/spe/core.py +1 -1
  6. maxframe/codegen/spe/dataframe/__init__.py +1 -0
  7. maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
  8. maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
  9. maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
  10. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  11. maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
  12. maxframe/codegen/spe/dataframe/groupby.py +88 -0
  13. maxframe/codegen/spe/dataframe/indexing.py +99 -4
  14. maxframe/codegen/spe/dataframe/merge.py +38 -1
  15. maxframe/codegen/spe/dataframe/misc.py +11 -33
  16. maxframe/codegen/spe/dataframe/reduction.py +32 -9
  17. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  18. maxframe/codegen/spe/dataframe/sort.py +39 -18
  19. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
  20. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
  21. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  22. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
  23. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  24. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
  25. maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
  26. maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
  27. maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  29. maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
  30. maxframe/codegen/spe/dataframe/tseries.py +9 -0
  31. maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
  32. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
  33. maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
  34. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  35. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  36. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  37. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  38. maxframe/codegen/spe/tensor/__init__.py +3 -0
  39. maxframe/codegen/spe/tensor/datasource.py +1 -0
  40. maxframe/codegen/spe/tensor/fft.py +74 -0
  41. maxframe/codegen/spe/tensor/linalg.py +29 -2
  42. maxframe/codegen/spe/tensor/misc.py +79 -25
  43. maxframe/codegen/spe/tensor/spatial.py +45 -0
  44. maxframe/codegen/spe/tensor/statistics.py +44 -0
  45. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  46. maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
  47. maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
  48. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  49. maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
  50. maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
  51. maxframe/codegen/spe/utils.py +2 -0
  52. maxframe/config/config.py +73 -9
  53. maxframe/config/tests/test_validators.py +13 -1
  54. maxframe/config/validators.py +49 -0
  55. maxframe/conftest.py +54 -17
  56. maxframe/core/accessor.py +2 -2
  57. maxframe/core/base.py +2 -1
  58. maxframe/core/entity/core.py +5 -0
  59. maxframe/core/entity/tileables.py +3 -1
  60. maxframe/core/graph/core.cp37-win32.pyd +0 -0
  61. maxframe/core/graph/entity.py +8 -3
  62. maxframe/core/mode.py +6 -1
  63. maxframe/core/operator/base.py +9 -2
  64. maxframe/core/operator/core.py +10 -2
  65. maxframe/core/operator/utils.py +13 -0
  66. maxframe/dataframe/__init__.py +12 -5
  67. maxframe/dataframe/accessors/__init__.py +1 -1
  68. maxframe/dataframe/accessors/compat.py +45 -0
  69. maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
  70. maxframe/dataframe/accessors/dict_/contains.py +7 -16
  71. maxframe/dataframe/accessors/dict_/core.py +48 -0
  72. maxframe/dataframe/accessors/dict_/getitem.py +17 -21
  73. maxframe/dataframe/accessors/dict_/length.py +7 -16
  74. maxframe/dataframe/accessors/dict_/remove.py +6 -18
  75. maxframe/dataframe/accessors/dict_/setitem.py +8 -18
  76. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
  77. maxframe/dataframe/accessors/list_/__init__.py +2 -2
  78. maxframe/dataframe/accessors/list_/core.py +48 -0
  79. maxframe/dataframe/accessors/list_/getitem.py +12 -19
  80. maxframe/dataframe/accessors/list_/length.py +7 -16
  81. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
  82. maxframe/dataframe/accessors/string_/__init__.py +4 -1
  83. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  84. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  85. maxframe/dataframe/accessors/struct_/core.py +43 -0
  86. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  87. maxframe/dataframe/accessors/struct_/field.py +123 -0
  88. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  89. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  90. maxframe/dataframe/arithmetic/__init__.py +18 -4
  91. maxframe/dataframe/arithmetic/between.py +106 -0
  92. maxframe/dataframe/arithmetic/dot.py +237 -0
  93. maxframe/dataframe/arithmetic/maximum.py +33 -0
  94. maxframe/dataframe/arithmetic/minimum.py +33 -0
  95. maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
  96. maxframe/dataframe/core.py +161 -224
  97. maxframe/dataframe/datasource/__init__.py +18 -0
  98. maxframe/dataframe/datasource/core.py +6 -0
  99. maxframe/dataframe/datasource/direct.py +57 -0
  100. maxframe/dataframe/datasource/from_dict.py +124 -0
  101. maxframe/dataframe/datasource/from_index.py +1 -1
  102. maxframe/dataframe/datasource/from_records.py +77 -0
  103. maxframe/dataframe/datasource/from_tensor.py +109 -41
  104. maxframe/dataframe/datasource/read_csv.py +21 -14
  105. maxframe/dataframe/datasource/read_odps_query.py +29 -6
  106. maxframe/dataframe/datasource/read_odps_table.py +32 -10
  107. maxframe/dataframe/datasource/read_parquet.py +38 -39
  108. maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
  109. maxframe/dataframe/datastore/__init__.py +11 -1
  110. maxframe/dataframe/datastore/direct.py +268 -0
  111. maxframe/dataframe/datastore/to_csv.py +29 -41
  112. maxframe/dataframe/datastore/to_odps.py +36 -4
  113. maxframe/dataframe/extensions/__init__.py +20 -4
  114. maxframe/dataframe/extensions/apply_chunk.py +32 -6
  115. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  116. maxframe/dataframe/extensions/collect_kv.py +126 -0
  117. maxframe/dataframe/extensions/extract_kv.py +177 -0
  118. maxframe/dataframe/extensions/flatjson.py +2 -1
  119. maxframe/dataframe/extensions/map_reduce.py +263 -0
  120. maxframe/dataframe/extensions/rebalance.py +62 -0
  121. maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
  122. maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
  123. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  124. maxframe/dataframe/groupby/__init__.py +17 -2
  125. maxframe/dataframe/groupby/aggregation.py +86 -49
  126. maxframe/dataframe/groupby/apply.py +1 -1
  127. maxframe/dataframe/groupby/apply_chunk.py +19 -5
  128. maxframe/dataframe/groupby/core.py +116 -16
  129. maxframe/dataframe/groupby/cum.py +4 -25
  130. maxframe/dataframe/groupby/expanding.py +264 -0
  131. maxframe/dataframe/groupby/fill.py +1 -1
  132. maxframe/dataframe/groupby/getitem.py +12 -5
  133. maxframe/dataframe/groupby/head.py +11 -1
  134. maxframe/dataframe/groupby/rank.py +136 -0
  135. maxframe/dataframe/groupby/rolling.py +206 -0
  136. maxframe/dataframe/groupby/shift.py +114 -0
  137. maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
  138. maxframe/dataframe/indexing/__init__.py +22 -2
  139. maxframe/dataframe/indexing/droplevel.py +195 -0
  140. maxframe/dataframe/indexing/filter.py +169 -0
  141. maxframe/dataframe/indexing/get_level_values.py +76 -0
  142. maxframe/dataframe/indexing/iat.py +45 -0
  143. maxframe/dataframe/indexing/iloc.py +152 -12
  144. maxframe/dataframe/indexing/insert.py +46 -18
  145. maxframe/dataframe/indexing/loc.py +287 -7
  146. maxframe/dataframe/indexing/reindex.py +14 -5
  147. maxframe/dataframe/indexing/rename.py +6 -0
  148. maxframe/dataframe/indexing/rename_axis.py +2 -2
  149. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  150. maxframe/dataframe/indexing/reset_index.py +33 -6
  151. maxframe/dataframe/indexing/sample.py +8 -0
  152. maxframe/dataframe/indexing/setitem.py +3 -3
  153. maxframe/dataframe/indexing/swaplevel.py +185 -0
  154. maxframe/dataframe/indexing/take.py +99 -0
  155. maxframe/dataframe/indexing/truncate.py +140 -0
  156. maxframe/dataframe/indexing/where.py +0 -11
  157. maxframe/dataframe/indexing/xs.py +148 -0
  158. maxframe/dataframe/merge/__init__.py +15 -1
  159. maxframe/dataframe/merge/append.py +97 -98
  160. maxframe/dataframe/merge/combine.py +244 -0
  161. maxframe/dataframe/merge/combine_first.py +120 -0
  162. maxframe/dataframe/merge/compare.py +387 -0
  163. maxframe/dataframe/merge/concat.py +183 -0
  164. maxframe/dataframe/merge/update.py +271 -0
  165. maxframe/dataframe/misc/__init__.py +28 -11
  166. maxframe/dataframe/misc/_duplicate.py +10 -4
  167. maxframe/dataframe/misc/apply.py +1 -1
  168. maxframe/dataframe/misc/check_unique.py +82 -0
  169. maxframe/dataframe/misc/clip.py +145 -0
  170. maxframe/dataframe/misc/describe.py +175 -9
  171. maxframe/dataframe/misc/drop.py +31 -0
  172. maxframe/dataframe/misc/drop_duplicates.py +2 -2
  173. maxframe/dataframe/misc/duplicated.py +2 -2
  174. maxframe/dataframe/misc/get_dummies.py +5 -1
  175. maxframe/dataframe/misc/infer_dtypes.py +251 -0
  176. maxframe/dataframe/misc/isin.py +2 -2
  177. maxframe/dataframe/misc/map.py +125 -18
  178. maxframe/dataframe/misc/repeat.py +159 -0
  179. maxframe/dataframe/misc/tests/test_misc.py +48 -3
  180. maxframe/dataframe/misc/to_numeric.py +3 -0
  181. maxframe/dataframe/misc/transform.py +12 -5
  182. maxframe/dataframe/misc/transpose.py +13 -1
  183. maxframe/dataframe/misc/valid_index.py +115 -0
  184. maxframe/dataframe/misc/value_counts.py +38 -4
  185. maxframe/dataframe/missing/checkna.py +14 -6
  186. maxframe/dataframe/missing/dropna.py +5 -0
  187. maxframe/dataframe/missing/fillna.py +1 -1
  188. maxframe/dataframe/missing/replace.py +7 -4
  189. maxframe/dataframe/reduction/__init__.py +35 -16
  190. maxframe/dataframe/reduction/aggregation.py +43 -14
  191. maxframe/dataframe/reduction/all.py +2 -2
  192. maxframe/dataframe/reduction/any.py +2 -2
  193. maxframe/dataframe/reduction/argmax.py +103 -0
  194. maxframe/dataframe/reduction/argmin.py +103 -0
  195. maxframe/dataframe/reduction/core.py +80 -24
  196. maxframe/dataframe/reduction/count.py +13 -9
  197. maxframe/dataframe/reduction/cov.py +166 -0
  198. maxframe/dataframe/reduction/cummax.py +2 -2
  199. maxframe/dataframe/reduction/cummin.py +2 -2
  200. maxframe/dataframe/reduction/cumprod.py +2 -2
  201. maxframe/dataframe/reduction/cumsum.py +2 -2
  202. maxframe/dataframe/reduction/custom_reduction.py +2 -2
  203. maxframe/dataframe/reduction/idxmax.py +185 -0
  204. maxframe/dataframe/reduction/idxmin.py +185 -0
  205. maxframe/dataframe/reduction/kurtosis.py +37 -30
  206. maxframe/dataframe/reduction/max.py +2 -2
  207. maxframe/dataframe/reduction/mean.py +9 -7
  208. maxframe/dataframe/reduction/median.py +2 -2
  209. maxframe/dataframe/reduction/min.py +2 -2
  210. maxframe/dataframe/reduction/mode.py +144 -0
  211. maxframe/dataframe/reduction/nunique.py +19 -11
  212. maxframe/dataframe/reduction/prod.py +18 -13
  213. maxframe/dataframe/reduction/reduction_size.py +2 -2
  214. maxframe/dataframe/reduction/sem.py +13 -9
  215. maxframe/dataframe/reduction/skew.py +31 -27
  216. maxframe/dataframe/reduction/str_concat.py +10 -7
  217. maxframe/dataframe/reduction/sum.py +18 -14
  218. maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
  219. maxframe/dataframe/reduction/unique.py +20 -3
  220. maxframe/dataframe/reduction/var.py +16 -12
  221. maxframe/dataframe/reshape/__init__.py +38 -0
  222. maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
  223. maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
  224. maxframe/dataframe/reshape/unstack.py +114 -0
  225. maxframe/dataframe/sort/__init__.py +16 -1
  226. maxframe/dataframe/sort/argsort.py +68 -0
  227. maxframe/dataframe/sort/core.py +2 -1
  228. maxframe/dataframe/sort/nlargest.py +238 -0
  229. maxframe/dataframe/sort/nsmallest.py +228 -0
  230. maxframe/dataframe/sort/rank.py +147 -0
  231. maxframe/dataframe/statistics/__init__.py +3 -3
  232. maxframe/dataframe/statistics/corr.py +1 -0
  233. maxframe/dataframe/statistics/quantile.py +2 -2
  234. maxframe/dataframe/tests/test_typing.py +104 -0
  235. maxframe/dataframe/tests/test_utils.py +66 -2
  236. maxframe/dataframe/tseries/__init__.py +19 -0
  237. maxframe/dataframe/tseries/at_time.py +61 -0
  238. maxframe/dataframe/tseries/between_time.py +122 -0
  239. maxframe/dataframe/typing_.py +185 -0
  240. maxframe/dataframe/utils.py +125 -52
  241. maxframe/dataframe/window/aggregation.py +8 -4
  242. maxframe/dataframe/window/core.py +14 -1
  243. maxframe/dataframe/window/ewm.py +1 -3
  244. maxframe/dataframe/window/expanding.py +37 -35
  245. maxframe/dataframe/window/rolling.py +49 -39
  246. maxframe/dataframe/window/tests/test_expanding.py +1 -7
  247. maxframe/dataframe/window/tests/test_rolling.py +1 -1
  248. maxframe/env.py +7 -4
  249. maxframe/errors.py +2 -2
  250. maxframe/io/odpsio/schema.py +9 -3
  251. maxframe/io/odpsio/tableio.py +7 -2
  252. maxframe/io/odpsio/tests/test_schema.py +198 -83
  253. maxframe/learn/__init__.py +10 -2
  254. maxframe/learn/cluster/__init__.py +15 -0
  255. maxframe/learn/cluster/_kmeans.py +782 -0
  256. maxframe/learn/contrib/llm/core.py +18 -7
  257. maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
  258. maxframe/learn/contrib/llm/deploy/config.py +221 -0
  259. maxframe/learn/contrib/llm/deploy/core.py +247 -0
  260. maxframe/learn/contrib/llm/deploy/framework.py +35 -0
  261. maxframe/learn/contrib/llm/deploy/loader.py +360 -0
  262. maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
  263. maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
  264. maxframe/learn/contrib/llm/models/__init__.py +1 -0
  265. maxframe/learn/contrib/llm/models/dashscope.py +12 -6
  266. maxframe/learn/contrib/llm/models/managed.py +76 -11
  267. maxframe/learn/contrib/llm/models/openai.py +72 -0
  268. maxframe/learn/contrib/llm/tests/__init__.py +13 -0
  269. maxframe/learn/contrib/llm/tests/test_core.py +34 -0
  270. maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
  271. maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
  272. maxframe/learn/contrib/llm/text.py +348 -42
  273. maxframe/learn/contrib/models.py +4 -1
  274. maxframe/learn/contrib/xgboost/classifier.py +2 -0
  275. maxframe/learn/contrib/xgboost/core.py +113 -4
  276. maxframe/learn/contrib/xgboost/predict.py +4 -2
  277. maxframe/learn/contrib/xgboost/regressor.py +5 -0
  278. maxframe/learn/contrib/xgboost/train.py +7 -2
  279. maxframe/learn/core.py +66 -0
  280. maxframe/learn/linear_model/_base.py +58 -1
  281. maxframe/learn/linear_model/_lin_reg.py +1 -1
  282. maxframe/learn/metrics/__init__.py +6 -0
  283. maxframe/learn/metrics/_classification.py +145 -0
  284. maxframe/learn/metrics/_ranking.py +477 -0
  285. maxframe/learn/metrics/_scorer.py +60 -0
  286. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  287. maxframe/learn/metrics/pairwise/core.py +77 -0
  288. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  289. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  290. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  291. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  292. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  293. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  294. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  295. maxframe/learn/metrics/tests/__init__.py +13 -0
  296. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  297. maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
  298. maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
  299. maxframe/learn/utils/__init__.py +2 -1
  300. maxframe/learn/utils/checks.py +1 -2
  301. maxframe/learn/utils/core.py +59 -0
  302. maxframe/learn/utils/extmath.py +79 -9
  303. maxframe/learn/utils/odpsio.py +262 -0
  304. maxframe/learn/utils/validation.py +2 -2
  305. maxframe/lib/compat.py +40 -0
  306. maxframe/lib/dtypes_extension/__init__.py +16 -1
  307. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  308. maxframe/lib/dtypes_extension/blob.py +304 -0
  309. maxframe/lib/dtypes_extension/dtypes.py +40 -0
  310. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  311. maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
  312. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  313. maxframe/lib/filesystem/_oss_lib/common.py +124 -50
  314. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  315. maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
  316. maxframe/lib/filesystem/base.py +1 -1
  317. maxframe/lib/filesystem/core.py +1 -1
  318. maxframe/lib/filesystem/oss.py +115 -46
  319. maxframe/lib/filesystem/tests/test_oss.py +74 -36
  320. maxframe/lib/mmh3.cp37-win32.pyd +0 -0
  321. maxframe/lib/wrapped_pickle.py +10 -0
  322. maxframe/opcodes.py +41 -15
  323. maxframe/protocol.py +12 -0
  324. maxframe/remote/core.py +4 -0
  325. maxframe/serialization/__init__.py +11 -2
  326. maxframe/serialization/arrow.py +38 -13
  327. maxframe/serialization/blob.py +32 -0
  328. maxframe/serialization/core.cp37-win32.pyd +0 -0
  329. maxframe/serialization/core.pyx +39 -1
  330. maxframe/serialization/exception.py +2 -4
  331. maxframe/serialization/numpy.py +11 -0
  332. maxframe/serialization/pandas.py +46 -9
  333. maxframe/serialization/serializables/core.py +2 -2
  334. maxframe/serialization/tests/test_serial.py +31 -4
  335. maxframe/tensor/__init__.py +38 -8
  336. maxframe/tensor/arithmetic/__init__.py +19 -10
  337. maxframe/tensor/arithmetic/core.py +2 -2
  338. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  339. maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
  340. maxframe/tensor/core.py +6 -2
  341. maxframe/tensor/datasource/tests/test_datasource.py +2 -1
  342. maxframe/tensor/extensions/__init__.py +2 -0
  343. maxframe/tensor/extensions/apply_chunk.py +3 -3
  344. maxframe/tensor/extensions/rebalance.py +65 -0
  345. maxframe/tensor/fft/__init__.py +32 -0
  346. maxframe/tensor/fft/core.py +168 -0
  347. maxframe/tensor/fft/fft.py +112 -0
  348. maxframe/tensor/fft/fft2.py +118 -0
  349. maxframe/tensor/fft/fftfreq.py +80 -0
  350. maxframe/tensor/fft/fftn.py +123 -0
  351. maxframe/tensor/fft/fftshift.py +79 -0
  352. maxframe/tensor/fft/hfft.py +112 -0
  353. maxframe/tensor/fft/ifft.py +114 -0
  354. maxframe/tensor/fft/ifft2.py +115 -0
  355. maxframe/tensor/fft/ifftn.py +123 -0
  356. maxframe/tensor/fft/ifftshift.py +73 -0
  357. maxframe/tensor/fft/ihfft.py +93 -0
  358. maxframe/tensor/fft/irfft.py +118 -0
  359. maxframe/tensor/fft/irfft2.py +62 -0
  360. maxframe/tensor/fft/irfftn.py +114 -0
  361. maxframe/tensor/fft/rfft.py +116 -0
  362. maxframe/tensor/fft/rfft2.py +63 -0
  363. maxframe/tensor/fft/rfftfreq.py +87 -0
  364. maxframe/tensor/fft/rfftn.py +113 -0
  365. maxframe/tensor/indexing/fill_diagonal.py +1 -7
  366. maxframe/tensor/linalg/__init__.py +7 -0
  367. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  368. maxframe/tensor/linalg/cholesky.py +117 -0
  369. maxframe/tensor/linalg/einsum.py +339 -0
  370. maxframe/tensor/linalg/lstsq.py +100 -0
  371. maxframe/tensor/linalg/matrix_norm.py +75 -0
  372. maxframe/tensor/linalg/norm.py +249 -0
  373. maxframe/tensor/linalg/solve.py +72 -0
  374. maxframe/tensor/linalg/solve_triangular.py +2 -2
  375. maxframe/tensor/linalg/vector_norm.py +113 -0
  376. maxframe/tensor/misc/__init__.py +24 -1
  377. maxframe/tensor/misc/argwhere.py +72 -0
  378. maxframe/tensor/misc/array_split.py +46 -0
  379. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  380. maxframe/tensor/misc/copyto.py +130 -0
  381. maxframe/tensor/misc/delete.py +104 -0
  382. maxframe/tensor/misc/dsplit.py +68 -0
  383. maxframe/tensor/misc/ediff1d.py +74 -0
  384. maxframe/tensor/misc/expand_dims.py +85 -0
  385. maxframe/tensor/misc/flip.py +90 -0
  386. maxframe/tensor/misc/fliplr.py +64 -0
  387. maxframe/tensor/misc/flipud.py +68 -0
  388. maxframe/tensor/misc/hsplit.py +85 -0
  389. maxframe/tensor/misc/insert.py +139 -0
  390. maxframe/tensor/misc/moveaxis.py +83 -0
  391. maxframe/tensor/misc/result_type.py +88 -0
  392. maxframe/tensor/misc/roll.py +124 -0
  393. maxframe/tensor/misc/rollaxis.py +77 -0
  394. maxframe/tensor/misc/shape.py +89 -0
  395. maxframe/tensor/misc/split.py +190 -0
  396. maxframe/tensor/misc/tile.py +109 -0
  397. maxframe/tensor/misc/vsplit.py +74 -0
  398. maxframe/tensor/reduction/array_equal.py +2 -1
  399. maxframe/tensor/sort/__init__.py +2 -0
  400. maxframe/tensor/sort/argpartition.py +98 -0
  401. maxframe/tensor/sort/partition.py +228 -0
  402. maxframe/tensor/spatial/__init__.py +15 -0
  403. maxframe/tensor/spatial/distance/__init__.py +17 -0
  404. maxframe/tensor/spatial/distance/cdist.py +421 -0
  405. maxframe/tensor/spatial/distance/pdist.py +398 -0
  406. maxframe/tensor/spatial/distance/squareform.py +153 -0
  407. maxframe/tensor/special/__init__.py +159 -21
  408. maxframe/tensor/special/airy.py +55 -0
  409. maxframe/tensor/special/bessel.py +199 -0
  410. maxframe/tensor/special/core.py +65 -4
  411. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  412. maxframe/tensor/special/ellip_harm.py +55 -0
  413. maxframe/tensor/special/err_fresnel.py +223 -0
  414. maxframe/tensor/special/gamma_funcs.py +303 -0
  415. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  416. maxframe/tensor/special/info_theory.py +189 -0
  417. maxframe/tensor/special/misc.py +21 -0
  418. maxframe/tensor/statistics/__init__.py +6 -0
  419. maxframe/tensor/statistics/corrcoef.py +77 -0
  420. maxframe/tensor/statistics/cov.py +222 -0
  421. maxframe/tensor/statistics/digitize.py +126 -0
  422. maxframe/tensor/statistics/histogram.py +520 -0
  423. maxframe/tensor/statistics/median.py +85 -0
  424. maxframe/tensor/statistics/ptp.py +89 -0
  425. maxframe/tensor/utils.py +3 -3
  426. maxframe/tests/test_udf.py +61 -0
  427. maxframe/tests/test_utils.py +51 -6
  428. maxframe/tests/utils.py +0 -2
  429. maxframe/typing_.py +2 -0
  430. maxframe/udf.py +130 -9
  431. maxframe/utils.py +254 -27
  432. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
  433. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
  434. maxframe_client/fetcher.py +35 -4
  435. maxframe_client/session/odps.py +7 -2
  436. maxframe_client/session/task.py +8 -1
  437. maxframe_client/tests/test_fetcher.py +76 -3
  438. maxframe_client/tests/test_session.py +28 -1
  439. maxframe/dataframe/arrays.py +0 -864
  440. /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
  441. /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
  442. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
  443. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
@@ -12,15 +12,18 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import base64
16
- import json
15
+ import logging
17
16
  import os
17
+ from typing import NamedTuple, Optional
18
+ from urllib.parse import parse_qs, urlparse
18
19
 
19
20
  from ....utils import lazy_import
20
21
  from ..base import path_type, stringify_path
21
22
 
22
23
  oss2 = lazy_import("oss2", placeholder=True)
23
24
 
25
+ logger = logging.getLogger(__name__)
26
+
24
27
  # OSS api time out
25
28
  _oss_time_out = 10
26
29
 
@@ -37,6 +40,8 @@ class OSSFileEntry:
37
40
  self._storage_options = storage_options
38
41
 
39
42
  def is_dir(self):
43
+ if self._path.endswith("/"):
44
+ self._is_dir = True
40
45
  if self._is_dir is None:
41
46
  self._is_dir = oss_isdir(self._path)
42
47
  return self._is_dir
@@ -63,43 +68,81 @@ class OSSFileEntry:
63
68
  return self._path
64
69
 
65
70
 
66
- def parse_osspath(path: path_type):
71
+ class ParsedOSSPath(NamedTuple):
72
+ endpoint: str
73
+ bucket: str
74
+ key: str
75
+ access_key_id: Optional[str] = None
76
+ access_key_secret: Optional[str] = None
77
+ security_token: Optional[str] = None
78
+ scheme: str = None
79
+
80
+
81
+ def parse_osspath(path: path_type, check_errors: bool = True) -> ParsedOSSPath:
67
82
  # Extract OSS configuration from the encoded URL.
68
83
  str_path = stringify_path(path)
69
- parse_result = oss2.urlparse(str_path)
70
- if parse_result.scheme != "oss":
84
+ parse_result = urlparse(str_path)
85
+ if check_errors and parse_result.scheme != "oss":
71
86
  raise ValueError(
72
87
  f"Except scheme oss, but got scheme: {parse_result.scheme}"
73
88
  f" in path: {str_path}"
74
89
  )
75
- bucket = parse_result.hostname
76
- if not (parse_result.username and parse_result.password):
77
- raise RuntimeError(r"Please use build_oss_path to add OSS info")
78
- param_dict = url_to_dict(parse_result.username)
79
- access_key_id = param_dict["access_key_id"]
90
+ access_key_id = parse_result.username
80
91
  access_key_secret = parse_result.password
81
- end_point = param_dict["end_point"]
92
+
93
+ if not parse_result.query:
94
+ sts_token = None
95
+ else:
96
+ sts_token = parse_qs(parse_result.query).get("security_token", [None])[0]
97
+
98
+ if check_errors and not (access_key_id and access_key_secret):
99
+ raise ValueError(r"No credentials provided")
100
+
82
101
  key = parse_result.path
83
102
  key = key[1:] if key.startswith("/") else key
84
- return bucket, key, access_key_id, access_key_secret, end_point
103
+ if "/" not in key:
104
+ bucket, key = key, None
105
+ if check_errors:
106
+ raise ValueError("Need to use format bucket/key to separate bucket and key")
107
+ else:
108
+ bucket, key = key.split("/", 1)
109
+
110
+ endpoint = parse_result.hostname
111
+ if endpoint and parse_result.port:
112
+ endpoint += f":{parse_result.port}"
113
+ return ParsedOSSPath(
114
+ endpoint,
115
+ bucket,
116
+ key,
117
+ access_key_id,
118
+ access_key_secret,
119
+ sts_token,
120
+ parse_result.scheme,
121
+ )
85
122
 
86
123
 
87
- def _get_oss_bucket(bucket, access_key_id, access_key_secret, end_point):
124
+ def get_oss_bucket(parsed_path: ParsedOSSPath):
125
+ if parsed_path.security_token is not None:
126
+ auth = oss2.StsAuth(
127
+ parsed_path.access_key_id,
128
+ parsed_path.access_key_secret,
129
+ parsed_path.security_token,
130
+ )
131
+ else:
132
+ auth = oss2.Auth(parsed_path.access_key_id, parsed_path.access_key_secret)
88
133
  oss_bucket = oss2.Bucket(
89
- auth=oss2.Auth(
90
- access_key_id=access_key_id, access_key_secret=access_key_secret
91
- ),
92
- endpoint=end_point,
93
- bucket_name=bucket,
134
+ auth=auth,
135
+ endpoint=parsed_path.endpoint,
136
+ bucket_name=parsed_path.bucket,
94
137
  connect_timeout=_oss_time_out,
95
138
  )
96
139
  return oss_bucket
97
140
 
98
141
 
99
142
  def oss_exists(path: path_type):
100
- bucket, key, access_key_id, access_key_secret, end_point = parse_osspath(path)
101
- oss_bucket = _get_oss_bucket(bucket, access_key_id, access_key_secret, end_point)
102
- return oss_bucket.object_exists(key) or oss_isdir(path)
143
+ parsed_path = parse_osspath(path)
144
+ oss_bucket = get_oss_bucket(parsed_path)
145
+ return oss_bucket.object_exists(parsed_path.key) or oss_isdir(path)
103
146
 
104
147
 
105
148
  def oss_isdir(path: path_type):
@@ -112,26 +155,71 @@ def oss_isdir(path: path_type):
112
155
  dirname = stringify_path(path)
113
156
  if not dirname.endswith("/"):
114
157
  dirname = dirname + "/"
115
- bucket, key, access_key_id, access_key_secret, end_point = parse_osspath(dirname)
116
- oss_bucket = _get_oss_bucket(bucket, access_key_id, access_key_secret, end_point)
158
+ logger.info("Checking isdir for path %s", dirname)
159
+ parsed_path = parse_osspath(dirname)
160
+ oss_bucket = get_oss_bucket(parsed_path)
117
161
  isdir = False
118
- for obj in oss2.ObjectIteratorV2(oss_bucket, prefix=key, max_keys=2):
119
- if obj.key == key:
162
+ for obj in oss2.ObjectIteratorV2(oss_bucket, prefix=parsed_path.key, max_keys=2):
163
+ if obj.key == parsed_path.key:
120
164
  continue
121
165
  isdir = True
122
166
  break
123
167
  return isdir
124
168
 
125
169
 
170
+ def oss_delete(path: path_type):
171
+ """
172
+ Perform both key deletion and prefix deletion. Once no files
173
+ deleted in both scenarios, we can make assertion that the file
174
+ does not exist.
175
+ """
176
+ parsed_path = parse_osspath(path)
177
+ oss_bucket = get_oss_bucket(parsed_path)
178
+
179
+ try:
180
+ oss_bucket.delete_object(parsed_path.key)
181
+ return
182
+ except oss2.exceptions.NoSuchKey:
183
+ pass
184
+
185
+ is_missing = True
186
+ dir_key = parsed_path.key.rstrip("/") + "/"
187
+ for obj in oss2.ObjectIteratorV2(oss_bucket, prefix=dir_key):
188
+ try:
189
+ oss_bucket.delete_object(obj.key)
190
+ is_missing = False
191
+ except oss2.exceptions.NoSuchKey:
192
+ pass
193
+ if is_missing:
194
+ raise FileNotFoundError("No such file or directory: %s", path)
195
+
196
+
197
+ def oss_copy_file(src_path: path_type, dest_path: path_type):
198
+ # todo implements copy of huge files
199
+ parsed_src_path = parse_osspath(src_path)
200
+ parsed_dest_path = parse_osspath(dest_path)
201
+ try:
202
+ if oss_isdir(src_path):
203
+ raise NotImplementedError("Copying directories not implemented yet")
204
+ except:
205
+ # fixme currently we cannot handle error with iterating files with STS token
206
+ logger.exception("Failed to judge if src is a directory")
207
+
208
+ oss_bucket = get_oss_bucket(parsed_dest_path)
209
+ oss_bucket.copy_object(
210
+ parsed_src_path.bucket, parsed_src_path.key, parsed_dest_path.key
211
+ )
212
+
213
+
126
214
  def oss_stat(path: path_type):
127
215
  path = stringify_path(path)
128
- bucket, key, access_key_id, access_key_secret, end_point = parse_osspath(path)
129
- oss_bucket = _get_oss_bucket(bucket, access_key_id, access_key_secret, end_point)
216
+ parsed_path = parse_osspath(path)
217
+ oss_bucket = get_oss_bucket(parsed_path)
130
218
  if oss_isdir(path):
131
219
  stat = dict(name=path, size=0, modified_time=-1)
132
220
  stat["type"] = "directory"
133
221
  else:
134
- meta = oss_bucket.get_object_meta(key)
222
+ meta = oss_bucket.get_object_meta(parsed_path.key)
135
223
  stat = dict(
136
224
  name=path,
137
225
  size=int(meta.headers["Content-Length"]),
@@ -145,11 +233,11 @@ def oss_scandir(dirname: path_type):
145
233
  dirname = stringify_path(dirname)
146
234
  if not dirname.endswith("/"):
147
235
  dirname = dirname + "/"
148
- bucket, key, access_key_id, access_key_secret, end_point = parse_osspath(dirname)
149
- oss_bucket = _get_oss_bucket(bucket, access_key_id, access_key_secret, end_point)
236
+ parsed_path = parse_osspath(dirname)
237
+ oss_bucket = get_oss_bucket(parsed_path)
150
238
  dirname_set = set()
151
- for obj in oss2.ObjectIteratorV2(oss_bucket, prefix=key):
152
- rel_path = obj.key[len(key) :]
239
+ for obj in oss2.ObjectIteratorV2(oss_bucket, prefix=parsed_path.key):
240
+ rel_path = obj.key[len(parsed_path.key) :]
153
241
  try:
154
242
  inside_dirname, inside_filename = rel_path.split("/", 1)
155
243
  except ValueError:
@@ -160,11 +248,11 @@ def oss_scandir(dirname: path_type):
160
248
  continue
161
249
  dirname_set.add(inside_dirname)
162
250
  yield OSSFileEntry(
163
- os.path.join(dirname, inside_dirname),
251
+ "/".join([dirname, inside_dirname]),
164
252
  is_dir=True,
165
253
  is_file=False,
166
254
  stat={
167
- "name": os.path.join(dirname, inside_dirname),
255
+ "name": "/".join([dirname, inside_dirname]),
168
256
  "type": "directory",
169
257
  "size": 0,
170
258
  "modified_time": -1,
@@ -172,27 +260,13 @@ def oss_scandir(dirname: path_type):
172
260
  )
173
261
  else:
174
262
  yield OSSFileEntry(
175
- os.path.join(dirname, inside_filename),
263
+ "/".join([dirname, inside_filename]),
176
264
  is_dir=False,
177
265
  is_file=True,
178
266
  stat={
179
- "name": os.path.join(dirname, inside_filename),
267
+ "name": "/".join([dirname, inside_filename]),
180
268
  "type": "file",
181
269
  "size": obj.size,
182
270
  "modified_time": obj.last_modified,
183
271
  },
184
272
  )
185
-
186
-
187
- def dict_to_url(param: dict):
188
- # Encode the dictionary with url-safe-base64.
189
- str_param = json.dumps(param)
190
- url_param = base64.urlsafe_b64encode(bytes(str_param, encoding="utf8"))
191
- return bytes.decode(url_param, encoding="utf8")
192
-
193
-
194
- def url_to_dict(url_param: str):
195
- # Decode url-safe-base64 encoded string.
196
- bytes_param = bytes(url_param, encoding="utf8")
197
- str_param = bytes.decode(base64.urlsafe_b64decode(bytes_param), encoding="utf8")
198
- return json.loads(str_param)
@@ -104,7 +104,7 @@ def _glob0(dirname, basename, dironly):
104
104
  if oss_isdir(dirname):
105
105
  return [basename]
106
106
  else:
107
- if oss_exists(os.path.join(dirname, basename)):
107
+ if oss_exists("/".join([dirname, basename])):
108
108
  return [basename]
109
109
  return []
110
110
 
@@ -15,7 +15,7 @@
15
15
  from io import IOBase
16
16
 
17
17
  from ....utils import lazy_import
18
- from .common import oss_stat, parse_osspath
18
+ from .common import get_oss_bucket, oss_stat, parse_osspath
19
19
 
20
20
  oss2 = lazy_import("oss2", placeholder=True)
21
21
 
@@ -23,20 +23,20 @@ oss2 = lazy_import("oss2", placeholder=True)
23
23
  class OSSIOBase(IOBase):
24
24
  def __init__(self, path, mode):
25
25
  self._path = path
26
- (
27
- self._bucket_name,
28
- self._key_name,
29
- self._access_key_id,
30
- self._access_key_secret,
31
- self._end_point,
32
- ) = parse_osspath(self._path)
33
- self._bucket = self._get_bucket()
26
+ self._parsed_path = parse_osspath(self._path)
27
+ self._bucket = get_oss_bucket(self._parsed_path)
34
28
  self._current_pos = 0
35
29
  self._size = None
36
30
  self._buffer = b""
37
31
  self._buffer_size = 1 * 1024
38
32
  self._mode = mode
39
33
 
34
+ if mode and mode.startswith("w"):
35
+ try:
36
+ self._bucket.delete_object(self._parsed_path.key)
37
+ except oss2.exceptions.NoSuchKey:
38
+ pass
39
+
40
40
  @property
41
41
  def mode(self):
42
42
  return self._mode
@@ -44,16 +44,6 @@ class OSSIOBase(IOBase):
44
44
  def fileno(self) -> int:
45
45
  raise AttributeError
46
46
 
47
- def _get_bucket(self):
48
- return oss2.Bucket(
49
- auth=oss2.Auth(
50
- access_key_id=self._access_key_id,
51
- access_key_secret=self._access_key_secret,
52
- ),
53
- endpoint=self._end_point,
54
- bucket_name=self._bucket_name,
55
- )
56
-
57
47
  def _get_size(self):
58
48
  if self._size is None:
59
49
  self._size = int(oss_stat(self._path)["size"])
@@ -79,7 +69,7 @@ class OSSIOBase(IOBase):
79
69
  return self._current_pos
80
70
 
81
71
  def seekable(self):
82
- return True
72
+ return "r" in self._mode
83
73
 
84
74
  def read(self, size=-1):
85
75
  """
@@ -97,12 +87,12 @@ class OSSIOBase(IOBase):
97
87
  return b""
98
88
  elif size < 0:
99
89
  obj = self._bucket.get_object(
100
- self._key_name, byte_range=(self._current_pos, None)
90
+ self._parsed_path.key, byte_range=(self._current_pos, None)
101
91
  )
102
92
  self._current_pos = self._get_size()
103
93
  else:
104
94
  obj = self._bucket.get_object(
105
- self._key_name,
95
+ self._parsed_path.key,
106
96
  byte_range=(self._current_pos, self._current_pos + size - 1),
107
97
  )
108
98
  self._current_pos = self._current_pos + size
@@ -117,7 +107,7 @@ class OSSIOBase(IOBase):
117
107
  self._get_size() - 1, self._current_pos + self._buffer_size - 1
118
108
  )
119
109
  buffer = self._bucket.get_object(
120
- self._key_name, byte_range=(self._current_pos, read_to)
110
+ self._parsed_path.key, byte_range=(self._current_pos, read_to)
121
111
  ).read()
122
112
  if not buffer:
123
113
  return 1
@@ -145,11 +135,17 @@ class OSSIOBase(IOBase):
145
135
  break
146
136
  return bytes(res)
147
137
 
138
def write(self, block) -> int:
    """
    Append ``block`` to the OSS object and advance the write position.

    The data is sent with ``append_object`` at the handle's current
    position; the position is then moved to the ``next_position``
    reported by the append result.

    Parameters
    ----------
    block
        Data to append.  NOTE(review): assumed to be a bytes-like object
        whose ``len()`` equals its size in bytes -- confirm against callers.

    Returns
    -------
    int
        Number of bytes handed to OSS.  Returning the count (instead of
        ``None``) follows the ``io`` write contract, so the handle can be
        wrapped by buffered / text writers that rely on it.
    """
    written = len(block)
    append_result = self._bucket.append_object(
        self._parsed_path.key, self._current_pos, block
    )
    # trust the position reported by the service rather than computing it locally
    self._current_pos = append_result.next_position
    return written
143
+
148
144
  def readable(self):
149
- return True
145
+ return "r" in self._mode
150
146
 
151
147
  def writable(self):
152
- return False
148
+ return "w" in self._mode or "a" in self._mode
153
149
 
154
150
  def close(self):
155
151
  # already closed by oss
@@ -247,7 +247,7 @@ class FileSystem(ABC):
247
247
  def parse_from_path(uri: str):
248
248
  parsed_uri = urlparse(uri)
249
249
  options = dict()
250
- options["host"] = parsed_uri.netloc.rsplit("@", 1)[-1].rsplit(":", 1)[0]
250
+ options["host"] = parsed_uri.hostname
251
251
  if parsed_uri.port:
252
252
  options["port"] = parsed_uri.port
253
253
  if parsed_uri.username:
@@ -49,7 +49,7 @@ def get_fs(path: path_type, storage_options: Dict = None) -> FileSystem:
49
49
 
50
50
  if scheme in _filesystems:
51
51
  file_system_type = _filesystems[scheme]
52
- if scheme == "file" or scheme == "oss":
52
+ if scheme == "file":
53
53
  # local file systems are singletons.
54
54
  return file_system_type.get_instance()
55
55
  else:
@@ -12,8 +12,10 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Dict, Iterator, List, Tuple
16
- from urllib import parse
15
+ import enum
16
+ import re
17
+ from typing import Dict, Iterator, List, Tuple, Union
18
+ from urllib.parse import urlencode
17
19
 
18
20
  from ...utils import implements, lazy_import
19
21
  from ._oss_lib import common as oc
@@ -22,18 +24,43 @@ from ._oss_lib.handle import OSSIOBase
22
24
  from .base import FileSystem, path_type
23
25
 
24
26
  oss2 = lazy_import("oss2", placeholder=True)
27
+ _ip_regex = re.compile(r"^([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})")
25
28
 
26
29
  _oss_time_out = 10
27
30
 
28
31
 
29
- class OSSFileSystem(FileSystem):
30
- _instance = None
32
class HostEnforceType(enum.Enum):
    """Policy selecting whether an OSS endpoint host should be rewritten."""

    # keep the endpoint exactly as supplied
    no_enforce = 0
    # request the "-internal" form of the endpoint host
    force_internal = 1
    # request the form of the endpoint host without "-internal"
    force_external = 2
36
+
31
37
 
32
- @classmethod
33
- def get_instance(cls):
34
- if cls._instance is None:
35
- cls._instance = OSSFileSystem()
36
- return cls._instance
38
+ class OSSFileSystem(FileSystem):
39
def __init__(
    self,
    access_key_id: str = None,
    access_key_secret: str = None,
    security_token: str = None,
    host_enforce_type: Union[HostEnforceType, str] = HostEnforceType.no_enforce,
    **kw,
):
    """
    Create a file system bound to a set of OSS credentials.

    Parameters
    ----------
    access_key_id : str
        Access key id; falls back to the ``user`` keyword option.
    access_key_secret : str
        Access key secret; falls back to the ``password`` keyword option.
    security_token : str
        Optional security token stored alongside the keys.
    host_enforce_type : HostEnforceType or str
        Endpoint rewriting policy, given either as an enum member or as
        the name of one (e.g. ``"force_internal"``).
    **kw
        Extra options; only ``user`` and ``password`` are read here.

    Raises
    ------
    ValueError
        If ``host_enforce_type`` is a name that is not a member of
        ``HostEnforceType``.
    """
    self._access_key_id = access_key_id or kw.get("user")
    self._access_key_secret = access_key_secret or kw.get("password")
    self._security_token = security_token

    if not isinstance(host_enforce_type, HostEnforceType):
        # Look the name up among enum members only.  A plain getattr()
        # would also accept unrelated class attributes such as
        # "__members__" and silently store a non-member value.
        try:
            host_enforce_type = HostEnforceType[host_enforce_type]
        except KeyError:
            valid_names = ", ".join(m.name for m in HostEnforceType)
            raise ValueError(
                f"Unknown host_enforce_type {host_enforce_type!r}, "
                f"expected one of: {valid_names}"
            ) from None
    self._host_enforce_type = host_enforce_type
55
+
56
def _rewrite_path(self, path: str) -> str:
    """
    Return ``path`` rebuilt by ``build_oss_path`` with this instance's
    credentials, security token and host enforcement policy.
    """
    credentials = dict(
        access_key_id=self._access_key_id,
        access_key_secret=self._access_key_secret,
        security_token=self._security_token,
    )
    return build_oss_path(
        path, host_enforce_type=self._host_enforce_type, **credentials
    )
37
64
 
38
65
  @implements(FileSystem.cat)
39
66
  def cat(self, path: path_type):
@@ -46,39 +73,37 @@ class OSSFileSystem(FileSystem):
46
73
  if not file_entry.is_dir():
47
74
  raise OSError("ls for file is not supported")
48
75
  else:
49
- bucket, key, access_key_id, access_key_secret, end_point = oc.parse_osspath(
50
- path
51
- )
52
- oss_bucket = oss2.Bucket(
53
- auth=oss2.Auth(
54
- access_key_id=access_key_id, access_key_secret=access_key_secret
55
- ),
56
- endpoint=end_point,
57
- bucket_name=bucket,
58
- connect_timeout=_oss_time_out,
59
- )
60
- for obj in oss2.ObjectIteratorV2(oss_bucket, prefix=key):
76
+ parsed_path = oc.parse_osspath(path)
77
+ oss_bucket = oc.get_oss_bucket(parsed_path)
78
+ for obj in oss2.ObjectIteratorV2(oss_bucket, prefix=parsed_path.key):
61
79
  if obj.key.endswith("/"):
62
80
  continue
63
- obj_path = rf"oss://{bucket}/{obj.key}"
81
+ obj_path = rf"{parsed_path.bucket}/{obj.key}"
64
82
  file_list.append(
65
83
  build_oss_path(
66
- obj_path, access_key_id, access_key_secret, end_point
84
+ obj_path,
85
+ parsed_path.endpoint,
86
+ parsed_path.access_key_id,
87
+ parsed_path.access_key_secret,
88
+ parsed_path.security_token,
67
89
  )
68
90
  )
69
91
  return file_list
70
92
 
71
93
  @implements(FileSystem.delete)
72
94
  def delete(self, path: path_type, recursive: bool = False):
73
- raise NotImplementedError
95
+ return oc.oss_delete(self._rewrite_path(path))
74
96
 
75
97
  @implements(FileSystem.rename)
76
98
  def rename(self, path: path_type, new_path: path_type):
77
- raise NotImplementedError
99
+ # in OSS, you need to move file by copy and delete
100
+ path = self._rewrite_path(path)
101
+ oc.oss_copy_file(path, self._rewrite_path(new_path))
102
+ oc.oss_delete(path)
78
103
 
79
104
  @implements(FileSystem.stat)
80
105
  def stat(self, path: path_type) -> Dict:
81
- ofe = oc.OSSFileEntry(path)
106
+ ofe = oc.OSSFileEntry(self._rewrite_path(path))
82
107
  return ofe.stat()
83
108
 
84
109
  @implements(FileSystem.mkdir)
@@ -87,12 +112,12 @@ class OSSFileSystem(FileSystem):
87
112
 
88
113
  @implements(FileSystem.isdir)
89
114
  def isdir(self, path: path_type) -> bool:
90
- file_entry = oc.OSSFileEntry(path)
115
+ file_entry = oc.OSSFileEntry(self._rewrite_path(path))
91
116
  return file_entry.is_dir()
92
117
 
93
118
  @implements(FileSystem.isfile)
94
119
  def isfile(self, path: path_type) -> bool:
95
- file_entry = oc.OSSFileEntry(path)
120
+ file_entry = oc.OSSFileEntry(self._rewrite_path(path))
96
121
  return file_entry.is_file()
97
122
 
98
123
  @implements(FileSystem._isfilestore)
@@ -101,11 +126,11 @@ class OSSFileSystem(FileSystem):
101
126
 
102
127
  @implements(FileSystem.exists)
103
128
  def exists(self, path: path_type):
104
- return oc.oss_exists(path)
129
+ return oc.oss_exists(self._rewrite_path(path))
105
130
 
106
131
  @implements(FileSystem.open)
107
132
  def open(self, path: path_type, mode: str = "rb") -> OSSIOBase:
108
- file_handle = OSSIOBase(path, mode)
133
+ file_handle = OSSIOBase(self._rewrite_path(path), mode)
109
134
  return file_handle
110
135
 
111
136
  @implements(FileSystem.walk)
@@ -114,10 +139,37 @@ class OSSFileSystem(FileSystem):
114
139
 
115
140
  @implements(FileSystem.glob)
116
141
  def glob(self, path: path_type, recursive: bool = False) -> List[path_type]:
117
- return glob(path, recursive=recursive)
118
-
119
-
120
- def build_oss_path(path: path_type, access_key_id, access_key_secret, end_point):
142
+ return glob(self._rewrite_path(path), recursive=recursive)
143
+
144
+
145
def _rewrite_internal_endpoint(
    endpoint: str, host_enforce_type: HostEnforceType = HostEnforceType.no_enforce
) -> str:
    """
    Add or remove the ``-internal`` suffix on the first label of an endpoint.

    Parameters
    ----------
    endpoint : str
        Endpoint host name.  Empty values, values matching ``_ip_regex``
        and hosts without a ``.`` are returned unchanged.
    host_enforce_type : HostEnforceType
        ``no_enforce`` returns the endpoint as is; ``force_internal``
        appends ``-internal`` to the first label when it is missing;
        ``force_external`` strips a trailing ``-internal`` from the first
        label when it is present.

    Returns
    -------
    str
        The endpoint, rewritten only when the policy requires it.
    """
    if (
        not endpoint
        or host_enforce_type == HostEnforceType.no_enforce
        or _ip_regex.match(endpoint)
    ):
        return endpoint

    # partition() never raises; split(".", 1) with tuple unpacking fails
    # with ValueError on a dot-less host such as "localhost"
    ep_first, dot, ep_rest = endpoint.partition(".")
    if not dot:
        # no domain part to keep apart from the first label: leave untouched
        return endpoint

    suffix = "-internal"
    host_with_internal = ep_first.endswith(suffix)
    if host_enforce_type == HostEnforceType.force_external and host_with_internal:
        # strip only the trailing suffix; str.replace would also remove
        # any earlier "-internal" occurrence inside the label
        return ep_first[: -len(suffix)] + "." + ep_rest
    if host_enforce_type == HostEnforceType.force_internal and not host_with_internal:
        return ep_first + suffix + "." + ep_rest
    return endpoint
163
+
164
+
165
+ def build_oss_path(
166
+ path: path_type,
167
+ endpoint: str = None,
168
+ access_key_id: str = None,
169
+ access_key_secret: str = None,
170
+ security_token: str = None,
171
+ host_enforce_type: HostEnforceType = HostEnforceType.no_enforce,
172
+ ):
121
173
  """
122
174
  Returns a path with oss info.
123
175
  Used to register the access_key_id, access_key_secret and
@@ -127,16 +179,19 @@ def build_oss_path(path: path_type, access_key_id, access_key_secret, end_point)
127
179
  Parameters
128
180
  ----------
129
181
  path : path_type
130
- The original oss url.
182
+ The original OSS url.
183
+
184
+ endpoint : str
185
+ The endpoint of OSS.
131
186
 
132
187
  access_key_id : str
133
- The access key id of oss.
188
+ The access key id of OSS.
134
189
 
135
190
  access_key_secret : str
136
- The access key secret of oss.
191
+ The access key secret of OSS.
137
192
 
138
- end_point : str
139
- The endpoint of oss.
193
+ security_token : str
194
+ The security token of OSS.
140
195
 
141
196
  Returns
142
197
  -------
@@ -146,12 +201,26 @@ def build_oss_path(path: path_type, access_key_id, access_key_secret, end_point)
146
201
  """
147
202
  if isinstance(path, (list, tuple)):
148
203
  path = path[0]
149
- param_dict = {"access_key_id": access_key_id, "end_point": end_point}
150
- id_endpoint = oc.dict_to_url(param_dict)
151
- password = access_key_secret
152
- parse_result = parse.urlparse(path)
153
- new_path = (
154
- f"{parse_result.scheme}://{id_endpoint}:{password}"
155
- f"@{parse_result.netloc}{parse_result.path}"
204
+ parse_result = oc.parse_osspath(path, check_errors=False)
205
+ access_key_id = parse_result.access_key_id or access_key_id
206
+ access_key_secret = parse_result.access_key_secret or access_key_secret
207
+ security_token = parse_result.security_token or security_token
208
+
209
+ scheme = parse_result.scheme or "oss"
210
+ endpoint = _rewrite_internal_endpoint(
211
+ parse_result.endpoint or endpoint, host_enforce_type
156
212
  )
213
+
214
+ if access_key_id and access_key_secret:
215
+ creds = f"{access_key_id}:{access_key_secret}@"
216
+ else:
217
+ creds = ""
218
+
219
+ new_path = f"{scheme}://{creds}{endpoint}/{parse_result.bucket}"
220
+ if parse_result.key:
221
+ new_path += f"/{parse_result.key}"
222
+ if security_token:
223
+ new_path += f"?{urlencode(dict(security_token=security_token))}"
224
+ # reparse to check errors
225
+ oc.parse_osspath(new_path)
157
226
  return new_path