maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -12,15 +12,18 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
import
|
|
16
|
-
import json
|
|
15
|
+
import logging
|
|
17
16
|
import os
|
|
17
|
+
from typing import NamedTuple, Optional
|
|
18
|
+
from urllib.parse import parse_qs, urlparse
|
|
18
19
|
|
|
19
20
|
from ....utils import lazy_import
|
|
20
21
|
from ..base import path_type, stringify_path
|
|
21
22
|
|
|
22
23
|
oss2 = lazy_import("oss2", placeholder=True)
|
|
23
24
|
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
24
27
|
# OSS api time out
|
|
25
28
|
_oss_time_out = 10
|
|
26
29
|
|
|
@@ -37,6 +40,8 @@ class OSSFileEntry:
|
|
|
37
40
|
self._storage_options = storage_options
|
|
38
41
|
|
|
39
42
|
def is_dir(self):
|
|
43
|
+
if self._path.endswith("/"):
|
|
44
|
+
self._is_dir = True
|
|
40
45
|
if self._is_dir is None:
|
|
41
46
|
self._is_dir = oss_isdir(self._path)
|
|
42
47
|
return self._is_dir
|
|
@@ -63,43 +68,81 @@ class OSSFileEntry:
|
|
|
63
68
|
return self._path
|
|
64
69
|
|
|
65
70
|
|
|
66
|
-
|
|
71
|
+
class ParsedOSSPath(NamedTuple):
|
|
72
|
+
endpoint: str
|
|
73
|
+
bucket: str
|
|
74
|
+
key: str
|
|
75
|
+
access_key_id: Optional[str] = None
|
|
76
|
+
access_key_secret: Optional[str] = None
|
|
77
|
+
security_token: Optional[str] = None
|
|
78
|
+
scheme: str = None
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def parse_osspath(path: path_type, check_errors: bool = True) -> ParsedOSSPath:
|
|
67
82
|
# Extract OSS configuration from the encoded URL.
|
|
68
83
|
str_path = stringify_path(path)
|
|
69
|
-
parse_result =
|
|
70
|
-
if parse_result.scheme != "oss":
|
|
84
|
+
parse_result = urlparse(str_path)
|
|
85
|
+
if check_errors and parse_result.scheme != "oss":
|
|
71
86
|
raise ValueError(
|
|
72
87
|
f"Except scheme oss, but got scheme: {parse_result.scheme}"
|
|
73
88
|
f" in path: {str_path}"
|
|
74
89
|
)
|
|
75
|
-
|
|
76
|
-
if not (parse_result.username and parse_result.password):
|
|
77
|
-
raise RuntimeError(r"Please use build_oss_path to add OSS info")
|
|
78
|
-
param_dict = url_to_dict(parse_result.username)
|
|
79
|
-
access_key_id = param_dict["access_key_id"]
|
|
90
|
+
access_key_id = parse_result.username
|
|
80
91
|
access_key_secret = parse_result.password
|
|
81
|
-
|
|
92
|
+
|
|
93
|
+
if not parse_result.query:
|
|
94
|
+
sts_token = None
|
|
95
|
+
else:
|
|
96
|
+
sts_token = parse_qs(parse_result.query).get("security_token", [None])[0]
|
|
97
|
+
|
|
98
|
+
if check_errors and not (access_key_id and access_key_secret):
|
|
99
|
+
raise ValueError(r"No credentials provided")
|
|
100
|
+
|
|
82
101
|
key = parse_result.path
|
|
83
102
|
key = key[1:] if key.startswith("/") else key
|
|
84
|
-
|
|
103
|
+
if "/" not in key:
|
|
104
|
+
bucket, key = key, None
|
|
105
|
+
if check_errors:
|
|
106
|
+
raise ValueError("Need to use format bucket/key to separate bucket and key")
|
|
107
|
+
else:
|
|
108
|
+
bucket, key = key.split("/", 1)
|
|
109
|
+
|
|
110
|
+
endpoint = parse_result.hostname
|
|
111
|
+
if endpoint and parse_result.port:
|
|
112
|
+
endpoint += f":{parse_result.port}"
|
|
113
|
+
return ParsedOSSPath(
|
|
114
|
+
endpoint,
|
|
115
|
+
bucket,
|
|
116
|
+
key,
|
|
117
|
+
access_key_id,
|
|
118
|
+
access_key_secret,
|
|
119
|
+
sts_token,
|
|
120
|
+
parse_result.scheme,
|
|
121
|
+
)
|
|
85
122
|
|
|
86
123
|
|
|
87
|
-
def
|
|
124
|
+
def get_oss_bucket(parsed_path: ParsedOSSPath):
|
|
125
|
+
if parsed_path.security_token is not None:
|
|
126
|
+
auth = oss2.StsAuth(
|
|
127
|
+
parsed_path.access_key_id,
|
|
128
|
+
parsed_path.access_key_secret,
|
|
129
|
+
parsed_path.security_token,
|
|
130
|
+
)
|
|
131
|
+
else:
|
|
132
|
+
auth = oss2.Auth(parsed_path.access_key_id, parsed_path.access_key_secret)
|
|
88
133
|
oss_bucket = oss2.Bucket(
|
|
89
|
-
auth=
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
endpoint=end_point,
|
|
93
|
-
bucket_name=bucket,
|
|
134
|
+
auth=auth,
|
|
135
|
+
endpoint=parsed_path.endpoint,
|
|
136
|
+
bucket_name=parsed_path.bucket,
|
|
94
137
|
connect_timeout=_oss_time_out,
|
|
95
138
|
)
|
|
96
139
|
return oss_bucket
|
|
97
140
|
|
|
98
141
|
|
|
99
142
|
def oss_exists(path: path_type):
|
|
100
|
-
|
|
101
|
-
oss_bucket =
|
|
102
|
-
return oss_bucket.object_exists(key) or oss_isdir(path)
|
|
143
|
+
parsed_path = parse_osspath(path)
|
|
144
|
+
oss_bucket = get_oss_bucket(parsed_path)
|
|
145
|
+
return oss_bucket.object_exists(parsed_path.key) or oss_isdir(path)
|
|
103
146
|
|
|
104
147
|
|
|
105
148
|
def oss_isdir(path: path_type):
|
|
@@ -112,26 +155,71 @@ def oss_isdir(path: path_type):
|
|
|
112
155
|
dirname = stringify_path(path)
|
|
113
156
|
if not dirname.endswith("/"):
|
|
114
157
|
dirname = dirname + "/"
|
|
115
|
-
|
|
116
|
-
|
|
158
|
+
logger.info("Checking isdir for path %s", dirname)
|
|
159
|
+
parsed_path = parse_osspath(dirname)
|
|
160
|
+
oss_bucket = get_oss_bucket(parsed_path)
|
|
117
161
|
isdir = False
|
|
118
|
-
for obj in oss2.ObjectIteratorV2(oss_bucket, prefix=key, max_keys=2):
|
|
119
|
-
if obj.key == key:
|
|
162
|
+
for obj in oss2.ObjectIteratorV2(oss_bucket, prefix=parsed_path.key, max_keys=2):
|
|
163
|
+
if obj.key == parsed_path.key:
|
|
120
164
|
continue
|
|
121
165
|
isdir = True
|
|
122
166
|
break
|
|
123
167
|
return isdir
|
|
124
168
|
|
|
125
169
|
|
|
170
|
+
def oss_delete(path: path_type):
|
|
171
|
+
"""
|
|
172
|
+
Perform both key deletion and prefix deletion. Once no files
|
|
173
|
+
deleted in both scenarios, we can make assertion that the file
|
|
174
|
+
does not exist.
|
|
175
|
+
"""
|
|
176
|
+
parsed_path = parse_osspath(path)
|
|
177
|
+
oss_bucket = get_oss_bucket(parsed_path)
|
|
178
|
+
|
|
179
|
+
try:
|
|
180
|
+
oss_bucket.delete_object(parsed_path.key)
|
|
181
|
+
return
|
|
182
|
+
except oss2.exceptions.NoSuchKey:
|
|
183
|
+
pass
|
|
184
|
+
|
|
185
|
+
is_missing = True
|
|
186
|
+
dir_key = parsed_path.key.rstrip("/") + "/"
|
|
187
|
+
for obj in oss2.ObjectIteratorV2(oss_bucket, prefix=dir_key):
|
|
188
|
+
try:
|
|
189
|
+
oss_bucket.delete_object(obj.key)
|
|
190
|
+
is_missing = False
|
|
191
|
+
except oss2.exceptions.NoSuchKey:
|
|
192
|
+
pass
|
|
193
|
+
if is_missing:
|
|
194
|
+
raise FileNotFoundError("No such file or directory: %s", path)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def oss_copy_file(src_path: path_type, dest_path: path_type):
|
|
198
|
+
# todo implements copy of huge files
|
|
199
|
+
parsed_src_path = parse_osspath(src_path)
|
|
200
|
+
parsed_dest_path = parse_osspath(dest_path)
|
|
201
|
+
try:
|
|
202
|
+
if oss_isdir(src_path):
|
|
203
|
+
raise NotImplementedError("Copying directories not implemented yet")
|
|
204
|
+
except:
|
|
205
|
+
# fixme currently we cannot handle error with iterating files with STS token
|
|
206
|
+
logger.exception("Failed to judge if src is a directory")
|
|
207
|
+
|
|
208
|
+
oss_bucket = get_oss_bucket(parsed_dest_path)
|
|
209
|
+
oss_bucket.copy_object(
|
|
210
|
+
parsed_src_path.bucket, parsed_src_path.key, parsed_dest_path.key
|
|
211
|
+
)
|
|
212
|
+
|
|
213
|
+
|
|
126
214
|
def oss_stat(path: path_type):
|
|
127
215
|
path = stringify_path(path)
|
|
128
|
-
|
|
129
|
-
oss_bucket =
|
|
216
|
+
parsed_path = parse_osspath(path)
|
|
217
|
+
oss_bucket = get_oss_bucket(parsed_path)
|
|
130
218
|
if oss_isdir(path):
|
|
131
219
|
stat = dict(name=path, size=0, modified_time=-1)
|
|
132
220
|
stat["type"] = "directory"
|
|
133
221
|
else:
|
|
134
|
-
meta = oss_bucket.get_object_meta(key)
|
|
222
|
+
meta = oss_bucket.get_object_meta(parsed_path.key)
|
|
135
223
|
stat = dict(
|
|
136
224
|
name=path,
|
|
137
225
|
size=int(meta.headers["Content-Length"]),
|
|
@@ -145,11 +233,11 @@ def oss_scandir(dirname: path_type):
|
|
|
145
233
|
dirname = stringify_path(dirname)
|
|
146
234
|
if not dirname.endswith("/"):
|
|
147
235
|
dirname = dirname + "/"
|
|
148
|
-
|
|
149
|
-
oss_bucket =
|
|
236
|
+
parsed_path = parse_osspath(dirname)
|
|
237
|
+
oss_bucket = get_oss_bucket(parsed_path)
|
|
150
238
|
dirname_set = set()
|
|
151
|
-
for obj in oss2.ObjectIteratorV2(oss_bucket, prefix=key):
|
|
152
|
-
rel_path = obj.key[len(key) :]
|
|
239
|
+
for obj in oss2.ObjectIteratorV2(oss_bucket, prefix=parsed_path.key):
|
|
240
|
+
rel_path = obj.key[len(parsed_path.key) :]
|
|
153
241
|
try:
|
|
154
242
|
inside_dirname, inside_filename = rel_path.split("/", 1)
|
|
155
243
|
except ValueError:
|
|
@@ -160,11 +248,11 @@ def oss_scandir(dirname: path_type):
|
|
|
160
248
|
continue
|
|
161
249
|
dirname_set.add(inside_dirname)
|
|
162
250
|
yield OSSFileEntry(
|
|
163
|
-
|
|
251
|
+
"/".join([dirname, inside_dirname]),
|
|
164
252
|
is_dir=True,
|
|
165
253
|
is_file=False,
|
|
166
254
|
stat={
|
|
167
|
-
"name":
|
|
255
|
+
"name": "/".join([dirname, inside_dirname]),
|
|
168
256
|
"type": "directory",
|
|
169
257
|
"size": 0,
|
|
170
258
|
"modified_time": -1,
|
|
@@ -172,27 +260,13 @@ def oss_scandir(dirname: path_type):
|
|
|
172
260
|
)
|
|
173
261
|
else:
|
|
174
262
|
yield OSSFileEntry(
|
|
175
|
-
|
|
263
|
+
"/".join([dirname, inside_filename]),
|
|
176
264
|
is_dir=False,
|
|
177
265
|
is_file=True,
|
|
178
266
|
stat={
|
|
179
|
-
"name":
|
|
267
|
+
"name": "/".join([dirname, inside_filename]),
|
|
180
268
|
"type": "file",
|
|
181
269
|
"size": obj.size,
|
|
182
270
|
"modified_time": obj.last_modified,
|
|
183
271
|
},
|
|
184
272
|
)
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
def dict_to_url(param: dict):
|
|
188
|
-
# Encode the dictionary with url-safe-base64.
|
|
189
|
-
str_param = json.dumps(param)
|
|
190
|
-
url_param = base64.urlsafe_b64encode(bytes(str_param, encoding="utf8"))
|
|
191
|
-
return bytes.decode(url_param, encoding="utf8")
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
def url_to_dict(url_param: str):
|
|
195
|
-
# Decode url-safe-base64 encoded string.
|
|
196
|
-
bytes_param = bytes(url_param, encoding="utf8")
|
|
197
|
-
str_param = bytes.decode(base64.urlsafe_b64decode(bytes_param), encoding="utf8")
|
|
198
|
-
return json.loads(str_param)
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
from io import IOBase
|
|
16
16
|
|
|
17
17
|
from ....utils import lazy_import
|
|
18
|
-
from .common import oss_stat, parse_osspath
|
|
18
|
+
from .common import get_oss_bucket, oss_stat, parse_osspath
|
|
19
19
|
|
|
20
20
|
oss2 = lazy_import("oss2", placeholder=True)
|
|
21
21
|
|
|
@@ -23,20 +23,20 @@ oss2 = lazy_import("oss2", placeholder=True)
|
|
|
23
23
|
class OSSIOBase(IOBase):
|
|
24
24
|
def __init__(self, path, mode):
|
|
25
25
|
self._path = path
|
|
26
|
-
(
|
|
27
|
-
|
|
28
|
-
self._key_name,
|
|
29
|
-
self._access_key_id,
|
|
30
|
-
self._access_key_secret,
|
|
31
|
-
self._end_point,
|
|
32
|
-
) = parse_osspath(self._path)
|
|
33
|
-
self._bucket = self._get_bucket()
|
|
26
|
+
self._parsed_path = parse_osspath(self._path)
|
|
27
|
+
self._bucket = get_oss_bucket(self._parsed_path)
|
|
34
28
|
self._current_pos = 0
|
|
35
29
|
self._size = None
|
|
36
30
|
self._buffer = b""
|
|
37
31
|
self._buffer_size = 1 * 1024
|
|
38
32
|
self._mode = mode
|
|
39
33
|
|
|
34
|
+
if mode and mode.startswith("w"):
|
|
35
|
+
try:
|
|
36
|
+
self._bucket.delete_object(self._parsed_path.key)
|
|
37
|
+
except oss2.exceptions.NoSuchKey:
|
|
38
|
+
pass
|
|
39
|
+
|
|
40
40
|
@property
|
|
41
41
|
def mode(self):
|
|
42
42
|
return self._mode
|
|
@@ -44,16 +44,6 @@ class OSSIOBase(IOBase):
|
|
|
44
44
|
def fileno(self) -> int:
|
|
45
45
|
raise AttributeError
|
|
46
46
|
|
|
47
|
-
def _get_bucket(self):
|
|
48
|
-
return oss2.Bucket(
|
|
49
|
-
auth=oss2.Auth(
|
|
50
|
-
access_key_id=self._access_key_id,
|
|
51
|
-
access_key_secret=self._access_key_secret,
|
|
52
|
-
),
|
|
53
|
-
endpoint=self._end_point,
|
|
54
|
-
bucket_name=self._bucket_name,
|
|
55
|
-
)
|
|
56
|
-
|
|
57
47
|
def _get_size(self):
|
|
58
48
|
if self._size is None:
|
|
59
49
|
self._size = int(oss_stat(self._path)["size"])
|
|
@@ -79,7 +69,7 @@ class OSSIOBase(IOBase):
|
|
|
79
69
|
return self._current_pos
|
|
80
70
|
|
|
81
71
|
def seekable(self):
|
|
82
|
-
return
|
|
72
|
+
return "r" in self._mode
|
|
83
73
|
|
|
84
74
|
def read(self, size=-1):
|
|
85
75
|
"""
|
|
@@ -97,12 +87,12 @@ class OSSIOBase(IOBase):
|
|
|
97
87
|
return b""
|
|
98
88
|
elif size < 0:
|
|
99
89
|
obj = self._bucket.get_object(
|
|
100
|
-
self.
|
|
90
|
+
self._parsed_path.key, byte_range=(self._current_pos, None)
|
|
101
91
|
)
|
|
102
92
|
self._current_pos = self._get_size()
|
|
103
93
|
else:
|
|
104
94
|
obj = self._bucket.get_object(
|
|
105
|
-
self.
|
|
95
|
+
self._parsed_path.key,
|
|
106
96
|
byte_range=(self._current_pos, self._current_pos + size - 1),
|
|
107
97
|
)
|
|
108
98
|
self._current_pos = self._current_pos + size
|
|
@@ -117,7 +107,7 @@ class OSSIOBase(IOBase):
|
|
|
117
107
|
self._get_size() - 1, self._current_pos + self._buffer_size - 1
|
|
118
108
|
)
|
|
119
109
|
buffer = self._bucket.get_object(
|
|
120
|
-
self.
|
|
110
|
+
self._parsed_path.key, byte_range=(self._current_pos, read_to)
|
|
121
111
|
).read()
|
|
122
112
|
if not buffer:
|
|
123
113
|
return 1
|
|
@@ -145,11 +135,17 @@ class OSSIOBase(IOBase):
|
|
|
145
135
|
break
|
|
146
136
|
return bytes(res)
|
|
147
137
|
|
|
138
|
+
def write(self, block):
|
|
139
|
+
append_result = self._bucket.append_object(
|
|
140
|
+
self._parsed_path.key, self._current_pos, block
|
|
141
|
+
)
|
|
142
|
+
self._current_pos = append_result.next_position
|
|
143
|
+
|
|
148
144
|
def readable(self):
|
|
149
|
-
return
|
|
145
|
+
return "r" in self._mode
|
|
150
146
|
|
|
151
147
|
def writable(self):
|
|
152
|
-
return
|
|
148
|
+
return "w" in self._mode or "a" in self._mode
|
|
153
149
|
|
|
154
150
|
def close(self):
|
|
155
151
|
# already closed by oss
|
maxframe/lib/filesystem/base.py
CHANGED
|
@@ -247,7 +247,7 @@ class FileSystem(ABC):
|
|
|
247
247
|
def parse_from_path(uri: str):
|
|
248
248
|
parsed_uri = urlparse(uri)
|
|
249
249
|
options = dict()
|
|
250
|
-
options["host"] = parsed_uri.
|
|
250
|
+
options["host"] = parsed_uri.hostname
|
|
251
251
|
if parsed_uri.port:
|
|
252
252
|
options["port"] = parsed_uri.port
|
|
253
253
|
if parsed_uri.username:
|
maxframe/lib/filesystem/core.py
CHANGED
|
@@ -49,7 +49,7 @@ def get_fs(path: path_type, storage_options: Dict = None) -> FileSystem:
|
|
|
49
49
|
|
|
50
50
|
if scheme in _filesystems:
|
|
51
51
|
file_system_type = _filesystems[scheme]
|
|
52
|
-
if scheme == "file"
|
|
52
|
+
if scheme == "file":
|
|
53
53
|
# local file systems are singletons.
|
|
54
54
|
return file_system_type.get_instance()
|
|
55
55
|
else:
|
maxframe/lib/filesystem/oss.py
CHANGED
|
@@ -12,8 +12,10 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
|
|
16
|
-
|
|
15
|
+
import enum
|
|
16
|
+
import re
|
|
17
|
+
from typing import Dict, Iterator, List, Tuple, Union
|
|
18
|
+
from urllib.parse import urlencode
|
|
17
19
|
|
|
18
20
|
from ...utils import implements, lazy_import
|
|
19
21
|
from ._oss_lib import common as oc
|
|
@@ -22,18 +24,43 @@ from ._oss_lib.handle import OSSIOBase
|
|
|
22
24
|
from .base import FileSystem, path_type
|
|
23
25
|
|
|
24
26
|
oss2 = lazy_import("oss2", placeholder=True)
|
|
27
|
+
_ip_regex = re.compile(r"^([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})")
|
|
25
28
|
|
|
26
29
|
_oss_time_out = 10
|
|
27
30
|
|
|
28
31
|
|
|
29
|
-
class
|
|
30
|
-
|
|
32
|
+
class HostEnforceType(enum.Enum):
|
|
33
|
+
no_enforce = 0
|
|
34
|
+
force_internal = 1
|
|
35
|
+
force_external = 2
|
|
36
|
+
|
|
31
37
|
|
|
32
|
-
|
|
33
|
-
def
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
38
|
+
class OSSFileSystem(FileSystem):
|
|
39
|
+
def __init__(
|
|
40
|
+
self,
|
|
41
|
+
access_key_id: str = None,
|
|
42
|
+
access_key_secret: str = None,
|
|
43
|
+
security_token: str = None,
|
|
44
|
+
host_enforce_type: Union[HostEnforceType, str] = HostEnforceType.no_enforce,
|
|
45
|
+
**kw,
|
|
46
|
+
):
|
|
47
|
+
self._access_key_id = access_key_id or kw.get("user")
|
|
48
|
+
self._access_key_secret = access_key_secret or kw.get("password")
|
|
49
|
+
self._security_token = security_token
|
|
50
|
+
self._host_enforce_type = (
|
|
51
|
+
host_enforce_type
|
|
52
|
+
if isinstance(host_enforce_type, HostEnforceType)
|
|
53
|
+
else getattr(HostEnforceType, host_enforce_type)
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
def _rewrite_path(self, path: str) -> str:
|
|
57
|
+
return build_oss_path(
|
|
58
|
+
path,
|
|
59
|
+
access_key_id=self._access_key_id,
|
|
60
|
+
access_key_secret=self._access_key_secret,
|
|
61
|
+
security_token=self._security_token,
|
|
62
|
+
host_enforce_type=self._host_enforce_type,
|
|
63
|
+
)
|
|
37
64
|
|
|
38
65
|
@implements(FileSystem.cat)
|
|
39
66
|
def cat(self, path: path_type):
|
|
@@ -46,39 +73,37 @@ class OSSFileSystem(FileSystem):
|
|
|
46
73
|
if not file_entry.is_dir():
|
|
47
74
|
raise OSError("ls for file is not supported")
|
|
48
75
|
else:
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
)
|
|
52
|
-
oss_bucket = oss2.Bucket(
|
|
53
|
-
auth=oss2.Auth(
|
|
54
|
-
access_key_id=access_key_id, access_key_secret=access_key_secret
|
|
55
|
-
),
|
|
56
|
-
endpoint=end_point,
|
|
57
|
-
bucket_name=bucket,
|
|
58
|
-
connect_timeout=_oss_time_out,
|
|
59
|
-
)
|
|
60
|
-
for obj in oss2.ObjectIteratorV2(oss_bucket, prefix=key):
|
|
76
|
+
parsed_path = oc.parse_osspath(path)
|
|
77
|
+
oss_bucket = oc.get_oss_bucket(parsed_path)
|
|
78
|
+
for obj in oss2.ObjectIteratorV2(oss_bucket, prefix=parsed_path.key):
|
|
61
79
|
if obj.key.endswith("/"):
|
|
62
80
|
continue
|
|
63
|
-
obj_path = rf"
|
|
81
|
+
obj_path = rf"{parsed_path.bucket}/{obj.key}"
|
|
64
82
|
file_list.append(
|
|
65
83
|
build_oss_path(
|
|
66
|
-
obj_path,
|
|
84
|
+
obj_path,
|
|
85
|
+
parsed_path.endpoint,
|
|
86
|
+
parsed_path.access_key_id,
|
|
87
|
+
parsed_path.access_key_secret,
|
|
88
|
+
parsed_path.security_token,
|
|
67
89
|
)
|
|
68
90
|
)
|
|
69
91
|
return file_list
|
|
70
92
|
|
|
71
93
|
@implements(FileSystem.delete)
|
|
72
94
|
def delete(self, path: path_type, recursive: bool = False):
|
|
73
|
-
|
|
95
|
+
return oc.oss_delete(self._rewrite_path(path))
|
|
74
96
|
|
|
75
97
|
@implements(FileSystem.rename)
|
|
76
98
|
def rename(self, path: path_type, new_path: path_type):
|
|
77
|
-
|
|
99
|
+
# in OSS, you need to move file by copy and delete
|
|
100
|
+
path = self._rewrite_path(path)
|
|
101
|
+
oc.oss_copy_file(path, self._rewrite_path(new_path))
|
|
102
|
+
oc.oss_delete(path)
|
|
78
103
|
|
|
79
104
|
@implements(FileSystem.stat)
|
|
80
105
|
def stat(self, path: path_type) -> Dict:
|
|
81
|
-
ofe = oc.OSSFileEntry(path)
|
|
106
|
+
ofe = oc.OSSFileEntry(self._rewrite_path(path))
|
|
82
107
|
return ofe.stat()
|
|
83
108
|
|
|
84
109
|
@implements(FileSystem.mkdir)
|
|
@@ -87,12 +112,12 @@ class OSSFileSystem(FileSystem):
|
|
|
87
112
|
|
|
88
113
|
@implements(FileSystem.isdir)
|
|
89
114
|
def isdir(self, path: path_type) -> bool:
|
|
90
|
-
file_entry = oc.OSSFileEntry(path)
|
|
115
|
+
file_entry = oc.OSSFileEntry(self._rewrite_path(path))
|
|
91
116
|
return file_entry.is_dir()
|
|
92
117
|
|
|
93
118
|
@implements(FileSystem.isfile)
|
|
94
119
|
def isfile(self, path: path_type) -> bool:
|
|
95
|
-
file_entry = oc.OSSFileEntry(path)
|
|
120
|
+
file_entry = oc.OSSFileEntry(self._rewrite_path(path))
|
|
96
121
|
return file_entry.is_file()
|
|
97
122
|
|
|
98
123
|
@implements(FileSystem._isfilestore)
|
|
@@ -101,11 +126,11 @@ class OSSFileSystem(FileSystem):
|
|
|
101
126
|
|
|
102
127
|
@implements(FileSystem.exists)
|
|
103
128
|
def exists(self, path: path_type):
|
|
104
|
-
return oc.oss_exists(path)
|
|
129
|
+
return oc.oss_exists(self._rewrite_path(path))
|
|
105
130
|
|
|
106
131
|
@implements(FileSystem.open)
|
|
107
132
|
def open(self, path: path_type, mode: str = "rb") -> OSSIOBase:
|
|
108
|
-
file_handle = OSSIOBase(path, mode)
|
|
133
|
+
file_handle = OSSIOBase(self._rewrite_path(path), mode)
|
|
109
134
|
return file_handle
|
|
110
135
|
|
|
111
136
|
@implements(FileSystem.walk)
|
|
@@ -114,10 +139,37 @@ class OSSFileSystem(FileSystem):
|
|
|
114
139
|
|
|
115
140
|
@implements(FileSystem.glob)
|
|
116
141
|
def glob(self, path: path_type, recursive: bool = False) -> List[path_type]:
|
|
117
|
-
return glob(path, recursive=recursive)
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
def
|
|
142
|
+
return glob(self._rewrite_path(path), recursive=recursive)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _rewrite_internal_endpoint(
|
|
146
|
+
endpoint: str, host_enforce_type: HostEnforceType = HostEnforceType.no_enforce
|
|
147
|
+
) -> str:
|
|
148
|
+
if (
|
|
149
|
+
not endpoint
|
|
150
|
+
or host_enforce_type == HostEnforceType.no_enforce
|
|
151
|
+
or _ip_regex.match(endpoint)
|
|
152
|
+
):
|
|
153
|
+
return endpoint
|
|
154
|
+
|
|
155
|
+
ep_first, ep_rest = endpoint.split(".", 1)
|
|
156
|
+
host_with_internal = ep_first.endswith("-internal")
|
|
157
|
+
if host_enforce_type == HostEnforceType.force_external and host_with_internal:
|
|
158
|
+
return ep_first.replace("-internal", "") + "." + ep_rest
|
|
159
|
+
elif host_enforce_type == HostEnforceType.force_internal and not host_with_internal:
|
|
160
|
+
return ep_first + "-internal." + ep_rest
|
|
161
|
+
else:
|
|
162
|
+
return endpoint
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def build_oss_path(
|
|
166
|
+
path: path_type,
|
|
167
|
+
endpoint: str = None,
|
|
168
|
+
access_key_id: str = None,
|
|
169
|
+
access_key_secret: str = None,
|
|
170
|
+
security_token: str = None,
|
|
171
|
+
host_enforce_type: HostEnforceType = HostEnforceType.no_enforce,
|
|
172
|
+
):
|
|
121
173
|
"""
|
|
122
174
|
Returns a path with oss info.
|
|
123
175
|
Used to register the access_key_id, access_key_secret and
|
|
@@ -127,16 +179,19 @@ def build_oss_path(path: path_type, access_key_id, access_key_secret, end_point)
|
|
|
127
179
|
Parameters
|
|
128
180
|
----------
|
|
129
181
|
path : path_type
|
|
130
|
-
The original
|
|
182
|
+
The original OSS url.
|
|
183
|
+
|
|
184
|
+
endpoint : str
|
|
185
|
+
The endpoint of OSS.
|
|
131
186
|
|
|
132
187
|
access_key_id : str
|
|
133
|
-
The access key id of
|
|
188
|
+
The access key id of OSS.
|
|
134
189
|
|
|
135
190
|
access_key_secret : str
|
|
136
|
-
The access key secret of
|
|
191
|
+
The access key secret of OSS.
|
|
137
192
|
|
|
138
|
-
|
|
139
|
-
The
|
|
193
|
+
security_token : str
|
|
194
|
+
The security token of OSS.
|
|
140
195
|
|
|
141
196
|
Returns
|
|
142
197
|
-------
|
|
@@ -146,12 +201,26 @@ def build_oss_path(path: path_type, access_key_id, access_key_secret, end_point)
|
|
|
146
201
|
"""
|
|
147
202
|
if isinstance(path, (list, tuple)):
|
|
148
203
|
path = path[0]
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
204
|
+
parse_result = oc.parse_osspath(path, check_errors=False)
|
|
205
|
+
access_key_id = parse_result.access_key_id or access_key_id
|
|
206
|
+
access_key_secret = parse_result.access_key_secret or access_key_secret
|
|
207
|
+
security_token = parse_result.security_token or security_token
|
|
208
|
+
|
|
209
|
+
scheme = parse_result.scheme or "oss"
|
|
210
|
+
endpoint = _rewrite_internal_endpoint(
|
|
211
|
+
parse_result.endpoint or endpoint, host_enforce_type
|
|
156
212
|
)
|
|
213
|
+
|
|
214
|
+
if access_key_id and access_key_secret:
|
|
215
|
+
creds = f"{access_key_id}:{access_key_secret}@"
|
|
216
|
+
else:
|
|
217
|
+
creds = ""
|
|
218
|
+
|
|
219
|
+
new_path = f"{scheme}://{creds}{endpoint}/{parse_result.bucket}"
|
|
220
|
+
if parse_result.key:
|
|
221
|
+
new_path += f"/{parse_result.key}"
|
|
222
|
+
if security_token:
|
|
223
|
+
new_path += f"?{urlencode(dict(security_token=security_token))}"
|
|
224
|
+
# reparse to check errors
|
|
225
|
+
oc.parse_osspath(new_path)
|
|
157
226
|
return new_path
|