maxframe 2.0.0b2__cp37-cp37m-win_amd64.whl → 2.2.0__cp37-cp37m-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win_amd64.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +6 -6
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +34 -1
- maxframe/codegen/spe/dataframe/misc.py +9 -33
- maxframe/codegen/spe/dataframe/reduction.py +14 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +30 -17
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +70 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +44 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +1 -1
- maxframe/core/graph/core.cp37-win_amd64.pyd +0 -0
- maxframe/core/graph/entity.py +1 -2
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +10 -3
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +14 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +63 -118
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +2 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +5 -1
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +30 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +12 -1
- maxframe/dataframe/groupby/aggregation.py +78 -45
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +18 -2
- maxframe/dataframe/groupby/core.py +96 -12
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +20 -1
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +1 -1
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +12 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +16 -10
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +51 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +94 -0
- maxframe/dataframe/misc/tests/test_misc.py +13 -2
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +13 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +29 -15
- maxframe/dataframe/reduction/aggregation.py +38 -9
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +100 -0
- maxframe/dataframe/reduction/argmin.py +100 -0
- maxframe/dataframe/reduction/core.py +65 -18
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/nunique.py +9 -8
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +8 -0
- maxframe/dataframe/sort/argsort.py +62 -0
- maxframe/dataframe/sort/core.py +1 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +95 -26
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +86 -1
- maxframe/learn/contrib/xgboost/train.py +5 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/utils/__init__.py +1 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +37 -0
- maxframe/learn/utils/odpsio.py +193 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +122 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win_amd64.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +33 -15
- maxframe/protocol.py +12 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win_amd64.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +29 -2
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/tensor/core.py +3 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_utils.py +43 -1
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +27 -2
- maxframe/utils.py +193 -19
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/RECORD +391 -236
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +4 -1
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0
|
@@ -12,15 +12,18 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
import
|
|
16
|
-
import json
|
|
15
|
+
import logging
|
|
17
16
|
import os
|
|
17
|
+
from typing import NamedTuple, Optional
|
|
18
|
+
from urllib.parse import parse_qs, urlparse
|
|
18
19
|
|
|
19
20
|
from ....utils import lazy_import
|
|
20
21
|
from ..base import path_type, stringify_path
|
|
21
22
|
|
|
22
23
|
oss2 = lazy_import("oss2", placeholder=True)
|
|
23
24
|
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
24
27
|
# OSS api time out
|
|
25
28
|
_oss_time_out = 10
|
|
26
29
|
|
|
@@ -63,43 +66,81 @@ class OSSFileEntry:
|
|
|
63
66
|
return self._path
|
|
64
67
|
|
|
65
68
|
|
|
66
|
-
|
|
69
|
+
class ParsedOSSPath(NamedTuple):
|
|
70
|
+
endpoint: str
|
|
71
|
+
bucket: str
|
|
72
|
+
key: str
|
|
73
|
+
access_key_id: Optional[str] = None
|
|
74
|
+
access_key_secret: Optional[str] = None
|
|
75
|
+
security_token: Optional[str] = None
|
|
76
|
+
scheme: str = None
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def parse_osspath(path: path_type, check_errors: bool = True) -> ParsedOSSPath:
|
|
67
80
|
# Extract OSS configuration from the encoded URL.
|
|
68
81
|
str_path = stringify_path(path)
|
|
69
|
-
parse_result =
|
|
70
|
-
if parse_result.scheme != "oss":
|
|
82
|
+
parse_result = urlparse(str_path)
|
|
83
|
+
if check_errors and parse_result.scheme != "oss":
|
|
71
84
|
raise ValueError(
|
|
72
85
|
f"Except scheme oss, but got scheme: {parse_result.scheme}"
|
|
73
86
|
f" in path: {str_path}"
|
|
74
87
|
)
|
|
75
|
-
|
|
76
|
-
if not (parse_result.username and parse_result.password):
|
|
77
|
-
raise RuntimeError(r"Please use build_oss_path to add OSS info")
|
|
78
|
-
param_dict = url_to_dict(parse_result.username)
|
|
79
|
-
access_key_id = param_dict["access_key_id"]
|
|
88
|
+
access_key_id = parse_result.username
|
|
80
89
|
access_key_secret = parse_result.password
|
|
81
|
-
|
|
90
|
+
|
|
91
|
+
if not parse_result.query:
|
|
92
|
+
sts_token = None
|
|
93
|
+
else:
|
|
94
|
+
sts_token = parse_qs(parse_result.query).get("security_token", [None])[0]
|
|
95
|
+
|
|
96
|
+
if check_errors and not (access_key_id and access_key_secret):
|
|
97
|
+
raise ValueError(r"No credentials provided")
|
|
98
|
+
|
|
82
99
|
key = parse_result.path
|
|
83
100
|
key = key[1:] if key.startswith("/") else key
|
|
84
|
-
|
|
101
|
+
if "/" not in key:
|
|
102
|
+
bucket, key = key, None
|
|
103
|
+
if check_errors:
|
|
104
|
+
raise ValueError("Need to use format bucket/key to separate bucket and key")
|
|
105
|
+
else:
|
|
106
|
+
bucket, key = key.split("/", 1)
|
|
107
|
+
|
|
108
|
+
endpoint = parse_result.hostname
|
|
109
|
+
if endpoint and parse_result.port:
|
|
110
|
+
endpoint += f":{parse_result.port}"
|
|
111
|
+
return ParsedOSSPath(
|
|
112
|
+
endpoint,
|
|
113
|
+
bucket,
|
|
114
|
+
key,
|
|
115
|
+
access_key_id,
|
|
116
|
+
access_key_secret,
|
|
117
|
+
sts_token,
|
|
118
|
+
parse_result.scheme,
|
|
119
|
+
)
|
|
85
120
|
|
|
86
121
|
|
|
87
|
-
def
|
|
122
|
+
def get_oss_bucket(parsed_path: ParsedOSSPath):
|
|
123
|
+
if parsed_path.security_token is not None:
|
|
124
|
+
auth = oss2.StsAuth(
|
|
125
|
+
parsed_path.access_key_id,
|
|
126
|
+
parsed_path.access_key_secret,
|
|
127
|
+
parsed_path.security_token,
|
|
128
|
+
)
|
|
129
|
+
else:
|
|
130
|
+
auth = oss2.Auth(parsed_path.access_key_id, parsed_path.access_key_secret)
|
|
88
131
|
oss_bucket = oss2.Bucket(
|
|
89
|
-
auth=
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
endpoint=end_point,
|
|
93
|
-
bucket_name=bucket,
|
|
132
|
+
auth=auth,
|
|
133
|
+
endpoint=parsed_path.endpoint,
|
|
134
|
+
bucket_name=parsed_path.bucket,
|
|
94
135
|
connect_timeout=_oss_time_out,
|
|
95
136
|
)
|
|
96
137
|
return oss_bucket
|
|
97
138
|
|
|
98
139
|
|
|
99
140
|
def oss_exists(path: path_type):
|
|
100
|
-
|
|
101
|
-
oss_bucket =
|
|
102
|
-
return oss_bucket.object_exists(key) or oss_isdir(path)
|
|
141
|
+
parsed_path = parse_osspath(path)
|
|
142
|
+
oss_bucket = get_oss_bucket(parsed_path)
|
|
143
|
+
return oss_bucket.object_exists(parsed_path.key) or oss_isdir(path)
|
|
103
144
|
|
|
104
145
|
|
|
105
146
|
def oss_isdir(path: path_type):
|
|
@@ -112,26 +153,71 @@ def oss_isdir(path: path_type):
|
|
|
112
153
|
dirname = stringify_path(path)
|
|
113
154
|
if not dirname.endswith("/"):
|
|
114
155
|
dirname = dirname + "/"
|
|
115
|
-
|
|
116
|
-
|
|
156
|
+
logger.info("Checking isdir for path %s", dirname)
|
|
157
|
+
parsed_path = parse_osspath(dirname)
|
|
158
|
+
oss_bucket = get_oss_bucket(parsed_path)
|
|
117
159
|
isdir = False
|
|
118
|
-
for obj in oss2.ObjectIteratorV2(oss_bucket, prefix=key, max_keys=2):
|
|
119
|
-
if obj.key == key:
|
|
160
|
+
for obj in oss2.ObjectIteratorV2(oss_bucket, prefix=parsed_path.key, max_keys=2):
|
|
161
|
+
if obj.key == parsed_path.key:
|
|
120
162
|
continue
|
|
121
163
|
isdir = True
|
|
122
164
|
break
|
|
123
165
|
return isdir
|
|
124
166
|
|
|
125
167
|
|
|
168
|
+
def oss_delete(path: path_type):
|
|
169
|
+
"""
|
|
170
|
+
Perform both key deletion and prefix deletion. Once no files
|
|
171
|
+
deleted in both scenarios, we can make assertion that the file
|
|
172
|
+
does not exist.
|
|
173
|
+
"""
|
|
174
|
+
parsed_path = parse_osspath(path)
|
|
175
|
+
oss_bucket = get_oss_bucket(parsed_path)
|
|
176
|
+
|
|
177
|
+
try:
|
|
178
|
+
oss_bucket.delete_object(parsed_path.key)
|
|
179
|
+
return
|
|
180
|
+
except oss2.exceptions.NoSuchKey:
|
|
181
|
+
pass
|
|
182
|
+
|
|
183
|
+
is_missing = True
|
|
184
|
+
dir_key = parsed_path.key.rstrip("/") + "/"
|
|
185
|
+
for obj in oss2.ObjectIteratorV2(oss_bucket, prefix=dir_key):
|
|
186
|
+
try:
|
|
187
|
+
oss_bucket.delete_object(obj.key)
|
|
188
|
+
is_missing = False
|
|
189
|
+
except oss2.exceptions.NoSuchKey:
|
|
190
|
+
pass
|
|
191
|
+
if is_missing:
|
|
192
|
+
raise FileNotFoundError("No such file or directory: %s", path)
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def oss_copy_file(src_path: path_type, dest_path: path_type):
|
|
196
|
+
# todo implements copy of huge files
|
|
197
|
+
parsed_src_path = parse_osspath(src_path)
|
|
198
|
+
parsed_dest_path = parse_osspath(dest_path)
|
|
199
|
+
try:
|
|
200
|
+
if oss_isdir(src_path):
|
|
201
|
+
raise NotImplementedError("Copying directories not implemented yet")
|
|
202
|
+
except:
|
|
203
|
+
# fixme currently we cannot handle error with iterating files with STS token
|
|
204
|
+
logger.exception("Failed to judge if src is a directory")
|
|
205
|
+
|
|
206
|
+
oss_bucket = get_oss_bucket(parsed_dest_path)
|
|
207
|
+
oss_bucket.copy_object(
|
|
208
|
+
parsed_src_path.bucket, parsed_src_path.key, parsed_dest_path.key
|
|
209
|
+
)
|
|
210
|
+
|
|
211
|
+
|
|
126
212
|
def oss_stat(path: path_type):
|
|
127
213
|
path = stringify_path(path)
|
|
128
|
-
|
|
129
|
-
oss_bucket =
|
|
214
|
+
parsed_path = parse_osspath(path)
|
|
215
|
+
oss_bucket = get_oss_bucket(parsed_path)
|
|
130
216
|
if oss_isdir(path):
|
|
131
217
|
stat = dict(name=path, size=0, modified_time=-1)
|
|
132
218
|
stat["type"] = "directory"
|
|
133
219
|
else:
|
|
134
|
-
meta = oss_bucket.get_object_meta(key)
|
|
220
|
+
meta = oss_bucket.get_object_meta(parsed_path.key)
|
|
135
221
|
stat = dict(
|
|
136
222
|
name=path,
|
|
137
223
|
size=int(meta.headers["Content-Length"]),
|
|
@@ -145,11 +231,11 @@ def oss_scandir(dirname: path_type):
|
|
|
145
231
|
dirname = stringify_path(dirname)
|
|
146
232
|
if not dirname.endswith("/"):
|
|
147
233
|
dirname = dirname + "/"
|
|
148
|
-
|
|
149
|
-
oss_bucket =
|
|
234
|
+
parsed_path = parse_osspath(dirname)
|
|
235
|
+
oss_bucket = get_oss_bucket(parsed_path)
|
|
150
236
|
dirname_set = set()
|
|
151
|
-
for obj in oss2.ObjectIteratorV2(oss_bucket, prefix=key):
|
|
152
|
-
rel_path = obj.key[len(key) :]
|
|
237
|
+
for obj in oss2.ObjectIteratorV2(oss_bucket, prefix=parsed_path.key):
|
|
238
|
+
rel_path = obj.key[len(parsed_path.key) :]
|
|
153
239
|
try:
|
|
154
240
|
inside_dirname, inside_filename = rel_path.split("/", 1)
|
|
155
241
|
except ValueError:
|
|
@@ -160,11 +246,11 @@ def oss_scandir(dirname: path_type):
|
|
|
160
246
|
continue
|
|
161
247
|
dirname_set.add(inside_dirname)
|
|
162
248
|
yield OSSFileEntry(
|
|
163
|
-
|
|
249
|
+
"/".join([dirname, inside_dirname]),
|
|
164
250
|
is_dir=True,
|
|
165
251
|
is_file=False,
|
|
166
252
|
stat={
|
|
167
|
-
"name":
|
|
253
|
+
"name": "/".join([dirname, inside_dirname]),
|
|
168
254
|
"type": "directory",
|
|
169
255
|
"size": 0,
|
|
170
256
|
"modified_time": -1,
|
|
@@ -172,27 +258,13 @@ def oss_scandir(dirname: path_type):
|
|
|
172
258
|
)
|
|
173
259
|
else:
|
|
174
260
|
yield OSSFileEntry(
|
|
175
|
-
|
|
261
|
+
"/".join([dirname, inside_filename]),
|
|
176
262
|
is_dir=False,
|
|
177
263
|
is_file=True,
|
|
178
264
|
stat={
|
|
179
|
-
"name":
|
|
265
|
+
"name": "/".join([dirname, inside_filename]),
|
|
180
266
|
"type": "file",
|
|
181
267
|
"size": obj.size,
|
|
182
268
|
"modified_time": obj.last_modified,
|
|
183
269
|
},
|
|
184
270
|
)
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
def dict_to_url(param: dict):
|
|
188
|
-
# Encode the dictionary with url-safe-base64.
|
|
189
|
-
str_param = json.dumps(param)
|
|
190
|
-
url_param = base64.urlsafe_b64encode(bytes(str_param, encoding="utf8"))
|
|
191
|
-
return bytes.decode(url_param, encoding="utf8")
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
def url_to_dict(url_param: str):
|
|
195
|
-
# Decode url-safe-base64 encoded string.
|
|
196
|
-
bytes_param = bytes(url_param, encoding="utf8")
|
|
197
|
-
str_param = bytes.decode(base64.urlsafe_b64decode(bytes_param), encoding="utf8")
|
|
198
|
-
return json.loads(str_param)
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
from io import IOBase
|
|
16
16
|
|
|
17
17
|
from ....utils import lazy_import
|
|
18
|
-
from .common import oss_stat, parse_osspath
|
|
18
|
+
from .common import get_oss_bucket, oss_stat, parse_osspath
|
|
19
19
|
|
|
20
20
|
oss2 = lazy_import("oss2", placeholder=True)
|
|
21
21
|
|
|
@@ -23,20 +23,20 @@ oss2 = lazy_import("oss2", placeholder=True)
|
|
|
23
23
|
class OSSIOBase(IOBase):
|
|
24
24
|
def __init__(self, path, mode):
|
|
25
25
|
self._path = path
|
|
26
|
-
(
|
|
27
|
-
|
|
28
|
-
self._key_name,
|
|
29
|
-
self._access_key_id,
|
|
30
|
-
self._access_key_secret,
|
|
31
|
-
self._end_point,
|
|
32
|
-
) = parse_osspath(self._path)
|
|
33
|
-
self._bucket = self._get_bucket()
|
|
26
|
+
self._parsed_path = parse_osspath(self._path)
|
|
27
|
+
self._bucket = get_oss_bucket(self._parsed_path)
|
|
34
28
|
self._current_pos = 0
|
|
35
29
|
self._size = None
|
|
36
30
|
self._buffer = b""
|
|
37
31
|
self._buffer_size = 1 * 1024
|
|
38
32
|
self._mode = mode
|
|
39
33
|
|
|
34
|
+
if mode and mode.startswith("w"):
|
|
35
|
+
try:
|
|
36
|
+
self._bucket.delete_object(self._parsed_path.key)
|
|
37
|
+
except oss2.exceptions.NoSuchKey:
|
|
38
|
+
pass
|
|
39
|
+
|
|
40
40
|
@property
|
|
41
41
|
def mode(self):
|
|
42
42
|
return self._mode
|
|
@@ -44,16 +44,6 @@ class OSSIOBase(IOBase):
|
|
|
44
44
|
def fileno(self) -> int:
|
|
45
45
|
raise AttributeError
|
|
46
46
|
|
|
47
|
-
def _get_bucket(self):
|
|
48
|
-
return oss2.Bucket(
|
|
49
|
-
auth=oss2.Auth(
|
|
50
|
-
access_key_id=self._access_key_id,
|
|
51
|
-
access_key_secret=self._access_key_secret,
|
|
52
|
-
),
|
|
53
|
-
endpoint=self._end_point,
|
|
54
|
-
bucket_name=self._bucket_name,
|
|
55
|
-
)
|
|
56
|
-
|
|
57
47
|
def _get_size(self):
|
|
58
48
|
if self._size is None:
|
|
59
49
|
self._size = int(oss_stat(self._path)["size"])
|
|
@@ -79,7 +69,7 @@ class OSSIOBase(IOBase):
|
|
|
79
69
|
return self._current_pos
|
|
80
70
|
|
|
81
71
|
def seekable(self):
|
|
82
|
-
return
|
|
72
|
+
return "r" in self._mode
|
|
83
73
|
|
|
84
74
|
def read(self, size=-1):
|
|
85
75
|
"""
|
|
@@ -97,12 +87,12 @@ class OSSIOBase(IOBase):
|
|
|
97
87
|
return b""
|
|
98
88
|
elif size < 0:
|
|
99
89
|
obj = self._bucket.get_object(
|
|
100
|
-
self.
|
|
90
|
+
self._parsed_path.key, byte_range=(self._current_pos, None)
|
|
101
91
|
)
|
|
102
92
|
self._current_pos = self._get_size()
|
|
103
93
|
else:
|
|
104
94
|
obj = self._bucket.get_object(
|
|
105
|
-
self.
|
|
95
|
+
self._parsed_path.key,
|
|
106
96
|
byte_range=(self._current_pos, self._current_pos + size - 1),
|
|
107
97
|
)
|
|
108
98
|
self._current_pos = self._current_pos + size
|
|
@@ -117,7 +107,7 @@ class OSSIOBase(IOBase):
|
|
|
117
107
|
self._get_size() - 1, self._current_pos + self._buffer_size - 1
|
|
118
108
|
)
|
|
119
109
|
buffer = self._bucket.get_object(
|
|
120
|
-
self.
|
|
110
|
+
self._parsed_path.key, byte_range=(self._current_pos, read_to)
|
|
121
111
|
).read()
|
|
122
112
|
if not buffer:
|
|
123
113
|
return 1
|
|
@@ -145,11 +135,17 @@ class OSSIOBase(IOBase):
|
|
|
145
135
|
break
|
|
146
136
|
return bytes(res)
|
|
147
137
|
|
|
138
|
+
def write(self, block):
    """
    Append ``block`` to the OSS object at the current position.

    The data is sent with ``append_object`` using ``self._current_pos`` as
    the append offset, and the handle position is then moved to the
    ``next_position`` reported in the result.

    Parameters
    ----------
    block
        Content passed through unchanged to ``append_object``.

    Returns
    -------
    int
        How far the position advanced, i.e. the number of bytes appended.
        Returning this follows the usual file-object ``write`` contract.
    """
    start_pos = self._current_pos
    append_result = self._bucket.append_object(
        self._parsed_path.key, start_pos, block
    )
    # use the position reported by the service rather than computing
    # ``start_pos + len(block)`` locally
    self._current_pos = append_result.next_position
    return self._current_pos - start_pos
|
|
143
|
+
|
|
148
144
|
def readable(self):
|
|
149
|
-
return
|
|
145
|
+
return "r" in self._mode
|
|
150
146
|
|
|
151
147
|
def writable(self):
|
|
152
|
-
return
|
|
148
|
+
return "w" in self._mode or "a" in self._mode
|
|
153
149
|
|
|
154
150
|
def close(self):
|
|
155
151
|
# already closed by oss
|
maxframe/lib/filesystem/base.py
CHANGED
|
@@ -247,7 +247,7 @@ class FileSystem(ABC):
|
|
|
247
247
|
def parse_from_path(uri: str):
|
|
248
248
|
parsed_uri = urlparse(uri)
|
|
249
249
|
options = dict()
|
|
250
|
-
options["host"] = parsed_uri.
|
|
250
|
+
options["host"] = parsed_uri.hostname
|
|
251
251
|
if parsed_uri.port:
|
|
252
252
|
options["port"] = parsed_uri.port
|
|
253
253
|
if parsed_uri.username:
|
maxframe/lib/filesystem/core.py
CHANGED
|
@@ -49,7 +49,7 @@ def get_fs(path: path_type, storage_options: Dict = None) -> FileSystem:
|
|
|
49
49
|
|
|
50
50
|
if scheme in _filesystems:
|
|
51
51
|
file_system_type = _filesystems[scheme]
|
|
52
|
-
if scheme == "file"
|
|
52
|
+
if scheme == "file":
|
|
53
53
|
# local file systems are singletons.
|
|
54
54
|
return file_system_type.get_instance()
|
|
55
55
|
else:
|
maxframe/lib/filesystem/oss.py
CHANGED
|
@@ -12,8 +12,10 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
|
|
16
|
-
|
|
15
|
+
import enum
|
|
16
|
+
import re
|
|
17
|
+
from typing import Dict, Iterator, List, Tuple, Union
|
|
18
|
+
from urllib.parse import urlencode
|
|
17
19
|
|
|
18
20
|
from ...utils import implements, lazy_import
|
|
19
21
|
from ._oss_lib import common as oc
|
|
@@ -22,18 +24,43 @@ from ._oss_lib.handle import OSSIOBase
|
|
|
22
24
|
from .base import FileSystem, path_type
|
|
23
25
|
|
|
24
26
|
oss2 = lazy_import("oss2", placeholder=True)
|
|
27
|
+
_ip_regex = re.compile(r"^([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})")
|
|
25
28
|
|
|
26
29
|
_oss_time_out = 10
|
|
27
30
|
|
|
28
31
|
|
|
29
|
-
class
|
|
30
|
-
|
|
32
|
+
@enum.unique
class HostEnforceType(enum.Enum):
    """
    Policy for rewriting an OSS endpoint between its internal and external
    host names.

    The ``-internal`` marker is looked for on the first dot-separated label
    of the endpoint host.
    """

    # keep the endpoint exactly as given
    no_enforce = 0
    # add the ``-internal`` suffix to the first host label when it is missing
    force_internal = 1
    # drop the ``-internal`` suffix from the first host label when present
    force_external = 2
|
|
36
|
+
|
|
31
37
|
|
|
32
|
-
|
|
33
|
-
def
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
38
|
+
class OSSFileSystem(FileSystem):
|
|
39
|
+
def __init__(
    self,
    access_key_id: str = None,
    access_key_secret: str = None,
    security_token: str = None,
    host_enforce_type: Union[HostEnforceType, str] = HostEnforceType.no_enforce,
    **kw,
):
    """
    Store the credentials and endpoint policy used when rewriting paths.

    Parameters
    ----------
    access_key_id : str
        Access key id; when falsy, the ``user`` keyword is used instead.
    access_key_secret : str
        Access key secret; when falsy, the ``password`` keyword is used
        instead.
    security_token : str
        Security token, stored as given.
    host_enforce_type : HostEnforceType or str
        Either an enum member, or the name of a member which is looked up
        on ``HostEnforceType``.
    **kw
        Extra options; only ``user`` and ``password`` are read here.
    """
    # explicit arguments win; generic user/password keywords are the fallback
    self._access_key_id = access_key_id or kw.get("user")
    self._access_key_secret = access_key_secret or kw.get("password")
    self._security_token = security_token

    # a non-enum value is treated as a member name
    if not isinstance(host_enforce_type, HostEnforceType):
        host_enforce_type = getattr(HostEnforceType, host_enforce_type)
    self._host_enforce_type = host_enforce_type
|
|
55
|
+
|
|
56
|
+
def _rewrite_path(self, path: str) -> str:
    """
    Return ``path`` rebuilt by ``build_oss_path`` with the credentials,
    security token and host enforce policy held by this file system.
    """
    credentials = dict(
        access_key_id=self._access_key_id,
        access_key_secret=self._access_key_secret,
        security_token=self._security_token,
    )
    return build_oss_path(
        path, host_enforce_type=self._host_enforce_type, **credentials
    )
|
|
37
64
|
|
|
38
65
|
@implements(FileSystem.cat)
|
|
39
66
|
def cat(self, path: path_type):
|
|
@@ -46,39 +73,37 @@ class OSSFileSystem(FileSystem):
|
|
|
46
73
|
if not file_entry.is_dir():
|
|
47
74
|
raise OSError("ls for file is not supported")
|
|
48
75
|
else:
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
)
|
|
52
|
-
oss_bucket = oss2.Bucket(
|
|
53
|
-
auth=oss2.Auth(
|
|
54
|
-
access_key_id=access_key_id, access_key_secret=access_key_secret
|
|
55
|
-
),
|
|
56
|
-
endpoint=end_point,
|
|
57
|
-
bucket_name=bucket,
|
|
58
|
-
connect_timeout=_oss_time_out,
|
|
59
|
-
)
|
|
60
|
-
for obj in oss2.ObjectIteratorV2(oss_bucket, prefix=key):
|
|
76
|
+
parsed_path = oc.parse_osspath(path)
|
|
77
|
+
oss_bucket = oc.get_oss_bucket(parsed_path)
|
|
78
|
+
for obj in oss2.ObjectIteratorV2(oss_bucket, prefix=parsed_path.key):
|
|
61
79
|
if obj.key.endswith("/"):
|
|
62
80
|
continue
|
|
63
|
-
obj_path = rf"
|
|
81
|
+
obj_path = rf"{parsed_path.bucket}/{obj.key}"
|
|
64
82
|
file_list.append(
|
|
65
83
|
build_oss_path(
|
|
66
|
-
obj_path,
|
|
84
|
+
obj_path,
|
|
85
|
+
parsed_path.endpoint,
|
|
86
|
+
parsed_path.access_key_id,
|
|
87
|
+
parsed_path.access_key_secret,
|
|
88
|
+
parsed_path.security_token,
|
|
67
89
|
)
|
|
68
90
|
)
|
|
69
91
|
return file_list
|
|
70
92
|
|
|
71
93
|
@implements(FileSystem.delete)
|
|
72
94
|
def delete(self, path: path_type, recursive: bool = False):
|
|
73
|
-
|
|
95
|
+
return oc.oss_delete(self._rewrite_path(path))
|
|
74
96
|
|
|
75
97
|
@implements(FileSystem.rename)
|
|
76
98
|
def rename(self, path: path_type, new_path: path_type):
|
|
77
|
-
|
|
99
|
+
# in OSS, you need to move file by copy and delete
|
|
100
|
+
path = self._rewrite_path(path)
|
|
101
|
+
oc.oss_copy_file(path, self._rewrite_path(new_path))
|
|
102
|
+
oc.oss_delete(path)
|
|
78
103
|
|
|
79
104
|
@implements(FileSystem.stat)
|
|
80
105
|
def stat(self, path: path_type) -> Dict:
|
|
81
|
-
ofe = oc.OSSFileEntry(path)
|
|
106
|
+
ofe = oc.OSSFileEntry(self._rewrite_path(path))
|
|
82
107
|
return ofe.stat()
|
|
83
108
|
|
|
84
109
|
@implements(FileSystem.mkdir)
|
|
@@ -87,12 +112,12 @@ class OSSFileSystem(FileSystem):
|
|
|
87
112
|
|
|
88
113
|
@implements(FileSystem.isdir)
|
|
89
114
|
def isdir(self, path: path_type) -> bool:
|
|
90
|
-
file_entry = oc.OSSFileEntry(path)
|
|
115
|
+
file_entry = oc.OSSFileEntry(self._rewrite_path(path))
|
|
91
116
|
return file_entry.is_dir()
|
|
92
117
|
|
|
93
118
|
@implements(FileSystem.isfile)
|
|
94
119
|
def isfile(self, path: path_type) -> bool:
|
|
95
|
-
file_entry = oc.OSSFileEntry(path)
|
|
120
|
+
file_entry = oc.OSSFileEntry(self._rewrite_path(path))
|
|
96
121
|
return file_entry.is_file()
|
|
97
122
|
|
|
98
123
|
@implements(FileSystem._isfilestore)
|
|
@@ -101,11 +126,11 @@ class OSSFileSystem(FileSystem):
|
|
|
101
126
|
|
|
102
127
|
@implements(FileSystem.exists)
|
|
103
128
|
def exists(self, path: path_type):
|
|
104
|
-
return oc.oss_exists(path)
|
|
129
|
+
return oc.oss_exists(self._rewrite_path(path))
|
|
105
130
|
|
|
106
131
|
@implements(FileSystem.open)
|
|
107
132
|
def open(self, path: path_type, mode: str = "rb") -> OSSIOBase:
|
|
108
|
-
file_handle = OSSIOBase(path, mode)
|
|
133
|
+
file_handle = OSSIOBase(self._rewrite_path(path), mode)
|
|
109
134
|
return file_handle
|
|
110
135
|
|
|
111
136
|
@implements(FileSystem.walk)
|
|
@@ -114,10 +139,37 @@ class OSSFileSystem(FileSystem):
|
|
|
114
139
|
|
|
115
140
|
@implements(FileSystem.glob)
|
|
116
141
|
def glob(self, path: path_type, recursive: bool = False) -> List[path_type]:
|
|
117
|
-
return glob(path, recursive=recursive)
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
def
|
|
142
|
+
return glob(self._rewrite_path(path), recursive=recursive)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _rewrite_internal_endpoint(
    endpoint: str, host_enforce_type: HostEnforceType = HostEnforceType.no_enforce
) -> str:
    """
    Switch an OSS endpoint between its internal and external host name.

    Parameters
    ----------
    endpoint : str
        Endpoint host. Returned unchanged when it is empty, starts with an
        IPv4-like address, or contains no dot.
    host_enforce_type : HostEnforceType
        ``no_enforce`` keeps the endpoint as is, ``force_internal`` appends
        ``-internal`` to the first host label when missing, and
        ``force_external`` removes that suffix when present.

    Returns
    -------
    str
        The rewritten endpoint, or the original one when no rewrite applies.
    """
    if (
        not endpoint
        or host_enforce_type == HostEnforceType.no_enforce
        or _ip_regex.match(endpoint)
    ):
        return endpoint

    # ``partition`` never raises: a dot-less host (for instance "localhost")
    # yields an empty separator and is left untouched instead of failing
    # on tuple unpacking.
    ep_first, sep, ep_rest = endpoint.partition(".")
    if not sep:
        return endpoint

    suffix = "-internal"
    host_with_internal = ep_first.endswith(suffix)
    if host_enforce_type == HostEnforceType.force_external and host_with_internal:
        # strip only the trailing suffix that was just tested for, not every
        # occurrence of the text inside the label
        return ep_first[: -len(suffix)] + "." + ep_rest
    if host_enforce_type == HostEnforceType.force_internal and not host_with_internal:
        return ep_first + suffix + "." + ep_rest
    return endpoint
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def build_oss_path(
|
|
166
|
+
path: path_type,
|
|
167
|
+
endpoint: str = None,
|
|
168
|
+
access_key_id: str = None,
|
|
169
|
+
access_key_secret: str = None,
|
|
170
|
+
security_token: str = None,
|
|
171
|
+
host_enforce_type: HostEnforceType = HostEnforceType.no_enforce,
|
|
172
|
+
):
|
|
121
173
|
"""
|
|
122
174
|
Returns a path with oss info.
|
|
123
175
|
Used to register the access_key_id, access_key_secret and
|
|
@@ -127,16 +179,19 @@ def build_oss_path(path: path_type, access_key_id, access_key_secret, end_point)
|
|
|
127
179
|
Parameters
|
|
128
180
|
----------
|
|
129
181
|
path : path_type
|
|
130
|
-
The original
|
|
182
|
+
The original OSS url.
|
|
183
|
+
|
|
184
|
+
endpoint : str
|
|
185
|
+
The endpoint of OSS.
|
|
131
186
|
|
|
132
187
|
access_key_id : str
|
|
133
|
-
The access key id of
|
|
188
|
+
The access key id of OSS.
|
|
134
189
|
|
|
135
190
|
access_key_secret : str
|
|
136
|
-
The access key secret of
|
|
191
|
+
The access key secret of OSS.
|
|
137
192
|
|
|
138
|
-
|
|
139
|
-
The
|
|
193
|
+
security_token : str
|
|
194
|
+
The security token of OSS.
|
|
140
195
|
|
|
141
196
|
Returns
|
|
142
197
|
-------
|
|
@@ -146,12 +201,26 @@ def build_oss_path(path: path_type, access_key_id, access_key_secret, end_point)
|
|
|
146
201
|
"""
|
|
147
202
|
if isinstance(path, (list, tuple)):
|
|
148
203
|
path = path[0]
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
204
|
+
parse_result = oc.parse_osspath(path, check_errors=False)
|
|
205
|
+
access_key_id = parse_result.access_key_id or access_key_id
|
|
206
|
+
access_key_secret = parse_result.access_key_secret or access_key_secret
|
|
207
|
+
security_token = parse_result.security_token or security_token
|
|
208
|
+
|
|
209
|
+
scheme = parse_result.scheme or "oss"
|
|
210
|
+
endpoint = _rewrite_internal_endpoint(
|
|
211
|
+
parse_result.endpoint or endpoint, host_enforce_type
|
|
156
212
|
)
|
|
213
|
+
|
|
214
|
+
if access_key_id and access_key_secret:
|
|
215
|
+
creds = f"{access_key_id}:{access_key_secret}@"
|
|
216
|
+
else:
|
|
217
|
+
creds = ""
|
|
218
|
+
|
|
219
|
+
new_path = f"{scheme}://{creds}{endpoint}/{parse_result.bucket}"
|
|
220
|
+
if parse_result.key:
|
|
221
|
+
new_path += f"/{parse_result.key}"
|
|
222
|
+
if security_token:
|
|
223
|
+
new_path += f"?{urlencode(dict(security_token=security_token))}"
|
|
224
|
+
# reparse to check errors
|
|
225
|
+
oc.parse_osspath(new_path)
|
|
157
226
|
return new_path
|