maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
maxframe_client/fetcher.py
CHANGED
|
@@ -13,13 +13,16 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import sys
|
|
16
|
+
import warnings
|
|
16
17
|
from abc import ABC, abstractmethod
|
|
17
18
|
from numbers import Integral
|
|
18
19
|
from typing import Any, Dict, List, Optional, Tuple, Type, Union
|
|
19
20
|
|
|
21
|
+
import numpy as np
|
|
20
22
|
import pandas as pd
|
|
21
23
|
import pyarrow as pa
|
|
22
24
|
from odps import ODPS
|
|
25
|
+
from odps.errors import ODPSError
|
|
23
26
|
from odps.models import ExternalVolume
|
|
24
27
|
|
|
25
28
|
from maxframe import utils
|
|
@@ -175,6 +178,36 @@ class ODPSTableFetcher(ToThreadMixin, ResultFetcher):
|
|
|
175
178
|
table = self._odps_entry.get_table(table_name)
|
|
176
179
|
return getattr(table, "comment", None)
|
|
177
180
|
|
|
181
|
+
def _get_table_record_num(self, table, part_specs):
|
|
182
|
+
if not part_specs or part_specs == [None]:
|
|
183
|
+
rec_num = getattr(table, "record_num", None)
|
|
184
|
+
else:
|
|
185
|
+
rec_num = 0
|
|
186
|
+
for part_spec in part_specs:
|
|
187
|
+
pt_rec_num = getattr(table.get_partition(part_spec), "record_num", None)
|
|
188
|
+
if pt_rec_num is None or pt_rec_num < 0:
|
|
189
|
+
rec_num = None
|
|
190
|
+
break
|
|
191
|
+
else:
|
|
192
|
+
rec_num += pt_rec_num
|
|
193
|
+
|
|
194
|
+
if rec_num is not None and rec_num >= 0:
|
|
195
|
+
return rec_num
|
|
196
|
+
|
|
197
|
+
try:
|
|
198
|
+
part_sessions = TunnelTableIO.create_download_sessions(
|
|
199
|
+
self._odps_entry, table.full_table_name, part_specs
|
|
200
|
+
)
|
|
201
|
+
return sum(session.count for session in part_sessions.values())
|
|
202
|
+
except ODPSError as ex:
|
|
203
|
+
if "StatusConflict" not in str(ex):
|
|
204
|
+
raise
|
|
205
|
+
warnings.warn(
|
|
206
|
+
f"Failed to obtain record count of table {table.full_table_name}. "
|
|
207
|
+
f"The original error is:\n{ex}"
|
|
208
|
+
)
|
|
209
|
+
return None
|
|
210
|
+
|
|
178
211
|
async def update_tileable_meta(
|
|
179
212
|
self,
|
|
180
213
|
tileable: TileableType,
|
|
@@ -204,10 +237,8 @@ class ODPSTableFetcher(ToThreadMixin, ResultFetcher):
|
|
|
204
237
|
dtypes = odps_schema_to_pandas_dtypes(table.table_schema)
|
|
205
238
|
tileable.refresh_from_dtypes(dtypes)
|
|
206
239
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
)
|
|
210
|
-
total_records = sum(session.count for session in part_sessions.values())
|
|
240
|
+
total_records = self._get_table_record_num(table, part_specs)
|
|
241
|
+
total_records = np.nan if total_records is None else total_records
|
|
211
242
|
|
|
212
243
|
new_shape_list = list(tileable.shape)
|
|
213
244
|
new_shape_list[0] = total_records
|
maxframe_client/session/odps.py
CHANGED
|
@@ -91,10 +91,15 @@ logger = logging.getLogger(__name__)
|
|
|
91
91
|
|
|
92
92
|
class MaxFrameServiceCaller(metaclass=abc.ABCMeta):
|
|
93
93
|
def get_settings_to_upload(self) -> Dict[str, Any]:
|
|
94
|
+
odps_entry = getattr(self, "_odps_entry", None)
|
|
95
|
+
entry_quota_name = getattr(odps_entry, "quota_name", None)
|
|
96
|
+
|
|
94
97
|
sql_settings = (odps_options.sql.settings or {}).copy()
|
|
95
98
|
sql_settings.update(options.sql.settings or {})
|
|
96
|
-
quota_name =
|
|
97
|
-
|
|
99
|
+
quota_name = (
|
|
100
|
+
options.session.quota_name
|
|
101
|
+
or entry_quota_name
|
|
102
|
+
or getattr(odps_options, "quota_name", None)
|
|
98
103
|
)
|
|
99
104
|
quota_settings = {
|
|
100
105
|
sql_settings.get("odps.task.wlm.quota", None),
|
maxframe_client/session/task.py
CHANGED
|
@@ -112,7 +112,14 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
112
112
|
return None
|
|
113
113
|
else:
|
|
114
114
|
raise SessionAlreadyClosedError(self._instance.id)
|
|
115
|
-
|
|
115
|
+
|
|
116
|
+
try:
|
|
117
|
+
result_data = base64.b64decode(encoded_result)
|
|
118
|
+
except:
|
|
119
|
+
# todo change to a better logic when it is possible
|
|
120
|
+
# to judge if server side returns success or fail
|
|
121
|
+
raise parse_instance_error(encoded_result)
|
|
122
|
+
|
|
116
123
|
if self._output_format == MAXFRAME_OUTPUT_MAXFRAME_FORMAT:
|
|
117
124
|
return deserialize_serializable(result_data)
|
|
118
125
|
elif self._output_format == MAXFRAME_OUTPUT_JSON_FORMAT:
|
|
@@ -14,11 +14,13 @@
|
|
|
14
14
|
|
|
15
15
|
import uuid
|
|
16
16
|
|
|
17
|
+
import mock
|
|
17
18
|
import numpy as np
|
|
18
19
|
import pandas as pd
|
|
19
20
|
import pyarrow as pa
|
|
20
21
|
import pytest
|
|
21
22
|
from odps import ODPS
|
|
23
|
+
from odps.errors import ODPSError
|
|
22
24
|
|
|
23
25
|
import maxframe.dataframe as md
|
|
24
26
|
from maxframe.config import options
|
|
@@ -40,9 +42,9 @@ def switch_table_io(request):
|
|
|
40
42
|
|
|
41
43
|
|
|
42
44
|
@pytest.mark.parametrize("switch_table_io", [False, True], indirect=True)
|
|
43
|
-
async def
|
|
45
|
+
async def test_fetch_table_data(switch_table_io):
|
|
44
46
|
odps_entry = ODPS.from_environments()
|
|
45
|
-
|
|
47
|
+
table_io = ODPSTableIO(odps_entry)
|
|
46
48
|
fetcher = ODPSTableFetcher(odps_entry)
|
|
47
49
|
|
|
48
50
|
data = pd.DataFrame(
|
|
@@ -60,7 +62,7 @@ async def test_table_fetcher(switch_table_io):
|
|
|
60
62
|
odps_entry.create_table(
|
|
61
63
|
table_name, "_idx_0 bigint, a double, b bigint, c string", lifecycle=1
|
|
62
64
|
)
|
|
63
|
-
with
|
|
65
|
+
with table_io.open_writer(table_name) as writer:
|
|
64
66
|
writer.write(pa.Table.from_pandas(data, preserve_index=False))
|
|
65
67
|
|
|
66
68
|
raw_data = data[list("abc")]
|
|
@@ -105,3 +107,74 @@ async def test_table_fetcher(switch_table_io):
|
|
|
105
107
|
pd.testing.assert_frame_equal(raw_data.iloc[-1:-6:-1, :1], fetched)
|
|
106
108
|
|
|
107
109
|
odps_entry.delete_table(table_name, if_exists=True)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
async def test_fetch_data_shape():
|
|
113
|
+
odps_entry = ODPS.from_environments()
|
|
114
|
+
table_io = ODPSTableIO(odps_entry)
|
|
115
|
+
fetcher = ODPSTableFetcher(odps_entry)
|
|
116
|
+
|
|
117
|
+
data = pd.DataFrame(
|
|
118
|
+
{
|
|
119
|
+
"_idx_0": np.arange(1000),
|
|
120
|
+
"a": np.random.rand(1000),
|
|
121
|
+
"b": np.random.randint(0, 10, 1000),
|
|
122
|
+
"c": np.random.choice(list("ABC"), 1000),
|
|
123
|
+
}
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
table_names = [
|
|
127
|
+
tn("mf_test_groupby_table_" + str(uuid.uuid4().hex)) for _ in range(3)
|
|
128
|
+
]
|
|
129
|
+
pt_specs = [None, None, ["pt='2020-01-01'"]]
|
|
130
|
+
for tn_to_check in table_names:
|
|
131
|
+
odps_entry.delete_table(tn_to_check, if_exists=True)
|
|
132
|
+
|
|
133
|
+
odps_entry.create_table(
|
|
134
|
+
table_names[0], "_idx_0 bigint, a double, b bigint, c string", lifecycle=1
|
|
135
|
+
)
|
|
136
|
+
odps_entry.create_table(
|
|
137
|
+
table_names[1], "_idx_0 bigint, a double, b bigint, c string", lifecycle=1
|
|
138
|
+
)
|
|
139
|
+
odps_entry.create_table(
|
|
140
|
+
table_names[2],
|
|
141
|
+
("_idx_0 bigint, a double, b bigint, c string", "pt string"),
|
|
142
|
+
lifecycle=1,
|
|
143
|
+
)
|
|
144
|
+
with table_io.open_writer(table_names[0]) as writer:
|
|
145
|
+
writer.write(pa.Table.from_pandas(data, preserve_index=False))
|
|
146
|
+
|
|
147
|
+
odps_entry.execute_sql(
|
|
148
|
+
f"insert overwrite table {table_names[1]} select * from {table_names[0]}"
|
|
149
|
+
)
|
|
150
|
+
odps_entry.execute_sql(
|
|
151
|
+
f"insert overwrite table {table_names[2]} partition ({pt_specs[-1][0]}) "
|
|
152
|
+
f"select * from {table_names[0]}"
|
|
153
|
+
)
|
|
154
|
+
|
|
155
|
+
for tn_to_check, pt_spec in zip(table_names, pt_specs):
|
|
156
|
+
tileable = md.read_pandas(data[list("abc")])
|
|
157
|
+
tileable._shape = (np.nan, 3)
|
|
158
|
+
result_info = ODPSTableResultInfo(
|
|
159
|
+
ResultType.ODPS_TABLE, full_table_name=tn_to_check, partition_specs=pt_spec
|
|
160
|
+
)
|
|
161
|
+
await fetcher.update_tileable_meta(tileable, result_info)
|
|
162
|
+
assert tileable.shape == (1000, 3)
|
|
163
|
+
|
|
164
|
+
def _create_session_with_error(*_, **__):
|
|
165
|
+
raise ODPSError("StatusConflict: mock")
|
|
166
|
+
|
|
167
|
+
with mock.patch(
|
|
168
|
+
"maxframe.io.odpsio.tableio.TunnelTableIO.create_download_sessions",
|
|
169
|
+
new=_create_session_with_error,
|
|
170
|
+
):
|
|
171
|
+
tileable = md.read_pandas(data[list("abc")])
|
|
172
|
+
tileable._shape = (np.nan, 3)
|
|
173
|
+
result_info = ODPSTableResultInfo(
|
|
174
|
+
ResultType.ODPS_TABLE, full_table_name=table_names[0]
|
|
175
|
+
)
|
|
176
|
+
await fetcher.update_tileable_meta(tileable, result_info)
|
|
177
|
+
assert tileable.shape == (np.nan, 3)
|
|
178
|
+
|
|
179
|
+
for tn_to_check in table_names:
|
|
180
|
+
odps_entry.delete_table(tn_to_check, if_exists=True, wait=False)
|
|
@@ -20,6 +20,7 @@ import numpy as np
|
|
|
20
20
|
import pandas as pd
|
|
21
21
|
import pytest
|
|
22
22
|
from odps import ODPS
|
|
23
|
+
from odps import options as odps_options
|
|
23
24
|
|
|
24
25
|
import maxframe.dataframe as md
|
|
25
26
|
import maxframe.remote as mr
|
|
@@ -96,7 +97,10 @@ def test_local_run_dataframe(start_mock_session, enable_local_execution):
|
|
|
96
97
|
cat_df = md.concat([df, df2])
|
|
97
98
|
executed = cat_df.execute()
|
|
98
99
|
result = executed.fetch()
|
|
99
|
-
|
|
100
|
+
try:
|
|
101
|
+
pd.testing.assert_frame_equal(pd.concat([pd_result, pd_df2]), result)
|
|
102
|
+
except AssertionError:
|
|
103
|
+
pd.testing.assert_frame_equal(pd.concat([pd_df2, pd_result]), result)
|
|
100
104
|
|
|
101
105
|
|
|
102
106
|
def test_simple_run_dataframe(start_mock_session):
|
|
@@ -206,6 +210,29 @@ def test_run_empty_table(start_mock_session):
|
|
|
206
210
|
empty_table.drop()
|
|
207
211
|
|
|
208
212
|
|
|
213
|
+
def test_read_table_with_arrow_dtype(start_mock_session):
|
|
214
|
+
if not hasattr(pd, "ArrowDtype"):
|
|
215
|
+
pytest.skip("Need ArrowDtype in pandas to run the test")
|
|
216
|
+
|
|
217
|
+
odps_entry = ODPS.from_environments()
|
|
218
|
+
odps_options.sql.use_odps2_extension = True
|
|
219
|
+
|
|
220
|
+
table_name = tn("test_read_table_with_arrow_dtype")
|
|
221
|
+
odps_entry.delete_table(table_name, if_exists=True)
|
|
222
|
+
test_table = odps_entry.create_table(table_name, "a bigint, b binary", lifecycle=1)
|
|
223
|
+
|
|
224
|
+
with test_table.open_writer() as writer:
|
|
225
|
+
writer.write([123, b"abcd"])
|
|
226
|
+
writer.write([None, b"uvx"])
|
|
227
|
+
writer.write([456, b"asfdl\x11hawl"])
|
|
228
|
+
|
|
229
|
+
df = md.read_odps_table(table_name, dtype_backend="pyarrow")
|
|
230
|
+
executed = df.execute().fetch()
|
|
231
|
+
assert all(isinstance(tp, pd.ArrowDtype) for tp in executed.dtypes)
|
|
232
|
+
assert executed.a.tolist() == [123, pd.NA, 456]
|
|
233
|
+
assert executed.b.tolist() == [b"abcd", b"uvx", b"asfdl\x11hawl"]
|
|
234
|
+
|
|
235
|
+
|
|
209
236
|
def test_run_odps_query_without_schema(start_mock_session):
|
|
210
237
|
odps_entry = ODPS.from_environments()
|
|
211
238
|
|