maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import re
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
import pandas as pd
|
|
19
|
+
|
|
20
|
+
from ... import opcodes
|
|
21
|
+
from ...core import get_output_types
|
|
22
|
+
from ...serialization.serializables import Int32Field, ListField, StringField
|
|
23
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
24
|
+
from ..utils import parse_index
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class DataFrameFilter(DataFrameOperatorMixin, DataFrameOperator):
    """Operator that subsets a DataFrame or Series by the labels of one axis.

    The selection is computed from label metadata inside ``__call__``: a label
    is kept when it appears in ``items``, contains ``like`` as a substring, or
    matches ``regex``. The criteria are checked in that order and the first
    one that is set wins; when none is set, every label is kept.
    """

    _op_type_ = opcodes.DATAFRAME_FILTER

    # Labels to keep by exact membership.
    items = ListField("items", default=None)
    # Substring that ``str(label)`` must contain.
    like = StringField("like", default=None)
    # Pattern compiled with ``re.compile``; a label is kept when ``search`` hits.
    regex = StringField("regex", default=None)
    # 0 selects on the index; any other value selects on the columns.
    axis = Int32Field("axis", default=None)

    def __call__(self, df_or_series):
        """Build the filtered tileable for ``df_or_series``.

        Raises
        ------
        ValueError
            If the column axis is requested for an object without ``columns``.
        """
        self._output_types = get_output_types(df_or_series)

        # Get axis labels to filter
        if self.axis == 0:
            # Filter by index
            labels = df_or_series.index_value.to_pandas()
        else:
            # Filter by columns (DataFrame only)
            if not hasattr(df_or_series, "columns"):
                raise ValueError("axis=1 (columns) not valid for Series")
            labels = df_or_series.columns_value.to_pandas()

        # Apply filter criteria
        filtered_labels = self._apply_filter_criteria(labels)

        # Calculate output shape and metadata
        out_params = self._calculate_output_metadata(df_or_series, filtered_labels)
        return self.new_tileable([df_or_series], **out_params)

    def _apply_filter_criteria(self, labels):
        """Return the list of ``labels`` selected by the configured criterion."""
        if self.items is not None:
            # Exact match filter; result follows the order of ``labels``
            return [label for label in labels if label in self.items]
        elif self.like is not None:
            # Substring match filter
            return [label for label in labels if self.like in str(label)]
        elif self.regex is not None:
            # Regex match filter
            pattern = re.compile(self.regex)
            return [label for label in labels if pattern.search(str(label))]
        else:
            return list(labels)

    def _calculate_output_metadata(self, input_tileable, filtered_labels):
        """Derive the output params from the input params and kept labels.

        Starts from ``input_tileable.params`` and overrides the entries that
        depend on the filtered axis.
        """
        input_shape = input_tileable.shape

        out_params = input_tileable.params
        if self.axis == 0:
            # An empty selection is reported as an unknown (NaN) length.
            # NOTE(review): presumably because index labels may be missing
            # from the metadata at graph-building time -- confirm.
            out_params["shape"] = (len(filtered_labels) or np.nan,) + input_shape[1:]
            out_params["index_value"] = parse_index(
                pd.Index(filtered_labels), input_tileable.index_value
            )
        else:
            filtered_dtypes = input_tileable.dtypes[filtered_labels]
            out_params["shape"] = (input_shape[0], len(filtered_labels))
            # Keep dtypes in sync with the filtered columns; otherwise the
            # output would still advertise the dtypes of every input column.
            # NOTE(review): assumes ``params`` carries a "dtypes" entry for
            # DataFrames; if it also carries a derived "dtypes_value", that
            # entry needs refreshing as well -- confirm.
            out_params["dtypes"] = filtered_dtypes
            out_params["columns_value"] = parse_index(
                filtered_dtypes.index, store_data=True
            )
        return out_params
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def filter_dataframe(df_or_series, items=None, like=None, regex=None, axis=None):
    """
    Subset the dataframe rows or columns according to the specified index labels.

    Note that this routine does not filter a dataframe on its
    contents. The filter is applied to the labels of the index.

    Parameters
    ----------
    items : list-like
        Keep labels from axis which are in items.
    like : str
        Keep labels from axis for which "like in label == True".
    regex : str (regular expression)
        Keep labels from axis for which re.search(regex, label) == True.
    axis : {0 or 'index', 1 or 'columns', None}, default None
        The axis to filter on, expressed either as an index (int)
        or axis name (str). By default this is the info axis, 'columns' for
        DataFrame. For `Series` this parameter is unused and defaults to `None`.

    Returns
    -------
    same type as input object

    Raises
    ------
    TypeError
        If none, or more than one, of ``items``, ``like`` and ``regex``
        is given.
    ValueError
        If ``axis`` is not one of the accepted values, or the column axis
        is requested for a Series.

    See Also
    --------
    DataFrame.loc : Access a group of rows and columns
        by label(s) or a boolean array.

    Notes
    -----
    The ``items``, ``like``, and ``regex`` parameters are
    enforced to be mutually exclusive.

    ``axis`` defaults to the info axis that is used when indexing
    with ``[]``.

    Examples
    --------
    >>> import maxframe.tensor as mt
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame(mt.array(([1, 2, 3], [4, 5, 6])),
    ...                   index=['mouse', 'rabbit'],
    ...                   columns=['one', 'two', 'three'])
    >>> df.execute()
            one  two  three
    mouse     1    2      3
    rabbit    4    5      6

    >>> # select columns by name
    >>> df.filter(items=['one', 'three']).execute()
            one  three
    mouse     1      3
    rabbit    4      6

    >>> # select columns by regular expression
    >>> df.filter(regex='e$', axis=1).execute()
            one  three
    mouse     1      3
    rabbit    4      6

    >>> # select rows containing 'bbi'
    >>> df.filter(like='bbi', axis=0).execute()
            one  two  three
    rabbit    4    5      6
    """
    if axis is None:
        # For Series, axis is always 0 (index)
        # For DataFrame, default is 1 (columns)
        axis = 1 if hasattr(df_or_series, "columns") else 0
    elif axis == "index":
        # The operator stores the axis as an integer and compares it with 0,
        # so the documented axis names must be translated here.
        axis = 0
    elif axis == "columns":
        axis = 1
    elif axis not in (0, 1):
        raise ValueError(f"No axis named {axis}")

    param_count = sum(x is not None for x in [items, like, regex])
    if param_count == 0:
        raise TypeError("Must pass either `items`, `like`, or `regex`")
    if param_count > 1:
        raise TypeError(
            "keyword arguments `items`, `like`, `regex` are mutually exclusive"
        )

    if items is not None:
        # ``items`` is documented as list-like; the operator field holds a list.
        items = list(items)

    op = DataFrameFilter(items=items, like=like, regex=regex, axis=int(axis))
    return op(df_or_series)
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ... import opcodes
|
|
16
|
+
from ...serialization.serializables import AnyField
|
|
17
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
18
|
+
from ..utils import parse_index
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class IndexGetLevelValues(DataFrameOperator, DataFrameOperatorMixin):
    """Operator that extracts a single level of an index as a new index."""

    _op_type_ = opcodes.GET_LEVEL_VALUES

    # Level selector handed to pandas ``Index.get_level_values``.
    level = AnyField("level")

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)

    def __call__(self, index):
        """Create the output index for the requested level of ``index``."""
        # Run the pandas call on the metadata index to learn the dtype and
        # names of the selected level.
        meta_index = index.index_value.to_pandas()
        level_index = meta_index.get_level_values(self.level)

        out_kwargs = {
            "shape": (index.shape[0],),
            "dtype": level_index.dtype,
            "index_value": parse_index(level_index, store_data=False),
            "names": level_index.names,
        }
        return self.new_index([index], **out_kwargs)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def get_level_values(index, level):
    """
    Return the label values of the requested level as an index.

    The returned vector has the same length as the index.

    Parameters
    ----------
    level : int or str
        Either the integer position of the level in the MultiIndex,
        or the name of the level.

    Returns
    -------
    values : Index
        The selected level of this MultiIndex, converted to
        a single :class:`Index` (or subclass thereof).

    Examples
    --------
    Create a MultiIndex:

    >>> import maxframe.dataframe as md
    >>> import pandas as pd
    >>> mi = md.Index(pd.MultiIndex.from_arrays((list('abc'), list('def')), names=['level_1', 'level_2']))

    Get level values by supplying level as either integer or name:

    >>> mi.get_level_values(0).execute()
    Index(['a', 'b', 'c'], dtype='object', name='level_1')
    >>> mi.get_level_values('level_2').execute()
    Index(['d', 'e', 'f'], dtype='object', name='level_2')
    """
    return IndexGetLevelValues(level=level)(index)
|
|
@@ -34,4 +34,49 @@ class DataFrameIat:
|
|
|
34
34
|
|
|
35
35
|
|
|
36
36
|
def iat(a):
    """
    Access a single value for a row/column pair by integer position.

    Like ``iloc``, ``iat`` performs integer-based lookups. Prefer ``iat``
    when only a single value of a DataFrame or Series has to be read or
    written.

    Raises
    ------
    IndexError
        When integer position is out of bounds.

    See Also
    --------
    DataFrame.at : Access a single value for a row/column label pair.
    DataFrame.loc : Access a group of rows and columns by label(s).
    DataFrame.iloc : Access a group of rows and columns by integer position(s).

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
    ...                   columns=['A', 'B', 'C'])
    >>> df.execute()
        A   B   C
    0   0   2   3
    1   0   4   1
    2  10  20  30

    Get value at specified row/column pair

    >>> df.iat[1, 2].execute()
    1

    Set value at specified row/column pair

    >>> df.iat[1, 2] = 10
    >>> df.iat[1, 2].execute()
    10

    Get value within a series

    >>> df.loc[0].iat[1].execute()
    2
    """
    accessor = DataFrameIat(a)
    return accessor
|
|
@@ -27,7 +27,7 @@ from ...serialization.serializables import AnyField, KeyField, ListField
|
|
|
27
27
|
from ...tensor import asarray
|
|
28
28
|
from ...tensor.indexing.core import calc_shape
|
|
29
29
|
from ..operators import DATAFRAME_TYPE, DataFrameOperator, DataFrameOperatorMixin
|
|
30
|
-
from ..utils import indexing_index_value
|
|
30
|
+
from ..utils import indexing_index_value, validate_axis
|
|
31
31
|
|
|
32
32
|
_ILOC_ERROR_MSG = (
|
|
33
33
|
"Location based indexing can only have [integer, "
|
|
@@ -36,13 +36,16 @@ _ILOC_ERROR_MSG = (
|
|
|
36
36
|
)
|
|
37
37
|
|
|
38
38
|
|
|
39
|
-
def process_iloc_indexes(inp, indexes):
|
|
39
|
+
def process_iloc_indexes(inp, indexes, axis=0):
|
|
40
40
|
ndim = inp.ndim
|
|
41
41
|
|
|
42
42
|
if not isinstance(indexes, tuple):
|
|
43
43
|
indexes = (indexes,)
|
|
44
44
|
if len(indexes) < ndim:
|
|
45
|
-
|
|
45
|
+
if not axis:
|
|
46
|
+
indexes += (slice(None),) * (ndim - len(indexes))
|
|
47
|
+
else:
|
|
48
|
+
indexes = (slice(None),) * (ndim - len(indexes)) + indexes
|
|
46
49
|
if len(indexes) > ndim:
|
|
47
50
|
raise IndexingError("Too many indexers")
|
|
48
51
|
|
|
@@ -105,32 +108,35 @@ def process_iloc_indexes(inp, indexes):
|
|
|
105
108
|
|
|
106
109
|
|
|
107
110
|
class DataFrameIloc:
    """Integer-position (``iloc``) indexer bound to a DataFrame or Series.

    ``obj`` is the object being indexed. ``axis`` is handed to
    ``process_iloc_indexes`` and controls on which side missing indexers
    are padded; it is ``None`` unless the indexer was created by calling
    an existing indexer with an axis.
    """

    def __init__(self, obj, axis=None):
        self._obj = obj
        self._axis = axis

    def __getitem__(self, indexes):
        """Return the selection described by ``indexes``."""
        normalized = process_iloc_indexes(self._obj, indexes, axis=self._axis)
        # DataFrames and Series use different operator classes.
        op_cls = (
            DataFrameIlocGetItem
            if isinstance(self._obj, DATAFRAME_TYPE)
            else SeriesIlocGetItem
        )
        return op_cls(indexes=normalized)(self._obj)

    def __setitem__(self, indexes, value):
        """Assign the scalar ``value`` at ``indexes``, updating ``obj`` in place."""
        if not np.isscalar(value):
            raise NotImplementedError("Only scalar value is supported to set by iloc")

        normalized = process_iloc_indexes(self._obj, indexes, axis=self._axis)
        op_cls = (
            DataFrameIlocSetItem
            if isinstance(self._obj, DATAFRAME_TYPE)
            else SeriesIlocSetItem
        )
        updated = op_cls(indexes=normalized, value=value)(self._obj)
        # Swap the underlying data so the caller's object reflects the write.
        self._obj.data = updated.data

    def __call__(self, axis):
        """Return a new indexer over the same object restricted to ``axis``."""
        return DataFrameIloc(self._obj, validate_axis(axis, self._obj))
|
|
139
|
+
|
|
134
140
|
|
|
135
141
|
class HeadTailOptimizedOperatorMixin(DataFrameOperatorMixin):
|
|
136
142
|
__slots__ = ()
|
|
@@ -420,6 +426,140 @@ def index_setitem(_idx, *_):
|
|
|
420
426
|
|
|
421
427
|
|
|
422
428
|
def iloc(a):
    """
    Purely integer-location based indexing for selection by position.

    ``.iloc[]`` is primarily integer position based (from ``0`` to
    ``length-1`` of the axis), but may also be used with a boolean
    array.

    Allowed inputs are:

    - An integer, e.g. ``5``.
    - A list or array of integers, e.g. ``[4, 3, 0]``.
    - A slice object with ints, e.g. ``1:7``.
    - A boolean array.
    - A ``callable`` function with one argument (the calling Series or
      DataFrame) and that returns valid output for indexing (one of the above).
      This is useful in method chains, when you don't have a reference to the
      calling object, but would like to base your selection on some value.

    ``.iloc`` will raise ``IndexError`` if a requested indexer is
    out-of-bounds, except *slice* indexers which allow out-of-bounds
    indexing (this conforms with python/numpy *slice* semantics).

    See more at :ref:`Selection by Position <indexing.integer>`.

    See Also
    --------
    DataFrame.iat : Fast integer location scalar accessor.
    DataFrame.loc : Purely label-location based indexer for selection by label.
    Series.iloc : Purely integer-location based indexing for
                   selection by position.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4},
    ...           {'a': 100, 'b': 200, 'c': 300, 'd': 400},
    ...           {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000 }]
    >>> df = md.DataFrame(mydict)
    >>> df.execute()
          a     b     c     d
    0     1     2     3     4
    1   100   200   300   400
    2  1000  2000  3000  4000

    **Indexing just the rows**

    With a scalar integer.

    >>> type(df.iloc[0].execute())
    <class 'pandas.core.series.Series'>
    >>> df.iloc[0].execute()
    a    1
    b    2
    c    3
    d    4
    Name: 0, dtype: int64

    With a list of integers.

    >>> df.iloc[[0]].execute()
       a  b  c  d
    0  1  2  3  4
    >>> type(df.iloc[[0]].execute())
    <class 'pandas.core.frame.DataFrame'>

    >>> df.iloc[[0, 1]].execute()
         a    b    c    d
    0    1    2    3    4
    1  100  200  300  400

    With a `slice` object.

    >>> df.iloc[:3].execute()
          a     b     c     d
    0     1     2     3     4
    1   100   200   300   400
    2  1000  2000  3000  4000

    With a boolean mask the same length as the index.

    >>> df.iloc[[True, False, True]].execute()
          a     b     c     d
    0     1     2     3     4
    2  1000  2000  3000  4000

    With a callable, useful in method chains. The `x` passed
    to the ``lambda`` is the DataFrame being sliced. This selects
    the rows whose index label is even.

    >>> df.iloc[lambda x: x.index % 2 == 0].execute()
          a     b     c     d
    0     1     2     3     4
    2  1000  2000  3000  4000

    **Indexing both axes**

    You can mix the indexer types for the index and columns. Use ``:`` to
    select the entire axis.

    With scalar integers.

    >>> df.iloc[0, 1].execute()
    2

    With lists of integers.

    >>> df.iloc[[0, 2], [1, 3]].execute()
          b     d
    0     2     4
    2  2000  4000

    With `slice` objects.

    >>> df.iloc[1:3, 0:3].execute()
          a     b     c
    1   100   200   300
    2  1000  2000  3000

    With a boolean array whose length matches the columns.

    >>> df.iloc[:, [True, False, True, False]].execute()
          a     c
    0     1     3
    1   100   300
    2  1000  3000

    With a callable function that expects the Series or DataFrame.

    >>> df.iloc[:, lambda df: [0, 2]].execute()
          a     c
    0     1     3
    1   100   300
    2  1000  3000
    """
    return DataFrameIloc(a)
|
|
424
564
|
|
|
425
565
|
|
|
@@ -17,10 +17,10 @@ from typing import List
|
|
|
17
17
|
import pandas as pd
|
|
18
18
|
|
|
19
19
|
from ... import opcodes
|
|
20
|
-
from ...core import EntityData
|
|
20
|
+
from ...core import EntityData, get_output_types
|
|
21
21
|
from ...serialization.serializables import AnyField, BoolField, Int64Field
|
|
22
22
|
from ...tensor.core import TENSOR_TYPE
|
|
23
|
-
from ..core import SERIES_TYPE
|
|
23
|
+
from ..core import INDEX_TYPE, SERIES_TYPE
|
|
24
24
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
25
25
|
from ..utils import build_empty_df, parse_index
|
|
26
26
|
|
|
@@ -29,17 +29,18 @@ class DataFrameInsert(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
29
29
|
_op_type_ = opcodes.INSERT
|
|
30
30
|
|
|
31
31
|
loc = Int64Field("loc")
|
|
32
|
-
column = AnyField("column")
|
|
33
|
-
value = AnyField("value")
|
|
34
|
-
allow_duplicates = BoolField("allow_duplicates")
|
|
32
|
+
column = AnyField("column", default=None)
|
|
33
|
+
value = AnyField("value", default=None)
|
|
34
|
+
allow_duplicates = BoolField("allow_duplicates", default=False)
|
|
35
35
|
|
|
36
36
|
@classmethod
def _set_inputs(cls, op: "DataFrameInsert", inputs: List[EntityData]):
    """Bind ``inputs`` to ``op`` and refresh ``op.value`` when it is an input.

    When more than one input is supplied, the last entry of ``op._inputs``
    is stored back into ``op.value``; with a single input ``op.value`` is
    left untouched.
    """
    super()._set_inputs(op, inputs)
    if len(inputs) <= 1:
        # Only the target object was passed; there is no value input to rebind.
        return
    op.value = op._inputs[-1]
|
|
41
41
|
|
|
42
42
|
def __call__(self, df):
|
|
43
|
+
self._output_types = get_output_types(df)
|
|
43
44
|
inputs = [df]
|
|
44
45
|
if isinstance(self.value, (SERIES_TYPE, TENSOR_TYPE)):
|
|
45
46
|
value_dtype = self.value.dtype
|
|
@@ -47,19 +48,27 @@ class DataFrameInsert(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
47
48
|
else:
|
|
48
49
|
value_dtype = pd.Series(self.value).dtype
|
|
49
50
|
|
|
50
|
-
empty_df = build_empty_df(df.dtypes)
|
|
51
|
-
empty_df.insert(
|
|
52
|
-
loc=self.loc,
|
|
53
|
-
column=self.column,
|
|
54
|
-
allow_duplicates=self.allow_duplicates,
|
|
55
|
-
value=pd.Series([], dtype=value_dtype),
|
|
56
|
-
)
|
|
57
|
-
|
|
58
51
|
params = df.params
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
52
|
+
|
|
53
|
+
if df.ndim == 2:
|
|
54
|
+
empty_obj = build_empty_df(df.dtypes)
|
|
55
|
+
empty_obj.insert(
|
|
56
|
+
loc=self.loc,
|
|
57
|
+
column=self.column,
|
|
58
|
+
allow_duplicates=self.allow_duplicates,
|
|
59
|
+
value=pd.Series([], dtype=value_dtype),
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
params["columns_value"] = parse_index(empty_obj.columns, store_data=True)
|
|
63
|
+
params["dtypes"] = empty_obj.dtypes
|
|
64
|
+
params["shape"] = (df.shape[0], df.shape[1] + 1)
|
|
65
|
+
else:
|
|
66
|
+
assert isinstance(df, INDEX_TYPE)
|
|
67
|
+
params["index_value"] = parse_index(
|
|
68
|
+
df.index_value, type(self), df, self.loc, self.value
|
|
69
|
+
)
|
|
70
|
+
params["shape"] = (df.shape[0] + 1,)
|
|
71
|
+
return self.new_tileable(inputs, **params)
|
|
63
72
|
|
|
64
73
|
|
|
65
74
|
def df_insert(df, loc, column, value, allow_duplicates=False):
|
|
@@ -88,3 +97,22 @@ def df_insert(df, loc, column, value, allow_duplicates=False):
|
|
|
88
97
|
)
|
|
89
98
|
out_df = op(df)
|
|
90
99
|
df.data = out_df.data
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def index_insert(idx, loc, value):
    """
    Make new Index inserting new item at location.

    Follows Python list.append semantics for negative values.

    Parameters
    ----------
    loc : int
        Position at which the new item is inserted.
    value : object
        The item to insert into the index.

    Returns
    -------
    new_index : Index
    """
    # ``column`` is left at its field default; only ``loc`` and ``value``
    # are supplied when inserting into an index.
    op = DataFrameInsert(loc=loc, value=value)
    return op(idx)
|