maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
maxframe/dataframe/core.py
CHANGED
|
@@ -56,8 +56,11 @@ from ..utils import (
|
|
|
56
56
|
ceildiv,
|
|
57
57
|
estimate_pandas_size,
|
|
58
58
|
on_serialize_numpy_type,
|
|
59
|
+
pd_release_version,
|
|
60
|
+
prevent_called_from_pandas,
|
|
59
61
|
tokenize,
|
|
60
62
|
)
|
|
63
|
+
from .typing_ import DataFrameType, IndexType, SeriesType
|
|
61
64
|
from .utils import (
|
|
62
65
|
ReprSeries,
|
|
63
66
|
apply_if_callable,
|
|
@@ -66,6 +69,8 @@ from .utils import (
|
|
|
66
69
|
parse_index,
|
|
67
70
|
)
|
|
68
71
|
|
|
72
|
+
_df_with_iteritems = pd_release_version[:2] < (2, 0)
|
|
73
|
+
|
|
69
74
|
|
|
70
75
|
class IndexValue(Serializable):
|
|
71
76
|
"""
|
|
@@ -478,9 +483,17 @@ _lazy_chunk_meta_properties = (
|
|
|
478
483
|
)
|
|
479
484
|
|
|
480
485
|
|
|
486
|
+
def _calc_cum_nsplit(nsplit: Tuple[int]) -> List[int]:
|
|
487
|
+
return [0] + np.cumsum(nsplit).tolist()
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def calc_cum_nsplits(nsplits: Tuple[Tuple[int]]) -> List[List[int]]:
|
|
491
|
+
return tuple(_calc_cum_nsplit(nsplit) for nsplit in nsplits)
|
|
492
|
+
|
|
493
|
+
|
|
481
494
|
@functools.lru_cache(maxsize=128)
|
|
482
495
|
def _get_cum_nsplit(nsplit: Tuple[int]) -> List[int]:
|
|
483
|
-
return
|
|
496
|
+
return _calc_cum_nsplit(nsplit)
|
|
484
497
|
|
|
485
498
|
|
|
486
499
|
def _calc_axis_slice(nsplit: Tuple[int], index: int) -> slice:
|
|
@@ -684,6 +697,10 @@ class IndexData(HasShapeTileableData, _ToPandasMixin):
|
|
|
684
697
|
def names(self):
|
|
685
698
|
return getattr(self, "_names", None) or [self.name]
|
|
686
699
|
|
|
700
|
+
@property
|
|
701
|
+
def nlevels(self) -> int:
|
|
702
|
+
return len(self.names)
|
|
703
|
+
|
|
687
704
|
@property
|
|
688
705
|
def index_value(self) -> IndexValue:
|
|
689
706
|
return self._index_value
|
|
@@ -698,55 +715,6 @@ class IndexData(HasShapeTileableData, _ToPandasMixin):
|
|
|
698
715
|
return from_index(self, dtype=dtype, extract_multi_index=extract_multi_index)
|
|
699
716
|
|
|
700
717
|
def to_frame(self, index: bool = True, name=None):
|
|
701
|
-
"""
|
|
702
|
-
Create a DataFrame with a column containing the Index.
|
|
703
|
-
|
|
704
|
-
Parameters
|
|
705
|
-
----------
|
|
706
|
-
index : bool, default True
|
|
707
|
-
Set the index of the returned DataFrame as the original Index.
|
|
708
|
-
|
|
709
|
-
name : object, default None
|
|
710
|
-
The passed name should substitute for the index name (if it has
|
|
711
|
-
one).
|
|
712
|
-
|
|
713
|
-
Returns
|
|
714
|
-
-------
|
|
715
|
-
DataFrame
|
|
716
|
-
DataFrame containing the original Index data.
|
|
717
|
-
|
|
718
|
-
See Also
|
|
719
|
-
--------
|
|
720
|
-
Index.to_series : Convert an Index to a Series.
|
|
721
|
-
Series.to_frame : Convert Series to DataFrame.
|
|
722
|
-
|
|
723
|
-
Examples
|
|
724
|
-
--------
|
|
725
|
-
>>> import maxframe.dataframe as md
|
|
726
|
-
>>> idx = md.Index(['Ant', 'Bear', 'Cow'], name='animal')
|
|
727
|
-
>>> idx.to_frame().execute()
|
|
728
|
-
animal
|
|
729
|
-
animal
|
|
730
|
-
Ant Ant
|
|
731
|
-
Bear Bear
|
|
732
|
-
Cow Cow
|
|
733
|
-
|
|
734
|
-
By default, the original Index is reused. To enforce a new Index:
|
|
735
|
-
|
|
736
|
-
>>> idx.to_frame(index=False).execute()
|
|
737
|
-
animal
|
|
738
|
-
0 Ant
|
|
739
|
-
1 Bear
|
|
740
|
-
2 Cow
|
|
741
|
-
|
|
742
|
-
To override the name of the resulting column, specify `name`:
|
|
743
|
-
|
|
744
|
-
>>> idx.to_frame(index=False, name='zoo').execute()
|
|
745
|
-
zoo
|
|
746
|
-
0 Ant
|
|
747
|
-
1 Bear
|
|
748
|
-
2 Cow
|
|
749
|
-
"""
|
|
750
718
|
from . import dataframe_from_tensor
|
|
751
719
|
|
|
752
720
|
if isinstance(self.index_value.value, IndexValue.MultiIndex):
|
|
@@ -772,34 +740,20 @@ class IndexData(HasShapeTileableData, _ToPandasMixin):
|
|
|
772
740
|
columns = [name or self.name or 0]
|
|
773
741
|
index_ = self if index else None
|
|
774
742
|
return dataframe_from_tensor(
|
|
775
|
-
self._to_maxframe_tensor(
|
|
743
|
+
self._to_maxframe_tensor(extract_multi_index=True),
|
|
776
744
|
index=index_,
|
|
777
745
|
columns=columns,
|
|
778
746
|
)
|
|
779
747
|
|
|
780
748
|
def to_series(self, index=None, name=None):
|
|
781
|
-
"""
|
|
782
|
-
Create a Series with both index and values equal to the index keys.
|
|
783
|
-
|
|
784
|
-
Useful with map for returning an indexer based on an index.
|
|
785
|
-
|
|
786
|
-
Parameters
|
|
787
|
-
----------
|
|
788
|
-
index : Index, optional
|
|
789
|
-
Index of resulting Series. If None, defaults to original index.
|
|
790
|
-
name : str, optional
|
|
791
|
-
Name of resulting Series. If None, defaults to name of original
|
|
792
|
-
index.
|
|
793
|
-
|
|
794
|
-
Returns
|
|
795
|
-
-------
|
|
796
|
-
Series
|
|
797
|
-
The dtype will be based on the type of the Index values.
|
|
798
|
-
"""
|
|
799
749
|
from . import series_from_index
|
|
800
750
|
|
|
801
751
|
return series_from_index(self, index=index, name=name)
|
|
802
752
|
|
|
753
|
+
@property
|
|
754
|
+
def hasnans(self):
|
|
755
|
+
return self.isna().any()
|
|
756
|
+
|
|
803
757
|
|
|
804
758
|
class Index(HasShapeTileable, _ToPandasMixin):
|
|
805
759
|
__slots__ = "_df_or_series", "_parent_key", "_axis"
|
|
@@ -818,6 +772,9 @@ class Index(HasShapeTileable, _ToPandasMixin):
|
|
|
818
772
|
def __len__(self):
|
|
819
773
|
return len(self._data)
|
|
820
774
|
|
|
775
|
+
def __class_getitem__(cls, item):
|
|
776
|
+
return IndexType.from_getitem_args(item)
|
|
777
|
+
|
|
821
778
|
def __maxframe_tensor__(self, dtype=None, order="K"):
|
|
822
779
|
return self._data.__maxframe_tensor__(dtype=dtype, order=order)
|
|
823
780
|
|
|
@@ -867,6 +824,99 @@ class Index(HasShapeTileable, _ToPandasMixin):
|
|
|
867
824
|
def values(self):
|
|
868
825
|
return self.to_tensor()
|
|
869
826
|
|
|
827
|
+
def to_frame(self, index: bool = True, name=None):
|
|
828
|
+
"""
|
|
829
|
+
Create a DataFrame with a column containing the Index.
|
|
830
|
+
|
|
831
|
+
Parameters
|
|
832
|
+
----------
|
|
833
|
+
index : bool, default True
|
|
834
|
+
Set the index of the returned DataFrame as the original Index.
|
|
835
|
+
|
|
836
|
+
name : object, default None
|
|
837
|
+
The passed name should substitute for the index name (if it has
|
|
838
|
+
one).
|
|
839
|
+
|
|
840
|
+
Returns
|
|
841
|
+
-------
|
|
842
|
+
DataFrame
|
|
843
|
+
DataFrame containing the original Index data.
|
|
844
|
+
|
|
845
|
+
See Also
|
|
846
|
+
--------
|
|
847
|
+
Index.to_series : Convert an Index to a Series.
|
|
848
|
+
Series.to_frame : Convert Series to DataFrame.
|
|
849
|
+
|
|
850
|
+
Examples
|
|
851
|
+
--------
|
|
852
|
+
>>> import maxframe.dataframe as md
|
|
853
|
+
>>> idx = md.Index(['Ant', 'Bear', 'Cow'], name='animal')
|
|
854
|
+
>>> idx.to_frame().execute()
|
|
855
|
+
animal
|
|
856
|
+
animal
|
|
857
|
+
Ant Ant
|
|
858
|
+
Bear Bear
|
|
859
|
+
Cow Cow
|
|
860
|
+
|
|
861
|
+
By default, the original Index is reused. To enforce a new Index:
|
|
862
|
+
|
|
863
|
+
>>> idx.to_frame(index=False).execute()
|
|
864
|
+
animal
|
|
865
|
+
0 Ant
|
|
866
|
+
1 Bear
|
|
867
|
+
2 Cow
|
|
868
|
+
|
|
869
|
+
To override the name of the resulting column, specify `name`:
|
|
870
|
+
|
|
871
|
+
>>> idx.to_frame(index=False, name='zoo').execute()
|
|
872
|
+
zoo
|
|
873
|
+
0 Ant
|
|
874
|
+
1 Bear
|
|
875
|
+
2 Cow
|
|
876
|
+
"""
|
|
877
|
+
return self._data.to_frame(index=index, name=name)
|
|
878
|
+
|
|
879
|
+
def to_series(self, index=None, name=None):
|
|
880
|
+
"""
|
|
881
|
+
Create a Series with both index and values equal to the index keys.
|
|
882
|
+
|
|
883
|
+
Useful with map for returning an indexer based on an index.
|
|
884
|
+
|
|
885
|
+
Parameters
|
|
886
|
+
----------
|
|
887
|
+
index : Index, optional
|
|
888
|
+
Index of resulting Series. If None, defaults to original index.
|
|
889
|
+
name : str, optional
|
|
890
|
+
Name of resulting Series. If None, defaults to name of original
|
|
891
|
+
index.
|
|
892
|
+
|
|
893
|
+
Returns
|
|
894
|
+
-------
|
|
895
|
+
Series
|
|
896
|
+
The dtype will be based on the type of the Index values.
|
|
897
|
+
"""
|
|
898
|
+
return self._data.to_series(index=index, name=name)
|
|
899
|
+
|
|
900
|
+
@property
|
|
901
|
+
def hasnans(self):
|
|
902
|
+
"""
|
|
903
|
+
Return True if there are any NaNs.
|
|
904
|
+
|
|
905
|
+
Returns
|
|
906
|
+
-------
|
|
907
|
+
bool
|
|
908
|
+
|
|
909
|
+
Examples
|
|
910
|
+
--------
|
|
911
|
+
>>> import maxframe.dataframe as md
|
|
912
|
+
>>> idx = md.Index([1, 2, 3, None])
|
|
913
|
+
>>> idx.execute()
|
|
914
|
+
Index([1.0, 2.0, 3.0, nan], dtype='float64')
|
|
915
|
+
>>> idx.hasnans.execute()
|
|
916
|
+
True
|
|
917
|
+
"""
|
|
918
|
+
return self._data.hasnans
|
|
919
|
+
|
|
870
920
|
|
|
871
921
|
class RangeIndex(Index):
|
|
872
922
|
__slots__ = ()
|
|
@@ -1049,12 +1099,6 @@ class BaseSeriesData(HasShapeTileableData, _ToPandasMixin):
|
|
|
1049
1099
|
|
|
1050
1100
|
return from_series(self, dtype=dtype)
|
|
1051
1101
|
|
|
1052
|
-
@staticmethod
|
|
1053
|
-
def from_tensor(in_tensor, index=None, name=None):
|
|
1054
|
-
from .datasource.from_tensor import series_from_tensor
|
|
1055
|
-
|
|
1056
|
-
return series_from_tensor(in_tensor, index=index, name=name)
|
|
1057
|
-
|
|
1058
1102
|
|
|
1059
1103
|
class SeriesData(_BatchedFetcher, BaseSeriesData):
|
|
1060
1104
|
type_name = "Series"
|
|
@@ -1065,29 +1109,51 @@ class SeriesData(_BatchedFetcher, BaseSeriesData):
|
|
|
1065
1109
|
return tensor.astype(dtype=dtype, order=order, copy=False)
|
|
1066
1110
|
|
|
1067
1111
|
def iteritems(self, batch_size=10000, session=None):
|
|
1112
|
+
method_name = "iteritems" if _df_with_iteritems else "items"
|
|
1068
1113
|
for batch_data in self.iterbatch(batch_size=batch_size, session=session):
|
|
1069
|
-
yield from getattr(batch_data,
|
|
1114
|
+
yield from getattr(batch_data, method_name)()
|
|
1070
1115
|
|
|
1071
1116
|
items = iteritems
|
|
1072
1117
|
|
|
1073
|
-
def to_dict(self, into=dict, batch_size=10000, session=None):
|
|
1074
|
-
fetch_kwargs = dict(batch_size=batch_size)
|
|
1075
|
-
return self.to_pandas(session=session, fetch_kwargs=fetch_kwargs).to_dict(
|
|
1076
|
-
into=into
|
|
1077
|
-
)
|
|
1078
|
-
|
|
1079
1118
|
def to_frame(self, name=None):
|
|
1080
1119
|
from . import dataframe_from_tensor
|
|
1081
1120
|
|
|
1082
1121
|
name = name or self.name or 0
|
|
1083
1122
|
return dataframe_from_tensor(self, columns=[name])
|
|
1084
1123
|
|
|
1124
|
+
@property
|
|
1125
|
+
def hasnans(self):
|
|
1126
|
+
"""
|
|
1127
|
+
Return True if there are any NaNs.
|
|
1128
|
+
|
|
1129
|
+
Returns
|
|
1130
|
+
-------
|
|
1131
|
+
bool
|
|
1132
|
+
|
|
1133
|
+
Examples
|
|
1134
|
+
--------
|
|
1135
|
+
>>> import maxframe.dataframe as md
|
|
1136
|
+
>>> s = md.Series([1, 2, 3, None])
|
|
1137
|
+
>>> s.execute()
|
|
1138
|
+
0 1.0
|
|
1139
|
+
1 2.0
|
|
1140
|
+
2 3.0
|
|
1141
|
+
3 NaN
|
|
1142
|
+
dtype: float64
|
|
1143
|
+
>>> s.hasnans.execute()
|
|
1144
|
+
True
|
|
1145
|
+
"""
|
|
1146
|
+
return self.isna().any()
|
|
1147
|
+
|
|
1085
1148
|
|
|
1086
1149
|
class Series(HasShapeTileable, _ToPandasMixin):
|
|
1087
1150
|
__slots__ = ("_cache",)
|
|
1088
1151
|
_allow_data_type_ = (SeriesData,)
|
|
1089
1152
|
type_name = "Series"
|
|
1090
1153
|
|
|
1154
|
+
def __class_getitem__(cls, item):
|
|
1155
|
+
return SeriesType.from_getitem_args(item)
|
|
1156
|
+
|
|
1091
1157
|
def to_tensor(self, dtype=None):
|
|
1092
1158
|
return self._data.to_tensor(dtype=dtype)
|
|
1093
1159
|
|
|
@@ -1185,6 +1251,11 @@ class Series(HasShapeTileable, _ToPandasMixin):
|
|
|
1185
1251
|
else:
|
|
1186
1252
|
return super()._view()
|
|
1187
1253
|
|
|
1254
|
+
def __iter__(self):
|
|
1255
|
+
# prevent being called by pandas to make sure `__eq__` works
|
|
1256
|
+
prevent_called_from_pandas()
|
|
1257
|
+
return (tp[1] for tp in self.items())
|
|
1258
|
+
|
|
1188
1259
|
def __len__(self):
|
|
1189
1260
|
return len(self._data)
|
|
1190
1261
|
|
|
@@ -1238,38 +1309,6 @@ class Series(HasShapeTileable, _ToPandasMixin):
|
|
|
1238
1309
|
|
|
1239
1310
|
items = iteritems
|
|
1240
1311
|
|
|
1241
|
-
def to_dict(self, into=dict, batch_size=10000, session=None):
|
|
1242
|
-
"""
|
|
1243
|
-
Convert Series to {label -> value} dict or dict-like object.
|
|
1244
|
-
|
|
1245
|
-
Parameters
|
|
1246
|
-
----------
|
|
1247
|
-
into : class, default dict
|
|
1248
|
-
The collections.abc.Mapping subclass to use as the return
|
|
1249
|
-
object. Can be the actual class or an empty
|
|
1250
|
-
instance of the mapping type you want. If you want a
|
|
1251
|
-
collections.defaultdict, you must pass it initialized.
|
|
1252
|
-
|
|
1253
|
-
Returns
|
|
1254
|
-
-------
|
|
1255
|
-
collections.abc.Mapping
|
|
1256
|
-
Key-value representation of Series.
|
|
1257
|
-
|
|
1258
|
-
Examples
|
|
1259
|
-
--------
|
|
1260
|
-
>>> import maxframe.dataframe as md
|
|
1261
|
-
>>> s = md.Series([1, 2, 3, 4])
|
|
1262
|
-
>>> s.to_dict()
|
|
1263
|
-
{0: 1, 1: 2, 2: 3, 3: 4}
|
|
1264
|
-
>>> from collections import OrderedDict, defaultdict
|
|
1265
|
-
>>> s.to_dict(OrderedDict)
|
|
1266
|
-
OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
|
|
1267
|
-
>>> dd = defaultdict(list)
|
|
1268
|
-
>>> s.to_dict(dd)
|
|
1269
|
-
defaultdict(<class 'list'>, {0: 1, 1: 2, 2: 3, 3: 4})
|
|
1270
|
-
"""
|
|
1271
|
-
return self._data.to_dict(into=into, batch_size=batch_size, session=session)
|
|
1272
|
-
|
|
1273
1312
|
def to_frame(self, name=None):
|
|
1274
1313
|
"""
|
|
1275
1314
|
Convert Series to DataFrame.
|
|
@@ -1297,98 +1336,6 @@ class Series(HasShapeTileable, _ToPandasMixin):
|
|
|
1297
1336
|
"""
|
|
1298
1337
|
return self._data.to_frame(name=name)
|
|
1299
1338
|
|
|
1300
|
-
def between(self, left, right, inclusive="both"):
|
|
1301
|
-
"""
|
|
1302
|
-
Return boolean Series equivalent to left <= series <= right.
|
|
1303
|
-
This function returns a boolean vector containing `True` wherever the
|
|
1304
|
-
corresponding Series element is between the boundary values `left` and
|
|
1305
|
-
`right`. NA values are treated as `False`.
|
|
1306
|
-
|
|
1307
|
-
Parameters
|
|
1308
|
-
----------
|
|
1309
|
-
left : scalar or list-like
|
|
1310
|
-
Left boundary.
|
|
1311
|
-
right : scalar or list-like
|
|
1312
|
-
Right boundary.
|
|
1313
|
-
inclusive : {"both", "neither", "left", "right"}
|
|
1314
|
-
Include boundaries. Whether to set each bound as closed or open.
|
|
1315
|
-
|
|
1316
|
-
Returns
|
|
1317
|
-
-------
|
|
1318
|
-
Series
|
|
1319
|
-
Series representing whether each element is between left and
|
|
1320
|
-
right (inclusive).
|
|
1321
|
-
|
|
1322
|
-
See Also
|
|
1323
|
-
--------
|
|
1324
|
-
Series.gt : Greater than of series and other.
|
|
1325
|
-
Series.lt : Less than of series and other.
|
|
1326
|
-
|
|
1327
|
-
Notes
|
|
1328
|
-
-----
|
|
1329
|
-
This function is equivalent to ``(left <= ser) & (ser <= right)``
|
|
1330
|
-
|
|
1331
|
-
Examples
|
|
1332
|
-
--------
|
|
1333
|
-
>>> import maxframe.dataframe as md
|
|
1334
|
-
>>> s = md.Series([2, 0, 4, 8, np.nan])
|
|
1335
|
-
|
|
1336
|
-
Boundary values are included by default:
|
|
1337
|
-
|
|
1338
|
-
>>> s.between(1, 4).execute()
|
|
1339
|
-
0 True
|
|
1340
|
-
1 False
|
|
1341
|
-
2 True
|
|
1342
|
-
3 False
|
|
1343
|
-
4 False
|
|
1344
|
-
dtype: bool
|
|
1345
|
-
|
|
1346
|
-
With `inclusive` set to ``"neither"`` boundary values are excluded:
|
|
1347
|
-
|
|
1348
|
-
>>> s.between(1, 4, inclusive="neither").execute()
|
|
1349
|
-
0 True
|
|
1350
|
-
1 False
|
|
1351
|
-
2 False
|
|
1352
|
-
3 False
|
|
1353
|
-
4 False
|
|
1354
|
-
dtype: bool
|
|
1355
|
-
|
|
1356
|
-
`left` and `right` can be any scalar value:
|
|
1357
|
-
|
|
1358
|
-
>>> s = md.Series(['Alice', 'Bob', 'Carol', 'Eve'])
|
|
1359
|
-
>>> s.between('Anna', 'Daniel').execute()
|
|
1360
|
-
0 False
|
|
1361
|
-
1 True
|
|
1362
|
-
2 True
|
|
1363
|
-
3 False
|
|
1364
|
-
dtype: bool
|
|
1365
|
-
"""
|
|
1366
|
-
if isinstance(inclusive, bool): # pragma: no cover
|
|
1367
|
-
# for pandas < 1.3.0
|
|
1368
|
-
if inclusive:
|
|
1369
|
-
inclusive = "both"
|
|
1370
|
-
else:
|
|
1371
|
-
inclusive = "neither"
|
|
1372
|
-
if inclusive == "both":
|
|
1373
|
-
lmask = self >= left
|
|
1374
|
-
rmask = self <= right
|
|
1375
|
-
elif inclusive == "left":
|
|
1376
|
-
lmask = self >= left
|
|
1377
|
-
rmask = self < right
|
|
1378
|
-
elif inclusive == "right":
|
|
1379
|
-
lmask = self > left
|
|
1380
|
-
rmask = self <= right
|
|
1381
|
-
elif inclusive == "neither":
|
|
1382
|
-
lmask = self > left
|
|
1383
|
-
rmask = self < right
|
|
1384
|
-
else:
|
|
1385
|
-
raise ValueError(
|
|
1386
|
-
"Inclusive has to be either string of 'both',"
|
|
1387
|
-
"'left', 'right', or 'neither'."
|
|
1388
|
-
)
|
|
1389
|
-
|
|
1390
|
-
return lmask & rmask
|
|
1391
|
-
|
|
1392
1339
|
# def median(
|
|
1393
1340
|
# self, axis=None, skipna=True, out=None, overwrite_input=False, keepdims=False
|
|
1394
1341
|
# ):
|
|
@@ -1589,18 +1536,6 @@ class BaseDataFrameData(HasShapeTileableData, _ToPandasMixin):
|
|
|
1589
1536
|
|
|
1590
1537
|
return from_dataframe(self, dtype=dtype)
|
|
1591
1538
|
|
|
1592
|
-
@staticmethod
|
|
1593
|
-
def from_tensor(in_tensor, index=None, columns=None):
|
|
1594
|
-
from .datasource.from_tensor import dataframe_from_tensor
|
|
1595
|
-
|
|
1596
|
-
return dataframe_from_tensor(in_tensor, index=index, columns=columns)
|
|
1597
|
-
|
|
1598
|
-
@staticmethod
|
|
1599
|
-
def from_records(records, **kw):
|
|
1600
|
-
from .datasource.from_records import from_records
|
|
1601
|
-
|
|
1602
|
-
return from_records(records, **kw)
|
|
1603
|
-
|
|
1604
1539
|
@property
|
|
1605
1540
|
def index(self):
|
|
1606
1541
|
from .datasource.index import from_tileable
|
|
@@ -1747,12 +1682,6 @@ class DataFrame(HasShapeTileable, _ToPandasMixin):
|
|
|
1747
1682
|
def to_tensor(self):
|
|
1748
1683
|
return self._data.to_tensor()
|
|
1749
1684
|
|
|
1750
|
-
def from_tensor(self, in_tensor, index=None, columns=None):
|
|
1751
|
-
return self._data.from_tensor(in_tensor, index=index, columns=columns)
|
|
1752
|
-
|
|
1753
|
-
def from_records(self, records, **kw):
|
|
1754
|
-
return self._data.from_records(records, **kw)
|
|
1755
|
-
|
|
1756
1685
|
def __maxframe_tensor__(self, dtype=None, order="K"):
|
|
1757
1686
|
return self._data.__maxframe_tensor__(dtype=dtype, order=order)
|
|
1758
1687
|
|
|
@@ -1772,6 +1701,14 @@ class DataFrame(HasShapeTileable, _ToPandasMixin):
|
|
|
1772
1701
|
+ [k for k in self.dtypes.index if isinstance(k, str) and k.isidentifier()]
|
|
1773
1702
|
)
|
|
1774
1703
|
|
|
1704
|
+
def __iter__(self):
|
|
1705
|
+
# prevent being called by pandas to make sure `__eq__` works
|
|
1706
|
+
prevent_called_from_pandas()
|
|
1707
|
+
return iter(self.dtypes.index)
|
|
1708
|
+
|
|
1709
|
+
def __class_getitem__(cls, item):
|
|
1710
|
+
return DataFrameType.from_getitem_args(item)
|
|
1711
|
+
|
|
1775
1712
|
@property
|
|
1776
1713
|
def T(self):
|
|
1777
1714
|
return self.transpose()
|
|
@@ -13,3 +13,21 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
from .core import PandasDataSourceOperator
|
|
16
|
+
from .from_dict import dataframe_from_dict
|
|
17
|
+
from .from_records import from_records
|
|
18
|
+
from .from_tensor import dataframe_from_tensor, series_from_tensor
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _install():
|
|
22
|
+
from ..core import DATAFRAME_TYPE, SERIES_TYPE
|
|
23
|
+
|
|
24
|
+
for t in DATAFRAME_TYPE:
|
|
25
|
+
t.from_dict = staticmethod(dataframe_from_dict)
|
|
26
|
+
t.from_records = staticmethod(from_records)
|
|
27
|
+
t.from_tensor = staticmethod(dataframe_from_tensor)
|
|
28
|
+
for t in SERIES_TYPE:
|
|
29
|
+
t.from_tensor = staticmethod(series_from_tensor)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
_install()
|
|
33
|
+
del _install
|
|
@@ -18,6 +18,7 @@ from typing import List, MutableMapping, Optional, Union
|
|
|
18
18
|
from ...serialization.serializables import Int64Field, StringField
|
|
19
19
|
from ...utils import estimate_pandas_size
|
|
20
20
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
21
|
+
from ..utils import validate_dtype_backend
|
|
21
22
|
|
|
22
23
|
|
|
23
24
|
class HeadOptimizedDataSource(DataFrameOperator, DataFrameOperatorMixin):
|
|
@@ -86,3 +87,8 @@ class PandasDataSourceOperator(DataFrameOperator):
|
|
|
86
87
|
cls, ctx: MutableMapping[str, Union[int, float]], op: "PandasDataSourceOperator"
|
|
87
88
|
):
|
|
88
89
|
ctx[op.outputs[0].key] = estimate_pandas_size(op.get_data())
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class DtypeBackendCompatibleMixin:
|
|
93
|
+
def __on_deserialize__(self):
|
|
94
|
+
self.dtype_backend = validate_dtype_backend(self.dtype_backend)
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import pandas as pd
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def read_clipboard(sep=None, **kwargs):
|
|
19
|
+
"""
|
|
20
|
+
Read text from clipboard and pass to :func:`~pandas.read_csv`.
|
|
21
|
+
|
|
22
|
+
Parses clipboard contents similar to how CSV files are parsed
|
|
23
|
+
using :func:`~pandas.read_csv`.
|
|
24
|
+
|
|
25
|
+
Parameters
|
|
26
|
+
----------
|
|
27
|
+
sep : str, default '\\s+'
|
|
28
|
+
A string or regex delimiter. The default of ``'\\s+'`` denotes
|
|
29
|
+
one or more whitespace characters.
|
|
30
|
+
|
|
31
|
+
**kwargs
|
|
32
|
+
See :func:`~pandas.read_csv` for the full argument list.
|
|
33
|
+
|
|
34
|
+
Returns
|
|
35
|
+
-------
|
|
36
|
+
DataFrame
|
|
37
|
+
A parsed :class:`DataFrame` object.
|
|
38
|
+
|
|
39
|
+
See Also
|
|
40
|
+
--------
|
|
41
|
+
DataFrame.to_clipboard : Copy object to the system clipboard.
|
|
42
|
+
read_csv : Read a comma-separated values (csv) file into DataFrame.
|
|
43
|
+
read_fwf : Read a table of fixed-width formatted lines into DataFrame.
|
|
44
|
+
|
|
45
|
+
Examples
|
|
46
|
+
--------
|
|
47
|
+
>>> import maxframe.dataframe as md
|
|
48
|
+
>>> df = md.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C'])
|
|
49
|
+
>>> df.to_clipboard() # doctest: +SKIP
|
|
50
|
+
>>> md.read_clipboard().execute() # doctest: +SKIP
|
|
51
|
+
A B C
|
|
52
|
+
0 1 2 3
|
|
53
|
+
1 4 5 6
|
|
54
|
+
"""
|
|
55
|
+
from ..initializer import DataFrame
|
|
56
|
+
|
|
57
|
+
return DataFrame(pd.read_clipboard(sep=sep, **kwargs))
|