PyPI - maxframe - Versions diffs - 2.0.0b2__cp38-cp38-win_amd64.whl → 2.2.0__cp38-cp38-win_amd64.whl - Mend

maxframe 2.0.0b2__cp38-cp38-win_amd64.whl → 2.2.0__cp38-cp38-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of maxframe might be problematic. Click here for more details.

Files changed (391)

maxframe/__init__.py +1 -0
maxframe/_utils.cp38-win_amd64.pyd +0 -0
maxframe/_utils.pyx +14 -1
maxframe/codegen/core.py +6 -6
maxframe/codegen/spe/core.py +1 -1
maxframe/codegen/spe/dataframe/__init__.py +1 -0
maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
maxframe/codegen/spe/dataframe/groupby.py +88 -0
maxframe/codegen/spe/dataframe/indexing.py +99 -4
maxframe/codegen/spe/dataframe/merge.py +34 -1
maxframe/codegen/spe/dataframe/misc.py +9 -33
maxframe/codegen/spe/dataframe/reduction.py +14 -9
maxframe/codegen/spe/dataframe/reshape.py +46 -0
maxframe/codegen/spe/dataframe/sort.py +30 -17
maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
maxframe/codegen/spe/tensor/__init__.py +3 -0
maxframe/codegen/spe/tensor/fft.py +74 -0
maxframe/codegen/spe/tensor/linalg.py +29 -2
maxframe/codegen/spe/tensor/misc.py +79 -25
maxframe/codegen/spe/tensor/spatial.py +45 -0
maxframe/codegen/spe/tensor/statistics.py +44 -0
maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
maxframe/codegen/spe/utils.py +2 -0
maxframe/config/config.py +70 -9
maxframe/config/tests/test_validators.py +13 -1
maxframe/config/validators.py +49 -0
maxframe/conftest.py +44 -17
maxframe/core/accessor.py +2 -2
maxframe/core/entity/core.py +5 -0
maxframe/core/entity/tileables.py +1 -1
maxframe/core/graph/core.cp38-win_amd64.pyd +0 -0
maxframe/core/graph/entity.py +1 -2
maxframe/core/operator/base.py +9 -2
maxframe/core/operator/core.py +10 -2
maxframe/core/operator/utils.py +13 -0
maxframe/dataframe/__init__.py +10 -3
maxframe/dataframe/accessors/__init__.py +1 -1
maxframe/dataframe/accessors/compat.py +45 -0
maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
maxframe/dataframe/accessors/dict_/contains.py +7 -16
maxframe/dataframe/accessors/dict_/core.py +48 -0
maxframe/dataframe/accessors/dict_/getitem.py +17 -21
maxframe/dataframe/accessors/dict_/length.py +7 -16
maxframe/dataframe/accessors/dict_/remove.py +6 -18
maxframe/dataframe/accessors/dict_/setitem.py +8 -18
maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
maxframe/dataframe/accessors/list_/__init__.py +2 -2
maxframe/dataframe/accessors/list_/core.py +48 -0
maxframe/dataframe/accessors/list_/getitem.py +12 -19
maxframe/dataframe/accessors/list_/length.py +7 -16
maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
maxframe/dataframe/accessors/string_/__init__.py +4 -1
maxframe/dataframe/accessors/struct_/__init__.py +37 -0
maxframe/dataframe/accessors/struct_/accessor.py +39 -0
maxframe/dataframe/accessors/struct_/core.py +43 -0
maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
maxframe/dataframe/accessors/struct_/field.py +123 -0
maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
maxframe/dataframe/arithmetic/__init__.py +14 -4
maxframe/dataframe/arithmetic/between.py +106 -0
maxframe/dataframe/arithmetic/dot.py +237 -0
maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
maxframe/dataframe/core.py +63 -118
maxframe/dataframe/datasource/__init__.py +18 -0
maxframe/dataframe/datasource/from_dict.py +124 -0
maxframe/dataframe/datasource/from_index.py +1 -1
maxframe/dataframe/datasource/from_records.py +77 -0
maxframe/dataframe/datasource/from_tensor.py +109 -41
maxframe/dataframe/datasource/read_csv.py +2 -3
maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
maxframe/dataframe/datastore/__init__.py +5 -1
maxframe/dataframe/datastore/to_csv.py +29 -41
maxframe/dataframe/datastore/to_odps.py +30 -4
maxframe/dataframe/extensions/__init__.py +20 -4
maxframe/dataframe/extensions/apply_chunk.py +32 -6
maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
maxframe/dataframe/extensions/collect_kv.py +126 -0
maxframe/dataframe/extensions/extract_kv.py +177 -0
maxframe/dataframe/extensions/map_reduce.py +263 -0
maxframe/dataframe/extensions/rebalance.py +62 -0
maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
maxframe/dataframe/groupby/__init__.py +12 -1
maxframe/dataframe/groupby/aggregation.py +78 -45
maxframe/dataframe/groupby/apply.py +1 -1
maxframe/dataframe/groupby/apply_chunk.py +18 -2
maxframe/dataframe/groupby/core.py +96 -12
maxframe/dataframe/groupby/cum.py +4 -25
maxframe/dataframe/groupby/expanding.py +264 -0
maxframe/dataframe/groupby/fill.py +1 -1
maxframe/dataframe/groupby/getitem.py +12 -5
maxframe/dataframe/groupby/head.py +11 -1
maxframe/dataframe/groupby/rank.py +136 -0
maxframe/dataframe/groupby/rolling.py +206 -0
maxframe/dataframe/groupby/shift.py +114 -0
maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
maxframe/dataframe/indexing/__init__.py +20 -1
maxframe/dataframe/indexing/droplevel.py +195 -0
maxframe/dataframe/indexing/filter.py +169 -0
maxframe/dataframe/indexing/get_level_values.py +76 -0
maxframe/dataframe/indexing/iat.py +45 -0
maxframe/dataframe/indexing/iloc.py +152 -12
maxframe/dataframe/indexing/insert.py +1 -1
maxframe/dataframe/indexing/loc.py +287 -7
maxframe/dataframe/indexing/reindex.py +14 -5
maxframe/dataframe/indexing/rename.py +6 -0
maxframe/dataframe/indexing/rename_axis.py +2 -2
maxframe/dataframe/indexing/reorder_levels.py +143 -0
maxframe/dataframe/indexing/reset_index.py +33 -6
maxframe/dataframe/indexing/sample.py +8 -0
maxframe/dataframe/indexing/setitem.py +3 -3
maxframe/dataframe/indexing/swaplevel.py +185 -0
maxframe/dataframe/indexing/take.py +99 -0
maxframe/dataframe/indexing/truncate.py +140 -0
maxframe/dataframe/indexing/where.py +0 -11
maxframe/dataframe/indexing/xs.py +148 -0
maxframe/dataframe/merge/__init__.py +12 -1
maxframe/dataframe/merge/append.py +97 -98
maxframe/dataframe/merge/combine_first.py +120 -0
maxframe/dataframe/merge/compare.py +387 -0
maxframe/dataframe/merge/concat.py +183 -0
maxframe/dataframe/merge/update.py +271 -0
maxframe/dataframe/misc/__init__.py +16 -10
maxframe/dataframe/misc/_duplicate.py +10 -4
maxframe/dataframe/misc/apply.py +1 -1
maxframe/dataframe/misc/check_unique.py +51 -0
maxframe/dataframe/misc/clip.py +145 -0
maxframe/dataframe/misc/describe.py +175 -9
maxframe/dataframe/misc/drop_duplicates.py +2 -2
maxframe/dataframe/misc/duplicated.py +2 -2
maxframe/dataframe/misc/get_dummies.py +5 -1
maxframe/dataframe/misc/isin.py +2 -2
maxframe/dataframe/misc/map.py +94 -0
maxframe/dataframe/misc/tests/test_misc.py +13 -2
maxframe/dataframe/misc/to_numeric.py +3 -0
maxframe/dataframe/misc/transform.py +12 -5
maxframe/dataframe/misc/transpose.py +13 -1
maxframe/dataframe/misc/valid_index.py +115 -0
maxframe/dataframe/misc/value_counts.py +38 -4
maxframe/dataframe/missing/checkna.py +13 -6
maxframe/dataframe/missing/dropna.py +5 -0
maxframe/dataframe/missing/fillna.py +1 -1
maxframe/dataframe/missing/replace.py +7 -4
maxframe/dataframe/reduction/__init__.py +29 -15
maxframe/dataframe/reduction/aggregation.py +38 -9
maxframe/dataframe/reduction/all.py +2 -2
maxframe/dataframe/reduction/any.py +2 -2
maxframe/dataframe/reduction/argmax.py +100 -0
maxframe/dataframe/reduction/argmin.py +100 -0
maxframe/dataframe/reduction/core.py +65 -18
maxframe/dataframe/reduction/count.py +13 -9
maxframe/dataframe/reduction/cov.py +166 -0
maxframe/dataframe/reduction/cummax.py +2 -2
maxframe/dataframe/reduction/cummin.py +2 -2
maxframe/dataframe/reduction/cumprod.py +2 -2
maxframe/dataframe/reduction/cumsum.py +2 -2
maxframe/dataframe/reduction/custom_reduction.py +2 -2
maxframe/dataframe/reduction/idxmax.py +185 -0
maxframe/dataframe/reduction/idxmin.py +185 -0
maxframe/dataframe/reduction/kurtosis.py +37 -30
maxframe/dataframe/reduction/max.py +2 -2
maxframe/dataframe/reduction/mean.py +9 -7
maxframe/dataframe/reduction/median.py +2 -2
maxframe/dataframe/reduction/min.py +2 -2
maxframe/dataframe/reduction/nunique.py +9 -8
maxframe/dataframe/reduction/prod.py +18 -13
maxframe/dataframe/reduction/reduction_size.py +2 -2
maxframe/dataframe/reduction/sem.py +13 -9
maxframe/dataframe/reduction/skew.py +31 -27
maxframe/dataframe/reduction/str_concat.py +10 -7
maxframe/dataframe/reduction/sum.py +18 -14
maxframe/dataframe/reduction/unique.py +20 -3
maxframe/dataframe/reduction/var.py +16 -12
maxframe/dataframe/reshape/__init__.py +38 -0
maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
maxframe/dataframe/reshape/unstack.py +114 -0
maxframe/dataframe/sort/__init__.py +8 -0
maxframe/dataframe/sort/argsort.py +62 -0
maxframe/dataframe/sort/core.py +1 -0
maxframe/dataframe/sort/nlargest.py +238 -0
maxframe/dataframe/sort/nsmallest.py +228 -0
maxframe/dataframe/statistics/__init__.py +3 -3
maxframe/dataframe/statistics/corr.py +1 -0
maxframe/dataframe/statistics/quantile.py +2 -2
maxframe/dataframe/tests/test_typing.py +104 -0
maxframe/dataframe/tests/test_utils.py +66 -2
maxframe/dataframe/typing_.py +185 -0
maxframe/dataframe/utils.py +95 -26
maxframe/dataframe/window/aggregation.py +8 -4
maxframe/dataframe/window/core.py +14 -1
maxframe/dataframe/window/ewm.py +1 -3
maxframe/dataframe/window/expanding.py +37 -35
maxframe/dataframe/window/rolling.py +49 -39
maxframe/dataframe/window/tests/test_expanding.py +1 -7
maxframe/dataframe/window/tests/test_rolling.py +1 -1
maxframe/env.py +7 -4
maxframe/errors.py +2 -2
maxframe/io/odpsio/schema.py +9 -3
maxframe/io/odpsio/tableio.py +7 -2
maxframe/io/odpsio/tests/test_schema.py +198 -83
maxframe/learn/__init__.py +10 -2
maxframe/learn/cluster/__init__.py +15 -0
maxframe/learn/cluster/_kmeans.py +782 -0
maxframe/learn/contrib/llm/core.py +2 -0
maxframe/learn/contrib/xgboost/core.py +86 -1
maxframe/learn/contrib/xgboost/train.py +5 -2
maxframe/learn/core.py +66 -0
maxframe/learn/linear_model/_base.py +58 -1
maxframe/learn/linear_model/_lin_reg.py +1 -1
maxframe/learn/metrics/__init__.py +6 -0
maxframe/learn/metrics/_classification.py +145 -0
maxframe/learn/metrics/_ranking.py +477 -0
maxframe/learn/metrics/_scorer.py +60 -0
maxframe/learn/metrics/pairwise/__init__.py +21 -0
maxframe/learn/metrics/pairwise/core.py +77 -0
maxframe/learn/metrics/pairwise/cosine.py +115 -0
maxframe/learn/metrics/pairwise/euclidean.py +176 -0
maxframe/learn/metrics/pairwise/haversine.py +96 -0
maxframe/learn/metrics/pairwise/manhattan.py +80 -0
maxframe/learn/metrics/pairwise/pairwise.py +127 -0
maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
maxframe/learn/metrics/tests/__init__.py +13 -0
maxframe/learn/metrics/tests/test_scorer.py +26 -0
maxframe/learn/utils/__init__.py +1 -1
maxframe/learn/utils/checks.py +1 -2
maxframe/learn/utils/core.py +59 -0
maxframe/learn/utils/extmath.py +37 -0
maxframe/learn/utils/odpsio.py +193 -0
maxframe/learn/utils/validation.py +2 -2
maxframe/lib/compat.py +40 -0
maxframe/lib/dtypes_extension/__init__.py +16 -1
maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
maxframe/lib/dtypes_extension/blob.py +304 -0
maxframe/lib/dtypes_extension/dtypes.py +40 -0
maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
maxframe/lib/filesystem/_oss_lib/common.py +122 -50
maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
maxframe/lib/filesystem/base.py +1 -1
maxframe/lib/filesystem/core.py +1 -1
maxframe/lib/filesystem/oss.py +115 -46
maxframe/lib/filesystem/tests/test_oss.py +74 -36
maxframe/lib/mmh3.cp38-win_amd64.pyd +0 -0
maxframe/lib/wrapped_pickle.py +10 -0
maxframe/opcodes.py +33 -15
maxframe/protocol.py +12 -0
maxframe/serialization/__init__.py +11 -2
maxframe/serialization/arrow.py +38 -13
maxframe/serialization/blob.py +32 -0
maxframe/serialization/core.cp38-win_amd64.pyd +0 -0
maxframe/serialization/core.pyx +39 -1
maxframe/serialization/exception.py +2 -4
maxframe/serialization/numpy.py +11 -0
maxframe/serialization/pandas.py +46 -9
maxframe/serialization/serializables/core.py +2 -2
maxframe/serialization/tests/test_serial.py +29 -2
maxframe/tensor/__init__.py +38 -8
maxframe/tensor/arithmetic/__init__.py +19 -10
maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
maxframe/tensor/core.py +3 -2
maxframe/tensor/datasource/tests/test_datasource.py +2 -1
maxframe/tensor/extensions/__init__.py +2 -0
maxframe/tensor/extensions/apply_chunk.py +3 -3
maxframe/tensor/extensions/rebalance.py +65 -0
maxframe/tensor/fft/__init__.py +32 -0
maxframe/tensor/fft/core.py +168 -0
maxframe/tensor/fft/fft.py +112 -0
maxframe/tensor/fft/fft2.py +118 -0
maxframe/tensor/fft/fftfreq.py +80 -0
maxframe/tensor/fft/fftn.py +123 -0
maxframe/tensor/fft/fftshift.py +79 -0
maxframe/tensor/fft/hfft.py +112 -0
maxframe/tensor/fft/ifft.py +114 -0
maxframe/tensor/fft/ifft2.py +115 -0
maxframe/tensor/fft/ifftn.py +123 -0
maxframe/tensor/fft/ifftshift.py +73 -0
maxframe/tensor/fft/ihfft.py +93 -0
maxframe/tensor/fft/irfft.py +118 -0
maxframe/tensor/fft/irfft2.py +62 -0
maxframe/tensor/fft/irfftn.py +114 -0
maxframe/tensor/fft/rfft.py +116 -0
maxframe/tensor/fft/rfft2.py +63 -0
maxframe/tensor/fft/rfftfreq.py +87 -0
maxframe/tensor/fft/rfftn.py +113 -0
maxframe/tensor/indexing/fill_diagonal.py +1 -7
maxframe/tensor/linalg/__init__.py +7 -0
maxframe/tensor/linalg/_einsumfunc.py +1025 -0
maxframe/tensor/linalg/cholesky.py +117 -0
maxframe/tensor/linalg/einsum.py +339 -0
maxframe/tensor/linalg/lstsq.py +100 -0
maxframe/tensor/linalg/matrix_norm.py +75 -0
maxframe/tensor/linalg/norm.py +249 -0
maxframe/tensor/linalg/solve.py +72 -0
maxframe/tensor/linalg/solve_triangular.py +2 -2
maxframe/tensor/linalg/vector_norm.py +113 -0
maxframe/tensor/misc/__init__.py +24 -1
maxframe/tensor/misc/argwhere.py +72 -0
maxframe/tensor/misc/array_split.py +46 -0
maxframe/tensor/misc/broadcast_arrays.py +57 -0
maxframe/tensor/misc/copyto.py +130 -0
maxframe/tensor/misc/delete.py +104 -0
maxframe/tensor/misc/dsplit.py +68 -0
maxframe/tensor/misc/ediff1d.py +74 -0
maxframe/tensor/misc/expand_dims.py +85 -0
maxframe/tensor/misc/flip.py +90 -0
maxframe/tensor/misc/fliplr.py +64 -0
maxframe/tensor/misc/flipud.py +68 -0
maxframe/tensor/misc/hsplit.py +85 -0
maxframe/tensor/misc/insert.py +139 -0
maxframe/tensor/misc/moveaxis.py +83 -0
maxframe/tensor/misc/result_type.py +88 -0
maxframe/tensor/misc/roll.py +124 -0
maxframe/tensor/misc/rollaxis.py +77 -0
maxframe/tensor/misc/shape.py +89 -0
maxframe/tensor/misc/split.py +190 -0
maxframe/tensor/misc/tile.py +109 -0
maxframe/tensor/misc/vsplit.py +74 -0
maxframe/tensor/reduction/array_equal.py +2 -1
maxframe/tensor/sort/__init__.py +2 -0
maxframe/tensor/sort/argpartition.py +98 -0
maxframe/tensor/sort/partition.py +228 -0
maxframe/tensor/spatial/__init__.py +15 -0
maxframe/tensor/spatial/distance/__init__.py +17 -0
maxframe/tensor/spatial/distance/cdist.py +421 -0
maxframe/tensor/spatial/distance/pdist.py +398 -0
maxframe/tensor/spatial/distance/squareform.py +153 -0
maxframe/tensor/special/__init__.py +159 -21
maxframe/tensor/special/airy.py +55 -0
maxframe/tensor/special/bessel.py +199 -0
maxframe/tensor/special/core.py +65 -4
maxframe/tensor/special/ellip_func_integrals.py +155 -0
maxframe/tensor/special/ellip_harm.py +55 -0
maxframe/tensor/special/err_fresnel.py +223 -0
maxframe/tensor/special/gamma_funcs.py +303 -0
maxframe/tensor/special/hypergeometric_funcs.py +69 -0
maxframe/tensor/special/info_theory.py +189 -0
maxframe/tensor/special/misc.py +21 -0
maxframe/tensor/statistics/__init__.py +6 -0
maxframe/tensor/statistics/corrcoef.py +77 -0
maxframe/tensor/statistics/cov.py +222 -0
maxframe/tensor/statistics/digitize.py +126 -0
maxframe/tensor/statistics/histogram.py +520 -0
maxframe/tensor/statistics/median.py +85 -0
maxframe/tensor/statistics/ptp.py +89 -0
maxframe/tensor/utils.py +3 -3
maxframe/tests/test_utils.py +43 -1
maxframe/tests/utils.py +0 -2
maxframe/typing_.py +2 -0
maxframe/udf.py +27 -2
maxframe/utils.py +193 -19
{maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
{maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/RECORD +391 -236
maxframe_client/fetcher.py +35 -4
maxframe_client/session/odps.py +7 -2
maxframe_client/tests/test_fetcher.py +76 -3
maxframe_client/tests/test_session.py +4 -1
/maxframe/dataframe/{misc → reshape}/melt.py +0 -0
/maxframe/dataframe/{misc → reshape}/stack.py +0 -0
{maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
{maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0

maxframe/dataframe/datasource/from_index.py CHANGED Viewed

@@ -51,7 +51,7 @@ class SeriesFromIndex(DataFrameOperator, DataFrameOperatorMixin):
 def series_from_index(ind, index=None, name=None):
-    name = name or ind.name or 0
+    name = name or ind.name
     if index is not None:
         index = Index(index)
     op = SeriesFromIndex(input_=ind, index=index, name=name)

maxframe/dataframe/datasource/from_records.py CHANGED Viewed

@@ -77,6 +77,83 @@ def from_records(
     sparse=False,
     **kw
 ):
+    """
+    Convert structured or record ndarray to DataFrame.
+    Creates a DataFrame object from a structured ndarray, sequence of
+    tuples or dicts, or DataFrame.
+    Parameters
+    ----------
+    data : structured ndarray, sequence of tuples or dicts, or DataFrame
+        Structured input data.
+        .. deprecated:: 2.1.0
+            Passing a DataFrame is deprecated.
+    index : str, list of fields, array-like
+        Field of array to use as the index, alternately a specific set of
+        input labels to use.
+    exclude : sequence, default None
+        Columns or fields to exclude.
+    columns : sequence, default None
+        Column names to use. If the passed data do not have names
+        associated with them, this argument provides names for the
+        columns. Otherwise this argument indicates the order of the columns
+        in the result (any names not found in the data will become all-NA
+        columns).
+    coerce_float : bool, default False
+        Attempt to convert values of non-string, non-numeric objects (like
+        decimal.Decimal) to floating point, useful for SQL result sets.
+    nrows : int, default None
+        Number of rows to read if data is an iterator.
+    Returns
+    -------
+    DataFrame
+    See Also
+    --------
+    DataFrame.from_dict : DataFrame from dict of array-like or dicts.
+    DataFrame : DataFrame object creation using constructor.
+    Examples
+    --------
+    Data can be provided as a structured ndarray:
+    >>> import maxframe.tensor as mt
+    >>> import maxframe.dataframe as md
+    >>> data = mt.array([(3, 'a'), (2, 'b'), (1, 'c'), (0, 'd')],
+    ...                 dtype=[('col_1', 'i4'), ('col_2', 'U1')])
+    >>> md.DataFrame.from_records(data).execute()
+       col_1 col_2
+    0      3     a
+    1      2     b
+    2      1     c
+    3      0     d
+    Data can be provided as a list of dicts:
+    >>> data = [{'col_1': 3, 'col_2': 'a'},
+    ...         {'col_1': 2, 'col_2': 'b'},
+    ...         {'col_1': 1, 'col_2': 'c'},
+    ...         {'col_1': 0, 'col_2': 'd'}]
+    >>> md.DataFrame.from_records(data).execute()
+       col_1 col_2
+    0      3     a
+    1      2     b
+    2      1     c
+    3      0     d
+    Data can be provided as a list of tuples with corresponding columns:
+    >>> data = [(3, 'a'), (2, 'b'), (1, 'c'), (0, 'd')]
+    >>> md.DataFrame.from_records(data, columns=['col_1', 'col_2']).execute()
+       col_1 col_2
+    0      3     a
+    1      2     b
+    2      1     c
+    3      0     d
+    """
     if isinstance(data, np.ndarray):
         from .dataframe import from_pandas

maxframe/dataframe/datasource/from_tensor.py CHANGED Viewed

@@ -39,6 +39,7 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
     input = AnyField("input")
     index = AnyField("index")
     columns = AnyField("columns")
+    axis = AnyField("axis")
     def __init__(self, *args, **kwargs):
         kwargs["_output_types"] = [OutputType.dataframe]
@@ -120,46 +121,82 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
             if isinstance(tileable, ENTITY_TYPE):
                 tileables.append(tileable)
-        if index is not None:
-            tileable_size = tileables[0].shape[0]
-            if hasattr(index, "shape"):
-                index_size = index.shape[0]
+        if self.axis == 0:
+            if index is not None:
+                raise NotImplementedError("Cannot accept index when axis=0")
             else:
-                index_size = len(index)
-            if (
-                not pd.isna(tileable_size)
-                and not pd.isna(index_size)
-                and tileable_size != index_size
-            ):
-                raise ValueError(
-                    f"index {index} should have the same shape "
-                    f"with tensor: {tileable_size}"
-                )
-            index_value = self._process_index(index, tileables)
+                index = pd.Index(list(input_1d_tileables.keys()))
+                index_value = parse_index(index, store_data=True)
+                self.index = index
+            if columns is not None:
+                tileable_size = tileables[0].shape[0] if tileables else 0
+                if not isinstance(columns, pd.Index):
+                    columns = self.columns = pd.Index(columns)
+                column_size = columns.shape[0]
+                if (
+                    not pd.isna(tileable_size)
+                    and not pd.isna(column_size)
+                    and tileable_size != column_size
+                ):
+                    raise ValueError(
+                        f"columns {columns} should have the same shape "
+                        f"with tensor: {tileable_size}"
+                    )
+                columns_value = self._process_index(columns, tileables)
+            else:
+                if not tileables or np.isnan(tileables[0].shape[0]):
+                    columns = columns_value = None
+                else:
+                    columns = pd.RangeIndex(0, tileables[0].shape[0])
+                    columns_value = parse_index(columns, store_data=True)
+                self.columns = columns
+            shape = (len(input_1d_tileables), shape[0] if shape else 0)
         else:
-            if np.isnan(tileables[0].shape[0]):
-                index = pd.RangeIndex(0)
+            if index is not None:
+                tileable_size = tileables[0].shape[0] if tileables else 0
+                if hasattr(index, "shape"):
+                    index_size = index.shape[0]
+                else:
+                    index_size = len(index)
+                if (
+                    not pd.isna(tileable_size)
+                    and not pd.isna(index_size)
+                    and tileable_size != index_size
+                ):
+                    raise ValueError(
+                        f"index {index} should have the same shape "
+                        f"with tensor: {tileable_size}"
+                    )
+                index_value = self._process_index(index, tileables)
             else:
-                index = pd.RangeIndex(0, tileables[0].shape[0])
-            self.index = index
-            index_value = parse_index(index)
+                if not tileables or np.isnan(tileables[0].shape[0]):
+                    index = pd.RangeIndex(0)
+                else:
+                    index = pd.RangeIndex(0, tileables[0].shape[0])
+                self.index = index
+                index_value = parse_index(index)
-        if columns is not None:
-            if len(input_1d_tileables) != len(columns):
-                raise ValueError(
-                    f"columns {columns} should have size {len(input_1d_tileables)}"
+            if columns is not None:
+                if len(input_1d_tileables) != len(columns):
+                    raise ValueError(
+                        f"columns {columns} should have size {len(input_1d_tileables)}"
+                    )
+                if not isinstance(columns, pd.Index):
+                    if isinstance(columns, ENTITY_TYPE):
+                        raise NotImplementedError(
+                            "The columns value cannot be a tileable"
+                        )
+                    columns = pd.Index(columns)
+                columns_value = parse_index(columns, store_data=True)
+            else:
+                columns_value = parse_index(
+                    pd.RangeIndex(0, len(input_1d_tileables)), store_data=True
                 )
-            if not isinstance(columns, pd.Index):
-                if isinstance(columns, ENTITY_TYPE):
-                    raise NotImplementedError("The columns value cannot be a tileable")
-                columns = pd.Index(columns)
-            columns_value = parse_index(columns, store_data=True)
-        else:
-            columns_value = parse_index(
-                pd.RangeIndex(0, len(input_1d_tileables)), store_data=True
-            )
-        shape = (shape[0], len(input_1d_tileables))
+            shape = (shape[0] if shape else 0, len(input_1d_tileables))
         return self.new_dataframe(
             tileables,
             shape,
@@ -278,6 +315,9 @@ def dataframe_from_tensor(
     gpu: bool = None,
     sparse: bool = False,
 ):
+    if isinstance(columns, list) and columns and isinstance(columns[0], tuple):
+        columns = pd.MultiIndex.from_tuples(columns)
     if tensor is not None:
         if tensor.ndim > 2 or tensor.ndim <= 0:
             raise TypeError(
@@ -299,6 +339,8 @@ def dataframe_from_tensor(
             dtypes = pd.Series([], index=pd.Index([], dtype=object))
     if index is not None and not isinstance(index, ENTITY_TYPE):
         index = pd.Index(index)
+        if isinstance(index[0], tuple):
+            index = pd.MultiIndex.from_tuples(index)
     op = DataFrameFromTensor(
         input=tensor, index=index, columns=columns, gpu=gpu, sparse=sparse
     )
@@ -311,7 +353,10 @@ def dataframe_from_1d_tileables(
     columns: Union[pd.Index, list] = None,
     gpu: bool = None,
     sparse: bool = False,
+    axis: int = 1,
 ):
+    from pandas.core.dtypes.cast import find_common_type
     data = dict()
     for k, v in d.items():
         if isinstance(v, (list, tuple)) and any(
@@ -322,9 +367,9 @@ def dataframe_from_1d_tileables(
             data[k] = v
     d = data
     if columns is not None:
-        tileables = [d.get(c) for c in columns]
+        tileables = [d.get(c) for c in columns] if axis == 1 else list(d.values())
     else:
-        columns = list(d.keys())
+        columns = list(d.keys()) if axis == 1 else None
         tileables = list(d.values())
     gpu = (
@@ -332,14 +377,37 @@ def dataframe_from_1d_tileables(
         if gpu is None
         else gpu
     )
-    dtypes = pd.Series(
-        [t.dtype if hasattr(t, "dtype") else pd.Series(t).dtype for t in tileables],
-        index=columns,
-    )
+    if axis == 0:
+        col_num = (
+            tileables[0].shape[0]
+            if hasattr(tileables[0], "shape")
+            else len(tileables[0])
+        )
+        if pd.isna(col_num):
+            dtypes = None
+        else:
+            common_dtype = find_common_type(
+                [
+                    t.dtype if hasattr(t, "dtype") else pd.Series(t).dtype
+                    for t in tileables
+                ]
+            )
+            dtypes = pd.Series(
+                [common_dtype] * col_num,
+                index=columns if columns is not None else pd.RangeIndex(col_num),
+            )
+    else:
+        dtypes = pd.Series(
+            [t.dtype if hasattr(t, "dtype") else pd.Series(t).dtype for t in tileables],
+            index=columns,
+        )
     if index is not None and not isinstance(index, ENTITY_TYPE):
         index = pd.Index(index)
     op = DataFrameFromTensor(
-        input=d, index=index, columns=columns, gpu=gpu, sparse=sparse
+        input=d, index=index, columns=columns, gpu=gpu, sparse=sparse, axis=axis
     )
     return op(d, index, columns, dtypes)

maxframe/dataframe/datasource/read_csv.py CHANGED Viewed

@@ -441,13 +441,12 @@ def read_csv(
     Examples
     --------
     >>> import maxframe.dataframe as md
-    >>> from maxframe.lib.filesystem.oss import build_oss_path
     >>> md.read_csv('data.csv')  # doctest: +SKIP
     >>> # read from HDFS
     >>> md.read_csv('hdfs://localhost:8020/test.csv')  # doctest: +SKIP
     >>> # read from OSS
-    >>> auth_path = build_oss_path(file_path, access_key_id, access_key_secret, end_point)
-    >>> md.read_csv(auth_path)
+    >>> md.read_csv('oss://oss-cn-hangzhou-internal.aliyuncs.com/bucket/test.csv',
+    ...             storage_options={'role_arn': 'acs:ram::xxxxxx:role/aliyunodpsdefaultrole'})
     """
     # infer dtypes and columns
     if isinstance(path, (list, tuple)):

maxframe/dataframe/datasource/tests/test_datasource.py CHANGED Viewed

@@ -184,6 +184,23 @@ def test_from_tensor():
     df = dataframe_from_1d_tileables(d)
     pd.testing.assert_index_equal(df.columns_value.to_pandas(), pd.RangeIndex(2))
+    # test axis parameter for dataframe_from_1d_tileables
+    d = OrderedDict(
+        [("a", mt.tensor(np.random.rand(4))), ("b", mt.tensor(np.random.rand(4)))]
+    )
+    # axis=1 (default behavior) - keys become columns
+    df = dataframe_from_1d_tileables(d, axis=1)
+    assert df.shape == (4, 2)
+    pd.testing.assert_index_equal(df.columns_value.to_pandas(), pd.Index(["a", "b"]))
+    pd.testing.assert_index_equal(df.index_value.to_pandas(), pd.RangeIndex(4))
+    # axis=0 - keys become index (rows)
+    df = dataframe_from_1d_tileables(d, axis=0)
+    assert df.shape == (2, 4)
+    pd.testing.assert_index_equal(df.index_value.to_pandas(), pd.Index(["a", "b"]))
+    pd.testing.assert_index_equal(df.columns_value.to_pandas(), pd.RangeIndex(4))
     series = series_from_tensor(mt.random.rand(4))
     pd.testing.assert_index_equal(series.index_value.to_pandas(), pd.RangeIndex(4))
@@ -207,6 +224,26 @@ def test_from_tensor():
     with pytest.raises(ValueError):
         dataframe_from_tensor(mt.random.rand(4, 3), columns=["a", "b"])
+    # 1-d tensors should have same shape
+    with pytest.raises(ValueError):
+        dataframe_from_1d_tileables(
+            OrderedDict(
+                [(0, mt.tensor(np.random.rand(3))), (1, mt.tensor(np.random.rand(2)))]
+            )
+        )
+    # index has wrong shape
+    with pytest.raises(ValueError):
+        dataframe_from_1d_tileables(
+            {0: mt.tensor(np.random.rand(3))}, index=mt.tensor(np.random.rand(2))
+        )
+    # columns have wrong shape
+    with pytest.raises(ValueError):
+        dataframe_from_1d_tileables(
+            {0: mt.tensor(np.random.rand(3))}, columns=["a", "b"]
+        )
     # index should be 1-d
     with pytest.raises(ValueError):
         dataframe_from_tensor(

maxframe/dataframe/datastore/__init__.py CHANGED Viewed

@@ -12,14 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from .to_csv import to_csv
 from .to_odps import to_odps_table
 def _install():
-    from ..core import DATAFRAME_TYPE
+    from ..core import DATAFRAME_TYPE, SERIES_TYPE
     for t in DATAFRAME_TYPE:
+        t.to_csv = to_csv
         t.to_odps_table = to_odps_table
+    for t in SERIES_TYPE:
+        t.to_csv = to_csv
 _install()

maxframe/dataframe/datastore/to_csv.py CHANGED Viewed

@@ -12,17 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import List
 from ... import opcodes
-from ...core import EntityData
 from ...serialization.serializables import (
     AnyField,
     BoolField,
     DictField,
     Int32Field,
     Int64Field,
-    KeyField,
     ListField,
     StringField,
 )
@@ -33,27 +29,26 @@ from .core import DataFrameDataStore
 class DataFrameToCSV(DataFrameDataStore):
     _op_type_ = opcodes.TO_CSV
-    input = KeyField("input")
-    path = AnyField("path")
-    sep = StringField("sep")
-    na_rep = StringField("na_rep")
-    float_format = StringField("float_format")
-    columns = ListField("columns")
-    header = AnyField("header")
-    index = BoolField("index")
-    index_label = AnyField("index_label")
-    mode = StringField("mode")
-    encoding = StringField("encoding")
-    compression = AnyField("compression")
-    quoting = Int32Field("quoting")
-    quotechar = StringField("quotechar")
-    line_terminator = StringField("line_terminator")
-    chunksize = Int64Field("chunksize")
-    date_format = StringField("date_format")
-    doublequote = BoolField("doublequote")
-    escapechar = StringField("escapechar")
-    decimal = StringField("decimal")
-    storage_options = DictField("storage_options")
+    path = AnyField("path", default=None)
+    sep = StringField("sep", default=None)
+    na_rep = StringField("na_rep", default=None)
+    float_format = StringField("float_format", default=None)
+    columns = ListField("columns", default=None)
+    header = AnyField("header", default=None)
+    index = BoolField("index", default=None)
+    index_label = AnyField("index_label", default=None)
+    mode = StringField("mode", default=None)
+    encoding = StringField("encoding", default=None)
+    compression = AnyField("compression", default=None)
+    quoting = Int32Field("quoting", default=None)
+    quotechar = StringField("quotechar", default=None)
+    line_terminator = StringField("line_terminator", default=None)
+    chunksize = Int64Field("chunksize", default=None)
+    date_format = StringField("date_format", default=None)
+    doublequote = BoolField("doublequote", default=None)
+    escapechar = StringField("escapechar", default=None)
+    decimal = StringField("decimal", default=None)
+    storage_options = DictField("storage_options", default=None)
     def __init__(self, output_types=None, **kw):
         super().__init__(_output_types=output_types, **kw)
@@ -63,19 +58,6 @@ class DataFrameToCSV(DataFrameDataStore):
         # if wildcard in path, write csv into multiple files
         return "*" not in self.path
-    @property
-    def output_stat(self):
-        return self.output_stat
-    @property
-    def output_limit(self):
-        return 1 if not self.output_stat else 2
-    @classmethod
-    def _set_inputs(cls, op: "DataFrameToCSV", inputs: List[EntityData]):
-        super()._set_inputs(op, inputs)
-        op._input = op._inputs[0]
     def __call__(self, df):
         index_value = parse_index(df.index_value.to_pandas()[:0], df)
         if df.ndim == 2:
@@ -110,13 +92,14 @@ def to_csv(
     compression="infer",
     quoting=None,
     quotechar='"',
-    line_terminator=None,
+    lineterminator=None,
     chunksize=None,
     date_format=None,
     doublequote=True,
     escapechar=None,
     decimal=".",
     storage_options=None,
+    **kw,
 ):
     r"""
     Write object to a comma-separated values (csv) file.
@@ -169,7 +152,7 @@ def to_csv(
         will treat them as non-numeric.
     quotechar : str, default '\"'
         String of length 1. Character used to quote fields.
-    line_terminator : str, optional
+    lineterminator : str, optional
         The newline character or character sequence to use in the output
         file. Defaults to `os.linesep`, which depends on the OS in which
         this method is called ('\n' for linux, '\r\n' for Windows, i.e.).
@@ -203,6 +186,11 @@ def to_csv(
     ...                    'weapon': ['sai', 'bo staff']})
     >>> df.to_csv('out.csv', index=False).execute()
     """
+    lineterminator = lineterminator or kw.pop("line_terminator", None)
+    if kw:
+        raise TypeError(
+            f"to_csv() got an unexpected keyword argument '{next(iter(kw))}'"
+        )
     if mode != "w":  # pragma: no cover
         raise NotImplementedError("only support to_csv with mode 'w' for now")
@@ -220,7 +208,7 @@ def to_csv(
         compression=compression,
         quoting=quoting,
         quotechar=quotechar,
-        line_terminator=line_terminator,
+        line_terminator=lineterminator,
         chunksize=chunksize,
         date_format=date_format,
         doublequote=doublequote,

maxframe/dataframe/datastore/to_odps.py CHANGED Viewed

@@ -56,6 +56,7 @@ class DataFrameToODPSTable(DataFrameDataStore):
     index_label = ListField("index_label", FieldTypes.string, default=None)
     lifecycle = Int64Field("lifecycle", default=None)
     table_properties = DictField("table_properties", default=None)
+    primary_key = ListField("primary_key", FieldTypes.string, default=None)
     def __init__(self, **kw):
         super().__init__(_output_types=[OutputType.dataframe], **kw)
@@ -100,11 +101,12 @@ def to_odps_table(
     partition: Optional[str] = None,
     partition_col: Union[None, str, List[str]] = None,
     overwrite: bool = False,
-    unknown_as_string: Optional[bool] = None,
+    unknown_as_string: Optional[bool] = True,
     index: bool = True,
     index_label: Union[None, str, List[str]] = None,
     lifecycle: Optional[int] = None,
     table_properties: Optional[dict] = None,
+    primary_key: Union[None, str, List[str]] = None,
 ):
     """
     Write DataFrame object into a MaxCompute (ODPS) table.
@@ -145,6 +147,10 @@ def to_odps_table(
         Specify lifecycle of the output table.
     table_properties: Optional[dict]
         Specify properties of the output table.
+    primary_key: Union[None, str, List[str]]
+        If provided and target table does not exist, target table
+        will be a delta table with columns specified in this argument
+        as primary key.
     Returns
     -------
@@ -201,12 +207,14 @@ def to_odps_table(
         index_table_intersect = index_cols & table_cols
         if index_table_intersect:
             raise ValueError(
-                f"Index column(s) {index_table_intersect} conflict with column(s) of the input dataframe."
+                f"Index column(s) {index_table_intersect} conflict with "
+                f"column(s) of the input dataframe."
             )
         index_partition_intersect = index_cols & partition_col_set
         if index_partition_intersect:
             raise ValueError(
-                f"Index column(s) {index_partition_intersect} conflict with partition column(s)."
+                f"Index column(s) {index_partition_intersect} conflict "
+                f"with partition column(s)."
             )
     if partition_col:
@@ -217,6 +225,23 @@ def to_odps_table(
                 " is not the data column(s) of the input dataframe."
             )
+    table_properties = table_properties or {}
+    if primary_key is not None:
+        table_properties["transactional"] = "true"
+    if odps_entry.exist_table(table):
+        table_obj = odps_entry.get_table(table)
+        if table_obj.is_transactional:
+            table_properties = table_properties or {}
+            table_properties["transactional"] = "true"
+            primary_key = primary_key or table_obj.primary_key or ()
+            if set(primary_key) != set(table_obj.primary_key or ()):
+                raise ValueError(
+                    f"Primary keys between existing table {table} and "
+                    f"provided arguments are not same."
+                )
+    if primary_key and not isinstance(primary_key, (list, tuple)):
+        primary_key = [primary_key]
     op = DataFrameToODPSTable(
         dtypes=df.dtypes,
         table_name=table,
@@ -227,6 +252,7 @@ def to_odps_table(
         index=index,
         index_label=index_label,
         lifecycle=lifecycle or options.session.table_lifecycle,
-        table_properties=table_properties,
+        table_properties=table_properties or None,
+        primary_key=primary_key or None,
     )
     return op(df)

maxframe/dataframe/extensions/__init__.py CHANGED Viewed

@@ -24,20 +24,36 @@ from .apply_chunk import (
     df_apply_chunk,
     series_apply_chunk,
 )
+from .cartesian_chunk import cartesian_chunk
+from .collect_kv import collect_kv
+from .extract_kv import extract_kv
 from .flatjson import series_flatjson
 from .flatmap import df_flatmap, series_flatmap
+from .map_reduce import map_reduce
+from .rebalance import DataFrameRebalance, rebalance
 from .reshuffle import DataFrameReshuffle, df_reshuffle
 def _install():
     from ..core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
-    DataFrameMaxFrameAccessor._register("reshuffle", df_reshuffle)
-    DataFrameMaxFrameAccessor._register("flatmap", df_flatmap)
     DataFrameMaxFrameAccessor._register("apply_chunk", df_apply_chunk)
-    SeriesMaxFrameAccessor._register("flatmap", series_flatmap)
-    SeriesMaxFrameAccessor._register("flatjson", series_flatjson)
+    DataFrameMaxFrameAccessor._register("cartesian_chunk", cartesian_chunk)
+    DataFrameMaxFrameAccessor._register("collect_kv", collect_kv)
+    DataFrameMaxFrameAccessor._register("extract_kv", extract_kv)
+    DataFrameMaxFrameAccessor._register("flatmap", df_flatmap)
+    DataFrameMaxFrameAccessor._register("map_reduce", map_reduce)
+    DataFrameMaxFrameAccessor._register("rebalance", rebalance)
+    DataFrameMaxFrameAccessor._register("reshuffle", df_reshuffle)
     SeriesMaxFrameAccessor._register("apply_chunk", series_apply_chunk)
+    SeriesMaxFrameAccessor._register("cartesian_chunk", cartesian_chunk)
+    SeriesMaxFrameAccessor._register("extract_kv", extract_kv)
+    SeriesMaxFrameAccessor._register("flatjson", series_flatjson)
+    SeriesMaxFrameAccessor._register("flatmap", series_flatmap)
+    SeriesMaxFrameAccessor._register("rebalance", rebalance)
+    IndexMaxFrameAccessor._register("rebalance", rebalance)
     if DataFrameMaxFrameAccessor._api_count:
         for t in DATAFRAME_TYPE: