PyPI - maxframe - Versions diffs - 2.0.0b1__cp38-cp38-macosx_10_9_universal2.whl → 2.2.0__cp38-cp38-macosx_10_9_universal2.whl - Mend

maxframe 2.0.0b1__cp38-cp38-macosx_10_9_universal2.whl → 2.2.0__cp38-cp38-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of maxframe might be problematic. Click here for more details.

Files changed (395)

maxframe/__init__.py +1 -0
maxframe/_utils.cpython-38-darwin.so +0 -0
maxframe/_utils.pyx +14 -1
maxframe/codegen/core.py +6 -6
maxframe/codegen/spe/core.py +1 -1
maxframe/codegen/spe/dataframe/__init__.py +1 -0
maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
maxframe/codegen/spe/dataframe/groupby.py +88 -0
maxframe/codegen/spe/dataframe/indexing.py +99 -4
maxframe/codegen/spe/dataframe/merge.py +34 -1
maxframe/codegen/spe/dataframe/misc.py +9 -33
maxframe/codegen/spe/dataframe/reduction.py +14 -9
maxframe/codegen/spe/dataframe/reshape.py +46 -0
maxframe/codegen/spe/dataframe/sort.py +30 -17
maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
maxframe/codegen/spe/tensor/__init__.py +3 -0
maxframe/codegen/spe/tensor/fft.py +74 -0
maxframe/codegen/spe/tensor/linalg.py +29 -2
maxframe/codegen/spe/tensor/misc.py +79 -25
maxframe/codegen/spe/tensor/spatial.py +45 -0
maxframe/codegen/spe/tensor/statistics.py +44 -0
maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
maxframe/codegen/spe/utils.py +2 -0
maxframe/config/config.py +70 -9
maxframe/config/tests/test_validators.py +13 -1
maxframe/config/validators.py +49 -0
maxframe/conftest.py +44 -17
maxframe/core/accessor.py +2 -2
maxframe/core/entity/core.py +5 -0
maxframe/core/entity/tileables.py +1 -1
maxframe/core/graph/core.cpython-38-darwin.so +0 -0
maxframe/core/graph/entity.py +1 -2
maxframe/core/operator/base.py +9 -2
maxframe/core/operator/core.py +10 -2
maxframe/core/operator/utils.py +13 -0
maxframe/dataframe/__init__.py +10 -3
maxframe/dataframe/accessors/__init__.py +1 -1
maxframe/dataframe/accessors/compat.py +45 -0
maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
maxframe/dataframe/accessors/dict_/contains.py +7 -16
maxframe/dataframe/accessors/dict_/core.py +48 -0
maxframe/dataframe/accessors/dict_/getitem.py +17 -21
maxframe/dataframe/accessors/dict_/length.py +7 -16
maxframe/dataframe/accessors/dict_/remove.py +6 -18
maxframe/dataframe/accessors/dict_/setitem.py +8 -18
maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
maxframe/dataframe/accessors/list_/__init__.py +2 -2
maxframe/dataframe/accessors/list_/core.py +48 -0
maxframe/dataframe/accessors/list_/getitem.py +12 -19
maxframe/dataframe/accessors/list_/length.py +7 -16
maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
maxframe/dataframe/accessors/string_/__init__.py +4 -1
maxframe/dataframe/accessors/struct_/__init__.py +37 -0
maxframe/dataframe/accessors/struct_/accessor.py +39 -0
maxframe/dataframe/accessors/struct_/core.py +43 -0
maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
maxframe/dataframe/accessors/struct_/field.py +123 -0
maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
maxframe/dataframe/arithmetic/__init__.py +14 -4
maxframe/dataframe/arithmetic/between.py +106 -0
maxframe/dataframe/arithmetic/dot.py +237 -0
maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
maxframe/dataframe/core.py +63 -118
maxframe/dataframe/datasource/__init__.py +18 -0
maxframe/dataframe/datasource/from_dict.py +124 -0
maxframe/dataframe/datasource/from_index.py +1 -1
maxframe/dataframe/datasource/from_records.py +77 -0
maxframe/dataframe/datasource/from_tensor.py +109 -41
maxframe/dataframe/datasource/read_csv.py +2 -3
maxframe/dataframe/datasource/read_odps_query.py +76 -16
maxframe/dataframe/datasource/tests/test_datasource.py +84 -1
maxframe/dataframe/datastore/__init__.py +5 -1
maxframe/dataframe/datastore/to_csv.py +29 -41
maxframe/dataframe/datastore/to_odps.py +30 -4
maxframe/dataframe/extensions/__init__.py +20 -4
maxframe/dataframe/extensions/apply_chunk.py +32 -6
maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
maxframe/dataframe/extensions/collect_kv.py +126 -0
maxframe/dataframe/extensions/extract_kv.py +177 -0
maxframe/dataframe/extensions/map_reduce.py +263 -0
maxframe/dataframe/extensions/rebalance.py +62 -0
maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
maxframe/dataframe/groupby/__init__.py +12 -1
maxframe/dataframe/groupby/aggregation.py +78 -45
maxframe/dataframe/groupby/apply.py +1 -1
maxframe/dataframe/groupby/apply_chunk.py +18 -2
maxframe/dataframe/groupby/core.py +96 -12
maxframe/dataframe/groupby/cum.py +4 -25
maxframe/dataframe/groupby/expanding.py +264 -0
maxframe/dataframe/groupby/fill.py +1 -1
maxframe/dataframe/groupby/getitem.py +12 -5
maxframe/dataframe/groupby/head.py +11 -1
maxframe/dataframe/groupby/rank.py +136 -0
maxframe/dataframe/groupby/rolling.py +206 -0
maxframe/dataframe/groupby/shift.py +114 -0
maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
maxframe/dataframe/indexing/__init__.py +20 -1
maxframe/dataframe/indexing/droplevel.py +195 -0
maxframe/dataframe/indexing/filter.py +169 -0
maxframe/dataframe/indexing/get_level_values.py +76 -0
maxframe/dataframe/indexing/iat.py +45 -0
maxframe/dataframe/indexing/iloc.py +152 -12
maxframe/dataframe/indexing/insert.py +1 -1
maxframe/dataframe/indexing/loc.py +287 -7
maxframe/dataframe/indexing/reindex.py +14 -5
maxframe/dataframe/indexing/rename.py +6 -0
maxframe/dataframe/indexing/rename_axis.py +2 -2
maxframe/dataframe/indexing/reorder_levels.py +143 -0
maxframe/dataframe/indexing/reset_index.py +33 -6
maxframe/dataframe/indexing/sample.py +8 -0
maxframe/dataframe/indexing/setitem.py +3 -3
maxframe/dataframe/indexing/swaplevel.py +185 -0
maxframe/dataframe/indexing/take.py +99 -0
maxframe/dataframe/indexing/truncate.py +140 -0
maxframe/dataframe/indexing/where.py +0 -11
maxframe/dataframe/indexing/xs.py +148 -0
maxframe/dataframe/merge/__init__.py +12 -1
maxframe/dataframe/merge/append.py +97 -98
maxframe/dataframe/merge/combine_first.py +120 -0
maxframe/dataframe/merge/compare.py +387 -0
maxframe/dataframe/merge/concat.py +183 -0
maxframe/dataframe/merge/update.py +271 -0
maxframe/dataframe/misc/__init__.py +16 -10
maxframe/dataframe/misc/_duplicate.py +10 -4
maxframe/dataframe/misc/apply.py +1 -1
maxframe/dataframe/misc/check_unique.py +51 -0
maxframe/dataframe/misc/clip.py +145 -0
maxframe/dataframe/misc/describe.py +175 -9
maxframe/dataframe/misc/drop_duplicates.py +2 -2
maxframe/dataframe/misc/duplicated.py +2 -2
maxframe/dataframe/misc/get_dummies.py +5 -1
maxframe/dataframe/misc/isin.py +2 -2
maxframe/dataframe/misc/map.py +94 -0
maxframe/dataframe/misc/tests/test_misc.py +13 -2
maxframe/dataframe/misc/to_numeric.py +3 -0
maxframe/dataframe/misc/transform.py +12 -5
maxframe/dataframe/misc/transpose.py +13 -1
maxframe/dataframe/misc/valid_index.py +115 -0
maxframe/dataframe/misc/value_counts.py +38 -4
maxframe/dataframe/missing/checkna.py +13 -6
maxframe/dataframe/missing/dropna.py +5 -0
maxframe/dataframe/missing/fillna.py +1 -1
maxframe/dataframe/missing/replace.py +7 -4
maxframe/dataframe/reduction/__init__.py +29 -15
maxframe/dataframe/reduction/aggregation.py +38 -9
maxframe/dataframe/reduction/all.py +2 -2
maxframe/dataframe/reduction/any.py +2 -2
maxframe/dataframe/reduction/argmax.py +100 -0
maxframe/dataframe/reduction/argmin.py +100 -0
maxframe/dataframe/reduction/core.py +65 -18
maxframe/dataframe/reduction/count.py +13 -9
maxframe/dataframe/reduction/cov.py +166 -0
maxframe/dataframe/reduction/cummax.py +2 -2
maxframe/dataframe/reduction/cummin.py +2 -2
maxframe/dataframe/reduction/cumprod.py +2 -2
maxframe/dataframe/reduction/cumsum.py +2 -2
maxframe/dataframe/reduction/custom_reduction.py +2 -2
maxframe/dataframe/reduction/idxmax.py +185 -0
maxframe/dataframe/reduction/idxmin.py +185 -0
maxframe/dataframe/reduction/kurtosis.py +37 -30
maxframe/dataframe/reduction/max.py +2 -2
maxframe/dataframe/reduction/mean.py +9 -7
maxframe/dataframe/reduction/median.py +2 -2
maxframe/dataframe/reduction/min.py +2 -2
maxframe/dataframe/reduction/nunique.py +9 -8
maxframe/dataframe/reduction/prod.py +18 -13
maxframe/dataframe/reduction/reduction_size.py +2 -2
maxframe/dataframe/reduction/sem.py +13 -9
maxframe/dataframe/reduction/skew.py +31 -27
maxframe/dataframe/reduction/str_concat.py +10 -7
maxframe/dataframe/reduction/sum.py +18 -14
maxframe/dataframe/reduction/unique.py +20 -3
maxframe/dataframe/reduction/var.py +16 -12
maxframe/dataframe/reshape/__init__.py +38 -0
maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
maxframe/dataframe/reshape/unstack.py +114 -0
maxframe/dataframe/sort/__init__.py +8 -0
maxframe/dataframe/sort/argsort.py +62 -0
maxframe/dataframe/sort/core.py +1 -0
maxframe/dataframe/sort/nlargest.py +238 -0
maxframe/dataframe/sort/nsmallest.py +228 -0
maxframe/dataframe/statistics/__init__.py +3 -3
maxframe/dataframe/statistics/corr.py +1 -0
maxframe/dataframe/statistics/quantile.py +2 -2
maxframe/dataframe/tests/test_typing.py +104 -0
maxframe/dataframe/tests/test_utils.py +66 -2
maxframe/dataframe/typing_.py +185 -0
maxframe/dataframe/utils.py +95 -26
maxframe/dataframe/window/aggregation.py +8 -4
maxframe/dataframe/window/core.py +14 -1
maxframe/dataframe/window/ewm.py +1 -3
maxframe/dataframe/window/expanding.py +37 -35
maxframe/dataframe/window/rolling.py +49 -39
maxframe/dataframe/window/tests/test_expanding.py +1 -7
maxframe/dataframe/window/tests/test_rolling.py +1 -1
maxframe/env.py +7 -4
maxframe/errors.py +2 -2
maxframe/io/objects/tests/test_object_io.py +4 -2
maxframe/io/odpsio/schema.py +9 -3
maxframe/io/odpsio/tableio.py +7 -2
maxframe/io/odpsio/tests/test_schema.py +198 -83
maxframe/io/odpsio/tests/test_volumeio.py +4 -15
maxframe/io/odpsio/volumeio.py +23 -8
maxframe/learn/__init__.py +10 -2
maxframe/learn/cluster/__init__.py +15 -0
maxframe/learn/cluster/_kmeans.py +782 -0
maxframe/learn/contrib/llm/core.py +2 -0
maxframe/learn/contrib/xgboost/core.py +87 -1
maxframe/learn/contrib/xgboost/train.py +5 -2
maxframe/learn/core.py +66 -0
maxframe/learn/linear_model/_base.py +58 -1
maxframe/learn/linear_model/_lin_reg.py +1 -1
maxframe/learn/metrics/__init__.py +6 -0
maxframe/learn/metrics/_classification.py +145 -0
maxframe/learn/metrics/_ranking.py +477 -0
maxframe/learn/metrics/_scorer.py +60 -0
maxframe/learn/metrics/pairwise/__init__.py +21 -0
maxframe/learn/metrics/pairwise/core.py +77 -0
maxframe/learn/metrics/pairwise/cosine.py +115 -0
maxframe/learn/metrics/pairwise/euclidean.py +176 -0
maxframe/learn/metrics/pairwise/haversine.py +96 -0
maxframe/learn/metrics/pairwise/manhattan.py +80 -0
maxframe/learn/metrics/pairwise/pairwise.py +127 -0
maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
maxframe/learn/metrics/tests/__init__.py +13 -0
maxframe/learn/metrics/tests/test_scorer.py +26 -0
maxframe/learn/utils/__init__.py +1 -1
maxframe/learn/utils/checks.py +1 -2
maxframe/learn/utils/core.py +59 -0
maxframe/learn/utils/extmath.py +37 -0
maxframe/learn/utils/odpsio.py +193 -0
maxframe/learn/utils/validation.py +2 -2
maxframe/lib/compat.py +40 -0
maxframe/lib/dtypes_extension/__init__.py +16 -1
maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
maxframe/lib/dtypes_extension/blob.py +304 -0
maxframe/lib/dtypes_extension/dtypes.py +40 -0
maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
maxframe/lib/filesystem/_oss_lib/common.py +122 -50
maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
maxframe/lib/filesystem/base.py +1 -1
maxframe/lib/filesystem/core.py +1 -1
maxframe/lib/filesystem/oss.py +115 -46
maxframe/lib/filesystem/tests/test_oss.py +74 -36
maxframe/lib/mmh3.cpython-38-darwin.so +0 -0
maxframe/lib/wrapped_pickle.py +10 -0
maxframe/opcodes.py +33 -15
maxframe/protocol.py +12 -0
maxframe/serialization/__init__.py +11 -2
maxframe/serialization/arrow.py +38 -13
maxframe/serialization/blob.py +32 -0
maxframe/serialization/core.cpython-38-darwin.so +0 -0
maxframe/serialization/core.pyx +39 -1
maxframe/serialization/exception.py +2 -4
maxframe/serialization/numpy.py +11 -0
maxframe/serialization/pandas.py +46 -9
maxframe/serialization/serializables/core.py +2 -2
maxframe/serialization/tests/test_serial.py +29 -2
maxframe/tensor/__init__.py +38 -8
maxframe/tensor/arithmetic/__init__.py +19 -10
maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
maxframe/tensor/core.py +3 -2
maxframe/tensor/datasource/tests/test_datasource.py +2 -1
maxframe/tensor/extensions/__init__.py +2 -0
maxframe/tensor/extensions/apply_chunk.py +3 -3
maxframe/tensor/extensions/rebalance.py +65 -0
maxframe/tensor/fft/__init__.py +32 -0
maxframe/tensor/fft/core.py +168 -0
maxframe/tensor/fft/fft.py +112 -0
maxframe/tensor/fft/fft2.py +118 -0
maxframe/tensor/fft/fftfreq.py +80 -0
maxframe/tensor/fft/fftn.py +123 -0
maxframe/tensor/fft/fftshift.py +79 -0
maxframe/tensor/fft/hfft.py +112 -0
maxframe/tensor/fft/ifft.py +114 -0
maxframe/tensor/fft/ifft2.py +115 -0
maxframe/tensor/fft/ifftn.py +123 -0
maxframe/tensor/fft/ifftshift.py +73 -0
maxframe/tensor/fft/ihfft.py +93 -0
maxframe/tensor/fft/irfft.py +118 -0
maxframe/tensor/fft/irfft2.py +62 -0
maxframe/tensor/fft/irfftn.py +114 -0
maxframe/tensor/fft/rfft.py +116 -0
maxframe/tensor/fft/rfft2.py +63 -0
maxframe/tensor/fft/rfftfreq.py +87 -0
maxframe/tensor/fft/rfftn.py +113 -0
maxframe/tensor/indexing/fill_diagonal.py +1 -7
maxframe/tensor/linalg/__init__.py +7 -0
maxframe/tensor/linalg/_einsumfunc.py +1025 -0
maxframe/tensor/linalg/cholesky.py +117 -0
maxframe/tensor/linalg/einsum.py +339 -0
maxframe/tensor/linalg/lstsq.py +100 -0
maxframe/tensor/linalg/matrix_norm.py +75 -0
maxframe/tensor/linalg/norm.py +249 -0
maxframe/tensor/linalg/solve.py +72 -0
maxframe/tensor/linalg/solve_triangular.py +2 -2
maxframe/tensor/linalg/vector_norm.py +113 -0
maxframe/tensor/misc/__init__.py +24 -1
maxframe/tensor/misc/argwhere.py +72 -0
maxframe/tensor/misc/array_split.py +46 -0
maxframe/tensor/misc/broadcast_arrays.py +57 -0
maxframe/tensor/misc/copyto.py +130 -0
maxframe/tensor/misc/delete.py +104 -0
maxframe/tensor/misc/dsplit.py +68 -0
maxframe/tensor/misc/ediff1d.py +74 -0
maxframe/tensor/misc/expand_dims.py +85 -0
maxframe/tensor/misc/flip.py +90 -0
maxframe/tensor/misc/fliplr.py +64 -0
maxframe/tensor/misc/flipud.py +68 -0
maxframe/tensor/misc/hsplit.py +85 -0
maxframe/tensor/misc/insert.py +139 -0
maxframe/tensor/misc/moveaxis.py +83 -0
maxframe/tensor/misc/result_type.py +88 -0
maxframe/tensor/misc/roll.py +124 -0
maxframe/tensor/misc/rollaxis.py +77 -0
maxframe/tensor/misc/shape.py +89 -0
maxframe/tensor/misc/split.py +190 -0
maxframe/tensor/misc/tile.py +109 -0
maxframe/tensor/misc/vsplit.py +74 -0
maxframe/tensor/reduction/array_equal.py +2 -1
maxframe/tensor/sort/__init__.py +2 -0
maxframe/tensor/sort/argpartition.py +98 -0
maxframe/tensor/sort/partition.py +228 -0
maxframe/tensor/spatial/__init__.py +15 -0
maxframe/tensor/spatial/distance/__init__.py +17 -0
maxframe/tensor/spatial/distance/cdist.py +421 -0
maxframe/tensor/spatial/distance/pdist.py +398 -0
maxframe/tensor/spatial/distance/squareform.py +153 -0
maxframe/tensor/special/__init__.py +159 -21
maxframe/tensor/special/airy.py +55 -0
maxframe/tensor/special/bessel.py +199 -0
maxframe/tensor/special/core.py +65 -4
maxframe/tensor/special/ellip_func_integrals.py +155 -0
maxframe/tensor/special/ellip_harm.py +55 -0
maxframe/tensor/special/err_fresnel.py +223 -0
maxframe/tensor/special/gamma_funcs.py +303 -0
maxframe/tensor/special/hypergeometric_funcs.py +69 -0
maxframe/tensor/special/info_theory.py +189 -0
maxframe/tensor/special/misc.py +21 -0
maxframe/tensor/statistics/__init__.py +6 -0
maxframe/tensor/statistics/corrcoef.py +77 -0
maxframe/tensor/statistics/cov.py +222 -0
maxframe/tensor/statistics/digitize.py +126 -0
maxframe/tensor/statistics/histogram.py +520 -0
maxframe/tensor/statistics/median.py +85 -0
maxframe/tensor/statistics/ptp.py +89 -0
maxframe/tensor/utils.py +3 -3
maxframe/tests/test_utils.py +43 -1
maxframe/tests/utils.py +3 -13
maxframe/typing_.py +2 -0
maxframe/udf.py +27 -2
maxframe/utils.py +193 -19
{maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
{maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/RECORD +395 -240
maxframe_client/fetcher.py +35 -4
maxframe_client/session/odps.py +7 -2
maxframe_client/tests/test_fetcher.py +76 -3
maxframe_client/tests/test_session.py +4 -1
/maxframe/dataframe/{misc → reshape}/melt.py +0 -0
/maxframe/dataframe/{misc → reshape}/stack.py +0 -0
{maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
{maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0

maxframe/dataframe/datasource/from_index.py CHANGED Viewed

@@ -51,7 +51,7 @@ class SeriesFromIndex(DataFrameOperator, DataFrameOperatorMixin):
 def series_from_index(ind, index=None, name=None):
-    name = name or ind.name or 0
+    name = name or ind.name
     if index is not None:
         index = Index(index)
     op = SeriesFromIndex(input_=ind, index=index, name=name)

maxframe/dataframe/datasource/from_records.py CHANGED Viewed

@@ -77,6 +77,83 @@ def from_records(
     sparse=False,
     **kw
 ):
+    """
+    Convert structured or record ndarray to DataFrame.
+    Creates a DataFrame object from a structured ndarray, sequence of
+    tuples or dicts, or DataFrame.
+    Parameters
+    ----------
+    data : structured ndarray, sequence of tuples or dicts, or DataFrame
+        Structured input data.
+        .. deprecated:: 2.1.0
+            Passing a DataFrame is deprecated.
+    index : str, list of fields, array-like
+        Field of array to use as the index, alternately a specific set of
+        input labels to use.
+    exclude : sequence, default None
+        Columns or fields to exclude.
+    columns : sequence, default None
+        Column names to use. If the passed data do not have names
+        associated with them, this argument provides names for the
+        columns. Otherwise this argument indicates the order of the columns
+        in the result (any names not found in the data will become all-NA
+        columns).
+    coerce_float : bool, default False
+        Attempt to convert values of non-string, non-numeric objects (like
+        decimal.Decimal) to floating point, useful for SQL result sets.
+    nrows : int, default None
+        Number of rows to read if data is an iterator.
+    Returns
+    -------
+    DataFrame
+    See Also
+    --------
+    DataFrame.from_dict : DataFrame from dict of array-like or dicts.
+    DataFrame : DataFrame object creation using constructor.
+    Examples
+    --------
+    Data can be provided as a structured ndarray:
+    >>> import maxframe.tensor as mt
+    >>> import maxframe.dataframe as md
+    >>> data = mt.array([(3, 'a'), (2, 'b'), (1, 'c'), (0, 'd')],
+    ...                 dtype=[('col_1', 'i4'), ('col_2', 'U1')])
+    >>> md.DataFrame.from_records(data).execute()
+       col_1 col_2
+    0      3     a
+    1      2     b
+    2      1     c
+    3      0     d
+    Data can be provided as a list of dicts:
+    >>> data = [{'col_1': 3, 'col_2': 'a'},
+    ...         {'col_1': 2, 'col_2': 'b'},
+    ...         {'col_1': 1, 'col_2': 'c'},
+    ...         {'col_1': 0, 'col_2': 'd'}]
+    >>> md.DataFrame.from_records(data).execute()
+       col_1 col_2
+    0      3     a
+    1      2     b
+    2      1     c
+    3      0     d
+    Data can be provided as a list of tuples with corresponding columns:
+    >>> data = [(3, 'a'), (2, 'b'), (1, 'c'), (0, 'd')]
+    >>> md.DataFrame.from_records(data, columns=['col_1', 'col_2']).execute()
+       col_1 col_2
+    0      3     a
+    1      2     b
+    2      1     c
+    3      0     d
+    """
     if isinstance(data, np.ndarray):
         from .dataframe import from_pandas

maxframe/dataframe/datasource/from_tensor.py CHANGED Viewed

@@ -39,6 +39,7 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
     input = AnyField("input")
     index = AnyField("index")
     columns = AnyField("columns")
+    axis = AnyField("axis")
     def __init__(self, *args, **kwargs):
         kwargs["_output_types"] = [OutputType.dataframe]
@@ -120,46 +121,82 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
             if isinstance(tileable, ENTITY_TYPE):
                 tileables.append(tileable)
-        if index is not None:
-            tileable_size = tileables[0].shape[0]
-            if hasattr(index, "shape"):
-                index_size = index.shape[0]
+        if self.axis == 0:
+            if index is not None:
+                raise NotImplementedError("Cannot accept index when axis=0")
             else:
-                index_size = len(index)
-            if (
-                not pd.isna(tileable_size)
-                and not pd.isna(index_size)
-                and tileable_size != index_size
-            ):
-                raise ValueError(
-                    f"index {index} should have the same shape "
-                    f"with tensor: {tileable_size}"
-                )
-            index_value = self._process_index(index, tileables)
+                index = pd.Index(list(input_1d_tileables.keys()))
+                index_value = parse_index(index, store_data=True)
+                self.index = index
+            if columns is not None:
+                tileable_size = tileables[0].shape[0] if tileables else 0
+                if not isinstance(columns, pd.Index):
+                    columns = self.columns = pd.Index(columns)
+                column_size = columns.shape[0]
+                if (
+                    not pd.isna(tileable_size)
+                    and not pd.isna(column_size)
+                    and tileable_size != column_size
+                ):
+                    raise ValueError(
+                        f"columns {columns} should have the same shape "
+                        f"with tensor: {tileable_size}"
+                    )
+                columns_value = self._process_index(columns, tileables)
+            else:
+                if not tileables or np.isnan(tileables[0].shape[0]):
+                    columns = columns_value = None
+                else:
+                    columns = pd.RangeIndex(0, tileables[0].shape[0])
+                    columns_value = parse_index(columns, store_data=True)
+                self.columns = columns
+            shape = (len(input_1d_tileables), shape[0] if shape else 0)
         else:
-            if np.isnan(tileables[0].shape[0]):
-                index = pd.RangeIndex(0)
+            if index is not None:
+                tileable_size = tileables[0].shape[0] if tileables else 0
+                if hasattr(index, "shape"):
+                    index_size = index.shape[0]
+                else:
+                    index_size = len(index)
+                if (
+                    not pd.isna(tileable_size)
+                    and not pd.isna(index_size)
+                    and tileable_size != index_size
+                ):
+                    raise ValueError(
+                        f"index {index} should have the same shape "
+                        f"with tensor: {tileable_size}"
+                    )
+                index_value = self._process_index(index, tileables)
             else:
-                index = pd.RangeIndex(0, tileables[0].shape[0])
-            self.index = index
-            index_value = parse_index(index)
+                if not tileables or np.isnan(tileables[0].shape[0]):
+                    index = pd.RangeIndex(0)
+                else:
+                    index = pd.RangeIndex(0, tileables[0].shape[0])
+                self.index = index
+                index_value = parse_index(index)
-        if columns is not None:
-            if len(input_1d_tileables) != len(columns):
-                raise ValueError(
-                    f"columns {columns} should have size {len(input_1d_tileables)}"
+            if columns is not None:
+                if len(input_1d_tileables) != len(columns):
+                    raise ValueError(
+                        f"columns {columns} should have size {len(input_1d_tileables)}"
+                    )
+                if not isinstance(columns, pd.Index):
+                    if isinstance(columns, ENTITY_TYPE):
+                        raise NotImplementedError(
+                            "The columns value cannot be a tileable"
+                        )
+                    columns = pd.Index(columns)
+                columns_value = parse_index(columns, store_data=True)
+            else:
+                columns_value = parse_index(
+                    pd.RangeIndex(0, len(input_1d_tileables)), store_data=True
                 )
-            if not isinstance(columns, pd.Index):
-                if isinstance(columns, ENTITY_TYPE):
-                    raise NotImplementedError("The columns value cannot be a tileable")
-                columns = pd.Index(columns)
-            columns_value = parse_index(columns, store_data=True)
-        else:
-            columns_value = parse_index(
-                pd.RangeIndex(0, len(input_1d_tileables)), store_data=True
-            )
-        shape = (shape[0], len(input_1d_tileables))
+            shape = (shape[0] if shape else 0, len(input_1d_tileables))
         return self.new_dataframe(
             tileables,
             shape,
@@ -278,6 +315,9 @@ def dataframe_from_tensor(
     gpu: bool = None,
     sparse: bool = False,
 ):
+    if isinstance(columns, list) and columns and isinstance(columns[0], tuple):
+        columns = pd.MultiIndex.from_tuples(columns)
     if tensor is not None:
         if tensor.ndim > 2 or tensor.ndim <= 0:
             raise TypeError(
@@ -299,6 +339,8 @@ def dataframe_from_tensor(
             dtypes = pd.Series([], index=pd.Index([], dtype=object))
     if index is not None and not isinstance(index, ENTITY_TYPE):
         index = pd.Index(index)
+        if isinstance(index[0], tuple):
+            index = pd.MultiIndex.from_tuples(index)
     op = DataFrameFromTensor(
         input=tensor, index=index, columns=columns, gpu=gpu, sparse=sparse
     )
@@ -311,7 +353,10 @@ def dataframe_from_1d_tileables(
     columns: Union[pd.Index, list] = None,
     gpu: bool = None,
     sparse: bool = False,
+    axis: int = 1,
 ):
+    from pandas.core.dtypes.cast import find_common_type
     data = dict()
     for k, v in d.items():
         if isinstance(v, (list, tuple)) and any(
@@ -322,9 +367,9 @@ def dataframe_from_1d_tileables(
             data[k] = v
     d = data
     if columns is not None:
-        tileables = [d.get(c) for c in columns]
+        tileables = [d.get(c) for c in columns] if axis == 1 else list(d.values())
     else:
-        columns = list(d.keys())
+        columns = list(d.keys()) if axis == 1 else None
         tileables = list(d.values())
     gpu = (
@@ -332,14 +377,37 @@ def dataframe_from_1d_tileables(
         if gpu is None
         else gpu
     )
-    dtypes = pd.Series(
-        [t.dtype if hasattr(t, "dtype") else pd.Series(t).dtype for t in tileables],
-        index=columns,
-    )
+    if axis == 0:
+        col_num = (
+            tileables[0].shape[0]
+            if hasattr(tileables[0], "shape")
+            else len(tileables[0])
+        )
+        if pd.isna(col_num):
+            dtypes = None
+        else:
+            common_dtype = find_common_type(
+                [
+                    t.dtype if hasattr(t, "dtype") else pd.Series(t).dtype
+                    for t in tileables
+                ]
+            )
+            dtypes = pd.Series(
+                [common_dtype] * col_num,
+                index=columns if columns is not None else pd.RangeIndex(col_num),
+            )
+    else:
+        dtypes = pd.Series(
+            [t.dtype if hasattr(t, "dtype") else pd.Series(t).dtype for t in tileables],
+            index=columns,
+        )
     if index is not None and not isinstance(index, ENTITY_TYPE):
         index = pd.Index(index)
     op = DataFrameFromTensor(
-        input=d, index=index, columns=columns, gpu=gpu, sparse=sparse
+        input=d, index=index, columns=columns, gpu=gpu, sparse=sparse, axis=axis
     )
     return op(d, index, columns, dtypes)

maxframe/dataframe/datasource/read_csv.py CHANGED Viewed

@@ -441,13 +441,12 @@ def read_csv(
     Examples
     --------
     >>> import maxframe.dataframe as md
-    >>> from maxframe.lib.filesystem.oss import build_oss_path
     >>> md.read_csv('data.csv')  # doctest: +SKIP
     >>> # read from HDFS
     >>> md.read_csv('hdfs://localhost:8020/test.csv')  # doctest: +SKIP
     >>> # read from OSS
-    >>> auth_path = build_oss_path(file_path, access_key_id, access_key_secret, end_point)
-    >>> md.read_csv(auth_path)
+    >>> md.read_csv('oss://oss-cn-hangzhou-internal.aliyuncs.com/bucket/test.csv',
+    ...             storage_options={'role_arn': 'acs:ram::xxxxxx:role/aliyunodpsdefaultrole'})
     """
     # infer dtypes and columns
     if isinstance(path, (list, tuple)):

maxframe/dataframe/datasource/read_odps_query.py CHANGED Viewed

@@ -13,6 +13,7 @@
 # limitations under the License.
 import dataclasses
+import functools
 import io
 import logging
 import re
@@ -22,6 +23,8 @@ from typing import Dict, List, MutableMapping, Optional, Tuple, Union
 import numpy as np
 import pandas as pd
 from odps import ODPS
+from odps.errors import ODPSError
+from odps.models import TableSchema
 from odps.types import Column, OdpsSchema, validate_data_type
 from odps.utils import split_sql_by_semicolon
@@ -245,13 +248,18 @@ def _parse_explained_schema(explain_string: str) -> OdpsSchema:
         return _parse_full_explain(explain_string)
-def _build_explain_sql(sql_stmt: str, no_split: bool = False) -> str:
+def _build_explain_sql(
+    sql_stmt: str, no_split: bool = False, use_output: bool = False
+) -> str:
+    """
+    Build the EXPLAIN statement used to inspect ``sql_stmt``.
+
+    The prefix is ``EXPLAIN `` or, when ``use_output`` is true,
+    ``EXPLAIN OUTPUT ``. With ``no_split`` the prefix is put in front of the
+    whole text as-is; otherwise the text is split with
+    ``split_sql_by_semicolon``, only the last part is prefixed, and the parts
+    are joined back with newlines.
+
+    Raises ``ValueError`` when splitting yields no statement at all.
+    """
+    clause = "EXPLAIN "
+    if use_output:
+        clause += "OUTPUT "
     if no_split:
-        return "EXPLAIN " + sql_stmt
+        return clause + sql_stmt
     sql_parts = split_sql_by_semicolon(sql_stmt)
     if not sql_parts:
         raise ValueError(f"Cannot explain SQL statement {sql_stmt}")
+    # only the final statement is explained; earlier parts are kept unchanged
-    sql_parts[-1] = "EXPLAIN " + sql_parts[-1]
+    sql_parts[-1] = clause + sql_parts[-1]
     return "\n".join(sql_parts)
@@ -332,6 +340,62 @@ def _check_token_in_sql(token: str, sql: str) -> bool:
         return False
+def _resolve_schema_by_explain(
+    odps_entry: ODPS,
+    query: str,
+    no_split_sql: bool = False,
+    hints: Dict[str, str] = None,
+    use_explain_output: bool = True,
+) -> OdpsSchema:
+    """
+    Obtain the schema of ``query`` by executing an EXPLAIN statement.
+
+    The statement is built with ``_build_explain_sql`` and executed through
+    ``odps_entry.execute_sql``. The first task result of the instance is
+    taken as the explain text.
+
+    When ``use_explain_output`` is true, the text is parsed with
+    ``TableSchema.parse``; an empty text, or one containing
+    "nothing to explain", raises ``ValueError``. Otherwise the text is
+    handed to ``_parse_explained_schema``.
+    """
+    # work on a copy so the caller's hints mapping is never mutated
+    hints = (hints or dict()).copy()
+    # NOTE(review): set for both explain variants; presumably only needed for
+    # the EXPLAIN OUTPUT path whose result is parsed as JSON -- confirm
+    hints["odps.sql.select.output.format"] = "json"
+    explain_stmt = _build_explain_sql(
+        query, no_split=no_split_sql, use_output=use_explain_output
+    )
+    inst = odps_entry.execute_sql(explain_stmt, hints=hints)
+    logger.debug("Explain output instance ID: %s", inst.id)
+    # only the first task result is inspected
+    explain_str = list(inst.get_task_results().values())[0]
+    if use_explain_output:
+        if not explain_str or "nothing to explain" in explain_str:
+            raise ValueError("The SQL statement should be an instant query")
+        return TableSchema.parse(None, explain_str)
+    else:
+        return _parse_explained_schema(explain_str)
+def _resolve_query_schema(
+    odps_entry: ODPS,
+    query: str,
+    no_split_sql: bool = False,
+    hints: Dict[str, str] = None,
+    use_explain_output: Optional[bool] = None,
+) -> OdpsSchema:
+    """
+    Resolve the schema of ``query``, trying one or two explain strategies.
+
+    ``use_explain_output`` selects the strategies: ``True`` uses only
+    EXPLAIN OUTPUT, ``False`` uses only plain EXPLAIN, and ``None`` tries
+    EXPLAIN OUTPUT first and plain EXPLAIN second.
+
+    An ``ODPSError`` raised by a strategy is converted into ``ValueError``
+    (carrying the instance ID) unless another strategy remains and the error
+    text contains "ODPS-0130161", in which case the next strategy is tried.
+    Exceptions of other types raised by a strategy propagate unchanged.
+    """
+    methods = []
+    if use_explain_output is not False:
+        # None or True
+        methods.append(_resolve_schema_by_explain)
+    if use_explain_output is not True:
+        # None or False
+        methods.append(
+            functools.partial(_resolve_schema_by_explain, use_explain_output=False)
+        )
+    for idx, resolve_method in enumerate(methods):
+        try:
+            return resolve_method(
+                odps_entry, query, no_split_sql=no_split_sql, hints=hints
+            )
+        except ODPSError as ex:
+            msg = (
+                f"Failed to obtain schema from SQL explain: {ex!r}\n"
+                f"Explain instance ID: {ex.instance_id}"
+            )
+            # fall through to the next method only for ODPS-0130161
+            # (presumably a parse error from a service that does not
+            # understand EXPLAIN OUTPUT -- confirm); anything else, or a
+            # failure of the last method, is reported to the caller
+            if idx + 1 == len(methods) or "ODPS-0130161" not in str(ex):
+                exc = ValueError(msg)
+                raise exc.with_traceback(ex.__traceback__) from None
+    # defensive only: methods is never empty and the last method either
+    # returns or raises, so this line should not be reached
+    raise ValueError("Failed to obtain schema from SQL explain")  # pragma: no cover
 def read_odps_query(
     query: str,
     odps_entry: ODPS = None,
@@ -371,6 +435,8 @@ def read_odps_query(
         DataFrame read from MaxCompute (ODPS) table
     """
     no_split_sql = kw.pop("no_split_sql", False)
+    # if use_explain_output is None, EXPLAIN OUTPUT is tried first,
+    # with plain EXPLAIN as the fallback.
+    use_explain_output = kw.pop("use_explain_output", None)
     hints = options.sql.settings.copy() or {}
     if sql_hints:
@@ -395,19 +461,13 @@ def read_odps_query(
     col_renames = {}
     if not skip_schema:
-        explain_stmt = _build_explain_sql(query, no_split=no_split_sql)
-        inst = odps_entry.execute_sql(explain_stmt, hints=hints)
-        logger.debug("Explain instance ID: %s", inst.id)
-        explain_str = list(inst.get_task_results().values())[0]
-        try:
-            odps_schema = _parse_explained_schema(explain_str)
-        except BaseException as ex:
-            exc = ValueError(
-                f"Failed to obtain schema from SQL explain: {ex!r}"
-                f"\nExplain instance ID: {inst.id}"
-            )
-            raise exc.with_traceback(ex.__traceback__) from None
+        odps_schema = _resolve_query_schema(
+            odps_entry,
+            query,
+            no_split_sql=no_split_sql,
+            hints=hints,
+            use_explain_output=use_explain_output,
+        )
         new_columns = []
         for col in odps_schema.columns:

maxframe/dataframe/datasource/tests/test_datasource.py CHANGED Viewed

@@ -17,11 +17,13 @@ import uuid
 from collections import OrderedDict
 from math import isinf
+import mock
 import numpy as np
 import pandas as pd
 import pytest
 from odps import ODPS
 from odps import types as odps_types
+from odps.errors import ODPSError
 from .... import tensor as mt
 from ....core import OutputType
@@ -50,6 +52,7 @@ from ..read_odps_query import (
     ColumnSchema,
     _parse_full_explain,
     _parse_simple_explain,
+    _resolve_query_schema,
     _resolve_task_sector,
 )
 from ..series import from_pandas as from_pandas_series
@@ -181,6 +184,23 @@ def test_from_tensor():
     df = dataframe_from_1d_tileables(d)
     pd.testing.assert_index_equal(df.columns_value.to_pandas(), pd.RangeIndex(2))
+    # test axis parameter for dataframe_from_1d_tileables
+    d = OrderedDict(
+        [("a", mt.tensor(np.random.rand(4))), ("b", mt.tensor(np.random.rand(4)))]
+    )
+    # axis=1 (default behavior) - keys become columns
+    df = dataframe_from_1d_tileables(d, axis=1)
+    assert df.shape == (4, 2)
+    pd.testing.assert_index_equal(df.columns_value.to_pandas(), pd.Index(["a", "b"]))
+    pd.testing.assert_index_equal(df.index_value.to_pandas(), pd.RangeIndex(4))
+    # axis=0 - keys become index (rows)
+    df = dataframe_from_1d_tileables(d, axis=0)
+    assert df.shape == (2, 4)
+    pd.testing.assert_index_equal(df.index_value.to_pandas(), pd.Index(["a", "b"]))
+    pd.testing.assert_index_equal(df.columns_value.to_pandas(), pd.RangeIndex(4))
     series = series_from_tensor(mt.random.rand(4))
     pd.testing.assert_index_equal(series.index_value.to_pandas(), pd.RangeIndex(4))
@@ -204,6 +224,26 @@ def test_from_tensor():
     with pytest.raises(ValueError):
         dataframe_from_tensor(mt.random.rand(4, 3), columns=["a", "b"])
+    # 1-d tensors should have same shape
+    with pytest.raises(ValueError):
+        dataframe_from_1d_tileables(
+            OrderedDict(
+                [(0, mt.tensor(np.random.rand(3))), (1, mt.tensor(np.random.rand(2)))]
+            )
+        )
+    # index has wrong shape
+    with pytest.raises(ValueError):
+        dataframe_from_1d_tileables(
+            {0: mt.tensor(np.random.rand(3))}, index=mt.tensor(np.random.rand(2))
+        )
+    # columns have wrong shape
+    with pytest.raises(ValueError):
+        dataframe_from_1d_tileables(
+            {0: mt.tensor(np.random.rand(3))}, columns=["a", "b"]
+        )
     # index should be 1-d
     with pytest.raises(ValueError):
         dataframe_from_tensor(
@@ -360,7 +400,7 @@ def test_from_odps_query():
     with pytest.raises(ValueError) as err_info:
         read_odps_query(
-            f"CREATE TABLE dummy_table_{uuid.uuid4().hex} "
+            f"CREATE TABLE dummy_table_{uuid.uuid4().hex} LIFECYCLE 1 "
             f"AS SELECT * FROM {table1_name}"
         )
     assert "instant query" in err_info.value.args[0]
@@ -578,3 +618,46 @@ def test_resolve_break_lines():
     for col, (exp_nm, exp_tp) in zip(schema.columns, expected_col_types.items()):
         assert col.name == exp_nm
         assert col.type == odps_types.validate_data_type(exp_tp)
+@pytest.mark.parametrize("use_explain_output", [None, False, True])
+def test_explain_use_explain_output(use_explain_output):
+    """
+    Check which explain statements ``_resolve_query_schema`` issues for each
+    value of ``use_explain_output`` by counting ``execute_sql`` calls.
+    """
+    # stand-in for an instance whose explain result is a one-column JSON schema
+    class MockInstance:
+        @property
+        def id(self):
+            return "mock_id"
+        def get_task_results(self):
+            return {"pot": """{"columns":[{"name":"a_bigint","type":"BIGINT"}]}"""}
+    old_execute_sql = ODPS.execute_sql
+    exec_count = 0
+    def new_execute_sql(self, sql, *args, **kw):
+        nonlocal exec_count
+        exec_count += 1
+        # True: answer EXPLAIN OUTPUT of a SELECT with the mocked schema
+        if use_explain_output and sql.lower().startswith("explain output select"):
+            return MockInstance()
+        # None: make EXPLAIN OUTPUT fail with the code that triggers fallback
+        elif use_explain_output is None and sql.lower().startswith("explain output"):
+            raise ODPSError("ODPS-0130161: mock error")
+        # everything else goes to the unpatched implementation
+        return old_execute_sql(self, sql, *args, **kw)
+    # NOTE(review): non-mocked branches call the real execute_sql, so this
+    # presumably needs a reachable ODPS service configured via environment
+    odps_entry = ODPS.from_environments()
+    with mock.patch("odps.core.ODPS.execute_sql", new=new_execute_sql):
+        # an invalid statement must surface as ValueError
+        with pytest.raises(ValueError):
+            _resolve_query_schema(
+                odps_entry, "not_a_sql", use_explain_output=use_explain_output
+            )
+        # two attempts only when both methods are enabled (None)
+        assert exec_count == (2 if use_explain_output is None else 1)
+        exec_count = 0
+        schema = _resolve_query_schema(
+            odps_entry,
+            "select cast(1 as bigint) as a_bigint",
+            use_explain_output=use_explain_output,
+        )
+        assert schema.columns[0].name == "a_bigint"
+        assert schema.columns[0].type == odps_types.bigint
+        assert exec_count == (2 if use_explain_output is None else 1)

maxframe/dataframe/datastore/__init__.py CHANGED Viewed

@@ -12,14 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from .to_csv import to_csv
 from .to_odps import to_odps_table
 def _install():
+    """
+    Attach the datastore functions as attributes of the entity classes:
+    ``to_csv`` and ``to_odps_table`` on every type in ``DATAFRAME_TYPE``,
+    and ``to_csv`` on every type in ``SERIES_TYPE``.
+    """
+    # imported inside the function, presumably to avoid a circular import
+    # with ..core -- confirm
-    from ..core import DATAFRAME_TYPE
+    from ..core import DATAFRAME_TYPE, SERIES_TYPE
     for t in DATAFRAME_TYPE:
+        t.to_csv = to_csv
         t.to_odps_table = to_odps_table
+    for t in SERIES_TYPE:
+        t.to_csv = to_csv
 _install()