maxframe 2.0.0b2__cp311-cp311-win_amd64.whl → 2.2.0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp311-win_amd64.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +6 -6
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +34 -1
- maxframe/codegen/spe/dataframe/misc.py +9 -33
- maxframe/codegen/spe/dataframe/reduction.py +14 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +30 -17
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +70 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +44 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +1 -1
- maxframe/core/graph/core.cp311-win_amd64.pyd +0 -0
- maxframe/core/graph/entity.py +1 -2
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +10 -3
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +14 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +63 -118
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +2 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +5 -1
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +30 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +12 -1
- maxframe/dataframe/groupby/aggregation.py +78 -45
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +18 -2
- maxframe/dataframe/groupby/core.py +96 -12
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +20 -1
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +1 -1
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +12 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +16 -10
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +51 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +94 -0
- maxframe/dataframe/misc/tests/test_misc.py +13 -2
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +13 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +29 -15
- maxframe/dataframe/reduction/aggregation.py +38 -9
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +100 -0
- maxframe/dataframe/reduction/argmin.py +100 -0
- maxframe/dataframe/reduction/core.py +65 -18
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/nunique.py +9 -8
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +8 -0
- maxframe/dataframe/sort/argsort.py +62 -0
- maxframe/dataframe/sort/core.py +1 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +95 -26
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +86 -1
- maxframe/learn/contrib/xgboost/train.py +5 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/utils/__init__.py +1 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +37 -0
- maxframe/learn/utils/odpsio.py +193 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +122 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp311-win_amd64.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +33 -15
- maxframe/protocol.py +12 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp311-win_amd64.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +29 -2
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/tensor/core.py +3 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_utils.py +43 -1
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +27 -2
- maxframe/utils.py +193 -19
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/RECORD +391 -236
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +4 -1
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0
|
@@ -51,7 +51,7 @@ class SeriesFromIndex(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
51
51
|
|
|
52
52
|
|
|
53
53
|
def series_from_index(ind, index=None, name=None):
|
|
54
|
-
name = name or ind.name
|
|
54
|
+
name = name or ind.name
|
|
55
55
|
if index is not None:
|
|
56
56
|
index = Index(index)
|
|
57
57
|
op = SeriesFromIndex(input_=ind, index=index, name=name)
|
|
@@ -77,6 +77,83 @@ def from_records(
|
|
|
77
77
|
sparse=False,
|
|
78
78
|
**kw
|
|
79
79
|
):
|
|
80
|
+
"""
|
|
81
|
+
Convert structured or record ndarray to DataFrame.
|
|
82
|
+
|
|
83
|
+
Creates a DataFrame object from a structured ndarray, sequence of
|
|
84
|
+
tuples or dicts, or DataFrame.
|
|
85
|
+
|
|
86
|
+
Parameters
|
|
87
|
+
----------
|
|
88
|
+
data : structured ndarray, sequence of tuples or dicts, or DataFrame
|
|
89
|
+
Structured input data.
|
|
90
|
+
|
|
91
|
+
.. deprecated:: 2.1.0
|
|
92
|
+
Passing a DataFrame is deprecated.
|
|
93
|
+
index : str, list of fields, array-like
|
|
94
|
+
Field of array to use as the index, alternately a specific set of
|
|
95
|
+
input labels to use.
|
|
96
|
+
exclude : sequence, default None
|
|
97
|
+
Columns or fields to exclude.
|
|
98
|
+
columns : sequence, default None
|
|
99
|
+
Column names to use. If the passed data do not have names
|
|
100
|
+
associated with them, this argument provides names for the
|
|
101
|
+
columns. Otherwise this argument indicates the order of the columns
|
|
102
|
+
in the result (any names not found in the data will become all-NA
|
|
103
|
+
columns).
|
|
104
|
+
coerce_float : bool, default False
|
|
105
|
+
Attempt to convert values of non-string, non-numeric objects (like
|
|
106
|
+
decimal.Decimal) to floating point, useful for SQL result sets.
|
|
107
|
+
nrows : int, default None
|
|
108
|
+
Number of rows to read if data is an iterator.
|
|
109
|
+
|
|
110
|
+
Returns
|
|
111
|
+
-------
|
|
112
|
+
DataFrame
|
|
113
|
+
|
|
114
|
+
See Also
|
|
115
|
+
--------
|
|
116
|
+
DataFrame.from_dict : DataFrame from dict of array-like or dicts.
|
|
117
|
+
DataFrame : DataFrame object creation using constructor.
|
|
118
|
+
|
|
119
|
+
Examples
|
|
120
|
+
--------
|
|
121
|
+
Data can be provided as a structured ndarray:
|
|
122
|
+
|
|
123
|
+
>>> import maxframe.tensor as mt
|
|
124
|
+
>>> import maxframe.dataframe as md
|
|
125
|
+
>>> data = mt.array([(3, 'a'), (2, 'b'), (1, 'c'), (0, 'd')],
|
|
126
|
+
... dtype=[('col_1', 'i4'), ('col_2', 'U1')])
|
|
127
|
+
>>> md.DataFrame.from_records(data).execute()
|
|
128
|
+
col_1 col_2
|
|
129
|
+
0 3 a
|
|
130
|
+
1 2 b
|
|
131
|
+
2 1 c
|
|
132
|
+
3 0 d
|
|
133
|
+
|
|
134
|
+
Data can be provided as a list of dicts:
|
|
135
|
+
|
|
136
|
+
>>> data = [{'col_1': 3, 'col_2': 'a'},
|
|
137
|
+
... {'col_1': 2, 'col_2': 'b'},
|
|
138
|
+
... {'col_1': 1, 'col_2': 'c'},
|
|
139
|
+
... {'col_1': 0, 'col_2': 'd'}]
|
|
140
|
+
>>> md.DataFrame.from_records(data).execute()
|
|
141
|
+
col_1 col_2
|
|
142
|
+
0 3 a
|
|
143
|
+
1 2 b
|
|
144
|
+
2 1 c
|
|
145
|
+
3 0 d
|
|
146
|
+
|
|
147
|
+
Data can be provided as a list of tuples with corresponding columns:
|
|
148
|
+
|
|
149
|
+
>>> data = [(3, 'a'), (2, 'b'), (1, 'c'), (0, 'd')]
|
|
150
|
+
>>> md.DataFrame.from_records(data, columns=['col_1', 'col_2']).execute()
|
|
151
|
+
col_1 col_2
|
|
152
|
+
0 3 a
|
|
153
|
+
1 2 b
|
|
154
|
+
2 1 c
|
|
155
|
+
3 0 d
|
|
156
|
+
"""
|
|
80
157
|
if isinstance(data, np.ndarray):
|
|
81
158
|
from .dataframe import from_pandas
|
|
82
159
|
|
|
@@ -39,6 +39,7 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
39
39
|
input = AnyField("input")
|
|
40
40
|
index = AnyField("index")
|
|
41
41
|
columns = AnyField("columns")
|
|
42
|
+
axis = AnyField("axis")
|
|
42
43
|
|
|
43
44
|
def __init__(self, *args, **kwargs):
|
|
44
45
|
kwargs["_output_types"] = [OutputType.dataframe]
|
|
@@ -120,46 +121,82 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
120
121
|
if isinstance(tileable, ENTITY_TYPE):
|
|
121
122
|
tileables.append(tileable)
|
|
122
123
|
|
|
123
|
-
if
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
index_size = index.shape[0]
|
|
124
|
+
if self.axis == 0:
|
|
125
|
+
if index is not None:
|
|
126
|
+
raise NotImplementedError("Cannot accept index when axis=0")
|
|
127
127
|
else:
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
128
|
+
index = pd.Index(list(input_1d_tileables.keys()))
|
|
129
|
+
index_value = parse_index(index, store_data=True)
|
|
130
|
+
self.index = index
|
|
131
|
+
|
|
132
|
+
if columns is not None:
|
|
133
|
+
tileable_size = tileables[0].shape[0] if tileables else 0
|
|
134
|
+
if not isinstance(columns, pd.Index):
|
|
135
|
+
columns = self.columns = pd.Index(columns)
|
|
136
|
+
column_size = columns.shape[0]
|
|
137
|
+
if (
|
|
138
|
+
not pd.isna(tileable_size)
|
|
139
|
+
and not pd.isna(column_size)
|
|
140
|
+
and tileable_size != column_size
|
|
141
|
+
):
|
|
142
|
+
raise ValueError(
|
|
143
|
+
f"columns {columns} should have the same shape "
|
|
144
|
+
f"with tensor: {tileable_size}"
|
|
145
|
+
)
|
|
146
|
+
columns_value = self._process_index(columns, tileables)
|
|
147
|
+
else:
|
|
148
|
+
if not tileables or np.isnan(tileables[0].shape[0]):
|
|
149
|
+
columns = columns_value = None
|
|
150
|
+
else:
|
|
151
|
+
columns = pd.RangeIndex(0, tileables[0].shape[0])
|
|
152
|
+
columns_value = parse_index(columns, store_data=True)
|
|
153
|
+
self.columns = columns
|
|
154
|
+
|
|
155
|
+
shape = (len(input_1d_tileables), shape[0] if shape else 0)
|
|
139
156
|
else:
|
|
140
|
-
if
|
|
141
|
-
|
|
157
|
+
if index is not None:
|
|
158
|
+
tileable_size = tileables[0].shape[0] if tileables else 0
|
|
159
|
+
if hasattr(index, "shape"):
|
|
160
|
+
index_size = index.shape[0]
|
|
161
|
+
else:
|
|
162
|
+
index_size = len(index)
|
|
163
|
+
if (
|
|
164
|
+
not pd.isna(tileable_size)
|
|
165
|
+
and not pd.isna(index_size)
|
|
166
|
+
and tileable_size != index_size
|
|
167
|
+
):
|
|
168
|
+
raise ValueError(
|
|
169
|
+
f"index {index} should have the same shape "
|
|
170
|
+
f"with tensor: {tileable_size}"
|
|
171
|
+
)
|
|
172
|
+
index_value = self._process_index(index, tileables)
|
|
142
173
|
else:
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
174
|
+
if not tileables or np.isnan(tileables[0].shape[0]):
|
|
175
|
+
index = pd.RangeIndex(0)
|
|
176
|
+
else:
|
|
177
|
+
index = pd.RangeIndex(0, tileables[0].shape[0])
|
|
178
|
+
self.index = index
|
|
179
|
+
index_value = parse_index(index)
|
|
146
180
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
181
|
+
if columns is not None:
|
|
182
|
+
if len(input_1d_tileables) != len(columns):
|
|
183
|
+
raise ValueError(
|
|
184
|
+
f"columns {columns} should have size {len(input_1d_tileables)}"
|
|
185
|
+
)
|
|
186
|
+
if not isinstance(columns, pd.Index):
|
|
187
|
+
if isinstance(columns, ENTITY_TYPE):
|
|
188
|
+
raise NotImplementedError(
|
|
189
|
+
"The columns value cannot be a tileable"
|
|
190
|
+
)
|
|
191
|
+
columns = pd.Index(columns)
|
|
192
|
+
columns_value = parse_index(columns, store_data=True)
|
|
193
|
+
else:
|
|
194
|
+
columns_value = parse_index(
|
|
195
|
+
pd.RangeIndex(0, len(input_1d_tileables)), store_data=True
|
|
151
196
|
)
|
|
152
|
-
if not isinstance(columns, pd.Index):
|
|
153
|
-
if isinstance(columns, ENTITY_TYPE):
|
|
154
|
-
raise NotImplementedError("The columns value cannot be a tileable")
|
|
155
|
-
columns = pd.Index(columns)
|
|
156
|
-
columns_value = parse_index(columns, store_data=True)
|
|
157
|
-
else:
|
|
158
|
-
columns_value = parse_index(
|
|
159
|
-
pd.RangeIndex(0, len(input_1d_tileables)), store_data=True
|
|
160
|
-
)
|
|
161
197
|
|
|
162
|
-
|
|
198
|
+
shape = (shape[0] if shape else 0, len(input_1d_tileables))
|
|
199
|
+
|
|
163
200
|
return self.new_dataframe(
|
|
164
201
|
tileables,
|
|
165
202
|
shape,
|
|
@@ -278,6 +315,9 @@ def dataframe_from_tensor(
|
|
|
278
315
|
gpu: bool = None,
|
|
279
316
|
sparse: bool = False,
|
|
280
317
|
):
|
|
318
|
+
if isinstance(columns, list) and columns and isinstance(columns[0], tuple):
|
|
319
|
+
columns = pd.MultiIndex.from_tuples(columns)
|
|
320
|
+
|
|
281
321
|
if tensor is not None:
|
|
282
322
|
if tensor.ndim > 2 or tensor.ndim <= 0:
|
|
283
323
|
raise TypeError(
|
|
@@ -299,6 +339,8 @@ def dataframe_from_tensor(
|
|
|
299
339
|
dtypes = pd.Series([], index=pd.Index([], dtype=object))
|
|
300
340
|
if index is not None and not isinstance(index, ENTITY_TYPE):
|
|
301
341
|
index = pd.Index(index)
|
|
342
|
+
if isinstance(index[0], tuple):
|
|
343
|
+
index = pd.MultiIndex.from_tuples(index)
|
|
302
344
|
op = DataFrameFromTensor(
|
|
303
345
|
input=tensor, index=index, columns=columns, gpu=gpu, sparse=sparse
|
|
304
346
|
)
|
|
@@ -311,7 +353,10 @@ def dataframe_from_1d_tileables(
|
|
|
311
353
|
columns: Union[pd.Index, list] = None,
|
|
312
354
|
gpu: bool = None,
|
|
313
355
|
sparse: bool = False,
|
|
356
|
+
axis: int = 1,
|
|
314
357
|
):
|
|
358
|
+
from pandas.core.dtypes.cast import find_common_type
|
|
359
|
+
|
|
315
360
|
data = dict()
|
|
316
361
|
for k, v in d.items():
|
|
317
362
|
if isinstance(v, (list, tuple)) and any(
|
|
@@ -322,9 +367,9 @@ def dataframe_from_1d_tileables(
|
|
|
322
367
|
data[k] = v
|
|
323
368
|
d = data
|
|
324
369
|
if columns is not None:
|
|
325
|
-
tileables = [d.get(c) for c in columns]
|
|
370
|
+
tileables = [d.get(c) for c in columns] if axis == 1 else list(d.values())
|
|
326
371
|
else:
|
|
327
|
-
columns = list(d.keys())
|
|
372
|
+
columns = list(d.keys()) if axis == 1 else None
|
|
328
373
|
tileables = list(d.values())
|
|
329
374
|
|
|
330
375
|
gpu = (
|
|
@@ -332,14 +377,37 @@ def dataframe_from_1d_tileables(
|
|
|
332
377
|
if gpu is None
|
|
333
378
|
else gpu
|
|
334
379
|
)
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
380
|
+
|
|
381
|
+
if axis == 0:
|
|
382
|
+
col_num = (
|
|
383
|
+
tileables[0].shape[0]
|
|
384
|
+
if hasattr(tileables[0], "shape")
|
|
385
|
+
else len(tileables[0])
|
|
386
|
+
)
|
|
387
|
+
if pd.isna(col_num):
|
|
388
|
+
dtypes = None
|
|
389
|
+
else:
|
|
390
|
+
common_dtype = find_common_type(
|
|
391
|
+
[
|
|
392
|
+
t.dtype if hasattr(t, "dtype") else pd.Series(t).dtype
|
|
393
|
+
for t in tileables
|
|
394
|
+
]
|
|
395
|
+
)
|
|
396
|
+
dtypes = pd.Series(
|
|
397
|
+
[common_dtype] * col_num,
|
|
398
|
+
index=columns if columns is not None else pd.RangeIndex(col_num),
|
|
399
|
+
)
|
|
400
|
+
else:
|
|
401
|
+
dtypes = pd.Series(
|
|
402
|
+
[t.dtype if hasattr(t, "dtype") else pd.Series(t).dtype for t in tileables],
|
|
403
|
+
index=columns,
|
|
404
|
+
)
|
|
405
|
+
|
|
339
406
|
if index is not None and not isinstance(index, ENTITY_TYPE):
|
|
340
407
|
index = pd.Index(index)
|
|
408
|
+
|
|
341
409
|
op = DataFrameFromTensor(
|
|
342
|
-
input=d, index=index, columns=columns, gpu=gpu, sparse=sparse
|
|
410
|
+
input=d, index=index, columns=columns, gpu=gpu, sparse=sparse, axis=axis
|
|
343
411
|
)
|
|
344
412
|
return op(d, index, columns, dtypes)
|
|
345
413
|
|
|
@@ -441,13 +441,12 @@ def read_csv(
|
|
|
441
441
|
Examples
|
|
442
442
|
--------
|
|
443
443
|
>>> import maxframe.dataframe as md
|
|
444
|
-
>>> from maxframe.lib.filesystem.oss import build_oss_path
|
|
445
444
|
>>> md.read_csv('data.csv') # doctest: +SKIP
|
|
446
445
|
>>> # read from HDFS
|
|
447
446
|
>>> md.read_csv('hdfs://localhost:8020/test.csv') # doctest: +SKIP
|
|
448
447
|
>>> # read from OSS
|
|
449
|
-
>>>
|
|
450
|
-
>>>
|
|
448
|
+
>>> md.read_csv('oss://oss-cn-hangzhou-internal.aliyuncs.com/bucket/test.csv',
|
|
449
|
+
...     storage_options={'role_arn': 'acs:ram::xxxxxx:role/aliyunodpsdefaultrole'})
|
|
451
450
|
"""
|
|
452
451
|
# infer dtypes and columns
|
|
453
452
|
if isinstance(path, (list, tuple)):
|
|
@@ -184,6 +184,23 @@ def test_from_tensor():
|
|
|
184
184
|
df = dataframe_from_1d_tileables(d)
|
|
185
185
|
pd.testing.assert_index_equal(df.columns_value.to_pandas(), pd.RangeIndex(2))
|
|
186
186
|
|
|
187
|
+
# test axis parameter for dataframe_from_1d_tileables
|
|
188
|
+
d = OrderedDict(
|
|
189
|
+
[("a", mt.tensor(np.random.rand(4))), ("b", mt.tensor(np.random.rand(4)))]
|
|
190
|
+
)
|
|
191
|
+
|
|
192
|
+
# axis=1 (default behavior) - keys become columns
|
|
193
|
+
df = dataframe_from_1d_tileables(d, axis=1)
|
|
194
|
+
assert df.shape == (4, 2)
|
|
195
|
+
pd.testing.assert_index_equal(df.columns_value.to_pandas(), pd.Index(["a", "b"]))
|
|
196
|
+
pd.testing.assert_index_equal(df.index_value.to_pandas(), pd.RangeIndex(4))
|
|
197
|
+
|
|
198
|
+
# axis=0 - keys become index (rows)
|
|
199
|
+
df = dataframe_from_1d_tileables(d, axis=0)
|
|
200
|
+
assert df.shape == (2, 4)
|
|
201
|
+
pd.testing.assert_index_equal(df.index_value.to_pandas(), pd.Index(["a", "b"]))
|
|
202
|
+
pd.testing.assert_index_equal(df.columns_value.to_pandas(), pd.RangeIndex(4))
|
|
203
|
+
|
|
187
204
|
series = series_from_tensor(mt.random.rand(4))
|
|
188
205
|
pd.testing.assert_index_equal(series.index_value.to_pandas(), pd.RangeIndex(4))
|
|
189
206
|
|
|
@@ -207,6 +224,26 @@ def test_from_tensor():
|
|
|
207
224
|
with pytest.raises(ValueError):
|
|
208
225
|
dataframe_from_tensor(mt.random.rand(4, 3), columns=["a", "b"])
|
|
209
226
|
|
|
227
|
+
# 1-d tensors should have same shape
|
|
228
|
+
with pytest.raises(ValueError):
|
|
229
|
+
dataframe_from_1d_tileables(
|
|
230
|
+
OrderedDict(
|
|
231
|
+
[(0, mt.tensor(np.random.rand(3))), (1, mt.tensor(np.random.rand(2)))]
|
|
232
|
+
)
|
|
233
|
+
)
|
|
234
|
+
|
|
235
|
+
# index has wrong shape
|
|
236
|
+
with pytest.raises(ValueError):
|
|
237
|
+
dataframe_from_1d_tileables(
|
|
238
|
+
{0: mt.tensor(np.random.rand(3))}, index=mt.tensor(np.random.rand(2))
|
|
239
|
+
)
|
|
240
|
+
|
|
241
|
+
# columns have wrong shape
|
|
242
|
+
with pytest.raises(ValueError):
|
|
243
|
+
dataframe_from_1d_tileables(
|
|
244
|
+
{0: mt.tensor(np.random.rand(3))}, columns=["a", "b"]
|
|
245
|
+
)
|
|
246
|
+
|
|
210
247
|
# index should be 1-d
|
|
211
248
|
with pytest.raises(ValueError):
|
|
212
249
|
dataframe_from_tensor(
|
|
@@ -12,14 +12,18 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
from .to_csv import to_csv
|
|
15
16
|
from .to_odps import to_odps_table
|
|
16
17
|
|
|
17
18
|
|
|
18
19
|
def _install():
|
|
19
|
-
from ..core import DATAFRAME_TYPE
|
|
20
|
+
from ..core import DATAFRAME_TYPE, SERIES_TYPE
|
|
20
21
|
|
|
21
22
|
for t in DATAFRAME_TYPE:
|
|
23
|
+
t.to_csv = to_csv
|
|
22
24
|
t.to_odps_table = to_odps_table
|
|
25
|
+
for t in SERIES_TYPE:
|
|
26
|
+
t.to_csv = to_csv
|
|
23
27
|
|
|
24
28
|
|
|
25
29
|
_install()
|
|
@@ -12,17 +12,13 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from typing import List
|
|
16
|
-
|
|
17
15
|
from ... import opcodes
|
|
18
|
-
from ...core import EntityData
|
|
19
16
|
from ...serialization.serializables import (
|
|
20
17
|
AnyField,
|
|
21
18
|
BoolField,
|
|
22
19
|
DictField,
|
|
23
20
|
Int32Field,
|
|
24
21
|
Int64Field,
|
|
25
|
-
KeyField,
|
|
26
22
|
ListField,
|
|
27
23
|
StringField,
|
|
28
24
|
)
|
|
@@ -33,27 +29,26 @@ from .core import DataFrameDataStore
|
|
|
33
29
|
class DataFrameToCSV(DataFrameDataStore):
|
|
34
30
|
_op_type_ = opcodes.TO_CSV
|
|
35
31
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
storage_options = DictField("storage_options")
|
|
32
|
+
path = AnyField("path", default=None)
|
|
33
|
+
sep = StringField("sep", default=None)
|
|
34
|
+
na_rep = StringField("na_rep", default=None)
|
|
35
|
+
float_format = StringField("float_format", default=None)
|
|
36
|
+
columns = ListField("columns", default=None)
|
|
37
|
+
header = AnyField("header", default=None)
|
|
38
|
+
index = BoolField("index", default=None)
|
|
39
|
+
index_label = AnyField("index_label", default=None)
|
|
40
|
+
mode = StringField("mode", default=None)
|
|
41
|
+
encoding = StringField("encoding", default=None)
|
|
42
|
+
compression = AnyField("compression", default=None)
|
|
43
|
+
quoting = Int32Field("quoting", default=None)
|
|
44
|
+
quotechar = StringField("quotechar", default=None)
|
|
45
|
+
line_terminator = StringField("line_terminator", default=None)
|
|
46
|
+
chunksize = Int64Field("chunksize", default=None)
|
|
47
|
+
date_format = StringField("date_format", default=None)
|
|
48
|
+
doublequote = BoolField("doublequote", default=None)
|
|
49
|
+
escapechar = StringField("escapechar", default=None)
|
|
50
|
+
decimal = StringField("decimal", default=None)
|
|
51
|
+
storage_options = DictField("storage_options", default=None)
|
|
57
52
|
|
|
58
53
|
def __init__(self, output_types=None, **kw):
|
|
59
54
|
super().__init__(_output_types=output_types, **kw)
|
|
@@ -63,19 +58,6 @@ class DataFrameToCSV(DataFrameDataStore):
|
|
|
63
58
|
# if wildcard in path, write csv into multiple files
|
|
64
59
|
return "*" not in self.path
|
|
65
60
|
|
|
66
|
-
@property
|
|
67
|
-
def output_stat(self):
|
|
68
|
-
return self.output_stat
|
|
69
|
-
|
|
70
|
-
@property
|
|
71
|
-
def output_limit(self):
|
|
72
|
-
return 1 if not self.output_stat else 2
|
|
73
|
-
|
|
74
|
-
@classmethod
|
|
75
|
-
def _set_inputs(cls, op: "DataFrameToCSV", inputs: List[EntityData]):
|
|
76
|
-
super()._set_inputs(op, inputs)
|
|
77
|
-
op._input = op._inputs[0]
|
|
78
|
-
|
|
79
61
|
def __call__(self, df):
|
|
80
62
|
index_value = parse_index(df.index_value.to_pandas()[:0], df)
|
|
81
63
|
if df.ndim == 2:
|
|
@@ -110,13 +92,14 @@ def to_csv(
|
|
|
110
92
|
compression="infer",
|
|
111
93
|
quoting=None,
|
|
112
94
|
quotechar='"',
|
|
113
|
-
|
|
95
|
+
lineterminator=None,
|
|
114
96
|
chunksize=None,
|
|
115
97
|
date_format=None,
|
|
116
98
|
doublequote=True,
|
|
117
99
|
escapechar=None,
|
|
118
100
|
decimal=".",
|
|
119
101
|
storage_options=None,
|
|
102
|
+
**kw,
|
|
120
103
|
):
|
|
121
104
|
r"""
|
|
122
105
|
Write object to a comma-separated values (csv) file.
|
|
@@ -169,7 +152,7 @@ def to_csv(
|
|
|
169
152
|
will treat them as non-numeric.
|
|
170
153
|
quotechar : str, default '\"'
|
|
171
154
|
String of length 1. Character used to quote fields.
|
|
172
|
-
|
|
155
|
+
lineterminator : str, optional
|
|
173
156
|
The newline character or character sequence to use in the output
|
|
174
157
|
file. Defaults to `os.linesep`, which depends on the OS in which
|
|
175
158
|
this method is called ('\n' for linux, '\r\n' for Windows, i.e.).
|
|
@@ -203,6 +186,11 @@ def to_csv(
|
|
|
203
186
|
... 'weapon': ['sai', 'bo staff']})
|
|
204
187
|
>>> df.to_csv('out.csv', index=False).execute()
|
|
205
188
|
"""
|
|
189
|
+
lineterminator = lineterminator or kw.pop("line_terminator", None)
|
|
190
|
+
if kw:
|
|
191
|
+
raise TypeError(
|
|
192
|
+
f"to_csv() got an unexpected keyword argument '{next(iter(kw))}'"
|
|
193
|
+
)
|
|
206
194
|
|
|
207
195
|
if mode != "w": # pragma: no cover
|
|
208
196
|
raise NotImplementedError("only support to_csv with mode 'w' for now")
|
|
@@ -220,7 +208,7 @@ def to_csv(
|
|
|
220
208
|
compression=compression,
|
|
221
209
|
quoting=quoting,
|
|
222
210
|
quotechar=quotechar,
|
|
223
|
-
line_terminator=
|
|
211
|
+
line_terminator=lineterminator,
|
|
224
212
|
chunksize=chunksize,
|
|
225
213
|
date_format=date_format,
|
|
226
214
|
doublequote=doublequote,
|
|
@@ -56,6 +56,7 @@ class DataFrameToODPSTable(DataFrameDataStore):
|
|
|
56
56
|
index_label = ListField("index_label", FieldTypes.string, default=None)
|
|
57
57
|
lifecycle = Int64Field("lifecycle", default=None)
|
|
58
58
|
table_properties = DictField("table_properties", default=None)
|
|
59
|
+
primary_key = ListField("primary_key", FieldTypes.string, default=None)
|
|
59
60
|
|
|
60
61
|
def __init__(self, **kw):
|
|
61
62
|
super().__init__(_output_types=[OutputType.dataframe], **kw)
|
|
@@ -100,11 +101,12 @@ def to_odps_table(
|
|
|
100
101
|
partition: Optional[str] = None,
|
|
101
102
|
partition_col: Union[None, str, List[str]] = None,
|
|
102
103
|
overwrite: bool = False,
|
|
103
|
-
unknown_as_string: Optional[bool] =
|
|
104
|
+
unknown_as_string: Optional[bool] = True,
|
|
104
105
|
index: bool = True,
|
|
105
106
|
index_label: Union[None, str, List[str]] = None,
|
|
106
107
|
lifecycle: Optional[int] = None,
|
|
107
108
|
table_properties: Optional[dict] = None,
|
|
109
|
+
primary_key: Union[None, str, List[str]] = None,
|
|
108
110
|
):
|
|
109
111
|
"""
|
|
110
112
|
Write DataFrame object into a MaxCompute (ODPS) table.
|
|
@@ -145,6 +147,10 @@ def to_odps_table(
|
|
|
145
147
|
Specify lifecycle of the output table.
|
|
146
148
|
table_properties: Optional[dict]
|
|
147
149
|
Specify properties of the output table.
|
|
150
|
+
primary_key: Union[None, str, List[str]]
|
|
151
|
+
If provided and target table does not exist, target table
|
|
152
|
+
will be a delta table with columns specified in this argument
|
|
153
|
+
as primary key.
|
|
148
154
|
|
|
149
155
|
Returns
|
|
150
156
|
-------
|
|
@@ -201,12 +207,14 @@ def to_odps_table(
|
|
|
201
207
|
index_table_intersect = index_cols & table_cols
|
|
202
208
|
if index_table_intersect:
|
|
203
209
|
raise ValueError(
|
|
204
|
-
f"Index column(s) {index_table_intersect} conflict with
|
|
210
|
+
f"Index column(s) {index_table_intersect} conflict with "
|
|
211
|
+
f"column(s) of the input dataframe."
|
|
205
212
|
)
|
|
206
213
|
index_partition_intersect = index_cols & partition_col_set
|
|
207
214
|
if index_partition_intersect:
|
|
208
215
|
raise ValueError(
|
|
209
|
-
f"Index column(s) {index_partition_intersect} conflict
|
|
216
|
+
f"Index column(s) {index_partition_intersect} conflict "
|
|
217
|
+
f"with partition column(s)."
|
|
210
218
|
)
|
|
211
219
|
|
|
212
220
|
if partition_col:
|
|
@@ -217,6 +225,23 @@ def to_odps_table(
|
|
|
217
225
|
" is not the data column(s) of the input dataframe."
|
|
218
226
|
)
|
|
219
227
|
|
|
228
|
+
table_properties = table_properties or {}
|
|
229
|
+
if primary_key is not None:
|
|
230
|
+
table_properties["transactional"] = "true"
|
|
231
|
+
if odps_entry.exist_table(table):
|
|
232
|
+
table_obj = odps_entry.get_table(table)
|
|
233
|
+
if table_obj.is_transactional:
|
|
234
|
+
table_properties = table_properties or {}
|
|
235
|
+
table_properties["transactional"] = "true"
|
|
236
|
+
primary_key = primary_key or table_obj.primary_key or ()
|
|
237
|
+
if set(primary_key) != set(table_obj.primary_key or ()):
|
|
238
|
+
raise ValueError(
|
|
239
|
+
f"Primary keys between existing table {table} and "
|
|
240
|
+
f"provided arguments are not same."
|
|
241
|
+
)
|
|
242
|
+
if primary_key and not isinstance(primary_key, (list, tuple)):
|
|
243
|
+
primary_key = [primary_key]
|
|
244
|
+
|
|
220
245
|
op = DataFrameToODPSTable(
|
|
221
246
|
dtypes=df.dtypes,
|
|
222
247
|
table_name=table,
|
|
@@ -227,6 +252,7 @@ def to_odps_table(
|
|
|
227
252
|
index=index,
|
|
228
253
|
index_label=index_label,
|
|
229
254
|
lifecycle=lifecycle or options.session.table_lifecycle,
|
|
230
|
-
table_properties=table_properties,
|
|
255
|
+
table_properties=table_properties or None,
|
|
256
|
+
primary_key=primary_key or None,
|
|
231
257
|
)
|
|
232
258
|
return op(df)
|
|
@@ -24,20 +24,36 @@ from .apply_chunk import (
|
|
|
24
24
|
df_apply_chunk,
|
|
25
25
|
series_apply_chunk,
|
|
26
26
|
)
|
|
27
|
+
from .cartesian_chunk import cartesian_chunk
|
|
28
|
+
from .collect_kv import collect_kv
|
|
29
|
+
from .extract_kv import extract_kv
|
|
27
30
|
from .flatjson import series_flatjson
|
|
28
31
|
from .flatmap import df_flatmap, series_flatmap
|
|
32
|
+
from .map_reduce import map_reduce
|
|
33
|
+
from .rebalance import DataFrameRebalance, rebalance
|
|
29
34
|
from .reshuffle import DataFrameReshuffle, df_reshuffle
|
|
30
35
|
|
|
31
36
|
|
|
32
37
|
def _install():
|
|
33
38
|
from ..core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
|
|
34
39
|
|
|
35
|
-
DataFrameMaxFrameAccessor._register("reshuffle", df_reshuffle)
|
|
36
|
-
DataFrameMaxFrameAccessor._register("flatmap", df_flatmap)
|
|
37
40
|
DataFrameMaxFrameAccessor._register("apply_chunk", df_apply_chunk)
|
|
38
|
-
|
|
39
|
-
|
|
41
|
+
DataFrameMaxFrameAccessor._register("cartesian_chunk", cartesian_chunk)
|
|
42
|
+
DataFrameMaxFrameAccessor._register("collect_kv", collect_kv)
|
|
43
|
+
DataFrameMaxFrameAccessor._register("extract_kv", extract_kv)
|
|
44
|
+
DataFrameMaxFrameAccessor._register("flatmap", df_flatmap)
|
|
45
|
+
DataFrameMaxFrameAccessor._register("map_reduce", map_reduce)
|
|
46
|
+
DataFrameMaxFrameAccessor._register("rebalance", rebalance)
|
|
47
|
+
DataFrameMaxFrameAccessor._register("reshuffle", df_reshuffle)
|
|
48
|
+
|
|
40
49
|
SeriesMaxFrameAccessor._register("apply_chunk", series_apply_chunk)
|
|
50
|
+
SeriesMaxFrameAccessor._register("cartesian_chunk", cartesian_chunk)
|
|
51
|
+
SeriesMaxFrameAccessor._register("extract_kv", extract_kv)
|
|
52
|
+
SeriesMaxFrameAccessor._register("flatjson", series_flatjson)
|
|
53
|
+
SeriesMaxFrameAccessor._register("flatmap", series_flatmap)
|
|
54
|
+
SeriesMaxFrameAccessor._register("rebalance", rebalance)
|
|
55
|
+
|
|
56
|
+
IndexMaxFrameAccessor._register("rebalance", rebalance)
|
|
41
57
|
|
|
42
58
|
if DataFrameMaxFrameAccessor._api_count:
|
|
43
59
|
for t in DATAFRAME_TYPE:
|