maxframe 0.1.0b5__cp39-cp39-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +32 -0
- maxframe/_utils.cpython-39-darwin.so +0 -0
- maxframe/_utils.pxd +33 -0
- maxframe/_utils.pyx +547 -0
- maxframe/codegen.py +528 -0
- maxframe/config/__init__.py +15 -0
- maxframe/config/config.py +443 -0
- maxframe/config/tests/__init__.py +13 -0
- maxframe/config/tests/test_config.py +103 -0
- maxframe/config/tests/test_validators.py +34 -0
- maxframe/config/validators.py +57 -0
- maxframe/conftest.py +139 -0
- maxframe/core/__init__.py +65 -0
- maxframe/core/base.py +156 -0
- maxframe/core/entity/__init__.py +44 -0
- maxframe/core/entity/chunks.py +68 -0
- maxframe/core/entity/core.py +152 -0
- maxframe/core/entity/executable.py +337 -0
- maxframe/core/entity/fuse.py +73 -0
- maxframe/core/entity/objects.py +100 -0
- maxframe/core/entity/output_types.py +90 -0
- maxframe/core/entity/tileables.py +438 -0
- maxframe/core/entity/utils.py +24 -0
- maxframe/core/graph/__init__.py +17 -0
- maxframe/core/graph/builder/__init__.py +16 -0
- maxframe/core/graph/builder/base.py +86 -0
- maxframe/core/graph/builder/chunk.py +430 -0
- maxframe/core/graph/builder/tileable.py +34 -0
- maxframe/core/graph/builder/utils.py +41 -0
- maxframe/core/graph/core.cpython-39-darwin.so +0 -0
- maxframe/core/graph/core.pyx +467 -0
- maxframe/core/graph/entity.py +171 -0
- maxframe/core/graph/tests/__init__.py +13 -0
- maxframe/core/graph/tests/test_graph.py +205 -0
- maxframe/core/mode.py +96 -0
- maxframe/core/operator/__init__.py +34 -0
- maxframe/core/operator/base.py +450 -0
- maxframe/core/operator/core.py +276 -0
- maxframe/core/operator/fetch.py +53 -0
- maxframe/core/operator/fuse.py +29 -0
- maxframe/core/operator/objects.py +72 -0
- maxframe/core/operator/shuffle.py +111 -0
- maxframe/core/operator/tests/__init__.py +13 -0
- maxframe/core/operator/tests/test_core.py +64 -0
- maxframe/core/tests/__init__.py +13 -0
- maxframe/core/tests/test_mode.py +75 -0
- maxframe/dataframe/__init__.py +81 -0
- maxframe/dataframe/arithmetic/__init__.py +359 -0
- maxframe/dataframe/arithmetic/abs.py +33 -0
- maxframe/dataframe/arithmetic/add.py +60 -0
- maxframe/dataframe/arithmetic/arccos.py +28 -0
- maxframe/dataframe/arithmetic/arccosh.py +28 -0
- maxframe/dataframe/arithmetic/arcsin.py +28 -0
- maxframe/dataframe/arithmetic/arcsinh.py +28 -0
- maxframe/dataframe/arithmetic/arctan.py +28 -0
- maxframe/dataframe/arithmetic/arctanh.py +28 -0
- maxframe/dataframe/arithmetic/around.py +152 -0
- maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
- maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
- maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
- maxframe/dataframe/arithmetic/ceil.py +28 -0
- maxframe/dataframe/arithmetic/core.py +342 -0
- maxframe/dataframe/arithmetic/cos.py +28 -0
- maxframe/dataframe/arithmetic/cosh.py +28 -0
- maxframe/dataframe/arithmetic/degrees.py +28 -0
- maxframe/dataframe/arithmetic/docstring.py +442 -0
- maxframe/dataframe/arithmetic/equal.py +56 -0
- maxframe/dataframe/arithmetic/exp.py +28 -0
- maxframe/dataframe/arithmetic/exp2.py +28 -0
- maxframe/dataframe/arithmetic/expm1.py +28 -0
- maxframe/dataframe/arithmetic/floor.py +28 -0
- maxframe/dataframe/arithmetic/floordiv.py +64 -0
- maxframe/dataframe/arithmetic/greater.py +57 -0
- maxframe/dataframe/arithmetic/greater_equal.py +57 -0
- maxframe/dataframe/arithmetic/invert.py +33 -0
- maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
- maxframe/dataframe/arithmetic/less.py +57 -0
- maxframe/dataframe/arithmetic/less_equal.py +57 -0
- maxframe/dataframe/arithmetic/log.py +28 -0
- maxframe/dataframe/arithmetic/log10.py +28 -0
- maxframe/dataframe/arithmetic/log2.py +28 -0
- maxframe/dataframe/arithmetic/mod.py +60 -0
- maxframe/dataframe/arithmetic/multiply.py +60 -0
- maxframe/dataframe/arithmetic/negative.py +33 -0
- maxframe/dataframe/arithmetic/not_equal.py +56 -0
- maxframe/dataframe/arithmetic/power.py +68 -0
- maxframe/dataframe/arithmetic/radians.py +28 -0
- maxframe/dataframe/arithmetic/sin.py +28 -0
- maxframe/dataframe/arithmetic/sinh.py +28 -0
- maxframe/dataframe/arithmetic/sqrt.py +28 -0
- maxframe/dataframe/arithmetic/subtract.py +64 -0
- maxframe/dataframe/arithmetic/tan.py +28 -0
- maxframe/dataframe/arithmetic/tanh.py +28 -0
- maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
- maxframe/dataframe/arithmetic/truediv.py +64 -0
- maxframe/dataframe/arithmetic/trunc.py +28 -0
- maxframe/dataframe/arrays.py +864 -0
- maxframe/dataframe/core.py +2417 -0
- maxframe/dataframe/datasource/__init__.py +15 -0
- maxframe/dataframe/datasource/core.py +81 -0
- maxframe/dataframe/datasource/dataframe.py +59 -0
- maxframe/dataframe/datasource/date_range.py +504 -0
- maxframe/dataframe/datasource/from_index.py +54 -0
- maxframe/dataframe/datasource/from_records.py +107 -0
- maxframe/dataframe/datasource/from_tensor.py +419 -0
- maxframe/dataframe/datasource/index.py +117 -0
- maxframe/dataframe/datasource/read_csv.py +528 -0
- maxframe/dataframe/datasource/read_odps_query.py +299 -0
- maxframe/dataframe/datasource/read_odps_table.py +253 -0
- maxframe/dataframe/datasource/read_parquet.py +421 -0
- maxframe/dataframe/datasource/series.py +55 -0
- maxframe/dataframe/datasource/tests/__init__.py +13 -0
- maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
- maxframe/dataframe/datastore/__init__.py +26 -0
- maxframe/dataframe/datastore/core.py +19 -0
- maxframe/dataframe/datastore/to_csv.py +227 -0
- maxframe/dataframe/datastore/to_odps.py +162 -0
- maxframe/dataframe/extensions/__init__.py +41 -0
- maxframe/dataframe/extensions/accessor.py +50 -0
- maxframe/dataframe/extensions/reshuffle.py +83 -0
- maxframe/dataframe/extensions/tests/__init__.py +13 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
- maxframe/dataframe/fetch/__init__.py +15 -0
- maxframe/dataframe/fetch/core.py +86 -0
- maxframe/dataframe/groupby/__init__.py +82 -0
- maxframe/dataframe/groupby/aggregation.py +350 -0
- maxframe/dataframe/groupby/apply.py +251 -0
- maxframe/dataframe/groupby/core.py +179 -0
- maxframe/dataframe/groupby/cum.py +124 -0
- maxframe/dataframe/groupby/fill.py +141 -0
- maxframe/dataframe/groupby/getitem.py +92 -0
- maxframe/dataframe/groupby/head.py +105 -0
- maxframe/dataframe/groupby/sample.py +214 -0
- maxframe/dataframe/groupby/tests/__init__.py +13 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
- maxframe/dataframe/groupby/transform.py +255 -0
- maxframe/dataframe/indexing/__init__.py +84 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
- maxframe/dataframe/indexing/align.py +349 -0
- maxframe/dataframe/indexing/at.py +83 -0
- maxframe/dataframe/indexing/getitem.py +204 -0
- maxframe/dataframe/indexing/iat.py +37 -0
- maxframe/dataframe/indexing/iloc.py +566 -0
- maxframe/dataframe/indexing/insert.py +86 -0
- maxframe/dataframe/indexing/loc.py +411 -0
- maxframe/dataframe/indexing/reindex.py +526 -0
- maxframe/dataframe/indexing/rename.py +462 -0
- maxframe/dataframe/indexing/rename_axis.py +209 -0
- maxframe/dataframe/indexing/reset_index.py +402 -0
- maxframe/dataframe/indexing/sample.py +221 -0
- maxframe/dataframe/indexing/set_axis.py +194 -0
- maxframe/dataframe/indexing/set_index.py +61 -0
- maxframe/dataframe/indexing/setitem.py +130 -0
- maxframe/dataframe/indexing/tests/__init__.py +13 -0
- maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
- maxframe/dataframe/indexing/where.py +308 -0
- maxframe/dataframe/initializer.py +288 -0
- maxframe/dataframe/merge/__init__.py +32 -0
- maxframe/dataframe/merge/append.py +121 -0
- maxframe/dataframe/merge/concat.py +325 -0
- maxframe/dataframe/merge/merge.py +593 -0
- maxframe/dataframe/merge/tests/__init__.py +13 -0
- maxframe/dataframe/merge/tests/test_merge.py +215 -0
- maxframe/dataframe/misc/__init__.py +134 -0
- maxframe/dataframe/misc/_duplicate.py +46 -0
- maxframe/dataframe/misc/accessor.py +276 -0
- maxframe/dataframe/misc/apply.py +692 -0
- maxframe/dataframe/misc/astype.py +236 -0
- maxframe/dataframe/misc/case_when.py +141 -0
- maxframe/dataframe/misc/check_monotonic.py +84 -0
- maxframe/dataframe/misc/cut.py +383 -0
- maxframe/dataframe/misc/datetimes.py +79 -0
- maxframe/dataframe/misc/describe.py +108 -0
- maxframe/dataframe/misc/diff.py +210 -0
- maxframe/dataframe/misc/drop.py +440 -0
- maxframe/dataframe/misc/drop_duplicates.py +248 -0
- maxframe/dataframe/misc/duplicated.py +292 -0
- maxframe/dataframe/misc/eval.py +728 -0
- maxframe/dataframe/misc/explode.py +171 -0
- maxframe/dataframe/misc/get_dummies.py +208 -0
- maxframe/dataframe/misc/isin.py +217 -0
- maxframe/dataframe/misc/map.py +236 -0
- maxframe/dataframe/misc/melt.py +162 -0
- maxframe/dataframe/misc/memory_usage.py +248 -0
- maxframe/dataframe/misc/pct_change.py +150 -0
- maxframe/dataframe/misc/pivot_table.py +262 -0
- maxframe/dataframe/misc/qcut.py +104 -0
- maxframe/dataframe/misc/select_dtypes.py +104 -0
- maxframe/dataframe/misc/shift.py +256 -0
- maxframe/dataframe/misc/stack.py +238 -0
- maxframe/dataframe/misc/string_.py +221 -0
- maxframe/dataframe/misc/tests/__init__.py +13 -0
- maxframe/dataframe/misc/tests/test_misc.py +468 -0
- maxframe/dataframe/misc/to_numeric.py +178 -0
- maxframe/dataframe/misc/transform.py +361 -0
- maxframe/dataframe/misc/transpose.py +136 -0
- maxframe/dataframe/misc/value_counts.py +182 -0
- maxframe/dataframe/missing/__init__.py +53 -0
- maxframe/dataframe/missing/checkna.py +223 -0
- maxframe/dataframe/missing/dropna.py +280 -0
- maxframe/dataframe/missing/fillna.py +275 -0
- maxframe/dataframe/missing/replace.py +439 -0
- maxframe/dataframe/missing/tests/__init__.py +13 -0
- maxframe/dataframe/missing/tests/test_missing.py +89 -0
- maxframe/dataframe/operators.py +273 -0
- maxframe/dataframe/plotting/__init__.py +40 -0
- maxframe/dataframe/plotting/core.py +78 -0
- maxframe/dataframe/plotting/tests/__init__.py +13 -0
- maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
- maxframe/dataframe/reduction/__init__.py +107 -0
- maxframe/dataframe/reduction/aggregation.py +344 -0
- maxframe/dataframe/reduction/all.py +78 -0
- maxframe/dataframe/reduction/any.py +78 -0
- maxframe/dataframe/reduction/core.py +837 -0
- maxframe/dataframe/reduction/count.py +59 -0
- maxframe/dataframe/reduction/cummax.py +30 -0
- maxframe/dataframe/reduction/cummin.py +30 -0
- maxframe/dataframe/reduction/cumprod.py +30 -0
- maxframe/dataframe/reduction/cumsum.py +30 -0
- maxframe/dataframe/reduction/custom_reduction.py +42 -0
- maxframe/dataframe/reduction/kurtosis.py +104 -0
- maxframe/dataframe/reduction/max.py +65 -0
- maxframe/dataframe/reduction/mean.py +61 -0
- maxframe/dataframe/reduction/min.py +65 -0
- maxframe/dataframe/reduction/nunique.py +141 -0
- maxframe/dataframe/reduction/prod.py +76 -0
- maxframe/dataframe/reduction/reduction_size.py +36 -0
- maxframe/dataframe/reduction/sem.py +69 -0
- maxframe/dataframe/reduction/skew.py +89 -0
- maxframe/dataframe/reduction/std.py +53 -0
- maxframe/dataframe/reduction/str_concat.py +48 -0
- maxframe/dataframe/reduction/sum.py +77 -0
- maxframe/dataframe/reduction/tests/__init__.py +13 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
- maxframe/dataframe/reduction/unique.py +90 -0
- maxframe/dataframe/reduction/var.py +72 -0
- maxframe/dataframe/sort/__init__.py +34 -0
- maxframe/dataframe/sort/core.py +36 -0
- maxframe/dataframe/sort/sort_index.py +153 -0
- maxframe/dataframe/sort/sort_values.py +311 -0
- maxframe/dataframe/sort/tests/__init__.py +13 -0
- maxframe/dataframe/sort/tests/test_sort.py +81 -0
- maxframe/dataframe/statistics/__init__.py +33 -0
- maxframe/dataframe/statistics/corr.py +280 -0
- maxframe/dataframe/statistics/quantile.py +341 -0
- maxframe/dataframe/statistics/tests/__init__.py +13 -0
- maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
- maxframe/dataframe/tests/__init__.py +13 -0
- maxframe/dataframe/tests/test_initializer.py +29 -0
- maxframe/dataframe/tseries/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
- maxframe/dataframe/tseries/to_datetime.py +297 -0
- maxframe/dataframe/ufunc/__init__.py +27 -0
- maxframe/dataframe/ufunc/tensor.py +54 -0
- maxframe/dataframe/ufunc/ufunc.py +52 -0
- maxframe/dataframe/utils.py +1267 -0
- maxframe/dataframe/window/__init__.py +29 -0
- maxframe/dataframe/window/aggregation.py +96 -0
- maxframe/dataframe/window/core.py +69 -0
- maxframe/dataframe/window/ewm.py +249 -0
- maxframe/dataframe/window/expanding.py +147 -0
- maxframe/dataframe/window/rolling.py +376 -0
- maxframe/dataframe/window/tests/__init__.py +13 -0
- maxframe/dataframe/window/tests/test_ewm.py +70 -0
- maxframe/dataframe/window/tests/test_expanding.py +66 -0
- maxframe/dataframe/window/tests/test_rolling.py +57 -0
- maxframe/env.py +33 -0
- maxframe/errors.py +21 -0
- maxframe/extension.py +81 -0
- maxframe/learn/__init__.py +17 -0
- maxframe/learn/contrib/__init__.py +17 -0
- maxframe/learn/contrib/pytorch/__init__.py +16 -0
- maxframe/learn/contrib/pytorch/run_function.py +110 -0
- maxframe/learn/contrib/pytorch/run_script.py +102 -0
- maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
- maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
- maxframe/learn/contrib/utils.py +52 -0
- maxframe/learn/contrib/xgboost/__init__.py +26 -0
- maxframe/learn/contrib/xgboost/classifier.py +86 -0
- maxframe/learn/contrib/xgboost/core.py +156 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
- maxframe/learn/contrib/xgboost/predict.py +138 -0
- maxframe/learn/contrib/xgboost/regressor.py +78 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +121 -0
- maxframe/learn/utils/__init__.py +15 -0
- maxframe/learn/utils/core.py +29 -0
- maxframe/lib/__init__.py +15 -0
- maxframe/lib/aio/__init__.py +27 -0
- maxframe/lib/aio/_runners.py +162 -0
- maxframe/lib/aio/_threads.py +35 -0
- maxframe/lib/aio/base.py +82 -0
- maxframe/lib/aio/file.py +85 -0
- maxframe/lib/aio/isolation.py +100 -0
- maxframe/lib/aio/lru.py +242 -0
- maxframe/lib/aio/parallelism.py +37 -0
- maxframe/lib/aio/tests/__init__.py +13 -0
- maxframe/lib/aio/tests/test_aio_file.py +55 -0
- maxframe/lib/compression.py +55 -0
- maxframe/lib/cython/__init__.py +13 -0
- maxframe/lib/cython/libcpp.pxd +30 -0
- maxframe/lib/filesystem/__init__.py +21 -0
- maxframe/lib/filesystem/_glob.py +173 -0
- maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
- maxframe/lib/filesystem/_oss_lib/common.py +198 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
- maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
- maxframe/lib/filesystem/arrow.py +236 -0
- maxframe/lib/filesystem/base.py +263 -0
- maxframe/lib/filesystem/core.py +95 -0
- maxframe/lib/filesystem/fsmap.py +164 -0
- maxframe/lib/filesystem/hdfs.py +31 -0
- maxframe/lib/filesystem/local.py +112 -0
- maxframe/lib/filesystem/oss.py +157 -0
- maxframe/lib/filesystem/tests/__init__.py +13 -0
- maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
- maxframe/lib/filesystem/tests/test_oss.py +182 -0
- maxframe/lib/functools_compat.py +81 -0
- maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
- maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
- maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
- maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
- maxframe/lib/sparse/__init__.py +861 -0
- maxframe/lib/sparse/array.py +1604 -0
- maxframe/lib/sparse/core.py +92 -0
- maxframe/lib/sparse/matrix.py +241 -0
- maxframe/lib/sparse/tests/__init__.py +15 -0
- maxframe/lib/sparse/tests/test_sparse.py +476 -0
- maxframe/lib/sparse/vector.py +150 -0
- maxframe/lib/tblib/LICENSE +20 -0
- maxframe/lib/tblib/__init__.py +327 -0
- maxframe/lib/tblib/cpython.py +83 -0
- maxframe/lib/tblib/decorators.py +44 -0
- maxframe/lib/tblib/pickling_support.py +90 -0
- maxframe/lib/tests/__init__.py +13 -0
- maxframe/lib/tests/test_wrapped_pickle.py +51 -0
- maxframe/lib/version.py +620 -0
- maxframe/lib/wrapped_pickle.py +139 -0
- maxframe/mixin.py +100 -0
- maxframe/odpsio/__init__.py +21 -0
- maxframe/odpsio/arrow.py +91 -0
- maxframe/odpsio/schema.py +364 -0
- maxframe/odpsio/tableio.py +322 -0
- maxframe/odpsio/tests/__init__.py +13 -0
- maxframe/odpsio/tests/test_arrow.py +88 -0
- maxframe/odpsio/tests/test_schema.py +297 -0
- maxframe/odpsio/tests/test_tableio.py +136 -0
- maxframe/odpsio/tests/test_volumeio.py +90 -0
- maxframe/odpsio/volumeio.py +95 -0
- maxframe/opcodes.py +590 -0
- maxframe/protocol.py +415 -0
- maxframe/remote/__init__.py +18 -0
- maxframe/remote/core.py +210 -0
- maxframe/remote/run_script.py +121 -0
- maxframe/serialization/__init__.py +26 -0
- maxframe/serialization/arrow.py +95 -0
- maxframe/serialization/core.cpython-39-darwin.so +0 -0
- maxframe/serialization/core.pxd +44 -0
- maxframe/serialization/core.pyi +61 -0
- maxframe/serialization/core.pyx +1094 -0
- maxframe/serialization/exception.py +86 -0
- maxframe/serialization/maxframe_objects.py +39 -0
- maxframe/serialization/numpy.py +91 -0
- maxframe/serialization/pandas.py +202 -0
- maxframe/serialization/scipy.py +71 -0
- maxframe/serialization/serializables/__init__.py +55 -0
- maxframe/serialization/serializables/core.py +262 -0
- maxframe/serialization/serializables/field.py +624 -0
- maxframe/serialization/serializables/field_type.py +589 -0
- maxframe/serialization/serializables/tests/__init__.py +13 -0
- maxframe/serialization/serializables/tests/test_field_type.py +121 -0
- maxframe/serialization/serializables/tests/test_serializable.py +250 -0
- maxframe/serialization/tests/__init__.py +13 -0
- maxframe/serialization/tests/test_serial.py +412 -0
- maxframe/session.py +1310 -0
- maxframe/tensor/__init__.py +183 -0
- maxframe/tensor/arithmetic/__init__.py +315 -0
- maxframe/tensor/arithmetic/abs.py +68 -0
- maxframe/tensor/arithmetic/absolute.py +68 -0
- maxframe/tensor/arithmetic/add.py +82 -0
- maxframe/tensor/arithmetic/angle.py +72 -0
- maxframe/tensor/arithmetic/arccos.py +104 -0
- maxframe/tensor/arithmetic/arccosh.py +91 -0
- maxframe/tensor/arithmetic/arcsin.py +94 -0
- maxframe/tensor/arithmetic/arcsinh.py +86 -0
- maxframe/tensor/arithmetic/arctan.py +106 -0
- maxframe/tensor/arithmetic/arctan2.py +128 -0
- maxframe/tensor/arithmetic/arctanh.py +86 -0
- maxframe/tensor/arithmetic/around.py +114 -0
- maxframe/tensor/arithmetic/bitand.py +95 -0
- maxframe/tensor/arithmetic/bitor.py +102 -0
- maxframe/tensor/arithmetic/bitxor.py +95 -0
- maxframe/tensor/arithmetic/cbrt.py +66 -0
- maxframe/tensor/arithmetic/ceil.py +71 -0
- maxframe/tensor/arithmetic/clip.py +165 -0
- maxframe/tensor/arithmetic/conj.py +74 -0
- maxframe/tensor/arithmetic/copysign.py +78 -0
- maxframe/tensor/arithmetic/core.py +544 -0
- maxframe/tensor/arithmetic/cos.py +85 -0
- maxframe/tensor/arithmetic/cosh.py +72 -0
- maxframe/tensor/arithmetic/deg2rad.py +72 -0
- maxframe/tensor/arithmetic/degrees.py +77 -0
- maxframe/tensor/arithmetic/divide.py +114 -0
- maxframe/tensor/arithmetic/equal.py +76 -0
- maxframe/tensor/arithmetic/exp.py +106 -0
- maxframe/tensor/arithmetic/exp2.py +67 -0
- maxframe/tensor/arithmetic/expm1.py +79 -0
- maxframe/tensor/arithmetic/fabs.py +74 -0
- maxframe/tensor/arithmetic/fix.py +69 -0
- maxframe/tensor/arithmetic/float_power.py +103 -0
- maxframe/tensor/arithmetic/floor.py +77 -0
- maxframe/tensor/arithmetic/floordiv.py +94 -0
- maxframe/tensor/arithmetic/fmax.py +105 -0
- maxframe/tensor/arithmetic/fmin.py +106 -0
- maxframe/tensor/arithmetic/fmod.py +99 -0
- maxframe/tensor/arithmetic/frexp.py +92 -0
- maxframe/tensor/arithmetic/greater.py +77 -0
- maxframe/tensor/arithmetic/greater_equal.py +69 -0
- maxframe/tensor/arithmetic/hypot.py +77 -0
- maxframe/tensor/arithmetic/i0.py +89 -0
- maxframe/tensor/arithmetic/imag.py +67 -0
- maxframe/tensor/arithmetic/invert.py +110 -0
- maxframe/tensor/arithmetic/isclose.py +115 -0
- maxframe/tensor/arithmetic/iscomplex.py +64 -0
- maxframe/tensor/arithmetic/isfinite.py +106 -0
- maxframe/tensor/arithmetic/isinf.py +103 -0
- maxframe/tensor/arithmetic/isnan.py +82 -0
- maxframe/tensor/arithmetic/isreal.py +63 -0
- maxframe/tensor/arithmetic/ldexp.py +99 -0
- maxframe/tensor/arithmetic/less.py +69 -0
- maxframe/tensor/arithmetic/less_equal.py +69 -0
- maxframe/tensor/arithmetic/log.py +92 -0
- maxframe/tensor/arithmetic/log10.py +85 -0
- maxframe/tensor/arithmetic/log1p.py +95 -0
- maxframe/tensor/arithmetic/log2.py +85 -0
- maxframe/tensor/arithmetic/logaddexp.py +80 -0
- maxframe/tensor/arithmetic/logaddexp2.py +78 -0
- maxframe/tensor/arithmetic/logical_and.py +81 -0
- maxframe/tensor/arithmetic/logical_not.py +74 -0
- maxframe/tensor/arithmetic/logical_or.py +82 -0
- maxframe/tensor/arithmetic/logical_xor.py +88 -0
- maxframe/tensor/arithmetic/lshift.py +82 -0
- maxframe/tensor/arithmetic/maximum.py +108 -0
- maxframe/tensor/arithmetic/minimum.py +108 -0
- maxframe/tensor/arithmetic/mod.py +104 -0
- maxframe/tensor/arithmetic/modf.py +83 -0
- maxframe/tensor/arithmetic/multiply.py +81 -0
- maxframe/tensor/arithmetic/nan_to_num.py +99 -0
- maxframe/tensor/arithmetic/negative.py +65 -0
- maxframe/tensor/arithmetic/nextafter.py +68 -0
- maxframe/tensor/arithmetic/not_equal.py +72 -0
- maxframe/tensor/arithmetic/positive.py +47 -0
- maxframe/tensor/arithmetic/power.py +106 -0
- maxframe/tensor/arithmetic/rad2deg.py +71 -0
- maxframe/tensor/arithmetic/radians.py +77 -0
- maxframe/tensor/arithmetic/real.py +70 -0
- maxframe/tensor/arithmetic/reciprocal.py +76 -0
- maxframe/tensor/arithmetic/rint.py +68 -0
- maxframe/tensor/arithmetic/rshift.py +81 -0
- maxframe/tensor/arithmetic/setimag.py +29 -0
- maxframe/tensor/arithmetic/setreal.py +29 -0
- maxframe/tensor/arithmetic/sign.py +81 -0
- maxframe/tensor/arithmetic/signbit.py +65 -0
- maxframe/tensor/arithmetic/sin.py +98 -0
- maxframe/tensor/arithmetic/sinc.py +102 -0
- maxframe/tensor/arithmetic/sinh.py +93 -0
- maxframe/tensor/arithmetic/spacing.py +72 -0
- maxframe/tensor/arithmetic/sqrt.py +81 -0
- maxframe/tensor/arithmetic/square.py +69 -0
- maxframe/tensor/arithmetic/subtract.py +81 -0
- maxframe/tensor/arithmetic/tan.py +88 -0
- maxframe/tensor/arithmetic/tanh.py +92 -0
- maxframe/tensor/arithmetic/tests/__init__.py +15 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
- maxframe/tensor/arithmetic/truediv.py +104 -0
- maxframe/tensor/arithmetic/trunc.py +72 -0
- maxframe/tensor/arithmetic/utils.py +65 -0
- maxframe/tensor/array_utils.py +186 -0
- maxframe/tensor/base/__init__.py +34 -0
- maxframe/tensor/base/astype.py +119 -0
- maxframe/tensor/base/atleast_1d.py +74 -0
- maxframe/tensor/base/broadcast_to.py +89 -0
- maxframe/tensor/base/ravel.py +92 -0
- maxframe/tensor/base/tests/__init__.py +13 -0
- maxframe/tensor/base/tests/test_base.py +114 -0
- maxframe/tensor/base/transpose.py +125 -0
- maxframe/tensor/base/unique.py +205 -0
- maxframe/tensor/base/where.py +127 -0
- maxframe/tensor/core.py +724 -0
- maxframe/tensor/datasource/__init__.py +32 -0
- maxframe/tensor/datasource/arange.py +156 -0
- maxframe/tensor/datasource/array.py +415 -0
- maxframe/tensor/datasource/core.py +109 -0
- maxframe/tensor/datasource/empty.py +169 -0
- maxframe/tensor/datasource/from_dataframe.py +70 -0
- maxframe/tensor/datasource/from_dense.py +54 -0
- maxframe/tensor/datasource/from_sparse.py +47 -0
- maxframe/tensor/datasource/full.py +186 -0
- maxframe/tensor/datasource/ones.py +173 -0
- maxframe/tensor/datasource/scalar.py +40 -0
- maxframe/tensor/datasource/tests/__init__.py +13 -0
- maxframe/tensor/datasource/tests/test_datasource.py +278 -0
- maxframe/tensor/datasource/zeros.py +188 -0
- maxframe/tensor/fetch/__init__.py +15 -0
- maxframe/tensor/fetch/core.py +54 -0
- maxframe/tensor/indexing/__init__.py +47 -0
- maxframe/tensor/indexing/choose.py +196 -0
- maxframe/tensor/indexing/compress.py +124 -0
- maxframe/tensor/indexing/core.py +190 -0
- maxframe/tensor/indexing/extract.py +71 -0
- maxframe/tensor/indexing/fill_diagonal.py +183 -0
- maxframe/tensor/indexing/flatnonzero.py +60 -0
- maxframe/tensor/indexing/getitem.py +175 -0
- maxframe/tensor/indexing/nonzero.py +120 -0
- maxframe/tensor/indexing/setitem.py +132 -0
- maxframe/tensor/indexing/slice.py +29 -0
- maxframe/tensor/indexing/take.py +130 -0
- maxframe/tensor/indexing/tests/__init__.py +15 -0
- maxframe/tensor/indexing/tests/test_indexing.py +234 -0
- maxframe/tensor/indexing/unravel_index.py +103 -0
- maxframe/tensor/merge/__init__.py +15 -0
- maxframe/tensor/merge/stack.py +132 -0
- maxframe/tensor/merge/tests/__init__.py +13 -0
- maxframe/tensor/merge/tests/test_merge.py +52 -0
- maxframe/tensor/operators.py +123 -0
- maxframe/tensor/random/__init__.py +168 -0
- maxframe/tensor/random/beta.py +87 -0
- maxframe/tensor/random/binomial.py +137 -0
- maxframe/tensor/random/bytes.py +39 -0
- maxframe/tensor/random/chisquare.py +110 -0
- maxframe/tensor/random/choice.py +186 -0
- maxframe/tensor/random/core.py +234 -0
- maxframe/tensor/random/dirichlet.py +123 -0
- maxframe/tensor/random/exponential.py +94 -0
- maxframe/tensor/random/f.py +135 -0
- maxframe/tensor/random/gamma.py +128 -0
- maxframe/tensor/random/geometric.py +93 -0
- maxframe/tensor/random/gumbel.py +167 -0
- maxframe/tensor/random/hypergeometric.py +148 -0
- maxframe/tensor/random/laplace.py +133 -0
- maxframe/tensor/random/logistic.py +129 -0
- maxframe/tensor/random/lognormal.py +159 -0
- maxframe/tensor/random/logseries.py +122 -0
- maxframe/tensor/random/multinomial.py +133 -0
- maxframe/tensor/random/multivariate_normal.py +192 -0
- maxframe/tensor/random/negative_binomial.py +125 -0
- maxframe/tensor/random/noncentral_chisquare.py +132 -0
- maxframe/tensor/random/noncentral_f.py +126 -0
- maxframe/tensor/random/normal.py +143 -0
- maxframe/tensor/random/pareto.py +140 -0
- maxframe/tensor/random/permutation.py +104 -0
- maxframe/tensor/random/poisson.py +111 -0
- maxframe/tensor/random/power.py +142 -0
- maxframe/tensor/random/rand.py +82 -0
- maxframe/tensor/random/randint.py +121 -0
- maxframe/tensor/random/randn.py +96 -0
- maxframe/tensor/random/random_integers.py +123 -0
- maxframe/tensor/random/random_sample.py +86 -0
- maxframe/tensor/random/rayleigh.py +110 -0
- maxframe/tensor/random/shuffle.py +61 -0
- maxframe/tensor/random/standard_cauchy.py +105 -0
- maxframe/tensor/random/standard_exponential.py +72 -0
- maxframe/tensor/random/standard_gamma.py +120 -0
- maxframe/tensor/random/standard_normal.py +74 -0
- maxframe/tensor/random/standard_t.py +135 -0
- maxframe/tensor/random/tests/__init__.py +15 -0
- maxframe/tensor/random/tests/test_random.py +167 -0
- maxframe/tensor/random/triangular.py +119 -0
- maxframe/tensor/random/uniform.py +131 -0
- maxframe/tensor/random/vonmises.py +131 -0
- maxframe/tensor/random/wald.py +114 -0
- maxframe/tensor/random/weibull.py +140 -0
- maxframe/tensor/random/zipf.py +122 -0
- maxframe/tensor/rechunk/__init__.py +26 -0
- maxframe/tensor/rechunk/rechunk.py +43 -0
- maxframe/tensor/reduction/__init__.py +66 -0
- maxframe/tensor/reduction/all.py +103 -0
- maxframe/tensor/reduction/allclose.py +88 -0
- maxframe/tensor/reduction/any.py +105 -0
- maxframe/tensor/reduction/argmax.py +103 -0
- maxframe/tensor/reduction/argmin.py +103 -0
- maxframe/tensor/reduction/array_equal.py +64 -0
- maxframe/tensor/reduction/core.py +168 -0
- maxframe/tensor/reduction/count_nonzero.py +81 -0
- maxframe/tensor/reduction/cumprod.py +97 -0
- maxframe/tensor/reduction/cumsum.py +101 -0
- maxframe/tensor/reduction/max.py +120 -0
- maxframe/tensor/reduction/mean.py +123 -0
- maxframe/tensor/reduction/min.py +120 -0
- maxframe/tensor/reduction/nanargmax.py +82 -0
- maxframe/tensor/reduction/nanargmin.py +76 -0
- maxframe/tensor/reduction/nancumprod.py +91 -0
- maxframe/tensor/reduction/nancumsum.py +94 -0
- maxframe/tensor/reduction/nanmax.py +111 -0
- maxframe/tensor/reduction/nanmean.py +106 -0
- maxframe/tensor/reduction/nanmin.py +111 -0
- maxframe/tensor/reduction/nanprod.py +94 -0
- maxframe/tensor/reduction/nanstd.py +126 -0
- maxframe/tensor/reduction/nansum.py +115 -0
- maxframe/tensor/reduction/nanvar.py +149 -0
- maxframe/tensor/reduction/prod.py +130 -0
- maxframe/tensor/reduction/std.py +134 -0
- maxframe/tensor/reduction/sum.py +125 -0
- maxframe/tensor/reduction/tests/__init__.py +13 -0
- maxframe/tensor/reduction/tests/test_reduction.py +181 -0
- maxframe/tensor/reduction/var.py +176 -0
- maxframe/tensor/reshape/__init__.py +17 -0
- maxframe/tensor/reshape/reshape.py +188 -0
- maxframe/tensor/reshape/tests/__init__.py +15 -0
- maxframe/tensor/reshape/tests/test_reshape.py +37 -0
- maxframe/tensor/statistics/__init__.py +13 -0
- maxframe/tensor/statistics/percentile.py +175 -0
- maxframe/tensor/statistics/quantile.py +288 -0
- maxframe/tensor/ufunc/__init__.py +26 -0
- maxframe/tensor/ufunc/ufunc.py +200 -0
- maxframe/tensor/utils.py +718 -0
- maxframe/tests/__init__.py +13 -0
- maxframe/tests/test_codegen.py +69 -0
- maxframe/tests/test_protocol.py +144 -0
- maxframe/tests/test_utils.py +376 -0
- maxframe/tests/utils.py +164 -0
- maxframe/typing_.py +37 -0
- maxframe/udf.py +134 -0
- maxframe/utils.py +1114 -0
- maxframe-0.1.0b5.dist-info/METADATA +104 -0
- maxframe-0.1.0b5.dist-info/RECORD +647 -0
- maxframe-0.1.0b5.dist-info/WHEEL +5 -0
- maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
- maxframe_client/__init__.py +17 -0
- maxframe_client/clients/__init__.py +13 -0
- maxframe_client/clients/framedriver.py +118 -0
- maxframe_client/clients/spe.py +104 -0
- maxframe_client/conftest.py +15 -0
- maxframe_client/fetcher.py +264 -0
- maxframe_client/session/__init__.py +22 -0
- maxframe_client/session/consts.py +36 -0
- maxframe_client/session/graph.py +119 -0
- maxframe_client/session/odps.py +482 -0
- maxframe_client/session/task.py +280 -0
- maxframe_client/session/tests/__init__.py +13 -0
- maxframe_client/session/tests/test_task.py +85 -0
- maxframe_client/tests/__init__.py +13 -0
- maxframe_client/tests/test_fetcher.py +89 -0
- maxframe_client/tests/test_session.py +255 -0
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
import subprocess
|
|
17
|
+
import weakref
|
|
18
|
+
from typing import BinaryIO, Dict, Iterator, List, TextIO, Tuple, Union
|
|
19
|
+
from urllib.parse import urlparse
|
|
20
|
+
|
|
21
|
+
import pyarrow as pa
|
|
22
|
+
from pyarrow.fs import FileInfo, FileSelector
|
|
23
|
+
from pyarrow.fs import FileSystem as ArrowFileSystem
|
|
24
|
+
from pyarrow.fs import FileType
|
|
25
|
+
from pyarrow.fs import HadoopFileSystem as ArrowHadoopFileSystem
|
|
26
|
+
from pyarrow.fs import LocalFileSystem as ArrowLocalFileSystem
|
|
27
|
+
|
|
28
|
+
from ...utils import implements, stringify_path
|
|
29
|
+
from .core import FileSystem, path_type
|
|
30
|
+
|
|
31
|
+
__all__ = ("ArrowBasedLocalFileSystem", "HadoopFileSystem")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# When a pyarrow.fs.FileSystem is garbage collected,
|
|
35
|
+
# the underlying connection will be closed,
|
|
36
|
+
# so we hold the reference to make sure
|
|
37
|
+
# the FileSystem is not garbage collected before the file object
|
|
38
|
+
_file_to_filesystems = weakref.WeakKeyDictionary()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class ArrowBasedFileSystem(FileSystem):
    """
    FileSystem implemented with arrow fs API (>=2.0.0).

    Every operation normalizes the incoming path with ``_process_path`` and
    then delegates to the wrapped ``pyarrow.fs.FileSystem`` instance.
    """

    def __init__(self, arrow_fs: ArrowFileSystem, sequential_read=False):
        self._arrow_fs = arrow_fs
        # for open('rb'), open a sequential reading only or not
        self._sequential_read = sequential_read

    @staticmethod
    def _process_path(path):
        # Normalization hook applied to every path before it reaches arrow.
        return stringify_path(path)

    @implements(FileSystem.cat)
    def cat(self, path: path_type) -> bytes:
        path = self._process_path(path)
        # Close the stream deterministically instead of leaving the handle
        # open until the NativeFile happens to be garbage collected.
        with self._arrow_fs.open_input_stream(path) as file:
            return file.read()

    @implements(FileSystem.ls)
    def ls(self, path: path_type) -> List[path_type]:
        path = self._process_path(path)
        file_selector: FileSelector = FileSelector(path)
        return [
            file_info.path
            for file_info in self._arrow_fs.get_file_info(file_selector)
        ]

    def _get_file_info(self, path: path_type) -> FileInfo:
        # get_file_info takes a list of paths and returns one FileInfo each.
        path = self._process_path(path)
        file_info: FileInfo = self._arrow_fs.get_file_info([path])[0]
        return file_info

    @implements(FileSystem.delete)
    def delete(self, path: path_type, recursive: bool = False):
        path = self._process_path(path)
        info = self._get_file_info(path)
        if info.is_file:
            self._arrow_fs.delete_file(path)
        elif info.type == FileType.Directory:
            # Refuse to drop a populated directory unless explicitly asked.
            if not recursive and self.ls(path):
                raise OSError(f"[Errno 66] Directory not empty: '{path}'")
            self._arrow_fs.delete_dir(path)
        else:  # pragma: no cover
            raise TypeError(f"path({path}) to delete must be a file or directory")

    @implements(FileSystem.rename)
    def rename(self, path: path_type, new_path: path_type):
        path = self._process_path(path)
        new_path = self._process_path(new_path)
        self._arrow_fs.move(path, new_path)

    @implements(FileSystem.stat)
    def stat(self, path: path_type) -> Dict:
        path = self._process_path(path)
        info = self._get_file_info(path)
        # mtime_ns is in nanoseconds; expose it as seconds.
        stat = dict(name=path, size=info.size, modified_time=info.mtime_ns / 1e9)
        if info.type == FileType.File:
            stat["type"] = "file"
        elif info.type == FileType.Directory:
            stat["type"] = "directory"
        else:  # pragma: no cover
            stat["type"] = "other"
        return stat

    @implements(FileSystem.mkdir)
    def mkdir(self, path: path_type, create_parents: bool = True):
        path = self._process_path(path)
        self._arrow_fs.create_dir(path, recursive=create_parents)

    @implements(FileSystem.isdir)
    def isdir(self, path: path_type) -> bool:
        path = self._process_path(path)
        info = self._get_file_info(path)
        return info.type == FileType.Directory

    @implements(FileSystem.isfile)
    def isfile(self, path: path_type) -> bool:
        path = self._process_path(path)
        info = self._get_file_info(path)
        return info.is_file

    @implements(FileSystem._isfilestore)
    def _isfilestore(self) -> bool:
        return True

    @implements(FileSystem.exists)
    def exists(self, path: path_type):
        path = self._process_path(path)
        info = self._get_file_info(path)
        return info.type != FileType.NotFound

    @implements(FileSystem.open)
    def open(self, path: path_type, mode: str = "rb") -> Union[BinaryIO, TextIO]:
        path = self._process_path(path)
        is_binary = mode.endswith("b")
        if not is_binary:  # pragma: no cover
            raise ValueError(
                f"mode can only be binary for arrow based filesystem, got {mode}"
            )
        mode = mode.rstrip("b")
        if mode == "w":
            file = self._arrow_fs.open_output_stream(path)
        elif mode == "r":
            if self._sequential_read:  # pragma: no cover
                file = self._arrow_fs.open_input_stream(path)
            else:
                file = self._arrow_fs.open_input_file(path)
        elif mode == "a":
            file = self._arrow_fs.open_append_stream(path)
        else:  # pragma: no cover
            raise ValueError(
                f'mode can only be "wb", "rb" and "ab" for '
                f"arrow based filesystem, got {mode}"
            )

        # Keep the arrow filesystem alive for as long as the file object is.
        _file_to_filesystems[file] = self._arrow_fs
        return file

    @implements(FileSystem.walk)
    def walk(self, path: path_type) -> Iterator[Tuple[str, List[str], List[str]]]:
        from collections import deque

        # Breadth-first traversal; a deque gives O(1) pops from the left.
        pending = deque([self._process_path(path)])
        while pending:
            curr = pending.popleft()
            file_selector: FileSelector = FileSelector(curr)
            dirs, files = [], []
            for info in self._arrow_fs.get_file_info(file_selector):
                if info.type == FileType.File:
                    files.append(info.base_name)
                elif info.type == FileType.Directory:
                    dirs.append(info.base_name)
                    pending.append(info.path)
                else:  # pragma: no cover
                    continue
            yield curr, dirs, files

    @implements(FileSystem.glob)
    def glob(self, path: path_type, recursive: bool = False) -> List[path_type]:
        from ._glob import FileSystemGlob

        path = self._process_path(path)
        return FileSystemGlob(self).glob(path, recursive=recursive)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
class ArrowBasedLocalFileSystem(ArrowBasedFileSystem):
    """Arrow-backed filesystem for local paths, shareable via ``get_instance``."""

    # Shared instance, created lazily by get_instance().
    _instance = None

    def __init__(self):
        local_fs = ArrowLocalFileSystem()
        super().__init__(local_fs)

    @classmethod
    def get_instance(cls):
        """Return the shared instance, building it on the first call."""
        if cls._instance is not None:
            return cls._instance
        cls._instance = ArrowBasedLocalFileSystem()
        return cls._instance
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
class HadoopFileSystem(ArrowBasedFileSystem):
    """
    HDFS filesystem backed by ``pyarrow.fs.HadoopFileSystem``.

    Parameters
    ----------
    host, port, user, kerb_ticket, extra_conf
        Forwarded unchanged to ``pyarrow.fs.HadoopFileSystem``.
    driver : str, default "libhdfs"
        Only ``"libhdfs"`` is accepted.

    Raises
    ------
    ValueError
        If ``driver`` is anything other than ``"libhdfs"``.
    """

    def __init__(
        self,
        host="default",
        port=0,
        user=None,
        kerb_ticket=None,
        driver="libhdfs",
        extra_conf=None,
    ):
        # Validate explicitly: an ``assert`` is stripped under ``python -O``.
        if driver != "libhdfs":
            raise ValueError(f"driver must be 'libhdfs', got {driver!r}")
        if "HADOOP_HOME" in os.environ and "CLASSPATH" not in os.environ:
            hdfs_bin = os.path.join(os.environ["HADOOP_HOME"], "bin", "hdfs")
            classpath_proc = subprocess.run(
                [hdfs_bin, "classpath", "--glob"],
                stdout=subprocess.PIPE,
            )
            classpath = classpath_proc.stdout.decode().strip()
            # Only export a usable value: writing an empty or partial
            # CLASSPATH after a failed call would also stop later attempts
            # from retrying, because the variable would then be present.
            if classpath_proc.returncode == 0 and classpath:
                os.environ["CLASSPATH"] = classpath
        arrow_fs = ArrowHadoopFileSystem(
            host=host,
            port=port,
            user=user,
            kerb_ticket=kerb_ticket,
            extra_conf=extra_conf,
        )
        super().__init__(arrow_fs)

    @staticmethod
    def _process_path(path):
        path = ArrowBasedFileSystem._process_path(path)
        # use urlparse to extract path from like:
        # hdfs://localhost:8020/tmp/test/simple_test.csv,
        # due to the reason that pa.fs.HadoopFileSystem cannot accept
        # path with hdfs:// prefix
        if path.startswith("hdfs://"):
            return urlparse(path).path
        return path
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
from abc import ABC, abstractmethod
|
|
17
|
+
from typing import BinaryIO, Dict, Iterator, List, TextIO, Tuple, Union
|
|
18
|
+
from urllib.parse import urlparse
|
|
19
|
+
|
|
20
|
+
from ...utils import stringify_path
|
|
21
|
+
|
|
22
|
+
path_type = Union[str, os.PathLike]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class FileSystem(ABC):
    """
    Abstract filesystem interface
    """

    @abstractmethod
    def cat(self, path: path_type) -> bytes:
        """
        Return contents of file as a bytes object

        Parameters
        ----------
        path : str or path-like
            File path to read content from.

        Returns
        -------
        contents : bytes
        """

    @abstractmethod
    def ls(self, path: path_type) -> List[path_type]:
        """
        Return list of file paths

        Returns
        -------
        paths : list
        """

    @abstractmethod
    def delete(self, path: path_type, recursive: bool = False):
        """
        Delete the indicated file or directory

        Parameters
        ----------
        path : str
        recursive : bool, default False
            If True, also delete child paths for directories
        """

    def disk_usage(self, path: path_type) -> int:
        """
        Compute bytes used by all contents under indicated path in file tree

        Parameters
        ----------
        path : string
            Can be a file path or directory

        Returns
        -------
        usage : int
        """
        path = stringify_path(path)
        path_info = self.stat(path)
        if path_info["type"] == "file":
            return path_info["size"]

        # Not a plain file: add up the size of every file found by walk().
        total = 0
        for root, directories, files in self.walk(path):
            for child_path in files:
                abspath = self.path_join(root, child_path)
                total += self.stat(abspath)["size"]

        return total

    def path_join(self, *args):
        """
        Join path components with this filesystem's ``pathsep``.
        """
        return self.pathsep.join(args)

    def path_split(self, path):
        """
        Split a pathname. Returns tuple "(head, tail)" where "tail" is
        everything after the final separator. Either part may be empty.

        Parameters
        ----------
        path : string
            Can be a file path or directory

        Returns
        -------
        (head, tail) : tuple of str
            ``head`` is empty when ``path`` contains no separator.
        """
        # rpartition always yields a tuple, so both branches return the same
        # type (previously one branch returned a list).
        head, sep, tail = path.rpartition(self.pathsep)
        if not sep:
            return "", path
        return head, tail

    @abstractmethod
    def stat(self, path: path_type) -> Dict:
        """
        Information about a filesystem entry.

        Returns
        -------
        stat : dict
        """

    def rm(self, path: path_type, recursive: bool = False):
        """
        Alias for FileSystem.delete
        """
        return self.delete(path, recursive=recursive)

    def mv(self, path, new_path):
        """
        Alias for FileSystem.rename
        """
        return self.rename(path, new_path)

    @abstractmethod
    def rename(self, path: path_type, new_path: path_type):
        """
        Rename file, like UNIX mv command

        Parameters
        ----------
        path : string
            Path to alter
        new_path : string
            Path to move to
        """

    @abstractmethod
    def mkdir(self, path: path_type, create_parents: bool = True):
        """
        Create a directory.

        Parameters
        ----------
        path : str
            Path to the directory.
        create_parents : bool, default True
            If the parent directories don't exist, create them as well.
        """

    @abstractmethod
    def exists(self, path: path_type):
        """
        Return True if path exists.

        Parameters
        ----------
        path : str
            Path to check.
        """

    @abstractmethod
    def isdir(self, path: path_type) -> bool:
        """
        Return True if path is a directory.

        Parameters
        ----------
        path : str
            Path to check.
        """

    @abstractmethod
    def isfile(self, path: path_type) -> bool:
        """
        Return True if path is a file.

        Parameters
        ----------
        path : str
            Path to check.
        """

    @abstractmethod
    def _isfilestore(self) -> bool:
        """
        Returns True if this FileSystem is a unix-style file store with
        directories.
        """

    @abstractmethod
    def open(self, path: path_type, mode: str = "rb") -> Union[BinaryIO, TextIO]:
        """
        Open file for reading or writing.
        """

    @abstractmethod
    def walk(self, path: path_type) -> Iterator[Tuple[str, List[str], List[str]]]:
        """
        Directory tree generator.

        Parameters
        ----------
        path : str

        Returns
        -------
        generator
        """

    @abstractmethod
    def glob(self, path: path_type, recursive: bool = False) -> List[path_type]:
        """
        Return a list of paths matching a pathname pattern.

        Parameters
        ----------
        path : str
            Pattern may contain simple shell-style wildcards
        recursive : bool
            If recursive is true, the pattern '**' will match any files and
            zero or more directories and subdirectories.

        Returns
        -------
        paths : List
        """

    @property
    def pathsep(self) -> str:
        return "/"

    @staticmethod
    def parse_from_path(uri: str):
        """
        Extract connection options from a URI.

        Returns
        -------
        options : dict
            Always contains ``host``; ``port``, ``user`` and ``password`` are
            present only when the URI provides them.
        """
        parsed_uri = urlparse(uri)
        options = dict()
        # Drop any "user:password@" prefix and ":port" suffix from the netloc.
        options["host"] = parsed_uri.netloc.rsplit("@", 1)[-1].rsplit(":", 1)[0]
        if parsed_uri.port:
            options["port"] = parsed_uri.port
        if parsed_uri.username:
            options["user"] = parsed_uri.username
        if parsed_uri.password:
            options["password"] = parsed_uri.password
        return options

    @classmethod
    def get_storage_options(cls, storage_options: Dict, uri: str) -> Dict:
        """
        Merge options parsed from ``uri`` into ``storage_options``.

        Values parsed from the URI take precedence. The caller's mapping is
        left untouched; a new dict is returned. ``None`` is treated as empty.
        """
        merged = dict(storage_options) if storage_options else {}
        merged.update(cls.parse_from_path(uri))
        return merged
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import glob as glob_
|
|
16
|
+
import os
|
|
17
|
+
from typing import Dict, List
|
|
18
|
+
from urllib.parse import urlparse
|
|
19
|
+
|
|
20
|
+
from ..compression import compress
|
|
21
|
+
from .base import FileSystem, path_type
|
|
22
|
+
from .local import LocalFileSystem
|
|
23
|
+
from .oss import OSSFileSystem
|
|
24
|
+
|
|
25
|
+
_filesystems = {"file": LocalFileSystem, "oss": OSSFileSystem}
|
|
26
|
+
_scheme_to_dependencies = {
|
|
27
|
+
"hdfs": ["pyarrow"],
|
|
28
|
+
"az": ["fsspec", "adlfs"],
|
|
29
|
+
"abfs": ["fsspec", "adlfs"],
|
|
30
|
+
"s3": ["fsspec", "s3fs"],
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def register_filesystem(name: str, fs):
    """Register ``fs`` under scheme ``name``, replacing any previous entry."""
    _filesystems.update({name: fs})
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def get_fs(path: path_type, storage_options: Dict = None) -> FileSystem:
    """
    Resolve the filesystem responsible for ``path``.

    Parameters
    ----------
    path : str or path-like
        Local path, glob pattern or URI whose scheme selects the filesystem.
    storage_options : dict, optional
        Extra constructor options for non-singleton filesystems.

    Returns
    -------
    fs : FileSystem

    Raises
    ------
    ImportError
        If the scheme is known but its filesystem has not been registered.
    ValueError
        If the scheme is not recognized at all.
    """
    if storage_options is None:
        storage_options = dict()

    # glob and urlparse only understand strings, so unwrap path-like objects.
    if isinstance(path, os.PathLike):
        path = os.fspath(path)

    # detect scheme
    if os.path.exists(path) or glob_.glob(path):
        scheme = "file"
    else:
        scheme = urlparse(path).scheme
        if scheme == "" or len(scheme) == 1:  # len == 1 for windows
            scheme = "file"

    if scheme in _filesystems:
        file_system_type = _filesystems[scheme]
        if scheme == "file" or scheme == "oss":
            # these file systems are obtained as shared singletons.
            return file_system_type.get_instance()
        else:
            storage_options = file_system_type.get_storage_options(
                storage_options, path
            )
            return file_system_type(**storage_options)
    elif scheme in _scheme_to_dependencies:  # pragma: no cover
        dependencies = ", ".join(_scheme_to_dependencies[scheme])
        raise ImportError(f"Need to install {dependencies} to access {scheme}.")
    else:
        # List every recognized scheme: the registered ones as well as those
        # that only need extra dependencies installed.
        known = ", ".join(sorted({*_filesystems, *_scheme_to_dependencies}))
        raise ValueError(
            f"Unknown file system type: {scheme}, available include: {known}"
        )
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def glob(path: path_type, storage_options: Dict = None) -> List[path_type]:
    """
    Expand ``path`` when it contains a ``*`` wildcard.

    Parameters
    ----------
    path : str or path-like
        Path or pattern to expand.
    storage_options : dict, optional
        Passed to ``get_fs`` when the pattern has to be resolved.

    Returns
    -------
    paths : list
        The filesystem's matches for a pattern, otherwise ``[path]`` with the
        original object untouched.
    """
    # ``"*" in path`` raises TypeError for path-like objects, so test the
    # string form; other inputs are checked exactly as before.
    pattern = os.fspath(path) if isinstance(path, os.PathLike) else path
    if "*" in pattern:
        fs = get_fs(pattern, storage_options)
        return fs.glob(pattern)
    return [path]
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def file_size(path: path_type, storage_options: Dict = None) -> int:
    """Return the ``size`` entry of the stat record for ``path``."""
    stat_record = get_fs(path, storage_options).stat(path)
    return stat_record["size"]
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def open_file(
    path: path_type,
    mode: str = "rb",
    compression: str = None,
    storage_options: Dict = None,
):
    """
    Open ``path`` on the filesystem that owns it.

    Parameters
    ----------
    path : str or path-like
        Location of the file.
    mode : str, default "rb"
        Mode forwarded to the filesystem's ``open``.
    compression : str, optional
        When given, the opened file is passed through ``compress`` with it.
    storage_options : dict, optional
        Passed to ``get_fs``.

    Returns
    -------
    file
        The opened file object, or the result of ``compress`` if requested.
    """
    fs = get_fs(path, storage_options)
    file = fs.open(path, mode=mode)

    if compression is None:
        return file

    try:
        return compress(file, compression)
    except Exception:
        # Don't leak the underlying handle if wrapping fails.
        file.close()
        raise
|