pyspark-client 4.2.0.dev3__tar.gz → 4.2.0.dev4__tar.gz
This diff compares the contents of publicly available package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- {pyspark_client-4.2.0.dev3/pyspark_client.egg-info → pyspark_client-4.2.0.dev4}/PKG-INFO +1 -2
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/__init__.py +2 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/_globals.py +3 -7
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/accumulators.py +21 -26
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/conf.py +6 -11
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/daemon.py +9 -8
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/errors/error-conditions.json +24 -298
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/errors/error_classes.py +1 -4
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/errors/exceptions/base.py +1 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/errors/exceptions/connect.py +15 -9
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/errors/utils.py +4 -4
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/find_spark_home.py +19 -23
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/install.py +18 -9
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/instrumentation_utils.py +0 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/logger/__init__.py +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/logger/logger.py +1 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/logger/worker_io.py +1 -1
- pyspark_client-4.2.0.dev4/pyspark/memory_profiler_ext.py +190 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/__init__.py +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/base.py +2 -4
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/classification.py +6 -10
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/clustering.py +2 -3
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/__init__.py +5 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/functions.py +1 -2
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/io_utils.py +0 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/pipeline.py +1 -4
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/proto.py +0 -4
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/readwrite.py +2 -6
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/serialize.py +0 -4
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/tuning.py +2 -7
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/dl_util.py +2 -4
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/evaluation.py +1 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/feature.py +35 -69
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/fpm.py +2 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/functions.py +7 -7
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/image.py +1 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/linalg/__init__.py +29 -45
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/param/__init__.py +6 -9
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/recommendation.py +1 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/regression.py +1 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/torch/distributor.py +2 -4
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/tree.py +1 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/tuning.py +7 -8
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/util.py +3 -3
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/wrapper.py +0 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/mllib/__init__.py +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/mllib/classification.py +13 -21
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/mllib/clustering.py +11 -25
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/mllib/evaluation.py +1 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/mllib/feature.py +18 -33
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/mllib/fpm.py +1 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/mllib/linalg/__init__.py +24 -40
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/mllib/linalg/distributed.py +2 -3
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/mllib/random.py +1 -2
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/mllib/recommendation.py +1 -2
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/mllib/regression.py +7 -20
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/mllib/stat/_statistics.py +7 -13
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/mllib/stat/distribution.py +0 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/mllib/stat/test.py +0 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/mllib/tree.py +5 -9
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/mllib/util.py +1 -2
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/_typing.py +0 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/accessors.py +2 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/base.py +68 -9
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/categorical.py +1 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/config.py +2 -2
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/correlation.py +6 -11
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/base.py +2 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/boolean_ops.py +3 -2
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/datetime_ops.py +2 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/num_ops.py +9 -34
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/string_ops.py +21 -2
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/datetimes.py +2 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/exceptions.py +5 -3
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/extensions.py +1 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/frame.py +139 -54
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/generic.py +8 -9
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/groupby.py +63 -30
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/indexes/base.py +18 -15
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/indexes/category.py +1 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/indexes/datetimes.py +1 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/indexes/multi.py +3 -4
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/indexes/timedelta.py +0 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/indexing.py +13 -5
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/internal.py +54 -27
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/mlflow.py +2 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/namespace.py +19 -10
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/numpy_compat.py +1 -2
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/plot/core.py +5 -6
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/plot/matplotlib.py +11 -7
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/plot/plotly.py +36 -11
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/resample.py +3 -2
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/series.py +140 -74
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/spark/accessors.py +2 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/spark/utils.py +10 -17
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/sql_formatter.py +2 -4
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/sql_processor.py +1 -2
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/strings.py +2 -2
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/supported_api_gen.py +6 -3
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/testing.py +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/typedef/typehints.py +14 -9
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/utils.py +50 -36
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/window.py +1 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/api.py +48 -27
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/cli.py +4 -3
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/spark_connect_pipeline.py +1 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/type_error_utils.py +4 -2
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/profiler.py +3 -127
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/rddsampler.py +22 -13
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/resource/__init__.py +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/resource/information.py +0 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/resource/profile.py +3 -7
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/resource/requests.py +4 -10
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/resultiterable.py +0 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/serializers.py +4 -15
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/shuffle.py +13 -16
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/avro/functions.py +1 -2
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/catalog.py +414 -45
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/column.py +31 -62
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/conf.py +3 -2
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/__init__.py +4 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/_typing.py +4 -9
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/avro/functions.py +1 -4
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/catalog.py +90 -6
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/client/__init__.py +0 -4
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/client/artifact.py +0 -4
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/client/core.py +6 -9
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/client/reattach.py +3 -6
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/column.py +52 -24
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/conf.py +3 -5
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/conversion.py +0 -5
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/dataframe.py +431 -323
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/datasource.py +0 -5
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/expressions.py +2 -7
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/functions/__init__.py +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/functions/builtin.py +141 -93
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/functions/partitioning.py +3 -7
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/group.py +21 -15
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/merge.py +3 -6
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/observation.py +7 -8
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/plan.py +200 -22
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/base_pb2.py +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/base_pb2.pyi +19 -6
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/base_pb2_grpc.py +1 -0
- pyspark_client-4.2.0.dev4/pyspark/sql/connect/proto/catalog_pb2.py +144 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/catalog_pb2.pyi +395 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/commands_pb2.py +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/commands_pb2.pyi +4 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/common_pb2.py +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/common_pb2.pyi +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/example_plugins_pb2.py +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/example_plugins_pb2.pyi +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/expressions_pb2.py +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/expressions_pb2.pyi +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/ml_common_pb2.py +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/ml_common_pb2.pyi +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/ml_pb2.py +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/ml_pb2.pyi +1 -0
- pyspark_client-4.2.0.dev4/pyspark/sql/connect/proto/pipelines_pb2.py +149 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/pipelines_pb2.pyi +64 -4
- pyspark_client-4.2.0.dev4/pyspark/sql/connect/proto/relations_pb2.py +258 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/relations_pb2.pyi +72 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/types_pb2.py +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/types_pb2.pyi +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/protobuf/functions.py +1 -5
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/readwriter.py +57 -42
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/session.py +23 -16
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/shell/progress.py +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/streaming/query.py +3 -7
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/streaming/readwriter.py +86 -50
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +2 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/tvf.py +1 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/types.py +0 -4
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/udf.py +14 -8
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/udtf.py +0 -4
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/utils.py +16 -10
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/window.py +1 -5
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/context.py +7 -13
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/conversion.py +36 -77
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/dataframe.py +87 -151
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/datasource.py +7 -10
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/datasource_internal.py +6 -5
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/functions/builtin.py +130 -117
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/functions/partitioning.py +3 -2
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/group.py +3 -5
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/internal.py +2 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/merge.py +1 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/metrics.py +4 -9
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/observation.py +7 -3
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/conversion.py +12 -22
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/functions.py +1 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/group_ops.py +2 -4
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/map_ops.py +1 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/serializers.py +90 -260
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/types.py +54 -42
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/utils.py +10 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/plot/__init__.py +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/plot/core.py +3 -3
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/profiler.py +8 -11
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/protobuf/functions.py +3 -4
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/readwriter.py +64 -40
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/session.py +76 -33
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/list_state_client.py +5 -7
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/listener.py +16 -15
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/map_state_client.py +1 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/proto/StateMessage_pb2.py +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/proto/StateMessage_pb2.pyi +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/python_streaming_source_runner.py +0 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/query.py +1 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/readwriter.py +102 -43
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/state.py +2 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/stateful_processor_api_client.py +15 -19
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/value_state_client.py +4 -6
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/table_arg.py +0 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/tvf.py +1 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/types.py +27 -30
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/udf.py +27 -15
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/udtf.py +2 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/utils.py +3 -5
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/variant_utils.py +16 -16
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/worker/analyze_udtf.py +21 -28
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/worker/create_data_source.py +10 -11
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/worker/plan_data_source_read.py +32 -33
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/worker/write_into_data_source.py +10 -16
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/statcounter.py +16 -13
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/storagelevel.py +0 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/streaming/dstream.py +15 -20
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/streaming/kinesis.py +2 -5
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/streaming/listener.py +0 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/streaming/util.py +1 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/taskcontext.py +2 -5
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/testing/connectutils.py +0 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/testing/goldenutils.py +27 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/testing/pandasutils.py +0 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/testing/sqlutils.py +14 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/testing/streamingutils.py +0 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/testing/utils.py +24 -15
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/traceback_utils.py +0 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/util.py +32 -33
- pyspark_client-4.2.0.dev4/pyspark/version.py +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/worker.py +480 -417
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/worker_util.py +6 -5
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4/pyspark_client.egg-info}/PKG-INFO +1 -2
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark_client.egg-info/SOURCES.txt +1 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/setup.py +1 -1
- pyspark_client-4.2.0.dev3/pyspark/sql/connect/proto/catalog_pb2.py +0 -117
- pyspark_client-4.2.0.dev3/pyspark/sql/connect/proto/pipelines_pb2.py +0 -132
- pyspark_client-4.2.0.dev3/pyspark/sql/connect/proto/relations_pb2.py +0 -251
- pyspark_client-4.2.0.dev3/pyspark/version.py +0 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/MANIFEST.in +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/README.md +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/_typing.pyi +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/cloudpickle/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/cloudpickle/cloudpickle.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/cloudpickle/cloudpickle_fast.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/errors/__init__.py +1 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/errors/exceptions/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/errors/exceptions/captured.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/errors/exceptions/tblib.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/errors_doc_gen.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/java_gateway.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/join.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/loose_version.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/_typing.pyi +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/common.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/base.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/classification.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/evaluation.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/feature.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/summarizer.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/util.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/deepspeed/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/deepspeed/deepspeed_distributor.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/model_cache.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/param/_shared_params_code_gen.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/param/shared.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/pipeline.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/stat.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/torch/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/torch/data.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/torch/log_communication.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/ml/torch/torch_run_process_wrapper.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/mllib/_typing.pyi +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/mllib/common.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/mllib/stat/KernelDensity.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/mllib/stat/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/binary_ops.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/categorical_ops.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/complex_ops.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/date_ops.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/null_ops.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/timedelta_ops.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/udt_ops.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/indexes/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/missing/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/missing/common.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/missing/frame.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/missing/general_functions.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/missing/groupby.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/missing/indexes.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/missing/resample.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/missing/scalars.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/missing/series.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/missing/window.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/plot/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/spark/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/typedef/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/usage_logging/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pandas/usage_logging/usage_logger.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/add_pipeline_analysis_context.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/block_session_mutations.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/flow.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/graph_element_registry.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/init_cli.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/logging_utils.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/output.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/source_code_location.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/spark_connect_graph_element_registry.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/py.typed +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/shell.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/__init__.py +1 -1
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/_typing.pyi +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/avro/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/avro/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/client/retries.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/logging.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/profiler.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/protobuf/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/resource/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/resource/profile.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/shell/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/sql_formatter.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/streaming/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/streaming/worker/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/table_arg.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/functions/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/geo_utils.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/interchange.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/_typing/__init__.pyi +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/_typing/protocols/__init__.pyi +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/_typing/protocols/frame.pyi +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/_typing/protocols/series.pyi +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/functions.pyi +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/typehints.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/plot/plotly.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/protobuf/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/sql_formatter.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/datasource.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/proto/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/stateful_processor.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/stateful_processor_util.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/transform_with_state_driver_worker.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/tws_tester.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/tvf_argument.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/window.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/worker/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/worker/commit_data_source_write.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/worker/data_source_pushdown_filters.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/worker/lookup_data_sources.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/worker/python_streaming_sink_runner.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/sql/worker/utils.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/streaming/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/streaming/context.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/testing/__init__.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/testing/mllibutils.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/testing/mlutils.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/testing/objects.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/testing/unittestutils.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark/threaddump.py +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark_client.egg-info/dependency_links.txt +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark_client.egg-info/requires.txt +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/pyspark_client.egg-info/top_level.txt +0 -0
- {pyspark_client-4.2.0.dev3 → pyspark_client-4.2.0.dev4}/setup.cfg +0 -0
```diff
--- pyspark_client-4.2.0.dev3/pyspark_client.egg-info/PKG-INFO
+++ pyspark_client-4.2.0.dev4/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pyspark-client
-Version: 4.2.0.dev3
+Version: 4.2.0.dev4
 Summary: Python Spark Connect client for Apache Spark
 Home-page: https://github.com/apache/spark/tree/master/python
 Author: Spark Developers
@@ -13,7 +13,6 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Programming Language :: Python :: 3.14
 Classifier: Programming Language :: Python :: Implementation :: CPython
-Classifier: Programming Language :: Python :: Implementation :: PyPy
 Classifier: Typing :: Typed
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
```
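The PKG-INFO change is the release bump itself: the version string becomes 4.2.0.dev4 and the PyPy implementation classifier is dropped. A minimal sketch (a hypothetical local check, not part of the package, assuming the wheel is installed) to confirm the published metadata matches this diff:

```python
# Hypothetical verification script: read the installed distribution's
# metadata and inspect the two fields this diff touches.
from importlib.metadata import metadata

md = metadata("pyspark-client")
print(md["Version"])  # expected: 4.2.0.dev4
classifiers = md.get_all("Classifier") or []
print(any("PyPy" in c for c in classifiers))  # expected: False after this release
```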
```diff
--- pyspark_client-4.2.0.dev3/pyspark/__init__.py
+++ pyspark_client-4.2.0.dev4/pyspark/__init__.py
@@ -55,7 +55,7 @@ from pyspark.util import is_remote_only
 if not is_remote_only():
     from pyspark.core.rdd import RDD, RDDBarrier
     from pyspark.core.files import SparkFiles
-    from pyspark.core.status import StatusTracker, SparkJobInfo, SparkStageInfo
+    from pyspark.core.status import StatusTracker, SparkJobInfo, SparkStageInfo, SparkExecutorInfo
     from pyspark.core.broadcast import Broadcast
     from pyspark.core import rdd, files, status, broadcast
 
@@ -144,6 +144,7 @@ __all__ = [
     "StatusTracker",
     "SparkJobInfo",
     "SparkStageInfo",
+    "SparkExecutorInfo",
     "Profiler",
     "BasicProfiler",
     "TaskContext",
```
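The `pyspark/__init__.py` change re-exports `SparkExecutorInfo` at the package top level, next to the other status classes. A sketch of the visible effect, assuming a full (non-remote-only) PySpark installation; under the Connect-only pyspark-client distribution `is_remote_only()` is true and the `pyspark.core.status` import block is skipped entirely:

```python
# With 4.2.0.dev4 and a full PySpark install, SparkExecutorInfo joins the
# top-level exports; previously it had to come from pyspark.core.status.
from pyspark import SparkExecutorInfo, SparkJobInfo, SparkStageInfo, StatusTracker
```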
```diff
--- pyspark_client-4.2.0.dev3/pyspark/_globals.py
+++ pyspark_client-4.2.0.dev4/pyspark/_globals.py
@@ -54,17 +54,13 @@ class _NoValueType:
 
     __instance = None
 
-    def __new__(cls):
+    def __new__(cls) -> "_NoValueType":
         # ensure that only one instance exists
         if not cls.__instance:
-            cls.__instance = super(_NoValueType, cls).__new__(cls)
+            cls.__instance = super().__new__(cls)
         return cls.__instance
 
-
-    def __reduce__(self):
-        return (self.__class__, ())
-
-    def __repr__(self):
+    def __repr__(self) -> str:
         return "<no value>"
 
 
```
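`_globals.py` keeps the `_NoValueType` sentinel but modernizes it: return types are annotated, the Python-2-era two-argument `super(...)` call becomes the zero-argument form, and the explicit `__reduce__` hook is removed. The surviving singleton idiom, as a self-contained sketch:

```python
# Standalone version of the pattern the new _globals.py uses: __new__ caches
# the first instance, so every construction returns the same object.
class _NoValueType:
    __instance = None

    def __new__(cls) -> "_NoValueType":
        # ensure that only one instance exists
        if not cls.__instance:
            cls.__instance = super().__new__(cls)
        return cls.__instance

    def __repr__(self) -> str:
        return "<no value>"


assert _NoValueType() is _NoValueType()  # both calls yield the one instance
```

The removed `__reduce__` routed unpickling back through the constructor; with protocol 2+ pickles the default path already calls `cls.__new__(cls)`, which lands in the same deduplicating `__new__`.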
```diff
--- pyspark_client-4.2.0.dev3/pyspark/accumulators.py
+++ pyspark_client-4.2.0.dev4/pyspark/accumulators.py
@@ -28,13 +28,13 @@ from pyspark.errors import PySparkRuntimeError
 
 if TYPE_CHECKING:
     from pyspark._typing import SupportsIAdd
-
+    from socketserver import BaseRequestHandler
 
 
 __all__ = ["Accumulator", "AccumulatorParam"]
 
 T = TypeVar("T")
-U = TypeVar("U", bound="SupportsIAdd")
+U = TypeVar("U", bound=Union["SupportsIAdd", int, float, complex])
 
 pickleSer = CPickleSerializer()
 
@@ -63,7 +63,6 @@ class SpecialAccumulatorIds:
 
 
 class Accumulator(Generic[T]):
-
     """
     A shared variable that can be accumulated, i.e., has a commutative and associative "add"
     operation. Worker tasks on a Spark cluster can add values to an Accumulator with the `+=`
@@ -186,7 +185,6 @@ class Accumulator(Generic[T]):
 
 
 class AccumulatorParam(Generic[T]):
-
     """
     Helper object that defines how to accumulate values of a given type.
 
@@ -229,7 +227,6 @@ class AccumulatorParam(Generic[T]):
 
 
 class AddingAccumulatorParam(AccumulatorParam[U]):
-
     """
     An AccumulatorParam that uses the + operators to add values. Designed for simple types
     such as integers, floats, and lists. Requires the zero value for the underlying type
@@ -243,27 +240,28 @@ class AddingAccumulatorParam(AccumulatorParam[U]):
         return self.zero_value
 
     def addInPlace(self, value1: U, value2: U) -> U:
-        value1 += value2  # type: ignore[operator]
+        value1 += value2  # type: ignore[operator, assignment]
         return value1
 
 
 # Singleton accumulator params for some standard types
-INT_ACCUMULATOR_PARAM = AddingAccumulatorParam(0)
-FLOAT_ACCUMULATOR_PARAM = AddingAccumulatorParam(0.0)
-COMPLEX_ACCUMULATOR_PARAM = AddingAccumulatorParam(0.0j)
+INT_ACCUMULATOR_PARAM = AddingAccumulatorParam(0)
+FLOAT_ACCUMULATOR_PARAM = AddingAccumulatorParam(0.0)
+COMPLEX_ACCUMULATOR_PARAM = AddingAccumulatorParam(0.0j)
 
 
 class UpdateRequestHandler(socketserver.StreamRequestHandler):
-
     """
     This handler will keep polling updates from the same socket until the
     server is shutdown.
     """
 
+    server: Union["AccumulatorTCPServer", "AccumulatorUnixServer"]
+
     def handle(self) -> None:
         from pyspark.accumulators import _accumulatorRegistry
 
-        auth_token = self.server.auth_token
+        auth_token = self.server.auth_token
 
         def poll(func: Callable[[], bool]) -> None:
             poller = None
@@ -273,7 +271,7 @@ class UpdateRequestHandler(socketserver.StreamRequestHandler):
                 poller = select.poll()
                 poller.register(self.rfile, select.POLLIN)
 
-            while not self.server.server_shutdown:
+            while not self.server.server_shutdown:
                 # Poll every 1 second for new data -- don't block in case of shutdown.
                 if poller is not None:
                     r = []
@@ -299,13 +297,14 @@ class UpdateRequestHandler(socketserver.StreamRequestHandler):
         def accum_updates() -> bool:
             num_updates = read_int(self.rfile)
             for _ in range(num_updates):
-                (aid, update) = pickleSer._read_with_length(self.rfile)
+                aid, update = pickleSer._read_with_length(self.rfile)
                 _accumulatorRegistry[aid] += update
             # Write a byte in acknowledgement
             self.wfile.write(struct.pack("!b", 1))
             return False
 
         def authenticate_and_accum_updates() -> bool:
+            assert auth_token is not None
             received_token: Union[bytes, str] = self.rfile.read(len(auth_token))
             if isinstance(received_token, bytes):
                 received_token = received_token.decode("utf-8")
@@ -333,7 +332,7 @@ class AccumulatorTCPServer(socketserver.TCPServer):
     def __init__(
         self,
         server_address: Tuple[str, int],
-        RequestHandlerClass: Type["
+        RequestHandlerClass: Type["BaseRequestHandler"],
         auth_token: str,
     ):
         super().__init__(server_address, RequestHandlerClass)
@@ -352,9 +351,7 @@ if hasattr(socketserver, "UnixStreamServer"):
     class AccumulatorUnixServer(socketserver.UnixStreamServer):
         server_shutdown = False
 
-        def __init__(
-            self, socket_path: str, RequestHandlerClass: Type[socketserver.BaseRequestHandler]
-        ):
+        def __init__(self, socket_path: str, RequestHandlerClass: Type["BaseRequestHandler"]):
             super().__init__(socket_path, RequestHandlerClass)
             self.auth_token = None
 
@@ -362,15 +359,14 @@ if hasattr(socketserver, "UnixStreamServer"):
             self.server_shutdown = True
             super().shutdown()
             self.server_close()
-
-
+            assert isinstance(self.server_address, str)
+            if os.path.exists(self.server_address):
+                os.remove(self.server_address)
 
 else:
 
     class AccumulatorUnixServer(socketserver.TCPServer):  # type: ignore[no-redef]
-        def __init__(
-            self, socket_path: str, RequestHandlerClass: Type[socketserver.BaseRequestHandler]
-        ):
+        def __init__(self, socket_path: str, RequestHandlerClass: Type["BaseRequestHandler"]):
             raise NotImplementedError(
                 "Unix Domain Sockets are not supported on this platform. "
                 "Please disable it by setting spark.python.unix.domain.socket.enabled to false."
@@ -381,15 +377,14 @@ def _start_update_server(
     auth_token: str, is_unix_domain_sock: bool, socket_path: Optional[str] = None
 ) -> Union[AccumulatorTCPServer, AccumulatorUnixServer]:
     """Start a TCP or Unix Domain Socket server for accumulator updates."""
+    server: Union[AccumulatorTCPServer, AccumulatorUnixServer]
     if is_unix_domain_sock:
         assert socket_path is not None
         if os.path.exists(socket_path):
             os.remove(socket_path)
         server = AccumulatorUnixServer(socket_path, UpdateRequestHandler)
     else:
-        server = AccumulatorTCPServer(
-            ("localhost", 0), UpdateRequestHandler, auth_token
-        )  # type: ignore[assignment]
+        server = AccumulatorTCPServer(("localhost", 0), UpdateRequestHandler, auth_token)
 
     thread = threading.Thread(target=server.serve_forever)
     thread.daemon = True
@@ -406,7 +401,7 @@ if __name__ == "__main__":
     # The small batch size here ensures that we see multiple batches,
     # even in these small test examples:
     globs["sc"] = SparkContext("local", "test")
-    (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+    failure_count, test_count = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
    globs["sc"].stop()
     if failure_count:
         sys.exit(-1)
```
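Most of the accumulators.py churn is typing cleanup: `BaseRequestHandler` moves behind `TYPE_CHECKING`, multi-line `__init__` signatures collapse to one line, and `UpdateRequestHandler` gains a class-level annotation narrowing its inherited `server` attribute, so `self.server.auth_token` and `self.server.server_shutdown` type-check without per-line ignores. A minimal sketch of that annotation pattern (illustrative names, not the pyspark ones):

```python
# Sketch of the typed-socketserver trick the diff applies to
# UpdateRequestHandler: declare the concrete server type on the handler class.
import socketserver


class EchoTCPServer(socketserver.TCPServer):
    allow_reuse_address = True
    greeting = b"hello\n"


class EchoHandler(socketserver.StreamRequestHandler):
    # Narrow the inherited `server` attribute; a plain annotation, no assignment.
    # Attribute access below now type-checks: `greeting` exists on EchoTCPServer.
    server: EchoTCPServer

    def handle(self) -> None:
        self.wfile.write(self.server.greeting)
```

The same idea drives the new `server: Union[AccumulatorTCPServer, AccumulatorUnixServer]` declaration in `_start_update_server`, which replaces the old `# type: ignore[assignment]` on the TCP branch.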
```diff
--- pyspark_client-4.2.0.dev3/pyspark/conf.py
+++ pyspark_client-4.2.0.dev4/pyspark/conf.py
@@ -172,12 +172,10 @@ class SparkConf:
         return self
 
     @overload
-    def setExecutorEnv(self, key: str, value: str) -> "SparkConf":
-        ...
+    def setExecutorEnv(self, key: str, value: str) -> "SparkConf": ...
 
     @overload
-    def setExecutorEnv(self, *, pairs: List[Tuple[str, str]]) -> "SparkConf":
-        ...
+    def setExecutorEnv(self, *, pairs: List[Tuple[str, str]]) -> "SparkConf": ...
 
     def setExecutorEnv(
         self,
@@ -212,16 +210,13 @@ class SparkConf:
         return self
 
     @overload
-    def get(self, key: str) -> Optional[str]:
-        ...
+    def get(self, key: str) -> Optional[str]: ...
 
     @overload
-    def get(self, key: str, defaultValue: None) -> Optional[str]:
-        ...
+    def get(self, key: str, defaultValue: None) -> Optional[str]: ...
 
     @overload
-    def get(self, key: str, defaultValue: str) -> str:
-        ...
+    def get(self, key: str, defaultValue: str) -> str: ...
 
     def get(self, key: str, defaultValue: Optional[str] = None) -> Optional[str]:
         """Get the configured value for some key, or return a default otherwise."""
@@ -273,7 +268,7 @@
 def _test() -> None:
     import doctest
 
-    (failure_count, test_count) = doctest.testmod(optionflags=doctest.ELLIPSIS)
+    failure_count, test_count = doctest.testmod(optionflags=doctest.ELLIPSIS)
     if failure_count:
         sys.exit(-1)
 
```
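The conf.py hunks are purely stylistic: each `@overload` stub folds its `...` body onto the signature line. The overloads themselves are what give `SparkConf.get` its precise typing; a sketch of the mechanism with a stand-in class (the body below is illustrative, not the pyspark implementation):

```python
# Why three overloads: a str default rules out None in the return type,
# while no default (or an explicit None default) keeps it Optional.
from typing import Optional, overload


class Conf:
    def __init__(self) -> None:
        self._data: dict[str, str] = {}

    @overload
    def get(self, key: str) -> Optional[str]: ...
    @overload
    def get(self, key: str, defaultValue: None) -> Optional[str]: ...
    @overload
    def get(self, key: str, defaultValue: str) -> str: ...

    def get(self, key: str, defaultValue: Optional[str] = None) -> Optional[str]:
        return self._data.get(key, defaultValue)


c = Conf()
maybe_name = c.get("spark.app.name")           # checkers see Optional[str]
name = c.get("spark.app.name", "default-app")  # checkers see str
```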
```diff
--- pyspark_client-4.2.0.dev3/pyspark/daemon.py
+++ pyspark_client-4.2.0.dev4/pyspark/daemon.py
@@ -15,7 +15,6 @@
 # limitations under the License.
 #
 import uuid
-import numbers
 import os
 import signal
 import select
@@ -28,21 +27,23 @@ import faulthandler
 from errno import EINTR, EAGAIN
 from socket import AF_INET, AF_INET6, SOCK_STREAM, SOMAXCONN
 from signal import SIGHUP, SIGTERM, SIGCHLD, SIG_DFL, SIG_IGN, SIGINT
+from types import FrameType
+from typing import Any, Optional
 
 from pyspark.serializers import read_int, write_int, write_with_length, UTF8Deserializer
 from pyspark.util import enable_faulthandler
 from pyspark.errors import PySparkRuntimeError
 
 
-def compute_real_exit_code(exit_code):
-    # SystemExit's code can be integer or string, but os._exit only accepts integers
-    if isinstance(exit_code, numbers.Integral):
+def compute_real_exit_code(exit_code: Any) -> int:
+    # SystemExit's code can be anything, but os._exit only accepts integer
+    if isinstance(exit_code, int):
         return exit_code
     else:
         return 1
 
 
-def worker(sock, authenticated):
+def worker(sock: socket.socket, authenticated: bool) -> int:
     """
     Called by a worker process after the fork().
     """
@@ -109,7 +110,7 @@ def worker(sock, authenticated):
     return exit_code
 
 
-def manager():
+def manager() -> None:
     # Create a new process group to corral our children
     os.setpgid(0, 0)
 
@@ -146,7 +147,7 @@ def manager():
         write_int(listen_port, stdout_bin)
         stdout_bin.flush()
 
-    def shutdown(code):
+    def shutdown(code: int) -> None:
         if socket_path is not None and os.path.exists(socket_path):
             os.remove(socket_path)
         signal.signal(SIGTERM, SIG_DFL)
@@ -154,7 +155,7 @@ def manager():
         os.kill(0, SIGHUP)
         sys.exit(code)
 
-    def handle_sigterm(
+    def handle_sigterm(signal_number: int, frame: Optional[FrameType]) -> None:
         shutdown(1)
 
     signal.signal(SIGTERM, handle_sigterm)  # Gracefully exit on SIGTERM
```