pyspark-client 4.2.0.dev2__tar.gz → 4.2.0.dev4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyspark_client-4.2.0.dev2/pyspark_client.egg-info → pyspark_client-4.2.0.dev4}/PKG-INFO +1 -2
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/__init__.py +2 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/_globals.py +3 -7
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/accumulators.py +22 -27
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/conf.py +6 -11
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/daemon.py +55 -52
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/errors/__init__.py +2 -2
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/errors/error-conditions.json +38 -301
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/errors/error_classes.py +1 -4
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/errors/exceptions/base.py +1 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/errors/exceptions/connect.py +88 -35
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/errors/exceptions/tblib.py +1 -3
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/errors/utils.py +4 -4
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/errors_doc_gen.py +1 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/find_spark_home.py +19 -23
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/install.py +54 -10
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/instrumentation_utils.py +0 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/logger/__init__.py +2 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/logger/logger.py +1 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/logger/worker_io.py +7 -4
- pyspark_client-4.2.0.dev4/pyspark/memory_profiler_ext.py +190 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/__init__.py +1 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/base.py +2 -4
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/classification.py +7 -11
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/clustering.py +2 -3
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/__init__.py +5 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/base.py +1 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/functions.py +1 -2
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/io_utils.py +0 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/pipeline.py +1 -4
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/proto.py +0 -4
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/readwrite.py +2 -6
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/serialize.py +0 -4
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/tuning.py +2 -7
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/dl_util.py +2 -4
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/evaluation.py +1 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/feature.py +35 -69
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/fpm.py +2 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/functions.py +7 -7
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/image.py +1 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/linalg/__init__.py +30 -46
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/param/__init__.py +6 -9
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/recommendation.py +1 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/regression.py +4 -4
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/torch/distributor.py +2 -4
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/tree.py +1 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/tuning.py +10 -11
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/util.py +3 -3
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/wrapper.py +0 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/mllib/__init__.py +1 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/mllib/_typing.pyi +1 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/mllib/classification.py +13 -21
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/mllib/clustering.py +11 -25
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/mllib/evaluation.py +1 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/mllib/feature.py +18 -33
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/mllib/fpm.py +1 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/mllib/linalg/__init__.py +25 -41
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/mllib/linalg/distributed.py +2 -3
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/mllib/random.py +1 -2
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/mllib/recommendation.py +1 -2
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/mllib/regression.py +7 -20
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/mllib/stat/_statistics.py +7 -13
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/mllib/stat/distribution.py +0 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/mllib/stat/test.py +0 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/mllib/tree.py +5 -9
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/mllib/util.py +1 -2
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/_typing.py +0 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/accessors.py +3 -2
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/base.py +71 -12
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/categorical.py +1 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/config.py +3 -3
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/correlation.py +6 -11
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/base.py +26 -11
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/boolean_ops.py +13 -8
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/datetime_ops.py +2 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/num_ops.py +40 -59
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/string_ops.py +23 -6
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/datetimes.py +29 -6
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/exceptions.py +5 -3
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/extensions.py +1 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/frame.py +612 -169
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/generic.py +69 -22
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/groupby.py +185 -57
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/indexes/base.py +29 -20
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/indexes/category.py +1 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/indexes/datetimes.py +39 -26
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/indexes/multi.py +5 -5
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/indexes/timedelta.py +20 -8
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/indexing.py +67 -17
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/internal.py +57 -30
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/mlflow.py +2 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/namespace.py +105 -67
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/numpy_compat.py +1 -2
- pyspark_client-4.2.0.dev4/pyspark/pandas/plot/__init__.py +17 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/plot/core.py +8 -9
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/plot/matplotlib.py +21 -11
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/plot/plotly.py +36 -11
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/resample.py +3 -2
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/series.py +233 -104
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/spark/accessors.py +2 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/spark/utils.py +10 -17
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/sql_formatter.py +2 -4
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/sql_processor.py +1 -2
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/strings.py +64 -21
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/supported_api_gen.py +6 -3
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/testing.py +1 -0
- {pyspark_client-4.2.0.dev2/pyspark/sql/connect → pyspark_client-4.2.0.dev4/pyspark/pandas/typedef}/__init__.py +1 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/typedef/typehints.py +68 -24
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/usage_logging/__init__.py +8 -8
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/utils.py +52 -38
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/window.py +2 -2
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/api.py +48 -27
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/cli.py +4 -3
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/spark_connect_pipeline.py +1 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/type_error_utils.py +4 -2
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/profiler.py +8 -133
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/rddsampler.py +22 -13
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/resource/__init__.py +1 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/resource/information.py +0 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/resource/profile.py +3 -7
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/resource/requests.py +4 -10
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/resultiterable.py +0 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/serializers.py +4 -15
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/shuffle.py +13 -16
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/avro/functions.py +1 -2
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/catalog.py +414 -45
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/column.py +31 -62
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/conf.py +3 -2
- {pyspark_client-4.2.0.dev2/pyspark/pandas/typedef → pyspark_client-4.2.0.dev4/pyspark/sql/connect}/__init__.py +5 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/_typing.py +4 -9
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/avro/functions.py +1 -4
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/catalog.py +90 -6
- pyspark_client-4.2.0.dev4/pyspark/sql/connect/client/__init__.py +19 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/client/artifact.py +0 -4
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/client/core.py +124 -40
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/client/reattach.py +53 -57
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/column.py +52 -24
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/conf.py +3 -5
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/conversion.py +0 -5
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/dataframe.py +438 -325
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/datasource.py +0 -5
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/expressions.py +4 -12
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/functions/__init__.py +3 -2
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/functions/builtin.py +458 -110
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/functions/partitioning.py +3 -7
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/group.py +21 -15
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/merge.py +3 -6
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/observation.py +14 -8
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/plan.py +201 -23
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/base_pb2.py +118 -117
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/base_pb2.pyi +59 -7
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/base_pb2_grpc.py +1 -0
- pyspark_client-4.2.0.dev4/pyspark/sql/connect/proto/catalog_pb2.py +144 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/catalog_pb2.pyi +395 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/commands_pb2.py +1 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/commands_pb2.pyi +4 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/common_pb2.py +1 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/common_pb2.pyi +1 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/example_plugins_pb2.py +1 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/example_plugins_pb2.pyi +1 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/expressions_pb2.py +1 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/expressions_pb2.pyi +1 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/ml_common_pb2.py +1 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/ml_common_pb2.pyi +1 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/ml_pb2.py +1 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/ml_pb2.pyi +1 -0
- pyspark_client-4.2.0.dev4/pyspark/sql/connect/proto/pipelines_pb2.py +149 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/pipelines_pb2.pyi +165 -4
- pyspark_client-4.2.0.dev4/pyspark/sql/connect/proto/relations_pb2.py +258 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/relations_pb2.pyi +72 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/types_pb2.py +1 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/types_pb2.pyi +1 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/protobuf/functions.py +1 -5
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/readwriter.py +57 -42
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/session.py +29 -21
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/shell/progress.py +1 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/streaming/query.py +3 -7
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/streaming/readwriter.py +86 -50
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +5 -13
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/streaming/worker/listener_worker.py +4 -12
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/tvf.py +1 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/types.py +0 -4
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/udf.py +14 -8
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/udtf.py +0 -4
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/utils.py +16 -10
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/window.py +1 -5
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/context.py +8 -14
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/conversion.py +547 -150
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/dataframe.py +144 -156
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/datasource.py +82 -14
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/datasource_internal.py +84 -31
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/functions/__init__.py +48 -23
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/functions/builtin.py +1649 -413
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/functions/partitioning.py +3 -2
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/group.py +3 -5
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/internal.py +5 -2
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/merge.py +1 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/metrics.py +4 -9
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/observation.py +7 -3
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/conversion.py +45 -42
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/functions.py +1 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/group_ops.py +2 -4
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/map_ops.py +1 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/serializers.py +289 -748
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/types.py +69 -47
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/utils.py +10 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/plot/__init__.py +2 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/plot/core.py +5 -5
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/profiler.py +8 -11
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/protobuf/functions.py +3 -4
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/readwriter.py +67 -43
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/session.py +82 -38
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/__init__.py +3 -3
- pyspark_client-4.2.0.dev4/pyspark/sql/streaming/datasource.py +119 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/list_state_client.py +5 -7
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/listener.py +28 -19
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/map_state_client.py +1 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/proto/StateMessage_pb2.py +1 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/proto/StateMessage_pb2.pyi +1 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/python_streaming_source_runner.py +113 -15
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/query.py +2 -2
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/readwriter.py +102 -43
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/state.py +2 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/stateful_processor_api_client.py +15 -19
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/transform_with_state_driver_worker.py +3 -11
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/value_state_client.py +4 -6
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/table_arg.py +0 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/tvf.py +1 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/types.py +28 -42
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/udf.py +28 -16
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/udtf.py +2 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/utils.py +3 -5
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/variant_utils.py +16 -16
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/worker/analyze_udtf.py +24 -40
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/worker/commit_data_source_write.py +3 -12
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/worker/create_data_source.py +13 -22
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/worker/data_source_pushdown_filters.py +3 -9
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/worker/lookup_data_sources.py +3 -12
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/worker/plan_data_source_read.py +51 -44
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/worker/python_streaming_sink_runner.py +3 -11
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/worker/utils.py +9 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/worker/write_into_data_source.py +18 -30
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/statcounter.py +16 -13
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/storagelevel.py +0 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/streaming/dstream.py +15 -20
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/streaming/kinesis.py +2 -5
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/streaming/listener.py +0 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/streaming/util.py +1 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/taskcontext.py +2 -5
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/testing/connectutils.py +0 -11
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/testing/goldenutils.py +168 -39
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/testing/pandasutils.py +44 -6
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/testing/sqlutils.py +14 -5
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/testing/streamingutils.py +0 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/testing/utils.py +32 -19
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/traceback_utils.py +0 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/util.py +77 -53
- pyspark_client-4.2.0.dev4/pyspark/version.py +1 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/worker.py +734 -596
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/worker_util.py +31 -5
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4/pyspark_client.egg-info}/PKG-INFO +1 -2
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark_client.egg-info/SOURCES.txt +2 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/setup.py +1 -1
- pyspark_client-4.2.0.dev2/pyspark/pandas/plot/__init__.py +0 -17
- pyspark_client-4.2.0.dev2/pyspark/sql/connect/client/__init__.py +0 -23
- pyspark_client-4.2.0.dev2/pyspark/sql/connect/proto/catalog_pb2.py +0 -117
- pyspark_client-4.2.0.dev2/pyspark/sql/connect/proto/pipelines_pb2.py +0 -130
- pyspark_client-4.2.0.dev2/pyspark/sql/connect/proto/relations_pb2.py +0 -251
- pyspark_client-4.2.0.dev2/pyspark/version.py +0 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/MANIFEST.in +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/README.md +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/_typing.pyi +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/cloudpickle/__init__.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/cloudpickle/cloudpickle.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/cloudpickle/cloudpickle_fast.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/errors/exceptions/__init__.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/errors/exceptions/captured.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/java_gateway.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/join.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/loose_version.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/_typing.pyi +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/common.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/classification.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/evaluation.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/feature.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/summarizer.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/connect/util.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/deepspeed/__init__.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/deepspeed/deepspeed_distributor.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/model_cache.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/param/_shared_params_code_gen.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/param/shared.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/pipeline.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/stat.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/torch/__init__.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/torch/data.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/torch/log_communication.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/ml/torch/torch_run_process_wrapper.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/mllib/common.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/mllib/stat/KernelDensity.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/mllib/stat/__init__.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/__init__.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/__init__.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/binary_ops.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/categorical_ops.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/complex_ops.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/date_ops.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/null_ops.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/timedelta_ops.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/data_type_ops/udt_ops.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/indexes/__init__.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/missing/__init__.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/missing/common.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/missing/frame.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/missing/general_functions.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/missing/groupby.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/missing/indexes.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/missing/resample.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/missing/scalars.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/missing/series.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/missing/window.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/spark/__init__.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pandas/usage_logging/usage_logger.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/__init__.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/add_pipeline_analysis_context.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/block_session_mutations.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/flow.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/graph_element_registry.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/init_cli.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/logging_utils.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/output.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/source_code_location.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/pipelines/spark_connect_graph_element_registry.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/py.typed +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/shell.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/__init__.py +1 -1
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/_typing.pyi +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/avro/__init__.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/avro/__init__.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/client/retries.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/logging.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/profiler.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/proto/__init__.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/protobuf/__init__.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/resource/__init__.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/resource/profile.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/shell/__init__.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/sql_formatter.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/streaming/__init__.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/streaming/worker/__init__.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/connect/table_arg.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/geo_utils.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/interchange.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/__init__.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/_typing/__init__.pyi +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/_typing/protocols/__init__.pyi +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/_typing/protocols/frame.pyi +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/_typing/protocols/series.pyi +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/functions.pyi +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/pandas/typehints.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/plot/plotly.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/protobuf/__init__.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/sql_formatter.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/proto/__init__.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/stateful_processor.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/stateful_processor_util.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/streaming/tws_tester.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/tvf_argument.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/window.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/sql/worker/__init__.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/streaming/__init__.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/streaming/context.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/testing/__init__.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/testing/mllibutils.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/testing/mlutils.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/testing/objects.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/testing/unittestutils.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/threaddump.py +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark_client.egg-info/dependency_links.txt +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark_client.egg-info/requires.txt +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark_client.egg-info/top_level.txt +0 -0
- {pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/setup.cfg +0 -0
{pyspark_client-4.2.0.dev2/pyspark_client.egg-info → pyspark_client-4.2.0.dev4}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pyspark-client
-Version: 4.2.0.dev2
+Version: 4.2.0.dev4
 Summary: Python Spark Connect client for Apache Spark
 Home-page: https://github.com/apache/spark/tree/master/python
 Author: Spark Developers
@@ -13,7 +13,6 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Programming Language :: Python :: 3.14
 Classifier: Programming Language :: Python :: Implementation :: CPython
-Classifier: Programming Language :: Python :: Implementation :: PyPy
 Classifier: Typing :: Typed
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
{pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/__init__.py

@@ -55,7 +55,7 @@ from pyspark.util import is_remote_only
 if not is_remote_only():
     from pyspark.core.rdd import RDD, RDDBarrier
     from pyspark.core.files import SparkFiles
-    from pyspark.core.status import StatusTracker, SparkJobInfo, SparkStageInfo
+    from pyspark.core.status import StatusTracker, SparkJobInfo, SparkStageInfo, SparkExecutorInfo
     from pyspark.core.broadcast import Broadcast
     from pyspark.core import rdd, files, status, broadcast
 
@@ -144,6 +144,7 @@ __all__ = [
     "StatusTracker",
     "SparkJobInfo",
     "SparkStageInfo",
+    "SparkExecutorInfo",
     "Profiler",
     "BasicProfiler",
     "TaskContext",
{pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/_globals.py

@@ -54,17 +54,13 @@ class _NoValueType:
 
     __instance = None
 
-    def __new__(cls):
+    def __new__(cls) -> "_NoValueType":
         # ensure that only one instance exists
         if not cls.__instance:
-            cls.__instance = super(_NoValueType, cls).__new__(cls)
+            cls.__instance = super().__new__(cls)
         return cls.__instance
 
-    def __reduce__(self):
-        return (self.__class__, ())
-
-    def __repr__(self):
+    def __repr__(self) -> str:
         return "<no value>"
 
 
{pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/accumulators.py

@@ -27,14 +27,14 @@ from pyspark.serializers import read_int, CPickleSerializer
 from pyspark.errors import PySparkRuntimeError
 
 if TYPE_CHECKING:
-    from pyspark._typing import SupportsIAdd
-
+    from pyspark._typing import SupportsIAdd
+    from socketserver import BaseRequestHandler
 
 
 __all__ = ["Accumulator", "AccumulatorParam"]
 
 T = TypeVar("T")
-U = TypeVar("U", bound="SupportsIAdd")
+U = TypeVar("U", bound=Union["SupportsIAdd", int, float, complex])
 
 pickleSer = CPickleSerializer()
 
@@ -63,7 +63,6 @@ class SpecialAccumulatorIds:
 
 
 class Accumulator(Generic[T]):
-
     """
     A shared variable that can be accumulated, i.e., has a commutative and associative "add"
     operation. Worker tasks on a Spark cluster can add values to an Accumulator with the `+=`
@@ -186,7 +185,6 @@ class Accumulator(Generic[T]):
 
 
 class AccumulatorParam(Generic[T]):
-
     """
     Helper object that defines how to accumulate values of a given type.
 
@@ -229,7 +227,6 @@ class AccumulatorParam(Generic[T]):
 
 
 class AddingAccumulatorParam(AccumulatorParam[U]):
-
     """
     An AccumulatorParam that uses the + operators to add values. Designed for simple types
     such as integers, floats, and lists. Requires the zero value for the underlying type
@@ -243,27 +240,28 @@ class AddingAccumulatorParam(AccumulatorParam[U]):
         return self.zero_value
 
     def addInPlace(self, value1: U, value2: U) -> U:
-        value1 += value2  # type: ignore[operator]
+        value1 += value2  # type: ignore[operator, assignment]
         return value1
 
 
 # Singleton accumulator params for some standard types
-INT_ACCUMULATOR_PARAM = AddingAccumulatorParam(0)
-FLOAT_ACCUMULATOR_PARAM = AddingAccumulatorParam(0.0)
-COMPLEX_ACCUMULATOR_PARAM = AddingAccumulatorParam(0.0j)
+INT_ACCUMULATOR_PARAM = AddingAccumulatorParam(0)
+FLOAT_ACCUMULATOR_PARAM = AddingAccumulatorParam(0.0)
+COMPLEX_ACCUMULATOR_PARAM = AddingAccumulatorParam(0.0j)
 
 
 class UpdateRequestHandler(socketserver.StreamRequestHandler):
-
     """
     This handler will keep polling updates from the same socket until the
     server is shutdown.
     """
 
+    server: Union["AccumulatorTCPServer", "AccumulatorUnixServer"]
+
     def handle(self) -> None:
         from pyspark.accumulators import _accumulatorRegistry
 
-        auth_token = self.server.auth_token
+        auth_token = self.server.auth_token
 
         def poll(func: Callable[[], bool]) -> None:
             poller = None
@@ -273,7 +271,7 @@ class UpdateRequestHandler(socketserver.StreamRequestHandler):
                 poller = select.poll()
                 poller.register(self.rfile, select.POLLIN)
 
-            while not self.server.server_shutdown:
+            while not self.server.server_shutdown:
                 # Poll every 1 second for new data -- don't block in case of shutdown.
                 if poller is not None:
                     r = []
@@ -299,13 +297,14 @@ class UpdateRequestHandler(socketserver.StreamRequestHandler):
         def accum_updates() -> bool:
             num_updates = read_int(self.rfile)
             for _ in range(num_updates):
-                (aid, update) = pickleSer._read_with_length(self.rfile)
+                aid, update = pickleSer._read_with_length(self.rfile)
                 _accumulatorRegistry[aid] += update
             # Write a byte in acknowledgement
             self.wfile.write(struct.pack("!b", 1))
             return False
 
         def authenticate_and_accum_updates() -> bool:
+            assert auth_token is not None
             received_token: Union[bytes, str] = self.rfile.read(len(auth_token))
             if isinstance(received_token, bytes):
                 received_token = received_token.decode("utf-8")
@@ -333,7 +332,7 @@ class AccumulatorTCPServer(socketserver.TCPServer):
     def __init__(
         self,
         server_address: Tuple[str, int],
-        RequestHandlerClass: Type[socketserver.BaseRequestHandler],
+        RequestHandlerClass: Type["BaseRequestHandler"],
         auth_token: str,
     ):
         super().__init__(server_address, RequestHandlerClass)
@@ -352,9 +351,7 @@ if hasattr(socketserver, "UnixStreamServer"):
     class AccumulatorUnixServer(socketserver.UnixStreamServer):
         server_shutdown = False
 
-        def __init__(
-            self, socket_path: str, RequestHandlerClass: Type[socketserver.BaseRequestHandler]
-        ):
+        def __init__(self, socket_path: str, RequestHandlerClass: Type["BaseRequestHandler"]):
             super().__init__(socket_path, RequestHandlerClass)
             self.auth_token = None
 
@@ -362,15 +359,14 @@ if hasattr(socketserver, "UnixStreamServer"):
             self.server_shutdown = True
             super().shutdown()
             self.server_close()
-            if os.path.exists(self.server_address):
-                os.remove(self.server_address)
+            assert isinstance(self.server_address, str)
+            if os.path.exists(self.server_address):
+                os.remove(self.server_address)
 
 else:
 
     class AccumulatorUnixServer(socketserver.TCPServer):  # type: ignore[no-redef]
-        def __init__(
-            self, socket_path: str, RequestHandlerClass: Type[socketserver.BaseRequestHandler]
-        ):
+        def __init__(self, socket_path: str, RequestHandlerClass: Type["BaseRequestHandler"]):
             raise NotImplementedError(
                 "Unix Domain Sockets are not supported on this platform. "
                 "Please disable it by setting spark.python.unix.domain.socket.enabled to false."
@@ -381,15 +377,14 @@ def _start_update_server(
    auth_token: str, is_unix_domain_sock: bool, socket_path: Optional[str] = None
 ) -> Union[AccumulatorTCPServer, AccumulatorUnixServer]:
     """Start a TCP or Unix Domain Socket server for accumulator updates."""
+    server: Union[AccumulatorTCPServer, AccumulatorUnixServer]
     if is_unix_domain_sock:
         assert socket_path is not None
         if os.path.exists(socket_path):
             os.remove(socket_path)
         server = AccumulatorUnixServer(socket_path, UpdateRequestHandler)
     else:
-        server = AccumulatorTCPServer(
-            ("localhost", 0), UpdateRequestHandler, auth_token
-        )  # type: ignore[assignment]
+        server = AccumulatorTCPServer(("localhost", 0), UpdateRequestHandler, auth_token)
 
     thread = threading.Thread(target=server.serve_forever)
     thread.daemon = True
@@ -406,7 +401,7 @@ if __name__ == "__main__":
     # The small batch size here ensures that we see multiple batches,
     # even in these small test examples:
     globs["sc"] = SparkContext("local", "test")
-    (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+    failure_count, test_count = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
     globs["sc"].stop()
     if failure_count:
         sys.exit(-1)
{pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/conf.py

@@ -172,12 +172,10 @@ class SparkConf:
         return self
 
     @overload
-    def setExecutorEnv(self, key: str, value: str) -> "SparkConf":
-        ...
+    def setExecutorEnv(self, key: str, value: str) -> "SparkConf": ...
 
     @overload
-    def setExecutorEnv(self, *, pairs: List[Tuple[str, str]]) -> "SparkConf":
-        ...
+    def setExecutorEnv(self, *, pairs: List[Tuple[str, str]]) -> "SparkConf": ...
 
     def setExecutorEnv(
         self,
@@ -212,16 +210,13 @@ class SparkConf:
         return self
 
     @overload
-    def get(self, key: str) -> Optional[str]:
-        ...
+    def get(self, key: str) -> Optional[str]: ...
 
     @overload
-    def get(self, key: str, defaultValue: None) -> Optional[str]:
-        ...
+    def get(self, key: str, defaultValue: None) -> Optional[str]: ...
 
     @overload
-    def get(self, key: str, defaultValue: str) -> str:
-        ...
+    def get(self, key: str, defaultValue: str) -> str: ...
 
     def get(self, key: str, defaultValue: Optional[str] = None) -> Optional[str]:
         """Get the configured value for some key, or return a default otherwise."""
@@ -273,7 +268,7 @@ class SparkConf:
 def _test() -> None:
     import doctest
 
-    (failure_count, test_count) = doctest.testmod(optionflags=doctest.ELLIPSIS)
+    failure_count, test_count = doctest.testmod(optionflags=doctest.ELLIPSIS)
     if failure_count:
         sys.exit(-1)
 
{pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/daemon.py

@@ -15,7 +15,6 @@
 # limitations under the License.
 #
 import uuid
-import numbers
 import os
 import signal
 import select
@@ -28,20 +27,23 @@ import faulthandler
 from errno import EINTR, EAGAIN
 from socket import AF_INET, AF_INET6, SOCK_STREAM, SOMAXCONN
 from signal import SIGHUP, SIGTERM, SIGCHLD, SIG_DFL, SIG_IGN, SIGINT
+from types import FrameType
+from typing import Any, Optional
 
 from pyspark.serializers import read_int, write_int, write_with_length, UTF8Deserializer
+from pyspark.util import enable_faulthandler
 from pyspark.errors import PySparkRuntimeError
 
 
-def compute_real_exit_code(exit_code):
-    # SystemExit's code can be integer or string, but os._exit only accepts integers
-    if isinstance(exit_code, numbers.Integral):
+def compute_real_exit_code(exit_code: Any) -> int:
+    # SystemExit's code can be anything, but os._exit only accepts integer
+    if isinstance(exit_code, int):
         return exit_code
     else:
         return 1
 
 
-def worker(sock, authenticated):
+def worker(sock: socket.socket, authenticated: bool) -> int:
     """
     Called by a worker process after the fork().
     """
@@ -108,7 +110,7 @@ def worker(sock, authenticated):
     return exit_code
 
 
-def manager():
+def manager() -> None:
     # Create a new process group to corral our children
     os.setpgid(0, 0)
 
@@ -145,7 +147,7 @@ def manager():
         write_int(listen_port, stdout_bin)
     stdout_bin.flush()
 
-    def shutdown(code):
+    def shutdown(code: int) -> None:
         if socket_path is not None and os.path.exists(socket_path):
             os.remove(socket_path)
         signal.signal(SIGTERM, SIG_DFL)
@@ -153,7 +155,7 @@ def manager():
         os.kill(0, SIGHUP)
         sys.exit(code)
 
-    def handle_sigterm(*args):
+    def handle_sigterm(signal_number: int, frame: Optional[FrameType]) -> None:
         shutdown(1)
 
     signal.signal(SIGTERM, handle_sigterm)  # Gracefully exit on SIGTERM
@@ -226,53 +228,54 @@ def manager():
 
                 if pid == 0:
                     # in child process
-                    if poller is not None:
-                        poller.unregister(0)
-                        poller.unregister(listen_sock)
-                    listen_sock.close()
+                    with enable_faulthandler():
+                        if poller is not None:
+                            poller.unregister(0)
+                            poller.unregister(listen_sock)
+                        listen_sock.close()
 
-                    # It should close the standard input in the child process so that
-                    # Python native function executions stay intact.
-                    #
-                    # Note that if we just close the standard input (file descriptor 0),
-                    # the lowest file descriptor (file descriptor 0) will be allocated,
-                    # later when other file descriptors should happen to open.
-                    #
-                    # Therefore, here we redirects it to '/dev/null' by duplicating
-                    # another file descriptor for '/dev/null' to the standard input (0).
-                    # See SPARK-26175.
-                    devnull = open(os.devnull, "r")
-                    os.dup2(devnull.fileno(), 0)
-                    devnull.close()
+                        # It should close the standard input in the child process so that
+                        # Python native function executions stay intact.
+                        #
+                        # Note that if we just close the standard input (file descriptor 0),
+                        # the lowest file descriptor (file descriptor 0) will be allocated,
+                        # later when other file descriptors should happen to open.
+                        #
+                        # Therefore, here we redirects it to '/dev/null' by duplicating
+                        # another file descriptor for '/dev/null' to the standard input (0).
+                        # See SPARK-26175.
+                        devnull = open(os.devnull, "r")
+                        os.dup2(devnull.fileno(), 0)
+                        devnull.close()
 
-                    try:
-                        # Acknowledge that the fork was successful
-                        outfile = sock.makefile(mode="wb")
-                        write_int(os.getpid(), outfile)
-                        outfile.flush()
-                        outfile.close()
-                        authenticated = (
-                            os.environ.get("PYTHON_UNIX_DOMAIN_ENABLED", "false").lower()
-                            == "true"
-                        )
-                        while True:
-                            code = worker(sock, authenticated)
-                            if code == 0:
-                                authenticated = True
-                            if not reuse or code:
-                                # wait for closing
-                                try:
-                                    while sock.recv(1024):
+                        try:
+                            # Acknowledge that the fork was successful
+                            outfile = sock.makefile(mode="wb")
+                            write_int(os.getpid(), outfile)
+                            outfile.flush()
+                            outfile.close()
+                            authenticated = (
+                                os.environ.get("PYTHON_UNIX_DOMAIN_ENABLED", "false").lower()
+                                == "true"
+                            )
+                            while True:
+                                code = worker(sock, authenticated)
+                                if code == 0:
+                                    authenticated = True
+                                if not reuse or code:
+                                    # wait for closing
+                                    try:
+                                        while sock.recv(1024):
+                                            pass
+                                    except Exception:
                                         pass
-                                except Exception:
-                                    pass
-                                break
-                            gc.collect()
-                    except BaseException:
-                        traceback.print_exc()
-                        os._exit(1)
-                    else:
-                        os._exit(0)
+                                    break
+                                gc.collect()
+                        except BaseException:
+                            traceback.print_exc()
+                            os._exit(1)
+                        else:
+                            os._exit(0)
                 else:
                     sock.close()
 
{pyspark_client-4.2.0.dev2 → pyspark_client-4.2.0.dev4}/pyspark/errors/__init__.py

@@ -18,7 +18,8 @@
 """
 PySpark exceptions.
 """
-from pyspark.errors.exceptions.base import (  # noqa: F401
+
+from pyspark.errors.exceptions.base import (
     PySparkException,
     AnalysisException,
     SessionNotSameException,
@@ -53,7 +54,6 @@ from pyspark.errors.exceptions.base import (  # noqa: F401
     PickleException,
 )
 
-
 __all__ = [
     "PySparkException",
     "AnalysisException",