pyspark-client 4.2.0.dev1__tar.gz → 4.2.0.dev3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyspark_client-4.2.0.dev1/pyspark_client.egg-info → pyspark_client-4.2.0.dev3}/PKG-INFO +2 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/_typing.pyi +3 -3
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/accumulators.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/daemon.py +46 -44
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/__init__.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/error-conditions.json +25 -3
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/exceptions/captured.py +18 -19
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/exceptions/connect.py +79 -32
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/exceptions/tblib.py +1 -3
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/utils.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors_doc_gen.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/install.py +37 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/instrumentation_utils.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/logger/__init__.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/logger/worker_io.py +6 -3
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/_typing.pyi +5 -3
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/classification.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/base.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/functions.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/linalg/__init__.py +28 -23
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/regression.py +3 -3
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/tuning.py +3 -3
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/_typing.pyi +7 -3
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/linalg/__init__.py +27 -23
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/linalg/distributed.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/regression.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/stat/_statistics.py +2 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/util.py +1 -5
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/accessors.py +2 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/base.py +4 -4
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/categorical.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/config.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/base.py +26 -12
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/boolean_ops.py +12 -8
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/categorical_ops.py +3 -3
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/datetime_ops.py +8 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/null_ops.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/num_ops.py +32 -26
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/string_ops.py +3 -5
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/timedelta_ops.py +8 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/datetimes.py +31 -9
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/frame.py +651 -237
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/generic.py +67 -19
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/groupby.py +139 -33
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/indexes/base.py +18 -14
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/indexes/category.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/indexes/datetimes.py +43 -30
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/indexes/multi.py +3 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/indexes/timedelta.py +21 -8
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/indexing.py +58 -16
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/internal.py +3 -3
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/namespace.py +102 -66
- pyspark_client-4.2.0.dev3/pyspark/pandas/plot/__init__.py +17 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/plot/core.py +4 -4
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/plot/matplotlib.py +10 -4
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/resample.py +10 -7
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/series.py +127 -59
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/strings.py +62 -19
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/testing.py +7 -7
- pyspark_client-4.2.0.dev3/pyspark/pandas/typedef/__init__.py +18 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/typedef/typehints.py +90 -22
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/usage_logging/__init__.py +8 -8
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/utils.py +9 -8
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/window.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/profiler.py +22 -6
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/_typing.pyi +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/client/__init__.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/client/artifact.py +12 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/client/core.py +184 -34
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/client/reattach.py +53 -52
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/dataframe.py +27 -14
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/expressions.py +2 -5
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/functions/__init__.py +2 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/functions/builtin.py +425 -66
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/observation.py +7 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/plan.py +8 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/base_pb2.py +119 -109
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/base_pb2.pyi +307 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/base_pb2_grpc.py +47 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/catalog_pb2.py +2 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/commands_pb2.py +72 -72
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/commands_pb2.pyi +12 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/common_pb2.py +2 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/example_plugins_pb2.py +2 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/expressions_pb2.py +2 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/ml_common_pb2.py +2 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/ml_pb2.py +2 -2
- pyspark_client-4.2.0.dev3/pyspark/sql/connect/proto/pipelines_pb2.py +132 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/pipelines_pb2.pyi +101 -0
- pyspark_client-4.2.0.dev3/pyspark/sql/connect/proto/relations_pb2.py +251 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/relations_pb2.pyi +19 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/types_pb2.py +2 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/session.py +54 -42
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/streaming/readwriter.py +41 -3
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +3 -12
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/streaming/worker/listener_worker.py +3 -12
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/context.py +15 -2
- pyspark_client-4.2.0.dev3/pyspark/sql/conversion.py +1823 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/dataframe.py +98 -25
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/datasource.py +75 -4
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/datasource_internal.py +82 -30
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/functions/__init__.py +51 -23
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/functions/builtin.py +1588 -215
- pyspark_client-4.2.0.dev3/pyspark/sql/interchange.py +89 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/internal.py +3 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/conversion.py +161 -36
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/serializers.py +525 -838
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/types.py +126 -44
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/plot/__init__.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/plot/core.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/profiler.py +157 -34
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/readwriter.py +3 -3
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/session.py +63 -22
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/__init__.py +5 -3
- pyspark_client-4.2.0.dev3/pyspark/sql/streaming/datasource.py +119 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/listener.py +15 -7
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/proto/StateMessage_pb2.py +2 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/python_streaming_source_runner.py +114 -15
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/query.py +57 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/readwriter.py +70 -3
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/transform_with_state_driver_worker.py +3 -11
- pyspark_client-4.2.0.dev3/pyspark/sql/streaming/tws_tester.py +689 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/types.py +1 -12
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/udf.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/utils.py +12 -1
- pyspark_client-4.2.0.dev3/pyspark/sql/worker/analyze_udtf.py +241 -0
- pyspark_client-4.2.0.dev3/pyspark/sql/worker/commit_data_source_write.py +80 -0
- pyspark_client-4.2.0.dev3/pyspark/sql/worker/create_data_source.py +149 -0
- pyspark_client-4.2.0.dev3/pyspark/sql/worker/data_source_pushdown_filters.py +229 -0
- pyspark_client-4.2.0.dev3/pyspark/sql/worker/lookup_data_sources.py +62 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/worker/plan_data_source_read.py +104 -132
- pyspark_client-4.2.0.dev3/pyspark/sql/worker/python_streaming_sink_runner.py +116 -0
- pyspark_client-4.2.0.dev1/pyspark/sql/worker/lookup_data_sources.py → pyspark_client-4.2.0.dev3/pyspark/sql/worker/utils.py +39 -47
- pyspark_client-4.2.0.dev3/pyspark/sql/worker/write_into_data_source.py +246 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/taskcontext.py +44 -4
- {pyspark_client-4.2.0.dev1/pyspark/pandas/typedef → pyspark_client-4.2.0.dev3/pyspark/testing}/__init__.py +5 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/testing/connectutils.py +0 -10
- pyspark_client-4.2.0.dev3/pyspark/testing/goldenutils.py +356 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/testing/pandasutils.py +59 -6
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/testing/sqlutils.py +2 -6
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/testing/streamingutils.py +2 -2
- pyspark_client-4.2.0.dev3/pyspark/testing/unittestutils.py +55 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/testing/utils.py +56 -41
- pyspark_client-4.2.0.dev3/pyspark/threaddump.py +62 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/util.py +67 -18
- pyspark_client-4.2.0.dev3/pyspark/version.py +1 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/worker.py +392 -459
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/worker_util.py +58 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3/pyspark_client.egg-info}/PKG-INFO +2 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark_client.egg-info/SOURCES.txt +7 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark_client.egg-info/requires.txt +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/setup.py +1 -1
- pyspark_client-4.2.0.dev1/pyspark/pandas/plot/__init__.py +0 -17
- pyspark_client-4.2.0.dev1/pyspark/sql/connect/proto/pipelines_pb2.py +0 -130
- pyspark_client-4.2.0.dev1/pyspark/sql/connect/proto/relations_pb2.py +0 -251
- pyspark_client-4.2.0.dev1/pyspark/sql/conversion.py +0 -847
- pyspark_client-4.2.0.dev1/pyspark/sql/worker/analyze_udtf.py +0 -288
- pyspark_client-4.2.0.dev1/pyspark/sql/worker/commit_data_source_write.py +0 -127
- pyspark_client-4.2.0.dev1/pyspark/sql/worker/create_data_source.py +0 -193
- pyspark_client-4.2.0.dev1/pyspark/sql/worker/data_source_pushdown_filters.py +0 -277
- pyspark_client-4.2.0.dev1/pyspark/sql/worker/python_streaming_sink_runner.py +0 -159
- pyspark_client-4.2.0.dev1/pyspark/sql/worker/write_into_data_source.py +0 -285
- pyspark_client-4.2.0.dev1/pyspark/testing/__init__.py +0 -47
- pyspark_client-4.2.0.dev1/pyspark/version.py +0 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/MANIFEST.in +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/README.md +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/_globals.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/cloudpickle/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/cloudpickle/cloudpickle.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/cloudpickle/cloudpickle_fast.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/conf.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/error_classes.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/exceptions/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/exceptions/base.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/find_spark_home.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/java_gateway.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/join.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/logger/logger.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/loose_version.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/base.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/clustering.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/common.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/classification.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/evaluation.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/feature.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/functions.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/io_utils.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/pipeline.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/proto.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/readwrite.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/serialize.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/summarizer.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/tuning.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/util.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/deepspeed/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/deepspeed/deepspeed_distributor.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/dl_util.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/evaluation.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/feature.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/fpm.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/image.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/model_cache.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/param/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/param/_shared_params_code_gen.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/param/shared.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/pipeline.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/recommendation.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/stat.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/torch/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/torch/data.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/torch/distributor.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/torch/log_communication.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/torch/torch_run_process_wrapper.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/tree.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/util.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/wrapper.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/classification.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/clustering.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/common.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/evaluation.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/feature.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/fpm.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/random.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/recommendation.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/stat/KernelDensity.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/stat/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/stat/distribution.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/stat/test.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/tree.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/_typing.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/correlation.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/binary_ops.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/complex_ops.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/date_ops.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/udt_ops.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/exceptions.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/extensions.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/indexes/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/missing/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/missing/common.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/missing/frame.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/missing/general_functions.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/missing/groupby.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/missing/indexes.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/missing/resample.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/missing/scalars.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/missing/series.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/missing/window.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/mlflow.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/numpy_compat.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/plot/plotly.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/spark/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/spark/accessors.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/spark/utils.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/sql_formatter.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/sql_processor.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/supported_api_gen.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/usage_logging/usage_logger.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/add_pipeline_analysis_context.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/api.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/block_session_mutations.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/cli.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/flow.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/graph_element_registry.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/init_cli.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/logging_utils.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/output.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/source_code_location.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/spark_connect_graph_element_registry.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/spark_connect_pipeline.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/type_error_utils.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/py.typed +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/rddsampler.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/resource/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/resource/information.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/resource/profile.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/resource/requests.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/resultiterable.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/serializers.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/shell.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/shuffle.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/avro/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/avro/functions.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/catalog.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/column.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/conf.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/_typing.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/avro/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/avro/functions.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/catalog.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/client/retries.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/column.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/conf.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/conversion.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/datasource.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/functions/partitioning.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/group.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/logging.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/merge.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/profiler.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/catalog_pb2.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/common_pb2.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/example_plugins_pb2.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/expressions_pb2.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/ml_common_pb2.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/ml_pb2.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/types_pb2.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/protobuf/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/protobuf/functions.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/readwriter.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/resource/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/resource/profile.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/shell/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/shell/progress.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/sql_formatter.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/streaming/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/streaming/query.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/streaming/worker/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/table_arg.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/tvf.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/types.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/udf.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/udtf.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/utils.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/window.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/functions/partitioning.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/geo_utils.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/group.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/merge.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/metrics.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/observation.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/_typing/__init__.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/_typing/protocols/__init__.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/_typing/protocols/frame.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/_typing/protocols/series.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/functions.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/functions.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/group_ops.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/map_ops.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/typehints.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/utils.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/plot/plotly.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/protobuf/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/protobuf/functions.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/sql_formatter.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/list_state_client.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/map_state_client.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/proto/StateMessage_pb2.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/proto/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/state.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/stateful_processor.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/stateful_processor_api_client.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/stateful_processor_util.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/value_state_client.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/table_arg.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/tvf.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/tvf_argument.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/udtf.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/variant_utils.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/window.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/worker/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/statcounter.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/storagelevel.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/streaming/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/streaming/context.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/streaming/dstream.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/streaming/kinesis.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/streaming/listener.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/streaming/util.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/testing/mllibutils.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/testing/mlutils.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/testing/objects.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/traceback_utils.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark_client.egg-info/dependency_links.txt +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark_client.egg-info/top_level.txt +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/setup.cfg +0 -0
{pyspark_client-4.2.0.dev1/pyspark_client.egg-info → pyspark_client-4.2.0.dev3}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pyspark-client
-Version: 4.2.0.dev1
+Version: 4.2.0.dev3
 Summary: Python Spark Connect client for Apache Spark
 Home-page: https://github.com/apache/spark/tree/master/python
 Author: Spark Developers
@@ -18,7 +18,7 @@ Classifier: Typing :: Typed
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 Requires-Dist: pandas>=2.2.0
-Requires-Dist: pyarrow>=
+Requires-Dist: pyarrow>=18.0.0
 Requires-Dist: grpcio>=1.76.0
 Requires-Dist: grpcio-status>=1.76.0
 Requires-Dist: googleapis-common-protos>=1.71.0
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/_typing.pyi
RENAMED
@@ -16,7 +16,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from typing import Callable, Iterable, Sized, TypeVar, Union
+from typing import Any, Callable, Iterable, Sized, TypeVar, Union
 from typing_extensions import Literal, Protocol
 
 from numpy import int32, int64, float32, float64, ndarray
@@ -29,10 +29,10 @@ PrimitiveType = Union[bool, float, int, str]
 NonUDFType = Literal[0]
 
 class SupportsIAdd(Protocol):
-    def __iadd__(self, other:
+    def __iadd__(self, other: Any) -> SupportsIAdd: ...
 
 class SupportsOrdering(Protocol):
-    def __lt__(self, other:
+    def __lt__(self, other: Any) -> bool: ...
 
 class SizedIterable(Protocol, Sized, Iterable[T_co]): ...
 
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/accumulators.py
RENAMED
@@ -27,7 +27,7 @@ from pyspark.serializers import read_int, CPickleSerializer
 from pyspark.errors import PySparkRuntimeError
 
 if TYPE_CHECKING:
-    from pyspark._typing import SupportsIAdd
+    from pyspark._typing import SupportsIAdd
     import socketserver.BaseRequestHandler  # type: ignore[import-not-found]
 
 
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/daemon.py
RENAMED
@@ -30,6 +30,7 @@ from socket import AF_INET, AF_INET6, SOCK_STREAM, SOMAXCONN
 from signal import SIGHUP, SIGTERM, SIGCHLD, SIG_DFL, SIG_IGN, SIGINT
 
 from pyspark.serializers import read_int, write_int, write_with_length, UTF8Deserializer
+from pyspark.util import enable_faulthandler
 from pyspark.errors import PySparkRuntimeError
 
 
@@ -226,53 +227,54 @@ def manager():
 
         if pid == 0:
             # in child process
-            poller
+            with enable_faulthandler():
+                if poller is not None:
+                    poller.unregister(0)
+                    poller.unregister(listen_sock)
+                listen_sock.close()
 
+                # It should close the standard input in the child process so that
+                # Python native function executions stay intact.
+                #
+                # Note that if we just close the standard input (file descriptor 0),
+                # the lowest file descriptor (file descriptor 0) will be allocated,
+                # later when other file descriptors should happen to open.
+                #
+                # Therefore, here we redirects it to '/dev/null' by duplicating
+                # another file descriptor for '/dev/null' to the standard input (0).
+                # See SPARK-26175.
+                devnull = open(os.devnull, "r")
+                os.dup2(devnull.fileno(), 0)
+                devnull.close()
 
+                try:
+                    # Acknowledge that the fork was successful
+                    outfile = sock.makefile(mode="wb")
+                    write_int(os.getpid(), outfile)
+                    outfile.flush()
+                    outfile.close()
+                    authenticated = (
+                        os.environ.get("PYTHON_UNIX_DOMAIN_ENABLED", "false").lower()
+                        == "true"
+                    )
+                    while True:
+                        code = worker(sock, authenticated)
+                        if code == 0:
+                            authenticated = True
+                        if not reuse or code:
+                            # wait for closing
+                            try:
+                                while sock.recv(1024):
+                                    pass
+                            except Exception:
                                 pass
-            else:
-                os._exit(0)
+                            break
+                        gc.collect()
+                except BaseException:
+                    traceback.print_exc()
+                    os._exit(1)
+                else:
+                    os._exit(0)
         else:
             sock.close()
 
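The reworked child-process path above wraps the worker loop in `enable_faulthandler()` and, instead of closing standard input, re-points file descriptor 0 at `/dev/null` (SPARK-26175). Below is a minimal, standalone sketch of that pattern; the context manager shown is a hypothetical stand-in for `pyspark.util.enable_faulthandler`, whose actual implementation is not part of this diff.

```python
import contextlib
import faulthandler
import os


@contextlib.contextmanager
def enable_faulthandler():
    # Hypothetical stand-in: dump Python tracebacks on fatal signals while the
    # block is active, then restore the previous state.
    already_enabled = faulthandler.is_enabled()
    faulthandler.enable()
    try:
        yield
    finally:
        if not already_enabled:
            faulthandler.disable()


def detach_stdin():
    # Same redirection as in the diff: instead of closing fd 0 (which would let
    # the next opened file silently become "stdin"), point fd 0 at /dev/null so
    # native code that reads standard input stays well-defined.
    devnull = open(os.devnull, "r")
    os.dup2(devnull.fileno(), 0)
    devnull.close()


if __name__ == "__main__":
    with enable_faulthandler():
        detach_stdin()
        # ... a forked worker loop would run here ...
```

The `dup2` step matters because merely closing file descriptor 0 would leave it free to be reassigned to whatever file the process opens next.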
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/error-conditions.json
RENAMED
@@ -465,6 +465,11 @@
       "Parameter value <arg_name> must be a valid UUID format: <origin>"
     ]
   },
+  "INVALID_STREAMING_SOURCE_NAME": {
+    "message": [
+      "Invalid streaming source name '<source_name>'. Source names must contain only ASCII letters, digits, and underscores."
+    ]
+  },
   "INVALID_TIMEOUT_TIMESTAMP": {
     "message": [
       "Timeout timestamp (<timestamp>) cannot be earlier than the current watermark (<watermark>)."
@@ -551,6 +556,12 @@
       "<arg1> and <arg2> should be of the same length, got <arg1_length> and <arg2_length>."
     ]
   },
+  "LOCAL_RELATION_SIZE_LIMIT_EXCEEDED": {
+    "message": [
+      "Local relation size (<actualSize> bytes) exceeds the limit (<sizeLimit> bytes)."
+    ],
+    "sqlState": "54000"
+  },
   "MALFORMED_GEOGRAPHY": {
     "message": [
       "Geography binary is malformed. Please check the data source is valid."
@@ -1174,6 +1185,11 @@
       "SparkContext or SparkSession should be created first."
     ]
   },
+  "SIMPLE_STREAM_READER_OFFSET_DID_NOT_ADVANCE": {
+    "message": [
+      "SimpleDataSourceStreamReader.read() returned a non-empty batch but the end offset: <end_offset> did not advance past the start offset: <start_offset>. The end offset must represent the position after the last record returned."
+    ]
+  },
   "SLICE_WITH_STEP": {
     "message": [
       "Slice with step is not supported."
@@ -1237,12 +1253,12 @@
       "Return type of the user-defined function should be <expected>, but is <actual>."
     ]
   },
-  "
+  "UDTF_ARROW_DATA_CONVERSION_ERROR": {
     "message": [
-      "Cannot convert
+      "Cannot convert UDTF output to Arrow. Data: <data>. Schema: <schema>. Arrow Schema: <arrow_schema>."
     ]
   },
+  "UDTF_ARROW_TYPE_CONVERSION_ERROR": {
     "message": [
       "PyArrow UDTF must return an iterator of pyarrow.Table or pyarrow.RecordBatch objects."
     ]
@@ -1467,6 +1483,12 @@
       "Value for `<arg_name>` must be between <lower_bound> and <upper_bound> (inclusive), got <actual>"
     ]
   },
+  "WKB_PARSE_ERROR" : {
+    "message" : [
+      "Error parsing WKB: <parseError> at position <pos>"
+    ],
+    "sqlState" : "22023"
+  },
   "WRONG_NUM_ARGS_FOR_HIGHER_ORDER_FUNCTION": {
     "message": [
       "Function `<func_name>` should take between 1 and 3 arguments, but the provided function takes <num_args>."
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/exceptions/captured.py
RENAMED
@@ -234,25 +234,13 @@ def _convert_exception(e: "Py4JJavaError") -> CapturedException:
         return SparkUpgradeException(origin=e)
     elif is_instance_of(gw, e, "org.apache.spark.SparkNoSuchElementException"):
         return SparkNoSuchElementException(origin=e)
-            map(
-                lambda v: "org.apache.spark.sql.execution.python" in v.toString(), c.getStackTrace()
-            )
-        )
-    ):
-        msg = (
-            "\n An exception was thrown from the Python worker. "
-            "Please see the stack trace below.\n%s" % c.getMessage()
-        )
-        return PythonException(msg, stacktrace)
-
-    return UnknownException(desc=e.toString(), stackTrace=stacktrace, cause=c)
+    elif is_instance_of(gw, e, "org.apache.spark.api.python.PythonException"):
+        return PythonException(origin=e)
+    return UnknownException(
+        desc=e.toString(),
+        stackTrace=getattr(jvm, "org.apache.spark.util.Utils").exceptionString(e),
+        cause=e.getCause(),
+    )
 
 
 def capture_sql_exception(f: Callable[..., Any]) -> Callable[..., Any]:
@@ -348,6 +336,17 @@ class PythonException(CapturedException, BasePythonException):
     Exceptions thrown from Python workers.
     """
 
+    def __str__(self) -> str:
+        messageParameters = self.getMessageParameters()
+
+        if (
+            messageParameters is None
+            or "msg" not in messageParameters
+            or "traceback" not in messageParameters
+        ):
+            return super().__str__()
+        return f"{messageParameters['msg']}:\n{messageParameters['traceback'].strip()}"
+
 
 class ArithmeticException(CapturedException, BaseArithmeticException):
     """
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/exceptions/connect.py
RENAMED
@@ -61,43 +61,89 @@ def convert_exception(
     display_server_stacktrace: bool = False,
     grpc_status_code: grpc.StatusCode = StatusCode.UNKNOWN,
 ) -> SparkConnectException:
+    raw_classes = info.metadata.get("classes")
+    classes: List[str] = json.loads(raw_classes) if raw_classes else []
+    raw_message_parameters = info.metadata.get("messageParameters")
+    message_parameters: Dict[str, str] = (
+        json.loads(raw_message_parameters) if raw_message_parameters else {}
+    )
+    root_error_idx = (
+        resp.root_error_idx if resp is not None and resp.HasField("root_error_idx") else None
+    )
     converted = _convert_exception(
+        classes=classes,
+        sql_state=info.metadata.get("sqlState"),
+        error_class=info.metadata.get("errorClass"),
+        reason=info.reason,
+        root_error_idx=root_error_idx,
+        errors=list(resp.errors) if resp is not None else None,
+        truncated_message=truncated_message,
+        truncated_message_parameters=message_parameters,
+        truncated_stacktrace=info.metadata.get("stackTrace"),
+        display_server_stacktrace=display_server_stacktrace,
+        grpc_status_code=grpc_status_code,
     )
     return recover_python_exception(converted)
 
 
+def convert_observation_errors(
+    root_error_idx: int,
+    errors: List["pb2.FetchErrorDetailsResponse.Error"],
+) -> SparkConnectException:
+    """
+    Convert observation error payload (root_error_idx + list of Error from ObservedMetrics)
+    to a SparkConnectException.
+    """
+    if root_error_idx < 0 or root_error_idx >= len(errors):
+        return SparkConnectException("Observation error: invalid root_error_idx")
+
+    if len(errors) == 0:
+        return SparkConnectException("Observation error: no errors")
+
+    root_error = errors[root_error_idx]
+
+    return _convert_exception(
+        classes=list(root_error.error_type_hierarchy),
+        sql_state=root_error.spark_throwable.sql_state
+        if root_error.spark_throwable.HasField("sql_state")
+        else None,
+        error_class=root_error.spark_throwable.error_class
+        if root_error.spark_throwable.HasField("error_class")
+        else None,
+        reason=None,
+        root_error_idx=root_error_idx,
+        errors=errors,
+        truncated_message="",
+        truncated_message_parameters=None,
+        truncated_stacktrace=None,
+    )
+
+
 def _convert_exception(
+    classes: List[str],
+    sql_state: Optional[str],
+    error_class: Optional[str],
+    reason: Optional[str],
+    root_error_idx: Optional[int],
+    errors: Optional[List["pb2.FetchErrorDetailsResponse.Error"]],
     truncated_message: str,
+    truncated_message_parameters: Optional[Dict[str, str]],
+    truncated_stacktrace: Optional[str],
     display_server_stacktrace: bool = False,
     grpc_status_code: grpc.StatusCode = StatusCode.UNKNOWN,
 ) -> SparkConnectException:
     import pyspark.sql.connect.proto as pb2
 
-    error_class = info.metadata.get("errorClass")
-    raw_message_parameters = info.metadata.get("messageParameters")
-    message_parameters: Dict[str, str] = (
-        json.loads(raw_message_parameters) if raw_message_parameters else {}
-    )
-    stacktrace: Optional[str] = None
-
-    if resp is not None and resp.HasField("root_error_idx"):
-        message = resp.errors[resp.root_error_idx].message
-        stacktrace = _extract_jvm_stacktrace(resp)
-    else:
-        message = truncated_message
-        stacktrace = info.metadata.get("stackTrace")
-        display_server_stacktrace = display_server_stacktrace if stacktrace else False
-
+    message = truncated_message
+    stacktrace = truncated_stacktrace
+    message_parameters = truncated_message_parameters
     contexts = None
     breaking_change_info = None
+
+    if root_error_idx is not None and errors is not None:
+        root_error = errors[root_error_idx]
+        message = root_error.message
+        stacktrace = _extract_jvm_stacktrace(root_error_idx, errors)
         if hasattr(root_error, "spark_throwable"):
             # Extract errorClass from FetchErrorDetailsResponse if not in metadata
             if error_class is None and root_error.spark_throwable.HasField("error_class"):
@@ -123,6 +169,8 @@ def _convert_exception(
                     "key": bci.mitigation_config.key,
                     "value": bci.mitigation_config.value,
                 }
+    else:
+        display_server_stacktrace = display_server_stacktrace if stacktrace else False
 
     if "org.apache.spark.api.python.PythonException" in classes:
         return PythonException(
@@ -150,7 +198,7 @@ def _convert_exception(
             sql_state=sql_state,
             server_stacktrace=stacktrace,
             display_server_stacktrace=display_server_stacktrace,
-            contexts=contexts,
+            contexts=contexts,  # type: ignore[arg-type]
             grpc_status_code=grpc_status_code,
             breaking_change_info=breaking_change_info,
         )
@@ -158,22 +206,21 @@ def _convert_exception(
     # Return UnknownException if there is no matched exception class
     return UnknownException(
         message,
-        reason=
+        reason=reason,
         messageParameters=message_parameters,
         errorClass=error_class,
         sql_state=sql_state,
         server_stacktrace=stacktrace,
         display_server_stacktrace=display_server_stacktrace,
-        contexts=contexts,
+        contexts=contexts,  # type: ignore[arg-type]
         grpc_status_code=grpc_status_code,
         breaking_change_info=breaking_change_info,
     )
 
 
-def _extract_jvm_stacktrace(
+def _extract_jvm_stacktrace(
+    root_error_idx: int, errors: List["pb2.FetchErrorDetailsResponse.Error"]
+) -> str:
     lines: List[str] = []
 
     def format_stacktrace(error: "pb2.FetchErrorDetailsResponse.Error") -> None:
@@ -190,9 +237,9 @@ def _extract_jvm_stacktrace(resp: "pb2.FetchErrorDetailsResponse") -> str:
 
         # If this error has a cause, format that recursively
         if error.HasField("cause_idx"):
-            format_stacktrace(
+            format_stacktrace(errors[error.cause_idx])
 
-    format_stacktrace(
+    format_stacktrace(errors[root_error_idx])
 
     return "\n".join(lines)
 
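With this change `_extract_jvm_stacktrace` receives the root error index plus the full error list and follows `cause_idx` links through that list rather than reading them off the response object. A rough, proto-free sketch of that index-linked traversal follows; plain dataclasses stand in for `FetchErrorDetailsResponse.Error`, and the exact line format the real helper emits is not shown in this diff.

```python
from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
class Error:
    # Minimal stand-in for pb2.FetchErrorDetailsResponse.Error.
    message: str
    error_type_hierarchy: List[str] = field(default_factory=list)
    stack_trace: List[str] = field(default_factory=list)
    cause_idx: Optional[int] = None


def extract_stacktrace(root_error_idx: int, errors: List[Error]) -> str:
    lines: List[str] = []

    def format_stacktrace(error: Error) -> None:
        # Header line: exception class and message, then its frames.
        cls = error.error_type_hierarchy[0] if error.error_type_hierarchy else "Unknown"
        lines.append(f"{cls}: {error.message}")
        lines.extend(f"\tat {frame}" for frame in error.stack_trace)
        # Follow the cause chain by index, mirroring error.HasField("cause_idx").
        if error.cause_idx is not None:
            lines.append("Caused by:")
            format_stacktrace(errors[error.cause_idx])

    format_stacktrace(errors[root_error_idx])
    return "\n".join(lines)


errors = [
    Error("query failed", ["org.apache.spark.SparkException"],
          ["Driver.run(Driver.scala:42)"], cause_idx=1),
    Error("bad input", ["java.lang.IllegalArgumentException"],
          ["Parser.parse(Parser.scala:7)"]),
]
print(extract_stacktrace(0, errors))
```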
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/exceptions/tblib.py
RENAMED
@@ -206,9 +206,7 @@ class Traceback:
 
         # noinspection PyBroadException
         try:
-            exec(
-                code, dict(current.tb_frame.f_globals), dict(current.tb_frame.f_locals)
-            ) # noqa: S102
+            exec(code, dict(current.tb_frame.f_globals), dict(current.tb_frame.f_locals))
         except Exception:
             next_tb = sys.exc_info()[2].tb_next  # type: ignore
             if top_tb is None:
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/utils.py
RENAMED
@@ -271,7 +271,7 @@ def _capture_call_site(depth: int) -> str:
         import IPython
 
         # ipykernel is required for IPython
-        import ipykernel
+        import ipykernel
 
         ipython = IPython.get_ipython()
         # Filtering out IPython related frames
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors_doc_gen.py
RENAMED
@@ -44,7 +44,7 @@ Error classes in PySpark
 This is a list of common, named error classes returned by PySpark which are defined at `error-conditions.json <https://github.com/apache/spark/blob/master/python/pyspark/errors/error-conditions.json>`_.
 
 When writing PySpark errors, developers must use an error class from the list. If an appropriate error class is not available, add a new one into the list. For more information, please refer to `Contributing Error and Exception <contributing.rst#contributing-error-and-exception>`_.
-"""
+"""
 with open(output_rst_file_path, "w") as f:
     f.write(header + "\n\n")
     for error_key, error_details in ERROR_CLASSES_MAP.items():
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/install.py
RENAMED
@@ -17,6 +17,7 @@
 import os
 import re
 import tarfile
+import time
 import traceback
 import urllib.request
 from shutil import rmtree
@@ -143,7 +144,7 @@ def install_spark(dest, spark_version, hadoop_version, hive_version):
     tar = None
     try:
         print("Downloading %s from:\n- %s" % (pretty_pkg_name, url))
+        _download_with_retries(url, package_local_path)
 
         print("Installing to %s" % dest)
         tar = tarfile.open(package_local_path, "r:gz")
@@ -171,7 +172,7 @@ def get_preferred_mirrors():
     for _ in range(3):
         try:
             response = urllib.request.urlopen(
-                "https://www.apache.org/dyn/closer.lua?preferred=true"
+                "https://www.apache.org/dyn/closer.lua?preferred=true", timeout=10
             )
             mirror_urls.append(response.read().decode("utf-8"))
         except Exception:
@@ -186,6 +187,40 @@ def get_preferred_mirrors():
     return list(set(mirror_urls)) + [x for x in default_sites if x not in mirror_urls]
 
 
+def _download_with_retries(url, path, max_retries=3, timeout=600):
+    """
+    Download a file from a URL with retry logic and timeout handling.
+
+    Parameters
+    ----------
+    url : str
+        The URL to download from.
+    path : str
+        The local file path to save the downloaded file.
+    max_retries : int
+        Maximum number of retry attempts per URL.
+    timeout : int
+        Timeout in seconds for the HTTP request.
+    """
+    for attempt in range(max_retries):
+        try:
+            response = urllib.request.urlopen(url, timeout=timeout)
+            download_to_file(response, path)
+            return
+        except Exception as e:
+            if os.path.exists(path):
+                os.remove(path)
+            if attempt < max_retries - 1:
+                wait = 2**attempt * 5
+                print(
+                    "Download attempt %d/%d failed: %s. Retrying in %d seconds..."
+                    % (attempt + 1, max_retries, str(e), wait)
+                )
+                time.sleep(wait)
+            else:
+                raise
+
+
 def download_to_file(response, path, chunk_size=1024 * 1024):
     total_size = int(response.info().get("Content-Length").strip())
     bytes_so_far = 0
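The new `_download_with_retries` above retries each download up to `max_retries` times, deletes any partial file on failure, and backs off `2**attempt * 5` seconds between attempts (5 s, then 10 s, for the default three tries). A standalone sketch of the same retry shape, with a stand-in `fetch` callable in place of the real `urlopen` plus `download_to_file` pair:

```python
import time


def with_retries(fetch, max_retries=3):
    # Same shape as _download_with_retries: try, back off 2**attempt * 5 seconds,
    # re-raise on the final attempt.
    for attempt in range(max_retries):
        try:
            return fetch()
        except Exception as exc:
            if attempt == max_retries - 1:
                raise
            wait = 2**attempt * 5
            print(f"Attempt {attempt + 1}/{max_retries} failed: {exc}. Retrying in {wait}s...")
            time.sleep(wait)


calls = {"n": 0}


def flaky():
    # Fails twice, then succeeds, so the demo exercises both backoff waits.
    calls["n"] += 1
    if calls["n"] < 3:
        raise IOError("transient network error")
    return "ok"


print(with_retries(flaky))  # waits 5 s, then 10 s, then prints "ok"
```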
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/instrumentation_utils.py
RENAMED
@@ -124,7 +124,7 @@ def _attach(
     logger_module: Union[str, ModuleType],
     modules: List[ModuleType],
     classes: List[Type[Any]],
-    missings: List[Tuple[Type[Any], Type[Any]]],
+    missings: List[Tuple[Union[ModuleType, Type[Any]], Type[Any]]],
 ) -> None:
     if isinstance(logger_module, str):
         logger_module = importlib.import_module(logger_module)
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/logger/worker_io.py
RENAMED
@@ -223,7 +223,11 @@ def context_provider() -> dict[str, str]:
     - class_name: Name of the class that initiated the logging if available
     """
 
-    def is_pyspark_module(
+    def is_pyspark_module(frame: FrameType) -> bool:
+        module_name = frame.f_globals.get("__name__", "")
+        if module_name == "__main__":
+            if (mod := sys.modules.get("__main__", None)) and mod.__spec__:
+                module_name = mod.__spec__.name
         return module_name.startswith("pyspark.") and ".tests." not in module_name
 
     bottom: Optional[FrameType] = None
@@ -236,9 +240,8 @@ def context_provider() -> dict[str, str]:
     if frame:
         while frame.f_back:
             f_back = frame.f_back
-            module_name = f_back.f_globals.get("__name__", "")
 
-            if is_pyspark_module(
+            if is_pyspark_module(f_back):
                 if not is_in_pyspark_module:
                     bottom = frame
                     is_in_pyspark_module = True
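`is_pyspark_module` now resolves frames whose `__name__` is `"__main__"` through `sys.modules["__main__"].__spec__`, so code launched with `python -m some.module` still reports its dotted module name. A small, self-contained illustration of that lookup (not the actual worker_io code):

```python
import sys
from types import FrameType


def module_name_of(frame: FrameType) -> str:
    # Prefer the frame's __name__; when the code runs as "__main__" (python -m ...),
    # fall back to the spec recorded on the __main__ module, which keeps the
    # original dotted module name (for example "pyspark.daemon").
    name = frame.f_globals.get("__name__", "")
    if name == "__main__":
        main = sys.modules.get("__main__")
        if main is not None and getattr(main, "__spec__", None):
            name = main.__spec__.name
    return name


def looks_like_pyspark(frame: FrameType) -> bool:
    name = module_name_of(frame)
    return name.startswith("pyspark.") and ".tests." not in name


frame = sys._getframe()
print(module_name_of(frame), looks_like_pyspark(frame))
```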
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/_typing.pyi
RENAMED
@@ -16,7 +16,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from typing import Any, Dict, List, TypeVar, Tuple, Union
+from typing import Any, Dict, List, TYPE_CHECKING, TypeVar, Tuple, Union
 from typing_extensions import Literal
 
 from numpy import ndarray
@@ -24,10 +24,12 @@ from py4j.java_gateway import JavaObject
 
 import pyspark.ml.base
 import pyspark.ml.param
-import pyspark.ml.util
 from pyspark.ml.linalg import Vector
 import pyspark.ml.wrapper
 
+if TYPE_CHECKING:
+    from scipy.sparse import spmatrix, sparray
+
 ParamMap = Dict[pyspark.ml.param.Param, Any]
 PipelineStage = Union[pyspark.ml.base.Estimator, pyspark.ml.base.Transformer]
 
@@ -81,4 +83,4 @@ RankingEvaluatorMetricType = Union[
     Literal["recallAtK"],
 ]
 
-VectorLike = Union[ndarray, Vector, List[float], Tuple[float, ...]]
+VectorLike = Union[ndarray, Vector, List[float], Tuple[float, ...], "spmatrix", "sparray", range]
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/classification.py
RENAMED
@@ -2253,7 +2253,7 @@ class RandomForestClassifier(
         return self._set(minWeightFractionPerNode=value)
 
 
-class RandomForestClassificationModel(
+class RandomForestClassificationModel(  # type: ignore[misc]
     _TreeEnsembleModel,
     _JavaProbabilisticClassificationModel[Vector],
     _RandomForestClassifierParams,
|
|
|
155
155
|
) -> Union[DataFrame, pd.DataFrame]:
|
|
156
156
|
"""
|
|
157
157
|
Transforms the input dataset.
|
|
158
|
-
The dataset can be either pandas dataframe or spark dataframe
|
|
158
|
+
The dataset can be either pandas dataframe or spark dataframe,
|
|
159
159
|
if it is a spark DataFrame, the result of transformation is a new spark DataFrame
|
|
160
160
|
that contains all existing columns and output columns with names,
|
|
161
161
|
If it is a pandas DataFrame, the result of transformation is a shallow copy
|
|
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/functions.py
RENAMED
@@ -241,7 +241,7 @@ def _validate_and_transform_single_input(
         # tensor columns
         if len(batch.columns) == 1:
             # one tensor column and one expected input, vstack rows
-            single_input = np.vstack(batch.iloc[:, 0])
+            single_input = np.vstack(batch.iloc[:, 0])  # type: ignore[call-overload]
         else:
             raise ValueError(
                 "Multiple input columns found, but model expected a single "