pyspark-client 4.1.0.dev2__tar.gz → 4.1.0.dev4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyspark_client-4.1.0.dev2/pyspark_client.egg-info → pyspark_client-4.1.0.dev4}/PKG-INFO +6 -4
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/cloudpickle/__init__.py +1 -1
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/cloudpickle/cloudpickle.py +13 -6
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/errors/error-conditions.json +28 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/errors/exceptions/captured.py +12 -37
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/logger/logger.py +19 -3
- pyspark_client-4.1.0.dev4/pyspark/logger/worker_io.py +297 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/supported_api_gen.py +1 -1
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pipelines/__init__.py +2 -0
- pyspark_client-4.1.0.dev4/pyspark/pipelines/add_pipeline_analysis_context.py +48 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pipelines/api.py +64 -7
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pipelines/block_connect_access.py +37 -9
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pipelines/cli.py +30 -7
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pipelines/graph_element_registry.py +2 -2
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pipelines/init_cli.py +10 -1
- pyspark_client-4.1.0.dev2/pyspark/pipelines/dataset.py → pyspark_client-4.1.0.dev4/pyspark/pipelines/output.py +18 -7
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pipelines/source_code_location.py +28 -0
- pyspark_client-4.1.0.dev4/pyspark/pipelines/spark_connect_graph_element_registry.py +154 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pipelines/spark_connect_pipeline.py +4 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/__init__.py +3 -1
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/avro/functions.py +3 -3
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/column.py +53 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/_typing.py +1 -1
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/client/artifact.py +55 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/client/core.py +259 -3
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/column.py +18 -3
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/dataframe.py +22 -9
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/expressions.py +1 -1
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/functions/builtin.py +554 -22
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/group.py +30 -10
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/plan.py +85 -13
- pyspark_client-4.1.0.dev4/pyspark/sql/connect/proto/base_pb2.py +275 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/base_pb2.pyi +251 -6
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/base_pb2_grpc.py +55 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/catalog_pb2.py +2 -2
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/commands_pb2.py +2 -2
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/common_pb2.py +17 -15
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/common_pb2.pyi +28 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/example_plugins_pb2.py +2 -2
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/expressions_pb2.py +53 -53
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/ml_common_pb2.py +2 -2
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/ml_pb2.py +2 -2
- pyspark_client-4.1.0.dev4/pyspark/sql/connect/proto/pipelines_pb2.py +130 -0
- pyspark_client-4.1.0.dev4/pyspark/sql/connect/proto/pipelines_pb2.pyi +1574 -0
- pyspark_client-4.1.0.dev4/pyspark/sql/connect/proto/relations_pb2.py +251 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/relations_pb2.pyi +57 -1
- pyspark_client-4.1.0.dev4/pyspark/sql/connect/proto/types_pb2.py +109 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/types_pb2.pyi +59 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/session.py +123 -7
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/tvf.py +5 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/types.py +51 -5
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/utils.py +17 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/window.py +4 -1
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/conversion.py +104 -17
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/functions/__init__.py +33 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/functions/builtin.py +1883 -225
- pyspark_client-4.1.0.dev4/pyspark/sql/geo_utils.py +103 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/group.py +19 -16
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/_typing/__init__.pyi +14 -1
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/functions.py +29 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/functions.pyi +10 -4
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/group_ops.py +154 -31
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/serializers.py +267 -10
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/typehints.py +197 -3
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/types.py +124 -2
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/list_state_client.py +10 -38
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/proto/StateMessage_pb2.py +4 -4
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/query.py +5 -2
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/tvf.py +39 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/types.py +410 -1
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/udf.py +19 -10
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/worker/analyze_udtf.py +10 -2
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/worker/commit_data_source_write.py +8 -6
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/worker/create_data_source.py +46 -43
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/worker/data_source_pushdown_filters.py +58 -54
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/worker/plan_data_source_read.py +45 -37
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/worker/python_streaming_sink_runner.py +31 -27
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/worker/write_into_data_source.py +78 -69
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/testing/connectutils.py +56 -7
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/testing/sqlutils.py +20 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/util.py +5 -1
- pyspark_client-4.1.0.dev4/pyspark/version.py +1 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/worker.py +298 -70
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4/pyspark_client.egg-info}/PKG-INFO +6 -4
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark_client.egg-info/SOURCES.txt +4 -1
- pyspark_client-4.1.0.dev4/pyspark_client.egg-info/requires.txt +8 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/setup.py +5 -2
- pyspark_client-4.1.0.dev2/pyspark/pipelines/spark_connect_graph_element_registry.py +0 -111
- pyspark_client-4.1.0.dev2/pyspark/sql/connect/proto/base_pb2.py +0 -265
- pyspark_client-4.1.0.dev2/pyspark/sql/connect/proto/pipelines_pb2.py +0 -94
- pyspark_client-4.1.0.dev2/pyspark/sql/connect/proto/pipelines_pb2.pyi +0 -877
- pyspark_client-4.1.0.dev2/pyspark/sql/connect/proto/relations_pb2.py +0 -249
- pyspark_client-4.1.0.dev2/pyspark/sql/connect/proto/types_pb2.py +0 -105
- pyspark_client-4.1.0.dev2/pyspark/version.py +0 -1
- pyspark_client-4.1.0.dev2/pyspark_client.egg-info/requires.txt +0 -7
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/MANIFEST.in +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/README.md +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/_globals.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/_typing.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/accumulators.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/cloudpickle/cloudpickle_fast.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/conf.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/daemon.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/errors/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/errors/error_classes.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/errors/exceptions/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/errors/exceptions/base.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/errors/exceptions/connect.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/errors/exceptions/tblib.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/errors/utils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/errors_doc_gen.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/find_spark_home.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/install.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/instrumentation_utils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/java_gateway.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/join.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/logger/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/loose_version.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/_typing.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/base.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/classification.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/clustering.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/common.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/base.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/classification.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/evaluation.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/feature.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/functions.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/io_utils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/pipeline.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/proto.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/readwrite.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/serialize.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/summarizer.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/tuning.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/util.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/deepspeed/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/deepspeed/deepspeed_distributor.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/dl_util.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/evaluation.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/feature.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/fpm.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/functions.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/image.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/linalg/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/model_cache.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/param/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/param/_shared_params_code_gen.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/param/shared.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/pipeline.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/recommendation.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/regression.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/stat.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/torch/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/torch/data.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/torch/distributor.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/torch/log_communication.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/torch/torch_run_process_wrapper.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/tree.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/tuning.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/util.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/wrapper.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/_typing.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/classification.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/clustering.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/common.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/evaluation.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/feature.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/fpm.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/linalg/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/linalg/distributed.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/random.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/recommendation.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/regression.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/stat/KernelDensity.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/stat/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/stat/_statistics.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/stat/distribution.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/stat/test.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/tree.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/util.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/_typing.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/accessors.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/base.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/categorical.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/config.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/correlation.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/base.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/binary_ops.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/boolean_ops.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/categorical_ops.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/complex_ops.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/date_ops.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/datetime_ops.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/null_ops.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/num_ops.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/string_ops.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/timedelta_ops.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/udt_ops.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/datetimes.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/exceptions.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/extensions.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/frame.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/generic.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/groupby.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/indexes/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/indexes/base.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/indexes/category.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/indexes/datetimes.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/indexes/multi.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/indexes/timedelta.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/indexing.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/internal.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/missing/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/missing/common.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/missing/frame.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/missing/general_functions.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/missing/groupby.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/missing/indexes.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/missing/resample.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/missing/scalars.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/missing/series.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/missing/window.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/mlflow.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/namespace.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/numpy_compat.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/plot/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/plot/core.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/plot/matplotlib.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/plot/plotly.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/resample.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/series.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/spark/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/spark/accessors.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/spark/utils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/sql_formatter.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/sql_processor.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/strings.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/testing.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/typedef/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/typedef/typehints.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/usage_logging/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/usage_logging/usage_logger.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/utils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/window.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pipelines/block_session_mutations.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pipelines/flow.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pipelines/logging_utils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pipelines/type_error_utils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/profiler.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/py.typed +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/rddsampler.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/resource/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/resource/information.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/resource/profile.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/resource/requests.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/resultiterable.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/serializers.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/shell.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/shuffle.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/_typing.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/avro/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/catalog.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/conf.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/avro/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/avro/functions.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/catalog.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/client/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/client/reattach.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/client/retries.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/conf.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/conversion.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/datasource.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/functions/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/functions/partitioning.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/logging.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/merge.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/observation.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/profiler.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/catalog_pb2.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/commands_pb2.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/example_plugins_pb2.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/expressions_pb2.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/ml_common_pb2.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/ml_pb2.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/protobuf/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/protobuf/functions.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/readwriter.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/resource/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/resource/profile.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/shell/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/shell/progress.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/sql_formatter.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/streaming/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/streaming/query.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/streaming/readwriter.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/streaming/worker/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/streaming/worker/listener_worker.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/table_arg.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/udf.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/udtf.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/context.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/dataframe.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/datasource.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/datasource_internal.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/functions/partitioning.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/internal.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/merge.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/metrics.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/observation.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/_typing/protocols/__init__.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/_typing/protocols/frame.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/_typing/protocols/series.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/conversion.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/map_ops.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/utils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/plot/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/plot/core.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/plot/plotly.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/profiler.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/protobuf/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/protobuf/functions.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/readwriter.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/session.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/sql_formatter.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/listener.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/map_state_client.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/proto/StateMessage_pb2.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/proto/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/python_streaming_source_runner.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/readwriter.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/state.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/stateful_processor.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/stateful_processor_api_client.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/stateful_processor_util.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/transform_with_state_driver_worker.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/value_state_client.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/table_arg.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/tvf_argument.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/udtf.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/utils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/variant_utils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/window.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/worker/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/worker/lookup_data_sources.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/statcounter.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/storagelevel.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/streaming/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/streaming/context.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/streaming/dstream.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/streaming/kinesis.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/streaming/listener.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/streaming/util.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/taskcontext.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/testing/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/testing/mllibutils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/testing/mlutils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/testing/objects.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/testing/pandasutils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/testing/streamingutils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/testing/utils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/traceback_utils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/worker_util.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark_client.egg-info/dependency_links.txt +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark_client.egg-info/top_level.txt +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pyspark-client
|
|
3
|
-
Version: 4.1.0.dev2
|
|
3
|
+
Version: 4.1.0.dev4
|
|
4
4
|
Summary: Python Spark Connect client for Apache Spark
|
|
5
5
|
Home-page: https://github.com/apache/spark/tree/master/python
|
|
6
6
|
Author: Spark Developers
|
|
@@ -11,6 +11,7 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.11
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.12
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.13
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
14
15
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
15
16
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
16
17
|
Classifier: Typing :: Typed
|
|
@@ -18,9 +19,10 @@ Requires-Python: >=3.10
|
|
|
18
19
|
Description-Content-Type: text/markdown
|
|
19
20
|
Requires-Dist: pandas>=2.2.0
|
|
20
21
|
Requires-Dist: pyarrow>=15.0.0
|
|
21
|
-
Requires-Dist: grpcio>=1.
|
|
22
|
-
Requires-Dist: grpcio-status>=1.
|
|
23
|
-
Requires-Dist: googleapis-common-protos>=1.
|
|
22
|
+
Requires-Dist: grpcio>=1.76.0
|
|
23
|
+
Requires-Dist: grpcio-status>=1.76.0
|
|
24
|
+
Requires-Dist: googleapis-common-protos>=1.71.0
|
|
25
|
+
Requires-Dist: zstandard>=0.25.0
|
|
24
26
|
Requires-Dist: numpy>=1.21
|
|
25
27
|
Requires-Dist: pyyaml>=3.11
|
|
26
28
|
Dynamic: author
|
|
@@ -783,6 +783,12 @@ def _class_getstate(obj):
|
|
|
783
783
|
|
|
784
784
|
clsdict.pop("__dict__", None) # unpicklable property object
|
|
785
785
|
|
|
786
|
+
if sys.version_info >= (3, 14):
|
|
787
|
+
# PEP-649/749: __annotate_func__ contains a closure that references the class
|
|
788
|
+
# dict. We need to exclude it from pickling. Python will recreate it when
|
|
789
|
+
# __annotations__ is accessed at unpickling time.
|
|
790
|
+
clsdict.pop("__annotate_func__", None)
|
|
791
|
+
|
|
786
792
|
return (clsdict, {})
|
|
787
793
|
|
|
788
794
|
|
|
@@ -1190,6 +1196,10 @@ def _class_setstate(obj, state):
|
|
|
1190
1196
|
for subclass in registry:
|
|
1191
1197
|
obj.register(subclass)
|
|
1192
1198
|
|
|
1199
|
+
# PEP-649/749: During pickling, we excluded the __annotate_func__ attribute but it
|
|
1200
|
+
# will be created by Python. Subsequently, annotations will be recreated when
|
|
1201
|
+
# __annotations__ is accessed.
|
|
1202
|
+
|
|
1193
1203
|
return obj
|
|
1194
1204
|
|
|
1195
1205
|
|
|
@@ -1301,12 +1311,9 @@ class Pickler(pickle.Pickler):
|
|
|
1301
1311
|
def dump(self, obj):
|
|
1302
1312
|
try:
|
|
1303
1313
|
return super().dump(obj)
|
|
1304
|
-
except
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
raise pickle.PicklingError(msg) from e
|
|
1308
|
-
else:
|
|
1309
|
-
raise
|
|
1314
|
+
except RecursionError as e:
|
|
1315
|
+
msg = "Could not pickle object as excessively deep recursion required."
|
|
1316
|
+
raise pickle.PicklingError(msg) from e
|
|
1310
1317
|
|
|
1311
1318
|
def __init__(self, file, protocol=None, buffer_callback=None):
|
|
1312
1319
|
if protocol is None:
|
{pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/errors/error-conditions.json
RENAMED
|
@@ -549,6 +549,16 @@
|
|
|
549
549
|
"<arg1> and <arg2> should be of the same length, got <arg1_length> and <arg2_length>."
|
|
550
550
|
]
|
|
551
551
|
},
|
|
552
|
+
"MALFORMED_GEOGRAPHY": {
|
|
553
|
+
"message": [
|
|
554
|
+
"Geography binary is malformed. Please check the data source is valid."
|
|
555
|
+
]
|
|
556
|
+
},
|
|
557
|
+
"MALFORMED_GEOMETRY": {
|
|
558
|
+
"message": [
|
|
559
|
+
"Geometry binary is malformed. Please check the data source is valid."
|
|
560
|
+
]
|
|
561
|
+
},
|
|
552
562
|
"MALFORMED_VARIANT": {
|
|
553
563
|
"message": [
|
|
554
564
|
"Variant binary is malformed. Please check the data source is valid."
|
|
@@ -1134,6 +1144,24 @@
|
|
|
1134
1144
|
"Cannot serialize the function `<name>`. If you accessed the Spark session, or a DataFrame defined outside of the function, or any object that contains a Spark session, please be aware that they are not allowed in Spark Connect. For `foreachBatch`, please access the Spark session using `df.sparkSession`, where `df` is the first parameter in your `foreachBatch` function. For `StreamingQueryListener`, please access the Spark session using `self.spark`. For details please check out the PySpark doc for `foreachBatch` and `StreamingQueryListener`."
|
|
1135
1145
|
]
|
|
1136
1146
|
},
|
|
1147
|
+
"ST_INVALID_ALGORITHM_VALUE" : {
|
|
1148
|
+
"message" : [
|
|
1149
|
+
"Invalid or unsupported edge interpolation algorithm value: '<alg>'."
|
|
1150
|
+
],
|
|
1151
|
+
"sqlState" : "22023"
|
|
1152
|
+
},
|
|
1153
|
+
"ST_INVALID_CRS_VALUE" : {
|
|
1154
|
+
"message" : [
|
|
1155
|
+
"Invalid or unsupported CRS (coordinate reference system) value: '<crs>'."
|
|
1156
|
+
],
|
|
1157
|
+
"sqlState" : "22023"
|
|
1158
|
+
},
|
|
1159
|
+
"ST_INVALID_SRID_VALUE" : {
|
|
1160
|
+
"message" : [
|
|
1161
|
+
"Invalid or unsupported SRID (spatial reference identifier) value: <srid>."
|
|
1162
|
+
],
|
|
1163
|
+
"sqlState" : "22023"
|
|
1164
|
+
},
|
|
1137
1165
|
"TEST_CLASS_NOT_COMPILED": {
|
|
1138
1166
|
"message": [
|
|
1139
1167
|
"<test_class_path> doesn't exist. Spark sql test classes are not compiled."
|
{pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/errors/exceptions/captured.py
RENAMED
|
@@ -107,7 +107,8 @@ class CapturedException(PySparkException):
|
|
|
107
107
|
if self._origin is not None and is_instance_of(
|
|
108
108
|
gw, self._origin, "org.apache.spark.SparkThrowable"
|
|
109
109
|
):
|
|
110
|
-
|
|
110
|
+
utils = SparkContext._jvm.PythonErrorUtils # type: ignore[union-attr]
|
|
111
|
+
return utils.getCondition(self._origin)
|
|
111
112
|
else:
|
|
112
113
|
return None
|
|
113
114
|
|
|
@@ -118,7 +119,6 @@ class CapturedException(PySparkException):
|
|
|
118
119
|
def getMessageParameters(self) -> Optional[Dict[str, str]]:
|
|
119
120
|
from pyspark import SparkContext
|
|
120
121
|
from py4j.java_gateway import is_instance_of
|
|
121
|
-
from py4j.protocol import Py4JError
|
|
122
122
|
|
|
123
123
|
assert SparkContext._gateway is not None
|
|
124
124
|
|
|
@@ -126,38 +126,28 @@ class CapturedException(PySparkException):
|
|
|
126
126
|
if self._origin is not None and is_instance_of(
|
|
127
127
|
gw, self._origin, "org.apache.spark.SparkThrowable"
|
|
128
128
|
):
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
except Py4JError as e:
|
|
132
|
-
if "py4j.Py4JException" in str(e) and "Method getMessageParameters" in str(e):
|
|
133
|
-
return None
|
|
134
|
-
raise e
|
|
129
|
+
utils = SparkContext._jvm.PythonErrorUtils # type: ignore[union-attr]
|
|
130
|
+
return dict(utils.getMessageParameters(self._origin))
|
|
135
131
|
else:
|
|
136
132
|
return None
|
|
137
133
|
|
|
138
134
|
def getSqlState(self) -> Optional[str]:
|
|
139
135
|
from pyspark import SparkContext
|
|
140
136
|
from py4j.java_gateway import is_instance_of
|
|
141
|
-
from py4j.protocol import Py4JError
|
|
142
137
|
|
|
143
138
|
assert SparkContext._gateway is not None
|
|
144
139
|
gw = SparkContext._gateway
|
|
145
140
|
if self._origin is not None and is_instance_of(
|
|
146
141
|
gw, self._origin, "org.apache.spark.SparkThrowable"
|
|
147
142
|
):
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
except Py4JError as e:
|
|
151
|
-
if "py4j.Py4JException" in str(e) and "Method getSqlState" in str(e):
|
|
152
|
-
return None
|
|
153
|
-
raise e
|
|
143
|
+
utils = SparkContext._jvm.PythonErrorUtils # type: ignore[union-attr]
|
|
144
|
+
return utils.getSqlState(self._origin)
|
|
154
145
|
else:
|
|
155
146
|
return None
|
|
156
147
|
|
|
157
148
|
def getMessage(self) -> str:
|
|
158
149
|
from pyspark import SparkContext
|
|
159
150
|
from py4j.java_gateway import is_instance_of
|
|
160
|
-
from py4j.protocol import Py4JError
|
|
161
151
|
|
|
162
152
|
assert SparkContext._gateway is not None
|
|
163
153
|
gw = SparkContext._gateway
|
|
@@ -165,21 +155,12 @@ class CapturedException(PySparkException):
|
|
|
165
155
|
if self._origin is not None and is_instance_of(
|
|
166
156
|
gw, self._origin, "org.apache.spark.SparkThrowable"
|
|
167
157
|
):
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
if "py4j.Py4JException" in str(e) and "Method getCondition" in str(e):
|
|
172
|
-
return ""
|
|
173
|
-
raise e
|
|
174
|
-
try:
|
|
175
|
-
message_parameters = self._origin.getMessageParameters()
|
|
176
|
-
except Py4JError as e:
|
|
177
|
-
if "py4j.Py4JException" in str(e) and "Method getMessageParameters" in str(e):
|
|
178
|
-
return ""
|
|
179
|
-
raise e
|
|
158
|
+
utils = SparkContext._jvm.PythonErrorUtils # type: ignore[union-attr]
|
|
159
|
+
errorClass = utils.getCondition(self._origin)
|
|
160
|
+
messageParameters = utils.getMessageParameters(self._origin)
|
|
180
161
|
|
|
181
162
|
error_message = getattr(gw.jvm, "org.apache.spark.SparkThrowableHelper").getMessage(
|
|
182
|
-
|
|
163
|
+
errorClass, messageParameters
|
|
183
164
|
)
|
|
184
165
|
|
|
185
166
|
return error_message
|
|
@@ -189,7 +170,6 @@ class CapturedException(PySparkException):
|
|
|
189
170
|
def getQueryContext(self) -> List[BaseQueryContext]:
|
|
190
171
|
from pyspark import SparkContext
|
|
191
172
|
from py4j.java_gateway import is_instance_of
|
|
192
|
-
from py4j.protocol import Py4JError
|
|
193
173
|
|
|
194
174
|
assert SparkContext._gateway is not None
|
|
195
175
|
|
|
@@ -198,13 +178,8 @@ class CapturedException(PySparkException):
|
|
|
198
178
|
gw, self._origin, "org.apache.spark.SparkThrowable"
|
|
199
179
|
):
|
|
200
180
|
contexts: List[BaseQueryContext] = []
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
except Py4JError as e:
|
|
204
|
-
if "py4j.Py4JException" in str(e) and "Method getQueryContext" in str(e):
|
|
205
|
-
return []
|
|
206
|
-
raise e
|
|
207
|
-
for q in context:
|
|
181
|
+
utils = SparkContext._jvm.PythonErrorUtils # type: ignore[union-attr]
|
|
182
|
+
for q in utils.getQueryContext(self._origin):
|
|
208
183
|
if q.contextType().toString() == "SQL":
|
|
209
184
|
contexts.append(SQLQueryContext(q))
|
|
210
185
|
else:
|
|
@@ -50,6 +50,10 @@ class JSONFormatter(logging.Formatter):
|
|
|
50
50
|
|
|
51
51
|
default_msec_format = "%s.%03d"
|
|
52
52
|
|
|
53
|
+
def __init__(self, ensure_ascii: bool = False):
|
|
54
|
+
super().__init__()
|
|
55
|
+
self._ensure_ascii = ensure_ascii
|
|
56
|
+
|
|
53
57
|
def format(self, record: logging.LogRecord) -> str:
|
|
54
58
|
"""
|
|
55
59
|
Format the specified record as a JSON string.
|
|
@@ -69,7 +73,7 @@ class JSONFormatter(logging.Formatter):
|
|
|
69
73
|
"level": record.levelname,
|
|
70
74
|
"logger": record.name,
|
|
71
75
|
"msg": record.getMessage(),
|
|
72
|
-
"context": record.__dict__.get("
|
|
76
|
+
"context": record.__dict__.get("context", {}),
|
|
73
77
|
}
|
|
74
78
|
if record.exc_info:
|
|
75
79
|
exc_type, exc_value, exc_tb = record.exc_info
|
|
@@ -89,7 +93,7 @@ class JSONFormatter(logging.Formatter):
|
|
|
89
93
|
"msg": str(exc_value),
|
|
90
94
|
"stacktrace": structured_stacktrace,
|
|
91
95
|
}
|
|
92
|
-
return json.dumps(log_entry, ensure_ascii=
|
|
96
|
+
return json.dumps(log_entry, ensure_ascii=self._ensure_ascii)
|
|
93
97
|
|
|
94
98
|
|
|
95
99
|
class PySparkLogger(logging.Logger):
|
|
@@ -136,7 +140,19 @@ class PySparkLogger(logging.Logger):
|
|
|
136
140
|
"""
|
|
137
141
|
|
|
138
142
|
def __init__(self, name: str = "PySparkLogger"):
|
|
143
|
+
from pyspark.logger.worker_io import JSONFormatterWithMarker
|
|
144
|
+
|
|
139
145
|
super().__init__(name, level=logging.WARN)
|
|
146
|
+
|
|
147
|
+
root_logger = logging.getLogger()
|
|
148
|
+
if any(
|
|
149
|
+
isinstance(h, logging.StreamHandler)
|
|
150
|
+
and isinstance(h.formatter, JSONFormatterWithMarker)
|
|
151
|
+
for h in root_logger.handlers
|
|
152
|
+
):
|
|
153
|
+
# Likely in the `capture_outputs` context, so don't add a handler
|
|
154
|
+
return
|
|
155
|
+
|
|
140
156
|
_handler = logging.StreamHandler()
|
|
141
157
|
self.addHandler(_handler)
|
|
142
158
|
|
|
@@ -291,7 +307,7 @@ class PySparkLogger(logging.Logger):
|
|
|
291
307
|
msg=msg,
|
|
292
308
|
args=args,
|
|
293
309
|
exc_info=exc_info,
|
|
294
|
-
extra={"
|
|
310
|
+
extra={"context": kwargs},
|
|
295
311
|
stack_info=stack_info,
|
|
296
312
|
stacklevel=stacklevel,
|
|
297
313
|
)
|
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
3
|
+
# contributor license agreements. See the NOTICE file distributed with
|
|
4
|
+
# this work for additional information regarding copyright ownership.
|
|
5
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
6
|
+
# (the "License"); you may not use this file except in compliance with
|
|
7
|
+
# the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
#
|
|
17
|
+
|
|
18
|
+
from contextlib import contextmanager
|
|
19
|
+
import inspect
|
|
20
|
+
import io
|
|
21
|
+
import logging
|
|
22
|
+
import os
|
|
23
|
+
import sys
|
|
24
|
+
import time
|
|
25
|
+
from typing import BinaryIO, Callable, Generator, Iterable, Iterator, Optional, TextIO, Union
|
|
26
|
+
from types import FrameType, TracebackType
|
|
27
|
+
|
|
28
|
+
from pyspark.logger.logger import JSONFormatter
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class DelegatingTextIOWrapper(TextIO):
|
|
32
|
+
"""A TextIO that delegates all operations to another TextIO object."""
|
|
33
|
+
|
|
34
|
+
def __init__(self, delegate: TextIO):
|
|
35
|
+
self._delegate = delegate
|
|
36
|
+
|
|
37
|
+
# Required TextIO properties
|
|
38
|
+
@property
|
|
39
|
+
def encoding(self) -> str:
|
|
40
|
+
return self._delegate.encoding
|
|
41
|
+
|
|
42
|
+
@property
|
|
43
|
+
def errors(self) -> Optional[str]:
|
|
44
|
+
return self._delegate.errors
|
|
45
|
+
|
|
46
|
+
@property
|
|
47
|
+
def newlines(self) -> Optional[Union[str, tuple[str, ...]]]:
|
|
48
|
+
return self._delegate.newlines
|
|
49
|
+
|
|
50
|
+
@property
|
|
51
|
+
def buffer(self) -> BinaryIO:
|
|
52
|
+
return self._delegate.buffer
|
|
53
|
+
|
|
54
|
+
@property
|
|
55
|
+
def mode(self) -> str:
|
|
56
|
+
return self._delegate.mode
|
|
57
|
+
|
|
58
|
+
@property
|
|
59
|
+
def name(self) -> str:
|
|
60
|
+
return self._delegate.name
|
|
61
|
+
|
|
62
|
+
@property
|
|
63
|
+
def line_buffering(self) -> int:
|
|
64
|
+
return self._delegate.line_buffering
|
|
65
|
+
|
|
66
|
+
@property
|
|
67
|
+
def closed(self) -> bool:
|
|
68
|
+
return self._delegate.closed
|
|
69
|
+
|
|
70
|
+
# Iterator protocol
|
|
71
|
+
def __iter__(self) -> Iterator[str]:
|
|
72
|
+
return iter(self._delegate)
|
|
73
|
+
|
|
74
|
+
def __next__(self) -> str:
|
|
75
|
+
return next(self._delegate)
|
|
76
|
+
|
|
77
|
+
# Context manager protocol
|
|
78
|
+
def __enter__(self) -> TextIO:
|
|
79
|
+
return self._delegate.__enter__()
|
|
80
|
+
|
|
81
|
+
def __exit__(
|
|
82
|
+
self,
|
|
83
|
+
exc_type: Optional[type[BaseException]],
|
|
84
|
+
exc_val: Optional[BaseException],
|
|
85
|
+
exc_tb: Optional[TracebackType],
|
|
86
|
+
) -> None:
|
|
87
|
+
return self._delegate.__exit__(exc_type, exc_val, exc_tb)
|
|
88
|
+
|
|
89
|
+
# Core I/O methods
|
|
90
|
+
def write(self, s: str) -> int:
|
|
91
|
+
return self._delegate.write(s)
|
|
92
|
+
|
|
93
|
+
def writelines(self, lines: Iterable[str]) -> None:
|
|
94
|
+
return self._delegate.writelines(lines)
|
|
95
|
+
|
|
96
|
+
def read(self, size: int = -1) -> str:
|
|
97
|
+
return self._delegate.read(size)
|
|
98
|
+
|
|
99
|
+
def readline(self, size: int = -1) -> str:
|
|
100
|
+
return self._delegate.readline(size)
|
|
101
|
+
|
|
102
|
+
def readlines(self, hint: int = -1) -> list[str]:
|
|
103
|
+
return self._delegate.readlines(hint)
|
|
104
|
+
|
|
105
|
+
# Stream control methods
|
|
106
|
+
def close(self) -> None:
|
|
107
|
+
return self._delegate.close()
|
|
108
|
+
|
|
109
|
+
def flush(self) -> None:
|
|
110
|
+
return self._delegate.flush()
|
|
111
|
+
|
|
112
|
+
def seek(self, offset: int, whence: int = io.SEEK_SET) -> int:
|
|
113
|
+
return self._delegate.seek(offset, whence)
|
|
114
|
+
|
|
115
|
+
def tell(self) -> int:
|
|
116
|
+
return self._delegate.tell()
|
|
117
|
+
|
|
118
|
+
def truncate(self, size: Optional[int] = None) -> int:
|
|
119
|
+
return self._delegate.truncate(size)
|
|
120
|
+
|
|
121
|
+
# Stream capability methods
|
|
122
|
+
def fileno(self) -> int:
|
|
123
|
+
return self._delegate.fileno()
|
|
124
|
+
|
|
125
|
+
def isatty(self) -> bool:
|
|
126
|
+
return self._delegate.isatty()
|
|
127
|
+
|
|
128
|
+
def readable(self) -> bool:
|
|
129
|
+
return self._delegate.readable()
|
|
130
|
+
|
|
131
|
+
def seekable(self) -> bool:
|
|
132
|
+
return self._delegate.seekable()
|
|
133
|
+
|
|
134
|
+
def writable(self) -> bool:
|
|
135
|
+
return self._delegate.writable()
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
class JSONFormatterWithMarker(JSONFormatter):
|
|
139
|
+
default_microsec_format = "%s.%06d"
|
|
140
|
+
|
|
141
|
+
def __init__(self, marker: str, worker_id: str, context_provider: Callable[[], dict[str, str]]):
|
|
142
|
+
super().__init__(ensure_ascii=True)
|
|
143
|
+
self._marker = marker
|
|
144
|
+
self._worker_id = worker_id
|
|
145
|
+
self._context_provider = context_provider
|
|
146
|
+
|
|
147
|
+
def format(self, record: logging.LogRecord) -> str:
|
|
148
|
+
context = self._context_provider()
|
|
149
|
+
if context:
|
|
150
|
+
context.update(record.__dict__.get("context", {}))
|
|
151
|
+
record.__dict__["context"] = context
|
|
152
|
+
return f"{self._marker}:{self._worker_id}:{super().format(record)}"
|
|
153
|
+
|
|
154
|
+
def formatTime(self, record: logging.LogRecord, datefmt: Optional[str] = None) -> str:
|
|
155
|
+
ct = self.converter(record.created)
|
|
156
|
+
if datefmt:
|
|
157
|
+
s = time.strftime(datefmt, ct)
|
|
158
|
+
else:
|
|
159
|
+
s = time.strftime(self.default_time_format, ct)
|
|
160
|
+
if self.default_microsec_format:
|
|
161
|
+
s = self.default_microsec_format % (
|
|
162
|
+
s,
|
|
163
|
+
int((record.created - int(record.created)) * 1000000),
|
|
164
|
+
)
|
|
165
|
+
elif self.default_msec_format:
|
|
166
|
+
s = self.default_msec_format % (s, record.msecs)
|
|
167
|
+
s = f"{s}{time.strftime('%z', ct)}"
|
|
168
|
+
return s
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
class JsonOutput(DelegatingTextIOWrapper):
|
|
172
|
+
def __init__(
|
|
173
|
+
self,
|
|
174
|
+
delegate: TextIO,
|
|
175
|
+
json_out: TextIO,
|
|
176
|
+
logger_name: str,
|
|
177
|
+
log_level: int,
|
|
178
|
+
marker: str,
|
|
179
|
+
worker_id: str,
|
|
180
|
+
context_provider: Callable[[], dict[str, str]],
|
|
181
|
+
):
|
|
182
|
+
super().__init__(delegate)
|
|
183
|
+
self._json_out = json_out
|
|
184
|
+
self._logger_name = logger_name
|
|
185
|
+
self._log_level = log_level
|
|
186
|
+
self._formatter = JSONFormatterWithMarker(marker, worker_id, context_provider)
|
|
187
|
+
|
|
188
|
+
def write(self, s: str) -> int:
|
|
189
|
+
if s.strip():
|
|
190
|
+
log_record = logging.LogRecord(
|
|
191
|
+
name=self._logger_name,
|
|
192
|
+
level=self._log_level,
|
|
193
|
+
pathname=None, # type: ignore[arg-type]
|
|
194
|
+
lineno=None, # type: ignore[arg-type]
|
|
195
|
+
msg=s.strip(),
|
|
196
|
+
args=None,
|
|
197
|
+
exc_info=None,
|
|
198
|
+
func=None,
|
|
199
|
+
sinfo=None,
|
|
200
|
+
)
|
|
201
|
+
self._json_out.write(f"{self._formatter.format(log_record)}\n")
|
|
202
|
+
self._json_out.flush()
|
|
203
|
+
return self._delegate.write(s)
|
|
204
|
+
|
|
205
|
+
def writelines(self, lines: Iterable[str]) -> None:
|
|
206
|
+
# Process each line through our JSON logging logic
|
|
207
|
+
for line in lines:
|
|
208
|
+
self.write(line)
|
|
209
|
+
|
|
210
|
+
def close(self) -> None:
|
|
211
|
+
pass
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def context_provider() -> dict[str, str]:
|
|
215
|
+
"""
|
|
216
|
+
Provides context information for logging, including caller function name.
|
|
217
|
+
Finds the function name from the bottom of the stack, ignoring Python builtin
|
|
218
|
+
libraries and PySpark modules. Test packages are included.
|
|
219
|
+
|
|
220
|
+
Returns:
|
|
221
|
+
dict[str, str]: A dictionary containing context information including:
|
|
222
|
+
- func_name: Name of the function that initiated the logging
|
|
223
|
+
- class_name: Name of the class that initiated the logging if available
|
|
224
|
+
"""
|
|
225
|
+
|
|
226
|
+
def is_pyspark_module(module_name: str) -> bool:
|
|
227
|
+
return module_name.startswith("pyspark.") and ".tests." not in module_name
|
|
228
|
+
|
|
229
|
+
bottom: Optional[FrameType] = None
|
|
230
|
+
|
|
231
|
+
# Get caller function information using inspect
|
|
232
|
+
try:
|
|
233
|
+
frame = inspect.currentframe()
|
|
234
|
+
is_in_pyspark_module = False
|
|
235
|
+
|
|
236
|
+
if frame:
|
|
237
|
+
while frame.f_back:
|
|
238
|
+
f_back = frame.f_back
|
|
239
|
+
module_name = f_back.f_globals.get("__name__", "")
|
|
240
|
+
|
|
241
|
+
if is_pyspark_module(module_name):
|
|
242
|
+
if not is_in_pyspark_module:
|
|
243
|
+
bottom = frame
|
|
244
|
+
is_in_pyspark_module = True
|
|
245
|
+
else:
|
|
246
|
+
is_in_pyspark_module = False
|
|
247
|
+
|
|
248
|
+
frame = f_back
|
|
249
|
+
except Exception:
|
|
250
|
+
# If anything goes wrong with introspection, don't fail the logging
|
|
251
|
+
# Just continue without caller information
|
|
252
|
+
pass
|
|
253
|
+
|
|
254
|
+
context = {}
|
|
255
|
+
if bottom:
|
|
256
|
+
context["func_name"] = bottom.f_code.co_name
|
|
257
|
+
if "self" in bottom.f_locals:
|
|
258
|
+
context["class_name"] = bottom.f_locals["self"].__class__.__name__
|
|
259
|
+
elif "cls" in bottom.f_locals:
|
|
260
|
+
context["class_name"] = bottom.f_locals["cls"].__name__
|
|
261
|
+
return context
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
@contextmanager
|
|
265
|
+
def capture_outputs(
|
|
266
|
+
context_provider: Callable[[], dict[str, str]] = context_provider
|
|
267
|
+
) -> Generator[None, None, None]:
|
|
268
|
+
if "PYSPARK_SPARK_SESSION_UUID" in os.environ:
|
|
269
|
+
marker: str = "PYTHON_WORKER_LOGGING"
|
|
270
|
+
worker_id: str = str(os.getpid())
|
|
271
|
+
json_out = original_stdout = sys.stdout
|
|
272
|
+
delegate = original_stderr = sys.stderr
|
|
273
|
+
|
|
274
|
+
handler = logging.StreamHandler(json_out)
|
|
275
|
+
handler.setFormatter(JSONFormatterWithMarker(marker, worker_id, context_provider))
|
|
276
|
+
logger = logging.getLogger()
|
|
277
|
+
try:
|
|
278
|
+
sys.stdout = JsonOutput(
|
|
279
|
+
delegate, json_out, "stdout", logging.INFO, marker, worker_id, context_provider
|
|
280
|
+
)
|
|
281
|
+
sys.stderr = JsonOutput(
|
|
282
|
+
delegate, json_out, "stderr", logging.ERROR, marker, worker_id, context_provider
|
|
283
|
+
)
|
|
284
|
+
logger.addHandler(handler)
|
|
285
|
+
try:
|
|
286
|
+
yield
|
|
287
|
+
finally:
|
|
288
|
+
# Send an empty line to indicate the end of the outputs.
|
|
289
|
+
json_out.write(f"{marker}:{worker_id}:\n")
|
|
290
|
+
json_out.flush()
|
|
291
|
+
finally:
|
|
292
|
+
sys.stdout = original_stdout
|
|
293
|
+
sys.stderr = original_stderr
|
|
294
|
+
logger.removeHandler(handler)
|
|
295
|
+
handler.close()
|
|
296
|
+
else:
|
|
297
|
+
yield
|
|
@@ -38,7 +38,7 @@ from pyspark.pandas.exceptions import PandasNotImplementedError
|
|
|
38
38
|
MAX_MISSING_PARAMS_SIZE = 5
|
|
39
39
|
COMMON_PARAMETER_SET = {"kwargs", "args", "cls"}
|
|
40
40
|
MODULE_GROUP_MATCH = [(pd, ps), (pdw, psw), (pdg, psg)]
|
|
41
|
-
PANDAS_LATEST_VERSION = "2.3.
|
|
41
|
+
PANDAS_LATEST_VERSION = "2.3.3"
|
|
42
42
|
|
|
43
43
|
RST_HEADER = """
|
|
44
44
|
=====================
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
3
|
+
# contributor license agreements. See the NOTICE file distributed with
|
|
4
|
+
# this work for additional information regarding copyright ownership.
|
|
5
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
6
|
+
# (the "License"); you may not use this file except in compliance with
|
|
7
|
+
# the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
#
|
|
17
|
+
from contextlib import contextmanager
|
|
18
|
+
from typing import Generator, Optional
|
|
19
|
+
from pyspark.sql import SparkSession
|
|
20
|
+
|
|
21
|
+
from typing import Any, cast
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@contextmanager
|
|
25
|
+
def add_pipeline_analysis_context(
|
|
26
|
+
spark: SparkSession, dataflow_graph_id: str, flow_name: Optional[str]
|
|
27
|
+
) -> Generator[None, None, None]:
|
|
28
|
+
"""
|
|
29
|
+
Context manager that add PipelineAnalysisContext extension to the user context
|
|
30
|
+
used for pipeline specific analysis.
|
|
31
|
+
"""
|
|
32
|
+
extension_id = None
|
|
33
|
+
# Cast because mypy seems to think `spark` is a function, not an object.
|
|
34
|
+
# Likely related to SPARK-47544.
|
|
35
|
+
client = cast(Any, spark).client
|
|
36
|
+
try:
|
|
37
|
+
import pyspark.sql.connect.proto as pb2
|
|
38
|
+
from google.protobuf import any_pb2
|
|
39
|
+
|
|
40
|
+
analysis_context = pb2.PipelineAnalysisContext(
|
|
41
|
+
dataflow_graph_id=dataflow_graph_id, flow_name=flow_name
|
|
42
|
+
)
|
|
43
|
+
extension = any_pb2.Any()
|
|
44
|
+
extension.Pack(analysis_context)
|
|
45
|
+
extension_id = client.add_threadlocal_user_context_extension(extension)
|
|
46
|
+
yield
|
|
47
|
+
finally:
|
|
48
|
+
client.remove_user_context_extension(extension_id)
|