pyspark-client 4.1.0.dev2__tar.gz → 4.1.0.dev3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyspark_client-4.1.0.dev2/pyspark_client.egg-info → pyspark_client-4.1.0.dev3}/PKG-INFO +1 -1
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/errors/error-conditions.json +18 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/__init__.py +2 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/api.py +48 -5
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/cli.py +17 -3
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/graph_element_registry.py +2 -2
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/init_cli.py +1 -0
- pyspark_client-4.1.0.dev2/pyspark/pipelines/dataset.py → pyspark_client-4.1.0.dev3/pyspark/pipelines/output.py +16 -7
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/source_code_location.py +28 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/spark_connect_graph_element_registry.py +68 -31
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/spark_connect_pipeline.py +4 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/avro/functions.py +3 -3
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/column.py +53 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/_typing.py +1 -1
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/client/artifact.py +55 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/client/core.py +72 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/column.py +18 -3
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/dataframe.py +17 -4
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/expressions.py +1 -1
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/functions/builtin.py +361 -22
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/group.py +12 -2
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/plan.py +79 -12
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/base_pb2.py +120 -116
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/base_pb2.pyi +164 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/base_pb2_grpc.py +55 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/catalog_pb2.py +2 -2
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/commands_pb2.py +2 -2
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/common_pb2.py +17 -15
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/common_pb2.pyi +28 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/example_plugins_pb2.py +2 -2
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/expressions_pb2.py +2 -2
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/ml_common_pb2.py +2 -2
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/ml_pb2.py +2 -2
- pyspark_client-4.1.0.dev3/pyspark/sql/connect/proto/pipelines_pb2.py +130 -0
- pyspark_client-4.1.0.dev3/pyspark/sql/connect/proto/pipelines_pb2.pyi +1528 -0
- pyspark_client-4.1.0.dev3/pyspark/sql/connect/proto/relations_pb2.py +251 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/relations_pb2.pyi +57 -1
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/types_pb2.py +2 -2
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/session.py +78 -7
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/window.py +4 -1
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/conversion.py +30 -17
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/functions/__init__.py +9 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/functions/builtin.py +1044 -102
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/_typing/__init__.pyi +11 -1
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/functions.py +10 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/group_ops.py +72 -17
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/serializers.py +185 -10
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/typehints.py +91 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/list_state_client.py +10 -38
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/proto/StateMessage_pb2.py +3 -3
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/query.py +5 -2
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/types.py +204 -1
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/worker/data_source_pushdown_filters.py +3 -1
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/worker/plan_data_source_read.py +7 -2
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/worker/write_into_data_source.py +7 -2
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/testing/connectutils.py +32 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/testing/sqlutils.py +14 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/util.py +2 -0
- pyspark_client-4.1.0.dev3/pyspark/version.py +1 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/worker.py +139 -35
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3/pyspark_client.egg-info}/PKG-INFO +1 -1
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark_client.egg-info/SOURCES.txt +1 -1
- pyspark_client-4.1.0.dev2/pyspark/sql/connect/proto/pipelines_pb2.py +0 -94
- pyspark_client-4.1.0.dev2/pyspark/sql/connect/proto/pipelines_pb2.pyi +0 -877
- pyspark_client-4.1.0.dev2/pyspark/sql/connect/proto/relations_pb2.py +0 -249
- pyspark_client-4.1.0.dev2/pyspark/version.py +0 -1
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/MANIFEST.in +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/README.md +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/_globals.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/_typing.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/accumulators.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/cloudpickle/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/cloudpickle/cloudpickle.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/cloudpickle/cloudpickle_fast.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/conf.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/daemon.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/errors/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/errors/error_classes.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/errors/exceptions/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/errors/exceptions/base.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/errors/exceptions/captured.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/errors/exceptions/connect.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/errors/exceptions/tblib.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/errors/utils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/errors_doc_gen.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/find_spark_home.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/install.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/instrumentation_utils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/java_gateway.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/join.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/logger/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/logger/logger.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/loose_version.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/_typing.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/base.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/classification.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/clustering.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/common.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/base.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/classification.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/evaluation.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/feature.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/functions.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/io_utils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/pipeline.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/proto.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/readwrite.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/serialize.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/summarizer.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/tuning.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/util.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/deepspeed/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/deepspeed/deepspeed_distributor.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/dl_util.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/evaluation.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/feature.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/fpm.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/functions.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/image.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/linalg/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/model_cache.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/param/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/param/_shared_params_code_gen.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/param/shared.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/pipeline.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/recommendation.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/regression.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/stat.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/torch/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/torch/data.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/torch/distributor.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/torch/log_communication.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/torch/torch_run_process_wrapper.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/tree.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/tuning.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/util.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/wrapper.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/_typing.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/classification.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/clustering.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/common.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/evaluation.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/feature.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/fpm.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/linalg/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/linalg/distributed.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/random.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/recommendation.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/regression.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/stat/KernelDensity.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/stat/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/stat/_statistics.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/stat/distribution.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/stat/test.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/tree.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/util.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/_typing.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/accessors.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/base.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/categorical.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/config.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/correlation.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/base.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/binary_ops.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/boolean_ops.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/categorical_ops.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/complex_ops.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/date_ops.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/datetime_ops.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/null_ops.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/num_ops.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/string_ops.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/timedelta_ops.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/udt_ops.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/datetimes.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/exceptions.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/extensions.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/frame.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/generic.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/groupby.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/indexes/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/indexes/base.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/indexes/category.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/indexes/datetimes.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/indexes/multi.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/indexes/timedelta.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/indexing.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/internal.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/missing/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/missing/common.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/missing/frame.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/missing/general_functions.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/missing/groupby.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/missing/indexes.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/missing/resample.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/missing/scalars.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/missing/series.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/missing/window.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/mlflow.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/namespace.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/numpy_compat.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/plot/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/plot/core.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/plot/matplotlib.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/plot/plotly.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/resample.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/series.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/spark/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/spark/accessors.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/spark/utils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/sql_formatter.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/sql_processor.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/strings.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/supported_api_gen.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/testing.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/typedef/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/typedef/typehints.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/usage_logging/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/usage_logging/usage_logger.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/utils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/window.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/block_connect_access.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/block_session_mutations.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/flow.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/logging_utils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/type_error_utils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/profiler.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/py.typed +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/rddsampler.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/resource/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/resource/information.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/resource/profile.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/resource/requests.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/resultiterable.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/serializers.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/shell.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/shuffle.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/_typing.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/avro/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/catalog.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/conf.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/avro/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/avro/functions.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/catalog.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/client/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/client/reattach.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/client/retries.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/conf.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/conversion.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/datasource.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/functions/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/functions/partitioning.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/logging.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/merge.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/observation.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/profiler.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/catalog_pb2.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/commands_pb2.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/example_plugins_pb2.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/expressions_pb2.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/ml_common_pb2.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/ml_pb2.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/types_pb2.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/protobuf/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/protobuf/functions.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/readwriter.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/resource/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/resource/profile.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/shell/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/shell/progress.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/sql_formatter.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/streaming/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/streaming/query.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/streaming/readwriter.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/streaming/worker/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/streaming/worker/listener_worker.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/table_arg.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/tvf.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/types.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/udf.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/udtf.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/utils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/context.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/dataframe.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/datasource.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/datasource_internal.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/functions/partitioning.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/group.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/internal.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/merge.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/metrics.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/observation.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/_typing/protocols/__init__.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/_typing/protocols/frame.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/_typing/protocols/series.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/conversion.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/functions.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/map_ops.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/types.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/utils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/plot/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/plot/core.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/plot/plotly.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/profiler.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/protobuf/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/protobuf/functions.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/readwriter.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/session.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/sql_formatter.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/listener.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/map_state_client.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/proto/StateMessage_pb2.pyi +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/proto/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/python_streaming_source_runner.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/readwriter.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/state.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/stateful_processor.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/stateful_processor_api_client.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/stateful_processor_util.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/transform_with_state_driver_worker.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/value_state_client.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/table_arg.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/tvf.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/tvf_argument.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/udf.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/udtf.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/utils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/variant_utils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/window.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/worker/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/worker/analyze_udtf.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/worker/commit_data_source_write.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/worker/create_data_source.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/worker/lookup_data_sources.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/worker/python_streaming_sink_runner.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/statcounter.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/storagelevel.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/streaming/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/streaming/context.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/streaming/dstream.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/streaming/kinesis.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/streaming/listener.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/streaming/util.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/taskcontext.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/testing/__init__.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/testing/mllibutils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/testing/mlutils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/testing/objects.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/testing/pandasutils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/testing/streamingutils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/testing/utils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/traceback_utils.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/worker_util.py +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark_client.egg-info/dependency_links.txt +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark_client.egg-info/requires.txt +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark_client.egg-info/top_level.txt +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/setup.cfg +0 -0
- {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/setup.py +0 -0
{pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/errors/error-conditions.json
RENAMED
|
@@ -1134,6 +1134,24 @@
|
|
|
1134
1134
|
"Cannot serialize the function `<name>`. If you accessed the Spark session, or a DataFrame defined outside of the function, or any object that contains a Spark session, please be aware that they are not allowed in Spark Connect. For `foreachBatch`, please access the Spark session using `df.sparkSession`, where `df` is the first parameter in your `foreachBatch` function. For `StreamingQueryListener`, please access the Spark session using `self.spark`. For details please check out the PySpark doc for `foreachBatch` and `StreamingQueryListener`."
|
|
1135
1135
|
]
|
|
1136
1136
|
},
|
|
1137
|
+
"ST_INVALID_ALGORITHM_VALUE" : {
|
|
1138
|
+
"message" : [
|
|
1139
|
+
"Invalid or unsupported edge interpolation algorithm value: '<alg>'."
|
|
1140
|
+
],
|
|
1141
|
+
"sqlState" : "22023"
|
|
1142
|
+
},
|
|
1143
|
+
"ST_INVALID_CRS_VALUE" : {
|
|
1144
|
+
"message" : [
|
|
1145
|
+
"Invalid or unsupported CRS (coordinate reference system) value: '<crs>'."
|
|
1146
|
+
],
|
|
1147
|
+
"sqlState" : "22023"
|
|
1148
|
+
},
|
|
1149
|
+
"ST_INVALID_SRID_VALUE" : {
|
|
1150
|
+
"message" : [
|
|
1151
|
+
"Invalid or unsupported SRID (spatial reference identifier) value: <srid>."
|
|
1152
|
+
],
|
|
1153
|
+
"sqlState" : "22023"
|
|
1154
|
+
},
|
|
1137
1155
|
"TEST_CLASS_NOT_COMPILED": {
|
|
1138
1156
|
"message": [
|
|
1139
1157
|
"<test_class_path> doesn't exist. Spark sql test classes are not compiled."
|
|
@@ -23,10 +23,11 @@ from pyspark.pipelines.flow import Flow, QueryFunction
|
|
|
23
23
|
from pyspark.pipelines.source_code_location import (
|
|
24
24
|
get_caller_source_code_location,
|
|
25
25
|
)
|
|
26
|
-
from pyspark.pipelines.
|
|
26
|
+
from pyspark.pipelines.output import (
|
|
27
27
|
MaterializedView,
|
|
28
28
|
StreamingTable,
|
|
29
29
|
TemporaryView,
|
|
30
|
+
Sink,
|
|
30
31
|
)
|
|
31
32
|
from pyspark.sql.types import StructType
|
|
32
33
|
|
|
@@ -156,7 +157,7 @@ def table(
|
|
|
156
157
|
|
|
157
158
|
resolved_name = name or decorated.__name__
|
|
158
159
|
registry = get_active_graph_element_registry()
|
|
159
|
-
registry.
|
|
160
|
+
registry.register_output(
|
|
160
161
|
StreamingTable(
|
|
161
162
|
comment=comment,
|
|
162
163
|
name=resolved_name,
|
|
@@ -258,7 +259,7 @@ def materialized_view(
|
|
|
258
259
|
|
|
259
260
|
resolved_name = name or decorated.__name__
|
|
260
261
|
registry = get_active_graph_element_registry()
|
|
261
|
-
registry.
|
|
262
|
+
registry.register_output(
|
|
262
263
|
MaterializedView(
|
|
263
264
|
comment=comment,
|
|
264
265
|
name=resolved_name,
|
|
@@ -351,7 +352,7 @@ def temporary_view(
|
|
|
351
352
|
|
|
352
353
|
resolved_name = name or decorated.__name__
|
|
353
354
|
registry = get_active_graph_element_registry()
|
|
354
|
-
registry.
|
|
355
|
+
registry.register_output(
|
|
355
356
|
TemporaryView(
|
|
356
357
|
comment=comment,
|
|
357
358
|
name=resolved_name,
|
|
@@ -446,4 +447,46 @@ def create_streaming_table(
|
|
|
446
447
|
schema=schema,
|
|
447
448
|
format=format,
|
|
448
449
|
)
|
|
449
|
-
get_active_graph_element_registry().
|
|
450
|
+
get_active_graph_element_registry().register_output(table)
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
def create_sink(
|
|
454
|
+
name: str,
|
|
455
|
+
format: str,
|
|
456
|
+
options: Optional[Dict[str, str]] = None,
|
|
457
|
+
) -> None:
|
|
458
|
+
"""
|
|
459
|
+
Creates a sink that can be targeted by streaming flows, providing a generic destination \
|
|
460
|
+
for flows to send data external to the pipeline.
|
|
461
|
+
|
|
462
|
+
:param name: The name of the sink.
|
|
463
|
+
:param format: The format of the sink, e.g. "parquet".
|
|
464
|
+
:param options: A dict where the keys are the property names and the values are the \
|
|
465
|
+
property values. These properties will be set on the sink.
|
|
466
|
+
"""
|
|
467
|
+
if type(name) is not str:
|
|
468
|
+
raise PySparkTypeError(
|
|
469
|
+
errorClass="NOT_STR",
|
|
470
|
+
messageParameters={"arg_name": "name", "arg_type": type(name).__name__},
|
|
471
|
+
)
|
|
472
|
+
if type(format) is not str:
|
|
473
|
+
raise PySparkTypeError(
|
|
474
|
+
errorClass="NOT_STR",
|
|
475
|
+
messageParameters={"arg_name": "format", "arg_type": type(format).__name__},
|
|
476
|
+
)
|
|
477
|
+
if options is not None and not isinstance(options, dict):
|
|
478
|
+
raise PySparkTypeError(
|
|
479
|
+
errorClass="NOT_DICT",
|
|
480
|
+
messageParameters={
|
|
481
|
+
"arg_name": "options",
|
|
482
|
+
"arg_type": type(options).__name__,
|
|
483
|
+
},
|
|
484
|
+
)
|
|
485
|
+
sink = Sink(
|
|
486
|
+
name=name,
|
|
487
|
+
format=format,
|
|
488
|
+
options=options or {},
|
|
489
|
+
source_code_location=get_caller_source_code_location(stacklevel=1),
|
|
490
|
+
comment=None,
|
|
491
|
+
)
|
|
492
|
+
get_active_graph_element_registry().register_output(sink)
|
|
@@ -90,6 +90,7 @@ class PipelineSpec:
|
|
|
90
90
|
"""Spec for a pipeline.
|
|
91
91
|
|
|
92
92
|
:param name: The name of the pipeline.
|
|
93
|
+
:param storage: The root directory for storing metadata, such as streaming checkpoints.
|
|
93
94
|
:param catalog: The default catalog to use for the pipeline.
|
|
94
95
|
:param database: The default database to use for the pipeline.
|
|
95
96
|
:param configuration: A dictionary of Spark configuration properties to set for the pipeline.
|
|
@@ -97,6 +98,7 @@ class PipelineSpec:
|
|
|
97
98
|
"""
|
|
98
99
|
|
|
99
100
|
name: str
|
|
101
|
+
storage: str
|
|
100
102
|
catalog: Optional[str]
|
|
101
103
|
database: Optional[str]
|
|
102
104
|
configuration: Mapping[str, str]
|
|
@@ -150,8 +152,16 @@ def load_pipeline_spec(spec_path: Path) -> PipelineSpec:
|
|
|
150
152
|
|
|
151
153
|
|
|
152
154
|
def unpack_pipeline_spec(spec_data: Mapping[str, Any]) -> PipelineSpec:
|
|
153
|
-
ALLOWED_FIELDS = {
|
|
154
|
-
|
|
155
|
+
ALLOWED_FIELDS = {
|
|
156
|
+
"name",
|
|
157
|
+
"storage",
|
|
158
|
+
"catalog",
|
|
159
|
+
"database",
|
|
160
|
+
"schema",
|
|
161
|
+
"configuration",
|
|
162
|
+
"libraries",
|
|
163
|
+
}
|
|
164
|
+
REQUIRED_FIELDS = ["name", "storage"]
|
|
155
165
|
for key in spec_data.keys():
|
|
156
166
|
if key not in ALLOWED_FIELDS:
|
|
157
167
|
raise PySparkException(
|
|
@@ -167,6 +177,7 @@ def unpack_pipeline_spec(spec_data: Mapping[str, Any]) -> PipelineSpec:
|
|
|
167
177
|
|
|
168
178
|
return PipelineSpec(
|
|
169
179
|
name=spec_data["name"],
|
|
180
|
+
storage=spec_data["storage"],
|
|
170
181
|
catalog=spec_data.get("catalog"),
|
|
171
182
|
database=spec_data.get("database", spec_data.get("schema")),
|
|
172
183
|
configuration=validate_str_dict(spec_data.get("configuration", {}), "configuration"),
|
|
@@ -295,7 +306,9 @@ def run(
|
|
|
295
306
|
spec = load_pipeline_spec(spec_path)
|
|
296
307
|
|
|
297
308
|
log_with_curr_timestamp("Creating Spark session...")
|
|
298
|
-
spark_builder = SparkSession.builder
|
|
309
|
+
spark_builder = SparkSession.builder.config(
|
|
310
|
+
"spark.sql.connect.serverStacktrace.enabled", "false"
|
|
311
|
+
)
|
|
299
312
|
for key, value in spec.configuration.items():
|
|
300
313
|
spark_builder = spark_builder.config(key, value)
|
|
301
314
|
|
|
@@ -321,6 +334,7 @@ def run(
|
|
|
321
334
|
full_refresh_all=full_refresh_all,
|
|
322
335
|
refresh=refresh,
|
|
323
336
|
dry=dry,
|
|
337
|
+
storage=spec.storage,
|
|
324
338
|
)
|
|
325
339
|
try:
|
|
326
340
|
handle_pipeline_events(result_iter)
|
{pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/graph_element_registry.py
RENAMED
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
from abc import ABC, abstractmethod
|
|
19
19
|
from pathlib import Path
|
|
20
20
|
|
|
21
|
-
from pyspark.pipelines.
|
|
21
|
+
from pyspark.pipelines.output import Output
|
|
22
22
|
from pyspark.pipelines.flow import Flow
|
|
23
23
|
from contextlib import contextmanager
|
|
24
24
|
from contextvars import ContextVar
|
|
@@ -35,7 +35,7 @@ class GraphElementRegistry(ABC):
|
|
|
35
35
|
"""
|
|
36
36
|
|
|
37
37
|
@abstractmethod
|
|
38
|
-
def
|
|
38
|
+
def register_output(self, output: Output) -> None:
|
|
39
39
|
"""Add the given dataset to the registry."""
|
|
40
40
|
|
|
41
41
|
@abstractmethod
|
|
@@ -22,12 +22,12 @@ from pyspark.sql.types import StructType
|
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
@dataclass(frozen=True)
|
|
25
|
-
class
|
|
26
|
-
"""Base class for definitions of
|
|
25
|
+
class Output:
|
|
26
|
+
"""Base class for definitions of outputs in a pipeline dataflow graph.
|
|
27
27
|
|
|
28
|
-
:param name: The name of the
|
|
29
|
-
:param comment: Optional comment for the
|
|
30
|
-
:param source_code_location: The location of the source code that created this
|
|
28
|
+
:param name: The name of the output. May be a multi-part name, such as "db.table".
|
|
29
|
+
:param comment: Optional comment for the output.
|
|
30
|
+
:param source_code_location: The location of the source code that created this output.
|
|
31
31
|
This is used for debugging and tracing purposes.
|
|
32
32
|
"""
|
|
33
33
|
|
|
@@ -37,7 +37,7 @@ class Dataset:
|
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
@dataclass(frozen=True)
|
|
40
|
-
class Table(
|
|
40
|
+
class Table(Output):
|
|
41
41
|
"""
|
|
42
42
|
Definition of a table in a pipeline dataflow graph, i.e. a catalog object backed by data in
|
|
43
43
|
physical storage.
|
|
@@ -69,8 +69,17 @@ class StreamingTable(Table):
|
|
|
69
69
|
|
|
70
70
|
|
|
71
71
|
@dataclass(frozen=True)
|
|
72
|
-
class TemporaryView(
|
|
72
|
+
class TemporaryView(Output):
|
|
73
73
|
"""Definition of a temporary view in a pipeline dataflow graph. Temporary views can be
|
|
74
74
|
referenced by flows within the dataflow graph, but are not visible outside of the graph."""
|
|
75
75
|
|
|
76
76
|
pass
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@dataclass(frozen=True)
|
|
80
|
+
class Sink(Output):
|
|
81
|
+
"""Definition of an external sink in a pipeline dataflow graph. An external sink's
|
|
82
|
+
contents are written to an external system rather than managed by the pipeline itself."""
|
|
83
|
+
|
|
84
|
+
format: str
|
|
85
|
+
options: Mapping[str, str]
|
{pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/source_code_location.py
RENAMED
|
@@ -30,6 +30,34 @@ def get_caller_source_code_location(stacklevel: int) -> SourceCodeLocation:
|
|
|
30
30
|
"""
|
|
31
31
|
Returns a SourceCodeLocation object representing the location of the code that invokes this function.
|
|
32
32
|
|
|
33
|
+
If this function is called from a decorator (ex. @sdp.table), note that the returned line
|
|
34
|
+
number is affected by how the decorator was triggered - i.e. whether @sdp.table or @sdp.table()
|
|
35
|
+
was called - AND what Python version is being used.
|
|
36
|
+
|
|
37
|
+
Case 1:
|
|
38
|
+
|@sdp.table()
|
|
39
|
+
|def fn
|
|
40
|
+
|
|
41
|
+
@sdp.table() is executed immediately, on line 1. This is true for all python versions.
|
|
42
|
+
|
|
43
|
+
Case 2:
|
|
44
|
+
|@sdp.table
|
|
45
|
+
|def fn
|
|
46
|
+
|
|
47
|
+
In python < 3.10, @sdp.table will expand to fn = sdp.table(fn), replacing the line that `fn` is
|
|
48
|
+
defined on. This would be line 2. More interestingly, this means:
|
|
49
|
+
|
|
50
|
+
|@sdp.table
|
|
51
|
+
|
|
|
52
|
+
|
|
|
53
|
+
|def fn
|
|
54
|
+
|
|
55
|
+
Will expand to fn = sdp.table(fn) on line 4, where `fn` is defined.
|
|
56
|
+
|
|
57
|
+
However, in python 3.10+, the line number in the stack trace will still be the line that the
|
|
58
|
+
decorator was defined on. In other words, case 2 will be treated the same as case 1, and the
|
|
59
|
+
line number will be 1.
|
|
60
|
+
|
|
33
61
|
:param stacklevel: The number of stack frames to go up. 0 means the direct caller of this
|
|
34
62
|
function, 1 means the caller of the caller, and so on.
|
|
35
63
|
"""
|
|
@@ -20,21 +20,25 @@ from pyspark.errors import PySparkTypeError
|
|
|
20
20
|
from pyspark.sql import SparkSession
|
|
21
21
|
from pyspark.sql.connect.dataframe import DataFrame as ConnectDataFrame
|
|
22
22
|
from pyspark.pipelines.block_connect_access import block_spark_connect_execution_and_analysis
|
|
23
|
-
from pyspark.pipelines.
|
|
24
|
-
|
|
23
|
+
from pyspark.pipelines.output import (
|
|
24
|
+
Output,
|
|
25
25
|
MaterializedView,
|
|
26
26
|
Table,
|
|
27
|
+
Sink,
|
|
27
28
|
StreamingTable,
|
|
28
29
|
TemporaryView,
|
|
29
30
|
)
|
|
30
31
|
from pyspark.pipelines.flow import Flow
|
|
31
32
|
from pyspark.pipelines.graph_element_registry import GraphElementRegistry
|
|
33
|
+
from pyspark.pipelines.source_code_location import SourceCodeLocation
|
|
34
|
+
from pyspark.sql.connect.types import pyspark_types_to_proto_types
|
|
35
|
+
from pyspark.sql.types import StructType
|
|
32
36
|
from typing import Any, cast
|
|
33
37
|
import pyspark.sql.connect.proto as pb2
|
|
34
38
|
|
|
35
39
|
|
|
36
40
|
class SparkConnectGraphElementRegistry(GraphElementRegistry):
|
|
37
|
-
"""Registers
|
|
41
|
+
"""Registers outputs and flows in a dataflow graph held in a Spark Connect server."""
|
|
38
42
|
|
|
39
43
|
def __init__(self, spark: SparkSession, dataflow_graph_id: str) -> None:
|
|
40
44
|
# Cast because mypy seems to think `spark` is a function, not an object. Likely related to
|
|
@@ -42,46 +46,66 @@ class SparkConnectGraphElementRegistry(GraphElementRegistry):
|
|
|
42
46
|
self._client = cast(Any, spark).client
|
|
43
47
|
self._dataflow_graph_id = dataflow_graph_id
|
|
44
48
|
|
|
45
|
-
def
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
schema
|
|
50
|
-
|
|
49
|
+
def register_output(self, output: Output) -> None:
|
|
50
|
+
table_details = None
|
|
51
|
+
sink_details = None
|
|
52
|
+
if isinstance(output, Table):
|
|
53
|
+
if isinstance(output.schema, str):
|
|
54
|
+
schema_string = output.schema
|
|
55
|
+
schema_data_type = None
|
|
56
|
+
elif isinstance(output.schema, StructType):
|
|
57
|
+
schema_string = None
|
|
58
|
+
schema_data_type = pyspark_types_to_proto_types(output.schema)
|
|
59
|
+
else:
|
|
60
|
+
schema_string = None
|
|
61
|
+
schema_data_type = None
|
|
62
|
+
|
|
63
|
+
table_details = pb2.PipelineCommand.DefineOutput.TableDetails(
|
|
64
|
+
table_properties=output.table_properties,
|
|
65
|
+
partition_cols=output.partition_cols,
|
|
66
|
+
format=output.format,
|
|
67
|
+
# Even though schema_string is not required, the generated Python code seems to
|
|
68
|
+
# erroneously think it is required.
|
|
69
|
+
schema_string=schema_string, # type: ignore[arg-type]
|
|
70
|
+
schema_data_type=schema_data_type,
|
|
71
|
+
)
|
|
51
72
|
|
|
52
|
-
if isinstance(
|
|
53
|
-
|
|
54
|
-
elif isinstance(
|
|
55
|
-
|
|
73
|
+
if isinstance(output, MaterializedView):
|
|
74
|
+
output_type = pb2.OutputType.MATERIALIZED_VIEW
|
|
75
|
+
elif isinstance(output, StreamingTable):
|
|
76
|
+
output_type = pb2.OutputType.TABLE
|
|
56
77
|
else:
|
|
57
78
|
raise PySparkTypeError(
|
|
58
79
|
errorClass="UNSUPPORTED_PIPELINES_DATASET_TYPE",
|
|
59
|
-
messageParameters={"
|
|
80
|
+
messageParameters={"output_type": type(output).__name__},
|
|
60
81
|
)
|
|
61
|
-
elif isinstance(
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
82
|
+
elif isinstance(output, TemporaryView):
|
|
83
|
+
output_type = pb2.OutputType.TEMPORARY_VIEW
|
|
84
|
+
table_details = None
|
|
85
|
+
elif isinstance(output, Sink):
|
|
86
|
+
output_type = pb2.OutputType.SINK
|
|
87
|
+
sink_details = pb2.PipelineCommand.DefineOutput.SinkDetails(
|
|
88
|
+
options=output.options,
|
|
89
|
+
format=output.format,
|
|
90
|
+
)
|
|
67
91
|
else:
|
|
68
92
|
raise PySparkTypeError(
|
|
69
93
|
errorClass="UNSUPPORTED_PIPELINES_DATASET_TYPE",
|
|
70
|
-
messageParameters={"
|
|
94
|
+
messageParameters={"output_type": type(output).__name__},
|
|
71
95
|
)
|
|
72
96
|
|
|
73
|
-
inner_command = pb2.PipelineCommand.
|
|
97
|
+
inner_command = pb2.PipelineCommand.DefineOutput(
|
|
74
98
|
dataflow_graph_id=self._dataflow_graph_id,
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
comment=
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
format=format,
|
|
99
|
+
output_name=output.name,
|
|
100
|
+
output_type=output_type,
|
|
101
|
+
comment=output.comment,
|
|
102
|
+
sink_details=sink_details,
|
|
103
|
+
table_details=table_details,
|
|
104
|
+
source_code_location=source_code_location_to_proto(output.source_code_location),
|
|
82
105
|
)
|
|
106
|
+
|
|
83
107
|
command = pb2.Command()
|
|
84
|
-
command.pipeline_command.
|
|
108
|
+
command.pipeline_command.define_output.CopyFrom(inner_command)
|
|
85
109
|
self._client.execute_command(command)
|
|
86
110
|
|
|
87
111
|
def register_flow(self, flow: Flow) -> None:
|
|
@@ -89,12 +113,17 @@ class SparkConnectGraphElementRegistry(GraphElementRegistry):
|
|
|
89
113
|
df = flow.func()
|
|
90
114
|
relation = cast(ConnectDataFrame, df)._plan.plan(self._client)
|
|
91
115
|
|
|
116
|
+
relation_flow_details = pb2.PipelineCommand.DefineFlow.WriteRelationFlowDetails(
|
|
117
|
+
relation=relation,
|
|
118
|
+
)
|
|
119
|
+
|
|
92
120
|
inner_command = pb2.PipelineCommand.DefineFlow(
|
|
93
121
|
dataflow_graph_id=self._dataflow_graph_id,
|
|
94
122
|
flow_name=flow.name,
|
|
95
123
|
target_dataset_name=flow.target,
|
|
96
|
-
|
|
124
|
+
relation_flow_details=relation_flow_details,
|
|
97
125
|
sql_conf=flow.spark_conf,
|
|
126
|
+
source_code_location=source_code_location_to_proto(flow.source_code_location),
|
|
98
127
|
)
|
|
99
128
|
command = pb2.Command()
|
|
100
129
|
command.pipeline_command.define_flow.CopyFrom(inner_command)
|
|
@@ -109,3 +138,11 @@ class SparkConnectGraphElementRegistry(GraphElementRegistry):
|
|
|
109
138
|
command = pb2.Command()
|
|
110
139
|
command.pipeline_command.define_sql_graph_elements.CopyFrom(inner_command)
|
|
111
140
|
self._client.execute_command(command)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def source_code_location_to_proto(
|
|
144
|
+
source_code_location: SourceCodeLocation,
|
|
145
|
+
) -> pb2.SourceCodeLocation:
|
|
146
|
+
return pb2.SourceCodeLocation(
|
|
147
|
+
file_name=source_code_location.filename, line_number=source_code_location.line_number
|
|
148
|
+
)
|
{pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/spark_connect_pipeline.py
RENAMED
|
@@ -72,6 +72,7 @@ def start_run(
|
|
|
72
72
|
full_refresh_all: bool,
|
|
73
73
|
refresh: Optional[Sequence[str]],
|
|
74
74
|
dry: bool,
|
|
75
|
+
storage: str,
|
|
75
76
|
) -> Iterator[Dict[str, Any]]:
|
|
76
77
|
"""Start a run of the dataflow graph in the Spark Connect server.
|
|
77
78
|
|
|
@@ -79,6 +80,8 @@ def start_run(
|
|
|
79
80
|
:param full_refresh: List of datasets to reset and recompute.
|
|
80
81
|
:param full_refresh_all: Perform a full graph reset and recompute.
|
|
81
82
|
:param refresh: List of datasets to update.
|
|
83
|
+
:param dry: If true, the run will not actually execute any flows, but only validate the graph.
|
|
84
|
+
:param storage: The storage location to store metadata such as streaming checkpoints.
|
|
82
85
|
"""
|
|
83
86
|
inner_command = pb2.PipelineCommand.StartRun(
|
|
84
87
|
dataflow_graph_id=dataflow_graph_id,
|
|
@@ -86,6 +89,7 @@ def start_run(
|
|
|
86
89
|
full_refresh_all=full_refresh_all,
|
|
87
90
|
refresh_selection=refresh or [],
|
|
88
91
|
dry=dry,
|
|
92
|
+
storage=storage,
|
|
89
93
|
)
|
|
90
94
|
command = pb2.Command()
|
|
91
95
|
command.pipeline_command.start_run.CopyFrom(inner_command)
|
|
@@ -69,7 +69,7 @@ def from_avro(
|
|
|
69
69
|
>>> df = spark.createDataFrame(data, ("key", "value"))
|
|
70
70
|
>>> avroDf = df.select(to_avro(df.value).alias("avro"))
|
|
71
71
|
>>> avroDf.collect()
|
|
72
|
-
[Row(avro=
|
|
72
|
+
[Row(avro=b'\\x00\\x00\\x04\\x00\\nAlice')]
|
|
73
73
|
|
|
74
74
|
>>> jsonFormatSchema = '''{"type":"record","name":"topLevelRecord","fields":
|
|
75
75
|
... [{"name":"avro","type":[{"type":"record","name":"value","namespace":"topLevelRecord",
|
|
@@ -141,12 +141,12 @@ def to_avro(data: "ColumnOrName", jsonFormatSchema: str = "") -> Column:
|
|
|
141
141
|
>>> data = ['SPADES']
|
|
142
142
|
>>> df = spark.createDataFrame(data, "string")
|
|
143
143
|
>>> df.select(to_avro(df.value).alias("suite")).collect()
|
|
144
|
-
[Row(suite=
|
|
144
|
+
[Row(suite=b'\\x00\\x0cSPADES')]
|
|
145
145
|
|
|
146
146
|
>>> jsonFormatSchema = '''["null", {"type": "enum", "name": "value",
|
|
147
147
|
... "symbols": ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"]}]'''
|
|
148
148
|
>>> df.select(to_avro(df.value, jsonFormatSchema).alias("suite")).collect()
|
|
149
|
-
[Row(suite=
|
|
149
|
+
[Row(suite=b'\\x02\\x00')]
|
|
150
150
|
"""
|
|
151
151
|
from py4j.java_gateway import JVMView
|
|
152
152
|
from pyspark.sql.classic.column import _to_java_column
|
|
@@ -21,6 +21,7 @@ import sys
|
|
|
21
21
|
from typing import (
|
|
22
22
|
overload,
|
|
23
23
|
Any,
|
|
24
|
+
Callable,
|
|
24
25
|
TYPE_CHECKING,
|
|
25
26
|
Union,
|
|
26
27
|
)
|
|
@@ -1538,6 +1539,58 @@ class Column(TableValuedFunctionArgument):
|
|
|
1538
1539
|
"""
|
|
1539
1540
|
...
|
|
1540
1541
|
|
|
1542
|
+
@dispatch_col_method
|
|
1543
|
+
def transform(self, f: Callable[["Column"], "Column"]) -> "Column":
|
|
1544
|
+
"""
|
|
1545
|
+
Applies a transformation function to this column.
|
|
1546
|
+
|
|
1547
|
+
This method allows you to apply a function that takes a Column and returns a Column,
|
|
1548
|
+
enabling method chaining and functional transformations.
|
|
1549
|
+
|
|
1550
|
+
.. versionadded:: 4.1.0
|
|
1551
|
+
|
|
1552
|
+
Parameters
|
|
1553
|
+
----------
|
|
1554
|
+
f : callable
|
|
1555
|
+
A function that takes a :class:`Column` and returns a :class:`Column`.
|
|
1556
|
+
|
|
1557
|
+
Returns
|
|
1558
|
+
-------
|
|
1559
|
+
:class:`Column`
|
|
1560
|
+
The result of applying the function to this column.
|
|
1561
|
+
|
|
1562
|
+
Examples
|
|
1563
|
+
--------
|
|
1564
|
+
Example 1: Chain built-in functions
|
|
1565
|
+
|
|
1566
|
+
>>> from pyspark.sql.functions import trim, upper
|
|
1567
|
+
>>> df = spark.createDataFrame([(" hello ",), (" world ",)], ["text"])
|
|
1568
|
+
>>> df.select(df.text.transform(trim).transform(upper).alias("result")).show()
|
|
1569
|
+
+------+
|
|
1570
|
+
|result|
|
|
1571
|
+
+------+
|
|
1572
|
+
| HELLO|
|
|
1573
|
+
| WORLD|
|
|
1574
|
+
+------+
|
|
1575
|
+
|
|
1576
|
+
Example 2: Use lambda functions
|
|
1577
|
+
|
|
1578
|
+
>>> df = spark.createDataFrame([(10,), (20,), (30,)], ["value"])
|
|
1579
|
+
>>> df.select(
|
|
1580
|
+
... df.value.transform(lambda c: c + 5)
|
|
1581
|
+
... .transform(lambda c: c * 2)
|
|
1582
|
+
... .transform(lambda c: c - 10).alias("result")
|
|
1583
|
+
... ).show()
|
|
1584
|
+
+------+
|
|
1585
|
+
|result|
|
|
1586
|
+
+------+
|
|
1587
|
+
| 20|
|
|
1588
|
+
| 40|
|
|
1589
|
+
| 60|
|
|
1590
|
+
+------+
|
|
1591
|
+
"""
|
|
1592
|
+
...
|
|
1593
|
+
|
|
1541
1594
|
@dispatch_col_method
|
|
1542
1595
|
def outer(self) -> "Column":
|
|
1543
1596
|
"""
|
{pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/client/artifact.py
RENAMED
|
@@ -427,6 +427,30 @@ class ArtifactManager:
|
|
|
427
427
|
status = resp.statuses.get(artifactName)
|
|
428
428
|
return status.exists if status is not None else False
|
|
429
429
|
|
|
430
|
+
def get_cached_artifacts(self, hashes: list[str]) -> set[str]:
|
|
431
|
+
"""
|
|
432
|
+
Batch check which artifacts are already cached on the server.
|
|
433
|
+
Returns a set of hashes that are already cached.
|
|
434
|
+
"""
|
|
435
|
+
if not hashes:
|
|
436
|
+
return set()
|
|
437
|
+
|
|
438
|
+
artifact_names = [f"{CACHE_PREFIX}/{hash}" for hash in hashes]
|
|
439
|
+
request = proto.ArtifactStatusesRequest(
|
|
440
|
+
user_context=self._user_context, session_id=self._session_id, names=artifact_names
|
|
441
|
+
)
|
|
442
|
+
resp: proto.ArtifactStatusesResponse = self._stub.ArtifactStatus(
|
|
443
|
+
request, metadata=self._metadata
|
|
444
|
+
)
|
|
445
|
+
|
|
446
|
+
cached = set()
|
|
447
|
+
for hash in hashes:
|
|
448
|
+
artifact_name = f"{CACHE_PREFIX}/{hash}"
|
|
449
|
+
status = resp.statuses.get(artifact_name)
|
|
450
|
+
if status is not None and status.exists:
|
|
451
|
+
cached.add(hash)
|
|
452
|
+
return cached
|
|
453
|
+
|
|
430
454
|
def cache_artifact(self, blob: bytes) -> str:
|
|
431
455
|
"""
|
|
432
456
|
Cache the given blob at the session.
|
|
@@ -442,3 +466,34 @@ class ArtifactManager:
|
|
|
442
466
|
# TODO(SPARK-42658): Handle responses containing CRC failures.
|
|
443
467
|
|
|
444
468
|
return hash
|
|
469
|
+
|
|
470
|
+
def cache_artifacts(self, blobs: list[bytes]) -> list[str]:
|
|
471
|
+
"""
|
|
472
|
+
Cache the given blobs at the session.
|
|
473
|
+
|
|
474
|
+
This method batches artifact status checks and uploads to minimize RPC overhead.
|
|
475
|
+
"""
|
|
476
|
+
# Compute hashes for all blobs upfront
|
|
477
|
+
hashes = [hashlib.sha256(blob).hexdigest() for blob in blobs]
|
|
478
|
+
unique_hashes = list(set(hashes))
|
|
479
|
+
|
|
480
|
+
# Batch check which artifacts are already cached
|
|
481
|
+
cached_hashes = self.get_cached_artifacts(unique_hashes)
|
|
482
|
+
|
|
483
|
+
# Collect unique artifacts that need to be uploaded
|
|
484
|
+
seen_hashes = set()
|
|
485
|
+
artifacts_to_add = []
|
|
486
|
+
for blob, hash in zip(blobs, hashes):
|
|
487
|
+
if hash not in cached_hashes and hash not in seen_hashes:
|
|
488
|
+
artifacts_to_add.append(new_cache_artifact(hash, InMemory(blob)))
|
|
489
|
+
seen_hashes.add(hash)
|
|
490
|
+
|
|
491
|
+
# Batch upload all missing artifacts in a single RPC call
|
|
492
|
+
if artifacts_to_add:
|
|
493
|
+
requests = self._add_artifacts(artifacts_to_add)
|
|
494
|
+
response: proto.AddArtifactsResponse = self._retrieve_responses(requests)
|
|
495
|
+
summaries: List[proto.AddArtifactsResponse.ArtifactSummary] = []
|
|
496
|
+
for summary in response.artifacts:
|
|
497
|
+
summaries.append(summary)
|
|
498
|
+
# TODO(SPARK-42658): Handle responses containing CRC failures.
|
|
499
|
+
return hashes
|