pyspark-client 4.2.0.dev1__tar.gz → 4.2.0.dev2__tar.gz
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in the public registry.
- {pyspark_client-4.2.0.dev1/pyspark_client.egg-info → pyspark_client-4.2.0.dev2}/PKG-INFO +2 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/_typing.pyi +3 -3
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/errors/error-conditions.json +11 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/errors/exceptions/captured.py +18 -19
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/errors/utils.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/instrumentation_utils.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/_typing.pyi +5 -3
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/functions.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/linalg/__init__.py +29 -24
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/_typing.pyi +6 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/linalg/__init__.py +28 -24
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/linalg/distributed.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/regression.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/stat/_statistics.py +2 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/util.py +1 -5
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/accessors.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/base.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/categorical.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/base.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/boolean_ops.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/categorical_ops.py +3 -3
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/datetime_ops.py +8 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/null_ops.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/num_ops.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/timedelta_ops.py +8 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/datetimes.py +4 -4
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/frame.py +175 -119
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/generic.py +6 -6
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/groupby.py +18 -7
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/indexes/base.py +8 -10
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/indexes/category.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/indexes/datetimes.py +5 -5
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/indexes/multi.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/indexes/timedelta.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/indexing.py +4 -4
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/internal.py +2 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/namespace.py +18 -11
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/plot/core.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/plot/matplotlib.py +3 -3
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/resample.py +10 -7
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/series.py +33 -28
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/testing.py +7 -7
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/typedef/typehints.py +41 -12
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/usage_logging/__init__.py +2 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/utils.py +7 -6
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/profiler.py +17 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/_typing.pyi +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/client/artifact.py +12 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/client/core.py +67 -4
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/client/reattach.py +3 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/dataframe.py +20 -12
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/functions/builtin.py +109 -50
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/plan.py +6 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/base_pb2.py +17 -7
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/base_pb2.pyi +267 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/base_pb2_grpc.py +47 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/catalog_pb2.py +2 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/commands_pb2.py +72 -72
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/commands_pb2.pyi +12 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/common_pb2.py +2 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/example_plugins_pb2.py +2 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/expressions_pb2.py +2 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/ml_common_pb2.py +2 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/ml_pb2.py +2 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/pipelines_pb2.py +2 -2
- pyspark_client-4.2.0.dev2/pyspark/sql/connect/proto/relations_pb2.py +251 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/relations_pb2.pyi +19 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/types_pb2.py +2 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/session.py +49 -38
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/streaming/readwriter.py +41 -3
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/context.py +14 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/conversion.py +539 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/dataframe.py +40 -19
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/functions/__init__.py +3 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/functions/builtin.py +207 -57
- pyspark_client-4.2.0.dev2/pyspark/sql/interchange.py +89 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/conversion.py +138 -26
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/serializers.py +491 -515
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/types.py +104 -32
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/plot/core.py +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/profiler.py +157 -34
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/session.py +60 -20
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/__init__.py +2 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/listener.py +3 -3
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/proto/StateMessage_pb2.py +2 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/query.py +56 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/readwriter.py +70 -3
- pyspark_client-4.2.0.dev2/pyspark/sql/streaming/tws_tester.py +689 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/utils.py +12 -1
- pyspark_client-4.2.0.dev2/pyspark/sql/worker/analyze_udtf.py +250 -0
- pyspark_client-4.2.0.dev2/pyspark/sql/worker/commit_data_source_write.py +89 -0
- pyspark_client-4.2.0.dev2/pyspark/sql/worker/create_data_source.py +157 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/worker/data_source_pushdown_filters.py +79 -121
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/worker/lookup_data_sources.py +18 -57
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/worker/plan_data_source_read.py +86 -122
- pyspark_client-4.2.0.dev1/pyspark/sql/worker/commit_data_source_write.py → pyspark_client-4.2.0.dev2/pyspark/sql/worker/python_streaming_sink_runner.py +55 -58
- pyspark_client-4.2.0.dev2/pyspark/sql/worker/utils.py +94 -0
- pyspark_client-4.2.0.dev2/pyspark/sql/worker/write_into_data_source.py +252 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/taskcontext.py +44 -4
- pyspark_client-4.2.0.dev2/pyspark/testing/__init__.py +21 -0
- pyspark_client-4.2.0.dev2/pyspark/testing/goldenutils.py +254 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/testing/pandasutils.py +15 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/testing/sqlutils.py +2 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/testing/streamingutils.py +2 -2
- pyspark_client-4.2.0.dev2/pyspark/testing/unittestutils.py +55 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/testing/utils.py +48 -37
- pyspark_client-4.2.0.dev2/pyspark/threaddump.py +62 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/util.py +35 -11
- pyspark_client-4.2.0.dev2/pyspark/version.py +1 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/worker.py +145 -287
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/worker_util.py +33 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2/pyspark_client.egg-info}/PKG-INFO +2 -2
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark_client.egg-info/SOURCES.txt +6 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark_client.egg-info/requires.txt +1 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/setup.py +1 -1
- pyspark_client-4.2.0.dev1/pyspark/sql/connect/proto/relations_pb2.py +0 -251
- pyspark_client-4.2.0.dev1/pyspark/sql/worker/analyze_udtf.py +0 -288
- pyspark_client-4.2.0.dev1/pyspark/sql/worker/create_data_source.py +0 -193
- pyspark_client-4.2.0.dev1/pyspark/sql/worker/python_streaming_sink_runner.py +0 -159
- pyspark_client-4.2.0.dev1/pyspark/sql/worker/write_into_data_source.py +0 -285
- pyspark_client-4.2.0.dev1/pyspark/testing/__init__.py +0 -47
- pyspark_client-4.2.0.dev1/pyspark/version.py +0 -1
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/MANIFEST.in +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/README.md +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/_globals.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/accumulators.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/cloudpickle/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/cloudpickle/cloudpickle.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/cloudpickle/cloudpickle_fast.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/conf.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/daemon.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/errors/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/errors/error_classes.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/errors/exceptions/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/errors/exceptions/base.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/errors/exceptions/connect.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/errors/exceptions/tblib.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/errors_doc_gen.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/find_spark_home.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/install.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/java_gateway.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/join.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/logger/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/logger/logger.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/logger/worker_io.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/loose_version.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/base.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/classification.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/clustering.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/common.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/base.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/classification.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/evaluation.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/feature.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/functions.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/io_utils.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/pipeline.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/proto.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/readwrite.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/serialize.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/summarizer.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/tuning.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/util.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/deepspeed/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/deepspeed/deepspeed_distributor.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/dl_util.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/evaluation.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/feature.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/fpm.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/image.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/model_cache.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/param/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/param/_shared_params_code_gen.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/param/shared.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/pipeline.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/recommendation.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/regression.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/stat.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/torch/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/torch/data.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/torch/distributor.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/torch/log_communication.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/torch/torch_run_process_wrapper.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/tree.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/tuning.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/util.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/wrapper.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/classification.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/clustering.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/common.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/evaluation.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/feature.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/fpm.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/random.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/recommendation.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/stat/KernelDensity.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/stat/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/stat/distribution.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/stat/test.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/tree.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/_typing.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/config.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/correlation.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/binary_ops.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/complex_ops.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/date_ops.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/string_ops.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/udt_ops.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/exceptions.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/extensions.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/indexes/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/missing/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/missing/common.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/missing/frame.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/missing/general_functions.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/missing/groupby.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/missing/indexes.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/missing/resample.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/missing/scalars.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/missing/series.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/missing/window.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/mlflow.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/numpy_compat.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/plot/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/plot/plotly.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/spark/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/spark/accessors.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/spark/utils.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/sql_formatter.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/sql_processor.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/strings.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/supported_api_gen.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/typedef/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/usage_logging/usage_logger.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/window.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/add_pipeline_analysis_context.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/api.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/block_session_mutations.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/cli.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/flow.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/graph_element_registry.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/init_cli.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/logging_utils.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/output.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/source_code_location.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/spark_connect_graph_element_registry.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/spark_connect_pipeline.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/type_error_utils.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/py.typed +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/rddsampler.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/resource/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/resource/information.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/resource/profile.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/resource/requests.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/resultiterable.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/serializers.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/shell.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/shuffle.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/avro/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/avro/functions.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/catalog.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/column.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/conf.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/_typing.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/avro/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/avro/functions.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/catalog.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/client/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/client/retries.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/column.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/conf.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/conversion.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/datasource.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/expressions.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/functions/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/functions/partitioning.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/group.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/logging.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/merge.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/observation.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/profiler.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/catalog_pb2.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/common_pb2.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/example_plugins_pb2.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/expressions_pb2.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/ml_common_pb2.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/ml_pb2.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/pipelines_pb2.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/types_pb2.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/protobuf/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/protobuf/functions.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/readwriter.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/resource/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/resource/profile.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/shell/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/shell/progress.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/sql_formatter.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/streaming/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/streaming/query.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/streaming/worker/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/streaming/worker/listener_worker.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/table_arg.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/tvf.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/types.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/udf.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/udtf.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/utils.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/window.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/datasource.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/datasource_internal.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/functions/partitioning.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/geo_utils.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/group.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/internal.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/merge.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/metrics.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/observation.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/_typing/__init__.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/_typing/protocols/__init__.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/_typing/protocols/frame.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/_typing/protocols/series.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/functions.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/functions.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/group_ops.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/map_ops.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/typehints.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/utils.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/plot/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/plot/plotly.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/protobuf/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/protobuf/functions.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/readwriter.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/sql_formatter.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/list_state_client.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/map_state_client.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/proto/StateMessage_pb2.pyi +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/proto/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/python_streaming_source_runner.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/state.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/stateful_processor.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/stateful_processor_api_client.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/stateful_processor_util.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/transform_with_state_driver_worker.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/value_state_client.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/table_arg.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/tvf.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/tvf_argument.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/types.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/udf.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/udtf.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/variant_utils.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/window.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/worker/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/statcounter.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/storagelevel.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/streaming/__init__.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/streaming/context.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/streaming/dstream.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/streaming/kinesis.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/streaming/listener.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/streaming/util.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/testing/connectutils.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/testing/mllibutils.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/testing/mlutils.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/testing/objects.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/traceback_utils.py +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark_client.egg-info/dependency_links.txt +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark_client.egg-info/top_level.txt +0 -0
- {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/setup.cfg +0 -0
{pyspark_client-4.2.0.dev1/pyspark_client.egg-info → pyspark_client-4.2.0.dev2}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pyspark-client
-Version: 4.2.0.dev1
+Version: 4.2.0.dev2
 Summary: Python Spark Connect client for Apache Spark
 Home-page: https://github.com/apache/spark/tree/master/python
 Author: Spark Developers
@@ -18,7 +18,7 @@ Classifier: Typing :: Typed
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 Requires-Dist: pandas>=2.2.0
-Requires-Dist: pyarrow>=
+Requires-Dist: pyarrow>=18.0.0
 Requires-Dist: grpcio>=1.76.0
 Requires-Dist: grpcio-status>=1.76.0
 Requires-Dist: googleapis-common-protos>=1.71.0
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/_typing.pyi
RENAMED
@@ -16,7 +16,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from typing import Callable, Iterable, Sized, TypeVar, Union
+from typing import Any, Callable, Iterable, Sized, TypeVar, Union
 from typing_extensions import Literal, Protocol
 
 from numpy import int32, int64, float32, float64, ndarray
@@ -29,10 +29,10 @@ PrimitiveType = Union[bool, float, int, str]
 NonUDFType = Literal[0]
 
 class SupportsIAdd(Protocol):
-    def __iadd__(self, other:
+    def __iadd__(self, other: Any) -> SupportsIAdd: ...
 
 class SupportsOrdering(Protocol):
-    def __lt__(self, other:
+    def __lt__(self, other: Any) -> bool: ...
 
 class SizedIterable(Protocol, Sized, Iterable[T_co]): ...
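Note: the `Any`-typed parameters above make these protocols purely structural. A minimal standalone sketch (not part of the package) of how such a protocol is consumed:

from typing import Any, Protocol

class SupportsIAdd(Protocol):
    def __iadd__(self, other: Any) -> "SupportsIAdd": ...

def accumulate(total: SupportsIAdd, increment: Any) -> SupportsIAdd:
    # ints, lists, and numpy arrays all satisfy the protocol structurally
    total += increment
    return total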
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/errors/error-conditions.json
RENAMED
@@ -465,6 +465,11 @@
       "Parameter value <arg_name> must be a valid UUID format: <origin>"
     ]
   },
+  "INVALID_STREAMING_SOURCE_NAME": {
+    "message": [
+      "Invalid streaming source name '<source_name>'. Source names must contain only ASCII letters, digits, and underscores."
+    ]
+  },
   "INVALID_TIMEOUT_TIMESTAMP": {
     "message": [
       "Timeout timestamp (<timestamp>) cannot be earlier than the current watermark (<watermark>)."
@@ -551,6 +556,12 @@
       "<arg1> and <arg2> should be of the same length, got <arg1_length> and <arg2_length>."
     ]
   },
+  "LOCAL_RELATION_SIZE_LIMIT_EXCEEDED": {
+    "message": [
+      "Local relation size (<actualSize> bytes) exceeds the limit (<sizeLimit> bytes)."
+    ],
+    "sqlState": "54000"
+  },
   "MALFORMED_GEOGRAPHY": {
     "message": [
       "Geography binary is malformed. Please check the data source is valid."
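For context, a hedged sketch (not from the diff) of how a condition such as the new INVALID_STREAMING_SOURCE_NAME is typically raised from Python: an exception is built from the condition name plus message parameters that fill the <placeholders> in the JSON template. The validation rule below is illustrative, and the keyword names follow the PySpark 4.x error framework.

from pyspark.errors import PySparkValueError

def validate_source_name(source_name: str) -> None:
    # ASCII letters, digits, and underscores only, per the message text above
    if not (source_name.isascii() and source_name.replace("_", "").isalnum()):
        raise PySparkValueError(
            errorClass="INVALID_STREAMING_SOURCE_NAME",
            messageParameters={"source_name": source_name},
        )

validate_source_name("my_source")   # ok
# validate_source_name("bad-name")  # raises PySparkValueError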
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/errors/exceptions/captured.py
RENAMED
@@ -234,25 +234,13 @@ def _convert_exception(e: "Py4JJavaError") -> CapturedException:
         return SparkUpgradeException(origin=e)
     elif is_instance_of(gw, e, "org.apache.spark.SparkNoSuchElementException"):
         return SparkNoSuchElementException(origin=e)
-
-    c: "Py4JJavaError" = e.getCause()
-    stacktrace: str = getattr(jvm, "org.apache.spark.util.Utils").exceptionString(e)
-    if c is not None and (
-        is_instance_of(gw, c, "org.apache.spark.api.python.PythonException")
-        # To make sure this only catches Python UDFs.
-        and any(
-            map(
-                lambda v: "org.apache.spark.sql.execution.python" in v.toString(), c.getStackTrace()
-            )
-        )
-    ):
-        msg = (
-            "\n An exception was thrown from the Python worker. "
-            "Please see the stack trace below.\n%s" % c.getMessage()
-        )
-        return PythonException(msg, stacktrace)
-
-    return UnknownException(desc=e.toString(), stackTrace=stacktrace, cause=c)
+    elif is_instance_of(gw, e, "org.apache.spark.api.python.PythonException"):
+        return PythonException(origin=e)
+    return UnknownException(
+        desc=e.toString(),
+        stackTrace=getattr(jvm, "org.apache.spark.util.Utils").exceptionString(e),
+        cause=e.getCause(),
+    )
 
 
 def capture_sql_exception(f: Callable[..., Any]) -> Callable[..., Any]:
@@ -348,6 +336,17 @@ class PythonException(CapturedException, BasePythonException):
     Exceptions thrown from Python workers.
     """
 
+    def __str__(self) -> str:
+        messageParameters = self.getMessageParameters()
+
+        if (
+            messageParameters is None
+            or "msg" not in messageParameters
+            or "traceback" not in messageParameters
+        ):
+            return super().__str__()
+        return f"{messageParameters['msg']}:\n{messageParameters['traceback'].strip()}"
+
 
 class ArithmeticException(CapturedException, BaseArithmeticException):
     """
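A standalone illustration of what the new PythonException.__str__ produces when both "msg" and "traceback" message parameters are present (the values here are made up):

messageParameters = {
    "msg": "ZeroDivisionError: division by zero",
    "traceback": "Traceback (most recent call last):\n  File \"udf.py\", line 3, in f\n",
}
print(f"{messageParameters['msg']}:\n{messageParameters['traceback'].strip()}")
# ZeroDivisionError: division by zero:
# Traceback (most recent call last):
#   File "udf.py", line 3, in f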
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/errors/utils.py
RENAMED
@@ -271,7 +271,7 @@ def _capture_call_site(depth: int) -> str:
         import IPython
 
         # ipykernel is required for IPython
-        import ipykernel
+        import ipykernel
 
         ipython = IPython.get_ipython()
         # Filtering out IPython related frames
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/instrumentation_utils.py
RENAMED
@@ -124,7 +124,7 @@ def _attach(
     logger_module: Union[str, ModuleType],
     modules: List[ModuleType],
     classes: List[Type[Any]],
-    missings: List[Tuple[Type[Any], Type[Any]]],
+    missings: List[Tuple[Union[ModuleType, Type[Any]], Type[Any]]],
 ) -> None:
     if isinstance(logger_module, str):
         logger_module = importlib.import_module(logger_module)
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/_typing.pyi
RENAMED
@@ -16,7 +16,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from typing import Any, Dict, List, TypeVar, Tuple, Union
+from typing import Any, Dict, List, TYPE_CHECKING, TypeVar, Tuple, Union
 from typing_extensions import Literal
 
 from numpy import ndarray
@@ -24,10 +24,12 @@ from py4j.java_gateway import JavaObject
 
 import pyspark.ml.base
 import pyspark.ml.param
-import pyspark.ml.util
 from pyspark.ml.linalg import Vector
 import pyspark.ml.wrapper
 
+if TYPE_CHECKING:
+    from scipy.sparse import spmatrix, sparray
+
 ParamMap = Dict[pyspark.ml.param.Param, Any]
 PipelineStage = Union[pyspark.ml.base.Estimator, pyspark.ml.base.Transformer]
 
@@ -81,4 +83,4 @@ RankingEvaluatorMetricType = Union[
     Literal["recallAtK"],
 ]
 
-VectorLike = Union[ndarray, Vector, List[float], Tuple[float, ...]]
+VectorLike = Union[ndarray, Vector, List[float], Tuple[float, ...], "spmatrix", "sparray", range]
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/functions.py
RENAMED
@@ -241,7 +241,7 @@ def _validate_and_transform_single_input(
     # tensor columns
     if len(batch.columns) == 1:
         # one tensor column and one expected input, vstack rows
-        single_input = np.vstack(batch.iloc[:, 0])
+        single_input = np.vstack(batch.iloc[:, 0])  # type: ignore[call-overload]
     else:
         raise ValueError(
             "Multiple input columns found, but model expected a single "
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/linalg/__init__.py
RENAMED
@@ -70,7 +70,6 @@ __all__ = [
 if TYPE_CHECKING:
     from pyspark.mllib._typing import NormType
     from pyspark.ml._typing import VectorLike
-    from scipy.sparse import spmatrix
 
 
 # Check whether we have SciPy. MLlib works without it too, but if we have it, some methods,
@@ -85,23 +84,25 @@ except BaseException:
     _have_scipy = False
 
 
-def _convert_to_vector(d:
+def _convert_to_vector(d: "VectorLike") -> "Vector":
     if isinstance(d, Vector):
         return d
-    elif
+    elif isinstance(d, (array.array, np.ndarray, list, tuple, range)):
         return DenseVector(d)
     elif _have_scipy and scipy.sparse.issparse(d):
-        assert
+        assert hasattr(d, "shape")
+        assert d.shape[1] == 1, "Expected column vector"
         # Make sure the converted csc_matrix has sorted indices.
-        csc = d.tocsc()
+        assert hasattr(d, "tocsc")
+        csc = d.tocsc()
         if not csc.has_sorted_indices:
             csc.sort_indices()
-        return SparseVector(
+        return SparseVector(d.shape[0], csc.indices, csc.data)
     else:
         raise TypeError("Cannot convert type %s into Vector" % type(d))
 
 
-def _vector_size(v:
+def _vector_size(v: "VectorLike") -> int:
     """
     Returns the size of the vector.
 
@@ -124,16 +125,17 @@ def _vector_size(v: Union["VectorLike", "spmatrix", range]) -> int:
     """
     if isinstance(v, Vector):
         return len(v)
-    elif
+    elif isinstance(v, (array.array, list, tuple, range)):
         return len(v)
-    elif
+    elif isinstance(v, np.ndarray):
         if v.ndim == 1 or (v.ndim == 2 and v.shape[1] == 1):
             return len(v)
         else:
             raise ValueError("Cannot treat an ndarray of shape %s as a vector" % str(v.shape))
     elif _have_scipy and scipy.sparse.issparse(v):
-        assert
-        return v.shape[0]
+        assert hasattr(v, "shape")
+        assert v.shape[1] == 1, "Expected column vector"
+        return v.shape[0]
     else:
         raise TypeError("Cannot treat type %s as a vector" % type(v))
 
@@ -337,13 +339,13 @@ class DenseVector(Vector):
     def __reduce__(self) -> Tuple[Type["DenseVector"], Tuple[bytes]]:
         return DenseVector, (self.array.tobytes(),)
 
-    def numNonzeros(self) -> int:
+    def numNonzeros(self) -> Union[int, np.intp]:
         """
         Number of nonzero elements. This scans all active values and count non zeros
         """
         return np.count_nonzero(self.array)
 
-    def norm(self, p: "NormType") -> np.float64:
+    def norm(self, p: "NormType") -> np.floating[Any]:
         """
         Calculates the norm of a DenseVector.
 
@@ -386,21 +388,23 @@ class DenseVector(Vector):
         ...
         AssertionError: dimension mismatch
         """
-        if
+        if isinstance(other, np.ndarray):
             if other.ndim > 1:
                 assert len(self) == other.shape[0], "dimension mismatch"
             return np.dot(self.array, other)
         elif _have_scipy and scipy.sparse.issparse(other):
-            assert
-            return other.transpose().dot(self.toArray())
+            assert hasattr(other, "shape")
+            assert len(self) == other.shape[0], "dimension mismatch"
+            assert hasattr(other, "transpose")
+            return other.transpose().dot(self.toArray())
         else:
-            assert len(self) == _vector_size(other), "dimension mismatch"
+            assert len(self) == _vector_size(other), "dimension mismatch"  # type: ignore[arg-type]
             if isinstance(other, SparseVector):
                 return other.dot(self)
             elif isinstance(other, Vector):
                 return np.dot(self.toArray(), other.toArray())
             else:
-                return np.dot(self.toArray(), other)
+                return np.dot(self.toArray(), other)
 
     def squared_distance(self, other: Iterable[float]) -> np.float64:
         """
@@ -429,10 +433,11 @@ class DenseVector(Vector):
         ...
         AssertionError: dimension mismatch
         """
-        assert len(self) == _vector_size(other), "dimension mismatch"
+        assert len(self) == _vector_size(other), "dimension mismatch"  # type: ignore[arg-type]
         if isinstance(other, SparseVector):
             return other.squared_distance(self)
         elif _have_scipy and scipy.sparse.issparse(other):
+            assert isinstance(other, scipy.sparse.spmatrix), "other must be a scipy.sparse.spmatrix"
             return _convert_to_vector(other).squared_distance(self)  # type: ignore[attr-defined]
 
         if isinstance(other, Vector):
@@ -636,13 +641,13 @@ class SparseVector(Vector):
         )
         assert np.min(self.indices) >= 0, "Contains negative index %d" % (np.min(self.indices))
 
-    def numNonzeros(self) -> int:
+    def numNonzeros(self) -> Union[int, np.intp]:
         """
         Number of nonzero elements. This scans all active values and count non zeros.
         """
         return np.count_nonzero(self.values)
 
-    def norm(self, p: "NormType") -> np.float64:
+    def norm(self, p: "NormType") -> np.floating[Any]:
         """
         Calculates the norm of a SparseVector.
 
@@ -699,7 +704,7 @@ class SparseVector(Vector):
             assert len(self) == other.shape[0], "dimension mismatch"
             return np.dot(self.values, other[self.indices])
 
-        assert len(self) == _vector_size(other), "dimension mismatch"
+        assert len(self) == _vector_size(other), "dimension mismatch"  # type: ignore[arg-type]
 
         if isinstance(other, DenseVector):
             return np.dot(other.array[self.indices], self.values)
@@ -717,7 +722,7 @@ class SparseVector(Vector):
         else:
             return self.dot(_convert_to_vector(other))  # type: ignore[arg-type]
 
-    def squared_distance(self, other:
+    def squared_distance(self, other: "VectorLike") -> np.float64:
         """
         Squared distance from a SparseVector or 1-dimensional NumPy array.
 
@@ -785,7 +790,7 @@ class SparseVector(Vector):
                 j += 1
             return result
         else:
-            return self.squared_distance(_convert_to_vector(other))
+            return self.squared_distance(_convert_to_vector(other))
 
     def toArray(self) -> np.ndarray:
         """
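A usage sketch of the SciPy branch shown in _convert_to_vector above: a sparse column vector of shape (n, 1) becomes a pyspark.ml.linalg SparseVector. Vectors.sparse is the public constructor; _convert_to_vector itself is internal.

import numpy as np
from scipy.sparse import csc_matrix
from pyspark.ml.linalg import Vectors

col = csc_matrix(np.array([[0.0], [3.0], [0.0], [1.5]]))  # shape (4, 1)
csc = col.tocsc()
if not csc.has_sorted_indices:
    csc.sort_indices()
sv = Vectors.sparse(col.shape[0], csc.indices.tolist(), csc.data.tolist())
print(sv)  # SparseVector(4, {1: 3.0, 3: 1.5})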
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/_typing.pyi
RENAMED
@@ -16,7 +16,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from typing import List, Tuple, TypeVar, Union
+from typing import List, Tuple, TYPE_CHECKING, TypeVar, Union
 
 from typing_extensions import Literal
 from numpy import ndarray  # noqa: F401
@@ -24,10 +24,14 @@ from py4j.java_gateway import JavaObject
 
 from pyspark.mllib.linalg import Vector
 
-
+if TYPE_CHECKING:
+    from scipy.sparse import spmatrix, sparray
+
 C = TypeVar("C", bound=type)
 JavaObjectOrPickleDump = Union[JavaObject, bytearray, bytes]
 
 CorrMethodType = Union[Literal["spearman"], Literal["pearson"]]
 KolmogorovSmirnovTestDistNameType = Literal["norm"]
 NormType = Union[None, float, Literal["fro"], Literal["nuc"]]
+
+VectorLike = Union[ndarray, Vector, List[float], Tuple[float, ...], "spmatrix", "sparray", range]
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/linalg/__init__.py
RENAMED
@@ -61,7 +61,6 @@ from pyspark.sql.types import (
 
 if TYPE_CHECKING:
     from pyspark.mllib._typing import VectorLike, NormType
-    from scipy.sparse import spmatrix
     from numpy.typing import ArrayLike
 
 
@@ -94,23 +93,25 @@ except BaseException:
     _have_scipy = False
 
 
-def _convert_to_vector(d:
+def _convert_to_vector(d: "VectorLike") -> "Vector":
     if isinstance(d, Vector):
         return d
-    elif
+    elif isinstance(d, (array.array, np.ndarray, list, tuple, range)):
        return DenseVector(d)
     elif _have_scipy and scipy.sparse.issparse(d):
-        assert
+        assert hasattr(d, "shape")
+        assert d.shape[1] == 1, "Expected column vector"
        # Make sure the converted csc_matrix has sorted indices.
-        csc = d.tocsc()
+        assert hasattr(d, "tocsc")
+        csc = d.tocsc()
        if not csc.has_sorted_indices:
            csc.sort_indices()
-        return SparseVector(
+        return SparseVector(d.shape[0], csc.indices, csc.data)
     else:
         raise TypeError("Cannot convert type %s into Vector" % type(d))
 
 
-def _vector_size(v:
+def _vector_size(v: "VectorLike") -> int:
     """
     Returns the size of the vector.
 
@@ -133,16 +134,17 @@ def _vector_size(v: Union["VectorLike", "spmatrix", range]) -> int:
     """
     if isinstance(v, Vector):
         return len(v)
-    elif
+    elif isinstance(v, (array.array, list, tuple, range)):
         return len(v)
-    elif
+    elif isinstance(v, np.ndarray):
         if v.ndim == 1 or (v.ndim == 2 and v.shape[1] == 1):
             return len(v)
         else:
             raise ValueError("Cannot treat an ndarray of shape %s as a vector" % str(v.shape))
     elif _have_scipy and scipy.sparse.issparse(v):
-        assert
-        return v.shape[0]
+        assert hasattr(v, "shape")
+        assert v.shape[1] == 1, "Expected column vector"
+        return v.shape[0]
     else:
         raise TypeError("Cannot treat type %s as a vector" % type(v))
 
@@ -390,13 +392,13 @@ class DenseVector(Vector):
     def __reduce__(self) -> Tuple[Type["DenseVector"], Tuple[bytes]]:
         return DenseVector, (self.array.tobytes(),)
 
-    def numNonzeros(self) -> int:
+    def numNonzeros(self) -> Union[int, np.intp]:
         """
         Number of nonzero elements. This scans all active values and count non zeros
         """
         return np.count_nonzero(self.array)
 
-    def norm(self, p: "NormType") -> np.float64:
+    def norm(self, p: "NormType") -> np.floating[Any]:
         """
         Calculates the norm of a DenseVector.
 
@@ -410,7 +412,7 @@ class DenseVector(Vector):
         """
         return np.linalg.norm(self.array, p)
 
-    def dot(self, other:
+    def dot(self, other: "VectorLike") -> np.float64:
         """
         Compute the dot product of two Vectors. We support
         (Numpy array, list, SparseVector, or SciPy sparse)
@@ -444,8 +446,10 @@ class DenseVector(Vector):
             assert len(self) == other.shape[0], "dimension mismatch"
             return np.dot(self.array, other)
         elif _have_scipy and scipy.sparse.issparse(other):
-            assert
-            return other.transpose().dot(self.toArray())
+            assert hasattr(other, "shape")
+            assert len(self) == other.shape[0], "dimension mismatch"
+            assert hasattr(other, "transpose")
+            return other.transpose().dot(self.toArray())
         else:
             assert len(self) == _vector_size(other), "dimension mismatch"
             if isinstance(other, SparseVector):
@@ -453,9 +457,9 @@ class DenseVector(Vector):
         elif isinstance(other, Vector):
             return np.dot(self.toArray(), other.toArray())
         else:
-            return np.dot(self.toArray(), cast("ArrayLike", other))
+            return np.dot(self.toArray(), cast("ArrayLike", other))  # type: ignore[valid-type]
 
-    def squared_distance(self, other:
+    def squared_distance(self, other: "VectorLike") -> np.float64:
         """
         Squared distance of two Vectors.
 
@@ -685,13 +689,13 @@ class SparseVector(Vector):
                 % (self.indices[i], self.indices[i + 1])
             )
 
-    def numNonzeros(self) -> int:
+    def numNonzeros(self) -> Union[int, np.intp]:
         """
         Number of nonzero elements. This scans all active values and count non zeros.
         """
         return np.count_nonzero(self.values)
 
-    def norm(self, p: "NormType") -> np.float64:
+    def norm(self, p: "NormType") -> np.floating[Any]:
         """
         Calculates the norm of a SparseVector.
 
@@ -766,7 +770,7 @@ class SparseVector(Vector):
             raise ValueError("Unable to parse values from %s." % s)
         return SparseVector(cast(int, size), indices, values)
 
-    def dot(self, other:
+    def dot(self, other: "VectorLike") -> np.float64:
         """
         Dot product with a SparseVector or 1- or 2-dimensional Numpy array.
 
@@ -822,9 +826,9 @@ class SparseVector(Vector):
             return np.dot(self_values, other.values[other_cmind])
 
         else:
-            return self.dot(_convert_to_vector(other))
+            return self.dot(_convert_to_vector(other))
 
-    def squared_distance(self, other:
+    def squared_distance(self, other: "VectorLike") -> np.float64:
         """
         Squared distance from a SparseVector or 1-dimensional NumPy array.
 
@@ -892,7 +896,7 @@ class SparseVector(Vector):
                 j += 1
             return result
         else:
-            return self.squared_distance(_convert_to_vector(other))
+            return self.squared_distance(_convert_to_vector(other))
 
     def toArray(self) -> np.ndarray:
         """
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/regression.py
RENAMED
@@ -84,7 +84,7 @@ class LabeledPoint:
     'label' and 'features' are accessible as class attributes.
     """
 
-    def __init__(self, label: float, features:
+    def __init__(self, label: float, features: "VectorLike"):
         self.label = float(label)
         self.features = _convert_to_vector(features)
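Usage sketch for the widened signature above: LabeledPoint accepts any VectorLike and normalizes it through _convert_to_vector.

import numpy as np
from pyspark.mllib.regression import LabeledPoint

lp1 = LabeledPoint(1.0, [0.0, 2.5, 0.0])             # plain list
lp2 = LabeledPoint(0.0, np.array([1.0, 0.0, 3.0]))   # NumPy array
print(lp1.features, lp2.features)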
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/stat/_statistics.py
RENAMED
@@ -189,7 +189,8 @@ class Statistics:
 
         if not y:
             return cast(
-                JavaObject,
+                JavaObject,
+                callMLlibFunc("corr", cast(RDD[Vector], x).map(_convert_to_vector), method),
             ).toArray()
         else:
             return cast(
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/util.py
RENAMED
@@ -145,11 +145,7 @@ class MLUtils:
         if numFeatures <= 0:
             parsed.cache()
             numFeatures = parsed.map(lambda x: -1 if x[1].size == 0 else x[1][-1]).reduce(max) + 1
-        return parsed.map(
-            lambda x: LabeledPoint(
-                x[0], Vectors.sparse(numFeatures, x[1], x[2])  # type: ignore[arg-type]
-            )
-        )
+        return parsed.map(lambda x: LabeledPoint(x[0], Vectors.sparse(numFeatures, x[1], x[2])))
 
     @staticmethod
     def saveAsLibSVMFile(data: RDD["LabeledPoint"], dir: str) -> None:
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/accessors.py
RENAMED
@@ -579,7 +579,7 @@ class PandasOnSparkFrameMethods:
             return original_func(o, *args, **kwargs)
 
         def apply_func(pdf: pd.DataFrame) -> pd.DataFrame:
-            return new_func(pdf).to_frame()
+            return new_func(pdf).to_frame()  # type: ignore[operator]
 
         def pandas_series_func(
             f: Callable[[pd.DataFrame], pd.DataFrame], return_type: DataType
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/base.py
RENAMED
@@ -26,7 +26,7 @@ from typing import Any, Callable, Optional, Sequence, Tuple, Union, cast, TYPE_C
 
 import numpy as np
 import pandas as pd
-from pandas.api.types import is_list_like, CategoricalDtype
+from pandas.api.types import is_list_like, CategoricalDtype
 
 from pyspark.sql import functions as F, Column, Window
 from pyspark.sql.types import LongType, BooleanType, NumericType
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/base.py
RENAMED
@@ -116,7 +116,7 @@ def _should_return_all_false(left: IndexOpsLike, right: Any) -> bool:
     based on incompatible dtypes: non-numeric vs. numeric (including bools).
     """
     from pyspark.pandas.base import IndexOpsMixin
-    from pandas.api.types import is_list_like
+    from pandas.api.types import is_list_like
 
     def are_both_numeric(left_dtype: Dtype, right_dtype: Dtype) -> bool:
         return is_numeric_dtype(left_dtype) and is_numeric_dtype(right_dtype)
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/boolean_ops.py
RENAMED
@@ -19,7 +19,7 @@ import numbers
 from typing import Any, Union
 
 import pandas as pd
-from pandas.api.types import CategoricalDtype, is_integer_dtype
+from pandas.api.types import CategoricalDtype, is_integer_dtype
 from pandas.core.dtypes.common import is_numeric_dtype
 
 from pyspark.pandas.base import column_op, IndexOpsMixin
{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/categorical_ops.py
RENAMED
@@ -16,11 +16,11 @@
 #
 
 from itertools import chain
-from typing import cast, Any, Union
+from typing import cast, Any, Sequence, Union
 
 import pandas as pd
 import numpy as np
-from pandas.api.types import is_list_like, CategoricalDtype
+from pandas.api.types import is_list_like, CategoricalDtype
 
 from pyspark.pandas._typing import Dtype, IndexOpsLike, SeriesOrIndex
 from pyspark.pandas.base import IndexOpsMixin
@@ -43,7 +43,7 @@ class CategoricalOps(DataTypeOps):
         """Restore column when to_pandas."""
         return pd.Series(
             pd.Categorical.from_codes(
-                col.replace(np.nan, -1).astype(int),
+                cast(Sequence[int], col.replace(np.nan, -1).astype(int)),
                 categories=cast(CategoricalDtype, self.dtype).categories,
                 ordered=cast(CategoricalDtype, self.dtype).ordered,
             )