pyspark-client 4.2.0.dev1__tar.gz → 4.2.0.dev2__tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (381)
  1. {pyspark_client-4.2.0.dev1/pyspark_client.egg-info → pyspark_client-4.2.0.dev2}/PKG-INFO +2 -2
  2. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/_typing.pyi +3 -3
  3. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/errors/error-conditions.json +11 -0
  4. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/errors/exceptions/captured.py +18 -19
  5. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/errors/utils.py +1 -1
  6. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/instrumentation_utils.py +1 -1
  7. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/_typing.pyi +5 -3
  8. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/functions.py +1 -1
  9. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/linalg/__init__.py +29 -24
  10. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/_typing.pyi +6 -2
  11. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/linalg/__init__.py +28 -24
  12. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/linalg/distributed.py +1 -1
  13. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/regression.py +1 -1
  14. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/stat/_statistics.py +2 -1
  15. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/util.py +1 -5
  16. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/accessors.py +1 -1
  17. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/base.py +1 -1
  18. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/categorical.py +1 -1
  19. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/base.py +1 -1
  20. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/boolean_ops.py +1 -1
  21. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/categorical_ops.py +3 -3
  22. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/datetime_ops.py +8 -0
  23. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/null_ops.py +1 -1
  24. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/num_ops.py +1 -1
  25. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/timedelta_ops.py +8 -0
  26. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/datetimes.py +4 -4
  27. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/frame.py +175 -119
  28. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/generic.py +6 -6
  29. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/groupby.py +18 -7
  30. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/indexes/base.py +8 -10
  31. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/indexes/category.py +1 -1
  32. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/indexes/datetimes.py +5 -5
  33. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/indexes/multi.py +1 -1
  34. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/indexes/timedelta.py +1 -1
  35. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/indexing.py +4 -4
  36. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/internal.py +2 -2
  37. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/namespace.py +18 -11
  38. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/plot/core.py +1 -1
  39. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/plot/matplotlib.py +3 -3
  40. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/resample.py +10 -7
  41. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/series.py +33 -28
  42. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/testing.py +7 -7
  43. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/typedef/typehints.py +41 -12
  44. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/usage_logging/__init__.py +2 -2
  45. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/utils.py +7 -6
  46. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/profiler.py +17 -0
  47. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/_typing.pyi +1 -1
  48. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/client/artifact.py +12 -2
  49. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/client/core.py +67 -4
  50. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/client/reattach.py +3 -1
  51. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/dataframe.py +20 -12
  52. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/functions/builtin.py +109 -50
  53. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/plan.py +6 -0
  54. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/base_pb2.py +17 -7
  55. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/base_pb2.pyi +267 -0
  56. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/base_pb2_grpc.py +47 -0
  57. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/catalog_pb2.py +2 -2
  58. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/commands_pb2.py +72 -72
  59. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/commands_pb2.pyi +12 -1
  60. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/common_pb2.py +2 -2
  61. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/example_plugins_pb2.py +2 -2
  62. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/expressions_pb2.py +2 -2
  63. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/ml_common_pb2.py +2 -2
  64. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/ml_pb2.py +2 -2
  65. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/pipelines_pb2.py +2 -2
  66. pyspark_client-4.2.0.dev2/pyspark/sql/connect/proto/relations_pb2.py +251 -0
  67. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/relations_pb2.pyi +19 -0
  68. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/types_pb2.py +2 -2
  69. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/session.py +49 -38
  70. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/streaming/readwriter.py +41 -3
  71. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/context.py +14 -1
  72. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/conversion.py +539 -1
  73. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/dataframe.py +40 -19
  74. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/functions/__init__.py +3 -0
  75. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/functions/builtin.py +207 -57
  76. pyspark_client-4.2.0.dev2/pyspark/sql/interchange.py +89 -0
  77. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/conversion.py +138 -26
  78. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/serializers.py +491 -515
  79. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/types.py +104 -32
  80. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/plot/core.py +1 -1
  81. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/profiler.py +157 -34
  82. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/session.py +60 -20
  83. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/__init__.py +2 -0
  84. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/listener.py +3 -3
  85. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/proto/StateMessage_pb2.py +2 -2
  86. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/query.py +56 -0
  87. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/readwriter.py +70 -3
  88. pyspark_client-4.2.0.dev2/pyspark/sql/streaming/tws_tester.py +689 -0
  89. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/utils.py +12 -1
  90. pyspark_client-4.2.0.dev2/pyspark/sql/worker/analyze_udtf.py +250 -0
  91. pyspark_client-4.2.0.dev2/pyspark/sql/worker/commit_data_source_write.py +89 -0
  92. pyspark_client-4.2.0.dev2/pyspark/sql/worker/create_data_source.py +157 -0
  93. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/worker/data_source_pushdown_filters.py +79 -121
  94. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/worker/lookup_data_sources.py +18 -57
  95. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/worker/plan_data_source_read.py +86 -122
  96. pyspark_client-4.2.0.dev1/pyspark/sql/worker/commit_data_source_write.py → pyspark_client-4.2.0.dev2/pyspark/sql/worker/python_streaming_sink_runner.py +55 -58
  97. pyspark_client-4.2.0.dev2/pyspark/sql/worker/utils.py +94 -0
  98. pyspark_client-4.2.0.dev2/pyspark/sql/worker/write_into_data_source.py +252 -0
  99. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/taskcontext.py +44 -4
  100. pyspark_client-4.2.0.dev2/pyspark/testing/__init__.py +21 -0
  101. pyspark_client-4.2.0.dev2/pyspark/testing/goldenutils.py +254 -0
  102. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/testing/pandasutils.py +15 -1
  103. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/testing/sqlutils.py +2 -2
  104. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/testing/streamingutils.py +2 -2
  105. pyspark_client-4.2.0.dev2/pyspark/testing/unittestutils.py +55 -0
  106. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/testing/utils.py +48 -37
  107. pyspark_client-4.2.0.dev2/pyspark/threaddump.py +62 -0
  108. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/util.py +35 -11
  109. pyspark_client-4.2.0.dev2/pyspark/version.py +1 -0
  110. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/worker.py +145 -287
  111. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/worker_util.py +33 -1
  112. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2/pyspark_client.egg-info}/PKG-INFO +2 -2
  113. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark_client.egg-info/SOURCES.txt +6 -0
  114. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark_client.egg-info/requires.txt +1 -1
  115. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/setup.py +1 -1
  116. pyspark_client-4.2.0.dev1/pyspark/sql/connect/proto/relations_pb2.py +0 -251
  117. pyspark_client-4.2.0.dev1/pyspark/sql/worker/analyze_udtf.py +0 -288
  118. pyspark_client-4.2.0.dev1/pyspark/sql/worker/create_data_source.py +0 -193
  119. pyspark_client-4.2.0.dev1/pyspark/sql/worker/python_streaming_sink_runner.py +0 -159
  120. pyspark_client-4.2.0.dev1/pyspark/sql/worker/write_into_data_source.py +0 -285
  121. pyspark_client-4.2.0.dev1/pyspark/testing/__init__.py +0 -47
  122. pyspark_client-4.2.0.dev1/pyspark/version.py +0 -1
  123. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/MANIFEST.in +0 -0
  124. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/README.md +0 -0
  125. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/__init__.py +0 -0
  126. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/_globals.py +0 -0
  127. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/accumulators.py +0 -0
  128. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/cloudpickle/__init__.py +0 -0
  129. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/cloudpickle/cloudpickle.py +0 -0
  130. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/cloudpickle/cloudpickle_fast.py +0 -0
  131. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/conf.py +0 -0
  132. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/daemon.py +0 -0
  133. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/errors/__init__.py +0 -0
  134. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/errors/error_classes.py +0 -0
  135. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/errors/exceptions/__init__.py +0 -0
  136. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/errors/exceptions/base.py +0 -0
  137. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/errors/exceptions/connect.py +0 -0
  138. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/errors/exceptions/tblib.py +0 -0
  139. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/errors_doc_gen.py +0 -0
  140. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/find_spark_home.py +0 -0
  141. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/install.py +0 -0
  142. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/java_gateway.py +0 -0
  143. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/join.py +0 -0
  144. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/logger/__init__.py +0 -0
  145. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/logger/logger.py +0 -0
  146. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/logger/worker_io.py +0 -0
  147. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/loose_version.py +0 -0
  148. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/__init__.py +0 -0
  149. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/base.py +0 -0
  150. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/classification.py +0 -0
  151. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/clustering.py +0 -0
  152. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/common.py +0 -0
  153. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/__init__.py +0 -0
  154. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/base.py +0 -0
  155. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/classification.py +0 -0
  156. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/evaluation.py +0 -0
  157. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/feature.py +0 -0
  158. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/functions.py +0 -0
  159. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/io_utils.py +0 -0
  160. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/pipeline.py +0 -0
  161. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/proto.py +0 -0
  162. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/readwrite.py +0 -0
  163. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/serialize.py +0 -0
  164. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/summarizer.py +0 -0
  165. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/tuning.py +0 -0
  166. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/connect/util.py +0 -0
  167. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/deepspeed/__init__.py +0 -0
  168. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/deepspeed/deepspeed_distributor.py +0 -0
  169. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/dl_util.py +0 -0
  170. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/evaluation.py +0 -0
  171. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/feature.py +0 -0
  172. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/fpm.py +0 -0
  173. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/image.py +0 -0
  174. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/model_cache.py +0 -0
  175. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/param/__init__.py +0 -0
  176. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/param/_shared_params_code_gen.py +0 -0
  177. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/param/shared.py +0 -0
  178. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/pipeline.py +0 -0
  179. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/recommendation.py +0 -0
  180. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/regression.py +0 -0
  181. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/stat.py +0 -0
  182. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/torch/__init__.py +0 -0
  183. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/torch/data.py +0 -0
  184. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/torch/distributor.py +0 -0
  185. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/torch/log_communication.py +0 -0
  186. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/torch/torch_run_process_wrapper.py +0 -0
  187. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/tree.py +0 -0
  188. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/tuning.py +0 -0
  189. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/util.py +0 -0
  190. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/ml/wrapper.py +0 -0
  191. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/__init__.py +0 -0
  192. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/classification.py +0 -0
  193. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/clustering.py +0 -0
  194. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/common.py +0 -0
  195. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/evaluation.py +0 -0
  196. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/feature.py +0 -0
  197. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/fpm.py +0 -0
  198. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/random.py +0 -0
  199. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/recommendation.py +0 -0
  200. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/stat/KernelDensity.py +0 -0
  201. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/stat/__init__.py +0 -0
  202. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/stat/distribution.py +0 -0
  203. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/stat/test.py +0 -0
  204. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/mllib/tree.py +0 -0
  205. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/__init__.py +0 -0
  206. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/_typing.py +0 -0
  207. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/config.py +0 -0
  208. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/correlation.py +0 -0
  209. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/__init__.py +0 -0
  210. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/binary_ops.py +0 -0
  211. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/complex_ops.py +0 -0
  212. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/date_ops.py +0 -0
  213. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/string_ops.py +0 -0
  214. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/data_type_ops/udt_ops.py +0 -0
  215. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/exceptions.py +0 -0
  216. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/extensions.py +0 -0
  217. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/indexes/__init__.py +0 -0
  218. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/missing/__init__.py +0 -0
  219. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/missing/common.py +0 -0
  220. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/missing/frame.py +0 -0
  221. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/missing/general_functions.py +0 -0
  222. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/missing/groupby.py +0 -0
  223. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/missing/indexes.py +0 -0
  224. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/missing/resample.py +0 -0
  225. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/missing/scalars.py +0 -0
  226. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/missing/series.py +0 -0
  227. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/missing/window.py +0 -0
  228. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/mlflow.py +0 -0
  229. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/numpy_compat.py +0 -0
  230. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/plot/__init__.py +0 -0
  231. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/plot/plotly.py +0 -0
  232. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/spark/__init__.py +0 -0
  233. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/spark/accessors.py +0 -0
  234. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/spark/utils.py +0 -0
  235. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/sql_formatter.py +0 -0
  236. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/sql_processor.py +0 -0
  237. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/strings.py +0 -0
  238. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/supported_api_gen.py +0 -0
  239. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/typedef/__init__.py +0 -0
  240. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/usage_logging/usage_logger.py +0 -0
  241. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pandas/window.py +0 -0
  242. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/__init__.py +0 -0
  243. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/add_pipeline_analysis_context.py +0 -0
  244. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/api.py +0 -0
  245. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/block_session_mutations.py +0 -0
  246. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/cli.py +0 -0
  247. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/flow.py +0 -0
  248. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/graph_element_registry.py +0 -0
  249. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/init_cli.py +0 -0
  250. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/logging_utils.py +0 -0
  251. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/output.py +0 -0
  252. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/source_code_location.py +0 -0
  253. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/spark_connect_graph_element_registry.py +0 -0
  254. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/spark_connect_pipeline.py +0 -0
  255. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/pipelines/type_error_utils.py +0 -0
  256. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/py.typed +0 -0
  257. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/rddsampler.py +0 -0
  258. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/resource/__init__.py +0 -0
  259. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/resource/information.py +0 -0
  260. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/resource/profile.py +0 -0
  261. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/resource/requests.py +0 -0
  262. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/resultiterable.py +0 -0
  263. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/serializers.py +0 -0
  264. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/shell.py +0 -0
  265. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/shuffle.py +0 -0
  266. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/__init__.py +0 -0
  267. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/avro/__init__.py +0 -0
  268. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/avro/functions.py +0 -0
  269. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/catalog.py +0 -0
  270. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/column.py +0 -0
  271. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/conf.py +0 -0
  272. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/__init__.py +0 -0
  273. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/_typing.py +0 -0
  274. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/avro/__init__.py +0 -0
  275. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/avro/functions.py +0 -0
  276. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/catalog.py +0 -0
  277. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/client/__init__.py +0 -0
  278. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/client/retries.py +0 -0
  279. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/column.py +0 -0
  280. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/conf.py +0 -0
  281. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/conversion.py +0 -0
  282. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/datasource.py +0 -0
  283. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/expressions.py +0 -0
  284. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/functions/__init__.py +0 -0
  285. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/functions/partitioning.py +0 -0
  286. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/group.py +0 -0
  287. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/logging.py +0 -0
  288. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/merge.py +0 -0
  289. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/observation.py +0 -0
  290. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/profiler.py +0 -0
  291. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/__init__.py +0 -0
  292. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/catalog_pb2.pyi +0 -0
  293. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/common_pb2.pyi +0 -0
  294. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/example_plugins_pb2.pyi +0 -0
  295. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/expressions_pb2.pyi +0 -0
  296. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/ml_common_pb2.pyi +0 -0
  297. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/ml_pb2.pyi +0 -0
  298. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/pipelines_pb2.pyi +0 -0
  299. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/proto/types_pb2.pyi +0 -0
  300. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/protobuf/__init__.py +0 -0
  301. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/protobuf/functions.py +0 -0
  302. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/readwriter.py +0 -0
  303. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/resource/__init__.py +0 -0
  304. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/resource/profile.py +0 -0
  305. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/shell/__init__.py +0 -0
  306. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/shell/progress.py +0 -0
  307. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/sql_formatter.py +0 -0
  308. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/streaming/__init__.py +0 -0
  309. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/streaming/query.py +0 -0
  310. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/streaming/worker/__init__.py +0 -0
  311. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +0 -0
  312. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/streaming/worker/listener_worker.py +0 -0
  313. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/table_arg.py +0 -0
  314. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/tvf.py +0 -0
  315. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/types.py +0 -0
  316. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/udf.py +0 -0
  317. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/udtf.py +0 -0
  318. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/utils.py +0 -0
  319. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/connect/window.py +0 -0
  320. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/datasource.py +0 -0
  321. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/datasource_internal.py +0 -0
  322. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/functions/partitioning.py +0 -0
  323. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/geo_utils.py +0 -0
  324. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/group.py +0 -0
  325. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/internal.py +0 -0
  326. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/merge.py +0 -0
  327. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/metrics.py +0 -0
  328. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/observation.py +0 -0
  329. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/__init__.py +0 -0
  330. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/_typing/__init__.pyi +0 -0
  331. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/_typing/protocols/__init__.pyi +0 -0
  332. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/_typing/protocols/frame.pyi +0 -0
  333. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/_typing/protocols/series.pyi +0 -0
  334. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/functions.py +0 -0
  335. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/functions.pyi +0 -0
  336. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/group_ops.py +0 -0
  337. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/map_ops.py +0 -0
  338. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/typehints.py +0 -0
  339. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/pandas/utils.py +0 -0
  340. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/plot/__init__.py +0 -0
  341. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/plot/plotly.py +0 -0
  342. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/protobuf/__init__.py +0 -0
  343. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/protobuf/functions.py +0 -0
  344. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/readwriter.py +0 -0
  345. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/sql_formatter.py +0 -0
  346. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/list_state_client.py +0 -0
  347. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/map_state_client.py +0 -0
  348. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/proto/StateMessage_pb2.pyi +0 -0
  349. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/proto/__init__.py +0 -0
  350. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/python_streaming_source_runner.py +0 -0
  351. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/state.py +0 -0
  352. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/stateful_processor.py +0 -0
  353. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/stateful_processor_api_client.py +0 -0
  354. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/stateful_processor_util.py +0 -0
  355. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/transform_with_state_driver_worker.py +0 -0
  356. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/streaming/value_state_client.py +0 -0
  357. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/table_arg.py +0 -0
  358. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/tvf.py +0 -0
  359. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/tvf_argument.py +0 -0
  360. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/types.py +0 -0
  361. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/udf.py +0 -0
  362. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/udtf.py +0 -0
  363. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/variant_utils.py +0 -0
  364. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/window.py +0 -0
  365. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/sql/worker/__init__.py +0 -0
  366. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/statcounter.py +0 -0
  367. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/storagelevel.py +0 -0
  368. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/streaming/__init__.py +0 -0
  369. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/streaming/context.py +0 -0
  370. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/streaming/dstream.py +0 -0
  371. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/streaming/kinesis.py +0 -0
  372. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/streaming/listener.py +0 -0
  373. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/streaming/util.py +0 -0
  374. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/testing/connectutils.py +0 -0
  375. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/testing/mllibutils.py +0 -0
  376. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/testing/mlutils.py +0 -0
  377. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/testing/objects.py +0 -0
  378. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark/traceback_utils.py +0 -0
  379. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark_client.egg-info/dependency_links.txt +0 -0
  380. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/pyspark_client.egg-info/top_level.txt +0 -0
  381. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev2}/setup.cfg +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pyspark-client
-Version: 4.2.0.dev1
+Version: 4.2.0.dev2
 Summary: Python Spark Connect client for Apache Spark
 Home-page: https://github.com/apache/spark/tree/master/python
 Author: Spark Developers
@@ -18,7 +18,7 @@ Classifier: Typing :: Typed
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 Requires-Dist: pandas>=2.2.0
-Requires-Dist: pyarrow>=15.0.0
+Requires-Dist: pyarrow>=18.0.0
 Requires-Dist: grpcio>=1.76.0
 Requires-Dist: grpcio-status>=1.76.0
 Requires-Dist: googleapis-common-protos>=1.71.0
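The packaging change raises the pyarrow floor from 15.0.0 to 18.0.0, so environments pinned to an older pyarrow need an upgrade before installing this build. A quick check and upgrade (plain pip, nothing package-specific assumed):

    python -c "import pyarrow; print(pyarrow.__version__)"
    pip install --upgrade "pyarrow>=18.0.0"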
pyspark/_typing.pyi
@@ -16,7 +16,7 @@
 # specific language governing permissions and limitations
 # under the License.

-from typing import Callable, Iterable, Sized, TypeVar, Union
+from typing import Any, Callable, Iterable, Sized, TypeVar, Union
 from typing_extensions import Literal, Protocol

 from numpy import int32, int64, float32, float64, ndarray
@@ -29,10 +29,10 @@ PrimitiveType = Union[bool, float, int, str]
 NonUDFType = Literal[0]

 class SupportsIAdd(Protocol):
-    def __iadd__(self, other: SupportsIAdd) -> SupportsIAdd: ...
+    def __iadd__(self, other: Any) -> SupportsIAdd: ...

 class SupportsOrdering(Protocol):
-    def __lt__(self, other: SupportsOrdering) -> bool: ...
+    def __lt__(self, other: Any) -> bool: ...

 class SizedIterable(Protocol, Sized, Iterable[T_co]): ...
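Loosening `other` to `Any` in these protocols matches how structural typing checks method parameters (contravariantly): with the old self-referential signature, an implementor such as `list`, whose `__iadd__` accepts any iterable, failed to match. A minimal sketch of the effect; the checker behavior described here is an assumption, not part of the diff:

    from typing import Any, Iterable, Protocol

    class SupportsIAdd(Protocol):
        def __iadd__(self, other: Any) -> "SupportsIAdd": ...

    def accumulate(acc: SupportsIAdd, more: Iterable[int]) -> SupportsIAdd:
        acc += more  # list.__iadd__ accepts Iterable, which now satisfies the protocol
        return acc

    accumulate([1, 2], [3])  # accepted; with `other: SupportsIAdd` the list did not match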
pyspark/errors/error-conditions.json
@@ -465,6 +465,11 @@
       "Parameter value <arg_name> must be a valid UUID format: <origin>"
     ]
   },
+  "INVALID_STREAMING_SOURCE_NAME": {
+    "message": [
+      "Invalid streaming source name '<source_name>'. Source names must contain only ASCII letters, digits, and underscores."
+    ]
+  },
   "INVALID_TIMEOUT_TIMESTAMP": {
     "message": [
       "Timeout timestamp (<timestamp>) cannot be earlier than the current watermark (<watermark>)."
@@ -551,6 +556,12 @@
       "<arg1> and <arg2> should be of the same length, got <arg1_length> and <arg2_length>."
     ]
   },
+  "LOCAL_RELATION_SIZE_LIMIT_EXCEEDED": {
+    "message": [
+      "Local relation size (<actualSize> bytes) exceeds the limit (<sizeLimit> bytes)."
+    ],
+    "sqlState": "54000"
+  },
   "MALFORMED_GEOGRAPHY": {
     "message": [
       "Geography binary is malformed. Please check the data source is valid."
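Entries in this file are message templates whose <placeholder> tokens are filled from a parameter dict when the error is raised. A small standalone sketch of that substitution (the substitution mechanics are the assumption here; the template is taken verbatim from the new entry):

    import re

    template = ("Local relation size (<actualSize> bytes) exceeds "
                "the limit (<sizeLimit> bytes).")
    params = {"actualSize": "2097152", "sizeLimit": "1048576"}
    message = re.sub(r"<(\w+)>", lambda m: params[m.group(1)], template)
    print(message)
    # Local relation size (2097152 bytes) exceeds the limit (1048576 bytes).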
pyspark/errors/exceptions/captured.py
@@ -234,25 +234,13 @@ def _convert_exception(e: "Py4JJavaError") -> CapturedException:
         return SparkUpgradeException(origin=e)
     elif is_instance_of(gw, e, "org.apache.spark.SparkNoSuchElementException"):
         return SparkNoSuchElementException(origin=e)
-
-    c: "Py4JJavaError" = e.getCause()
-    stacktrace: str = getattr(jvm, "org.apache.spark.util.Utils").exceptionString(e)
-    if c is not None and (
-        is_instance_of(gw, c, "org.apache.spark.api.python.PythonException")
-        # To make sure this only catches Python UDFs.
-        and any(
-            map(
-                lambda v: "org.apache.spark.sql.execution.python" in v.toString(), c.getStackTrace()
-            )
-        )
-    ):
-        msg = (
-            "\n An exception was thrown from the Python worker. "
-            "Please see the stack trace below.\n%s" % c.getMessage()
-        )
-        return PythonException(msg, stacktrace)
-
-    return UnknownException(desc=e.toString(), stackTrace=stacktrace, cause=c)
+    elif is_instance_of(gw, e, "org.apache.spark.api.python.PythonException"):
+        return PythonException(origin=e)
+    return UnknownException(
+        desc=e.toString(),
+        stackTrace=getattr(jvm, "org.apache.spark.util.Utils").exceptionString(e),
+        cause=e.getCause(),
+    )


 def capture_sql_exception(f: Callable[..., Any]) -> Callable[..., Any]:
@@ -348,6 +336,17 @@ class PythonException(CapturedException, BasePythonException):
     Exceptions thrown from Python workers.
     """

+    def __str__(self) -> str:
+        messageParameters = self.getMessageParameters()
+
+        if (
+            messageParameters is None
+            or "msg" not in messageParameters
+            or "traceback" not in messageParameters
+        ):
+            return super().__str__()
+        return f"{messageParameters['msg']}:\n{messageParameters['traceback'].strip()}"
+

 class ArithmeticException(CapturedException, BaseArithmeticException):
     """
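Taken together, the two hunks above move Python-worker failures onto the origin-based path: _convert_exception now maps the JVM PythonException class directly instead of inspecting the cause's stack trace, and the new __str__ renders the worker's message and traceback when both parameters are present. A usage sketch (df and failing_udf are placeholders, not part of the diff):

    from pyspark.errors import PythonException

    try:
        df.select(failing_udf("value")).collect()  # placeholder DataFrame and UDF
    except PythonException as e:
        # Prints "<msg>:\n<worker traceback>" when the msg/traceback
        # parameters are set; otherwise falls back to the previous rendering.
        print(e)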
pyspark/errors/utils.py
@@ -271,7 +271,7 @@ def _capture_call_site(depth: int) -> str:
         import IPython

         # ipykernel is required for IPython
-        import ipykernel  # type: ignore[import-not-found]
+        import ipykernel

         ipython = IPython.get_ipython()
         # Filtering out IPython related frames
pyspark/instrumentation_utils.py
@@ -124,7 +124,7 @@ def _attach(
     logger_module: Union[str, ModuleType],
     modules: List[ModuleType],
     classes: List[Type[Any]],
-    missings: List[Tuple[Type[Any], Type[Any]]],
+    missings: List[Tuple[Union[ModuleType, Type[Any]], Type[Any]]],
 ) -> None:
     if isinstance(logger_module, str):
         logger_module = importlib.import_module(logger_module)
pyspark/ml/_typing.pyi
@@ -16,7 +16,7 @@
 # specific language governing permissions and limitations
 # under the License.

-from typing import Any, Dict, List, TypeVar, Tuple, Union
+from typing import Any, Dict, List, TYPE_CHECKING, TypeVar, Tuple, Union
 from typing_extensions import Literal

 from numpy import ndarray
@@ -24,10 +24,12 @@ from py4j.java_gateway import JavaObject

 import pyspark.ml.base
 import pyspark.ml.param
-import pyspark.ml.util
 from pyspark.ml.linalg import Vector
 import pyspark.ml.wrapper

+if TYPE_CHECKING:
+    from scipy.sparse import spmatrix, sparray
+
 ParamMap = Dict[pyspark.ml.param.Param, Any]
 PipelineStage = Union[pyspark.ml.base.Estimator, pyspark.ml.base.Transformer]

@@ -81,4 +83,4 @@ RankingEvaluatorMetricType = Union[
     Literal["recallAtK"],
 ]

-VectorLike = Union[ndarray, Vector, List[float], Tuple[float, ...]]
+VectorLike = Union[ndarray, Vector, List[float], Tuple[float, ...], "spmatrix", "sparray", range]
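The widened alias now covers SciPy sparse column vectors (both the legacy spmatrix and the newer sparray API) plus range, which the conversion helpers already accepted at runtime. An illustrative sketch; it imports the private _convert_to_vector helper, so treat the exact call as an assumption:

    import numpy as np
    from scipy.sparse import csc_matrix
    from pyspark.ml.linalg import _convert_to_vector

    col = csc_matrix(np.array([[1.0], [0.0], [3.0]]))  # (3, 1) column vector
    print(_convert_to_vector(col))       # (3,[0,2],[1.0,3.0]) - a SparseVector
    print(_convert_to_vector(range(3)))  # [0.0,1.0,2.0] - a DenseVector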
pyspark/ml/functions.py
@@ -241,7 +241,7 @@ def _validate_and_transform_single_input(
     # tensor columns
     if len(batch.columns) == 1:
         # one tensor column and one expected input, vstack rows
-        single_input = np.vstack(batch.iloc[:, 0])
+        single_input = np.vstack(batch.iloc[:, 0])  # type: ignore[call-overload]
     else:
         raise ValueError(
             "Multiple input columns found, but model expected a single "
pyspark/ml/linalg/__init__.py
@@ -70,7 +70,6 @@ __all__ = [
 if TYPE_CHECKING:
     from pyspark.mllib._typing import NormType
     from pyspark.ml._typing import VectorLike
-    from scipy.sparse import spmatrix


 # Check whether we have SciPy. MLlib works without it too, but if we have it, some methods,
@@ -85,23 +84,25 @@ except BaseException:
     _have_scipy = False


-def _convert_to_vector(d: Union["VectorLike", "spmatrix", range]) -> "Vector":
+def _convert_to_vector(d: "VectorLike") -> "Vector":
     if isinstance(d, Vector):
         return d
-    elif type(d) in (array.array, np.array, np.ndarray, list, tuple, range):
+    elif isinstance(d, (array.array, np.ndarray, list, tuple, range)):
         return DenseVector(d)
     elif _have_scipy and scipy.sparse.issparse(d):
-        assert cast("spmatrix", d).shape[1] == 1, "Expected column vector"
+        assert hasattr(d, "shape")
+        assert d.shape[1] == 1, "Expected column vector"
         # Make sure the converted csc_matrix has sorted indices.
-        csc = cast("spmatrix", d).tocsc()
+        assert hasattr(d, "tocsc")
+        csc = d.tocsc()
         if not csc.has_sorted_indices:
             csc.sort_indices()
-        return SparseVector(cast("spmatrix", d).shape[0], csc.indices, csc.data)
+        return SparseVector(d.shape[0], csc.indices, csc.data)
     else:
         raise TypeError("Cannot convert type %s into Vector" % type(d))


-def _vector_size(v: Union["VectorLike", "spmatrix", range]) -> int:
+def _vector_size(v: "VectorLike") -> int:
     """
     Returns the size of the vector.

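Note the shift from exact type(...) in (...) checks to isinstance(...), which also drops the long-standing np.array entry (a function, so type(d) could never equal it). The practical difference is that subclasses are now accepted; a standalone illustration:

    class MyList(list):
        pass

    d = MyList([1.0, 2.0])
    print(type(d) in (list, tuple))      # False - exact type check rejects subclasses
    print(isinstance(d, (list, tuple)))  # True  - the new check accepts them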
@@ -124,16 +125,17 @@ def _vector_size(v: Union["VectorLike", "spmatrix", range]) -> int:
     """
     if isinstance(v, Vector):
         return len(v)
-    elif type(v) in (array.array, list, tuple, range):
+    elif isinstance(v, (array.array, list, tuple, range)):
         return len(v)
-    elif type(v) == np.ndarray:
+    elif isinstance(v, np.ndarray):
         if v.ndim == 1 or (v.ndim == 2 and v.shape[1] == 1):
             return len(v)
         else:
             raise ValueError("Cannot treat an ndarray of shape %s as a vector" % str(v.shape))
     elif _have_scipy and scipy.sparse.issparse(v):
-        assert cast("spmatrix", v).shape[1] == 1, "Expected column vector"
-        return cast("spmatrix", v).shape[0]
+        assert hasattr(v, "shape")
+        assert v.shape[1] == 1, "Expected column vector"
+        return v.shape[0]
     else:
         raise TypeError("Cannot treat type %s as a vector" % type(v))

@@ -337,13 +339,13 @@ class DenseVector(Vector):
     def __reduce__(self) -> Tuple[Type["DenseVector"], Tuple[bytes]]:
         return DenseVector, (self.array.tobytes(),)

-    def numNonzeros(self) -> int:
+    def numNonzeros(self) -> Union[int, np.intp]:
         """
         Number of nonzero elements. This scans all active values and count non zeros
         """
         return np.count_nonzero(self.array)

-    def norm(self, p: "NormType") -> np.float64:
+    def norm(self, p: "NormType") -> np.floating[Any]:
         """
         Calculates the norm of a DenseVector.

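The new return annotations (Union[int, np.intp], np.floating[Any]) loosen the declared types to cover the NumPy scalar results np.count_nonzero and np.linalg.norm can produce; runtime behavior is unchanged:

    from pyspark.ml.linalg import DenseVector

    v = DenseVector([3.0, -4.0])
    print(v.numNonzeros())  # 2
    print(v.norm(2))        # 5.0  (Euclidean norm of [3, -4])
    print(v.norm(1))        # 7.0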
@@ -386,21 +388,23 @@ class DenseVector(Vector):
         ...
         AssertionError: dimension mismatch
         """
-        if type(other) == np.ndarray:
+        if isinstance(other, np.ndarray):
             if other.ndim > 1:
                 assert len(self) == other.shape[0], "dimension mismatch"
             return np.dot(self.array, other)
         elif _have_scipy and scipy.sparse.issparse(other):
-            assert len(self) == cast("spmatrix", other).shape[0], "dimension mismatch"
-            return cast("spmatrix", other).transpose().dot(self.toArray())
+            assert hasattr(other, "shape")
+            assert len(self) == other.shape[0], "dimension mismatch"
+            assert hasattr(other, "transpose")
+            return other.transpose().dot(self.toArray())
         else:
-            assert len(self) == _vector_size(other), "dimension mismatch"
+            assert len(self) == _vector_size(other), "dimension mismatch"  # type: ignore[arg-type]
             if isinstance(other, SparseVector):
                 return other.dot(self)
             elif isinstance(other, Vector):
                 return np.dot(self.toArray(), other.toArray())
             else:
-                return np.dot(self.toArray(), other)  # type: ignore[call-overload]
+                return np.dot(self.toArray(), other)

     def squared_distance(self, other: Iterable[float]) -> np.float64:
         """
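In the rewritten sparse branch, the dot product is computed by transposing the (n, 1) sparse column and multiplying it against the dense array, now guarded by duck-typed hasattr checks instead of cast. An illustrative check of that identity using public APIs:

    import numpy as np
    from scipy.sparse import csc_matrix
    from pyspark.ml.linalg import DenseVector

    dv = DenseVector([1.0, 2.0, 3.0])
    sp = csc_matrix(np.array([[1.0], [0.0], [2.0]]))  # (3, 1) sparse column
    print(dv.dot(sp))                        # [7.]
    print(sp.transpose().dot(dv.toArray()))  # [7.]  - the same computation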
@@ -429,10 +433,11 @@ class DenseVector(Vector):
         ...
         AssertionError: dimension mismatch
         """
-        assert len(self) == _vector_size(other), "dimension mismatch"
+        assert len(self) == _vector_size(other), "dimension mismatch"  # type: ignore[arg-type]
         if isinstance(other, SparseVector):
             return other.squared_distance(self)
         elif _have_scipy and scipy.sparse.issparse(other):
+            assert isinstance(other, scipy.sparse.spmatrix), "other must be a scipy.sparse.spmatrix"
             return _convert_to_vector(other).squared_distance(self)  # type: ignore[attr-defined]

         if isinstance(other, Vector):
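The added assertion narrows other to the legacy scipy.sparse.spmatrix API for the type checker before conversion; ordinary usage is unaffected:

    from pyspark.ml.linalg import DenseVector, SparseVector

    a = DenseVector([1.0, 2.0])
    b = SparseVector(2, [0], [3.0])
    print(a.squared_distance(b))  # 8.0 = (1 - 3)**2 + (2 - 0)**2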
@@ -636,13 +641,13 @@ class SparseVector(Vector):
            )
         assert np.min(self.indices) >= 0, "Contains negative index %d" % (np.min(self.indices))

-    def numNonzeros(self) -> int:
+    def numNonzeros(self) -> Union[int, np.intp]:
         """
         Number of nonzero elements. This scans all active values and count non zeros.
         """
         return np.count_nonzero(self.values)

-    def norm(self, p: "NormType") -> np.float64:
+    def norm(self, p: "NormType") -> np.floating[Any]:
         """
         Calculates the norm of a SparseVector.

@@ -699,7 +704,7 @@ class SparseVector(Vector):
             assert len(self) == other.shape[0], "dimension mismatch"
             return np.dot(self.values, other[self.indices])

-        assert len(self) == _vector_size(other), "dimension mismatch"
+        assert len(self) == _vector_size(other), "dimension mismatch"  # type: ignore[arg-type]

         if isinstance(other, DenseVector):
             return np.dot(other.array[self.indices], self.values)
@@ -717,7 +722,7 @@ class SparseVector(Vector):
         else:
             return self.dot(_convert_to_vector(other))  # type: ignore[arg-type]

-    def squared_distance(self, other: Iterable[float]) -> np.float64:
+    def squared_distance(self, other: "VectorLike") -> np.float64:
         """
         Squared distance from a SparseVector or 1-dimensional NumPy array.

@@ -785,7 +790,7 @@ class SparseVector(Vector):
                 j += 1
             return result
         else:
-            return self.squared_distance(_convert_to_vector(other))  # type: ignore[arg-type]
+            return self.squared_distance(_convert_to_vector(other))

     def toArray(self) -> np.ndarray:
         """
pyspark/mllib/_typing.pyi
@@ -16,7 +16,7 @@
 # specific language governing permissions and limitations
 # under the License.

-from typing import List, Tuple, TypeVar, Union
+from typing import List, Tuple, TYPE_CHECKING, TypeVar, Union

 from typing_extensions import Literal
 from numpy import ndarray  # noqa: F401
@@ -24,10 +24,14 @@ from py4j.java_gateway import JavaObject

 from pyspark.mllib.linalg import Vector

-VectorLike = Union[ndarray, Vector, List[float], Tuple[float, ...]]
+if TYPE_CHECKING:
+    from scipy.sparse import spmatrix, sparray
+
 C = TypeVar("C", bound=type)
 JavaObjectOrPickleDump = Union[JavaObject, bytearray, bytes]

 CorrMethodType = Union[Literal["spearman"], Literal["pearson"]]
 KolmogorovSmirnovTestDistNameType = Literal["norm"]
 NormType = Union[None, float, Literal["fro"], Literal["nuc"]]
+
+VectorLike = Union[ndarray, Vector, List[float], Tuple[float, ...], "spmatrix", "sparray", range]
pyspark/mllib/linalg/__init__.py
@@ -61,7 +61,6 @@ from pyspark.sql.types import (

 if TYPE_CHECKING:
     from pyspark.mllib._typing import VectorLike, NormType
-    from scipy.sparse import spmatrix
     from numpy.typing import ArrayLike


@@ -94,23 +93,25 @@ except BaseException:
     _have_scipy = False


-def _convert_to_vector(d: Union["VectorLike", "spmatrix", range]) -> "Vector":
+def _convert_to_vector(d: "VectorLike") -> "Vector":
     if isinstance(d, Vector):
         return d
-    elif type(d) in (array.array, np.array, np.ndarray, list, tuple, range):
+    elif isinstance(d, (array.array, np.ndarray, list, tuple, range)):
         return DenseVector(d)
     elif _have_scipy and scipy.sparse.issparse(d):
-        assert cast("spmatrix", d).shape[1] == 1, "Expected column vector"
+        assert hasattr(d, "shape")
+        assert d.shape[1] == 1, "Expected column vector"
         # Make sure the converted csc_matrix has sorted indices.
-        csc = cast("spmatrix", d).tocsc()
+        assert hasattr(d, "tocsc")
+        csc = d.tocsc()
         if not csc.has_sorted_indices:
             csc.sort_indices()
-        return SparseVector(cast("spmatrix", d).shape[0], csc.indices, csc.data)
+        return SparseVector(d.shape[0], csc.indices, csc.data)
     else:
         raise TypeError("Cannot convert type %s into Vector" % type(d))


-def _vector_size(v: Union["VectorLike", "spmatrix", range]) -> int:
+def _vector_size(v: "VectorLike") -> int:
     """
     Returns the size of the vector.

@@ -133,16 +134,17 @@ def _vector_size(v: Union["VectorLike", "spmatrix", range]) -> int:
     """
     if isinstance(v, Vector):
         return len(v)
-    elif type(v) in (array.array, list, tuple, range):
+    elif isinstance(v, (array.array, list, tuple, range)):
         return len(v)
-    elif type(v) == np.ndarray:
+    elif isinstance(v, np.ndarray):
         if v.ndim == 1 or (v.ndim == 2 and v.shape[1] == 1):
             return len(v)
         else:
             raise ValueError("Cannot treat an ndarray of shape %s as a vector" % str(v.shape))
     elif _have_scipy and scipy.sparse.issparse(v):
-        assert cast("spmatrix", v).shape[1] == 1, "Expected column vector"
-        return cast("spmatrix", v).shape[0]
+        assert hasattr(v, "shape")
+        assert v.shape[1] == 1, "Expected column vector"
+        return v.shape[0]
     else:
         raise TypeError("Cannot treat type %s as a vector" % type(v))

@@ -390,13 +392,13 @@ class DenseVector(Vector):
390
392
  def __reduce__(self) -> Tuple[Type["DenseVector"], Tuple[bytes]]:
391
393
  return DenseVector, (self.array.tobytes(),)
392
394
 
393
- def numNonzeros(self) -> int:
395
+ def numNonzeros(self) -> Union[int, np.intp]:
394
396
  """
395
397
  Number of nonzero elements. This scans all active values and count non zeros
396
398
  """
397
399
  return np.count_nonzero(self.array)
398
400
 
399
- def norm(self, p: "NormType") -> np.float64:
401
+ def norm(self, p: "NormType") -> np.floating[Any]:
400
402
  """
401
403
  Calculates the norm of a DenseVector.
402
404
 
@@ -410,7 +412,7 @@ class DenseVector(Vector):
410
412
  """
411
413
  return np.linalg.norm(self.array, p)
412
414
 
413
- def dot(self, other: Iterable[float]) -> np.float64:
415
+ def dot(self, other: "VectorLike") -> np.float64:
414
416
  """
415
417
  Compute the dot product of two Vectors. We support
416
418
  (Numpy array, list, SparseVector, or SciPy sparse)
@@ -444,8 +446,10 @@ class DenseVector(Vector):
                  assert len(self) == other.shape[0], "dimension mismatch"
              return np.dot(self.array, other)
          elif _have_scipy and scipy.sparse.issparse(other):
-             assert len(self) == cast("spmatrix", other).shape[0], "dimension mismatch"
-             return cast("spmatrix", other).transpose().dot(self.toArray())
+             assert hasattr(other, "shape")
+             assert len(self) == other.shape[0], "dimension mismatch"
+             assert hasattr(other, "transpose")
+             return other.transpose().dot(self.toArray())
          else:
              assert len(self) == _vector_size(other), "dimension mismatch"
              if isinstance(other, SparseVector):
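The `hasattr` assertions replace the `spmatrix` casts; at runtime the branch still transposes the n x 1 sparse operand and dots it with the dense array. A sketch of that path (requires SciPy; the exact result container depends on the SciPy sparse type):

```python
import numpy as np
from scipy.sparse import csc_matrix
from pyspark.mllib.linalg import DenseVector

dv = DenseVector([1.0, 2.0, 3.0])
col = csc_matrix(np.array([[4.0], [5.0], [6.0]]))  # 3 x 1 column vector
print(dv.dot(col))  # roughly array([32.]): 1*4 + 2*5 + 3*6
```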
@@ -453,9 +457,9 @@ class DenseVector(Vector):
              elif isinstance(other, Vector):
                  return np.dot(self.toArray(), other.toArray())
              else:
-                 return np.dot(self.toArray(), cast("ArrayLike", other))
+                 return np.dot(self.toArray(), cast("ArrayLike", other))  # type: ignore[valid-type]

-     def squared_distance(self, other: Iterable[float]) -> np.float64:
+     def squared_distance(self, other: "VectorLike") -> np.float64:
          """
          Squared distance of two Vectors.

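`squared_distance` gets the same `VectorLike` treatment, so lists, ndarrays, and SparseVectors are all accepted. For example:

```python
import numpy as np
from pyspark.mllib.linalg import DenseVector, SparseVector

dv = DenseVector([1.0, 2.0])
print(dv.squared_distance([3.0, 4.0]))                   # (1-3)^2 + (2-4)^2 = 8.0
print(dv.squared_distance(np.array([1.0, 2.0])))         # 0.0
print(dv.squared_distance(SparseVector(2, [0], [2.0])))  # (1-2)^2 + (2-0)^2 = 5.0
```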
@@ -685,13 +689,13 @@ class SparseVector(Vector):
                      % (self.indices[i], self.indices[i + 1])
                  )

-     def numNonzeros(self) -> int:
+     def numNonzeros(self) -> Union[int, np.intp]:
          """
          Number of nonzero elements. This scans all active values and count non zeros.
          """
          return np.count_nonzero(self.values)

-     def norm(self, p: "NormType") -> np.float64:
+     def norm(self, p: "NormType") -> np.floating[Any]:
          """
          Calculates the norm of a SparseVector.

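As the docstring says, only stored (active) values are scanned, so an explicitly stored zero is not counted. A sketch:

```python
from pyspark.mllib.linalg import SparseVector

sv = SparseVector(4, [1, 3], [0.0, 5.0])  # index 1 stores an explicit zero
print(sv.numNonzeros())        # 1
print(sv.norm(1), sv.norm(2))  # 5.0 5.0
```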
@@ -766,7 +770,7 @@ class SparseVector(Vector):
              raise ValueError("Unable to parse values from %s." % s)
          return SparseVector(cast(int, size), indices, values)

-     def dot(self, other: Iterable[float]) -> np.float64:
+     def dot(self, other: "VectorLike") -> np.float64:
          """
          Dot product with a SparseVector or 1- or 2-dimensional Numpy array.

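A sketch of `SparseVector.dot` across the `VectorLike` inputs, including the list case that now flows through `_convert_to_vector` without a cast:

```python
import numpy as np
from pyspark.mllib.linalg import DenseVector, SparseVector

sv = SparseVector(3, [0, 2], [1.0, 3.0])     # dense view: [1.0, 0.0, 3.0]
print(sv.dot(DenseVector([1.0, 1.0, 1.0])))  # 4.0
print(sv.dot(np.array([2.0, 0.0, 1.0])))     # 1*2 + 3*1 = 5.0
print(sv.dot([0.0, 0.0, 2.0]))               # 6.0 (converted first)
```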
@@ -822,9 +826,9 @@ class SparseVector(Vector):
                  return np.dot(self_values, other.values[other_cmind])

          else:
-             return self.dot(_convert_to_vector(other))  # type: ignore[arg-type]
+             return self.dot(_convert_to_vector(other))

-     def squared_distance(self, other: Iterable[float]) -> np.float64:
+     def squared_distance(self, other: "VectorLike") -> np.float64:
          """
          Squared distance from a SparseVector or 1-dimensional NumPy array.

@@ -892,7 +896,7 @@ class SparseVector(Vector):
                  j += 1
              return result
          else:
-             return self.squared_distance(_convert_to_vector(other))  # type: ignore[arg-type]
+             return self.squared_distance(_convert_to_vector(other))

      def toArray(self) -> np.ndarray:
          """
@@ -35,7 +35,7 @@ UT = TypeVar("UT", bound="DistributedMatrix")
  VT = TypeVar("VT", bound="Matrix")

  if TYPE_CHECKING:
-     from pyspark.ml._typing import VectorLike
+     from pyspark.mllib._typing import VectorLike

  __all__ = [
      "BlockMatrix",
@@ -84,7 +84,7 @@ class LabeledPoint:
      'label' and 'features' are accessible as class attributes.
      """

-     def __init__(self, label: float, features: Iterable[float]):
+     def __init__(self, label: float, features: "VectorLike"):
          self.label = float(label)
          self.features = _convert_to_vector(features)

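Since `features` is passed straight to `_convert_to_vector`, the `VectorLike` annotation matches the runtime behavior: dense sequences and sparse vectors are both fine. A sketch:

```python
from pyspark.mllib.linalg import SparseVector
from pyspark.mllib.regression import LabeledPoint

pos = LabeledPoint(1.0, [0.0, 1.0, 2.0])              # dense features
neg = LabeledPoint(0.0, SparseVector(3, [0], [4.0]))  # sparse features
print(pos.features, neg.features)
```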
@@ -189,7 +189,8 @@ class Statistics:

          if not y:
              return cast(
-                 JavaObject, callMLlibFunc("corr", x.map(_convert_to_vector), method)
+                 JavaObject,
+                 callMLlibFunc("corr", cast(RDD[Vector], x).map(_convert_to_vector), method),
              ).toArray()
          else:
              return cast(
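Usage is unchanged by the added cast; a hedged sketch of the single-RDD path (assumes an active SparkContext named `sc`):

```python
from pyspark.mllib.stat import Statistics

# Each element is a vector-like row; with no second RDD, corr returns the
# full correlation matrix as a NumPy array.
rdd = sc.parallelize([[1.0, 2.0], [2.0, 4.0], [3.0, 6.1]])
print(Statistics.corr(rdd, method="pearson"))  # 2 x 2 correlation matrix
```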
@@ -145,11 +145,7 @@ class MLUtils:
          if numFeatures <= 0:
              parsed.cache()
              numFeatures = parsed.map(lambda x: -1 if x[1].size == 0 else x[1][-1]).reduce(max) + 1
-         return parsed.map(
-             lambda x: LabeledPoint(
-                 x[0], Vectors.sparse(numFeatures, x[1], x[2])  # type: ignore[arg-type]
-             )
-         )
+         return parsed.map(lambda x: LabeledPoint(x[0], Vectors.sparse(numFeatures, x[1], x[2])))

      @staticmethod
      def saveAsLibSVMFile(data: RDD["LabeledPoint"], dir: str) -> None:
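The collapsed lambda builds the same sparse features as before; a sketch with hypothetical parsed values standing in for one LibSVM record:

```python
from pyspark.mllib.linalg import Vectors
from pyspark.mllib.regression import LabeledPoint

label, indices, values = 1.0, [0, 2], [0.5, 1.5]  # hypothetical parsed tuple
numFeatures = 4
lp = LabeledPoint(label, Vectors.sparse(numFeatures, indices, values))
print(lp)  # (1.0,(4,[0,2],[0.5,1.5]))
```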
@@ -579,7 +579,7 @@ class PandasOnSparkFrameMethods:
                  return original_func(o, *args, **kwargs)

              def apply_func(pdf: pd.DataFrame) -> pd.DataFrame:
-                 return new_func(pdf).to_frame()
+                 return new_func(pdf).to_frame()  # type: ignore[operator]

              def pandas_series_func(
                  f: Callable[[pd.DataFrame], pd.DataFrame], return_type: DataType
@@ -26,7 +26,7 @@ from typing import Any, Callable, Optional, Sequence, Tuple, Union, cast, TYPE_C
  import numpy as np
  import pandas as pd
- from pandas.api.types import is_list_like, CategoricalDtype  # type: ignore[attr-defined]
+ from pandas.api.types import is_list_like, CategoricalDtype

  from pyspark.sql import functions as F, Column, Window
  from pyspark.sql.types import LongType, BooleanType, NumericType
@@ -17,7 +17,7 @@
  from typing import Any, Callable, List, Optional, Union, TYPE_CHECKING, cast

  import pandas as pd
- from pandas.api.types import (  # type: ignore[attr-defined]
+ from pandas.api.types import (
      CategoricalDtype,
      is_dict_like,
      is_list_like,
@@ -116,7 +116,7 @@ def _should_return_all_false(left: IndexOpsLike, right: Any) -> bool:
      based on incompatible dtypes: non-numeric vs. numeric (including bools).
      """
      from pyspark.pandas.base import IndexOpsMixin
-     from pandas.api.types import is_list_like  # type: ignore[attr-defined]
+     from pandas.api.types import is_list_like

      def are_both_numeric(left_dtype: Dtype, right_dtype: Dtype) -> bool:
          return is_numeric_dtype(left_dtype) and is_numeric_dtype(right_dtype)
@@ -19,7 +19,7 @@ import numbers
  from typing import Any, Union

  import pandas as pd
- from pandas.api.types import CategoricalDtype, is_integer_dtype  # type: ignore[attr-defined]
+ from pandas.api.types import CategoricalDtype, is_integer_dtype
  from pandas.core.dtypes.common import is_numeric_dtype

  from pyspark.pandas.base import column_op, IndexOpsMixin
@@ -16,11 +16,11 @@
  #

  from itertools import chain
- from typing import cast, Any, Union
+ from typing import cast, Any, Sequence, Union

  import pandas as pd
  import numpy as np
- from pandas.api.types import is_list_like, CategoricalDtype  # type: ignore[attr-defined]
+ from pandas.api.types import is_list_like, CategoricalDtype

  from pyspark.pandas._typing import Dtype, IndexOpsLike, SeriesOrIndex
  from pyspark.pandas.base import IndexOpsMixin
@@ -43,7 +43,7 @@ class CategoricalOps(DataTypeOps):
          """Restore column when to_pandas."""
          return pd.Series(
              pd.Categorical.from_codes(
-                 col.replace(np.nan, -1).astype(int),
+                 cast(Sequence[int], col.replace(np.nan, -1).astype(int)),
                  categories=cast(CategoricalDtype, self.dtype).categories,
                  ordered=cast(CategoricalDtype, self.dtype).ordered,
              )
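In plain pandas terms, the restore path maps missing codes to -1 (pandas' sentinel for "no category") before rebuilding the categorical column; a sketch of that round trip:

```python
import numpy as np
import pandas as pd

# NaN codes become -1, then from_codes turns -1 back into a missing value.
codes = pd.Series([0.0, np.nan, 1.0]).replace(np.nan, -1).astype(int)
cat = pd.Categorical.from_codes(codes, categories=["a", "b"], ordered=False)
print(pd.Series(cat))  # a, NaN, b
```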