pyspark-client 4.2.0.dev1__tar.gz → 4.2.0.dev3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (387)
  1. {pyspark_client-4.2.0.dev1/pyspark_client.egg-info → pyspark_client-4.2.0.dev3}/PKG-INFO +2 -2
  2. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/_typing.pyi +3 -3
  3. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/accumulators.py +1 -1
  4. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/daemon.py +46 -44
  5. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/__init__.py +1 -1
  6. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/error-conditions.json +25 -3
  7. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/exceptions/captured.py +18 -19
  8. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/exceptions/connect.py +79 -32
  9. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/exceptions/tblib.py +1 -3
  10. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/utils.py +1 -1
  11. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors_doc_gen.py +1 -1
  12. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/install.py +37 -2
  13. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/instrumentation_utils.py +1 -1
  14. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/logger/__init__.py +1 -1
  15. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/logger/worker_io.py +6 -3
  16. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/_typing.pyi +5 -3
  17. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/classification.py +1 -1
  18. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/base.py +1 -1
  19. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/functions.py +1 -1
  20. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/linalg/__init__.py +28 -23
  21. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/regression.py +3 -3
  22. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/tuning.py +3 -3
  23. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/_typing.pyi +7 -3
  24. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/linalg/__init__.py +27 -23
  25. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/linalg/distributed.py +1 -1
  26. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/regression.py +1 -1
  27. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/stat/_statistics.py +2 -1
  28. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/util.py +1 -5
  29. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/accessors.py +2 -2
  30. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/base.py +4 -4
  31. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/categorical.py +1 -1
  32. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/config.py +1 -1
  33. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/base.py +26 -12
  34. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/boolean_ops.py +12 -8
  35. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/categorical_ops.py +3 -3
  36. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/datetime_ops.py +8 -0
  37. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/null_ops.py +1 -1
  38. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/num_ops.py +32 -26
  39. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/string_ops.py +3 -5
  40. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/timedelta_ops.py +8 -0
  41. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/datetimes.py +31 -9
  42. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/frame.py +651 -237
  43. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/generic.py +67 -19
  44. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/groupby.py +139 -33
  45. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/indexes/base.py +18 -14
  46. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/indexes/category.py +1 -1
  47. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/indexes/datetimes.py +43 -30
  48. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/indexes/multi.py +3 -2
  49. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/indexes/timedelta.py +21 -8
  50. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/indexing.py +58 -16
  51. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/internal.py +3 -3
  52. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/namespace.py +102 -66
  53. pyspark_client-4.2.0.dev3/pyspark/pandas/plot/__init__.py +17 -0
  54. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/plot/core.py +4 -4
  55. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/plot/matplotlib.py +10 -4
  56. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/resample.py +10 -7
  57. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/series.py +127 -59
  58. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/strings.py +62 -19
  59. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/testing.py +7 -7
  60. pyspark_client-4.2.0.dev3/pyspark/pandas/typedef/__init__.py +18 -0
  61. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/typedef/typehints.py +90 -22
  62. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/usage_logging/__init__.py +8 -8
  63. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/utils.py +9 -8
  64. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/window.py +1 -1
  65. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/profiler.py +22 -6
  66. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/_typing.pyi +1 -1
  67. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/client/__init__.py +1 -1
  68. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/client/artifact.py +12 -2
  69. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/client/core.py +184 -34
  70. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/client/reattach.py +53 -52
  71. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/dataframe.py +27 -14
  72. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/expressions.py +2 -5
  73. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/functions/__init__.py +2 -2
  74. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/functions/builtin.py +425 -66
  75. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/observation.py +7 -0
  76. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/plan.py +8 -2
  77. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/base_pb2.py +119 -109
  78. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/base_pb2.pyi +307 -1
  79. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/base_pb2_grpc.py +47 -0
  80. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/catalog_pb2.py +2 -2
  81. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/commands_pb2.py +72 -72
  82. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/commands_pb2.pyi +12 -1
  83. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/common_pb2.py +2 -2
  84. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/example_plugins_pb2.py +2 -2
  85. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/expressions_pb2.py +2 -2
  86. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/ml_common_pb2.py +2 -2
  87. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/ml_pb2.py +2 -2
  88. pyspark_client-4.2.0.dev3/pyspark/sql/connect/proto/pipelines_pb2.py +132 -0
  89. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/pipelines_pb2.pyi +101 -0
  90. pyspark_client-4.2.0.dev3/pyspark/sql/connect/proto/relations_pb2.py +251 -0
  91. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/relations_pb2.pyi +19 -0
  92. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/types_pb2.py +2 -2
  93. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/session.py +54 -42
  94. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/streaming/readwriter.py +41 -3
  95. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +3 -12
  96. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/streaming/worker/listener_worker.py +3 -12
  97. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/context.py +15 -2
  98. pyspark_client-4.2.0.dev3/pyspark/sql/conversion.py +1823 -0
  99. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/dataframe.py +98 -25
  100. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/datasource.py +75 -4
  101. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/datasource_internal.py +82 -30
  102. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/functions/__init__.py +51 -23
  103. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/functions/builtin.py +1588 -215
  104. pyspark_client-4.2.0.dev3/pyspark/sql/interchange.py +89 -0
  105. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/internal.py +3 -1
  106. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/conversion.py +161 -36
  107. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/serializers.py +525 -838
  108. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/types.py +126 -44
  109. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/plot/__init__.py +1 -1
  110. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/plot/core.py +1 -1
  111. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/profiler.py +157 -34
  112. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/readwriter.py +3 -3
  113. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/session.py +63 -22
  114. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/__init__.py +5 -3
  115. pyspark_client-4.2.0.dev3/pyspark/sql/streaming/datasource.py +119 -0
  116. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/listener.py +15 -7
  117. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/proto/StateMessage_pb2.py +2 -2
  118. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/python_streaming_source_runner.py +114 -15
  119. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/query.py +57 -1
  120. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/readwriter.py +70 -3
  121. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/transform_with_state_driver_worker.py +3 -11
  122. pyspark_client-4.2.0.dev3/pyspark/sql/streaming/tws_tester.py +689 -0
  123. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/types.py +1 -12
  124. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/udf.py +1 -1
  125. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/utils.py +12 -1
  126. pyspark_client-4.2.0.dev3/pyspark/sql/worker/analyze_udtf.py +241 -0
  127. pyspark_client-4.2.0.dev3/pyspark/sql/worker/commit_data_source_write.py +80 -0
  128. pyspark_client-4.2.0.dev3/pyspark/sql/worker/create_data_source.py +149 -0
  129. pyspark_client-4.2.0.dev3/pyspark/sql/worker/data_source_pushdown_filters.py +229 -0
  130. pyspark_client-4.2.0.dev3/pyspark/sql/worker/lookup_data_sources.py +62 -0
  131. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/worker/plan_data_source_read.py +104 -132
  132. pyspark_client-4.2.0.dev3/pyspark/sql/worker/python_streaming_sink_runner.py +116 -0
  133. pyspark_client-4.2.0.dev1/pyspark/sql/worker/lookup_data_sources.py → pyspark_client-4.2.0.dev3/pyspark/sql/worker/utils.py +39 -47
  134. pyspark_client-4.2.0.dev3/pyspark/sql/worker/write_into_data_source.py +246 -0
  135. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/taskcontext.py +44 -4
  136. {pyspark_client-4.2.0.dev1/pyspark/pandas/typedef → pyspark_client-4.2.0.dev3/pyspark/testing}/__init__.py +5 -2
  137. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/testing/connectutils.py +0 -10
  138. pyspark_client-4.2.0.dev3/pyspark/testing/goldenutils.py +356 -0
  139. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/testing/pandasutils.py +59 -6
  140. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/testing/sqlutils.py +2 -6
  141. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/testing/streamingutils.py +2 -2
  142. pyspark_client-4.2.0.dev3/pyspark/testing/unittestutils.py +55 -0
  143. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/testing/utils.py +56 -41
  144. pyspark_client-4.2.0.dev3/pyspark/threaddump.py +62 -0
  145. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/util.py +67 -18
  146. pyspark_client-4.2.0.dev3/pyspark/version.py +1 -0
  147. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/worker.py +392 -459
  148. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/worker_util.py +58 -1
  149. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3/pyspark_client.egg-info}/PKG-INFO +2 -2
  150. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark_client.egg-info/SOURCES.txt +7 -0
  151. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark_client.egg-info/requires.txt +1 -1
  152. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/setup.py +1 -1
  153. pyspark_client-4.2.0.dev1/pyspark/pandas/plot/__init__.py +0 -17
  154. pyspark_client-4.2.0.dev1/pyspark/sql/connect/proto/pipelines_pb2.py +0 -130
  155. pyspark_client-4.2.0.dev1/pyspark/sql/connect/proto/relations_pb2.py +0 -251
  156. pyspark_client-4.2.0.dev1/pyspark/sql/conversion.py +0 -847
  157. pyspark_client-4.2.0.dev1/pyspark/sql/worker/analyze_udtf.py +0 -288
  158. pyspark_client-4.2.0.dev1/pyspark/sql/worker/commit_data_source_write.py +0 -127
  159. pyspark_client-4.2.0.dev1/pyspark/sql/worker/create_data_source.py +0 -193
  160. pyspark_client-4.2.0.dev1/pyspark/sql/worker/data_source_pushdown_filters.py +0 -277
  161. pyspark_client-4.2.0.dev1/pyspark/sql/worker/python_streaming_sink_runner.py +0 -159
  162. pyspark_client-4.2.0.dev1/pyspark/sql/worker/write_into_data_source.py +0 -285
  163. pyspark_client-4.2.0.dev1/pyspark/testing/__init__.py +0 -47
  164. pyspark_client-4.2.0.dev1/pyspark/version.py +0 -1
  165. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/MANIFEST.in +0 -0
  166. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/README.md +0 -0
  167. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/__init__.py +0 -0
  168. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/_globals.py +0 -0
  169. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/cloudpickle/__init__.py +0 -0
  170. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/cloudpickle/cloudpickle.py +0 -0
  171. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/cloudpickle/cloudpickle_fast.py +0 -0
  172. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/conf.py +0 -0
  173. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/error_classes.py +0 -0
  174. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/exceptions/__init__.py +0 -0
  175. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/exceptions/base.py +0 -0
  176. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/find_spark_home.py +0 -0
  177. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/java_gateway.py +0 -0
  178. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/join.py +0 -0
  179. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/logger/logger.py +0 -0
  180. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/loose_version.py +0 -0
  181. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/__init__.py +0 -0
  182. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/base.py +0 -0
  183. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/clustering.py +0 -0
  184. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/common.py +0 -0
  185. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/__init__.py +0 -0
  186. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/classification.py +0 -0
  187. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/evaluation.py +0 -0
  188. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/feature.py +0 -0
  189. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/functions.py +0 -0
  190. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/io_utils.py +0 -0
  191. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/pipeline.py +0 -0
  192. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/proto.py +0 -0
  193. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/readwrite.py +0 -0
  194. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/serialize.py +0 -0
  195. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/summarizer.py +0 -0
  196. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/tuning.py +0 -0
  197. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/connect/util.py +0 -0
  198. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/deepspeed/__init__.py +0 -0
  199. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/deepspeed/deepspeed_distributor.py +0 -0
  200. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/dl_util.py +0 -0
  201. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/evaluation.py +0 -0
  202. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/feature.py +0 -0
  203. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/fpm.py +0 -0
  204. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/image.py +0 -0
  205. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/model_cache.py +0 -0
  206. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/param/__init__.py +0 -0
  207. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/param/_shared_params_code_gen.py +0 -0
  208. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/param/shared.py +0 -0
  209. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/pipeline.py +0 -0
  210. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/recommendation.py +0 -0
  211. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/stat.py +0 -0
  212. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/torch/__init__.py +0 -0
  213. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/torch/data.py +0 -0
  214. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/torch/distributor.py +0 -0
  215. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/torch/log_communication.py +0 -0
  216. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/torch/torch_run_process_wrapper.py +0 -0
  217. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/tree.py +0 -0
  218. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/util.py +0 -0
  219. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/ml/wrapper.py +0 -0
  220. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/__init__.py +0 -0
  221. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/classification.py +0 -0
  222. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/clustering.py +0 -0
  223. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/common.py +0 -0
  224. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/evaluation.py +0 -0
  225. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/feature.py +0 -0
  226. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/fpm.py +0 -0
  227. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/random.py +0 -0
  228. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/recommendation.py +0 -0
  229. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/stat/KernelDensity.py +0 -0
  230. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/stat/__init__.py +0 -0
  231. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/stat/distribution.py +0 -0
  232. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/stat/test.py +0 -0
  233. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/mllib/tree.py +0 -0
  234. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/__init__.py +0 -0
  235. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/_typing.py +0 -0
  236. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/correlation.py +0 -0
  237. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/__init__.py +0 -0
  238. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/binary_ops.py +0 -0
  239. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/complex_ops.py +0 -0
  240. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/date_ops.py +0 -0
  241. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/data_type_ops/udt_ops.py +0 -0
  242. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/exceptions.py +0 -0
  243. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/extensions.py +0 -0
  244. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/indexes/__init__.py +0 -0
  245. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/missing/__init__.py +0 -0
  246. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/missing/common.py +0 -0
  247. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/missing/frame.py +0 -0
  248. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/missing/general_functions.py +0 -0
  249. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/missing/groupby.py +0 -0
  250. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/missing/indexes.py +0 -0
  251. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/missing/resample.py +0 -0
  252. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/missing/scalars.py +0 -0
  253. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/missing/series.py +0 -0
  254. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/missing/window.py +0 -0
  255. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/mlflow.py +0 -0
  256. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/numpy_compat.py +0 -0
  257. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/plot/plotly.py +0 -0
  258. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/spark/__init__.py +0 -0
  259. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/spark/accessors.py +0 -0
  260. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/spark/utils.py +0 -0
  261. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/sql_formatter.py +0 -0
  262. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/sql_processor.py +0 -0
  263. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/supported_api_gen.py +0 -0
  264. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pandas/usage_logging/usage_logger.py +0 -0
  265. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/__init__.py +0 -0
  266. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/add_pipeline_analysis_context.py +0 -0
  267. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/api.py +0 -0
  268. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/block_session_mutations.py +0 -0
  269. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/cli.py +0 -0
  270. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/flow.py +0 -0
  271. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/graph_element_registry.py +0 -0
  272. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/init_cli.py +0 -0
  273. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/logging_utils.py +0 -0
  274. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/output.py +0 -0
  275. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/source_code_location.py +0 -0
  276. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/spark_connect_graph_element_registry.py +0 -0
  277. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/spark_connect_pipeline.py +0 -0
  278. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/pipelines/type_error_utils.py +0 -0
  279. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/py.typed +0 -0
  280. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/rddsampler.py +0 -0
  281. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/resource/__init__.py +0 -0
  282. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/resource/information.py +0 -0
  283. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/resource/profile.py +0 -0
  284. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/resource/requests.py +0 -0
  285. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/resultiterable.py +0 -0
  286. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/serializers.py +0 -0
  287. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/shell.py +0 -0
  288. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/shuffle.py +0 -0
  289. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/__init__.py +0 -0
  290. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/avro/__init__.py +0 -0
  291. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/avro/functions.py +0 -0
  292. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/catalog.py +0 -0
  293. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/column.py +0 -0
  294. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/conf.py +0 -0
  295. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/__init__.py +0 -0
  296. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/_typing.py +0 -0
  297. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/avro/__init__.py +0 -0
  298. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/avro/functions.py +0 -0
  299. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/catalog.py +0 -0
  300. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/client/retries.py +0 -0
  301. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/column.py +0 -0
  302. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/conf.py +0 -0
  303. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/conversion.py +0 -0
  304. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/datasource.py +0 -0
  305. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/functions/partitioning.py +0 -0
  306. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/group.py +0 -0
  307. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/logging.py +0 -0
  308. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/merge.py +0 -0
  309. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/profiler.py +0 -0
  310. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/__init__.py +0 -0
  311. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/catalog_pb2.pyi +0 -0
  312. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/common_pb2.pyi +0 -0
  313. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/example_plugins_pb2.pyi +0 -0
  314. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/expressions_pb2.pyi +0 -0
  315. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/ml_common_pb2.pyi +0 -0
  316. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/ml_pb2.pyi +0 -0
  317. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/proto/types_pb2.pyi +0 -0
  318. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/protobuf/__init__.py +0 -0
  319. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/protobuf/functions.py +0 -0
  320. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/readwriter.py +0 -0
  321. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/resource/__init__.py +0 -0
  322. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/resource/profile.py +0 -0
  323. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/shell/__init__.py +0 -0
  324. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/shell/progress.py +0 -0
  325. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/sql_formatter.py +0 -0
  326. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/streaming/__init__.py +0 -0
  327. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/streaming/query.py +0 -0
  328. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/streaming/worker/__init__.py +0 -0
  329. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/table_arg.py +0 -0
  330. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/tvf.py +0 -0
  331. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/types.py +0 -0
  332. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/udf.py +0 -0
  333. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/udtf.py +0 -0
  334. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/utils.py +0 -0
  335. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/connect/window.py +0 -0
  336. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/functions/partitioning.py +0 -0
  337. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/geo_utils.py +0 -0
  338. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/group.py +0 -0
  339. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/merge.py +0 -0
  340. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/metrics.py +0 -0
  341. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/observation.py +0 -0
  342. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/__init__.py +0 -0
  343. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/_typing/__init__.pyi +0 -0
  344. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/_typing/protocols/__init__.pyi +0 -0
  345. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/_typing/protocols/frame.pyi +0 -0
  346. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/_typing/protocols/series.pyi +0 -0
  347. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/functions.py +0 -0
  348. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/functions.pyi +0 -0
  349. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/group_ops.py +0 -0
  350. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/map_ops.py +0 -0
  351. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/typehints.py +0 -0
  352. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/pandas/utils.py +0 -0
  353. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/plot/plotly.py +0 -0
  354. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/protobuf/__init__.py +0 -0
  355. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/protobuf/functions.py +0 -0
  356. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/sql_formatter.py +0 -0
  357. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/list_state_client.py +0 -0
  358. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/map_state_client.py +0 -0
  359. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/proto/StateMessage_pb2.pyi +0 -0
  360. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/proto/__init__.py +0 -0
  361. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/state.py +0 -0
  362. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/stateful_processor.py +0 -0
  363. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/stateful_processor_api_client.py +0 -0
  364. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/stateful_processor_util.py +0 -0
  365. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/streaming/value_state_client.py +0 -0
  366. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/table_arg.py +0 -0
  367. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/tvf.py +0 -0
  368. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/tvf_argument.py +0 -0
  369. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/udtf.py +0 -0
  370. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/variant_utils.py +0 -0
  371. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/window.py +0 -0
  372. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/sql/worker/__init__.py +0 -0
  373. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/statcounter.py +0 -0
  374. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/storagelevel.py +0 -0
  375. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/streaming/__init__.py +0 -0
  376. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/streaming/context.py +0 -0
  377. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/streaming/dstream.py +0 -0
  378. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/streaming/kinesis.py +0 -0
  379. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/streaming/listener.py +0 -0
  380. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/streaming/util.py +0 -0
  381. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/testing/mllibutils.py +0 -0
  382. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/testing/mlutils.py +0 -0
  383. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/testing/objects.py +0 -0
  384. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/traceback_utils.py +0 -0
  385. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark_client.egg-info/dependency_links.txt +0 -0
  386. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark_client.egg-info/top_level.txt +0 -0
  387. {pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/setup.cfg +0 -0
{pyspark_client-4.2.0.dev1/pyspark_client.egg-info → pyspark_client-4.2.0.dev3}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: pyspark-client
- Version: 4.2.0.dev1
+ Version: 4.2.0.dev3
  Summary: Python Spark Connect client for Apache Spark
  Home-page: https://github.com/apache/spark/tree/master/python
  Author: Spark Developers
@@ -18,7 +18,7 @@ Classifier: Typing :: Typed
  Requires-Python: >=3.10
  Description-Content-Type: text/markdown
  Requires-Dist: pandas>=2.2.0
- Requires-Dist: pyarrow>=15.0.0
+ Requires-Dist: pyarrow>=18.0.0
  Requires-Dist: grpcio>=1.76.0
  Requires-Dist: grpcio-status>=1.76.0
  Requires-Dist: googleapis-common-protos>=1.71.0

{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/_typing.pyi
@@ -16,7 +16,7 @@
  # specific language governing permissions and limitations
  # under the License.

- from typing import Callable, Iterable, Sized, TypeVar, Union
+ from typing import Any, Callable, Iterable, Sized, TypeVar, Union
  from typing_extensions import Literal, Protocol

  from numpy import int32, int64, float32, float64, ndarray
@@ -29,10 +29,10 @@ PrimitiveType = Union[bool, float, int, str]
  NonUDFType = Literal[0]

  class SupportsIAdd(Protocol):
- def __iadd__(self, other: SupportsIAdd) -> SupportsIAdd: ...
+ def __iadd__(self, other: Any) -> SupportsIAdd: ...

  class SupportsOrdering(Protocol):
- def __lt__(self, other: SupportsOrdering) -> bool: ...
+ def __lt__(self, other: Any) -> bool: ...

  class SizedIterable(Protocol, Sized, Iterable[T_co]): ...
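
The Any-typed parameters in the new stubs make these Protocols easier to satisfy structurally: concrete classes almost never annotate __iadd__ or __lt__ with the protocol type itself, so the old self-referential signatures were hard to match under a strict type checker. A minimal, hypothetical sketch of the pattern the dev3 stub describes (the SupportsIAdd below is a local stand-in, not an import from pyspark):

    from typing import Any, Protocol

    class SupportsIAdd(Protocol):
        # Mirrors the dev3 stub: the argument is Any; only the return type is constrained.
        def __iadd__(self, other: Any) -> "SupportsIAdd": ...

    def accumulate(target: SupportsIAdd, value: Any) -> SupportsIAdd:
        target += value  # the only structural requirement is in-place addition
        return target

    print(accumulate([1, 2], [3]))  # lists define __iadd__, so they satisfy the protocol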

{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/accumulators.py
@@ -27,7 +27,7 @@ from pyspark.serializers import read_int, CPickleSerializer
  from pyspark.errors import PySparkRuntimeError

  if TYPE_CHECKING:
- from pyspark._typing import SupportsIAdd # noqa: F401
+ from pyspark._typing import SupportsIAdd
  import socketserver.BaseRequestHandler # type: ignore[import-not-found]


{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/daemon.py
@@ -30,6 +30,7 @@ from socket import AF_INET, AF_INET6, SOCK_STREAM, SOMAXCONN
  from signal import SIGHUP, SIGTERM, SIGCHLD, SIG_DFL, SIG_IGN, SIGINT

  from pyspark.serializers import read_int, write_int, write_with_length, UTF8Deserializer
+ from pyspark.util import enable_faulthandler
  from pyspark.errors import PySparkRuntimeError


@@ -226,53 +227,54 @@ def manager():

  if pid == 0:
  # in child process
- if poller is not None:
- poller.unregister(0)
- poller.unregister(listen_sock)
- listen_sock.close()
+ with enable_faulthandler():
+ if poller is not None:
+ poller.unregister(0)
+ poller.unregister(listen_sock)
+ listen_sock.close()

- # It should close the standard input in the child process so that
- # Python native function executions stay intact.
- #
- # Note that if we just close the standard input (file descriptor 0),
- # the lowest file descriptor (file descriptor 0) will be allocated,
- # later when other file descriptors should happen to open.
- #
- # Therefore, here we redirects it to '/dev/null' by duplicating
- # another file descriptor for '/dev/null' to the standard input (0).
- # See SPARK-26175.
- devnull = open(os.devnull, "r")
- os.dup2(devnull.fileno(), 0)
- devnull.close()
+ # It should close the standard input in the child process so that
+ # Python native function executions stay intact.
+ #
+ # Note that if we just close the standard input (file descriptor 0),
+ # the lowest file descriptor (file descriptor 0) will be allocated,
+ # later when other file descriptors should happen to open.
+ #
+ # Therefore, here we redirects it to '/dev/null' by duplicating
+ # another file descriptor for '/dev/null' to the standard input (0).
+ # See SPARK-26175.
+ devnull = open(os.devnull, "r")
+ os.dup2(devnull.fileno(), 0)
+ devnull.close()

- try:
- # Acknowledge that the fork was successful
- outfile = sock.makefile(mode="wb")
- write_int(os.getpid(), outfile)
- outfile.flush()
- outfile.close()
- authenticated = (
- os.environ.get("PYTHON_UNIX_DOMAIN_ENABLED", "false").lower() == "true"
- or False
- )
- while True:
- code = worker(sock, authenticated)
- if code == 0:
- authenticated = True
- if not reuse or code:
- # wait for closing
- try:
- while sock.recv(1024):
+ try:
+ # Acknowledge that the fork was successful
+ outfile = sock.makefile(mode="wb")
+ write_int(os.getpid(), outfile)
+ outfile.flush()
+ outfile.close()
+ authenticated = (
+ os.environ.get("PYTHON_UNIX_DOMAIN_ENABLED", "false").lower()
+ == "true"
+ )
+ while True:
+ code = worker(sock, authenticated)
+ if code == 0:
+ authenticated = True
+ if not reuse or code:
+ # wait for closing
+ try:
+ while sock.recv(1024):
+ pass
+ except Exception:
  pass
- except Exception:
- pass
- break
- gc.collect()
- except BaseException:
- traceback.print_exc()
- os._exit(1)
- else:
- os._exit(0)
+ break
+ gc.collect()
+ except BaseException:
+ traceback.print_exc()
+ os._exit(1)
+ else:
+ os._exit(0)
  else:
  sock.close()
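
The daemon change above wraps the entire child-process body in enable_faulthandler(), newly imported from pyspark.util. That helper's implementation is not part of this diff; as a rough, hypothetical illustration only, a context manager of that shape would typically wrap the standard-library faulthandler module so a crashing worker dumps a Python traceback on fatal signals:

    import faulthandler
    import sys
    from contextlib import contextmanager

    @contextmanager
    def enable_faulthandler():
        # Hypothetical stand-in for pyspark.util.enable_faulthandler: enable fatal-signal
        # traceback dumps for the duration of the block, then restore the previous state.
        was_enabled = faulthandler.is_enabled()
        faulthandler.enable(file=sys.stderr)
        try:
            yield
        finally:
            if not was_enabled:
                faulthandler.disable()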

{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/__init__.py
@@ -18,7 +18,7 @@
  """
  PySpark exceptions.
  """
- from pyspark.errors.exceptions.base import ( # noqa: F401
+ from pyspark.errors.exceptions.base import (
  PySparkException,
  AnalysisException,
  SessionNotSameException,

{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/error-conditions.json
@@ -465,6 +465,11 @@
  "Parameter value <arg_name> must be a valid UUID format: <origin>"
  ]
  },
+ "INVALID_STREAMING_SOURCE_NAME": {
+ "message": [
+ "Invalid streaming source name '<source_name>'. Source names must contain only ASCII letters, digits, and underscores."
+ ]
+ },
  "INVALID_TIMEOUT_TIMESTAMP": {
  "message": [
  "Timeout timestamp (<timestamp>) cannot be earlier than the current watermark (<watermark>)."
@@ -551,6 +556,12 @@
  "<arg1> and <arg2> should be of the same length, got <arg1_length> and <arg2_length>."
  ]
  },
+ "LOCAL_RELATION_SIZE_LIMIT_EXCEEDED": {
+ "message": [
+ "Local relation size (<actualSize> bytes) exceeds the limit (<sizeLimit> bytes)."
+ ],
+ "sqlState": "54000"
+ },
  "MALFORMED_GEOGRAPHY": {
  "message": [
  "Geography binary is malformed. Please check the data source is valid."
@@ -1174,6 +1185,11 @@
  "SparkContext or SparkSession should be created first."
  ]
  },
+ "SIMPLE_STREAM_READER_OFFSET_DID_NOT_ADVANCE": {
+ "message": [
+ "SimpleDataSourceStreamReader.read() returned a non-empty batch but the end offset: <end_offset> did not advance past the start offset: <start_offset>. The end offset must represent the position after the last record returned."
+ ]
+ },
  "SLICE_WITH_STEP": {
  "message": [
  "Slice with step is not supported."
@@ -1237,12 +1253,12 @@
  "Return type of the user-defined function should be <expected>, but is <actual>."
  ]
  },
- "UDTF_ARROW_TYPE_CAST_ERROR": {
+ "UDTF_ARROW_DATA_CONVERSION_ERROR": {
  "message": [
- "Cannot convert the output value of the column '<col_name>' with type '<col_type>' to the specified return type of the column: '<arrow_type>'. Please check if the data types match and try again."
+ "Cannot convert UDTF output to Arrow. Data: <data>. Schema: <schema>. Arrow Schema: <arrow_schema>."
  ]
  },
- "UDTF_ARROW_TYPE_CONVERSION_ERROR": {
+ "UDTF_ARROW_TYPE_CONVERSION_ERROR": {
  "message": [
  "PyArrow UDTF must return an iterator of pyarrow.Table or pyarrow.RecordBatch objects."
  ]
@@ -1467,6 +1483,12 @@
  "Value for `<arg_name>` must be between <lower_bound> and <upper_bound> (inclusive), got <actual>"
  ]
  },
+ "WKB_PARSE_ERROR" : {
+ "message" : [
+ "Error parsing WKB: <parseError> at position <pos>"
+ ],
+ "sqlState" : "22023"
+ },
  "WRONG_NUM_ARGS_FOR_HIGHER_ORDER_FUNCTION": {
  "message": [
  "Function `<func_name>` should take between 1 and 3 arguments, but the provided function takes <num_args>."

{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/exceptions/captured.py
@@ -234,25 +234,13 @@ def _convert_exception(e: "Py4JJavaError") -> CapturedException:
  return SparkUpgradeException(origin=e)
  elif is_instance_of(gw, e, "org.apache.spark.SparkNoSuchElementException"):
  return SparkNoSuchElementException(origin=e)
-
- c: "Py4JJavaError" = e.getCause()
- stacktrace: str = getattr(jvm, "org.apache.spark.util.Utils").exceptionString(e)
- if c is not None and (
- is_instance_of(gw, c, "org.apache.spark.api.python.PythonException")
- # To make sure this only catches Python UDFs.
- and any(
- map(
- lambda v: "org.apache.spark.sql.execution.python" in v.toString(), c.getStackTrace()
- )
- )
- ):
- msg = (
- "\n An exception was thrown from the Python worker. "
- "Please see the stack trace below.\n%s" % c.getMessage()
- )
- return PythonException(msg, stacktrace)
-
- return UnknownException(desc=e.toString(), stackTrace=stacktrace, cause=c)
+ elif is_instance_of(gw, e, "org.apache.spark.api.python.PythonException"):
+ return PythonException(origin=e)
+ return UnknownException(
+ desc=e.toString(),
+ stackTrace=getattr(jvm, "org.apache.spark.util.Utils").exceptionString(e),
+ cause=e.getCause(),
+ )


  def capture_sql_exception(f: Callable[..., Any]) -> Callable[..., Any]:
@@ -348,6 +336,17 @@ class PythonException(CapturedException, BasePythonException):
  Exceptions thrown from Python workers.
  """

+ def __str__(self) -> str:
+ messageParameters = self.getMessageParameters()
+
+ if (
+ messageParameters is None
+ or "msg" not in messageParameters
+ or "traceback" not in messageParameters
+ ):
+ return super().__str__()
+ return f"{messageParameters['msg']}:\n{messageParameters['traceback'].strip()}"
+

  class ArithmeticException(CapturedException, BaseArithmeticException):
  """

{pyspark_client-4.2.0.dev1 → pyspark_client-4.2.0.dev3}/pyspark/errors/exceptions/connect.py
@@ -61,43 +61,89 @@ def convert_exception(
  display_server_stacktrace: bool = False,
  grpc_status_code: grpc.StatusCode = StatusCode.UNKNOWN,
  ) -> SparkConnectException:
+ raw_classes = info.metadata.get("classes")
+ classes: List[str] = json.loads(raw_classes) if raw_classes else []
+ raw_message_parameters = info.metadata.get("messageParameters")
+ message_parameters: Dict[str, str] = (
+ json.loads(raw_message_parameters) if raw_message_parameters else {}
+ )
+ root_error_idx = (
+ resp.root_error_idx if resp is not None and resp.HasField("root_error_idx") else None
+ )
  converted = _convert_exception(
- info, truncated_message, resp, display_server_stacktrace, grpc_status_code
+ classes=classes,
+ sql_state=info.metadata.get("sqlState"),
+ error_class=info.metadata.get("errorClass"),
+ reason=info.reason,
+ root_error_idx=root_error_idx,
+ errors=list(resp.errors) if resp is not None else None,
+ truncated_message=truncated_message,
+ truncated_message_parameters=message_parameters,
+ truncated_stacktrace=info.metadata.get("stackTrace"),
+ display_server_stacktrace=display_server_stacktrace,
+ grpc_status_code=grpc_status_code,
  )
  return recover_python_exception(converted)


+ def convert_observation_errors(
+ root_error_idx: int,
+ errors: List["pb2.FetchErrorDetailsResponse.Error"],
+ ) -> SparkConnectException:
+ """
+ Convert observation error payload (root_error_idx + list of Error from ObservedMetrics)
+ to a SparkConnectException.
+ """
+ if root_error_idx < 0 or root_error_idx >= len(errors):
+ return SparkConnectException("Observation error: invalid root_error_idx")
+
+ if len(errors) == 0:
+ return SparkConnectException("Observation error: no errors")
+
+ root_error = errors[root_error_idx]
+
+ return _convert_exception(
+ classes=list(root_error.error_type_hierarchy),
+ sql_state=root_error.spark_throwable.sql_state
+ if root_error.spark_throwable.HasField("sql_state")
+ else None,
+ error_class=root_error.spark_throwable.error_class
+ if root_error.spark_throwable.HasField("error_class")
+ else None,
+ reason=None,
+ root_error_idx=root_error_idx,
+ errors=errors,
+ truncated_message="",
+ truncated_message_parameters=None,
+ truncated_stacktrace=None,
+ )
+
+
  def _convert_exception(
- info: "ErrorInfo",
+ classes: List[str],
+ sql_state: Optional[str],
+ error_class: Optional[str],
+ reason: Optional[str],
+ root_error_idx: Optional[int],
+ errors: Optional[List["pb2.FetchErrorDetailsResponse.Error"]],
  truncated_message: str,
- resp: Optional["pb2.FetchErrorDetailsResponse"],
+ truncated_message_parameters: Optional[Dict[str, str]],
+ truncated_stacktrace: Optional[str],
  display_server_stacktrace: bool = False,
  grpc_status_code: grpc.StatusCode = StatusCode.UNKNOWN,
  ) -> SparkConnectException:
  import pyspark.sql.connect.proto as pb2

- raw_classes = info.metadata.get("classes")
- classes: List[str] = json.loads(raw_classes) if raw_classes else []
- sql_state = info.metadata.get("sqlState")
- error_class = info.metadata.get("errorClass")
- raw_message_parameters = info.metadata.get("messageParameters")
- message_parameters: Dict[str, str] = (
- json.loads(raw_message_parameters) if raw_message_parameters else {}
- )
- stacktrace: Optional[str] = None
-
- if resp is not None and resp.HasField("root_error_idx"):
- message = resp.errors[resp.root_error_idx].message
- stacktrace = _extract_jvm_stacktrace(resp)
- else:
- message = truncated_message
- stacktrace = info.metadata.get("stackTrace")
- display_server_stacktrace = display_server_stacktrace if stacktrace else False
-
+ message = truncated_message
+ stacktrace = truncated_stacktrace
+ message_parameters = truncated_message_parameters
  contexts = None
  breaking_change_info = None
- if resp and resp.HasField("root_error_idx"):
- root_error = resp.errors[resp.root_error_idx]
+
+ if root_error_idx is not None and errors is not None:
+ root_error = errors[root_error_idx]
+ message = root_error.message
+ stacktrace = _extract_jvm_stacktrace(root_error_idx, errors)
  if hasattr(root_error, "spark_throwable"):
  # Extract errorClass from FetchErrorDetailsResponse if not in metadata
  if error_class is None and root_error.spark_throwable.HasField("error_class"):
@@ -123,6 +169,8 @@ def _convert_exception(
  "key": bci.mitigation_config.key,
  "value": bci.mitigation_config.value,
  }
+ else:
+ display_server_stacktrace = display_server_stacktrace if stacktrace else False

  if "org.apache.spark.api.python.PythonException" in classes:
  return PythonException(
@@ -150,7 +198,7 @@
  sql_state=sql_state,
  server_stacktrace=stacktrace,
  display_server_stacktrace=display_server_stacktrace,
- contexts=contexts,
+ contexts=contexts, # type: ignore[arg-type]
  grpc_status_code=grpc_status_code,
  breaking_change_info=breaking_change_info,
  )
@@ -158,22 +206,21 @@
  # Return UnknownException if there is no matched exception class
  return UnknownException(
  message,
- reason=info.reason,
+ reason=reason,
  messageParameters=message_parameters,
  errorClass=error_class,
  sql_state=sql_state,
  server_stacktrace=stacktrace,
  display_server_stacktrace=display_server_stacktrace,
- contexts=contexts,
+ contexts=contexts, # type: ignore[arg-type]
  grpc_status_code=grpc_status_code,
  breaking_change_info=breaking_change_info,
  )


- def _extract_jvm_stacktrace(resp: "pb2.FetchErrorDetailsResponse") -> str:
- if len(resp.errors[resp.root_error_idx].stack_trace) == 0:
- return ""
-
+ def _extract_jvm_stacktrace(
+ root_error_idx: int, errors: List["pb2.FetchErrorDetailsResponse.Error"]
+ ) -> str:
  lines: List[str] = []

  def format_stacktrace(error: "pb2.FetchErrorDetailsResponse.Error") -> None:
@@ -190,9 +237,9 @@ def _extract_jvm_stacktrace(resp: "pb2.FetchErrorDetailsResponse") -> str:

  # If this error has a cause, format that recursively
  if error.HasField("cause_idx"):
- format_stacktrace(resp.errors[error.cause_idx])
+ format_stacktrace(errors[error.cause_idx])

- format_stacktrace(resp.errors[resp.root_error_idx])
+ format_stacktrace(errors[root_error_idx])

  return "\n".join(lines)
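
The refactor above threads a flat errors list plus index links (root_error_idx, cause_idx) through _convert_exception and _extract_jvm_stacktrace instead of passing the whole FetchErrorDetailsResponse, which lets the new convert_observation_errors reuse the same code path. A small, self-contained sketch of that data shape (plain dataclasses standing in for the protobuf Error messages; this is not the pyspark implementation):

    from dataclasses import dataclass
    from typing import List, Optional

    @dataclass
    class Err:  # stand-in for pb2.FetchErrorDetailsResponse.Error
        message: str
        cause_idx: Optional[int] = None

    def cause_chain(root_error_idx: int, errors: List[Err]) -> List[str]:
        # Follow cause_idx links from the root error, the same walk
        # _extract_jvm_stacktrace performs when formatting the server stacktrace.
        chain: List[str] = []
        idx: Optional[int] = root_error_idx
        while idx is not None:
            chain.append(errors[idx].message)
            idx = errors[idx].cause_idx
        return chain

    errors = [Err("job aborted", cause_idx=2), Err("unrelated"), Err("root cause: file not found")]
    print(cause_chain(0, errors))  # ['job aborted', 'root cause: file not found']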
@@ -206,9 +206,7 @@ class Traceback:
206
206
 
207
207
  # noinspection PyBroadException
208
208
  try:
209
- exec(
210
- code, dict(current.tb_frame.f_globals), dict(current.tb_frame.f_locals)
211
- ) # noqa: S102
209
+ exec(code, dict(current.tb_frame.f_globals), dict(current.tb_frame.f_locals))
212
210
  except Exception:
213
211
  next_tb = sys.exc_info()[2].tb_next # type: ignore
214
212
  if top_tb is None:
pyspark/errors/utils.py
@@ -271,7 +271,7 @@ def _capture_call_site(depth: int) -> str:
         import IPython
 
         # ipykernel is required for IPython
-        import ipykernel  # type: ignore[import-not-found]
+        import ipykernel
 
         ipython = IPython.get_ipython()
         # Filtering out IPython related frames
pyspark/errors_doc_gen.py
@@ -44,7 +44,7 @@ Error classes in PySpark
 This is a list of common, named error classes returned by PySpark which are defined at `error-conditions.json <https://github.com/apache/spark/blob/master/python/pyspark/errors/error-conditions.json>`_.
 
 When writing PySpark errors, developers must use an error class from the list. If an appropriate error class is not available, add a new one into the list. For more information, please refer to `Contributing Error and Exception <contributing.rst#contributing-error-and-exception>`_.
-"""  # noqa
+"""
     with open(output_rst_file_path, "w") as f:
         f.write(header + "\n\n")
         for error_key, error_details in ERROR_CLASSES_MAP.items():
pyspark/install.py
@@ -17,6 +17,7 @@
 import os
 import re
 import tarfile
+import time
 import traceback
 import urllib.request
 from shutil import rmtree
@@ -143,7 +144,7 @@ def install_spark(dest, spark_version, hadoop_version, hive_version):
     tar = None
     try:
         print("Downloading %s from:\n- %s" % (pretty_pkg_name, url))
-        download_to_file(urllib.request.urlopen(url), package_local_path)
+        _download_with_retries(url, package_local_path)
 
         print("Installing to %s" % dest)
         tar = tarfile.open(package_local_path, "r:gz")
@@ -171,7 +172,7 @@ def get_preferred_mirrors():
     for _ in range(3):
         try:
             response = urllib.request.urlopen(
-                "https://www.apache.org/dyn/closer.lua?preferred=true"
+                "https://www.apache.org/dyn/closer.lua?preferred=true", timeout=10
             )
             mirror_urls.append(response.read().decode("utf-8"))
         except Exception:
@@ -186,6 +187,40 @@ def get_preferred_mirrors():
     return list(set(mirror_urls)) + [x for x in default_sites if x not in mirror_urls]
 
 
+def _download_with_retries(url, path, max_retries=3, timeout=600):
+    """
+    Download a file from a URL with retry logic and timeout handling.
+
+    Parameters
+    ----------
+    url : str
+        The URL to download from.
+    path : str
+        The local file path to save the downloaded file.
+    max_retries : int
+        Maximum number of retry attempts per URL.
+    timeout : int
+        Timeout in seconds for the HTTP request.
+    """
+    for attempt in range(max_retries):
+        try:
+            response = urllib.request.urlopen(url, timeout=timeout)
+            download_to_file(response, path)
+            return
+        except Exception as e:
+            if os.path.exists(path):
+                os.remove(path)
+            if attempt < max_retries - 1:
+                wait = 2**attempt * 5
+                print(
+                    "Download attempt %d/%d failed: %s. Retrying in %d seconds..."
+                    % (attempt + 1, max_retries, str(e), wait)
+                )
+                time.sleep(wait)
+            else:
+                raise
+
+
 def download_to_file(response, path, chunk_size=1024 * 1024):
     total_size = int(response.info().get("Content-Length").strip())
     bytes_so_far = 0
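
The new _download_with_retries helper wraps download_to_file with a socket timeout and exponential backoff: wait = 2**attempt * 5 gives 5 s after the first failed attempt and 10 s after the second, and any partial file is deleted before retrying. A hedged usage sketch (the URL is a placeholder, not a real release artifact; real calls go through install_spark after a mirror is chosen):

    # Placeholder URL for illustration only.
    url = "https://example.org/spark/spark-x.y.z-bin-hadoop3.tgz"

    # Up to 3 attempts with a 600 s timeout each; waits 5 s, then 10 s
    # between attempts, and re-raises if the final attempt also fails.
    _download_with_retries(url, "/tmp/spark.tgz", max_retries=3, timeout=600)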
pyspark/instrumentation_utils.py
@@ -124,7 +124,7 @@ def _attach(
     logger_module: Union[str, ModuleType],
     modules: List[ModuleType],
     classes: List[Type[Any]],
-    missings: List[Tuple[Type[Any], Type[Any]]],
+    missings: List[Tuple[Union[ModuleType, Type[Any]], Type[Any]]],
 ) -> None:
     if isinstance(logger_module, str):
         logger_module = importlib.import_module(logger_module)

pyspark/logger/__init__.py
@@ -18,6 +18,6 @@
 """
 PySpark logging
 """
-from pyspark.logger.logger import PySparkLogger, SPARK_LOG_SCHEMA  # noqa: F401
+from pyspark.logger.logger import PySparkLogger, SPARK_LOG_SCHEMA
 
 __all__ = ["PySparkLogger", "SPARK_LOG_SCHEMA"]
pyspark/logger/worker_io.py
@@ -223,7 +223,11 @@ def context_provider() -> dict[str, str]:
     - class_name: Name of the class that initiated the logging if available
     """
 
-    def is_pyspark_module(module_name: str) -> bool:
+    def is_pyspark_module(frame: FrameType) -> bool:
+        module_name = frame.f_globals.get("__name__", "")
+        if module_name == "__main__":
+            if (mod := sys.modules.get("__main__", None)) and mod.__spec__:
+                module_name = mod.__spec__.name
         return module_name.startswith("pyspark.") and ".tests." not in module_name
 
     bottom: Optional[FrameType] = None
@@ -236,9 +240,8 @@ def context_provider() -> dict[str, str]:
     if frame:
         while frame.f_back:
            f_back = frame.f_back
-           module_name = f_back.f_globals.get("__name__", "")
 
-           if is_pyspark_module(module_name):
+           if is_pyspark_module(f_back):
                if not is_in_pyspark_module:
                    bottom = frame
                    is_in_pyspark_module = True
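
Resolving __main__ back to a real module name matters when the entry script is launched with "python -m": the frame's __name__ is then "__main__", but the module spec still carries the dotted path, so pyspark.* frames are not misclassified. A small standalone sketch of the same lookup (not tied to worker_io internals):

    import sys
    from types import FrameType

    def resolve_module_name(frame: FrameType) -> str:
        # Prefer __spec__.name when the frame reports __main__
        # (e.g. the process was started with `python -m pkg.mod`).
        module_name = frame.f_globals.get("__name__", "")
        if module_name == "__main__":
            mod = sys.modules.get("__main__", None)
            if mod is not None and mod.__spec__ is not None:
                module_name = mod.__spec__.name
        return module_name

    print(resolve_module_name(sys._getframe()))
    # "__main__" when run as a script, or the dotted module name under -m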
pyspark/ml/_typing.pyi
@@ -16,7 +16,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from typing import Any, Dict, List, TypeVar, Tuple, Union
+from typing import Any, Dict, List, TYPE_CHECKING, TypeVar, Tuple, Union
 from typing_extensions import Literal
 
 from numpy import ndarray
@@ -24,10 +24,12 @@ from py4j.java_gateway import JavaObject
 
 import pyspark.ml.base
 import pyspark.ml.param
-import pyspark.ml.util
 from pyspark.ml.linalg import Vector
 import pyspark.ml.wrapper
 
+if TYPE_CHECKING:
+    from scipy.sparse import spmatrix, sparray
+
 ParamMap = Dict[pyspark.ml.param.Param, Any]
 PipelineStage = Union[pyspark.ml.base.Estimator, pyspark.ml.base.Transformer]
 
@@ -81,4 +83,4 @@ RankingEvaluatorMetricType = Union[
     Literal["recallAtK"],
 ]
 
-VectorLike = Union[ndarray, Vector, List[float], Tuple[float, ...]]
+VectorLike = Union[ndarray, Vector, List[float], Tuple[float, ...], "spmatrix", "sparray", range]
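
The widened alias lets the stubs accept SciPy sparse matrices/arrays and range objects wherever a VectorLike is expected, while the TYPE_CHECKING guard keeps scipy a type-check-only dependency. A sketch of what the alias admits, using a local alias that mirrors the stub (illustration only, not the stub itself):

    from typing import List, TYPE_CHECKING, Tuple, Union

    import numpy as np
    from pyspark.ml.linalg import Vector, Vectors

    if TYPE_CHECKING:
        from scipy.sparse import sparray, spmatrix

    # Local mirror of the stub's alias, for illustration only.
    VectorLike = Union[np.ndarray, Vector, List[float], Tuple[float, ...], "spmatrix", "sparray", range]

    v1: VectorLike = np.array([1.0, 0.0, 3.0])
    v2: VectorLike = Vectors.sparse(3, [0, 2], [1.0, 3.0])
    v3: VectorLike = [1.0, 0.0, 3.0]
    v4: VectorLike = range(3)  # newly admitted by the widened alias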
pyspark/ml/classification.py
@@ -2253,7 +2253,7 @@ class RandomForestClassifier(
         return self._set(minWeightFractionPerNode=value)
 
 
-class RandomForestClassificationModel(
+class RandomForestClassificationModel(  # type: ignore[misc]
     _TreeEnsembleModel,
     _JavaProbabilisticClassificationModel[Vector],
     _RandomForestClassifierParams,

pyspark/ml/connect/base.py
@@ -155,7 +155,7 @@ class Transformer(Params, metaclass=ABCMeta):
     ) -> Union[DataFrame, pd.DataFrame]:
         """
         Transforms the input dataset.
-        The dataset can be either pandas dataframe or spark dataframe
+        The dataset can be either pandas dataframe or spark dataframe,
         if it is a spark DataFrame, the result of transformation is a new spark DataFrame
         that contains all existing columns and output columns with names,
         If it is a pandas DataFrame, the result of transformation is a shallow copy
 
pyspark/ml/functions.py
@@ -241,7 +241,7 @@ def _validate_and_transform_single_input(
         # tensor columns
         if len(batch.columns) == 1:
             # one tensor column and one expected input, vstack rows
-            single_input = np.vstack(batch.iloc[:, 0])
+            single_input = np.vstack(batch.iloc[:, 0])  # type: ignore[call-overload]
         else:
             raise ValueError(
                 "Multiple input columns found, but model expected a single "