pyspark-client 4.1.0.dev2__tar.gz → 4.1.0.dev4__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (377)
  1. {pyspark_client-4.1.0.dev2/pyspark_client.egg-info → pyspark_client-4.1.0.dev4}/PKG-INFO +6 -4
  2. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/cloudpickle/__init__.py +1 -1
  3. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/cloudpickle/cloudpickle.py +13 -6
  4. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/errors/error-conditions.json +28 -0
  5. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/errors/exceptions/captured.py +12 -37
  6. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/logger/logger.py +19 -3
  7. pyspark_client-4.1.0.dev4/pyspark/logger/worker_io.py +297 -0
  8. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/supported_api_gen.py +1 -1
  9. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pipelines/__init__.py +2 -0
  10. pyspark_client-4.1.0.dev4/pyspark/pipelines/add_pipeline_analysis_context.py +48 -0
  11. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pipelines/api.py +64 -7
  12. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pipelines/block_connect_access.py +37 -9
  13. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pipelines/cli.py +30 -7
  14. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pipelines/graph_element_registry.py +2 -2
  15. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pipelines/init_cli.py +10 -1
  16. pyspark_client-4.1.0.dev2/pyspark/pipelines/dataset.py → pyspark_client-4.1.0.dev4/pyspark/pipelines/output.py +18 -7
  17. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pipelines/source_code_location.py +28 -0
  18. pyspark_client-4.1.0.dev4/pyspark/pipelines/spark_connect_graph_element_registry.py +154 -0
  19. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pipelines/spark_connect_pipeline.py +4 -0
  20. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/__init__.py +3 -1
  21. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/avro/functions.py +3 -3
  22. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/column.py +53 -0
  23. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/_typing.py +1 -1
  24. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/client/artifact.py +55 -0
  25. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/client/core.py +259 -3
  26. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/column.py +18 -3
  27. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/dataframe.py +22 -9
  28. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/expressions.py +1 -1
  29. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/functions/builtin.py +554 -22
  30. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/group.py +30 -10
  31. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/plan.py +85 -13
  32. pyspark_client-4.1.0.dev4/pyspark/sql/connect/proto/base_pb2.py +275 -0
  33. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/base_pb2.pyi +251 -6
  34. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/base_pb2_grpc.py +55 -0
  35. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/catalog_pb2.py +2 -2
  36. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/commands_pb2.py +2 -2
  37. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/common_pb2.py +17 -15
  38. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/common_pb2.pyi +28 -0
  39. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/example_plugins_pb2.py +2 -2
  40. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/expressions_pb2.py +53 -53
  41. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/ml_common_pb2.py +2 -2
  42. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/ml_pb2.py +2 -2
  43. pyspark_client-4.1.0.dev4/pyspark/sql/connect/proto/pipelines_pb2.py +130 -0
  44. pyspark_client-4.1.0.dev4/pyspark/sql/connect/proto/pipelines_pb2.pyi +1574 -0
  45. pyspark_client-4.1.0.dev4/pyspark/sql/connect/proto/relations_pb2.py +251 -0
  46. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/relations_pb2.pyi +57 -1
  47. pyspark_client-4.1.0.dev4/pyspark/sql/connect/proto/types_pb2.py +109 -0
  48. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/types_pb2.pyi +59 -0
  49. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/session.py +123 -7
  50. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/tvf.py +5 -0
  51. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/types.py +51 -5
  52. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/utils.py +17 -0
  53. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/window.py +4 -1
  54. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/conversion.py +104 -17
  55. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/functions/__init__.py +33 -0
  56. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/functions/builtin.py +1883 -225
  57. pyspark_client-4.1.0.dev4/pyspark/sql/geo_utils.py +103 -0
  58. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/group.py +19 -16
  59. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/_typing/__init__.pyi +14 -1
  60. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/functions.py +29 -0
  61. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/functions.pyi +10 -4
  62. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/group_ops.py +154 -31
  63. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/serializers.py +267 -10
  64. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/typehints.py +197 -3
  65. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/types.py +124 -2
  66. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/list_state_client.py +10 -38
  67. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/proto/StateMessage_pb2.py +4 -4
  68. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/query.py +5 -2
  69. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/tvf.py +39 -0
  70. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/types.py +410 -1
  71. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/udf.py +19 -10
  72. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/worker/analyze_udtf.py +10 -2
  73. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/worker/commit_data_source_write.py +8 -6
  74. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/worker/create_data_source.py +46 -43
  75. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/worker/data_source_pushdown_filters.py +58 -54
  76. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/worker/plan_data_source_read.py +45 -37
  77. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/worker/python_streaming_sink_runner.py +31 -27
  78. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/worker/write_into_data_source.py +78 -69
  79. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/testing/connectutils.py +56 -7
  80. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/testing/sqlutils.py +20 -0
  81. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/util.py +5 -1
  82. pyspark_client-4.1.0.dev4/pyspark/version.py +1 -0
  83. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/worker.py +298 -70
  84. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4/pyspark_client.egg-info}/PKG-INFO +6 -4
  85. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark_client.egg-info/SOURCES.txt +4 -1
  86. pyspark_client-4.1.0.dev4/pyspark_client.egg-info/requires.txt +8 -0
  87. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/setup.py +5 -2
  88. pyspark_client-4.1.0.dev2/pyspark/pipelines/spark_connect_graph_element_registry.py +0 -111
  89. pyspark_client-4.1.0.dev2/pyspark/sql/connect/proto/base_pb2.py +0 -265
  90. pyspark_client-4.1.0.dev2/pyspark/sql/connect/proto/pipelines_pb2.py +0 -94
  91. pyspark_client-4.1.0.dev2/pyspark/sql/connect/proto/pipelines_pb2.pyi +0 -877
  92. pyspark_client-4.1.0.dev2/pyspark/sql/connect/proto/relations_pb2.py +0 -249
  93. pyspark_client-4.1.0.dev2/pyspark/sql/connect/proto/types_pb2.py +0 -105
  94. pyspark_client-4.1.0.dev2/pyspark/version.py +0 -1
  95. pyspark_client-4.1.0.dev2/pyspark_client.egg-info/requires.txt +0 -7
  96. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/MANIFEST.in +0 -0
  97. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/README.md +0 -0
  98. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/__init__.py +0 -0
  99. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/_globals.py +0 -0
  100. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/_typing.pyi +0 -0
  101. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/accumulators.py +0 -0
  102. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/cloudpickle/cloudpickle_fast.py +0 -0
  103. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/conf.py +0 -0
  104. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/daemon.py +0 -0
  105. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/errors/__init__.py +0 -0
  106. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/errors/error_classes.py +0 -0
  107. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/errors/exceptions/__init__.py +0 -0
  108. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/errors/exceptions/base.py +0 -0
  109. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/errors/exceptions/connect.py +0 -0
  110. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/errors/exceptions/tblib.py +0 -0
  111. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/errors/utils.py +0 -0
  112. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/errors_doc_gen.py +0 -0
  113. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/find_spark_home.py +0 -0
  114. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/install.py +0 -0
  115. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/instrumentation_utils.py +0 -0
  116. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/java_gateway.py +0 -0
  117. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/join.py +0 -0
  118. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/logger/__init__.py +0 -0
  119. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/loose_version.py +0 -0
  120. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/__init__.py +0 -0
  121. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/_typing.pyi +0 -0
  122. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/base.py +0 -0
  123. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/classification.py +0 -0
  124. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/clustering.py +0 -0
  125. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/common.py +0 -0
  126. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/__init__.py +0 -0
  127. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/base.py +0 -0
  128. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/classification.py +0 -0
  129. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/evaluation.py +0 -0
  130. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/feature.py +0 -0
  131. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/functions.py +0 -0
  132. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/io_utils.py +0 -0
  133. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/pipeline.py +0 -0
  134. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/proto.py +0 -0
  135. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/readwrite.py +0 -0
  136. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/serialize.py +0 -0
  137. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/summarizer.py +0 -0
  138. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/tuning.py +0 -0
  139. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/connect/util.py +0 -0
  140. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/deepspeed/__init__.py +0 -0
  141. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/deepspeed/deepspeed_distributor.py +0 -0
  142. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/dl_util.py +0 -0
  143. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/evaluation.py +0 -0
  144. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/feature.py +0 -0
  145. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/fpm.py +0 -0
  146. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/functions.py +0 -0
  147. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/image.py +0 -0
  148. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/linalg/__init__.py +0 -0
  149. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/model_cache.py +0 -0
  150. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/param/__init__.py +0 -0
  151. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/param/_shared_params_code_gen.py +0 -0
  152. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/param/shared.py +0 -0
  153. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/pipeline.py +0 -0
  154. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/recommendation.py +0 -0
  155. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/regression.py +0 -0
  156. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/stat.py +0 -0
  157. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/torch/__init__.py +0 -0
  158. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/torch/data.py +0 -0
  159. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/torch/distributor.py +0 -0
  160. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/torch/log_communication.py +0 -0
  161. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/torch/torch_run_process_wrapper.py +0 -0
  162. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/tree.py +0 -0
  163. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/tuning.py +0 -0
  164. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/util.py +0 -0
  165. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/ml/wrapper.py +0 -0
  166. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/__init__.py +0 -0
  167. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/_typing.pyi +0 -0
  168. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/classification.py +0 -0
  169. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/clustering.py +0 -0
  170. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/common.py +0 -0
  171. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/evaluation.py +0 -0
  172. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/feature.py +0 -0
  173. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/fpm.py +0 -0
  174. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/linalg/__init__.py +0 -0
  175. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/linalg/distributed.py +0 -0
  176. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/random.py +0 -0
  177. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/recommendation.py +0 -0
  178. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/regression.py +0 -0
  179. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/stat/KernelDensity.py +0 -0
  180. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/stat/__init__.py +0 -0
  181. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/stat/_statistics.py +0 -0
  182. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/stat/distribution.py +0 -0
  183. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/stat/test.py +0 -0
  184. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/tree.py +0 -0
  185. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/mllib/util.py +0 -0
  186. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/__init__.py +0 -0
  187. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/_typing.py +0 -0
  188. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/accessors.py +0 -0
  189. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/base.py +0 -0
  190. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/categorical.py +0 -0
  191. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/config.py +0 -0
  192. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/correlation.py +0 -0
  193. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/__init__.py +0 -0
  194. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/base.py +0 -0
  195. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/binary_ops.py +0 -0
  196. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/boolean_ops.py +0 -0
  197. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/categorical_ops.py +0 -0
  198. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/complex_ops.py +0 -0
  199. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/date_ops.py +0 -0
  200. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/datetime_ops.py +0 -0
  201. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/null_ops.py +0 -0
  202. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/num_ops.py +0 -0
  203. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/string_ops.py +0 -0
  204. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/timedelta_ops.py +0 -0
  205. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/data_type_ops/udt_ops.py +0 -0
  206. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/datetimes.py +0 -0
  207. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/exceptions.py +0 -0
  208. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/extensions.py +0 -0
  209. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/frame.py +0 -0
  210. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/generic.py +0 -0
  211. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/groupby.py +0 -0
  212. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/indexes/__init__.py +0 -0
  213. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/indexes/base.py +0 -0
  214. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/indexes/category.py +0 -0
  215. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/indexes/datetimes.py +0 -0
  216. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/indexes/multi.py +0 -0
  217. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/indexes/timedelta.py +0 -0
  218. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/indexing.py +0 -0
  219. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/internal.py +0 -0
  220. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/missing/__init__.py +0 -0
  221. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/missing/common.py +0 -0
  222. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/missing/frame.py +0 -0
  223. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/missing/general_functions.py +0 -0
  224. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/missing/groupby.py +0 -0
  225. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/missing/indexes.py +0 -0
  226. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/missing/resample.py +0 -0
  227. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/missing/scalars.py +0 -0
  228. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/missing/series.py +0 -0
  229. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/missing/window.py +0 -0
  230. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/mlflow.py +0 -0
  231. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/namespace.py +0 -0
  232. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/numpy_compat.py +0 -0
  233. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/plot/__init__.py +0 -0
  234. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/plot/core.py +0 -0
  235. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/plot/matplotlib.py +0 -0
  236. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/plot/plotly.py +0 -0
  237. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/resample.py +0 -0
  238. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/series.py +0 -0
  239. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/spark/__init__.py +0 -0
  240. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/spark/accessors.py +0 -0
  241. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/spark/utils.py +0 -0
  242. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/sql_formatter.py +0 -0
  243. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/sql_processor.py +0 -0
  244. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/strings.py +0 -0
  245. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/testing.py +0 -0
  246. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/typedef/__init__.py +0 -0
  247. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/typedef/typehints.py +0 -0
  248. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/usage_logging/__init__.py +0 -0
  249. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/usage_logging/usage_logger.py +0 -0
  250. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/utils.py +0 -0
  251. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pandas/window.py +0 -0
  252. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pipelines/block_session_mutations.py +0 -0
  253. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pipelines/flow.py +0 -0
  254. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pipelines/logging_utils.py +0 -0
  255. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/pipelines/type_error_utils.py +0 -0
  256. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/profiler.py +0 -0
  257. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/py.typed +0 -0
  258. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/rddsampler.py +0 -0
  259. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/resource/__init__.py +0 -0
  260. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/resource/information.py +0 -0
  261. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/resource/profile.py +0 -0
  262. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/resource/requests.py +0 -0
  263. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/resultiterable.py +0 -0
  264. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/serializers.py +0 -0
  265. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/shell.py +0 -0
  266. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/shuffle.py +0 -0
  267. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/_typing.pyi +0 -0
  268. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/avro/__init__.py +0 -0
  269. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/catalog.py +0 -0
  270. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/conf.py +0 -0
  271. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/__init__.py +0 -0
  272. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/avro/__init__.py +0 -0
  273. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/avro/functions.py +0 -0
  274. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/catalog.py +0 -0
  275. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/client/__init__.py +0 -0
  276. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/client/reattach.py +0 -0
  277. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/client/retries.py +0 -0
  278. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/conf.py +0 -0
  279. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/conversion.py +0 -0
  280. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/datasource.py +0 -0
  281. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/functions/__init__.py +0 -0
  282. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/functions/partitioning.py +0 -0
  283. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/logging.py +0 -0
  284. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/merge.py +0 -0
  285. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/observation.py +0 -0
  286. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/profiler.py +0 -0
  287. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/__init__.py +0 -0
  288. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/catalog_pb2.pyi +0 -0
  289. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/commands_pb2.pyi +0 -0
  290. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/example_plugins_pb2.pyi +0 -0
  291. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/expressions_pb2.pyi +0 -0
  292. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/ml_common_pb2.pyi +0 -0
  293. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/proto/ml_pb2.pyi +0 -0
  294. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/protobuf/__init__.py +0 -0
  295. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/protobuf/functions.py +0 -0
  296. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/readwriter.py +0 -0
  297. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/resource/__init__.py +0 -0
  298. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/resource/profile.py +0 -0
  299. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/shell/__init__.py +0 -0
  300. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/shell/progress.py +0 -0
  301. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/sql_formatter.py +0 -0
  302. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/streaming/__init__.py +0 -0
  303. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/streaming/query.py +0 -0
  304. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/streaming/readwriter.py +0 -0
  305. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/streaming/worker/__init__.py +0 -0
  306. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +0 -0
  307. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/streaming/worker/listener_worker.py +0 -0
  308. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/table_arg.py +0 -0
  309. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/udf.py +0 -0
  310. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/connect/udtf.py +0 -0
  311. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/context.py +0 -0
  312. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/dataframe.py +0 -0
  313. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/datasource.py +0 -0
  314. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/datasource_internal.py +0 -0
  315. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/functions/partitioning.py +0 -0
  316. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/internal.py +0 -0
  317. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/merge.py +0 -0
  318. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/metrics.py +0 -0
  319. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/observation.py +0 -0
  320. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/__init__.py +0 -0
  321. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/_typing/protocols/__init__.pyi +0 -0
  322. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/_typing/protocols/frame.pyi +0 -0
  323. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/_typing/protocols/series.pyi +0 -0
  324. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/conversion.py +0 -0
  325. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/map_ops.py +0 -0
  326. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/pandas/utils.py +0 -0
  327. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/plot/__init__.py +0 -0
  328. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/plot/core.py +0 -0
  329. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/plot/plotly.py +0 -0
  330. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/profiler.py +0 -0
  331. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/protobuf/__init__.py +0 -0
  332. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/protobuf/functions.py +0 -0
  333. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/readwriter.py +0 -0
  334. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/session.py +0 -0
  335. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/sql_formatter.py +0 -0
  336. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/__init__.py +0 -0
  337. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/listener.py +0 -0
  338. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/map_state_client.py +0 -0
  339. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/proto/StateMessage_pb2.pyi +0 -0
  340. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/proto/__init__.py +0 -0
  341. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/python_streaming_source_runner.py +0 -0
  342. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/readwriter.py +0 -0
  343. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/state.py +0 -0
  344. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/stateful_processor.py +0 -0
  345. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/stateful_processor_api_client.py +0 -0
  346. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/stateful_processor_util.py +0 -0
  347. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/transform_with_state_driver_worker.py +0 -0
  348. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/streaming/value_state_client.py +0 -0
  349. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/table_arg.py +0 -0
  350. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/tvf_argument.py +0 -0
  351. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/udtf.py +0 -0
  352. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/utils.py +0 -0
  353. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/variant_utils.py +0 -0
  354. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/window.py +0 -0
  355. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/worker/__init__.py +0 -0
  356. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/sql/worker/lookup_data_sources.py +0 -0
  357. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/statcounter.py +0 -0
  358. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/storagelevel.py +0 -0
  359. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/streaming/__init__.py +0 -0
  360. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/streaming/context.py +0 -0
  361. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/streaming/dstream.py +0 -0
  362. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/streaming/kinesis.py +0 -0
  363. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/streaming/listener.py +0 -0
  364. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/streaming/util.py +0 -0
  365. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/taskcontext.py +0 -0
  366. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/testing/__init__.py +0 -0
  367. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/testing/mllibutils.py +0 -0
  368. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/testing/mlutils.py +0 -0
  369. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/testing/objects.py +0 -0
  370. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/testing/pandasutils.py +0 -0
  371. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/testing/streamingutils.py +0 -0
  372. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/testing/utils.py +0 -0
  373. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/traceback_utils.py +0 -0
  374. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark/worker_util.py +0 -0
  375. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark_client.egg-info/dependency_links.txt +0 -0
  376. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/pyspark_client.egg-info/top_level.txt +0 -0
  377. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev4}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pyspark-client
3
- Version: 4.1.0.dev2
3
+ Version: 4.1.0.dev4
4
4
  Summary: Python Spark Connect client for Apache Spark
5
5
  Home-page: https://github.com/apache/spark/tree/master/python
6
6
  Author: Spark Developers
@@ -11,6 +11,7 @@ Classifier: Programming Language :: Python :: 3.10
11
11
  Classifier: Programming Language :: Python :: 3.11
12
12
  Classifier: Programming Language :: Python :: 3.12
13
13
  Classifier: Programming Language :: Python :: 3.13
14
+ Classifier: Programming Language :: Python :: 3.14
14
15
  Classifier: Programming Language :: Python :: Implementation :: CPython
15
16
  Classifier: Programming Language :: Python :: Implementation :: PyPy
16
17
  Classifier: Typing :: Typed
@@ -18,9 +19,10 @@ Requires-Python: >=3.10
18
19
  Description-Content-Type: text/markdown
19
20
  Requires-Dist: pandas>=2.2.0
20
21
  Requires-Dist: pyarrow>=15.0.0
21
- Requires-Dist: grpcio>=1.67.0
22
- Requires-Dist: grpcio-status>=1.67.0
23
- Requires-Dist: googleapis-common-protos>=1.65.0
22
+ Requires-Dist: grpcio>=1.76.0
23
+ Requires-Dist: grpcio-status>=1.76.0
24
+ Requires-Dist: googleapis-common-protos>=1.71.0
25
+ Requires-Dist: zstandard>=0.25.0
24
26
  Requires-Dist: numpy>=1.21
25
27
  Requires-Dist: pyyaml>=3.11
26
28
  Dynamic: author
@@ -3,7 +3,7 @@ from pyspark.cloudpickle.cloudpickle import * # noqa
3
3
 
4
4
  __doc__ = cloudpickle.__doc__
5
5
 
6
- __version__ = "3.1.1"
6
+ __version__ = "3.1.2"
7
7
 
8
8
  __all__ = [ # noqa
9
9
  "__version__",
@@ -783,6 +783,12 @@ def _class_getstate(obj):
783
783
 
784
784
  clsdict.pop("__dict__", None) # unpicklable property object
785
785
 
786
+ if sys.version_info >= (3, 14):
787
+ # PEP-649/749: __annotate_func__ contains a closure that references the class
788
+ # dict. We need to exclude it from pickling. Python will recreate it when
789
+ # __annotations__ is accessed at unpickling time.
790
+ clsdict.pop("__annotate_func__", None)
791
+
786
792
  return (clsdict, {})
787
793
 
788
794
 
@@ -1190,6 +1196,10 @@ def _class_setstate(obj, state):
1190
1196
  for subclass in registry:
1191
1197
  obj.register(subclass)
1192
1198
 
1199
+ # PEP-649/749: During pickling, we excluded the __annotate_func__ attribute but it
1200
+ # will be created by Python. Subsequently, annotations will be recreated when
1201
+ # __annotations__ is accessed.
1202
+
1193
1203
  return obj
1194
1204
 
1195
1205
 
@@ -1301,12 +1311,9 @@ class Pickler(pickle.Pickler):
1301
1311
  def dump(self, obj):
1302
1312
  try:
1303
1313
  return super().dump(obj)
1304
- except RuntimeError as e:
1305
- if len(e.args) > 0 and "recursion" in e.args[0]:
1306
- msg = "Could not pickle object as excessively deep recursion required."
1307
- raise pickle.PicklingError(msg) from e
1308
- else:
1309
- raise
1314
+ except RecursionError as e:
1315
+ msg = "Could not pickle object as excessively deep recursion required."
1316
+ raise pickle.PicklingError(msg) from e
1310
1317
 
1311
1318
  def __init__(self, file, protocol=None, buffer_callback=None):
1312
1319
  if protocol is None:
@@ -549,6 +549,16 @@
549
549
  "<arg1> and <arg2> should be of the same length, got <arg1_length> and <arg2_length>."
550
550
  ]
551
551
  },
552
+ "MALFORMED_GEOGRAPHY": {
553
+ "message": [
554
+ "Geography binary is malformed. Please check the data source is valid."
555
+ ]
556
+ },
557
+ "MALFORMED_GEOMETRY": {
558
+ "message": [
559
+ "Geometry binary is malformed. Please check the data source is valid."
560
+ ]
561
+ },
552
562
  "MALFORMED_VARIANT": {
553
563
  "message": [
554
564
  "Variant binary is malformed. Please check the data source is valid."
@@ -1134,6 +1144,24 @@
1134
1144
  "Cannot serialize the function `<name>`. If you accessed the Spark session, or a DataFrame defined outside of the function, or any object that contains a Spark session, please be aware that they are not allowed in Spark Connect. For `foreachBatch`, please access the Spark session using `df.sparkSession`, where `df` is the first parameter in your `foreachBatch` function. For `StreamingQueryListener`, please access the Spark session using `self.spark`. For details please check out the PySpark doc for `foreachBatch` and `StreamingQueryListener`."
1135
1145
  ]
1136
1146
  },
1147
+ "ST_INVALID_ALGORITHM_VALUE" : {
1148
+ "message" : [
1149
+ "Invalid or unsupported edge interpolation algorithm value: '<alg>'."
1150
+ ],
1151
+ "sqlState" : "22023"
1152
+ },
1153
+ "ST_INVALID_CRS_VALUE" : {
1154
+ "message" : [
1155
+ "Invalid or unsupported CRS (coordinate reference system) value: '<crs>'."
1156
+ ],
1157
+ "sqlState" : "22023"
1158
+ },
1159
+ "ST_INVALID_SRID_VALUE" : {
1160
+ "message" : [
1161
+ "Invalid or unsupported SRID (spatial reference identifier) value: <srid>."
1162
+ ],
1163
+ "sqlState" : "22023"
1164
+ },
1137
1165
  "TEST_CLASS_NOT_COMPILED": {
1138
1166
  "message": [
1139
1167
  "<test_class_path> doesn't exist. Spark sql test classes are not compiled."
@@ -107,7 +107,8 @@ class CapturedException(PySparkException):
107
107
  if self._origin is not None and is_instance_of(
108
108
  gw, self._origin, "org.apache.spark.SparkThrowable"
109
109
  ):
110
- return self._origin.getCondition()
110
+ utils = SparkContext._jvm.PythonErrorUtils # type: ignore[union-attr]
111
+ return utils.getCondition(self._origin)
111
112
  else:
112
113
  return None
113
114
 
@@ -118,7 +119,6 @@ class CapturedException(PySparkException):
118
119
  def getMessageParameters(self) -> Optional[Dict[str, str]]:
119
120
  from pyspark import SparkContext
120
121
  from py4j.java_gateway import is_instance_of
121
- from py4j.protocol import Py4JError
122
122
 
123
123
  assert SparkContext._gateway is not None
124
124
 
@@ -126,38 +126,28 @@ class CapturedException(PySparkException):
126
126
  if self._origin is not None and is_instance_of(
127
127
  gw, self._origin, "org.apache.spark.SparkThrowable"
128
128
  ):
129
- try:
130
- return dict(self._origin.getMessageParameters())
131
- except Py4JError as e:
132
- if "py4j.Py4JException" in str(e) and "Method getMessageParameters" in str(e):
133
- return None
134
- raise e
129
+ utils = SparkContext._jvm.PythonErrorUtils # type: ignore[union-attr]
130
+ return dict(utils.getMessageParameters(self._origin))
135
131
  else:
136
132
  return None
137
133
 
138
134
  def getSqlState(self) -> Optional[str]:
139
135
  from pyspark import SparkContext
140
136
  from py4j.java_gateway import is_instance_of
141
- from py4j.protocol import Py4JError
142
137
 
143
138
  assert SparkContext._gateway is not None
144
139
  gw = SparkContext._gateway
145
140
  if self._origin is not None and is_instance_of(
146
141
  gw, self._origin, "org.apache.spark.SparkThrowable"
147
142
  ):
148
- try:
149
- return self._origin.getSqlState()
150
- except Py4JError as e:
151
- if "py4j.Py4JException" in str(e) and "Method getSqlState" in str(e):
152
- return None
153
- raise e
143
+ utils = SparkContext._jvm.PythonErrorUtils # type: ignore[union-attr]
144
+ return utils.getSqlState(self._origin)
154
145
  else:
155
146
  return None
156
147
 
157
148
  def getMessage(self) -> str:
158
149
  from pyspark import SparkContext
159
150
  from py4j.java_gateway import is_instance_of
160
- from py4j.protocol import Py4JError
161
151
 
162
152
  assert SparkContext._gateway is not None
163
153
  gw = SparkContext._gateway
@@ -165,21 +155,12 @@ class CapturedException(PySparkException):
165
155
  if self._origin is not None and is_instance_of(
166
156
  gw, self._origin, "org.apache.spark.SparkThrowable"
167
157
  ):
168
- try:
169
- error_class = self._origin.getCondition()
170
- except Py4JError as e:
171
- if "py4j.Py4JException" in str(e) and "Method getCondition" in str(e):
172
- return ""
173
- raise e
174
- try:
175
- message_parameters = self._origin.getMessageParameters()
176
- except Py4JError as e:
177
- if "py4j.Py4JException" in str(e) and "Method getMessageParameters" in str(e):
178
- return ""
179
- raise e
158
+ utils = SparkContext._jvm.PythonErrorUtils # type: ignore[union-attr]
159
+ errorClass = utils.getCondition(self._origin)
160
+ messageParameters = utils.getMessageParameters(self._origin)
180
161
 
181
162
  error_message = getattr(gw.jvm, "org.apache.spark.SparkThrowableHelper").getMessage(
182
- error_class, message_parameters
163
+ errorClass, messageParameters
183
164
  )
184
165
 
185
166
  return error_message
@@ -189,7 +170,6 @@ class CapturedException(PySparkException):
189
170
  def getQueryContext(self) -> List[BaseQueryContext]:
190
171
  from pyspark import SparkContext
191
172
  from py4j.java_gateway import is_instance_of
192
- from py4j.protocol import Py4JError
193
173
 
194
174
  assert SparkContext._gateway is not None
195
175
 
@@ -198,13 +178,8 @@ class CapturedException(PySparkException):
198
178
  gw, self._origin, "org.apache.spark.SparkThrowable"
199
179
  ):
200
180
  contexts: List[BaseQueryContext] = []
201
- try:
202
- context = self._origin.getQueryContext()
203
- except Py4JError as e:
204
- if "py4j.Py4JException" in str(e) and "Method getQueryContext" in str(e):
205
- return []
206
- raise e
207
- for q in context:
181
+ utils = SparkContext._jvm.PythonErrorUtils # type: ignore[union-attr]
182
+ for q in utils.getQueryContext(self._origin):
208
183
  if q.contextType().toString() == "SQL":
209
184
  contexts.append(SQLQueryContext(q))
210
185
  else:
@@ -50,6 +50,10 @@ class JSONFormatter(logging.Formatter):
50
50
 
51
51
  default_msec_format = "%s.%03d"
52
52
 
53
+ def __init__(self, ensure_ascii: bool = False):
54
+ super().__init__()
55
+ self._ensure_ascii = ensure_ascii
56
+
53
57
  def format(self, record: logging.LogRecord) -> str:
54
58
  """
55
59
  Format the specified record as a JSON string.
@@ -69,7 +73,7 @@ class JSONFormatter(logging.Formatter):
69
73
  "level": record.levelname,
70
74
  "logger": record.name,
71
75
  "msg": record.getMessage(),
72
- "context": record.__dict__.get("kwargs", {}),
76
+ "context": record.__dict__.get("context", {}),
73
77
  }
74
78
  if record.exc_info:
75
79
  exc_type, exc_value, exc_tb = record.exc_info
@@ -89,7 +93,7 @@ class JSONFormatter(logging.Formatter):
89
93
  "msg": str(exc_value),
90
94
  "stacktrace": structured_stacktrace,
91
95
  }
92
- return json.dumps(log_entry, ensure_ascii=False)
96
+ return json.dumps(log_entry, ensure_ascii=self._ensure_ascii)
93
97
 
94
98
 
95
99
  class PySparkLogger(logging.Logger):
@@ -136,7 +140,19 @@ class PySparkLogger(logging.Logger):
136
140
  """
137
141
 
138
142
  def __init__(self, name: str = "PySparkLogger"):
143
+ from pyspark.logger.worker_io import JSONFormatterWithMarker
144
+
139
145
  super().__init__(name, level=logging.WARN)
146
+
147
+ root_logger = logging.getLogger()
148
+ if any(
149
+ isinstance(h, logging.StreamHandler)
150
+ and isinstance(h.formatter, JSONFormatterWithMarker)
151
+ for h in root_logger.handlers
152
+ ):
153
+ # Likely in the `capture_outputs` context, so don't add a handler
154
+ return
155
+
140
156
  _handler = logging.StreamHandler()
141
157
  self.addHandler(_handler)
142
158
 
@@ -291,7 +307,7 @@ class PySparkLogger(logging.Logger):
291
307
  msg=msg,
292
308
  args=args,
293
309
  exc_info=exc_info,
294
- extra={"kwargs": kwargs},
310
+ extra={"context": kwargs},
295
311
  stack_info=stack_info,
296
312
  stacklevel=stacklevel,
297
313
  )
@@ -0,0 +1,297 @@
1
+ #
2
+ # Licensed to the Apache Software Foundation (ASF) under one or more
3
+ # contributor license agreements. See the NOTICE file distributed with
4
+ # this work for additional information regarding copyright ownership.
5
+ # The ASF licenses this file to You under the Apache License, Version 2.0
6
+ # (the "License"); you may not use this file except in compliance with
7
+ # the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ #
17
+
18
+ from contextlib import contextmanager
19
+ import inspect
20
+ import io
21
+ import logging
22
+ import os
23
+ import sys
24
+ import time
25
+ from typing import BinaryIO, Callable, Generator, Iterable, Iterator, Optional, TextIO, Union
26
+ from types import FrameType, TracebackType
27
+
28
+ from pyspark.logger.logger import JSONFormatter
29
+
30
+
31
+ class DelegatingTextIOWrapper(TextIO):
32
+ """A TextIO that delegates all operations to another TextIO object."""
33
+
34
+ def __init__(self, delegate: TextIO):
35
+ self._delegate = delegate
36
+
37
+ # Required TextIO properties
38
+ @property
39
+ def encoding(self) -> str:
40
+ return self._delegate.encoding
41
+
42
+ @property
43
+ def errors(self) -> Optional[str]:
44
+ return self._delegate.errors
45
+
46
+ @property
47
+ def newlines(self) -> Optional[Union[str, tuple[str, ...]]]:
48
+ return self._delegate.newlines
49
+
50
+ @property
51
+ def buffer(self) -> BinaryIO:
52
+ return self._delegate.buffer
53
+
54
+ @property
55
+ def mode(self) -> str:
56
+ return self._delegate.mode
57
+
58
+ @property
59
+ def name(self) -> str:
60
+ return self._delegate.name
61
+
62
+ @property
63
+ def line_buffering(self) -> int:
64
+ return self._delegate.line_buffering
65
+
66
+ @property
67
+ def closed(self) -> bool:
68
+ return self._delegate.closed
69
+
70
+ # Iterator protocol
71
+ def __iter__(self) -> Iterator[str]:
72
+ return iter(self._delegate)
73
+
74
+ def __next__(self) -> str:
75
+ return next(self._delegate)
76
+
77
+ # Context manager protocol
78
+ def __enter__(self) -> TextIO:
79
+ return self._delegate.__enter__()
80
+
81
+ def __exit__(
82
+ self,
83
+ exc_type: Optional[type[BaseException]],
84
+ exc_val: Optional[BaseException],
85
+ exc_tb: Optional[TracebackType],
86
+ ) -> None:
87
+ return self._delegate.__exit__(exc_type, exc_val, exc_tb)
88
+
89
+ # Core I/O methods
90
+ def write(self, s: str) -> int:
91
+ return self._delegate.write(s)
92
+
93
+ def writelines(self, lines: Iterable[str]) -> None:
94
+ return self._delegate.writelines(lines)
95
+
96
+ def read(self, size: int = -1) -> str:
97
+ return self._delegate.read(size)
98
+
99
+ def readline(self, size: int = -1) -> str:
100
+ return self._delegate.readline(size)
101
+
102
+ def readlines(self, hint: int = -1) -> list[str]:
103
+ return self._delegate.readlines(hint)
104
+
105
+ # Stream control methods
106
+ def close(self) -> None:
107
+ return self._delegate.close()
108
+
109
+ def flush(self) -> None:
110
+ return self._delegate.flush()
111
+
112
+ def seek(self, offset: int, whence: int = io.SEEK_SET) -> int:
113
+ return self._delegate.seek(offset, whence)
114
+
115
+ def tell(self) -> int:
116
+ return self._delegate.tell()
117
+
118
+ def truncate(self, size: Optional[int] = None) -> int:
119
+ return self._delegate.truncate(size)
120
+
121
+ # Stream capability methods
122
+ def fileno(self) -> int:
123
+ return self._delegate.fileno()
124
+
125
+ def isatty(self) -> bool:
126
+ return self._delegate.isatty()
127
+
128
+ def readable(self) -> bool:
129
+ return self._delegate.readable()
130
+
131
+ def seekable(self) -> bool:
132
+ return self._delegate.seekable()
133
+
134
+ def writable(self) -> bool:
135
+ return self._delegate.writable()
136
+
137
+
138
+ class JSONFormatterWithMarker(JSONFormatter):
139
+ default_microsec_format = "%s.%06d"
140
+
141
+ def __init__(self, marker: str, worker_id: str, context_provider: Callable[[], dict[str, str]]):
142
+ super().__init__(ensure_ascii=True)
143
+ self._marker = marker
144
+ self._worker_id = worker_id
145
+ self._context_provider = context_provider
146
+
147
+ def format(self, record: logging.LogRecord) -> str:
148
+ context = self._context_provider()
149
+ if context:
150
+ context.update(record.__dict__.get("context", {}))
151
+ record.__dict__["context"] = context
152
+ return f"{self._marker}:{self._worker_id}:{super().format(record)}"
153
+
154
+ def formatTime(self, record: logging.LogRecord, datefmt: Optional[str] = None) -> str:
155
+ ct = self.converter(record.created)
156
+ if datefmt:
157
+ s = time.strftime(datefmt, ct)
158
+ else:
159
+ s = time.strftime(self.default_time_format, ct)
160
+ if self.default_microsec_format:
161
+ s = self.default_microsec_format % (
162
+ s,
163
+ int((record.created - int(record.created)) * 1000000),
164
+ )
165
+ elif self.default_msec_format:
166
+ s = self.default_msec_format % (s, record.msecs)
167
+ s = f"{s}{time.strftime('%z', ct)}"
168
+ return s
169
+
170
+
171
+ class JsonOutput(DelegatingTextIOWrapper):
172
+ def __init__(
173
+ self,
174
+ delegate: TextIO,
175
+ json_out: TextIO,
176
+ logger_name: str,
177
+ log_level: int,
178
+ marker: str,
179
+ worker_id: str,
180
+ context_provider: Callable[[], dict[str, str]],
181
+ ):
182
+ super().__init__(delegate)
183
+ self._json_out = json_out
184
+ self._logger_name = logger_name
185
+ self._log_level = log_level
186
+ self._formatter = JSONFormatterWithMarker(marker, worker_id, context_provider)
187
+
188
+ def write(self, s: str) -> int:
189
+ if s.strip():
190
+ log_record = logging.LogRecord(
191
+ name=self._logger_name,
192
+ level=self._log_level,
193
+ pathname=None, # type: ignore[arg-type]
194
+ lineno=None, # type: ignore[arg-type]
195
+ msg=s.strip(),
196
+ args=None,
197
+ exc_info=None,
198
+ func=None,
199
+ sinfo=None,
200
+ )
201
+ self._json_out.write(f"{self._formatter.format(log_record)}\n")
202
+ self._json_out.flush()
203
+ return self._delegate.write(s)
204
+
205
+ def writelines(self, lines: Iterable[str]) -> None:
206
+ # Process each line through our JSON logging logic
207
+ for line in lines:
208
+ self.write(line)
209
+
210
+ def close(self) -> None:
211
+ pass
212
+
213
+
214
+ def context_provider() -> dict[str, str]:
215
+ """
216
+ Provides context information for logging, including caller function name.
217
+ Finds the function name from the bottom of the stack, ignoring Python builtin
218
+ libraries and PySpark modules. Test packages are included.
219
+
220
+ Returns:
221
+ dict[str, str]: A dictionary containing context information including:
222
+ - func_name: Name of the function that initiated the logging
223
+ - class_name: Name of the class that initiated the logging if available
224
+ """
225
+
226
+ def is_pyspark_module(module_name: str) -> bool:
227
+ return module_name.startswith("pyspark.") and ".tests." not in module_name
228
+
229
+ bottom: Optional[FrameType] = None
230
+
231
+ # Get caller function information using inspect
232
+ try:
233
+ frame = inspect.currentframe()
234
+ is_in_pyspark_module = False
235
+
236
+ if frame:
237
+ while frame.f_back:
238
+ f_back = frame.f_back
239
+ module_name = f_back.f_globals.get("__name__", "")
240
+
241
+ if is_pyspark_module(module_name):
242
+ if not is_in_pyspark_module:
243
+ bottom = frame
244
+ is_in_pyspark_module = True
245
+ else:
246
+ is_in_pyspark_module = False
247
+
248
+ frame = f_back
249
+ except Exception:
250
+ # If anything goes wrong with introspection, don't fail the logging
251
+ # Just continue without caller information
252
+ pass
253
+
254
+ context = {}
255
+ if bottom:
256
+ context["func_name"] = bottom.f_code.co_name
257
+ if "self" in bottom.f_locals:
258
+ context["class_name"] = bottom.f_locals["self"].__class__.__name__
259
+ elif "cls" in bottom.f_locals:
260
+ context["class_name"] = bottom.f_locals["cls"].__name__
261
+ return context
262
+
263
+
264
+ @contextmanager
265
+ def capture_outputs(
266
+ context_provider: Callable[[], dict[str, str]] = context_provider
267
+ ) -> Generator[None, None, None]:
268
+ if "PYSPARK_SPARK_SESSION_UUID" in os.environ:
269
+ marker: str = "PYTHON_WORKER_LOGGING"
270
+ worker_id: str = str(os.getpid())
271
+ json_out = original_stdout = sys.stdout
272
+ delegate = original_stderr = sys.stderr
273
+
274
+ handler = logging.StreamHandler(json_out)
275
+ handler.setFormatter(JSONFormatterWithMarker(marker, worker_id, context_provider))
276
+ logger = logging.getLogger()
277
+ try:
278
+ sys.stdout = JsonOutput(
279
+ delegate, json_out, "stdout", logging.INFO, marker, worker_id, context_provider
280
+ )
281
+ sys.stderr = JsonOutput(
282
+ delegate, json_out, "stderr", logging.ERROR, marker, worker_id, context_provider
283
+ )
284
+ logger.addHandler(handler)
285
+ try:
286
+ yield
287
+ finally:
288
+ # Send an empty line to indicate the end of the outputs.
289
+ json_out.write(f"{marker}:{worker_id}:\n")
290
+ json_out.flush()
291
+ finally:
292
+ sys.stdout = original_stdout
293
+ sys.stderr = original_stderr
294
+ logger.removeHandler(handler)
295
+ handler.close()
296
+ else:
297
+ yield
@@ -38,7 +38,7 @@ from pyspark.pandas.exceptions import PandasNotImplementedError
38
38
  MAX_MISSING_PARAMS_SIZE = 5
39
39
  COMMON_PARAMETER_SET = {"kwargs", "args", "cls"}
40
40
  MODULE_GROUP_MATCH = [(pd, ps), (pdw, psw), (pdg, psg)]
41
- PANDAS_LATEST_VERSION = "2.3.2"
41
+ PANDAS_LATEST_VERSION = "2.3.3"
42
42
 
43
43
  RST_HEADER = """
44
44
  =====================
@@ -20,6 +20,7 @@ from pyspark.pipelines.api import (
20
20
  materialized_view,
21
21
  table,
22
22
  temporary_view,
23
+ create_sink,
23
24
  )
24
25
 
25
26
  __all__ = [
@@ -28,4 +29,5 @@ __all__ = [
28
29
  "materialized_view",
29
30
  "table",
30
31
  "temporary_view",
32
+ "create_sink",
31
33
  ]
@@ -0,0 +1,48 @@
1
+ #
2
+ # Licensed to the Apache Software Foundation (ASF) under one or more
3
+ # contributor license agreements. See the NOTICE file distributed with
4
+ # this work for additional information regarding copyright ownership.
5
+ # The ASF licenses this file to You under the Apache License, Version 2.0
6
+ # (the "License"); you may not use this file except in compliance with
7
+ # the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ #
17
+ from contextlib import contextmanager
18
+ from typing import Generator, Optional
19
+ from pyspark.sql import SparkSession
20
+
21
+ from typing import Any, cast
22
+
23
+
24
+ @contextmanager
25
+ def add_pipeline_analysis_context(
26
+ spark: SparkSession, dataflow_graph_id: str, flow_name: Optional[str]
27
+ ) -> Generator[None, None, None]:
28
+ """
29
+ Context manager that add PipelineAnalysisContext extension to the user context
30
+ used for pipeline specific analysis.
31
+ """
32
+ extension_id = None
33
+ # Cast because mypy seems to think `spark` is a function, not an object.
34
+ # Likely related to SPARK-47544.
35
+ client = cast(Any, spark).client
36
+ try:
37
+ import pyspark.sql.connect.proto as pb2
38
+ from google.protobuf import any_pb2
39
+
40
+ analysis_context = pb2.PipelineAnalysisContext(
41
+ dataflow_graph_id=dataflow_graph_id, flow_name=flow_name
42
+ )
43
+ extension = any_pb2.Any()
44
+ extension.Pack(analysis_context)
45
+ extension_id = client.add_threadlocal_user_context_extension(extension)
46
+ yield
47
+ finally:
48
+ client.remove_user_context_extension(extension_id)