pyspark-client 4.1.0.dev2__tar.gz → 4.1.0.dev3__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (370)
  1. {pyspark_client-4.1.0.dev2/pyspark_client.egg-info → pyspark_client-4.1.0.dev3}/PKG-INFO +1 -1
  2. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/errors/error-conditions.json +18 -0
  3. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/__init__.py +2 -0
  4. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/api.py +48 -5
  5. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/cli.py +17 -3
  6. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/graph_element_registry.py +2 -2
  7. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/init_cli.py +1 -0
  8. pyspark_client-4.1.0.dev2/pyspark/pipelines/dataset.py → pyspark_client-4.1.0.dev3/pyspark/pipelines/output.py +16 -7
  9. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/source_code_location.py +28 -0
  10. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/spark_connect_graph_element_registry.py +68 -31
  11. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/spark_connect_pipeline.py +4 -0
  12. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/avro/functions.py +3 -3
  13. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/column.py +53 -0
  14. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/_typing.py +1 -1
  15. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/client/artifact.py +55 -0
  16. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/client/core.py +72 -0
  17. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/column.py +18 -3
  18. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/dataframe.py +17 -4
  19. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/expressions.py +1 -1
  20. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/functions/builtin.py +361 -22
  21. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/group.py +12 -2
  22. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/plan.py +79 -12
  23. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/base_pb2.py +120 -116
  24. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/base_pb2.pyi +164 -0
  25. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/base_pb2_grpc.py +55 -0
  26. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/catalog_pb2.py +2 -2
  27. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/commands_pb2.py +2 -2
  28. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/common_pb2.py +17 -15
  29. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/common_pb2.pyi +28 -0
  30. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/example_plugins_pb2.py +2 -2
  31. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/expressions_pb2.py +2 -2
  32. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/ml_common_pb2.py +2 -2
  33. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/ml_pb2.py +2 -2
  34. pyspark_client-4.1.0.dev3/pyspark/sql/connect/proto/pipelines_pb2.py +130 -0
  35. pyspark_client-4.1.0.dev3/pyspark/sql/connect/proto/pipelines_pb2.pyi +1528 -0
  36. pyspark_client-4.1.0.dev3/pyspark/sql/connect/proto/relations_pb2.py +251 -0
  37. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/relations_pb2.pyi +57 -1
  38. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/types_pb2.py +2 -2
  39. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/session.py +78 -7
  40. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/window.py +4 -1
  41. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/conversion.py +30 -17
  42. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/functions/__init__.py +9 -0
  43. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/functions/builtin.py +1044 -102
  44. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/_typing/__init__.pyi +11 -1
  45. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/functions.py +10 -0
  46. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/group_ops.py +72 -17
  47. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/serializers.py +185 -10
  48. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/typehints.py +91 -0
  49. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/list_state_client.py +10 -38
  50. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/proto/StateMessage_pb2.py +3 -3
  51. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/query.py +5 -2
  52. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/types.py +204 -1
  53. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/worker/data_source_pushdown_filters.py +3 -1
  54. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/worker/plan_data_source_read.py +7 -2
  55. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/worker/write_into_data_source.py +7 -2
  56. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/testing/connectutils.py +32 -0
  57. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/testing/sqlutils.py +14 -0
  58. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/util.py +2 -0
  59. pyspark_client-4.1.0.dev3/pyspark/version.py +1 -0
  60. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/worker.py +139 -35
  61. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3/pyspark_client.egg-info}/PKG-INFO +1 -1
  62. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark_client.egg-info/SOURCES.txt +1 -1
  63. pyspark_client-4.1.0.dev2/pyspark/sql/connect/proto/pipelines_pb2.py +0 -94
  64. pyspark_client-4.1.0.dev2/pyspark/sql/connect/proto/pipelines_pb2.pyi +0 -877
  65. pyspark_client-4.1.0.dev2/pyspark/sql/connect/proto/relations_pb2.py +0 -249
  66. pyspark_client-4.1.0.dev2/pyspark/version.py +0 -1
  67. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/MANIFEST.in +0 -0
  68. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/README.md +0 -0
  69. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/__init__.py +0 -0
  70. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/_globals.py +0 -0
  71. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/_typing.pyi +0 -0
  72. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/accumulators.py +0 -0
  73. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/cloudpickle/__init__.py +0 -0
  74. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/cloudpickle/cloudpickle.py +0 -0
  75. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/cloudpickle/cloudpickle_fast.py +0 -0
  76. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/conf.py +0 -0
  77. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/daemon.py +0 -0
  78. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/errors/__init__.py +0 -0
  79. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/errors/error_classes.py +0 -0
  80. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/errors/exceptions/__init__.py +0 -0
  81. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/errors/exceptions/base.py +0 -0
  82. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/errors/exceptions/captured.py +0 -0
  83. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/errors/exceptions/connect.py +0 -0
  84. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/errors/exceptions/tblib.py +0 -0
  85. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/errors/utils.py +0 -0
  86. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/errors_doc_gen.py +0 -0
  87. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/find_spark_home.py +0 -0
  88. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/install.py +0 -0
  89. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/instrumentation_utils.py +0 -0
  90. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/java_gateway.py +0 -0
  91. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/join.py +0 -0
  92. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/logger/__init__.py +0 -0
  93. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/logger/logger.py +0 -0
  94. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/loose_version.py +0 -0
  95. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/__init__.py +0 -0
  96. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/_typing.pyi +0 -0
  97. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/base.py +0 -0
  98. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/classification.py +0 -0
  99. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/clustering.py +0 -0
  100. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/common.py +0 -0
  101. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/__init__.py +0 -0
  102. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/base.py +0 -0
  103. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/classification.py +0 -0
  104. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/evaluation.py +0 -0
  105. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/feature.py +0 -0
  106. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/functions.py +0 -0
  107. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/io_utils.py +0 -0
  108. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/pipeline.py +0 -0
  109. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/proto.py +0 -0
  110. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/readwrite.py +0 -0
  111. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/serialize.py +0 -0
  112. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/summarizer.py +0 -0
  113. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/tuning.py +0 -0
  114. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/connect/util.py +0 -0
  115. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/deepspeed/__init__.py +0 -0
  116. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/deepspeed/deepspeed_distributor.py +0 -0
  117. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/dl_util.py +0 -0
  118. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/evaluation.py +0 -0
  119. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/feature.py +0 -0
  120. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/fpm.py +0 -0
  121. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/functions.py +0 -0
  122. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/image.py +0 -0
  123. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/linalg/__init__.py +0 -0
  124. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/model_cache.py +0 -0
  125. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/param/__init__.py +0 -0
  126. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/param/_shared_params_code_gen.py +0 -0
  127. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/param/shared.py +0 -0
  128. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/pipeline.py +0 -0
  129. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/recommendation.py +0 -0
  130. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/regression.py +0 -0
  131. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/stat.py +0 -0
  132. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/torch/__init__.py +0 -0
  133. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/torch/data.py +0 -0
  134. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/torch/distributor.py +0 -0
  135. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/torch/log_communication.py +0 -0
  136. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/torch/torch_run_process_wrapper.py +0 -0
  137. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/tree.py +0 -0
  138. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/tuning.py +0 -0
  139. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/util.py +0 -0
  140. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/ml/wrapper.py +0 -0
  141. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/__init__.py +0 -0
  142. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/_typing.pyi +0 -0
  143. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/classification.py +0 -0
  144. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/clustering.py +0 -0
  145. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/common.py +0 -0
  146. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/evaluation.py +0 -0
  147. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/feature.py +0 -0
  148. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/fpm.py +0 -0
  149. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/linalg/__init__.py +0 -0
  150. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/linalg/distributed.py +0 -0
  151. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/random.py +0 -0
  152. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/recommendation.py +0 -0
  153. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/regression.py +0 -0
  154. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/stat/KernelDensity.py +0 -0
  155. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/stat/__init__.py +0 -0
  156. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/stat/_statistics.py +0 -0
  157. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/stat/distribution.py +0 -0
  158. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/stat/test.py +0 -0
  159. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/tree.py +0 -0
  160. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/mllib/util.py +0 -0
  161. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/__init__.py +0 -0
  162. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/_typing.py +0 -0
  163. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/accessors.py +0 -0
  164. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/base.py +0 -0
  165. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/categorical.py +0 -0
  166. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/config.py +0 -0
  167. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/correlation.py +0 -0
  168. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/__init__.py +0 -0
  169. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/base.py +0 -0
  170. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/binary_ops.py +0 -0
  171. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/boolean_ops.py +0 -0
  172. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/categorical_ops.py +0 -0
  173. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/complex_ops.py +0 -0
  174. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/date_ops.py +0 -0
  175. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/datetime_ops.py +0 -0
  176. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/null_ops.py +0 -0
  177. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/num_ops.py +0 -0
  178. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/string_ops.py +0 -0
  179. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/timedelta_ops.py +0 -0
  180. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/data_type_ops/udt_ops.py +0 -0
  181. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/datetimes.py +0 -0
  182. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/exceptions.py +0 -0
  183. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/extensions.py +0 -0
  184. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/frame.py +0 -0
  185. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/generic.py +0 -0
  186. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/groupby.py +0 -0
  187. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/indexes/__init__.py +0 -0
  188. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/indexes/base.py +0 -0
  189. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/indexes/category.py +0 -0
  190. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/indexes/datetimes.py +0 -0
  191. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/indexes/multi.py +0 -0
  192. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/indexes/timedelta.py +0 -0
  193. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/indexing.py +0 -0
  194. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/internal.py +0 -0
  195. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/missing/__init__.py +0 -0
  196. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/missing/common.py +0 -0
  197. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/missing/frame.py +0 -0
  198. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/missing/general_functions.py +0 -0
  199. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/missing/groupby.py +0 -0
  200. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/missing/indexes.py +0 -0
  201. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/missing/resample.py +0 -0
  202. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/missing/scalars.py +0 -0
  203. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/missing/series.py +0 -0
  204. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/missing/window.py +0 -0
  205. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/mlflow.py +0 -0
  206. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/namespace.py +0 -0
  207. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/numpy_compat.py +0 -0
  208. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/plot/__init__.py +0 -0
  209. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/plot/core.py +0 -0
  210. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/plot/matplotlib.py +0 -0
  211. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/plot/plotly.py +0 -0
  212. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/resample.py +0 -0
  213. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/series.py +0 -0
  214. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/spark/__init__.py +0 -0
  215. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/spark/accessors.py +0 -0
  216. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/spark/utils.py +0 -0
  217. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/sql_formatter.py +0 -0
  218. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/sql_processor.py +0 -0
  219. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/strings.py +0 -0
  220. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/supported_api_gen.py +0 -0
  221. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/testing.py +0 -0
  222. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/typedef/__init__.py +0 -0
  223. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/typedef/typehints.py +0 -0
  224. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/usage_logging/__init__.py +0 -0
  225. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/usage_logging/usage_logger.py +0 -0
  226. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/utils.py +0 -0
  227. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pandas/window.py +0 -0
  228. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/block_connect_access.py +0 -0
  229. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/block_session_mutations.py +0 -0
  230. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/flow.py +0 -0
  231. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/logging_utils.py +0 -0
  232. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/pipelines/type_error_utils.py +0 -0
  233. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/profiler.py +0 -0
  234. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/py.typed +0 -0
  235. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/rddsampler.py +0 -0
  236. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/resource/__init__.py +0 -0
  237. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/resource/information.py +0 -0
  238. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/resource/profile.py +0 -0
  239. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/resource/requests.py +0 -0
  240. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/resultiterable.py +0 -0
  241. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/serializers.py +0 -0
  242. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/shell.py +0 -0
  243. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/shuffle.py +0 -0
  244. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/__init__.py +0 -0
  245. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/_typing.pyi +0 -0
  246. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/avro/__init__.py +0 -0
  247. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/catalog.py +0 -0
  248. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/conf.py +0 -0
  249. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/__init__.py +0 -0
  250. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/avro/__init__.py +0 -0
  251. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/avro/functions.py +0 -0
  252. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/catalog.py +0 -0
  253. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/client/__init__.py +0 -0
  254. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/client/reattach.py +0 -0
  255. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/client/retries.py +0 -0
  256. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/conf.py +0 -0
  257. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/conversion.py +0 -0
  258. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/datasource.py +0 -0
  259. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/functions/__init__.py +0 -0
  260. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/functions/partitioning.py +0 -0
  261. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/logging.py +0 -0
  262. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/merge.py +0 -0
  263. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/observation.py +0 -0
  264. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/profiler.py +0 -0
  265. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/__init__.py +0 -0
  266. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/catalog_pb2.pyi +0 -0
  267. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/commands_pb2.pyi +0 -0
  268. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/example_plugins_pb2.pyi +0 -0
  269. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/expressions_pb2.pyi +0 -0
  270. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/ml_common_pb2.pyi +0 -0
  271. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/ml_pb2.pyi +0 -0
  272. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/proto/types_pb2.pyi +0 -0
  273. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/protobuf/__init__.py +0 -0
  274. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/protobuf/functions.py +0 -0
  275. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/readwriter.py +0 -0
  276. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/resource/__init__.py +0 -0
  277. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/resource/profile.py +0 -0
  278. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/shell/__init__.py +0 -0
  279. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/shell/progress.py +0 -0
  280. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/sql_formatter.py +0 -0
  281. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/streaming/__init__.py +0 -0
  282. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/streaming/query.py +0 -0
  283. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/streaming/readwriter.py +0 -0
  284. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/streaming/worker/__init__.py +0 -0
  285. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +0 -0
  286. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/streaming/worker/listener_worker.py +0 -0
  287. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/table_arg.py +0 -0
  288. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/tvf.py +0 -0
  289. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/types.py +0 -0
  290. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/udf.py +0 -0
  291. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/udtf.py +0 -0
  292. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/connect/utils.py +0 -0
  293. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/context.py +0 -0
  294. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/dataframe.py +0 -0
  295. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/datasource.py +0 -0
  296. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/datasource_internal.py +0 -0
  297. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/functions/partitioning.py +0 -0
  298. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/group.py +0 -0
  299. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/internal.py +0 -0
  300. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/merge.py +0 -0
  301. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/metrics.py +0 -0
  302. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/observation.py +0 -0
  303. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/__init__.py +0 -0
  304. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/_typing/protocols/__init__.pyi +0 -0
  305. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/_typing/protocols/frame.pyi +0 -0
  306. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/_typing/protocols/series.pyi +0 -0
  307. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/conversion.py +0 -0
  308. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/functions.pyi +0 -0
  309. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/map_ops.py +0 -0
  310. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/types.py +0 -0
  311. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/pandas/utils.py +0 -0
  312. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/plot/__init__.py +0 -0
  313. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/plot/core.py +0 -0
  314. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/plot/plotly.py +0 -0
  315. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/profiler.py +0 -0
  316. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/protobuf/__init__.py +0 -0
  317. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/protobuf/functions.py +0 -0
  318. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/readwriter.py +0 -0
  319. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/session.py +0 -0
  320. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/sql_formatter.py +0 -0
  321. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/__init__.py +0 -0
  322. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/listener.py +0 -0
  323. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/map_state_client.py +0 -0
  324. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/proto/StateMessage_pb2.pyi +0 -0
  325. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/proto/__init__.py +0 -0
  326. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/python_streaming_source_runner.py +0 -0
  327. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/readwriter.py +0 -0
  328. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/state.py +0 -0
  329. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/stateful_processor.py +0 -0
  330. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/stateful_processor_api_client.py +0 -0
  331. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/stateful_processor_util.py +0 -0
  332. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/transform_with_state_driver_worker.py +0 -0
  333. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/streaming/value_state_client.py +0 -0
  334. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/table_arg.py +0 -0
  335. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/tvf.py +0 -0
  336. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/tvf_argument.py +0 -0
  337. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/udf.py +0 -0
  338. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/udtf.py +0 -0
  339. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/utils.py +0 -0
  340. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/variant_utils.py +0 -0
  341. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/window.py +0 -0
  342. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/worker/__init__.py +0 -0
  343. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/worker/analyze_udtf.py +0 -0
  344. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/worker/commit_data_source_write.py +0 -0
  345. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/worker/create_data_source.py +0 -0
  346. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/worker/lookup_data_sources.py +0 -0
  347. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/sql/worker/python_streaming_sink_runner.py +0 -0
  348. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/statcounter.py +0 -0
  349. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/storagelevel.py +0 -0
  350. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/streaming/__init__.py +0 -0
  351. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/streaming/context.py +0 -0
  352. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/streaming/dstream.py +0 -0
  353. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/streaming/kinesis.py +0 -0
  354. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/streaming/listener.py +0 -0
  355. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/streaming/util.py +0 -0
  356. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/taskcontext.py +0 -0
  357. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/testing/__init__.py +0 -0
  358. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/testing/mllibutils.py +0 -0
  359. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/testing/mlutils.py +0 -0
  360. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/testing/objects.py +0 -0
  361. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/testing/pandasutils.py +0 -0
  362. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/testing/streamingutils.py +0 -0
  363. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/testing/utils.py +0 -0
  364. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/traceback_utils.py +0 -0
  365. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark/worker_util.py +0 -0
  366. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark_client.egg-info/dependency_links.txt +0 -0
  367. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark_client.egg-info/requires.txt +0 -0
  368. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/pyspark_client.egg-info/top_level.txt +0 -0
  369. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/setup.cfg +0 -0
  370. {pyspark_client-4.1.0.dev2 → pyspark_client-4.1.0.dev3}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pyspark-client
3
- Version: 4.1.0.dev2
3
+ Version: 4.1.0.dev3
4
4
  Summary: Python Spark Connect client for Apache Spark
5
5
  Home-page: https://github.com/apache/spark/tree/master/python
6
6
  Author: Spark Developers
@@ -1134,6 +1134,24 @@
1134
1134
  "Cannot serialize the function `<name>`. If you accessed the Spark session, or a DataFrame defined outside of the function, or any object that contains a Spark session, please be aware that they are not allowed in Spark Connect. For `foreachBatch`, please access the Spark session using `df.sparkSession`, where `df` is the first parameter in your `foreachBatch` function. For `StreamingQueryListener`, please access the Spark session using `self.spark`. For details please check out the PySpark doc for `foreachBatch` and `StreamingQueryListener`."
1135
1135
  ]
1136
1136
  },
1137
+ "ST_INVALID_ALGORITHM_VALUE" : {
1138
+ "message" : [
1139
+ "Invalid or unsupported edge interpolation algorithm value: '<alg>'."
1140
+ ],
1141
+ "sqlState" : "22023"
1142
+ },
1143
+ "ST_INVALID_CRS_VALUE" : {
1144
+ "message" : [
1145
+ "Invalid or unsupported CRS (coordinate reference system) value: '<crs>'."
1146
+ ],
1147
+ "sqlState" : "22023"
1148
+ },
1149
+ "ST_INVALID_SRID_VALUE" : {
1150
+ "message" : [
1151
+ "Invalid or unsupported SRID (spatial reference identifier) value: <srid>."
1152
+ ],
1153
+ "sqlState" : "22023"
1154
+ },
1137
1155
  "TEST_CLASS_NOT_COMPILED": {
1138
1156
  "message": [
1139
1157
  "<test_class_path> doesn't exist. Spark sql test classes are not compiled."
@@ -20,6 +20,7 @@ from pyspark.pipelines.api import (
20
20
  materialized_view,
21
21
  table,
22
22
  temporary_view,
23
+ create_sink,
23
24
  )
24
25
 
25
26
  __all__ = [
@@ -28,4 +29,5 @@ __all__ = [
28
29
  "materialized_view",
29
30
  "table",
30
31
  "temporary_view",
32
+ "create_sink",
31
33
  ]
@@ -23,10 +23,11 @@ from pyspark.pipelines.flow import Flow, QueryFunction
23
23
  from pyspark.pipelines.source_code_location import (
24
24
  get_caller_source_code_location,
25
25
  )
26
- from pyspark.pipelines.dataset import (
26
+ from pyspark.pipelines.output import (
27
27
  MaterializedView,
28
28
  StreamingTable,
29
29
  TemporaryView,
30
+ Sink,
30
31
  )
31
32
  from pyspark.sql.types import StructType
32
33
 
@@ -156,7 +157,7 @@ def table(
156
157
 
157
158
  resolved_name = name or decorated.__name__
158
159
  registry = get_active_graph_element_registry()
159
- registry.register_dataset(
160
+ registry.register_output(
160
161
  StreamingTable(
161
162
  comment=comment,
162
163
  name=resolved_name,
@@ -258,7 +259,7 @@ def materialized_view(
258
259
 
259
260
  resolved_name = name or decorated.__name__
260
261
  registry = get_active_graph_element_registry()
261
- registry.register_dataset(
262
+ registry.register_output(
262
263
  MaterializedView(
263
264
  comment=comment,
264
265
  name=resolved_name,
@@ -351,7 +352,7 @@ def temporary_view(
351
352
 
352
353
  resolved_name = name or decorated.__name__
353
354
  registry = get_active_graph_element_registry()
354
- registry.register_dataset(
355
+ registry.register_output(
355
356
  TemporaryView(
356
357
  comment=comment,
357
358
  name=resolved_name,
@@ -446,4 +447,46 @@ def create_streaming_table(
446
447
  schema=schema,
447
448
  format=format,
448
449
  )
449
- get_active_graph_element_registry().register_dataset(table)
450
+ get_active_graph_element_registry().register_output(table)
451
+
452
+
453
+ def create_sink(
454
+ name: str,
455
+ format: str,
456
+ options: Optional[Dict[str, str]] = None,
457
+ ) -> None:
458
+ """
459
+ Creates a sink that can be targeted by streaming flows, providing a generic destination \
460
+ for flows to send data external to the pipeline.
461
+
462
+ :param name: The name of the sink.
463
+ :param format: The format of the sink, e.g. "parquet".
464
+ :param options: A dict where the keys are the property names and the values are the \
465
+ property values. These properties will be set on the sink.
466
+ """
467
+ if type(name) is not str:
468
+ raise PySparkTypeError(
469
+ errorClass="NOT_STR",
470
+ messageParameters={"arg_name": "name", "arg_type": type(name).__name__},
471
+ )
472
+ if type(format) is not str:
473
+ raise PySparkTypeError(
474
+ errorClass="NOT_STR",
475
+ messageParameters={"arg_name": "format", "arg_type": type(format).__name__},
476
+ )
477
+ if options is not None and not isinstance(options, dict):
478
+ raise PySparkTypeError(
479
+ errorClass="NOT_DICT",
480
+ messageParameters={
481
+ "arg_name": "options",
482
+ "arg_type": type(options).__name__,
483
+ },
484
+ )
485
+ sink = Sink(
486
+ name=name,
487
+ format=format,
488
+ options=options or {},
489
+ source_code_location=get_caller_source_code_location(stacklevel=1),
490
+ comment=None,
491
+ )
492
+ get_active_graph_element_registry().register_output(sink)
@@ -90,6 +90,7 @@ class PipelineSpec:
90
90
  """Spec for a pipeline.
91
91
 
92
92
  :param name: The name of the pipeline.
93
+ :param storage: The root directory for storing metadata, such as streaming checkpoints.
93
94
  :param catalog: The default catalog to use for the pipeline.
94
95
  :param database: The default database to use for the pipeline.
95
96
  :param configuration: A dictionary of Spark configuration properties to set for the pipeline.
@@ -97,6 +98,7 @@ class PipelineSpec:
97
98
  """
98
99
 
99
100
  name: str
101
+ storage: str
100
102
  catalog: Optional[str]
101
103
  database: Optional[str]
102
104
  configuration: Mapping[str, str]
@@ -150,8 +152,16 @@ def load_pipeline_spec(spec_path: Path) -> PipelineSpec:
150
152
 
151
153
 
152
154
  def unpack_pipeline_spec(spec_data: Mapping[str, Any]) -> PipelineSpec:
153
- ALLOWED_FIELDS = {"name", "catalog", "database", "schema", "configuration", "libraries"}
154
- REQUIRED_FIELDS = ["name"]
155
+ ALLOWED_FIELDS = {
156
+ "name",
157
+ "storage",
158
+ "catalog",
159
+ "database",
160
+ "schema",
161
+ "configuration",
162
+ "libraries",
163
+ }
164
+ REQUIRED_FIELDS = ["name", "storage"]
155
165
  for key in spec_data.keys():
156
166
  if key not in ALLOWED_FIELDS:
157
167
  raise PySparkException(
@@ -167,6 +177,7 @@ def unpack_pipeline_spec(spec_data: Mapping[str, Any]) -> PipelineSpec:
167
177
 
168
178
  return PipelineSpec(
169
179
  name=spec_data["name"],
180
+ storage=spec_data["storage"],
170
181
  catalog=spec_data.get("catalog"),
171
182
  database=spec_data.get("database", spec_data.get("schema")),
172
183
  configuration=validate_str_dict(spec_data.get("configuration", {}), "configuration"),
@@ -295,7 +306,9 @@ def run(
295
306
  spec = load_pipeline_spec(spec_path)
296
307
 
297
308
  log_with_curr_timestamp("Creating Spark session...")
298
- spark_builder = SparkSession.builder
309
+ spark_builder = SparkSession.builder.config(
310
+ "spark.sql.connect.serverStacktrace.enabled", "false"
311
+ )
299
312
  for key, value in spec.configuration.items():
300
313
  spark_builder = spark_builder.config(key, value)
301
314
 
@@ -321,6 +334,7 @@ def run(
321
334
  full_refresh_all=full_refresh_all,
322
335
  refresh=refresh,
323
336
  dry=dry,
337
+ storage=spec.storage,
324
338
  )
325
339
  try:
326
340
  handle_pipeline_events(result_iter)
@@ -18,7 +18,7 @@
18
18
  from abc import ABC, abstractmethod
19
19
  from pathlib import Path
20
20
 
21
- from pyspark.pipelines.dataset import Dataset
21
+ from pyspark.pipelines.output import Output
22
22
  from pyspark.pipelines.flow import Flow
23
23
  from contextlib import contextmanager
24
24
  from contextvars import ContextVar
@@ -35,7 +35,7 @@ class GraphElementRegistry(ABC):
35
35
  """
36
36
 
37
37
  @abstractmethod
38
- def register_dataset(self, dataset: Dataset) -> None:
38
+ def register_output(self, output: Output) -> None:
39
39
  """Add the given dataset to the registry."""
40
40
 
41
41
  @abstractmethod
@@ -19,6 +19,7 @@ from pathlib import Path
19
19
 
20
20
  SPEC = """
21
21
  name: {{ name }}
22
+ storage: storage-root
22
23
  libraries:
23
24
  - glob:
24
25
  include: transformations/**
@@ -22,12 +22,12 @@ from pyspark.sql.types import StructType
22
22
 
23
23
 
24
24
  @dataclass(frozen=True)
25
- class Dataset:
26
- """Base class for definitions of datasets in a pipeline dataflow graph.
25
+ class Output:
26
+ """Base class for definitions of outputs in a pipeline dataflow graph.
27
27
 
28
- :param name: The name of the dataset. May be a multi-part name, such as "db.table".
29
- :param comment: Optional comment for the dataset.
30
- :param source_code_location: The location of the source code that created this dataset.
28
+ :param name: The name of the output. May be a multi-part name, such as "db.table".
29
+ :param comment: Optional comment for the output.
30
+ :param source_code_location: The location of the source code that created this output.
31
31
  This is used for debugging and tracing purposes.
32
32
  """
33
33
 
@@ -37,7 +37,7 @@ class Dataset:
37
37
 
38
38
 
39
39
  @dataclass(frozen=True)
40
- class Table(Dataset):
40
+ class Table(Output):
41
41
  """
42
42
  Definition of a table in a pipeline dataflow graph, i.e. a catalog object backed by data in
43
43
  physical storage.
@@ -69,8 +69,17 @@ class StreamingTable(Table):
69
69
 
70
70
 
71
71
  @dataclass(frozen=True)
72
- class TemporaryView(Dataset):
72
+ class TemporaryView(Output):
73
73
  """Definition of a temporary view in a pipeline dataflow graph. Temporary views can be
74
74
  referenced by flows within the dataflow graph, but are not visible outside of the graph."""
75
75
 
76
76
  pass
77
+
78
+
79
+ @dataclass(frozen=True)
80
+ class Sink(Output):
81
+ """Definition of an external sink in a pipeline dataflow graph. An external sink's
82
+ contents are written to an external system rather than managed by the pipeline itself."""
83
+
84
+ format: str
85
+ options: Mapping[str, str]
@@ -30,6 +30,34 @@ def get_caller_source_code_location(stacklevel: int) -> SourceCodeLocation:
30
30
  """
31
31
  Returns a SourceCodeLocation object representing the location of the code that invokes this function.
32
32
 
33
+ If this function is called from a decorator (ex. @sdp.table), note that the returned line
34
+ number is affected by how the decorator was triggered - i.e. whether @sdp.table or @sdp.table()
35
+ was called - AND what python version is being used
36
+
37
+ Case 1:
38
+ |@sdp.table()
39
+ |def fn
40
+
41
+ @sdp.table() is executed immediately, on line 1. This is true for all python versions.
42
+
43
+ Case 2:
44
+ |@sdp.table
45
+ |def fn
46
+
47
+ In python < 3.10, @sdp.table will expand to fn = sdp.table(fn), replacing the line that `fn` is
48
+ defined on. This would be line 2. More interestingly, this means:
49
+
50
+ |@sdp.table
51
+ |
52
+ |
53
+ |def fn
54
+
55
+ Will expand to fn = sdp.table(fn) on line 4, where `fn` is defined.
56
+
57
+ However, in python 3.10+, the line number in the stack trace will still be the line that the
58
+ decorator was defined on. In other words, case 2 will be treated the same as case 1, and the
59
+ line number will be 1.
60
+
33
61
  :param stacklevel: The number of stack frames to go up. 0 means the direct caller of this
34
62
  function, 1 means the caller of the caller, and so on.
35
63
  """
@@ -20,21 +20,25 @@ from pyspark.errors import PySparkTypeError
20
20
  from pyspark.sql import SparkSession
21
21
  from pyspark.sql.connect.dataframe import DataFrame as ConnectDataFrame
22
22
  from pyspark.pipelines.block_connect_access import block_spark_connect_execution_and_analysis
23
- from pyspark.pipelines.dataset import (
24
- Dataset,
23
+ from pyspark.pipelines.output import (
24
+ Output,
25
25
  MaterializedView,
26
26
  Table,
27
+ Sink,
27
28
  StreamingTable,
28
29
  TemporaryView,
29
30
  )
30
31
  from pyspark.pipelines.flow import Flow
31
32
  from pyspark.pipelines.graph_element_registry import GraphElementRegistry
33
+ from pyspark.pipelines.source_code_location import SourceCodeLocation
34
+ from pyspark.sql.connect.types import pyspark_types_to_proto_types
35
+ from pyspark.sql.types import StructType
32
36
  from typing import Any, cast
33
37
  import pyspark.sql.connect.proto as pb2
34
38
 
35
39
 
36
40
  class SparkConnectGraphElementRegistry(GraphElementRegistry):
37
- """Registers datasets and flows in a dataflow graph held in a Spark Connect server."""
41
+ """Registers outputs and flows in a dataflow graph held in a Spark Connect server."""
38
42
 
39
43
  def __init__(self, spark: SparkSession, dataflow_graph_id: str) -> None:
40
44
  # Cast because mypy seems to think `spark` is a function, not an object. Likely related to
@@ -42,46 +46,66 @@ class SparkConnectGraphElementRegistry(GraphElementRegistry):
42
46
  self._client = cast(Any, spark).client
43
47
  self._dataflow_graph_id = dataflow_graph_id
44
48
 
45
- def register_dataset(self, dataset: Dataset) -> None:
46
- if isinstance(dataset, Table):
47
- table_properties = dataset.table_properties
48
- partition_cols = dataset.partition_cols
49
- schema = None # TODO
50
- format = dataset.format
49
+ def register_output(self, output: Output) -> None:
50
+ table_details = None
51
+ sink_details = None
52
+ if isinstance(output, Table):
53
+ if isinstance(output.schema, str):
54
+ schema_string = output.schema
55
+ schema_data_type = None
56
+ elif isinstance(output.schema, StructType):
57
+ schema_string = None
58
+ schema_data_type = pyspark_types_to_proto_types(output.schema)
59
+ else:
60
+ schema_string = None
61
+ schema_data_type = None
62
+
63
+ table_details = pb2.PipelineCommand.DefineOutput.TableDetails(
64
+ table_properties=output.table_properties,
65
+ partition_cols=output.partition_cols,
66
+ format=output.format,
67
+ # Even though schema_string is not required, the generated Python code seems to
68
+ # erroneously think it is required.
69
+ schema_string=schema_string, # type: ignore[arg-type]
70
+ schema_data_type=schema_data_type,
71
+ )
51
72
 
52
- if isinstance(dataset, MaterializedView):
53
- dataset_type = pb2.DatasetType.MATERIALIZED_VIEW
54
- elif isinstance(dataset, StreamingTable):
55
- dataset_type = pb2.DatasetType.TABLE
73
+ if isinstance(output, MaterializedView):
74
+ output_type = pb2.OutputType.MATERIALIZED_VIEW
75
+ elif isinstance(output, StreamingTable):
76
+ output_type = pb2.OutputType.TABLE
56
77
  else:
57
78
  raise PySparkTypeError(
58
79
  errorClass="UNSUPPORTED_PIPELINES_DATASET_TYPE",
59
- messageParameters={"dataset_type": type(dataset).__name__},
80
+ messageParameters={"output_type": type(output).__name__},
60
81
  )
61
- elif isinstance(dataset, TemporaryView):
62
- table_properties = None
63
- partition_cols = None
64
- schema = None
65
- format = None
66
- dataset_type = pb2.DatasetType.TEMPORARY_VIEW
82
+ elif isinstance(output, TemporaryView):
83
+ output_type = pb2.OutputType.TEMPORARY_VIEW
84
+ table_details = None
85
+ elif isinstance(output, Sink):
86
+ output_type = pb2.OutputType.SINK
87
+ sink_details = pb2.PipelineCommand.DefineOutput.SinkDetails(
88
+ options=output.options,
89
+ format=output.format,
90
+ )
67
91
  else:
68
92
  raise PySparkTypeError(
69
93
  errorClass="UNSUPPORTED_PIPELINES_DATASET_TYPE",
70
- messageParameters={"dataset_type": type(dataset).__name__},
94
+ messageParameters={"output_type": type(output).__name__},
71
95
  )
72
96
 
73
- inner_command = pb2.PipelineCommand.DefineDataset(
97
+ inner_command = pb2.PipelineCommand.DefineOutput(
74
98
  dataflow_graph_id=self._dataflow_graph_id,
75
- dataset_name=dataset.name,
76
- dataset_type=dataset_type,
77
- comment=dataset.comment,
78
- table_properties=table_properties,
79
- partition_cols=partition_cols,
80
- schema=schema,
81
- format=format,
99
+ output_name=output.name,
100
+ output_type=output_type,
101
+ comment=output.comment,
102
+ sink_details=sink_details,
103
+ table_details=table_details,
104
+ source_code_location=source_code_location_to_proto(output.source_code_location),
82
105
  )
106
+
83
107
  command = pb2.Command()
84
- command.pipeline_command.define_dataset.CopyFrom(inner_command)
108
+ command.pipeline_command.define_output.CopyFrom(inner_command)
85
109
  self._client.execute_command(command)
86
110
 
87
111
  def register_flow(self, flow: Flow) -> None:
@@ -89,12 +113,17 @@ class SparkConnectGraphElementRegistry(GraphElementRegistry):
89
113
  df = flow.func()
90
114
  relation = cast(ConnectDataFrame, df)._plan.plan(self._client)
91
115
 
116
+ relation_flow_details = pb2.PipelineCommand.DefineFlow.WriteRelationFlowDetails(
117
+ relation=relation,
118
+ )
119
+
92
120
  inner_command = pb2.PipelineCommand.DefineFlow(
93
121
  dataflow_graph_id=self._dataflow_graph_id,
94
122
  flow_name=flow.name,
95
123
  target_dataset_name=flow.target,
96
- relation=relation,
124
+ relation_flow_details=relation_flow_details,
97
125
  sql_conf=flow.spark_conf,
126
+ source_code_location=source_code_location_to_proto(flow.source_code_location),
98
127
  )
99
128
  command = pb2.Command()
100
129
  command.pipeline_command.define_flow.CopyFrom(inner_command)
@@ -109,3 +138,11 @@ class SparkConnectGraphElementRegistry(GraphElementRegistry):
109
138
  command = pb2.Command()
110
139
  command.pipeline_command.define_sql_graph_elements.CopyFrom(inner_command)
111
140
  self._client.execute_command(command)
141
+
142
+
143
+ def source_code_location_to_proto(
144
+ source_code_location: SourceCodeLocation,
145
+ ) -> pb2.SourceCodeLocation:
146
+ return pb2.SourceCodeLocation(
147
+ file_name=source_code_location.filename, line_number=source_code_location.line_number
148
+ )
@@ -72,6 +72,7 @@ def start_run(
72
72
  full_refresh_all: bool,
73
73
  refresh: Optional[Sequence[str]],
74
74
  dry: bool,
75
+ storage: str,
75
76
  ) -> Iterator[Dict[str, Any]]:
76
77
  """Start a run of the dataflow graph in the Spark Connect server.
77
78
 
@@ -79,6 +80,8 @@ def start_run(
79
80
  :param full_refresh: List of datasets to reset and recompute.
80
81
  :param full_refresh_all: Perform a full graph reset and recompute.
81
82
  :param refresh: List of datasets to update.
83
+ :param dry: If true, the run will not actually execute any flows, but only validate the graph.
84
+ :param storage: The storage location to store metadata such as streaming checkpoints.
82
85
  """
83
86
  inner_command = pb2.PipelineCommand.StartRun(
84
87
  dataflow_graph_id=dataflow_graph_id,
@@ -86,6 +89,7 @@ def start_run(
86
89
  full_refresh_all=full_refresh_all,
87
90
  refresh_selection=refresh or [],
88
91
  dry=dry,
92
+ storage=storage,
89
93
  )
90
94
  command = pb2.Command()
91
95
  command.pipeline_command.start_run.CopyFrom(inner_command)
@@ -69,7 +69,7 @@ def from_avro(
69
69
  >>> df = spark.createDataFrame(data, ("key", "value"))
70
70
  >>> avroDf = df.select(to_avro(df.value).alias("avro"))
71
71
  >>> avroDf.collect()
72
- [Row(avro=bytearray(b'\\x00\\x00\\x04\\x00\\nAlice'))]
72
+ [Row(avro=b'\\x00\\x00\\x04\\x00\\nAlice')]
73
73
 
74
74
  >>> jsonFormatSchema = '''{"type":"record","name":"topLevelRecord","fields":
75
75
  ... [{"name":"avro","type":[{"type":"record","name":"value","namespace":"topLevelRecord",
@@ -141,12 +141,12 @@ def to_avro(data: "ColumnOrName", jsonFormatSchema: str = "") -> Column:
141
141
  >>> data = ['SPADES']
142
142
  >>> df = spark.createDataFrame(data, "string")
143
143
  >>> df.select(to_avro(df.value).alias("suite")).collect()
144
- [Row(suite=bytearray(b'\\x00\\x0cSPADES'))]
144
+ [Row(suite=b'\\x00\\x0cSPADES')]
145
145
 
146
146
  >>> jsonFormatSchema = '''["null", {"type": "enum", "name": "value",
147
147
  ... "symbols": ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"]}]'''
148
148
  >>> df.select(to_avro(df.value, jsonFormatSchema).alias("suite")).collect()
149
- [Row(suite=bytearray(b'\\x02\\x00'))]
149
+ [Row(suite=b'\\x02\\x00')]
150
150
  """
151
151
  from py4j.java_gateway import JVMView
152
152
  from pyspark.sql.classic.column import _to_java_column
@@ -21,6 +21,7 @@ import sys
21
21
  from typing import (
22
22
  overload,
23
23
  Any,
24
+ Callable,
24
25
  TYPE_CHECKING,
25
26
  Union,
26
27
  )
@@ -1538,6 +1539,58 @@ class Column(TableValuedFunctionArgument):
1538
1539
  """
1539
1540
  ...
1540
1541
 
1542
+ @dispatch_col_method
1543
+ def transform(self, f: Callable[["Column"], "Column"]) -> "Column":
1544
+ """
1545
+ Applies a transformation function to this column.
1546
+
1547
+ This method allows you to apply a function that takes a Column and returns a Column,
1548
+ enabling method chaining and functional transformations.
1549
+
1550
+ .. versionadded:: 4.1.0
1551
+
1552
+ Parameters
1553
+ ----------
1554
+ f : callable
1555
+ A function that takes a :class:`Column` and returns a :class:`Column`.
1556
+
1557
+ Returns
1558
+ -------
1559
+ :class:`Column`
1560
+ The result of applying the function to this column.
1561
+
1562
+ Examples
1563
+ --------
1564
+ Example 1: Chain built-in functions
1565
+
1566
+ >>> from pyspark.sql.functions import trim, upper
1567
+ >>> df = spark.createDataFrame([(" hello ",), (" world ",)], ["text"])
1568
+ >>> df.select(df.text.transform(trim).transform(upper).alias("result")).show()
1569
+ +------+
1570
+ |result|
1571
+ +------+
1572
+ | HELLO|
1573
+ | WORLD|
1574
+ +------+
1575
+
1576
+ Example 2: Use lambda functions
1577
+
1578
+ >>> df = spark.createDataFrame([(10,), (20,), (30,)], ["value"])
1579
+ >>> df.select(
1580
+ ... df.value.transform(lambda c: c + 5)
1581
+ ... .transform(lambda c: c * 2)
1582
+ ... .transform(lambda c: c - 10).alias("result")
1583
+ ... ).show()
1584
+ +------+
1585
+ |result|
1586
+ +------+
1587
+ | 20|
1588
+ | 40|
1589
+ | 60|
1590
+ +------+
1591
+ """
1592
+ ...
1593
+
1541
1594
  @dispatch_col_method
1542
1595
  def outer(self) -> "Column":
1543
1596
  """
@@ -39,7 +39,7 @@ LiteralType = PrimitiveType
39
39
 
40
40
  DecimalLiteral = decimal.Decimal
41
41
 
42
- DateTimeLiteral = Union[datetime.datetime, datetime.date]
42
+ DateTimeLiteral = Union[datetime.date, datetime.time, datetime.datetime]
43
43
 
44
44
  DataTypeOrString = Union[DataType, str]
45
45
 
@@ -427,6 +427,30 @@ class ArtifactManager:
427
427
  status = resp.statuses.get(artifactName)
428
428
  return status.exists if status is not None else False
429
429
 
430
+ def get_cached_artifacts(self, hashes: list[str]) -> set[str]:
431
+ """
432
+ Batch check which artifacts are already cached on the server.
433
+ Returns a set of hashes that are already cached.
434
+ """
435
+ if not hashes:
436
+ return set()
437
+
438
+ artifact_names = [f"{CACHE_PREFIX}/{hash}" for hash in hashes]
439
+ request = proto.ArtifactStatusesRequest(
440
+ user_context=self._user_context, session_id=self._session_id, names=artifact_names
441
+ )
442
+ resp: proto.ArtifactStatusesResponse = self._stub.ArtifactStatus(
443
+ request, metadata=self._metadata
444
+ )
445
+
446
+ cached = set()
447
+ for hash in hashes:
448
+ artifact_name = f"{CACHE_PREFIX}/{hash}"
449
+ status = resp.statuses.get(artifact_name)
450
+ if status is not None and status.exists:
451
+ cached.add(hash)
452
+ return cached
453
+
430
454
  def cache_artifact(self, blob: bytes) -> str:
431
455
  """
432
456
  Cache the given blob at the session.
@@ -442,3 +466,34 @@ class ArtifactManager:
442
466
  # TODO(SPARK-42658): Handle responses containing CRC failures.
443
467
 
444
468
  return hash
469
+
470
+ def cache_artifacts(self, blobs: list[bytes]) -> list[str]:
471
+ """
472
+ Cache the given blobs at the session.
473
+
474
+ This method batches artifact status checks and uploads to minimize RPC overhead.
475
+ """
476
+ # Compute hashes for all blobs upfront
477
+ hashes = [hashlib.sha256(blob).hexdigest() for blob in blobs]
478
+ unique_hashes = list(set(hashes))
479
+
480
+ # Batch check which artifacts are already cached
481
+ cached_hashes = self.get_cached_artifacts(unique_hashes)
482
+
483
+ # Collect unique artifacts that need to be uploaded
484
+ seen_hashes = set()
485
+ artifacts_to_add = []
486
+ for blob, hash in zip(blobs, hashes):
487
+ if hash not in cached_hashes and hash not in seen_hashes:
488
+ artifacts_to_add.append(new_cache_artifact(hash, InMemory(blob)))
489
+ seen_hashes.add(hash)
490
+
491
+ # Batch upload all missing artifacts in a single RPC call
492
+ if artifacts_to_add:
493
+ requests = self._add_artifacts(artifacts_to_add)
494
+ response: proto.AddArtifactsResponse = self._retrieve_responses(requests)
495
+ summaries: List[proto.AddArtifactsResponse.ArtifactSummary] = []
496
+ for summary in response.artifacts:
497
+ summaries.append(summary)
498
+ # TODO(SPARK-42658): Handle responses containing CRC failures.
499
+ return hashes