dataproc-spark-connect 0.8.1__tar.gz → 0.8.3__tar.gz

This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (18)
  1. {dataproc_spark_connect-0.8.1 → dataproc_spark_connect-0.8.3}/PKG-INFO +1 -1
  2. {dataproc_spark_connect-0.8.1 → dataproc_spark_connect-0.8.3}/dataproc_spark_connect.egg-info/PKG-INFO +1 -1
  3. {dataproc_spark_connect-0.8.1 → dataproc_spark_connect-0.8.3}/google/cloud/dataproc_spark_connect/session.py +49 -7
  4. {dataproc_spark_connect-0.8.1 → dataproc_spark_connect-0.8.3}/setup.py +1 -1
  5. {dataproc_spark_connect-0.8.1 → dataproc_spark_connect-0.8.3}/LICENSE +0 -0
  6. {dataproc_spark_connect-0.8.1 → dataproc_spark_connect-0.8.3}/README.md +0 -0
  7. {dataproc_spark_connect-0.8.1 → dataproc_spark_connect-0.8.3}/dataproc_spark_connect.egg-info/SOURCES.txt +0 -0
  8. {dataproc_spark_connect-0.8.1 → dataproc_spark_connect-0.8.3}/dataproc_spark_connect.egg-info/dependency_links.txt +0 -0
  9. {dataproc_spark_connect-0.8.1 → dataproc_spark_connect-0.8.3}/dataproc_spark_connect.egg-info/requires.txt +0 -0
  10. {dataproc_spark_connect-0.8.1 → dataproc_spark_connect-0.8.3}/dataproc_spark_connect.egg-info/top_level.txt +0 -0
  11. {dataproc_spark_connect-0.8.1 → dataproc_spark_connect-0.8.3}/google/cloud/dataproc_spark_connect/__init__.py +0 -0
  12. {dataproc_spark_connect-0.8.1 → dataproc_spark_connect-0.8.3}/google/cloud/dataproc_spark_connect/client/__init__.py +0 -0
  13. {dataproc_spark_connect-0.8.1 → dataproc_spark_connect-0.8.3}/google/cloud/dataproc_spark_connect/client/core.py +0 -0
  14. {dataproc_spark_connect-0.8.1 → dataproc_spark_connect-0.8.3}/google/cloud/dataproc_spark_connect/client/proxy.py +0 -0
  15. {dataproc_spark_connect-0.8.1 → dataproc_spark_connect-0.8.3}/google/cloud/dataproc_spark_connect/exceptions.py +0 -0
  16. {dataproc_spark_connect-0.8.1 → dataproc_spark_connect-0.8.3}/google/cloud/dataproc_spark_connect/pypi_artifacts.py +0 -0
  17. {dataproc_spark_connect-0.8.1 → dataproc_spark_connect-0.8.3}/pyproject.toml +0 -0
  18. {dataproc_spark_connect-0.8.1 → dataproc_spark_connect-0.8.3}/setup.cfg +0 -0
--- dataproc_spark_connect-0.8.1/PKG-INFO
+++ dataproc_spark_connect-0.8.3/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dataproc-spark-connect
-Version: 0.8.1
+Version: 0.8.3
 Summary: Dataproc client library for Spark Connect
 Home-page: https://github.com/GoogleCloudDataproc/dataproc-spark-connect-python
 Author: Google LLC
--- dataproc_spark_connect-0.8.1/dataproc_spark_connect.egg-info/PKG-INFO
+++ dataproc_spark_connect-0.8.3/dataproc_spark_connect.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dataproc-spark-connect
-Version: 0.8.1
+Version: 0.8.3
 Summary: Dataproc client library for Spark Connect
 Home-page: https://github.com/GoogleCloudDataproc/dataproc-spark-connect-python
 Author: Google LLC
--- dataproc_spark_connect-0.8.1/google/cloud/dataproc_spark_connect/session.py
+++ dataproc_spark_connect-0.8.3/google/cloud/dataproc_spark_connect/session.py
@@ -22,9 +22,10 @@ import re
 import string
 import threading
 import time
-from typing import Any, cast, ClassVar, Dict, Optional, Union
 import uuid
 import tqdm
+from types import MethodType
+from typing import Any, cast, ClassVar, Dict, Optional, Union
 
 from google.api_core import retry
 from google.api_core.client_options import ClientOptions
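The new types.MethodType import supports the instance-bound client-method overrides introduced further down in this file; a sketch of that pattern follows the session.py hunks below.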
@@ -234,6 +235,9 @@ class DataprocSparkSession(SparkSession):
                 target=create_session_pbar
             )
 
+            # Activate Spark Connect mode for Spark client
+            os.environ["SPARK_CONNECT_MODE_ENABLED"] = "1"
+
             try:
                 if (
                     os.getenv(
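Setting this variable before the session is built matters because, in recent PySpark releases, internals appear to key off the variable's mere presence to route SparkSession and DataFrame APIs through the Spark Connect client (pyspark.sql.utils.is_remote is one such check). A minimal sketch under that assumption:

import os

# Assumption about PySpark internals: the presence of this variable
# (its value is irrelevant) is what signals "Spark Connect mode".
os.environ["SPARK_CONNECT_MODE_ENABLED"] = "1"

from pyspark.sql.utils import is_remote

assert is_remote()  # APIs now route through the Spark Connect client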
@@ -538,19 +542,57 @@ class DataprocSparkSession(SparkSession):
 
         super().__init__(connection, user_id)
 
-        base_method = self.client._execute_plan_request_with_metadata
+        execute_plan_request_base_method = (
+            self.client._execute_plan_request_with_metadata
+        )
+        execute_base_method = self.client._execute
+        execute_and_fetch_as_iterator_base_method = (
+            self.client._execute_and_fetch_as_iterator
+        )
 
-        def wrapped_method(*args, **kwargs):
-            req = base_method(*args, **kwargs)
+        def execute_plan_request_wrapped_method(*args, **kwargs):
+            req = execute_plan_request_base_method(*args, **kwargs)
             if not req.operation_id:
                 req.operation_id = str(uuid.uuid4())
                 logger.debug(
                     f"No operation_id found. Setting operation_id: {req.operation_id}"
                 )
-            self._display_operation_link(req.operation_id)
             return req
 
-        self.client._execute_plan_request_with_metadata = wrapped_method
+        self.client._execute_plan_request_with_metadata = (
+            execute_plan_request_wrapped_method
+        )
+
+        def execute_wrapped_method(client_self, req, *args, **kwargs):
+            if not self._sql_lazy_transformation(req):
+                self._display_operation_link(req.operation_id)
+            execute_base_method(req, *args, **kwargs)
+
+        self.client._execute = MethodType(execute_wrapped_method, self.client)
+
+        def execute_and_fetch_as_iterator_wrapped_method(
+            client_self, req, *args, **kwargs
+        ):
+            if not self._sql_lazy_transformation(req):
+                self._display_operation_link(req.operation_id)
+            return execute_and_fetch_as_iterator_base_method(
+                req, *args, **kwargs
+            )
+
+        self.client._execute_and_fetch_as_iterator = MethodType(
+            execute_and_fetch_as_iterator_wrapped_method, self.client
+        )
+
+    @staticmethod
+    def _sql_lazy_transformation(req):
+        # Select SQL command
+        if req.plan and req.plan.command and req.plan.command.sql_command:
+            return (
+                "select"
+                in req.plan.command.sql_command.sql.strip().lower().split()
+            )
+
+        return False
 
     def _repr_html_(self) -> str:
         if not self._active_s8s_session_id:
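The wrappers above use types.MethodType to override gRPC client methods on a single instance without touching the client class. A self-contained sketch of that pattern (Client, wrapped, and the print calls are illustrative stand-ins, not this package's API):

from types import MethodType

class Client:
    def _execute(self, req):
        print(f"executing {req!r}")

client = Client()
base = client._execute  # capture the bound original before overriding

def wrapped(self, req):
    print("side effect first")  # e.g. displaying an operation link
    base(req)  # 'base' is already bound to 'client', so no 'self' here

# Bind 'wrapped' as a method of this one instance; other instances
# and the Client class itself keep the original _execute.
client._execute = MethodType(wrapped, client)

client._execute("SELECT 1")  # side effect runs, then the original body

Note how _sql_lazy_transformation gates the link display: it splits the statement into whitespace-delimited tokens and reports True whenever the standalone token "select" appears, so "SELECT * FROM t" (and, say, "INSERT INTO t SELECT ...") skip the eager operation link, while DDL such as "CREATE TABLE t (id INT)" still shows it.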
@@ -593,7 +635,7 @@ class DataprocSparkSession(SparkSession):
             return
         html_element = f"""
         <div>
-            <p><a href="{url}">Spark UI</a> (Operation: {operation_id})</p>
+            <p><a href="{url}">Spark Query</a> (Operation: {operation_id})</p>
         </div>
         """
         display(HTML(html_element))
--- dataproc_spark_connect-0.8.1/setup.py
+++ dataproc_spark_connect-0.8.3/setup.py
@@ -20,7 +20,7 @@ long_description = (this_directory / "README.md").read_text()
 
 setup(
     name="dataproc-spark-connect",
-    version="0.8.1",
+    version="0.8.3",
     description="Dataproc client library for Spark Connect",
     long_description=long_description,
     author="Google LLC",