dataproc-spark-connect 0.8.2__py2.py3-none-any.whl → 0.8.3__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataproc-spark-connect
3
- Version: 0.8.2
3
+ Version: 0.8.3
4
4
  Summary: Dataproc client library for Spark Connect
5
5
  Home-page: https://github.com/GoogleCloudDataproc/dataproc-spark-connect-python
6
6
  Author: Google LLC
@@ -1,12 +1,12 @@
1
- dataproc_spark_connect-0.8.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
1
+ dataproc_spark_connect-0.8.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
2
2
  google/cloud/dataproc_spark_connect/__init__.py,sha256=dIqHNWVWWrSuRf26x11kX5e9yMKSHCtmI_GBj1-FDdE,1101
3
3
  google/cloud/dataproc_spark_connect/exceptions.py,sha256=WF-qdzgdofRwILCriIkjjsmjObZfF0P3Ecg4lv-Hmec,968
4
4
  google/cloud/dataproc_spark_connect/pypi_artifacts.py,sha256=gd-VMwiVP-EJuPp9Vf9Shx8pqps3oSKp0hBcSSZQS-A,1575
5
- google/cloud/dataproc_spark_connect/session.py,sha256=Sr9ISKIJ6U5dJ13FzKQ8UC_pGeFXbchc7X3d9U5Hj48,32144
5
+ google/cloud/dataproc_spark_connect/session.py,sha256=ZWoW9-otaCJnttPt7h9W3pmhHpdbQsAOl8ypOX3fVbo,33556
6
6
  google/cloud/dataproc_spark_connect/client/__init__.py,sha256=6hCNSsgYlie6GuVpc5gjFsPnyeMTScTpXSPYqp1fplY,615
7
7
  google/cloud/dataproc_spark_connect/client/core.py,sha256=m3oXTKBm3sBy6jhDu9GRecrxLb5CdEM53SgMlnJb6ag,4616
8
8
  google/cloud/dataproc_spark_connect/client/proxy.py,sha256=qUZXvVY1yn934vE6nlO495XUZ53AUx9O74a9ozkGI9U,8976
9
- dataproc_spark_connect-0.8.2.dist-info/METADATA,sha256=2PCMrKtuuab4232elYKFHiTdaJcqiM4N38ceD_AhS-E,3465
10
- dataproc_spark_connect-0.8.2.dist-info/WHEEL,sha256=JNWh1Fm1UdwIQV075glCn4MVuCRs0sotJIq-J6rbxCU,109
11
- dataproc_spark_connect-0.8.2.dist-info/top_level.txt,sha256=_1QvSJIhFAGfxb79D6DhB7SUw2X6T4rwnz_LLrbcD3c,7
12
- dataproc_spark_connect-0.8.2.dist-info/RECORD,,
9
+ dataproc_spark_connect-0.8.3.dist-info/METADATA,sha256=croGipnWGtSrd2NLyMCHrcVagYCk9yJ6cEOqCEAm-Qc,3465
10
+ dataproc_spark_connect-0.8.3.dist-info/WHEEL,sha256=JNWh1Fm1UdwIQV075glCn4MVuCRs0sotJIq-J6rbxCU,109
11
+ dataproc_spark_connect-0.8.3.dist-info/top_level.txt,sha256=_1QvSJIhFAGfxb79D6DhB7SUw2X6T4rwnz_LLrbcD3c,7
12
+ dataproc_spark_connect-0.8.3.dist-info/RECORD,,
@@ -22,9 +22,10 @@ import re
22
22
  import string
23
23
  import threading
24
24
  import time
25
- from typing import Any, cast, ClassVar, Dict, Optional, Union
26
25
  import uuid
27
26
  import tqdm
27
+ from types import MethodType
28
+ from typing import Any, cast, ClassVar, Dict, Optional, Union
28
29
 
29
30
  from google.api_core import retry
30
31
  from google.api_core.client_options import ClientOptions
@@ -541,19 +542,57 @@ class DataprocSparkSession(SparkSession):
541
542
 
542
543
  super().__init__(connection, user_id)
543
544
 
544
- base_method = self.client._execute_plan_request_with_metadata
545
+ execute_plan_request_base_method = (
546
+ self.client._execute_plan_request_with_metadata
547
+ )
548
+ execute_base_method = self.client._execute
549
+ execute_and_fetch_as_iterator_base_method = (
550
+ self.client._execute_and_fetch_as_iterator
551
+ )
545
552
 
546
- def wrapped_method(*args, **kwargs):
547
- req = base_method(*args, **kwargs)
553
+ def execute_plan_request_wrapped_method(*args, **kwargs):
554
+ req = execute_plan_request_base_method(*args, **kwargs)
548
555
  if not req.operation_id:
549
556
  req.operation_id = str(uuid.uuid4())
550
557
  logger.debug(
551
558
  f"No operation_id found. Setting operation_id: {req.operation_id}"
552
559
  )
553
- self._display_operation_link(req.operation_id)
554
560
  return req
555
561
 
556
- self.client._execute_plan_request_with_metadata = wrapped_method
562
+ self.client._execute_plan_request_with_metadata = (
563
+ execute_plan_request_wrapped_method
564
+ )
565
+
566
+ def execute_wrapped_method(client_self, req, *args, **kwargs):
567
+ if not self._sql_lazy_transformation(req):
568
+ self._display_operation_link(req.operation_id)
569
+ execute_base_method(req, *args, **kwargs)
570
+
571
+ self.client._execute = MethodType(execute_wrapped_method, self.client)
572
+
573
+ def execute_and_fetch_as_iterator_wrapped_method(
574
+ client_self, req, *args, **kwargs
575
+ ):
576
+ if not self._sql_lazy_transformation(req):
577
+ self._display_operation_link(req.operation_id)
578
+ return execute_and_fetch_as_iterator_base_method(
579
+ req, *args, **kwargs
580
+ )
581
+
582
+ self.client._execute_and_fetch_as_iterator = MethodType(
583
+ execute_and_fetch_as_iterator_wrapped_method, self.client
584
+ )
585
+
586
+ @staticmethod
587
+ def _sql_lazy_transformation(req):
588
+ # Select SQL command
589
+ if req.plan and req.plan.command and req.plan.command.sql_command:
590
+ return (
591
+ "select"
592
+ in req.plan.command.sql_command.sql.strip().lower().split()
593
+ )
594
+
595
+ return False
557
596
 
558
597
  def _repr_html_(self) -> str:
559
598
  if not self._active_s8s_session_id:
@@ -596,7 +635,7 @@ class DataprocSparkSession(SparkSession):
596
635
  return
597
636
  html_element = f"""
598
637
  <div>
599
- <p><a href="{url}">Spark UI</a> (Operation: {operation_id})</p>
638
+ <p><a href="{url}">Spark Query</a> (Operation: {operation_id})</p>
600
639
  </div>
601
640
  """
602
641
  display(HTML(html_element))