dataproc-spark-connect 1.0.0rc1-py2.py3-none-any.whl → 1.0.0rc2-py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- dataproc_spark_connect-1.0.0rc1.dist-info/METADATA
+++ dataproc_spark_connect-1.0.0rc2.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dataproc-spark-connect
-Version: 1.0.0rc1
+Version: 1.0.0rc2
 Summary: Dataproc client library for Spark Connect
 Home-page: https://github.com/GoogleCloudDataproc/dataproc-spark-connect-python
 Author: Google LLC
--- dataproc_spark_connect-1.0.0rc1.dist-info/RECORD
+++ dataproc_spark_connect-1.0.0rc2.dist-info/RECORD
@@ -1,13 +1,13 @@
-dataproc_spark_connect-1.0.0rc1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+dataproc_spark_connect-1.0.0rc2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
 google/cloud/dataproc_spark_connect/__init__.py,sha256=dIqHNWVWWrSuRf26x11kX5e9yMKSHCtmI_GBj1-FDdE,1101
-google/cloud/dataproc_spark_connect/environment.py,sha256=UICy9XyqAxL-cryVWx7GZPRAxoir5LKk0dtqqY_l--c,2307
+google/cloud/dataproc_spark_connect/environment.py,sha256=l1wWiHMHtBQ9YonE-kHTpaZlN9vLE4fyJSTn7RZP6kA,2503
 google/cloud/dataproc_spark_connect/exceptions.py,sha256=WF-qdzgdofRwILCriIkjjsmjObZfF0P3Ecg4lv-Hmec,968
 google/cloud/dataproc_spark_connect/pypi_artifacts.py,sha256=gd-VMwiVP-EJuPp9Vf9Shx8pqps3oSKp0hBcSSZQS-A,1575
-google/cloud/dataproc_spark_connect/session.py,sha256=vD9kJXJbkzfKlYt5TFp0umHi6A7ZdheSJNjrqVyL0oo,37432
+google/cloud/dataproc_spark_connect/session.py,sha256=FdJI_F9k6EfIvlgC1-f-Qb_Uwg9SmkIyWhpNZlqGQhw,40405
 google/cloud/dataproc_spark_connect/client/__init__.py,sha256=6hCNSsgYlie6GuVpc5gjFsPnyeMTScTpXSPYqp1fplY,615
 google/cloud/dataproc_spark_connect/client/core.py,sha256=GRc4OCTBvIvdagjxOPoDO22vLtt8xDSerdREMRDeUBY,4659
 google/cloud/dataproc_spark_connect/client/proxy.py,sha256=qUZXvVY1yn934vE6nlO495XUZ53AUx9O74a9ozkGI9U,8976
-dataproc_spark_connect-1.0.0rc1.dist-info/METADATA,sha256=D1e6sjZ8-hVccXMjHwkoX0OPOOW876hzsQi25WxMbMI,3468
-dataproc_spark_connect-1.0.0rc1.dist-info/WHEEL,sha256=JNWh1Fm1UdwIQV075glCn4MVuCRs0sotJIq-J6rbxCU,109
-dataproc_spark_connect-1.0.0rc1.dist-info/top_level.txt,sha256=_1QvSJIhFAGfxb79D6DhB7SUw2X6T4rwnz_LLrbcD3c,7
-dataproc_spark_connect-1.0.0rc1.dist-info/RECORD,,
+dataproc_spark_connect-1.0.0rc2.dist-info/METADATA,sha256=o2vfu5NRn2Pb0N7cavrBm2OLwP_LXQBVrclNjEtb9Do,3468
+dataproc_spark_connect-1.0.0rc2.dist-info/WHEEL,sha256=JNWh1Fm1UdwIQV075glCn4MVuCRs0sotJIq-J6rbxCU,109
+dataproc_spark_connect-1.0.0rc2.dist-info/top_level.txt,sha256=_1QvSJIhFAGfxb79D6DhB7SUw2X6T4rwnz_LLrbcD3c,7
+dataproc_spark_connect-1.0.0rc2.dist-info/RECORD,,
--- google/cloud/dataproc_spark_connect/environment.py
+++ google/cloud/dataproc_spark_connect/environment.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import os
+import sys
 from typing import Callable, Tuple, List
 
 
@@ -46,6 +47,18 @@ def is_jetbrains_ide() -> bool:
     return "jetbrains" in os.getenv("TERMINAL_EMULATOR", "").lower()
 
 
+def is_interactive():
+    return hasattr(sys, "ps1")
+
+
+def is_terminal():
+    return sys.stdin.isatty()
+
+
+def is_interactive_terminal():
+    return is_interactive() and is_terminal()
+
+
 def get_client_environment_label() -> str:
     """
     Map current environment to a standardized client label.
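
Taken together, the three new helpers classify the runtime so the session can decide whether and how to draw a progress bar: batch scripts get none, terminal REPLs get the text bar, and notebook kernels get the widget bar. A minimal sketch of that decision, assuming standard CPython/IPython behavior (the classify_runtime name is illustrative, not part of the package):

    import sys

    def classify_runtime() -> str:
        # Batch run (`python job.py`): the interpreter only defines sys.ps1
        # when prompting for input, so is_interactive() would be False.
        if not hasattr(sys, "ps1"):
            return "non-interactive"
        # Terminal REPL: stdin is a TTY, so a carriage-return bar renders fine.
        if sys.stdin.isatty():
            return "interactive terminal"
        # Notebook kernel: IPython sets sys.ps1, but stdin is not a TTY,
        # so the ipywidgets-based bar is the right choice.
        return "interactive non-terminal"
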
--- google/cloud/dataproc_spark_connect/session.py
+++ google/cloud/dataproc_spark_connect/session.py
@@ -24,8 +24,10 @@ import threading
 import time
 import uuid
 import tqdm
+from tqdm import tqdm as cli_tqdm
+from tqdm.notebook import tqdm as notebook_tqdm
 from types import MethodType
-from typing import Any, cast, ClassVar, Dict, Optional, Union
+from typing import Any, cast, ClassVar, Dict, Iterable, Optional, Union
 
 from google.api_core import retry
 from google.api_core.client_options import ClientOptions
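
The two aliases make both tqdm renderers available so the handler can choose at runtime: tqdm.tqdm writes carriage-return updates to a terminal, while tqdm.notebook.tqdm draws an ipywidgets bar in Jupyter. A small sketch of the terminal flavor using the same constructor arguments the handler passes below (assuming tqdm is installed):

    from tqdm import tqdm as cli_tqdm

    pbar = cli_tqdm(
        total=100,
        leave=True,
        dynamic_ncols=True,
        bar_format="{l_bar}{bar} {n_fmt}/{total_fmt} Tasks",
    )
    for _ in range(100):
        pbar.update(1)  # advance by one completed task
    pbar.close()
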
@@ -110,6 +112,7 @@ class DataprocSparkSession(SparkSession):
     _region = None
     _client_options = None
     _active_s8s_session_id: ClassVar[Optional[str]] = None
+    _execution_progress_bar = dict()
 
     class Builder(SparkSession.Builder):
 
@@ -247,6 +250,9 @@ class DataprocSparkSession(SparkSession):
             assert self._channel_builder is not None
             session = DataprocSparkSession(connection=self._channel_builder)
 
+            # Register the handler for the cell-execution progress bar
+            session._register_progress_execution_handler()
+
             DataprocSparkSession._set_default_and_active_session(session)
             return session
 
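
Because the handler is registered inside getOrCreate(), progress reporting is on by default for every new session. A hypothetical usage sketch (project/region configuration is omitted here and depends on your setup):

    from google.cloud.dataproc_spark_connect import DataprocSparkSession

    # getOrCreate() registers the progress handler on the new session.
    spark = DataprocSparkSession.builder.getOrCreate()

    # Any action now streams StageInfo updates through the handler,
    # rendering one tqdm bar per operation_id.
    spark.range(1_000_000).selectExpr("sum(id)").show()
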
@@ -682,6 +688,78 @@ class DataprocSparkSession(SparkSession):
             execute_and_fetch_as_iterator_wrapped_method, self.client
         )
 
+        # Patch clearProgressHandlers so that it does not remove the Dataproc progress handler
+        clearProgressHandlers_base_method = self.clearProgressHandlers
+
+        def clearProgressHandlers_wrapper_method(_, *args, **kwargs):
+            clearProgressHandlers_base_method(*args, **kwargs)
+
+            self._register_progress_execution_handler()
+
+        self.clearProgressHandlers = MethodType(
+            clearProgressHandlers_wrapper_method, self
+        )
+
+    def _register_progress_execution_handler(self):
+        from pyspark.sql.connect.shell.progress import StageInfo
+
+        def handler(
+            stages: Optional[Iterable[StageInfo]],
+            inflight_tasks: int,
+            operation_id: Optional[str],
+            done: bool,
+        ):
+            if operation_id is None:
+                return
+
+            # Don't build or render a progress bar for non-interactive runs
+            # (IPython or otherwise)
+            if not environment.is_interactive():
+                return
+
+            total_tasks = 0
+            completed_tasks = 0
+
+            for stage in stages or []:
+                total_tasks += stage.num_tasks
+                completed_tasks += stage.num_completed_tasks
+
+            tqdm_pbar = notebook_tqdm
+            if environment.is_interactive_terminal():
+                tqdm_pbar = cli_tqdm
+
+            # Use a lock to ensure only one thread can access and modify
+            # the shared dictionaries at a time.
+            with self._lock:
+                if operation_id in self._execution_progress_bar:
+                    pbar = self._execution_progress_bar[operation_id]
+                    if pbar.total != total_tasks:
+                        pbar.reset(
+                            total=total_tasks
+                        )  # This also force-resets the bar percentage on the next refresh
+                else:
+                    pbar = tqdm_pbar(
+                        total=total_tasks,
+                        leave=True,
+                        dynamic_ncols=True,
+                        bar_format="{l_bar}{bar} {n_fmt}/{total_fmt} Tasks",
+                    )
+                    self._execution_progress_bar[operation_id] = pbar
+
+                # Account for skipped or failed tasks: the StageInfo proto
+                # does not carry skipped/failed task counts.
+                if done and completed_tasks < total_tasks:
+                    completed_tasks = total_tasks
+
+                pbar.n = completed_tasks
+                pbar.refresh()
+
+                if done:
+                    pbar.close()
+                    self._execution_progress_bar.pop(operation_id, None)
+
+        self.registerProgressHandler(handler)
+
     @staticmethod
     def _sql_lazy_transformation(req):
         # Select SQL command
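
The clearProgressHandlers patch relies on types.MethodType to shadow a method on a single instance, so the Dataproc handler survives any clear. A self-contained sketch of that pattern, independent of Spark (all names here are illustrative):

    from types import MethodType

    class Registry:
        def __init__(self):
            self.handlers = []

        def register(self, fn):
            self.handlers.append(fn)

        def clear(self):
            self.handlers.clear()

    def keep_registered(registry, protected):
        base_clear = registry.clear  # capture the original bound method

        def clear_wrapper(_self, *args, **kwargs):
            base_clear(*args, **kwargs)  # run the real clear
            _self.register(protected)    # then immediately re-register

        # Shadow clear() on this instance only, just as the session
        # shadows clearProgressHandlers above.
        registry.clear = MethodType(clear_wrapper, registry)
        registry.register(protected)

    r = Registry()
    keep_registered(r, lambda *a: None)
    r.clear()
    assert len(r.handlers) == 1  # the protected handler survived clear()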