manta-common-core 0.5b0.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,892 @@
1
+ """
2
+ Base Client Architecture for gRPC services.
3
+
4
+ This module provides the abstract base class for all gRPC clients in the system,
5
+ standardizing connection management, stub creation, and resource cleanup.
6
+ """
7
+
8
+ import abc
9
+ import asyncio
10
+ import contextlib
11
+ import ssl
12
+ import sys
13
+ import traceback
14
+ from dataclasses import dataclass
15
+ from logging import Logger, getLogger
16
+ from pathlib import Path
17
+ from typing import Any # TypeAlias, # need 3.10 or use type in 3.12
18
+ from typing import AsyncIterator, Optional, Type, TypeVar, Union
19
+
20
+ from betterproto import ServiceStub
21
+ from grpclib.client import Channel
22
+ from grpclib.config import Configuration
23
+ from grpclib.exceptions import GRPCError, StreamTerminatedError
24
+
25
+ from .decorators import with_retry, with_streaming_retry
26
+ from .errors import MantaError, wrap_exception
27
+ from .retry import DefaultRetryPolicy, RetryPolicy, StreamingRetryPolicy
28
+ from .traces import Tracer
29
+
30
# Request metadata is a flat mapping of header name to header value.
MetadataDict = dict[str, str]

# Bound TypeVar so factories such as ``create`` can be annotated as
# returning the concrete subclass they were called on.
T = TypeVar("T", bound="GrpcClientBase")

# Names exported by a star-import of this module.
__all__ = ["GrpcClientBase", "ConnectionKey", "MetadataDict"]
37
+
38
+
39
@dataclass(frozen=True)
class ConnectionKey:
    """
    Immutable (and therefore hashable) identity of a gRPC endpoint.

    ``str()`` renders the key as ``<scheme>://<host>:<port>`` where the
    scheme is ``secure`` or ``insecure`` depending on the ``secure`` flag.
    """

    host: str
    port: int
    secure: bool = False

    def __str__(self) -> str:
        prefix = "insecure" if not self.secure else "secure"
        return "{}://{}:{}".format(prefix, self.host, self.port)
52
+
53
+
54
+ class GrpcClientBase(abc.ABC):
55
+ """
56
+ Abstract base class for all gRPC clients.
57
+
58
+ This class provides standardized channel lifecycle management and
59
+ common utilities for all gRPC client implementations.
60
+ """
61
+
62
#: Default host for the gRPC service
DEFAULT_HOST: str = "localhost"

#: Default port for the gRPC service
DEFAULT_PORT: int = 50051

#: Default channel options for gRPC connections. ``__init__`` copies this
#: mapping, overlays caller-supplied options and passes the result as
#: keyword arguments to ``grpclib.config.Configuration``.
DEFAULT_CHANNEL_OPTIONS: dict[str, Any] = dict(
    _keepalive_time=30.0,  # 30s
    _keepalive_timeout=20.0,  # 20s
    _keepalive_permit_without_calls=False,
    _http2_max_pings_without_data=2,
    _http2_min_sent_ping_interval_without_data=300.0,  # 300s
    http2_connection_window_size=4 * 1024 * 1024,  # 4MB
    http2_stream_window_size=4 * 1024 * 1024,  # 4MB
)

#: Default metadata to include in each request
DEFAULT_METADATA: MetadataDict = {}

#: Default retry policy for regular methods
DEFAULT_RETRY_POLICY: RetryPolicy = DefaultRetryPolicy(
    max_retries=3,
    initial_delay=0.2,
    max_delay=5.0,
    backoff_factor=2.0,
    jitter_factor=0.2,
)

#: Default retry policy for streaming methods
DEFAULT_STREAMING_RETRY_POLICY: RetryPolicy = StreamingRetryPolicy(
    max_retries=3,
    initial_delay=0.2,
    max_delay=5.0,
    backoff_factor=2.0,
    jitter_factor=0.2,
    retry_if_no_items_processed=True,
)
100
+
101
+ @abc.abstractmethod
102
+ def _get_stub_class(self) -> Type[ServiceStub]:
103
+ """
104
+ Get the stub class for this client.
105
+
106
+ This method must be implemented by all subclasses to return
107
+ the appropriate stub class.
108
+
109
+ Returns
110
+ -------
111
+ Type[ServiceStub]
112
+ The stub class
113
+ """
114
+ pass
115
+
116
@property
def ssl_context(self):
    """
    SSL context used when opening the channel.

    Returns ``None`` for insecure clients. For secure clients the context
    is built on first access and cached on ``self._ssl_context``:

    * no ``cert_folder`` and port 443: a default server-auth context
      backed by the ``certifi`` CA bundle, with ALPN set to ``h2``;
    * otherwise: whatever ``cert_loader.create_ssl_context`` returns for
      this component/environment/cert folder. Certificate verification
      is required unless ``environment`` is ``"dev"``.
    """
    if not self.secure:
        return None

    if hasattr(self, "_ssl_context"):
        return self._ssl_context

    targets_public_endpoint = self.cert_folder is None and self.port == 443
    if targets_public_endpoint:
        # Imported lazily so certifi is only needed on this path.
        import certifi

        self.tracer.info(
            f"Using system default SSL context for public endpoint {self.host}:443"
        )
        context = ssl.create_default_context(
            ssl.Purpose.SERVER_AUTH, cafile=certifi.where()
        )
        self._ssl_context = context
        # gRPC over TLS needs HTTP/2 negotiated through ALPN.
        context.set_alpn_protocols(["h2"])
        return context

    # Custom certificate configuration.
    from .cert_loader import create_ssl_context

    if self.environment != "dev":
        verification = ssl.CERT_REQUIRED
    else:
        verification = ssl.CERT_NONE

    self._ssl_context = create_ssl_context(
        component_name=self.component_name,
        environment=self.environment,
        cert_folder=self.cert_folder,
        verify_mode=verification,
    )
    return self._ssl_context
159
+
160
@property
def metadata(self) -> MetadataDict:
    """
    Get metadata for gRPC requests.

    Returns a fresh copy of the class-level ``DEFAULT_METADATA`` so that
    defaults declared on a subclass are actually sent, and so callers can
    mutate the result without touching the shared class attribute.
    Subclasses can still override this property to provide computed
    metadata.

    Returns
    -------
    MetadataDict
        Metadata dictionary for gRPC requests
    """
    # Copy: DEFAULT_METADATA is shared by every instance of the class.
    return dict(self.DEFAULT_METADATA)
174
+
175
def __init__(
    self,
    host: Optional[str] = None,
    port: Optional[int] = None,
    secure: bool = False,
    channel_options: Optional[dict[str, Any]] = None,
    tracer: Optional[Union[Tracer, Logger]] = None,
    retry_policy: Optional[RetryPolicy] = None,
    streaming_retry_policy: Optional[RetryPolicy] = None,
    component_name: str = "manta-sdk",
    environment: Optional[str] = None,
    cert_folder: Optional[Union[str, Path]] = None,
):
    """
    Store the client configuration; no channel is opened here.

    Parameters
    ----------
    host : Optional[str]
        Host to connect to; a falsy value selects ``DEFAULT_HOST``
    port : Optional[int]
        Port to connect to; a falsy value selects ``DEFAULT_PORT``
    secure : bool
        Whether to use a secure channel
    channel_options : Optional[dict[str, Any]]
        Overrides applied on top of ``DEFAULT_CHANNEL_OPTIONS``
    tracer : Optional[Union[Tracer, Logger]]
        Logger to use for tracing (if None, a ``Tracer`` wrapping a logger
        named after the concrete class is created)
    retry_policy : Optional[RetryPolicy]
        Retry policy for regular methods
    streaming_retry_policy : Optional[RetryPolicy]
        Retry policy for streaming methods
    component_name : str
        Component name for certificate loading (e.g., 'manta-sdk', 'manta-node')
    environment : Optional[str]
        Environment name for certificate loading (e.g., 'dev', 'staging', 'prod')
    cert_folder : Optional[Union[str, Path]]
        Custom certificate folder path
    """
    # Endpoint
    self.host = host if host else self.DEFAULT_HOST
    self.port = port if port else self.DEFAULT_PORT
    self.secure = secure

    # Certificate configuration (read later by the ssl_context property)
    self.component_name = component_name
    self.environment = environment
    self.cert_folder = cert_folder

    # Class defaults first, caller overrides on top.
    merged_options = dict(self.DEFAULT_CHANNEL_OPTIONS)
    if channel_options:
        merged_options.update(channel_options)
    self.config = Configuration(**merged_options)

    self.connection_key = ConnectionKey(
        host=self.host, port=self.port, secure=self.secure
    )

    # Connection state; connect() fills these in.
    self._channel = None
    self._is_connected = False

    if tracer is not None:
        self.tracer = tracer
    else:
        logger_name = f"{self.__class__.__module__}.{self.__class__.__name__}"
        self.tracer = Tracer(getLogger(logger_name))

    self.tracer.debug(f"Initialized client for {self.connection_key}")

    # Retry policies: explicit argument wins, class default otherwise.
    self.retry_policy = retry_policy if retry_policy else self.DEFAULT_RETRY_POLICY
    self.streaming_retry_policy = (
        streaming_retry_policy
        if streaming_retry_policy
        else self.DEFAULT_STREAMING_RETRY_POLICY
    )

    # Tasks currently inside _track_service_call()
    self._active_calls: list[asyncio.Task] = []
253
+
254
async def connect(self) -> Channel:
    """
    Return the client's channel, creating it on first use.

    When the client is already flagged as connected and holds a channel,
    that channel is returned unchanged. Otherwise a new ``Channel`` is
    built from the stored host, port, configuration and SSL context, and
    the client is flagged as connected.

    Returns
    -------
    Channel
        The gRPC channel for making RPC calls

    Examples
    --------
    >>> client = MyServiceClient("localhost", 50051)
    >>> channel = await client.connect()
    >>> await client.disconnect()
    """
    needs_channel = not self.is_connected or self._channel is None
    if needs_channel:
        self.tracer.debug(f"Connecting to {self.connection_key}")
        self._channel = Channel(
            self.host, self.port, config=self.config, ssl=self.ssl_context
        )
        self._is_connected = True
    else:
        self.tracer.debug(f"Already connected to {self.connection_key}")
    return self._channel
282
+
283
async def disconnect(self) -> None:
    """
    Disconnect from the gRPC service and clean up resources.

    Cancels every tracked call that is still running, waits up to two
    seconds for the cancellations to be processed, then closes the channel
    from an executor thread with a 500 ms timeout. ``_channel`` and
    ``_is_connected`` are reset even if closing fails or times out.

    The task executing ``disconnect()`` is never cancelled, even when it
    is itself registered as an active call (for example when a streaming
    generator is still suspended inside ``_track_service_call``).
    Cancelling it would abort this method at its next ``await`` and leave
    the channel open.

    Calling this on a client that is neither connected nor holding a
    channel only emits a debug log.
    """
    if not self.is_connected and self._channel is None:
        self.tracer.debug(
            f"Already disconnected or channel is None for {self.connection_key}."
        )
        return

    self.tracer.debug(f"Disconnecting from {self.connection_key}...")

    # Only unfinished calls need cancelling; skip our own task so that
    # disconnect() cannot cancel itself.
    current_task = asyncio.current_task()
    tasks_to_cancel = [
        task
        for task in self._active_calls
        if task is not current_task and not task.done()
    ]

    if tasks_to_cancel:
        self.tracer.debug(
            f"Attempting to cancel {len(tasks_to_cancel)} active call(s) for {self.connection_key}."
        )
        for task in tasks_to_cancel:
            task.cancel()  # Schedule cancellation

        # Give the tasks a bounded amount of time to run their cleanup
        # code (e.g. finally blocks) before moving on.
        task_cleanup_timeout = 2.0  # seconds
        try:
            results = await asyncio.wait_for(
                asyncio.gather(*tasks_to_cancel, return_exceptions=True),
                timeout=task_cleanup_timeout,
            )
            for task_ref, result in zip(tasks_to_cancel, results):
                if isinstance(result, asyncio.CancelledError):
                    self.tracer.debug(
                        f"Task {task_ref} was successfully cancelled."
                    )
                elif isinstance(result, Exception):
                    self.tracer.error(
                        f"Task {task_ref} raised an exception during/after cancellation: {result}",
                        exc_info=result,
                    )
                else:
                    self.tracer.debug(
                        f"Task {task_ref} completed (result: {result}) before cancellation fully processed or was not cancelled."
                    )
        except asyncio.TimeoutError:
            self.tracer.warning(
                f"Task cancellation timed out after {task_cleanup_timeout}s for {self.connection_key}. "
                f"Forcefully clearing {len(tasks_to_cancel)} task(s)."
            )

    # Drop all bookkeeping, including entries that were not cancelled.
    self._active_calls.clear()

    # channel.close() is synchronous; it is run in the default executor
    # under a short timeout so a stuck close cannot hang the event loop.
    if self._channel:
        self.tracer.debug(f"Closing channel for {self.connection_key}...")
        loop = asyncio.get_running_loop()
        try:
            await asyncio.wait_for(
                loop.run_in_executor(None, self._channel.close),
                timeout=0.5,  # Fail fast - don't wait more than 500ms
            )
            self.tracer.debug(
                f"Channel closed successfully for {self.connection_key}"
            )
        except asyncio.TimeoutError:
            self.tracer.warning(
                f"Channel close timed out after 500ms for {self.connection_key}. "
                "Proceeding without waiting - OS will cleanup socket on process exit."
            )
        except Exception as e:
            self.tracer.warning(
                f"Warning closing channel for {self.connection_key}: {e}. "
                "Proceeding - OS will cleanup socket on process exit."
            )
        finally:
            # Always reset state, even if close failed
            self._channel = None
            self._is_connected = False

    self.tracer.debug(f"Disconnect completed for {self.connection_key}")
373
+
374
+ def _finalize_cleanup_resources(self) -> None:
375
+ """
376
+ Synchronously attempts to cancel active calls and close the channel.
377
+ This method is designed to be safe to call multiple times and from __del__.
378
+ """
379
+ # Request cancellation of active calls
380
+ if self._active_calls:
381
+ self.tracer.debug(
382
+ f"Requesting cancellation for {len(self._active_calls)} active call(s) during cleanup for {self.connection_key}."
383
+ )
384
+ for task in list(self._active_calls): # Iterate over a copy
385
+ if not task.done():
386
+ try:
387
+ task.cancel()
388
+ # Note: Actual cancellation processing depends on the event loop.
389
+ except Exception as e:
390
+ self.tracer.error(
391
+ f"Error requesting task cancellation for {task} during cleanup: {e}",
392
+ exc_info=True,
393
+ )
394
+ self._active_calls.clear()
395
+
396
+ # Close the channel
397
+ if self._channel:
398
+ self.tracer.debug(
399
+ f"Closing channel for {self.connection_key} during cleanup."
400
+ )
401
+ try:
402
+ self._channel.close() # This is synchronous
403
+ except Exception as e:
404
+ self.tracer.error(
405
+ f"Error closing channel for {self.connection_key} during cleanup: {e}",
406
+ exc_info=True,
407
+ )
408
+ finally:
409
+ self._is_connected = False
410
+
411
+ # Reset state
412
+ self._channel = None
413
+ self._is_connected = False
414
+ self.tracer.debug(f"Resource cleanup finalized for {self.connection_key}.")
415
+
416
+ def __del__(self):
417
+ """
418
+ Destructor to attempt resource cleanup as a last resort.
419
+ It's best practice to explicitly call `disconnect()` or use the async context manager.
420
+ """
421
+ # Check if Python is shutting down - if so, skip cleanup to avoid ImportError
422
+ try:
423
+ if sys.meta_path is None:
424
+ return
425
+ except ImportError:
426
+ # Python is shutting down, skip cleanup
427
+ return
428
+
429
+ # Check if object is properly initialized before accessing attributes
430
+ # During testing or shutdown, attributes may not exist
431
+ try:
432
+ is_connected = self.is_connected
433
+ active_calls = self._active_calls
434
+ except AttributeError:
435
+ # Object not fully initialized, skip cleanup
436
+ return
437
+
438
+ if is_connected or active_calls:
439
+ try:
440
+ self.tracer.warning(
441
+ f"Client for {self.connection_key} was not explicitly disconnected. "
442
+ f"Attempting synchronous cleanup in __del__. Active calls: {len(self._active_calls)}"
443
+ )
444
+ except (ImportError, AttributeError, ValueError):
445
+ # During shutdown, modules might be unavailable or logging streams closed
446
+ # ValueError: I/O operation on closed file (during pytest teardown)
447
+ pass
448
+
449
+ # Check if an event loop is available and running
450
+ # If not, skip cleanup to avoid blocking on channel.close()
451
+ try:
452
+ asyncio.get_running_loop()
453
+ # Event loop exists, safe to cleanup
454
+ self._finalize_cleanup_resources()
455
+ except RuntimeError: # No running event loop
456
+ try:
457
+ self.tracer.warning(
458
+ f"No running event loop detected in __del__ for {self.connection_key}. "
459
+ "Skipping channel cleanup to avoid blocking. Python shutdown will handle resource cleanup."
460
+ )
461
+ except (ImportError, AttributeError, ValueError):
462
+ # During shutdown, logging streams might be closed
463
+ pass
464
+ # DO NOT call _finalize_cleanup_resources() without an event loop
465
+ # channel.close() will block indefinitely in selectors.select()
466
+ # Let Python's shutdown mechanism handle the cleanup
467
+
468
@property
def is_connected(self) -> bool:
    """
    Whether the client currently considers itself connected.

    Returns
    -------
    bool
        The value of the internal ``_is_connected`` flag
    """
    connected = self._is_connected
    return connected
479
+
480
+ async def __aenter__(self) -> "GrpcClientBase":
481
+ """
482
+ Enter the context manager.
483
+
484
+ Returns
485
+ -------
486
+ GrpcClientBase
487
+ The client instance
488
+ """
489
+ await self.connect()
490
+ return self
491
+
492
+ async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
493
+ """
494
+ Exit the context manager and clean up resources.
495
+
496
+ This method ensures the connection is properly closed even if an
497
+ exception is raised within the context.
498
+ """
499
+ await self.disconnect()
500
+
501
@contextlib.asynccontextmanager
async def with_stub(self):
    """
    Async context manager that yields a service stub bound to the channel.

    The stub class comes from ``_get_stub_class()`` and is instantiated
    with the channel returned by ``connect()``. Leaving the context does
    not close the channel.

    Yields
    ------
    ServiceStub
        The service stub for making RPC calls

    Examples
    --------
    >>> client = MyServiceClient("localhost", 50051)
    >>> async with client.with_stub() as stub:
    ...     response = await stub.get_data(request)
    """
    make_stub = self._get_stub_class()
    yield make_stub(await self.connect())
521
+
522
+ async def _ensure_connected(self) -> None:
523
+ """
524
+ Ensure that the client is connected to the server.
525
+
526
+ This internal method is used by service methods to ensure that a connection
527
+ exists before making service calls.
528
+
529
+ Returns
530
+ -------
531
+ None
532
+
533
+ Raises
534
+ ------
535
+ ConnectionError
536
+ If a connection cannot be established
537
+ """
538
+ if not self.is_connected:
539
+ await self.connect()
540
+
541
+ async def _reset_channel(self) -> None:
542
+ """Close the dead channel and clear state so the next call reconnects."""
543
+ if self._channel:
544
+ loop = asyncio.get_running_loop()
545
+ try:
546
+ await asyncio.wait_for(
547
+ loop.run_in_executor(None, self._channel.close),
548
+ timeout=0.5,
549
+ )
550
+ except (asyncio.TimeoutError, Exception) as e:
551
+ self.tracer.debug(f"Channel close during GOAWAY reset: {e}")
552
+ finally:
553
+ self._channel = None
554
+ self._is_connected = False
555
+ else:
556
+ self._is_connected = False
557
+
558
async def health_check(self) -> bool:
    """
    Report whether a connection can be ensured.

    This base implementation only calls ``_ensure_connected()``; it does
    not issue an RPC. Subclasses may override it with a service-specific
    check.

    Returns
    -------
    bool
        True if ``_ensure_connected()`` succeeded, False if it raised
    """
    try:
        await self._ensure_connected()
    except Exception as e:
        self.tracer.debug(f"Health check failed: {e}")
        return False
    else:
        return True
576
+
577
def create_metadata(
    self, extra_metadata: Optional[MetadataDict] = None
) -> MetadataDict:
    """
    Build the metadata for one gRPC call.

    Starts from a copy of the ``metadata`` property and overlays
    ``extra_metadata``; on key collisions the extra value wins. The
    property is read exactly once, so an override that computes its value
    runs only once per call and the result cannot be inconsistent.

    Parameters
    ----------
    extra_metadata : Optional[MetadataDict]
        Extra metadata to add to the call

    Returns
    -------
    MetadataDict
        A new dictionary; neither input mapping is mutated
    """
    # Single read of the property, then copy so the caller owns the result.
    base = self.metadata
    merged = dict(base) if base else {}

    if extra_metadata:
        merged.update(extra_metadata)

    return merged
601
+
602
+ @contextlib.asynccontextmanager
603
+ async def _track_service_call(self):
604
+ """
605
+ Context manager to track active service calls.
606
+
607
+ This allows for graceful cancellation of ongoing calls during disconnect.
608
+
609
+ Yields
610
+ ------
611
+ None
612
+ """
613
+ # Get the current task
614
+ current_task = asyncio.current_task()
615
+ if current_task is not None:
616
+ self._active_calls.append(current_task)
617
+
618
+ try:
619
+ yield
620
+ finally:
621
+ # Remove the task from active calls if it exists
622
+ if current_task is not None and current_task in self._active_calls:
623
+ self._active_calls.remove(current_task)
624
+
625
@classmethod
async def create(
    cls: Type[T],
    host: Optional[str] = None,
    port: Optional[int] = None,
    **kwargs,
) -> T:
    """
    Build a client of this class and connect it in one step.

    Parameters
    ----------
    host : Optional[str]
        The hostname of the gRPC server
    port : Optional[int]
        The port number of the gRPC server
    **kwargs
        Additional arguments to pass to the constructor

    Returns
    -------
    GrpcClientBase
        An instance of ``cls`` on which ``connect()`` has been awaited
    """
    instance = cls(host=host, port=port, **kwargs)
    await instance.connect()
    return instance
652
+
653
async def call_service_method(
    self,
    method_name: str,
    request: Any,
    metadata: Optional[MetadataDict] = None,
    retry_policy: Optional[RetryPolicy] = None,
    error_class: Optional[Type[MantaError]] = None,
    include_operation: bool = True,
    include_request: bool = True,
    include_metadata: bool = True,
    log_call_stack: bool = False,
) -> Any:
    """
    Invoke a unary stub method under the retry decorator.

    Each attempt tracks the current task, obtains a stub via
    ``with_stub()``, merges metadata with ``create_metadata()`` and awaits
    ``stub.<method_name>(request, metadata=...)``.

    Parameters
    ----------
    method_name : str
        The name of the stub method to call
    request : Any
        The request object to pass to the method
    metadata : Optional[MetadataDict]
        Additional metadata to include with the request
    retry_policy : Optional[RetryPolicy]
        Retry policy to use (falls back to the client's policy if None)
    error_class : Optional[Type[MantaError]]
        Exception class handed to ``wrap_exception``
    include_operation : bool
        Whether to emit the "Calling"/"Completed" debug logs at all
    include_request : bool
        Whether the "Calling" log includes the request
    include_metadata : bool
        Whether the "Calling" log includes the merged metadata
    log_call_stack : bool
        Whether to log the formatted traceback when an error is handled

    Returns
    -------
    Any
        The response from the service method

    Raises
    ------
    StreamTerminatedError
        Re-raised unchanged after the channel has been reset
    Exception
        Whatever ``wrap_exception`` builds for a ``GRPCError`` or any other
        exception raised during the call
    """
    effective_policy = retry_policy if retry_policy else self.retry_policy
    operation = f"{self.__class__.__name__}.{method_name}"

    def _log_stack_if_requested() -> None:
        # Must be called from inside an ``except`` block.
        if log_call_stack:
            self.tracer.error(f"Call stack: {traceback.format_exc()}")

    def _wrapped(exc: BaseException):
        return wrap_exception(
            exc,
            message=f"Error in {method_name}",
            error_class=error_class,
        )

    @with_retry(
        tracer=self.tracer,
        policy=effective_policy,
        operation_name=operation,
    )
    async def _execute_call():
        try:
            async with self._track_service_call():
                async with self.with_stub() as stub:
                    rpc = getattr(stub, method_name)
                    final_metadata = self.create_metadata(metadata)

                    # The message is assembled even when it is not logged.
                    fragments = [f"Calling {method_name}"]
                    if include_request:
                        fragments.append(f" with {request}")
                    if include_metadata:
                        fragments.append(f" with metadata {final_metadata}")
                    message = "".join(fragments)
                    if include_operation:
                        self.tracer.debug(message)

                    result = await rpc(request, metadata=final_metadata)

                    if include_operation:
                        self.tracer.debug(f"Completed {method_name}")

                    return result
        except StreamTerminatedError:
            # Drop the channel so the next attempt reconnects, then let
            # the error propagate unwrapped.
            self.tracer.warning(
                f"GOAWAY received during {method_name}, resetting channel for reconnect"
            )
            await self._reset_channel()
            raise
        except GRPCError as e:
            self.tracer.error(
                f"GRPC Error in {method_name}: status={e.status}, message={e.message}"
            )
            if hasattr(e, "details") and e.details:
                self.tracer.error(f"Error details: {e.details}")
            _log_stack_if_requested()
            raise _wrapped(e)
        except Exception as e:
            self.tracer.exception(f"Unexpected error in {method_name}: {str(e)}")
            _log_stack_if_requested()
            raise _wrapped(e)

    return await _execute_call()
766
+
767
async def stream_service_method(
    self,
    method_name: str,
    request: Any,
    metadata: Optional[MetadataDict] = None,
    retry_policy: Optional[RetryPolicy] = None,
    error_class: Optional[Type[MantaError]] = None,
    log_call_stack: bool = False,
    include_operation: bool = True,
    include_request: bool = True,
    include_metadata: bool = True,
) -> AsyncIterator[Any]:
    """
    Iterate a server-streaming stub method under the streaming retry decorator.

    The current task is tracked for the lifetime of the stream. A stub is
    obtained via ``with_stub()``, metadata is merged with
    ``create_metadata()`` and every item produced by
    ``stub.<method_name>(request, metadata=...)`` is yielded to the caller.

    Parameters
    ----------
    method_name : str
        The name of the stub method to call
    request : Any
        The request object to pass to the method
    metadata : Optional[MetadataDict]
        Additional metadata to include in the request
    retry_policy : Optional[RetryPolicy]
        Custom retry policy for this call (uses the instance streaming
        policy if None)
    error_class : Optional[Type[MantaError]]
        Exception class handed to ``wrap_exception``
    log_call_stack : bool
        Whether to log the formatted traceback when an error is handled
    include_operation : bool
        Whether to emit the "Streaming"/"Completed" debug logs at all
    include_request : bool
        Whether the "Streaming" log includes the request
    include_metadata : bool
        Whether the "Streaming" log includes the merged metadata

    Yields
    ------
    Any
        Each response item from the streaming service method

    Raises
    ------
    StreamTerminatedError
        Re-raised unchanged after the channel has been reset
    Exception
        Whatever ``wrap_exception`` builds for a ``GRPCError`` or any other
        exception raised during the stream
    """
    effective_policy = (
        retry_policy if retry_policy else self.streaming_retry_policy
    )
    operation = f"{self.__class__.__name__}.{method_name}"

    def _log_stack_if_requested() -> None:
        # Must be called from inside an ``except`` block.
        if log_call_stack:
            self.tracer.error(f"Call stack: {traceback.format_exc()}")

    def _wrapped(exc: BaseException):
        return wrap_exception(
            exc,
            message=f"Error in streaming {method_name}",
            error_class=error_class,
        )

    @with_streaming_retry(
        tracer=self.tracer,
        policy=effective_policy,
        operation_name=operation,
    )
    async def _execute_streaming_call() -> AsyncIterator[Any]:
        async with self._track_service_call():
            try:
                async with self.with_stub() as stub:
                    rpc = getattr(stub, method_name)
                    final_metadata = self.create_metadata(metadata)

                    # The message is assembled even when it is not logged.
                    fragments = [f"Streaming {method_name}"]
                    if include_request:
                        fragments.append(f" with {request}")
                    if include_metadata:
                        fragments.append(f" with metadata {final_metadata}")
                    message = "".join(fragments)
                    if include_operation:
                        self.tracer.debug(message)

                    response_count = 0
                    async for item in rpc(request, metadata=final_metadata):
                        self.tracer.debug(f"Received response from {method_name}")
                        response_count += 1
                        yield item

                    if include_operation:
                        self.tracer.debug(
                            f"Completed streaming {method_name} with {response_count} items"
                        )
            except StreamTerminatedError:
                # Drop the channel so the next attempt reconnects, then
                # let the error propagate unwrapped.
                self.tracer.warning(
                    f"GOAWAY received during streaming {method_name}, resetting channel for reconnect"
                )
                await self._reset_channel()
                raise
            except GRPCError as e:
                self.tracer.error(
                    f"GRPC Error in streaming {method_name}: status={e.status}, message={e.message}"
                )
                if hasattr(e, "details") and e.details:
                    self.tracer.error(f"Error details: {e.details}")
                _log_stack_if_requested()
                raise _wrapped(e)
            except Exception as e:
                self.tracer.exception(
                    f"Unexpected error in {method_name}: {str(e)}"
                )
                _log_stack_if_requested()
                raise _wrapped(e)

    # Re-yield so this method is itself an async generator.
    async for item in _execute_streaming_call():
        yield item