manta-common-core 0.5b0.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- manta_common/base_client.py +892 -0
- manta_common/base_mqtt.py +208 -0
- manta_common/build/common/cluster/__init__.py +48 -0
- manta_common/build/common/informations/__init__.py +247 -0
- manta_common/build/common/mqtt/__init__.py +42 -0
- manta_common/build/common/permissions/__init__.py +312 -0
- manta_common/build/common/results/__init__.py +92 -0
- manta_common/build/common/swarms/__init__.py +70 -0
- manta_common/build/common/system/__init__.py +159 -0
- manta_common/build/common/tasks/__init__.py +120 -0
- manta_common/build/common/user/__init__.py +60 -0
- manta_common/build/common/wireguard/__init__.py +111 -0
- manta_common/build/core/__init__.py +0 -0
- manta_common/build/core/user_services/__init__.py +2436 -0
- manta_common/build/node/__init__.py +0 -0
- manta_common/build/node/light_service/__init__.py +505 -0
- manta_common/build/node/node_service/__init__.py +805 -0
- manta_common/cert_loader.py +383 -0
- manta_common/const.py +21 -0
- manta_common/conversions.py +544 -0
- manta_common/decorators.py +528 -0
- manta_common/errors.py +530 -0
- manta_common/event_loop.py +47 -0
- manta_common/logging_config.py +147 -0
- manta_common/retry.py +596 -0
- manta_common/traces.py +608 -0
- manta_common_core-0.5b0.dev3.dist-info/METADATA +46 -0
- manta_common_core-0.5b0.dev3.dist-info/RECORD +30 -0
- manta_common_core-0.5b0.dev3.dist-info/WHEEL +5 -0
- manta_common_core-0.5b0.dev3.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,892 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Base Client Architecture for gRPC services.
|
|
3
|
+
|
|
4
|
+
This module provides the abstract base class for all gRPC clients in the system,
|
|
5
|
+
standardizing connection management, stub creation, and resource cleanup.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import abc
|
|
9
|
+
import asyncio
|
|
10
|
+
import contextlib
|
|
11
|
+
import ssl
|
|
12
|
+
import sys
|
|
13
|
+
import traceback
|
|
14
|
+
from dataclasses import dataclass
|
|
15
|
+
from logging import Logger, getLogger
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Any # TypeAlias, # need 3.10 or use type in 3.12
|
|
18
|
+
from typing import AsyncIterator, Optional, Type, TypeVar, Union
|
|
19
|
+
|
|
20
|
+
from betterproto import ServiceStub
|
|
21
|
+
from grpclib.client import Channel
|
|
22
|
+
from grpclib.config import Configuration
|
|
23
|
+
from grpclib.exceptions import GRPCError, StreamTerminatedError
|
|
24
|
+
|
|
25
|
+
from .decorators import with_retry, with_streaming_retry
|
|
26
|
+
from .errors import MantaError, wrap_exception
|
|
27
|
+
from .retry import DefaultRetryPolicy, RetryPolicy, StreamingRetryPolicy
|
|
28
|
+
from .traces import Tracer
|
|
29
|
+
|
|
30
|
+
# Type for metadata
|
|
31
|
+
MetadataDict = dict[str, str]
|
|
32
|
+
|
|
33
|
+
# TypeVar for self-referencing return types
|
|
34
|
+
T = TypeVar("T", bound="GrpcClientBase")
|
|
35
|
+
|
|
36
|
+
__all__ = ["GrpcClientBase", "ConnectionKey", "MetadataDict"]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass(frozen=True)
class ConnectionKey:
    """
    Read-only, hashable identifier of a gRPC endpoint.

    Two keys are equal when their host, port and secure flag all match.
    """

    # Hostname or address of the endpoint
    host: str
    # TCP port of the endpoint
    port: int
    # True when the connection uses a secure channel
    secure: bool = False

    def __str__(self) -> str:
        # Rendered as a pseudo-URL whose scheme reflects the secure flag.
        if self.secure:
            prefix = "secure"
        else:
            prefix = "insecure"
        return f"{prefix}://{self.host}:{self.port}"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class GrpcClientBase(abc.ABC):
|
|
55
|
+
"""
|
|
56
|
+
Abstract base class for all gRPC clients.
|
|
57
|
+
|
|
58
|
+
This class provides standardized channel lifecycle management and
|
|
59
|
+
common utilities for all gRPC client implementations.
|
|
60
|
+
"""
|
|
61
|
+
|
|
62
|
+
#: Default host for the gRPC service
|
|
63
|
+
DEFAULT_HOST: str = "localhost"
|
|
64
|
+
|
|
65
|
+
#: Default port for the gRPC service
|
|
66
|
+
DEFAULT_PORT: int = 50051
|
|
67
|
+
|
|
68
|
+
#: Default channel options for gRPC connections
|
|
69
|
+
DEFAULT_CHANNEL_OPTIONS: dict[str, Any] = {
|
|
70
|
+
"_keepalive_time": 30.0, # 30s
|
|
71
|
+
"_keepalive_timeout": 20.0, # 20s
|
|
72
|
+
"_keepalive_permit_without_calls": False,
|
|
73
|
+
"_http2_max_pings_without_data": 2,
|
|
74
|
+
"_http2_min_sent_ping_interval_without_data": 300.0, # 300s
|
|
75
|
+
"http2_connection_window_size": 4194304, # 4MB
|
|
76
|
+
"http2_stream_window_size": 4194304, # 4MB
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
#: Default metadata to include in each request
|
|
80
|
+
DEFAULT_METADATA: MetadataDict = {}
|
|
81
|
+
|
|
82
|
+
#: Default retry policy for regular methods
|
|
83
|
+
DEFAULT_RETRY_POLICY: RetryPolicy = DefaultRetryPolicy(
|
|
84
|
+
max_retries=3,
|
|
85
|
+
initial_delay=0.2,
|
|
86
|
+
max_delay=5.0,
|
|
87
|
+
backoff_factor=2.0,
|
|
88
|
+
jitter_factor=0.2,
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
#: Default retry policy for streaming methods
|
|
92
|
+
DEFAULT_STREAMING_RETRY_POLICY: RetryPolicy = StreamingRetryPolicy(
|
|
93
|
+
max_retries=3,
|
|
94
|
+
initial_delay=0.2,
|
|
95
|
+
max_delay=5.0,
|
|
96
|
+
backoff_factor=2.0,
|
|
97
|
+
jitter_factor=0.2,
|
|
98
|
+
retry_if_no_items_processed=True,
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
@abc.abstractmethod
def _get_stub_class(self) -> Type[ServiceStub]:
    """
    Return the stub class used by this client.

    Every concrete client has to override this hook. The base
    implementation has no body and evaluates to ``None`` if it is ever
    reached through ``super()``.

    Returns
    -------
    Type[ServiceStub]
        The stub class
    """
    ...
|
|
115
|
+
|
|
116
|
+
@property
def ssl_context(self):
    """
    SSL context used when opening the channel.

    Returns ``None`` for insecure clients. For secure clients the context
    is built on first access and stored on ``self._ssl_context``; later
    accesses return the stored object.
    """
    if not self.secure:
        return None

    # Reuse a previously built context if one is already attached.
    try:
        return self._ssl_context
    except AttributeError:
        pass

    public_endpoint = self.cert_folder is None and self.port == 443
    if public_endpoint:
        # No custom certificates and port 443: trust the CA bundle shipped
        # with certifi (imported lazily, only on this path).
        import certifi

        self.tracer.info(
            f"Using system default SSL context for public endpoint {self.host}:443"
        )
        self._ssl_context = ssl.create_default_context(
            ssl.Purpose.SERVER_AUTH, cafile=certifi.where()
        )
        # gRPC over TLS needs HTTP/2 to be negotiated through ALPN.
        self._ssl_context.set_alpn_protocols(["h2"])
        return self._ssl_context

    # Otherwise build the context from the project's certificate loader.
    from .cert_loader import create_ssl_context

    # Certificate verification is switched off only for the "dev" environment.
    if self.environment == "dev":
        verification = ssl.CERT_NONE
    else:
        verification = ssl.CERT_REQUIRED

    self._ssl_context = create_ssl_context(
        component_name=self.component_name,
        environment=self.environment,
        cert_folder=self.cert_folder,
        verify_mode=verification,
    )
    return self._ssl_context
|
|
159
|
+
|
|
160
|
+
@property
def metadata(self) -> MetadataDict:
    """
    Base metadata attached to gRPC requests.

    The default implementation contributes nothing; subclasses can
    override this property to supply their own entries.

    Returns
    -------
    MetadataDict
        A new, empty metadata dictionary
    """
    base_metadata: MetadataDict = dict()
    return base_metadata
|
|
174
|
+
|
|
175
|
+
def __init__(
    self,
    host: Optional[str] = None,
    port: Optional[int] = None,
    secure: bool = False,
    channel_options: Optional[dict[str, Any]] = None,
    tracer: Optional[Union[Tracer, Logger]] = None,
    retry_policy: Optional[RetryPolicy] = None,
    streaming_retry_policy: Optional[RetryPolicy] = None,
    component_name: str = "manta-sdk",
    environment: Optional[str] = None,
    cert_folder: Optional[Union[str, Path]] = None,
):
    """
    Set up client state; no channel is opened here.

    Parameters
    ----------
    host : Optional[str]
        Host to connect to; a falsy value selects ``DEFAULT_HOST``
    port : Optional[int]
        Port to connect to; a falsy value selects ``DEFAULT_PORT``
    secure : bool
        Whether to use a secure channel
    channel_options : Optional[dict[str, Any]]
        Overrides merged on top of ``DEFAULT_CHANNEL_OPTIONS``
    tracer : Optional[Union[Tracer, Logger]]
        Logger used for tracing; when None a ``Tracer`` wrapping a logger
        named after the concrete class's module and name is created
    retry_policy : Optional[RetryPolicy]
        Retry policy for regular methods
    streaming_retry_policy : Optional[RetryPolicy]
        Retry policy for streaming methods
    component_name : str
        Component name for certificate loading (e.g., 'manta-sdk', 'manta-node')
    environment : Optional[str]
        Environment name for certificate loading (e.g., 'dev', 'staging', 'prod')
    cert_folder : Optional[Union[str, Path]]
        Custom certificate folder path
    """
    # Endpoint
    self.host = host if host else self.DEFAULT_HOST
    self.port = port if port else self.DEFAULT_PORT
    self.secure = secure

    # Certificate settings, read later by the ssl_context property
    self.component_name = component_name
    self.environment = environment
    self.cert_folder = cert_folder

    # Class defaults first, caller overrides win key by key
    merged_options = self.DEFAULT_CHANNEL_OPTIONS.copy()
    if channel_options:
        merged_options.update(channel_options)
    self.config = Configuration(**merged_options)

    self.connection_key = ConnectionKey(
        host=self.host, port=self.port, secure=self.secure
    )

    # Connection state
    self._channel = None
    self._is_connected = False

    # Use the supplied tracer, or build one named after the concrete class
    if tracer is not None:
        self.tracer = tracer
    else:
        logger_name = f"{self.__class__.__module__}.{self.__class__.__name__}"
        self.tracer = Tracer(getLogger(logger_name))

    self.tracer.debug(f"Initialized client for {self.connection_key}")

    # Retry policies: explicit argument, else class default
    self.retry_policy = retry_policy or self.DEFAULT_RETRY_POLICY
    self.streaming_retry_policy = (
        streaming_retry_policy or self.DEFAULT_STREAMING_RETRY_POLICY
    )

    # Tasks currently inside a tracked service call
    self._active_calls: list[asyncio.Task] = []
|
|
253
|
+
|
|
254
|
+
async def connect(self) -> Channel:
    """
    Return the channel for this client, creating it when needed.

    When the client is already marked connected and holds a channel,
    that channel is returned unchanged. Otherwise a new ``Channel`` is
    constructed from the stored host, port, configuration and SSL
    context, and the client is marked connected.

    Returns
    -------
    Channel
        The gRPC channel for making RPC calls

    Examples
    --------
    >>> client = MyServiceClient("localhost", 50051)
    >>> channel = await client.connect()
    >>> await client.disconnect()
    """
    reusable = self.is_connected and self._channel is not None
    if not reusable:
        self.tracer.debug(f"Connecting to {self.connection_key}")
        self._channel = Channel(
            self.host, self.port, config=self.config, ssl=self.ssl_context
        )
        self._is_connected = True
        return self._channel

    self.tracer.debug(f"Already connected to {self.connection_key}")
    return self._channel
|
|
282
|
+
|
|
283
|
+
async def disconnect(self) -> None:
    """
    Disconnect from the gRPC service and clean up resources.

    Steps, in order:

    1. Cancel every tracked call that is still running, except the task
       that is executing ``disconnect()`` itself, and wait up to two
       seconds for them to finish.
    2. Clear the list of tracked calls.
    3. Close the channel in an executor thread, waiting at most 500 ms.
    4. Reset the connection state, whether or not the close succeeded.

    Calling this on a client that is already disconnected is a no-op.
    """
    if not self.is_connected and self._channel is None:
        self.tracer.debug(
            f"Already disconnected or channel is None for {self.connection_key}."
        )
        return

    self.tracer.debug(f"Disconnecting from {self.connection_key}...")

    # The task running disconnect() may itself be a tracked call (for
    # example when disconnect() is invoked from inside a service call).
    # Cancelling and then awaiting it would make this coroutine cancel
    # and wait on itself, so it is left out.
    current_task = asyncio.current_task()
    tasks_to_cancel = [
        task
        for task in self._active_calls
        if not task.done() and task is not current_task
    ]

    if tasks_to_cancel:
        self.tracer.debug(
            f"Attempting to cancel {len(tasks_to_cancel)} active call(s) for {self.connection_key}."
        )
        for task in tasks_to_cancel:
            task.cancel()  # Schedule cancellation

        # Give the tasks a bounded amount of time to run their cleanup
        # code (e.g. finally blocks) before moving on.
        task_cleanup_timeout = 2.0  # 2 seconds to complete cancellation
        try:
            results = await asyncio.wait_for(
                asyncio.gather(*tasks_to_cancel, return_exceptions=True),
                timeout=task_cleanup_timeout,
            )
            for task_ref, result in zip(tasks_to_cancel, results):
                if isinstance(result, asyncio.CancelledError):
                    self.tracer.debug(
                        f"Task {task_ref} was successfully cancelled."
                    )
                elif isinstance(result, Exception):
                    self.tracer.error(
                        f"Task {task_ref} raised an exception during/after cancellation: {result}",
                        exc_info=result,
                    )
                else:
                    self.tracer.debug(
                        f"Task {task_ref} completed (result: {result}) before cancellation fully processed or was not cancelled."
                    )
        except asyncio.TimeoutError:
            self.tracer.warning(
                f"Task cancellation timed out after {task_cleanup_timeout}s for {self.connection_key}. "
                f"Forcefully clearing {len(tasks_to_cancel)} task(s)."
            )

    # Clear the list of active calls after attempting to process them
    self._active_calls.clear()

    # channel.close() is synchronous; it is run in an executor thread with
    # a short timeout so a slow close cannot stall the event loop.
    if self._channel:
        self.tracer.debug(f"Closing channel for {self.connection_key}...")
        loop = asyncio.get_running_loop()
        try:
            await asyncio.wait_for(
                loop.run_in_executor(None, self._channel.close),
                timeout=0.5,  # Fail fast - don't wait more than 500ms
            )
            self.tracer.debug(
                f"Channel closed successfully for {self.connection_key}"
            )
        except asyncio.TimeoutError:
            self.tracer.warning(
                f"Channel close timed out after 500ms for {self.connection_key}. "
                "Proceeding without waiting - OS will cleanup socket on process exit."
            )
        except Exception as e:
            self.tracer.warning(
                f"Warning closing channel for {self.connection_key}: {e}. "
                "Proceeding - OS will cleanup socket on process exit."
            )
        finally:
            # Always reset state, even if close failed
            self._channel = None
            self._is_connected = False
    else:
        # Connected flag set without a channel: still leave the client in
        # a consistent disconnected state.
        self._is_connected = False

    self.tracer.debug(f"Disconnect completed for {self.connection_key}")
|
|
373
|
+
|
|
374
|
+
def _finalize_cleanup_resources(self) -> None:
    """
    Synchronous best-effort cleanup: request cancellation of tracked
    calls, close the channel and reset the connection state.

    Errors from cancelling a task or closing the channel are logged and
    not re-raised. ``__del__`` uses this as its cleanup path.
    """
    # Ask every unfinished tracked call to cancel.
    if self._active_calls:
        self.tracer.debug(
            f"Requesting cancellation for {len(self._active_calls)} active call(s) during cleanup for {self.connection_key}."
        )
        # Walk a snapshot so the list itself is not iterated while live.
        for task in tuple(self._active_calls):
            if task.done():
                continue
            try:
                # Only requests cancellation; processing it is up to the event loop.
                task.cancel()
            except Exception as err:
                self.tracer.error(
                    f"Error requesting task cancellation for {task} during cleanup: {err}",
                    exc_info=True,
                )
        self._active_calls.clear()

    # Close the channel if there is one.
    channel = self._channel
    if channel:
        self.tracer.debug(
            f"Closing channel for {self.connection_key} during cleanup."
        )
        try:
            channel.close()  # synchronous call
        except Exception as err:
            self.tracer.error(
                f"Error closing channel for {self.connection_key} during cleanup: {err}",
                exc_info=True,
            )
        finally:
            self._is_connected = False

    # Leave the client in the disconnected state regardless of the above.
    self._channel = None
    self._is_connected = False
    self.tracer.debug(f"Resource cleanup finalized for {self.connection_key}.")
|
|
415
|
+
|
|
416
|
+
def __del__(self):
    """
    Last-resort cleanup when the client is garbage collected.

    Explicitly calling ``disconnect()`` or using the async context manager
    is preferred. Cleanup is attempted only when the client still looks
    connected or has tracked calls, and only while an event loop is running.
    """
    # Interpreter shutdown: skip everything to avoid ImportError.
    try:
        shutting_down = sys.meta_path is None
    except ImportError:
        return
    if shutting_down:
        return

    # A partially initialised object may lack these attributes.
    try:
        connected = self.is_connected
        pending_calls = self._active_calls
    except AttributeError:
        return

    if not (connected or pending_calls):
        return

    try:
        self.tracer.warning(
            f"Client for {self.connection_key} was not explicitly disconnected. "
            f"Attempting synchronous cleanup in __del__. Active calls: {len(self._active_calls)}"
        )
    except (ImportError, AttributeError, ValueError):
        # Modules may be gone or logging streams closed during teardown
        # (ValueError: I/O operation on closed file).
        pass

    # Cleanup runs only while an event loop is running; without one,
    # channel.close() is skipped so it cannot block.
    # NOTE(review): a RuntimeError raised by the cleanup call itself would
    # also land in the except branch below - confirm that is intended.
    try:
        asyncio.get_running_loop()
        self._finalize_cleanup_resources()
    except RuntimeError:  # No running event loop
        try:
            self.tracer.warning(
                f"No running event loop detected in __del__ for {self.connection_key}. "
                "Skipping channel cleanup to avoid blocking. Python shutdown will handle resource cleanup."
            )
        except (ImportError, AttributeError, ValueError):
            # Logging streams might already be closed during shutdown.
            pass
        # Deliberately no cleanup call here; resources are left to
        # Python's shutdown.
|
|
467
|
+
|
|
468
|
+
@property
def is_connected(self) -> bool:
    """
    Report the client's connected flag.

    Returns
    -------
    bool
        The current value of the internal ``_is_connected`` flag
    """
    connected: bool = self._is_connected
    return connected
|
|
479
|
+
|
|
480
|
+
async def __aenter__(self) -> "GrpcClientBase":
    """
    Connect on entry to ``async with``.

    Returns
    -------
    GrpcClientBase
        This client, after ``connect()`` has been awaited
    """
    _channel = await self.connect()  # the channel itself is not needed here
    return self
|
|
491
|
+
|
|
492
|
+
async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
    """
    Disconnect on exit from ``async with``.

    ``disconnect()`` is awaited whether or not the block raised. The
    exception arguments are ignored and the method returns ``None``, so
    an exception from the block is not suppressed.
    """
    await self.disconnect()
    return None
|
|
500
|
+
|
|
501
|
+
@contextlib.asynccontextmanager
async def with_stub(self):
    """
    Async context manager yielding a service stub bound to the channel.

    The stub class comes from ``_get_stub_class()`` and is instantiated
    with the channel returned by ``connect()``. Nothing is closed on exit;
    the channel stays owned by the client.

    Yields
    ------
    ServiceStub
        The service stub for making RPC calls

    Examples
    --------
    >>> client = MyServiceClient("localhost", 50051)
    >>> async with client.with_stub() as stub:
    ...     response = await stub.get_data(request)
    """
    # The stub class is resolved first, then the channel is obtained.
    yield self._get_stub_class()(await self.connect())
|
|
521
|
+
|
|
522
|
+
async def _ensure_connected(self) -> None:
    """
    Call ``connect()`` unless the client is already marked connected.

    Internal helper for code paths that need a connection before making
    service calls.

    Returns
    -------
    None

    Raises
    ------
    Exception
        Whatever ``connect()`` raises is propagated unchanged
    """
    if self.is_connected:
        return
    await self.connect()
|
|
540
|
+
|
|
541
|
+
async def _reset_channel(self) -> None:
    """Close the dead channel and clear state so the next call reconnects."""
    stale_channel = self._channel
    if not stale_channel:
        # Nothing to close; just make sure the flag says disconnected.
        self._is_connected = False
        return

    loop = asyncio.get_running_loop()
    try:
        # close() is synchronous: run it in an executor, wait at most 500 ms.
        await asyncio.wait_for(
            loop.run_in_executor(None, stale_channel.close),
            timeout=0.5,
        )
    except Exception as e:  # includes asyncio.TimeoutError
        self.tracer.debug(f"Channel close during GOAWAY reset: {e}")
    finally:
        self._channel = None
        self._is_connected = False
|
|
557
|
+
|
|
558
|
+
async def health_check(self) -> bool:
    """
    Report whether a connection can be ensured.

    This basic check only awaits ``_ensure_connected()``; no RPC is issued.
    Subclasses may override it with a service-specific probe.

    Returns
    -------
    bool
        True if ``_ensure_connected()`` completed, False if it raised
    """
    try:
        await self._ensure_connected()
    except Exception as e:
        self.tracer.debug(f"Health check failed: {e}")
        return False
    else:
        return True
|
|
576
|
+
|
|
577
|
+
def create_metadata(
    self, extra_metadata: Optional[MetadataDict] = None
) -> MetadataDict:
    """
    Build the metadata for one gRPC call.

    Starts from a copy of the ``metadata`` property and overlays
    ``extra_metadata``; on key collisions the extra value wins. Neither
    input dictionary is modified.

    Parameters
    ----------
    extra_metadata : Optional[MetadataDict]
        Extra metadata to add to the call

    Returns
    -------
    MetadataDict
        The merged metadata
    """
    # Read the property once: subclasses may compute it on each access,
    # and the emptiness check and the copy must look at the same object.
    base_metadata = self.metadata
    md = base_metadata.copy() if base_metadata else {}

    if extra_metadata:
        md.update(extra_metadata)

    return md
|
|
601
|
+
|
|
602
|
+
@contextlib.asynccontextmanager
async def _track_service_call(self):
    """
    Register the current task as an active call for the duration of the block.

    The task is appended to ``_active_calls`` on entry and removed on exit
    if it is still listed. ``disconnect()`` uses that list to cancel
    ongoing calls.

    Yields
    ------
    None
    """
    running_task = asyncio.current_task()
    is_tracked = running_task is not None
    if is_tracked:
        self._active_calls.append(running_task)

    try:
        yield
    finally:
        # The list may already have been cleared, hence the membership check.
        if is_tracked and running_task in self._active_calls:
            self._active_calls.remove(running_task)
|
|
624
|
+
|
|
625
|
+
@classmethod
async def create(
    cls: Type[T],
    host: Optional[str] = None,
    port: Optional[int] = None,
    **kwargs,
) -> T:
    """
    Build a client and connect it in one step.

    Parameters
    ----------
    host : Optional[str]
        The hostname of the gRPC server
    port : Optional[int]
        The port number of the gRPC server
    **kwargs
        Additional arguments to pass to the constructor

    Returns
    -------
    GrpcClientBase
        An instance of ``cls`` on which ``connect()`` has been awaited
    """
    instance = cls(host=host, port=port, **kwargs)
    await instance.connect()
    return instance
|
|
652
|
+
|
|
653
|
+
async def call_service_method(
    self,
    method_name: str,
    request: Any,
    metadata: Optional[MetadataDict] = None,
    retry_policy: Optional[RetryPolicy] = None,
    error_class: Optional[Type[MantaError]] = None,
    include_operation: bool = True,
    include_request: bool = True,
    include_metadata: bool = True,
    log_call_stack: bool = False,
) -> Any:
    """
    Call a unary service method with retries.

    The call is wrapped with ``with_retry`` and, on every attempt, is
    tracked as an active call, obtains a stub through ``with_stub()`` and
    invokes ``method_name`` on it with the merged metadata.

    Parameters
    ----------
    method_name : str
        The name of the stub method to call
    request : Any
        The request object to pass to the method
    metadata : Optional[MetadataDict]
        Additional metadata to include with the request
    retry_policy : Optional[RetryPolicy]
        Retry policy to use (falls back to the client's policy if None)
    error_class : Optional[Type[MantaError]]
        The exception class to wrap the error in
    include_operation : bool
        Whether to emit the "Calling"/"Completed" debug logs at all
    include_request : bool
        Whether to include the request in the "Calling" log
    include_metadata : bool
        Whether to include the merged metadata in the "Calling" log
    log_call_stack : bool
        Whether to log the call stack when an error is handled

    Returns
    -------
    Any
        The response from the service method

    Raises
    ------
    StreamTerminatedError
        Re-raised unchanged after the channel has been reset
    Exception
        Any other failure is re-raised as the result of ``wrap_exception``
    """
    # Explicit policy wins over the client's default.
    policy = retry_policy or self.retry_policy

    @with_retry(
        tracer=self.tracer,
        policy=policy,
        operation_name=f"{self.__class__.__name__}.{method_name}",
    )
    async def _execute_call():
        try:
            async with self._track_service_call():
                async with self.with_stub() as stub:
                    method = getattr(stub, method_name)

                    final_metadata = self.create_metadata(metadata)

                    # Build the log line only when it will be emitted, so a
                    # silenced call does not pay for formatting the request.
                    # NOTE(review): metadata may carry credentials; callers
                    # can pass include_metadata=False - confirm the default.
                    if include_operation:
                        message = f"Calling {method_name}"
                        if include_request:
                            message += f" with {request}"
                        if include_metadata:
                            message += f" with metadata {final_metadata}"
                        self.tracer.debug(message)

                    result = await method(request, metadata=final_metadata)

                    if include_operation:
                        self.tracer.debug(f"Completed {method_name}")

                    return result
        except StreamTerminatedError:
            # Drop the dead channel so the next attempt opens a new one,
            # then re-raise the original error for the retry wrapper.
            self.tracer.warning(
                f"GOAWAY received during {method_name}, resetting channel for reconnect"
            )
            await self._reset_channel()
            raise
        except GRPCError as e:
            self.tracer.error(
                f"GRPC Error in {method_name}: status={e.status}, message={e.message}"
            )
            if hasattr(e, "details") and e.details:
                self.tracer.error(f"Error details: {e.details}")

            if log_call_stack:
                self.tracer.error(f"Call stack: {traceback.format_exc()}")

            raise wrap_exception(
                e,
                message=f"Error in {method_name}",
                error_class=error_class,
            )
        except Exception as e:
            self.tracer.exception(f"Unexpected error in {method_name}: {str(e)}")
            if log_call_stack:
                self.tracer.error(f"Call stack: {traceback.format_exc()}")
            raise wrap_exception(
                e,
                message=f"Error in {method_name}",
                error_class=error_class,
            )

    return await _execute_call()
|
|
766
|
+
|
|
767
|
+
async def stream_service_method(
    self,
    method_name: str,
    request: Any,
    metadata: Optional[MetadataDict] = None,
    retry_policy: Optional[RetryPolicy] = None,
    error_class: Optional[Type[MantaError]] = None,
    log_call_stack: bool = False,
    include_operation: bool = True,
    include_request: bool = True,
    include_metadata: bool = True,
) -> AsyncIterator[Any]:
    """
    Execute a streaming service method with retry and error handling.

    The stub method named ``method_name`` is looked up on the stub obtained
    from ``self.with_stub()``, invoked with ``request`` and the metadata
    produced by ``self.create_metadata(metadata)``, and every item it
    produces is relayed to the caller. The whole call runs inside
    ``self._track_service_call()`` and is wrapped by ``with_streaming_retry``.

    Parameters
    ----------
    method_name : str
        The name of the stub attribute to call
    request : Any
        The request object to pass to the method
    metadata : Optional[MetadataDict]
        Additional metadata, passed through ``self.create_metadata``
    retry_policy : Optional[RetryPolicy]
        Custom retry policy for this call; when falsy,
        ``self.streaming_retry_policy`` is used instead
    error_class : Optional[Type[MantaError]]
        Forwarded to ``wrap_exception`` as the class to wrap errors in
    log_call_stack : bool
        Whether to additionally log ``traceback.format_exc()`` on errors
    include_operation : bool
        Whether to emit the "Streaming ..." and "Completed streaming ..."
        debug lines at all. The per-item "Received response ..." debug line
        is emitted regardless of this flag.
    include_request : bool
        Whether to append the request to the "Streaming ..." debug line
    include_metadata : bool
        Whether to append the final metadata to the "Streaming ..." debug line

    Yields
    ------
    Any
        Each response item from the streaming service method

    Raises
    ------
    StreamTerminatedError
        Re-raised unchanged after the channel has been reset
    MantaError
        Whatever ``wrap_exception`` builds for a ``GRPCError`` or any other
        ``Exception`` raised during the call (presumably a ``MantaError``
        subclass -- confirm against ``wrap_exception``)
    """
    # Use the specified retry policy or the client's default for streaming
    policy = retry_policy or self.streaming_retry_policy

    # Execute the call with streaming retry
    @with_streaming_retry(
        tracer=self.tracer,
        policy=policy,
        operation_name=f"{self.__class__.__name__}.{method_name}",
    )
    async def _execute_streaming_call() -> AsyncIterator[Any]:
        async with self._track_service_call():
            try:
                async with self.with_stub() as stub:
                    method = getattr(stub, method_name)

                    final_metadata = self.create_metadata(metadata)

                    # Log information about the request
                    message = f"Streaming {method_name}"
                    if include_request:
                        message += f" with {request}"
                    if include_metadata:
                        message += f" with metadata {final_metadata}"
                    if include_operation:
                        self.tracer.debug(message)

                    response_count = 0

                    async for response in method(request, metadata=final_metadata):
                        self.tracer.debug(f"Received response from {method_name}")
                        response_count += 1
                        yield response

                    # Log completion of the call
                    if include_operation:
                        self.tracer.debug(
                            f"Completed streaming {method_name} with {response_count} items"
                        )

            except StreamTerminatedError:
                # Drop the current channel and re-raise the original error
                # unwrapped (presumably so the retry decorator can act on
                # it -- confirm against with_streaming_retry).
                self.tracer.warning(
                    f"GOAWAY received during streaming {method_name}, resetting channel for reconnect"
                )
                await self._reset_channel()
                raise
            except GRPCError as e:
                # Add enhanced error logging
                self.tracer.error(
                    f"GRPC Error in streaming {method_name}: status={e.status}, message={e.message}"
                )
                if hasattr(e, "details") and e.details:
                    self.tracer.error(f"Error details: {e.details}")

                # Include stack trace
                if log_call_stack:
                    self.tracer.error(f"Call stack: {traceback.format_exc()}")

                raise wrap_exception(
                    e,
                    message=f"Error in streaming {method_name}",
                    error_class=error_class,
                )
            except Exception as e:
                self.tracer.exception(
                    f"Unexpected error in {method_name}: {str(e)}"
                )
                if log_call_stack:
                    self.tracer.error(f"Call stack: {traceback.format_exc()}")
                raise wrap_exception(
                    e,
                    message=f"Error in streaming {method_name}",
                    error_class=error_class,
                )

    # Relay the responses one by one. Keep a handle on the inner stream so it
    # can be closed explicitly: if the consumer stops iterating early (break,
    # exception, cancellation), the inner generator would otherwise stay
    # suspended inside `_track_service_call()` / `with_stub()` until it is
    # eventually finalized, delaying the release of those contexts.
    stream = _execute_streaming_call()
    try:
        async for response in stream:
            yield response
    finally:
        # A no-op when the stream already ran to completion or raised. The
        # getattr guard covers a retry wrapper that returns a plain async
        # iterator without an `aclose` method.
        aclose = getattr(stream, "aclose", None)
        if aclose is not None:
            await aclose()
|