kbm-ledsas-sdk 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kbm_ledsas_sdk/__init__.py +77 -0
- kbm_ledsas_sdk/amqp/__init__.py +0 -0
- kbm_ledsas_sdk/amqp/consumer.py +457 -0
- kbm_ledsas_sdk/amqp/publisher.py +316 -0
- kbm_ledsas_sdk/amqp/topology.py +262 -0
- kbm_ledsas_sdk/app.py +772 -0
- kbm_ledsas_sdk/blob/__init__.py +10 -0
- kbm_ledsas_sdk/blob/azure_client.py +661 -0
- kbm_ledsas_sdk/blob/direct_operations.py +417 -0
- kbm_ledsas_sdk/blob/operations.py +277 -0
- kbm_ledsas_sdk/health/__init__.py +39 -0
- kbm_ledsas_sdk/health/checks.py +68 -0
- kbm_ledsas_sdk/health/server.py +268 -0
- kbm_ledsas_sdk/models/__init__.py +34 -0
- kbm_ledsas_sdk/models/blob.py +177 -0
- kbm_ledsas_sdk/models/envelope.py +195 -0
- kbm_ledsas_sdk/models/errors.py +105 -0
- kbm_ledsas_sdk/models/messages.py +145 -0
- kbm_ledsas_sdk/py.typed +0 -0
- kbm_ledsas_sdk/runtime/__init__.py +19 -0
- kbm_ledsas_sdk/runtime/config.py +345 -0
- kbm_ledsas_sdk/runtime/context.py +189 -0
- kbm_ledsas_sdk/runtime/env_vars.py +249 -0
- kbm_ledsas_sdk/runtime/handler.py +443 -0
- kbm_ledsas_sdk/runtime/security.py +147 -0
- kbm_ledsas_sdk/transport/__init__.py +19 -0
- kbm_ledsas_sdk/transport/base.py +191 -0
- kbm_ledsas_sdk/transport/direct.py +512 -0
- kbm_ledsas_sdk/transport/factory.py +49 -0
- kbm_ledsas_sdk/transport/mock.py +261 -0
- kbm_ledsas_sdk/utils/__init__.py +45 -0
- kbm_ledsas_sdk/utils/logging.py +263 -0
- kbm_ledsas_sdk/utils/tracing.py +214 -0
- kbm_ledsas_sdk-0.3.2.dist-info/METADATA +203 -0
- kbm_ledsas_sdk-0.3.2.dist-info/RECORD +39 -0
- kbm_ledsas_sdk-0.3.2.dist-info/WHEEL +5 -0
- kbm_ledsas_sdk-0.3.2.dist-info/licenses/LICENSE +201 -0
- kbm_ledsas_sdk-0.3.2.dist-info/licenses/NOTICE +9 -0
- kbm_ledsas_sdk-0.3.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""
|
|
2
|
+
KeborMed LEDSAS SDK - Python client for Local External Data Sources Action Service.
|
|
3
|
+
|
|
4
|
+
The SDK lets you build LEDSAS data-processing services in Python. A
|
|
5
|
+
service registers handler functions, the SDK consumes commands from
|
|
6
|
+
RabbitMQ, runs the handler, and (optionally) publishes the response
|
|
7
|
+
back to the caller.
|
|
8
|
+
|
|
9
|
+
Quick Start:
|
|
10
|
+
import logging
|
|
11
|
+
from kbm_ledsas_sdk import ServiceApp, errors
|
|
12
|
+
|
|
13
|
+
logging.basicConfig(level=logging.INFO)
|
|
14
|
+
|
|
15
|
+
app = ServiceApp(service_name="csv-processor")
|
|
16
|
+
|
|
17
|
+
@app.handler("ProcessCSV")
|
|
18
|
+
async def handle(ctx, req: dict) -> dict:
|
|
19
|
+
csv_uri = req.get("csv_uri")
|
|
20
|
+
if not csv_uri:
|
|
21
|
+
# user_message is what the caller sees; the internal
|
|
22
|
+
# `message` always goes to logs.
|
|
23
|
+
raise errors.Permanent(
|
|
24
|
+
"Validation: csv_uri missing",
|
|
25
|
+
user_message="csv_uri is required",
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
text = await ctx.blob.download_text(csv_uri)
|
|
29
|
+
container = csv_uri.replace("azblob://", "").split("/", 1)[0]
|
|
30
|
+
out = await ctx.blob.upload_json(
|
|
31
|
+
container=container,
|
|
32
|
+
obj={"rows": len(text.splitlines()) - 1},
|
|
33
|
+
# idempotency_key (not message_id) so a DLQ replay of the
|
|
34
|
+
# same logical request overwrites the same blob. overwrite=True
|
|
35
|
+
# is required for that pattern — without it, a replay would
|
|
36
|
+
# fail with BlobAlreadyExists.
|
|
37
|
+
path=f"result-{ctx.idempotency_key}.json",
|
|
38
|
+
overwrite=True,
|
|
39
|
+
)
|
|
40
|
+
return {"result_uri": out.uri}
|
|
41
|
+
|
|
42
|
+
# Optional liveness / readiness hooks. The SDK already exposes
|
|
43
|
+
# sensible defaults; these layer on top.
|
|
44
|
+
@app.readiness_check("warmup_done")
|
|
45
|
+
def _():
|
|
46
|
+
return True
|
|
47
|
+
|
|
48
|
+
if __name__ == "__main__":
|
|
49
|
+
app.run()
|
|
50
|
+
|
|
51
|
+
Run with:
|
|
52
|
+
export KBM_LEDSAS_RABBITMQ_URL=amqp://guest:guest@127.0.0.1:5672/
|
|
53
|
+
export KBM_LEDSAS_BLOB_CONN_STRING="DefaultEndpointsProtocol=http;..."
|
|
54
|
+
python my_service.py
|
|
55
|
+
|
|
56
|
+
The service also exposes liveness/readiness HTTP endpoints at
|
|
57
|
+
``http://127.0.0.1:${KBM_LEDSAS_HEALTH_PORT:-8090}/health`` and ``/ready``
|
|
58
|
+
(loopback only by default; set ``KBM_LEDSAS_HEALTH_HOST=0.0.0.0`` to
|
|
59
|
+
bind on all interfaces). See ``docs/SDK_API_REFERENCE.md`` for the full API.
|
|
60
|
+
"""
|
|
61
|
+
|
|
62
|
+
from . import models
|
|
63
|
+
from .app import ServiceApp
|
|
64
|
+
from .models import errors
|
|
65
|
+
from .runtime.context import ExecutionContext
|
|
66
|
+
from .utils.logging import json_log_formatter
|
|
67
|
+
|
|
68
|
+
# Package version string (matches the 0.3.2 wheel this module ships in).
__version__ = "0.3.2"

# Names exported by ``from kbm_ledsas_sdk import *``; every entry is either
# imported above or defined in this module.
__all__ = [
    "ExecutionContext",
    "ServiceApp",
    "__version__",
    "errors",
    "json_log_formatter",
    "models",
]
|
|
File without changes
|
|
@@ -0,0 +1,457 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AMQP command consumer for Direct transport mode.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import json
|
|
7
|
+
import logging
|
|
8
|
+
|
|
9
|
+
from aio_pika.abc import AbstractIncomingMessage, AbstractQueue
|
|
10
|
+
from pydantic import ValidationError
|
|
11
|
+
|
|
12
|
+
from kbm_ledsas_sdk.models.messages import Command
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class AMQPConsumer:
    """
    Consumes commands from a RabbitMQ queue.

    This consumer is used in Direct transport mode (dev/testing).
    It provides a simple interface for consuming LEDSAS commands
    from a RabbitMQ queue.

    Features:
    - Manual message acknowledgment (no auto-ack)
    - Pushes commands to asyncio.Queue for async iteration
    - Tracks pending messages for ACK/NACK correlation
    - JSON parsing with validation
    - Dead-letters (reject, requeue=False) any message that is oversized,
      undecodable, malformed, of the wrong envelope type, a duplicate of a
      pending id, or arrives while the pending cap is reached

    Attributes:
        queue: RabbitMQ queue to consume from
        prefetch_count: QoS prefetch applied to the queue's channel on start
        max_pending: Cap on ``pending_messages`` size; a value <= 0
            disables the cap
        max_payload_bytes: Maximum accepted body size; 0 disables the check
        command_queue: Internal asyncio.Queue for commands
        pending_messages: Maps message_id -> AMQP IncomingMessage
        retry_counts: Maps message_id -> number of nack(requeue=True) calls
        consumer_tag: RabbitMQ consumer tag (set when started)
    """

    def __init__(
        self,
        queue: AbstractQueue,
        prefetch_count: int = 10,
        max_pending_multiplier: int = 10,
        max_payload_bytes: int = 16 * 1024 * 1024,
    ):
        """
        Initialize AMQP consumer.

        Args:
            queue: RabbitMQ queue to consume from
            prefetch_count: Number of messages to prefetch
            max_pending_multiplier: Cap on ``pending_messages`` dict
                size, expressed as a multiple of ``prefetch_count``.
                A consumer with prefetch=10 and multiplier=10 caps at
                100 in-flight entries — well above legitimate steady
                state, low enough that a hostile sender flooding
                distinct message_ids can't grow it without bound.
                The cap never drops below ``prefetch_count`` itself.
                If the resulting cap is <= 0 (e.g. ``prefetch_count=0``)
                the cap is disabled rather than rejecting every message.
            max_payload_bytes: Reject messages whose body exceeds this
                size (DLQ them, single WARNING line). 0 disables the
                check. Default 16 MiB — large payloads should be
                shipped via blob storage, not AMQP body.
        """
        self.queue = queue
        self.prefetch_count = prefetch_count
        self.max_pending = max(prefetch_count * max_pending_multiplier, prefetch_count)
        self.max_payload_bytes = max_payload_bytes

        # Internal queue for commands (consumed by Transport.subscribe())
        self.command_queue: asyncio.Queue[Command] = asyncio.Queue()

        # Track pending messages: message_id -> IncomingMessage
        self.pending_messages: dict[str, AbstractIncomingMessage] = {}

        # Retry attempt count per message_id. Incremented on every
        # nack(requeue=True) and cleared on ack() / nack(requeue=False).
        # app.py reads this via DirectTransport.get_retry_count() to
        # enforce KBM_LEDSAS_MAX_RETRIES. In-process only — survives
        # requeue cycles in the same consumer but resets across
        # consumer restarts (a restart is a fresh attempt from the
        # SDK's POV; the broker doesn't track per-message retry counts
        # for simple requeue, only for full DLX cycles).
        self.retry_counts: dict[str, int] = {}

        # Consumer tag (set when started)
        self.consumer_tag: str | None = None

    async def start(self) -> None:
        """
        Start consuming messages from the queue.

        Sets the channel prefetch (QoS) and registers ``_on_message`` as the
        callback with manual acknowledgment.

        Raises:
            Exception: If consumer startup fails (logged, then re-raised)
        """
        logger.info(
            "Starting AMQP consumer",
            extra={
                "queue": self.queue.name,
                "prefetch": self.prefetch_count,
            },
        )

        try:
            # Set prefetch count (QoS)
            await self.queue.channel.set_qos(prefetch_count=self.prefetch_count)

            # Start consuming with manual ack
            self.consumer_tag = await self.queue.consume(
                callback=self._on_message,
                no_ack=False,  # Manual acknowledgment
            )

            logger.info("AMQP consumer started", extra={"consumer_tag": self.consumer_tag})

        except Exception as e:
            logger.error("Failed to start AMQP consumer", exc_info=True, extra={"error": str(e)})
            raise

    async def stop(self) -> None:
        """
        Stop consuming messages.

        Cancels the consumer and clears pending messages. A no-op when the
        consumer was never started. Errors while cancelling are logged and
        swallowed (best-effort shutdown); in that case ``consumer_tag`` and
        ``pending_messages`` are left untouched.
        """
        if self.consumer_tag:
            logger.info("Stopping AMQP consumer")

            try:
                await self.queue.cancel(self.consumer_tag)
                self.consumer_tag = None

                # Log pending messages (they will be requeued by RabbitMQ)
                if self.pending_messages:
                    logger.warning(
                        f"Consumer stopped with {len(self.pending_messages)} "
                        "pending messages (will be requeued)"
                    )

                # Clear pending messages
                self.pending_messages.clear()

                logger.info("AMQP consumer stopped")

            except Exception as e:
                logger.error(
                    "Error stopping AMQP consumer",
                    exc_info=True,
                    extra={"error": str(e)},
                )

    async def _dead_letter(self, message: AbstractIncomingMessage) -> None:
        """Reject ``message`` without requeue so the broker dead-letters it."""
        await message.reject(requeue=False)

    def _at_pending_cap(self) -> bool:
        """Return True when ``pending_messages`` has reached an enabled cap.

        A cap <= 0 (produced by ``prefetch_count=0``) means "no cap"; without
        this guard ``len(...) >= 0`` would dead-letter every message.
        """
        return self.max_pending > 0 and len(self.pending_messages) >= self.max_pending

    @staticmethod
    def _summarize_validation_error(error: ValidationError) -> str:
        """Render a validation error as a short ``loc: msg; ...`` string.

        Walks ``error.errors()`` so the pydantic-docs URL noise of
        ``str(error)`` stays out of the log line; falls back to
        ``str(error)`` when there are no individual entries.
        """
        prefix = "Value error, "
        parts = []
        for err in error.errors():
            loc = ".".join(str(p) for p in err.get("loc", ()))
            msg = err.get("msg", "")
            if msg.startswith(prefix):
                msg = msg[len(prefix) :]
            parts.append(f"{loc}: {msg}" if loc else msg)
        return "; ".join(parts) if parts else str(error)

    async def _on_message(self, message: AbstractIncomingMessage) -> None:
        """
        Handle incoming AMQP message.

        Parses JSON body to Command and pushes to internal queue. Every
        failure path dead-letters the message (reject with requeue=False);
        nothing is requeued from here because all checks are deterministic.

        Args:
            message: Incoming AMQP message
        """
        # Set once this delivery has been stored in pending_messages, so the
        # catch-all below can undo the registration before rejecting.
        registered_id: str | None = None

        try:
            # Payload size limit. Large payloads should travel
            # via blob storage, not the AMQP body. Reject oversize
            # messages straight to DLQ with a single WARNING — no
            # decode, no validate, bounded RSS pressure.
            if self.max_payload_bytes and len(message.body) > self.max_payload_bytes:
                logger.warning(
                    "Payload exceeds KBM_LEDSAS_MAX_PAYLOAD_BYTES; "
                    "dead-lettering (body=%d, max=%d). Use blob storage "
                    "for large data, not the AMQP body.",
                    len(message.body),
                    self.max_payload_bytes,
                )
                await self._dead_letter(message)
                return

            # Deserialize message body to Command
            body = message.body.decode("utf-8")
            data = json.loads(body)
            command = Command.model_validate(data)

            # Enforce envelope.type=="command" on the command
            # exchange. The Envelope schema's Literal accepts four type
            # values (command/response/status/error) because the same
            # model is reused for outbound responses and status updates,
            # but only "command" is legitimate on a *command* queue. A
            # forged response/status/error envelope reaching this queue
            # is either a misconfigured upstream or a hostile reflection
            # attempt — dead-letter it without invoking the handler.
            envelope_type = command.envelope.type
            if envelope_type != "command":
                logger.warning(
                    "Envelope type %r is not 'command' on command exchange; dead-lettering",
                    envelope_type,
                    extra={
                        "message_id": command.envelope.message_id,
                        "envelope_type": envelope_type,
                    },
                )
                await self._dead_letter(message)
                return

            message_id = command.envelope.message_id

            # Duplicate-id guard. If a sender resends the same
            # envelope.message_id while the original is still pending,
            # we can't safely overwrite the dict entry (the original
            # would become un-ack-able). Reject the duplicate to DLQ
            # immediately — almost certainly a replay attack or buggy
            # sender; the broker's x-death header records it.
            if message_id in self.pending_messages:
                logger.warning(
                    "Duplicate message_id while original still pending; "
                    "rejecting duplicate to DLQ",
                    extra={"message_id": message_id},
                )
                await self._dead_letter(message)
                return

            # Cap pending_messages to bound memory. A hostile sender
            # can otherwise grow it without limit by streaming distinct
            # message_ids that crash before ack/nack. The cap is well
            # above legitimate steady state (prefetch × multiplier).
            if self._at_pending_cap():
                logger.warning(
                    "pending_messages dict at cap; rejecting new message to DLQ "
                    "(in-flight=%d, cap=%d). Investigate stuck handlers.",
                    len(self.pending_messages),
                    self.max_pending,
                    extra={"message_id": message_id},
                )
                await self._dead_letter(message)
                return

            self.pending_messages[message_id] = message
            registered_id = message_id

            logger.info(
                "Received command",
                extra={
                    "message_id": message_id,
                    "correlation_id": command.envelope.correlation_id,
                    "command_name": command.envelope.name,
                },
            )

            # Push to internal queue for Transport.subscribe()
            await self.command_queue.put(command)

        except UnicodeDecodeError as e:
            # The body isn't valid UTF-8 (e.g. a stray 0xff byte, or a
            # binary payload published to a text queue). Decoding is
            # deterministic: the same bytes always fail the same way, so
            # requeueing would hot-loop. Dead-letter immediately.
            #
            # No exc_info — the error message (codec + byte offset) is
            # self-contained; a traceback only amplifies log volume when
            # the queue is flooded with binary bodies.
            logger.error(
                "Message body is not valid UTF-8; dead-lettering",
                extra={"error": str(e)},
            )
            await self._dead_letter(message)

        except json.JSONDecodeError as e:
            # Deterministic parse failure: same input → same error. No
            # point retrying. Dead-letter immediately, without a
            # traceback (the decoder message is self-contained).
            logger.error("Failed to parse message JSON", extra={"error": str(e)})
            await self._dead_letter(message)

        except RecursionError as e:
            # Deeply-nested JSON body: json.loads recurses on nested
            # structures and can hit the interpreter recursion limit well
            # before the payload cap. Classified explicitly (rather than
            # via the generic catch-all) so operators can alert on it.
            #
            # No exc_info — the traceback would repeat the same decoder
            # frames and adds nothing beyond "the parser ran out of stack".
            logger.error(
                "Message body exceeds JSON-parsing nesting limit; dead-lettering",
                extra={"error_type": "ExcessiveNesting", "error": str(e)},
            )
            await self._dead_letter(message)

        except ValidationError as e:
            # Envelope validation failed (missing fields, wrong types,
            # etc.). Deterministic, so requeueing would loop forever.
            # Dead-letter immediately with a compact error summary.
            logger.error(
                "Envelope failed schema validation; dead-lettering",
                extra={"error": self._summarize_validation_error(e)},
            )
            await self._dead_letter(message)

        except Exception as e:
            # Catch-all for anything else raised while decoding,
            # validating or enqueueing. Requeueing would just hot-loop,
            # so dead-letter immediately. RecursionError has its own
            # branch above, so a traceback is always emitted here.
            logger.error(
                "Unexpected error processing message; dead-lettering",
                exc_info=True,
                extra={"error": str(e)},
            )
            # If this delivery was already registered, drop the entry:
            # once rejected it can never be acked/nacked, and a stale
            # entry would hold a cap slot and make any redelivery with
            # the same id look like a duplicate.
            if (
                registered_id is not None
                and self.pending_messages.get(registered_id) is message
            ):
                del self.pending_messages[registered_id]
            await self._dead_letter(message)

    async def consume(self) -> Command:
        """
        Wait for and return the next command from the internal queue.

        This is a plain coroutine, not an async generator: each call
        yields exactly one command, so callers such as
        Transport.subscribe() are expected to call it repeatedly. It
        awaits ``command_queue.get()``, which suspends until a command is
        available; it does not raise ``asyncio.QueueEmpty``.

        Returns:
            Next command from the queue
        """
        return await self.command_queue.get()

    async def ack(self, message_id: str) -> None:
        """
        Acknowledge successful command processing.

        Removes message from pending set and sends ACK to RabbitMQ.
        Also clears the per-message retry counter so a future delivery
        of the same id (rare but possible) starts fresh.

        Args:
            message_id: Message ID to acknowledge

        Raises:
            ValueError: If message_id not found in pending messages
            Exception: Whatever ``message.ack()`` raises (logged, re-raised)
        """
        message = self.pending_messages.pop(message_id, None)
        # Cleared even for unknown ids, so a stale counter never lingers.
        self.retry_counts.pop(message_id, None)
        if message is None:
            logger.warning("Cannot ACK unknown message", extra={"message_id": message_id})
            raise ValueError(f"Unknown message_id: {message_id}")

        logger.debug("Acknowledging message", extra={"message_id": message_id})

        try:
            await message.ack()
        except Exception as e:
            logger.error(
                "Failed to ACK message",
                exc_info=True,
                extra={"message_id": message_id, "error": str(e)},
            )
            raise

    async def nack(self, message_id: str, requeue: bool = False) -> None:
        """
        Reject command processing (negative acknowledgment).

        NACKed messages go to DLQ (requeue=False) or back to queue
        (requeue=True) per RabbitMQ DLQ configuration.

        Retry-count bookkeeping:
        - ``requeue=True``: increment retry_counts[message_id] BEFORE
          calling nack — the next delivery will see the higher count.
        - ``requeue=False`` (DLQ): clear retry_counts — the message is
          terminal, no further attempt will read its counter.

        Args:
            message_id: Message ID to reject
            requeue: Whether to requeue (default: False -> DLQ)

        Raises:
            ValueError: If message_id not found in pending messages
            Exception: Whatever the broker call raises (logged, re-raised)
        """
        message = self.pending_messages.pop(message_id, None)
        if message is None:
            logger.warning("Cannot NACK unknown message", extra={"message_id": message_id})
            raise ValueError(f"Unknown message_id: {message_id}")

        # Capture the retry_count we want to *log* BEFORE mutating
        # the dict. For requeue=True we log the new (incremented)
        # count; for requeue=False (DLQ) we log the count this message
        # ended at, not 0 from the just-cleared slot — otherwise the
        # final-DLQ line misleadingly reports retry_count: 0 right
        # after the app-level "Max retries (N) exceeded" line.
        prior_count = self.retry_counts.get(message_id, 0)
        if requeue:
            logged_count = prior_count + 1
            self.retry_counts[message_id] = logged_count
        else:
            self.retry_counts.pop(message_id, None)
            logged_count = prior_count

        logger.info(
            "Rejecting message",
            extra={
                "message_id": message_id,
                "requeue": requeue,
                "retry_count": logged_count,
            },
        )

        try:
            if requeue:
                # Requeue (retry)
                await message.nack(requeue=True)
            else:
                # Send to DLQ (via x-dead-letter-exchange)
                await message.reject(requeue=False)

        except Exception as e:
            logger.error(
                "Failed to NACK message",
                exc_info=True,
                extra={"message_id": message_id, "error": str(e)},
            )
            raise

    def get_retry_count(self, message_id: str) -> int:
        """How many times this message has been NACK-requeued.

        Returns 0 for never-retried or unknown ids. app.py consults
        this before deciding to requeue vs DLQ a Retryable failure,
        enforcing KBM_LEDSAS_MAX_RETRIES.
        """
        return self.retry_counts.get(message_id, 0)

    @property
    def pending_count(self) -> int:
        """Number of messages awaiting acknowledgment."""
        return len(self.pending_messages)
|