puda-comms 0.0.3__py3-none-any.whl → 0.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- puda_comms/command_service.py +34 -22
- puda_comms/machine_client.py +141 -61
- puda_comms/models.py +5 -4
- {puda_comms-0.0.3.dist-info → puda_comms-0.0.4.dist-info}/METADATA +14 -2
- puda_comms-0.0.4.dist-info/RECORD +8 -0
- puda_comms-0.0.3.dist-info/RECORD +0 -8
- {puda_comms-0.0.3.dist-info → puda_comms-0.0.4.dist-info}/WHEEL +0 -0
puda_comms/command_service.py
CHANGED
|
@@ -12,11 +12,11 @@ import json
|
|
|
12
12
|
import logging
|
|
13
13
|
import signal
|
|
14
14
|
from datetime import datetime, timezone
|
|
15
|
-
from typing import Dict, Any, Optional
|
|
15
|
+
from typing import Dict, Any, Optional
|
|
16
16
|
import nats
|
|
17
17
|
from nats.js.client import JetStreamContext
|
|
18
18
|
from nats.aio.msg import Msg
|
|
19
|
-
from puda_comms.models import CommandRequest,
|
|
19
|
+
from puda_comms.models import CommandRequest, CommandResponseStatus, NATSMessage, MessageHeader, MessageType
|
|
20
20
|
|
|
21
21
|
logger = logging.getLogger(__name__)
|
|
22
22
|
|
|
@@ -37,7 +37,7 @@ class ResponseHandler:
|
|
|
37
37
|
def __init__(self, js: JetStreamContext, machine_id: str):
|
|
38
38
|
self.js = js
|
|
39
39
|
self.machine_id = machine_id
|
|
40
|
-
self._pending_responses: Dict[str,
|
|
40
|
+
self._pending_responses: Dict[str, Dict[str, Any]] = {} # {'event': asyncio.Event, 'response': Optional[NATSMessage]}
|
|
41
41
|
self._queue_consumer = None
|
|
42
42
|
self._immediate_consumer = None
|
|
43
43
|
self._initialized = False
|
|
@@ -102,8 +102,8 @@ class ResponseHandler:
|
|
|
102
102
|
|
|
103
103
|
# Get the pending response
|
|
104
104
|
pending = self._pending_responses[key]
|
|
105
|
-
# Store the
|
|
106
|
-
pending['response'] = message
|
|
105
|
+
# Store the NATSMessage directly
|
|
106
|
+
pending['response'] = message
|
|
107
107
|
# Signal that response was received
|
|
108
108
|
# Don't delete here - let get_response() delete it after retrieval
|
|
109
109
|
pending['event'].set()
|
|
@@ -152,7 +152,7 @@ class ResponseHandler:
|
|
|
152
152
|
}
|
|
153
153
|
return event
|
|
154
154
|
|
|
155
|
-
def get_response(self, run_id: str, step_number: int) -> Optional[
|
|
155
|
+
def get_response(self, run_id: str, step_number: int) -> Optional[NATSMessage]:
|
|
156
156
|
"""
|
|
157
157
|
Get the response for a pending command.
|
|
158
158
|
|
|
@@ -161,7 +161,7 @@ class ResponseHandler:
|
|
|
161
161
|
step_number: Step number for the command
|
|
162
162
|
|
|
163
163
|
Returns:
|
|
164
|
-
The NATSMessage
|
|
164
|
+
The NATSMessage if available, None otherwise
|
|
165
165
|
"""
|
|
166
166
|
key = f"{run_id}:{str(step_number)}"
|
|
167
167
|
if key in self._pending_responses:
|
|
@@ -343,6 +343,8 @@ class CommandService:
|
|
|
343
343
|
request: CommandRequest,
|
|
344
344
|
machine_id: str,
|
|
345
345
|
run_id: str,
|
|
346
|
+
user_id: str,
|
|
347
|
+
username: str,
|
|
346
348
|
timeout: int = 120
|
|
347
349
|
) -> Optional[NATSMessage]:
|
|
348
350
|
"""
|
|
@@ -352,6 +354,8 @@ class CommandService:
|
|
|
352
354
|
request: CommandRequest model containing command details
|
|
353
355
|
machine_id: Machine ID to send the command to
|
|
354
356
|
run_id: Run ID for the command
|
|
357
|
+
user_id: User ID who initiated the command
|
|
358
|
+
username: Username who initiated the command
|
|
355
359
|
timeout: Maximum time to wait for response in seconds
|
|
356
360
|
|
|
357
361
|
Returns:
|
|
@@ -364,8 +368,8 @@ class CommandService:
|
|
|
364
368
|
subject = f"{NAMESPACE}.{machine_id}.cmd.queue"
|
|
365
369
|
|
|
366
370
|
logger.info(
|
|
367
|
-
"Sending queue command:
|
|
368
|
-
|
|
371
|
+
"Sending queue command: subject=%s, command=%s, run_id=%s, step_number=%s",
|
|
372
|
+
subject, request.name, run_id, request.step_number
|
|
369
373
|
)
|
|
370
374
|
|
|
371
375
|
# Get or create response handler for this machine
|
|
@@ -374,7 +378,7 @@ class CommandService:
|
|
|
374
378
|
response_event = response_handler.register_pending(run_id, request.step_number)
|
|
375
379
|
|
|
376
380
|
# Build payload
|
|
377
|
-
payload = self._build_command_payload(request, machine_id, run_id)
|
|
381
|
+
payload = self._build_command_payload(request, machine_id, run_id, user_id, username)
|
|
378
382
|
|
|
379
383
|
try:
|
|
380
384
|
# Publish to JetStream
|
|
@@ -397,11 +401,7 @@ class CommandService:
|
|
|
397
401
|
await asyncio.sleep(0.1)
|
|
398
402
|
|
|
399
403
|
# Get the response
|
|
400
|
-
|
|
401
|
-
if response_data is None:
|
|
402
|
-
return None
|
|
403
|
-
|
|
404
|
-
return NATSMessage.model_validate(response_data)
|
|
404
|
+
return response_handler.get_response(run_id, request.step_number)
|
|
405
405
|
|
|
406
406
|
except Exception as e:
|
|
407
407
|
logger.error("Error sending queue command: %s", e)
|
|
@@ -414,6 +414,8 @@ class CommandService:
|
|
|
414
414
|
requests: list[CommandRequest],
|
|
415
415
|
machine_id: str,
|
|
416
416
|
run_id: str,
|
|
417
|
+
user_id: str,
|
|
418
|
+
username: str,
|
|
417
419
|
timeout: int = 120
|
|
418
420
|
) -> Optional[NATSMessage]:
|
|
419
421
|
"""
|
|
@@ -427,6 +429,8 @@ class CommandService:
|
|
|
427
429
|
requests: List of CommandRequest models to send sequentially
|
|
428
430
|
machine_id: Machine ID to send the commands to
|
|
429
431
|
run_id: Run ID for all commands
|
|
432
|
+
user_id: User ID who initiated the commands
|
|
433
|
+
username: Username who initiated the commands
|
|
430
434
|
timeout: Maximum time to wait for each response in seconds
|
|
431
435
|
|
|
432
436
|
Returns:
|
|
@@ -462,6 +466,8 @@ class CommandService:
|
|
|
462
466
|
request=request,
|
|
463
467
|
machine_id=machine_id,
|
|
464
468
|
run_id=run_id,
|
|
469
|
+
user_id=user_id,
|
|
470
|
+
username=username,
|
|
465
471
|
timeout=timeout
|
|
466
472
|
)
|
|
467
473
|
|
|
@@ -522,6 +528,8 @@ class CommandService:
|
|
|
522
528
|
request: CommandRequest,
|
|
523
529
|
machine_id: str,
|
|
524
530
|
run_id: str,
|
|
531
|
+
user_id: str,
|
|
532
|
+
username: str,
|
|
525
533
|
timeout: int = 120
|
|
526
534
|
) -> Optional[NATSMessage]:
|
|
527
535
|
"""
|
|
@@ -531,6 +539,8 @@ class CommandService:
|
|
|
531
539
|
request: CommandRequest model containing command details
|
|
532
540
|
machine_id: Machine ID to send the command to
|
|
533
541
|
run_id: Run ID for the command
|
|
542
|
+
user_id: User ID who initiated the command
|
|
543
|
+
username: Username who initiated the command
|
|
534
544
|
timeout: Maximum time to wait for response in seconds
|
|
535
545
|
|
|
536
546
|
Returns:
|
|
@@ -555,7 +565,7 @@ class CommandService:
|
|
|
555
565
|
response_received = response_handler.register_pending(run_id, request.step_number)
|
|
556
566
|
|
|
557
567
|
# Build payload
|
|
558
|
-
payload = self._build_command_payload(request, machine_id, run_id)
|
|
568
|
+
payload = self._build_command_payload(request, machine_id, run_id, user_id, username)
|
|
559
569
|
|
|
560
570
|
try:
|
|
561
571
|
# Publish to JetStream
|
|
@@ -578,11 +588,7 @@ class CommandService:
|
|
|
578
588
|
await asyncio.sleep(0.1)
|
|
579
589
|
|
|
580
590
|
# Get the response
|
|
581
|
-
|
|
582
|
-
if response_data is None:
|
|
583
|
-
return None
|
|
584
|
-
|
|
585
|
-
return NATSMessage.model_validate(response_data)
|
|
591
|
+
return response_handler.get_response(run_id, request.step_number)
|
|
586
592
|
|
|
587
593
|
except Exception as e:
|
|
588
594
|
logger.error("Error sending immediate command: %s", e)
|
|
@@ -635,7 +641,9 @@ class CommandService:
|
|
|
635
641
|
self,
|
|
636
642
|
command_request: CommandRequest,
|
|
637
643
|
machine_id: str,
|
|
638
|
-
run_id: str
|
|
644
|
+
run_id: str,
|
|
645
|
+
user_id: str,
|
|
646
|
+
username: str
|
|
639
647
|
) -> NATSMessage:
|
|
640
648
|
"""
|
|
641
649
|
Build a command payload in the expected format.
|
|
@@ -644,6 +652,8 @@ class CommandService:
|
|
|
644
652
|
command_request: CommandRequest model containing command details
|
|
645
653
|
machine_id: Machine ID for the command
|
|
646
654
|
run_id: Run ID for the command
|
|
655
|
+
user_id: User ID who initiated the command
|
|
656
|
+
username: Username who initiated the command
|
|
647
657
|
|
|
648
658
|
Returns:
|
|
649
659
|
NATSMessage object ready for NATS transmission
|
|
@@ -652,6 +662,8 @@ class CommandService:
|
|
|
652
662
|
message_type=MessageType.COMMAND,
|
|
653
663
|
version="1.0",
|
|
654
664
|
timestamp=datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
|
|
665
|
+
user_id=user_id,
|
|
666
|
+
username=username,
|
|
655
667
|
machine_id=machine_id,
|
|
656
668
|
run_id=run_id
|
|
657
669
|
)
|
puda_comms/machine_client.py
CHANGED
|
@@ -20,7 +20,7 @@ from puda_comms.models import (
|
|
|
20
20
|
ImmediateCommand,
|
|
21
21
|
)
|
|
22
22
|
from nats.js.client import JetStreamContext
|
|
23
|
-
from nats.js.api import StreamConfig
|
|
23
|
+
from nats.js.api import StreamConfig, ConsumerConfig
|
|
24
24
|
from nats.js.errors import NotFoundError
|
|
25
25
|
from nats.aio.msg import Msg
|
|
26
26
|
|
|
@@ -69,11 +69,13 @@ class MachineClient:
|
|
|
69
69
|
|
|
70
70
|
# Default subscriptions
|
|
71
71
|
self._cmd_queue_sub = None
|
|
72
|
+
self._cmd_queue_task = None # Background task for pull consumer
|
|
72
73
|
self._cmd_immediate_sub = None
|
|
73
74
|
|
|
74
75
|
# Connection state
|
|
75
76
|
self._is_connected = False
|
|
76
|
-
self.
|
|
77
|
+
self._queue_handler = None
|
|
78
|
+
self._immediate_handler = None
|
|
77
79
|
|
|
78
80
|
# Queue control state
|
|
79
81
|
self._pause_lock = asyncio.Lock()
|
|
@@ -184,30 +186,22 @@ class MachineClient:
|
|
|
184
186
|
logger.error("Error ensuring %s stream: %s", stream_name, e, exc_info=True)
|
|
185
187
|
raise
|
|
186
188
|
|
|
187
|
-
async def
|
|
188
|
-
"""Ensure
|
|
189
|
+
async def _ensure_all_streams(self):
|
|
190
|
+
"""Ensure all required streams exist with correct retention policies."""
|
|
189
191
|
await self._ensure_stream(
|
|
190
192
|
self.STREAM_COMMAND_QUEUE,
|
|
191
|
-
f"{self.NAMESPACE}.*.cmd.queue"
|
|
193
|
+
f"{self.NAMESPACE}.*.cmd.queue",
|
|
194
|
+
retention='workqueue'
|
|
192
195
|
)
|
|
193
|
-
|
|
194
|
-
async def _ensure_command_immediate_stream(self):
|
|
195
|
-
"""Ensure COMMAND_IMMEDIATE stream exists with WorkQueue retention policy."""
|
|
196
196
|
await self._ensure_stream(
|
|
197
197
|
self.STREAM_COMMAND_IMMEDIATE,
|
|
198
198
|
f"{self.NAMESPACE}.*.cmd.immediate"
|
|
199
199
|
)
|
|
200
|
-
|
|
201
|
-
async def _ensure_response_queue_stream(self):
|
|
202
|
-
"""Ensure RESPONSE_QUEUE stream exists with Interest retention policy."""
|
|
203
200
|
await self._ensure_stream(
|
|
204
201
|
self.STREAM_RESPONSE_QUEUE,
|
|
205
202
|
f"{self.NAMESPACE}.*.cmd.response.queue",
|
|
206
203
|
retention='interest'
|
|
207
204
|
)
|
|
208
|
-
|
|
209
|
-
async def _ensure_response_immediate_stream(self):
|
|
210
|
-
"""Ensure RESPONSE_IMMEDIATE stream exists with Interest retention policy."""
|
|
211
205
|
await self._ensure_stream(
|
|
212
206
|
self.STREAM_RESPONSE_IMMEDIATE,
|
|
213
207
|
f"{self.NAMESPACE}.*.cmd.response.immediate",
|
|
@@ -230,7 +224,17 @@ class MachineClient:
|
|
|
230
224
|
|
|
231
225
|
async def _cleanup_subscriptions(self):
|
|
232
226
|
"""Unsubscribe from all subscriptions."""
|
|
233
|
-
# Clean up
|
|
227
|
+
# Clean up queue subscription (pull consumer)
|
|
228
|
+
if self._cmd_queue_task:
|
|
229
|
+
try:
|
|
230
|
+
self._cmd_queue_task.cancel()
|
|
231
|
+
await self._cmd_queue_task
|
|
232
|
+
except asyncio.CancelledError:
|
|
233
|
+
pass
|
|
234
|
+
except Exception:
|
|
235
|
+
pass
|
|
236
|
+
self._cmd_queue_task = None
|
|
237
|
+
|
|
234
238
|
if self._cmd_queue_sub:
|
|
235
239
|
try:
|
|
236
240
|
await self._cmd_queue_sub.unsubscribe()
|
|
@@ -252,6 +256,7 @@ class MachineClient:
|
|
|
252
256
|
self.kv = None
|
|
253
257
|
# Subscriptions will be recreated on reconnection
|
|
254
258
|
self._cmd_queue_sub = None
|
|
259
|
+
self._cmd_queue_task = None
|
|
255
260
|
self._cmd_immediate_sub = None
|
|
256
261
|
|
|
257
262
|
# ==================== CONNECTION MANAGEMENT ====================
|
|
@@ -270,10 +275,7 @@ class MachineClient:
|
|
|
270
275
|
closed_cb=self._closed_callback
|
|
271
276
|
)
|
|
272
277
|
self.js = self.nc.jetstream()
|
|
273
|
-
await self.
|
|
274
|
-
await self._ensure_command_immediate_stream()
|
|
275
|
-
await self._ensure_response_queue_stream()
|
|
276
|
-
await self._ensure_response_immediate_stream()
|
|
278
|
+
await self._ensure_all_streams()
|
|
277
279
|
self.kv = await self._get_or_create_kv_bucket()
|
|
278
280
|
self._is_connected = True
|
|
279
281
|
logger.info("Connected to NATS servers: %s", self.servers)
|
|
@@ -299,32 +301,16 @@ class MachineClient:
|
|
|
299
301
|
|
|
300
302
|
if self.nc:
|
|
301
303
|
self.js = self.nc.jetstream()
|
|
302
|
-
await self.
|
|
303
|
-
await self._ensure_command_immediate_stream()
|
|
304
|
-
await self._ensure_response_queue_stream()
|
|
305
|
-
await self._ensure_response_immediate_stream()
|
|
304
|
+
await self._ensure_all_streams()
|
|
306
305
|
self.kv = await self._get_or_create_kv_bucket()
|
|
307
306
|
await self._resubscribe_handlers()
|
|
308
307
|
|
|
309
308
|
async def _resubscribe_handlers(self):
|
|
310
309
|
"""Re-subscribe to all handlers after reconnection."""
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
for handler_info in self._reconnect_handlers:
|
|
317
|
-
try:
|
|
318
|
-
handler_type = handler_info['type']
|
|
319
|
-
handler = handler_info['handler']
|
|
320
|
-
subscribe_method = subscribe_methods.get(handler_type)
|
|
321
|
-
|
|
322
|
-
if subscribe_method:
|
|
323
|
-
await subscribe_method(handler)
|
|
324
|
-
else:
|
|
325
|
-
logger.warning("Unknown handler type: %s", handler_type)
|
|
326
|
-
except Exception as e:
|
|
327
|
-
logger.error("Failed to re-subscribe %s: %s", handler_type, e)
|
|
310
|
+
if self._queue_handler:
|
|
311
|
+
await self.subscribe_queue(self._queue_handler)
|
|
312
|
+
if self._immediate_handler:
|
|
313
|
+
await self.subscribe_immediate(self._immediate_handler)
|
|
328
314
|
|
|
329
315
|
async def _closed_callback(self):
|
|
330
316
|
"""Callback when connection is closed."""
|
|
@@ -439,7 +425,7 @@ class MachineClient:
|
|
|
439
425
|
async def process_queue_cmd(
|
|
440
426
|
self,
|
|
441
427
|
msg: Msg,
|
|
442
|
-
handler: Callable[[
|
|
428
|
+
handler: Callable[[NATSMessage], Awaitable[CommandResponse]]
|
|
443
429
|
) -> None:
|
|
444
430
|
"""
|
|
445
431
|
Handle the lifecycle of a single message: Parse -> Handle -> Ack/Nak/Term.
|
|
@@ -661,9 +647,54 @@ class MachineClient:
|
|
|
661
647
|
)
|
|
662
648
|
await self.publish_state({'state': 'error', 'run_id': None})
|
|
663
649
|
|
|
650
|
+
async def _verify_or_recreate_consumer(self, durable_name: str):
|
|
651
|
+
"""
|
|
652
|
+
Check if consumer exists and verify/update its configuration.
|
|
653
|
+
Deletes and recreates the consumer if configuration doesn't match.
|
|
654
|
+
|
|
655
|
+
Args:
|
|
656
|
+
durable_name: Name of the durable consumer to verify
|
|
657
|
+
"""
|
|
658
|
+
# Check if consumer exists and verify/update its configuration
|
|
659
|
+
try:
|
|
660
|
+
consumer_info = await self.js.consumer_info(self.STREAM_COMMAND_QUEUE, durable_name)
|
|
661
|
+
logger.debug("Durable consumer %s already exists", durable_name)
|
|
662
|
+
|
|
663
|
+
# Check if consumer config matches what we need
|
|
664
|
+
config = consumer_info.config
|
|
665
|
+
needs_recreate = False
|
|
666
|
+
if getattr(config, 'filter_subject', None) != self.cmd_queue:
|
|
667
|
+
logger.warning("Consumer filter_subject mismatch: expected %s, got %s",
|
|
668
|
+
self.cmd_queue, getattr(config, 'filter_subject', None))
|
|
669
|
+
needs_recreate = True
|
|
670
|
+
if getattr(config, 'ack_policy', None) != 'explicit':
|
|
671
|
+
logger.warning("Consumer ack_policy mismatch: expected explicit, got %s",
|
|
672
|
+
getattr(config, 'ack_policy', None))
|
|
673
|
+
needs_recreate = True
|
|
674
|
+
if getattr(config, 'deliver_policy', None) != 'all':
|
|
675
|
+
logger.warning("Consumer deliver_policy mismatch: expected all, got %s",
|
|
676
|
+
getattr(config, 'deliver_policy', None))
|
|
677
|
+
needs_recreate = True
|
|
678
|
+
|
|
679
|
+
if needs_recreate:
|
|
680
|
+
# Consumer exists but config doesn't match - delete and recreate
|
|
681
|
+
logger.info("Consumer config mismatch, deleting and recreating: %s", durable_name)
|
|
682
|
+
try:
|
|
683
|
+
await self.js.delete_consumer(self.STREAM_COMMAND_QUEUE, durable_name)
|
|
684
|
+
except Exception as e:
|
|
685
|
+
logger.warning("Error deleting consumer: %s", e)
|
|
686
|
+
else:
|
|
687
|
+
# Log consumer state for diagnostics
|
|
688
|
+
logger.info("Consumer exists with correct config - pending: %d, delivered: %d, ack_pending: %d",
|
|
689
|
+
consumer_info.num_pending, consumer_info.delivered.consumer_seq,
|
|
690
|
+
consumer_info.num_ack_pending)
|
|
691
|
+
except NotFoundError:
|
|
692
|
+
# Consumer doesn't exist, will be created by pull_subscribe
|
|
693
|
+
logger.debug("Durable consumer %s does not exist, will be created", durable_name)
|
|
694
|
+
|
|
664
695
|
async def subscribe_queue(self, handler: Callable[[NATSMessage], Awaitable[CommandResponse]]):
|
|
665
696
|
"""
|
|
666
|
-
Subscribe to queue commands with
|
|
697
|
+
Subscribe to queue commands with pull consumer.
|
|
667
698
|
|
|
668
699
|
Args:
|
|
669
700
|
handler: Async function that processes command payloads and returns CommandResponse
|
|
@@ -673,19 +704,65 @@ class MachineClient:
|
|
|
673
704
|
return
|
|
674
705
|
|
|
675
706
|
# Ensure stream exists before attempting to subscribe
|
|
676
|
-
await self.
|
|
707
|
+
await self._ensure_all_streams()
|
|
677
708
|
|
|
678
709
|
try:
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
710
|
+
durable_name = f"cmd_queue_{self.machine_id}"
|
|
711
|
+
|
|
712
|
+
await self._verify_or_recreate_consumer(durable_name)
|
|
713
|
+
|
|
714
|
+
# Create pull subscription - this will create the consumer if it doesn't exist
|
|
715
|
+
# Pass config directly to ensure correct consumer configuration
|
|
716
|
+
consumer_config = ConsumerConfig(
|
|
717
|
+
durable_name=durable_name,
|
|
718
|
+
filter_subject=self.cmd_queue,
|
|
719
|
+
ack_policy="explicit",
|
|
720
|
+
deliver_policy="all", # Required for WorkQueue: deliver all messages from the beginning
|
|
721
|
+
)
|
|
722
|
+
|
|
723
|
+
self._cmd_queue_sub = await self.js.pull_subscribe(
|
|
684
724
|
subject=self.cmd_queue,
|
|
725
|
+
durable=durable_name,
|
|
685
726
|
stream=self.STREAM_COMMAND_QUEUE,
|
|
686
|
-
|
|
687
|
-
cb=message_handler
|
|
727
|
+
config=consumer_config
|
|
688
728
|
)
|
|
729
|
+
|
|
730
|
+
# Log final consumer info for diagnostics
|
|
731
|
+
try:
|
|
732
|
+
consumer_info = await self.js.consumer_info(self.STREAM_COMMAND_QUEUE, durable_name)
|
|
733
|
+
logger.info("Pull subscription created - subject: %s, durable: %s, stream: %s, pending: %d, ack_pending: %d",
|
|
734
|
+
self.cmd_queue, durable_name, self.STREAM_COMMAND_QUEUE,
|
|
735
|
+
consumer_info.num_pending, consumer_info.num_ack_pending)
|
|
736
|
+
except Exception as e:
|
|
737
|
+
logger.warning("Could not get consumer info after subscription: %s", e)
|
|
738
|
+
logger.info("Pull subscription created - subject: %s, durable: %s, stream: %s",
|
|
739
|
+
self.cmd_queue, durable_name, self.STREAM_COMMAND_QUEUE)
|
|
740
|
+
|
|
741
|
+
# Start background task to pull and process messages
|
|
742
|
+
async def pull_messages():
|
|
743
|
+
"""Continuously pull messages from the queue."""
|
|
744
|
+
try:
|
|
745
|
+
while True:
|
|
746
|
+
try:
|
|
747
|
+
# Fetch messages (batch of 1, timeout 1 second)
|
|
748
|
+
msgs = await self._cmd_queue_sub.fetch(batch=1, timeout=1.0)
|
|
749
|
+
if msgs:
|
|
750
|
+
logger.debug("Pulled %d message(s) from queue", len(msgs))
|
|
751
|
+
for msg in msgs:
|
|
752
|
+
await self.process_queue_cmd(msg, handler)
|
|
753
|
+
except asyncio.TimeoutError:
|
|
754
|
+
# Timeout is expected when no messages are available
|
|
755
|
+
continue
|
|
756
|
+
except Exception as e:
|
|
757
|
+
logger.error("Error pulling queue messages: %s", e, exc_info=True)
|
|
758
|
+
await asyncio.sleep(1) # Wait before retrying
|
|
759
|
+
except asyncio.CancelledError:
|
|
760
|
+
logger.debug("Queue pull task cancelled")
|
|
761
|
+
raise
|
|
762
|
+
|
|
763
|
+
self._cmd_queue_task = asyncio.create_task(pull_messages())
|
|
764
|
+
logger.info("Started background task for pulling queue messages")
|
|
765
|
+
|
|
689
766
|
except NotFoundError:
|
|
690
767
|
# Stream still not found after ensuring it exists - this shouldn't happen
|
|
691
768
|
# but handle it gracefully with detailed diagnostics
|
|
@@ -703,10 +780,9 @@ class MachineClient:
|
|
|
703
780
|
logger.error(" Stream verification failed: %s", stream_check_error)
|
|
704
781
|
raise
|
|
705
782
|
|
|
706
|
-
#
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
logger.info("Subscribed to queue commands: %s (durable: cmd_queue_%s, stream: %s)",
|
|
783
|
+
# Store handler for reconnection
|
|
784
|
+
self._queue_handler = handler
|
|
785
|
+
logger.info("Subscribed to queue commands: %s (durable: cmd_queue_%s, stream: %s, pull consumer)",
|
|
710
786
|
self.cmd_queue, self.machine_id, self.STREAM_COMMAND_QUEUE)
|
|
711
787
|
|
|
712
788
|
async def subscribe_immediate(self, handler: Callable[[NATSMessage], Awaitable[CommandResponse]]):
|
|
@@ -720,19 +796,26 @@ class MachineClient:
|
|
|
720
796
|
logger.error("JetStream not available for immediate subscription")
|
|
721
797
|
return
|
|
722
798
|
|
|
799
|
+
# Store handler for use in callback and reconnection
|
|
800
|
+
self._immediate_handler = handler
|
|
801
|
+
|
|
723
802
|
async def message_handler(msg: Msg):
|
|
724
|
-
"""
|
|
725
|
-
await self.process_immediate_cmd(msg,
|
|
803
|
+
"""Process immediate messages using stored handler."""
|
|
804
|
+
await self.process_immediate_cmd(msg, self._immediate_handler)
|
|
726
805
|
|
|
727
806
|
# Ensure stream exists before attempting to subscribe
|
|
728
|
-
await self.
|
|
807
|
+
await self._ensure_stream(
|
|
808
|
+
self.STREAM_COMMAND_IMMEDIATE,
|
|
809
|
+
f"{self.NAMESPACE}.*.cmd.immediate",
|
|
810
|
+
retention='workqueue'
|
|
811
|
+
)
|
|
729
812
|
|
|
730
813
|
try:
|
|
731
814
|
self._cmd_immediate_sub = await self.js.subscribe(
|
|
732
815
|
subject=self.cmd_immediate,
|
|
733
816
|
stream=self.STREAM_COMMAND_IMMEDIATE,
|
|
734
817
|
durable=f"cmd_immed_{self.machine_id}",
|
|
735
|
-
cb=message_handler
|
|
818
|
+
cb=message_handler # required for push consumer to handle messages
|
|
736
819
|
)
|
|
737
820
|
except NotFoundError:
|
|
738
821
|
# Stream still not found after ensuring it exists - this shouldn't happen
|
|
@@ -741,9 +824,6 @@ class MachineClient:
|
|
|
741
824
|
self.STREAM_COMMAND_IMMEDIATE)
|
|
742
825
|
raise
|
|
743
826
|
|
|
744
|
-
# Register handler for reconnection
|
|
745
|
-
if not any(h['type'] == 'immediate' for h in self._reconnect_handlers):
|
|
746
|
-
self._reconnect_handlers.append({'type': 'immediate', 'handler': handler})
|
|
747
827
|
logger.info("Subscribed to immediate commands: %s (durable: cmd_immed_%s, stream: %s)",
|
|
748
828
|
self.cmd_immediate, self.machine_id, self.STREAM_COMMAND_IMMEDIATE)
|
|
749
829
|
|
puda_comms/models.py
CHANGED
|
@@ -68,18 +68,19 @@ class CommandResponse(BaseModel):
|
|
|
68
68
|
|
|
69
69
|
class MessageHeader(BaseModel):
|
|
70
70
|
"""Header for NATS messages."""
|
|
71
|
-
message_type: MessageType = Field(description="Type of message")
|
|
72
71
|
version: str = Field(default="1.0", description="Message version")
|
|
73
|
-
|
|
72
|
+
message_type: MessageType = Field(description="Type of message")
|
|
73
|
+
user_id: str = Field(description="User ID")
|
|
74
|
+
username: str = Field(description="User name")
|
|
74
75
|
machine_id: str = Field(description="Machine ID")
|
|
75
76
|
run_id: Optional[str] = Field(default=None, description="Unique identifier (uuid) for the run/workflow")
|
|
76
|
-
|
|
77
|
+
timestamp: str = Field(default_factory=_get_current_timestamp, description="ISO format timestamp (auto-set on creation)")
|
|
77
78
|
class NATSMessage(BaseModel):
|
|
78
79
|
"""
|
|
79
80
|
Complete NATS message structure.
|
|
80
81
|
|
|
81
82
|
Structure:
|
|
82
|
-
- header: MessageHeader with message_type, version, timestamp, machine_id, run_id
|
|
83
|
+
- header: MessageHeader with message_type, version, timestamp, user_id, username, machine_id, run_id
|
|
83
84
|
- command: Optional CommandRequest (for command messages)
|
|
84
85
|
- response: Optional CommandResponse data (for response messages)
|
|
85
86
|
"""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: puda-comms
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.4
|
|
4
4
|
Summary: Communication library for the PUDA platform.
|
|
5
5
|
Author: zhao
|
|
6
6
|
Author-email: zhao <20024592+agentzhao@users.noreply.github.com>
|
|
@@ -121,6 +121,8 @@ Header metadata for NATS messages.
|
|
|
121
121
|
- `message_type` (MessageType): Type of message (COMMAND, RESPONSE, LOG, etc.)
|
|
122
122
|
- `version` (str): Message version (default: "1.0")
|
|
123
123
|
- `timestamp` (str): ISO 8601 UTC timestamp (auto-generated)
|
|
124
|
+
- `user_id` (str): User ID who initiated the command
|
|
125
|
+
- `username` (str): Username who initiated the command
|
|
124
126
|
- `machine_id` (str): Identifier for the target machine
|
|
125
127
|
- `run_id` (Optional[str]): Unique identifier (UUID) for the run/workflow
|
|
126
128
|
|
|
@@ -130,6 +132,8 @@ header = MessageHeader(
|
|
|
130
132
|
message_type=MessageType.RESPONSE,
|
|
131
133
|
version="1.0",
|
|
132
134
|
timestamp="2026-01-20T02:00:46Z",
|
|
135
|
+
user_id="user123",
|
|
136
|
+
username="John Doe",
|
|
133
137
|
machine_id="first",
|
|
134
138
|
run_id="092073e6-13d0-4756-8d99-eff1612a5a72"
|
|
135
139
|
)
|
|
@@ -154,6 +158,8 @@ Complete NATS message structure combining header with optional command or respon
|
|
|
154
158
|
"message_type": "response",
|
|
155
159
|
"version": "1.0",
|
|
156
160
|
"timestamp": "2026-01-20T02:00:46Z",
|
|
161
|
+
"user_id": "user123",
|
|
162
|
+
"username": "John Doe",
|
|
157
163
|
"machine_id": "first",
|
|
158
164
|
"run_id": "092073e6-13d0-4756-8d99-eff1612a5a72"
|
|
159
165
|
},
|
|
@@ -229,6 +235,8 @@ reply = await service.send_queue_command(
|
|
|
229
235
|
request=request,
|
|
230
236
|
machine_id="first",
|
|
231
237
|
run_id=run_id,
|
|
238
|
+
user_id="user123",
|
|
239
|
+
username="John Doe",
|
|
232
240
|
timeout=60 # Wait up to 60 seconds
|
|
233
241
|
)
|
|
234
242
|
|
|
@@ -237,6 +245,8 @@ reply = await service.send_queue_commands(
|
|
|
237
245
|
requests=commands,
|
|
238
246
|
machine_id="first",
|
|
239
247
|
run_id=run_id,
|
|
248
|
+
user_id="user123",
|
|
249
|
+
username="John Doe",
|
|
240
250
|
timeout=60 # Wait up to 60 seconds per command
|
|
241
251
|
)
|
|
242
252
|
```
|
|
@@ -274,7 +284,9 @@ Always check the response status and handle errors appropriately:
|
|
|
274
284
|
reply: NATSMessage = await service.send_queue_command(
|
|
275
285
|
request=request,
|
|
276
286
|
machine_id="first",
|
|
277
|
-
run_id=run_id
|
|
287
|
+
run_id=run_id,
|
|
288
|
+
user_id="user123",
|
|
289
|
+
username="John Doe"
|
|
278
290
|
)
|
|
279
291
|
|
|
280
292
|
if reply is None:
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
puda_comms/__init__.py,sha256=lntvVFJJez_rv5lZy5mYj4_43B9Y3NRNzxWfBuSAQ1M,194
|
|
2
|
+
puda_comms/command_service.py,sha256=KFremcEGfsTeUVQMIhyk1knYmUCvRYQ12vS_jy_14wA,25193
|
|
3
|
+
puda_comms/execution_state.py,sha256=aTaejCnJgg1y_FP-ymIC1GQzqC81FIWo0RZ18XzAQnA,2881
|
|
4
|
+
puda_comms/machine_client.py,sha256=wj6t_QHGs7l1Oc8JQ6hq2hqBd5C14TCPA_dTU9qOLzw,37430
|
|
5
|
+
puda_comms/models.py,sha256=9ZGX0PR7SgMBOL5zVLrPuSUhZqutQU96PubyjyQLhf8,3617
|
|
6
|
+
puda_comms-0.0.4.dist-info/WHEEL,sha256=ZyFSCYkV2BrxH6-HRVRg3R9Fo7MALzer9KiPYqNxSbo,79
|
|
7
|
+
puda_comms-0.0.4.dist-info/METADATA,sha256=0cMHDub_3NZt7Cj5U1jzrQXI8atQqpMM-i3vSMrT5lo,11512
|
|
8
|
+
puda_comms-0.0.4.dist-info/RECORD,,
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
puda_comms/__init__.py,sha256=lntvVFJJez_rv5lZy5mYj4_43B9Y3NRNzxWfBuSAQ1M,194
|
|
2
|
-
puda_comms/command_service.py,sha256=E5kGzl2hjkSTubxv01nxuo9XMXHY5aTEsn-k3IDJVB8,24727
|
|
3
|
-
puda_comms/execution_state.py,sha256=aTaejCnJgg1y_FP-ymIC1GQzqC81FIWo0RZ18XzAQnA,2881
|
|
4
|
-
puda_comms/machine_client.py,sha256=r8oSnkRoqhKykvyR94kGlA1vRrCKLq-o9uNZQftxqDU,33120
|
|
5
|
-
puda_comms/models.py,sha256=cVH5uKzyLmjzPeBcm3RIJMTkoynmxqe_P26GtZwlIN8,3500
|
|
6
|
-
puda_comms-0.0.3.dist-info/WHEEL,sha256=ZyFSCYkV2BrxH6-HRVRg3R9Fo7MALzer9KiPYqNxSbo,79
|
|
7
|
-
puda_comms-0.0.3.dist-info/METADATA,sha256=Fnf_YWeOZAcefPUTY976BUT95M0w-8bSqAhjVMkmjxA,11158
|
|
8
|
-
puda_comms-0.0.3.dist-info/RECORD,,
|
|
File without changes
|