puda-comms 0.0.3__tar.gz → 0.0.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {puda_comms-0.0.3 → puda_comms-0.0.5}/PKG-INFO +14 -2
- {puda_comms-0.0.3 → puda_comms-0.0.5}/README.md +13 -1
- {puda_comms-0.0.3 → puda_comms-0.0.5}/pyproject.toml +12 -1
- {puda_comms-0.0.3 → puda_comms-0.0.5}/src/puda_comms/command_service.py +233 -86
- {puda_comms-0.0.3 → puda_comms-0.0.5}/src/puda_comms/machine_client.py +269 -137
- {puda_comms-0.0.3 → puda_comms-0.0.5}/src/puda_comms/models.py +8 -4
- puda_comms-0.0.5/src/puda_comms/run_manager.py +112 -0
- {puda_comms-0.0.3 → puda_comms-0.0.5}/src/puda_comms/__init__.py +0 -0
- {puda_comms-0.0.3 → puda_comms-0.0.5}/src/puda_comms/execution_state.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: puda-comms
|
|
3
|
-
Version: 0.0.3
|
|
3
|
+
Version: 0.0.5
|
|
4
4
|
Summary: Communication library for the PUDA platform.
|
|
5
5
|
Author: zhao
|
|
6
6
|
Author-email: zhao <20024592+agentzhao@users.noreply.github.com>
|
|
@@ -121,6 +121,8 @@ Header metadata for NATS messages.
|
|
|
121
121
|
- `message_type` (MessageType): Type of message (COMMAND, RESPONSE, LOG, etc.)
|
|
122
122
|
- `version` (str): Message version (default: "1.0")
|
|
123
123
|
- `timestamp` (str): ISO 8601 UTC timestamp (auto-generated)
|
|
124
|
+
- `user_id` (str): User ID who initiated the command
|
|
125
|
+
- `username` (str): Username who initiated the command
|
|
124
126
|
- `machine_id` (str): Identifier for the target machine
|
|
125
127
|
- `run_id` (Optional[str]): Unique identifier (UUID) for the run/workflow
|
|
126
128
|
|
|
@@ -130,6 +132,8 @@ header = MessageHeader(
|
|
|
130
132
|
message_type=MessageType.RESPONSE,
|
|
131
133
|
version="1.0",
|
|
132
134
|
timestamp="2026-01-20T02:00:46Z",
|
|
135
|
+
user_id="user123",
|
|
136
|
+
username="John Doe",
|
|
133
137
|
machine_id="first",
|
|
134
138
|
run_id="092073e6-13d0-4756-8d99-eff1612a5a72"
|
|
135
139
|
)
|
|
@@ -154,6 +158,8 @@ Complete NATS message structure combining header with optional command or respon
|
|
|
154
158
|
"message_type": "response",
|
|
155
159
|
"version": "1.0",
|
|
156
160
|
"timestamp": "2026-01-20T02:00:46Z",
|
|
161
|
+
"user_id": "user123",
|
|
162
|
+
"username": "John Doe",
|
|
157
163
|
"machine_id": "first",
|
|
158
164
|
"run_id": "092073e6-13d0-4756-8d99-eff1612a5a72"
|
|
159
165
|
},
|
|
@@ -229,6 +235,8 @@ reply = await service.send_queue_command(
|
|
|
229
235
|
request=request,
|
|
230
236
|
machine_id="first",
|
|
231
237
|
run_id=run_id,
|
|
238
|
+
user_id="user123",
|
|
239
|
+
username="John Doe",
|
|
232
240
|
timeout=60 # Wait up to 60 seconds
|
|
233
241
|
)
|
|
234
242
|
|
|
@@ -237,6 +245,8 @@ reply = await service.send_queue_commands(
|
|
|
237
245
|
requests=commands,
|
|
238
246
|
machine_id="first",
|
|
239
247
|
run_id=run_id,
|
|
248
|
+
user_id="user123",
|
|
249
|
+
username="John Doe",
|
|
240
250
|
timeout=60 # Wait up to 60 seconds per command
|
|
241
251
|
)
|
|
242
252
|
```
|
|
@@ -274,7 +284,9 @@ Always check the response status and handle errors appropriately:
|
|
|
274
284
|
reply: NATSMessage = await service.send_queue_command(
|
|
275
285
|
request=request,
|
|
276
286
|
machine_id="first",
|
|
277
|
-
run_id=run_id
|
|
287
|
+
run_id=run_id,
|
|
288
|
+
user_id="user123",
|
|
289
|
+
username="John Doe"
|
|
278
290
|
)
|
|
279
291
|
|
|
280
292
|
if reply is None:
|
|
@@ -109,6 +109,8 @@ Header metadata for NATS messages.
|
|
|
109
109
|
- `message_type` (MessageType): Type of message (COMMAND, RESPONSE, LOG, etc.)
|
|
110
110
|
- `version` (str): Message version (default: "1.0")
|
|
111
111
|
- `timestamp` (str): ISO 8601 UTC timestamp (auto-generated)
|
|
112
|
+
- `user_id` (str): User ID who initiated the command
|
|
113
|
+
- `username` (str): Username who initiated the command
|
|
112
114
|
- `machine_id` (str): Identifier for the target machine
|
|
113
115
|
- `run_id` (Optional[str]): Unique identifier (UUID) for the run/workflow
|
|
114
116
|
|
|
@@ -118,6 +120,8 @@ header = MessageHeader(
|
|
|
118
120
|
message_type=MessageType.RESPONSE,
|
|
119
121
|
version="1.0",
|
|
120
122
|
timestamp="2026-01-20T02:00:46Z",
|
|
123
|
+
user_id="user123",
|
|
124
|
+
username="John Doe",
|
|
121
125
|
machine_id="first",
|
|
122
126
|
run_id="092073e6-13d0-4756-8d99-eff1612a5a72"
|
|
123
127
|
)
|
|
@@ -142,6 +146,8 @@ Complete NATS message structure combining header with optional command or respon
|
|
|
142
146
|
"message_type": "response",
|
|
143
147
|
"version": "1.0",
|
|
144
148
|
"timestamp": "2026-01-20T02:00:46Z",
|
|
149
|
+
"user_id": "user123",
|
|
150
|
+
"username": "John Doe",
|
|
145
151
|
"machine_id": "first",
|
|
146
152
|
"run_id": "092073e6-13d0-4756-8d99-eff1612a5a72"
|
|
147
153
|
},
|
|
@@ -217,6 +223,8 @@ reply = await service.send_queue_command(
|
|
|
217
223
|
request=request,
|
|
218
224
|
machine_id="first",
|
|
219
225
|
run_id=run_id,
|
|
226
|
+
user_id="user123",
|
|
227
|
+
username="John Doe",
|
|
220
228
|
timeout=60 # Wait up to 60 seconds
|
|
221
229
|
)
|
|
222
230
|
|
|
@@ -225,6 +233,8 @@ reply = await service.send_queue_commands(
|
|
|
225
233
|
requests=commands,
|
|
226
234
|
machine_id="first",
|
|
227
235
|
run_id=run_id,
|
|
236
|
+
user_id="user123",
|
|
237
|
+
username="John Doe",
|
|
228
238
|
timeout=60 # Wait up to 60 seconds per command
|
|
229
239
|
)
|
|
230
240
|
```
|
|
@@ -262,7 +272,9 @@ Always check the response status and handle errors appropriately:
|
|
|
262
272
|
reply: NATSMessage = await service.send_queue_command(
|
|
263
273
|
request=request,
|
|
264
274
|
machine_id="first",
|
|
265
|
-
run_id=run_id
|
|
275
|
+
run_id=run_id,
|
|
276
|
+
user_id="user123",
|
|
277
|
+
username="John Doe"
|
|
266
278
|
)
|
|
267
279
|
|
|
268
280
|
if reply is None:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "puda-comms"
|
|
3
|
-
version = "0.0.3"
|
|
3
|
+
version = "0.0.5"
|
|
4
4
|
description = "Communication library for the PUDA platform."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [
|
|
@@ -17,6 +17,17 @@ dependencies = [
|
|
|
17
17
|
[tool.uv.sources]
|
|
18
18
|
puda-drivers = {workspace = true}
|
|
19
19
|
|
|
20
|
+
[tool.ruff]
|
|
21
|
+
line-length = 100
|
|
22
|
+
|
|
23
|
+
[tool.ruff.lint.mccabe]
|
|
24
|
+
max-complexity = 10
|
|
25
|
+
|
|
20
26
|
[build-system]
|
|
21
27
|
requires = ["uv_build>=0.9.18,<0.10.0"]
|
|
22
28
|
build-backend = "uv_build"
|
|
29
|
+
|
|
30
|
+
[dependency-groups]
|
|
31
|
+
dev = [
|
|
32
|
+
"ruff>=0.14.13",
|
|
33
|
+
]
|
|
@@ -12,11 +12,17 @@ import json
|
|
|
12
12
|
import logging
|
|
13
13
|
import signal
|
|
14
14
|
from datetime import datetime, timezone
|
|
15
|
-
from typing import Dict, Any, Optional
|
|
15
|
+
from typing import Dict, Any, Optional
|
|
16
16
|
import nats
|
|
17
17
|
from nats.js.client import JetStreamContext
|
|
18
18
|
from nats.aio.msg import Msg
|
|
19
|
-
from puda_comms.models import
|
|
19
|
+
from puda_comms.models import (
|
|
20
|
+
CommandRequest,
|
|
21
|
+
CommandResponseStatus,
|
|
22
|
+
NATSMessage,
|
|
23
|
+
MessageHeader,
|
|
24
|
+
MessageType,
|
|
25
|
+
)
|
|
20
26
|
|
|
21
27
|
logger = logging.getLogger(__name__)
|
|
22
28
|
|
|
@@ -37,7 +43,7 @@ class ResponseHandler:
|
|
|
37
43
|
def __init__(self, js: JetStreamContext, machine_id: str):
|
|
38
44
|
self.js = js
|
|
39
45
|
self.machine_id = machine_id
|
|
40
|
-
self._pending_responses: Dict[str,
|
|
46
|
+
self._pending_responses: Dict[str, Dict[str, Any]] = {} # {'event': asyncio.Event, 'response': Optional[NATSMessage]}
|
|
41
47
|
self._queue_consumer = None
|
|
42
48
|
self._immediate_consumer = None
|
|
43
49
|
self._initialized = False
|
|
@@ -102,8 +108,8 @@ class ResponseHandler:
|
|
|
102
108
|
|
|
103
109
|
# Get the pending response
|
|
104
110
|
pending = self._pending_responses[key]
|
|
105
|
-
# Store the
|
|
106
|
-
pending['response'] = message
|
|
111
|
+
# Store the NATSMessage directly
|
|
112
|
+
pending['response'] = message
|
|
107
113
|
# Signal that response was received
|
|
108
114
|
# Don't delete here - let get_response() delete it after retrieval
|
|
109
115
|
pending['event'].set()
|
|
@@ -152,7 +158,7 @@ class ResponseHandler:
|
|
|
152
158
|
}
|
|
153
159
|
return event
|
|
154
160
|
|
|
155
|
-
def get_response(self, run_id: str, step_number: int) -> Optional[
|
|
161
|
+
def get_response(self, run_id: str, step_number: int) -> Optional[NATSMessage]:
|
|
156
162
|
"""
|
|
157
163
|
Get the response for a pending command.
|
|
158
164
|
|
|
@@ -161,7 +167,7 @@ class ResponseHandler:
|
|
|
161
167
|
step_number: Step number for the command
|
|
162
168
|
|
|
163
169
|
Returns:
|
|
164
|
-
The NATSMessage
|
|
170
|
+
The NATSMessage if available, None otherwise
|
|
165
171
|
"""
|
|
166
172
|
key = f"{run_id}:{str(step_number)}"
|
|
167
173
|
if key in self._pending_responses:
|
|
@@ -266,9 +272,9 @@ class CommandService:
|
|
|
266
272
|
max_attempts = 3
|
|
267
273
|
connect_timeout = 3 # 3 seconds timeout per connection attempt
|
|
268
274
|
|
|
269
|
-
for attempt in range(
|
|
275
|
+
for attempt in range(max_attempts):
|
|
270
276
|
try:
|
|
271
|
-
logger.info("Connection attempt %d/%d to NATS servers: %s", attempt, max_attempts, self.servers)
|
|
277
|
+
logger.info("Connection attempt %d/%d to NATS servers: %s", attempt + 1, max_attempts, self.servers)
|
|
272
278
|
self.nc = await asyncio.wait_for(
|
|
273
279
|
nats.connect(
|
|
274
280
|
servers=self.servers,
|
|
@@ -285,14 +291,14 @@ class CommandService:
|
|
|
285
291
|
return True
|
|
286
292
|
|
|
287
293
|
except asyncio.TimeoutError:
|
|
288
|
-
logger.warning("Connection attempt %d/%d timed out after %d seconds", attempt, max_attempts, connect_timeout)
|
|
289
|
-
if attempt < max_attempts:
|
|
294
|
+
logger.warning("Connection attempt %d/%d timed out after %d seconds", attempt + 1, max_attempts, connect_timeout)
|
|
295
|
+
if attempt < max_attempts - 1:
|
|
290
296
|
logger.info("Retrying connection...")
|
|
291
297
|
else:
|
|
292
298
|
logger.error("Failed to connect after %d attempts. Giving up.", max_attempts)
|
|
293
299
|
except Exception as e:
|
|
294
|
-
logger.warning("Connection attempt %d/%d failed: %s", attempt, max_attempts, e)
|
|
295
|
-
if attempt < max_attempts:
|
|
300
|
+
logger.warning("Connection attempt %d/%d failed: %s", attempt + 1, max_attempts, e)
|
|
301
|
+
if attempt < max_attempts - 1:
|
|
296
302
|
logger.info("Retrying connection...")
|
|
297
303
|
else:
|
|
298
304
|
logger.error("Failed to connect after %d attempts. Giving up.", max_attempts)
|
|
@@ -343,6 +349,8 @@ class CommandService:
|
|
|
343
349
|
request: CommandRequest,
|
|
344
350
|
machine_id: str,
|
|
345
351
|
run_id: str,
|
|
352
|
+
user_id: str,
|
|
353
|
+
username: str,
|
|
346
354
|
timeout: int = 120
|
|
347
355
|
) -> Optional[NATSMessage]:
|
|
348
356
|
"""
|
|
@@ -352,6 +360,8 @@ class CommandService:
|
|
|
352
360
|
request: CommandRequest model containing command details
|
|
353
361
|
machine_id: Machine ID to send the command to
|
|
354
362
|
run_id: Run ID for the command
|
|
363
|
+
user_id: User ID who initiated the command
|
|
364
|
+
username: Username who initiated the command
|
|
355
365
|
timeout: Maximum time to wait for response in seconds
|
|
356
366
|
|
|
357
367
|
Returns:
|
|
@@ -364,8 +374,8 @@ class CommandService:
|
|
|
364
374
|
subject = f"{NAMESPACE}.{machine_id}.cmd.queue"
|
|
365
375
|
|
|
366
376
|
logger.info(
|
|
367
|
-
"Sending queue command:
|
|
368
|
-
|
|
377
|
+
"Sending queue command: subject=%s, command=%s, run_id=%s, step_number=%s",
|
|
378
|
+
subject, request.name, run_id, request.step_number
|
|
369
379
|
)
|
|
370
380
|
|
|
371
381
|
# Get or create response handler for this machine
|
|
@@ -374,7 +384,7 @@ class CommandService:
|
|
|
374
384
|
response_event = response_handler.register_pending(run_id, request.step_number)
|
|
375
385
|
|
|
376
386
|
# Build payload
|
|
377
|
-
payload = self._build_command_payload(request, machine_id, run_id)
|
|
387
|
+
payload = self._build_command_payload(request, machine_id, run_id, user_id, username)
|
|
378
388
|
|
|
379
389
|
try:
|
|
380
390
|
# Publish to JetStream
|
|
@@ -397,36 +407,107 @@ class CommandService:
|
|
|
397
407
|
await asyncio.sleep(0.1)
|
|
398
408
|
|
|
399
409
|
# Get the response
|
|
400
|
-
|
|
401
|
-
if response_data is None:
|
|
402
|
-
return None
|
|
403
|
-
|
|
404
|
-
return NATSMessage.model_validate(response_data)
|
|
410
|
+
return response_handler.get_response(run_id, request.step_number)
|
|
405
411
|
|
|
406
412
|
except Exception as e:
|
|
407
413
|
logger.error("Error sending queue command: %s", e)
|
|
408
414
|
response_handler.remove_pending(run_id, request.step_number)
|
|
409
415
|
return None
|
|
410
416
|
|
|
417
|
+
async def start_run(
|
|
418
|
+
self,
|
|
419
|
+
machine_id: str,
|
|
420
|
+
run_id: str,
|
|
421
|
+
user_id: str,
|
|
422
|
+
username: str,
|
|
423
|
+
timeout: int = 120
|
|
424
|
+
) -> Optional[NATSMessage]:
|
|
425
|
+
"""
|
|
426
|
+
Send START immediate command to begin a run.
|
|
427
|
+
|
|
428
|
+
Args:
|
|
429
|
+
machine_id: Machine ID to send the command to
|
|
430
|
+
run_id: Run ID for the command
|
|
431
|
+
user_id: User ID who initiated the command
|
|
432
|
+
username: Username who initiated the command
|
|
433
|
+
timeout: Maximum time to wait for response in seconds
|
|
434
|
+
|
|
435
|
+
Returns:
|
|
436
|
+
NATSMessage if successful, None if failed or timeout
|
|
437
|
+
"""
|
|
438
|
+
request = CommandRequest(
|
|
439
|
+
name="start",
|
|
440
|
+
params={},
|
|
441
|
+
step_number=0
|
|
442
|
+
)
|
|
443
|
+
return await self.send_immediate_command(
|
|
444
|
+
request=request,
|
|
445
|
+
machine_id=machine_id,
|
|
446
|
+
run_id=run_id,
|
|
447
|
+
user_id=user_id,
|
|
448
|
+
username=username,
|
|
449
|
+
timeout=timeout
|
|
450
|
+
)
|
|
451
|
+
|
|
452
|
+
async def complete_run(
|
|
453
|
+
self,
|
|
454
|
+
machine_id: str,
|
|
455
|
+
run_id: str,
|
|
456
|
+
user_id: str,
|
|
457
|
+
username: str,
|
|
458
|
+
timeout: int = 120
|
|
459
|
+
) -> Optional[NATSMessage]:
|
|
460
|
+
"""
|
|
461
|
+
Send COMPLETE immediate command to end a run.
|
|
462
|
+
|
|
463
|
+
Args:
|
|
464
|
+
machine_id: Machine ID to send the command to
|
|
465
|
+
run_id: Run ID for the command
|
|
466
|
+
user_id: User ID who initiated the command
|
|
467
|
+
username: Username who initiated the command
|
|
468
|
+
timeout: Maximum time to wait for response in seconds
|
|
469
|
+
|
|
470
|
+
Returns:
|
|
471
|
+
NATSMessage if successful, None if failed or timeout
|
|
472
|
+
"""
|
|
473
|
+
request = CommandRequest(
|
|
474
|
+
name="complete",
|
|
475
|
+
params={},
|
|
476
|
+
step_number=0
|
|
477
|
+
)
|
|
478
|
+
return await self.send_immediate_command(
|
|
479
|
+
request=request,
|
|
480
|
+
machine_id=machine_id,
|
|
481
|
+
run_id=run_id,
|
|
482
|
+
user_id=user_id,
|
|
483
|
+
username=username,
|
|
484
|
+
timeout=timeout
|
|
485
|
+
)
|
|
486
|
+
|
|
411
487
|
async def send_queue_commands(
|
|
412
488
|
self,
|
|
413
489
|
*,
|
|
414
490
|
requests: list[CommandRequest],
|
|
415
491
|
machine_id: str,
|
|
416
492
|
run_id: str,
|
|
493
|
+
user_id: str,
|
|
494
|
+
username: str,
|
|
417
495
|
timeout: int = 120
|
|
418
496
|
) -> Optional[NATSMessage]:
|
|
419
497
|
"""
|
|
420
498
|
Send multiple queue commands sequentially and wait for responses.
|
|
421
499
|
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
If
|
|
500
|
+
Automatically sends START command before the sequence and COMPLETE command after
|
|
501
|
+
successful completion. Sends commands one by one, waiting for each response before
|
|
502
|
+
sending the next. If any command fails or times out, stops immediately and returns
|
|
503
|
+
the error response. If all commands succeed, returns the last command's response.
|
|
425
504
|
|
|
426
505
|
Args:
|
|
427
506
|
requests: List of CommandRequest models to send sequentially
|
|
428
507
|
machine_id: Machine ID to send the commands to
|
|
429
508
|
run_id: Run ID for all commands
|
|
509
|
+
user_id: User ID who initiated the commands
|
|
510
|
+
username: Username who initiated the commands
|
|
430
511
|
timeout: Maximum time to wait for each response in seconds
|
|
431
512
|
|
|
432
513
|
Returns:
|
|
@@ -447,74 +528,131 @@ class CommandService:
|
|
|
447
528
|
run_id
|
|
448
529
|
)
|
|
449
530
|
|
|
531
|
+
# Always send START command before sequence
|
|
532
|
+
logger.info("Sending START command before sequence")
|
|
533
|
+
start_response = await self.start_run(
|
|
534
|
+
machine_id=machine_id,
|
|
535
|
+
run_id=run_id,
|
|
536
|
+
user_id=user_id,
|
|
537
|
+
username=username,
|
|
538
|
+
timeout=timeout
|
|
539
|
+
)
|
|
540
|
+
if start_response is None:
|
|
541
|
+
logger.error("START command timed out")
|
|
542
|
+
return None
|
|
543
|
+
if start_response.response and start_response.response.status == CommandResponseStatus.ERROR:
|
|
544
|
+
logger.error("START command failed: %s", start_response.response.message)
|
|
545
|
+
return start_response
|
|
546
|
+
|
|
450
547
|
last_response: Optional[NATSMessage] = None
|
|
451
548
|
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
request
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
request=request,
|
|
463
|
-
machine_id=machine_id,
|
|
464
|
-
run_id=run_id,
|
|
465
|
-
timeout=timeout
|
|
466
|
-
)
|
|
467
|
-
|
|
468
|
-
# Check if command failed (None means timeout or exception)
|
|
469
|
-
if response is None:
|
|
470
|
-
logger.error(
|
|
471
|
-
"Command %d/%d failed or timed out: %s (step %s)",
|
|
549
|
+
try:
|
|
550
|
+
for idx, request in enumerate(requests, start=1):
|
|
551
|
+
# Validate request - convert dict to CommandRequest if needed
|
|
552
|
+
if isinstance(request, dict):
|
|
553
|
+
request = CommandRequest.model_validate(request)
|
|
554
|
+
elif not isinstance(request, CommandRequest):
|
|
555
|
+
raise ValueError(f"Request {idx} must be a CommandRequest or dict, got {type(request)}")
|
|
556
|
+
|
|
557
|
+
logger.info(
|
|
558
|
+
"Sending command %d/%d: %s (step %s)",
|
|
472
559
|
idx,
|
|
473
560
|
len(requests),
|
|
474
561
|
request.name,
|
|
475
562
|
request.step_number
|
|
476
563
|
)
|
|
477
|
-
return None
|
|
478
564
|
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
565
|
+
response = await self.send_queue_command(
|
|
566
|
+
request=request,
|
|
567
|
+
machine_id=machine_id,
|
|
568
|
+
run_id=run_id,
|
|
569
|
+
user_id=user_id,
|
|
570
|
+
username=username,
|
|
571
|
+
timeout=timeout
|
|
572
|
+
)
|
|
573
|
+
|
|
574
|
+
# Check if command failed (None means timeout or exception)
|
|
575
|
+
if response is None:
|
|
482
576
|
logger.error(
|
|
483
|
-
"Command %d/%d failed
|
|
577
|
+
"Command %d/%d failed or timed out: %s (step %s)",
|
|
578
|
+
idx,
|
|
579
|
+
len(requests),
|
|
580
|
+
request.name,
|
|
581
|
+
request.step_number
|
|
582
|
+
)
|
|
583
|
+
return None
|
|
584
|
+
|
|
585
|
+
# Check if command returned an error status
|
|
586
|
+
if response.response is not None:
|
|
587
|
+
if response.response.status == CommandResponseStatus.ERROR:
|
|
588
|
+
logger.error(
|
|
589
|
+
"Command %d/%d failed with error: %s (step %s) - code: %s, message: %s",
|
|
590
|
+
idx,
|
|
591
|
+
len(requests),
|
|
592
|
+
request.name,
|
|
593
|
+
request.step_number,
|
|
594
|
+
response.response.code,
|
|
595
|
+
response.response.message
|
|
596
|
+
)
|
|
597
|
+
return response
|
|
598
|
+
|
|
599
|
+
# Command succeeded, store as last response
|
|
600
|
+
last_response = response
|
|
601
|
+
logger.info(
|
|
602
|
+
"Command %d/%d succeeded: %s (step %s)",
|
|
484
603
|
idx,
|
|
485
604
|
len(requests),
|
|
486
605
|
request.name,
|
|
487
|
-
request.step_number
|
|
488
|
-
|
|
489
|
-
|
|
606
|
+
request.step_number
|
|
607
|
+
)
|
|
608
|
+
else:
|
|
609
|
+
# Response exists but has no response data (shouldn't happen, but handle it)
|
|
610
|
+
logger.warning(
|
|
611
|
+
"Command %d/%d returned response with no response data: %s (step %s)",
|
|
612
|
+
idx,
|
|
613
|
+
len(requests),
|
|
614
|
+
request.name,
|
|
615
|
+
request.step_number
|
|
490
616
|
)
|
|
491
617
|
return response
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
618
|
+
|
|
619
|
+
logger.info(
|
|
620
|
+
"All %d commands completed successfully",
|
|
621
|
+
len(requests)
|
|
622
|
+
)
|
|
623
|
+
|
|
624
|
+
# Always send COMPLETE command after successful sequence
|
|
625
|
+
logger.info("Sending COMPLETE command after successful sequence")
|
|
626
|
+
complete_response = await self.complete_run(
|
|
627
|
+
machine_id=machine_id,
|
|
628
|
+
run_id=run_id,
|
|
629
|
+
user_id=user_id,
|
|
630
|
+
username=username,
|
|
631
|
+
timeout=timeout
|
|
632
|
+
)
|
|
633
|
+
if complete_response is None:
|
|
634
|
+
logger.error("COMPLETE command timed out")
|
|
635
|
+
return None
|
|
636
|
+
if complete_response.response and complete_response.response.status == CommandResponseStatus.ERROR:
|
|
637
|
+
logger.error("COMPLETE command failed: %s", complete_response.response.message)
|
|
638
|
+
return complete_response
|
|
639
|
+
# Return the last command response, not the COMPLETE response
|
|
640
|
+
return last_response
|
|
641
|
+
except Exception as e:
|
|
642
|
+
# If any error occurs during command execution, try to complete the run
|
|
643
|
+
# to clean up state (but don't fail if this also fails)
|
|
644
|
+
logger.warning("Error during command sequence, attempting to complete run: %s", e)
|
|
645
|
+
try:
|
|
646
|
+
await self.complete_run(
|
|
647
|
+
machine_id=machine_id,
|
|
648
|
+
run_id=run_id,
|
|
649
|
+
user_id=user_id,
|
|
650
|
+
username=username,
|
|
651
|
+
timeout=timeout
|
|
510
652
|
)
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
"All %d commands completed successfully",
|
|
515
|
-
len(requests)
|
|
516
|
-
)
|
|
517
|
-
return last_response
|
|
653
|
+
except Exception as cleanup_error:
|
|
654
|
+
logger.error("Failed to complete run during error cleanup: %s", cleanup_error)
|
|
655
|
+
raise
|
|
518
656
|
|
|
519
657
|
async def send_immediate_command(
|
|
520
658
|
self,
|
|
@@ -522,6 +660,8 @@ class CommandService:
|
|
|
522
660
|
request: CommandRequest,
|
|
523
661
|
machine_id: str,
|
|
524
662
|
run_id: str,
|
|
663
|
+
user_id: str,
|
|
664
|
+
username: str,
|
|
525
665
|
timeout: int = 120
|
|
526
666
|
) -> Optional[NATSMessage]:
|
|
527
667
|
"""
|
|
@@ -531,6 +671,8 @@ class CommandService:
|
|
|
531
671
|
request: CommandRequest model containing command details
|
|
532
672
|
machine_id: Machine ID to send the command to
|
|
533
673
|
run_id: Run ID for the command
|
|
674
|
+
user_id: User ID who initiated the command
|
|
675
|
+
username: Username who initiated the command
|
|
534
676
|
timeout: Maximum time to wait for response in seconds
|
|
535
677
|
|
|
536
678
|
Returns:
|
|
@@ -555,7 +697,7 @@ class CommandService:
|
|
|
555
697
|
response_received = response_handler.register_pending(run_id, request.step_number)
|
|
556
698
|
|
|
557
699
|
# Build payload
|
|
558
|
-
payload = self._build_command_payload(request, machine_id, run_id)
|
|
700
|
+
payload = self._build_command_payload(request, machine_id, run_id, user_id, username)
|
|
559
701
|
|
|
560
702
|
try:
|
|
561
703
|
# Publish to JetStream
|
|
@@ -578,11 +720,7 @@ class CommandService:
|
|
|
578
720
|
await asyncio.sleep(0.1)
|
|
579
721
|
|
|
580
722
|
# Get the response
|
|
581
|
-
|
|
582
|
-
if response_data is None:
|
|
583
|
-
return None
|
|
584
|
-
|
|
585
|
-
return NATSMessage.model_validate(response_data)
|
|
723
|
+
return response_handler.get_response(run_id, request.step_number)
|
|
586
724
|
|
|
587
725
|
except Exception as e:
|
|
588
726
|
logger.error("Error sending immediate command: %s", e)
|
|
@@ -635,7 +773,9 @@ class CommandService:
|
|
|
635
773
|
self,
|
|
636
774
|
command_request: CommandRequest,
|
|
637
775
|
machine_id: str,
|
|
638
|
-
run_id: str
|
|
776
|
+
run_id: str,
|
|
777
|
+
user_id: str,
|
|
778
|
+
username: str
|
|
639
779
|
) -> NATSMessage:
|
|
640
780
|
"""
|
|
641
781
|
Build a command payload in the expected format.
|
|
@@ -643,17 +783,24 @@ class CommandService:
|
|
|
643
783
|
Args:
|
|
644
784
|
command_request: CommandRequest model containing command details
|
|
645
785
|
machine_id: Machine ID for the command
|
|
646
|
-
run_id: Run ID for the command
|
|
786
|
+
run_id: Run ID for the command (empty string will be converted to None)
|
|
787
|
+
user_id: User ID who initiated the command
|
|
788
|
+
username: Username who initiated the command
|
|
647
789
|
|
|
648
790
|
Returns:
|
|
649
791
|
NATSMessage object ready for NATS transmission
|
|
650
792
|
"""
|
|
793
|
+
# Convert empty string to None for run_id
|
|
794
|
+
run_id_value = run_id if run_id else None
|
|
795
|
+
|
|
651
796
|
header = MessageHeader(
|
|
652
797
|
message_type=MessageType.COMMAND,
|
|
653
798
|
version="1.0",
|
|
654
799
|
timestamp=datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
|
|
800
|
+
user_id=user_id,
|
|
801
|
+
username=username,
|
|
655
802
|
machine_id=machine_id,
|
|
656
|
-
run_id=
|
|
803
|
+
run_id=run_id_value
|
|
657
804
|
)
|
|
658
805
|
|
|
659
806
|
return NATSMessage(
|