osism 0.20250709.0__py3-none-any.whl → 0.20250823.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- osism/api.py +55 -1
- osism/commands/baremetal.py +370 -2
- osism/commands/compose.py +1 -1
- osism/commands/console.py +95 -7
- osism/commands/container.py +1 -1
- osism/commands/log.py +1 -1
- osism/commands/netbox.py +35 -12
- osism/commands/sonic.py +197 -2
- osism/settings.py +1 -1
- osism/tasks/__init__.py +43 -0
- osism/tasks/conductor/__init__.py +2 -2
- osism/tasks/conductor/config.py +3 -3
- osism/tasks/conductor/ironic.py +48 -21
- osism/tasks/conductor/netbox.py +58 -1
- osism/tasks/conductor/sonic/config_generator.py +341 -26
- osism/tasks/conductor/sonic/connections.py +123 -0
- osism/tasks/conductor/sonic/interface.py +13 -1
- osism/tasks/conductor/utils.py +9 -2
- osism/tasks/openstack.py +35 -15
- osism/utils/__init__.py +36 -3
- osism/utils/ssh.py +250 -0
- {osism-0.20250709.0.dist-info → osism-0.20250823.0.dist-info}/METADATA +9 -9
- {osism-0.20250709.0.dist-info → osism-0.20250823.0.dist-info}/RECORD +29 -28
- {osism-0.20250709.0.dist-info → osism-0.20250823.0.dist-info}/entry_points.txt +10 -0
- osism-0.20250823.0.dist-info/licenses/AUTHORS +1 -0
- osism-0.20250823.0.dist-info/pbr.json +1 -0
- osism-0.20250709.0.dist-info/licenses/AUTHORS +0 -1
- osism-0.20250709.0.dist-info/pbr.json +0 -1
- {osism-0.20250709.0.dist-info → osism-0.20250823.0.dist-info}/WHEEL +0 -0
- {osism-0.20250709.0.dist-info → osism-0.20250823.0.dist-info}/licenses/LICENSE +0 -0
- {osism-0.20250709.0.dist-info → osism-0.20250823.0.dist-info}/top_level.txt +0 -0
osism/api.py
CHANGED
@@ -10,7 +10,7 @@ from fastapi import FastAPI, Header, Request, Response, HTTPException, status
|
|
10
10
|
from pydantic import BaseModel, Field
|
11
11
|
from starlette.middleware.cors import CORSMiddleware
|
12
12
|
|
13
|
-
from osism.tasks import reconciler
|
13
|
+
from osism.tasks import reconciler, openstack
|
14
14
|
from osism import utils
|
15
15
|
from osism.services.listener import BaremetalEvents
|
16
16
|
|
@@ -109,6 +109,35 @@ class DeviceSearchResult(BaseModel):
|
|
109
109
|
device: Optional[str] = Field(None, description="Device name if found")
|
110
110
|
|
111
111
|
|
112
|
+
class BaremetalNode(BaseModel):
|
113
|
+
uuid: str = Field(..., description="Unique identifier of the node")
|
114
|
+
name: Optional[str] = Field(None, description="Name of the node")
|
115
|
+
power_state: Optional[str] = Field(None, description="Current power state")
|
116
|
+
provision_state: Optional[str] = Field(None, description="Current provision state")
|
117
|
+
maintenance: bool = Field(..., description="Whether node is in maintenance mode")
|
118
|
+
instance_uuid: Optional[str] = Field(
|
119
|
+
None, description="UUID of associated instance"
|
120
|
+
)
|
121
|
+
driver: Optional[str] = Field(None, description="Driver used for the node")
|
122
|
+
resource_class: Optional[str] = Field(
|
123
|
+
None, description="Resource class of the node"
|
124
|
+
)
|
125
|
+
properties: Dict[str, Any] = Field(
|
126
|
+
default_factory=dict, description="Node properties"
|
127
|
+
)
|
128
|
+
extra: Dict[str, Any] = Field(
|
129
|
+
default_factory=dict, description="Extra node information"
|
130
|
+
)
|
131
|
+
last_error: Optional[str] = Field(None, description="Last error message")
|
132
|
+
created_at: Optional[str] = Field(None, description="Creation timestamp")
|
133
|
+
updated_at: Optional[str] = Field(None, description="Last update timestamp")
|
134
|
+
|
135
|
+
|
136
|
+
class BaremetalNodesResponse(BaseModel):
|
137
|
+
nodes: list[BaremetalNode] = Field(..., description="List of baremetal nodes")
|
138
|
+
count: int = Field(..., description="Total number of nodes")
|
139
|
+
|
140
|
+
|
112
141
|
def find_device_by_identifier(identifier: str):
|
113
142
|
"""Find a device in NetBox by various identifiers."""
|
114
143
|
if not utils.nb:
|
@@ -188,6 +217,31 @@ async def write_sink_events(request: Request) -> SinkResponse:
|
|
188
217
|
)
|
189
218
|
|
190
219
|
|
220
|
+
@app.get(
|
221
|
+
"/v1/baremetal/nodes", response_model=BaremetalNodesResponse, tags=["baremetal"]
|
222
|
+
)
|
223
|
+
async def get_baremetal_nodes_list() -> BaremetalNodesResponse:
|
224
|
+
"""Get list of all baremetal nodes managed by Ironic.
|
225
|
+
|
226
|
+
Returns information similar to the 'baremetal list' command,
|
227
|
+
including node details, power state, provision state, and more.
|
228
|
+
"""
|
229
|
+
try:
|
230
|
+
# Use the generalized function to get baremetal nodes
|
231
|
+
nodes_data = openstack.get_baremetal_nodes()
|
232
|
+
|
233
|
+
# Convert to response model
|
234
|
+
nodes = [BaremetalNode(**node) for node in nodes_data]
|
235
|
+
|
236
|
+
return BaremetalNodesResponse(nodes=nodes, count=len(nodes))
|
237
|
+
except Exception as e:
|
238
|
+
logger.error(f"Error retrieving baremetal nodes: {str(e)}")
|
239
|
+
raise HTTPException(
|
240
|
+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
241
|
+
detail=f"Failed to retrieve baremetal nodes: {str(e)}",
|
242
|
+
)
|
243
|
+
|
244
|
+
|
191
245
|
@app.post("/v1/notifications/baremetal", status_code=204, tags=["notifications"])
|
192
246
|
async def notifications_baremetal(notification: NotificationBaremetal) -> None:
|
193
247
|
"""Handle baremetal notifications."""
|
osism/commands/baremetal.py
CHANGED
@@ -1,9 +1,12 @@
|
|
1
1
|
# SPDX-License-Identifier: Apache-2.0
|
2
2
|
|
3
3
|
from cliff.command import Command
|
4
|
+
from argparse import BooleanOptionalAction
|
4
5
|
|
5
6
|
import tempfile
|
6
7
|
import os
|
8
|
+
import subprocess
|
9
|
+
import threading
|
7
10
|
from loguru import logger
|
8
11
|
import openstack
|
9
12
|
from tabulate import tabulate
|
@@ -13,6 +16,9 @@ from openstack.baremetal import configdrive as configdrive_builder
|
|
13
16
|
|
14
17
|
from osism.commands import get_cloud_connection
|
15
18
|
from osism import utils
|
19
|
+
from osism.tasks.conductor.netbox import get_nb_device_query_list_ironic
|
20
|
+
from osism.tasks import netbox
|
21
|
+
from osism.utils.ssh import cleanup_ssh_known_hosts_for_node
|
16
22
|
|
17
23
|
|
18
24
|
class BaremetalList(Command):
|
@@ -57,6 +63,8 @@ class BaremetalList(Command):
|
|
57
63
|
for b in baremetal
|
58
64
|
]
|
59
65
|
|
66
|
+
result.sort(key=lambda x: x[0])
|
67
|
+
|
60
68
|
print(
|
61
69
|
tabulate(
|
62
70
|
result,
|
@@ -206,7 +214,7 @@ class BaremetalDeploy(Command):
|
|
206
214
|
|
207
215
|
playbook = []
|
208
216
|
play = {
|
209
|
-
"name": "Run bootstrap
|
217
|
+
"name": "Run bootstrap",
|
210
218
|
"hosts": "localhost",
|
211
219
|
"connection": "local",
|
212
220
|
"gather_facts": True,
|
@@ -216,6 +224,15 @@ class BaremetalDeploy(Command):
|
|
216
224
|
"osism.commons.hosts",
|
217
225
|
"osism.commons.operator",
|
218
226
|
],
|
227
|
+
"tasks": [
|
228
|
+
{
|
229
|
+
"name": "Restart rsyslog service after hostname change",
|
230
|
+
"ansible.builtin.systemd": {
|
231
|
+
"name": "rsyslog",
|
232
|
+
"state": "restarted",
|
233
|
+
},
|
234
|
+
}
|
235
|
+
],
|
219
236
|
}
|
220
237
|
play["vars"].update(
|
221
238
|
{"hostname_name": node.name, "hosts_type": "template"}
|
@@ -321,9 +338,30 @@ class BaremetalUndeploy(Command):
|
|
321
338
|
if not node:
|
322
339
|
continue
|
323
340
|
|
324
|
-
if node.provision_state in [
|
341
|
+
if node.provision_state in [
|
342
|
+
"active",
|
343
|
+
"wait call-back",
|
344
|
+
"deploy failed",
|
345
|
+
"error",
|
346
|
+
]:
|
325
347
|
try:
|
326
348
|
node = conn.baremetal.set_node_provision_state(node.id, "undeploy")
|
349
|
+
logger.info(
|
350
|
+
f"Successfully initiated undeploy for node {node.name} ({node.id})"
|
351
|
+
)
|
352
|
+
|
353
|
+
# Clean up SSH known_hosts entries for the undeployed node
|
354
|
+
logger.info(f"Cleaning up SSH known_hosts entries for {node.name}")
|
355
|
+
result = cleanup_ssh_known_hosts_for_node(node.name)
|
356
|
+
if result:
|
357
|
+
logger.info(
|
358
|
+
f"SSH known_hosts cleanup completed successfully for {node.name}"
|
359
|
+
)
|
360
|
+
else:
|
361
|
+
logger.warning(
|
362
|
+
f"SSH known_hosts cleanup completed with warnings for {node.name}"
|
363
|
+
)
|
364
|
+
|
327
365
|
except Exception as exc:
|
328
366
|
logger.warning(
|
329
367
|
f"Node {node.name} ({node.id}) could not be moved to available state: {exc}"
|
@@ -333,3 +371,333 @@ class BaremetalUndeploy(Command):
|
|
333
371
|
logger.warning(
|
334
372
|
f"Node {node.name} ({node.id}) not in supported provision state"
|
335
373
|
)
|
374
|
+
|
375
|
+
|
376
|
+
class BaremetalPing(Command):
|
377
|
+
def get_parser(self, prog_name):
|
378
|
+
parser = super(BaremetalPing, self).get_parser(prog_name)
|
379
|
+
parser.add_argument(
|
380
|
+
"name",
|
381
|
+
nargs="?",
|
382
|
+
type=str,
|
383
|
+
help="Ping specific baremetal node by name",
|
384
|
+
)
|
385
|
+
return parser
|
386
|
+
|
387
|
+
def _ping_host(self, host, results, host_name):
|
388
|
+
"""Ping a host 3 times and store results."""
|
389
|
+
try:
|
390
|
+
result = subprocess.run(
|
391
|
+
["ping", "-c", "3", "-W", "5", host],
|
392
|
+
capture_output=True,
|
393
|
+
text=True,
|
394
|
+
timeout=20,
|
395
|
+
)
|
396
|
+
|
397
|
+
if result.returncode == 0:
|
398
|
+
output_lines = result.stdout.strip().split("\n")
|
399
|
+
stats_line = [line for line in output_lines if "packet loss" in line]
|
400
|
+
if stats_line:
|
401
|
+
loss_info = stats_line[0]
|
402
|
+
if "0% packet loss" in loss_info:
|
403
|
+
status = "SUCCESS"
|
404
|
+
else:
|
405
|
+
status = f"PARTIAL ({loss_info.split(',')[2].strip()})"
|
406
|
+
else:
|
407
|
+
status = "SUCCESS"
|
408
|
+
|
409
|
+
time_lines = [
|
410
|
+
line
|
411
|
+
for line in output_lines
|
412
|
+
if "round-trip" in line or "rtt" in line
|
413
|
+
]
|
414
|
+
if time_lines:
|
415
|
+
time_info = (
|
416
|
+
time_lines[0].split("=")[-1].strip()
|
417
|
+
if "=" in time_lines[0]
|
418
|
+
else "N/A"
|
419
|
+
)
|
420
|
+
else:
|
421
|
+
time_info = "N/A"
|
422
|
+
else:
|
423
|
+
status = "FAILED"
|
424
|
+
time_info = "N/A"
|
425
|
+
|
426
|
+
except (
|
427
|
+
subprocess.TimeoutExpired,
|
428
|
+
subprocess.CalledProcessError,
|
429
|
+
Exception,
|
430
|
+
) as e:
|
431
|
+
status = "ERROR"
|
432
|
+
time_info = str(e)[:50]
|
433
|
+
|
434
|
+
results[host_name] = {"host": host, "status": status, "time_info": time_info}
|
435
|
+
|
436
|
+
def take_action(self, parsed_args):
|
437
|
+
name = parsed_args.name
|
438
|
+
|
439
|
+
if not utils.nb:
|
440
|
+
logger.error("NetBox connection not available")
|
441
|
+
return
|
442
|
+
|
443
|
+
conn = get_cloud_connection()
|
444
|
+
|
445
|
+
try:
|
446
|
+
if name:
|
447
|
+
devices = [utils.nb.dcim.devices.get(name=name)]
|
448
|
+
if not devices[0]:
|
449
|
+
logger.error(f"Device {name} not found in NetBox")
|
450
|
+
return
|
451
|
+
else:
|
452
|
+
# Use the NETBOX_FILTER_CONDUCTOR_IRONIC setting to get devices
|
453
|
+
devices = set()
|
454
|
+
nb_device_query_list = get_nb_device_query_list_ironic()
|
455
|
+
for nb_device_query in nb_device_query_list:
|
456
|
+
devices |= set(netbox.get_devices(**nb_device_query))
|
457
|
+
devices = list(devices)
|
458
|
+
|
459
|
+
# Additionally filter by power state and provision state
|
460
|
+
filtered_devices = []
|
461
|
+
for device in devices:
|
462
|
+
if (
|
463
|
+
hasattr(device, "custom_fields")
|
464
|
+
and device.custom_fields
|
465
|
+
and device.custom_fields.get("power_state") == "power on"
|
466
|
+
and device.custom_fields.get("provision_state") == "active"
|
467
|
+
):
|
468
|
+
filtered_devices.append(device)
|
469
|
+
devices = filtered_devices
|
470
|
+
|
471
|
+
if not devices:
|
472
|
+
logger.info(
|
473
|
+
"No devices found matching criteria (managed-by-ironic, power on, active)"
|
474
|
+
)
|
475
|
+
return
|
476
|
+
|
477
|
+
ping_candidates = []
|
478
|
+
for device in devices:
|
479
|
+
if device.primary_ip4:
|
480
|
+
ip_address = str(device.primary_ip4.address).split("/")[0]
|
481
|
+
ping_candidates.append({"name": device.name, "ip": ip_address})
|
482
|
+
else:
|
483
|
+
logger.warning(f"Device {device.name} has no primary IPv4 address")
|
484
|
+
|
485
|
+
if not ping_candidates:
|
486
|
+
logger.info("No devices found with primary IPv4 addresses")
|
487
|
+
return
|
488
|
+
|
489
|
+
logger.info(f"Pinging {len(ping_candidates)} nodes (3 pings each)...")
|
490
|
+
|
491
|
+
results = {}
|
492
|
+
threads = []
|
493
|
+
|
494
|
+
for candidate in ping_candidates:
|
495
|
+
thread = threading.Thread(
|
496
|
+
target=self._ping_host,
|
497
|
+
args=(candidate["ip"], results, candidate["name"]),
|
498
|
+
)
|
499
|
+
threads.append(thread)
|
500
|
+
thread.start()
|
501
|
+
|
502
|
+
for thread in threads:
|
503
|
+
thread.join()
|
504
|
+
|
505
|
+
table_data = []
|
506
|
+
success_count = 0
|
507
|
+
failed_count = 0
|
508
|
+
|
509
|
+
for device_name in sorted(results.keys()):
|
510
|
+
result = results[device_name]
|
511
|
+
table_data.append(
|
512
|
+
[device_name, result["host"], result["status"], result["time_info"]]
|
513
|
+
)
|
514
|
+
|
515
|
+
if result["status"] == "SUCCESS":
|
516
|
+
success_count += 1
|
517
|
+
elif result["status"].startswith("PARTIAL"):
|
518
|
+
failed_count += 1
|
519
|
+
else:
|
520
|
+
failed_count += 1
|
521
|
+
|
522
|
+
print(
|
523
|
+
tabulate(
|
524
|
+
table_data,
|
525
|
+
headers=["Name", "IP Address", "Status", "Time Info"],
|
526
|
+
tablefmt="psql",
|
527
|
+
)
|
528
|
+
)
|
529
|
+
|
530
|
+
print(
|
531
|
+
f"\nSummary: {success_count} successful, {failed_count} failed/partial out of {len(ping_candidates)} total"
|
532
|
+
)
|
533
|
+
|
534
|
+
except Exception as e:
|
535
|
+
logger.error(f"Error during ping operation: {e}")
|
536
|
+
return
|
537
|
+
|
538
|
+
|
539
|
+
class BaremetalBurnIn(Command):
|
540
|
+
def get_parser(self, prog_name):
|
541
|
+
parser = super(BaremetalBurnIn, self).get_parser(prog_name)
|
542
|
+
|
543
|
+
parser.add_argument(
|
544
|
+
"name",
|
545
|
+
nargs="?",
|
546
|
+
type=str,
|
547
|
+
help="Run burn-in on given baremetal node when in provision state available",
|
548
|
+
)
|
549
|
+
parser.add_argument(
|
550
|
+
"--all",
|
551
|
+
default=False,
|
552
|
+
help="Run burn-in on all baremetal nodes in provision state available",
|
553
|
+
action="store_true",
|
554
|
+
)
|
555
|
+
parser.add_argument(
|
556
|
+
"--cpu",
|
557
|
+
default=True,
|
558
|
+
help="Enable CPU burn-in",
|
559
|
+
action=BooleanOptionalAction,
|
560
|
+
)
|
561
|
+
parser.add_argument(
|
562
|
+
"--memory",
|
563
|
+
default=True,
|
564
|
+
help="Enable memory burn-in",
|
565
|
+
action=BooleanOptionalAction,
|
566
|
+
)
|
567
|
+
parser.add_argument(
|
568
|
+
"--disk",
|
569
|
+
default=True,
|
570
|
+
help="Enable disk burn-in",
|
571
|
+
action=BooleanOptionalAction,
|
572
|
+
)
|
573
|
+
return parser
|
574
|
+
|
575
|
+
def take_action(self, parsed_args):
|
576
|
+
all_nodes = parsed_args.all
|
577
|
+
name = parsed_args.name
|
578
|
+
|
579
|
+
stressor = {}
|
580
|
+
stressor["cpu"] = parsed_args.cpu
|
581
|
+
stressor["memory"] = parsed_args.memory
|
582
|
+
stressor["disk"] = parsed_args.disk
|
583
|
+
|
584
|
+
if not all_nodes and not name:
|
585
|
+
logger.error("Please specify a node name or use --all")
|
586
|
+
return
|
587
|
+
|
588
|
+
clean_steps = []
|
589
|
+
for step, activated in stressor.items():
|
590
|
+
if activated:
|
591
|
+
clean_steps.append({"step": "burnin_" + step, "interface": "deploy"})
|
592
|
+
if not clean_steps:
|
593
|
+
logger.error(
|
594
|
+
f"Please specify at least one of {', '.join(stressor.keys())} for burn-in"
|
595
|
+
)
|
596
|
+
return
|
597
|
+
|
598
|
+
conn = get_cloud_connection()
|
599
|
+
|
600
|
+
if all_nodes:
|
601
|
+
burn_in_nodes = list(conn.baremetal.nodes(details=True))
|
602
|
+
else:
|
603
|
+
node = conn.baremetal.find_node(name, ignore_missing=True, details=True)
|
604
|
+
if not node:
|
605
|
+
logger.warning(f"Could not find node {name}")
|
606
|
+
return
|
607
|
+
burn_in_nodes = [node]
|
608
|
+
|
609
|
+
for node in burn_in_nodes:
|
610
|
+
if not node:
|
611
|
+
continue
|
612
|
+
|
613
|
+
if node.provision_state in ["available"]:
|
614
|
+
# NOTE: Burn-In is available in the "manageable" provision state, so we move the node into this state
|
615
|
+
try:
|
616
|
+
node = conn.baremetal.set_node_provision_state(node.id, "manage")
|
617
|
+
node = conn.baremetal.wait_for_nodes_provision_state(
|
618
|
+
[node.id], "manageable"
|
619
|
+
)[0]
|
620
|
+
except Exception as exc:
|
621
|
+
logger.warning(
|
622
|
+
f"Node {node.name} ({node.id}) could not be moved to manageable state: {exc}"
|
623
|
+
)
|
624
|
+
continue
|
625
|
+
|
626
|
+
if node.provision_state in ["manageable"]:
|
627
|
+
try:
|
628
|
+
conn.baremetal.set_node_provision_state(
|
629
|
+
node.id, "clean", clean_steps=clean_steps
|
630
|
+
)
|
631
|
+
except Exception as exc:
|
632
|
+
logger.warning(
|
633
|
+
f"Burn-In of node {node.name} ({node.id}) failed: {exc}"
|
634
|
+
)
|
635
|
+
continue
|
636
|
+
else:
|
637
|
+
logger.warning(
|
638
|
+
f"Node {node.name} ({node.id}) not in supported state! Provision state: {node.provision_state}, maintenance mode: {node['maintenance']}"
|
639
|
+
)
|
640
|
+
continue
|
641
|
+
|
642
|
+
|
643
|
+
class BaremetalMaintenanceSet(Command):
|
644
|
+
def get_parser(self, prog_name):
|
645
|
+
parser = super(BaremetalMaintenanceSet, self).get_parser(prog_name)
|
646
|
+
|
647
|
+
parser.add_argument(
|
648
|
+
"name",
|
649
|
+
nargs="?",
|
650
|
+
type=str,
|
651
|
+
help="Set maintenance on given baremetal node",
|
652
|
+
)
|
653
|
+
parser.add_argument(
|
654
|
+
"--reason",
|
655
|
+
default=None,
|
656
|
+
type=str,
|
657
|
+
help="Reason for maintenance",
|
658
|
+
)
|
659
|
+
return parser
|
660
|
+
|
661
|
+
def take_action(self, parsed_args):
|
662
|
+
name = parsed_args.name
|
663
|
+
reason = parsed_args.reason
|
664
|
+
|
665
|
+
conn = get_cloud_connection()
|
666
|
+
node = conn.baremetal.find_node(name, ignore_missing=True, details=True)
|
667
|
+
if not node:
|
668
|
+
logger.warning(f"Could not find node {name}")
|
669
|
+
return
|
670
|
+
try:
|
671
|
+
conn.baremetal.set_node_maintenance(node, reason=reason)
|
672
|
+
except Exception as exc:
|
673
|
+
logger.error(
|
674
|
+
f"Setting maintenance mode on node {node.name} ({node.id}) failed: {exc}"
|
675
|
+
)
|
676
|
+
|
677
|
+
|
678
|
+
class BaremetalMaintenanceUnset(Command):
|
679
|
+
def get_parser(self, prog_name):
|
680
|
+
parser = super(BaremetalMaintenanceUnset, self).get_parser(prog_name)
|
681
|
+
|
682
|
+
parser.add_argument(
|
683
|
+
"name",
|
684
|
+
nargs="?",
|
685
|
+
type=str,
|
686
|
+
help="Unset maintenance on given baremetal node",
|
687
|
+
)
|
688
|
+
return parser
|
689
|
+
|
690
|
+
def take_action(self, parsed_args):
|
691
|
+
name = parsed_args.name
|
692
|
+
|
693
|
+
conn = get_cloud_connection()
|
694
|
+
node = conn.baremetal.find_node(name, ignore_missing=True, details=True)
|
695
|
+
if not node:
|
696
|
+
logger.warning(f"Could not find node {name}")
|
697
|
+
return
|
698
|
+
try:
|
699
|
+
conn.baremetal.unset_node_maintenance(node)
|
700
|
+
except Exception as exc:
|
701
|
+
logger.error(
|
702
|
+
f"Unsetting maintenance mode on node {node.name} ({node.id}) failed: {exc}"
|
703
|
+
)
|
osism/commands/compose.py
CHANGED
@@ -26,7 +26,7 @@ class Run(Command):
|
|
26
26
|
ssh_command = (
|
27
27
|
f"docker compose --project-directory=/opt/{environment} {arguments}"
|
28
28
|
)
|
29
|
-
ssh_options = "-o StrictHostKeyChecking=no -o LogLevel=ERROR"
|
29
|
+
ssh_options = "-o StrictHostKeyChecking=no -o LogLevel=ERROR -o UserKnownHostsFile=/share/known_hosts"
|
30
30
|
|
31
31
|
# FIXME: use paramiko or something else more Pythonic + make operator user + key configurable
|
32
32
|
subprocess.call(
|
osism/commands/console.py
CHANGED
@@ -1,10 +1,94 @@
|
|
1
1
|
# SPDX-License-Identifier: Apache-2.0
|
2
2
|
|
3
|
+
import socket
|
3
4
|
import subprocess
|
5
|
+
from typing import Optional
|
4
6
|
|
5
7
|
from cliff.command import Command
|
8
|
+
from loguru import logger
|
6
9
|
from prompt_toolkit import prompt
|
7
10
|
|
11
|
+
from osism import utils
|
12
|
+
|
13
|
+
|
14
|
+
def resolve_hostname_to_ip(hostname: str) -> Optional[str]:
|
15
|
+
"""
|
16
|
+
Attempt to resolve hostname to IPv4 address using DNS.
|
17
|
+
|
18
|
+
Args:
|
19
|
+
hostname: The hostname to resolve
|
20
|
+
|
21
|
+
Returns:
|
22
|
+
IPv4 address string if successful, None if resolution fails
|
23
|
+
"""
|
24
|
+
try:
|
25
|
+
ip_address = socket.gethostbyname(hostname)
|
26
|
+
logger.debug(f"Resolved hostname {hostname} to {ip_address}")
|
27
|
+
return ip_address
|
28
|
+
except socket.gaierror as e:
|
29
|
+
logger.debug(f"DNS resolution failed for {hostname}: {e}")
|
30
|
+
return None
|
31
|
+
|
32
|
+
|
33
|
+
def get_primary_ipv4_from_netbox(hostname: str) -> Optional[str]:
|
34
|
+
"""
|
35
|
+
Retrieve primary IPv4 address for hostname from Netbox.
|
36
|
+
|
37
|
+
Args:
|
38
|
+
hostname: The hostname to look up in Netbox
|
39
|
+
|
40
|
+
Returns:
|
41
|
+
Primary IPv4 address string if found, None otherwise
|
42
|
+
"""
|
43
|
+
if not utils.nb:
|
44
|
+
logger.debug("Netbox integration not available")
|
45
|
+
return None
|
46
|
+
|
47
|
+
try:
|
48
|
+
device = utils.nb.dcim.devices.get(name=hostname)
|
49
|
+
if device and device.primary_ip4:
|
50
|
+
ip_address = str(device.primary_ip4.address).split("/")[0]
|
51
|
+
logger.info(f"Found primary IPv4 for {hostname} in Netbox: {ip_address}")
|
52
|
+
return ip_address
|
53
|
+
else:
|
54
|
+
logger.debug(f"No device or primary IPv4 found for {hostname} in Netbox")
|
55
|
+
return None
|
56
|
+
except Exception as e:
|
57
|
+
logger.warning(f"Error querying Netbox for {hostname}: {e}")
|
58
|
+
return None
|
59
|
+
|
60
|
+
|
61
|
+
def resolve_host_with_fallback(hostname: str) -> str:
|
62
|
+
"""
|
63
|
+
Resolve hostname with Netbox fallback.
|
64
|
+
|
65
|
+
First attempts DNS resolution. If that fails and Netbox integration is enabled,
|
66
|
+
attempts to retrieve the primary IPv4 address from Netbox.
|
67
|
+
|
68
|
+
Args:
|
69
|
+
hostname: The hostname to resolve
|
70
|
+
|
71
|
+
Returns:
|
72
|
+
Resolved IP address or original hostname if all resolution attempts fail
|
73
|
+
"""
|
74
|
+
# First try DNS resolution
|
75
|
+
ip_address = resolve_hostname_to_ip(hostname)
|
76
|
+
if ip_address:
|
77
|
+
return ip_address
|
78
|
+
|
79
|
+
# Fallback to Netbox if DNS resolution failed
|
80
|
+
logger.info(f"DNS resolution failed for {hostname}, trying Netbox fallback")
|
81
|
+
netbox_ip = get_primary_ipv4_from_netbox(hostname)
|
82
|
+
if netbox_ip:
|
83
|
+
logger.info(f"Using IPv4 address {netbox_ip} from Netbox for {hostname}")
|
84
|
+
return netbox_ip
|
85
|
+
|
86
|
+
# If both methods fail, return original hostname and let SSH handle the error
|
87
|
+
logger.warning(
|
88
|
+
f"Could not resolve {hostname} via DNS or Netbox, using original hostname"
|
89
|
+
)
|
90
|
+
return hostname
|
91
|
+
|
8
92
|
|
9
93
|
class Run(Command):
|
10
94
|
def get_parser(self, prog_name):
|
@@ -45,7 +129,7 @@ class Run(Command):
|
|
45
129
|
type_console = "clush"
|
46
130
|
host = host[1:]
|
47
131
|
|
48
|
-
ssh_options = "-o StrictHostKeyChecking=no -o LogLevel=ERROR"
|
132
|
+
ssh_options = "-o StrictHostKeyChecking=no -o LogLevel=ERROR -o UserKnownHostsFile=/share/known_hosts"
|
49
133
|
|
50
134
|
if type_console == "ansible":
|
51
135
|
subprocess.call(f"/run-ansible-console.sh {host}", shell=True)
|
@@ -55,9 +139,11 @@ class Run(Command):
|
|
55
139
|
shell=True,
|
56
140
|
)
|
57
141
|
elif type_console == "ssh":
|
142
|
+
# Resolve hostname with Netbox fallback
|
143
|
+
resolved_host = resolve_host_with_fallback(host)
|
58
144
|
# FIXME: use paramiko or something else more Pythonic + make operator user + key configurable
|
59
145
|
subprocess.call(
|
60
|
-
f"/usr/bin/ssh -i /ansible/secrets/id_rsa.operator {ssh_options} dragon@{
|
146
|
+
f"/usr/bin/ssh -i /ansible/secrets/id_rsa.operator {ssh_options} dragon@{resolved_host}",
|
61
147
|
shell=True,
|
62
148
|
)
|
63
149
|
elif type_console == "container_prompt":
|
@@ -67,9 +153,11 @@ class Run(Command):
|
|
67
153
|
break
|
68
154
|
|
69
155
|
ssh_command = f"docker {command}"
|
156
|
+
# Resolve hostname with Netbox fallback
|
157
|
+
resolved_host = resolve_host_with_fallback(host[:-1])
|
70
158
|
# FIXME: use paramiko or something else more Pythonic + make operator user + key configurable
|
71
159
|
subprocess.call(
|
72
|
-
f"/usr/bin/ssh -i /ansible/secrets/id_rsa.operator {ssh_options} dragon@{
|
160
|
+
f"/usr/bin/ssh -i /ansible/secrets/id_rsa.operator {ssh_options} dragon@{resolved_host} {ssh_command}",
|
73
161
|
shell=True,
|
74
162
|
)
|
75
163
|
elif type_console == "container":
|
@@ -78,12 +166,12 @@ class Run(Command):
|
|
78
166
|
target_command = "bash"
|
79
167
|
|
80
168
|
ssh_command = f"docker exec -it {target_containername} {target_command}"
|
81
|
-
ssh_options =
|
82
|
-
"-o RequestTTY=force -o StrictHostKeyChecking=no -o LogLevel=ERROR"
|
83
|
-
)
|
169
|
+
ssh_options = "-o RequestTTY=force -o StrictHostKeyChecking=no -o LogLevel=ERROR -o UserKnownHostsFile=/share/known_hosts"
|
84
170
|
|
171
|
+
# Resolve hostname with Netbox fallback
|
172
|
+
resolved_target_host = resolve_host_with_fallback(target_host)
|
85
173
|
# FIXME: use paramiko or something else more Pythonic + make operator user + key configurable
|
86
174
|
subprocess.call(
|
87
|
-
f"/usr/bin/ssh -i /ansible/secrets/id_rsa.operator {ssh_options} dragon@{
|
175
|
+
f"/usr/bin/ssh -i /ansible/secrets/id_rsa.operator {ssh_options} dragon@{resolved_target_host} {ssh_command}",
|
88
176
|
shell=True,
|
89
177
|
)
|
osism/commands/container.py
CHANGED
@@ -23,7 +23,7 @@ class Run(Command):
|
|
23
23
|
host = parsed_args.host[0]
|
24
24
|
command = " ".join(parsed_args.command)
|
25
25
|
|
26
|
-
ssh_options = "-o StrictHostKeyChecking=no -o LogLevel=ERROR"
|
26
|
+
ssh_options = "-o StrictHostKeyChecking=no -o LogLevel=ERROR -o UserKnownHostsFile=/share/known_hosts"
|
27
27
|
|
28
28
|
if not command:
|
29
29
|
while True:
|
osism/commands/log.py
CHANGED
@@ -52,7 +52,7 @@ class Container(Command):
|
|
52
52
|
parameters = " ".join(parsed_args.parameter)
|
53
53
|
|
54
54
|
ssh_command = f"docker logs {parameters} {container_name}"
|
55
|
-
ssh_options = "-o StrictHostKeyChecking=no -o LogLevel=ERROR"
|
55
|
+
ssh_options = "-o StrictHostKeyChecking=no -o LogLevel=ERROR -o UserKnownHostsFile=/share/known_hosts"
|
56
56
|
|
57
57
|
# FIXME: use paramiko or something else more Pythonic + make operator user + key configurable
|
58
58
|
subprocess.call(
|