salt-api-cli 1.4.2__tar.gz → 1.4.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: salt-api-cli
3
- Version: 1.4.2
3
+ Version: 1.4.4
4
4
  Summary: CLI to access salt-api
5
5
  Author-email: Pradish Bijukchhe <pradish@sandbox.com.np>
6
6
  License-Expression: MIT
@@ -41,7 +41,7 @@ from rich.spinner import Spinner
41
41
  from rich.table import Table
42
42
  from rich.text import Text
43
43
 
44
- from salt_api_cli.lowlevel import split_args
44
+ from salt_api_cli.lowlevel import SaltApiError, split_args
45
45
 
46
46
  console = Console()
47
47
 
@@ -177,8 +177,23 @@ def _print_state_return(minion: str, states: dict[str, Any]) -> None:
177
177
  if status == "ok":
178
178
  detail: str | Text = ""
179
179
  elif status == "change":
180
- changed = ", ".join(state.get("changes", {})) or "(changes)"
181
- detail = f"changed: {_short(changed)}"
180
+ changes = state.get("changes", {})
181
+ if "stdout" in changes:
182
+ # cmd.run-style changes: show the command's output itself
183
+ # rather than the fixed pid/retcode/stdout/stderr key list.
184
+ out = (
185
+ str(changes.get("stdout") or "").strip()
186
+ or str(changes.get("stderr") or "").strip()
187
+ )
188
+ # Full output, folded across lines, so nothing is cut off.
189
+ detail = (
190
+ Text(out, no_wrap=False, overflow="fold")
191
+ if out
192
+ else "changed: (no output)"
193
+ )
194
+ else:
195
+ changed = ", ".join(changes) or "(changes)"
196
+ detail = f"changed: {_short(changed)}"
182
197
  elif status == "fail":
183
198
  detail = Text(_short(state.get("comment", ""), 240), style="red")
184
199
  else: # diff / skip
@@ -193,7 +208,12 @@ def _print_state_return(minion: str, states: dict[str, Any]) -> None:
193
208
  fn_w = max((len(fn) for _, fn, _, _ in rows), default=8)
194
209
  ref_w = max((len(ref) for _, _, ref, _ in rows), default=8)
195
210
  nat_w = max(
196
- (len(d.plain if isinstance(d, Text) else d) for _, _, _, d in rows), default=0
211
+ (
212
+ len(line)
213
+ for _, _, _, d in rows
214
+ for line in (d.plain if isinstance(d, Text) else d).splitlines()
215
+ ),
216
+ default=0,
197
217
  )
198
218
  detail_w = min(nat_w, max(20, console.width - 2 - 1 - fn_w - ref_w - 3 * 2))
199
219
 
@@ -242,7 +262,7 @@ def _print_state_result(result: dict[str, Any]) -> None:
242
262
  if not ret:
243
263
  console.print("(no minions responded)")
244
264
  return
245
- for minion in sorted(ret):
265
+ for minion in sorted(ret, key=_natural_key):
246
266
  _print_one_minion(minion, ret[minion])
247
267
 
248
268
 
@@ -250,15 +270,36 @@ def _print_state_result(result: dict[str, Any]) -> None:
250
270
  # before giving up on minions that never reported. Each poll is a fast,
251
271
  # self-contained request, so the proxy/gateway connection cap never bites.
252
272
  #
253
- # We don't probe minion liveness (saltutil.find_job): an empty probe is
254
- # ambiguous — a busy-but-alive Windows minion mid-highstate can simply fail to
255
- # answer in time and look identical to a down one, so probing wrongly dropped
256
- # live minions. Instead we just poll until every targeted minion has returned
257
- # or _POLL_DEADLINE trips, then render whatever came back. The job keeps
258
- # running on the minions regardless; results stay fetchable later by jid. Press
259
- # Ctrl+C to stop waiting early and render the partial results gathered so far.
273
+ # We can't probe minion liveness mid-job (saltutil.find_job): an empty probe
274
+ # is ambiguous — a busy-but-alive Windows minion mid-highstate can simply fail
275
+ # to answer in time and look identical to a down one, so probing wrongly
276
+ # dropped live minions. Connection-level presence (the manage.present runner)
277
+ # is no better here: it matches connection source IPs against cached minion
278
+ # addresses, which NAT breaks, so it reports every minion absent. What does
279
+ # work is liveness pings (test.ping): one published immediately *before* the
280
+ # real job, while each minion's job loop is still idle, then re-published to
281
+ # whoever stays silent every _PING_INTERVAL. A minion that answers any ping —
282
+ # late answers count too — is provably alive and worth waiting for. One that
283
+ # has ignored the job plus _OFFLINE_PINGS pings (each at least _PING_GRACE
284
+ # old) is tagged "silent", and after _OFFLINE_AFTER of unbroken silence it is
285
+ # presumed offline; once every minion still outstanding is offline we stop
286
+ # waiting. The long fuse matters because of the NAT half-dead case: a minion
287
+ # whose stale connection swallowed the publish (so it never received the job)
288
+ # typically reconnects within a couple of minutes — TCP keepalive bounds it —
289
+ # and answers a fresh ping after ignoring several. On that signature the job
290
+ # is re-sent to it (published jobs are not queued for disconnected minions,
291
+ # so this is never a double run), up to _MAX_RESEND times. Otherwise we poll
292
+ # until every targeted minion has returned or _POLL_DEADLINE trips, then
293
+ # render whatever came back. The job keeps running on the minions regardless;
294
+ # results stay fetchable later by jid. Press Ctrl+C to stop waiting early and
295
+ # render the partial results gathered so far.
260
296
  _POLL_INTERVAL = 3.0
261
297
  _POLL_DEADLINE = 1800.0 # 30 minutes (hard backstop)
298
+ _PING_INTERVAL = 20.0 # re-ping still-silent minions this often
299
+ _PING_GRACE = 15.0 # how long a ping may go unanswered before it counts missed
300
+ _OFFLINE_PINGS = 3 # missed pings before a silent minion is tagged as such
301
+ _OFFLINE_AFTER = 180.0 # unbroken silence before "silent" hardens to offline
302
+ _MAX_RESEND = 1 # times the job is re-sent to a minion that reconnects
262
303
 
263
304
 
264
305
  def _first_return(resp: dict[str, Any]) -> Any:
@@ -283,6 +324,21 @@ def _lookup_returns(raw: Any) -> dict[str, Any]:
283
324
  return cast("dict[str, Any]", inner) if isinstance(inner, dict) else data
284
325
 
285
326
 
327
+ def _submit_async(call: Callable[..., dict[str, Any]], **payload: Any) -> str | None:
328
+ """Submit a ``local_async`` job and return its jid, or ``None`` if the
329
+ submission failed or matched nothing — callers treat that as "no job",
330
+ never as an error worth aborting the run for (these are the auxiliary
331
+ liveness pings and re-sends, not the main job)."""
332
+ try:
333
+ info: Any = _first_return(call("local_async", **payload))
334
+ except SaltApiError:
335
+ return None
336
+ if not isinstance(info, dict):
337
+ return None
338
+ jid = cast("dict[str, Any]", info).get("jid")
339
+ return str(jid) if jid else None
340
+
341
+
286
342
  def _count_cells(counts: dict[str, int]) -> list[Text]:
287
343
  """One right-padded cell per status category, for column alignment in the
288
344
  live view. ``ok``/``failed`` always render; the rest blank when zero so
@@ -321,6 +377,8 @@ def _live_view(
321
377
  returns: dict[str, Any],
322
378
  done: set[str],
323
379
  missing: set[str],
380
+ quiet: set[str],
381
+ offline: set[str],
324
382
  spinner: Spinner,
325
383
  *,
326
384
  n_cells: int,
@@ -328,20 +386,34 @@ def _live_view(
328
386
  ) -> Group:
329
387
  """A live checklist: a tick for finished minions (with ``cells_for`` of
330
388
  their reply in aligned columns), a spinner for the ones still running, an x
331
- for those that never reported, under a one-line status header. ``missing``
332
- is only populated in the final frame (after the deadline or a Ctrl+C); while
333
- polling it's empty, so still-pending minions show a spinner. ``n_cells`` is
334
- how many trailing columns ``cells_for`` produces (so blank rows stay
335
- aligned)."""
389
+ for those that never reported, under a one-line status header. ``quiet``
390
+ is the targeted minions that have ignored several liveness pings (a ? with
391
+ a ``silent`` tag — they might yet be a reconnecting NAT drop); after enough
392
+ unbroken silence they harden into ``offline`` (an x with an ``offline``
393
+ tag). Both sets stay inside the outstanding minions — a returned minion is
394
+ neither. ``missing`` is only populated in the final frame (after the
395
+ deadline or a Ctrl+C); while polling it's empty, so still-pending minions
396
+ show a spinner. ``n_cells`` is how many trailing columns ``cells_for``
397
+ produces (so blank rows stay aligned)."""
336
398
  blanks = [Text("")] * n_cells
399
+ quiet_cells = [Text("silent", style="yellow"), *[Text("")] * (n_cells - 1)]
400
+ offline_cells = [Text("offline", style="red"), *[Text("")] * (n_cells - 1)]
337
401
  grid = Table.grid(padding=(0, 1))
338
402
  grid.add_column(no_wrap=True) # marker
339
403
  grid.add_column(no_wrap=True) # minion id
340
404
  for _ in range(n_cells): # per-command trailing columns
341
405
  grid.add_column(no_wrap=True, justify="left")
342
406
  for minion in targeted:
343
- if minion in missing:
407
+ if minion in offline:
408
+ grid.add_row(
409
+ Text("X", style="red"), Text(minion, style="dim"), *offline_cells
410
+ )
411
+ elif minion in missing:
344
412
  grid.add_row(Text("X", style="red"), Text(minion, style="dim"), *blanks)
413
+ elif minion in quiet:
414
+ grid.add_row(
415
+ Text("?", style="yellow"), Text(minion, style="dim"), *quiet_cells
416
+ )
345
417
  elif minion in done:
346
418
  grid.add_row(
347
419
  Text("+", style="green"), Text(minion), *cells_for(returns.get(minion))
@@ -349,12 +421,18 @@ def _live_view(
349
421
  else:
350
422
  grid.add_row(spinner, Text(minion, style="dim"), *blanks)
351
423
 
352
- pending = len(targeted) - len(done) - len(missing)
424
+ n_missing = len(missing - offline)
425
+ n_quiet = len(quiet - missing - offline)
426
+ pending = len(targeted) - len(done) - n_missing - n_quiet - len(offline)
353
427
  bits = [f"{len(done)}/{len(targeted)} done"]
354
428
  if pending:
355
429
  bits.append(f"{pending} running")
356
- if missing:
357
- bits.append(f"[red]{len(missing)} no response[/]")
430
+ if n_quiet:
431
+ bits.append(f"[yellow]{n_quiet} silent[/]")
432
+ if offline:
433
+ bits.append(f"[red]{len(offline)} offline[/]")
434
+ if n_missing:
435
+ bits.append(f"[red]{n_missing} no response[/]")
358
436
  header = Text.from_markup(f"[dim]{' '.join(bits)}[/]")
359
437
  return Group(header, grid)
360
438
 
@@ -365,19 +443,27 @@ def _stream_job(
365
443
  *,
366
444
  n_cells: int,
367
445
  cells_for: Callable[[Any], list[Text]],
368
- ) -> tuple[dict[str, Any], set[str], float, bool] | None:
446
+ ) -> tuple[dict[str, Any], set[str], set[str], float, bool] | None:
369
447
  """Fire a job async, show a live checklist, and return its raw results.
370
448
 
371
- Submits ``payload`` via the ``local_async`` client (returns a job id at
372
- once), then polls ``runner jobs.lookup_jid`` until every targeted minion
373
- has returned, the deadline trips, or the user hits Ctrl+C. While polling it
374
- shows a live per-minion checklist (spinner -> tick), whose trailing columns
375
- come from ``cells_for(value)`` (``n_cells`` of them). In every case it then
376
- renders the final checklist frame and returns ``(returns, outstanding,
377
- start, interrupted)`` ``outstanding`` being the targeted minions that
378
- never reported for the caller to render, or ``None`` if no job started
379
- (already reported). ``call(name, **kw)`` invokes the named salt-api
380
- client."""
449
+ Submits a liveness ping then ``payload`` via the ``local_async`` client
450
+ (returns a job id at once), then polls ``runner jobs.lookup_jid`` until
451
+ every targeted minion has returned, everyone still outstanding is presumed
452
+ offline (ignored the job plus _OFFLINE_PINGS liveness pings, for at least
453
+ _OFFLINE_AFTER), the deadline trips, or the user hits Ctrl+C. Silent
454
+ minions are re-pinged every _PING_INTERVAL, and one that ignores several
455
+ pings then answers a later one just reconnected after missing the publish —
456
+ the job is re-sent to it (see the comment above _POLL_INTERVAL). While
457
+ polling it shows a live per-minion checklist (spinner -> tick, silent and
458
+ offline minions tagged), whose trailing
459
+ columns come from ``cells_for(value)`` (``n_cells`` of them). In
460
+ every case it then renders the final checklist frame and returns
461
+ ``(returns, outstanding, offline, start, interrupted)`` — ``outstanding``
462
+ being the targeted minions that never reported and ``offline`` the subset
463
+ of those presumed unreachable — for the caller to render, or ``None`` if
464
+ no job started (already reported). ``call(name, **kw)`` invokes the named
465
+ salt-api client."""
466
+ ping_jid = _submit_async(call, tgt=payload["tgt"], fun="test.ping")
381
467
  submit = call("local_async", **payload)
382
468
  info: Any = _first_return(submit)
383
469
  jid = info.get("jid")
@@ -401,6 +487,8 @@ def _stream_job(
401
487
  console.print(f"[dim]job {jid} -> {len(targeted)} minion(s)[/]")
402
488
  start = time.monotonic()
403
489
  returns: dict[str, Any] = {}
490
+ quiet: set[str] = set()
491
+ offline: set[str] = set()
404
492
  spinner = Spinner("dots", style="cyan")
405
493
 
406
494
  def view(missing: set[str] | None = None) -> Group:
@@ -410,30 +498,109 @@ def _stream_job(
410
498
  returns,
411
499
  done,
412
500
  missing or set(),
501
+ quiet,
502
+ offline,
413
503
  spinner,
414
504
  n_cells=n_cells,
415
505
  cells_for=cells_for,
416
506
  )
417
507
 
418
- # Poll lookup_jid until everyone's back or the deadline trips; Ctrl+C stops
419
- # waiting early. The job keeps running on the minions either way — we just
420
- # stop watching and render whatever was gathered. transient=False keeps the
421
- # finished checklist on screen above the rendered tables.
508
+ # Poll lookup_jid until everyone's back, everyone left is offline, or the
509
+ # deadline trips; Ctrl+C stops waiting early. The job keeps running on the
510
+ # minions either way — we just stop watching and render whatever was
511
+ # gathered. transient=False keeps the finished checklist on screen above
512
+ # the rendered tables.
422
513
  interrupted = False
514
+ jids = [jid] # the job, plus any re-sends to reconnected minions
515
+ alive: set[str] = set() # answered some liveness ping
516
+ resent: dict[str, int] = {} # minion -> times the job was re-sent to it
517
+ reset_at: dict[str, float] = {} # ignore pings before this (post re-send)
518
+ # Each ping round: its jid, publish time, and who it targeted. Round 0 is
519
+ # the pre-job ping at the original target expression.
520
+ rounds: list[tuple[str, float, set[str]]] = []
521
+ if ping_jid:
522
+ rounds.append((ping_jid, start, expected))
423
523
  with Live(console=console, refresh_per_second=12, transient=False) as live:
424
524
  try:
425
525
  while True:
426
526
  # lookup_jid is cumulative: each poll returns every minion that
427
- # has reported so far, so we just keep the latest snapshot.
428
- returns = _lookup_returns(
429
- _first_return(
430
- call("runner", fun="jobs.lookup_jid", kwarg={"jid": jid})
527
+ # has reported so far; merge the snapshots across all jids.
528
+ for j in jids:
529
+ returns.update(
530
+ _lookup_returns(
531
+ _first_return(
532
+ call("runner", fun="jobs.lookup_jid", kwarg={"jid": j})
533
+ )
534
+ )
431
535
  )
432
- )
536
+ outstanding = expected - set(returns)
537
+ now = time.monotonic()
538
+ # Collect liveness answers from the newest ping rounds (late
539
+ # answers count; a reconnected minion only ever receives the
540
+ # newest, so polling further back buys nothing).
541
+ for rjid, _, targets in rounds[-2:]:
542
+ if targets - alive - set(returns):
543
+ answers = _lookup_returns(
544
+ _first_return(
545
+ call(
546
+ "runner",
547
+ fun="jobs.lookup_jid",
548
+ kwarg={"jid": rjid},
549
+ )
550
+ )
551
+ )
552
+ alive |= expected & set(answers)
553
+ # The reconnect signature: tagged silent (ignored several
554
+ # pings and the job — the publish never reached it), now
555
+ # answering. Re-send the job to it and make it re-prove
556
+ # liveness from scratch, so a second drop re-runs this cycle.
557
+ recovered = {m for m in quiet & alive if resent.get(m, 0) < _MAX_RESEND}
558
+ if recovered:
559
+ rejid = _submit_async(
560
+ call,
561
+ **{**payload, "tgt": sorted(recovered), "tgt_type": "list"},
562
+ )
563
+ if rejid:
564
+ jids.append(rejid)
565
+ for m in recovered:
566
+ resent[m] = resent.get(m, 0) + 1
567
+ reset_at[m] = now
568
+ alive -= recovered
569
+ # Silent: missed _OFFLINE_PINGS pings (each old enough that an
570
+ # answer would have arrived) plus the job. Offline: silent for
571
+ # _OFFLINE_AFTER straight — long enough to have reconnected
572
+ # and answered a fresh ping, were it a NAT-dropped connection.
573
+ quiet = {
574
+ m
575
+ for m in outstanding - alive
576
+ if sum(
577
+ 1
578
+ for _, t, targets in rounds
579
+ if m in targets
580
+ and t >= reset_at.get(m, -1.0)
581
+ and now - t >= _PING_GRACE
582
+ )
583
+ >= _OFFLINE_PINGS
584
+ }
585
+ offline = {
586
+ m for m in quiet if now - reset_at.get(m, start) >= _OFFLINE_AFTER
587
+ }
588
+ # Re-ping whoever is still silent, so slow answers, reconnects,
589
+ # and genuinely-down minions keep accumulating evidence.
590
+ silent = outstanding - alive
591
+ last_round = rounds[-1][1] if rounds else start
592
+ if silent and now - last_round >= _PING_INTERVAL:
593
+ rjid = _submit_async(
594
+ call, tgt=sorted(silent), tgt_type="list", fun="test.ping"
595
+ )
596
+ if rjid:
597
+ rounds.append((rjid, now, set(silent)))
433
598
  live.update(view())
434
- if not expected - set(returns):
599
+ if not outstanding:
600
+ break
601
+ if offline == outstanding:
435
602
  break
436
- if time.monotonic() - start > _POLL_DEADLINE:
603
+ if now - start > _POLL_DEADLINE:
437
604
  break
438
605
  time.sleep(_POLL_INTERVAL)
439
606
  except KeyboardInterrupt:
@@ -441,23 +608,42 @@ def _stream_job(
441
608
  # Final frame: mark whoever never reported so the persisted checklist
442
609
  # reflects the true end state rather than a frozen spinner.
443
610
  outstanding = expected - set(returns)
611
+ offline &= outstanding
444
612
  live.update(view(outstanding))
445
613
 
446
- return returns, expected - set(returns), start, interrupted
614
+ if resent:
615
+ names = ", ".join(sorted(resent, key=_natural_key))
616
+ console.print(
617
+ f"[dim]re-sent the job to {names} - reconnected after missing the "
618
+ f"original publish[/]"
619
+ )
620
+ return returns, expected - set(returns), offline, start, interrupted
447
621
 
448
622
 
449
- def _print_outstanding(outstanding: set[str], interrupted: bool) -> None:
623
+ def _print_outstanding(
624
+ outstanding: set[str], offline: set[str], interrupted: bool
625
+ ) -> None:
450
626
  """Trailer naming the minions that hadn't reported when we stopped waiting
451
- — because the user interrupted, or the deadline tripped."""
627
+ — because the user interrupted, everyone left was offline, or the deadline
628
+ tripped."""
452
629
  if not outstanding:
453
630
  return
454
- names = ", ".join(sorted(outstanding, key=_natural_key))
455
631
  if interrupted:
632
+ names = ", ".join(sorted(outstanding, key=_natural_key))
456
633
  console.print(
457
634
  f"[yellow]stopped waiting (Ctrl+C); no result yet from: {names} "
458
635
  f"- the job may still be running on them[/]"
459
636
  )
460
- else:
637
+ return
638
+ if offline:
639
+ names = ", ".join(sorted(offline, key=_natural_key))
640
+ console.print(
641
+ f"[yellow]no result from: {names} - ignored the job and repeated "
642
+ f"liveness pings, so presumed offline; the job never reached them[/]"
643
+ )
644
+ waiting = outstanding - offline
645
+ if waiting:
646
+ names = ", ".join(sorted(waiting, key=_natural_key))
461
647
  console.print(
462
648
  f"[yellow]no result from: {names} within the "
463
649
  f"{int(_POLL_DEADLINE)}s deadline (still running, or down)[/]"
@@ -470,11 +656,11 @@ def _stream_state(call: Callable[..., dict[str, Any]], payload: dict[str, Any])
470
656
  result = _stream_job(call, payload, n_cells=5, cells_for=_state_cells)
471
657
  if result is None:
472
658
  return
473
- returns, outstanding, start, interrupted = result
659
+ returns, outstanding, offline, start, interrupted = result
474
660
 
475
661
  # Live view cleared — render the coloured tables, one block per minion.
476
662
  _print_state_result({"return": [returns]})
477
- _print_outstanding(outstanding, interrupted)
663
+ _print_outstanding(outstanding, offline, interrupted)
478
664
 
479
665
  # Fleet-wide summary: totals across all minions + wall-clock elapsed.
480
666
  totals, n = _grand_totals(returns)
@@ -634,10 +820,10 @@ def _stream_cmd(call: Callable[..., dict[str, Any]], payload: dict[str, Any]) ->
634
820
  result = _stream_job(call, payload, n_cells=1, cells_for=_cmd_cells)
635
821
  if result is None:
636
822
  return
637
- returns, outstanding, start, interrupted = result
823
+ returns, outstanding, offline, start, interrupted = result
638
824
 
639
825
  _print_cmd_result({"return": [returns]})
640
- _print_outstanding(outstanding, interrupted)
826
+ _print_outstanding(outstanding, offline, interrupted)
641
827
 
642
828
  n = len(returns)
643
829
  if n:
@@ -0,0 +1 @@
1
+ __version__ = "1.4.4"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: salt-api-cli
3
- Version: 1.4.2
3
+ Version: 1.4.4
4
4
  Summary: CLI to access salt-api
5
5
  Author-email: Pradish Bijukchhe <pradish@sandbox.com.np>
6
6
  License-Expression: MIT
@@ -1 +0,0 @@
1
- __version__ = "1.4.2"
File without changes
File without changes
File without changes