loki-mode 7.56.0 → 7.57.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,6 +11,7 @@ import json
11
11
  import logging
12
12
  import os
13
13
  import subprocess
14
+ import threading
14
15
  import time
15
16
  from collections import defaultdict
16
17
  from dataclasses import asdict
@@ -7599,18 +7600,397 @@ def _reconcile_app_runner_liveness(state):
7599
7600
  return state
7600
7601
 
7601
7602
 
7603
+ # =============================================================================
7604
+ # Docker-compose app-runner discovery
7605
+ #
7606
+ # When the autonomous agent brings up a docker-compose stack itself (rather than
7607
+ # via autonomy/app-runner.sh), no .loki/app-runner/state.json is written, so the
7608
+ # status endpoint reports "not_initialized" / "stopped" even though the app is
7609
+ # genuinely running. The discovery helper below inspects the live compose stack
7610
+ # for the project directory and synthesizes an equivalent status so the dashboard
7611
+ # App Runner panel surfaces the running app and its URL.
7612
+ #
7613
+ # Safety contract (all mandatory):
7614
+ # - Every docker subprocess.run has an explicit timeout; total work is bounded.
7615
+ # - On ANY error (TimeoutExpired/OSError/SubprocessError/parse failure) the
7616
+ # helper returns None and the caller falls back to its prior behavior. The
7617
+ # handler never raises and never blocks the event loop (it is offloaded via
7618
+ # asyncio.to_thread / run_in_threadpool).
7619
+ # - A short TTL cache prevents the 3s/5s dashboard pollers from spawning
7620
+ # repeated docker invocations.
7621
+ # - A URL is never fabricated for a non-running or non-published container.
7622
+ # =============================================================================
7623
+
7624
# Common host ports a web service typically publishes, in precedence order
# (earlier entries are tried first when matching published ports). Kept as
# strings because they are compared against published-port strings.
# Mirrors autonomy/app-runner.sh _identify_compose_web_service (COMMON list).
_COMPOSE_COMMON_WEB_PORTS = ["3000", "8000", "8080", "5000", "4200", "5173", "80"]

# Per-docker-call timeout (seconds). Several calls run in sequence; keep each
# tight so total discovery stays bounded well under the poller interval.
_COMPOSE_DISCOVERY_CMD_TIMEOUT = 3

# TTL (seconds) for the discovery result cache, keyed by resolved project dir.
# The dashboard polls every 3-5s; with a 2.5s TTL, polls that arrive within the
# window reuse the previous probe result without the status feeling stale.
_COMPOSE_DISCOVERY_TTL_SECONDS = 2.5

# Cache: {project_dir_str: (expiry_deadline, result_or_None)}. The deadline is
# a time.monotonic() value, not a wall-clock epoch. Module-level so it survives
# across requests. Guarded by a lock because to_thread runs the sync helper on
# worker threads that can overlap.
_compose_discovery_cache: dict[str, tuple[float, Optional[dict]]] = {}
_compose_discovery_lock = threading.Lock()
7642
+
7643
+
7644
+ def _parse_docker_json(raw):
7645
+ """Parse docker --format json output into a list of dicts, defensively.
7646
+
7647
+ Docker emits either a single JSON array or newline-delimited JSON (one
7648
+ object per line), and the shape has varied across docker/compose versions.
7649
+ Try a whole-blob parse first; if that fails or does not yield a list, fall
7650
+ back to parsing each non-empty line individually. Returns a list of dicts
7651
+ (possibly empty). Never raises.
7652
+ """
7653
+ raw = (raw or "").strip()
7654
+ if not raw:
7655
+ return []
7656
+ try:
7657
+ parsed = json.loads(raw)
7658
+ if isinstance(parsed, list):
7659
+ return [x for x in parsed if isinstance(x, dict)]
7660
+ if isinstance(parsed, dict):
7661
+ return [parsed]
7662
+ except (ValueError, TypeError):
7663
+ pass
7664
+ items = []
7665
+ for line in raw.splitlines():
7666
+ line = line.strip()
7667
+ if not line:
7668
+ continue
7669
+ try:
7670
+ obj = json.loads(line)
7671
+ except (ValueError, TypeError):
7672
+ continue
7673
+ if isinstance(obj, dict):
7674
+ items.append(obj)
7675
+ return items
7676
+
7677
+
7678
def _run_docker_json(args, cwd=None):
    """Run a docker command and return its parsed JSON rows, or None on failure.

    Args:
        args: Argument list AFTER ``docker`` (e.g. ["compose", "ps", ...]).
            Passed as a list argv; no shell is involved.
        cwd: Optional working directory for the command. It is stringified
            before being handed to subprocess.

    Returns:
        A list of dicts when docker exits 0. The list is empty when the
        output is blank or contains no JSON objects. None when docker cannot
        be started, times out, is given invalid arguments, or exits non-zero,
        so the caller can fail open.
    """
    try:
        proc = subprocess.run(
            ["docker", *args],
            capture_output=True,
            text=True,
            # Decode explicitly instead of with the locale codec: under a
            # non-UTF-8 locale a single non-ASCII byte in the output would
            # raise UnicodeDecodeError and escape the fail-open contract.
            # JSON output is expected to be UTF-8; bad bytes are replaced.
            encoding="utf-8",
            errors="replace",
            timeout=_COMPOSE_DISCOVERY_CMD_TIMEOUT,
            cwd=str(cwd) if cwd else None,
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        # OSError: docker missing / bad cwd. SubprocessError: timeout.
        # ValueError: invalid argv (e.g. embedded NUL) or decoding trouble.
        return None
    if proc.returncode != 0:
        return None
    return _parse_docker_json(proc.stdout)
7699
+
7700
+
7701
+ def _compose_published_ports(container):
7702
+ """Host ports actually published by a running compose container (compose ps).
7703
+
7704
+ `docker compose ps --format json` exposes published ports under the
7705
+ "Publishers" list, each like {"PublishedPort": 3000, "TargetPort": 3000,
7706
+ "Protocol": "tcp", "URL": "0.0.0.0"}. A PublishedPort of 0 means the port is
7707
+ exposed but not published to the host, so it is filtered out. Returns a list
7708
+ of host port strings, preserving order. Never raises.
7709
+ """
7710
+ out = []
7711
+ pubs = container.get("Publishers")
7712
+ if not isinstance(pubs, list):
7713
+ return out
7714
+ for p in pubs:
7715
+ if not isinstance(p, dict):
7716
+ continue
7717
+ port = p.get("PublishedPort")
7718
+ try:
7719
+ port = int(port)
7720
+ except (TypeError, ValueError):
7721
+ continue
7722
+ if port > 0:
7723
+ out.append(str(port))
7724
+ return out
7725
+
7726
+
7727
+ def _compose_service_labels(svc):
7728
+ """Normalize a compose-config service's labels into a dict. Never raises."""
7729
+ labels = svc.get("labels") or {}
7730
+ if isinstance(labels, dict):
7731
+ return labels
7732
+ if isinstance(labels, list):
7733
+ normalized = {}
7734
+ for item in labels:
7735
+ if isinstance(item, str) and "=" in item:
7736
+ k, v = item.split("=", 1)
7737
+ normalized[k] = v
7738
+ return normalized
7739
+ return {}
7740
+
7741
+
7742
+ def _identify_compose_web_service(config_services, running_by_service):
7743
+ """Pick the primary web service and its published host port.
7744
+
7745
+ Mirrors the precedence in autonomy/app-runner.sh:431-481:
7746
+ (1) service labelled loki.primary=true
7747
+ (2) service named web/app
7748
+ (3) service publishing a common web port (3000/8000/8080/5000/4200/5173/80)
7749
+ (4) first service with any published port
7750
+ Declared names/labels come from `docker compose config`; the actual runtime
7751
+ published port comes from the matching RUNNING container (compose ps), since
7752
+ only running, published containers can yield a real URL. Returns
7753
+ (service_name, port_str) or (None, None). Never raises.
7754
+
7755
+ config_services: dict {service_name: service_config_dict} (may be empty).
7756
+ running_by_service: dict {service_name: [published_port_str, ...]} for
7757
+ currently-running containers with at least one published host port.
7758
+ """
7759
+ if not running_by_service:
7760
+ return (None, None)
7761
+
7762
+ # (1) label loki.primary=true (declared in compose config)
7763
+ for name, svc in (config_services or {}).items():
7764
+ if not isinstance(svc, dict):
7765
+ continue
7766
+ labels = _compose_service_labels(svc)
7767
+ if str(labels.get("loki.primary", "")).lower() == "true":
7768
+ ports = running_by_service.get(name)
7769
+ if ports:
7770
+ return (name, ports[0])
7771
+
7772
+ # (2) service named web/app
7773
+ for cand in ("web", "app"):
7774
+ ports = running_by_service.get(cand)
7775
+ if ports:
7776
+ return (cand, ports[0])
7777
+
7778
+ # (3) service publishing a common web port
7779
+ for cp in _COMPOSE_COMMON_WEB_PORTS:
7780
+ for name, ports in running_by_service.items():
7781
+ if cp in ports:
7782
+ return (name, cp)
7783
+
7784
+ # (4) first running service with any published port. Sort for determinism.
7785
+ for name in sorted(running_by_service.keys()):
7786
+ ports = running_by_service[name]
7787
+ if ports:
7788
+ return (name, ports[0])
7789
+
7790
+ return (None, None)
7791
+
7792
+
7793
+ def _container_health_state(container):
7794
+ """Classify a running compose container into 'running' | 'starting' | None.
7795
+
7796
+ Reads the container State + Health fields from `docker compose ps`:
7797
+ - State exited/dead/paused/removing -> None (no live URL to surface)
7798
+ - State running + Health healthy or empty (no healthcheck) -> 'running'
7799
+ - State running + Health unhealthy/starting -> 'starting' (still surface
7800
+ the URL: e.g. a Next.js app whose home renders but whose '/' healthcheck
7801
+ fails is reachable and should show as starting, not hidden)
7802
+ - State created/restarting -> 'starting'
7803
+ Returns the status string or None. Never raises.
7804
+ """
7805
+ state = str(container.get("State", "")).lower()
7806
+ health = str(container.get("Health", "")).lower()
7807
+ if state in ("exited", "dead", "paused", "removing"):
7808
+ return None
7809
+ if state == "running":
7810
+ if health in ("", "healthy"):
7811
+ return "running"
7812
+ # unhealthy or starting healthcheck: reachable, treat as starting.
7813
+ return "starting"
7814
+ if state in ("created", "restarting"):
7815
+ return "starting"
7816
+ # Unknown/other states: do not fabricate a running URL.
7817
+ return None
7818
+
7819
+
7820
def _discover_compose_app_runner_state():
    """Discover a running docker-compose stack for the active project, or None.

    Resolves the project directory as the parent of the .loki directory and
    returns the discovery result for it, served from a short-lived cache when
    a fresh entry exists. The result is whatever
    _discover_compose_app_runner_state_uncached returns: a synthesized
    app-runner state dict, or None. None results are cached as well, so a
    project with no stack does not trigger a docker probe on every poll.

    The cache lock is held for the whole lookup-probe-store sequence. Callers
    that arrive while a probe is in flight therefore wait for it and then read
    its cached result instead of each starting their own docker commands. The
    uncached helper does not take this lock, so holding it here cannot
    deadlock.

    Synchronous; the caller offloads it onto a worker thread. Returns None if
    the project directory cannot be resolved.
    """
    try:
        project_dir = _get_loki_dir().parent.resolve()
    except Exception:
        return None
    cache_key = str(project_dir)

    with _compose_discovery_lock:
        # Read the clock only after acquiring the lock: a caller that waited
        # behind an in-flight probe must judge freshness against current time.
        now = time.monotonic()
        cached = _compose_discovery_cache.get(cache_key)
        if cached is not None and cached[0] > now:
            return cached[1]

        result = _discover_compose_app_runner_state_uncached(project_dir)

        now = time.monotonic()
        # Drop entries that have expired so the module-level cache cannot
        # accumulate stale keys if the project directory changes over time.
        expired_keys = [
            key
            for key, (expiry, _unused) in _compose_discovery_cache.items()
            if expiry <= now
        ]
        for key in expired_keys:
            del _compose_discovery_cache[key]
        _compose_discovery_cache[cache_key] = (
            now + _COMPOSE_DISCOVERY_TTL_SECONDS,
            result,
        )
        return result
7850
+
7851
+
7852
+ def _discover_compose_app_runner_state_uncached(project_dir):
7853
+ """Uncached body of _discover_compose_app_runner_state. Never raises."""
7854
+ try:
7855
+ # Step A: a compose file must exist in the project dir, else this is a
7856
+ # single-process app and discovery does not apply.
7857
+ compose_names = (
7858
+ "docker-compose.yml", "docker-compose.yaml",
7859
+ "compose.yml", "compose.yaml",
7860
+ )
7861
+ if not any((project_dir / n).is_file() for n in compose_names):
7862
+ return None
7863
+
7864
+ # Step C: running containers for THIS project's compose stack, with the
7865
+ # runtime published ports. Run from the project dir so compose resolves
7866
+ # the right project. (Step B project matching is implicitly handled by
7867
+ # running compose from project_dir; we keep ls/ps from this dir.)
7868
+ ps_rows = _run_docker_json(
7869
+ ["compose", "ps", "--format", "json"], cwd=project_dir
7870
+ )
7871
+ if ps_rows is None:
7872
+ # docker absent / timeout / error -> fail open.
7873
+ return None
7874
+ if not ps_rows:
7875
+ # No containers for this compose project (not up). Nothing to show.
7876
+ return None
7877
+
7878
+ # Map running, published services to their host ports. Track health and
7879
+ # the raw container for the primary so we can classify it precisely.
7880
+ running_by_service = {}
7881
+ container_by_service = {}
7882
+ for c in ps_rows:
7883
+ service = c.get("Service") or c.get("Name")
7884
+ if not service:
7885
+ continue
7886
+ ports = _compose_published_ports(c)
7887
+ if ports:
7888
+ running_by_service.setdefault(service, [])
7889
+ for p in ports:
7890
+ if p not in running_by_service[service]:
7891
+ running_by_service[service].append(p)
7892
+ container_by_service.setdefault(service, c)
7893
+ if not running_by_service:
7894
+ # Stack is up but nothing publishes a host port: no surfaceable URL.
7895
+ return None
7896
+
7897
+ # Step D: declared service config (names/labels) for precedence. Best
7898
+ # effort: if config is unavailable we still proceed with ps data alone.
7899
+ config_rows = _run_docker_json(
7900
+ ["compose", "config", "--format", "json"], cwd=project_dir
7901
+ )
7902
+ config_services = {}
7903
+ if config_rows:
7904
+ cfg = config_rows[0]
7905
+ svcs = cfg.get("services")
7906
+ if isinstance(svcs, dict):
7907
+ config_services = svcs
7908
+
7909
+ primary_service, port = _identify_compose_web_service(
7910
+ config_services, running_by_service
7911
+ )
7912
+ if not primary_service or not port:
7913
+ return None
7914
+
7915
+ # Step E health classification, from the primary's running container.
7916
+ primary_container = container_by_service.get(primary_service)
7917
+ if not isinstance(primary_container, dict):
7918
+ return None
7919
+ health_status = _container_health_state(primary_container)
7920
+ if health_status is None:
7921
+ # exited/dead/paused/unknown -> do not fabricate a URL.
7922
+ return None
7923
+
7924
+ # Step B (best effort): record the compose project name for the panel.
7925
+ compose_project = (
7926
+ primary_container.get("Project")
7927
+ or "".join(ch for ch in project_dir.name.lower() if ch.isalnum())
7928
+ )
7929
+
7930
+ health_text = str(primary_container.get("Health", "")).lower()
7931
+ health_ok = health_text in ("", "healthy")
7932
+
7933
+ # Step F: synthesize the state dict using the SAME field names the UI and
7934
+ # app-runner.sh state.json use (status/url/port/method/last_health), plus
7935
+ # discovery-provenance fields the panel safely ignores.
7936
+ return {
7937
+ "status": health_status,
7938
+ "url": "http://localhost:{}".format(port),
7939
+ "port": int(port),
7940
+ "method": "docker compose (detected)",
7941
+ "primary_service": primary_service,
7942
+ "compose_project": compose_project,
7943
+ "source": "discovered",
7944
+ "externally_managed": True,
7945
+ "last_health": {"ok": health_ok},
7946
+ }
7947
+ except Exception:
7948
+ # Fail open on anything unexpected; never break the status endpoint.
7949
+ return None
7950
+
7951
+
7602
7952
@app.get("/api/app-runner/status")
async def get_app_runner_status():
    """Report the app runner's current status.

    How the answer is chosen:
      - If app-runner/state.json exists but cannot be read or parsed, return
        {"status": "error"}.
      - If it exists and, after liveness reconciliation, is "running" or
        "starting", return that reconciled state unchanged; a live
        app-runner.sh-managed run takes priority over discovery.
      - Otherwise (no state file, or a reconciled state that is not live) try
        docker-compose discovery on a worker thread, so its docker calls do
        not block the event loop. A discovered stack is returned as-is.
      - If discovery finds nothing, return the reconciled state, or
        {"status": "not_initialized"} when there was no state file.
    """
    state_path = _get_loki_dir() / "app-runner" / "state.json"

    if state_path.exists():
        try:
            persisted = json.loads(state_path.read_text())
        except (json.JSONDecodeError, OSError):
            return {"status": "error"}

        fallback = _reconcile_app_runner_liveness(persisted)
        managed_and_live = isinstance(fallback, dict) and fallback.get(
            "status"
        ) in ("running", "starting")
        if managed_and_live:
            return fallback
    else:
        fallback = {"status": "not_initialized"}

    # No live managed run: the agent may have started a compose stack itself,
    # outside app-runner.sh. Prefer that stack when one is detected.
    detected = await asyncio.to_thread(_discover_compose_app_runner_state)
    if detected is None:
        return fallback
    return detected
7614
7994
 
7615
7995
 
7616
7996
  def _get_log_redactor():