loki-mode 7.26.0 → 7.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,636 @@
1
+ #!/usr/bin/env python3
2
+ """Trust-layer metrics aggregator (single project).
3
+
4
+ Computes the four AVAILABLE-TODAY trust metrics from
5
+ internal/BENCHMARK-PROGRAM-2026-06.md section 3, plus the corpus-level
6
+ denominators that make them honest. These metrics are the real
7
+ differentiator: no single-pass agent can report them because no
8
+ single-pass agent refuses its own "done" claim.
9
+
10
+ Metric definitions (verbatim from the benchmark doc, section 3):
11
+
12
+ Metric 1 -- Evidence-gate block rate
13
+ Fraction of instrumented runs in which the verified-completion evidence
14
+ gate refused at least one "done" claim (empty diff vs run-start SHA, or
15
+ red tests) before completion was honored.
16
+
17
+ Metric 2 -- Gate failure-rate distribution per run
18
+ Per run, the count and which-gate breakdown of quality-gate failures
19
+ before the run reached an acceptable state; published as a distribution
20
+ across N runs (median, p90), not a single number.
21
+
22
+ Metric 3 -- Council rejection / split rate
23
+ Across council votes, the fraction REJECTED, and of those the
24
+ split-verdict fraction (rejected with at least one approver). Reported
25
+ as a rate with the underlying vote count.
26
+
27
+ Metric 4 -- Cost-per-VERIFIED-task
28
+ Dollars and tokens spent per verified-completed run. Local denominator =
29
+ runs whose proof.json final_verdict is a pass (the EXTERNAL grader
30
+ denominator from the doc is not available on a single machine; see
31
+ NOTE_EXTERNAL_GRADER). Reported raw alongside the count.
32
+
33
+ DATA SOURCES (the design):
34
+
35
+ The runtime's single-state control files are deliberately NOT used as the
36
+ cross-run corpus because the SUCCESSFUL-run case erases exactly the
37
+ self-correction event we want to publish:
38
+ - .loki/council/evidence-block.json is DELETED on the passing run
39
+ - .loki/quality/gate-failure-count.json is RESET by clear_gate_failure
40
+ - .loki/metrics/efficiency/iteration-*.json is wiped at run start
41
+ So this module reads two durable records instead:
42
+ 1. .loki/metrics/trust-events.jsonl -- append-only event log written by
43
+ record_trust_event() (run_start, evidence_block, council_vote,
44
+ gate_failure). One JSON object per line.
45
+ 2. .loki/proofs/<id>/proof.json -- the persistent per-run proof corpus
46
+ (council.final_verdict, cost, iterations).
47
+
48
+ HONESTY RULE (the central trap): a metric is only emitted when its source
49
+ artifact genuinely exists. We distinguish "instrumented, 0 events" from "not
50
+ instrumented at all": the denominator of Metrics 1 and 2 is the count of
51
+ INSTRUMENTED runs (runs that emitted a run_start event), never the proof
52
+ corpus. Every metric reports its own n= explicitly. A metric with no source
53
+ data is reported available=False, never a fabricated 0.
54
+
55
+ No external deps. Python 3.8+ (matches the rest of autonomy/lib).
56
+ Single project only; an --all-projects registry aggregator is OUT of scope
57
+ (see cmd_trust_metrics help).
58
+
59
+ Public API:
60
+ record_trust_event(loki_dir, event_type, **fields) -> bool
61
+ compute_trust_metrics(loki_dir) -> dict (schema_version 1)
62
+ format_metrics_human(m) -> str
63
+ format_metrics_json(m) -> str
64
+ write_metrics_cache(loki_dir, m) -> str | None
65
+ main(argv) -> int
66
+ """
67
+
68
+ import json
69
+ import os
70
+ import sys
71
+ from datetime import datetime, timezone
72
+
73
+ SCHEMA_VERSION = 1
74
+
75
+ # Verdict tokens that count as a verified / passing run (mirrors
76
+ # trust_trajectory._PASS_TOKENS for cross-module consistency).
77
+ _PASS_TOKENS = ("APPROVE", "APPROVED", "COMPLETE", "PASS", "PASSED")
78
+
79
+ # Event types written to trust-events.jsonl. Kept as constants so the writer
80
+ # (run.sh sites) and reader cannot drift apart silently.
81
+ EVENT_RUN_START = "run_start"
82
+ EVENT_EVIDENCE_BLOCK = "evidence_block"
83
+ EVENT_COUNCIL_VOTE = "council_vote"
84
+ EVENT_GATE_FAILURE = "gate_failure"
85
+
86
+ _EVENTS_FILENAME = "trust-events.jsonl"
87
+
88
+ NOTE_EXTERNAL_GRADER = (
89
+ "cost-per-verified uses the LOCAL verified denominator (proof.json "
90
+ "final_verdict pass). The benchmark doc's external-grader denominator "
91
+ "is not available on a single machine and is not computed here."
92
+ )
93
+
94
+
95
+ def _utc_now():
96
+ return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
97
+
98
+
99
+ def _read_json(path, default=None):
100
+ try:
101
+ with open(path, "r", encoding="utf-8") as fh:
102
+ return json.load(fh)
103
+ except Exception:
104
+ return default
105
+
106
+
107
+ def _obj(v):
108
+ return v if isinstance(v, dict) else {}
109
+
110
+
111
+ # ---------------------------------------------------------------------------
112
+ # Writer (called from the runtime via record_trust_event; additive, best-effort)
113
+ # ---------------------------------------------------------------------------
114
+
115
def record_trust_event(loki_dir, event_type, **fields):
    """Append a single trust event to <loki_dir>/metrics/trust-events.jsonl.

    The record always carries ``type``, ``run_id``, ``iteration`` and ``ts``.
    ``run_id`` comes from the ``run_id`` keyword, else the LOKI_SESSION_ID
    environment variable, else ``"unknown"``. ``iteration`` is coerced to an
    int (0 when unusable). ``ts`` defaults to the current UTC timestamp. Any
    other keyword arguments are copied into the record as-is.

    Best-effort by design: returns True when the line was written and False
    on any failure; it never raises and writes nothing to stdout.
    """
    try:
        metrics_dir = os.path.join(loki_dir, "metrics")
        os.makedirs(metrics_dir, exist_ok=True)

        run_id = (
            fields.pop("run_id", "")
            or os.environ.get("LOKI_SESSION_ID", "")
            or "unknown"
        )
        iteration = _to_int(fields.pop("iteration", 0), 0)
        stamp = fields.pop("ts", "") or _utc_now()

        entry = {
            "type": str(event_type),
            "run_id": str(run_id),
            "iteration": iteration,
            "ts": str(stamp),
        }
        # Whatever the caller passed beyond the reserved keys rides along
        # verbatim (expected to be JSON-friendly simple values).
        entry.update(fields)

        payload = json.dumps(entry, sort_keys=True) + "\n"
        target = os.path.join(metrics_dir, _EVENTS_FILENAME)
        with open(target, "a", encoding="utf-8") as sink:
            sink.write(payload)
    except Exception:
        return False
    return True
145
+
146
+
147
+ def _to_int(v, default=0):
148
+ try:
149
+ return int(v)
150
+ except (TypeError, ValueError):
151
+ return default
152
+
153
+
154
+ # ---------------------------------------------------------------------------
155
+ # Reader / aggregation
156
+ # ---------------------------------------------------------------------------
157
+
158
def _load_events(loki_dir):
    """Read <loki_dir>/metrics/trust-events.jsonl into a list of dicts.

    Blank lines, lines that are not valid JSON, and JSON values that are not
    objects are skipped. A missing or unreadable file yields ``[]``.

    Args:
        loki_dir: Path of the project's .loki directory.

    Returns:
        The parsed event objects in file order.
    """
    path = os.path.join(loki_dir, "metrics", _EVENTS_FILENAME)
    events = []
    try:
        # errors="replace": a torn write can leave invalid UTF-8 bytes. With
        # strict decoding the file iterator itself raises UnicodeDecodeError
        # (a ValueError, not an OSError), which would escape both handlers
        # and abort the report. Replaced bytes make that one line fail JSON
        # parsing (or parse with a placeholder character) instead.
        with open(path, "r", encoding="utf-8", errors="replace") as fh:
            for raw in fh:
                raw = raw.strip()
                if not raw:
                    continue
                try:
                    obj = json.loads(raw)
                except (ValueError, TypeError):
                    # Skip a torn / partial line, never fail the whole report.
                    continue
                if isinstance(obj, dict):
                    events.append(obj)
    except OSError:
        # Covers FileNotFoundError (a subclass) and any read error.
        return []
    return events
178
+
179
+
180
def _verdict_is_pass(verdict):
    """Classify a verdict string against the pass tokens.

    Returns None when the verdict is empty or missing, True when its
    upper-cased, stripped text starts with any entry of ``_PASS_TOKENS``,
    and False otherwise.
    """
    normalized = str(verdict or "").strip().upper()
    if normalized == "":
        return None
    # str.startswith accepts a tuple of prefixes and returns a plain bool.
    return normalized.startswith(_PASS_TOKENS)
188
+
189
+
190
def _load_proofs(loki_dir):
    """Collect every readable proof.json under <loki_dir>/proofs.

    Each sub-directory of ``proofs`` is visited in sorted name order and its
    ``proof.json`` is parsed. Entries that are not directories, and proofs
    that are missing, unparseable, or not JSON objects, are skipped. Returns
    the list of parsed proof dicts (empty when the proofs directory cannot
    be listed).
    """
    proofs_dir = os.path.join(loki_dir, "proofs")
    try:
        names = sorted(os.listdir(proofs_dir))
    except (OSError, FileNotFoundError):
        return []

    proofs = []
    for name in names:
        run_dir = os.path.join(proofs_dir, name)
        if os.path.isdir(run_dir):
            doc = _read_json(os.path.join(run_dir, "proof.json"), default=None)
            if isinstance(doc, dict):
                proofs.append(doc)
    return proofs
207
+
208
+
209
+ def _percentile(sorted_vals, pct):
210
+ """Nearest-rank percentile over a pre-sorted non-empty list."""
211
+ if not sorted_vals:
212
+ return None
213
+ if len(sorted_vals) == 1:
214
+ return sorted_vals[0]
215
+ # Nearest-rank: rank = ceil(pct/100 * n), 1-indexed.
216
+ import math
217
+ rank = max(1, math.ceil((pct / 100.0) * len(sorted_vals)))
218
+ rank = min(rank, len(sorted_vals))
219
+ return sorted_vals[rank - 1]
220
+
221
+
222
+ def _median(sorted_vals):
223
+ n = len(sorted_vals)
224
+ if n == 0:
225
+ return None
226
+ mid = n // 2
227
+ if n % 2 == 1:
228
+ return float(sorted_vals[mid])
229
+ return (sorted_vals[mid - 1] + sorted_vals[mid]) / 2.0
230
+
231
+
232
+ # ---------------------------------------------------------------------------
233
+ # Metric 1: evidence-gate block rate
234
+ # ---------------------------------------------------------------------------
235
+
236
def _metric_evidence_block(events):
    """Compute the evidence-gate block rate over instrumented runs.

    Denominator: distinct run ids that emitted a run_start event, plus any
    run id seen only on an evidence_block event (a block event is itself
    proof that the run was instrumented).
    Numerator: distinct run ids with at least one evidence_block event.

    Args:
        events: Event dicts as returned by ``_load_events``.

    Returns:
        ``{"available": False, "reason": ...}`` when no instrumented run
        exists, so an un-instrumented corpus is never shown as a measured
        0%. Otherwise a dict with ``instrumented_runs``, ``runs_with_block``,
        ``block_events_total`` and ``block_rate`` (rounded to 4 places).
    """
    instrumented = set()
    blocked = set()
    block_events = 0
    for e in events:
        et = e.get("type")
        # Normalise to str: the writer always stores run_id as a string, but
        # a line from another producer could hold a number (7 vs "7" would
        # count as two runs) or a list/dict (unhashable, so adding it to a
        # set would raise TypeError and abort the report).
        rid = str(e.get("run_id") or "unknown")
        if et == EVENT_RUN_START:
            instrumented.add(rid)
        elif et == EVENT_EVIDENCE_BLOCK:
            blocked.add(rid)
            block_events += 1
    # A block event for a run with no run_start still proves that run was
    # instrumented, so fold those runs into the denominator.
    instrumented |= blocked
    n = len(instrumented)
    if n == 0:
        return {
            "available": False,
            "reason": "no instrumented runs yet (no run_start events in "
                      "trust-events.jsonl). Distinct from a measured 0%.",
        }
    # After the union above every blocked run is in the denominator.
    blocked_runs = len(blocked)
    return {
        "available": True,
        "instrumented_runs": n,
        "runs_with_block": blocked_runs,
        "block_events_total": block_events,
        "block_rate": round(blocked_runs / n, 4),
    }
273
+
274
+
275
+ # ---------------------------------------------------------------------------
276
+ # Metric 2: gate failure-rate distribution per run
277
+ # ---------------------------------------------------------------------------
278
+
279
def _metric_gate_distribution(events):
    """Summarise quality-gate failures per run and per gate.

    Every run id seen on a run_start or gate_failure event counts as an
    instrumented run. Runs with a run_start but no failures contribute a 0
    to the distribution.

    Args:
        events: Event dicts as returned by ``_load_events``. A gate_failure
            event names its gate in the ``gate`` field ("unknown" if absent).

    Returns:
        ``{"available": False, "reason": ...}`` when no instrumented run
        exists. Otherwise a dict with ``instrumented_runs``,
        ``total_gate_failures``, ``per_run_median``, ``per_run_p90``,
        ``per_run_max`` and ``gate_breakdown`` (failure count per gate name,
        sorted by name).
    """
    # One entry per instrumented run; the keys double as the run set.
    per_run_counts = {}
    gate_breakdown = {}
    for e in events:
        et = e.get("type")
        # Normalise to str so a numeric id matches its string form and an
        # unhashable id (list/dict from a malformed line) cannot raise
        # TypeError when used as a dict key.
        rid = str(e.get("run_id") or "unknown")
        if et == EVENT_RUN_START:
            per_run_counts.setdefault(rid, 0)
        elif et == EVENT_GATE_FAILURE:
            per_run_counts[rid] = per_run_counts.get(rid, 0) + 1
            gate = str(e.get("gate") or "unknown")
            gate_breakdown[gate] = gate_breakdown.get(gate, 0) + 1
    n = len(per_run_counts)
    if n == 0:
        return {
            "available": False,
            "reason": "no instrumented runs yet (no run_start/gate_failure "
                      "events). Distinct from a measured 0 failures.",
        }
    counts = sorted(per_run_counts.values())
    return {
        "available": True,
        "instrumented_runs": n,
        "total_gate_failures": sum(counts),
        "per_run_median": _median(counts),
        "per_run_p90": _percentile(counts, 90),
        # counts is non-empty here because n > 0.
        "per_run_max": counts[-1],
        "gate_breakdown": dict(sorted(gate_breakdown.items())),
    }
318
+
319
+
320
+ # ---------------------------------------------------------------------------
321
+ # Metric 3: council rejection / split rate
322
+ # ---------------------------------------------------------------------------
323
+
324
def _metric_council(events):
    """Compute the council rejection rate and the split share of rejections.

    Only council_vote events are considered. A vote counts as rejected when
    its ``result`` starts with "REJECT" (case-insensitive), or, if ``result``
    is empty, when ``reject`` exceeds ``approve``. A rejected vote is "split"
    when it still had at least one approver. The denominator is the total
    number of recorded votes; with no votes the metric is reported as
    unavailable rather than as 0%.
    """
    votes = [ev for ev in events if ev.get("type") == EVENT_COUNCIL_VOTE]
    total = len(votes)
    if not total:
        return {
            "available": False,
            "reason": "no council votes recorded in trust-events.jsonl. "
                      "Distinct from a measured 0% rejection rate.",
        }

    rejected = 0
    split_rejected = 0
    for vote in votes:
        verdict = str(vote.get("result") or "").strip().upper()
        approvals = _to_int(vote.get("approve"), 0)
        if verdict:
            is_reject = verdict.startswith("REJECT")
        else:
            # No explicit result: fall back to comparing the tallies.
            is_reject = _to_int(vote.get("reject"), 0) > approvals
        if not is_reject:
            continue
        rejected += 1
        if approvals > 0:
            split_rejected += 1

    # The split rate is a fraction OF the rejections (per the doc: "of
    # those, the split-verdict fraction"); undefined when nothing was
    # rejected.
    if rejected:
        split_rate = round(split_rejected / rejected, 4)
    else:
        split_rate = None
    return {
        "available": True,
        "total_votes": total,
        "rejected_votes": rejected,
        "rejection_rate": round(rejected / total, 4),
        "split_rejected_votes": split_rejected,
        "split_rate_of_rejections": split_rate,
    }
363
+
364
+
365
+ # ---------------------------------------------------------------------------
366
+ # Metric 4: cost-per-verified-task (local denominator)
367
+ # ---------------------------------------------------------------------------
368
+
369
def _proof_cost(proof):
    """Extract ``(usd, total_tokens)`` from a proof's ``cost`` block.

    Args:
        proof: A parsed proof.json dict.

    Returns:
        A ``(usd, tokens)`` tuple. ``usd`` is a finite float or None.
        ``tokens`` is an int or None; when ``total_tokens`` is absent it is
        the sum of ``input_tokens`` and ``output_tokens`` if either exists.
        A field that is missing or unusable yields None, never 0.
    """
    import math

    cost = _obj(proof.get("cost"))

    try:
        usd = float(cost.get("usd"))
    except (TypeError, ValueError, OverflowError):
        # OverflowError: an integer too large to represent as a float.
        usd = None
    if usd is not None and not math.isfinite(usd):
        # json.load accepts NaN / Infinity; either would poison every sum
        # and average downstream, so treat it as "no cost recorded".
        usd = None

    tokens = cost.get("total_tokens")
    try:
        if tokens is None:
            # Some proofs carry input/output separately.
            it = cost.get("input_tokens")
            ot = cost.get("output_tokens")
            if it is not None or ot is not None:
                tokens = _to_int(it, 0) + _to_int(ot, 0)
        tokens = int(tokens) if tokens is not None else None
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(float("inf")) from a non-finite JSON number.
        tokens = None
    return usd, tokens
389
+
390
+
391
def _metric_cost_per_verified(proofs):
    """Compute cost per verified run using the local denominator.

    A run is "verified" when its proof's ``council.final_verdict`` is a pass
    token. Dollars and tokens are averaged independently, each over the
    verified runs that actually recorded that figure, so a run reporting
    only tokens does not dilute the dollar average (and vice versa).

    Args:
        proofs: Parsed proof dicts as returned by ``_load_proofs``.

    Returns:
        ``available=False`` with a reason when no proof carries any cost
        data, or when cost exists but no verified run has cost (no division
        by zero, no fabricated $0). Otherwise a dict with ``verified_runs``,
        ``verified_runs_with_cost`` (usd or tokens present),
        ``verified_runs_with_usd``, ``verified_runs_with_tokens``,
        ``total_usd``, ``total_tokens``, ``usd_per_verified`` and
        ``tokens_per_verified``. A figure with no data is None.
    """
    verified_total = 0
    verified_with_cost = 0
    usd_sum = 0.0
    usd_runs = 0
    tokens_sum = 0
    token_runs = 0
    any_cost = False

    for p in proofs:
        verdict_pass = _verdict_is_pass(_obj(p.get("council")).get("final_verdict"))
        usd, tokens = _proof_cost(p)
        has_cost = usd is not None or tokens is not None
        if has_cost:
            any_cost = True
        if not verdict_pass:
            continue
        verified_total += 1
        if has_cost:
            verified_with_cost += 1
        if usd is not None:
            usd_sum += usd
            usd_runs += 1
        if tokens is not None:
            tokens_sum += tokens
            token_runs += 1

    if not any_cost:
        return {
            "available": False,
            "reason": "no proof.json carries cost data (cost never collected "
                      "for any run). Distinct from a measured $0.",
            "note": NOTE_EXTERNAL_GRADER,
        }
    if verified_with_cost == 0:
        return {
            "available": False,
            "reason": "cost data exists but no run is verified-complete "
                      "(council.final_verdict pass) with cost; cannot divide.",
            "verified_runs": verified_total,
            "note": NOTE_EXTERNAL_GRADER,
        }
    return {
        "available": True,
        "verified_runs": verified_total,
        "verified_runs_with_cost": verified_with_cost,
        "verified_runs_with_usd": usd_runs,
        "verified_runs_with_tokens": token_runs,
        "total_usd": round(usd_sum, 6) if usd_runs else None,
        "total_tokens": tokens_sum if token_runs else None,
        # Each average divides by the runs that reported that figure.
        "usd_per_verified": (
            round(usd_sum / usd_runs, 6) if usd_runs else None
        ),
        "tokens_per_verified": (
            round(tokens_sum / token_runs, 1) if token_runs else None
        ),
        "note": NOTE_EXTERNAL_GRADER,
    }
452
+
453
+
454
+ # ---------------------------------------------------------------------------
455
+ # Top-level compute
456
+ # ---------------------------------------------------------------------------
457
+
458
def compute_trust_metrics(loki_dir):
    """Build the full trust-metrics report for one project.

    Loads the event log and the proof corpus under *loki_dir* and computes
    the four metrics from them.

    Args:
        loki_dir: Path of the project's .loki directory.

    Returns:
        A dict with ``schema_version``, ``generated_at``, ``loki_dir``,
        ``scope``, a ``corpus`` block (``events_total``,
        ``instrumented_runs``, ``proofs_total``) and a ``metrics`` block
        holding one result dict per metric.
    """
    events = _load_events(loki_dir)
    proofs = _load_proofs(loki_dir)
    # Count distinct runs, not run_start events: a run that logs run_start
    # more than once is still one run, and the per-metric denominators are
    # likewise built from distinct run ids.
    started_runs = {
        str(e.get("run_id") or "unknown")
        for e in events
        if e.get("type") == EVENT_RUN_START
    }

    return {
        "schema_version": SCHEMA_VERSION,
        "generated_at": _utc_now(),
        "loki_dir": loki_dir,
        "scope": "single-project",
        "corpus": {
            "events_total": len(events),
            "instrumented_runs": len(started_runs),
            "proofs_total": len(proofs),
        },
        "metrics": {
            "evidence_block_rate": _metric_evidence_block(events),
            "gate_failure_distribution": _metric_gate_distribution(events),
            "council_rejection_rate": _metric_council(events),
            "cost_per_verified_task": _metric_cost_per_verified(proofs),
        },
    }
480
+
481
+
482
def write_metrics_cache(loki_dir, m):
    """Write *m* as indented JSON to <loki_dir>/metrics/trust-metrics.json.

    Creates the ``metrics`` directory if needed. Returns the path written,
    or None if creating the directory, opening the file, or serialising
    fails.
    """
    cache_dir = os.path.join(loki_dir, "metrics")
    cache_file = os.path.join(cache_dir, "trust-metrics.json")
    try:
        os.makedirs(cache_dir, exist_ok=True)
        with open(cache_file, "w", encoding="utf-8") as sink:
            json.dump(m, sink, indent=2)
    except Exception:
        return None
    return cache_file
492
+
493
+
494
def format_metrics_json(m):
    """Serialise the metrics dict *m* as JSON text with a 2-space indent."""
    rendered = json.dumps(m, indent=2)
    return rendered
496
+
497
+
498
+ # ---------------------------------------------------------------------------
499
+ # Human formatting
500
+ # ---------------------------------------------------------------------------
501
+
502
+ def _fmt_pct(x):
503
+ return "n/a" if x is None else ("%.1f%%" % (x * 100.0))
504
+
505
+
506
def format_metrics_human(m):
    """Render the metrics dict *m* as a plain-text, four-section report.

    Reads the header fields plus the ``corpus`` and ``metrics`` sub-dicts of
    *m*; a missing or non-dict section is treated as empty. A metric whose
    ``available`` flag is falsy prints its ``reason`` instead of numbers.
    Returns the report as one newline-joined string.
    """
    corpus = _obj(m.get("corpus"))
    out = [
        "Loki Mode Trust Metrics (snapshot at %s)" % m.get("generated_at"),
        "Source: %s [single project]" % m.get("loki_dir"),
        "Corpus: %d events, %d instrumented run(s), %d proof(s)"
        % (
            corpus.get("events_total", 0),
            corpus.get("instrumented_runs", 0),
            corpus.get("proofs_total", 0),
        ),
        "",
    ]
    metrics = _obj(m.get("metrics"))

    # Section 1: evidence-gate block rate.
    block = _obj(metrics.get("evidence_block_rate"))
    out.append("1. Evidence-gate block rate")
    if block.get("available"):
        run_count = block.get("instrumented_runs", 0)
        out.append(
            " %s of runs caught an unproven 'done' claim "
            "(%d/%d runs, %d block events) n=%d"
            % (
                _fmt_pct(block.get("block_rate")),
                block.get("runs_with_block", 0),
                run_count,
                block.get("block_events_total", 0),
                run_count,
            )
        )
    else:
        out.append(" not instrumented: %s" % block.get("reason"))
    out.append("")

    # Section 2: per-run gate-failure distribution and per-gate tally.
    gates = _obj(metrics.get("gate_failure_distribution"))
    out.append("2. Gate failure distribution per run")
    if gates.get("available"):
        out.append(
            " per-run failures median=%s p90=%s max=%s "
            "(total=%d over n=%d runs)"
            % (
                gates.get("per_run_median"),
                gates.get("per_run_p90"),
                gates.get("per_run_max"),
                gates.get("total_gate_failures", 0),
                gates.get("instrumented_runs", 0),
            )
        )
        per_gate = _obj(gates.get("gate_breakdown"))
        if per_gate:
            out.append(" which gate fired:")
            out.extend(
                " %-28s %d" % (gate_name, hits)
                for gate_name, hits in per_gate.items()
            )
    else:
        out.append(" not instrumented: %s" % gates.get("reason"))
    out.append("")

    # Section 3: council rejection rate and split share.
    council = _obj(metrics.get("council_rejection_rate"))
    out.append("3. Council rejection / split rate")
    if council.get("available"):
        out.append(
            " rejection rate %s (%d/%d votes)"
            % (
                _fmt_pct(council.get("rejection_rate")),
                council.get("rejected_votes", 0),
                council.get("total_votes", 0),
            )
        )
        split_rate = council.get("split_rate_of_rejections")
        split_text = "n/a" if split_rate is None else _fmt_pct(split_rate)
        out.append(
            " split verdicts among rejections: %s (%d/%d)"
            % (
                split_text,
                council.get("split_rejected_votes", 0),
                council.get("rejected_votes", 0),
            )
        )
    else:
        out.append(" not instrumented: %s" % council.get("reason"))
    out.append("")

    # Section 4: cost per verified run.
    cost = _obj(metrics.get("cost_per_verified_task"))
    out.append("4. Cost per VERIFIED task (local denominator)")
    if cost.get("available"):
        usd = cost.get("usd_per_verified")
        tok = cost.get("tokens_per_verified")
        usd_text = "n/a" if usd is None else "%.4f" % usd
        tok_text = "" if tok is None else " %.0f tokens / verified run" % tok
        out.append((" $%s / verified run" % usd_text) + tok_text)
        out.append(
            " over %d verified run(s) with cost data"
            % cost.get("verified_runs_with_cost", 0)
        )
    else:
        out.append(" not available: %s" % cost.get("reason"))

    out.append("")
    out.append("Honesty note: each metric reports its own n=. 'not instrumented'")
    out.append("means no source artifact exists yet, NOT a measured zero.")
    return "\n".join(out)
596
+
597
+
598
+ # ---------------------------------------------------------------------------
599
+ # CLI
600
+ # ---------------------------------------------------------------------------
601
+
602
+ def _resolve_loki_dir(argv):
603
+ for i, a in enumerate(argv):
604
+ if a == "--loki-dir" and i + 1 < len(argv):
605
+ return argv[i + 1]
606
+ if a.startswith("--loki-dir="):
607
+ return a.split("=", 1)[1]
608
+ env = os.environ.get("LOKI_DIR")
609
+ if env:
610
+ return env
611
+ return os.path.join(os.getcwd(), ".loki")
612
+
613
+
614
def main(argv=None):
    """Command-line entry point: compute the metrics and print them.

    Flags read from *argv* (defaults to ``sys.argv[1:]``): ``--json`` prints
    JSON instead of the text report, ``--no-cache`` skips writing the cache
    file, and ``--all-projects`` is rejected. Returns the exit status: 2 for
    ``--all-projects``, otherwise 0.
    """
    args = list(sys.argv[1:] if argv is None else argv)

    if "--all-projects" in args:
        sys.stderr.write(
            "loki trust-metrics: --all-projects is out of scope (single "
            "project only). Run it inside each project directory.\n"
        )
        return 2

    loki_dir = _resolve_loki_dir(args)
    report = compute_trust_metrics(loki_dir)

    if "--no-cache" not in args:
        write_metrics_cache(loki_dir, report)

    render = format_metrics_json if "--json" in args else format_metrics_human
    sys.stdout.write(render(report) + "\n")
    return 0
633
+
634
+
635
# Script entry point: exit with the status code returned by main().
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)