shkit 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. healing_kit/__init__.py +3 -0
  2. healing_kit/auth.py +79 -0
  3. healing_kit/clients/__init__.py +1 -0
  4. healing_kit/clients/databricks_client.py +183 -0
  5. healing_kit/clients/teams_client.py +128 -0
  6. healing_kit/models/__init__.py +1 -0
  7. healing_kit/models/diagnosis.py +45 -0
  8. healing_kit/models/events.py +30 -0
  9. healing_kit/models/evidence.py +83 -0
  10. healing_kit/runtime/__init__.py +6 -0
  11. healing_kit/runtime/approval.py +141 -0
  12. healing_kit/runtime/maintenance.py +52 -0
  13. healing_kit/services/__init__.py +1 -0
  14. healing_kit/services/cache_service.py +120 -0
  15. healing_kit/services/circuit_breaker.py +114 -0
  16. healing_kit/services/context_agent.py +127 -0
  17. healing_kit/services/dependency_graph.py +141 -0
  18. healing_kit/services/diagnosis_engine.py +165 -0
  19. healing_kit/services/identity.py +61 -0
  20. healing_kit/services/model_router.py +52 -0
  21. healing_kit/services/query_guard.py +168 -0
  22. healing_kit/services/resolution_verifier.py +100 -0
  23. healing_kit/services/token_budget.py +137 -0
  24. healing_kit/utils/__init__.py +1 -0
  25. healing_kit/utils/error_hash.py +15 -0
  26. healing_kit/utils/hmac_tokens.py +86 -0
  27. healing_kit/utils/sql_safety.py +84 -0
  28. iic/__init__.py +51 -0
  29. iic/__main__.py +18 -0
  30. iic/_console.py +235 -0
  31. iic/_doctor.py +143 -0
  32. iic/change/__init__.py +7 -0
  33. iic/change/change_detector.py +154 -0
  34. iic/context/__init__.py +7 -0
  35. iic/context/context_builder.py +117 -0
  36. iic/dependency/__init__.py +7 -0
  37. iic/dependency/dependency_analyzer.py +93 -0
  38. iic/diagnosis/__init__.py +7 -0
  39. iic/diagnosis/diagnosis_engine.py +183 -0
  40. iic/dna/__init__.py +7 -0
  41. iic/dna/dna_builder.py +184 -0
  42. iic/impact/__init__.py +7 -0
  43. iic/impact/impact_engine.py +102 -0
  44. iic/ingestion/__init__.py +14 -0
  45. iic/ingestion/base.py +21 -0
  46. iic/ingestion/databricks_source.py +98 -0
  47. iic/ingestion/static_source.py +23 -0
  48. iic/ingestion/webhook_source.py +39 -0
  49. iic/models/__init__.py +44 -0
  50. iic/models/change.py +77 -0
  51. iic/models/context.py +46 -0
  52. iic/models/diagnosis.py +37 -0
  53. iic/models/dna.py +77 -0
  54. iic/models/event.py +78 -0
  55. iic/models/impact.py +60 -0
  56. iic/models/report.py +88 -0
  57. iic/models/routing.py +41 -0
  58. iic/notify/__init__.py +7 -0
  59. iic/notify/teams_notifier.py +112 -0
  60. iic/report/__init__.py +7 -0
  61. iic/report/report_generator.py +67 -0
  62. iic/routing/__init__.py +7 -0
  63. iic/routing/router.py +42 -0
  64. iic/runtime/__init__.py +10 -0
  65. iic/runtime/_sql.py +11 -0
  66. iic/runtime/agent_config.py +48 -0
  67. iic/runtime/agent_runtime.py +70 -0
  68. iic/runtime/antibodies.py +100 -0
  69. iic/runtime/bootstrap.py +157 -0
  70. iic/runtime/constants.py +40 -0
  71. iic/runtime/context.py +46 -0
  72. iic/runtime/detective.py +72 -0
  73. iic/runtime/hooks.py +85 -0
  74. iic/runtime/incident_engine.py +207 -0
  75. iic/runtime/inprocess.py +350 -0
  76. iic/runtime/ledger.py +120 -0
  77. iic/runtime/monitor.py +155 -0
  78. iic/runtime/pattern_store.py +53 -0
  79. iic/runtime/reconciler.py +139 -0
  80. iic/runtime/scope_config.py +127 -0
  81. iic/runtime/store.py +150 -0
  82. iic/runtime/wrapper.py +28 -0
  83. iic_autoload.pth +1 -0
  84. onboarding/__init__.py +1 -0
  85. onboarding/cli.py +168 -0
  86. onboarding/config_schema.py +62 -0
  87. onboarding/manifest.py +27 -0
  88. onboarding/preflight.py +129 -0
  89. onboarding/provisioner.py +573 -0
  90. onboarding/rollback.py +81 -0
  91. shkit-1.2.0.dist-info/METADATA +239 -0
  92. shkit-1.2.0.dist-info/RECORD +94 -0
  93. shkit-1.2.0.dist-info/WHEEL +4 -0
  94. shkit-1.2.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,137 @@
1
+ """Token budget enforcer — cost governance for LLM calls."""
2
+
3
+ from dataclasses import dataclass
4
+ from datetime import datetime
5
+ from enum import Enum
6
+
7
+
8
class BudgetMode(str, Enum):
    """Operating mode selected from the share of the hourly budget already spent."""

    NORMAL = "normal"
    WARNING = "warning"
    DEGRADED = "degraded"


@dataclass
class BudgetState:
    """Current budget window state."""

    window_id: str  # hourly window key, formatted YYYY-MM-DD-HH
    tokens_used: int
    cost_usd: float
    hourly_budget: int
    current_mode: BudgetMode  # mode stored on the row; may lag behind spend_pct
    spend_pct: float  # tokens_used as a percentage of hourly_budget


class TokenBudgetEnforcer:
    """
    Tracks hourly LLM token spend and enforces mode transitions.

    Modes (see ``_compute_mode``):
    - Normal (< 70% of budget): every model tier may be invoked
    - Warning (70% to < 90%): only non-"powerful" tiers may be invoked
    - Degraded (>= 90%): no model invocation is allowed
    """

    WARNING_PCT = 70
    DEGRADED_PCT = 90
    _WINDOW_FORMAT = "%Y-%m-%d-%H"

    def __init__(self, spark_session, catalog: str, schema: str = "healing_schema", hourly_budget: int = 50000):
        self.spark = spark_session
        self.table = f"{catalog}.{schema}.token_budget"
        self.hourly_budget = hourly_budget

    @staticmethod
    def _utcnow() -> datetime:
        """Return the current UTC time as a naive datetime.

        ``datetime.utcnow()`` is deprecated since Python 3.12; dropping the
        tzinfo keeps the timestamp strings in the same offset-less format.
        """
        from datetime import timezone

        return datetime.now(timezone.utc).replace(tzinfo=None)

    def _current_window_id(self) -> str:
        """Get the current hourly window ID (YYYY-MM-DD-HH)."""
        return self._utcnow().strftime(self._WINDOW_FORMAT)

    def _read_state(self, window_id: str) -> BudgetState:
        """Load the state of ``window_id``; an absent row reads as an empty window."""
        rows = self.spark.sql(f"""
            SELECT window_id, tokens_used, cost_usd, hourly_budget, current_mode
            FROM {self.table}
            WHERE window_id = '{window_id}'
        """).collect()

        if not rows:
            return BudgetState(
                window_id=window_id,
                tokens_used=0,
                cost_usd=0.0,
                hourly_budget=self.hourly_budget,
                current_mode=BudgetMode.NORMAL,
                spend_pct=0.0,
            )

        row = rows[0]
        # NULL columns must not crash the governor; a NULL budget falls back to
        # the configured one so enforcement never silently switches off.
        tokens = row["tokens_used"] or 0
        cost = row["cost_usd"] or 0.0
        budget = row["hourly_budget"]
        if budget is None:
            budget = self.hourly_budget
        # Multiply before dividing so exact thresholds are not lost to float
        # rounding (0.9 * 100 evaluates to 90.00000000000001).
        spend_pct = (tokens * 100 / budget) if budget > 0 else 0.0

        return BudgetState(
            window_id=row["window_id"],
            tokens_used=tokens,
            cost_usd=cost,
            hourly_budget=budget,
            current_mode=BudgetMode(row["current_mode"]),
            spend_pct=spend_pct,
        )

    def get_current_state(self) -> BudgetState:
        """Get the current budget window state."""
        return self._read_state(self._current_window_id())

    def get_current_mode(self) -> BudgetMode:
        """Return current mode based on spend percentage."""
        return self._compute_mode(self.get_current_state().spend_pct)

    def can_invoke_model(self, model_tier: str) -> bool:
        """
        Check if a model invocation is allowed under current budget mode.

        model_tier: 'lightweight' or 'powerful'. Any tier other than
        'powerful' is allowed in WARNING mode; nothing is allowed in DEGRADED.
        """
        mode = self.get_current_mode()
        if mode == BudgetMode.DEGRADED:
            return False
        return not (mode == BudgetMode.WARNING and model_tier == "powerful")

    def record_usage(self, tokens_used: int, cost_usd: float) -> BudgetMode:
        """
        Record token usage and return the resulting mode.
        Creates the window entry if it doesn't exist.

        Raises:
            ValueError: if ``tokens_used`` / ``cost_usd`` are not numeric, are
                negative, or ``cost_usd`` is not finite.
        """
        from datetime import timedelta

        # Both values are spliced into SQL text below, so force them to plain
        # numbers first; anything else (including injected SQL) is rejected.
        tokens = int(tokens_used)
        cost = float(cost_usd)
        if tokens < 0 or not (0 <= cost < float("inf")):
            raise ValueError("tokens_used and cost_usd must be finite, non-negative numbers")

        # One clock read drives the window id, its bounds and the audit
        # timestamps, so they cannot disagree across an hour boundary.
        moment = self._utcnow()
        window_open = moment.replace(minute=0, second=0, microsecond=0)
        window_id = window_open.strftime(self._WINDOW_FORMAT)
        window_start = window_open.isoformat()
        window_end = (window_open + timedelta(hours=1)).isoformat()
        now = moment.isoformat()

        self.spark.sql(f"""
            MERGE INTO {self.table} AS target
            USING (SELECT '{window_id}' AS window_id) AS source
            ON target.window_id = source.window_id
            WHEN MATCHED THEN UPDATE SET
                tokens_used = target.tokens_used + {tokens},
                cost_usd = target.cost_usd + {cost},
                updated_at = '{now}'
            WHEN NOT MATCHED THEN INSERT
                (window_id, window_start, window_end, tokens_used, cost_usd, hourly_budget, current_mode, created_at, updated_at)
                VALUES ('{window_id}', '{window_start}', '{window_end}', {tokens}, {cost}, {self.hourly_budget}, 'normal', '{now}', '{now}')
        """)

        # Re-read the same window that was just written, then persist the mode
        # only when it actually changed.
        state = self._read_state(window_id)
        new_mode = self._compute_mode(state.spend_pct)

        if new_mode != state.current_mode:
            self.spark.sql(f"""
                UPDATE {self.table}
                SET current_mode = '{new_mode.value}', mode_changed_at = '{now}', updated_at = '{now}'
                WHERE window_id = '{window_id}'
            """)

        return new_mode

    def _compute_mode(self, spend_pct: float) -> BudgetMode:
        """Determine mode from spend percentage (thresholds are inclusive)."""
        if spend_pct >= self.DEGRADED_PCT:
            return BudgetMode.DEGRADED
        if spend_pct >= self.WARNING_PCT:
            return BudgetMode.WARNING
        return BudgetMode.NORMAL
@@ -0,0 +1 @@
1
+ """Utility functions."""
@@ -0,0 +1,15 @@
1
+ """SHA-256 error fingerprinting for resolution cache keys."""
2
+
3
+ import hashlib
4
+
5
+
6
def compute_error_hash(error_type: str, notebook_path: str, affected_tables: list[str]) -> str:
    """
    Compute a deterministic SHA-256 hash of the failure fingerprint.

    The fingerprint is ``error_type||notebook_path||<tables>``, where
    ``<tables>`` is the whitespace-stripped, non-empty table names sorted and
    joined with ``|``. Sorting makes the key independent of input order.

    ``affected_tables`` may be ``None`` or empty; both hash like "no tables".
    Duplicate names are kept as given, so previously stored keys stay valid.

    Returns the hex digest (64 lowercase hex characters).
    """
    cleaned = (raw.strip() for raw in (affected_tables or ()))
    table_part = "|".join(sorted(name for name in cleaned if name))
    fingerprint = f"{error_type}||{notebook_path}||{table_part}"
    return hashlib.sha256(fingerprint.encode("utf-8")).hexdigest()
@@ -0,0 +1,86 @@
1
+ """HMAC-SHA256 signed one-time tokens for HITL approval flow."""
2
+
3
+ import hashlib
4
+ import hmac
5
+ import json
6
+ import time
7
+ from dataclasses import dataclass
8
+
9
TOKEN_TTL_SECONDS = 900  # 15 minutes


@dataclass
class TokenPayload:
    """Decoded token payload."""

    run_id: str
    approver_email: str
    action_id: str
    issued_at: float  # epoch seconds (time.time()) when the token was minted
    expires_at: float  # issued_at + TOKEN_TTL_SECONDS


_PAYLOAD_FIELDS = ("run_id", "approver_email", "action_id", "issued_at", "expires_at")


def _sign(payload: bytes, secret_key: bytes) -> str:
    """Return the hex HMAC-SHA256 of ``payload`` under ``secret_key``."""
    return hmac.new(secret_key, payload, hashlib.sha256).hexdigest()


def generate_token(run_id: str, approver_email: str, action_id: str, secret_key: bytes) -> str:
    """
    Generate a signed HMAC-SHA256 token with 15-minute TTL.

    The token encodes: run_id + approver_email + action_id + timestamps.

    Returns ``<payload_hex>.<signature_hex>``: the compact JSON payload
    hex-encoded, a dot, and the hex HMAC-SHA256 of the raw payload bytes.
    """
    issued_at = time.time()
    payload = json.dumps({
        "run_id": run_id,
        "approver_email": approver_email,
        "action_id": action_id,
        "issued_at": issued_at,
        "expires_at": issued_at + TOKEN_TTL_SECONDS,
    }, separators=(",", ":")).encode()

    # Hex (not base64) keeps the payload free of "." so the token splits cleanly.
    return f"{payload.hex()}.{_sign(payload, secret_key)}"


def validate_token(token: str, secret_key: bytes) -> TokenPayload:
    """
    Validate a signed token. Raises ValueError if invalid or expired.

    Checks, in order:
    1. Token format is valid (exactly ``payload.signature``, hex payload)
    2. HMAC signature matches
    3. Payload carries every expected field
    4. Token has not expired (15-min TTL)

    This function only verifies the token itself; it keeps no record of
    tokens it has already accepted.
    """
    parts = token.split(".")
    if len(parts) != 2:
        raise ValueError("Invalid token format")

    payload_hex, signature = parts

    # Reconstruct payload (UnicodeDecodeError is a ValueError subclass).
    try:
        payload_bytes = bytes.fromhex(payload_hex)
        payload_str = payload_bytes.decode()
    except ValueError as exc:
        raise ValueError("Invalid token encoding") from exc

    # Compare as bytes: compare_digest() raises TypeError for str arguments
    # containing non-ASCII characters, which would escape the ValueError contract.
    expected_sig = _sign(payload_bytes, secret_key)
    if not hmac.compare_digest(signature.encode("utf-8"), expected_sig.encode("ascii")):
        raise ValueError("Invalid token signature — possible tampering")

    # Parse payload only after the signature is known to be good.
    try:
        data = json.loads(payload_str)
        fields = {name: data[name] for name in _PAYLOAD_FIELDS}
        expired = time.time() > fields["expires_at"]
    except (ValueError, KeyError, TypeError) as exc:
        raise ValueError("Invalid token payload") from exc

    if expired:
        raise ValueError(f"Token expired at {fields['expires_at']}")

    return TokenPayload(**fields)
@@ -0,0 +1,84 @@
1
+ """Safe SQL helpers — literal escaping and a word-boundary SELECT guard.
2
+
3
+ These exist because the runtime previously (a) interpolated LLM/free-text output
4
+ directly into INSERT statements and (b) used a substring keyword blocklist that
5
+ both let crafted statements through and false-positived on legitimate columns
6
+ like ``created_at`` (contains "CREATE") and ``updated_at`` (contains "UPDATE").
7
+ """
8
+
9
+ import re
10
+
11
+ # DDL/DML keywords that must never appear in a diagnostic query. Matched as whole
12
+ # words (see ``is_safe_select``) so ``created_at`` / ``updated_at`` are allowed.
13
+ _FORBIDDEN_KEYWORDS = (
14
+ "INSERT", "UPDATE", "DELETE", "DROP", "ALTER", "TRUNCATE",
15
+ "CREATE", "MERGE", "GRANT", "REVOKE", "REPLACE", "COPY",
16
+ "CALL", "EXECUTE", "SET", "USE",
17
+ )
18
+
19
+ _KEYWORD_RE = re.compile(
20
+ r"\b(" + "|".join(_FORBIDDEN_KEYWORDS) + r")\b", re.IGNORECASE
21
+ )
22
+ # A SQL line/block comment can be used to smuggle a second statement past naive
23
+ # checks; reject anything containing comment markers.
24
+ _COMMENT_RE = re.compile(r"(--|/\*|\*/|#)")
25
+
26
+
27
def sql_literal(value) -> str:
    """Render a Python value as a safe SQL literal.

    - ``None`` -> ``NULL``
    - ``bool`` -> ``TRUE``/``FALSE``
    - ``int``/``float`` -> bare numeric; NaN and +/-infinity become
      ``CAST('NaN' AS DOUBLE)`` etc., because ``nan``/``inf`` are not literals
    - everything else -> single-quoted string with backslashes escaped and
      quotes doubled

    Backslashes are escaped because Spark SQL treats ``\\`` as an escape
    character inside string literals by default; left alone, a value such as
    ``\\'`` would terminate the literal early.
    """
    if value is None:
        return "NULL"
    # bool is a subclass of int, so it has to be tested first.
    if isinstance(value, bool):
        return "TRUE" if value else "FALSE"
    if isinstance(value, int):
        # int() drops subclass reprs such as "<Level.HIGH: 3>" for IntEnum.
        return repr(int(value))
    if isinstance(value, float):
        number = float(value)  # likewise normalises float subclasses
        if number != number:
            return "CAST('NaN' AS DOUBLE)"
        if number == float("inf"):
            return "CAST('Infinity' AS DOUBLE)"
        if number == float("-inf"):
            return "CAST('-Infinity' AS DOUBLE)"
        return repr(number)
    # Backslashes first, otherwise the escapes added for quotes get re-escaped.
    escaped = str(value).replace("\\", "\\\\").replace("'", "''")
    return "'" + escaped + "'"


# Short alias used by the runtime / notebook code paths.
sql_lit = sql_literal
46
+
47
+
48
def is_safe_select(query: str) -> bool:
    """Tell whether ``query`` is one read-only SELECT (or WITH ...) statement.

    The answer is False for: a missing, empty or non-string query; a query
    containing a comment marker; a query with a semicolon other than trailing
    ones; a query that does not begin with SELECT or WITH; and a query
    containing any forbidden DDL/DML keyword as a whole word.
    """
    if not (query and isinstance(query, str)):
        return False

    # Trailing semicolons are tolerated; everything else is judged without them.
    statement = query.strip().rstrip(";").strip()
    if not statement:
        return False

    # Comments could hide a second statement; a leftover ";" means there is one.
    has_comment = _COMMENT_RE.search(statement) is not None
    if has_comment or ";" in statement:
        return False

    if not statement.upper().startswith(("SELECT", "WITH")):
        return False

    return _KEYWORD_RE.search(statement) is None
77
+
78
+
79
def _outer_sql_text(sql: str) -> str:
    """Return ``sql`` with quoted text and parenthesised groups blanked out."""
    kept = []
    depth = 0
    quote = ""
    skip_next = False
    for ch in sql:
        if skip_next:
            skip_next = False
            continue
        if quote:
            if ch == "\\":
                skip_next = True  # escaped character inside a quoted run
            elif ch == quote:
                quote = ""
            continue
        if ch in "'\"`":
            quote = ch
            kept.append(" ")
        elif ch == "(":
            depth += 1
            kept.append(" ")
        elif ch == ")":
            depth = max(depth - 1, 0)
        elif depth == 0:
            kept.append(ch)
    return "".join(kept)


def ensure_limit(query: str, limit: int = 20) -> str:
    """Append a LIMIT clause if the query doesn't already have one.

    Only a LIMIT on the outermost query counts. A ``LIMIT`` inside a subquery,
    a CTE body or a string literal does not bound the result, so it no longer
    suppresses the appended clause.

    Returns the query with surrounding whitespace and trailing semicolons
    removed, plus `` LIMIT <limit>`` when no outer LIMIT was found.

    Raises:
        ValueError: if ``limit`` is not an integer value or is negative.
    """
    # The cap is spliced into SQL text, so force it to a plain integer.
    row_cap = int(limit)
    if row_cap < 0:
        raise ValueError("limit must be non-negative")

    stripped = query.strip().rstrip(";")
    if re.search(r"\bLIMIT\b", _outer_sql_text(stripped), re.IGNORECASE):
        return stripped
    return f"{stripped} LIMIT {row_cap}"
iic/__init__.py ADDED
@@ -0,0 +1,51 @@
1
+ """Incident Intelligence Core (IIC).
2
+
3
+ A deterministic + AI hybrid engine that converts raw data-pipeline failures into
4
+ structured, prioritized, explainable incident knowledge.
5
+
6
+ Design principles
7
+ -----------------
8
+ 1. Deterministic first, AI second — the LLM only runs after the full structured
9
+ context (the :class:`~iic.models.IncidentDNA`) has been built.
10
+ 2. Every failure becomes a structured object — one ``IncidentReport`` per root cause.
11
+ 3. No external workflow dependencies — Databricks + logs only, no Jira/ServiceNow.
12
+ 4. No "chatty AI" — the diagnosis engine returns structured output only.
13
+ 5. Every decision is traceable from evidence.
14
+
15
+ The 11-stage pipeline lives in :mod:`iic.runtime.incident_engine`.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ __version__ = "1.2.0"
21
+
22
+ # NOTE: `import iic` is intentionally lightweight and does NOT arm anything.
23
+ # The self-arming tripwire is installed by `iic_autoload.pth` at interpreter
24
+ # startup (it runs `import iic.runtime.bootstrap`). See docs/SELF_ARMING.md.
25
+
26
+ # Operator/console entry points — thin lazy wrappers so `import iic` stays light
27
+ # (the heavy bits load only when these are actually called from a notebook).
28
+
29
def console(*args, **kwargs):
    """Fold pending occurrences and render the antibody ledger.

    Lazy wrapper: ``iic._console`` is imported on the first call and every
    argument is forwarded unchanged to its ``console`` function.
    """
    import iic._console as _impl

    return _impl.console(*args, **kwargs)
33
+
34
+
35
def console_record(*args, **kwargs):
    """Record a human-confirmed resolution for a pattern (append-only).

    Lazy wrapper: forwards every argument to ``iic._console.console_record``,
    importing that module only when called.
    """
    import iic._console as _impl

    return _impl.console_record(*args, **kwargs)
39
+
40
+
41
def onboard(*args, **kwargs):
    """One-time first-run setup: secret scope, keys, ACL, dirs, doctor, test card.

    Lazy wrapper: forwards every argument to ``iic._console.onboard``,
    importing that module only when called.
    """
    import iic._console as _impl

    return _impl.onboard(*args, **kwargs)
45
+
46
+
47
def doctor(*args, **kwargs):
    """Run the health check implemented by ``iic._doctor.doctor``.

    Lazy wrapper: the module is imported on the first call, the arguments are
    forwarded unchanged and the callee's result is returned as-is.
    """
    import iic._doctor as _impl

    return _impl.doctor(*args, **kwargs)
51
+
iic/__main__.py ADDED
@@ -0,0 +1,18 @@
1
+ """``python -m iic doctor [--check-principal <name>]`` — the support entry point."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+
7
+
8
def main(argv=None) -> int:
    """Dispatch ``python -m iic <command>``.

    ``argv`` defaults to ``sys.argv[1:]``. The only command is ``doctor``: the
    remaining arguments go to ``iic._doctor.main`` and its return value is
    returned. Anything else prints the usage line and returns 2.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args or args[0] != "doctor":
        print("usage: python -m iic doctor [--check-principal <name>]")
        return 2

    # Imported lazily so the usage path never loads the doctor module.
    from iic._doctor import main as run_doctor

    return run_doctor(args[1:])


if __name__ == "__main__":
    sys.exit(main())
iic/_console.py ADDED
@@ -0,0 +1,235 @@
1
+ """In-workspace console (``iic.console``) and first-run onboarding (``iic.onboard``).
2
+
3
+ Runs from a Databricks notebook on serverless. The console is the ONLY writer of
4
+ ``antibodies.yaml`` besides a human editing it: it folds ``.iic_pending/*`` into the
5
+ ledger with the canonical merge rule (:mod:`iic.runtime.ledger`), renders the ledger,
6
+ and records resolutions (append-only). ``onboard`` does the one-time setup: create
7
+ the ``iic`` secret scope, write the keys, grant READ, create the volume dirs, run
8
+ doctor, and send a test card. Everything is best-effort / fail-open.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import glob
14
+ import json
15
+ import os
16
+
17
+ from iic.runtime.antibodies import load_ledger
18
+ from iic.runtime.constants import (
19
+ ANTIBODIES_FILENAME,
20
+ DEFAULT_SECRET_SCOPE,
21
+ PENDING_DIRNAME,
22
+ )
23
+ from iic.runtime.ledger import dump_ledger, merge_ledgers
24
+
25
+ # ── ledger console ──
26
+
27
+ def _resolve_base(base_dir):
28
+ if base_dir:
29
+ return base_dir
30
+ try:
31
+ from iic.runtime.scope_config import load_settings
32
+ return (load_settings() or {}).get("volume_path")
33
+ except Exception:
34
+ return None
35
+
36
+
37
+ def _read_markers(base_dir):
38
+ out = []
39
+ try:
40
+ for p in glob.glob(os.path.join(base_dir, PENDING_DIRNAME, "*.json")):
41
+ try:
42
+ with open(p) as f:
43
+ out.append((p, json.load(f)))
44
+ except Exception:
45
+ continue
46
+ except Exception:
47
+ pass
48
+ return out
49
+
50
+
51
def _write_ledger(base_dir, ledger):
    """Overwrite ``<base_dir>/<ANTIBODIES_FILENAME>`` with the dumped ledger."""
    ledger_path = os.path.join(base_dir, ANTIBODIES_FILENAME)
    with open(ledger_path, "w") as handle:
        handle.write(dump_ledger(ledger))
54
+
55
+
56
def fold(base_dir=None) -> dict:
    """Merge the pending markers into the ledger file and return the result.

    Steps: resolve the base directory, load the existing ledger, read the
    pending markers, merge them with ``merge_ledgers``, write the merged
    ledger, then delete the marker files that were read (deletion failures
    are ignored). A one-line summary is printed.

    Raises:
        RuntimeError: if no base directory can be resolved.
    """
    base = _resolve_base(base_dir)
    if not base:
        raise RuntimeError("no volume_path — configure the 'iic' secret scope or pass base_dir")

    current = load_ledger(base)
    pending = _read_markers(base)
    payloads = [payload for _path, payload in pending]
    merged, stats = merge_ledgers(current, {}, payloads)
    _write_ledger(base, merged)

    # Remove markers only after the merged ledger has been written.
    for marker_path, _payload in pending:
        try:
            os.remove(marker_path)
        except Exception:
            pass

    fresh = stats["new_keys"]
    if fresh:
        outcome = f"new pattern(s): {fresh}"
    else:
        outcome = "no new patterns"
    print(f"[console] folded {len(pending)} occurrence(s); " + outcome)
    return merged
74
+
75
+
76
def render(ledger: dict) -> str:
    """Format the ledger as a plain-text report.

    Entries are split into resolved (non-blank ``resolution``) and unresolved.
    A missing or ``None`` resolution counts as unresolved: an empty YAML value
    loads as ``None`` and must not be shown as the fix "None". Unresolved
    entries come first, most-seen first; resolved entries follow, sorted by
    pattern id. An empty ledger yields a one-line placeholder.
    """
    if not ledger:
        return "No patterns recorded yet. Break something to see your first incident."

    resolved, unresolved = [], []
    for key, entry in ledger.items():
        raw = (entry or {}).get("resolution")
        has_fix = raw is not None and bool(str(raw).strip())
        (resolved if has_fix else unresolved).append((key, entry))

    lines = [f"Antibody ledger — {len(ledger)} pattern(s): "
             f"{len(resolved)} resolved, {len(unresolved)} awaiting a fix", ""]
    if unresolved:
        lines.append("⚠️ Awaiting resolution (record a fix for these):")
        for k, e in sorted(unresolved, key=lambda kv: -_int((kv[1] or {}).get("times_seen"))):
            lines.append(f" • {k} (seen {_int((e or {}).get('times_seen'))}×) "
                         f"e.g. {(e or {}).get('example', '')}")
        lines.append("")
    if resolved:
        lines.append("♻️ Resolved (these fixes show on the card):")
        for k, e in sorted(resolved):
            lines.append(f" • {k} → {(e or {}).get('resolution', '')}")
    return "\n".join(lines)
94
+
95
+
96
def record_resolution(pattern_id, resolution, *, base_dir=None, overwrite=False) -> bool:
    """Append-only: set a resolution for a pattern.

    Refuses to replace an existing non-blank resolution unless
    ``overwrite=True``. A missing or ``None`` resolution (an empty YAML value)
    counts as "no resolution yet", so the first fix can always be recorded.

    Returns True if the ledger was written, False if the write was refused.

    Raises:
        RuntimeError: if no base directory can be resolved.
    """
    base = _resolve_base(base_dir)
    if not base:
        raise RuntimeError("no volume_path — configure the 'iic' secret scope or pass base_dir")

    ledger = load_ledger(base) or {}
    entry = dict(ledger.get(pattern_id) or {})

    existing = entry.get("resolution")
    already_resolved = existing is not None and bool(str(existing).strip())
    if already_resolved and not overwrite:
        print(f"[console] '{pattern_id}' already has a resolution; pass overwrite=True to replace it")
        return False

    # A pattern recorded by hand before it was ever observed starts at zero.
    entry.setdefault("times_seen", 0)
    entry["resolution"] = resolution
    ledger[pattern_id] = entry
    _write_ledger(base, ledger)
    print(f"[console] recorded resolution for '{pattern_id}'")
    return True
113
+
114
+
115
def console(base_dir=None) -> dict:
    """Run ``fold``, print the rendered ledger and a usage hint, return the ledger."""
    folded = fold(base_dir)
    report = render(folded)
    print(report)
    print("\nRecord a fix: iic.console_record('<pattern_id>', 'the fix that worked')")
    return folded
121
+
122
+
123
def console_record(pattern_id, resolution, *, base_dir=None, overwrite=False) -> bool:
    """Same as ``record_resolution``, under the name that ``console()`` prints."""
    written = record_resolution(
        pattern_id,
        resolution,
        base_dir=base_dir,
        overwrite=overwrite,
    )
    return written
126
+
127
+
128
+ def _int(x) -> int:
129
+ try:
130
+ return int(x or 0)
131
+ except Exception:
132
+ return 0
133
+
134
+
135
+ # ── onboarding ──
136
+
137
+ _SCOPE_KEYS = ("teams_webhook", "volume_path", "host", "pat",
138
+ "github_repo", "github_dispatch_token", "dedup_ttl_seconds")
139
+
140
+
141
def _workspace_client():
    """Return a ``databricks.sdk.WorkspaceClient``, or ``None`` if one cannot be built.

    Both a failed import and a failed construction produce ``None``.
    """
    client = None
    try:
        from databricks.sdk import WorkspaceClient

        client = WorkspaceClient()
    except Exception:
        client = None
    return client
147
+
148
+
149
def _prompt_answers() -> dict:  # pragma: no cover - interactive
    """Ask the onboarding questions on stdin and return the answers.

    The result has one stripped string per question (blank when skipped) plus
    ``read_principals``, the comma-separated principal list split into
    non-empty names. The PAT and the GitHub dispatch token are read with
    ``getpass`` so they are not echoed into the terminal or notebook output.
    """
    from getpass import getpass

    print("IIC onboarding — answer a few questions (blank to skip optional ones):")
    a = {
        "teams_webhook": input(" Teams webhook URL (required): ").strip(),
        "volume_path": input(" Volume path for memory, e.g. /Volumes/cat/sch/libs (required): ").strip(),
        "host": input(" Workspace host for 'View Run' links (optional): ").strip(),
        "pat": getpass(" Workspace PAT to enable enrichment (optional): ").strip(),
        "github_repo": input(" GitHub owner/repo for incident archiving (optional): ").strip(),
        "github_dispatch_token": getpass(" GitHub dispatch token (optional): ").strip(),
    }
    principals = input(" Group/SP names to grant READ (comma-separated): ").strip()
    a["read_principals"] = [p.strip() for p in principals.split(",") if p.strip()]
    return a
162
+
163
+
164
+ def _send_test_card(webhook) -> None:
165
+ if not webhook:
166
+ return
167
+ try:
168
+ import requests
169
+ card = {"type": "message", "attachments": [{
170
+ "contentType": "application/vnd.microsoft.card.adaptive",
171
+ "content": {"type": "AdaptiveCard", "version": "1.4",
172
+ "body": [{"type": "TextBlock", "weight": "Bolder",
173
+ "text": "✅ IIC connected — you'll see incident cards here."}]}}]}
174
+ requests.post(webhook, json=card, headers={"Content-Type": "application/json"}, timeout=5)
175
+ except Exception:
176
+ pass
177
+
178
+
179
def onboard(*, scope=None, answers=None) -> bool:  # pragma: no cover - Databricks/interactive
    """One-time setup: create the secret scope, write keys, grant READ, create the
    volume dirs, run doctor, and send a test card.

    Args:
        scope: secret scope name; defaults to ``$IIC_SECRET_SCOPE`` or
            ``DEFAULT_SECRET_SCOPE``.
        answers: pre-filled answers dict; when empty or ``None`` the questions
            are asked interactively.

    Returns:
        False if a required answer is missing or no ``WorkspaceClient`` can be
        built; True otherwise. Later steps are best-effort: a failure is
        printed as a warning and setup continues.
    """
    a = answers or _prompt_answers()
    if not (a.get("teams_webhook") and a.get("volume_path")):
        print("❌ teams_webhook and volume_path are required.")
        return False

    scope = scope or os.environ.get("IIC_SECRET_SCOPE", DEFAULT_SECRET_SCOPE)
    w = _workspace_client()
    if w is None:
        print("❌ could not construct a Databricks WorkspaceClient — run this in your workspace.")
        return False

    try:
        existing = [s.name for s in w.secrets.list_scopes()]
        if scope not in existing:
            w.secrets.create_scope(scope=scope)
            print(f"✅ created secret scope '{scope}'")
        else:
            print(f"✅ secret scope '{scope}' already exists")
    except Exception as ex:
        print(f"⚠️ could not list/create scope ({str(ex)[:120]}); assuming it exists")

    # Track failures so the summary lines below never claim a success that
    # did not happen.
    failed_keys = []
    for key in _SCOPE_KEYS:
        val = a.get(key)
        if not val:
            continue
        try:
            w.secrets.put_secret(scope=scope, key=key, string_value=str(val))
        except Exception as ex:
            failed_keys.append(key)
            print(f"⚠️ put_secret {key} failed: {str(ex)[:100]}")
    if failed_keys:
        print(f"⚠️ could not write {len(failed_keys)} secret(s) to scope '{scope}': "
              f"{', '.join(failed_keys)}")
    else:
        print(f"✅ wrote secrets to scope '{scope}'")

    # ``or []`` also covers answers that carry read_principals=None.
    for principal in a.get("read_principals") or []:
        try:
            from databricks.sdk.service.workspace import AclPermission
            w.secrets.put_acl(scope=scope, principal=principal, permission=AclPermission.READ)
            print(f"✅ granted READ on '{scope}' to {principal}")
        except Exception as ex:
            print(f"⚠️ put_acl {principal} failed: {str(ex)[:100]}")
    print("ℹ️ EVERY identity that runs monitored jobs must have READ on this scope, "
          "or the agent can't load config there.")

    vol = a.get("volume_path")
    for d in (vol, os.path.join(vol, PENDING_DIRNAME), os.path.join(vol, ".iic_seen")):
        try:
            os.makedirs(d, exist_ok=True)
        except Exception:
            pass

    try:
        from iic._doctor import doctor
        doctor()
    except Exception:
        pass
    _send_test_card(a.get("teams_webhook"))
    if failed_keys:
        print("⚠️ onboarding finished with warnings — fix the failures above and run it again.")
    else:
        print("✅ onboarding complete — break something to see your first card.")
    return True