@smilintux/skcapstone 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/CUSTOM_AGENT.md +184 -0
- package/docs/GETTING_STARTED.md +3 -0
- package/package.json +1 -1
- package/scripts/archive-sessions.sh +72 -0
- package/scripts/nvidia-proxy.mjs +79 -15
- package/scripts/telegram-catchup-all.sh +136 -0
- package/src/skcapstone/blueprints/builtins/itil-operations.yaml +40 -0
- package/src/skcapstone/cli/__init__.py +2 -0
- package/src/skcapstone/cli/itil.py +434 -0
- package/src/skcapstone/coordination.py +1 -0
- package/src/skcapstone/itil.py +1104 -0
- package/src/skcapstone/mcp_server.py +258 -0
- package/src/skcapstone/mcp_tools/__init__.py +2 -0
- package/src/skcapstone/mcp_tools/gtd_tools.py +1 -1
- package/src/skcapstone/mcp_tools/itil_tools.py +657 -0
- package/src/skcapstone/scheduled_tasks.py +62 -0
- package/src/skcapstone/service_health.py +81 -2
|
@@ -0,0 +1,1104 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SKCapstone ITIL Service Management — Incident, Problem, and Change Management.
|
|
3
|
+
|
|
4
|
+
Conflict-free design: each ITIL record has a ``managed_by`` field — only that
|
|
5
|
+
agent writes to the file. CAB votes use per-agent files to avoid conflicts.
|
|
6
|
+
|
|
7
|
+
Directory layout:
|
|
8
|
+
~/.skcapstone/coordination/itil/
|
|
9
|
+
├── incidents/ # One JSON per incident (managed_by agent owns it)
|
|
10
|
+
├── problems/ # One JSON per problem
|
|
11
|
+
├── changes/ # One JSON per RFC
|
|
12
|
+
├── kedb/ # Known Error Database entries
|
|
13
|
+
├── cab-decisions/ # Per-agent CAB vote files (conflict-free)
|
|
14
|
+
└── ITIL-BOARD.md # Auto-generated overview
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import json
|
|
20
|
+
import logging
|
|
21
|
+
import re
|
|
22
|
+
import uuid
|
|
23
|
+
from datetime import datetime, timezone
|
|
24
|
+
from enum import Enum
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from typing import Any, Optional
|
|
27
|
+
|
|
28
|
+
from pydantic import BaseModel, Field
|
|
29
|
+
|
|
30
|
+
logger = logging.getLogger("skcapstone.itil")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# ---------------------------------------------------------------------------
|
|
34
|
+
# Enums
|
|
35
|
+
# ---------------------------------------------------------------------------
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class Severity(str, Enum):
    """Severity level attached to an incident.

    Members mix in ``str``, so each one compares equal to its plain string
    value and serialises to JSON as that string.
    """

    SEV1 = "sev1"
    SEV2 = "sev2"
    SEV3 = "sev3"  # default severity used by the Incident model
    SEV4 = "sev4"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class IncidentStatus(str, Enum):
    """Lifecycle states an incident can be in.

    Members mix in ``str`` and therefore compare equal to their string
    values. Which moves between states are legal is defined separately by the
    incident transition table in this module.
    """

    DETECTED = "detected"  # initial state of a new Incident
    ACKNOWLEDGED = "acknowledged"
    INVESTIGATING = "investigating"
    ESCALATED = "escalated"
    RESOLVED = "resolved"
    CLOSED = "closed"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class ProblemStatus(str, Enum):
    """Lifecycle states of a problem record.

    Members mix in ``str`` and compare equal to their string values.
    """

    IDENTIFIED = "identified"  # initial state of a new Problem
    ANALYZING = "analyzing"
    KNOWN_ERROR = "known_error"
    RESOLVED = "resolved"
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class ChangeType(str, Enum):
    """Category of a change request (RFC).

    Members mix in ``str`` and compare equal to their string values.
    """

    STANDARD = "standard"
    NORMAL = "normal"  # default type used by the Change model
    EMERGENCY = "emergency"
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class ChangeStatus(str, Enum):
    """Lifecycle states of a change request (RFC).

    Members mix in ``str`` and compare equal to their string values. Legal
    moves between states are defined separately by the change transition
    table in this module.
    """

    PROPOSED = "proposed"  # initial state of a new Change
    REVIEWING = "reviewing"
    APPROVED = "approved"
    REJECTED = "rejected"
    IMPLEMENTING = "implementing"
    DEPLOYED = "deployed"
    VERIFIED = "verified"
    FAILED = "failed"
    CLOSED = "closed"
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class Risk(str, Enum):
    """Risk rating recorded on a change request.

    Members mix in ``str`` and compare equal to their string values.
    """

    LOW = "low"
    MEDIUM = "medium"  # default rating used by the Change model
    HIGH = "high"
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class CABDecisionValue(str, Enum):
    """Possible outcomes of a single CAB vote.

    Members mix in ``str`` and compare equal to their string values.
    """

    APPROVED = "approved"
    REJECTED = "rejected"
    ABSTAIN = "abstain"  # default decision used by the CABDecision model
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# ---------------------------------------------------------------------------
|
|
92
|
+
# Lifecycle state machines — valid transitions
|
|
93
|
+
# ---------------------------------------------------------------------------
|
|
94
|
+
|
|
95
|
+
# Incident state machine: maps each status value to the set of status values
# it may move to next. An empty set marks a terminal state.
_INCIDENT_TRANSITIONS: dict[str, set[str]] = {
    # Open states: every one of them can jump straight to "resolved".
    "detected": {"acknowledged", "escalated", "resolved"},
    "acknowledged": {"investigating", "escalated", "resolved"},
    "investigating": {"escalated", "resolved"},
    "escalated": {"investigating", "resolved"},
    # Wind-down: a resolved incident can only be closed.
    "resolved": {"closed"},
    "closed": set(),
}
|
|
103
|
+
|
|
104
|
+
# Problem state machine: maps each status value to the set of status values
# it may move to next. "resolved" is terminal.
_PROBLEM_TRANSITIONS: dict[str, set[str]] = {
    "identified": {"analyzing"},
    # Analysis can end in a documented known error or directly in a fix.
    "analyzing": {"known_error", "resolved"},
    "known_error": {"resolved"},
    "resolved": set(),
}
|
|
110
|
+
|
|
111
|
+
# Change (RFC) state machine: maps each status value to the set of status
# values it may move to next. "closed" is terminal.
_CHANGE_TRANSITIONS: dict[str, set[str]] = {
    # Review phase. A proposal may skip "reviewing" and be decided directly.
    "proposed": {"reviewing", "approved", "rejected"},
    "reviewing": {"approved", "rejected"},
    # An approval can still be withdrawn before implementation starts.
    "approved": {"implementing", "rejected"},
    "rejected": {"closed"},
    # Roll-out phase.
    "implementing": {"deployed", "failed"},
    "deployed": {"verified", "failed"},
    "verified": {"closed"},
    # A failed change may be retried or abandoned.
    "failed": {"implementing", "closed"},
    "closed": set(),
}
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
# ---------------------------------------------------------------------------
|
|
125
|
+
# Pydantic models
|
|
126
|
+
# ---------------------------------------------------------------------------
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class TimelineEntry(BaseModel):
    """Schema of one timeline event: who did what, when, with an optional note."""

    # UTC timestamp in ISO-8601 form, filled in when the entry is created.
    ts: str = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc).isoformat()
    )
    agent: str
    action: str
    note: str = ""
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class Incident(BaseModel):
    """Incident record as persisted to the incidents directory."""

    # "inc-" followed by the first 8 hex characters of a random UUID.
    id: str = Field(default_factory=lambda: "inc-" + uuid.uuid4().hex[:8])
    type: str = "incident"
    title: str

    # Classification and lifecycle state.
    severity: Severity = Severity.SEV3
    status: IncidentStatus = IncidentStatus.DETECTED
    source: str = "manual"
    affected_services: list[str] = Field(default_factory=list)
    impact: str = ""

    # Ownership: per the module design, only the managed_by agent writes the file.
    managed_by: str = ""
    created_by: str = ""

    # Lifecycle timestamps as ISO-8601 strings; None until the state is reached.
    detected_at: str = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc).isoformat()
    )
    acknowledged_at: Optional[str] = None
    resolved_at: Optional[str] = None
    closed_at: Optional[str] = None

    # Audit trail, stored as plain dicts.
    timeline: list[dict[str, Any]] = Field(default_factory=list)

    # Links to other records.
    related_problem_id: Optional[str] = None
    gtd_item_ids: list[str] = Field(default_factory=list)

    resolution_summary: Optional[str] = None
    tags: list[str] = Field(default_factory=list)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
class Problem(BaseModel):
    """Problem record as persisted to the problems directory."""

    # "prb-" followed by the first 8 hex characters of a random UUID.
    id: str = Field(default_factory=lambda: "prb-" + uuid.uuid4().hex[:8])
    type: str = "problem"
    title: str
    status: ProblemStatus = ProblemStatus.IDENTIFIED

    # Analysis results; None until recorded.
    root_cause: Optional[str] = None
    workaround: Optional[str] = None

    # Ownership.
    managed_by: str = ""
    created_by: str = ""
    created_at: str = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc).isoformat()
    )

    # Links to other records.
    related_incident_ids: list[str] = Field(default_factory=list)
    related_change_id: Optional[str] = None
    kedb_id: Optional[str] = None

    # Audit trail, stored as plain dicts.
    timeline: list[dict[str, Any]] = Field(default_factory=list)
    tags: list[str] = Field(default_factory=list)
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
class Change(BaseModel):
    """Change request (RFC) record as persisted to the changes directory."""

    # "chg-" followed by the first 8 hex characters of a random UUID.
    id: str = Field(default_factory=lambda: "chg-" + uuid.uuid4().hex[:8])
    type: str = "change"
    title: str

    # Classification and lifecycle state.
    change_type: ChangeType = ChangeType.NORMAL
    status: ChangeStatus = ChangeStatus.PROPOSED
    risk: Risk = Risk.MEDIUM

    # Free-text plans supplied by the proposer.
    rollback_plan: str = ""
    test_plan: str = ""

    # Ownership and assignment.
    managed_by: str = ""
    created_by: str = ""
    implementer: Optional[str] = None

    # Whether a CAB decision is needed before approval.
    cab_required: bool = True
    created_at: str = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc).isoformat()
    )

    # Links to other records.
    related_problem_id: Optional[str] = None
    gtd_item_ids: list[str] = Field(default_factory=list)

    # Audit trail, stored as plain dicts.
    timeline: list[dict[str, Any]] = Field(default_factory=list)
    tags: list[str] = Field(default_factory=list)
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
class KEDBEntry(BaseModel):
    """Known Error Database entry as persisted to the kedb directory."""

    # "ke-" followed by the first 8 hex characters of a random UUID.
    id: str = Field(default_factory=lambda: "ke-" + uuid.uuid4().hex[:8])
    title: str

    # What the error looks like, why it happens, and how to live with it.
    symptoms: list[str] = Field(default_factory=list)
    root_cause: str = ""
    workaround: str = ""

    # Links to other records.
    permanent_fix_change_id: Optional[str] = None
    related_problem_id: Optional[str] = None

    managed_by: str = ""
    created_at: str = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc).isoformat()
    )
    tags: list[str] = Field(default_factory=list)
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
class CABDecision(BaseModel):
    """One agent's CAB vote on one change request."""

    change_id: str
    agent: str
    decision: CABDecisionValue = CABDecisionValue.ABSTAIN
    # Free-text conditions attached to the vote, if any.
    conditions: str = ""
    # UTC timestamp in ISO-8601 form, filled in when the vote is created.
    decided_at: str = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc).isoformat()
    )
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
# ---------------------------------------------------------------------------
|
|
227
|
+
# Helpers
|
|
228
|
+
# ---------------------------------------------------------------------------
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def _slugify(text: str) -> str:
    """Convert text to a filesystem-safe slug of at most 40 characters.

    The text is lower-cased, characters that are illegal in filenames become
    hyphens, remaining punctuation is dropped, and every run of whitespace,
    underscores and hyphens collapses to a single hyphen.

    Args:
        text: Arbitrary title text.

    Returns:
        The slug, never starting or ending with a hyphen. May be empty when
        the text contains no word characters.
    """
    slug = text.lower().strip()
    # Characters that are reserved in filenames on common platforms.
    slug = re.sub(r'[/\\:*?"<>|]', "-", slug)
    slug = re.sub(r"[^\w\s-]", "", slug)
    # Include "-" in the class so "a / b" yields "a-b" rather than "a---b".
    slug = re.sub(r"[\s_-]+", "-", slug)
    # Strip again after truncating: the cut may land right after a hyphen.
    return slug.strip("-")[:40].rstrip("-")
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def _now_iso() -> str:
    """Return the current UTC time as a timezone-aware ISO-8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _make_timeline_entry(agent: str, action: str, note: str = "") -> dict[str, str]:
    """Build a timeline event dict stamped with the current UTC time.

    Args:
        agent: Name of the agent performing the action.
        action: Short action label.
        note: Optional free-text note.

    Returns:
        A dict with the keys ``ts``, ``agent``, ``action`` and ``note``.
    """
    stamp = datetime.now(timezone.utc).isoformat()
    return dict(ts=stamp, agent=agent, action=action, note=note)
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
# ---------------------------------------------------------------------------
|
|
254
|
+
# ITILManager
|
|
255
|
+
# ---------------------------------------------------------------------------
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
class ITILManager:
|
|
259
|
+
"""Manages ITIL records on disk with lifecycle validation.
|
|
260
|
+
|
|
261
|
+
Args:
|
|
262
|
+
home: Path to the shared root (``~/.skcapstone`` or equivalent).
|
|
263
|
+
"""
|
|
264
|
+
|
|
265
|
+
def __init__(self, home: Path) -> None:
    """Record the directory layout used for ITIL records.

    Only computes paths; nothing is created on disk here.

    Args:
        home: Shared root directory; a leading ``~`` is expanded.
    """
    root = Path(home).expanduser()
    itil_root = root / "coordination" / "itil"

    self.home = root
    self.itil_dir = itil_root
    # One sub-directory per record family.
    self.incidents_dir = itil_root / "incidents"
    self.problems_dir = itil_root / "problems"
    self.changes_dir = itil_root / "changes"
    self.kedb_dir = itil_root / "kedb"
    self.cab_dir = itil_root / "cab-decisions"
|
|
273
|
+
|
|
274
|
+
def ensure_dirs(self) -> None:
    """Create every ITIL record directory that is missing, parents included."""
    required = [
        self.incidents_dir,
        self.problems_dir,
        self.changes_dir,
        self.kedb_dir,
        self.cab_dir,
    ]
    for folder in required:
        # exist_ok makes repeated calls harmless.
        folder.mkdir(parents=True, exist_ok=True)
|
|
284
|
+
|
|
285
|
+
# ── File I/O ──────────────────────────────────────────────────────
|
|
286
|
+
|
|
287
|
+
def _write_record(self, directory: Path, record_id: str, title: str, data: dict) -> Path:
    """Serialise a record to ``<record_id>-<slug>.json`` inside *directory*.

    Args:
        directory: Target directory for the record family.
        record_id: Record identifier, used as the filename prefix.
        title: Record title; its slug becomes the filename suffix.
        data: JSON-serialisable record contents.

    Returns:
        Path of the file that was written.
    """
    self.ensure_dirs()

    slug = _slugify(title)
    # A title without any usable characters falls back to the bare ID.
    stem = record_id if not slug else f"{record_id}-{slug}"
    target = directory / f"{stem}.json"

    # default=str stringifies any value json cannot encode natively.
    payload = json.dumps(data, indent=2, default=str)
    target.write_text(payload + "\n", encoding="utf-8")
    return target
|
|
297
|
+
|
|
298
|
+
def _load_records(self, directory: Path, model_class: type) -> list:
    """Load every ``*.json`` record in *directory*, in filename order.

    Loading is best-effort: a file that cannot be read, parsed or validated
    is skipped so one corrupt record does not hide the rest. Each skipped
    file is logged as a warning so the corruption is visible.

    Args:
        directory: Directory to scan; a missing directory yields ``[]``.
        model_class: Class whose ``model_validate`` turns the parsed JSON
            into a record object.

    Returns:
        The validated records.
    """
    if not directory.exists():
        return []

    log = logging.getLogger("skcapstone.itil")
    records = []
    for path in sorted(directory.glob("*.json")):
        try:
            payload = json.loads(path.read_text(encoding="utf-8"))
            records.append(model_class.model_validate(payload))
        except Exception as exc:  # deliberate best-effort boundary
            log.warning("Skipping unreadable ITIL record %s: %s", path, exc)
    return records
|
|
310
|
+
|
|
311
|
+
def _find_record_path(self, directory: Path, record_id: str) -> Optional[Path]:
    """Find the record file whose name starts with *record_id*.

    The ID is compared as a literal filename prefix, so glob metacharacters
    in it (``*``, ``?``, ``[``) are not interpreted. An empty ID matches
    nothing; previously it matched every file and returned an arbitrary
    record. When several files share the prefix, the first one in sorted
    filename order is returned, so the result is deterministic.

    Args:
        directory: Directory to search.
        record_id: Full record ID, or a leading part of one.

    Returns:
        Path of the matching ``.json`` file, or ``None`` if there is none.
    """
    if not record_id or not directory.exists():
        return None
    matches = sorted(
        path for path in directory.glob("*.json")
        if path.name.startswith(record_id)
    )
    return matches[0] if matches else None
|
|
318
|
+
|
|
319
|
+
def _load_record(self, directory: Path, record_id: str, model_class: type):
    """Load and validate a single record by ID.

    Args:
        directory: Directory holding the record family.
        record_id: ID used to locate the file.
        model_class: Class whose ``model_validate`` builds the record.

    Returns:
        The validated record, or ``None`` when no file matches or the file
        cannot be read, parsed or validated. The second case is logged as a
        warning, because callers report ``None`` as "not found" and a
        corrupt file would otherwise be indistinguishable from a missing one.
    """
    path = self._find_record_path(directory, record_id)
    if path is None:
        return None
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
        return model_class.model_validate(payload)
    except Exception as exc:  # deliberate best-effort boundary
        logging.getLogger("skcapstone.itil").warning(
            "ITIL record %s exists but could not be loaded: %s", path, exc
        )
        return None
|
|
329
|
+
|
|
330
|
+
def _update_record(self, directory: Path, record_id: str, title: str, data: dict) -> Path:
    """Rewrite a record and remove the file it previously lived in, if different.

    The filename embeds a slug of the title, so a changed title yields a new
    filename; the stale file is deleted after the new one is written.

    Args:
        directory: Directory holding the record family.
        record_id: ID of the record being updated.
        title: Current record title.
        data: JSON-serialisable record contents.

    Returns:
        Path of the freshly written file.
    """
    # Look up the existing file first: after the write there could be two matches.
    previous = self._find_record_path(directory, record_id)
    current = self._write_record(directory, record_id, title, data)

    if previous is None or previous == current:
        return current
    if previous.exists():
        previous.unlink()
    return current
|
|
337
|
+
|
|
338
|
+
# ── Incidents ─────────────────────────────────────────────────────
|
|
339
|
+
|
|
340
|
+
def create_incident(
    self,
    title: str,
    severity: str = "sev3",
    source: str = "manual",
    affected_services: list[str] | None = None,
    impact: str = "",
    managed_by: str = "",
    created_by: str = "",
    tags: list[str] | None = None,
) -> Incident:
    """Create, persist and announce a new incident.

    The record is written to disk, an ``itil.incident.created`` event is
    published, and a GTD item is requested for it. If a GTD item ID comes
    back, it is stored on the incident and the record is written again.

    Args:
        title: Incident title.
        severity: Severity value; must be a valid ``Severity`` value.
        source: Where the incident came from.
        affected_services: Names of the affected services.
        impact: Free-text impact description.
        managed_by: Owning agent; falls back to ``created_by`` and then
            ``"unknown"``.
        created_by: Creating agent; falls back to the owning agent.
        tags: Optional tags.

    Returns:
        The created incident.

    Raises:
        ValueError: If ``severity`` is not a valid severity value.
    """
    owner = managed_by or created_by or "unknown"
    record = Incident(
        title=title,
        severity=Severity(severity),
        source=source,
        affected_services=affected_services or [],
        impact=impact,
        managed_by=owner,
        created_by=created_by or owner,
        tags=tags or [],
    )
    opening = _make_timeline_entry(owner, "created", f"Incident detected: {title}")
    record.timeline.append(opening)
    self._write_record(self.incidents_dir, record.id, title, record.model_dump())

    # Publish event
    event = {
        "id": record.id,
        "title": title,
        "severity": severity,
        "managed_by": owner,
    }
    self._publish_event("itil.incident.created", event)

    # Auto-create GTD item
    gtd_id = self._create_gtd_item_for_incident(record)
    if not gtd_id:
        return record

    # Persist the link to the GTD item.
    record.gtd_item_ids.append(gtd_id)
    self._update_record(self.incidents_dir, record.id, title, record.model_dump())
    return record
|
|
387
|
+
|
|
388
|
+
def update_incident(
    self,
    incident_id: str,
    agent: str,
    new_status: str | None = None,
    severity: str | None = None,
    note: str = "",
    resolution_summary: str | None = None,
    related_problem_id: str | None = None,
) -> Incident:
    """Update an incident's status, severity, or metadata.

    Both the status transition and the severity value are validated before
    anything is mutated, so a bad argument can no longer leave GTD items
    completed for an incident whose update was never saved.

    Args:
        incident_id: ID of the incident to update.
        agent: Agent performing the update; recorded in the timeline.
        new_status: Target status; must be a legal transition from the
            current status.
        severity: New severity value; ignored when equal to the current one.
        note: Note attached to the timeline entries. Recorded as a
            standalone ``note`` entry when neither status nor severity is
            supplied.
        resolution_summary: Resolution text. Stored whenever it is provided;
            previously it was silently dropped unless the same call also
            moved the incident to ``resolved``.
        related_problem_id: ID of a problem to link.

    Returns:
        The updated incident.

    Raises:
        ValueError: If the incident cannot be loaded, the transition is not
            allowed, or ``severity`` is not a valid severity value.
    """
    inc = self._load_record(self.incidents_dir, incident_id, Incident)
    if inc is None:
        raise ValueError(f"Incident {incident_id} not found")

    # ── Validate all inputs before mutating or triggering side effects ──
    current = inc.status.value
    if new_status and new_status not in _INCIDENT_TRANSITIONS.get(current, set()):
        raise ValueError(
            f"Invalid transition: {current} -> {new_status}. "
            f"Valid: {_INCIDENT_TRANSITIONS.get(current, set())}"
        )
    target_severity = Severity(severity) if severity else None

    if resolution_summary:
        inc.resolution_summary = resolution_summary

    if new_status:
        inc.status = IncidentStatus(new_status)
        inc.timeline.append(
            _make_timeline_entry(agent, f"status:{current}->{new_status}", note)
        )

        if new_status == "acknowledged":
            inc.acknowledged_at = _now_iso()
        elif new_status == "resolved":
            inc.resolved_at = _now_iso()
            self._complete_gtd_items(inc.gtd_item_ids)
        elif new_status == "closed":
            inc.closed_at = _now_iso()

    if target_severity is not None and severity != inc.severity.value:
        old_sev = inc.severity.value
        inc.severity = target_severity
        inc.timeline.append(
            _make_timeline_entry(agent, f"severity:{old_sev}->{severity}", note)
        )
        # NOTE(review): this event fires on every severity change, including
        # a downgrade, despite its name. Confirm that is intended.
        self._publish_event("itil.incident.escalated", {
            "id": inc.id,
            "old_severity": old_sev,
            "new_severity": severity,
        })

    if related_problem_id:
        inc.related_problem_id = related_problem_id

    # A bare note gets its own entry; otherwise it rides on the entries above.
    if note and not new_status and not severity:
        inc.timeline.append(_make_timeline_entry(agent, "note", note))

    self._update_record(
        self.incidents_dir, inc.id, inc.title, inc.model_dump()
    )
    return inc
|
|
448
|
+
|
|
449
|
+
def list_incidents(
    self,
    status: str | None = None,
    severity: str | None = None,
    service: str | None = None,
) -> list[Incident]:
    """Return stored incidents, narrowed by whichever filters are given.

    Args:
        status: Keep only incidents with this status value.
        severity: Keep only incidents with this severity value.
        service: Keep only incidents listing this service as affected.

    Returns:
        Matching incidents; all of them when no filter is supplied.
    """

    def wanted(inc) -> bool:
        # A falsy filter means "do not filter on this field".
        if status and inc.status.value != status:
            return False
        if severity and inc.severity.value != severity:
            return False
        if service and service not in inc.affected_services:
            return False
        return True

    everything = self._load_records(self.incidents_dir, Incident)
    return [inc for inc in everything if wanted(inc)]
|
|
466
|
+
|
|
467
|
+
def find_open_incident_for_service(self, service: str) -> Optional[Incident]:
    """Return the first still-open incident affecting *service*, if any.

    Used as a de-duplication check. "Open" means the status is one of
    detected, acknowledged, investigating or escalated.

    Args:
        service: Service name to look for in ``affected_services``.

    Returns:
        The first matching incident in listing order, or ``None``.
    """
    open_states = ("detected", "acknowledged", "investigating", "escalated")
    candidates = (
        inc
        for inc in self.list_incidents()
        if inc.status.value in open_states and service in inc.affected_services
    )
    return next(candidates, None)
|
|
474
|
+
|
|
475
|
+
# ── Problems ──────────────────────────────────────────────────────
|
|
476
|
+
|
|
477
|
+
def create_problem(
    self,
    title: str,
    managed_by: str = "",
    created_by: str = "",
    related_incident_ids: list[str] | None = None,
    workaround: str = "",
    tags: list[str] | None = None,
) -> Problem:
    """Create, persist and announce a new problem record.

    The record is written to disk, an ``itil.problem.created`` event is
    published, and a GTD project is requested for it.

    Args:
        title: Problem title.
        managed_by: Owning agent; falls back to ``created_by`` and then
            ``"unknown"``.
        created_by: Creating agent; falls back to the owning agent.
        related_incident_ids: IDs of incidents attributed to this problem.
        workaround: Known workaround text, if any.
        tags: Optional tags.

    Returns:
        The created problem.
    """
    owner = managed_by or created_by or "unknown"
    incident_ids = related_incident_ids or []

    record = Problem(
        title=title,
        managed_by=owner,
        created_by=created_by or owner,
        related_incident_ids=incident_ids,
        workaround=workaround,
        tags=tags or [],
    )
    opening = _make_timeline_entry(owner, "created", f"Problem identified: {title}")
    record.timeline.append(opening)
    self._write_record(self.problems_dir, record.id, title, record.model_dump())

    event = {
        "id": record.id,
        "title": title,
        "related_incidents": related_incident_ids or [],
    }
    self._publish_event("itil.problem.created", event)

    # Auto-create GTD project
    self._create_gtd_project_for_problem(record)

    return record
|
|
513
|
+
|
|
514
|
+
def update_problem(
    self,
    problem_id: str,
    agent: str,
    new_status: str | None = None,
    root_cause: str | None = None,
    workaround: str | None = None,
    note: str = "",
    create_kedb: bool = False,
) -> Problem:
    """Update a problem's status or metadata.

    Args:
        problem_id: ID of the problem to update.
        agent: Agent performing the update; recorded in the timeline.
        new_status: Target status; must be a legal transition from the
            current status.
        root_cause: Root-cause text to store.
        workaround: Workaround text to store.
        note: Note attached to the status entry, or recorded as a standalone
            ``note`` entry when no status is supplied.
        create_kedb: When true, create a KEDB entry from this problem. This
            needs a root cause and is skipped when the problem already links
            to a KEDB entry, so repeated calls no longer create duplicates
            and orphan the earlier entry.

    Returns:
        The updated problem.

    Raises:
        ValueError: If the problem cannot be loaded or the transition is not
            allowed.
    """
    prb = self._load_record(self.problems_dir, problem_id, Problem)
    if prb is None:
        raise ValueError(f"Problem {problem_id} not found")

    if new_status:
        current = prb.status.value
        if new_status not in _PROBLEM_TRANSITIONS.get(current, set()):
            raise ValueError(
                f"Invalid transition: {current} -> {new_status}. "
                f"Valid: {_PROBLEM_TRANSITIONS.get(current, set())}"
            )
        prb.status = ProblemStatus(new_status)
        prb.timeline.append(
            _make_timeline_entry(agent, f"status:{current}->{new_status}", note)
        )

    if root_cause:
        prb.root_cause = root_cause
    if workaround:
        prb.workaround = workaround

    if note and not new_status:
        prb.timeline.append(_make_timeline_entry(agent, "note", note))

    # Create the KEDB entry on request. This is not tied to a particular
    # status; it only needs a root cause and no existing link.
    if create_kedb:
        log = logging.getLogger("skcapstone.itil")
        if prb.kedb_id:
            log.info(
                "Problem %s already links to KEDB entry %s; not creating another",
                prb.id, prb.kedb_id,
            )
        elif not prb.root_cause:
            log.warning(
                "KEDB entry requested for problem %s but no root cause is recorded",
                prb.id,
            )
        else:
            kedb = self.create_kedb_entry(
                title=prb.title,
                symptoms=[],
                root_cause=prb.root_cause,
                workaround=prb.workaround or "",
                related_problem_id=prb.id,
                managed_by=agent,
            )
            prb.kedb_id = kedb.id

    self._update_record(
        self.problems_dir, prb.id, prb.title, prb.model_dump()
    )
    return prb
|
|
565
|
+
|
|
566
|
+
def list_problems(self, status: str | None = None) -> list[Problem]:
    """Return stored problems, optionally restricted to one status value.

    Args:
        status: Keep only problems with this status value; a falsy value
            returns everything.

    Returns:
        Matching problems.
    """
    records = self._load_records(self.problems_dir, Problem)
    if not status:
        return records
    return [prb for prb in records if prb.status.value == status]
|
|
572
|
+
|
|
573
|
+
# ── Changes ───────────────────────────────────────────────────────
|
|
574
|
+
|
|
575
|
+
def propose_change(
    self,
    title: str,
    change_type: str = "normal",
    risk: str = "medium",
    rollback_plan: str = "",
    test_plan: str = "",
    managed_by: str = "",
    created_by: str = "",
    implementer: str | None = None,
    related_problem_id: str | None = None,
    tags: list[str] | None = None,
) -> Change:
    """Propose a new change (RFC), persist it and announce it.

    Standard changes skip the CAB: they are marked as not requiring one and
    are approved immediately. All other types start as ``proposed``.

    Args:
        title: Change title.
        change_type: Must be a valid ``ChangeType`` value.
        risk: Must be a valid ``Risk`` value.
        rollback_plan: Free-text rollback plan.
        test_plan: Free-text test plan.
        managed_by: Owning agent; falls back to ``created_by`` and then
            ``"unknown"``.
        created_by: Creating agent; falls back to the owning agent.
        implementer: Agent expected to implement the change.
        related_problem_id: ID of the problem this change addresses.
        tags: Optional tags.

    Returns:
        The created change.

    Raises:
        ValueError: If ``change_type`` or ``risk`` is not a valid value.
    """
    owner = managed_by or created_by or "unknown"
    kind = ChangeType(change_type)
    risk_level = Risk(risk)
    is_standard = kind == ChangeType.STANDARD

    record = Change(
        title=title,
        change_type=kind,
        risk=risk_level,
        rollback_plan=rollback_plan,
        test_plan=test_plan,
        managed_by=owner,
        created_by=created_by or owner,
        implementer=implementer,
        cab_required=not is_standard,
        related_problem_id=related_problem_id,
        tags=tags or [],
    )
    record.timeline.append(_make_timeline_entry(owner, "proposed", f"RFC: {title}"))

    # Standard changes auto-approve
    if is_standard:
        record.status = ChangeStatus.APPROVED
        record.timeline.append(
            _make_timeline_entry(owner, "auto-approved", "Standard change")
        )

    self._write_record(self.changes_dir, record.id, title, record.model_dump())

    event = {
        "id": record.id,
        "title": title,
        "change_type": change_type,
        "cab_required": record.cab_required,
    }
    self._publish_event("itil.change.proposed", event)

    return record
|
|
627
|
+
|
|
628
|
+
def update_change(
    self,
    change_id: str,
    agent: str,
    new_status: str | None = None,
    note: str = "",
) -> Change:
    """Move a change to a new status, or attach a note to it.

    An approval publishes ``itil.change.approved`` and, when an implementer
    is set, requests a GTD item for them. A deployment publishes
    ``itil.change.deployed``. The record is saved afterwards.

    Args:
        change_id: ID of the change to update.
        agent: Agent performing the update; recorded in the timeline.
        new_status: Target status; must be a legal transition from the
            current status.
        note: Note attached to the status entry, or recorded as a standalone
            ``note`` entry when no status is supplied.

    Returns:
        The updated change.

    Raises:
        ValueError: If the change cannot be loaded or the transition is not
            allowed.
    """
    chg = self._load_record(self.changes_dir, change_id, Change)
    if chg is None:
        raise ValueError(f"Change {change_id} not found")

    if not new_status:
        # Note-only update.
        if note:
            chg.timeline.append(_make_timeline_entry(agent, "note", note))
    else:
        previous = chg.status.value
        permitted = _CHANGE_TRANSITIONS.get(previous, set())
        if new_status not in permitted:
            raise ValueError(
                f"Invalid transition: {previous} -> {new_status}. "
                f"Valid: {permitted}"
            )
        chg.status = ChangeStatus(new_status)
        chg.timeline.append(
            _make_timeline_entry(agent, f"status:{previous}->{new_status}", note)
        )

        if new_status == "approved":
            approved_event = {
                "id": chg.id, "title": chg.title, "implementer": chg.implementer,
            }
            self._publish_event("itil.change.approved", approved_event)
            # Auto-create GTD next-action for implementer
            if chg.implementer:
                self._create_gtd_item_for_change(chg)
        elif new_status == "deployed":
            deployed_event = {"id": chg.id, "title": chg.title}
            self._publish_event("itil.change.deployed", deployed_event)

    self._update_record(self.changes_dir, chg.id, chg.title, chg.model_dump())
    return chg
|
|
671
|
+
|
|
672
|
+
def list_changes(self, status: str | None = None) -> list[Change]:
    """Return stored changes, optionally restricted to one status value.

    Args:
        status: Keep only changes with this status value; a falsy value
            returns everything.

    Returns:
        Matching changes.
    """
    records = self._load_records(self.changes_dir, Change)
    if not status:
        return records
    return [chg for chg in records if chg.status.value == status]
|
|
678
|
+
|
|
679
|
+
# ── CAB ───────────────────────────────────────────────────────────
|
|
680
|
+
|
|
681
|
+
def submit_cab_vote(
    self,
    change_id: str,
    agent: str,
    decision: str = "abstain",
    conditions: str = "",
) -> CABDecision:
    """Record one agent's CAB vote on a change, then re-evaluate the change.

    Each agent writes to its own file, ``<change_id>-<agent>.json``, so a
    repeated vote by the same agent replaces the earlier one.

    Path separators and other filename-reserved characters in ``change_id``
    and ``agent`` are replaced with ``-`` before the filename is built, so a
    crafted value such as ``../../x`` cannot place the file outside the vote
    directory. Names without such characters map to the same filename as
    before.

    Args:
        change_id: ID of the change being voted on.
        agent: Name of the voting agent.
        decision: Must be a valid ``CABDecisionValue`` value.
        conditions: Free-text conditions attached to the vote.

    Returns:
        The recorded vote.

    Raises:
        ValueError: If ``decision`` is not a valid decision value.
    """
    # Build and validate the vote first so bad input touches nothing on disk.
    vote = CABDecision(
        change_id=change_id,
        agent=agent,
        decision=CABDecisionValue(decision),
        conditions=conditions,
    )
    self.ensure_dirs()

    unsafe = r'[/\\:*?"<>|\x00]'
    safe_change = re.sub(unsafe, "-", change_id)
    safe_agent = re.sub(unsafe, "-", agent)
    path = self.cab_dir / f"{safe_change}-{safe_agent}.json"
    path.write_text(
        json.dumps(vote.model_dump(), indent=2, default=str) + "\n",
        encoding="utf-8",
    )

    # Check if all required votes are in and auto-approve/reject
    self._evaluate_cab(change_id)

    return vote
|
|
707
|
+
|
|
708
|
+
def get_cab_votes(self, change_id: str) -> list[CABDecision]:
    """Return all readable CAB votes recorded for a change.

    Vote files are read in sorted filename order, so the result, and any
    message built from it, is stable across runs. A vote file that cannot be
    read, parsed or validated is skipped and logged as a warning.

    Args:
        change_id: ID of the change whose votes are wanted.

    Returns:
        The votes found; empty when the vote directory does not exist.
    """
    if not self.cab_dir.exists():
        return []

    log = logging.getLogger("skcapstone.itil")
    votes = []
    for path in sorted(self.cab_dir.glob(f"{change_id}-*.json")):
        try:
            payload = json.loads(path.read_text(encoding="utf-8"))
            votes.append(CABDecision.model_validate(payload))
        except Exception as exc:  # deliberate best-effort boundary
            log.warning("Skipping unreadable CAB vote %s: %s", path, exc)
    return votes
|
|
720
|
+
|
|
721
|
+
def _evaluate_cab(self, change_id: str) -> None:
    """Evaluate CAB votes and auto-transition the change if they are decisive.

    Only changes in ``proposed`` or ``reviewing`` are considered. Any
    rejection rejects the change. Otherwise an approval from the agent named
    ``"human"`` approves it. With neither, nothing happens.

    A transition that fails is logged instead of being silently discarded;
    the method still never raises for that reason.

    Args:
        change_id: ID of the change to evaluate.
    """
    chg = self._load_record(self.changes_dir, change_id, Change)
    if chg is None or chg.status.value not in ("proposed", "reviewing"):
        return

    votes = self.get_cab_votes(change_id)
    if not votes:
        return

    rejections = [v for v in votes if v.decision == CABDecisionValue.REJECTED]
    approvals = [v for v in votes if v.decision == CABDecisionValue.APPROVED]

    if rejections:
        # Any rejection blocks the change
        target = "rejected"
        note = f"Rejected by: {', '.join(v.agent for v in rejections)}"
    elif any(v.agent == "human" for v in approvals):
        # At least one human approval is needed; the note lists every approver.
        target = "approved"
        note = f"Approved by: {', '.join(v.agent for v in approvals)}"
    else:
        return

    try:
        self.update_change(change_id, "cab-system", new_status=target, note=note)
    except ValueError as exc:
        logging.getLogger("skcapstone.itil").warning(
            "CAB could not move change %s to %s: %s", change_id, target, exc
        )
|
|
751
|
+
|
|
752
|
+
# ── KEDB ──────────────────────────────────────────────────────────
|
|
753
|
+
|
|
754
|
+
def create_kedb_entry(
    self,
    title: str,
    symptoms: list[str],
    root_cause: str = "",
    workaround: str = "",
    permanent_fix_change_id: str | None = None,
    related_problem_id: str | None = None,
    managed_by: str = "",
    tags: list[str] | None = None,
) -> KEDBEntry:
    """Create and persist a Known Error Database entry.

    Args:
        title: Entry title.
        symptoms: Observable symptoms of the known error.
        root_cause: Root-cause description.
        workaround: Workaround description.
        permanent_fix_change_id: ID of the change that fixes the error.
        related_problem_id: ID of the originating problem.
        managed_by: Owning agent.
        tags: Optional tags.

    Returns:
        The created entry.
    """
    fields = {
        "title": title,
        "symptoms": symptoms,
        "root_cause": root_cause,
        "workaround": workaround,
        "permanent_fix_change_id": permanent_fix_change_id,
        "related_problem_id": related_problem_id,
        "managed_by": managed_by,
        "tags": tags or [],
    }
    entry = KEDBEntry(**fields)
    self._write_record(self.kedb_dir, entry.id, title, entry.model_dump())
    return entry
|
|
780
|
+
|
|
781
|
+
def search_kedb(self, query: str) -> list[KEDBEntry]:
    """Return KEDB entries whose text contains *query*, ignoring case.

    The title, symptoms, root cause, workaround and tags of each entry are
    joined into one string and searched for the query as a substring.

    Args:
        query: Text to look for.

    Returns:
        Matching entries in load order.
    """

    def haystack(entry) -> str:
        parts = (
            entry.title,
            " ".join(entry.symptoms),
            entry.root_cause,
            entry.workaround,
            " ".join(entry.tags),
        )
        return " ".join(parts).lower()

    entries = self._load_records(self.kedb_dir, KEDBEntry)
    needle = query.lower()
    return [entry for entry in entries if needle in haystack(entry)]
|
|
797
|
+
|
|
798
|
+
# ── Status dashboard ──────────────────────────────────────────────
|
|
799
|
+
|
|
800
|
+
def get_status(self) -> dict[str, Any]:
    """Summarise incidents, problems, changes and KEDB entries for a dashboard.

    Returns:
        A dict with the keys ``incidents``, ``problems``, ``changes`` and
        ``kedb``. Each section carries a ``total`` count; the first three
        also list the records that are still open, active or pending.
    """
    all_incidents = self._load_records(self.incidents_dir, Incident)
    all_problems = self._load_records(self.problems_dir, Problem)
    all_changes = self._load_records(self.changes_dir, Change)
    known_errors = self._load_records(self.kedb_dir, KEDBEntry)

    open_states = {"detected", "acknowledged", "investigating", "escalated"}
    pending_states = ("proposed", "reviewing", "approved", "implementing")

    still_open = [rec for rec in all_incidents if rec.status.value in open_states]
    unresolved = [rec for rec in all_problems if rec.status.value != "resolved"]
    in_flight = [rec for rec in all_changes if rec.status.value in pending_states]

    # Count only the open incidents, one bucket per declared severity level.
    severity_counts = {}
    for level in Severity:
        severity_counts[level.value] = sum(
            1 for rec in still_open if rec.severity == level
        )

    incident_rows = []
    for rec in still_open:
        incident_rows.append({
            "id": rec.id,
            "title": rec.title,
            "severity": rec.severity.value,
            "status": rec.status.value,
            "managed_by": rec.managed_by,
            "detected_at": rec.detected_at,
        })

    problem_rows = []
    for rec in unresolved:
        problem_rows.append({
            "id": rec.id,
            "title": rec.title,
            "status": rec.status.value,
            "managed_by": rec.managed_by,
        })

    change_rows = []
    for rec in in_flight:
        change_rows.append({
            "id": rec.id,
            "title": rec.title,
            "status": rec.status.value,
            "change_type": rec.change_type.value,
            "managed_by": rec.managed_by,
        })

    return {
        "incidents": {
            "total": len(all_incidents),
            "open": len(still_open),
            "by_severity": severity_counts,
            "open_list": incident_rows,
        },
        "problems": {
            "total": len(all_problems),
            "active": len(unresolved),
            "active_list": problem_rows,
        },
        "changes": {
            "total": len(all_changes),
            "pending": len(in_flight),
            "pending_list": change_rows,
        },
        "kedb": {
            "total": len(known_errors),
        },
    }
|
|
866
|
+
|
|
867
|
+
# ── Auto-close / Escalation (for scheduled tasks) ─────────────────
|
|
868
|
+
|
|
869
|
+
def auto_close_resolved(self, stable_hours: int = 24) -> list[str]:
    """Close incidents that have stayed resolved for at least ``stable_hours``.

    Incidents without a ``resolved_at`` value are ignored. An incident is
    also skipped when its timestamp cannot be parsed or compared, or when
    the update raises ``ValueError``/``TypeError``.

    Returns:
        IDs of the incidents that were moved to ``closed``.
    """
    current_time = datetime.now(timezone.utc)
    auto_closed = []

    for incident in self.list_incidents(status="resolved"):
        if not incident.resolved_at:
            continue
        try:
            # A trailing "Z" is rewritten to an explicit UTC offset before parsing.
            resolved_time = datetime.fromisoformat(
                incident.resolved_at.replace("Z", "+00:00")
            )
            age_hours = (current_time - resolved_time).total_seconds() / 3600
            if age_hours < stable_hours:
                continue
            self.update_incident(
                incident.id,
                "auto-close",
                new_status="closed",
                note=f"Auto-closed after {int(age_hours)}h stable",
            )
            auto_closed.append(incident.id)
        except (ValueError, TypeError):
            continue

    return auto_closed
|
|
890
|
+
|
|
891
|
+
def check_sla_breaches(self) -> list[dict[str, Any]]:
    """Report incidents still in ``detected`` past their acknowledgement SLA.

    Only incidents whose status is ``detected`` and that carry a
    ``detected_at`` timestamp are examined. Every breach found is also
    published as an ``itil.sla.breach`` event.

    Returns:
        One dict per breach with ``id``, ``severity``, ``breach_type``,
        ``elapsed_minutes`` and ``sla_minutes``.
    """
    current_time = datetime.now(timezone.utc)
    # Minutes allowed before acknowledgement, keyed by severity value.
    ack_limits = {"sev1": 5, "sev2": 15, "sev3": 60, "sev4": 240}
    found = []

    for incident in self.list_incidents():
        if incident.status.value != "detected" or not incident.detected_at:
            continue
        try:
            detected_time = datetime.fromisoformat(
                incident.detected_at.replace("Z", "+00:00")
            )
            minutes_open = (current_time - detected_time).total_seconds() / 60
            # Severities missing from the table get a 60 minute limit.
            allowed = ack_limits.get(incident.severity.value, 60)
            if minutes_open <= allowed:
                continue
            found.append({
                "id": incident.id,
                "severity": incident.severity.value,
                "breach_type": "unacknowledged",
                "elapsed_minutes": round(minutes_open),
                "sla_minutes": allowed,
            })
            self._publish_event("itil.sla.breach", {
                "id": incident.id,
                "severity": incident.severity.value,
                "breach_type": "unacknowledged",
            })
        except (ValueError, TypeError):
            # Unparseable or incomparable timestamps are skipped.
            continue

    return found
|
|
923
|
+
|
|
924
|
+
# ── ITIL Board generation ─────────────────────────────────────────
|
|
925
|
+
|
|
926
|
+
def generate_board_md(self) -> str:
    """Render the ``get_status()`` summary as Markdown for ITIL-BOARD.md.

    Returns:
        The board text: a title, a generation timestamp (UTC), then one
        section each for open incidents, active problems, pending changes
        and the known-error count.
    """
    summary = self.get_status()
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    priority_labels = {"sev1": "P1", "sev2": "P2", "sev3": "P3", "sev4": "P4"}

    out = [
        "# ITIL Service Management Board",
        f"*Auto-generated {stamp} — do not edit manually*",
        "",
    ]

    # Incidents
    incidents = summary["incidents"]
    out += [f"## Open Incidents ({incidents['open']})", ""]
    if not incidents["open_list"]:
        out.append("*No open incidents*")
    else:
        for row in incidents["open_list"]:
            label = priority_labels.get(row["severity"], "?")
            out.append(
                f"- **[{row['id']}]** {label} {row['title']} "
                f"({row['status']}) @{row['managed_by']}"
            )
    out.append("")

    # Problems
    problems = summary["problems"]
    out += [f"## Active Problems ({problems['active']})", ""]
    if not problems["active_list"]:
        out.append("*No active problems*")
    else:
        for row in problems["active_list"]:
            out.append(
                f"- **[{row['id']}]** {row['title']} ({row['status']}) @{row['managed_by']}"
            )
    out.append("")

    # Changes
    changes = summary["changes"]
    out += [f"## Pending Changes ({changes['pending']})", ""]
    if not changes["pending_list"]:
        out.append("*No pending changes*")
    else:
        for row in changes["pending_list"]:
            out.append(
                f"- **[{row['id']}]** {row['title']} ({row['status']}, "
                f"{row['change_type']}) @{row['managed_by']}"
            )
    out.append("")

    # KEDB
    out += [f"## Known Errors ({summary['kedb']['total']})", ""]

    return "\n".join(out)
|
|
986
|
+
|
|
987
|
+
def write_board_md(self) -> Path:
    """Regenerate the board and save it as ``ITIL-BOARD.md`` under ``itil_dir``.

    Returns:
        Path of the file that was written.
    """
    self.ensure_dirs()
    markdown = self.generate_board_md()
    board_file = self.itil_dir.joinpath("ITIL-BOARD.md")
    board_file.write_text(markdown, encoding="utf-8")
    return board_file
|
|
994
|
+
|
|
995
|
+
# ── GTD integration helpers ───────────────────────────────────────
|
|
996
|
+
|
|
997
|
+
def _create_gtd_item_for_incident(self, incident: Incident) -> Optional[str]:
    """Create a GTD item tracking ``incident`` (best-effort).

    sev1/sev2 incidents are marked ``next`` and appended to the
    ``next-actions`` list; all other severities go to the ``inbox`` list.
    The item priority is derived from the severity, defaulting to
    ``medium`` for unknown values.

    Returns:
        The new GTD item's ID, or ``None`` if anything failed. Failures
        are logged at debug level with the traceback and never raised.
    """
    try:
        from .mcp_tools.gtd_tools import _make_item, _load_list, _save_list

        severity = incident.severity.value
        priority_map = {"sev1": "critical", "sev2": "high", "sev3": "medium", "sev4": "low"}

        item = _make_item(
            text=f"[ITIL:{incident.id}] {incident.title}",
            source="itil",
            context="@ops",
        )
        item["priority"] = priority_map.get(severity, "medium")

        if severity in ("sev1", "sev2"):
            # Urgent: go straight to next-actions
            item["status"] = "next"
            target_list = "next-actions"
        else:
            # Minor: inbox for processing
            target_list = "inbox"

        items = _load_list(target_list)
        items.append(item)
        _save_list(target_list, items)

        return item["id"]
    except Exception:
        # Best-effort integration: keep the traceback so failures can be diagnosed.
        logger.debug(
            "Failed to create GTD item for incident %s", incident.id, exc_info=True
        )
        return None
|
|
1025
|
+
|
|
1026
|
+
def _create_gtd_project_for_problem(self, problem: Problem) -> Optional[str]:
    """Create a GTD project for investigating ``problem`` (best-effort).

    The item is given status ``project`` and appended to the ``projects``
    list.

    Returns:
        The new GTD item's ID, or ``None`` if anything failed. Failures
        are logged at debug level with the traceback and never raised.
    """
    try:
        from .mcp_tools.gtd_tools import _make_item, _load_list, _save_list

        item = _make_item(
            text=f"[ITIL:{problem.id}] Investigate: {problem.title}",
            source="itil",
            context="@ops",
        )
        item["status"] = "project"

        projects = _load_list("projects")
        projects.append(item)
        _save_list("projects", projects)

        return item["id"]
    except Exception:
        # Best-effort integration: keep the traceback so failures can be diagnosed.
        logger.debug(
            "Failed to create GTD project for problem %s", problem.id, exc_info=True
        )
        return None
|
|
1043
|
+
|
|
1044
|
+
def _create_gtd_item_for_change(self, change: Change) -> Optional[str]:
    """Create a high-priority GTD next-action for ``change`` (best-effort).

    The item is appended to the ``next-actions`` list and its ID is also
    appended to ``change.gtd_item_ids`` in memory. This method does not
    write the change record itself.

    Returns:
        The new GTD item's ID, or ``None`` if anything failed. Failures
        are logged at debug level with the traceback and never raised.
    """
    try:
        from .mcp_tools.gtd_tools import _make_item, _load_list, _save_list

        item = _make_item(
            text=f"[ITIL:{change.id}] Implement: {change.title}",
            source="itil",
            context="@ops",
        )
        item["status"] = "next"
        item["priority"] = "high"

        items = _load_list("next-actions")
        items.append(item)
        _save_list("next-actions", items)

        change.gtd_item_ids.append(item["id"])
        return item["id"]
    except Exception:
        # Best-effort integration: keep the traceback so failures can be diagnosed.
        logger.debug(
            "Failed to create GTD item for change %s", change.id, exc_info=True
        )
        return None
|
|
1063
|
+
|
|
1064
|
+
def _complete_gtd_items(self, gtd_item_ids: list[str]) -> None:
    """Mark the linked GTD items as done and move them to the archive.

    Best-effort: nothing is raised to the caller. Each item is handled
    independently, so a failure on one ID does not stop the remaining IDs
    from being completed. IDs that are not found in any list are skipped.

    Args:
        gtd_item_ids: IDs of the GTD items linked to an ITIL record.
    """
    try:
        from .mcp_tools.gtd_tools import (
            _find_item_across_lists,
            _remove_item_from_list,
            _load_archive,
            _save_archive,
        )
    except Exception:
        logger.debug("Failed to complete GTD items: %s", gtd_item_ids, exc_info=True)
        return

    for item_id in gtd_item_ids or ():
        try:
            source_list, item, _ = _find_item_across_lists(item_id)
            if not (source_list and item):
                continue
            _remove_item_from_list(source_list, item_id)
            item["status"] = "done"
            # NOTE(review): _now_iso is not imported here; presumably a
            # module-level helper in this file — confirm.
            item["completed_at"] = _now_iso()
            archive = _load_archive()
            archive.append(item)
            _save_archive(archive)
        except Exception:
            # Isolate failures per item so the rest of the batch still completes.
            logger.debug("Failed to complete GTD item %s", item_id, exc_info=True)
|
|
1085
|
+
|
|
1086
|
+
# ── PubSub helper ─────────────────────────────────────────────────
|
|
1087
|
+
|
|
1088
|
+
def _publish_event(self, topic: str, payload: dict) -> None:
    """Publish an ITIL event via PubSub and the activity bus (best-effort).

    The two deliveries are attempted independently. A failure in either
    one is logged at debug level with the traceback and never raised.

    Args:
        topic: Event topic, e.g. ``itil.sla.breach``.
        payload: Event data. Its ``managed_by`` value, when present, is
            used as the publishing agent name; otherwise ``itil-system``.
    """
    try:
        from .pubsub import PubSub

        agent_name = payload.get("managed_by", "itil-system")
        bus = PubSub(self.home, agent_name=agent_name)
        bus.publish(topic, payload, ttl_seconds=86400)
    except Exception:
        logger.debug("Failed to publish event %s", topic, exc_info=True)

    # Also push to activity bus
    try:
        from . import activity

        activity.push(topic, payload)
    except Exception:
        # Previously swallowed silently; log it like the PubSub branch.
        logger.debug(
            "Failed to push event %s to activity bus", topic, exc_info=True
        )
|