rootecho 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rootecho/__init__.py +3 -0
- rootecho/__main__.py +6 -0
- rootecho/cli.py +284 -0
- rootecho/core.py +173 -0
- rootecho/store.py +56 -0
- rootecho-0.1.0.dist-info/METADATA +130 -0
- rootecho-0.1.0.dist-info/RECORD +11 -0
- rootecho-0.1.0.dist-info/WHEEL +5 -0
- rootecho-0.1.0.dist-info/entry_points.txt +2 -0
- rootecho-0.1.0.dist-info/licenses/LICENSE +21 -0
- rootecho-0.1.0.dist-info/top_level.txt +1 -0
rootecho/__init__.py
ADDED
rootecho/__main__.py
ADDED
rootecho/cli.py
ADDED
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
"""rootecho command-line interface."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import math
|
|
5
|
+
import os
|
|
6
|
+
import re
|
|
7
|
+
import sys
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
|
+
|
|
10
|
+
from . import core, store
|
|
11
|
+
from .core import now_ms
|
|
12
|
+
|
|
13
|
+
_NUM_RE = re.compile(r"^-?\d+(\.\d+)?$")
|
|
14
|
+
# C0 controls (including ESC), DEL, and the C1 range U+0080-U+009F. C1 contains
# the single-character CSI (U+009B), which terminals that honour 8-bit controls
# treat like ESC + "[", so filtering only C0 would leave that door open.
_CTRL_RE = re.compile(r"[\x00-\x1f\x7f-\x9f]")


def sanitize(s):
    """Return ``s`` as text that is safe to print on a terminal.

    An incident.json (or a shared history.jsonl entry someone else wrote) is
    untrusted input, and its id/title/description fields are printed
    verbatim, so every control character is removed first: the C0 range
    (which includes ESC, the start of ANSI escape sequences, as well as tab
    and newline), DEL, and the C1 range.

    ``None`` becomes the empty string; any other non-string value is
    converted with ``str()`` before filtering.
    """
    if s is None:
        return ""
    return _CTRL_RE.sub("", str(s))
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _json_number(x):
    """Collapse a whole-number float to an int ahead of JSON encoding.

    Python's json module writes ``1.0`` where Node's JSON.stringify writes
    ``1`` (e.g. for an exact-match similarity score), and the two builds must
    produce byte-identical --json output. Every other value, including
    non-integral floats and non-floats, is returned unchanged.
    """
    whole_float = isinstance(x, float) and x.is_integer()
    return int(x) if whole_float else x
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _round_half_up(x):
    """Round ``x`` to the nearest integer, ties toward +infinity.

    This is the rule JS's Math.round uses (Math.round(12.5) === 13 and
    Math.round(-2.5) === -2). Python's builtin round() uses banker's rounding
    (round-half-to-even), which disagrees on exact .5 boundaries
    (round(12.5) == 12).

    The tie test looks at the fractional part instead of computing
    ``floor(x + 0.5)``: the addition itself can round, so
    ``0.49999999999999994 + 0.5`` evaluates to exactly ``1.0`` and would be
    reported as 1 where Math.round gives 0.

    Returns an ``int``. NaN and infinities raise, as ``math.floor`` does.
    """
    base = math.floor(x)
    return base + 1 if x - base >= 0.5 else base
|
|
42
|
+
|
|
43
|
+
VERSION = "0.1.0"
|
|
44
|
+
|
|
45
|
+
# ---- tiny color helpers (no dep) ----
|
|
46
|
+
# Color is decided once at import time: on only when stdout is a terminal and
# the NO_COLOR environment variable is unset or empty.
_COLOR = sys.stdout.isatty() and not os.environ.get("NO_COLOR")


def _c(code, s):
    """Wrap ``s`` in the SGR escape for ``code`` followed by a reset, or
    return ``s`` untouched when color output is disabled."""
    if not _COLOR:
        return s
    return f"\x1b[{code}m{s}\x1b[0m"


def red(s):
    """Color ``s`` red (SGR 31)."""
    return _c("31", s)


def green(s):
    """Color ``s`` green (SGR 32)."""
    return _c("32", s)


def yellow(s):
    """Color ``s`` yellow (SGR 33)."""
    return _c("33", s)


def dim(s):
    """Render ``s`` dimmed (SGR 2)."""
    return _c("2", s)


def bold(s):
    """Render ``s`` bold (SGR 1)."""
    return _c("1", s)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
HELP = f"""{bold('rootecho')} — catch recurring root causes across postmortems. Local, no server.
|
|
61
|
+
|
|
62
|
+
{bold('Record & compare')}
|
|
63
|
+
rootecho add <incident.json> Record a postmortem, flag if its root cause echoes a past one
|
|
64
|
+
rootecho check <incident.json> Same comparison without recording (CI gate: exit 1 on echo)
|
|
65
|
+
|
|
66
|
+
{bold('Inspect')}
|
|
67
|
+
rootecho list [--json] Show recorded incidents and their open action items
|
|
68
|
+
rootecho init [file] [--force] Write a starter incident.json template
|
|
69
|
+
|
|
70
|
+
{bold('Options')} --dir <path> --threshold <0-1, default 0.34> --json --force --version
|
|
71
|
+
|
|
72
|
+
{bold('Exit')} 0 no echo / ok 1 echo detected (check only) 2 usage or input error
|
|
73
|
+
"""
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def fail(msg):
    """Write ``rootecho: <msg>`` (in red when color is on) to stderr and
    terminate the process with exit status 2. Never returns."""
    line = f"rootecho: {msg}\n"
    sys.stderr.write(red(line))
    sys.exit(2)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def flag(args, name):
    """Return the token that follows the first ``name`` in ``args``.

    Gives ``None`` when ``name`` is absent, when it is the last token, or
    when the following token itself starts with ``--`` (so `--dir --force`
    does not treat "--force" as the directory).
    """
    try:
        pos = args.index(name)
    except ValueError:
        return None
    following = args[pos + 1:pos + 2]
    if following and not following[0].startswith("--"):
        return following[0]
    return None
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def has(args, name):
    """Tell whether the switch ``name`` appears anywhere in ``args``."""
    present = name in args
    return present
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# ---- commands --------------------------------------------------------------
|
|
99
|
+
|
|
100
|
+
def read_incident(file):
    """Load, parse and validate one incident file.

    Every failure goes through ``fail`` (message on stderr, exit status 2):
    a missing path or one that starts with ``-``, an unreadable or
    non-UTF-8 file, invalid JSON, or a record that ``core.validate_incident``
    rejects. When the record has no truthy ``date`` it is stamped with
    today's UTC date (``YYYY-MM-DD``).

    Returns the parsed incident dict.
    """
    if not file or file.startswith("-"):
        fail("needs an <incident.json> path")

    try:
        with open(file, "r", encoding="utf-8") as handle:
            text = handle.read()
    except (OSError, UnicodeDecodeError) as exc:
        fail(f'cannot read "{file}": {exc}')

    try:
        record = json.loads(text)
    except (ValueError, RecursionError) as exc:
        fail(f'"{file}" is not valid JSON: {exc}')

    problems = core.validate_incident(record)
    if problems:
        bullets = "\n - ".join(problems)
        fail(f'"{file}" is missing required fields:\n - ' + bullets)

    if not record.get("date"):
        today = datetime.now(timezone.utc)
        record["date"] = today.strftime("%Y-%m-%d")
    return record
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def print_matches(incident, matches, t):
    """Write the human-readable comparison report to stdout.

    With no matches a single "no echo" line is printed. Otherwise each match
    gets a header (id, date, similarity percentage), its title when present,
    one line per action item, and a closing note counting the items that are
    not done. ``t`` is the current time in epoch milliseconds, used to work
    out which action items are overdue. All incident-supplied text passes
    through ``sanitize`` before it is printed.
    """
    write = sys.stdout.write
    shown_id = sanitize(incident["id"])

    if not matches:
        write(green(f'✓ no echo — "{shown_id}" looks like a new root cause\n'))
        return

    write(yellow(f'⚠ root cause echo detected for "{shown_id}":\n'))
    for match in matches:
        earlier = match["incident"]
        percent = _round_half_up(match["score"] * 100)
        earlier_id = sanitize(earlier.get("id"))
        earlier_date = sanitize(earlier.get("date"))
        write(f"\n {bold(earlier_id)} ({earlier_date}) — {percent}% similar root cause\n")
        if earlier.get("title"):
            write(f" {dim(sanitize(earlier['title']))}\n")

        entries = core.summarize_action_items(earlier.get("action_items"), t)
        pending = 0
        for entry in entries:
            late = (not entry["done"]) and entry["overdue"]
            if entry["done"]:
                mark = green("✓")
            elif entry["overdue"]:
                mark = red("✗")
            else:
                mark = yellow("○")
            label = sanitize(entry.get("description") or entry.get("id") or "(action item)")
            suffix = red(f" — {entry['overdueDays']}d overdue") if late else ""
            status_tag = dim("[" + sanitize(entry["status"]) + "]")
            write(f" {mark} {label} {status_tag}{suffix}\n")
            if not entry["done"]:
                pending += 1

        if pending:
            write(dim(f" → {pending} action item(s) from this past incident were never finished.\n"))
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def cmd_add_or_check(mode, args):
    """Run ``add`` or ``check``: compare an incident against the history.

    ``args[0]`` is the incident file and everything after it is options, so
    options must follow the path. Recognised options are ``--threshold``
    (a plain decimal between 0 and 1, default 0.34), ``--dir``, ``--json``
    and, for ``add``, ``--force`` to record an id that is already present.

    Output is either a JSON document (``--json``) or the report from
    ``print_matches``. In ``add`` mode the incident is then appended to the
    history and the process exits 0; in ``check`` mode nothing is recorded
    and the exit status is 1 when at least one match was found, else 0.
    Usage and input errors exit 2 via ``fail``.
    """
    file = args[0] if args else None
    opts = args[1:]

    threshold = 0.34
    if has(opts, "--threshold"):
        raw = flag(opts, "--threshold")
        if raw is None or not _NUM_RE.match(raw):
            fail("--threshold must be a number between 0 and 1")
        threshold = float(raw)
        if threshold < 0 or threshold > 1:
            fail("--threshold must be a number between 0 and 1")
    dir_ = flag(opts, "--dir")
    as_json = has(opts, "--json")
    t = now_ms()

    incident = read_incident(file)
    history = store.load_history(dir_)

    already_recorded = any(h.get("id") == incident["id"] for h in history)
    if mode == "add" and already_recorded and not has(opts, "--force"):
        # The id comes from the untrusted incident file; strip control
        # characters before echoing it to the terminal, as every other
        # place that prints it does.
        shown_id = sanitize(incident["id"])
        fail(f'incident "{shown_id}" is already recorded (use a different id, or --force to append a duplicate)')

    matches = core.find_matches(incident, history, threshold)

    if as_json:
        out = {
            "id": incident["id"],
            "echoDetected": len(matches) > 0,
            "matches": [
                {
                    "id": m["incident"].get("id"),
                    "date": m["incident"].get("date"),
                    "score": _json_number(m["score"]),
                    "actionItems": core.summarize_action_items(m["incident"].get("action_items"), t),
                }
                for m in matches
            ],
        }
        sys.stdout.write(json.dumps(out, indent=2, ensure_ascii=False) + "\n")
    else:
        print_matches(incident, matches, t)

    if mode == "add":
        store.append_incident(incident, dir_)
        if not as_json:
            sys.stdout.write(dim(f"\nrecorded to {store.history_path(dir_)}\n"))
        sys.exit(0)
    sys.exit(1 if matches else 0)
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def cmd_list(args):
    """Show every recorded incident with its count of open action items.

    Options: ``--dir <path>`` selects the state directory and ``--json``
    switches to a JSON array of ``{id, date, title, tags, openActionItems}``
    rows. Without ``--json`` one line is printed per incident, or a hint
    when the history is empty.

    In the human-readable listing every history-supplied field, tags
    included, is passed through ``sanitize``: the history file is shared and
    may have been written by someone else, so it is untrusted terminal input.
    """
    dir_ = flag(args, "--dir")
    as_json = has(args, "--json")
    history = store.load_history(dir_)
    t = now_ms()

    def open_count_of(inc):
        # An item is open when core.summarize_action_items reports it as not done.
        items = core.summarize_action_items(inc.get("action_items"), t)
        return sum(1 for it in items if not it["done"])

    if as_json:
        rows = [
            {
                "id": inc.get("id"),
                "date": inc.get("date"),
                "title": inc.get("title"),
                "tags": core.normalize_tags(inc.get("root_cause_tags")),
                "openActionItems": open_count_of(inc),
            }
            for inc in history
        ]
        sys.stdout.write(json.dumps(rows, indent=2, ensure_ascii=False) + "\n")
        return

    if not history:
        sys.stdout.write(dim("no incidents recorded yet. Track one with: rootecho add <incident.json>\n"))
        return
    for inc in history:
        open_count = open_count_of(inc)
        open_str = yellow(f"{open_count} open") if open_count > 0 else dim("0 open")
        # Tags were previously printed verbatim; strip control characters
        # from them like the id and date beside them.
        tags = sanitize(", ".join(core.normalize_tags(inc.get("root_cause_tags"))))
        inc_id = sanitize(inc.get("id"))
        inc_date = sanitize(inc.get("date")) or "?"
        sys.stdout.write(f"{bold(inc_id.ljust(20))} {dim(inc_date)} {open_str.ljust(18)} {dim(tags)}\n")
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def _template():
    """Build the starter incident record that ``init`` writes to disk.

    Every field holds placeholder text except ``date``, which is today's
    UTC date as ``YYYY-MM-DD``.
    """
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    starter_item = {
        "id": "AI-1",
        "description": "What will prevent this from recurring",
        "owner": "someone",
        "status": "open",
        "due_date": None,
    }
    template = {
        "id": "INC-YYYY-NNN",
        "date": today,
        "title": "Short incident title",
        "service": "service-name",
        "severity": "sev2",
        "root_cause": "Describe what actually caused it, not just the symptom.",
        "root_cause_tags": ["tag-one", "tag-two"],
        "action_items": [starter_item],
    }
    return template
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def cmd_init(args):
    """Write the starter incident template to disk.

    The target is ``args[0]`` unless that is missing or starts with ``--``,
    in which case ``incident.json`` is used. An existing file is only
    overwritten when ``--force`` is given; otherwise the command fails with
    exit status 2.
    """
    wants_default = not args or args[0].startswith("--")
    file = "incident.json" if wants_default else args[0]

    if not has(args, "--force") and os.path.exists(file):
        fail(f'"{file}" already exists (use --force to overwrite)')

    with open(file, "w", encoding="utf-8") as handle:
        rendered = json.dumps(_template(), indent=2, ensure_ascii=False)
        handle.write(rendered + "\n")
    sys.stdout.write(green(f"✓ wrote template to {file}\n"))
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def main():
    """Command-line entry point: dispatch ``sys.argv`` to a subcommand.

    With no arguments, ``-h`` or ``--help`` the help text is printed;
    ``-v``/``--version`` prints the version. Otherwise the first argument
    selects ``add``, ``check``, ``list`` or ``init``. An unknown command, or
    any exception a command lets escape, is reported through ``fail`` (exit
    status 2); a ``SystemExit`` raised by a command passes through
    untouched. Returns 0 when a command finishes without exiting.
    """
    argv = sys.argv[1:]
    if not argv or argv[0] in ("-h", "--help"):
        sys.stdout.write(HELP)
        return 0
    if argv[0] in ("-v", "--version"):
        sys.stdout.write(VERSION + "\n")
        return 0

    command = argv[0]
    rest = argv[1:]
    handlers = {
        "add": lambda: cmd_add_or_check("add", rest),
        "check": lambda: cmd_add_or_check("check", rest),
        "list": lambda: cmd_list(rest),
        "init": lambda: cmd_init(rest),
    }
    try:
        handler = handlers.get(command)
        if handler is None:
            fail(f"unknown command: {command} (try --help)")
        else:
            handler()
    except SystemExit:
        raise
    except Exception as e:
        fail(str(e))
    return 0
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
if __name__ == "__main__":
|
|
284
|
+
sys.exit(main())
|
rootecho/core.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
"""rootecho core — pure root-cause similarity + action-item health logic.
|
|
2
|
+
|
|
3
|
+
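No fs and no network; the only clock read is the ``now_ms()`` helper, and every other function that needs the time takes it as an argument. Timestamps are epoch **milliseconds** to match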
|
|
4
|
+
the Node implementation byte-for-byte (both read/write the same
|
|
5
|
+
``.rootecho/history.jsonl``).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
import time
|
|
10
|
+
|
|
11
|
+
DAY_MS = 86_400_000
|
|
12
|
+
|
|
13
|
+
# Common English filler words stripped before free-text comparison so two
|
|
14
|
+
# unrelated incidents that both happen to say "the service was down" don't
|
|
15
|
+
# score a false match on stopwords alone.
|
|
16
|
+
STOPWORDS = {
|
|
17
|
+
"a", "an", "the", "to", "of", "in", "on", "for", "and", "or", "is", "was",
|
|
18
|
+
"were", "be", "been", "with", "that", "this", "it", "as", "at", "by",
|
|
19
|
+
"from", "which", "when", "due", "because", "not", "no", "did", "does",
|
|
20
|
+
"do", "after", "before", "during", "then", "than", "into", "out",
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
_WORD_RE = re.compile(r"[^a-z0-9]+")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def now_ms() -> int:
    """Return the current epoch time in whole milliseconds, the same unit
    as JS ``Date.now()``."""
    seconds = time.time()
    return int(seconds * 1000)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def tokenize(text) -> list:
    """Break free text into comparison tokens.

    The text is lowercased, every run of characters outside ``a-z0-9`` is
    turned into a space, and the result is split on whitespace. Stopwords
    and single-character tokens are discarded. Falsy input yields ``[]``.
    """
    if not text:
        return []
    lowered = str(text).lower()
    spaced = _WORD_RE.sub(" ", lowered)
    kept = []
    for word in spaced.split():
        if len(word) <= 1 or word in STOPWORDS:
            continue
        kept.append(word)
    return kept
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def normalize_tags(tags) -> list:
    """Clean up a tag list: trim, lowercase and de-duplicate, keeping the
    order of first appearance.

    Anything that is not a list gives ``[]``. Only str/int/float entries
    are kept; other values are dropped rather than stringified, so a stray
    ``None``/object cannot become a bogus "none" token that two unrelated
    incidents could spuriously share. Booleans are dropped as well even
    though ``bool`` is an ``int`` subclass, to match the JS implementation
    (``typeof true !== 'number'``). Entries that are empty after trimming
    are skipped.
    """
    if not isinstance(tags, list):
        return []
    unique = {}  # insertion-ordered: key order is first-seen order
    for raw in tags:
        is_scalar = isinstance(raw, (str, int, float)) and not isinstance(raw, bool)
        if not is_scalar:
            continue
        cleaned = str(raw).strip().lower()
        if cleaned:
            unique.setdefault(cleaned, None)
    return list(unique)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def jaccard(a, b) -> float:
    """Jaccard similarity of two token/tag lists: the size of their
    intersection divided by the size of their union, with duplicates
    ignored. When both inputs are empty the score is 0 (no signal, not a
    match)."""
    left, right = set(a), set(b)
    combined = left | right
    if not combined:
        return 0.0
    return len(left & right) / len(combined)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def similarity(a: dict, b: dict) -> float:
    """Score how closely two incidents' root causes resemble each other,
    from 0 to 1.

    When both incidents carry at least one usable tag, the result is 70%
    Jaccard overlap of ``root_cause_tags`` (the curated, low-noise signal)
    plus 30% Jaccard overlap of the tokenized ``root_cause`` text. When
    either side has no usable tags, the text overlap alone is returned.
    """
    text_score = jaccard(tokenize(a.get("root_cause")), tokenize(b.get("root_cause")))
    tags_a = normalize_tags(a.get("root_cause_tags"))
    tags_b = normalize_tags(b.get("root_cause_tags"))

    if not (tags_a and tags_b):
        return text_score
    tag_score = jaccard(tags_a, tags_b)
    return tag_score * 0.7 + text_score * 0.3
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def find_matches(incident: dict, history: list, threshold: float = 0.34) -> list:
    """Find past incidents whose root cause echoes ``incident``'s.

    Every history entry with a different ``id`` is scored with
    ``similarity``; entries scoring at least ``threshold`` are kept. The
    result is sorted by score (desc), then by date (desc, most recent
    first), comparing dates as strings.

    A missing or ``null`` date sorts as the empty string, i.e. after every
    dated incident with the same score. (``.get("date", "")`` alone is not
    enough: an explicit ``null`` would become the string "None", which
    compares greater than any ``YYYY-MM-DD`` value and would jump the queue.)

    Returns a list of ``{"incident": ..., "score": ...}`` dicts.
    """
    own_id = incident.get("id")
    matches = []
    for past in history:
        if past.get("id") == own_id:
            continue  # never match an incident against itself
        score = similarity(incident, past)
        if score >= threshold:
            matches.append({"incident": past, "score": score})
    matches.sort(
        key=lambda m: (m["score"], str(m["incident"].get("date") or "")),
        reverse=True,
    )
    return matches
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def action_item_health(item: dict, now: int) -> dict:
    """Work out the state of one action item at time ``now`` (epoch ms).

    Returns a copy of ``item`` with four keys set: ``status`` (defaulting
    to "open" when missing or falsy), ``done`` (status is "done" or
    "cancelled"), ``overdueDays`` (whole days past ``due_date``; 0 when the
    item is done, has no due date, or the date cannot be parsed) and
    ``overdue`` (``overdueDays`` is greater than 0).
    """
    status = item.get("status") or "open"
    done = status in ("done", "cancelled")
    due_date = item.get("due_date")

    overdue_days = 0
    if due_date and not done:
        due = _parse_date_ms(due_date)
        if due is not None and due < now:
            overdue_days = (now - due) // DAY_MS

    health = dict(item)
    health.update(
        status=status,
        done=done,
        overdue=overdue_days > 0,
        overdueDays=overdue_days,
    )
    return health
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def summarize_action_items(items, now: int) -> list:
    """Apply ``action_item_health`` to each action item in ``items``.

    A missing or non-list ``items`` gives ``[]``. Entries that are not
    dicts (e.g. a stray ``None`` or string from a hand-edited file) are
    left out instead of raising, so one bad entry doesn't take down the
    rest — the same tolerance history-file loading applies.
    """
    if not isinstance(items, list):
        return []
    summary = []
    for entry in items:
        if isinstance(entry, dict):
            summary.append(action_item_health(entry, now))
    return summary
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def validate_incident(incident) -> list:
    """Check that an incident record has the minimum rootecho needs.

    The record must be a dict with a non-empty string ``id`` and at least
    one of: a non-empty ``root_cause_tags`` list, or a ``root_cause``
    string that is not blank. Returns the problems found as human-readable
    strings; an empty list means the record is valid.
    """
    if not isinstance(incident, dict):
        return ["incident must be a JSON object"]

    problems = []
    ident = incident.get("id")
    if not (ident and isinstance(ident, str)):
        problems.append('missing "id" (string)')

    tags = incident.get("root_cause_tags")
    text = incident.get("root_cause")
    tags_ok = isinstance(tags, list) and bool(tags)
    text_ok = isinstance(text, str) and bool(text.strip())
    if not (tags_ok or text_ok):
        problems.append('missing both "root_cause" and "root_cause_tags" — need at least one')
    return problems
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
# Strict ISO 8601 date/datetime, UTC only (no numeric offsets — unsupported
|
|
147
|
+
# for now, see README). Matches YYYY-MM-DD or YYYY-MM-DD[T ]HH:MM:SS[.sss][Z].
|
|
148
|
+
# Deliberately narrower than (and independent of) datetime.fromisoformat,
|
|
149
|
+
# whose accepted grammar varies across Python 3.8-3.11+ and diverges from JS
|
|
150
|
+
# Date.parse — both implementations must agree on exactly which due_date
|
|
151
|
+
# strings are valid, or one silently computes "not overdue" for a date the
|
|
152
|
+
# other correctly flags.
|
|
153
|
+
_ISO_DATE_RE = re.compile(
    r"^(\d{4})-(\d{2})-(\d{2})(?:[T ](\d{2}):(\d{2}):(\d{2})(?:\.\d+)?Z?)?$"
)


def _parse_date_ms(value):
    """Convert a strict ISO 8601 date or datetime string to epoch
    milliseconds, treating it as UTC.

    Returns ``None`` when ``value`` is not a string, does not have the
    accepted shape (surrounding whitespace is ignored), or does not name a
    real calendar date/time. A fractional-seconds part is accepted but not
    included in the result.
    """
    if not isinstance(value, str):
        return None
    found = _ISO_DATE_RE.match(value.strip())
    if found is None:
        return None
    from datetime import datetime, timezone
    try:
        # The time groups are None for a date-only value; treat them as 0.
        fields = [int(part or 0) for part in found.groups()]
        moment = datetime(*fields, tzinfo=timezone.utc)
    except ValueError:
        return None  # e.g. month=13, day=32, Feb 30 — not a real calendar date
    return int(moment.timestamp() * 1000)
|
rootecho/store.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""rootecho persistence.
|
|
2
|
+
|
|
3
|
+
History is a JSON-Lines file (one incident object per line) under a
|
|
4
|
+
project-local directory, default ``.rootecho/`` in the current working
|
|
5
|
+
directory — meant to be committed to the team's repo so the whole team
|
|
6
|
+
shares (and diffs) incident history, unlike a per-machine dotfile. Override
|
|
7
|
+
with ``$ROOTECHO_HOME`` for tests or a non-default layout. The on-disk format
|
|
8
|
+
is identical to the Node implementation.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import os
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def default_dir() -> Path:
    """Return the state directory used when none is given explicitly:
    ``$ROOTECHO_HOME`` when it is set to a non-empty value, otherwise
    ``.rootecho`` under the current working directory."""
    override = os.environ.get("ROOTECHO_HOME")
    if override:
        return Path(override)
    return Path.cwd() / ".rootecho"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def history_path(dir_=None) -> Path:
    """Return the path of ``history.jsonl`` inside ``dir_``, or inside the
    default state directory when ``dir_`` is ``None`` or empty."""
    base = dir_ if dir_ else default_dir()
    return Path(base).joinpath("history.jsonl")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def load_history(dir_=None) -> list:
    """Load every incident from the history file, in file order.

    Returns ``[]`` when the file does not exist. Lines that cannot be used
    are skipped rather than failing the whole load — one bad line (e.g. a
    botched manual edit or merge) shouldn't lose the rest of the team's
    history. Skipped lines are: blank lines, lines that are not valid JSON,
    lines nested so deeply that the parser hits the recursion limit, and
    lines that are valid JSON but not an object (``null``, a bare number, a
    string, an array).

    The file is decoded as ``utf-8-sig`` so a byte-order mark that an
    editor put at the start of the file is dropped instead of making the
    first record unparseable.
    """
    p = history_path(dir_)
    if not p.exists():
        return []
    out = []
    # Split on "\n" only: str.splitlines() also breaks on characters such as
    # U+2028, which can legitimately appear unescaped inside a JSON string.
    for line in p.read_text(encoding="utf-8-sig").split("\n"):
        t = line.strip()
        if not t:
            continue
        try:
            parsed = json.loads(t)
        except (ValueError, RecursionError):
            continue  # skip corrupt (or pathologically nested) line
        if isinstance(parsed, dict):
            out.append(parsed)
    return out
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def append_incident(incident: dict, dir_=None) -> None:
    """Append one incident record to the history file as a single line.

    The directory (``dir_``, or the default state directory) and the file
    are created if needed.

    The line is encoded to match Node's ``JSON.stringify`` so the shared
    history.jsonl reads the same regardless of which build wrote a given
    line: ``ensure_ascii=False`` writes non-ASCII text (e.g. Chinese in a
    title) as raw UTF-8, and compact separators drop the space that
    Python's defaults put after every ``,`` and ``:``. Lines written with
    the old spaced format remain valid JSON and still load.
    """
    d = Path(dir_ or default_dir())
    d.mkdir(parents=True, exist_ok=True)
    line = json.dumps(incident, ensure_ascii=False, separators=(",", ":"))
    with open(history_path(d), "a", encoding="utf-8") as f:
        f.write(line + "\n")
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: rootecho
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Catch recurring root causes across postmortems. Zero dependencies, no server.
|
|
5
|
+
Author: yyfjj
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/jjdoor/rootecho-py
|
|
8
|
+
Project-URL: Repository, https://github.com/jjdoor/rootecho-py
|
|
9
|
+
Project-URL: Issues, https://github.com/jjdoor/rootecho-py/issues
|
|
10
|
+
Keywords: postmortem,incident,sre,devops,root-cause,cli,incident-response,reliability,on-call
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: System Administrators
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: POSIX
|
|
17
|
+
Classifier: Operating System :: MacOS
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Topic :: System :: Monitoring
|
|
20
|
+
Classifier: Topic :: Utilities
|
|
21
|
+
Requires-Python: >=3.8
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Dynamic: license-file
|
|
25
|
+
|
|
26
|
+
# rootecho
|
|
27
|
+
|
|
28
|
+
**Catch recurring root causes across postmortems.** Heavyweight incident
|
|
29
|
+
platforms (rootly, incident.io) flag when a new incident shares a root cause
|
|
30
|
+
with a past one — teams without that budget just... don't find out, until the
|
|
31
|
+
same failure bites twice. `rootecho` does the comparison locally: no account,
|
|
32
|
+
no server, your incident history lives in your repo.
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install rootecho
|
|
36
|
+
rootecho init incident.json # scaffold a postmortem
|
|
37
|
+
rootecho add incident.json # record it, flag any echo of a past root cause
|
|
38
|
+
rootecho check incident.json # same check, no recording — use as a CI gate
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
This is the Python build — a Node build (`npx rootecho`) exists too and reads
|
|
42
|
+
the exact same `.rootecho/history.jsonl`, so a team split across both
|
|
43
|
+
ecosystems shares one history.
|
|
44
|
+
|
|
45
|
+
## Why
|
|
46
|
+
|
|
47
|
+
> "We use rootly to track this automatically. It flags when incidents have the
|
|
48
|
+
> same root cause as previous ones."
|
|
49
|
+
|
|
50
|
+
That's a paid, hosted feature. For everyone else, a postmortem gets written,
|
|
51
|
+
action items get filed, and six months later the exact same root cause causes
|
|
52
|
+
the exact same outage — because nobody had a system for "hey, we've seen this
|
|
53
|
+
before, and last time's fix never shipped." `rootecho` is that system, as a
|
|
54
|
+
zero-dependency local CLI.
|
|
55
|
+
|
|
56
|
+
## How it works
|
|
57
|
+
|
|
58
|
+
1. **Each postmortem is one JSON record** — `root_cause` (free text) and/or
|
|
59
|
+
`root_cause_tags` (curated labels), plus `action_items` with a status.
|
|
60
|
+
2. **`add`/`check` compare it against your history** using Jaccard similarity
|
|
61
|
+
over tags (primary signal) blended with free-text overlap (secondary). No
|
|
62
|
+
ML dependency, no network call.
|
|
63
|
+
3. **A match above the threshold prints the past incident's action items** —
|
|
64
|
+
so you see immediately whether last time's fix ever actually shipped.
|
|
65
|
+
|
|
66
|
+
## Incident format
|
|
67
|
+
|
|
68
|
+
```json
|
|
69
|
+
{
|
|
70
|
+
"id": "INC-2026-014",
|
|
71
|
+
"date": "2026-07-03",
|
|
72
|
+
"title": "Payment webhook retries exhausted",
|
|
73
|
+
"root_cause": "webhook retry queue misconfigured to drop after 3 attempts, no dead-letter fallback",
|
|
74
|
+
"root_cause_tags": ["webhook", "retry-queue", "dead-letter", "config"],
|
|
75
|
+
"action_items": [
|
|
76
|
+
{ "id": "AI-1", "description": "Add dead-letter queue for webhook retries", "owner": "alice", "status": "open", "due_date": "2026-07-20" }
|
|
77
|
+
]
|
|
78
|
+
}
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Only `id` and one of `root_cause`/`root_cause_tags` are required. `rootecho init`
|
|
82
|
+
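writes a starter file. An action item's `due_date` must be `YYYY-MM-DD` or `YYYY-MM-DD[T ]HH:MM:SS[.sss][Z]` and is read as UTC; numeric offsets such as `+02:00` are not supported, and a value in any other shape is ignored (the item is never reported as overdue).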
|
|
83
|
+
|
|
84
|
+
## Example
|
|
85
|
+
|
|
86
|
+
```
|
|
87
|
+
$ rootecho add inc-2026-014.json
|
|
88
|
+
⚠ root cause echo detected for "INC-2026-014":
|
|
89
|
+
|
|
90
|
+
INC-2026-003 (2026-03-15) — 100% similar root cause
|
|
91
|
+
Payment webhook retries exhausted
|
|
92
|
+
✓ Add retry backoff [done]
|
|
93
|
+
✗ Add monitoring alert for queue depth [open] — 93d overdue
|
|
94
|
+
→ 1 action item(s) from this past incident were never finished.
|
|
95
|
+
|
|
96
|
+
recorded to .rootecho/history.jsonl
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## Commands
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
rootecho add <file> # record + compare (always exits 0 on success)
|
|
103
|
+
rootecho check <file> # compare only, no recording — exit 1 if an echo is found
|
|
104
|
+
rootecho list [--json] # show recorded incidents and open action-item counts
|
|
105
|
+
rootecho init [file] # write a starter incident.json
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Flags: `--dir <path>` (state location), `--threshold <0-1>` (default `0.34`,
|
|
109
|
+
lower = more sensitive), `--json`, `--force` (init: overwrite; add: allow a
|
|
110
|
+
duplicate id).
|
|
111
|
+
|
|
112
|
+
## Storage
|
|
113
|
+
|
|
114
|
+
History is a JSON-Lines file at `.rootecho/history.jsonl`, **local to your
|
|
115
|
+
project by default** (not your home directory) — the idea is your team commits
|
|
116
|
+
it alongside the postmortems it describes, so `git blame`/`git log` on the file
|
|
117
|
+
doubles as an incident timeline. Override the location with `--dir` or
|
|
118
|
+
`$ROOTECHO_HOME`.
|
|
119
|
+
|
|
120
|
+
## Exit codes
|
|
121
|
+
|
|
122
|
+
| Code | Meaning |
|
|
123
|
+
|------|---------|
|
|
124
|
+
| `0` | `add` succeeded, or `check` found no echo |
|
|
125
|
+
| `1` | `check` found an echo of a past root cause |
|
|
126
|
+
| `2` | error (bad args, invalid JSON, duplicate id) |
|
|
127
|
+
|
|
128
|
+
## License
|
|
129
|
+
|
|
130
|
+
MIT
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
rootecho/__init__.py,sha256=f6EDEmqApfxtOEKSbuzKXU_KjNWyzrsE0fIggS9FHAE,109
|
|
2
|
+
rootecho/__main__.py,sha256=4JMK66Wj4uLZTKbF-sT3LAxOsr6buig77PmOkJCRRxw,83
|
|
3
|
+
rootecho/cli.py,sha256=7fPANuK-bbVRYrnxDooRtGnHT7-k-3e8vAVVPhy_phI,10020
|
|
4
|
+
rootecho/core.py,sha256=zGRGFzfSGuQbOjBXp8YFOJHGX6m4D4C5gAGYBSxRBK4,6863
|
|
5
|
+
rootecho/store.py,sha256=yVETjf5AJYgZwgLXszz_Ox1pEPfX592ufNyyzX7tP5o,2107
|
|
6
|
+
rootecho-0.1.0.dist-info/licenses/LICENSE,sha256=f8E6uW6J-6eobJFY4XQY5f7oQm4PNWxzqzt6KjvyrbE,1078
|
|
7
|
+
rootecho-0.1.0.dist-info/METADATA,sha256=IjbNJeC1UK3DD_Tgvhh1kSBekI3sJVPHZifx4dOEHd8,4809
|
|
8
|
+
rootecho-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
9
|
+
rootecho-0.1.0.dist-info/entry_points.txt,sha256=eEkpFtfMQ2nQExgM1dCo48RrQt6BiN6yD9dOcACKy90,47
|
|
10
|
+
rootecho-0.1.0.dist-info/top_level.txt,sha256=DWM2R_EbeTKVDkId2Xv5js5eDdx-zfCayIxhAj4Jdg0,9
|
|
11
|
+
rootecho-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 rootecho contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
rootecho
|