knowledge-worker 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mygraph/audit.py ADDED
@@ -0,0 +1,137 @@
1
+ """
2
+ audit.py — structural verifier for external-query audits (e.g. copilot_response_audit.md).
3
+
4
+ No LLM. Just structural checks against the graph:
5
+ - confidence labels surfaced for non-high nodes?
6
+ - provenance (source:*) cited?
7
+ - completeness: did the response list all expected nodes for a typed question?
8
+
9
+ Outputs JSONL eval_record entries appended to eval_record.jsonl.
10
+
11
+ Usage:
12
+ python audit.py copilot_response_audit.md
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import json
18
+ import re
19
+ import sys
20
+ from datetime import datetime, timezone
21
+ from pathlib import Path
22
+
23
+ from mygraph import Graph
24
+
25
# Directory that contains this module; the default audit file and the eval
# log are both resolved relative to it.
HERE = Path(__file__).parents[0]
# JSONL file that audit() appends its eval records to.
EVAL_LOG = HERE.joinpath("eval_record.jsonl")
27
+
28
+
29
def parse_audit(md_text: str) -> list[dict]:
    """Split an audit markdown document into per-question records.

    A block starts at a heading of the form ``## <number>.``. Within a block
    the ``**User:**``, ``**Copilot:**`` and ``**Evaluation:**`` markers
    delimit the question, the response and the self-evaluation.

    Args:
        md_text: full text of the audit markdown file.

    Returns:
        One dict per block with the keys ``question``, ``response`` and
        ``self_eval``. A section that is missing from a block is returned as
        an empty string. Text before the first heading is ignored.
    """
    # Anchor on the start of a line rather than on a preceding newline, so a
    # heading on the very first line of the file is not dropped.
    sections = re.split(r'^## \d+\.', md_text, flags=re.MULTILINE)[1:]

    def _section(pattern: str, block: str) -> str:
        found = re.search(pattern, block, re.DOTALL)
        return found.group(1).strip() if found else ""

    return [
        {
            "question": _section(r'\*\*User:\*\*\s*(.+?)(?=\*\*Copilot:\*\*)', block),
            "response": _section(r'\*\*Copilot:\*\*\s*(.+?)(?=\*\*Evaluation:\*\*|\Z)', block),
            "self_eval": _section(r'\*\*Evaluation:\*\*\s*(.+?)\Z', block),
        }
        for block in sections
    ]
42
+
43
+
44
def referenced_nodes(text: str, g: Graph) -> list[str]:
    """Return the sorted ids of graph nodes that ``text`` refers to.

    A node counts as referenced when its id appears literally in the text
    (``type:slug`` form) or when its label, if longer than four characters,
    occurs in the text case-insensitively. Id-like tokens that are not keys
    of ``g.nodes`` are discarded.
    """
    haystack = text.lower()
    by_id = {
        token
        for token in re.findall(r'\b([a-z]+:[a-z0-9-]+)\b', text)
        if token in g.nodes
    }
    by_label = {
        node_id
        for node_id, node in g.nodes.items()
        if len(node.label) > 4 and node.label.lower() in haystack
    }
    return sorted(by_id | by_label)
51
+
52
+
53
def check_confidence(refs: list[str], g: Graph, resp: str) -> list[str]:
    """Flag referenced non-high-confidence nodes whose level is not surfaced.

    Args:
        refs: ids of the nodes the response refers to (keys of ``g.nodes``).
        g: graph whose nodes expose a ``confidence`` attribute.
        resp: response text being audited.

    Returns:
        One ``unflagged_<level>_confidence:<node id>`` entry for every
        referenced node whose confidence is not ``"high"`` and whose level is
        not mentioned in the response. A response that contains the word
        "confidence" anywhere is accepted as having surfaced it.
    """
    low = resp.lower()
    if "confidence" in low:
        return []
    issues = []
    for ref in refs:
        level = g.nodes[ref].confidence
        if level == "high":
            continue
        # Match the level as a whole word: a plain substring test would let
        # "follow" or "below" count as surfacing a "low" label. A missing or
        # empty level can never have been surfaced, so it is always flagged.
        if level and re.search(rf"\b{re.escape(str(level).lower())}\b", low):
            continue
        issues.append(f"unflagged_{level}_confidence:{ref}")
    return issues
61
+
62
+
63
def check_provenance(refs, resp):
    """Report ``no_source_cited`` when nodes are referenced without a source.

    Returns an empty list when ``refs`` is empty or when the response
    contains ``source:`` (case-insensitive); otherwise
    ``["no_source_cited"]``.
    """
    if refs and "source:" not in resp.lower():
        return ["no_source_cited"]
    return []
67
+
68
+
69
def check_completeness(question, refs, g, resp):
    """Check that a typed question was answered with every node of that type.

    A question is "typed" when it contains a ``?`` and one of the keywords
    ``goal``, ``decid``, ``idea`` or ``question``; the keyword selects the
    node type whose members are all expected to be referenced.

    Args:
        question: the user's question.
        refs: ids of the nodes the response refers to.
        g: graph whose nodes expose a ``type`` attribute.
        resp: response text. Currently unused; kept for the call signature.

    Returns:
        One ``incomplete_<type>_listing:<listed>/<expected>`` entry per node
        type that was asked about but only partially listed.
    """
    q = question.lower()
    if "?" not in q:
        return []
    typed_checks = [
        ("goal", "goal"),
        ("decid", "decision"),
        ("idea", "idea"),
        ("question", "question"),
    ]
    # Compare as sets so that a node cited twice is counted once; counting
    # duplicates could otherwise mask a missing node.
    cited = set(refs)
    issues = []
    for keyword, node_type in typed_checks:
        if keyword not in q:
            continue
        expected = {nid for nid, n in g.nodes.items() if n.type == node_type}
        if not expected:
            continue
        listed = cited & expected
        if len(listed) < len(expected):
            issues.append(f"incomplete_{node_type}_listing:{len(listed)}/{len(expected)}")
    return issues
87
+
88
+
89
def audit(audit_path: Path) -> list[dict]:
    """Run every structural check over an audit file and log the results.

    Loads the graph, parses ``audit_path`` into question/response blocks,
    runs the confidence, provenance and completeness checks on each
    response, appends one JSON line per block to ``EVAL_LOG`` and returns
    the records that were written.
    """
    graph = Graph.load()
    entries = parse_audit(audit_path.read_text(encoding="utf-8"))
    # One timestamp is shared by every record of this run.
    stamp = datetime.now(timezone.utc).isoformat()

    def _record(index: int, entry: dict) -> dict:
        answer = entry["response"]
        refs = referenced_nodes(answer, graph)
        misses = [
            *check_confidence(refs, graph, answer),
            *check_provenance(refs, answer),
            *check_completeness(entry["question"], refs, graph, answer),
        ]
        return {
            "ts": stamp,
            "kind": "external_query",
            "audit_source": audit_path.name,
            "q_index": index,
            "question": entry["question"][:200],
            "response_excerpt": answer[:300],
            "self_eval": entry["self_eval"][:200],
            "referenced_nodes": refs,
            "claude_verdict": "miss" if misses else "ok",
            "misses": misses,
        }

    # Question numbering in the records is 1-based.
    records = [_record(index, entry) for index, entry in enumerate(entries, 1)]
    with EVAL_LOG.open("a", encoding="utf-8") as log:
        log.writelines(json.dumps(record) + "\n" for record in records)
    return records
118
+
119
+
120
def main(argv):
    """Command-line entry point: audit a file and print a summary.

    ``argv[1]`` is the audit file; without it, ``copilot_response_audit.md``
    next to this module is used. Returns 1 when the file does not exist and
    0 otherwise.
    """
    if len(argv) > 1:
        path = Path(argv[1])
    else:
        path = HERE / "copilot_response_audit.md"
    if not path.exists():
        print(f"Not found: {path}")
        return 1

    records = audit(path)
    flagged = [rec for rec in records if rec["misses"]]
    print(f"Wrote {len(records)} eval_records -> {EVAL_LOG}")
    print(f" {len(flagged)}/{len(records)} responses flagged with misses\n")
    for rec in flagged:
        print(f" Q{rec['q_index']}: {rec['question'][:60]}")
        for miss in rec["misses"]:
            print(f" - {miss}")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
mygraph/check.py ADDED
@@ -0,0 +1,273 @@
1
+ """
2
+ check.py — v1 M2 offline health checks.
3
+
4
+ Subcommands (all write JSONL records to eval_record.jsonl):
5
+
6
+ --provenance hard invariant. Any node (except `source`) without a
7
+ MENTIONED_IN edge → kind: provenance_violation. Any
8
+ edge without source_id → same.
9
+ --stale-edges [--days N] edges with last_seen older than N days (default 90)
10
+ → kind: stale_candidate.
11
+ --pairs N pick N random non-adjacent node pairs, ask the LLM
12
+ "is X related to Y? if yes, by what predicate?".
13
+ Logs kind: relational_probe.
14
+ --source-candidates DIR read the .md/.txt files directly in DIR; ask the LLM if
15
+ any look like Sources we should ingest. Logs
16
+ kind: source_candidate. Never auto-ingests.
17
+
18
+ Default (no subcommand): runs --provenance and --stale-edges. LLM-bound checks
19
+ use the configured Anthropic provider; they're skipped if no supported provider
20
+ env is present.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import random
26
+ import sys
27
+ from datetime import datetime, timedelta, timezone
28
+ from pathlib import Path
29
+
30
+ from mygraph import Graph
31
+ try:
32
+ from .anthropic_client import anthropic_configured, get_anthropic_client
33
+ from .eval_log import append as eval_append, append_many
34
+ except ImportError: # direct script execution
35
+ from anthropic_client import anthropic_configured, get_anthropic_client
36
+ from eval_log import append as eval_append, append_many
37
+
38
+
39
+ # ------------ provenance ------------------------------------------------------
40
+
41
def check_provenance(g: Graph) -> list[dict]:
    """Collect provenance violations and append them to the eval log.

    Two kinds of record are produced, both with ``kind`` set to
    ``provenance_violation``:

    * ``node_without_source``: a node whose type is not ``source`` and which
      is neither endpoint of any ``MENTIONED_IN`` edge;
    * ``edge_without_source_id``: an edge with a falsy ``source_id``.

    The records are passed to ``append_many`` and also returned.
    """
    # Either endpoint of a MENTIONED_IN edge counts as having provenance.
    sourced = set()
    for edge in g.edges:
        if edge.type == "MENTIONED_IN":
            sourced.update((edge.src, edge.dst))

    violations = [
        {
            "kind": "provenance_violation",
            "subkind": "node_without_source",
            "node_id": node_id,
            "node_type": node.type,
            "label": node.label,
        }
        for node_id, node in g.nodes.items()
        if node.type != "source" and node_id not in sourced
    ]
    violations.extend(
        {
            "kind": "provenance_violation",
            "subkind": "edge_without_source_id",
            "edge_index": index,
            "src": edge.src, "dst": edge.dst, "type": edge.type,
        }
        for index, edge in enumerate(g.edges)
        if not edge.source_id
    )
    append_many(violations)
    return violations
67
+
68
+
69
+ # ------------ stale edges -----------------------------------------------------
70
+
71
def check_stale_edges(g: Graph, days: int = 90) -> list[dict]:
    """Report edges whose ``last_seen`` is older than ``days`` days.

    Args:
        g: graph whose edges expose ``src``, ``dst``, ``type`` and an
            ISO-8601 ``last_seen`` value.
        days: age threshold in days.

    Returns:
        One ``stale_candidate`` record per stale edge. The records are also
        passed to ``append_many``. Edges whose ``last_seen`` cannot be
        parsed are skipped.
    """
    # Take "now" once so the cutoff and every age_days use the same instant.
    now = datetime.now(timezone.utc)
    cutoff = now - timedelta(days=days)
    stale = []
    for i, e in enumerate(g.edges):
        raw = e.last_seen
        # datetime.fromisoformat only accepts a trailing "Z" from Python 3.11.
        if isinstance(raw, str) and raw.endswith("Z"):
            raw = raw[:-1] + "+00:00"
        try:
            ls = datetime.fromisoformat(raw)
        except (ValueError, TypeError):
            continue
        # A naive timestamp (e.g. a date-only string) cannot be compared with
        # the aware cutoff; treat it as UTC instead of raising TypeError.
        if ls.utcoffset() is None:
            ls = ls.replace(tzinfo=timezone.utc)
        if ls < cutoff:
            stale.append({
                "kind": "stale_candidate",
                "edge_index": i,
                "src": e.src, "dst": e.dst, "type": e.type,
                "last_seen": e.last_seen,
                "age_days": (now - ls).days,
            })
    append_many(stale)
    return stale
89
+
90
+
91
+ # ------------ relational probe (LLM) -----------------------------------------
92
+
93
# Prompt template for the relational probe. check_pairs fills it with
# str.format using the fields edge_types, a_id, a_type, a_label, a_body,
# b_id, b_type, b_label and b_body.
PAIR_PROMPT = """\
You are evaluating a personal knowledge graph. Two nodes are below. Decide:
(a) Are these conceptually related?
(b) If yes, what predicate name from this set best fits?
{edge_types}

Respond as a single JSON object with keys:
related : true | false
predicate : one of the predicates above, or null if related=false
rationale : one short sentence
confidence: high | medium | low

NODE A: id={a_id} type={a_type} label={a_label}
body: {a_body}
NODE B: id={b_id} type={b_type} label={b_label}
body: {b_body}
"""
110
+
111
+
112
def _call_claude_json(prompt: str) -> dict | None:
    """Send ``prompt`` to the configured Anthropic client and parse the reply.

    Args:
        prompt: prompt text; an instruction to return only JSON is appended.

    Returns:
        ``None`` when no provider is configured or the client cannot be
        created; the parsed JSON value when the reply parses; otherwise a
        dict ``{"_raw": <reply text>, "_parse_error": True}``.
    """
    if not anthropic_configured():
        return None
    try:
        client, config = get_anthropic_client()
    except RuntimeError as e:
        print(f"check: {e}; skipping LLM-bound checks.")
        return None
    import json as _json
    resp = client.messages.create(
        model=config.model, max_tokens=400,
        messages=[{"role": "user", "content": prompt + "\n\nReturn ONLY JSON."}],
    )
    # Content blocks without a ``text`` attribute contribute nothing.
    text = "".join(getattr(b, "text", "") for b in resp.content).strip()
    # Be forgiving: unwrap a Markdown code fence around the JSON.
    if text.startswith("```"):
        body = text.strip("`")
        head, newline, tail = body.partition("\n")
        tag = head.strip()
        if newline and (not tag or tag.isalnum()):
            # The first line is empty or a language tag such as "json".
            body = tail
        elif body[:4].lower() == "json":
            # Single-line fence: ```json {...}```
            body = body[4:]
        # Otherwise the first line already holds the JSON; keep it.
        text = body
    try:
        return _json.loads(text)
    except _json.JSONDecodeError:
        return {"_raw": text, "_parse_error": True}
137
+
138
+
139
def check_pairs(g: Graph, k: int = 10) -> list[dict]:
    """Ask the LLM whether random non-adjacent node pairs are related.

    Args:
        g: graph to sample from; nodes of type ``source`` are excluded.
        k: number of pairs to probe. Fewer are probed when not enough
            distinct non-adjacent pairs are found within ``k * 20`` draws.

    Returns:
        One ``relational_probe`` record per probed pair. The records are
        also passed to ``append_many``. An empty list is returned when no
        provider is configured or fewer than two candidate nodes exist.
    """
    from mygraph import EDGE_TYPES
    if not anthropic_configured():
        print("check --pairs: no Anthropic provider env configured; skipping.")
        return []
    # Unordered pairs, so one entry covers both directions of an edge.
    adjacent = {frozenset((e.src, e.dst)) for e in g.edges}
    ids = [nid for nid, n in g.nodes.items() if n.type != "source"]
    if len(ids) < 2:
        # random.sample(ids, 2) would raise ValueError on such a graph.
        print("check --pairs: fewer than two non-source nodes; nothing to probe.")
        return []
    pairs = []
    chosen = set()
    attempts = 0
    # Bound the number of draws so a dense graph cannot loop forever.
    while len(pairs) < k and attempts < k * 20:
        attempts += 1
        a, b = random.sample(ids, 2)
        key = frozenset((a, b))
        if key in adjacent or key in chosen:
            continue
        chosen.add(key)
        pairs.append((a, b))
    edge_types = ", ".join(sorted(EDGE_TYPES))
    records = []
    for a, b in pairs:
        na, nb = g.nodes[a], g.nodes[b]
        prompt = PAIR_PROMPT.format(
            edge_types=edge_types,
            a_id=a, a_type=na.type, a_label=na.label, a_body=na.body[:200],
            b_id=b, b_type=nb.type, b_label=nb.label, b_body=nb.body[:200],
        )
        result = _call_claude_json(prompt)
        records.append({
            "kind": "relational_probe",
            "a_id": a, "b_id": b,
            "claude_result": result,
        })
    append_many(records)
    return records
175
+
176
+
177
+ # ------------ source candidacy (LLM) -----------------------------------------
178
+
179
# Prompt template for the source-candidacy check. check_source_candidates
# fills it with str.format using the fields fname and content.
SOURCE_CANDIDATE_PROMPT = """\
A markdown/text document is below. Decide whether it should be ingested as a
Source into the user's personal knowledge graph. Respond as JSON:

ingest_recommendation : "yes" | "no" | "maybe"
rationale : one short sentence
candidate_concepts : list of 1-5 plain-English concept labels you'd extract
if ingested (empty list if no)

DOCUMENT (filename: {fname}):
---
{content}
---
"""
193
+
194
+
195
def check_source_candidates(g: Graph, dir_path: Path) -> list[dict]:
    """Ask the LLM whether files in ``dir_path`` look worth ingesting.

    Every ``*.md`` and ``*.txt`` file directly inside ``dir_path`` is
    considered, except files whose path string equals the ``body`` of an
    existing ``source`` node. Only the first 8000 characters of each file
    are sent. Nothing is ingested.

    Args:
        g: graph used to find already-known sources.
        dir_path: directory to scan.

    Returns:
        One ``source_candidate`` record per evaluated file. The records are
        also passed to ``append_many``. An empty list is returned when no
        provider is configured or ``dir_path`` is not a directory.
    """
    if not anthropic_configured():
        print("check --source-candidates: no Anthropic provider env configured; skipping.")
        return []
    if not dir_path.is_dir():
        print(f"check --source-candidates: not a directory: {dir_path}")
        return []
    existing_source_paths = {n.body for n in g.nodes.values() if n.type == "source"}
    records = []
    for p in sorted(dir_path.glob("*.md")) + sorted(dir_path.glob("*.txt")):
        if str(p) in existing_source_paths:
            continue
        # One unreadable or badly encoded file must not abort the scan and
        # lose the records gathered so far. Only the needed prefix is read.
        try:
            with p.open(encoding="utf-8", errors="replace") as fh:
                content = fh.read(8000)
        except OSError as exc:
            print(f"check --source-candidates: cannot read {p}: {exc}; skipping.")
            continue
        prompt = SOURCE_CANDIDATE_PROMPT.format(fname=p.name, content=content)
        result = _call_claude_json(prompt)
        records.append({
            "kind": "source_candidate",
            "path": str(p),
            "claude_result": result,
        })
    append_many(records)
    return records
217
+
218
+
219
+ # ------------ CLI dispatch ----------------------------------------------------
220
+
221
def run_check(args: list[str]) -> int:
    """Parse the check flags, run the selected checks and print a summary.

    With no selecting flag, ``--provenance`` and ``--stale-edges`` are run.
    ``--days N`` sets the stale-edge threshold, ``--pairs [N]`` enables the
    relational probe (10 pairs when N is missing or not an integer) and
    ``--source-candidates DIR`` enables the source-candidacy check.

    Args:
        args: command-line arguments without the program name.

    Returns:
        1 on a usage error, 2 when provenance violations were found,
        otherwise 0.
    """
    flags = list(args)

    # Parse value-bearing flags first so a usage error is reported before
    # the graph is loaded.
    days = 90
    if "--days" in flags:
        i = flags.index("--days")
        try:
            days = int(flags[i + 1])
        except (ValueError, IndexError):
            print("check: --days needs an integer number of days")
            return 1
        del flags[i:i + 2]
    pairs_n = 0
    if "--pairs" in flags:
        i = flags.index("--pairs")
        try:
            pairs_n = int(flags[i + 1])
            del flags[i:i + 2]
        except (ValueError, IndexError):
            # No usable count follows the flag: fall back to 10 pairs and
            # leave the next argument in place.
            pairs_n = 10
            del flags[i:i + 1]
    source_dir = None
    if "--source-candidates" in flags:
        i = flags.index("--source-candidates")
        if i + 1 < len(flags):
            source_dir = Path(flags[i + 1]).expanduser().resolve()
            del flags[i:i + 2]
        else:
            print("check: --source-candidates needs a directory")
            return 1

    only = {f for f in flags if f.startswith("--")}
    run_all = not only and not pairs_n and not source_dir

    g = Graph.load()

    rc = 0
    if run_all or "--provenance" in only:
        v = check_provenance(g)
        print(f"provenance violations: {len(v)}")
        for r in v[:10]:
            # Node violations carry node_id; edge violations carry src/dst.
            print(f" - {r['subkind']}: {r.get('node_id') or r.get('src')+'->'+r.get('dst')}")
        if v:
            rc = 2  # non-zero exit on hard-invariant break
    if run_all or "--stale-edges" in only:
        s = check_stale_edges(g, days=days)
        print(f"stale edges (>{days}d): {len(s)}")
        for r in s[:10]:
            print(f" - {r['src']} --{r['type']}--> {r['dst']} ({r['age_days']}d)")
    if pairs_n:
        p = check_pairs(g, k=pairs_n)
        print(f"relational probes: {len(p)} run")
    if source_dir:
        sc = check_source_candidates(g, source_dir)
        print(f"source candidates: {len(sc)} evaluated")
    return rc


if __name__ == "__main__":
    sys.exit(run_check(sys.argv[1:]))