knowledge-worker 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- knowledge_worker-0.6.0.dist-info/METADATA +365 -0
- knowledge_worker-0.6.0.dist-info/RECORD +27 -0
- knowledge_worker-0.6.0.dist-info/WHEEL +5 -0
- knowledge_worker-0.6.0.dist-info/entry_points.txt +3 -0
- knowledge_worker-0.6.0.dist-info/licenses/LICENSE +21 -0
- knowledge_worker-0.6.0.dist-info/top_level.txt +2 -0
- mygraph/__init__.py +23 -0
- mygraph/anthropic_client.py +199 -0
- mygraph/audit.py +137 -0
- mygraph/check.py +273 -0
- mygraph/discover.py +654 -0
- mygraph/eval_log.py +36 -0
- mygraph/export_context.py +124 -0
- mygraph/extractor.py +243 -0
- mygraph/extractor_openai.py +165 -0
- mygraph/ingest.py +170 -0
- mygraph/memory_audit.py +1094 -0
- mygraph/merge.py +133 -0
- mygraph/mygraph.py +773 -0
- mygraph/owl_io.py +202 -0
- mygraph/review.py +151 -0
- mygraph/validator.py +149 -0
- mygraph/viz.py +409 -0
- ollama_proxy/eval_compare.py +185 -0
- ollama_proxy/extractor_adapter.py +168 -0
- ollama_proxy/proxy.py +143 -0
- ollama_proxy/server.py +194 -0
mygraph/audit.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""
|
|
2
|
+
audit.py — structural verifier for external-query audits (e.g. copilot_response_audit.md).
|
|
3
|
+
|
|
4
|
+
No LLM. Just structural checks against the graph:
|
|
5
|
+
- confidence labels surfaced for non-high nodes?
|
|
6
|
+
- provenance (source:*) cited?
|
|
7
|
+
- completeness: did the response list all expected nodes for a typed question?
|
|
8
|
+
|
|
9
|
+
Outputs JSONL eval_record entries appended to eval_record.jsonl.
|
|
10
|
+
|
|
11
|
+
Usage:
|
|
12
|
+
python audit.py copilot_response_audit.md
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import json
|
|
18
|
+
import re
|
|
19
|
+
import sys
|
|
20
|
+
from datetime import datetime, timezone
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
from mygraph import Graph
|
|
24
|
+
|
|
25
|
+
# Directory containing this module; the default audit file is looked up here.
HERE = Path(__file__).parent
# JSONL file that audit records are appended to (lives next to this module).
EVAL_LOG = HERE.joinpath("eval_record.jsonl")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def parse_audit(md_text: str) -> list[dict]:
    """Split an audit markdown document into per-question records.

    The document is cut at every heading of the form ``## <number>.`` that
    starts a line. Anything before the first such heading is discarded.

    Args:
        md_text: Full text of the audit markdown file.

    Returns:
        One dict per numbered section with the keys ``question`` (text after
        ``**User:**``), ``response`` (text after ``**Copilot:**``) and
        ``self_eval`` (text after ``**Evaluation:**``). A field whose marker
        is missing is returned as an empty string.
    """
    # Anchor on the start of a line instead of a preceding "\n" so that a
    # document whose very first line is "## 1." keeps its first section.
    sections = re.split(r'^## \d+\.', md_text, flags=re.MULTILINE)[1:]

    # Insertion order here is the key order of every emitted record.
    field_patterns = {
        "question": r'\*\*User:\*\*\s*(.+?)(?=\*\*Copilot:\*\*)',
        "response": r'\*\*Copilot:\*\*\s*(.+?)(?=\*\*Evaluation:\*\*|\Z)',
        "self_eval": r'\*\*Evaluation:\*\*\s*(.+?)\Z',
    }

    records = []
    for section in sections:
        record = {}
        for key, pattern in field_patterns.items():
            found = re.search(pattern, section, re.DOTALL)
            record[key] = found.group(1).strip() if found else ""
        records.append(record)
    return records
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def referenced_nodes(text: str, g: Graph) -> list[str]:
    """Return the sorted ids of graph nodes that ``text`` refers to.

    A node counts as referenced when its id appears literally in the text
    (lowercase ``prefix:slug`` form) or when its label, if longer than four
    characters, occurs in the text case-insensitively. Ids that are not keys
    of ``g.nodes`` are dropped.
    """
    lowered = text.lower()
    explicit_ids = set(re.findall(r'\b([a-z]+:[a-z0-9-]+)\b', text))
    label_hits = {
        node_id
        for node_id, node in g.nodes.items()
        if len(node.label) > 4 and node.label.lower() in lowered
    }
    known = g.nodes
    return sorted(
        candidate for candidate in explicit_ids | label_hits if candidate in known
    )
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def check_confidence(refs, g, resp):
    """Flag referenced non-high-confidence nodes the response does not caveat.

    A node is considered caveated when the response (case-insensitively)
    contains either the node's confidence value or the word "confidence".

    Returns:
        A list of ``unflagged_<confidence>_confidence:<node id>`` strings, in
        the order of ``refs``.
    """
    lowered = resp.lower()
    looked_up = ((ref, g.nodes[ref]) for ref in refs)
    return [
        f"unflagged_{node.confidence}_confidence:{ref}"
        for ref, node in looked_up
        if not (
            node.confidence == "high"
            or node.confidence in lowered
            or "confidence" in lowered
        )
    ]
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def check_provenance(refs, resp):
    """Return ``["no_source_cited"]`` when nodes are referenced without a source.

    The response passes if it references no nodes at all, or if it contains
    the substring "source:" (case-insensitive).
    """
    if refs and "source:" not in resp.lower():
        return ["no_source_cited"]
    return []
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def check_completeness(question, refs, g, resp):
    """Check that a typed question got every node of the asked-about type.

    Only questions containing "?" are examined. For each keyword found in the
    lowercased question ("goal", "decid", "idea", "question") the graph nodes
    of the matching type are collected; if the response references fewer of
    them than exist, an ``incomplete_<type>_listing:<listed>/<total>`` entry
    is produced. ``resp`` is accepted but not used.
    """
    lowered_q = question.lower()
    findings = []
    if "?" not in lowered_q:
        return findings

    keyword_to_type = (
        ("goal", "goal"),
        ("decid", "decision"),
        ("idea", "idea"),
        ("question", "question"),
    )
    for keyword, node_type in keyword_to_type:
        if keyword not in lowered_q:
            continue
        expected = [nid for nid, node in g.nodes.items() if node.type == node_type]
        if not expected:
            # No nodes of this type exist, so there is nothing to be complete about.
            continue
        listed_count = sum(1 for ref in refs if ref in expected)
        if listed_count < len(expected):
            findings.append(
                f"incomplete_{node_type}_listing:{listed_count}/{len(expected)}"
            )
    return findings
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def audit(audit_path: Path) -> list[dict]:
    """Run the structural checks on every block of an audit file.

    Loads the graph, parses ``audit_path`` into question/response blocks,
    runs the confidence, provenance and completeness checks on each response
    and appends one JSON line per block to ``EVAL_LOG``.

    Returns:
        The list of records that were written, in block order.
    """
    graph = Graph.load()
    parsed = parse_audit(audit_path.read_text(encoding="utf-8"))
    # One timestamp is shared by every record produced in this run.
    stamp = datetime.now(timezone.utc).isoformat()

    records = []
    for index, entry in enumerate(parsed, 1):
        question = entry["question"]
        response = entry["response"]
        refs = referenced_nodes(response, graph)
        misses = [
            *check_confidence(refs, graph, response),
            *check_provenance(refs, response),
            *check_completeness(question, refs, graph, response),
        ]
        records.append({
            "ts": stamp,
            "kind": "external_query",
            "audit_source": audit_path.name,
            "q_index": index,
            # Long free-text fields are truncated before logging.
            "question": question[:200],
            "response_excerpt": response[:300],
            "self_eval": entry["self_eval"][:200],
            "referenced_nodes": refs,
            "claude_verdict": "miss" if misses else "ok",
            "misses": misses,
        })

    with EVAL_LOG.open("a", encoding="utf-8") as log:
        log.writelines(json.dumps(record) + "\n" for record in records)
    return records
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def main(argv):
    """Command-line entry point: audit a file and print a summary of misses.

    ``argv[1]`` is the audit file; when absent, ``copilot_response_audit.md``
    next to this module is used. Returns 1 if the file does not exist,
    otherwise 0.
    """
    if len(argv) > 1:
        target = Path(argv[1])
    else:
        target = HERE / "copilot_response_audit.md"

    if not target.exists():
        print(f"Not found: {target}")
        return 1

    records = audit(target)
    flagged = [record for record in records if record["misses"]]
    print(f"Wrote {len(records)} eval_records -> {EVAL_LOG}")
    print(f" {len(flagged)}/{len(records)} responses flagged with misses\n")
    for record in flagged:
        print(f" Q{record['q_index']}: {record['question'][:60]}")
        for miss in record["misses"]:
            print(f" - {miss}")
    return 0
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv))
|
mygraph/check.py
ADDED
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
"""
|
|
2
|
+
check.py — v1 M2 offline health checks.
|
|
3
|
+
|
|
4
|
+
Subcommands (all write JSONL records to eval_record.jsonl):
|
|
5
|
+
|
|
6
|
+
--provenance hard invariant. Any node (except `source`) without a
|
|
7
|
+
MENTIONED_IN edge → kind: provenance_violation. Any
|
|
8
|
+
edge without source_id → same.
|
|
9
|
+
--stale-edges [--days N] edges with last_seen older than N days (default 90)
|
|
10
|
+
→ kind: stale_candidate.
|
|
11
|
+
--pairs N pick N random non-adjacent node pairs, ask the LLM
|
|
12
|
+
"is X related to Y? if yes, by what predicate?".
|
|
13
|
+
Logs kind: relational_probe.
|
|
14
|
+
--source-candidates DIR read the .md/.txt files directly inside DIR; ask the LLM if
|
|
15
|
+
any look like Sources we should ingest. Logs
|
|
16
|
+
kind: source_candidate. Never auto-ingests.
|
|
17
|
+
|
|
18
|
+
Default (no subcommand): runs --provenance and --stale-edges. LLM-bound checks
|
|
19
|
+
use the configured Anthropic provider; they're skipped if no supported provider
|
|
20
|
+
env is present.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import random
|
|
26
|
+
import sys
|
|
27
|
+
from datetime import datetime, timedelta, timezone
|
|
28
|
+
from pathlib import Path
|
|
29
|
+
|
|
30
|
+
from mygraph import Graph
|
|
31
|
+
try:
|
|
32
|
+
from .anthropic_client import anthropic_configured, get_anthropic_client
|
|
33
|
+
from .eval_log import append as eval_append, append_many
|
|
34
|
+
except ImportError: # direct script execution
|
|
35
|
+
from anthropic_client import anthropic_configured, get_anthropic_client
|
|
36
|
+
from eval_log import append as eval_append, append_many
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# ------------ provenance ------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
def check_provenance(g: Graph) -> list[dict]:
    """Collect provenance violations and append them to the eval log.

    Two kinds of violation are reported, node violations first:
      * ``node_without_source``: a non-``source`` node that is neither the
        ``src`` nor the ``dst`` of any ``MENTIONED_IN`` edge;
      * ``edge_without_source_id``: an edge whose ``source_id`` is falsy.

    Returns:
        The list of violation records (the same list passed to ``append_many``).
    """
    with_provenance = set()
    for edge in g.edges:
        if edge.type == "MENTIONED_IN":
            # Either endpoint of a MENTIONED_IN edge counts as having provenance.
            with_provenance.update((edge.src, edge.dst))

    node_violations = [
        {
            "kind": "provenance_violation",
            "subkind": "node_without_source",
            "node_id": node_id,
            "node_type": node.type,
            "label": node.label,
        }
        for node_id, node in g.nodes.items()
        if node.type != "source" and node_id not in with_provenance
    ]
    edge_violations = [
        {
            "kind": "provenance_violation",
            "subkind": "edge_without_source_id",
            "edge_index": position,
            "src": edge.src, "dst": edge.dst, "type": edge.type,
        }
        for position, edge in enumerate(g.edges)
        if not edge.source_id
    ]

    violations = node_violations + edge_violations
    append_many(violations)
    return violations
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# ------------ stale edges -----------------------------------------------------
|
|
70
|
+
|
|
71
|
+
def check_stale_edges(g: Graph, days: int = 90) -> list[dict]:
    """Report edges whose ``last_seen`` is older than ``days`` days.

    Edges whose ``last_seen`` cannot be parsed as an ISO-8601 timestamp are
    skipped. The resulting ``stale_candidate`` records are appended to the
    eval log via ``append_many``.

    Args:
        g: Graph whose ``edges`` are inspected.
        days: Age threshold in days.

    Returns:
        One record per stale edge, in edge order.
    """
    # Take "now" once so the cutoff and every age_days use the same instant.
    now = datetime.now(timezone.utc)
    cutoff = now - timedelta(days=days)
    stale = []
    for i, e in enumerate(g.edges):
        try:
            last_seen = datetime.fromisoformat(e.last_seen)
        except (ValueError, TypeError):
            continue
        if last_seen.utcoffset() is None:
            # Comparing a naive datetime with the aware cutoff raises TypeError
            # and would abort the whole check.
            # NOTE(review): naive timestamps are assumed to be UTC — confirm
            # against whatever writes last_seen.
            last_seen = last_seen.replace(tzinfo=timezone.utc)
        if last_seen < cutoff:
            stale.append({
                "kind": "stale_candidate",
                "edge_index": i,
                "src": e.src, "dst": e.dst, "type": e.type,
                "last_seen": e.last_seen,
                "age_days": (now - last_seen).days,
            })
    append_many(stale)
    return stale
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# ------------ relational probe (LLM) -----------------------------------------
|
|
92
|
+
|
|
93
|
+
PAIR_PROMPT = """\
|
|
94
|
+
You are evaluating a personal knowledge graph. Two nodes are below. Decide:
|
|
95
|
+
(a) Are these conceptually related?
|
|
96
|
+
(b) If yes, what predicate name from this set best fits?
|
|
97
|
+
{edge_types}
|
|
98
|
+
|
|
99
|
+
Respond as a single JSON object with keys:
|
|
100
|
+
related : true | false
|
|
101
|
+
predicate : one of the predicates above, or null if related=false
|
|
102
|
+
rationale : one short sentence
|
|
103
|
+
confidence: high | medium | low
|
|
104
|
+
|
|
105
|
+
NODE A: id={a_id} type={a_type} label={a_label}
|
|
106
|
+
body: {a_body}
|
|
107
|
+
NODE B: id={b_id} type={b_type} label={b_label}
|
|
108
|
+
body: {b_body}
|
|
109
|
+
"""
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _call_claude_json(prompt: str) -> dict | None:
    """Send ``prompt`` to the configured Anthropic model and parse the JSON reply.

    Returns:
        ``None`` when no provider is configured or the client cannot be
        created (a message is printed in the latter case); the parsed JSON
        value on success; otherwise ``{"_raw": <text>, "_parse_error": True}``
        when the reply is not valid JSON.
    """
    if not anthropic_configured():
        return None
    try:
        client, config = get_anthropic_client()
    except RuntimeError as e:
        print(f"check: {e}; skipping LLM-bound checks.")
        return None
    import json as _json
    resp = client.messages.create(
        model=config.model, max_tokens=400,
        messages=[{"role": "user", "content": prompt + "\n\nReturn ONLY JSON."}],
    )
    # Content blocks without a ``text`` attribute contribute nothing.
    text = "".join(getattr(b, "text", "") for b in resp.content)
    text = _strip_code_fence(text)
    try:
        return _json.loads(text)
    except _json.JSONDecodeError:
        return {"_raw": text, "_parse_error": True}


def _strip_code_fence(text: str) -> str:
    """Return ``text`` without a surrounding Markdown code fence, if it has one."""
    text = text.strip()
    if not text.startswith("```"):
        return text
    body = text.lstrip("`")
    first_line, newline, remainder = body.partition("\n")
    tag = first_line.strip()
    # Drop the opening-fence line only when it is empty or a language tag such
    # as "json"; a first line that already holds payload must be kept.
    if newline and (not tag or tag.isalnum()):
        body = remainder
    # Cut at the closing fence so trailing commentary after it is ignored.
    # A reply truncated before its closing fence is passed through as-is.
    closing = body.rfind("```")
    if closing != -1:
        body = body[:closing]
    return body.strip()
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def check_pairs(g: Graph, k: int = 10) -> list[dict]:
    """Probe up to ``k`` random non-adjacent node pairs with the LLM.

    Pairs are drawn from non-``source`` nodes that share no edge in either
    direction; each unordered pair is probed at most once. Sampling gives up
    after ``k * 20`` attempts, so fewer than ``k`` pairs may be returned. The
    ``relational_probe`` records are appended to the eval log.

    Returns:
        One record per probed pair, or ``[]`` when no Anthropic provider is
        configured.
    """
    from mygraph import EDGE_TYPES
    if not anthropic_configured():
        print("check --pairs: no Anthropic provider env configured; skipping.")
        return []

    # Unordered pairs, so one entry covers both edge directions.
    adjacent = {frozenset((e.src, e.dst)) for e in g.edges}
    ids = [nid for nid, n in g.nodes.items() if n.type != "source"]

    max_attempts = k * 20
    if len(ids) < 2:
        # random.sample(ids, 2) would raise ValueError on such a graph.
        print("check --pairs: fewer than two non-source nodes; nothing to probe.")
        max_attempts = 0

    pairs = []
    chosen = set()
    attempts = 0
    while len(pairs) < k and attempts < max_attempts:
        attempts += 1
        a, b = random.sample(ids, 2)
        key = frozenset((a, b))
        if key in adjacent or key in chosen:
            continue
        chosen.add(key)
        pairs.append((a, b))

    # The predicate list is the same for every prompt; build it once.
    edge_types = ", ".join(sorted(EDGE_TYPES))
    records = []
    for a, b in pairs:
        na, nb = g.nodes[a], g.nodes[b]
        prompt = PAIR_PROMPT.format(
            edge_types=edge_types,
            a_id=a, a_type=na.type, a_label=na.label, a_body=na.body[:200],
            b_id=b, b_type=nb.type, b_label=nb.label, b_body=nb.body[:200],
        )
        result = _call_claude_json(prompt)
        records.append({
            "kind": "relational_probe",
            "a_id": a, "b_id": b,
            "claude_result": result,
        })
    append_many(records)
    return records
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
# ------------ source candidacy (LLM) -----------------------------------------
|
|
178
|
+
|
|
179
|
+
SOURCE_CANDIDATE_PROMPT = """\
|
|
180
|
+
A markdown/text document is below. Decide whether it should be ingested as a
|
|
181
|
+
Source into the user's personal knowledge graph. Respond as JSON:
|
|
182
|
+
|
|
183
|
+
ingest_recommendation : "yes" | "no" | "maybe"
|
|
184
|
+
rationale : one short sentence
|
|
185
|
+
candidate_concepts : list of 1-5 plain-English concept labels you'd extract
|
|
186
|
+
if ingested (empty list if no)
|
|
187
|
+
|
|
188
|
+
DOCUMENT (filename: {fname}):
|
|
189
|
+
---
|
|
190
|
+
{content}
|
|
191
|
+
---
|
|
192
|
+
"""
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def check_source_candidates(g: Graph, dir_path: Path) -> list[dict]:
    """Ask the LLM whether files in ``dir_path`` look worth ingesting.

    Considers ``*.md`` then ``*.txt`` entries directly inside ``dir_path``
    (each group sorted), skipping any whose path string equals the ``body``
    of an existing ``source`` node. Only the first 8000 characters of each
    file are sent. Results are logged as ``source_candidate`` records;
    nothing is ingested here.

    Returns:
        One record per evaluated file, or ``[]`` when no Anthropic provider
        is configured or ``dir_path`` is not a directory.
    """
    if not anthropic_configured():
        print("check --source-candidates: no Anthropic provider env configured; skipping.")
        return []
    if not dir_path.is_dir():
        print(f"check --source-candidates: not a directory: {dir_path}")
        return []
    # NOTE(review): presumably a source node's body holds its file path — confirm.
    existing_source_paths = {n.body for n in g.nodes.values() if n.type == "source"}
    records = []
    for p in sorted(dir_path.glob("*.md")) + sorted(dir_path.glob("*.txt")):
        if str(p) in existing_source_paths:
            continue
        try:
            # errors="replace" keeps a file with stray non-UTF-8 bytes usable
            # instead of aborting the whole scan.
            content = p.read_text(encoding="utf-8", errors="replace")[:8000]
        except OSError as exc:
            # e.g. a directory named "*.md" or an unreadable file.
            print(f"check --source-candidates: cannot read {p}: {exc}; skipping.")
            continue
        prompt = SOURCE_CANDIDATE_PROMPT.format(fname=p.name, content=content)
        result = _call_claude_json(prompt)
        records.append({
            "kind": "source_candidate",
            "path": str(p),
            "claude_result": result,
        })
    append_many(records)
    return records
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
# ------------ CLI dispatch ----------------------------------------------------
|
|
220
|
+
|
|
221
|
+
def run_check(args: list[str]) -> int:
    """Parse check flags, run the selected checks and print a summary.

    Recognised flags: ``--provenance``, ``--stale-edges``, ``--days N``,
    ``--pairs [N]`` (N defaults to 10) and ``--source-candidates DIR``. With
    no check selected, the provenance and stale-edge checks both run.

    Returns:
        1 on a malformed ``--days`` or ``--source-candidates`` argument,
        2 when provenance violations were found, otherwise 0.
    """
    g = Graph.load()
    flags = list(args)

    # parse value-bearing flags (each is removed from ``flags`` once consumed)
    days = 90
    if "--days" in flags:
        i = flags.index("--days")
        try:
            days = int(flags[i + 1])
        except (ValueError, IndexError):
            # Report a missing/non-integer value instead of crashing.
            print("check: --days needs an integer number of days")
            return 1
        del flags[i:i + 2]
    pairs_n = 0
    if "--pairs" in flags:
        i = flags.index("--pairs")
        try:
            pairs_n = int(flags[i + 1])
            del flags[i:i + 2]
        except (ValueError, IndexError):
            # No usable count follows: fall back to 10 and drop only the flag.
            pairs_n = 10
            del flags[i:i + 1]
    source_dir = None
    if "--source-candidates" in flags:
        i = flags.index("--source-candidates")
        if i + 1 < len(flags):
            source_dir = Path(flags[i + 1]).expanduser().resolve()
            del flags[i:i + 2]
        else:
            print("check: --source-candidates needs a directory")
            return 1

    only = {f for f in flags if f.startswith("--")}
    run_all = not only and not pairs_n and not source_dir

    rc = 0
    if run_all or "--provenance" in only:
        v = check_provenance(g)
        print(f"provenance violations: {len(v)}")
        for r in v[:10]:
            # Node violations carry node_id; edge violations carry src/dst.
            subject = r.get('node_id') or r.get('src') + '->' + r.get('dst')
            print(f" - {r['subkind']}: {subject}")
        if v:
            rc = 2  # non-zero exit on hard-invariant break
    if run_all or "--stale-edges" in only:
        s = check_stale_edges(g, days=days)
        print(f"stale edges (>{days}d): {len(s)}")
        for r in s[:10]:
            print(f" - {r['src']} --{r['type']}--> {r['dst']} ({r['age_days']}d)")
    if pairs_n:
        p = check_pairs(g, k=pairs_n)
        print(f"relational probes: {len(p)} run")
    if source_dir:
        sc = check_source_candidates(g, source_dir)
        print(f"source candidates: {len(sc)} evaluated")
    return rc
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
# Script entry point: run_check()'s return code becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(run_check(sys.argv[1:]))
|