knowledge-worker 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- knowledge_worker-0.6.0.dist-info/METADATA +365 -0
- knowledge_worker-0.6.0.dist-info/RECORD +27 -0
- knowledge_worker-0.6.0.dist-info/WHEEL +5 -0
- knowledge_worker-0.6.0.dist-info/entry_points.txt +3 -0
- knowledge_worker-0.6.0.dist-info/licenses/LICENSE +21 -0
- knowledge_worker-0.6.0.dist-info/top_level.txt +2 -0
- mygraph/__init__.py +23 -0
- mygraph/anthropic_client.py +199 -0
- mygraph/audit.py +137 -0
- mygraph/check.py +273 -0
- mygraph/discover.py +654 -0
- mygraph/eval_log.py +36 -0
- mygraph/export_context.py +124 -0
- mygraph/extractor.py +243 -0
- mygraph/extractor_openai.py +165 -0
- mygraph/ingest.py +170 -0
- mygraph/memory_audit.py +1094 -0
- mygraph/merge.py +133 -0
- mygraph/mygraph.py +773 -0
- mygraph/owl_io.py +202 -0
- mygraph/review.py +151 -0
- mygraph/validator.py +149 -0
- mygraph/viz.py +409 -0
- ollama_proxy/eval_compare.py +185 -0
- ollama_proxy/extractor_adapter.py +168 -0
- ollama_proxy/proxy.py +143 -0
- ollama_proxy/server.py +194 -0
mygraph/merge.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""
|
|
2
|
+
merge.py — Stage 4 of the v1 ingest pipeline.
|
|
3
|
+
|
|
4
|
+
Idempotent merge of approved candidates into the graph. Slug-based IDs already
|
|
5
|
+
make `add_node` / `add_edge` idempotent (per v0). We add:
|
|
6
|
+
|
|
7
|
+
- Source node: always merges cleanly (Stage 1 emits it).
|
|
8
|
+
- MENTIONED_IN edge from each new concept node to the Source (auto-injected if
|
|
9
|
+
the extractor didn't include it).
|
|
10
|
+
- Body-diff: if a candidate ID matches an existing node but the body differs,
|
|
11
|
+
we surface the diff and prompt keep_old / replace / append. Logged as a
|
|
12
|
+
review eval_record.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import difflib
|
|
18
|
+
from datetime import datetime, timezone
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
from mygraph import Graph, Node, Edge
|
|
22
|
+
try:
|
|
23
|
+
from .eval_log import append as eval_append
|
|
24
|
+
except ImportError: # direct script execution
|
|
25
|
+
from eval_log import append as eval_append
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _now() -> str:
|
|
29
|
+
return datetime.now(timezone.utc).isoformat()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _diff(old: str, new: str) -> str:
|
|
33
|
+
return "\n".join(difflib.unified_diff(
|
|
34
|
+
old.splitlines(), new.splitlines(),
|
|
35
|
+
fromfile="existing", tofile="candidate", lineterm=""))
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _resolve_body_conflict(existing: Node, cand: dict, interactive: bool) -> str:
    """Decide which body text survives when a candidate collides with a node.

    Args:
        existing: The node already in the graph whose ``body`` differs.
        cand: The candidate dict; ``cand["id"]`` is required, ``"body"`` is
            optional.
        interactive: When false, the existing body is kept without prompting.

    Returns:
        The body to store on the node: the existing body (keep), the
        candidate body (replace), or both joined by an "appended" marker.

    Every resolution is recorded through ``eval_append`` with kind
    ``merge_body_conflict``.
    """
    if not interactive:
        # default to keep_old in non-interactive mode (safest)
        eval_append({"kind": "merge_body_conflict", "candidate_id": cand["id"],
                     "resolution": "keep_old_auto"})
        return existing.body

    # The caller treats a missing body as "", so normalise here as well:
    # diffing or concatenating None would otherwise raise.
    old_body = existing.body or ""
    new_body = cand.get("body") or ""

    print(f"\n[merge] body conflict for {cand['id']}:")
    print(_diff(old_body, new_body))

    # Single source of truth for the accepted keys and their logged names.
    resolutions = {"k": "keep_old", "r": "replace", "a": "append"}
    choice = ""
    while choice not in resolutions:
        try:
            choice = input(" [k]eep old / [r]eplace / [a]ppend > ").strip().lower()
        except EOFError:
            # stdin closed: fall back to keeping the existing body
            choice = "k"

    eval_append({"kind": "merge_body_conflict", "candidate_id": cand["id"],
                 "resolution": resolutions[choice]})

    if choice == "k":
        return existing.body
    if choice == "r":
        return new_body
    return (old_body + "\n\n--- (appended) ---\n" + new_body).strip()
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def merge(approved: dict, interactive: bool = True) -> tuple[int, int]:
    """
    Merge approved nodes/edges into the graph. Returns (nodes_added, edges_added).

    Args:
        approved: Dict with a required ``"source"`` entry (needs ``"id"`` and
            ``"label"``; ``"body"`` optional) and optional ``"nodes"`` and
            ``"edges"`` lists of candidate dicts.
        interactive: Passed to the body-conflict resolver; when false,
            conflicting bodies keep the existing text without prompting.

    Returns:
        ``(nodes_added, edges_added)``. A node counts as added when its id was
        not in ``g.nodes`` beforehand; an edge counts when ``g.add_edge``
        increased ``len(g.edges)``.

    The graph is loaded with ``Graph.load()`` and written back with
    ``g.save()`` once all steps have run.
    """
    g = Graph.load()
    src = approved["source"]
    src_id = src["id"]
    approved_nodes = approved.get("nodes", [])

    # 1. Source node always merges
    src_existed = src_id in g.nodes
    g.add_node(Node(id=src_id, type="source", label=src["label"],
                    body=src.get("body", ""), confidence="high"))
    nodes_added = 0 if src_existed else 1

    # 2. Concept nodes
    for cand in approved_nodes:
        nid = cand["id"]
        if nid in g.nodes:
            existing = g.nodes[nid]
            new_body = cand.get("body", "")
            # Only a non-empty, genuinely different body triggers a conflict.
            if new_body and new_body.strip() != (existing.body or "").strip():
                resolved = _resolve_body_conflict(existing, cand, interactive)
                existing.body = resolved
            # Label and confidence follow the candidate regardless of how the
            # body conflict (if any) was resolved.
            existing.label = cand.get("label", existing.label)
            existing.confidence = cand.get("confidence", existing.confidence)
        else:
            g.add_node(Node(id=nid, type=cand["type"], label=cand["label"],
                            body=cand.get("body", ""),
                            confidence=cand.get("confidence", "medium")))
            nodes_added += 1

    # 3. Edges (extractor-emitted)
    edges_added = 0
    for cand in approved.get("edges", []):
        try:
            e = Edge(src=cand["src"], dst=cand["dst"], type=cand["type"],
                     source_id=src_id, excerpt=cand.get("excerpt", ""),
                     confidence=cand.get("confidence", "medium"))
        except (AssertionError, ValueError) as exc:
            eval_append({"kind": "merge_edge_skipped", "edge": cand, "reason": str(exc)})
            continue
        before = len(g.edges)
        g.add_edge(e)
        if len(g.edges) > before:
            edges_added += 1
        else:
            # de-duped: refresh last_seen on the existing edge
            wanted = (e.src, e.dst, e.type, e.source_id)
            for existing in g.edges:
                if (existing.src, existing.dst, existing.type, existing.source_id) == wanted:
                    existing.last_seen = _now()
                    break

    # 4. Auto-inject MENTIONED_IN for any approved node missing one to this source
    # Index candidates by id, first occurrence wins. A dict keeps the approved
    # order, so injected edges are appended deterministically (a set of ids
    # would iterate in hash order) and each lookup is O(1).
    first_cand_by_id: dict = {}
    for cand in approved_nodes:
        first_cand_by_id.setdefault(cand["id"], cand)

    has_mentioned = {(e.src, e.dst, e.type) for e in g.edges if e.type == "MENTIONED_IN"}
    for nid, cand in first_cand_by_id.items():
        if nid == src_id:
            continue
        if (nid, src_id, "MENTIONED_IN") in has_mentioned:
            continue
        # the candidate supplies the excerpt and confidence for the new edge
        e = Edge(src=nid, dst=src_id, type="MENTIONED_IN",
                 source_id=src_id, excerpt=cand.get("excerpt", ""),
                 confidence=cand.get("confidence", "medium"))
        before = len(g.edges)
        g.add_edge(e)
        if len(g.edges) > before:
            edges_added += 1

    g.save()
    return nodes_added, edges_added
|