knowledge-worker 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- knowledge_worker-0.6.0.dist-info/METADATA +365 -0
- knowledge_worker-0.6.0.dist-info/RECORD +27 -0
- knowledge_worker-0.6.0.dist-info/WHEEL +5 -0
- knowledge_worker-0.6.0.dist-info/entry_points.txt +3 -0
- knowledge_worker-0.6.0.dist-info/licenses/LICENSE +21 -0
- knowledge_worker-0.6.0.dist-info/top_level.txt +2 -0
- mygraph/__init__.py +23 -0
- mygraph/anthropic_client.py +199 -0
- mygraph/audit.py +137 -0
- mygraph/check.py +273 -0
- mygraph/discover.py +654 -0
- mygraph/eval_log.py +36 -0
- mygraph/export_context.py +124 -0
- mygraph/extractor.py +243 -0
- mygraph/extractor_openai.py +165 -0
- mygraph/ingest.py +170 -0
- mygraph/memory_audit.py +1094 -0
- mygraph/merge.py +133 -0
- mygraph/mygraph.py +773 -0
- mygraph/owl_io.py +202 -0
- mygraph/review.py +151 -0
- mygraph/validator.py +149 -0
- mygraph/viz.py +409 -0
- ollama_proxy/eval_compare.py +185 -0
- ollama_proxy/extractor_adapter.py +168 -0
- ollama_proxy/proxy.py +143 -0
- ollama_proxy/server.py +194 -0
mygraph/ingest.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ingest.py — orchestrates the 5-stage v1 pipeline.
|
|
3
|
+
|
|
4
|
+
mykg ingest <path/to/file.md>
|
|
5
|
+
[--non-interactive]
|
|
6
|
+
[--auto-accept-high]
|
|
7
|
+
[--auto-accept-all]
|
|
8
|
+
[--candidates-file <path>] # skip Stage 1 (extractor)
|
|
9
|
+
[--keep-candidates] # don't delete intermediate JSON
|
|
10
|
+
[--backend claude|openai|ollama] # extractor LLM (default claude)
|
|
11
|
+
[--model <name>] # v1.5: override model tag
|
|
12
|
+
|
|
13
|
+
Stage 1 (extractor) → candidates.json
|
|
14
|
+
Stage 2 (validator) → manifest + validated.json (in-memory)
|
|
15
|
+
Stage 3 (review CLI) → approved subset
|
|
16
|
+
Stage 4 (merge) → graph mutated, saved
|
|
17
|
+
Stage 5 (eval log) → review verdicts appended
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import json
|
|
23
|
+
import sys
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
try:
|
|
27
|
+
from .validator import validate
|
|
28
|
+
from .review import review
|
|
29
|
+
from .merge import merge
|
|
30
|
+
from .eval_log import append as eval_append
|
|
31
|
+
except ImportError: # direct script execution
|
|
32
|
+
from validator import validate
|
|
33
|
+
from review import review
|
|
34
|
+
from merge import merge
|
|
35
|
+
from eval_log import append as eval_append
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _load_extractor(backend: str):
    """Return the ``extract()`` callable for the chosen extractor backend.

    Backend modules are imported lazily, inside the matching branch, so a
    dependency that is missing for one backend does not stop the others from
    being used.

    Args:
        backend: One of ``"claude"``, ``"openai"`` or ``"ollama"``.

    Returns:
        The ``extract`` function exported by the selected backend module.

    Raises:
        ValueError: If ``backend`` is not one of the recognised names.
        ImportError: If the backend module, or something it imports, cannot
            be imported.
    """
    if backend == "ollama":
        # ollama_proxy lives as a sibling to mygraph/. Its adapter is imported
        # by bare module name, so the directory itself must be on sys.path.
        proxy_dir = str(Path(__file__).resolve().parent.parent / "ollama_proxy")
        if proxy_dir not in sys.path:
            sys.path.insert(0, proxy_dir)
        from extractor_adapter import extract as _extract  # type: ignore
        return _extract
    if backend == "claude":
        try:
            from .extractor import extract as _extract
        except ImportError:
            if __package__:
                # Loaded as part of the package, so the relative import was
                # the correct one and this failure is genuine (for example a
                # missing third-party dependency). Retrying by bare name would
                # only bury it under "No module named 'extractor'".
                raise
            # Direct script execution: there is no parent package.
            from extractor import extract as _extract
        return _extract
    if backend == "openai":
        try:
            from .extractor_openai import extract as _extract
        except ImportError:
            if __package__:
                # Same reasoning as for the claude backend: surface the real
                # import failure instead of masking it.
                raise
            # Direct script execution: there is no parent package.
            from extractor_openai import extract as _extract
        return _extract
    raise ValueError(f"ingest: unknown --backend {backend!r} (valid: claude, openai, ollama)")
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def run_ingest(args: list[str]) -> int:
    """Run the five-stage ingest pipeline for one markdown file.

    Stages: extract candidates (or load them from ``--candidates-file``),
    validate, review, merge, and append the outcome to the eval log.

    Args:
        args: Command-line tokens following ``mykg ingest``. ``args[0]`` is
            the source file. The remaining tokens are boolean flags
            (``--non-interactive``, ``--auto-accept-high``,
            ``--auto-accept-all``, ``--keep-candidates``) and value-bearing
            options (``--candidates-file <path>``, ``--backend <name>``,
            ``--model <name>``).

    Returns:
        ``0`` when the pipeline completes; ``1`` on a usage or input error
        (no arguments, source path missing or not a file, an option without
        its value, unreadable or malformed candidates file, unknown backend).
    """
    if not args:
        print("Usage: mykg ingest <file.md> [flags]")
        return 1
    md_path = Path(args[0]).expanduser().resolve()
    if not md_path.is_file():
        # A directory would pass a plain exists() check and only fail later,
        # with a traceback, when the source text is read.
        reason = "not a regular file" if md_path.exists() else "file not found"
        print(f"ingest: {reason}: {md_path}")
        return 1

    flags = set(args[1:])  # simple set membership; value-bearing flags handled below

    # Value-bearing options, in the order they are checked, each with the
    # message printed when its value is missing.
    missing_value_messages = {
        "--candidates-file": "ingest: --candidates-file needs a path",
        "--backend": "ingest: --backend needs a value (claude|openai|ollama)",
        "--model": "ingest: --model needs a value",
    }
    values = {}
    for option, message in missing_value_messages.items():
        if option not in args:
            continue
        value_pos = args.index(option) + 1
        # A following "--something" is the next option, not this one's value;
        # accepting it would silently swallow that flag as, e.g., a model name.
        if value_pos >= len(args) or args[value_pos].startswith("--"):
            print(message)
            return 1
        values[option] = args[value_pos]

    candidates_file = None
    if "--candidates-file" in values:
        candidates_file = Path(values["--candidates-file"]).expanduser().resolve()
        if not candidates_file.is_file():
            print(f"ingest: candidates file not found: {candidates_file}")
            return 1
    backend = values.get("--backend", "claude")
    model = values.get("--model")

    non_interactive = "--non-interactive" in flags
    auto_high = "--auto-accept-high" in flags
    auto_all = "--auto-accept-all" in flags
    keep_candidates = "--keep-candidates" in flags

    if non_interactive and not (auto_high or auto_all):
        # Default headless behavior: be conservative — accept only `high`.
        auto_high = True

    # ---- Stage 1: Extract --------------------------------------------------
    if candidates_file is not None:
        print(f"[1/5] using candidates from: {candidates_file}")
        try:
            payload = json.loads(candidates_file.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"ingest: cannot load candidates file {candidates_file}: {exc}")
            return 1
        candidates_path = candidates_file
    else:
        try:
            extract = _load_extractor(backend)
        except ValueError as exc:
            # Unknown backend name: report it like every other argument
            # error instead of letting the exception escape as a traceback.
            print(exc)
            return 1
        print(f"[1/5] extract -> backend={backend} on {md_path.name} ...")
        candidates_path = md_path.parent / f"{md_path.stem}.candidates.json"
        # Only pass `model` when it was given, so each backend keeps its own
        # default model otherwise.
        if model:
            payload = extract(md_path, candidates_path, model=model)
        else:
            payload = extract(md_path, candidates_path)
        print(f"      wrote {candidates_path}")

    # ---- Stage 2: Validate -------------------------------------------------
    print("[2/5] validate ...")
    src_text = md_path.read_text(encoding="utf-8")
    validated, manifest = validate(payload, src_text)
    print(manifest.summary())

    # log the manifest
    eval_append({
        "kind": "extract_manifest",
        "source_id": payload["source"]["id"],
        "source_path": str(md_path),
        "n_accepted_nodes": len(manifest.accepted_nodes),
        "n_accepted_edges": len(manifest.accepted_edges),
        "n_demoted_nodes": len(manifest.demoted_nodes),
        "n_rejected_nodes": len(manifest.rejected_nodes),
        "n_rejected_edges": len(manifest.rejected_edges),
        "demotions": [{"id": n["id"], "reason": r} for n, r in manifest.demoted_nodes],
        "rejections_n": [{"id": n.get("id", "?"), "reason": r}
                         for n, r in manifest.rejected_nodes],
        "rejections_e": [{"src": e.get("src", "?"), "dst": e.get("dst", "?"),
                          "type": e.get("type", "?"), "reason": r}
                         for e, r in manifest.rejected_edges],
    })

    # ---- Stage 3: Review ---------------------------------------------------
    print("[3/5] review ...")
    approved = review(validated, src_text,
                      auto_accept_high=auto_high, auto_accept_all=auto_all)
    print(f"      approved: {len(approved['nodes'])} nodes, {len(approved['edges'])} edges")

    # ---- Stage 4: Merge ----------------------------------------------------
    print("[4/5] merge ...")
    n_added, e_added = merge(approved, interactive=not non_interactive)
    print(f"      +{n_added} nodes, +{e_added} edges")

    # ---- Stage 5: Eval log -------------------------------------------------
    eval_append({
        "kind": "ingest_complete",
        "source_id": approved["source"]["id"],
        "source_path": str(md_path),
        "nodes_added": n_added,
        "edges_added": e_added,
        "candidates_file": str(candidates_path),
        "backend": backend,
        "model": model,
        "non_interactive": non_interactive,
        "auto_accept_high": auto_high,
        "auto_accept_all": auto_all,
    })
    print("[5/5] eval log updated.")

    if not keep_candidates and candidates_file is None:
        # only auto-clean if WE wrote it
        try:
            candidates_path.unlink()
        except OSError:
            # Best-effort cleanup: a leftover intermediate file must not turn
            # a completed ingest into a failure.
            pass

    return 0
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
if __name__ == "__main__":
    # Script entry point: forward the CLI tokens (minus the program name) to
    # run_ingest and use its return value as the process exit status.
    raise SystemExit(run_ingest(sys.argv[1:]))
|