pulse8-ai-cortex-knowledge-vault 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cortex/__init__.py +1 -0
- cortex/api/__init__.py +0 -0
- cortex/api/routes.py +328 -0
- cortex/compiler/__init__.py +0 -0
- cortex/compiler/compiler.py +151 -0
- cortex/compiler/extractor.py +41 -0
- cortex/compiler/prompts.py +35 -0
- cortex/config.py +36 -0
- cortex/graph/__init__.py +0 -0
- cortex/graph/builder.py +70 -0
- cortex/graph/context.py +127 -0
- cortex/graph/engine.py +170 -0
- cortex/log/__init__.py +0 -0
- cortex/log/audit.py +19 -0
- cortex/main.py +89 -0
- cortex/mcp/__init__.py +0 -0
- cortex/mcp/__main__.py +9 -0
- cortex/mcp/http_server.py +227 -0
- cortex/mcp/server.py +227 -0
- cortex/mcp/tools.py +309 -0
- cortex/search/__init__.py +0 -0
- cortex/search/qmd.py +76 -0
- cortex/search/qmd_cache.py +58 -0
- cortex/search/qmd_debounce.py +36 -0
- cortex/search/qmd_http.py +82 -0
- cortex/search/qmd_refresh.py +23 -0
- cortex/vault/__init__.py +0 -0
- cortex/vault/index.py +48 -0
- cortex/vault/models.py +85 -0
- cortex/vault/reader.py +170 -0
- cortex/vault/watcher.py +105 -0
- cortex/vault/writer.py +80 -0
- pulse8_ai_cortex_knowledge_vault-0.3.0.dist-info/METADATA +266 -0
- pulse8_ai_cortex_knowledge_vault-0.3.0.dist-info/RECORD +45 -0
- pulse8_ai_cortex_knowledge_vault-0.3.0.dist-info/WHEEL +4 -0
- pulse8_ai_cortex_knowledge_vault-0.3.0.dist-info/entry_points.txt +2 -0
- pulse8_ai_cortex_knowledge_vault-0.3.0.dist-info/licenses/LICENSE.md +248 -0
- scripts/__init__.py +0 -0
- scripts/compile.py +60 -0
- scripts/env_check.sh +69 -0
- scripts/lint.py +90 -0
- scripts/reindex.py +43 -0
- scripts/serve.py +36 -0
- scripts/start.sh +61 -0
- scripts/stop.sh +9 -0
cortex/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Cortex — Agent-native knowledge OS built on Markdown files."""
|
cortex/api/__init__.py
ADDED
|
File without changes
|
cortex/api/routes.py
ADDED
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
"""REST API route handlers for the Cortex vault."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from typing import Any, Optional
|
|
5
|
+
|
|
6
|
+
from fastapi import APIRouter, HTTPException, Request
|
|
7
|
+
from pydantic import BaseModel
|
|
8
|
+
|
|
9
|
+
from cortex.graph.engine import GraphEngine
|
|
10
|
+
from cortex.log.audit import log_operation
|
|
11
|
+
from cortex.search.qmd import QMDSearch
|
|
12
|
+
from cortex.vault.index import rebuild_index
|
|
13
|
+
from cortex.vault.models import Edge, EdgeType
|
|
14
|
+
from cortex.vault.reader import read_note, resolve_wikilink, scan_vault
|
|
15
|
+
from cortex.vault.writer import write_note
|
|
16
|
+
|
|
17
|
+
router = APIRouter()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class WriteNoteBody(BaseModel):
    """Payload accepted by the note write endpoint (create or update)."""

    # Markdown body of the note.
    content: str
    # Optional frontmatter mapping; None when the client omits it.
    frontmatter: Optional[dict[str, Any]] = None
    # Write mode forwarded to write_note; "upsert" unless overridden.
    mode: str = "upsert"
    # Provenance: who authored this change (also used as the audit actor).
    authored_by: str = "human"
    # Provenance: model identifier, if any.
    model: Optional[str] = None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class CreateLinkBody(BaseModel):
    """Payload accepted by the endpoint that creates a graph edge."""

    # Identifier of the node the edge starts from.
    source: str
    # Identifier of the node the edge points to.
    target: str
    # String value that the handler converts into an EdgeType.
    edge_type: str
    # Optional free-form metadata attached to the edge.
    metadata: Optional[dict[str, Any]] = None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class IngestBody(BaseModel):
    """Payload accepted by the raw-source ingest endpoint."""

    # Text of the raw source to store.
    content: str
    # File name to store the source under, inside the vault's raw/ directory.
    filename: str
    # Declared kind of source; "text" unless overridden.
    source_type: str = "text"
    # When true, the handler compiles the source right after storing it.
    auto_compile: bool = False
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def get_vault_path(request: Request):
    """Return the ``vault_path`` stored on the application's state object."""
    app_state = request.app.state
    return app_state.vault_path
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def get_graph(request: Request) -> GraphEngine:
    """Return the graph engine stored on the application's state object."""
    app_state = request.app.state
    return app_state.graph
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def get_qmd(request: Request) -> QMDSearch:
    """Return the QMD search backend stored on the application's state object."""
    app_state = request.app.state
    return app_state.qmd
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def get_qmd_debounce(request: Request):
    """Return the debounced QMD updater stored on the application's state object."""
    app_state = request.app.state
    return app_state.qmd_debounce
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@router.get("/health")
async def health():
    """Liveness probe: always answers with a fixed healthy status."""
    payload = {"status": "healthy"}
    return payload
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@router.get("/notes/{path:path}")
async def read_note_endpoint(path: str, request: Request):
    """Read a note by vault-relative path and return it with its graph edges.

    Args:
        path: Vault-relative path of the note, taken from the URL.
        request: Incoming request; gives access to the application state.

    Returns:
        A dict with the note's path, title, content, frontmatter, node type,
        wikilinks, tags and the edges the graph reports for the note.

    Raises:
        HTTPException: 400 when ``path`` resolves outside the vault,
            404 when the note file does not exist.
    """
    vault_path = get_vault_path(request)
    graph = get_graph(request)

    # ``path`` is client-controlled: "../" segments or an absolute path would
    # otherwise let the caller read arbitrary files outside the vault.
    note_file = vault_path / path
    if not note_file.resolve().is_relative_to(vault_path.resolve()):
        raise HTTPException(status_code=400, detail=f"Invalid note path: {path}")

    try:
        note = read_note(note_file, vault_path)
    except FileNotFoundError as exc:
        raise HTTPException(status_code=404, detail=f"Note not found: {path}") from exc

    edges = await graph.get_edges(note.path)
    edge_dicts = [
        {"source": e.source, "target": e.target, "edge_type": e.edge_type.value}
        for e in edges
    ]

    await log_operation(vault_path, "api", "vault:read", f"Read {path}")

    return {
        "path": note.path,
        "title": note.title,
        "content": note.content,
        "frontmatter": note.frontmatter,
        "node_type": note.node_type.value,
        "wikilinks": note.wikilinks,
        "tags": note.tags,
        "edges": edge_dicts,
    }
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
@router.put("/notes/{path:path}")
async def write_note_endpoint(path: str, body: WriteNoteBody, request: Request):
    """Create or update a note with provenance tracking.

    After writing, the note is registered in the graph together with its tag
    and wikilink edges, the vault index is rebuilt, a QMD refresh is scheduled
    and the operation is written to the audit log.

    Args:
        path: Vault-relative path of the note, taken from the URL.
        body: Note content, frontmatter, write mode and provenance fields.
        request: Incoming request; gives access to the application state.

    Returns:
        A dict with the written note's path and title and ``"status": "written"``.

    Raises:
        HTTPException: 400 when ``path`` resolves outside the vault,
            409 on ``FileExistsError``, 404 on ``FileNotFoundError``.
    """
    vault_path = get_vault_path(request)
    graph = get_graph(request)
    qmd_debounce = get_qmd_debounce(request)

    # ``path`` is client-controlled: refuse "../" or absolute paths so a
    # request can never create or overwrite files outside the vault.
    note_file = vault_path / path
    if not note_file.resolve().is_relative_to(vault_path.resolve()):
        raise HTTPException(status_code=400, detail=f"Invalid note path: {path}")

    try:
        note = write_note(
            path=note_file,
            vault_root=vault_path,
            content=body.content,
            frontmatter=body.frontmatter,
            mode=body.mode,
            authored_by=body.authored_by,
            model=body.model,
        )

        await graph.add_note_node(note)

        for tag in note.tags:
            tag_id = f"tag:{tag}"
            # Tag nodes are created on first use so the edge has a target.
            if not graph.graph.has_node(tag_id):
                graph.graph.add_node(tag_id, node_type="tag", title=tag)
            await graph.add_edge(
                Edge(source=note.path, target=tag_id, edge_type=EdgeType.TAGGED_WITH)
            )

        for link in note.wikilinks:
            resolved = resolve_wikilink(link, vault_path)
            # Wikilinks that resolve to nothing produce no edge.
            if resolved:
                await graph.add_edge(
                    Edge(source=note.path, target=resolved, edge_type=EdgeType.LINKS_TO)
                )

        await rebuild_index(vault_path)
        qmd_debounce.schedule()
        await log_operation(vault_path, body.authored_by, "vault:write", f"Wrote {path}")

        return {"path": note.path, "title": note.title, "status": "written"}

    except FileExistsError as exc:
        raise HTTPException(status_code=409, detail=f"Note already exists: {path}") from exc
    except FileNotFoundError as exc:
        raise HTTPException(status_code=404, detail=f"Note not found: {path}") from exc
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
@router.get("/search")
async def search_endpoint(
    q: str,
    request: Request,
    mode: str | None = None,
    collection: Optional[str] = None,
    top_k: int = 10,
):
    """Search the vault via QMD and return graph-enriched results.

    Args:
        q: Search query.
        request: Incoming request; gives access to the application state.
        mode: Search mode; falls back to ``settings.qmd_search_mode`` when omitted.
        collection: Optional collection passed through to the search backend.
        top_k: Result limit passed through to the search backend.

    Returns:
        A dict with the query, the search mode that was actually used, and
        the backend results, each extended with an ``"edges"`` list.
    """
    from cortex.config import settings

    vault_path = get_vault_path(request)
    graph = get_graph(request)
    qmd = get_qmd(request)
    effective_mode = mode or settings.qmd_search_mode

    raw_results = await qmd.search(q, mode=effective_mode, collection=collection, top_k=top_k)

    # One batched graph lookup for all hits instead of one call per result.
    paths = [r.get("path", "") for r in raw_results]
    edges_by_path = await graph.get_edges_batch(paths)

    enriched = []
    for r in raw_results:
        edges = edges_by_path.get(r.get("path", ""), [])
        edge_dicts = [
            {"source": e.source, "target": e.target, "edge_type": e.edge_type.value}
            for e in edges
        ]
        enriched.append({**r, "edges": edge_dicts})

    await log_operation(vault_path, "api", "vault:search", f"Search: {q}")
    # Report the mode that was really applied; echoing the raw parameter
    # returned null whenever the configured default was used.
    return {"query": q, "mode": effective_mode, "results": enriched}
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
@router.post("/links")
async def create_link_endpoint(body: CreateLinkBody, request: Request):
    """Create a typed edge in the knowledge graph.

    Args:
        body: Source, target, edge type and optional metadata of the edge.
        request: Incoming request; gives access to the application state.

    Returns:
        A dict echoing source, target and edge type with ``"status": "created"``.

    Raises:
        HTTPException: 400 when ``body.edge_type`` is not a valid ``EdgeType``.
    """
    vault_path = get_vault_path(request)
    graph = get_graph(request)

    # An unknown edge type is a client error, not an internal server error.
    try:
        edge_type = EdgeType(body.edge_type)
    except ValueError as exc:
        raise HTTPException(
            status_code=400, detail=f"Unknown edge_type: {body.edge_type}"
        ) from exc

    edge = Edge(
        source=body.source,
        target=body.target,
        edge_type=edge_type,
        metadata=body.metadata or {},
    )
    await graph.add_edge(edge)
    await log_operation(
        vault_path, "api", "vault:link",
        f"Created {body.edge_type}: {body.source} → {body.target}",
    )
    return {"status": "created", "source": body.source, "target": body.target, "edge_type": body.edge_type}
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
@router.get("/links")
async def query_links_endpoint(
    source: str,
    request: Request,
    edge_type: Optional[str] = None,
):
    """Query edges from a source node, optionally restricted to one edge type.

    Args:
        source: Identifier of the node whose edges are requested.
        request: Incoming request; gives access to the application state.
        edge_type: Optional edge type value to filter by.

    Returns:
        A dict with the source and a list of edge dicts (source, target,
        edge type value, metadata).

    Raises:
        HTTPException: 400 when ``edge_type`` is not a valid ``EdgeType``.
    """
    graph = get_graph(request)

    edge_types = None
    if edge_type:
        # An unknown edge type is a client error, not an internal server error.
        try:
            edge_types = [EdgeType(edge_type)]
        except ValueError as exc:
            raise HTTPException(
                status_code=400, detail=f"Unknown edge_type: {edge_type}"
            ) from exc

    edges = await graph.get_edges(source, edge_types=edge_types)
    return {
        "source": source,
        "edges": [
            {
                "source": e.source,
                "target": e.target,
                "edge_type": e.edge_type.value,
                "metadata": e.metadata,
            }
            for e in edges
        ],
    }
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
@router.delete("/links/{source:path}")
async def delete_link_endpoint(
    source: str,
    request: Request,
    target: str = "",
    edge_type: str = "",
):
    """Delete a typed edge from the knowledge graph.

    Args:
        source: Source node identifier, taken from the URL path.
        request: Incoming request; gives access to the application state.
        target: Target node identifier (required query parameter).
        edge_type: Edge type value (required query parameter).

    Returns:
        A dict echoing source, target and edge type with ``"status": "deleted"``.

    Raises:
        HTTPException: 400 when ``target`` or ``edge_type`` is missing, or when
            ``edge_type`` is not a valid ``EdgeType``.
    """
    vault_path = get_vault_path(request)
    graph = get_graph(request)

    if not target or not edge_type:
        raise HTTPException(status_code=400, detail="target and edge_type query params required")

    # An unknown edge type is a client error, not an internal server error.
    try:
        parsed_type = EdgeType(edge_type)
    except ValueError as exc:
        raise HTTPException(
            status_code=400, detail=f"Unknown edge_type: {edge_type}"
        ) from exc

    await graph.remove_edge(source, target, parsed_type)
    await log_operation(
        vault_path, "api", "vault:link",
        f"Deleted {edge_type}: {source} → {target}",
    )
    return {"status": "deleted", "source": source, "target": target, "edge_type": edge_type}
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
@router.get("/graph/stats")
async def graph_stats_endpoint(request: Request):
    """Return whatever statistics the graph engine reports via ``get_stats``."""
    engine = get_graph(request)
    stats = await engine.get_stats()
    return stats
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
@router.post("/ingest")
async def ingest_endpoint(body: IngestBody, request: Request):
    """Ingest a raw source file and optionally compile it.

    The content is stored under the vault's ``raw/`` directory. When
    ``body.auto_compile`` is set, the stored file is compiled into wiki
    articles straight away. ``body.source_type`` is not used by this handler.

    Args:
        body: Content, file name and options for the raw source.
        request: Incoming request; gives access to the application state.

    Returns:
        A dict with the stored path, status and ``compiled`` flag, plus
        ``wiki_articles`` when compilation ran.

    Raises:
        HTTPException: 400 when ``body.filename`` is empty or resolves
            outside the vault's ``raw/`` directory.
    """
    vault_path = get_vault_path(request)
    qmd_debounce = get_qmd_debounce(request)

    raw_dir = vault_path / "raw"
    raw_path = raw_dir / body.filename

    # ``filename`` is client-controlled: refuse "../" or absolute paths (which
    # would write outside raw/) and an empty name (which targets raw/ itself).
    raw_root = raw_dir.resolve()
    resolved = raw_path.resolve()
    if resolved == raw_root or not resolved.is_relative_to(raw_root):
        raise HTTPException(status_code=400, detail=f"Invalid filename: {body.filename}")

    raw_path.parent.mkdir(parents=True, exist_ok=True)
    raw_path.write_text(body.content)

    rel_path = f"raw/{body.filename}"
    await log_operation(vault_path, "api", "vault:ingest", f"Ingested {rel_path}")

    result: dict[str, Any] = {
        "path": rel_path,
        "status": "ingested",
        "compiled": False,
    }

    if body.auto_compile:
        from cortex.compiler.compiler import KnowledgeCompiler

        compiler = KnowledgeCompiler(vault_path)
        created = await compiler.ingest_source(raw_path)
        result["compiled"] = True
        result["wiki_articles"] = [str(p.relative_to(vault_path)) for p in created]

    qmd_debounce.schedule()

    return result
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
@router.post("/compile")
async def compile_endpoint(request: Request):
    """Compile every raw source that no existing note claims as its origin.

    A raw file counts as already compiled when some note in the vault carries
    its vault-relative path in the ``source_path`` frontmatter field. Only
    files directly inside ``raw/`` are considered; directories are skipped.
    """
    vault_path = get_vault_path(request)
    qmd_debounce = get_qmd_debounce(request)

    # Collect the source paths that existing notes were derived from.
    already_compiled: set[str] = {
        origin
        for note in scan_vault(vault_path)
        if (origin := note.frontmatter.get("source_path"))
    }

    raw_dir = vault_path / "raw"
    if not raw_dir.exists():
        return {"status": "no raw directory", "compiled": 0}

    from cortex.compiler.compiler import KnowledgeCompiler

    compiler = KnowledgeCompiler(vault_path)
    sources_done = 0
    article_paths: list[str] = []

    for candidate in sorted(raw_dir.iterdir()):
        if candidate.is_dir() or str(candidate.relative_to(vault_path)) in already_compiled:
            continue
        new_notes = await compiler.ingest_source(candidate)
        sources_done += 1
        article_paths += [str(p.relative_to(vault_path)) for p in new_notes]

    await rebuild_index(vault_path)
    qmd_debounce.schedule()
    await log_operation(vault_path, "api", "vault:compile", f"Compiled {sources_done} sources")

    return {
        "status": "compiled",
        "sources_compiled": sources_done,
        "articles_created": article_paths,
    }
|
|
File without changes
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"""LLM-powered knowledge compiler for raw source ingestion."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from openai import AsyncOpenAI
|
|
8
|
+
|
|
9
|
+
from cortex.compiler.prompts import COMPILE_SYSTEM_PROMPT, INGEST_SYSTEM_PROMPT
|
|
10
|
+
from cortex.config import settings
|
|
11
|
+
from cortex.vault.reader import read_note
|
|
12
|
+
from cortex.vault.writer import write_note
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class KnowledgeCompiler:
    """Compiles raw sources into structured wiki articles using an LLM."""

    def __init__(self, vault_path: Path) -> None:
        """Bind the compiler to a vault and build the LLM client from settings."""
        self.vault_path = vault_path
        self.client = AsyncOpenAI(
            # Falls back to a placeholder string when no API key is configured.
            api_key=settings.llm_api_key or "unused",
            base_url=settings.llm_base_url,
        )
        self.model = settings.compiler_model

    async def _chat(self, system: str, user_content: str) -> str:
        """Send a chat completion request and return the assistant's text.

        Returns an empty string when the response carries no content.
        """
        response = await self.client.chat.completions.create(
            model=self.model,
            max_tokens=settings.compiler_max_tokens,
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": user_content},
            ],
        )
        return response.choices[0].message.content or ""

    async def ingest_source(self, source_path: Path) -> list[Path]:
        """Read a raw source, ask the LLM for wiki articles, write them to wiki/.

        Args:
            source_path: Path of the raw source; must lie inside the vault.

        Returns:
            Paths of the notes that were written. Entries of the LLM response
            that are malformed or that point outside ``wiki/`` are skipped.
        """
        source_content = source_path.read_text()
        relative_source = str(source_path.relative_to(self.vault_path))

        index_content = self._build_index_context()

        text = await self._chat(
            INGEST_SYSTEM_PROMPT,
            f"## Raw Source: {relative_source}\n\n"
            f"{source_content}\n\n"
            f"## Existing Wiki Index\n\n{index_content}",
        )

        wiki_dir = self.vault_path / "wiki"
        wiki_root = wiki_dir.resolve()

        created_paths: list[Path] = []
        for article in self._parse_articles(text):
            filename = article.get("filename")
            content = article.get("content")
            # The response is model-generated: skip entries lacking usable
            # fields instead of aborting the whole ingest with a KeyError.
            if not isinstance(filename, str) or not filename or not isinstance(content, str):
                continue
            if not filename.endswith(".md"):
                filename += ".md"
            note_path = wiki_dir / filename

            # Never let a generated filename ("../x", "/abs/x") escape wiki/.
            if not note_path.resolve().is_relative_to(wiki_root):
                continue

            frontmatter = article.get("frontmatter")
            if not isinstance(frontmatter, dict):
                # Covers a missing key as well as an explicit null.
                frontmatter = {}
            frontmatter["source_path"] = relative_source

            write_note(
                path=note_path,
                vault_root=self.vault_path,
                content=content,
                frontmatter=frontmatter,
                mode="upsert",
                authored_by=self.model,
                model=self.model,
            )
            created_paths.append(note_path)

        return created_paths

    async def compile_cross_references(self, new_paths: list[Path]) -> None:
        """After new articles are created, identify cross-references and contradictions.

        Sends a summary of each new article (title, path, tags and the first
        500 characters of content) plus the wiki index to the LLM and applies
        the updates it proposes.
        """
        new_articles = []
        for p in new_paths:
            note = read_note(p, self.vault_path)
            new_articles.append(
                f"### {note.title}\n"
                f"Path: {note.path}\n"
                f"Tags: {', '.join(note.tags)}\n\n"
                f"{note.content[:500]}"
            )

        index_context = self._build_index_context()

        # Put the separator on its own line; a bare "---" glued the end of one
        # article directly onto the heading of the next.
        joined_articles = "\n\n---\n\n".join(new_articles)
        text = await self._chat(
            COMPILE_SYSTEM_PROMPT,
            f"## New Articles\n\n{joined_articles}\n\n"
            f"## Existing Wiki Index\n\n{index_context}",
        )

        updates = self._parse_updates(text)
        await self._apply_updates(updates)

    def _build_index_context(self) -> str:
        """Build a summary of existing wiki articles for LLM context.

        Returns one Markdown list line per ``*.md`` file under ``wiki/``, or
        the string "No existing articles." when there are none.
        """
        wiki_dir = self.vault_path / "wiki"
        if not wiki_dir.exists():
            return "No existing articles."
        lines = []
        for md_file in sorted(wiki_dir.rglob("*.md")):
            note = read_note(md_file, self.vault_path)
            tags = ", ".join(note.tags) if note.tags else "none"
            lines.append(f"- [{note.title}]({note.path}) — tags: {tags}")
        return "\n".join(lines) if lines else "No existing articles."

    @staticmethod
    def _parse_json_array(text: str) -> list[dict]:
        """Parse an LLM reply into a list of dicts, tolerating code fences.

        Returns an empty list when the reply is not valid JSON. A single JSON
        object is wrapped into a one-element list; non-dict items are dropped.
        """
        cleaned = text.strip()
        if cleaned.startswith("```"):
            # Drop the opening fence line (it may carry a language tag). A
            # reply with no newline after the fence leaves nothing to parse.
            cleaned = cleaned.partition("\n")[2]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        try:
            parsed = json.loads(cleaned)
        except json.JSONDecodeError:
            return []
        if isinstance(parsed, dict):
            parsed = [parsed]
        if not isinstance(parsed, list):
            return []
        return [item for item in parsed if isinstance(item, dict)]

    def _parse_articles(self, text: str) -> list[dict]:
        """Parse LLM response into article dicts, handling code fences."""
        return self._parse_json_array(text)

    def _parse_updates(self, text: str) -> list[dict]:
        """Parse cross-reference updates from LLM response."""
        return self._parse_json_array(text)

    async def _apply_updates(self, updates: list[dict]) -> None:
        """Apply cross-reference updates to existing articles.

        ``add_link`` appends a "See also" line and ``add_contradiction``
        appends a contradiction callout. Any other action, and any update
        whose path is missing, not an existing file, or outside the vault, is
        ignored.
        """
        vault_root = self.vault_path.resolve()
        for update in updates:
            rel_path = update.get("path")
            if not isinstance(rel_path, str) or not rel_path:
                continue
            path = self.vault_path / rel_path
            # Paths come from the model: never touch files outside the vault.
            if not path.resolve().is_relative_to(vault_root) or not path.is_file():
                continue

            action = update.get("action", "")
            details = update.get("details", "")
            if action == "add_link":
                addition = f"\n\nSee also: {details}\n"
            elif action == "add_contradiction":
                addition = f"\n\n> [!contradiction]\n> {details}\n"
            else:
                continue
            path.write_text(path.read_text() + addition)
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Source type detection and text extraction helpers."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def detect_source_type(path: Path) -> str:
    """Classify a raw source file by its extension, ignoring case.

    Returns "pdf", "markdown" or "url" for the known suffixes and "text"
    for everything else, including files without a suffix.
    """
    suffix = path.suffix.lower()
    if suffix == ".pdf":
        return "pdf"
    if suffix in (".md", ".markdown"):
        return "markdown"
    if suffix == ".url":
        return "url"
    return "text"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
async def extract_text_from_pdf(path: Path) -> str:
    """Run ``pdftotext`` on a PDF file and return what it prints to stdout.

    Raises:
        RuntimeError: When ``pdftotext`` exits with a non-zero status; the
            message carries its stderr output.
    """
    import asyncio

    # "-" as the output argument makes pdftotext write to stdout.
    argv = ["pdftotext", str(path), "-"]
    process = await asyncio.create_subprocess_exec(
        *argv,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    out_bytes, err_bytes = await process.communicate()
    if process.returncode == 0:
        return out_bytes.decode()
    raise RuntimeError(f"pdftotext error: {err_bytes.decode()}")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
async def extract_text_from_url(url: str) -> str:
    """Download ``url`` (following redirects) and return the response text.

    Error status codes surface through ``raise_for_status``.
    """
    import httpx

    async with httpx.AsyncClient() as http:
        reply = await http.get(url, follow_redirects=True)
        reply.raise_for_status()
        return reply.text
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""System prompts for the knowledge compiler LLM."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
# System prompt for turning one raw source into wiki articles.
# The reply is expected to be a JSON array of {filename, frontmatter, content}.
INGEST_SYSTEM_PROMPT = "\n".join(
    (
        "You are a knowledge compiler for a Markdown wiki called Cortex.",
        "",
        "Given a raw source document, you must:",
        "1. Read the source carefully and identify key entities, concepts, claims, and relationships.",
        "2. Produce one or more structured Markdown wiki articles.",
        "3. Each article must have YAML frontmatter with: title, tags, authored_by (your model name), created_at, source_path (path to the raw source).",
        "4. Use [[wikilinks]] to cross-reference other concepts. Link generously.",
        "5. Flag any claims that might contradict existing knowledge with > [!contradiction] callouts.",
        "6. Write clearly and concisely. The wiki is for both humans and LLMs to read.",
        "",
        "Output format: return a JSON array of objects, each with:",
        '- "filename": suggested filename (kebab-case, no extension)',
        '- "frontmatter": YAML frontmatter as a dict',
        '- "content": Markdown body content',
        "",
        "Do NOT include the raw source text verbatim. Synthesize and structure it.",
    )
)

# System prompt for cross-referencing new articles against the existing wiki.
# The reply is expected to be a JSON array of {path, action, details}.
COMPILE_SYSTEM_PROMPT = "\n".join(
    (
        "You are maintaining a knowledge wiki called Cortex.",
        "",
        "You will receive:",
        "1. A NEW article that was just created from a raw source.",
        "2. A list of EXISTING wiki articles (title + path + tags) from the index.",
        "",
        "Your job:",
        "1. Identify which existing articles should be updated with cross-references to the new article.",
        "2. Identify if any existing claims are contradicted by the new article.",
        "3. For each article to update, output the specific changes needed.",
        "",
        "Output format: return a JSON array of objects, each with:",
        '- "path": path to the existing article to update',
        '- "action": "add_link" | "add_contradiction" | "update_content"',
        '- "details": description of what to add or change',
    )
)
|
cortex/config.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Cortex application settings via pydantic-settings."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from pydantic_settings import BaseSettings
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class CortexSettings(BaseSettings):
    """Cortex configuration; values are read from CORTEX_-prefixed environment variables."""

    # --- Vault -----------------------------------------------------------
    # Root directory of the Markdown vault.
    vault_path: Path = Path("./vault")

    # --- QMD search backend ----------------------------------------------
    qmd_bin: str = "qmd"
    qmd_url: str = ""
    # Search mode used when a request does not specify one.
    qmd_search_mode: str = "keyword"

    # --- LLM used by the knowledge compiler ------------------------------
    llm_api_key: str = ""
    llm_base_url: str = "https://openrouter.ai/api/v1"
    compiler_model: str = "anthropic/claude-sonnet-4"
    compiler_max_tokens: int = 4096

    # --- QMD refresh -----------------------------------------------------
    qmd_refresh_interval_seconds: int = 900

    # --- MCP server ------------------------------------------------------
    mcp_transport: str = "stdio"
    # NOTE(review): 0.0.0.0 listens on every interface; confirm this default
    # is intended for deployments that are not behind a firewall.
    mcp_sse_host: str = "0.0.0.0"
    mcp_sse_port: int = 8420

    # --- Graph context limits --------------------------------------------
    max_context_depth: int = 2
    max_context_notes: int = 8

    # --- Provenance ------------------------------------------------------
    default_author: str = "human"

    model_config = {"env_prefix": "CORTEX_"}


# Module-level singleton imported by the rest of the package.
settings = CortexSettings()
|
cortex/graph/__init__.py
ADDED
|
File without changes
|
cortex/graph/builder.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Knowledge graph builder — creates nodes and edges from scanned notes."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from cortex.graph.engine import GraphEngine
|
|
7
|
+
from cortex.vault.models import Edge, EdgeType, NodeType, Note
|
|
8
|
+
from cortex.vault.reader import build_wikilink_index, resolve_wikilink
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
async def build_graph(
    notes: list[Note], graph_path: Path, vault_root: Path
) -> GraphEngine:
    """Build or rebuild the knowledge graph from scanned notes.

    Inside a single engine batch:

    1. every note is added as a node;
    2. each wikilink that resolves becomes a ``links_to`` edge;
    3. each tag becomes a ``tagged_with`` edge to a ``tag:<name>`` node;
    4. a ``source_path`` frontmatter entry becomes a ``derived_from`` edge
       to a raw-source node.

    Tag and raw-source nodes are created on first use.
    """
    engine = GraphEngine(graph_path)
    await engine.load()

    # Built once so each wikilink lookup reuses the same index.
    wikilink_index = build_wikilink_index(vault_root)

    async with engine.batch():
        # First pass: all note nodes exist before any edge is added.
        for note in notes:
            await engine.add_note_node(note)

        # Second pass: edges derived from each note.
        for note in notes:
            for raw_link in note.wikilinks:
                target = resolve_wikilink(raw_link, vault_root, _index=wikilink_index)
                if not target:
                    continue
                link_edge = Edge(
                    source=note.path,
                    target=target,
                    edge_type=EdgeType.LINKS_TO,
                )
                await engine.add_edge(link_edge)

            for tag in note.tags:
                tag_node = f"tag:{tag}"
                if not engine.graph.has_node(tag_node):
                    engine.graph.add_node(tag_node, node_type=NodeType.TAG.value, title=tag)
                tag_edge = Edge(
                    source=note.path,
                    target=tag_node,
                    edge_type=EdgeType.TAGGED_WITH,
                )
                await engine.add_edge(tag_edge)

            origin = note.frontmatter.get("source_path")
            if not origin:
                continue
            if not engine.graph.has_node(origin):
                engine.graph.add_node(
                    origin,
                    node_type=NodeType.RAW_SOURCE.value,
                    title=origin,
                )
            origin_edge = Edge(
                source=note.path,
                target=origin,
                edge_type=EdgeType.DERIVED_FROM,
            )
            await engine.add_edge(origin_edge)

    return engine
|