memograph 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memograph/__init__.py +5 -0
- memograph/cli.py +192 -0
- memograph/core/__init__.py +5 -0
- memograph/core/assistant.py +120 -0
- memograph/core/compressor.py +21 -0
- memograph/core/enums.py +8 -0
- memograph/core/graph.py +74 -0
- memograph/core/indexer.py +173 -0
- memograph/core/kernel.py +128 -0
- memograph/core/node.py +35 -0
- memograph/core/parser.py +58 -0
- memograph/core/retriever.py +81 -0
- memograph/py.typed +0 -0
- memograph-0.0.2.dist-info/METADATA +330 -0
- memograph-0.0.2.dist-info/RECORD +19 -0
- memograph-0.0.2.dist-info/WHEEL +5 -0
- memograph-0.0.2.dist-info/entry_points.txt +2 -0
- memograph-0.0.2.dist-info/licenses/LICENSE +21 -0
- memograph-0.0.2.dist-info/top_level.txt +1 -0
memograph/__init__.py
ADDED
memograph/cli.py
ADDED
@@ -0,0 +1,192 @@
import argparse
import json
import os
from urllib import error, request

from .core.assistant import build_answer_prompt, retrieve_cited_context, run_answer
from .core.enums import MemoryType
from .core.kernel import MemoryKernel


def _run_ask(kernel: MemoryKernel, args) -> None:
    if args.provider == "claude" and not os.environ.get("ANTHROPIC_API_KEY"):
        raise RuntimeError("Set ANTHROPIC_API_KEY or use --provider ollama.")

    def ask_once(query_text: str) -> None:
        context, sources = retrieve_cited_context(
            kernel=kernel,
            query=query_text,
            tags=args.tags,
            depth=args.depth,
            top_k=args.top_k,
            token_limit=args.token_limit,
        )
        prompt = build_answer_prompt(context=context, query=query_text)
        try:
            answer = run_answer(
                provider=args.provider,
                prompt=prompt,
                model=args.model,
                max_tokens=args.max_tokens,
                temperature=args.temperature,
                base_url=args.base_url,
            )
        except Exception as exc:
            print(f"LLM error: {exc}")
            return

        if args.show_context:
            print("=== Retrieved Context ===")
            print(context)
            print()

        print("=== Answer ===")
        print(answer)

        if sources and not args.no_citations:
            print("\n=== Sources ===")
            for src in sources:
                tags = ", ".join(src.tags) if src.tags else "-"
                print(
                    f"[{src.source_id}] {src.title} (id={src.node_id}, type={src.memory_type}, tags={tags})"
                )

    if args.chat:
        print("Interactive chat mode. Type 'exit' or 'quit' to stop.")
        while True:
            query_text = input("you> ").strip()
            if not query_text:
                continue
            if query_text.lower() in {"exit", "quit"}:
                print("bye")
                break
            ask_once(query_text)
            print()
        return

    if not args.query:
        raise RuntimeError("Provide --query for non-chat mode.")
    ask_once(args.query)


def _run_doctor(args) -> None:
    print("=== Mnemo Doctor ===")
    vault = MemoryKernel(args.vault)
    stats = vault.ingest(force=False)
    print(f"vault: {vault.vault_path}")
    print(f"indexed: {stats['indexed']} | skipped: {stats['skipped']} | total: {stats['total']}")

    anth_key = os.environ.get("ANTHROPIC_API_KEY")
    print(f"claude_api_key: {'present' if anth_key else 'missing'}")

    ollama_url = (
        args.ollama_url or os.environ.get("OLLAMA_BASE_URL") or "http://localhost:11434"
    ).rstrip("/")
    tags_url = f"{ollama_url}/api/tags"
    try:
        req = request.Request(tags_url, method="GET")
        with request.urlopen(req, timeout=5) as resp:
            payload = json.loads(resp.read().decode("utf-8"))
        models = payload.get("models", [])
        print(f"ollama: reachable ({len(models)} models) @ {ollama_url}")
    except error.URLError as exc:
        print(f"ollama: unreachable @ {ollama_url} ({exc})")


def main():
    parser = argparse.ArgumentParser(description="Mnemo CLI")
    parser.add_argument("--vault", default="~/my-vault", help="Path to memory vault")

    subparsers = parser.add_subparsers(dest="command", required=True)

    ingest_parser = subparsers.add_parser("ingest", help="Index all markdown memories")
    ingest_parser.add_argument("--force", action="store_true", help="Reindex all files")

    remember_parser = subparsers.add_parser("remember", help="Create a new memory note")
    remember_parser.add_argument("--title", required=True, help="Memory title")
    remember_parser.add_argument("--content", required=True, help="Memory content")
    remember_parser.add_argument(
        "--type",
        choices=[member.value for member in MemoryType],
        default=MemoryType.FACT.value,
        help="Memory type",
    )
    remember_parser.add_argument(
        "--tags",
        nargs="*",
        default=[],
        help="Tags (with or without # prefix)",
    )

    context_parser = subparsers.add_parser("context", help="Build context window for a query")
    context_parser.add_argument("--query", required=True, help="Query text")
    context_parser.add_argument("--tags", nargs="*", default=[], help="Optional tag filter")
    context_parser.add_argument("--depth", type=int, default=2, help="Graph traversal depth")
    context_parser.add_argument("--top-k", type=int, default=8, help="Max memories to return")
    context_parser.add_argument(
        "--token-limit", type=int, default=2048, help="Compression token limit"
    )

    ask_parser = subparsers.add_parser("ask", help="Ask an LLM with retrieved memory context")
    ask_parser.add_argument(
        "--provider", choices=["claude", "ollama"], default="ollama", help="LLM provider"
    )
    ask_parser.add_argument("--query", help="Question text (omit in --chat mode)")
    ask_parser.add_argument("--chat", action="store_true", help="Interactive chat mode")
    ask_parser.add_argument("--tags", nargs="*", default=[], help="Optional tag filter")
    ask_parser.add_argument("--depth", type=int, default=2, help="Graph traversal depth")
    ask_parser.add_argument("--top-k", type=int, default=8, help="Max memories to retrieve")
    ask_parser.add_argument(
        "--token-limit", type=int, default=2048, help="Context compression budget"
    )
    ask_parser.add_argument("--model", default=None, help="Provider-specific model")
    ask_parser.add_argument("--base-url", default=None, help="Provider base URL override")
    ask_parser.add_argument("--max-tokens", type=int, default=1024, help="Max generated tokens")
    ask_parser.add_argument("--temperature", type=float, default=0.1, help="Sampling temperature")
    ask_parser.add_argument("--show-context", action="store_true", help="Print context sent to LLM")
    ask_parser.add_argument("--no-citations", action="store_true", help="Hide source list output")

    doctor_parser = subparsers.add_parser(
        "doctor", help="Run environment and integration diagnostics"
    )
    doctor_parser.add_argument("--ollama-url", default=None, help="Override Ollama base URL")

    args = parser.parse_args()
    kernel = MemoryKernel(args.vault)

    if args.command == "ingest":
        print(kernel.ingest(force=args.force))
        return

    if args.command == "remember":
        path = kernel.remember(
            title=args.title,
            content=args.content,
            memory_type=MemoryType(args.type),
            tags=args.tags,
        )
        print(f"Created memory: {path}")
        return

    if args.command == "context":
        context = kernel.context_window(
            query=args.query,
            tags=args.tags,
            depth=args.depth,
            top_k=args.top_k,
            token_limit=args.token_limit,
        )
        print(context)
        return

    if args.command == "ask":
        _run_ask(kernel, args)
        return

    if args.command == "doctor":
        _run_doctor(args)
        return


if __name__ == "__main__":
    main()
memograph/core/assistant.py
ADDED
@@ -0,0 +1,120 @@
from __future__ import annotations

from dataclasses import dataclass

from .kernel import MemoryKernel
from .node import MemoryNode


@dataclass
class SourceRef:
    source_id: str
    node_id: str
    title: str
    memory_type: str
    tags: list[str]


def build_cited_context(
    nodes: list[MemoryNode],
    token_limit: int = 2048,
    chars_per_token: float = 3.8,
) -> tuple[str, list[SourceRef]]:
    char_limit = int(token_limit * chars_per_token)
    sections: list[str] = []
    sources: list[SourceRef] = []
    total = 0
    separator = "\n\n---\n\n"

    for index, node in enumerate(nodes, start=1):
        sid = f"S{index}"
        tags = ", ".join(node.tags) if node.tags else "-"
        header = f"[{sid}] {node.title} | type={node.memory_type.value} | tags={tags}"
        chunk = f"{header}\n{node.content}"

        projected = total + len(chunk) + (len(separator) if sections else 0)
        if projected > char_limit:
            remaining = char_limit - total
            if remaining > 120:
                truncated = chunk[:remaining].rstrip() + "…"
                sections.append(truncated)
                sources.append(
                    SourceRef(
                        source_id=sid,
                        node_id=node.id,
                        title=node.title,
                        memory_type=node.memory_type.value,
                        tags=node.tags,
                    )
                )
            break

        sections.append(chunk)
        sources.append(
            SourceRef(
                source_id=sid,
                node_id=node.id,
                title=node.title,
                memory_type=node.memory_type.value,
                tags=node.tags,
            )
        )
        total = projected

    return separator.join(sections), sources


def build_answer_prompt(context: str, query: str) -> str:
    return (
        "You are a helpful assistant. Use memory context only when relevant. "
        "If context is insufficient, say what is missing. "
        "When you use evidence, cite source markers like [S1], [S2].\n\n"
        f"<memory>\n{context}\n</memory>\n\n"
        f"User question: {query}"
    )


def run_answer(
    provider: str,
    prompt: str,
    model: str | None = None,
    max_tokens: int = 1024,
    temperature: float = 0.1,
    base_url: str | None = None,
) -> str:
    if provider == "claude":
        from ..adapters.llm.claude import ClaudeLLMClient, ClaudeLLMConfig

        client = ClaudeLLMClient(base_url=base_url)
        # Default to a widely available model if not specified, though future models may differ.
        config = ClaudeLLMConfig(
            model=model or "claude-3-5-sonnet-20240620",
            max_tokens=max_tokens,
            temperature=temperature,
        )
        return client.generate(prompt=prompt, config=config)

    if provider == "ollama":
        from ..adapters.llm.ollama import OllamaLLMClient, OllamaLLMConfig

        client = OllamaLLMClient(base_url=base_url)
        config = OllamaLLMConfig(
            model=model or "llama3.1:8b",
            max_tokens=max_tokens,
            temperature=temperature,
        )
        return client.generate(prompt=prompt, config=config)

    raise ValueError(f"Unsupported provider: {provider}")


def retrieve_cited_context(
    kernel: MemoryKernel,
    query: str,
    tags: list[str] | None = None,
    depth: int = 2,
    top_k: int = 8,
    token_limit: int = 2048,
) -> tuple[str, list[SourceRef]]:
    nodes = kernel.retrieve_nodes(query=query, tags=tags, depth=depth, top_k=top_k)
    return build_cited_context(nodes=nodes, token_limit=token_limit)
memograph/core/compressor.py
ADDED
@@ -0,0 +1,21 @@
# core/compressor.py
from .node import MemoryNode


class TokenCompressor:
    def __init__(self, token_limit: int = 4096, chars_per_token: float = 3.8):
        self.char_limit = int(token_limit * chars_per_token)

    def compress(self, nodes: list[MemoryNode], separator: str = "\n\n---\n\n") -> str:
        parts, total = [], 0
        for node in nodes:
            chunk = f"## {node.title}\n*Type: {node.memory_type.value} | Salience: {node.salience:.2f}*\n\n{node.content}"
            if total + len(chunk) > self.char_limit:
                # Truncate last entry to fit
                remaining = self.char_limit - total
                if remaining > 100:
                    parts.append(chunk[:remaining] + "…")
                break
            parts.append(chunk)
            total += len(chunk) + len(separator)
        return separator.join(parts)
memograph/core/enums.py
ADDED
memograph/core/graph.py
ADDED
@@ -0,0 +1,74 @@
# core/graph.py
from collections import defaultdict, deque

from .node import MemoryNode


class VaultGraph:
    def __init__(self):
        self._nodes: dict[str, MemoryNode] = {}
        self._adjacency: dict[str, set[str]] = defaultdict(set)

    def add_node(self, node: MemoryNode):
        self._nodes[node.id] = node
        for link in node.links:
            self._adjacency[node.id].add(link)

    def build_backlinks(self):
        """Populate backlinks after all nodes are loaded."""
        backlink_map: dict[str, list[str]] = defaultdict(list)
        for node_id, targets in self._adjacency.items():
            for target in targets:
                backlink_map[target].append(node_id)
        for node_id, node in self._nodes.items():
            node.backlinks = backlink_map.get(node_id, [])

    def get(self, node_id: str) -> MemoryNode | None:
        return self._nodes.get(node_id)

    def neighbors(self, node_id: str, depth: int = 1) -> list[MemoryNode]:
        """BFS traversal up to `depth` hops, following both links and backlinks."""
        visited, queue = set(), deque([(node_id, 0)])
        result = []
        while queue:
            curr_id, d = queue.popleft()
            if curr_id in visited or d > depth:
                continue
            visited.add(curr_id)
            if node := self._nodes.get(curr_id):
                if curr_id != node_id:
                    result.append(node)
                if d < depth:
                    for nxt in self._adjacency[curr_id] | set(node.backlinks):
                        queue.append((nxt, d + 1))
        return result

    def remove_node(self, node_id: str):
        """Remove a node from the graph (e.g., when a file is deleted)."""
        if node_id in self._nodes:
            # Remove the node itself
            del self._nodes[node_id]

        # Remove from adjacency list
        if node_id in self._adjacency:
            del self._adjacency[node_id]

        # Remove any references to this node in other adjacency lists
        for adj_set in self._adjacency.values():
            adj_set.discard(node_id)

    def all_nodes(self) -> list[MemoryNode]:
        """Return all nodes in the graph."""
        return list(self._nodes.values())

    def filter(self, tags=None, memory_type=None, min_salience=0.0) -> list[MemoryNode]:
        results = []
        for node in self._nodes.values():
            if tags and not set(tags).intersection(node.tags):
                continue
            if memory_type and node.memory_type != memory_type:
                continue
            if node.salience < min_salience:
                continue
            results.append(node)
        return sorted(results, key=lambda n: n.salience, reverse=True)
memograph/core/indexer.py
ADDED
@@ -0,0 +1,173 @@
# core/indexer.py
import json
from pathlib import Path

from .graph import VaultGraph
from .parser import parse_file

CACHE_FILE = ".memograph_cache.json"
GRAPH_CACHE_FILE = ".memograph_graph.json"
EMBEDDINGS_CACHE_FILE = ".memograph_embeddings.json"


class VaultIndexer:
    def __init__(self, vault_root: Path, embedding_adapter=None):
        self.root = vault_root
        self.cache_path = vault_root / CACHE_FILE
        self.graph_cache_path = vault_root / GRAPH_CACHE_FILE
        self.embeddings_cache_path = vault_root / EMBEDDINGS_CACHE_FILE
        self.embedding_adapter = embedding_adapter
        self._mtime_cache: dict[str, float] = self._load_cache()
        self._embeddings_cache: dict[str, list[float]] = self._load_embeddings_cache()

    def _load_cache(self) -> dict[str, float]:
        if self.cache_path.exists():
            try:
                return json.loads(self.cache_path.read_text())
            except json.JSONDecodeError:
                return {}
        return {}

    def _save_cache(self, mtimes: dict[str, float]):
        self.cache_path.write_text(json.dumps(mtimes))
        self._mtime_cache = mtimes

    def _load_embeddings_cache(self) -> dict[str, list[float]]:
        """Load cached embeddings from disk."""
        if self.embeddings_cache_path.exists():
            try:
                return json.loads(self.embeddings_cache_path.read_text())
            except json.JSONDecodeError:
                return {}
        return {}

    def _save_embeddings_cache(self):
        """Save embeddings cache to disk."""
        self.embeddings_cache_path.write_text(json.dumps(self._embeddings_cache))

    def _load_graph_from_cache(self, graph: VaultGraph) -> bool:
        """Load previously indexed graph state from cache. Returns True if successful."""
        if not self.graph_cache_path.exists():
            return False

        try:
            cache_data = json.loads(self.graph_cache_path.read_text())
            # Reconstruct nodes from cached data
            for node_data in cache_data.get("nodes", []):
                # Import here to avoid circular dependency
                from .enums import MemoryType
                from .node import MemoryNode

                node = MemoryNode(
                    id=node_data["id"],
                    title=node_data["title"],
                    content=node_data["content"],
                    memory_type=MemoryType[node_data["memory_type"]],
                    tags=node_data["tags"],
                    links=node_data["links"],
                    backlinks=node_data["backlinks"],
                    salience=node_data["salience"],
                    access_count=node_data["access_count"],
                    source_path=node_data.get("source_path"),
                )
                # Restore embedding if available
                if node.id in self._embeddings_cache:
                    node.embedding = self._embeddings_cache[node.id]
                graph.add_node(node)
            return True
        except (json.JSONDecodeError, KeyError, ValueError):
            return False

    def _save_graph_to_cache(self, graph: VaultGraph):
        """Save graph state to cache for faster subsequent loads."""
        nodes_data = []
        for node in graph.all_nodes():
            nodes_data.append(
                {
                    "id": node.id,
                    "title": node.title,
                    "content": node.content,
                    "memory_type": node.memory_type.name,
                    "tags": node.tags,
                    "links": node.links,
                    "backlinks": node.backlinks,
                    "salience": node.salience,
                    "access_count": node.access_count,
                    "source_path": node.source_path,
                }
            )

        cache_data = {"nodes": nodes_data}
        self.graph_cache_path.write_text(json.dumps(cache_data, indent=2))

    def index(self, graph: VaultGraph, force=False) -> tuple[int, int]:
        """Returns (indexed, skipped) counts."""
        new_mtimes = {}
        indexed = skipped = 0

        if not self.root.exists():
            self.root.mkdir(parents=True, exist_ok=True)

        # Load existing graph from cache if not forcing rebuild
        if not force and self._load_graph_from_cache(graph):
            # Graph loaded from cache, now check for changes
            current_files = set()

            for md_file in self.root.rglob("*.md"):
                if md_file.name in [CACHE_FILE, GRAPH_CACHE_FILE, EMBEDDINGS_CACHE_FILE]:
                    continue

                rel = md_file.relative_to(self.root).as_posix()
                current_files.add(rel)
                mtime = md_file.stat().st_mtime
                new_mtimes[rel] = mtime

                unchanged = self._mtime_cache.get(rel) == mtime
                if unchanged:
                    skipped += 1
                else:
                    # File is new or modified - parse and update
                    node = parse_file(md_file, self.root)
                    self._generate_and_cache_embedding(node)
                    graph.add_node(node)
                    indexed += 1

            # Remove nodes for deleted files
            cached_files = set(self._mtime_cache.keys())
            deleted_files = cached_files - current_files
            for deleted_rel in deleted_files:
                # Derive the node ID the same way parse_file does (relative path, no suffix)
                node_id = Path(deleted_rel).with_suffix("").as_posix().lower().replace(" ", "-")
                if graph.get(node_id):
                    graph.remove_node(node_id)
        else:
            # Force rebuild or no cache - parse all files
            for md_file in self.root.rglob("*.md"):
                if md_file.name in [CACHE_FILE, GRAPH_CACHE_FILE, EMBEDDINGS_CACHE_FILE]:
                    continue

                rel = md_file.relative_to(self.root).as_posix()
                mtime = md_file.stat().st_mtime
                new_mtimes[rel] = mtime

                node = parse_file(md_file, self.root)
                self._generate_and_cache_embedding(node)
                graph.add_node(node)
                indexed += 1

        graph.build_backlinks()
        self._save_cache(new_mtimes)
        self._save_graph_to_cache(graph)
        self._save_embeddings_cache()
        return indexed, skipped

    def _generate_and_cache_embedding(self, node):
        """Generate and cache embedding for a node if adapter is available."""
        if self.embedding_adapter:
            # Check if we already have a cached embedding
            if node.id in self._embeddings_cache:
                node.embedding = self._embeddings_cache[node.id]
            else:
                # Generate new embedding
                node.embedding = self.embedding_adapter.embed(node.content)
                self._embeddings_cache[node.id] = node.embedding
memograph/core/kernel.py
ADDED
@@ -0,0 +1,128 @@
import re
from datetime import datetime, timezone
from pathlib import Path

import yaml

from .compressor import TokenCompressor
from .enums import MemoryType
from .graph import VaultGraph
from .indexer import VaultIndexer
from .node import MemoryNode
from .retriever import HybridRetriever


class MemoryKernel:
    def __init__(self, vault_path: str, embedding_adapter=None):
        self.vault_path = Path(vault_path).expanduser()
        self.vault_path.mkdir(parents=True, exist_ok=True)

        self.graph = VaultGraph()
        self.embedding_adapter = embedding_adapter
        self.indexer = VaultIndexer(self.vault_path, embedding_adapter=embedding_adapter)
        self.retriever = HybridRetriever(self.graph, embedding_adapter=embedding_adapter)

    @staticmethod
    def _normalize_tags(tags: list[str] | None) -> list[str]:
        if not tags:
            return []
        normalized = []
        for tag in tags:
            clean = tag.strip().lstrip("#")
            if clean:
                normalized.append(clean)
        return normalized

    @staticmethod
    def _slugify(text: str) -> str:
        slug = re.sub(r"[^a-zA-Z0-9]+", "-", text.strip().lower()).strip("-")
        return slug or datetime.now(timezone.utc).strftime("memory-%Y%m%d-%H%M%S")

    def ingest(self, force: bool = False) -> dict[str, int]:
        """Ingests all memories from the vault."""
        self.graph = VaultGraph()
        indexed, skipped = self.indexer.index(self.graph, force=force)
        self.retriever = HybridRetriever(self.graph, embedding_adapter=self.retriever.embeddings)
        total = len(self.graph._nodes)
        return {"indexed": indexed, "skipped": skipped, "total": total}

    def remember(
        self,
        title: str,
        content: str,
        memory_type: MemoryType = MemoryType.FACT,
        tags: list[str] | None = None,
    ):
        """Creates a new memory."""
        normalized_tags = self._normalize_tags(tags)
        slug = self._slugify(title)
        file_path = self.vault_path / f"{slug}.md"

        counter = 2
        while file_path.exists():
            file_path = self.vault_path / f"{slug}-{counter}.md"
            counter += 1

        created_at = datetime.now(timezone.utc).isoformat()
        tags_line = " ".join(f"#{tag}" for tag in normalized_tags)

        payload = {
            "title": title,
            "memory_type": memory_type.value,
            "created": created_at,
            "salience": 1.0,
        }
        frontmatter = "---\n" + yaml.safe_dump(payload, sort_keys=False).strip() + "\n---\n\n"

        body = content.strip()
        if tags_line:
            body = f"{body}\n\n{tags_line}"

        file_path.write_text(frontmatter + body + "\n", encoding="utf-8")
        return str(file_path)

    def context_window(
        self,
        query: str,
        tags: list[str] | None = None,
        depth: int = 2,
        top_k: int = 8,
        token_limit: int = 2048,
    ) -> str:
        """Retrieves relevant context from the memory vault."""
        nodes = self.retrieve_nodes(query=query, tags=tags, depth=depth, top_k=top_k)
        compressor = TokenCompressor(token_limit=token_limit)
        return compressor.compress(nodes)

    def retrieve_nodes(
        self,
        query: str,
        tags: list[str] | None = None,
        depth: int = 2,
        top_k: int = 8,
    ) -> list[MemoryNode]:
        normalized_tags = self._normalize_tags(tags)

        if not self.graph._nodes:
            self.ingest()

        query_words = [w.lower() for w in re.findall(r"\w+", query) if len(w) > 2]
        seed_ids = []
        for node in self.graph._nodes.values():
            haystack = f"{node.title} {node.content}".lower()
            if any(word in haystack for word in query_words):
                seed_ids.append(node.id)

        return self.retriever.retrieve(
            query=query,
            seed_ids=seed_ids,
            tags=normalized_tags,
            depth=depth,
            top_k=top_k,
        )
memograph/core/node.py
ADDED
@@ -0,0 +1,35 @@
# core/node.py
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any

from .enums import MemoryType


@dataclass
class MemoryNode:
    id: str  # Derived from file path (slug)
    title: str
    content: str
    memory_type: MemoryType = MemoryType.SEMANTIC

    # Graph relationships
    links: list[str] = field(default_factory=list)  # Outgoing wikilinks
    backlinks: list[str] = field(default_factory=list)  # Populated by graph
    tags: list[str] = field(default_factory=list)

    # Metadata / reinforcement signals
    salience: float = 1.0  # 0.0–1.0, boosted on access/linking
    access_count: int = 0
    last_accessed: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    modified_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))

    # Source file path
    source_path: str | None = None

    # Arbitrary YAML frontmatter passthrough
    frontmatter: dict[str, Any] = field(default_factory=dict)

    # Optional: set by embedding adapter
    embedding: list[float] | None = None
memograph/core/parser.py
ADDED
@@ -0,0 +1,58 @@
# core/parser.py
import contextlib
import re
from datetime import datetime, timezone
from pathlib import Path

import yaml

from .enums import MemoryType
from .node import MemoryNode

WIKILINK_RE = re.compile(r"\[\[([^\]|#]+)(?:[|#][^\]]*)?\]\]")
TAG_RE = re.compile(r"(?:^|\s)#([\w/-]+)")
FRONTMATTER_RE = re.compile(r"^---\r?\n(.*?)\r?\n---\r?\n", re.DOTALL)


def parse_file(path: Path, vault_root: Path) -> MemoryNode:
    raw = path.read_text(encoding="utf-8").lstrip("\ufeff")
    frontmatter = {}
    content = raw

    if m := FRONTMATTER_RE.match(raw):
        with contextlib.suppress(yaml.YAMLError):
            frontmatter = yaml.safe_load(m.group(1)) or {}
        content = raw[m.end() :]

    links = [link.lower().replace(" ", "-") for link in WIKILINK_RE.findall(content)]
    tags = TAG_RE.findall(content)

    node_id = path.relative_to(vault_root).with_suffix("").as_posix()
    node_id = node_id.lower().replace(" ", "-")

    stat = path.stat()

    try:
        mem_type = MemoryType(frontmatter.get("memory_type", MemoryType.SEMANTIC.value))
    except (TypeError, ValueError):
        mem_type = MemoryType.SEMANTIC

    created = frontmatter.get("created")
    try:
        created_at = datetime.fromisoformat(created) if created else datetime.now(timezone.utc)
    except (TypeError, ValueError):
        created_at = datetime.now(timezone.utc)

    return MemoryNode(
        id=node_id,
        title=frontmatter.get("title", path.stem),
        content=content.strip(),
        memory_type=mem_type,
        links=links,
        tags=tags,
        salience=float(frontmatter.get("salience", 1.0)),
        created_at=created_at,
        modified_at=datetime.fromtimestamp(stat.st_mtime, tz=timezone.utc),
        source_path=str(path),
        frontmatter=frontmatter,
    )
memograph/core/retriever.py
ADDED
@@ -0,0 +1,81 @@
# core/retriever.py
from .enums import MemoryType
from .graph import VaultGraph
from .node import MemoryNode


class HybridRetriever:
    def __init__(self, graph: VaultGraph, embedding_adapter=None):
        self.graph = graph
        self.embeddings = embedding_adapter  # Optional

    def retrieve(
        self,
        query: str,
        seed_ids: list[str] | None = None,
        tags: list[str] | None = None,
        memory_type: MemoryType | None = None,
        depth: int = 2,
        top_k: int = 10,
        min_salience: float = 0.0,
    ) -> list[MemoryNode]:
        candidates: dict[str, MemoryNode] = {}

        # 1. Graph traversal from seeds
        for seed_id in seed_ids or []:
            seed = self.graph.get(seed_id)
            if seed:
                candidates[seed.id] = seed
                neighbors = self.graph.neighbors(seed_id, depth=depth)
                for n in neighbors:
                    candidates[n.id] = n

        # 2. Metadata filter
        # Only fetch from full graph if filters are applied or we have no seeds
        filters_active = (tags is not None) or (memory_type is not None) or (min_salience > 0.0)

        if filters_active or not candidates:
            filtered = self.graph.filter(
                tags=tags, memory_type=memory_type, min_salience=min_salience
            )
            for n in filtered:
                candidates[n.id] = n

        # 3. Optional: re-rank with vector similarity
        if self.embeddings and query:
            candidates = self._rerank(query, list(candidates.values()))
        else:
            candidates = sorted(
                candidates.values(), key=lambda n: (n.salience, n.access_count), reverse=True
            )

        return candidates[:top_k]

    def _rerank(self, query: str, nodes: list[MemoryNode]) -> list[MemoryNode]:
        q_emb = self.embeddings.embed(query)
        scored = []
        for node in nodes:
            if node.embedding is None:
                node.embedding = self.embeddings.embed(node.content)
            sim = self._cosine_similarity(q_emb, node.embedding)
            scored.append((sim, node))
        return [n for _, n in sorted(scored, key=lambda x: x[0], reverse=True)]

    @staticmethod
    def _cosine_similarity(left: list[float], right: list[float]) -> float:
        """Calculate cosine similarity between two vectors (normalized dot product)."""
        if not left or not right:
            return 0.0

        size = min(len(left), len(right))
        dot_product = sum(left[i] * right[i] for i in range(size))

        # Calculate magnitudes
        mag_left = sum(x * x for x in left[:size]) ** 0.5
        mag_right = sum(x * x for x in right[:size]) ** 0.5

        if mag_left == 0 or mag_right == 0:
            return 0.0

        return dot_product / (mag_left * mag_right)
memograph/py.typed
ADDED
File without changes

memograph-0.0.2.dist-info/METADATA
ADDED
@@ -0,0 +1,330 @@
Metadata-Version: 2.4
Name: memograph
Version: 0.0.2
Summary: A graph-based memory system for LLMs with intelligent retrieval using knowledge graphs, hybrid search, and semantic embeddings
Author-email: MemoGraph Contributors <author@example.com>
License: MIT
Project-URL: Homepage, https://github.com/Indhar01/MemoGraph
Project-URL: Documentation, https://github.com/Indhar01/MemoGraph#readme
Project-URL: Repository, https://github.com/Indhar01/MemoGraph
Project-URL: Issues, https://github.com/Indhar01/MemoGraph/issues
Project-URL: Changelog, https://github.com/Indhar01/MemoGraph/blob/main/CHANGELOG.md
Keywords: llm,memory,knowledge-graph,rag,retrieval,ai,machine-learning,graph-database,semantic-search,vector-embeddings,chatgpt,claude,ollama,langchain,llamaindex,markdown,note-taking,personal-knowledge-management,pkm,second-brain
Classifier: Development Status :: 3 - Alpha
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Science/Research
Classifier: Intended Audience :: Information Technology
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Operating System :: POSIX :: Linux
Classifier: Operating System :: Microsoft :: Windows
Classifier: Operating System :: MacOS
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: Text Processing :: Markup :: Markdown
Classifier: Topic :: Database
Classifier: Topic :: Office/Business
Classifier: Framework :: AsyncIO
Classifier: Typing :: Typed
Requires-Python: >=3.10
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: PyYAML>=6.0
Requires-Dist: requests>=2.31.0
Provides-Extra: openai
Requires-Dist: openai>=1.0.0; extra == "openai"
Requires-Dist: tiktoken>=0.5.0; extra == "openai"
Provides-Extra: anthropic
Requires-Dist: anthropic>=0.39.0; extra == "anthropic"
Provides-Extra: ollama
Requires-Dist: ollama>=0.3.0; extra == "ollama"
Provides-Extra: embeddings
Requires-Dist: sentence-transformers>=2.2.0; extra == "embeddings"
Requires-Dist: numpy>=1.24.0; extra == "embeddings"
Provides-Extra: all
Requires-Dist: memograph[anthropic,embeddings,ollama,openai]; extra == "all"
Provides-Extra: dev
Requires-Dist: pytest>=7.4.0; extra == "dev"
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
Requires-Dist: ruff>=0.7.0; extra == "dev"
Requires-Dist: mypy>=1.7.0; extra == "dev"
Requires-Dist: pre-commit>=3.5.0; extra == "dev"
Requires-Dist: black>=23.11.0; extra == "dev"
Requires-Dist: isort>=5.12.0; extra == "dev"
Requires-Dist: types-PyYAML>=6.0; extra == "dev"
Requires-Dist: types-requests>=2.31.0; extra == "dev"
Dynamic: license-file

# MemoGraph 🧠

[](https://pypi.org/project/memograph/)
[](https://pypi.org/project/memograph/)
[](https://github.com/Indhar01/MemoGraph/blob/main/LICENSE)
[](https://github.com/astral-sh/ruff)

A graph-based memory system for LLMs with intelligent retrieval. MemoGraph provides a powerful solution to the LLM memory problem by combining knowledge graphs, hybrid retrieval, and semantic search.

## ✨ Features

- **Graph-Based Memory**: Navigate knowledge using bidirectional wikilinks and backlinks
- **Hybrid Retrieval**: Combines keyword matching, graph traversal, and optional vector embeddings
- **Markdown-Native**: Human-readable markdown files with YAML frontmatter
- **Memory Types**: Support for episodic, semantic, procedural, and fact-based memories
- **Smart Indexing**: Efficient caching system that only re-indexes changed files
- **CLI & Python API**: Use via command line or integrate into your Python applications
- **Multiple LLM Providers**: Works with Ollama, Claude, and OpenAI
- **Context Compression**: Intelligent token budgeting for optimal context windows
- **Salience Scoring**: Memory importance ranking for better retrieval

## 🚀 Quick Start

### Installation

```bash
pip install memograph
```

Install with optional dependencies:

```bash
# For OpenAI support
pip install memograph[openai]

# For Anthropic Claude support
pip install memograph[anthropic]

# For Ollama support
pip install memograph[ollama]

# For embedding support
pip install memograph[embeddings]

# Install everything
pip install memograph[all]
```

### Python Usage

```python
from memograph import MemoryKernel, MemoryType

# Initialize the kernel attached to your vault path
kernel = MemoryKernel("~/my-vault")

# Ingest all notes in the vault
stats = kernel.ingest()
print(f"Indexed {stats['indexed']} memories.")

# Programmatically add a new memory
kernel.remember(
    title="Meeting Note",
    content="Decided to use BFS graph traversal for retrieval.",
    memory_type=MemoryType.EPISODIC,
    tags=["design", "retrieval"]
)

# Retrieve context for an LLM query
context = kernel.context_window(
    query="how does retrieval work?",
    tags=["retrieval"],
    depth=2,
    top_k=8
)

print(context)
```

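The retrieval step behind the `ask` command is also callable directly. A minimal sketch, assuming the vault above has already been ingested; `retrieve_cited_context` and `build_answer_prompt` come from the `memograph.core.assistant` module in this package, and the query strings are illustrative:

```python
from memograph import MemoryKernel
from memograph.core.assistant import build_answer_prompt, retrieve_cited_context

kernel = MemoryKernel("~/my-vault")

# Build a citation-annotated context block plus structured source references
context, sources = retrieve_cited_context(kernel=kernel, query="retrieval design", top_k=5)

# Wrap the context and question into the prompt the `ask` command sends to an LLM
prompt = build_answer_prompt(context=context, query="How does retrieval work?")

for src in sources:
    print(f"[{src.source_id}] {src.title} (id={src.node_id})")
```
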
## 🎯 CLI Usage

MemoGraph comes with a powerful CLI for managing your vault and chatting with it.

### Ingest

Index your markdown files into the graph database:

```bash
memograph --vault ~/my-vault ingest
```

Force re-indexing of all files:

```bash
memograph --vault ~/my-vault ingest --force
```

### Remember

Quickly add a memory from the command line:

```bash
memograph --vault ~/my-vault remember \
  --title "Team Sync" \
  --content "Discussed Q3 goals." \
  --tags planning q3
```

### Context Window

Generate context for a query:

```bash
memograph --vault ~/my-vault context \
  --query "What did we decide about the database?" \
  --tags architecture \
  --depth 2 \
  --top-k 5
```

### Ask (Interactive Chat)

Start an interactive chat session with your vault context:

```bash
memograph --vault ~/my-vault ask --chat --provider ollama --model llama3
```

Or ask a single question:

```bash
memograph --vault ~/my-vault ask \
  --query "Summarize our design decisions" \
  --provider claude \
  --model claude-3-5-sonnet-20240620
```

### Diagnostics

Check your environment and connection to LLM providers:

```bash
memograph --vault ~/my-vault doctor
```

## 📖 Core Concepts

### Memory Types

MemoGraph supports different types of memories inspired by cognitive science:

- **Episodic**: Personal experiences and events (e.g., meeting notes)
- **Semantic**: Facts and general knowledge (e.g., documentation)
- **Procedural**: How-to knowledge and processes (e.g., tutorials)
- **Fact**: Discrete factual information (e.g., configuration values)

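The type is set per note, via frontmatter or `remember`. For instance, a sketch using enum members that appear elsewhere in this package (the titles and contents are illustrative):

```python
from memograph import MemoryKernel, MemoryType

kernel = MemoryKernel("~/my-vault")

# Episodic: a dated event; Fact: a discrete value worth recalling verbatim
kernel.remember(title="Sprint Retro", content="Velocity dropped 20%.",
                memory_type=MemoryType.EPISODIC, tags=["team"])
kernel.remember(title="Prod DB Host", content="db.internal:5432",
                memory_type=MemoryType.FACT, tags=["config"])
```
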
### Graph Traversal

The library uses BFS (Breadth-First Search) to traverse your knowledge graph:

```python
# Retrieve nodes with depth=2 (2 hops from seed nodes)
nodes = kernel.retrieve_nodes(
    query="graph algorithms",
    depth=2,   # Traverse up to 2 levels deep
    top_k=10   # Return top 10 relevant memories
)
```

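Edges come from `[[wikilink]]` references in note bodies; the parser lowercases and hyphenates targets, and backlinks are derived automatically at index time. The graph is also reachable through the kernel directly, an internal API sketched here with a hypothetical node id:

```python
from memograph import MemoryKernel

kernel = MemoryKernel("~/my-vault")
kernel.ingest()

# Nodes within one hop of "retrieval-design", following links and backlinks alike
for node in kernel.graph.neighbors("retrieval-design", depth=1):
    print(node.id, "->", node.links)
```
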
### Salience Scoring

Each memory has a salience score (0.0-1.0) that represents its importance:

```yaml
---
title: "Critical Architecture Decision"
salience: 0.9
memory_type: semantic
---

We decided to use PostgreSQL for better ACID guarantees...
```

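Salience feeds retrieval ranking directly, and the internal `HybridRetriever` accepts a minimum threshold. A sketch using that internal API (subject to change before v1.0.0):

```python
from memograph import MemoryKernel

kernel = MemoryKernel("~/my-vault")
kernel.ingest()

# Keep only memories whose salience is at least 0.5, best first
important = kernel.retriever.retrieve(query="architecture", min_salience=0.5, top_k=5)
for node in important:
    print(f"{node.salience:.2f}  {node.title}")
```
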
## 🏗️ Project Structure

```
MemoGraph/
├── memograph/              # Main package
│   ├── core/               # Core functionality
│   │   ├── kernel.py       # Memory kernel
│   │   ├── graph.py        # Graph implementation
│   │   ├── retriever.py    # Hybrid retrieval
│   │   ├── indexer.py      # File indexing
│   │   └── parser.py       # Markdown parsing
│   ├── adapters/           # LLM and embedding adapters
│   │   ├── embeddings/     # Embedding providers
│   │   ├── frameworks/     # Framework integrations
│   │   └── llm/            # LLM providers
│   ├── storage/            # Storage and caching
│   └── cli.py              # CLI implementation
├── tests/                  # Test suite
└── examples/               # Example usage
```

## 🤝 Contributing

We welcome contributions! Please see our [Contributing Guide](CONTRIBUTING.md) for details.

### Development Setup

1. Clone the repository:
```bash
git clone https://github.com/Indhar01/MemoGraph.git
cd MemoGraph
```

2. Install in development mode:
```bash
pip install -e ".[all,dev]"
```

3. Install pre-commit hooks:
```bash
pre-commit install
```

4. Run tests:
```bash
pytest
```

## 📚 Documentation

- **[Contributing Guide](CONTRIBUTING.md)** - How to contribute to the project
- **[Code of Conduct](CODE_OF_CONDUCT.md)** - Community guidelines
- **[Security Policy](SECURITY.md)** - Security reporting and best practices
- **[Changelog](CHANGELOG.md)** - Version history and changes

## 🔒 Security

See our [Security Policy](SECURITY.md) for reporting vulnerabilities.

## 📄 License

This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

## 🌟 Acknowledgments

Inspired by the need for better memory management in LLM applications. Built with:

- Graph-based knowledge representation
- Hybrid retrieval strategies
- Cognitive science principles

## 📬 Contact & Support

- **Issues**: [GitHub Issues](https://github.com/Indhar01/MemoGraph/issues)
- **Discussions**: [GitHub Discussions](https://github.com/Indhar01/MemoGraph/discussions)

## 🚦 Status

This project is in active development. While the core functionality is stable, the API may change in minor versions until we reach v1.0.0.

---

Made with ❤️ for better LLM memory management
memograph-0.0.2.dist-info/RECORD
ADDED
@@ -0,0 +1,19 @@
memograph/__init__.py,sha256=sxGD_kLAm9u30e4JK1nSnPtTl3U7odvMWyv9ToYS5Uk,137
memograph/cli.py,sha256=W3y5cdkVVSB1tkVqcWiAKjhLflCwDVK0J-64963Cuas,7210
memograph/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
memograph/core/__init__.py,sha256=4pp2UsBR8ZNO6pOWcGQFtc-oESYULW_qAV3NfXksl-c,148
memograph/core/assistant.py,sha256=yU4xN4AjQPs6XJbH6ANPtsOkktDOc0b54D6mui-O05U,3671
memograph/core/compressor.py,sha256=4NwdUgrj568rhd_SH1cVbGtv9WPHGh-yp2wj_LJdZsM,869
memograph/core/enums.py,sha256=npxTlajgEyqH__SBprD7kskKRMd5XfBqsm4mfqPMezY,148
memograph/core/graph.py,sha256=HFJhBgC3VD4tWvOPZYx9DEdVvlAhTBZkSAHNEX_EqcM,2782
memograph/core/indexer.py,sha256=PkQR7WLb88ZsWAjMcAFdfFnBmCD1YtmV9adYXbgg2pw,6922
memograph/core/kernel.py,sha256=3HR097ShMyViq-tuU22XicaTLcTYXWhNy4SF2b18FZI,4073
memograph/core/node.py,sha256=uBnaqdtPsC-4zSH3cnJrQcq5gw5pLduoBFNEeE14ObA,1176
memograph/core/parser.py,sha256=GShK6G-iSs7BxT71GQizOimjCTHVws3Qpi3lRstrDe4,1807
memograph/core/retriever.py,sha256=oFU2vAkS-lYTsROLzuvgvuH9K89cgRfLtCOaezjFl1g,2823
memograph-0.0.2.dist-info/licenses/LICENSE,sha256=QJn2W-w0yb-h3SNSWppUpFO4dS7vybnvYVySDbsydas,1096
memograph-0.0.2.dist-info/METADATA,sha256=wt1xXOFPXWxMU7TMi6VYOMmnnP9_KXs36DPZ_q80PRg,10165
memograph-0.0.2.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
memograph-0.0.2.dist-info/entry_points.txt,sha256=nCq_dnXTqGXjwi3AKq8NUC_XGWUUYSrgyIsrN1qvEXw,49
memograph-0.0.2.dist-info/top_level.txt,sha256=rh1wBTM4dvMzgsJONWEliTDNi0AnMkeXNaiA4HS2UX4,10
memograph-0.0.2.dist-info/RECORD,,
memograph-0.0.2.dist-info/licenses/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 Mnemo Contributors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
memograph-0.0.2.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@
memograph