memnos 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- core/__init__.py +12 -0
- core/consolidate.py +150 -0
- core/control.py +517 -0
- core/embed.py +42 -0
- core/encode.py +123 -0
- core/local_models.py +37 -0
- core/redact.py +87 -0
- core/rerank.py +27 -0
- core/retrieve.py +92 -0
- core/schema.sql +124 -0
- core/service.py +425 -0
- core/store.py +757 -0
- core/temporal.py +125 -0
- core/usage.py +62 -0
- core/vault.py +116 -0
- memnos-0.1.0.data/data/share/memnos/ui/app.css +30 -0
- memnos-0.1.0.data/data/share/memnos/ui/app.js +181 -0
- memnos-0.1.0.data/data/share/memnos/ui/index.html +120 -0
- memnos-0.1.0.dist-info/METADATA +211 -0
- memnos-0.1.0.dist-info/RECORD +29 -0
- memnos-0.1.0.dist-info/WHEEL +5 -0
- memnos-0.1.0.dist-info/entry_points.txt +2 -0
- memnos-0.1.0.dist-info/licenses/LICENSE +201 -0
- memnos-0.1.0.dist-info/top_level.txt +6 -0
- memnos_admin.py +157 -0
- memnos_cli.py +591 -0
- memnos_mcp.py +405 -0
- memnos_server.py +708 -0
- nsresolve.py +57 -0
core/__init__.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""memnos brain-inspired memory engine (B1+).
|
|
2
|
+
|
|
3
|
+
Layers: raw_turns (sensory) → episodic (hippocampus) → semantic (neocortex).
|
|
4
|
+
B1 = schema + write-time encoding (event segmentation, salience, entity graph).
|
|
5
|
+
"""
|
|
6
|
+
from .store import BrainStore
|
|
7
|
+
from .encode import Encoder, extract_entities, salience
|
|
8
|
+
from .consolidate import Consolidator
|
|
9
|
+
from .retrieve import Retriever, context_block
|
|
10
|
+
|
|
11
|
+
__all__ = ["BrainStore", "Encoder", "extract_entities", "salience", "Consolidator",
|
|
12
|
+
"Retriever", "context_block"]
|
core/consolidate.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""B2 — CONSOLIDATION ("sleep") pass. The core accuracy fix.
|
|
2
|
+
|
|
3
|
+
Offline, the brain replays episodes and writes durable SEMANTIC memory. We do the
|
|
4
|
+
same: episodic events → (1) decontextualized propositions, (2) per-entity dossiers
|
|
5
|
+
that PRE-JOIN multi-hop facts ("A works at B" + "B in C" => "A works in C"). Every
|
|
6
|
+
semantic fact keeps provenance back to its episodic evidence (auditable — consolidation
|
|
7
|
+
hallucinates) and a valid_from for bi-temporal recall. New facts SUPERSEDE
|
|
8
|
+
contradicted ones (set valid_to, never delete).
|
|
9
|
+
|
|
10
|
+
LLM is used HERE (offline) — never at query time. Calls run concurrently; pass a
|
|
11
|
+
metered client to enforce a budget.
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
import threading
|
|
17
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
18
|
+
|
|
19
|
+
from .store import BrainStore
|
|
20
|
+
|
|
21
|
+
# System prompt for pass 1: turns one dated episode into standalone facts.
# The model is asked to reply with a JSON object holding a "facts" list.
PROP_SYS = (
    "Convert this dated conversation EVENT into atomic, self-contained FACTS. "
    "Resolve pronouns and references to explicit named entities. "
    "Attach the date when relevant. "
    "Each fact must be understandable with NO access to the conversation. "
    'Return JSON {"facts": ["...", ...]} — short declarative sentences, no commentary.'
)
|
|
26
|
+
|
|
27
|
+
# System prompt for pass 2: merges the facts known about one subject into
# current facts, deriving combined (multi-hop) facts and preferring the most
# recent value on conflict. The reply format is a JSON object with a "facts" list.
DOSSIER_SYS = (
    "You consolidate everything known about ONE subject into durable, CURRENT facts. "
    "Critically, DERIVE facts that require COMBINING multiple inputs "
    "(e.g. 'Alice works at Boeing' + 'Boeing is in Seattle' => 'Alice works in Seattle'). "
    "When inputs conflict, keep the MOST RECENT (dates are given) and drop the stale one. "
    "Preserve dates. "
    'Return JSON {"facts": ["...", ...]} of standalone sentences.'
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _facts(cli, model, sys_prompt, content, meter):
|
|
36
|
+
r = cli.chat.completions.create(
|
|
37
|
+
model=model, temperature=0, max_tokens=700,
|
|
38
|
+
response_format={"type": "json_object"},
|
|
39
|
+
messages=[{"role": "system", "content": sys_prompt},
|
|
40
|
+
{"role": "user", "content": content}])
|
|
41
|
+
if meter is not None:
|
|
42
|
+
meter.record("consolidate", model, r.usage.prompt_tokens, r.usage.completion_tokens)
|
|
43
|
+
try:
|
|
44
|
+
return [str(x).strip() for x in json.loads(r.choices[0].message.content).get("facts", [])
|
|
45
|
+
if str(x).strip()]
|
|
46
|
+
except (json.JSONDecodeError, ValueError, AttributeError):
|
|
47
|
+
return []
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class Consolidator:
    """Offline consolidation pass: episodes -> propositions -> per-entity dossiers.

    ``run()`` fetches unconsolidated episodes from the store, asks the LLM for
    standalone facts per episode (pass 1), asks it again for merged facts per
    entity (pass 2), writes every fact as a semantic row with provenance, and
    finally marks the episodes consolidated.

    Args:
        store: Persistence layer; the methods used here are ``fetch_episodes``,
            ``entity_episodes``, ``supersede_similar``, ``supersede_subject``,
            ``insert_semantic``, ``add_provenance``, ``upsert_entity``,
            ``add_mention`` and ``mark_consolidated``.
        schema: Schema identifier passed through to every store call.
        ns: Namespace passed through to the store calls that take one.
        llm: Chat client handed to ``_facts``.
        model: Model name handed to ``_facts``.
        embed_fn: Callable mapping a string to its embedding. If it has a
            ``prime`` attribute, ``run()`` calls it once with all fact strings.
        meter: Optional usage recorder handed to ``_facts``.
        workers: Thread-pool size for the concurrent LLM calls.
        max_entities: Upper bound on entity clusters processed per run.
        min_episodes: Forwarded to ``store.entity_episodes``.
        max_facts_per_dossier: Upper bound on facts written per dossier.
    """

    def __init__(self, store: "BrainStore", schema: str, ns: str, llm, model: str,
                 embed_fn, meter=None, workers: int = 8,
                 max_entities: int = 30, min_episodes: int = 3, max_facts_per_dossier: int = 8):
        self.store, self.schema, self.ns = store, schema, ns
        self.llm, self.model, self.embed = llm, model, embed_fn
        self.meter, self.workers = meter, workers
        self.max_entities, self.min_episodes = max_entities, min_episodes
        self.max_facts_per_dossier = max_facts_per_dossier
        self._wlock = threading.Lock()  # serialize DB writes (single conn)
        self._seen = set()  # statement-level dedup within a run (reset by run())

    # --- pass 1: episode -> propositions ---------------------------------
    def _propositions(self, episodes):
        """Return ``[(episode, [fact, ...]), ...]`` in input order.

        Each episode is sent to the LLM as its ``t_start`` date line followed
        by its ``text``; the calls run concurrently on a thread pool.
        """
        def one(ep):
            content = (f"[date: {ep['t_start']}]\n{ep['text']}")
            facts = _facts(self.llm, self.model, PROP_SYS, content, self.meter)
            return ep, facts

        with ThreadPoolExecutor(max_workers=self.workers) as ex:
            # Executor.map preserves input order.
            return list(ex.map(one, episodes))

    # --- pass 2: entity -> dossier (multi-hop pre-join) ------------------
    def _dossiers(self, ent_clusters, prop_by_ep):
        """Return ``[(name, ep_ids, [fact, ...]), ...]``, one entry per cluster.

        ``ent_clusters`` items are read via ``item["name"]`` and
        ``item["ep_ids"]``; ``prop_by_ep`` maps an episode id to its pass-1
        facts. A cluster with fewer than 3 collected facts is returned with an
        empty fact list and costs no LLM call; otherwise at most the first 50
        facts are sent.
        """
        def one(item):
            name, ep_ids = item["name"], item["ep_ids"]
            facts_in = []
            for eid in ep_ids:
                facts_in += prop_by_ep.get(eid, [])
            if len(facts_in) < 3:
                return name, ep_ids, []
            content = f"Subject: {name}\nKnown facts (dated):\n- " + "\n- ".join(facts_in[:50])
            return name, ep_ids, _facts(self.llm, self.model, DOSSIER_SYS, content, self.meter)

        with ThreadPoolExecutor(max_workers=self.workers) as ex:
            return list(ex.map(one, ent_clusters))

    def _write(self, kind, statement, ep_ids, valid_from, salience, subject=None):
        """Persist one semantic fact and return how many rows it superseded.

        The statement is deduplicated on its lower-cased, whitespace-collapsed
        form; a duplicate is skipped and ``0`` is returned. Otherwise the
        statement is embedded, the store's supersede calls are run, the fact is
        inserted with provenance to ``ep_ids`` and, when ``subject`` is given,
        it is also linked to that entity. The return value is the sum of the
        counts returned by ``supersede_similar`` and (for a subject)
        ``supersede_subject``.
        """
        key = " ".join(statement.lower().split())
        with self._wlock:
            if key in self._seen:  # dedup identical consolidated statements
                return 0
            self._seen.add(key)
        # Embedding happens outside the lock; only store access is serialized.
        vec = self.embed(statement)
        with self._wlock:
            n_super = self.store.supersede_similar(self.schema, self.ns, vec, subject, valid_from)
            if subject:  # belief-change: close out the prior value for this subject
                n_super += self.store.supersede_subject(self.schema, self.ns, subject, vec, valid_from)
            sid = self.store.insert_semantic(
                self.schema, self.ns, kind, statement, subject=subject,
                valid_from=valid_from, salience=salience, vec=vec)
            self.store.add_provenance(self.schema, sid, ep_ids)
            # link semantic fact to the entity graph too
            if subject:
                eid = self.store.upsert_entity(self.schema, self.ns, subject[:100], vec=self.embed(subject))
                self.store.add_mention(self.schema, eid, sid, "semantic")
        return n_super

    def run(self) -> dict:
        """Consolidate all unconsolidated episodes and return run counters.

        Returns:
            ``{"episodes", "propositions", "dossiers", "superseded"}``.
            ``propositions`` and ``dossiers`` count the facts handed to
            ``_write`` (duplicates it skipped are included); ``superseded`` is
            the sum of ``_write`` return values.
        """
        # The dedup set is per-run state: without this reset a reused instance
        # would silently drop facts that are re-asserted in a later run and the
        # set would grow for the lifetime of the object.
        with self._wlock:
            self._seen.clear()

        episodes = self.store.fetch_episodes(self.schema, self.ns, only_unconsolidated=True)
        if not episodes:
            return {"episodes": 0, "propositions": 0, "dossiers": 0, "superseded": 0}

        # PASS 1 — propositions per episode
        prop_results = self._propositions(episodes)
        prop_by_ep, n_prop, superseded = {}, 0, 0
        for ep, facts in prop_results:
            prop_by_ep[ep["id"]] = facts

        # PASS 2 — entity dossiers (the multi-hop pre-join); cap entities to cut noise
        clusters = self.store.entity_episodes(self.schema, self.ns,
                                              min_episodes=self.min_episodes)[: self.max_entities]
        dossiers = self._dossiers(clusters, prop_by_ep)

        # batch-embed all fact statements up front if the embedder supports it (fast OpenAI path)
        all_facts = [f for _, fs in prop_results for f in fs]
        for _, _, fs in dossiers:
            all_facts += fs[: self.max_facts_per_dossier]
        if hasattr(self.embed, "prime"):
            self.embed.prime(all_facts)

        for ep, facts in prop_results:
            for f in facts:
                superseded += self._write("proposition", f, [ep["id"]], ep["t_start"],
                                          float(ep["salience"]))
                n_prop += 1
        n_dos = 0
        ep_time = {e["id"]: e["t_start"] for e in episodes}
        for name, ep_ids, facts in dossiers:
            # valid_from of a dossier fact = latest t_start among its episodes
            # that belong to this run; None when none of them do.
            vf = max((ep_time.get(i) for i in ep_ids if ep_time.get(i)), default=None)
            for f in facts[: self.max_facts_per_dossier]:
                superseded += self._write("dossier", f, list(ep_ids), vf, 0.8, subject=name)
                n_dos += 1

        self.store.mark_consolidated(self.schema, [e["id"] for e in episodes])
        return {"episodes": len(episodes), "propositions": n_prop, "dossiers": n_dos,
                "superseded": superseded}
|