dory-memory 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dory/__init__.py ADDED
@@ -0,0 +1,13 @@
1
+ from .graph import Graph
2
+ from .schema import NodeType, EdgeType
3
+ from .memory import DoryMemory
4
+ from . import session, activation, consolidation
5
+ from .pipeline import Observer, Prefixer, PrefixResult, Decayer, DecayConfig, Reflector
6
+
7
# Public API surface of the dory package, one name per line.
__all__ = [
    "DoryMemory",
    "Graph",
    "NodeType",
    "EdgeType",
    "session",
    "activation",
    "consolidation",
    "Observer",
    "Prefixer",
    "PrefixResult",
    "Decayer",
    "DecayConfig",
    "Reflector",
]
dory/activation.py ADDED
@@ -0,0 +1,213 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime, timezone
4
+
5
+ from .graph import Graph
6
+ from .schema import now_iso, EdgeType
7
+
8
+
9
+ def _fmt_date(iso: str | None) -> str:
10
+ """Return 'YYYY-MM-DD' from an ISO timestamp, or '' on failure."""
11
+ if not iso:
12
+ return ""
13
+ try:
14
+ dt = datetime.fromisoformat(iso.replace("Z", "+00:00"))
15
+ return dt.strftime("%Y-%m-%d")
16
+ except Exception:
17
+ return ""
18
+
19
+
20
+ _STOPWORDS = frozenset({
21
+ "a", "an", "the", "is", "are", "was", "were", "be", "been", "have", "has",
22
+ "had", "do", "does", "did", "will", "would", "should", "may", "might",
23
+ "can", "could", "to", "of", "in", "on", "at", "for", "with", "by", "from",
24
+ "as", "and", "or", "but", "not", "this", "that", "it", "its", "also",
25
+ "more", "than", "just", "very", "all", "any", "one", "two", "get",
26
+ "use", "uses", "used", "using", "new", "add", "i", "my", "me", "you",
27
+ "your", "we", "our", "what", "which", "who", "when", "where", "how",
28
+ "first", "last", "did", "after", "before", "about",
29
+ })
30
+
31
+
32
+ def _fts_query(text: str, n: int = 10) -> str:
33
+ """
34
+ Extract meaningful terms from text for FTS5, joined with OR.
35
+ OR mode gives much better recall than FTS5's default AND.
36
+ Includes numeric tokens (years, day numbers) for date matching.
37
+ """
38
+ import re
39
+ # Alpha tokens (words)
40
+ alpha = re.findall(r"[a-zA-Z]\w*", text)
41
+ # Numeric tokens: extract raw digit sequences (1-4 digits) — captures years, day
42
+ # numbers, and ordinals like "15th" (extracts "15"). Longer numbers are ignored.
43
+ numeric = [m for m in re.findall(r"\d+", text) if 1 <= len(m) <= 4]
44
+
45
+ seen: set[str] = set()
46
+ terms = []
47
+
48
+ for w in alpha:
49
+ lo = w.lower()
50
+ if len(lo) >= 3 and lo not in _STOPWORDS and lo not in seen:
51
+ seen.add(lo)
52
+ terms.append(lo)
53
+ if len(terms) >= n:
54
+ break
55
+
56
+ for num in numeric:
57
+ if num not in seen:
58
+ seen.add(num)
59
+ terms.append(num)
60
+
61
+ return " OR ".join(terms) if terms else text
62
+
63
+
64
def find_seeds(query: str, graph: Graph) -> list[str]:
    """
    Return node IDs ranked by relevance to *query*.

    Strategies, best first:
      1. FTS5 BM25 search (best recall, handles partial terms)
      2. Vector KNN search (semantic similarity, if Ollama is running)
      3. Substring matching — dependency-free fallback, used only when the
         first two strategies return nothing

    FTS hits rank ahead of vector hits; duplicate IDs are dropped.
    """
    from . import store, vector

    # node_id -> score; lower scores sort first.
    score: dict[str, int] = {}

    # 1. FTS BM25 — OR over key terms for recall (avoids AND over-constraining).
    fts_hits = store.search_fts(_fts_query(query), graph.path)
    for pos, nid in enumerate(fts_hits):
        if nid in graph._nodes:
            score[nid] = pos

    # 2. Vector KNN, ranked after every FTS hit.
    if vector.available():
        for pos, nid in enumerate(vector.knn_search(query, graph.path)):
            if nid in graph._nodes and nid not in score:
                score[nid] = len(fts_hits) + pos

    # 3. Substring fallback when nothing matched above.
    if not score:
        words = query.lower().split()
        for node in graph.all_nodes():
            haystack = (node.content + " " + " ".join(node.tags)).lower()
            hit_count = sum(1 for w in words if w in haystack)
            if hit_count:
                score[node.id] = -hit_count  # more hits -> lower (better) score

    return sorted(score, key=score.get)
102
+
103
+
104
def spread(
    seed_ids: list[str],
    graph: Graph,
    depth: int = 3,
    depth_decay: float = 0.5,
    threshold: float = 0.05,
) -> dict[str, float]:
    """
    Spread activation from seed nodes outward through the graph.

    Seeds start at activation 1.0. For up to ``depth`` waves, every frontier
    node pushes ``level * edge.weight * depth_decay`` across each incident
    edge (edges are treated as undirected here). A contribution below
    ``threshold`` is dropped; otherwise it is added to the neighbor's running
    activation, capped at 1.0. Accumulation is order-dependent within a wave:
    a node reached through several edges sums contributions as they are
    visited.

    Side effects: increments ``activation_count`` and stamps
    ``last_activated`` on every node whose final level is >= ``threshold``
    and on every traversed edge.

    Returns {node_id: activation_level} for all nodes above threshold.
    """
    # Seeds start fully activated; the frontier holds the nodes whose
    # activation changed in the previous wave.
    activation: dict[str, float] = {sid: 1.0 for sid in seed_ids}
    frontier: dict[str, float] = dict(activation)

    traversed_edges: set[str] = set()

    for _ in range(depth):
        next_frontier: dict[str, float] = {}
        for node_id, level in frontier.items():
            for edge in graph.edges_for_node(node_id):
                neighbor_id = (
                    edge.target_id if edge.source_id == node_id else edge.source_id
                )
                received = level * edge.weight * depth_decay
                if received >= threshold:
                    # NOTE: the edge counts as traversed even if the 1.0 cap
                    # below means the neighbor's activation doesn't increase.
                    traversed_edges.add(edge.id)
                    current = activation.get(neighbor_id, 0.0)
                    new_val = min(1.0, current + received)
                    if new_val > current:
                        activation[neighbor_id] = new_val
                        next_frontier[neighbor_id] = new_val
        frontier = next_frontier
        if not frontier:
            # No node gained activation this wave — nothing left to spread.
            break

    # Record activation on touched nodes and traversed edges
    now = now_iso()
    for node_id, level in activation.items():
        if level >= threshold:
            node = graph.get_node(node_id)
            if node:
                node.activation_count += 1
                node.last_activated = now

    for edge in graph.all_edges():
        if edge.id in traversed_edges:
            edge.activation_count += 1
            edge.last_activated = now

    return {nid: v for nid, v in activation.items() if v >= threshold}
155
+
156
+
157
def serialize(activated: dict[str, float], graph: Graph, max_nodes: int = 20) -> str:
    """
    Render an activated subgraph as a natural-language context block.

    Nodes are listed by (activation, salience) descending, capped at
    ``max_nodes``. Edges whose endpoints are both activated are appended as
    a "Relationships" section (at most 15 lines); SUPERSEDES edges render as
    knowledge updates rather than plain relations.
    """
    if not activated:
        return "(no relevant memories found)"

    def _rank(item: tuple[str, float]) -> tuple:
        node = graph.get_node(item[0])
        return (item[1], node.salience if node else 0)

    top = sorted(activated.items(), key=_rank, reverse=True)[:max_nodes]

    node_lines: list[str] = []
    for node_id, _level in top:
        node = graph.get_node(node_id)
        if not node:
            continue
        core_marker = " [CORE]" if node.is_core else ""
        # Only EVENT nodes get a date hint from created_at. SESSION nodes
        # already embed their date in the content as "[YYYY-MM-DD] Session: …",
        # so adding another (possibly wrong) date would be redundant.
        date_hint = ""
        if node.type.value == "EVENT" and node.created_at:
            day = _fmt_date(node.created_at)
            if day:
                date_hint = f" ({day})"
        node_lines.append(f"- [{node.type.value}{core_marker}]{date_hint} {node.content}")

    # Relationships between activated nodes, deduplicated by
    # (source, target, edge type).
    activated_ids = set(activated)
    relation_lines: list[str] = []
    emitted: set[tuple] = set()
    for edge in graph.all_edges():
        if edge.source_id not in activated_ids or edge.target_id not in activated_ids:
            continue
        key = (edge.source_id, edge.target_id, edge.type.value)
        if key in emitted:
            continue
        src = graph.get_node(edge.source_id)
        tgt = graph.get_node(edge.target_id)
        if src and tgt:
            if edge.type == EdgeType.SUPERSEDES:
                day = _fmt_date(src.superseded_at or edge.created_at)
                date_str = f" (updated {day})" if day else ""
                relation_lines.append(
                    f" [KNOWLEDGE UPDATE{date_str}] Previously: {src.content} → Now: {tgt.content}"
                )
            else:
                relation_lines.append(
                    f" {src.content} --[{edge.type.value}]--> {tgt.content}"
                )
        emitted.add(key)

    result = "Activated memories:\n" + "\n".join(node_lines)
    if relation_lines:
        result += "\n\nRelationships:\n" + "\n".join(relation_lines[:15])
    return result
@@ -0,0 +1,7 @@
1
+ """
2
+ Dory framework adapters.
3
+
4
+ from dory.adapters.langchain import DoryMemoryAdapter # LangChain BaseMemory
5
+ from dory.adapters.langgraph import DoryMemoryNode # LangGraph node functions
6
+ from dory.adapters.multi_agent import SharedMemoryPool # thread-safe multi-agent memory
7
+ """
@@ -0,0 +1,156 @@
1
+ """
2
+ LangChain memory adapter for Dory.
3
+
4
+ Implements LangChain's BaseMemory interface so Dory can be used as a
5
+ drop-in memory backend in any LangChain chain or agent.
6
+
7
+ Usage:
8
+ from dory.adapters.langchain import DoryMemoryAdapter
9
+ from langchain.chains import ConversationChain
10
+ from langchain_anthropic import ChatAnthropic
11
+
12
+ memory = DoryMemoryAdapter(
13
+ db_path="myapp.db",
14
+ extract_model="claude-haiku-4-5-20251001",
15
+ extract_backend="anthropic",
16
+ extract_api_key="sk-ant-...",
17
+ )
18
+
19
+ chain = ConversationChain(
20
+ llm=ChatAnthropic(model="claude-sonnet-4-6"),
21
+ memory=memory,
22
+ )
23
+
24
+ response = chain.invoke({"input": "What are we working on?"})
25
+ # memory context is injected automatically via load_memory_variables()
26
+ # turns are saved automatically via save_context()
27
+ """
28
+ from __future__ import annotations
29
+
30
+ from pathlib import Path
31
+ from typing import Any
32
+
33
+ from ..memory import DoryMemory
34
+ from .. import store as _store
35
+
36
+
37
class DoryMemoryAdapter:
    """
    LangChain-compatible memory backend backed by Dory.

    Exposes two memory variables:
      - ``context`` — spreading-activation retrieval from the graph
      - ``history`` — last N raw turns from the episodic store

    Duck-types LangChain's BaseMemory surface (``memory_variables``,
    ``load_memory_variables``, ``save_context``, ``clear`` and their async
    counterparts) so langchain itself is not a hard dependency.
    """

    # Names of the variables this memory contributes to the prompt.
    memory_variables: list[str] = ["context", "history"]

    def __init__(
        self,
        db_path: str | Path | None = None,
        extract_model: str | None = None,
        extract_backend: str = "ollama",
        extract_base_url: str = "http://localhost:11434",
        extract_api_key: str = "local",
        history_turns: int = 6,
        input_key: str = "input",
        output_key: str = "output",
    ) -> None:
        self._dory = DoryMemory(
            db_path=db_path,
            extract_model=extract_model,
            extract_backend=extract_backend,
            extract_base_url=extract_base_url,
            extract_api_key=extract_api_key,
        )
        self._history_turns = history_turns
        self._input_key = input_key
        self._output_key = output_key

    # ------------------------------------------------------------------
    # LangChain BaseMemory interface
    # ------------------------------------------------------------------

    def load_memory_variables(self, inputs: dict[str, Any]) -> dict[str, str]:
        """
        Build the memory variables for the start of a chain run: graph
        context relevant to the current input plus recent raw history.
        """
        ctx = self._dory.build_context(inputs.get(self._input_key, ""))
        return {"context": ctx.full, "history": self._recent_history()}

    def save_context(
        self,
        inputs: dict[str, Any],
        outputs: dict[str, Any],
    ) -> None:
        """Log both sides of the completed exchange (called at chain end)."""
        for role, text in (
            ("user", str(inputs.get(self._input_key, ""))),
            ("assistant", str(outputs.get(self._output_key, ""))),
        ):
            if text:
                self._dory.add_turn(role, text)

    def clear(self) -> None:
        """Flush pending memory and run consolidation."""
        self._dory.flush()

    # ------------------------------------------------------------------
    # Async interface
    # ------------------------------------------------------------------

    async def aload_memory_variables(
        self, inputs: dict[str, Any]
    ) -> dict[str, str]:
        """Async counterpart of load_memory_variables() for async chains."""
        ctx = await self._dory.abuild_context(inputs.get(self._input_key, ""))
        return {"context": ctx.full, "history": self._recent_history()}

    async def asave_context(
        self,
        inputs: dict[str, Any],
        outputs: dict[str, Any],
    ) -> None:
        """Async counterpart of save_context()."""
        for role, text in (
            ("user", str(inputs.get(self._input_key, ""))),
            ("assistant", str(outputs.get(self._output_key, ""))),
        ):
            if text:
                await self._dory.aadd_turn(role, text)

    async def aclear(self) -> None:
        """Async counterpart of clear()."""
        await self._dory.aflush()

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _recent_history(self) -> str:
        """Last N raw turns from the episodic store, oldest first."""
        rows = _store.get_observations(
            self._dory.graph.path,
            limit=self._history_turns,
        )
        if not rows:
            return ""
        return "\n".join(
            f"{o['role'].upper()}: {o['content']}" for o in reversed(rows)
        )

    @property
    def dory(self) -> DoryMemory:
        """Underlying DoryMemory instance, for power users."""
        return self._dory
@@ -0,0 +1,174 @@
1
+ """
2
+ LangGraph memory adapter for Dory.
3
+
4
+ Provides DoryMemoryNode — a class whose methods are designed to be used
5
+ as nodes in a LangGraph StateGraph. Handles memory retrieval, turn logging,
6
+ and end-of-session consolidation as discrete graph nodes.
7
+
8
+ Usage:
9
+ from dory.adapters.langgraph import DoryMemoryNode, MemoryState
10
+ from langgraph.graph import StateGraph, START, END
11
+
12
+ mem = DoryMemoryNode(
13
+ db_path="myapp.db",
14
+ extract_model="claude-haiku-4-5-20251001",
15
+ extract_backend="anthropic",
16
+ extract_api_key="sk-ant-...",
17
+ )
18
+
19
+ builder = StateGraph(MemoryState)
20
+ builder.add_node("load_memory", mem.load_context)
21
+ builder.add_node("record_turn", mem.record_turn)
22
+ builder.add_edge(START, "load_memory")
23
+ builder.add_edge("load_memory", "record_turn")
24
+ builder.add_edge("record_turn", END)
25
+ graph = builder.compile()
26
+
27
+ # In your agent loop:
28
+ state = graph.invoke({"query": "What are we building?", "messages": []})
29
+ # state["context"] is now populated with relevant memory
30
+ """
31
+ from __future__ import annotations
32
+
33
+ from pathlib import Path
34
+ from typing import Any, TypedDict
35
+
36
+ from ..memory import DoryMemory
37
+
38
+
39
class MemoryState(TypedDict, total=False):
    """
    Typed state dict for LangGraph graphs that use DoryMemoryNode.

    All keys are optional (``total=False``). Add these fields to your own
    StateGraph state to enable memory; ``context`` and ``memory_stats`` are
    written by DoryMemoryNode, while ``query`` and ``messages`` are read by it.
    """
    query: str  # the current user query
    context: str  # memory context retrieved by load_context
    messages: list[dict]  # conversation messages [{"role": ..., "content": ...}]
    memory_stats: dict  # populated by consolidate()
49
+
50
+
51
class DoryMemoryNode:
    """
    LangGraph node class for Dory memory operations.

    Every public method takes and returns a state dict (``(state) -> state``)
    so it can be handed directly to ``StateGraph.add_node()``. The ``a``-
    prefixed methods mirror the sync ones for graphs driven through
    ``graph.ainvoke`` / ``astream``.
    """

    def __init__(
        self,
        db_path: str | Path | None = None,
        extract_model: str | None = None,
        extract_backend: str = "ollama",
        extract_base_url: str = "http://localhost:11434",
        extract_api_key: str = "local",
    ) -> None:
        self._dory = DoryMemory(
            db_path=db_path,
            extract_model=extract_model,
            extract_backend=extract_backend,
            extract_base_url=extract_base_url,
            extract_api_key=extract_api_key,
        )

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _log_messages(self, messages: list[dict]) -> None:
        """Record each message's role/content in the episodic store."""
        for message in messages:
            content = message.get("content", "")
            if content:
                self._dory.add_turn(message.get("role", "user"), str(content))

    async def _alog_messages(self, messages: list[dict]) -> None:
        """Async twin of _log_messages()."""
        for message in messages:
            content = message.get("content", "")
            if content:
                await self._dory.aadd_turn(message.get("role", "user"), str(content))

    # ------------------------------------------------------------------
    # Node functions (state → state)
    # ------------------------------------------------------------------

    def load_context(self, state: dict[str, Any]) -> dict[str, Any]:
        """
        Retrieve memory context relevant to state["query"] and return the
        state with state["context"] populated. Add this as the first node
        in your graph.
        """
        ctx = self._dory.build_context(state.get("query", ""))
        return {**state, "context": ctx.full}

    def record_turn(self, state: dict[str, Any]) -> dict[str, Any]:
        """
        Log the last entry of state["messages"] to the episodic store.
        No-op when the list is empty or the entry has no content.
        """
        self._log_messages(state.get("messages", [])[-1:])
        return state

    def record_exchange(self, state: dict[str, Any]) -> dict[str, Any]:
        """
        Log the last two messages (a user+assistant exchange). Use instead
        of record_turn when your graph appends both turns at once.
        """
        self._log_messages(state.get("messages", [])[-2:])
        return state

    def consolidate(self, state: dict[str, Any]) -> dict[str, Any]:
        """
        End-of-session consolidation: flush pending turns, run decay/dedup.
        Returns the state with state["memory_stats"] set to the results.
        Add as a terminal node or call at session end.
        """
        return {**state, "memory_stats": self._dory.flush()}

    # ------------------------------------------------------------------
    # Async node functions — use with graph.ainvoke / astream
    # ------------------------------------------------------------------

    async def aload_context(self, state: dict[str, Any]) -> dict[str, Any]:
        """Async version of load_context()."""
        ctx = await self._dory.abuild_context(state.get("query", ""))
        return {**state, "context": ctx.full}

    async def arecord_turn(self, state: dict[str, Any]) -> dict[str, Any]:
        """Async version of record_turn()."""
        await self._alog_messages(state.get("messages", [])[-1:])
        return state

    async def arecord_exchange(self, state: dict[str, Any]) -> dict[str, Any]:
        """Async version of record_exchange()."""
        await self._alog_messages(state.get("messages", [])[-2:])
        return state

    async def aconsolidate(self, state: dict[str, Any]) -> dict[str, Any]:
        """Async version of consolidate()."""
        return {**state, "memory_stats": await self._dory.aflush()}

    # ------------------------------------------------------------------
    # Direct access
    # ------------------------------------------------------------------

    @property
    def dory(self) -> DoryMemory:
        """The underlying DoryMemory instance."""
        return self._dory

    def observe(self, content: str, node_type: str = "CONCEPT") -> str:
        """Manually add a memory node. Returns the new node ID."""
        return self._dory.observe(content, node_type=node_type)