hb-cortex-memory 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,126 @@
1
+ """
2
+ cortex_memory — the CORTEX hierarchical-memory engine, extracted as a
3
+ host-independent package (Phase 12 track `04`).
4
+
5
+ Boundary rule (the whole point of the extraction): **this package never
6
+ imports the host** (`src.ai.*`). The host depends on the package and injects
7
+ its concerns — LLM calls, embeddings, usage metering, run lookups — through the
8
+ Protocols in :mod:`cortex_memory.providers`. A host adapter (the thin
9
+ ``cortex_bridge`` that stays in ``src/ai/memory``) implements those Protocols.
10
+
11
+ Stage-B status: the data layer (own ``Base`` + ORM models + enums + DTOs) and
12
+ the provider boundary live here. The CORTEX services move in next; see
13
+ ``README.md``.
14
+ """
15
+ from __future__ import annotations
16
+
17
+ from cortex_memory.db import Base, metadata
18
+ from cortex_memory.dtos import (
19
+ DEFAULT_TRUST_BY_SOURCE,
20
+ CortexCheckpointCreate,
21
+ CortexNodeContentResponse,
22
+ CortexNodeCreate,
23
+ CortexNodeDetailResponse,
24
+ CortexNodeSummary,
25
+ CortexRecurseRequest,
26
+ CortexTreeCreate,
27
+ CortexTreeListResponse,
28
+ CortexTreeResponse,
29
+ CortexViewportResponse,
30
+ GoalNode,
31
+ Provenance,
32
+ SourceType,
33
+ )
34
+ from cortex_memory.enums import (
35
+ CortexNodeStatus,
36
+ CortexNodeType,
37
+ CortexTreeStatus,
38
+ MemoryDomain,
39
+ ScopeLevel,
40
+ )
41
+ from cortex_memory.assembly import MemoryAssemblyResult, MemoryAssemblyService
42
+ from cortex_memory.dreaming import DreamingEngine
43
+ from cortex_memory.episodic_tree import EpisodicTreeService
44
+ from cortex_memory.experience_tree import ExperienceTreeService
45
+ from cortex_memory.graph import SemanticGraphService
46
+ from cortex_memory.intelligence_tree import IntelligenceTreeService
47
+ from cortex_memory.knowledge_tree import KnowledgeTreeService
48
+ from cortex_memory.models import CortexEdge, CortexNode, CortexTree
49
+ from cortex_memory.prompts import CORTEX_OPS_HELP
50
+ from cortex_memory.service import (
51
+ CheckpointData,
52
+ CortexService,
53
+ NodeContent,
54
+ NodeSummaryDTO,
55
+ Viewport,
56
+ )
57
+ from cortex_memory.providers import (
58
+ EmbeddingProvider,
59
+ EmbeddingResult,
60
+ LLMProvider,
61
+ LLMResult,
62
+ RunfRef,
63
+ RunRef,
64
+ RunRegistry,
65
+ UsageReporter,
66
+ )
67
+ from cortex_memory.scope_policy import ScopePolicy, ScopeViolation
68
+
69
+ __version__ = "0.1.0"
70
+
71
+ __all__ = [
72
+ # data layer
73
+ "Base",
74
+ "metadata",
75
+ "CortexTree",
76
+ "CortexNode",
77
+ "CortexEdge",
78
+ "CortexTreeStatus",
79
+ "CortexNodeType",
80
+ "CortexNodeStatus",
81
+ "MemoryDomain",
82
+ "ScopeLevel",
83
+ # DTOs
84
+ "Provenance",
85
+ "SourceType",
86
+ "DEFAULT_TRUST_BY_SOURCE",
87
+ "GoalNode",
88
+ "CortexTreeCreate",
89
+ "CortexTreeResponse",
90
+ "CortexTreeListResponse",
91
+ "CortexNodeSummary",
92
+ "CortexViewportResponse",
93
+ "CortexNodeContentResponse",
94
+ "CortexNodeCreate",
95
+ "CortexCheckpointCreate",
96
+ "CortexRecurseRequest",
97
+ "CortexNodeDetailResponse",
98
+ # providers
99
+ "LLMProvider",
100
+ "LLMResult",
101
+ "EmbeddingProvider",
102
+ "EmbeddingResult",
103
+ "UsageReporter",
104
+ "RunRegistry",
105
+ "RunRef",
106
+ "RunfRef",
107
+ # service
108
+ "CortexService",
109
+ "SemanticGraphService",
110
+ "KnowledgeTreeService",
111
+ "EpisodicTreeService",
112
+ "ExperienceTreeService",
113
+ "IntelligenceTreeService",
114
+ "DreamingEngine",
115
+ "MemoryAssemblyService",
116
+ "MemoryAssemblyResult",
117
+ "Viewport",
118
+ "NodeSummaryDTO",
119
+ "NodeContent",
120
+ "CheckpointData",
121
+ "CORTEX_OPS_HELP",
122
+ # tree primitives
123
+ "ScopePolicy",
124
+ "ScopeViolation",
125
+ "__version__",
126
+ ]
@@ -0,0 +1,83 @@
1
+ """
2
+ cortex_memory._textutil — small pure text helpers (vendored, host-free).
3
+
4
+ These are tiny utility functions the CORTEX services use; vendored into the
5
+ package so it carries no dependency on the host's ``ai.shared`` utilities.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import logging
11
+ import re
12
+ from typing import Any, Dict, List, Optional, cast
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
def truncate_for_storage(data: Any, max_chars: int = 400) -> str:
    """Render *data* as text and cap it at *max_chars* characters.

    ``None`` yields the empty string and ``str`` input is sliced as-is. Every
    other value is serialised with ``json.dumps`` (values JSON cannot encode
    are passed through ``str`` via ``default=str``); if serialisation itself
    raises, the plain ``str()`` of the value is used instead.
    """
    if data is None:
        return ""

    if isinstance(data, str):
        rendered = data
    else:
        try:
            rendered = json.dumps(data, default=str)
        except Exception:
            # e.g. circular references: fall back to the plain representation.
            rendered = str(data)

    return rendered[:max_chars]
28
+
29
+
30
def strip_markdown_fences(text: str) -> str:
    """Remove a surrounding Markdown code fence from LLM output.

    Text that does not start with a fence is returned stripped but otherwise
    untouched. For fenced text the opening fence line (including any language
    tag such as ``json``) is dropped, and so is a trailing closing fence,
    whether it sits on its own line or is glued to the last content line.

    A payload fenced on a single line (fence, content, fence with no newline)
    has the backtick runs peeled off both ends rather than being discarded;
    a language tag cannot be told apart from content there, so it is kept.
    A lone opening fence with nothing closing it yields the empty string.
    """
    text = text.strip()
    if not text.startswith("```"):
        return text

    opener, newline, body = text.partition("\n")
    if not newline:
        # Everything is on one line, so there is no "opening line" to drop.
        if len(opener) >= 6 and opener.endswith("```"):
            return opener[3:-3].strip()
        return ""

    # Drop the closing fence even when it shares a line with the content.
    body = body.rstrip()
    if body.endswith("```"):
        body = body[:-3]
    return body.strip()
38
+
39
+
40
def parse_json_array(text: str, warn_label: str = "LLM output") -> List[Dict[str, Any]]:
    """Extract a JSON array from LLM output, returning ``[]`` when none parses.

    Markdown fences are stripped first. The whole payload is tried as JSON;
    if that fails or is not a list, the widest ``[...]`` span in the text is
    tried instead. *warn_label* only names the source in the warning that is
    logged when both attempts fail.
    """
    payload = strip_markdown_fences(text)

    # Attempt 1: the entire payload is the array.
    try:
        whole = json.loads(payload)
    except json.JSONDecodeError:
        whole = None
    if isinstance(whole, list):
        return cast(List[Dict[str, Any]], whole)

    # Attempt 2: an array embedded in surrounding prose (greedy, multi-line).
    bracketed = re.search(r"\[.*\]", payload, re.DOTALL)
    if bracketed is not None:
        try:
            return cast(List[Dict[str, Any]], json.loads(bracketed.group()))
        except json.JSONDecodeError:
            pass

    logger.warning(f"Failed to parse JSON array from {warn_label}: {payload[:200]}")
    return []
57
+
58
+
59
def parse_json_object(text: str, warn_label: str = "LLM output") -> Optional[Dict[str, Any]]:
    """Extract a JSON object from LLM output, returning ``None`` when none parses.

    Markdown fences are stripped first. The whole payload is tried as JSON;
    if that fails or is not a dict, the widest ``{...}`` span in the text is
    tried instead. *warn_label* only names the source in the warning that is
    logged when both attempts fail.
    """
    payload = strip_markdown_fences(text)

    # Attempt 1: the entire payload is the object.
    try:
        whole = json.loads(payload)
    except json.JSONDecodeError:
        whole = None
    if isinstance(whole, dict):
        return cast(Dict[str, Any], whole)

    # Attempt 2: an object embedded in surrounding prose (greedy, multi-line).
    braced = re.search(r"\{.*\}", payload, re.DOTALL)
    if braced is not None:
        try:
            return cast(Dict[str, Any], json.loads(braced.group()))
        except json.JSONDecodeError:
            pass

    logger.warning(f"Failed to parse JSON object from {warn_label}: {payload[:200]}")
    return None
76
+
77
+
78
+ __all__ = [
79
+ "truncate_for_storage",
80
+ "strip_markdown_fences",
81
+ "parse_json_array",
82
+ "parse_json_object",
83
+ ]
@@ -0,0 +1,335 @@
1
+ """
2
+ cortex_memory.assembly — Unified Memory Assembly Pipeline (Phase F)
3
+
4
+ Central orchestrator that replaces MemoryRouter.retrieve() with a
5
+ comprehensive assembly that draws from all four memory domains:
6
+ - Knowledge (reference nodes from persistent KB trees)
7
+ - Experience (suggestions from learned patterns)
8
+ - Intelligence (distilled rules and strategies)
9
+ - Episodic (recent execution history)
10
+
11
+ Usage:
12
+ assembler = MemoryAssemblyService(db, company_id)
13
+ result = await assembler.assemble_runtime_memory(
14
+ entity_id=entity_id,
15
+ task_description="Analyze Q3 revenue trends",
16
+ )
17
+ prompt_text = result.formatted_prompt
18
+ """
19
+ from __future__ import annotations
20
+
21
+ import json
22
+ import logging
23
+ from dataclasses import dataclass, field
24
+ from datetime import datetime, timedelta
25
+ from typing import Any, Dict, List, Optional
26
+ from uuid import UUID
27
+
28
+ from sqlalchemy.ext.asyncio import AsyncSession
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
@dataclass
class MemoryAssemblyResult:
    """Everything a single memory-assembly pass produced.

    The four lists hold one entry per retrieved item for their domain and each
    instance gets its own fresh lists. ``formatted_prompt`` is the rendered
    text built from those lists; it stays empty until the assembler fills it.
    """

    # Knowledge-domain search hits.
    knowledge_refs: List[Dict[str, Any]] = field(default_factory=list)

    # Experience-domain suggestions.
    experience_suggestions: List[Dict[str, Any]] = field(default_factory=list)

    # Intelligence-domain rules.
    intelligence_rules: List[Dict[str, Any]] = field(default_factory=list)

    # Episodic-domain entries (recent and topic-relevant episodes).
    episodic_context: List[Dict[str, Any]] = field(default_factory=list)

    # Prompt text rendered from the lists above.
    formatted_prompt: str = ""
41
+
42
+
43
class MemoryAssemblyService:
    """
    Unified Memory Assembly Pipeline for v2.

    Replaces MemoryRouter.retrieve() with a comprehensive assembly
    that draws from all four memory domains (knowledge, experience,
    intelligence, episodic) and renders the result as prompt text.

    Every domain step is best-effort: a failure inside one domain is logged
    at debug level and that domain contributes an empty list, so one broken
    domain never prevents the others from being assembled.
    """

    def __init__(
        self,
        db: AsyncSession,
        company_id: UUID,
        *,
        embedding: Any = None,
        llm: Any = None,
        child_run_factory: Any = None,
    ):
        """Store the session, tenant id and the injected providers.

        Args:
            db: Async SQLAlchemy session handed to every service constructed here.
            company_id: Company the assembled memory belongs to.
            embedding: Embedding provider forwarded to the graph / tree services.
            llm: LLM provider forwarded to ``CortexService``.
            child_run_factory: Forwarded to ``CortexService`` unchanged.
        """
        self.db = db
        self.company_id = company_id
        # Injected cortex_memory providers, passed down to the graph / domain /
        # CORTEX services this assembler constructs.
        self._embedding = embedding
        self._llm = llm
        self._child_run_factory = child_run_factory

    async def assemble_runtime_memory(
        self,
        entity_id: UUID,
        user_id: Optional[UUID] = None,
        task_description: str = "",
        runtime_tree: Any = None,
        include_domains: Optional[List[str]] = None,
    ) -> MemoryAssemblyResult:
        """
        Assemble memory from the selected domains for a new execution.

        Args:
            entity_id: Entity whose memory is queried.
            user_id: Passed through to the episodic step, which currently
                does not use it.
            task_description: Query text for the semantic lookups.
            runtime_tree: Optional runtime tree; when given, the top
                knowledge hits are also written into it as reference nodes.
            include_domains: Domain names to assemble. ``None`` or an empty
                list selects all four domains.

        Returns:
            A MemoryAssemblyResult containing the per-domain data and a
            pre-formatted prompt string for system prompt injection.
        """
        domains = include_domains or ["knowledge", "experience", "intelligence", "episodic"]
        result = MemoryAssemblyResult()

        # 1. KNOWLEDGE ASSEMBLY
        if "knowledge" in domains:
            result.knowledge_refs = await self._assemble_knowledge(
                entity_id, task_description, runtime_tree,
            )

        # 2. EXPERIENCE RETRIEVAL
        if "experience" in domains:
            result.experience_suggestions = await self._retrieve_experience(
                entity_id, task_description,
            )

        # 3. INTELLIGENCE INJECTION
        if "intelligence" in domains:
            result.intelligence_rules = await self._retrieve_intelligence(
                entity_id, task_description,
            )

        # 4. EPISODIC CONTEXT
        if "episodic" in domains:
            result.episodic_context = await self._retrieve_episodic(
                entity_id, user_id, task_description,
            )

        # 5. Format for prompt
        result.formatted_prompt = self._format_assembled_memory(result)
        return result

    # ===================================================================
    # Small value helpers
    # ===================================================================

    @staticmethod
    def _as_text(value: Any, default: str = "") -> str:
        """Return ``str(value)``, or *default* when *value* is ``None``."""
        return default if value is None else str(value)

    @staticmethod
    def _as_number(value: Any, default: float) -> Any:
        """Return *value* unchanged, or *default* when *value* is ``None``."""
        return default if value is None else value

    @staticmethod
    def _dedupe_episodes(episodes: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Drop repeated episodes, keeping the first occurrence of each.

        Two episodes count as the same when their ``at`` value and the first
        50 characters of their ``input`` match. Both parts are coerced to
        text so ``None`` or non-string values (for example a datetime) do not
        raise while the key is built.
        """
        seen = set()
        unique: List[Dict[str, Any]] = []
        for ep in episodes:
            key = (str(ep.get("at") or ""), str(ep.get("input") or "")[:50])
            if key not in seen:
                seen.add(key)
                unique.append(ep)
        return unique

    # ===================================================================
    # Domain Assemblers
    # ===================================================================

    async def _assemble_knowledge(
        self,
        entity_id: UUID,
        task_description: str,
        runtime_tree: Any = None,
    ) -> List[Dict[str, Any]]:
        """
        Find relevant knowledge nodes via semantic graph search.

        When *runtime_tree* is given, the first five hits are additionally
        written into it as reference nodes. Returns the search results, or
        ``[]`` when there are none or the lookup fails.
        """
        try:
            from cortex_memory.graph import SemanticGraphService
            graph = SemanticGraphService(self.db, self.company_id, embedding=self._embedding)

            results = await graph.semantic_graph_search(
                query=task_description,
                entity_id=entity_id,
                domains=["knowledge"],
                top_k=10,
                graph_expansion_depth=1,
            )

            if results and runtime_tree:
                await self._create_runtime_knowledge_refs(runtime_tree, results[:5])

            return results or []
        except Exception as e:
            logger.debug("Knowledge assembly failed: %s", e)
            return []

    async def _create_runtime_knowledge_refs(self, runtime_tree: Any, results: Any) -> None:
        """Create reference nodes in the runtime tree's knowledge root.

        Does nothing when the tree has no knowledge root. Each item is written
        independently: a failure on one item is logged and the remaining items
        are still attempted.
        """
        try:
            from cortex_memory.service import CortexService
            cortex = CortexService(self.db, self.company_id, llm=self._llm, child_run_factory=self._child_run_factory)
            knowledge_root = await cortex.get_knowledge_root(runtime_tree.id)
            if not knowledge_root:
                return

            for item in results:
                try:
                    # A present-but-None title must not break the slice below.
                    title = self._as_text(item.get("title", "Reference"), "Reference")
                    await cortex.write(
                        parent_id=knowledge_root.id,
                        node_type="knowledge",
                        title=f"📎 {title[:100]}",
                        summary=item.get("summary", ""),
                        content=None,
                        source_ref={
                            "ref_type": "cortex_node",
                            "source_tree_id": item.get("tree_id"),
                            "source_node_id": item.get("node_id"),
                            "relevance_score": item.get("combined_score", 0),
                        },
                    )
                except Exception as e:
                    # Best-effort per item, but leave a trace instead of a silent skip.
                    logger.debug("Runtime knowledge reference skipped: %s", e)
        except Exception as e:
            logger.debug("Runtime knowledge reference creation failed: %s", e)

    async def _retrieve_experience(
        self,
        entity_id: UUID,
        task_description: str,
    ) -> List[Dict[str, Any]]:
        """Query the experience domain for suggestions relevant to the task.

        Only hits whose ``node_type`` is ``suggestion``, ``pattern`` or
        ``observation`` are kept; each is reduced to ``suggestion`` / ``type``
        / ``confidence``. Returns ``[]`` on failure.
        """
        try:
            from cortex_memory.graph import SemanticGraphService
            graph = SemanticGraphService(self.db, self.company_id, embedding=self._embedding)

            results = await graph.semantic_graph_search(
                query=task_description,
                entity_id=entity_id,
                domains=["experience"],
                top_k=5,
            )

            return [
                {
                    "suggestion": r.get("summary", ""),
                    "type": r.get("node_type"),
                    "confidence": r.get("combined_score", 0),
                }
                for r in (results or [])
                if r.get("node_type") in ("suggestion", "pattern", "observation")
            ]
        except Exception as e:
            logger.debug("Experience retrieval failed: %s", e)
            return []

    async def _retrieve_intelligence(
        self,
        entity_id: UUID,
        task_description: str,
    ) -> List[Dict[str, Any]]:
        """Query the Intelligence Tree for up to ten applicable rules.

        Returns whatever ``get_applicable_rules`` yields, or ``[]`` on failure.
        """
        try:
            from cortex_memory.intelligence_tree import IntelligenceTreeService
            intelligence_svc = IntelligenceTreeService(self.db, self.company_id, embedding=self._embedding)
            return await intelligence_svc.get_applicable_rules(
                entity_id=entity_id,
                task_description=task_description,
                max_rules=10,
            )
        except Exception as e:
            logger.debug("Intelligence retrieval failed: %s", e)
            return []

    async def _retrieve_episodic(
        self,
        entity_id: UUID,
        user_id: Optional[UUID] = None,
        task_description: str = "",
    ) -> List[Dict[str, Any]]:
        """Retrieve recent and topically relevant episodes.

        Fetches the five most recent episodes and, when *task_description* is
        non-empty, up to three topic matches. The two lists are merged with
        duplicates removed and capped at ten entries. *user_id* is accepted
        but not used. Returns ``[]`` on failure.
        """
        try:
            from cortex_memory.episodic_tree import EpisodicTreeService
            episodic_svc = EpisodicTreeService(self.db, self.company_id, embedding=self._embedding)

            # Recent episodes
            recent = await episodic_svc.get_recent_episodes(
                entity_id=entity_id, limit=5,
            )

            # Topic-relevant episodes (semantic)
            relevant: List[Dict[str, Any]] = []
            if task_description:
                try:
                    relevant_raw = await episodic_svc.query_by_topic(
                        entity_id=entity_id,
                        query=task_description,
                        top_k=3,
                    )
                    for ep in relevant_raw:
                        relevant.append({
                            "input": ep.get("content", ""),
                            "output": ep.get("summary", ""),
                            "status": (ep.get("metadata") or {}).get("status", ""),
                            "at": ep.get("created_at", ""),
                        })
                except Exception as e:
                    # The topic query is optional; recent episodes still count.
                    logger.debug("Episodic topic query failed: %s", e)

            # Merge and deduplicate
            episodes = self._dedupe_episodes(list(recent or []) + relevant)
            return episodes[:10]
        except Exception as e:
            logger.debug("Episodic retrieval failed: %s", e)
            return []

    # ===================================================================
    # Prompt Formatting
    # ===================================================================

    def _format_assembled_memory(self, result: MemoryAssemblyResult) -> str:
        """
        Format assembled memory into structured prompt text.

        Layout:
          [INTELLIGENCE] — Rules and strategies (highest priority)
          [KNOWLEDGE] — Relevant KB references
          [EXPERIENCE] — Suggestions from past patterns
          [EPISODIC] — Recent execution history

        Empty domains are omitted; sections are separated by a blank line and
        an entirely empty result renders as ``""``. Fields that are present
        but ``None`` are rendered with the same defaults as missing fields, so
        a partially populated item cannot make formatting raise.
        """
        parts = []

        # Intelligence Rules (highest priority — goes first)
        if result.intelligence_rules:
            rule_lines = []
            for rule in result.intelligence_rules:
                confidence = self._as_number(rule.get("confidence", 0.5), 0.5)
                emoji = {
                    "instruction": "📏",
                    "strategy": "🎯",
                    "preference": "❤️",
                }.get(rule.get("type", ""), "💡")
                rule_lines.append(
                    f"  {emoji} [{confidence:.0%}] {rule.get('rule', rule.get('title', ''))}"
                )
            parts.append(
                "## Learned Intelligence\n"
                "The following rules have been learned from past experience:\n"
                + "\n".join(rule_lines)
            )

        # Knowledge References (at most five are rendered)
        if result.knowledge_refs:
            kb_lines = []
            for r in result.knowledge_refs[:5]:
                score = self._as_number(r.get("combined_score", 0), 0)
                title = self._as_text(r.get("title", "Untitled"), "Untitled")
                summary = self._as_text(r.get("summary", ""))[:200]
                kb_lines.append(f"  📎 [{score:.2f}] {title}: {summary}")
            parts.append("## Relevant Knowledge\n" + "\n".join(kb_lines))

        # Experience Suggestions
        if result.experience_suggestions:
            exp_lines = []
            for s in result.experience_suggestions:
                confidence = self._as_number(s.get("confidence", 0), 0)
                suggestion = self._as_text(s.get("suggestion", ""))[:200]
                exp_lines.append(f"  💡 [{confidence:.2f}] {suggestion}")
            parts.append("## Experience Suggestions\n" + "\n".join(exp_lines))

        # Episodic Context (at most five are rendered)
        if result.episodic_context:
            ep_lines = []
            for ep in result.episodic_context[:5]:
                inp = (ep.get("input") or "")[:150]
                out = (ep.get("output") or "")[:150]
                at = ep.get("at") or ""
                ep_lines.append(f"  [{at}] {inp!r} → {out!r}")
            parts.append("## Recent Execution History\n" + "\n".join(ep_lines))

        return "\n\n".join(parts)
cortex_memory/db.py ADDED
@@ -0,0 +1,25 @@
1
+ """
2
+ cortex_memory.db — the package's own SQLAlchemy declarative base.
3
+
4
+ The package owns its ``Base`` (plan `04` decision K4) so it can ship its own
5
+ schema + Alembic migrations and be installed standalone. The host shares this
6
+ metadata during the in-host phase (its Alembic ``target_metadata`` is a list
7
+ including ``cortex_memory.db.metadata``) so host autogenerate never drops the
8
+ CORTEX tables.
9
+
10
+ External references (company/user/entity/run) are **opaque nullable UUIDs**
11
+ (decision K5): no ``ForeignKey`` to host tables, so the package's schema stands
12
+ alone. The host enforces referential integrity in its own schema.
13
+ """
14
+ from __future__ import annotations
15
+
16
+ from sqlalchemy.orm import DeclarativeBase
17
+
18
+
19
class Base(DeclarativeBase):
    """Declarative base owned by this package (SQLAlchemy 2.0 typed style).

    It adds nothing to ``DeclarativeBase``; it exists so the package has its
    own base class and metadata for its ORM models.
    """
21
+
22
+
23
+ metadata = Base.metadata
24
+
25
+ __all__ = ["Base", "metadata"]