comet-memory 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. comet_memory-0.1.0/PKG-INFO +215 -0
  2. comet_memory-0.1.0/README.md +193 -0
  3. comet_memory-0.1.0/comet/__init__.py +27 -0
  4. comet_memory-0.1.0/comet/cli.py +390 -0
  5. comet_memory-0.1.0/comet/compacter.py +104 -0
  6. comet_memory-0.1.0/comet/config.py +60 -0
  7. comet_memory-0.1.0/comet/consolidator.py +248 -0
  8. comet_memory-0.1.0/comet/orchestrator.py +337 -0
  9. comet_memory-0.1.0/comet/retriever.py +209 -0
  10. comet_memory-0.1.0/comet/schemas.py +68 -0
  11. comet_memory-0.1.0/comet/sensor.py +84 -0
  12. comet_memory-0.1.0/comet/skills/__init__.py +5 -0
  13. comet_memory-0.1.0/comet/skills/__main__.py +4 -0
  14. comet_memory-0.1.0/comet/skills/cli.py +249 -0
  15. comet_memory-0.1.0/comet/skills/client.py +90 -0
  16. comet_memory-0.1.0/comet/skills/session.py +51 -0
  17. comet_memory-0.1.0/comet/storage.py +196 -0
  18. comet_memory-0.1.0/comet/templates/__init__.py +38 -0
  19. comet_memory-0.1.0/comet/vector_index.py +215 -0
  20. comet_memory-0.1.0/comet_memory.egg-info/PKG-INFO +215 -0
  21. comet_memory-0.1.0/comet_memory.egg-info/SOURCES.txt +30 -0
  22. comet_memory-0.1.0/comet_memory.egg-info/dependency_links.txt +1 -0
  23. comet_memory-0.1.0/comet_memory.egg-info/entry_points.txt +3 -0
  24. comet_memory-0.1.0/comet_memory.egg-info/requires.txt +8 -0
  25. comet_memory-0.1.0/comet_memory.egg-info/top_level.txt +1 -0
  26. comet_memory-0.1.0/pyproject.toml +38 -0
  27. comet_memory-0.1.0/setup.cfg +4 -0
  28. comet_memory-0.1.0/tests/test_agent_retrieval.py +113 -0
  29. comet_memory-0.1.0/tests/test_comet.py +106 -0
  30. comet_memory-0.1.0/tests/test_comparison.py +152 -0
  31. comet_memory-0.1.0/tests/test_extended.py +201 -0
  32. comet_memory-0.1.0/tests/test_rag.py +217 -0
@@ -0,0 +1,215 @@
1
+ Metadata-Version: 2.4
2
+ Name: comet-memory
3
+ Version: 0.1.0
4
+ Summary: CoMeT — Cognitive Memory Tree: Hierarchical memory system for LLM agents
5
+ Author: Dirac-Robot
6
+ License: MIT
7
+ Keywords: memory,llm,agent,rag,cognitive
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Requires-Python: >=3.12
13
+ Description-Content-Type: text/markdown
14
+ Requires-Dist: ato>=2.2.0
15
+ Requires-Dist: chromadb>=1.4.1
16
+ Requires-Dist: langchain>=1.2.2
17
+ Requires-Dist: langchain-openai>=1.1.7
18
+ Requires-Dist: langgraph>=1.0.5
19
+ Requires-Dist: loguru>=0.7.3
20
+ Requires-Dist: openai>=2.16.0
21
+ Requires-Dist: pydantic>=2.12.5
22
+
23
+ # ☄️ CoMeT — Cognitive Memory Tree
24
+
25
+ **Lossless structured memory for AI agents.**
26
+
27
+ CoMeT compresses long conversations into a navigable tree of memory nodes.
28
+ Unlike naive summarization, which loses details, CoMeT preserves raw data behind structured summaries — agents read summaries first, then drill into raw data only when needed.
29
+
30
+ ## Architecture
31
+
32
+ ```
33
+ User Input
34
+
35
+
36
+ ┌─────────┐ SLM (fast) ┌───────────┐
37
+ │ Sensor │ ───────────────▶ │ L1 Buffer │
38
+ └─────────┘ entity/intent └─────┬─────┘
39
+ │ cognitive load trigger
40
+
41
+ ┌───────────┐
42
+ │ Compacter │ LLM (slow)
43
+ └─────┬─────┘
44
+ │ summary + trigger + recall_mode + tags
45
+
46
+ ┌──────────┴──────────┐
47
+ │ │
48
+ ┌───────────┐ ┌─────────────┐
49
+ │ Store │ │ VectorIndex │ ChromaDB
50
+ │ depth 0-2│ │ (dual-path) │ summary + trigger
51
+ └───────────┘ └──────┬──────┘
52
+ │ semantic search
53
+
54
+ ┌───────────┐
55
+ │ Retriever │ RRF fusion
56
+ └───────────┘
57
+ ```
58
+
59
+ ### Dual-Speed Layer
60
+ - **Fast (Sensor)**: SLM extracts entities/intent per turn, detects topic shifts via cognitive load assessment
61
+ - **Slow (Compacter)**: Main LLM structures accumulated L1 buffer into `MemoryNode` with summary, trigger, recall mode, and topic tags
62
+
63
+ ### Dynamic Resolution (depth 0 → 1 → 2)
64
+
65
+ | Depth | Content | Use Case |
66
+ |-------|---------|----------|
67
+ | 0 | Summary + Trigger | Agent's initial context window |
68
+ | 1 | + Topic tags + Links | Navigation / node selection |
69
+ | 2 | Full raw data + Links | Fact retrieval |
70
+
71
+ ### Recall Mode
72
+
73
+ Each memory node is classified by `recall_mode` at compaction time:
74
+
75
+ | Mode | Behavior | Examples |
76
+ |------|----------|----------|
77
+ | `passive` | Always included in context window | User identity, persistent preferences |
78
+ | `active` | Retrieved on-demand via semantic search | Factual details, decisions, events |
79
+ | `both` | Always in context + searchable via RAG | Core constraints with retrievable details |
80
+
81
+ ### Dual-Path RAG Retrieval
82
+
83
+ CoMeT embeds both `summary` (what the node contains) and `trigger` (when to recall it) into separate vector collections. At query time:
84
+
85
+ 1. **QueryAnalyzer** decomposes the query into `semantic_query` + `search_intent`
86
+ 2. **Summary path**: matches what the information is about
87
+ 3. **Trigger path**: matches when the information would be needed
88
+ 4. **ScoreFusion** (Reciprocal Rank Fusion): merges results from both paths
89
+
90
+ Triggers are written from the **LLM's perspective** (`"내가 ~정보가 필요할 때"`, i.e. "when I need information about ~") rather than from the user's perspective, enabling broader semantic matching even without explicit user requests.
91
+
92
+ ### Topic-Aware Auto-Linking
93
+ Nodes share a global topic tag set. The compacter reuses existing tags when possible, enabling automatic bidirectional linking between related nodes across different conversation segments.
94
+
95
+ ## Benchmark (52 turns, 5 conversations, 10 questions)
96
+
97
+ | Method | Context Cost | Accuracy |
98
+ |--------|-------------|----------|
99
+ | Full Context Injection | 5,198 chars (100%) | 10/10 |
100
+ | **CoMeT** | **1,397 chars (27%)** | **9/10** |
101
+ | Naive Summary | 1,179 chars (23%) | 1/10 |
102
+
103
+ - CoMeT uses **27% of the context** (measured in characters, as in the table above) while retaining **90% accuracy**
104
+ - 6/10 questions required **link traversal** (agent read 2-3 nodes)
105
+ - Cross-topic questions: CoMeT 5/5 vs Naive 0/5
106
+
107
+ ## Quick Start
108
+
109
+ ### Session Memory (within a conversation)
110
+
111
+ ```python
112
+ from comet import CoMeT, scope
113
+
114
+ @scope
115
+ def main(config):
116
+ memo = CoMeT(config)
117
+
118
+ # Add conversation turns
119
+ memo.add("B200 4대로 월드모델 학습 가능할까?")
120
+ memo.add("2B면 충분하고 커봐야 8B")
121
+ memo.add("DPO 데이터는 negative를 syntax error로 구성했어")
122
+
123
+ # Force compact remaining buffer
124
+ memo.force_compact()
125
+
126
+ # Navigation
127
+ for node in memo.list_memories():
128
+ print(memo.read_memory(node['node_id'], depth=0))
129
+
130
+ # Agent tools (LangChain compatible)
131
+ tools = memo.get_tools()
132
+ # → get_memory_index, read_memory_node, search_memory
133
+
134
+ main()
135
+ ```
136
+
137
+ ### Cross-Session RAG Retrieval
138
+
139
+ ```python
140
+ from comet import CoMeT, scope
141
+
142
+ @scope
143
+ def main(config):
144
+ config.retrieval.vector_db_path = './memory_store/vectors'
145
+
146
+ memo = CoMeT(config)
147
+
148
+ # Ingest turns (auto-indexed to VectorIndex on compaction)
149
+ memo.add("JWT 액세스 토큰 만료는 15분, 리프레시는 7일로 설정")
150
+ memo.force_compact()
151
+
152
+ # Semantic retrieval across all sessions
153
+ results = memo.retrieve("토큰 만료 설정이 어떻게 되어있어?")
154
+ for r in results:
155
+ print(f"[{r.node.node_id}] score={r.relevance_score:.4f}")
156
+ print(f" {r.node.summary}")
157
+
158
+ # Agent tools include retrieve_memory when retrieval is configured
159
+ tools = memo.get_tools()
160
+ # → get_memory_index, read_memory_node, search_memory, retrieve_memory
161
+
162
+ main()
163
+ ```
164
+
165
+ ## Configuration ([ato](https://github.com/Dirac-Robot/ato))
166
+
167
+ ```python
168
+ # comet/config.py
169
+ @scope.observe(default=True)
170
+ def default(config):
171
+ config.slm_model = 'gpt-4o-mini'
172
+ config.main_model = 'gpt-4o'
173
+ config.compacting.load_threshold = 3
174
+ config.compacting.max_l1_buffer = 5
175
+
176
+ # RAG retrieval (enabled when retrieval block exists)
177
+ config.retrieval.embedding_model = 'text-embedding-3-small'
178
+ config.retrieval.vector_backend = 'chroma'
179
+ config.retrieval.vector_db_path = './memory_store/vectors'
180
+ config.retrieval.top_k = 5
181
+
182
+ @scope.observe()
183
+ def local_slm(config):
184
+ config.slm_model = 'ollama/gemma3:4b'
185
+
186
+ @scope.observe()
187
+ def aggressive(config):
188
+ config.compacting.load_threshold = 2
189
+ config.compacting.max_l1_buffer = 3
190
+ ```
191
+
192
+ ```bash
193
+ # Use default
194
+ python main.py
195
+
196
+ # Local SLM + aggressive compacting
197
+ python main.py local_slm aggressive
198
+ ```
199
+
200
+ ## Project Structure
201
+
202
+ ```
203
+ comet/
204
+ ├── orchestrator.py # CoMeT main class
205
+ ├── sensor.py # L1 extraction + cognitive load (SLM)
206
+ ├── compacter.py # L1→L2 structuring + auto-linking (LLM)
207
+ ├── storage.py # JSON key-value store + navigation
208
+ ├── schemas.py # MemoryNode, L1Memory, CognitiveLoad, RetrievalResult
209
+ ├── config.py # ato scope configuration
210
+ ├── vector_index.py # ChromaDB dual-collection vector store
211
+ ├── retriever.py # QueryAnalyzer + ScoreFusion + Retriever
212
+ └── templates/
213
+ ├── compacting.txt # Memory structuring prompt
214
+ └── query_analysis.txt # Query decomposition prompt
215
+ ```
@@ -0,0 +1,193 @@
1
+ # ☄️ CoMeT — Cognitive Memory Tree
2
+
3
+ **Lossless structured memory for AI agents.**
4
+
5
+ CoMeT compresses long conversations into a navigable tree of memory nodes.
6
+ Unlike naive summarization, which loses details, CoMeT preserves raw data behind structured summaries — agents read summaries first, then drill into raw data only when needed.
7
+
8
+ ## Architecture
9
+
10
+ ```
11
+ User Input
12
+
13
+
14
+ ┌─────────┐ SLM (fast) ┌───────────┐
15
+ │ Sensor │ ───────────────▶ │ L1 Buffer │
16
+ └─────────┘ entity/intent └─────┬─────┘
17
+ │ cognitive load trigger
18
+
19
+ ┌───────────┐
20
+ │ Compacter │ LLM (slow)
21
+ └─────┬─────┘
22
+ │ summary + trigger + recall_mode + tags
23
+
24
+ ┌──────────┴──────────┐
25
+ │ │
26
+ ┌───────────┐ ┌─────────────┐
27
+ │ Store │ │ VectorIndex │ ChromaDB
28
+ │ depth 0-2│ │ (dual-path) │ summary + trigger
29
+ └───────────┘ └──────┬──────┘
30
+ │ semantic search
31
+
32
+ ┌───────────┐
33
+ │ Retriever │ RRF fusion
34
+ └───────────┘
35
+ ```
36
+
37
+ ### Dual-Speed Layer
38
+ - **Fast (Sensor)**: SLM extracts entities/intent per turn, detects topic shifts via cognitive load assessment
39
+ - **Slow (Compacter)**: Main LLM structures accumulated L1 buffer into `MemoryNode` with summary, trigger, recall mode, and topic tags
40
+
41
+ ### Dynamic Resolution (depth 0 → 1 → 2)
42
+
43
+ | Depth | Content | Use Case |
44
+ |-------|---------|----------|
45
+ | 0 | Summary + Trigger | Agent's initial context window |
46
+ | 1 | + Topic tags + Links | Navigation / node selection |
47
+ | 2 | Full raw data + Links | Fact retrieval |
48
+
49
+ ### Recall Mode
50
+
51
+ Each memory node is classified by `recall_mode` at compaction time:
52
+
53
+ | Mode | Behavior | Examples |
54
+ |------|----------|----------|
55
+ | `passive` | Always included in context window | User identity, persistent preferences |
56
+ | `active` | Retrieved on-demand via semantic search | Factual details, decisions, events |
57
+ | `both` | Always in context + searchable via RAG | Core constraints with retrievable details |
58
+
59
+ ### Dual-Path RAG Retrieval
60
+
61
+ CoMeT embeds both `summary` (what the node contains) and `trigger` (when to recall it) into separate vector collections. At query time:
62
+
63
+ 1. **QueryAnalyzer** decomposes the query into `semantic_query` + `search_intent`
64
+ 2. **Summary path**: matches what the information is about
65
+ 3. **Trigger path**: matches when the information would be needed
66
+ 4. **ScoreFusion** (Reciprocal Rank Fusion): merges results from both paths
67
+
68
+ Triggers are written from the **LLM's perspective** (`"내가 ~정보가 필요할 때"`, i.e. "when I need information about ~") rather than from the user's perspective, enabling broader semantic matching even without explicit user requests.
69
+
70
+ ### Topic-Aware Auto-Linking
71
+ Nodes share a global topic tag set. The compacter reuses existing tags when possible, enabling automatic bidirectional linking between related nodes across different conversation segments.
72
+
73
+ ## Benchmark (52 turns, 5 conversations, 10 questions)
74
+
75
+ | Method | Context Cost | Accuracy |
76
+ |--------|-------------|----------|
77
+ | Full Context Injection | 5,198 chars (100%) | 10/10 |
78
+ | **CoMeT** | **1,397 chars (27%)** | **9/10** |
79
+ | Naive Summary | 1,179 chars (23%) | 1/10 |
80
+
81
+ - CoMeT uses **27% of the context** (measured in characters, as in the table above) while retaining **90% accuracy**
82
+ - 6/10 questions required **link traversal** (agent read 2-3 nodes)
83
+ - Cross-topic questions: CoMeT 5/5 vs Naive 0/5
84
+
85
+ ## Quick Start
86
+
87
+ ### Session Memory (within a conversation)
88
+
89
+ ```python
90
+ from comet import CoMeT, scope
91
+
92
+ @scope
93
+ def main(config):
94
+ memo = CoMeT(config)
95
+
96
+ # Add conversation turns
97
+ memo.add("B200 4대로 월드모델 학습 가능할까?")
98
+ memo.add("2B면 충분하고 커봐야 8B")
99
+ memo.add("DPO 데이터는 negative를 syntax error로 구성했어")
100
+
101
+ # Force compact remaining buffer
102
+ memo.force_compact()
103
+
104
+ # Navigation
105
+ for node in memo.list_memories():
106
+ print(memo.read_memory(node['node_id'], depth=0))
107
+
108
+ # Agent tools (LangChain compatible)
109
+ tools = memo.get_tools()
110
+ # → get_memory_index, read_memory_node, search_memory
111
+
112
+ main()
113
+ ```
114
+
115
+ ### Cross-Session RAG Retrieval
116
+
117
+ ```python
118
+ from comet import CoMeT, scope
119
+
120
+ @scope
121
+ def main(config):
122
+ config.retrieval.vector_db_path = './memory_store/vectors'
123
+
124
+ memo = CoMeT(config)
125
+
126
+ # Ingest turns (auto-indexed to VectorIndex on compaction)
127
+ memo.add("JWT 액세스 토큰 만료는 15분, 리프레시는 7일로 설정")
128
+ memo.force_compact()
129
+
130
+ # Semantic retrieval across all sessions
131
+ results = memo.retrieve("토큰 만료 설정이 어떻게 되어있어?")
132
+ for r in results:
133
+ print(f"[{r.node.node_id}] score={r.relevance_score:.4f}")
134
+ print(f" {r.node.summary}")
135
+
136
+ # Agent tools include retrieve_memory when retrieval is configured
137
+ tools = memo.get_tools()
138
+ # → get_memory_index, read_memory_node, search_memory, retrieve_memory
139
+
140
+ main()
141
+ ```
142
+
143
+ ## Configuration ([ato](https://github.com/Dirac-Robot/ato))
144
+
145
+ ```python
146
+ # comet/config.py
147
+ @scope.observe(default=True)
148
+ def default(config):
149
+ config.slm_model = 'gpt-4o-mini'
150
+ config.main_model = 'gpt-4o'
151
+ config.compacting.load_threshold = 3
152
+ config.compacting.max_l1_buffer = 5
153
+
154
+ # RAG retrieval (enabled when retrieval block exists)
155
+ config.retrieval.embedding_model = 'text-embedding-3-small'
156
+ config.retrieval.vector_backend = 'chroma'
157
+ config.retrieval.vector_db_path = './memory_store/vectors'
158
+ config.retrieval.top_k = 5
159
+
160
+ @scope.observe()
161
+ def local_slm(config):
162
+ config.slm_model = 'ollama/gemma3:4b'
163
+
164
+ @scope.observe()
165
+ def aggressive(config):
166
+ config.compacting.load_threshold = 2
167
+ config.compacting.max_l1_buffer = 3
168
+ ```
169
+
170
+ ```bash
171
+ # Use default
172
+ python main.py
173
+
174
+ # Local SLM + aggressive compacting
175
+ python main.py local_slm aggressive
176
+ ```
177
+
178
+ ## Project Structure
179
+
180
+ ```
181
+ comet/
182
+ ├── orchestrator.py # CoMeT main class
183
+ ├── sensor.py # L1 extraction + cognitive load (SLM)
184
+ ├── compacter.py # L1→L2 structuring + auto-linking (LLM)
185
+ ├── storage.py # JSON key-value store + navigation
186
+ ├── schemas.py # MemoryNode, L1Memory, CognitiveLoad, RetrievalResult
187
+ ├── config.py # ato scope configuration
188
+ ├── vector_index.py # ChromaDB dual-collection vector store
189
+ ├── retriever.py # QueryAnalyzer + ScoreFusion + Retriever
190
+ └── templates/
191
+ ├── compacting.txt # Memory structuring prompt
192
+ └── query_analysis.txt # Query decomposition prompt
193
+ ```
@@ -0,0 +1,27 @@
1
+ """CoMeT: Cognitive Memory OS - Dynamic Resolution Memory System"""
2
+ from comet.schemas import MemoryNode, CognitiveLoad, CoMeTState, L1Memory, RetrievalResult
3
+ from comet.sensor import CognitiveSensor
4
+ from comet.compacter import MemoryCompacter
5
+ from comet.storage import MemoryStore
6
+ from comet.vector_index import VectorIndex
7
+ from comet.retriever import Retriever
8
+ from comet.consolidator import Consolidator
9
+ from comet.orchestrator import CoMeT, MessageInput
10
+ from comet.config import scope
11
+
12
+ __all__ = [
13
+ 'CoMeT',
14
+ 'MemoryNode',
15
+ 'CognitiveLoad',
16
+ 'CoMeTState',
17
+ 'L1Memory',
18
+ 'RetrievalResult',
19
+ 'CognitiveSensor',
20
+ 'MemoryCompacter',
21
+ 'MemoryStore',
22
+ 'VectorIndex',
23
+ 'Retriever',
24
+ 'Consolidator',
25
+ 'scope',
26
+ ]
27
+