htm 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.architecture/decisions/adrs/001-use-postgresql-timescaledb-storage.md +227 -0
- data/.architecture/decisions/adrs/002-two-tier-memory-architecture.md +322 -0
- data/.architecture/decisions/adrs/003-ollama-default-embedding-provider.md +339 -0
- data/.architecture/decisions/adrs/004-multi-robot-shared-memory-hive-mind.md +374 -0
- data/.architecture/decisions/adrs/005-rag-based-retrieval-with-hybrid-search.md +443 -0
- data/.architecture/decisions/adrs/006-context-assembly-strategies.md +444 -0
- data/.architecture/decisions/adrs/007-working-memory-eviction-strategy.md +461 -0
- data/.architecture/decisions/adrs/008-robot-identification-system.md +550 -0
- data/.architecture/decisions/adrs/009-never-forget-explicit-deletion-only.md +570 -0
- data/.architecture/decisions/adrs/010-redis-working-memory-rejected.md +323 -0
- data/.architecture/decisions/adrs/011-database-side-embedding-generation-with-pgai.md +585 -0
- data/.architecture/decisions/adrs/012-llm-driven-ontology-topic-extraction.md +583 -0
- data/.architecture/decisions/adrs/013-activerecord-orm-and-many-to-many-tagging.md +299 -0
- data/.architecture/decisions/adrs/014-client-side-embedding-generation-workflow.md +569 -0
- data/.architecture/decisions/adrs/015-hierarchical-tag-ontology-and-llm-extraction.md +701 -0
- data/.architecture/decisions/adrs/016-async-embedding-and-tag-generation.md +694 -0
- data/.architecture/members.yml +144 -0
- data/.architecture/reviews/2025-10-29-llm-configuration-and-async-processing-review.md +1137 -0
- data/.architecture/reviews/initial-system-analysis.md +330 -0
- data/.envrc +32 -0
- data/.irbrc +145 -0
- data/CHANGELOG.md +150 -0
- data/COMMITS.md +196 -0
- data/LICENSE +21 -0
- data/README.md +1347 -0
- data/Rakefile +51 -0
- data/SETUP.md +268 -0
- data/config/database.yml +67 -0
- data/db/migrate/20250101000001_enable_extensions.rb +14 -0
- data/db/migrate/20250101000002_create_robots.rb +14 -0
- data/db/migrate/20250101000003_create_nodes.rb +42 -0
- data/db/migrate/20250101000005_create_tags.rb +38 -0
- data/db/migrate/20250101000007_add_node_vector_indexes.rb +30 -0
- data/db/schema.sql +473 -0
- data/db/seed_data/README.md +100 -0
- data/db/seed_data/presidents.md +136 -0
- data/db/seed_data/states.md +151 -0
- data/db/seeds.rb +208 -0
- data/dbdoc/README.md +173 -0
- data/dbdoc/public.node_stats.md +48 -0
- data/dbdoc/public.node_stats.svg +41 -0
- data/dbdoc/public.node_tags.md +40 -0
- data/dbdoc/public.node_tags.svg +112 -0
- data/dbdoc/public.nodes.md +54 -0
- data/dbdoc/public.nodes.svg +118 -0
- data/dbdoc/public.nodes_tags.md +39 -0
- data/dbdoc/public.nodes_tags.svg +112 -0
- data/dbdoc/public.ontology_structure.md +48 -0
- data/dbdoc/public.ontology_structure.svg +38 -0
- data/dbdoc/public.operations_log.md +42 -0
- data/dbdoc/public.operations_log.svg +130 -0
- data/dbdoc/public.relationships.md +39 -0
- data/dbdoc/public.relationships.svg +41 -0
- data/dbdoc/public.robot_activity.md +46 -0
- data/dbdoc/public.robot_activity.svg +35 -0
- data/dbdoc/public.robots.md +35 -0
- data/dbdoc/public.robots.svg +90 -0
- data/dbdoc/public.schema_migrations.md +29 -0
- data/dbdoc/public.schema_migrations.svg +26 -0
- data/dbdoc/public.tags.md +35 -0
- data/dbdoc/public.tags.svg +60 -0
- data/dbdoc/public.topic_relationships.md +45 -0
- data/dbdoc/public.topic_relationships.svg +32 -0
- data/dbdoc/schema.json +1437 -0
- data/dbdoc/schema.svg +154 -0
- data/docs/api/database.md +806 -0
- data/docs/api/embedding-service.md +532 -0
- data/docs/api/htm.md +797 -0
- data/docs/api/index.md +259 -0
- data/docs/api/long-term-memory.md +1096 -0
- data/docs/api/working-memory.md +665 -0
- data/docs/architecture/adrs/001-postgresql-timescaledb.md +314 -0
- data/docs/architecture/adrs/002-two-tier-memory.md +411 -0
- data/docs/architecture/adrs/003-ollama-embeddings.md +421 -0
- data/docs/architecture/adrs/004-hive-mind.md +437 -0
- data/docs/architecture/adrs/005-rag-retrieval.md +531 -0
- data/docs/architecture/adrs/006-context-assembly.md +496 -0
- data/docs/architecture/adrs/007-eviction-strategy.md +645 -0
- data/docs/architecture/adrs/008-robot-identification.md +625 -0
- data/docs/architecture/adrs/009-never-forget.md +648 -0
- data/docs/architecture/adrs/010-redis-working-memory-rejected.md +323 -0
- data/docs/architecture/adrs/011-pgai-integration.md +494 -0
- data/docs/architecture/adrs/index.md +215 -0
- data/docs/architecture/hive-mind.md +736 -0
- data/docs/architecture/index.md +351 -0
- data/docs/architecture/overview.md +538 -0
- data/docs/architecture/two-tier-memory.md +873 -0
- data/docs/assets/css/custom.css +83 -0
- data/docs/assets/images/htm-core-components.svg +63 -0
- data/docs/assets/images/htm-database-schema.svg +93 -0
- data/docs/assets/images/htm-hive-mind-architecture.svg +125 -0
- data/docs/assets/images/htm-importance-scoring-framework.svg +83 -0
- data/docs/assets/images/htm-layered-architecture.svg +71 -0
- data/docs/assets/images/htm-long-term-memory-architecture.svg +115 -0
- data/docs/assets/images/htm-working-memory-architecture.svg +120 -0
- data/docs/assets/images/htm.jpg +0 -0
- data/docs/assets/images/htm_demo.gif +0 -0
- data/docs/assets/js/mathjax.js +18 -0
- data/docs/assets/videos/htm_video.mp4 +0 -0
- data/docs/database_rake_tasks.md +322 -0
- data/docs/development/contributing.md +787 -0
- data/docs/development/index.md +336 -0
- data/docs/development/schema.md +596 -0
- data/docs/development/setup.md +719 -0
- data/docs/development/testing.md +819 -0
- data/docs/guides/adding-memories.md +824 -0
- data/docs/guides/context-assembly.md +1009 -0
- data/docs/guides/getting-started.md +577 -0
- data/docs/guides/index.md +118 -0
- data/docs/guides/long-term-memory.md +941 -0
- data/docs/guides/multi-robot.md +866 -0
- data/docs/guides/recalling-memories.md +927 -0
- data/docs/guides/search-strategies.md +953 -0
- data/docs/guides/working-memory.md +717 -0
- data/docs/index.md +214 -0
- data/docs/installation.md +477 -0
- data/docs/multi_framework_support.md +519 -0
- data/docs/quick-start.md +655 -0
- data/docs/setup_local_database.md +302 -0
- data/docs/using_rake_tasks_in_your_app.md +383 -0
- data/examples/basic_usage.rb +93 -0
- data/examples/cli_app/README.md +317 -0
- data/examples/cli_app/htm_cli.rb +270 -0
- data/examples/custom_llm_configuration.rb +183 -0
- data/examples/example_app/Rakefile +71 -0
- data/examples/example_app/app.rb +206 -0
- data/examples/sinatra_app/Gemfile +21 -0
- data/examples/sinatra_app/app.rb +335 -0
- data/lib/htm/active_record_config.rb +113 -0
- data/lib/htm/configuration.rb +342 -0
- data/lib/htm/database.rb +594 -0
- data/lib/htm/embedding_service.rb +115 -0
- data/lib/htm/errors.rb +34 -0
- data/lib/htm/job_adapter.rb +154 -0
- data/lib/htm/jobs/generate_embedding_job.rb +65 -0
- data/lib/htm/jobs/generate_tags_job.rb +82 -0
- data/lib/htm/long_term_memory.rb +965 -0
- data/lib/htm/models/node.rb +109 -0
- data/lib/htm/models/node_tag.rb +33 -0
- data/lib/htm/models/robot.rb +52 -0
- data/lib/htm/models/tag.rb +76 -0
- data/lib/htm/railtie.rb +76 -0
- data/lib/htm/sinatra.rb +157 -0
- data/lib/htm/tag_service.rb +135 -0
- data/lib/htm/tasks.rb +38 -0
- data/lib/htm/version.rb +5 -0
- data/lib/htm/working_memory.rb +182 -0
- data/lib/htm.rb +400 -0
- data/lib/tasks/db.rake +19 -0
- data/lib/tasks/htm.rake +147 -0
- data/lib/tasks/jobs.rake +312 -0
- data/mkdocs.yml +190 -0
- data/scripts/install_local_database.sh +309 -0
- metadata +341 -0
|
@@ -0,0 +1,873 @@
|
|
|
1
|
+
# Two-Tier Memory System
|
|
2
|
+
|
|
3
|
+
HTM implements a sophisticated two-tier memory architecture that balances the competing needs of fast access (working memory) and unlimited retention (long-term memory). This document provides a comprehensive deep dive into both tiers, their interactions, and optimization strategies.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
The two-tier architecture addresses a fundamental challenge in LLM-based applications: LLMs have limited context windows but need to maintain awareness across long conversations spanning days, weeks, or months.
|
|
8
|
+
|
|
9
|
+
<svg viewBox="0 0 800 500" xmlns="http://www.w3.org/2000/svg" style="background: transparent;">
|
|
10
|
+
<!-- Title -->
|
|
11
|
+
<text x="400" y="30" text-anchor="middle" fill="#E0E0E0" font-size="18" font-weight="bold">Two-Tier Memory Architecture</text>
|
|
12
|
+
|
|
13
|
+
<!-- Working Memory (Hot Tier) -->
|
|
14
|
+
<rect x="50" y="80" width="300" height="180" fill="rgba(33, 150, 243, 0.2)" stroke="#2196F3" stroke-width="3" rx="5"/>
|
|
15
|
+
<text x="200" y="110" text-anchor="middle" fill="#E0E0E0" font-size="16" font-weight="bold">Working Memory (Hot)</text>
|
|
16
|
+
<text x="80" y="140" fill="#B0B0B0" font-size="12">Capacity: Token-limited (128K)</text>
|
|
17
|
+
<text x="80" y="160" fill="#B0B0B0" font-size="12">Storage: In-memory Ruby Hash</text>
|
|
18
|
+
<text x="80" y="180" fill="#B0B0B0" font-size="12">Speed: O(1) lookups</text>
|
|
19
|
+
<text x="80" y="200" fill="#B0B0B0" font-size="12">Lifetime: Process lifetime</text>
|
|
20
|
+
<text x="80" y="220" fill="#B0B0B0" font-size="12">Eviction: Importance + Recency</text>
|
|
21
|
+
<text x="80" y="240" fill="#4CAF50" font-size="12" font-weight="bold">Fast, Token-Aware, Volatile</text>
|
|
22
|
+
|
|
23
|
+
<!-- Long-Term Memory (Cold Tier) -->
|
|
24
|
+
<rect x="450" y="80" width="300" height="180" fill="rgba(156, 39, 176, 0.2)" stroke="#9C27B0" stroke-width="3" rx="5"/>
|
|
25
|
+
<text x="600" y="110" text-anchor="middle" fill="#E0E0E0" font-size="16" font-weight="bold">Long-Term Memory (Cold)</text>
|
|
26
|
+
<text x="480" y="140" fill="#B0B0B0" font-size="12">Capacity: Unlimited</text>
|
|
27
|
+
<text x="480" y="160" fill="#B0B0B0" font-size="12">Storage: PostgreSQL + TimescaleDB</text>
|
|
28
|
+
<text x="480" y="180" fill="#B0B0B0" font-size="12">Speed: O(log n) with indexes</text>
|
|
29
|
+
<text x="480" y="200" fill="#B0B0B0" font-size="12">Lifetime: Permanent</text>
|
|
30
|
+
<text x="480" y="220" fill="#B0B0B0" font-size="12">Retrieval: RAG (semantic + temporal)</text>
|
|
31
|
+
<text x="480" y="240" fill="#4CAF50" font-size="12" font-weight="bold">Durable, Searchable, Persistent</text>
|
|
32
|
+
|
|
33
|
+
<!-- Data Flow: Add Memory -->
|
|
34
|
+
<path d="M 200 280 L 200 320 L 400 320 L 400 280" stroke="#4CAF50" stroke-width="3" fill="none" marker-end="url(#arrow-green)"/>
|
|
35
|
+
<text x="300" y="310" text-anchor="middle" fill="#4CAF50" font-size="12" font-weight="bold">Add Memory</text>
|
|
36
|
+
<text x="300" y="330" text-anchor="middle" fill="#B0B0B0" font-size="10">(Stored in both tiers)</text>
|
|
37
|
+
|
|
38
|
+
<!-- Data Flow: Eviction -->
|
|
39
|
+
<path d="M 350 360 L 600 360" stroke="#FF9800" stroke-width="3" marker-end="url(#arrow-orange)"/>
|
|
40
|
+
<text x="475" y="350" text-anchor="middle" fill="#FF9800" font-size="12" font-weight="bold">Eviction</text>
|
|
41
|
+
<text x="475" y="380" text-anchor="middle" fill="#B0B0B0" font-size="10">(Token limit → dropped from WM, kept in LTM)</text>
|
|
42
|
+
|
|
43
|
+
<!-- Data Flow: Recall -->
|
|
44
|
+
<path d="M 600 400 L 200 400" stroke="#9C27B0" stroke-width="3" marker-end="url(#arrow-purple)"/>
|
|
45
|
+
<text x="400" y="390" text-anchor="middle" fill="#9C27B0" font-size="12" font-weight="bold">Recall</text>
|
|
46
|
+
<text x="400" y="420" text-anchor="middle" fill="#B0B0B0" font-size="10">(RAG search → load back to WM)</text>
|
|
47
|
+
|
|
48
|
+
<!-- Never Forget Note -->
|
|
49
|
+
<rect x="150" y="450" width="500" height="40" fill="rgba(76, 175, 80, 0.1)" stroke="#4CAF50" stroke-width="1" rx="3"/>
|
|
50
|
+
<text x="400" y="475" text-anchor="middle" fill="#4CAF50" font-size="13" font-weight="bold">Never Forget: Evicted memories stay in LTM forever (explicit deletion only)</text>
|
|
51
|
+
|
|
52
|
+
<defs>
|
|
53
|
+
<marker id="arrow-green" markerWidth="10" markerHeight="10" refX="9" refY="3" orient="auto">
|
|
54
|
+
<polygon points="0 0, 10 3, 0 6" fill="#4CAF50"/>
|
|
55
|
+
</marker>
|
|
56
|
+
<marker id="arrow-orange" markerWidth="10" markerHeight="10" refX="9" refY="3" orient="auto">
|
|
57
|
+
<polygon points="0 0, 10 3, 0 6" fill="#FF9800"/>
|
|
58
|
+
</marker>
|
|
59
|
+
<marker id="arrow-purple" markerWidth="10" markerHeight="10" refX="9" refY="3" orient="auto">
|
|
60
|
+
<polygon points="0 0, 10 3, 0 6" fill="#9C27B0"/>
|
|
61
|
+
</marker>
|
|
62
|
+
</defs>
|
|
63
|
+
</svg>
|
|
64
|
+
|
|
65
|
+
!!! info "Related ADR"
|
|
66
|
+
See [ADR-002: Two-Tier Memory Architecture](adrs/002-two-tier-memory.md) for the complete architectural decision record.
|
|
67
|
+
|
|
68
|
+
## Working Memory (Hot Tier)
|
|
69
|
+
|
|
70
|
+
Working memory is a token-limited, in-memory cache for recently accessed or highly important memories. It provides O(1) access times for the LLM's immediate context needs.
|
|
71
|
+
|
|
72
|
+
### Design Characteristics
|
|
73
|
+
|
|
74
|
+
| Aspect | Details |
|
|
75
|
+
|--------|---------|
|
|
76
|
+
| **Purpose** | Immediate context for LLM consumption |
|
|
77
|
+
| **Capacity** | Token-limited (default: 128,000 tokens) |
|
|
78
|
+
| **Storage** | Ruby Hash: `{ key => node }` |
|
|
79
|
+
| **Access Pattern** | Frequent reads, moderate writes |
|
|
80
|
+
| **Eviction Policy** | Hybrid importance + recency (LRU-based) |
|
|
81
|
+
| **Lifetime** | Process lifetime (cleared on restart) |
|
|
82
|
+
| **Performance** | O(1) hash lookups, O(n log n) eviction |
|
|
83
|
+
|
|
84
|
+
### Data Structure
|
|
85
|
+
|
|
86
|
+
```ruby
|
|
87
|
+
class WorkingMemory
|
|
88
|
+
def initialize(max_tokens:)
|
|
89
|
+
@max_tokens = max_tokens
|
|
90
|
+
@nodes = {} # key => node_data
|
|
91
|
+
@access_order = [] # LRU tracking
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
# Node structure
|
|
95
|
+
# {
|
|
96
|
+
# value: "Memory content",
|
|
97
|
+
# token_count: 150,
|
|
98
|
+
# importance: 5.0,
|
|
99
|
+
# added_at: Time.now,
|
|
100
|
+
# from_recall: false
|
|
101
|
+
# }
|
|
102
|
+
end
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### Why Token-Limited?
|
|
106
|
+
|
|
107
|
+
LLMs have finite context windows. Even with 200K token models, managing token budgets is critical:
|
|
108
|
+
|
|
109
|
+
- **Prevent context overflow**: Ensure working memory fits in LLM context
|
|
110
|
+
- **Cost control**: API-based LLMs charge per token
|
|
111
|
+
- **Focus attention**: Too much context dilutes LLM focus
|
|
112
|
+
- **Performance**: Smaller context = faster LLM inference
|
|
113
|
+
|
|
114
|
+
!!! tip "Token Budget Strategy"
|
|
115
|
+
The working memory token limit should be **60-70% of the LLM's context window** to leave room for system prompts, user queries, and LLM responses.
|
|
116
|
+
|
|
117
|
+
### Working Memory Operations
|
|
118
|
+
|
|
119
|
+
#### Add Node
|
|
120
|
+
|
|
121
|
+
```ruby
|
|
122
|
+
def add(key, value, token_count:, importance: 1.0, from_recall: false)
|
|
123
|
+
# Check if eviction needed
|
|
124
|
+
if token_count + current_tokens > @max_tokens
|
|
125
|
+
evict_to_make_space(token_count)
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
# Add to working memory
|
|
129
|
+
@nodes[key] = {
|
|
130
|
+
value: value,
|
|
131
|
+
token_count: token_count,
|
|
132
|
+
importance: importance,
|
|
133
|
+
added_at: Time.now,
|
|
134
|
+
from_recall: from_recall
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
# Track access order (LRU)
|
|
138
|
+
update_access(key)
|
|
139
|
+
end
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
**Time Complexity:** O(1) amortized (eviction is O(n log n) when triggered)
|
|
143
|
+
|
|
144
|
+
#### Retrieve Node
|
|
145
|
+
|
|
146
|
+
```ruby
|
|
147
|
+
def retrieve(key)
|
|
148
|
+
return nil unless @nodes.key?(key)
|
|
149
|
+
|
|
150
|
+
# Update access order
|
|
151
|
+
update_access(key)
|
|
152
|
+
|
|
153
|
+
@nodes[key]
|
|
154
|
+
end
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
**Time Complexity:** O(1)
|
|
158
|
+
|
|
159
|
+
#### Remove Node
|
|
160
|
+
|
|
161
|
+
```ruby
|
|
162
|
+
def remove(key)
|
|
163
|
+
@nodes.delete(key)
|
|
164
|
+
@access_order.delete(key)
|
|
165
|
+
end
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
**Time Complexity:** O(1) for hash, O(n) for access_order
|
|
169
|
+
|
|
170
|
+
### Eviction Strategy
|
|
171
|
+
|
|
172
|
+
When adding a node would push working memory past its token limit, HTM first evicts existing nodes, ordered by importance (lowest first) and then by age (oldest first), until enough tokens have been freed.
|
|
173
|
+
|
|
174
|
+
#### Eviction Algorithm
|
|
175
|
+
|
|
176
|
+
```ruby
|
|
177
|
+
def evict_to_make_space(needed_tokens)
|
|
178
|
+
evicted = []
|
|
179
|
+
tokens_freed = 0
|
|
180
|
+
|
|
181
|
+
# Sort by [importance ASC, age DESC]
|
|
182
|
+
# Lower importance evicted first
|
|
183
|
+
# Within same importance, older evicted first
|
|
184
|
+
candidates = @nodes.sort_by do |key, node|
|
|
185
|
+
recency = Time.now - node[:added_at]
|
|
186
|
+
[node[:importance], -recency]
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
# Greedy eviction: stop when enough space
|
|
190
|
+
candidates.each do |key, node|
|
|
191
|
+
break if tokens_freed >= needed_tokens
|
|
192
|
+
|
|
193
|
+
evicted << { key: key, value: node[:value] }
|
|
194
|
+
tokens_freed += node[:token_count]
|
|
195
|
+
@nodes.delete(key)
|
|
196
|
+
@access_order.delete(key)
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
evicted
|
|
200
|
+
end
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
#### Eviction Priority
|
|
204
|
+
|
|
205
|
+
Nodes are evicted in this order:
|
|
206
|
+
|
|
207
|
+
1. **Low importance, old** (e.g., importance: 1.0, age: 5 days)
|
|
208
|
+
2. **Low importance, recent** (e.g., importance: 1.0, age: 1 hour)
|
|
209
|
+
3. **High importance, old** (e.g., importance: 9.0, age: 5 days)
|
|
210
|
+
4. **High importance, recent** (e.g., importance: 9.0, age: 1 hour) ← **Kept longest**
|
|
211
|
+
|
|
212
|
+
<svg viewBox="0 0 800 400" xmlns="http://www.w3.org/2000/svg" style="background: transparent;">
|
|
213
|
+
<!-- Title -->
|
|
214
|
+
<text x="400" y="30" text-anchor="middle" fill="#E0E0E0" font-size="16" font-weight="bold">Eviction Priority (Lower → Higher retention)</text>
|
|
215
|
+
|
|
216
|
+
<!-- Priority bars -->
|
|
217
|
+
<rect x="50" y="80" width="150" height="50" fill="rgba(244, 67, 54, 0.6)" stroke="#F44336" stroke-width="2" rx="3"/>
|
|
218
|
+
<text x="125" y="110" text-anchor="middle" fill="#E0E0E0" font-size="12" font-weight="bold">Tier 1: Evict First</text>
|
|
219
|
+
|
|
220
|
+
<rect x="220" y="80" width="150" height="50" fill="rgba(255, 152, 0, 0.6)" stroke="#FF9800" stroke-width="2" rx="3"/>
|
|
221
|
+
<text x="295" y="110" text-anchor="middle" fill="#E0E0E0" font-size="12" font-weight="bold">Tier 2</text>
|
|
222
|
+
|
|
223
|
+
<rect x="390" y="80" width="150" height="50" fill="rgba(255, 193, 7, 0.6)" stroke="#FFC107" stroke-width="2" rx="3"/>
|
|
224
|
+
<text x="465" y="110" text-anchor="middle" fill="#E0E0E0" font-size="12" font-weight="bold">Tier 3</text>
|
|
225
|
+
|
|
226
|
+
<rect x="560" y="80" width="150" height="50" fill="rgba(76, 175, 80, 0.6)" stroke="#4CAF50" stroke-width="2" rx="3"/>
|
|
227
|
+
<text x="635" y="110" text-anchor="middle" fill="#E0E0E0" font-size="12" font-weight="bold">Tier 4: Keep Longest</text>
|
|
228
|
+
|
|
229
|
+
<!-- Details -->
|
|
230
|
+
<text x="125" y="160" text-anchor="middle" fill="#B0B0B0" font-size="11">Importance: 1.0</text>
|
|
231
|
+
<text x="125" y="180" text-anchor="middle" fill="#B0B0B0" font-size="11">Age: 5 days</text>
|
|
232
|
+
<text x="125" y="200" text-anchor="middle" fill="#F44336" font-size="10" font-weight="bold">Low value, stale</text>
|
|
233
|
+
|
|
234
|
+
<text x="295" y="160" text-anchor="middle" fill="#B0B0B0" font-size="11">Importance: 1.0</text>
|
|
235
|
+
<text x="295" y="180" text-anchor="middle" fill="#B0B0B0" font-size="11">Age: 1 hour</text>
|
|
236
|
+
<text x="295" y="200" text-anchor="middle" fill="#FF9800" font-size="10" font-weight="bold">Low value, recent</text>
|
|
237
|
+
|
|
238
|
+
<text x="465" y="160" text-anchor="middle" fill="#B0B0B0" font-size="11">Importance: 9.0</text>
|
|
239
|
+
<text x="465" y="180" text-anchor="middle" fill="#B0B0B0" font-size="11">Age: 5 days</text>
|
|
240
|
+
<text x="465" y="200" text-anchor="middle" fill="#FFC107" font-size="10" font-weight="bold">High value, older</text>
|
|
241
|
+
|
|
242
|
+
<text x="635" y="160" text-anchor="middle" fill="#B0B0B0" font-size="11">Importance: 9.0</text>
|
|
243
|
+
<text x="635" y="180" text-anchor="middle" fill="#B0B0B0" font-size="11">Age: 1 hour</text>
|
|
244
|
+
<text x="635" y="200" text-anchor="middle" fill="#4CAF50" font-size="10" font-weight="bold">High value, fresh</text>
|
|
245
|
+
|
|
246
|
+
<!-- Example scenario -->
|
|
247
|
+
<text x="50" y="250" fill="#E0E0E0" font-size="13" font-weight="bold">Example Eviction Scenario:</text>
|
|
248
|
+
<text x="50" y="280" fill="#B0B0B0" font-size="11">Working Memory: 127,500 / 128,000 tokens (99% full)</text>
|
|
249
|
+
<text x="50" y="300" fill="#B0B0B0" font-size="11">New memory to add: 5,000 tokens</text>
|
|
250
|
+
<text x="50" y="320" fill="#B0B0B0" font-size="11">Need to free: 4,500 tokens</text>
|
|
251
|
+
|
|
252
|
+
<text x="50" y="350" fill="#4CAF50" font-size="11">Eviction: Remove Tier 1 and Tier 2 nodes until 4,500+ tokens freed</text>
|
|
253
|
+
<text x="50" y="370" fill="#4CAF50" font-size="11">Result: Tier 3 and Tier 4 nodes preserved (high importance)</text>
|
|
254
|
+
</svg>
|
|
255
|
+
|
|
256
|
+
!!! warning "Importance Matters"
|
|
257
|
+
**Assign meaningful importance scores!** Low-importance memories (1.0-3.0) will be evicted first. Use higher scores (7.0-10.0) for critical information like architectural decisions, user preferences, and long-term facts.
|
|
258
|
+
|
|
259
|
+
!!! info "Related ADR"
|
|
260
|
+
See [ADR-007: Working Memory Eviction Strategy](adrs/007-eviction-strategy.md) for detailed rationale and alternatives considered.
|
|
261
|
+
|
|
262
|
+
### Context Assembly Strategies
|
|
263
|
+
|
|
264
|
+
Working memory provides three strategies for assembling context strings for LLM consumption:
|
|
265
|
+
|
|
266
|
+
#### 1. Recent (`:recent`)
|
|
267
|
+
|
|
268
|
+
Sort by access order, most recently accessed first.
|
|
269
|
+
|
|
270
|
+
```ruby
|
|
271
|
+
def assemble_context(strategy: :recent, max_tokens: nil)
|
|
272
|
+
nodes = @access_order.reverse.map { |key| @nodes[key] }
|
|
273
|
+
build_context(nodes, max_tokens || @max_tokens)
|
|
274
|
+
end
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
**Best For:**
|
|
278
|
+
|
|
279
|
+
- Conversational continuity
|
|
280
|
+
- Chat interfaces
|
|
281
|
+
- Following current discussion thread
|
|
282
|
+
- Debugging sessions
|
|
283
|
+
|
|
284
|
+
**Example Use Case:**
|
|
285
|
+
|
|
286
|
+
```ruby
|
|
287
|
+
# User having back-and-forth coding conversation
|
|
288
|
+
context = htm.create_context(strategy: :recent, max_tokens: 8000)
|
|
289
|
+
# Recent messages prioritized for coherent conversation flow
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
#### 2. Important (`:important`)
|
|
293
|
+
|
|
294
|
+
Sort by importance score, highest first.
|
|
295
|
+
|
|
296
|
+
```ruby
|
|
297
|
+
def assemble_context(strategy: :important, max_tokens: nil)
|
|
298
|
+
nodes = @nodes.sort_by { |k, v| -v[:importance] }.map(&:last)
|
|
299
|
+
build_context(nodes, max_tokens || @max_tokens)
|
|
300
|
+
end
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
**Best For:**
|
|
304
|
+
|
|
305
|
+
- Strategic planning
|
|
306
|
+
- Architectural decisions
|
|
307
|
+
- Summarization tasks
|
|
308
|
+
- Key facts retrieval
|
|
309
|
+
|
|
310
|
+
**Example Use Case:**
|
|
311
|
+
|
|
312
|
+
```ruby
|
|
313
|
+
# LLM helping with architectural review
|
|
314
|
+
context = htm.create_context(strategy: :important)
|
|
315
|
+
# Critical decisions and facts prioritized over recent chat
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
#### 3. Balanced (`:balanced`) - **Recommended Default**
|
|
319
|
+
|
|
320
|
+
Hybrid scoring with time decay: `importance * (1.0 / (1 + recency_hours))`
|
|
321
|
+
|
|
322
|
+
```ruby
|
|
323
|
+
def assemble_context(strategy: :balanced, max_tokens: nil)
|
|
324
|
+
nodes = @nodes.sort_by { |k, v|
|
|
325
|
+
recency_hours = (Time.now - v[:added_at]) / 3600.0
|
|
326
|
+
score = v[:importance] * (1.0 / (1 + recency_hours))
|
|
327
|
+
-score # Descending
|
|
328
|
+
}.map(&:last)
|
|
329
|
+
|
|
330
|
+
build_context(nodes, max_tokens || @max_tokens)
|
|
331
|
+
end
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
**Decay Function:**
|
|
335
|
+
|
|
336
|
+
- **Just added (0 hours):** `importance * 1.0` (full weight)
|
|
337
|
+
- **1 hour old:** `importance * 0.5` (half weight)
|
|
338
|
+
- **3 hours old:** `importance * 0.25` (quarter weight)
|
|
339
|
+
- **24 hours old:** `importance * 0.04` (4% weight)
|
|
340
|
+
|
|
341
|
+
**Best For:**
|
|
342
|
+
|
|
343
|
+
- General-purpose LLM interactions
|
|
344
|
+
- Mixed conversational + strategic tasks
|
|
345
|
+
- Default strategy when unsure
|
|
346
|
+
|
|
347
|
+
**Example Use Case:**
|
|
348
|
+
|
|
349
|
+
```ruby
|
|
350
|
+
# Code helper assisting with debugging and design
|
|
351
|
+
context = htm.create_context(strategy: :balanced)
|
|
352
|
+
# Recent debugging context + important architectural decisions
|
|
353
|
+
```
|
|
354
|
+
|
|
355
|
+
<svg viewBox="0 0 800 400" xmlns="http://www.w3.org/2000/svg" style="background: transparent;">
|
|
356
|
+
<!-- Title -->
|
|
357
|
+
<text x="400" y="30" text-anchor="middle" fill="#E0E0E0" font-size="16" font-weight="bold">Balanced Strategy: Importance Decay Over Time</text>
|
|
358
|
+
|
|
359
|
+
<!-- Axes -->
|
|
360
|
+
<line x1="100" y1="350" x2="700" y2="350" stroke="#808080" stroke-width="2"/>
|
|
361
|
+
<line x1="100" y1="350" x2="100" y2="80" stroke="#808080" stroke-width="2"/>
|
|
362
|
+
|
|
363
|
+
<!-- X-axis labels -->
|
|
364
|
+
<text x="100" y="375" text-anchor="middle" fill="#B0B0B0" font-size="11">0h</text>
|
|
365
|
+
<text x="250" y="375" text-anchor="middle" fill="#B0B0B0" font-size="11">1h</text>
|
|
366
|
+
<text x="400" y="375" text-anchor="middle" fill="#B0B0B0" font-size="11">3h</text>
|
|
367
|
+
<text x="550" y="375" text-anchor="middle" fill="#B0B0B0" font-size="11">6h</text>
|
|
368
|
+
<text x="700" y="375" text-anchor="middle" fill="#B0B0B0" font-size="11">24h</text>
|
|
369
|
+
<text x="400" y="395" text-anchor="middle" fill="#E0E0E0" font-size="12" font-weight="bold">Time Since Added (hours)</text>
|
|
370
|
+
|
|
371
|
+
<!-- Y-axis labels -->
|
|
372
|
+
<text x="85" y="355" text-anchor="end" fill="#B0B0B0" font-size="11">0</text>
|
|
373
|
+
<text x="85" y="280" text-anchor="end" fill="#B0B0B0" font-size="11">3</text>
|
|
374
|
+
<text x="85" y="205" text-anchor="end" fill="#B0B0B0" font-size="11">6</text>
|
|
375
|
+
<text x="85" y="130" text-anchor="end" fill="#B0B0B0" font-size="11">9</text>
|
|
376
|
+
<text x="85" y="85" text-anchor="end" fill="#B0B0B0" font-size="11">10</text>
|
|
377
|
+
<text x="40" y="220" text-anchor="middle" fill="#E0E0E0" font-size="12" font-weight="bold" transform="rotate(-90 40 220)">Effective Score</text>
|
|
378
|
+
|
|
379
|
+
<!-- Decay curves for different importance levels -->
|
|
380
|
+
<!-- Importance 10.0 -->
|
|
381
|
+
<path d="M 100 80 Q 250 105 400 155 T 700 320" stroke="#4CAF50" stroke-width="3" fill="none"/>
|
|
382
|
+
<text x="710" y="320" fill="#4CAF50" font-size="11" font-weight="bold">Imp: 10.0</text>
|
|
383
|
+
|
|
384
|
+
<!-- Importance 5.0 -->
|
|
385
|
+
<path d="M 100 205 Q 250 230 400 255 T 700 335" stroke="#2196F3" stroke-width="3" fill="none"/>
|
|
386
|
+
<text x="710" y="335" fill="#2196F3" font-size="11" font-weight="bold">Imp: 5.0</text>
|
|
387
|
+
|
|
388
|
+
<!-- Importance 1.0 -->
|
|
389
|
+
<path d="M 100 330 Q 250 340 400 345 T 700 348" stroke="#FF9800" stroke-width="3" fill="none"/>
|
|
390
|
+
<text x="710" y="348" fill="#FF9800" font-size="11" font-weight="bold">Imp: 1.0</text>
|
|
391
|
+
|
|
392
|
+
<!-- Key insight -->
|
|
393
|
+
<rect x="150" y="50" width="500" height="25" fill="rgba(76, 175, 80, 0.1)" stroke="#4CAF50" stroke-width="1" rx="3"/>
|
|
394
|
+
<text x="400" y="68" text-anchor="middle" fill="#4CAF50" font-size="12">High-importance memories retain value longer, but recency still matters</text>
|
|
395
|
+
</svg>
|
|
396
|
+
|
|
397
|
+
!!! info "Related ADR"
|
|
398
|
+
See [ADR-006: Context Assembly Strategies](adrs/006-context-assembly.md) for detailed strategy analysis.
|
|
399
|
+
|
|
400
|
+
### Performance Characteristics
|
|
401
|
+
|
|
402
|
+
| Operation | Time Complexity | Typical Latency |
|
|
403
|
+
|-----------|----------------|-----------------|
|
|
404
|
+
| Add node | O(1) amortized | < 1ms |
|
|
405
|
+
| Retrieve node | O(1) | < 1ms |
|
|
406
|
+
| Eviction (when needed) | O(n log n) | < 10ms (for 200 nodes) |
|
|
407
|
+
| Context assembly | O(n log n) | < 10ms (for 200 nodes) |
|
|
408
|
+
| Check space | O(n) | < 1ms |
|
|
409
|
+
|
|
410
|
+
**Memory Usage:**
|
|
411
|
+
|
|
412
|
+
- Empty working memory: ~1KB
|
|
413
|
+
- 100 nodes (avg 500 tokens each): ~50KB metadata + node content
|
|
414
|
+
- 200 nodes (128K tokens): ~2-5MB total (including Ruby overhead)
|
|
415
|
+
|
|
416
|
+
## Long-Term Memory (Cold Tier)
|
|
417
|
+
|
|
418
|
+
Long-term memory provides unlimited, durable storage for all memories with advanced retrieval capabilities using RAG (Retrieval-Augmented Generation) patterns.
|
|
419
|
+
|
|
420
|
+
### Design Characteristics
|
|
421
|
+
|
|
422
|
+
| Aspect | Details |
|
|
423
|
+
|--------|---------|
|
|
424
|
+
| **Purpose** | Permanent knowledge base |
|
|
425
|
+
| **Capacity** | Effectively unlimited |
|
|
426
|
+
| **Storage** | PostgreSQL 16+ with TimescaleDB |
|
|
427
|
+
| **Access Pattern** | RAG-based retrieval (semantic + temporal) |
|
|
428
|
+
| **Retention** | Permanent (explicit deletion only) |
|
|
429
|
+
| **Lifetime** | Forever (survives process restarts) |
|
|
430
|
+
| **Performance** | O(log n) with indexes and HNSW |
|
|
431
|
+
|
|
432
|
+
### Database Schema (Simplified)
|
|
433
|
+
|
|
434
|
+
```sql
|
|
435
|
+
CREATE TABLE nodes (
|
|
436
|
+
id BIGSERIAL PRIMARY KEY,
|
|
437
|
+
key TEXT UNIQUE NOT NULL,
|
|
438
|
+
value TEXT NOT NULL,
|
|
439
|
+
type TEXT,
|
|
440
|
+
importance REAL DEFAULT 1.0,
|
|
441
|
+
token_count INTEGER,
|
|
442
|
+
in_working_memory BOOLEAN DEFAULT FALSE,
|
|
443
|
+
robot_id TEXT NOT NULL REFERENCES robots(id),
|
|
444
|
+
embedding vector(1536),
|
|
445
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
446
|
+
...
|
|
447
|
+
);
|
|
448
|
+
|
|
449
|
+
-- HNSW index for vector similarity
|
|
450
|
+
CREATE INDEX idx_nodes_embedding ON nodes
|
|
451
|
+
USING hnsw (embedding vector_cosine_ops)
|
|
452
|
+
WITH (m = 16, ef_construction = 64);
|
|
453
|
+
|
|
454
|
+
-- Full-text search
|
|
455
|
+
CREATE INDEX idx_nodes_value_gin ON nodes
|
|
456
|
+
USING gin(to_tsvector('english', value));
|
|
457
|
+
```
|
|
458
|
+
|
|
459
|
+
### Why PostgreSQL + TimescaleDB?
|
|
460
|
+
|
|
461
|
+
**PostgreSQL provides:**
|
|
462
|
+
|
|
463
|
+
- ACID guarantees for data integrity
|
|
464
|
+
- Rich ecosystem and tooling
|
|
465
|
+
- pgvector for vector similarity search
|
|
466
|
+
- Full-text search with GIN indexes
|
|
467
|
+
- Mature, production-proven
|
|
468
|
+
|
|
469
|
+
**TimescaleDB adds:**
|
|
470
|
+
|
|
471
|
+
- Hypertable partitioning by time
|
|
472
|
+
- Automatic compression (70-90% reduction)
|
|
473
|
+
- Time-range query optimization
|
|
474
|
+
- Retention policies for data lifecycle
|
|
475
|
+
|
|
476
|
+
!!! info "Related ADR"
|
|
477
|
+
See [ADR-001: Use PostgreSQL with TimescaleDB for Storage](adrs/001-postgresql-timescaledb.md) for complete rationale.
|
|
478
|
+
|
|
479
|
+
### Long-Term Memory Operations
|
|
480
|
+
|
|
481
|
+
#### Add Node
|
|
482
|
+
|
|
483
|
+
```ruby
|
|
484
|
+
def add(key:, value:, embedding:, importance:, token_count:, robot_id:, type: nil)
|
|
485
|
+
result = @db.exec_params(<<~SQL, [key, value, type, importance, token_count, robot_id, embedding])
|
|
486
|
+
INSERT INTO nodes (key, value, type, importance, token_count, robot_id, embedding, in_working_memory)
|
|
487
|
+
VALUES ($1, $2, $3, $4, $5, $6, $7, TRUE)
|
|
488
|
+
RETURNING id
|
|
489
|
+
SQL
|
|
490
|
+
|
|
491
|
+
result[0]['id'].to_i
|
|
492
|
+
end
|
|
493
|
+
```
|
|
494
|
+
|
|
495
|
+
**Time Complexity:** O(log n) with B-tree and HNSW indexes
|
|
496
|
+
|
|
497
|
+
#### Retrieve by Key
|
|
498
|
+
|
|
499
|
+
```ruby
|
|
500
|
+
def retrieve(key)
|
|
501
|
+
result = @db.exec_params(<<~SQL, [key])
|
|
502
|
+
SELECT * FROM nodes WHERE key = $1
|
|
503
|
+
SQL
|
|
504
|
+
|
|
505
|
+
result.first
|
|
506
|
+
end
|
|
507
|
+
```
|
|
508
|
+
|
|
509
|
+
**Time Complexity:** O(log n) with the unique B-tree index on key
|
|
510
|
+
|
|
511
|
+
#### Vector Search
|
|
512
|
+
|
|
513
|
+
```ruby
|
|
514
|
+
def search(timeframe:, query:, limit:, embedding_service:)
|
|
515
|
+
query_embedding = embedding_service.embed(query)
|
|
516
|
+
|
|
517
|
+
result = @db.exec_params(<<~SQL, [timeframe.begin, timeframe.end, query_embedding, limit])
|
|
518
|
+
SELECT *, embedding <=> $3 AS distance
|
|
519
|
+
FROM nodes
|
|
520
|
+
WHERE created_at BETWEEN $1 AND $2
|
|
521
|
+
ORDER BY distance ASC
|
|
522
|
+
LIMIT $4
|
|
523
|
+
SQL
|
|
524
|
+
|
|
525
|
+
result.to_a
|
|
526
|
+
end
|
|
527
|
+
```
|
|
528
|
+
|
|
529
|
+
**Time Complexity:** O(log n) with HNSW approximate nearest neighbor
|
|
530
|
+
|
|
531
|
+
#### Hybrid Search (Vector + Full-Text)
|
|
532
|
+
|
|
533
|
+
Combines vector similarity and full-text search using Reciprocal Rank Fusion (RRF):
|
|
534
|
+
|
|
535
|
+
```ruby
|
|
536
|
+
def search_hybrid(timeframe:, query:, limit:, embedding_service:)
|
|
537
|
+
query_embedding = embedding_service.embed(query)
|
|
538
|
+
|
|
539
|
+
# Get both vector and full-text results
|
|
540
|
+
vector_results = search(timeframe: timeframe, query: query, limit: limit * 2, embedding_service: embedding_service)
|
|
541
|
+
fulltext_results = search_fulltext(timeframe, query, limit * 2)
|
|
542
|
+
|
|
543
|
+
# RRF scoring
|
|
544
|
+
scores = {}
|
|
545
|
+
|
|
546
|
+
vector_results.each_with_index do |node, rank|
|
|
547
|
+
scores[node['id']] ||= 0
|
|
548
|
+
scores[node['id']] += 1.0 / (rank + 60) # RRF constant: 60
|
|
549
|
+
end
|
|
550
|
+
|
|
551
|
+
fulltext_results.each_with_index do |node, rank|
|
|
552
|
+
scores[node['id']] ||= 0
|
|
553
|
+
scores[node['id']] += 1.0 / (rank + 60)
|
|
554
|
+
end
|
|
555
|
+
|
|
556
|
+
# Sort by combined score
|
|
557
|
+
all_nodes = (vector_results + fulltext_results).uniq { |n| n['id'] }
|
|
558
|
+
all_nodes.sort_by { |n| -scores[n['id']] }.take(limit)
|
|
559
|
+
end
|
|
560
|
+
```
|
|
561
|
+
|
|
562
|
+
!!! info "Related ADR"
|
|
563
|
+
See [ADR-005: RAG-Based Retrieval with Hybrid Search](adrs/005-rag-retrieval.md) for search strategy details.
|
|
564
|
+
|
|
565
|
+
### RAG-Based Retrieval
|
|
566
|
+
|
|
567
|
+
HTM uses Retrieval-Augmented Generation patterns to find relevant memories:
|
|
568
|
+
|
|
569
|
+
#### Search Strategies
|
|
570
|
+
|
|
571
|
+
##### 1. Vector Search (`:vector`)
|
|
572
|
+
|
|
573
|
+
Pure semantic similarity using cosine distance between embeddings.
|
|
574
|
+
|
|
575
|
+
**Best For:**
|
|
576
|
+
|
|
577
|
+
- Conceptual similarity
|
|
578
|
+
- Finding related ideas
|
|
579
|
+
- Semantic matching across paraphrases
|
|
580
|
+
|
|
581
|
+
**Example:**
|
|
582
|
+
|
|
583
|
+
```ruby
|
|
584
|
+
# Query: "database optimization"
|
|
585
|
+
# Finds: "PostgreSQL index tuning", "query performance", "EXPLAIN ANALYZE"
|
|
586
|
+
memories = htm.recall(timeframe: "last month", topic: "database optimization", strategy: :vector)
|
|
587
|
+
```
|
|
588
|
+
|
|
589
|
+
##### 2. Full-Text Search (`:fulltext`)
|
|
590
|
+
|
|
591
|
+
Keyword-based matching using PostgreSQL GIN indexes.
|
|
592
|
+
|
|
593
|
+
**Best For:**
|
|
594
|
+
|
|
595
|
+
- Exact phrase matching
|
|
596
|
+
- Keyword search
|
|
597
|
+
- Code snippets
|
|
598
|
+
- Proper nouns
|
|
599
|
+
|
|
600
|
+
**Example:**
|
|
601
|
+
|
|
602
|
+
```ruby
|
|
603
|
+
# Query: "TimescaleDB compression"
|
|
604
|
+
# Finds exact matches for "TimescaleDB" and "compression"
|
|
605
|
+
memories = htm.recall(timeframe: "last week", topic: "TimescaleDB compression", strategy: :fulltext)
|
|
606
|
+
```
|
|
607
|
+
|
|
608
|
+
##### 3. Hybrid Search (`:hybrid`) - **Recommended**
|
|
609
|
+
|
|
610
|
+
Combines vector and full-text with RRF scoring.
|
|
611
|
+
|
|
612
|
+
**Best For:**
|
|
613
|
+
|
|
614
|
+
- General-purpose retrieval
|
|
615
|
+
- Balanced precision and recall
|
|
616
|
+
- Most use cases
|
|
617
|
+
|
|
618
|
+
**Example:**
|
|
619
|
+
|
|
620
|
+
```ruby
|
|
621
|
+
# Combines semantic similarity AND keyword matching
|
|
622
|
+
memories = htm.recall(timeframe: "last month", topic: "PostgreSQL performance", strategy: :hybrid)
|
|
623
|
+
```
|
|
624
|
+
|
|
625
|
+
### Performance Characteristics
|
|
626
|
+
|
|
627
|
+
| Operation | Time Complexity | Typical Latency | Notes |
|
|
628
|
+
|-----------|----------------|-----------------|-------|
|
|
629
|
+
| Add node | O(log n) | 20-50ms | Includes index updates |
|
|
630
|
+
| Retrieve by key | O(log n) | 5-10ms | Unique B-tree index lookup |
|
|
631
|
+
| Vector search | O(log n) | 50-100ms | HNSW approximate |
|
|
632
|
+
| Full-text search | O(log n) | 20-40ms | GIN index |
|
|
633
|
+
| Hybrid search | O(log n) | 80-150ms | Both + RRF merge |
|
|
634
|
+
| Delete node | O(log n) | 10-30ms | Cascading deletes |
|
|
635
|
+
|
|
636
|
+
**Storage Efficiency:**
|
|
637
|
+
|
|
638
|
+
With TimescaleDB compression (after 30 days):
|
|
639
|
+
|
|
640
|
+
- Text node: ~1KB → ~200 bytes (80% reduction)
|
|
641
|
+
- Node with embedding: ~7KB → ~1-2KB (70-85% reduction)
|
|
642
|
+
- 100,000 nodes: ~700MB → ~100-200MB
|
|
643
|
+
|
|
644
|
+
## Memory Flow: Add → Evict → Recall
|
|
645
|
+
|
|
646
|
+
### Complete Flow Diagram
|
|
647
|
+
|
|
648
|
+
```mermaid
|
|
649
|
+
graph TB
|
|
650
|
+
subgraph "1. Add Memory"
|
|
651
|
+
A[User: add_node] --> B[Generate embedding]
|
|
652
|
+
B --> C[Store in LTM]
|
|
653
|
+
C --> D{WM has space?}
|
|
654
|
+
D -->|Yes| E[Add to WM]
|
|
655
|
+
D -->|No| F[Evict from WM]
|
|
656
|
+
F --> G[Mark evicted in LTM]
|
|
657
|
+
G --> E
|
|
658
|
+
end
|
|
659
|
+
|
|
660
|
+
subgraph "2. Memory States"
|
|
661
|
+
E --> H[In Both Memories]
|
|
662
|
+
G --> I[In LTM Only]
|
|
663
|
+
end
|
|
664
|
+
|
|
665
|
+
subgraph "3. Recall Memory"
|
|
666
|
+
J[User: recall] --> K[RAG Search in LTM]
|
|
667
|
+
K --> L[Results found]
|
|
668
|
+
L --> M{WM has space?}
|
|
669
|
+
M -->|Yes| N[Add to WM]
|
|
670
|
+
M -->|No| O[Evict from WM]
|
|
671
|
+
O --> N
|
|
672
|
+
N --> H
|
|
673
|
+
end
|
|
674
|
+
|
|
675
|
+
I -.recall.-> J
|
|
676
|
+
H -.already loaded.-> P[Return from WM]
|
|
677
|
+
|
|
678
|
+
style A fill:rgba(76,175,80,0.3)
|
|
679
|
+
style J fill:rgba(33,150,243,0.3)
|
|
680
|
+
style H fill:rgba(255,193,7,0.3)
|
|
681
|
+
style I fill:rgba(156,39,176,0.3)
|
|
682
|
+
```
|
|
683
|
+
|
|
684
|
+
### Example: Adding 5000-Token Memory to Full Working Memory
|
|
685
|
+
|
|
686
|
+
```ruby
|
|
687
|
+
# Initial state
|
|
688
|
+
htm.memory_stats[:working_memory]
|
|
689
|
+
# => {
|
|
690
|
+
# current_tokens: 127_500,
|
|
691
|
+
# max_tokens: 128_000,
|
|
692
|
+
# utilization: 99.6%,
|
|
693
|
+
# node_count: 85
|
|
694
|
+
# }
|
|
695
|
+
|
|
696
|
+
# Add large memory
|
|
697
|
+
htm.add_node("large_doc", large_documentation, importance: 7.0)
|
|
698
|
+
|
|
699
|
+
# HTM automatically:
|
|
700
|
+
# 1. Generates embedding (Ollama: ~50ms)
|
|
701
|
+
# 2. Stores in long-term memory (PostgreSQL: ~30ms)
|
|
702
|
+
# 3. Checks working memory space: 5000 + 127500 > 128000 (no space!)
|
|
703
|
+
# 4. Evicts low-importance old nodes to free 4500+ tokens
|
|
704
|
+
# - Evicts 3 nodes: (importance: 1.0, age: 3 days), (importance: 2.0, age: 1 day), etc.
|
|
705
|
+
# 5. Adds new memory to working memory
|
|
706
|
+
# 6. Total time: ~100ms
|
|
707
|
+
|
|
708
|
+
# New state
|
|
709
|
+
htm.memory_stats[:working_memory]
|
|
710
|
+
# => {
|
|
711
|
+
# current_tokens: 128_000,
|
|
712
|
+
# max_tokens: 128_000,
|
|
713
|
+
# utilization: 100.0%,
|
|
714
|
+
# node_count: 83 # Lost 3 nodes, gained 1
|
|
715
|
+
# }
|
|
716
|
+
|
|
717
|
+
# Evicted nodes still in long-term memory!
|
|
718
|
+
evicted_node = htm.retrieve("old_debug_log") # Still works
|
|
719
|
+
# => { "key" => "old_debug_log", "in_working_memory" => false, ... }
|
|
720
|
+
```
|
|
721
|
+
|
|
722
|
+
## Context Assembly Strategies in Detail
|
|
723
|
+
|
|
724
|
+
### Strategy Comparison
|
|
725
|
+
|
|
726
|
+
| Strategy | Sort Key | Best Use Case | Typical Output |
|
|
727
|
+
|----------|----------|---------------|----------------|
|
|
728
|
+
| **Recent** | Access order (newest first) | Conversations, debugging | Recent 5-10 messages |
|
|
729
|
+
| **Important** | Importance score (highest first) | Planning, decisions | Top 10 most important facts |
|
|
730
|
+
| **Balanced** | `importance / (1 + hours)` | General assistant | Mix of recent + important |
|
|
731
|
+
|
|
732
|
+
### Code Examples
|
|
733
|
+
|
|
734
|
+
#### Recent Strategy
|
|
735
|
+
|
|
736
|
+
```ruby
|
|
737
|
+
# Assemble context from most recent memories
|
|
738
|
+
context = htm.create_context(strategy: :recent, max_tokens: 8000)
|
|
739
|
+
|
|
740
|
+
# Typical output (recent conversation):
|
|
741
|
+
# """
|
|
742
|
+
# User: What's the capital of France?
|
|
743
|
+
# Assistant: The capital of France is Paris.
|
|
744
|
+
# User: Tell me about its history.
|
|
745
|
+
# Assistant: Paris has been inhabited since...
|
|
746
|
+
# ...
|
|
747
|
+
# """
|
|
748
|
+
```
|
|
749
|
+
|
|
750
|
+
#### Important Strategy
|
|
751
|
+
|
|
752
|
+
```ruby
|
|
753
|
+
# Assemble context from most important memories
|
|
754
|
+
context = htm.create_context(strategy: :important, max_tokens: 4000)
|
|
755
|
+
|
|
756
|
+
# Typical output (critical facts):
|
|
757
|
+
# """
|
|
758
|
+
# Decision: Use PostgreSQL with TimescaleDB for storage (importance: 10.0)
|
|
759
|
+
# User preference: Always use debug_me over puts (importance: 9.0)
|
|
760
|
+
# Architecture: Two-tier memory system (importance: 9.0)
|
|
761
|
+
# ...
|
|
762
|
+
# """
|
|
763
|
+
```
|
|
764
|
+
|
|
765
|
+
#### Balanced Strategy
|
|
766
|
+
|
|
767
|
+
```ruby
|
|
768
|
+
# Assemble context with time decay
|
|
769
|
+
context = htm.create_context(strategy: :balanced)
|
|
770
|
+
|
|
771
|
+
# Typical output (hybrid):
|
|
772
|
+
# """
|
|
773
|
+
# Recent debugging: ValueError in embedding service (importance: 7.0, 10 min ago) [score: 6.0]
|
|
774
|
+
# Critical decision: PostgreSQL chosen (importance: 10.0, 3 days ago) [score: 0.14]
|
|
775
|
+
# Current task: Implementing RAG search (importance: 6.0, 1 hour ago) [score: 3.0]
|
|
776
|
+
# ...
|
|
777
|
+
# """
|
|
778
|
+
```
|
|
779
|
+
|
|
780
|
+
## Performance Optimization
|
|
781
|
+
|
|
782
|
+
### Working Memory Optimization
|
|
783
|
+
|
|
784
|
+
#### 1. Tune Token Limit
|
|
785
|
+
|
|
786
|
+
```ruby
|
|
787
|
+
# For shorter context windows (e.g., GPT-3.5 with 16K tokens)
|
|
788
|
+
htm = HTM.new(working_memory_size: 8_000) # Leave room for prompt + response
|
|
789
|
+
|
|
790
|
+
# For longer context windows (e.g., Claude 3 with 200K tokens)
|
|
791
|
+
htm = HTM.new(working_memory_size: 128_000) # Default
|
|
792
|
+
```
|
|
793
|
+
|
|
794
|
+
#### 2. Adjust Importance Scores
|
|
795
|
+
|
|
796
|
+
```ruby
|
|
797
|
+
# High importance for critical information
|
|
798
|
+
htm.add_node("user_preference", "User prefers Vim keybindings", importance: 9.0)
|
|
799
|
+
|
|
800
|
+
# Low importance for transient information
|
|
801
|
+
htm.add_node("debug_log", "Temporary debug output", importance: 1.0)
|
|
802
|
+
|
|
803
|
+
# Medium importance for general context
|
|
804
|
+
htm.add_node("discussion", "Discussed API design patterns", importance: 5.0)
|
|
805
|
+
```
|
|
806
|
+
|
|
807
|
+
#### 3. Use Appropriate Context Strategy
|
|
808
|
+
|
|
809
|
+
```ruby
|
|
810
|
+
# For chat: recent strategy
|
|
811
|
+
chat_context = htm.create_context(strategy: :recent, max_tokens: 8000)
|
|
812
|
+
|
|
813
|
+
# For planning: important strategy
|
|
814
|
+
planning_context = htm.create_context(strategy: :important, max_tokens: 4000)
|
|
815
|
+
|
|
816
|
+
# For general: balanced strategy (default)
|
|
817
|
+
general_context = htm.create_context(strategy: :balanced)
|
|
818
|
+
```
|
|
819
|
+
|
|
820
|
+
### Long-Term Memory Optimization
|
|
821
|
+
|
|
822
|
+
#### 1. Leverage TimescaleDB Compression
|
|
823
|
+
|
|
824
|
+
```sql
|
|
825
|
+
-- Compress chunks older than 30 days (compression must already be enabled on the table: run the ALTER TABLE below first)
|
|
826
|
+
SELECT add_compression_policy('nodes', INTERVAL '30 days');
|
|
827
|
+
|
|
828
|
+
-- Compress by robot_id and type for better ratio
|
|
829
|
+
ALTER TABLE nodes SET (
|
|
830
|
+
timescaledb.compress,
|
|
831
|
+
timescaledb.compress_segmentby = 'robot_id,type'
|
|
832
|
+
);
|
|
833
|
+
```
|
|
834
|
+
|
|
835
|
+
#### 2. Use Appropriate Search Strategy
|
|
836
|
+
|
|
837
|
+
```ruby
|
|
838
|
+
# For exact matches: full-text
|
|
839
|
+
exact_matches = htm.recall(timeframe: "last week", topic: "PostgreSQL", strategy: :fulltext)
|
|
840
|
+
|
|
841
|
+
# For semantic similarity: vector
|
|
842
|
+
similar_concepts = htm.recall(timeframe: "last month", topic: "database performance", strategy: :vector)
|
|
843
|
+
|
|
844
|
+
# For best results: hybrid (default)
|
|
845
|
+
best_results = htm.recall(timeframe: "last month", topic: "PostgreSQL performance", strategy: :hybrid)
|
|
846
|
+
```
|
|
847
|
+
|
|
848
|
+
#### 3. Index Tuning
|
|
849
|
+
|
|
850
|
+
```sql
|
|
851
|
+
-- Monitor HNSW index size
|
|
852
|
+
SELECT pg_size_pretty(pg_relation_size('idx_nodes_embedding')) AS index_size;
|
|
853
|
+
|
|
854
|
+
-- Rebuild HNSW index if needed
|
|
855
|
+
REINDEX INDEX CONCURRENTLY idx_nodes_embedding;
|
|
856
|
+
|
|
857
|
+
-- Analyze query plans
|
|
858
|
+
EXPLAIN ANALYZE
|
|
859
|
+
SELECT * FROM nodes
|
|
860
|
+
WHERE created_at > NOW() - INTERVAL '7 days'
|
|
861
|
+
AND embedding <=> '[...]' < 0.5
|
|
862
|
+
ORDER BY embedding <=> '[...]'
|
|
863
|
+
LIMIT 20;
|
|
864
|
+
```
|
|
865
|
+
|
|
866
|
+
## Related Documentation
|
|
867
|
+
|
|
868
|
+
- [Architecture Index](index.md) - System overview and component summary
|
|
869
|
+
- [Architecture Overview](overview.md) - Detailed architecture and data flows
|
|
870
|
+
- [Hive Mind Architecture](hive-mind.md) - Multi-robot shared memory
|
|
871
|
+
- [ADR-002: Two-Tier Memory Architecture](adrs/002-two-tier-memory.md)
|
|
872
|
+
- [ADR-006: Context Assembly Strategies](adrs/006-context-assembly.md)
|
|
873
|
+
- [ADR-007: Working Memory Eviction Strategy](adrs/007-eviction-strategy.md)
|