htm 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.architecture/decisions/adrs/001-use-postgresql-timescaledb-storage.md +227 -0
- data/.architecture/decisions/adrs/002-two-tier-memory-architecture.md +322 -0
- data/.architecture/decisions/adrs/003-ollama-default-embedding-provider.md +339 -0
- data/.architecture/decisions/adrs/004-multi-robot-shared-memory-hive-mind.md +374 -0
- data/.architecture/decisions/adrs/005-rag-based-retrieval-with-hybrid-search.md +443 -0
- data/.architecture/decisions/adrs/006-context-assembly-strategies.md +444 -0
- data/.architecture/decisions/adrs/007-working-memory-eviction-strategy.md +461 -0
- data/.architecture/decisions/adrs/008-robot-identification-system.md +550 -0
- data/.architecture/decisions/adrs/009-never-forget-explicit-deletion-only.md +570 -0
- data/.architecture/decisions/adrs/010-redis-working-memory-rejected.md +323 -0
- data/.architecture/decisions/adrs/011-database-side-embedding-generation-with-pgai.md +585 -0
- data/.architecture/decisions/adrs/012-llm-driven-ontology-topic-extraction.md +583 -0
- data/.architecture/decisions/adrs/013-activerecord-orm-and-many-to-many-tagging.md +299 -0
- data/.architecture/decisions/adrs/014-client-side-embedding-generation-workflow.md +569 -0
- data/.architecture/decisions/adrs/015-hierarchical-tag-ontology-and-llm-extraction.md +701 -0
- data/.architecture/decisions/adrs/016-async-embedding-and-tag-generation.md +694 -0
- data/.architecture/members.yml +144 -0
- data/.architecture/reviews/2025-10-29-llm-configuration-and-async-processing-review.md +1137 -0
- data/.architecture/reviews/initial-system-analysis.md +330 -0
- data/.envrc +32 -0
- data/.irbrc +145 -0
- data/CHANGELOG.md +150 -0
- data/COMMITS.md +196 -0
- data/LICENSE +21 -0
- data/README.md +1347 -0
- data/Rakefile +51 -0
- data/SETUP.md +268 -0
- data/config/database.yml +67 -0
- data/db/migrate/20250101000001_enable_extensions.rb +14 -0
- data/db/migrate/20250101000002_create_robots.rb +14 -0
- data/db/migrate/20250101000003_create_nodes.rb +42 -0
- data/db/migrate/20250101000005_create_tags.rb +38 -0
- data/db/migrate/20250101000007_add_node_vector_indexes.rb +30 -0
- data/db/schema.sql +473 -0
- data/db/seed_data/README.md +100 -0
- data/db/seed_data/presidents.md +136 -0
- data/db/seed_data/states.md +151 -0
- data/db/seeds.rb +208 -0
- data/dbdoc/README.md +173 -0
- data/dbdoc/public.node_stats.md +48 -0
- data/dbdoc/public.node_stats.svg +41 -0
- data/dbdoc/public.node_tags.md +40 -0
- data/dbdoc/public.node_tags.svg +112 -0
- data/dbdoc/public.nodes.md +54 -0
- data/dbdoc/public.nodes.svg +118 -0
- data/dbdoc/public.nodes_tags.md +39 -0
- data/dbdoc/public.nodes_tags.svg +112 -0
- data/dbdoc/public.ontology_structure.md +48 -0
- data/dbdoc/public.ontology_structure.svg +38 -0
- data/dbdoc/public.operations_log.md +42 -0
- data/dbdoc/public.operations_log.svg +130 -0
- data/dbdoc/public.relationships.md +39 -0
- data/dbdoc/public.relationships.svg +41 -0
- data/dbdoc/public.robot_activity.md +46 -0
- data/dbdoc/public.robot_activity.svg +35 -0
- data/dbdoc/public.robots.md +35 -0
- data/dbdoc/public.robots.svg +90 -0
- data/dbdoc/public.schema_migrations.md +29 -0
- data/dbdoc/public.schema_migrations.svg +26 -0
- data/dbdoc/public.tags.md +35 -0
- data/dbdoc/public.tags.svg +60 -0
- data/dbdoc/public.topic_relationships.md +45 -0
- data/dbdoc/public.topic_relationships.svg +32 -0
- data/dbdoc/schema.json +1437 -0
- data/dbdoc/schema.svg +154 -0
- data/docs/api/database.md +806 -0
- data/docs/api/embedding-service.md +532 -0
- data/docs/api/htm.md +797 -0
- data/docs/api/index.md +259 -0
- data/docs/api/long-term-memory.md +1096 -0
- data/docs/api/working-memory.md +665 -0
- data/docs/architecture/adrs/001-postgresql-timescaledb.md +314 -0
- data/docs/architecture/adrs/002-two-tier-memory.md +411 -0
- data/docs/architecture/adrs/003-ollama-embeddings.md +421 -0
- data/docs/architecture/adrs/004-hive-mind.md +437 -0
- data/docs/architecture/adrs/005-rag-retrieval.md +531 -0
- data/docs/architecture/adrs/006-context-assembly.md +496 -0
- data/docs/architecture/adrs/007-eviction-strategy.md +645 -0
- data/docs/architecture/adrs/008-robot-identification.md +625 -0
- data/docs/architecture/adrs/009-never-forget.md +648 -0
- data/docs/architecture/adrs/010-redis-working-memory-rejected.md +323 -0
- data/docs/architecture/adrs/011-pgai-integration.md +494 -0
- data/docs/architecture/adrs/index.md +215 -0
- data/docs/architecture/hive-mind.md +736 -0
- data/docs/architecture/index.md +351 -0
- data/docs/architecture/overview.md +538 -0
- data/docs/architecture/two-tier-memory.md +873 -0
- data/docs/assets/css/custom.css +83 -0
- data/docs/assets/images/htm-core-components.svg +63 -0
- data/docs/assets/images/htm-database-schema.svg +93 -0
- data/docs/assets/images/htm-hive-mind-architecture.svg +125 -0
- data/docs/assets/images/htm-importance-scoring-framework.svg +83 -0
- data/docs/assets/images/htm-layered-architecture.svg +71 -0
- data/docs/assets/images/htm-long-term-memory-architecture.svg +115 -0
- data/docs/assets/images/htm-working-memory-architecture.svg +120 -0
- data/docs/assets/images/htm.jpg +0 -0
- data/docs/assets/images/htm_demo.gif +0 -0
- data/docs/assets/js/mathjax.js +18 -0
- data/docs/assets/videos/htm_video.mp4 +0 -0
- data/docs/database_rake_tasks.md +322 -0
- data/docs/development/contributing.md +787 -0
- data/docs/development/index.md +336 -0
- data/docs/development/schema.md +596 -0
- data/docs/development/setup.md +719 -0
- data/docs/development/testing.md +819 -0
- data/docs/guides/adding-memories.md +824 -0
- data/docs/guides/context-assembly.md +1009 -0
- data/docs/guides/getting-started.md +577 -0
- data/docs/guides/index.md +118 -0
- data/docs/guides/long-term-memory.md +941 -0
- data/docs/guides/multi-robot.md +866 -0
- data/docs/guides/recalling-memories.md +927 -0
- data/docs/guides/search-strategies.md +953 -0
- data/docs/guides/working-memory.md +717 -0
- data/docs/index.md +214 -0
- data/docs/installation.md +477 -0
- data/docs/multi_framework_support.md +519 -0
- data/docs/quick-start.md +655 -0
- data/docs/setup_local_database.md +302 -0
- data/docs/using_rake_tasks_in_your_app.md +383 -0
- data/examples/basic_usage.rb +93 -0
- data/examples/cli_app/README.md +317 -0
- data/examples/cli_app/htm_cli.rb +270 -0
- data/examples/custom_llm_configuration.rb +183 -0
- data/examples/example_app/Rakefile +71 -0
- data/examples/example_app/app.rb +206 -0
- data/examples/sinatra_app/Gemfile +21 -0
- data/examples/sinatra_app/app.rb +335 -0
- data/lib/htm/active_record_config.rb +113 -0
- data/lib/htm/configuration.rb +342 -0
- data/lib/htm/database.rb +594 -0
- data/lib/htm/embedding_service.rb +115 -0
- data/lib/htm/errors.rb +34 -0
- data/lib/htm/job_adapter.rb +154 -0
- data/lib/htm/jobs/generate_embedding_job.rb +65 -0
- data/lib/htm/jobs/generate_tags_job.rb +82 -0
- data/lib/htm/long_term_memory.rb +965 -0
- data/lib/htm/models/node.rb +109 -0
- data/lib/htm/models/node_tag.rb +33 -0
- data/lib/htm/models/robot.rb +52 -0
- data/lib/htm/models/tag.rb +76 -0
- data/lib/htm/railtie.rb +76 -0
- data/lib/htm/sinatra.rb +157 -0
- data/lib/htm/tag_service.rb +135 -0
- data/lib/htm/tasks.rb +38 -0
- data/lib/htm/version.rb +5 -0
- data/lib/htm/working_memory.rb +182 -0
- data/lib/htm.rb +400 -0
- data/lib/tasks/db.rake +19 -0
- data/lib/tasks/htm.rake +147 -0
- data/lib/tasks/jobs.rake +312 -0
- data/mkdocs.yml +190 -0
- data/scripts/install_local_database.sh +309 -0
- metadata +341 -0
|
@@ -0,0 +1,717 @@
|
|
|
1
|
+
# Working Memory Management
|
|
2
|
+
|
|
3
|
+
Working memory is HTM's token-limited active context system designed for immediate LLM use. This guide explains how it works, how to manage it effectively, and best practices for optimal performance.
|
|
4
|
+
|
|
5
|
+
## What is Working Memory?
|
|
6
|
+
|
|
7
|
+
Working memory is an in-memory cache that:
|
|
8
|
+
|
|
9
|
+
- **Stores active memories** for fast access
|
|
10
|
+
- **Respects token limits** (default: 128,000 tokens)
|
|
11
|
+
- **Evicts old/unimportant memories** when full
|
|
12
|
+
- **Syncs with long-term memory** for durability
|
|
13
|
+
|
|
14
|
+
Think of it as RAM for your robot's consciousness - fast, limited, and volatile.
|
|
15
|
+
|
|
16
|
+
## Architecture
|
|
17
|
+
|
|
18
|
+

|
|
19
|
+
|
|
20
|
+
## Initialization
|
|
21
|
+
|
|
22
|
+
Configure working memory size when creating HTM:
|
|
23
|
+
|
|
24
|
+
```ruby
|
|
25
|
+
# Default: 128K tokens (roughly 512KB of text)
|
|
26
|
+
htm = HTM.new(
|
|
27
|
+
robot_name: "Assistant",
|
|
28
|
+
working_memory_size: 128_000
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
# Large working memory for extensive context
|
|
32
|
+
htm = HTM.new(
|
|
33
|
+
robot_name: "Long Context Bot",
|
|
34
|
+
working_memory_size: 1_000_000 # 1M tokens
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
# Small working memory for focused tasks
|
|
38
|
+
htm = HTM.new(
|
|
39
|
+
robot_name: "Focused Bot",
|
|
40
|
+
working_memory_size: 32_000 # 32K tokens
|
|
41
|
+
)
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
!!! tip "Choosing Memory Size"
|
|
45
|
+
- **32K-64K**: Focused tasks, single conversations
|
|
46
|
+
- **128K-256K**: General purpose, multiple topics (recommended)
|
|
47
|
+
- **512K-1M**: Extensive context, long sessions
|
|
48
|
+
- **>1M**: Specialized use cases only (memory overhead)
|
|
49
|
+
|
|
50
|
+
## How Working Memory Works
|
|
51
|
+
|
|
52
|
+
### Adding Memories
|
|
53
|
+
|
|
54
|
+
When you add a node, it goes to both working and long-term memory:
|
|
55
|
+
|
|
56
|
+
```ruby
|
|
57
|
+
htm.add_node(
|
|
58
|
+
"fact_001",
|
|
59
|
+
"User prefers Ruby for scripting",
|
|
60
|
+
type: :fact,
|
|
61
|
+
importance: 7.0
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
# Internally:
|
|
65
|
+
# 1. Calculate token count
|
|
66
|
+
# 2. Store in long-term memory (PostgreSQL)
|
|
67
|
+
# 3. Add to working memory (in-memory)
|
|
68
|
+
# 4. Check capacity, evict if needed
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### Recalling Memories
|
|
72
|
+
|
|
73
|
+
When you recall, memories are added to working memory:
|
|
74
|
+
|
|
75
|
+
```ruby
|
|
76
|
+
memories = htm.recall(
|
|
77
|
+
timeframe: "last week",
|
|
78
|
+
topic: "database design"
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
# Internally:
|
|
82
|
+
# 1. Search long-term memory (RAG)
|
|
83
|
+
# 2. For each result:
|
|
84
|
+
# a. Check if space available
|
|
85
|
+
# b. Evict if needed
|
|
86
|
+
# c. Add to working memory
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### Automatic Eviction
|
|
90
|
+
|
|
91
|
+
When working memory is full, HTM evicts the lowest-scoring memories first, where each memory's score combines its importance and recency:
|
|
92
|
+
|
|
93
|
+
```ruby
|
|
94
|
+
# Algorithm:
|
|
95
|
+
# 1. Calculate eviction score = importance × recency
|
|
96
|
+
# 2. Sort by score (lowest first)
|
|
97
|
+
# 3. Evict until enough space
|
|
98
|
+
# 4. Mark as evicted in long-term memory
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
!!! note
|
|
102
|
+
Evicted memories are **not deleted** - they remain in long-term memory and can be recalled later.
|
|
103
|
+
|
|
104
|
+
## Monitoring Utilization
|
|
105
|
+
|
|
106
|
+
### Basic Stats
|
|
107
|
+
|
|
108
|
+
```ruby
|
|
109
|
+
wm = htm.working_memory
|
|
110
|
+
|
|
111
|
+
puts "Nodes: #{wm.node_count}"
|
|
112
|
+
puts "Tokens: #{wm.token_count} / #{wm.max_tokens}"
|
|
113
|
+
puts "Utilization: #{wm.utilization_percentage}%"
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### Detailed Monitoring
|
|
117
|
+
|
|
118
|
+
```ruby
|
|
119
|
+
class MemoryMonitor
|
|
120
|
+
def initialize(htm)
|
|
121
|
+
@htm = htm
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
def report
|
|
125
|
+
wm = @htm.working_memory
|
|
126
|
+
stats = @htm.memory_stats
|
|
127
|
+
|
|
128
|
+
puts "=== Working Memory Report ==="
|
|
129
|
+
puts "Capacity: #{wm.max_tokens} tokens"
|
|
130
|
+
puts "Used: #{wm.token_count} tokens (#{wm.utilization_percentage}%)"
|
|
131
|
+
puts "Free: #{wm.max_tokens - wm.token_count} tokens"
|
|
132
|
+
puts "Nodes: #{wm.node_count}"
|
|
133
|
+
puts
|
|
134
|
+
puts "Average tokens per node: #{wm.token_count / wm.node_count}" if wm.node_count > 0
|
|
135
|
+
puts
|
|
136
|
+
puts "=== Long-term Memory ==="
|
|
137
|
+
puts "Total nodes: #{stats[:total_nodes]}"
|
|
138
|
+
puts "Database size: #{(stats[:database_size] / 1024.0 / 1024.0).round(2)} MB"
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
def health_check
|
|
142
|
+
util = @htm.working_memory.utilization_percentage
|
|
143
|
+
|
|
144
|
+
case util
|
|
145
|
+
when 0..50
|
|
146
|
+
{ status: :healthy, message: "Plenty of space" }
|
|
147
|
+
when 50..80 # shares its lower bound with the previous range so fractional values (e.g. 50.5) still match
|
|
148
|
+
{ status: :warning, message: "Approaching capacity" }
|
|
149
|
+
when 80..95 # shares its lower bound with the previous range so fractional values (e.g. 80.5) still match
|
|
150
|
+
{ status: :critical, message: "Nearly full, evictions likely" }
|
|
151
|
+
else
|
|
152
|
+
{ status: :full, message: "At capacity, frequent evictions" }
|
|
153
|
+
end
|
|
154
|
+
end
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
monitor = MemoryMonitor.new(htm)
|
|
158
|
+
monitor.report
|
|
159
|
+
health = monitor.health_check
|
|
160
|
+
puts "Health: #{health[:status]} - #{health[:message]}"
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## Eviction Behavior
|
|
164
|
+
|
|
165
|
+
### Understanding Eviction
|
|
166
|
+
|
|
167
|
+
HTM evicts memories based on two factors:
|
|
168
|
+
|
|
169
|
+
1. **Importance**: Higher importance = less likely to evict
|
|
170
|
+
2. **Recency**: Newer memories = less likely to evict
|
|
171
|
+
|
|
172
|
+
```ruby
|
|
173
|
+
# Eviction score calculation
|
|
174
|
+
score = importance × (1 / age_in_hours)
|
|
175
|
+
|
|
176
|
+
# Example scores:
|
|
177
|
+
# High importance (9.0), recent (1 hour): 9.0 × 1.0 = 9.0 (keep)
|
|
178
|
+
# High importance (9.0), old (24 hours): 9.0 × 0.042 = 0.38 (evict second)
|
|
179
|
+
# Low importance (2.0), recent (1 hour): 2.0 × 1.0 = 2.0 (evict third)
|
|
180
|
+
# Low importance (2.0), old (24 hours): 2.0 × 0.042 = 0.08 (evict first)
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
### Eviction Example
|
|
184
|
+
|
|
185
|
+
```ruby
|
|
186
|
+
# Fill working memory
|
|
187
|
+
htm = HTM.new(
|
|
188
|
+
robot_name: "Test",
|
|
189
|
+
working_memory_size: 10_000 # Small for demo
|
|
190
|
+
)
|
|
191
|
+
|
|
192
|
+
# Add important fact (will stay)
|
|
193
|
+
htm.add_node(
|
|
194
|
+
"critical",
|
|
195
|
+
"Critical system password",
|
|
196
|
+
importance: 10.0
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
# Add many low-importance items
|
|
200
|
+
100.times do |i|
|
|
201
|
+
htm.add_node(
|
|
202
|
+
"temp_#{i}",
|
|
203
|
+
"Temporary note #{i}",
|
|
204
|
+
importance: 1.0
|
|
205
|
+
)
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
# Check what survived
|
|
209
|
+
wm = htm.working_memory
|
|
210
|
+
puts "Surviving nodes: #{wm.node_count}"
|
|
211
|
+
|
|
212
|
+
# Critical fact should still be there
|
|
213
|
+
critical = htm.retrieve("critical")
|
|
214
|
+
puts "Critical fact present: #{!critical.nil?}"
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
### Manual Eviction
|
|
218
|
+
|
|
219
|
+
You can trigger eviction manually:
|
|
220
|
+
|
|
221
|
+
```ruby
|
|
222
|
+
# Access the eviction mechanism (internal API)
|
|
223
|
+
needed_tokens = 50_000
|
|
224
|
+
|
|
225
|
+
evicted = htm.working_memory.evict_to_make_space(needed_tokens)
|
|
226
|
+
|
|
227
|
+
puts "Evicted #{evicted.length} memories:"
|
|
228
|
+
evicted.each do |mem|
|
|
229
|
+
puts "- #{mem[:key]}: #{mem[:value][0..50]}..."
|
|
230
|
+
end
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
!!! warning
|
|
234
|
+
Manual eviction is rarely needed. HTM handles this automatically during normal operations.
|
|
235
|
+
|
|
236
|
+
## Best Practices
|
|
237
|
+
|
|
238
|
+
### 1. Set Appropriate Importance
|
|
239
|
+
|
|
240
|
+
```ruby
|
|
241
|
+
# Critical data: Evicted last (importance lowers eviction priority; it does not pin the node)
|
|
242
|
+
htm.add_node(
|
|
243
|
+
"api_key",
|
|
244
|
+
"Production API key",
|
|
245
|
+
importance: 10.0
|
|
246
|
+
)
|
|
247
|
+
|
|
248
|
+
# Important context: Retain longer
|
|
249
|
+
htm.add_node(
|
|
250
|
+
"user_goal",
|
|
251
|
+
"User wants to optimize database",
|
|
252
|
+
importance: 8.0
|
|
253
|
+
)
|
|
254
|
+
|
|
255
|
+
# Temporary context: Evict when needed
|
|
256
|
+
htm.add_node(
|
|
257
|
+
"current_topic",
|
|
258
|
+
"Discussing query optimization",
|
|
259
|
+
importance: 5.0
|
|
260
|
+
)
|
|
261
|
+
|
|
262
|
+
# Disposable notes: Evict first
|
|
263
|
+
htm.add_node(
|
|
264
|
+
"scratch",
|
|
265
|
+
"Temporary calculation result",
|
|
266
|
+
importance: 1.0
|
|
267
|
+
)
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
### 2. Monitor Utilization Regularly
|
|
271
|
+
|
|
272
|
+
```ruby
|
|
273
|
+
class WorkingMemoryManager
|
|
274
|
+
def initialize(htm, threshold: 80.0)
|
|
275
|
+
@htm = htm
|
|
276
|
+
@threshold = threshold
|
|
277
|
+
end
|
|
278
|
+
|
|
279
|
+
def check_and_warn
|
|
280
|
+
util = @htm.working_memory.utilization_percentage
|
|
281
|
+
|
|
282
|
+
if util > @threshold
|
|
283
|
+
warn "Working memory at #{util}%!"
|
|
284
|
+
warn "Consider increasing working_memory_size or reducing context"
|
|
285
|
+
end
|
|
286
|
+
end
|
|
287
|
+
|
|
288
|
+
def auto_adjust_importance
|
|
289
|
+
util = @htm.working_memory.utilization_percentage
|
|
290
|
+
|
|
291
|
+
# If critically full, boost importance of current context
|
|
292
|
+
if util > 90
|
|
293
|
+
# Implementation would require tracking current context keys
|
|
294
|
+
# and updating their importance in the database
|
|
295
|
+
warn "Critical capacity reached"
|
|
296
|
+
end
|
|
297
|
+
end
|
|
298
|
+
end
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
### 3. Use Context Strategically
|
|
302
|
+
|
|
303
|
+
Don't load unnecessary data into working memory:
|
|
304
|
+
|
|
305
|
+
```ruby
|
|
306
|
+
# Bad: Load everything
|
|
307
|
+
all_memories = htm.recall(
|
|
308
|
+
timeframe: "all time",
|
|
309
|
+
topic: "anything",
|
|
310
|
+
limit: 1000
|
|
311
|
+
)
|
|
312
|
+
# This fills working memory with potentially irrelevant data
|
|
313
|
+
|
|
314
|
+
# Good: Load what you need
|
|
315
|
+
relevant = htm.recall(
|
|
316
|
+
timeframe: "last week",
|
|
317
|
+
topic: "current project",
|
|
318
|
+
limit: 20
|
|
319
|
+
)
|
|
320
|
+
# This keeps working memory focused
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
### 4. Clean Up When Done
|
|
324
|
+
|
|
325
|
+
Remove temporary memories:
|
|
326
|
+
|
|
327
|
+
```ruby
|
|
328
|
+
def with_temporary_context(htm, key, value)
|
|
329
|
+
# Add temporary context
|
|
330
|
+
htm.add_node(key, value, type: :context, importance: 2.0)
|
|
331
|
+
|
|
332
|
+
yield
|
|
333
|
+
|
|
334
|
+
# Clean up
|
|
335
|
+
htm.forget(key, confirm: :confirmed)
|
|
336
|
+
end
|
|
337
|
+
|
|
338
|
+
with_temporary_context(htm, "scratch_001", "Temp data") do
|
|
339
|
+
# Use the temporary context
|
|
340
|
+
context = htm.create_context(strategy: :recent)
|
|
341
|
+
# ... do work
|
|
342
|
+
end
|
|
343
|
+
# Temp data is now removed
|
|
344
|
+
```
|
|
345
|
+
|
|
346
|
+
### 5. Batch Operations Carefully
|
|
347
|
+
|
|
348
|
+
Be mindful when adding many memories at once:
|
|
349
|
+
|
|
350
|
+
```ruby
|
|
351
|
+
# Risky: Might fill working memory quickly
|
|
352
|
+
1000.times do |i|
|
|
353
|
+
htm.add_node("item_#{i}", "Data #{i}", importance: 5.0)
|
|
354
|
+
end
|
|
355
|
+
|
|
356
|
+
# Better: Add with appropriate importance
|
|
357
|
+
1000.times do |i|
|
|
358
|
+
htm.add_node(
|
|
359
|
+
"item_#{i}",
|
|
360
|
+
"Data #{i}",
|
|
361
|
+
importance: 3.0 # Lower importance for bulk data
|
|
362
|
+
)
|
|
363
|
+
end
|
|
364
|
+
|
|
365
|
+
# Or: Monitor during batch operations
|
|
366
|
+
batch_data.each_with_index do |data, i|
|
|
367
|
+
htm.add_node("item_#{i}", data, importance: 5.0)
|
|
368
|
+
|
|
369
|
+
# Check capacity every 100 items
|
|
370
|
+
if i % 100 == 0
|
|
371
|
+
util = htm.working_memory.utilization_percentage
|
|
372
|
+
puts "Utilization: #{util}%"
|
|
373
|
+
end
|
|
374
|
+
end
|
|
375
|
+
```
|
|
376
|
+
|
|
377
|
+
## Working Memory Strategies
|
|
378
|
+
|
|
379
|
+
### Strategy 1: Sliding Window
|
|
380
|
+
|
|
381
|
+
Keep only recent memories:
|
|
382
|
+
|
|
383
|
+
```ruby
|
|
384
|
+
class SlidingWindow
|
|
385
|
+
def initialize(htm, window_size: 50)
|
|
386
|
+
@htm = htm
|
|
387
|
+
@window_size = window_size
|
|
388
|
+
@keys = []
|
|
389
|
+
end
|
|
390
|
+
|
|
391
|
+
def add(key, value, **opts)
|
|
392
|
+
@htm.add_node(key, value, **opts)
|
|
393
|
+
@keys << key
|
|
394
|
+
|
|
395
|
+
# Evict oldest if window exceeded
|
|
396
|
+
if @keys.length > @window_size
|
|
397
|
+
oldest = @keys.shift
|
|
398
|
+
@htm.forget(oldest, confirm: :confirmed) rescue nil
|
|
399
|
+
end
|
|
400
|
+
end
|
|
401
|
+
end
|
|
402
|
+
```
|
|
403
|
+
|
|
404
|
+
### Strategy 2: Importance Thresholding
|
|
405
|
+
|
|
406
|
+
Only keep high-importance memories:
|
|
407
|
+
|
|
408
|
+
```ruby
|
|
409
|
+
class ImportanceFilter
|
|
410
|
+
def initialize(htm, min_importance: 7.0)
|
|
411
|
+
@htm = htm
|
|
412
|
+
@min_importance = min_importance
|
|
413
|
+
end
|
|
414
|
+
|
|
415
|
+
def add(key, value, importance:, **opts)
|
|
416
|
+
@htm.add_node(key, value, importance: importance, **opts)
|
|
417
|
+
|
|
418
|
+
# If low importance and memory is tight, evict immediately
|
|
419
|
+
if importance < @min_importance &&
|
|
420
|
+
@htm.working_memory.utilization_percentage > 80
|
|
421
|
+
|
|
422
|
+
# Let it evict naturally or remove from working memory
|
|
423
|
+
# (Note: HTM doesn't expose direct working memory removal,
|
|
424
|
+
# so we rely on natural eviction)
|
|
425
|
+
end
|
|
426
|
+
end
|
|
427
|
+
end
|
|
428
|
+
```
|
|
429
|
+
|
|
430
|
+
### Strategy 3: Topic-Based Management
|
|
431
|
+
|
|
432
|
+
Group memories by topic and manage separately:
|
|
433
|
+
|
|
434
|
+
```ruby
|
|
435
|
+
class TopicManager
|
|
436
|
+
def initialize(htm)
|
|
437
|
+
@htm = htm
|
|
438
|
+
@topics = Hash.new { |h, k| h[k] = [] }
|
|
439
|
+
end
|
|
440
|
+
|
|
441
|
+
def add(key, value, topic:, **opts)
|
|
442
|
+
@htm.add_node(key, value, **opts)
|
|
443
|
+
@topics[topic] << key
|
|
444
|
+
end
|
|
445
|
+
|
|
446
|
+
def clear_topic(topic)
|
|
447
|
+
keys = @topics[topic] || []
|
|
448
|
+
keys.each do |key|
|
|
449
|
+
@htm.forget(key, confirm: :confirmed) rescue nil
|
|
450
|
+
end
|
|
451
|
+
@topics.delete(topic)
|
|
452
|
+
end
|
|
453
|
+
|
|
454
|
+
def focus_on_topic(topic)
|
|
455
|
+
# Clear all other topics to make space
|
|
456
|
+
@topics.keys.each do |t|
|
|
457
|
+
clear_topic(t) unless t == topic
|
|
458
|
+
end
|
|
459
|
+
end
|
|
460
|
+
end
|
|
461
|
+
```
|
|
462
|
+
|
|
463
|
+
## Token Counting
|
|
464
|
+
|
|
465
|
+
HTM uses Tiktoken to count tokens:
|
|
466
|
+
|
|
467
|
+
```ruby
|
|
468
|
+
# Token counts vary by content
|
|
469
|
+
short = "Hello world" # ~2 tokens
|
|
470
|
+
medium = "A" * 100 # ~25 tokens
|
|
471
|
+
long = "word " * 1000 # ~1000 tokens
|
|
472
|
+
|
|
473
|
+
# Check token count of a string
|
|
474
|
+
embedding_service = HTM::EmbeddingService.new
|
|
475
|
+
tokens = embedding_service.count_tokens(long)
|
|
476
|
+
puts "Token count: #{tokens}"
|
|
477
|
+
```
|
|
478
|
+
|
|
479
|
+
!!! note "Token vs Characters"
|
|
480
|
+
- 1 token ≈ 4 characters (English)
|
|
481
|
+
- 128K tokens ≈ 512KB text
|
|
482
|
+
- Code typically uses more tokens per character than prose (symbols and identifiers split into many tokens)
|
|
483
|
+
- Special characters use more tokens
|
|
484
|
+
|
|
485
|
+
## Performance Considerations
|
|
486
|
+
|
|
487
|
+
### Memory Overhead
|
|
488
|
+
|
|
489
|
+
Working memory has minimal overhead:
|
|
490
|
+
|
|
491
|
+
```ruby
|
|
492
|
+
# Memory usage per node (approximate):
|
|
493
|
+
# - Key: ~50 bytes
|
|
494
|
+
# - Value: N bytes (your content)
|
|
495
|
+
# - Metadata: ~100 bytes
|
|
496
|
+
# - Total: ~150 bytes + content
|
|
497
|
+
|
|
498
|
+
# For 1000 nodes with 500-char content:
|
|
499
|
+
# 1000 × (150 + 500) = ~650KB
|
|
500
|
+
|
|
501
|
+
# Token count is stored but content dominates
|
|
502
|
+
```
|
|
503
|
+
|
|
504
|
+
### Access Speed
|
|
505
|
+
|
|
506
|
+
Working memory is very fast:
|
|
507
|
+
|
|
508
|
+
```ruby
|
|
509
|
+
require 'benchmark'
|
|
510
|
+
|
|
511
|
+
htm = HTM.new(robot_name: "Perf Test")
|
|
512
|
+
|
|
513
|
+
# Add 1000 memories
|
|
514
|
+
1000.times do |i|
|
|
515
|
+
htm.add_node("key_#{i}", "Value #{i}", importance: 5.0)
|
|
516
|
+
end
|
|
517
|
+
|
|
518
|
+
# Benchmark working memory access
|
|
519
|
+
Benchmark.bm do |x|
|
|
520
|
+
x.report("create_context:") do
|
|
521
|
+
1000.times { htm.create_context(strategy: :balanced) }
|
|
522
|
+
end
|
|
523
|
+
end
|
|
524
|
+
|
|
525
|
+
# Typical results:
|
|
526
|
+
# create_context: ~1ms per call
|
|
527
|
+
```
|
|
528
|
+
|
|
529
|
+
### Optimization Tips
|
|
530
|
+
|
|
531
|
+
```ruby
|
|
532
|
+
# 1. Avoid frequent context assembly
|
|
533
|
+
# Bad: Assemble context every message
|
|
534
|
+
def process_message(message)
|
|
535
|
+
context = htm.create_context # Slow if called frequently
|
|
536
|
+
llm.chat(context + message)
|
|
537
|
+
end
|
|
538
|
+
|
|
539
|
+
# Good: Cache context, update periodically
|
|
540
|
+
@context_cache = nil
|
|
541
|
+
@context_age = 0
|
|
542
|
+
|
|
543
|
+
def process_message(message)
|
|
544
|
+
if @context_cache.nil? || @context_age > 10
|
|
545
|
+
@context_cache = htm.create_context
|
|
546
|
+
@context_age = 0
|
|
547
|
+
end
|
|
548
|
+
@context_age += 1
|
|
549
|
+
|
|
550
|
+
llm.chat(@context_cache + message)
|
|
551
|
+
end
|
|
552
|
+
|
|
553
|
+
# 2. Use appropriate token limits
|
|
554
|
+
# Don't request more than your LLM can handle
|
|
555
|
+
context = htm.create_context(
|
|
556
|
+
strategy: :balanced,
|
|
557
|
+
max_tokens: 100_000 # Match LLM's context window
|
|
558
|
+
)
|
|
559
|
+
|
|
560
|
+
# 3. Monitor and adjust
|
|
561
|
+
util = htm.working_memory.utilization_percentage
|
|
562
|
+
if util > 90
|
|
563
|
+
# Increase working_memory_size, or reduce how much context is loaded (e.g. lower recall limits)
|
|
564
|
+
end
|
|
565
|
+
```
|
|
566
|
+
|
|
567
|
+
## Debugging Working Memory
|
|
568
|
+
|
|
569
|
+
### Inspecting Contents
|
|
570
|
+
|
|
571
|
+
```ruby
|
|
572
|
+
class WorkingMemoryInspector
|
|
573
|
+
def initialize(htm)
|
|
574
|
+
@htm = htm
|
|
575
|
+
end
|
|
576
|
+
|
|
577
|
+
def show_contents
|
|
578
|
+
wm = @htm.working_memory
|
|
579
|
+
|
|
580
|
+
puts "=== Working Memory Contents ==="
|
|
581
|
+
puts "Total nodes: #{wm.node_count}"
|
|
582
|
+
puts "Total tokens: #{wm.token_count}"
|
|
583
|
+
puts
|
|
584
|
+
|
|
585
|
+
# Access internal structure (advanced)
|
|
586
|
+
# Note: This requires access to WorkingMemory internals
|
|
587
|
+
# For production, use public APIs only
|
|
588
|
+
end
|
|
589
|
+
|
|
590
|
+
def find_large_nodes(threshold: 1000)
|
|
591
|
+
# Find nodes using many tokens
|
|
592
|
+
# This would require iterating working memory
|
|
593
|
+
# (not directly exposed in current API)
|
|
594
|
+
end
|
|
595
|
+
|
|
596
|
+
def show_eviction_candidates
|
|
597
|
+
# Show which nodes would be evicted next
|
|
598
|
+
# Based on importance and recency
|
|
599
|
+
end
|
|
600
|
+
end
|
|
601
|
+
```
|
|
602
|
+
|
|
603
|
+
### Common Issues
|
|
604
|
+
|
|
605
|
+
**Issue: Working memory always full**
|
|
606
|
+
|
|
607
|
+
```ruby
|
|
608
|
+
# Check if you're adding too much
|
|
609
|
+
stats = htm.memory_stats
|
|
610
|
+
wm_util = stats[:working_memory][:utilization]
|
|
611
|
+
|
|
612
|
+
if wm_util > 95
|
|
613
|
+
puts "Working memory consistently full"
|
|
614
|
+
puts "Solutions:"
|
|
615
|
+
puts "1. Increase working_memory_size"
|
|
616
|
+
puts "2. Lower importance of bulk data"
|
|
617
|
+
puts "3. Reduce recall limit"
|
|
618
|
+
puts "4. Clean up temporary data more frequently"
|
|
619
|
+
end
|
|
620
|
+
```
|
|
621
|
+
|
|
622
|
+
**Issue: Important data getting evicted**
|
|
623
|
+
|
|
624
|
+
```ruby
|
|
625
|
+
# Increase importance of critical data
|
|
626
|
+
htm.add_node(
|
|
627
|
+
"critical_data",
|
|
628
|
+
"Important information",
|
|
629
|
+
importance: 9.5 # Higher importance makes eviction less likely (it is not a guarantee)
|
|
630
|
+
)
|
|
631
|
+
```
|
|
632
|
+
|
|
633
|
+
**Issue: Memory utilization too low**
|
|
634
|
+
|
|
635
|
+
```ruby
|
|
636
|
+
# Working memory underutilized
|
|
637
|
+
wm_util = htm.working_memory.utilization_percentage
|
|
638
|
+
|
|
639
|
+
if wm_util < 20
|
|
640
|
+
puts "Working memory underutilized"
|
|
641
|
+
puts "Consider:"
|
|
642
|
+
puts "1. Reducing working_memory_size to save RAM"
|
|
643
|
+
puts "2. Recalling more context"
|
|
644
|
+
puts "3. Using larger token limits in create_context"
|
|
645
|
+
end
|
|
646
|
+
```
|
|
647
|
+
|
|
648
|
+
## Next Steps
|
|
649
|
+
|
|
650
|
+
- [**Context Assembly**](context-assembly.md) - Use working memory effectively with LLMs
|
|
651
|
+
- [**Long-term Memory**](long-term-memory.md) - Understand persistent storage
|
|
652
|
+
- [**Adding Memories**](adding-memories.md) - Learn about importance scoring
|
|
653
|
+
|
|
654
|
+
## Complete Example
|
|
655
|
+
|
|
656
|
+
```ruby
|
|
657
|
+
require 'htm'
|
|
658
|
+
|
|
659
|
+
# Initialize with moderate working memory
|
|
660
|
+
htm = HTM.new(
|
|
661
|
+
robot_name: "Memory Manager",
|
|
662
|
+
working_memory_size: 128_000
|
|
663
|
+
)
|
|
664
|
+
|
|
665
|
+
# Monitor class
|
|
666
|
+
class Monitor
|
|
667
|
+
def initialize(htm)
|
|
668
|
+
@htm = htm
|
|
669
|
+
end
|
|
670
|
+
|
|
671
|
+
def report
|
|
672
|
+
wm = @htm.working_memory
|
|
673
|
+
puts "Utilization: #{wm.utilization_percentage}%"
|
|
674
|
+
puts "Nodes: #{wm.node_count}"
|
|
675
|
+
puts "Tokens: #{wm.token_count} / #{wm.max_tokens}"
|
|
676
|
+
end
|
|
677
|
+
end
|
|
678
|
+
|
|
679
|
+
monitor = Monitor.new(htm)
|
|
680
|
+
|
|
681
|
+
# Add memories with different importance
|
|
682
|
+
puts "Adding critical data..."
|
|
683
|
+
htm.add_node("critical", "Critical system data", importance: 10.0)
|
|
684
|
+
monitor.report
|
|
685
|
+
|
|
686
|
+
puts "\nAdding important data..."
|
|
687
|
+
10.times do |i|
|
|
688
|
+
htm.add_node("important_#{i}", "Important item #{i}", importance: 8.0)
|
|
689
|
+
end
|
|
690
|
+
monitor.report
|
|
691
|
+
|
|
692
|
+
puts "\nAdding regular data..."
|
|
693
|
+
50.times do |i|
|
|
694
|
+
htm.add_node("regular_#{i}", "Regular item #{i}", importance: 5.0)
|
|
695
|
+
end
|
|
696
|
+
monitor.report
|
|
697
|
+
|
|
698
|
+
puts "\nAdding temporary data..."
|
|
699
|
+
100.times do |i|
|
|
700
|
+
htm.add_node("temp_#{i}", "Temporary item #{i}", importance: 2.0)
|
|
701
|
+
end
|
|
702
|
+
monitor.report
|
|
703
|
+
|
|
704
|
+
# Check what survived
|
|
705
|
+
puts "\n=== Survival Check ==="
|
|
706
|
+
critical = htm.retrieve("critical")
|
|
707
|
+
puts "Critical survived: #{!critical.nil?}"
|
|
708
|
+
|
|
709
|
+
# Create context
|
|
710
|
+
puts "\nCreating context..."
|
|
711
|
+
context = htm.create_context(strategy: :important, max_tokens: 50_000)
|
|
712
|
+
puts "Context length: #{context.length} characters"
|
|
713
|
+
|
|
714
|
+
# Final stats
|
|
715
|
+
puts "\n=== Final Stats ==="
|
|
716
|
+
monitor.report
|
|
717
|
+
```
|