htm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. checksums.yaml +7 -0
  2. data/.architecture/decisions/adrs/001-use-postgresql-timescaledb-storage.md +227 -0
  3. data/.architecture/decisions/adrs/002-two-tier-memory-architecture.md +322 -0
  4. data/.architecture/decisions/adrs/003-ollama-default-embedding-provider.md +339 -0
  5. data/.architecture/decisions/adrs/004-multi-robot-shared-memory-hive-mind.md +374 -0
  6. data/.architecture/decisions/adrs/005-rag-based-retrieval-with-hybrid-search.md +443 -0
  7. data/.architecture/decisions/adrs/006-context-assembly-strategies.md +444 -0
  8. data/.architecture/decisions/adrs/007-working-memory-eviction-strategy.md +461 -0
  9. data/.architecture/decisions/adrs/008-robot-identification-system.md +550 -0
  10. data/.architecture/decisions/adrs/009-never-forget-explicit-deletion-only.md +570 -0
  11. data/.architecture/decisions/adrs/010-redis-working-memory-rejected.md +323 -0
  12. data/.architecture/decisions/adrs/011-database-side-embedding-generation-with-pgai.md +585 -0
  13. data/.architecture/decisions/adrs/012-llm-driven-ontology-topic-extraction.md +583 -0
  14. data/.architecture/decisions/adrs/013-activerecord-orm-and-many-to-many-tagging.md +299 -0
  15. data/.architecture/decisions/adrs/014-client-side-embedding-generation-workflow.md +569 -0
  16. data/.architecture/decisions/adrs/015-hierarchical-tag-ontology-and-llm-extraction.md +701 -0
  17. data/.architecture/decisions/adrs/016-async-embedding-and-tag-generation.md +694 -0
  18. data/.architecture/members.yml +144 -0
  19. data/.architecture/reviews/2025-10-29-llm-configuration-and-async-processing-review.md +1137 -0
  20. data/.architecture/reviews/initial-system-analysis.md +330 -0
  21. data/.envrc +32 -0
  22. data/.irbrc +145 -0
  23. data/CHANGELOG.md +150 -0
  24. data/COMMITS.md +196 -0
  25. data/LICENSE +21 -0
  26. data/README.md +1347 -0
  27. data/Rakefile +51 -0
  28. data/SETUP.md +268 -0
  29. data/config/database.yml +67 -0
  30. data/db/migrate/20250101000001_enable_extensions.rb +14 -0
  31. data/db/migrate/20250101000002_create_robots.rb +14 -0
  32. data/db/migrate/20250101000003_create_nodes.rb +42 -0
  33. data/db/migrate/20250101000005_create_tags.rb +38 -0
  34. data/db/migrate/20250101000007_add_node_vector_indexes.rb +30 -0
  35. data/db/schema.sql +473 -0
  36. data/db/seed_data/README.md +100 -0
  37. data/db/seed_data/presidents.md +136 -0
  38. data/db/seed_data/states.md +151 -0
  39. data/db/seeds.rb +208 -0
  40. data/dbdoc/README.md +173 -0
  41. data/dbdoc/public.node_stats.md +48 -0
  42. data/dbdoc/public.node_stats.svg +41 -0
  43. data/dbdoc/public.node_tags.md +40 -0
  44. data/dbdoc/public.node_tags.svg +112 -0
  45. data/dbdoc/public.nodes.md +54 -0
  46. data/dbdoc/public.nodes.svg +118 -0
  47. data/dbdoc/public.nodes_tags.md +39 -0
  48. data/dbdoc/public.nodes_tags.svg +112 -0
  49. data/dbdoc/public.ontology_structure.md +48 -0
  50. data/dbdoc/public.ontology_structure.svg +38 -0
  51. data/dbdoc/public.operations_log.md +42 -0
  52. data/dbdoc/public.operations_log.svg +130 -0
  53. data/dbdoc/public.relationships.md +39 -0
  54. data/dbdoc/public.relationships.svg +41 -0
  55. data/dbdoc/public.robot_activity.md +46 -0
  56. data/dbdoc/public.robot_activity.svg +35 -0
  57. data/dbdoc/public.robots.md +35 -0
  58. data/dbdoc/public.robots.svg +90 -0
  59. data/dbdoc/public.schema_migrations.md +29 -0
  60. data/dbdoc/public.schema_migrations.svg +26 -0
  61. data/dbdoc/public.tags.md +35 -0
  62. data/dbdoc/public.tags.svg +60 -0
  63. data/dbdoc/public.topic_relationships.md +45 -0
  64. data/dbdoc/public.topic_relationships.svg +32 -0
  65. data/dbdoc/schema.json +1437 -0
  66. data/dbdoc/schema.svg +154 -0
  67. data/docs/api/database.md +806 -0
  68. data/docs/api/embedding-service.md +532 -0
  69. data/docs/api/htm.md +797 -0
  70. data/docs/api/index.md +259 -0
  71. data/docs/api/long-term-memory.md +1096 -0
  72. data/docs/api/working-memory.md +665 -0
  73. data/docs/architecture/adrs/001-postgresql-timescaledb.md +314 -0
  74. data/docs/architecture/adrs/002-two-tier-memory.md +411 -0
  75. data/docs/architecture/adrs/003-ollama-embeddings.md +421 -0
  76. data/docs/architecture/adrs/004-hive-mind.md +437 -0
  77. data/docs/architecture/adrs/005-rag-retrieval.md +531 -0
  78. data/docs/architecture/adrs/006-context-assembly.md +496 -0
  79. data/docs/architecture/adrs/007-eviction-strategy.md +645 -0
  80. data/docs/architecture/adrs/008-robot-identification.md +625 -0
  81. data/docs/architecture/adrs/009-never-forget.md +648 -0
  82. data/docs/architecture/adrs/010-redis-working-memory-rejected.md +323 -0
  83. data/docs/architecture/adrs/011-pgai-integration.md +494 -0
  84. data/docs/architecture/adrs/index.md +215 -0
  85. data/docs/architecture/hive-mind.md +736 -0
  86. data/docs/architecture/index.md +351 -0
  87. data/docs/architecture/overview.md +538 -0
  88. data/docs/architecture/two-tier-memory.md +873 -0
  89. data/docs/assets/css/custom.css +83 -0
  90. data/docs/assets/images/htm-core-components.svg +63 -0
  91. data/docs/assets/images/htm-database-schema.svg +93 -0
  92. data/docs/assets/images/htm-hive-mind-architecture.svg +125 -0
  93. data/docs/assets/images/htm-importance-scoring-framework.svg +83 -0
  94. data/docs/assets/images/htm-layered-architecture.svg +71 -0
  95. data/docs/assets/images/htm-long-term-memory-architecture.svg +115 -0
  96. data/docs/assets/images/htm-working-memory-architecture.svg +120 -0
  97. data/docs/assets/images/htm.jpg +0 -0
  98. data/docs/assets/images/htm_demo.gif +0 -0
  99. data/docs/assets/js/mathjax.js +18 -0
  100. data/docs/assets/videos/htm_video.mp4 +0 -0
  101. data/docs/database_rake_tasks.md +322 -0
  102. data/docs/development/contributing.md +787 -0
  103. data/docs/development/index.md +336 -0
  104. data/docs/development/schema.md +596 -0
  105. data/docs/development/setup.md +719 -0
  106. data/docs/development/testing.md +819 -0
  107. data/docs/guides/adding-memories.md +824 -0
  108. data/docs/guides/context-assembly.md +1009 -0
  109. data/docs/guides/getting-started.md +577 -0
  110. data/docs/guides/index.md +118 -0
  111. data/docs/guides/long-term-memory.md +941 -0
  112. data/docs/guides/multi-robot.md +866 -0
  113. data/docs/guides/recalling-memories.md +927 -0
  114. data/docs/guides/search-strategies.md +953 -0
  115. data/docs/guides/working-memory.md +717 -0
  116. data/docs/index.md +214 -0
  117. data/docs/installation.md +477 -0
  118. data/docs/multi_framework_support.md +519 -0
  119. data/docs/quick-start.md +655 -0
  120. data/docs/setup_local_database.md +302 -0
  121. data/docs/using_rake_tasks_in_your_app.md +383 -0
  122. data/examples/basic_usage.rb +93 -0
  123. data/examples/cli_app/README.md +317 -0
  124. data/examples/cli_app/htm_cli.rb +270 -0
  125. data/examples/custom_llm_configuration.rb +183 -0
  126. data/examples/example_app/Rakefile +71 -0
  127. data/examples/example_app/app.rb +206 -0
  128. data/examples/sinatra_app/Gemfile +21 -0
  129. data/examples/sinatra_app/app.rb +335 -0
  130. data/lib/htm/active_record_config.rb +113 -0
  131. data/lib/htm/configuration.rb +342 -0
  132. data/lib/htm/database.rb +594 -0
  133. data/lib/htm/embedding_service.rb +115 -0
  134. data/lib/htm/errors.rb +34 -0
  135. data/lib/htm/job_adapter.rb +154 -0
  136. data/lib/htm/jobs/generate_embedding_job.rb +65 -0
  137. data/lib/htm/jobs/generate_tags_job.rb +82 -0
  138. data/lib/htm/long_term_memory.rb +965 -0
  139. data/lib/htm/models/node.rb +109 -0
  140. data/lib/htm/models/node_tag.rb +33 -0
  141. data/lib/htm/models/robot.rb +52 -0
  142. data/lib/htm/models/tag.rb +76 -0
  143. data/lib/htm/railtie.rb +76 -0
  144. data/lib/htm/sinatra.rb +157 -0
  145. data/lib/htm/tag_service.rb +135 -0
  146. data/lib/htm/tasks.rb +38 -0
  147. data/lib/htm/version.rb +5 -0
  148. data/lib/htm/working_memory.rb +182 -0
  149. data/lib/htm.rb +400 -0
  150. data/lib/tasks/db.rake +19 -0
  151. data/lib/tasks/htm.rake +147 -0
  152. data/lib/tasks/jobs.rake +312 -0
  153. data/mkdocs.yml +190 -0
  154. data/scripts/install_local_database.sh +309 -0
  155. metadata +341 -0
@@ -0,0 +1,717 @@
1
+ # Working Memory Management
2
+
3
+ Working memory is HTM's token-limited active context system designed for immediate LLM use. This guide explains how it works, how to manage it effectively, and best practices for optimal performance.
4
+
5
+ ## What is Working Memory?
6
+
7
+ Working memory is an in-memory cache that:
8
+
9
+ - **Stores active memories** for fast access
10
+ - **Respects token limits** (default: 128,000 tokens)
11
+ - **Evicts old/unimportant memories** when full
12
+ - **Syncs with long-term memory** for durability
13
+
14
+ Think of it as RAM for your robot's consciousness - fast, limited, and volatile.
15
+
16
+ ## Architecture
17
+
18
+ ![Working Memory Architecture](../assets/images/htm-working-memory-architecture.svg)
19
+
20
+ ## Initialization
21
+
22
+ Configure working memory size when creating HTM:
23
+
24
+ ```ruby
25
+ # Default: 128K tokens (roughly 512KB of text)
26
+ htm = HTM.new(
27
+ robot_name: "Assistant",
28
+ working_memory_size: 128_000
29
+ )
30
+
31
+ # Large working memory for extensive context
32
+ htm = HTM.new(
33
+ robot_name: "Long Context Bot",
34
+ working_memory_size: 1_000_000 # 1M tokens
35
+ )
36
+
37
+ # Small working memory for focused tasks
38
+ htm = HTM.new(
39
+ robot_name: "Focused Bot",
40
+ working_memory_size: 32_000 # 32K tokens
41
+ )
42
+ ```
43
+
44
+ !!! tip "Choosing Memory Size"
45
+ - **32K-64K**: Focused tasks, single conversations
46
+ - **128K-256K**: General purpose, multiple topics (recommended)
47
+ - **512K-1M**: Extensive context, long sessions
48
+ - **>1M**: Specialized use cases only (memory overhead)
49
+
50
+ ## How Working Memory Works
51
+
52
+ ### Adding Memories
53
+
54
+ When you add a node, it goes to both working and long-term memory:
55
+
56
+ ```ruby
57
+ htm.add_node(
58
+ "fact_001",
59
+ "User prefers Ruby for scripting",
60
+ type: :fact,
61
+ importance: 7.0
62
+ )
63
+
64
+ # Internally:
65
+ # 1. Calculate token count
66
+ # 2. Store in long-term memory (PostgreSQL)
67
+ # 3. Add to working memory (in-memory)
68
+ # 4. Check capacity, evict if needed
69
+ ```
70
+
71
+ ### Recalling Memories
72
+
73
+ When you recall, memories are added to working memory:
74
+
75
+ ```ruby
76
+ memories = htm.recall(
77
+ timeframe: "last week",
78
+ topic: "database design"
79
+ )
80
+
81
+ # Internally:
82
+ # 1. Search long-term memory (RAG)
83
+ # 2. For each result:
84
+ # a. Check if space available
85
+ # b. Evict if needed
86
+ # c. Add to working memory
87
+ ```
88
+
89
+ ### Automatic Eviction
90
+
91
+ When working memory is full, HTM evicts the lowest-scoring memories first, where each memory's score combines its importance and recency:
92
+
93
+ ```ruby
94
+ # Algorithm:
95
+ # 1. Calculate eviction score = importance × recency
96
+ # 2. Sort by score (lowest first)
97
+ # 3. Evict until enough space
98
+ # 4. Mark as evicted in long-term memory
99
+ ```
100
+
101
+ !!! note
102
+ Evicted memories are **not deleted** - they remain in long-term memory and can be recalled later.
103
+
104
+ ## Monitoring Utilization
105
+
106
+ ### Basic Stats
107
+
108
+ ```ruby
109
+ wm = htm.working_memory
110
+
111
+ puts "Nodes: #{wm.node_count}"
112
+ puts "Tokens: #{wm.token_count} / #{wm.max_tokens}"
113
+ puts "Utilization: #{wm.utilization_percentage}%"
114
+ ```
115
+
116
+ ### Detailed Monitoring
117
+
118
+ ```ruby
119
+ class MemoryMonitor
120
+ def initialize(htm)
121
+ @htm = htm
122
+ end
123
+
124
+ def report
125
+ wm = @htm.working_memory
126
+ stats = @htm.memory_stats
127
+
128
+ puts "=== Working Memory Report ==="
129
+ puts "Capacity: #{wm.max_tokens} tokens"
130
+ puts "Used: #{wm.token_count} tokens (#{wm.utilization_percentage}%)"
131
+ puts "Free: #{wm.max_tokens - wm.token_count} tokens"
132
+ puts "Nodes: #{wm.node_count}"
133
+ puts
134
+ puts "Average tokens per node: #{wm.token_count / wm.node_count}" if wm.node_count > 0
135
+ puts
136
+ puts "=== Long-term Memory ==="
137
+ puts "Total nodes: #{stats[:total_nodes]}"
138
+ puts "Database size: #{(stats[:database_size] / 1024.0 / 1024.0).round(2)} MB"
139
+ end
140
+
141
+ def health_check
142
+ util = @htm.working_memory.utilization_percentage
143
+
144
+ case util
145
+ when 0..50
146
+ { status: :healthy, message: "Plenty of space" }
147
+ when 50..80
148
+ { status: :warning, message: "Approaching capacity" }
149
+ when 80..95
150
+ { status: :critical, message: "Nearly full, evictions likely" }
151
+ else
152
+ { status: :full, message: "At capacity, frequent evictions" }
153
+ end
154
+ end
155
+ end
156
+
157
+ monitor = MemoryMonitor.new(htm)
158
+ monitor.report
159
+ health = monitor.health_check
160
+ puts "Health: #{health[:status]} - #{health[:message]}"
161
+ ```
162
+
163
+ ## Eviction Behavior
164
+
165
+ ### Understanding Eviction
166
+
167
+ HTM evicts memories based on two factors:
168
+
169
+ 1. **Importance**: Higher importance = less likely to evict
170
+ 2. **Recency**: Newer memories = less likely to evict
171
+
172
+ ```ruby
173
+ # Eviction score calculation
174
+ score = importance × (1 / age_in_hours)
175
+
176
+ # Example scores:
177
+ # High importance (9.0), recent (1 hour): 9.0 × 1.0 = 9.0 (keep)
178
+ # High importance (9.0), old (24 hours): 9.0 × 0.042 = 0.38 (evict second: age outweighs importance)
179
+ # Low importance (2.0), recent (1 hour): 2.0 × 1.0 = 2.0 (evict third: recency protects it for now)
180
+ # Low importance (2.0), old (24 hours): 2.0 × 0.042 = 0.08 (evict first)
181
+ ```
182
+
183
+ ### Eviction Example
184
+
185
+ ```ruby
186
+ # Fill working memory
187
+ htm = HTM.new(
188
+ robot_name: "Test",
189
+ working_memory_size: 10_000 # Small for demo
190
+ )
191
+
192
+ # Add important fact (will stay)
193
+ htm.add_node(
194
+ "critical",
195
+ "Critical system password",
196
+ importance: 10.0
197
+ )
198
+
199
+ # Add many low-importance items
200
+ 100.times do |i|
201
+ htm.add_node(
202
+ "temp_#{i}",
203
+ "Temporary note #{i}",
204
+ importance: 1.0
205
+ )
206
+ end
207
+
208
+ # Check what survived
209
+ wm = htm.working_memory
210
+ puts "Surviving nodes: #{wm.node_count}"
211
+
212
+ # Critical fact should still be there
213
+ critical = htm.retrieve("critical")
214
+ puts "Critical fact present: #{!critical.nil?}"
215
+ ```
216
+
217
+ ### Manual Eviction
218
+
219
+ You can trigger eviction manually:
220
+
221
+ ```ruby
222
+ # Access the eviction mechanism (internal API)
223
+ needed_tokens = 50_000
224
+
225
+ evicted = htm.working_memory.evict_to_make_space(needed_tokens)
226
+
227
+ puts "Evicted #{evicted.length} memories:"
228
+ evicted.each do |mem|
229
+ puts "- #{mem[:key]}: #{mem[:value][0..50]}..."
230
+ end
231
+ ```
232
+
233
+ !!! warning
234
+ Manual eviction is rarely needed. HTM handles this automatically during normal operations.
235
+
236
+ ## Best Practices
237
+
238
+ ### 1. Set Appropriate Importance
239
+
240
+ ```ruby
241
+ # Critical data: Least likely to be evicted
242
+ htm.add_node(
243
+ "api_key",
244
+ "Production API key",
245
+ importance: 10.0
246
+ )
247
+
248
+ # Important context: Retain longer
249
+ htm.add_node(
250
+ "user_goal",
251
+ "User wants to optimize database",
252
+ importance: 8.0
253
+ )
254
+
255
+ # Temporary context: Evict when needed
256
+ htm.add_node(
257
+ "current_topic",
258
+ "Discussing query optimization",
259
+ importance: 5.0
260
+ )
261
+
262
+ # Disposable notes: Evict first
263
+ htm.add_node(
264
+ "scratch",
265
+ "Temporary calculation result",
266
+ importance: 1.0
267
+ )
268
+ ```
269
+
270
+ ### 2. Monitor Utilization Regularly
271
+
272
+ ```ruby
273
+ class WorkingMemoryManager
274
+ def initialize(htm, threshold: 80.0)
275
+ @htm = htm
276
+ @threshold = threshold
277
+ end
278
+
279
+ def check_and_warn
280
+ util = @htm.working_memory.utilization_percentage
281
+
282
+ if util > @threshold
283
+ warn "Working memory at #{util}%!"
284
+ warn "Consider increasing working_memory_size or reducing context"
285
+ end
286
+ end
287
+
288
+ def auto_adjust_importance
289
+ util = @htm.working_memory.utilization_percentage
290
+
291
+ # If critically full, boost importance of current context
292
+ if util > 90
293
+ # Implementation would require tracking current context keys
294
+ # and updating their importance in the database
295
+ warn "Critical capacity reached"
296
+ end
297
+ end
298
+ end
299
+ ```
300
+
301
+ ### 3. Use Context Strategically
302
+
303
+ Don't load unnecessary data into working memory:
304
+
305
+ ```ruby
306
+ # Bad: Load everything
307
+ all_memories = htm.recall(
308
+ timeframe: "all time",
309
+ topic: "anything",
310
+ limit: 1000
311
+ )
312
+ # This fills working memory with potentially irrelevant data
313
+
314
+ # Good: Load what you need
315
+ relevant = htm.recall(
316
+ timeframe: "last week",
317
+ topic: "current project",
318
+ limit: 20
319
+ )
320
+ # This keeps working memory focused
321
+ ```
322
+
323
+ ### 4. Clean Up When Done
324
+
325
+ Remove temporary memories:
326
+
327
+ ```ruby
328
+ def with_temporary_context(htm, key, value)
329
+ # Add temporary context
330
+ htm.add_node(key, value, type: :context, importance: 2.0)
331
+
332
+ yield
333
+
334
+ # Clean up
335
+ htm.forget(key, confirm: :confirmed)
336
+ end
337
+
338
+ with_temporary_context(htm, "scratch_001", "Temp data") do
339
+ # Use the temporary context
340
+ context = htm.create_context(strategy: :recent)
341
+ # ... do work
342
+ end
343
+ # Temp data is now removed
344
+ ```
345
+
346
+ ### 5. Batch Operations Carefully
347
+
348
+ Be mindful when adding many memories at once:
349
+
350
+ ```ruby
351
+ # Risky: Might fill working memory quickly
352
+ 1000.times do |i|
353
+ htm.add_node("item_#{i}", "Data #{i}", importance: 5.0)
354
+ end
355
+
356
+ # Better: Add with appropriate importance
357
+ 1000.times do |i|
358
+ htm.add_node(
359
+ "item_#{i}",
360
+ "Data #{i}",
361
+ importance: 3.0 # Lower importance for bulk data
362
+ )
363
+ end
364
+
365
+ # Or: Monitor during batch operations
366
+ batch_data.each_with_index do |data, i|
367
+ htm.add_node("item_#{i}", data, importance: 5.0)
368
+
369
+ # Check capacity every 100 items
370
+ if i % 100 == 0
371
+ util = htm.working_memory.utilization_percentage
372
+ puts "Utilization: #{util}%"
373
+ end
374
+ end
375
+ ```
376
+
377
+ ## Working Memory Strategies
378
+
379
+ ### Strategy 1: Sliding Window
380
+
381
+ Keep only recent memories:
382
+
383
+ ```ruby
384
+ class SlidingWindow
385
+ def initialize(htm, window_size: 50)
386
+ @htm = htm
387
+ @window_size = window_size
388
+ @keys = []
389
+ end
390
+
391
+ def add(key, value, **opts)
392
+ @htm.add_node(key, value, **opts)
393
+ @keys << key
394
+
395
+ # Evict oldest if window exceeded
396
+ if @keys.length > @window_size
397
+ oldest = @keys.shift
398
+ @htm.forget(oldest, confirm: :confirmed) rescue nil
399
+ end
400
+ end
401
+ end
402
+ ```
403
+
404
+ ### Strategy 2: Importance Thresholding
405
+
406
+ Only keep high-importance memories:
407
+
408
+ ```ruby
409
+ class ImportanceFilter
410
+ def initialize(htm, min_importance: 7.0)
411
+ @htm = htm
412
+ @min_importance = min_importance
413
+ end
414
+
415
+ def add(key, value, importance:, **opts)
416
+ @htm.add_node(key, value, importance: importance, **opts)
417
+
418
+ # If low importance and memory is tight, evict immediately
419
+ if importance < @min_importance &&
420
+ @htm.working_memory.utilization_percentage > 80
421
+
422
+ # Let it evict naturally or remove from working memory
423
+ # (Note: HTM doesn't expose direct working memory removal,
424
+ # so we rely on natural eviction)
425
+ end
426
+ end
427
+ end
428
+ ```
429
+
430
+ ### Strategy 3: Topic-Based Management
431
+
432
+ Group memories by topic and manage separately:
433
+
434
+ ```ruby
435
+ class TopicManager
436
+ def initialize(htm)
437
+ @htm = htm
438
+ @topics = Hash.new { |h, k| h[k] = [] }
439
+ end
440
+
441
+ def add(key, value, topic:, **opts)
442
+ @htm.add_node(key, value, **opts)
443
+ @topics[topic] << key
444
+ end
445
+
446
+ def clear_topic(topic)
447
+ keys = @topics[topic] || []
448
+ keys.each do |key|
449
+ @htm.forget(key, confirm: :confirmed) rescue nil
450
+ end
451
+ @topics.delete(topic)
452
+ end
453
+
454
+ def focus_on_topic(topic)
455
+ # Clear all other topics to make space
456
+ @topics.keys.each do |t|
457
+ clear_topic(t) unless t == topic
458
+ end
459
+ end
460
+ end
461
+ ```
462
+
463
+ ## Token Counting
464
+
465
+ HTM uses Tiktoken to count tokens:
466
+
467
+ ```ruby
468
+ # Token counts vary by content
469
+ short = "Hello world" # ~2 tokens
470
+ medium = "A" * 100 # ~25 tokens
471
+ long = "word " * 1000 # ~1000 tokens
472
+
473
+ # Check token count of a string
474
+ embedding_service = HTM::EmbeddingService.new
475
+ tokens = embedding_service.count_tokens(long)
476
+ puts "Token count: #{tokens}"
477
+ ```
478
+
479
+ !!! note "Tokens vs Characters"
480
+ - 1 token ≈ 4 characters (English)
481
+ - 128K tokens ≈ 512KB text
482
+ - Code uses fewer tokens per character
483
+ - Special characters use more tokens
484
+
485
+ ## Performance Considerations
486
+
487
+ ### Memory Overhead
488
+
489
+ Working memory has minimal overhead:
490
+
491
+ ```ruby
492
+ # Memory usage per node (approximate):
493
+ # - Key: ~50 bytes
494
+ # - Value: N bytes (your content)
495
+ # - Metadata: ~100 bytes
496
+ # - Total: ~150 bytes + content
497
+
498
+ # For 1000 nodes with 500-char content:
499
+ # 1000 × (150 + 500) = ~650KB
500
+
501
+ # Token count is stored but content dominates
502
+ ```
503
+
504
+ ### Access Speed
505
+
506
+ Working memory is very fast:
507
+
508
+ ```ruby
509
+ require 'benchmark'
510
+
511
+ htm = HTM.new(robot_name: "Perf Test")
512
+
513
+ # Add 1000 memories
514
+ 1000.times do |i|
515
+ htm.add_node("key_#{i}", "Value #{i}", importance: 5.0)
516
+ end
517
+
518
+ # Benchmark working memory access
519
+ Benchmark.bm do |x|
520
+ x.report("create_context:") do
521
+ 1000.times { htm.create_context(strategy: :balanced) }
522
+ end
523
+ end
524
+
525
+ # Typical results:
526
+ # create_context: ~1ms per call
527
+ ```
528
+
529
+ ### Optimization Tips
530
+
531
+ ```ruby
532
+ # 1. Avoid frequent context assembly
533
+ # Bad: Assemble context every message
534
+ def process_message(message)
535
+ context = htm.create_context # Slow if called frequently
536
+ llm.chat(context + message)
537
+ end
538
+
539
+ # Good: Cache context, update periodically
540
+ @context_cache = nil
541
+ @context_age = 0
542
+
543
+ def process_message(message)
544
+ if @context_cache.nil? || @context_age > 10
545
+ @context_cache = htm.create_context
546
+ @context_age = 0
547
+ end
548
+ @context_age += 1
549
+
550
+ llm.chat(@context_cache + message)
551
+ end
552
+
553
+ # 2. Use appropriate token limits
554
+ # Don't request more than your LLM can handle
555
+ context = htm.create_context(
556
+ strategy: :balanced,
557
+ max_tokens: 100_000 # Match LLM's context window
558
+ )
559
+
560
+ # 3. Monitor and adjust
561
+ util = htm.working_memory.utilization_percentage
562
+ if util > 90
563
+ # Increase working_memory_size or reduce the amount of context you load
564
+ end
565
+ ```
566
+
567
+ ## Debugging Working Memory
568
+
569
+ ### Inspecting Contents
570
+
571
+ ```ruby
572
+ class WorkingMemoryInspector
573
+ def initialize(htm)
574
+ @htm = htm
575
+ end
576
+
577
+ def show_contents
578
+ wm = @htm.working_memory
579
+
580
+ puts "=== Working Memory Contents ==="
581
+ puts "Total nodes: #{wm.node_count}"
582
+ puts "Total tokens: #{wm.token_count}"
583
+ puts
584
+
585
+ # Access internal structure (advanced)
586
+ # Note: This requires access to WorkingMemory internals
587
+ # For production, use public APIs only
588
+ end
589
+
590
+ def find_large_nodes(threshold: 1000)
591
+ # Find nodes using many tokens
592
+ # This would require iterating working memory
593
+ # (not directly exposed in current API)
594
+ end
595
+
596
+ def show_eviction_candidates
597
+ # Show which nodes would be evicted next
598
+ # Based on importance and recency
599
+ end
600
+ end
601
+ ```
602
+
603
+ ### Common Issues
604
+
605
+ **Issue: Working memory always full**
606
+
607
+ ```ruby
608
+ # Check if you're adding too much
609
+ stats = htm.memory_stats
610
+ wm_util = stats[:working_memory][:utilization]
611
+
612
+ if wm_util > 95
613
+ puts "Working memory consistently full"
614
+ puts "Solutions:"
615
+ puts "1. Increase working_memory_size"
616
+ puts "2. Lower importance of bulk data"
617
+ puts "3. Reduce recall limit"
618
+ puts "4. Clean up temporary data more frequently"
619
+ end
620
+ ```
621
+
622
+ **Issue: Important data getting evicted**
623
+
624
+ ```ruby
625
+ # Increase importance of critical data
626
+ htm.add_node(
627
+ "critical_data",
628
+ "Important information",
629
+ importance: 9.5 # High importance makes eviction much less likely
630
+ )
631
+ ```
632
+
633
+ **Issue: Memory utilization too low**
634
+
635
+ ```ruby
636
+ # Working memory underutilized
637
+ wm_util = htm.working_memory.utilization_percentage
638
+
639
+ if wm_util < 20
640
+ puts "Working memory underutilized"
641
+ puts "Consider:"
642
+ puts "1. Reducing working_memory_size to save RAM"
643
+ puts "2. Recalling more context"
644
+ puts "3. Using larger token limits in create_context"
645
+ end
646
+ ```
647
+
648
+ ## Next Steps
649
+
650
+ - [**Context Assembly**](context-assembly.md) - Use working memory effectively with LLMs
651
+ - [**Long-term Memory**](long-term-memory.md) - Understand persistent storage
652
+ - [**Adding Memories**](adding-memories.md) - Learn about importance scoring
653
+
654
+ ## Complete Example
655
+
656
+ ```ruby
657
+ require 'htm'
658
+
659
+ # Initialize with moderate working memory
660
+ htm = HTM.new(
661
+ robot_name: "Memory Manager",
662
+ working_memory_size: 128_000
663
+ )
664
+
665
+ # Monitor class
666
+ class Monitor
667
+ def initialize(htm)
668
+ @htm = htm
669
+ end
670
+
671
+ def report
672
+ wm = @htm.working_memory
673
+ puts "Utilization: #{wm.utilization_percentage}%"
674
+ puts "Nodes: #{wm.node_count}"
675
+ puts "Tokens: #{wm.token_count} / #{wm.max_tokens}"
676
+ end
677
+ end
678
+
679
+ monitor = Monitor.new(htm)
680
+
681
+ # Add memories with different importance
682
+ puts "Adding critical data..."
683
+ htm.add_node("critical", "Critical system data", importance: 10.0)
684
+ monitor.report
685
+
686
+ puts "\nAdding important data..."
687
+ 10.times do |i|
688
+ htm.add_node("important_#{i}", "Important item #{i}", importance: 8.0)
689
+ end
690
+ monitor.report
691
+
692
+ puts "\nAdding regular data..."
693
+ 50.times do |i|
694
+ htm.add_node("regular_#{i}", "Regular item #{i}", importance: 5.0)
695
+ end
696
+ monitor.report
697
+
698
+ puts "\nAdding temporary data..."
699
+ 100.times do |i|
700
+ htm.add_node("temp_#{i}", "Temporary item #{i}", importance: 2.0)
701
+ end
702
+ monitor.report
703
+
704
+ # Check what survived
705
+ puts "\n=== Survival Check ==="
706
+ critical = htm.retrieve("critical")
707
+ puts "Critical survived: #{!critical.nil?}"
708
+
709
+ # Create context
710
+ puts "\nCreating context..."
711
+ context = htm.create_context(strategy: :important, max_tokens: 50_000)
712
+ puts "Context length: #{context.length} characters"
713
+
714
+ # Final stats
715
+ puts "\n=== Final Stats ==="
716
+ monitor.report
717
+ ```