htm 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. checksums.yaml +4 -4
  2. data/.envrc +1 -0
  3. data/.tbls.yml +30 -0
  4. data/CHANGELOG.md +30 -0
  5. data/SETUP.md +132 -101
  6. data/db/migrate/20250125000001_add_content_hash_to_nodes.rb +14 -0
  7. data/db/migrate/20250125000002_create_robot_nodes.rb +35 -0
  8. data/db/migrate/20250125000003_remove_source_and_robot_id_from_nodes.rb +28 -0
  9. data/db/migrate/20250126000001_create_working_memories.rb +19 -0
  10. data/db/migrate/20250126000002_remove_unused_columns.rb +12 -0
  11. data/db/schema.sql +226 -43
  12. data/docs/api/database.md +20 -232
  13. data/docs/api/embedding-service.md +1 -7
  14. data/docs/api/htm.md +195 -449
  15. data/docs/api/index.md +1 -7
  16. data/docs/api/long-term-memory.md +342 -590
  17. data/docs/architecture/adrs/001-postgresql-timescaledb.md +1 -1
  18. data/docs/architecture/adrs/003-ollama-embeddings.md +1 -1
  19. data/docs/architecture/adrs/010-redis-working-memory-rejected.md +2 -27
  20. data/docs/architecture/adrs/index.md +2 -13
  21. data/docs/architecture/hive-mind.md +165 -166
  22. data/docs/architecture/index.md +2 -2
  23. data/docs/architecture/overview.md +5 -171
  24. data/docs/architecture/two-tier-memory.md +1 -35
  25. data/docs/assets/images/adr-010-current-architecture.svg +37 -0
  26. data/docs/assets/images/adr-010-proposed-architecture.svg +48 -0
  27. data/docs/assets/images/adr-dependency-tree.svg +93 -0
  28. data/docs/assets/images/class-hierarchy.svg +55 -0
  29. data/docs/assets/images/exception-hierarchy.svg +45 -0
  30. data/docs/assets/images/htm-architecture-overview.svg +83 -0
  31. data/docs/assets/images/htm-complete-memory-flow.svg +160 -0
  32. data/docs/assets/images/htm-context-assembly-flow.svg +148 -0
  33. data/docs/assets/images/htm-eviction-process.svg +141 -0
  34. data/docs/assets/images/htm-memory-addition-flow.svg +138 -0
  35. data/docs/assets/images/htm-memory-recall-flow.svg +152 -0
  36. data/docs/assets/images/htm-node-states.svg +123 -0
  37. data/docs/assets/images/project-structure.svg +78 -0
  38. data/docs/assets/images/test-directory-structure.svg +38 -0
  39. data/{dbdoc → docs/database}/README.md +5 -3
  40. data/{dbdoc → docs/database}/public.node_tags.md +4 -5
  41. data/docs/database/public.node_tags.svg +106 -0
  42. data/{dbdoc → docs/database}/public.nodes.md +3 -8
  43. data/docs/database/public.nodes.svg +152 -0
  44. data/docs/database/public.robot_nodes.md +44 -0
  45. data/docs/database/public.robot_nodes.svg +121 -0
  46. data/{dbdoc → docs/database}/public.robots.md +1 -2
  47. data/docs/database/public.robots.svg +106 -0
  48. data/docs/database/public.working_memories.md +40 -0
  49. data/docs/database/public.working_memories.svg +112 -0
  50. data/{dbdoc → docs/database}/schema.json +342 -110
  51. data/docs/database/schema.svg +223 -0
  52. data/docs/development/index.md +1 -29
  53. data/docs/development/schema.md +84 -324
  54. data/docs/development/testing.md +1 -9
  55. data/docs/getting-started/index.md +47 -0
  56. data/docs/{installation.md → getting-started/installation.md} +2 -2
  57. data/docs/{quick-start.md → getting-started/quick-start.md} +5 -5
  58. data/docs/guides/adding-memories.md +221 -655
  59. data/docs/guides/search-strategies.md +85 -51
  60. data/docs/images/htm-er-diagram.svg +156 -0
  61. data/docs/index.md +16 -31
  62. data/docs/multi_framework_support.md +4 -4
  63. data/examples/basic_usage.rb +18 -16
  64. data/examples/cli_app/htm_cli.rb +86 -8
  65. data/examples/custom_llm_configuration.rb +1 -2
  66. data/examples/example_app/app.rb +11 -14
  67. data/examples/sinatra_app/Gemfile +1 -0
  68. data/examples/sinatra_app/Gemfile.lock +166 -0
  69. data/examples/sinatra_app/app.rb +219 -24
  70. data/lib/htm/active_record_config.rb +10 -3
  71. data/lib/htm/configuration.rb +265 -78
  72. data/lib/htm/{sinatra.rb → integrations/sinatra.rb} +87 -12
  73. data/lib/htm/job_adapter.rb +10 -3
  74. data/lib/htm/long_term_memory.rb +220 -57
  75. data/lib/htm/models/node.rb +36 -7
  76. data/lib/htm/models/robot.rb +30 -4
  77. data/lib/htm/models/robot_node.rb +50 -0
  78. data/lib/htm/models/tag.rb +52 -0
  79. data/lib/htm/models/working_memory_entry.rb +88 -0
  80. data/lib/htm/tasks.rb +4 -0
  81. data/lib/htm/version.rb +1 -1
  82. data/lib/htm.rb +34 -13
  83. data/lib/tasks/htm.rake +32 -1
  84. data/lib/tasks/jobs.rake +7 -3
  85. data/lib/tasks/tags.rake +34 -0
  86. data/mkdocs.yml +56 -9
  87. metadata +61 -31
  88. data/dbdoc/public.node_tags.svg +0 -112
  89. data/dbdoc/public.nodes.svg +0 -118
  90. data/dbdoc/public.robots.svg +0 -90
  91. data/dbdoc/schema.svg +0 -154
  92. /data/{dbdoc → docs/database}/public.node_stats.md +0 -0
  93. /data/{dbdoc → docs/database}/public.node_stats.svg +0 -0
  94. /data/{dbdoc → docs/database}/public.nodes_tags.md +0 -0
  95. /data/{dbdoc → docs/database}/public.nodes_tags.svg +0 -0
  96. /data/{dbdoc → docs/database}/public.ontology_structure.md +0 -0
  97. /data/{dbdoc → docs/database}/public.ontology_structure.svg +0 -0
  98. /data/{dbdoc → docs/database}/public.operations_log.md +0 -0
  99. /data/{dbdoc → docs/database}/public.operations_log.svg +0 -0
  100. /data/{dbdoc → docs/database}/public.relationships.md +0 -0
  101. /data/{dbdoc → docs/database}/public.relationships.svg +0 -0
  102. /data/{dbdoc → docs/database}/public.robot_activity.md +0 -0
  103. /data/{dbdoc → docs/database}/public.robot_activity.svg +0 -0
  104. /data/{dbdoc → docs/database}/public.schema_migrations.md +0 -0
  105. /data/{dbdoc → docs/database}/public.schema_migrations.svg +0 -0
  106. /data/{dbdoc → docs/database}/public.tags.md +0 -0
  107. /data/{dbdoc → docs/database}/public.tags.svg +0 -0
  108. /data/{dbdoc → docs/database}/public.topic_relationships.md +0 -0
  109. /data/{dbdoc → docs/database}/public.topic_relationships.svg +0 -0
@@ -4,744 +4,344 @@ This guide covers everything you need to know about storing information in HTM e
4
4
 
5
5
  ## Basic Usage
6
6
 
7
- The primary method for adding memories is `add_node`:
7
+ The primary method for adding memories is `remember`:
8
8
 
9
9
  ```ruby
10
- node_id = htm.add_node(
11
- key, # Unique identifier
12
- value, # Content (string)
13
- type: :fact, # Memory type
14
- category: nil, # Optional category
15
- importance: 1.0, # Importance score (0.0-10.0)
16
- related_to: [], # Array of related node keys
17
- tags: [] # Array of tags
18
- )
10
+ node_id = htm.remember(content, tags: [])
19
11
  ```
20
12
 
21
- The method returns the database ID of the created node.
22
-
23
- ## Memory Types Deep Dive
24
-
25
- HTM supports six memory types, each optimized for specific use cases.
13
+ **Parameters:**
26
14
 
27
- ### :fact - Immutable Facts
15
+ | Parameter | Type | Default | Description |
16
+ |-----------|------|---------|-------------|
17
+ | `content` | String | *required* | The information to remember |
18
+ | `tags` | Array\<String\> | `[]` | Manual tags to assign (in addition to auto-extracted tags) |
28
19
 
29
- Facts are unchanging truths about the world, users, or systems.
30
-
31
- ```ruby
32
- # User information
33
- htm.add_node(
34
- "user_name",
35
- "The user's name is Alice Thompson",
36
- type: :fact,
37
- importance: 9.0,
38
- tags: ["user", "identity"]
39
- )
40
-
41
- # System configuration
42
- htm.add_node(
43
- "system_timezone",
44
- "System timezone is UTC",
45
- type: :fact,
46
- importance: 6.0,
47
- tags: ["system", "config"]
48
- )
49
-
50
- # Domain knowledge
51
- htm.add_node(
52
- "fact_photosynthesis",
53
- "Photosynthesis converts light energy into chemical energy in plants",
54
- type: :fact,
55
- importance: 7.0,
56
- tags: ["biology", "science"]
57
- )
58
- ```
20
+ The method returns the database ID of the created node.
59
21
 
60
- !!! tip "When to Use :fact"
61
- - User profile information (name, email, preferences)
62
- - System configuration that rarely changes
63
- - Scientific facts or domain knowledge
64
- - Historical events
65
- - API endpoints and credentials
22
+ ## How Remember Works
66
23
 
67
- ### :context - Conversation State
24
+ When you call `remember()`:
68
25
 
69
- Context captures the current state of conversations or sessions.
26
+ 1. **Content hashing**: A SHA-256 hash of the content is computed
27
+ 2. **Deduplication check**: If a node with the same hash exists, reuse it
28
+ 3. **Node creation/linking**: Create new node OR link robot to existing node
29
+ 4. **Working memory**: Add node to working memory (evict if needed)
30
+ 5. **Background jobs**: Enqueue embedding and tag generation (async)
70
31
 
71
32
  ```ruby
72
- # Current conversation topic
73
- htm.add_node(
74
- "context_#{session_id}_001",
75
- "User is asking about database performance optimization",
76
- type: :context,
77
- importance: 6.0,
78
- tags: ["conversation", "current"]
79
- )
80
-
81
- # Conversation mood
82
- htm.add_node(
83
- "context_mood",
84
- "User seems frustrated with slow query times",
85
- type: :context,
86
- importance: 7.0,
87
- tags: ["conversation", "sentiment"]
88
- )
33
+ # First robot remembers something
34
+ node_id = htm.remember("PostgreSQL supports vector similarity search")
35
+ # => 123 (new node created)
89
36
 
90
- # Current task
91
- htm.add_node(
92
- "context_task",
93
- "Helping user optimize their PostgreSQL queries",
94
- type: :context,
95
- importance: 8.0,
96
- tags: ["task", "active"]
97
- )
37
+ # Same content remembered again (by same or different robot)
38
+ node_id = htm.remember("PostgreSQL supports vector similarity search")
39
+ # => 123 (same node_id returned, just updates remember_count)
98
40
  ```
99
41
 
100
- !!! tip "When to Use :context"
101
- - Current conversation topics
102
- - Session state
103
- - Temporary workflow status
104
- - User's current goals or questions
105
- - Conversation sentiment or mood
106
-
107
- !!! note
108
- Context memories are typically lower importance (4-6) since they become outdated quickly. They'll naturally get evicted from working memory as new context arrives.
42
+ ## Content Types
109
43
 
110
- ### :code - Code Snippets and Patterns
44
+ HTM doesn't enforce content types - just store meaningful text that stands alone:
111
45
 
112
- Store code examples, patterns, and technical solutions.
46
+ ### Facts
113
47
 
114
48
  ```ruby
115
- # Function example
116
- htm.add_node(
117
- "code_date_parser",
118
- <<~CODE,
119
- def parse_date(date_string)
120
- Date.parse(date_string)
121
- rescue ArgumentError
122
- nil
123
- end
124
- CODE
125
- type: :code,
126
- importance: 6.0,
127
- tags: ["ruby", "date", "parsing"]
128
- )
49
+ # User information
50
+ htm.remember("The user's name is Alice Thompson")
129
51
 
130
- # SQL query pattern
131
- htm.add_node(
132
- "code_user_query",
133
- <<~SQL,
134
- SELECT u.id, u.name, COUNT(o.id) as order_count
135
- FROM users u
136
- LEFT JOIN orders o ON u.id = o.user_id
137
- GROUP BY u.id, u.name
138
- HAVING COUNT(o.id) > 10
139
- SQL
140
- type: :code,
141
- category: "sql",
142
- importance: 7.0,
143
- tags: ["sql", "aggregation", "joins"]
144
- )
52
+ # System configuration
53
+ htm.remember("System timezone is UTC")
145
54
 
146
- # Configuration example
147
- htm.add_node(
148
- "code_redis_config",
149
- <<~YAML,
150
- redis:
151
- host: localhost
152
- port: 6379
153
- pool_size: 5
154
- timeout: 2
155
- YAML
156
- type: :code,
157
- category: "config",
158
- importance: 5.0,
159
- tags: ["redis", "configuration", "yaml"]
160
- )
55
+ # Domain knowledge
56
+ htm.remember("Photosynthesis converts light energy into chemical energy in plants")
161
57
  ```
162
58
 
163
- !!! tip "When to Use :code"
164
- - Reusable code snippets
165
- - Configuration examples
166
- - SQL queries and patterns
167
- - API request/response examples
168
- - Algorithm implementations
169
- - Regular expressions
170
-
171
- ### :preference - User Preferences
172
-
173
- Store user preferences and settings.
59
+ ### Preferences
174
60
 
175
61
  ```ruby
176
62
  # Communication style
177
- htm.add_node(
178
- "pref_communication",
179
- "User prefers concise answers with bullet points",
180
- type: :preference,
181
- importance: 8.0,
182
- tags: ["user", "communication", "style"]
183
- )
63
+ htm.remember("User prefers concise answers with bullet points")
184
64
 
185
65
  # Technical preferences
186
- htm.add_node(
187
- "pref_language",
188
- "User prefers Ruby over Python for scripting tasks",
189
- type: :preference,
190
- importance: 7.0,
191
- tags: ["user", "programming", "language"]
192
- )
193
-
194
- # UI preferences
195
- htm.add_node(
196
- "pref_theme",
197
- "User uses dark theme in their IDE",
198
- type: :preference,
199
- importance: 4.0,
200
- tags: ["user", "ui", "theme"]
201
- )
202
-
203
- # Work preferences
204
- htm.add_node(
205
- "pref_working_hours",
206
- "User typically codes in the morning, prefers design work in afternoon",
207
- type: :preference,
208
- importance: 5.0,
209
- tags: ["user", "schedule", "productivity"]
210
- )
66
+ htm.remember("User prefers Ruby over Python for scripting tasks")
211
67
  ```
212
68
 
213
- !!! tip "When to Use :preference"
214
- - Communication style preferences
215
- - Technical tool preferences
216
- - UI/UX preferences
217
- - Work habits and patterns
218
- - Learning style preferences
219
-
220
- ### :decision - Architectural Decisions
221
-
222
- Track important decisions with rationale.
69
+ ### Decisions
223
70
 
224
71
  ```ruby
225
72
  # Technology choice
226
- htm.add_node(
227
- "decision_database",
228
- <<~DECISION,
229
- Decision: Use PostgreSQL with TimescaleDB for HTM storage
230
-
231
- Rationale:
232
- - Excellent time-series optimization
233
- - Native vector search with pgvector
234
- - Strong consistency guarantees
235
- - Mature ecosystem
236
-
237
- Alternatives considered:
238
- - MongoDB (rejected: eventual consistency issues)
239
- - Redis (rejected: limited persistence)
240
- DECISION
241
- type: :decision,
242
- category: "architecture",
243
- importance: 9.5,
244
- tags: ["architecture", "database", "timescaledb"]
245
- )
246
-
247
- # Design pattern choice
248
- htm.add_node(
249
- "decision_memory_architecture",
250
- <<~DECISION,
251
- Decision: Implement two-tier memory (working + long-term)
252
-
253
- Rationale:
254
- - Working memory provides fast access
255
- - Long-term memory ensures durability
256
- - Mirrors human memory architecture
257
- - Allows token-limited LLM context
258
-
259
- Trade-offs:
260
- - Added complexity in synchronization
261
- - Eviction strategy needs tuning
262
- DECISION
263
- type: :decision,
264
- category: "architecture",
265
- importance: 10.0,
266
- tags: ["architecture", "memory", "design-pattern"]
267
- )
268
-
269
- # Process decision
270
- htm.add_node(
271
- "decision_testing",
272
- "Decided to use Minitest over RSpec for simplicity and speed",
273
- type: :decision,
274
- category: "process",
275
- importance: 6.0,
276
- tags: ["testing", "tools"]
277
- )
73
+ htm.remember(<<~DECISION)
74
+ Decision: Use PostgreSQL with pgvector for HTM storage
75
+
76
+ Rationale:
77
+ - Excellent vector search via pgvector
78
+ - Strong consistency guarantees
79
+ - Mature ecosystem
80
+
81
+ Alternatives considered:
82
+ - MongoDB (rejected: eventual consistency issues)
83
+ - Redis (rejected: limited persistence)
84
+ DECISION
278
85
  ```
279
86
 
280
- !!! tip "When to Use :decision"
281
- - Technology selections
282
- - Architecture patterns
283
- - API design choices
284
- - Process decisions
285
- - Trade-off analysis results
286
-
287
- !!! note "Decision Template"
288
- Include: what was decided, why, alternatives considered, and trade-offs. This context helps future decision-making.
289
-
290
- ### :question - Unresolved Questions
291
-
292
- Track questions that need answering.
87
+ ### Code Snippets
293
88
 
294
89
  ```ruby
295
- # Technical question
296
- htm.add_node(
297
- "question_caching",
298
- "Should we implement Redis caching for frequently accessed memories?",
299
- type: :question,
300
- importance: 7.0,
301
- tags: ["performance", "caching", "open"]
302
- )
303
-
304
- # Design question
305
- htm.add_node(
306
- "question_auth",
307
- "How should we handle authentication for multi-robot scenarios?",
308
- type: :question,
309
- importance: 8.0,
310
- tags: ["security", "architecture", "open"]
311
- )
312
-
313
- # Research question
314
- htm.add_node(
315
- "question_embeddings",
316
- "Would fine-tuning embeddings on our domain improve recall accuracy?",
317
- type: :question,
318
- importance: 6.0,
319
- tags: ["embeddings", "research", "open"]
320
- )
321
- ```
322
-
323
- !!! tip "When to Use :question"
324
- - Open technical questions
325
- - Design uncertainties
326
- - Research topics to investigate
327
- - Feature requests to evaluate
328
- - Performance questions
329
-
330
- !!! tip "Closing Questions"
331
- When a question is answered, add a related decision node and mark the question as resolved by updating its tags.
332
-
333
- ## Importance Scoring Guidelines
334
-
335
- The importance score (0.0-10.0) determines memory retention and eviction priority.
336
-
337
- ![Importance Scoring Framework](../assets/images/htm-importance-scoring-framework.svg)
338
-
339
- ### Scoring Framework
90
+ # Function example
91
+ htm.remember(<<~CODE)
92
+ def parse_date(date_string)
93
+ Date.parse(date_string)
94
+ rescue ArgumentError
95
+ nil
96
+ end
97
+ CODE
340
98
 
341
- ```ruby
342
- # Critical (9.0-10.0): Must never lose
343
- htm.add_node("api_key", "Production API key: ...", importance: 10.0)
344
- htm.add_node("decision_architecture", "Core architecture decision", importance: 9.5)
345
-
346
- # High (7.0-8.9): Very important, high retention
347
- htm.add_node("user_identity", "User's name and email", importance: 8.0)
348
- htm.add_node("major_decision", "Chose Rails for web framework", importance: 7.5)
349
-
350
- # Medium (4.0-6.9): Moderately important
351
- htm.add_node("code_snippet", "Useful utility function", importance: 6.0)
352
- htm.add_node("context_current", "Current conversation topic", importance: 5.0)
353
- htm.add_node("preference_minor", "Prefers tabs over spaces", importance: 4.0)
354
-
355
- # Low (1.0-3.9): Nice to have, can evict
356
- htm.add_node("temp_note", "Check logs later", importance: 3.0)
357
- htm.add_node("minor_context", "Mentioned weather briefly", importance: 2.0)
358
- htm.add_node("throwaway", "Temporary calculation result", importance: 1.0)
99
+ # SQL query pattern
100
+ htm.remember(<<~SQL)
101
+ SELECT u.id, u.name, COUNT(o.id) as order_count
102
+ FROM users u
103
+ LEFT JOIN orders o ON u.id = o.user_id
104
+ GROUP BY u.id, u.name
105
+ HAVING COUNT(o.id) > 10
106
+ SQL
359
107
  ```
360
108
 
361
- ### Importance by Type
362
-
363
- Typical importance ranges for each type:
364
-
365
- | Type | Typical Range | Example |
366
- |------|---------------|---------|
367
- | `:fact` | 7.0-10.0 | User identity, system facts |
368
- | `:decision` | 7.0-10.0 | Architecture, major choices |
369
- | `:preference` | 4.0-8.0 | User preferences |
370
- | `:code` | 4.0-7.0 | Code snippets, examples |
371
- | `:context` | 3.0-6.0 | Conversation state |
372
- | `:question` | 5.0-8.0 | Open questions |
109
+ ## Using Tags
373
110
 
374
- !!! warning "Importance Affects Eviction"
375
- When working memory is full, HTM evicts memories with lower importance first. Set importance thoughtfully based on long-term value.
111
+ Tags provide hierarchical organization for your memories. HTM automatically extracts tags from content, but you can also specify manual tags.
376
112
 
377
- ## Adding Relationships
113
+ ### Hierarchical Tag Convention
378
114
 
379
- Link related memories to build a knowledge graph:
115
+ Use colons to create hierarchical namespaces:
380
116
 
381
117
  ```ruby
382
- # Add a decision
383
- htm.add_node(
384
- "decision_database",
385
- "Use PostgreSQL for data storage",
386
- type: :decision,
387
- importance: 9.0
118
+ # Manual tags with hierarchy
119
+ htm.remember(
120
+ "PostgreSQL 17 adds MERGE statement improvements",
121
+ tags: ["database:postgresql", "database:sql", "version:17"]
388
122
  )
389
123
 
390
- # Add related implementation code
391
- htm.add_node(
392
- "code_db_connection",
393
- "PG.connect(ENV['DATABASE_URL'])",
394
- type: :code,
395
- importance: 6.0,
396
- related_to: ["decision_database"]
397
- )
398
-
399
- # Add related configuration
400
- htm.add_node(
401
- "fact_db_config",
402
- "Database uses connection pool of size 5",
403
- type: :fact,
404
- importance: 7.0,
405
- related_to: ["decision_database", "code_db_connection"]
406
- )
407
- ```
408
-
409
- !!! tip "Relationship Patterns"
410
- - Link implementation code to decisions
411
- - Connect questions to related facts
412
- - Link preferences to user facts
413
- - Connect related decisions (e.g., database choice → ORM choice)
414
-
415
- ## Categorization with Tags
416
-
417
- Tags enable flexible organization and retrieval:
418
-
419
- ```ruby
420
- # Use multiple tags for rich categorization
421
- htm.add_node(
422
- "decision_api_design",
423
- "RESTful API with JSON responses",
424
- type: :decision,
425
- importance: 8.0,
426
- tags: [
427
- "api", # Domain
428
- "rest", # Approach
429
- "architecture", # Category
430
- "backend", # Layer
431
- "json", # Format
432
- "http" # Protocol
433
- ]
434
- )
124
+ # Tags are used in hybrid search for relevance boosting
125
+ # A recall for "postgresql" will boost nodes with matching tags
435
126
  ```
436
127
 
437
128
  ### Tag Naming Conventions
438
129
 
439
130
  ```ruby
440
- # Good: Consistent, lowercase, descriptive
441
- tags: ["user", "authentication", "security", "oauth"]
131
+ # Good: Consistent, lowercase, hierarchical
132
+ tags: ["database:postgresql", "architecture:api", "security:authentication"]
442
133
 
443
- # Avoid: Inconsistent casing, vague terms
444
- tags: ["User", "auth", "stuff", "misc"]
134
+ # Avoid: Inconsistent casing, flat tags, vague terms
135
+ tags: ["PostgreSQL", "stuff", "misc"]
445
136
  ```
446
137
 
447
138
  ### Common Tag Patterns
448
139
 
449
140
  ```ruby
450
141
  # Domain tags
451
- tags: ["database", "api", "ui", "auth", "billing"]
142
+ tags: ["database:postgresql", "api:rest", "auth:jwt"]
452
143
 
453
144
  # Layer tags
454
- tags: ["frontend", "backend", "infrastructure", "data"]
455
-
456
- # Status tags
457
- tags: ["active", "deprecated", "experimental", "stable"]
145
+ tags: ["layer:frontend", "layer:backend", "layer:infrastructure"]
458
146
 
459
- # Priority tags
460
- tags: ["critical", "high-priority", "low-priority"]
147
+ # Technology tags
148
+ tags: ["tech:ruby", "tech:javascript", "tech:docker"]
461
149
 
462
150
  # Project tags
463
- tags: ["project-alpha", "project-beta"]
151
+ tags: ["project:alpha", "project:beta"]
464
152
  ```
465
153
 
466
- ## Advanced Patterns
154
+ ### Automatic Tag Extraction
467
155
 
468
- ### Timestamped Entries
469
-
470
- Create time-series logs:
156
+ When a node is created, a background job (GenerateTagsJob) automatically extracts hierarchical tags from the content using an LLM. This happens asynchronously.
471
157
 
472
158
  ```ruby
473
- def log_event(event_type, description)
474
- timestamp = Time.now.to_i
475
-
476
- htm.add_node(
477
- "event_#{event_type}_#{timestamp}",
478
- "#{event_type.upcase}: #{description}",
479
- type: :context,
480
- importance: 5.0,
481
- tags: ["event", event_type, "log"]
482
- )
483
- end
484
-
485
- log_event("error", "Database connection timeout")
486
- log_event("performance", "Query took 3.2 seconds")
159
+ # Just provide content, tags are auto-extracted
160
+ htm.remember("We're using Redis for session caching with a 24-hour TTL")
161
+ # Background job might extract: ["database:redis", "caching:session", "performance"]
487
162
  ```
488
163
 
489
- ### Versioned Information
490
-
491
- Track changes over time:
164
+ ## Content Deduplication
492
165
 
493
- ```ruby
494
- def update_fact(base_key, new_value, version)
495
- # Add versioned node
496
- htm.add_node(
497
- "#{base_key}_v#{version}",
498
- new_value,
499
- type: :fact,
500
- importance: 8.0,
501
- tags: ["versioned", "v#{version}"],
502
- related_to: version > 1 ? ["#{base_key}_v#{version-1}"] : []
503
- )
504
- end
505
-
506
- update_fact("user_email", "alice@example.com", 1)
507
- update_fact("user_email", "alice@newdomain.com", 2)
508
- ```
166
+ HTM automatically deduplicates content across all robots using SHA-256 hashing.
509
167
 
510
- ### Compound Memories
511
-
512
- Store structured information:
168
+ ### How It Works
513
169
 
514
170
  ```ruby
515
- # User profile as compound memory
516
- user_profile = {
517
- name: "Alice Thompson",
518
- email: "alice@example.com",
519
- role: "Senior Engineer",
520
- joined: "2023-01-15"
521
- }.map { |k, v| "#{k}: #{v}" }.join("\n")
522
-
523
- htm.add_node(
524
- "user_profile_001",
525
- user_profile,
526
- type: :fact,
527
- importance: 9.0,
528
- tags: ["user", "profile", "complete"]
529
- )
171
+ # Robot 1 remembers something
172
+ robot1 = HTM.new(robot_name: "assistant_1")
173
+ node_id = robot1.remember("Ruby 3.3 supports YJIT by default")
174
+ # => 123 (new node)
175
+
176
+ # Robot 2 remembers the same thing
177
+ robot2 = HTM.new(robot_name: "assistant_2")
178
+ node_id = robot2.remember("Ruby 3.3 supports YJIT by default")
179
+ # => 123 (same node_id! Content matched by hash)
530
180
  ```
531
181
 
532
- ### Conditional Importance
182
+ ### Robot-Node Association
533
183
 
534
- Adjust importance based on context:
184
+ Each robot-node relationship is tracked in `robot_nodes`:
535
185
 
536
186
  ```ruby
537
- def add_memory_with_context(key, value, type, base_importance, current_project)
538
- # Boost importance for current project
539
- importance = base_importance
540
- importance += 2.0 if tags.include?(current_project)
541
- importance = [importance, 10.0].min # Cap at 10.0
542
-
543
- htm.add_node(
544
- key,
545
- value,
546
- type: type,
547
- importance: importance,
548
- tags: [current_project, type.to_s]
549
- )
550
- end
187
+ # Check how many times a robot has "remembered" content
188
+ rn = HTM::Models::RobotNode.find_by(robot_id: htm.robot_id, node_id: node_id)
189
+ rn.remember_count # => 3 (remembered 3 times)
190
+ rn.first_remembered_at # => When first encountered
191
+ rn.last_remembered_at # => When it was most recently remembered
551
192
  ```
552
193
 
553
194
  ## Best Practices
554
195
 
555
- ### 1. Use Descriptive Keys
556
-
557
- ```ruby
558
- # Good: Descriptive and namespaced
559
- "user_profile_alice_001"
560
- "decision_database_selection"
561
- "code_authentication_jwt"
562
-
563
- # Bad: Vague or collision-prone
564
- "profile"
565
- "dec1"
566
- "code"
567
- ```
568
-
569
- ### 2. Be Consistent with Categories
196
+ ### 1. Make Content Self-Contained
570
197
 
571
198
  ```ruby
572
- # Define standard categories
573
- CATEGORIES = {
574
- architecture: "architecture",
575
- security: "security",
576
- performance: "performance",
577
- ui: "user-interface"
578
- }
579
-
580
- htm.add_node(
581
- key, value,
582
- category: CATEGORIES[:architecture]
199
+ # Good: Self-contained, understandable without context
200
+ htm.remember(
201
+ "Decided to use Redis for session storage because it provides fast access and automatic expiration"
583
202
  )
203
+
204
+ # Bad: Requires external context
205
+ htm.remember("Use Redis") # Why? For what?
584
206
  ```
585
207
 
586
- ### 3. Include Context in Values
208
+ ### 2. Include Rich Context
587
209
 
588
210
  ```ruby
589
- # Good: Self-contained
590
- htm.add_node(
591
- "decision_001",
592
- "Decided to use Redis for session storage because it provides fast access and automatic expiration",
593
- type: :decision
594
- )
595
-
596
- # Bad: Requires external context
597
- htm.add_node(
598
- "decision_001",
599
- "Use Redis", # Why? For what?
600
- type: :decision
601
- )
211
+ # Good: Includes rationale and alternatives
212
+ htm.remember(<<~DECISION)
213
+ Decision: Use OAuth 2.0 for authentication
214
+
215
+ Rationale:
216
+ - Industry standard
217
+ - Better security than basic auth
218
+ - Supports SSO
219
+
220
+ Alternatives considered:
221
+ - Basic auth (rejected: security concerns)
222
+ - Custom tokens (rejected: maintenance burden)
223
+ DECISION
602
224
  ```
603
225
 
604
- ### 4. Tag Generously
226
+ ### 3. Use Hierarchical Tags
605
227
 
606
228
  ```ruby
607
229
  # Good: Rich tags for multiple retrieval paths
608
- htm.add_node(
609
- "code_api_auth",
610
- "...",
611
- tags: ["api", "authentication", "security", "jwt", "middleware", "ruby"]
230
+ htm.remember(
231
+ "JWT tokens are stateless authentication tokens",
232
+ tags: ["auth:jwt", "security:tokens", "architecture:stateless"]
612
233
  )
613
234
 
614
- # Suboptimal: Minimal tags
615
- htm.add_node(
616
- "code_api_auth",
617
- "...",
618
- tags: ["code"]
619
- )
235
+ # Suboptimal: Flat or minimal tags
236
+ htm.remember("JWT info", tags: ["jwt"])
620
237
  ```
621
238
 
622
- ### 5. Use Relationships to Build Context
239
+ ### 4. Keep Content Focused
623
240
 
624
241
  ```ruby
625
- # Create a narrative with relationships
626
- decision_id = htm.add_node("decision_api", "Use GraphQL", type: :decision)
627
-
628
- htm.add_node(
629
- "question_api",
630
- "How to handle file uploads in GraphQL?",
631
- type: :question,
632
- related_to: ["decision_api"]
633
- )
242
+ # Good: One concept per memory
243
+ htm.remember("PostgreSQL's EXPLAIN ANALYZE shows actual execution times")
244
+ htm.remember("PostgreSQL's EXPLAIN shows the query plan without executing")
634
245
 
635
- htm.add_node(
636
- "code_upload",
637
- "GraphQL upload implementation",
638
- type: :code,
639
- related_to: ["decision_api", "question_api"]
640
- )
246
+ # Suboptimal: Multiple unrelated concepts
247
+ htm.remember("PostgreSQL has EXPLAIN and also supports JSON and has good performance")
641
248
  ```
642
249
 
643
- ## Common Pitfalls
644
-
645
- ### Pitfall 1: Duplicate Keys
250
+ ## Async Processing
646
251
 
647
- ```ruby
648
- # This will fail - keys must be unique
649
- htm.add_node("user_001", "Alice")
650
- htm.add_node("user_001", "Bob") # Error: key already exists
651
- ```
252
+ Embedding generation and tag extraction happen asynchronously:
652
253
 
653
- **Solution**: Use unique keys with timestamps or UUIDs:
254
+ ### Workflow
654
255
 
655
256
  ```ruby
656
- require 'securerandom'
257
+ # 1. Node created immediately (~15ms)
258
+ node_id = htm.remember("Important fact about databases")
259
+ # Returns immediately with node_id
657
260
 
658
- htm.add_node("user_#{SecureRandom.hex(4)}", "Alice")
659
- htm.add_node("user_#{SecureRandom.hex(4)}", "Bob")
261
+ # 2. Background jobs are enqueued (async)
262
+ # - GenerateEmbeddingJob runs (~100ms)
263
+ # - GenerateTagsJob runs (~1 second)
264
+
265
+ # 3. Node is eventually enriched
266
+ # - embedding field populated (enables vector search)
267
+ # - tags associated (enables tag navigation and boosting)
660
268
  ```
661
269
 
662
- ### Pitfall 2: Too-High Importance
270
+ ### Immediate vs Eventual Capabilities
663
271
 
664
- ```ruby
665
- # Don't make everything critical
666
- htm.add_node("note", "Random thought", importance: 10.0) # Too high!
667
- ```
272
+ | Capability | Available | Notes |
273
+ |------------|-----------|-------|
274
+ | Full-text search | Immediately | Works on content |
275
+ | Basic retrieval | Immediately | By node ID |
276
+ | Vector search | After ~100ms | Needs embedding |
277
+ | Tag-enhanced search | After ~1s | Needs tags |
278
+ | Hybrid search | After ~1s | Needs embedding + tags |
668
279
 
669
- **Solution**: Reserve high importance (9-10) for truly critical data.
280
+ ## Working Memory Integration
670
281
 
671
- ### Pitfall 3: Missing Context
282
+ When you `remember()`, the node is automatically added to working memory:
672
283
 
673
284
  ```ruby
674
- # Bad: No context
675
- htm.add_node("decision", "Chose option A", type: :decision)
676
-
677
- # Good: Include rationale
678
- htm.add_node(
679
- "decision_auth",
680
- "Chose OAuth 2.0 for authentication because it provides better security and is industry standard",
681
- type: :decision
682
- )
285
+ # Remember adds to both LTM and WM
286
+ htm.remember("Important fact")
287
+
288
+ # Check working memory
289
+ stats = htm.working_memory.stats
290
+ puts "Nodes in WM: #{stats[:node_count]}"
291
+ puts "Token usage: #{stats[:utilization]}%"
683
292
  ```
684
293
 
685
- ### Pitfall 4: No Tags
294
+ ### Eviction
295
+
296
+ If working memory is full, older/less important nodes are evicted to make room:
686
297
 
687
298
  ```ruby
688
- # Harder to find later
689
- htm.add_node("code_001", "def foo...", type: :code)
690
-
691
- # Better: Tags enable multiple retrieval paths
692
- htm.add_node(
693
- "code_001",
694
- "def foo...",
695
- type: :code,
696
- tags: ["ruby", "functions", "utilities"]
697
- )
299
+ # Working memory has a token budget
300
+ htm = HTM.new(working_memory_size: 128_000) # 128K tokens
301
+
302
+ # As you remember more, older items may be evicted from WM
303
+ # They remain in LTM and can be recalled later
698
304
  ```
699
305
 
700
306
  ## Performance Considerations
701
307
 
702
308
  ### Batch Operations
703
309
 
704
- When adding many memories, consider transaction efficiency:
310
+ Each `remember()` call is a database operation. For bulk inserts:
705
311
 
706
312
  ```ruby
707
- # Instead of many individual adds
708
- memories = [
709
- {key: "fact_001", value: "...", type: :fact},
710
- {key: "fact_002", value: "...", type: :fact},
711
- # ... many more
313
+ # Multiple memories
314
+ facts = [
315
+ "PostgreSQL supports JSONB",
316
+ "PostgreSQL has excellent indexing",
317
+ "PostgreSQL handles concurrent writes well"
712
318
  ]
713
319
 
714
- # Add them efficiently
715
- memories.each do |m|
716
- htm.add_node(m[:key], m[:value], type: m[:type], importance: m[:importance])
320
+ facts.each do |fact|
321
+ htm.remember(fact)
717
322
  end
718
323
  ```
719
324
 
720
- !!! note
721
- Each `add_node` call generates embeddings via Ollama. For large batches, this can take time. Consider adding in the background or showing progress.
722
-
723
- ### Embedding Generation
325
+ ### Content Length
724
326
 
725
- Embedding generation has a cost:
327
+ Longer content takes more time to process:
726
328
 
727
329
  ```ruby
728
- # Short text: Fast (~50ms)
729
- htm.add_node("fact", "User name is Alice", ...)
330
+ # Short text: Fast (~15ms save, ~100ms embedding)
331
+ htm.remember("User name is Alice")
730
332
 
731
- # Long text: Slower (~500ms)
732
- htm.add_node("code", "..." * 1000, ...) # 1000 chars
333
+ # Long text: Slower (~15ms save, ~500ms embedding)
334
+ htm.remember("..." * 1000) # 3000 chars
733
335
  ```
734
336
 
735
- !!! tip
736
- For very long content (>1000 tokens), consider splitting into multiple nodes or summarizing.
337
+ For very long content (>1000 tokens), consider splitting into multiple memories.
737
338
 
738
339
  ## Next Steps
739
340
 
740
341
  Now that you know how to add memories effectively, learn about:
741
342
 
742
- - [**Recalling Memories**](recalling-memories.md) - Search and retrieve memories
743
343
  - [**Search Strategies**](search-strategies.md) - Optimize retrieval with different strategies
744
- - [**Context Assembly**](context-assembly.md) - Use memories with your LLM
344
+ - [**Recalling Memories**](recalling-memories.md) - Search and retrieve memories
745
345
 
746
346
  ## Complete Example
747
347
 
@@ -750,75 +350,41 @@ require 'htm'
750
350
 
751
351
  htm = HTM.new(robot_name: "Memory Demo")
752
352
 
753
- # Add a fact with rich metadata
754
- htm.add_node(
755
- "user_profile",
756
- "Alice Thompson is a senior software engineer specializing in distributed systems",
757
- type: :fact,
758
- category: "user",
759
- importance: 9.0,
760
- tags: ["user", "profile", "engineering"]
353
+ # Add a fact
354
+ htm.remember(
355
+ "Alice Thompson is a senior software engineer specializing in distributed systems"
761
356
  )
762
357
 
763
- # Add a related preference
764
- htm.add_node(
765
- "user_pref_tools",
766
- "Alice prefers Vim for editing and tmux for terminal management",
767
- type: :preference,
768
- importance: 7.0,
769
- tags: ["user", "tools", "preferences"],
770
- related_to: ["user_profile"]
358
+ # Add a preference
359
+ htm.remember(
360
+ "Alice prefers Vim for editing and tmux for terminal management"
771
361
  )
772
362
 
773
363
  # Add a decision with context
774
- htm.add_node(
775
- "decision_messaging",
776
- <<~DECISION,
777
- Decision: Use RabbitMQ for async job processing
778
-
779
- Rationale:
780
- - Need reliable message delivery
781
- - Support for multiple consumer patterns
782
- - Excellent Ruby client library
783
-
784
- Alternatives:
785
- - Redis (simpler but less reliable)
786
- - Kafka (overkill for our scale)
787
- DECISION
788
- type: :decision,
789
- category: "architecture",
790
- importance: 8.5,
791
- tags: ["architecture", "messaging", "rabbitmq", "async"]
792
- )
364
+ htm.remember(<<~DECISION, tags: ["architecture", "messaging"])
365
+ Decision: Use RabbitMQ for async job processing
366
+
367
+ Rationale:
368
+ - Need reliable message delivery
369
+ - Support for multiple consumer patterns
370
+ - Excellent Ruby client library
371
+
372
+ Alternatives:
373
+ - Redis (simpler but less reliable)
374
+ - Kafka (overkill for our scale)
375
+ DECISION
793
376
 
794
377
  # Add implementation code
795
- htm.add_node(
796
- "code_rabbitmq_setup",
797
- <<~RUBY,
798
- require 'bunny'
799
-
800
- connection = Bunny.new(ENV['RABBITMQ_URL'])
801
- connection.start
802
-
803
- channel = connection.create_channel
804
- queue = channel.queue('jobs', durable: true)
805
- RUBY
806
- type: :code,
807
- importance: 6.0,
808
- tags: ["ruby", "rabbitmq", "setup", "code"],
809
- related_to: ["decision_messaging"]
810
- )
378
+ htm.remember(<<~RUBY, tags: ["code:ruby", "messaging:rabbitmq"])
379
+ require 'bunny'
811
380
 
812
- # Add an open question
813
- htm.add_node(
814
- "question_scaling",
815
- "Should we implement message partitioning for better scaling?",
816
- type: :question,
817
- importance: 7.0,
818
- tags: ["rabbitmq", "scaling", "performance", "open"],
819
- related_to: ["decision_messaging"]
820
- )
381
+ connection = Bunny.new(ENV['RABBITMQ_URL'])
382
+ connection.start
383
+
384
+ channel = connection.create_channel
385
+ queue = channel.queue('jobs', durable: true)
386
+ RUBY
821
387
 
822
- puts "Added 5 memories with relationships and rich metadata"
823
- puts "Stats: #{htm.memory_stats[:total_nodes]} total nodes"
388
+ puts "Added 4 memories (2 with tags)"
389
+ puts "Stats: #{HTM::Models::Node.count} total nodes"
824
390
  ```