htm 0.0.2 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. checksums.yaml +4 -4
  2. data/.aigcm_msg +1 -0
  3. data/.architecture/reviews/comprehensive-codebase-review.md +577 -0
  4. data/.claude/settings.local.json +92 -0
  5. data/.irbrc +283 -80
  6. data/.tbls.yml +2 -1
  7. data/CHANGELOG.md +294 -26
  8. data/CLAUDE.md +603 -0
  9. data/README.md +76 -5
  10. data/Rakefile +5 -0
  11. data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} +0 -1
  12. data/db/migrate/00002_create_robots.rb +11 -0
  13. data/db/migrate/00003_create_file_sources.rb +20 -0
  14. data/db/migrate/00004_create_nodes.rb +65 -0
  15. data/db/migrate/00005_create_tags.rb +13 -0
  16. data/db/migrate/00006_create_node_tags.rb +18 -0
  17. data/db/migrate/00007_create_robot_nodes.rb +26 -0
  18. data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +12 -0
  19. data/db/schema.sql +172 -1
  20. data/docs/api/database.md +1 -2
  21. data/docs/api/htm.md +197 -2
  22. data/docs/api/yard/HTM/ActiveRecordConfig.md +23 -0
  23. data/docs/api/yard/HTM/AuthorizationError.md +11 -0
  24. data/docs/api/yard/HTM/CircuitBreaker.md +92 -0
  25. data/docs/api/yard/HTM/CircuitBreakerOpenError.md +34 -0
  26. data/docs/api/yard/HTM/Configuration.md +175 -0
  27. data/docs/api/yard/HTM/Database.md +99 -0
  28. data/docs/api/yard/HTM/DatabaseError.md +14 -0
  29. data/docs/api/yard/HTM/EmbeddingError.md +18 -0
  30. data/docs/api/yard/HTM/EmbeddingService.md +58 -0
  31. data/docs/api/yard/HTM/Error.md +11 -0
  32. data/docs/api/yard/HTM/JobAdapter.md +39 -0
  33. data/docs/api/yard/HTM/LongTermMemory.md +342 -0
  34. data/docs/api/yard/HTM/NotFoundError.md +17 -0
  35. data/docs/api/yard/HTM/Observability.md +107 -0
  36. data/docs/api/yard/HTM/QueryTimeoutError.md +19 -0
  37. data/docs/api/yard/HTM/Railtie.md +27 -0
  38. data/docs/api/yard/HTM/ResourceExhaustedError.md +13 -0
  39. data/docs/api/yard/HTM/TagError.md +18 -0
  40. data/docs/api/yard/HTM/TagService.md +67 -0
  41. data/docs/api/yard/HTM/Timeframe/Result.md +24 -0
  42. data/docs/api/yard/HTM/Timeframe.md +40 -0
  43. data/docs/api/yard/HTM/TimeframeExtractor/Result.md +24 -0
  44. data/docs/api/yard/HTM/TimeframeExtractor.md +45 -0
  45. data/docs/api/yard/HTM/ValidationError.md +20 -0
  46. data/docs/api/yard/HTM/WorkingMemory.md +131 -0
  47. data/docs/api/yard/HTM.md +80 -0
  48. data/docs/api/yard/index.csv +179 -0
  49. data/docs/api/yard-reference.md +51 -0
  50. data/docs/database/README.md +128 -128
  51. data/docs/database/public.file_sources.md +42 -0
  52. data/docs/database/public.file_sources.svg +211 -0
  53. data/docs/database/public.node_tags.md +4 -4
  54. data/docs/database/public.node_tags.svg +212 -79
  55. data/docs/database/public.nodes.md +22 -12
  56. data/docs/database/public.nodes.svg +246 -127
  57. data/docs/database/public.robot_nodes.md +11 -9
  58. data/docs/database/public.robot_nodes.svg +220 -98
  59. data/docs/database/public.robots.md +2 -2
  60. data/docs/database/public.robots.svg +136 -81
  61. data/docs/database/public.tags.md +3 -3
  62. data/docs/database/public.tags.svg +118 -39
  63. data/docs/database/schema.json +850 -771
  64. data/docs/database/schema.svg +256 -197
  65. data/docs/development/schema.md +67 -2
  66. data/docs/guides/adding-memories.md +93 -7
  67. data/docs/guides/recalling-memories.md +36 -1
  68. data/examples/README.md +280 -0
  69. data/examples/cli_app/htm_cli.rb +65 -5
  70. data/examples/cli_app/temp.log +93 -0
  71. data/examples/file_loader_usage.rb +177 -0
  72. data/examples/robot_groups/lib/robot_group.rb +419 -0
  73. data/examples/robot_groups/lib/working_memory_channel.rb +140 -0
  74. data/examples/robot_groups/multi_process.rb +286 -0
  75. data/examples/robot_groups/robot_worker.rb +136 -0
  76. data/examples/robot_groups/same_process.rb +229 -0
  77. data/examples/timeframe_demo.rb +276 -0
  78. data/lib/htm/active_record_config.rb +1 -1
  79. data/lib/htm/circuit_breaker.rb +202 -0
  80. data/lib/htm/configuration.rb +59 -13
  81. data/lib/htm/database.rb +67 -36
  82. data/lib/htm/embedding_service.rb +39 -2
  83. data/lib/htm/errors.rb +131 -11
  84. data/lib/htm/jobs/generate_embedding_job.rb +5 -4
  85. data/lib/htm/jobs/generate_tags_job.rb +4 -0
  86. data/lib/htm/loaders/markdown_loader.rb +263 -0
  87. data/lib/htm/loaders/paragraph_chunker.rb +112 -0
  88. data/lib/htm/long_term_memory.rb +460 -343
  89. data/lib/htm/models/file_source.rb +99 -0
  90. data/lib/htm/models/node.rb +80 -5
  91. data/lib/htm/models/robot.rb +24 -1
  92. data/lib/htm/models/robot_node.rb +1 -0
  93. data/lib/htm/models/tag.rb +254 -4
  94. data/lib/htm/observability.rb +395 -0
  95. data/lib/htm/tag_service.rb +60 -3
  96. data/lib/htm/tasks.rb +26 -1
  97. data/lib/htm/timeframe.rb +194 -0
  98. data/lib/htm/timeframe_extractor.rb +307 -0
  99. data/lib/htm/version.rb +1 -1
  100. data/lib/htm/working_memory.rb +165 -70
  101. data/lib/htm.rb +328 -130
  102. data/lib/tasks/doc.rake +300 -0
  103. data/lib/tasks/files.rake +299 -0
  104. data/lib/tasks/htm.rake +158 -3
  105. data/lib/tasks/jobs.rake +3 -9
  106. data/lib/tasks/tags.rake +166 -6
  107. data/mkdocs.yml +36 -1
  108. data/notes/ARCHITECTURE_REVIEW.md +1167 -0
  109. data/notes/IMPLEMENTATION_SUMMARY.md +606 -0
  110. data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +451 -0
  111. data/notes/next_steps.md +100 -0
  112. data/notes/plan.md +627 -0
  113. data/notes/tag_ontology_enhancement_ideas.md +222 -0
  114. data/notes/timescaledb_removal_summary.md +200 -0
  115. metadata +125 -15
  116. data/db/migrate/20250101000002_create_robots.rb +0 -14
  117. data/db/migrate/20250101000003_create_nodes.rb +0 -42
  118. data/db/migrate/20250101000005_create_tags.rb +0 -38
  119. data/db/migrate/20250101000007_add_node_vector_indexes.rb +0 -30
  120. data/db/migrate/20250125000001_add_content_hash_to_nodes.rb +0 -14
  121. data/db/migrate/20250125000002_create_robot_nodes.rb +0 -35
  122. data/db/migrate/20250125000003_remove_source_and_robot_id_from_nodes.rb +0 -28
  123. data/db/migrate/20250126000001_create_working_memories.rb +0 -19
  124. data/db/migrate/20250126000002_remove_unused_columns.rb +0 -12
  125. data/docs/database/public.working_memories.md +0 -40
  126. data/docs/database/public.working_memories.svg +0 -112
  127. data/lib/htm/models/working_memory_entry.rb +0 -88
@@ -37,13 +37,13 @@ For detailed table definitions, columns, indexes, and constraints, see the auto-
37
37
  | [robots](../database/public.robots.md) | Registry of all LLM robots using the HTM system | Stores robot metadata and activity tracking |
38
38
  | [nodes](../database/public.nodes.md) | Core memory storage for conversation messages and context | Vector embeddings, full-text search, deduplication |
39
39
  | [tags](../database/public.tags.md) | Unique hierarchical tag names for categorization | Colon-separated namespaces (e.g., `ai:llm:embeddings`) |
40
- | [working_memories](../database/public.working_memories.md) | Per-robot working memory state | Optional persistence for token-limited context |
40
+ | [file_sources](../database/public.file_sources.md) | Source file metadata for loaded documents | Path, mtime, frontmatter, sync tracking |
41
41
 
42
42
  ### Join Tables
43
43
 
44
44
  | Table | Description | Details |
45
45
  |-------|-------------|---------|
46
- | [robot_nodes](../database/public.robot_nodes.md) | Links robots to nodes (many-to-many) | Enables "hive mind" shared memory architecture |
46
+ | [robot_nodes](../database/public.robot_nodes.md) | Links robots to nodes (many-to-many) | Enables "hive mind" shared memory; includes `working_memory` boolean for per-robot working memory state |
47
47
  | [node_tags](../database/public.node_tags.md) | Links nodes to tags (many-to-many) | Flexible multi-tag categorization |
48
48
 
49
49
  ### System Tables
@@ -65,6 +65,40 @@ Content deduplication is enforced via SHA-256 hashing in the `nodes` table:
65
65
  3. A new `robot_nodes` association is created (or updated if it already exists)
66
66
  4. This ensures identical memories are stored once but can be "remembered" by multiple robots
67
67
 
68
+ ### JSONB Metadata
69
+
70
+ The `nodes` table includes a `metadata` JSONB column for flexible key-value storage:
71
+
72
+ | Column | Type | Default | Description |
73
+ |--------|------|---------|-------------|
74
+ | `metadata` | jsonb | `{}` | Arbitrary key-value data |
75
+
76
+ **Features:**
77
+ - Stores any valid JSON data (strings, numbers, booleans, arrays, objects)
78
+ - GIN index (`idx_nodes_metadata`) for efficient containment queries
79
+ - Queried using PostgreSQL's `@>` containment operator
80
+
81
+ **Query examples:**
82
+ ```sql
83
+ -- Find nodes with specific metadata
84
+ SELECT * FROM nodes WHERE metadata @> '{"priority": "high"}'::jsonb;
85
+
86
+ -- Find nodes with nested metadata
87
+ SELECT * FROM nodes WHERE metadata @> '{"user": {"role": "admin"}}'::jsonb;
88
+
89
+ -- Find nodes with multiple conditions
90
+ SELECT * FROM nodes WHERE metadata @> '{"environment": "production", "version": 2}'::jsonb;
91
+ ```
92
+
93
+ **Ruby usage:**
94
+ ```ruby
95
+ # Store with metadata
96
+ htm.remember("API config", metadata: { environment: "production", version: 2 })
97
+
98
+ # Recall filtering by metadata
99
+ htm.recall("config", metadata: { environment: "production" })
100
+ ```
101
+
68
102
  ### Hierarchical Tags
69
103
 
70
104
  Tags use colon-separated hierarchies for organization:
@@ -78,6 +112,35 @@ SELECT * FROM tags WHERE name LIKE 'database:%'; -- All database-related tags
78
112
  SELECT * FROM tags WHERE name LIKE 'ai:llm:%'; -- All LLM-related tags
79
113
  ```
80
114
 
115
+ ### File Source Tracking
116
+
117
+ The `file_sources` table tracks loaded documents for re-sync support:
118
+
119
+ | Column | Type | Description |
120
+ |--------|------|-------------|
121
+ | `id` | bigint | Primary key |
122
+ | `file_path` | text | Absolute path to the source file |
123
+ | `file_hash` | varchar(64) | SHA-256 hash of file contents |
124
+ | `mtime` | timestamptz | File modification time for change detection |
125
+ | `file_size` | integer | File size in bytes |
126
+ | `frontmatter` | jsonb | Parsed YAML frontmatter metadata |
127
+ | `last_synced_at` | timestamptz | When file was last synced |
128
+ | `created_at` | timestamptz | When source was first loaded |
129
+ | `updated_at` | timestamptz | When source was last updated |
130
+
131
+ Nodes loaded from files have:
132
+ - `source_id` - Foreign key to file_sources (nullable, ON DELETE SET NULL)
133
+ - `chunk_position` - Integer position within the file (0-indexed)
134
+
135
+ Query nodes from a file:
136
+ ```sql
137
+ SELECT n.*
138
+ FROM nodes n
139
+ JOIN file_sources fs ON n.source_id = fs.id
140
+ WHERE fs.file_path = '/path/to/file.md'
141
+ ORDER BY n.chunk_position;
142
+ ```
143
+
81
144
  ### Remember Tracking
82
145
 
83
146
  The `robot_nodes` table tracks per-robot remember metadata:
@@ -278,6 +341,8 @@ The schema is managed through ActiveRecord migrations located in `db/migrate/`:
278
341
  1. `20250101000001_create_robots.rb` - Creates robots table
279
342
  2. `20250101000002_create_nodes.rb` - Creates nodes table with all indexes
280
343
  3. `20250101000005_create_tags.rb` - Creates tags and node_tags tables
344
+ 4. `20251128000002_create_file_sources.rb` - Creates file_sources table for document tracking
345
+ 5. `20251128000003_add_source_to_nodes.rb` - Adds source_id and chunk_position to nodes
281
346
 
282
347
  To apply migrations:
283
348
  ```bash
@@ -7,7 +7,7 @@ This guide covers everything you need to know about storing information in HTM e
7
7
  The primary method for adding memories is `remember`:
8
8
 
9
9
  ```ruby
10
- node_id = htm.remember(content, tags: [])
10
+ node_id = htm.remember(content, tags: [], metadata: {})
11
11
  ```
12
12
 
13
13
  **Parameters:**
@@ -16,6 +16,7 @@ node_id = htm.remember(content, tags: [])
16
16
  |-----------|------|---------|-------------|
17
17
  | `content` | String | *required* | The information to remember |
18
18
  | `tags` | Array\<String\> | `[]` | Manual tags to assign (in addition to auto-extracted tags) |
19
+ | `metadata` | Hash | `{}` | Arbitrary key-value metadata stored as JSONB |
19
20
 
20
21
  The method returns the database ID of the created node.
21
22
 
@@ -161,6 +162,86 @@ htm.remember("We're using Redis for session caching with a 24-hour TTL")
161
162
  # Background job might extract: ["database:redis", "caching:session", "performance"]
162
163
  ```
163
164
 
165
+ ## Using Metadata
166
+
167
+ Metadata provides flexible key-value storage for arbitrary data that doesn't fit into tags. Unlike tags (which are for hierarchical categorization), metadata is for structured data like version numbers, priorities, source systems, or any custom attributes.
168
+
169
+ ### Basic Metadata Usage
170
+
171
+ ```ruby
172
+ # Store with metadata
173
+ htm.remember(
174
+ "User prefers dark mode",
175
+ metadata: { category: "preference", priority: "high" }
176
+ )
177
+
178
+ # Multiple metadata fields
179
+ htm.remember(
180
+ "API endpoint changed from /v1 to /v2",
181
+ metadata: {
182
+ category: "migration",
183
+ version: 2,
184
+ breaking_change: true,
185
+ affected_services: ["web", "mobile"]
186
+ }
187
+ )
188
+ ```
189
+
190
+ ### Metadata vs Tags
191
+
192
+ | Feature | Tags | Metadata |
193
+ |---------|------|----------|
194
+ | Structure | Hierarchical (colon-separated) | Key-value pairs (values may be nested JSON) |
195
+ | Type | String only | Any JSON type (string, number, boolean, array, object) |
196
+ | Search | Prefix matching (`LIKE 'ai:%'`) | JSONB containment (`@>`) |
197
+ | Purpose | Categorization & navigation | Arbitrary attributes & filtering |
198
+ | Auto-extraction | Yes (via LLM) | No (always explicit) |
199
+
200
+ ### Common Metadata Patterns
201
+
202
+ ```ruby
203
+ # Version tracking
204
+ htm.remember("API uses OAuth 2.0", metadata: { version: 3, deprecated: false })
205
+
206
+ # Source tracking
207
+ htm.remember("Error rate is 0.1%", metadata: { source: "monitoring", dashboard: "errors" })
208
+
209
+ # Priority/importance
210
+ htm.remember("Deploy to prod on Fridays is forbidden", metadata: { priority: "critical" })
211
+
212
+ # Environment-specific
213
+ htm.remember("Database connection limit is 100", metadata: { environment: "production" })
214
+
215
+ # Combining with tags
216
+ htm.remember(
217
+ "Use connection pooling for better performance",
218
+ tags: ["database:postgresql", "performance"],
219
+ metadata: { priority: "high", reviewed: true, author: "dba-team" }
220
+ )
221
+ ```
222
+
223
+ ### Querying by Metadata
224
+
225
+ Use the `metadata` parameter in `recall()` to filter by metadata:
226
+
227
+ ```ruby
228
+ # Find all high-priority items
229
+ htm.recall("settings", metadata: { priority: "high" })
230
+
231
+ # Find production-specific configurations
232
+ htm.recall("database", metadata: { environment: "production" })
233
+
234
+ # Combine with other filters
235
+ htm.recall(
236
+ "API changes",
237
+ timeframe: "last month",
238
+ metadata: { breaking_change: true },
239
+ strategy: :hybrid
240
+ )
241
+ ```
242
+
243
+ Metadata filtering uses PostgreSQL's JSONB containment operator (`@>`), which means the node's metadata must contain all the key-value pairs you specify.
244
+
164
245
  ## Content Deduplication
165
246
 
166
247
  HTM automatically deduplicates content across all robots using SHA-256 hashing.
@@ -355,13 +436,14 @@ htm.remember(
355
436
  "Alice Thompson is a senior software engineer specializing in distributed systems"
356
437
  )
357
438
 
358
- # Add a preference
439
+ # Add a preference with metadata
359
440
  htm.remember(
360
- "Alice prefers Vim for editing and tmux for terminal management"
441
+ "Alice prefers Vim for editing and tmux for terminal management",
442
+ metadata: { category: "preference", source: "user-interview" }
361
443
  )
362
444
 
363
- # Add a decision with context
364
- htm.remember(<<~DECISION, tags: ["architecture", "messaging"])
445
+ # Add a decision with context, tags, and metadata
446
+ htm.remember(<<~DECISION, tags: ["architecture", "messaging"], metadata: { priority: "high", approved: true, version: 1 })
365
447
  Decision: Use RabbitMQ for async job processing
366
448
 
367
449
  Rationale:
@@ -374,8 +456,8 @@ htm.remember(<<~DECISION, tags: ["architecture", "messaging"])
374
456
  - Kafka (overkill for our scale)
375
457
  DECISION
376
458
 
377
- # Add implementation code
378
- htm.remember(<<~RUBY, tags: ["code:ruby", "messaging:rabbitmq"])
459
+ # Add implementation code with metadata
460
+ htm.remember(<<~RUBY, tags: ["code:ruby", "messaging:rabbitmq"], metadata: { language: "ruby", tested: true })
379
461
  require 'bunny'
380
462
 
381
463
  connection = Bunny.new(ENV['RABBITMQ_URL'])
@@ -387,4 +469,8 @@ RUBY
387
469
 
388
470
  puts "Added memories with relationships and rich metadata"
389
471
  puts "Stats: #{HTM::Models::Node.count} total nodes"
472
+
473
+ # Query by metadata
474
+ high_priority = htm.recall("decisions", metadata: { priority: "high" })
475
+ puts "High priority decisions: #{high_priority.count}"
390
476
  ```
@@ -390,9 +390,43 @@ results = htm.recall(topic: "JWT authentication", strategy: :fulltext)
390
390
  results = htm.recall(topic: "user validation methods", strategy: :vector)
391
391
  ```
392
392
 
393
+ ## Filtering by Metadata
394
+
395
+ HTM supports metadata filtering directly in the `recall()` method. This is more efficient than post-filtering because the database does the work.
396
+
397
+ ```ruby
398
+ # Filter by single metadata field
399
+ memories = htm.recall(
400
+ topic: "user settings",
401
+ metadata: { category: "preference" }
402
+ )
403
+ # => Returns only nodes with metadata containing { category: "preference" }
404
+
405
+ # Filter by multiple metadata fields
406
+ memories = htm.recall(
407
+ topic: "API configuration",
408
+ metadata: { environment: "production", version: 2 }
409
+ )
410
+ # => Returns nodes with BOTH environment: "production" AND version: 2
411
+
412
+ # Combine with other filters
413
+ memories = htm.recall(
414
+ topic: "database changes",
415
+ timeframe: "last month",
416
+ strategy: :hybrid,
417
+ metadata: { breaking_change: true },
418
+ limit: 10
419
+ )
420
+ ```
421
+
422
+ Metadata filtering uses PostgreSQL's JSONB containment operator (`@>`), which means:
423
+ - The node's metadata must contain ALL the key-value pairs you specify
424
+ - The node's metadata can have additional fields (they're ignored)
425
+ - Nested objects work: `metadata: { user: { role: "admin" } }` matches `{ user: { role: "admin", name: "..." } }`
426
+
393
427
  ## Combining Search with Filters
394
428
 
395
- While `recall` handles timeframes and topics, you can filter results further:
429
+ While `recall` handles timeframes, topics, and metadata, you can filter results further:
396
430
 
397
431
  ```ruby
398
432
  # Recall memories
@@ -622,6 +656,7 @@ memory = {
622
656
  'created_at' => "2024-01-15 10:30:00", # Timestamp
623
657
  'robot_id' => "uuid...", # Which robot added it
624
658
  'token_count' => 150, # Token count
659
+ 'metadata' => { 'priority' => 'high', 'version' => 2 }, # JSONB metadata
625
660
  'similarity' => 0.85 # Similarity score (vector/hybrid)
626
661
  # or 'rank' for fulltext
627
662
  }
@@ -0,0 +1,280 @@
1
+ # HTM Examples
2
+
3
+ This directory contains example applications demonstrating various ways to use the HTM (Hierarchical Temporary Memory) gem.
4
+
5
+ ## Prerequisites
6
+
7
+ All examples require:
8
+
9
+ 1. **PostgreSQL Database** with pgvector extension:
10
+ ```bash
11
+ export HTM_DBURL="postgresql://user@localhost:5432/htm_development"
12
+ ```
13
+
14
+ 2. **Ollama** (recommended for local LLM):
15
+ ```bash
16
+ ollama pull nomic-embed-text # For embeddings
17
+ ollama pull gemma3 # For tag extraction
18
+ ```
19
+
20
+ 3. **Ruby Dependencies**:
21
+ ```bash
22
+ bundle install
23
+ ```
24
+
25
+ ---
26
+
27
+ ## Standalone Scripts
28
+
29
+ ### basic_usage.rb
30
+
31
+ **Getting started with HTM fundamentals.**
32
+
33
+ Demonstrates the core API: configuring HTM, registering a robot, and using the three primary methods (`remember`, `recall`, `forget`).
34
+
35
+ ```bash
36
+ ruby examples/basic_usage.rb
37
+ ```
38
+
39
+ **Features:**
40
+ - HTM configuration with Ollama provider
41
+ - Robot initialization
42
+ - Storing memories with `remember()`
43
+ - Retrieving memories with `recall()` using timeframes
44
+ - Understanding async embedding/tag generation
45
+
46
+ ---
47
+
48
+ ### custom_llm_configuration.rb
49
+
50
+ **Flexible LLM integration patterns.**
51
+
52
+ Shows how to configure HTM with custom embedding and tag generation methods, supporting multiple LLM providers or custom infrastructure.
53
+
54
+ ```bash
55
+ ruby examples/custom_llm_configuration.rb
56
+ ```
57
+
58
+ **Features:**
59
+ - Default configuration (RubyLLM + Ollama)
60
+ - Custom lambdas for embedding generation
61
+ - Custom lambdas for tag extraction
62
+ - Service object integration pattern
63
+ - Mixed configuration (custom embedding + default tags)
64
+ - Provider settings (OpenAI, Anthropic, Gemini, etc.)
65
+
66
+ ---
67
+
68
+ ### file_loader_usage.rb
69
+
70
+ **Loading documents into long-term memory.**
71
+
72
+ Demonstrates loading markdown files with automatic paragraph chunking, YAML frontmatter extraction, and source tracking for re-sync.
73
+
74
+ ```bash
75
+ ruby examples/file_loader_usage.rb
76
+ ```
77
+
78
+ **Features:**
79
+ - Single file loading with `load_file()`
80
+ - Directory loading with glob patterns via `load_directory()`
81
+ - YAML frontmatter extraction (title, author, tags)
82
+ - Querying nodes from a specific file
83
+ - Re-sync behavior (skip unchanged files)
84
+ - Force reload option
85
+ - Unloading files with `unload_file()`
86
+
87
+ ---
88
+
89
+ ### timeframe_demo.rb
90
+
91
+ **Flexible time-based filtering for recall.**
92
+
93
+ Comprehensive demonstration of all timeframe options supported by `recall()`, including natural language parsing.
94
+
95
+ ```bash
96
+ ruby examples/timeframe_demo.rb
97
+ ```
98
+
99
+ **Features:**
100
+ - No filter (`nil`)
101
+ - Date/DateTime/Time objects (entire day)
102
+ - Range for precise time windows
103
+ - Natural language strings ("yesterday", "last week", "few days ago")
104
+ - Weekend expressions ("last weekend", "2 weekends ago")
105
+ - Automatic extraction (`:auto`) from query text
106
+ - Multiple time windows (array of ranges)
107
+
108
+ ---
109
+
110
+ ## Application Examples
111
+
112
+ ### example_app/
113
+
114
+ **Full-featured HTM demonstration with RubyLLM integration.**
115
+
116
+ A standalone application showing complete HTM workflow with database connection, Ollama integration, memory operations, and multiple search strategies.
117
+
118
+ ```bash
119
+ ruby examples/example_app/app.rb
120
+ ```
121
+
122
+ **Features:**
123
+ - Database connection verification
124
+ - RubyLLM configuration for embeddings and tags
125
+ - Async embedding/tag generation with wait
126
+ - Comparison of search strategies (:fulltext, :vector, :hybrid)
127
+ - Detailed output of generated tags and embeddings
128
+
129
+ ---
130
+
131
+ ### sinatra_app/
132
+
133
+ **Web application with Sidekiq background processing.**
134
+
135
+ A Sinatra-based web application demonstrating HTM in a multi-user web context with async job processing.
136
+
137
+ ```bash
138
+ cd examples/sinatra_app
139
+ bundle install
140
+ bundle exec ruby app.rb
141
+ ```
142
+
143
+ **Features:**
144
+ - Sidekiq integration for background jobs
145
+ - Session-based robot identification
146
+ - RESTful API endpoints:
147
+ - `POST /api/remember` - Store information
148
+ - `GET /api/recall` - Search memories with timeframe filtering
149
+ - `GET /api/stats` - Memory statistics
150
+ - `GET /api/tags` - Tag tree structure
151
+ - `GET /api/health` - Health check
152
+ - Interactive HTML UI with hybrid search scoring display
153
+ - Tag tree visualization
154
+
155
+ **Environment Variables:**
156
+ - `HTM_DBURL` - PostgreSQL connection (required)
157
+ - `REDIS_URL` - Redis for Sidekiq (default: redis://localhost:6379/0)
158
+ - `SESSION_SECRET` - Session encryption key
159
+
160
+ ---
161
+
162
+ ### cli_app/
163
+
164
+ **Interactive command-line application.**
165
+
166
+ A REPL-style CLI demonstrating synchronous job execution with the `:inline` backend, ideal for CLI tools and scripts.
167
+
168
+ ```bash
169
+ ruby examples/cli_app/htm_cli.rb
170
+ ```
171
+
172
+ **Commands:**
173
+ - `remember <text>` - Store information (waits for embedding/tags)
174
+ - `recall <topic>` - Hybrid search with LLM-powered response generation
175
+ - `tags [prefix]` - List all tags with linked node content
176
+ - `stats` - Memory statistics
177
+ - `help` - Show help
178
+ - `exit` - Quit
179
+
180
+ **Features:**
181
+ - Synchronous job execution (`:inline` backend)
182
+ - Real-time progress feedback
183
+ - Tag extraction visibility during search
184
+ - Hybrid search with scoring (similarity, tag_boost, combined)
185
+ - RubyLLM chat integration for context-aware responses
186
+ - Response storage in long-term memory
187
+
188
+ See [cli_app/README.md](cli_app/README.md) for detailed documentation.
189
+
190
+ ---
191
+
192
+ ### robot_groups/
193
+
194
+ **Multi-robot coordination with shared working memory.**
195
+
196
+ Demonstrates high-availability patterns with shared working memory, failover, and real-time synchronization via PostgreSQL LISTEN/NOTIFY.
197
+
198
+ #### same_process.rb
199
+
200
+ Single-process demonstration of robot groups:
201
+
202
+ ```bash
203
+ ruby examples/robot_groups/same_process.rb
204
+ ```
205
+
206
+ **Scenarios demonstrated:**
207
+ 1. Creating a group with primary + standby robots
208
+ 2. Adding shared memories
209
+ 3. Verifying synchronization
210
+ 4. Simulating failover (primary dies, standby takes over)
211
+ 5. Verifying standby has full context
212
+ 6. Dynamic scaling (adding new robots)
213
+ 7. Collaborative memory (multiple robots adding)
214
+ 8. Real-time sync via PostgreSQL LISTEN/NOTIFY
215
+
216
+ #### multi_process.rb
217
+
218
+ Cross-process demonstration with separate Ruby processes:
219
+
220
+ ```bash
221
+ ruby examples/robot_groups/multi_process.rb
222
+ ```
223
+
224
+ **Scenarios demonstrated:**
225
+ 1. Spawning robot worker processes
226
+ 2. Cross-process memory sharing
227
+ 3. Collaborative memory updates
228
+ 4. Failover when a process dies
229
+ 5. Dynamic scaling (adding new processes)
230
+
231
+ **Key concepts:**
232
+ - **Shared Working Memory**: Multiple robots share context via `robot_nodes` table
233
+ - **Active/Passive Roles**: Active robots participate; passive robots maintain warm standby
234
+ - **Failover**: Instant takeover with full context already loaded
235
+ - **Real-time Sync**: PostgreSQL LISTEN/NOTIFY for in-memory cache coordination
236
+
237
+ ---
238
+
239
+ ## Directory Structure
240
+
241
+ ```
242
+ examples/
243
+ ├── README.md # This file
244
+ ├── basic_usage.rb # Core API demonstration
245
+ ├── custom_llm_configuration.rb # LLM integration patterns
246
+ ├── file_loader_usage.rb # Document loading
247
+ ├── timeframe_demo.rb # Time-based filtering
248
+ ├── example_app/
249
+ │ ├── app.rb # Full-featured demo app
250
+ │ └── Rakefile
251
+ ├── sinatra_app/
252
+ │ ├── app.rb # Sinatra web application
253
+ │ ├── Gemfile
254
+ │ └── Gemfile.lock
255
+ ├── cli_app/
256
+ │ ├── htm_cli.rb # Interactive CLI
257
+ │ └── README.md # Detailed CLI documentation
258
+ └── robot_groups/
259
+ ├── same_process.rb # Single-process robot groups
260
+ ├── multi_process.rb # Multi-process coordination
261
+ ├── robot_worker.rb # Worker process for multi_process.rb
262
+ └── lib/
263
+ ├── robot_group.rb # RobotGroup coordination class
264
+ └── working_memory_channel.rb # PostgreSQL pub/sub
265
+ ```
266
+
267
+ ---
268
+
269
+ ## Choosing the Right Example
270
+
271
+ | Use Case | Example |
272
+ |----------|---------|
273
+ | Learning HTM basics | `basic_usage.rb` |
274
+ | Custom LLM integration | `custom_llm_configuration.rb` |
275
+ | Loading documents/files | `file_loader_usage.rb` |
276
+ | Time-based queries | `timeframe_demo.rb` |
277
+ | Web application | `sinatra_app/` |
278
+ | CLI tool | `cli_app/` |
279
+ | Multi-robot coordination | `robot_groups/` |
280
+ | High availability | `robot_groups/` |
@@ -19,6 +19,20 @@
19
19
 
20
20
  require_relative '../../lib/htm'
21
21
  require 'io/console'
22
+ require 'ruby_llm'
23
+
24
+ PROVIDER = :ollama
25
+ MODEL = 'gpt-oss:latest'
26
+
27
+ # Configure RubyLLM for Ollama provider (same pattern as HTM uses)
28
+ RubyLLM.configure do |config|
29
+ ollama_url = ENV.fetch('OLLAMA_URL', 'http://localhost:11434')
30
+ ollama_api_base = ollama_url.end_with?('/v1') ? ollama_url : "#{ollama_url}/v1"
31
+ config.ollama_api_base = ollama_api_base
32
+ end
33
+
34
+ # Create chat with Ollama model - use assume_model_exists to bypass registry check
35
+ # AI = RubyLLM.chat(model: MODEL, provider: PROVIDER, assume_model_exists: true)
22
36
 
23
37
  class HTMCli
24
38
  def initialize
@@ -43,6 +57,9 @@ class HTMCli
43
57
  end
44
58
  end
45
59
 
60
+ # Initialize RubyLLM chat for context-aware responses
61
+ @chat = RubyLLM.chat(model: MODEL, provider: PROVIDER)
62
+
46
63
  # Initialize HTM instance
47
64
  @htm = HTM.new(robot_name: "cli_assistant")
48
65
  end
@@ -146,12 +163,27 @@ class HTMCli
146
163
  puts "\nSearching for: \"#{topic}\""
147
164
  puts "Strategy: hybrid (vector + fulltext + tags)"
148
165
 
149
- # Show which tags match the query
150
- matching_tags = @htm.long_term_memory.find_query_matching_tags(topic)
151
- if matching_tags.any?
152
- puts "Matching tags: #{matching_tags.join(', ')}"
166
+ # Show tags extracted from query and which ones matched
167
+ tag_result = @htm.long_term_memory.find_query_matching_tags(topic, include_extracted: true)
168
+
169
+ if tag_result[:extracted].any?
170
+ puts "Extracted tags: #{tag_result[:extracted].join(', ')}"
171
+
172
+ # Show what was actually searched (exact + prefixes)
173
+ searched = tag_result[:extracted].dup
174
+ tag_result[:extracted].each do |tag|
175
+ levels = tag.split(':')
176
+ (1...levels.size).each { |i| searched << levels[0, i].join(':') }
177
+ end
178
+ puts "Searched for: #{searched.uniq.join(', ')}"
179
+
180
+ if tag_result[:matched].any?
181
+ puts "Matched in DB: #{tag_result[:matched].join(', ')}"
182
+ else
183
+ puts "Matched in DB: (none)"
184
+ end
153
185
  else
154
- puts "Matching tags: (none found)"
186
+ puts "Extracted tags: (none)"
155
187
  end
156
188
 
157
189
  start_time = Time.now
@@ -192,6 +224,34 @@ class HTMCli
192
224
  puts " Tags: (none)"
193
225
  end
194
226
  end
227
+
228
+ # Build LLM prompt with context from retrieved memories
229
+ context_content = memories.map { |m| m['content'] }.join("\n\n")
230
+
231
+ llm_prompt = <<~PROMPT
232
+ #{topic}
233
+ Your response should highlight information also found in the
234
+ following context:
235
+ <CONTEXT>
236
+ #{context_content}
237
+ </CONTEXT>
238
+ PROMPT
239
+
240
+ puts "\n" + "=" * 60
241
+ puts "Generating response for this prompt..."
242
+ puts llm_prompt
243
+ puts "=" * 60
244
+
245
+ begin
246
+ response = @chat.ask(llm_prompt)
247
+ puts "\n#{response.content}"
248
+
249
+ # Remember the LLM response in long-term memory
250
+ node_id = @htm.remember(response.content)
251
+ puts "\n[✓] Response stored as node #{node_id}"
252
+ rescue StandardError => e
253
+ puts "[✗] LLM Error: #{e.message}"
254
+ end
195
255
  end
196
256
 
197
257
  def handle_tags(filter = nil)