htm 0.0.2 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.aigcm_msg +1 -0
- data/.architecture/reviews/comprehensive-codebase-review.md +577 -0
- data/.claude/settings.local.json +95 -0
- data/.irbrc +283 -80
- data/.tbls.yml +2 -1
- data/CHANGELOG.md +327 -26
- data/CLAUDE.md +603 -0
- data/README.md +83 -12
- data/Rakefile +5 -0
- data/bin/htm_mcp.rb +527 -0
- data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} +0 -1
- data/db/migrate/00002_create_robots.rb +11 -0
- data/db/migrate/00003_create_file_sources.rb +20 -0
- data/db/migrate/00004_create_nodes.rb +65 -0
- data/db/migrate/00005_create_tags.rb +13 -0
- data/db/migrate/00006_create_node_tags.rb +18 -0
- data/db/migrate/00007_create_robot_nodes.rb +26 -0
- data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +12 -0
- data/db/schema.sql +172 -1
- data/docs/api/database.md +1 -2
- data/docs/api/htm.md +197 -2
- data/docs/api/yard/HTM/ActiveRecordConfig.md +23 -0
- data/docs/api/yard/HTM/AuthorizationError.md +11 -0
- data/docs/api/yard/HTM/CircuitBreaker.md +92 -0
- data/docs/api/yard/HTM/CircuitBreakerOpenError.md +34 -0
- data/docs/api/yard/HTM/Configuration.md +175 -0
- data/docs/api/yard/HTM/Database.md +99 -0
- data/docs/api/yard/HTM/DatabaseError.md +14 -0
- data/docs/api/yard/HTM/EmbeddingError.md +18 -0
- data/docs/api/yard/HTM/EmbeddingService.md +58 -0
- data/docs/api/yard/HTM/Error.md +11 -0
- data/docs/api/yard/HTM/JobAdapter.md +39 -0
- data/docs/api/yard/HTM/LongTermMemory.md +342 -0
- data/docs/api/yard/HTM/NotFoundError.md +17 -0
- data/docs/api/yard/HTM/Observability.md +107 -0
- data/docs/api/yard/HTM/QueryTimeoutError.md +19 -0
- data/docs/api/yard/HTM/Railtie.md +27 -0
- data/docs/api/yard/HTM/ResourceExhaustedError.md +13 -0
- data/docs/api/yard/HTM/TagError.md +18 -0
- data/docs/api/yard/HTM/TagService.md +67 -0
- data/docs/api/yard/HTM/Timeframe/Result.md +24 -0
- data/docs/api/yard/HTM/Timeframe.md +40 -0
- data/docs/api/yard/HTM/TimeframeExtractor/Result.md +24 -0
- data/docs/api/yard/HTM/TimeframeExtractor.md +45 -0
- data/docs/api/yard/HTM/ValidationError.md +20 -0
- data/docs/api/yard/HTM/WorkingMemory.md +131 -0
- data/docs/api/yard/HTM.md +80 -0
- data/docs/api/yard/index.csv +179 -0
- data/docs/api/yard-reference.md +51 -0
- data/docs/database/README.md +128 -128
- data/docs/database/public.file_sources.md +42 -0
- data/docs/database/public.file_sources.svg +211 -0
- data/docs/database/public.node_tags.md +4 -4
- data/docs/database/public.node_tags.svg +212 -79
- data/docs/database/public.nodes.md +22 -12
- data/docs/database/public.nodes.svg +246 -127
- data/docs/database/public.robot_nodes.md +11 -9
- data/docs/database/public.robot_nodes.svg +220 -98
- data/docs/database/public.robots.md +2 -2
- data/docs/database/public.robots.svg +136 -81
- data/docs/database/public.tags.md +3 -3
- data/docs/database/public.tags.svg +118 -39
- data/docs/database/schema.json +850 -771
- data/docs/database/schema.svg +256 -197
- data/docs/development/schema.md +67 -2
- data/docs/guides/adding-memories.md +93 -7
- data/docs/guides/recalling-memories.md +36 -1
- data/examples/README.md +405 -0
- data/examples/cli_app/htm_cli.rb +65 -5
- data/examples/cli_app/temp.log +93 -0
- data/examples/file_loader_usage.rb +177 -0
- data/examples/mcp_client.rb +529 -0
- data/examples/robot_groups/lib/robot_group.rb +419 -0
- data/examples/robot_groups/lib/working_memory_channel.rb +140 -0
- data/examples/robot_groups/multi_process.rb +286 -0
- data/examples/robot_groups/robot_worker.rb +136 -0
- data/examples/robot_groups/same_process.rb +229 -0
- data/examples/timeframe_demo.rb +276 -0
- data/lib/htm/active_record_config.rb +1 -1
- data/lib/htm/circuit_breaker.rb +202 -0
- data/lib/htm/configuration.rb +59 -13
- data/lib/htm/database.rb +67 -36
- data/lib/htm/embedding_service.rb +39 -2
- data/lib/htm/errors.rb +131 -11
- data/lib/htm/jobs/generate_embedding_job.rb +5 -4
- data/lib/htm/jobs/generate_tags_job.rb +4 -0
- data/lib/htm/loaders/markdown_loader.rb +263 -0
- data/lib/htm/loaders/paragraph_chunker.rb +112 -0
- data/lib/htm/long_term_memory.rb +460 -343
- data/lib/htm/models/file_source.rb +99 -0
- data/lib/htm/models/node.rb +80 -5
- data/lib/htm/models/robot.rb +24 -1
- data/lib/htm/models/robot_node.rb +1 -0
- data/lib/htm/models/tag.rb +254 -4
- data/lib/htm/observability.rb +395 -0
- data/lib/htm/tag_service.rb +60 -3
- data/lib/htm/tasks.rb +26 -1
- data/lib/htm/timeframe.rb +194 -0
- data/lib/htm/timeframe_extractor.rb +307 -0
- data/lib/htm/version.rb +1 -1
- data/lib/htm/working_memory.rb +165 -70
- data/lib/htm.rb +328 -130
- data/lib/tasks/doc.rake +300 -0
- data/lib/tasks/files.rake +299 -0
- data/lib/tasks/htm.rake +158 -3
- data/lib/tasks/jobs.rake +3 -9
- data/lib/tasks/tags.rake +166 -6
- data/mkdocs.yml +36 -1
- data/notes/ARCHITECTURE_REVIEW.md +1167 -0
- data/notes/IMPLEMENTATION_SUMMARY.md +606 -0
- data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +451 -0
- data/notes/next_steps.md +100 -0
- data/notes/plan.md +627 -0
- data/notes/tag_ontology_enhancement_ideas.md +222 -0
- data/notes/timescaledb_removal_summary.md +200 -0
- metadata +158 -17
- data/db/migrate/20250101000002_create_robots.rb +0 -14
- data/db/migrate/20250101000003_create_nodes.rb +0 -42
- data/db/migrate/20250101000005_create_tags.rb +0 -38
- data/db/migrate/20250101000007_add_node_vector_indexes.rb +0 -30
- data/db/migrate/20250125000001_add_content_hash_to_nodes.rb +0 -14
- data/db/migrate/20250125000002_create_robot_nodes.rb +0 -35
- data/db/migrate/20250125000003_remove_source_and_robot_id_from_nodes.rb +0 -28
- data/db/migrate/20250126000001_create_working_memories.rb +0 -19
- data/db/migrate/20250126000002_remove_unused_columns.rb +0 -12
- data/docs/database/public.working_memories.md +0 -40
- data/docs/database/public.working_memories.svg +0 -112
- data/lib/htm/models/working_memory_entry.rb +0 -88
data/docs/development/schema.md
CHANGED
|
@@ -37,13 +37,13 @@ For detailed table definitions, columns, indexes, and constraints, see the auto-
|
|
|
37
37
|
| [robots](../database/public.robots.md) | Registry of all LLM robots using the HTM system | Stores robot metadata and activity tracking |
|
|
38
38
|
| [nodes](../database/public.nodes.md) | Core memory storage for conversation messages and context | Vector embeddings, full-text search, deduplication |
|
|
39
39
|
| [tags](../database/public.tags.md) | Unique hierarchical tag names for categorization | Colon-separated namespaces (e.g., `ai:llm:embeddings`) |
|
|
40
|
-
|
|
|
40
|
+
| [file_sources](../database/public.file_sources.md) | Source file metadata for loaded documents | Path, mtime, frontmatter, sync tracking |
|
|
41
41
|
|
|
42
42
|
### Join Tables
|
|
43
43
|
|
|
44
44
|
| Table | Description | Details |
|
|
45
45
|
|-------|-------------|---------|
|
|
46
|
-
| [robot_nodes](../database/public.robot_nodes.md) | Links robots to nodes (many-to-many) | Enables "hive mind" shared memory
|
|
46
|
+
| [robot_nodes](../database/public.robot_nodes.md) | Links robots to nodes (many-to-many) | Enables "hive mind" shared memory; includes `working_memory` boolean for per-robot working memory state |
|
|
47
47
|
| [node_tags](../database/public.node_tags.md) | Links nodes to tags (many-to-many) | Flexible multi-tag categorization |
|
|
48
48
|
|
|
49
49
|
### System Tables
|
|
@@ -65,6 +65,40 @@ Content deduplication is enforced via SHA-256 hashing in the `nodes` table:
|
|
|
65
65
|
3. A new `robot_nodes` association is created (or updated if it already exists)
|
|
66
66
|
4. This ensures identical memories are stored once but can be "remembered" by multiple robots
|
|
67
67
|
|
|
68
|
+
### JSONB Metadata
|
|
69
|
+
|
|
70
|
+
The `nodes` table includes a `metadata` JSONB column for flexible key-value storage:
|
|
71
|
+
|
|
72
|
+
| Column | Type | Default | Description |
|
|
73
|
+
|--------|------|---------|-------------|
|
|
74
|
+
| `metadata` | jsonb | `{}` | Arbitrary key-value data |
|
|
75
|
+
|
|
76
|
+
**Features:**
|
|
77
|
+
- Stores any valid JSON data (strings, numbers, booleans, arrays, objects)
|
|
78
|
+
- GIN index (`idx_nodes_metadata`) for efficient containment queries
|
|
79
|
+
- Queried using PostgreSQL's `@>` containment operator
|
|
80
|
+
|
|
81
|
+
**Query examples:**
|
|
82
|
+
```sql
|
|
83
|
+
-- Find nodes with specific metadata
|
|
84
|
+
SELECT * FROM nodes WHERE metadata @> '{"priority": "high"}'::jsonb;
|
|
85
|
+
|
|
86
|
+
-- Find nodes with nested metadata
|
|
87
|
+
SELECT * FROM nodes WHERE metadata @> '{"user": {"role": "admin"}}'::jsonb;
|
|
88
|
+
|
|
89
|
+
-- Find nodes with multiple conditions
|
|
90
|
+
SELECT * FROM nodes WHERE metadata @> '{"environment": "production", "version": 2}'::jsonb;
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
**Ruby usage:**
|
|
94
|
+
```ruby
|
|
95
|
+
# Store with metadata
|
|
96
|
+
htm.remember("API config", metadata: { environment: "production", version: 2 })
|
|
97
|
+
|
|
98
|
+
# Recall filtering by metadata
|
|
99
|
+
htm.recall("config", metadata: { environment: "production" })
|
|
100
|
+
```
|
|
101
|
+
|
|
68
102
|
### Hierarchical Tags
|
|
69
103
|
|
|
70
104
|
Tags use colon-separated hierarchies for organization:
|
|
@@ -78,6 +112,35 @@ SELECT * FROM tags WHERE name LIKE 'database:%'; -- All database-related tags
|
|
|
78
112
|
SELECT * FROM tags WHERE name LIKE 'ai:llm:%'; -- All LLM-related tags
|
|
79
113
|
```
|
|
80
114
|
|
|
115
|
+
### File Source Tracking
|
|
116
|
+
|
|
117
|
+
The `file_sources` table tracks loaded documents for re-sync support:
|
|
118
|
+
|
|
119
|
+
| Column | Type | Description |
|
|
120
|
+
|--------|------|-------------|
|
|
121
|
+
| `id` | bigint | Primary key |
|
|
122
|
+
| `file_path` | text | Absolute path to the source file |
|
|
123
|
+
| `file_hash` | varchar(64) | SHA-256 hash of file contents |
|
|
124
|
+
| `mtime` | timestamptz | File modification time for change detection |
|
|
125
|
+
| `file_size` | integer | File size in bytes |
|
|
126
|
+
| `frontmatter` | jsonb | Parsed YAML frontmatter metadata |
|
|
127
|
+
| `last_synced_at` | timestamptz | When file was last synced |
|
|
128
|
+
| `created_at` | timestamptz | When source was first loaded |
|
|
129
|
+
| `updated_at` | timestamptz | When source was last updated |
|
|
130
|
+
|
|
131
|
+
Nodes loaded from files have:
|
|
132
|
+
- `source_id` - Foreign key to file_sources (nullable, ON DELETE SET NULL)
|
|
133
|
+
- `chunk_position` - Integer position within the file (0-indexed)
|
|
134
|
+
|
|
135
|
+
Query nodes from a file:
|
|
136
|
+
```sql
|
|
137
|
+
SELECT n.*
|
|
138
|
+
FROM nodes n
|
|
139
|
+
JOIN file_sources fs ON n.source_id = fs.id
|
|
140
|
+
WHERE fs.file_path = '/path/to/file.md'
|
|
141
|
+
ORDER BY n.chunk_position;
|
|
142
|
+
```
|
|
143
|
+
|
|
81
144
|
### Remember Tracking
|
|
82
145
|
|
|
83
146
|
The `robot_nodes` table tracks per-robot remember metadata:
|
|
@@ -278,6 +341,8 @@ The schema is managed through ActiveRecord migrations located in `db/migrate/`:
|
|
|
278
341
|
1. `20250101000001_create_robots.rb` - Creates robots table
|
|
279
342
|
2. `20250101000002_create_nodes.rb` - Creates nodes table with all indexes
|
|
280
343
|
3. `20250101000005_create_tags.rb` - Creates tags and node_tags tables
|
|
344
|
+
4. `20251128000002_create_file_sources.rb` - Creates file_sources table for document tracking
|
|
345
|
+
5. `20251128000003_add_source_to_nodes.rb` - Adds source_id and chunk_position to nodes
|
|
281
346
|
|
|
282
347
|
To apply migrations:
|
|
283
348
|
```bash
|
|
@@ -7,7 +7,7 @@ This guide covers everything you need to know about storing information in HTM e
|
|
|
7
7
|
The primary method for adding memories is `remember`:
|
|
8
8
|
|
|
9
9
|
```ruby
|
|
10
|
-
node_id = htm.remember(content, tags: [])
|
|
10
|
+
node_id = htm.remember(content, tags: [], metadata: {})
|
|
11
11
|
```
|
|
12
12
|
|
|
13
13
|
**Parameters:**
|
|
@@ -16,6 +16,7 @@ node_id = htm.remember(content, tags: [])
|
|
|
16
16
|
|-----------|------|---------|-------------|
|
|
17
17
|
| `content` | String | *required* | The information to remember |
|
|
18
18
|
| `tags` | Array\<String\> | `[]` | Manual tags to assign (in addition to auto-extracted tags) |
|
|
19
|
+
| `metadata` | Hash | `{}` | Arbitrary key-value metadata stored as JSONB |
|
|
19
20
|
|
|
20
21
|
The method returns the database ID of the created node.
|
|
21
22
|
|
|
@@ -161,6 +162,86 @@ htm.remember("We're using Redis for session caching with a 24-hour TTL")
|
|
|
161
162
|
# Background job might extract: ["database:redis", "caching:session", "performance"]
|
|
162
163
|
```
|
|
163
164
|
|
|
165
|
+
## Using Metadata
|
|
166
|
+
|
|
167
|
+
Metadata provides flexible key-value storage for arbitrary data that doesn't fit into tags. Unlike tags (which are for hierarchical categorization), metadata is for structured data like version numbers, priorities, source systems, or any custom attributes.
|
|
168
|
+
|
|
169
|
+
### Basic Metadata Usage
|
|
170
|
+
|
|
171
|
+
```ruby
|
|
172
|
+
# Store with metadata
|
|
173
|
+
htm.remember(
|
|
174
|
+
"User prefers dark mode",
|
|
175
|
+
metadata: { category: "preference", priority: "high" }
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
# Multiple metadata fields
|
|
179
|
+
htm.remember(
|
|
180
|
+
"API endpoint changed from /v1 to /v2",
|
|
181
|
+
metadata: {
|
|
182
|
+
category: "migration",
|
|
183
|
+
version: 2,
|
|
184
|
+
breaking_change: true,
|
|
185
|
+
affected_services: ["web", "mobile"]
|
|
186
|
+
}
|
|
187
|
+
)
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
### Metadata vs Tags
|
|
191
|
+
|
|
192
|
+
| Feature | Tags | Metadata |
|
|
193
|
+
|---------|------|----------|
|
|
194
|
+
| Structure | Hierarchical (colon-separated) | Key-value pairs (values may be nested objects or arrays) |
|
|
195
|
+
| Type | String only | Any JSON type (string, number, boolean, array, object) |
|
|
196
|
+
| Search | Prefix matching (`LIKE 'ai:%'`) | JSONB containment (`@>`) |
|
|
197
|
+
| Purpose | Categorization & navigation | Arbitrary attributes & filtering |
|
|
198
|
+
| Auto-extraction | Yes (via LLM) | No (always explicit) |
|
|
199
|
+
|
|
200
|
+
### Common Metadata Patterns
|
|
201
|
+
|
|
202
|
+
```ruby
|
|
203
|
+
# Version tracking
|
|
204
|
+
htm.remember("API uses OAuth 2.0", metadata: { version: 3, deprecated: false })
|
|
205
|
+
|
|
206
|
+
# Source tracking
|
|
207
|
+
htm.remember("Error rate is 0.1%", metadata: { source: "monitoring", dashboard: "errors" })
|
|
208
|
+
|
|
209
|
+
# Priority/importance
|
|
210
|
+
htm.remember("Deploy to prod on Fridays is forbidden", metadata: { priority: "critical" })
|
|
211
|
+
|
|
212
|
+
# Environment-specific
|
|
213
|
+
htm.remember("Database connection limit is 100", metadata: { environment: "production" })
|
|
214
|
+
|
|
215
|
+
# Combining with tags
|
|
216
|
+
htm.remember(
|
|
217
|
+
"Use connection pooling for better performance",
|
|
218
|
+
tags: ["database:postgresql", "performance"],
|
|
219
|
+
metadata: { priority: "high", reviewed: true, author: "dba-team" }
|
|
220
|
+
)
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
### Querying by Metadata
|
|
224
|
+
|
|
225
|
+
Use the `metadata` parameter in `recall()` to filter by metadata:
|
|
226
|
+
|
|
227
|
+
```ruby
|
|
228
|
+
# Find all high-priority items
|
|
229
|
+
htm.recall("settings", metadata: { priority: "high" })
|
|
230
|
+
|
|
231
|
+
# Find production-specific configurations
|
|
232
|
+
htm.recall("database", metadata: { environment: "production" })
|
|
233
|
+
|
|
234
|
+
# Combine with other filters
|
|
235
|
+
htm.recall(
|
|
236
|
+
"API changes",
|
|
237
|
+
timeframe: "last month",
|
|
238
|
+
metadata: { breaking_change: true },
|
|
239
|
+
strategy: :hybrid
|
|
240
|
+
)
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
Metadata filtering uses PostgreSQL's JSONB containment operator (`@>`), which means the node's metadata must contain all the key-value pairs you specify.
|
|
244
|
+
|
|
164
245
|
## Content Deduplication
|
|
165
246
|
|
|
166
247
|
HTM automatically deduplicates content across all robots using SHA-256 hashing.
|
|
@@ -355,13 +436,14 @@ htm.remember(
|
|
|
355
436
|
"Alice Thompson is a senior software engineer specializing in distributed systems"
|
|
356
437
|
)
|
|
357
438
|
|
|
358
|
-
# Add a preference
|
|
439
|
+
# Add a preference with metadata
|
|
359
440
|
htm.remember(
|
|
360
|
-
"Alice prefers Vim for editing and tmux for terminal management"
|
|
441
|
+
"Alice prefers Vim for editing and tmux for terminal management",
|
|
442
|
+
metadata: { category: "preference", source: "user-interview" }
|
|
361
443
|
)
|
|
362
444
|
|
|
363
|
-
# Add a decision with context
|
|
364
|
-
htm.remember(<<~DECISION, tags: ["architecture", "messaging"])
|
|
445
|
+
# Add a decision with context, tags, and metadata
|
|
446
|
+
htm.remember(<<~DECISION, tags: ["architecture", "messaging"], metadata: { priority: "high", approved: true, version: 1 })
|
|
365
447
|
Decision: Use RabbitMQ for async job processing
|
|
366
448
|
|
|
367
449
|
Rationale:
|
|
@@ -374,8 +456,8 @@ htm.remember(<<~DECISION, tags: ["architecture", "messaging"])
|
|
|
374
456
|
- Kafka (overkill for our scale)
|
|
375
457
|
DECISION
|
|
376
458
|
|
|
377
|
-
# Add implementation code
|
|
378
|
-
htm.remember(<<~RUBY, tags: ["code:ruby", "messaging:rabbitmq"])
|
|
459
|
+
# Add implementation code with metadata
|
|
460
|
+
htm.remember(<<~RUBY, tags: ["code:ruby", "messaging:rabbitmq"], metadata: { language: "ruby", tested: true })
|
|
379
461
|
require 'bunny'
|
|
380
462
|
|
|
381
463
|
connection = Bunny.new(ENV['RABBITMQ_URL'])
|
|
@@ -387,4 +469,8 @@ RUBY
|
|
|
387
469
|
|
|
388
470
|
puts "Added memories with relationships and rich metadata"
|
|
389
471
|
puts "Stats: #{HTM::Models::Node.count} total nodes"
|
|
472
|
+
|
|
473
|
+
# Query by metadata
|
|
474
|
+
high_priority = htm.recall("decisions", metadata: { priority: "high" })
|
|
475
|
+
puts "High priority decisions: #{high_priority.count}"
|
|
390
476
|
```
|
|
@@ -390,9 +390,43 @@ results = htm.recall(topic: "JWT authentication", strategy: :fulltext)
|
|
|
390
390
|
results = htm.recall(topic: "user validation methods", strategy: :vector)
|
|
391
391
|
```
|
|
392
392
|
|
|
393
|
+
## Filtering by Metadata
|
|
394
|
+
|
|
395
|
+
HTM supports metadata filtering directly in the `recall()` method. This is more efficient than post-filtering because the database does the work.
|
|
396
|
+
|
|
397
|
+
```ruby
|
|
398
|
+
# Filter by single metadata field
|
|
399
|
+
memories = htm.recall(
|
|
400
|
+
topic: "user settings",
|
|
401
|
+
metadata: { category: "preference" }
|
|
402
|
+
)
|
|
403
|
+
# => Returns only nodes with metadata containing { category: "preference" }
|
|
404
|
+
|
|
405
|
+
# Filter by multiple metadata fields
|
|
406
|
+
memories = htm.recall(
|
|
407
|
+
topic: "API configuration",
|
|
408
|
+
metadata: { environment: "production", version: 2 }
|
|
409
|
+
)
|
|
410
|
+
# => Returns nodes with BOTH environment: "production" AND version: 2
|
|
411
|
+
|
|
412
|
+
# Combine with other filters
|
|
413
|
+
memories = htm.recall(
|
|
414
|
+
topic: "database changes",
|
|
415
|
+
timeframe: "last month",
|
|
416
|
+
strategy: :hybrid,
|
|
417
|
+
metadata: { breaking_change: true },
|
|
418
|
+
limit: 10
|
|
419
|
+
)
|
|
420
|
+
```
|
|
421
|
+
|
|
422
|
+
Metadata filtering uses PostgreSQL's JSONB containment operator (`@>`), which means:
|
|
423
|
+
- The node's metadata must contain ALL the key-value pairs you specify
|
|
424
|
+
- The node's metadata can have additional fields (they're ignored)
|
|
425
|
+
- Nested objects work: `metadata: { user: { role: "admin" } }` matches `{ user: { role: "admin", name: "..." } }`
|
|
426
|
+
|
|
393
427
|
## Combining Search with Filters
|
|
394
428
|
|
|
395
|
-
While `recall` handles timeframes and
|
|
429
|
+
While `recall` handles timeframes, topics, and metadata, you can filter results further:
|
|
396
430
|
|
|
397
431
|
```ruby
|
|
398
432
|
# Recall memories
|
|
@@ -622,6 +656,7 @@ memory = {
|
|
|
622
656
|
'created_at' => "2024-01-15 10:30:00", # Timestamp
|
|
623
657
|
'robot_id' => "uuid...", # Which robot added it
|
|
624
658
|
'token_count' => 150, # Token count
|
|
659
|
+
'metadata' => { 'priority' => 'high', 'version' => 2 }, # JSONB metadata
|
|
625
660
|
'similarity' => 0.85 # Similarity score (vector/hybrid)
|
|
626
661
|
# or 'rank' for fulltext
|
|
627
662
|
}
|
data/examples/README.md
ADDED
|
@@ -0,0 +1,405 @@
|
|
|
1
|
+
# HTM Examples
|
|
2
|
+
|
|
3
|
+
This directory contains example applications demonstrating various ways to use the HTM (Hierarchical Temporary Memory) gem.
|
|
4
|
+
|
|
5
|
+
## Prerequisites
|
|
6
|
+
|
|
7
|
+
All examples require:
|
|
8
|
+
|
|
9
|
+
1. **PostgreSQL Database** with pgvector extension:
|
|
10
|
+
```bash
|
|
11
|
+
export HTM_DBURL="postgresql://user@localhost:5432/htm_development"
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
2. **Ollama** (recommended for local LLM):
|
|
15
|
+
```bash
|
|
16
|
+
ollama pull nomic-embed-text # For embeddings
|
|
17
|
+
ollama pull gemma3 # For tag extraction
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
3. **Ruby Dependencies**:
|
|
21
|
+
```bash
|
|
22
|
+
bundle install
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Standalone Scripts
|
|
28
|
+
|
|
29
|
+
### basic_usage.rb
|
|
30
|
+
|
|
31
|
+
**Getting started with HTM fundamentals.**
|
|
32
|
+
|
|
33
|
+
Demonstrates the core API: configuring HTM, registering a robot, and using the three primary methods (`remember`, `recall`, `forget`).
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
ruby examples/basic_usage.rb
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
**Features:**
|
|
40
|
+
- HTM configuration with Ollama provider
|
|
41
|
+
- Robot initialization
|
|
42
|
+
- Storing memories with `remember()`
|
|
43
|
+
- Retrieving memories with `recall()` using timeframes
|
|
44
|
+
- Understanding async embedding/tag generation
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
### custom_llm_configuration.rb
|
|
49
|
+
|
|
50
|
+
**Flexible LLM integration patterns.**
|
|
51
|
+
|
|
52
|
+
Shows how to configure HTM with custom embedding and tag generation methods, supporting multiple LLM providers or custom infrastructure.
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
ruby examples/custom_llm_configuration.rb
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
**Features:**
|
|
59
|
+
- Default configuration (RubyLLM + Ollama)
|
|
60
|
+
- Custom lambdas for embedding generation
|
|
61
|
+
- Custom lambdas for tag extraction
|
|
62
|
+
- Service object integration pattern
|
|
63
|
+
- Mixed configuration (custom embedding + default tags)
|
|
64
|
+
- Provider settings (OpenAI, Anthropic, Gemini, etc.)
|
|
65
|
+
|
|
66
|
+
---
|
|
67
|
+
|
|
68
|
+
### file_loader_usage.rb
|
|
69
|
+
|
|
70
|
+
**Loading documents into long-term memory.**
|
|
71
|
+
|
|
72
|
+
Demonstrates loading markdown files with automatic paragraph chunking, YAML frontmatter extraction, and source tracking for re-sync.
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
ruby examples/file_loader_usage.rb
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
**Features:**
|
|
79
|
+
- Single file loading with `load_file()`
|
|
80
|
+
- Directory loading with glob patterns via `load_directory()`
|
|
81
|
+
- YAML frontmatter extraction (title, author, tags)
|
|
82
|
+
- Querying nodes from a specific file
|
|
83
|
+
- Re-sync behavior (skip unchanged files)
|
|
84
|
+
- Force reload option
|
|
85
|
+
- Unloading files with `unload_file()`
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
### timeframe_demo.rb
|
|
90
|
+
|
|
91
|
+
**Flexible time-based filtering for recall.**
|
|
92
|
+
|
|
93
|
+
Comprehensive demonstration of all timeframe options supported by `recall()`, including natural language parsing.
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
ruby examples/timeframe_demo.rb
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
**Features:**
|
|
100
|
+
- No filter (`nil`)
|
|
101
|
+
- Date/DateTime/Time objects (entire day)
|
|
102
|
+
- Range for precise time windows
|
|
103
|
+
- Natural language strings ("yesterday", "last week", "few days ago")
|
|
104
|
+
- Weekend expressions ("last weekend", "2 weekends ago")
|
|
105
|
+
- Automatic extraction (`:auto`) from query text
|
|
106
|
+
- Multiple time windows (array of ranges)
|
|
107
|
+
|
|
108
|
+
---
|
|
109
|
+
|
|
110
|
+
### mcp_server.rb & mcp_client.rb
|
|
111
|
+
|
|
112
|
+
**Model Context Protocol (MCP) integration for AI assistants.**
|
|
113
|
+
|
|
114
|
+
A pair of examples demonstrating how to expose HTM as an MCP server and connect to it from a chat client. This enables AI assistants like Claude Desktop to use HTM's memory capabilities.
|
|
115
|
+
|
|
116
|
+
#### mcp_server.rb
|
|
117
|
+
|
|
118
|
+
An MCP server that exposes HTM's memory operations as tools:
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
ruby examples/mcp_server.rb
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
**Tools exposed:**
|
|
125
|
+
- `SetRobotTool` - Set the robot identity for this session (call first)
|
|
126
|
+
- `GetRobotTool` - Get current robot information
|
|
127
|
+
- `GetWorkingMemoryTool` - Get working memory contents for session restore
|
|
128
|
+
- `RememberTool` - Store information with optional tags and metadata
|
|
129
|
+
- `RecallTool` - Search memories using vector, fulltext, or hybrid strategies
|
|
130
|
+
- `ForgetTool` - Soft-delete a memory (recoverable)
|
|
131
|
+
- `RestoreTool` - Restore a soft-deleted memory
|
|
132
|
+
- `ListTagsTool` - List tags with optional prefix filtering
|
|
133
|
+
- `StatsTool` - Get memory usage statistics
|
|
134
|
+
|
|
135
|
+
**Resources exposed:**
|
|
136
|
+
- `htm://statistics` - Memory statistics as JSON
|
|
137
|
+
- `htm://tags/hierarchy` - Tag hierarchy as text tree
|
|
138
|
+
- `htm://memories/recent` - Last 20 memories
|
|
139
|
+
|
|
140
|
+
**Claude Desktop configuration** (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS, `%APPDATA%\Claude\claude_desktop_config.json` on Windows):
|
|
141
|
+
```json
|
|
142
|
+
{
|
|
143
|
+
"mcpServers": {
|
|
144
|
+
"htm-memory": {
|
|
145
|
+
"command": "ruby",
|
|
146
|
+
"args": ["/path/to/htm/examples/mcp_server.rb"],
|
|
147
|
+
"env": {
|
|
148
|
+
"HTM_DBURL": "postgresql://user@localhost:5432/htm_development"
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
#### mcp_client.rb
|
|
156
|
+
|
|
157
|
+
An interactive chat client that connects to the MCP server via STDIO and uses a local Ollama model (gpt-oss) for conversation:
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
ruby examples/mcp_client.rb
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
**Features:**
|
|
164
|
+
- Prompts for robot name on startup (or uses `HTM_ROBOT_NAME` env var)
|
|
165
|
+
- Calls `SetRobotTool` to establish robot identity with the server
|
|
166
|
+
- Offers to restore previous session from working memory
|
|
167
|
+
- Connects to `mcp_server.rb` automatically via STDIO transport
|
|
168
|
+
- Interactive chat loop with tool calling
|
|
169
|
+
- LLM decides when to remember/recall information
|
|
170
|
+
- Logs tool calls and results for visibility
|
|
171
|
+
|
|
172
|
+
**Commands:**
|
|
173
|
+
- `/tools` - List available MCP tools
|
|
174
|
+
- `/resources` - List available MCP resources
|
|
175
|
+
- `/clear` - Clear chat history
|
|
176
|
+
- `/help` - Show help
|
|
177
|
+
- `/exit` - Quit
|
|
178
|
+
|
|
179
|
+
**Example startup and conversation:**
|
|
180
|
+
```
|
|
181
|
+
$ ruby examples/mcp_client.rb
|
|
182
|
+
Connecting to HTM MCP server...
|
|
183
|
+
[✓] Connected to HTM MCP server
|
|
184
|
+
[✓] Found 9 tools:
|
|
185
|
+
- SetRobotTool: Set the robot identity for this session...
|
|
186
|
+
- GetRobotTool: Get information about the current robot...
|
|
187
|
+
- GetWorkingMemoryTool: Get all working memory contents...
|
|
188
|
+
...
|
|
189
|
+
|
|
190
|
+
Enter your robot name (or press Enter for default): alice-assistant
|
|
191
|
+
[✓] Robot name: alice-assistant
|
|
192
|
+
Setting robot identity on MCP server...
|
|
193
|
+
[✓] Robot identity set: alice-assistant (id=5, nodes=12)
|
|
194
|
+
|
|
195
|
+
Found 3 memories in working memory from previous session.
|
|
196
|
+
Restore previous session? (y/N): y
|
|
197
|
+
[✓] Will restore 3 memories after chat setup
|
|
198
|
+
|
|
199
|
+
Initializing chat with gpt-oss:latest...
|
|
200
|
+
[✓] Chat initialized with tools attached
|
|
201
|
+
Restoring 3 memories to chat context...
|
|
202
|
+
[✓] Restored 3 memories to chat context
|
|
203
|
+
|
|
204
|
+
======================================================================
|
|
205
|
+
HTM MCP Client - AI Chat with Memory Tools
|
|
206
|
+
======================================================================
|
|
207
|
+
|
|
208
|
+
Robot: alice-assistant
|
|
209
|
+
Model: gpt-oss:latest (via Ollama)
|
|
210
|
+
...
|
|
211
|
+
|
|
212
|
+
you> What's the API rate limit?
|
|
213
|
+
|
|
214
|
+
assistant> The API rate limit is 1000 requests per minute.
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
**Additional dependencies:**
|
|
218
|
+
```bash
|
|
219
|
+
gem install fast-mcp ruby_llm-mcp
|
|
220
|
+
ollama pull gpt-oss # Or your preferred model
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
**Environment Variables:**
|
|
224
|
+
- `HTM_DBURL` - PostgreSQL connection (required)
|
|
225
|
+
- `OLLAMA_URL` - Ollama server URL (default: http://localhost:11434)
|
|
226
|
+
- `OLLAMA_MODEL` - Model to use (default: gpt-oss:latest)
|
|
227
|
+
- `HTM_ROBOT_NAME` - Robot name (optional, prompts if not set)
|
|
228
|
+
|
|
229
|
+
---
|
|
230
|
+
|
|
231
|
+
## Application Examples
|
|
232
|
+
|
|
233
|
+
### example_app/
|
|
234
|
+
|
|
235
|
+
**Full-featured HTM demonstration with RubyLLM integration.**
|
|
236
|
+
|
|
237
|
+
A standalone application showing complete HTM workflow with database connection, Ollama integration, memory operations, and multiple search strategies.
|
|
238
|
+
|
|
239
|
+
```bash
|
|
240
|
+
ruby examples/example_app/app.rb
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
**Features:**
|
|
244
|
+
- Database connection verification
|
|
245
|
+
- RubyLLM configuration for embeddings and tags
|
|
246
|
+
- Async embedding/tag generation with wait
|
|
247
|
+
- Comparison of search strategies (:fulltext, :vector, :hybrid)
|
|
248
|
+
- Detailed output of generated tags and embeddings
|
|
249
|
+
|
|
250
|
+
---
|
|
251
|
+
|
|
252
|
+
### sinatra_app/
|
|
253
|
+
|
|
254
|
+
**Web application with Sidekiq background processing.**
|
|
255
|
+
|
|
256
|
+
A Sinatra-based web application demonstrating HTM in a multi-user web context with async job processing.
|
|
257
|
+
|
|
258
|
+
```bash
|
|
259
|
+
cd examples/sinatra_app
|
|
260
|
+
bundle install
|
|
261
|
+
bundle exec ruby app.rb
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
**Features:**
|
|
265
|
+
- Sidekiq integration for background jobs
|
|
266
|
+
- Session-based robot identification
|
|
267
|
+
- RESTful API endpoints:
|
|
268
|
+
- `POST /api/remember` - Store information
|
|
269
|
+
- `GET /api/recall` - Search memories with timeframe filtering
|
|
270
|
+
- `GET /api/stats` - Memory statistics
|
|
271
|
+
- `GET /api/tags` - Tag tree structure
|
|
272
|
+
- `GET /api/health` - Health check
|
|
273
|
+
- Interactive HTML UI with hybrid search scoring display
|
|
274
|
+
- Tag tree visualization
|
|
275
|
+
|
|
276
|
+
**Environment Variables:**
|
|
277
|
+
- `HTM_DBURL` - PostgreSQL connection (required)
|
|
278
|
+
- `REDIS_URL` - Redis for Sidekiq (default: redis://localhost:6379/0)
|
|
279
|
+
- `SESSION_SECRET` - Session encryption key
|
|
280
|
+
|
|
281
|
+
---
|
|
282
|
+
|
|
283
|
+
### cli_app/
|
|
284
|
+
|
|
285
|
+
**Interactive command-line application.**
|
|
286
|
+
|
|
287
|
+
A REPL-style CLI demonstrating synchronous job execution with the `:inline` backend, ideal for CLI tools and scripts.
|
|
288
|
+
|
|
289
|
+
```bash
|
|
290
|
+
ruby examples/cli_app/htm_cli.rb
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
**Commands:**
|
|
294
|
+
- `remember <text>` - Store information (waits for embedding/tags)
|
|
295
|
+
- `recall <topic>` - Hybrid search with LLM-powered response generation
|
|
296
|
+
- `tags [prefix]` - List all tags with linked node content
|
|
297
|
+
- `stats` - Memory statistics
|
|
298
|
+
- `help` - Show help
|
|
299
|
+
- `exit` - Quit
|
|
300
|
+
|
|
301
|
+
**Features:**
|
|
302
|
+
- Synchronous job execution (`:inline` backend)
|
|
303
|
+
- Real-time progress feedback
|
|
304
|
+
- Tag extraction visibility during search
|
|
305
|
+
- Hybrid search with scoring (similarity, tag_boost, combined)
|
|
306
|
+
- RubyLLM chat integration for context-aware responses
|
|
307
|
+
- Response storage in long-term memory
|
|
308
|
+
|
|
309
|
+
See [cli_app/README.md](cli_app/README.md) for detailed documentation.
|
|
310
|
+
|
|
311
|
+
---
|
|
312
|
+
|
|
313
|
+
### robot_groups/
|
|
314
|
+
|
|
315
|
+
**Multi-robot coordination with shared working memory.**
|
|
316
|
+
|
|
317
|
+
Demonstrates high-availability patterns with shared working memory, failover, and real-time synchronization via PostgreSQL LISTEN/NOTIFY.
|
|
318
|
+
|
|
319
|
+
#### same_process.rb
|
|
320
|
+
|
|
321
|
+
Single-process demonstration of robot groups:
|
|
322
|
+
|
|
323
|
+
```bash
|
|
324
|
+
ruby examples/robot_groups/same_process.rb
|
|
325
|
+
```
|
|
326
|
+
|
|
327
|
+
**Scenarios demonstrated:**
|
|
328
|
+
1. Creating a group with primary + standby robots
|
|
329
|
+
2. Adding shared memories
|
|
330
|
+
3. Verifying synchronization
|
|
331
|
+
4. Simulating failover (primary dies, standby takes over)
|
|
332
|
+
5. Verifying standby has full context
|
|
333
|
+
6. Dynamic scaling (adding new robots)
|
|
334
|
+
7. Collaborative memory (multiple robots adding)
|
|
335
|
+
8. Real-time sync via PostgreSQL LISTEN/NOTIFY
|
|
336
|
+
|
|
337
|
+
#### multi_process.rb
|
|
338
|
+
|
|
339
|
+
Cross-process demonstration with separate Ruby processes:
|
|
340
|
+
|
|
341
|
+
```bash
|
|
342
|
+
ruby examples/robot_groups/multi_process.rb
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
**Scenarios demonstrated:**
|
|
346
|
+
1. Spawning robot worker processes
|
|
347
|
+
2. Cross-process memory sharing
|
|
348
|
+
3. Collaborative memory updates
|
|
349
|
+
4. Failover when a process dies
|
|
350
|
+
5. Dynamic scaling (adding new processes)
|
|
351
|
+
|
|
352
|
+
**Key concepts:**
|
|
353
|
+
- **Shared Working Memory**: Multiple robots share context via `robot_nodes` table
|
|
354
|
+
- **Active/Passive Roles**: Active robots participate; passive robots maintain a warm standby
|
|
355
|
+
- **Failover**: Instant takeover with full context already loaded
|
|
356
|
+
- **Real-time Sync**: PostgreSQL LISTEN/NOTIFY for in-memory cache coordination
|
|
357
|
+
|
|
358
|
+
---
|
|
359
|
+
|
|
360
|
+
## Directory Structure
|
|
361
|
+
|
|
362
|
+
```
|
|
363
|
+
examples/
|
|
364
|
+
├── README.md # This file
|
|
365
|
+
├── basic_usage.rb # Core API demonstration
|
|
366
|
+
├── custom_llm_configuration.rb # LLM integration patterns
|
|
367
|
+
├── file_loader_usage.rb # Document loading
|
|
368
|
+
├── timeframe_demo.rb # Time-based filtering
|
|
369
|
+
├── mcp_server.rb # MCP server exposing HTM tools
|
|
370
|
+
├── mcp_client.rb # MCP client with chat interface
|
|
371
|
+
├── example_app/
|
|
372
|
+
│ ├── app.rb # Full-featured demo app
|
|
373
|
+
│ └── Rakefile
|
|
374
|
+
├── sinatra_app/
|
|
375
|
+
│ ├── app.rb # Sinatra web application
|
|
376
|
+
│ ├── Gemfile
|
|
377
|
+
│ └── Gemfile.lock
|
|
378
|
+
├── cli_app/
|
|
379
|
+
│ ├── htm_cli.rb # Interactive CLI
|
|
380
|
+
│ └── README.md # Detailed CLI documentation
|
|
381
|
+
└── robot_groups/
|
|
382
|
+
    ├── same_process.rb # Single-process robot groups
|
|
383
|
+
    ├── multi_process.rb # Multi-process coordination
|
|
384
|
+
    ├── robot_worker.rb # Worker process for multi_process.rb
|
|
385
|
+
    └── lib/
|
|
386
|
+
        ├── robot_group.rb # RobotGroup coordination class
|
|
387
|
+
        └── working_memory_channel.rb # PostgreSQL pub/sub
|
|
388
|
+
```
|
|
389
|
+
|
|
390
|
+
---
|
|
391
|
+
|
|
392
|
+
## Choosing the Right Example
|
|
393
|
+
|
|
394
|
+
| Use Case | Example |
|
|
395
|
+
|----------|---------|
|
|
396
|
+
| Learning HTM basics | `basic_usage.rb` |
|
|
397
|
+
| Custom LLM integration | `custom_llm_configuration.rb` |
|
|
398
|
+
| Loading documents/files | `file_loader_usage.rb` |
|
|
399
|
+
| Time-based queries | `timeframe_demo.rb` |
|
|
400
|
+
| MCP server for AI assistants | `mcp_server.rb` |
|
|
401
|
+
| MCP client with chat interface | `mcp_client.rb` |
|
|
402
|
+
| Web application | `sinatra_app/` |
|
|
403
|
+
| CLI tool | `cli_app/` |
|
|
404
|
+
| Multi-robot coordination | `robot_groups/` |
|
|
405
|
+
| High availability | `robot_groups/` |
|