htm 0.0.2 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.aigcm_msg +1 -0
- data/.architecture/reviews/comprehensive-codebase-review.md +577 -0
- data/.claude/settings.local.json +92 -0
- data/.irbrc +283 -80
- data/.tbls.yml +2 -1
- data/CHANGELOG.md +294 -26
- data/CLAUDE.md +603 -0
- data/README.md +76 -5
- data/Rakefile +5 -0
- data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} +0 -1
- data/db/migrate/00002_create_robots.rb +11 -0
- data/db/migrate/00003_create_file_sources.rb +20 -0
- data/db/migrate/00004_create_nodes.rb +65 -0
- data/db/migrate/00005_create_tags.rb +13 -0
- data/db/migrate/00006_create_node_tags.rb +18 -0
- data/db/migrate/00007_create_robot_nodes.rb +26 -0
- data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +12 -0
- data/db/schema.sql +172 -1
- data/docs/api/database.md +1 -2
- data/docs/api/htm.md +197 -2
- data/docs/api/yard/HTM/ActiveRecordConfig.md +23 -0
- data/docs/api/yard/HTM/AuthorizationError.md +11 -0
- data/docs/api/yard/HTM/CircuitBreaker.md +92 -0
- data/docs/api/yard/HTM/CircuitBreakerOpenError.md +34 -0
- data/docs/api/yard/HTM/Configuration.md +175 -0
- data/docs/api/yard/HTM/Database.md +99 -0
- data/docs/api/yard/HTM/DatabaseError.md +14 -0
- data/docs/api/yard/HTM/EmbeddingError.md +18 -0
- data/docs/api/yard/HTM/EmbeddingService.md +58 -0
- data/docs/api/yard/HTM/Error.md +11 -0
- data/docs/api/yard/HTM/JobAdapter.md +39 -0
- data/docs/api/yard/HTM/LongTermMemory.md +342 -0
- data/docs/api/yard/HTM/NotFoundError.md +17 -0
- data/docs/api/yard/HTM/Observability.md +107 -0
- data/docs/api/yard/HTM/QueryTimeoutError.md +19 -0
- data/docs/api/yard/HTM/Railtie.md +27 -0
- data/docs/api/yard/HTM/ResourceExhaustedError.md +13 -0
- data/docs/api/yard/HTM/TagError.md +18 -0
- data/docs/api/yard/HTM/TagService.md +67 -0
- data/docs/api/yard/HTM/Timeframe/Result.md +24 -0
- data/docs/api/yard/HTM/Timeframe.md +40 -0
- data/docs/api/yard/HTM/TimeframeExtractor/Result.md +24 -0
- data/docs/api/yard/HTM/TimeframeExtractor.md +45 -0
- data/docs/api/yard/HTM/ValidationError.md +20 -0
- data/docs/api/yard/HTM/WorkingMemory.md +131 -0
- data/docs/api/yard/HTM.md +80 -0
- data/docs/api/yard/index.csv +179 -0
- data/docs/api/yard-reference.md +51 -0
- data/docs/database/README.md +128 -128
- data/docs/database/public.file_sources.md +42 -0
- data/docs/database/public.file_sources.svg +211 -0
- data/docs/database/public.node_tags.md +4 -4
- data/docs/database/public.node_tags.svg +212 -79
- data/docs/database/public.nodes.md +22 -12
- data/docs/database/public.nodes.svg +246 -127
- data/docs/database/public.robot_nodes.md +11 -9
- data/docs/database/public.robot_nodes.svg +220 -98
- data/docs/database/public.robots.md +2 -2
- data/docs/database/public.robots.svg +136 -81
- data/docs/database/public.tags.md +3 -3
- data/docs/database/public.tags.svg +118 -39
- data/docs/database/schema.json +850 -771
- data/docs/database/schema.svg +256 -197
- data/docs/development/schema.md +67 -2
- data/docs/guides/adding-memories.md +93 -7
- data/docs/guides/recalling-memories.md +36 -1
- data/examples/README.md +280 -0
- data/examples/cli_app/htm_cli.rb +65 -5
- data/examples/cli_app/temp.log +93 -0
- data/examples/file_loader_usage.rb +177 -0
- data/examples/robot_groups/lib/robot_group.rb +419 -0
- data/examples/robot_groups/lib/working_memory_channel.rb +140 -0
- data/examples/robot_groups/multi_process.rb +286 -0
- data/examples/robot_groups/robot_worker.rb +136 -0
- data/examples/robot_groups/same_process.rb +229 -0
- data/examples/timeframe_demo.rb +276 -0
- data/lib/htm/active_record_config.rb +1 -1
- data/lib/htm/circuit_breaker.rb +202 -0
- data/lib/htm/configuration.rb +59 -13
- data/lib/htm/database.rb +67 -36
- data/lib/htm/embedding_service.rb +39 -2
- data/lib/htm/errors.rb +131 -11
- data/lib/htm/jobs/generate_embedding_job.rb +5 -4
- data/lib/htm/jobs/generate_tags_job.rb +4 -0
- data/lib/htm/loaders/markdown_loader.rb +263 -0
- data/lib/htm/loaders/paragraph_chunker.rb +112 -0
- data/lib/htm/long_term_memory.rb +460 -343
- data/lib/htm/models/file_source.rb +99 -0
- data/lib/htm/models/node.rb +80 -5
- data/lib/htm/models/robot.rb +24 -1
- data/lib/htm/models/robot_node.rb +1 -0
- data/lib/htm/models/tag.rb +254 -4
- data/lib/htm/observability.rb +395 -0
- data/lib/htm/tag_service.rb +60 -3
- data/lib/htm/tasks.rb +26 -1
- data/lib/htm/timeframe.rb +194 -0
- data/lib/htm/timeframe_extractor.rb +307 -0
- data/lib/htm/version.rb +1 -1
- data/lib/htm/working_memory.rb +165 -70
- data/lib/htm.rb +328 -130
- data/lib/tasks/doc.rake +300 -0
- data/lib/tasks/files.rake +299 -0
- data/lib/tasks/htm.rake +158 -3
- data/lib/tasks/jobs.rake +3 -9
- data/lib/tasks/tags.rake +166 -6
- data/mkdocs.yml +36 -1
- data/notes/ARCHITECTURE_REVIEW.md +1167 -0
- data/notes/IMPLEMENTATION_SUMMARY.md +606 -0
- data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +451 -0
- data/notes/next_steps.md +100 -0
- data/notes/plan.md +627 -0
- data/notes/tag_ontology_enhancement_ideas.md +222 -0
- data/notes/timescaledb_removal_summary.md +200 -0
- metadata +125 -15
- data/db/migrate/20250101000002_create_robots.rb +0 -14
- data/db/migrate/20250101000003_create_nodes.rb +0 -42
- data/db/migrate/20250101000005_create_tags.rb +0 -38
- data/db/migrate/20250101000007_add_node_vector_indexes.rb +0 -30
- data/db/migrate/20250125000001_add_content_hash_to_nodes.rb +0 -14
- data/db/migrate/20250125000002_create_robot_nodes.rb +0 -35
- data/db/migrate/20250125000003_remove_source_and_robot_id_from_nodes.rb +0 -28
- data/db/migrate/20250126000001_create_working_memories.rb +0 -19
- data/db/migrate/20250126000002_remove_unused_columns.rb +0 -12
- data/docs/database/public.working_memories.md +0 -40
- data/docs/database/public.working_memories.svg +0 -112
- data/lib/htm/models/working_memory_entry.rb +0 -88
data/docs/development/schema.md
CHANGED
|
@@ -37,13 +37,13 @@ For detailed table definitions, columns, indexes, and constraints, see the auto-
|
|
|
37
37
|
| [robots](../database/public.robots.md) | Registry of all LLM robots using the HTM system | Stores robot metadata and activity tracking |
|
|
38
38
|
| [nodes](../database/public.nodes.md) | Core memory storage for conversation messages and context | Vector embeddings, full-text search, deduplication |
|
|
39
39
|
| [tags](../database/public.tags.md) | Unique hierarchical tag names for categorization | Colon-separated namespaces (e.g., `ai:llm:embeddings`) |
|
|
40
|
-
|
|
|
40
|
+
| [file_sources](../database/public.file_sources.md) | Source file metadata for loaded documents | Path, mtime, frontmatter, sync tracking |
|
|
41
41
|
|
|
42
42
|
### Join Tables
|
|
43
43
|
|
|
44
44
|
| Table | Description | Details |
|
|
45
45
|
|-------|-------------|---------|
|
|
46
|
-
| [robot_nodes](../database/public.robot_nodes.md) | Links robots to nodes (many-to-many) | Enables "hive mind" shared memory
|
|
46
|
+
| [robot_nodes](../database/public.robot_nodes.md) | Links robots to nodes (many-to-many) | Enables "hive mind" shared memory; includes `working_memory` boolean for per-robot working memory state |
|
|
47
47
|
| [node_tags](../database/public.node_tags.md) | Links nodes to tags (many-to-many) | Flexible multi-tag categorization |
|
|
48
48
|
|
|
49
49
|
### System Tables
|
|
@@ -65,6 +65,40 @@ Content deduplication is enforced via SHA-256 hashing in the `nodes` table:
|
|
|
65
65
|
3. A new `robot_nodes` association is created (or updated if it already exists)
|
|
66
66
|
4. This ensures identical memories are stored once but can be "remembered" by multiple robots
|
|
67
67
|
|
|
68
|
+
### JSONB Metadata
|
|
69
|
+
|
|
70
|
+
The `nodes` table includes a `metadata` JSONB column for flexible key-value storage:
|
|
71
|
+
|
|
72
|
+
| Column | Type | Default | Description |
|
|
73
|
+
|--------|------|---------|-------------|
|
|
74
|
+
| `metadata` | jsonb | `{}` | Arbitrary key-value data |
|
|
75
|
+
|
|
76
|
+
**Features:**
|
|
77
|
+
- Stores any valid JSON data (strings, numbers, booleans, arrays, objects)
|
|
78
|
+
- GIN index (`idx_nodes_metadata`) for efficient containment queries
|
|
79
|
+
- Queried using PostgreSQL's `@>` containment operator
|
|
80
|
+
|
|
81
|
+
**Query examples:**
|
|
82
|
+
```sql
|
|
83
|
+
-- Find nodes with specific metadata
|
|
84
|
+
SELECT * FROM nodes WHERE metadata @> '{"priority": "high"}'::jsonb;
|
|
85
|
+
|
|
86
|
+
-- Find nodes with nested metadata
|
|
87
|
+
SELECT * FROM nodes WHERE metadata @> '{"user": {"role": "admin"}}'::jsonb;
|
|
88
|
+
|
|
89
|
+
-- Find nodes with multiple conditions
|
|
90
|
+
SELECT * FROM nodes WHERE metadata @> '{"environment": "production", "version": 2}'::jsonb;
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
**Ruby usage:**
|
|
94
|
+
```ruby
|
|
95
|
+
# Store with metadata
|
|
96
|
+
htm.remember("API config", metadata: { environment: "production", version: 2 })
|
|
97
|
+
|
|
98
|
+
# Recall filtering by metadata
|
|
99
|
+
htm.recall("config", metadata: { environment: "production" })
|
|
100
|
+
```
|
|
101
|
+
|
|
68
102
|
### Hierarchical Tags
|
|
69
103
|
|
|
70
104
|
Tags use colon-separated hierarchies for organization:
|
|
@@ -78,6 +112,35 @@ SELECT * FROM tags WHERE name LIKE 'database:%'; -- All database-related tags
|
|
|
78
112
|
SELECT * FROM tags WHERE name LIKE 'ai:llm:%'; -- All LLM-related tags
|
|
79
113
|
```
|
|
80
114
|
|
|
115
|
+
### File Source Tracking
|
|
116
|
+
|
|
117
|
+
The `file_sources` table tracks loaded documents for re-sync support:
|
|
118
|
+
|
|
119
|
+
| Column | Type | Description |
|
|
120
|
+
|--------|------|-------------|
|
|
121
|
+
| `id` | bigint | Primary key |
|
|
122
|
+
| `file_path` | text | Absolute path to the source file |
|
|
123
|
+
| `file_hash` | varchar(64) | SHA-256 hash of file contents |
|
|
124
|
+
| `mtime` | timestamptz | File modification time for change detection |
|
|
125
|
+
| `file_size` | integer | File size in bytes |
|
|
126
|
+
| `frontmatter` | jsonb | Parsed YAML frontmatter metadata |
|
|
127
|
+
| `last_synced_at` | timestamptz | When file was last synced |
|
|
128
|
+
| `created_at` | timestamptz | When source was first loaded |
|
|
129
|
+
| `updated_at` | timestamptz | When source was last updated |
|
|
130
|
+
|
|
131
|
+
Nodes loaded from files have:
|
|
132
|
+
- `source_id` - Foreign key to file_sources (nullable, ON DELETE SET NULL)
|
|
133
|
+
- `chunk_position` - Integer position within the file (0-indexed)
|
|
134
|
+
|
|
135
|
+
Query nodes from a file:
|
|
136
|
+
```sql
|
|
137
|
+
SELECT n.*
|
|
138
|
+
FROM nodes n
|
|
139
|
+
JOIN file_sources fs ON n.source_id = fs.id
|
|
140
|
+
WHERE fs.file_path = '/path/to/file.md'
|
|
141
|
+
ORDER BY n.chunk_position;
|
|
142
|
+
```
|
|
143
|
+
|
|
81
144
|
### Remember Tracking
|
|
82
145
|
|
|
83
146
|
The `robot_nodes` table tracks per-robot remember metadata:
|
|
@@ -278,6 +341,8 @@ The schema is managed through ActiveRecord migrations located in `db/migrate/`:
|
|
|
278
341
|
1. `20250101000001_create_robots.rb` - Creates robots table
|
|
279
342
|
2. `20250101000002_create_nodes.rb` - Creates nodes table with all indexes
|
|
280
343
|
3. `20250101000005_create_tags.rb` - Creates tags and node_tags tables
|
|
344
|
+
4. `20251128000002_create_file_sources.rb` - Creates file_sources table for document tracking
|
|
345
|
+
5. `20251128000003_add_source_to_nodes.rb` - Adds source_id and chunk_position to nodes
|
|
281
346
|
|
|
282
347
|
To apply migrations:
|
|
283
348
|
```bash
|
|
@@ -7,7 +7,7 @@ This guide covers everything you need to know about storing information in HTM e
|
|
|
7
7
|
The primary method for adding memories is `remember`:
|
|
8
8
|
|
|
9
9
|
```ruby
|
|
10
|
-
node_id = htm.remember(content, tags: [])
|
|
10
|
+
node_id = htm.remember(content, tags: [], metadata: {})
|
|
11
11
|
```
|
|
12
12
|
|
|
13
13
|
**Parameters:**
|
|
@@ -16,6 +16,7 @@ node_id = htm.remember(content, tags: [])
|
|
|
16
16
|
|-----------|------|---------|-------------|
|
|
17
17
|
| `content` | String | *required* | The information to remember |
|
|
18
18
|
| `tags` | Array\<String\> | `[]` | Manual tags to assign (in addition to auto-extracted tags) |
|
|
19
|
+
| `metadata` | Hash | `{}` | Arbitrary key-value metadata stored as JSONB |
|
|
19
20
|
|
|
20
21
|
The method returns the database ID of the created node.
|
|
21
22
|
|
|
@@ -161,6 +162,86 @@ htm.remember("We're using Redis for session caching with a 24-hour TTL")
|
|
|
161
162
|
# Background job might extract: ["database:redis", "caching:session", "performance"]
|
|
162
163
|
```
|
|
163
164
|
|
|
165
|
+
## Using Metadata
|
|
166
|
+
|
|
167
|
+
Metadata provides flexible key-value storage for arbitrary data that doesn't fit into tags. Unlike tags (which are for hierarchical categorization), metadata is for structured data like version numbers, priorities, source systems, or any custom attributes.
|
|
168
|
+
|
|
169
|
+
### Basic Metadata Usage
|
|
170
|
+
|
|
171
|
+
```ruby
|
|
172
|
+
# Store with metadata
|
|
173
|
+
htm.remember(
|
|
174
|
+
"User prefers dark mode",
|
|
175
|
+
metadata: { category: "preference", priority: "high" }
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
# Multiple metadata fields
|
|
179
|
+
htm.remember(
|
|
180
|
+
"API endpoint changed from /v1 to /v2",
|
|
181
|
+
metadata: {
|
|
182
|
+
category: "migration",
|
|
183
|
+
version: 2,
|
|
184
|
+
breaking_change: true,
|
|
185
|
+
affected_services: ["web", "mobile"]
|
|
186
|
+
}
|
|
187
|
+
)
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
### Metadata vs Tags
|
|
191
|
+
|
|
192
|
+
| Feature | Tags | Metadata |
|
|
193
|
+
|---------|------|----------|
|
|
194
|
+
| Structure | Hierarchical (colon-separated) | Key-value pairs (values may be nested objects or arrays) |
|
|
195
|
+
| Type | String only | Any JSON type (string, number, boolean, array, object) |
|
|
196
|
+
| Search | Prefix matching (`LIKE 'ai:%'`) | JSONB containment (`@>`) |
|
|
197
|
+
| Purpose | Categorization & navigation | Arbitrary attributes & filtering |
|
|
198
|
+
| Auto-extraction | Yes (via LLM) | No (always explicit) |
|
|
199
|
+
|
|
200
|
+
### Common Metadata Patterns
|
|
201
|
+
|
|
202
|
+
```ruby
|
|
203
|
+
# Version tracking
|
|
204
|
+
htm.remember("API uses OAuth 2.0", metadata: { version: 3, deprecated: false })
|
|
205
|
+
|
|
206
|
+
# Source tracking
|
|
207
|
+
htm.remember("Error rate is 0.1%", metadata: { source: "monitoring", dashboard: "errors" })
|
|
208
|
+
|
|
209
|
+
# Priority/importance
|
|
210
|
+
htm.remember("Deploy to prod on Fridays is forbidden", metadata: { priority: "critical" })
|
|
211
|
+
|
|
212
|
+
# Environment-specific
|
|
213
|
+
htm.remember("Database connection limit is 100", metadata: { environment: "production" })
|
|
214
|
+
|
|
215
|
+
# Combining with tags
|
|
216
|
+
htm.remember(
|
|
217
|
+
"Use connection pooling for better performance",
|
|
218
|
+
tags: ["database:postgresql", "performance"],
|
|
219
|
+
metadata: { priority: "high", reviewed: true, author: "dba-team" }
|
|
220
|
+
)
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
### Querying by Metadata
|
|
224
|
+
|
|
225
|
+
Use the `metadata` parameter in `recall()` to filter by metadata:
|
|
226
|
+
|
|
227
|
+
```ruby
|
|
228
|
+
# Find all high-priority items
|
|
229
|
+
htm.recall("settings", metadata: { priority: "high" })
|
|
230
|
+
|
|
231
|
+
# Find production-specific configurations
|
|
232
|
+
htm.recall("database", metadata: { environment: "production" })
|
|
233
|
+
|
|
234
|
+
# Combine with other filters
|
|
235
|
+
htm.recall(
|
|
236
|
+
"API changes",
|
|
237
|
+
timeframe: "last month",
|
|
238
|
+
metadata: { breaking_change: true },
|
|
239
|
+
strategy: :hybrid
|
|
240
|
+
)
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
Metadata filtering uses PostgreSQL's JSONB containment operator (`@>`), which means the node's metadata must contain all the key-value pairs you specify.
|
|
244
|
+
|
|
164
245
|
## Content Deduplication
|
|
165
246
|
|
|
166
247
|
HTM automatically deduplicates content across all robots using SHA-256 hashing.
|
|
@@ -355,13 +436,14 @@ htm.remember(
|
|
|
355
436
|
"Alice Thompson is a senior software engineer specializing in distributed systems"
|
|
356
437
|
)
|
|
357
438
|
|
|
358
|
-
# Add a preference
|
|
439
|
+
# Add a preference with metadata
|
|
359
440
|
htm.remember(
|
|
360
|
-
"Alice prefers Vim for editing and tmux for terminal management"
|
|
441
|
+
"Alice prefers Vim for editing and tmux for terminal management",
|
|
442
|
+
metadata: { category: "preference", source: "user-interview" }
|
|
361
443
|
)
|
|
362
444
|
|
|
363
|
-
# Add a decision with context
|
|
364
|
-
htm.remember(<<~DECISION, tags: ["architecture", "messaging"])
|
|
445
|
+
# Add a decision with context, tags, and metadata
|
|
446
|
+
htm.remember(<<~DECISION, tags: ["architecture", "messaging"], metadata: { priority: "high", approved: true, version: 1 })
|
|
365
447
|
Decision: Use RabbitMQ for async job processing
|
|
366
448
|
|
|
367
449
|
Rationale:
|
|
@@ -374,8 +456,8 @@ htm.remember(<<~DECISION, tags: ["architecture", "messaging"])
|
|
|
374
456
|
- Kafka (overkill for our scale)
|
|
375
457
|
DECISION
|
|
376
458
|
|
|
377
|
-
# Add implementation code
|
|
378
|
-
htm.remember(<<~RUBY, tags: ["code:ruby", "messaging:rabbitmq"])
|
|
459
|
+
# Add implementation code with metadata
|
|
460
|
+
htm.remember(<<~RUBY, tags: ["code:ruby", "messaging:rabbitmq"], metadata: { language: "ruby", tested: true })
|
|
379
461
|
require 'bunny'
|
|
380
462
|
|
|
381
463
|
connection = Bunny.new(ENV['RABBITMQ_URL'])
|
|
@@ -387,4 +469,8 @@ RUBY
|
|
|
387
469
|
|
|
388
470
|
puts "Added memories with relationships and rich metadata"
|
|
389
471
|
puts "Stats: #{HTM::Models::Node.count} total nodes"
|
|
472
|
+
|
|
473
|
+
# Query by metadata
|
|
474
|
+
high_priority = htm.recall("decisions", metadata: { priority: "high" })
|
|
475
|
+
puts "High priority decisions: #{high_priority.count}"
|
|
390
476
|
```
|
|
@@ -390,9 +390,43 @@ results = htm.recall(topic: "JWT authentication", strategy: :fulltext)
|
|
|
390
390
|
results = htm.recall(topic: "user validation methods", strategy: :vector)
|
|
391
391
|
```
|
|
392
392
|
|
|
393
|
+
## Filtering by Metadata
|
|
394
|
+
|
|
395
|
+
HTM supports metadata filtering directly in the `recall()` method. This is more efficient than post-filtering because the database does the work.
|
|
396
|
+
|
|
397
|
+
```ruby
|
|
398
|
+
# Filter by single metadata field
|
|
399
|
+
memories = htm.recall(
|
|
400
|
+
topic: "user settings",
|
|
401
|
+
metadata: { category: "preference" }
|
|
402
|
+
)
|
|
403
|
+
# => Returns only nodes with metadata containing { category: "preference" }
|
|
404
|
+
|
|
405
|
+
# Filter by multiple metadata fields
|
|
406
|
+
memories = htm.recall(
|
|
407
|
+
topic: "API configuration",
|
|
408
|
+
metadata: { environment: "production", version: 2 }
|
|
409
|
+
)
|
|
410
|
+
# => Returns nodes with BOTH environment: "production" AND version: 2
|
|
411
|
+
|
|
412
|
+
# Combine with other filters
|
|
413
|
+
memories = htm.recall(
|
|
414
|
+
topic: "database changes",
|
|
415
|
+
timeframe: "last month",
|
|
416
|
+
strategy: :hybrid,
|
|
417
|
+
metadata: { breaking_change: true },
|
|
418
|
+
limit: 10
|
|
419
|
+
)
|
|
420
|
+
```
|
|
421
|
+
|
|
422
|
+
Metadata filtering uses PostgreSQL's JSONB containment operator (`@>`), which means:
|
|
423
|
+
- The node's metadata must contain ALL the key-value pairs you specify
|
|
424
|
+
- The node's metadata can have additional fields (they're ignored)
|
|
425
|
+
- Nested objects work: `metadata: { user: { role: "admin" } }` matches `{ user: { role: "admin", name: "..." } }`
|
|
426
|
+
|
|
393
427
|
## Combining Search with Filters
|
|
394
428
|
|
|
395
|
-
While `recall` handles timeframes and
|
|
429
|
+
While `recall` handles timeframes, topics, and metadata, you can filter results further:
|
|
396
430
|
|
|
397
431
|
```ruby
|
|
398
432
|
# Recall memories
|
|
@@ -622,6 +656,7 @@ memory = {
|
|
|
622
656
|
'created_at' => "2024-01-15 10:30:00", # Timestamp
|
|
623
657
|
'robot_id' => "uuid...", # Which robot added it
|
|
624
658
|
'token_count' => 150, # Token count
|
|
659
|
+
'metadata' => { 'priority' => 'high', 'version' => 2 }, # JSONB metadata
|
|
625
660
|
'similarity' => 0.85 # Similarity score (vector/hybrid)
|
|
626
661
|
# or 'rank' for fulltext
|
|
627
662
|
}
|
data/examples/README.md
ADDED
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
# HTM Examples
|
|
2
|
+
|
|
3
|
+
This directory contains example applications demonstrating various ways to use the HTM (Hierarchical Temporary Memory) gem.
|
|
4
|
+
|
|
5
|
+
## Prerequisites
|
|
6
|
+
|
|
7
|
+
All examples require:
|
|
8
|
+
|
|
9
|
+
1. **PostgreSQL Database** with pgvector extension:
|
|
10
|
+
```bash
|
|
11
|
+
export HTM_DBURL="postgresql://user@localhost:5432/htm_development"
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
2. **Ollama** (recommended for local LLM):
|
|
15
|
+
```bash
|
|
16
|
+
ollama pull nomic-embed-text # For embeddings
|
|
17
|
+
ollama pull gemma3 # For tag extraction
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
3. **Ruby Dependencies**:
|
|
21
|
+
```bash
|
|
22
|
+
bundle install
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Standalone Scripts
|
|
28
|
+
|
|
29
|
+
### basic_usage.rb
|
|
30
|
+
|
|
31
|
+
**Getting started with HTM fundamentals.**
|
|
32
|
+
|
|
33
|
+
Demonstrates the core API: configuring HTM, registering a robot, and using the three primary methods (`remember`, `recall`, `forget`).
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
ruby examples/basic_usage.rb
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
**Features:**
|
|
40
|
+
- HTM configuration with Ollama provider
|
|
41
|
+
- Robot initialization
|
|
42
|
+
- Storing memories with `remember()`
|
|
43
|
+
- Retrieving memories with `recall()` using timeframes
|
|
44
|
+
- Understanding async embedding/tag generation
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
### custom_llm_configuration.rb
|
|
49
|
+
|
|
50
|
+
**Flexible LLM integration patterns.**
|
|
51
|
+
|
|
52
|
+
Shows how to configure HTM with custom embedding and tag generation methods, supporting multiple LLM providers or custom infrastructure.
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
ruby examples/custom_llm_configuration.rb
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
**Features:**
|
|
59
|
+
- Default configuration (RubyLLM + Ollama)
|
|
60
|
+
- Custom lambdas for embedding generation
|
|
61
|
+
- Custom lambdas for tag extraction
|
|
62
|
+
- Service object integration pattern
|
|
63
|
+
- Mixed configuration (custom embedding + default tags)
|
|
64
|
+
- Provider settings (OpenAI, Anthropic, Gemini, etc.)
|
|
65
|
+
|
|
66
|
+
---
|
|
67
|
+
|
|
68
|
+
### file_loader_usage.rb
|
|
69
|
+
|
|
70
|
+
**Loading documents into long-term memory.**
|
|
71
|
+
|
|
72
|
+
Demonstrates loading markdown files with automatic paragraph chunking, YAML frontmatter extraction, and source tracking for re-sync.
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
ruby examples/file_loader_usage.rb
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
**Features:**
|
|
79
|
+
- Single file loading with `load_file()`
|
|
80
|
+
- Directory loading with glob patterns via `load_directory()`
|
|
81
|
+
- YAML frontmatter extraction (title, author, tags)
|
|
82
|
+
- Querying nodes from a specific file
|
|
83
|
+
- Re-sync behavior (skip unchanged files)
|
|
84
|
+
- Force reload option
|
|
85
|
+
- Unloading files with `unload_file()`
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
### timeframe_demo.rb
|
|
90
|
+
|
|
91
|
+
**Flexible time-based filtering for recall.**
|
|
92
|
+
|
|
93
|
+
Comprehensive demonstration of all timeframe options supported by `recall()`, including natural language parsing.
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
ruby examples/timeframe_demo.rb
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
**Features:**
|
|
100
|
+
- No filter (`nil`)
|
|
101
|
+
- Date/DateTime/Time objects (entire day)
|
|
102
|
+
- Range for precise time windows
|
|
103
|
+
- Natural language strings ("yesterday", "last week", "few days ago")
|
|
104
|
+
- Weekend expressions ("last weekend", "2 weekends ago")
|
|
105
|
+
- Automatic extraction (`:auto`) from query text
|
|
106
|
+
- Multiple time windows (array of ranges)
|
|
107
|
+
|
|
108
|
+
---
|
|
109
|
+
|
|
110
|
+
## Application Examples
|
|
111
|
+
|
|
112
|
+
### example_app/
|
|
113
|
+
|
|
114
|
+
**Full-featured HTM demonstration with RubyLLM integration.**
|
|
115
|
+
|
|
116
|
+
A standalone application showing complete HTM workflow with database connection, Ollama integration, memory operations, and multiple search strategies.
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
ruby examples/example_app/app.rb
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
**Features:**
|
|
123
|
+
- Database connection verification
|
|
124
|
+
- RubyLLM configuration for embeddings and tags
|
|
125
|
+
- Async embedding/tag generation with wait
|
|
126
|
+
- Comparison of search strategies (:fulltext, :vector, :hybrid)
|
|
127
|
+
- Detailed output of generated tags and embeddings
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
### sinatra_app/
|
|
132
|
+
|
|
133
|
+
**Web application with Sidekiq background processing.**
|
|
134
|
+
|
|
135
|
+
A Sinatra-based web application demonstrating HTM in a multi-user web context with async job processing.
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
cd examples/sinatra_app
|
|
139
|
+
bundle install
|
|
140
|
+
bundle exec ruby app.rb
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
**Features:**
|
|
144
|
+
- Sidekiq integration for background jobs
|
|
145
|
+
- Session-based robot identification
|
|
146
|
+
- RESTful API endpoints:
|
|
147
|
+
- `POST /api/remember` - Store information
|
|
148
|
+
- `GET /api/recall` - Search memories with timeframe filtering
|
|
149
|
+
- `GET /api/stats` - Memory statistics
|
|
150
|
+
- `GET /api/tags` - Tag tree structure
|
|
151
|
+
- `GET /api/health` - Health check
|
|
152
|
+
- Interactive HTML UI with hybrid search scoring display
|
|
153
|
+
- Tag tree visualization
|
|
154
|
+
|
|
155
|
+
**Environment Variables:**
|
|
156
|
+
- `HTM_DBURL` - PostgreSQL connection (required)
|
|
157
|
+
- `REDIS_URL` - Redis for Sidekiq (default: redis://localhost:6379/0)
|
|
158
|
+
- `SESSION_SECRET` - Session encryption key
|
|
159
|
+
|
|
160
|
+
---
|
|
161
|
+
|
|
162
|
+
### cli_app/
|
|
163
|
+
|
|
164
|
+
**Interactive command-line application.**
|
|
165
|
+
|
|
166
|
+
A REPL-style CLI demonstrating synchronous job execution with the `:inline` backend, ideal for CLI tools and scripts.
|
|
167
|
+
|
|
168
|
+
```bash
|
|
169
|
+
ruby examples/cli_app/htm_cli.rb
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
**Commands:**
|
|
173
|
+
- `remember <text>` - Store information (waits for embedding/tags)
|
|
174
|
+
- `recall <topic>` - Hybrid search with LLM-powered response generation
|
|
175
|
+
- `tags [prefix]` - List all tags with linked node content
|
|
176
|
+
- `stats` - Memory statistics
|
|
177
|
+
- `help` - Show help
|
|
178
|
+
- `exit` - Quit
|
|
179
|
+
|
|
180
|
+
**Features:**
|
|
181
|
+
- Synchronous job execution (`:inline` backend)
|
|
182
|
+
- Real-time progress feedback
|
|
183
|
+
- Tag extraction visibility during search
|
|
184
|
+
- Hybrid search with scoring (similarity, tag_boost, combined)
|
|
185
|
+
- RubyLLM chat integration for context-aware responses
|
|
186
|
+
- Response storage in long-term memory
|
|
187
|
+
|
|
188
|
+
See [cli_app/README.md](cli_app/README.md) for detailed documentation.
|
|
189
|
+
|
|
190
|
+
---
|
|
191
|
+
|
|
192
|
+
### robot_groups/
|
|
193
|
+
|
|
194
|
+
**Multi-robot coordination with shared working memory.**
|
|
195
|
+
|
|
196
|
+
Demonstrates high-availability patterns with shared working memory, failover, and real-time synchronization via PostgreSQL LISTEN/NOTIFY.
|
|
197
|
+
|
|
198
|
+
#### same_process.rb
|
|
199
|
+
|
|
200
|
+
Single-process demonstration of robot groups:
|
|
201
|
+
|
|
202
|
+
```bash
|
|
203
|
+
ruby examples/robot_groups/same_process.rb
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
**Scenarios demonstrated:**
|
|
207
|
+
1. Creating a group with primary + standby robots
|
|
208
|
+
2. Adding shared memories
|
|
209
|
+
3. Verifying synchronization
|
|
210
|
+
4. Simulating failover (primary dies, standby takes over)
|
|
211
|
+
5. Verifying standby has full context
|
|
212
|
+
6. Dynamic scaling (adding new robots)
|
|
213
|
+
7. Collaborative memory (multiple robots adding)
|
|
214
|
+
8. Real-time sync via PostgreSQL LISTEN/NOTIFY
|
|
215
|
+
|
|
216
|
+
#### multi_process.rb
|
|
217
|
+
|
|
218
|
+
Cross-process demonstration with separate Ruby processes:
|
|
219
|
+
|
|
220
|
+
```bash
|
|
221
|
+
ruby examples/robot_groups/multi_process.rb
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
**Scenarios demonstrated:**
|
|
225
|
+
1. Spawning robot worker processes
|
|
226
|
+
2. Cross-process memory sharing
|
|
227
|
+
3. Collaborative memory updates
|
|
228
|
+
4. Failover when a process dies
|
|
229
|
+
5. Dynamic scaling (adding new processes)
|
|
230
|
+
|
|
231
|
+
**Key concepts:**
|
|
232
|
+
- **Shared Working Memory**: Multiple robots share context via `robot_nodes` table
|
|
233
|
+
- **Active/Passive Roles**: Active robots participate; passive robots maintain warm standby
|
|
234
|
+
- **Failover**: Instant takeover with full context already loaded
|
|
235
|
+
- **Real-time Sync**: PostgreSQL LISTEN/NOTIFY for in-memory cache coordination
|
|
236
|
+
|
|
237
|
+
---
|
|
238
|
+
|
|
239
|
+
## Directory Structure
|
|
240
|
+
|
|
241
|
+
```
|
|
242
|
+
examples/
|
|
243
|
+
├── README.md # This file
|
|
244
|
+
├── basic_usage.rb # Core API demonstration
|
|
245
|
+
├── custom_llm_configuration.rb # LLM integration patterns
|
|
246
|
+
├── file_loader_usage.rb # Document loading
|
|
247
|
+
├── timeframe_demo.rb # Time-based filtering
|
|
248
|
+
├── example_app/
|
|
249
|
+
│ ├── app.rb # Full-featured demo app
|
|
250
|
+
│ └── Rakefile
|
|
251
|
+
├── sinatra_app/
|
|
252
|
+
│ ├── app.rb # Sinatra web application
|
|
253
|
+
│ ├── Gemfile
|
|
254
|
+
│ └── Gemfile.lock
|
|
255
|
+
├── cli_app/
|
|
256
|
+
│ ├── htm_cli.rb # Interactive CLI
|
|
257
|
+
│ └── README.md # Detailed CLI documentation
|
|
258
|
+
└── robot_groups/
|
|
259
|
+
├── same_process.rb # Single-process robot groups
|
|
260
|
+
├── multi_process.rb # Multi-process coordination
|
|
261
|
+
├── robot_worker.rb # Worker process for multi_process.rb
|
|
262
|
+
└── lib/
|
|
263
|
+
├── robot_group.rb # RobotGroup coordination class
|
|
264
|
+
└── working_memory_channel.rb # PostgreSQL pub/sub
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
---
|
|
268
|
+
|
|
269
|
+
## Choosing the Right Example
|
|
270
|
+
|
|
271
|
+
| Use Case | Example |
|
|
272
|
+
|----------|---------|
|
|
273
|
+
| Learning HTM basics | `basic_usage.rb` |
|
|
274
|
+
| Custom LLM integration | `custom_llm_configuration.rb` |
|
|
275
|
+
| Loading documents/files | `file_loader_usage.rb` |
|
|
276
|
+
| Time-based queries | `timeframe_demo.rb` |
|
|
277
|
+
| Web application | `sinatra_app/` |
|
|
278
|
+
| CLI tool | `cli_app/` |
|
|
279
|
+
| Multi-robot coordination | `robot_groups/` |
|
|
280
|
+
| High availability | `robot_groups/` |
|
data/examples/cli_app/htm_cli.rb
CHANGED
|
@@ -19,6 +19,20 @@
|
|
|
19
19
|
|
|
20
20
|
require_relative '../../lib/htm'
|
|
21
21
|
require 'io/console'
|
|
22
|
+
require 'ruby_llm'
|
|
23
|
+
|
|
24
|
+
PROVIDER = :ollama
|
|
25
|
+
MODEL = 'gpt-oss:latest'
|
|
26
|
+
|
|
27
|
+
# Configure RubyLLM for Ollama provider (same pattern as HTM uses)
|
|
28
|
+
RubyLLM.configure do |config|
|
|
29
|
+
ollama_url = ENV.fetch('OLLAMA_URL', 'http://localhost:11434')
|
|
30
|
+
ollama_api_base = ollama_url.end_with?('/v1') ? ollama_url : "#{ollama_url}/v1"
|
|
31
|
+
config.ollama_api_base = ollama_api_base
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
# Create chat with Ollama model - use assume_model_exists to bypass registry check
|
|
35
|
+
# AI = RubyLLM.chat(model: MODEL, provider: PROVIDER, assume_model_exists: true)
|
|
22
36
|
|
|
23
37
|
class HTMCli
|
|
24
38
|
def initialize
|
|
@@ -43,6 +57,9 @@ class HTMCli
|
|
|
43
57
|
end
|
|
44
58
|
end
|
|
45
59
|
|
|
60
|
+
# Initialize RubyLLM chat for context-aware responses
|
|
61
|
+
@chat = RubyLLM.chat(model: MODEL, provider: PROVIDER)
|
|
62
|
+
|
|
46
63
|
# Initialize HTM instance
|
|
47
64
|
@htm = HTM.new(robot_name: "cli_assistant")
|
|
48
65
|
end
|
|
@@ -146,12 +163,27 @@ class HTMCli
|
|
|
146
163
|
puts "\nSearching for: \"#{topic}\""
|
|
147
164
|
puts "Strategy: hybrid (vector + fulltext + tags)"
|
|
148
165
|
|
|
149
|
-
# Show
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
166
|
+
# Show tags extracted from query and which ones matched
|
|
167
|
+
tag_result = @htm.long_term_memory.find_query_matching_tags(topic, include_extracted: true)
|
|
168
|
+
|
|
169
|
+
if tag_result[:extracted].any?
|
|
170
|
+
puts "Extracted tags: #{tag_result[:extracted].join(', ')}"
|
|
171
|
+
|
|
172
|
+
# Show what was actually searched (exact + prefixes)
|
|
173
|
+
searched = tag_result[:extracted].dup
|
|
174
|
+
tag_result[:extracted].each do |tag|
|
|
175
|
+
levels = tag.split(':')
|
|
176
|
+
(1...levels.size).each { |i| searched << levels[0, i].join(':') }
|
|
177
|
+
end
|
|
178
|
+
puts "Searched for: #{searched.uniq.join(', ')}"
|
|
179
|
+
|
|
180
|
+
if tag_result[:matched].any?
|
|
181
|
+
puts "Matched in DB: #{tag_result[:matched].join(', ')}"
|
|
182
|
+
else
|
|
183
|
+
puts "Matched in DB: (none)"
|
|
184
|
+
end
|
|
153
185
|
else
|
|
154
|
-
puts "
|
|
186
|
+
puts "Extracted tags: (none)"
|
|
155
187
|
end
|
|
156
188
|
|
|
157
189
|
start_time = Time.now
|
|
@@ -192,6 +224,34 @@ class HTMCli
|
|
|
192
224
|
puts " Tags: (none)"
|
|
193
225
|
end
|
|
194
226
|
end
|
|
227
|
+
|
|
228
|
+
# Build LLM prompt with context from retrieved memories
|
|
229
|
+
context_content = memories.map { |m| m['content'] }.join("\n\n")
|
|
230
|
+
|
|
231
|
+
llm_prompt = <<~PROMPT
|
|
232
|
+
#{topic}
|
|
233
|
+
Your response should highlight information also found in the
|
|
234
|
+
following context:
|
|
235
|
+
<CONTEXT>
|
|
236
|
+
#{context_content}
|
|
237
|
+
</CONTEXT>
|
|
238
|
+
PROMPT
|
|
239
|
+
|
|
240
|
+
puts "\n" + "=" * 60
|
|
241
|
+
puts "Generating response for this prompt..."
|
|
242
|
+
puts llm_prompt
|
|
243
|
+
puts "=" * 60
|
|
244
|
+
|
|
245
|
+
begin
|
|
246
|
+
response = @chat.ask(llm_prompt)
|
|
247
|
+
puts "\n#{response.content}"
|
|
248
|
+
|
|
249
|
+
# Remember the LLM response in long-term memory
|
|
250
|
+
node_id = @htm.remember(response.content)
|
|
251
|
+
puts "\n[✓] Response stored as node #{node_id}"
|
|
252
|
+
rescue StandardError => e
|
|
253
|
+
puts "[✗] LLM Error: #{e.message}"
|
|
254
|
+
end
|
|
195
255
|
end
|
|
196
256
|
|
|
197
257
|
def handle_tags(filter = nil)
|