htm 0.0.2 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.aigcm_msg +1 -0
- data/.architecture/reviews/comprehensive-codebase-review.md +577 -0
- data/.claude/settings.local.json +95 -0
- data/.irbrc +283 -80
- data/.tbls.yml +2 -1
- data/CHANGELOG.md +327 -26
- data/CLAUDE.md +603 -0
- data/README.md +83 -12
- data/Rakefile +5 -0
- data/bin/htm_mcp.rb +527 -0
- data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} +0 -1
- data/db/migrate/00002_create_robots.rb +11 -0
- data/db/migrate/00003_create_file_sources.rb +20 -0
- data/db/migrate/00004_create_nodes.rb +65 -0
- data/db/migrate/00005_create_tags.rb +13 -0
- data/db/migrate/00006_create_node_tags.rb +18 -0
- data/db/migrate/00007_create_robot_nodes.rb +26 -0
- data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +12 -0
- data/db/schema.sql +172 -1
- data/docs/api/database.md +1 -2
- data/docs/api/htm.md +197 -2
- data/docs/api/yard/HTM/ActiveRecordConfig.md +23 -0
- data/docs/api/yard/HTM/AuthorizationError.md +11 -0
- data/docs/api/yard/HTM/CircuitBreaker.md +92 -0
- data/docs/api/yard/HTM/CircuitBreakerOpenError.md +34 -0
- data/docs/api/yard/HTM/Configuration.md +175 -0
- data/docs/api/yard/HTM/Database.md +99 -0
- data/docs/api/yard/HTM/DatabaseError.md +14 -0
- data/docs/api/yard/HTM/EmbeddingError.md +18 -0
- data/docs/api/yard/HTM/EmbeddingService.md +58 -0
- data/docs/api/yard/HTM/Error.md +11 -0
- data/docs/api/yard/HTM/JobAdapter.md +39 -0
- data/docs/api/yard/HTM/LongTermMemory.md +342 -0
- data/docs/api/yard/HTM/NotFoundError.md +17 -0
- data/docs/api/yard/HTM/Observability.md +107 -0
- data/docs/api/yard/HTM/QueryTimeoutError.md +19 -0
- data/docs/api/yard/HTM/Railtie.md +27 -0
- data/docs/api/yard/HTM/ResourceExhaustedError.md +13 -0
- data/docs/api/yard/HTM/TagError.md +18 -0
- data/docs/api/yard/HTM/TagService.md +67 -0
- data/docs/api/yard/HTM/Timeframe/Result.md +24 -0
- data/docs/api/yard/HTM/Timeframe.md +40 -0
- data/docs/api/yard/HTM/TimeframeExtractor/Result.md +24 -0
- data/docs/api/yard/HTM/TimeframeExtractor.md +45 -0
- data/docs/api/yard/HTM/ValidationError.md +20 -0
- data/docs/api/yard/HTM/WorkingMemory.md +131 -0
- data/docs/api/yard/HTM.md +80 -0
- data/docs/api/yard/index.csv +179 -0
- data/docs/api/yard-reference.md +51 -0
- data/docs/database/README.md +128 -128
- data/docs/database/public.file_sources.md +42 -0
- data/docs/database/public.file_sources.svg +211 -0
- data/docs/database/public.node_tags.md +4 -4
- data/docs/database/public.node_tags.svg +212 -79
- data/docs/database/public.nodes.md +22 -12
- data/docs/database/public.nodes.svg +246 -127
- data/docs/database/public.robot_nodes.md +11 -9
- data/docs/database/public.robot_nodes.svg +220 -98
- data/docs/database/public.robots.md +2 -2
- data/docs/database/public.robots.svg +136 -81
- data/docs/database/public.tags.md +3 -3
- data/docs/database/public.tags.svg +118 -39
- data/docs/database/schema.json +850 -771
- data/docs/database/schema.svg +256 -197
- data/docs/development/schema.md +67 -2
- data/docs/guides/adding-memories.md +93 -7
- data/docs/guides/recalling-memories.md +36 -1
- data/examples/README.md +405 -0
- data/examples/cli_app/htm_cli.rb +65 -5
- data/examples/cli_app/temp.log +93 -0
- data/examples/file_loader_usage.rb +177 -0
- data/examples/mcp_client.rb +529 -0
- data/examples/robot_groups/lib/robot_group.rb +419 -0
- data/examples/robot_groups/lib/working_memory_channel.rb +140 -0
- data/examples/robot_groups/multi_process.rb +286 -0
- data/examples/robot_groups/robot_worker.rb +136 -0
- data/examples/robot_groups/same_process.rb +229 -0
- data/examples/timeframe_demo.rb +276 -0
- data/lib/htm/active_record_config.rb +1 -1
- data/lib/htm/circuit_breaker.rb +202 -0
- data/lib/htm/configuration.rb +59 -13
- data/lib/htm/database.rb +67 -36
- data/lib/htm/embedding_service.rb +39 -2
- data/lib/htm/errors.rb +131 -11
- data/lib/htm/jobs/generate_embedding_job.rb +5 -4
- data/lib/htm/jobs/generate_tags_job.rb +4 -0
- data/lib/htm/loaders/markdown_loader.rb +263 -0
- data/lib/htm/loaders/paragraph_chunker.rb +112 -0
- data/lib/htm/long_term_memory.rb +460 -343
- data/lib/htm/models/file_source.rb +99 -0
- data/lib/htm/models/node.rb +80 -5
- data/lib/htm/models/robot.rb +24 -1
- data/lib/htm/models/robot_node.rb +1 -0
- data/lib/htm/models/tag.rb +254 -4
- data/lib/htm/observability.rb +395 -0
- data/lib/htm/tag_service.rb +60 -3
- data/lib/htm/tasks.rb +26 -1
- data/lib/htm/timeframe.rb +194 -0
- data/lib/htm/timeframe_extractor.rb +307 -0
- data/lib/htm/version.rb +1 -1
- data/lib/htm/working_memory.rb +165 -70
- data/lib/htm.rb +328 -130
- data/lib/tasks/doc.rake +300 -0
- data/lib/tasks/files.rake +299 -0
- data/lib/tasks/htm.rake +158 -3
- data/lib/tasks/jobs.rake +3 -9
- data/lib/tasks/tags.rake +166 -6
- data/mkdocs.yml +36 -1
- data/notes/ARCHITECTURE_REVIEW.md +1167 -0
- data/notes/IMPLEMENTATION_SUMMARY.md +606 -0
- data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +451 -0
- data/notes/next_steps.md +100 -0
- data/notes/plan.md +627 -0
- data/notes/tag_ontology_enhancement_ideas.md +222 -0
- data/notes/timescaledb_removal_summary.md +200 -0
- metadata +158 -17
- data/db/migrate/20250101000002_create_robots.rb +0 -14
- data/db/migrate/20250101000003_create_nodes.rb +0 -42
- data/db/migrate/20250101000005_create_tags.rb +0 -38
- data/db/migrate/20250101000007_add_node_vector_indexes.rb +0 -30
- data/db/migrate/20250125000001_add_content_hash_to_nodes.rb +0 -14
- data/db/migrate/20250125000002_create_robot_nodes.rb +0 -35
- data/db/migrate/20250125000003_remove_source_and_robot_id_from_nodes.rb +0 -28
- data/db/migrate/20250126000001_create_working_memories.rb +0 -19
- data/db/migrate/20250126000002_remove_unused_columns.rb +0 -12
- data/docs/database/public.working_memories.md +0 -40
- data/docs/database/public.working_memories.svg +0 -112
- data/lib/htm/models/working_memory_entry.rb +0 -88
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
# Class: HTM::LongTermMemory
|
|
2
|
+
**Inherits:** Object
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
Long-term Memory - PostgreSQL/TimescaleDB-backed permanent storage
|
|
6
|
+
|
|
7
|
+
LongTermMemory provides durable storage for all memory nodes with:
|
|
8
|
+
* Vector similarity search (RAG)
|
|
9
|
+
* Full-text search
|
|
10
|
+
* Time-range queries
|
|
11
|
+
* Relationship graphs
|
|
12
|
+
* Tag system
|
|
13
|
+
* ActiveRecord ORM for data access
|
|
14
|
+
* Query result caching for efficiency
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Attributes
|
|
18
|
+
## query_timeout[RW] {: #attribute-i-query_timeout }
|
|
19
|
+
Returns the value of attribute query_timeout.
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Instance Methods
|
|
23
|
+
## add(content:, token_count:0, robot_id:, embedding:nil, metadata:{}) {: #method-i-add }
|
|
24
|
+
Add a node to long-term memory (with deduplication)
|
|
25
|
+
|
|
26
|
+
If content already exists (by content_hash), links the robot to the existing
|
|
27
|
+
node and updates timestamps. Otherwise creates a new node.
|
|
28
|
+
|
|
29
|
+
**`@param`** [String] Conversation message/utterance
|
|
30
|
+
|
|
31
|
+
**`@param`** [Integer] Token count
|
|
32
|
+
|
|
33
|
+
**`@param`** [Integer] Robot identifier
|
|
34
|
+
|
|
35
|
+
**`@param`** [Array<Float>, nil] Pre-generated embedding vector
|
|
36
|
+
|
|
37
|
+
**`@param`** [Hash] Flexible metadata for the node (default: {})
|
|
38
|
+
|
|
39
|
+
**`@return`** [Hash] { node_id:, is_new:, robot_node: }
|
|
40
|
+
|
|
41
|
+
## add_tag(node_id:, tag:) {: #method-i-add_tag }
|
|
42
|
+
Add a tag to a node
|
|
43
|
+
|
|
44
|
+
**`@param`** [Integer] Node database ID
|
|
45
|
+
|
|
46
|
+
**`@param`** [String] Tag name
|
|
47
|
+
|
|
48
|
+
**`@return`** [void]
|
|
49
|
+
|
|
50
|
+
## batch_load_node_tags(node_ids) {: #method-i-batch_load_node_tags }
|
|
51
|
+
Batch load tags for multiple nodes (avoids N+1 queries)
|
|
52
|
+
|
|
53
|
+
**`@param`** [Array<Integer>] Node database IDs
|
|
54
|
+
|
|
55
|
+
**`@return`** [Hash<Integer, Array<String>>] Map of node_id to array of tag names
|
|
56
|
+
|
|
57
|
+
## calculate_relevance(node:, query_tags:[], vector_similarity:nil, node_tags:nil) {: #method-i-calculate_relevance }
|
|
58
|
+
Calculate dynamic relevance score for a node given query context
|
|
59
|
+
|
|
60
|
+
Combines multiple signals:
|
|
61
|
+
* Vector similarity (semantic match)
|
|
62
|
+
* Tag overlap (categorical match)
|
|
63
|
+
* Recency (freshness)
|
|
64
|
+
* Access frequency (popularity/utility)
|
|
65
|
+
|
|
66
|
+
**`@param`** [Hash] Node data with similarity, tags, created_at, access_count
|
|
67
|
+
|
|
68
|
+
**`@param`** [Array<String>] Tags associated with the query
|
|
69
|
+
|
|
70
|
+
**`@param`** [Float, nil] Pre-computed vector similarity (0-1)
|
|
71
|
+
|
|
72
|
+
**`@param`** [Array<String>, nil] Pre-loaded tags for this node (avoids N+1 query)
|
|
73
|
+
|
|
74
|
+
**`@return`** [Float] Composite relevance score (0-10)
|
|
75
|
+
|
|
76
|
+
## clear_cache!() {: #method-i-clear_cache! }
|
|
77
|
+
Clear the query cache
|
|
78
|
+
|
|
79
|
+
Call this after any operation that modifies data (soft delete, restore, etc.)
|
|
80
|
+
to ensure subsequent queries see fresh results.
|
|
81
|
+
|
|
82
|
+
**`@return`** [void]
|
|
83
|
+
|
|
84
|
+
## delete(node_id) {: #method-i-delete }
|
|
85
|
+
Delete a node
|
|
86
|
+
|
|
87
|
+
**`@param`** [Integer] Node database ID
|
|
88
|
+
|
|
89
|
+
**`@return`** [void]
|
|
90
|
+
|
|
91
|
+
## exists?(node_id) {: #method-i-exists? }
|
|
92
|
+
Check if a node exists
|
|
93
|
+
|
|
94
|
+
**`@param`** [Integer] Node database ID
|
|
95
|
+
|
|
96
|
+
**`@return`** [Boolean] True if node exists
|
|
97
|
+
|
|
98
|
+
## find_query_matching_tags(query, include_extracted:false) {: #method-i-find_query_matching_tags }
|
|
99
|
+
Find tags that match terms in the query
|
|
100
|
+
|
|
101
|
+
Searches the tags table for tags where any hierarchy level matches query
|
|
102
|
+
words. For example, query "PostgreSQL database" would match tags like
|
|
103
|
+
"database:postgresql", "database:sql", etc. Matching tags are found using
|
|
104
|
+
semantic extraction
|
|
105
|
+
|
|
106
|
+
**`@param`** [String] Search query
|
|
107
|
+
|
|
108
|
+
**`@param`** [Boolean] If true, returns hash with :extracted and :matched keys
|
|
109
|
+
|
|
110
|
+
**`@return`** [Array<String>] Matching tag names (default)
|
|
111
|
+
|
|
112
|
+
**`@return`** [Hash] If include_extracted: { extracted: [...], matched: [...] }
|
|
113
|
+
|
|
114
|
+
## get_node_tags(node_id) {: #method-i-get_node_tags }
|
|
115
|
+
Get tags for a specific node
|
|
116
|
+
|
|
117
|
+
**`@param`** [Integer] Node database ID
|
|
118
|
+
|
|
119
|
+
**`@return`** [Array<String>] Tag names
|
|
120
|
+
|
|
121
|
+
## initialize(config, pool_size:nil, query_timeout:DEFAULT_QUERY_TIMEOUT, cache_size:DEFAULT_CACHE_SIZE, cache_ttl:DEFAULT_CACHE_TTL) {: #method-i-initialize }
|
|
122
|
+
Initialize long-term memory storage
|
|
123
|
+
|
|
124
|
+
**`@param`** [Hash] Database configuration (host, port, dbname, user, password)
|
|
125
|
+
|
|
126
|
+
**`@param`** [Integer, nil] Connection pool size (uses ActiveRecord default if nil)
|
|
127
|
+
|
|
128
|
+
**`@param`** [Integer] Query timeout in milliseconds (default: 30000)
|
|
129
|
+
|
|
130
|
+
**`@param`** [Integer] Number of query results to cache (default: 1000, use 0 to disable)
|
|
131
|
+
|
|
132
|
+
**`@param`** [Integer] Cache time-to-live in seconds (default: 300)
|
|
133
|
+
|
|
134
|
+
**`@return`** [LongTermMemory] a new instance of LongTermMemory
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
**`@example`**
|
|
138
|
+
```ruby
|
|
139
|
+
ltm = LongTermMemory.new(HTM::Database.default_config)
|
|
140
|
+
```
|
|
141
|
+
**`@example`**
|
|
142
|
+
```ruby
|
|
143
|
+
ltm = LongTermMemory.new(config, cache_size: 500, cache_ttl: 600)
|
|
144
|
+
```
|
|
145
|
+
**`@example`**
|
|
146
|
+
```ruby
|
|
147
|
+
ltm = LongTermMemory.new(config, cache_size: 0)
|
|
148
|
+
```
|
|
149
|
+
## link_robot_to_node(robot_id:, node:, working_memory:false) {: #method-i-link_robot_to_node }
|
|
150
|
+
Link a robot to a node (create or update robot_node record)
|
|
151
|
+
|
|
152
|
+
**`@param`** [Integer] Robot ID
|
|
153
|
+
|
|
154
|
+
**`@param`** [HTM::Models::Node] Node to link
|
|
155
|
+
|
|
156
|
+
**`@param`** [Boolean] Whether node is in working memory (default: false)
|
|
157
|
+
|
|
158
|
+
**`@return`** [HTM::Models::RobotNode] The robot_node link record
|
|
159
|
+
|
|
160
|
+
## mark_evicted(robot_id:, node_ids:) {: #method-i-mark_evicted }
|
|
161
|
+
Mark nodes as evicted from working memory
|
|
162
|
+
|
|
163
|
+
Sets working_memory = false on the robot_nodes join table for the specified
|
|
164
|
+
robot and node IDs.
|
|
165
|
+
|
|
166
|
+
**`@param`** [Integer] Robot ID whose working memory is being evicted
|
|
167
|
+
|
|
168
|
+
**`@param`** [Array<Integer>] Node IDs to mark as evicted
|
|
169
|
+
|
|
170
|
+
**`@return`** [void]
|
|
171
|
+
|
|
172
|
+
## node_topics(node_id) {: #method-i-node_topics }
|
|
173
|
+
Get topics for a specific node
|
|
174
|
+
|
|
175
|
+
**`@param`** [Integer] Node database ID
|
|
176
|
+
|
|
177
|
+
**`@return`** [Array<String>] Topic paths
|
|
178
|
+
|
|
179
|
+
## nodes_by_topic(topic_path, exact:false, limit:50) {: #method-i-nodes_by_topic }
|
|
180
|
+
Retrieve nodes by ontological topic
|
|
181
|
+
|
|
182
|
+
**`@param`** [String] Topic hierarchy path
|
|
183
|
+
|
|
184
|
+
**`@param`** [Boolean] Exact match or prefix match
|
|
185
|
+
|
|
186
|
+
**`@param`** [Integer] Maximum results
|
|
187
|
+
|
|
188
|
+
**`@return`** [Array<Hash>] Matching nodes
|
|
189
|
+
|
|
190
|
+
## ontology_structure() {: #method-i-ontology_structure }
|
|
191
|
+
Get ontology structure view
|
|
192
|
+
|
|
193
|
+
**`@return`** [Array<Hash>] Ontology structure
|
|
194
|
+
|
|
195
|
+
## pool_size() {: #method-i-pool_size }
|
|
196
|
+
For backwards compatibility with tests/code that expect pool_size
|
|
197
|
+
|
|
198
|
+
## popular_tags(limit:20, timeframe:nil) {: #method-i-popular_tags }
|
|
199
|
+
Get most popular tags
|
|
200
|
+
|
|
201
|
+
**`@param`** [Integer] Number of tags to return
|
|
202
|
+
|
|
203
|
+
**`@param`** [Range, nil] Optional time range filter
|
|
204
|
+
|
|
205
|
+
**`@return`** [Array<Hash>] Tags with usage counts
|
|
206
|
+
|
|
207
|
+
## register_robot(robot_name) {: #method-i-register_robot }
|
|
208
|
+
Register a robot
|
|
209
|
+
|
|
210
|
+
**`@param`** [String] Robot identifier
|
|
211
|
+
|
|
212
|
+
**`@param`** [String] Robot name
|
|
213
|
+
|
|
214
|
+
**`@return`** [void]
|
|
215
|
+
|
|
216
|
+
## retrieve(node_id) {: #method-i-retrieve }
|
|
217
|
+
Retrieve a node by ID
|
|
218
|
+
|
|
219
|
+
Automatically tracks access by incrementing access_count and updating
|
|
220
|
+
last_accessed
|
|
221
|
+
|
|
222
|
+
**`@param`** [Integer] Node database ID
|
|
223
|
+
|
|
224
|
+
**`@return`** [Hash, nil] Node data or nil
|
|
225
|
+
|
|
226
|
+
## search(timeframe:, query:, limit:, embedding_service:, metadata:{}) {: #method-i-search }
|
|
227
|
+
Vector similarity search
|
|
228
|
+
|
|
229
|
+
**`@param`** [nil, Range, Array<Range>] Time range(s) to search (nil = no filter)
|
|
230
|
+
|
|
231
|
+
**`@param`** [String] Search query
|
|
232
|
+
|
|
233
|
+
**`@param`** [Integer] Maximum results
|
|
234
|
+
|
|
235
|
+
**`@param`** [Object] Service to generate embeddings
|
|
236
|
+
|
|
237
|
+
**`@param`** [Hash] Filter by metadata fields (default: {})
|
|
238
|
+
|
|
239
|
+
**`@return`** [Array<Hash>] Matching nodes
|
|
240
|
+
|
|
241
|
+
## search_by_tags(tags:, match_all:false, timeframe:nil, limit:20) {: #method-i-search_by_tags }
|
|
242
|
+
Search nodes by tags
|
|
243
|
+
|
|
244
|
+
**`@param`** [Array<String>] Tags to search for
|
|
245
|
+
|
|
246
|
+
**`@param`** [Boolean] If true, match ALL tags; if false, match ANY tag
|
|
247
|
+
|
|
248
|
+
**`@param`** [Range, nil] Optional time range filter
|
|
249
|
+
|
|
250
|
+
**`@param`** [Integer] Maximum results
|
|
251
|
+
|
|
252
|
+
**`@return`** [Array<Hash>] Matching nodes with relevance scores
|
|
253
|
+
|
|
254
|
+
## search_fulltext(timeframe:, query:, limit:, metadata:{}) {: #method-i-search_fulltext }
|
|
255
|
+
Full-text search
|
|
256
|
+
|
|
257
|
+
**`@param`** [Range] Time range to search
|
|
258
|
+
|
|
259
|
+
**`@param`** [String] Search query
|
|
260
|
+
|
|
261
|
+
**`@param`** [Integer] Maximum results
|
|
262
|
+
|
|
263
|
+
**`@param`** [Hash] Filter by metadata fields (default: {})
|
|
264
|
+
|
|
265
|
+
**`@return`** [Array<Hash>] Matching nodes
|
|
266
|
+
|
|
267
|
+
## search_hybrid(timeframe:, query:, limit:, embedding_service:, prefilter_limit:100, metadata:{}) {: #method-i-search_hybrid }
|
|
268
|
+
Hybrid search (full-text + vector)
|
|
269
|
+
|
|
270
|
+
**`@param`** [Range] Time range to search
|
|
271
|
+
|
|
272
|
+
**`@param`** [String] Search query
|
|
273
|
+
|
|
274
|
+
**`@param`** [Integer] Maximum results
|
|
275
|
+
|
|
276
|
+
**`@param`** [Object] Service to generate embeddings
|
|
277
|
+
|
|
278
|
+
**`@param`** [Integer] Candidates to consider (default: 100)
|
|
279
|
+
|
|
280
|
+
**`@param`** [Hash] Filter by metadata fields (default: {})
|
|
281
|
+
|
|
282
|
+
**`@return`** [Array<Hash>] Matching nodes
|
|
283
|
+
|
|
284
|
+
## search_with_relevance(timeframe:, query:nil, query_tags:[], limit:20, embedding_service:nil, metadata:{}) {: #method-i-search_with_relevance }
|
|
285
|
+
Search with dynamic relevance scoring
|
|
286
|
+
|
|
287
|
+
Returns nodes with calculated relevance scores based on query context
|
|
288
|
+
|
|
289
|
+
**`@param`** [nil, Range, Array<Range>] Time range(s) to search (nil = no filter)
|
|
290
|
+
|
|
291
|
+
**`@param`** [String, nil] Search query
|
|
292
|
+
|
|
293
|
+
**`@param`** [Array<String>] Tags to match
|
|
294
|
+
|
|
295
|
+
**`@param`** [Integer] Maximum results
|
|
296
|
+
|
|
297
|
+
**`@param`** [Object, nil] Service to generate embeddings
|
|
298
|
+
|
|
299
|
+
**`@param`** [Hash] Filter by metadata fields (default: {})
|
|
300
|
+
|
|
301
|
+
**`@return`** [Array<Hash>] Nodes with relevance scores
|
|
302
|
+
|
|
303
|
+
## shutdown() {: #method-i-shutdown }
|
|
304
|
+
Shutdown - no-op with ActiveRecord (connection pool managed by ActiveRecord)
|
|
305
|
+
|
|
306
|
+
## stats() {: #method-i-stats }
|
|
307
|
+
Get memory statistics
|
|
308
|
+
|
|
309
|
+
**`@return`** [Hash] Statistics
|
|
310
|
+
|
|
311
|
+
## topic_relationships(min_shared_nodes:2, limit:50) {: #method-i-topic_relationships }
|
|
312
|
+
Get topic relationships (co-occurrence)
|
|
313
|
+
|
|
314
|
+
**`@param`** [Integer] Minimum shared nodes
|
|
315
|
+
|
|
316
|
+
**`@param`** [Integer] Maximum relationships
|
|
317
|
+
|
|
318
|
+
**`@return`** [Array<Hash>] Topic relationships
|
|
319
|
+
|
|
320
|
+
## track_access(node_ids) {: #method-i-track_access }
|
|
321
|
+
Track access for multiple nodes (bulk operation)
|
|
322
|
+
|
|
323
|
+
Updates access_count and last_accessed for all nodes in the array
|
|
324
|
+
|
|
325
|
+
**`@param`** [Array<Integer>] Node IDs that were accessed
|
|
326
|
+
|
|
327
|
+
**`@return`** [void]
|
|
328
|
+
|
|
329
|
+
## update_last_accessed(node_id) {: #method-i-update_last_accessed }
|
|
330
|
+
Update last_accessed timestamp
|
|
331
|
+
|
|
332
|
+
**`@param`** [Integer] Node database ID
|
|
333
|
+
|
|
334
|
+
**`@return`** [void]
|
|
335
|
+
|
|
336
|
+
## update_robot_activity(robot_id) {: #method-i-update_robot_activity }
|
|
337
|
+
Update robot activity timestamp
|
|
338
|
+
|
|
339
|
+
**`@param`** [Integer] Robot identifier
|
|
340
|
+
|
|
341
|
+
**`@return`** [void]
|
|
342
|
+
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Exception: HTM::NotFoundError
|
|
2
|
+
**Inherits:** HTM::Error
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
Raised when a requested resource cannot be found
|
|
6
|
+
|
|
7
|
+
Common causes:
|
|
8
|
+
* Node ID does not exist
|
|
9
|
+
* Robot not registered
|
|
10
|
+
* File source not found
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
**`@example`**
|
|
14
|
+
```ruby
|
|
15
|
+
htm.forget(999999) # => raises NotFoundError if node doesn't exist
|
|
16
|
+
```
|
|
17
|
+
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# Module: HTM::Observability
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
Observability module for monitoring and metrics collection
|
|
5
|
+
|
|
6
|
+
Provides comprehensive monitoring of HTM components including:
|
|
7
|
+
* Connection pool health monitoring with alerts
|
|
8
|
+
* Query timing and performance metrics
|
|
9
|
+
* Cache efficiency tracking
|
|
10
|
+
* Service health checks
|
|
11
|
+
* Memory usage statistics
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
**`@example`**
|
|
15
|
+
```ruby
|
|
16
|
+
stats = HTM::Observability.collect_all
|
|
17
|
+
puts stats[:connection_pool][:status] # => :healthy
|
|
18
|
+
```
|
|
19
|
+
**`@example`**
|
|
20
|
+
```ruby
|
|
21
|
+
pool_stats = HTM::Observability.connection_pool_stats
|
|
22
|
+
if pool_stats[:status] == :exhausted
|
|
23
|
+
logger.error "Connection pool exhausted!"
|
|
24
|
+
end
|
|
25
|
+
```
|
|
26
|
+
**`@example`**
|
|
27
|
+
```ruby
|
|
28
|
+
if HTM::Observability.healthy?
|
|
29
|
+
puts "All systems operational"
|
|
30
|
+
else
|
|
31
|
+
puts "Health check failed: #{HTM::Observability.health_check[:issues]}"
|
|
32
|
+
end
|
|
33
|
+
```
|
|
34
|
+
# Class Methods
|
|
35
|
+
## cache_stats() {: #method-c-cache_stats }
|
|
36
|
+
Get query cache statistics
|
|
37
|
+
**`@return`** [Hash, nil] Cache stats or nil if unavailable
|
|
38
|
+
|
|
39
|
+
## circuit_breaker_stats() {: #method-c-circuit_breaker_stats }
|
|
40
|
+
Get circuit breaker states for all services
|
|
41
|
+
**`@return`** [Hash] Circuit breaker states:
|
|
42
|
+
- :embedding_service - State and failure count
|
|
43
|
+
- :tag_service - State and failure count
|
|
44
|
+
|
|
45
|
+
## collect_all() {: #method-c-collect_all }
|
|
46
|
+
Collect all observability metrics
|
|
47
|
+
**`@return`** [Hash] Comprehensive metrics including:
|
|
48
|
+
- :connection_pool - Pool stats with health status
|
|
49
|
+
- :cache - Query cache hit rates and size
|
|
50
|
+
- :circuit_breakers - Service circuit breaker states
|
|
51
|
+
- :query_timings - Recent query performance
|
|
52
|
+
- :service_timings - Embedding/tag generation times
|
|
53
|
+
- :memory_usage - System memory stats
|
|
54
|
+
|
|
55
|
+
## connection_pool_stats() {: #method-c-connection_pool_stats }
|
|
56
|
+
Get connection pool statistics with health status
|
|
57
|
+
**`@return`** [Hash] Pool statistics including:
|
|
58
|
+
- :size - Maximum pool size
|
|
59
|
+
- :connections - Current total connections
|
|
60
|
+
- :in_use - Connections currently checked out
|
|
61
|
+
- :available - Connections available for checkout
|
|
62
|
+
- :utilization - Usage percentage (0.0-1.0)
|
|
63
|
+
- :status - Health status (:healthy, :warning, :critical, :exhausted)
|
|
64
|
+
- :wait_timeout - Connection wait timeout (ms)
|
|
65
|
+
|
|
66
|
+
## health_check() {: #method-c-health_check }
|
|
67
|
+
Perform comprehensive health check
|
|
68
|
+
**`@return`** [Hash] Health check results:
|
|
69
|
+
- :healthy - Boolean overall health status
|
|
70
|
+
- :checks - Individual check results
|
|
71
|
+
- :issues - Array of identified issues
|
|
72
|
+
|
|
73
|
+
## healthy?() {: #method-c-healthy? }
|
|
74
|
+
Quick health check - returns boolean
|
|
75
|
+
**`@return`** [Boolean] true if system is healthy
|
|
76
|
+
|
|
77
|
+
## memory_stats() {: #method-c-memory_stats }
|
|
78
|
+
Get memory usage statistics
|
|
79
|
+
**`@return`** [Hash] Memory stats
|
|
80
|
+
|
|
81
|
+
## query_timing_stats() {: #method-c-query_timing_stats }
|
|
82
|
+
Get query timing statistics
|
|
83
|
+
**`@return`** [Hash] Timing statistics including avg, min, max, p95
|
|
84
|
+
|
|
85
|
+
## record_embedding_timing(duration_ms) {: #method-c-record_embedding_timing }
|
|
86
|
+
Record embedding generation timing
|
|
87
|
+
**`@param`** [Float] Generation duration in milliseconds
|
|
88
|
+
|
|
89
|
+
## record_query_timing(duration_ms, query_type: :unknown) {: #method-c-record_query_timing }
|
|
90
|
+
Record query timing for metrics
|
|
91
|
+
**`@param`** [Float] Query duration in milliseconds
|
|
92
|
+
|
|
93
|
+
**`@param`** [Symbol] Type of query (:vector, :fulltext, :hybrid)
|
|
94
|
+
|
|
95
|
+
## record_tag_timing(duration_ms) {: #method-c-record_tag_timing }
|
|
96
|
+
Record tag extraction timing
|
|
97
|
+
**`@param`** [Float] Extraction duration in milliseconds
|
|
98
|
+
|
|
99
|
+
## reset_metrics!() {: #method-c-reset_metrics! }
|
|
100
|
+
Clear all collected timing metrics
|
|
101
|
+
**`@return`** [void]
|
|
102
|
+
|
|
103
|
+
## service_timing_stats() {: #method-c-service_timing_stats }
|
|
104
|
+
Get service timing statistics (embedding and tag extraction)
|
|
105
|
+
**`@return`** [Hash] Timing stats for embedding and tag services
|
|
106
|
+
|
|
107
|
+
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Exception: HTM::QueryTimeoutError
|
|
2
|
+
**Inherits:** HTM::DatabaseError
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
Raised when a database query exceeds the configured timeout
|
|
6
|
+
|
|
7
|
+
Default timeout is 30 seconds. Configure via db_query_timeout parameter when
|
|
8
|
+
initializing HTM.
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
**`@example`**
|
|
12
|
+
```ruby
|
|
13
|
+
begin
|
|
14
|
+
htm.recall("complex query", strategy: :hybrid)
|
|
15
|
+
rescue HTM::QueryTimeoutError
|
|
16
|
+
# Retry with simpler query or smaller limit
|
|
17
|
+
end
|
|
18
|
+
```
|
|
19
|
+
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Class: HTM::Railtie
|
|
2
|
+
**Inherits:** Rails::Railtie
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
Rails Railtie for automatic HTM configuration in Rails applications
|
|
6
|
+
|
|
7
|
+
This railtie automatically configures HTM when Rails boots:
|
|
8
|
+
* Sets logger to Rails.logger
|
|
9
|
+
* Sets job backend to :active_job
|
|
10
|
+
* Loads Rake tasks
|
|
11
|
+
* Configures test environment for synchronous jobs
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
**`@example`**
|
|
15
|
+
```ruby
|
|
16
|
+
# HTM is automatically configured on Rails boot
|
|
17
|
+
# No additional setup required
|
|
18
|
+
```
|
|
19
|
+
**`@example`**
|
|
20
|
+
```ruby
|
|
21
|
+
# config/initializers/htm.rb
|
|
22
|
+
HTM.configure do |config|
|
|
23
|
+
config.embedding_model = 'custom-model'
|
|
24
|
+
config.tag_model = 'custom-tag-model'
|
|
25
|
+
end
|
|
26
|
+
```
|
|
27
|
+
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Exception: HTM::ResourceExhaustedError
|
|
2
|
+
**Inherits:** HTM::Error
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
Raised when system resources are exhausted
|
|
6
|
+
|
|
7
|
+
Common causes:
|
|
8
|
+
* Working memory token limit exceeded
|
|
9
|
+
* Database connection pool exhausted
|
|
10
|
+
* Memory allocation failures
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Exception: HTM::TagError
|
|
2
|
+
**Inherits:** HTM::Error
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
Raised when tag extraction fails
|
|
6
|
+
|
|
7
|
+
Common causes:
|
|
8
|
+
* LLM provider API errors
|
|
9
|
+
* Invalid tag response format
|
|
10
|
+
* Network connectivity issues
|
|
11
|
+
* Model not available
|
|
12
|
+
|
|
13
|
+
Note: This error is distinct from CircuitBreakerOpenError. TagError indicates
|
|
14
|
+
a single failure, while CircuitBreakerOpenError indicates repeated failures
|
|
15
|
+
have triggered protective circuit breaking.
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# Class: HTM::TagService
|
|
2
|
+
**Inherits:** Object
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
Tag Service - Processes and validates hierarchical tags
|
|
6
|
+
|
|
7
|
+
This service wraps the configured tag extractor and provides:
|
|
8
|
+
* Response parsing (string or array)
|
|
9
|
+
* Format validation (lowercase, alphanumeric, hyphens, colons)
|
|
10
|
+
* Depth validation (max 5 levels)
|
|
11
|
+
* Ontology consistency
|
|
12
|
+
* Circuit breaker protection for external LLM failures
|
|
13
|
+
|
|
14
|
+
The actual LLM call is delegated to HTM.configuration.tag_extractor
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Class Methods
|
|
18
|
+
## circuit_breaker() {: #method-c-circuit_breaker }
|
|
19
|
+
Get or create the circuit breaker for tag service
|
|
20
|
+
**`@return`** [HTM::CircuitBreaker] The circuit breaker instance
|
|
21
|
+
|
|
22
|
+
## extract(content, existing_ontology: []) {: #method-c-extract }
|
|
23
|
+
Extract tags with validation and processing
|
|
24
|
+
**`@param`** [String] Text to analyze
|
|
25
|
+
|
|
26
|
+
**`@param`** [Array<String>] Sample of existing tags for context
|
|
27
|
+
|
|
28
|
+
**`@raise`** [CircuitBreakerOpenError] If circuit breaker is open
|
|
29
|
+
|
|
30
|
+
**`@return`** [Array<String>] Validated tag names
|
|
31
|
+
|
|
32
|
+
## parse_hierarchy(tag) {: #method-c-parse_hierarchy }
|
|
33
|
+
Parse hierarchical structure of a tag
|
|
34
|
+
**`@param`** [String] Hierarchical tag (e.g., "ai:llm:embedding")
|
|
35
|
+
|
|
36
|
+
**`@return`** [Hash] Hierarchy structure
|
|
37
|
+
{
|
|
38
|
+
full: "ai:llm:embedding",
|
|
39
|
+
root: "ai",
|
|
40
|
+
parent: "ai:llm",
|
|
41
|
+
levels: ["ai", "llm", "embedding"],
|
|
42
|
+
depth: 3
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
## parse_tags(raw_tags) {: #method-c-parse_tags }
|
|
46
|
+
Parse tag response (handles string or array input)
|
|
47
|
+
**`@param`** [String, Array] Raw response from extractor
|
|
48
|
+
|
|
49
|
+
**`@return`** [Array<String>] Parsed tag strings
|
|
50
|
+
|
|
51
|
+
## reset_circuit_breaker!() {: #method-c-reset_circuit_breaker! }
|
|
52
|
+
Reset the circuit breaker (useful for testing)
|
|
53
|
+
**`@return`** [void]
|
|
54
|
+
|
|
55
|
+
## valid_tag?(tag) {: #method-c-valid_tag? }
|
|
56
|
+
Validate single tag format
|
|
57
|
+
**`@param`** [String] Tag to validate
|
|
58
|
+
|
|
59
|
+
**`@return`** [Boolean] True if valid
|
|
60
|
+
|
|
61
|
+
## validate_and_filter_tags(tags) {: #method-c-validate_and_filter_tags }
|
|
62
|
+
Validate and filter tags
|
|
63
|
+
**`@param`** [Array<String>] Parsed tags
|
|
64
|
+
|
|
65
|
+
**`@return`** [Array<String>] Valid tags only
|
|
66
|
+
|
|
67
|
+
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Class: HTM::Timeframe::Result
|
|
2
|
+
**Inherits:** Struct
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
Result structure for :auto mode
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Attributes
|
|
9
|
+
## extracted[RW] {: #attribute-i-extracted }
|
|
10
|
+
Returns the value of attribute extracted
|
|
11
|
+
|
|
12
|
+
**`@return`** [Object] the current value of extracted
|
|
13
|
+
|
|
14
|
+
## query[RW] {: #attribute-i-query }
|
|
15
|
+
Returns the value of attribute query
|
|
16
|
+
|
|
17
|
+
**`@return`** [Object] the current value of query
|
|
18
|
+
|
|
19
|
+
## timeframe[RW] {: #attribute-i-timeframe }
|
|
20
|
+
Returns the value of attribute timeframe
|
|
21
|
+
|
|
22
|
+
**`@return`** [Object] the current value of timeframe
|
|
23
|
+
|
|
24
|
+
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# Class: HTM::Timeframe
|
|
2
|
+
**Inherits:** Object
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
Timeframe - Normalizes various timeframe inputs for database queries
|
|
6
|
+
|
|
7
|
+
Handles multiple input types and normalizes them to either:
|
|
8
|
+
* nil (no timeframe filter)
|
|
9
|
+
* Range (single time window)
|
|
10
|
+
* Array<Range> (multiple time windows, OR'd together)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
**`@example`**
|
|
14
|
+
```ruby
|
|
15
|
+
Timeframe.normalize(nil) # => nil (no filter)
|
|
16
|
+
Timeframe.normalize(Date.today) # => Range for entire day
|
|
17
|
+
Timeframe.normalize(Time.now) # => Range for entire day
|
|
18
|
+
Timeframe.normalize("last week") # => Range from chronic/extractor
|
|
19
|
+
Timeframe.normalize(:auto, query: "...") # => Extract from query text
|
|
20
|
+
Timeframe.normalize(time1..time2) # => Pass through
|
|
21
|
+
Timeframe.normalize([range1, range2]) # => Array of ranges
|
|
22
|
+
```
|
|
23
|
+
# Class Methods
|
|
24
|
+
## normalize(input, query: nil) {: #method-c-normalize }
|
|
25
|
+
Normalize a timeframe input to nil, Range, or Array<Range>
|
|
26
|
+
**`@param`** [nil, Range, Array, Date, DateTime, Time, String, Symbol] Timeframe specification
|
|
27
|
+
|
|
28
|
+
**`@param`** [String, nil] Query text (required when input is :auto)
|
|
29
|
+
|
|
30
|
+
**`@return`** [nil, Range, Array<Range>] Normalized timeframe
|
|
31
|
+
|
|
32
|
+
**`@return`** [Result] When input is :auto, returns Result with :timeframe, :query, :extracted
|
|
33
|
+
|
|
34
|
+
## valid?(input) {: #method-c-valid? }
|
|
35
|
+
Check if a value is a valid timeframe input
|
|
36
|
+
**`@param`** [Object] Value to check
|
|
37
|
+
|
|
38
|
+
**`@return`** [Boolean]
|
|
39
|
+
|
|
40
|
+
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Class: HTM::TimeframeExtractor::Result
|
|
2
|
+
**Inherits:** Struct
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
Result structure for extracted timeframe
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Attributes
|
|
9
|
+
## original_expression[RW] {: #attribute-i-original_expression }
|
|
10
|
+
Returns the value of attribute original_expression
|
|
11
|
+
|
|
12
|
+
**`@return`** [Object] the current value of original_expression
|
|
13
|
+
|
|
14
|
+
## query[RW] {: #attribute-i-query }
|
|
15
|
+
Returns the value of attribute query
|
|
16
|
+
|
|
17
|
+
**`@return`** [Object] the current value of query
|
|
18
|
+
|
|
19
|
+
## timeframe[RW] {: #attribute-i-timeframe }
|
|
20
|
+
Returns the value of attribute timeframe
|
|
21
|
+
|
|
22
|
+
**`@return`** [Object] the current value of timeframe
|
|
23
|
+
|
|
24
|
+
|