htm 0.0.2 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. checksums.yaml +4 -4
  2. data/.aigcm_msg +1 -0
  3. data/.architecture/reviews/comprehensive-codebase-review.md +577 -0
  4. data/.claude/settings.local.json +92 -0
  5. data/.irbrc +283 -80
  6. data/.tbls.yml +2 -1
  7. data/CHANGELOG.md +294 -26
  8. data/CLAUDE.md +603 -0
  9. data/README.md +76 -5
  10. data/Rakefile +5 -0
  11. data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} +0 -1
  12. data/db/migrate/00002_create_robots.rb +11 -0
  13. data/db/migrate/00003_create_file_sources.rb +20 -0
  14. data/db/migrate/00004_create_nodes.rb +65 -0
  15. data/db/migrate/00005_create_tags.rb +13 -0
  16. data/db/migrate/00006_create_node_tags.rb +18 -0
  17. data/db/migrate/00007_create_robot_nodes.rb +26 -0
  18. data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +12 -0
  19. data/db/schema.sql +172 -1
  20. data/docs/api/database.md +1 -2
  21. data/docs/api/htm.md +197 -2
  22. data/docs/api/yard/HTM/ActiveRecordConfig.md +23 -0
  23. data/docs/api/yard/HTM/AuthorizationError.md +11 -0
  24. data/docs/api/yard/HTM/CircuitBreaker.md +92 -0
  25. data/docs/api/yard/HTM/CircuitBreakerOpenError.md +34 -0
  26. data/docs/api/yard/HTM/Configuration.md +175 -0
  27. data/docs/api/yard/HTM/Database.md +99 -0
  28. data/docs/api/yard/HTM/DatabaseError.md +14 -0
  29. data/docs/api/yard/HTM/EmbeddingError.md +18 -0
  30. data/docs/api/yard/HTM/EmbeddingService.md +58 -0
  31. data/docs/api/yard/HTM/Error.md +11 -0
  32. data/docs/api/yard/HTM/JobAdapter.md +39 -0
  33. data/docs/api/yard/HTM/LongTermMemory.md +342 -0
  34. data/docs/api/yard/HTM/NotFoundError.md +17 -0
  35. data/docs/api/yard/HTM/Observability.md +107 -0
  36. data/docs/api/yard/HTM/QueryTimeoutError.md +19 -0
  37. data/docs/api/yard/HTM/Railtie.md +27 -0
  38. data/docs/api/yard/HTM/ResourceExhaustedError.md +13 -0
  39. data/docs/api/yard/HTM/TagError.md +18 -0
  40. data/docs/api/yard/HTM/TagService.md +67 -0
  41. data/docs/api/yard/HTM/Timeframe/Result.md +24 -0
  42. data/docs/api/yard/HTM/Timeframe.md +40 -0
  43. data/docs/api/yard/HTM/TimeframeExtractor/Result.md +24 -0
  44. data/docs/api/yard/HTM/TimeframeExtractor.md +45 -0
  45. data/docs/api/yard/HTM/ValidationError.md +20 -0
  46. data/docs/api/yard/HTM/WorkingMemory.md +131 -0
  47. data/docs/api/yard/HTM.md +80 -0
  48. data/docs/api/yard/index.csv +179 -0
  49. data/docs/api/yard-reference.md +51 -0
  50. data/docs/database/README.md +128 -128
  51. data/docs/database/public.file_sources.md +42 -0
  52. data/docs/database/public.file_sources.svg +211 -0
  53. data/docs/database/public.node_tags.md +4 -4
  54. data/docs/database/public.node_tags.svg +212 -79
  55. data/docs/database/public.nodes.md +22 -12
  56. data/docs/database/public.nodes.svg +246 -127
  57. data/docs/database/public.robot_nodes.md +11 -9
  58. data/docs/database/public.robot_nodes.svg +220 -98
  59. data/docs/database/public.robots.md +2 -2
  60. data/docs/database/public.robots.svg +136 -81
  61. data/docs/database/public.tags.md +3 -3
  62. data/docs/database/public.tags.svg +118 -39
  63. data/docs/database/schema.json +850 -771
  64. data/docs/database/schema.svg +256 -197
  65. data/docs/development/schema.md +67 -2
  66. data/docs/guides/adding-memories.md +93 -7
  67. data/docs/guides/recalling-memories.md +36 -1
  68. data/examples/README.md +280 -0
  69. data/examples/cli_app/htm_cli.rb +65 -5
  70. data/examples/cli_app/temp.log +93 -0
  71. data/examples/file_loader_usage.rb +177 -0
  72. data/examples/robot_groups/lib/robot_group.rb +419 -0
  73. data/examples/robot_groups/lib/working_memory_channel.rb +140 -0
  74. data/examples/robot_groups/multi_process.rb +286 -0
  75. data/examples/robot_groups/robot_worker.rb +136 -0
  76. data/examples/robot_groups/same_process.rb +229 -0
  77. data/examples/timeframe_demo.rb +276 -0
  78. data/lib/htm/active_record_config.rb +1 -1
  79. data/lib/htm/circuit_breaker.rb +202 -0
  80. data/lib/htm/configuration.rb +59 -13
  81. data/lib/htm/database.rb +67 -36
  82. data/lib/htm/embedding_service.rb +39 -2
  83. data/lib/htm/errors.rb +131 -11
  84. data/lib/htm/jobs/generate_embedding_job.rb +5 -4
  85. data/lib/htm/jobs/generate_tags_job.rb +4 -0
  86. data/lib/htm/loaders/markdown_loader.rb +263 -0
  87. data/lib/htm/loaders/paragraph_chunker.rb +112 -0
  88. data/lib/htm/long_term_memory.rb +460 -343
  89. data/lib/htm/models/file_source.rb +99 -0
  90. data/lib/htm/models/node.rb +80 -5
  91. data/lib/htm/models/robot.rb +24 -1
  92. data/lib/htm/models/robot_node.rb +1 -0
  93. data/lib/htm/models/tag.rb +254 -4
  94. data/lib/htm/observability.rb +395 -0
  95. data/lib/htm/tag_service.rb +60 -3
  96. data/lib/htm/tasks.rb +26 -1
  97. data/lib/htm/timeframe.rb +194 -0
  98. data/lib/htm/timeframe_extractor.rb +307 -0
  99. data/lib/htm/version.rb +1 -1
  100. data/lib/htm/working_memory.rb +165 -70
  101. data/lib/htm.rb +328 -130
  102. data/lib/tasks/doc.rake +300 -0
  103. data/lib/tasks/files.rake +299 -0
  104. data/lib/tasks/htm.rake +158 -3
  105. data/lib/tasks/jobs.rake +3 -9
  106. data/lib/tasks/tags.rake +166 -6
  107. data/mkdocs.yml +36 -1
  108. data/notes/ARCHITECTURE_REVIEW.md +1167 -0
  109. data/notes/IMPLEMENTATION_SUMMARY.md +606 -0
  110. data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +451 -0
  111. data/notes/next_steps.md +100 -0
  112. data/notes/plan.md +627 -0
  113. data/notes/tag_ontology_enhancement_ideas.md +222 -0
  114. data/notes/timescaledb_removal_summary.md +200 -0
  115. metadata +125 -15
  116. data/db/migrate/20250101000002_create_robots.rb +0 -14
  117. data/db/migrate/20250101000003_create_nodes.rb +0 -42
  118. data/db/migrate/20250101000005_create_tags.rb +0 -38
  119. data/db/migrate/20250101000007_add_node_vector_indexes.rb +0 -30
  120. data/db/migrate/20250125000001_add_content_hash_to_nodes.rb +0 -14
  121. data/db/migrate/20250125000002_create_robot_nodes.rb +0 -35
  122. data/db/migrate/20250125000003_remove_source_and_robot_id_from_nodes.rb +0 -28
  123. data/db/migrate/20250126000001_create_working_memories.rb +0 -19
  124. data/db/migrate/20250126000002_remove_unused_columns.rb +0 -12
  125. data/docs/database/public.working_memories.md +0 -40
  126. data/docs/database/public.working_memories.svg +0 -112
  127. data/lib/htm/models/working_memory_entry.rb +0 -88
@@ -0,0 +1,14 @@
1
+ # Exception: HTM::DatabaseError
2
+ **Inherits:** HTM::Error
3
+
4
+
5
+ Raised when database operations fail
6
+
7
+ Common causes:
8
+ * Connection failures
9
+ * Query syntax errors
10
+ * Constraint violations
11
+ * Extension not installed (pgvector, pg_trgm)
12
+
13
+
14
+
@@ -0,0 +1,18 @@
1
+ # Exception: HTM::EmbeddingError
2
+ **Inherits:** HTM::Error
3
+
4
+
5
+ Raised when embedding generation fails
6
+
7
+ Common causes:
8
+ * LLM provider API errors
9
+ * Invalid embedding response format
10
+ * Network connectivity issues
11
+ * Model not available
12
+
13
+ Note: This error is distinct from CircuitBreakerOpenError. EmbeddingError
14
+ indicates a single failure, while CircuitBreakerOpenError indicates repeated
15
+ failures have triggered protective circuit breaking.
16
+
17
+
18
+
@@ -0,0 +1,58 @@
1
+ # Class: HTM::EmbeddingService
2
+ **Inherits:** Object
3
+
4
+
5
+ Embedding Service - Processes and validates vector embeddings
6
+
7
+ This service wraps the configured embedding generator and provides:
8
+ * Response validation
9
+ * Dimension handling (padding/truncation)
10
+ * Error handling and logging
11
+ * Storage formatting
12
+ * Circuit breaker protection for external LLM failures
13
+
14
+ The actual LLM call is delegated to HTM.configuration.embedding_generator.
15
+
16
+
17
+ # Class Methods
18
+ ## circuit_breaker() {: #method-c-circuit_breaker }
19
+ Get or create the circuit breaker for embedding service
20
+ **`@return`** [HTM::CircuitBreaker] The circuit breaker instance
21
+
22
+ ## format_for_storage(embedding ) {: #method-c-format_for_storage }
23
+ Format embedding for database storage
24
+ **`@param`** [Array<Float>] Padded embedding
25
+
26
+ **`@return`** [String] PostgreSQL array format
27
+
28
+ ## generate(text ) {: #method-c-generate }
29
+ Generate embedding with validation and processing
30
+ **`@param`** [String] Text to embed
31
+
32
+ **`@raise`** [CircuitBreakerOpenError] If circuit breaker is open
33
+
34
+ **`@return`** [Hash] Processed embedding with metadata
35
+ {
36
+ embedding: Array<Float>, # Original embedding
37
+ dimension: Integer, # Original dimension
38
+ storage_embedding: String, # Formatted for database storage
39
+ storage_dimension: Integer # Padded dimension (2000)
40
+ }
41
+
42
+ ## pad_embedding(embedding ) {: #method-c-pad_embedding }
43
+ Pad embedding to MAX_DIMENSION with zeros
44
+ **`@param`** [Array<Float>] Original embedding
45
+
46
+ **`@return`** [Array<Float>] Padded embedding
47
+
48
+ ## reset_circuit_breaker!() {: #method-c-reset_circuit_breaker! }
49
+ Reset the circuit breaker (useful for testing)
50
+ **`@return`** [void]
51
+
52
+ ## validate_embedding!(embedding ) {: #method-c-validate_embedding! }
53
+ Validate embedding response format
54
+ **`@param`** [Object] Raw embedding from generator
55
+
56
+ **`@raise`** [HTM::EmbeddingError] if invalid
57
+
58
+
@@ -0,0 +1,11 @@
1
+ # Exception: HTM::Error
2
+ **Inherits:** StandardError
3
+
4
+
5
+ Base error class for all HTM errors
6
+
7
+ All custom HTM errors inherit from this class, providing a common ancestor for
8
+ error handling.
9
+
10
+
11
+
@@ -0,0 +1,39 @@
1
+ # Module: HTM::JobAdapter
2
+
3
+
4
+ Job adapter for pluggable background job backends
5
+
6
+ Supports multiple job backends to work seamlessly across different application
7
+ types (CLI, Sinatra, Rails).
8
+
9
+ Supported backends:
10
+ * :active_job - Rails ActiveJob (recommended for Rails apps)
11
+ * :sidekiq - Direct Sidekiq integration (recommended for Sinatra apps)
12
+ * :inline - Synchronous execution (recommended for CLI and tests)
13
+ * :thread - Background thread (legacy, for standalone apps)
14
+
15
+ **`@see`** Async Embedding and Tag Generation
16
+
17
+
18
+ **`@example`**
19
+ ```ruby
20
+ HTM.configure do |config|
21
+ config.job_backend = :active_job
22
+ end
23
+ ```
24
+ **`@example`**
25
+ ```ruby
26
+ HTM::JobAdapter.enqueue(HTM::Jobs::GenerateEmbeddingJob, node_id: 123)
27
+ ```
28
+ # Class Methods
29
+ ## enqueue(job_class , **params ) {: #method-c-enqueue }
30
+ Enqueue a background job using the configured backend
31
+ **`@param`** [Class] Job class to enqueue (must respond to :perform)
32
+
33
+ **`@param`** [Hash] Parameters to pass to the job
34
+
35
+ **`@raise`** [HTM::Error] If job backend is unknown
36
+
37
+ **`@return`** [void]
38
+
39
+
@@ -0,0 +1,342 @@
1
+ # Class: HTM::LongTermMemory
2
+ **Inherits:** Object
3
+
4
+
5
+ Long-term Memory - PostgreSQL-backed permanent storage
6
+
7
+ LongTermMemory provides durable storage for all memory nodes with:
8
+ * Vector similarity search (RAG)
9
+ * Full-text search
10
+ * Time-range queries
11
+ * Relationship graphs
12
+ * Tag system
13
+ * ActiveRecord ORM for data access
14
+ * Query result caching for efficiency
15
+
16
+
17
+ # Attributes
18
+ ## query_timeout[RW] {: #attribute-i-query_timeout }
19
+ Returns the value of attribute query_timeout.
20
+
21
+
22
+ # Instance Methods
23
+ ## add(content:, token_count:0, robot_id:, embedding:nil, metadata:{}) {: #method-i-add }
24
+ Add a node to long-term memory (with deduplication)
25
+
26
+ If content already exists (by content_hash), links the robot to the existing
27
+ node and updates timestamps. Otherwise creates a new node.
28
+
29
+ **`@param`** [String] Conversation message/utterance
30
+
31
+ **`@param`** [Integer] Token count
32
+
33
+ **`@param`** [Integer] Robot identifier
34
+
35
+ **`@param`** [Array<Float>, nil] Pre-generated embedding vector
36
+
37
+ **`@param`** [Hash] Flexible metadata for the node (default: {})
38
+
39
+ **`@return`** [Hash] { node_id:, is_new:, robot_node: }
40
+
41
+ ## add_tag(node_id:, tag:) {: #method-i-add_tag }
42
+ Add a tag to a node
43
+
44
+ **`@param`** [Integer] Node database ID
45
+
46
+ **`@param`** [String] Tag name
47
+
48
+ **`@return`** [void]
49
+
50
+ ## batch_load_node_tags(node_ids) {: #method-i-batch_load_node_tags }
51
+ Batch load tags for multiple nodes (avoids N+1 queries)
52
+
53
+ **`@param`** [Array<Integer>] Node database IDs
54
+
55
+ **`@return`** [Hash<Integer, Array<String>>] Map of node_id to array of tag names
56
+
57
+ ## calculate_relevance(node:, query_tags:[], vector_similarity:nil, node_tags:nil) {: #method-i-calculate_relevance }
58
+ Calculate dynamic relevance score for a node given query context
59
+
60
+ Combines multiple signals:
61
+ * Vector similarity (semantic match)
62
+ * Tag overlap (categorical match)
63
+ * Recency (freshness)
64
+ * Access frequency (popularity/utility)
65
+
66
+ **`@param`** [Hash] Node data with similarity, tags, created_at, access_count
67
+
68
+ **`@param`** [Array<String>] Tags associated with the query
69
+
70
+ **`@param`** [Float, nil] Pre-computed vector similarity (0-1)
71
+
72
+ **`@param`** [Array<String>, nil] Pre-loaded tags for this node (avoids N+1 query)
73
+
74
+ **`@return`** [Float] Composite relevance score (0-10)
75
+
76
+ ## clear_cache!() {: #method-i-clear_cache! }
77
+ Clear the query cache
78
+
79
+ Call this after any operation that modifies data (soft delete, restore, etc.)
80
+ to ensure subsequent queries see fresh results.
81
+
82
+ **`@return`** [void]
83
+
84
+ ## delete(node_id) {: #method-i-delete }
85
+ Delete a node
86
+
87
+ **`@param`** [Integer] Node database ID
88
+
89
+ **`@return`** [void]
90
+
91
+ ## exists?(node_id) {: #method-i-exists? }
92
+ Check if a node exists
93
+
94
+ **`@param`** [Integer] Node database ID
95
+
96
+ **`@return`** [Boolean] True if node exists
97
+
98
+ ## find_query_matching_tags(query, include_extracted:false) {: #method-i-find_query_matching_tags }
99
+ Find tags that match terms in the query
100
+
101
+ Searches the tags table for tags where any hierarchy level matches query
102
+ words. For example, query "PostgreSQL database" would match tags like
103
+ "database:postgresql", "database:sql", etc. Finds tags matching a query using
104
+ semantic extraction.
105
+
106
+ **`@param`** [String] Search query
107
+
108
+ **`@param`** [Boolean] If true, returns hash with :extracted and :matched keys
109
+
110
+ **`@return`** [Array<String>] Matching tag names (default)
111
+
112
+ **`@return`** [Hash] If include_extracted: { extracted: [...], matched: [...] }
113
+
114
+ ## get_node_tags(node_id) {: #method-i-get_node_tags }
115
+ Get tags for a specific node
116
+
117
+ **`@param`** [Integer] Node database ID
118
+
119
+ **`@return`** [Array<String>] Tag names
120
+
121
+ ## initialize(config, pool_size:nil, query_timeout:DEFAULT_QUERY_TIMEOUT, cache_size:DEFAULT_CACHE_SIZE, cache_ttl:DEFAULT_CACHE_TTL) {: #method-i-initialize }
122
+ Initialize long-term memory storage
123
+
124
+ **`@param`** [Hash] Database configuration (host, port, dbname, user, password)
125
+
126
+ **`@param`** [Integer, nil] Connection pool size (uses ActiveRecord default if nil)
127
+
128
+ **`@param`** [Integer] Query timeout in milliseconds (default: 30000)
129
+
130
+ **`@param`** [Integer] Number of query results to cache (default: 1000, use 0 to disable)
131
+
132
+ **`@param`** [Integer] Cache time-to-live in seconds (default: 300)
133
+
134
+ **`@return`** [LongTermMemory] a new instance of LongTermMemory
135
+
136
+
137
+ **`@example`**
138
+ ```ruby
139
+ ltm = LongTermMemory.new(HTM::Database.default_config)
140
+ ```
141
+ **`@example`**
142
+ ```ruby
143
+ ltm = LongTermMemory.new(config, cache_size: 500, cache_ttl: 600)
144
+ ```
145
+ **`@example`**
146
+ ```ruby
147
+ ltm = LongTermMemory.new(config, cache_size: 0)
148
+ ```
149
+ ## link_robot_to_node(robot_id:, node:, working_memory:false) {: #method-i-link_robot_to_node }
150
+ Link a robot to a node (create or update robot_node record)
151
+
152
+ **`@param`** [Integer] Robot ID
153
+
154
+ **`@param`** [HTM::Models::Node] Node to link
155
+
156
+ **`@param`** [Boolean] Whether node is in working memory (default: false)
157
+
158
+ **`@return`** [HTM::Models::RobotNode] The robot_node link record
159
+
160
+ ## mark_evicted(robot_id:, node_ids:) {: #method-i-mark_evicted }
161
+ Mark nodes as evicted from working memory
162
+
163
+ Sets working_memory = false on the robot_nodes join table for the specified
164
+ robot and node IDs.
165
+
166
+ **`@param`** [Integer] Robot ID whose working memory is being evicted
167
+
168
+ **`@param`** [Array<Integer>] Node IDs to mark as evicted
169
+
170
+ **`@return`** [void]
171
+
172
+ ## node_topics(node_id) {: #method-i-node_topics }
173
+ Get topics for a specific node
174
+
175
+ **`@param`** [Integer] Node database ID
176
+
177
+ **`@return`** [Array<String>] Topic paths
178
+
179
+ ## nodes_by_topic(topic_path, exact:false, limit:50) {: #method-i-nodes_by_topic }
180
+ Retrieve nodes by ontological topic
181
+
182
+ **`@param`** [String] Topic hierarchy path
183
+
184
+ **`@param`** [Boolean] Exact match or prefix match
185
+
186
+ **`@param`** [Integer] Maximum results
187
+
188
+ **`@return`** [Array<Hash>] Matching nodes
189
+
190
+ ## ontology_structure() {: #method-i-ontology_structure }
191
+ Get ontology structure view
192
+
193
+ **`@return`** [Array<Hash>] Ontology structure
194
+
195
+ ## pool_size() {: #method-i-pool_size }
196
+ For backwards compatibility with tests/code that expect pool_size
197
+
198
+ ## popular_tags(limit:20, timeframe:nil) {: #method-i-popular_tags }
199
+ Get most popular tags
200
+
201
+ **`@param`** [Integer] Number of tags to return
202
+
203
+ **`@param`** [Range, nil] Optional time range filter
204
+
205
+ **`@return`** [Array<Hash>] Tags with usage counts
206
+
207
+ ## register_robot(robot_name) {: #method-i-register_robot }
208
+ Register a robot
209
+
210
+ **`@param`** [String] Robot identifier
211
+
212
+ **`@param`** [String] Robot name
213
+
214
+ **`@return`** [void]
215
+
216
+ ## retrieve(node_id) {: #method-i-retrieve }
217
+ Retrieve a node by ID
218
+
219
+ Automatically tracks access by incrementing access_count and updating
220
+ last_accessed.
221
+
222
+ **`@param`** [Integer] Node database ID
223
+
224
+ **`@return`** [Hash, nil] Node data or nil
225
+
226
+ ## search(timeframe:, query:, limit:, embedding_service:, metadata:{}) {: #method-i-search }
227
+ Vector similarity search
228
+
229
+ **`@param`** [nil, Range, Array<Range>] Time range(s) to search (nil = no filter)
230
+
231
+ **`@param`** [String] Search query
232
+
233
+ **`@param`** [Integer] Maximum results
234
+
235
+ **`@param`** [Object] Service to generate embeddings
236
+
237
+ **`@param`** [Hash] Filter by metadata fields (default: {})
238
+
239
+ **`@return`** [Array<Hash>] Matching nodes
240
+
241
+ ## search_by_tags(tags:, match_all:false, timeframe:nil, limit:20) {: #method-i-search_by_tags }
242
+ Search nodes by tags
243
+
244
+ **`@param`** [Array<String>] Tags to search for
245
+
246
+ **`@param`** [Boolean] If true, match ALL tags; if false, match ANY tag
247
+
248
+ **`@param`** [Range, nil] Optional time range filter
249
+
250
+ **`@param`** [Integer] Maximum results
251
+
252
+ **`@return`** [Array<Hash>] Matching nodes with relevance scores
253
+
254
+ ## search_fulltext(timeframe:, query:, limit:, metadata:{}) {: #method-i-search_fulltext }
255
+ Full-text search
256
+
257
+ **`@param`** [Range] Time range to search
258
+
259
+ **`@param`** [String] Search query
260
+
261
+ **`@param`** [Integer] Maximum results
262
+
263
+ **`@param`** [Hash] Filter by metadata fields (default: {})
264
+
265
+ **`@return`** [Array<Hash>] Matching nodes
266
+
267
+ ## search_hybrid(timeframe:, query:, limit:, embedding_service:, prefilter_limit:100, metadata:{}) {: #method-i-search_hybrid }
268
+ Hybrid search (full-text + vector)
269
+
270
+ **`@param`** [Range] Time range to search
271
+
272
+ **`@param`** [String] Search query
273
+
274
+ **`@param`** [Integer] Maximum results
275
+
276
+ **`@param`** [Object] Service to generate embeddings
277
+
278
+ **`@param`** [Integer] Candidates to consider (default: 100)
279
+
280
+ **`@param`** [Hash] Filter by metadata fields (default: {})
281
+
282
+ **`@return`** [Array<Hash>] Matching nodes
283
+
284
+ ## search_with_relevance(timeframe:, query:nil, query_tags:[], limit:20, embedding_service:nil, metadata:{}) {: #method-i-search_with_relevance }
285
+ Search with dynamic relevance scoring
286
+
287
+ Returns nodes with calculated relevance scores based on query context
288
+
289
+ **`@param`** [nil, Range, Array<Range>] Time range(s) to search (nil = no filter)
290
+
291
+ **`@param`** [String, nil] Search query
292
+
293
+ **`@param`** [Array<String>] Tags to match
294
+
295
+ **`@param`** [Integer] Maximum results
296
+
297
+ **`@param`** [Object, nil] Service to generate embeddings
298
+
299
+ **`@param`** [Hash] Filter by metadata fields (default: {})
300
+
301
+ **`@return`** [Array<Hash>] Nodes with relevance scores
302
+
303
+ ## shutdown() {: #method-i-shutdown }
304
+ Shutdown - no-op with ActiveRecord (connection pool managed by ActiveRecord)
305
+
306
+ ## stats() {: #method-i-stats }
307
+ Get memory statistics
308
+
309
+ **`@return`** [Hash] Statistics
310
+
311
+ ## topic_relationships(min_shared_nodes:2, limit:50) {: #method-i-topic_relationships }
312
+ Get topic relationships (co-occurrence)
313
+
314
+ **`@param`** [Integer] Minimum shared nodes
315
+
316
+ **`@param`** [Integer] Maximum relationships
317
+
318
+ **`@return`** [Array<Hash>] Topic relationships
319
+
320
+ ## track_access(node_ids) {: #method-i-track_access }
321
+ Track access for multiple nodes (bulk operation)
322
+
323
+ Updates access_count and last_accessed for all nodes in the array
324
+
325
+ **`@param`** [Array<Integer>] Node IDs that were accessed
326
+
327
+ **`@return`** [void]
328
+
329
+ ## update_last_accessed(node_id) {: #method-i-update_last_accessed }
330
+ Update last_accessed timestamp
331
+
332
+ **`@param`** [Integer] Node database ID
333
+
334
+ **`@return`** [void]
335
+
336
+ ## update_robot_activity(robot_id) {: #method-i-update_robot_activity }
337
+ Update robot activity timestamp
338
+
339
+ **`@param`** [Integer] Robot identifier
340
+
341
+ **`@return`** [void]
342
+
@@ -0,0 +1,17 @@
1
+ # Exception: HTM::NotFoundError
2
+ **Inherits:** HTM::Error
3
+
4
+
5
+ Raised when a requested resource cannot be found
6
+
7
+ Common causes:
8
+ * Node ID does not exist
9
+ * Robot not registered
10
+ * File source not found
11
+
12
+
13
+ **`@example`**
14
+ ```ruby
15
+ htm.forget(999999) # => raises NotFoundError if node doesn't exist
16
+ ```
17
+
@@ -0,0 +1,107 @@
1
+ # Module: HTM::Observability
2
+
3
+
4
+ Observability module for monitoring and metrics collection
5
+
6
+ Provides comprehensive monitoring of HTM components including:
7
+ * Connection pool health monitoring with alerts
8
+ * Query timing and performance metrics
9
+ * Cache efficiency tracking
10
+ * Service health checks
11
+ * Memory usage statistics
12
+
13
+
14
+ **`@example`**
15
+ ```ruby
16
+ stats = HTM::Observability.collect_all
17
+ puts stats[:connection_pool][:status] # => :healthy
18
+ ```
19
+ **`@example`**
20
+ ```ruby
21
+ pool_stats = HTM::Observability.connection_pool_stats
22
+ if pool_stats[:status] == :exhausted
23
+ logger.error "Connection pool exhausted!"
24
+ end
25
+ ```
26
+ **`@example`**
27
+ ```ruby
28
+ if HTM::Observability.healthy?
29
+ puts "All systems operational"
30
+ else
31
+ puts "Health check failed: #{HTM::Observability.health_check[:issues]}"
32
+ end
33
+ ```
34
+ # Class Methods
35
+ ## cache_stats() {: #method-c-cache_stats }
36
+ Get query cache statistics
37
+ **`@return`** [Hash, nil] Cache stats or nil if unavailable
38
+
39
+ ## circuit_breaker_stats() {: #method-c-circuit_breaker_stats }
40
+ Get circuit breaker states for all services
41
+ **`@return`** [Hash] Circuit breaker states:
42
+ - :embedding_service - State and failure count
43
+ - :tag_service - State and failure count
44
+
45
+ ## collect_all() {: #method-c-collect_all }
46
+ Collect all observability metrics
47
+ **`@return`** [Hash] Comprehensive metrics including:
48
+ - :connection_pool - Pool stats with health status
49
+ - :cache - Query cache hit rates and size
50
+ - :circuit_breakers - Service circuit breaker states
51
+ - :query_timings - Recent query performance
52
+ - :service_timings - Embedding/tag generation times
53
+ - :memory_usage - System memory stats
54
+
55
+ ## connection_pool_stats() {: #method-c-connection_pool_stats }
56
+ Get connection pool statistics with health status
57
+ **`@return`** [Hash] Pool statistics including:
58
+ - :size - Maximum pool size
59
+ - :connections - Current total connections
60
+ - :in_use - Connections currently checked out
61
+ - :available - Connections available for checkout
62
+ - :utilization - Usage percentage (0.0-1.0)
63
+ - :status - Health status (:healthy, :warning, :critical, :exhausted)
64
+ - :wait_timeout - Connection wait timeout (ms)
65
+
66
+ ## health_check() {: #method-c-health_check }
67
+ Perform comprehensive health check
68
+ **`@return`** [Hash] Health check results:
69
+ - :healthy - Boolean overall health status
70
+ - :checks - Individual check results
71
+ - :issues - Array of identified issues
72
+
73
+ ## healthy?() {: #method-c-healthy? }
74
+ Quick health check - returns boolean
75
+ **`@return`** [Boolean] true if system is healthy
76
+
77
+ ## memory_stats() {: #method-c-memory_stats }
78
+ Get memory usage statistics
79
+ **`@return`** [Hash] Memory stats
80
+
81
+ ## query_timing_stats() {: #method-c-query_timing_stats }
82
+ Get query timing statistics
83
+ **`@return`** [Hash] Timing statistics including avg, min, max, p95
84
+
85
+ ## record_embedding_timing(duration_ms ) {: #method-c-record_embedding_timing }
86
+ Record embedding generation timing
87
+ **`@param`** [Float] Generation duration in milliseconds
88
+
89
+ ## record_query_timing(duration_ms , query_type: :unknown) {: #method-c-record_query_timing }
90
+ Record query timing for metrics
91
+ **`@param`** [Float] Query duration in milliseconds
92
+
93
+ **`@param`** [Symbol] Type of query (:vector, :fulltext, :hybrid)
94
+
95
+ ## record_tag_timing(duration_ms ) {: #method-c-record_tag_timing }
96
+ Record tag extraction timing
97
+ **`@param`** [Float] Extraction duration in milliseconds
98
+
99
+ ## reset_metrics!() {: #method-c-reset_metrics! }
100
+ Clear all collected timing metrics
101
+ **`@return`** [void]
102
+
103
+ ## service_timing_stats() {: #method-c-service_timing_stats }
104
+ Get service timing statistics (embedding and tag extraction)
105
+ **`@return`** [Hash] Timing stats for embedding and tag services
106
+
107
+
@@ -0,0 +1,19 @@
1
+ # Exception: HTM::QueryTimeoutError
2
+ **Inherits:** HTM::DatabaseError
3
+
4
+
5
+ Raised when a database query exceeds the configured timeout
6
+
7
+ Default timeout is 30 seconds (30000 ms). Configure via db_query_timeout parameter when
8
+ initializing HTM.
9
+
10
+
11
+ **`@example`**
12
+ ```ruby
13
+ begin
14
+ htm.recall("complex query", strategy: :hybrid)
15
+ rescue HTM::QueryTimeoutError
16
+ # Retry with simpler query or smaller limit
17
+ end
18
+ ```
19
+
@@ -0,0 +1,27 @@
1
+ # Class: HTM::Railtie
2
+ **Inherits:** Rails::Railtie
3
+
4
+
5
+ Rails Railtie for automatic HTM configuration in Rails applications
6
+
7
+ This railtie automatically configures HTM when Rails boots:
8
+ * Sets logger to Rails.logger
9
+ * Sets job backend to :active_job
10
+ * Loads Rake tasks
11
+ * Configures test environment for synchronous jobs
12
+
13
+
14
+ **`@example`**
15
+ ```ruby
16
+ # HTM is automatically configured on Rails boot
17
+ # No additional setup required
18
+ ```
19
+ **`@example`**
20
+ ```ruby
21
+ # config/initializers/htm.rb
22
+ HTM.configure do |config|
23
+ config.embedding_model = 'custom-model'
24
+ config.tag_model = 'custom-tag-model'
25
+ end
26
+ ```
27
+
@@ -0,0 +1,13 @@
1
+ # Exception: HTM::ResourceExhaustedError
2
+ **Inherits:** HTM::Error
3
+
4
+
5
+ Raised when system resources are exhausted
6
+
7
+ Common causes:
8
+ * Working memory token limit exceeded
9
+ * Database connection pool exhausted
10
+ * Memory allocation failures
11
+
12
+
13
+
@@ -0,0 +1,18 @@
1
+ # Exception: HTM::TagError
2
+ **Inherits:** HTM::Error
3
+
4
+
5
+ Raised when tag extraction fails
6
+
7
+ Common causes:
8
+ * LLM provider API errors
9
+ * Invalid tag response format
10
+ * Network connectivity issues
11
+ * Model not available
12
+
13
+ Note: This error is distinct from CircuitBreakerOpenError. TagError indicates
14
+ a single failure, while CircuitBreakerOpenError indicates repeated failures
15
+ have triggered protective circuit breaking.
16
+
17
+
18
+