htm 0.0.31 → 0.0.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. checksums.yaml +4 -4
  2. data/.irbrc +2 -3
  3. data/.rubocop.yml +184 -0
  4. data/CHANGELOG.md +46 -0
  5. data/README.md +2 -0
  6. data/Rakefile +93 -12
  7. data/db/migrate/00008_create_node_relationships.rb +54 -0
  8. data/db/migrate/00009_fix_node_relationships_column_types.rb +17 -0
  9. data/db/schema.sql +124 -1
  10. data/docs/api/database.md +35 -57
  11. data/docs/api/embedding-service.md +1 -1
  12. data/docs/api/index.md +26 -15
  13. data/docs/api/working-memory.md +8 -8
  14. data/docs/architecture/index.md +5 -7
  15. data/docs/architecture/overview.md +5 -8
  16. data/docs/assets/images/htm-architecture-overview.svg +1 -1
  17. data/docs/assets/images/htm-context-assembly-flow.svg +2 -2
  18. data/docs/assets/images/htm-layered-architecture.svg +3 -3
  19. data/docs/assets/images/two-tier-memory-architecture.svg +1 -1
  20. data/docs/database/README.md +1 -0
  21. data/docs/database_rake_tasks.md +20 -28
  22. data/docs/development/contributing.md +5 -5
  23. data/docs/development/index.md +4 -7
  24. data/docs/development/schema.md +71 -1
  25. data/docs/development/setup.md +40 -82
  26. data/docs/development/testing.md +1 -1
  27. data/docs/examples/file-loading.md +4 -4
  28. data/docs/examples/mcp-client.md +1 -1
  29. data/docs/getting-started/quick-start.md +4 -4
  30. data/docs/guides/adding-memories.md +14 -1
  31. data/docs/guides/configuration.md +5 -5
  32. data/docs/guides/context-assembly.md +4 -4
  33. data/docs/guides/file-loading.md +12 -12
  34. data/docs/guides/getting-started.md +2 -2
  35. data/docs/guides/long-term-memory.md +7 -27
  36. data/docs/guides/propositions.md +20 -19
  37. data/docs/guides/recalling-memories.md +5 -5
  38. data/docs/guides/tags.md +18 -13
  39. data/docs/multi_framework_support.md +1 -1
  40. data/docs/robots/hive-mind.md +1 -1
  41. data/docs/robots/multi-robot.md +2 -2
  42. data/docs/robots/robot-groups.md +1 -1
  43. data/docs/robots/two-tier-memory.md +72 -94
  44. data/docs/setup_local_database.md +8 -54
  45. data/docs/using_rake_tasks_in_your_app.md +6 -6
  46. data/examples/01_basic_usage.rb +1 -0
  47. data/examples/03_custom_llm_configuration.rb +1 -0
  48. data/examples/04_file_loader_usage.rb +1 -0
  49. data/examples/05_timeframe_demo.rb +1 -0
  50. data/examples/06_example_app/app.rb +1 -0
  51. data/examples/07_cli_app/htm_cli.rb +1 -0
  52. data/examples/09_mcp_client.rb +1 -0
  53. data/examples/10_telemetry/demo.rb +1 -0
  54. data/examples/11_robot_groups/multi_process.rb +1 -0
  55. data/examples/11_robot_groups/same_process.rb +1 -0
  56. data/examples/12_rails_app/.envrc +12 -0
  57. data/examples/12_rails_app/Gemfile +8 -3
  58. data/examples/12_rails_app/Gemfile.lock +94 -89
  59. data/examples/12_rails_app/README.md +70 -19
  60. data/examples/12_rails_app/app/controllers/application_controller.rb +6 -0
  61. data/examples/12_rails_app/app/controllers/chats_controller.rb +305 -0
  62. data/examples/12_rails_app/app/controllers/dashboard_controller.rb +3 -0
  63. data/examples/12_rails_app/app/controllers/files_controller.rb +17 -2
  64. data/examples/12_rails_app/app/controllers/home_controller.rb +8 -0
  65. data/examples/12_rails_app/app/controllers/memories_controller.rb +9 -4
  66. data/examples/12_rails_app/app/controllers/messages_controller.rb +214 -0
  67. data/examples/12_rails_app/app/controllers/robots_controller.rb +11 -1
  68. data/examples/12_rails_app/app/controllers/tags_controller.rb +14 -1
  69. data/examples/12_rails_app/app/javascript/application.js +1 -1
  70. data/examples/12_rails_app/app/models/application_record.rb +5 -0
  71. data/examples/12_rails_app/app/models/chat.rb +36 -0
  72. data/examples/12_rails_app/app/models/message.rb +5 -0
  73. data/examples/12_rails_app/app/models/model.rb +5 -0
  74. data/examples/12_rails_app/app/models/tool_call.rb +5 -0
  75. data/examples/12_rails_app/app/views/chats/index.html.erb +61 -0
  76. data/examples/12_rails_app/app/views/chats/show.html.erb +213 -0
  77. data/examples/12_rails_app/app/views/dashboard/index.html.erb +3 -0
  78. data/examples/12_rails_app/app/views/files/index.html.erb +10 -5
  79. data/examples/12_rails_app/app/views/files/new.html.erb +4 -2
  80. data/examples/12_rails_app/app/views/files/show.html.erb +19 -3
  81. data/examples/12_rails_app/app/views/home/index.html.erb +45 -0
  82. data/examples/12_rails_app/app/views/layouts/application.html.erb +20 -18
  83. data/examples/12_rails_app/app/views/memories/_memory_card.html.erb +1 -1
  84. data/examples/12_rails_app/app/views/memories/deleted.html.erb +3 -1
  85. data/examples/12_rails_app/app/views/memories/edit.html.erb +2 -0
  86. data/examples/12_rails_app/app/views/memories/index.html.erb +2 -0
  87. data/examples/12_rails_app/app/views/memories/new.html.erb +2 -0
  88. data/examples/12_rails_app/app/views/memories/show.html.erb +4 -2
  89. data/examples/12_rails_app/app/views/messages/_message.html.erb +20 -0
  90. data/examples/12_rails_app/app/views/robots/index.html.erb +2 -0
  91. data/examples/12_rails_app/app/views/robots/new.html.erb +2 -0
  92. data/examples/12_rails_app/app/views/robots/show.html.erb +2 -0
  93. data/examples/12_rails_app/app/views/search/index.html.erb +59 -8
  94. data/examples/12_rails_app/app/views/shared/_navbar.html.erb +75 -29
  95. data/examples/12_rails_app/app/views/tags/index.html.erb +2 -0
  96. data/examples/12_rails_app/app/views/tags/show.html.erb +3 -1
  97. data/examples/12_rails_app/config/application.rb +1 -1
  98. data/examples/12_rails_app/config/database.yml +9 -5
  99. data/examples/12_rails_app/config/importmap.rb +1 -1
  100. data/examples/12_rails_app/config/initializers/htm.rb +9 -2
  101. data/examples/12_rails_app/config/initializers/ruby_llm.rb +33 -0
  102. data/examples/12_rails_app/config/routes.rb +39 -23
  103. data/examples/12_rails_app/db/migrate/20250124000001_create_ruby_llm_tables.rb +34 -0
  104. data/examples/12_rails_app/db/migrate/20250124000002_create_models_table.rb +28 -0
  105. data/examples/12_rails_app/db/schema.rb +67 -0
  106. data/examples/examples_helper.rb +25 -0
  107. data/lib/htm/circuit_breaker.rb +5 -6
  108. data/lib/htm/config/builder.rb +12 -12
  109. data/lib/htm/config/database.rb +21 -27
  110. data/lib/htm/config/validator.rb +12 -18
  111. data/lib/htm/config.rb +76 -65
  112. data/lib/htm/database.rb +193 -199
  113. data/lib/htm/embedding_service.rb +4 -9
  114. data/lib/htm/integrations/sinatra.rb +7 -7
  115. data/lib/htm/job_adapter.rb +14 -21
  116. data/lib/htm/jobs/generate_embedding_job.rb +28 -44
  117. data/lib/htm/jobs/generate_propositions_job.rb +29 -55
  118. data/lib/htm/jobs/generate_relationships_job.rb +137 -0
  119. data/lib/htm/jobs/generate_tags_job.rb +45 -67
  120. data/lib/htm/loaders/markdown_loader.rb +65 -112
  121. data/lib/htm/long_term_memory/fulltext_search.rb +1 -1
  122. data/lib/htm/long_term_memory/hybrid_search.rb +300 -128
  123. data/lib/htm/long_term_memory/node_operations.rb +2 -2
  124. data/lib/htm/long_term_memory/relevance_scorer.rb +100 -68
  125. data/lib/htm/long_term_memory/tag_operations.rb +87 -120
  126. data/lib/htm/long_term_memory/vector_search.rb +1 -1
  127. data/lib/htm/long_term_memory.rb +2 -1
  128. data/lib/htm/mcp/cli.rb +59 -58
  129. data/lib/htm/mcp/server.rb +5 -6
  130. data/lib/htm/mcp/tools.rb +30 -36
  131. data/lib/htm/migration.rb +10 -10
  132. data/lib/htm/models/node.rb +2 -3
  133. data/lib/htm/models/node_relationship.rb +72 -0
  134. data/lib/htm/models/node_tag.rb +2 -2
  135. data/lib/htm/models/robot_node.rb +2 -2
  136. data/lib/htm/models/tag.rb +41 -28
  137. data/lib/htm/observability.rb +45 -51
  138. data/lib/htm/proposition_service.rb +3 -7
  139. data/lib/htm/query_cache.rb +13 -15
  140. data/lib/htm/railtie.rb +1 -2
  141. data/lib/htm/robot_group.rb +9 -9
  142. data/lib/htm/sequel_config.rb +1 -0
  143. data/lib/htm/sql_builder.rb +1 -1
  144. data/lib/htm/tag_service.rb +2 -6
  145. data/lib/htm/timeframe.rb +4 -5
  146. data/lib/htm/timeframe_extractor.rb +42 -83
  147. data/lib/htm/version.rb +1 -1
  148. data/lib/htm/workflows/remember_workflow.rb +112 -115
  149. data/lib/htm/working_memory.rb +21 -26
  150. data/lib/htm.rb +103 -116
  151. data/lib/tasks/db.rake +0 -2
  152. data/lib/tasks/doc.rake +14 -13
  153. data/lib/tasks/files.rake +5 -12
  154. data/lib/tasks/htm.rake +70 -71
  155. data/lib/tasks/jobs.rake +41 -47
  156. data/lib/tasks/tags.rake +3 -8
  157. metadata +25 -100
data/lib/htm/workflows/remember_workflow.rb CHANGED
@@ -79,134 +79,131 @@ class HTM
79
79
  private
80
80
 
81
81
  def build_pipeline
82
+ save_step = save_node_step
83
+ embed_step = generate_embedding_step
84
+ tags_step = generate_tags_step
85
+ props_step = generate_propositions_step
86
+ final_step = finalize_step
87
+
82
88
  SimpleFlow::Pipeline.new(concurrency: @concurrency) do
83
- # Step 1: Save node to database (no dependencies)
84
- step :save_node, ->(result) {
85
- data = result.value
86
- htm = data[:htm]
87
-
88
- # Calculate token count
89
- token_count = HTM.count_tokens(data[:content])
90
-
91
- # Store in long-term memory
92
- save_result = htm.long_term_memory.add(
93
- content: data[:content],
94
- token_count: token_count,
95
- robot_id: data[:robot_id],
96
- embedding: nil,
97
- metadata: data[:metadata]
98
- )
99
-
100
- node_id = save_result[:node_id]
101
- is_new = save_result[:is_new]
102
-
103
- HTM.logger.info "RememberWorkflow: Node #{node_id} saved (new: #{is_new})"
104
-
105
- result
106
- .with_context(:node_id, node_id)
107
- .with_context(:is_new, is_new)
108
- .with_context(:token_count, token_count)
109
- .with_context(:robot_node, save_result[:robot_node])
110
- .continue(data)
111
- }, depends_on: :none
112
-
113
- # Step 2: Generate embedding (depends on save_node, runs in parallel with tags/propositions)
114
- step :generate_embedding, ->(result) {
115
- node_id = result.context[:node_id]
116
- is_new = result.context[:is_new]
117
-
118
- # Only generate for new nodes
119
- if is_new
120
- begin
121
- HTM::Jobs::GenerateEmbeddingJob.perform(node_id: node_id)
122
- rescue StandardError => e
123
- HTM.logger.error "RememberWorkflow: Embedding generation failed: #{e.message}"
124
- # Continue despite error - embedding is non-critical
125
- end
126
- end
89
+ step :save_node, save_step, depends_on: :none
90
+ step :generate_embedding, embed_step, depends_on: [:save_node]
91
+ step :generate_tags, tags_step, depends_on: [:save_node]
92
+ step :generate_propositions, props_step, depends_on: [:save_node]
93
+ step :finalize, final_step, depends_on: %i[generate_embedding generate_tags generate_propositions]
94
+ end
95
+ end
127
96
 
128
- result.continue(result.value)
129
- }, depends_on: [:save_node]
130
-
131
- # Step 3: Generate tags (depends on save_node, runs in parallel with embedding/propositions)
132
- step :generate_tags, ->(result) {
133
- node_id = result.context[:node_id]
134
- is_new = result.context[:is_new]
135
- manual_tags = result.value[:tags] || []
136
-
137
- if is_new
138
- # Add manual tags immediately (including parent tags)
139
- if manual_tags.any?
140
- manual_tags.each do |tag_name|
141
- HTM::Models::Tag.find_or_create_with_ancestors(tag_name).each do |tag|
142
- HTM::Models::NodeTag.find_or_create(node_id: node_id, tag_id: tag.id)
143
- end
144
- end
145
- end
97
+ def save_node_step
98
+ lambda { |result|
99
+ data = result.value
100
+ htm = data[:htm]
101
+ token_count = HTM.count_tokens(data[:content])
102
+ save_result = htm.long_term_memory.add(
103
+ content: data[:content],
104
+ token_count: token_count,
105
+ robot_id: data[:robot_id],
106
+ embedding: nil,
107
+ metadata: data[:metadata]
108
+ )
109
+ node_id = save_result[:node_id]
110
+ is_new = save_result[:is_new]
111
+ HTM.logger.info "RememberWorkflow: Node #{node_id} saved (new: #{is_new})"
112
+ result
113
+ .with_context(:node_id, node_id)
114
+ .with_context(:is_new, is_new)
115
+ .with_context(:token_count, token_count)
116
+ .with_context(:robot_node, save_result[:robot_node])
117
+ .continue(data)
118
+ }
119
+ end
146
120
 
147
- begin
148
- HTM::Jobs::GenerateTagsJob.perform(node_id: node_id)
149
- rescue StandardError => e
150
- HTM.logger.error "RememberWorkflow: Tag generation failed: #{e.message}"
151
- # Continue despite error - tags are non-critical
152
- end
153
- else
154
- # For existing nodes, only add manual tags
155
- if manual_tags.any?
156
- node = HTM::Models::Node[node_id]
157
- node.add_tags(manual_tags)
158
- end
121
+ def generate_embedding_step
122
+ lambda { |result|
123
+ node_id = result.context[:node_id]
124
+ if result.context[:is_new]
125
+ begin
126
+ HTM::Jobs::GenerateEmbeddingJob.perform(node_id: node_id)
127
+ rescue StandardError => e
128
+ HTM.logger.error "RememberWorkflow: Embedding generation failed: #{e.message}"
159
129
  end
130
+ end
131
+ result.continue(result.value)
132
+ }
133
+ end
160
134
 
161
- result.continue(result.value)
162
- }, depends_on: [:save_node]
163
-
164
- # Step 4: Generate propositions (depends on save_node, runs in parallel with embedding/tags)
165
- step :generate_propositions, ->(result) {
166
- node_id = result.context[:node_id]
167
- is_new = result.context[:is_new]
168
- metadata = result.value[:metadata] || {}
169
- robot_id = result.value[:robot_id]
170
-
171
- # Only extract propositions for new nodes that aren't already propositions
172
- if is_new && HTM.config.extract_propositions && !metadata[:is_proposition]
173
- begin
174
- HTM::Jobs::GeneratePropositionsJob.perform(node_id: node_id, robot_id: robot_id)
175
- rescue StandardError => e
176
- HTM.logger.error "RememberWorkflow: Proposition extraction failed: #{e.message}"
177
- # Continue despite error - propositions are non-critical
135
+ def generate_tags_step
136
+ lambda { |result|
137
+ node_id = result.context[:node_id]
138
+ is_new = result.context[:is_new]
139
+ manual_tags = result.value[:tags] || []
140
+
141
+ if is_new
142
+ manual_tags.each do |tag_name|
143
+ HTM::Models::Tag.find_or_create_with_ancestors(tag_name).each do |tag|
144
+ HTM::Models::NodeTag.find_or_create(node_id: node_id, tag_id: tag.id)
178
145
  end
179
146
  end
147
+ begin
148
+ HTM::Jobs::GenerateTagsJob.perform(node_id: node_id)
149
+ rescue StandardError => e
150
+ HTM.logger.error "RememberWorkflow: Tag generation failed: #{e.message}"
151
+ end
152
+ elsif manual_tags.any?
153
+ HTM::Models::Node[node_id].add_tags(manual_tags)
154
+ end
155
+
156
+ result.continue(result.value)
157
+ }
158
+ end
180
159
 
181
- result.continue(result.value)
182
- }, depends_on: [:save_node]
183
-
184
- # Step 5: Finalize (depends on all enrichment steps)
185
- step :finalize, ->(result) {
186
- node_id = result.context[:node_id]
187
- token_count = result.context[:token_count]
188
- robot_node = result.context[:robot_node]
189
- htm = result.value[:htm]
190
-
191
- # Add to working memory
192
- unless htm.working_memory.has_space?(token_count)
193
- evicted = htm.working_memory.evict_to_make_space(token_count)
194
- evicted_keys = evicted.map { |n| n[:key] }
195
- htm.long_term_memory.mark_evicted(robot_id: result.value[:robot_id], node_ids: evicted_keys) if evicted_keys.any?
160
+ def generate_propositions_step
161
+ lambda { |result|
162
+ node_id = result.context[:node_id]
163
+ is_new = result.context[:is_new]
164
+ metadata = result.value[:metadata] || {}
165
+ robot_id = result.value[:robot_id]
166
+
167
+ if is_new && HTM.config.extract_propositions && !metadata[:is_proposition]
168
+ begin
169
+ HTM::Jobs::GeneratePropositionsJob.perform(node_id: node_id, robot_id: robot_id)
170
+ rescue StandardError => e
171
+ HTM.logger.error "RememberWorkflow: Proposition extraction failed: #{e.message}"
196
172
  end
197
- htm.working_memory.add(node_id, result.value[:content], token_count: token_count, access_count: 0)
173
+ end
198
174
 
199
- # Mark as in working memory
200
- robot_node.update(working_memory: true)
175
+ result.continue(result.value)
176
+ }
177
+ end
201
178
 
202
- # Update robot activity
203
- htm.long_term_memory.update_robot_activity(result.value[:robot_id])
179
+ def finalize_step
180
+ lambda { |result|
181
+ ctx = result.context
182
+ finalize_node(
183
+ htm: result.value[:htm],
184
+ node_id: ctx[:node_id],
185
+ token_count: ctx[:token_count],
186
+ robot_node: ctx[:robot_node],
187
+ content: result.value[:content],
188
+ robot_id: result.value[:robot_id]
189
+ )
190
+ result.continue(result.value)
191
+ }
192
+ end
204
193
 
205
- HTM.logger.info "RememberWorkflow: Node #{node_id} finalized"
194
+ def finalize_node(htm:, node_id:, token_count:, robot_node:, content:, robot_id:)
195
+ evict_working_memory_if_needed(htm, token_count, robot_id)
196
+ htm.working_memory.add(node_id, content, token_count: token_count, access_count: 0)
197
+ robot_node.update(working_memory: true)
198
+ htm.long_term_memory.update_robot_activity(robot_id)
199
+ HTM.logger.info "RememberWorkflow: Node #{node_id} finalized"
200
+ end
206
201
 
207
- result.continue(result.value)
208
- }, depends_on: [:generate_embedding, :generate_tags, :generate_propositions]
209
- end
202
+ def evict_working_memory_if_needed(htm, token_count, robot_id)
203
+ return if htm.working_memory.has_space?(token_count)
204
+ evicted = htm.working_memory.evict_to_make_space(token_count)
205
+ evicted_keys = evicted.map { |n| n[:key] }
206
+ htm.long_term_memory.mark_evicted(robot_id: robot_id, node_ids: evicted_keys) if evicted_keys.any?
210
207
  end
211
208
  end
212
209
  end
data/lib/htm/working_memory.rb CHANGED
@@ -84,7 +84,7 @@ class HTM
84
84
  tokens_freed = 0
85
85
 
86
86
  # Sort by access frequency + recency (lower score = more evictable)
87
- candidates = @nodes.sort_by do |key, node|
87
+ candidates = @nodes.sort_by do |_key, node|
88
88
  access_frequency = node[:access_count] || 0
89
89
  time_since_accessed = Time.now - (node[:last_accessed] || node[:added_at])
90
90
 
@@ -92,7 +92,7 @@ class HTM
92
92
  # Frequently accessed = higher score (keep)
93
93
  # Recently accessed = higher score (keep)
94
94
  access_score = Math.log(1 + access_frequency)
95
- recency_score = 1.0 / (1 + time_since_accessed / 3600.0)
95
+ recency_score = 1.0 / (1 + (time_since_accessed / 3600.0))
96
96
 
97
97
  -(access_score + recency_score) # Negative for ascending sort
98
98
  end
@@ -122,33 +122,10 @@ class HTM
122
122
  def assemble_context(strategy:, max_tokens: nil)
123
123
  @mutex.synchronize do
124
124
  max = max_tokens || @max_tokens
125
+ nodes = sorted_nodes_by_strategy(strategy)
125
126
 
126
- # Make defensive copies of nodes to prevent external mutation of internal state
127
- nodes = case strategy
128
- when :recent
129
- # Most recently accessed (LRU)
130
- @access_order.reverse.map { |k| @nodes[k]&.dup }.compact
131
- when :frequent
132
- # Most frequently accessed (LFU)
133
- @nodes.sort_by { |k, v| -(v[:access_count] || 0) }.map { |_, v| v.dup }
134
- when :balanced
135
- # Combined frequency × recency
136
- @nodes.sort_by { |k, v|
137
- access_frequency = v[:access_count] || 0
138
- time_since_accessed = Time.now - (v[:last_accessed] || v[:added_at])
139
- recency_factor = 1.0 / (1 + time_since_accessed / 3600.0)
140
-
141
- # Higher score = more relevant
142
- -(Math.log(1 + access_frequency) * recency_factor)
143
- }.map { |_, v| v.dup }
144
- else
145
- raise ArgumentError, "Unknown strategy: #{strategy}. Use :recent, :frequent, or :balanced"
146
- end
147
-
148
- # Build context up to token limit
149
127
  context_parts = []
150
128
  current_tokens = 0
151
-
152
129
  nodes.each do |node|
153
130
  break if current_tokens + node[:token_count] > max
154
131
  context_parts << node[:value]
@@ -263,6 +240,24 @@ class HTM
263
240
 
264
241
  private
265
242
 
243
+ def sorted_nodes_by_strategy(strategy)
244
+ case strategy
245
+ when :recent
246
+ @access_order.reverse.map { |k| @nodes[k]&.dup }.compact
247
+ when :frequent
248
+ @nodes.sort_by { |_k, v| -(v[:access_count] || 0) }.map { |_, v| v.dup }
249
+ when :balanced
250
+ sorted = @nodes.sort_by do |_k, v|
251
+ freq = v[:access_count] || 0
252
+ age = Time.now - (v[:last_accessed] || v[:added_at])
253
+ -(Math.log(1 + freq) * (1.0 / (1 + (age / 3600.0))))
254
+ end
255
+ sorted.map { |_, v| v.dup }
256
+ else
257
+ raise ArgumentError, "Unknown strategy: #{strategy}. Use :recent, :frequent, or :balanced"
258
+ end
259
+ end
260
+
266
261
  # Internal unlocked version - must be called within @mutex.synchronize
267
262
  def current_tokens_unlocked
268
263
  @nodes.values.sum { |n| n[:token_count] }
data/lib/htm.rb CHANGED
@@ -17,6 +17,7 @@ require_relative "htm/job_adapter"
17
17
  require_relative "htm/jobs/generate_embedding_job"
18
18
  require_relative "htm/jobs/generate_tags_job"
19
19
  require_relative "htm/jobs/generate_propositions_job"
20
+ require_relative "htm/jobs/generate_relationships_job"
20
21
  require_relative "htm/loaders/markdown_chunker"
21
22
  require_relative "htm/loaders/markdown_loader"
22
23
  require_relative "htm/workflows/remember_workflow"
@@ -49,13 +50,13 @@ require_relative "htm/railtie" if defined?(Rails::Railtie)
49
50
  # htm = HTM.new(robot_name: "Code Helper")
50
51
  #
51
52
  # # Remember information
52
- # htm.remember("We decided to use PostgreSQL for HTM", source: "architect")
53
+ # htm.remember("We decided to use PostgreSQL for HTM", metadata: { source: "architect" })
53
54
  #
54
55
  # # Recall from the past
55
- # memories = htm.recall(timeframe: "last week", topic: "PostgreSQL")
56
+ # memories = htm.recall("PostgreSQL", timeframe: "last week")
56
57
  #
57
58
  # # Create context for LLM
58
- # context = htm.create_context(strategy: :balanced)
59
+ # context = htm.working_memory.assemble_context(strategy: :balanced)
59
60
  #
60
61
  class HTM
61
62
  attr_reader :robot_id, :robot_name, :working_memory, :long_term_memory
@@ -65,7 +66,7 @@ class HTM
65
66
  MAX_VALUE_LENGTH = 1_000_000 # 1MB
66
67
  MAX_ARRAY_SIZE = 1000
67
68
 
68
- VALID_RECALL_STRATEGIES = [:vector, :fulltext, :hybrid].freeze
69
+ VALID_RECALL_STRATEGIES = %i[vector fulltext hybrid].freeze
69
70
 
70
71
  # Initialize a new HTM instance
71
72
  #
@@ -125,77 +126,30 @@ class HTM
125
126
  # node_id = htm.remember("User prefers dark mode", metadata: { source: "user", confidence: 0.95 })
126
127
  #
127
128
  def remember(content, tags: [], metadata: {})
128
- # Validate inputs
129
129
  raise ValidationError, "Content cannot be nil" if content.nil?
130
-
131
- content_str = content.to_s.strip
132
- raise ValidationError, "Content cannot be empty" if content_str.empty?
133
-
134
- if content_str.bytesize > MAX_VALUE_LENGTH
135
- raise ValidationError, "Content exceeds maximum size (#{MAX_VALUE_LENGTH} bytes)"
136
- end
137
-
138
- validate_array!(tags, "tags")
139
- tags.each do |tag|
140
- unless tag.is_a?(String) && tag.match?(/\A[a-z0-9\-]+(:[a-z0-9\-]+)*\z/)
141
- raise ValidationError, "Invalid tag format: #{tag.inspect}. Tags must be lowercase alphanumeric with hyphens, separated by colons."
142
- end
143
- end
144
-
130
+ content = content.to_s.strip
131
+ raise ValidationError, "Content cannot be empty" if content.empty?
132
+ validate_remember_content!(content)
133
+ validate_remember_tags!(tags)
145
134
  validate_metadata!(metadata)
146
135
 
147
- content = content_str
148
-
149
- # Calculate token count using configured counter
150
136
  token_count = HTM.count_tokens(content)
151
-
152
- # Store in long-term memory (with deduplication)
153
- # Returns { node_id:, is_new:, robot_node: }
154
137
  result = @long_term_memory.add(
155
- content: content,
156
- token_count: token_count,
157
- robot_id: @robot_id,
158
- embedding: nil, # Will be generated in background
159
- metadata: metadata
138
+ content: content, token_count: token_count, robot_id: @robot_id,
139
+ embedding: nil, metadata: metadata
160
140
  )
161
-
162
141
  node_id = result[:node_id]
163
- is_new = result[:is_new]
164
142
 
165
- if is_new
143
+ if result[:is_new]
166
144
  HTM.logger.info "Node #{node_id} created for robot #{@robot_name} (#{token_count} tokens)"
167
-
168
- # Enqueue background jobs for embedding and tag generation
169
- # Only for NEW nodes - existing nodes already have embeddings/tags
170
- enqueue_embedding_job(node_id)
171
- enqueue_tags_job(node_id, manual_tags: tags)
172
-
173
- # Enqueue proposition extraction if enabled and not already a proposition
174
- if HTM.config.extract_propositions && !metadata[:is_proposition]
175
- enqueue_propositions_job(node_id)
176
- end
145
+ enqueue_background_jobs(node_id, tags: tags, metadata: metadata)
177
146
  else
178
- HTM.logger.info "Node #{node_id} already exists, linked to robot #{@robot_name} (remember_count: #{result[:robot_node].remember_count})"
179
-
180
- # For existing nodes, only add manual tags if provided
181
- if tags.any?
182
- node = HTM::Models::Node[node_id]
183
- node.add_tags(tags)
184
- HTM.logger.info "Added #{tags.length} manual tags to existing node #{node_id}"
185
- end
147
+ rc = result[:robot_node].remember_count
148
+ HTM.logger.info "Node #{node_id} already exists, linked to robot #{@robot_name} (remember_count: #{rc})"
149
+ handle_existing_node_tags(node_id, tags)
186
150
  end
187
151
 
188
- # Add to working memory (evict if needed, access_count starts at 0)
189
- unless @working_memory.has_space?(token_count)
190
- evicted = @working_memory.evict_to_make_space(token_count)
191
- evicted_keys = evicted.map { |n| n[:key] }
192
- @long_term_memory.mark_evicted(robot_id: @robot_id, node_ids: evicted_keys) if evicted_keys.any?
193
- end
194
- @working_memory.add(node_id, content, token_count: token_count, access_count: 0)
195
-
196
- # Mark node as in working memory in the robot_nodes join table
197
- result[:robot_node].update(working_memory: true)
198
-
152
+ store_in_working_memory(node_id, content, token_count: token_count, robot_node: result[:robot_node])
199
153
  update_robot_activity
200
154
  node_id
201
155
  end
@@ -250,53 +204,53 @@ class HTM
250
204
  # Normalize timeframe and potentially extract from query
251
205
  search_query = topic
252
206
  normalized_timeframe = if timeframe == :auto
253
- result = HTM::Timeframe.normalize(:auto, query: topic)
254
- search_query = result.query # Use cleaned query for search
255
- result.timeframe
256
- else
257
- HTM::Timeframe.normalize(timeframe)
258
- end
207
+ result = HTM::Timeframe.normalize(:auto, query: topic)
208
+ search_query = result.query # Use cleaned query for search
209
+ result.timeframe
210
+ else
211
+ HTM::Timeframe.normalize(timeframe)
212
+ end
259
213
 
260
214
  # Use relevance-based search if requested
261
- if with_relevance
262
- nodes = @long_term_memory.search_with_relevance(
263
- timeframe: normalized_timeframe,
264
- query: search_query,
265
- query_tags: query_tags,
266
- limit: limit,
267
- embedding_service: (strategy == :vector || strategy == :hybrid) ? HTM : nil,
268
- metadata: metadata
269
- )
270
- else
271
- # Perform standard RAG-based retrieval
272
- nodes = case strategy
273
- when :vector
274
- # Vector search using query embedding
275
- @long_term_memory.search(
276
- timeframe: normalized_timeframe,
277
- query: search_query,
278
- limit: limit,
279
- embedding_service: HTM,
280
- metadata: metadata
281
- )
282
- when :fulltext
283
- @long_term_memory.search_fulltext(
284
- timeframe: normalized_timeframe,
285
- query: search_query,
286
- limit: limit,
287
- metadata: metadata
288
- )
289
- when :hybrid
290
- # Hybrid search combining vector + fulltext
291
- @long_term_memory.search_hybrid(
292
- timeframe: normalized_timeframe,
293
- query: search_query,
294
- limit: limit,
295
- embedding_service: HTM,
296
- metadata: metadata
297
- )
298
- end
299
- end
215
+ nodes = if with_relevance
216
+ @long_term_memory.search_with_relevance(
217
+ timeframe: normalized_timeframe,
218
+ query: search_query,
219
+ query_tags: query_tags,
220
+ limit: limit,
221
+ embedding_service: %i[vector hybrid].include?(strategy) ? HTM : nil,
222
+ metadata: metadata
223
+ )
224
+ else
225
+ # Perform standard RAG-based retrieval
226
+ case strategy
227
+ when :vector
228
+ # Vector search using query embedding
229
+ @long_term_memory.search(
230
+ timeframe: normalized_timeframe,
231
+ query: search_query,
232
+ limit: limit,
233
+ embedding_service: HTM,
234
+ metadata: metadata
235
+ )
236
+ when :fulltext
237
+ @long_term_memory.search_fulltext(
238
+ timeframe: normalized_timeframe,
239
+ query: search_query,
240
+ limit: limit,
241
+ metadata: metadata
242
+ )
243
+ when :hybrid
244
+ # Hybrid search combining vector + fulltext
245
+ @long_term_memory.search_hybrid(
246
+ timeframe: normalized_timeframe,
247
+ query: search_query,
248
+ limit: limit,
249
+ embedding_service: HTM,
250
+ metadata: metadata
251
+ )
252
+ end
253
+ end
300
254
 
301
255
  # Add to working memory (evict if needed)
302
256
  nodes.each do |node|
@@ -471,8 +425,8 @@ class HTM
471
425
 
472
426
  # Update database: mark all as evicted from working memory
473
427
  count = HTM::Models::RobotNode
474
- .where(robot_id: @robot_id, working_memory: true)
475
- .update(working_memory: false)
428
+ .where(robot_id: @robot_id, working_memory: true)
429
+ .update(working_memory: false)
476
430
 
477
431
  HTM.logger.info "Cleared #{count} nodes from working memory"
478
432
  count
@@ -580,6 +534,41 @@ class HTM
580
534
  @long_term_memory.update_robot_activity(@robot_id)
581
535
  end
582
536
 
537
+ def validate_remember_content!(content)
538
+ return unless content.bytesize > MAX_VALUE_LENGTH
539
+ raise ValidationError, "Content exceeds maximum size (#{MAX_VALUE_LENGTH} bytes)"
540
+ end
541
+
542
+ def validate_remember_tags!(tags)
543
+ validate_array!(tags, "tags")
544
+ tags.each do |tag|
545
+ next if tag.is_a?(String) && tag.match?(/\A[a-z0-9-]+(:[a-z0-9-]+)*\z/)
546
+ raise ValidationError, "Invalid tag format: #{tag.inspect}. Tags must be lowercase alphanumeric with hyphens, separated by colons."
547
+ end
548
+ end
549
+
550
+ def enqueue_background_jobs(node_id, tags:, metadata:)
551
+ enqueue_embedding_job(node_id)
552
+ enqueue_tags_job(node_id, manual_tags: tags)
553
+ enqueue_propositions_job(node_id) if HTM.config.extract_propositions && !metadata[:is_proposition]
554
+ end
555
+
556
+ def handle_existing_node_tags(node_id, tags)
557
+ return unless tags.any?
558
+ HTM::Models::Node[node_id].add_tags(tags)
559
+ HTM.logger.info "Added #{tags.length} manual tags to existing node #{node_id}"
560
+ end
561
+
562
+ def store_in_working_memory(node_id, content, token_count:, robot_node:)
563
+ unless @working_memory.has_space?(token_count)
564
+ evicted = @working_memory.evict_to_make_space(token_count)
565
+ evicted_keys = evicted.map { |n| n[:key] }
566
+ @long_term_memory.mark_evicted(robot_id: @robot_id, node_ids: evicted_keys) if evicted_keys.any?
567
+ end
568
+ @working_memory.add(node_id, content, token_count: token_count, access_count: 0)
569
+ robot_node.update(working_memory: true)
570
+ end
571
+
583
572
  def enqueue_embedding_job(node_id)
584
573
  # Enqueue embedding generation using configured job backend
585
574
  # Job will use HTM.embed which delegates to configured embedding_generator
@@ -655,20 +644,18 @@ class HTM
655
644
 
656
645
  def validate_recall_strategy!(strategy)
657
646
  raise ValidationError, "Strategy must be a Symbol" unless strategy.is_a?(Symbol)
658
- unless VALID_RECALL_STRATEGIES.include?(strategy)
659
- raise ValidationError, "Invalid strategy: #{strategy}. Must be one of #{VALID_RECALL_STRATEGIES.join(', ')}"
660
- end
647
+ return if VALID_RECALL_STRATEGIES.include?(strategy)
648
+ raise ValidationError, "Invalid strategy: #{strategy}. Must be one of #{VALID_RECALL_STRATEGIES.join(', ')}"
661
649
  end
662
650
 
663
-
664
651
  def validate_timeframe!(timeframe)
665
652
  return if HTM::Timeframe.valid?(timeframe)
666
653
  raise ValidationError, "Invalid timeframe type: #{timeframe.class}. " \
667
- "Expected nil, Range, Array<Range>, Date, DateTime, Time, String, or :auto"
654
+ "Expected nil, Range, Array<Range>, Date, DateTime, Time, String, or :auto"
668
655
  end
669
656
 
670
657
  def validate_positive_integer!(value, name)
671
- raise ValidationError, "#{name} must be a positive Integer" unless value.is_a?(Integer) && value > 0
658
+ raise ValidationError, "#{name} must be a positive Integer" unless value.is_a?(Integer) && value.positive?
672
659
  end
673
660
 
674
661
  def validate_metadata!(metadata)
data/lib/tasks/db.rake CHANGED
@@ -3,14 +3,12 @@
3
3
  namespace :db do
4
4
  desc "Run database migrations"
5
5
  task :migrate do
6
-
7
6
  HTM::Database.migrate
8
7
  puts "Database migrations completed successfully"
9
8
  end
10
9
 
11
10
  desc "Setup database schema (includes migrations)"
12
11
  task :setup do
13
-
14
12
  HTM::Database.setup
15
13
  puts "Database setup completed successfully"
16
14
  end