htm 0.0.31 → 0.0.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.irbrc +2 -3
- data/.rubocop.yml +184 -0
- data/CHANGELOG.md +46 -0
- data/README.md +2 -0
- data/Rakefile +93 -12
- data/db/migrate/00008_create_node_relationships.rb +54 -0
- data/db/migrate/00009_fix_node_relationships_column_types.rb +17 -0
- data/db/schema.sql +124 -1
- data/docs/api/database.md +35 -57
- data/docs/api/embedding-service.md +1 -1
- data/docs/api/index.md +26 -15
- data/docs/api/working-memory.md +8 -8
- data/docs/architecture/index.md +5 -7
- data/docs/architecture/overview.md +5 -8
- data/docs/assets/images/htm-architecture-overview.svg +1 -1
- data/docs/assets/images/htm-context-assembly-flow.svg +2 -2
- data/docs/assets/images/htm-layered-architecture.svg +3 -3
- data/docs/assets/images/two-tier-memory-architecture.svg +1 -1
- data/docs/database/README.md +1 -0
- data/docs/database_rake_tasks.md +20 -28
- data/docs/development/contributing.md +5 -5
- data/docs/development/index.md +4 -7
- data/docs/development/schema.md +71 -1
- data/docs/development/setup.md +40 -82
- data/docs/development/testing.md +1 -1
- data/docs/examples/file-loading.md +4 -4
- data/docs/examples/mcp-client.md +1 -1
- data/docs/getting-started/quick-start.md +4 -4
- data/docs/guides/adding-memories.md +14 -1
- data/docs/guides/configuration.md +5 -5
- data/docs/guides/context-assembly.md +4 -4
- data/docs/guides/file-loading.md +12 -12
- data/docs/guides/getting-started.md +2 -2
- data/docs/guides/long-term-memory.md +7 -27
- data/docs/guides/propositions.md +20 -19
- data/docs/guides/recalling-memories.md +5 -5
- data/docs/guides/tags.md +18 -13
- data/docs/multi_framework_support.md +1 -1
- data/docs/robots/hive-mind.md +1 -1
- data/docs/robots/multi-robot.md +2 -2
- data/docs/robots/robot-groups.md +1 -1
- data/docs/robots/two-tier-memory.md +72 -94
- data/docs/setup_local_database.md +8 -54
- data/docs/using_rake_tasks_in_your_app.md +6 -6
- data/examples/01_basic_usage.rb +1 -0
- data/examples/03_custom_llm_configuration.rb +1 -0
- data/examples/04_file_loader_usage.rb +1 -0
- data/examples/05_timeframe_demo.rb +1 -0
- data/examples/06_example_app/app.rb +1 -0
- data/examples/07_cli_app/htm_cli.rb +1 -0
- data/examples/09_mcp_client.rb +1 -0
- data/examples/10_telemetry/demo.rb +1 -0
- data/examples/11_robot_groups/multi_process.rb +1 -0
- data/examples/11_robot_groups/same_process.rb +1 -0
- data/examples/12_rails_app/.envrc +12 -0
- data/examples/12_rails_app/Gemfile +8 -3
- data/examples/12_rails_app/Gemfile.lock +94 -89
- data/examples/12_rails_app/README.md +70 -19
- data/examples/12_rails_app/app/controllers/application_controller.rb +6 -0
- data/examples/12_rails_app/app/controllers/chats_controller.rb +305 -0
- data/examples/12_rails_app/app/controllers/dashboard_controller.rb +3 -0
- data/examples/12_rails_app/app/controllers/files_controller.rb +17 -2
- data/examples/12_rails_app/app/controllers/home_controller.rb +8 -0
- data/examples/12_rails_app/app/controllers/memories_controller.rb +9 -4
- data/examples/12_rails_app/app/controllers/messages_controller.rb +214 -0
- data/examples/12_rails_app/app/controllers/robots_controller.rb +11 -1
- data/examples/12_rails_app/app/controllers/tags_controller.rb +14 -1
- data/examples/12_rails_app/app/javascript/application.js +1 -1
- data/examples/12_rails_app/app/models/application_record.rb +5 -0
- data/examples/12_rails_app/app/models/chat.rb +36 -0
- data/examples/12_rails_app/app/models/message.rb +5 -0
- data/examples/12_rails_app/app/models/model.rb +5 -0
- data/examples/12_rails_app/app/models/tool_call.rb +5 -0
- data/examples/12_rails_app/app/views/chats/index.html.erb +61 -0
- data/examples/12_rails_app/app/views/chats/show.html.erb +213 -0
- data/examples/12_rails_app/app/views/dashboard/index.html.erb +3 -0
- data/examples/12_rails_app/app/views/files/index.html.erb +10 -5
- data/examples/12_rails_app/app/views/files/new.html.erb +4 -2
- data/examples/12_rails_app/app/views/files/show.html.erb +19 -3
- data/examples/12_rails_app/app/views/home/index.html.erb +45 -0
- data/examples/12_rails_app/app/views/layouts/application.html.erb +20 -18
- data/examples/12_rails_app/app/views/memories/_memory_card.html.erb +1 -1
- data/examples/12_rails_app/app/views/memories/deleted.html.erb +3 -1
- data/examples/12_rails_app/app/views/memories/edit.html.erb +2 -0
- data/examples/12_rails_app/app/views/memories/index.html.erb +2 -0
- data/examples/12_rails_app/app/views/memories/new.html.erb +2 -0
- data/examples/12_rails_app/app/views/memories/show.html.erb +4 -2
- data/examples/12_rails_app/app/views/messages/_message.html.erb +20 -0
- data/examples/12_rails_app/app/views/robots/index.html.erb +2 -0
- data/examples/12_rails_app/app/views/robots/new.html.erb +2 -0
- data/examples/12_rails_app/app/views/robots/show.html.erb +2 -0
- data/examples/12_rails_app/app/views/search/index.html.erb +59 -8
- data/examples/12_rails_app/app/views/shared/_navbar.html.erb +75 -29
- data/examples/12_rails_app/app/views/tags/index.html.erb +2 -0
- data/examples/12_rails_app/app/views/tags/show.html.erb +3 -1
- data/examples/12_rails_app/config/application.rb +1 -1
- data/examples/12_rails_app/config/database.yml +9 -5
- data/examples/12_rails_app/config/importmap.rb +1 -1
- data/examples/12_rails_app/config/initializers/htm.rb +9 -2
- data/examples/12_rails_app/config/initializers/ruby_llm.rb +33 -0
- data/examples/12_rails_app/config/routes.rb +39 -23
- data/examples/12_rails_app/db/migrate/20250124000001_create_ruby_llm_tables.rb +34 -0
- data/examples/12_rails_app/db/migrate/20250124000002_create_models_table.rb +28 -0
- data/examples/12_rails_app/db/schema.rb +67 -0
- data/examples/examples_helper.rb +25 -0
- data/lib/htm/circuit_breaker.rb +5 -6
- data/lib/htm/config/builder.rb +12 -12
- data/lib/htm/config/database.rb +21 -27
- data/lib/htm/config/validator.rb +12 -18
- data/lib/htm/config.rb +76 -65
- data/lib/htm/database.rb +193 -199
- data/lib/htm/embedding_service.rb +4 -9
- data/lib/htm/integrations/sinatra.rb +7 -7
- data/lib/htm/job_adapter.rb +14 -21
- data/lib/htm/jobs/generate_embedding_job.rb +28 -44
- data/lib/htm/jobs/generate_propositions_job.rb +29 -55
- data/lib/htm/jobs/generate_relationships_job.rb +137 -0
- data/lib/htm/jobs/generate_tags_job.rb +45 -67
- data/lib/htm/loaders/markdown_loader.rb +65 -112
- data/lib/htm/long_term_memory/fulltext_search.rb +1 -1
- data/lib/htm/long_term_memory/hybrid_search.rb +300 -128
- data/lib/htm/long_term_memory/node_operations.rb +2 -2
- data/lib/htm/long_term_memory/relevance_scorer.rb +100 -68
- data/lib/htm/long_term_memory/tag_operations.rb +87 -120
- data/lib/htm/long_term_memory/vector_search.rb +1 -1
- data/lib/htm/long_term_memory.rb +2 -1
- data/lib/htm/mcp/cli.rb +59 -58
- data/lib/htm/mcp/server.rb +5 -6
- data/lib/htm/mcp/tools.rb +30 -36
- data/lib/htm/migration.rb +10 -10
- data/lib/htm/models/node.rb +2 -3
- data/lib/htm/models/node_relationship.rb +72 -0
- data/lib/htm/models/node_tag.rb +2 -2
- data/lib/htm/models/robot_node.rb +2 -2
- data/lib/htm/models/tag.rb +41 -28
- data/lib/htm/observability.rb +45 -51
- data/lib/htm/proposition_service.rb +3 -7
- data/lib/htm/query_cache.rb +13 -15
- data/lib/htm/railtie.rb +1 -2
- data/lib/htm/robot_group.rb +9 -9
- data/lib/htm/sequel_config.rb +1 -0
- data/lib/htm/sql_builder.rb +1 -1
- data/lib/htm/tag_service.rb +2 -6
- data/lib/htm/timeframe.rb +4 -5
- data/lib/htm/timeframe_extractor.rb +42 -83
- data/lib/htm/version.rb +1 -1
- data/lib/htm/workflows/remember_workflow.rb +112 -115
- data/lib/htm/working_memory.rb +21 -26
- data/lib/htm.rb +103 -116
- data/lib/tasks/db.rake +0 -2
- data/lib/tasks/doc.rake +14 -13
- data/lib/tasks/files.rake +5 -12
- data/lib/tasks/htm.rake +70 -71
- data/lib/tasks/jobs.rake +41 -47
- data/lib/tasks/tags.rake +3 -8
- metadata +25 -100
|
@@ -79,134 +79,131 @@ class HTM
|
|
|
79
79
|
private
|
|
80
80
|
|
|
81
81
|
def build_pipeline
|
|
82
|
+
save_step = save_node_step
|
|
83
|
+
embed_step = generate_embedding_step
|
|
84
|
+
tags_step = generate_tags_step
|
|
85
|
+
props_step = generate_propositions_step
|
|
86
|
+
final_step = finalize_step
|
|
87
|
+
|
|
82
88
|
SimpleFlow::Pipeline.new(concurrency: @concurrency) do
|
|
83
|
-
|
|
84
|
-
step :
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
# Store in long-term memory
|
|
92
|
-
save_result = htm.long_term_memory.add(
|
|
93
|
-
content: data[:content],
|
|
94
|
-
token_count: token_count,
|
|
95
|
-
robot_id: data[:robot_id],
|
|
96
|
-
embedding: nil,
|
|
97
|
-
metadata: data[:metadata]
|
|
98
|
-
)
|
|
99
|
-
|
|
100
|
-
node_id = save_result[:node_id]
|
|
101
|
-
is_new = save_result[:is_new]
|
|
102
|
-
|
|
103
|
-
HTM.logger.info "RememberWorkflow: Node #{node_id} saved (new: #{is_new})"
|
|
104
|
-
|
|
105
|
-
result
|
|
106
|
-
.with_context(:node_id, node_id)
|
|
107
|
-
.with_context(:is_new, is_new)
|
|
108
|
-
.with_context(:token_count, token_count)
|
|
109
|
-
.with_context(:robot_node, save_result[:robot_node])
|
|
110
|
-
.continue(data)
|
|
111
|
-
}, depends_on: :none
|
|
112
|
-
|
|
113
|
-
# Step 2: Generate embedding (depends on save_node, runs in parallel with tags/propositions)
|
|
114
|
-
step :generate_embedding, ->(result) {
|
|
115
|
-
node_id = result.context[:node_id]
|
|
116
|
-
is_new = result.context[:is_new]
|
|
117
|
-
|
|
118
|
-
# Only generate for new nodes
|
|
119
|
-
if is_new
|
|
120
|
-
begin
|
|
121
|
-
HTM::Jobs::GenerateEmbeddingJob.perform(node_id: node_id)
|
|
122
|
-
rescue StandardError => e
|
|
123
|
-
HTM.logger.error "RememberWorkflow: Embedding generation failed: #{e.message}"
|
|
124
|
-
# Continue despite error - embedding is non-critical
|
|
125
|
-
end
|
|
126
|
-
end
|
|
89
|
+
step :save_node, save_step, depends_on: :none
|
|
90
|
+
step :generate_embedding, embed_step, depends_on: [:save_node]
|
|
91
|
+
step :generate_tags, tags_step, depends_on: [:save_node]
|
|
92
|
+
step :generate_propositions, props_step, depends_on: [:save_node]
|
|
93
|
+
step :finalize, final_step, depends_on: %i[generate_embedding generate_tags generate_propositions]
|
|
94
|
+
end
|
|
95
|
+
end
|
|
127
96
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
97
|
+
def save_node_step
|
|
98
|
+
lambda { |result|
|
|
99
|
+
data = result.value
|
|
100
|
+
htm = data[:htm]
|
|
101
|
+
token_count = HTM.count_tokens(data[:content])
|
|
102
|
+
save_result = htm.long_term_memory.add(
|
|
103
|
+
content: data[:content],
|
|
104
|
+
token_count: token_count,
|
|
105
|
+
robot_id: data[:robot_id],
|
|
106
|
+
embedding: nil,
|
|
107
|
+
metadata: data[:metadata]
|
|
108
|
+
)
|
|
109
|
+
node_id = save_result[:node_id]
|
|
110
|
+
is_new = save_result[:is_new]
|
|
111
|
+
HTM.logger.info "RememberWorkflow: Node #{node_id} saved (new: #{is_new})"
|
|
112
|
+
result
|
|
113
|
+
.with_context(:node_id, node_id)
|
|
114
|
+
.with_context(:is_new, is_new)
|
|
115
|
+
.with_context(:token_count, token_count)
|
|
116
|
+
.with_context(:robot_node, save_result[:robot_node])
|
|
117
|
+
.continue(data)
|
|
118
|
+
}
|
|
119
|
+
end
|
|
146
120
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
if manual_tags.any?
|
|
156
|
-
node = HTM::Models::Node[node_id]
|
|
157
|
-
node.add_tags(manual_tags)
|
|
158
|
-
end
|
|
121
|
+
def generate_embedding_step
|
|
122
|
+
lambda { |result|
|
|
123
|
+
node_id = result.context[:node_id]
|
|
124
|
+
if result.context[:is_new]
|
|
125
|
+
begin
|
|
126
|
+
HTM::Jobs::GenerateEmbeddingJob.perform(node_id: node_id)
|
|
127
|
+
rescue StandardError => e
|
|
128
|
+
HTM.logger.error "RememberWorkflow: Embedding generation failed: #{e.message}"
|
|
159
129
|
end
|
|
130
|
+
end
|
|
131
|
+
result.continue(result.value)
|
|
132
|
+
}
|
|
133
|
+
end
|
|
160
134
|
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
# Only extract propositions for new nodes that aren't already propositions
|
|
172
|
-
if is_new && HTM.config.extract_propositions && !metadata[:is_proposition]
|
|
173
|
-
begin
|
|
174
|
-
HTM::Jobs::GeneratePropositionsJob.perform(node_id: node_id, robot_id: robot_id)
|
|
175
|
-
rescue StandardError => e
|
|
176
|
-
HTM.logger.error "RememberWorkflow: Proposition extraction failed: #{e.message}"
|
|
177
|
-
# Continue despite error - propositions are non-critical
|
|
135
|
+
def generate_tags_step
|
|
136
|
+
lambda { |result|
|
|
137
|
+
node_id = result.context[:node_id]
|
|
138
|
+
is_new = result.context[:is_new]
|
|
139
|
+
manual_tags = result.value[:tags] || []
|
|
140
|
+
|
|
141
|
+
if is_new
|
|
142
|
+
manual_tags.each do |tag_name|
|
|
143
|
+
HTM::Models::Tag.find_or_create_with_ancestors(tag_name).each do |tag|
|
|
144
|
+
HTM::Models::NodeTag.find_or_create(node_id: node_id, tag_id: tag.id)
|
|
178
145
|
end
|
|
179
146
|
end
|
|
147
|
+
begin
|
|
148
|
+
HTM::Jobs::GenerateTagsJob.perform(node_id: node_id)
|
|
149
|
+
rescue StandardError => e
|
|
150
|
+
HTM.logger.error "RememberWorkflow: Tag generation failed: #{e.message}"
|
|
151
|
+
end
|
|
152
|
+
elsif manual_tags.any?
|
|
153
|
+
HTM::Models::Node[node_id].add_tags(manual_tags)
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
result.continue(result.value)
|
|
157
|
+
}
|
|
158
|
+
end
|
|
180
159
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
evicted = htm.working_memory.evict_to_make_space(token_count)
|
|
194
|
-
evicted_keys = evicted.map { |n| n[:key] }
|
|
195
|
-
htm.long_term_memory.mark_evicted(robot_id: result.value[:robot_id], node_ids: evicted_keys) if evicted_keys.any?
|
|
160
|
+
def generate_propositions_step
|
|
161
|
+
lambda { |result|
|
|
162
|
+
node_id = result.context[:node_id]
|
|
163
|
+
is_new = result.context[:is_new]
|
|
164
|
+
metadata = result.value[:metadata] || {}
|
|
165
|
+
robot_id = result.value[:robot_id]
|
|
166
|
+
|
|
167
|
+
if is_new && HTM.config.extract_propositions && !metadata[:is_proposition]
|
|
168
|
+
begin
|
|
169
|
+
HTM::Jobs::GeneratePropositionsJob.perform(node_id: node_id, robot_id: robot_id)
|
|
170
|
+
rescue StandardError => e
|
|
171
|
+
HTM.logger.error "RememberWorkflow: Proposition extraction failed: #{e.message}"
|
|
196
172
|
end
|
|
197
|
-
|
|
173
|
+
end
|
|
198
174
|
|
|
199
|
-
|
|
200
|
-
|
|
175
|
+
result.continue(result.value)
|
|
176
|
+
}
|
|
177
|
+
end
|
|
201
178
|
|
|
202
|
-
|
|
203
|
-
|
|
179
|
+
def finalize_step
|
|
180
|
+
lambda { |result|
|
|
181
|
+
ctx = result.context
|
|
182
|
+
finalize_node(
|
|
183
|
+
htm: result.value[:htm],
|
|
184
|
+
node_id: ctx[:node_id],
|
|
185
|
+
token_count: ctx[:token_count],
|
|
186
|
+
robot_node: ctx[:robot_node],
|
|
187
|
+
content: result.value[:content],
|
|
188
|
+
robot_id: result.value[:robot_id]
|
|
189
|
+
)
|
|
190
|
+
result.continue(result.value)
|
|
191
|
+
}
|
|
192
|
+
end
|
|
204
193
|
|
|
205
|
-
|
|
194
|
+
def finalize_node(htm:, node_id:, token_count:, robot_node:, content:, robot_id:)
|
|
195
|
+
evict_working_memory_if_needed(htm, token_count, robot_id)
|
|
196
|
+
htm.working_memory.add(node_id, content, token_count: token_count, access_count: 0)
|
|
197
|
+
robot_node.update(working_memory: true)
|
|
198
|
+
htm.long_term_memory.update_robot_activity(robot_id)
|
|
199
|
+
HTM.logger.info "RememberWorkflow: Node #{node_id} finalized"
|
|
200
|
+
end
|
|
206
201
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
202
|
+
def evict_working_memory_if_needed(htm, token_count, robot_id)
|
|
203
|
+
return if htm.working_memory.has_space?(token_count)
|
|
204
|
+
evicted = htm.working_memory.evict_to_make_space(token_count)
|
|
205
|
+
evicted_keys = evicted.map { |n| n[:key] }
|
|
206
|
+
htm.long_term_memory.mark_evicted(robot_id: robot_id, node_ids: evicted_keys) if evicted_keys.any?
|
|
210
207
|
end
|
|
211
208
|
end
|
|
212
209
|
end
|
data/lib/htm/working_memory.rb
CHANGED
|
@@ -84,7 +84,7 @@ class HTM
|
|
|
84
84
|
tokens_freed = 0
|
|
85
85
|
|
|
86
86
|
# Sort by access frequency + recency (lower score = more evictable)
|
|
87
|
-
candidates = @nodes.sort_by do |
|
|
87
|
+
candidates = @nodes.sort_by do |_key, node|
|
|
88
88
|
access_frequency = node[:access_count] || 0
|
|
89
89
|
time_since_accessed = Time.now - (node[:last_accessed] || node[:added_at])
|
|
90
90
|
|
|
@@ -92,7 +92,7 @@ class HTM
|
|
|
92
92
|
# Frequently accessed = higher score (keep)
|
|
93
93
|
# Recently accessed = higher score (keep)
|
|
94
94
|
access_score = Math.log(1 + access_frequency)
|
|
95
|
-
recency_score = 1.0 / (1 + time_since_accessed / 3600.0)
|
|
95
|
+
recency_score = 1.0 / (1 + (time_since_accessed / 3600.0))
|
|
96
96
|
|
|
97
97
|
-(access_score + recency_score) # Negative for ascending sort
|
|
98
98
|
end
|
|
@@ -122,33 +122,10 @@ class HTM
|
|
|
122
122
|
def assemble_context(strategy:, max_tokens: nil)
|
|
123
123
|
@mutex.synchronize do
|
|
124
124
|
max = max_tokens || @max_tokens
|
|
125
|
+
nodes = sorted_nodes_by_strategy(strategy)
|
|
125
126
|
|
|
126
|
-
# Make defensive copies of nodes to prevent external mutation of internal state
|
|
127
|
-
nodes = case strategy
|
|
128
|
-
when :recent
|
|
129
|
-
# Most recently accessed (LRU)
|
|
130
|
-
@access_order.reverse.map { |k| @nodes[k]&.dup }.compact
|
|
131
|
-
when :frequent
|
|
132
|
-
# Most frequently accessed (LFU)
|
|
133
|
-
@nodes.sort_by { |k, v| -(v[:access_count] || 0) }.map { |_, v| v.dup }
|
|
134
|
-
when :balanced
|
|
135
|
-
# Combined frequency × recency
|
|
136
|
-
@nodes.sort_by { |k, v|
|
|
137
|
-
access_frequency = v[:access_count] || 0
|
|
138
|
-
time_since_accessed = Time.now - (v[:last_accessed] || v[:added_at])
|
|
139
|
-
recency_factor = 1.0 / (1 + time_since_accessed / 3600.0)
|
|
140
|
-
|
|
141
|
-
# Higher score = more relevant
|
|
142
|
-
-(Math.log(1 + access_frequency) * recency_factor)
|
|
143
|
-
}.map { |_, v| v.dup }
|
|
144
|
-
else
|
|
145
|
-
raise ArgumentError, "Unknown strategy: #{strategy}. Use :recent, :frequent, or :balanced"
|
|
146
|
-
end
|
|
147
|
-
|
|
148
|
-
# Build context up to token limit
|
|
149
127
|
context_parts = []
|
|
150
128
|
current_tokens = 0
|
|
151
|
-
|
|
152
129
|
nodes.each do |node|
|
|
153
130
|
break if current_tokens + node[:token_count] > max
|
|
154
131
|
context_parts << node[:value]
|
|
@@ -263,6 +240,24 @@ class HTM
|
|
|
263
240
|
|
|
264
241
|
private
|
|
265
242
|
|
|
243
|
+
def sorted_nodes_by_strategy(strategy)
|
|
244
|
+
case strategy
|
|
245
|
+
when :recent
|
|
246
|
+
@access_order.reverse.map { |k| @nodes[k]&.dup }.compact
|
|
247
|
+
when :frequent
|
|
248
|
+
@nodes.sort_by { |_k, v| -(v[:access_count] || 0) }.map { |_, v| v.dup }
|
|
249
|
+
when :balanced
|
|
250
|
+
sorted = @nodes.sort_by do |_k, v|
|
|
251
|
+
freq = v[:access_count] || 0
|
|
252
|
+
age = Time.now - (v[:last_accessed] || v[:added_at])
|
|
253
|
+
-(Math.log(1 + freq) * (1.0 / (1 + (age / 3600.0))))
|
|
254
|
+
end
|
|
255
|
+
sorted.map { |_, v| v.dup }
|
|
256
|
+
else
|
|
257
|
+
raise ArgumentError, "Unknown strategy: #{strategy}. Use :recent, :frequent, or :balanced"
|
|
258
|
+
end
|
|
259
|
+
end
|
|
260
|
+
|
|
266
261
|
# Internal unlocked version - must be called within @mutex.synchronize
|
|
267
262
|
def current_tokens_unlocked
|
|
268
263
|
@nodes.values.sum { |n| n[:token_count] }
|
data/lib/htm.rb
CHANGED
|
@@ -17,6 +17,7 @@ require_relative "htm/job_adapter"
|
|
|
17
17
|
require_relative "htm/jobs/generate_embedding_job"
|
|
18
18
|
require_relative "htm/jobs/generate_tags_job"
|
|
19
19
|
require_relative "htm/jobs/generate_propositions_job"
|
|
20
|
+
require_relative "htm/jobs/generate_relationships_job"
|
|
20
21
|
require_relative "htm/loaders/markdown_chunker"
|
|
21
22
|
require_relative "htm/loaders/markdown_loader"
|
|
22
23
|
require_relative "htm/workflows/remember_workflow"
|
|
@@ -49,13 +50,13 @@ require_relative "htm/railtie" if defined?(Rails::Railtie)
|
|
|
49
50
|
# htm = HTM.new(robot_name: "Code Helper")
|
|
50
51
|
#
|
|
51
52
|
# # Remember information
|
|
52
|
-
# htm.remember("We decided to use PostgreSQL for HTM", source: "architect")
|
|
53
|
+
# htm.remember("We decided to use PostgreSQL for HTM", metadata: { source: "architect" })
|
|
53
54
|
#
|
|
54
55
|
# # Recall from the past
|
|
55
|
-
# memories = htm.recall(timeframe: "last week"
|
|
56
|
+
# memories = htm.recall("PostgreSQL", timeframe: "last week")
|
|
56
57
|
#
|
|
57
58
|
# # Create context for LLM
|
|
58
|
-
# context = htm.
|
|
59
|
+
# context = htm.working_memory.assemble_context(strategy: :balanced)
|
|
59
60
|
#
|
|
60
61
|
class HTM
|
|
61
62
|
attr_reader :robot_id, :robot_name, :working_memory, :long_term_memory
|
|
@@ -65,7 +66,7 @@ class HTM
|
|
|
65
66
|
MAX_VALUE_LENGTH = 1_000_000 # 1MB
|
|
66
67
|
MAX_ARRAY_SIZE = 1000
|
|
67
68
|
|
|
68
|
-
VALID_RECALL_STRATEGIES = [
|
|
69
|
+
VALID_RECALL_STRATEGIES = %i[vector fulltext hybrid].freeze
|
|
69
70
|
|
|
70
71
|
# Initialize a new HTM instance
|
|
71
72
|
#
|
|
@@ -125,77 +126,30 @@ class HTM
|
|
|
125
126
|
# node_id = htm.remember("User prefers dark mode", metadata: { source: "user", confidence: 0.95 })
|
|
126
127
|
#
|
|
127
128
|
def remember(content, tags: [], metadata: {})
|
|
128
|
-
# Validate inputs
|
|
129
129
|
raise ValidationError, "Content cannot be nil" if content.nil?
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
if content_str.bytesize > MAX_VALUE_LENGTH
|
|
135
|
-
raise ValidationError, "Content exceeds maximum size (#{MAX_VALUE_LENGTH} bytes)"
|
|
136
|
-
end
|
|
137
|
-
|
|
138
|
-
validate_array!(tags, "tags")
|
|
139
|
-
tags.each do |tag|
|
|
140
|
-
unless tag.is_a?(String) && tag.match?(/\A[a-z0-9\-]+(:[a-z0-9\-]+)*\z/)
|
|
141
|
-
raise ValidationError, "Invalid tag format: #{tag.inspect}. Tags must be lowercase alphanumeric with hyphens, separated by colons."
|
|
142
|
-
end
|
|
143
|
-
end
|
|
144
|
-
|
|
130
|
+
content = content.to_s.strip
|
|
131
|
+
raise ValidationError, "Content cannot be empty" if content.empty?
|
|
132
|
+
validate_remember_content!(content)
|
|
133
|
+
validate_remember_tags!(tags)
|
|
145
134
|
validate_metadata!(metadata)
|
|
146
135
|
|
|
147
|
-
content = content_str
|
|
148
|
-
|
|
149
|
-
# Calculate token count using configured counter
|
|
150
136
|
token_count = HTM.count_tokens(content)
|
|
151
|
-
|
|
152
|
-
# Store in long-term memory (with deduplication)
|
|
153
|
-
# Returns { node_id:, is_new:, robot_node: }
|
|
154
137
|
result = @long_term_memory.add(
|
|
155
|
-
content: content,
|
|
156
|
-
|
|
157
|
-
robot_id: @robot_id,
|
|
158
|
-
embedding: nil, # Will be generated in background
|
|
159
|
-
metadata: metadata
|
|
138
|
+
content: content, token_count: token_count, robot_id: @robot_id,
|
|
139
|
+
embedding: nil, metadata: metadata
|
|
160
140
|
)
|
|
161
|
-
|
|
162
141
|
node_id = result[:node_id]
|
|
163
|
-
is_new = result[:is_new]
|
|
164
142
|
|
|
165
|
-
if is_new
|
|
143
|
+
if result[:is_new]
|
|
166
144
|
HTM.logger.info "Node #{node_id} created for robot #{@robot_name} (#{token_count} tokens)"
|
|
167
|
-
|
|
168
|
-
# Enqueue background jobs for embedding and tag generation
|
|
169
|
-
# Only for NEW nodes - existing nodes already have embeddings/tags
|
|
170
|
-
enqueue_embedding_job(node_id)
|
|
171
|
-
enqueue_tags_job(node_id, manual_tags: tags)
|
|
172
|
-
|
|
173
|
-
# Enqueue proposition extraction if enabled and not already a proposition
|
|
174
|
-
if HTM.config.extract_propositions && !metadata[:is_proposition]
|
|
175
|
-
enqueue_propositions_job(node_id)
|
|
176
|
-
end
|
|
145
|
+
enqueue_background_jobs(node_id, tags: tags, metadata: metadata)
|
|
177
146
|
else
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
if tags.any?
|
|
182
|
-
node = HTM::Models::Node[node_id]
|
|
183
|
-
node.add_tags(tags)
|
|
184
|
-
HTM.logger.info "Added #{tags.length} manual tags to existing node #{node_id}"
|
|
185
|
-
end
|
|
147
|
+
rc = result[:robot_node].remember_count
|
|
148
|
+
HTM.logger.info "Node #{node_id} already exists, linked to robot #{@robot_name} (remember_count: #{rc})"
|
|
149
|
+
handle_existing_node_tags(node_id, tags)
|
|
186
150
|
end
|
|
187
151
|
|
|
188
|
-
|
|
189
|
-
unless @working_memory.has_space?(token_count)
|
|
190
|
-
evicted = @working_memory.evict_to_make_space(token_count)
|
|
191
|
-
evicted_keys = evicted.map { |n| n[:key] }
|
|
192
|
-
@long_term_memory.mark_evicted(robot_id: @robot_id, node_ids: evicted_keys) if evicted_keys.any?
|
|
193
|
-
end
|
|
194
|
-
@working_memory.add(node_id, content, token_count: token_count, access_count: 0)
|
|
195
|
-
|
|
196
|
-
# Mark node as in working memory in the robot_nodes join table
|
|
197
|
-
result[:robot_node].update(working_memory: true)
|
|
198
|
-
|
|
152
|
+
store_in_working_memory(node_id, content, token_count: token_count, robot_node: result[:robot_node])
|
|
199
153
|
update_robot_activity
|
|
200
154
|
node_id
|
|
201
155
|
end
|
|
@@ -250,53 +204,53 @@ class HTM
|
|
|
250
204
|
# Normalize timeframe and potentially extract from query
|
|
251
205
|
search_query = topic
|
|
252
206
|
normalized_timeframe = if timeframe == :auto
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
207
|
+
result = HTM::Timeframe.normalize(:auto, query: topic)
|
|
208
|
+
search_query = result.query # Use cleaned query for search
|
|
209
|
+
result.timeframe
|
|
210
|
+
else
|
|
211
|
+
HTM::Timeframe.normalize(timeframe)
|
|
212
|
+
end
|
|
259
213
|
|
|
260
214
|
# Use relevance-based search if requested
|
|
261
|
-
if with_relevance
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
215
|
+
nodes = if with_relevance
|
|
216
|
+
@long_term_memory.search_with_relevance(
|
|
217
|
+
timeframe: normalized_timeframe,
|
|
218
|
+
query: search_query,
|
|
219
|
+
query_tags: query_tags,
|
|
220
|
+
limit: limit,
|
|
221
|
+
embedding_service: %i[vector hybrid].include?(strategy) ? HTM : nil,
|
|
222
|
+
metadata: metadata
|
|
223
|
+
)
|
|
224
|
+
else
|
|
225
|
+
# Perform standard RAG-based retrieval
|
|
226
|
+
case strategy
|
|
227
|
+
when :vector
|
|
228
|
+
# Vector search using query embedding
|
|
229
|
+
@long_term_memory.search(
|
|
230
|
+
timeframe: normalized_timeframe,
|
|
231
|
+
query: search_query,
|
|
232
|
+
limit: limit,
|
|
233
|
+
embedding_service: HTM,
|
|
234
|
+
metadata: metadata
|
|
235
|
+
)
|
|
236
|
+
when :fulltext
|
|
237
|
+
@long_term_memory.search_fulltext(
|
|
238
|
+
timeframe: normalized_timeframe,
|
|
239
|
+
query: search_query,
|
|
240
|
+
limit: limit,
|
|
241
|
+
metadata: metadata
|
|
242
|
+
)
|
|
243
|
+
when :hybrid
|
|
244
|
+
# Hybrid search combining vector + fulltext
|
|
245
|
+
@long_term_memory.search_hybrid(
|
|
246
|
+
timeframe: normalized_timeframe,
|
|
247
|
+
query: search_query,
|
|
248
|
+
limit: limit,
|
|
249
|
+
embedding_service: HTM,
|
|
250
|
+
metadata: metadata
|
|
251
|
+
)
|
|
252
|
+
end
|
|
253
|
+
end
|
|
300
254
|
|
|
301
255
|
# Add to working memory (evict if needed)
|
|
302
256
|
nodes.each do |node|
|
|
@@ -471,8 +425,8 @@ class HTM
|
|
|
471
425
|
|
|
472
426
|
# Update database: mark all as evicted from working memory
|
|
473
427
|
count = HTM::Models::RobotNode
|
|
474
|
-
|
|
475
|
-
|
|
428
|
+
.where(robot_id: @robot_id, working_memory: true)
|
|
429
|
+
.update(working_memory: false)
|
|
476
430
|
|
|
477
431
|
HTM.logger.info "Cleared #{count} nodes from working memory"
|
|
478
432
|
count
|
|
@@ -580,6 +534,41 @@ class HTM
|
|
|
580
534
|
@long_term_memory.update_robot_activity(@robot_id)
|
|
581
535
|
end
|
|
582
536
|
|
|
537
|
+
def validate_remember_content!(content)
|
|
538
|
+
return unless content.bytesize > MAX_VALUE_LENGTH
|
|
539
|
+
raise ValidationError, "Content exceeds maximum size (#{MAX_VALUE_LENGTH} bytes)"
|
|
540
|
+
end
|
|
541
|
+
|
|
542
|
+
def validate_remember_tags!(tags)
|
|
543
|
+
validate_array!(tags, "tags")
|
|
544
|
+
tags.each do |tag|
|
|
545
|
+
next if tag.is_a?(String) && tag.match?(/\A[a-z0-9-]+(:[a-z0-9-]+)*\z/)
|
|
546
|
+
raise ValidationError, "Invalid tag format: #{tag.inspect}. Tags must be lowercase alphanumeric with hyphens, separated by colons."
|
|
547
|
+
end
|
|
548
|
+
end
|
|
549
|
+
|
|
550
|
+
def enqueue_background_jobs(node_id, tags:, metadata:)
|
|
551
|
+
enqueue_embedding_job(node_id)
|
|
552
|
+
enqueue_tags_job(node_id, manual_tags: tags)
|
|
553
|
+
enqueue_propositions_job(node_id) if HTM.config.extract_propositions && !metadata[:is_proposition]
|
|
554
|
+
end
|
|
555
|
+
|
|
556
|
+
def handle_existing_node_tags(node_id, tags)
|
|
557
|
+
return unless tags.any?
|
|
558
|
+
HTM::Models::Node[node_id].add_tags(tags)
|
|
559
|
+
HTM.logger.info "Added #{tags.length} manual tags to existing node #{node_id}"
|
|
560
|
+
end
|
|
561
|
+
|
|
562
|
+
def store_in_working_memory(node_id, content, token_count:, robot_node:)
|
|
563
|
+
unless @working_memory.has_space?(token_count)
|
|
564
|
+
evicted = @working_memory.evict_to_make_space(token_count)
|
|
565
|
+
evicted_keys = evicted.map { |n| n[:key] }
|
|
566
|
+
@long_term_memory.mark_evicted(robot_id: @robot_id, node_ids: evicted_keys) if evicted_keys.any?
|
|
567
|
+
end
|
|
568
|
+
@working_memory.add(node_id, content, token_count: token_count, access_count: 0)
|
|
569
|
+
robot_node.update(working_memory: true)
|
|
570
|
+
end
|
|
571
|
+
|
|
583
572
|
def enqueue_embedding_job(node_id)
|
|
584
573
|
# Enqueue embedding generation using configured job backend
|
|
585
574
|
# Job will use HTM.embed which delegates to configured embedding_generator
|
|
@@ -655,20 +644,18 @@ class HTM
|
|
|
655
644
|
|
|
656
645
|
def validate_recall_strategy!(strategy)
|
|
657
646
|
raise ValidationError, "Strategy must be a Symbol" unless strategy.is_a?(Symbol)
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
end
|
|
647
|
+
return if VALID_RECALL_STRATEGIES.include?(strategy)
|
|
648
|
+
raise ValidationError, "Invalid strategy: #{strategy}. Must be one of #{VALID_RECALL_STRATEGIES.join(', ')}"
|
|
661
649
|
end
|
|
662
650
|
|
|
663
|
-
|
|
664
651
|
def validate_timeframe!(timeframe)
|
|
665
652
|
return if HTM::Timeframe.valid?(timeframe)
|
|
666
653
|
raise ValidationError, "Invalid timeframe type: #{timeframe.class}. " \
|
|
667
|
-
|
|
654
|
+
"Expected nil, Range, Array<Range>, Date, DateTime, Time, String, or :auto"
|
|
668
655
|
end
|
|
669
656
|
|
|
670
657
|
def validate_positive_integer!(value, name)
|
|
671
|
-
raise ValidationError, "#{name} must be a positive Integer" unless value.is_a?(Integer) && value
|
|
658
|
+
raise ValidationError, "#{name} must be a positive Integer" unless value.is_a?(Integer) && value.positive?
|
|
672
659
|
end
|
|
673
660
|
|
|
674
661
|
def validate_metadata!(metadata)
|
data/lib/tasks/db.rake
CHANGED
|
@@ -3,14 +3,12 @@
|
|
|
3
3
|
namespace :db do
|
|
4
4
|
desc "Run database migrations"
|
|
5
5
|
task :migrate do
|
|
6
|
-
|
|
7
6
|
HTM::Database.migrate
|
|
8
7
|
puts "Database migrations completed successfully"
|
|
9
8
|
end
|
|
10
9
|
|
|
11
10
|
desc "Setup database schema (includes migrations)"
|
|
12
11
|
task :setup do
|
|
13
|
-
|
|
14
12
|
HTM::Database.setup
|
|
15
13
|
puts "Database setup completed successfully"
|
|
16
14
|
end
|