htm 0.0.20 → 0.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +60 -0
- data/Rakefile +104 -18
- data/db/migrate/00001_enable_extensions.rb +9 -5
- data/db/migrate/00002_create_robots.rb +18 -6
- data/db/migrate/00003_create_file_sources.rb +30 -17
- data/db/migrate/00004_create_nodes.rb +60 -48
- data/db/migrate/00005_create_tags.rb +24 -12
- data/db/migrate/00006_create_node_tags.rb +28 -13
- data/db/migrate/00007_create_robot_nodes.rb +40 -26
- data/db/schema.sql +17 -1
- data/db/seeds.rb +33 -33
- data/docs/database/naming-convention.md +244 -0
- data/docs/database_rake_tasks.md +31 -0
- data/docs/development/rake-tasks.md +80 -35
- data/docs/guides/mcp-server.md +70 -1
- data/examples/.envrc +6 -0
- data/examples/.gitignore +2 -0
- data/examples/00_create_examples_db.rb +94 -0
- data/examples/{basic_usage.rb → 01_basic_usage.rb} +12 -16
- data/examples/{custom_llm_configuration.rb → 03_custom_llm_configuration.rb} +13 -3
- data/examples/{file_loader_usage.rb → 04_file_loader_usage.rb} +11 -14
- data/examples/{timeframe_demo.rb → 05_timeframe_demo.rb} +10 -3
- data/examples/{example_app → 06_example_app}/app.rb +15 -15
- data/examples/{cli_app → 07_cli_app}/htm_cli.rb +15 -22
- data/examples/08_sinatra_app/Gemfile.lock +241 -0
- data/examples/{sinatra_app → 08_sinatra_app}/app.rb +19 -18
- data/examples/{mcp_client.rb → 09_mcp_client.rb} +5 -8
- data/examples/{telemetry → 10_telemetry}/SETUP_README.md +1 -1
- data/examples/{telemetry → 10_telemetry}/demo.rb +14 -10
- data/examples/11_robot_groups/README.md +335 -0
- data/examples/{robot_groups → 11_robot_groups/lib}/robot_worker.rb +17 -3
- data/examples/{robot_groups → 11_robot_groups}/multi_process.rb +9 -9
- data/examples/{robot_groups → 11_robot_groups}/same_process.rb +9 -12
- data/examples/{rails_app → 12_rails_app}/Gemfile +3 -0
- data/examples/{rails_app → 12_rails_app}/Gemfile.lock +87 -58
- data/examples/{rails_app → 12_rails_app}/app/controllers/dashboard_controller.rb +10 -6
- data/examples/{rails_app → 12_rails_app}/app/controllers/files_controller.rb +5 -5
- data/examples/{rails_app → 12_rails_app}/app/controllers/memories_controller.rb +11 -7
- data/examples/{rails_app → 12_rails_app}/app/controllers/robots_controller.rb +8 -8
- data/examples/12_rails_app/app/controllers/tags_controller.rb +36 -0
- data/examples/{rails_app → 12_rails_app}/app/views/dashboard/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/files/new.html.erb +5 -2
- data/examples/{rails_app → 12_rails_app}/app/views/memories/_memory_card.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/deleted.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/edit.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/show.html.erb +4 -4
- data/examples/{rails_app → 12_rails_app}/app/views/robots/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/robots/show.html.erb +4 -4
- data/examples/{rails_app → 12_rails_app}/app/views/search/index.html.erb +1 -1
- data/examples/{rails_app → 12_rails_app}/app/views/tags/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/tags/show.html.erb +1 -1
- data/examples/12_rails_app/config/initializers/htm.rb +7 -0
- data/examples/12_rails_app/config/initializers/rack.rb +5 -0
- data/examples/README.md +230 -211
- data/examples/examples_helper.rb +138 -0
- data/lib/htm/config/builder.rb +167 -0
- data/lib/htm/config/database.rb +317 -0
- data/lib/htm/config/defaults.yml +37 -9
- data/lib/htm/config/section.rb +74 -0
- data/lib/htm/config/validator.rb +83 -0
- data/lib/htm/config.rb +64 -360
- data/lib/htm/database.rb +85 -127
- data/lib/htm/errors.rb +14 -0
- data/lib/htm/integrations/sinatra.rb +13 -44
- data/lib/htm/jobs/generate_embedding_job.rb +3 -4
- data/lib/htm/jobs/generate_propositions_job.rb +4 -5
- data/lib/htm/jobs/generate_tags_job.rb +16 -15
- data/lib/htm/loaders/defaults_loader.rb +23 -0
- data/lib/htm/loaders/markdown_loader.rb +17 -15
- data/lib/htm/loaders/xdg_config_loader.rb +9 -9
- data/lib/htm/long_term_memory/fulltext_search.rb +14 -14
- data/lib/htm/long_term_memory/hybrid_search.rb +396 -229
- data/lib/htm/long_term_memory/node_operations.rb +24 -23
- data/lib/htm/long_term_memory/relevance_scorer.rb +23 -20
- data/lib/htm/long_term_memory/robot_operations.rb +4 -4
- data/lib/htm/long_term_memory/tag_operations.rb +91 -77
- data/lib/htm/long_term_memory/vector_search.rb +4 -5
- data/lib/htm/long_term_memory.rb +13 -13
- data/lib/htm/mcp/cli.rb +115 -8
- data/lib/htm/mcp/resources.rb +4 -3
- data/lib/htm/mcp/server.rb +5 -4
- data/lib/htm/mcp/tools.rb +37 -28
- data/lib/htm/migration.rb +72 -0
- data/lib/htm/models/file_source.rb +52 -31
- data/lib/htm/models/node.rb +224 -108
- data/lib/htm/models/node_tag.rb +49 -28
- data/lib/htm/models/robot.rb +38 -27
- data/lib/htm/models/robot_node.rb +63 -35
- data/lib/htm/models/tag.rb +126 -123
- data/lib/htm/observability.rb +45 -41
- data/lib/htm/proposition_service.rb +76 -7
- data/lib/htm/railtie.rb +2 -2
- data/lib/htm/robot_group.rb +30 -18
- data/lib/htm/sequel_config.rb +215 -0
- data/lib/htm/sql_builder.rb +14 -16
- data/lib/htm/tag_service.rb +78 -0
- data/lib/htm/tasks.rb +3 -0
- data/lib/htm/version.rb +1 -1
- data/lib/htm/workflows/remember_workflow.rb +6 -5
- data/lib/htm.rb +26 -22
- data/lib/tasks/db.rake +0 -2
- data/lib/tasks/doc.rake +2 -2
- data/lib/tasks/files.rake +11 -18
- data/lib/tasks/htm.rake +190 -62
- data/lib/tasks/jobs.rake +179 -54
- data/lib/tasks/tags.rake +8 -13
- data/scripts/backfill_parent_tags.rb +376 -0
- data/scripts/normalize_plural_tags.rb +335 -0
- metadata +109 -80
- data/examples/rails_app/app/controllers/tags_controller.rb +0 -30
- data/examples/sinatra_app/Gemfile.lock +0 -166
- data/lib/htm/active_record_config.rb +0 -104
- /data/examples/{config_file_example → 02_config_file_example}/README.md +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/config/htm.local.yml +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/custom_config.yml +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/show_config.rb +0 -0
- /data/examples/{example_app → 06_example_app}/Rakefile +0 -0
- /data/examples/{cli_app → 07_cli_app}/README.md +0 -0
- /data/examples/{sinatra_app → 08_sinatra_app}/Gemfile +0 -0
- /data/examples/{telemetry → 10_telemetry}/README.md +0 -0
- /data/examples/{telemetry → 10_telemetry}/grafana/dashboards/htm-metrics.json +0 -0
- /data/examples/{rails_app → 12_rails_app}/.gitignore +0 -0
- /data/examples/{rails_app → 12_rails_app}/Procfile.dev +0 -0
- /data/examples/{rails_app → 12_rails_app}/README.md +0 -0
- /data/examples/{rails_app → 12_rails_app}/Rakefile +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/application.css +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/inter-font.css +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/controllers/application_controller.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/controllers/search_controller.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/application.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/application.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/index.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/files/index.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/files/show.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/layouts/application.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/memories/index.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/memories/new.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/robots/new.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/shared/_navbar.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/shared/_stat_card.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/dev +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/rails +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/rake +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/application.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/boot.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/database.yml +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/environment.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/importmap.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/routes.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/tailwind.config.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/config.ru +0 -0
- /data/examples/{rails_app → 12_rails_app}/log/.keep +0 -0
- /data/examples/{rails_app → 12_rails_app}/tmp/local_secret.txt +0 -0
|
@@ -32,10 +32,10 @@ class HTM
|
|
|
32
32
|
content_hash = HTM::Models::Node.generate_content_hash(content)
|
|
33
33
|
|
|
34
34
|
# Wrap in transaction to ensure data consistency
|
|
35
|
-
|
|
35
|
+
HTM.db.transaction do
|
|
36
36
|
# Check for existing node with same content (including soft-deleted)
|
|
37
37
|
# This avoids unique constraint violations on content_hash
|
|
38
|
-
existing_node = HTM::Models::Node.with_deleted.
|
|
38
|
+
existing_node = HTM::Models::Node.with_deleted.first(content_hash: content_hash)
|
|
39
39
|
|
|
40
40
|
# If found but soft-deleted, restore it
|
|
41
41
|
if existing_node&.deleted?
|
|
@@ -48,7 +48,7 @@ class HTM
|
|
|
48
48
|
robot_node = link_robot_to_node(robot_id: robot_id, node: existing_node)
|
|
49
49
|
|
|
50
50
|
# Update the node's updated_at timestamp
|
|
51
|
-
existing_node.
|
|
51
|
+
existing_node.update(updated_at: Time.now)
|
|
52
52
|
|
|
53
53
|
{
|
|
54
54
|
node_id: existing_node.id,
|
|
@@ -65,7 +65,7 @@ class HTM
|
|
|
65
65
|
end
|
|
66
66
|
|
|
67
67
|
# Create new node
|
|
68
|
-
node = HTM::Models::Node.create
|
|
68
|
+
node = HTM::Models::Node.create(
|
|
69
69
|
content: content,
|
|
70
70
|
content_hash: content_hash,
|
|
71
71
|
token_count: token_count,
|
|
@@ -97,19 +97,19 @@ class HTM
|
|
|
97
97
|
# @return [HTM::Models::RobotNode] The robot_node link record
|
|
98
98
|
#
|
|
99
99
|
def link_robot_to_node(robot_id:, node:, working_memory: false)
|
|
100
|
-
robot_node = HTM::Models::RobotNode.
|
|
100
|
+
robot_node = HTM::Models::RobotNode.first(robot_id: robot_id, node_id: node.id)
|
|
101
101
|
|
|
102
102
|
if robot_node
|
|
103
103
|
# Existing link - record that robot remembered this again
|
|
104
104
|
robot_node.record_remember!
|
|
105
|
-
robot_node.update
|
|
105
|
+
robot_node.update(working_memory: working_memory) if working_memory
|
|
106
106
|
else
|
|
107
107
|
# New link
|
|
108
|
-
robot_node = HTM::Models::RobotNode.create
|
|
108
|
+
robot_node = HTM::Models::RobotNode.create(
|
|
109
109
|
robot_id: robot_id,
|
|
110
110
|
node_id: node.id,
|
|
111
|
-
first_remembered_at: Time.
|
|
112
|
-
last_remembered_at: Time.
|
|
111
|
+
first_remembered_at: Time.now,
|
|
112
|
+
last_remembered_at: Time.now,
|
|
113
113
|
remember_count: 1,
|
|
114
114
|
working_memory: working_memory
|
|
115
115
|
)
|
|
@@ -127,17 +127,17 @@ class HTM
|
|
|
127
127
|
# @return [Hash, nil] Node data or nil
|
|
128
128
|
#
|
|
129
129
|
def retrieve(node_id)
|
|
130
|
-
node = HTM::Models::Node.
|
|
130
|
+
node = HTM::Models::Node.first(id: node_id)
|
|
131
131
|
return nil unless node
|
|
132
132
|
|
|
133
|
-
# Track access in a single UPDATE query (instead of separate
|
|
134
|
-
node.
|
|
135
|
-
access_count:
|
|
136
|
-
last_accessed: Time.
|
|
133
|
+
# Track access in a single UPDATE query (instead of separate operations)
|
|
134
|
+
node.this.update(
|
|
135
|
+
access_count: Sequel[:access_count] + 1,
|
|
136
|
+
last_accessed: Time.now
|
|
137
137
|
)
|
|
138
138
|
|
|
139
139
|
# Reload to get updated values
|
|
140
|
-
node.
|
|
140
|
+
node.refresh.to_hash
|
|
141
141
|
end
|
|
142
142
|
|
|
143
143
|
# Update last_accessed timestamp
|
|
@@ -146,8 +146,8 @@ class HTM
|
|
|
146
146
|
# @return [void]
|
|
147
147
|
#
|
|
148
148
|
def update_last_accessed(node_id)
|
|
149
|
-
node = HTM::Models::Node.
|
|
150
|
-
node&.update(last_accessed: Time.
|
|
149
|
+
node = HTM::Models::Node.first(id: node_id)
|
|
150
|
+
node&.update(last_accessed: Time.now)
|
|
151
151
|
end
|
|
152
152
|
|
|
153
153
|
# Delete a node
|
|
@@ -156,8 +156,8 @@ class HTM
|
|
|
156
156
|
# @return [void]
|
|
157
157
|
#
|
|
158
158
|
def delete(node_id)
|
|
159
|
-
node = HTM::Models::Node.
|
|
160
|
-
node&.
|
|
159
|
+
node = HTM::Models::Node.first(id: node_id)
|
|
160
|
+
node&.delete
|
|
161
161
|
|
|
162
162
|
# Selectively invalidate search-related cache entries only
|
|
163
163
|
@cache&.invalidate_methods!(:search, :fulltext, :hybrid)
|
|
@@ -169,7 +169,7 @@ class HTM
|
|
|
169
169
|
# @return [Boolean] True if node exists
|
|
170
170
|
#
|
|
171
171
|
def exists?(node_id)
|
|
172
|
-
HTM::Models::Node.
|
|
172
|
+
HTM::Models::Node.where(id: node_id).count > 0
|
|
173
173
|
end
|
|
174
174
|
|
|
175
175
|
# Mark nodes as evicted from working memory
|
|
@@ -186,7 +186,7 @@ class HTM
|
|
|
186
186
|
|
|
187
187
|
HTM::Models::RobotNode
|
|
188
188
|
.where(robot_id: robot_id, node_id: node_ids)
|
|
189
|
-
.
|
|
189
|
+
.update(working_memory: false)
|
|
190
190
|
end
|
|
191
191
|
|
|
192
192
|
# Track access for multiple nodes (bulk operation)
|
|
@@ -200,8 +200,9 @@ class HTM
|
|
|
200
200
|
return if node_ids.empty?
|
|
201
201
|
|
|
202
202
|
# Atomic batch update
|
|
203
|
-
HTM::Models::Node.where(id: node_ids).
|
|
204
|
-
|
|
203
|
+
HTM::Models::Node.where(id: node_ids).update(
|
|
204
|
+
access_count: Sequel[:access_count] + 1,
|
|
205
|
+
last_accessed: Sequel.lit('NOW()')
|
|
205
206
|
)
|
|
206
207
|
end
|
|
207
208
|
end
|
|
@@ -114,7 +114,7 @@ class HTM
|
|
|
114
114
|
# Full-text search (returns hashes directly)
|
|
115
115
|
search_fulltext_uncached(timeframe: timeframe, query: query, limit: limit * 2, metadata: metadata)
|
|
116
116
|
else
|
|
117
|
-
# Time-range only - use raw SQL to avoid
|
|
117
|
+
# Time-range only - use raw SQL to avoid ORM object instantiation
|
|
118
118
|
# This is more efficient than .map(&:attributes) which creates intermediate objects
|
|
119
119
|
fetch_candidates_by_timeframe(timeframe: timeframe, metadata: metadata, limit: limit * 2)
|
|
120
120
|
end
|
|
@@ -146,7 +146,7 @@ class HTM
|
|
|
146
146
|
.take(limit)
|
|
147
147
|
end
|
|
148
148
|
|
|
149
|
-
# Fetch candidates by timeframe using raw SQL (avoids
|
|
149
|
+
# Fetch candidates by timeframe using raw SQL (avoids ORM overhead)
|
|
150
150
|
#
|
|
151
151
|
# @param timeframe [nil, Range, Array<Range>] Time range(s) to search
|
|
152
152
|
# @param metadata [Hash] Filter by metadata fields
|
|
@@ -169,10 +169,7 @@ class HTM
|
|
|
169
169
|
LIMIT ?
|
|
170
170
|
SQL
|
|
171
171
|
|
|
172
|
-
|
|
173
|
-
ActiveRecord::Base.sanitize_sql_array([sql, limit])
|
|
174
|
-
)
|
|
175
|
-
result.to_a
|
|
172
|
+
HTM.db.fetch(sql, limit).all.map { |r| r.transform_keys(&:to_s) }
|
|
176
173
|
end
|
|
177
174
|
|
|
178
175
|
# Search nodes by tags
|
|
@@ -188,30 +185,36 @@ class HTM
|
|
|
188
185
|
|
|
189
186
|
# Build base query with specific columns to avoid loading unnecessary data
|
|
190
187
|
query = HTM::Models::Node
|
|
191
|
-
.select(
|
|
192
|
-
|
|
193
|
-
|
|
188
|
+
.select(
|
|
189
|
+
Sequel[:nodes][:id],
|
|
190
|
+
Sequel[:nodes][:content],
|
|
191
|
+
Sequel[:nodes][:access_count],
|
|
192
|
+
Sequel[:nodes][:created_at],
|
|
193
|
+
Sequel[:nodes][:token_count]
|
|
194
|
+
)
|
|
195
|
+
.join(:node_tags, node_id: :id)
|
|
196
|
+
.join(:tags, id: Sequel[:node_tags][:tag_id])
|
|
197
|
+
.where(Sequel[:tags][:name] => tags)
|
|
194
198
|
.distinct
|
|
195
199
|
|
|
196
200
|
# Apply timeframe filter if provided
|
|
197
|
-
query = query.where(created_at
|
|
201
|
+
query = query.where(Sequel[:nodes][:created_at] => timeframe) if timeframe
|
|
198
202
|
|
|
199
203
|
if match_all
|
|
200
204
|
# Match ALL tags (intersection)
|
|
201
205
|
query = query
|
|
202
|
-
.group(
|
|
203
|
-
.having(
|
|
206
|
+
.group(Sequel[:nodes][:id])
|
|
207
|
+
.having { Sequel.function(:count, Sequel[:tags][:name].distinct) =~ tags.size }
|
|
204
208
|
end
|
|
205
209
|
|
|
206
|
-
#
|
|
207
|
-
|
|
208
|
-
nodes = query.limit(limit).map do |node|
|
|
210
|
+
# Fetch and convert to hashes with string keys
|
|
211
|
+
nodes = query.limit(limit).all.map do |row|
|
|
209
212
|
{
|
|
210
|
-
'id' =>
|
|
211
|
-
'content' =>
|
|
212
|
-
'access_count' =>
|
|
213
|
-
'created_at' =>
|
|
214
|
-
'token_count' =>
|
|
213
|
+
'id' => row[:id],
|
|
214
|
+
'content' => row[:content],
|
|
215
|
+
'access_count' => row[:access_count],
|
|
216
|
+
'created_at' => row[:created_at],
|
|
217
|
+
'token_count' => row[:token_count]
|
|
215
218
|
}
|
|
216
219
|
end
|
|
217
220
|
|
|
@@ -15,8 +15,8 @@ class HTM
|
|
|
15
15
|
# @return [Integer] Robot ID
|
|
16
16
|
#
|
|
17
17
|
def register_robot(robot_name)
|
|
18
|
-
robot = HTM::Models::Robot.
|
|
19
|
-
robot.update(last_active: Time.
|
|
18
|
+
robot = HTM::Models::Robot.find_or_create(name: robot_name)
|
|
19
|
+
robot.update(last_active: Time.now)
|
|
20
20
|
robot.id
|
|
21
21
|
end
|
|
22
22
|
|
|
@@ -26,8 +26,8 @@ class HTM
|
|
|
26
26
|
# @return [void]
|
|
27
27
|
#
|
|
28
28
|
def update_robot_activity(robot_id)
|
|
29
|
-
robot = HTM::Models::Robot.
|
|
30
|
-
robot&.update(last_active: Time.
|
|
29
|
+
robot = HTM::Models::Robot.first(id: robot_id)
|
|
30
|
+
robot&.update(last_active: Time.now)
|
|
31
31
|
end
|
|
32
32
|
end
|
|
33
33
|
end
|
|
@@ -36,20 +36,31 @@ class HTM
|
|
|
36
36
|
attr_accessor :popular_tags_cache, :popular_tags_cache_expires_at, :popular_tags_mutex
|
|
37
37
|
end
|
|
38
38
|
|
|
39
|
-
# Add a tag to a node
|
|
39
|
+
# Add a tag to a node (creates tag and all parent tags)
|
|
40
|
+
#
|
|
41
|
+
# When adding a hierarchical tag like "database:postgresql:extensions",
|
|
42
|
+
# this also creates and associates the parent tags "database" and
|
|
43
|
+
# "database:postgresql" with the node.
|
|
40
44
|
#
|
|
41
45
|
# @param node_id [Integer] Node database ID
|
|
42
46
|
# @param tag [String] Tag name
|
|
43
47
|
# @return [void]
|
|
44
48
|
#
|
|
49
|
+
# @example
|
|
50
|
+
# add_tag(node_id: 123, tag: "database:postgresql:extensions")
|
|
51
|
+
# # Creates tags: "database", "database:postgresql", "database:postgresql:extensions"
|
|
52
|
+
# # Associates all three with node 123
|
|
53
|
+
#
|
|
45
54
|
def add_tag(node_id:, tag:)
|
|
46
|
-
|
|
47
|
-
HTM::Models::
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
55
|
+
# Create tag and all ancestor tags, then associate each with the node
|
|
56
|
+
HTM::Models::Tag.find_or_create_with_ancestors(tag).each do |tag_record|
|
|
57
|
+
HTM::Models::NodeTag.find_or_create(
|
|
58
|
+
node_id: node_id,
|
|
59
|
+
tag_id: tag_record.id
|
|
60
|
+
)
|
|
61
|
+
rescue Sequel::UniqueConstraintViolation
|
|
62
|
+
# Tag association already exists, ignore
|
|
63
|
+
end
|
|
53
64
|
end
|
|
54
65
|
|
|
55
66
|
# Retrieve nodes by ontological topic
|
|
@@ -70,34 +81,48 @@ class HTM
|
|
|
70
81
|
# Enforce limit to prevent DoS
|
|
71
82
|
safe_limit = [[limit.to_i, 1].max, MAX_TAG_QUERY_LIMIT].min
|
|
72
83
|
|
|
84
|
+
# Build base query with joins
|
|
85
|
+
# Use subquery with DISTINCT ON to get unique nodes by id
|
|
73
86
|
if exact
|
|
74
|
-
|
|
75
|
-
.
|
|
76
|
-
.
|
|
87
|
+
node_ids = HTM::Models::Node
|
|
88
|
+
.select(Sequel[:nodes][:id])
|
|
89
|
+
.join(:node_tags, node_id: :id)
|
|
90
|
+
.join(:tags, id: Sequel[:node_tags][:tag_id])
|
|
91
|
+
.where(Sequel[:tags][:name] => topic_path)
|
|
77
92
|
.distinct
|
|
78
|
-
.
|
|
79
|
-
.limit(safe_limit)
|
|
93
|
+
.select_map(Sequel[:nodes][:id])
|
|
80
94
|
elsif fuzzy
|
|
81
95
|
# Trigram similarity search - tolerates typos and partial matches
|
|
82
96
|
safe_similarity = [[min_similarity.to_f, 0.0].max, 1.0].min
|
|
83
|
-
|
|
84
|
-
.
|
|
85
|
-
.
|
|
97
|
+
node_ids = HTM::Models::Node
|
|
98
|
+
.select(Sequel[:nodes][:id])
|
|
99
|
+
.join(:node_tags, node_id: :id)
|
|
100
|
+
.join(:tags, id: Sequel[:node_tags][:tag_id])
|
|
101
|
+
.where(Sequel.lit("similarity(tags.name, ?) >= ?", topic_path, safe_similarity))
|
|
86
102
|
.distinct
|
|
87
|
-
.
|
|
88
|
-
.limit(safe_limit)
|
|
103
|
+
.select_map(Sequel[:nodes][:id])
|
|
89
104
|
else
|
|
90
105
|
# Sanitize LIKE pattern to prevent wildcard injection
|
|
91
106
|
safe_pattern = HTM::SqlBuilder.sanitize_like_pattern(topic_path)
|
|
92
|
-
|
|
93
|
-
.
|
|
94
|
-
.
|
|
107
|
+
node_ids = HTM::Models::Node
|
|
108
|
+
.select(Sequel[:nodes][:id])
|
|
109
|
+
.join(:node_tags, node_id: :id)
|
|
110
|
+
.join(:tags, id: Sequel[:node_tags][:tag_id])
|
|
111
|
+
.where(Sequel.like(Sequel[:tags][:name], "#{safe_pattern}%"))
|
|
95
112
|
.distinct
|
|
96
|
-
.
|
|
97
|
-
.limit(safe_limit)
|
|
113
|
+
.select_map(Sequel[:nodes][:id])
|
|
98
114
|
end
|
|
99
115
|
|
|
100
|
-
|
|
116
|
+
# Return empty array if no node_ids found
|
|
117
|
+
return [] if node_ids.empty?
|
|
118
|
+
|
|
119
|
+
# Fetch full node records for the matching ids
|
|
120
|
+
HTM::Models::Node
|
|
121
|
+
.where(id: node_ids)
|
|
122
|
+
.order(Sequel.desc(:created_at))
|
|
123
|
+
.limit(safe_limit)
|
|
124
|
+
.all
|
|
125
|
+
.map(&:to_hash)
|
|
101
126
|
end
|
|
102
127
|
|
|
103
128
|
# Get ontology structure view
|
|
@@ -105,10 +130,9 @@ class HTM
|
|
|
105
130
|
# @return [Array<Hash>] Ontology structure
|
|
106
131
|
#
|
|
107
132
|
def ontology_structure
|
|
108
|
-
|
|
133
|
+
HTM.db.fetch(
|
|
109
134
|
"SELECT * FROM ontology_structure WHERE root_topic IS NOT NULL ORDER BY root_topic, level1_topic, level2_topic"
|
|
110
|
-
)
|
|
111
|
-
result.to_a
|
|
135
|
+
).all.map { |r| r.transform_keys(&:to_s) }
|
|
112
136
|
end
|
|
113
137
|
|
|
114
138
|
# Get topic relationships (co-occurrence)
|
|
@@ -122,7 +146,6 @@ class HTM
|
|
|
122
146
|
safe_limit = [[limit.to_i, 1].max, MAX_TAG_QUERY_LIMIT].min
|
|
123
147
|
safe_min = [min_shared_nodes.to_i, 1].max
|
|
124
148
|
|
|
125
|
-
# Use parameterized query to prevent SQL injection
|
|
126
149
|
sql = <<~SQL
|
|
127
150
|
SELECT t1.name AS topic1, t2.name AS topic2, COUNT(DISTINCT nt1.node_id) AS shared_nodes
|
|
128
151
|
FROM tags t1
|
|
@@ -131,17 +154,12 @@ class HTM
|
|
|
131
154
|
JOIN tags t2 ON nt2.tag_id = t2.id
|
|
132
155
|
WHERE t1.name < t2.name
|
|
133
156
|
GROUP BY t1.name, t2.name
|
|
134
|
-
HAVING COUNT(DISTINCT nt1.node_id) >=
|
|
157
|
+
HAVING COUNT(DISTINCT nt1.node_id) >= ?
|
|
135
158
|
ORDER BY shared_nodes DESC
|
|
136
|
-
LIMIT
|
|
159
|
+
LIMIT ?
|
|
137
160
|
SQL
|
|
138
161
|
|
|
139
|
-
|
|
140
|
-
sql,
|
|
141
|
-
'topic_relationships',
|
|
142
|
-
[[nil, safe_min], [nil, safe_limit]]
|
|
143
|
-
)
|
|
144
|
-
result.to_a
|
|
162
|
+
HTM.db.fetch(sql, safe_min, safe_limit).all.map { |r| r.transform_keys(&:to_s) }
|
|
145
163
|
end
|
|
146
164
|
|
|
147
165
|
# Get topics for a specific node
|
|
@@ -151,10 +169,10 @@ class HTM
|
|
|
151
169
|
#
|
|
152
170
|
def node_topics(node_id)
|
|
153
171
|
HTM::Models::Tag
|
|
154
|
-
.
|
|
155
|
-
.where(node_tags:
|
|
172
|
+
.join(:node_tags, tag_id: :id)
|
|
173
|
+
.where(Sequel[:node_tags][:node_id] => node_id)
|
|
156
174
|
.order(:name)
|
|
157
|
-
.
|
|
175
|
+
.select_map(:name)
|
|
158
176
|
end
|
|
159
177
|
|
|
160
178
|
# Get tags for a specific node
|
|
@@ -164,10 +182,10 @@ class HTM
|
|
|
164
182
|
#
|
|
165
183
|
def get_node_tags(node_id)
|
|
166
184
|
HTM::Models::Tag
|
|
167
|
-
.
|
|
168
|
-
.where(node_tags:
|
|
169
|
-
.
|
|
170
|
-
rescue
|
|
185
|
+
.join(:node_tags, tag_id: :id)
|
|
186
|
+
.where(Sequel[:node_tags][:node_id] => node_id)
|
|
187
|
+
.select_map(:name)
|
|
188
|
+
rescue Sequel::Error => e
|
|
171
189
|
HTM.logger.error("Failed to retrieve tags for node #{node_id}: #{e.message}")
|
|
172
190
|
[]
|
|
173
191
|
end
|
|
@@ -182,13 +200,13 @@ class HTM
|
|
|
182
200
|
|
|
183
201
|
# Single query to get all tags for all nodes
|
|
184
202
|
results = HTM::Models::NodeTag
|
|
185
|
-
.
|
|
203
|
+
.join(:tags, id: :tag_id)
|
|
186
204
|
.where(node_id: node_ids)
|
|
187
|
-
.
|
|
205
|
+
.select_map([:node_id, Sequel[:tags][:name]])
|
|
188
206
|
|
|
189
207
|
# Group by node_id
|
|
190
208
|
results.group_by(&:first).transform_values { |pairs| pairs.map(&:last) }
|
|
191
|
-
rescue
|
|
209
|
+
rescue Sequel::Error => e
|
|
192
210
|
HTM.logger.error("Failed to batch load tags: #{e.message}")
|
|
193
211
|
{}
|
|
194
212
|
end
|
|
@@ -204,17 +222,21 @@ class HTM
|
|
|
204
222
|
safe_limit = [[limit.to_i, 1].max, MAX_TAG_QUERY_LIMIT].min
|
|
205
223
|
|
|
206
224
|
query = HTM::Models::Tag
|
|
207
|
-
.
|
|
208
|
-
.
|
|
209
|
-
.group(
|
|
210
|
-
.select(
|
|
211
|
-
|
|
212
|
-
|
|
225
|
+
.join(:node_tags, tag_id: :id)
|
|
226
|
+
.join(:nodes, id: Sequel[:node_tags][:node_id])
|
|
227
|
+
.group(Sequel[:tags][:id], Sequel[:tags][:name])
|
|
228
|
+
.select(Sequel[:tags][:name], Sequel.function(:count, Sequel[:node_tags][:id]).as(:usage_count))
|
|
229
|
+
|
|
230
|
+
if timeframe
|
|
231
|
+
query = query.where(Sequel[:nodes][:created_at] >= timeframe.begin)
|
|
232
|
+
.where(Sequel[:nodes][:created_at] <= timeframe.end)
|
|
233
|
+
end
|
|
213
234
|
|
|
214
235
|
query
|
|
215
|
-
.order(
|
|
236
|
+
.order(Sequel.desc(:usage_count))
|
|
216
237
|
.limit(safe_limit)
|
|
217
|
-
.
|
|
238
|
+
.all
|
|
239
|
+
.map { |tag| { name: tag[:name], usage_count: tag[:usage_count].to_i } }
|
|
218
240
|
end
|
|
219
241
|
|
|
220
242
|
# Fuzzy search for tags using trigram similarity
|
|
@@ -243,12 +265,10 @@ class HTM
|
|
|
243
265
|
LIMIT ?
|
|
244
266
|
SQL
|
|
245
267
|
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
result.map { |r| { name: r['name'], similarity: r['similarity'].to_f } }
|
|
251
|
-
rescue ActiveRecord::ActiveRecordError => e
|
|
268
|
+
HTM.db.fetch(sql, query, query, safe_similarity, safe_limit)
|
|
269
|
+
.all
|
|
270
|
+
.map { |r| { name: r[:name], similarity: r[:similarity].to_f } }
|
|
271
|
+
rescue Sequel::Error => e
|
|
252
272
|
HTM.logger.error("Failed to search tags: #{e.message}")
|
|
253
273
|
[]
|
|
254
274
|
end
|
|
@@ -353,6 +373,7 @@ class HTM
|
|
|
353
373
|
params = []
|
|
354
374
|
|
|
355
375
|
# Exact matches (highest priority)
|
|
376
|
+
# Use Sequel.lit with ? placeholders for proper parameter binding
|
|
356
377
|
if exact_candidates.any?
|
|
357
378
|
placeholders = exact_candidates.map { '?' }.join(', ')
|
|
358
379
|
conditions << "(SELECT name, 1 as priority FROM tags WHERE name IN (#{placeholders}))"
|
|
@@ -367,19 +388,13 @@ class HTM
|
|
|
367
388
|
end
|
|
368
389
|
|
|
369
390
|
# Component matches
|
|
370
|
-
# Pre-sanitize components once to avoid duplicate processing
|
|
371
391
|
if component_candidates.any?
|
|
372
|
-
|
|
373
|
-
sanitized_components = component_candidates.map do |component|
|
|
374
|
-
[component, HTM::SqlBuilder.sanitize_like_pattern(component)]
|
|
375
|
-
end
|
|
376
|
-
|
|
377
|
-
component_conditions = sanitized_components.map do |_component, _safe|
|
|
378
|
-
# Match: exact, starts with, ends with, or middle
|
|
392
|
+
component_conditions = component_candidates.map do |_|
|
|
379
393
|
"(name = ? OR name LIKE ? OR name LIKE ? OR name LIKE ?)"
|
|
380
394
|
end
|
|
381
395
|
|
|
382
|
-
component_params =
|
|
396
|
+
component_params = component_candidates.flat_map do |component|
|
|
397
|
+
safe_component = HTM::SqlBuilder.sanitize_like_pattern(component)
|
|
383
398
|
[
|
|
384
399
|
component, # exact match
|
|
385
400
|
"#{safe_component}:%", # starts with
|
|
@@ -393,10 +408,11 @@ class HTM
|
|
|
393
408
|
end
|
|
394
409
|
|
|
395
410
|
# Trigram fuzzy matches (lowest priority - fallback for typos)
|
|
396
|
-
# Uses pg_trgm similarity to find tags even with spelling errors
|
|
397
411
|
if fuzzy_fallback && component_candidates.any?
|
|
398
412
|
safe_similarity = [[min_similarity.to_f, 0.0].max, 1.0].min
|
|
399
|
-
trigram_conditions = component_candidates.map
|
|
413
|
+
trigram_conditions = component_candidates.map do |_|
|
|
414
|
+
"similarity(name, ?) >= ?"
|
|
415
|
+
end
|
|
400
416
|
trigram_params = component_candidates.flat_map { |c| [c, safe_similarity] }
|
|
401
417
|
|
|
402
418
|
conditions << "(SELECT name, 4 as priority FROM tags WHERE #{trigram_conditions.join(' OR ')})"
|
|
@@ -406,6 +422,8 @@ class HTM
|
|
|
406
422
|
return [] if conditions.empty?
|
|
407
423
|
|
|
408
424
|
# Combine with UNION and order by priority
|
|
425
|
+
params << MAX_TAG_QUERY_LIMIT
|
|
426
|
+
|
|
409
427
|
sql = <<~SQL
|
|
410
428
|
SELECT DISTINCT name FROM (
|
|
411
429
|
#{conditions.join(' UNION ')}
|
|
@@ -413,13 +431,9 @@ class HTM
|
|
|
413
431
|
ORDER BY name
|
|
414
432
|
LIMIT ?
|
|
415
433
|
SQL
|
|
416
|
-
params << MAX_TAG_QUERY_LIMIT
|
|
417
434
|
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
)
|
|
421
|
-
result.map { |r| r['name'] }
|
|
422
|
-
rescue ActiveRecord::ActiveRecordError => e
|
|
435
|
+
HTM.db.fetch(sql, *params).all.map { |r| r[:name] }
|
|
436
|
+
rescue Sequel::Error => e
|
|
423
437
|
HTM.logger.error("Failed to find matching tags: #{e.message}")
|
|
424
438
|
[]
|
|
425
439
|
end
|
|
@@ -94,15 +94,14 @@ class HTM
|
|
|
94
94
|
LIMIT ?
|
|
95
95
|
SQL
|
|
96
96
|
|
|
97
|
-
result =
|
|
98
|
-
ActiveRecord::Base.sanitize_sql_array([sql, embedding_str, embedding_str, limit])
|
|
99
|
-
)
|
|
97
|
+
result = HTM.db.fetch(sql, embedding_str, embedding_str, limit).all
|
|
100
98
|
|
|
101
99
|
# Track access for retrieved nodes
|
|
102
|
-
node_ids = result.map { |r| r[
|
|
100
|
+
node_ids = result.map { |r| r[:id] }
|
|
103
101
|
track_access(node_ids)
|
|
104
102
|
|
|
105
|
-
|
|
103
|
+
# Convert to hash with string keys for compatibility
|
|
104
|
+
result.map { |r| r.transform_keys(&:to_s) }
|
|
106
105
|
end
|
|
107
106
|
end
|
|
108
107
|
end
|
data/lib/htm/long_term_memory.rb
CHANGED
|
@@ -17,7 +17,7 @@ require_relative 'long_term_memory/fulltext_search'
|
|
|
17
17
|
require_relative 'long_term_memory/hybrid_search'
|
|
18
18
|
|
|
19
19
|
class HTM
|
|
20
|
-
# Long-term Memory - PostgreSQL
|
|
20
|
+
# Long-term Memory - PostgreSQL-backed permanent storage
|
|
21
21
|
#
|
|
22
22
|
# LongTermMemory provides durable storage for all memory nodes with:
|
|
23
23
|
# - Vector similarity search (RAG)
|
|
@@ -25,7 +25,7 @@ class HTM
|
|
|
25
25
|
# - Time-range queries
|
|
26
26
|
# - Relationship graphs
|
|
27
27
|
# - Tag system
|
|
28
|
-
# -
|
|
28
|
+
# - Sequel ORM for data access
|
|
29
29
|
# - Query result caching for efficiency
|
|
30
30
|
#
|
|
31
31
|
# This class uses standalone utility classes and modules:
|
|
@@ -72,7 +72,7 @@ class HTM
|
|
|
72
72
|
# Initialize long-term memory storage
|
|
73
73
|
#
|
|
74
74
|
# @param config [Hash] Database configuration (host, port, dbname, user, password)
|
|
75
|
-
# @param pool_size [Integer, nil] Connection pool size (uses
|
|
75
|
+
# @param pool_size [Integer, nil] Connection pool size (uses Sequel default if nil)
|
|
76
76
|
# @param query_timeout [Integer] Query timeout in milliseconds (default: 30000)
|
|
77
77
|
# @param cache_size [Integer] Number of query results to cache (default: 1000, use 0 to disable)
|
|
78
78
|
# @param cache_ttl [Integer] Cache time-to-live in seconds (default: 300)
|
|
@@ -90,8 +90,8 @@ class HTM
|
|
|
90
90
|
@config = config
|
|
91
91
|
@query_timeout = query_timeout # in milliseconds
|
|
92
92
|
|
|
93
|
-
# Set statement timeout for
|
|
94
|
-
|
|
93
|
+
# Set statement timeout for Sequel queries
|
|
94
|
+
HTM.db.run("SET statement_timeout = #{@query_timeout}")
|
|
95
95
|
|
|
96
96
|
# Initialize query result cache (disable with cache_size: 0)
|
|
97
97
|
@cache = HTM::QueryCache.new(size: cache_size, ttl: cache_ttl)
|
|
@@ -104,13 +104,13 @@ class HTM
|
|
|
104
104
|
def stats
|
|
105
105
|
base_stats = {
|
|
106
106
|
total_nodes: HTM::Models::Node.count,
|
|
107
|
-
nodes_by_robot: HTM::Models::RobotNode.
|
|
107
|
+
nodes_by_robot: HTM::Models::RobotNode.group_and_count(:robot_id).as_hash(:robot_id, :count),
|
|
108
108
|
total_tags: HTM::Models::Tag.count,
|
|
109
|
-
oldest_memory: HTM::Models::Node.
|
|
110
|
-
newest_memory: HTM::Models::Node.
|
|
109
|
+
oldest_memory: HTM::Models::Node.min(:created_at),
|
|
110
|
+
newest_memory: HTM::Models::Node.max(:created_at),
|
|
111
111
|
active_robots: HTM::Models::Robot.count,
|
|
112
|
-
robot_activity: HTM::Models::Robot.select(:id, :name, :last_active).map(&:
|
|
113
|
-
database_size:
|
|
112
|
+
robot_activity: HTM::Models::Robot.select(:id, :name, :last_active).all.map(&:values),
|
|
113
|
+
database_size: HTM.db.get(Sequel.function(:pg_database_size, Sequel.function(:current_database))).to_i
|
|
114
114
|
}
|
|
115
115
|
|
|
116
116
|
# Include cache statistics if cache is enabled
|
|
@@ -121,9 +121,9 @@ class HTM
|
|
|
121
121
|
base_stats
|
|
122
122
|
end
|
|
123
123
|
|
|
124
|
-
# Shutdown - no-op with
|
|
124
|
+
# Shutdown - no-op with Sequel (connection pool managed by Sequel)
|
|
125
125
|
def shutdown
|
|
126
|
-
#
|
|
126
|
+
# Sequel handles connection pool shutdown
|
|
127
127
|
# This method kept for API compatibility
|
|
128
128
|
end
|
|
129
129
|
|
|
@@ -137,7 +137,7 @@ class HTM
|
|
|
137
137
|
|
|
138
138
|
# For backwards compatibility with tests/code that expect pool_size
|
|
139
139
|
def pool_size
|
|
140
|
-
|
|
140
|
+
HTM.db.pool.size
|
|
141
141
|
end
|
|
142
142
|
end
|
|
143
143
|
end
|