htm 0.0.31 → 0.0.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.irbrc +2 -3
- data/.rubocop.yml +184 -0
- data/CHANGELOG.md +46 -0
- data/README.md +2 -0
- data/Rakefile +93 -12
- data/db/migrate/00008_create_node_relationships.rb +54 -0
- data/db/migrate/00009_fix_node_relationships_column_types.rb +17 -0
- data/db/schema.sql +124 -1
- data/docs/api/database.md +35 -57
- data/docs/api/embedding-service.md +1 -1
- data/docs/api/index.md +26 -15
- data/docs/api/working-memory.md +8 -8
- data/docs/architecture/index.md +5 -7
- data/docs/architecture/overview.md +5 -8
- data/docs/assets/images/htm-architecture-overview.svg +1 -1
- data/docs/assets/images/htm-context-assembly-flow.svg +2 -2
- data/docs/assets/images/htm-layered-architecture.svg +3 -3
- data/docs/assets/images/two-tier-memory-architecture.svg +1 -1
- data/docs/database/README.md +1 -0
- data/docs/database_rake_tasks.md +20 -28
- data/docs/development/contributing.md +5 -5
- data/docs/development/index.md +4 -7
- data/docs/development/schema.md +71 -1
- data/docs/development/setup.md +40 -82
- data/docs/development/testing.md +1 -1
- data/docs/examples/file-loading.md +4 -4
- data/docs/examples/mcp-client.md +1 -1
- data/docs/getting-started/quick-start.md +4 -4
- data/docs/guides/adding-memories.md +14 -1
- data/docs/guides/configuration.md +5 -5
- data/docs/guides/context-assembly.md +4 -4
- data/docs/guides/file-loading.md +12 -12
- data/docs/guides/getting-started.md +2 -2
- data/docs/guides/long-term-memory.md +7 -27
- data/docs/guides/propositions.md +20 -19
- data/docs/guides/recalling-memories.md +5 -5
- data/docs/guides/tags.md +18 -13
- data/docs/multi_framework_support.md +1 -1
- data/docs/robots/hive-mind.md +1 -1
- data/docs/robots/multi-robot.md +2 -2
- data/docs/robots/robot-groups.md +1 -1
- data/docs/robots/two-tier-memory.md +72 -94
- data/docs/setup_local_database.md +8 -54
- data/docs/using_rake_tasks_in_your_app.md +6 -6
- data/examples/01_basic_usage.rb +1 -0
- data/examples/03_custom_llm_configuration.rb +1 -0
- data/examples/04_file_loader_usage.rb +1 -0
- data/examples/05_timeframe_demo.rb +1 -0
- data/examples/06_example_app/app.rb +1 -0
- data/examples/07_cli_app/htm_cli.rb +1 -0
- data/examples/09_mcp_client.rb +1 -0
- data/examples/10_telemetry/demo.rb +1 -0
- data/examples/11_robot_groups/multi_process.rb +1 -0
- data/examples/11_robot_groups/same_process.rb +1 -0
- data/examples/12_rails_app/.envrc +12 -0
- data/examples/12_rails_app/Gemfile +8 -3
- data/examples/12_rails_app/Gemfile.lock +94 -89
- data/examples/12_rails_app/README.md +70 -19
- data/examples/12_rails_app/app/controllers/application_controller.rb +6 -0
- data/examples/12_rails_app/app/controllers/chats_controller.rb +305 -0
- data/examples/12_rails_app/app/controllers/dashboard_controller.rb +3 -0
- data/examples/12_rails_app/app/controllers/files_controller.rb +17 -2
- data/examples/12_rails_app/app/controllers/home_controller.rb +8 -0
- data/examples/12_rails_app/app/controllers/memories_controller.rb +9 -4
- data/examples/12_rails_app/app/controllers/messages_controller.rb +214 -0
- data/examples/12_rails_app/app/controllers/robots_controller.rb +11 -1
- data/examples/12_rails_app/app/controllers/tags_controller.rb +14 -1
- data/examples/12_rails_app/app/javascript/application.js +1 -1
- data/examples/12_rails_app/app/models/application_record.rb +5 -0
- data/examples/12_rails_app/app/models/chat.rb +36 -0
- data/examples/12_rails_app/app/models/message.rb +5 -0
- data/examples/12_rails_app/app/models/model.rb +5 -0
- data/examples/12_rails_app/app/models/tool_call.rb +5 -0
- data/examples/12_rails_app/app/views/chats/index.html.erb +61 -0
- data/examples/12_rails_app/app/views/chats/show.html.erb +213 -0
- data/examples/12_rails_app/app/views/dashboard/index.html.erb +3 -0
- data/examples/12_rails_app/app/views/files/index.html.erb +10 -5
- data/examples/12_rails_app/app/views/files/new.html.erb +4 -2
- data/examples/12_rails_app/app/views/files/show.html.erb +19 -3
- data/examples/12_rails_app/app/views/home/index.html.erb +45 -0
- data/examples/12_rails_app/app/views/layouts/application.html.erb +20 -18
- data/examples/12_rails_app/app/views/memories/_memory_card.html.erb +1 -1
- data/examples/12_rails_app/app/views/memories/deleted.html.erb +3 -1
- data/examples/12_rails_app/app/views/memories/edit.html.erb +2 -0
- data/examples/12_rails_app/app/views/memories/index.html.erb +2 -0
- data/examples/12_rails_app/app/views/memories/new.html.erb +2 -0
- data/examples/12_rails_app/app/views/memories/show.html.erb +4 -2
- data/examples/12_rails_app/app/views/messages/_message.html.erb +20 -0
- data/examples/12_rails_app/app/views/robots/index.html.erb +2 -0
- data/examples/12_rails_app/app/views/robots/new.html.erb +2 -0
- data/examples/12_rails_app/app/views/robots/show.html.erb +2 -0
- data/examples/12_rails_app/app/views/search/index.html.erb +59 -8
- data/examples/12_rails_app/app/views/shared/_navbar.html.erb +75 -29
- data/examples/12_rails_app/app/views/tags/index.html.erb +2 -0
- data/examples/12_rails_app/app/views/tags/show.html.erb +3 -1
- data/examples/12_rails_app/config/application.rb +1 -1
- data/examples/12_rails_app/config/database.yml +9 -5
- data/examples/12_rails_app/config/importmap.rb +1 -1
- data/examples/12_rails_app/config/initializers/htm.rb +9 -2
- data/examples/12_rails_app/config/initializers/ruby_llm.rb +33 -0
- data/examples/12_rails_app/config/routes.rb +39 -23
- data/examples/12_rails_app/db/migrate/20250124000001_create_ruby_llm_tables.rb +34 -0
- data/examples/12_rails_app/db/migrate/20250124000002_create_models_table.rb +28 -0
- data/examples/12_rails_app/db/schema.rb +67 -0
- data/examples/examples_helper.rb +25 -0
- data/lib/htm/circuit_breaker.rb +5 -6
- data/lib/htm/config/builder.rb +12 -12
- data/lib/htm/config/database.rb +21 -27
- data/lib/htm/config/validator.rb +12 -18
- data/lib/htm/config.rb +76 -65
- data/lib/htm/database.rb +193 -199
- data/lib/htm/embedding_service.rb +4 -9
- data/lib/htm/integrations/sinatra.rb +7 -7
- data/lib/htm/job_adapter.rb +14 -21
- data/lib/htm/jobs/generate_embedding_job.rb +28 -44
- data/lib/htm/jobs/generate_propositions_job.rb +29 -55
- data/lib/htm/jobs/generate_relationships_job.rb +137 -0
- data/lib/htm/jobs/generate_tags_job.rb +45 -67
- data/lib/htm/loaders/markdown_loader.rb +65 -112
- data/lib/htm/long_term_memory/fulltext_search.rb +1 -1
- data/lib/htm/long_term_memory/hybrid_search.rb +300 -128
- data/lib/htm/long_term_memory/node_operations.rb +2 -2
- data/lib/htm/long_term_memory/relevance_scorer.rb +100 -68
- data/lib/htm/long_term_memory/tag_operations.rb +87 -120
- data/lib/htm/long_term_memory/vector_search.rb +1 -1
- data/lib/htm/long_term_memory.rb +2 -1
- data/lib/htm/mcp/cli.rb +59 -58
- data/lib/htm/mcp/server.rb +5 -6
- data/lib/htm/mcp/tools.rb +30 -36
- data/lib/htm/migration.rb +10 -10
- data/lib/htm/models/node.rb +2 -3
- data/lib/htm/models/node_relationship.rb +72 -0
- data/lib/htm/models/node_tag.rb +2 -2
- data/lib/htm/models/robot_node.rb +2 -2
- data/lib/htm/models/tag.rb +41 -28
- data/lib/htm/observability.rb +45 -51
- data/lib/htm/proposition_service.rb +3 -7
- data/lib/htm/query_cache.rb +13 -15
- data/lib/htm/railtie.rb +1 -2
- data/lib/htm/robot_group.rb +9 -9
- data/lib/htm/sequel_config.rb +1 -0
- data/lib/htm/sql_builder.rb +1 -1
- data/lib/htm/tag_service.rb +2 -6
- data/lib/htm/timeframe.rb +4 -5
- data/lib/htm/timeframe_extractor.rb +42 -83
- data/lib/htm/version.rb +1 -1
- data/lib/htm/workflows/remember_workflow.rb +112 -115
- data/lib/htm/working_memory.rb +21 -26
- data/lib/htm.rb +103 -116
- data/lib/tasks/db.rake +0 -2
- data/lib/tasks/doc.rake +14 -13
- data/lib/tasks/files.rake +5 -12
- data/lib/tasks/htm.rake +70 -71
- data/lib/tasks/jobs.rake +41 -47
- data/lib/tasks/tags.rake +3 -8
- metadata +25 -100
|
@@ -58,21 +58,21 @@ class HTM
|
|
|
58
58
|
def calculate_relevance(node:, query_tags: [], vector_similarity: nil, node_tags: nil)
|
|
59
59
|
# 1. Vector similarity (semantic match)
|
|
60
60
|
semantic_score = if vector_similarity
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
61
|
+
vector_similarity
|
|
62
|
+
elsif node['similarity']
|
|
63
|
+
node['similarity'].to_f
|
|
64
|
+
else
|
|
65
|
+
DEFAULT_NEUTRAL_SCORE # Neutral if no embedding
|
|
66
|
+
end
|
|
67
67
|
|
|
68
68
|
# 2. Tag overlap (categorical relevance)
|
|
69
69
|
# Use pre-loaded tags if provided, otherwise fetch (for backward compatibility)
|
|
70
70
|
node_tags ||= get_node_tags(node['id'])
|
|
71
71
|
tag_score = if query_tags.any? && node_tags.any?
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
72
|
+
weighted_hierarchical_jaccard(query_tags, node_tags)
|
|
73
|
+
else
|
|
74
|
+
DEFAULT_NEUTRAL_SCORE # Neutral if no tags
|
|
75
|
+
end
|
|
76
76
|
|
|
77
77
|
# 3. Recency (temporal relevance) - exponential decay with half-life
|
|
78
78
|
age_hours = (Time.now - Time.parse(node['created_at'].to_s)) / 3600.0
|
|
@@ -108,16 +108,22 @@ class HTM
|
|
|
108
108
|
def search_with_relevance(timeframe:, query: nil, query_tags: [], limit: 20, embedding_service: nil, metadata: {})
|
|
109
109
|
# Get candidates from appropriate search method
|
|
110
110
|
candidates = if query && embedding_service
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
111
|
+
# Vector search (returns hashes directly)
|
|
112
|
+
search_uncached(timeframe: timeframe, query: query, limit: limit * 2, embedding_service: embedding_service,
|
|
113
|
+
metadata: metadata)
|
|
114
|
+
elsif query
|
|
115
|
+
# Full-text search (returns hashes directly)
|
|
116
|
+
search_fulltext_uncached(timeframe: timeframe, query: query, limit: limit * 2, metadata: metadata)
|
|
117
|
+
else
|
|
118
|
+
# Time-range only - use raw SQL to avoid ORM object instantiation
|
|
119
|
+
# This is more efficient than .map(&:attributes) which creates intermediate objects
|
|
120
|
+
fetch_candidates_by_timeframe(timeframe: timeframe, metadata: metadata, limit: limit * 2)
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
# Normalize similarity and text_rank to [0,1] across all candidates
|
|
124
|
+
# before scoring so weighted sum is unbiased (ts_rank is unbounded,
|
|
125
|
+
# similarity is already [0,1] but may be narrow)
|
|
126
|
+
normalize_scores_batch(candidates)
|
|
121
127
|
|
|
122
128
|
# Batch load all tags for candidates (fixes N+1 query)
|
|
123
129
|
node_ids = candidates.map { |n| n['id'] }
|
|
@@ -183,62 +189,49 @@ class HTM
|
|
|
183
189
|
def search_by_tags(tags:, match_all: false, timeframe: nil, limit: 20)
|
|
184
190
|
return [] if tags.empty?
|
|
185
191
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
Sequel[:nodes][:id],
|
|
190
|
-
Sequel[:nodes][:content],
|
|
191
|
-
Sequel[:nodes][:access_count],
|
|
192
|
-
Sequel[:nodes][:created_at],
|
|
193
|
-
Sequel[:nodes][:token_count]
|
|
194
|
-
)
|
|
195
|
-
.join(:node_tags, node_id: :id)
|
|
196
|
-
.join(:tags, id: Sequel[:node_tags][:tag_id])
|
|
197
|
-
.where(Sequel[:tags][:name] => tags)
|
|
198
|
-
.distinct
|
|
199
|
-
|
|
200
|
-
# Apply timeframe filter if provided
|
|
201
|
-
query = query.where(Sequel[:nodes][:created_at] => timeframe) if timeframe
|
|
202
|
-
|
|
203
|
-
if match_all
|
|
204
|
-
# Match ALL tags (intersection)
|
|
205
|
-
query = query
|
|
206
|
-
.group(Sequel[:nodes][:id])
|
|
207
|
-
.having { Sequel.function(:count, Sequel[:tags][:name].distinct) =~ tags.size }
|
|
208
|
-
end
|
|
192
|
+
nodes = fetch_nodes_by_tags(tags, match_all: match_all, timeframe: timeframe, limit: limit)
|
|
193
|
+
enrich_nodes_with_relevance(nodes, query_tags: tags)
|
|
194
|
+
end
|
|
209
195
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
'token_count' => row[:token_count]
|
|
218
|
-
}
|
|
196
|
+
private
|
|
197
|
+
|
|
198
|
+
def fetch_nodes_by_tags(tags, match_all:, timeframe:, limit:)
|
|
199
|
+
query = build_tag_base_query(tags, timeframe)
|
|
200
|
+
query = apply_match_all_constraint(query, tags) if match_all
|
|
201
|
+
query.limit(limit).all.map do |row|
|
|
202
|
+
{ 'id' => row[:id], 'content' => row[:content],
|
|
203
|
+
'access_count' => row[:access_count], 'created_at' => row[:created_at], 'token_count' => row[:token_count] }
|
|
219
204
|
end
|
|
205
|
+
end
|
|
220
206
|
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
207
|
+
def build_tag_base_query(tags, timeframe)
|
|
208
|
+
cols = [Sequel[:nodes][:id], Sequel[:nodes][:content], Sequel[:nodes][:access_count],
|
|
209
|
+
Sequel[:nodes][:created_at], Sequel[:nodes][:token_count]]
|
|
210
|
+
query = HTM::Models::Node
|
|
211
|
+
.select(*cols)
|
|
212
|
+
.join(:node_tags, node_id: :id)
|
|
213
|
+
.join(:tags, id: Sequel[:node_tags][:tag_id])
|
|
214
|
+
.where(Sequel[:tags][:name] => tags)
|
|
215
|
+
.distinct
|
|
216
|
+
timeframe ? query.where(Sequel[:nodes][:created_at] => timeframe) : query
|
|
217
|
+
end
|
|
224
218
|
|
|
225
|
-
|
|
226
|
-
nodes
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
node: node,
|
|
230
|
-
query_tags: tags,
|
|
231
|
-
node_tags: node_tags
|
|
232
|
-
)
|
|
219
|
+
def apply_match_all_constraint(query, tags)
|
|
220
|
+
query.group(Sequel[:nodes][:id])
|
|
221
|
+
.having { Sequel.function(:count, Sequel[:tags][:name].distinct) =~ tags.size }
|
|
222
|
+
end
|
|
233
223
|
|
|
234
|
-
|
|
224
|
+
def enrich_nodes_with_relevance(nodes, query_tags:)
|
|
225
|
+
tags_by_node = batch_load_node_tags(nodes.map { |n| n['id'] })
|
|
226
|
+
enriched = nodes.map do |node|
|
|
227
|
+
node_tags = tags_by_node[node['id']] || []
|
|
228
|
+
node['relevance'] = calculate_relevance(node: node, query_tags: query_tags, node_tags: node_tags)
|
|
235
229
|
node['tags'] = node_tags
|
|
236
230
|
node
|
|
237
|
-
end
|
|
231
|
+
end
|
|
232
|
+
enriched.sort_by { |n| -n['relevance'] }
|
|
238
233
|
end
|
|
239
234
|
|
|
240
|
-
private
|
|
241
|
-
|
|
242
235
|
# Calculate Jaccard similarity between two sets
|
|
243
236
|
#
|
|
244
237
|
# @param set_a [Array] First set
|
|
@@ -309,7 +302,7 @@ class HTM
|
|
|
309
302
|
end
|
|
310
303
|
end
|
|
311
304
|
|
|
312
|
-
total_weights
|
|
305
|
+
total_weights.positive? ? total_weighted_similarity / total_weights : 0.0
|
|
313
306
|
end
|
|
314
307
|
|
|
315
308
|
# Calculate similarity between two pre-split hierarchical tags
|
|
@@ -341,6 +334,45 @@ class HTM
|
|
|
341
334
|
[similarity, depth_weight]
|
|
342
335
|
end
|
|
343
336
|
|
|
337
|
+
# Min-max normalize signal columns across all candidates to [0, 1]
|
|
338
|
+
#
|
|
339
|
+
# Normalizes 'similarity' and 'text_rank' in-place so the weighted
|
|
340
|
+
# composite in calculate_relevance is not biased by different scales
|
|
341
|
+
# (ts_rank is unbounded, similarity is [0,1]).
|
|
342
|
+
#
|
|
343
|
+
# Handles edge cases:
|
|
344
|
+
# - Single element: no-op (already effectively normalized)
|
|
345
|
+
# - All-same values: maps to 1.0 (avoids division by zero)
|
|
346
|
+
# - Missing keys: skips normalization for that signal
|
|
347
|
+
#
|
|
348
|
+
# @param candidates [Array<Hash>] Candidate nodes (modified in-place)
|
|
349
|
+
# @return [Array<Hash>] Same array, normalized
|
|
350
|
+
#
|
|
351
|
+
def normalize_scores_batch(candidates)
|
|
352
|
+
return candidates if candidates.size <= 1
|
|
353
|
+
|
|
354
|
+
%w[similarity text_rank].each do |key|
|
|
355
|
+
values = candidates.filter_map { |c| c[key]&.to_f }
|
|
356
|
+
next if values.empty?
|
|
357
|
+
|
|
358
|
+
min_val = values.min
|
|
359
|
+
max_val = values.max
|
|
360
|
+
range = max_val - min_val
|
|
361
|
+
|
|
362
|
+
candidates.each do |c|
|
|
363
|
+
next unless c.key?(key) && c[key]
|
|
364
|
+
|
|
365
|
+
c[key] = if range.zero?
|
|
366
|
+
1.0
|
|
367
|
+
else
|
|
368
|
+
(c[key].to_f - min_val) / range
|
|
369
|
+
end
|
|
370
|
+
end
|
|
371
|
+
end
|
|
372
|
+
|
|
373
|
+
candidates
|
|
374
|
+
end
|
|
375
|
+
|
|
344
376
|
# Calculate similarity between two hierarchical tags (string version)
|
|
345
377
|
#
|
|
346
378
|
# Compares tags level by level, returning both similarity and a weight
|
|
@@ -78,45 +78,10 @@ class HTM
|
|
|
78
78
|
# - default - LIKE prefix match (e.g., "database" matches "database:postgresql")
|
|
79
79
|
#
|
|
80
80
|
def nodes_by_topic(topic_path, exact: false, fuzzy: false, min_similarity: DEFAULT_TAG_SIMILARITY_THRESHOLD, limit: 50)
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
# Build base query with joins
|
|
85
|
-
# Use subquery with DISTINCT ON to get unique nodes by id
|
|
86
|
-
if exact
|
|
87
|
-
node_ids = HTM::Models::Node
|
|
88
|
-
.select(Sequel[:nodes][:id])
|
|
89
|
-
.join(:node_tags, node_id: :id)
|
|
90
|
-
.join(:tags, id: Sequel[:node_tags][:tag_id])
|
|
91
|
-
.where(Sequel[:tags][:name] => topic_path)
|
|
92
|
-
.distinct
|
|
93
|
-
.select_map(Sequel[:nodes][:id])
|
|
94
|
-
elsif fuzzy
|
|
95
|
-
# Trigram similarity search - tolerates typos and partial matches
|
|
96
|
-
safe_similarity = [[min_similarity.to_f, 0.0].max, 1.0].min
|
|
97
|
-
node_ids = HTM::Models::Node
|
|
98
|
-
.select(Sequel[:nodes][:id])
|
|
99
|
-
.join(:node_tags, node_id: :id)
|
|
100
|
-
.join(:tags, id: Sequel[:node_tags][:tag_id])
|
|
101
|
-
.where(Sequel.lit("similarity(tags.name, ?) >= ?", topic_path, safe_similarity))
|
|
102
|
-
.distinct
|
|
103
|
-
.select_map(Sequel[:nodes][:id])
|
|
104
|
-
else
|
|
105
|
-
# Sanitize LIKE pattern to prevent wildcard injection
|
|
106
|
-
safe_pattern = HTM::SqlBuilder.sanitize_like_pattern(topic_path)
|
|
107
|
-
node_ids = HTM::Models::Node
|
|
108
|
-
.select(Sequel[:nodes][:id])
|
|
109
|
-
.join(:node_tags, node_id: :id)
|
|
110
|
-
.join(:tags, id: Sequel[:node_tags][:tag_id])
|
|
111
|
-
.where(Sequel.like(Sequel[:tags][:name], "#{safe_pattern}%"))
|
|
112
|
-
.distinct
|
|
113
|
-
.select_map(Sequel[:nodes][:id])
|
|
114
|
-
end
|
|
115
|
-
|
|
116
|
-
# Return empty array if no node_ids found
|
|
81
|
+
safe_limit = limit.to_i.clamp(1, MAX_TAG_QUERY_LIMIT)
|
|
82
|
+
node_ids = node_ids_for_topic(topic_path, exact: exact, fuzzy: fuzzy, min_similarity: min_similarity)
|
|
117
83
|
return [] if node_ids.empty?
|
|
118
84
|
|
|
119
|
-
# Fetch full node records for the matching ids
|
|
120
85
|
HTM::Models::Node
|
|
121
86
|
.where(id: node_ids)
|
|
122
87
|
.order(Sequel.desc(:created_at))
|
|
@@ -143,7 +108,7 @@ class HTM
|
|
|
143
108
|
#
|
|
144
109
|
def topic_relationships(min_shared_nodes: 2, limit: 50)
|
|
145
110
|
# Enforce limit to prevent DoS
|
|
146
|
-
safe_limit =
|
|
111
|
+
safe_limit = limit.to_i.clamp(1, MAX_TAG_QUERY_LIMIT)
|
|
147
112
|
safe_min = [min_shared_nodes.to_i, 1].max
|
|
148
113
|
|
|
149
114
|
sql = <<~SQL
|
|
@@ -200,9 +165,9 @@ class HTM
|
|
|
200
165
|
|
|
201
166
|
# Single query to get all tags for all nodes
|
|
202
167
|
results = HTM::Models::NodeTag
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
168
|
+
.join(:tags, id: :tag_id)
|
|
169
|
+
.where(node_id: node_ids)
|
|
170
|
+
.select_map([:node_id, Sequel[:tags][:name]])
|
|
206
171
|
|
|
207
172
|
# Group by node_id
|
|
208
173
|
results.group_by(&:first).transform_values { |pairs| pairs.map(&:last) }
|
|
@@ -218,25 +183,11 @@ class HTM
|
|
|
218
183
|
# @return [Array<Hash>] Tags with usage counts
|
|
219
184
|
#
|
|
220
185
|
def popular_tags(limit: 20, timeframe: nil)
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
query
|
|
225
|
-
|
|
226
|
-
.join(:nodes, id: Sequel[:node_tags][:node_id])
|
|
227
|
-
.group(Sequel[:tags][:id], Sequel[:tags][:name])
|
|
228
|
-
.select(Sequel[:tags][:name], Sequel.function(:count, Sequel[:node_tags][:id]).as(:usage_count))
|
|
229
|
-
|
|
230
|
-
if timeframe
|
|
231
|
-
query = query.where(Sequel[:nodes][:created_at] >= timeframe.begin)
|
|
232
|
-
.where(Sequel[:nodes][:created_at] <= timeframe.end)
|
|
233
|
-
end
|
|
234
|
-
|
|
235
|
-
query
|
|
236
|
-
.order(Sequel.desc(:usage_count))
|
|
237
|
-
.limit(safe_limit)
|
|
238
|
-
.all
|
|
239
|
-
.map { |tag| { name: tag[:name], usage_count: tag[:usage_count].to_i } }
|
|
186
|
+
safe_limit = limit.to_i.clamp(1, MAX_TAG_QUERY_LIMIT)
|
|
187
|
+
query = base_popular_tags_query
|
|
188
|
+
query = filter_by_timeframe(query, timeframe) if timeframe
|
|
189
|
+
query.order(Sequel.desc(:usage_count)).limit(safe_limit).all
|
|
190
|
+
.map { |tag| { name: tag[:name], usage_count: tag[:usage_count].to_i } }
|
|
240
191
|
end
|
|
241
192
|
|
|
242
193
|
# Fuzzy search for tags using trigram similarity
|
|
@@ -254,8 +205,8 @@ class HTM
|
|
|
254
205
|
return [] if query.nil? || query.strip.empty?
|
|
255
206
|
|
|
256
207
|
# Enforce limits
|
|
257
|
-
safe_limit =
|
|
258
|
-
safe_similarity =
|
|
208
|
+
safe_limit = limit.to_i.clamp(1, MAX_TAG_QUERY_LIMIT)
|
|
209
|
+
safe_similarity = min_similarity.to_f.clamp(0.0, 1.0)
|
|
259
210
|
|
|
260
211
|
sql = <<~SQL
|
|
261
212
|
SELECT name, similarity(name, ?) as similarity
|
|
@@ -266,8 +217,8 @@ class HTM
|
|
|
266
217
|
SQL
|
|
267
218
|
|
|
268
219
|
HTM.db.fetch(sql, query, query, safe_similarity, safe_limit)
|
|
269
|
-
|
|
270
|
-
|
|
220
|
+
.all
|
|
221
|
+
.map { |r| { name: r[:name], similarity: r[:similarity].to_f } }
|
|
271
222
|
rescue Sequel::Error => e
|
|
272
223
|
HTM.logger.error("Failed to search tags: #{e.message}")
|
|
273
224
|
[]
|
|
@@ -366,76 +317,92 @@ class HTM
|
|
|
366
317
|
# @param min_similarity [Float] Minimum similarity for trigram matching
|
|
367
318
|
# @return [Array<String>] Matched tag names
|
|
368
319
|
#
|
|
369
|
-
def find_matching_tags_unified(exact_candidates:, prefix_candidates:, component_candidates:, fuzzy_fallback: true,
|
|
320
|
+
def find_matching_tags_unified(exact_candidates:, prefix_candidates:, component_candidates:, fuzzy_fallback: true,
|
|
321
|
+
min_similarity: DEFAULT_TAG_SIMILARITY_THRESHOLD)
|
|
370
322
|
return [] if exact_candidates.empty? && prefix_candidates.empty? && component_candidates.empty?
|
|
371
323
|
|
|
372
324
|
conditions = []
|
|
373
325
|
params = []
|
|
326
|
+
append_exact_conditions(conditions, params, exact_candidates)
|
|
327
|
+
append_prefix_conditions(conditions, params, prefix_candidates)
|
|
328
|
+
append_component_conditions(conditions, params, component_candidates)
|
|
329
|
+
append_trigram_conditions(conditions, params, component_candidates, min_similarity) if fuzzy_fallback && component_candidates.any?
|
|
330
|
+
return [] if conditions.empty?
|
|
374
331
|
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
# Prefix matches
|
|
384
|
-
if prefix_candidates.any?
|
|
385
|
-
placeholders = prefix_candidates.map { '?' }.join(', ')
|
|
386
|
-
conditions << "(SELECT name, 2 as priority FROM tags WHERE name IN (#{placeholders}))"
|
|
387
|
-
params.concat(prefix_candidates)
|
|
388
|
-
end
|
|
389
|
-
|
|
390
|
-
# Component matches
|
|
391
|
-
if component_candidates.any?
|
|
392
|
-
component_conditions = component_candidates.map do |_|
|
|
393
|
-
"(name = ? OR name LIKE ? OR name LIKE ? OR name LIKE ?)"
|
|
394
|
-
end
|
|
332
|
+
params << MAX_TAG_QUERY_LIMIT
|
|
333
|
+
sql = "SELECT DISTINCT name FROM (#{conditions.join(' UNION ')}) AS matches ORDER BY name LIMIT ?"
|
|
334
|
+
HTM.db.fetch(sql, *params).all.map { |r| r[:name] }
|
|
335
|
+
rescue Sequel::Error => e
|
|
336
|
+
HTM.logger.error("Failed to find matching tags: #{e.message}")
|
|
337
|
+
[]
|
|
338
|
+
end
|
|
395
339
|
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
]
|
|
404
|
-
end
|
|
340
|
+
def base_popular_tags_query
|
|
341
|
+
HTM::Models::Tag
|
|
342
|
+
.join(:node_tags, tag_id: :id)
|
|
343
|
+
.join(:nodes, id: Sequel[:node_tags][:node_id])
|
|
344
|
+
.group(Sequel[:tags][:id], Sequel[:tags][:name])
|
|
345
|
+
.select(Sequel[:tags][:name], Sequel.function(:count, Sequel[:node_tags][:id]).as(:usage_count))
|
|
346
|
+
end
|
|
405
347
|
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
348
|
+
def filter_by_timeframe(query, timeframe)
|
|
349
|
+
query
|
|
350
|
+
.where(Sequel[:nodes][:created_at] >= timeframe.begin)
|
|
351
|
+
.where(Sequel[:nodes][:created_at] <= timeframe.end)
|
|
352
|
+
end
|
|
409
353
|
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
354
|
+
def node_ids_for_topic(topic_path, exact:, fuzzy:, min_similarity:)
|
|
355
|
+
base = HTM::Models::Node
|
|
356
|
+
.select(Sequel[:nodes][:id])
|
|
357
|
+
.join(:node_tags, node_id: :id)
|
|
358
|
+
.join(:tags, id: Sequel[:node_tags][:tag_id])
|
|
359
|
+
.distinct
|
|
360
|
+
|
|
361
|
+
node_ids_dataset =
|
|
362
|
+
if exact
|
|
363
|
+
base.where(Sequel[:tags][:name] => topic_path)
|
|
364
|
+
elsif fuzzy
|
|
365
|
+
safe_sim = min_similarity.to_f.clamp(0.0, 1.0)
|
|
366
|
+
base.where(Sequel.lit("similarity(tags.name, ?) >= ?", topic_path, safe_sim))
|
|
367
|
+
else
|
|
368
|
+
safe_pattern = HTM::SqlBuilder.sanitize_like_pattern(topic_path)
|
|
369
|
+
base.where(Sequel.like(Sequel[:tags][:name], "#{safe_pattern}%"))
|
|
415
370
|
end
|
|
416
|
-
trigram_params = component_candidates.flat_map { |c| [c, safe_similarity] }
|
|
417
371
|
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
end
|
|
372
|
+
node_ids_dataset.select_map(Sequel[:nodes][:id])
|
|
373
|
+
end
|
|
421
374
|
|
|
422
|
-
|
|
375
|
+
def append_exact_conditions(conditions, params, exact_candidates)
|
|
376
|
+
return unless exact_candidates.any?
|
|
377
|
+
placeholders = exact_candidates.map { '?' }.join(', ')
|
|
378
|
+
conditions << "(SELECT name, 1 as priority FROM tags WHERE name IN (#{placeholders}))"
|
|
379
|
+
params.concat(exact_candidates)
|
|
380
|
+
end
|
|
423
381
|
|
|
424
|
-
|
|
425
|
-
|
|
382
|
+
def append_prefix_conditions(conditions, params, prefix_candidates)
|
|
383
|
+
return unless prefix_candidates.any?
|
|
384
|
+
placeholders = prefix_candidates.map { '?' }.join(', ')
|
|
385
|
+
conditions << "(SELECT name, 2 as priority FROM tags WHERE name IN (#{placeholders}))"
|
|
386
|
+
params.concat(prefix_candidates)
|
|
387
|
+
end
|
|
426
388
|
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
389
|
+
def append_component_conditions(conditions, params, component_candidates)
|
|
390
|
+
return unless component_candidates.any?
|
|
391
|
+
component_conditions = component_candidates.map { "(name = ? OR name LIKE ? OR name LIKE ? OR name LIKE ?)" }
|
|
392
|
+
component_params = component_candidates.flat_map do |component|
|
|
393
|
+
safe = HTM::SqlBuilder.sanitize_like_pattern(component)
|
|
394
|
+
[component, "#{safe}:%", "%:#{safe}", "%:#{safe}:%"]
|
|
395
|
+
end
|
|
396
|
+
conditions << "(SELECT name, 3 as priority FROM tags WHERE #{component_conditions.join(' OR ')})"
|
|
397
|
+
params.concat(component_params)
|
|
398
|
+
end
|
|
434
399
|
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
[]
|
|
400
|
+
def append_trigram_conditions(conditions, params, component_candidates, min_similarity)
|
|
401
|
+
safe_similarity = min_similarity.to_f.clamp(0.0, 1.0)
|
|
402
|
+
trigram_conditions = component_candidates.map { "similarity(name, ?) >= ?" }
|
|
403
|
+
trigram_params = component_candidates.flat_map { |c| [c, safe_similarity] }
|
|
404
|
+
conditions << "(SELECT name, 4 as priority FROM tags WHERE #{trigram_conditions.join(' OR ')})"
|
|
405
|
+
params.concat(trigram_params)
|
|
439
406
|
end
|
|
440
407
|
end
|
|
441
408
|
end
|
|
@@ -28,7 +28,7 @@ class HTM
|
|
|
28
28
|
#
|
|
29
29
|
def search(timeframe:, query:, limit:, embedding_service:, metadata: {})
|
|
30
30
|
# Enforce limit to prevent DoS
|
|
31
|
-
safe_limit =
|
|
31
|
+
safe_limit = limit.to_i.clamp(1, MAX_VECTOR_LIMIT)
|
|
32
32
|
|
|
33
33
|
start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
34
34
|
result = @cache.fetch(:search, timeframe, query, safe_limit, metadata) do
|
data/lib/htm/long_term_memory.rb
CHANGED
|
@@ -86,7 +86,8 @@ class HTM
|
|
|
86
86
|
# @example Disable caching
|
|
87
87
|
# ltm = LongTermMemory.new(config, cache_size: 0)
|
|
88
88
|
#
|
|
89
|
-
def initialize(config, pool_size: nil, query_timeout: DEFAULT_QUERY_TIMEOUT, cache_size: DEFAULT_CACHE_SIZE,
|
|
89
|
+
def initialize(config, pool_size: nil, query_timeout: DEFAULT_QUERY_TIMEOUT, cache_size: DEFAULT_CACHE_SIZE,
|
|
90
|
+
cache_ttl: DEFAULT_CACHE_TTL)
|
|
90
91
|
@config = config
|
|
91
92
|
@query_timeout = query_timeout # in milliseconds
|
|
92
93
|
|