htm 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.envrc +1 -0
- data/.tbls.yml +30 -0
- data/CHANGELOG.md +30 -0
- data/SETUP.md +132 -101
- data/db/migrate/20250125000001_add_content_hash_to_nodes.rb +14 -0
- data/db/migrate/20250125000002_create_robot_nodes.rb +35 -0
- data/db/migrate/20250125000003_remove_source_and_robot_id_from_nodes.rb +28 -0
- data/db/migrate/20250126000001_create_working_memories.rb +19 -0
- data/db/migrate/20250126000002_remove_unused_columns.rb +12 -0
- data/db/schema.sql +226 -43
- data/docs/api/database.md +20 -232
- data/docs/api/embedding-service.md +1 -7
- data/docs/api/htm.md +195 -449
- data/docs/api/index.md +1 -7
- data/docs/api/long-term-memory.md +342 -590
- data/docs/architecture/adrs/001-postgresql-timescaledb.md +1 -1
- data/docs/architecture/adrs/003-ollama-embeddings.md +1 -1
- data/docs/architecture/adrs/010-redis-working-memory-rejected.md +2 -27
- data/docs/architecture/adrs/index.md +2 -13
- data/docs/architecture/hive-mind.md +165 -166
- data/docs/architecture/index.md +2 -2
- data/docs/architecture/overview.md +5 -171
- data/docs/architecture/two-tier-memory.md +1 -35
- data/docs/assets/images/adr-010-current-architecture.svg +37 -0
- data/docs/assets/images/adr-010-proposed-architecture.svg +48 -0
- data/docs/assets/images/adr-dependency-tree.svg +93 -0
- data/docs/assets/images/class-hierarchy.svg +55 -0
- data/docs/assets/images/exception-hierarchy.svg +45 -0
- data/docs/assets/images/htm-architecture-overview.svg +83 -0
- data/docs/assets/images/htm-complete-memory-flow.svg +160 -0
- data/docs/assets/images/htm-context-assembly-flow.svg +148 -0
- data/docs/assets/images/htm-eviction-process.svg +141 -0
- data/docs/assets/images/htm-memory-addition-flow.svg +138 -0
- data/docs/assets/images/htm-memory-recall-flow.svg +152 -0
- data/docs/assets/images/htm-node-states.svg +123 -0
- data/docs/assets/images/project-structure.svg +78 -0
- data/docs/assets/images/test-directory-structure.svg +38 -0
- data/{dbdoc → docs/database}/README.md +5 -3
- data/{dbdoc → docs/database}/public.node_tags.md +4 -5
- data/docs/database/public.node_tags.svg +106 -0
- data/{dbdoc → docs/database}/public.nodes.md +3 -8
- data/docs/database/public.nodes.svg +152 -0
- data/docs/database/public.robot_nodes.md +44 -0
- data/docs/database/public.robot_nodes.svg +121 -0
- data/{dbdoc → docs/database}/public.robots.md +1 -2
- data/docs/database/public.robots.svg +106 -0
- data/docs/database/public.working_memories.md +40 -0
- data/docs/database/public.working_memories.svg +112 -0
- data/{dbdoc → docs/database}/schema.json +342 -110
- data/docs/database/schema.svg +223 -0
- data/docs/development/index.md +1 -29
- data/docs/development/schema.md +84 -324
- data/docs/development/testing.md +1 -9
- data/docs/getting-started/index.md +47 -0
- data/docs/{installation.md → getting-started/installation.md} +2 -2
- data/docs/{quick-start.md → getting-started/quick-start.md} +5 -5
- data/docs/guides/adding-memories.md +221 -655
- data/docs/guides/search-strategies.md +85 -51
- data/docs/images/htm-er-diagram.svg +156 -0
- data/docs/index.md +16 -31
- data/docs/multi_framework_support.md +4 -4
- data/examples/basic_usage.rb +18 -16
- data/examples/cli_app/htm_cli.rb +86 -8
- data/examples/custom_llm_configuration.rb +1 -2
- data/examples/example_app/app.rb +11 -14
- data/examples/sinatra_app/Gemfile +1 -0
- data/examples/sinatra_app/Gemfile.lock +166 -0
- data/examples/sinatra_app/app.rb +219 -24
- data/lib/htm/active_record_config.rb +10 -3
- data/lib/htm/configuration.rb +265 -78
- data/lib/htm/{sinatra.rb → integrations/sinatra.rb} +87 -12
- data/lib/htm/job_adapter.rb +10 -3
- data/lib/htm/long_term_memory.rb +220 -57
- data/lib/htm/models/node.rb +36 -7
- data/lib/htm/models/robot.rb +30 -4
- data/lib/htm/models/robot_node.rb +50 -0
- data/lib/htm/models/tag.rb +52 -0
- data/lib/htm/models/working_memory_entry.rb +88 -0
- data/lib/htm/tasks.rb +4 -0
- data/lib/htm/version.rb +1 -1
- data/lib/htm.rb +34 -13
- data/lib/tasks/htm.rake +32 -1
- data/lib/tasks/jobs.rake +7 -3
- data/lib/tasks/tags.rake +34 -0
- data/mkdocs.yml +56 -9
- metadata +61 -31
- data/dbdoc/public.node_tags.svg +0 -112
- data/dbdoc/public.nodes.svg +0 -118
- data/dbdoc/public.robots.svg +0 -90
- data/dbdoc/schema.svg +0 -154
- /data/{dbdoc → docs/database}/public.node_stats.md +0 -0
- /data/{dbdoc → docs/database}/public.node_stats.svg +0 -0
- /data/{dbdoc → docs/database}/public.nodes_tags.md +0 -0
- /data/{dbdoc → docs/database}/public.nodes_tags.svg +0 -0
- /data/{dbdoc → docs/database}/public.ontology_structure.md +0 -0
- /data/{dbdoc → docs/database}/public.ontology_structure.svg +0 -0
- /data/{dbdoc → docs/database}/public.operations_log.md +0 -0
- /data/{dbdoc → docs/database}/public.operations_log.svg +0 -0
- /data/{dbdoc → docs/database}/public.relationships.md +0 -0
- /data/{dbdoc → docs/database}/public.relationships.svg +0 -0
- /data/{dbdoc → docs/database}/public.robot_activity.md +0 -0
- /data/{dbdoc → docs/database}/public.robot_activity.svg +0 -0
- /data/{dbdoc → docs/database}/public.schema_migrations.md +0 -0
- /data/{dbdoc → docs/database}/public.schema_migrations.svg +0 -0
- /data/{dbdoc → docs/database}/public.tags.md +0 -0
- /data/{dbdoc → docs/database}/public.tags.svg +0 -0
- /data/{dbdoc → docs/database}/public.topic_relationships.md +0 -0
- /data/{dbdoc → docs/database}/public.topic_relationships.svg +0 -0
data/lib/htm/long_term_memory.rb
CHANGED
|
@@ -39,44 +39,96 @@ class HTM
|
|
|
39
39
|
end
|
|
40
40
|
end
|
|
41
41
|
|
|
42
|
-
# Add a node to long-term memory
|
|
42
|
+
# Add a node to long-term memory (with deduplication)
#
# Content is identified by the hash returned from
# HTM::Models::Node.generate_content_hash. If a node with that hash already
# exists, the robot is linked to it (or its existing link is refreshed) and
# the node is touched. Otherwise a new node is created and linked.
#
# The node write and the robot-link write run inside a single database
# transaction, so a failure while linking cannot leave an orphaned node
# behind. The cache is invalidated only after a new node has been written.
#
# @param content [String] Conversation message/utterance
# @param token_count [Integer] Token count
# @param robot_id [Integer] Robot identifier
# @param embedding [Array<Float>, nil] Pre-generated embedding vector
# @return [Hash] { node_id:, is_new:, robot_node: }
#
def add(content:, token_count: 0, robot_id:, embedding: nil)
  content_hash = HTM::Models::Node.generate_content_hash(content)

  outcome = HTM::Models::Node.transaction do
    existing_node = HTM::Models::Node.find_by(content_hash: content_hash)

    if existing_node
      # Link robot to existing node (or update if already linked)
      robot_node = link_robot_to_node(robot_id: robot_id, node: existing_node)

      # Bump the node's updated_at timestamp
      existing_node.touch

      { node_id: existing_node.id, is_new: false, robot_node: robot_node }
    else
      embedding_str = nil
      if embedding
        # Zero-pad short vectors up to 2000 dimensions; longer ones pass through as-is
        padding = [2000 - embedding.length, 0].max
        padded_embedding = embedding + Array.new(padding, 0.0)
        embedding_str = "[#{padded_embedding.join(',')}]"
      end

      node = HTM::Models::Node.create!(
        content: content,
        content_hash: content_hash,
        token_count: token_count,
        embedding: embedding_str,
        # Record the caller's original (unpadded) dimension
        embedding_dimension: embedding&.length
      )

      robot_node = link_robot_to_node(robot_id: robot_id, node: node)

      { node_id: node.id, is_new: true, robot_node: robot_node }
    end
  end

  # Invalidate cache since database content changed
  invalidate_cache! if outcome[:is_new]

  outcome
end
|
|
65
107
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
)
|
|
108
|
+
# Link a robot to a node (create or update robot_node record)
#
# If a RobotNode row already exists for the pair, record_remember! is called
# on it. Otherwise a new row is created with remember_count 1 and both
# remembered-at timestamps set to the same instant.
#
# @param robot_id [Integer] Robot ID
# @param node [HTM::Models::Node] Node to link
# @return [HTM::Models::RobotNode] The robot_node link record
#
def link_robot_to_node(robot_id:, node:)
  robot_node = HTM::Models::RobotNode.find_by(robot_id: robot_id, node_id: node.id)

  if robot_node
    # Existing link - record that robot remembered this again
    robot_node.record_remember!
    return robot_node
  end

  # Capture the clock once so first/last timestamps of a new link are identical
  now = Time.current

  HTM::Models::RobotNode.create!(
    robot_id: robot_id,
    node_id: node.id,
    first_remembered_at: now,
    last_remembered_at: now,
    remember_count: 1
  )
end
|
|
81
133
|
|
|
82
134
|
# Retrieve a node by ID
|
|
@@ -240,13 +292,15 @@ class HTM
|
|
|
240
292
|
|
|
241
293
|
# Mark nodes as evicted from working memory
#
# Kept only so existing callers keep working: this method does nothing.
# Working-memory state is tracked per robot (in memory, with optional
# persistence through the working_memories table), not on the nodes.
#
# @param node_ids [Array<Integer>] Node IDs (ignored)
# @return [void]
#
def mark_evicted(node_ids)
  # Intentionally empty - eviction is handled by the working-memory layer
  nil
end
|
|
251
305
|
|
|
252
306
|
# Track access for multiple nodes (bulk operation)
|
|
@@ -294,7 +348,7 @@ class HTM
|
|
|
294
348
|
def stats
|
|
295
349
|
base_stats = {
|
|
296
350
|
total_nodes: HTM::Models::Node.count,
|
|
297
|
-
nodes_by_robot: HTM::Models::
|
|
351
|
+
nodes_by_robot: HTM::Models::RobotNode.group(:robot_id).count,
|
|
298
352
|
total_tags: HTM::Models::Tag.count,
|
|
299
353
|
oldest_memory: HTM::Models::Node.minimum(:created_at),
|
|
300
354
|
newest_memory: HTM::Models::Node.maximum(:created_at),
|
|
@@ -574,6 +628,32 @@ class HTM
|
|
|
574
628
|
.map { |tag| { name: tag.name, usage_count: tag.usage_count } }
|
|
575
629
|
end
|
|
576
630
|
|
|
631
|
+
# Find tags that match terms in the query
#
# Splits the query into lowercase words of 3+ characters and returns the
# names of all tags whose name contains at least one of those words
# (case-insensitive substring match). For example, the query
# "PostgreSQL database" would match tags like "database:postgresql" and
# "database:sql".
#
# LIKE metacharacters (%, _ and the escape character) inside a word are
# escaped so they match literally instead of acting as wildcards.
#
# @param query [String, nil] Search query
# @return [Array<String>] Matching tag names (empty when the query has no usable words)
#
def find_query_matching_tags(query)
  return [] if query.nil? || query.strip.empty?

  # Usable search terms: lowercase, 3+ chars, each distinct word once
  words = query.downcase.split.select { |w| w.length >= 3 }.uniq
  return [] if words.empty?

  # One "contains" pattern per word, with user-supplied wildcards neutralised
  patterns = words.map { |w| "%#{ActiveRecord::Base.sanitize_sql_like(w)}%" }
  conditions = Array.new(patterns.size, 'name ILIKE ?').join(' OR ')

  HTM::Models::Tag
    .where(conditions, *patterns)
    .pluck(:name)
end
|
|
656
|
+
|
|
577
657
|
private
|
|
578
658
|
|
|
579
659
|
# Generate cache key for query
|
|
@@ -682,7 +762,7 @@ class HTM
|
|
|
682
762
|
|
|
683
763
|
result = ActiveRecord::Base.connection.select_all(
|
|
684
764
|
<<~SQL,
|
|
685
|
-
SELECT id, content,
|
|
765
|
+
SELECT id, content, access_count, created_at, token_count,
|
|
686
766
|
1 - (embedding <=> '#{embedding_str}'::vector) as similarity
|
|
687
767
|
FROM nodes
|
|
688
768
|
WHERE created_at BETWEEN '#{timeframe.begin.iso8601}' AND '#{timeframe.end.iso8601}'
|
|
@@ -710,7 +790,7 @@ class HTM
|
|
|
710
790
|
result = ActiveRecord::Base.connection.select_all(
|
|
711
791
|
ActiveRecord::Base.sanitize_sql_array([
|
|
712
792
|
<<~SQL,
|
|
713
|
-
SELECT id, content,
|
|
793
|
+
SELECT id, content, access_count, created_at, token_count,
|
|
714
794
|
ts_rank(to_tsvector('english', content), plainto_tsquery('english', ?)) as rank
|
|
715
795
|
FROM nodes
|
|
716
796
|
WHERE created_at BETWEEN ? AND ?
|
|
@@ -731,15 +811,17 @@ class HTM
|
|
|
731
811
|
|
|
732
812
|
# Uncached hybrid search
|
|
733
813
|
#
|
|
734
|
-
# Generates query embedding client-side, then combines
|
|
735
|
-
#
|
|
814
|
+
# Generates query embedding client-side, then combines:
|
|
815
|
+
# 1. Full-text search for content matching
|
|
816
|
+
# 2. Tag matching for categorical relevance
|
|
817
|
+
# 3. Vector similarity for semantic ranking
|
|
736
818
|
#
|
|
737
819
|
# @param timeframe [Range] Time range to search
|
|
738
820
|
# @param query [String] Search query
|
|
739
821
|
# @param limit [Integer] Maximum results
|
|
740
822
|
# @param embedding_service [Object] Service to generate query embedding
|
|
741
823
|
# @param prefilter_limit [Integer] Candidates to consider
|
|
742
|
-
# @return [Array<Hash>] Matching nodes
|
|
824
|
+
# @return [Array<Hash>] Matching nodes with similarity and tag_boost scores
|
|
743
825
|
#
|
|
744
826
|
def search_hybrid_uncached(timeframe:, query:, limit:, embedding_service:, prefilter_limit:)
|
|
745
827
|
# Generate query embedding client-side
|
|
@@ -753,26 +835,107 @@ class HTM
|
|
|
753
835
|
# Convert to PostgreSQL vector format
|
|
754
836
|
embedding_str = "[#{query_embedding.join(',')}]"
|
|
755
837
|
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
838
|
+
# Find tags that match query terms
|
|
839
|
+
matching_tags = find_query_matching_tags(query)
|
|
840
|
+
|
|
841
|
+
# Build the hybrid query
|
|
842
|
+
# If we have matching tags, include nodes with those tags in the candidate pool
|
|
843
|
+
# NOTE: Hybrid search includes nodes without embeddings using a default
|
|
844
|
+
# similarity score of 0.5. This allows newly created nodes to appear in
|
|
845
|
+
# search results immediately (via fulltext matching) before their embeddings
|
|
846
|
+
# are generated by background jobs. Useful for demos with short timeframes
|
|
847
|
+
# (seconds) where async embedding generation hasn't completed yet.
|
|
848
|
+
# In production with longer timeframes, embeddings are typically ready
|
|
849
|
+
# within 1-5 seconds, so this fallback is rarely used.
|
|
850
|
+
|
|
851
|
+
if matching_tags.any?
|
|
852
|
+
# Escape tag names for SQL
|
|
853
|
+
tag_list = matching_tags.map { |t| ActiveRecord::Base.connection.quote(t) }.join(', ')
|
|
854
|
+
result = ActiveRecord::Base.connection.select_all(
|
|
855
|
+
ActiveRecord::Base.sanitize_sql_array([
|
|
856
|
+
<<~SQL,
|
|
857
|
+
WITH fulltext_candidates AS (
|
|
858
|
+
-- Nodes matching full-text search (with or without embeddings)
|
|
859
|
+
SELECT DISTINCT n.id, n.content, n.access_count, n.created_at, n.token_count, n.embedding
|
|
860
|
+
FROM nodes n
|
|
861
|
+
WHERE n.created_at BETWEEN ? AND ?
|
|
862
|
+
AND to_tsvector('english', n.content) @@ plainto_tsquery('english', ?)
|
|
863
|
+
LIMIT ?
|
|
864
|
+
),
|
|
865
|
+
tag_candidates AS (
|
|
866
|
+
-- Nodes matching relevant tags (with or without embeddings)
|
|
867
|
+
SELECT DISTINCT n.id, n.content, n.access_count, n.created_at, n.token_count, n.embedding
|
|
868
|
+
FROM nodes n
|
|
869
|
+
JOIN node_tags nt ON nt.node_id = n.id
|
|
870
|
+
JOIN tags t ON t.id = nt.tag_id
|
|
871
|
+
WHERE n.created_at BETWEEN ? AND ?
|
|
872
|
+
AND t.name IN (#{tag_list})
|
|
873
|
+
LIMIT ?
|
|
874
|
+
),
|
|
875
|
+
all_candidates AS (
|
|
876
|
+
SELECT * FROM fulltext_candidates
|
|
877
|
+
UNION
|
|
878
|
+
SELECT * FROM tag_candidates
|
|
879
|
+
),
|
|
880
|
+
scored AS (
|
|
881
|
+
SELECT
|
|
882
|
+
ac.id, ac.content, ac.access_count, ac.created_at, ac.token_count,
|
|
883
|
+
CASE
|
|
884
|
+
WHEN ac.embedding IS NOT NULL THEN 1 - (ac.embedding <=> '#{embedding_str}'::vector)
|
|
885
|
+
ELSE 0.5 -- Default similarity for nodes without embeddings
|
|
886
|
+
END as similarity,
|
|
887
|
+
COALESCE((
|
|
888
|
+
SELECT COUNT(DISTINCT t.name)::float / ?
|
|
889
|
+
FROM node_tags nt
|
|
890
|
+
JOIN tags t ON t.id = nt.tag_id
|
|
891
|
+
WHERE nt.node_id = ac.id AND t.name IN (#{tag_list})
|
|
892
|
+
), 0) as tag_boost
|
|
893
|
+
FROM all_candidates ac
|
|
894
|
+
)
|
|
895
|
+
SELECT id, content, access_count, created_at, token_count,
|
|
896
|
+
similarity, tag_boost,
|
|
897
|
+
(similarity * 0.7 + tag_boost * 0.3) as combined_score
|
|
898
|
+
FROM scored
|
|
899
|
+
ORDER BY combined_score DESC
|
|
765
900
|
LIMIT ?
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
901
|
+
SQL
|
|
902
|
+
timeframe.begin, timeframe.end, query, prefilter_limit,
|
|
903
|
+
timeframe.begin, timeframe.end, prefilter_limit,
|
|
904
|
+
matching_tags.length.to_f,
|
|
905
|
+
limit
|
|
906
|
+
])
|
|
907
|
+
)
|
|
908
|
+
else
|
|
909
|
+
# No matching tags, fall back to standard hybrid (fulltext + vector)
|
|
910
|
+
# Include nodes without embeddings with a default similarity score
|
|
911
|
+
result = ActiveRecord::Base.connection.select_all(
|
|
912
|
+
ActiveRecord::Base.sanitize_sql_array([
|
|
913
|
+
<<~SQL,
|
|
914
|
+
WITH candidates AS (
|
|
915
|
+
SELECT id, content, access_count, created_at, token_count, embedding
|
|
916
|
+
FROM nodes
|
|
917
|
+
WHERE created_at BETWEEN ? AND ?
|
|
918
|
+
AND to_tsvector('english', content) @@ plainto_tsquery('english', ?)
|
|
919
|
+
LIMIT ?
|
|
920
|
+
)
|
|
921
|
+
SELECT id, content, access_count, created_at, token_count,
|
|
922
|
+
CASE
|
|
923
|
+
WHEN embedding IS NOT NULL THEN 1 - (embedding <=> '#{embedding_str}'::vector)
|
|
924
|
+
ELSE 0.5 -- Default similarity for nodes without embeddings
|
|
925
|
+
END as similarity,
|
|
926
|
+
0.0 as tag_boost,
|
|
927
|
+
CASE
|
|
928
|
+
WHEN embedding IS NOT NULL THEN 1 - (embedding <=> '#{embedding_str}'::vector)
|
|
929
|
+
ELSE 0.5 -- Default score for nodes without embeddings (fulltext matched)
|
|
930
|
+
END as combined_score
|
|
931
|
+
FROM candidates
|
|
932
|
+
ORDER BY combined_score DESC
|
|
933
|
+
LIMIT ?
|
|
934
|
+
SQL
|
|
935
|
+
timeframe.begin, timeframe.end, query, prefilter_limit, limit
|
|
936
|
+
])
|
|
937
|
+
)
|
|
938
|
+
end
|
|
776
939
|
|
|
777
940
|
# Track access for retrieved nodes
|
|
778
941
|
node_ids = result.map { |r| r['id'] }
|
data/lib/htm/models/node.rb
CHANGED
|
@@ -1,9 +1,14 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'digest'
|
|
4
|
+
|
|
3
5
|
class HTM
|
|
4
6
|
module Models
|
|
5
7
|
# Node model - represents a memory node (conversation message)
|
|
6
8
|
#
|
|
9
|
+
# Nodes are globally unique by content (via content_hash) and can be
|
|
10
|
+
# linked to multiple robots through the robot_nodes join table.
|
|
11
|
+
#
|
|
7
12
|
# Nearest Neighbor Search (via neighbor gem):
|
|
8
13
|
# # Find 5 nearest neighbors by cosine distance
|
|
9
14
|
# neighbors = Node.nearest_neighbors(:embedding, query_vector, distance: "cosine").limit(5)
|
|
@@ -18,8 +23,9 @@ class HTM
|
|
|
18
23
|
class Node < ActiveRecord::Base
|
|
19
24
|
self.table_name = 'nodes'
|
|
20
25
|
|
|
21
|
-
# Associations
|
|
22
|
-
|
|
26
|
+
# Associations - Many-to-many with robots via robot_nodes
|
|
27
|
+
has_many :robot_nodes, class_name: 'HTM::Models::RobotNode', dependent: :destroy
|
|
28
|
+
has_many :robots, through: :robot_nodes, class_name: 'HTM::Models::Robot'
|
|
23
29
|
has_many :node_tags, class_name: 'HTM::Models::NodeTag', dependent: :destroy
|
|
24
30
|
has_many :tags, through: :node_tags, class_name: 'HTM::Models::Tag'
|
|
25
31
|
|
|
@@ -28,21 +34,41 @@ class HTM
|
|
|
28
34
|
|
|
29
35
|
# Validations
|
|
30
36
|
validates :content, presence: true
|
|
31
|
-
validates :
|
|
37
|
+
validates :content_hash, presence: true, uniqueness: true
|
|
32
38
|
validates :embedding_dimension, numericality: { greater_than: 0, less_than_or_equal_to: 2000 }, allow_nil: true
|
|
33
39
|
|
|
34
40
|
# Callbacks
|
|
41
|
+
before_validation :set_content_hash, if: -> { content_hash.blank? && content.present? }
|
|
35
42
|
before_create :set_defaults
|
|
36
43
|
before_save :update_timestamps
|
|
37
44
|
|
|
38
45
|
# Scopes
|
|
39
|
-
scope :by_robot, ->(robot_id) { where(robot_id: robot_id) }
|
|
40
|
-
scope :by_source, ->(source) { where(source: source) }
|
|
41
|
-
scope :in_working_memory, -> { where(in_working_memory: true) }
|
|
46
|
+
scope :by_robot, ->(robot_id) { joins(:robot_nodes).where(robot_nodes: { robot_id: robot_id }) }
|
|
42
47
|
scope :recent, -> { order(created_at: :desc) }
|
|
43
48
|
scope :in_timeframe, ->(start_time, end_time) { where(created_at: start_time..end_time) }
|
|
44
49
|
scope :with_embeddings, -> { where.not(embedding: nil) }
|
|
45
50
|
|
|
51
|
+
# Class methods
|
|
52
|
+
|
|
53
|
+
# Look up the node whose content_hash equals the hash of the given content
#
# @param content [String] The content to search for
# @return [Node, nil] The matching node, or nil when none exists
#
def self.find_by_content(content)
  find_by(content_hash: generate_content_hash(content))
end
|
|
62
|
+
|
|
63
|
+
# Compute the SHA-256 digest of the content's string form
#
# @param content [String] Content to hash (converted with to_s first)
# @return [String] 64-character hex hash
#
def self.generate_content_hash(content)
  text = content.to_s
  Digest::SHA256.hexdigest(text)
end
|
|
71
|
+
|
|
46
72
|
# Instance methods
|
|
47
73
|
|
|
48
74
|
# Find nearest neighbors to this node's embedding
|
|
@@ -94,8 +120,11 @@ class HTM
|
|
|
94
120
|
|
|
95
121
|
private
|
|
96
122
|
|
|
123
|
+
# Populate content_hash from the current content using the class-level
# hash generator.
def set_content_hash
  digest = self.class.generate_content_hash(content)
  self.content_hash = digest
end
|
|
126
|
+
|
|
97
127
|
def set_defaults
|
|
98
|
-
self.in_working_memory ||= false
|
|
99
128
|
self.created_at ||= Time.current
|
|
100
129
|
self.updated_at ||= Time.current
|
|
101
130
|
self.last_accessed ||= Time.current
|
data/lib/htm/models/robot.rb
CHANGED
|
@@ -3,12 +3,18 @@
|
|
|
3
3
|
class HTM
|
|
4
4
|
module Models
|
|
5
5
|
# Robot model - represents an LLM agent using the HTM system
|
|
6
|
+
#
|
|
7
|
+
# Robots can share memories through the many-to-many relationship with nodes.
|
|
8
|
+
# When a robot is deleted, only the robot_nodes links are removed; shared
|
|
9
|
+
# nodes remain in the database for other robots.
|
|
10
|
+
#
|
|
6
11
|
class Robot < ActiveRecord::Base
|
|
7
12
|
self.table_name = 'robots'
|
|
8
13
|
|
|
9
|
-
# Associations
|
|
10
|
-
|
|
11
|
-
has_many :
|
|
14
|
+
# Associations - Many-to-many with nodes via robot_nodes
|
|
15
|
+
# dependent: :destroy removes links only, NOT the shared nodes
|
|
16
|
+
has_many :robot_nodes, class_name: 'HTM::Models::RobotNode', dependent: :destroy
|
|
17
|
+
has_many :nodes, through: :robot_nodes, class_name: 'HTM::Models::Node'
|
|
12
18
|
|
|
13
19
|
# Validations
|
|
14
20
|
validates :name, presence: true
|
|
@@ -34,10 +40,30 @@ class HTM
|
|
|
34
40
|
nodes.recent.limit(limit)
|
|
35
41
|
end
|
|
36
42
|
|
|
43
|
+
# List this robot's nodes together with the remember metadata stored on
# each robot_nodes link, most recently remembered first
#
# @param limit [Integer] Max nodes to return
# @return [Array<Hash>] One hash per link with :node, :remember_count,
#   :first_remembered_at and :last_remembered_at
#
def nodes_with_metadata(limit = 10)
  links = robot_nodes.includes(:node).order(last_remembered_at: :desc).limit(limit)

  links.map do |link|
    {
      node: link.node,
      remember_count: link.remember_count,
      first_remembered_at: link.first_remembered_at,
      last_remembered_at: link.last_remembered_at
    }
  end
end
|
|
62
|
+
|
|
37
63
|
# Summarise this robot's memory as counts
#
# @return [Hash] :total_nodes (linked nodes), :in_working_memory
#   (WorkingMemoryEntry rows for this robot) and :with_embeddings
#   (linked nodes that have an embedding)
def memory_summary
  total = nodes.count
  working = HTM::Models::WorkingMemoryEntry.where(robot_id: id).count
  embedded = nodes.with_embeddings.count

  {
    total_nodes: total,
    in_working_memory: working,
    with_embeddings: embedded
  }
end
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class HTM
  module Models
    # Join model between robots and nodes (many-to-many)
    #
    # Each row says "this robot has remembered this node" and carries the
    # bookkeeping for that relationship: how many times the robot remembered
    # the content and when it did so first and most recently.
    #
    # @example Robots that remember a node
    #   node.robots
    #
    # @example Nodes a robot remembers
    #   robot.nodes
    #
    # @example Inspect remember activity
    #   link = RobotNode.find_by(robot: robot, node: node)
    #   link.remember_count  # => 3
    #   link.first_remembered_at
    #   link.last_remembered_at
    #
    class RobotNode < ActiveRecord::Base
      self.table_name = 'robot_nodes'

      belongs_to :robot, class_name: 'HTM::Models::Robot'
      belongs_to :node, class_name: 'HTM::Models::Node'

      # A link needs both ends, and a robot may be linked to a node only once
      validates :robot_id, :node_id, presence: true
      validates :robot_id, uniqueness: { scope: :node_id, message: 'already linked to this node' }

      # Scopes
      scope :by_robot, ->(robot_id) { where(robot_id: robot_id) }
      scope :by_node, ->(node_id) { where(node_id: node_id) }
      scope :recent, -> { order(last_remembered_at: :desc) }
      scope :frequently_remembered, -> { where('remember_count > 1').order(remember_count: :desc) }

      # Register one more "remember" for this link: bumps remember_count,
      # stamps last_remembered_at with the current time and saves (raising
      # if the save fails).
      #
      # @return [RobotNode] Updated record
      #
      def record_remember!
        tap do |link|
          link.remember_count = link.remember_count + 1
          link.last_remembered_at = Time.current
          link.save!
        end
      end
    end
  end
end
|
data/lib/htm/models/tag.rb
CHANGED
|
@@ -45,6 +45,58 @@ class HTM
|
|
|
45
45
|
find_or_create_by(name: name)
|
|
46
46
|
end
|
|
47
47
|
|
|
48
|
+
# Builds a nested hash from the tag names in the current scope, splitting
# each name on ':' so every hierarchy level becomes one nesting level
# Example: Tag.all.tree => { "database" => { "postgresql" => {} } }
# Example: Tag.with_prefix("database").tree => { "database" => { "postgresql" => {} } }
def self.tree
  all.order(:name).pluck(:name).each_with_object({}) do |tag_name, root|
    # Walk down from the root, creating missing levels along the way
    tag_name.split(':').reduce(root) { |level, segment| level[segment] ||= {} }
  end
end
|
|
66
|
+
|
|
67
|
+
# Renders the tag tree of the current scope as text by passing the result
# of tree to format_tree_branch
# Example: puts Tag.all.tree_string
# Example: puts Tag.with_prefix("database").tree_string
def self.tree_string
  hierarchy = tree
  format_tree_branch(hierarchy)
end
|
|
74
|
+
|
|
75
|
+
# Format a tree branch recursively (internal helper)
#
# Renders one level of a nested hash in directory-listing style, with keys
# in sorted order, then recurses into each key's children. Ancestor indent
# columns are four characters wide so they line up under the four-character
# branch glyphs.
#
# @param node [Hash] Nested hash of name => children hash
# @param is_last_array [Array<Boolean>] For each ancestor level, whether
#   that ancestor was the last entry among its siblings
# @return [String] Formatted lines, each terminated by a newline ('' for an empty hash)
def self.format_tree_branch(node, is_last_array = [])
  # The prefix depends only on the ancestors, so build it once per level
  line_prefix = is_last_array.map { |was_last| was_last ? '    ' : '│   ' }.join
  sorted_keys = node.keys.sort
  last_index = sorted_keys.size - 1

  # +'' gives a mutable buffer even under frozen_string_literal
  sorted_keys.each_with_index.with_object(+'') do |(key, index), result|
    is_last = index == last_index
    branch = is_last ? '└── ' : '├── '
    result << "#{line_prefix}#{branch}#{key}\n"

    # Recurse into children
    children = node[key]
    result << format_tree_branch(children, is_last_array + [is_last]) unless children.empty?
  end
end
|
|
99
|
+
|
|
48
100
|
# Instance methods
|
|
49
101
|
def root_topic
|
|
50
102
|
name.split(':').first
|