htm 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. checksums.yaml +4 -4
  2. data/.envrc +1 -0
  3. data/.tbls.yml +30 -0
  4. data/CHANGELOG.md +30 -0
  5. data/SETUP.md +132 -101
  6. data/db/migrate/20250125000001_add_content_hash_to_nodes.rb +14 -0
  7. data/db/migrate/20250125000002_create_robot_nodes.rb +35 -0
  8. data/db/migrate/20250125000003_remove_source_and_robot_id_from_nodes.rb +28 -0
  9. data/db/migrate/20250126000001_create_working_memories.rb +19 -0
  10. data/db/migrate/20250126000002_remove_unused_columns.rb +12 -0
  11. data/db/schema.sql +226 -43
  12. data/docs/api/database.md +20 -232
  13. data/docs/api/embedding-service.md +1 -7
  14. data/docs/api/htm.md +195 -449
  15. data/docs/api/index.md +1 -7
  16. data/docs/api/long-term-memory.md +342 -590
  17. data/docs/architecture/adrs/001-postgresql-timescaledb.md +1 -1
  18. data/docs/architecture/adrs/003-ollama-embeddings.md +1 -1
  19. data/docs/architecture/adrs/010-redis-working-memory-rejected.md +2 -27
  20. data/docs/architecture/adrs/index.md +2 -13
  21. data/docs/architecture/hive-mind.md +165 -166
  22. data/docs/architecture/index.md +2 -2
  23. data/docs/architecture/overview.md +5 -171
  24. data/docs/architecture/two-tier-memory.md +1 -35
  25. data/docs/assets/images/adr-010-current-architecture.svg +37 -0
  26. data/docs/assets/images/adr-010-proposed-architecture.svg +48 -0
  27. data/docs/assets/images/adr-dependency-tree.svg +93 -0
  28. data/docs/assets/images/class-hierarchy.svg +55 -0
  29. data/docs/assets/images/exception-hierarchy.svg +45 -0
  30. data/docs/assets/images/htm-architecture-overview.svg +83 -0
  31. data/docs/assets/images/htm-complete-memory-flow.svg +160 -0
  32. data/docs/assets/images/htm-context-assembly-flow.svg +148 -0
  33. data/docs/assets/images/htm-eviction-process.svg +141 -0
  34. data/docs/assets/images/htm-memory-addition-flow.svg +138 -0
  35. data/docs/assets/images/htm-memory-recall-flow.svg +152 -0
  36. data/docs/assets/images/htm-node-states.svg +123 -0
  37. data/docs/assets/images/project-structure.svg +78 -0
  38. data/docs/assets/images/test-directory-structure.svg +38 -0
  39. data/{dbdoc → docs/database}/README.md +5 -3
  40. data/{dbdoc → docs/database}/public.node_tags.md +4 -5
  41. data/docs/database/public.node_tags.svg +106 -0
  42. data/{dbdoc → docs/database}/public.nodes.md +3 -8
  43. data/docs/database/public.nodes.svg +152 -0
  44. data/docs/database/public.robot_nodes.md +44 -0
  45. data/docs/database/public.robot_nodes.svg +121 -0
  46. data/{dbdoc → docs/database}/public.robots.md +1 -2
  47. data/docs/database/public.robots.svg +106 -0
  48. data/docs/database/public.working_memories.md +40 -0
  49. data/docs/database/public.working_memories.svg +112 -0
  50. data/{dbdoc → docs/database}/schema.json +342 -110
  51. data/docs/database/schema.svg +223 -0
  52. data/docs/development/index.md +1 -29
  53. data/docs/development/schema.md +84 -324
  54. data/docs/development/testing.md +1 -9
  55. data/docs/getting-started/index.md +47 -0
  56. data/docs/{installation.md → getting-started/installation.md} +2 -2
  57. data/docs/{quick-start.md → getting-started/quick-start.md} +5 -5
  58. data/docs/guides/adding-memories.md +221 -655
  59. data/docs/guides/search-strategies.md +85 -51
  60. data/docs/images/htm-er-diagram.svg +156 -0
  61. data/docs/index.md +16 -31
  62. data/docs/multi_framework_support.md +4 -4
  63. data/examples/basic_usage.rb +18 -16
  64. data/examples/cli_app/htm_cli.rb +86 -8
  65. data/examples/custom_llm_configuration.rb +1 -2
  66. data/examples/example_app/app.rb +11 -14
  67. data/examples/sinatra_app/Gemfile +1 -0
  68. data/examples/sinatra_app/Gemfile.lock +166 -0
  69. data/examples/sinatra_app/app.rb +219 -24
  70. data/lib/htm/active_record_config.rb +10 -3
  71. data/lib/htm/configuration.rb +265 -78
  72. data/lib/htm/{sinatra.rb → integrations/sinatra.rb} +87 -12
  73. data/lib/htm/job_adapter.rb +10 -3
  74. data/lib/htm/long_term_memory.rb +220 -57
  75. data/lib/htm/models/node.rb +36 -7
  76. data/lib/htm/models/robot.rb +30 -4
  77. data/lib/htm/models/robot_node.rb +50 -0
  78. data/lib/htm/models/tag.rb +52 -0
  79. data/lib/htm/models/working_memory_entry.rb +88 -0
  80. data/lib/htm/tasks.rb +4 -0
  81. data/lib/htm/version.rb +1 -1
  82. data/lib/htm.rb +34 -13
  83. data/lib/tasks/htm.rake +32 -1
  84. data/lib/tasks/jobs.rake +7 -3
  85. data/lib/tasks/tags.rake +34 -0
  86. data/mkdocs.yml +56 -9
  87. metadata +61 -31
  88. data/dbdoc/public.node_tags.svg +0 -112
  89. data/dbdoc/public.nodes.svg +0 -118
  90. data/dbdoc/public.robots.svg +0 -90
  91. data/dbdoc/schema.svg +0 -154
  92. /data/{dbdoc → docs/database}/public.node_stats.md +0 -0
  93. /data/{dbdoc → docs/database}/public.node_stats.svg +0 -0
  94. /data/{dbdoc → docs/database}/public.nodes_tags.md +0 -0
  95. /data/{dbdoc → docs/database}/public.nodes_tags.svg +0 -0
  96. /data/{dbdoc → docs/database}/public.ontology_structure.md +0 -0
  97. /data/{dbdoc → docs/database}/public.ontology_structure.svg +0 -0
  98. /data/{dbdoc → docs/database}/public.operations_log.md +0 -0
  99. /data/{dbdoc → docs/database}/public.operations_log.svg +0 -0
  100. /data/{dbdoc → docs/database}/public.relationships.md +0 -0
  101. /data/{dbdoc → docs/database}/public.relationships.svg +0 -0
  102. /data/{dbdoc → docs/database}/public.robot_activity.md +0 -0
  103. /data/{dbdoc → docs/database}/public.robot_activity.svg +0 -0
  104. /data/{dbdoc → docs/database}/public.schema_migrations.md +0 -0
  105. /data/{dbdoc → docs/database}/public.schema_migrations.svg +0 -0
  106. /data/{dbdoc → docs/database}/public.tags.md +0 -0
  107. /data/{dbdoc → docs/database}/public.tags.svg +0 -0
  108. /data/{dbdoc → docs/database}/public.topic_relationships.md +0 -0
  109. /data/{dbdoc → docs/database}/public.topic_relationships.svg +0 -0
@@ -39,44 +39,96 @@ class HTM
39
39
  end
40
40
  end
41
41
 
42
- # Add a node to long-term memory
42
+ # Add a node to long-term memory (with deduplication)
43
43
  #
44
- # Embeddings should be generated client-side and provided via the embedding parameter.
44
+ # If content already exists (by content_hash), links the robot to the existing
45
+ # node and updates timestamps. Otherwise creates a new node.
45
46
  #
46
47
  # @param content [String] Conversation message/utterance
47
- # @param speaker [String] Who said it: 'user' or robot name
48
48
  # @param token_count [Integer] Token count
49
- # @param robot_id [String] Robot identifier
49
+ # @param robot_id [Integer] Robot identifier
50
50
  # @param embedding [Array<Float>, nil] Pre-generated embedding vector
51
- # @return [Integer] Node database ID
52
- #
53
- def add(content:, source:, token_count: 0, robot_id:, embedding: nil)
54
- # Prepare embedding if provided
55
- if embedding
56
- # Pad embedding to 2000 dimensions if needed
57
- actual_dimension = embedding.length
58
- if actual_dimension < 2000
59
- padded_embedding = embedding + Array.new(2000 - actual_dimension, 0.0)
60
- else
61
- padded_embedding = embedding
51
+ # @return [Hash] { node_id:, is_new:, robot_node: }
52
+ #
53
+ def add(content:, token_count: 0, robot_id:, embedding: nil)
54
+ content_hash = HTM::Models::Node.generate_content_hash(content)
55
+
56
+ # Check for existing node with same content
57
+ existing_node = HTM::Models::Node.find_by(content_hash: content_hash)
58
+
59
+ if existing_node
60
+ # Link robot to existing node (or update if already linked)
61
+ robot_node = link_robot_to_node(robot_id: robot_id, node: existing_node)
62
+
63
+ # Update the node's updated_at timestamp
64
+ existing_node.touch
65
+
66
+ {
67
+ node_id: existing_node.id,
68
+ is_new: false,
69
+ robot_node: robot_node
70
+ }
71
+ else
72
+ # Prepare embedding if provided
73
+ embedding_str = nil
74
+ if embedding
75
+ # Pad embedding to 2000 dimensions if needed
76
+ actual_dimension = embedding.length
77
+ padded_embedding = if actual_dimension < 2000
78
+ embedding + Array.new(2000 - actual_dimension, 0.0)
79
+ else
80
+ embedding
81
+ end
82
+ embedding_str = "[#{padded_embedding.join(',')}]"
62
83
  end
63
- embedding_str = "[#{padded_embedding.join(',')}]"
84
+
85
+ # Create new node
86
+ node = HTM::Models::Node.create!(
87
+ content: content,
88
+ content_hash: content_hash,
89
+ token_count: token_count,
90
+ embedding: embedding_str,
91
+ embedding_dimension: embedding&.length
92
+ )
93
+
94
+ # Link robot to new node
95
+ robot_node = link_robot_to_node(robot_id: robot_id, node: node)
96
+
97
+ # Invalidate cache since database content changed
98
+ invalidate_cache!
99
+
100
+ {
101
+ node_id: node.id,
102
+ is_new: true,
103
+ robot_node: robot_node
104
+ }
64
105
  end
106
+ end
65
107
 
66
- # Create node using ActiveRecord
67
- node = HTM::Models::Node.create!(
68
- content: content,
69
- source: source,
70
- token_count: token_count,
71
- robot_id: robot_id,
72
- embedding: embedding ? embedding_str : nil,
73
- embedding_dimension: embedding ? embedding.length : nil
74
- )
108
+ # Link a robot to a node (create or update robot_node record)
109
+ #
110
+ # @param robot_id [Integer] Robot ID
111
+ # @param node [HTM::Models::Node] Node to link
112
+ # @return [HTM::Models::RobotNode] The robot_node link record
113
+ #
114
+ def link_robot_to_node(robot_id:, node:)
115
+ robot_node = HTM::Models::RobotNode.find_by(robot_id: robot_id, node_id: node.id)
75
116
 
76
- # Invalidate cache since database content changed
77
- invalidate_cache!
117
+ if robot_node
118
+ # Existing link - record that robot remembered this again
119
+ robot_node.record_remember!
120
+ else
121
+ # New link
122
+ robot_node = HTM::Models::RobotNode.create!(
123
+ robot_id: robot_id,
124
+ node_id: node.id,
125
+ first_remembered_at: Time.current,
126
+ last_remembered_at: Time.current,
127
+ remember_count: 1
128
+ )
129
+ end
78
130
 
79
- node.id
131
+ robot_node
80
132
  end
81
133
 
82
134
  # Retrieve a node by ID
@@ -240,13 +292,15 @@ class HTM
240
292
 
241
293
  # Mark nodes as evicted from working memory
242
294
  #
243
- # @param node_ids [Array<Integer>] Node IDs
295
+ # Working memory state is now tracked per-robot in the working_memories table
296
+ # (optional persistence). The in-memory WorkingMemory class handles eviction
297
+ # tracking. This method is retained for API compatibility but is a no-op.
298
+ #
299
+ # @param node_ids [Array<Integer>] Node IDs (ignored)
244
300
  # @return [void]
245
301
  #
246
302
  def mark_evicted(node_ids)
247
- return if node_ids.empty?
248
-
249
- HTM::Models::Node.where(id: node_ids).update_all(in_working_memory: false)
303
+ # No-op: working memory is tracked in-memory or via WorkingMemoryEntry model
250
304
  end
251
305
 
252
306
  # Track access for multiple nodes (bulk operation)
@@ -294,7 +348,7 @@ class HTM
294
348
  def stats
295
349
  base_stats = {
296
350
  total_nodes: HTM::Models::Node.count,
297
- nodes_by_robot: HTM::Models::Node.group(:robot_id).count,
351
+ nodes_by_robot: HTM::Models::RobotNode.group(:robot_id).count,
298
352
  total_tags: HTM::Models::Tag.count,
299
353
  oldest_memory: HTM::Models::Node.minimum(:created_at),
300
354
  newest_memory: HTM::Models::Node.maximum(:created_at),
@@ -574,6 +628,32 @@ class HTM
574
628
  .map { |tag| { name: tag.name, usage_count: tag.usage_count } }
575
629
  end
576
630
 
631
+ # Find tags that match terms in the query
632
+ #
633
+ # Searches the tags table for tags where any hierarchy level matches
634
+ # query words. For example, query "PostgreSQL database" would match
635
+ # tags like "database:postgresql", "database:sql", etc.
636
+ #
637
+ # @param query [String] Search query
638
+ # @return [Array<String>] Matching tag names
639
+ #
640
+ def find_query_matching_tags(query)
641
+ return [] if query.nil? || query.strip.empty?
642
+
643
+ # Extract words from query (lowercase, 3+ chars)
644
+ words = query.downcase.split(/\s+/).select { |w| w.length >= 3 }
645
+ return [] if words.empty?
646
+
647
+ # Build LIKE conditions for each word
648
+ # Match tags where any part of the hierarchy contains the word
649
+ conditions = words.map { |w| "name ILIKE ?" }
650
+ values = words.map { |w| "%#{w}%" }
651
+
652
+ HTM::Models::Tag
653
+ .where(conditions.join(' OR '), *values)
654
+ .pluck(:name)
655
+ end
656
+
577
657
  private
578
658
 
579
659
  # Generate cache key for query
@@ -682,7 +762,7 @@ class HTM
682
762
 
683
763
  result = ActiveRecord::Base.connection.select_all(
684
764
  <<~SQL,
685
- SELECT id, content, source, access_count, created_at, robot_id, token_count,
765
+ SELECT id, content, access_count, created_at, token_count,
686
766
  1 - (embedding <=> '#{embedding_str}'::vector) as similarity
687
767
  FROM nodes
688
768
  WHERE created_at BETWEEN '#{timeframe.begin.iso8601}' AND '#{timeframe.end.iso8601}'
@@ -710,7 +790,7 @@ class HTM
710
790
  result = ActiveRecord::Base.connection.select_all(
711
791
  ActiveRecord::Base.sanitize_sql_array([
712
792
  <<~SQL,
713
- SELECT id, content, source, access_count, created_at, robot_id, token_count,
793
+ SELECT id, content, access_count, created_at, token_count,
714
794
  ts_rank(to_tsvector('english', content), plainto_tsquery('english', ?)) as rank
715
795
  FROM nodes
716
796
  WHERE created_at BETWEEN ? AND ?
@@ -731,15 +811,17 @@ class HTM
731
811
 
732
812
  # Uncached hybrid search
733
813
  #
734
- # Generates query embedding client-side, then combines full-text search for
735
- # candidate selection with vector similarity for ranking.
814
+ # Generates query embedding client-side, then combines:
815
+ # 1. Full-text search for content matching
816
+ # 2. Tag matching for categorical relevance
817
+ # 3. Vector similarity for semantic ranking
736
818
  #
737
819
  # @param timeframe [Range] Time range to search
738
820
  # @param query [String] Search query
739
821
  # @param limit [Integer] Maximum results
740
822
  # @param embedding_service [Object] Service to generate query embedding
741
823
  # @param prefilter_limit [Integer] Candidates to consider
742
- # @return [Array<Hash>] Matching nodes
824
+ # @return [Array<Hash>] Matching nodes with similarity and tag_boost scores
743
825
  #
744
826
  def search_hybrid_uncached(timeframe:, query:, limit:, embedding_service:, prefilter_limit:)
745
827
  # Generate query embedding client-side
@@ -753,26 +835,107 @@ class HTM
753
835
  # Convert to PostgreSQL vector format
754
836
  embedding_str = "[#{query_embedding.join(',')}]"
755
837
 
756
- result = ActiveRecord::Base.connection.select_all(
757
- ActiveRecord::Base.sanitize_sql_array([
758
- <<~SQL,
759
- WITH candidates AS (
760
- SELECT id, content, source, access_count, created_at, robot_id, token_count, embedding
761
- FROM nodes
762
- WHERE created_at BETWEEN ? AND ?
763
- AND to_tsvector('english', content) @@ plainto_tsquery('english', ?)
764
- AND embedding IS NOT NULL
838
+ # Find tags that match query terms
839
+ matching_tags = find_query_matching_tags(query)
840
+
841
+ # Build the hybrid query
842
+ # If we have matching tags, include nodes with those tags in the candidate pool
843
+ # NOTE: Hybrid search includes nodes without embeddings using a default
844
+ # similarity score of 0.5. This allows newly created nodes to appear in
845
+ # search results immediately (via fulltext matching) before their embeddings
846
+ # are generated by background jobs. Useful for demos with short timeframes
847
+ # (seconds) where async embedding generation hasn't completed yet.
848
+ # In production with longer timeframes, embeddings are typically ready
849
+ # within 1-5 seconds, so this fallback is rarely used.
850
+
851
+ if matching_tags.any?
852
+ # Escape tag names for SQL
853
+ tag_list = matching_tags.map { |t| ActiveRecord::Base.connection.quote(t) }.join(', ')
854
+ result = ActiveRecord::Base.connection.select_all(
855
+ ActiveRecord::Base.sanitize_sql_array([
856
+ <<~SQL,
857
+ WITH fulltext_candidates AS (
858
+ -- Nodes matching full-text search (with or without embeddings)
859
+ SELECT DISTINCT n.id, n.content, n.access_count, n.created_at, n.token_count, n.embedding
860
+ FROM nodes n
861
+ WHERE n.created_at BETWEEN ? AND ?
862
+ AND to_tsvector('english', n.content) @@ plainto_tsquery('english', ?)
863
+ LIMIT ?
864
+ ),
865
+ tag_candidates AS (
866
+ -- Nodes matching relevant tags (with or without embeddings)
867
+ SELECT DISTINCT n.id, n.content, n.access_count, n.created_at, n.token_count, n.embedding
868
+ FROM nodes n
869
+ JOIN node_tags nt ON nt.node_id = n.id
870
+ JOIN tags t ON t.id = nt.tag_id
871
+ WHERE n.created_at BETWEEN ? AND ?
872
+ AND t.name IN (#{tag_list})
873
+ LIMIT ?
874
+ ),
875
+ all_candidates AS (
876
+ SELECT * FROM fulltext_candidates
877
+ UNION
878
+ SELECT * FROM tag_candidates
879
+ ),
880
+ scored AS (
881
+ SELECT
882
+ ac.id, ac.content, ac.access_count, ac.created_at, ac.token_count,
883
+ CASE
884
+ WHEN ac.embedding IS NOT NULL THEN 1 - (ac.embedding <=> '#{embedding_str}'::vector)
885
+ ELSE 0.5 -- Default similarity for nodes without embeddings
886
+ END as similarity,
887
+ COALESCE((
888
+ SELECT COUNT(DISTINCT t.name)::float / ?
889
+ FROM node_tags nt
890
+ JOIN tags t ON t.id = nt.tag_id
891
+ WHERE nt.node_id = ac.id AND t.name IN (#{tag_list})
892
+ ), 0) as tag_boost
893
+ FROM all_candidates ac
894
+ )
895
+ SELECT id, content, access_count, created_at, token_count,
896
+ similarity, tag_boost,
897
+ (similarity * 0.7 + tag_boost * 0.3) as combined_score
898
+ FROM scored
899
+ ORDER BY combined_score DESC
765
900
  LIMIT ?
766
- )
767
- SELECT id, content, source, access_count, created_at, robot_id, token_count,
768
- 1 - (embedding <=> '#{embedding_str}'::vector) as similarity
769
- FROM candidates
770
- ORDER BY embedding <=> '#{embedding_str}'::vector
771
- LIMIT ?
772
- SQL
773
- timeframe.begin, timeframe.end, query, prefilter_limit, limit
774
- ])
775
- )
901
+ SQL
902
+ timeframe.begin, timeframe.end, query, prefilter_limit,
903
+ timeframe.begin, timeframe.end, prefilter_limit,
904
+ matching_tags.length.to_f,
905
+ limit
906
+ ])
907
+ )
908
+ else
909
+ # No matching tags, fall back to standard hybrid (fulltext + vector)
910
+ # Include nodes without embeddings with a default similarity score
911
+ result = ActiveRecord::Base.connection.select_all(
912
+ ActiveRecord::Base.sanitize_sql_array([
913
+ <<~SQL,
914
+ WITH candidates AS (
915
+ SELECT id, content, access_count, created_at, token_count, embedding
916
+ FROM nodes
917
+ WHERE created_at BETWEEN ? AND ?
918
+ AND to_tsvector('english', content) @@ plainto_tsquery('english', ?)
919
+ LIMIT ?
920
+ )
921
+ SELECT id, content, access_count, created_at, token_count,
922
+ CASE
923
+ WHEN embedding IS NOT NULL THEN 1 - (embedding <=> '#{embedding_str}'::vector)
924
+ ELSE 0.5 -- Default similarity for nodes without embeddings
925
+ END as similarity,
926
+ 0.0 as tag_boost,
927
+ CASE
928
+ WHEN embedding IS NOT NULL THEN 1 - (embedding <=> '#{embedding_str}'::vector)
929
+ ELSE 0.5 -- Default score for nodes without embeddings (fulltext matched)
930
+ END as combined_score
931
+ FROM candidates
932
+ ORDER BY combined_score DESC
933
+ LIMIT ?
934
+ SQL
935
+ timeframe.begin, timeframe.end, query, prefilter_limit, limit
936
+ ])
937
+ )
938
+ end
776
939
 
777
940
  # Track access for retrieved nodes
778
941
  node_ids = result.map { |r| r['id'] }
@@ -1,9 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'digest'
4
+
3
5
  class HTM
4
6
  module Models
5
7
  # Node model - represents a memory node (conversation message)
6
8
  #
9
+ # Nodes are globally unique by content (via content_hash) and can be
10
+ # linked to multiple robots through the robot_nodes join table.
11
+ #
7
12
  # Nearest Neighbor Search (via neighbor gem):
8
13
  # # Find 5 nearest neighbors by cosine distance
9
14
  # neighbors = Node.nearest_neighbors(:embedding, query_vector, distance: "cosine").limit(5)
@@ -18,8 +23,9 @@ class HTM
18
23
  class Node < ActiveRecord::Base
19
24
  self.table_name = 'nodes'
20
25
 
21
- # Associations
22
- belongs_to :robot, class_name: 'HTM::Models::Robot', foreign_key: 'robot_id', primary_key: 'id'
26
+ # Associations - Many-to-many with robots via robot_nodes
27
+ has_many :robot_nodes, class_name: 'HTM::Models::RobotNode', dependent: :destroy
28
+ has_many :robots, through: :robot_nodes, class_name: 'HTM::Models::Robot'
23
29
  has_many :node_tags, class_name: 'HTM::Models::NodeTag', dependent: :destroy
24
30
  has_many :tags, through: :node_tags, class_name: 'HTM::Models::Tag'
25
31
 
@@ -28,21 +34,41 @@ class HTM
28
34
 
29
35
  # Validations
30
36
  validates :content, presence: true
31
- validates :robot_id, presence: true
37
+ validates :content_hash, presence: true, uniqueness: true
32
38
  validates :embedding_dimension, numericality: { greater_than: 0, less_than_or_equal_to: 2000 }, allow_nil: true
33
39
 
34
40
  # Callbacks
41
+ before_validation :set_content_hash, if: -> { content_hash.blank? && content.present? }
35
42
  before_create :set_defaults
36
43
  before_save :update_timestamps
37
44
 
38
45
  # Scopes
39
- scope :by_robot, ->(robot_id) { where(robot_id: robot_id) }
40
- scope :by_source, ->(source) { where(source: source) }
41
- scope :in_working_memory, -> { where(in_working_memory: true) }
46
+ scope :by_robot, ->(robot_id) { joins(:robot_nodes).where(robot_nodes: { robot_id: robot_id }) }
42
47
  scope :recent, -> { order(created_at: :desc) }
43
48
  scope :in_timeframe, ->(start_time, end_time) { where(created_at: start_time..end_time) }
44
49
  scope :with_embeddings, -> { where.not(embedding: nil) }
45
50
 
51
+ # Class methods
52
+
53
+ # Find a node by content hash, or return nil
54
+ #
55
+ # @param content [String] The content to search for
56
+ # @return [Node, nil] The existing node or nil
57
+ #
58
+ def self.find_by_content(content)
59
+ hash = generate_content_hash(content)
60
+ find_by(content_hash: hash)
61
+ end
62
+
63
+ # Generate SHA-256 hash for content
64
+ #
65
+ # @param content [String] Content to hash
66
+ # @return [String] 64-character hex hash
67
+ #
68
+ def self.generate_content_hash(content)
69
+ Digest::SHA256.hexdigest(content.to_s)
70
+ end
71
+
46
72
  # Instance methods
47
73
 
48
74
  # Find nearest neighbors to this node's embedding
@@ -94,8 +120,11 @@ class HTM
94
120
 
95
121
  private
96
122
 
123
+ def set_content_hash
124
+ self.content_hash = self.class.generate_content_hash(content)
125
+ end
126
+
97
127
  def set_defaults
98
- self.in_working_memory ||= false
99
128
  self.created_at ||= Time.current
100
129
  self.updated_at ||= Time.current
101
130
  self.last_accessed ||= Time.current
@@ -3,12 +3,18 @@
3
3
  class HTM
4
4
  module Models
5
5
  # Robot model - represents an LLM agent using the HTM system
6
+ #
7
+ # Robots can share memories through the many-to-many relationship with nodes.
8
+ # When a robot is deleted, only the robot_nodes links are removed; shared
9
+ # nodes remain in the database for other robots.
10
+ #
6
11
  class Robot < ActiveRecord::Base
7
12
  self.table_name = 'robots'
8
13
 
9
- # Associations
10
- has_many :nodes, class_name: 'HTM::Models::Node', dependent: :destroy
11
- has_many :operation_logs, class_name: 'HTM::Models::OperationLog', dependent: :destroy
14
+ # Associations - Many-to-many with nodes via robot_nodes
15
+ # dependent: :destroy removes links only, NOT the shared nodes
16
+ has_many :robot_nodes, class_name: 'HTM::Models::RobotNode', dependent: :destroy
17
+ has_many :nodes, through: :robot_nodes, class_name: 'HTM::Models::Node'
12
18
 
13
19
  # Validations
14
20
  validates :name, presence: true
@@ -34,10 +40,30 @@ class HTM
34
40
  nodes.recent.limit(limit)
35
41
  end
36
42
 
43
+ # Get nodes with their remember metadata for this robot
44
+ #
45
+ # @param limit [Integer] Max nodes to return
46
+ # @return [Array<Hash>] Nodes with remember_count, first/last_remembered_at
47
+ #
48
+ def nodes_with_metadata(limit = 10)
49
+ robot_nodes
50
+ .includes(:node)
51
+ .order(last_remembered_at: :desc)
52
+ .limit(limit)
53
+ .map do |rn|
54
+ {
55
+ node: rn.node,
56
+ remember_count: rn.remember_count,
57
+ first_remembered_at: rn.first_remembered_at,
58
+ last_remembered_at: rn.last_remembered_at
59
+ }
60
+ end
61
+ end
62
+
37
63
  def memory_summary
38
64
  {
39
65
  total_nodes: nodes.count,
40
- in_working_memory: nodes.in_working_memory.count,
66
+ in_working_memory: HTM::Models::WorkingMemoryEntry.where(robot_id: id).count,
41
67
  with_embeddings: nodes.with_embeddings.count
42
68
  }
43
69
  end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ class HTM
4
+ module Models
5
+ # RobotNode Join Model - Links robots to nodes (many-to-many)
6
+ #
7
+ # This model represents the relationship between a robot and a node,
8
+ # tracking when and how many times a robot has "remembered" a piece of content.
9
+ #
10
+ # @example Find all robots that remember a node
11
+ # node.robots
12
+ #
13
+ # @example Find all nodes a robot remembers
14
+ # robot.nodes
15
+ #
16
+ # @example Track remember activity
17
+ # link = RobotNode.find_by(robot: robot, node: node)
18
+ # link.remember_count # => 3
19
+ # link.first_remembered_at
20
+ # link.last_remembered_at
21
+ #
22
+ class RobotNode < ActiveRecord::Base
23
+ self.table_name = 'robot_nodes'
24
+
25
+ belongs_to :robot, class_name: 'HTM::Models::Robot'
26
+ belongs_to :node, class_name: 'HTM::Models::Node'
27
+
28
+ validates :robot_id, presence: true
29
+ validates :node_id, presence: true
30
+ validates :robot_id, uniqueness: { scope: :node_id, message: 'already linked to this node' }
31
+
32
+ # Scopes
33
+ scope :recent, -> { order(last_remembered_at: :desc) }
34
+ scope :by_robot, ->(robot_id) { where(robot_id: robot_id) }
35
+ scope :by_node, ->(node_id) { where(node_id: node_id) }
36
+ scope :frequently_remembered, -> { where('remember_count > 1').order(remember_count: :desc) }
37
+
38
+ # Record that a robot remembered this content again
39
+ #
40
+ # @return [RobotNode] Updated record
41
+ #
42
+ def record_remember!
43
+ self.remember_count += 1
44
+ self.last_remembered_at = Time.current
45
+ save!
46
+ self
47
+ end
48
+ end
49
+ end
50
+ end
@@ -45,6 +45,58 @@ class HTM
45
45
  find_or_create_by(name: name)
46
46
  end
47
47
 
48
+ # Returns a nested hash tree structure from the current scope
49
+ # Example: Tag.all.tree => { "database" => { "postgresql" => {} } }
50
+ # Example: Tag.with_prefix("database").tree => { "database" => { "postgresql" => {} } }
51
+ def self.tree
52
+ tree = {}
53
+
54
+ all.order(:name).pluck(:name).each do |tag_name|
55
+ parts = tag_name.split(':')
56
+ current = tree
57
+
58
+ parts.each do |part|
59
+ current[part] ||= {}
60
+ current = current[part]
61
+ end
62
+ end
63
+
64
+ tree
65
+ end
66
+
67
+ # Returns a formatted string representation of the tag tree
68
+ # Uses directory-style formatting with ├── and └── characters
69
+ # Example: puts Tag.all.tree_string
70
+ # Example: puts Tag.with_prefix("database").tree_string
71
+ def self.tree_string
72
+ format_tree_branch(tree)
73
+ end
74
+
75
+ # Format a tree branch recursively (internal helper)
76
+ def self.format_tree_branch(node, is_last_array = [])
77
+ result = ''
78
+ sorted_keys = node.keys.sort
79
+
80
+ sorted_keys.each_with_index do |key, index|
81
+ is_last = (index == sorted_keys.size - 1)
82
+
83
+ # Build prefix from parent branches
84
+ line_prefix = is_last_array.map { |was_last| was_last ? ' ' : '│ ' }.join
85
+
86
+ # Add branch character and key
87
+ branch = is_last ? '└── ' : '├── '
88
+ result += "#{line_prefix}#{branch}#{key}\n"
89
+
90
+ # Recurse into children
91
+ children = node[key]
92
+ unless children.empty?
93
+ result += format_tree_branch(children, is_last_array + [is_last])
94
+ end
95
+ end
96
+
97
+ result
98
+ end
99
+
48
100
  # Instance methods
49
101
  def root_topic
50
102
  name.split(':').first