htm 0.0.18 → 0.0.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +119 -1
  3. data/README.md +12 -0
  4. data/Rakefile +104 -18
  5. data/db/migrate/00001_enable_extensions.rb +9 -5
  6. data/db/migrate/00002_create_robots.rb +18 -6
  7. data/db/migrate/00003_create_file_sources.rb +30 -17
  8. data/db/migrate/00004_create_nodes.rb +60 -48
  9. data/db/migrate/00005_create_tags.rb +24 -12
  10. data/db/migrate/00006_create_node_tags.rb +28 -13
  11. data/db/migrate/00007_create_robot_nodes.rb +40 -26
  12. data/db/schema.sql +17 -1
  13. data/db/seeds.rb +34 -34
  14. data/docs/api/embedding-service.md +140 -110
  15. data/docs/api/yard/HTM/ActiveRecordConfig.md +6 -0
  16. data/docs/api/yard/HTM/Config.md +173 -0
  17. data/docs/api/yard/HTM/ConfigSection.md +28 -0
  18. data/docs/api/yard/HTM/Database.md +1 -1
  19. data/docs/api/yard/HTM/Railtie.md +2 -2
  20. data/docs/api/yard/HTM.md +0 -57
  21. data/docs/api/yard/index.csv +76 -61
  22. data/docs/api/yard-reference.md +2 -1
  23. data/docs/architecture/adrs/003-ollama-embeddings.md +45 -36
  24. data/docs/architecture/adrs/004-hive-mind.md +1 -1
  25. data/docs/architecture/adrs/008-robot-identification.md +1 -1
  26. data/docs/architecture/index.md +11 -9
  27. data/docs/architecture/overview.md +11 -7
  28. data/docs/assets/images/balanced-strategy-decay.svg +41 -0
  29. data/docs/assets/images/class-hierarchy.svg +1 -1
  30. data/docs/assets/images/eviction-priority.svg +43 -0
  31. data/docs/assets/images/exception-hierarchy.svg +2 -2
  32. data/docs/assets/images/hive-mind-shared-memory.svg +52 -0
  33. data/docs/assets/images/htm-architecture-overview.svg +3 -3
  34. data/docs/assets/images/htm-core-components.svg +4 -4
  35. data/docs/assets/images/htm-layered-architecture.svg +1 -1
  36. data/docs/assets/images/htm-memory-addition-flow.svg +2 -2
  37. data/docs/assets/images/htm-memory-recall-flow.svg +2 -2
  38. data/docs/assets/images/memory-topology.svg +53 -0
  39. data/docs/assets/images/two-tier-memory-architecture.svg +55 -0
  40. data/docs/database/naming-convention.md +244 -0
  41. data/docs/database_rake_tasks.md +31 -0
  42. data/docs/development/rake-tasks.md +80 -35
  43. data/docs/development/setup.md +76 -44
  44. data/docs/examples/basic-usage.md +133 -0
  45. data/docs/examples/config-files.md +170 -0
  46. data/docs/examples/file-loading.md +208 -0
  47. data/docs/examples/index.md +116 -0
  48. data/docs/examples/llm-configuration.md +168 -0
  49. data/docs/examples/mcp-client.md +172 -0
  50. data/docs/examples/rails-integration.md +173 -0
  51. data/docs/examples/robot-groups.md +210 -0
  52. data/docs/examples/sinatra-integration.md +218 -0
  53. data/docs/examples/standalone-app.md +216 -0
  54. data/docs/examples/telemetry.md +224 -0
  55. data/docs/examples/timeframes.md +143 -0
  56. data/docs/getting-started/installation.md +97 -40
  57. data/docs/getting-started/quick-start.md +28 -11
  58. data/docs/guides/configuration.md +515 -0
  59. data/docs/guides/file-loading.md +322 -0
  60. data/docs/guides/getting-started.md +40 -9
  61. data/docs/guides/index.md +3 -3
  62. data/docs/guides/mcp-server.md +100 -13
  63. data/docs/guides/propositions.md +264 -0
  64. data/docs/guides/recalling-memories.md +4 -4
  65. data/docs/guides/search-strategies.md +3 -3
  66. data/docs/guides/tags.md +318 -0
  67. data/docs/guides/telemetry.md +229 -0
  68. data/docs/index.md +8 -16
  69. data/docs/{architecture → robots}/hive-mind.md +8 -111
  70. data/docs/robots/index.md +73 -0
  71. data/docs/{guides → robots}/multi-robot.md +3 -3
  72. data/docs/{guides → robots}/robot-groups.md +8 -7
  73. data/docs/{architecture → robots}/two-tier-memory.md +13 -149
  74. data/docs/robots/why-robots.md +85 -0
  75. data/examples/.envrc +6 -0
  76. data/examples/.gitignore +2 -0
  77. data/examples/00_create_examples_db.rb +94 -0
  78. data/examples/{basic_usage.rb → 01_basic_usage.rb} +12 -16
  79. data/examples/{custom_llm_configuration.rb → 03_custom_llm_configuration.rb} +13 -3
  80. data/examples/{file_loader_usage.rb → 04_file_loader_usage.rb} +11 -14
  81. data/examples/{timeframe_demo.rb → 05_timeframe_demo.rb} +10 -3
  82. data/examples/{example_app → 06_example_app}/app.rb +15 -15
  83. data/examples/{cli_app → 07_cli_app}/htm_cli.rb +15 -22
  84. data/examples/08_sinatra_app/Gemfile.lock +241 -0
  85. data/examples/{sinatra_app → 08_sinatra_app}/app.rb +19 -18
  86. data/examples/{mcp_client.rb → 09_mcp_client.rb} +5 -8
  87. data/examples/{telemetry → 10_telemetry}/SETUP_README.md +1 -1
  88. data/examples/{telemetry → 10_telemetry}/demo.rb +14 -10
  89. data/examples/11_robot_groups/README.md +335 -0
  90. data/examples/{robot_groups → 11_robot_groups/lib}/robot_worker.rb +17 -3
  91. data/examples/{robot_groups → 11_robot_groups}/multi_process.rb +9 -9
  92. data/examples/{robot_groups → 11_robot_groups}/same_process.rb +9 -12
  93. data/examples/{rails_app → 12_rails_app}/Gemfile +3 -0
  94. data/examples/{rails_app → 12_rails_app}/Gemfile.lock +87 -58
  95. data/examples/{rails_app → 12_rails_app}/app/controllers/dashboard_controller.rb +10 -6
  96. data/examples/{rails_app → 12_rails_app}/app/controllers/files_controller.rb +5 -5
  97. data/examples/{rails_app → 12_rails_app}/app/controllers/memories_controller.rb +11 -7
  98. data/examples/{rails_app → 12_rails_app}/app/controllers/robots_controller.rb +8 -8
  99. data/examples/12_rails_app/app/controllers/tags_controller.rb +36 -0
  100. data/examples/{rails_app → 12_rails_app}/app/views/dashboard/index.html.erb +2 -2
  101. data/examples/{rails_app → 12_rails_app}/app/views/files/new.html.erb +5 -2
  102. data/examples/{rails_app → 12_rails_app}/app/views/memories/_memory_card.html.erb +3 -3
  103. data/examples/{rails_app → 12_rails_app}/app/views/memories/deleted.html.erb +3 -3
  104. data/examples/{rails_app → 12_rails_app}/app/views/memories/edit.html.erb +3 -3
  105. data/examples/{rails_app → 12_rails_app}/app/views/memories/show.html.erb +4 -4
  106. data/examples/{rails_app → 12_rails_app}/app/views/robots/index.html.erb +2 -2
  107. data/examples/{rails_app → 12_rails_app}/app/views/robots/show.html.erb +4 -4
  108. data/examples/{rails_app → 12_rails_app}/app/views/search/index.html.erb +1 -1
  109. data/examples/{rails_app → 12_rails_app}/app/views/tags/index.html.erb +2 -2
  110. data/examples/{rails_app → 12_rails_app}/app/views/tags/show.html.erb +1 -1
  111. data/examples/12_rails_app/config/initializers/htm.rb +7 -0
  112. data/examples/12_rails_app/config/initializers/rack.rb +5 -0
  113. data/examples/README.md +230 -211
  114. data/examples/examples_helper.rb +138 -0
  115. data/lib/htm/config/builder.rb +167 -0
  116. data/lib/htm/config/database.rb +317 -0
  117. data/lib/htm/config/defaults.yml +41 -13
  118. data/lib/htm/config/section.rb +74 -0
  119. data/lib/htm/config/validator.rb +83 -0
  120. data/lib/htm/config.rb +65 -361
  121. data/lib/htm/database.rb +85 -127
  122. data/lib/htm/errors.rb +14 -0
  123. data/lib/htm/integrations/sinatra.rb +13 -44
  124. data/lib/htm/job_adapter.rb +75 -1
  125. data/lib/htm/jobs/generate_embedding_job.rb +3 -4
  126. data/lib/htm/jobs/generate_propositions_job.rb +4 -5
  127. data/lib/htm/jobs/generate_tags_job.rb +16 -15
  128. data/lib/htm/loaders/defaults_loader.rb +23 -0
  129. data/lib/htm/loaders/markdown_loader.rb +17 -15
  130. data/lib/htm/loaders/xdg_config_loader.rb +9 -9
  131. data/lib/htm/long_term_memory/fulltext_search.rb +14 -14
  132. data/lib/htm/long_term_memory/hybrid_search.rb +396 -229
  133. data/lib/htm/long_term_memory/node_operations.rb +24 -23
  134. data/lib/htm/long_term_memory/relevance_scorer.rb +23 -20
  135. data/lib/htm/long_term_memory/robot_operations.rb +4 -4
  136. data/lib/htm/long_term_memory/tag_operations.rb +91 -77
  137. data/lib/htm/long_term_memory/vector_search.rb +4 -5
  138. data/lib/htm/long_term_memory.rb +13 -13
  139. data/lib/htm/mcp/cli.rb +115 -8
  140. data/lib/htm/mcp/resources.rb +4 -3
  141. data/lib/htm/mcp/server.rb +5 -4
  142. data/lib/htm/mcp/tools.rb +37 -28
  143. data/lib/htm/migration.rb +72 -0
  144. data/lib/htm/models/file_source.rb +52 -31
  145. data/lib/htm/models/node.rb +224 -108
  146. data/lib/htm/models/node_tag.rb +49 -28
  147. data/lib/htm/models/robot.rb +38 -27
  148. data/lib/htm/models/robot_node.rb +63 -35
  149. data/lib/htm/models/tag.rb +126 -123
  150. data/lib/htm/observability.rb +45 -41
  151. data/lib/htm/proposition_service.rb +76 -7
  152. data/lib/htm/railtie.rb +2 -2
  153. data/lib/htm/robot_group.rb +30 -18
  154. data/lib/htm/sequel_config.rb +215 -0
  155. data/lib/htm/sql_builder.rb +14 -16
  156. data/lib/htm/tag_service.rb +78 -0
  157. data/lib/htm/tasks.rb +3 -0
  158. data/lib/htm/version.rb +1 -1
  159. data/lib/htm/workflows/remember_workflow.rb +213 -0
  160. data/lib/htm.rb +27 -22
  161. data/lib/tasks/db.rake +0 -2
  162. data/lib/tasks/doc.rake +2 -2
  163. data/lib/tasks/files.rake +11 -18
  164. data/lib/tasks/htm.rake +190 -62
  165. data/lib/tasks/jobs.rake +179 -54
  166. data/lib/tasks/tags.rake +8 -13
  167. data/mkdocs.yml +33 -8
  168. data/scripts/backfill_parent_tags.rb +376 -0
  169. data/scripts/normalize_plural_tags.rb +335 -0
  170. metadata +168 -86
  171. data/docs/api/yard/HTM/Configuration.md +0 -240
  172. data/docs/telemetry.md +0 -391
  173. data/examples/rails_app/app/controllers/tags_controller.rb +0 -30
  174. data/examples/sinatra_app/Gemfile.lock +0 -166
  175. data/lib/htm/active_record_config.rb +0 -104
  176. /data/examples/{config_file_example → 02_config_file_example}/README.md +0 -0
  177. /data/examples/{config_file_example → 02_config_file_example}/config/htm.local.yml +0 -0
  178. /data/examples/{config_file_example → 02_config_file_example}/custom_config.yml +0 -0
  179. /data/examples/{config_file_example → 02_config_file_example}/show_config.rb +0 -0
  180. /data/examples/{example_app → 06_example_app}/Rakefile +0 -0
  181. /data/examples/{cli_app → 07_cli_app}/README.md +0 -0
  182. /data/examples/{sinatra_app → 08_sinatra_app}/Gemfile +0 -0
  183. /data/examples/{telemetry → 10_telemetry}/README.md +0 -0
  184. /data/examples/{telemetry → 10_telemetry}/grafana/dashboards/htm-metrics.json +0 -0
  185. /data/examples/{rails_app → 12_rails_app}/.gitignore +0 -0
  186. /data/examples/{rails_app → 12_rails_app}/Procfile.dev +0 -0
  187. /data/examples/{rails_app → 12_rails_app}/README.md +0 -0
  188. /data/examples/{rails_app → 12_rails_app}/Rakefile +0 -0
  189. /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/application.css +0 -0
  190. /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/inter-font.css +0 -0
  191. /data/examples/{rails_app → 12_rails_app}/app/controllers/application_controller.rb +0 -0
  192. /data/examples/{rails_app → 12_rails_app}/app/controllers/search_controller.rb +0 -0
  193. /data/examples/{rails_app → 12_rails_app}/app/javascript/application.js +0 -0
  194. /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/application.js +0 -0
  195. /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/index.js +0 -0
  196. /data/examples/{rails_app → 12_rails_app}/app/views/files/index.html.erb +0 -0
  197. /data/examples/{rails_app → 12_rails_app}/app/views/files/show.html.erb +0 -0
  198. /data/examples/{rails_app → 12_rails_app}/app/views/layouts/application.html.erb +0 -0
  199. /data/examples/{rails_app → 12_rails_app}/app/views/memories/index.html.erb +0 -0
  200. /data/examples/{rails_app → 12_rails_app}/app/views/memories/new.html.erb +0 -0
  201. /data/examples/{rails_app → 12_rails_app}/app/views/robots/new.html.erb +0 -0
  202. /data/examples/{rails_app → 12_rails_app}/app/views/shared/_navbar.html.erb +0 -0
  203. /data/examples/{rails_app → 12_rails_app}/app/views/shared/_stat_card.html.erb +0 -0
  204. /data/examples/{rails_app → 12_rails_app}/bin/dev +0 -0
  205. /data/examples/{rails_app → 12_rails_app}/bin/rails +0 -0
  206. /data/examples/{rails_app → 12_rails_app}/bin/rake +0 -0
  207. /data/examples/{rails_app → 12_rails_app}/config/application.rb +0 -0
  208. /data/examples/{rails_app → 12_rails_app}/config/boot.rb +0 -0
  209. /data/examples/{rails_app → 12_rails_app}/config/database.yml +0 -0
  210. /data/examples/{rails_app → 12_rails_app}/config/environment.rb +0 -0
  211. /data/examples/{rails_app → 12_rails_app}/config/importmap.rb +0 -0
  212. /data/examples/{rails_app → 12_rails_app}/config/routes.rb +0 -0
  213. /data/examples/{rails_app → 12_rails_app}/config/tailwind.config.js +0 -0
  214. /data/examples/{rails_app → 12_rails_app}/config.ru +0 -0
  215. /data/examples/{rails_app → 12_rails_app}/log/.keep +0 -0
  216. /data/examples/{rails_app → 12_rails_app}/tmp/local_secret.txt +0 -0
@@ -9,71 +9,153 @@ class HTM
9
9
  # Nodes are globally unique by content (via content_hash) and can be
10
10
  # linked to multiple robots through the robot_nodes join table.
11
11
  #
12
- # Nearest Neighbor Search (via neighbor gem):
13
- # # Find 5 nearest neighbors by cosine distance
14
- # neighbors = Node.nearest_neighbors(:embedding, query_vector, distance: "cosine").limit(5)
15
- #
16
- # # Get distance to query for each result
17
- # neighbors.each do |node|
18
- # puts "Node #{node.id}: distance = #{node.neighbor_distance}"
19
- # end
20
- #
21
- # Distance metrics: "cosine", "euclidean", "inner_product", "taxicab"
22
- #
23
- class Node < ActiveRecord::Base
24
- self.table_name = 'nodes'
25
-
26
- # Associations - Many-to-many with robots via robot_nodes
27
- has_many :robot_nodes, class_name: 'HTM::Models::RobotNode', dependent: :destroy
28
- has_many :robots, through: :robot_nodes, class_name: 'HTM::Models::Robot'
29
- has_many :node_tags, class_name: 'HTM::Models::NodeTag', dependent: :destroy
30
- has_many :tags, through: :node_tags, class_name: 'HTM::Models::Tag'
12
+ class Node < Sequel::Model(:nodes)
13
+ # Associations
14
+ one_to_many :robot_nodes, class: 'HTM::Models::RobotNode', key: :node_id
15
+ many_to_many :robots, class: 'HTM::Models::Robot',
16
+ join_table: :robot_nodes, left_key: :node_id, right_key: :robot_id
17
+ one_to_many :node_tags, class: 'HTM::Models::NodeTag', key: :node_id
18
+ many_to_many :tags, class: 'HTM::Models::Tag',
19
+ join_table: :node_tags, left_key: :node_id, right_key: :tag_id
31
20
 
32
21
  # Optional source file association (for nodes loaded from files)
33
- belongs_to :file_source, class_name: 'HTM::Models::FileSource',
34
- foreign_key: :source_id, optional: true
35
-
36
- # Neighbor - vector similarity search
37
- has_neighbors :embedding
22
+ many_to_one :file_source, class: 'HTM::Models::FileSource', key: :source_id
23
+
24
+ # Plugins
25
+ plugin :validation_helpers
26
+ plugin :timestamps, update_on_create: true
27
+
28
+ # Override embedding getter to return Array instead of String
29
+ # pgvector stores as string format "[0.1,0.2,...]" and we need Array<Float>
30
+ def embedding
31
+ raw = super
32
+ return nil if raw.nil?
33
+ return raw if raw.is_a?(Array)
34
+
35
+ # Parse string format: "[0.1,0.2,0.3]"
36
+ if raw.is_a?(String)
37
+ raw.gsub(/[\[\]]/, '').split(',').map(&:to_f)
38
+ else
39
+ raw.to_a
40
+ end
41
+ end
38
42
 
39
43
  # Validations
40
- validates :content, presence: true
41
- validates :content_hash, presence: true, uniqueness: true
44
+ def validate
45
+ super
46
+ validates_presence [:content, :content_hash]
47
+ validates_unique :content_hash
48
+ end
49
+
50
+ # Dataset methods (scopes)
51
+ dataset_module do
52
+ def active
53
+ where(deleted_at: nil)
54
+ end
55
+
56
+ def by_robot(robot_id)
57
+ join(:robot_nodes, node_id: :id).where(robot_nodes__robot_id: robot_id)
58
+ end
59
+
60
+ def recent
61
+ order(Sequel.desc(:created_at))
62
+ end
63
+
64
+ def in_timeframe(start_time, end_time)
65
+ where(created_at: start_time..end_time)
66
+ end
67
+
68
+ def with_embeddings
69
+ exclude(embedding: nil)
70
+ end
42
71
 
43
- # Callbacks
44
- before_validation :set_content_hash, if: -> { content_hash.blank? && content.present? }
45
- before_create :set_defaults
46
- before_save :update_timestamps
72
+ def from_source(source_id)
73
+ where(source_id: source_id).order(:chunk_position)
74
+ end
47
75
 
48
- # Scopes
49
- # Soft delete - by default, only show non-deleted nodes
50
- default_scope { where(deleted_at: nil) }
76
+ # Proposition scopes
77
+ def propositions
78
+ where(Sequel.lit("metadata->>'is_proposition' = 'true'"))
79
+ end
51
80
 
52
- scope :by_robot, ->(robot_id) { joins(:robot_nodes).where(robot_nodes: { robot_id: robot_id }) }
53
- scope :recent, -> { order(created_at: :desc) }
54
- scope :in_timeframe, ->(start_time, end_time) { where(created_at: start_time..end_time) }
55
- scope :with_embeddings, -> { where.not(embedding: nil) }
56
- scope :from_source, ->(source_id) { where(source_id: source_id).order(:chunk_position) }
81
+ def non_propositions
82
+ where(Sequel.lit("metadata IS NULL OR metadata->>'is_proposition' IS NULL OR metadata->>'is_proposition' != 'true'"))
83
+ end
57
84
 
58
- # Proposition scopes
59
- scope :propositions, -> { where("metadata->>'is_proposition' = 'true'") }
60
- scope :non_propositions, -> { where("metadata IS NULL OR metadata->>'is_proposition' IS NULL OR metadata->>'is_proposition' != 'true'") }
85
+ # Soft delete scopes
86
+ def deleted
87
+ exclude(deleted_at: nil)
88
+ end
61
89
 
62
- # Soft delete scopes
63
- scope :deleted, -> { unscoped.where.not(deleted_at: nil) }
64
- scope :with_deleted, -> { unscoped }
65
- scope :deleted_before, ->(time) { deleted.where('deleted_at < ?', time) }
90
+ def with_deleted
91
+ unfiltered
92
+ end
93
+
94
+ def deleted_before(time)
95
+ deleted.where { deleted_at < time }
96
+ end
97
+
98
+ # Find nearest neighbors by vector similarity
99
+ #
100
+ # @param column [Symbol] Column containing the embedding (typically :embedding)
101
+ # @param query_embedding [Array<Numeric>] Query vector to find neighbors for
102
+ # @param distance [String] Distance metric ("cosine", "euclidean", "inner_product")
103
+ # @return [Sequel::Dataset] Dataset ordered by distance with neighbor_distance column
104
+ #
105
+ def nearest_neighbors(column, query_embedding, distance: "cosine")
106
+ return where(Sequel.lit('1=0')) unless query_embedding.is_a?(Array) && query_embedding.any?
107
+
108
+ # Convert embedding to vector string format
109
+ vector_str = "[#{query_embedding.map(&:to_f).join(',')}]"
110
+
111
+ # Select distance operator based on metric
112
+ operator = case distance.to_s
113
+ when "cosine" then "<=>"
114
+ when "euclidean", "l2" then "<->"
115
+ when "inner_product" then "<#>"
116
+ else "<=>"
117
+ end
118
+
119
+ # Return dataset with distance calculation
120
+ select_all(:nodes)
121
+ .select_append(Sequel.lit("(#{column} #{operator} ?::vector) AS neighbor_distance", vector_str))
122
+ .exclude(column => nil)
123
+ .order(Sequel.lit("#{column} #{operator} ?::vector", vector_str))
124
+ end
125
+ end
126
+
127
+ # Apply default scope for active records
128
+ set_dataset(dataset.where(Sequel[:nodes][:deleted_at] => nil))
129
+
130
+ # Hooks
131
+ def before_validation
132
+ if content_hash.nil? && content
133
+ self.content_hash = self.class.generate_content_hash(content)
134
+ end
135
+ super
136
+ end
137
+
138
+ def before_create
139
+ self.created_at ||= Time.now
140
+ self.updated_at ||= Time.now
141
+ self.last_accessed ||= Time.now
142
+ super
143
+ end
144
+
145
+ def before_save
146
+ self.updated_at = Time.now if changed_columns.any?
147
+ super
148
+ end
66
149
 
67
150
  # Class methods
68
151
 
69
152
  # Permanently delete all soft-deleted nodes older than the specified time
70
153
  #
71
- # @param older_than [Time, ActiveSupport::Duration] Delete nodes soft-deleted before this time
72
- # Can be a Time object or a duration like 30.days.ago
154
+ # @param older_than [Time] Delete nodes soft-deleted before this time
73
155
  # @return [Integer] Number of nodes permanently deleted
74
156
  #
75
157
  def self.purge_deleted(older_than:)
76
- deleted_before(older_than).destroy_all.count
158
+ dataset.unfiltered.where { deleted_at < older_than }.delete
77
159
  end
78
160
 
79
161
  # Find a node by content hash, or return nil
@@ -83,7 +165,7 @@ class HTM
83
165
  #
84
166
  def self.find_by_content(content)
85
167
  hash = generate_content_hash(content)
86
- find_by(content_hash: hash)
168
+ first(content_hash: hash)
87
169
  end
88
170
 
89
171
  # Generate SHA-256 hash for content
@@ -98,66 +180,105 @@ class HTM
98
180
  # Instance methods
99
181
 
100
182
  # Find nearest neighbors to this node's embedding
183
+ #
101
184
  # @param limit [Integer] number of neighbors to return (default: 10)
102
- # @param distance [String] distance metric: "cosine", "euclidean", "inner_product", "taxicab" (default: "cosine")
103
- # @return [ActiveRecord::Relation] ordered by distance (closest first)
185
+ # @param distance [String] distance metric (default: "cosine")
186
+ # @return [Array<Node>] ordered by distance (closest first)
187
+ #
104
188
  def nearest_neighbors(limit: 10, distance: "cosine")
105
- return self.class.none unless embedding.present?
189
+ return [] unless embedding
190
+
191
+ # Use raw SQL for vector similarity search
192
+ db = self.class.db
193
+
194
+ # Handle embedding - might be String or Array depending on Sequel pg extension
195
+ emb = embedding_array
196
+ return [] if emb.nil? || emb.empty?
197
+
198
+ vector_str = "[#{emb.join(',')}]"
199
+
200
+ sql = <<-SQL
201
+ SELECT nodes.*, (embedding <=> '#{vector_str}'::vector) AS neighbor_distance
202
+ FROM nodes
203
+ WHERE embedding IS NOT NULL
204
+ AND deleted_at IS NULL
205
+ AND id != #{id}
206
+ ORDER BY embedding <=> '#{vector_str}'::vector
207
+ LIMIT #{limit}
208
+ SQL
209
+
210
+ # Use call() to create instances from raw hashes without mass assignment restrictions
211
+ db.fetch(sql).all.map do |row|
212
+ node = self.class.call(row)
213
+ # Store neighbor_distance as an instance variable
214
+ node.instance_variable_set(:@neighbor_distance, row[:neighbor_distance])
215
+ node
216
+ end
217
+ end
106
218
 
107
- self.class.with_embeddings
108
- .where.not(id: id) # Exclude self
109
- .nearest_neighbors(:embedding, embedding, distance: distance)
110
- .limit(limit)
219
+ # Accessor for neighbor_distance from nearest_neighbors query
220
+ # Works with both:
221
+ # - Instance method (stores in @neighbor_distance)
222
+ # - Dataset method (stores in values hash from SELECT)
223
+ def neighbor_distance
224
+ @neighbor_distance || values[:neighbor_distance]
225
+ end
226
+
227
+ # Get embedding as an Array (handles both String and Array storage)
228
+ # Note: The `embedding` getter already returns Array, this is an alias for compatibility
229
+ #
230
+ # @return [Array<Float>, nil] The embedding vector as an array
231
+ #
232
+ def embedding_array
233
+ embedding
111
234
  end
112
235
 
113
236
  # Calculate cosine similarity to another embedding or node
237
+ #
114
238
  # @param other [Array, Node] query embedding vector or another Node
115
239
  # @return [Float] similarity score (0.0 to 1.0, higher is more similar)
240
+ #
116
241
  def similarity_to(other)
117
- query_embedding = other.is_a?(Node) ? other.embedding : other
118
- return nil unless embedding.present? && query_embedding.present?
242
+ query_embedding = other.is_a?(Node) ? other.embedding_array : other
243
+ return nil unless embedding_array && query_embedding
244
+
245
+ # Handle query_embedding that might be a String
246
+ if query_embedding.is_a?(String)
247
+ query_embedding = query_embedding.gsub(/[\[\]]/, '').split(',').map(&:to_f)
248
+ end
119
249
 
120
- # Validate embedding is an array of finite numeric values
121
250
  unless query_embedding.is_a?(Array) && query_embedding.all? { |v| v.is_a?(Numeric) && v.finite? }
122
251
  return nil
123
252
  end
124
253
 
125
- # Calculate cosine similarity: 1 - (embedding <=> query_embedding)
126
- # Safely format the array as a PostgreSQL vector literal
127
- vector_str = "[#{query_embedding.map { |v| v.to_f }.join(',')}]"
128
- conn = self.class.connection
129
- quoted_vector = conn.quote(vector_str)
130
- quoted_id = conn.quote(id)
254
+ vector_str = "[#{query_embedding.map(&:to_f).join(',')}]"
255
+
256
+ result = self.class.db.fetch(
257
+ "SELECT 1 - (embedding <=> ?::vector) AS similarity FROM nodes WHERE id = ?",
258
+ vector_str, id
259
+ ).first
131
260
 
132
- result = conn.select_value(
133
- "SELECT 1 - (embedding <=> #{quoted_vector}::vector) FROM nodes WHERE id = #{quoted_id}"
134
- )
135
- result&.to_f
261
+ result&.[](:similarity)&.to_f
136
262
  end
137
263
 
138
264
  # Get all tag names associated with this node
139
265
  #
140
- # @return [Array<String>] Array of hierarchical tag names (e.g., ["database:postgresql", "ai:llm"])
266
+ # @return [Array<String>] Array of hierarchical tag names
141
267
  #
142
268
  def tag_names
143
- tags.pluck(:name)
269
+ tags_dataset.select_map(:name)
144
270
  end
145
271
 
146
- # Add tags to this node (creates tags if they don't exist)
272
+ # Add tags to this node (creates tags and all parent tags if they don't exist)
147
273
  #
148
274
  # @param tag_names [Array<String>, String] Tag name(s) to add
149
275
  # @return [void]
150
276
  #
151
- # @example Add a single tag
152
- # node.add_tags("database:postgresql")
153
- #
154
- # @example Add multiple tags
155
- # node.add_tags(["database:postgresql", "ai:embeddings"])
156
- #
157
277
  def add_tags(tag_names)
158
278
  Array(tag_names).each do |tag_name|
159
- tag = HTM::Models::Tag.find_or_create_by(name: tag_name)
160
- node_tags.find_or_create_by(tag_id: tag.id)
279
+ HTM::Models::Tag.find_or_create_with_ancestors(tag_name).each do |tag|
280
+ HTM::Models::NodeTag.find_or_create(node_id: id, tag_id: tag.id)
281
+ end
161
282
  end
162
283
  end
163
284
 
@@ -167,45 +288,47 @@ class HTM
167
288
  # @return [void]
168
289
  #
169
290
  def remove_tag(tag_name)
170
- tag = HTM::Models::Tag.find_by(name: tag_name)
291
+ tag = HTM::Models::Tag.first(name: tag_name)
171
292
  return unless tag
172
293
 
173
- node_tags.where(tag_id: tag.id).destroy_all
294
+ node_tags_dataset.where(tag_id: tag.id).delete
174
295
  end
175
296
 
176
297
  # Soft delete - mark node as deleted without removing from database
177
- # Also cascades soft delete to associated robot_nodes and node_tags
178
298
  #
179
299
  # @return [Boolean] true if soft deleted successfully
180
300
  #
181
301
  def soft_delete!
182
- transaction do
183
- now = Time.current
184
- update!(deleted_at: now)
302
+ db.transaction do
303
+ now = Time.now
304
+ update(deleted_at: now)
185
305
 
186
306
  # Cascade soft delete to associated robot_nodes
187
- robot_nodes.update_all(deleted_at: now)
307
+ HTM::Models::RobotNode.where(node_id: id).update(deleted_at: now)
188
308
 
189
309
  # Cascade soft delete to associated node_tags
190
- node_tags.update_all(deleted_at: now)
310
+ HTM::Models::NodeTag.where(node_id: id).update(deleted_at: now)
191
311
  end
192
312
  true
193
313
  end
194
314
 
195
315
  # Restore a soft-deleted node
196
- # Also cascades restoration to associated robot_nodes and node_tags
197
316
  #
198
317
  # @return [Boolean] true if restored successfully
199
318
  #
200
319
  def restore!
201
- transaction do
202
- update!(deleted_at: nil)
320
+ db.transaction do
321
+ # Use unfiltered dataset to bypass the default scope that excludes deleted records
322
+ self.class.dataset.unfiltered.where(id: id).update(deleted_at: nil)
203
323
 
204
324
  # Cascade restoration to associated robot_nodes
205
- HTM::Models::RobotNode.unscoped.where(node_id: id).update_all(deleted_at: nil)
325
+ HTM::Models::RobotNode.dataset.unfiltered.where(node_id: id).update(deleted_at: nil)
206
326
 
207
327
  # Cascade restoration to associated node_tags
208
- HTM::Models::NodeTag.unscoped.where(node_id: id).update_all(deleted_at: nil)
328
+ HTM::Models::NodeTag.dataset.unfiltered.where(node_id: id).update(deleted_at: nil)
329
+
330
+ # Refresh this instance to reflect the change
331
+ self.deleted_at = nil
209
332
  end
210
333
  true
211
334
  end
@@ -215,7 +338,7 @@ class HTM
215
338
  # @return [Boolean] true if deleted_at is set
216
339
  #
217
340
  def deleted?
218
- deleted_at.present?
341
+ !deleted_at.nil?
219
342
  end
220
343
 
221
344
  # Check if node is a proposition (extracted atomic fact)
@@ -226,21 +349,14 @@ class HTM
226
349
  metadata&.dig('is_proposition') == true
227
350
  end
228
351
 
229
- private
230
-
231
- def set_content_hash
232
- self.content_hash = self.class.generate_content_hash(content)
233
- end
234
-
235
- def set_defaults
236
- self.created_at ||= Time.current
237
- self.updated_at ||= Time.current
238
- self.last_accessed ||= Time.current
239
- end
240
-
241
- def update_timestamps
242
- self.updated_at = Time.current if changed?
352
+ # Convert to hash (for compatibility with existing code)
353
+ #
354
+ # @return [Hash] Hash representation of the node
355
+ #
356
+ def to_hash
357
+ values.transform_keys(&:to_s)
243
358
  end
359
+ alias_method :attributes, :to_hash
244
360
  end
245
361
  end
246
362
  end
@@ -3,39 +3,65 @@
3
3
  class HTM
4
4
  module Models
5
5
  # NodeTag model - join table for many-to-many relationship between nodes and tags
6
- class NodeTag < ActiveRecord::Base
7
- self.table_name = 'node_tags'
8
-
6
+ class NodeTag < Sequel::Model(:node_tags)
9
7
  # Associations
10
- belongs_to :node, class_name: 'HTM::Models::Node'
11
- belongs_to :tag, class_name: 'HTM::Models::Tag'
8
+ many_to_one :node, class: 'HTM::Models::Node', key: :node_id
9
+ many_to_one :tag, class: 'HTM::Models::Tag', key: :tag_id
10
+
11
+ # Plugins
12
+ plugin :validation_helpers
13
+ plugin :timestamps, update_on_create: true
12
14
 
13
15
  # Validations
14
- validates :node_id, presence: true
15
- validates :tag_id, presence: true
16
- validates :tag_id, uniqueness: { scope: :node_id, message: "already associated with this node" }
16
+ def validate
17
+ super
18
+ validates_presence [:node_id, :tag_id]
19
+ validates_unique [:node_id, :tag_id], message: "already associated with this node"
20
+ end
21
+
22
+ # Dataset methods (scopes)
23
+ dataset_module do
24
+ def active
25
+ where(deleted_at: nil)
26
+ end
27
+
28
+ def for_node(node_id)
29
+ where(node_id: node_id)
30
+ end
31
+
32
+ def for_tag(tag_id)
33
+ where(tag_id: tag_id)
34
+ end
17
35
 
18
- # Callbacks
19
- before_create :set_created_at
36
+ def recent
37
+ order(Sequel.desc(:created_at))
38
+ end
20
39
 
21
- # Scopes
22
- # Soft delete - by default, only show non-deleted entries
23
- default_scope { where(deleted_at: nil) }
40
+ def deleted
41
+ exclude(deleted_at: nil)
42
+ end
24
43
 
25
- scope :for_node, ->(node_id) { where(node_id: node_id) }
26
- scope :for_tag, ->(tag_id) { where(tag_id: tag_id) }
27
- scope :recent, -> { order(created_at: :desc) }
44
+ def with_deleted
45
+ unfiltered
46
+ end
47
+ end
48
+
49
+ # Apply default scope for active records
50
+ set_dataset(dataset.where(Sequel[:node_tags][:deleted_at] => nil))
28
51
 
29
- # Soft delete scopes
30
- scope :deleted, -> { unscoped.where.not(deleted_at: nil) }
31
- scope :with_deleted, -> { unscoped }
52
+ # Hooks
53
+ def before_create
54
+ self.created_at ||= Time.now
55
+ super
56
+ end
32
57
 
33
58
  # Soft delete - mark as deleted without removing from database
34
59
  #
35
60
  # @return [Boolean] true if soft deleted successfully
36
61
  #
37
62
  def soft_delete!
38
- update!(deleted_at: Time.current)
63
+ update(deleted_at: Time.now)
64
+ true
39
65
  end
40
66
 
41
67
  # Restore a soft-deleted entry
@@ -43,7 +69,8 @@ class HTM
43
69
  # @return [Boolean] true if restored successfully
44
70
  #
45
71
  def restore!
46
- update!(deleted_at: nil)
72
+ update(deleted_at: nil)
73
+ true
47
74
  end
48
75
 
49
76
  # Check if entry is soft-deleted
@@ -51,13 +78,7 @@ class HTM
51
78
  # @return [Boolean] true if deleted_at is set
52
79
  #
53
80
  def deleted?
54
- deleted_at.present?
55
- end
56
-
57
- private
58
-
59
- def set_created_at
60
- self.created_at ||= Time.current
81
+ !deleted_at.nil?
61
82
  end
62
83
  end
63
84
  end
@@ -8,23 +8,39 @@ class HTM
8
8
  # When a robot is deleted, only the robot_nodes links are removed; shared
9
9
  # nodes remain in the database for other robots.
10
10
  #
11
- class Robot < ActiveRecord::Base
12
- self.table_name = 'robots'
13
-
11
+ class Robot < Sequel::Model(:robots)
14
12
  # Associations - Many-to-many with nodes via robot_nodes
15
13
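  # Deleting a robot must remove only its robot_nodes links, NOT the shared nodes (NOTE: the Sequel association has no `dependent: :destroy`; link cleanup presumably relies on the database foreign key — confirm)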
16
- has_many :robot_nodes, class_name: 'HTM::Models::RobotNode', dependent: :destroy
17
- has_many :nodes, through: :robot_nodes, class_name: 'HTM::Models::Node'
14
+ one_to_many :robot_nodes, class: 'HTM::Models::RobotNode', key: :robot_id
15
+ many_to_many :nodes, class: 'HTM::Models::Node',
16
+ join_table: :robot_nodes, left_key: :robot_id, right_key: :node_id
17
+
18
+ # Plugins
19
+ plugin :validation_helpers
20
+ plugin :timestamps, update_on_create: true
18
21
 
19
22
  # Validations
20
- validates :name, presence: true
23
+ def validate
24
+ super
25
+ validates_presence :name
26
+ end
21
27
 
22
- # Callbacks
23
- before_create :set_created_at
28
+ # Dataset methods (scopes)
29
+ dataset_module do
30
+ def recent
31
+ order(Sequel.desc(:created_at))
32
+ end
24
33
 
25
- # Scopes
26
- scope :recent, -> { order(created_at: :desc) }
27
- scope :by_name, ->(name) { where(name: name) }
34
+ def by_name(name)
35
+ where(name: name)
36
+ end
37
+ end
38
+
39
+ # Hooks
40
+ def before_create
41
+ self.created_at ||= Time.now
42
+ super
43
+ end
28
44
 
29
45
  # Class methods
30
46
 
@@ -34,7 +50,7 @@ class HTM
34
50
  # @return [Robot] The found or created robot
35
51
  #
36
52
  def self.find_or_create_by_name(robot_name)
37
- find_or_create_by(name: robot_name)
53
+ find_or_create(name: robot_name)
38
54
  end
39
55
 
40
56
  # Instance methods
@@ -44,16 +60,16 @@ class HTM
44
60
  # @return [Integer] Number of nodes
45
61
  #
46
62
  def node_count
47
- nodes.count
63
+ nodes_dataset.count
48
64
  end
49
65
 
50
66
  # Get the most recent nodes for this robot
51
67
  #
52
68
  # @param limit [Integer] Maximum number of nodes to return (default: 10)
53
- # @return [ActiveRecord::Relation] Recent nodes ordered by created_at desc
69
+ # @return [Array<Node>] Recent nodes ordered by created_at desc
54
70
  #
55
71
  def recent_nodes(limit = 10)
56
- nodes.recent.limit(limit)
72
+ nodes_dataset.order(Sequel.desc(:created_at)).limit(limit).all
57
73
  end
58
74
 
59
75
  # Get nodes with their remember metadata for this robot
@@ -62,10 +78,11 @@ class HTM
62
78
  # @return [Array<Hash>] Nodes with remember_count, first/last_remembered_at
63
79
  #
64
80
  def nodes_with_metadata(limit = 10)
65
- robot_nodes
66
- .includes(:node)
67
- .order(last_remembered_at: :desc)
81
+ robot_nodes_dataset
82
+ .eager(:node)
83
+ .order(Sequel.desc(:last_remembered_at))
68
84
  .limit(limit)
85
+ .all
69
86
  .map do |rn|
70
87
  {
71
88
  node: rn.node,
@@ -85,17 +102,11 @@ class HTM
85
102
  #
86
103
  def memory_summary
87
104
  {
88
- total_nodes: nodes.count,
89
- in_working_memory: robot_nodes.in_working_memory.count,
90
- with_embeddings: nodes.with_embeddings.count
105
+ total_nodes: nodes_dataset.count,
106
+ in_working_memory: robot_nodes_dataset.where(working_memory: true).count,
107
+ with_embeddings: nodes_dataset.exclude(embedding: nil).count
91
108
  }
92
109
  end
93
-
94
- private
95
-
96
- def set_created_at
97
- self.created_at ||= Time.current
98
- end
99
110
  end
100
111
  end
101
112
  end