htm 0.0.18 → 0.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +119 -1
- data/README.md +12 -0
- data/Rakefile +104 -18
- data/db/migrate/00001_enable_extensions.rb +9 -5
- data/db/migrate/00002_create_robots.rb +18 -6
- data/db/migrate/00003_create_file_sources.rb +30 -17
- data/db/migrate/00004_create_nodes.rb +60 -48
- data/db/migrate/00005_create_tags.rb +24 -12
- data/db/migrate/00006_create_node_tags.rb +28 -13
- data/db/migrate/00007_create_robot_nodes.rb +40 -26
- data/db/schema.sql +17 -1
- data/db/seeds.rb +34 -34
- data/docs/api/embedding-service.md +140 -110
- data/docs/api/yard/HTM/ActiveRecordConfig.md +6 -0
- data/docs/api/yard/HTM/Config.md +173 -0
- data/docs/api/yard/HTM/ConfigSection.md +28 -0
- data/docs/api/yard/HTM/Database.md +1 -1
- data/docs/api/yard/HTM/Railtie.md +2 -2
- data/docs/api/yard/HTM.md +0 -57
- data/docs/api/yard/index.csv +76 -61
- data/docs/api/yard-reference.md +2 -1
- data/docs/architecture/adrs/003-ollama-embeddings.md +45 -36
- data/docs/architecture/adrs/004-hive-mind.md +1 -1
- data/docs/architecture/adrs/008-robot-identification.md +1 -1
- data/docs/architecture/index.md +11 -9
- data/docs/architecture/overview.md +11 -7
- data/docs/assets/images/balanced-strategy-decay.svg +41 -0
- data/docs/assets/images/class-hierarchy.svg +1 -1
- data/docs/assets/images/eviction-priority.svg +43 -0
- data/docs/assets/images/exception-hierarchy.svg +2 -2
- data/docs/assets/images/hive-mind-shared-memory.svg +52 -0
- data/docs/assets/images/htm-architecture-overview.svg +3 -3
- data/docs/assets/images/htm-core-components.svg +4 -4
- data/docs/assets/images/htm-layered-architecture.svg +1 -1
- data/docs/assets/images/htm-memory-addition-flow.svg +2 -2
- data/docs/assets/images/htm-memory-recall-flow.svg +2 -2
- data/docs/assets/images/memory-topology.svg +53 -0
- data/docs/assets/images/two-tier-memory-architecture.svg +55 -0
- data/docs/database/naming-convention.md +244 -0
- data/docs/database_rake_tasks.md +31 -0
- data/docs/development/rake-tasks.md +80 -35
- data/docs/development/setup.md +76 -44
- data/docs/examples/basic-usage.md +133 -0
- data/docs/examples/config-files.md +170 -0
- data/docs/examples/file-loading.md +208 -0
- data/docs/examples/index.md +116 -0
- data/docs/examples/llm-configuration.md +168 -0
- data/docs/examples/mcp-client.md +172 -0
- data/docs/examples/rails-integration.md +173 -0
- data/docs/examples/robot-groups.md +210 -0
- data/docs/examples/sinatra-integration.md +218 -0
- data/docs/examples/standalone-app.md +216 -0
- data/docs/examples/telemetry.md +224 -0
- data/docs/examples/timeframes.md +143 -0
- data/docs/getting-started/installation.md +97 -40
- data/docs/getting-started/quick-start.md +28 -11
- data/docs/guides/configuration.md +515 -0
- data/docs/guides/file-loading.md +322 -0
- data/docs/guides/getting-started.md +40 -9
- data/docs/guides/index.md +3 -3
- data/docs/guides/mcp-server.md +100 -13
- data/docs/guides/propositions.md +264 -0
- data/docs/guides/recalling-memories.md +4 -4
- data/docs/guides/search-strategies.md +3 -3
- data/docs/guides/tags.md +318 -0
- data/docs/guides/telemetry.md +229 -0
- data/docs/index.md +8 -16
- data/docs/{architecture → robots}/hive-mind.md +8 -111
- data/docs/robots/index.md +73 -0
- data/docs/{guides → robots}/multi-robot.md +3 -3
- data/docs/{guides → robots}/robot-groups.md +8 -7
- data/docs/{architecture → robots}/two-tier-memory.md +13 -149
- data/docs/robots/why-robots.md +85 -0
- data/examples/.envrc +6 -0
- data/examples/.gitignore +2 -0
- data/examples/00_create_examples_db.rb +94 -0
- data/examples/{basic_usage.rb → 01_basic_usage.rb} +12 -16
- data/examples/{custom_llm_configuration.rb → 03_custom_llm_configuration.rb} +13 -3
- data/examples/{file_loader_usage.rb → 04_file_loader_usage.rb} +11 -14
- data/examples/{timeframe_demo.rb → 05_timeframe_demo.rb} +10 -3
- data/examples/{example_app → 06_example_app}/app.rb +15 -15
- data/examples/{cli_app → 07_cli_app}/htm_cli.rb +15 -22
- data/examples/08_sinatra_app/Gemfile.lock +241 -0
- data/examples/{sinatra_app → 08_sinatra_app}/app.rb +19 -18
- data/examples/{mcp_client.rb → 09_mcp_client.rb} +5 -8
- data/examples/{telemetry → 10_telemetry}/SETUP_README.md +1 -1
- data/examples/{telemetry → 10_telemetry}/demo.rb +14 -10
- data/examples/11_robot_groups/README.md +335 -0
- data/examples/{robot_groups → 11_robot_groups/lib}/robot_worker.rb +17 -3
- data/examples/{robot_groups → 11_robot_groups}/multi_process.rb +9 -9
- data/examples/{robot_groups → 11_robot_groups}/same_process.rb +9 -12
- data/examples/{rails_app → 12_rails_app}/Gemfile +3 -0
- data/examples/{rails_app → 12_rails_app}/Gemfile.lock +87 -58
- data/examples/{rails_app → 12_rails_app}/app/controllers/dashboard_controller.rb +10 -6
- data/examples/{rails_app → 12_rails_app}/app/controllers/files_controller.rb +5 -5
- data/examples/{rails_app → 12_rails_app}/app/controllers/memories_controller.rb +11 -7
- data/examples/{rails_app → 12_rails_app}/app/controllers/robots_controller.rb +8 -8
- data/examples/12_rails_app/app/controllers/tags_controller.rb +36 -0
- data/examples/{rails_app → 12_rails_app}/app/views/dashboard/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/files/new.html.erb +5 -2
- data/examples/{rails_app → 12_rails_app}/app/views/memories/_memory_card.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/deleted.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/edit.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/show.html.erb +4 -4
- data/examples/{rails_app → 12_rails_app}/app/views/robots/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/robots/show.html.erb +4 -4
- data/examples/{rails_app → 12_rails_app}/app/views/search/index.html.erb +1 -1
- data/examples/{rails_app → 12_rails_app}/app/views/tags/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/tags/show.html.erb +1 -1
- data/examples/12_rails_app/config/initializers/htm.rb +7 -0
- data/examples/12_rails_app/config/initializers/rack.rb +5 -0
- data/examples/README.md +230 -211
- data/examples/examples_helper.rb +138 -0
- data/lib/htm/config/builder.rb +167 -0
- data/lib/htm/config/database.rb +317 -0
- data/lib/htm/config/defaults.yml +41 -13
- data/lib/htm/config/section.rb +74 -0
- data/lib/htm/config/validator.rb +83 -0
- data/lib/htm/config.rb +65 -361
- data/lib/htm/database.rb +85 -127
- data/lib/htm/errors.rb +14 -0
- data/lib/htm/integrations/sinatra.rb +13 -44
- data/lib/htm/job_adapter.rb +75 -1
- data/lib/htm/jobs/generate_embedding_job.rb +3 -4
- data/lib/htm/jobs/generate_propositions_job.rb +4 -5
- data/lib/htm/jobs/generate_tags_job.rb +16 -15
- data/lib/htm/loaders/defaults_loader.rb +23 -0
- data/lib/htm/loaders/markdown_loader.rb +17 -15
- data/lib/htm/loaders/xdg_config_loader.rb +9 -9
- data/lib/htm/long_term_memory/fulltext_search.rb +14 -14
- data/lib/htm/long_term_memory/hybrid_search.rb +396 -229
- data/lib/htm/long_term_memory/node_operations.rb +24 -23
- data/lib/htm/long_term_memory/relevance_scorer.rb +23 -20
- data/lib/htm/long_term_memory/robot_operations.rb +4 -4
- data/lib/htm/long_term_memory/tag_operations.rb +91 -77
- data/lib/htm/long_term_memory/vector_search.rb +4 -5
- data/lib/htm/long_term_memory.rb +13 -13
- data/lib/htm/mcp/cli.rb +115 -8
- data/lib/htm/mcp/resources.rb +4 -3
- data/lib/htm/mcp/server.rb +5 -4
- data/lib/htm/mcp/tools.rb +37 -28
- data/lib/htm/migration.rb +72 -0
- data/lib/htm/models/file_source.rb +52 -31
- data/lib/htm/models/node.rb +224 -108
- data/lib/htm/models/node_tag.rb +49 -28
- data/lib/htm/models/robot.rb +38 -27
- data/lib/htm/models/robot_node.rb +63 -35
- data/lib/htm/models/tag.rb +126 -123
- data/lib/htm/observability.rb +45 -41
- data/lib/htm/proposition_service.rb +76 -7
- data/lib/htm/railtie.rb +2 -2
- data/lib/htm/robot_group.rb +30 -18
- data/lib/htm/sequel_config.rb +215 -0
- data/lib/htm/sql_builder.rb +14 -16
- data/lib/htm/tag_service.rb +78 -0
- data/lib/htm/tasks.rb +3 -0
- data/lib/htm/version.rb +1 -1
- data/lib/htm/workflows/remember_workflow.rb +213 -0
- data/lib/htm.rb +27 -22
- data/lib/tasks/db.rake +0 -2
- data/lib/tasks/doc.rake +2 -2
- data/lib/tasks/files.rake +11 -18
- data/lib/tasks/htm.rake +190 -62
- data/lib/tasks/jobs.rake +179 -54
- data/lib/tasks/tags.rake +8 -13
- data/mkdocs.yml +33 -8
- data/scripts/backfill_parent_tags.rb +376 -0
- data/scripts/normalize_plural_tags.rb +335 -0
- metadata +168 -86
- data/docs/api/yard/HTM/Configuration.md +0 -240
- data/docs/telemetry.md +0 -391
- data/examples/rails_app/app/controllers/tags_controller.rb +0 -30
- data/examples/sinatra_app/Gemfile.lock +0 -166
- data/lib/htm/active_record_config.rb +0 -104
- /data/examples/{config_file_example → 02_config_file_example}/README.md +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/config/htm.local.yml +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/custom_config.yml +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/show_config.rb +0 -0
- /data/examples/{example_app → 06_example_app}/Rakefile +0 -0
- /data/examples/{cli_app → 07_cli_app}/README.md +0 -0
- /data/examples/{sinatra_app → 08_sinatra_app}/Gemfile +0 -0
- /data/examples/{telemetry → 10_telemetry}/README.md +0 -0
- /data/examples/{telemetry → 10_telemetry}/grafana/dashboards/htm-metrics.json +0 -0
- /data/examples/{rails_app → 12_rails_app}/.gitignore +0 -0
- /data/examples/{rails_app → 12_rails_app}/Procfile.dev +0 -0
- /data/examples/{rails_app → 12_rails_app}/README.md +0 -0
- /data/examples/{rails_app → 12_rails_app}/Rakefile +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/application.css +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/inter-font.css +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/controllers/application_controller.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/controllers/search_controller.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/application.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/application.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/index.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/files/index.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/files/show.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/layouts/application.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/memories/index.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/memories/new.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/robots/new.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/shared/_navbar.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/shared/_stat_card.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/dev +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/rails +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/rake +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/application.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/boot.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/database.yml +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/environment.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/importmap.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/routes.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/tailwind.config.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/config.ru +0 -0
- /data/examples/{rails_app → 12_rails_app}/log/.keep +0 -0
- /data/examples/{rails_app → 12_rails_app}/tmp/local_secret.txt +0 -0
data/lib/htm/models/node.rb
CHANGED
|
@@ -9,71 +9,153 @@ class HTM
|
|
|
9
9
|
# Nodes are globally unique by content (via content_hash) and can be
|
|
10
10
|
# linked to multiple robots through the robot_nodes join table.
|
|
11
11
|
#
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
#
|
|
21
|
-
# Distance metrics: "cosine", "euclidean", "inner_product", "taxicab"
|
|
22
|
-
#
|
|
23
|
-
class Node < ActiveRecord::Base
|
|
24
|
-
self.table_name = 'nodes'
|
|
25
|
-
|
|
26
|
-
# Associations - Many-to-many with robots via robot_nodes
|
|
27
|
-
has_many :robot_nodes, class_name: 'HTM::Models::RobotNode', dependent: :destroy
|
|
28
|
-
has_many :robots, through: :robot_nodes, class_name: 'HTM::Models::Robot'
|
|
29
|
-
has_many :node_tags, class_name: 'HTM::Models::NodeTag', dependent: :destroy
|
|
30
|
-
has_many :tags, through: :node_tags, class_name: 'HTM::Models::Tag'
|
|
12
|
+
class Node < Sequel::Model(:nodes)
|
|
13
|
+
# Associations
|
|
14
|
+
one_to_many :robot_nodes, class: 'HTM::Models::RobotNode', key: :node_id
|
|
15
|
+
many_to_many :robots, class: 'HTM::Models::Robot',
|
|
16
|
+
join_table: :robot_nodes, left_key: :node_id, right_key: :robot_id
|
|
17
|
+
one_to_many :node_tags, class: 'HTM::Models::NodeTag', key: :node_id
|
|
18
|
+
many_to_many :tags, class: 'HTM::Models::Tag',
|
|
19
|
+
join_table: :node_tags, left_key: :node_id, right_key: :tag_id
|
|
31
20
|
|
|
32
21
|
# Optional source file association (for nodes loaded from files)
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
22
|
+
many_to_one :file_source, class: 'HTM::Models::FileSource', key: :source_id
|
|
23
|
+
|
|
24
|
+
# Plugins
|
|
25
|
+
plugin :validation_helpers
|
|
26
|
+
plugin :timestamps, update_on_create: true
|
|
27
|
+
|
|
28
|
+
# Override embedding getter to return Array instead of String
|
|
29
|
+
# pgvector stores as string format "[0.1,0.2,...]" and we need Array<Float>
|
|
30
|
+
def embedding
|
|
31
|
+
raw = super
|
|
32
|
+
return nil if raw.nil?
|
|
33
|
+
return raw if raw.is_a?(Array)
|
|
34
|
+
|
|
35
|
+
# Parse string format: "[0.1,0.2,0.3]"
|
|
36
|
+
if raw.is_a?(String)
|
|
37
|
+
raw.gsub(/[\[\]]/, '').split(',').map(&:to_f)
|
|
38
|
+
else
|
|
39
|
+
raw.to_a
|
|
40
|
+
end
|
|
41
|
+
end
|
|
38
42
|
|
|
39
43
|
# Validations
|
|
40
|
-
|
|
41
|
-
|
|
44
|
+
def validate
|
|
45
|
+
super
|
|
46
|
+
validates_presence [:content, :content_hash]
|
|
47
|
+
validates_unique :content_hash
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
# Dataset methods (scopes)
|
|
51
|
+
dataset_module do
|
|
52
|
+
def active
|
|
53
|
+
where(deleted_at: nil)
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
def by_robot(robot_id)
|
|
57
|
+
join(:robot_nodes, node_id: :id).where(robot_nodes__robot_id: robot_id)
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
def recent
|
|
61
|
+
order(Sequel.desc(:created_at))
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def in_timeframe(start_time, end_time)
|
|
65
|
+
where(created_at: start_time..end_time)
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
def with_embeddings
|
|
69
|
+
exclude(embedding: nil)
|
|
70
|
+
end
|
|
42
71
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
before_save :update_timestamps
|
|
72
|
+
def from_source(source_id)
|
|
73
|
+
where(source_id: source_id).order(:chunk_position)
|
|
74
|
+
end
|
|
47
75
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
76
|
+
# Proposition scopes
|
|
77
|
+
def propositions
|
|
78
|
+
where(Sequel.lit("metadata->>'is_proposition' = 'true'"))
|
|
79
|
+
end
|
|
51
80
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
scope :with_embeddings, -> { where.not(embedding: nil) }
|
|
56
|
-
scope :from_source, ->(source_id) { where(source_id: source_id).order(:chunk_position) }
|
|
81
|
+
def non_propositions
|
|
82
|
+
where(Sequel.lit("metadata IS NULL OR metadata->>'is_proposition' IS NULL OR metadata->>'is_proposition' != 'true'"))
|
|
83
|
+
end
|
|
57
84
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
85
|
+
# Soft delete scopes
|
|
86
|
+
def deleted
|
|
87
|
+
exclude(deleted_at: nil)
|
|
88
|
+
end
|
|
61
89
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
90
|
+
def with_deleted
|
|
91
|
+
unfiltered
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
def deleted_before(time)
|
|
95
|
+
deleted.where { deleted_at < time }
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
# Find nearest neighbors by vector similarity
|
|
99
|
+
#
|
|
100
|
+
# @param column [Symbol] Column containing the embedding (typically :embedding)
|
|
101
|
+
# @param query_embedding [Array<Numeric>] Query vector to find neighbors for
|
|
102
|
+
# @param distance [String] Distance metric ("cosine", "euclidean", "inner_product")
|
|
103
|
+
# @return [Sequel::Dataset] Dataset ordered by distance with neighbor_distance column
|
|
104
|
+
#
|
|
105
|
+
def nearest_neighbors(column, query_embedding, distance: "cosine")
|
|
106
|
+
return where(Sequel.lit('1=0')) unless query_embedding.is_a?(Array) && query_embedding.any?
|
|
107
|
+
|
|
108
|
+
# Convert embedding to vector string format
|
|
109
|
+
vector_str = "[#{query_embedding.map(&:to_f).join(',')}]"
|
|
110
|
+
|
|
111
|
+
# Select distance operator based on metric
|
|
112
|
+
operator = case distance.to_s
|
|
113
|
+
when "cosine" then "<=>"
|
|
114
|
+
when "euclidean", "l2" then "<->"
|
|
115
|
+
when "inner_product" then "<#>"
|
|
116
|
+
else "<=>"
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
# Return dataset with distance calculation
|
|
120
|
+
select_all(:nodes)
|
|
121
|
+
.select_append(Sequel.lit("(#{column} #{operator} ?::vector) AS neighbor_distance", vector_str))
|
|
122
|
+
.exclude(column => nil)
|
|
123
|
+
.order(Sequel.lit("#{column} #{operator} ?::vector", vector_str))
|
|
124
|
+
end
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
# Apply default scope for active records
|
|
128
|
+
set_dataset(dataset.where(Sequel[:nodes][:deleted_at] => nil))
|
|
129
|
+
|
|
130
|
+
# Hooks
|
|
131
|
+
def before_validation
|
|
132
|
+
if content_hash.nil? && content
|
|
133
|
+
self.content_hash = self.class.generate_content_hash(content)
|
|
134
|
+
end
|
|
135
|
+
super
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
def before_create
|
|
139
|
+
self.created_at ||= Time.now
|
|
140
|
+
self.updated_at ||= Time.now
|
|
141
|
+
self.last_accessed ||= Time.now
|
|
142
|
+
super
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
def before_save
|
|
146
|
+
self.updated_at = Time.now if changed_columns.any?
|
|
147
|
+
super
|
|
148
|
+
end
|
|
66
149
|
|
|
67
150
|
# Class methods
|
|
68
151
|
|
|
69
152
|
# Permanently delete all soft-deleted nodes older than the specified time
|
|
70
153
|
#
|
|
71
|
-
# @param older_than [Time
|
|
72
|
-
# Can be a Time object or a duration like 30.days.ago
|
|
154
|
+
# @param older_than [Time] Delete nodes soft-deleted before this time
|
|
73
155
|
# @return [Integer] Number of nodes permanently deleted
|
|
74
156
|
#
|
|
75
157
|
def self.purge_deleted(older_than:)
|
|
76
|
-
|
|
158
|
+
dataset.unfiltered.where { deleted_at < older_than }.delete
|
|
77
159
|
end
|
|
78
160
|
|
|
79
161
|
# Find a node by content hash, or return nil
|
|
@@ -83,7 +165,7 @@ class HTM
|
|
|
83
165
|
#
|
|
84
166
|
def self.find_by_content(content)
|
|
85
167
|
hash = generate_content_hash(content)
|
|
86
|
-
|
|
168
|
+
first(content_hash: hash)
|
|
87
169
|
end
|
|
88
170
|
|
|
89
171
|
# Generate SHA-256 hash for content
|
|
@@ -98,66 +180,105 @@ class HTM
|
|
|
98
180
|
# Instance methods
|
|
99
181
|
|
|
100
182
|
# Find nearest neighbors to this node's embedding
|
|
183
|
+
#
|
|
101
184
|
# @param limit [Integer] number of neighbors to return (default: 10)
|
|
102
|
-
# @param distance [String] distance metric
|
|
103
|
-
# @return [
|
|
185
|
+
# @param distance [String] distance metric (default: "cosine")
|
|
186
|
+
# @return [Array<Node>] ordered by distance (closest first)
|
|
187
|
+
#
|
|
104
188
|
def nearest_neighbors(limit: 10, distance: "cosine")
|
|
105
|
-
return
|
|
189
|
+
return [] unless embedding
|
|
190
|
+
|
|
191
|
+
# Use raw SQL for vector similarity search
|
|
192
|
+
db = self.class.db
|
|
193
|
+
|
|
194
|
+
# Handle embedding - might be String or Array depending on Sequel pg extension
|
|
195
|
+
emb = embedding_array
|
|
196
|
+
return [] if emb.nil? || emb.empty?
|
|
197
|
+
|
|
198
|
+
vector_str = "[#{emb.join(',')}]"
|
|
199
|
+
|
|
200
|
+
sql = <<-SQL
|
|
201
|
+
SELECT nodes.*, (embedding <=> '#{vector_str}'::vector) AS neighbor_distance
|
|
202
|
+
FROM nodes
|
|
203
|
+
WHERE embedding IS NOT NULL
|
|
204
|
+
AND deleted_at IS NULL
|
|
205
|
+
AND id != #{id}
|
|
206
|
+
ORDER BY embedding <=> '#{vector_str}'::vector
|
|
207
|
+
LIMIT #{limit}
|
|
208
|
+
SQL
|
|
209
|
+
|
|
210
|
+
# Use call() to create instances from raw hashes without mass assignment restrictions
|
|
211
|
+
db.fetch(sql).all.map do |row|
|
|
212
|
+
node = self.class.call(row)
|
|
213
|
+
# Store neighbor_distance as an instance variable
|
|
214
|
+
node.instance_variable_set(:@neighbor_distance, row[:neighbor_distance])
|
|
215
|
+
node
|
|
216
|
+
end
|
|
217
|
+
end
|
|
106
218
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
219
|
+
# Accessor for neighbor_distance from nearest_neighbors query
|
|
220
|
+
# Works with both:
|
|
221
|
+
# - Instance method (stores in @neighbor_distance)
|
|
222
|
+
# - Dataset method (stores in values hash from SELECT)
|
|
223
|
+
def neighbor_distance
|
|
224
|
+
@neighbor_distance || values[:neighbor_distance]
|
|
225
|
+
end
|
|
226
|
+
|
|
227
|
+
# Get embedding as an Array (handles both String and Array storage)
|
|
228
|
+
# Note: The `embedding` getter already returns Array, this is an alias for compatibility
|
|
229
|
+
#
|
|
230
|
+
# @return [Array<Float>, nil] The embedding vector as an array
|
|
231
|
+
#
|
|
232
|
+
def embedding_array
|
|
233
|
+
embedding
|
|
111
234
|
end
|
|
112
235
|
|
|
113
236
|
# Calculate cosine similarity to another embedding or node
|
|
237
|
+
#
|
|
114
238
|
# @param other [Array, Node] query embedding vector or another Node
|
|
115
239
|
# @return [Float] similarity score (0.0 to 1.0, higher is more similar)
|
|
240
|
+
#
|
|
116
241
|
def similarity_to(other)
|
|
117
|
-
query_embedding = other.is_a?(Node) ? other.
|
|
118
|
-
return nil unless
|
|
242
|
+
query_embedding = other.is_a?(Node) ? other.embedding_array : other
|
|
243
|
+
return nil unless embedding_array && query_embedding
|
|
244
|
+
|
|
245
|
+
# Handle query_embedding that might be a String
|
|
246
|
+
if query_embedding.is_a?(String)
|
|
247
|
+
query_embedding = query_embedding.gsub(/[\[\]]/, '').split(',').map(&:to_f)
|
|
248
|
+
end
|
|
119
249
|
|
|
120
|
-
# Validate embedding is an array of finite numeric values
|
|
121
250
|
unless query_embedding.is_a?(Array) && query_embedding.all? { |v| v.is_a?(Numeric) && v.finite? }
|
|
122
251
|
return nil
|
|
123
252
|
end
|
|
124
253
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
254
|
+
vector_str = "[#{query_embedding.map(&:to_f).join(',')}]"
|
|
255
|
+
|
|
256
|
+
result = self.class.db.fetch(
|
|
257
|
+
"SELECT 1 - (embedding <=> ?::vector) AS similarity FROM nodes WHERE id = ?",
|
|
258
|
+
vector_str, id
|
|
259
|
+
).first
|
|
131
260
|
|
|
132
|
-
result
|
|
133
|
-
"SELECT 1 - (embedding <=> #{quoted_vector}::vector) FROM nodes WHERE id = #{quoted_id}"
|
|
134
|
-
)
|
|
135
|
-
result&.to_f
|
|
261
|
+
result&.[](:similarity)&.to_f
|
|
136
262
|
end
|
|
137
263
|
|
|
138
264
|
# Get all tag names associated with this node
|
|
139
265
|
#
|
|
140
|
-
# @return [Array<String>] Array of hierarchical tag names
|
|
266
|
+
# @return [Array<String>] Array of hierarchical tag names
|
|
141
267
|
#
|
|
142
268
|
def tag_names
|
|
143
|
-
|
|
269
|
+
tags_dataset.select_map(:name)
|
|
144
270
|
end
|
|
145
271
|
|
|
146
|
-
# Add tags to this node (creates tags if they don't exist)
|
|
272
|
+
# Add tags to this node (creates tags and all parent tags if they don't exist)
|
|
147
273
|
#
|
|
148
274
|
# @param tag_names [Array<String>, String] Tag name(s) to add
|
|
149
275
|
# @return [void]
|
|
150
276
|
#
|
|
151
|
-
# @example Add a single tag
|
|
152
|
-
# node.add_tags("database:postgresql")
|
|
153
|
-
#
|
|
154
|
-
# @example Add multiple tags
|
|
155
|
-
# node.add_tags(["database:postgresql", "ai:embeddings"])
|
|
156
|
-
#
|
|
157
277
|
def add_tags(tag_names)
|
|
158
278
|
Array(tag_names).each do |tag_name|
|
|
159
|
-
|
|
160
|
-
|
|
279
|
+
HTM::Models::Tag.find_or_create_with_ancestors(tag_name).each do |tag|
|
|
280
|
+
HTM::Models::NodeTag.find_or_create(node_id: id, tag_id: tag.id)
|
|
281
|
+
end
|
|
161
282
|
end
|
|
162
283
|
end
|
|
163
284
|
|
|
@@ -167,45 +288,47 @@ class HTM
|
|
|
167
288
|
# @return [void]
|
|
168
289
|
#
|
|
169
290
|
def remove_tag(tag_name)
|
|
170
|
-
tag = HTM::Models::Tag.
|
|
291
|
+
tag = HTM::Models::Tag.first(name: tag_name)
|
|
171
292
|
return unless tag
|
|
172
293
|
|
|
173
|
-
|
|
294
|
+
node_tags_dataset.where(tag_id: tag.id).delete
|
|
174
295
|
end
|
|
175
296
|
|
|
176
297
|
# Soft delete - mark node as deleted without removing from database
|
|
177
|
-
# Also cascades soft delete to associated robot_nodes and node_tags
|
|
178
298
|
#
|
|
179
299
|
# @return [Boolean] true if soft deleted successfully
|
|
180
300
|
#
|
|
181
301
|
def soft_delete!
|
|
182
|
-
transaction do
|
|
183
|
-
now = Time.
|
|
184
|
-
update
|
|
302
|
+
db.transaction do
|
|
303
|
+
now = Time.now
|
|
304
|
+
update(deleted_at: now)
|
|
185
305
|
|
|
186
306
|
# Cascade soft delete to associated robot_nodes
|
|
187
|
-
|
|
307
|
+
HTM::Models::RobotNode.where(node_id: id).update(deleted_at: now)
|
|
188
308
|
|
|
189
309
|
# Cascade soft delete to associated node_tags
|
|
190
|
-
|
|
310
|
+
HTM::Models::NodeTag.where(node_id: id).update(deleted_at: now)
|
|
191
311
|
end
|
|
192
312
|
true
|
|
193
313
|
end
|
|
194
314
|
|
|
195
315
|
# Restore a soft-deleted node
|
|
196
|
-
# Also cascades restoration to associated robot_nodes and node_tags
|
|
197
316
|
#
|
|
198
317
|
# @return [Boolean] true if restored successfully
|
|
199
318
|
#
|
|
200
319
|
def restore!
|
|
201
|
-
transaction do
|
|
202
|
-
|
|
320
|
+
db.transaction do
|
|
321
|
+
# Use unfiltered dataset to bypass the default scope that excludes deleted records
|
|
322
|
+
self.class.dataset.unfiltered.where(id: id).update(deleted_at: nil)
|
|
203
323
|
|
|
204
324
|
# Cascade restoration to associated robot_nodes
|
|
205
|
-
HTM::Models::RobotNode.
|
|
325
|
+
HTM::Models::RobotNode.dataset.unfiltered.where(node_id: id).update(deleted_at: nil)
|
|
206
326
|
|
|
207
327
|
# Cascade restoration to associated node_tags
|
|
208
|
-
HTM::Models::NodeTag.
|
|
328
|
+
HTM::Models::NodeTag.dataset.unfiltered.where(node_id: id).update(deleted_at: nil)
|
|
329
|
+
|
|
330
|
+
# Refresh this instance to reflect the change
|
|
331
|
+
self.deleted_at = nil
|
|
209
332
|
end
|
|
210
333
|
true
|
|
211
334
|
end
|
|
@@ -215,7 +338,7 @@ class HTM
|
|
|
215
338
|
# @return [Boolean] true if deleted_at is set
|
|
216
339
|
#
|
|
217
340
|
def deleted?
|
|
218
|
-
deleted_at.
|
|
341
|
+
!deleted_at.nil?
|
|
219
342
|
end
|
|
220
343
|
|
|
221
344
|
# Check if node is a proposition (extracted atomic fact)
|
|
@@ -226,21 +349,14 @@ class HTM
|
|
|
226
349
|
metadata&.dig('is_proposition') == true
|
|
227
350
|
end
|
|
228
351
|
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
def set_defaults
|
|
236
|
-
self.created_at ||= Time.current
|
|
237
|
-
self.updated_at ||= Time.current
|
|
238
|
-
self.last_accessed ||= Time.current
|
|
239
|
-
end
|
|
240
|
-
|
|
241
|
-
def update_timestamps
|
|
242
|
-
self.updated_at = Time.current if changed?
|
|
352
|
+
# Convert to hash (for compatibility with existing code)
|
|
353
|
+
#
|
|
354
|
+
# @return [Hash] Hash representation of the node
|
|
355
|
+
#
|
|
356
|
+
def to_hash
|
|
357
|
+
values.transform_keys(&:to_s)
|
|
243
358
|
end
|
|
359
|
+
alias_method :attributes, :to_hash
|
|
244
360
|
end
|
|
245
361
|
end
|
|
246
362
|
end
|
data/lib/htm/models/node_tag.rb
CHANGED
|
@@ -3,39 +3,65 @@
|
|
|
3
3
|
class HTM
|
|
4
4
|
module Models
|
|
5
5
|
# NodeTag model - join table for many-to-many relationship between nodes and tags
|
|
6
|
-
class NodeTag <
|
|
7
|
-
self.table_name = 'node_tags'
|
|
8
|
-
|
|
6
|
+
class NodeTag < Sequel::Model(:node_tags)
|
|
9
7
|
# Associations
|
|
10
|
-
|
|
11
|
-
|
|
8
|
+
many_to_one :node, class: 'HTM::Models::Node', key: :node_id
|
|
9
|
+
many_to_one :tag, class: 'HTM::Models::Tag', key: :tag_id
|
|
10
|
+
|
|
11
|
+
# Plugins
|
|
12
|
+
plugin :validation_helpers
|
|
13
|
+
plugin :timestamps, update_on_create: true
|
|
12
14
|
|
|
13
15
|
# Validations
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
16
|
+
def validate
|
|
17
|
+
super
|
|
18
|
+
validates_presence [:node_id, :tag_id]
|
|
19
|
+
validates_unique [:node_id, :tag_id], message: "already associated with this node"
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
# Dataset methods (scopes)
|
|
23
|
+
dataset_module do
|
|
24
|
+
def active
|
|
25
|
+
where(deleted_at: nil)
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def for_node(node_id)
|
|
29
|
+
where(node_id: node_id)
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def for_tag(tag_id)
|
|
33
|
+
where(tag_id: tag_id)
|
|
34
|
+
end
|
|
17
35
|
|
|
18
|
-
|
|
19
|
-
|
|
36
|
+
def recent
|
|
37
|
+
order(Sequel.desc(:created_at))
|
|
38
|
+
end
|
|
20
39
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
40
|
+
def deleted
|
|
41
|
+
exclude(deleted_at: nil)
|
|
42
|
+
end
|
|
24
43
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
44
|
+
def with_deleted
|
|
45
|
+
unfiltered
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
# Apply default scope for active records
|
|
50
|
+
set_dataset(dataset.where(Sequel[:node_tags][:deleted_at] => nil))
|
|
28
51
|
|
|
29
|
-
#
|
|
30
|
-
|
|
31
|
-
|
|
52
|
+
# Hooks
|
|
53
|
+
def before_create
|
|
54
|
+
self.created_at ||= Time.now
|
|
55
|
+
super
|
|
56
|
+
end
|
|
32
57
|
|
|
33
58
|
# Soft delete - mark as deleted without removing from database
|
|
34
59
|
#
|
|
35
60
|
# @return [Boolean] true if soft deleted successfully
|
|
36
61
|
#
|
|
37
62
|
def soft_delete!
|
|
38
|
-
update
|
|
63
|
+
update(deleted_at: Time.now)
|
|
64
|
+
true
|
|
39
65
|
end
|
|
40
66
|
|
|
41
67
|
# Restore a soft-deleted entry
|
|
@@ -43,7 +69,8 @@ class HTM
|
|
|
43
69
|
# @return [Boolean] true if restored successfully
|
|
44
70
|
#
|
|
45
71
|
def restore!
|
|
46
|
-
update
|
|
72
|
+
update(deleted_at: nil)
|
|
73
|
+
true
|
|
47
74
|
end
|
|
48
75
|
|
|
49
76
|
# Check if entry is soft-deleted
|
|
@@ -51,13 +78,7 @@ class HTM
|
|
|
51
78
|
# @return [Boolean] true if deleted_at is set
|
|
52
79
|
#
|
|
53
80
|
def deleted?
|
|
54
|
-
deleted_at.
|
|
55
|
-
end
|
|
56
|
-
|
|
57
|
-
private
|
|
58
|
-
|
|
59
|
-
def set_created_at
|
|
60
|
-
self.created_at ||= Time.current
|
|
81
|
+
!deleted_at.nil?
|
|
61
82
|
end
|
|
62
83
|
end
|
|
63
84
|
end
|
data/lib/htm/models/robot.rb
CHANGED
|
@@ -8,23 +8,39 @@ class HTM
|
|
|
8
8
|
# When a robot is deleted, only the robot_nodes links are removed; shared
|
|
9
9
|
# nodes remain in the database for other robots.
|
|
10
10
|
#
|
|
11
|
-
class Robot <
|
|
12
|
-
self.table_name = 'robots'
|
|
13
|
-
|
|
11
|
+
class Robot < Sequel::Model(:robots)
|
|
14
12
|
# Associations - Many-to-many with nodes via robot_nodes
|
|
15
13
|
# dependent: :destroy removes links only, NOT the shared nodes
|
|
16
|
-
|
|
17
|
-
|
|
14
|
+
one_to_many :robot_nodes, class: 'HTM::Models::RobotNode', key: :robot_id
|
|
15
|
+
many_to_many :nodes, class: 'HTM::Models::Node',
|
|
16
|
+
join_table: :robot_nodes, left_key: :robot_id, right_key: :node_id
|
|
17
|
+
|
|
18
|
+
# Plugins
|
|
19
|
+
plugin :validation_helpers
|
|
20
|
+
plugin :timestamps, update_on_create: true
|
|
18
21
|
|
|
19
22
|
# Validations
|
|
20
|
-
|
|
23
|
+
def validate
|
|
24
|
+
super
|
|
25
|
+
validates_presence :name
|
|
26
|
+
end
|
|
21
27
|
|
|
22
|
-
#
|
|
23
|
-
|
|
28
|
+
# Dataset methods (scopes)
|
|
29
|
+
dataset_module do
|
|
30
|
+
def recent
|
|
31
|
+
order(Sequel.desc(:created_at))
|
|
32
|
+
end
|
|
24
33
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
34
|
+
def by_name(name)
|
|
35
|
+
where(name: name)
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
# Hooks
|
|
40
|
+
def before_create
|
|
41
|
+
self.created_at ||= Time.now
|
|
42
|
+
super
|
|
43
|
+
end
|
|
28
44
|
|
|
29
45
|
# Class methods
|
|
30
46
|
|
|
@@ -34,7 +50,7 @@ class HTM
|
|
|
34
50
|
# @return [Robot] The found or created robot
|
|
35
51
|
#
|
|
36
52
|
def self.find_or_create_by_name(robot_name)
|
|
37
|
-
|
|
53
|
+
find_or_create(name: robot_name)
|
|
38
54
|
end
|
|
39
55
|
|
|
40
56
|
# Instance methods
|
|
@@ -44,16 +60,16 @@ class HTM
|
|
|
44
60
|
# @return [Integer] Number of nodes
|
|
45
61
|
#
|
|
46
62
|
def node_count
|
|
47
|
-
|
|
63
|
+
nodes_dataset.count
|
|
48
64
|
end
|
|
49
65
|
|
|
50
66
|
# Get the most recent nodes for this robot
|
|
51
67
|
#
|
|
52
68
|
# @param limit [Integer] Maximum number of nodes to return (default: 10)
|
|
53
|
-
# @return [
|
|
69
|
+
# @return [Array<Node>] Recent nodes ordered by created_at desc
|
|
54
70
|
#
|
|
55
71
|
def recent_nodes(limit = 10)
|
|
56
|
-
|
|
72
|
+
nodes_dataset.order(Sequel.desc(:created_at)).limit(limit).all
|
|
57
73
|
end
|
|
58
74
|
|
|
59
75
|
# Get nodes with their remember metadata for this robot
|
|
@@ -62,10 +78,11 @@ class HTM
|
|
|
62
78
|
# @return [Array<Hash>] Nodes with remember_count, first/last_remembered_at
|
|
63
79
|
#
|
|
64
80
|
def nodes_with_metadata(limit = 10)
|
|
65
|
-
|
|
66
|
-
.
|
|
67
|
-
.order(last_remembered_at
|
|
81
|
+
robot_nodes_dataset
|
|
82
|
+
.eager(:node)
|
|
83
|
+
.order(Sequel.desc(:last_remembered_at))
|
|
68
84
|
.limit(limit)
|
|
85
|
+
.all
|
|
69
86
|
.map do |rn|
|
|
70
87
|
{
|
|
71
88
|
node: rn.node,
|
|
@@ -85,17 +102,11 @@ class HTM
|
|
|
85
102
|
#
|
|
86
103
|
def memory_summary
|
|
87
104
|
{
|
|
88
|
-
total_nodes:
|
|
89
|
-
in_working_memory:
|
|
90
|
-
with_embeddings:
|
|
105
|
+
total_nodes: nodes_dataset.count,
|
|
106
|
+
in_working_memory: robot_nodes_dataset.where(working_memory: true).count,
|
|
107
|
+
with_embeddings: nodes_dataset.exclude(embedding: nil).count
|
|
91
108
|
}
|
|
92
109
|
end
|
|
93
|
-
|
|
94
|
-
private
|
|
95
|
-
|
|
96
|
-
def set_created_at
|
|
97
|
-
self.created_at ||= Time.current
|
|
98
|
-
end
|
|
99
110
|
end
|
|
100
111
|
end
|
|
101
112
|
end
|