htm 0.0.18 → 0.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +119 -1
- data/README.md +12 -0
- data/Rakefile +104 -18
- data/db/migrate/00001_enable_extensions.rb +9 -5
- data/db/migrate/00002_create_robots.rb +18 -6
- data/db/migrate/00003_create_file_sources.rb +30 -17
- data/db/migrate/00004_create_nodes.rb +60 -48
- data/db/migrate/00005_create_tags.rb +24 -12
- data/db/migrate/00006_create_node_tags.rb +28 -13
- data/db/migrate/00007_create_robot_nodes.rb +40 -26
- data/db/schema.sql +17 -1
- data/db/seeds.rb +34 -34
- data/docs/api/embedding-service.md +140 -110
- data/docs/api/yard/HTM/ActiveRecordConfig.md +6 -0
- data/docs/api/yard/HTM/Config.md +173 -0
- data/docs/api/yard/HTM/ConfigSection.md +28 -0
- data/docs/api/yard/HTM/Database.md +1 -1
- data/docs/api/yard/HTM/Railtie.md +2 -2
- data/docs/api/yard/HTM.md +0 -57
- data/docs/api/yard/index.csv +76 -61
- data/docs/api/yard-reference.md +2 -1
- data/docs/architecture/adrs/003-ollama-embeddings.md +45 -36
- data/docs/architecture/adrs/004-hive-mind.md +1 -1
- data/docs/architecture/adrs/008-robot-identification.md +1 -1
- data/docs/architecture/index.md +11 -9
- data/docs/architecture/overview.md +11 -7
- data/docs/assets/images/balanced-strategy-decay.svg +41 -0
- data/docs/assets/images/class-hierarchy.svg +1 -1
- data/docs/assets/images/eviction-priority.svg +43 -0
- data/docs/assets/images/exception-hierarchy.svg +2 -2
- data/docs/assets/images/hive-mind-shared-memory.svg +52 -0
- data/docs/assets/images/htm-architecture-overview.svg +3 -3
- data/docs/assets/images/htm-core-components.svg +4 -4
- data/docs/assets/images/htm-layered-architecture.svg +1 -1
- data/docs/assets/images/htm-memory-addition-flow.svg +2 -2
- data/docs/assets/images/htm-memory-recall-flow.svg +2 -2
- data/docs/assets/images/memory-topology.svg +53 -0
- data/docs/assets/images/two-tier-memory-architecture.svg +55 -0
- data/docs/database/naming-convention.md +244 -0
- data/docs/database_rake_tasks.md +31 -0
- data/docs/development/rake-tasks.md +80 -35
- data/docs/development/setup.md +76 -44
- data/docs/examples/basic-usage.md +133 -0
- data/docs/examples/config-files.md +170 -0
- data/docs/examples/file-loading.md +208 -0
- data/docs/examples/index.md +116 -0
- data/docs/examples/llm-configuration.md +168 -0
- data/docs/examples/mcp-client.md +172 -0
- data/docs/examples/rails-integration.md +173 -0
- data/docs/examples/robot-groups.md +210 -0
- data/docs/examples/sinatra-integration.md +218 -0
- data/docs/examples/standalone-app.md +216 -0
- data/docs/examples/telemetry.md +224 -0
- data/docs/examples/timeframes.md +143 -0
- data/docs/getting-started/installation.md +97 -40
- data/docs/getting-started/quick-start.md +28 -11
- data/docs/guides/configuration.md +515 -0
- data/docs/guides/file-loading.md +322 -0
- data/docs/guides/getting-started.md +40 -9
- data/docs/guides/index.md +3 -3
- data/docs/guides/mcp-server.md +100 -13
- data/docs/guides/propositions.md +264 -0
- data/docs/guides/recalling-memories.md +4 -4
- data/docs/guides/search-strategies.md +3 -3
- data/docs/guides/tags.md +318 -0
- data/docs/guides/telemetry.md +229 -0
- data/docs/index.md +8 -16
- data/docs/{architecture → robots}/hive-mind.md +8 -111
- data/docs/robots/index.md +73 -0
- data/docs/{guides → robots}/multi-robot.md +3 -3
- data/docs/{guides → robots}/robot-groups.md +8 -7
- data/docs/{architecture → robots}/two-tier-memory.md +13 -149
- data/docs/robots/why-robots.md +85 -0
- data/examples/.envrc +6 -0
- data/examples/.gitignore +2 -0
- data/examples/00_create_examples_db.rb +94 -0
- data/examples/{basic_usage.rb → 01_basic_usage.rb} +12 -16
- data/examples/{custom_llm_configuration.rb → 03_custom_llm_configuration.rb} +13 -3
- data/examples/{file_loader_usage.rb → 04_file_loader_usage.rb} +11 -14
- data/examples/{timeframe_demo.rb → 05_timeframe_demo.rb} +10 -3
- data/examples/{example_app → 06_example_app}/app.rb +15 -15
- data/examples/{cli_app → 07_cli_app}/htm_cli.rb +15 -22
- data/examples/08_sinatra_app/Gemfile.lock +241 -0
- data/examples/{sinatra_app → 08_sinatra_app}/app.rb +19 -18
- data/examples/{mcp_client.rb → 09_mcp_client.rb} +5 -8
- data/examples/{telemetry → 10_telemetry}/SETUP_README.md +1 -1
- data/examples/{telemetry → 10_telemetry}/demo.rb +14 -10
- data/examples/11_robot_groups/README.md +335 -0
- data/examples/{robot_groups → 11_robot_groups/lib}/robot_worker.rb +17 -3
- data/examples/{robot_groups → 11_robot_groups}/multi_process.rb +9 -9
- data/examples/{robot_groups → 11_robot_groups}/same_process.rb +9 -12
- data/examples/{rails_app → 12_rails_app}/Gemfile +3 -0
- data/examples/{rails_app → 12_rails_app}/Gemfile.lock +87 -58
- data/examples/{rails_app → 12_rails_app}/app/controllers/dashboard_controller.rb +10 -6
- data/examples/{rails_app → 12_rails_app}/app/controllers/files_controller.rb +5 -5
- data/examples/{rails_app → 12_rails_app}/app/controllers/memories_controller.rb +11 -7
- data/examples/{rails_app → 12_rails_app}/app/controllers/robots_controller.rb +8 -8
- data/examples/12_rails_app/app/controllers/tags_controller.rb +36 -0
- data/examples/{rails_app → 12_rails_app}/app/views/dashboard/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/files/new.html.erb +5 -2
- data/examples/{rails_app → 12_rails_app}/app/views/memories/_memory_card.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/deleted.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/edit.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/show.html.erb +4 -4
- data/examples/{rails_app → 12_rails_app}/app/views/robots/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/robots/show.html.erb +4 -4
- data/examples/{rails_app → 12_rails_app}/app/views/search/index.html.erb +1 -1
- data/examples/{rails_app → 12_rails_app}/app/views/tags/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/tags/show.html.erb +1 -1
- data/examples/12_rails_app/config/initializers/htm.rb +7 -0
- data/examples/12_rails_app/config/initializers/rack.rb +5 -0
- data/examples/README.md +230 -211
- data/examples/examples_helper.rb +138 -0
- data/lib/htm/config/builder.rb +167 -0
- data/lib/htm/config/database.rb +317 -0
- data/lib/htm/config/defaults.yml +41 -13
- data/lib/htm/config/section.rb +74 -0
- data/lib/htm/config/validator.rb +83 -0
- data/lib/htm/config.rb +65 -361
- data/lib/htm/database.rb +85 -127
- data/lib/htm/errors.rb +14 -0
- data/lib/htm/integrations/sinatra.rb +13 -44
- data/lib/htm/job_adapter.rb +75 -1
- data/lib/htm/jobs/generate_embedding_job.rb +3 -4
- data/lib/htm/jobs/generate_propositions_job.rb +4 -5
- data/lib/htm/jobs/generate_tags_job.rb +16 -15
- data/lib/htm/loaders/defaults_loader.rb +23 -0
- data/lib/htm/loaders/markdown_loader.rb +17 -15
- data/lib/htm/loaders/xdg_config_loader.rb +9 -9
- data/lib/htm/long_term_memory/fulltext_search.rb +14 -14
- data/lib/htm/long_term_memory/hybrid_search.rb +396 -229
- data/lib/htm/long_term_memory/node_operations.rb +24 -23
- data/lib/htm/long_term_memory/relevance_scorer.rb +23 -20
- data/lib/htm/long_term_memory/robot_operations.rb +4 -4
- data/lib/htm/long_term_memory/tag_operations.rb +91 -77
- data/lib/htm/long_term_memory/vector_search.rb +4 -5
- data/lib/htm/long_term_memory.rb +13 -13
- data/lib/htm/mcp/cli.rb +115 -8
- data/lib/htm/mcp/resources.rb +4 -3
- data/lib/htm/mcp/server.rb +5 -4
- data/lib/htm/mcp/tools.rb +37 -28
- data/lib/htm/migration.rb +72 -0
- data/lib/htm/models/file_source.rb +52 -31
- data/lib/htm/models/node.rb +224 -108
- data/lib/htm/models/node_tag.rb +49 -28
- data/lib/htm/models/robot.rb +38 -27
- data/lib/htm/models/robot_node.rb +63 -35
- data/lib/htm/models/tag.rb +126 -123
- data/lib/htm/observability.rb +45 -41
- data/lib/htm/proposition_service.rb +76 -7
- data/lib/htm/railtie.rb +2 -2
- data/lib/htm/robot_group.rb +30 -18
- data/lib/htm/sequel_config.rb +215 -0
- data/lib/htm/sql_builder.rb +14 -16
- data/lib/htm/tag_service.rb +78 -0
- data/lib/htm/tasks.rb +3 -0
- data/lib/htm/version.rb +1 -1
- data/lib/htm/workflows/remember_workflow.rb +213 -0
- data/lib/htm.rb +27 -22
- data/lib/tasks/db.rake +0 -2
- data/lib/tasks/doc.rake +2 -2
- data/lib/tasks/files.rake +11 -18
- data/lib/tasks/htm.rake +190 -62
- data/lib/tasks/jobs.rake +179 -54
- data/lib/tasks/tags.rake +8 -13
- data/mkdocs.yml +33 -8
- data/scripts/backfill_parent_tags.rb +376 -0
- data/scripts/normalize_plural_tags.rb +335 -0
- metadata +168 -86
- data/docs/api/yard/HTM/Configuration.md +0 -240
- data/docs/telemetry.md +0 -391
- data/examples/rails_app/app/controllers/tags_controller.rb +0 -30
- data/examples/sinatra_app/Gemfile.lock +0 -166
- data/lib/htm/active_record_config.rb +0 -104
- /data/examples/{config_file_example → 02_config_file_example}/README.md +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/config/htm.local.yml +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/custom_config.yml +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/show_config.rb +0 -0
- /data/examples/{example_app → 06_example_app}/Rakefile +0 -0
- /data/examples/{cli_app → 07_cli_app}/README.md +0 -0
- /data/examples/{sinatra_app → 08_sinatra_app}/Gemfile +0 -0
- /data/examples/{telemetry → 10_telemetry}/README.md +0 -0
- /data/examples/{telemetry → 10_telemetry}/grafana/dashboards/htm-metrics.json +0 -0
- /data/examples/{rails_app → 12_rails_app}/.gitignore +0 -0
- /data/examples/{rails_app → 12_rails_app}/Procfile.dev +0 -0
- /data/examples/{rails_app → 12_rails_app}/README.md +0 -0
- /data/examples/{rails_app → 12_rails_app}/Rakefile +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/application.css +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/inter-font.css +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/controllers/application_controller.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/controllers/search_controller.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/application.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/application.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/index.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/files/index.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/files/show.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/layouts/application.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/memories/index.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/memories/new.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/robots/new.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/shared/_navbar.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/shared/_stat_card.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/dev +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/rails +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/rake +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/application.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/boot.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/database.yml +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/environment.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/importmap.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/routes.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/tailwind.config.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/config.ru +0 -0
- /data/examples/{rails_app → 12_rails_app}/log/.keep +0 -0
- /data/examples/{rails_app → 12_rails_app}/tmp/local_secret.txt +0 -0
data/lib/htm/sql_builder.rb
CHANGED
|
@@ -105,29 +105,28 @@ class HTM
|
|
|
105
105
|
|
|
106
106
|
prefix = table_alias ? "#{table_alias}." : ""
|
|
107
107
|
full_column = "#{prefix}#{column}"
|
|
108
|
-
conn = ActiveRecord::Base.connection
|
|
109
108
|
|
|
110
109
|
case timeframe
|
|
111
110
|
when Range
|
|
112
|
-
begin_quoted =
|
|
113
|
-
end_quoted =
|
|
111
|
+
begin_quoted = HTM.db.literal(timeframe.begin.iso8601)
|
|
112
|
+
end_quoted = HTM.db.literal(timeframe.end.iso8601)
|
|
114
113
|
"(#{full_column} BETWEEN #{begin_quoted} AND #{end_quoted})"
|
|
115
114
|
when Array
|
|
116
115
|
conditions = timeframe.map do |range|
|
|
117
|
-
begin_quoted =
|
|
118
|
-
end_quoted =
|
|
116
|
+
begin_quoted = HTM.db.literal(range.begin.iso8601)
|
|
117
|
+
end_quoted = HTM.db.literal(range.end.iso8601)
|
|
119
118
|
"(#{full_column} BETWEEN #{begin_quoted} AND #{end_quoted})"
|
|
120
119
|
end
|
|
121
120
|
"(#{conditions.join(' OR ')})"
|
|
122
121
|
end
|
|
123
122
|
end
|
|
124
123
|
|
|
125
|
-
# Apply timeframe filter to
|
|
124
|
+
# Apply timeframe filter to Sequel dataset
|
|
126
125
|
#
|
|
127
|
-
# @param scope [
|
|
126
|
+
# @param scope [Sequel::Dataset] Base dataset
|
|
128
127
|
# @param timeframe [nil, Range, Array<Range>] Time range(s)
|
|
129
128
|
# @param column [Symbol] Column name (default: :created_at)
|
|
130
|
-
# @return [
|
|
129
|
+
# @return [Sequel::Dataset] Filtered dataset
|
|
131
130
|
#
|
|
132
131
|
def apply_timeframe(scope, timeframe, column: :created_at)
|
|
133
132
|
return scope if timeframe.nil?
|
|
@@ -136,8 +135,8 @@ class HTM
|
|
|
136
135
|
when Range
|
|
137
136
|
scope.where(column => timeframe)
|
|
138
137
|
when Array
|
|
139
|
-
conditions = timeframe.map { |range|
|
|
140
|
-
|
|
138
|
+
conditions = timeframe.map { |range| Sequel.expr(column => range) }
|
|
139
|
+
scope.where(Sequel.|(*conditions))
|
|
141
140
|
else
|
|
142
141
|
scope
|
|
143
142
|
end
|
|
@@ -155,23 +154,22 @@ class HTM
|
|
|
155
154
|
|
|
156
155
|
prefix = table_alias ? "#{table_alias}." : ""
|
|
157
156
|
full_column = "#{prefix}#{column}"
|
|
158
|
-
conn = ActiveRecord::Base.connection
|
|
159
157
|
|
|
160
|
-
quoted_metadata =
|
|
158
|
+
quoted_metadata = HTM.db.literal(metadata.to_json)
|
|
161
159
|
"(#{full_column} @> #{quoted_metadata}::jsonb)"
|
|
162
160
|
end
|
|
163
161
|
|
|
164
|
-
# Apply metadata filter to
|
|
162
|
+
# Apply metadata filter to Sequel dataset
|
|
165
163
|
#
|
|
166
|
-
# @param scope [
|
|
164
|
+
# @param scope [Sequel::Dataset] Base dataset
|
|
167
165
|
# @param metadata [Hash] Metadata to filter by
|
|
168
166
|
# @param column [String] Column name (default: "metadata")
|
|
169
|
-
# @return [
|
|
167
|
+
# @return [Sequel::Dataset] Filtered dataset
|
|
170
168
|
#
|
|
171
169
|
def apply_metadata(scope, metadata, column: "metadata")
|
|
172
170
|
return scope if metadata.nil? || metadata.empty?
|
|
173
171
|
|
|
174
|
-
scope.where("#{column} @> ?::jsonb", metadata.to_json)
|
|
172
|
+
scope.where(Sequel.lit("#{column} @> ?::jsonb", metadata.to_json))
|
|
175
173
|
end
|
|
176
174
|
end
|
|
177
175
|
end
|
data/lib/htm/tag_service.rb
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative 'errors'
|
|
4
|
+
require 'active_support/core_ext/string/inflections'
|
|
4
5
|
|
|
5
6
|
class HTM
|
|
6
7
|
# Tag Service - Processes and validates hierarchical tags
|
|
@@ -113,6 +114,8 @@ class HTM
|
|
|
113
114
|
valid_tags = []
|
|
114
115
|
|
|
115
116
|
tags.each do |tag|
|
|
117
|
+
# Normalize: convert plural levels to singular
|
|
118
|
+
tag = singularize_tag_levels(tag)
|
|
116
119
|
# Check format
|
|
117
120
|
unless tag.match?(TAG_FORMAT)
|
|
118
121
|
HTM.logger.warn "TagService: Invalid tag format, skipping: #{tag}"
|
|
@@ -191,5 +194,80 @@ class HTM
|
|
|
191
194
|
depth: levels.size
|
|
192
195
|
}
|
|
193
196
|
end
|
|
197
|
+
|
|
198
|
+
# Tag levels that must be left exactly as written, even though they end in "s".
# singularize_level looks up the lowercased level in this list before doing
# anything else.
SINGULARIZE_SKIP_LIST = [
  # Product, platform and tool names
  'rails', 'kubernetes', 'aws', 'gcp', 'azure', 's3', 'ios', 'macos', 'redis', 'postgres',
  'postgresql', 'mysql', 'jenkins', 'travis', 'github', 'gitlab', 'mkdocs', 'devops',
  # Fields of study and other "-ics" nouns
  'analytics', 'statistics', 'mathematics', 'physics', 'ethics', 'dynamics',
  'graphics', 'linguistics', 'economics', 'robotics',
  # Plain words kept in their plural form
  'pages', 'windows'
].freeze
|
|
206
|
+
|
|
207
|
+
# Normalize tag levels to singular form
#
# Splits the tag on ':', passes every level through singularize_level and
# re-joins the levels with ':'. This keeps the taxonomy consistent
# (e.g., "users" -> "user").
#
# singularize_level decides per level whether to singularize; it skips:
# - Proper nouns and technical terms (Rails, MkDocs, etc.)
# - Words ending in -ics (analytics, robotics, etc.)
# - Words that don't end in common plural patterns
#
# The argument is returned untouched when it does not respond to
# #singularize, i.e. when it is not a String (nil, Symbol, ...) or when
# ActiveSupport's string inflections are not loaded. The capability is
# checked up front rather than by rescuing NoMethodError, so a genuine
# NoMethodError raised while normalizing a level is no longer swallowed.
#
# @param tag [String] Tag with potentially plural levels
# @return [String] Tag with all levels singularized, or the argument
#   itself when it cannot be singularized
# @raise [NoMethodError] if normalizing a level calls an undefined method
#
def self.singularize_tag_levels(tag)
  # Duck-type check replaces the former blanket `rescue NoMethodError`.
  return tag unless tag.respond_to?(:singularize)

  tag.split(':').map { |level| singularize_level(level) }.join(':')
end
|
|
230
|
+
|
|
231
|
+
# Singularize a single tag level with safety checks
#
# The level is returned unchanged when any of these hold:
# - its lowercased form is listed in SINGULARIZE_SKIP_LIST
# - it ends in "ics", "ous" or "ss"
# - it is at most two characters long
# - it does not end in "s"
# - String#singularize shortened it by more than two characters
#
# Otherwise the result of String#singularize is returned, and a debug line
# is logged when that result differs from the input.
#
# @param level [String] Single tag level
# @return [String] Singularized level or original if skipped
#
def self.singularize_level(level)
  # Explicit exceptions first (names, technical terms, ...).
  return level if SINGULARIZE_SKIP_LIST.include?(level.downcase)

  # Endings that usually do not mark a plural: -ics, -ous, -ss.
  return level if level.end_with?('ics', 'ous', 'ss')

  # Very short levels, and anything not ending in "s", are left alone.
  return level if level.length <= 2
  return level unless level.end_with?('s')

  singular = level.singularize

  # Guard against implausible results: more than two characters removed.
  removed = level.length - singular.length
  return level if removed > 2

  HTM.logger.debug "TagService: Normalized '#{level}' to '#{singular}'" unless singular == level

  singular
end
|
|
194
272
|
end
|
|
195
273
|
end
|
data/lib/htm/tasks.rb
CHANGED
data/lib/htm/version.rb
CHANGED
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'simple_flow'
|
|
4
|
+
|
|
5
|
+
class HTM
  module Workflows
    # RememberWorkflow orchestrates the parallel processing of node enrichment
    #
    # Uses simple_flow to manage the dependency graph and parallel execution
    # of embedding generation, tag extraction, and proposition extraction.
    #
    # The workflow structure:
    #   save_node (no deps) -> embedding, tags, propositions (parallel) -> finalize
    #
    # @example Basic usage (default :auto concurrency)
    #   workflow = HTM::Workflows::RememberWorkflow.new(htm_instance)
    #   node_id = workflow.call(content: "PostgreSQL is great", tags: ["database"])
    #
    # @example Selecting the concurrency model explicitly (e.g. for testing)
    #   workflow = HTM::Workflows::RememberWorkflow.new(htm_instance, concurrency: :threads)
    #   node_id = workflow.call(content: "Test content")
    #
    class RememberWorkflow
      attr_reader :htm, :pipeline

      # Initialize the remember workflow
      #
      # The pipeline is built once here and reused by every #call.
      #
      # @param htm [HTM] HTM instance for the robot
      # @param concurrency [Symbol] Concurrency model (:auto, :threads, :async),
      #   passed straight through to SimpleFlow::Pipeline.new
      #
      def initialize(htm, concurrency: :auto)
        @htm = htm
        @concurrency = concurrency
        @pipeline = build_pipeline
      end

      # Execute the remember workflow
      #
      # @param content [String] Content to remember
      # @param tags [Array<String>] Manual tags to assign
      # @param metadata [Hash] Metadata for the node
      # @return [Integer] Node ID of the created memory
      # @raise [HTM::Error] when the pipeline result does not report continue?
      #
      def call(content:, tags: [], metadata: {})
        # The HTM instance and robot id travel in the payload; the steps read
        # them back through result.value instead of using @htm.
        # NOTE(review): presumably because the pipeline block is not evaluated
        # with this object as self - confirm against simple_flow.
        initial_data = {
          content: content,
          tags: tags,
          metadata: metadata,
          robot_id: @htm.robot_id,
          htm: @htm
        }

        result = @pipeline.call_parallel(SimpleFlow::Result.new(initial_data))

        if result.continue?
          # :node_id is placed in the context by the save_node step
          result.context[:node_id]
        else
          HTM.logger.error "RememberWorkflow failed: #{result.errors.inspect}"
          raise HTM::Error, "Remember workflow failed: #{result.errors.values.flatten.join(', ')}"
        end
      end

      # Get visualization of the workflow as Mermaid diagram
      #
      # @return [String] Mermaid diagram source
      #
      def to_mermaid
        @pipeline.visualize_mermaid
      end

      # Get execution plan
      #
      # @return [String] Execution plan description
      #
      def execution_plan
        @pipeline.execution_plan
      end

      private

      # Declare the five pipeline steps and their dependencies.
      #
      # Each step is a lambda that receives the current result and returns a
      # result via #continue. Values shared between steps (:node_id, :is_new,
      # :token_count, :robot_node) are stored with #with_context by save_node.
      #
      # @return [SimpleFlow::Pipeline]
      #
      def build_pipeline
        SimpleFlow::Pipeline.new(concurrency: @concurrency) do
          # Step 1: Save node to database (no dependencies)
          step :save_node, ->(result) {
            data = result.value
            htm = data[:htm]

            # Calculate token count
            token_count = HTM.count_tokens(data[:content])

            # Store in long-term memory
            # (no embedding yet; the generate_embedding step handles it for new nodes)
            save_result = htm.long_term_memory.add(
              content: data[:content],
              token_count: token_count,
              robot_id: data[:robot_id],
              embedding: nil,
              metadata: data[:metadata]
            )

            node_id = save_result[:node_id]
            is_new = save_result[:is_new]

            HTM.logger.info "RememberWorkflow: Node #{node_id} saved (new: #{is_new})"

            # Publish everything the later steps read from result.context
            result
              .with_context(:node_id, node_id)
              .with_context(:is_new, is_new)
              .with_context(:token_count, token_count)
              .with_context(:robot_node, save_result[:robot_node])
              .continue(data)
          }, depends_on: :none

          # Step 2: Generate embedding (depends on save_node, runs in parallel with tags/propositions)
          step :generate_embedding, ->(result) {
            node_id = result.context[:node_id]
            is_new = result.context[:is_new]

            # Only generate for new nodes
            if is_new
              begin
                HTM::Jobs::GenerateEmbeddingJob.perform(node_id: node_id)
              rescue StandardError => e
                HTM.logger.error "RememberWorkflow: Embedding generation failed: #{e.message}"
                # Continue despite error - embedding is non-critical
              end
            end

            result.continue(result.value)
          }, depends_on: [:save_node]

          # Step 3: Generate tags (depends on save_node, runs in parallel with embedding/propositions)
          step :generate_tags, ->(result) {
            node_id = result.context[:node_id]
            is_new = result.context[:is_new]
            manual_tags = result.value[:tags] || []

            if is_new
              # Add manual tags immediately (including parent tags)
              # Note: this runs outside the begin/rescue below, so an error
              # raised here is not caught by this step.
              if manual_tags.any?
                manual_tags.each do |tag_name|
                  HTM::Models::Tag.find_or_create_with_ancestors(tag_name).each do |tag|
                    HTM::Models::NodeTag.find_or_create(node_id: node_id, tag_id: tag.id)
                  end
                end
              end

              begin
                HTM::Jobs::GenerateTagsJob.perform(node_id: node_id)
              rescue StandardError => e
                HTM.logger.error "RememberWorkflow: Tag generation failed: #{e.message}"
                # Continue despite error - tags are non-critical
              end
            else
              # For existing nodes, only add manual tags
              if manual_tags.any?
                node = HTM::Models::Node[node_id]
                node.add_tags(manual_tags)
              end
            end

            result.continue(result.value)
          }, depends_on: [:save_node]

          # Step 4: Generate propositions (depends on save_node, runs in parallel with embedding/tags)
          step :generate_propositions, ->(result) {
            node_id = result.context[:node_id]
            is_new = result.context[:is_new]
            metadata = result.value[:metadata] || {}
            robot_id = result.value[:robot_id]

            # Only extract propositions for new nodes that aren't already propositions
            # (the flag is read with the Symbol key :is_proposition)
            if is_new && HTM.config.extract_propositions && !metadata[:is_proposition]
              begin
                HTM::Jobs::GeneratePropositionsJob.perform(node_id: node_id, robot_id: robot_id)
              rescue StandardError => e
                HTM.logger.error "RememberWorkflow: Proposition extraction failed: #{e.message}"
                # Continue despite error - propositions are non-critical
              end
            end

            result.continue(result.value)
          }, depends_on: [:save_node]

          # Step 5: Finalize (depends on all enrichment steps)
          step :finalize, ->(result) {
            node_id = result.context[:node_id]
            token_count = result.context[:token_count]
            robot_node = result.context[:robot_node]
            htm = result.value[:htm]

            # Add to working memory
            # If there is not enough space, evict first and record the evicted
            # node keys in long-term memory.
            unless htm.working_memory.has_space?(token_count)
              evicted = htm.working_memory.evict_to_make_space(token_count)
              evicted_keys = evicted.map { |n| n[:key] }
              htm.long_term_memory.mark_evicted(robot_id: result.value[:robot_id], node_ids: evicted_keys) if evicted_keys.any?
            end
            htm.working_memory.add(node_id, result.value[:content], token_count: token_count, access_count: 0)

            # Mark as in working memory
            robot_node.update(working_memory: true)

            # Update robot activity
            htm.long_term_memory.update_robot_activity(result.value[:robot_id])

            HTM.logger.info "RememberWorkflow: Node #{node_id} finalized"

            result.continue(result.value)
          }, depends_on: [:generate_embedding, :generate_tags, :generate_propositions]
        end
      end
    end
  end
end
|
data/lib/htm.rb
CHANGED
|
@@ -4,7 +4,7 @@ require_relative "htm/version"
|
|
|
4
4
|
require_relative "htm/errors"
|
|
5
5
|
require_relative "htm/config"
|
|
6
6
|
require_relative "htm/circuit_breaker"
|
|
7
|
-
require_relative "htm/
|
|
7
|
+
require_relative "htm/sequel_config"
|
|
8
8
|
require_relative "htm/database"
|
|
9
9
|
require_relative "htm/long_term_memory"
|
|
10
10
|
require_relative "htm/working_memory"
|
|
@@ -19,6 +19,7 @@ require_relative "htm/jobs/generate_tags_job"
|
|
|
19
19
|
require_relative "htm/jobs/generate_propositions_job"
|
|
20
20
|
require_relative "htm/loaders/markdown_chunker"
|
|
21
21
|
require_relative "htm/loaders/markdown_loader"
|
|
22
|
+
require_relative "htm/workflows/remember_workflow"
|
|
22
23
|
require_relative "htm/observability"
|
|
23
24
|
require_relative "htm/telemetry"
|
|
24
25
|
require_relative "htm/working_memory_channel"
|
|
@@ -85,8 +86,8 @@ class HTM
|
|
|
85
86
|
db_cache_size: 1000,
|
|
86
87
|
db_cache_ttl: 300
|
|
87
88
|
)
|
|
88
|
-
# Establish
|
|
89
|
-
HTM::
|
|
89
|
+
# Establish Sequel connection if not already connected
|
|
90
|
+
HTM::SequelConfig.establish_connection! unless HTM::SequelConfig.db
|
|
90
91
|
|
|
91
92
|
@robot_name = robot_name || "robot_#{SecureRandom.uuid[0..7]}"
|
|
92
93
|
|
|
@@ -178,7 +179,7 @@ class HTM
|
|
|
178
179
|
|
|
179
180
|
# For existing nodes, only add manual tags if provided
|
|
180
181
|
if tags.any?
|
|
181
|
-
node = HTM::Models::Node
|
|
182
|
+
node = HTM::Models::Node[node_id]
|
|
182
183
|
node.add_tags(tags)
|
|
183
184
|
HTM.logger.info "Added #{tags.length} manual tags to existing node #{node_id}"
|
|
184
185
|
end
|
|
@@ -193,7 +194,7 @@ class HTM
|
|
|
193
194
|
@working_memory.add(node_id, content, token_count: token_count, access_count: 0)
|
|
194
195
|
|
|
195
196
|
# Mark node as in working memory in the robot_nodes join table
|
|
196
|
-
result[:robot_node].update
|
|
197
|
+
result[:robot_node].update(working_memory: true)
|
|
197
198
|
|
|
198
199
|
update_robot_activity
|
|
199
200
|
node_id
|
|
@@ -338,7 +339,7 @@ class HTM
|
|
|
338
339
|
end
|
|
339
340
|
|
|
340
341
|
# Verify node exists (including soft-deleted for restore scenarios)
|
|
341
|
-
node = HTM::Models::Node.with_deleted.
|
|
342
|
+
node = HTM::Models::Node.with_deleted.first(id: node_id)
|
|
342
343
|
raise HTM::NotFoundError, "Node not found: #{node_id}" unless node
|
|
343
344
|
|
|
344
345
|
if soft
|
|
@@ -387,8 +388,8 @@ class HTM
|
|
|
387
388
|
end
|
|
388
389
|
|
|
389
390
|
# Find all nodes containing the substring (case-insensitive)
|
|
390
|
-
matching_nodes = HTM::Models::Node.where(
|
|
391
|
-
node_ids = matching_nodes.
|
|
391
|
+
matching_nodes = HTM::Models::Node.where(Sequel.ilike(:content, "%#{content_substring}%"))
|
|
392
|
+
node_ids = matching_nodes.select_map(:id)
|
|
392
393
|
|
|
393
394
|
if node_ids.empty?
|
|
394
395
|
HTM.logger.info "No nodes found containing: #{content_substring}"
|
|
@@ -418,7 +419,7 @@ class HTM
|
|
|
418
419
|
raise ArgumentError, "Node ID cannot be nil" if node_id.nil?
|
|
419
420
|
|
|
420
421
|
# Find including soft-deleted nodes
|
|
421
|
-
node = HTM::Models::Node.with_deleted.
|
|
422
|
+
node = HTM::Models::Node.with_deleted.first(id: node_id)
|
|
422
423
|
raise HTM::NotFoundError, "Node not found: #{node_id}" unless node
|
|
423
424
|
|
|
424
425
|
unless node.deleted?
|
|
@@ -471,7 +472,7 @@ class HTM
|
|
|
471
472
|
# Update database: mark all as evicted from working memory
|
|
472
473
|
count = HTM::Models::RobotNode
|
|
473
474
|
.where(robot_id: @robot_id, working_memory: true)
|
|
474
|
-
.
|
|
475
|
+
.update(working_memory: false)
|
|
475
476
|
|
|
476
477
|
HTM.logger.info "Cleared #{count} nodes from working memory"
|
|
477
478
|
count
|
|
@@ -535,17 +536,17 @@ class HTM
|
|
|
535
536
|
# Get all nodes loaded from a specific file
|
|
536
537
|
#
|
|
537
538
|
# @param file_path [String] Path to the source file
|
|
538
|
-
# @return [
|
|
539
|
+
# @return [Array<HTM::Models::Node>] Nodes from this file, ordered by chunk_position
|
|
539
540
|
#
|
|
540
541
|
# @example
|
|
541
542
|
# nodes = htm.nodes_from_file('/path/to/doc.md')
|
|
542
543
|
# nodes.each { |node| puts node.content }
|
|
543
544
|
#
|
|
544
545
|
def nodes_from_file(file_path)
|
|
545
|
-
source = HTM::Models::FileSource.
|
|
546
|
-
return
|
|
546
|
+
source = HTM::Models::FileSource.first(file_path: File.expand_path(file_path))
|
|
547
|
+
return [] unless source
|
|
547
548
|
|
|
548
|
-
HTM::Models::Node.from_source(source.id)
|
|
549
|
+
HTM::Models::Node.from_source(source.id).all
|
|
549
550
|
end
|
|
550
551
|
|
|
551
552
|
# Unload a file (soft-delete all its chunks and remove source record)
|
|
@@ -558,12 +559,12 @@ class HTM
|
|
|
558
559
|
# puts "Unloaded #{count} chunks"
|
|
559
560
|
#
|
|
560
561
|
def unload_file(file_path)
|
|
561
|
-
source = HTM::Models::FileSource.
|
|
562
|
+
source = HTM::Models::FileSource.first(file_path: File.expand_path(file_path))
|
|
562
563
|
return 0 unless source
|
|
563
564
|
|
|
564
565
|
count = source.soft_delete_chunks!
|
|
565
566
|
@long_term_memory.clear_cache!
|
|
566
|
-
source.
|
|
567
|
+
source.delete
|
|
567
568
|
|
|
568
569
|
update_robot_activity
|
|
569
570
|
count
|
|
@@ -588,11 +589,16 @@ class HTM
|
|
|
588
589
|
end
|
|
589
590
|
|
|
590
591
|
def enqueue_tags_job(node_id, manual_tags: [])
|
|
591
|
-
# Add manual tags immediately if provided
|
|
592
|
+
# Add manual tags immediately if provided (including all parent tags)
|
|
593
|
+
# For "database:postgresql:extensions", this creates and associates:
|
|
594
|
+
# - "database"
|
|
595
|
+
# - "database:postgresql"
|
|
596
|
+
# - "database:postgresql:extensions"
|
|
592
597
|
if manual_tags.any?
|
|
593
598
|
manual_tags.each do |tag_name|
|
|
594
|
-
|
|
595
|
-
|
|
599
|
+
HTM::Models::Tag.find_or_create_with_ancestors(tag_name).each do |tag|
|
|
600
|
+
HTM::Models::NodeTag.find_or_create(node_id: node_id, tag_id: tag.id)
|
|
601
|
+
end
|
|
596
602
|
end
|
|
597
603
|
end
|
|
598
604
|
|
|
@@ -636,9 +642,8 @@ class HTM
|
|
|
636
642
|
)
|
|
637
643
|
|
|
638
644
|
# Mark node as in working memory in the robot_nodes join table
|
|
639
|
-
HTM::Models::RobotNode
|
|
640
|
-
|
|
641
|
-
&.update!(working_memory: true)
|
|
645
|
+
robot_node = HTM::Models::RobotNode.first(robot_id: @robot_id, node_id: node_id)
|
|
646
|
+
robot_node&.update(working_memory: true)
|
|
642
647
|
end
|
|
643
648
|
|
|
644
649
|
# Validation helper methods
|
data/lib/tasks/db.rake
CHANGED
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
namespace :db do
|
|
4
4
|
desc "Run database migrations"
|
|
5
5
|
task :migrate do
|
|
6
|
-
require_relative '../htm'
|
|
7
6
|
|
|
8
7
|
HTM::Database.migrate
|
|
9
8
|
puts "Database migrations completed successfully"
|
|
@@ -11,7 +10,6 @@ namespace :db do
|
|
|
11
10
|
|
|
12
11
|
desc "Setup database schema (includes migrations)"
|
|
13
12
|
task :setup do
|
|
14
|
-
require_relative '../htm'
|
|
15
13
|
|
|
16
14
|
HTM::Database.setup
|
|
17
15
|
puts "Database setup completed successfully"
|
data/lib/tasks/doc.rake
CHANGED
|
@@ -206,7 +206,7 @@ namespace :htm do
|
|
|
206
206
|
# Skip error classes and internal classes
|
|
207
207
|
next if class_name.end_with?("Error")
|
|
208
208
|
next if class_name.include?("Railtie")
|
|
209
|
-
next if class_name.include?("
|
|
209
|
+
next if class_name.include?("SequelConfig")
|
|
210
210
|
|
|
211
211
|
# Get description
|
|
212
212
|
simple_name = basename
|
|
@@ -228,7 +228,7 @@ namespace :htm do
|
|
|
228
228
|
basename = File.basename(file, ".html")
|
|
229
229
|
next if basename.end_with?("Error")
|
|
230
230
|
next if basename == "Railtie"
|
|
231
|
-
next if basename == "
|
|
231
|
+
next if basename == "SequelConfig"
|
|
232
232
|
|
|
233
233
|
desc = descriptions[basename] || "#{basename} class"
|
|
234
234
|
classes << ["HTM::#{basename}", desc, "yard/HTM/#{basename}.html"]
|