htm 0.0.11 → 0.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.dictate.toml +46 -0
- data/.envrc +2 -0
- data/CHANGELOG.md +85 -2
- data/README.md +348 -79
- data/Rakefile +14 -2
- data/bin/htm_mcp.rb +94 -0
- data/config/database.yml +20 -13
- data/db/migrate/00003_create_file_sources.rb +5 -0
- data/db/migrate/00004_create_nodes.rb +17 -0
- data/db/migrate/00005_create_tags.rb +7 -0
- data/db/migrate/00006_create_node_tags.rb +2 -0
- data/db/migrate/00007_create_robot_nodes.rb +7 -0
- data/db/schema.sql +69 -100
- data/docs/api/index.md +1 -1
- data/docs/api/yard/HTM/Configuration.md +54 -0
- data/docs/api/yard/HTM/Database.md +13 -10
- data/docs/api/yard/HTM/EmbeddingService.md +5 -1
- data/docs/api/yard/HTM/LongTermMemory.md +18 -277
- data/docs/api/yard/HTM/PropositionError.md +18 -0
- data/docs/api/yard/HTM/PropositionService.md +66 -0
- data/docs/api/yard/HTM/QueryCache.md +88 -0
- data/docs/api/yard/HTM/RobotGroup.md +481 -0
- data/docs/api/yard/HTM/SqlBuilder.md +108 -0
- data/docs/api/yard/HTM/TagService.md +4 -0
- data/docs/api/yard/HTM/Telemetry/NullInstrument.md +13 -0
- data/docs/api/yard/HTM/Telemetry/NullMeter.md +15 -0
- data/docs/api/yard/HTM/Telemetry.md +109 -0
- data/docs/api/yard/HTM/WorkingMemoryChannel.md +176 -0
- data/docs/api/yard/HTM.md +8 -22
- data/docs/api/yard/index.csv +102 -25
- data/docs/api/yard-reference.md +8 -0
- data/docs/architecture/index.md +1 -1
- data/docs/assets/images/multi-provider-failover.svg +51 -0
- data/docs/assets/images/robot-group-architecture.svg +65 -0
- data/docs/database/README.md +3 -3
- data/docs/database/public.file_sources.svg +29 -21
- data/docs/database/public.node_tags.md +2 -0
- data/docs/database/public.node_tags.svg +53 -41
- data/docs/database/public.nodes.md +2 -0
- data/docs/database/public.nodes.svg +52 -40
- data/docs/database/public.robot_nodes.md +2 -0
- data/docs/database/public.robot_nodes.svg +30 -22
- data/docs/database/public.robots.svg +16 -12
- data/docs/database/public.tags.md +3 -0
- data/docs/database/public.tags.svg +41 -33
- data/docs/database/schema.json +66 -0
- data/docs/database/schema.svg +60 -48
- data/docs/development/index.md +14 -1
- data/docs/development/rake-tasks.md +1068 -0
- data/docs/getting-started/index.md +1 -1
- data/docs/getting-started/quick-start.md +144 -155
- data/docs/guides/adding-memories.md +2 -3
- data/docs/guides/context-assembly.md +185 -184
- data/docs/guides/getting-started.md +154 -148
- data/docs/guides/index.md +8 -1
- data/docs/guides/long-term-memory.md +60 -92
- data/docs/guides/mcp-server.md +617 -0
- data/docs/guides/multi-robot.md +249 -345
- data/docs/guides/recalling-memories.md +153 -163
- data/docs/guides/robot-groups.md +604 -0
- data/docs/guides/search-strategies.md +61 -58
- data/docs/guides/working-memory.md +103 -136
- data/docs/images/telemetry-architecture.svg +153 -0
- data/docs/index.md +30 -26
- data/docs/telemetry.md +391 -0
- data/examples/README.md +46 -1
- data/examples/cli_app/README.md +1 -1
- data/examples/cli_app/htm_cli.rb +1 -1
- data/examples/robot_groups/robot_worker.rb +1 -2
- data/examples/robot_groups/same_process.rb +1 -4
- data/examples/sinatra_app/app.rb +1 -1
- data/examples/telemetry/README.md +147 -0
- data/examples/telemetry/SETUP_README.md +169 -0
- data/examples/telemetry/demo.rb +498 -0
- data/examples/telemetry/grafana/dashboards/htm-metrics.json +457 -0
- data/lib/htm/configuration.rb +261 -70
- data/lib/htm/database.rb +46 -22
- data/lib/htm/embedding_service.rb +24 -14
- data/lib/htm/errors.rb +15 -1
- data/lib/htm/jobs/generate_embedding_job.rb +19 -0
- data/lib/htm/jobs/generate_propositions_job.rb +103 -0
- data/lib/htm/jobs/generate_tags_job.rb +24 -0
- data/lib/htm/loaders/markdown_chunker.rb +79 -0
- data/lib/htm/loaders/markdown_loader.rb +41 -15
- data/lib/htm/long_term_memory/fulltext_search.rb +138 -0
- data/lib/htm/long_term_memory/hybrid_search.rb +324 -0
- data/lib/htm/long_term_memory/node_operations.rb +209 -0
- data/lib/htm/long_term_memory/relevance_scorer.rb +355 -0
- data/lib/htm/long_term_memory/robot_operations.rb +34 -0
- data/lib/htm/long_term_memory/tag_operations.rb +428 -0
- data/lib/htm/long_term_memory/vector_search.rb +109 -0
- data/lib/htm/long_term_memory.rb +51 -1153
- data/lib/htm/models/node.rb +35 -2
- data/lib/htm/models/node_tag.rb +31 -0
- data/lib/htm/models/robot_node.rb +31 -0
- data/lib/htm/models/tag.rb +44 -0
- data/lib/htm/proposition_service.rb +169 -0
- data/lib/htm/query_cache.rb +214 -0
- data/lib/htm/robot_group.rb +721 -0
- data/lib/htm/sql_builder.rb +178 -0
- data/lib/htm/tag_service.rb +16 -6
- data/lib/htm/tasks.rb +8 -2
- data/lib/htm/telemetry.rb +224 -0
- data/lib/htm/version.rb +1 -1
- data/lib/htm/working_memory_channel.rb +250 -0
- data/lib/htm.rb +66 -3
- data/lib/tasks/doc.rake +1 -1
- data/lib/tasks/htm.rake +259 -13
- data/mkdocs.yml +98 -96
- metadata +55 -20
- data/.aigcm_msg +0 -1
- data/.claude/settings.local.json +0 -95
- data/CLAUDE.md +0 -603
- data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +0 -12
- data/examples/cli_app/temp.log +0 -93
- data/examples/robot_groups/lib/robot_group.rb +0 -419
- data/examples/robot_groups/lib/working_memory_channel.rb +0 -140
- data/lib/htm/loaders/paragraph_chunker.rb +0 -112
- data/notes/ARCHITECTURE_REVIEW.md +0 -1167
- data/notes/IMPLEMENTATION_SUMMARY.md +0 -606
- data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +0 -451
- data/notes/next_steps.md +0 -100
- data/notes/plan.md +0 -627
- data/notes/tag_ontology_enhancement_ideas.md +0 -222
- data/notes/timescaledb_removal_summary.md +0 -200
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
# lib/htm/working_memory_channel.rb
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
class HTM
|
|
5
|
+
# Provides real-time synchronization of working memory changes across multiple
|
|
6
|
+
# robots using PostgreSQL LISTEN/NOTIFY pub/sub mechanism.
|
|
7
|
+
#
|
|
8
|
+
# This class enables distributed robots to maintain synchronized working memory
|
|
9
|
+
# by broadcasting change notifications through PostgreSQL channels. When one robot
|
|
10
|
+
# adds, evicts, or clears working memory, all other robots in the group receive
|
|
11
|
+
# immediate notification.
|
|
12
|
+
#
|
|
13
|
+
# @example Basic usage
|
|
14
|
+
# channel = HTM::WorkingMemoryChannel.new('support-team', db_config)
|
|
15
|
+
#
|
|
16
|
+
# # Subscribe to changes
|
|
17
|
+
# channel.on_change do |event, node_id, robot_id|
|
|
18
|
+
# case event
|
|
19
|
+
# when :added then puts "Node #{node_id} added by robot #{robot_id}"
|
|
20
|
+
# when :evicted then puts "Node #{node_id} evicted by robot #{robot_id}"
|
|
21
|
+
# when :cleared then puts "Working memory cleared by robot #{robot_id}"
|
|
22
|
+
# end
|
|
23
|
+
# end
|
|
24
|
+
#
|
|
25
|
+
# # Start listening in background thread
|
|
26
|
+
# channel.start_listening
|
|
27
|
+
#
|
|
28
|
+
# # Publish a change
|
|
29
|
+
# channel.notify(:added, node_id: 123, robot_id: 456)
|
|
30
|
+
#
|
|
31
|
+
# # Cleanup when done
|
|
32
|
+
# channel.stop_listening
|
|
33
|
+
#
|
|
34
|
+
# @see HTM::RobotGroup Higher-level coordination using this channel
|
|
35
|
+
#
|
|
36
|
+
class WorkingMemoryChannel
|
|
37
|
+
# Prefix used for all PostgreSQL channel names
|
|
38
|
+
# @return [String]
|
|
39
|
+
CHANNEL_PREFIX = 'htm_wm'
|
|
40
|
+
|
|
41
|
+
# Number of notifications received since channel was created
|
|
42
|
+
# @return [Integer]
|
|
43
|
+
attr_reader :notifications_received
|
|
44
|
+
|
|
45
|
+
# Creates a new working memory channel for a robot group.
|
|
46
|
+
#
|
|
47
|
+
# The channel name is derived from the group name with non-alphanumeric
|
|
48
|
+
# characters replaced by underscores to ensure PostgreSQL compatibility.
|
|
49
|
+
#
|
|
50
|
+
# @param group_name [String] Name of the robot group (used to create unique channel)
|
|
51
|
+
# @param db_config [Hash] PostgreSQL connection configuration hash
|
|
52
|
+
# @option db_config [String] :host Database host
|
|
53
|
+
# @option db_config [Integer] :port Database port
|
|
54
|
+
# @option db_config [String] :dbname Database name
|
|
55
|
+
# @option db_config [String] :user Database user
|
|
56
|
+
# @option db_config [String] :password Database password (optional)
|
|
57
|
+
#
|
|
58
|
+
# @example
|
|
59
|
+
# db_config = { host: 'localhost', port: 5432, dbname: 'htm_dev', user: 'postgres' }
|
|
60
|
+
# channel = HTM::WorkingMemoryChannel.new('customer-support', db_config)
|
|
61
|
+
#
|
|
62
|
+
def initialize(group_name, db_config)
  @group_name = group_name

  # Explicit ASCII ranges instead of /[^a-z0-9_]/i: with the /i flag, Unicode case
  # folding can let non-ASCII characters (e.g. the Kelvin sign U+212A, which folds
  # to "k") pass as letters and reach the channel name unsanitized.
  # #to_s additionally allows a Symbol to be used as the group name.
  safe_name = group_name.to_s.gsub(/[^A-Za-z0-9_]/, '_')
  @channel = "#{CHANNEL_PREFIX}_#{safe_name}"

  @db_config = db_config
  @listeners = []
  @listen_thread = nil
  @stop_requested = false
  @notifications_received = 0
  # Guards @listeners and @notifications_received (see #on_change / #handle_notification).
  @mutex = Mutex.new
end
|
|
72
|
+
|
|
73
|
+
# @!group Publishing
|
|
74
|
+
|
|
75
|
+
# Broadcasts a working memory change notification to all listeners.
|
|
76
|
+
#
|
|
77
|
+
# Uses PostgreSQL's pg_notify function to send a JSON payload containing
|
|
78
|
+
# the event type, affected node ID, originating robot ID, and timestamp.
|
|
79
|
+
#
|
|
80
|
+
# @param event [Symbol] Type of change (:added, :evicted, or :cleared)
|
|
81
|
+
# @param node_id [Integer, nil] ID of the affected node (nil for :cleared events)
|
|
82
|
+
# @param robot_id [Integer] ID of the robot that triggered the change
|
|
83
|
+
# @return [void]
|
|
84
|
+
#
|
|
85
|
+
# @example Notify that a node was added
|
|
86
|
+
# channel.notify(:added, node_id: 123, robot_id: 1)
|
|
87
|
+
#
|
|
88
|
+
# @example Notify that working memory was cleared
|
|
89
|
+
# channel.notify(:cleared, node_id: nil, robot_id: 1)
|
|
90
|
+
#
|
|
91
|
+
def notify(event, node_id:, robot_id:)
  # Describe the change; subscribers decode this JSON in #handle_notification.
  message = { event: event, node_id: node_id, robot_id: robot_id, timestamp: Time.now.iso8601 }
  encoded = message.to_json

  # Channel and payload travel as bind parameters, never interpolated into the SQL text.
  with_connection { |pg| pg.exec_params('SELECT pg_notify($1, $2)', [@channel, encoded]) }
end
|
|
103
|
+
|
|
104
|
+
# @!endgroup
|
|
105
|
+
|
|
106
|
+
# @!group Subscribing
|
|
107
|
+
|
|
108
|
+
# Registers a callback to be invoked when working memory changes occur.
|
|
109
|
+
#
|
|
110
|
+
# Multiple callbacks can be registered; all will be called for each event.
|
|
111
|
+
# Callbacks are invoked synchronously within the listener thread.
|
|
112
|
+
#
|
|
113
|
+
# @yield [event, node_id, robot_id] Block called for each notification
|
|
114
|
+
# @yieldparam event [Symbol] Type of change (:added, :evicted, or :cleared)
|
|
115
|
+
# @yieldparam node_id [Integer, nil] ID of the affected node
|
|
116
|
+
# @yieldparam robot_id [Integer] ID of the robot that triggered the change
|
|
117
|
+
# @return [void]
|
|
118
|
+
#
|
|
119
|
+
# @example Register a change handler
|
|
120
|
+
# channel.on_change do |event, node_id, robot_id|
|
|
121
|
+
# puts "Received #{event} event for node #{node_id}"
|
|
122
|
+
# end
|
|
123
|
+
#
|
|
124
|
+
def on_change(&callback)
  # Without this guard a nil would be stored and only fail later, inside the
  # listener thread, when the first notification is dispatched.
  raise ArgumentError, 'on_change requires a block' unless callback

  @mutex.synchronize { @listeners << callback }
end
|
|
127
|
+
|
|
128
|
+
# Starts listening for notifications in a background thread.
|
|
129
|
+
#
|
|
130
|
+
# Creates a dedicated PostgreSQL connection that uses LISTEN to receive
|
|
131
|
+
# notifications. The thread polls every 0.5 seconds, allowing for clean
|
|
132
|
+
# shutdown via {#stop_listening}.
|
|
133
|
+
#
|
|
134
|
+
# @return [Thread] The background listener thread
|
|
135
|
+
#
|
|
136
|
+
# @example Start and verify listening
|
|
137
|
+
# thread = channel.start_listening
|
|
138
|
+
# puts "Listening: #{channel.listening?}" # => true
|
|
139
|
+
#
|
|
140
|
+
def start_listening
  # Already running: spawning a second thread would orphan the first one (and its
  # connection) and dispatch every notification twice.
  return @listen_thread if @listen_thread&.alive?

  @stop_requested = false
  @listen_thread = Thread.new do
    # Set from inside the thread so the flag is in effect before listen_loop can raise.
    Thread.current.abort_on_exception = true
    listen_loop
  end
end
|
|
148
|
+
|
|
149
|
+
# Stops the background listener thread.
|
|
150
|
+
#
|
|
151
|
+
# Signals the listener to stop, waits up to 0.5 seconds for clean exit,
|
|
152
|
+
# then forcefully terminates if still running. The PostgreSQL connection
|
|
153
|
+
# is closed automatically.
|
|
154
|
+
#
|
|
155
|
+
# @return [void]
|
|
156
|
+
#
|
|
157
|
+
# @example Clean shutdown
|
|
158
|
+
# channel.stop_listening
|
|
159
|
+
# puts "Listening: #{channel.listening?}" # => false
|
|
160
|
+
#
|
|
161
|
+
def stop_listening
  @stop_requested = true
  worker = @listen_thread
  @listen_thread = nil
  return if worker.nil?

  # Called from a callback, i.e. on the listener thread itself: Thread#join on the
  # current thread raises ThreadError. The loop checks @stop_requested and will
  # exit on its own once the callback returns.
  return if worker.equal?(Thread.current)

  # Give the thread a moment to exit cleanly, then terminate it forcefully.
  worker.join(0.5)
  worker.kill if worker.alive?
  nil
end
|
|
168
|
+
|
|
169
|
+
# @!endgroup
|
|
170
|
+
|
|
171
|
+
# @!group Status
|
|
172
|
+
|
|
173
|
+
# Checks if the listener thread is currently active.
|
|
174
|
+
#
|
|
175
|
+
# @return [Boolean] true if listening for notifications, false otherwise
|
|
176
|
+
#
|
|
177
|
+
# @example
|
|
178
|
+
# channel.start_listening
|
|
179
|
+
# channel.listening? # => true
|
|
180
|
+
# channel.stop_listening
|
|
181
|
+
# channel.listening? # => false
|
|
182
|
+
#
|
|
183
|
+
def listening?
  worker = @listen_thread
  # No thread has been started (or it was cleared by #stop_listening).
  return false if worker.nil?

  worker.alive? ? true : false
end
|
|
186
|
+
|
|
187
|
+
# Returns the PostgreSQL channel name used for notifications.
|
|
188
|
+
#
|
|
189
|
+
# The channel name is derived from the group name with a prefix and
|
|
190
|
+
# sanitization of special characters.
|
|
191
|
+
#
|
|
192
|
+
# @return [String] The PostgreSQL LISTEN/NOTIFY channel name
|
|
193
|
+
#
|
|
194
|
+
# @example
|
|
195
|
+
# channel = HTM::WorkingMemoryChannel.new('my-group', db_config)
|
|
196
|
+
# channel.channel_name # => "htm_wm_my_group"
|
|
197
|
+
#
|
|
198
|
+
def channel_name
  # Built once in #initialize from CHANNEL_PREFIX and the sanitized group name.
  @channel
end
|
|
201
|
+
|
|
202
|
+
# @!endgroup
|
|
203
|
+
|
|
204
|
+
private
|
|
205
|
+
|
|
206
|
+
# Body of the background listener thread: opens a dedicated connection, LISTENs on
# the group channel and dispatches each payload to #handle_notification until
# @stop_requested is set. On PG::Error it logs, waits one second and reconnects.
def listen_loop
  conn = nil
  begin
    conn = PG.connect(@db_config)
    conn.exec("LISTEN #{conn.escape_identifier(@channel)}")

    until @stop_requested
      # The timeout bounds how long we block before re-checking @stop_requested.
      conn.wait_for_notify(0.5) do |_channel, _pid, payload|
        handle_notification(payload)
      end
    end
  rescue PG::Error => e
    unless @stop_requested
      HTM.logger.error "WorkingMemoryChannel error: #{e.message}"

      # `retry` re-enters the block without running `ensure`, so the failed
      # connection must be released here or every reconnect leaks one.
      begin
        conn.close if conn && !conn.finished?
      rescue PG::Error
        nil # a broken connection may refuse a clean shutdown; nothing more to do
      end
      conn = nil

      sleep 1
      retry
    end
  ensure
    conn.close if conn && !conn.finished?
  end
end
|
|
225
|
+
|
|
226
|
+
# Decodes one NOTIFY payload and passes (event, node_id, robot_id) to every
# registered listener. Malformed payloads are logged and dropped.
def handle_notification(payload)
  data = JSON.parse(payload, symbolize_names: true)
  event = data.is_a?(Hash) ? data[:event] : nil

  # A payload without a usable event would otherwise raise NoMethodError on
  # #to_sym inside the listener thread.
  unless event.respond_to?(:to_sym)
    HTM.logger.error "Invalid notification payload: missing event"
    return
  end

  # Snapshot under the lock, dispatch outside it: Mutex is not reentrant, so a
  # callback that touches the channel (e.g. registers another listener via
  # #on_change) would raise ThreadError if invoked while the lock is held.
  callbacks = @mutex.synchronize do
    @notifications_received += 1
    @listeners.dup
  end

  callbacks.each do |callback|
    callback.call(event.to_sym, data[:node_id], data[:robot_id])
  end
rescue JSON::ParserError => e
  HTM.logger.error "Invalid notification payload: #{e.message}"
end
|
|
242
|
+
|
|
243
|
+
# Opens a short-lived connection, yields it, and closes it whether or not the
# block raises. Returns the block's value.
def with_connection
  pg = PG.connect(@db_config)
  begin
    yield pg
  ensure
    pg.close
  end
end
|
|
249
|
+
end
|
|
250
|
+
end
|
data/lib/htm.rb
CHANGED
|
@@ -10,14 +10,19 @@ require_relative "htm/long_term_memory"
|
|
|
10
10
|
require_relative "htm/working_memory"
|
|
11
11
|
require_relative "htm/embedding_service"
|
|
12
12
|
require_relative "htm/tag_service"
|
|
13
|
+
require_relative "htm/proposition_service"
|
|
13
14
|
require_relative "htm/timeframe_extractor"
|
|
14
15
|
require_relative "htm/timeframe"
|
|
15
16
|
require_relative "htm/job_adapter"
|
|
16
17
|
require_relative "htm/jobs/generate_embedding_job"
|
|
17
18
|
require_relative "htm/jobs/generate_tags_job"
|
|
18
|
-
require_relative "htm/
|
|
19
|
+
require_relative "htm/jobs/generate_propositions_job"
|
|
20
|
+
require_relative "htm/loaders/markdown_chunker"
|
|
19
21
|
require_relative "htm/loaders/markdown_loader"
|
|
20
22
|
require_relative "htm/observability"
|
|
23
|
+
require_relative "htm/telemetry"
|
|
24
|
+
require_relative "htm/working_memory_channel"
|
|
25
|
+
require_relative "htm/robot_group"
|
|
21
26
|
|
|
22
27
|
require "pg"
|
|
23
28
|
require "securerandom"
|
|
@@ -26,7 +31,7 @@ require "uri"
|
|
|
26
31
|
# Load Rails integration if Rails is defined
|
|
27
32
|
require_relative "htm/railtie" if defined?(Rails::Railtie)
|
|
28
33
|
|
|
29
|
-
# HTM (Hierarchical
|
|
34
|
+
# HTM (Hierarchical Temporal Memory) - Intelligent memory management for LLM robots
|
|
30
35
|
#
|
|
31
36
|
# HTM implements a two-tier memory system:
|
|
32
37
|
# - Working Memory: Token-limited, active context for immediate LLM use
|
|
@@ -163,6 +168,11 @@ class HTM
|
|
|
163
168
|
# Only for NEW nodes - existing nodes already have embeddings/tags
|
|
164
169
|
enqueue_embedding_job(node_id)
|
|
165
170
|
enqueue_tags_job(node_id, manual_tags: tags)
|
|
171
|
+
|
|
172
|
+
# Enqueue proposition extraction if enabled and not already a proposition
|
|
173
|
+
if HTM.configuration.extract_propositions && !metadata[:is_proposition]
|
|
174
|
+
enqueue_propositions_job(node_id)
|
|
175
|
+
end
|
|
166
176
|
else
|
|
167
177
|
HTM.logger.info "Node #{node_id} already exists, linked to robot #{@robot_name} (remember_count: #{result[:robot_node].remember_count})"
|
|
168
178
|
|
|
@@ -228,7 +238,7 @@ class HTM
|
|
|
228
238
|
# memories = htm.recall("preferences", metadata: { source: "user" })
|
|
229
239
|
# memories = htm.recall("decisions", metadata: { confidence: 0.9, type: "architectural" })
|
|
230
240
|
#
|
|
231
|
-
def recall(topic, timeframe: nil, limit: 20, strategy: :
|
|
241
|
+
def recall(topic, timeframe: nil, limit: 20, strategy: :fulltext, with_relevance: false, query_tags: [], raw: false, metadata: {})
|
|
232
242
|
# Validate inputs
|
|
233
243
|
validate_timeframe!(timeframe)
|
|
234
244
|
validate_positive_integer!(limit, "limit")
|
|
@@ -350,6 +360,51 @@ class HTM
|
|
|
350
360
|
true
|
|
351
361
|
end
|
|
352
362
|
|
|
363
|
+
# Forget all nodes whose content includes the given string
|
|
364
|
+
#
|
|
365
|
+
# Performs a soft delete on all matching nodes. The nodes remain in the
|
|
366
|
+
# database but are excluded from queries. Uses case-insensitive ILIKE matching.
|
|
367
|
+
#
|
|
368
|
+
# @param content_substring [String] Substring to search for in node content
|
|
369
|
+
# @param soft [Boolean] If true (default), soft delete; if false, permanent delete
|
|
370
|
+
# @param confirm [Symbol] Must be :confirmed to proceed with permanent deletion
|
|
371
|
+
# @return [Array<Integer>] Array of node IDs that were deleted
|
|
372
|
+
# @raise [ArgumentError] if content_substring is blank
|
|
373
|
+
# @raise [ArgumentError] if permanent deletion requested without confirmation
|
|
374
|
+
#
|
|
375
|
+
# @example Soft delete all nodes containing "deprecated"
|
|
376
|
+
# htm.forget_content("deprecated")
|
|
377
|
+
# # => [42, 56, 78] # IDs of deleted nodes
|
|
378
|
+
#
|
|
379
|
+
# @example Permanent delete all nodes containing "test data"
|
|
380
|
+
# htm.forget_content("test data", soft: false, confirm: :confirmed)
|
|
381
|
+
#
|
|
382
|
+
def forget_content(content_substring, soft: true, confirm: false)
  raise ArgumentError, "Content substring cannot be blank" if content_substring.to_s.strip.empty?

  # Permanent delete requires confirmation
  if !soft && confirm != :confirmed
    raise ArgumentError, "Permanent deletion requires confirm: :confirmed"
  end

  # Escape LIKE metacharacters so the argument is matched literally. Unescaped,
  # forget_content("%") would match — and delete — every node. Backslash is
  # PostgreSQL's default LIKE escape character.
  literal = content_substring.to_s.gsub(/[\\%_]/) { |meta| "\\#{meta}" }

  # Find all nodes containing the substring (case-insensitive)
  node_ids = HTM::Models::Node.where("content ILIKE ?", "%#{literal}%").pluck(:id)

  if node_ids.empty?
    HTM.logger.info "No nodes found containing: #{content_substring}"
    return []
  end

  # Delegate to #forget per node so soft/permanent semantics stay in one place.
  node_ids.each { |node_id| forget(node_id, soft: soft, confirm: confirm) }

  HTM.logger.info "Forgot #{node_ids.length} nodes containing: #{content_substring}"
  node_ids
end
|
|
407
|
+
|
|
353
408
|
# Restore a soft-deleted memory node
|
|
354
409
|
#
|
|
355
410
|
# @param node_id [Integer] ID of the soft-deleted node to restore
|
|
@@ -550,6 +605,14 @@ class HTM
|
|
|
550
605
|
HTM.logger.error "Failed to enqueue tags job for node #{node_id}: #{e.message}"
|
|
551
606
|
end
|
|
552
607
|
|
|
608
|
+
def enqueue_propositions_job(node_id)
  # Proposition extraction goes through the configured job backend. Enqueue
  # failures are logged rather than raised.
  job_args = { node_id: node_id, robot_id: @robot_id }

  begin
    HTM::JobAdapter.enqueue(HTM::Jobs::GeneratePropositionsJob, **job_args)
  rescue StandardError => e
    HTM.logger.error "Failed to enqueue propositions job for node #{node_id}: #{e.message}"
  end
end
|
|
615
|
+
|
|
553
616
|
def add_to_working_memory(node)
|
|
554
617
|
# Convert token_count to integer (may be String from database/cache)
|
|
555
618
|
token_count = node['token_count'].to_i
|
data/lib/tasks/doc.rake
CHANGED
|
@@ -22,7 +22,7 @@ namespace :htm do
|
|
|
22
22
|
"--plugin markdown",
|
|
23
23
|
"--output-dir #{output_dir}",
|
|
24
24
|
"--format markdown",
|
|
25
|
-
"--title 'HTM - Hierarchical
|
|
25
|
+
"--title 'HTM - Hierarchical Temporal Memory API'",
|
|
26
26
|
"--markup markdown",
|
|
27
27
|
"--charset utf-8",
|
|
28
28
|
"--protected",
|
data/lib/tasks/htm.rake
CHANGED
|
@@ -68,21 +68,28 @@ namespace :htm do
|
|
|
68
68
|
end
|
|
69
69
|
end
|
|
70
70
|
|
|
71
|
-
desc "
|
|
72
|
-
task :
|
|
71
|
+
desc "Verify database connection (respects RAILS_ENV)"
|
|
72
|
+
task :verify do
|
|
73
73
|
require 'htm'
|
|
74
|
-
config = HTM::Database.default_config
|
|
75
|
-
raise "Database not configured. Set HTM_DBURL environment variable." unless config
|
|
76
74
|
|
|
77
|
-
|
|
75
|
+
env = ENV['RAILS_ENV'] || ENV['RACK_ENV'] || 'development'
|
|
76
|
+
config = HTM::ActiveRecordConfig.load_database_config
|
|
77
|
+
|
|
78
|
+
puts "Verifying HTM database connection (#{env})..."
|
|
78
79
|
puts " Host: #{config[:host]}"
|
|
79
80
|
puts " Port: #{config[:port]}"
|
|
80
|
-
puts " Database: #{config[:
|
|
81
|
-
puts " User: #{config[:
|
|
81
|
+
puts " Database: #{config[:database]}"
|
|
82
|
+
puts " User: #{config[:username]}"
|
|
82
83
|
|
|
83
84
|
begin
|
|
84
85
|
require 'pg'
|
|
85
|
-
conn = PG.connect(
|
|
86
|
+
conn = PG.connect(
|
|
87
|
+
host: config[:host],
|
|
88
|
+
port: config[:port],
|
|
89
|
+
dbname: config[:database],
|
|
90
|
+
user: config[:username],
|
|
91
|
+
password: config[:password]
|
|
92
|
+
)
|
|
86
93
|
|
|
87
94
|
# Check pgvector
|
|
88
95
|
pgvector = conn.exec("SELECT extversion FROM pg_extension WHERE extname='vector'").first
|
|
@@ -100,16 +107,18 @@ namespace :htm do
|
|
|
100
107
|
end
|
|
101
108
|
end
|
|
102
109
|
|
|
103
|
-
desc "Open PostgreSQL console"
|
|
110
|
+
desc "Open PostgreSQL console (respects RAILS_ENV)"
|
|
104
111
|
task :console do
|
|
105
112
|
require 'htm'
|
|
106
|
-
config = HTM::Database.default_config
|
|
107
|
-
raise "Database not configured. Set HTM_DBURL environment variable." unless config
|
|
108
113
|
|
|
114
|
+
env = ENV['RAILS_ENV'] || ENV['RACK_ENV'] || 'development'
|
|
115
|
+
config = HTM::ActiveRecordConfig.load_database_config
|
|
116
|
+
|
|
117
|
+
puts "Connecting to #{config[:database]} (#{env})..."
|
|
109
118
|
exec "psql", "-h", config[:host],
|
|
110
119
|
"-p", config[:port].to_s,
|
|
111
|
-
"-U", config[:
|
|
112
|
-
"-d", config[:
|
|
120
|
+
"-U", config[:username],
|
|
121
|
+
"-d", config[:database]
|
|
113
122
|
end
|
|
114
123
|
|
|
115
124
|
desc "Seed database with sample data"
|
|
@@ -277,6 +286,120 @@ namespace :htm do
|
|
|
277
286
|
puts " Errors: #{errors}"
|
|
278
287
|
puts " Nodes with embeddings: #{final_with_embeddings}"
|
|
279
288
|
end
|
|
289
|
+
|
|
290
|
+
desc "Rebuild propositions for all non-proposition nodes. Extracts atomic facts and creates new nodes."
task :propositions do
  require 'htm'
  require 'ruby-progressbar'

  # Models are queried below, so connect first.
  HTM::ActiveRecordConfig.establish_connection!

  node_model = HTM::Models::Node

  # Sources are the nodes returned by the non_propositions scope; the existing
  # proposition nodes are counted so the operator can see what will be replaced.
  source_nodes = node_model.non_propositions
  source_count = source_nodes.count
  existing_propositions = node_model.propositions.count

  puts "\nHTM Propositions Rebuild"
  puts "=" * 50
  puts "Current state:"
  puts " Source nodes (non-propositions): #{source_count}"
  puts " Existing proposition nodes: #{existing_propositions}"
  puts "\nThis will extract propositions from ALL #{source_count} source nodes."
  puts "Existing proposition nodes will be deleted and regenerated."
  puts "This operation may take a long time depending on your LLM provider."
  print "\nType 'yes' to confirm: "

  # Anything other than a literal "yes" (including EOF on stdin) aborts.
  answer = $stdin.gets&.strip
  if answer != 'yes'
    puts "Aborted."
    next
  end

  # Remove the old proposition nodes before regenerating them.
  if existing_propositions.positive?
    puts "\nDeleting #{existing_propositions} existing proposition nodes..."
    deleted = node_model.propositions.delete_all
    puts " Deleted #{deleted} proposition nodes"
  end

  puts "\nExtracting propositions from #{source_count} nodes..."
  puts "(This may take a while depending on your LLM provider)\n"

  # New proposition nodes are linked to the first robot, or to a dedicated one
  # created here when no robot exists yet.
  robot = HTM::Models::Robot.first || HTM::Models::Robot.create!(name: 'proposition_rebuilder')

  # Progress bar with ETA
  progressbar = ProgressBar.create(
    total: source_count,
    format: '%t: |%B| %c/%C (%p%%) %e',
    title: 'Extracting',
    output: $stdout,
    smoothing: 0.5
  )

  errors = 0
  nodes_processed = 0
  propositions_created = 0

  source_nodes.find_each do |source|
    begin
      statements = HTM::PropositionService.extract(source.content)

      if statements.any?
        statements.each do |text|
          tokens = HTM.count_tokens(text)

          created = node_model.create!(
            content: text,
            token_count: tokens,
            metadata: { is_proposition: true, source_node_id: source.id }
          )

          HTM::Models::RobotNode.find_or_create_by!(robot_id: robot.id, node_id: created.id)

          # An embedding failure is reported as a warning; the node is kept.
          begin
            embedding = HTM::EmbeddingService.generate(text)
            created.update!(embedding: embedding[:storage_embedding])
          rescue StandardError => e
            progressbar.log " Warning: Embedding failed for proposition: #{e.message}"
          end

          propositions_created += 1
        end
      end

      nodes_processed += 1
    rescue StandardError => e
      # One failing source node must not stop the rebuild.
      errors += 1
      progressbar.log " Error on node #{source.id}: #{e.message}"
    end

    progressbar.increment
  end

  progressbar.finish

  final_proposition_count = node_model.propositions.count

  puts "\nRebuild complete!"
  puts " Source nodes processed: #{nodes_processed}"
  puts " Propositions created: #{propositions_created}"
  puts " Errors: #{errors}"
  puts " Total proposition nodes: #{final_proposition_count}"
end
|
|
280
403
|
end
|
|
281
404
|
|
|
282
405
|
namespace :schema do
|
|
@@ -293,6 +416,129 @@ namespace :htm do
|
|
|
293
416
|
end
|
|
294
417
|
end
|
|
295
418
|
|
|
419
|
+
desc "Create database if it doesn't exist (respects RAILS_ENV)"
task :create do
  require 'htm'
  require 'pg'

  env = ENV['RAILS_ENV'] || ENV['RACK_ENV'] || 'development'
  config = HTM::ActiveRecordConfig.load_database_config
  db_name = config[:database]

  puts "Creating database: #{db_name} (#{env})"

  # Connection options for the configured server, targeting the given database.
  connect_options = lambda do |dbname|
    {
      host: config[:host],
      port: config[:port],
      dbname: dbname,
      user: config[:username],
      password: config[:password]
    }
  end

  admin_conn = nil
  db_conn = nil

  begin
    # The target database may not exist yet, so the existence check and
    # CREATE DATABASE are issued while connected to the 'postgres' database.
    admin_conn = PG.connect(connect_options.call('postgres'))

    result = admin_conn.exec_params(
      "SELECT 1 FROM pg_database WHERE datname = $1",
      [db_name]
    )

    if result.ntuples == 0
      # CREATE DATABASE cannot take bind parameters; quote the identifier instead.
      admin_conn.exec("CREATE DATABASE #{PG::Connection.quote_ident(db_name)}")
      puts "✓ Database created: #{db_name}"

      # Connect to new database and enable extensions
      db_conn = PG.connect(connect_options.call(db_name))
      %w[vector pg_trgm].each do |ext|
        db_conn.exec("CREATE EXTENSION IF NOT EXISTS #{ext}")
      end
      puts "✓ Extensions enabled (pgvector, pg_trgm)"
    else
      puts "✓ Database already exists: #{db_name}"
    end
  rescue PG::Error => e
    puts "✗ Error: #{e.message}"
    exit 1
  ensure
    # Runs on success, on PG::Error and on the SystemExit raised by `exit`, so
    # neither connection is left open when a statement fails part-way through.
    [db_conn, admin_conn].each { |conn| conn.close if conn && !conn.finished? }
  end
end
|
|
474
|
+
|
|
475
|
+
namespace :tags do
  desc "Soft delete orphaned tags and stale node_tags entries"
  task :cleanup do
    require 'htm'

    # Models are queried below, so connect first.
    HTM::ActiveRecordConfig.establish_connection!

    puts "\nHTM Tag Cleanup"
    puts "=" * 50

    # node_tags rows whose node is missing (LEFT JOIN found nothing) or soft-deleted.
    stale_node_tags = HTM::Models::NodeTag
                      .joins("LEFT JOIN nodes ON nodes.id = node_tags.node_id")
                      .where("nodes.id IS NULL OR nodes.deleted_at IS NOT NULL")
    stale_count = stale_node_tags.count

    # Tags selected by the Tag.orphaned scope.
    orphaned_tags = HTM::Models::Tag.orphaned
    orphan_count = orphaned_tags.count

    if stale_count.zero? && orphan_count.zero?
      puts "No cleanup needed."
      puts " Stale node_tags entries: 0"
      puts " Orphaned tags: 0"
      next
    end

    puts "Found:"
    puts " Stale node_tags entries: #{stale_count} (pointing to deleted/missing nodes)"
    puts " Orphaned tags: #{orphan_count} (no active nodes)"

    # Preview at most 20 orphaned tag names before asking for confirmation.
    if orphan_count.positive?
      puts "\nOrphaned tags:"
      orphaned_tags.limit(20).pluck(:name).each { |tag_name| puts " - #{tag_name}" }
      puts " ... and #{orphan_count - 20} more" if orphan_count > 20
    end

    print "\nSoft delete these entries? (yes/no): "
    answer = $stdin.gets&.strip

    # Anything other than a literal "yes" (including EOF on stdin) cancels.
    if answer != 'yes'
      puts "Cancelled."
      next
    end

    # One timestamp shared by both updates.
    now = Time.current

    # Stale join rows go first, then the orphaned tags.
    if stale_count.positive?
      stale_updated = stale_node_tags.update_all(deleted_at: now)
      puts "\nSoft deleted #{stale_updated} stale node_tags entries."
    end

    if orphan_count.positive?
      tags_updated = orphaned_tags.update_all(deleted_at: now)
      puts "Soft deleted #{tags_updated} orphaned tags."
    end

    puts "\nCleanup complete (soft delete)."
  end
end
|
|
541
|
+
|
|
296
542
|
end
|
|
297
543
|
|
|
298
544
|
namespace :doc do
|