htm 0.0.11 → 0.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. checksums.yaml +4 -4
  2. data/.dictate.toml +46 -0
  3. data/.envrc +2 -0
  4. data/CHANGELOG.md +85 -2
  5. data/README.md +348 -79
  6. data/Rakefile +14 -2
  7. data/bin/htm_mcp.rb +94 -0
  8. data/config/database.yml +20 -13
  9. data/db/migrate/00003_create_file_sources.rb +5 -0
  10. data/db/migrate/00004_create_nodes.rb +17 -0
  11. data/db/migrate/00005_create_tags.rb +7 -0
  12. data/db/migrate/00006_create_node_tags.rb +2 -0
  13. data/db/migrate/00007_create_robot_nodes.rb +7 -0
  14. data/db/schema.sql +69 -100
  15. data/docs/api/index.md +1 -1
  16. data/docs/api/yard/HTM/Configuration.md +54 -0
  17. data/docs/api/yard/HTM/Database.md +13 -10
  18. data/docs/api/yard/HTM/EmbeddingService.md +5 -1
  19. data/docs/api/yard/HTM/LongTermMemory.md +18 -277
  20. data/docs/api/yard/HTM/PropositionError.md +18 -0
  21. data/docs/api/yard/HTM/PropositionService.md +66 -0
  22. data/docs/api/yard/HTM/QueryCache.md +88 -0
  23. data/docs/api/yard/HTM/RobotGroup.md +481 -0
  24. data/docs/api/yard/HTM/SqlBuilder.md +108 -0
  25. data/docs/api/yard/HTM/TagService.md +4 -0
  26. data/docs/api/yard/HTM/Telemetry/NullInstrument.md +13 -0
  27. data/docs/api/yard/HTM/Telemetry/NullMeter.md +15 -0
  28. data/docs/api/yard/HTM/Telemetry.md +109 -0
  29. data/docs/api/yard/HTM/WorkingMemoryChannel.md +176 -0
  30. data/docs/api/yard/HTM.md +8 -22
  31. data/docs/api/yard/index.csv +102 -25
  32. data/docs/api/yard-reference.md +8 -0
  33. data/docs/architecture/index.md +1 -1
  34. data/docs/assets/images/multi-provider-failover.svg +51 -0
  35. data/docs/assets/images/robot-group-architecture.svg +65 -0
  36. data/docs/database/README.md +3 -3
  37. data/docs/database/public.file_sources.svg +29 -21
  38. data/docs/database/public.node_tags.md +2 -0
  39. data/docs/database/public.node_tags.svg +53 -41
  40. data/docs/database/public.nodes.md +2 -0
  41. data/docs/database/public.nodes.svg +52 -40
  42. data/docs/database/public.robot_nodes.md +2 -0
  43. data/docs/database/public.robot_nodes.svg +30 -22
  44. data/docs/database/public.robots.svg +16 -12
  45. data/docs/database/public.tags.md +3 -0
  46. data/docs/database/public.tags.svg +41 -33
  47. data/docs/database/schema.json +66 -0
  48. data/docs/database/schema.svg +60 -48
  49. data/docs/development/index.md +14 -1
  50. data/docs/development/rake-tasks.md +1068 -0
  51. data/docs/getting-started/index.md +1 -1
  52. data/docs/getting-started/quick-start.md +144 -155
  53. data/docs/guides/adding-memories.md +2 -3
  54. data/docs/guides/context-assembly.md +185 -184
  55. data/docs/guides/getting-started.md +154 -148
  56. data/docs/guides/index.md +8 -1
  57. data/docs/guides/long-term-memory.md +60 -92
  58. data/docs/guides/mcp-server.md +617 -0
  59. data/docs/guides/multi-robot.md +249 -345
  60. data/docs/guides/recalling-memories.md +153 -163
  61. data/docs/guides/robot-groups.md +604 -0
  62. data/docs/guides/search-strategies.md +61 -58
  63. data/docs/guides/working-memory.md +103 -136
  64. data/docs/images/telemetry-architecture.svg +153 -0
  65. data/docs/index.md +30 -26
  66. data/docs/telemetry.md +391 -0
  67. data/examples/README.md +46 -1
  68. data/examples/cli_app/README.md +1 -1
  69. data/examples/cli_app/htm_cli.rb +1 -1
  70. data/examples/robot_groups/robot_worker.rb +1 -2
  71. data/examples/robot_groups/same_process.rb +1 -4
  72. data/examples/sinatra_app/app.rb +1 -1
  73. data/examples/telemetry/README.md +147 -0
  74. data/examples/telemetry/SETUP_README.md +169 -0
  75. data/examples/telemetry/demo.rb +498 -0
  76. data/examples/telemetry/grafana/dashboards/htm-metrics.json +457 -0
  77. data/lib/htm/configuration.rb +261 -70
  78. data/lib/htm/database.rb +46 -22
  79. data/lib/htm/embedding_service.rb +24 -14
  80. data/lib/htm/errors.rb +15 -1
  81. data/lib/htm/jobs/generate_embedding_job.rb +19 -0
  82. data/lib/htm/jobs/generate_propositions_job.rb +103 -0
  83. data/lib/htm/jobs/generate_tags_job.rb +24 -0
  84. data/lib/htm/loaders/markdown_chunker.rb +79 -0
  85. data/lib/htm/loaders/markdown_loader.rb +41 -15
  86. data/lib/htm/long_term_memory/fulltext_search.rb +138 -0
  87. data/lib/htm/long_term_memory/hybrid_search.rb +324 -0
  88. data/lib/htm/long_term_memory/node_operations.rb +209 -0
  89. data/lib/htm/long_term_memory/relevance_scorer.rb +355 -0
  90. data/lib/htm/long_term_memory/robot_operations.rb +34 -0
  91. data/lib/htm/long_term_memory/tag_operations.rb +428 -0
  92. data/lib/htm/long_term_memory/vector_search.rb +109 -0
  93. data/lib/htm/long_term_memory.rb +51 -1153
  94. data/lib/htm/models/node.rb +35 -2
  95. data/lib/htm/models/node_tag.rb +31 -0
  96. data/lib/htm/models/robot_node.rb +31 -0
  97. data/lib/htm/models/tag.rb +44 -0
  98. data/lib/htm/proposition_service.rb +169 -0
  99. data/lib/htm/query_cache.rb +214 -0
  100. data/lib/htm/robot_group.rb +721 -0
  101. data/lib/htm/sql_builder.rb +178 -0
  102. data/lib/htm/tag_service.rb +16 -6
  103. data/lib/htm/tasks.rb +8 -2
  104. data/lib/htm/telemetry.rb +224 -0
  105. data/lib/htm/version.rb +1 -1
  106. data/lib/htm/working_memory_channel.rb +250 -0
  107. data/lib/htm.rb +66 -3
  108. data/lib/tasks/doc.rake +1 -1
  109. data/lib/tasks/htm.rake +259 -13
  110. data/mkdocs.yml +98 -96
  111. metadata +55 -20
  112. data/.aigcm_msg +0 -1
  113. data/.claude/settings.local.json +0 -95
  114. data/CLAUDE.md +0 -603
  115. data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +0 -12
  116. data/examples/cli_app/temp.log +0 -93
  117. data/examples/robot_groups/lib/robot_group.rb +0 -419
  118. data/examples/robot_groups/lib/working_memory_channel.rb +0 -140
  119. data/lib/htm/loaders/paragraph_chunker.rb +0 -112
  120. data/notes/ARCHITECTURE_REVIEW.md +0 -1167
  121. data/notes/IMPLEMENTATION_SUMMARY.md +0 -606
  122. data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +0 -451
  123. data/notes/next_steps.md +0 -100
  124. data/notes/plan.md +0 -627
  125. data/notes/tag_ontology_enhancement_ideas.md +0 -222
  126. data/notes/timescaledb_removal_summary.md +0 -200
@@ -0,0 +1,250 @@
1
+ # lib/htm/working_memory_channel.rb
2
+ # frozen_string_literal: true
3
+
4
require 'json' # Hash#to_json / JSON.parse
require 'time' # Time#iso8601 on Rubies where it is not part of core

class HTM
  # Provides real-time synchronization of working memory changes across multiple
  # robots using PostgreSQL LISTEN/NOTIFY pub/sub mechanism.
  #
  # This class enables distributed robots to maintain synchronized working memory
  # by broadcasting change notifications through PostgreSQL channels. When one robot
  # adds, evicts, or clears working memory, all other robots in the group receive
  # immediate notification.
  #
  # @example Basic usage
  #   channel = HTM::WorkingMemoryChannel.new('support-team', db_config)
  #
  #   # Subscribe to changes
  #   channel.on_change do |event, node_id, robot_id|
  #     case event
  #     when :added   then puts "Node #{node_id} added by robot #{robot_id}"
  #     when :evicted then puts "Node #{node_id} evicted by robot #{robot_id}"
  #     when :cleared then puts "Working memory cleared by robot #{robot_id}"
  #     end
  #   end
  #
  #   # Start listening in background thread
  #   channel.start_listening
  #
  #   # Publish a change
  #   channel.notify(:added, node_id: 123, robot_id: 456)
  #
  #   # Cleanup when done
  #   channel.stop_listening
  #
  # @see HTM::RobotGroup Higher-level coordination using this channel
  #
  class WorkingMemoryChannel
    # Prefix used for all PostgreSQL channel names
    # @return [String]
    CHANNEL_PREFIX = 'htm_wm'

    # Number of valid notifications received since channel was created
    # @return [Integer]
    attr_reader :notifications_received

    # Creates a new working memory channel for a robot group.
    #
    # The channel name is derived from the group name with non-alphanumeric
    # characters replaced by underscores to ensure PostgreSQL compatibility.
    #
    # @param group_name [String] Name of the robot group (used to create unique channel)
    # @param db_config [Hash] PostgreSQL connection configuration hash
    # @option db_config [String] :host Database host
    # @option db_config [Integer] :port Database port
    # @option db_config [String] :dbname Database name
    # @option db_config [String] :user Database user
    # @option db_config [String] :password Database password (optional)
    #
    # @example
    #   db_config = { host: 'localhost', port: 5432, dbname: 'htm_dev', user: 'postgres' }
    #   channel = HTM::WorkingMemoryChannel.new('customer-support', db_config)
    #
    def initialize(group_name, db_config)
      @group_name = group_name
      @channel = "#{CHANNEL_PREFIX}_#{group_name.gsub(/[^a-z0-9_]/i, '_')}"
      @db_config = db_config
      @listeners = []
      @listen_thread = nil
      @stop_requested = false
      @notifications_received = 0
      @mutex = Mutex.new
    end

    # @!group Publishing

    # Broadcasts a working memory change notification to all listeners.
    #
    # Uses PostgreSQL's pg_notify function to send a JSON payload containing
    # the event type, affected node ID, originating robot ID, and timestamp.
    # A short-lived connection is opened for each call and closed afterwards.
    #
    # @param event [Symbol] Type of change (:added, :evicted, or :cleared)
    # @param node_id [Integer, nil] ID of the affected node (nil for :cleared events)
    # @param robot_id [Integer] ID of the robot that triggered the change
    # @return [void]
    #
    # @example Notify that a node was added
    #   channel.notify(:added, node_id: 123, robot_id: 1)
    #
    # @example Notify that working memory was cleared
    #   channel.notify(:cleared, node_id: nil, robot_id: 1)
    #
    def notify(event, node_id:, robot_id:)
      payload = {
        event: event,
        node_id: node_id,
        robot_id: robot_id,
        timestamp: Time.now.iso8601
      }.to_json

      with_connection do |conn|
        conn.exec_params('SELECT pg_notify($1, $2)', [@channel, payload])
      end
    end

    # @!endgroup

    # @!group Subscribing

    # Registers a callback to be invoked when working memory changes occur.
    #
    # Multiple callbacks can be registered; all will be called for each event.
    # Callbacks are invoked synchronously within the listener thread, outside
    # the channel's internal lock, so a callback may itself call {#on_change}.
    #
    # @yield [event, node_id, robot_id] Block called for each notification
    # @yieldparam event [Symbol] Type of change (:added, :evicted, or :cleared)
    # @yieldparam node_id [Integer, nil] ID of the affected node
    # @yieldparam robot_id [Integer] ID of the robot that triggered the change
    # @return [void]
    #
    # @example Register a change handler
    #   channel.on_change do |event, node_id, robot_id|
    #     puts "Received #{event} event for node #{node_id}"
    #   end
    #
    def on_change(&callback)
      @mutex.synchronize { @listeners << callback }
    end

    # Starts listening for notifications in a background thread.
    #
    # Creates a dedicated PostgreSQL connection that uses LISTEN to receive
    # notifications. The thread polls every 0.5 seconds, allowing for clean
    # shutdown via {#stop_listening}. Calling this while a listener is already
    # running returns the existing thread instead of starting a second one
    # (a second listener would deliver every notification twice).
    #
    # @return [Thread] The background listener thread
    #
    # @example Start and verify listening
    #   thread = channel.start_listening
    #   puts "Listening: #{channel.listening?}"  # => true
    #
    def start_listening
      return @listen_thread if listening?

      @stop_requested = false
      @listen_thread = Thread.new { listen_loop }
      # A failure that escapes the listener is surfaced in the main thread.
      @listen_thread.abort_on_exception = true
      @listen_thread
    end

    # Stops the background listener thread.
    #
    # Signals the listener to stop, waits up to 0.5 seconds for clean exit,
    # then forcefully terminates if still running. The PostgreSQL connection
    # is closed by the listener's ensure clause in either case.
    #
    # @return [void]
    #
    # @example Clean shutdown
    #   channel.stop_listening
    #   puts "Listening: #{channel.listening?}"  # => false
    #
    def stop_listening
      @stop_requested = true
      # Give the thread a moment to exit cleanly
      @listen_thread&.join(0.5)
      @listen_thread&.kill if @listen_thread&.alive?
      @listen_thread = nil
    end

    # @!endgroup

    # @!group Status

    # Checks if the listener thread is currently active.
    #
    # @return [Boolean] true if listening for notifications, false otherwise
    #
    # @example
    #   channel.start_listening
    #   channel.listening?  # => true
    #   channel.stop_listening
    #   channel.listening?  # => false
    #
    def listening?
      @listen_thread&.alive? || false
    end

    # Returns the PostgreSQL channel name used for notifications.
    #
    # The channel name is derived from the group name with a prefix and
    # sanitization of special characters.
    #
    # @return [String] The PostgreSQL LISTEN/NOTIFY channel name
    #
    # @example
    #   channel = HTM::WorkingMemoryChannel.new('my-group', db_config)
    #   channel.channel_name  # => "htm_wm_my_group"
    #
    def channel_name
      @channel
    end

    # @!endgroup

    private

    # Body of the listener thread: connects, issues LISTEN and dispatches
    # notifications until a stop is requested. On a PG::Error the current
    # connection is released and, after a one-second pause, a new one is opened.
    def listen_loop
      conn = nil
      begin
        conn = PG.connect(@db_config)
        conn.exec("LISTEN #{conn.escape_identifier(@channel)}")

        until @stop_requested
          # Wait for notification with timeout (allows checking @stop_requested)
          conn.wait_for_notify(0.5) do |_channel, _pid, payload|
            handle_notification(payload)
          end
        end
      rescue PG::Error => e
        unless @stop_requested
          HTM.logger.error "WorkingMemoryChannel error: #{e.message}"
          # Release the failed connection before reconnecting; the ensure
          # clause below only runs once, when the loop finally exits.
          close_connection(conn)
          conn = nil
          sleep 1
          retry
        end
      ensure
        close_connection(conn)
      end
    end

    # Parses one notification payload and dispatches it to every registered
    # callback. Payloads that are not valid JSON, are not JSON objects, or
    # lack a usable :event are logged and dropped.
    def handle_notification(payload)
      data = JSON.parse(payload.to_s, symbolize_names: true)
      raw_event = data.is_a?(Hash) ? data[:event] : nil

      unless raw_event.respond_to?(:to_sym)
        HTM.logger.error "Invalid notification payload: missing or invalid event"
        return
      end

      # Snapshot the listeners under the lock but call them outside it:
      # Mutex is not reentrant, so a callback that registered another
      # callback would otherwise raise ThreadError inside the listener thread.
      callbacks = @mutex.synchronize do
        @notifications_received += 1
        @listeners.dup
      end

      event = raw_event.to_sym
      callbacks.each do |callback|
        callback.call(event, data[:node_id], data[:robot_id])
      end
    rescue JSON::ParserError => e
      HTM.logger.error "Invalid notification payload: #{e.message}"
    end

    # Yields a freshly opened connection and always closes it afterwards.
    def with_connection
      conn = PG.connect(@db_config)
      yield conn
    ensure
      conn&.close
    end

    # Closes +conn+ if it is present and still open; errors while closing are
    # ignored because the connection is being discarded anyway.
    def close_connection(conn)
      conn.close if conn && !conn.finished?
    rescue PG::Error
      nil
    end
  end
end
data/lib/htm.rb CHANGED
@@ -10,14 +10,19 @@ require_relative "htm/long_term_memory"
10
10
  require_relative "htm/working_memory"
11
11
  require_relative "htm/embedding_service"
12
12
  require_relative "htm/tag_service"
13
+ require_relative "htm/proposition_service"
13
14
  require_relative "htm/timeframe_extractor"
14
15
  require_relative "htm/timeframe"
15
16
  require_relative "htm/job_adapter"
16
17
  require_relative "htm/jobs/generate_embedding_job"
17
18
  require_relative "htm/jobs/generate_tags_job"
18
- require_relative "htm/loaders/paragraph_chunker"
19
+ require_relative "htm/jobs/generate_propositions_job"
20
+ require_relative "htm/loaders/markdown_chunker"
19
21
  require_relative "htm/loaders/markdown_loader"
20
22
  require_relative "htm/observability"
23
+ require_relative "htm/telemetry"
24
+ require_relative "htm/working_memory_channel"
25
+ require_relative "htm/robot_group"
21
26
 
22
27
  require "pg"
23
28
  require "securerandom"
@@ -26,7 +31,7 @@ require "uri"
26
31
  # Load Rails integration if Rails is defined
27
32
  require_relative "htm/railtie" if defined?(Rails::Railtie)
28
33
 
29
- # HTM (Hierarchical Temporary Memory) - Intelligent memory management for LLM robots
34
+ # HTM (Hierarchical Temporal Memory) - Intelligent memory management for LLM robots
30
35
  #
31
36
  # HTM implements a two-tier memory system:
32
37
  # - Working Memory: Token-limited, active context for immediate LLM use
@@ -163,6 +168,11 @@ class HTM
163
168
  # Only for NEW nodes - existing nodes already have embeddings/tags
164
169
  enqueue_embedding_job(node_id)
165
170
  enqueue_tags_job(node_id, manual_tags: tags)
171
+
172
+ # Enqueue proposition extraction if enabled and not already a proposition
173
+ if HTM.configuration.extract_propositions && !metadata[:is_proposition]
174
+ enqueue_propositions_job(node_id)
175
+ end
166
176
  else
167
177
  HTM.logger.info "Node #{node_id} already exists, linked to robot #{@robot_name} (remember_count: #{result[:robot_node].remember_count})"
168
178
 
@@ -228,7 +238,7 @@ class HTM
228
238
  # memories = htm.recall("preferences", metadata: { source: "user" })
229
239
  # memories = htm.recall("decisions", metadata: { confidence: 0.9, type: "architectural" })
230
240
  #
231
- def recall(topic, timeframe: nil, limit: 20, strategy: :vector, with_relevance: false, query_tags: [], raw: false, metadata: {})
241
+ def recall(topic, timeframe: nil, limit: 20, strategy: :fulltext, with_relevance: false, query_tags: [], raw: false, metadata: {})
232
242
  # Validate inputs
233
243
  validate_timeframe!(timeframe)
234
244
  validate_positive_integer!(limit, "limit")
@@ -350,6 +360,51 @@ class HTM
350
360
  true
351
361
  end
352
362
 
363
+ # Forget all nodes whose content includes the given string
364
+ #
365
+ # Performs a soft delete on all matching nodes. The nodes remain in the
366
+ # database but are excluded from queries. Uses case-insensitive LIKE matching.
367
+ #
368
+ # @param content_substring [String] Substring to search for in node content
369
+ # @param soft [Boolean] If true (default), soft delete; if false, permanent delete
370
+ # @param confirm [Symbol] Must be :confirmed to proceed with permanent deletion
371
+ # @return [Array<Integer>] Array of node IDs that were deleted
372
+ # @raise [ArgumentError] if content_substring is blank
373
+ # @raise [ArgumentError] if permanent deletion requested without confirmation
374
+ #
375
+ # @example Soft delete all nodes containing "deprecated"
376
+ # htm.forget_content("deprecated")
377
+ # # => [42, 56, 78] # IDs of deleted nodes
378
+ #
379
+ # @example Permanent delete all nodes containing "test data"
380
+ # htm.forget_content("test data", soft: false, confirm: :confirmed)
381
+ #
382
+ def forget_content(content_substring, soft: true, confirm: false)
383
+ raise ArgumentError, "Content substring cannot be blank" if content_substring.to_s.strip.empty?
384
+
385
+ # Permanent delete requires confirmation
386
+ if !soft && confirm != :confirmed
387
+ raise ArgumentError, "Permanent deletion requires confirm: :confirmed"
388
+ end
389
+
390
+ # Find all nodes containing the substring (case-insensitive)
391
+ matching_nodes = HTM::Models::Node.where("content ILIKE ?", "%#{content_substring}%")
392
+ node_ids = matching_nodes.pluck(:id)
393
+
394
+ if node_ids.empty?
395
+ HTM.logger.info "No nodes found containing: #{content_substring}"
396
+ return []
397
+ end
398
+
399
+ # Delete each matching node
400
+ node_ids.each do |node_id|
401
+ forget(node_id, soft: soft, confirm: confirm)
402
+ end
403
+
404
+ HTM.logger.info "Forgot #{node_ids.length} nodes containing: #{content_substring}"
405
+ node_ids
406
+ end
407
+
353
408
  # Restore a soft-deleted memory node
354
409
  #
355
410
  # @param node_id [Integer] ID of the soft-deleted node to restore
@@ -550,6 +605,14 @@ class HTM
550
605
  HTM.logger.error "Failed to enqueue tags job for node #{node_id}: #{e.message}"
551
606
  end
552
607
 
608
+ def enqueue_propositions_job(node_id)
609
+ # Enqueue proposition extraction using configured job backend
610
+ # Job will use HTM.extract_propositions which delegates to configured proposition_extractor
611
+ HTM::JobAdapter.enqueue(HTM::Jobs::GeneratePropositionsJob, node_id: node_id, robot_id: @robot_id)
612
+ rescue StandardError => e
613
+ HTM.logger.error "Failed to enqueue propositions job for node #{node_id}: #{e.message}"
614
+ end
615
+
553
616
  def add_to_working_memory(node)
554
617
  # Convert token_count to integer (may be String from database/cache)
555
618
  token_count = node['token_count'].to_i
data/lib/tasks/doc.rake CHANGED
@@ -22,7 +22,7 @@ namespace :htm do
22
22
  "--plugin markdown",
23
23
  "--output-dir #{output_dir}",
24
24
  "--format markdown",
25
- "--title 'HTM - Hierarchical Temporary Memory API'",
25
+ "--title 'HTM - Hierarchical Temporal Memory API'",
26
26
  "--markup markdown",
27
27
  "--charset utf-8",
28
28
  "--protected",
data/lib/tasks/htm.rake CHANGED
@@ -68,21 +68,28 @@ namespace :htm do
68
68
  end
69
69
  end
70
70
 
71
- desc "Test database connection"
72
- task :test do
71
+ desc "Verify database connection (respects RAILS_ENV)"
72
+ task :verify do
73
73
  require 'htm'
74
- config = HTM::Database.default_config
75
- raise "Database not configured. Set HTM_DBURL environment variable." unless config
76
74
 
77
- puts "Testing HTM database connection..."
75
+ env = ENV['RAILS_ENV'] || ENV['RACK_ENV'] || 'development'
76
+ config = HTM::ActiveRecordConfig.load_database_config
77
+
78
+ puts "Verifying HTM database connection (#{env})..."
78
79
  puts " Host: #{config[:host]}"
79
80
  puts " Port: #{config[:port]}"
80
- puts " Database: #{config[:dbname]}"
81
- puts " User: #{config[:user]}"
81
+ puts " Database: #{config[:database]}"
82
+ puts " User: #{config[:username]}"
82
83
 
83
84
  begin
84
85
  require 'pg'
85
- conn = PG.connect(config)
86
+ conn = PG.connect(
87
+ host: config[:host],
88
+ port: config[:port],
89
+ dbname: config[:database],
90
+ user: config[:username],
91
+ password: config[:password]
92
+ )
86
93
 
87
94
  # Check pgvector
88
95
  pgvector = conn.exec("SELECT extversion FROM pg_extension WHERE extname='vector'").first
@@ -100,16 +107,18 @@ namespace :htm do
100
107
  end
101
108
  end
102
109
 
103
- desc "Open PostgreSQL console"
110
+ desc "Open PostgreSQL console (respects RAILS_ENV)"
104
111
  task :console do
105
112
  require 'htm'
106
- config = HTM::Database.default_config
107
- raise "Database not configured. Set HTM_DBURL environment variable." unless config
108
113
 
114
+ env = ENV['RAILS_ENV'] || ENV['RACK_ENV'] || 'development'
115
+ config = HTM::ActiveRecordConfig.load_database_config
116
+
117
+ puts "Connecting to #{config[:database]} (#{env})..."
109
118
  exec "psql", "-h", config[:host],
110
119
  "-p", config[:port].to_s,
111
- "-U", config[:user],
112
- "-d", config[:dbname]
120
+ "-U", config[:username],
121
+ "-d", config[:database]
113
122
  end
114
123
 
115
124
  desc "Seed database with sample data"
@@ -277,6 +286,120 @@ namespace :htm do
277
286
  puts " Errors: #{errors}"
278
287
  puts " Nodes with embeddings: #{final_with_embeddings}"
279
288
  end
289
+
290
+ desc "Rebuild propositions for all non-proposition nodes. Extracts atomic facts and creates new nodes."
291
+ task :propositions do
292
+ require 'htm'
293
+ require 'ruby-progressbar'
294
+
295
+ # Ensure database connection
296
+ HTM::ActiveRecordConfig.establish_connection!
297
+
298
+ # Find all non-proposition nodes (nodes that haven't been extracted from)
299
+ source_nodes = HTM::Models::Node.non_propositions
300
+ source_count = source_nodes.count
301
+
302
+ # Count existing proposition nodes
303
+ existing_propositions = HTM::Models::Node.propositions.count
304
+
305
+ puts "\nHTM Propositions Rebuild"
306
+ puts "=" * 50
307
+ puts "Current state:"
308
+ puts " Source nodes (non-propositions): #{source_count}"
309
+ puts " Existing proposition nodes: #{existing_propositions}"
310
+ puts "\nThis will extract propositions from ALL #{source_count} source nodes."
311
+ puts "Existing proposition nodes will be deleted and regenerated."
312
+ puts "This operation may take a long time depending on your LLM provider."
313
+ print "\nType 'yes' to confirm: "
314
+
315
+ confirmation = $stdin.gets&.strip
316
+ unless confirmation == 'yes'
317
+ puts "Aborted."
318
+ next
319
+ end
320
+
321
+ # Delete existing proposition nodes
322
+ if existing_propositions > 0
323
+ puts "\nDeleting #{existing_propositions} existing proposition nodes..."
324
+ deleted = HTM::Models::Node.propositions.delete_all
325
+ puts " Deleted #{deleted} proposition nodes"
326
+ end
327
+
328
+ puts "\nExtracting propositions from #{source_count} nodes..."
329
+ puts "(This may take a while depending on your LLM provider)\n"
330
+
331
+ # Get a robot ID for linking proposition nodes
332
+ # Use the first robot or create a system robot
333
+ robot = HTM::Models::Robot.first || HTM::Models::Robot.create!(name: 'proposition_rebuilder')
334
+
335
+ # Create progress bar with ETA
336
+ progressbar = ProgressBar.create(
337
+ total: source_count,
338
+ format: '%t: |%B| %c/%C (%p%%) %e',
339
+ title: 'Extracting',
340
+ output: $stdout,
341
+ smoothing: 0.5
342
+ )
343
+
344
+ # Track stats
345
+ errors = 0
346
+ nodes_processed = 0
347
+ propositions_created = 0
348
+
349
+ source_nodes.find_each do |node|
350
+ begin
351
+ # Extract propositions
352
+ propositions = HTM::PropositionService.extract(node.content)
353
+
354
+ if propositions.any?
355
+ propositions.each do |proposition_text|
356
+ token_count = HTM.count_tokens(proposition_text)
357
+
358
+ # Create proposition node
359
+ prop_node = HTM::Models::Node.create!(
360
+ content: proposition_text,
361
+ token_count: token_count,
362
+ metadata: { is_proposition: true, source_node_id: node.id }
363
+ )
364
+
365
+ # Link to robot
366
+ HTM::Models::RobotNode.find_or_create_by!(
367
+ robot_id: robot.id,
368
+ node_id: prop_node.id
369
+ )
370
+
371
+ # Generate embedding for proposition node
372
+ begin
373
+ result = HTM::EmbeddingService.generate(proposition_text)
374
+ prop_node.update!(embedding: result[:storage_embedding])
375
+ rescue StandardError => e
376
+ progressbar.log " Warning: Embedding failed for proposition: #{e.message}"
377
+ end
378
+
379
+ propositions_created += 1
380
+ end
381
+ end
382
+
383
+ nodes_processed += 1
384
+ rescue StandardError => e
385
+ errors += 1
386
+ progressbar.log " Error on node #{node.id}: #{e.message}"
387
+ end
388
+
389
+ progressbar.increment
390
+ end
391
+
392
+ progressbar.finish
393
+
394
+ # Final stats
395
+ final_proposition_count = HTM::Models::Node.propositions.count
396
+
397
+ puts "\nRebuild complete!"
398
+ puts " Source nodes processed: #{nodes_processed}"
399
+ puts " Propositions created: #{propositions_created}"
400
+ puts " Errors: #{errors}"
401
+ puts " Total proposition nodes: #{final_proposition_count}"
402
+ end
280
403
  end
281
404
 
282
405
  namespace :schema do
@@ -293,6 +416,129 @@ namespace :htm do
293
416
  end
294
417
  end
295
418
 
419
+ desc "Create database if it doesn't exist (respects RAILS_ENV)"
420
+ task :create do
421
+ require 'htm'
422
+
423
+ env = ENV['RAILS_ENV'] || ENV['RACK_ENV'] || 'development'
424
+ config = HTM::ActiveRecordConfig.load_database_config
425
+ db_name = config[:database]
426
+
427
+ puts "Creating database: #{db_name} (#{env})"
428
+
429
+ admin_config = config.dup
430
+ admin_config[:database] = 'postgres'
431
+
432
+ begin
433
+ require 'pg'
434
+ admin_conn = PG.connect(
435
+ host: admin_config[:host],
436
+ port: admin_config[:port],
437
+ dbname: admin_config[:database],
438
+ user: admin_config[:username],
439
+ password: admin_config[:password]
440
+ )
441
+
442
+ result = admin_conn.exec_params(
443
+ "SELECT 1 FROM pg_database WHERE datname = $1",
444
+ [db_name]
445
+ )
446
+
447
+ if result.ntuples == 0
448
+ admin_conn.exec("CREATE DATABASE #{PG::Connection.quote_ident(db_name)}")
449
+ puts "✓ Database created: #{db_name}"
450
+
451
+ # Connect to new database and enable extensions
452
+ db_conn = PG.connect(
453
+ host: config[:host],
454
+ port: config[:port],
455
+ dbname: db_name,
456
+ user: config[:username],
457
+ password: config[:password]
458
+ )
459
+ %w[vector pg_trgm].each do |ext|
460
+ db_conn.exec("CREATE EXTENSION IF NOT EXISTS #{ext}")
461
+ end
462
+ db_conn.close
463
+ puts "✓ Extensions enabled (pgvector, pg_trgm)"
464
+ else
465
+ puts "✓ Database already exists: #{db_name}"
466
+ end
467
+
468
+ admin_conn.close
469
+ rescue PG::Error => e
470
+ puts "✗ Error: #{e.message}"
471
+ exit 1
472
+ end
473
+ end
474
+
475
+ namespace :tags do
476
+ desc "Soft delete orphaned tags and stale node_tags entries"
477
+ task :cleanup do
478
+ require 'htm'
479
+
480
+ # Ensure database connection
481
+ HTM::ActiveRecordConfig.establish_connection!
482
+
483
+ puts "\nHTM Tag Cleanup"
484
+ puts "=" * 50
485
+
486
+ # Step 1: Find active node_tags pointing to soft-deleted or missing nodes
487
+ stale_node_tags = HTM::Models::NodeTag
488
+ .joins("LEFT JOIN nodes ON nodes.id = node_tags.node_id")
489
+ .where("nodes.id IS NULL OR nodes.deleted_at IS NOT NULL")
490
+
491
+ stale_count = stale_node_tags.count
492
+
493
+ # Step 2: Find orphaned tags using the Tag.orphaned scope
494
+ orphaned_tags = HTM::Models::Tag.orphaned
495
+ orphan_count = orphaned_tags.count
496
+
497
+ if stale_count == 0 && orphan_count == 0
498
+ puts "No cleanup needed."
499
+ puts " Stale node_tags entries: 0"
500
+ puts " Orphaned tags: 0"
501
+ next
502
+ end
503
+
504
+ puts "Found:"
505
+ puts " Stale node_tags entries: #{stale_count} (pointing to deleted/missing nodes)"
506
+ puts " Orphaned tags: #{orphan_count} (no active nodes)"
507
+
508
+ if orphan_count > 0
509
+ puts "\nOrphaned tags:"
510
+ orphaned_tags.limit(20).pluck(:name).each do |name|
511
+ puts " - #{name}"
512
+ end
513
+ puts " ... and #{orphan_count - 20} more" if orphan_count > 20
514
+ end
515
+
516
+ print "\nSoft delete these entries? (yes/no): "
517
+ confirmation = $stdin.gets&.strip
518
+
519
+ unless confirmation == 'yes'
520
+ puts "Cancelled."
521
+ next
522
+ end
523
+
524
+ now = Time.current
525
+
526
+ # Soft delete stale node_tags first
527
+ if stale_count > 0
528
+ soft_deleted_node_tags = stale_node_tags.update_all(deleted_at: now)
529
+ puts "\nSoft deleted #{soft_deleted_node_tags} stale node_tags entries."
530
+ end
531
+
532
+ # Then soft delete orphaned tags
533
+ if orphan_count > 0
534
+ soft_deleted_tags = orphaned_tags.update_all(deleted_at: now)
535
+ puts "Soft deleted #{soft_deleted_tags} orphaned tags."
536
+ end
537
+
538
+ puts "\nCleanup complete (soft delete)."
539
+ end
540
+ end
541
+
296
542
  end
297
543
 
298
544
  namespace :doc do