htm 0.0.11 → 0.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. checksums.yaml +4 -4
  2. data/.dictate.toml +46 -0
  3. data/.envrc +2 -0
  4. data/CHANGELOG.md +52 -2
  5. data/README.md +79 -0
  6. data/Rakefile +14 -2
  7. data/bin/htm_mcp.rb +94 -0
  8. data/config/database.yml +20 -13
  9. data/db/migrate/00010_add_soft_delete_to_associations.rb +29 -0
  10. data/db/migrate/00011_add_performance_indexes.rb +21 -0
  11. data/db/migrate/00012_add_tags_trigram_index.rb +18 -0
  12. data/db/migrate/00013_enable_lz4_compression.rb +43 -0
  13. data/db/schema.sql +49 -92
  14. data/docs/api/index.md +1 -1
  15. data/docs/api/yard/HTM.md +2 -4
  16. data/docs/architecture/index.md +1 -1
  17. data/docs/development/index.md +1 -1
  18. data/docs/getting-started/index.md +1 -1
  19. data/docs/guides/index.md +1 -1
  20. data/docs/images/telemetry-architecture.svg +153 -0
  21. data/docs/telemetry.md +391 -0
  22. data/examples/README.md +46 -1
  23. data/examples/cli_app/README.md +1 -1
  24. data/examples/cli_app/htm_cli.rb +1 -1
  25. data/examples/sinatra_app/app.rb +1 -1
  26. data/examples/telemetry/README.md +147 -0
  27. data/examples/telemetry/SETUP_README.md +169 -0
  28. data/examples/telemetry/demo.rb +498 -0
  29. data/examples/telemetry/grafana/dashboards/htm-metrics.json +457 -0
  30. data/lib/htm/configuration.rb +261 -70
  31. data/lib/htm/database.rb +46 -22
  32. data/lib/htm/embedding_service.rb +24 -14
  33. data/lib/htm/errors.rb +15 -1
  34. data/lib/htm/jobs/generate_embedding_job.rb +19 -0
  35. data/lib/htm/jobs/generate_propositions_job.rb +103 -0
  36. data/lib/htm/jobs/generate_tags_job.rb +24 -0
  37. data/lib/htm/loaders/markdown_chunker.rb +79 -0
  38. data/lib/htm/loaders/markdown_loader.rb +41 -15
  39. data/lib/htm/long_term_memory/fulltext_search.rb +138 -0
  40. data/lib/htm/long_term_memory/hybrid_search.rb +324 -0
  41. data/lib/htm/long_term_memory/node_operations.rb +209 -0
  42. data/lib/htm/long_term_memory/relevance_scorer.rb +355 -0
  43. data/lib/htm/long_term_memory/robot_operations.rb +34 -0
  44. data/lib/htm/long_term_memory/tag_operations.rb +428 -0
  45. data/lib/htm/long_term_memory/vector_search.rb +109 -0
  46. data/lib/htm/long_term_memory.rb +51 -1153
  47. data/lib/htm/models/node.rb +35 -2
  48. data/lib/htm/models/node_tag.rb +31 -0
  49. data/lib/htm/models/robot_node.rb +31 -0
  50. data/lib/htm/models/tag.rb +44 -0
  51. data/lib/htm/proposition_service.rb +169 -0
  52. data/lib/htm/query_cache.rb +214 -0
  53. data/lib/htm/sql_builder.rb +178 -0
  54. data/lib/htm/tag_service.rb +16 -6
  55. data/lib/htm/tasks.rb +8 -2
  56. data/lib/htm/telemetry.rb +224 -0
  57. data/lib/htm/version.rb +1 -1
  58. data/lib/htm.rb +64 -3
  59. data/lib/tasks/doc.rake +1 -1
  60. data/lib/tasks/htm.rake +259 -13
  61. data/mkdocs.yml +96 -96
  62. metadata +42 -16
  63. data/.aigcm_msg +0 -1
  64. data/.claude/settings.local.json +0 -95
  65. data/CLAUDE.md +0 -603
  66. data/examples/cli_app/temp.log +0 -93
  67. data/lib/htm/loaders/paragraph_chunker.rb +0 -112
  68. data/notes/ARCHITECTURE_REVIEW.md +0 -1167
  69. data/notes/IMPLEMENTATION_SUMMARY.md +0 -606
  70. data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +0 -451
  71. data/notes/next_steps.md +0 -100
  72. data/notes/plan.md +0 -627
  73. data/notes/tag_ontology_enhancement_ideas.md +0 -222
  74. data/notes/timescaledb_removal_summary.md +0 -200
@@ -0,0 +1,178 @@
1
+ # frozen_string_literal: true
2
+
3
class HTM
  # SQL building utilities for constructing safe, parameterized queries
  #
  # Provides class methods for building SQL conditions for:
  # - Timeframe filtering (single range or multiple ranges)
  # - Metadata filtering (JSONB containment)
  # - Embedding sanitization and padding (SQL injection prevention)
  # - LIKE pattern sanitization (wildcard injection prevention)
  #
  # Values are always quoted through the ActiveRecord connection or bound as
  # parameters. Column names and table aliases are interpolated verbatim, so
  # callers must only pass trusted identifiers for those arguments.
  #
  # @example Build a timeframe condition
  #   HTM::SqlBuilder.timeframe_condition(1.week.ago..Time.now)
  #   # => "(created_at BETWEEN '2024-01-01T00:00:00Z' AND '2024-01-08T00:00:00Z')"
  #
  # @example Build a metadata condition
  #   HTM::SqlBuilder.metadata_condition({ priority: "high" })
  #   # => "(metadata @> '{\"priority\":\"high\"}'::jsonb)"
  #
  # @example Sanitize an embedding
  #   HTM::SqlBuilder.sanitize_embedding([0.1, 0.2, 0.3])
  #   # => "[0.1,0.2,0.3]"
  #
  # @example Sanitize a LIKE pattern
  #   HTM::SqlBuilder.sanitize_like_pattern("test%pattern")
  #   # => "test\\%pattern"
  #
  class SqlBuilder
    # Maximum embedding dimension supported by pgvector with HNSW index
    MAX_EMBEDDING_DIMENSION = 2000

    class << self
      # Sanitize embedding for SQL use
      #
      # Validates that every element is a finite Numeric and renders the vector
      # in PostgreSQL vector literal format. Because only float renderings of
      # validated numbers reach the output, no caller-supplied text can leak
      # into the SQL.
      #
      # @param embedding [Array<Numeric>] Embedding vector
      # @return [String] Sanitized vector string for PostgreSQL (e.g., "[0.1,0.2,0.3]")
      # @raise [ArgumentError] If embedding is not an Array, is empty, or
      #   contains non-numeric / non-finite values
      #
      def sanitize_embedding(embedding)
        unless embedding.is_a?(Array)
          raise ArgumentError, "Embedding must be an Array, got #{embedding.class}"
        end

        raise ArgumentError, "Embedding cannot be empty" if embedding.empty?

        # Collect offending positions so the error can point at concrete values
        invalid_indices = embedding.each_index.reject do |i|
          v = embedding[i]
          v.is_a?(Numeric) && v.respond_to?(:finite?) && v.finite?
        end

        unless invalid_indices.empty?
          # Cap the sample at five entries to keep the message readable
          sample = invalid_indices.first(5).map { |i| "index #{i}: #{embedding[i].inspect}" }.join(", ")
          raise ArgumentError, "Embedding contains invalid values at #{sample}"
        end

        "[#{embedding.map(&:to_f).join(',')}]"
      end

      # Pad embedding to target dimension
      #
      # Appends 0.0 entries until the vector reaches the target dimension.
      # Vectors that are already at least that long are returned unchanged
      # (same object, never truncated).
      #
      # @param embedding [Array<Numeric>] Embedding vector
      # @param target_dimension [Integer] Target dimension (default: MAX_EMBEDDING_DIMENSION)
      # @return [Array<Numeric>] Padded embedding
      #
      def pad_embedding(embedding, target_dimension: MAX_EMBEDDING_DIMENSION)
        return embedding if embedding.length >= target_dimension

        embedding + Array.new(target_dimension - embedding.length, 0.0)
      end

      # Sanitize a string for use in SQL LIKE patterns
      #
      # Escapes the LIKE wildcards (% and _) and the backslash escape character
      # itself by prefixing each with a backslash.
      #
      # @param pattern [String, nil] Pattern to sanitize
      # @return [String] Sanitized pattern safe for LIKE queries ("" for nil)
      #
      def sanitize_like_pattern(pattern)
        return "" if pattern.nil?

        pattern.to_s.gsub(/[%_\\]/) { |match| "\\#{match}" }
      end

      # Build SQL condition for timeframe filtering
      #
      # Range bounds are rendered with #iso8601 and quoted through the
      # ActiveRecord connection.
      #
      # @param timeframe [nil, Range, Array<Range>] Time range(s)
      # @param table_alias [String, nil] Table alias (default: none)
      # @param column [String] Column name (default: "created_at")
      # @return [String, nil] SQL condition, or nil for no filter (nil timeframe,
      #   empty Array, or an unsupported timeframe type)
      #
      def timeframe_condition(timeframe, table_alias: nil, column: "created_at")
        return nil if timeframe.nil?

        full_column = qualified_column(table_alias, column)

        case timeframe
        when Range
          between_clause(ActiveRecord::Base.connection, full_column, timeframe)
        when Array
          # An empty list means "no filter"; joining nothing would yield "()",
          # which is not valid SQL.
          return nil if timeframe.empty?

          conn = ActiveRecord::Base.connection
          clauses = timeframe.map { |range| between_clause(conn, full_column, range) }
          "(#{clauses.join(' OR ')})"
        end
      end

      # Apply timeframe filter to ActiveRecord scope
      #
      # @param scope [ActiveRecord::Relation] Base scope
      # @param timeframe [nil, Range, Array<Range>] Time range(s)
      # @param column [Symbol] Column name (default: :created_at)
      # @return [ActiveRecord::Relation] Scoped query; the scope is returned
      #   unchanged for nil, an empty Array, or an unsupported timeframe type
      #
      def apply_timeframe(scope, timeframe, column: :created_at)
        return scope if timeframe.nil?

        case timeframe
        when Range
          scope.where(column => timeframe)
        when Array
          # reduce over an empty list returns nil, which would break callers
          # that keep chaining on the result.
          return scope if timeframe.empty?

          timeframe
            .map { |range| scope.where(column => range) }
            .reduce { |combined, alternative| combined.or(alternative) }
        else
          scope
        end
      end

      # Build SQL condition for metadata filtering (JSONB containment)
      #
      # @param metadata [Hash, nil] Metadata to filter by
      # @param table_alias [String, nil] Table alias (default: none)
      # @param column [String] Column name (default: "metadata")
      # @return [String, nil] SQL condition or nil for no filter
      #
      def metadata_condition(metadata, table_alias: nil, column: "metadata")
        return nil if metadata.nil? || metadata.empty?

        full_column = qualified_column(table_alias, column)
        quoted_metadata = ActiveRecord::Base.connection.quote(metadata.to_json)

        "(#{full_column} @> #{quoted_metadata}::jsonb)"
      end

      # Apply metadata filter to ActiveRecord scope
      #
      # The JSON document is passed as a bind value; only the column name is
      # interpolated into the SQL fragment.
      #
      # @param scope [ActiveRecord::Relation] Base scope
      # @param metadata [Hash, nil] Metadata to filter by
      # @param column [String] Column name (default: "metadata")
      # @return [ActiveRecord::Relation] Scoped query
      #
      def apply_metadata(scope, metadata, column: "metadata")
        return scope if metadata.nil? || metadata.empty?

        scope.where("#{column} @> ?::jsonb", metadata.to_json)
      end

      private

      # Prefix a column with its table alias when one is given.
      def qualified_column(table_alias, column)
        table_alias ? "#{table_alias}.#{column}" : column.to_s
      end

      # Render one inclusive BETWEEN clause for a range of time-like bounds.
      def between_clause(conn, full_column, range)
        begin_quoted = conn.quote(range.begin.iso8601)
        end_quoted = conn.quote(range.end.iso8601)
        "(#{full_column} BETWEEN #{begin_quoted} AND #{end_quoted})"
      end
    end
  end
end
@@ -15,7 +15,6 @@ class HTM
15
15
  # The actual LLM call is delegated to HTM.configuration.tag_extractor
16
16
  #
17
17
  class TagService
18
- MAX_DEPTH = 4 # Maximum hierarchy depth (3 colons)
19
18
  TAG_FORMAT = /^[a-z0-9\-]+(:[a-z0-9\-]+)*$/ # Validation regex
20
19
 
21
20
  # Circuit breaker for tag extraction API calls
@@ -23,16 +22,26 @@ class HTM
23
22
  @circuit_breaker_mutex = Mutex.new
24
23
 
25
24
  class << self
25
# Deepest tag hierarchy level accepted by the tag service.
#
# The limit is looked up from HTM.configuration on every call rather than
# being fixed in a constant (documented default: 4, i.e. at most 3 colons).
#
# @return [Integer] Configured maximum tag depth
#
def max_depth = HTM.configuration.max_tag_depth
32
+
26
33
  # Get or create the circuit breaker for tag service
27
34
  #
28
35
  # @return [HTM::CircuitBreaker] The circuit breaker instance
29
36
  #
30
37
  def circuit_breaker
38
+ config = HTM.configuration
31
39
  @circuit_breaker_mutex.synchronize do
32
40
  @circuit_breaker ||= HTM::CircuitBreaker.new(
33
41
  name: 'tag_service',
34
- failure_threshold: 5,
35
- reset_timeout: 60
42
+ failure_threshold: config.circuit_breaker_failure_threshold,
43
+ reset_timeout: config.circuit_breaker_reset_timeout,
44
+ half_open_max_calls: config.circuit_breaker_half_open_max_calls
36
45
  )
37
46
  end
38
47
  end
@@ -119,8 +128,9 @@ class HTM
119
128
 
120
129
  # Check depth
121
130
  depth = tag.count(':')
122
- if depth >= MAX_DEPTH
123
- HTM.logger.warn "TagService: Tag depth #{depth + 1} exceeds max #{MAX_DEPTH}, skipping: #{tag}"
131
+ max_tag_depth = max_depth
132
+ if depth >= max_tag_depth
133
+ HTM.logger.warn "TagService: Tag depth #{depth + 1} exceeds max #{max_tag_depth}, skipping: #{tag}"
124
134
  next
125
135
  end
126
136
 
@@ -155,7 +165,7 @@ class HTM
155
165
  return false unless tag.is_a?(String)
156
166
  return false if tag.empty?
157
167
  return false unless tag.match?(TAG_FORMAT)
158
- return false if tag.count(':') >= MAX_DEPTH
168
+ return false if tag.count(':') >= max_depth
159
169
 
160
170
  # Ontological validation
161
171
  levels = tag.split(':')
data/lib/htm/tasks.rb CHANGED
@@ -8,17 +8,23 @@
8
8
  #
9
9
  # This will make the following tasks available:
10
10
  #
11
- # Database tasks:
11
+ # Database tasks (all respect RAILS_ENV, default: development):
12
+ # rake htm:db:create # Create database if it doesn't exist
12
13
  # rake htm:db:setup # Set up HTM database schema and run migrations
13
14
  # rake htm:db:migrate # Run pending database migrations
14
15
  # rake htm:db:status # Show migration status
15
16
  # rake htm:db:info # Show database info
16
- # rake htm:db:test # Test database connection
17
+ # rake htm:db:verify # Verify database connection
17
18
  # rake htm:db:console # Open PostgreSQL console
18
19
  # rake htm:db:seed # Seed database with sample data
19
20
  # rake htm:db:drop # Drop all HTM tables (destructive!)
20
21
  # rake htm:db:reset # Drop and recreate database (destructive!)
21
22
  #
23
+ # Examples:
24
+ # RAILS_ENV=test rake htm:db:create # Create htm_test database
25
+ # RAILS_ENV=test rake htm:db:setup # Setup test database with migrations
26
+ # RAILS_ENV=test rake htm:db:drop # Drop test database
27
+ #
22
28
  # Async job tasks:
23
29
  # rake htm:jobs:stats # Show async job statistics
24
30
  # rake htm:jobs:process_embeddings # Process pending embedding jobs
@@ -0,0 +1,224 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'singleton'
4
+
5
class HTM
  # OpenTelemetry-based observability for HTM
  #
  # Provides opt-in metrics collection. Uses the null object pattern: while
  # telemetry is disabled or the SDK is not installed, every meter and
  # instrument accessor hands back a no-op singleton.
  #
  # Instrument accessors re-check {enabled?} on each call. Null instruments
  # are therefore never cached, so turning telemetry on after an accessor has
  # already been used takes effect immediately, and turning it off again
  # stops recording.
  #
  # @example Enable telemetry
  #   HTM.configure do |config|
  #     config.telemetry_enabled = true
  #   end
  #
  # @example Set destination via environment
  #   # Export to OTLP endpoint
  #   ENV['OTEL_METRICS_EXPORTER'] = 'otlp'
  #   ENV['OTEL_EXPORTER_OTLP_ENDPOINT'] = 'http://localhost:4318'
  #
  # @see docs/telemetry.md
  #
  module Telemetry
    # Null meter that creates null instruments
    # Used when telemetry is disabled or SDK unavailable
    class NullMeter
      include Singleton

      def create_counter(*)
        NullInstrument.instance
      end

      def create_histogram(*)
        NullInstrument.instance
      end

      def create_up_down_counter(*)
        NullInstrument.instance
      end
    end

    # Null instrument that accepts but ignores all metric operations
    class NullInstrument
      include Singleton

      def add(*) = nil
      def record(*) = nil
    end

    class << self
      # Check if telemetry is enabled and SDK is available
      #
      # @return [Boolean] truthy if telemetry should be active
      #
      def enabled?
        HTM.configuration.telemetry_enabled && sdk_available?
      end

      # Check if OpenTelemetry SDK is installed
      #
      # The require is attempted once; the outcome is memoized for the life of
      # the process (reset! deliberately leaves it alone).
      #
      # @return [Boolean] true if SDK can be loaded
      #
      def sdk_available?
        return @sdk_available if defined?(@sdk_available)

        @sdk_available = begin
          require 'opentelemetry-metrics-sdk'
          true
        rescue LoadError
          false
        end
      end

      # Initialize OpenTelemetry SDK
      #
      # Called automatically by {meter} when telemetry is enabled.
      # Does nothing when telemetry is disabled or setup has already run.
      #
      # @return [void]
      #
      def setup
        return unless enabled?
        return if @setup_complete

        OpenTelemetry::SDK.configure do |c|
          c.service_name = 'htm'
        end

        @setup_complete = true
        HTM.logger.info "Telemetry: OpenTelemetry SDK initialized"
      end

      # Get the meter for creating instruments
      #
      # @return [OpenTelemetry::Metrics::Meter, NullMeter] Real or null meter
      #
      def meter
        return NullMeter.instance unless enabled?

        setup
        @meter ||= OpenTelemetry.meter_provider.meter('htm')
      end

      # Reset telemetry state (for testing)
      #
      # Drops the cached meter and instruments and clears the setup flag.
      #
      # @return [void]
      #
      def reset!
        @meter = nil
        @job_counter = nil
        @embedding_latency = nil
        @tag_latency = nil
        @search_latency = nil
        @cache_operations = nil
        @setup_complete = false
        # Don't reset @sdk_available - that's a system property
      end

      # =========================================
      # Instrument Accessors
      # =========================================

      # Counter for job execution (enqueued, completed, failed)
      #
      # @return [OpenTelemetry::Metrics::Counter, NullInstrument]
      #
      # @example Record a completed job
      #   Telemetry.job_counter.add(1, attributes: { 'job' => 'embedding', 'status' => 'success' })
      #
      def job_counter
        live_instrument do
          @job_counter ||= meter.create_counter(
            'htm.jobs',
            unit: 'count',
            description: 'Job execution counts by type and status'
          )
        end
      end

      # Histogram for embedding generation latency
      #
      # @return [OpenTelemetry::Metrics::Histogram, NullInstrument]
      #
      # @example Record latency
      #   Telemetry.embedding_latency.record(145, attributes: { 'provider' => 'ollama', 'status' => 'success' })
      #
      def embedding_latency
        live_instrument do
          @embedding_latency ||= meter.create_histogram(
            'htm.embedding.latency',
            unit: 'ms',
            description: 'Embedding generation latency in milliseconds'
          )
        end
      end

      # Histogram for tag extraction latency
      #
      # @return [OpenTelemetry::Metrics::Histogram, NullInstrument]
      #
      # @example Record latency
      #   Telemetry.tag_latency.record(250, attributes: { 'provider' => 'ollama', 'status' => 'success' })
      #
      def tag_latency
        live_instrument do
          @tag_latency ||= meter.create_histogram(
            'htm.tag.latency',
            unit: 'ms',
            description: 'Tag extraction latency in milliseconds'
          )
        end
      end

      # Histogram for search operation latency
      #
      # @return [OpenTelemetry::Metrics::Histogram, NullInstrument]
      #
      # @example Record latency
      #   Telemetry.search_latency.record(50, attributes: { 'strategy' => 'vector' })
      #
      def search_latency
        live_instrument do
          @search_latency ||= meter.create_histogram(
            'htm.search.latency',
            unit: 'ms',
            description: 'Search operation latency in milliseconds'
          )
        end
      end

      # Counter for cache operations (hits, misses)
      #
      # @return [OpenTelemetry::Metrics::Counter, NullInstrument]
      #
      # @example Record a cache hit
      #   Telemetry.cache_operations.add(1, attributes: { 'operation' => 'hit' })
      #
      def cache_operations
        live_instrument do
          @cache_operations ||= meter.create_counter(
            'htm.cache.operations',
            unit: 'count',
            description: 'Cache hit/miss counts'
          )
        end
      end

      # =========================================
      # Convenience Methods for Timing
      # =========================================

      # Measure execution time of a block and record to a histogram
      #
      # Elapsed time is taken from the monotonic clock and rounded to whole
      # milliseconds. Nothing is recorded if the block raises; the exception
      # propagates to the caller.
      #
      # @param histogram [OpenTelemetry::Metrics::Histogram, NullInstrument] The histogram to record to
      # @param attributes [Hash] Attributes to attach to the measurement
      # @yield The block to measure
      # @return [Object] The result of the block
      #
      # @example Measure embedding generation
      #   result = Telemetry.measure(Telemetry.embedding_latency, 'provider' => 'ollama') do
      #     generate_embedding(text)
      #   end
      #
      def measure(histogram, attributes = {})
        start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        result = yield
        elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000).round
        histogram.record(elapsed_ms, attributes: attributes)
        result
      end

      private

      # Yield to build/fetch the real instrument only while telemetry is
      # active; otherwise return the shared no-op instrument without caching
      # it, so a later configuration change is picked up.
      def live_instrument
        enabled? ? yield : NullInstrument.instance
      end
    end
  end
end
data/lib/htm/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class HTM
4
- VERSION = "0.0.11"
4
+ VERSION = '0.0.14'
5
5
  end
data/lib/htm.rb CHANGED
@@ -10,14 +10,17 @@ require_relative "htm/long_term_memory"
10
10
  require_relative "htm/working_memory"
11
11
  require_relative "htm/embedding_service"
12
12
  require_relative "htm/tag_service"
13
+ require_relative "htm/proposition_service"
13
14
  require_relative "htm/timeframe_extractor"
14
15
  require_relative "htm/timeframe"
15
16
  require_relative "htm/job_adapter"
16
17
  require_relative "htm/jobs/generate_embedding_job"
17
18
  require_relative "htm/jobs/generate_tags_job"
18
- require_relative "htm/loaders/paragraph_chunker"
19
+ require_relative "htm/jobs/generate_propositions_job"
20
+ require_relative "htm/loaders/markdown_chunker"
19
21
  require_relative "htm/loaders/markdown_loader"
20
22
  require_relative "htm/observability"
23
+ require_relative "htm/telemetry"
21
24
 
22
25
  require "pg"
23
26
  require "securerandom"
@@ -26,7 +29,7 @@ require "uri"
26
29
  # Load Rails integration if Rails is defined
27
30
  require_relative "htm/railtie" if defined?(Rails::Railtie)
28
31
 
29
- # HTM (Hierarchical Temporary Memory) - Intelligent memory management for LLM robots
32
+ # HTM (Hierarchical Temporal Memory) - Intelligent memory management for LLM robots
30
33
  #
31
34
  # HTM implements a two-tier memory system:
32
35
  # - Working Memory: Token-limited, active context for immediate LLM use
@@ -163,6 +166,11 @@ class HTM
163
166
  # Only for NEW nodes - existing nodes already have embeddings/tags
164
167
  enqueue_embedding_job(node_id)
165
168
  enqueue_tags_job(node_id, manual_tags: tags)
169
+
170
+ # Enqueue proposition extraction if enabled and not already a proposition
171
+ if HTM.configuration.extract_propositions && !metadata[:is_proposition]
172
+ enqueue_propositions_job(node_id)
173
+ end
166
174
  else
167
175
  HTM.logger.info "Node #{node_id} already exists, linked to robot #{@robot_name} (remember_count: #{result[:robot_node].remember_count})"
168
176
 
@@ -228,7 +236,7 @@ class HTM
228
236
  # memories = htm.recall("preferences", metadata: { source: "user" })
229
237
  # memories = htm.recall("decisions", metadata: { confidence: 0.9, type: "architectural" })
230
238
  #
231
- def recall(topic, timeframe: nil, limit: 20, strategy: :vector, with_relevance: false, query_tags: [], raw: false, metadata: {})
239
+ def recall(topic, timeframe: nil, limit: 20, strategy: :fulltext, with_relevance: false, query_tags: [], raw: false, metadata: {})
232
240
  # Validate inputs
233
241
  validate_timeframe!(timeframe)
234
242
  validate_positive_integer!(limit, "limit")
@@ -350,6 +358,51 @@ class HTM
350
358
  true
351
359
  end
352
360
 
361
# Forget all nodes whose content includes the given string
#
# Performs a soft delete on all matching nodes by default. Soft-deleted
# nodes remain in the database but are excluded from queries. Uses
# case-insensitive matching (ILIKE) on the literal substring: the LIKE
# wildcards % and _ and the backslash escape character are escaped, so
# forget_content("100%") only matches content containing the text "100%".
#
# Each match is removed through #forget, one node at a time.
#
# @param content_substring [String] Substring to search for in node content
# @param soft [Boolean] If true (default), soft delete; if false, permanent delete
# @param confirm [Symbol, false] Must be :confirmed to proceed with permanent deletion
# @return [Array<Integer>] Array of node IDs that were deleted (empty if nothing matched)
# @raise [ArgumentError] if content_substring is blank
# @raise [ArgumentError] if permanent deletion requested without confirmation
#
# @example Soft delete all nodes containing "deprecated"
#   htm.forget_content("deprecated")
#   # => [42, 56, 78]  # IDs of deleted nodes
#
# @example Permanent delete all nodes containing "test data"
#   htm.forget_content("test data", soft: false, confirm: :confirmed)
#
def forget_content(content_substring, soft: true, confirm: false)
  raise ArgumentError, "Content substring cannot be blank" if content_substring.to_s.strip.empty?

  # Permanent delete requires confirmation
  if !soft && confirm != :confirmed
    raise ArgumentError, "Permanent deletion requires confirm: :confirmed"
  end

  # Escape LIKE metacharacters so user text cannot widen the match and delete
  # unrelated nodes (same escaping rule as HTM::SqlBuilder.sanitize_like_pattern;
  # backslash is PostgreSQL's default LIKE escape character).
  escaped_substring = content_substring.to_s.gsub(/[%_\\]/) { |char| "\\#{char}" }

  # Find all nodes containing the substring (case-insensitive)
  node_ids = HTM::Models::Node.where("content ILIKE ?", "%#{escaped_substring}%").pluck(:id)

  if node_ids.empty?
    HTM.logger.info "No nodes found containing: #{content_substring}"
    return []
  end

  # Delete each matching node
  node_ids.each { |node_id| forget(node_id, soft: soft, confirm: confirm) }

  HTM.logger.info "Forgot #{node_ids.length} nodes containing: #{content_substring}"
  node_ids
end
405
+
353
406
  # Restore a soft-deleted memory node
354
407
  #
355
408
  # @param node_id [Integer] ID of the soft-deleted node to restore
@@ -550,6 +603,14 @@ class HTM
550
603
  HTM.logger.error "Failed to enqueue tags job for node #{node_id}: #{e.message}"
551
604
  end
552
605
 
606
# Queue background proposition extraction for a node.
#
# Hands HTM::Jobs::GeneratePropositionsJob to HTM::JobAdapter together with
# the node id and this instance's robot id. Best-effort: any StandardError
# raised while enqueueing is logged and swallowed rather than propagated.
#
# @param node_id [Integer] ID of the node to extract propositions from
#
def enqueue_propositions_job(node_id)
  job_arguments = { node_id: node_id, robot_id: @robot_id }

  begin
    HTM::JobAdapter.enqueue(HTM::Jobs::GeneratePropositionsJob, **job_arguments)
  rescue StandardError => error
    HTM.logger.error "Failed to enqueue propositions job for node #{node_id}: #{error.message}"
  end
end
613
+
553
614
  def add_to_working_memory(node)
554
615
  # Convert token_count to integer (may be String from database/cache)
555
616
  token_count = node['token_count'].to_i
data/lib/tasks/doc.rake CHANGED
@@ -22,7 +22,7 @@ namespace :htm do
22
22
  "--plugin markdown",
23
23
  "--output-dir #{output_dir}",
24
24
  "--format markdown",
25
- "--title 'HTM - Hierarchical Temporary Memory API'",
25
+ "--title 'HTM - Hierarchical Temporal Memory API'",
26
26
  "--markup markdown",
27
27
  "--charset utf-8",
28
28
  "--protected",