htm 0.0.10 → 0.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.dictate.toml +46 -0
- data/.envrc +2 -0
- data/CHANGELOG.md +86 -3
- data/README.md +86 -7
- data/Rakefile +14 -2
- data/bin/htm_mcp.rb +621 -0
- data/config/database.yml +20 -13
- data/db/migrate/00010_add_soft_delete_to_associations.rb +29 -0
- data/db/migrate/00011_add_performance_indexes.rb +21 -0
- data/db/migrate/00012_add_tags_trigram_index.rb +18 -0
- data/db/migrate/00013_enable_lz4_compression.rb +43 -0
- data/db/schema.sql +49 -92
- data/docs/api/index.md +1 -1
- data/docs/api/yard/HTM.md +2 -4
- data/docs/architecture/index.md +1 -1
- data/docs/development/index.md +1 -1
- data/docs/getting-started/index.md +1 -1
- data/docs/guides/index.md +1 -1
- data/docs/images/telemetry-architecture.svg +153 -0
- data/docs/telemetry.md +391 -0
- data/examples/README.md +171 -1
- data/examples/cli_app/README.md +1 -1
- data/examples/cli_app/htm_cli.rb +1 -1
- data/examples/mcp_client.rb +529 -0
- data/examples/sinatra_app/app.rb +1 -1
- data/examples/telemetry/README.md +147 -0
- data/examples/telemetry/SETUP_README.md +169 -0
- data/examples/telemetry/demo.rb +498 -0
- data/examples/telemetry/grafana/dashboards/htm-metrics.json +457 -0
- data/lib/htm/configuration.rb +261 -70
- data/lib/htm/database.rb +46 -22
- data/lib/htm/embedding_service.rb +24 -14
- data/lib/htm/errors.rb +15 -1
- data/lib/htm/jobs/generate_embedding_job.rb +19 -0
- data/lib/htm/jobs/generate_propositions_job.rb +103 -0
- data/lib/htm/jobs/generate_tags_job.rb +24 -0
- data/lib/htm/loaders/markdown_chunker.rb +79 -0
- data/lib/htm/loaders/markdown_loader.rb +41 -15
- data/lib/htm/long_term_memory/fulltext_search.rb +138 -0
- data/lib/htm/long_term_memory/hybrid_search.rb +324 -0
- data/lib/htm/long_term_memory/node_operations.rb +209 -0
- data/lib/htm/long_term_memory/relevance_scorer.rb +355 -0
- data/lib/htm/long_term_memory/robot_operations.rb +34 -0
- data/lib/htm/long_term_memory/tag_operations.rb +428 -0
- data/lib/htm/long_term_memory/vector_search.rb +109 -0
- data/lib/htm/long_term_memory.rb +51 -1153
- data/lib/htm/models/node.rb +35 -2
- data/lib/htm/models/node_tag.rb +31 -0
- data/lib/htm/models/robot_node.rb +31 -0
- data/lib/htm/models/tag.rb +44 -0
- data/lib/htm/proposition_service.rb +169 -0
- data/lib/htm/query_cache.rb +214 -0
- data/lib/htm/sql_builder.rb +178 -0
- data/lib/htm/tag_service.rb +16 -6
- data/lib/htm/tasks.rb +8 -2
- data/lib/htm/telemetry.rb +224 -0
- data/lib/htm/version.rb +1 -1
- data/lib/htm.rb +64 -3
- data/lib/tasks/doc.rake +1 -1
- data/lib/tasks/htm.rake +259 -13
- data/mkdocs.yml +96 -96
- metadata +75 -18
- data/.aigcm_msg +0 -1
- data/.claude/settings.local.json +0 -92
- data/CLAUDE.md +0 -603
- data/examples/cli_app/temp.log +0 -93
- data/lib/htm/loaders/paragraph_chunker.rb +0 -112
- data/notes/ARCHITECTURE_REVIEW.md +0 -1167
- data/notes/IMPLEMENTATION_SUMMARY.md +0 -606
- data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +0 -451
- data/notes/next_steps.md +0 -100
- data/notes/plan.md +0 -627
- data/notes/tag_ontology_enhancement_ideas.md +0 -222
- data/notes/timescaledb_removal_summary.md +0 -200
data/lib/htm/models/node.rb
CHANGED
|
@@ -55,6 +55,10 @@ class HTM
|
|
|
55
55
|
scope :with_embeddings, -> { where.not(embedding: nil) }
|
|
56
56
|
scope :from_source, ->(source_id) { where(source_id: source_id).order(:chunk_position) }
|
|
57
57
|
|
|
58
|
+
# Proposition scopes
|
|
59
|
+
scope :propositions, -> { where("metadata->>'is_proposition' = 'true'") }
|
|
60
|
+
scope :non_propositions, -> { where("metadata IS NULL OR metadata->>'is_proposition' IS NULL OR metadata->>'is_proposition' != 'true'") }
|
|
61
|
+
|
|
58
62
|
# Soft delete scopes
|
|
59
63
|
scope :deleted, -> { unscoped.where.not(deleted_at: nil) }
|
|
60
64
|
scope :with_deleted, -> { unscoped }
|
|
@@ -170,19 +174,40 @@ class HTM
|
|
|
170
174
|
end
|
|
171
175
|
|
|
172
176
|
# Soft delete - mark node as deleted without removing from database
|
|
177
|
+
# Also cascades soft delete to associated robot_nodes and node_tags
|
|
173
178
|
#
|
|
174
179
|
# @return [Boolean] true if soft deleted successfully
|
|
175
180
|
#
|
|
176
181
|
def soft_delete!
|
|
177
|
-
|
|
182
|
+
transaction do
|
|
183
|
+
now = Time.current
|
|
184
|
+
update!(deleted_at: now)
|
|
185
|
+
|
|
186
|
+
# Cascade soft delete to associated robot_nodes
|
|
187
|
+
robot_nodes.update_all(deleted_at: now)
|
|
188
|
+
|
|
189
|
+
# Cascade soft delete to associated node_tags
|
|
190
|
+
node_tags.update_all(deleted_at: now)
|
|
191
|
+
end
|
|
192
|
+
true
|
|
178
193
|
end
|
|
179
194
|
|
|
180
195
|
# Restore a soft-deleted node
|
|
196
|
+
# Also cascades restoration to associated robot_nodes and node_tags
|
|
181
197
|
#
|
|
182
198
|
# @return [Boolean] true if restored successfully
|
|
183
199
|
#
|
|
184
200
|
def restore!
|
|
185
|
-
|
|
201
|
+
transaction do
|
|
202
|
+
update!(deleted_at: nil)
|
|
203
|
+
|
|
204
|
+
# Cascade restoration to associated robot_nodes
|
|
205
|
+
HTM::Models::RobotNode.unscoped.where(node_id: id).update_all(deleted_at: nil)
|
|
206
|
+
|
|
207
|
+
# Cascade restoration to associated node_tags
|
|
208
|
+
HTM::Models::NodeTag.unscoped.where(node_id: id).update_all(deleted_at: nil)
|
|
209
|
+
end
|
|
210
|
+
true
|
|
186
211
|
end
|
|
187
212
|
|
|
188
213
|
# Check if node is soft-deleted
|
|
@@ -193,6 +218,14 @@ class HTM
|
|
|
193
218
|
deleted_at.present?
|
|
194
219
|
end
|
|
195
220
|
|
|
221
|
+
# Check if node is a proposition (extracted atomic fact)
|
|
222
|
+
#
|
|
223
|
+
# @return [Boolean] true if metadata['is_proposition'] is true
|
|
224
|
+
#
|
|
225
|
+
def proposition?
|
|
226
|
+
metadata&.dig('is_proposition') == true
|
|
227
|
+
end
|
|
228
|
+
|
|
196
229
|
private
|
|
197
230
|
|
|
198
231
|
def set_content_hash
|
data/lib/htm/models/node_tag.rb
CHANGED
|
@@ -19,10 +19,41 @@ class HTM
|
|
|
19
19
|
before_create :set_created_at
|
|
20
20
|
|
|
21
21
|
# Scopes
|
|
22
|
+
# Soft delete - by default, only show non-deleted entries
|
|
23
|
+
default_scope { where(deleted_at: nil) }
|
|
24
|
+
|
|
22
25
|
scope :for_node, ->(node_id) { where(node_id: node_id) }
|
|
23
26
|
scope :for_tag, ->(tag_id) { where(tag_id: tag_id) }
|
|
24
27
|
scope :recent, -> { order(created_at: :desc) }
|
|
25
28
|
|
|
29
|
+
# Soft delete scopes
|
|
30
|
+
scope :deleted, -> { unscoped.where.not(deleted_at: nil) }
|
|
31
|
+
scope :with_deleted, -> { unscoped }
|
|
32
|
+
|
|
33
|
+
# Soft delete - mark as deleted without removing from database
|
|
34
|
+
#
|
|
35
|
+
# @return [Boolean] true if soft deleted successfully
|
|
36
|
+
#
|
|
37
|
+
def soft_delete!
|
|
38
|
+
update!(deleted_at: Time.current)
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
# Restore a soft-deleted entry
|
|
42
|
+
#
|
|
43
|
+
# @return [Boolean] true if restored successfully
|
|
44
|
+
#
|
|
45
|
+
def restore!
|
|
46
|
+
update!(deleted_at: nil)
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
# Check if entry is soft-deleted
|
|
50
|
+
#
|
|
51
|
+
# @return [Boolean] true if deleted_at is set
|
|
52
|
+
#
|
|
53
|
+
def deleted?
|
|
54
|
+
deleted_at.present?
|
|
55
|
+
end
|
|
56
|
+
|
|
26
57
|
private
|
|
27
58
|
|
|
28
59
|
def set_created_at
|
|
data/lib/htm/models/robot_node.rb
CHANGED
|
@@ -30,12 +30,43 @@ class HTM
|
|
|
30
30
|
validates :robot_id, uniqueness: { scope: :node_id, message: 'already linked to this node' }
|
|
31
31
|
|
|
32
32
|
# Scopes
|
|
33
|
+
# Soft delete - by default, only show non-deleted entries
|
|
34
|
+
default_scope { where(deleted_at: nil) }
|
|
35
|
+
|
|
33
36
|
scope :recent, -> { order(last_remembered_at: :desc) }
|
|
34
37
|
scope :by_robot, ->(robot_id) { where(robot_id: robot_id) }
|
|
35
38
|
scope :by_node, ->(node_id) { where(node_id: node_id) }
|
|
36
39
|
scope :frequently_remembered, -> { where('remember_count > 1').order(remember_count: :desc) }
|
|
37
40
|
scope :in_working_memory, -> { where(working_memory: true) }
|
|
38
41
|
|
|
42
|
+
# Soft delete scopes
|
|
43
|
+
scope :deleted, -> { unscoped.where.not(deleted_at: nil) }
|
|
44
|
+
scope :with_deleted, -> { unscoped }
|
|
45
|
+
|
|
46
|
+
# Soft delete - mark as deleted without removing from database
|
|
47
|
+
#
|
|
48
|
+
# @return [Boolean] true if soft deleted successfully
|
|
49
|
+
#
|
|
50
|
+
def soft_delete!
|
|
51
|
+
update!(deleted_at: Time.current)
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# Restore a soft-deleted entry
|
|
55
|
+
#
|
|
56
|
+
# @return [Boolean] true if restored successfully
|
|
57
|
+
#
|
|
58
|
+
def restore!
|
|
59
|
+
update!(deleted_at: nil)
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
# Check if entry is soft-deleted
|
|
63
|
+
#
|
|
64
|
+
# @return [Boolean] true if deleted_at is set
|
|
65
|
+
#
|
|
66
|
+
def deleted?
|
|
67
|
+
deleted_at.present?
|
|
68
|
+
end
|
|
69
|
+
|
|
39
70
|
# Record that a robot remembered this content again
|
|
40
71
|
#
|
|
41
72
|
# @return [RobotNode] Updated record
|
data/lib/htm/models/tag.rb
CHANGED
|
@@ -23,11 +23,31 @@ class HTM
|
|
|
23
23
|
before_create :set_created_at
|
|
24
24
|
|
|
25
25
|
# Scopes
|
|
26
|
+
# Soft delete - by default, only show non-deleted tags
|
|
27
|
+
default_scope { where(deleted_at: nil) }
|
|
28
|
+
|
|
26
29
|
scope :by_name, ->(name) { where(name: name) }
|
|
27
30
|
scope :with_prefix, ->(prefix) { where("name LIKE ?", "#{prefix}%") }
|
|
28
31
|
scope :hierarchical, -> { where("name LIKE '%:%'") }
|
|
29
32
|
scope :root_level, -> { where("name NOT LIKE '%:%'") }
|
|
30
33
|
|
|
34
|
+
# Soft delete scopes
|
|
35
|
+
scope :deleted, -> { unscoped.where.not(deleted_at: nil) }
|
|
36
|
+
scope :with_deleted, -> { unscoped }
|
|
37
|
+
|
|
38
|
+
# Orphaned tags - tags with no active (non-deleted) node associations
|
|
39
|
+
scope :orphaned, -> {
|
|
40
|
+
where(
|
|
41
|
+
"NOT EXISTS (
|
|
42
|
+
SELECT 1 FROM node_tags
|
|
43
|
+
JOIN nodes ON nodes.id = node_tags.node_id
|
|
44
|
+
WHERE node_tags.tag_id = tags.id
|
|
45
|
+
AND node_tags.deleted_at IS NULL
|
|
46
|
+
AND nodes.deleted_at IS NULL
|
|
47
|
+
)"
|
|
48
|
+
)
|
|
49
|
+
}
|
|
50
|
+
|
|
31
51
|
# Class methods
|
|
32
52
|
|
|
33
53
|
# Find tags with a given prefix (hierarchical query)
|
|
@@ -368,6 +388,30 @@ class HTM
|
|
|
368
388
|
node_tags.count
|
|
369
389
|
end
|
|
370
390
|
|
|
391
|
+
# Soft delete - mark tag as deleted without removing from database
|
|
392
|
+
#
|
|
393
|
+
# @return [Boolean] true if soft deleted successfully
|
|
394
|
+
#
|
|
395
|
+
def soft_delete!
|
|
396
|
+
update!(deleted_at: Time.current)
|
|
397
|
+
end
|
|
398
|
+
|
|
399
|
+
# Restore a soft-deleted tag
|
|
400
|
+
#
|
|
401
|
+
# @return [Boolean] true if restored successfully
|
|
402
|
+
#
|
|
403
|
+
def restore!
|
|
404
|
+
update!(deleted_at: nil)
|
|
405
|
+
end
|
|
406
|
+
|
|
407
|
+
# Check if tag is soft-deleted
|
|
408
|
+
#
|
|
409
|
+
# @return [Boolean] true if deleted_at is set
|
|
410
|
+
#
|
|
411
|
+
def deleted?
|
|
412
|
+
deleted_at.present?
|
|
413
|
+
end
|
|
414
|
+
|
|
371
415
|
private
|
|
372
416
|
|
|
373
417
|
def set_created_at
|
|
data/lib/htm/proposition_service.rb
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'errors'
|
|
4
|
+
|
|
5
|
+
class HTM
|
|
6
|
+
# Proposition Service - Extracts atomic factual propositions from text
|
|
7
|
+
#
|
|
8
|
+
# This service breaks complex text into simple, self-contained factual
|
|
9
|
+
# statements that can be stored as independent memory nodes. Each proposition:
|
|
10
|
+
# - Expresses a single fact
|
|
11
|
+
# - Is understandable without context
|
|
12
|
+
# - Uses full names, not pronouns
|
|
13
|
+
# - Includes relevant dates/qualifiers
|
|
14
|
+
# - Contains one subject-predicate relationship
|
|
15
|
+
#
|
|
16
|
+
# The actual LLM call is delegated to HTM.configuration.proposition_extractor
|
|
17
|
+
#
|
|
18
|
+
# @example
|
|
19
|
+
# propositions = HTM::PropositionService.extract(
|
|
20
|
+
# "In 1969, Neil Armstrong became the first person to walk on the Moon during Apollo 11."
|
|
21
|
+
# )
|
|
22
|
+
# # => ["Neil Armstrong was an astronaut.",
|
|
23
|
+
# # "Neil Armstrong walked on the Moon in 1969.",
|
|
24
|
+
# # "Neil Armstrong was the first person to walk on the Moon.",
|
|
25
|
+
# # "Neil Armstrong walked on the Moon during the Apollo 11 mission.",
|
|
26
|
+
# # "The Apollo 11 mission occurred in 1969."]
|
|
27
|
+
#
|
|
28
|
+
class PropositionService
|
|
29
|
+
MIN_PROPOSITION_LENGTH = 10 # Minimum characters for a valid proposition
|
|
30
|
+
MAX_PROPOSITION_LENGTH = 1000 # Maximum characters for a valid proposition
|
|
31
|
+
|
|
32
|
+
# Circuit breaker for proposition extraction API calls
|
|
33
|
+
@circuit_breaker = nil
|
|
34
|
+
@circuit_breaker_mutex = Mutex.new
|
|
35
|
+
|
|
36
|
+
class << self
|
|
37
|
+
# Get or create the circuit breaker for proposition service
|
|
38
|
+
#
|
|
39
|
+
# @return [HTM::CircuitBreaker] The circuit breaker instance
|
|
40
|
+
#
|
|
41
|
+
def circuit_breaker
|
|
42
|
+
@circuit_breaker_mutex.synchronize do
|
|
43
|
+
@circuit_breaker ||= HTM::CircuitBreaker.new(
|
|
44
|
+
name: 'proposition_service',
|
|
45
|
+
failure_threshold: 5,
|
|
46
|
+
reset_timeout: 60
|
|
47
|
+
)
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
# Reset the circuit breaker (useful for testing)
|
|
52
|
+
#
|
|
53
|
+
# @return [void]
|
|
54
|
+
#
|
|
55
|
+
def reset_circuit_breaker!
|
|
56
|
+
@circuit_breaker_mutex.synchronize do
|
|
57
|
+
@circuit_breaker&.reset!
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
# Extract propositions from text content
|
|
63
|
+
#
|
|
64
|
+
# @param content [String] Text to analyze
|
|
65
|
+
# @return [Array<String>] Array of atomic propositions
|
|
66
|
+
# @raise [CircuitBreakerOpenError] If circuit breaker is open
|
|
67
|
+
# @raise [PropositionError] If extraction fails
|
|
68
|
+
#
|
|
69
|
+
def self.extract(content)
|
|
70
|
+
HTM.logger.debug "PropositionService: Extracting propositions from #{content.length} chars"
|
|
71
|
+
|
|
72
|
+
# Use circuit breaker to protect against cascading failures
|
|
73
|
+
raw_propositions = circuit_breaker.call do
|
|
74
|
+
HTM.configuration.proposition_extractor.call(content)
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
# Parse response (may be string or array)
|
|
78
|
+
parsed_propositions = parse_propositions(raw_propositions)
|
|
79
|
+
|
|
80
|
+
# Validate and filter propositions
|
|
81
|
+
valid_propositions = validate_and_filter_propositions(parsed_propositions)
|
|
82
|
+
|
|
83
|
+
HTM.logger.debug "PropositionService: Extracted #{valid_propositions.length} valid propositions"
|
|
84
|
+
|
|
85
|
+
valid_propositions
|
|
86
|
+
|
|
87
|
+
rescue HTM::CircuitBreakerOpenError
|
|
88
|
+
# Re-raise circuit breaker errors without wrapping
|
|
89
|
+
raise
|
|
90
|
+
rescue HTM::PropositionError
|
|
91
|
+
raise
|
|
92
|
+
rescue StandardError => e
|
|
93
|
+
HTM.logger.error "PropositionService: Failed to extract propositions: #{e.message}"
|
|
94
|
+
raise HTM::PropositionError, "Proposition extraction failed: #{e.message}"
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
# Parse proposition response (handles string or array input)
|
|
98
|
+
#
|
|
99
|
+
# @param raw_propositions [String, Array] Raw response from extractor
|
|
100
|
+
# @return [Array<String>] Parsed proposition strings
|
|
101
|
+
#
|
|
102
|
+
def self.parse_propositions(raw_propositions)
|
|
103
|
+
case raw_propositions
|
|
104
|
+
when Array
|
|
105
|
+
# Already an array; normalize entries to stripped, non-empty strings
|
|
106
|
+
raw_propositions.map(&:to_s).map(&:strip).reject(&:empty?)
|
|
107
|
+
when String
|
|
108
|
+
# String response - split by newlines, remove list markers
|
|
109
|
+
raw_propositions
|
|
110
|
+
.split("\n")
|
|
111
|
+
.map(&:strip)
|
|
112
|
+
.map { |line| line.sub(/^[-*•]\s*/, '') } # Remove bullet points
|
|
113
|
+
.map { |line| line.sub(/^\d+\.\s*/, '') } # Remove numbered lists
|
|
114
|
+
.map(&:strip)
|
|
115
|
+
.reject(&:empty?)
|
|
116
|
+
else
|
|
117
|
+
raise HTM::PropositionError, "Proposition response must be Array or String, got #{raw_propositions.class}"
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
# Validate and filter propositions
|
|
122
|
+
#
|
|
123
|
+
# @param propositions [Array<String>] Parsed propositions
|
|
124
|
+
# @return [Array<String>] Valid propositions only
|
|
125
|
+
#
|
|
126
|
+
def self.validate_and_filter_propositions(propositions)
|
|
127
|
+
valid_propositions = []
|
|
128
|
+
|
|
129
|
+
propositions.each do |proposition|
|
|
130
|
+
# Check minimum length
|
|
131
|
+
if proposition.length < MIN_PROPOSITION_LENGTH
|
|
132
|
+
HTM.logger.debug "PropositionService: Proposition too short, skipping: #{proposition}"
|
|
133
|
+
next
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
# Check maximum length
|
|
137
|
+
if proposition.length > MAX_PROPOSITION_LENGTH
|
|
138
|
+
HTM.logger.warn "PropositionService: Proposition too long, skipping: #{proposition[0..50]}..."
|
|
139
|
+
next
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
# Check for actual content (not just punctuation/whitespace)
|
|
143
|
+
unless proposition.match?(/[a-zA-Z]{3,}/)
|
|
144
|
+
HTM.logger.debug "PropositionService: Proposition lacks content, skipping: #{proposition}"
|
|
145
|
+
next
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
# Proposition is valid
|
|
149
|
+
valid_propositions << proposition
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
valid_propositions.uniq
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
# Validate single proposition
|
|
156
|
+
#
|
|
157
|
+
# @param proposition [String] Proposition to validate
|
|
158
|
+
# @return [Boolean] True if valid
|
|
159
|
+
#
|
|
160
|
+
def self.valid_proposition?(proposition)
|
|
161
|
+
return false unless proposition.is_a?(String)
|
|
162
|
+
return false if proposition.length < MIN_PROPOSITION_LENGTH
|
|
163
|
+
return false if proposition.length > MAX_PROPOSITION_LENGTH
|
|
164
|
+
return false unless proposition.match?(/[a-zA-Z]{3,}/)
|
|
165
|
+
|
|
166
|
+
true
|
|
167
|
+
end
|
|
168
|
+
end
|
|
169
|
+
end
|
|
data/lib/htm/query_cache.rb
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'lru_redux'
|
|
4
|
+
require 'set'
|
|
5
|
+
|
|
6
|
+
class HTM
|
|
7
|
+
# Thread-safe query result cache with TTL and statistics
|
|
8
|
+
#
|
|
9
|
+
# Provides LRU caching for expensive query results with:
|
|
10
|
+
# - Configurable size and TTL
|
|
11
|
+
# - Thread-safe statistics tracking
|
|
12
|
+
# - Fast cache key generation (using Ruby's built-in hash)
|
|
13
|
+
# - Selective cache invalidation by method type
|
|
14
|
+
#
|
|
15
|
+
# @example Create a cache
|
|
16
|
+
# cache = HTM::QueryCache.new(size: 1000, ttl: 300)
|
|
17
|
+
#
|
|
18
|
+
# @example Use the cache
|
|
19
|
+
# result = cache.fetch(:search, timeframe, query, limit) do
|
|
20
|
+
# expensive_search_operation
|
|
21
|
+
# end
|
|
22
|
+
#
|
|
23
|
+
# @example Check statistics
|
|
24
|
+
# cache.stats
|
|
25
|
+
# # => { hits: 42, misses: 10, hit_rate: 80.77, size: 52 }
|
|
26
|
+
#
|
|
27
|
+
# @example Selective invalidation
|
|
28
|
+
# cache.invalidate_methods!(:search, :hybrid) # Only invalidate search-related entries
|
|
29
|
+
#
|
|
30
|
+
class QueryCache
|
|
31
|
+
attr_reader :enabled
|
|
32
|
+
|
|
33
|
+
# Cache key prefix for method-based invalidation
|
|
34
|
+
METHOD_PREFIX = "m:".freeze
|
|
35
|
+
|
|
36
|
+
# Initialize a new query cache
|
|
37
|
+
#
|
|
38
|
+
# @param size [Integer] Maximum number of entries (default: 1000, use 0 to disable)
|
|
39
|
+
# @param ttl [Integer] Time-to-live in seconds (default: 300)
|
|
40
|
+
#
|
|
41
|
+
def initialize(size: 1000, ttl: 300)
|
|
42
|
+
@enabled = size > 0
|
|
43
|
+
|
|
44
|
+
if @enabled
|
|
45
|
+
@cache = LruRedux::TTL::ThreadSafeCache.new(size, ttl)
|
|
46
|
+
@hits = 0
|
|
47
|
+
@misses = 0
|
|
48
|
+
@mutex = Mutex.new
|
|
49
|
+
# Track keys by method for selective invalidation
|
|
50
|
+
@keys_by_method = Hash.new { |h, k| h[k] = Set.new }
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# Fetch a value from cache or execute block
|
|
55
|
+
#
|
|
56
|
+
# @param method [Symbol] Method name for cache key
|
|
57
|
+
# @param args [Array] Arguments for cache key
|
|
58
|
+
# @yield Block that computes the value if not cached
|
|
59
|
+
# @return [Object] Cached or computed value
|
|
60
|
+
#
|
|
61
|
+
def fetch(method, *args, &block)
|
|
62
|
+
return yield unless @enabled
|
|
63
|
+
|
|
64
|
+
key = cache_key(method, *args)
|
|
65
|
+
|
|
66
|
+
if (cached = @cache[key])
|
|
67
|
+
@mutex.synchronize { @hits += 1 }
|
|
68
|
+
HTM::Telemetry.cache_operations.add(1, attributes: { 'operation' => 'hit' })
|
|
69
|
+
return cached
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
@mutex.synchronize { @misses += 1 }
|
|
73
|
+
HTM::Telemetry.cache_operations.add(1, attributes: { 'operation' => 'miss' })
|
|
74
|
+
result = yield
|
|
75
|
+
@cache[key] = result
|
|
76
|
+
|
|
77
|
+
# Track key for selective invalidation
|
|
78
|
+
@mutex.synchronize { @keys_by_method[method] << key }
|
|
79
|
+
|
|
80
|
+
result
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
# Clear all cached entries
|
|
84
|
+
#
|
|
85
|
+
# @return [void]
|
|
86
|
+
#
|
|
87
|
+
def clear!
|
|
88
|
+
return unless @enabled
|
|
89
|
+
|
|
90
|
+
@cache.clear
|
|
91
|
+
@mutex.synchronize { @keys_by_method.clear }
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
# Invalidate cache (alias for clear!)
|
|
95
|
+
#
|
|
96
|
+
# @return [void]
|
|
97
|
+
#
|
|
98
|
+
def invalidate!
|
|
99
|
+
clear!
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
# Invalidate cache entries for specific methods only
|
|
103
|
+
#
|
|
104
|
+
# More efficient than full invalidation when only certain
|
|
105
|
+
# types of cached data need to be refreshed.
|
|
106
|
+
#
|
|
107
|
+
# @param methods [Array<Symbol>] Method names to invalidate
|
|
108
|
+
# @return [Integer] Number of entries invalidated
|
|
109
|
+
#
|
|
110
|
+
def invalidate_methods!(*methods)
|
|
111
|
+
return 0 unless @enabled
|
|
112
|
+
|
|
113
|
+
count = 0
|
|
114
|
+
@mutex.synchronize do
|
|
115
|
+
methods.each do |method|
|
|
116
|
+
keys = @keys_by_method.delete(method) || Set.new
|
|
117
|
+
keys.each do |key|
|
|
118
|
+
@cache.delete(key)
|
|
119
|
+
count += 1
|
|
120
|
+
end
|
|
121
|
+
end
|
|
122
|
+
end
|
|
123
|
+
count
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
# Get cache statistics
|
|
127
|
+
#
|
|
128
|
+
# @return [Hash, nil] Statistics hash or nil if disabled
|
|
129
|
+
#
|
|
130
|
+
def stats
|
|
131
|
+
return nil unless @enabled
|
|
132
|
+
|
|
133
|
+
total = @hits + @misses
|
|
134
|
+
hit_rate = total > 0 ? (@hits.to_f / total * 100).round(2) : 0.0
|
|
135
|
+
|
|
136
|
+
{
|
|
137
|
+
hits: @hits,
|
|
138
|
+
misses: @misses,
|
|
139
|
+
hit_rate: hit_rate,
|
|
140
|
+
size: @cache.count
|
|
141
|
+
}
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
# Check if cache is enabled
|
|
145
|
+
#
|
|
146
|
+
# @return [Boolean]
|
|
147
|
+
#
|
|
148
|
+
def enabled?
|
|
149
|
+
@enabled
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
private
|
|
153
|
+
|
|
154
|
+
# Generate a cache key from method and arguments
|
|
155
|
+
#
|
|
156
|
+
# Uses Ruby's built-in hash method which is much faster than SHA-256.
|
|
157
|
+
# The combination of method name and argument hash provides sufficient
|
|
158
|
+
# uniqueness for cache keys while being ~10x faster than cryptographic hashing.
|
|
159
|
+
#
|
|
160
|
+
# @param method [Symbol] Method name
|
|
161
|
+
# @param args [Array] Arguments
|
|
162
|
+
# @return [String] Hash-based key
|
|
163
|
+
#
|
|
164
|
+
def cache_key(method, *args)
|
|
165
|
+
# Build composite hash from all arguments
|
|
166
|
+
args_hash = args.map { |arg| normalize_arg(arg) }.hash
|
|
167
|
+
# Combine method and args into a single key
|
|
168
|
+
"#{method}:#{args_hash}"
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
# Normalize an argument for cache key generation
|
|
172
|
+
#
|
|
173
|
+
# Uses type-safe serialization to prevent cache poisoning via malicious to_s.
|
|
174
|
+
# Only known safe types are serialized; unknown types include class name.
|
|
175
|
+
#
|
|
176
|
+
# @param arg [Object] Argument to normalize
|
|
177
|
+
# @return [String] Normalized string representation
|
|
178
|
+
#
|
|
179
|
+
def normalize_arg(arg)
|
|
180
|
+
case arg
|
|
181
|
+
when nil
|
|
182
|
+
"nil"
|
|
183
|
+
when Integer, Float
|
|
184
|
+
# Safe numeric types
|
|
185
|
+
"#{arg.class}:#{arg}"
|
|
186
|
+
when String
|
|
187
|
+
# Include class to differentiate from symbols
|
|
188
|
+
"String:#{arg}"
|
|
189
|
+
when Symbol
|
|
190
|
+
"Symbol:#{arg}"
|
|
191
|
+
when TrueClass, FalseClass
|
|
192
|
+
"Bool:#{arg}"
|
|
193
|
+
when Time, DateTime
|
|
194
|
+
# Use integer epoch seconds (whole-second resolution) for a consistent time representation
|
|
195
|
+
"Time:#{arg.to_i}"
|
|
196
|
+
when Date
|
|
197
|
+
"Date:#{arg.iso8601}"
|
|
198
|
+
when Range
|
|
199
|
+
# Use normalized form for range endpoints
|
|
200
|
+
"Range:#{normalize_arg(arg.begin)}-#{normalize_arg(arg.end)}"
|
|
201
|
+
when Array
|
|
202
|
+
# Recursively normalize array elements
|
|
203
|
+
"Array:[#{arg.map { |a| normalize_arg(a) }.join(',')}]"
|
|
204
|
+
when Hash
|
|
205
|
+
# Sort keys for deterministic ordering, recursively normalize values
|
|
206
|
+
"Hash:{#{arg.sort_by { |k, _| k.to_s }.map { |k, v| "#{normalize_arg(k)}=>#{normalize_arg(v)}" }.join(',')}}"
|
|
207
|
+
else
|
|
208
|
+
# Unknown types: use class name and object_id to prevent collision
|
|
209
|
+
# Don't rely on to_s which could be maliciously overridden
|
|
210
|
+
"#{arg.class}##{arg.object_id}"
|
|
211
|
+
end
|
|
212
|
+
end
|
|
213
|
+
end
|
|
214
|
+
end
|