ruby_llm-semantic_cache 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,199 @@
+ # frozen_string_literal: true
+
+ require "digest"
+
+ module RubyLLM
+   module SemanticCache
+     # Middleware wrapper for RubyLLM::Chat that automatically caches responses
+     #
+     # @example Basic usage
+     #   chat = RubyLLM.chat(model: "gpt-5.2")
+     #   cached_chat = RubyLLM::SemanticCache.wrap(chat)
+     #   cached_chat.ask("What is 2+2?") # First call - executes the LLM
+     #
+     # @example With a custom threshold
+     #   cached_chat = RubyLLM::SemanticCache.wrap(chat, threshold: 0.95)
+     #
+     class Middleware
+       # Methods to delegate directly to the wrapped chat (no caching)
+       DELEGATED_METHODS = %i[
+         model messages tools params headers schema
+         with_instructions with_tool with_tools with_model
+         with_temperature with_context with_params with_headers with_schema
+         on_new_message on_end_message on_tool_call on_tool_result
+         each reset_messages!
+       ].freeze
+
+       attr_reader :chat
+
+       # @param chat [RubyLLM::Chat] the chat instance to wrap
+       # @param threshold [Float, nil] similarity threshold override
+       # @param ttl [Integer, nil] TTL override in seconds
+       # @param on_cache_hit [Proc, nil] callback invoked on a cache hit; receives (chat, user_message, cached_response)
+       # @param max_messages [Integer, :unlimited, false, nil] max conversation messages before skipping the cache
+       #   - Integer: skip the cache after N messages (default: 1, so only the first message is cached)
+       #   - :unlimited or false: cache all messages regardless of conversation length
+       #   - nil: use the config default
+       def initialize(chat, threshold: nil, ttl: nil, on_cache_hit: nil, max_messages: nil)
+         @chat = chat
+         @threshold = threshold
+         @ttl = ttl
+         @on_cache_hit = on_cache_hit
+         @max_messages = max_messages
+       end
+
+       # Ask a question with automatic caching
+       # @param message [String] the message to send
+       # @param with [Object] attachments to include
+       # @return [RubyLLM::Message] the response message
+       def ask(message = nil, with: nil, &block)
+         # Skip caching if the message has attachments
+         return @chat.ask(message, with: with, &block) if with
+
+         # Skip caching for tool-enabled chats (responses may vary)
+         return @chat.ask(message, with: with, &block) if @chat.tools.any?
+
+         # Skip caching if the conversation exceeds max_messages (excluding system messages)
+         return @chat.ask(message, with: with, &block) if conversation_too_long?
+
+         # Skip caching for streaming (too complex to handle correctly)
+         return @chat.ask(message, with: with, &block) if block_given?
+
+         # Use the cache for non-streaming requests
+         cache_key = build_cache_key(message)
+
+         cached = cache_lookup(cache_key)
+         if cached
+           handle_cache_hit(message, cached)
+           return cached
+         end
+
+         # Execute the actual LLM call
+         response = @chat.ask(message)
+
+         # Cache the response
+         store_in_cache(cache_key, response)
+         RubyLLM::SemanticCache.record_miss!
+
+         response
+       end
+
+       alias say ask
+
+       # Delegate other methods to the wrapped chat
+       DELEGATED_METHODS.each do |method|
+         define_method(method) do |*args, **kwargs, &block|
+           result = @chat.send(method, *args, **kwargs, &block)
+           # If the method returns the chat (for chaining), return self instead
+           result.equal?(@chat) ? self : result
+         end
+       end
+
+       private
+
+       def conversation_too_long?
+         max = effective_max_messages
+         return false if max.nil?
+
+         # Count non-system messages in the conversation
+         conversation_length = @chat.messages.count { |m| m.role != :system }
+         conversation_length >= max
+       end
+
+       def effective_max_messages
+         # Use the instance setting if provided, otherwise the config default
+         max = @max_messages.nil? ? RubyLLM::SemanticCache.config.max_messages : @max_messages
+
+         # :unlimited or false means no limit
+         return nil if max == :unlimited || max == false
+
+         max
+       end
+
+       def build_cache_key(message)
+         # Include the model and system instructions in the cache key
+         parts = []
+
+         # Add the model ID so different models get separate cache entries
+         model_id = @chat.model&.id || @chat.model
+         parts << "[MODEL:#{model_id}]" if model_id
+
+         # Add system instructions
+         system_messages = @chat.messages.select { |m| m.role == :system }
+         system_context = system_messages.map { |m| extract_text(m.content) }.join("\n")
+         parts << "[SYSTEM]\n#{system_context}" unless system_context.empty?
+
+         # Add the current message
+         parts << "[USER]\n#{message}"
+
+         parts.join("\n---\n")
+       end
+
+       def extract_text(content)
+         case content
+         when String
+           content
+         when ->(c) { c.respond_to?(:text) }
+           content.text
+         else
+           content.to_s
+         end
+       end
+
+       def handle_cache_hit(user_message, cached_response)
+         if @on_cache_hit
+           # Let the callback handle persistence (for ActiveRecord-backed chats)
+           @on_cache_hit.call(@chat, user_message, cached_response)
+         else
+           # Default: append to the in-memory messages array for conversation continuity
+           add_message_to_chat(:user, user_message)
+           add_message_to_chat(:assistant, cached_response.content, cached_response)
+         end
+       end
+
+       def add_message_to_chat(role, content, original_message = nil)
+         return unless defined?(RubyLLM::Message)
+
+         message = if role == :user
+                     RubyLLM::Message.new(role: :user, content: content)
+                   elsif original_message.is_a?(RubyLLM::Message)
+                     original_message
+                   else
+                     RubyLLM::Message.new(role: role, content: content)
+                   end
+
+         @chat.messages << message if @chat.messages.respond_to?(:<<)
+       end
+
+       def cache_lookup(key)
+         embedding = RubyLLM::SemanticCache.embedding_generator.generate(key)
+         threshold = @threshold || RubyLLM::SemanticCache.config.similarity_threshold
+
+         matches = RubyLLM::SemanticCache.vector_store.search(embedding, limit: 1)
+
+         if matches.any? && matches.first[:similarity] >= threshold
+           entry_data = RubyLLM::SemanticCache.cache_store.get(matches.first[:id])
+           return nil unless entry_data
+
+           RubyLLM::SemanticCache.record_hit!
+           Serializer.deserialize(entry_data[:response])
+         end
+       end
+
+       def store_in_cache(key, response)
+         embedding = RubyLLM::SemanticCache.embedding_generator.generate(key)
+         ttl = @ttl || RubyLLM::SemanticCache.config.ttl_seconds
+
+         entry = Entry.new(
+           query: key,
+           response: Serializer.serialize(response),
+           embedding: embedding,
+           metadata: { model: @chat.model&.id }
+         )
+
+         RubyLLM::SemanticCache.vector_store.add(entry.id, embedding)
+         RubyLLM::SemanticCache.cache_store.set(entry.id, entry.to_h, ttl: ttl)
+       end
+     end
+   end
+ end
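
For orientation, a short usage sketch of the middleware above. It uses only calls shown in this file's own docs (RubyLLM.chat, RubyLLM::SemanticCache.wrap, ask); the embedding generator, vector store, and cache store are assumed to be configured elsewhere in the gem:

    chat   = RubyLLM.chat(model: "gpt-5.2")
    cached = RubyLLM::SemanticCache.wrap(chat, threshold: 0.95)

    cached.ask("What is 2+2?")         # miss: calls the LLM, caches the response
    cached.ask("What's two plus two?") # close enough in embedding space: served from cache
    cached.ask("Tell me a story") { |chunk| print chunk } # streaming bypasses the cache

Per build_cache_key, the first call's key is "[MODEL:gpt-5.2]\n---\n[USER]\nWhat is 2+2?", so switching models or system instructions never reuses another configuration's entries.
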
@@ -0,0 +1,263 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module SemanticCache
+     # Scoped cache wrapper for multi-tenant scenarios.
+     # Each scoped instance maintains its own stores for true isolation.
+     #
+     # @example
+     #   support = RubyLLM::SemanticCache::Scoped.new(namespace: "support")
+     #   sales = RubyLLM::SemanticCache::Scoped.new(namespace: "sales")
+     #
+     #   support.store(query: "How to reset password?", response: "...")
+     #   sales.store(query: "What is the price?", response: "...")
+     #
+     class Scoped
+       attr_reader :namespace
+
+       def initialize(namespace:)
+         @namespace = namespace
+         @vector_store = nil
+         @cache_store = nil
+         @hits = 0
+         @misses = 0
+       end
+
+       def fetch(query, threshold: nil, ttl: nil, &block)
+         raise ArgumentError, "Block required" unless block_given?
+
+         threshold ||= config.similarity_threshold
+         ttl ||= config.ttl_seconds
+
+         embedding = embedding_generator.generate(query)
+         matches = vector_store.search(embedding, limit: 1)
+
+         if matches.any? && matches.first[:similarity] >= threshold
+           entry_data = cache_store.get(matches.first[:id])
+           # Only count a hit when the entry is still present (it may have expired)
+           if entry_data
+             @hits += 1
+             return Serializer.deserialize(entry_data[:response])
+           end
+         end
+
+         @misses += 1
+         response = block.call
+
+         store(query: query, response: response, embedding: embedding, ttl: ttl)
+         response
+       end
+
+       def store(query:, response:, embedding: nil, metadata: {}, ttl: nil)
+         embedding ||= embedding_generator.generate(query)
+         ttl ||= config.ttl_seconds
+
+         entry = Entry.new(
+           query: query,
+           response: Serializer.serialize(response),
+           embedding: embedding,
+           metadata: metadata
+         )
+
+         vector_store.add(entry.id, embedding)
+         cache_store.set(entry.id, entry.to_h, ttl: ttl)
+
+         entry
+       end
+
+       def search(query, limit: 5)
+         embedding = embedding_generator.generate(query)
+         matches = vector_store.search(embedding, limit: limit)
+
+         matches.filter_map do |match|
+           entry_data = cache_store.get(match[:id])
+           next unless entry_data
+
+           {
+             query: entry_data[:query],
+             response: Serializer.deserialize(entry_data[:response]),
+             similarity: match[:similarity],
+             metadata: entry_data[:metadata]
+           }
+         end
+       end
+
+       def exists?(query, threshold: nil)
+         threshold ||= config.similarity_threshold
+         embedding = embedding_generator.generate(query)
+         matches = vector_store.search(embedding, limit: 1)
+         matches.any? && matches.first[:similarity] >= threshold
+       end
+
+       def delete(query, threshold: nil)
+         threshold ||= config.similarity_threshold
+         embedding = embedding_generator.generate(query)
+         matches = vector_store.search(embedding, limit: 1)
+
+         return false unless matches.any? && matches.first[:similarity] >= threshold
+
+         id = matches.first[:id]
+         vector_store.delete(id)
+         cache_store.delete(id)
+         true
+       end
+
+       def invalidate(query, threshold: nil, limit: 100)
+         threshold ||= config.similarity_threshold
+         embedding = embedding_generator.generate(query)
+         matches = vector_store.search(embedding, limit: limit)
+
+         count = 0
+         matches.each do |match|
+           next unless match[:similarity] >= threshold
+
+           vector_store.delete(match[:id])
+           cache_store.delete(match[:id])
+           count += 1
+         end
+
+         count
+       end
+
+       def clear!
+         vector_store.clear!
+         cache_store.clear!
+         @hits = 0
+         @misses = 0
+       end
+
+       def stats
+         {
+           hits: @hits,
+           misses: @misses,
+           hit_rate: hit_rate,
+           entries: cache_store.size
+         }
+       end
+
+       def wrap(chat, threshold: nil, ttl: nil, on_cache_hit: nil, max_messages: nil)
+         # For a scoped wrap, create a middleware that uses this scoped instance
+         ScopedMiddleware.new(
+           self,
+           chat,
+           threshold: threshold,
+           ttl: ttl,
+           on_cache_hit: on_cache_hit,
+           max_messages: max_messages
+         )
+       end
+
+       private
+
+       def config
+         RubyLLM::SemanticCache.config
+       end
+
+       def embedding_generator
+         RubyLLM::SemanticCache.embedding_generator
+       end
+
+       def vector_store
+         @vector_store ||= build_vector_store
+       end
+
+       def cache_store
+         @cache_store ||= build_cache_store
+       end
+
+       def build_vector_store
+         case config.vector_store
+         when :memory
+           # Memory stores are already isolated per Scoped instance, so no namespace is needed
+           VectorStores::Memory.new(config)
+         when :redis
+           require_relative "vector_stores/redis"
+           VectorStores::Redis.new(scoped_config)
+         else
+           raise Error, "Unknown vector store: #{config.vector_store}"
+         end
+       end
+
+       def build_cache_store
+         case config.cache_store
+         when :memory
+           CacheStores::Memory.new(config)
+         when :redis
+           require_relative "cache_stores/redis"
+           CacheStores::Redis.new(scoped_config)
+         else
+           raise Error, "Unknown cache store: #{config.cache_store}"
+         end
+       end
+
+       # Create a config-like object carrying the scoped namespace
+       def scoped_config
+         ScopedConfig.new(config, @namespace)
+       end
+
+       # Wrapper that delegates to the main config but overrides the namespace
+       class ScopedConfig
+         attr_reader :namespace
+
+         def initialize(config, namespace)
+           @config = config
+           @namespace = namespace
+         end
+
+         def method_missing(method, *args, &block)
+           @config.send(method, *args, &block)
+         end
+
+         def respond_to_missing?(method, include_private = false)
+           @config.respond_to?(method, include_private)
+         end
+       end
+
+       def hit_rate
+         total = @hits + @misses
+         return 0.0 if total.zero?
+
+         @hits.to_f / total
+       end
+     end
+
+     # Middleware that uses a scoped cache instance
+     class ScopedMiddleware < Middleware
+       def initialize(scoped, chat, threshold: nil, ttl: nil, on_cache_hit: nil, max_messages: nil)
+         super(chat, threshold: threshold, ttl: ttl, on_cache_hit: on_cache_hit, max_messages: max_messages)
+         @scoped = scoped
+       end
+
+       private
+
+       def cache_lookup(key)
+         embedding = RubyLLM::SemanticCache.embedding_generator.generate(key)
+         threshold = @threshold || RubyLLM::SemanticCache.config.similarity_threshold
+
+         matches = @scoped.send(:vector_store).search(embedding, limit: 1)
+
+         if matches.any? && matches.first[:similarity] >= threshold
+           entry_data = @scoped.send(:cache_store).get(matches.first[:id])
+           return nil unless entry_data
+
+           # Bump the scoped instance's hit counter
+           @scoped.instance_variable_set(:@hits, @scoped.instance_variable_get(:@hits) + 1)
+           Serializer.deserialize(entry_data[:response])
+         end
+       end
+
+       def store_in_cache(key, response)
+         embedding = RubyLLM::SemanticCache.embedding_generator.generate(key)
+         ttl = @ttl || RubyLLM::SemanticCache.config.ttl_seconds
+
+         entry = Entry.new(
+           query: key,
+           response: Serializer.serialize(response),
+           embedding: embedding,
+           metadata: { model: @chat.model&.id }
+         )
+
+         @scoped.send(:vector_store).add(entry.id, embedding)
+         @scoped.send(:cache_store).set(entry.id, entry.to_h, ttl: ttl)
+         # Bump the scoped instance's miss counter
+         @scoped.instance_variable_set(:@misses, @scoped.instance_variable_get(:@misses) + 1)
+       end
+     end
+   end
+ end
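
A sketch of the scoped API above, assuming embeddings are configured; the chat call inside the block is hypothetical:

    support = RubyLLM::SemanticCache::Scoped.new(namespace: "support")
    sales   = RubyLLM::SemanticCache::Scoped.new(namespace: "sales")

    # fetch runs the block only on a miss and stores its result
    answer = support.fetch("How do I reset my password?") do
      chat.ask("How do I reset my password?")
    end

    support.exists?("password reset steps") # true once similarity clears the threshold
    sales.exists?("password reset steps")   # false: each namespace builds its own stores
    support.stats # => { hits: 0, misses: 1, hit_rate: 0.0, entries: 1 }
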
@@ -0,0 +1,116 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module SemanticCache
+     # Handles serialization/deserialization of cached responses
+     module Serializer
+       class << self
+         def serialize(response)
+           if defined?(RubyLLM::Message) && response.is_a?(RubyLLM::Message)
+             serialize_message(response)
+           else
+             serialize_basic(response)
+           end
+         end
+
+         def deserialize(data)
+           return data unless data.is_a?(Hash)
+
+           type = data[:type] || data["type"]
+           value = data[:value] || data["value"]
+
+           case type
+           when "rubyllm_message"
+             deserialize_message(value)
+           when "nil"
+             nil
+           else
+             # "string", "hash", "object", and unknown types all carry their raw value
+             value
+           end
+         end
+
+         private
+
+         def serialize_basic(response)
+           case response
+           when String
+             { type: "string", value: response }
+           when Hash
+             { type: "hash", value: response }
+           when NilClass
+             { type: "nil", value: nil }
+           else
+             if response.respond_to?(:to_h)
+               { type: "object", class: response.class.name, value: response.to_h }
+             else
+               { type: "string", value: response.to_s }
+             end
+           end
+         end
+
+         def serialize_message(message)
+           {
+             type: "rubyllm_message",
+             value: {
+               role: message.role,
+               content: serialize_content(message.content),
+               model_id: message.model_id,
+               tool_calls: message.tool_calls,
+               tool_call_id: message.tool_call_id,
+               input_tokens: message.input_tokens,
+               output_tokens: message.output_tokens,
+               cached_tokens: message.cached_tokens,
+               cache_creation_tokens: message.cache_creation_tokens
+             }.compact
+           }
+         end
+
+         def serialize_content(content)
+           case content
+           when String
+             { type: "string", value: content }
+           when Hash
+             { type: "hash", value: content }
+           when ->(c) { defined?(RubyLLM::Content) && c.is_a?(RubyLLM::Content) }
+             { type: "rubyllm_content", value: content.to_h }
+           else
+             { type: "string", value: content.to_s }
+           end
+         end
+
+         def deserialize_message(value)
+           return value unless defined?(RubyLLM::Message)
+
+           content = deserialize_content(value[:content] || value["content"])
+           RubyLLM::Message.new(
+             role: (value[:role] || value["role"]).to_sym,
+             content: content,
+             model_id: value[:model_id] || value["model_id"],
+             tool_calls: value[:tool_calls] || value["tool_calls"],
+             tool_call_id: value[:tool_call_id] || value["tool_call_id"],
+             input_tokens: value[:input_tokens] || value["input_tokens"],
+             output_tokens: value[:output_tokens] || value["output_tokens"],
+             cached_tokens: value[:cached_tokens] || value["cached_tokens"],
+             cache_creation_tokens: value[:cache_creation_tokens] || value["cache_creation_tokens"]
+           )
+         end
+
+         def deserialize_content(data)
+           return data unless data.is_a?(Hash)
+
+           # All stored content types ("string", "hash", "rubyllm_content") carry their raw value
+           data[:value] || data["value"]
+         end
+       end
+     end
+   end
+ end
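
The serializer reduces everything to plain hashes so responses survive JSON-backed cache stores. A round-trip sketch, using the same RubyLLM::Message keywords the middleware itself uses:

    s = RubyLLM::SemanticCache::Serializer

    s.serialize("hello")                # => { type: "string", value: "hello" }
    s.deserialize(s.serialize("hello")) # => "hello"

    msg  = RubyLLM::Message.new(role: :assistant, content: "4")
    back = s.deserialize(s.serialize(msg))
    back.role    # => :assistant
    back.content # => "4"
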
@@ -0,0 +1,49 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module SemanticCache
+     module VectorStores
+       class Base
+         def initialize(config)
+           @config = config
+         end
+
+         # Add a vector with the given ID
+         # @param id [String] unique identifier
+         # @param embedding [Array<Float>] vector embedding
+         def add(id, embedding)
+           raise NotImplementedError
+         end
+
+         # Search for similar vectors
+         # @param embedding [Array<Float>] query vector
+         # @param limit [Integer] maximum number of results
+         # @return [Array<Hash>] array of { id:, similarity: } hashes
+         def search(embedding, limit: 5)
+           raise NotImplementedError
+         end
+
+         # Delete a vector by ID
+         # @param id [String] unique identifier
+         def delete(id)
+           raise NotImplementedError
+         end
+
+         # Clear all vectors
+         def clear!
+           raise NotImplementedError
+         end
+
+         # Check if the store is empty
+         def empty?
+           raise NotImplementedError
+         end
+
+         # Get the number of vectors stored
+         def size
+           raise NotImplementedError
+         end
+       end
+     end
+   end
+ end
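
Backends subclass Base and fill in this contract; search must return hashes shaped like { id:, similarity: }, sorted by descending similarity. A minimal illustrative subclass (the Null store below is hypothetical, not part of the gem) that disables matching entirely:

    module RubyLLM
      module SemanticCache
        module VectorStores
          # A no-op store: every lookup misses, so caching is effectively off
          class Null < Base
            def add(_id, _embedding); end

            def search(_embedding, limit: 5)
              []
            end

            def delete(_id); end

            def clear!; end

            def empty?
              true
            end

            def size
              0
            end
          end
        end
      end
    end
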
@@ -0,0 +1,85 @@
+ # frozen_string_literal: true
+
+ require_relative "base"
+
+ module RubyLLM
+   module SemanticCache
+     module VectorStores
+       class Memory < Base
+         def initialize(config)
+           super
+           @vectors = {}
+           @mutex = Mutex.new
+         end
+
+         def add(id, embedding)
+           @mutex.synchronize do
+             @vectors[id] = embedding
+           end
+         end
+
+         def search(embedding, limit: 5)
+           @mutex.synchronize do
+             return [] if @vectors.empty?
+
+             results = @vectors.map do |id, stored_embedding|
+               similarity = cosine_similarity(embedding, stored_embedding)
+               { id: id, similarity: similarity }
+             end
+
+             results
+               .sort_by { |r| -r[:similarity] }
+               .first(limit)
+           end
+         end
+
+         def delete(id)
+           @mutex.synchronize do
+             @vectors.delete(id)
+           end
+         end
+
+         def clear!
+           @mutex.synchronize do
+             @vectors.clear
+           end
+         end
+
+         def empty?
+           @mutex.synchronize do
+             @vectors.empty?
+           end
+         end
+
+         def size
+           @mutex.synchronize do
+             @vectors.size
+           end
+         end
+
+         private
+
+         def cosine_similarity(vec_a, vec_b)
+           return 0.0 if vec_a.nil? || vec_b.nil?
+           return 0.0 if vec_a.empty? || vec_b.empty?
+           return 0.0 if vec_a.length != vec_b.length
+
+           dot_product = 0.0
+           norm_a = 0.0
+           norm_b = 0.0
+
+           vec_a.each_with_index do |a, i|
+             b = vec_b[i]
+             dot_product += a * b
+             norm_a += a * a
+             norm_b += b * b
+           end
+
+           return 0.0 if norm_a.zero? || norm_b.zero?
+
+           dot_product / (Math.sqrt(norm_a) * Math.sqrt(norm_b))
+         end
+       end
+     end
+   end
+ end
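
cosine_similarity above computes dot(a, b) / (|a| * |b|), so parallel vectors score 1.0 and orthogonal ones 0.0. A quick check with the in-memory store (Base stores config but Memory never reads it, so nil suffices here):

    store = RubyLLM::SemanticCache::VectorStores::Memory.new(nil)
    store.add("exact", [1.0, 0.0])
    store.add("diagonal", [1.0, 1.0])

    store.search([1.0, 0.0], limit: 2)
    # => [{ id: "exact", similarity: 1.0 },
    #     { id: "diagonal", similarity: 0.7071... }] # cos 45 degrees = 1/sqrt(2)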