ruby_llm-semantic_cache 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,89 @@
+ # frozen_string_literal: true
+
+ require_relative "base"
+
+ module RubyLLM
+   module SemanticCache
+     module VectorStores
+       class Redis < Base
+         def initialize(config)
+           super
+           require_neighbor_redis!
+           setup_client
+           setup_index
+         end
+
+         def add(id, embedding)
+           @index.add(id, embedding)
+         end
+
+         def search(embedding, limit: 5)
+           results = @index.search(embedding, count: limit)
+
+           results.map do |result|
+             # VectorSet returns array of hashes: [{id: "...", distance: 0.0}, ...]
+             # For cosine distance: similarity = 1 - distance
+             id = result[:id]
+             distance = result[:distance].to_f
+             similarity = 1.0 - distance
+             { id: id, similarity: similarity }
+           end
+         end
+
+         def delete(id)
+           @index.remove(id)
+         end
+
+         def clear!
+           # VectorSet doesn't have a drop method, remove all entries
+           # We need to iterate and remove, or delete the key
+           @client.call("DEL", index_name)
+           setup_index
+         end
+
+         def empty?
+           size.zero?
+         end
+
+         def size
+           @index.count
+         rescue StandardError
+           0
+         end
+
+         private
+
+         def require_neighbor_redis!
+           require "neighbor-redis"
+         rescue LoadError
+           raise Error, "neighbor-redis gem is required for Redis vector store. " \
+                        "Install it with: gem install neighbor-redis"
+         end
+
+         def setup_client
+           require "redis-client"
+
+           @client = if @config.redis_client
+                       @config.redis_client
+                     elsif @config.redis_url
+                       RedisClient.config(url: @config.redis_url).new_pool
+                     else
+                       RedisClient.config.new_pool
+                     end
+
+           Neighbor::Redis.client = @client
+         end
+
+         def setup_index
+           # Use VectorSet for Redis 8+ (works without RediSearch module)
+           @index = Neighbor::Redis::VectorSet.new(index_name)
+         end
+
+         def index_name
+           # VectorSet names cannot contain colons, use underscore
+           @config.namespace.gsub(":", "_") + "_vectors"
+         end
+       end
+     end
+   end
+ end
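
For reference when reading this hunk: the class above is only loaded when the configuration selects the :redis vector store (see build_vector_store in lib/ruby_llm/semantic_cache.rb later in this diff). A minimal setup sketch, illustrative only and assuming Configuration exposes writers for the attributes this class reads (vector_store, redis_url/redis_client, namespace):

# Illustrative sketch, not part of the packaged files.
# Assumes Configuration provides setters for the attributes read above.
require "ruby_llm/semantic_cache"

RubyLLM::SemanticCache.configure do |config|
  config.vector_store = :redis                    # selects VectorStores::Redis via build_vector_store
  config.redis_url    = "redis://localhost:6379"  # consumed by setup_client
  config.namespace    = "my_app:cache"            # colons are replaced by underscores in index_name
end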
@@ -0,0 +1,7 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module SemanticCache
+     VERSION = "0.1.0"
+   end
+ end
@@ -0,0 +1,317 @@
+ # frozen_string_literal: true
+
+ require_relative "semantic_cache/version"
+ require_relative "semantic_cache/configuration"
+ require_relative "semantic_cache/entry"
+ require_relative "semantic_cache/embedding"
+ require_relative "semantic_cache/serializer"
+ require_relative "semantic_cache/vector_stores/base"
+ require_relative "semantic_cache/vector_stores/memory"
+ require_relative "semantic_cache/cache_stores/base"
+ require_relative "semantic_cache/cache_stores/memory"
+ require_relative "semantic_cache/middleware"
+ require_relative "semantic_cache/scoped"
+
+ module RubyLLM
+   module SemanticCache
+     class Error < StandardError; end
+     class NotFoundError < Error; end
+
+     class << self
+       # Configure the cache
+       # @yield [Configuration] the configuration object
+       def configure
+         yield(config)
+         reset! # Reset stores when configuration changes
+       end
+
+       # Get the current configuration
+       # @return [Configuration]
+       def config
+         @config ||= Configuration.new
+       end
+
+       # Fetch a cached response or execute the block and cache the result
+       # @param query [String] the query to cache
+       # @param threshold [Float] similarity threshold (overrides config)
+       # @param ttl [Integer] time-to-live in seconds (overrides config)
+       # @return the cached or computed response
+       def fetch(query, threshold: nil, ttl: nil, &block)
+         raise ArgumentError, "Block required" unless block_given?
+
+         threshold ||= config.similarity_threshold
+         ttl ||= config.ttl_seconds
+
+         # Generate embedding for the query
+         embedding = embedding_generator.generate(query)
+
+         # Search for similar cached queries
+         matches = vector_store.search(embedding, limit: 1)
+
+         if matches.any? && matches.first[:similarity] >= threshold
+           # Cache hit
+           record_hit!
+           entry_data = cache_store.get(matches.first[:id])
+
+           if entry_data
+             return Serializer.deserialize(entry_data[:response])
+           end
+         end
+
+         # Cache miss - execute block
+         record_miss!
+         response = block.call
+
+         # Store in cache
+         store(query: query, response: response, embedding: embedding, ttl: ttl)
+
+         response
+       end
+
+       # Store a response in the cache
+       # @param query [String] the query
+       # @param response the response to cache
+       # @param embedding [Array<Float>] pre-computed embedding (optional)
+       # @param metadata [Hash] additional metadata
+       # @param ttl [Integer] time-to-live in seconds
+       # @return [Entry] the created entry
+       def store(query:, response:, embedding: nil, metadata: {}, ttl: nil)
+         embedding ||= embedding_generator.generate(query)
+         ttl ||= config.ttl_seconds
+
+         entry = Entry.new(
+           query: query,
+           response: Serializer.serialize(response),
+           embedding: embedding,
+           metadata: metadata
+         )
+
+         vector_store.add(entry.id, embedding)
+         cache_store.set(entry.id, entry.to_h, ttl: ttl)
+
+         entry
+       end
+
+       # Search for similar cached queries
+       # @param query [String] the query to search for
+       # @param limit [Integer] maximum number of results
+       # @return [Array<Hash>] matching entries with similarity scores
+       def search(query, limit: 5)
+         embedding = embedding_generator.generate(query)
+         matches = vector_store.search(embedding, limit: limit)
+
+         matches.filter_map do |match|
+           entry_data = cache_store.get(match[:id])
+           next unless entry_data
+
+           {
+             query: entry_data[:query],
+             response: Serializer.deserialize(entry_data[:response]),
+             similarity: match[:similarity],
+             metadata: entry_data[:metadata]
+           }
+         end
+       end
+
+       # Check if a similar query exists in the cache
+       # @param query [String] the query to check
+       # @param threshold [Float] similarity threshold
+       # @return [Boolean]
+       def exists?(query, threshold: nil)
+         threshold ||= config.similarity_threshold
+         embedding = embedding_generator.generate(query)
+         matches = vector_store.search(embedding, limit: 1)
+         matches.any? && matches.first[:similarity] >= threshold
+       end
+
+       # Delete a cached entry by query
+       # @param query [String] the query to delete
+       # @param threshold [Float] similarity threshold for matching
+       # @return [Boolean] true if an entry was deleted
+       def delete(query, threshold: nil)
+         threshold ||= config.similarity_threshold
+         embedding = embedding_generator.generate(query)
+         matches = vector_store.search(embedding, limit: 1)
+
+         return false unless matches.any? && matches.first[:similarity] >= threshold
+
+         id = matches.first[:id]
+         vector_store.delete(id)
+         cache_store.delete(id)
+         true
+       end
+
+       # Clear all cached entries
+       def clear!
+         vector_store.clear!
+         cache_store.clear!
+         reset_stats!
+       end
+
+       # Invalidate all cache entries similar to the given query
+       # @param query [String] the query to match against
+       # @param threshold [Float] similarity threshold (defaults to config)
+       # @param limit [Integer] maximum entries to invalidate
+       # @return [Integer] number of entries invalidated
+       def invalidate(query, threshold: nil, limit: 100)
+         threshold ||= config.similarity_threshold
+         embedding = embedding_generator.generate(query)
+         matches = vector_store.search(embedding, limit: limit)
+
+         count = 0
+         matches.each do |match|
+           next unless match[:similarity] >= threshold
+
+           vector_store.delete(match[:id])
+           cache_store.delete(match[:id])
+           count += 1
+         end
+
+         count
+       end
+
+       # Get cache statistics
+       # @return [Hash] cache statistics
+       def stats
+         load_stats!
+         {
+           hits: @hits,
+           misses: @misses,
+           hit_rate: hit_rate,
+           entries: cache_store.size
+         }
+       end
+
+       # Reset the cache stores (clears stores but preserves configuration)
+       def reset!
+         @embedding_generator = nil
+         @vector_store = nil
+         @cache_store = nil
+         @stats_loaded = false
+         @hits = 0
+         @misses = 0
+       end
+
+       # Fully reset including configuration (useful for testing)
+       def reset_all!
+         @config = nil
+         reset!
+       end
+
+       # Wrap a RubyLLM::Chat instance with caching middleware
+       # @param chat [RubyLLM::Chat] the chat instance to wrap
+       # @param threshold [Float, nil] similarity threshold override
+       # @param ttl [Integer, nil] TTL override in seconds
+       # @param on_cache_hit [Proc, nil] callback for cache hits, receives (chat, user_message, cached_response)
+       # @param max_messages [Integer, :unlimited, false, nil] max conversation messages before skipping cache
+       #   - Integer: skip cache after N messages (default: 1, only first message cached)
+       #   - :unlimited or false: cache all messages regardless of conversation length
+       #   - nil: use config default
+       # @return [Middleware] the wrapped chat
+       def wrap(chat, threshold: nil, ttl: nil, on_cache_hit: nil, max_messages: nil)
+         Middleware.new(
+           chat,
+           threshold: threshold,
+           ttl: ttl,
+           on_cache_hit: on_cache_hit,
+           max_messages: max_messages
+         )
+       end
+
+       # Access internal components (for middleware)
+       # @api private
+       def embedding_generator
+         @embedding_generator ||= Embedding.new(config)
+       end
+
+       # @api private
+       def vector_store
+         @vector_store ||= build_vector_store
+       end
+
+       # @api private
+       def cache_store
+         @cache_store ||= build_cache_store
+       end
+
+       # @api private
+       def record_hit!
+         load_stats!
+         @hits += 1
+         persist_stats!
+       end
+
+       # @api private
+       def record_miss!
+         load_stats!
+         @misses += 1
+         persist_stats!
+       end
+
+       private
+
+       def build_vector_store
+         case config.vector_store
+         when :memory
+           VectorStores::Memory.new(config)
+         when :redis
+           require_relative "semantic_cache/vector_stores/redis"
+           VectorStores::Redis.new(config)
+         else
+           raise Error, "Unknown vector store: #{config.vector_store}"
+         end
+       end
+
+       def build_cache_store
+         case config.cache_store
+         when :memory
+           CacheStores::Memory.new(config)
+         when :redis
+           require_relative "semantic_cache/cache_stores/redis"
+           CacheStores::Redis.new(config)
+         else
+           raise Error, "Unknown cache store: #{config.cache_store}"
+         end
+       end
+
+       def hit_rate
+         total = @hits + @misses
+         return 0.0 if total.zero?
+
+         @hits.to_f / total
+       end
+
+       def reset_stats!
+         @hits = 0
+         @misses = 0
+         @stats_loaded = true
+         persist_stats!
+       end
+
+       def load_stats!
+         return if @stats_loaded
+
+         if config.cache_store == :redis
+           stats_data = cache_store.get("__semantic_cache_stats__")
+           if stats_data
+             @hits = stats_data[:hits] || stats_data["hits"] || 0
+             @misses = stats_data[:misses] || stats_data["misses"] || 0
+           else
+             @hits = 0
+             @misses = 0
+           end
+         else
+           @hits ||= 0
+           @misses ||= 0
+         end
+         @stats_loaded = true
+       end
+
+       def persist_stats!
+         return unless config.cache_store == :redis
+
+         cache_store.set("__semantic_cache_stats__", { hits: @hits, misses: @misses }, ttl: nil)
+       end
+     end
+   end
+ end
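
The public surface of this file is configure, fetch, store, search, exists?, delete, invalidate, clear!, stats, and wrap. A brief usage sketch, illustrative only; it assumes the RubyLLM 1.x chat API (RubyLLM.chat and #ask), and the queries and threshold shown are made up:

# Illustrative sketch, not part of the packaged files.
require "ruby_llm/semantic_cache"

# On a miss the block runs and its result is cached; a later, semantically
# similar query above the configured threshold returns the cached response.
answer = RubyLLM::SemanticCache.fetch("What is the capital of France?") do
  RubyLLM.chat.ask("What is the capital of France?")
end

RubyLLM::SemanticCache.exists?("Which city is France's capital?") # => true once similar enough
RubyLLM::SemanticCache.stats # => { hits: ..., misses: ..., hit_rate: ..., entries: ... }

# Or wrap a chat so the Middleware handles caching transparently.
cached_chat = RubyLLM::SemanticCache.wrap(RubyLLM.chat, threshold: 0.9, ttl: 3600)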
@@ -0,0 +1,3 @@
+ # frozen_string_literal: true
+
+ require_relative "ruby_llm/semantic_cache"
@@ -0,0 +1,41 @@
+ # frozen_string_literal: true
+
+ require_relative "lib/ruby_llm/semantic_cache/version"
+
+ Gem::Specification.new do |spec|
+   spec.name = "ruby_llm-semantic_cache"
+   spec.version = RubyLLM::SemanticCache::VERSION
+   spec.authors = ["Chris Hasinski"]
+   spec.email = ["krzysztof.hasinski@gmail.com"]
+
+   spec.summary = "Semantic caching for RubyLLM applications"
+   spec.description = "Cache RubyLLM responses based on semantic similarity, not exact string matching. " \
+                      "Reduces costs and latency by returning cached responses for semantically equivalent queries."
+   spec.homepage = "https://github.com/khasinski/ruby_llm-semantic_cache"
+   spec.license = "MIT"
+   spec.required_ruby_version = ">= 2.7.0"
+
+   spec.metadata["homepage_uri"] = spec.homepage
+   spec.metadata["source_code_uri"] = spec.homepage
+   spec.metadata["changelog_uri"] = "#{spec.homepage}/blob/main/CHANGELOG.md"
+
+   spec.files = Dir.chdir(__dir__) do
+     `git ls-files -z`.split("\x0").reject do |f|
+       (File.expand_path(f) == __FILE__) ||
+         f.start_with?(*%w[bin/ test/ spec/ features/ .git .github appveyor Gemfile])
+     end
+   end
+   spec.bindir = "exe"
+   spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
+   spec.require_paths = ["lib"]
+
+   # Required dependencies
+   spec.add_dependency "ruby_llm", "~> 1.0"
+
+   # Optional: Redis backend
+   spec.add_development_dependency "neighbor-redis", "~> 0.1"
+
+   spec.add_development_dependency "rake", "~> 13.0"
+   spec.add_development_dependency "rspec", "~> 3.0"
+   spec.add_development_dependency "rubocop", "~> 1.50"
+ end
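
Note that neighbor-redis is declared only as a development dependency, so an application that opts into the Redis backend must add it (and the redis-client gem that setup_client requires, if it is not pulled in transitively) to its own bundle. A hypothetical consumer Gemfile, illustrative only:

# Gemfile of an application using the gem (illustrative only)
source "https://rubygems.org"

gem "ruby_llm"
gem "ruby_llm-semantic_cache", "~> 0.1"

# Only needed when vector_store / cache_store are set to :redis
gem "neighbor-redis"
gem "redis-client"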
metadata ADDED
@@ -0,0 +1,135 @@
+ --- !ruby/object:Gem::Specification
+ name: ruby_llm-semantic_cache
+ version: !ruby/object:Gem::Version
+   version: 0.1.0
+ platform: ruby
+ authors:
+ - Chris Hasinski
+ bindir: exe
+ cert_chain: []
+ date: 1980-01-02 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: ruby_llm
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.0'
+ - !ruby/object:Gem::Dependency
+   name: neighbor-redis
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '0.1'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '0.1'
+ - !ruby/object:Gem::Dependency
+   name: rake
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '13.0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '13.0'
+ - !ruby/object:Gem::Dependency
+   name: rspec
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '3.0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '3.0'
+ - !ruby/object:Gem::Dependency
+   name: rubocop
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.50'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.50'
+ description: Cache RubyLLM responses based on semantic similarity, not exact string
+   matching. Reduces costs and latency by returning cached responses for semantically
+   equivalent queries.
+ email:
+ - krzysztof.hasinski@gmail.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - ".rspec"
+ - README.md
+ - Rakefile
+ - docker-compose.yml
+ - lib/ruby_llm-semantic_cache.rb
+ - lib/ruby_llm/semantic_cache.rb
+ - lib/ruby_llm/semantic_cache/cache_stores/base.rb
+ - lib/ruby_llm/semantic_cache/cache_stores/memory.rb
+ - lib/ruby_llm/semantic_cache/cache_stores/redis.rb
+ - lib/ruby_llm/semantic_cache/configuration.rb
+ - lib/ruby_llm/semantic_cache/embedding.rb
+ - lib/ruby_llm/semantic_cache/entry.rb
+ - lib/ruby_llm/semantic_cache/middleware.rb
+ - lib/ruby_llm/semantic_cache/scoped.rb
+ - lib/ruby_llm/semantic_cache/serializer.rb
+ - lib/ruby_llm/semantic_cache/vector_stores/base.rb
+ - lib/ruby_llm/semantic_cache/vector_stores/memory.rb
+ - lib/ruby_llm/semantic_cache/vector_stores/redis.rb
+ - lib/ruby_llm/semantic_cache/version.rb
+ - ruby_llm-semantic_cache.gemspec
+ homepage: https://github.com/khasinski/ruby_llm-semantic_cache
+ licenses:
+ - MIT
+ metadata:
+   homepage_uri: https://github.com/khasinski/ruby_llm-semantic_cache
+   source_code_uri: https://github.com/khasinski/ruby_llm-semantic_cache
+   changelog_uri: https://github.com/khasinski/ruby_llm-semantic_cache/blob/main/CHANGELOG.md
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: 2.7.0
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubygems_version: 3.6.9
+ specification_version: 4
+ summary: Semantic caching for RubyLLM applications
+ test_files: []