semantic-cache 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "openai"
4
+
5
+ module SemanticCache
6
# Thin wrapper around the OpenAI embeddings endpoint.
#
# The model name and API key default to the values held by
# SemanticCache.configuration when they are not passed explicitly.
class Embedding
  # @param model [String, nil] embedding model; falls back to config.embedding_model
  # @param api_key [String, nil] API key; falls back to config.openai_api_key
  def initialize(model: nil, api_key: nil)
    config = SemanticCache.configuration
    @model = model || config.embedding_model
    @client = OpenAI::Client.new(access_token: api_key || config.openai_api_key)
  end

  # Generate an embedding vector for the given text.
  #
  # @param text [String] the text to embed
  # @return [Array<Float>] the embedding found at data[0] of the response
  # @raise [Error] when the response carries no embedding
  def generate(text)
    response = request_embeddings(text)
    vector = response.dig("data", 0, "embedding")
    raise Error, "Failed to generate embedding: #{response}" if vector.nil?

    vector
  end

  # Generate embeddings for multiple texts in a single API call.
  #
  # @param texts [Array<String>] the texts to embed
  # @return [Array<Array<Float>>] one vector per text, ordered by the
  #   "index" field of each response item
  # @raise [Error] when the response has no "data" member
  def generate_batch(texts)
    inputs = Array(texts)
    # Nothing to embed: skip the request instead of sending an empty input list.
    return [] if inputs.empty?

    response = request_embeddings(inputs)
    data = response["data"]
    raise Error, "Failed to generate embeddings: #{response}" if data.nil?

    data.sort_by { |item| item["index"] }.map { |item| item["embedding"] }
  end

  private

  # Issue one embeddings request for +input+ (a String or an Array of Strings).
  def request_embeddings(input)
    @client.embeddings(parameters: { model: @model, input: input })
  end
end
45
+ end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
+ module SemanticCache
6
# One cached query/response pair together with its embedding and
# bookkeeping data (model, tags, creation time, TTL, metadata).
class Entry
  attr_reader :query, :embedding, :response, :model, :tags, :created_at, :ttl, :metadata

  class << self
    # Build an entry from a Hash with String or Symbol keys.
    # A truthy :created_at in the hash overrides the construction timestamp.
    def from_h(hash)
      attrs = hash.is_a?(Hash) ? hash.transform_keys(&:to_sym) : hash
      new(
        query: attrs[:query],
        embedding: attrs[:embedding],
        response: attrs[:response],
        model: attrs[:model],
        tags: attrs[:tags] || [],
        ttl: attrs[:ttl],
        metadata: attrs[:metadata] || {}
      ).tap do |entry|
        stamp = attrs[:created_at]
        entry.instance_variable_set(:@created_at, stamp) if stamp
      end
    end

    # Build an entry from the JSON text produced by #to_json.
    def from_json(json_string)
      from_h(JSON.parse(json_string))
    end
  end

  # tags is coerced with Array(); created_at is the current time as a Float.
  def initialize(query:, embedding:, response:, model: nil, tags: [], ttl: nil, metadata: {})
    @query, @embedding, @response = query, embedding, response
    @model = model
    @tags = Array(tags)
    @ttl = ttl
    @metadata = metadata
    @created_at = Time.now.to_f
  end

  # True once more than ttl seconds have passed since created_at.
  # Entries without a ttl never expire.
  def expired?
    !@ttl.nil? && (Time.now.to_f - @created_at > @ttl)
  end

  # Symbol-keyed Hash of every attribute.
  def to_h
    %i[query embedding response model tags created_at ttl metadata].to_h do |name|
      [name, instance_variable_get(:"@#{name}")]
    end
  end

  # JSON encoding of #to_h.
  def to_json(*args)
    to_h.to_json(*args)
  end
end
62
+ end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "semantic_cache"
4
+
5
+ module SemanticCache
6
+ # Rails integration for SemanticCache.
7
+ #
8
+ # In your Gemfile:
9
+ # gem "semantic-cache"
10
+ #
11
+ # Then require in an initializer:
12
+ # require "semantic_cache/rails"
13
+ #
14
+ # Usage in controllers:
15
+ #
16
+ # class ChatController < ApplicationController
17
+ # include SemanticCache::Cacheable
18
+ #
19
+ # cache_ai_calls only: [:create], ttl: 1.hour
20
+ #
21
+ # def create
22
+ # response = SemanticCache.current.fetch(params[:message]) do
23
+ # openai_client.chat(messages: [{ role: "user", content: params[:message] }])
24
+ # end
25
+ # render json: { response: response }
26
+ # end
27
+ # end
28
+ #
29
+ # Or with per-user namespacing:
30
+ #
31
+ # class ApplicationController < ActionController::Base
32
+ # around_action :with_semantic_cache
33
+ #
34
+ # private
35
+ #
36
+ # def with_semantic_cache
37
+ # SemanticCache.with_cache(namespace: "user_#{current_user.id}") do
38
+ # yield
39
+ # end
40
+ # end
41
+ # end
42
+ #
43
# Mixin that gives the including class a +cache_ai_calls+ class macro.
module Cacheable
  # Class-level macros added to the including class.
  module ClassMethods
    # Register an around_action that runs the wrapped action inside
    # SemanticCache.with_cache.
    #
    # only / except are forwarded to around_action when not nil;
    # ttl is passed to with_cache as default_ttl, namespace as namespace.
    def cache_ai_calls(only: nil, except: nil, ttl: nil, namespace: nil)
      filters = {}
      filters[:only] = only unless only.nil?
      filters[:except] = except unless except.nil?

      around_action(**filters) do |_controller, action|
        SemanticCache.with_cache(namespace: namespace, default_ttl: ttl) { action.call }
      end
    end
  end

  # Hook run on +include+: extends the host class with ClassMethods.
  def self.included(base)
    base.extend(ClassMethods)
  end
end
60
+
61
class << self
  # Cache instance installed by the innermost active with_cache block,
  # or nil when none is active. Stored via Thread.current[], which Ruby
  # scopes to the current fiber of the current thread.
  def current
    Thread.current[:semantic_cache_instance]
  end

  # Build a Cache from the given options, expose it through #current for
  # the duration of the block, then restore whatever was installed before
  # (also when the block raises). Returns the block's value.
  def with_cache(namespace: nil, **options)
    slot = Thread.current
    previous = slot[:semantic_cache_instance]
    begin
      slot[:semantic_cache_instance] = Cache.new(namespace: namespace, **options)
      yield
    ensure
      slot[:semantic_cache_instance] = previous
    end
  end
end
76
+ end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SemanticCache
4
# Vector similarity helpers.
module Similarity
  module_function

  # Compute cosine similarity between two vectors.
  #
  # @param vec_a [Array<Numeric>]
  # @param vec_b [Array<Numeric>]
  # @return [Float] a value between -1.0 and 1.0; 0.0 when either vector
  #   is empty or has zero magnitude
  # @raise [ArgumentError] when both vectors are non-empty but differ in length
  def cosine(vec_a, vec_b)
    return 0.0 if vec_a.empty? || vec_b.empty?
    raise ArgumentError, "Vectors must be the same length" unless vec_a.length == vec_b.length

    dot_product = 0.0
    magnitude_a = 0.0
    magnitude_b = 0.0

    vec_a.each_with_index do |a, i|
      b = vec_b[i]
      dot_product += a * b
      magnitude_a += a * a
      magnitude_b += b * b
    end

    denominator = Math.sqrt(magnitude_a) * Math.sqrt(magnitude_b)
    return 0.0 if denominator.zero?

    similarity = dot_product / denominator
    # NaN cannot be compared, so it is passed through rather than clamped.
    return similarity if similarity.nan?

    # Rounding error can land a hair outside [-1, 1]; keep the documented range.
    similarity.clamp(-1.0, 1.0)
  end
end
31
+ end
@@ -0,0 +1,108 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "monitor"
4
+
5
+ module SemanticCache
6
# Hit/miss counters and timing averages for a cache.
#
# All mutating and aggregating methods run inside the object's monitor.
# Timings are kept as a running total plus a count, so memory use stays
# constant however many events are recorded.
class Stats
  include MonitorMixin

  attr_reader :hits, :misses, :total_savings, :last_event

  def initialize
    super() # Initialize MonitorMixin
    zero_counters
  end

  # Record a cache hit.
  #
  # @param saved_cost [Numeric] amount added to total_savings
  # @param response_time [Numeric, nil] timing for the cached-response average
  def record_hit(saved_cost: 0.0, response_time: nil)
    synchronize do
      @hits += 1
      @total_savings += saved_cost
      @last_event = :hit
      if response_time
        @cached_time_total += response_time
        @cached_time_count += 1
      end
    end
  end

  # Record a cache miss.
  #
  # @param response_time [Numeric, nil] timing for the API-response average
  def record_miss(response_time: nil)
    synchronize do
      @misses += 1
      @last_event = :miss
      if response_time
        @api_time_total += response_time
        @api_time_count += 1
      end
    end
  end

  # @return [Integer] hits + misses
  def total_queries
    synchronize { @hits + @misses }
  end

  # @return [Float] hits as a percentage of all queries, rounded to one
  #   decimal; 0.0 when nothing has been recorded
  def hit_rate
    synchronize do
      total = @hits + @misses
      total.zero? ? 0.0 : (@hits.to_f / total * 100).round(1)
    end
  end

  # @return [Float] mean of the timings passed to record_miss, rounded to
  #   two decimals; 0.0 when none were recorded
  def avg_response_time
    synchronize { average(@api_time_total, @api_time_count) }
  end

  # @return [Float] mean of the timings passed to record_hit, rounded to
  #   two decimals; 0.0 when none were recorded
  def avg_cached_response_time
    synchronize { average(@cached_time_total, @cached_time_count) }
  end

  # @return [Hash] snapshot of all counters and derived figures
  def to_h
    synchronize do
      {
        hits: @hits,
        misses: @misses,
        total_queries: @hits + @misses,
        hit_rate: hit_rate,
        savings: format("$%.2f", @total_savings),
        total_savings: @total_savings,
        last_hit: @last_event == :hit,
        avg_response_time_ms: avg_response_time,
        avg_cached_response_time_ms: avg_cached_response_time
      }
    end
  end

  # @return [String] multi-line human-readable summary; the timing lines
  #   appear only when at least one timing of that kind was recorded
  def report
    synchronize do
      lines = []
      lines << "Total queries: #{@hits + @misses}"
      lines << "Cache hits: #{@hits}"
      lines << "Cache misses: #{@misses}"
      lines << "Hit rate: #{hit_rate}%"
      lines << "Total savings: #{format("$%.2f", @total_savings)}"
      lines << "Avg API response time: #{avg_response_time}ms" unless @api_time_count.zero?
      lines << "Avg cached response time: #{avg_cached_response_time}ms" unless @cached_time_count.zero?
      lines.join("\n")
    end
  end

  # Reset every counter and timing aggregate to its initial state.
  def reset!
    synchronize { zero_counters }
  end

  private

  # Set all counters and timing aggregates to their starting values.
  def zero_counters
    @hits = 0
    @misses = 0
    @total_savings = 0.0
    @last_event = nil
    # Float totals keep the averages from truncating when integer timings are recorded.
    @api_time_total = 0.0
    @api_time_count = 0
    @cached_time_total = 0.0
    @cached_time_count = 0
  end

  # Mean of a running total, rounded to two decimals (0.0 for an empty series).
  def average(total, count)
    count.zero? ? 0.0 : (total / count).round(2)
  end
end
108
+ end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
require "monitor"
require "set"
4
+
5
+ module SemanticCache
6
+ module Stores
7
+ # Thread-safe in-memory cache store.
8
+ # Good for development, testing, and single-process apps.
9
# In-memory cache store guarded by a monitor.
#
# Entries are kept in a Hash keyed by cache key. A second Hash maps each
# tag to the Set of keys whose entry carries that tag, and is kept in step
# with the entry Hash on every write and delete.
class Memory
  include MonitorMixin

  def initialize(**_options)
    super()
    @data = {}
    @tags_index = Hash.new { |h, k| h[k] = Set.new }
  end

  # Store a cache entry under +key+, replacing any previous entry.
  def write(key, entry)
    synchronize do
      # Drop the replaced entry's tags first so they cannot later
      # invalidate the new entry.
      replaced = @data[key]
      unindex(key, replaced) if replaced

      @data[key] = entry
      entry.tags.each { |tag| @tags_index[tag].add(key) }
    end
  end

  # Retrieve all non-expired entries.
  # Returns an Array of Entry objects.
  def entries
    synchronize do
      cleanup_expired!
      @data.values
    end
  end

  # Delete a specific entry by key.
  # Returns the removed entry's tags, or nil when the key was absent.
  def delete(key)
    synchronize do
      entry = @data.delete(key)
      unindex(key, entry) if entry
      entry&.tags
    end
  end

  # Delete all entries matching the given tags.
  def invalidate_by_tags(tags)
    synchronize do
      Array(tags).each do |tag|
        keys = @tags_index.fetch(tag, nil)
        next unless keys

        # Go through #delete so each key is also removed from the index
        # of every other tag its entry carried. Iterate over a copy
        # because #delete mutates the Set.
        keys.to_a.each { |key| delete(key) }
        @tags_index.delete(tag)
      end
    end
  end

  # Delete all entries.
  def clear
    synchronize do
      @data.clear
      @tags_index.clear
    end
  end

  # Number of non-expired entries in the store.
  def size
    synchronize do
      cleanup_expired!
      @data.size
    end
  end

  private

  # Remove every entry whose #expired? is true.
  def cleanup_expired!
    @data.select { |_key, entry| entry.expired? }.each_key { |key| delete(key) }
  end

  # Remove +key+ from the index of each tag on +entry+, discarding tag
  # sets that become empty.
  def unindex(key, entry)
    Array(entry.tags).each do |tag|
      keys = @tags_index.fetch(tag, nil) # fetch avoids creating a Set for unknown tags
      next unless keys

      keys.delete(key)
      @tags_index.delete(tag) if keys.empty?
    end
  end
end
77
+ end
78
+ end
@@ -0,0 +1,127 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
+ module SemanticCache
6
+ module Stores
7
+ # Redis-backed cache store.
8
+ # Suitable for production, multi-process, and distributed apps.
9
+ #
10
+ # Requires the `redis` gem: gem install redis
11
# Redis-backed cache store.
#
# Key layout (all prefixed with the namespace):
#   <ns>:entry:<key>  JSON-serialized entry
#   <ns>:tag:<tag>    set of entry keys carrying that tag
#   <ns>:keys         set of all entry keys written by this store
class Redis
  # @param redis [Object, nil] a ready client; when nil a ::Redis client is
  #   built from +options+
  # @param namespace [String, nil] key prefix; falls back to
  #   SemanticCache.configuration.namespace
  def initialize(redis: nil, namespace: nil, **options)
    @namespace = namespace || SemanticCache.configuration.namespace
    @redis = redis || connect(options)
  end

  # Store a cache entry.
  def write(key, entry)
    full_key = namespaced_key(key)
    data = entry.to_json

    if entry.ttl
      @redis.setex(full_key, expiry_seconds(entry.ttl), data)
    else
      @redis.set(full_key, data)
    end

    # Maintain tag index
    entry.tags.each { |tag| @redis.sadd(tag_key(tag), full_key) }

    # Add to the set of all keys for scanning
    @redis.sadd(keys_set_key, full_key)
  end

  # Retrieve all non-expired entries.
  # Returns an Array of Entry objects.
  def entries
    keys = @redis.smembers(keys_set_key)
    return [] if keys.nil? || keys.empty?

    values = @redis.mget(*keys)
    result = []

    keys.zip(values) do |key, raw|
      if raw.nil?
        # Key expired in Redis; remove from set
        @redis.srem(keys_set_key, key)
        next
      end

      entry = Entry.from_json(raw)
      if entry.expired?
        delete_raw(key)
        next
      end

      result << entry
    end

    result
  end

  # Delete a specific entry by key.
  def delete(key)
    delete_raw(namespaced_key(key))
  end

  # Delete all entries matching the given tags.
  def invalidate_by_tags(tags)
    Array(tags).each do |tag|
      keys = @redis.smembers(tag_key(tag))
      keys&.each { |key| delete_raw(key) }
      @redis.del(tag_key(tag))
    end
  end

  # Delete all entries.
  def clear
    keys = @redis.smembers(keys_set_key)
    keys&.each { |key| @redis.del(key) }
    @redis.del(keys_set_key)

    # Clean up tag indices. SCAN walks the keyspace incrementally, unlike
    # KEYS, which blocks the server while it inspects every key.
    tag_keys = @redis.scan_each(match: "#{@namespace}:tag:*").to_a
    tag_keys.each { |key| @redis.del(key) }
  end

  # Number of keys in the key set. May include keys that Redis has
  # already expired but that have not yet been pruned by #entries.
  def size
    @redis.scard(keys_set_key).to_i
  end

  private

  # Build a ::Redis client; the gem is loaded lazily so it stays optional.
  def connect(options)
    require "redis"
    ::Redis.new(**options)
  end

  # Whole seconds for SETEX. Rounded up with a floor of 1, because SETEX
  # rejects an expiry of 0 and a sub-second ttl would truncate to 0.
  def expiry_seconds(ttl)
    [ttl.to_f.ceil, 1].max
  end

  def namespaced_key(key)
    "#{@namespace}:entry:#{key}"
  end

  def tag_key(tag)
    "#{@namespace}:tag:#{tag}"
  end

  def keys_set_key
    "#{@namespace}:keys"
  end

  # Delete an entry by its full Redis key, removing it from the tag sets
  # listed in its stored payload (when still present) and from the key set.
  def delete_raw(full_key)
    data = @redis.get(full_key)
    if data
      entry = Entry.from_json(data)
      entry.tags.each { |tag| @redis.srem(tag_key(tag), full_key) }
    end
    @redis.del(full_key)
    @redis.srem(keys_set_key, full_key)
  end
end
126
+ end
127
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
# Namespace for the semantic-cache gem.
module SemanticCache
  # Release version string of the gem.
  VERSION = "0.1.0"
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "semantic_cache/version"
4
+ require_relative "semantic_cache/configuration"
5
+ require_relative "semantic_cache/embedding"
6
+ require_relative "semantic_cache/similarity"
7
+ require_relative "semantic_cache/stores/memory"
8
+ require_relative "semantic_cache/stores/redis"
9
+ require_relative "semantic_cache/stats"
10
+ require_relative "semantic_cache/entry"
11
+ require_relative "semantic_cache/cache"
12
+ require_relative "semantic_cache/client_wrapper"
13
+
14
# Top-level namespace: error hierarchy plus the global configuration API.
module SemanticCache
  # Base class for the library's errors.
  class Error < StandardError
  end

  # Subclass of Error for configuration problems.
  class ConfigurationError < Error
  end

  # Subclass of Error for store problems.
  class StoreError < Error
  end

  class << self
    # Replace the global configuration object.
    def configuration=(value)
      @configuration = value
    end

    # The global configuration, created on first access.
    def configuration
      @configuration = Configuration.new unless @configuration
      @configuration
    end

    # Yield the global configuration to the given block.
    def configure
      yield configuration
    end

    # Discard the current configuration and install a fresh one.
    def reset!
      @configuration = Configuration.new
    end

    # Shorthand for Cache.new; all keyword options are passed through.
    def new(**options)
      Cache.new(**options)
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/semantic_cache/version"
4
+
5
# Gem specification for semantic-cache.
Gem::Specification.new do |spec|
  repo_url = "https://github.com/stokry/semantic-cache"

  spec.name = "semantic-cache"
  spec.version = SemanticCache::VERSION
  spec.authors = ["stokry"]
  spec.email = ["stokry@users.noreply.github.com"]

  spec.summary = "Semantic caching for LLM API calls — save 70%+ on costs"
  spec.description = [
    "Cache LLM responses using semantic similarity matching. ",
    "Similar questions return cached answers instantly, ",
    "cutting API costs by 70% or more. Works with OpenAI, Anthropic, and Gemini."
  ].join
  spec.homepage = repo_url
  spec.license = "MIT"
  spec.required_ruby_version = ">= 3.0.0"

  spec.metadata.merge!(
    "homepage_uri" => spec.homepage,
    "source_code_uri" => "https://github.com/stokry/semantic-cache",
    "changelog_uri" => "https://github.com/stokry/semantic-cache/blob/main/CHANGELOG.md",
    "rubygems_mcp_server_uri" => "https://rubygems.org/gems/semantic-cache"
  )

  # Package every git-tracked file except this gemspec and anything under
  # the excluded path prefixes.
  excluded_prefixes = ["spec/", "test/", ".git", ".github", "bin/", "examples/"]
  spec.files = Dir.chdir(__dir__) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.reject do |path|
      File.expand_path(path) == __FILE__ || path.start_with?(*excluded_prefixes)
    end
  end

  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_dependency "ruby-openai", "~> 7.0"

  {
    "rake" => "~> 13.0",
    "rspec" => "~> 3.0",
    "rubocop" => "~> 1.0",
    "redis" => ">= 4.0",
    "webmock" => "~> 3.0",
    "simplecov" => "~> 0.22"
  }.each do |gem_name, requirement|
    spec.add_development_dependency gem_name, requirement
  end
end