RubyGems - semantic-cache - Versions diffs - 0.1.0 - Mend

semantic-cache 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

checksums.yaml +7 -0
data/CHANGELOG.md +19 -0
data/Gemfile +9 -0
data/LICENSE +21 -0
data/README.md +307 -0
data/Rakefile +8 -0
data/lib/semantic_cache/cache.rb +187 -0
data/lib/semantic_cache/client_wrapper.rb +66 -0
data/lib/semantic_cache/configuration.rb +50 -0
data/lib/semantic_cache/embedding.rb +45 -0
data/lib/semantic_cache/entry.rb +62 -0
data/lib/semantic_cache/rails.rb +76 -0
data/lib/semantic_cache/similarity.rb +31 -0
data/lib/semantic_cache/stats.rb +108 -0
data/lib/semantic_cache/stores/memory.rb +78 -0
data/lib/semantic_cache/stores/redis.rb +127 -0
data/lib/semantic_cache/version.rb +5 -0
data/lib/semantic_cache.rb +39 -0
data/semantic_cache.gemspec +43 -0
metadata +162 -0

data/lib/semantic_cache/embedding.rb ADDED Viewed

@@ -0,0 +1,45 @@
+# frozen_string_literal: true
+require "openai"
module SemanticCache
  # Wrapper around the OpenAI embeddings endpoint.
  #
  # The model name and API key default to the values held by
  # SemanticCache.configuration and can be overridden per instance.
  class Embedding
    # @param model [String, nil] embedding model; falls back to
    #   +configuration.embedding_model+ when nil.
    # @param api_key [String, nil] OpenAI access token; falls back to
    #   +configuration.openai_api_key+ when nil.
    def initialize(model: nil, api_key: nil)
      config = SemanticCache.configuration
      @model = model || config.embedding_model
      @client = OpenAI::Client.new(access_token: api_key || config.openai_api_key)
    end

    # Generate an embedding vector for the given text.
    #
    # @param text [String] the text to embed.
    # @return [Array<Float>] the embedding of the first result.
    # @raise [SemanticCache::Error] when the response carries no embedding.
    def generate(text)
      response = request_embeddings(text)
      vector = response.dig("data", 0, "embedding")
      raise Error, "Failed to generate embedding: #{response}" if vector.nil?

      vector
    end

    # Generate embeddings for multiple texts in a single API call.
    #
    # An empty (or nil) batch returns an empty Array without calling the API.
    #
    # @param texts [Array<String>] the texts to embed.
    # @return [Array<Array<Float>>] one vector per result, ordered by the
    #   "index" field of each item (response position is used when an item
    #   has no "index").
    # @raise [SemanticCache::Error] when the response has no "data" or any
    #   item lacks an "embedding".
    def generate_batch(texts)
      return [] if Array(texts).empty?

      response = request_embeddings(texts)
      data = response["data"]
      raise Error, "Failed to generate embeddings: #{response}" if data.nil?

      vectors = data.each_with_index
                    .sort_by { |item, position| item["index"] || position }
                    .map { |item, _position| item["embedding"] }
      # A nil vector would only blow up later, inside the similarity math.
      raise Error, "Failed to generate embeddings: #{response}" if vectors.any?(&:nil?)

      vectors
    end

    private

    # Single place where the embeddings request is built.
    def request_embeddings(input)
      @client.embeddings(parameters: { model: @model, input: input })
    end
  end
end

data/lib/semantic_cache/entry.rb ADDED Viewed

@@ -0,0 +1,62 @@
+# frozen_string_literal: true
+require "json"
module SemanticCache
  # A single cached query/response pair together with the embedding used for
  # similarity lookups and the bookkeeping needed for expiry and tagging.
  class Entry
    attr_reader :query, :embedding, :response, :model, :tags, :created_at, :ttl, :metadata

    # @param query [String] the original query text.
    # @param embedding [Array<Float>] embedding vector for the query.
    # @param response [Object] the cached response.
    # @param model [String, nil] model name associated with the response.
    # @param tags [Array, Object] one tag or a list of tags (coerced with Array()).
    # @param ttl [Numeric, nil] lifetime in seconds; nil means never expires.
    # @param metadata [Hash, nil] free-form extra data; nil is stored as {}.
    # @param created_at [Float, nil] creation time as epoch seconds; defaults
    #   to the current time. Used when rebuilding an entry from storage.
    def initialize(query:, embedding:, response:, model: nil, tags: [], ttl: nil, metadata: {},
                   created_at: nil)
      @query = query
      @embedding = embedding
      @response = response
      @model = model
      @tags = Array(tags)
      @created_at = created_at || Time.now.to_f
      @ttl = ttl
      @metadata = metadata || {}
    end

    # @return [Boolean] true once more than +ttl+ seconds have elapsed since
    #   +created_at+; always false when no ttl is set.
    def expired?
      return false if @ttl.nil?

      Time.now.to_f - @created_at > @ttl
    end

    # @return [Hash] symbol-keyed snapshot of every attribute.
    def to_h
      {
        query: @query,
        embedding: @embedding,
        response: @response,
        model: @model,
        tags: @tags,
        created_at: @created_at,
        ttl: @ttl,
        metadata: @metadata
      }
    end

    # Serialise #to_h as JSON.
    def to_json(*args)
      to_h.to_json(*args)
    end

    # Rebuild an entry from a Hash with String or Symbol keys.
    #
    # Only top-level keys are symbolised; nested structures such as
    # +metadata+ are passed through unchanged.
    #
    # @param hash [Hash] attributes as produced by #to_h or JSON.parse.
    # @return [Entry]
    # @raise [ArgumentError] when +hash+ is nil.
    def self.from_h(hash)
      raise ArgumentError, "cannot build an Entry from nil" if hash.nil?

      attrs = hash.is_a?(Hash) ? hash.transform_keys(&:to_sym) : hash
      new(
        query: attrs[:query],
        embedding: attrs[:embedding],
        response: attrs[:response],
        model: attrs[:model],
        tags: attrs[:tags] || [],
        ttl: attrs[:ttl],
        metadata: attrs[:metadata] || {},
        created_at: attrs[:created_at]
      )
    end

    # Rebuild an entry from the JSON produced by #to_json.
    def self.from_json(json_string)
      from_h(JSON.parse(json_string))
    end
  end
end

data/lib/semantic_cache/rails.rb ADDED Viewed

@@ -0,0 +1,76 @@
+# frozen_string_literal: true
+require "semantic_cache"
module SemanticCache
  # Rails integration for SemanticCache.
  #
  # In your Gemfile:
  #   gem "semantic-cache"
  #
  # Then require in an initializer:
  #   require "semantic_cache/rails"
  #
  # Usage in controllers:
  #
  #   class ChatController < ApplicationController
  #     include SemanticCache::Cacheable
  #
  #     cache_ai_calls only: [:create], ttl: 1.hour
  #
  #     def create
  #       response = SemanticCache.current.fetch(params[:message]) do
  #         openai_client.chat(messages: [{ role: "user", content: params[:message] }])
  #       end
  #       render json: { response: response }
  #     end
  #   end
  #
  # Or with per-user namespacing:
  #
  #   class ApplicationController < ActionController::Base
  #     around_action :with_semantic_cache
  #
  #     private
  #
  #     def with_semantic_cache
  #       SemanticCache.with_cache(namespace: "user_#{current_user.id}") do
  #         yield
  #       end
  #     end
  #   end
  #
  module Cacheable
    # Adds the class-level macro to whatever includes this module.
    def self.included(base)
      base.extend(ClassMethods)
    end

    module ClassMethods
      # Wrap the selected actions in SemanticCache.with_cache.
      #
      # @param only [Array<Symbol>, Symbol, nil] forwarded to around_action.
      # @param except [Array<Symbol>, Symbol, nil] forwarded to around_action.
      # @param ttl [Numeric, nil] passed to the cache as +default_ttl+ only
      #   when given, so an omitted ttl does not override the cache's own default.
      # @param namespace [String, nil] cache namespace for these actions.
      def cache_ai_calls(only: nil, except: nil, ttl: nil, namespace: nil)
        action_filter = { only: only, except: except }.compact
        cache_options = { default_ttl: ttl }.compact

        around_action(**action_filter) do |_controller, action|
          SemanticCache.with_cache(namespace: namespace, **cache_options) do
            action.call
          end
        end
      end
    end
  end

  class << self
    # Cache instance installed by the innermost enclosing with_cache call, or
    # nil outside of one. Stored via Thread.current[], which Ruby scopes to
    # the current fiber.
    def current
      Thread.current[:semantic_cache_instance]
    end

    # Build a Cache, expose it through SemanticCache.current for the duration
    # of the block, then restore whatever instance was active before (also
    # when the block raises).
    #
    # @param namespace [String, nil] forwarded to Cache.new.
    # @param options [Hash] further keyword arguments for Cache.new.
    # @return [Object] the block's return value.
    # @raise [ArgumentError] when called without a block.
    def with_cache(namespace: nil, **options)
      raise ArgumentError, "with_cache requires a block" unless block_given?

      previous = Thread.current[:semantic_cache_instance]
      begin
        Thread.current[:semantic_cache_instance] = Cache.new(namespace: namespace, **options)
        yield
      ensure
        Thread.current[:semantic_cache_instance] = previous
      end
    end
  end
end

data/lib/semantic_cache/similarity.rb ADDED Viewed

@@ -0,0 +1,31 @@
+# frozen_string_literal: true
module SemanticCache
  # Vector similarity helpers, callable as SemanticCache::Similarity.cosine.
  module Similarity
    module_function

    # Compute cosine similarity between two vectors.
    #
    # @param vec_a [Array<Numeric>]
    # @param vec_b [Array<Numeric>]
    # @return [Float] a value between -1.0 and 1.0; 0.0 when either vector is
    #   empty or has zero magnitude.
    # @raise [ArgumentError] when both vectors are non-empty but differ in length.
    def cosine(vec_a, vec_b)
      return 0.0 if vec_a.empty? || vec_b.empty?
      raise ArgumentError, "Vectors must be the same length" unless vec_a.length == vec_b.length

      dot_product = 0.0
      magnitude_a = 0.0
      magnitude_b = 0.0

      vec_a.each_with_index do |a, i|
        b = vec_b[i]
        dot_product += a * b
        magnitude_a += a * a
        magnitude_b += b * b
      end

      denominator = Math.sqrt(magnitude_a) * Math.sqrt(magnitude_b)
      return 0.0 if denominator.zero?

      similarity = dot_product / denominator
      # NaN cannot be compared, so it is returned as-is rather than clamped.
      return similarity if similarity.nan?

      # Rounding can push the quotient a hair past +/-1.0; keep the documented range.
      similarity.clamp(-1.0, 1.0)
    end
  end
end

data/lib/semantic_cache/stats.rb ADDED Viewed

@@ -0,0 +1,108 @@
+# frozen_string_literal: true
+require "monitor"
module SemanticCache
  # Hit/miss counters, accumulated savings and response-time averages for a
  # cache. Every mutation and every derived figure runs inside the
  # MonitorMixin lock.
  #
  # Response times are kept as a running total and count rather than as a
  # list, so memory use stays constant however many events are recorded.
  class Stats
    include MonitorMixin

    attr_reader :hits, :misses, :total_savings, :last_event

    def initialize
      super() # MonitorMixin's initializer creates the monitor behind #synchronize
      reset_counters
    end

    # Record a cache hit.
    #
    # @param saved_cost [Numeric] amount added to the running savings total.
    # @param response_time [Numeric, nil] time taken to serve the hit; skipped when nil.
    def record_hit(saved_cost: 0.0, response_time: nil)
      synchronize do
        @hits += 1
        @total_savings += saved_cost
        @last_event = :hit
        if response_time
          @cached_time_total += response_time
          @cached_time_count += 1
        end
      end
    end

    # Record a cache miss.
    #
    # @param response_time [Numeric, nil] time taken by the uncached call; skipped when nil.
    def record_miss(response_time: nil)
      synchronize do
        @misses += 1
        @last_event = :miss
        if response_time
          @api_time_total += response_time
          @api_time_count += 1
        end
      end
    end

    # @return [Integer] hits plus misses.
    def total_queries
      synchronize { @hits + @misses }
    end

    # @return [Float] hits as a percentage of all queries, rounded to one
    #   decimal place; 0.0 when nothing has been recorded.
    def hit_rate
      synchronize do
        total = @hits + @misses
        total.zero? ? 0.0 : (@hits.to_f / total * 100).round(1)
      end
    end

    # @return [Float] mean response time of recorded misses, rounded to two
    #   decimal places; 0.0 when none were recorded.
    def avg_response_time
      synchronize { average(@api_time_total, @api_time_count) }
    end

    # @return [Float] mean response time of recorded hits, rounded to two
    #   decimal places; 0.0 when none were recorded.
    def avg_cached_response_time
      synchronize { average(@cached_time_total, @cached_time_count) }
    end

    # @return [Hash] snapshot of all counters and derived figures.
    def to_h
      synchronize do
        {
          hits: @hits,
          misses: @misses,
          total_queries: @hits + @misses,
          hit_rate: hit_rate,
          savings: format("$%.2f", @total_savings),
          total_savings: @total_savings,
          last_hit: @last_event == :hit,
          avg_response_time_ms: avg_response_time,
          avg_cached_response_time_ms: avg_cached_response_time
        }
      end
    end

    # @return [String] multi-line human-readable summary. The timing lines
    #   appear only when at least one matching response time was recorded.
    def report
      synchronize do
        lines = []
        lines << "Total queries: #{@hits + @misses}"
        lines << "Cache hits: #{@hits}"
        lines << "Cache misses: #{@misses}"
        lines << "Hit rate: #{hit_rate}%"
        lines << "Total savings: #{format("$%.2f", @total_savings)}"
        lines << "Avg API response time: #{avg_response_time}ms" if @api_time_count.positive?
        lines << "Avg cached response time: #{avg_cached_response_time}ms" if @cached_time_count.positive?
        lines.join("\n")
      end
    end

    # Zero every counter.
    def reset!
      synchronize { reset_counters }
    end

    private

    # Shared by #initialize and #reset! so the two cannot drift apart.
    def reset_counters
      @hits = 0
      @misses = 0
      @total_savings = 0.0
      @last_event = nil
      # Totals start as Floats so integer timings are not averaged with
      # integer division.
      @api_time_total = 0.0
      @api_time_count = 0
      @cached_time_total = 0.0
      @cached_time_count = 0
    end

    # Mean rounded to two decimals, or 0.0 when there are no samples.
    def average(total, count)
      return 0.0 if count.zero?

      (total / count).round(2)
    end
  end
end

data/lib/semantic_cache/stores/memory.rb ADDED Viewed

@@ -0,0 +1,78 @@
+# frozen_string_literal: true
+require "monitor"
require "set" # Set is only autoloaded from Ruby 3.2; the gemspec allows 3.0+

module SemanticCache
  module Stores
    # In-memory cache store whose operations all run inside a MonitorMixin lock.
    # Good for development, testing, and single-process apps.
    #
    # Besides the entries themselves the store keeps a tag index (tag => Set
    # of keys). Every path that removes or replaces an entry also drops that
    # key from the index, so a tag never points at an entry that no longer
    # carries it.
    class Memory
      include MonitorMixin

      def initialize(**_options)
        super()
        @data = {}
        @tags_index = {}
      end

      # Store a cache entry, replacing any entry already held under +key+.
      #
      # @param key [Object] store key.
      # @param entry [#tags, #expired?] the entry to keep.
      def write(key, entry)
        synchronize do
          # Forget the tags of the entry being replaced before indexing the new one.
          unindex(key, @data[key])
          @data[key] = entry
          Array(entry.tags).each { |tag| (@tags_index[tag] ||= Set.new).add(key) }
        end
      end

      # Retrieve all non-expired entries (expired ones are purged first).
      #
      # @return [Array] the live entries.
      def entries
        synchronize do
          cleanup_expired!
          @data.values
        end
      end

      # Delete a specific entry by key. Unknown keys are ignored.
      def delete(key)
        synchronize do
          unindex(key, @data.delete(key))
          nil
        end
      end

      # Delete all entries matching the given tags.
      #
      # @param tags [Array, Object] one tag or a list of tags.
      def invalidate_by_tags(tags)
        synchronize do
          Array(tags).each do |tag|
            keys = @tags_index.delete(tag)
            # delete also removes each key from the other tags it was filed under.
            keys&.each { |key| delete(key) }
          end
        end
      end

      # Delete all entries.
      def clear
        synchronize do
          @data.clear
          @tags_index.clear
        end
      end

      # Number of non-expired entries in the store.
      def size
        synchronize do
          cleanup_expired!
          @data.size
        end
      end

      private

      # Remove every expired entry. Callers already hold the lock.
      def cleanup_expired!
        expired_keys = @data.select { |_key, entry| entry.expired? }.keys
        expired_keys.each { |key| delete(key) }
      end

      # Drop +key+ from the index set of each tag on +entry+, discarding sets
      # that become empty. No-op when +entry+ is nil.
      def unindex(key, entry)
        return if entry.nil?

        Array(entry.tags).each do |tag|
          keys = @tags_index[tag]
          next unless keys

          keys.delete(key)
          @tags_index.delete(tag) if keys.empty?
        end
      end
    end
  end
end

data/lib/semantic_cache/stores/redis.rb ADDED Viewed

@@ -0,0 +1,127 @@
+# frozen_string_literal: true
+require "json"
module SemanticCache
  module Stores
    # Redis-backed cache store.
    # Suitable for production, multi-process, and distributed apps.
    #
    # Requires the `redis` gem: gem install redis
    #
    # Key layout (all prefixed with the namespace):
    #   <ns>:entry:<key>  JSON payload of one entry
    #   <ns>:tag:<tag>    set of entry keys carrying that tag
    #   <ns>:keys         set of every entry key, used for enumeration
    class Redis
      # @param redis [Object, nil] an existing client; when nil a new
      #   ::Redis is built from +options+.
      # @param namespace [String, nil] key prefix; defaults to the configured namespace.
      # @param options [Hash] forwarded to ::Redis.new when no client is given.
      def initialize(redis: nil, namespace: nil, **options)
        @namespace = namespace || SemanticCache.configuration.namespace
        @redis = redis || connect(options)
      end

      # Store a cache entry and register it in the tag and key indices.
      def write(key, entry)
        full_key = namespaced_key(key)
        payload = entry.to_json

        if entry.ttl
          @redis.setex(full_key, redis_ttl(entry.ttl), payload)
        else
          @redis.set(full_key, payload)
        end

        # Maintain tag index
        entry.tags.each { |tag| @redis.sadd(tag_key(tag), full_key) }

        # Add to the set of all keys for scanning
        @redis.sadd(keys_set_key, full_key)
      end

      # Retrieve all non-expired entries.
      #
      # Index members whose value has disappeared are pruned, and entries that
      # are expired or whose payload is not valid JSON are deleted, not returned.
      #
      # @return [Array<Entry>]
      def entries
        keys = @redis.smembers(keys_set_key)
        return [] if keys.nil? || keys.empty?

        values = @redis.mget(*keys)
        keys.zip(values).each_with_object([]) do |(key, raw), live|
          if raw.nil?
            # Key expired in Redis; remove from set
            @redis.srem(keys_set_key, key)
            next
          end

          entry = parse_entry(raw)
          if entry.nil? || entry.expired?
            delete_raw(key)
            next
          end

          live << entry
        end
      end

      # Delete a specific entry by key.
      def delete(key)
        delete_raw(namespaced_key(key))
      end

      # Delete all entries matching the given tags.
      def invalidate_by_tags(tags)
        Array(tags).each do |tag|
          keys = @redis.smembers(tag_key(tag))
          keys&.each { |key| delete_raw(key) }
          @redis.del(tag_key(tag))
        end
      end

      # Delete all entries and every tag index in this namespace.
      def clear
        keys = @redis.smembers(keys_set_key)
        keys&.each { |key| @redis.del(key) }
        @redis.del(keys_set_key)

        # Clean up tag indices
        tag_index_keys.each { |key| @redis.del(key) }
      end

      # Number of keys in the key index. Members are only removed by
      # #entries and the delete paths, so keys that expired in Redis since
      # the last #entries call are still counted.
      def size
        @redis.scard(keys_set_key).to_i
      end

      private

      # Deliberately lazy: the redis gem is only needed once this store is used.
      def connect(options)
        require "redis"
        ::Redis.new(**options)
      end

      def namespaced_key(key)
        "#{@namespace}:entry:#{key}"
      end

      def tag_key(tag)
        "#{@namespace}:tag:#{tag}"
      end

      def keys_set_key
        "#{@namespace}:keys"
      end

      # SETEX needs a whole number of seconds greater than zero. Rounding up
      # (and never below 1) keeps sub-second TTLs from being sent as 0;
      # Entry#expired? still enforces the exact TTL on read.
      def redis_ttl(ttl)
        [ttl.to_f.ceil, 1].max
      end

      # Tag index keys for this namespace. Uses incremental SCAN when the
      # client supports it, instead of the blocking KEYS command.
      def tag_index_keys
        pattern = "#{@namespace}:tag:*"
        return @redis.keys(pattern) unless @redis.respond_to?(:scan_each)

        @redis.scan_each(match: pattern).to_a
      end

      # Parse a stored payload; nil when it is not valid JSON.
      def parse_entry(raw)
        Entry.from_json(raw)
      rescue JSON::ParserError
        nil
      end

      # Remove one entry by its full key, along with its tag and key index
      # memberships. Tags are read from the payload, so an entry that already
      # vanished or cannot be parsed is only removed from the key index.
      def delete_raw(full_key)
        raw = @redis.get(full_key)
        entry = raw && parse_entry(raw)
        entry&.tags&.each { |tag| @redis.srem(tag_key(tag), full_key) }

        @redis.del(full_key)
        @redis.srem(keys_set_key, full_key)
      end
    end
  end
end

data/lib/semantic_cache/version.rb ADDED Viewed

@@ -0,0 +1,5 @@
+# frozen_string_literal: true
module SemanticCache
  # Release number of the semantic-cache gem.
  VERSION = "0.1.0"
end

data/lib/semantic_cache.rb ADDED Viewed

@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+require_relative "semantic_cache/version"
+require_relative "semantic_cache/configuration"
+require_relative "semantic_cache/embedding"
+require_relative "semantic_cache/similarity"
+require_relative "semantic_cache/stores/memory"
+require_relative "semantic_cache/stores/redis"
+require_relative "semantic_cache/stats"
+require_relative "semantic_cache/entry"
+require_relative "semantic_cache/cache"
+require_relative "semantic_cache/client_wrapper"
module SemanticCache
  # Root of the gem's exception hierarchy.
  class Error < StandardError
  end

  # Subclass of Error for configuration problems.
  class ConfigurationError < Error
  end

  # Subclass of Error for cache store problems.
  class StoreError < Error
  end

  # Replace the global configuration object.
  def self.configuration=(value)
    @configuration = value
  end

  # Global configuration, created on first access.
  def self.configuration
    @configuration ||= Configuration.new
  end

  # Yield the global configuration for in-place modification.
  def self.configure
    yield(configuration)
  end

  # Discard the current configuration and start from a fresh one.
  def self.reset!
    @configuration = Configuration.new
  end

  # Convenience method: SemanticCache.new(...) is Cache.new(...).
  def self.new(**options)
    Cache.new(**options)
  end
end

data/semantic_cache.gemspec ADDED Viewed

@@ -0,0 +1,43 @@
+# frozen_string_literal: true
+require_relative "lib/semantic_cache/version"
Gem::Specification.new do |spec|
  # --- Identity -------------------------------------------------------------
  spec.name = "semantic-cache"
  spec.version = SemanticCache::VERSION
  spec.authors = ["stokry"]
  spec.email = ["stokry@users.noreply.github.com"]

  # --- Description ----------------------------------------------------------
  spec.summary = "Semantic caching for LLM API calls — save 70%+ on costs"
  spec.description = "Cache LLM responses using semantic similarity matching. " \
                     "Similar questions return cached answers instantly, " \
                     "cutting API costs by 70% or more. Works with OpenAI, Anthropic, and Gemini."
  spec.homepage = "https://github.com/stokry/semantic-cache"
  spec.license = "MIT"
  spec.required_ruby_version = ">= 3.0.0"

  # --- Registry metadata ----------------------------------------------------
  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/stokry/semantic-cache"
  spec.metadata["changelog_uri"] = "https://github.com/stokry/semantic-cache/blob/main/CHANGELOG.md"
  spec.metadata["rubygems_mcp_server_uri"] = "https://rubygems.org/gems/semantic-cache"

  # --- Packaged files -------------------------------------------------------
  # Ship every git-tracked file except this gemspec and paths under the
  # excluded prefixes.
  excluded_prefixes = ["spec/", "test/", ".git", ".github", "bin/", "examples/"]
  spec.files = Dir.chdir(__dir__) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.reject do |path|
      File.expand_path(path) == __FILE__ || path.start_with?(*excluded_prefixes)
    end
  end
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}).map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # --- Dependencies ---------------------------------------------------------
  spec.add_dependency "ruby-openai", "~> 7.0"

  {
    "rake" => "~> 13.0",
    "rspec" => "~> 3.0",
    "rubocop" => "~> 1.0",
    "redis" => ">= 4.0",
    "webmock" => "~> 3.0",
    "simplecov" => "~> 0.22"
  }.each do |gem_name, requirement|
    spec.add_development_dependency gem_name, requirement
  end
end