clip-rb 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +19 -0
- data/lib/clip/multilingual_model.rb +46 -0
- data/lib/clip/version.rb +1 -1
- data/lib/clip.rb +39 -22
- metadata +17 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2c81810bbac560a3feb1dd2087cd6c6df70b1cf6f2d51786bea6879b323e2492
|
4
|
+
data.tar.gz: 2b2aa3b7172df21a0b4cc85e11c9ad9ef5549d1a6c902b20e4ccd45c90ed9c9e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a10482e5d8fb4917807fa0a70b6c33a6a51c44af24e156ffc72f2e9e5656295a09c8be38ddbe20b7d58b318f1cd827c04badef8b611f68696e4e35a1c8e32fd6
|
7
|
+
data.tar.gz: 41eca7d9401f5d296245a5de0309a99f04408da87f0604c8fbcaf71dbbaa7c3ea6e3042d913f4e36074bf0beeb8c8ca1a7e6ade4d8ffcb87a7c8fbfa6408c3aa
|
data/README.md
CHANGED
@@ -21,6 +21,7 @@ See [neighbor gem](https://github.com/ankane/neighbor) to learn more about vecto
|
|
21
21
|
|
22
22
|
- Ruby 3.0.0 or later
|
23
23
|
- ONNX CLIP models (downloaded automatically on first use)
|
24
|
+
- XLM Roberta CLIP model (for multilingual support)
|
24
25
|
|
25
26
|
---
|
26
27
|
|
@@ -54,6 +55,24 @@ image_embedding = clip.encode_image("test/fixtures/test.jpg")
|
|
54
55
|
|
55
56
|
💡 Tip: Use cosine similarity for KNN vector search when comparing embeddings!
|
56
57
|
|
58
|
+
## Multilingual text embeddings
|
59
|
+
|
60
|
+
Since the original CLIP only supports English embeddings, this gem now adds support for multilingual text embeddings using the XLM Roberta model.
|
61
|
+
|
62
|
+
```ruby
|
63
|
+
require 'clip'
|
64
|
+
|
65
|
+
clip = Clip::MultilingualModel.new
|
66
|
+
|
67
|
+
text_embedding = clip.encode_text("un photo de un gato")
|
68
|
+
# => [0.15546110272407532, 0.07329428941011429, ...]
|
69
|
+
|
70
|
+
image_embedding = clip.encode_image("test/fixtures/test.jpg")
|
71
|
+
# => [0.22115306556224823, 0.19343754649162292, ...]
|
72
|
+
```
|
73
|
+
|
74
|
+
```bash
|
75
|
+
|
57
76
|
## CLI
|
58
77
|
|
59
78
|
Additionally you can fetch embeddings by calling:
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require "onnxruntime"
|
2
|
+
require "tokenizers"
|
3
|
+
|
4
|
+
module Clip
|
5
|
+
class MultilingualModel
|
6
|
+
def initialize(
|
7
|
+
textual_model_path: ".clip_models/multilingual/textual.onnx",
|
8
|
+
visual_model_path: ".clip_models/multilingual/visual.onnx",
|
9
|
+
tokenizer: Tokenizers.from_pretrained("M-CLIP/XLM-Roberta-Large-Vit-B-32"),
|
10
|
+
image_preprocessor: Clip::ImagePreprocessor.new,
|
11
|
+
download_models: true,
|
12
|
+
download_dir: ".clip_models/multilingual"
|
13
|
+
)
|
14
|
+
@textual_model_path = textual_model_path
|
15
|
+
@visual_model_path = visual_model_path
|
16
|
+
Clip.download_models(download_dir, Clip::MULTILINGUAL_MODELS) if download_models && !Clip.models_exist?(textual_model_path: textual_model_path, visual_model_path: visual_model_path)
|
17
|
+
@tokenizer = tokenizer
|
18
|
+
@image_preprocessor = image_preprocessor
|
19
|
+
end
|
20
|
+
|
21
|
+
def encode_text(text)
|
22
|
+
encoding = tokenizer.encode(text)
|
23
|
+
input_ids = [encoding.ids]
|
24
|
+
attention_mask = [Array.new(encoding.ids.size, 1)]
|
25
|
+
|
26
|
+
text_model.predict({ "input_ids" => input_ids, "attention_mask" => attention_mask })['output'].first
|
27
|
+
end
|
28
|
+
|
29
|
+
def encode_image(image)
|
30
|
+
image = image_preprocessor.preprocess(image).to_a
|
31
|
+
image_model.predict({ pixel_values: [ image ] })["output"].first
|
32
|
+
end
|
33
|
+
|
34
|
+
def text_model
|
35
|
+
@text_model ||= OnnxRuntime::Model.new(textual_model_path)
|
36
|
+
end
|
37
|
+
|
38
|
+
def image_model
|
39
|
+
@image_model ||= OnnxRuntime::Model.new(visual_model_path)
|
40
|
+
end
|
41
|
+
|
42
|
+
private
|
43
|
+
|
44
|
+
attr_reader :textual_model_path, :visual_model_path, :tokenizer, :image_preprocessor
|
45
|
+
end
|
46
|
+
end
|
data/lib/clip/version.rb
CHANGED
data/lib/clip.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
require_relative "clip/model"
|
2
|
+
require_relative "clip/multilingual_model"
|
2
3
|
require_relative "clip/tokenizer"
|
3
4
|
require_relative "clip/image_preprocessor"
|
4
5
|
require "net/http"
|
6
|
+
require "uri"
|
5
7
|
require "fileutils"
|
6
8
|
require "logger"
|
7
9
|
|
@@ -10,36 +12,51 @@ module Clip
|
|
10
12
|
|
11
13
|
BASE_URL = "https://huggingface.co/khasinski/"
|
12
14
|
MODELS = {
|
13
|
-
textual
|
14
|
-
visual
|
15
|
+
"textual.onnx" => "clip-ViT-B-32-onnx/resolve/main/textual.onnx?download=true",
|
16
|
+
"visual.onnx" => "clip-ViT-B-32-onnx/resolve/main/visual.onnx?download=true"
|
15
17
|
}
|
16
18
|
|
17
|
-
|
19
|
+
MULTILINGUAL_MODELS = {
|
20
|
+
"textual.onnx" => "XLM-Roberta-Large-Vit-B-32-onnx/resolve/main/textual.onnx?download=true",
|
21
|
+
"data.bin" => "XLM-Roberta-Large-Vit-B-32-onnx/resolve/main/data.bin?download=true",
|
22
|
+
"visual.onnx" => "XLM-Roberta-Large-Vit-B-32-onnx/resolve/main/visual.onnx?download=true"
|
23
|
+
}
|
24
|
+
|
25
|
+
def self.download_models(download_dir, models = MODELS)
|
18
26
|
logger ||= Logger.new(STDOUT)
|
19
27
|
FileUtils.mkdir_p(download_dir)
|
20
28
|
|
21
|
-
|
29
|
+
models.each do |filename, path|
|
22
30
|
uri = URI.join(BASE_URL, path)
|
23
|
-
logger.info("Downloading #{type} model from #{uri}")
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
31
|
+
logger.info("Downloading #{filename} model from #{uri}")
|
32
|
+
|
33
|
+
self.download_file(uri.to_s, File.join(download_dir, filename))
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.download_file(url, destination, limit = 10)
|
38
|
+
raise "Too many HTTP redirects" if limit == 0
|
39
|
+
|
40
|
+
uri = URI.parse(url)
|
41
|
+
http = Net::HTTP.new(uri.host, uri.port)
|
42
|
+
http.use_ssl = (uri.scheme == 'https')
|
43
|
+
|
44
|
+
request = Net::HTTP::Get.new(uri.request_uri)
|
45
|
+
|
46
|
+
http.request(request) do |response|
|
47
|
+
case response
|
48
|
+
when Net::HTTPRedirection
|
49
|
+
new_url = response['location']
|
50
|
+
self.download_file(new_url, destination, limit - 1)
|
51
|
+
when Net::HTTPSuccess
|
52
|
+
File.open(destination, 'wb') do |file|
|
53
|
+
response.read_body do |chunk|
|
54
|
+
file.write(chunk)
|
36
55
|
end
|
37
|
-
logger.info("Successfully downloaded #{type} model")
|
38
|
-
break
|
39
|
-
else
|
40
|
-
logger.error("Failed to download #{type} model from #{uri}: #{response.code} #{response.message}")
|
41
|
-
raise "Failed to download #{type} model from #{uri}"
|
42
56
|
end
|
57
|
+
puts "Downloaded #{url} to #{destination}"
|
58
|
+
else
|
59
|
+
raise "Failed to download file: #{response.code} #{response.message}"
|
43
60
|
end
|
44
61
|
end
|
45
62
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: clip-rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Krzysztof Hasiński
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-02-
|
11
|
+
date: 2025-02-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: onnxruntime
|
@@ -94,6 +94,20 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: tokenizers
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
97
111
|
description: OpenAI CLIP embeddings, uses ONNX models. Allows to create embeddings
|
98
112
|
for images and text
|
99
113
|
email:
|
@@ -117,6 +131,7 @@ files:
|
|
117
131
|
- lib/clip.rb
|
118
132
|
- lib/clip/image_preprocessor.rb
|
119
133
|
- lib/clip/model.rb
|
134
|
+
- lib/clip/multilingual_model.rb
|
120
135
|
- lib/clip/tokenizer.rb
|
121
136
|
- lib/clip/version.rb
|
122
137
|
- sig/clip.rbs
|