clip-rb 1.0.2 → 1.1.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: c6c5fa03ca9061b273aa50792bfa67159b213049f9630466d18ec621459c227f
-  data.tar.gz: 23e59e48ab413dde5f61fdcce0f83a7ffd7bbe713c4f289840dedec347c4ed4d
+  metadata.gz: 2c81810bbac560a3feb1dd2087cd6c6df70b1cf6f2d51786bea6879b323e2492
+  data.tar.gz: 2b2aa3b7172df21a0b4cc85e11c9ad9ef5549d1a6c902b20e4ccd45c90ed9c9e
 SHA512:
-  metadata.gz: b7ce62a3bbb124e6a5481199a4a56d5ca08b21c0ec71420e9970c543ea320f281cdc820a208087a9122de7d1f61734331346517a97871f24769ca1b77a7206be
-  data.tar.gz: 02a4f1145d2769e85aa8d415c35bc83a83469196e4d792a022f55f0de22e221bf1b19ddc592c9b05b613eafef2cb19aea91f36550d99acaede5dc05f625ed675
+  metadata.gz: a10482e5d8fb4917807fa0a70b6c33a6a51c44af24e156ffc72f2e9e5656295a09c8be38ddbe20b7d58b318f1cd827c04badef8b611f68696e4e35a1c8e32fd6
+  data.tar.gz: 41eca7d9401f5d296245a5de0309a99f04408da87f0604c8fbcaf71dbbaa7c3ea6e3042d913f4e36074bf0beeb8c8ca1a7e6ade4d8ffcb87a7c8fbfa6408c3aa
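
checksums.yaml records SHA-256 and SHA-512 digests for the two files inside the .gem archive, so both entries change with every release. A hypothetical verification sketch (not part of the diff) — a .gem file is a plain tar containing these entries, so they can be extracted with `tar xf clip-rb-1.1.0.gem` first:

```ruby
require "digest"

# After extracting the .gem archive, the local digests should match the
# values listed in checksums.yaml above.
puts Digest::SHA256.file("metadata.gz").hexdigest
# => 2c81810bbac560a3feb1dd2087cd6c6df70b1cf6f2d51786bea6879b323e2492
puts Digest::SHA256.file("data.tar.gz").hexdigest
# => 2b2aa3b7172df21a0b4cc85e11c9ad9ef5549d1a6c902b20e4ccd45c90ed9c9e
```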
data/README.md CHANGED
@@ -21,6 +21,7 @@ See [neighbor gem](https://github.com/ankane/neighbor) to learn more about vecto
 
 - Ruby 3.0.0 or later
 - ONNX CLIP models (downloaded automatically on first use)
+- XLM-Roberta CLIP model (for multilingual support)
 
 ---
 
@@ -54,6 +55,22 @@ image_embedding = clip.encode_image("test/fixtures/test.jpg")
 
 💡 Tip: Use cosine similarity for KNN vector search when comparing embeddings!
 
+## Multilingual text embeddings
+
+Since the original CLIP model only supports English, this gem also provides multilingual text embeddings via the XLM-Roberta model.
+
+```ruby
+require 'clip'
+
+clip = Clip::MultilingualModel.new
+
+text_embedding = clip.encode_text("una foto de un gato")
+# => [0.15546110272407532, 0.07329428941011429, ...]
+
+image_embedding = clip.encode_image("test/fixtures/test.jpg")
+# => [0.22115306556224823, 0.19343754649162292, ...]
+```
+
 ## CLI
 
 Additionally you can fetch embeddings by calling:
data/lib/clip/multilingual_model.rb ADDED
@@ -0,0 +1,46 @@
+require "onnxruntime"
+require "tokenizers"
+
+module Clip
+  class MultilingualModel
+    def initialize(
+      textual_model_path: ".clip_models/multilingual/textual.onnx",
+      visual_model_path: ".clip_models/multilingual/visual.onnx",
+      tokenizer: Tokenizers.from_pretrained("M-CLIP/XLM-Roberta-Large-Vit-B-32"),
+      image_preprocessor: Clip::ImagePreprocessor.new,
+      download_models: true,
+      download_dir: ".clip_models/multilingual"
+    )
+      @textual_model_path = textual_model_path
+      @visual_model_path = visual_model_path
+      Clip.download_models(download_dir, Clip::MULTILINGUAL_MODELS) if download_models && !Clip.models_exist?(textual_model_path: textual_model_path, visual_model_path: visual_model_path)
+      @tokenizer = tokenizer
+      @image_preprocessor = image_preprocessor
+    end
+
+    def encode_text(text)
+      encoding = tokenizer.encode(text)
+      input_ids = [encoding.ids]
+      attention_mask = [Array.new(encoding.ids.size, 1)]
+
+      text_model.predict({ "input_ids" => input_ids, "attention_mask" => attention_mask })['output'].first
+    end
+
+    def encode_image(image)
+      image = image_preprocessor.preprocess(image).to_a
+      image_model.predict({ pixel_values: [ image ] })["output"].first
+    end
+
+    def text_model
+      @text_model ||= OnnxRuntime::Model.new(textual_model_path)
+    end
+
+    def image_model
+      @image_model ||= OnnxRuntime::Model.new(visual_model_path)
+    end
+
+    private
+
+    attr_reader :textual_model_path, :visual_model_path, :tokenizer, :image_preprocessor
+  end
+end
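
The new class tokenizes with the Hugging Face `tokenizers` gem and lazily loads the two ONNX models on first use. A minimal sketch of pairing it with the README's cosine-similarity tip — the `cosine_similarity` helper below is illustrative only, not part of the gem:

```ruby
require "clip"

# Illustrative helper (not provided by clip-rb): cosine similarity of two
# embedding arrays, as suggested by the README's KNN search tip.
def cosine_similarity(a, b)
  dot = a.zip(b).sum { |x, y| x * y }
  dot / (Math.sqrt(a.sum { |x| x * x }) * Math.sqrt(b.sum { |x| x * x }))
end

clip = Clip::MultilingualModel.new
text_embedding  = clip.encode_text("una foto de un gato") # Spanish query
image_embedding = clip.encode_image("test/fixtures/test.jpg")

# A higher score means the caption matches the image more closely.
puts cosine_similarity(text_embedding, image_embedding)
```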
data/lib/clip/version.rb CHANGED
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module Clip
-  VERSION = "1.0.2"
+  VERSION = "1.1.0"
 end
data/lib/clip.rb CHANGED
@@ -1,7 +1,9 @@
 require_relative "clip/model"
+require_relative "clip/multilingual_model"
 require_relative "clip/tokenizer"
 require_relative "clip/image_preprocessor"
 require "net/http"
+require "uri"
 require "fileutils"
 require "logger"
 
@@ -10,36 +12,51 @@ module Clip
 
   BASE_URL = "https://huggingface.co/khasinski/"
   MODELS = {
-    textual: "clip-ViT-B-32-onnx/resolve/main/textual.onnx?download=true",
-    visual: "clip-ViT-B-32-onnx/resolve/main/visual.onnx?download=true"
+    "textual.onnx" => "clip-ViT-B-32-onnx/resolve/main/textual.onnx?download=true",
+    "visual.onnx" => "clip-ViT-B-32-onnx/resolve/main/visual.onnx?download=true"
   }
 
-  def self.download_models(download_dir)
+  MULTILINGUAL_MODELS = {
+    "textual.onnx" => "XLM-Roberta-Large-Vit-B-32-onnx/resolve/main/textual.onnx?download=true",
+    "data.bin" => "XLM-Roberta-Large-Vit-B-32-onnx/resolve/main/data.bin?download=true",
+    "visual.onnx" => "XLM-Roberta-Large-Vit-B-32-onnx/resolve/main/visual.onnx?download=true"
+  }
+
+  def self.download_models(download_dir, models = MODELS)
     logger ||= Logger.new(STDOUT)
     FileUtils.mkdir_p(download_dir)
 
-    MODELS.each do |type, path|
+    models.each do |filename, path|
       uri = URI.join(BASE_URL, path)
-      logger.info("Downloading #{type} model from #{uri}")
-
-      while true
-        response = Net::HTTP.get_response(uri)
-
-        if response.is_a?(Net::HTTPRedirection)
-          logger.info("Redirected to #{response['location']}")
-          uri = URI.parse(response['location']) # Update URI to the redirect location
-          next
-        elsif response.is_a?(Net::HTTPSuccess)
-          file_path = File.join(download_dir, "#{type}.onnx")
-          File.open(file_path, 'wb') do |file|
-            file.write(response.body) # Write the body directly for simplicity
+      logger.info("Downloading #{filename} model from #{uri}")
+
+      self.download_file(uri.to_s, File.join(download_dir, filename))
+    end
+  end
+
+  def self.download_file(url, destination, limit = 10)
+    raise "Too many HTTP redirects" if limit == 0
+
+    uri = URI.parse(url)
+    http = Net::HTTP.new(uri.host, uri.port)
+    http.use_ssl = (uri.scheme == 'https')
+
+    request = Net::HTTP::Get.new(uri.request_uri)
+
+    http.request(request) do |response|
+      case response
+      when Net::HTTPRedirection
+        new_url = response['location']
+        self.download_file(new_url, destination, limit - 1)
+      when Net::HTTPSuccess
+        File.open(destination, 'wb') do |file|
+          response.read_body do |chunk|
+            file.write(chunk)
           end
-          logger.info("Successfully downloaded #{type} model")
-          break
-        else
-          logger.error("Failed to download #{type} model from #{uri}: #{response.code} #{response.message}")
-          raise "Failed to download #{type} model from #{uri}"
         end
+        puts "Downloaded #{url} to #{destination}"
+      else
+        raise "Failed to download file: #{response.code} #{response.message}"
       end
     end
   end
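
Compared with 1.0.2, which looped over redirects inline and buffered each response fully in memory, `download_file` now recurses on redirects (capped at 10) and streams the body to disk chunk by chunk. A brief sketch of calling the new helpers directly, using only the signatures and constants visible in this diff:

```ruby
require "clip"

# Fetch all multilingual model files into a custom directory; each entry of
# MULTILINGUAL_MODELS maps a local filename to a Hugging Face download path.
Clip.download_models(".clip_models/multilingual", Clip::MULTILINGUAL_MODELS)

# Or fetch a single file, following up to 10 redirects while streaming to disk.
Clip.download_file(
  "https://huggingface.co/khasinski/clip-ViT-B-32-onnx/resolve/main/textual.onnx?download=true",
  ".clip_models/textual.onnx"
)
```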
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: clip-rb
 version: !ruby/object:Gem::Version
-  version: 1.0.2
+  version: 1.1.0
 platform: ruby
 authors:
 - Krzysztof Hasiński
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2025-02-04 00:00:00.000000000 Z
+date: 2025-02-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: onnxruntime
@@ -94,6 +94,20 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: tokenizers
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 description: OpenAI CLIP embeddings, uses ONNX models. Allows to create embeddings
   for images and text
 email:
@@ -117,6 +131,7 @@ files:
 - lib/clip.rb
 - lib/clip/image_preprocessor.rb
 - lib/clip/model.rb
+- lib/clip/multilingual_model.rb
 - lib/clip/tokenizer.rb
 - lib/clip/version.rb
 - sig/clip.rbs
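
Since `tokenizers` is declared as a runtime dependency, upgrading picks it up automatically; a minimal Gemfile line, assuming standard Bundler usage:

```ruby
# Gemfile — Bundler resolves the new tokenizers runtime dependency itself
gem "clip-rb", "~> 1.1"
```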