clip-rb 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +19 -0
- data/lib/clip/multilingual_model.rb +46 -0
- data/lib/clip/version.rb +1 -1
- data/lib/clip.rb +39 -22
- metadata +17 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2c81810bbac560a3feb1dd2087cd6c6df70b1cf6f2d51786bea6879b323e2492
|
4
|
+
data.tar.gz: 2b2aa3b7172df21a0b4cc85e11c9ad9ef5549d1a6c902b20e4ccd45c90ed9c9e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a10482e5d8fb4917807fa0a70b6c33a6a51c44af24e156ffc72f2e9e5656295a09c8be38ddbe20b7d58b318f1cd827c04badef8b611f68696e4e35a1c8e32fd6
|
7
|
+
data.tar.gz: 41eca7d9401f5d296245a5de0309a99f04408da87f0604c8fbcaf71dbbaa7c3ea6e3042d913f4e36074bf0beeb8c8ca1a7e6ade4d8ffcb87a7c8fbfa6408c3aa
|
data/README.md
CHANGED
@@ -21,6 +21,7 @@ See [neighbor gem](https://github.com/ankane/neighbor) to learn more about vecto
|
|
21
21
|
|
22
22
|
- Ruby 3.0.0 or later
|
23
23
|
- ONNX CLIP models (downloaded automatically on first use)
|
24
|
+
- XLM Roberta CLIP model (for multilingual support)
|
24
25
|
|
25
26
|
---
|
26
27
|
|
@@ -54,6 +55,24 @@ image_embedding = clip.encode_image("test/fixtures/test.jpg")
|
|
54
55
|
|
55
56
|
💡 Tip: Use cosine similarity for KNN vector search when comparing embeddings!
|
56
57
|
|
58
|
+
## Multilingual text embeddings
|
59
|
+
|
60
|
+
Since the original CLIP only supports English embeddings, this gem now adds support for multilingual text embeddings using the XLM Roberta model.
|
61
|
+
|
62
|
+
```ruby
|
63
|
+
require 'clip'
|
64
|
+
|
65
|
+
clip = Clip::MultilingualModel.new
|
66
|
+
|
67
|
+
text_embedding = clip.encode_text("un photo de un gato")
|
68
|
+
# => [0.15546110272407532, 0.07329428941011429, ...]
|
69
|
+
|
70
|
+
image_embedding = clip.encode_image("test/fixtures/test.jpg")
|
71
|
+
# => [0.22115306556224823, 0.19343754649162292, ...]
|
72
|
+
```
|
73
|
+
|
74
|
+
```bash
|
75
|
+
|
57
76
|
## CLI
|
58
77
|
|
59
78
|
Additionally you can fetch embeddings by calling:
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require "onnxruntime"
|
2
|
+
require "tokenizers"
|
3
|
+
|
4
|
+
module Clip
|
5
|
+
class MultilingualModel
|
6
|
+
def initialize(
|
7
|
+
textual_model_path: ".clip_models/multilingual/textual.onnx",
|
8
|
+
visual_model_path: ".clip_models/multilingual/visual.onnx",
|
9
|
+
tokenizer: Tokenizers.from_pretrained("M-CLIP/XLM-Roberta-Large-Vit-B-32"),
|
10
|
+
image_preprocessor: Clip::ImagePreprocessor.new,
|
11
|
+
download_models: true,
|
12
|
+
download_dir: ".clip_models/multilingual"
|
13
|
+
)
|
14
|
+
@textual_model_path = textual_model_path
|
15
|
+
@visual_model_path = visual_model_path
|
16
|
+
Clip.download_models(download_dir, Clip::MULTILINGUAL_MODELS) if download_models && !Clip.models_exist?(textual_model_path: textual_model_path, visual_model_path: visual_model_path)
|
17
|
+
@tokenizer = tokenizer
|
18
|
+
@image_preprocessor = image_preprocessor
|
19
|
+
end
|
20
|
+
|
21
|
+
def encode_text(text)
|
22
|
+
encoding = tokenizer.encode(text)
|
23
|
+
input_ids = [encoding.ids]
|
24
|
+
attention_mask = [Array.new(encoding.ids.size, 1)]
|
25
|
+
|
26
|
+
text_model.predict({ "input_ids" => input_ids, "attention_mask" => attention_mask })['output'].first
|
27
|
+
end
|
28
|
+
|
29
|
+
def encode_image(image)
|
30
|
+
image = image_preprocessor.preprocess(image).to_a
|
31
|
+
image_model.predict({ pixel_values: [ image ] })["output"].first
|
32
|
+
end
|
33
|
+
|
34
|
+
def text_model
|
35
|
+
@text_model ||= OnnxRuntime::Model.new(textual_model_path)
|
36
|
+
end
|
37
|
+
|
38
|
+
def image_model
|
39
|
+
@image_model ||= OnnxRuntime::Model.new(visual_model_path)
|
40
|
+
end
|
41
|
+
|
42
|
+
private
|
43
|
+
|
44
|
+
attr_reader :textual_model_path, :visual_model_path, :tokenizer, :image_preprocessor
|
45
|
+
end
|
46
|
+
end
|
data/lib/clip/version.rb
CHANGED
data/lib/clip.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
require_relative "clip/model"
|
2
|
+
require_relative "clip/multilingual_model"
|
2
3
|
require_relative "clip/tokenizer"
|
3
4
|
require_relative "clip/image_preprocessor"
|
4
5
|
require "net/http"
|
6
|
+
require "uri"
|
5
7
|
require "fileutils"
|
6
8
|
require "logger"
|
7
9
|
|
@@ -10,36 +12,51 @@ module Clip
|
|
10
12
|
|
11
13
|
BASE_URL = "https://huggingface.co/khasinski/"
|
12
14
|
MODELS = {
|
13
|
-
textual
|
14
|
-
visual
|
15
|
+
"textual.onnx" => "clip-ViT-B-32-onnx/resolve/main/textual.onnx?download=true",
|
16
|
+
"visual.onnx" => "clip-ViT-B-32-onnx/resolve/main/visual.onnx?download=true"
|
15
17
|
}
|
16
18
|
|
17
|
-
|
19
|
+
MULTILINGUAL_MODELS = {
|
20
|
+
"textual.onnx" => "XLM-Roberta-Large-Vit-B-32-onnx/resolve/main/textual.onnx?download=true",
|
21
|
+
"data.bin" => "XLM-Roberta-Large-Vit-B-32-onnx/resolve/main/data.bin?download=true",
|
22
|
+
"visual.onnx" => "XLM-Roberta-Large-Vit-B-32-onnx/resolve/main/visual.onnx?download=true"
|
23
|
+
}
|
24
|
+
|
25
|
+
def self.download_models(download_dir, models = MODELS)
|
18
26
|
logger ||= Logger.new(STDOUT)
|
19
27
|
FileUtils.mkdir_p(download_dir)
|
20
28
|
|
21
|
-
|
29
|
+
models.each do |filename, path|
|
22
30
|
uri = URI.join(BASE_URL, path)
|
23
|
-
logger.info("Downloading #{type} model from #{uri}")
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
31
|
+
logger.info("Downloading #{filename} model from #{uri}")
|
32
|
+
|
33
|
+
self.download_file(uri.to_s, File.join(download_dir, filename))
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.download_file(url, destination, limit = 10)
|
38
|
+
raise "Too many HTTP redirects" if limit == 0
|
39
|
+
|
40
|
+
uri = URI.parse(url)
|
41
|
+
http = Net::HTTP.new(uri.host, uri.port)
|
42
|
+
http.use_ssl = (uri.scheme == 'https')
|
43
|
+
|
44
|
+
request = Net::HTTP::Get.new(uri.request_uri)
|
45
|
+
|
46
|
+
http.request(request) do |response|
|
47
|
+
case response
|
48
|
+
when Net::HTTPRedirection
|
49
|
+
new_url = response['location']
|
50
|
+
self.download_file(new_url, destination, limit - 1)
|
51
|
+
when Net::HTTPSuccess
|
52
|
+
File.open(destination, 'wb') do |file|
|
53
|
+
response.read_body do |chunk|
|
54
|
+
file.write(chunk)
|
36
55
|
end
|
37
|
-
logger.info("Successfully downloaded #{type} model")
|
38
|
-
break
|
39
|
-
else
|
40
|
-
logger.error("Failed to download #{type} model from #{uri}: #{response.code} #{response.message}")
|
41
|
-
raise "Failed to download #{type} model from #{uri}"
|
42
56
|
end
|
57
|
+
puts "Downloaded #{url} to #{destination}"
|
58
|
+
else
|
59
|
+
raise "Failed to download file: #{response.code} #{response.message}"
|
43
60
|
end
|
44
61
|
end
|
45
62
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: clip-rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Krzysztof Hasiński
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-02-
|
11
|
+
date: 2025-02-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: onnxruntime
|
@@ -94,6 +94,20 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: tokenizers
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
97
111
|
description: OpenAI CLIP embeddings, uses ONNX models. Allows to create embeddings
|
98
112
|
for images and text
|
99
113
|
email:
|
@@ -117,6 +131,7 @@ files:
|
|
117
131
|
- lib/clip.rb
|
118
132
|
- lib/clip/image_preprocessor.rb
|
119
133
|
- lib/clip/model.rb
|
134
|
+
- lib/clip/multilingual_model.rb
|
120
135
|
- lib/clip/tokenizer.rb
|
121
136
|
- lib/clip/version.rb
|
122
137
|
- sig/clip.rbs
|