fastembed 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -0
- data/.yardopts +6 -0
- data/BENCHMARKS.md +124 -1
- data/CHANGELOG.md +14 -0
- data/README.md +395 -74
- data/benchmark/compare_all.rb +167 -0
- data/benchmark/compare_python.py +60 -0
- data/benchmark/memory_profile.rb +70 -0
- data/benchmark/profile.rb +198 -0
- data/benchmark/reranker_benchmark.rb +158 -0
- data/exe/fastembed +6 -0
- data/fastembed.gemspec +3 -0
- data/lib/fastembed/async.rb +193 -0
- data/lib/fastembed/base_model.rb +247 -0
- data/lib/fastembed/base_model_info.rb +61 -0
- data/lib/fastembed/cli.rb +745 -0
- data/lib/fastembed/custom_model_registry.rb +255 -0
- data/lib/fastembed/image_embedding.rb +313 -0
- data/lib/fastembed/late_interaction_embedding.rb +260 -0
- data/lib/fastembed/late_interaction_model_info.rb +91 -0
- data/lib/fastembed/model_info.rb +59 -19
- data/lib/fastembed/model_management.rb +82 -23
- data/lib/fastembed/onnx_embedding_model.rb +25 -4
- data/lib/fastembed/pooling.rb +39 -3
- data/lib/fastembed/progress.rb +52 -0
- data/lib/fastembed/quantization.rb +75 -0
- data/lib/fastembed/reranker_model_info.rb +91 -0
- data/lib/fastembed/sparse_embedding.rb +261 -0
- data/lib/fastembed/sparse_model_info.rb +80 -0
- data/lib/fastembed/text_cross_encoder.rb +217 -0
- data/lib/fastembed/text_embedding.rb +161 -28
- data/lib/fastembed/validators.rb +59 -0
- data/lib/fastembed/version.rb +1 -1
- data/lib/fastembed.rb +42 -1
- data/plan.md +257 -0
- data/scripts/verify_models.rb +229 -0
- metadata +70 -3
|
@@ -0,0 +1,745 @@
|
|
|
1
|
+
# frozen_string_literal: true

require 'fileutils'
require 'json'
require 'optparse'
require_relative '../fastembed'
|
|
6
|
+
|
|
7
|
+
module Fastembed
  # Command-line interface for fastembed.
  #
  # Dispatches subcommands (embed, sparse-embed, rerank, download, info,
  # benchmark, cache, list-*) parsed from ARGV. Each subcommand owns its own
  # OptionParser; shared defaults live in the @options hash set by #initialize.
  class CLI
    # Output formats accepted by the dense `embed` command.
    FORMATS = %w[json ndjson csv].freeze

    # @param argv [Array<String>] command-line arguments; consumed
    #   destructively by the option parsers as commands are dispatched.
    def initialize(argv)
      @argv = argv
      @options = {
        format: 'json',
        model: 'BAAI/bge-small-en-v1.5',
        sparse_model: 'prithivida/Splade_PP_en_v1',
        reranker_model: 'cross-encoder/ms-marco-MiniLM-L-6-v2',
        batch_size: 256,
        top_k: nil
      }
    end

    # Entry point: parse any leading global flags, then dispatch to the
    # subcommand handler. Unknown commands print help and exit 1.
    def run
      command = parse_global_options

      case command
      when 'list-models', 'models'
        list_models
      when 'list-rerankers', 'rerankers'
        list_rerankers
      when 'list-sparse'
        list_sparse
      when 'list-image'
        list_image
      when 'embed'
        embed
      when 'sparse-embed'
        sparse_embed
      when 'rerank'
        rerank
      when 'cache'
        cache_command
      when 'download'
        download_command
      when 'info'
        info_command
      when 'benchmark'
        benchmark_command
      when 'version', '-v', '--version'
        puts "fastembed #{Fastembed::VERSION}"
      when 'help', nil
        puts global_help
      else
        warn "Unknown command: #{command}"
        warn global_help
        exit 1
      end
    end

    private

    # Returns the subcommand name. If the first token is a flag (starts with
    # '-'), let the global parser handle it (-v/-h exit) before shifting.
    def parse_global_options
      return @argv.shift if @argv.empty? || !@argv.first.start_with?('-')

      global_parser.order!(@argv)
      @argv.shift
    end

    # Parser for flags that appear before any subcommand. Both options exit.
    def global_parser
      OptionParser.new do |opts|
        opts.banner = 'Usage: fastembed [options] <command> [command-options]'
        opts.on('-v', '--version', 'Show version') do
          puts "fastembed #{Fastembed::VERSION}"
          exit 0
        end
        opts.on('-h', '--help', 'Show help') do
          puts global_help
          exit 0
        end
      end
    end

    # Top-level usage text listing every subcommand.
    def global_help
      <<~HELP
        fastembed - Fast text embeddings for Ruby

        Usage: fastembed <command> [options]

        Commands:
          embed           Generate dense embeddings for text
          sparse-embed    Generate sparse (SPLADE) embeddings for text
          rerank          Rerank documents by relevance to a query
          download        Pre-download a model for offline use
          info            Show detailed information about a model
          benchmark       Run performance benchmarks
          list-models     List available dense embedding models
          list-sparse     List available sparse embedding models
          list-rerankers  List available reranker models
          list-image      List available image embedding models
          cache           Manage model cache (clear, info)
          version         Show version
          help            Show this help message

        Run 'fastembed <command> --help' for command-specific options.
      HELP
    end

    # Print name/dimensions/description for each supported dense model.
    def list_models
      puts 'Available embedding models:'
      puts
      Fastembed::SUPPORTED_MODELS.each_value do |model|
        puts "  #{model.model_name}"
        puts "    Dimensions: #{model.dim}"
        puts "    Description: #{model.description}"
        puts
      end
    end

    # Print name/size/description for each supported reranker model.
    def list_rerankers
      puts 'Available reranker models:'
      puts
      Fastembed::SUPPORTED_RERANKER_MODELS.each_value do |model|
        puts "  #{model.model_name}"
        puts "    Size: #{model.size_in_gb} GB"
        puts "    Description: #{model.description}"
        puts
      end
    end

    # Print name/size/description for each supported sparse model.
    def list_sparse
      puts 'Available sparse embedding models:'
      puts
      Fastembed::SUPPORTED_SPARSE_MODELS.each_value do |model|
        puts "  #{model.model_name}"
        puts "    Size: #{model.size_in_gb} GB"
        puts "    Description: #{model.description}"
        puts
      end
    end

    # Print name/dimensions/input size/description for each image model.
    def list_image
      puts 'Available image embedding models:'
      puts
      Fastembed::SUPPORTED_IMAGE_MODELS.each_value do |model|
        puts "  #{model.model_name}"
        puts "    Dimensions: #{model.dim}"
        puts "    Image Size: #{model.image_size}x#{model.image_size}"
        puts "    Description: #{model.description}"
        puts
      end
    end

    # Dense embedding subcommand: gather texts, embed them, print results.
    # Exits 1 when no input text can be found.
    def embed
      parse_embed_options
      texts = gather_texts

      if texts.empty?
        warn 'Error: No text provided. Pass text as arguments, use -i FILE, or pipe through stdin.'
        exit 1
      end

      # show_progress here controls model-download progress; the per-batch
      # embedding bar below is opt-in via -p/--progress.
      show_progress = !@options[:quiet]
      embedding = Fastembed::TextEmbedding.new(model_name: @options[:model], show_progress: show_progress)

      embeddings = if @options[:show_progress] && !@options[:quiet]
                     embed_with_progress(embedding, texts)
                   else
                     embedding.embed(texts, batch_size: @options[:batch_size]).to_a
                   end

      output_embeddings(texts, embeddings)
    end

    # Options for `embed`: model, format, batch size, input file, verbosity.
    def parse_embed_options
      OptionParser.new do |opts|
        opts.banner = 'Usage: fastembed embed [options] [text ...]'

        opts.on('-m', '--model MODEL', 'Model to use (default: BAAI/bge-small-en-v1.5)') do |m|
          @options[:model] = m
        end

        opts.on('-f', '--format FORMAT', FORMATS, "Output format: #{FORMATS.join(', ')} (default: json)") do |f|
          @options[:format] = f
        end

        opts.on('-b', '--batch-size SIZE', Integer, 'Batch size (default: 256)') do |b|
          @options[:batch_size] = b
        end

        opts.on('-i', '--input FILE', 'Read texts from file (one per line)') do |file|
          @options[:input_file] = file
        end

        opts.on('-q', '--quiet', 'Suppress progress output') do
          @options[:quiet] = true
        end

        opts.on('-p', '--progress', 'Show progress bar') do
          @options[:show_progress] = true
        end

        opts.on('-h', '--help', 'Show help') do
          puts opts
          exit 0
        end
      end.parse!(@argv)
    end

    # Sparse (SPLADE) embedding subcommand. Exits 1 when no input text.
    def sparse_embed
      parse_sparse_embed_options
      texts = gather_texts

      if texts.empty?
        warn 'Error: No text provided. Pass text as arguments or pipe through stdin.'
        exit 1
      end

      embedding = Fastembed::TextSparseEmbedding.new(model_name: @options[:sparse_model])
      sparse_embeddings = embedding.embed(texts, batch_size: @options[:batch_size]).to_a

      output_sparse_embeddings(texts, sparse_embeddings)
    end

    # Options for `sparse-embed`. CSV is not offered: sparse vectors are
    # (indices, values) pairs, not fixed-width rows.
    def parse_sparse_embed_options
      OptionParser.new do |opts|
        opts.banner = 'Usage: fastembed sparse-embed [options] [text ...]'

        opts.on('-m', '--model MODEL', 'Model to use (default: prithivida/Splade_PP_en_v1)') do |m|
          @options[:sparse_model] = m
        end

        opts.on('-f', '--format FORMAT', %w[json ndjson], 'Output format: json, ndjson (default: json)') do |f|
          @options[:format] = f
        end

        opts.on('-b', '--batch-size SIZE', Integer, 'Batch size (default: 256)') do |b|
          @options[:batch_size] = b
        end

        opts.on('-h', '--help', 'Show help') do
          puts opts
          exit 0
        end
      end.parse!(@argv)
    end

    # Route sparse output to the writer matching @options[:format].
    def output_sparse_embeddings(texts, embeddings)
      case @options[:format]
      when 'json'
        output_sparse_json(texts, embeddings)
      when 'ndjson'
        output_sparse_ndjson(texts, embeddings)
      end
    end

    # Pretty-printed JSON array of {text, indices, values} records.
    def output_sparse_json(texts, embeddings)
      result = texts.zip(embeddings).map do |text, emb|
        { text: text, indices: emb.indices, values: emb.values }
      end
      puts JSON.pretty_generate(result)
    end

    # One compact JSON object per line (newline-delimited JSON).
    def output_sparse_ndjson(texts, embeddings)
      texts.zip(embeddings).each do |text, emb|
        puts JSON.generate({ text: text, indices: emb.indices, values: emb.values })
      end
    end

    # Collect input texts. Priority: -i FILE > positional args > piped stdin.
    # Returns [] when stdin is a TTY and nothing else was supplied.
    def gather_texts
      if @options[:input_file]
        read_texts_from_file(@options[:input_file])
      elsif @argv.any?
        @argv
      elsif !$stdin.tty?
        $stdin.read.split("\n").reject(&:empty?)
      else
        []
      end
    end

    # Read one text per line from file_path, dropping blank lines.
    # Exits 1 when the file does not exist.
    def read_texts_from_file(file_path)
      unless File.exist?(file_path)
        warn "Error: File not found: #{file_path}"
        exit 1
      end

      File.readlines(file_path, chomp: true).reject(&:empty?)
    end

    # Embed texts while drawing a batch-progress bar on stderr (so stdout
    # stays clean for the embedding output).
    def embed_with_progress(embedding, texts)
      total = texts.length
      batch_size = @options[:batch_size]
      total_batches = (total.to_f / batch_size).ceil
      embeddings = embedding.embed(texts, batch_size: batch_size) do |progress|
        percent = (progress.current.to_f / progress.total * 100).round
        bar_width = 30
        # Float division so the fill is rounded instead of truncated.
        filled = (bar_width * progress.current.to_f / progress.total).round
        bar = ('=' * filled) + ('-' * (bar_width - filled))
        $stderr.print "\r[#{bar}] #{percent}% (#{progress.current}/#{total_batches} batches)"
      end.to_a

      $stderr.puts
      embeddings
    end

    # Route dense output to the writer matching @options[:format].
    def output_embeddings(texts, embeddings)
      case @options[:format]
      when 'json'
        output_json(texts, embeddings)
      when 'ndjson'
        output_ndjson(texts, embeddings)
      when 'csv'
        output_csv(embeddings)
      end
    end

    # Pretty-printed JSON array of {text, embedding} records.
    def output_json(texts, embeddings)
      result = texts.zip(embeddings).map do |text, embedding|
        { text: text, embedding: embedding }
      end
      puts JSON.pretty_generate(result)
    end

    # One compact JSON object per line (newline-delimited JSON).
    def output_ndjson(texts, embeddings)
      texts.zip(embeddings).each do |text, embedding|
        puts JSON.generate({ text: text, embedding: embedding })
      end
    end

    # Raw vectors, one comma-separated row per embedding. Note: the source
    # texts are intentionally omitted (rows align with input order).
    def output_csv(embeddings)
      embeddings.each do |embedding|
        puts embedding.join(',')
      end
    end

    # Rerank subcommand: score gathered documents against a required query.
    # Exits 1 when the query or the documents are missing.
    def rerank
      parse_rerank_options

      if @options[:query].nil? || @options[:query].empty?
        warn 'Error: Query is required. Use -q or --query to specify.'
        exit 1
      end

      documents = gather_texts
      if documents.empty?
        warn 'Error: No documents provided. Pass documents as arguments or pipe through stdin.'
        exit 1
      end

      reranker = Fastembed::TextCrossEncoder.new(model_name: @options[:reranker_model])
      results = reranker.rerank_with_scores(
        query: @options[:query],
        documents: documents,
        top_k: @options[:top_k],
        batch_size: @options[:batch_size]
      )

      output_rerank_results(results)
    end

    # Options for `rerank`: query (required), model, top-k, format, batch size.
    def parse_rerank_options
      OptionParser.new do |opts|
        opts.banner = 'Usage: fastembed rerank -q QUERY [options] [documents ...]'

        opts.on('-q', '--query QUERY', 'Query to rank documents against (required)') do |q|
          @options[:query] = q
        end

        opts.on('-m', '--model MODEL', 'Reranker model (default: cross-encoder/ms-marco-MiniLM-L-6-v2)') do |m|
          @options[:reranker_model] = m
        end

        opts.on('-k', '--top-k K', Integer, 'Return only top K results') do |k|
          @options[:top_k] = k
        end

        opts.on('-f', '--format FORMAT', %w[json ndjson], 'Output format: json, ndjson (default: json)') do |f|
          @options[:format] = f
        end

        opts.on('-b', '--batch-size SIZE', Integer, 'Batch size (default: 256)') do |b|
          @options[:batch_size] = b
        end

        opts.on('-h', '--help', 'Show help') do
          puts opts
          exit 0
        end
      end.parse!(@argv)
    end

    # Print rerank results as pretty JSON or NDJSON per @options[:format].
    def output_rerank_results(results)
      case @options[:format]
      when 'json'
        puts JSON.pretty_generate(results)
      when 'ndjson'
        results.each { |r| puts JSON.generate(r) }
      end
    end

    # Cache subcommand dispatcher (`clear`, `info`, `help`).
    def cache_command
      subcommand = @argv.shift

      case subcommand
      when 'clear'
        cache_clear
      when 'info'
        cache_info
      when 'help', nil, '--help', '-h'
        puts cache_help
      else
        warn "Unknown cache subcommand: #{subcommand}"
        warn cache_help
        exit 1
      end
    end

    # Usage text for the cache subcommand.
    def cache_help
      <<~HELP
        Usage: fastembed cache <subcommand>

        Subcommands:
          clear    Remove all cached models
          info     Show cache directory and size

        Examples:
          fastembed cache info
          fastembed cache clear
      HELP
    end

    # Delete all cached models after an interactive y/N confirmation.
    def cache_clear
      cache_path = ModelManagement.cache_dir
      models_path = File.join(cache_path, 'models')

      unless Dir.exist?(models_path)
        puts 'Cache is empty.'
        return
      end

      # Count models before clearing so the prompt can report how many.
      model_count = Dir.glob(File.join(models_path, '*')).count { |f| File.directory?(f) }

      if model_count.zero?
        puts 'Cache is empty.'
        return
      end

      print "Remove #{model_count} cached model(s)? [y/N] "
      # &. guards against EOF (gets returns nil when stdin is closed).
      response = $stdin.gets&.strip&.downcase

      if response == 'y'
        FileUtils.rm_rf(models_path)
        puts 'Cache cleared.'
      else
        puts 'Aborted.'
      end
    end

    # Show the cache directory, each cached model with its on-disk size,
    # and the total size.
    def cache_info
      cache_path = ModelManagement.cache_dir
      models_path = File.join(cache_path, 'models')

      puts "Cache directory: #{cache_path}"
      puts

      unless Dir.exist?(models_path)
        puts 'No models cached.'
        return
      end

      models = Dir.glob(File.join(models_path, '*')).select { |f| File.directory?(f) }

      if models.empty?
        puts 'No models cached.'
        return
      end

      total_size = 0
      puts 'Cached models:'
      models.each do |model_dir|
        # Directory names encode '/' as '--' (e.g. BAAI--bge-small-en-v1.5).
        name = File.basename(model_dir).gsub('--', '/')
        size = directory_size(model_dir)
        total_size += size
        puts "  #{name} (#{format_size(size)})"
      end

      puts
      puts "Total: #{models.count} model(s), #{format_size(total_size)}"
    end

    # Sum of the sizes of all regular files under path, in bytes.
    def directory_size(path)
      Dir.glob(File.join(path, '**', '*'))
         .select { |f| File.file?(f) }
         .sum { |f| File.size(f) }
    end

    # Human-readable byte count using binary units (KB = 1024 bytes).
    def format_size(bytes)
      if bytes >= 1_073_741_824
        format('%.2f GB', bytes / 1_073_741_824.0)
      elsif bytes >= 1_048_576
        format('%.2f MB', bytes / 1_048_576.0)
      elsif bytes >= 1024
        format('%.2f KB', bytes / 1024.0)
      else
        "#{bytes} B"
      end
    end

    # Download command - pre-download models for offline use.
    # Exits 1 on missing name, unknown model, or download failure.
    def download_command
      parse_download_options

      model_name = @argv.shift
      if model_name.nil?
        warn 'Error: Model name required.'
        warn 'Usage: fastembed download <model-name>'
        warn "\nExamples:"
        warn '  fastembed download BAAI/bge-small-en-v1.5'
        warn '  fastembed download --type reranker cross-encoder/ms-marco-MiniLM-L-6-v2'
        exit 1
      end

      model_type = @options[:download_type] || :embedding

      begin
        model_info = resolve_model_for_download(model_name, model_type)
        puts "Downloading #{model_name}..."
        ModelManagement.retrieve_model(model_name, model_info: model_info, show_progress: true)
        puts 'Download complete!'
      rescue ArgumentError => e
        warn "Error: #{e.message}"
        exit 1
      rescue DownloadError => e
        warn "Download failed: #{e.message}"
        exit 1
      end
    end

    # Options for `download`: model type selector plus help.
    def parse_download_options
      OptionParser.new do |opts|
        opts.banner = 'Usage: fastembed download [options] <model-name>'

        opts.on('-t', '--type TYPE', %w[embedding reranker sparse late-interaction image],
                'Model type (embedding, reranker, sparse, late-interaction, image)') do |t|
          # CLI uses kebab-case; registries are keyed by snake_case symbols.
          @options[:download_type] = t.tr('-', '_').to_sym
        end

        opts.on('-h', '--help', 'Show help') do
          puts opts
          puts "\nExamples:"
          puts '  fastembed download BAAI/bge-small-en-v1.5'
          puts '  fastembed download --type reranker cross-encoder/ms-marco-MiniLM-L-6-v2'
          exit 0
        end
      end.parse!(@argv)
    end

    # Look up model_name in the registry for the given type (built-in models
    # merged with custom registrations). Raises ArgumentError when unknown.
    def resolve_model_for_download(model_name, type)
      registry = case type
                 when :embedding
                   SUPPORTED_MODELS.merge(CustomModelRegistry.embedding_models)
                 when :reranker
                   SUPPORTED_RERANKER_MODELS.merge(CustomModelRegistry.reranker_models)
                 when :sparse
                   SUPPORTED_SPARSE_MODELS.merge(CustomModelRegistry.sparse_models)
                 when :late_interaction
                   SUPPORTED_LATE_INTERACTION_MODELS.merge(CustomModelRegistry.late_interaction_models)
                 when :image
                   SUPPORTED_IMAGE_MODELS
                 else
                   SUPPORTED_MODELS.merge(CustomModelRegistry.embedding_models)
                 end

      model_info = registry[model_name]
      raise ArgumentError, "Unknown #{type} model: #{model_name}" unless model_info

      model_info
    end

    # Info command - show detailed model information.
    # Exits 1 on missing name or unknown model.
    def info_command
      parse_info_options

      model_name = @argv.shift
      if model_name.nil?
        warn 'Error: Model name required.'
        warn 'Usage: fastembed info <model-name>'
        exit 1
      end

      model_info = find_model_info(model_name)
      if model_info.nil?
        warn "Unknown model: #{model_name}"
        exit 1
      end

      display_model_info(model_name, model_info)
    end

    # Options for `info`: help only.
    def parse_info_options
      OptionParser.new do |opts|
        opts.banner = 'Usage: fastembed info <model-name>'

        opts.on('-h', '--help', 'Show help') do
          puts opts
          exit 0
        end
      end.parse!(@argv)
    end

    # Search every model registry (built-in and custom, all model kinds)
    # and return the first match, or nil.
    def find_model_info(model_name)
      SUPPORTED_MODELS[model_name] ||
        CustomModelRegistry.embedding_models[model_name] ||
        SUPPORTED_RERANKER_MODELS[model_name] ||
        CustomModelRegistry.reranker_models[model_name] ||
        SUPPORTED_SPARSE_MODELS[model_name] ||
        CustomModelRegistry.sparse_models[model_name] ||
        SUPPORTED_LATE_INTERACTION_MODELS[model_name] ||
        CustomModelRegistry.late_interaction_models[model_name] ||
        SUPPORTED_IMAGE_MODELS[model_name]
    end

    # Print a human-readable summary of a model: common fields, any
    # type-specific fields it responds to, source URL, and cache status.
    def display_model_info(model_name, info)
      puts "Model: #{model_name}"
      puts "  Description: #{info.description}"
      puts "  Size: #{info.size_in_gb} GB"
      puts "  Max Length: #{info.max_length} tokens"
      puts "  Model File: #{info.model_file}"
      puts "  Tokenizer: #{info.tokenizer_file}"

      # Type-specific info: guarded with respond_to? because different model
      # kinds (dense/sparse/reranker/image) expose different attributes.
      puts "  Dimensions: #{info.dim}" if info.respond_to?(:dim) && info.dim
      puts "  Pooling: #{info.pooling}" if info.respond_to?(:pooling)
      puts "  Normalize: #{info.normalize}" if info.respond_to?(:normalize)
      puts "  Image Size: #{info.image_size}x#{info.image_size}" if info.respond_to?(:image_size)

      # Source info
      puts "  HuggingFace: https://huggingface.co/#{info.sources[:hf]}" if info.sources[:hf]

      # Cache status
      cache_path = ModelManagement.model_directory(info)
      if ModelManagement.model_cached?(cache_path, info)
        size = directory_size(cache_path)
        puts "  Cached: Yes (#{format_size(size)})"
      else
        puts '  Cached: No'
      end
    end

    # Benchmark command - run performance benchmarks: measure model load
    # time, then time `iterations` embedding passes over a synthetic batch
    # and report avg/min/max latency and throughput.
    def benchmark_command
      parse_benchmark_options

      model_name = @options[:model]
      iterations = @options[:iterations]
      batch_size = @options[:batch_size]

      puts "Benchmarking #{model_name}..."
      puts "  Iterations: #{iterations}"
      puts "  Batch size: #{batch_size}"
      puts

      # Sample texts for benchmarking: repeat four sentences until the batch
      # is at least full, then trim to exactly batch_size.
      sample_texts = [
        'The quick brown fox jumps over the lazy dog.',
        'Machine learning is transforming how we build software.',
        'Ruby is a dynamic, open source programming language.',
        'Embeddings convert text into numerical vectors.'
      ] * ((batch_size / 4) + 1)
      sample_texts = sample_texts.first(batch_size)

      begin
        # Load model (measure load time). Monotonic clock is immune to
        # wall-clock adjustments.
        puts 'Loading model...'
        load_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        embedding = Fastembed::TextEmbedding.new(model_name: model_name)
        load_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) - load_start
        puts "  Load time: #{format('%.2f', load_time)}s"
        puts

        # Warmup run so first-call overhead doesn't skew the timings.
        puts 'Warming up...'
        embedding.embed(sample_texts.first(4)).to_a

        # Benchmark
        puts "Running #{iterations} iterations..."
        times = []
        iterations.times do |i|
          start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
          embedding.embed(sample_texts, batch_size: batch_size).to_a
          elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start
          times << elapsed
          print "\r  Progress: #{i + 1}/#{iterations}"
        end
        puts

        # Results
        puts
        puts 'Results:'
        avg_time = times.sum / times.length
        min_time = times.min
        max_time = times.max
        throughput = batch_size / avg_time

        puts "  Avg time:   #{format('%.3f', avg_time)}s"
        puts "  Min time:   #{format('%.3f', min_time)}s"
        puts "  Max time:   #{format('%.3f', max_time)}s"
        puts "  Throughput: #{format('%.1f', throughput)} texts/sec"
        puts "  Dimensions: #{embedding.dim}"
      rescue StandardError => e
        warn "Benchmark failed: #{e.message}"
        exit 1
      end
    end

    # Options for `benchmark`: model, iteration count, batch size.
    def parse_benchmark_options
      @options[:iterations] = 10

      OptionParser.new do |opts|
        opts.banner = 'Usage: fastembed benchmark [options]'

        opts.on('-m', '--model MODEL', 'Model to benchmark (default: BAAI/bge-small-en-v1.5)') do |m|
          @options[:model] = m
        end

        opts.on('-n', '--iterations N', Integer, 'Number of iterations (default: 10)') do |n|
          @options[:iterations] = n
        end

        opts.on('-b', '--batch-size SIZE', Integer, 'Batch size (default: 256)') do |b|
          @options[:batch_size] = b
        end

        opts.on('-h', '--help', 'Show help') do
          puts opts
          exit 0
        end
      end.parse!(@argv)
    end
  end
end
|