fastembed 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,167 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Unified benchmark comparing Ruby fastembed with Python fastembed
5
+ # Runs both implementations and reports side-by-side results
6
+
7
+ require 'bundler/setup'
8
+ require 'fastembed'
9
+ require 'benchmark'
10
+ require 'json'
11
+ require 'open3'
12
+
13
# Five sample sentences used as input for every stage of the Ruby benchmark.
# The list is frozen so it cannot be modified while the benchmark runs.
TEXTS = [
  'Machine learning is a subset of artificial intelligence that enables systems to learn from data.',
  'Ruby on Rails is a server-side web application framework written in Ruby under the MIT License.',
  'Vector databases store embeddings and enable fast similarity search across millions of documents.',
  'Natural language processing helps computers understand, interpret, and generate human language.',
  'The quick brown fox jumps over the lazy dog. This is a classic pangram used in typing tests.'
].freeze
20
+
21
# Benchmarks the Ruby fastembed implementation.
#
# Measures, in order: model construction time, the best of ten
# single-document embeddings, and throughput for 100, 500 and 1000 documents
# at batch_size 64. All durations come from the monotonic clock.
#
# @return [Hash{Symbol => Float}] :load_time and :single_latency in
#   milliseconds, :throughput_100 / :throughput_500 / :throughput_1000 in
#   documents per second
def run_ruby_benchmark
  puts 'Running Ruby benchmark...'

  now = -> { Process.clock_gettime(Process::CLOCK_MONOTONIC) }
  report = {}

  # Model loading
  load_started = now.call
  embedder = Fastembed::TextEmbedding.new(model_name: 'BAAI/bge-small-en-v1.5', show_progress: false)
  report[:load_time] = ((now.call - load_started) * 1000).round(1)

  # Warmup call; its result is discarded
  embedder.embed(['warmup']).to_a

  # Single document latency: keep the fastest of ten runs
  samples_ms = Array.new(10) do
    began = now.call
    embedder.embed([TEXTS.first]).to_a
    (now.call - began) * 1000
  end
  report[:single_latency] = samples_ms.min.round(2)

  # Throughput: TEXTS is repeated cyclically to reach each document count
  [100, 500, 1000].each do |doc_count|
    batch = TEXTS.cycle.take(doc_count)
    began = now.call
    embedder.embed(batch, batch_size: 64).to_a
    seconds = now.call - began
    report[:"throughput_#{doc_count}"] = (doc_count / seconds).round(1)
  end

  report
end
53
+
54
# Runs the same benchmark against Python's fastembed through `python3 -c`.
#
# The embedded script prints one JSON object (load_time, single_latency,
# throughput_100/500/1000) as its final stdout line. It times with
# time.perf_counter() so both sides of the comparison use a monotonic clock.
# Python's stderr is captured and only re-emitted when the script fails.
#
# @return [Hash{String => Numeric}, nil] parsed results with string keys, or
#   nil when python3 is missing, the script exits non-zero, or its output is
#   not valid JSON
def run_python_benchmark
  puts 'Running Python benchmark...'

  python_script = <<~PYTHON
    import json
    import time
    from fastembed import TextEmbedding

    TEXTS = [
        "Machine learning is a subset of artificial intelligence that enables systems to learn from data.",
        "Ruby on Rails is a server-side web application framework written in Ruby under the MIT License.",
        "Vector databases store embeddings and enable fast similarity search across millions of documents.",
        "Natural language processing helps computers understand, interpret, and generate human language.",
        "The quick brown fox jumps over the lazy dog. This is a classic pangram used in typing tests."
    ]

    results = {}

    # Model loading
    start = time.perf_counter()
    embedding = TextEmbedding(model_name="BAAI/bge-small-en-v1.5")
    results["load_time"] = round((time.perf_counter() - start) * 1000, 1)

    # Warmup
    list(embedding.embed(["warmup"]))

    # Single document latency
    times = []
    for _ in range(10):
        start = time.perf_counter()
        list(embedding.embed([TEXTS[0]]))
        times.append((time.perf_counter() - start) * 1000)
    results["single_latency"] = round(min(times), 2)

    # Throughput tests
    for count in [100, 500, 1000]:
        texts = (TEXTS * (count // len(TEXTS) + 1))[:count]
        start = time.perf_counter()
        list(embedding.embed(texts, batch_size=64))
        elapsed = time.perf_counter() - start
        results[f"throughput_{count}"] = round(count / elapsed, 1)

    print(json.dumps(results))
  PYTHON

  stdout, stderr, status = Open3.capture3('python3', '-c', python_script)

  unless status.success?
    puts 'Warning: Python benchmark failed. Is fastembed installed? (pip install fastembed)'
    # Show the interpreter's own error so causes other than a missing
    # package can be diagnosed.
    warn stderr unless stderr.strip.empty?
    return nil
  end

  # Only the last line is parsed, so anything printed to stdout before the
  # JSON object does not break the comparison.
  JSON.parse(stdout.strip.lines.last.to_s)
rescue JSON::ParserError
  puts 'Warning: Python benchmark did not produce valid JSON output'
  nil
rescue Errno::ENOENT
  puts 'Warning: Python not found'
  nil
end
111
+
112
# Prints a side-by-side table of Ruby and Python benchmark results.
#
# @param ruby_results [Hash{Symbol => Numeric}] results keyed by metric symbol
# @param python_results [Hash{String => Numeric}, nil] results keyed by metric
#   name, or nil when the Python benchmark could not run; missing values are
#   shown as 'N/A'
# @return [void]
def print_comparison(ruby_results, python_results)
  puts
  puts '=' * 70
  puts 'RUBY vs PYTHON FASTEMBED COMPARISON'
  puts '=' * 70
  puts

  metrics = [
    [:load_time, 'Model load time', 'ms', :lower_better],
    [:single_latency, 'Single doc latency', 'ms', :lower_better],
    [:throughput_100, '100 docs throughput', 'docs/sec', :higher_better],
    [:throughput_500, '500 docs throughput', 'docs/sec', :higher_better],
    [:throughput_1000, '1000 docs throughput', 'docs/sec', :higher_better]
  ]

  puts format('%-25s %15s %15s %10s', 'Metric', 'Ruby', 'Python', 'Winner')
  puts '-' * 70

  metrics.each do |key, label, unit, direction|
    ruby_val = ruby_results[key]
    # Python results come from JSON, so their keys are strings
    python_val = python_results&.fetch(key.to_s, nil)

    ruby_str = ruby_val ? "#{ruby_val} #{unit}" : 'N/A'
    python_str = python_val ? "#{python_val} #{unit}" : 'N/A'
    winner_str = comparison_winner(ruby_val, python_val, direction)

    puts format('%-25s %15s %15s %10s', label, ruby_str, python_str, winner_str)
  end

  puts
end

# Builds the "Winner" cell for one metric: 'N/A' when a value is missing,
# 'Tie' when both are equal, otherwise the winner's name followed by how many
# times better it is (larger value divided by smaller value).
def comparison_winner(ruby_val, python_val, direction)
  return 'N/A' unless ruby_val && python_val
  return 'Tie' if ruby_val == python_val

  ruby_wins = direction == :lower_better ? ruby_val < python_val : ruby_val > python_val
  smaller, larger = [ruby_val, python_val].minmax
  "#{ruby_wins ? 'Ruby' : 'Python'} (#{larger.fdiv(smaller).round(1)}x)"
end
156
+
157
# Run the Ruby benchmark, then the Python one, and print the comparison table
print_comparison(run_ruby_benchmark, run_python_benchmark)

# Closing notes; the trailing blank line of the heredoc is printed as well
puts <<~SUMMARY
  Summary:
  - Both use the same ONNX Runtime and HuggingFace Tokenizers
  - Performance differences come from language overhead and batching
  - Ruby tends to win on latency, Python on large batch throughput

SUMMARY
@@ -0,0 +1,60 @@
1
+ #!/usr/bin/env python3
2
+ """Compare Python FastEmbed performance with Ruby FastEmbed."""
3
+
4
+ import time
5
+ from fastembed import TextEmbedding
6
+
7
# Five sample sentences used as benchmark input.
TEXTS = [
    "Machine learning is a subset of artificial intelligence that enables systems to learn from data.",
    "Ruby on Rails is a server-side web application framework written in Ruby under the MIT License.",
    "Vector databases store embeddings and enable fast similarity search across millions of documents.",
    "Natural language processing helps computers understand, interpret, and generate human language.",
    "The quick brown fox jumps over the lazy dog. This is a classic pangram used in typing tests.",
]
14
+
15
def benchmark_python():
    """Run the Python FastEmbed benchmark and print the results to stdout.

    Reports model load time, single-document latency (average and minimum of
    10 runs) for the first three entries of TEXTS, and throughput for 100,
    500 and 1000 documents at batch_size=64.

    Durations are taken with time.perf_counter(), a monotonic high-resolution
    clock, so very short runs do not collapse to a zero elapsed time.
    """
    print("=" * 60)
    print("PYTHON FASTEMBED BENCHMARK")
    print("=" * 60)
    print()

    # Model loading time
    start = time.perf_counter()
    embedding = TextEmbedding(model_name="BAAI/bge-small-en-v1.5")
    load_time = time.perf_counter() - start
    print(f"Model load time: {load_time * 1000:.1f}ms")

    # Warmup call; embed() output is consumed and discarded
    list(embedding.embed(["warmup"]))

    # Single document latency
    print()
    print("Single document latency:")
    for i, text in enumerate(TEXTS[:3]):
        times = []
        for _ in range(10):
            start = time.perf_counter()
            list(embedding.embed([text]))
            times.append(time.perf_counter() - start)
        avg = sum(times) / len(times)
        min_time = min(times)
        print(f" Text {i+1} ({len(text)} chars): avg {avg*1000:.2f}ms, min {min_time*1000:.2f}ms")

    # Throughput
    print()
    print("Throughput:")
    for count in [100, 500, 1000]:
        # Repeat TEXTS often enough to cover `count`, then trim to exactly `count`
        texts = (TEXTS * (count // len(TEXTS) + 1))[:count]

        start = time.perf_counter()
        list(embedding.embed(texts, batch_size=64))
        elapsed = time.perf_counter() - start

        rate = count / elapsed
        print(f" {count} docs: {rate:.1f} docs/sec ({elapsed*1000:.1f}ms)")

    print()
    print("=" * 60)


if __name__ == "__main__":
    benchmark_python()
@@ -0,0 +1,70 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'bundler/setup'
5
+ require 'fastembed'
6
+
7
# Resident set size of the current process in megabytes.
#
# Asks `ps` for the RSS of this PID and divides the number by 1024.
#
# @return [Float]
def memory_mb
  rss = %x[ps -o rss= -p #{Process.pid}].to_i
  rss / 1024.0
end
10
+
11
# Prints the current memory usage, rounded to one decimal, after a label.
#
# @param label [String] text shown before the measurement
# @return [void]
def print_memory(label)
  megabytes = memory_mb.round(1)
  puts format('%s: %s MB', label, megabytes)
end
14
+
15
rule = '=' * 60
puts rule, 'MEMORY PROFILING', rule, ''

print_memory('Initial')

# Load model
embedder = Fastembed::TextEmbedding.new
print_memory('After model load')

# 1000 synthetic documents that differ only in their number
corpus = Array.new(1000) { |n| "This is document number #{n} with some content for embedding." }

# Embed everything and keep the vectors referenced while measuring
GC.start
print_memory('Before embedding 1000 docs')

held_vectors = embedder.embed(corpus, batch_size: 64).to_a
print_memory('After embedding 1000 docs (holding results)')

# Drop the only reference so the GC run below is free to reclaim the vectors
held_vectors = nil
GC.start
sleep 0.1
print_memory('After clearing vectors + GC')

# Iterate over the embeddings without storing any of them
puts '', 'Testing lazy evaluation memory efficiency...'
print_memory('Before lazy processing')

seen = 0
embedder.embed(corpus, batch_size: 64).each { |_vec| seen += 1 }
puts "Processed #{seen} vectors without storing"

GC.start
sleep 0.1
print_memory('After lazy processing + GC')

# Stress test: embed the whole corpus five times, measuring after each round
puts '', 'Stress test - 5 rounds of 1000 docs each...'
1.upto(5) do |round|
  embedder.embed(corpus, batch_size: 64).to_a
  GC.start
  print_memory("After round #{round}")
end

puts '', rule, 'MEMORY PROFILE COMPLETE', rule
@@ -0,0 +1,198 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'bundler/setup'
5
+ require 'fastembed'
6
+ require 'benchmark'
7
+
8
+ # Sample texts of varying lengths
9
+ SHORT_TEXTS = [
10
+ 'Hello world',
11
+ 'Ruby is great',
12
+ 'Machine learning',
13
+ 'Vector embeddings',
14
+ 'Semantic search'
15
+ ].freeze
16
+
17
+ MEDIUM_TEXTS = [
18
+ 'The quick brown fox jumps over the lazy dog. This is a classic pangram used in typing tests.',
19
+ 'Machine learning is a subset of artificial intelligence that enables systems to learn from data.',
20
+ 'Ruby on Rails is a server-side web application framework written in Ruby under the MIT License.',
21
+ 'Vector databases store embeddings and enable fast similarity search across millions of documents.',
22
+ 'Natural language processing helps computers understand, interpret, and generate human language.'
23
+ ].freeze
24
+
25
+ LONG_TEXTS = [
26
+ 'Artificial intelligence (AI) is intelligence demonstrated by machines, as opposed to natural intelligence displayed by animals including humans. AI research has been defined as the field of study of intelligent agents, which refers to any system that perceives its environment and takes actions that maximize its chance of achieving its goals. The term artificial intelligence had previously been used to describe machines that mimic and display human cognitive skills that are associated with the human mind, such as learning and problem-solving.',
27
+ 'Ruby is an interpreted, high-level, general-purpose programming language which supports multiple programming paradigms. It was designed with an emphasis on programming productivity and simplicity. In Ruby, everything is an object, including primitive data types. It was developed in the mid-1990s by Yukihiro Matsumoto in Japan. Ruby is dynamically typed and uses garbage collection and just-in-time compilation.',
28
+ 'Text embeddings are dense vector representations of text that capture semantic meaning. They are produced by machine learning models trained on large corpora of text data. These embeddings enable semantic similarity calculations, clustering, and information retrieval tasks. Modern embedding models like BERT, Sentence Transformers, and OpenAI embeddings have revolutionized natural language processing applications.',
29
+ 'Vector databases are specialized database systems designed to store and query high-dimensional vector data efficiently. They use approximate nearest neighbor algorithms like HNSW, IVF, and PQ to enable fast similarity search at scale. Popular vector databases include Pinecone, Weaviate, Qdrant, Milvus, and pgvector. They are essential infrastructure for semantic search, recommendation systems, and RAG applications.',
30
+ 'The Transformer architecture, introduced in the paper "Attention Is All You Need", revolutionized natural language processing. It relies entirely on self-attention mechanisms, dispensing with recurrence and convolutions. This enables much more parallelization and has led to significant improvements in translation quality. Transformers are the foundation of modern language models like GPT, BERT, and T5.'
31
+ ].freeze
32
+
33
# Prints a horizontal rule of 70 dashes to stdout.
#
# @return [void]
def print_separator
  $stdout.puts('-' * 70)
end
36
+
37
# Formats a throughput figure as "<rate> docs/sec", rounded to one decimal.
#
# Uses floating-point division, so integer arguments are not truncated
# (10 documents in 4 seconds is 2.5, not 2).
#
# @param count [Numeric] number of documents processed
# @param time [Numeric] elapsed time in seconds
# @return [String]
def format_rate(count, time)
  rate = count.fdiv(time)
  "#{rate.round(1)} docs/sec"
end
41
+
42
# Times embedding of +texts+ several times and returns the fastest run.
#
# A GC run is triggered before each measurement. Each run consumes the full
# result of embed via to_a.
#
# @param embedding [#embed] object whose embed(texts, batch_size:) is timed
# @param texts [Array<String>] documents to embed
# @param batch_size [Integer] batch size forwarded to embed
# @param iterations [Integer] number of timed runs
# @return [Float, nil] best elapsed time in seconds (nil when iterations is 0)
def profile_batch(embedding, texts, batch_size, iterations = 3)
  durations = iterations.times.map do
    GC.start
    Benchmark.realtime { embedding.embed(texts, batch_size: batch_size).to_a }
  end
  durations.min
end
53
+
54
+ puts '=' * 70
55
+ puts 'FASTEMBED-RB PERFORMANCE PROFILE'
56
+ puts '=' * 70
57
+ puts
58
+ puts "Ruby version: #{RUBY_VERSION}"
59
+ puts "Platform: #{RUBY_PLATFORM}"
60
+ puts "Fastembed version: #{Fastembed::VERSION}"
61
+ puts
62
+
63
+ # Model loading benchmark
64
+ print_separator
65
+ puts 'MODEL LOADING TIME'
66
+ print_separator
67
+
68
+ models = [
69
+ 'BAAI/bge-small-en-v1.5',
70
+ 'sentence-transformers/all-MiniLM-L6-v2'
71
+ ]
72
+
73
+ models.each do |model_name|
74
+ # Ensure model is downloaded first
75
+ Fastembed::TextEmbedding.new(model_name: model_name)
76
+ GC.start
77
+
78
+ time = Benchmark.realtime do
79
+ Fastembed::TextEmbedding.new(model_name: model_name)
80
+ end
81
+ puts "#{model_name}: #{(time * 1000).round(1)}ms"
82
+ end
83
+
84
+ puts
85
+
86
+ # Single document latency
87
+ print_separator
88
+ puts 'SINGLE DOCUMENT LATENCY (lower is better)'
89
+ print_separator
90
+
91
+ embedding = Fastembed::TextEmbedding.new
92
+ warmup = embedding.embed(['warmup']).to_a # Warm up
93
+
94
+ [SHORT_TEXTS.first, MEDIUM_TEXTS.first, LONG_TEXTS.first].each_with_index do |text, i|
95
+ label = %w[Short Medium Long][i]
96
+ times = []
97
+ 10.times do
98
+ time = Benchmark.realtime { embedding.embed([text]).to_a }
99
+ times << time
100
+ end
101
+ avg = times.sum / times.length
102
+ min = times.min
103
+ puts "#{label} text (#{text.length} chars): avg #{(avg * 1000).round(2)}ms, min #{(min * 1000).round(2)}ms"
104
+ end
105
+
106
+ puts
107
+
108
+ # Throughput benchmarks
109
+ print_separator
110
+ puts 'THROUGHPUT (higher is better)'
111
+ print_separator
112
+
113
+ [10, 100, 500, 1000].each do |count|
114
+ texts = MEDIUM_TEXTS.cycle.take(count)
115
+
116
+ [32, 64, 128, 256].each do |batch_size|
117
+ next if batch_size > count
118
+
119
+ time = profile_batch(embedding, texts, batch_size)
120
+ rate = format_rate(count, time)
121
+ puts "#{count} docs, batch #{batch_size}: #{rate} (#{(time * 1000).round(1)}ms total)"
122
+ end
123
+ puts
124
+ end
125
+
126
+ # Memory efficiency test
127
+ print_separator
128
+ puts 'LAZY EVALUATION TEST'
129
+ print_separator
130
+
131
+ texts = MEDIUM_TEXTS.cycle.take(1000)
132
+ processed = 0
133
+
134
+ time = Benchmark.realtime do
135
+ embedding.embed(texts, batch_size: 64).each do |_vec|
136
+ processed += 1
137
+ break if processed >= 100 # Only process first 100
138
+ end
139
+ end
140
+
141
+ puts "Processed #{processed}/1000 documents in #{(time * 1000).round(1)}ms"
142
+ puts '(Lazy evaluation means we only computed embeddings for documents we needed)'
143
+
144
+ puts
145
+
146
+ # Embedding quality sanity check
147
+ print_separator
148
+ puts 'EMBEDDING QUALITY SANITY CHECK'
149
+ print_separator
150
+
151
+ test_pairs = [
152
+ ['dog', 'puppy', 'high'],
153
+ ['dog', 'cat', 'medium'],
154
+ ['dog', 'airplane', 'low'],
155
+ ['machine learning', 'artificial intelligence', 'high'],
156
+ ['machine learning', 'cooking recipes', 'low']
157
+ ]
158
+
159
# Cosine similarity between two equal-length numeric vectors.
#
# The dot product is divided by the product of both magnitudes, so the
# result is a true cosine even when the inputs are not unit length.
#
# @param a [Array<Numeric>] first vector
# @param b [Array<Numeric>] second vector, same length as +a+
# @return [Float] value in [-1, 1]; 0.0 when either vector has zero magnitude
def cosine_similarity(a, b)
  dot = a.zip(b).sum { |x, y| x * y }
  magnitude = Math.sqrt(a.sum { |x| x * x }) * Math.sqrt(b.sum { |y| y * y })
  # A zero vector has no direction; avoid dividing by zero
  return 0.0 if magnitude.zero?

  dot / magnitude
end
162
+
163
# Pass criteria per expected similarity level
within_expectation = {
  'high' => ->(score) { score > 0.7 },
  'medium' => ->(score) { score > 0.4 && score < 0.8 },
  'low' => ->(score) { score < 0.5 }
}

test_pairs.each do |left, right, expected|
  left_vec, right_vec = embedding.embed([left, right]).to_a
  score = cosine_similarity(left_vec, right_vec)
  criterion = within_expectation[expected]
  # An unknown expectation level leaves the verdict empty
  verdict = criterion && (criterion.call(score) ? 'PASS' : 'FAIL')
  puts "#{verdict}: '#{left}' vs '#{right}' = #{score.round(3)} (expected #{expected})"
end

puts

# Throughput of 500 documents across a range of batch sizes
print_separator
puts 'OPTIMAL BATCH SIZE ANALYSIS'
print_separator

sample = MEDIUM_TEXTS.cycle.take(500)

rates = [1, 8, 16, 32, 64, 128, 256, 512].each_with_object({}) do |batch_size, acc|
  acc[batch_size] = 500.0 / profile_batch(embedding, sample, batch_size, 2)
  puts "Batch #{batch_size.to_s.rjust(3)}: #{acc[batch_size].round(1)} docs/sec"
end

best_size, best_rate = rates.max_by { |_size, rate| rate }
puts '', "Optimal batch size: #{best_size} (#{best_rate.round(1)} docs/sec)"

puts '', '=' * 70, 'PROFILE COMPLETE', '=' * 70
@@ -0,0 +1,158 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Benchmark script for TextCrossEncoder (reranker) performance
5
+
6
+ require 'bundler/setup'
7
+ require 'fastembed'
8
+ require 'benchmark'
9
+
10
# Query that every document is reranked against.
QUERY = 'What is machine learning?'

# Ten candidate documents for the reranker; the list is frozen.
DOCUMENTS = [
  'Machine learning is a subset of artificial intelligence that enables systems to learn from data.',
  'Ruby on Rails is a server-side web application framework written in Ruby under the MIT License.',
  'Deep learning uses neural networks with many layers to model complex patterns in data.',
  'Vector databases store embeddings and enable fast similarity search across millions of documents.',
  'Supervised learning requires labeled training data to learn the mapping from inputs to outputs.',
  'Natural language processing helps computers understand, interpret, and generate human language.',
  'Random forests are ensemble learning methods that construct multiple decision trees.',
  'The quick brown fox jumps over the lazy dog. This is a classic pangram used in typing tests.',
  'Gradient descent is an optimization algorithm used to minimize the loss function in ML models.',
  'Transformers use self-attention mechanisms to process sequential data in parallel.'
].freeze
24
+
25
# Writes a 70-character line of dashes to stdout as a section divider.
#
# @return [void]
def print_separator
  puts ''.ljust(70, '-')
end
28
+
29
# Runs the given block +runs+ times, triggering GC before each run, and
# returns the elapsed seconds of every run.
time_runs = lambda do |runs, &work|
  runs.times.map do
    GC.start
    Benchmark.realtime(&work)
  end
end

puts '=' * 70, 'RERANKER (CROSS-ENCODER) PERFORMANCE BENCHMARK', '=' * 70, ''
puts "Ruby version: #{RUBY_VERSION}",
     "Fastembed version: #{Fastembed::VERSION}",
     ''

# Model loading benchmark
print_separator
puts 'MODEL LOADING TIME'
print_separator

Fastembed::SUPPORTED_RERANKER_MODELS.each_key do |model_name|
  # Untimed first construction (intended to make sure the model is already
  # downloaded); any StandardError marks the model as unavailable.
  loadable =
    begin
      Fastembed::TextCrossEncoder.new(model_name: model_name)
      true
    rescue StandardError
      false
    end

  unless loadable
    puts "#{model_name}: (skipped - not available)"
    next
  end

  GC.start
  load_seconds = Benchmark.realtime { Fastembed::TextCrossEncoder.new(model_name: model_name) }
  puts "#{model_name}: #{(load_seconds * 1000).round(1)}ms"
end

puts

# Use default model for latency tests
reranker = Fastembed::TextCrossEncoder.new

# Single query latency over 20 runs
print_separator
puts 'SINGLE QUERY LATENCY (reranking against 10 documents)'
print_separator

latencies = time_runs.call(20) { reranker.rerank(query: QUERY, documents: DOCUMENTS) }

puts "Average: #{(latencies.sum / latencies.length * 1000).round(2)}ms"
puts "Min: #{(latencies.min * 1000).round(2)}ms"
puts "Max: #{(latencies.max * 1000).round(2)}ms"
puts

# Throughput with varying document counts (best of three runs each)
print_separator
puts 'THROUGHPUT VS DOCUMENT COUNT'
print_separator

[10, 50, 100, 200].each do |doc_count|
  candidates = DOCUMENTS.cycle.take(doc_count)
  fastest = time_runs.call(3) { reranker.rerank(query: QUERY, documents: candidates, batch_size: 64) }.min
  puts "#{doc_count} documents: #{(doc_count / fastest).round(1)} docs/sec (#{(fastest * 1000).round(1)}ms)"
end

puts

# Batch size optimization (best of three runs per batch size)
print_separator
puts 'BATCH SIZE OPTIMIZATION (100 documents)'
print_separator

hundred_docs = DOCUMENTS.cycle.take(100)

rates = [8, 16, 32, 64, 128].each_with_object({}) do |batch_size, acc|
  fastest = time_runs.call(3) do
    reranker.rerank(query: QUERY, documents: hundred_docs, batch_size: batch_size)
  end.min
  acc[batch_size] = 100.0 / fastest
  puts "Batch #{batch_size.to_s.rjust(3)}: #{acc[batch_size].round(1)} docs/sec"
end

best_size, best_rate = rates.max_by { |_size, rate| rate }
puts '', "Optimal batch size: #{best_size} (#{best_rate.round(1)} docs/sec)"

puts

# Quality check: show the five best-scoring documents for the query
print_separator
puts 'RERANKING QUALITY CHECK'
print_separator

top_results = reranker.rerank_with_scores(query: QUERY, documents: DOCUMENTS, top_k: 5)

puts "Query: '#{QUERY}'", '', 'Top 5 results:'
top_results.each.with_index(1) do |entry, rank|
  # Only the first 60 characters of each document are shown
  puts "#{rank}. (#{entry[:score].round(3)}) #{entry[:document][0, 60]}..."
end

puts '', '=' * 70, 'BENCHMARK COMPLETE', '=' * 70
data/exe/fastembed ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'fastembed/cli'
5
+
6
# Build the CLI from the process arguments, then run it
cli = Fastembed::CLI.new(ARGV)
cli.run
data/fastembed.gemspec CHANGED
@@ -32,8 +32,11 @@ Gem::Specification.new do |spec|
32
32
  spec.add_dependency 'onnxruntime', '~> 0.9'
33
33
  spec.add_dependency 'tokenizers', '~> 0.5'
34
34
 
35
+ spec.add_development_dependency 'mini_magick', '~> 4.0'
35
36
  spec.add_development_dependency 'rake', '~> 13.0'
36
37
  spec.add_development_dependency 'rspec', '~> 3.0'
37
38
  spec.add_development_dependency 'rubocop', '~> 1.0'
38
39
  spec.add_development_dependency 'rubocop-rspec', '~> 3.0'
40
+ spec.add_development_dependency 'webmock', '~> 3.0'
41
+ spec.add_development_dependency 'yard', '~> 0.9'
39
42
  end