fine 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +20 -10
- data/docs/examples/image-classification-shapes.md +83 -0
- data/docs/examples/text-embeddings-faq.md +98 -0
- data/docs/quickstart.md +209 -0
- data/docs/tutorials/lora-tool-calling.md +306 -0
- data/examples/data/generate_tool_data.rb +261 -0
- data/examples/data/ollama_tool_calls.jsonl +40 -0
- data/examples/data/sentiment_reviews.jsonl +30 -0
- data/examples/data/shapes/circle/circle_1.jpg +0 -0
- data/examples/data/shapes/circle/circle_10.jpg +0 -0
- data/examples/data/shapes/circle/circle_2.jpg +0 -0
- data/examples/data/shapes/circle/circle_3.jpg +0 -0
- data/examples/data/shapes/circle/circle_4.jpg +0 -0
- data/examples/data/shapes/circle/circle_5.jpg +0 -0
- data/examples/data/shapes/circle/circle_6.jpg +0 -0
- data/examples/data/shapes/circle/circle_7.jpg +0 -0
- data/examples/data/shapes/circle/circle_8.jpg +0 -0
- data/examples/data/shapes/circle/circle_9.jpg +0 -0
- data/examples/data/shapes/square/square_1.jpg +0 -0
- data/examples/data/shapes/square/square_10.jpg +0 -0
- data/examples/data/shapes/square/square_2.jpg +0 -0
- data/examples/data/shapes/square/square_3.jpg +0 -0
- data/examples/data/shapes/square/square_4.jpg +0 -0
- data/examples/data/shapes/square/square_5.jpg +0 -0
- data/examples/data/shapes/square/square_6.jpg +0 -0
- data/examples/data/shapes/square/square_7.jpg +0 -0
- data/examples/data/shapes/square/square_8.jpg +0 -0
- data/examples/data/shapes/square/square_9.jpg +0 -0
- data/examples/data/shapes/triangle/triangle_1.jpg +0 -0
- data/examples/data/shapes/triangle/triangle_10.jpg +0 -0
- data/examples/data/shapes/triangle/triangle_2.jpg +0 -0
- data/examples/data/shapes/triangle/triangle_3.jpg +0 -0
- data/examples/data/shapes/triangle/triangle_4.jpg +0 -0
- data/examples/data/shapes/triangle/triangle_5.jpg +0 -0
- data/examples/data/shapes/triangle/triangle_6.jpg +0 -0
- data/examples/data/shapes/triangle/triangle_7.jpg +0 -0
- data/examples/data/shapes/triangle/triangle_8.jpg +0 -0
- data/examples/data/shapes/triangle/triangle_9.jpg +0 -0
- data/examples/data/support_faq_pairs.jsonl +30 -0
- data/examples/generate_shape_images.rb +94 -0
- data/examples/sentiment_classification.rb +87 -0
- data/examples/shape_classification.rb +87 -0
- data/examples/support_faq_embeddings.rb +105 -0
- data/examples/train_lora_tools.rb +218 -0
- data/lib/fine/configuration.rb +173 -15
- data/lib/fine/datasets/image_dataset.rb +14 -2
- data/lib/fine/datasets/instruction_dataset.rb +17 -2
- data/lib/fine/datasets/text_dataset.rb +15 -5
- data/lib/fine/hub/config_loader.rb +4 -4
- data/lib/fine/hub/safetensors_loader.rb +3 -2
- data/lib/fine/llm.rb +39 -10
- data/lib/fine/lora.rb +214 -0
- data/lib/fine/models/bert_encoder.rb +15 -6
- data/lib/fine/models/bert_for_sequence_classification.rb +35 -4
- data/lib/fine/models/causal_lm.rb +46 -5
- data/lib/fine/models/gemma3_decoder.rb +25 -6
- data/lib/fine/models/llama_decoder.rb +9 -8
- data/lib/fine/models/sentence_transformer.rb +1 -1
- data/lib/fine/tokenizers/auto_tokenizer.rb +15 -0
- data/lib/fine/training/text_trainer.rb +3 -1
- data/lib/fine/validators.rb +304 -0
- data/lib/fine/version.rb +1 -1
- data/lib/fine.rb +4 -0
- metadata +47 -2
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{"text": "This product exceeded all my expectations! Build quality is excellent.", "label": "positive"}
|
|
2
|
+
{"text": "Absolutely terrible. Broke within a week of purchase.", "label": "negative"}
|
|
3
|
+
{"text": "Good value for money. Does exactly what it's supposed to do.", "label": "positive"}
|
|
4
|
+
{"text": "Don't waste your money on this. Cheaply made garbage.", "label": "negative"}
|
|
5
|
+
{"text": "Love it! Fast shipping and great customer service.", "label": "positive"}
|
|
6
|
+
{"text": "Arrived damaged and customer support was unhelpful.", "label": "negative"}
|
|
7
|
+
{"text": "Five stars! Best purchase I've made this year.", "label": "positive"}
|
|
8
|
+
{"text": "Returns are impossible. Stuck with a defective product.", "label": "negative"}
|
|
9
|
+
{"text": "Works perfectly. Would definitely recommend to friends.", "label": "positive"}
|
|
10
|
+
{"text": "Complete disappointment. Nothing like the photos.", "label": "negative"}
|
|
11
|
+
{"text": "Amazing quality at this price point. Very impressed!", "label": "positive"}
|
|
12
|
+
{"text": "Stopped working after two uses. Total scam.", "label": "negative"}
|
|
13
|
+
{"text": "Exactly as described. Happy with my purchase.", "label": "positive"}
|
|
14
|
+
{"text": "Worst product I've ever bought. Avoid at all costs.", "label": "negative"}
|
|
15
|
+
{"text": "Great design and functionality. Money well spent!", "label": "positive"}
|
|
16
|
+
{"text": "Flimsy and poorly constructed. Falls apart easily.", "label": "negative"}
|
|
17
|
+
{"text": "Pleasantly surprised by the quality. Ordering another one.", "label": "positive"}
|
|
18
|
+
{"text": "Misleading description. Not what I ordered.", "label": "negative"}
|
|
19
|
+
{"text": "Perfect gift! The recipient loved it.", "label": "positive"}
|
|
20
|
+
{"text": "Overpriced junk. Save your money for something else.", "label": "negative"}
|
|
21
|
+
{"text": "Solid construction and looks even better in person.", "label": "positive"}
|
|
22
|
+
{"text": "Took forever to arrive and was broken. Never again.", "label": "negative"}
|
|
23
|
+
{"text": "This is exactly what I needed. Works flawlessly.", "label": "positive"}
|
|
24
|
+
{"text": "Terrible quality control. Missing parts in the box.", "label": "negative"}
|
|
25
|
+
{"text": "Highly recommend! Exceeded my expectations in every way.", "label": "positive"}
|
|
26
|
+
{"text": "Waste of packaging. The product is unusable.", "label": "negative"}
|
|
27
|
+
{"text": "Beautiful design and excellent craftsmanship.", "label": "positive"}
|
|
28
|
+
{"text": "Looks nothing like the pictures. Very disappointed.", "label": "negative"}
|
|
29
|
+
{"text": "Best investment I've made. Worth every penny.", "label": "positive"}
|
|
30
|
+
{"text": "Cheap materials and terrible assembly. Don't buy.", "label": "negative"}
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{"query": "How do I reset my password?", "positive": "To reset your password, click 'Forgot Password' on the login page and enter your email address."}
|
|
2
|
+
{"query": "I forgot my login credentials", "positive": "To reset your password, click 'Forgot Password' on the login page and enter your email address."}
|
|
3
|
+
{"query": "Can't remember my password", "positive": "To reset your password, click 'Forgot Password' on the login page and enter your email address."}
|
|
4
|
+
{"query": "How long does shipping take?", "positive": "Standard shipping takes 3-5 business days. Express shipping takes 1-2 business days."}
|
|
5
|
+
{"query": "When will my order arrive?", "positive": "Standard shipping takes 3-5 business days. Express shipping takes 1-2 business days."}
|
|
6
|
+
{"query": "Delivery time estimate", "positive": "Standard shipping takes 3-5 business days. Express shipping takes 1-2 business days."}
|
|
7
|
+
{"query": "How do I return an item?", "positive": "To initiate a return, go to your order history and click 'Request Return' within 30 days of delivery."}
|
|
8
|
+
{"query": "I want to send something back", "positive": "To initiate a return, go to your order history and click 'Request Return' within 30 days of delivery."}
|
|
9
|
+
{"query": "Return policy information", "positive": "To initiate a return, go to your order history and click 'Request Return' within 30 days of delivery."}
|
|
10
|
+
{"query": "Can I cancel my order?", "positive": "You can cancel your order within 1 hour of placing it. Go to your orders and click 'Cancel Order'."}
|
|
11
|
+
{"query": "I changed my mind about my purchase", "positive": "You can cancel your order within 1 hour of placing it. Go to your orders and click 'Cancel Order'."}
|
|
12
|
+
{"query": "Stop my order from shipping", "positive": "You can cancel your order within 1 hour of placing it. Go to your orders and click 'Cancel Order'."}
|
|
13
|
+
{"query": "What payment methods do you accept?", "positive": "We accept Visa, Mastercard, American Express, PayPal, and Apple Pay."}
|
|
14
|
+
{"query": "Can I pay with PayPal?", "positive": "We accept Visa, Mastercard, American Express, PayPal, and Apple Pay."}
|
|
15
|
+
{"query": "Do you take credit cards?", "positive": "We accept Visa, Mastercard, American Express, PayPal, and Apple Pay."}
|
|
16
|
+
{"query": "How do I track my package?", "positive": "Once shipped, you'll receive a tracking number via email. Click the link to see real-time updates."}
|
|
17
|
+
{"query": "Where is my order right now?", "positive": "Once shipped, you'll receive a tracking number via email. Click the link to see real-time updates."}
|
|
18
|
+
{"query": "Package tracking information", "positive": "Once shipped, you'll receive a tracking number via email. Click the link to see real-time updates."}
|
|
19
|
+
{"query": "Do you ship internationally?", "positive": "Yes, we ship to over 50 countries. International shipping takes 7-14 business days."}
|
|
20
|
+
{"query": "Can you deliver to Europe?", "positive": "Yes, we ship to over 50 countries. International shipping takes 7-14 business days."}
|
|
21
|
+
{"query": "Overseas delivery available?", "positive": "Yes, we ship to over 50 countries. International shipping takes 7-14 business days."}
|
|
22
|
+
{"query": "How do I contact customer support?", "positive": "You can reach us via live chat, email at support@example.com, or phone at 1-800-555-0123."}
|
|
23
|
+
{"query": "I need help with an issue", "positive": "You can reach us via live chat, email at support@example.com, or phone at 1-800-555-0123."}
|
|
24
|
+
{"query": "Customer service phone number", "positive": "You can reach us via live chat, email at support@example.com, or phone at 1-800-555-0123."}
|
|
25
|
+
{"query": "How do I change my email address?", "positive": "Go to Account Settings, click 'Edit Profile', and update your email. You'll need to verify the new address."}
|
|
26
|
+
{"query": "Update my contact information", "positive": "Go to Account Settings, click 'Edit Profile', and update your email. You'll need to verify the new address."}
|
|
27
|
+
{"query": "Wrong email on my account", "positive": "Go to Account Settings, click 'Edit Profile', and update your email. You'll need to verify the new address."}
|
|
28
|
+
{"query": "Is my data secure?", "positive": "We use industry-standard encryption and never share your data with third parties without consent."}
|
|
29
|
+
{"query": "Privacy and security policy", "positive": "We use industry-standard encryption and never share your data with third parties without consent."}
|
|
30
|
+
{"query": "How do you protect my information?", "positive": "We use industry-standard encryption and never share your data with third parties without consent."}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Generate synthetic shape images for classification testing
|
|
5
|
+
# Creates solid-color images; each class is told apart only by its dominant hue
|
|
6
|
+
|
|
7
|
+
require "vips"
|
|
8
|
+
require "fileutils"
|
|
9
|
+
|
|
10
|
+
OUTPUT_DIR = File.expand_path("data/shapes", __dir__)
|
|
11
|
+
IMAGE_SIZE = 64
|
|
12
|
+
IMAGES_PER_CLASS = 10
|
|
13
|
+
|
|
14
|
+
# Writes an IMAGE_SIZE x IMAGE_SIZE, 3-band image filled with +main_color+
# to +path+.
#
# All supported pattern types currently render the same way (a solid fill);
# the classes differ only through the colour the caller passes in.
#
# @param path [String] destination file for the image
# @param main_color [Array<Integer>] three-component fill colour
# @param bg_color [Array<Integer>] accepted for interface compatibility;
#   no background is drawn at the moment, so the value is not used
# @param pattern_type [Symbol] one of :circle, :square or :triangle
# @return whatever Vips::Image#write_to_file returns
# @raise [ArgumentError] if +pattern_type+ is not a supported pattern
def create_solid_image(path, main_color, bg_color, pattern_type)
  # Fail loudly on an unknown pattern instead of crashing later on a nil image.
  unless %i[circle square triangle].include?(pattern_type)
    raise ArgumentError, "unknown pattern_type: #{pattern_type.inspect}"
  end

  # The black canvas only fixes the dimensions and band count; the pixels come
  # from new_from_image, and the copy tags the result as sRGB.
  canvas = Vips::Image.black(IMAGE_SIZE, IMAGE_SIZE, bands: 3)
  filled = canvas.new_from_image(main_color).copy(interpretation: :srgb)

  filled.write_to_file(path)
end
|
|
37
|
+
|
|
38
|
+
# Color palettes for each shape (to help with learning)
# Circles: red-ish, Squares: green-ish, Triangles: blue-ish
# Hash order is the order in which images are written for each index.
palettes = {
  circle: [
    [220, 80, 80], [255, 100, 100], [180, 60, 60], [240, 120, 120],
    [200, 70, 70], [230, 90, 90], [190, 50, 50], [250, 110, 110],
    [210, 75, 75], [235, 95, 95]
  ],
  square: [
    [80, 180, 80], [100, 220, 100], [60, 160, 60], [120, 200, 120],
    [70, 190, 70], [90, 210, 90], [50, 170, 50], [110, 195, 110],
    [75, 185, 75], [95, 205, 95]
  ],
  triangle: [
    [80, 80, 220], [100, 100, 255], [60, 60, 180], [120, 120, 240],
    [70, 70, 200], [90, 90, 230], [50, 50, 190], [110, 110, 250],
    [75, 75, 210], [95, 95, 235]
  ]
}

bg_color = [240, 240, 240]

# Create one output directory per class
palettes.each_key do |shape|
  FileUtils.mkdir_p(File.join(OUTPUT_DIR, shape.to_s))
end

# Write IMAGES_PER_CLASS images per class, cycling through that class's palette
IMAGES_PER_CLASS.times do |i|
  palettes.each do |shape, colors|
    create_solid_image(
      File.join(OUTPUT_DIR, shape.to_s, "#{shape}_#{i + 1}.jpg"),
      colors[i % colors.size],
      bg_color,
      shape
    )
  end
end

puts "Generated #{IMAGES_PER_CLASS * 3} images in #{OUTPUT_DIR}"
puts " - #{IMAGES_PER_CLASS} circles (red-ish)"
puts " - #{IMAGES_PER_CLASS} squares (green-ish)"
puts " - #{IMAGES_PER_CLASS} triangles (blue-ish)"
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Example: Fine-tune a sentiment classifier for product reviews
|
|
5
|
+
#
|
|
6
|
+
# This example demonstrates how to fine-tune DistilBERT for binary
|
|
7
|
+
# sentiment classification (positive/negative reviews).
|
|
8
|
+
#
|
|
9
|
+
# NOTE: Text classification is currently experimental. For best results:
|
|
10
|
+
# - Use larger datasets (100+ samples per class)
|
|
11
|
+
# - Train for more epochs (20+)
|
|
12
|
+
# - Consider using a pre-fine-tuned sentiment model
|
|
13
|
+
|
|
14
|
+
require "bundler/setup"
|
|
15
|
+
require "fine"
|
|
16
|
+
|
|
17
|
+
puts "=" * 60
puts "SENTIMENT CLASSIFICATION EXAMPLE"
puts "=" * 60

# Disable progress bar for cleaner output
Fine.configure { |c| c.progress_bar = false }

data_path = File.expand_path("data/sentiment_reviews.jsonl", __dir__)
save_path = "/tmp/sentiment-classifier"
epochs = 10 # used by both the config block and the fit call below

# Shortens text for display; the ellipsis is added only when something was cut.
preview = ->(text, limit = 50) { text.length > limit ? "#{text[0, limit]}..." : text }

puts "\n1. Creating classifier with distilbert-base-uncased..."

classifier = Fine::TextClassifier.new("distilbert-base-uncased") do |config|
  config.epochs = epochs
  config.batch_size = 8
  config.learning_rate = 5e-5 # Slightly higher for small dataset
  config.max_length = 128

  # Accuracy is printed only when the metrics hash carries it.
  config.on_epoch_end do |epoch, metrics|
    acc_str = metrics[:accuracy] ? ", acc=#{(metrics[:accuracy] * 100).round(1)}%" : ""
    puts " Epoch #{epoch}: loss=#{metrics[:loss].round(4)}#{acc_str}"
  end
end

puts "\n2. Fine-tuning on #{data_path}..."
puts " (#{File.readlines(data_path).count} examples)"

history = classifier.fit(train_file: data_path, epochs: epochs)

first_loss = history.first[:loss]
final_loss = history.last[:loss]

puts "\n3. Training complete!"
puts " Initial loss: #{first_loss.round(4)}"
puts " Final loss: #{final_loss.round(4)}"

# A zero initial loss would make the relative improvement undefined, so skip it.
unless first_loss.zero?
  improvement = ((1 - final_loss / first_loss) * 100).round(1)
  puts " Improvement: #{improvement}%"
end

puts "\n4. Testing predictions..."

test_samples = [
  "This is the best product I've ever purchased! Amazing quality.",
  "Terrible experience. Product arrived broken and support ignored me.",
  "Decent product for the price. Does what it's supposed to do.",
  "Complete waste of money. Returning immediately.",
  "Exceeded my expectations! Will buy again."
]

test_samples.each do |text|
  # NOTE(review): predict appears to return one ranked list per input, hence
  # first.first for the top hit - confirm against Fine::TextClassifier#predict.
  top = classifier.predict(text, top_k: 2).first.first
  puts " \"#{preview.call(text)}\""
  puts " => #{top[:label]} (#{(top[:score] * 100).round(1)}%)\n\n"
end

puts "5. Saving model to #{save_path}..."
classifier.save(save_path)

puts "\n6. Loading and verifying saved model..."
loaded = Fine::TextClassifier.load(save_path)

test_text = "Outstanding quality and fast shipping!"
original_pred = classifier.predict(test_text).first.first
loaded_pred = loaded.predict(test_text).first.first

# Rounded so the two lines are easy to compare by eye.
puts " Original: #{original_pred[:label]} (#{original_pred[:score].round(4)})"
puts " Loaded: #{loaded_pred[:label]} (#{loaded_pred[:score].round(4)})"

puts "\n" + "=" * 60
puts "SENTIMENT CLASSIFICATION COMPLETE!"
puts "=" * 60
puts "\nModel saved to: #{save_path}"
puts "Load with: Fine::TextClassifier.load('#{save_path}')"
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Example: Fine-tune an image classifier for shape/color recognition
|
|
5
|
+
#
|
|
6
|
+
# This example demonstrates how to fine-tune SigLIP2 for classifying
|
|
7
|
+
# images by dominant color (which corresponds to different shapes).
|
|
8
|
+
|
|
9
|
+
require "bundler/setup"
|
|
10
|
+
require "fine"
|
|
11
|
+
|
|
12
|
+
puts "=" * 60
puts "SHAPE CLASSIFICATION EXAMPLE"
puts "=" * 60

Fine.configure { |c| c.progress_bar = false }

data_dir = File.expand_path("data/shapes", __dir__)
save_path = "/tmp/shape-classifier"
epochs = 5 # used by both the config block and the fit call below

# Generate the sample images on first run. A missing directory also yields an
# empty glob, so no separate existence check is needed.
if Dir.glob("#{data_dir}/*/*.jpg").empty?
  puts "Generating test images..."
  require_relative "generate_shape_images"
end

puts "\n1. Creating classifier with SigLIP2..."

classifier = Fine::ImageClassifier.new("google/siglip2-base-patch16-224") do |config|
  config.epochs = epochs
  config.batch_size = 4
  config.learning_rate = 1e-4
  config.image_size = 224
  config.freeze_encoder = false # Fine-tune the whole model

  config.on_epoch_end do |epoch, metrics|
    puts " Epoch #{epoch}: loss=#{metrics[:loss].round(4)}"
  end
end

# Each sub-directory of data_dir is one class. Sort explicitly because
# Dir.glob does not guarantee an order on older Rubies.
class_names = Dir.glob("#{data_dir}/*")
                 .select { |entry| File.directory?(entry) }
                 .map { |entry| File.basename(entry) }
                 .sort
image_count = Dir.glob("#{data_dir}/*/*.jpg").count

puts "\n2. Fine-tuning on #{data_dir}..."
puts " (#{image_count} images across #{class_names.size} classes)"
puts " Classes: #{class_names.join(", ")}"

history = classifier.fit(train_dir: data_dir, epochs: epochs)

first_loss = history.first[:loss]
final_loss = history.last[:loss]

puts "\n3. Training complete!"
puts " Initial loss: #{first_loss.round(4)}"
puts " Final loss: #{final_loss.round(4)}"

# A zero initial loss would make the relative improvement undefined, so skip it.
unless first_loss.zero?
  improvement = ((1 - final_loss / first_loss) * 100).round(1)
  puts " Improvement: #{improvement}%"
end

puts "\n4. Testing predictions on training images..."

# Test on one image from each class
class_names.each do |shape|
  test_image = Dir.glob("#{data_dir}/#{shape}/*.jpg").min
  next unless test_image # class directory without any .jpg files

  # NOTE(review): predict appears to return one ranked list per input, hence
  # first.first for the top hit - confirm against Fine::ImageClassifier#predict.
  top = classifier.predict(test_image, top_k: 3).first.first

  # Print the file that was actually classified, not an assumed name.
  puts " #{File.basename(test_image)} => #{top[:label]} (#{(top[:score] * 100).round(1)}%)"
end

puts "\n5. Saving model to #{save_path}..."
classifier.save(save_path)

puts "\n6. Loading and verifying saved model..."
loaded = Fine::ImageClassifier.load(save_path)

# Lexicographically first image overall (circle/circle_1.jpg for the bundled data).
test_image = Dir.glob("#{data_dir}/*/*.jpg").min
original_pred = classifier.predict(test_image).first.first
loaded_pred = loaded.predict(test_image).first.first

puts " Original: #{original_pred[:label]} (#{original_pred[:score].round(4)})"
puts " Loaded: #{loaded_pred[:label]} (#{loaded_pred[:score].round(4)})"

puts "\n" + "=" * 60
puts "SHAPE CLASSIFICATION COMPLETE!"
puts "=" * 60
puts "\nModel saved to: #{save_path}"
puts "Load with: Fine::ImageClassifier.load('#{save_path}')"
puts "Classes: #{classifier.class_names.join(", ")}"
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Example: Fine-tune text embeddings for customer support FAQ matching
|
|
5
|
+
#
|
|
6
|
+
# This example demonstrates how to fine-tune a sentence transformer model
|
|
7
|
+
# for semantic search in a customer support context.
|
|
8
|
+
|
|
9
|
+
require "bundler/setup"
|
|
10
|
+
require "fine"
|
|
11
|
+
|
|
12
|
+
banner = "=" * 60

puts banner
puts "SUPPORT FAQ EMBEDDINGS EXAMPLE"
puts banner

# Keep the console output free of progress bars.
Fine.configure { |settings| settings.progress_bar = false }

pairs_file = File.expand_path("data/support_faq_pairs.jsonl", __dir__)
model_dir = "/tmp/support-faq-embedder"

puts "\n1. Creating embedder with all-MiniLM-L6-v2..."

embedder = Fine::TextEmbedder.new("sentence-transformers/all-MiniLM-L6-v2") do |config|
  config.epochs = 2
  config.batch_size = 8
  config.learning_rate = 2e-5

  # Report the loss after every epoch.
  config.on_epoch_end do |epoch, metrics|
    epoch_loss = metrics[:loss].round(4)
    puts " Epoch #{epoch}: loss=#{epoch_loss}"
  end
end

puts " Embedding dimension: #{embedder.embedding_dim}"

# Baseline: how similar are an unseen query and its matching answer before training?
puts "\n2. Testing pre-training similarity..."
probe_query = "How can I get my money back?"
probe_answer = "To initiate a return, go to your order history and click 'Request Return' within 30 days of delivery."

similarity_before = embedder.similarity(probe_query, probe_answer)
puts " Query: \"#{probe_query}\""
puts " FAQ: \"#{probe_answer[0, 60]}...\""
puts " Pre-training similarity: #{similarity_before.round(4)}"

puts "\n3. Fine-tuning on #{pairs_file}..."
pair_count = File.readlines(pairs_file).count
puts " (#{pair_count} query-answer pairs)"

history = embedder.fit(train_file: pairs_file, epochs: 2)

puts "\n4. Training complete!"
puts " Initial loss: #{history.first[:loss].round(4)}"
puts " Final loss: #{history.last[:loss].round(4)}"

# Same probe pair again, now with the fine-tuned weights.
puts "\n5. Testing post-training similarity..."
similarity_after = embedder.similarity(probe_query, probe_answer)
gain = ((similarity_after - similarity_before) * 100).round(2)
puts " Post-training similarity: #{similarity_after.round(4)}"
puts " Improvement: #{gain} percentage points"

# Rank a small FAQ corpus against a few free-form questions.
puts "\n6. Semantic search demo..."

faq_corpus = [
  "To reset your password, click 'Forgot Password' on the login page and enter your email address.",
  "Standard shipping takes 3-5 business days. Express shipping takes 1-2 business days.",
  "To initiate a return, go to your order history and click 'Request Return' within 30 days of delivery.",
  "You can cancel your order within 1 hour of placing it. Go to your orders and click 'Cancel Order'.",
  "We accept Visa, Mastercard, American Express, PayPal, and Apple Pay.",
  "Once shipped, you'll receive a tracking number via email. Click the link to see real-time updates.",
  "Yes, we ship to over 50 countries. International shipping takes 7-14 business days.",
  "You can reach us via live chat, email at support@example.com, or phone at 1-800-555-0123."
]

[
  "I need to get my money back for this purchase",
  "What's the phone number for help?",
  "Can you deliver to Germany?"
].each do |question|
  hits = embedder.search(question, faq_corpus, top_k: 2)
  puts "\n Query: \"#{question}\""
  hits.each.with_index(1) do |hit, rank|
    puts " #{rank}. (#{hit[:score].round(3)}) #{hit[:text][0, 60]}..."
  end
end

puts "\n7. Saving model to #{model_dir}..."
embedder.save(model_dir)

puts "\n8. Loading and verifying saved model..."
reloaded = Fine::TextEmbedder.load(model_dir)

embedding_before = embedder.encode("test query")
embedding_after = reloaded.encode("test query")

# Sum of absolute element-wise differences between the two embeddings.
drift = embedding_before.zip(embedding_after).sum { |left, right| (left - right).abs }
puts " Embedding difference: #{drift.round(6)} (should be ~0)"

puts "\n#{banner}"
puts "SUPPORT FAQ EMBEDDINGS COMPLETE!"
puts banner
puts "\nModel saved to: #{model_dir}"
puts "Load with: Fine::TextEmbedder.load('#{model_dir}')"
|