feather-ai 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/lib/feather_ai/configuration.rb +2 -1
- data/lib/feather_ai/identifier.rb +76 -28
- data/lib/feather_ai/result.rb +3 -1
- data/lib/feather_ai/version.rb +1 -1
- data/lib/feather_ai.rb +3 -0
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9f2195b84828584d6764f60823b99fbbbe9961c1c0ebee3d26679b6450a0dd53
|
|
4
|
+
data.tar.gz: '008c505376ab82635e2a8fdae0475a3adab609c5b9bef588f6a48573c238036c'
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: be14496f9c58080371192aa146f521178f563433e9bb28e4ecd720c3f802e4264e3a5efef7281813e6a1f18d2e03998eb69e4279dee5c156052879cb2510ed2b
|
|
7
|
+
data.tar.gz: 14f4968e7363d889e2d0590b675141ec4868385cd58138fbb4de4910acabe61385a14b079136ec0c10d93ba709b2a656de76e2e39bc50016b9faa98672134627
|
data/README.md
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# FeatherAi
|
|
2
2
|
|
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/feather-ai.svg)](https://badge.fury.io/rb/feather-ai)
|
|
4
|
+
|
|
3
5
|
A Ruby gem for identifying birds from photos and audio using [RubyLLM](https://github.com/coelacanth/ruby_llm). FeatherAi adds multi-modal identification, location-aware results, multi-model consensus, and a Rails integration on top of RubyLLM.
|
|
4
6
|
|
|
5
7
|
## Installation
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
module FeatherAi
|
|
4
4
|
# Configuration object for FeatherAi gem settings.
|
|
5
5
|
class Configuration
|
|
6
|
-
attr_accessor :provider, :model, :location, :consensus_models, :tips_model
|
|
6
|
+
attr_accessor :provider, :model, :location, :consensus_models, :tips_model, :media_resolution
|
|
7
7
|
|
|
8
8
|
def initialize
|
|
9
9
|
@provider = :anthropic
|
|
@@ -11,6 +11,7 @@ module FeatherAi
|
|
|
11
11
|
@location = nil
|
|
12
12
|
@consensus_models = %w[claude-sonnet-4 claude-haiku-4]
|
|
13
13
|
@tips_model = "claude-haiku-4"
|
|
14
|
+
@media_resolution = :high
|
|
14
15
|
end
|
|
15
16
|
|
|
16
17
|
def initialize_copy(source)
|
|
@@ -5,6 +5,9 @@ module FeatherAi
|
|
|
5
5
|
# rubocop:disable Metrics/ClassLength
|
|
6
6
|
class Identifier
|
|
7
7
|
SCHEMA = RubyLLM::Schema.create do
|
|
8
|
+
string :reasoning,
|
|
9
|
+
description: "Step-by-step visual analysis: describe body size, bill shape, " \
|
|
10
|
+
"plumage, markings, and rule out similar species before identifying"
|
|
8
11
|
string :common_name, description: "Common name of the bird"
|
|
9
12
|
string :species, description: "Scientific species name (Genus species)"
|
|
10
13
|
string :family, description: "Bird family name"
|
|
@@ -22,44 +25,60 @@ module FeatherAi
|
|
|
22
25
|
@config = config
|
|
23
26
|
end
|
|
24
27
|
|
|
28
|
+
# @param image [String, Array<String>, nil] path(s) to image file(s)
|
|
29
|
+
# @param audio [String, nil] path to audio file
|
|
25
30
|
def identify(image = nil, audio = nil, location: nil)
|
|
26
|
-
|
|
31
|
+
images = normalize_images(image)
|
|
32
|
+
validate_inputs!(images, audio)
|
|
33
|
+
run_identification(images, audio, location || @config.location)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
private
|
|
37
|
+
|
|
38
|
+
def normalize_images(image)
|
|
39
|
+
case image
|
|
40
|
+
when nil then []
|
|
41
|
+
when String then [image]
|
|
42
|
+
when Array then image
|
|
43
|
+
else raise ArgumentError, "image must be a String or Array<String>, got #{image.class}"
|
|
44
|
+
end
|
|
45
|
+
end
|
|
27
46
|
|
|
28
|
-
|
|
29
|
-
source = derive_source(
|
|
30
|
-
payload = instrumentation_payload(effective_location,
|
|
47
|
+
def run_identification(images, audio, effective_location)
|
|
48
|
+
source = derive_source(images, audio)
|
|
49
|
+
payload = instrumentation_payload(effective_location, images, audio)
|
|
31
50
|
|
|
32
51
|
Instrumentation.instrument("identify.feather_ai", payload) do
|
|
33
|
-
response, duration_ms = perform_identification(
|
|
52
|
+
response, duration_ms = perform_identification(images, audio, effective_location)
|
|
34
53
|
result = build_result(response, duration_ms, source)
|
|
35
54
|
payload[:result] = result
|
|
36
55
|
result
|
|
37
56
|
end
|
|
38
57
|
end
|
|
39
58
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def validate_inputs!(image, audio)
|
|
43
|
-
return unless image.nil? && audio.nil?
|
|
59
|
+
def validate_inputs!(images, audio)
|
|
60
|
+
return unless images.empty? && audio.nil?
|
|
44
61
|
|
|
45
62
|
raise FeatherAi::ConfigurationError, "At least one of image or audio must be provided"
|
|
46
63
|
end
|
|
47
64
|
|
|
48
|
-
def instrumentation_payload(location,
|
|
65
|
+
def instrumentation_payload(location, images, audio)
|
|
49
66
|
{
|
|
50
67
|
model: @config.model,
|
|
51
68
|
location: location,
|
|
52
|
-
has_image:
|
|
69
|
+
has_image: images.any?,
|
|
70
|
+
image_count: images.size,
|
|
53
71
|
has_audio: !audio.nil?
|
|
54
72
|
}
|
|
55
73
|
end
|
|
56
74
|
|
|
57
|
-
def perform_identification(
|
|
75
|
+
def perform_identification(images, audio, location)
|
|
58
76
|
chat = configure_chat(location)
|
|
59
|
-
|
|
77
|
+
prompt = build_text_prompt(images, audio)
|
|
78
|
+
attachments = images.any? ? images : nil
|
|
60
79
|
|
|
61
80
|
start_ms = Process.clock_gettime(Process::CLOCK_MONOTONIC, :millisecond)
|
|
62
|
-
response = chat.ask(
|
|
81
|
+
response = chat.ask(prompt, with: attachments)
|
|
63
82
|
duration_ms = Process.clock_gettime(Process::CLOCK_MONOTONIC, :millisecond) - start_ms
|
|
64
83
|
|
|
65
84
|
[response, duration_ms]
|
|
@@ -69,9 +88,19 @@ module FeatherAi
|
|
|
69
88
|
chat = RubyLLM.chat(model: @config.model)
|
|
70
89
|
chat.with_instructions(system_prompt(location))
|
|
71
90
|
chat.with_schema(SCHEMA)
|
|
91
|
+
chat.with_params(**generation_params) if generation_params.any?
|
|
72
92
|
chat
|
|
73
93
|
end
|
|
74
94
|
|
|
95
|
+
def generation_params
|
|
96
|
+
params = {}
|
|
97
|
+
if @config.media_resolution
|
|
98
|
+
resolution = "MEDIA_RESOLUTION_#{@config.media_resolution.to_s.upcase}"
|
|
99
|
+
params[:generationConfig] = { mediaResolution: resolution }
|
|
100
|
+
end
|
|
101
|
+
params
|
|
102
|
+
end
|
|
103
|
+
|
|
75
104
|
def build_result(response, duration_ms, source)
|
|
76
105
|
parsed = response.content
|
|
77
106
|
Result.new(
|
|
@@ -82,6 +111,7 @@ module FeatherAi
|
|
|
82
111
|
|
|
83
112
|
def parsed_identification_attrs(parsed)
|
|
84
113
|
{
|
|
114
|
+
reasoning: parsed["reasoning"],
|
|
85
115
|
common_name: parsed["common_name"],
|
|
86
116
|
species: parsed["species"],
|
|
87
117
|
family: parsed["family"],
|
|
@@ -112,10 +142,10 @@ module FeatherAi
|
|
|
112
142
|
}
|
|
113
143
|
end
|
|
114
144
|
|
|
115
|
-
def derive_source(
|
|
116
|
-
if
|
|
145
|
+
def derive_source(images, audio)
|
|
146
|
+
if images.any? && audio
|
|
117
147
|
:multimodal
|
|
118
|
-
elsif
|
|
148
|
+
elsif images.any?
|
|
119
149
|
:vision
|
|
120
150
|
else
|
|
121
151
|
:audio
|
|
@@ -134,26 +164,44 @@ module FeatherAi
|
|
|
134
164
|
end
|
|
135
165
|
|
|
136
166
|
def system_prompt(location)
|
|
137
|
-
base =
|
|
138
|
-
"Return structured identification data."
|
|
167
|
+
base = base_system_prompt
|
|
139
168
|
return base unless location
|
|
140
169
|
|
|
141
|
-
"#{base} The observer is located in #{location} —
|
|
170
|
+
"#{base} The observer is located in #{location} — " \
|
|
171
|
+
"prioritise species native to that region and consider regional plumage variations."
|
|
142
172
|
end
|
|
143
173
|
|
|
144
|
-
def
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
174
|
+
def base_system_prompt
|
|
175
|
+
<<~PROMPT.gsub(/\s+/, " ").strip
|
|
176
|
+
You are an expert ornithologist specialising in field identification.
|
|
177
|
+
Before identifying the bird, carefully analyse key visual features:
|
|
178
|
+
body size and shape, bill shape and size, plumage colour and pattern,
|
|
179
|
+
eye colour, leg colour, tail shape, and any distinctive markings.
|
|
180
|
+
Consider common look-alikes and explain why this is not one of them.
|
|
181
|
+
Only then commit to your identification with structured data.
|
|
182
|
+
If the image is unclear or shows multiple species, identify the most
|
|
183
|
+
prominent bird and set confidence to low or medium accordingly.
|
|
184
|
+
PROMPT
|
|
185
|
+
end
|
|
148
186
|
|
|
187
|
+
def build_text_prompt(images, audio)
|
|
188
|
+
parts = []
|
|
149
189
|
if audio
|
|
150
190
|
transcript = RubyLLM.transcribe(audio)
|
|
151
|
-
parts <<
|
|
191
|
+
parts << "Bird call/song transcript: #{transcript}"
|
|
152
192
|
end
|
|
193
|
+
parts << identification_prompt(images.size, has_audio: !audio.nil?)
|
|
194
|
+
parts.join("\n")
|
|
195
|
+
end
|
|
153
196
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
197
|
+
def identification_prompt(image_count, has_audio:)
|
|
198
|
+
if image_count > 1 && has_audio
|
|
199
|
+
"Identify the bird shown in the provided images and heard in the audio. Use all inputs together."
|
|
200
|
+
elsif image_count > 1
|
|
201
|
+
"Identify the bird shown in the provided images. Use all images together to make your identification."
|
|
202
|
+
else
|
|
203
|
+
"Identify the bird shown and/or heard above."
|
|
204
|
+
end
|
|
157
205
|
end
|
|
158
206
|
end
|
|
159
207
|
# rubocop:enable Metrics/ClassLength
|
data/lib/feather_ai/result.rb
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
module FeatherAi
|
|
4
4
|
# Immutable value object wrapping all identification output.
|
|
5
5
|
class Result
|
|
6
|
-
attr_reader :common_name, :species, :family, :confidence, :region_native, :candidates,
|
|
6
|
+
attr_reader :common_name, :species, :family, :confidence, :region_native, :reasoning, :candidates,
|
|
7
7
|
:input_tokens, :output_tokens, :cost, :model_id, :duration_ms, :source,
|
|
8
8
|
:consensus_models
|
|
9
9
|
|
|
@@ -42,6 +42,7 @@ module FeatherAi
|
|
|
42
42
|
@family = attrs[:family]
|
|
43
43
|
@confidence = attrs[:confidence]&.to_sym
|
|
44
44
|
@region_native = attrs[:region_native]
|
|
45
|
+
@reasoning = attrs[:reasoning]
|
|
45
46
|
@candidates = attrs[:candidates] || []
|
|
46
47
|
end
|
|
47
48
|
|
|
@@ -63,6 +64,7 @@ module FeatherAi
|
|
|
63
64
|
|
|
64
65
|
def identification_hash
|
|
65
66
|
{
|
|
67
|
+
reasoning: @reasoning,
|
|
66
68
|
common_name: @common_name,
|
|
67
69
|
species: @species,
|
|
68
70
|
family: @family,
|
data/lib/feather_ai/version.rb
CHANGED
data/lib/feather_ai.rb
CHANGED
|
@@ -30,6 +30,9 @@ module FeatherAi
|
|
|
30
30
|
@configuration = nil
|
|
31
31
|
end
|
|
32
32
|
|
|
33
|
+
# Identify a bird from image(s) and/or audio.
|
|
34
|
+
# @param image [String, Array<String>, nil] path(s) to image file(s)
|
|
35
|
+
# @param audio [String, nil] path to audio file
|
|
33
36
|
def identify(image = nil, audio = nil, location: nil, consensus: false)
|
|
34
37
|
if consensus
|
|
35
38
|
Consensus.new.identify(image, audio, location: location)
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: feather-ai
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.0
|
|
4
|
+
version: 0.3.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Brandyn Britton
|
|
8
8
|
bindir: exe
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
12
|
- !ruby/object:Gem::Dependency
|
|
13
13
|
name: ruby_llm
|
|
@@ -83,7 +83,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
83
83
|
- !ruby/object:Gem::Version
|
|
84
84
|
version: '0'
|
|
85
85
|
requirements: []
|
|
86
|
-
rubygems_version:
|
|
86
|
+
rubygems_version: 4.0.8
|
|
87
87
|
specification_version: 4
|
|
88
88
|
summary: Identify birds from photos and audio using LLMs
|
|
89
89
|
test_files: []
|