feather-ai 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 31f571486891a197ef789e511c429a5433290f5755330eb8c87e9ebd5d2a1f25
4
- data.tar.gz: ea99ec7efd75ccab6ce2321cd56bf3e712b0d3a79bef4139b4828eb3e23efa1b
3
+ metadata.gz: 9f2195b84828584d6764f60823b99fbbbe9961c1c0ebee3d26679b6450a0dd53
4
+ data.tar.gz: '008c505376ab82635e2a8fdae0475a3adab609c5b9bef588f6a48573c238036c'
5
5
  SHA512:
6
- metadata.gz: 83000667059ba7fc6090b49029051675e54f6b56ccf5a99e39f5156cd36387467fb0ef01136d2fac5aba9de1be52941c312c721dcb8db760fe5a8fb125731ce1
7
- data.tar.gz: 0b378a24104f4463d487bc7a4523ff479c8e7cda4ef251fe61b88e75ff2936fde4ee9aec77cfb5677e3246a770a987961192199e2284e7f1dedf1e903ed824c2
6
+ metadata.gz: be14496f9c58080371192aa146f521178f563433e9bb28e4ecd720c3f802e4264e3a5efef7281813e6a1f18d2e03998eb69e4279dee5c156052879cb2510ed2b
7
+ data.tar.gz: 14f4968e7363d889e2d0590b675141ec4868385cd58138fbb4de4910acabe61385a14b079136ec0c10d93ba709b2a656de76e2e39bc50016b9faa98672134627
data/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # FeatherAi
2
2
 
3
+ [![Gem Version](https://badge.fury.io/rb/feather-ai.svg?icon=si%3Arubygems)](https://badge.fury.io/rb/feather-ai)
4
+
3
5
  A Ruby gem for identifying birds from photos and audio using [RubyLLM](https://github.com/coelacanth/ruby_llm). FeatherAi adds multi-modal identification, location-aware results, multi-model consensus, and a Rails integration on top of RubyLLM.
4
6
 
5
7
  ## Installation
@@ -3,7 +3,7 @@
3
3
  module FeatherAi
4
4
  # Configuration object for FeatherAi gem settings.
5
5
  class Configuration
6
- attr_accessor :provider, :model, :location, :consensus_models, :tips_model
6
+ attr_accessor :provider, :model, :location, :consensus_models, :tips_model, :media_resolution
7
7
 
8
8
  def initialize
9
9
  @provider = :anthropic
@@ -11,6 +11,7 @@ module FeatherAi
11
11
  @location = nil
12
12
  @consensus_models = %w[claude-sonnet-4 claude-haiku-4]
13
13
  @tips_model = "claude-haiku-4"
14
+ @media_resolution = :high
14
15
  end
15
16
 
16
17
  def initialize_copy(source)
@@ -5,6 +5,9 @@ module FeatherAi
5
5
  # rubocop:disable Metrics/ClassLength
6
6
  class Identifier
7
7
  SCHEMA = RubyLLM::Schema.create do
8
+ string :reasoning,
9
+ description: "Step-by-step visual analysis: describe body size, bill shape, " \
10
+ "plumage, markings, and rule out similar species before identifying"
8
11
  string :common_name, description: "Common name of the bird"
9
12
  string :species, description: "Scientific species name (Genus species)"
10
13
  string :family, description: "Bird family name"
@@ -22,44 +25,60 @@ module FeatherAi
22
25
  @config = config
23
26
  end
24
27
 
28
+ # @param image [String, Array<String>, nil] path(s) to image file(s)
29
+ # @param audio [String, nil] path to audio file
25
30
  def identify(image = nil, audio = nil, location: nil)
26
- validate_inputs!(image, audio)
31
+ images = normalize_images(image)
32
+ validate_inputs!(images, audio)
33
+ run_identification(images, audio, location || @config.location)
34
+ end
35
+
36
+ private
37
+
38
+ def normalize_images(image)
39
+ case image
40
+ when nil then []
41
+ when String then [image]
42
+ when Array then image
43
+ else raise ArgumentError, "image must be a String or Array<String>, got #{image.class}"
44
+ end
45
+ end
27
46
 
28
- effective_location = location || @config.location
29
- source = derive_source(image, audio)
30
- payload = instrumentation_payload(effective_location, image, audio)
47
+ def run_identification(images, audio, effective_location)
48
+ source = derive_source(images, audio)
49
+ payload = instrumentation_payload(effective_location, images, audio)
31
50
 
32
51
  Instrumentation.instrument("identify.feather_ai", payload) do
33
- response, duration_ms = perform_identification(image, audio, effective_location)
52
+ response, duration_ms = perform_identification(images, audio, effective_location)
34
53
  result = build_result(response, duration_ms, source)
35
54
  payload[:result] = result
36
55
  result
37
56
  end
38
57
  end
39
58
 
40
- private
41
-
42
- def validate_inputs!(image, audio)
43
- return unless image.nil? && audio.nil?
59
+ def validate_inputs!(images, audio)
60
+ return unless images.empty? && audio.nil?
44
61
 
45
62
  raise FeatherAi::ConfigurationError, "At least one of image or audio must be provided"
46
63
  end
47
64
 
48
- def instrumentation_payload(location, image, audio)
65
+ def instrumentation_payload(location, images, audio)
49
66
  {
50
67
  model: @config.model,
51
68
  location: location,
52
- has_image: !image.nil?,
69
+ has_image: images.any?,
70
+ image_count: images.size,
53
71
  has_audio: !audio.nil?
54
72
  }
55
73
  end
56
74
 
57
- def perform_identification(image, audio, location)
75
+ def perform_identification(images, audio, location)
58
76
  chat = configure_chat(location)
59
- message = build_message(image, audio)
77
+ prompt = build_text_prompt(images, audio)
78
+ attachments = images.any? ? images : nil
60
79
 
61
80
  start_ms = Process.clock_gettime(Process::CLOCK_MONOTONIC, :millisecond)
62
- response = chat.ask(message)
81
+ response = chat.ask(prompt, with: attachments)
63
82
  duration_ms = Process.clock_gettime(Process::CLOCK_MONOTONIC, :millisecond) - start_ms
64
83
 
65
84
  [response, duration_ms]
@@ -69,9 +88,19 @@ module FeatherAi
69
88
  chat = RubyLLM.chat(model: @config.model)
70
89
  chat.with_instructions(system_prompt(location))
71
90
  chat.with_schema(SCHEMA)
91
+ chat.with_params(**generation_params) if generation_params.any?
72
92
  chat
73
93
  end
74
94
 
95
+ def generation_params
96
+ params = {}
97
+ if @config.media_resolution
98
+ resolution = "MEDIA_RESOLUTION_#{@config.media_resolution.to_s.upcase}"
99
+ params[:generationConfig] = { mediaResolution: resolution }
100
+ end
101
+ params
102
+ end
103
+
75
104
  def build_result(response, duration_ms, source)
76
105
  parsed = response.content
77
106
  Result.new(
@@ -82,6 +111,7 @@ module FeatherAi
82
111
 
83
112
  def parsed_identification_attrs(parsed)
84
113
  {
114
+ reasoning: parsed["reasoning"],
85
115
  common_name: parsed["common_name"],
86
116
  species: parsed["species"],
87
117
  family: parsed["family"],
@@ -112,10 +142,10 @@ module FeatherAi
112
142
  }
113
143
  end
114
144
 
115
- def derive_source(image, audio)
116
- if image && audio
145
+ def derive_source(images, audio)
146
+ if images.any? && audio
117
147
  :multimodal
118
- elsif image
148
+ elsif images.any?
119
149
  :vision
120
150
  else
121
151
  :audio
@@ -134,26 +164,44 @@ module FeatherAi
134
164
  end
135
165
 
136
166
  def system_prompt(location)
137
- base = "You are an expert ornithologist. Identify the bird from the provided image and/or audio. " \
138
- "Return structured identification data."
167
+ base = base_system_prompt
139
168
  return base unless location
140
169
 
141
- "#{base} The observer is located in #{location} — prioritize species native to that region."
170
+ "#{base} The observer is located in #{location} — " \
171
+ "prioritise species native to that region and consider regional plumage variations."
142
172
  end
143
173
 
144
- def build_message(image, audio)
145
- parts = []
146
-
147
- parts << { type: :image, content: image } if image
174
+ def base_system_prompt
175
+ <<~PROMPT.gsub(/\s+/, " ").strip
176
+ You are an expert ornithologist specialising in field identification.
177
+ Before identifying the bird, carefully analyse key visual features:
178
+ body size and shape, bill shape and size, plumage colour and pattern,
179
+ eye colour, leg colour, tail shape, and any distinctive markings.
180
+ Consider common look-alikes and explain why this is not one of them.
181
+ Only then commit to your identification with structured data.
182
+ If the image is unclear or shows multiple species, identify the most
183
+ prominent bird and set confidence to low or medium accordingly.
184
+ PROMPT
185
+ end
148
186
 
187
+ def build_text_prompt(images, audio)
188
+ parts = []
149
189
  if audio
150
190
  transcript = RubyLLM.transcribe(audio)
151
- parts << { type: :text, content: "Bird call/song transcript: #{transcript}" }
191
+ parts << "Bird call/song transcript: #{transcript}"
152
192
  end
193
+ parts << identification_prompt(images.size, has_audio: !audio.nil?)
194
+ parts.join("\n")
195
+ end
153
196
 
154
- parts << { type: :text, content: "Identify the bird shown and/or heard above." }
155
-
156
- parts
197
+ def identification_prompt(image_count, has_audio:)
198
+ if image_count > 1 && has_audio
199
+ "Identify the bird shown in the provided images and heard in the audio. Use all inputs together."
200
+ elsif image_count > 1
201
+ "Identify the bird shown in the provided images. Use all images together to make your identification."
202
+ else
203
+ "Identify the bird shown and/or heard above."
204
+ end
157
205
  end
158
206
  end
159
207
  # rubocop:enable Metrics/ClassLength
@@ -3,7 +3,7 @@
3
3
  module FeatherAi
4
4
  # Immutable value object wrapping all identification output.
5
5
  class Result
6
- attr_reader :common_name, :species, :family, :confidence, :region_native, :candidates,
6
+ attr_reader :common_name, :species, :family, :confidence, :region_native, :reasoning, :candidates,
7
7
  :input_tokens, :output_tokens, :cost, :model_id, :duration_ms, :source,
8
8
  :consensus_models
9
9
 
@@ -42,6 +42,7 @@ module FeatherAi
42
42
  @family = attrs[:family]
43
43
  @confidence = attrs[:confidence]&.to_sym
44
44
  @region_native = attrs[:region_native]
45
+ @reasoning = attrs[:reasoning]
45
46
  @candidates = attrs[:candidates] || []
46
47
  end
47
48
 
@@ -63,6 +64,7 @@ module FeatherAi
63
64
 
64
65
  def identification_hash
65
66
  {
67
+ reasoning: @reasoning,
66
68
  common_name: @common_name,
67
69
  species: @species,
68
70
  family: @family,
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module FeatherAi
4
- VERSION = "0.2.0"
4
+ VERSION = "0.3.1"
5
5
  end
data/lib/feather_ai.rb CHANGED
@@ -30,6 +30,9 @@ module FeatherAi
30
30
  @configuration = nil
31
31
  end
32
32
 
33
+ # Identify a bird from image(s) and/or audio.
34
+ # @param image [String, Array<String>, nil] path(s) to image file(s)
35
+ # @param audio [String, nil] path to audio file
33
36
  def identify(image = nil, audio = nil, location: nil, consensus: false)
34
37
  if consensus
35
38
  Consensus.new.identify(image, audio, location: location)
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feather-ai
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brandyn Britton
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2026-03-18 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: ruby_llm
@@ -83,7 +83,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
83
83
  - !ruby/object:Gem::Version
84
84
  version: '0'
85
85
  requirements: []
86
- rubygems_version: 3.6.2
86
+ rubygems_version: 4.0.8
87
87
  specification_version: 4
88
88
  summary: Identify birds from photos and audio using LLMs
89
89
  test_files: []