alt_text 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a2ce46a5e7a525ed907991d2d50e35d8a2c002f73118e5502e7fe9305775d310
4
- data.tar.gz: 564f906bdcebd926b85429d174e2c4cae9c66b1349973d7ec905cf6c74354bed
3
+ metadata.gz: 3b0d1d1ac9b9c9191b85d04a377a5583fc4a1b2ee54907151346a8483ef4b588
4
+ data.tar.gz: 49bcb6cb50d85b2af6b2145b00746d0927ec14b45969265b58593a754e437c8f
5
5
  SHA512:
6
- metadata.gz: 70405efc438ae7ebb3b4ecf7cd4a9225256c44008c9dbed79958381ff48a13fc703cb73ba06c64387f053aa823139de47b18d12500b47c6c18211160d1691ac0
7
- data.tar.gz: 56c6673daa0095dacc08edec163ac82ec9bae3de884e77f263b6b68ecf60501d439a9f7e1a0f25d3156fc91d6d0e61911ea316affc80d580db7b1652f87501b9
6
+ metadata.gz: 961d27e13b64d05ba621303df804df9cf6ba8baea87a1220da968722e6122d1c9eda20db2a32fd537652dc2e1ce984f820f3c998737b07ba045dd649c0606e1d
7
+ data.tar.gz: 8a91c62cfb49af32a0dde25422ed7fc8e75e2562ce64dc21acec851d673314f4323113383033ad35ee5c708db21dcc0931bdade6106aedccf826beefe7aa2f1a
data/README.md CHANGED
@@ -2,6 +2,8 @@
2
2
 
3
3
  This uses Ruby's AWS SDK to send images and a prompt to an LLM in Amazon's Bedrock to generate Alt Text for the images.
4
4
 
5
+ The client uses Bedrock's `converse` API and currently supports JPEG and PNG inputs.
6
+
5
7
  ### Ruby Client Usage
6
8
 
7
9
  This gem uses imagemagick to resize large images, so you will need to install imagemagick:
@@ -39,19 +41,29 @@ gem install alt_text
39
41
  Instantiate the client with injected AWS credentials:
40
42
 
41
43
  ```
42
- client = AltText::Client.new {
43
- access_key_id: ENV['YOUR_ACCESS_KEY_ID'],
44
- secret_access_key: ENV['YOUR_SECRET_ACCESS_KEY'],
44
+ client = AltText::Client.new(
45
+ access_key: ENV['AWS_ACCESS_KEY_ID'],
46
+ secret_key: ENV['AWS_SECRET_ACCESS_KEY'],
45
47
  region: 'us-east-1'
46
- }
48
+ )
47
49
  ```
48
50
 
49
51
  Call the `#process_image` method with the image path, prompt, and LLM ID as arguments:
50
52
 
51
53
  ```
52
- client.process_image('folder/image.png', 'Please generate alt text', 'sonnet3.51`)
54
+ client.process_image(
55
+ 'folder/image.png',
56
+ prompt: 'Please generate alt text',
57
+ model_id: 'default'
58
+ )
53
59
  ```
54
60
 
61
+ Supported image types:
62
+
63
+ - `.jpg`
64
+ - `.jpeg`
65
+ - `.png`
66
+
55
67
  *Note: A sample prompt can be found in `prompt.txt`.*
56
68
 
57
69
  ### CLI Usage
@@ -2,7 +2,7 @@
2
2
 
3
3
  require 'aws-sdk-bedrockruntime'
4
4
  require 'mini_magick'
5
- require 'base64'
5
+ require 'marcel'
6
6
 
7
7
  module AltText
8
8
  class Client
@@ -16,36 +16,59 @@ module AltText
16
16
 
17
17
  def process_image(image_path, prompt:, model_id:)
18
18
  model_id = AltText::LLMRegistry.resolve(model_id)
19
+ image_format = image_format_for(image_path)
19
20
  tmp_image = resize_if_needed(image_path)
20
21
 
21
- encoded_image = Base64.strict_encode64(File.binread(tmp_image))
22
+ image_bytes = File.binread(tmp_image)
22
23
  tmp_image.close! if tmp_image.is_a?(Tempfile)
23
24
 
24
- payload = {
25
- messages: [
26
- { role: 'user',
27
- content: [
28
- { type: 'image',
29
- source:
30
- { type: 'base64',
31
- media_type: 'image/jpeg',
32
- data: encoded_image } },
33
- { type: 'text',
34
- text: prompt }
35
- ] }
36
- ],
37
- max_tokens: 10_000,
38
- anthropic_version: 'bedrock-2023-05-31'
39
- }
40
-
41
- response = @client.invoke_model(model_id: model_id,
42
- content_type: 'application/json',
43
- body: payload.to_json)
44
- JSON.parse(response.body.read)['content'][0]['text']
25
+ messages = [
26
+ {
27
+ role: 'user',
28
+ content: [
29
+ {
30
+ image: {
31
+ format: image_format,
32
+ source: {
33
+ bytes: image_bytes
34
+ }
35
+ }
36
+ },
37
+ {
38
+ text: prompt
39
+ }
40
+ ]
41
+ }
42
+ ]
43
+
44
+ # The `converse` method of the Bedrock Ruby SDK is used to interact with
45
+ # LLMs in a standardized way, using a "messages" schema that supports
46
+ # text, images, and tool calls, unlike `invoke_model`, which requires
47
+ # model-specific payloads. Note that this prevents fine-grained control
48
+ # of image processing parameters that some models may support.
49
+ #
50
+ # Examples of supported models:
51
+ # - Amazon Nova Pro (supports text and images)
52
+ # - Amazon Nova Lite (supports text and images)
53
+ # - Anthropic Claude / Opus (supports text and images)
54
+ response = @client.converse(model_id: model_id,
55
+ messages: messages)
56
+
57
+ response.output.message.content.first.text
45
58
  end
46
59
 
47
60
  private
48
61
 
62
+ def image_format_for(path)
63
+ content_type = Marcel::MimeType.for(Pathname.new(path))
64
+ case content_type
65
+ when 'image/jpeg' then 'jpeg'
66
+ when 'image/png' then 'png'
67
+ else
68
+ raise ArgumentError, "Unsupported image type: #{content_type || 'unknown'}"
69
+ end
70
+ end
71
+
49
72
  def resize_if_needed(file)
50
73
  if File.size(file) < 4_000_000
51
74
  file
@@ -1,12 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module AltText
4
+ # Bedrock model IDs evolve over time, so this mapping is intentionally
5
+ # centralized and easy to update as models are added, renamed, or retired.
6
+ # Entries here are expected to work with the Bedrock Ruby SDK `converse` API.
4
7
  class LLMRegistry
5
8
  LLM_MAP = {
6
- 'default' => 'anthropic.claude-3-5-sonnet-20240620-v1:0',
7
- 'sonnet3.51' => 'anthropic.claude-3-5-sonnet-20240620-v1:0',
8
- 'sonnet3.52' => 'anthropic.claude-3-5-sonnet-20241022-v2:0',
9
- 'sonnet3.571' => 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
9
+ 'default' => 'us.amazon.nova-pro-v1:0',
10
+ 'novalite' => 'amazon.nova-lite-v1:0',
11
+ 'sonnet4.5' => 'anthropic.claude-sonnet-4-5',
10
12
  'novapro' => 'us.amazon.nova-pro-v1:0'
11
13
  }.freeze
12
14
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module AltText
4
- VERSION = '0.1.1'
4
+ VERSION = '0.2.0'
5
5
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: alt_text
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alex Kiessling
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2025-11-18 00:00:00.000000000 Z
10
+ date: 2026-03-11 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: aws-sdk-bedrockruntime
@@ -37,6 +37,20 @@ dependencies:
37
37
  - - "~>"
38
38
  - !ruby/object:Gem::Version
39
39
  version: 3.1.8
40
+ - !ruby/object:Gem::Dependency
41
+ name: marcel
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '1.1'
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '1.1'
40
54
  - !ruby/object:Gem::Dependency
41
55
  name: mini_magick
42
56
  requirement: !ruby/object:Gem::Requirement