alt_text 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +17 -5
- data/lib/alt_text/client.rb +46 -23
- data/lib/alt_text/llm_registry.rb +6 -4
- data/lib/alt_text/version.rb +1 -1
- metadata +16 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 3b0d1d1ac9b9c9191b85d04a377a5583fc4a1b2ee54907151346a8483ef4b588
|
|
4
|
+
data.tar.gz: 49bcb6cb50d85b2af6b2145b00746d0927ec14b45969265b58593a754e437c8f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 961d27e13b64d05ba621303df804df9cf6ba8baea87a1220da968722e6122d1c9eda20db2a32fd537652dc2e1ce984f820f3c998737b07ba045dd649c0606e1d
|
|
7
|
+
data.tar.gz: 8a91c62cfb49af32a0dde25422ed7fc8e75e2562ce64dc21acec851d673314f4323113383033ad35ee5c708db21dcc0931bdade6106aedccf826beefe7aa2f1a
|
data/README.md
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
This uses Ruby's AWS SDK to send images and a prompt to an LLM in Amazon's Bedrock to generate Alt Text for the images.
|
|
4
4
|
|
|
5
|
+
The client uses Bedrock's `converse` API and currently supports JPEG and PNG inputs.
|
|
6
|
+
|
|
5
7
|
### Ruby Client Usage
|
|
6
8
|
|
|
7
9
|
This gem uses imagemagick to resize large images, so you will need to install imagemagick:
|
|
@@ -39,19 +41,29 @@ gem install alt_text
|
|
|
39
41
|
Instantiate the client with injected AWS credentials:
|
|
40
42
|
|
|
41
43
|
```
|
|
42
|
-
client = AltText::Client.new
|
|
43
|
-
|
|
44
|
-
|
|
44
|
+
client = AltText::Client.new(
|
|
45
|
+
access_key: ENV['AWS_ACCESS_KEY_ID'],
|
|
46
|
+
secret_key: ENV['AWS_SECRET_ACCESS_KEY'],
|
|
45
47
|
region: 'us-east-1'
|
|
46
|
-
|
|
48
|
+
)
|
|
47
49
|
```
|
|
48
50
|
|
|
49
51
|
Call the `#process_image` method with the image path, prompt, and LLM ID as arguments:
|
|
50
52
|
|
|
51
53
|
```
|
|
52
|
-
client.process_image(
|
|
54
|
+
client.process_image(
|
|
55
|
+
'folder/image.png',
|
|
56
|
+
prompt: 'Please generate alt text',
|
|
57
|
+
model_id: 'default'
|
|
58
|
+
)
|
|
53
59
|
```
|
|
54
60
|
|
|
61
|
+
Supported image types:
|
|
62
|
+
|
|
63
|
+
- `.jpg`
|
|
64
|
+
- `.jpeg`
|
|
65
|
+
- `.png`
|
|
66
|
+
|
|
55
67
|
*Note: A sample prompt can be found in `prompt.txt`.*
|
|
56
68
|
|
|
57
69
|
### CLI Usage
|
data/lib/alt_text/client.rb
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require 'aws-sdk-bedrockruntime'
|
|
4
4
|
require 'mini_magick'
|
|
5
|
-
require '
|
|
5
|
+
require 'marcel'
|
|
6
6
|
|
|
7
7
|
module AltText
|
|
8
8
|
class Client
|
|
@@ -16,36 +16,59 @@ module AltText
|
|
|
16
16
|
|
|
17
17
|
def process_image(image_path, prompt:, model_id:)
|
|
18
18
|
model_id = AltText::LLMRegistry.resolve(model_id)
|
|
19
|
+
image_format = image_format_for(image_path)
|
|
19
20
|
tmp_image = resize_if_needed(image_path)
|
|
20
21
|
|
|
21
|
-
|
|
22
|
+
image_bytes = File.binread(tmp_image)
|
|
22
23
|
tmp_image.close! if tmp_image.is_a?(Tempfile)
|
|
23
24
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
25
|
+
messages = [
|
|
26
|
+
{
|
|
27
|
+
role: 'user',
|
|
28
|
+
content: [
|
|
29
|
+
{
|
|
30
|
+
image: {
|
|
31
|
+
format: image_format,
|
|
32
|
+
source: {
|
|
33
|
+
bytes: image_bytes
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
text: prompt
|
|
39
|
+
}
|
|
40
|
+
]
|
|
41
|
+
}
|
|
42
|
+
]
|
|
43
|
+
|
|
44
|
+
# The `converse` method of the Bedrock Ruby SDK is used to interact with
|
|
45
|
+
# LLM models in a standardized way, using a "messages" schema that supports
|
|
46
|
+
# text, images, and tool calls, unlike `invoke_model`, which requires
|
|
47
|
+
# model-specific payloads. Note that this prevents fine-grained control
|
|
48
|
+
# of image processing parameters that some models may support.
|
|
49
|
+
#
|
|
50
|
+
# Examples of supported models:
|
|
51
|
+
# - Amazon Nova Pro (supports text and images)
|
|
52
|
+
# - Amazon Nova Lite (supports text and images)
|
|
53
|
+
# - Anthropic Claude / Opus (supports text and images)
|
|
54
|
+
response = @client.converse(model_id: model_id,
|
|
55
|
+
messages: messages)
|
|
56
|
+
|
|
57
|
+
response.output.message.content.first.text
|
|
45
58
|
end
|
|
46
59
|
|
|
47
60
|
private
|
|
48
61
|
|
|
62
|
+
def image_format_for(path)
|
|
63
|
+
content_type = Marcel::MimeType.for(Pathname.new(path))
|
|
64
|
+
case content_type
|
|
65
|
+
when 'image/jpeg' then 'jpeg'
|
|
66
|
+
when 'image/png' then 'png'
|
|
67
|
+
else
|
|
68
|
+
raise ArgumentError, "Unsupported image type: #{content_type || 'unknown'}"
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
|
|
49
72
|
def resize_if_needed(file)
|
|
50
73
|
if File.size(file) < 4_000_000
|
|
51
74
|
file
|
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module AltText
|
|
4
|
+
# Bedrock model IDs evolve over time, so this mapping is intentionally
|
|
5
|
+
# centralized and easy to update as models are added, renamed, or retired.
|
|
6
|
+
# Entries here are expected to work with the Bedrock Ruby SDK `converse` API.
|
|
4
7
|
class LLMRegistry
|
|
5
8
|
LLM_MAP = {
|
|
6
|
-
'default' => '
|
|
7
|
-
'
|
|
8
|
-
'
|
|
9
|
-
'sonnet3.571' => 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
|
|
9
|
+
'default' => 'us.amazon.nova-pro-v1:0',
|
|
10
|
+
'novalite' => 'amazon.nova-lite-v1:0',
|
|
11
|
+
'sonnet4.5' => 'anthropic.claude-sonnet-4-5',
|
|
10
12
|
'novapro' => 'us.amazon.nova-pro-v1:0'
|
|
11
13
|
}.freeze
|
|
12
14
|
|
data/lib/alt_text/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: alt_text
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Alex Kiessling
|
|
8
8
|
bindir: bin
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date:
|
|
10
|
+
date: 2026-03-11 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
12
|
- !ruby/object:Gem::Dependency
|
|
13
13
|
name: aws-sdk-bedrockruntime
|
|
@@ -37,6 +37,20 @@ dependencies:
|
|
|
37
37
|
- - "~>"
|
|
38
38
|
- !ruby/object:Gem::Version
|
|
39
39
|
version: 3.1.8
|
|
40
|
+
- !ruby/object:Gem::Dependency
|
|
41
|
+
name: marcel
|
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
|
43
|
+
requirements:
|
|
44
|
+
- - "~>"
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: '1.1'
|
|
47
|
+
type: :runtime
|
|
48
|
+
prerelease: false
|
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
50
|
+
requirements:
|
|
51
|
+
- - "~>"
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: '1.1'
|
|
40
54
|
- !ruby/object:Gem::Dependency
|
|
41
55
|
name: mini_magick
|
|
42
56
|
requirement: !ruby/object:Gem::Requirement
|