alt_text 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +79 -0
- data/bin/alt_text +55 -0
- data/lib/alt_text/client.rb +61 -0
- data/lib/alt_text/llm_registry.rb +21 -0
- data/lib/alt_text.rb +7 -0
- data/prompt.txt +22 -0
- metadata +89 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 74c7e9c622b606ca65db9350ee5a3f662f07c6c24c93b0989b6d7e64c2cb93e9
|
4
|
+
data.tar.gz: 12cf5e8099bd768c53334eb2a00a84d1c1f9bf3715032a04f8846f4cab7725f6
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7d47cd62ff68eb4e9678e5d58af2d75e827e238990104e35ce93dd3eaf304b3334715dd89df1fd4ea82714f3050877d3410585dcb9d2bfd41d7761332239257c
|
7
|
+
data.tar.gz: ac4bc91f0ee83fae79bea43ed7bbc8d4020bc1596afd2ee61b4905634d7ddabb2caafebb1d0c138da46ff1161fae698ee3a06ccb556e7cc6c0cacd0154127e49
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2025 Penn State University Libraries
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
## Alt Text Generator
|
2
|
+
|
3
|
+
This gem uses the AWS SDK for Ruby to send an image and a prompt to an LLM hosted on Amazon Bedrock, which generates alt text for the image.
|
4
|
+
|
5
|
+
### Ruby Client Usage
|
6
|
+
|
7
|
+
This gem uses imagemagick to resize large images, so you will need to install imagemagick:
|
8
|
+
|
9
|
+
Mac:
|
10
|
+
|
11
|
+
```
|
12
|
+
brew install imagemagick
|
13
|
+
```
|
14
|
+
|
15
|
+
Ubuntu:
|
16
|
+
|
17
|
+
```
|
18
|
+
apt-get update
|
19
|
+
apt-get install imagemagick
|
20
|
+
```
|
21
|
+
|
22
|
+
Then, add the gem to your project:
|
23
|
+
|
24
|
+
In the Gemfile:
|
25
|
+
```
|
26
|
+
# Gemfile
|
27
|
+
gem 'alt_text'
|
28
|
+
```
|
29
|
+
```
|
30
|
+
bundle install
|
31
|
+
```
|
32
|
+
|
33
|
+
Or, via `gem install`:
|
34
|
+
|
35
|
+
```
|
36
|
+
gem install alt_text
|
37
|
+
```
|
38
|
+
|
39
|
+
Instantiate the client with injected AWS credentials:
|
40
|
+
|
41
|
+
```
|
42
|
+
client = AltText::Client.new(
|
43
|
+
access_key: ENV['YOUR_ACCESS_KEY_ID'],
|
44
|
+
secret_key: ENV['YOUR_SECRET_ACCESS_KEY'],
|
45
|
+
region: 'us-east-1'
|
46
|
+
)
|
47
|
+
```
|
48
|
+
|
49
|
+
Call the `#process_image` method with the image path, prompt, and LLM ID as arguments:
|
50
|
+
|
51
|
+
```
|
52
|
+
client.process_image('folder/image.png', prompt: 'Please generate alt text', model_id: 'sonnet3.51')
|
53
|
+
```
|
54
|
+
|
55
|
+
*Note: A sample prompt can be found in `prompt.txt`.*
|
56
|
+
|
57
|
+
### CLI Usage
|
58
|
+
|
59
|
+
Copy the `.env.sample` file to `.env` and add your AWS credentials.
|
60
|
+
|
61
|
+
```
|
62
|
+
cp .env.sample .env
|
63
|
+
```
|
64
|
+
|
65
|
+
General CLI command to generate Alt Text for images in the `images/` directory:
|
66
|
+
|
67
|
+
```
|
68
|
+
bundle exec bin/alt_text \
|
69
|
+
-s output/output.txt \
|
70
|
+
-l default \
|
71
|
+
-d images \
|
72
|
+
-p prompt.txt
|
73
|
+
```
|
74
|
+
|
75
|
+
Run this for help:
|
76
|
+
|
77
|
+
```
|
78
|
+
bundle exec bin/alt_text -h
|
79
|
+
```
|
data/bin/alt_text
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Command-line front end for AltText.
#
# Walks a folder of images, asks the configured LLM for alt text for each
# file, and writes one "<path>: \t<alt text>" line per file to the save file.
# AWS credentials and region are read from the environment
# (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION).
require_relative "../lib/alt_text"
# Requiring "dotenv/load" loads `.env` from the working directory, so no
# separate Dotenv.load call is needed further down.
require "dotenv/load"
require "fileutils"
require "optparse"

# ---------- Defaults ----------
options = {
  save_file: 'output/output.txt',
  model: 'default',
  prompt_file: 'prompt.txt',
  folder: 'images/'
}

# ---------- CLI Parsing ----------
OptionParser.new do |opts|
  opts.banner = "Usage: bundle exec bin/alt_text [options]"

  opts.on('-s FILE', '--save FILE', 'Save output file path') { |v| options[:save_file] = v }
  opts.on('-l MODEL', '--llm MODEL', "Model name (#{AltText::LLMRegistry.available.join(', ')})") { |v| options[:model] = v }
  opts.on('-d FOLDER', '--dir FOLDER', 'Folder to process') { |v| options[:folder] = v }
  opts.on('-p FILE', '--prompt FILE', 'Prompt file path') { |v| options[:prompt_file] = v }
end.parse!

# ---------- Helpers ----------

# Recursively lists the regular files under +path+, skipping any file whose
# lower-cased name ends with +str_exclude+.
def list_files_scandir(path = '.', str_exclude = 'pdf')
  Dir.glob(File.join(path, '**', '*'))
     .select { |f| File.file?(f) && !f.downcase.end_with?(str_exclude) }
end

# ---------- Setup ----------
model_id = options[:model]
# Read the inputs before touching the save file, so a missing prompt file
# does not leave a truncated output file behind.
prompt_string = File.read(options[:prompt_file])
files = list_files_scandir(options[:folder])

client = AltText::Client.new(
  access_key: ENV['AWS_ACCESS_KEY_ID'],
  secret_key: ENV['AWS_SECRET_ACCESS_KEY'],
  region: ENV['AWS_REGION']
)

# The default save path lives in `output/`, which may not exist yet.
FileUtils.mkdir_p(File.dirname(options[:save_file]))

# ---------- Process Images ----------
# Mode 'w' truncates an existing file; the block form guarantees the handle
# is flushed and closed even if processing aborts.
File.open(options[:save_file], 'w') do |output_file|
  files.each_with_index do |file, index|
    puts "Processing image #{index + 1} of #{files.size}: #{file}"
    # A failure on one image is recorded in the output and does not stop the run.
    output = begin
      client.process_image(file, prompt: prompt_string, model_id: model_id)
    rescue StandardError => e
      "Processing error: #{e.message}"
    end
    output_file.puts("#{file}: \t#{output}")
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'aws-sdk-bedrockruntime'
require 'base64'
require 'json'
require 'mini_magick'
require 'tempfile'
|
6
|
+
|
7
|
+
module AltText
  # Sends an image plus a text prompt to a model through
  # Aws::BedrockRuntime::Client#invoke_model and returns the generated text.
  class Client
    # Files at or above this many bytes are downscaled before being sent.
    MAX_IMAGE_BYTES = 4_000_000
    # ImageMagick geometry used when downscaling: 800px wide, height to scale.
    RESIZE_GEOMETRY = '800x'
    # Media type declared for downscaled images and for unknown extensions.
    JPEG_MEDIA_TYPE = 'image/jpeg'
    # File extension (lower-cased) => media type declared in the request.
    MEDIA_TYPES = {
      '.jpg' => JPEG_MEDIA_TYPE,
      '.jpeg' => JPEG_MEDIA_TYPE,
      '.png' => 'image/png',
      '.gif' => 'image/gif',
      '.webp' => 'image/webp'
    }.freeze

    # @param access_key [String] AWS access key id
    # @param secret_key [String] AWS secret access key
    # @param region [String] AWS region, e.g. 'us-east-1'
    def initialize(access_key:, secret_key:, region:)
      @client = Aws::BedrockRuntime::Client.new(
        access_key_id: access_key,
        secret_access_key: secret_key,
        region: region
      )
    end

    # Generates text for one image.
    #
    # @param image_path [String] path to the image file
    # @param prompt [String] instruction text sent alongside the image
    # @param model_id [String] a key known to AltText::LLMRegistry
    # @return [String] the text of the first content item in the response
    # @raise [ArgumentError] if AltText::LLMRegistry cannot resolve +model_id+
    def process_image(image_path, prompt:, model_id:)
      model_id = AltText::LLMRegistry.resolve(model_id)
      encoded_image, media_type = encode_image(image_path)

      payload = {
        messages: [
          { role: 'user',
            content: [
              { type: 'image',
                source: { type: 'base64',
                          media_type: media_type,
                          data: encoded_image } },
              { type: 'text',
                text: prompt }
            ] }
        ],
        max_tokens: 10_000,
        anthropic_version: 'bedrock-2023-05-31'
      }

      response = @client.invoke_model(model_id: model_id,
                                      content_type: 'application/json',
                                      body: payload.to_json)
      JSON.parse(response.body.read)['content'][0]['text']
    end

    private

    # Returns [base64_data, media_type] for the image at +image_path+.
    # Small files are sent unchanged with a media type taken from their
    # extension; large files are downscaled and re-encoded as JPEG.
    def encode_image(image_path)
      if File.size(image_path) < MAX_IMAGE_BYTES
        [Base64.strict_encode64(File.binread(image_path)), media_type_for(image_path)]
      else
        [Base64.strict_encode64(downscaled_jpeg_bytes(image_path)), JPEG_MEDIA_TYPE]
      end
    end

    # Maps the file extension to a media type, falling back to JPEG for
    # extensions that are not in MEDIA_TYPES.
    def media_type_for(image_path)
      MEDIA_TYPES.fetch(File.extname(image_path).downcase, JPEG_MEDIA_TYPE)
    end

    # Downscales the image, converts it to JPEG so the declared media type
    # matches the bytes, and returns the resulting bytes. The block form of
    # Tempfile.create removes the temporary file even if a step raises.
    def downscaled_jpeg_bytes(image_path)
      Tempfile.create(['alt_text', '.jpg']) do |tmp|
        image = MiniMagick::Image.open(image_path)
        image.resize RESIZE_GEOMETRY
        image.format 'jpg'
        image.write tmp.path
        File.binread(tmp.path)
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module AltText
  # Maps short, user-facing model aliases to the model identifiers passed to
  # the Bedrock runtime client.
  class LLMRegistry
    # alias => model identifier
    # NOTE(review): 'sonnet3.571' maps to a claude-3-7 model; the alias looks
    # like it may have been meant as 'sonnet3.71' -- confirm before renaming.
    LLM_MAP = {
      'default' => 'anthropic.claude-3-5-sonnet-20240620-v1:0',
      'sonnet3.51' => 'anthropic.claude-3-5-sonnet-20240620-v1:0',
      'sonnet3.52' => 'anthropic.claude-3-5-sonnet-20241022-v2:0',
      'sonnet3.571' => 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
      'novapro' => 'us.amazon.nova-pro-v1:0'
    }.freeze

    # Looks up the model identifier for an alias.
    #
    # @param key [String, Symbol] one of the aliases returned by .available
    # @return [String] the mapped model identifier
    # @raise [ArgumentError] if the alias is not registered
    def self.resolve(key)
      # to_s lets callers pass :default as well as 'default'.
      LLM_MAP.fetch(key.to_s) { raise ArgumentError, "Unsupported LLM: #{key}" }
    end

    # @return [Array<String>] every registered alias, in definition order
    def self.available
      LLM_MAP.keys
    end
  end
end
|
data/lib/alt_text.rb
ADDED
data/prompt.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
You are a professional technical writer trained in crafting image alternative text for accessibility purposes. Your task is to describe the visual content of uploaded images in clear, concise, and objective language that supports screen reader users. Describe the visual content of the image in one concise, objective sentence (≤100 characters).
|
2
|
+
When given an image, follow these guidelines to create appropriate and effective alternative text descriptions.
|
3
|
+
1. Technical guidelines:
|
4
|
+
- Descriptions should not exceed 100 characters.
|
5
|
+
- Use precise, simple language and clear terminology.
|
6
|
+
- Use bias-free language: avoid assumptions about gender, ability, race, or age.
|
7
|
+
2. Language guidelines:
|
8
|
+
- Do not use ambiguous adjectives (e.g., “tranquil,” “vintage,” “rural”).
|
9
|
+
- Do not use subjective adjectives (e.g., "traditional," "rustic")
|
10
|
+
- Avoid assumptions or guessing unclear elements.
|
11
|
+
- Do not include “image of,” “photo of,” or similar phrases.
|
12
|
+
- Focus only on the visible, essential elements in the image.
|
13
|
+
3. Output guidelines:
|
14
|
+
- Assume one image per input and respond with one alt text string.
|
15
|
+
Please see the examples provided to help guide your description structure.
|
16
|
+
<examples>
|
17
|
+
"A lighthouse on a rocky coast under a cloudy sky."
|
18
|
+
"Three people walking along a path surrounded by green trees."
|
19
|
+
“A person wearing a beret and glasses eating from a bowl.”
|
20
|
+
</examples>
|
21
|
+
Generate the alt text description following these rules.
|
22
|
+
|
metadata
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: alt_text
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Alex Kiessling
|
8
|
+
bindir: bin
|
9
|
+
cert_chain: []
|
10
|
+
date: 2025-09-15 00:00:00.000000000 Z
|
11
|
+
dependencies:
|
12
|
+
- !ruby/object:Gem::Dependency
|
13
|
+
name: aws-sdk-bedrockruntime
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
15
|
+
requirements:
|
16
|
+
- - "~>"
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: 1.55.0
|
19
|
+
type: :runtime
|
20
|
+
prerelease: false
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
22
|
+
requirements:
|
23
|
+
- - "~>"
|
24
|
+
- !ruby/object:Gem::Version
|
25
|
+
version: 1.55.0
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: dotenv
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
29
|
+
requirements:
|
30
|
+
- - "~>"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 3.1.8
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: 3.1.8
|
40
|
+
- !ruby/object:Gem::Dependency
|
41
|
+
name: mini_magick
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - "~>"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: 5.3.0
|
47
|
+
type: :runtime
|
48
|
+
prerelease: false
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - "~>"
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 5.3.0
|
54
|
+
description: AltText helps with accessibility by generating alt text for images.
|
55
|
+
email:
|
56
|
+
- ajk5603@psu.edu
|
57
|
+
executables: []
|
58
|
+
extensions: []
|
59
|
+
extra_rdoc_files: []
|
60
|
+
files:
|
61
|
+
- LICENSE.txt
|
62
|
+
- README.md
|
63
|
+
- bin/alt_text
|
64
|
+
- lib/alt_text.rb
|
65
|
+
- lib/alt_text/client.rb
|
66
|
+
- lib/alt_text/llm_registry.rb
|
67
|
+
- prompt.txt
|
68
|
+
homepage: https://github.com/psu-libraries/alt_text
|
69
|
+
licenses:
|
70
|
+
- MIT
|
71
|
+
metadata: {}
|
72
|
+
rdoc_options: []
|
73
|
+
require_paths:
|
74
|
+
- lib
|
75
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '3.4'
|
80
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: '0'
|
85
|
+
requirements: []
|
86
|
+
rubygems_version: 3.6.2
|
87
|
+
specification_version: 4
|
88
|
+
summary: Generates alt text
|
89
|
+
test_files: []
|