alt_text 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 74c7e9c622b606ca65db9350ee5a3f662f07c6c24c93b0989b6d7e64c2cb93e9
4
+ data.tar.gz: 12cf5e8099bd768c53334eb2a00a84d1c1f9bf3715032a04f8846f4cab7725f6
5
+ SHA512:
6
+ metadata.gz: 7d47cd62ff68eb4e9678e5d58af2d75e827e238990104e35ce93dd3eaf304b3334715dd89df1fd4ea82714f3050877d3410585dcb9d2bfd41d7761332239257c
7
+ data.tar.gz: ac4bc91f0ee83fae79bea43ed7bbc8d4020bc1596afd2ee61b4905634d7ddabb2caafebb1d0c138da46ff1161fae698ee3a06ccb556e7cc6c0cacd0154127e49
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2025 Penn State University Libraries
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,79 @@
1
+ ## Alt Text Generator
2
+
3
+ This uses Ruby's AWS SDK to send images and a prompt to an LLM in Amazon's Bedrock to generate Alt Text for the images.
4
+
5
+ ### Ruby Client Usage
6
+
7
+ This gem uses imagemagick to resize large images, so you will need to install imagemagick:
8
+
9
+ Mac:
10
+
11
+ ```
12
+ brew install imagemagick
13
+ ```
14
+
15
+ Ubuntu:
16
+
17
+ ```
18
+ apt-get update
19
+ apt-get install imagemagick
20
+ ```
21
+
22
+ Then, add the gem to your project:
23
+
24
+ In the Gemfile:
25
+ ```
26
+ # Gemfile
27
+ gem 'alt_text'
28
+ ```
29
+ ```
30
+ bundle install
31
+ ```
32
+
33
+ Or, via `gem install`:
34
+
35
+ ```
36
+ gem install alt_text
37
+ ```
38
+
39
+ Instantiate the client with injected AWS credentials:
40
+
41
+ ```
42
+ client = AltText::Client.new(
43
+   access_key: ENV['YOUR_ACCESS_KEY_ID'],
44
+   secret_key: ENV['YOUR_SECRET_ACCESS_KEY'],
45
+   region: 'us-east-1'
46
+ )
47
+ ```
48
+
49
+ Call the `#process_image` method with the image path, plus the `prompt:` and `model_id:` keyword arguments (`model_id:` is one of the short LLM names, e.g. `default`):
50
+
51
+ ```
52
+ client.process_image('folder/image.png', prompt: 'Please generate alt text', model_id: 'sonnet3.51')
53
+ ```
54
+
55
+ *Note: A sample prompt can be found in `prompt.txt`.*
56
+
57
+ ### CLI Usage
58
+
59
+ Copy the `.env.sample` file to `.env` and add your AWS credentials.
60
+
61
+ ```
62
+ cp .env.sample .env
63
+ ```
64
+
65
+ General CLI command to generate Alt Text for images in the `images/` directory:
66
+
67
+ ```
68
+ bundle exec bin/alt_text \
69
+ -s output/output.txt \
70
+ -l default \
71
+ -d images \
72
+ -p prompt.txt
73
+ ```
74
+
75
+ Run this for help:
76
+
77
+ ```
78
+ bundle exec bin/alt_text -h
79
+ ```
data/bin/alt_text ADDED
@@ -0,0 +1,55 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative "../lib/alt_text"
3
+ require "dotenv/load"
4
+ require "optparse"
5
+
6
# ---------- Defaults ----------
# Values used for any option the caller does not pass on the command line.
options = {
  save_file: 'output/output.txt',
  model: 'default',
  prompt_file: 'prompt.txt',
  folder: 'images/'
}

# ---------- CLI Parsing ----------
# Each switch overwrites the matching default in `options`; `parse!` consumes
# the recognized switches from ARGV.
OptionParser.new do |opts|
  # The executable ships as bin/alt_text (no .rb extension).
  opts.banner = 'Usage: bundle exec bin/alt_text [options]'

  opts.on('-s FILE', '--save FILE', 'Save output file path') { |v| options[:save_file] = v }
  opts.on('-l MODEL', '--llm MODEL', "Model name (#{AltText::LLMRegistry.available.join(', ')})") { |v| options[:model] = v }
  opts.on('-d FOLDER', '--dir FOLDER', 'Folder to process') { |v| options[:folder] = v }
  opts.on('-p FILE', '--prompt FILE', 'Prompt file path') { |v| options[:prompt_file] = v }
end.parse!
23
+
24
+ # ---------- Helpers ----------
25
# Recursively lists the regular files under +path+, skipping any whose name
# ends with one of the excluded suffixes.
#
# @param path [String] directory to search
# @param str_exclude [String, Array<String>, nil] suffix (or suffixes) to
#   skip; matching is case-insensitive on both sides. +nil+ or an empty
#   array excludes nothing.
# @return [Array<String>] paths of the matching files
def list_files_scandir(path = '.', str_exclude = 'pdf')
  # Down-case the suffixes as well as the path; otherwise an upper-case
  # suffix such as 'PDF' could never match the down-cased file name.
  suffixes = Array(str_exclude).map { |suffix| suffix.to_s.downcase }

  Dir.glob(File.join(path, '**', '*')).select do |candidate|
    File.file?(candidate) && !candidate.downcase.end_with?(*suffixes)
  end
end
29
+
30
# ---------- Setup ----------
require 'fileutils'

model_id = options[:model]
# Read the prompt before touching the output file so that a missing prompt
# file does not truncate the results of a previous run.
prompt_string = File.read(options[:prompt_file])
files = list_files_scandir(options[:folder])

Dotenv.load('.env')

client = AltText::Client.new(
  access_key: ENV['AWS_ACCESS_KEY_ID'],
  secret_key: ENV['AWS_SECRET_ACCESS_KEY'],
  region: ENV['AWS_REGION']
)

# ---------- Process Images ----------
# Create the destination directory if needed (the default is output/).
FileUtils.mkdir_p(File.dirname(options[:save_file]))

# Mode 'w' truncates any existing file; the block form closes (and flushes)
# the handle even when the loop is interrupted.
File.open(options[:save_file], 'w') do |output_file|
  files.each_with_index do |file, index|
    puts "Processing image #{index + 1} of #{files.size}: #{file}"

    # A failure on one image is recorded in the output and does not stop the
    # remaining images from being processed.
    output =
      begin
        client.process_image(file, prompt: prompt_string, model_id: model_id)
      rescue StandardError => e
        "Processing error: #{e.message}"
      end

    output_file.puts("#{file}: \t#{output}")
  end
end
data/lib/alt_text/client.rb ADDED
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'aws-sdk-bedrockruntime'
4
+ require 'mini_magick'
5
+ require 'base64'
6
+
7
module AltText
  # Sends an image together with a text prompt to a model through the Bedrock
  # runtime client and returns the text of the model's first content item.
  #
  # NOTE(review): the request body uses the Anthropic message schema
  # (`anthropic_version`); confirm that every model exposed by
  # AltText::LLMRegistry accepts it.
  class Client
    # Files of at least this many bytes are downscaled before being sent.
    RESIZE_THRESHOLD_BYTES = 4_000_000

    # Media type declared in the request, keyed by lower-case file extension.
    MEDIA_TYPES = {
      '.jpg' => 'image/jpeg',
      '.jpeg' => 'image/jpeg',
      '.png' => 'image/png',
      '.gif' => 'image/gif',
      '.webp' => 'image/webp'
    }.freeze

    # Used when the extension is missing or unknown (the previous behavior
    # for every file).
    DEFAULT_MEDIA_TYPE = 'image/jpeg'

    # @param access_key [String] AWS access key id
    # @param secret_key [String] AWS secret access key
    # @param region [String] AWS region
    def initialize(access_key:, secret_key:, region:)
      @client = Aws::BedrockRuntime::Client.new(
        access_key_id: access_key,
        secret_access_key: secret_key,
        region: region
      )
    end

    # Generates text for one image.
    #
    # @param image_path [String] path of the image file to describe
    # @param prompt [String] instructions sent alongside the image
    # @param model_id [String] short model name understood by
    #   AltText::LLMRegistry.resolve
    # @return [String] the `text` of the first `content` item in the response
    # @raise [ArgumentError] when the registry does not know +model_id+
    def process_image(image_path, prompt:, model_id:)
      model_id = AltText::LLMRegistry.resolve(model_id)
      payload = build_payload(encode_image(image_path), media_type_for(image_path), prompt)

      response = @client.invoke_model(model_id: model_id,
                                      content_type: 'application/json',
                                      body: payload.to_json)
      JSON.parse(response.body.read)['content'][0]['text']
    end

    private

    # Returns the Base64 encoding of the (possibly downscaled) image bytes.
    # Any temporary file created for resizing is removed even if reading fails.
    def encode_image(image_path)
      source = resize_if_needed(image_path)
      Base64.strict_encode64(File.binread(source))
    ensure
      # Only the temporary copy responds to close!; a plain path is left alone.
      source.close! if source.respond_to?(:close!)
    end

    # Picks the media type from the file extension instead of always claiming
    # JPEG.
    # NOTE(review): assumes the resized copy keeps the source file's format,
    # so the original extension also describes the bytes that are sent —
    # confirm against MiniMagick's write behavior.
    def media_type_for(image_path)
      MEDIA_TYPES.fetch(File.extname(image_path.to_s).downcase, DEFAULT_MEDIA_TYPE)
    end

    # Builds the request body: one user message holding the image followed by
    # the prompt text.
    def build_payload(encoded_image, media_type, prompt)
      {
        messages: [
          { role: 'user',
            content: [
              { type: 'image',
                source: { type: 'base64', media_type: media_type, data: encoded_image } },
              { type: 'text', text: prompt }
            ] }
        ],
        max_tokens: 10_000,
        anthropic_version: 'bedrock-2023-05-31'
      }
    end

    # Returns +file+ unchanged when it is below the size threshold; otherwise
    # returns a Tempfile holding a copy resized to 800 pixels wide.
    def resize_if_needed(file)
      return file if File.size(file) < RESIZE_THRESHOLD_BYTES

      # A fixed prefix plus the source extension avoids embedding the whole
      # input path in the temporary file name.
      tmp = Tempfile.new(['alt_text', File.extname(file)])
      image = MiniMagick::Image.open(file)
      image.resize '800x'
      image.write tmp.path
      tmp
    rescue StandardError
      # Do not leave the temporary file behind when resizing fails.
      tmp&.close!
      raise
    end
  end
end
data/lib/alt_text/llm_registry.rb ADDED
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
module AltText
  # Maps the short model names accepted by the client and CLI to the model
  # identifiers passed to the Bedrock runtime.
  class LLMRegistry
    LLM_MAP = {
      'default' => 'anthropic.claude-3-5-sonnet-20240620-v1:0',
      'sonnet3.51' => 'anthropic.claude-3-5-sonnet-20240620-v1:0',
      'sonnet3.52' => 'anthropic.claude-3-5-sonnet-20241022-v2:0',
      'sonnet3.571' => 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
      'novapro' => 'us.amazon.nova-pro-v1:0'
    }.freeze

    # Looks up the model identifier for a short name.
    #
    # @param key [String, Symbol] short model name, e.g. 'default'
    # @return [String] the mapped model identifier
    # @raise [ArgumentError] when +key+ is not a registered name
    def self.resolve(key)
      # to_s lets callers pass symbols (:default) as well as strings.
      LLM_MAP.fetch(key.to_s) { raise ArgumentError, "Unsupported LLM: #{key}" }
    end

    # @return [Array<String>] every registered short model name
    def self.available
      LLM_MAP.keys
    end
  end
end
data/lib/alt_text.rb ADDED
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'alt_text/client'
4
+ require_relative 'alt_text/llm_registry'
5
+
6
# Top-level namespace; its members are defined in the files required above.
module AltText; end
data/prompt.txt ADDED
@@ -0,0 +1,22 @@
1
+ You are a professional technical writer trained in crafting image alternative text for accessibility purposes. Your task is to describe the visual content of uploaded images in clear, concise, and objective language that supports screen reader users. Describe the visual content of the image in one concise, objective sentence (≤100 characters).
2
+ When given an image, follow these guidelines to create appropriate and effective alternative text descriptions.
3
+ 1. Technical guidelines:
4
+ - Descriptions should not exceed 100 characters.
5
+ - Use precise, simple language and clear terminology.
6
+ - Use bias-free language: avoid assumptions about gender, ability, race, or age.
7
+ 2. Language guidelines:
8
+ - Do not use ambiguous adjectives (e.g., “tranquil,” “vintage,” “rural”).
9
+ - Do not use subjective adjectives (e.g., "traditional," "rustic").
10
+ - Avoid assumptions or guessing unclear elements.
11
+ - Do not include “image of,” “photo of,” or similar phrases.
12
+ - Focus only on the visible, essential elements in the image.
13
+ 3. Output guidelines:
14
+ - Assume one image per input and respond with one alt text string.
15
+ Please see the examples provided to help guide your description structure.
16
+ <examples>
17
+ "A lighthouse on a rocky coast under a cloudy sky."
18
+ "Three people walking along a path surrounded by green trees."
19
+ "A person wearing a beret and glasses eating from a bowl."
20
+ </examples>
21
+ Generate the alt text description following these rules.
22
+
metadata ADDED
@@ -0,0 +1,89 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: alt_text
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Alex Kiessling
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 2025-09-15 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: aws-sdk-bedrockruntime
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: 1.55.0
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: 1.55.0
26
+ - !ruby/object:Gem::Dependency
27
+ name: dotenv
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: 3.1.8
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: 3.1.8
40
+ - !ruby/object:Gem::Dependency
41
+ name: mini_magick
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: 5.3.0
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: 5.3.0
54
+ description: AltText helps with accessibility by generating alt text for images.
55
+ email:
56
+ - ajk5603@psu.edu
57
+ executables: []
58
+ extensions: []
59
+ extra_rdoc_files: []
60
+ files:
61
+ - LICENSE.txt
62
+ - README.md
63
+ - bin/alt_text
64
+ - lib/alt_text.rb
65
+ - lib/alt_text/client.rb
66
+ - lib/alt_text/llm_registry.rb
67
+ - prompt.txt
68
+ homepage: https://github.com/psu-libraries/alt_text
69
+ licenses:
70
+ - MIT
71
+ metadata: {}
72
+ rdoc_options: []
73
+ require_paths:
74
+ - lib
75
+ required_ruby_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: '3.4'
80
+ required_rubygems_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ requirements: []
86
+ rubygems_version: 3.6.2
87
+ specification_version: 4
88
+ summary: Generates alt text
89
+ test_files: []