activestorage-ocr 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +171 -0
- data/lib/activestorage/ocr/analyzer.rb +64 -0
- data/lib/activestorage/ocr/binary.rb +198 -0
- data/lib/activestorage/ocr/client.rb +174 -0
- data/lib/activestorage/ocr/configuration.rb +72 -0
- data/lib/activestorage/ocr/railtie.rb +77 -0
- data/lib/activestorage/ocr/result.rb +84 -0
- data/lib/activestorage/ocr/version.rb +7 -0
- data/lib/activestorage/ocr.rb +106 -0
- data/lib/activestorage-ocr.rb +3 -0
- metadata +139 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: '06842e85669ce35ba5e03695a92bf5acaccf8519cbfa57d8f4803664757bcccd'
|
|
4
|
+
data.tar.gz: d513b79b0691862d8f79df97a4fd3da02a61c56b5eacc027723a9a5515218efc
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 7bfd336fccd12c9dfa437671c0e318fedc5ead6bae106ddd91a85d05e72dc2d1efabc00d098614fce7a3791a12a94db6668110f65a32ea7894cf15646f668d7e
|
|
7
|
+
data.tar.gz: d32683900ac1e7136b5e8ef6b322acd75587dcd3c9a5aa62d08bddb5d45083137d42e19cdb02c6518c25f19cf9d704196c8dd86b0f8a2cbbd1fbd6f52c430bae
|
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# activestorage-ocr
|
|
2
|
+
|
|
3
|
+
[![CI](https://github.com/Cause-of-a-Kind/activestorage-ocr/actions/workflows/ci.yml/badge.svg)](https://github.com/Cause-of-a-Kind/activestorage-ocr/actions/workflows/ci.yml)
|
|
4
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
|
5
|
+
|
|
6
|
+
OCR for Rails Active Storage attachments, powered by Rust and [ocrs](https://github.com/robertknight/ocrs).
|
|
7
|
+
|
|
8
|
+
## Overview
|
|
9
|
+
|
|
10
|
+
`activestorage-ocr` provides optical character recognition (OCR) for files stored with Active Storage. It uses a high-performance Rust server with the pure-Rust `ocrs` OCR engine, eliminating the need for third-party OCR services or system-level dependencies.
|
|
11
|
+
|
|
12
|
+
**Key Features:**
|
|
13
|
+
- **Pure Rust** - No Tesseract or system dependencies required
|
|
14
|
+
- **Self-contained** - Models download automatically on first run (~50MB)
|
|
15
|
+
- **Fast** - Processes images in ~150ms
|
|
16
|
+
- **HTTP/JSON API** - Easy to debug and integrate
|
|
17
|
+
|
|
18
|
+
**Supported Formats:**
|
|
19
|
+
- Images: PNG, JPEG, TIFF, WebP, GIF, BMP
|
|
20
|
+
- Documents: PDF (both embedded text and scanned/image PDFs)
|
|
21
|
+
|
|
22
|
+
**Architecture:** Separate process with HTTP/JSON communication (inspired by AnyCable)
|
|
23
|
+
- **Rust server** handles CPU-intensive OCR processing
|
|
24
|
+
- **Ruby gem** provides seamless Rails integration
|
|
25
|
+
- Simple HTTP/JSON protocol for easy debugging
|
|
26
|
+
|
|
27
|
+
## Requirements
|
|
28
|
+
|
|
29
|
+
- Ruby 3.2+
|
|
30
|
+
- Rails 7.0+ with Active Storage
|
|
31
|
+
- Rust (for building from source) or pre-built binaries from releases
|
|
32
|
+
|
|
33
|
+
## Installation
|
|
34
|
+
|
|
35
|
+
Add to your Gemfile:
|
|
36
|
+
|
|
37
|
+
```ruby
|
|
38
|
+
gem "activestorage-ocr"
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Then install the OCR server binary:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
bundle install
|
|
45
|
+
bundle exec rake activestorage_ocr:install
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Quick Start
|
|
49
|
+
|
|
50
|
+
1. **Start the OCR server:**
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
bundle exec rake activestorage_ocr:start
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
2. **Use the client in your Rails app:**
|
|
57
|
+
|
|
58
|
+
```ruby
|
|
59
|
+
# In rails console or your code
|
|
60
|
+
client = ActiveStorage::Ocr::Client.new
|
|
61
|
+
|
|
62
|
+
# Check server health
|
|
63
|
+
client.healthy? # => true
|
|
64
|
+
|
|
65
|
+
# Extract text from a file
|
|
66
|
+
result = client.extract_text_from_path("/path/to/image.png", content_type: "image/png")
|
|
67
|
+
result.text # => "Extracted text..."
|
|
68
|
+
result.confidence # => 0.95
|
|
69
|
+
|
|
70
|
+
# Extract text from an Active Storage attachment
|
|
71
|
+
result = client.extract_text(document.file)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Configuration
|
|
75
|
+
|
|
76
|
+
```ruby
|
|
77
|
+
# config/initializers/activestorage_ocr.rb
|
|
78
|
+
ActiveStorage::Ocr.configure do |config|
|
|
79
|
+
config.server_url = "http://127.0.0.1:9292"
|
|
81
|
+
config.timeout = 60
|
|
82
|
+
end
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Environment Variables
|
|
86
|
+
|
|
87
|
+
| Variable | Default | Description |
|
|
88
|
+
|----------|---------|-------------|
|
|
89
|
+
| `ACTIVESTORAGE_OCR_HOST` | `127.0.0.1` | Server host |
|
|
90
|
+
| `ACTIVESTORAGE_OCR_PORT` | `9292` | Server port |
|
|
91
|
+
|
|
92
|
+
## Rake Tasks
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
# Install the OCR server binary for your platform
|
|
96
|
+
bundle exec rake activestorage_ocr:install
|
|
97
|
+
|
|
98
|
+
# Start the OCR server
|
|
99
|
+
bundle exec rake activestorage_ocr:start
|
|
100
|
+
|
|
101
|
+
# Check server health
|
|
102
|
+
bundle exec rake activestorage_ocr:health
|
|
103
|
+
|
|
104
|
+
# Show binary info (platform, path, version)
|
|
105
|
+
bundle exec rake activestorage_ocr:info
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## API Endpoints
|
|
109
|
+
|
|
110
|
+
The Rust server exposes these HTTP endpoints:
|
|
111
|
+
|
|
112
|
+
| Endpoint | Method | Description |
|
|
113
|
+
|----------|--------|-------------|
|
|
114
|
+
| `/health` | GET | Health check |
|
|
115
|
+
| `/info` | GET | Server info and supported formats |
|
|
116
|
+
| `/ocr` | POST | Extract text from uploaded file |
|
|
117
|
+
|
|
118
|
+
### Example with curl
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
# Health check
|
|
122
|
+
curl http://localhost:9292/health
|
|
123
|
+
|
|
124
|
+
# OCR an image
|
|
125
|
+
curl -X POST http://localhost:9292/ocr \
|
|
126
|
+
-F "file=@document.png;type=image/png"
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## Development
|
|
130
|
+
|
|
131
|
+
### Building from source
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
# Build the Rust server
|
|
135
|
+
cd rust
|
|
136
|
+
cargo build --release
|
|
137
|
+
|
|
138
|
+
# The binary will be at rust/target/release/activestorage-ocr-server
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### Running tests
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
# Ruby unit tests
|
|
145
|
+
bundle exec rake test
|
|
146
|
+
|
|
147
|
+
# Rust tests
|
|
148
|
+
cd rust && cargo test
|
|
149
|
+
|
|
150
|
+
# Integration tests (requires server running)
|
|
151
|
+
cd rust && ./target/release/activestorage-ocr-server &
|
|
152
|
+
cd test/sandbox && RAILS_ENV=test bin/rails test
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
### Code style
|
|
156
|
+
|
|
157
|
+
```bash
|
|
158
|
+
# Format Rust code
|
|
159
|
+
cd rust && cargo fmt
|
|
160
|
+
|
|
161
|
+
# Check for Rust warnings
|
|
162
|
+
cd rust && cargo clippy
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
## Contributing
|
|
166
|
+
|
|
167
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and guidelines.
|
|
168
|
+
|
|
169
|
+
## License
|
|
170
|
+
|
|
171
|
+
MIT License - see [LICENSE](LICENSE) file.
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ActiveStorage
|
|
4
|
+
module Ocr
|
|
5
|
+
# Active Storage analyzer for OCR processing.
|
|
6
|
+
#
|
|
7
|
+
# This analyzer integrates with Active Storage's analysis system to
|
|
8
|
+
# automatically extract text from uploaded images and PDFs.
|
|
9
|
+
#
|
|
10
|
+
# When registered with Active Storage, it will automatically process
|
|
11
|
+
# supported file types and store OCR results in the blob's metadata.
|
|
12
|
+
#
|
|
13
|
+
# == Metadata
|
|
14
|
+
#
|
|
15
|
+
# After analysis, blobs will have the following metadata:
|
|
16
|
+
# * +ocr_text+ - The extracted text
|
|
17
|
+
# * +ocr_confidence+ - Confidence score (0.0 to 1.0)
|
|
18
|
+
# * +ocr_processed_at+ - ISO 8601 timestamp
|
|
19
|
+
#
|
|
20
|
+
# == Example
|
|
21
|
+
#
|
|
22
|
+
# document.file.analyze
|
|
23
|
+
# document.file.metadata["ocr_text"] # => "Extracted text..."
|
|
24
|
+
#
|
|
25
|
+
class Analyzer < ActiveStorage::Analyzer
  # Tells Active Storage whether this analyzer should handle +blob+.
  #
  # ==== Parameters
  #
  # * +blob+ - An ActiveStorage::Blob instance
  #
  # ==== Returns
  #
  # Whatever Configuration#accept_content_type? returns for the blob's
  # content type.
  def self.accept?(blob)
    ocr_config = ActiveStorage::Ocr.configuration
    ocr_config.accept_content_type?(blob.content_type)
  end

  # Builds the OCR metadata for the blob being analyzed.
  #
  # ==== Returns
  #
  # The Hash produced by Result#to_metadata when the OCR result reports
  # success. An empty Hash is returned when there is no successful result,
  # or when an ActiveStorage::Ocr::Error is raised (the failure is logged
  # through Rails.logger when Rails is defined).
  def metadata
    outcome = extract_text

    if outcome&.success?
      outcome.to_metadata
    else
      {}
    end
  rescue Error => failure
    Rails.logger.error("[ActiveStorage::Ocr] OCR failed: #{failure.message}") if defined?(Rails)
    {}
  end

  private

  # Sends the blob to the OCR server through a freshly built Client.
  def extract_text
    ocr_client = Client.new
    ocr_client.extract_text(blob)
  end
end
|
|
63
|
+
end
|
|
64
|
+
end
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
require "net/http"
require "rubygems/package"
require "stringio"
require "uri"
require "zlib"
|
|
8
|
+
|
|
9
|
+
module ActiveStorage
|
|
10
|
+
module Ocr
|
|
11
|
+
# Manages the OCR server binary.
|
|
12
|
+
#
|
|
13
|
+
# Handles downloading pre-built binaries from GitHub releases and
|
|
14
|
+
# detecting the appropriate platform.
|
|
15
|
+
#
|
|
16
|
+
# == Supported Platforms
|
|
17
|
+
#
|
|
18
|
+
# * darwin-x86_64 (macOS Intel)
|
|
19
|
+
# * darwin-aarch64 (macOS Apple Silicon)
|
|
20
|
+
# * linux-x86_64 (Linux x86_64)
|
|
21
|
+
# * linux-aarch64 (Linux ARM64)
|
|
22
|
+
#
|
|
23
|
+
# == Usage
|
|
24
|
+
#
|
|
25
|
+
# # Install the binary for the current platform
|
|
26
|
+
# ActiveStorage::Ocr::Binary.install!
|
|
27
|
+
#
|
|
28
|
+
# # Check if binary is installed
|
|
29
|
+
# ActiveStorage::Ocr::Binary.installed? # => true
|
|
30
|
+
#
|
|
31
|
+
# # Get the path to the binary
|
|
32
|
+
# ActiveStorage::Ocr::Binary.binary_path
|
|
33
|
+
#
|
|
34
|
+
class Binary
  # GitHub repository for downloading releases.
  GITHUB_REPO = "Cause-of-a-Kind/activestorage-ocr"

  # Name of the server binary.
  BINARY_NAME = "activestorage-ocr-server"

  class << self
    # Detects the current platform from RbConfig.
    #
    # ==== Returns
    #
    # A String like "darwin-x86_64" or "linux-aarch64".
    #
    # ==== Raises
    #
    # RuntimeError if the OS or architecture is unsupported.
    def platform
      os = case RbConfig::CONFIG["host_os"]
           when /darwin/i then "darwin"
           when /linux/i then "linux"
           else
             raise "Unsupported OS: #{RbConfig::CONFIG['host_os']}"
           end

      arch = case RbConfig::CONFIG["host_cpu"]
             when /x86_64|amd64/i then "x86_64"
             when /arm64|aarch64/i then "aarch64"
             else
               raise "Unsupported architecture: #{RbConfig::CONFIG['host_cpu']}"
             end

      "#{os}-#{arch}"
    end

    # Returns the path where the binary is installed.
    #
    # ==== Returns
    #
    # Absolute path to the binary (memoized).
    def binary_path
      @binary_path ||= File.join(install_dir, BINARY_NAME)
    end

    # Returns the installation directory (memoized).
    #
    # Creates the directory if it doesn't exist.
    def install_dir
      @install_dir ||= begin
        dir = File.join(gem_root, "bin")
        FileUtils.mkdir_p(dir)
        dir
      end
    end

    # Returns the gem's root directory, four path segments above this file.
    def gem_root
      @gem_root ||= File.expand_path("../../../..", __FILE__)
    end

    # Checks if the binary is installed and executable.
    #
    # ==== Returns
    #
    # +true+ if the binary exists and is executable.
    def installed?
      File.executable?(binary_path)
    end

    # Returns the gem version.
    #
    # Used to determine which release to download.
    def version
      ActiveStorage::Ocr::VERSION
    end

    # Returns the download URL for the current platform.
    #
    # ==== Returns
    #
    # GitHub releases URL for the platform-specific tarball.
    def download_url
      tag = "v#{version}"
      filename = "activestorage-ocr-server-#{platform}.tar.gz"
      "https://github.com/#{GITHUB_REPO}/releases/download/#{tag}/#{filename}"
    end

    # Downloads and installs the binary.
    #
    # Downloads from GitHub releases and extracts to the gem's bin directory.
    #
    # ==== Parameters
    #
    # * +force+ - If true, reinstalls even if already installed
    #
    # ==== Returns
    #
    # Path to the installed binary.
    #
    # ==== Raises
    #
    # RuntimeError if the download fails or the tarball does not contain
    # the binary.
    def install!(force: false)
      return binary_path if installed? && !force

      puts "Downloading activestorage-ocr-server for #{platform}..."

      uri = URI(download_url)
      response = fetch_with_redirects(uri)

      unless response.is_a?(Net::HTTPSuccess)
        raise "Failed to download binary: #{response.code} #{response.message}\n" \
              "URL: #{download_url}\n" \
              "You may need to build from source: cd rust && cargo build --release"
      end

      extract_binary(response.body)
      puts "Installed to #{binary_path}"
      binary_path
    end

    private

    # Fetches a URL with GET, following up to +limit+ redirects.
    #
    # ==== Raises
    #
    # RuntimeError if the redirect budget is exhausted or a redirect
    # response carries no Location header.
    def fetch_with_redirects(uri, limit = 10)
      raise "Too many redirects" if limit == 0

      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = uri.scheme == "https"

      response = http.request(Net::HTTP::Get.new(uri))
      return response unless response.is_a?(Net::HTTPRedirection)

      location = response["location"]
      raise "Redirect from #{uri} has no Location header" if location.nil? || location.empty?

      # URI.join keeps absolute Location values as-is and resolves relative
      # ones against the URL that issued the redirect.
      fetch_with_redirects(URI.join(uri.to_s, location), limit - 1)
    end

    # Extracts the server binary from a gzipped tarball held in memory.
    #
    # The first regular file whose basename equals BINARY_NAME is used, so
    # archives that store the entry as "./activestorage-ocr-server" or inside
    # a directory are accepted as well.
    #
    # ==== Raises
    #
    # RuntimeError if no matching entry exists.
    def extract_binary(tarball_data)
      gz = Zlib::GzipReader.new(StringIO.new(tarball_data))
      staging_path = "#{binary_path}.download"

      Gem::Package::TarReader.new(gz).each do |entry|
        next unless entry.file? && File.basename(entry.full_name) == BINARY_NAME

        # Write next to the target and rename into place so an existing
        # binary is never left truncated or half-written.
        File.open(staging_path, "wb") { |f| f.write(entry.read) }
        File.chmod(0o755, staging_path)
        File.rename(staging_path, binary_path)
        return
      end

      raise "Binary not found in tarball"
    ensure
      gz&.close
      # No-op after a successful rename; removes leftovers after a failure.
      FileUtils.rm_f(staging_path) if staging_path
    end
  end
end
|
|
197
|
+
end
|
|
198
|
+
end
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ActiveStorage
|
|
4
|
+
module Ocr
|
|
5
|
+
# HTTP client for communicating with the OCR server.
|
|
6
|
+
#
|
|
7
|
+
# The Client handles all communication with the Rust OCR server,
|
|
8
|
+
# including file uploads and response parsing.
|
|
9
|
+
#
|
|
10
|
+
# == Basic Usage
|
|
11
|
+
#
|
|
12
|
+
# client = ActiveStorage::Ocr::Client.new
|
|
13
|
+
#
|
|
14
|
+
# # Extract text from an Active Storage blob
|
|
15
|
+
# result = client.extract_text(document.file)
|
|
16
|
+
#
|
|
17
|
+
# # Extract text from a file path
|
|
18
|
+
# result = client.extract_text_from_path("/path/to/image.png")
|
|
19
|
+
#
|
|
20
|
+
# # Check server health
|
|
21
|
+
# client.healthy? # => true
|
|
22
|
+
#
|
|
23
|
+
class Client
  # Creates a new Client.
  #
  # ==== Parameters
  #
  # * +config+ - Configuration object (defaults to global configuration)
  def initialize(config: ActiveStorage::Ocr.configuration)
    @config = config
  end

  # Extracts text from an Active Storage blob.
  #
  # Opens the blob temporarily and sends it to the OCR server.
  #
  # ==== Parameters
  #
  # * +blob+ - An ActiveStorage::Blob instance
  #
  # ==== Returns
  #
  # A Result object with extracted text and metadata.
  #
  # ==== Raises
  #
  # * ConnectionError - if the server is unreachable
  # * ServerError - if the server returns an error or an unparseable body
  def extract_text(blob)
    blob.open do |file|
      extract_text_from_file(file, blob.content_type, blob.filename.to_s)
    end
  end

  # Extracts text from a file path.
  #
  # ==== Parameters
  #
  # * +path+ - Path to the file
  # * +content_type+ - MIME type (detected with Marcel if not provided)
  # * +filename+ - Filename to send (defaults to basename of path)
  #
  # ==== Returns
  #
  # A Result object with extracted text and metadata.
  #
  # ==== Raises
  #
  # * ConnectionError - if the server is unreachable
  # * ServerError - if the server returns an error or an unparseable body
  def extract_text_from_path(path, content_type: nil, filename: nil)
    content_type ||= Marcel::MimeType.for(Pathname.new(path))
    filename ||= File.basename(path)

    File.open(path, "rb") do |file|
      extract_text_from_file(file, content_type, filename)
    end
  end

  # Extracts text from an IO object.
  #
  # This is the low-level method that performs the actual HTTP request:
  # a multipart POST to "/ocr" with the file in the +file+ field.
  #
  # ==== Parameters
  #
  # * +file+ - An IO object (File, StringIO, etc.)
  # * +content_type+ - MIME type of the file
  # * +filename+ - Filename to send to the server
  #
  # ==== Returns
  #
  # A Result object with extracted text and metadata.
  #
  # ==== Raises
  #
  # * ConnectionError - if the server is unreachable
  # * ServerError - if the server returns an error or an unparseable body
  def extract_text_from_file(file, content_type, filename)
    response = connection.post("/ocr") do |req|
      req.body = {
        file: Faraday::Multipart::FilePart.new(file, content_type, filename)
      }
    end

    handle_response(response)
  rescue Faraday::ConnectionFailed, Faraday::TimeoutError => e
    raise ConnectionError, "Failed to connect to OCR server: #{e.message}"
  end

  # Checks if the OCR server is healthy.
  #
  # ==== Returns
  #
  # +true+ if the server responds with status "ok", +false+ otherwise
  # (including when any StandardError is raised while checking).
  def healthy?
    response = connection.get("/health")
    response.success? && JSON.parse(response.body)["status"] == "ok"
  rescue StandardError
    false
  end

  # Gets information about the OCR server.
  #
  # ==== Returns
  #
  # The parsed JSON body of GET /info with symbolized keys. The gem's
  # health rake task reads +:version+ and +:supported_formats+ from it.
  #
  # ==== Raises
  #
  # * ConnectionError - if the server is unreachable
  # * ServerError - if the response body is not valid JSON
  def server_info
    response = connection.get("/info")
    parse_json(response.body)
  rescue Faraday::ConnectionFailed, Faraday::TimeoutError => e
    raise ConnectionError, "Failed to connect to OCR server: #{e.message}"
  end

  private

  # Returns the Faraday connection, creating it if necessary.
  def connection
    @connection ||= Faraday.new(url: @config.server_url) do |f|
      f.request :multipart
      f.options.timeout = @config.timeout
      f.options.open_timeout = @config.open_timeout
      f.adapter Faraday.default_adapter
    end
  end

  # Parses the server response and returns a Result.
  #
  # Raises ServerError for non-success statuses and for success responses
  # whose body is not a JSON object, so callers only ever need to rescue
  # ActiveStorage::Ocr::Error.
  def handle_response(response)
    raise ServerError, error_message_for(response) unless response.success?

    data = parse_json(response.body)
    raise ServerError, "OCR server returned an unexpected response body" unless data.is_a?(Hash)

    Result.new(
      text: data[:text],
      confidence: data[:confidence],
      processing_time_ms: data[:processing_time_ms],
      warnings: data[:warnings] || []
    )
  end

  # Builds the message for a failed response: the body's "error" field when
  # the body is a JSON object that has one, otherwise a generic message
  # that includes the HTTP status.
  def error_message_for(response)
    fallback = "OCR server returned #{response.status}"
    payload = JSON.parse(response.body.to_s)
    (payload.is_a?(Hash) && payload["error"]) || fallback
  rescue JSON::ParserError
    fallback
  end

  # Parses a JSON body with symbolized keys, converting parse failures into
  # ServerError.
  def parse_json(body)
    JSON.parse(body.to_s, symbolize_names: true)
  rescue JSON::ParserError => e
    raise ServerError, "OCR server returned invalid JSON: #{e.message}"
  end
end
|
|
173
|
+
end
|
|
174
|
+
end
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ActiveStorage
|
|
4
|
+
module Ocr
|
|
5
|
+
# Configuration options for the OCR module.
|
|
6
|
+
#
|
|
7
|
+
# Settings can be configured via environment variables or the configure block:
|
|
8
|
+
#
|
|
9
|
+
# ActiveStorage::Ocr.configure do |config|
|
|
10
|
+
# config.server_url = "http://localhost:9292"
|
|
11
|
+
# config.timeout = 60
|
|
12
|
+
# end
|
|
13
|
+
#
|
|
14
|
+
# == Environment Variables
|
|
15
|
+
#
|
|
16
|
+
# * +ACTIVESTORAGE_OCR_SERVER_URL+ - OCR server URL (default: http://localhost:9292)
|
|
17
|
+
# * +ACTIVESTORAGE_OCR_TIMEOUT+ - Request timeout in seconds (default: 30)
|
|
18
|
+
# * +ACTIVESTORAGE_OCR_OPEN_TIMEOUT+ - Connection timeout in seconds (default: 5)
|
|
19
|
+
#
|
|
20
|
+
class Configuration
  # URI (stdlib) backs #server_host / #server_port; required here because
  # this class is the only code in its file that needs it.
  require "uri"

  # Matches a leading "scheme://" prefix.
  SCHEME_PREFIX = %r{\A[a-z][a-z0-9+.-]*://}i

  # The URL of the OCR server.
  attr_accessor :server_url

  # Request timeout in seconds.
  attr_accessor :timeout

  # Connection open timeout in seconds.
  attr_accessor :open_timeout

  # Array of MIME types that the analyzer will process.
  attr_accessor :content_types

  # Creates a new Configuration with default values.
  #
  # Defaults are read from environment variables if set.
  def initialize
    @server_url = ENV.fetch("ACTIVESTORAGE_OCR_SERVER_URL", "http://localhost:9292")
    @timeout = ENV.fetch("ACTIVESTORAGE_OCR_TIMEOUT", 30).to_i
    @open_timeout = ENV.fetch("ACTIVESTORAGE_OCR_OPEN_TIMEOUT", 5).to_i
    @content_types = default_content_types
  end

  # Returns the host portion of +server_url+ (e.g. "localhost").
  #
  # ==== Raises
  #
  # URI::InvalidURIError if +server_url+ cannot be parsed.
  def server_host
    parsed_server_url.host
  end

  # Replaces the host portion of +server_url+, keeping scheme and port.
  #
  # ==== Parameters
  #
  # * +host+ - Host name or address; a leading "scheme://" is dropped
  def server_host=(host)
    rewrite_server_url { |uri| uri.host = host.to_s.sub(SCHEME_PREFIX, "") }
  end

  # Returns the port of +server_url+ as an Integer (e.g. 9292).
  #
  # ==== Raises
  #
  # URI::InvalidURIError if +server_url+ cannot be parsed.
  def server_port
    parsed_server_url.port
  end

  # Replaces the port of +server_url+, keeping scheme and host.
  #
  # ==== Parameters
  #
  # * +port+ - Integer or numeric String
  #
  # ==== Raises
  #
  # ArgumentError if +port+ is not a valid integer.
  def server_port=(port)
    rewrite_server_url { |uri| uri.port = Integer(port) }
  end

  # Returns the default list of supported content types.
  #
  # Includes common image formats and PDF.
  def default_content_types
    %w[
      image/png
      image/jpeg
      image/gif
      image/bmp
      image/webp
      image/tiff
      application/pdf
    ]
  end

  # Checks if the given content type is supported.
  #
  # ==== Parameters
  #
  # * +content_type+ - A MIME type string (e.g., "image/png")
  #
  # ==== Returns
  #
  # +true+ if the content type is in the supported list, +false+ otherwise.
  def accept_content_type?(content_type)
    content_types.include?(content_type)
  end

  private

  # Parses +server_url+; a value without a scheme (e.g. "localhost:9292")
  # is treated as an http URL so that host and port can still be read.
  def parsed_server_url
    raw = server_url.to_s
    raw = "http://#{raw}" unless raw.match?(SCHEME_PREFIX)
    URI.parse(raw)
  end

  # Yields the parsed URL for modification and stores the result back in
  # +server_url+ as a String.
  def rewrite_server_url
    uri = parsed_server_url
    yield uri
    @server_url = uri.to_s
  end
end
|
|
71
|
+
end
|
|
72
|
+
end
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ActiveStorage
|
|
4
|
+
module Ocr
|
|
5
|
+
# Rails integration via Railtie.
|
|
6
|
+
#
|
|
7
|
+
# Automatically registers the OCR analyzer with Active Storage and
|
|
8
|
+
# provides rake tasks for managing the OCR server.
|
|
9
|
+
#
|
|
10
|
+
# == Rake Tasks
|
|
11
|
+
#
|
|
12
|
+
# * +activestorage_ocr:install+ - Download and install the server binary
|
|
13
|
+
# * +activestorage_ocr:start+ - Start the OCR server
|
|
14
|
+
# * +activestorage_ocr:health+ - Check if the server is responding
|
|
15
|
+
# * +activestorage_ocr:info+ - Show binary and platform information
|
|
16
|
+
#
|
|
17
|
+
class Railtie < Rails::Railtie
  # Registers the OCR analyzer with Active Storage once the application
  # has finished initializing.
  initializer "activestorage-ocr.add_analyzer" do
    config.after_initialize do
      # Prepend our analyzer so it runs before other analyzers
      if defined?(ActiveStorage) && ActiveStorage.respond_to?(:analyzers)
        ActiveStorage.analyzers.prepend(ActiveStorage::Ocr::Analyzer)
      end
    end
  end

  # Defines rake tasks for server management.
  rake_tasks do
    namespace :activestorage_ocr do
      desc "Install the OCR server binary"
      task :install do
        ActiveStorage::Ocr::Binary.install!
      end

      desc "Check OCR server health"
      task health: :environment do
        client = ActiveStorage::Ocr::Client.new
        if client.healthy?
          puts "OCR server is healthy"
          info = client.server_info
          puts " Version: #{info[:version]}"
          # Array() keeps the task from crashing when the key is absent.
          puts " Supported formats: #{Array(info[:supported_formats]).join(', ')}"
        else
          puts "OCR server is not responding"
          exit 1
        end
      end

      desc "Start the OCR server"
      task :start do
        require "uri"

        binary = ActiveStorage::Ocr::Binary.binary_path
        unless File.executable?(binary)
          puts "Binary not found. Installing..."
          ActiveStorage::Ocr::Binary.install!
        end

        # Host and port are derived from server_url, the setting that
        # Client also uses, so the server listens where the client connects.
        # NOTE(review): this task does not depend on :environment, so values
        # set in an application initializer may not be loaded here - confirm.
        server_url = ActiveStorage::Ocr.configuration.server_url.to_s
        server_url = "http://#{server_url}" unless server_url.include?("://")
        endpoint = URI.parse(server_url)
        host = endpoint.host
        port = endpoint.port

        # NOTE(review): host is passed verbatim to --host; confirm the server
        # accepts host names such as "localhost" as well as IP addresses.
        puts "Starting OCR server on #{host}:#{port}..."
        exec(binary, "--host", host, "--port", port.to_s)
      end

      desc "Show binary info"
      task :info do
        puts "Platform: #{ActiveStorage::Ocr::Binary.platform}"
        puts "Binary path: #{ActiveStorage::Ocr::Binary.binary_path}"
        puts "Installed: #{ActiveStorage::Ocr::Binary.installed?}"
        puts "Version: #{ActiveStorage::Ocr::Binary.version}"
      end
    end
  end
end
|
|
76
|
+
end
|
|
77
|
+
end
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ActiveStorage
|
|
4
|
+
module Ocr
|
|
5
|
+
# Represents the result of an OCR operation.
|
|
6
|
+
#
|
|
7
|
+
# Contains the extracted text, confidence score, and processing metadata.
|
|
8
|
+
#
|
|
9
|
+
# == Example
|
|
10
|
+
#
|
|
11
|
+
# result = client.extract_text(blob)
|
|
12
|
+
# if result.success?
|
|
13
|
+
# puts result.text
|
|
14
|
+
# puts "Confidence: #{result.confidence}"
|
|
15
|
+
# end
|
|
16
|
+
#
|
|
17
|
+
class Result
  # Time#iso8601 (used by #to_metadata) is supplied by the stdlib "time"
  # extension on Ruby versions where it is not built in; load it explicitly
  # so this class also works outside Rails.
  require "time"

  # The extracted text content.
  attr_reader :text

  # Confidence score from 0.0 to 1.0.
  attr_reader :confidence

  # Time taken to process the file in milliseconds.
  attr_reader :processing_time_ms

  # Array of warning messages from the OCR server.
  attr_reader :warnings

  # Creates a new Result.
  #
  # ==== Parameters
  #
  # * +text+ - The extracted text
  # * +confidence+ - Confidence score (0.0 to 1.0)
  # * +processing_time_ms+ - Processing time in milliseconds
  # * +warnings+ - Array of warning messages (optional)
  def initialize(text:, confidence:, processing_time_ms:, warnings: [])
    @text = text
    @confidence = confidence
    @processing_time_ms = processing_time_ms
    @warnings = warnings
  end

  # Returns whether OCR successfully extracted text.
  #
  # ==== Returns
  #
  # +true+ if text was extracted, +false+ if text is nil or empty.
  def success?
    !text.nil? && !text.empty?
  end

  # Converts the result to a Hash.
  #
  # ==== Returns
  #
  # A Hash with all result attributes.
  def to_h
    {
      text: text,
      confidence: confidence,
      processing_time_ms: processing_time_ms,
      warnings: warnings
    }
  end

  # Converts the result to Active Storage metadata format.
  #
  # This format is suitable for storing in blob metadata.
  #
  # ==== Returns
  #
  # A Hash with +:ocr_text+, +:ocr_confidence+, and +:ocr_processed_at+.
  # The timestamp is the current UTC time (at the moment of the call)
  # as an ISO 8601 string.
  def to_metadata
    {
      ocr_text: text,
      ocr_confidence: confidence,
      ocr_processed_at: Time.now.utc.iso8601
    }
  end
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "faraday"
|
|
4
|
+
require "faraday/multipart"
|
|
5
|
+
require "json"
|
|
6
|
+
|
|
7
|
+
require_relative "ocr/version"
|
|
8
|
+
require_relative "ocr/configuration"
|
|
9
|
+
require_relative "ocr/client"
|
|
10
|
+
require_relative "ocr/result"
|
|
11
|
+
require_relative "ocr/binary"
|
|
12
|
+
|
|
13
|
+
if defined?(Rails)
|
|
14
|
+
require_relative "ocr/analyzer"
|
|
15
|
+
require_relative "ocr/railtie"
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
module ActiveStorage
  # OCR support for Rails Active Storage attachments.
  #
  # This module provides optical character recognition (OCR) for files stored
  # with Active Storage using a high-performance Rust server with the pure-Rust
  # +ocrs+ OCR engine.
  #
  # == Configuration
  #
  #   ActiveStorage::Ocr.configure do |config|
  #     config.server_url = "http://localhost:9292"
  #     config.timeout = 30
  #   end
  #
  # == Basic Usage
  #
  #   # Extract text from an Active Storage blob
  #   result = ActiveStorage::Ocr.extract_text(document.file)
  #   result.text        # => "Extracted text..."
  #   result.confidence  # => 0.95
  #
  # == Error Handling
  #
  # All errors inherit from ActiveStorage::Ocr::Error:
  # - ActiveStorage::Ocr::ConnectionError - server unreachable
  # - ActiveStorage::Ocr::ServerError - server returned an error
  #
  module Ocr
    # Base error class for all OCR errors.
    class Error < StandardError; end

    # Raised when the OCR server returns an error response.
    class ServerError < Error; end

    # Raised when the OCR server is unreachable or times out.
    class ConnectionError < Error; end

    class << self
      # Replaces the module-wide configuration object.
      attr_writer :configuration

      # Returns the current configuration.
      #
      # Lazily creates a new Configuration with defaults the first time it is
      # needed and returns that same object on later calls.
      def configuration
        @configuration ||= Configuration.new
      end

      # Configures the OCR module.
      #
      # Yields the current configuration to the block and returns the block's
      # value. When called without a block it simply returns the current
      # configuration instead of raising LocalJumpError.
      #
      # ==== Example
      #
      #   ActiveStorage::Ocr.configure do |config|
      #     config.server_url = "http://localhost:9292"
      #     config.timeout = 60
      #   end
      def configure
        return configuration unless block_given?

        yield(configuration)
      end

      # Resets configuration to defaults.
      #
      # Discards the current configuration and returns the fresh
      # Configuration that replaces it. Useful for testing.
      def reset_configuration!
        @configuration = Configuration.new
      end

      # Extracts text from an Active Storage blob.
      #
      # This is a convenience method that creates a new Client and calls
      # extract_text on it.
      #
      # ==== Parameters
      #
      # * +blob+ - An ActiveStorage::Blob instance
      #
      # ==== Returns
      #
      # A Result object containing extracted text and metadata.
      #
      # ==== Raises
      #
      # * ConnectionError - if the server is unreachable
      # * ServerError - if the server returns an error
      def extract_text(blob)
        Client.new.extract_text(blob)
      end
    end
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: activestorage-ocr
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Michael Rispoli
|
|
8
|
+
bindir: bin
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: activestorage
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - ">="
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '7.0'
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - ">="
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '7.0'
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: faraday
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - ">="
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: '2.0'
|
|
33
|
+
type: :runtime
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - ">="
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: '2.0'
|
|
40
|
+
- !ruby/object:Gem::Dependency
|
|
41
|
+
name: faraday-multipart
|
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
|
43
|
+
requirements:
|
|
44
|
+
- - ">="
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: '1.0'
|
|
47
|
+
type: :runtime
|
|
48
|
+
prerelease: false
|
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
50
|
+
requirements:
|
|
51
|
+
- - ">="
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: '1.0'
|
|
54
|
+
- !ruby/object:Gem::Dependency
|
|
55
|
+
name: minitest
|
|
56
|
+
requirement: !ruby/object:Gem::Requirement
|
|
57
|
+
requirements:
|
|
58
|
+
- - "~>"
|
|
59
|
+
- !ruby/object:Gem::Version
|
|
60
|
+
version: '5.0'
|
|
61
|
+
type: :development
|
|
62
|
+
prerelease: false
|
|
63
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
64
|
+
requirements:
|
|
65
|
+
- - "~>"
|
|
66
|
+
- !ruby/object:Gem::Version
|
|
67
|
+
version: '5.0'
|
|
68
|
+
- !ruby/object:Gem::Dependency
|
|
69
|
+
name: webmock
|
|
70
|
+
requirement: !ruby/object:Gem::Requirement
|
|
71
|
+
requirements:
|
|
72
|
+
- - "~>"
|
|
73
|
+
- !ruby/object:Gem::Version
|
|
74
|
+
version: '3.0'
|
|
75
|
+
type: :development
|
|
76
|
+
prerelease: false
|
|
77
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
78
|
+
requirements:
|
|
79
|
+
- - "~>"
|
|
80
|
+
- !ruby/object:Gem::Version
|
|
81
|
+
version: '3.0'
|
|
82
|
+
- !ruby/object:Gem::Dependency
|
|
83
|
+
name: rake
|
|
84
|
+
requirement: !ruby/object:Gem::Requirement
|
|
85
|
+
requirements:
|
|
86
|
+
- - "~>"
|
|
87
|
+
- !ruby/object:Gem::Version
|
|
88
|
+
version: '13.0'
|
|
89
|
+
type: :development
|
|
90
|
+
prerelease: false
|
|
91
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
92
|
+
requirements:
|
|
93
|
+
- - "~>"
|
|
94
|
+
- !ruby/object:Gem::Version
|
|
95
|
+
version: '13.0'
|
|
96
|
+
description: Extract text from images and PDFs stored in Active Storage using a high-performance
|
|
97
|
+
Rust OCR server
|
|
98
|
+
email:
|
|
99
|
+
- mike.rispoli@causeofakind.com
|
|
100
|
+
executables: []
|
|
101
|
+
extensions: []
|
|
102
|
+
extra_rdoc_files: []
|
|
103
|
+
files:
|
|
104
|
+
- LICENSE
|
|
105
|
+
- README.md
|
|
106
|
+
- lib/activestorage-ocr.rb
|
|
107
|
+
- lib/activestorage/ocr.rb
|
|
108
|
+
- lib/activestorage/ocr/analyzer.rb
|
|
109
|
+
- lib/activestorage/ocr/binary.rb
|
|
110
|
+
- lib/activestorage/ocr/client.rb
|
|
111
|
+
- lib/activestorage/ocr/configuration.rb
|
|
112
|
+
- lib/activestorage/ocr/railtie.rb
|
|
113
|
+
- lib/activestorage/ocr/result.rb
|
|
114
|
+
- lib/activestorage/ocr/version.rb
|
|
115
|
+
homepage: https://github.com/Cause-of-a-Kind/activestorage-ocr
|
|
116
|
+
licenses:
|
|
117
|
+
- MIT
|
|
118
|
+
metadata:
|
|
119
|
+
homepage_uri: https://github.com/Cause-of-a-Kind/activestorage-ocr
|
|
120
|
+
source_code_uri: https://github.com/Cause-of-a-Kind/activestorage-ocr
|
|
121
|
+
changelog_uri: https://github.com/Cause-of-a-Kind/activestorage-ocr/blob/main/CHANGELOG.md
|
|
122
|
+
rdoc_options: []
|
|
123
|
+
require_paths:
|
|
124
|
+
- lib
|
|
125
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
126
|
+
requirements:
|
|
127
|
+
- - ">="
|
|
128
|
+
- !ruby/object:Gem::Version
|
|
129
|
+
version: 3.2.0
|
|
130
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
131
|
+
requirements:
|
|
132
|
+
- - ">="
|
|
133
|
+
- !ruby/object:Gem::Version
|
|
134
|
+
version: '0'
|
|
135
|
+
requirements: []
|
|
136
|
+
rubygems_version: 3.6.9
|
|
137
|
+
specification_version: 4
|
|
138
|
+
summary: OCR support for Rails Active Storage
|
|
139
|
+
test_files: []
|