pdf_ocr 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +1 -0
- data/Gemfile +13 -0
- data/Gemfile.lock +63 -0
- data/README.md +138 -0
- data/Rakefile +4 -0
- data/lib/ocr/data_extractor.rb +122 -0
- data/lib/ocr/version.rb +5 -0
- data/lib/ocr.rb +9 -0
- data/ocr.gemspec +46 -0
- data/sig/ocr.rbs +4 -0
- metadata +129 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 32727eeb24656d1fce7cb43f2f5192f29cfda53192ef161cfae047f2871f6bff
|
|
4
|
+
data.tar.gz: 558586ded2489faf79ce7f36ee1ab6df267d9dc30d67e6ba554be61bde959e19
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: c02b99bb1e652fe8c26ad80ed8dc4652c8eab5cc9a8bb4699b656080066772f811ee66ced0faa584f9a526322620c9e628f3a47c194f54b900706f968274c4dc
|
|
7
|
+
data.tar.gz: 9d7fea0ffe63fb2c10825d906831fb70dce2f1ab3d3d0c02c814dbd499c81fa906f23bc27e794d5ed3670b381c86b9da1b6f1abc091392684f5c17f97be000b4
|
data/.rspec
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
--require spec_helper
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
PATH
|
|
2
|
+
remote: .
|
|
3
|
+
specs:
|
|
4
|
+
pdf_ocr (0.1.0)
|
|
5
|
+
mini_magick
|
|
6
|
+
pdf-reader
|
|
7
|
+
rtesseract
|
|
8
|
+
|
|
9
|
+
GEM
|
|
10
|
+
remote: https://rubygems.org/
|
|
11
|
+
specs:
|
|
12
|
+
Ascii85 (2.0.1)
|
|
13
|
+
afm (0.2.2)
|
|
14
|
+
bigdecimal (3.3.1)
|
|
15
|
+
byebug (12.0.0)
|
|
16
|
+
diff-lcs (1.6.2)
|
|
17
|
+
hashery (2.1.2)
|
|
18
|
+
mini_magick (4.13.2)
|
|
19
|
+
mini_portile2 (2.8.9)
|
|
20
|
+
nokogiri (1.18.10)
|
|
21
|
+
mini_portile2 (~> 2.8.2)
|
|
22
|
+
racc (~> 1.4)
|
|
23
|
+
pdf-reader (2.15.0)
|
|
24
|
+
Ascii85 (>= 1.0, < 3.0, != 2.0.0)
|
|
25
|
+
afm (>= 0.2.1, < 2)
|
|
26
|
+
hashery (~> 2.0)
|
|
27
|
+
ruby-rc4
|
|
28
|
+
ttfunk
|
|
29
|
+
racc (1.8.1)
|
|
30
|
+
rake (13.3.0)
|
|
31
|
+
rspec (3.13.1)
|
|
32
|
+
rspec-core (~> 3.13.0)
|
|
33
|
+
rspec-expectations (~> 3.13.0)
|
|
34
|
+
rspec-mocks (~> 3.13.0)
|
|
35
|
+
rspec-core (3.13.5)
|
|
36
|
+
rspec-support (~> 3.13.0)
|
|
37
|
+
rspec-expectations (3.13.5)
|
|
38
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
|
39
|
+
rspec-support (~> 3.13.0)
|
|
40
|
+
rspec-mocks (3.13.6)
|
|
41
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
|
42
|
+
rspec-support (~> 3.13.0)
|
|
43
|
+
rspec-support (3.13.6)
|
|
44
|
+
rtesseract (2.2.0)
|
|
45
|
+
nokogiri
|
|
46
|
+
ruby-rc4 (0.1.5)
|
|
47
|
+
ttfunk (1.8.0)
|
|
48
|
+
bigdecimal (~> 3.1)
|
|
49
|
+
|
|
50
|
+
PLATFORMS
|
|
51
|
+
x86_64-linux
|
|
52
|
+
|
|
53
|
+
DEPENDENCIES
|
|
54
|
+
byebug
|
|
55
|
+
mini_magick
|
|
56
|
+
pdf-reader
|
|
57
|
+
pdf_ocr!
|
|
58
|
+
rake (~> 13.0)
|
|
59
|
+
rspec
|
|
60
|
+
rtesseract
|
|
61
|
+
|
|
62
|
+
BUNDLED WITH
|
|
63
|
+
2.4.12
|
data/README.md
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# OCR
|
|
2
|
+
|
|
3
|
+
A lightweight Ruby gem for extracting text from PDFs, including scanned PDFs using OCR.
|
|
4
|
+
|
|
5
|
+
This gem supports:
|
|
6
|
+
|
|
7
|
+
- PDFs with readable text
|
|
8
|
+
- Scanned PDFs using Tesseract OCR
|
|
9
|
+
- File objects, file paths, StringIO, and Rails/ActiveStorage uploads
|
|
10
|
+
- Fully Rails-independent
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## 🚀 Features
|
|
15
|
+
|
|
16
|
+
- Detect if PDF is scanned or text-based
|
|
17
|
+
- Extract text from normal PDFs using `PDF::Reader`
|
|
18
|
+
- Extract text from scanned PDFs using `RTesseract` and `MiniMagick`
|
|
19
|
+
- Automatic cleanup of temporary images
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## 💻 Installation
|
|
24
|
+
|
|
25
|
+
Add this line to your application's Gemfile:
|
|
26
|
+
|
|
27
|
+
```ruby
|
|
28
|
+
gem 'pdf_ocr', require: 'ocr'
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Or install directly:
|
|
32
|
+
```ruby
|
|
33
|
+
gem install pdf_ocr
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Dependencies
|
|
37
|
+
- PDF::Reader
|
|
38
|
+
|
|
39
|
+
- RTesseract
|
|
40
|
+
|
|
41
|
+
- MiniMagick
|
|
42
|
+
|
|
43
|
+
- Tesseract OCR (system-level executable)
|
|
44
|
+
|
|
45
|
+
- pdftoppm from Poppler utils (for converting PDF pages to images)
|
|
46
|
+
|
|
47
|
+
## ⚙️ Usage
|
|
48
|
+
```ruby
|
|
49
|
+
require 'ocr'
|
|
50
|
+
require 'stringio'
|
|
51
|
+
|
|
52
|
+
# From a File object
|
|
53
|
+
file = File.open("path/to/document.pdf")
|
|
54
|
+
result = Ocr::DataExtractor.new(file).call
|
|
55
|
+
puts result["raw_text"] if result["success"]
|
|
56
|
+
|
|
57
|
+
# From a file path string
|
|
58
|
+
result = Ocr::DataExtractor.new("path/to/document.pdf").call
|
|
59
|
+
|
|
60
|
+
# From a StringIO object (in-memory PDF)
|
|
61
|
+
pdf_data = StringIO.new(File.read("path/to/document.pdf"))
|
|
62
|
+
result = Ocr::DataExtractor.new(pdf_data).call
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Example Result
|
|
66
|
+
```ruby
|
|
67
|
+
{
|
|
68
|
+
"success" => true,
|
|
69
|
+
"raw_text" => "Extracted text content from PDF ..."
|
|
70
|
+
}
|
|
71
|
+
```
|
|
72
|
+
- If OCR fails for a scanned PDF:
|
|
73
|
+
```ruby
|
|
74
|
+
{
|
|
75
|
+
"success" => false,
|
|
76
|
+
"message" => "Unable to extract text using OCR"
|
|
77
|
+
}
|
|
78
|
+
```
|
|
79
|
+
## 🔧 Notes
|
|
80
|
+
1. Ensure Tesseract OCR is installed on your system:
|
|
81
|
+
```
|
|
82
|
+
# Ubuntu/Debian
|
|
83
|
+
sudo apt install tesseract-ocr
|
|
84
|
+
|
|
85
|
+
# MacOS (with Homebrew)
|
|
86
|
+
brew install tesseract
|
|
87
|
+
```
|
|
88
|
+
2. Ensure pdftoppm is installed (for PDF-to-image conversion):
|
|
89
|
+
```
|
|
90
|
+
# Ubuntu/Debian
|
|
91
|
+
sudo apt install poppler-utils
|
|
92
|
+
|
|
93
|
+
# MacOS (with Homebrew)
|
|
94
|
+
brew install poppler
|
|
95
|
+
```
|
|
96
|
+
3. This gem does not require Rails, but it will work with Rails ActiveStorage objects that respond to .open.
|
|
97
|
+
|
|
98
|
+
## 🧪 Running Tests
|
|
99
|
+
```
|
|
100
|
+
bundle install
|
|
101
|
+
bundle exec rspec
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
- PDFs with selectable text
|
|
105
|
+
|
|
106
|
+
- Scanned PDFs
|
|
107
|
+
|
|
108
|
+
- Malformed PDFs (fallback to OCR)
|
|
109
|
+
|
|
110
|
+
## 📝 Contributing
|
|
111
|
+
|
|
112
|
+
- Fork the repository
|
|
113
|
+
|
|
114
|
+
- Create your feature branch (git checkout -b your-feature)
|
|
115
|
+
|
|
116
|
+
- Commit your changes (git commit -am 'Add new feature')
|
|
117
|
+
|
|
118
|
+
- Push to the branch (git push origin your-feature)
|
|
119
|
+
|
|
120
|
+
- Open a Pull Request
|
|
121
|
+
|
|
122
|
+
## 📝 License
|
|
123
|
+
|
|
124
|
+
MIT License © RaviShankarSinghal
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
---
|
|
128
|
+
|
|
129
|
+
This version includes:
|
|
130
|
+
|
|
131
|
+
- Version and build badges (replace with your repo info)
|
|
132
|
+
- Clear installation instructions
|
|
133
|
+
- Usage examples for File, path, and StringIO
|
|
134
|
+
- System dependencies
|
|
135
|
+
- Test instructions
|
|
136
|
+
- Contributing guidelines
|
|
137
|
+
|
|
138
|
+
---
|
data/Rakefile
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
require "mini_magick"
|
|
2
|
+
require "pdf/reader"
|
|
3
|
+
require "rtesseract"
|
|
4
|
+
require "securerandom"
|
|
5
|
+
require "shellwords"
|
|
6
|
+
require "tmpdir"
|
|
7
|
+
|
|
8
|
+
module Ocr
  # Extracts the text of a PDF, falling back to OCR when the embedded text
  # layer is missing or looks like garbage.
  #
  # The embedded text is read with PDF::Reader. When a page yields empty or
  # mostly non-alphabetic text, or when the PDF cannot be parsed, the pages
  # are rendered to PNG files with the `pdftoppm` command and recognised with
  # RTesseract.
  #
  # @example
  #   Ocr::DataExtractor.new("path/to/document.pdf").call
  #   # => { "success" => true, "raw_text" => "..." }
  class DataExtractor
    # @param document [String, File, #read, #open] a file path, an open File,
    #   any readable IO-like object (e.g. StringIO), or an object responding
    #   to +open+
    def initialize(document)
      @document = document
    end

    # Runs the extraction.
    #
    # @return [Hash] <tt>{ "success" => true, "raw_text" => String }</tt> on
    #   success, or <tt>{ "success" => false, "message" => String }</tt> when
    #   OCR produced no text
    # @raise [ArgumentError] if the document type is not supported
    def call
      ocr_data(@document)
    end

    private

    # Reads the embedded text and decides whether OCR is needed.
    #
    # @param document [Object] the value given to the constructor
    # @return [Hash] result hash, see {#call}
    def ocr_data(document)
      file = get_file_from(document)
      # get_file_from only returns a different object when it opened a path
      # itself; that handle is ours to close, caller-supplied IOs are not.
      opened_here = !file.equal?(document)

      begin
        text, scanned = embedded_text(file)

        if scanned || scanned_pdf?(text)
          scanned_pdf_ocr(file)
        else
          { "success" => true, "raw_text" => text.strip }
        end
      rescue PDF::Reader::MalformedPDFError, PDF::Reader::UnsupportedFeatureError => e
        log_warning "PDF parsing failed: #{e.message}"
        scanned_pdf_ocr(file)
      ensure
        file.close if opened_here && !file.closed?
      end
    end

    # Collects the text of each page until a page looks scanned.
    #
    # @param file [#path, #read] the opened document
    # @return [Array(String, Boolean)] the text gathered so far and whether a
    #   page with empty or mostly-junk text was encountered
    def embedded_text(file)
      source = file.respond_to?(:path) ? file.path : file
      reader = PDF::Reader.new(source)
      text = +""

      reader.pages.each do |page|
        page_text = safe_page_text(page)
        text << " " << page_text
        # One unreadable page is enough to send the whole document to OCR.
        return [text, true] if page_text.strip.empty? || mostly_junk?(page_text)
      end

      [text, false]
    end

    # Normalises the supported input types.
    #
    # Objects responding to +open+ are opened in place and returned as-is;
    # File and readable objects are returned unchanged; a String is treated
    # as a path and opened.
    #
    # @param document [Object]
    # @return [Object] the object to read the PDF from
    # @raise [ArgumentError] for any other type
    def get_file_from(document)
      return document.tap(&:open) if document.respond_to?(:open)
      return document if document.is_a?(File)
      return document if document.respond_to?(:read)
      return File.open(document) if document.is_a?(String)

      raise ArgumentError, "Unsupported document type: #{document.class}"
    end

    # Returns the page text as valid UTF-8, or "" when extraction fails.
    #
    # @param page [#text]
    # @return [String]
    def safe_page_text(page)
      page.text.to_s
          .encode("UTF-8", invalid: :replace, undef: :replace, replace: "")
          .scrub("") # encode is a no-op for strings already tagged UTF-8
    rescue StandardError => e
      log_warning "Text extraction failed for a page: #{e.message}"
      ""
    end

    # Heuristic on the whole document: empty text, text shorter than 100
    # characters, or text where more than half of the characters are neither
    # ASCII alphanumerics nor whitespace is treated as scanned.
    #
    # @param text [String]
    # @return [Boolean]
    def scanned_pdf?(text)
      return true if text.empty?

      # The whitespace characters are listed explicitly: inside a
      # double-quoted string "\s" is just a space, which made newlines and
      # tabs count as junk.
      junk_ratio = text.count("^A-Za-z0-9 \t\r\n\f\v").to_f / text.size
      junk_ratio > 0.5 || text.size < 100
    end

    # Heuristic on a single page: true when fewer than 20% of the characters
    # are ASCII letters.
    #
    # @param text [String]
    # @return [Boolean]
    def mostly_junk?(text)
      return true if text.empty?

      text.count("A-Za-z") < (text.size * 0.2)
    end

    # Renders the PDF to images and runs OCR on each of them.
    #
    # @param file [#path, #read] the opened document
    # @return [Hash] result hash, see {#call}
    def scanned_pdf_ocr(file)
      images = []
      temp_pdf = nil
      # pdftoppm needs a path on disk; in-memory IOs are spooled to a
      # temporary file first.
      pdf_path = file.respond_to?(:path) ? file.path : (temp_pdf = write_temp_pdf(file))

      images = convert_pdf_to_images(pdf_path)
      full_text = images.map { |img| extract_text(img) }.join(" ").strip

      if full_text.empty?
        { "success" => false, "message" => "Unable to extract text using OCR" }
      else
        { "success" => true, "raw_text" => full_text }
      end
    ensure
      cleanup(images)
      cleanup([temp_pdf])
    end

    # Copies an IO-like object into a fresh temporary PDF file.
    #
    # @param io [#read]
    # @return [String] path of the file written
    def write_temp_pdf(io)
      # The IO may already have been consumed by PDF::Reader.
      io.rewind if io.respond_to?(:rewind)
      path = File.join(Dir.tmpdir, "ocr_src_#{SecureRandom.hex(8)}.pdf")
      flags = File::WRONLY | File::CREAT | File::EXCL | File::BINARY
      File.open(path, flags, 0o600) { |out| out.write(io.read) }
      path
    end

    # Renders every page of the PDF to a 300 DPI PNG using pdftoppm.
    #
    # @param pdf_path [String] path of the PDF on disk
    # @return [Array<String>] paths of the generated images in page order
    def convert_pdf_to_images(pdf_path)
      output_prefix = File.join(Dir.tmpdir, "ocr_page_#{SecureRandom.hex(4)}")
      # Argument-list form: no shell is involved, so no escaping is needed.
      rendered = system("pdftoppm", "-png", "-r", "300", pdf_path.to_s, output_prefix)
      log_warning "pdftoppm failed for #{pdf_path}" unless rendered

      # Sorted explicitly so the page order does not depend on how the Ruby
      # version in use orders glob results.
      Dir["#{output_prefix}-*.png"].sort
    end

    # Runs Tesseract on one image; returns "" (and logs) on failure.
    #
    # @param image_path [String]
    # @return [String]
    def extract_text(image_path)
      RTesseract.new(image_path, lang: "eng", processor: "mini_magick").to_s
    rescue StandardError => e
      log_warning "OCR failed on #{image_path}: #{e.message}"
      ""
    end

    # Deletes the given files if they exist; nil entries are ignored.
    #
    # @param images [Array<String, nil>, nil]
    # @return [void]
    def cleanup(images)
      Array(images).compact.each { |img| File.delete(img) if File.exist?(img) }
    end

    # Logs through Rails.logger when one is available, otherwise to stderr.
    #
    # @param message [String]
    # @return [void]
    def log_warning(message)
      logger = Rails.logger if defined?(Rails) && Rails.respond_to?(:logger)

      if logger
        logger.warn(message)
      else
        warn(message)
      end
    end
  end
end
|
data/lib/ocr/version.rb
ADDED
data/lib/ocr.rb
ADDED
data/ocr.gemspec
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "lib/ocr/version"
|
|
4
|
+
|
|
5
|
+
# Gem specification for pdf_ocr. The library itself lives under lib/ocr and
# is loaded with `require "ocr"`.
Gem::Specification.new do |spec|
  spec.name = "pdf_ocr"
  spec.version = Ocr::VERSION
  spec.authors = ["Ravi Shankar Singhal"]
  spec.email = ["ravi.singhal2308@gmail.com"]

  # The library only accepts PDF input, so the texts below describe PDFs
  # rather than generic image files.
  spec.summary = "A lightweight Ruby gem for extracting text from PDFs, including scanned PDFs, using OCR."
  spec.description = "pdf_ocr extracts text from PDF files. Text-based PDFs are read with PDF::Reader; " \
                     "scanned PDFs are rendered to images with pdftoppm and recognised with the " \
                     "Tesseract OCR engine. It works in plain Ruby and in Rails applications."
  spec.homepage = "https://github.com/RaviShankarSinghal/ocr_gem"
  spec.license = "MIT"

  spec.required_ruby_version = ">= 2.6.0"

  spec.metadata = {
    "homepage_uri" => spec.homepage,
    "source_code_uri" => "https://github.com/RaviShankarSinghal/ocr_gem",
    "changelog_uri" => "https://github.com/RaviShankarSinghal/ocr_gem/blob/main/CHANGELOG.md",
    # rubydoc.info is keyed by the published gem name, which is pdf_ocr.
    "documentation_uri" => "https://rubydoc.info/gems/pdf_ocr"
  }

  # Package every git-tracked file except this gemspec and the test, CI and
  # executable-stub directories.
  spec.files = Dir.chdir(__dir__) do
    `git ls-files -z`.split("\x0").reject do |f|
      (File.expand_path(f) == __FILE__) ||
        f.start_with?(*%w[bin/ test/ spec/ features/ .git .circleci appveyor])
    end
  end

  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # Runtime dependencies. The tesseract and pdftoppm executables must also be
  # installed on the system; they cannot be declared here.
  spec.add_dependency "pdf-reader"
  spec.add_dependency "mini_magick"
  spec.add_dependency "rtesseract"

  # Development dependencies
  spec.add_development_dependency "rspec"
  spec.add_development_dependency "byebug"
end
|
data/sig/ocr.rbs
ADDED
metadata
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: pdf_ocr
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Ravi Shankar Singhal
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: exe
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2025-10-24 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: pdf-reader
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - ">="
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '0'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - ">="
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '0'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: mini_magick
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - ">="
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '0'
|
|
34
|
+
type: :runtime
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - ">="
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '0'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: rtesseract
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - ">="
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '0'
|
|
48
|
+
type: :runtime
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - ">="
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '0'
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: rspec
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - ">="
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '0'
|
|
62
|
+
type: :development
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - ">="
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: '0'
|
|
69
|
+
- !ruby/object:Gem::Dependency
|
|
70
|
+
name: byebug
|
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
|
72
|
+
requirements:
|
|
73
|
+
- - ">="
|
|
74
|
+
- !ruby/object:Gem::Version
|
|
75
|
+
version: '0'
|
|
76
|
+
type: :development
|
|
77
|
+
prerelease: false
|
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
79
|
+
requirements:
|
|
80
|
+
- - ">="
|
|
81
|
+
- !ruby/object:Gem::Version
|
|
82
|
+
version: '0'
|
|
83
|
+
description: OCR is a Ruby gem that allows you to easily extract text from image files
|
|
84
|
+
(JPG, PNG, PDF) using Tesseract OCR engine. It provides a simple, intuitive interface
|
|
85
|
+
for integrating OCR capabilities into your Ruby or Rails applications.
|
|
86
|
+
email:
|
|
87
|
+
- ravi.singhal2308@gmail.com
|
|
88
|
+
executables: []
|
|
89
|
+
extensions: []
|
|
90
|
+
extra_rdoc_files: []
|
|
91
|
+
files:
|
|
92
|
+
- ".rspec"
|
|
93
|
+
- Gemfile
|
|
94
|
+
- Gemfile.lock
|
|
95
|
+
- README.md
|
|
96
|
+
- Rakefile
|
|
97
|
+
- lib/ocr.rb
|
|
98
|
+
- lib/ocr/data_extractor.rb
|
|
99
|
+
- lib/ocr/version.rb
|
|
100
|
+
- ocr.gemspec
|
|
101
|
+
- sig/ocr.rbs
|
|
102
|
+
homepage: https://github.com/RaviShankarSinghal/ocr_gem
|
|
103
|
+
licenses:
|
|
104
|
+
- MIT
|
|
105
|
+
metadata:
|
|
106
|
+
homepage_uri: https://github.com/RaviShankarSinghal/ocr_gem
|
|
107
|
+
source_code_uri: https://github.com/RaviShankarSinghal/ocr_gem
|
|
108
|
+
changelog_uri: https://github.com/RaviShankarSinghal/ocr_gem/blob/main/CHANGELOG.md
|
|
109
|
+
documentation_uri: https://rubydoc.info/gems/ocr
|
|
110
|
+
post_install_message:
|
|
111
|
+
rdoc_options: []
|
|
112
|
+
require_paths:
|
|
113
|
+
- lib
|
|
114
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
115
|
+
requirements:
|
|
116
|
+
- - ">="
|
|
117
|
+
- !ruby/object:Gem::Version
|
|
118
|
+
version: 2.6.0
|
|
119
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
120
|
+
requirements:
|
|
121
|
+
- - ">="
|
|
122
|
+
- !ruby/object:Gem::Version
|
|
123
|
+
version: '0'
|
|
124
|
+
requirements: []
|
|
125
|
+
rubygems_version: 3.3.7
|
|
126
|
+
signing_key:
|
|
127
|
+
specification_version: 4
|
|
128
|
+
summary: A lightweight Ruby gem for extracting text from images using OCR.
|
|
129
|
+
test_files: []
|