pdf_ocr 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +24 -16
- data/lib/ocr/data_extractor.rb +1 -1
- data/lib/ocr/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 96003dcb66cc0427ddfa6241161b0b5c358b652f161bd2ff5fab5abca8e31236
|
|
4
|
+
data.tar.gz: 431385258bee6950aa37ed494997f86003f75b215bb4ed0cd2480cfb116fae00
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7a69ad20675eebabb9db5f9c6ffc3a85a3adc1290bc42bdfbe0d45b2ece92d21b3618051a5193e447b7f3c36373952d8abe27bebd1e561807ad4663be4334aca
|
|
7
|
+
data.tar.gz: 767e89448d6eea25a12a84f016d0baeb566c34af1157269a62b0af352cd3fbbd4a4adf40a731b8bd7b0a26bfe247c3d864eea843405731e13a9df431bf2a6d7c
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
|
@@ -46,7 +46,7 @@ gem install pdf_ocr
|
|
|
46
46
|
|
|
47
47
|
## ⚙️ Usage
|
|
48
48
|
```ruby
|
|
49
|
-
require '
|
|
49
|
+
require 'ocr'
|
|
50
50
|
require 'stringio'
|
|
51
51
|
|
|
52
52
|
# From a File object
|
|
@@ -78,22 +78,30 @@ result = Ocr::DataExtractor.new(pdf_data).call
|
|
|
78
78
|
```
|
|
79
79
|
## 🔧 Notes
|
|
80
80
|
1. Ensure Tesseract OCR is installed on your system:
|
|
81
|
-
```
|
|
82
|
-
# Ubuntu/Debian
|
|
83
|
-
sudo apt install tesseract-ocr
|
|
84
|
-
|
|
85
|
-
# MacOS (with Homebrew)
|
|
86
|
-
brew install tesseract
|
|
87
|
-
```
|
|
81
|
+
```
|
|
82
|
+
# Ubuntu/Debian
|
|
83
|
+
sudo apt install tesseract-ocr
|
|
84
|
+
|
|
85
|
+
# MacOS (with Homebrew)
|
|
86
|
+
brew install tesseract
|
|
87
|
+
```
|
|
88
88
|
2. Ensure pdftoppm is installed (for PDF-to-image conversion):
|
|
89
|
-
```
|
|
90
|
-
# Ubuntu/Debian
|
|
91
|
-
sudo apt install poppler-utils
|
|
92
|
-
|
|
93
|
-
# MacOS (with Homebrew)
|
|
94
|
-
brew install poppler
|
|
95
|
-
```
|
|
96
|
-
3.
|
|
89
|
+
```
|
|
90
|
+
# Ubuntu/Debian
|
|
91
|
+
sudo apt install poppler-utils
|
|
92
|
+
|
|
93
|
+
# MacOS (with Homebrew)
|
|
94
|
+
brew install poppler
|
|
95
|
+
```
|
|
96
|
+
3. Ensure ImageMagick is installed (for images):
|
|
97
|
+
```
|
|
98
|
+
# Ubuntu/Debian
|
|
99
|
+
sudo apt install imagemagick
|
|
100
|
+
|
|
101
|
+
# MacOS (with Homebrew)
|
|
102
|
+
brew install imagemagick
|
|
103
|
+
```
|
|
104
|
+
4. This gem does not require Rails, but it will work with Rails ActiveStorage objects that respond to .open.
|
|
97
105
|
|
|
98
106
|
## 🧪 Running Tests
|
|
99
107
|
```
|
data/lib/ocr/data_extractor.rb
CHANGED
data/lib/ocr/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: pdf_ocr
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.2
|
|
4
|
+
version: 0.1.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ravi Shankar Singhal
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2025-
|
|
11
|
+
date: 2025-11-11 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: pdf-reader
|
|
@@ -106,7 +106,7 @@ metadata:
|
|
|
106
106
|
homepage_uri: https://github.com/RaviShankarSinghal/ocr_gem
|
|
107
107
|
source_code_uri: https://github.com/RaviShankarSinghal/ocr_gem
|
|
108
108
|
changelog_uri: https://github.com/RaviShankarSinghal/ocr_gem/blob/main/CHANGELOG.md
|
|
109
|
-
documentation_uri: https://rubydoc.info/gems/pdf_ocr/0.1.2
|
|
109
|
+
documentation_uri: https://rubydoc.info/gems/pdf_ocr/0.1.3
|
|
110
110
|
post_install_message:
|
|
111
111
|
rdoc_options: []
|
|
112
112
|
require_paths:
|