ocr-file 0.0.8 → 0.0.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +3 -1
- data/lib/ocr-file/document.rb +1 -0
- data/lib/ocr-file/image_engines/image_magick.rb +4 -0
- data/lib/ocr-file/image_engines/pdf_engine.rb +2 -0
- data/lib/ocr-file/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e77eefa085a14282b42584bd4bf6796a99e10589552b52858cb8f5dd75c84b97
|
4
|
+
data.tar.gz: 9fde9adae0c252ecc56937a314c676903a6b2a6ababe51030cbcd9ab3ee1ba81
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f6c9cf596d6a78ccea7e1fb45543826e1b95dab74449700eb0f0bed4bace802fbc15fba118a982fbf7daed9ebb188e417876f1e6a9cb6f620eb3630a4aaed7af
|
7
|
+
data.tar.gz: 9aab569d476170d8c7b405f65a1629d8ec789f008f734a7cc7f49c5716c89c7a7b5010ae3096ed7e4b3358fe650c221da658801a893cd76f6183f23b19696349
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -45,6 +45,7 @@ You will need to install `tesseract` with your desired language on your system,
|
|
45
45
|
image_preprocess: true,
|
46
46
|
effects: ['despeckle', 'deskew', 'enhance', 'sharpen', 'remove_shadow', 'bw'], # Applies effects as listed. 'norm' is also available
|
47
47
|
automatic_reprocess: true, # Will possibly do double + the operations but can produce better results automatically
|
48
|
+
dimensions: [width, height], # Can be nil but will lock the images
|
48
49
|
# PDF to Image Processing
|
49
50
|
optimise_pdf: true,
|
50
51
|
extract_pdf_images: true, # if false will screenshot each PDF page
|
@@ -81,7 +82,8 @@ You will need to install `tesseract` with your desired language on your system,
|
|
81
82
|
# The files can be images or other PDFs
|
82
83
|
file_paths = []
|
83
84
|
merged_document = ::HexaPDF::Document.new
|
84
|
-
|
85
|
+
dimensions = [width, height] # or nil to maintain dimensions
|
86
|
+
documents = file_paths.map { |path| OcrFile::ImageEngines::PdfEngine.insert_image(merged_document, path, dimensions: dimensions) }
|
85
87
|
OcrFile::ImageEngines::PdfEngine.save_pdf(merged_document, save_file_path, optimise: true)
|
86
88
|
```
|
87
89
|
|
data/lib/ocr-file/document.rb
CHANGED
@@ -25,6 +25,7 @@ module OcrFile
|
|
25
25
|
image_preprocess: true,
|
26
26
|
effects: ['despeckle', 'deskew', 'enhance', 'sharpen', 'remove_shadow', 'bw'],
|
27
27
|
automatic_reprocess: true,
|
28
|
+
dimensions: nil, # width, height. Will lock images to these dimensions
|
28
29
|
# PDF to Image Processing
|
29
30
|
optimise_pdf: true,
|
30
31
|
extract_pdf_images: true, # if false will screenshot each PDF page
|
@@ -30,6 +30,10 @@ module OcrFile
|
|
30
30
|
def convert!
|
31
31
|
return @image_path unless @config[:image_preprocess]
|
32
32
|
|
33
|
+
if @config[:dimensions].is_a?(Array) && @config[:dimensions].size == 2
|
34
|
+
resize(width, height)
|
35
|
+
end
|
36
|
+
|
33
37
|
@config[:effects].each do |effect|
|
34
38
|
self.send(effect.to_sym)
|
35
39
|
end
|
data/lib/ocr-file/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ocr-file
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.8
|
4
|
+
version: 0.0.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- trex22
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-07-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: console-style
|