ocr-file 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +3 -3
- data/lib/ocr-file/document.rb +1 -1
- data/lib/ocr-file/image_engines/image_magick.rb +8 -1
- data/lib/ocr-file/image_engines/pdf_engine.rb +18 -3
- data/lib/ocr-file/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9ae0f4940b34df3280221cf8b26d86ba3498f8344ef5f0e27ea335ca651a8906
|
4
|
+
data.tar.gz: 5e790899721d25bb0f4dc0e8e276b39b62bbb2803549fdbc8ba148804885bec0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6cd016ca7bba37866579cad59f01f41d190c0a191cd1ce27fa7037646da7bf4962923664c7b6295655936aed8714fac01b08301be65fdfef68403c8dd12c075b
|
7
|
+
data.tar.gz: f1581713a76e19f1b24d43f030cccbfb32b206bea8d1a5f07fed26fe4e0cfaa3f991c0c35b98bf1f222ca36b143e83700638ecf3b0520b9663d2fe4336cc5da2
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -79,9 +79,9 @@ You will need to install `tesseract` with your desired language on your system,
|
|
79
79
|
|
80
80
|
# How to merge files into a single PDF:
|
81
81
|
# The files can be images or other PDFs
|
82
|
-
|
83
|
-
|
84
|
-
|
82
|
+
file_paths = []
|
83
|
+
merged_document = ::HexaPDF::Document.new
|
84
|
+
documents = file_paths.map { |path| OcrFile::ImageEngines::PdfEngine.insert_image(merged_document, path) }
|
85
85
|
OcrFile::ImageEngines::PdfEngine.save_pdf(merged_document, save_file_path, optimise: true)
|
86
86
|
```
|
87
87
|
|
data/lib/ocr-file/document.rb
CHANGED
@@ -66,7 +66,7 @@ module OcrFile
|
|
66
66
|
|
67
67
|
def image?
|
68
68
|
return false if pdf?
|
69
|
-
ACCEPTED_IMAGE_TYPES.any? { |type| @original_file_path.downcase.include?(".#{type}")}
|
69
|
+
ACCEPTED_IMAGE_TYPES.any? { |type| @original_file_path.downcase.include?(".#{type}") }
|
70
70
|
end
|
71
71
|
|
72
72
|
# Treat anything which isnt a PDF or image as text
|
data/lib/ocr-file/image_engines/image_magick.rb
CHANGED
@@ -5,7 +5,7 @@ module OcrFile
|
|
5
5
|
# Conversion of image types
|
6
6
|
# Rotation and detection of skew
|
7
7
|
|
8
|
-
attr_reader :image_path, :image, :temp_path, :save_file_path, :config
|
8
|
+
attr_reader :image_path, :image, :temp_path, :save_file_path, :config, :width, :height
|
9
9
|
|
10
10
|
def initialize(image_path:, temp_path:, save_file_path:, config:)
|
11
11
|
@image_path = image_path
|
@@ -22,6 +22,9 @@ module OcrFile
|
|
22
22
|
# end
|
23
23
|
|
24
24
|
@image = MiniMagick::Image.open(image_path)
|
25
|
+
|
26
|
+
@width = @image[:width]
|
27
|
+
@height = @image[:height]
|
25
28
|
end
|
26
29
|
|
27
30
|
def convert!
|
@@ -39,6 +42,10 @@ module OcrFile
|
|
39
42
|
@save_file_path
|
40
43
|
end
|
41
44
|
|
45
|
+
def resize(width, height)
|
46
|
+
@image.resize("#{width}x#{height}")
|
47
|
+
end
|
48
|
+
|
42
49
|
# Effects
|
43
50
|
# http://www.imagemagick.org/script/command-line-options.php
|
44
51
|
def bw
|
data/lib/ocr-file/image_engines/pdf_engine.rb
CHANGED
@@ -61,9 +61,24 @@ module OcrFile
|
|
61
61
|
image_paths
|
62
62
|
end
|
63
63
|
|
64
|
-
def insert_image(document, image_path)
|
65
|
-
|
66
|
-
|
64
|
+
def insert_image(document, image_path, dimensions: nil)
|
65
|
+
image_processor = OcrFile::ImageEngines::ImageMagick.new(
|
66
|
+
image_path: image_path,
|
67
|
+
temp_path: @temp_folder_path,
|
68
|
+
save_file_path: '',
|
69
|
+
config: @config
|
70
|
+
)
|
71
|
+
|
72
|
+
if dimensions
|
73
|
+
width = dimensions[0]
|
74
|
+
height = dimensions[1]
|
75
|
+
else
|
76
|
+
width = image_processor.width
|
77
|
+
height = image_processor.height
|
78
|
+
end
|
79
|
+
|
80
|
+
page = document.pages.add([0, 0, width, height])
|
81
|
+
page.canvas.image(@image || image_path, at: [0, 0], width: width, height: height)
|
67
82
|
end
|
68
83
|
|
69
84
|
def combine(text, pdf_of_images)
|
data/lib/ocr-file/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ocr-file
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.7
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- trex22
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-07-
|
11
|
+
date: 2022-07-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: console-style
|