tesseract_ffi 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/tesseract_ffi.rb +3 -2
- data/lib/tesseract_ffi/rectangles.rb +59 -0
- data/lib/tesseract_ffi/tesseract.rb +2 -40
- data/lib/tesseract_ffi/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c5a9c0c8d2c8b8df8ac6796658f09879e752b87a0942310489e52f0eb7e86150
|
4
|
+
data.tar.gz: 72a6acdefeba8c4c021f9f031d2c7523ae1450f883eff21429e36aa4795b610b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bdc7ad7ab5f9fd23141097b9b0a924aabcc3891331481563c6f0bdfaba0398688a8954ae31cbdc5b779379546007374c156c98eef6ce067c4ef2ea462e67642b
|
7
|
+
data.tar.gz: 0fc5c10d36c8822d3604f46ed9687adc4cbbb1e4d76229c0a04632af54bf75e4461947c49bffc327fb69728ba1fbededfafe84cdefdda269e1e24cf29eb029ba
|
data/lib/tesseract_ffi.rb
CHANGED
@@ -2,8 +2,9 @@
|
|
2
2
|
|
3
3
|
require 'ffi'
|
4
4
|
require 'tesseract_ffi/version'
|
5
|
-
require 'tesseract_ffi/conf_vars'
|
6
|
-
require 'tesseract_ffi/oem'
|
5
|
+
require 'tesseract_ffi/conf_vars' # mix-in to tesseract
|
6
|
+
require 'tesseract_ffi/oem' # mix-in to tesseract
|
7
|
+
require 'tesseract_ffi/rectangles' # mix-in to tesseract
|
7
8
|
require 'tesseract_ffi/tesseract'
|
8
9
|
require 'tesseract_ffi/tess_exception'
|
9
10
|
require 'tesseract_ffi/quick'
|
data/lib/tesseract_ffi/rectangles.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module TesseractFFI
|
4
|
+
# module Rectangles mixin for recognizing text blocks defined by rectangles
|
5
|
+
module Rectangles
|
6
|
+
def set_rectangle(x_coord, y_coord, width, height)
|
7
|
+
tess_set_rectangle(@handle, x_coord, y_coord, width, height)
|
8
|
+
end
|
9
|
+
|
10
|
+
def recognize_rectangle(x_coord, y_coord, width, height)
|
11
|
+
setup do
|
12
|
+
set_rectangle(x_coord, y_coord, width, height)
|
13
|
+
ocr
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
# rubocop:disable Metrics/MethodLength
|
18
|
+
def recognize_rectangles(rectangle_list)
|
19
|
+
@texts = []
|
20
|
+
if valid_rectangle_list? rectangle_list
|
21
|
+
@rectangle_list = rectangle_list
|
22
|
+
setup do
|
23
|
+
@rectangle_list.each do |r|
|
24
|
+
set_rectangle(r[0], r[1], r[2], r[3])
|
25
|
+
ocr
|
26
|
+
@texts << @utf8_text.strip
|
27
|
+
end
|
28
|
+
end
|
29
|
+
else
|
30
|
+
@rectangle_list = nil
|
31
|
+
end
|
32
|
+
@texts
|
33
|
+
end
|
34
|
+
|
35
|
+
def valid_rectangle_list?(list)
|
36
|
+
if list.is_a?(Array) && list.all? { |r| valid_rectangle?(r) }
|
37
|
+
true
|
38
|
+
else
|
39
|
+
msg = 'Tess Error Argument must be a list'
|
40
|
+
# copy the error message as we are not going to Setup
|
41
|
+
@errors << msg
|
42
|
+
raise TessException.new(error_msg: msg)
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def valid_rectangle?(rectangle)
|
47
|
+
if rectangle.is_a?(Array) &&
|
48
|
+
rectangle.length == 4 &&
|
49
|
+
rectangle.all? { |r| r.is_a?(Integer) }
|
50
|
+
true
|
51
|
+
else
|
52
|
+
msg = 'Tesseract Error Argument must be array of 4 Integer'
|
53
|
+
@errors << msg
|
54
|
+
raise TessException.new(error_msg: msg)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
# rubocop:enable Metrics/MethodLength
|
58
|
+
end
|
59
|
+
end
|
data/lib/tesseract_ffi/tesseract.rb
CHANGED
@@ -6,6 +6,7 @@ module TesseractFFI
|
|
6
6
|
include TesseractFFI
|
7
7
|
include ConfVars
|
8
8
|
include OEM
|
9
|
+
include Rectangles
|
9
10
|
|
10
11
|
attr_accessor :language, :file_name, :source_resolution
|
11
12
|
attr_reader :utf8_text, :hocr_text, :errors
|
@@ -48,9 +49,7 @@ module TesseractFFI
|
|
48
49
|
tess_set_source_resolution(@handle, @source_resolution)
|
49
50
|
raise TessException.new(error_msg: 'Recognition Error') if tess_recognize(@handle, 0) != 0
|
50
51
|
|
51
|
-
@utf8_text =
|
52
|
-
text = tess_get_utf8(@handle, 0)
|
53
|
-
@utf8_text = text.encode('UTF-8') if text
|
52
|
+
@utf8_text = tess_get_utf8(@handle, 0)
|
54
53
|
@hocr_text = tess_get_hocr(@handle, 0)
|
55
54
|
end
|
56
55
|
|
@@ -67,42 +66,5 @@ module TesseractFFI
|
|
67
66
|
TesseractFFI.tess_process_pages(@handle, @file_name, nil, 5000, pdf_renderer)
|
68
67
|
end
|
69
68
|
end
|
70
|
-
|
71
|
-
def set_rectangle(x_coord, y_coord, width, height)
|
72
|
-
tess_set_rectangle(@handle, x_coord, y_coord, width, height)
|
73
|
-
end
|
74
|
-
|
75
|
-
def recognize_rectangle(x_coord, y_coord, width, height)
|
76
|
-
setup do
|
77
|
-
set_rectangle(x_coord, y_coord, width, height)
|
78
|
-
ocr
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
83
|
-
def recognize_rectangles(rectangle_list)
|
84
|
-
unless rectangle_list.is_a?(Array) && rectangle_list.length.positive?
|
85
|
-
msg = 'Tess Error Argument must be a list'
|
86
|
-
# copy the error message as we are not going to Setup
|
87
|
-
@errors << msg
|
88
|
-
raise TessException.new(error_msg: msg)
|
89
|
-
end
|
90
|
-
|
91
|
-
texts = []
|
92
|
-
setup do
|
93
|
-
rectangle_list.each do |r|
|
94
|
-
unless r.is_a?(Array) && rectangle_list.length > 3
|
95
|
-
msg = 'Argument must be a list of 4-arrays'
|
96
|
-
raise TessException.new(error_msg: msg)
|
97
|
-
end
|
98
|
-
|
99
|
-
set_rectangle(r[0], r[1], r[2], r[3])
|
100
|
-
ocr
|
101
|
-
texts << @utf8_text.strip
|
102
|
-
end
|
103
|
-
end
|
104
|
-
texts
|
105
|
-
end
|
106
|
-
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
107
69
|
end
|
108
70
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tesseract_ffi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Verrier
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-08-
|
11
|
+
date: 2020-08-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -121,6 +121,7 @@ files:
|
|
121
121
|
- lib/tesseract_ffi/conf_vars.rb
|
122
122
|
- lib/tesseract_ffi/oem.rb
|
123
123
|
- lib/tesseract_ffi/quick.rb
|
124
|
+
- lib/tesseract_ffi/rectangles.rb
|
124
125
|
- lib/tesseract_ffi/tess_exception.rb
|
125
126
|
- lib/tesseract_ffi/tesseract.rb
|
126
127
|
- lib/tesseract_ffi/version.rb
|