tesseract_ffi 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 618b8a311591353b1d164091aaf4cf793241a4f47eadbd3413a4c44c61c5fe7a
4
- data.tar.gz: 33a199c65cfbe8f734c9b103ce9639068b5572ff805f2fa991ec6bd748108315
3
+ metadata.gz: c5a9c0c8d2c8b8df8ac6796658f09879e752b87a0942310489e52f0eb7e86150
4
+ data.tar.gz: 72a6acdefeba8c4c021f9f031d2c7523ae1450f883eff21429e36aa4795b610b
5
5
  SHA512:
6
- metadata.gz: ddda303d8de39d73cf353c5265acaf690ec4551aea60e8ec88c1720cfb38d2f5f637d02db13ed54068e95342c7fa2f5a3f8b5a1b613146ae120e2bf9905ef7af
7
- data.tar.gz: 74db6585a8eae9546b7bdd969572c789e548bc465db393af29ed6ceebe47baf2d9c8f7369f5df2d0d2bff2c30ae0214737781bd64d074aedb80c32dd8d8b3001
6
+ metadata.gz: bdc7ad7ab5f9fd23141097b9b0a924aabcc3891331481563c6f0bdfaba0398688a8954ae31cbdc5b779379546007374c156c98eef6ce067c4ef2ea462e67642b
7
+ data.tar.gz: 0fc5c10d36c8822d3604f46ed9687adc4cbbb1e4d76229c0a04632af54bf75e4461947c49bffc327fb69728ba1fbededfafe84cdefdda269e1e24cf29eb029ba
lib/tesseract_ffi.rb CHANGED
@@ -2,8 +2,9 @@
2
2
 
3
3
  require 'ffi'
4
4
  require 'tesseract_ffi/version'
5
- require 'tesseract_ffi/conf_vars' # mix-in to tesseract
6
- require 'tesseract_ffi/oem' # mix-in to tesseract
5
+ require 'tesseract_ffi/conf_vars' # mix-in to tesseract
6
+ require 'tesseract_ffi/oem' # mix-in to tesseract
7
+ require 'tesseract_ffi/rectangles' # mix-in to tesseract
7
8
  require 'tesseract_ffi/tesseract'
8
9
  require 'tesseract_ffi/tess_exception'
9
10
  require 'tesseract_ffi/quick'
lib/tesseract_ffi/rectangles.rb ADDED
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TesseractFFI
4
+ # module Rectangles mixin for recognizing text blocks defined by rectangles
5
+ module Rectangles
6
+ def set_rectangle(x_coord, y_coord, width, height)
7
+ tess_set_rectangle(@handle, x_coord, y_coord, width, height)
8
+ end
9
+
10
+ def recognize_rectangle(x_coord, y_coord, width, height)
11
+ setup do
12
+ set_rectangle(x_coord, y_coord, width, height)
13
+ ocr
14
+ end
15
+ end
16
+
17
+ # rubocop:disable Metrics/MethodLength
18
+ def recognize_rectangles(rectangle_list)
19
+ @texts = []
20
+ if valid_rectangle_list? rectangle_list
21
+ @rectangle_list = rectangle_list
22
+ setup do
23
+ @rectangle_list.each do |r|
24
+ set_rectangle(r[0], r[1], r[2], r[3])
25
+ ocr
26
+ @texts << @utf8_text.strip
27
+ end
28
+ end
29
+ else
30
+ @rectangle_list = nil
31
+ end
32
+ @texts
33
+ end
34
+
35
+ def valid_rectangle_list?(list)
36
+ if list.is_a?(Array) && list.all? { |r| valid_rectangle?(r) }
37
+ true
38
+ else
39
+ msg = 'Tess Error Argument must be a list'
40
+ # copy the error message as we are not going to Setup
41
+ @errors << msg
42
+ raise TessException.new(error_msg: msg)
43
+ end
44
+ end
45
+
46
+ def valid_rectangle?(rectangle)
47
+ if rectangle.is_a?(Array) &&
48
+ rectangle.length == 4 &&
49
+ rectangle.all? { |r| r.is_a?(Integer) }
50
+ true
51
+ else
52
+ msg = 'Tesseract Error Argument must be array of 4 Integer'
53
+ @errors << msg
54
+ raise TessException.new(error_msg: msg)
55
+ end
56
+ end
57
+ # rubocop:enable Metrics/MethodLength
58
+ end
59
+ end
lib/tesseract_ffi/tesseract.rb CHANGED
@@ -6,6 +6,7 @@ module TesseractFFI
6
6
  include TesseractFFI
7
7
  include ConfVars
8
8
  include OEM
9
+ include Rectangles
9
10
 
10
11
  attr_accessor :language, :file_name, :source_resolution
11
12
  attr_reader :utf8_text, :hocr_text, :errors
@@ -48,9 +49,7 @@ module TesseractFFI
48
49
  tess_set_source_resolution(@handle, @source_resolution)
49
50
  raise TessException.new(error_msg: 'Recognition Error') if tess_recognize(@handle, 0) != 0
50
51
 
51
- @utf8_text = ''
52
- text = tess_get_utf8(@handle, 0)
53
- @utf8_text = text.encode('UTF-8') if text
52
+ @utf8_text = tess_get_utf8(@handle, 0)
54
53
  @hocr_text = tess_get_hocr(@handle, 0)
55
54
  end
56
55
 
@@ -67,42 +66,5 @@ module TesseractFFI
67
66
  TesseractFFI.tess_process_pages(@handle, @file_name, nil, 5000, pdf_renderer)
68
67
  end
69
68
  end
70
-
71
- def set_rectangle(x_coord, y_coord, width, height)
72
- tess_set_rectangle(@handle, x_coord, y_coord, width, height)
73
- end
74
-
75
- def recognize_rectangle(x_coord, y_coord, width, height)
76
- setup do
77
- set_rectangle(x_coord, y_coord, width, height)
78
- ocr
79
- end
80
- end
81
-
82
- # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
83
- def recognize_rectangles(rectangle_list)
84
- unless rectangle_list.is_a?(Array) && rectangle_list.length.positive?
85
- msg = 'Tess Error Argument must be a list'
86
- # copy the error message as we are not going to Setup
87
- @errors << msg
88
- raise TessException.new(error_msg: msg)
89
- end
90
-
91
- texts = []
92
- setup do
93
- rectangle_list.each do |r|
94
- unless r.is_a?(Array) && rectangle_list.length > 3
95
- msg = 'Argument must be a list of 4-arrays'
96
- raise TessException.new(error_msg: msg)
97
- end
98
-
99
- set_rectangle(r[0], r[1], r[2], r[3])
100
- ocr
101
- texts << @utf8_text.strip
102
- end
103
- end
104
- texts
105
- end
106
- # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
107
69
  end
108
70
  end
lib/tesseract_ffi/version.rb CHANGED
@@ -2,5 +2,5 @@
2
2
 
3
3
  # module with version
4
4
  module TesseractFFI
5
- VERSION = '0.3.0'
5
+ VERSION = '0.4.0'
6
6
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tesseract_ffi
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Verrier
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-08-14 00:00:00.000000000 Z
11
+ date: 2020-08-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -121,6 +121,7 @@ files:
121
121
  - lib/tesseract_ffi/conf_vars.rb
122
122
  - lib/tesseract_ffi/oem.rb
123
123
  - lib/tesseract_ffi/quick.rb
124
+ - lib/tesseract_ffi/rectangles.rb
124
125
  - lib/tesseract_ffi/tess_exception.rb
125
126
  - lib/tesseract_ffi/tesseract.rb
126
127
  - lib/tesseract_ffi/version.rb