sqed 0.3.2 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/Guardfile +66 -0
- data/lib/sqed.rb +120 -68
- data/lib/sqed/boundaries.rb +30 -25
- data/lib/sqed/boundary_finder.rb +221 -212
- data/lib/sqed/boundary_finder/color_line_finder.rb +50 -42
- data/lib/sqed/boundary_finder/cross_finder.rb +3 -3
- data/lib/sqed/boundary_finder/stage_finder.rb +8 -3
- data/lib/sqed/extractor.rb +23 -25
- data/lib/sqed/parser.rb +4 -7
- data/lib/sqed/parser/barcode_parser.rb +5 -5
- data/lib/sqed/parser/ocr_parser.rb +46 -46
- data/lib/sqed/result.rb +60 -57
- data/lib/sqed/version.rb +1 -1
- data/lib/sqed_config.rb +52 -56
- data/spec/lib/sqed/boundaries_spec.rb +1 -1
- data/spec/lib/sqed/boundary_finder/color_line_finder_spec.rb +24 -24
- data/spec/lib/sqed/boundary_finder/cross_finder_spec.rb +1 -1
- data/spec/lib/sqed/boundary_finder/stage_finder_spec.rb +1 -1
- data/spec/lib/sqed/boundary_finder_spec.rb +73 -45
- data/spec/lib/sqed/extractor_spec.rb +4 -4
- data/spec/lib/sqed/parser/ocr_spec.rb +2 -2
- data/spec/lib/sqed_spec.rb +39 -39
- data/spec/lib/stage_handling/seven_slot_spec.rb +45 -9
- data/spec/support/files/stage_images/inhs_7_slot2.jpg +0 -0
- data/spec/support/image_helpers.rb +10 -9
- metadata +6 -3
@@ -4,15 +4,24 @@ require 'rmagick'
|
|
4
4
|
#
|
5
5
|
class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
6
6
|
|
7
|
-
|
8
|
-
|
7
|
+
attr_accessor :boundary_color
|
8
|
+
|
9
|
+
# Builds the finder, then immediately runs band detection via `find_bands`.
#
# @param opts [Hash] keyword options
# @option opts :image the image to inspect, forwarded to the superclass
#   (presumably a Magick::Image, since it must respond to `copy` - confirm against callers)
# @option opts [Symbol] :layout the layout to detect; required
# @option opts [Symbol] :boundary_color color of the dividing lines, defaults to :green
# @option opts :use_thumbnail when truthy, detection runs on the result of `thumbnail`
#   while the untouched image is parked in @original_image
# @raise [RuntimeError] 'No layout provided.' when @layout is nil after the superclass ran
def initialize(**opts)
  use_thumbnail = opts[:use_thumbnail]
  @boundary_color = opts[:boundary_color] || :green

  super(image: opts[:image], layout: opts[:layout], use_thumbnail: use_thumbnail)

  raise 'No layout provided.' if @layout.nil?

  # Keep a copy of the full-size image before swapping in the thumbnail;
  # find_bands puts it back once the boundaries have been computed.
  if use_thumbnail
    @original_image = @image.copy
    @image = thumbnail
  end

  find_bands
end
|
18
27
|
|
@@ -21,77 +30,78 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
|
21
30
|
def find_bands
|
22
31
|
case layout # boundaries.coordinates are referenced from stage image
|
23
32
|
|
24
|
-
|
33
|
+
# No specs for this yet
|
25
34
|
when :seven_slot
|
26
|
-
top_bottom_split = Sqed::BoundaryFinder.color_boundary_finder(
|
27
|
-
left_right_split = Sqed::BoundaryFinder.color_boundary_finder(
|
35
|
+
top_bottom_split = Sqed::BoundaryFinder.color_boundary_finder(image: image, scan: :columns, boundary_color: boundary_color) # detect vertical division [array]
|
36
|
+
left_right_split = Sqed::BoundaryFinder.color_boundary_finder(image: image, sample_subdivision_size: 2, boundary_color: boundary_color) # detect horizontal division [array]
|
28
37
|
|
29
|
-
boundaries.set(0, [0, 0, left_right_split[0], top_bottom_split[0]
|
30
|
-
boundaries.set(6, [0, top_bottom_split[2], left_right_split[0], image.rows - top_bottom_split[2]
|
38
|
+
boundaries.set(0, [0, 0, left_right_split[0], top_bottom_split[0]])
|
39
|
+
boundaries.set(6, [0, top_bottom_split[2], left_right_split[0], image.rows - top_bottom_split[2]] )
|
31
40
|
|
32
41
|
right_top_image = image.crop( left_right_split[2], 0, image.columns - left_right_split[2], top_bottom_split[0] , true) # sections 1,2
|
33
42
|
right_bottom_image = image.crop(left_right_split[2], top_bottom_split[2], image.columns - left_right_split[2], image.rows - top_bottom_split[2], true) # sections 3,4,5
|
34
43
|
|
35
|
-
right_top_split = corrected_frequency(Sqed::BoundaryFinder.color_boundary_finder(
|
44
|
+
right_top_split = corrected_frequency(Sqed::BoundaryFinder.color_boundary_finder(image: right_top_image, boundary_color: boundary_color)) # vertical line b/w 1 & 2, use "corrected_frequency" to account for color bleed from previous crop
|
36
45
|
|
37
46
|
boundaries.set(1, [left_right_split[2], 0, right_top_split[0], top_bottom_split[0] ])
|
38
|
-
boundaries.set(2, [left_right_split[2] + right_top_split[2], 0, right_top_image.columns - right_top_split[2], top_bottom_split[0]
|
47
|
+
boundaries.set(2, [left_right_split[2] + right_top_split[2], 0, right_top_image.columns - right_top_split[2], top_bottom_split[0]])
|
48
|
+
|
49
|
+
right_bottom_split = corrected_frequency(Sqed::BoundaryFinder.color_boundary_finder(image: right_bottom_image, scan: :columns, sample_subdivision_size: 2, boundary_color: boundary_color)) # horizontal line b/w (5,3) & 4, use "corrected_frequency" to account for color bleed from previous crop
|
50
|
+
|
51
|
+
bottom_right_top_image = right_bottom_image.crop(0,0, image.columns - left_right_split[2], right_bottom_split[0], true) # 3,5
|
39
52
|
|
40
|
-
|
41
|
-
|
42
|
-
bottom_right_top_image = right_bottom_image.crop(0,0, image.columns - left_right_split[2], right_bottom_split[2], true) # 3,5
|
53
|
+
boundaries.set(3, [ left_right_split[2] + right_top_split[2], top_bottom_split[2], left_right_split[2] + right_top_split[2], bottom_right_top_image.rows ])
|
54
|
+
boundaries.set(5, [ left_right_split[2], top_bottom_split[2], right_top_split[0], bottom_right_top_image.rows])
|
43
55
|
|
44
|
-
|
45
|
-
boundaries.set(
|
46
|
-
|
47
|
-
boundaries.set(4, [ left_right_split[2], top_bottom_split[2] + right_top_split[2], image.columns - left_right_split[2], right_bottom_image.rows - right_top_split[2] ] )
|
56
|
+
# ! not high enough
|
57
|
+
boundaries.set(4, [left_right_split[2], top_bottom_split[2] + right_bottom_split[2], image.columns - left_right_split[2], right_bottom_image.rows ])
|
48
58
|
|
49
59
|
when :vertical_split
|
50
|
-
t = Sqed::BoundaryFinder.color_boundary_finder(
|
60
|
+
t = Sqed::BoundaryFinder.color_boundary_finder(image: image, boundary_color: boundary_color) #detect vertical division
|
51
61
|
return if t.nil?
|
52
62
|
boundaries.set(0, [0, 0, t[0], image.rows]) # left section of image
|
53
63
|
boundaries.set(1, [t[2], 0, image.columns - t[2], image.rows]) # right section of image
|
54
64
|
|
55
65
|
when :horizontal_split
|
56
|
-
t = Sqed::BoundaryFinder.color_boundary_finder(
|
66
|
+
t = Sqed::BoundaryFinder.color_boundary_finder(image: image, scan: :columns, boundary_color: boundary_color) # set to detect horizontal division
|
57
67
|
return if t.nil?
|
58
68
|
|
59
69
|
boundaries.set(0, [0, 0, image.columns, t[0]]) # upper section of image
|
60
70
|
boundaries.set(1, [0, t[2], image.columns, image.rows - t[2]]) # lower section of image
|
61
71
|
|
62
72
|
when :right_t # only 3 zones expected, with horizontal division in right-side of vertical division
|
63
|
-
vertical = self.class.new(
|
73
|
+
vertical = self.class.new(image: @image, layout: :vertical_split, boundary_color: boundary_color, use_thumbnail: false ).boundaries
|
64
74
|
|
65
75
|
irt = image.crop(*vertical.for(1), true)
|
66
|
-
right = self.class.new(
|
67
|
-
|
76
|
+
right = self.class.new(image: irt, layout: :horizontal_split, boundary_color: boundary_color, use_thumbnail: false ).boundaries
|
77
|
+
|
68
78
|
boundaries.set(0, vertical.for(0))
|
69
79
|
boundaries.set(1, [ vertical.x_for(1), 0, right.width_for(0), right.height_for(0) ] )
|
70
80
|
boundaries.set(2, [ vertical.x_for(1), right.y_for(1), right.width_for(1), right.height_for(1)] )
|
71
81
|
|
72
82
|
when :vertical_offset_cross # 4 zones expected, with (varying) horizontal division in left- and right- sides of vertical division
|
73
|
-
vertical = self.class.new(
|
83
|
+
vertical = self.class.new(image: @image, layout: :vertical_split, boundary_color: boundary_color, use_thumbnail: false).boundaries
|
74
84
|
|
75
85
|
ilt = image.crop(*vertical.for(0), true)
|
76
86
|
irt = image.crop(*vertical.for(1), true)
|
77
87
|
|
78
|
-
left = self.class.new(
|
79
|
-
right = self.class.new(
|
88
|
+
left = self.class.new(image: ilt, layout: :horizontal_split, boundary_color: boundary_color, use_thumbnail: false).boundaries # fails
|
89
|
+
right = self.class.new(image: irt, layout: :horizontal_split, boundary_color: boundary_color, use_thumbnail: false ).boundaries # OK
|
80
90
|
|
81
91
|
boundaries.set(0, [0, 0, left.width_for(0), left.height_for(0) ])
|
82
92
|
boundaries.set(1, [vertical.x_for(1), 0, right.width_for(0), right.height_for(0) ])
|
83
93
|
boundaries.set(2, [vertical.x_for(1), right.y_for(1), right.width_for(1), right.height_for(1) ])
|
84
94
|
boundaries.set(3, [0, left.y_for(1), left.width_for(1), left.height_for(1) ])
|
85
95
|
|
86
|
-
|
96
|
+
# No specs for this yet
|
87
97
|
when :horizontal_offset_cross
|
88
|
-
horizontal = self.class.new(
|
98
|
+
horizontal = self.class.new(image: @image, layout: :horizontal_split, boundary_color: boundary_color, use_thumbnail: false ).boundaries
|
89
99
|
|
90
100
|
itop = image.crop(*horizontal.for(0), true)
|
91
101
|
ibottom = image.crop(*horizontal.for(1), true)
|
92
102
|
|
93
|
-
top = self.class.new(
|
94
|
-
bottom = self.class.new(
|
103
|
+
# Split the top crop produced above; `ilt` is only assigned in the :vertical_offset_cross branch and is nil here.
top = self.class.new(image: itop, layout: :vertical_split, boundary_color: boundary_color, use_thumbnail: false ).boundaries
|
104
|
+
# Split the bottom crop produced above; `irt` is only assigned in the other offset-cross branches and is nil here.
bottom = self.class.new(image: ibottom, layout: :vertical_split, boundary_color: boundary_color, use_thumbnail: false ).boundaries
|
95
105
|
|
96
106
|
boundaries.set(0, [0, 0, top.width_for(0), top.height_for(0) ])
|
97
107
|
boundaries.set(1, [top.x_for(1), 0, top.width_for(1), top.height_for(1) ])
|
@@ -99,9 +109,9 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
|
99
109
|
boundaries.set(3, [0, horizontal.y_for(1), bottom.width_for(0), bottom.height_for(0) ])
|
100
110
|
|
101
111
|
when :cross # 4 zones, with perfectly intersected horizontal and vertical division
|
102
|
-
v = self.class.new(
|
103
|
-
h = self.class.new(
|
104
|
-
|
112
|
+
v = self.class.new(image: @image, layout: :vertical_split, boundary_color: boundary_color, use_thumbnail: false ).boundaries
|
113
|
+
h = self.class.new(image: @image, layout: :horizontal_split, boundary_color: boundary_color, use_thumbnail: false).boundaries
|
114
|
+
|
105
115
|
return if v.nil? || h.nil?
|
106
116
|
|
107
117
|
boundaries.set(0, [0,0, v.width_for(0), h.height_for(0) ])
|
@@ -116,13 +126,11 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
|
116
126
|
|
117
127
|
boundaries.complete = true if boundaries.populated?
|
118
128
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
129
|
+
if use_thumbnail
|
130
|
+
@image = @original_image
|
131
|
+
zoom_boundaries
|
132
|
+
@original_image = nil
|
133
|
+
end
|
124
134
|
|
125
135
|
end
|
126
|
-
|
127
|
-
|
128
136
|
end
|
@@ -15,8 +15,13 @@ class Sqed::BoundaryFinder::StageFinder < Sqed::BoundaryFinder
|
|
15
15
|
|
16
16
|
attr_reader :x0, :y0, :x1, :y1, :min_width, :min_height, :rows, :columns
|
17
17
|
|
18
|
-
def initialize(
|
19
|
-
|
18
|
+
def initialize(**opts)
|
19
|
+
image = opts[:image]
|
20
|
+
is_border_proc = opts[:is_border_proc]
|
21
|
+
min_ratio = opts[:min_ratio]
|
22
|
+
min_ratio ||= MIN_CROP_RATIO
|
23
|
+
|
24
|
+
super(image: image, layout: :internal_box)
|
20
25
|
|
21
26
|
@min_ratio = min_ratio
|
22
27
|
|
@@ -26,7 +31,7 @@ class Sqed::BoundaryFinder::StageFinder < Sqed::BoundaryFinder
|
|
26
31
|
@min_width, @min_height = image.columns * @min_ratio, image.rows * @min_ratio # minimum resultant area
|
27
32
|
@columns, @rows = image.columns, image.rows
|
28
33
|
|
29
|
-
|
34
|
+
|
30
35
|
# We need a border finder proc. Provide one if none was given.
|
31
36
|
@is_border = is_border_proc || self.class.default_border_finder(image) # if no proc specified, use default below
|
32
37
|
|
data/lib/sqed/extractor.rb
CHANGED
@@ -1,62 +1,59 @@
|
|
1
1
|
require 'rmagick'
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
class Sqed
|
4
|
+
|
5
|
+
# An Extractor takes Boundaries object and a metadata_map and returns a Sqed::Result
|
6
|
+
#
|
7
|
+
class Extractor
|
6
8
|
|
7
9
|
class Error < StandardError; end;
|
8
10
|
|
9
|
-
# a Sqed::Boundaries instance
|
11
|
+
# a Sqed::Boundaries instance
|
10
12
|
attr_accessor :boundaries
|
11
13
|
|
14
|
+
# @return [Hash] like `{0 => :annotated_specimen, 1 => :identifier, 2 => :image_registration }`
|
12
15
|
#   a metadata_map hash from EXTRACTION_PATTERNS
|
13
|
-
# {0 => :annotated_specimen, 1 => :identifier, 2 =>:image_registration }
|
14
16
|
attr_accessor :metadata_map
|
15
17
|
|
16
|
-
#
|
18
|
+
# @return [Magick::Image file]
|
17
19
|
attr_accessor :image
|
18
20
|
|
19
|
-
def initialize(
|
20
|
-
|
21
|
-
|
22
|
-
|
21
|
+
# Stores the extraction inputs and validates each of them.
#
# The earlier checks were written as `!x.class == Y`, which Ruby parses as
# `(!x.class) == Y` and is therefore always false; only the nil checks ever
# fired. The type checks below are the ones the error messages describe.
#
# @param opts [Hash] keyword options
# @option opts [Hash] :metadata_map section index => section type
# @option opts [Sqed::Boundaries] :boundaries the boundaries to extract with
# @option opts [Magick::Image] :image the image that gets cropped
# @raise [Error] when any option is missing or of the wrong type
def initialize(**opts)
  @metadata_map = opts[:metadata_map]
  @boundaries = opts[:boundaries]
  @image = opts[:image]

  # nil fails is_a? as well, so each guard covers "not provided" and "wrong type"
  unless @boundaries.is_a?(Sqed::Boundaries)
    raise Error, 'boundaries not provided or provided boundary is not a Sqed::Boundaries'
  end

  unless @metadata_map.is_a?(Hash)
    raise Error, 'metadata_map not provided or metadata_map not a Hash'
  end

  # Compared by class name, the same way Sqed::Parser checks its image.
  raise Error, 'image not provided' if @image.nil? || @image.class.name != 'Magick::Image'
end
|
28
30
|
|
29
31
|
def result
|
30
|
-
r = Sqed::Result.new
|
32
|
+
r = Sqed::Result.new
|
31
33
|
|
32
34
|
r.sections = metadata_map.values.sort
|
33
|
-
|
35
|
+
|
34
36
|
# assign the images to the result
|
35
37
|
boundaries.each do |section_index, coords|
|
36
38
|
section_type = metadata_map[section_index]
|
37
|
-
|
38
|
-
# TODO: raise this higher up the chain
|
39
|
-
raise Error, "invalid section_type [#{section_type}]" if !SqedConfig::LAYOUT_SECTION_TYPES.include?(section_type)
|
40
39
|
|
41
40
|
r.send("#{section_type}_image=", extract_image(coords))
|
42
41
|
r.boundary_coordinates[section_type] = coords
|
43
|
-
end
|
42
|
+
end
|
44
43
|
|
45
44
|
# assign the metadata to the result
|
46
45
|
metadata_map.each do |section_index, section_type|
|
47
46
|
# only extract data if a parser exists
|
48
47
|
if parsers = SqedConfig::SECTION_PARSERS[section_type]
|
49
|
-
|
50
48
|
section_image = r.send("#{section_type}_image")
|
51
|
-
|
52
49
|
updated = r.send(section_type)
|
53
50
|
|
54
51
|
parsers.each do |p|
|
55
|
-
parsed_result = p.new(section_image).
|
56
|
-
updated
|
52
|
+
parsed_result = p.new(section_image).get_text(section_type: section_type)
|
53
|
+
updated[p::TYPE] = parsed_result if parsed_result && parsed_result.length > 0
|
57
54
|
end
|
58
55
|
|
59
|
-
r.send("#{section_type}=", updated)
|
56
|
+
r.send("#{section_type}=", updated)
|
60
57
|
end
|
61
58
|
end
|
62
59
|
|
@@ -65,7 +62,8 @@ class Sqed::Extractor
|
|
65
62
|
|
66
63
|
# crop takes x, y, width, height
|
67
64
|
# Cuts one section out of @image.
#
# @param coords [Array] x, y, width, height of the section
# @return the result of `@image.crop`, called with a trailing `true`
#   (presumably RMagick's "reset page geometry" flag - confirm against the RMagick docs)
def extract_image(coords)
  crop_arguments = [*coords, true]
  @image.crop(*crop_arguments)
end
|
70
67
|
|
68
|
+
end
|
71
69
|
end
|
data/lib/sqed/parser.rb
CHANGED
@@ -3,17 +3,14 @@
|
|
3
3
|
# Base class for Parsers
|
4
4
|
#
|
5
5
|
class Sqed::Parser

  # the image handed to the parser
  attr_accessor :image

  # text produced by a parser run (e.g. set by OcrParser#get_text)
  attr_accessor :extracted_text

  # @param image the image to parse; nil/false is accepted without complaint
  # @raise [RuntimeError] when an image is given whose class name is not 'Magick::Image'
  #   NOTE(review): the message says "no image provided" although it fires on a
  #   wrong-typed image, not on a missing one.
  def initialize(image)
    @image = image
    return unless @image

    raise 'no image provided to parser' unless @image.class.name == 'Magick::Image'
  end

end
|
@@ -1,12 +1,11 @@
|
|
1
1
|
# Given an image, return an ordered array of detectable barcodes
|
2
|
-
#
|
2
|
+
#
|
3
|
+
# !! DOES NOTHING !!
|
3
4
|
#
|
4
5
|
class Sqed::Parser::BarcodeParser < Sqed::Parser
|
5
6
|
|
6
7
|
TYPE = :barcode
|
7
8
|
|
8
|
-
attr_accessor :image
|
9
|
-
|
10
9
|
attr_accessor :barcode
|
11
10
|
|
12
11
|
def initialize(image)
|
@@ -28,15 +27,16 @@ class Sqed::Parser::BarcodeParser < Sqed::Parser
|
|
28
27
|
# try a bunch of options, organized by most common, give the first hit
|
29
28
|
# Runs every available decoder, most common first, and answers the first
# non-nil result (nil when none of them produced anything).
def get_barcode
  candidates = [get_code_128]
  candidates.find { |code| !code.nil? }
end
|
32
31
|
|
33
32
|
#def get_datamatrix
|
34
33
|
# https://github.com/srijan/ruby-dmtx
|
35
34
|
#end
|
36
35
|
|
37
36
|
# alias to a universal method
|
38
|
-
def
|
37
|
+
# Universal parser entry point: answers whatever `barcode` currently holds.
#
# @param section_type [Symbol] accepted so the signature matches the other
#   parsers' `get_text`; it is not used here
# @return the value of `barcode`
def get_text(section_type: :default)
  barcode
end
|
41
40
|
|
41
|
+
|
42
42
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'rtesseract'
|
2
|
+
|
1
3
|
# encoding: UTF-8
|
2
4
|
#
|
3
5
|
# Given a single image return all text in that image.
|
@@ -17,49 +19,51 @@
|
|
17
19
|
# Below an x-height of 10 pixels, you have very little chance of accurate results,
|
18
20
|
# and below about 8 pixels, most of the text will be "noise removed".
|
19
21
|
#
|
20
|
-
require 'rtesseract'
|
21
|
-
|
22
22
|
class Sqed::Parser::OcrParser < Sqed::Parser
|
23
23
|
|
24
24
|
TYPE = :text
|
25
25
|
|
26
|
+
# Other experimented with default params
|
27
|
+
# classify_debug_level: 5,
|
28
|
+
# lang: 'eng',
|
29
|
+
# load_system_dawg: 0,
|
30
|
+
# load_unambig_dawg: 0,
|
31
|
+
# load_freq_dawg: 0,
|
32
|
+
# load_fixed_length_dawgs: 0,
|
33
|
+
# load_number_dawg: 0,
|
34
|
+
# load_punc_dawg: 1, ## important
|
35
|
+
# load_unambig_dawg: 1,
|
36
|
+
# chop_enable: 0,
|
37
|
+
# enable_new_segsearch: 1,
|
38
|
+
# tessedit_debug_quality_metrics: 1,
|
39
|
+
# tessedit_write_params_to_file: 'tmp/ocr_config_file.txt',
|
40
|
+
# tessedit_write_images: 1,
|
41
|
+
# equationdetect_save_merged_image: 1,
|
42
|
+
# tessedit_dump_pageseg_images: 1,
|
43
|
+
# equationdetect_save_bi_image: 1
|
44
|
+
|
26
45
|
# Tesseract parameters default/specific to section type,
|
27
46
|
# default is merged into the type
|
28
47
|
SECTION_PARAMS = {
|
29
48
|
default: {
|
30
|
-
psm: 3
|
31
|
-
# classify_debug_level: 5,
|
32
|
-
# lang: 'eng',
|
33
|
-
# load_system_dawg: 0,
|
34
|
-
# load_unambig_dawg: 0,
|
35
|
-
# load_freq_dawg: 0,
|
36
|
-
# load_fixed_length_dawgs: 0,
|
37
|
-
# load_number_dawg: 0,
|
38
|
-
# load_punc_dawg: 1, ## important
|
39
|
-
# load_unambig_dawg: 1,
|
40
|
-
# chop_enable: 0,
|
41
|
-
# enable_new_segsearch: 1,
|
42
|
-
# tessedit_debug_quality_metrics: 1,
|
43
|
-
# tessedit_write_params_to_file: 'tmp/ocr_config_file.txt',
|
44
|
-
# tessedit_write_images: 1,
|
45
|
-
# equationdetect_save_merged_image: 1,
|
46
|
-
# tessedit_dump_pageseg_images: 1,
|
47
|
-
# equationdetect_save_bi_image: 1
|
49
|
+
psm: 3
|
48
50
|
},
|
49
51
|
annotated_specimen: {
|
50
|
-
|
52
|
+
# was 45, significantly improves annotated_specimen for odontates
|
53
|
+
edges_children_count_limit: 3000
|
51
54
|
},
|
52
55
|
identifier: {
|
53
56
|
psm: 1,
|
54
57
|
# tessedit_char_whitelist: '0123456789'
|
55
58
|
# edges_children_count_limit: 4000
|
56
|
-
},
|
59
|
+
},
|
57
60
|
curator_metadata: {
|
61
|
+
psm: 3
|
58
62
|
},
|
59
63
|
labels: {
|
60
64
|
psm: 3, # may need to be 6
|
61
65
|
},
|
62
|
-
|
66
|
+
determination_labels: {
|
63
67
|
psm: 3
|
64
68
|
},
|
65
69
|
other_labels: {
|
@@ -68,12 +72,7 @@ class Sqed::Parser::OcrParser < Sqed::Parser
|
|
68
72
|
collecting_event_labels: {
|
69
73
|
psm: 3
|
70
74
|
}
|
71
|
-
|
72
|
-
|
73
|
-
}
|
74
|
-
|
75
|
-
# the text extracted from the image
|
76
|
-
attr_accessor :text
|
75
|
+
}.freeze
|
77
76
|
|
78
77
|
# future consideration
|
79
78
|
# def enhance_image(img)
|
@@ -102,35 +101,36 @@ class Sqed::Parser::OcrParser < Sqed::Parser
|
|
102
101
|
# img = img.white_threshold(245)
|
103
102
|
# img
|
104
103
|
# end
|
105
|
-
|
104
|
+
|
106
105
|
# @return [String]
|
107
|
-
# the ocr text
|
108
|
-
def
|
109
|
-
img =
|
110
|
-
|
106
|
+
# the ocr text
|
107
|
+
# Runs OCR over `image`, retrying with progressively altered copies of the
# image while the result is empty, and stores the outcome in @extracted_text.
#
# @param section_type [Symbol] key into SECTION_PARAMS; its parameters are
#   merged over the :default ones. A section type with no entry falls back to
#   the defaults (previously `merge!(nil)` raised a TypeError).
# @return [String] the stripped OCR text, '' when every attempt came back empty
def get_text(section_type: :default)
  img = image

  # resample if an image 4"x4" is less than 300dpi
  img = img.resample(300) if img.columns * img.rows < 144000

  # merge builds a new hash, so the shared SECTION_PARAMS entries are never mutated
  params = SECTION_PARAMS[:default].merge(SECTION_PARAMS.fetch(section_type, {}))

  ocr = ->(source) { RTesseract.new(source, params).to_s.strip }

  @extracted_text = ocr.call(img)

  # second attempt: apply white_threshold(245) and run again
  if @extracted_text == ''
    img = img.white_threshold(245)
    @extracted_text = ocr.call(img)
  end

  # last attempt: additionally quantize to 256 gray levels
  if @extracted_text == ''
    img = img.quantize(256, Magick::GRAYColorspace)
    @extracted_text = ocr.call(img)
  end

  @extracted_text
end
|
134
135
|
|
135
|
-
|
136
136
|
end
|