sqed 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/Guardfile +66 -0
- data/lib/sqed.rb +120 -68
- data/lib/sqed/boundaries.rb +30 -25
- data/lib/sqed/boundary_finder.rb +221 -212
- data/lib/sqed/boundary_finder/color_line_finder.rb +50 -42
- data/lib/sqed/boundary_finder/cross_finder.rb +3 -3
- data/lib/sqed/boundary_finder/stage_finder.rb +8 -3
- data/lib/sqed/extractor.rb +23 -25
- data/lib/sqed/parser.rb +4 -7
- data/lib/sqed/parser/barcode_parser.rb +5 -5
- data/lib/sqed/parser/ocr_parser.rb +46 -46
- data/lib/sqed/result.rb +60 -57
- data/lib/sqed/version.rb +1 -1
- data/lib/sqed_config.rb +52 -56
- data/spec/lib/sqed/boundaries_spec.rb +1 -1
- data/spec/lib/sqed/boundary_finder/color_line_finder_spec.rb +24 -24
- data/spec/lib/sqed/boundary_finder/cross_finder_spec.rb +1 -1
- data/spec/lib/sqed/boundary_finder/stage_finder_spec.rb +1 -1
- data/spec/lib/sqed/boundary_finder_spec.rb +73 -45
- data/spec/lib/sqed/extractor_spec.rb +4 -4
- data/spec/lib/sqed/parser/ocr_spec.rb +2 -2
- data/spec/lib/sqed_spec.rb +39 -39
- data/spec/lib/stage_handling/seven_slot_spec.rb +45 -9
- data/spec/support/files/stage_images/inhs_7_slot2.jpg +0 -0
- data/spec/support/image_helpers.rb +10 -9
- metadata +6 -3
@@ -4,15 +4,24 @@ require 'rmagick'
|
|
4
4
|
#
|
5
5
|
class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
6
6
|
|
7
|
-
|
8
|
-
|
7
|
+
attr_accessor :boundary_color
|
8
|
+
|
9
|
+
def initialize(**opts)
|
10
|
+
# image: image, layout: layout, boundary_color: :green, use_thumbnail: true)
|
11
|
+
image = opts[:image]
|
12
|
+
layout = opts[:layout]
|
13
|
+
use_thumbnail = opts[:use_thumbnail]
|
14
|
+
@boundary_color = opts[:boundary_color] || :green
|
15
|
+
|
16
|
+
super(image: image, layout: layout, use_thumbnail: use_thumbnail)
|
17
|
+
|
9
18
|
raise 'No layout provided.' if @layout.nil?
|
10
19
|
@boundary_color = boundary_color
|
11
20
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
21
|
+
if use_thumbnail
|
22
|
+
@original_image = @image.copy
|
23
|
+
@image = thumbnail
|
24
|
+
end
|
16
25
|
find_bands
|
17
26
|
end
|
18
27
|
|
@@ -21,77 +30,78 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
|
21
30
|
def find_bands
|
22
31
|
case layout # boundaries.coordinates are referenced from stage image
|
23
32
|
|
24
|
-
|
33
|
+
# No specs for this yet
|
25
34
|
when :seven_slot
|
26
|
-
top_bottom_split = Sqed::BoundaryFinder.color_boundary_finder(
|
27
|
-
left_right_split = Sqed::BoundaryFinder.color_boundary_finder(
|
35
|
+
top_bottom_split = Sqed::BoundaryFinder.color_boundary_finder(image: image, scan: :columns, boundary_color: boundary_color) # detect vertical division [array]
|
36
|
+
left_right_split = Sqed::BoundaryFinder.color_boundary_finder(image: image, sample_subdivision_size: 2, boundary_color: boundary_color) # detect horizontal division [array]
|
28
37
|
|
29
|
-
boundaries.set(0, [0, 0, left_right_split[0], top_bottom_split[0]
|
30
|
-
boundaries.set(6, [0, top_bottom_split[2], left_right_split[0], image.rows - top_bottom_split[2]
|
38
|
+
boundaries.set(0, [0, 0, left_right_split[0], top_bottom_split[0]])
|
39
|
+
boundaries.set(6, [0, top_bottom_split[2], left_right_split[0], image.rows - top_bottom_split[2]] )
|
31
40
|
|
32
41
|
right_top_image = image.crop( left_right_split[2], 0, image.columns - left_right_split[2], top_bottom_split[0] , true) # sections 1,2
|
33
42
|
right_bottom_image = image.crop(left_right_split[2], top_bottom_split[2], image.columns - left_right_split[2], image.rows - top_bottom_split[2], true) # sections 3,4,5
|
34
43
|
|
35
|
-
right_top_split = corrected_frequency(Sqed::BoundaryFinder.color_boundary_finder(
|
44
|
+
right_top_split = corrected_frequency(Sqed::BoundaryFinder.color_boundary_finder(image: right_top_image, boundary_color: boundary_color)) # vertical line b/w 1 & 2, use "corrected_frequency" to account for color bleed from previous crop
|
36
45
|
|
37
46
|
boundaries.set(1, [left_right_split[2], 0, right_top_split[0], top_bottom_split[0] ])
|
38
|
-
boundaries.set(2, [left_right_split[2] + right_top_split[2], 0, right_top_image.columns - right_top_split[2], top_bottom_split[0]
|
47
|
+
boundaries.set(2, [left_right_split[2] + right_top_split[2], 0, right_top_image.columns - right_top_split[2], top_bottom_split[0]])
|
48
|
+
|
49
|
+
right_bottom_split = corrected_frequency(Sqed::BoundaryFinder.color_boundary_finder(image: right_bottom_image, scan: :columns, sample_subdivision_size: 2, boundary_color: boundary_color)) # horizontal line b/w (5,3) & 4, use "corrected_frequency" to account for color bleed from previous crop
|
50
|
+
|
51
|
+
bottom_right_top_image = right_bottom_image.crop(0,0, image.columns - left_right_split[2], right_bottom_split[0], true) # 3,5
|
39
52
|
|
40
|
-
|
41
|
-
|
42
|
-
bottom_right_top_image = right_bottom_image.crop(0,0, image.columns - left_right_split[2], right_bottom_split[2], true) # 3,5
|
53
|
+
boundaries.set(3, [ left_right_split[2] + right_top_split[2], top_bottom_split[2], left_right_split[2] + right_top_split[2], bottom_right_top_image.rows ])
|
54
|
+
boundaries.set(5, [ left_right_split[2], top_bottom_split[2], right_top_split[0], bottom_right_top_image.rows])
|
43
55
|
|
44
|
-
|
45
|
-
boundaries.set(
|
46
|
-
|
47
|
-
boundaries.set(4, [ left_right_split[2], top_bottom_split[2] + right_top_split[2], image.columns - left_right_split[2], right_bottom_image.rows - right_top_split[2] ] )
|
56
|
+
# ! not high enough
|
57
|
+
boundaries.set(4, [left_right_split[2], top_bottom_split[2] + right_bottom_split[2], image.columns - left_right_split[2], right_bottom_image.rows ])
|
48
58
|
|
49
59
|
when :vertical_split
|
50
|
-
t = Sqed::BoundaryFinder.color_boundary_finder(
|
60
|
+
t = Sqed::BoundaryFinder.color_boundary_finder(image: image, boundary_color: boundary_color) #detect vertical division
|
51
61
|
return if t.nil?
|
52
62
|
boundaries.set(0, [0, 0, t[0], image.rows]) # left section of image
|
53
63
|
boundaries.set(1, [t[2], 0, image.columns - t[2], image.rows]) # right section of image
|
54
64
|
|
55
65
|
when :horizontal_split
|
56
|
-
t = Sqed::BoundaryFinder.color_boundary_finder(
|
66
|
+
t = Sqed::BoundaryFinder.color_boundary_finder(image: image, scan: :columns, boundary_color: boundary_color) # set to detect horizontal division
|
57
67
|
return if t.nil?
|
58
68
|
|
59
69
|
boundaries.set(0, [0, 0, image.columns, t[0]]) # upper section of image
|
60
70
|
boundaries.set(1, [0, t[2], image.columns, image.rows - t[2]]) # lower section of image
|
61
71
|
|
62
72
|
when :right_t # only 3 zones expected, with horizontal division in right-side of vertical division
|
63
|
-
vertical = self.class.new(
|
73
|
+
vertical = self.class.new(image: @image, layout: :vertical_split, boundary_color: boundary_color, use_thumbnail: false ).boundaries
|
64
74
|
|
65
75
|
irt = image.crop(*vertical.for(1), true)
|
66
|
-
right = self.class.new(
|
67
|
-
|
76
|
+
right = self.class.new(image: irt, layout: :horizontal_split, boundary_color: boundary_color, use_thumbnail: false ).boundaries
|
77
|
+
|
68
78
|
boundaries.set(0, vertical.for(0))
|
69
79
|
boundaries.set(1, [ vertical.x_for(1), 0, right.width_for(0), right.height_for(0) ] )
|
70
80
|
boundaries.set(2, [ vertical.x_for(1), right.y_for(1), right.width_for(1), right.height_for(1)] )
|
71
81
|
|
72
82
|
when :vertical_offset_cross # 4 zones expected, with (varying) horizontal division in left- and right- sides of vertical division
|
73
|
-
vertical = self.class.new(
|
83
|
+
vertical = self.class.new(image: @image, layout: :vertical_split, boundary_color: boundary_color, use_thumbnail: false).boundaries
|
74
84
|
|
75
85
|
ilt = image.crop(*vertical.for(0), true)
|
76
86
|
irt = image.crop(*vertical.for(1), true)
|
77
87
|
|
78
|
-
left = self.class.new(
|
79
|
-
right = self.class.new(
|
88
|
+
left = self.class.new(image: ilt, layout: :horizontal_split, boundary_color: boundary_color, use_thumbnail: false).boundaries # fails
|
89
|
+
right = self.class.new(image: irt, layout: :horizontal_split, boundary_color: boundary_color, use_thumbnail: false ).boundaries # OK
|
80
90
|
|
81
91
|
boundaries.set(0, [0, 0, left.width_for(0), left.height_for(0) ])
|
82
92
|
boundaries.set(1, [vertical.x_for(1), 0, right.width_for(0), right.height_for(0) ])
|
83
93
|
boundaries.set(2, [vertical.x_for(1), right.y_for(1), right.width_for(1), right.height_for(1) ])
|
84
94
|
boundaries.set(3, [0, left.y_for(1), left.width_for(1), left.height_for(1) ])
|
85
95
|
|
86
|
-
|
96
|
+
# No specs for this yet
|
87
97
|
when :horizontal_offset_cross
|
88
|
-
horizontal = self.class.new(
|
98
|
+
horizontal = self.class.new(image: @image, layout: :horizontal_split, boundary_color: boundary_color, use_thumbnail: false ).boundaries
|
89
99
|
|
90
100
|
itop = image.crop(*horizontal.for(0), true)
|
91
101
|
ibottom = image.crop(*horizontal.for(1), true)
|
92
102
|
|
93
|
-
top = self.class.new(
|
94
|
-
bottom = self.class.new(
|
103
|
+
top = self.class.new(image: ilt, layout: :vertical_split, boundary_color: boundary_color, use_thumbnail: false ).boundaries
|
104
|
+
bottom = self.class.new(image: irt, layout: :vertical_split, boundary_color: boundary_color, use_thumbnail: false ).boundaries
|
95
105
|
|
96
106
|
boundaries.set(0, [0, 0, top.width_for(0), top.height_for(0) ])
|
97
107
|
boundaries.set(1, [top.x_for(1), 0, top.width_for(1), top.height_for(1) ])
|
@@ -99,9 +109,9 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
|
99
109
|
boundaries.set(3, [0, horizontal.y_for(1), bottom.width_for(0), bottom.height_for(0) ])
|
100
110
|
|
101
111
|
when :cross # 4 zones, with perfectly intersected horizontal and vertical division
|
102
|
-
v = self.class.new(
|
103
|
-
h = self.class.new(
|
104
|
-
|
112
|
+
v = self.class.new(image: @image, layout: :vertical_split, boundary_color: boundary_color, use_thumbnail: false ).boundaries
|
113
|
+
h = self.class.new(image: @image, layout: :horizontal_split, boundary_color: boundary_color, use_thumbnail: false).boundaries
|
114
|
+
|
105
115
|
return if v.nil? || h.nil?
|
106
116
|
|
107
117
|
boundaries.set(0, [0,0, v.width_for(0), h.height_for(0) ])
|
@@ -116,13 +126,11 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
|
116
126
|
|
117
127
|
boundaries.complete = true if boundaries.populated?
|
118
128
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
129
|
+
if use_thumbnail
|
130
|
+
@image = @original_image
|
131
|
+
zoom_boundaries
|
132
|
+
@original_image = nil
|
133
|
+
end
|
124
134
|
|
125
135
|
end
|
126
|
-
|
127
|
-
|
128
136
|
end
|
@@ -15,8 +15,13 @@ class Sqed::BoundaryFinder::StageFinder < Sqed::BoundaryFinder
|
|
15
15
|
|
16
16
|
attr_reader :x0, :y0, :x1, :y1, :min_width, :min_height, :rows, :columns
|
17
17
|
|
18
|
-
def initialize(
|
19
|
-
|
18
|
+
def initialize(**opts)
|
19
|
+
image = opts[:image]
|
20
|
+
is_border_proc = opts[:is_border_proc]
|
21
|
+
min_ratio = opts[:min_ratio]
|
22
|
+
min_ratio ||= MIN_CROP_RATIO
|
23
|
+
|
24
|
+
super(image: image, layout: :internal_box)
|
20
25
|
|
21
26
|
@min_ratio = min_ratio
|
22
27
|
|
@@ -26,7 +31,7 @@ class Sqed::BoundaryFinder::StageFinder < Sqed::BoundaryFinder
|
|
26
31
|
@min_width, @min_height = image.columns * @min_ratio, image.rows * @min_ratio # minimum resultant area
|
27
32
|
@columns, @rows = image.columns, image.rows
|
28
33
|
|
29
|
-
|
34
|
+
|
30
35
|
# We need a border finder proc. Provide one if none was given.
|
31
36
|
@is_border = is_border_proc || self.class.default_border_finder(image) # if no proc specified, use default below
|
32
37
|
|
data/lib/sqed/extractor.rb
CHANGED
@@ -1,62 +1,59 @@
|
|
1
1
|
require 'rmagick'
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
class Sqed
|
4
|
+
|
5
|
+
# An Extractor takes Boundaries object and a metadata_map and returns a Sqed::Result
|
6
|
+
#
|
7
|
+
class Extractor
|
6
8
|
|
7
9
|
class Error < StandardError; end;
|
8
10
|
|
9
|
-
# a Sqed::Boundaries instance
|
11
|
+
# a Sqed::Boundaries instance
|
10
12
|
attr_accessor :boundaries
|
11
13
|
|
14
|
+
# @return [Hash] like `{0 => :annotated_specimen, 1 => :identifier, 2 => :image_registration }`
|
12
15
|
# a metadata_map hash from EXTRACTION_PATTERNS like:
|
13
|
-
# {0 => :annotated_specimen, 1 => :identifier, 2 =>:image_registration }
|
14
16
|
attr_accessor :metadata_map
|
15
17
|
|
16
|
-
#
|
18
|
+
# @return [Magick::Image file]
|
17
19
|
attr_accessor :image
|
18
20
|
|
19
|
-
def initialize(
|
20
|
-
|
21
|
-
|
22
|
-
|
21
|
+
def initialize(**opts)
|
22
|
+
@metadata_map = opts[:metadata_map]
|
23
|
+
@boundaries = opts[:boundaries]
|
24
|
+
@image = opts[:image]
|
23
25
|
|
24
|
-
|
25
|
-
|
26
|
-
|
26
|
+
raise Error, 'boundaries not provided or provided boundary is not a Sqed::Boundaries' if boundaries.nil? || !boundaries.class == Sqed::Boundaries
|
27
|
+
raise Error, 'metadata_map not provided or metadata_map not a Hash' if metadata_map.nil? || !metadata_map.class == Hash
|
28
|
+
raise Error, 'image not provided' if image.nil? || !image.class.name == 'Magick::Image'
|
27
29
|
end
|
28
30
|
|
29
31
|
def result
|
30
|
-
r = Sqed::Result.new
|
32
|
+
r = Sqed::Result.new
|
31
33
|
|
32
34
|
r.sections = metadata_map.values.sort
|
33
|
-
|
35
|
+
|
34
36
|
# assign the images to the result
|
35
37
|
boundaries.each do |section_index, coords|
|
36
38
|
section_type = metadata_map[section_index]
|
37
|
-
|
38
|
-
# TODO: raise this higher up the chain
|
39
|
-
raise Error, "invalid section_type [#{section_type}]" if !SqedConfig::LAYOUT_SECTION_TYPES.include?(section_type)
|
40
39
|
|
41
40
|
r.send("#{section_type}_image=", extract_image(coords))
|
42
41
|
r.boundary_coordinates[section_type] = coords
|
43
|
-
end
|
42
|
+
end
|
44
43
|
|
45
44
|
# assign the metadata to the result
|
46
45
|
metadata_map.each do |section_index, section_type|
|
47
46
|
# only extract data if a parser exists
|
48
47
|
if parsers = SqedConfig::SECTION_PARSERS[section_type]
|
49
|
-
|
50
48
|
section_image = r.send("#{section_type}_image")
|
51
|
-
|
52
49
|
updated = r.send(section_type)
|
53
50
|
|
54
51
|
parsers.each do |p|
|
55
|
-
parsed_result = p.new(section_image).
|
56
|
-
updated
|
52
|
+
parsed_result = p.new(section_image).get_text(section_type: section_type)
|
53
|
+
updated[p::TYPE] = parsed_result if parsed_result && parsed_result.length > 0
|
57
54
|
end
|
58
55
|
|
59
|
-
r.send("#{section_type}=", updated)
|
56
|
+
r.send("#{section_type}=", updated)
|
60
57
|
end
|
61
58
|
end
|
62
59
|
|
@@ -65,7 +62,8 @@ class Sqed::Extractor
|
|
65
62
|
|
66
63
|
# crop takes x, y, width, height
|
67
64
|
def extract_image(coords)
|
68
|
-
|
65
|
+
@image.crop(*coords, true)
|
69
66
|
end
|
70
67
|
|
68
|
+
end
|
71
69
|
end
|
data/lib/sqed/parser.rb
CHANGED
@@ -3,17 +3,14 @@
|
|
3
3
|
# Base class for Parsers
|
4
4
|
#
|
5
5
|
class Sqed::Parser
|
6
|
+
|
6
7
|
attr_accessor :image
|
7
8
|
|
9
|
+
attr_accessor :extracted_text
|
10
|
+
|
8
11
|
def initialize(image)
|
9
|
-
@image = image
|
12
|
+
@image = image
|
10
13
|
raise 'no image provided to parser' if @image && !(@image.class.name == 'Magick::Image')
|
11
14
|
end
|
12
15
|
|
13
|
-
# TODO: is this required?!j
|
14
|
-
# must be provided in subclasses
|
15
|
-
def text(section_type: :default)
|
16
|
-
nil
|
17
|
-
end
|
18
|
-
|
19
16
|
end
|
@@ -1,12 +1,11 @@
|
|
1
1
|
# Given an image, return an ordered array of detectable barcodes
|
2
|
-
#
|
2
|
+
#
|
3
|
+
# !! DOES NOTHING !!
|
3
4
|
#
|
4
5
|
class Sqed::Parser::BarcodeParser < Sqed::Parser
|
5
6
|
|
6
7
|
TYPE = :barcode
|
7
8
|
|
8
|
-
attr_accessor :image
|
9
|
-
|
10
9
|
attr_accessor :barcode
|
11
10
|
|
12
11
|
def initialize(image)
|
@@ -28,15 +27,16 @@ class Sqed::Parser::BarcodeParser < Sqed::Parser
|
|
28
27
|
# try a bunch of options, organized by most common, give the first hit
|
29
28
|
def get_barcode
|
30
29
|
[get_code_128].compact.first
|
31
|
-
end
|
30
|
+
end
|
32
31
|
|
33
32
|
#def get_datamatrix
|
34
33
|
# https://github.com/srijan/ruby-dmtx
|
35
34
|
#end
|
36
35
|
|
37
36
|
# alias to a universal method
|
38
|
-
def
|
37
|
+
def get_text(section_type: :default)
|
39
38
|
barcode
|
40
39
|
end
|
41
40
|
|
41
|
+
|
42
42
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'rtesseract'
|
2
|
+
|
1
3
|
# encoding: UTF-8
|
2
4
|
#
|
3
5
|
# Given a single image return all text in that image.
|
@@ -17,49 +19,51 @@
|
|
17
19
|
# Below an x-height of 10 pixels, you have very little chance of accurate results,
|
18
20
|
# and below about 8 pixels, most of the text will be "noise removed".
|
19
21
|
#
|
20
|
-
require 'rtesseract'
|
21
|
-
|
22
22
|
class Sqed::Parser::OcrParser < Sqed::Parser
|
23
23
|
|
24
24
|
TYPE = :text
|
25
25
|
|
26
|
+
# Other experimented with default params
|
27
|
+
# classify_debug_level: 5,
|
28
|
+
# lang: 'eng',
|
29
|
+
# load_system_dawg: 0,
|
30
|
+
# load_unambig_dawg: 0,
|
31
|
+
# load_freq_dawg: 0,
|
32
|
+
# load_fixed_length_dawgs: 0,
|
33
|
+
# load_number_dawg: 0,
|
34
|
+
# load_punc_dawg: 1, ## important
|
35
|
+
# load_unambig_dawg: 1,
|
36
|
+
# chop_enable: 0,
|
37
|
+
# enable_new_segsearch: 1,
|
38
|
+
# tessedit_debug_quality_metrics: 1,
|
39
|
+
# tessedit_write_params_to_file: 'tmp/ocr_config_file.txt',
|
40
|
+
# tessedit_write_images: 1,
|
41
|
+
# equationdetect_save_merged_image: 1,
|
42
|
+
# tessedit_dump_pageseg_images: 1,
|
43
|
+
# equationdetect_save_bi_image: 1
|
44
|
+
|
26
45
|
# Tesseract parameters default/specific to section type,
|
27
46
|
# default is merged into the type
|
28
47
|
SECTION_PARAMS = {
|
29
48
|
default: {
|
30
|
-
psm: 3
|
31
|
-
# classify_debug_level: 5,
|
32
|
-
# lang: 'eng',
|
33
|
-
# load_system_dawg: 0,
|
34
|
-
# load_unambig_dawg: 0,
|
35
|
-
# load_freq_dawg: 0,
|
36
|
-
# load_fixed_length_dawgs: 0,
|
37
|
-
# load_number_dawg: 0,
|
38
|
-
# load_punc_dawg: 1, ## important
|
39
|
-
# load_unambig_dawg: 1,
|
40
|
-
# chop_enable: 0,
|
41
|
-
# enable_new_segsearch: 1,
|
42
|
-
# tessedit_debug_quality_metrics: 1,
|
43
|
-
# tessedit_write_params_to_file: 'tmp/ocr_config_file.txt',
|
44
|
-
# tessedit_write_images: 1,
|
45
|
-
# equationdetect_save_merged_image: 1,
|
46
|
-
# tessedit_dump_pageseg_images: 1,
|
47
|
-
# equationdetect_save_bi_image: 1
|
49
|
+
psm: 3
|
48
50
|
},
|
49
51
|
annotated_specimen: {
|
50
|
-
|
52
|
+
# was 45, significantly improves annotated_specimen for odontates
|
53
|
+
edges_children_count_limit: 3000
|
51
54
|
},
|
52
55
|
identifier: {
|
53
56
|
psm: 1,
|
54
57
|
# tessedit_char_whitelist: '0123456789'
|
55
58
|
# edges_children_count_limit: 4000
|
56
|
-
},
|
59
|
+
},
|
57
60
|
curator_metadata: {
|
61
|
+
psm: 3
|
58
62
|
},
|
59
63
|
labels: {
|
60
64
|
psm: 3, # may need to be 6
|
61
65
|
},
|
62
|
-
|
66
|
+
determination_labels: {
|
63
67
|
psm: 3
|
64
68
|
},
|
65
69
|
other_labels: {
|
@@ -68,12 +72,7 @@ class Sqed::Parser::OcrParser < Sqed::Parser
|
|
68
72
|
collecting_event_labels: {
|
69
73
|
psm: 3
|
70
74
|
}
|
71
|
-
|
72
|
-
|
73
|
-
}
|
74
|
-
|
75
|
-
# the text extracted from the image
|
76
|
-
attr_accessor :text
|
75
|
+
}.freeze
|
77
76
|
|
78
77
|
# future consideration
|
79
78
|
# def enhance_image(img)
|
@@ -102,35 +101,36 @@ class Sqed::Parser::OcrParser < Sqed::Parser
|
|
102
101
|
# img = img.white_threshold(245)
|
103
102
|
# img
|
104
103
|
# end
|
105
|
-
|
104
|
+
|
106
105
|
# @return [String]
|
107
|
-
# the ocr text
|
108
|
-
def
|
109
|
-
img =
|
110
|
-
|
106
|
+
# the ocr text
|
107
|
+
def get_text(section_type: :default)
|
108
|
+
img = image
|
109
|
+
|
111
110
|
# resample if an image 4"x4" is less than 300dpi
|
112
111
|
if img.columns * img.rows < 144000
|
113
112
|
img = img.resample(300)
|
114
113
|
end
|
115
|
-
|
116
|
-
params = SECTION_PARAMS[:default].merge(SECTION_PARAMS[section_type])
|
117
|
-
r = RTesseract.new(img, params)
|
118
|
-
@text = r.to_s.strip
|
119
114
|
|
120
|
-
|
115
|
+
params = SECTION_PARAMS[:default].dup
|
116
|
+
params.merge!(SECTION_PARAMS[section_type])
|
117
|
+
|
118
|
+
r = RTesseract.new(img, params)
|
119
|
+
@extracted_text = r.to_s.strip
|
120
|
+
|
121
|
+
if @extracted_text == ''
|
121
122
|
img = img.white_threshold(245)
|
122
|
-
r = RTesseract.new(img, params)
|
123
|
-
@
|
123
|
+
r = RTesseract.new(img, params)
|
124
|
+
@extracted_text = r.to_s.strip
|
124
125
|
end
|
125
126
|
|
126
|
-
if @
|
127
|
+
if @extracted_text == ''
|
127
128
|
img = img.quantize(256,Magick::GRAYColorspace)
|
128
|
-
r = RTesseract.new(img, params)
|
129
|
-
@
|
129
|
+
r = RTesseract.new(img, params)
|
130
|
+
@extracted_text = r.to_s.strip
|
130
131
|
end
|
131
132
|
|
132
|
-
@
|
133
|
+
@extracted_text
|
133
134
|
end
|
134
135
|
|
135
|
-
|
136
136
|
end
|