sqed 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/sqed.rb +11 -5
- data/lib/sqed/boundaries.rb +14 -0
- data/lib/sqed/boundary_finder.rb +65 -4
- data/lib/sqed/boundary_finder/color_line_finder.rb +28 -15
- data/lib/sqed/boundary_finder/cross_finder.rb +3 -3
- data/lib/sqed/boundary_finder/stage_finder.rb +21 -18
- data/lib/sqed/extractor.rb +2 -1
- data/lib/sqed/parser.rb +2 -1
- data/lib/sqed/parser/barcode_parser.rb +1 -1
- data/lib/sqed/parser/ocr_parser.rb +86 -49
- data/lib/sqed/version.rb +1 -1
- data/spec/lib/sqed/boundary_finder/color_line_finder_spec.rb +6 -6
- data/spec/lib/sqed/boundary_finder/cross_finder_spec.rb +2 -1
- data/spec/lib/sqed/boundary_finder_spec.rb +8 -2
- data/spec/lib/sqed_spec.rb +10 -6
- data/sqed.gemspec +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bd0958fc2efbed976b77385f511ce9025f26b5b2
|
4
|
+
data.tar.gz: 2f3efab45b172677057170dfdbdee8366372fac0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 04c09b12b5212a5b7c6cf356caa6e04ef4c422b5e83f14c652996089b9f30c016102572cc2963c2710d56135d7411e2778303034de9b8a44597d4feee7796b43
|
7
|
+
data.tar.gz: 14a8a7cd8bd19a6c3c00e3105b82b513f119bca6f3febac4751e9d1714e2ff08d4aaa3a5bbd3ead4f349d56078830aba02272bc3231424cea3877064b5b9f5b4
|
data/lib/sqed.rb
CHANGED
@@ -41,7 +41,10 @@ class Sqed
|
|
41
41
|
# a symbol, :red, :green, :blue, describing the boundary color within the stage
|
42
42
|
attr_accessor :boundary_color
|
43
43
|
|
44
|
-
|
44
|
+
# Boolean, whether to do the boundary detection (not stage detection at present) against a thumbnail version of the passed image (faster, less accurate, true by default)
|
45
|
+
attr_accessor :use_thumbnail
|
46
|
+
|
47
|
+
def initialize(image: image, pattern: pattern, has_border: true, boundary_color: :green, use_thumbnail: true)
|
45
48
|
raise 'extraction pattern not defined' if pattern && !SqedConfig::EXTRACTION_PATTERNS.keys.include?(pattern)
|
46
49
|
|
47
50
|
@image = image
|
@@ -51,7 +54,7 @@ class Sqed
|
|
51
54
|
@pattern = pattern
|
52
55
|
@pattern ||= :cross
|
53
56
|
@boundary_color = boundary_color
|
54
|
-
|
57
|
+
@use_thumbnail = use_thumbnail
|
55
58
|
set_stage_boundary if @image
|
56
59
|
end
|
57
60
|
|
@@ -108,6 +111,7 @@ class Sqed
|
|
108
111
|
extractor.result
|
109
112
|
end
|
110
113
|
|
114
|
+
# Debugging purposes
|
111
115
|
def attributes
|
112
116
|
{
|
113
117
|
image: @image,
|
@@ -115,7 +119,8 @@ class Sqed
|
|
115
119
|
stage_boundary: stage_boundary,
|
116
120
|
has_border: @has_border,
|
117
121
|
pattern: @pattern,
|
118
|
-
boundary_color: @boundary_color
|
122
|
+
boundary_color: @boundary_color,
|
123
|
+
use_thumbnail: @use_thumbnail
|
119
124
|
}
|
120
125
|
end
|
121
126
|
|
@@ -138,11 +143,12 @@ class Sqed
|
|
138
143
|
def get_section_boundaries
|
139
144
|
boundary_finder_class = SqedConfig::EXTRACTION_PATTERNS[@pattern][:boundary_finder]
|
140
145
|
|
141
|
-
options = {image: stage_image}
|
146
|
+
options = {image: stage_image, use_thumbnail: use_thumbnail}
|
142
147
|
options.merge!( layout: SqedConfig::EXTRACTION_PATTERNS[@pattern][:layout] ) unless boundary_finder_class.name == 'Sqed::BoundaryFinder::CrossFinder'
|
143
|
-
options.merge!( boundary_color: @boundary_color) if
|
148
|
+
options.merge!( boundary_color: @boundary_color) if boundary_finder_class.name == 'Sqed::BoundaryFinder::ColorLineFinder'
|
144
149
|
|
145
150
|
boundary_finder_class.new(options).boundaries
|
151
|
+
|
146
152
|
end
|
147
153
|
|
148
154
|
end
|
data/lib/sqed/boundaries.rb
CHANGED
@@ -91,4 +91,18 @@ class Sqed::Boundaries
|
|
91
91
|
end
|
92
92
|
true
|
93
93
|
end
|
94
|
+
|
95
|
+
def zoom(width_factor, height_factor)
|
96
|
+
coordinates.keys.each do |i|
|
97
|
+
set(i, [
|
98
|
+
(x_for(i).to_f * width_factor).to_i,
|
99
|
+
(y_for(i).to_f * height_factor).to_i,
|
100
|
+
(width_for(i).to_f * width_factor).to_i,
|
101
|
+
(height_for(i).to_f * height_factor).to_i
|
102
|
+
])
|
103
|
+
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
|
94
108
|
end
|
data/lib/sqed/boundary_finder.rb
CHANGED
@@ -2,6 +2,9 @@
|
|
2
2
|
# return derivative images. Finders operate on cropped images, i.e. only the "stage".
|
3
3
|
#
|
4
4
|
class Sqed::BoundaryFinder
|
5
|
+
|
6
|
+
THUMB_SIZE = 100
|
7
|
+
|
5
8
|
# the passed image
|
6
9
|
attr_reader :img
|
7
10
|
|
@@ -11,10 +14,18 @@ class Sqed::BoundaryFinder
|
|
11
14
|
# A Sqed::Boundaries instance, stores the coordinates of all of the layout sections
|
12
15
|
attr_reader :boundaries
|
13
16
|
|
14
|
-
|
17
|
+
# Whether to compress the original image to a thumbnail when finding boundaries
|
18
|
+
attr_reader :use_thumbnail
|
19
|
+
|
20
|
+
# when we compute using a derived thumbnail we temporarily store the full size image here
|
21
|
+
attr_reader :original_image
|
22
|
+
|
23
|
+
def initialize(image: image, layout: layout, use_thumbnail: true)
|
15
24
|
raise 'No layout provided.' if layout.nil?
|
16
25
|
raise 'No image provided.' if image.nil? || image.class.name != 'Magick::Image'
|
17
26
|
|
27
|
+
@use_thumbnail = use_thumbnail
|
28
|
+
|
18
29
|
@layout = layout
|
19
30
|
@img = image
|
20
31
|
true
|
@@ -25,12 +36,46 @@ class Sqed::BoundaryFinder
|
|
25
36
|
@boundaries ||= Sqed::Boundaries.new(@layout)
|
26
37
|
end
|
27
38
|
|
39
|
+
def longest_thumbnail_axis
|
40
|
+
img.columns > img.rows ? :width : :height
|
41
|
+
end
|
42
|
+
|
43
|
+
def thumbnail_height
|
44
|
+
if longest_thumbnail_axis == :height
|
45
|
+
THUMB_SIZE
|
46
|
+
else
|
47
|
+
(img.rows.to_f * (THUMB_SIZE.to_f / img.columns.to_f)).round.to_i
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
def thumbnail_width
|
52
|
+
if longest_thumbnail_axis == :width
|
53
|
+
THUMB_SIZE
|
54
|
+
else
|
55
|
+
(img.columns.to_f * (THUMB_SIZE.to_f / img.rows.to_f)).round.to_i
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
# see https://rmagick.github.io/image3.html#thumbnail
|
60
|
+
def thumbnail
|
61
|
+
img.thumbnail(thumbnail_width, thumbnail_height)
|
62
|
+
end
|
63
|
+
|
64
|
+
def width_factor
|
65
|
+
img.columns.to_f / thumbnail_width.to_f
|
66
|
+
end
|
67
|
+
|
68
|
+
def height_factor
|
69
|
+
img.rows.to_f / thumbnail_height.to_f
|
70
|
+
end
|
71
|
+
|
72
|
+
def zoom_boundaries
|
73
|
+
boundaries.zoom(width_factor, height_factor )
|
74
|
+
end
|
28
75
|
|
29
76
|
# return [Integer, nil]
|
30
77
|
# sample more with small images, less with large images
|
31
78
|
# we want to return larger numbers (= faster sampling)
|
32
|
-
#
|
33
|
-
#
|
34
79
|
def self.get_subdivision_size(image_width)
|
35
80
|
case image_width
|
36
81
|
when nil
|
@@ -69,6 +114,7 @@ class Sqed::BoundaryFinder
|
|
69
114
|
# (:rows|:columns), :rows finds vertical borders, :columns finds horizontal borders
|
70
115
|
#
|
71
116
|
def self.color_boundary_finder(image: image, sample_subdivision_size: nil, sample_cutoff_factor: nil, scan: :rows, boundary_color: :green)
|
117
|
+
|
72
118
|
image_width = image.send(scan)
|
73
119
|
sample_subdivision_size = get_subdivision_size(image_width) if sample_subdivision_size.nil?
|
74
120
|
samples_to_take = (image_width / sample_subdivision_size).to_i - 1
|
@@ -105,6 +151,8 @@ class Sqed::BoundaryFinder
|
|
105
151
|
|
106
152
|
if sample_cutoff_factor.nil?
|
107
153
|
cutoff = max_difference(border_hits.values)
|
154
|
+
|
155
|
+
cutoff = border_hits.values.first - 1 if cutoff == 0 # difference of two identical things is 0
|
108
156
|
else
|
109
157
|
cutoff = (samples_to_take * sample_cutoff_factor).to_i
|
110
158
|
end
|
@@ -132,6 +180,7 @@ class Sqed::BoundaryFinder
|
|
132
180
|
# return [Array]
|
133
181
|
# the median position of all (pixel) positions that have a count greater than the cutoff
|
134
182
|
def self.frequency_stats(frequency_hash, sample_cutoff = 0)
|
183
|
+
|
135
184
|
return nil if sample_cutoff.nil? || sample_cutoff < 1
|
136
185
|
hit_ranges = []
|
137
186
|
|
@@ -141,7 +190,18 @@ class Sqed::BoundaryFinder
|
|
141
190
|
end
|
142
191
|
end
|
143
192
|
|
144
|
-
|
193
|
+
case hit_ranges.size
|
194
|
+
when 1
|
195
|
+
c = hit_ranges[0]
|
196
|
+
hit_ranges = [c - 1, c, c + 1]
|
197
|
+
when 2
|
198
|
+
hit_ranges.sort!
|
199
|
+
c1 = hit_ranges[0]
|
200
|
+
c2 = hit_ranges[1]
|
201
|
+
hit_ranges = [c1, c2, c2 + (c2 - c1)]
|
202
|
+
when 0
|
203
|
+
return nil
|
204
|
+
end
|
145
205
|
|
146
206
|
# we have to sort because the keys (positions) we examined came unordered from a hash originally
|
147
207
|
hit_ranges.sort!
|
@@ -174,3 +234,4 @@ class Sqed::BoundaryFinder
|
|
174
234
|
end
|
175
235
|
|
176
236
|
end
|
237
|
+
|
@@ -4,10 +4,15 @@ require 'rmagick'
|
|
4
4
|
#
|
5
5
|
class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
6
6
|
|
7
|
-
def initialize(image: image, layout: layout, boundary_color: :green)
|
8
|
-
super(image: image, layout: layout)
|
7
|
+
def initialize(image: image, layout: layout, boundary_color: :green, use_thumbnail: true)
|
8
|
+
super(image: image, layout: layout, use_thumbnail: use_thumbnail)
|
9
9
|
raise 'No layout provided.' if @layout.nil?
|
10
10
|
@boundary_color = boundary_color
|
11
|
+
|
12
|
+
if use_thumbnail
|
13
|
+
@original_image = @img.copy
|
14
|
+
@img = thumbnail
|
15
|
+
end
|
11
16
|
find_bands
|
12
17
|
end
|
13
18
|
|
@@ -25,26 +30,28 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
|
25
30
|
when :horizontal_split
|
26
31
|
t = Sqed::BoundaryFinder.color_boundary_finder(image: img, scan: :columns, boundary_color: @boundary_color) # set to detect horizontal division, (green line)
|
27
32
|
return if t.nil?
|
33
|
+
|
28
34
|
boundaries.set(0, [0, 0, img.columns, t[0]]) # upper section of image
|
29
35
|
boundaries.set(1, [0, t[2], img.columns, img.rows - t[2]]) # lower section of image
|
30
36
|
|
31
37
|
when :right_t # only 3 zones expected, with horizontal division in right-side of vertical division
|
32
|
-
vertical = self.class.new(image: @img, layout: :vertical_split, boundary_color: @boundary_color ).boundaries
|
33
|
-
irt = img.crop(*vertical.for(1), true)
|
34
|
-
right = self.class.new(image: irt, layout: :horizontal_split, boundary_color: @boundary_color ).boundaries
|
38
|
+
vertical = self.class.new(image: @img, layout: :vertical_split, boundary_color: @boundary_color, use_thumbnail: false ).boundaries
|
35
39
|
|
36
|
-
|
40
|
+
irt = img.crop(*vertical.for(1), true)
|
41
|
+
right = self.class.new(image: irt, layout: :horizontal_split, boundary_color: @boundary_color, use_thumbnail: false ).boundaries
|
42
|
+
|
43
|
+
boundaries.set(0, vertical.for(0))
|
37
44
|
boundaries.set(1, [ vertical.x_for(1), 0, right.width_for(0), right.height_for(0) ] )
|
38
45
|
boundaries.set(2, [ vertical.x_for(1), right.y_for(1), right.width_for(1), right.height_for(1)] )
|
39
46
|
|
40
47
|
when :vertical_offset_cross # 4 zones expected, with (varying) horizontal division in left- and right- sides of vertical division
|
41
|
-
vertical = self.class.new(image: @img, layout: :vertical_split, boundary_color: @boundary_color ).boundaries
|
42
|
-
|
48
|
+
vertical = self.class.new(image: @img, layout: :vertical_split, boundary_color: @boundary_color, use_thumbnail: false).boundaries
|
49
|
+
|
43
50
|
ilt = img.crop(*vertical.for(0), true)
|
44
51
|
irt = img.crop(*vertical.for(1), true)
|
45
52
|
|
46
|
-
left = self.class.new(image: ilt, layout: :horizontal_split, boundary_color: @boundary_color ).boundaries
|
47
|
-
right = self.class.new(image: irt, layout: :horizontal_split, boundary_color: @boundary_color ).boundaries
|
53
|
+
left = self.class.new(image: ilt, layout: :horizontal_split, boundary_color: @boundary_color, use_thumbnail: false).boundaries # fails
|
54
|
+
right = self.class.new(image: irt, layout: :horizontal_split, boundary_color: @boundary_color, use_thumbnail: false ).boundaries # OK
|
48
55
|
|
49
56
|
boundaries.set(0, [0, 0, left.width_for(0), left.height_for(0) ])
|
50
57
|
boundaries.set(1, [vertical.x_for(1), 0, right.width_for(0), right.height_for(0) ])
|
@@ -53,13 +60,13 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
|
53
60
|
|
54
61
|
# No specs for this yet
|
55
62
|
when :horizontal_offset_cross
|
56
|
-
horizontal = self.class.new(image: @img, layout: :horizontal_split, boundary_color: @boundary_color ).boundaries
|
63
|
+
horizontal = self.class.new(image: @img, layout: :horizontal_split, boundary_color: @boundary_color, use_thumbnail: false ).boundaries
|
57
64
|
|
58
65
|
itop = img.crop(*horizontal.for(0), true)
|
59
66
|
ibottom = img.crop(*horizontal.for(1), true)
|
60
67
|
|
61
|
-
top = self.class.new(image: ilt, layout: :vertical_split, boundary_color: @boundary_color ).boundaries
|
62
|
-
bottom = self.class.new(image: irt, layout: :vertical_split, boundary_color: @boundary_color ).boundaries
|
68
|
+
top = self.class.new(image: ilt, layout: :vertical_split, boundary_color: @boundary_color, use_thumbnail: false ).boundaries
|
69
|
+
bottom = self.class.new(image: irt, layout: :vertical_split, boundary_color: @boundary_color, use_thumbnail: false ).boundaries
|
63
70
|
|
64
71
|
boundaries.set(0, [0, 0, top.width_for(0), top.height_for(0) ])
|
65
72
|
boundaries.set(1, [top.x_for(1), 0, top.width_for(1), top.height_for(1) ])
|
@@ -67,8 +74,8 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
|
67
74
|
boundaries.set(3, [0, horizontal.y_for(1), bottom.width_for(0), bottom.height_for(0) ])
|
68
75
|
|
69
76
|
when :cross # 4 zones, with perfectly intersected horizontal and vertical division
|
70
|
-
v = self.class.new(image: @img, layout: :vertical_split, boundary_color: @boundary_color ).boundaries
|
71
|
-
h = self.class.new(image: @img, layout: :horizontal_split, boundary_color: @boundary_color ).boundaries
|
77
|
+
v = self.class.new(image: @img, layout: :vertical_split, boundary_color: @boundary_color, use_thumbnail: false ).boundaries
|
78
|
+
h = self.class.new(image: @img, layout: :horizontal_split, boundary_color: @boundary_color, use_thumbnail: false).boundaries
|
72
79
|
|
73
80
|
return if v.nil? || h.nil?
|
74
81
|
|
@@ -84,6 +91,12 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
|
84
91
|
|
85
92
|
boundaries.complete = true if boundaries.populated?
|
86
93
|
|
94
|
+
if use_thumbnail
|
95
|
+
@img = @original_image
|
96
|
+
zoom_boundaries
|
97
|
+
@original_image = nil
|
98
|
+
end
|
99
|
+
|
87
100
|
end
|
88
101
|
|
89
102
|
|
@@ -5,13 +5,13 @@ require 'rmagick'
|
|
5
5
|
class Sqed::BoundaryFinder::CrossFinder < Sqed::BoundaryFinder
|
6
6
|
|
7
7
|
def initialize(image: image)
|
8
|
-
@
|
8
|
+
@img = image
|
9
9
|
find_edges
|
10
10
|
end
|
11
11
|
|
12
12
|
def find_edges
|
13
|
-
width = @
|
14
|
-
height = @
|
13
|
+
width = @img.columns / 2
|
14
|
+
height = @img.rows / 2
|
15
15
|
|
16
16
|
boundaries.coordinates[0] = [0, 0, width, height]
|
17
17
|
boundaries.coordinates[1] = [width, 0, width, height]
|
@@ -3,7 +3,7 @@ require 'rmagick'
|
|
3
3
|
# Some of this code was originally inspired by Emmanuel Oga's gist https://gist.github.com/EmmanuelOga/2476153.
|
4
4
|
#
|
5
5
|
class Sqed::BoundaryFinder::StageFinder < Sqed::BoundaryFinder
|
6
|
-
|
6
|
+
|
7
7
|
# The proc containing the border finding algorithm
|
8
8
|
attr_reader :is_border
|
9
9
|
|
@@ -11,23 +11,24 @@ class Sqed::BoundaryFinder::StageFinder < Sqed::BoundaryFinder
|
|
11
11
|
|
12
12
|
# How small we accept a cropped picture to be. E.G. if it was 100x100 and
|
13
13
|
# ratio 0.1, min output should be 10x10
|
14
|
-
MIN_CROP_RATIO = 0.1
|
14
|
+
MIN_CROP_RATIO = 0.1
|
15
15
|
|
16
|
-
attr_reader :x0, :y0, :x1, :y1, :min_width, :min_height, :rows, :columns
|
16
|
+
attr_reader :x0, :y0, :x1, :y1, :min_width, :min_height, :rows, :columns
|
17
17
|
|
18
18
|
def initialize(image: image, is_border_proc: nil, min_ratio: MIN_CROP_RATIO)
|
19
|
-
super(image: image, layout: :internal_box)
|
19
|
+
super(image: image, layout: :internal_box)
|
20
20
|
|
21
|
-
@min_ratio =
|
21
|
+
@min_ratio = min_ratio
|
22
22
|
|
23
23
|
# Initial co-ordinates
|
24
24
|
@x0, @y0 = 0, 0
|
25
|
-
@x1, @y1 = img.columns, img.rows
|
25
|
+
@x1, @y1 = img.columns, img.rows
|
26
26
|
@min_width, @min_height = img.columns * @min_ratio, img.rows * @min_ratio # minimum resultant area
|
27
27
|
@columns, @rows = img.columns, img.rows
|
28
28
|
|
29
|
+
|
29
30
|
# We need a border finder proc. Provide one if none was given.
|
30
|
-
@is_border = is_border_proc || self.class.default_border_finder(img)
|
31
|
+
@is_border = is_border_proc || self.class.default_border_finder(img) # if no proc specified, use default below
|
31
32
|
|
32
33
|
@x00 = @x0
|
33
34
|
@y00 = @y0
|
@@ -51,8 +52,9 @@ class Sqed::BoundaryFinder::StageFinder < Sqed::BoundaryFinder
|
|
51
52
|
# works for 0.5, >0.137; 0.60, >0.14 0.65, >0.146; 0.70, >0.1875; 0.75, >0.1875; 0.8, >0.237; 0.85, >0.24; 0.90, >0.28; 0.95, >0.25
|
52
53
|
# fails for 0.75, (0.18, 0.17,0.16,0.15); 0.70, 0.18;
|
53
54
|
#
|
54
|
-
|
55
|
-
|
55
|
+
# this sets variables (locally) for find_edges
|
56
|
+
def self.default_border_finder(img, samples = 5, threshold = 0.75, fuzz_factor = 0.40) # working on non-synthetic images 04-dec-2014
|
57
|
+
fuzz = ((Magick::QuantumRange + 1) * fuzz_factor).to_i
|
56
58
|
# Returns true if the edge is a border (border meaning outer region to be cropped)
|
57
59
|
lambda do |edge|
|
58
60
|
border, non_border = 0.0, 0.0 # maybe should be called outer, inner
|
@@ -79,7 +81,7 @@ class Sqed::BoundaryFinder::StageFinder < Sqed::BoundaryFinder
|
|
79
81
|
# handle this exception
|
80
82
|
return unless is_border # return if no process defined or set for @is_border
|
81
83
|
|
82
|
-
u = x1 - 1
|
84
|
+
u = x1 - 1 # rightmost pixel (kind of)
|
83
85
|
# increment from left to right
|
84
86
|
x0.upto(u) do |x|
|
85
87
|
if width_croppable? && is_border[vline(x)] then
|
@@ -89,7 +91,7 @@ class Sqed::BoundaryFinder::StageFinder < Sqed::BoundaryFinder
|
|
89
91
|
end
|
90
92
|
end
|
91
93
|
# decrement from right to left
|
92
|
-
(u).downto(x0) { |x| width_croppable?
|
94
|
+
(u).downto(x0) { |x| width_croppable? && is_border[vline(x)] ? @x1 = x - 1 : break }
|
93
95
|
|
94
96
|
u = y1 - 1
|
95
97
|
0.upto(u) do |y|
|
@@ -104,10 +106,11 @@ class Sqed::BoundaryFinder::StageFinder < Sqed::BoundaryFinder
|
|
104
106
|
|
105
107
|
delta_x = 0 #width/50 # 2% of cropped image to make up for trapezoidal distortion
|
106
108
|
delta_y = 0 #height/50 # 2% of cropped image to make up for trapezoidal distortion <- NOT 3%
|
107
|
-
|
109
|
+
|
108
110
|
# TODO: add conditions
|
109
|
-
boundaries.complete = true
|
110
|
-
boundaries.
|
111
|
+
boundaries.complete = true
|
112
|
+
boundaries.set(0, [x0 + delta_x, y0 + delta_y, width - 2*delta_x, height - 2*delta_y])
|
113
|
+
|
111
114
|
end
|
112
115
|
|
113
116
|
def width_croppable?
|
@@ -127,13 +130,13 @@ class Sqed::BoundaryFinder::StageFinder < Sqed::BoundaryFinder
|
|
127
130
|
end
|
128
131
|
|
129
132
|
# actually + 1 (starting at zero?)
|
130
|
-
def width
|
131
|
-
@x1 - @x0
|
133
|
+
def width
|
134
|
+
@x1 - @x0
|
132
135
|
end
|
133
|
-
|
136
|
+
|
134
137
|
# actually + 1 (starting at zero?)
|
135
138
|
def height
|
136
|
-
@y1 - @y0
|
139
|
+
@y1 - @y0
|
137
140
|
end
|
138
141
|
|
139
142
|
end
|
data/lib/sqed/extractor.rb
CHANGED
@@ -42,10 +42,11 @@ class Sqed::Extractor
|
|
42
42
|
if parsers = SqedConfig::SECTION_PARSERS[section_type]
|
43
43
|
|
44
44
|
section_image = r.send("#{section_type}_image")
|
45
|
+
|
45
46
|
updated = r.send(section_type)
|
46
47
|
|
47
48
|
parsers.each do |p|
|
48
|
-
parsed_result = p.new(section_image).text
|
49
|
+
parsed_result = p.new(section_image).text(section_type: section_type)
|
49
50
|
updated.merge!(p::TYPE => parsed_result) if parsed_result
|
50
51
|
end
|
51
52
|
|
data/lib/sqed/parser.rb
CHANGED
@@ -2,7 +2,20 @@
|
|
2
2
|
#
|
3
3
|
# Given a single image return all text in that image.
|
4
4
|
#
|
5
|
-
# For
|
5
|
+
# For reference
|
6
|
+
# http://misteroleg.wordpress.com/2012/12/19/ocr-using-tesseract-and-imagemagick-as-pre-processing-task/
|
7
|
+
# https://code.google.com/p/tesseract-ocr/wiki/FAQ
|
8
|
+
# http://www.sk-spell.sk.cx/tesseract-ocr-parameters-in-302-version
|
9
|
+
#
|
10
|
+
# "There is a minimum text size for reasonable accuracy.
|
11
|
+
# You have to consider resolution as well as point size.
|
12
|
+
# Accuracy drops off below 10pt x 300dpi, rapidly below 8pt x 300dpi.
|
13
|
+
# A quick check is to count the pixels of the x-height of your characters.
|
14
|
+
# (X-height is the height of the lower case x.)
|
15
|
+
# At 10pt x 300dpi x-heights are typically about 20 pixels, although this
|
16
|
+
# can vary dramatically from font to font.
|
17
|
+
# Below an x-height of 10 pixels, you have very little chance of accurate results,
|
18
|
+
# and below about 8 pixels, most of the text will be "noise removed".
|
6
19
|
#
|
7
20
|
require 'rtesseract'
|
8
21
|
|
@@ -10,60 +23,84 @@ class Sqed::Parser::OcrParser < Sqed::Parser
|
|
10
23
|
|
11
24
|
TYPE = :text
|
12
25
|
|
26
|
+
# Tesseract parameters default/specific to section type,
|
27
|
+
# default is merged into the type
|
28
|
+
SECTION_PARAMS = {
|
29
|
+
default: {
|
30
|
+
psm: 3,
|
31
|
+
# classify_debug_level: 5,
|
32
|
+
# lang: 'eng',
|
33
|
+
# load_system_dawg: 0,
|
34
|
+
# load_unambig_dawg: 0,
|
35
|
+
# load_freq_dawg: 0,
|
36
|
+
# load_fixed_length_dawgs: 0,
|
37
|
+
# load_number_dawg: 0,
|
38
|
+
# load_punc_dawg: 1, ## important
|
39
|
+
# load_unambig_dawg: 1,
|
40
|
+
# chop_enable: 0,
|
41
|
+
# enable_new_segsearch: 1,
|
42
|
+
# tessedit_debug_quality_metrics: 1,
|
43
|
+
# tessedit_write_params_to_file: 'tmp/ocr_config_file.txt',
|
44
|
+
# tessedit_write_images: 1,
|
45
|
+
# equationdetect_save_merged_image: 1,
|
46
|
+
# tessedit_dump_pageseg_images: 1,
|
47
|
+
# equationdetect_save_bi_image: 1
|
48
|
+
},
|
49
|
+
annotated_specimen: {
|
50
|
+
edges_children_count_limit: 3000 # was 45, significantly improves annotated_specimen for odontates
|
51
|
+
},
|
52
|
+
identifier: {
|
53
|
+
psm: 1,
|
54
|
+
# tessedit_char_whitelist: '0123456789'
|
55
|
+
# edges_children_count_limit: 4000
|
56
|
+
},
|
57
|
+
curator_metadata: {
|
58
|
+
},
|
59
|
+
labels: {
|
60
|
+
psm: 3, # may need to be 6
|
61
|
+
},
|
62
|
+
deterimination_labels: {
|
63
|
+
psm: 3
|
64
|
+
}
|
65
|
+
}
|
66
|
+
|
13
67
|
# the text extracted from the image
|
14
68
|
attr_accessor :text
|
15
69
|
|
16
|
-
#
|
17
|
-
def
|
18
|
-
|
19
|
-
|
20
|
-
# @jrflood: this is where you will have to do some research, tuning images so that they can be better ocr-ed,
|
21
|
-
# get potential border pixel color (based on quadrant?)
|
22
|
-
new_color = img.pixel_color(1, 1)
|
23
|
-
# img = img.scale(2)
|
24
|
-
# img.write('foo0.jpg.jpg')
|
25
|
-
# img = img.enhance
|
26
|
-
# img.write('foo1.jpg')
|
27
|
-
# img = img.quantize(8, Magick::GRAYColorspace)
|
28
|
-
# img.write('foo1.jpg')
|
29
|
-
# img = img.sharpen(1.0, 0.2)
|
30
|
-
# img.write('foo2.jpg')
|
31
|
-
# border_color = img.pixel_color(img.columns - 1, img.rows - 1)
|
32
|
-
# img = img.color_floodfill(img.columns - 1, img.rows - 1, new_color)
|
33
|
-
# img.write('tmp/foo4.jpg')
|
34
|
-
# img = img.quantize(2, Magick::GRAYColorspace)
|
35
|
-
# #img = img.threshold(0.5)
|
36
|
-
# img.write('foo4.jpg') # for debugging purposes, this is the image that is sent to OCR
|
37
|
-
# img = img.equalize #(32, Magick::GRAYColorspace)
|
38
|
-
# img.write('foo5.jpg') # for debugging purposes, this is the image that is sent to OCR
|
39
|
-
# #img.write('foo3.jpg') # for debugging purposes, this is the image that is sent to OCR
|
40
|
-
#
|
41
|
-
# img.write('foo.jpg') # for debugging purposes, this is the image that is sent to OCR
|
42
|
-
|
43
|
-
|
44
|
-
# From https://code.google.com/p/tesseract-ocr/wiki/FAQ
|
45
|
-
# " There is a minimum text size for reasonable accuracy. You have to consider resolution as well as point size. Accuracy drops off below 10pt x 300dpi, rapidly below 8pt x 300dpi. A quick check is to count the pixels of the x-height of your characters. (X-height is the height of the lower case x.) At 10pt x 300dpi x-heights are typically about 20 pixels, although this can vary dramatically from font to font. Below an x-height of 10 pixels, you have very little chance of accurate results, and below about 8 pixels, most of the text will be "noise removed".
|
46
|
-
|
47
|
-
|
48
|
-
# http://www.sk-spell.sk.cx/tesseract-ocr-parameters-in-302-version
|
49
|
-
# doesn't supprot outputbase
|
50
|
-
r = RTesseract.new(img, lang: 'eng', psm: 1,
|
51
|
-
load_system_dawg: 0,
|
52
|
-
tessedit_debug_quality_metrics: 1,
|
53
|
-
load_freq_dawg: 1 ,
|
54
|
-
chop_enable: 1,
|
55
|
-
tessedit_write_images: 1,
|
56
|
-
equationdetect_save_merged_image: 1,
|
57
|
-
tessedit_dump_pageseg_images: 1,
|
58
|
-
equationdetect_save_bi_image: 1,
|
59
|
-
load_unambig_dawg: 0,
|
60
|
-
tessedit_write_params_to_file: 'tmp/ocr_config_file.txt' ) # psm: 3,
|
61
|
-
|
62
|
-
# img = img.white_threshold(245)
|
70
|
+
# future consideration
|
71
|
+
# def enhance_image(img)
|
72
|
+
# get potential border pixel color (based on quadrant?)
|
73
|
+
# new_color = img.pixel_color(1, 1)
|
63
74
|
|
75
|
+
# img = img.scale(2)
|
76
|
+
# img.write('foo0.jpg.jpg')
|
77
|
+
# img = img.enhance
|
78
|
+
# img.write('foo1.jpg')
|
79
|
+
# img = img.quantize(8, Magick::GRAYColorspace)
|
80
|
+
# img.write('foo1.jpg')
|
81
|
+
# img = img.sharpen(1.0, 0.2)
|
82
|
+
# img.write('foo2.jpg')
|
83
|
+
# border_color = img.pixel_color(img.columns - 1, img.rows - 1)
|
84
|
+
# img = img.color_floodfill(img.columns - 1, img.rows - 1, new_color)
|
85
|
+
# img.write('tmp/foo4.jpg')
|
86
|
+
# img = img.quantize(2, Magick::GRAYColorspace)
|
87
|
+
# #img = img.threshold(0.5)
|
88
|
+
# img.write('foo4.jpg') # for debugging purposes, this is the image that is sent to OCR
|
89
|
+
# img = img.equalize #(32, Magick::GRAYColorspace)
|
90
|
+
# img.write('foo5.jpg') # for debugging purposes, this is the image that is sent to OCR
|
91
|
+
# #img.write('foo3.jpg') # for debugging purposes, this is the image that is sent to OCR
|
92
|
+
#
|
93
|
+
# img.write('foo.jpg') # for debugging purposes, this is the image that is sent to OCR
|
94
|
+
# img = img.white_threshold(245)
|
95
|
+
# img
|
96
|
+
# end
|
97
|
+
|
98
|
+
def text(section_type: :default)
|
99
|
+
img = @image
|
100
|
+
params = SECTION_PARAMS[:default].merge(SECTION_PARAMS[section_type])
|
101
|
+
r = RTesseract.new(img, params)
|
64
102
|
@text = r.to_s.strip
|
65
103
|
end
|
66
104
|
|
67
|
-
# Need to provide tuning methods here, i.e. image transormations that facilitate OCR
|
68
105
|
|
69
106
|
end
|
data/lib/sqed/version.rb
CHANGED
@@ -8,25 +8,25 @@ describe Sqed::BoundaryFinder::ColorLineFinder do
|
|
8
8
|
let(:c) {b.boundaries}
|
9
9
|
let(:d) { image.crop(*c.for(0), true) }
|
10
10
|
|
11
|
-
let(:e) { Sqed::BoundaryFinder::ColorLineFinder.new(image: d, layout: :right_t) }
|
11
|
+
let(:e) { Sqed::BoundaryFinder::ColorLineFinder.new(image: d, layout: :right_t, use_thumbnail: false) }
|
12
12
|
let(:f) { e.boundaries }
|
13
|
-
let(:g) { Sqed::BoundaryFinder::ColorLineFinder.new(image: d, layout: :vertical_offset_cross)}
|
13
|
+
let(:g) { Sqed::BoundaryFinder::ColorLineFinder.new(image: d, layout: :vertical_offset_cross, use_thumbnail: false)}
|
14
14
|
let(:h) { g.boundaries }
|
15
|
-
let(:gv) { Sqed::BoundaryFinder::ColorLineFinder.new(image: d, layout: :vertical_split) }
|
15
|
+
let(:gv) { Sqed::BoundaryFinder::ColorLineFinder.new(image: d, layout: :vertical_split, use_thumbnail: false) }
|
16
16
|
let(:hv) { gv.boundaries }
|
17
17
|
|
18
18
|
let(:ah) { ImageHelpers.vertical_offset_cross_red }
|
19
19
|
let(:bh) { Sqed::BoundaryFinder::StageFinder.new(image: ah) }
|
20
20
|
let(:ch) { bh.boundaries }
|
21
21
|
let(:dh) { ah.crop(*ch.for(0), true) }
|
22
|
-
let(:gh) { Sqed::BoundaryFinder::ColorLineFinder.new(image: dh, layout: :horizontal_split, boundary_color: :red) } # was :horizontal_split
|
22
|
+
let(:gh) { Sqed::BoundaryFinder::ColorLineFinder.new(image: dh, layout: :horizontal_split, boundary_color: :red, use_thumbnail: false) } # was :horizontal_split
|
23
23
|
let(:hh) { gh.boundaries }
|
24
24
|
|
25
25
|
let(:ibs) { ImageHelpers.black_stage_green_line_specimen }
|
26
26
|
let(:bbs) { Sqed::BoundaryFinder::StageFinder.new(image: ibs) }
|
27
27
|
let(:cbs) { bbs.boundaries }
|
28
28
|
let(:dbs) { ibs.crop(*cbs.for(0), true) }
|
29
|
-
let(:gbs) { Sqed::BoundaryFinder::ColorLineFinder.new(image: dbs, layout: :vertical_offset_cross) }
|
29
|
+
let(:gbs) { Sqed::BoundaryFinder::ColorLineFinder.new(image: dbs, layout: :vertical_offset_cross, use_thumbnail: false) }
|
30
30
|
let(:hbs) { gbs.boundaries }
|
31
31
|
|
32
32
|
specify 'initial image columns are as expected for :image above' do
|
@@ -167,7 +167,7 @@ describe Sqed::BoundaryFinder::ColorLineFinder do
|
|
167
167
|
context 'thumbnail processing finds reasonable boundaries' do
|
168
168
|
|
169
169
|
let(:thumb) { ImageHelpers.frost_stage_thumb }
|
170
|
-
let(:finder) { Sqed::BoundaryFinder::ColorLineFinder.new(image: thumb, layout: :cross)}
|
170
|
+
let(:finder) { Sqed::BoundaryFinder::ColorLineFinder.new(image: thumb, layout: :cross, use_thumbnail: false)}
|
171
171
|
let(:finder_boundaries) { finder.boundaries }
|
172
172
|
|
173
173
|
let(:pct) { 0.08 }
|
@@ -3,7 +3,7 @@ require 'spec_helper'
|
|
3
3
|
describe Sqed::BoundaryFinder do
|
4
4
|
|
5
5
|
specify 'when no image provided, #new raises' do
|
6
|
-
expect { Sqed::BoundaryFinder.new() }.to raise_error
|
6
|
+
expect { Sqed::BoundaryFinder.new() }.to raise_error('No layout provided.')
|
7
7
|
end
|
8
8
|
|
9
9
|
context 'when initiated with an image' do
|
@@ -103,9 +103,15 @@ describe Sqed::BoundaryFinder do
|
|
103
103
|
expect( Sqed::BoundaryFinder.frequency_stats(i, 12)).to eq([3, 4, 5])
|
104
104
|
end
|
105
105
|
|
106
|
+
specify 'returns estimated borders if only one hit greater than samples taken' do
|
107
|
+
expect( Sqed::BoundaryFinder.frequency_stats(i, 15)).to eq([2,3,4])
|
108
|
+
end
|
109
|
+
|
106
110
|
specify 'returns nil if no count is greater than samples taken' do
|
107
|
-
expect( Sqed::BoundaryFinder.frequency_stats(i,
|
111
|
+
expect( Sqed::BoundaryFinder.frequency_stats(i, 20)).to eq(nil)
|
108
112
|
end
|
113
|
+
|
114
|
+
|
109
115
|
end
|
110
116
|
|
111
117
|
context 'offset boundaries from crossy_black_line_specimen image ' do
|
data/spec/lib/sqed_spec.rb
CHANGED
@@ -90,20 +90,23 @@ describe Sqed do
|
|
90
90
|
|
91
91
|
context '#result' do
|
92
92
|
let(:r) { s.result }
|
93
|
+
|
93
94
|
specify 'returns a Sqed::Result' do
|
94
95
|
expect(r.class.name).to eq('Sqed::Result')
|
95
96
|
end
|
96
97
|
|
97
98
|
context 'extracted data' do
|
98
|
-
specify 'for an :identifier section' do
|
99
|
+
specify 'text for an :identifier section' do
|
100
|
+
|
101
|
+
r.identifier_image.write('41.jpg')
|
99
102
|
expect(r.text_for(:identifier)).to match('000041196')
|
100
103
|
end
|
101
104
|
|
102
|
-
specify 'for an annotated_specimen section' do
|
105
|
+
specify 'text for an annotated_specimen section' do
|
103
106
|
expect(r.text_for(:annotated_specimen)).to match('Saucier Creek')
|
104
107
|
end
|
105
108
|
|
106
|
-
specify 'for a curator_metadata section' do
|
109
|
+
specify 'text for a curator_metadata section' do
|
107
110
|
expect(r.text_for(:curator_metadata)).to match('Frost Entomological Museum')
|
108
111
|
end
|
109
112
|
end
|
@@ -113,7 +116,7 @@ describe Sqed do
|
|
113
116
|
context 'all together, with border' do
|
114
117
|
let(:image) { ImageHelpers.greenline_image }
|
115
118
|
let(:pattern) { :right_t }
|
116
|
-
let(:s) { Sqed.new(image: image, pattern: pattern, has_border:
|
119
|
+
let(:s) { Sqed.new(image: image, pattern: pattern, has_border: true) }
|
117
120
|
|
118
121
|
specify '#boundaries returns a Sqed::Boundaries instance' do
|
119
122
|
expect(s.boundaries.class.name).to eq('Sqed::Boundaries')
|
@@ -138,11 +141,12 @@ describe Sqed do
|
|
138
141
|
end
|
139
142
|
|
140
143
|
context 'extracted data' do
|
141
|
-
specify 'for an :identifier section' do
|
144
|
+
specify 'text for an :identifier section' do
|
145
|
+
r.identifier_image.write('85.jpg')
|
142
146
|
expect(r.text_for(:identifier)).to match('000085067')
|
143
147
|
end
|
144
148
|
|
145
|
-
specify 'for a specimen section' do
|
149
|
+
specify 'text for a specimen section' do
|
146
150
|
expect(r.text_for(:annotated_specimen)).to match('Aeshna')
|
147
151
|
end
|
148
152
|
end
|
data/sqed.gemspec
CHANGED
@@ -23,7 +23,7 @@ Gem::Specification.new do |spec|
|
|
23
23
|
spec.add_dependency 'rtesseract', '~> 1.2.6'
|
24
24
|
spec.add_dependency 'zxing_cpp', '~> 0.1.0'
|
25
25
|
|
26
|
-
spec.add_development_dependency 'rspec'
|
26
|
+
spec.add_development_dependency 'rspec', '~> 3.3'
|
27
27
|
spec.add_development_dependency 'bundler', '~> 1.5'
|
28
28
|
spec.add_development_dependency 'did_you_mean', '~> 0.9'
|
29
29
|
spec.add_development_dependency 'byebug'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sqed
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matt Yoder
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-
|
12
|
+
date: 2015-09-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -71,16 +71,16 @@ dependencies:
|
|
71
71
|
name: rspec
|
72
72
|
requirement: !ruby/object:Gem::Requirement
|
73
73
|
requirements:
|
74
|
-
- - "
|
74
|
+
- - "~>"
|
75
75
|
- !ruby/object:Gem::Version
|
76
|
-
version: '
|
76
|
+
version: '3.3'
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
79
|
version_requirements: !ruby/object:Gem::Requirement
|
80
80
|
requirements:
|
81
|
-
- - "
|
81
|
+
- - "~>"
|
82
82
|
- !ruby/object:Gem::Version
|
83
|
-
version: '
|
83
|
+
version: '3.3'
|
84
84
|
- !ruby/object:Gem::Dependency
|
85
85
|
name: bundler
|
86
86
|
requirement: !ruby/object:Gem::Requirement
|