sqed 0.1.7 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/sqed.rb +4 -3
- data/lib/sqed/boundary_finder.rb +3 -3
- data/lib/sqed/boundary_finder/color_line_finder.rb +31 -6
- data/lib/sqed/parser/ocr_parser.rb +15 -5
- data/lib/sqed/version.rb +1 -1
- data/lib/sqed_config.rb +37 -14
- data/spec/lib/stage_handling/seven_slot_spec.rb +18 -0
- data/spec/support/files/stage_images/inhs_7_slot.jpg +0 -0
- data/spec/support/files/stage_images/inhs_four_thirds.jpg +0 -0
- data/spec/support/image_helpers.rb +3 -1
- metadata +8 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 639226420485cffa59f748b6c0e5c66db21b239e
|
4
|
+
data.tar.gz: 425bd2c26e16339e4ef7b61b605f90bf2efb42db
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0b40ba1db0b861fb88769d99997c840931f916dacd1731b0f321828c54fbe033e552c879298a0e2a5057b3859a534487c298e8870fbd4ea7316b3351e95a4504
|
7
|
+
data.tar.gz: 7bd37213c089d409778291ad30aa63c5c65a0ebdd10a311ed376388b9eddc2134da234b788600b7d4213042c6290ccb88a120969d5de08aed9571cd9d5c2a2f6
|
data/lib/sqed.rb
CHANGED
@@ -77,13 +77,13 @@ class Sqed
|
|
77
77
|
# federate extraction options and apply user provided over-rides
|
78
78
|
def extraction_metadata
|
79
79
|
data = SqedConfig::EXTRACTION_PATTERNS[@pattern]
|
80
|
-
|
80
|
+
|
81
|
+
data.merge!(boundary_color: boundary_color)
|
81
82
|
data.merge!(boundary_finder: @boundary_finder) if boundary_finder
|
83
|
+
data.merge!(has_border: has_border)
|
82
84
|
data.merge!(layout: layout) if layout
|
83
85
|
data.merge!(metadata_map: metadata_map) if metadata_map
|
84
|
-
data.merge!(has_border: has_border)
|
85
86
|
data.merge!(use_thumbnail: use_thumbnail)
|
86
|
-
data.merge!(boundary_color: boundary_color)
|
87
87
|
data
|
88
88
|
end
|
89
89
|
|
@@ -133,6 +133,7 @@ class Sqed
|
|
133
133
|
end
|
134
134
|
|
135
135
|
def result
|
136
|
+
# pattern.nil? is no longer true -> must have values for all extraction_metadata keys
|
136
137
|
return false if image.nil? || pattern.nil?
|
137
138
|
extractor = Sqed::Extractor.new(
|
138
139
|
boundaries: boundaries,
|
data/lib/sqed/boundary_finder.rb
CHANGED
@@ -95,8 +95,8 @@ class Sqed::BoundaryFinder
|
|
95
95
|
end
|
96
96
|
end
|
97
97
|
|
98
|
-
# @return
|
99
|
-
# the
|
98
|
+
# @return [Array]
|
99
|
+
# the x or y position returned as a start, mid, and end coordinate that represent the width of the colored line that completely divides the image, e.g. [9, 15, 16]
|
100
100
|
#
|
101
101
|
# @param image
|
102
102
|
# the image to sample
|
@@ -178,7 +178,7 @@ class Sqed::BoundaryFinder
|
|
178
178
|
end
|
179
179
|
|
180
180
|
# @return [Array]
|
181
|
-
# the
|
181
|
+
# the start, mid, endpoint position of all (pixel) positions that have a count greater than the cutoff
|
182
182
|
def self.frequency_stats(frequency_hash, sample_cutoff = 0)
|
183
183
|
|
184
184
|
return nil if sample_cutoff.nil? || sample_cutoff < 1
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'rmagick'
|
2
2
|
|
3
|
-
#
|
3
|
+
# An agnostic pattern finder for color-line delimited boundaries
|
4
4
|
#
|
5
5
|
class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
6
6
|
|
@@ -21,20 +21,45 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
|
21
21
|
def find_bands
|
22
22
|
case @layout # boundaries.coordinates are referenced from stage image
|
23
23
|
|
24
|
-
|
25
|
-
|
24
|
+
# No specs for this yet
|
25
|
+
when :seven_slot
|
26
|
+
top_bottom_split = Sqed::BoundaryFinder.color_boundary_finder(image: img, scan: :columns, boundary_color: @boundary_color) # detect vertical division [array]
|
27
|
+
left_right_split = Sqed::BoundaryFinder.color_boundary_finder(image: img, sample_subdivision_size: 2, boundary_color: @boundary_color) # detect horizontal division [array]
|
28
|
+
|
29
|
+
boundaries.set(0, [0, 0, left_right_split[0], top_bottom_split[0] ])
|
30
|
+
boundaries.set(6, [0, top_bottom_split[2], left_right_split[0], img.rows - top_bottom_split[2] ] )
|
31
|
+
|
32
|
+
right_top_img = img.crop( left_right_split[2], 0, img.columns - left_right_split[2], top_bottom_split[0] , true) # sections 1,2
|
33
|
+
right_bottom_img = img.crop(left_right_split[2], top_bottom_split[2], img.columns - left_right_split[2], img.rows - top_bottom_split[2], true) # sections 3,4,5
|
34
|
+
|
35
|
+
right_top_split = Sqed::BoundaryFinder.color_boundary_finder(image: right_top_img, boundary_color: @boundary_color) # vertical line b/w 1 & 2
|
36
|
+
|
37
|
+
boundaries.set(1, [left_right_split[2], 0, left_right_split[2] + right_top_split[0], top_bottom_split[0] ])
|
38
|
+
boundaries.set(2, [left_right_split[2] + right_top_split[2], 0, right_top_img.columns - right_top_split[2], top_bottom_split[0] ] )
|
39
|
+
|
40
|
+
right_bottom_split = Sqed::BoundaryFinder.color_boundary_finder(image: right_bottom_img, scan: :columns, sample_subdivision_size: 2, boundary_color: @boundary_color) # horizontal line b/w (5,3) & 4
|
41
|
+
|
42
|
+
bottom_right_top_img = right_bottom_img.crop(0,0, img.columns - left_right_split[2], right_bottom_split[1], true) # 3,5 - we leave right_bottom_split at [1] (not 2) to take into account possible overlap error (crop gets full line at top of image)
|
43
|
+
|
44
|
+
boundaries.set(3, [ left_right_split[2] + right_top_split[2], top_bottom_split[2], left_right_split[2] + right_top_split[2], bottom_right_top_img.rows ] )
|
45
|
+
boundaries.set(5, [ left_right_split[2], top_bottom_split[2], left_right_split[2] + right_top_split[0], bottom_right_top_img.rows ] )
|
46
|
+
|
47
|
+
boundaries.set(4, [ left_right_split[2], top_bottom_split[2] + right_top_split[2], img.columns - left_right_split[2], right_bottom_img.rows - right_top_split[2] ] )
|
48
|
+
|
49
|
+
when :vertical_split
|
50
|
+
t = Sqed::BoundaryFinder.color_boundary_finder(image: img, boundary_color: @boundary_color) #detect vertical division
|
26
51
|
return if t.nil?
|
27
52
|
boundaries.set(0, [0, 0, t[0], img.rows]) # left section of image
|
28
53
|
boundaries.set(1, [t[2], 0, img.columns - t[2], img.rows]) # right section of image
|
29
54
|
|
30
55
|
when :horizontal_split
|
31
|
-
t = Sqed::BoundaryFinder.color_boundary_finder(image: img, scan: :columns, boundary_color: @boundary_color) # set to detect horizontal division
|
56
|
+
t = Sqed::BoundaryFinder.color_boundary_finder(image: img, scan: :columns, boundary_color: @boundary_color) # set to detect horizontal division
|
32
57
|
return if t.nil?
|
33
58
|
|
34
59
|
boundaries.set(0, [0, 0, img.columns, t[0]]) # upper section of image
|
35
60
|
boundaries.set(1, [0, t[2], img.columns, img.rows - t[2]]) # lower section of image
|
36
61
|
|
37
|
-
when :right_t
|
62
|
+
when :right_t # only 3 zones expected, with horizontal division in right-side of vertical division
|
38
63
|
vertical = self.class.new(image: @img, layout: :vertical_split, boundary_color: @boundary_color, use_thumbnail: false ).boundaries
|
39
64
|
|
40
65
|
irt = img.crop(*vertical.for(1), true)
|
@@ -50,7 +75,7 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
|
50
75
|
ilt = img.crop(*vertical.for(0), true)
|
51
76
|
irt = img.crop(*vertical.for(1), true)
|
52
77
|
|
53
|
-
left = self.class.new(image: ilt, layout: :horizontal_split, boundary_color: @boundary_color, use_thumbnail: false).boundaries
|
78
|
+
left = self.class.new(image: ilt, layout: :horizontal_split, boundary_color: @boundary_color, use_thumbnail: false).boundaries # fails
|
54
79
|
right = self.class.new(image: irt, layout: :horizontal_split, boundary_color: @boundary_color, use_thumbnail: false ).boundaries # OK
|
55
80
|
|
56
81
|
boundaries.set(0, [0, 0, left.width_for(0), left.height_for(0) ])
|
@@ -61,7 +61,15 @@ class Sqed::Parser::OcrParser < Sqed::Parser
|
|
61
61
|
},
|
62
62
|
deterimination_labels: {
|
63
63
|
psm: 3
|
64
|
+
},
|
65
|
+
other_labels: {
|
66
|
+
psm: 3
|
67
|
+
},
|
68
|
+
collecting_event_labels: {
|
69
|
+
psm: 3
|
64
70
|
}
|
71
|
+
|
72
|
+
|
65
73
|
}
|
66
74
|
|
67
75
|
# the text extracted from the image
|
@@ -85,16 +93,18 @@ class Sqed::Parser::OcrParser < Sqed::Parser
|
|
85
93
|
# img.write('tmp/foo4.jpg')
|
86
94
|
# img = img.quantize(2, Magick::GRAYColorspace)
|
87
95
|
# #img = img.threshold(0.5)
|
88
|
-
# img.write('foo4.jpg')
|
96
|
+
# img.write('foo4.jpg')
|
89
97
|
# img = img.equalize #(32, Magick::GRAYColorspace)
|
90
|
-
# img.write('foo5.jpg')
|
91
|
-
# #img.write('foo3.jpg')
|
98
|
+
# img.write('foo5.jpg')
|
99
|
+
# #img.write('foo3.jpg')
|
92
100
|
#
|
93
|
-
# img.write('foo.jpg')
|
101
|
+
# img.write('foo.jpg')
|
94
102
|
# img = img.white_threshold(245)
|
95
103
|
# img
|
96
104
|
# end
|
97
|
-
|
105
|
+
|
106
|
+
# @return [String]
|
107
|
+
# the ocr text
|
98
108
|
def text(section_type: :default)
|
99
109
|
img = @image
|
100
110
|
params = SECTION_PARAMS[:default].merge(SECTION_PARAMS[section_type])
|
data/lib/sqed/version.rb
CHANGED
data/lib/sqed_config.rb
CHANGED
@@ -58,6 +58,14 @@ module SqedConfig
|
|
58
58
|
# | 0 | :internal_box
|
59
59
|
# -----
|
60
60
|
#
|
61
|
+
# 0 | 1 | 2
|
62
|
+
# ------------
|
63
|
+
# | 5 | 3 :seven_slot
|
64
|
+
# 6 |--------
|
65
|
+
# | 4
|
66
|
+
#
|
67
|
+
#
|
68
|
+
#
|
61
69
|
|
62
70
|
# Hash values are used to stub out
|
63
71
|
# the Sqed::Boundaries instance.
|
@@ -69,29 +77,38 @@ module SqedConfig
|
|
69
77
|
vertical_split: [0,1],
|
70
78
|
right_t: [0,1,2],
|
71
79
|
left_t: [0,1,2],
|
72
|
-
internal_box: [0]
|
80
|
+
internal_box: [0],
|
81
|
+
seven_slot: [0,1,2,3,4,5,6]
|
73
82
|
}
|
74
83
|
|
75
84
|
# Each element of the layout is a "section".
|
76
85
|
LAYOUT_SECTION_TYPES = [
|
77
|
-
:
|
78
|
-
:
|
79
|
-
:
|
80
|
-
:determination_labels,
|
81
|
-
:
|
82
|
-
:
|
83
|
-
:
|
84
|
-
:
|
85
|
-
:
|
86
|
+
:annotated_specimen, # a specimen is present, and metadata is too
|
87
|
+
:collecting_event_labels, # the section that contains collecting event labels (only)
|
88
|
+
:curator_metadata, # the section contains text with curator metadata
|
89
|
+
:determination_labels, # the section contains text that determines the specimen (only)
|
90
|
+
:identifier, # the section contains an identifier (e.g. barcode or unique number)
|
91
|
+
:image_registration, # the section contains only image registration information
|
92
|
+
:labels, # the section contains collecting event and other non-determination labels
|
93
|
+
:nothing, # section is empty
|
94
|
+
:other_labels, # the section contains miscellaneous text
|
95
|
+
:specimen, # the specimen only, no metadata should be present
|
96
|
+
:stage, # the image contains the full stage
|
86
97
|
]
|
87
98
|
|
88
99
|
# Links section types to data parsers
|
89
100
|
SECTION_PARSERS = {
|
90
|
-
|
91
|
-
|
92
|
-
deterimination_labels: [ Sqed::Parser::OcrParser ],
|
101
|
+
annotated_specimen: [ Sqed::Parser::OcrParser],
|
102
|
+
collecting_event_labels: [ Sqed::Parser::OcrParser],
|
93
103
|
curator_metadata: [ Sqed::Parser::OcrParser ],
|
94
|
-
|
104
|
+
deterimination_labels: [ Sqed::Parser::OcrParser ],
|
105
|
+
identifier: [ Sqed::Parser::BarcodeParser, Sqed::Parser::OcrParser ],
|
106
|
+
image_registration: [],
|
107
|
+
labels: [ Sqed::Parser::OcrParser ],
|
108
|
+
nothing: [],
|
109
|
+
other_labels: [ Sqed::Parser::OcrParser ],
|
110
|
+
specimen: [],
|
111
|
+
stage: []
|
95
112
|
}
|
96
113
|
|
97
114
|
EXTRACTION_PATTERNS = {
|
@@ -123,6 +140,12 @@ module SqedConfig
|
|
123
140
|
boundary_finder: Sqed::BoundaryFinder::StageFinder,
|
124
141
|
layout: :internal_box,
|
125
142
|
metadata_map: {0 => :stage}
|
143
|
+
},
|
144
|
+
|
145
|
+
seven_slot: {
|
146
|
+
boundary_finder: Sqed::BoundaryFinder::ColorLineFinder,
|
147
|
+
layout: :seven_slot,
|
148
|
+
metadata_map: {0 => :collecting_event_labels, 1 => :determination_labels, 2 => :other_labels, 3 => :image_registration, 4 => :curator_metadata, 5 => :identifier, 6 => :specimen }
|
126
149
|
}
|
127
150
|
}
|
128
151
|
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe 'handling 7 slot stages' do
|
4
|
+
|
5
|
+
let(:image) { ImageHelpers.inhs_stage_7_slot }
|
6
|
+
let(:sqed) { Sqed.new( image: image, pattern: :seven_slot, boundary_color: :red, has_border: false ) }
|
7
|
+
|
8
|
+
context 'parses' do
|
9
|
+
specify 'new() without errors' do
|
10
|
+
expect( sqed ).to be_truthy
|
11
|
+
end
|
12
|
+
|
13
|
+
specify 'get_result without errors' do
|
14
|
+
expect( sqed.result ).to be_truthy
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
Binary file
|
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sqed
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matt Yoder
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-
|
12
|
+
date: 2015-12-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -176,6 +176,7 @@ files:
|
|
176
176
|
- spec/lib/sqed/parser_spec.rb
|
177
177
|
- spec/lib/sqed/result_spec.rb
|
178
178
|
- spec/lib/sqed_spec.rb
|
179
|
+
- spec/lib/stage_handling/seven_slot_spec.rb
|
179
180
|
- spec/spec_helper.rb
|
180
181
|
- spec/support/files/barcode_images/code_128_barcode.png
|
181
182
|
- spec/support/files/barcode_images/datamatrix_barcode.png
|
@@ -197,6 +198,8 @@ files:
|
|
197
198
|
- spec/support/files/stage_images/frost_stage_medium.jpg
|
198
199
|
- spec/support/files/stage_images/frost_stage_thumb.jpg
|
199
200
|
- spec/support/files/stage_images/greenlineimage.jpg
|
201
|
+
- spec/support/files/stage_images/inhs_7_slot.jpg
|
202
|
+
- spec/support/files/stage_images/inhs_four_thirds.jpg
|
200
203
|
- spec/support/files/test0.jpg
|
201
204
|
- spec/support/files/test1.jpg
|
202
205
|
- spec/support/files/test2.jpg
|
@@ -242,6 +245,7 @@ test_files:
|
|
242
245
|
- spec/lib/sqed/parser_spec.rb
|
243
246
|
- spec/lib/sqed/result_spec.rb
|
244
247
|
- spec/lib/sqed_spec.rb
|
248
|
+
- spec/lib/stage_handling/seven_slot_spec.rb
|
245
249
|
- spec/spec_helper.rb
|
246
250
|
- spec/support/files/barcode_images/code_128_barcode.png
|
247
251
|
- spec/support/files/barcode_images/datamatrix_barcode.png
|
@@ -263,6 +267,8 @@ test_files:
|
|
263
267
|
- spec/support/files/stage_images/frost_stage_medium.jpg
|
264
268
|
- spec/support/files/stage_images/frost_stage_thumb.jpg
|
265
269
|
- spec/support/files/stage_images/greenlineimage.jpg
|
270
|
+
- spec/support/files/stage_images/inhs_7_slot.jpg
|
271
|
+
- spec/support/files/stage_images/inhs_four_thirds.jpg
|
266
272
|
- spec/support/files/test0.jpg
|
267
273
|
- spec/support/files/test1.jpg
|
268
274
|
- spec/support/files/test2.jpg
|