sqed 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/sqed.rb +4 -3
- data/lib/sqed/boundary_finder.rb +3 -3
- data/lib/sqed/boundary_finder/color_line_finder.rb +31 -6
- data/lib/sqed/parser/ocr_parser.rb +15 -5
- data/lib/sqed/version.rb +1 -1
- data/lib/sqed_config.rb +37 -14
- data/spec/lib/stage_handling/seven_slot_spec.rb +18 -0
- data/spec/support/files/stage_images/inhs_7_slot.jpg +0 -0
- data/spec/support/files/stage_images/inhs_four_thirds.jpg +0 -0
- data/spec/support/image_helpers.rb +3 -1
- metadata +8 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 639226420485cffa59f748b6c0e5c66db21b239e
|
4
|
+
data.tar.gz: 425bd2c26e16339e4ef7b61b605f90bf2efb42db
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0b40ba1db0b861fb88769d99997c840931f916dacd1731b0f321828c54fbe033e552c879298a0e2a5057b3859a534487c298e8870fbd4ea7316b3351e95a4504
|
7
|
+
data.tar.gz: 7bd37213c089d409778291ad30aa63c5c65a0ebdd10a311ed376388b9eddc2134da234b788600b7d4213042c6290ccb88a120969d5de08aed9571cd9d5c2a2f6
|
data/lib/sqed.rb
CHANGED
@@ -77,13 +77,13 @@ class Sqed
|
|
77
77
|
# federate extraction options and apply user provided over-rides
|
78
78
|
def extraction_metadata
|
79
79
|
data = SqedConfig::EXTRACTION_PATTERNS[@pattern]
|
80
|
-
|
80
|
+
|
81
|
+
data.merge!(boundary_color: boundary_color)
|
81
82
|
data.merge!(boundary_finder: @boundary_finder) if boundary_finder
|
83
|
+
data.merge!(has_border: has_border)
|
82
84
|
data.merge!(layout: layout) if layout
|
83
85
|
data.merge!(metadata_map: metadata_map) if metadata_map
|
84
|
-
data.merge!(has_border: has_border)
|
85
86
|
data.merge!(use_thumbnail: use_thumbnail)
|
86
|
-
data.merge!(boundary_color: boundary_color)
|
87
87
|
data
|
88
88
|
end
|
89
89
|
|
@@ -133,6 +133,7 @@ class Sqed
|
|
133
133
|
end
|
134
134
|
|
135
135
|
def result
|
136
|
+
# pattern.nil? is no longer true -> must have values for all extraction_metadata keys
|
136
137
|
return false if image.nil? || pattern.nil?
|
137
138
|
extractor = Sqed::Extractor.new(
|
138
139
|
boundaries: boundaries,
|
data/lib/sqed/boundary_finder.rb
CHANGED
@@ -95,8 +95,8 @@ class Sqed::BoundaryFinder
|
|
95
95
|
end
|
96
96
|
end
|
97
97
|
|
98
|
-
# @return
|
99
|
-
# the
|
98
|
+
# @return [Array]
|
99
|
+
# the x or y positions, returned as start, mid, and end coordinates that span the width of the colored line that completely divides the image, e.g. [9, 15, 16]
|
100
100
|
#
|
101
101
|
# @param image
|
102
102
|
# the image to sample
|
@@ -178,7 +178,7 @@ class Sqed::BoundaryFinder
|
|
178
178
|
end
|
179
179
|
|
180
180
|
# @return [Array]
|
181
|
-
# the
|
181
|
+
# the start, mid, and end positions among all (pixel) positions that have a count greater than the cutoff
|
182
182
|
def self.frequency_stats(frequency_hash, sample_cutoff = 0)
|
183
183
|
|
184
184
|
return nil if sample_cutoff.nil? || sample_cutoff < 1
|
data/lib/sqed/boundary_finder/color_line_finder.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'rmagick'
|
2
2
|
|
3
|
-
#
|
3
|
+
# An agnostic pattern finder for color-line delimited boundaries
|
4
4
|
#
|
5
5
|
class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
6
6
|
|
@@ -21,20 +21,45 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
|
21
21
|
def find_bands
|
22
22
|
case @layout # boundaries.coordinates are referenced from stage image
|
23
23
|
|
24
|
-
|
25
|
-
|
24
|
+
# No specs for this yet
|
25
|
+
when :seven_slot
|
26
|
+
top_bottom_split = Sqed::BoundaryFinder.color_boundary_finder(image: img, scan: :columns, boundary_color: @boundary_color) # detect vertical division [array]
|
27
|
+
left_right_split = Sqed::BoundaryFinder.color_boundary_finder(image: img, sample_subdivision_size: 2, boundary_color: @boundary_color) # detect horizontal division [array]
|
28
|
+
|
29
|
+
boundaries.set(0, [0, 0, left_right_split[0], top_bottom_split[0] ])
|
30
|
+
boundaries.set(6, [0, top_bottom_split[2], left_right_split[0], img.rows - top_bottom_split[2] ] )
|
31
|
+
|
32
|
+
right_top_img = img.crop( left_right_split[2], 0, img.columns - left_right_split[2], top_bottom_split[0] , true) # sections 1,2
|
33
|
+
right_bottom_img = img.crop(left_right_split[2], top_bottom_split[2], img.columns - left_right_split[2], img.rows - top_bottom_split[2], true) # sections 3,4,5
|
34
|
+
|
35
|
+
right_top_split = Sqed::BoundaryFinder.color_boundary_finder(image: right_top_img, boundary_color: @boundary_color) # vertical line b/w 1 & 2
|
36
|
+
|
37
|
+
boundaries.set(1, [left_right_split[2], 0, left_right_split[2] + right_top_split[0], top_bottom_split[0] ])
|
38
|
+
boundaries.set(2, [left_right_split[2] + right_top_split[2], 0, right_top_img.columns - right_top_split[2], top_bottom_split[0] ] )
|
39
|
+
|
40
|
+
right_bottom_split = Sqed::BoundaryFinder.color_boundary_finder(image: right_bottom_img, scan: :columns, sample_subdivision_size: 2, boundary_color: @boundary_color) # horizontal line b/w (5,3) & 4
|
41
|
+
|
42
|
+
bottom_right_top_img = right_bottom_img.crop(0,0, img.columns - left_right_split[2], right_bottom_split[1], true) # 3,5 - we leave right_bottom_split at [1] (not 2) to take into account possible overlap error (crop gets full line at top of image)
|
43
|
+
|
44
|
+
boundaries.set(3, [ left_right_split[2] + right_top_split[2], top_bottom_split[2], left_right_split[2] + right_top_split[2], bottom_right_top_img.rows ] )
|
45
|
+
boundaries.set(5, [ left_right_split[2], top_bottom_split[2], left_right_split[2] + right_top_split[0], bottom_right_top_img.rows ] )
|
46
|
+
|
47
|
+
boundaries.set(4, [ left_right_split[2], top_bottom_split[2] + right_top_split[2], img.columns - left_right_split[2], right_bottom_img.rows - right_top_split[2] ] )
|
48
|
+
|
49
|
+
when :vertical_split
|
50
|
+
t = Sqed::BoundaryFinder.color_boundary_finder(image: img, boundary_color: @boundary_color) #detect vertical division
|
26
51
|
return if t.nil?
|
27
52
|
boundaries.set(0, [0, 0, t[0], img.rows]) # left section of image
|
28
53
|
boundaries.set(1, [t[2], 0, img.columns - t[2], img.rows]) # right section of image
|
29
54
|
|
30
55
|
when :horizontal_split
|
31
|
-
t = Sqed::BoundaryFinder.color_boundary_finder(image: img, scan: :columns, boundary_color: @boundary_color) # set to detect horizontal division
|
56
|
+
t = Sqed::BoundaryFinder.color_boundary_finder(image: img, scan: :columns, boundary_color: @boundary_color) # set to detect horizontal division
|
32
57
|
return if t.nil?
|
33
58
|
|
34
59
|
boundaries.set(0, [0, 0, img.columns, t[0]]) # upper section of image
|
35
60
|
boundaries.set(1, [0, t[2], img.columns, img.rows - t[2]]) # lower section of image
|
36
61
|
|
37
|
-
when :right_t
|
62
|
+
when :right_t # only 3 zones expected, with the horizontal division on the right side of the vertical division
|
38
63
|
vertical = self.class.new(image: @img, layout: :vertical_split, boundary_color: @boundary_color, use_thumbnail: false ).boundaries
|
39
64
|
|
40
65
|
irt = img.crop(*vertical.for(1), true)
|
@@ -50,7 +75,7 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
|
50
75
|
ilt = img.crop(*vertical.for(0), true)
|
51
76
|
irt = img.crop(*vertical.for(1), true)
|
52
77
|
|
53
|
-
left = self.class.new(image: ilt, layout: :horizontal_split, boundary_color: @boundary_color, use_thumbnail: false).boundaries
|
78
|
+
left = self.class.new(image: ilt, layout: :horizontal_split, boundary_color: @boundary_color, use_thumbnail: false).boundaries # fails
|
54
79
|
right = self.class.new(image: irt, layout: :horizontal_split, boundary_color: @boundary_color, use_thumbnail: false ).boundaries # OK
|
55
80
|
|
56
81
|
boundaries.set(0, [0, 0, left.width_for(0), left.height_for(0) ])
|
data/lib/sqed/parser/ocr_parser.rb
CHANGED
@@ -61,7 +61,15 @@ class Sqed::Parser::OcrParser < Sqed::Parser
|
|
61
61
|
},
|
62
62
|
deterimination_labels: {
|
63
63
|
psm: 3
|
64
|
+
},
|
65
|
+
other_labels: {
|
66
|
+
psm: 3
|
67
|
+
},
|
68
|
+
collecting_event_labels: {
|
69
|
+
psm: 3
|
64
70
|
}
|
71
|
+
|
72
|
+
|
65
73
|
}
|
66
74
|
|
67
75
|
# the text extracted from the image
|
@@ -85,16 +93,18 @@ class Sqed::Parser::OcrParser < Sqed::Parser
|
|
85
93
|
# img.write('tmp/foo4.jpg')
|
86
94
|
# img = img.quantize(2, Magick::GRAYColorspace)
|
87
95
|
# #img = img.threshold(0.5)
|
88
|
-
# img.write('foo4.jpg')
|
96
|
+
# img.write('foo4.jpg')
|
89
97
|
# img = img.equalize #(32, Magick::GRAYColorspace)
|
90
|
-
# img.write('foo5.jpg')
|
91
|
-
# #img.write('foo3.jpg')
|
98
|
+
# img.write('foo5.jpg')
|
99
|
+
# #img.write('foo3.jpg')
|
92
100
|
#
|
93
|
-
# img.write('foo.jpg')
|
101
|
+
# img.write('foo.jpg')
|
94
102
|
# img = img.white_threshold(245)
|
95
103
|
# img
|
96
104
|
# end
|
97
|
-
|
105
|
+
|
106
|
+
# @return [String]
|
107
|
+
# the ocr text
|
98
108
|
def text(section_type: :default)
|
99
109
|
img = @image
|
100
110
|
params = SECTION_PARAMS[:default].merge(SECTION_PARAMS[section_type])
|
data/lib/sqed/version.rb
CHANGED
data/lib/sqed_config.rb
CHANGED
@@ -58,6 +58,14 @@ module SqedConfig
|
|
58
58
|
# | 0 | :internal_box
|
59
59
|
# -----
|
60
60
|
#
|
61
|
+
# 0 | 1 | 2
|
62
|
+
# ------------
|
63
|
+
# | 5 | 3 :seven_slot
|
64
|
+
# 6 |--------
|
65
|
+
# | 4
|
66
|
+
#
|
67
|
+
#
|
68
|
+
#
|
61
69
|
|
62
70
|
# Hash values are used to stub out
|
63
71
|
# the Sqed::Boundaries instance.
|
@@ -69,29 +77,38 @@ module SqedConfig
|
|
69
77
|
vertical_split: [0,1],
|
70
78
|
right_t: [0,1,2],
|
71
79
|
left_t: [0,1,2],
|
72
|
-
internal_box: [0]
|
80
|
+
internal_box: [0],
|
81
|
+
seven_slot: [0,1,2,3,4,5,6]
|
73
82
|
}
|
74
83
|
|
75
84
|
# Each element of the layout is a "section".
|
76
85
|
LAYOUT_SECTION_TYPES = [
|
77
|
-
:
|
78
|
-
:
|
79
|
-
:
|
80
|
-
:determination_labels,
|
81
|
-
:
|
82
|
-
:
|
83
|
-
:
|
84
|
-
:
|
85
|
-
:
|
86
|
+
:annotated_specimen, # a specimen is present, and metadata is too
|
87
|
+
:collecting_event_labels, # the section that contains collecting event labels (only)
|
88
|
+
:curator_metadata, # the section contains text with curator metadata
|
89
|
+
:determination_labels, # the section contains text that determines the specimen (only)
|
90
|
+
:identifier, # the section contains an identifier (e.g. barcode or unique number)
|
91
|
+
:image_registration, # the section contains only image registration information
|
92
|
+
:labels, # the section contains collecting event and other non-determination labels
|
93
|
+
:nothing, # section is empty
|
94
|
+
:other_labels, # the section contains miscellaneous text
|
95
|
+
:specimen, # the specimen only, no metadata should be present
|
96
|
+
:stage, # the image contains the full stage
|
86
97
|
]
|
87
98
|
|
88
99
|
# Links section types to data parsers
|
89
100
|
SECTION_PARSERS = {
|
90
|
-
|
91
|
-
|
92
|
-
deterimination_labels: [ Sqed::Parser::OcrParser ],
|
101
|
+
annotated_specimen: [ Sqed::Parser::OcrParser],
|
102
|
+
collecting_event_labels: [ Sqed::Parser::OcrParser],
|
93
103
|
curator_metadata: [ Sqed::Parser::OcrParser ],
|
94
|
-
|
104
|
+
deterimination_labels: [ Sqed::Parser::OcrParser ],
|
105
|
+
identifier: [ Sqed::Parser::BarcodeParser, Sqed::Parser::OcrParser ],
|
106
|
+
image_registration: [],
|
107
|
+
labels: [ Sqed::Parser::OcrParser ],
|
108
|
+
nothing: [],
|
109
|
+
other_labels: [ Sqed::Parser::OcrParser ],
|
110
|
+
specimen: [],
|
111
|
+
stage: []
|
95
112
|
}
|
96
113
|
|
97
114
|
EXTRACTION_PATTERNS = {
|
@@ -123,6 +140,12 @@ module SqedConfig
|
|
123
140
|
boundary_finder: Sqed::BoundaryFinder::StageFinder,
|
124
141
|
layout: :internal_box,
|
125
142
|
metadata_map: {0 => :stage}
|
143
|
+
},
|
144
|
+
|
145
|
+
seven_slot: {
|
146
|
+
boundary_finder: Sqed::BoundaryFinder::ColorLineFinder,
|
147
|
+
layout: :seven_slot,
|
148
|
+
metadata_map: {0 => :collecting_event_labels, 1 => :determination_labels, 2 => :other_labels, 3 => :image_registration, 4 => :curator_metadata, 5 => :identifier, 6 => :specimen }
|
126
149
|
}
|
127
150
|
}
|
128
151
|
|
data/spec/lib/stage_handling/seven_slot_spec.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe 'handling 7 slot stages' do
|
4
|
+
|
5
|
+
let(:image) { ImageHelpers.inhs_stage_7_slot }
|
6
|
+
let(:sqed) { Sqed.new( image: image, pattern: :seven_slot, boundary_color: :red, has_border: false ) }
|
7
|
+
|
8
|
+
context 'parses' do
|
9
|
+
specify 'new() without errors' do
|
10
|
+
expect( sqed ).to be_truthy
|
11
|
+
end
|
12
|
+
|
13
|
+
specify 'get_result without errors' do
|
14
|
+
expect( sqed.result ).to be_truthy
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
Binary file
|
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sqed
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matt Yoder
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-
|
12
|
+
date: 2015-12-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -176,6 +176,7 @@ files:
|
|
176
176
|
- spec/lib/sqed/parser_spec.rb
|
177
177
|
- spec/lib/sqed/result_spec.rb
|
178
178
|
- spec/lib/sqed_spec.rb
|
179
|
+
- spec/lib/stage_handling/seven_slot_spec.rb
|
179
180
|
- spec/spec_helper.rb
|
180
181
|
- spec/support/files/barcode_images/code_128_barcode.png
|
181
182
|
- spec/support/files/barcode_images/datamatrix_barcode.png
|
@@ -197,6 +198,8 @@ files:
|
|
197
198
|
- spec/support/files/stage_images/frost_stage_medium.jpg
|
198
199
|
- spec/support/files/stage_images/frost_stage_thumb.jpg
|
199
200
|
- spec/support/files/stage_images/greenlineimage.jpg
|
201
|
+
- spec/support/files/stage_images/inhs_7_slot.jpg
|
202
|
+
- spec/support/files/stage_images/inhs_four_thirds.jpg
|
200
203
|
- spec/support/files/test0.jpg
|
201
204
|
- spec/support/files/test1.jpg
|
202
205
|
- spec/support/files/test2.jpg
|
@@ -242,6 +245,7 @@ test_files:
|
|
242
245
|
- spec/lib/sqed/parser_spec.rb
|
243
246
|
- spec/lib/sqed/result_spec.rb
|
244
247
|
- spec/lib/sqed_spec.rb
|
248
|
+
- spec/lib/stage_handling/seven_slot_spec.rb
|
245
249
|
- spec/spec_helper.rb
|
246
250
|
- spec/support/files/barcode_images/code_128_barcode.png
|
247
251
|
- spec/support/files/barcode_images/datamatrix_barcode.png
|
@@ -263,6 +267,8 @@ test_files:
|
|
263
267
|
- spec/support/files/stage_images/frost_stage_medium.jpg
|
264
268
|
- spec/support/files/stage_images/frost_stage_thumb.jpg
|
265
269
|
- spec/support/files/stage_images/greenlineimage.jpg
|
270
|
+
- spec/support/files/stage_images/inhs_7_slot.jpg
|
271
|
+
- spec/support/files/stage_images/inhs_four_thirds.jpg
|
266
272
|
- spec/support/files/test0.jpg
|
267
273
|
- spec/support/files/test1.jpg
|
268
274
|
- spec/support/files/test2.jpg
|