sqed 0.0.4 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +2 -2
- data/lib/sqed/boundaries.rb +22 -7
- data/lib/sqed/boundary_finder/color_line_finder.rb +26 -19
- data/lib/sqed/boundary_finder.rb +2 -2
- data/lib/sqed/extractor.rb +33 -17
- data/lib/sqed/parser/barcode_parser.rb +32 -16
- data/lib/sqed/parser/ocr_parser.rb +24 -6
- data/lib/sqed/parser.rb +7 -0
- data/lib/sqed/result.rb +54 -6
- data/lib/sqed/version.rb +1 -1
- data/lib/sqed.rb +37 -12
- data/lib/sqed_config.rb +14 -8
- data/spec/lib/sqed/boundaries_spec.rb +26 -1
- data/spec/lib/sqed/boundary_finder_spec.rb +89 -1
- data/spec/lib/sqed/extractor_spec.rb +50 -76
- data/spec/lib/sqed/parser/barcode_spec.rb +25 -0
- data/spec/lib/sqed/parser/ocr_spec.rb +16 -0
- data/spec/lib/sqed/parser_spec.rb +6 -1
- data/spec/lib/sqed/result_spec.rb +24 -0
- data/spec/lib/sqed_spec.rb +58 -144
- data/spec/support/files/barcode_images/code_128_barcode.png +0 -0
- data/spec/support/files/barcode_images/datamatrix_barcode.png +0 -0
- data/spec/support/files/{2Dbarcode.png → barcode_images/osuc_datamatrix_barcode.png} +0 -0
- data/spec/support/files/label_images/basic1.png +0 -0
- data/spec/support/files/label_images/basic2.png +0 -0
- data/spec/support/files/label_images/readme.png +0 -0
- data/spec/support/files/{types_21.jpg → misc_images/types_21.jpg} +0 -0
- data/spec/support/files/{types_8.jpg → misc_images/types_8.jpg} +0 -0
- data/spec/support/files/{CrossyBlackLinesSpecimen.jpg → stage_images/CrossyBlackLinesSpecimen.jpg} +0 -0
- data/spec/support/files/{CrossyGreenLinesSpecimen.jpg → stage_images/CrossyGreenLinesSpecimen.jpg} +0 -0
- data/spec/support/files/{black_stage_green_line_specimen.jpg → stage_images/black_stage_green_line_specimen.jpg} +0 -0
- data/spec/support/files/{boundary_cross_green.jpg → stage_images/boundary_cross_green.jpg} +0 -0
- data/spec/support/files/{boundary_left_t_yellow.jpg → stage_images/boundary_left_t_yellow.jpg} +0 -0
- data/spec/support/files/{boundary_offset_cross_red.jpg → stage_images/boundary_offset_cross_red.jpg} +0 -0
- data/spec/support/files/{boundary_right_t_green.jpg → stage_images/boundary_right_t_green.jpg} +0 -0
- data/spec/support/files/stage_images/frost_stage.jpg +0 -0
- data/spec/support/files/{greenlineimage.jpg → stage_images/greenlineimage.jpg} +0 -0
- data/spec/support/files/test4.jpg +0 -0
- data/spec/support/image_helpers.rb +61 -33
- data/sqed.gemspec +3 -2
- metadata +58 -34
- data/spec/support/files/Quadrant_2_3.jpg +0 -0
- data/spec/support/files/test4OLD.jpg +0 -0
- data/spec/support/files/test_barcode.JPG +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4600b58a8dca4c59c21e0fcaa08c63e7564a56f4
|
4
|
+
data.tar.gz: f1f96f8e93e988bd53ca354c4fe3a67f359b1368
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d7cb582b1d6db468617a132ce67a0dccab2315911eb650468ef6d0e756333cfcca710969d9d845dbe8c3149059499c486845072a053f5674412af268928e2f72
|
7
|
+
data.tar.gz: 9a24446ca6fb5caf9d7a9c436013e31cc6ceeb09e4c06f1968ee4cb5a2d544bd939d7b59e3bda023b2cfea38c4eb0cb1182e0971290738c44c8135a33d046b08
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
|
4
4
|
# Sqed
|
5
5
|
|
6
|
-
|
6
|
+
Sqed is a gem that facilitates metadata extraction from images of staged collection objects.
|
7
7
|
|
8
8
|
## Installation
|
9
9
|
|
@@ -21,7 +21,7 @@ Or install it yourself as:
|
|
21
21
|
|
22
22
|
## Usage
|
23
23
|
|
24
|
-
|
24
|
+
For the time being, see specs.
|
25
25
|
|
26
26
|
## Contributing
|
27
27
|
|
data/lib/sqed/boundaries.rb
CHANGED
@@ -12,13 +12,15 @@ class Sqed::Boundaries
|
|
12
12
|
# 0 => [10,10,40,40]
|
13
13
|
attr_reader :coordinates
|
14
14
|
|
15
|
-
#
|
15
|
+
# A symbol from Sqed::Config::LAYOUTS.keys
|
16
|
+
# :right_t
|
16
17
|
attr_accessor :layout
|
17
18
|
|
18
|
-
#
|
19
|
+
# Boolean, whether or not the last method to populate this object passed fully
|
19
20
|
attr_accessor :complete
|
20
21
|
|
21
22
|
def initialize(layout = nil)
|
23
|
+
raise 'unrecognized layout' if layout && !SqedConfig::LAYOUTS.include?(layout)
|
22
24
|
@complete = false
|
23
25
|
|
24
26
|
@layout = layout
|
@@ -35,11 +37,12 @@ class Sqed::Boundaries
|
|
35
37
|
def offset(boundary)
|
36
38
|
b = Sqed::Boundaries.new() # the idea here is to create a deep copy of self, offsetting by boundary as we go
|
37
39
|
(0..self.coordinates.length - 1).each do |i|
|
38
|
-
b.
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
40
|
+
b.set(i,
|
41
|
+
[(self.x_for(i) + boundary.x_for(0)),
|
42
|
+
(self.y_for(i) + boundary.y_for(0)),
|
43
|
+
self.width_for(i),
|
44
|
+
self.height_for(i)]
|
45
|
+
)
|
43
46
|
end
|
44
47
|
b.complete = self.complete
|
45
48
|
b
|
@@ -76,4 +79,16 @@ class Sqed::Boundaries
|
|
76
79
|
@coordinates[index][3]
|
77
80
|
end
|
78
81
|
|
82
|
+
def set(index, coordinates)
|
83
|
+
@coordinates[index] = coordinates
|
84
|
+
end
|
85
|
+
|
86
|
+
def populated?
|
87
|
+
@coordinates.each do |c|
|
88
|
+
return false if c[0].nil?
|
89
|
+
end
|
90
|
+
true
|
91
|
+
end
|
92
|
+
|
93
|
+
|
79
94
|
end
|
@@ -19,15 +19,15 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
|
19
19
|
when :vertical_split # can vertical and horizontal split be re-used to do cross cases?
|
20
20
|
t = Sqed::BoundaryFinder.color_boundary_finder(image: img, boundary_color: @boundary_color) #detect vertical division, green line
|
21
21
|
return if t.nil?
|
22
|
-
boundaries.
|
23
|
-
boundaries.
|
22
|
+
boundaries.set(0, [0, 0, t[0], img.rows]) # left section of image
|
23
|
+
boundaries.set(1, [t[2], 0, img.columns - t[2], img.rows]) # right section of image
|
24
24
|
boundaries.complete = true
|
25
25
|
|
26
26
|
when :horizontal_split
|
27
27
|
t = Sqed::BoundaryFinder.color_boundary_finder(image: img, scan: :columns, boundary_color: @boundary_color) # set to detect horizontal division, (green line)
|
28
28
|
return if t.nil?
|
29
|
-
boundaries.
|
30
|
-
boundaries.
|
29
|
+
boundaries.set(0, [0, 0, img.columns, t[0]]) # upper section of image
|
30
|
+
boundaries.set(1, [0, t[2], img.columns, img.rows - t[2]]) # lower section of image
|
31
31
|
boundaries.complete = true
|
32
32
|
# boundaries.coordinates[2] = [0, 0, img.columns, t[1]] # upper section of image
|
33
33
|
# boundaries.coordinates[3] = [0, t[1], img.columns, img.rows - t[1]] # lower section of image
|
@@ -35,45 +35,52 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
|
35
35
|
when :right_t # only 3 zones expected, with horizontal division in right-side of vertical division
|
36
36
|
t = Sqed::BoundaryFinder.color_boundary_finder(image: img, boundary_color: @boundary_color) #defaults to detect vertical division, green line
|
37
37
|
return if t.nil?
|
38
|
-
|
39
|
-
|
38
|
+
|
39
|
+
left = [0, 0, t[0], img.rows]
|
40
|
+
right = [t[2], 0, img.columns - t[2], img.rows]
|
41
|
+
|
42
|
+
boundaries.set(0, left) # left section of image
|
40
43
|
|
41
44
|
# now subdivide right side
|
42
|
-
irt = img.crop(*
|
45
|
+
irt = img.crop(*right, true)
|
43
46
|
rt = Sqed::BoundaryFinder.color_boundary_finder(image: irt, scan: :columns, boundary_color: @boundary_color) # set to detect horizontal division, (green line)
|
44
47
|
return if rt.nil?
|
45
|
-
boundaries.
|
46
|
-
boundaries.
|
48
|
+
boundaries.set(1, [t[2], 0, img.columns - t[2], rt[0]]) # upper section of image
|
49
|
+
boundaries.set(2, [t[2], rt[2], img.columns - t[2], img.rows - rt[2]]) # lower section of image
|
47
50
|
boundaries.complete = true
|
48
51
|
# will return 1, 2, or 3
|
49
52
|
|
50
53
|
when :offset_cross # 4 zones expected, with horizontal division in right- and left- sides of vertical division
|
51
54
|
t = Sqed::BoundaryFinder.color_boundary_finder(image: img, boundary_color: @boundary_color) # defaults to detect vertical division, green line
|
52
55
|
raise if t.nil?
|
53
|
-
|
54
|
-
|
56
|
+
|
57
|
+
left = [0, 0, t[0], img.rows] # left section of image
|
58
|
+
right = [t[2], 0, img.columns - t[2], img.rows] # right section of image
|
55
59
|
|
56
60
|
# now subdivide left side
|
57
|
-
ilt = img.crop(*
|
61
|
+
ilt = img.crop(*left, true)
|
58
62
|
|
59
63
|
lt = Sqed::BoundaryFinder.color_boundary_finder(image: ilt, scan: :columns, boundary_color: @boundary_color) # set to detect horizontal division, (green line)
|
64
|
+
|
60
65
|
if !lt.nil?
|
61
|
-
boundaries.
|
62
|
-
boundaries.
|
66
|
+
boundaries.set(0, [0, 0, left[2], lt[0]]) # upper section of image
|
67
|
+
boundaries.set(3, [0, lt[2], left[2], img.rows - lt[2]]) # lower section of image
|
63
68
|
end
|
64
69
|
|
65
70
|
# now subdivide right side
|
66
|
-
irt = img.crop(*
|
71
|
+
irt = img.crop(*right, true)
|
67
72
|
rt = Sqed::BoundaryFinder.color_boundary_finder(image: irt, scan: :columns, boundary_color: @boundary_color) # set to detect horizontal division, (green line)
|
68
73
|
return if rt.nil?
|
69
74
|
|
70
|
-
boundaries.
|
71
|
-
boundaries.
|
75
|
+
boundaries.set(1, [t[2], 0, img.columns - t[2], rt[0]]) # upper section of image
|
76
|
+
boundaries.set(2, [t[2], rt[2], img.columns - t[2], img.rows - rt[2]]) # lower section of image
|
72
77
|
# will return 1, 2, 3, or 4 //// does not handle staggered vertical boundary case
|
73
|
-
|
78
|
+
|
79
|
+
boundaries.complete = true if boundaries.populated?
|
74
80
|
|
75
81
|
else
|
76
|
-
boundaries.
|
82
|
+
boundaries.set(0, [0, 0, img.columns, img.rows]) # totality of image as default
|
83
|
+
# TODO: boundaries.complete status here?
|
77
84
|
return # return original image boundary if no method implemented
|
78
85
|
end
|
79
86
|
|
data/lib/sqed/boundary_finder.rb
CHANGED
@@ -52,9 +52,9 @@ class Sqed::BoundaryFinder
|
|
52
52
|
(0..samples_to_take).each do |s|
|
53
53
|
# Create a sample image a single pixel tall
|
54
54
|
if scan == :rows
|
55
|
-
j = image.crop(0, s * sample_subdivision_size, image.columns, 1)
|
55
|
+
j = image.crop(0, s * sample_subdivision_size, image.columns, 1, true)
|
56
56
|
elsif scan == :columns
|
57
|
-
j = image.crop(s * sample_subdivision_size, 0, 1, image.rows)
|
57
|
+
j = image.crop(s * sample_subdivision_size, 0, 1, image.rows, true)
|
58
58
|
else
|
59
59
|
raise
|
60
60
|
end
|
data/lib/sqed/extractor.rb
CHANGED
@@ -1,45 +1,61 @@
|
|
1
1
|
require 'RMagick'
|
2
2
|
|
3
|
-
# An Extractor takes Boundries object and a
|
3
|
+
# An Extractor takes a Boundaries object and a metadata_map pattern and returns a Sqed::Result
|
4
4
|
#
|
5
5
|
class Sqed::Extractor
|
6
6
|
|
7
|
-
|
7
|
+
# a Sqed::Boundaries instance
|
8
|
+
attr_accessor :boundaries
|
8
9
|
|
9
|
-
|
10
|
-
|
11
|
-
|
10
|
+
# a metadata_map hash from EXTRACTION_PATTERNS like:
|
11
|
+
# {0 => :annotated_specimen, 1 => :identifier, 2 =>:image_registration }
|
12
|
+
attr_accessor :metadata_map
|
12
13
|
|
13
|
-
|
14
|
+
# a Magick::Image file
|
15
|
+
attr_accessor :image
|
16
|
+
|
17
|
+
def initialize(boundaries: boundaries, metadata_map: metadata_map, image: image)
|
18
|
+
raise 'boundaries not provided or provided boundary is not a Sqed::Boundaries' if boundaries.nil? || !boundaries.class == Sqed::Boundaries
|
19
|
+
raise 'metadata_map not provided or metadata_map not a Hash' if metadata_map.nil? || !metadata_map.class == Hash
|
20
|
+
raise 'image not provided' if image.nil? || !image.class == Magick::Image
|
21
|
+
|
22
|
+
@metadata_map = metadata_map
|
14
23
|
@boundaries = boundaries
|
15
24
|
@image = image
|
16
25
|
end
|
17
26
|
|
18
27
|
def result
|
19
28
|
r = Sqed::Result.new()
|
20
|
-
|
29
|
+
|
21
30
|
# assign the images to the result
|
22
|
-
boundaries.each do |
|
23
|
-
|
31
|
+
boundaries.each do |section_index, coords|
|
32
|
+
image_setter = "#{metadata_map[section_index]}_image="
|
33
|
+
r.send(image_setter, extract_image(coords))
|
24
34
|
end
|
25
35
|
|
26
36
|
# assign the metadata to the result
|
27
|
-
|
37
|
+
metadata_map.each do |section_index, section_type|
|
28
38
|
# only extract data if a parser exists
|
29
|
-
if
|
30
|
-
|
39
|
+
if parsers = SqedConfig::SECTION_PARSERS[section_type]
|
40
|
+
|
41
|
+
section_image = r.send("#{section_type}_image")
|
42
|
+
updated = r.send(section_type)
|
43
|
+
|
44
|
+
parsers.each do |p|
|
45
|
+
parsed_result = p.new(section_image).text
|
46
|
+
updated.merge!(p::TYPE => parsed_result) if parsed_result
|
47
|
+
end
|
48
|
+
|
49
|
+
r.send("#{section_type}=", updated)
|
31
50
|
end
|
32
51
|
end
|
33
52
|
|
34
53
|
r
|
35
54
|
end
|
36
55
|
|
37
|
-
#
|
56
|
+
# crop takes x, y, width, height
|
38
57
|
def extract_image(coords)
|
39
|
-
|
40
|
-
# @image.crop(coords[0], coords[1], coords[2] - coords[0], coords[3] - coords[1] )
|
41
|
-
bp = 0
|
42
|
-
@image.crop(coords[0], coords[1], coords[2], coords[3], true)
|
58
|
+
i = @image.crop(*coords, true)
|
43
59
|
end
|
44
60
|
|
45
61
|
end
|
@@ -1,27 +1,43 @@
|
|
1
1
|
# Given an image, return an ordered array of detectable barcodes
|
2
2
|
|
3
|
+
|
4
|
+
|
3
5
|
class Sqed::Parser::BarcodeParser < Sqed::Parser
|
4
|
-
|
6
|
+
|
7
|
+
TYPE = :barcode
|
8
|
+
|
9
|
+
attr_accessor :image
|
10
|
+
|
11
|
+
attr_accessor :barcode
|
5
12
|
|
6
13
|
def initialize(image)
|
7
14
|
super
|
8
|
-
@
|
15
|
+
@image = image
|
9
16
|
end
|
10
17
|
|
11
|
-
def
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
18
|
+
def barcode
|
19
|
+
@barcode ||= get_barcode
|
20
|
+
@barcode
|
21
|
+
end
|
22
|
+
|
23
|
+
# Uses the same engine as zbarimg that you can install with brew (zbarimg)
|
24
|
+
#
|
25
|
+
def get_code_128
|
26
|
+
ZXing.decode @image.filename
|
27
|
+
end
|
28
|
+
|
29
|
+
# try a bunch of options, organized by most common, give the first hit
|
30
|
+
def get_barcode
|
31
|
+
[get_code_128].compact.first
|
32
|
+
end
|
33
|
+
|
34
|
+
#def get_datamatrix
|
35
|
+
# https://github.com/srijan/ruby-dmtx
|
36
|
+
#end
|
37
|
+
|
38
|
+
# alias to a universal method
|
39
|
+
def text
|
40
|
+
barcode
|
25
41
|
end
|
26
42
|
|
27
43
|
end
|
@@ -7,8 +7,13 @@
|
|
7
7
|
require 'rtesseract'
|
8
8
|
|
9
9
|
class Sqed::Parser::OcrParser < Sqed::Parser
|
10
|
-
attr_accessor :text
|
11
10
|
|
11
|
+
TYPE = :text
|
12
|
+
|
13
|
+
# the text extracted from the image
|
14
|
+
attr_accessor :text
|
15
|
+
|
16
|
+
# https://code.google.com/p/tesseract-ocr/wiki/FAQ
|
12
17
|
def text
|
13
18
|
img = @image #.white_threshold(245)
|
14
19
|
|
@@ -19,9 +24,6 @@ class Sqed::Parser::OcrParser < Sqed::Parser
|
|
19
24
|
# img = img.scale(2)
|
20
25
|
# img.write('foo0.jpg.jpg')
|
21
26
|
# img = img.enhance
|
22
|
-
# img = img.enhance
|
23
|
-
# img = img.enhance
|
24
|
-
# img = img.enhance
|
25
27
|
# img.write('foo1.jpg')
|
26
28
|
# img = img.quantize(8, Magick::GRAYColorspace)
|
27
29
|
# img.write('foo1.jpg')
|
@@ -39,12 +41,28 @@ class Sqed::Parser::OcrParser < Sqed::Parser
|
|
39
41
|
#
|
40
42
|
# img.write('foo.jpg') # for debugging purposes, this is the image that is sent to OCR
|
41
43
|
|
42
|
-
r = RTesseract.new(img, lang: 'eng', psm: 3)
|
43
44
|
|
45
|
+
# From https://code.google.com/p/tesseract-ocr/wiki/FAQ
|
46
|
+
# " There is a minimum text size for reasonable accuracy. You have to consider resolution as well as point size. Accuracy drops off below 10pt x 300dpi, rapidly below 8pt x 300dpi. A quick check is to count the pixels of the x-height of your characters. (X-height is the height of the lower case x.) At 10pt x 300dpi x-heights are typically about 20 pixels, although this can vary dramatically from font to font. Below an x-height of 10 pixels, you have very little chance of accurate results, and below about 8 pixels, most of the text will be "noise removed".
|
47
|
+
|
48
|
+
|
49
|
+
# http://www.sk-spell.sk.cx/tesseract-ocr-parameters-in-302-version
|
50
|
+
# doesn't support outputbase
|
51
|
+
r = RTesseract.new(img, lang: 'eng', psm: 1,
|
52
|
+
load_system_dawg: 0,
|
53
|
+
tessedit_debug_quality_metrics: 1,
|
54
|
+
load_freq_dawg: 1 ,
|
55
|
+
chop_enable: 1,
|
56
|
+
tessedit_write_images: 1,
|
57
|
+
equationdetect_save_merged_image: 1,
|
58
|
+
tessedit_dump_pageseg_images: 1,
|
59
|
+
equationdetect_save_bi_image: 1,
|
60
|
+
load_unambig_dawg: 0,
|
61
|
+
tessedit_write_params_to_file: 'tmp/ocr_config_file.txt' ) # psm: 3,
|
44
62
|
|
45
63
|
# img = img.white_threshold(245)
|
46
64
|
|
47
|
-
@text = r.to_s
|
65
|
+
@text = r.to_s.strip
|
48
66
|
end
|
49
67
|
|
50
68
|
# Need to provide tuning methods here, i.e. image transformations that facilitate OCR
|
data/lib/sqed/parser.rb
CHANGED
data/lib/sqed/result.rb
CHANGED
@@ -1,15 +1,63 @@
|
|
1
|
-
|
2
|
-
#
|
3
|
-
# full process of data extraction from an image.
|
4
|
-
#
|
5
|
-
#
|
1
|
+
# A Sqed::Result is a container for the results of the
|
2
|
+
# data extraction for the full stage
|
6
3
|
#
|
7
4
|
class Sqed::Result
|
8
5
|
|
9
6
|
SqedConfig::LAYOUT_SECTION_TYPES.each do |k|
|
10
|
-
attr_accessor k
|
11
7
|
attr_accessor "#{k}_image".to_sym
|
8
|
+
attr_accessor k
|
9
|
+
end
|
10
|
+
|
11
|
+
def initialize
|
12
|
+
SqedConfig::LAYOUT_SECTION_TYPES.each do |k|
|
13
|
+
send("#{k}=", {})
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
# return [String, nil]
|
18
|
+
# the text derived from the OCR parsing of the section
|
19
|
+
def text_for(section)
|
20
|
+
send(section)[:text]
|
12
21
|
end
|
22
|
+
|
23
|
+
# return [String, nil]
|
24
|
+
# the text derived from the barcode parsing of the section
|
25
|
+
def barcode_text_for(section)
|
26
|
+
send(section)[:barcode]
|
27
|
+
end
|
28
|
+
|
29
|
+
# return [Hash]
|
30
|
+
# a map of layout_section_type => value (if there is a value),
|
31
|
+
# i.e. all possible parsed text values returned from the parser
|
32
|
+
def text
|
33
|
+
result = {}
|
34
|
+
SqedConfig::LAYOUT_SECTION_TYPES.each do |k|
|
35
|
+
v = self.send(k)
|
36
|
+
result.merge!(k => v) if v[:barcode] || v[:text]
|
37
|
+
end
|
38
|
+
result
|
39
|
+
end
|
40
|
+
|
41
|
+
# return [Hash]
|
42
|
+
# a map of layout_section_type => Image
|
43
|
+
def images
|
44
|
+
result = {}
|
45
|
+
SqedConfig::LAYOUT_SECTION_TYPES.each do |k|
|
46
|
+
image = self.send("#{k}_image")
|
47
|
+
result.merge!(k => image) if image
|
48
|
+
end
|
49
|
+
result
|
50
|
+
end
|
51
|
+
|
52
|
+
# return [True]
|
53
|
+
# write the images in #images to tmp/
|
54
|
+
def write_images
|
55
|
+
images.each do |k, img|
|
56
|
+
img.write("tmp/#{k}.jpg")
|
57
|
+
end
|
58
|
+
true
|
59
|
+
end
|
60
|
+
|
13
61
|
end
|
14
62
|
|
15
63
|
|
data/lib/sqed/version.rb
CHANGED
data/lib/sqed.rb
CHANGED
@@ -4,7 +4,7 @@ recent_ruby = RUBY_VERSION >= '2.1.1'
|
|
4
4
|
raise "IMPORTANT: sqed gem requires ruby >= 2.1.1" unless recent_ruby
|
5
5
|
|
6
6
|
require "RMagick"
|
7
|
-
|
7
|
+
include Magick
|
8
8
|
|
9
9
|
# Instances take the following
|
10
10
|
# 1) A base image @image
|
@@ -28,19 +28,27 @@ class Sqed
|
|
28
28
|
attr_accessor :pattern
|
29
29
|
|
30
30
|
# the image that is the cropped content for parsing
|
31
|
-
attr_accessor :stage_image
|
31
|
+
attr_accessor :stage_image
|
32
|
+
|
33
|
+
# a Sqed::Boundaries instance that stores the coordinates of the stage
|
34
|
+
attr_accessor :stage_boundary
|
35
|
+
|
36
|
+
# a Sqed::Boundaries instance that contains the coordinates of the internal stage sections
|
37
|
+
attr_accessor :boundaries
|
38
|
+
|
39
|
+
# Boolean, whether to detect the border on initialization, i.e. new()
|
40
|
+
attr_accessor :auto_detect_border
|
41
|
+
|
42
|
+
# a symbol, :red, :green, :blue, describing the boundary color within the stage
|
43
|
+
attr_accessor :boundary_color
|
32
44
|
|
33
45
|
def initialize(image: image, pattern: pattern, auto_detect_border: true, boundary_color: :green)
|
34
46
|
@image = image
|
35
|
-
|
36
47
|
@boundaries = nil
|
37
|
-
@stage_boundary = Sqed::Boundaries.new(:internal_box)
|
38
|
-
|
48
|
+
@stage_boundary = Sqed::Boundaries.new(:internal_box)
|
39
49
|
@auto_detect_border = auto_detect_border
|
40
|
-
|
41
50
|
@pattern = pattern
|
42
51
|
@pattern ||= :standard_cross
|
43
|
-
|
44
52
|
@boundary_color = boundary_color
|
45
53
|
|
46
54
|
set_stage_boundary if @auto_detect_border && @image
|
@@ -60,6 +68,8 @@ class Sqed
|
|
60
68
|
@boundaries
|
61
69
|
end
|
62
70
|
|
71
|
+
# Return [Sqed::Boundaries instance]
|
72
|
+
# a boundaries instance that has the original image (prior to cropping stage) coordinates
|
63
73
|
def native_boundaries
|
64
74
|
# check for @boundaries.complete first? OR handle partial detections ?!
|
65
75
|
if @boundaries.complete
|
@@ -69,14 +79,18 @@ class Sqed
|
|
69
79
|
end
|
70
80
|
end
|
71
81
|
|
82
|
+
# return [Image]
|
83
|
+
# crops the image if not already done
|
72
84
|
def stage_image
|
73
85
|
crop_image if @stage_boundary.complete && @stage_image.nil?
|
74
86
|
@stage_image
|
75
87
|
end
|
76
88
|
|
89
|
+
# return [Image]
|
90
|
+
# crops the stage if not done, then sets/returns @stage_image
|
77
91
|
def crop_image
|
78
92
|
if @stage_boundary.complete
|
79
|
-
@stage_image = @image.crop(*@stage_boundary.for(SqedConfig.index_for_section_type(:stage, :stage)))
|
93
|
+
@stage_image = @image.crop(*@stage_boundary.for(SqedConfig.index_for_section_type(:stage, :stage)), true)
|
80
94
|
else
|
81
95
|
@stage_image = @image
|
82
96
|
end
|
@@ -84,14 +98,24 @@ class Sqed
|
|
84
98
|
|
85
99
|
def result
|
86
100
|
return false if @image.nil? || @pattern.nil?
|
87
|
-
crop_image
|
88
101
|
extractor = Sqed::Extractor.new(
|
89
|
-
boundaries:
|
90
|
-
|
91
|
-
image:
|
102
|
+
boundaries: boundaries,
|
103
|
+
metadata_map: SqedConfig::EXTRACTION_PATTERNS[@pattern][:metadata_map],
|
104
|
+
image: stage_image)
|
92
105
|
extractor.result
|
93
106
|
end
|
94
107
|
|
108
|
+
def attributes
|
109
|
+
{
|
110
|
+
image: @image,
|
111
|
+
boundaries: @boundaries,
|
112
|
+
stage_boundary: @stage_boundary,
|
113
|
+
auto_detect_border: @auto_detect_border,
|
114
|
+
pattern: @pattern,
|
115
|
+
boundary_color: @boundary_color
|
116
|
+
}
|
117
|
+
end
|
118
|
+
|
95
119
|
protected
|
96
120
|
|
97
121
|
def set_stage_boundary
|
@@ -103,6 +127,7 @@ class Sqed
|
|
103
127
|
|
104
128
|
def get_section_boundaries
|
105
129
|
boundary_finder_class = SqedConfig::EXTRACTION_PATTERNS[@pattern][:boundary_finder]
|
130
|
+
|
106
131
|
options = {image: stage_image}
|
107
132
|
options.merge!( layout: SqedConfig::EXTRACTION_PATTERNS[@pattern][:layout] ) unless boundary_finder_class.name == 'Sqed::BoundaryFinder::CrossFinder'
|
108
133
|
options.merge!( boundary_color: @boundary_color) if boundary_finder_class.name == 'Sqed::BoundaryFinder::ColorLineFinder'
|
data/lib/sqed_config.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
+
require 'zxing'
|
4
|
+
|
3
5
|
require_relative "sqed/parser"
|
4
6
|
require_relative "sqed/parser/ocr_parser"
|
5
7
|
require_relative "sqed/parser/barcode_parser"
|
@@ -51,7 +53,6 @@ module SqedConfig
|
|
51
53
|
|
52
54
|
# Hash values are used to stub out
|
53
55
|
# the Sqed::Boundaries instance.
|
54
|
-
#
|
55
56
|
LAYOUTS = {
|
56
57
|
cross: [0,1,2,3],
|
57
58
|
offset_cross: [0,1,2,3],
|
@@ -62,7 +63,7 @@ module SqedConfig
|
|
62
63
|
internal_box: [0]
|
63
64
|
}
|
64
65
|
|
65
|
-
#
|
66
|
+
# Each element of the layout is a "section".
|
66
67
|
LAYOUT_SECTION_TYPES = [
|
67
68
|
:stage, # the image contains the full stage
|
68
69
|
:specimen, # the specimen only, no metadata should be present
|
@@ -70,26 +71,31 @@ module SqedConfig
|
|
70
71
|
:determination_labels, # the section contains text that determines the specimen
|
71
72
|
:labels, # the section contains collecting event and non-determination labels
|
72
73
|
:identifier, # the section contains an identifier (e.g. barcode or unique number)
|
73
|
-
:image_registration
|
74
|
+
:image_registration, # the section contains only image registration information,
|
75
|
+
:curator_metadata, # the section contains text with curator metadata
|
76
|
+
:nothing # section is empty
|
74
77
|
]
|
75
78
|
|
76
79
|
# Links section types to data parsers
|
77
80
|
SECTION_PARSERS = {
|
78
|
-
labels: Sqed::Parser::OcrParser,
|
79
|
-
identifier: Sqed::Parser::BarcodeParser,
|
80
|
-
deterimination_labels: Sqed::Parser::OcrParser
|
81
|
+
labels: [ Sqed::Parser::OcrParser ],
|
82
|
+
identifier: [ Sqed::Parser::BarcodeParser, Sqed::Parser::OcrParser ],
|
83
|
+
deterimination_labels: [ Sqed::Parser::OcrParser ],
|
84
|
+
curator_metadata: [ Sqed::Parser::OcrParser ],
|
85
|
+
specimen: [ Sqed::Parser::OcrParser ],
|
81
86
|
}
|
82
87
|
|
83
88
|
EXTRACTION_PATTERNS = {
|
84
89
|
right_t: {
|
85
90
|
boundary_finder: Sqed::BoundaryFinder::ColorLineFinder,
|
86
91
|
layout: :right_t,
|
87
|
-
metadata_map: {0 => :annotated_specimen, 1 => :
|
92
|
+
metadata_map: {0 => :annotated_specimen, 1 => :identifier, 2 =>:image_registration }
|
88
93
|
},
|
89
94
|
offset_cross: {
|
90
95
|
boundary_finder: Sqed::BoundaryFinder::ColorLineFinder,
|
91
96
|
layout: :offset_cross,
|
92
|
-
metadata_map: {0 => :
|
97
|
+
metadata_map: {0 => :curator_metadata, 1 => :identifier, 2 => :image_registration, 3 => :specimen }
|
98
|
+
# metadata_map: {0 => :annotated_specimen, 1 => :identifier, 2 =>:image_registration }
|
93
99
|
},
|
94
100
|
standard_cross: {
|
95
101
|
boundary_finder: Sqed::BoundaryFinder::CrossFinder,
|