sqed 0.0.4 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +2 -2
- data/lib/sqed/boundaries.rb +22 -7
- data/lib/sqed/boundary_finder/color_line_finder.rb +26 -19
- data/lib/sqed/boundary_finder.rb +2 -2
- data/lib/sqed/extractor.rb +33 -17
- data/lib/sqed/parser/barcode_parser.rb +32 -16
- data/lib/sqed/parser/ocr_parser.rb +24 -6
- data/lib/sqed/parser.rb +7 -0
- data/lib/sqed/result.rb +54 -6
- data/lib/sqed/version.rb +1 -1
- data/lib/sqed.rb +37 -12
- data/lib/sqed_config.rb +14 -8
- data/spec/lib/sqed/boundaries_spec.rb +26 -1
- data/spec/lib/sqed/boundary_finder_spec.rb +89 -1
- data/spec/lib/sqed/extractor_spec.rb +50 -76
- data/spec/lib/sqed/parser/barcode_spec.rb +25 -0
- data/spec/lib/sqed/parser/ocr_spec.rb +16 -0
- data/spec/lib/sqed/parser_spec.rb +6 -1
- data/spec/lib/sqed/result_spec.rb +24 -0
- data/spec/lib/sqed_spec.rb +58 -144
- data/spec/support/files/barcode_images/code_128_barcode.png +0 -0
- data/spec/support/files/barcode_images/datamatrix_barcode.png +0 -0
- data/spec/support/files/{2Dbarcode.png → barcode_images/osuc_datamatrix_barcode.png} +0 -0
- data/spec/support/files/label_images/basic1.png +0 -0
- data/spec/support/files/label_images/basic2.png +0 -0
- data/spec/support/files/label_images/readme.png +0 -0
- data/spec/support/files/{types_21.jpg → misc_images/types_21.jpg} +0 -0
- data/spec/support/files/{types_8.jpg → misc_images/types_8.jpg} +0 -0
- data/spec/support/files/{CrossyBlackLinesSpecimen.jpg → stage_images/CrossyBlackLinesSpecimen.jpg} +0 -0
- data/spec/support/files/{CrossyGreenLinesSpecimen.jpg → stage_images/CrossyGreenLinesSpecimen.jpg} +0 -0
- data/spec/support/files/{black_stage_green_line_specimen.jpg → stage_images/black_stage_green_line_specimen.jpg} +0 -0
- data/spec/support/files/{boundary_cross_green.jpg → stage_images/boundary_cross_green.jpg} +0 -0
- data/spec/support/files/{boundary_left_t_yellow.jpg → stage_images/boundary_left_t_yellow.jpg} +0 -0
- data/spec/support/files/{boundary_offset_cross_red.jpg → stage_images/boundary_offset_cross_red.jpg} +0 -0
- data/spec/support/files/{boundary_right_t_green.jpg → stage_images/boundary_right_t_green.jpg} +0 -0
- data/spec/support/files/stage_images/frost_stage.jpg +0 -0
- data/spec/support/files/{greenlineimage.jpg → stage_images/greenlineimage.jpg} +0 -0
- data/spec/support/files/test4.jpg +0 -0
- data/spec/support/image_helpers.rb +61 -33
- data/sqed.gemspec +3 -2
- metadata +58 -34
- data/spec/support/files/Quadrant_2_3.jpg +0 -0
- data/spec/support/files/test4OLD.jpg +0 -0
- data/spec/support/files/test_barcode.JPG +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4600b58a8dca4c59c21e0fcaa08c63e7564a56f4
|
4
|
+
data.tar.gz: f1f96f8e93e988bd53ca354c4fe3a67f359b1368
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d7cb582b1d6db468617a132ce67a0dccab2315911eb650468ef6d0e756333cfcca710969d9d845dbe8c3149059499c486845072a053f5674412af268928e2f72
|
7
|
+
data.tar.gz: 9a24446ca6fb5caf9d7a9c436013e31cc6ceeb09e4c06f1968ee4cb5a2d544bd939d7b59e3bda023b2cfea38c4eb0cb1182e0971290738c44c8135a33d046b08
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
|
4
4
|
# Sqed
|
5
5
|
|
6
|
-
|
6
|
+
Sqed is a gem that facilitates metadata extraction from images of staged collection objects.
|
7
7
|
|
8
8
|
## Installation
|
9
9
|
|
@@ -21,7 +21,7 @@ Or install it yourself as:
|
|
21
21
|
|
22
22
|
## Usage
|
23
23
|
|
24
|
-
|
24
|
+
For the time being, see specs.
|
25
25
|
|
26
26
|
## Contributing
|
27
27
|
|
data/lib/sqed/boundaries.rb
CHANGED
@@ -12,13 +12,15 @@ class Sqed::Boundaries
|
|
12
12
|
# 0 => [10,10,40,40]
|
13
13
|
attr_reader :coordinates
|
14
14
|
|
15
|
-
#
|
15
|
+
# A symbol from Sqed::Config::LAYOUTS.keys
|
16
|
+
# :right_t
|
16
17
|
attr_accessor :layout
|
17
18
|
|
18
|
-
#
|
19
|
+
# Boolean, whether or not the last method to populate this object passed fully
|
19
20
|
attr_accessor :complete
|
20
21
|
|
21
22
|
def initialize(layout = nil)
|
23
|
+
raise 'unrecognized layout' if layout && !SqedConfig::LAYOUTS.include?(layout)
|
22
24
|
@complete = false
|
23
25
|
|
24
26
|
@layout = layout
|
@@ -35,11 +37,12 @@ class Sqed::Boundaries
|
|
35
37
|
def offset(boundary)
|
36
38
|
b = Sqed::Boundaries.new() # the idea here is to create a deep copy of self, offsetting by boundary as we go
|
37
39
|
(0..self.coordinates.length - 1).each do |i|
|
38
|
-
b.
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
40
|
+
b.set(i,
|
41
|
+
[(self.x_for(i) + boundary.x_for(0)),
|
42
|
+
(self.y_for(i) + boundary.y_for(0)),
|
43
|
+
self.width_for(i),
|
44
|
+
self.height_for(i)]
|
45
|
+
)
|
43
46
|
end
|
44
47
|
b.complete = self.complete
|
45
48
|
b
|
@@ -76,4 +79,16 @@ class Sqed::Boundaries
|
|
76
79
|
@coordinates[index][3]
|
77
80
|
end
|
78
81
|
|
82
|
+
def set(index, coordinates)
|
83
|
+
@coordinates[index] = coordinates
|
84
|
+
end
|
85
|
+
|
86
|
+
def populated?
|
87
|
+
@coordinates.each do |c|
|
88
|
+
return false if c[0].nil?
|
89
|
+
end
|
90
|
+
true
|
91
|
+
end
|
92
|
+
|
93
|
+
|
79
94
|
end
|
@@ -19,15 +19,15 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
|
19
19
|
when :vertical_split # can vertical and horizontal split be re-used to do cross cases?
|
20
20
|
t = Sqed::BoundaryFinder.color_boundary_finder(image: img, boundary_color: @boundary_color) #detect vertical division, green line
|
21
21
|
return if t.nil?
|
22
|
-
boundaries.
|
23
|
-
boundaries.
|
22
|
+
boundaries.set(0, [0, 0, t[0], img.rows]) # left section of image
|
23
|
+
boundaries.set(1, [t[2], 0, img.columns - t[2], img.rows]) # right section of image
|
24
24
|
boundaries.complete = true
|
25
25
|
|
26
26
|
when :horizontal_split
|
27
27
|
t = Sqed::BoundaryFinder.color_boundary_finder(image: img, scan: :columns, boundary_color: @boundary_color) # set to detect horizontal division, (green line)
|
28
28
|
return if t.nil?
|
29
|
-
boundaries.
|
30
|
-
boundaries.
|
29
|
+
boundaries.set(0, [0, 0, img.columns, t[0]]) # upper section of image
|
30
|
+
boundaries.set(1, [0, t[2], img.columns, img.rows - t[2]]) # lower section of image
|
31
31
|
boundaries.complete = true
|
32
32
|
# boundaries.coordinates[2] = [0, 0, img.columns, t[1]] # upper section of image
|
33
33
|
# boundaries.coordinates[3] = [0, t[1], img.columns, img.rows - t[1]] # lower section of image
|
@@ -35,45 +35,52 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
|
35
35
|
when :right_t # only 3 zones expected, with horizontal division in right-side of vertical division
|
36
36
|
t = Sqed::BoundaryFinder.color_boundary_finder(image: img, boundary_color: @boundary_color) #defaults to detect vertical division, green line
|
37
37
|
return if t.nil?
|
38
|
-
|
39
|
-
|
38
|
+
|
39
|
+
left = [0, 0, t[0], img.rows]
|
40
|
+
right = [t[2], 0, img.columns - t[2], img.rows]
|
41
|
+
|
42
|
+
boundaries.set(0, left) # left section of image
|
40
43
|
|
41
44
|
# now subdivide right side
|
42
|
-
irt = img.crop(*
|
45
|
+
irt = img.crop(*right, true)
|
43
46
|
rt = Sqed::BoundaryFinder.color_boundary_finder(image: irt, scan: :columns, boundary_color: @boundary_color) # set to detect horizontal division, (green line)
|
44
47
|
return if rt.nil?
|
45
|
-
boundaries.
|
46
|
-
boundaries.
|
48
|
+
boundaries.set(1, [t[2], 0, img.columns - t[2], rt[0]]) # upper section of image
|
49
|
+
boundaries.set(2, [t[2], rt[2], img.columns - t[2], img.rows - rt[2]]) # lower section of image
|
47
50
|
boundaries.complete = true
|
48
51
|
# will return 1, 2, or 3
|
49
52
|
|
50
53
|
when :offset_cross # 4 zones expected, with horizontal division in right- and left- sides of vertical division
|
51
54
|
t = Sqed::BoundaryFinder.color_boundary_finder(image: img, boundary_color: @boundary_color) # defaults to detect vertical division, green line
|
52
55
|
raise if t.nil?
|
53
|
-
|
54
|
-
|
56
|
+
|
57
|
+
left = [0, 0, t[0], img.rows] # left section of image
|
58
|
+
right = [t[2], 0, img.columns - t[2], img.rows] # right section of image
|
55
59
|
|
56
60
|
# now subdivide left side
|
57
|
-
ilt = img.crop(*
|
61
|
+
ilt = img.crop(*left, true)
|
58
62
|
|
59
63
|
lt = Sqed::BoundaryFinder.color_boundary_finder(image: ilt, scan: :columns, boundary_color: @boundary_color) # set to detect horizontal division, (green line)
|
64
|
+
|
60
65
|
if !lt.nil?
|
61
|
-
boundaries.
|
62
|
-
boundaries.
|
66
|
+
boundaries.set(0, [0, 0, left[2], lt[0]]) # upper section of image
|
67
|
+
boundaries.set(3, [0, lt[2], left[2], img.rows - lt[2]]) # lower section of image
|
63
68
|
end
|
64
69
|
|
65
70
|
# now subdivide right side
|
66
|
-
irt = img.crop(*
|
71
|
+
irt = img.crop(*right, true)
|
67
72
|
rt = Sqed::BoundaryFinder.color_boundary_finder(image: irt, scan: :columns, boundary_color: @boundary_color) # set to detect horizontal division, (green line)
|
68
73
|
return if rt.nil?
|
69
74
|
|
70
|
-
boundaries.
|
71
|
-
boundaries.
|
75
|
+
boundaries.set(1, [t[2], 0, img.columns - t[2], rt[0]]) # upper section of image
|
76
|
+
boundaries.set(2, [t[2], rt[2], img.columns - t[2], img.rows - rt[2]]) # lower section of image
|
72
77
|
# will return 1, 2, 3, or 4 //// does not handle staggered vertical boundary case
|
73
|
-
|
78
|
+
|
79
|
+
boundaries.complete = true if boundaries.populated?
|
74
80
|
|
75
81
|
else
|
76
|
-
boundaries.
|
82
|
+
boundaries.set(0, [0, 0, img.columns, img.rows]) # totality of image as default
|
83
|
+
# TODO: boundaries.complete status here?
|
77
84
|
return # return original image boundary if no method implemented
|
78
85
|
end
|
79
86
|
|
data/lib/sqed/boundary_finder.rb
CHANGED
@@ -52,9 +52,9 @@ class Sqed::BoundaryFinder
|
|
52
52
|
(0..samples_to_take).each do |s|
|
53
53
|
# Create a sample image a single pixel tall
|
54
54
|
if scan == :rows
|
55
|
-
j = image.crop(0, s * sample_subdivision_size, image.columns, 1)
|
55
|
+
j = image.crop(0, s * sample_subdivision_size, image.columns, 1, true)
|
56
56
|
elsif scan == :columns
|
57
|
-
j = image.crop(s * sample_subdivision_size, 0, 1, image.rows)
|
57
|
+
j = image.crop(s * sample_subdivision_size, 0, 1, image.rows, true)
|
58
58
|
else
|
59
59
|
raise
|
60
60
|
end
|
data/lib/sqed/extractor.rb
CHANGED
@@ -1,45 +1,61 @@
|
|
1
1
|
require 'RMagick'
|
2
2
|
|
3
|
-
# An Extractor takes Boundries object and a
|
3
|
+
# An Extractor takes a Boundaries object and a metadata_map pattern and returns a Sqed::Result
|
4
4
|
#
|
5
5
|
class Sqed::Extractor
|
6
6
|
|
7
|
-
|
7
|
+
# a Sqed::Boundaries instance
|
8
|
+
attr_accessor :boundaries
|
8
9
|
|
9
|
-
|
10
|
-
|
11
|
-
|
10
|
+
# a metadata_map hash from EXTRACTION_PATTERNS like:
|
11
|
+
# {0 => :annotated_specimen, 1 => :identifier, 2 =>:image_registration }
|
12
|
+
attr_accessor :metadata_map
|
12
13
|
|
13
|
-
|
14
|
+
# a Magick::Image file
|
15
|
+
attr_accessor :image
|
16
|
+
|
17
|
+
def initialize(boundaries: boundaries, metadata_map: metadata_map, image: image)
|
18
|
+
raise 'boundaries not provided or provided boundary is not a Sqed::Boundaries' if boundaries.nil? || !boundaries.class == Sqed::Boundaries
|
19
|
+
raise 'metadata_map not provided or metadata_map not a Hash' if metadata_map.nil? || !metadata_map.class == Hash
|
20
|
+
raise 'image not provided' if image.nil? || !image.class == Magick::Image
|
21
|
+
|
22
|
+
@metadata_map = metadata_map
|
14
23
|
@boundaries = boundaries
|
15
24
|
@image = image
|
16
25
|
end
|
17
26
|
|
18
27
|
def result
|
19
28
|
r = Sqed::Result.new()
|
20
|
-
|
29
|
+
|
21
30
|
# assign the images to the result
|
22
|
-
boundaries.each do |
|
23
|
-
|
31
|
+
boundaries.each do |section_index, coords|
|
32
|
+
image_setter = "#{metadata_map[section_index]}_image="
|
33
|
+
r.send(image_setter, extract_image(coords))
|
24
34
|
end
|
25
35
|
|
26
36
|
# assign the metadata to the result
|
27
|
-
|
37
|
+
metadata_map.each do |section_index, section_type|
|
28
38
|
# only extract data if a parser exists
|
29
|
-
if
|
30
|
-
|
39
|
+
if parsers = SqedConfig::SECTION_PARSERS[section_type]
|
40
|
+
|
41
|
+
section_image = r.send("#{section_type}_image")
|
42
|
+
updated = r.send(section_type)
|
43
|
+
|
44
|
+
parsers.each do |p|
|
45
|
+
parsed_result = p.new(section_image).text
|
46
|
+
updated.merge!(p::TYPE => parsed_result) if parsed_result
|
47
|
+
end
|
48
|
+
|
49
|
+
r.send("#{section_type}=", updated)
|
31
50
|
end
|
32
51
|
end
|
33
52
|
|
34
53
|
r
|
35
54
|
end
|
36
55
|
|
37
|
-
#
|
56
|
+
# crop takes x, y, width, height
|
38
57
|
def extract_image(coords)
|
39
|
-
|
40
|
-
# @image.crop(coords[0], coords[1], coords[2] - coords[0], coords[3] - coords[1] )
|
41
|
-
bp = 0
|
42
|
-
@image.crop(coords[0], coords[1], coords[2], coords[3], true)
|
58
|
+
i = @image.crop(*coords, true)
|
43
59
|
end
|
44
60
|
|
45
61
|
end
|
@@ -1,27 +1,43 @@
|
|
1
1
|
# Given an image, return an ordered array of detectable barcodes
|
2
2
|
|
3
|
+
|
4
|
+
|
3
5
|
class Sqed::Parser::BarcodeParser < Sqed::Parser
|
4
|
-
|
6
|
+
|
7
|
+
TYPE = :barcode
|
8
|
+
|
9
|
+
attr_accessor :image
|
10
|
+
|
11
|
+
attr_accessor :barcode
|
5
12
|
|
6
13
|
def initialize(image)
|
7
14
|
super
|
8
|
-
@
|
15
|
+
@image = image
|
9
16
|
end
|
10
17
|
|
11
|
-
def
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
18
|
+
def barcode
|
19
|
+
@barcode ||= get_barcode
|
20
|
+
@barcode
|
21
|
+
end
|
22
|
+
|
23
|
+
# Uses the same engine as zbarimg that you can install with brew (zbarimg)
|
24
|
+
#
|
25
|
+
def get_code_128
|
26
|
+
ZXing.decode @image.filename
|
27
|
+
end
|
28
|
+
|
29
|
+
# try a bunch of options, organized by most common, give the first hit
|
30
|
+
def get_barcode
|
31
|
+
[get_code_128].compact.first
|
32
|
+
end
|
33
|
+
|
34
|
+
#def get_datamatrix
|
35
|
+
# https://github.com/srijan/ruby-dmtx
|
36
|
+
#end
|
37
|
+
|
38
|
+
# alias to a universal method
|
39
|
+
def text
|
40
|
+
barcode
|
25
41
|
end
|
26
42
|
|
27
43
|
end
|
@@ -7,8 +7,13 @@
|
|
7
7
|
require 'rtesseract'
|
8
8
|
|
9
9
|
class Sqed::Parser::OcrParser < Sqed::Parser
|
10
|
-
attr_accessor :text
|
11
10
|
|
11
|
+
TYPE = :text
|
12
|
+
|
13
|
+
# the text extracted from the image
|
14
|
+
attr_accessor :text
|
15
|
+
|
16
|
+
# https://code.google.com/p/tesseract-ocr/wiki/FAQ
|
12
17
|
def text
|
13
18
|
img = @image #.white_threshold(245)
|
14
19
|
|
@@ -19,9 +24,6 @@ class Sqed::Parser::OcrParser < Sqed::Parser
|
|
19
24
|
# img = img.scale(2)
|
20
25
|
# img.write('foo0.jpg.jpg')
|
21
26
|
# img = img.enhance
|
22
|
-
# img = img.enhance
|
23
|
-
# img = img.enhance
|
24
|
-
# img = img.enhance
|
25
27
|
# img.write('foo1.jpg')
|
26
28
|
# img = img.quantize(8, Magick::GRAYColorspace)
|
27
29
|
# img.write('foo1.jpg')
|
@@ -39,12 +41,28 @@ class Sqed::Parser::OcrParser < Sqed::Parser
|
|
39
41
|
#
|
40
42
|
# img.write('foo.jpg') # for debugging purposes, this is the image that is sent to OCR
|
41
43
|
|
42
|
-
r = RTesseract.new(img, lang: 'eng', psm: 3)
|
43
44
|
|
45
|
+
# From https://code.google.com/p/tesseract-ocr/wiki/FAQ
|
46
|
+
# " There is a minimum text size for reasonable accuracy. You have to consider resolution as well as point size. Accuracy drops off below 10pt x 300dpi, rapidly below 8pt x 300dpi. A quick check is to count the pixels of the x-height of your characters. (X-height is the height of the lower case x.) At 10pt x 300dpi x-heights are typically about 20 pixels, although this can vary dramatically from font to font. Below an x-height of 10 pixels, you have very little chance of accurate results, and below about 8 pixels, most of the text will be "noise removed".
|
47
|
+
|
48
|
+
|
49
|
+
# http://www.sk-spell.sk.cx/tesseract-ocr-parameters-in-302-version
|
50
|
+
# doesn't support outputbase
|
51
|
+
r = RTesseract.new(img, lang: 'eng', psm: 1,
|
52
|
+
load_system_dawg: 0,
|
53
|
+
tessedit_debug_quality_metrics: 1,
|
54
|
+
load_freq_dawg: 1 ,
|
55
|
+
chop_enable: 1,
|
56
|
+
tessedit_write_images: 1,
|
57
|
+
equationdetect_save_merged_image: 1,
|
58
|
+
tessedit_dump_pageseg_images: 1,
|
59
|
+
equationdetect_save_bi_image: 1,
|
60
|
+
load_unambig_dawg: 0,
|
61
|
+
tessedit_write_params_to_file: 'tmp/ocr_config_file.txt' ) # psm: 3,
|
44
62
|
|
45
63
|
# img = img.white_threshold(245)
|
46
64
|
|
47
|
-
@text = r.to_s
|
65
|
+
@text = r.to_s.strip
|
48
66
|
end
|
49
67
|
|
50
68
|
# Need to provide tuning methods here, i.e. image transformations that facilitate OCR
|
data/lib/sqed/parser.rb
CHANGED
data/lib/sqed/result.rb
CHANGED
@@ -1,15 +1,63 @@
|
|
1
|
-
|
2
|
-
#
|
3
|
-
# full process of data extraction from an image.
|
4
|
-
#
|
5
|
-
#
|
1
|
+
# A Sqed::Result is a container for the results of the
|
2
|
+
# data extraction for the full stage
|
6
3
|
#
|
7
4
|
class Sqed::Result
|
8
5
|
|
9
6
|
SqedConfig::LAYOUT_SECTION_TYPES.each do |k|
|
10
|
-
attr_accessor k
|
11
7
|
attr_accessor "#{k}_image".to_sym
|
8
|
+
attr_accessor k
|
9
|
+
end
|
10
|
+
|
11
|
+
def initialize
|
12
|
+
SqedConfig::LAYOUT_SECTION_TYPES.each do |k|
|
13
|
+
send("#{k}=", {})
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
# return [String, nil]
|
18
|
+
# the text derived from the OCR parsing of the section
|
19
|
+
def text_for(section)
|
20
|
+
send(section)[:text]
|
12
21
|
end
|
22
|
+
|
23
|
+
# return [String, nil]
|
24
|
+
# the text derived from the barcode parsing of the section
|
25
|
+
def barcode_text_for(section)
|
26
|
+
send(section)[:barcode]
|
27
|
+
end
|
28
|
+
|
29
|
+
# return [Hash]
|
30
|
+
# a map of layout_section_type => value (if there is a value),
|
31
|
+
# i.e. all possible parsed text values returned from the parser
|
32
|
+
def text
|
33
|
+
result = {}
|
34
|
+
SqedConfig::LAYOUT_SECTION_TYPES.each do |k|
|
35
|
+
v = self.send(k)
|
36
|
+
result.merge!(k => v) if v[:barcode] || v[:text]
|
37
|
+
end
|
38
|
+
result
|
39
|
+
end
|
40
|
+
|
41
|
+
# return [Hash]
|
42
|
+
# a map of layout_section_type => Image
|
43
|
+
def images
|
44
|
+
result = {}
|
45
|
+
SqedConfig::LAYOUT_SECTION_TYPES.each do |k|
|
46
|
+
image = self.send("#{k}_image")
|
47
|
+
result.merge!(k => image) if image
|
48
|
+
end
|
49
|
+
result
|
50
|
+
end
|
51
|
+
|
52
|
+
# return [True]
|
53
|
+
# write the images in #images to tmp/
|
54
|
+
def write_images
|
55
|
+
images.each do |k, img|
|
56
|
+
img.write("tmp/#{k}.jpg")
|
57
|
+
end
|
58
|
+
true
|
59
|
+
end
|
60
|
+
|
13
61
|
end
|
14
62
|
|
15
63
|
|
data/lib/sqed/version.rb
CHANGED
data/lib/sqed.rb
CHANGED
@@ -4,7 +4,7 @@ recent_ruby = RUBY_VERSION >= '2.1.1'
|
|
4
4
|
raise "IMPORTANT: sqed gem requires ruby >= 2.1.1" unless recent_ruby
|
5
5
|
|
6
6
|
require "RMagick"
|
7
|
-
|
7
|
+
include Magick
|
8
8
|
|
9
9
|
# Instances take the following
|
10
10
|
# 1) A base image @image
|
@@ -28,19 +28,27 @@ class Sqed
|
|
28
28
|
attr_accessor :pattern
|
29
29
|
|
30
30
|
# the image that is the cropped content for parsing
|
31
|
-
attr_accessor :stage_image
|
31
|
+
attr_accessor :stage_image
|
32
|
+
|
33
|
+
# a Sqed::Boundaries instance that stores the coordinates of the stage
|
34
|
+
attr_accessor :stage_boundary
|
35
|
+
|
36
|
+
# a Sqed::Boundaries instance that contains the coordinates of the internal stage sections
|
37
|
+
attr_accessor :boundaries
|
38
|
+
|
39
|
+
# Boolean, whether to detect the border on initialization, i.e. new()
|
40
|
+
attr_accessor :auto_detect_border
|
41
|
+
|
42
|
+
# a symbol, :red, :green, :blue, describing the boundary color within the stage
|
43
|
+
attr_accessor :boundary_color
|
32
44
|
|
33
45
|
def initialize(image: image, pattern: pattern, auto_detect_border: true, boundary_color: :green)
|
34
46
|
@image = image
|
35
|
-
|
36
47
|
@boundaries = nil
|
37
|
-
@stage_boundary = Sqed::Boundaries.new(:internal_box)
|
38
|
-
|
48
|
+
@stage_boundary = Sqed::Boundaries.new(:internal_box)
|
39
49
|
@auto_detect_border = auto_detect_border
|
40
|
-
|
41
50
|
@pattern = pattern
|
42
51
|
@pattern ||= :standard_cross
|
43
|
-
|
44
52
|
@boundary_color = boundary_color
|
45
53
|
|
46
54
|
set_stage_boundary if @auto_detect_border && @image
|
@@ -60,6 +68,8 @@ class Sqed
|
|
60
68
|
@boundaries
|
61
69
|
end
|
62
70
|
|
71
|
+
# Return [Sqed::Boundaries instance]
|
72
|
+
# a boundaries instance that has the original image (prior to cropping stage) coordinates
|
63
73
|
def native_boundaries
|
64
74
|
# check for @boundaries.complete first? OR handle partial detections ?!
|
65
75
|
if @boundaries.complete
|
@@ -69,14 +79,18 @@ class Sqed
|
|
69
79
|
end
|
70
80
|
end
|
71
81
|
|
82
|
+
# return [Image]
|
83
|
+
# crops the image if not already done
|
72
84
|
def stage_image
|
73
85
|
crop_image if @stage_boundary.complete && @stage_image.nil?
|
74
86
|
@stage_image
|
75
87
|
end
|
76
88
|
|
89
|
+
# return [Image]
|
90
|
+
# crops the stage if not done, then sets/returns @stage_image
|
77
91
|
def crop_image
|
78
92
|
if @stage_boundary.complete
|
79
|
-
@stage_image = @image.crop(*@stage_boundary.for(SqedConfig.index_for_section_type(:stage, :stage)))
|
93
|
+
@stage_image = @image.crop(*@stage_boundary.for(SqedConfig.index_for_section_type(:stage, :stage)), true)
|
80
94
|
else
|
81
95
|
@stage_image = @image
|
82
96
|
end
|
@@ -84,14 +98,24 @@ class Sqed
|
|
84
98
|
|
85
99
|
def result
|
86
100
|
return false if @image.nil? || @pattern.nil?
|
87
|
-
crop_image
|
88
101
|
extractor = Sqed::Extractor.new(
|
89
|
-
boundaries:
|
90
|
-
|
91
|
-
image:
|
102
|
+
boundaries: boundaries,
|
103
|
+
metadata_map: SqedConfig::EXTRACTION_PATTERNS[@pattern][:metadata_map],
|
104
|
+
image: stage_image)
|
92
105
|
extractor.result
|
93
106
|
end
|
94
107
|
|
108
|
+
def attributes
|
109
|
+
{
|
110
|
+
image: @image,
|
111
|
+
boundaries: @boundaries,
|
112
|
+
stage_boundary: @stage_boundary,
|
113
|
+
auto_detect_border: @auto_detect_border,
|
114
|
+
pattern: @pattern,
|
115
|
+
boundary_color: @boundary_color
|
116
|
+
}
|
117
|
+
end
|
118
|
+
|
95
119
|
protected
|
96
120
|
|
97
121
|
def set_stage_boundary
|
@@ -103,6 +127,7 @@ class Sqed
|
|
103
127
|
|
104
128
|
def get_section_boundaries
|
105
129
|
boundary_finder_class = SqedConfig::EXTRACTION_PATTERNS[@pattern][:boundary_finder]
|
130
|
+
|
106
131
|
options = {image: stage_image}
|
107
132
|
options.merge!( layout: SqedConfig::EXTRACTION_PATTERNS[@pattern][:layout] ) unless boundary_finder_class.name == 'Sqed::BoundaryFinder::CrossFinder'
|
108
133
|
options.merge!( boundary_color: @boundary_color) if boundary_finder_class.name == 'Sqed::BoundaryFinder::ColorLineFinder'
|
data/lib/sqed_config.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
+
require 'zxing'
|
4
|
+
|
3
5
|
require_relative "sqed/parser"
|
4
6
|
require_relative "sqed/parser/ocr_parser"
|
5
7
|
require_relative "sqed/parser/barcode_parser"
|
@@ -51,7 +53,6 @@ module SqedConfig
|
|
51
53
|
|
52
54
|
# Hash values are used to stub out
|
53
55
|
# the Sqed::Boundaries instance.
|
54
|
-
#
|
55
56
|
LAYOUTS = {
|
56
57
|
cross: [0,1,2,3],
|
57
58
|
offset_cross: [0,1,2,3],
|
@@ -62,7 +63,7 @@ module SqedConfig
|
|
62
63
|
internal_box: [0]
|
63
64
|
}
|
64
65
|
|
65
|
-
#
|
66
|
+
# Each element of the layout is a "section".
|
66
67
|
LAYOUT_SECTION_TYPES = [
|
67
68
|
:stage, # the image contains the full stage
|
68
69
|
:specimen, # the specimen only, no metadata should be present
|
@@ -70,26 +71,31 @@ module SqedConfig
|
|
70
71
|
:determination_labels, # the section contains text that determines the specimen
|
71
72
|
:labels, # the section contains collecting event and non-determination labels
|
72
73
|
:identifier, # the section contains an identifier (e.g. barcode or unique number)
|
73
|
-
:image_registration
|
74
|
+
:image_registration, # the section contains only image registration information,
|
75
|
+
:curator_metadata, # the section contains text with curator metadata
|
76
|
+
:nothing # section is empty
|
74
77
|
]
|
75
78
|
|
76
79
|
# Links section types to data parsers
|
77
80
|
SECTION_PARSERS = {
|
78
|
-
labels: Sqed::Parser::OcrParser,
|
79
|
-
identifier: Sqed::Parser::BarcodeParser,
|
80
|
-
deterimination_labels: Sqed::Parser::OcrParser
|
81
|
+
labels: [ Sqed::Parser::OcrParser ],
|
82
|
+
identifier: [ Sqed::Parser::BarcodeParser, Sqed::Parser::OcrParser ],
|
83
|
+
deterimination_labels: [ Sqed::Parser::OcrParser ],
|
84
|
+
curator_metadata: [ Sqed::Parser::OcrParser ],
|
85
|
+
specimen: [ Sqed::Parser::OcrParser ],
|
81
86
|
}
|
82
87
|
|
83
88
|
EXTRACTION_PATTERNS = {
|
84
89
|
right_t: {
|
85
90
|
boundary_finder: Sqed::BoundaryFinder::ColorLineFinder,
|
86
91
|
layout: :right_t,
|
87
|
-
metadata_map: {0 => :annotated_specimen, 1 => :
|
92
|
+
metadata_map: {0 => :annotated_specimen, 1 => :identifier, 2 =>:image_registration }
|
88
93
|
},
|
89
94
|
offset_cross: {
|
90
95
|
boundary_finder: Sqed::BoundaryFinder::ColorLineFinder,
|
91
96
|
layout: :offset_cross,
|
92
|
-
metadata_map: {0 => :
|
97
|
+
metadata_map: {0 => :curator_metadata, 1 => :identifier, 2 => :image_registration, 3 => :specimen }
|
98
|
+
# metadata_map: {0 => :annotated_specimen, 1 => :identifier, 2 =>:image_registration }
|
93
99
|
},
|
94
100
|
standard_cross: {
|
95
101
|
boundary_finder: Sqed::BoundaryFinder::CrossFinder,
|