sqed 0.0.4 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/README.md +2 -2
  4. data/lib/sqed/boundaries.rb +22 -7
  5. data/lib/sqed/boundary_finder/color_line_finder.rb +26 -19
  6. data/lib/sqed/boundary_finder.rb +2 -2
  7. data/lib/sqed/extractor.rb +33 -17
  8. data/lib/sqed/parser/barcode_parser.rb +32 -16
  9. data/lib/sqed/parser/ocr_parser.rb +24 -6
  10. data/lib/sqed/parser.rb +7 -0
  11. data/lib/sqed/result.rb +54 -6
  12. data/lib/sqed/version.rb +1 -1
  13. data/lib/sqed.rb +37 -12
  14. data/lib/sqed_config.rb +14 -8
  15. data/spec/lib/sqed/boundaries_spec.rb +26 -1
  16. data/spec/lib/sqed/boundary_finder_spec.rb +89 -1
  17. data/spec/lib/sqed/extractor_spec.rb +50 -76
  18. data/spec/lib/sqed/parser/barcode_spec.rb +25 -0
  19. data/spec/lib/sqed/parser/ocr_spec.rb +16 -0
  20. data/spec/lib/sqed/parser_spec.rb +6 -1
  21. data/spec/lib/sqed/result_spec.rb +24 -0
  22. data/spec/lib/sqed_spec.rb +58 -144
  23. data/spec/support/files/barcode_images/code_128_barcode.png +0 -0
  24. data/spec/support/files/barcode_images/datamatrix_barcode.png +0 -0
  25. data/spec/support/files/{2Dbarcode.png → barcode_images/osuc_datamatrix_barcode.png} +0 -0
  26. data/spec/support/files/label_images/basic1.png +0 -0
  27. data/spec/support/files/label_images/basic2.png +0 -0
  28. data/spec/support/files/label_images/readme.png +0 -0
  29. data/spec/support/files/{types_21.jpg → misc_images/types_21.jpg} +0 -0
  30. data/spec/support/files/{types_8.jpg → misc_images/types_8.jpg} +0 -0
  31. data/spec/support/files/{CrossyBlackLinesSpecimen.jpg → stage_images/CrossyBlackLinesSpecimen.jpg} +0 -0
  32. data/spec/support/files/{CrossyGreenLinesSpecimen.jpg → stage_images/CrossyGreenLinesSpecimen.jpg} +0 -0
  33. data/spec/support/files/{black_stage_green_line_specimen.jpg → stage_images/black_stage_green_line_specimen.jpg} +0 -0
  34. data/spec/support/files/{boundary_cross_green.jpg → stage_images/boundary_cross_green.jpg} +0 -0
  35. data/spec/support/files/{boundary_left_t_yellow.jpg → stage_images/boundary_left_t_yellow.jpg} +0 -0
  36. data/spec/support/files/{boundary_offset_cross_red.jpg → stage_images/boundary_offset_cross_red.jpg} +0 -0
  37. data/spec/support/files/{boundary_right_t_green.jpg → stage_images/boundary_right_t_green.jpg} +0 -0
  38. data/spec/support/files/stage_images/frost_stage.jpg +0 -0
  39. data/spec/support/files/{greenlineimage.jpg → stage_images/greenlineimage.jpg} +0 -0
  40. data/spec/support/files/test4.jpg +0 -0
  41. data/spec/support/image_helpers.rb +61 -33
  42. data/sqed.gemspec +3 -2
  43. metadata +58 -34
  44. data/spec/support/files/Quadrant_2_3.jpg +0 -0
  45. data/spec/support/files/test4OLD.jpg +0 -0
  46. data/spec/support/files/test_barcode.JPG +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bed58b448dbb4cd6681353d53ecf41a5d5bc86d3
4
- data.tar.gz: 2a59cd7ffd1033a230d37eca146f40c9ced0bb77
3
+ metadata.gz: 4600b58a8dca4c59c21e0fcaa08c63e7564a56f4
4
+ data.tar.gz: f1f96f8e93e988bd53ca354c4fe3a67f359b1368
5
5
  SHA512:
6
- metadata.gz: 0bf133d7ff4b70b1f064a3450b36f9db622c84d0a104e1ae965e40c23c5d8e8b1711c6724dc7af297a5bf864fecaa2d27a9c6f2ec8e40c812bdf5049b5ac8c45
7
- data.tar.gz: c7b7ad499e5a39daa8f71144fa363a8af69c5dec95842ee7de35bcf4d6e72fbebca88f4f84313cfb7c5f87e9467fe828ce79539e175057c07f4e77d6cd0875fb
6
+ metadata.gz: d7cb582b1d6db468617a132ce67a0dccab2315911eb650468ef6d0e756333cfcca710969d9d845dbe8c3149059499c486845072a053f5674412af268928e2f72
7
+ data.tar.gz: 9a24446ca6fb5caf9d7a9c436013e31cc6ceeb09e4c06f1968ee4cb5a2d544bd939d7b59e3bda023b2cfea38c4eb0cb1182e0971290738c44c8135a33d046b08
data/.gitignore CHANGED
@@ -24,4 +24,6 @@ local/
24
24
  foo*.jpg
25
25
  SessionID_BarcodeImage.JPG
26
26
 
27
+ tess*
28
+
27
29
  /*.jpg
data/README.md CHANGED
@@ -3,7 +3,7 @@
3
3
 
4
4
  # Sqed
5
5
 
6
- Stub for a gem that supports specimen digitization from images.
6
+ Sqed is a gem that facilitates metadata extraction from images of staged collection objects.
7
7
 
8
8
  ## Installation
9
9
 
@@ -21,7 +21,7 @@ Or install it yourself as:
21
21
 
22
22
  ## Usage
23
23
 
24
- TODO: Write usage instructions here
24
+ For the time being, see specs.
25
25
 
26
26
  ## Contributing
27
27
 
@@ -12,13 +12,15 @@ class Sqed::Boundaries
12
12
  # 0 => [10,10,40,40]
13
13
  attr_reader :coordinates
14
14
 
15
- # An Sqed::Config::EXTRACTION_PATTERN layout
15
+ # A symbol from Sqed::Config::LAYOUTS.keys
16
+ # :right_t
16
17
  attr_accessor :layout
17
18
 
18
- # Whether or not the last method to populate this object passed fully
19
+ # Boolean, whether or not the last method to populate this object passed fully
19
20
  attr_accessor :complete
20
21
 
21
22
  def initialize(layout = nil)
23
+ raise 'unrecognized layout' if layout && !SqedConfig::LAYOUTS.include?(layout)
22
24
  @complete = false
23
25
 
24
26
  @layout = layout
@@ -35,11 +37,12 @@ class Sqed::Boundaries
35
37
  def offset(boundary)
36
38
  b = Sqed::Boundaries.new() # the idea here is to create a deep copy of self, offsetting by boundary as we go
37
39
  (0..self.coordinates.length - 1).each do |i|
38
- b.coordinates[i] = [] # create the instance of the i-th coordinate, then populate it
39
- b.coordinates[i][0] = self.x_for(i) + boundary.x_for(0)
40
- b.coordinates[i][1] = self.y_for(i) + boundary.y_for(0)
41
- b.coordinates[i][2] = self.width_for(i)
42
- b.coordinates[i][3] = self.height_for(i)
40
+ b.set(i,
41
+ [(self.x_for(i) + boundary.x_for(0)),
42
+ (self.y_for(i) + boundary.y_for(0)),
43
+ self.width_for(i),
44
+ self.height_for(i)]
45
+ )
43
46
  end
44
47
  b.complete = self.complete
45
48
  b
@@ -76,4 +79,16 @@ class Sqed::Boundaries
76
79
  @coordinates[index][3]
77
80
  end
78
81
 
82
+ def set(index, coordinates)
83
+ @coordinates[index] = coordinates
84
+ end
85
+
86
+ def populated?
87
+ @coordinates.each do |c|
88
+ return false if c[0].nil?
89
+ end
90
+ true
91
+ end
92
+
93
+
79
94
  end
@@ -19,15 +19,15 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
19
19
  when :vertical_split # can vertical and horizontal split be re-used to do cross cases?
20
20
  t = Sqed::BoundaryFinder.color_boundary_finder(image: img, boundary_color: @boundary_color) #detect vertical division, green line
21
21
  return if t.nil?
22
- boundaries.coordinates[0] = [0, 0, t[0], img.rows] # left section of image
23
- boundaries.coordinates[1] = [t[2], 0, img.columns - t[2], img.rows] # right section of image
22
+ boundaries.set(0, [0, 0, t[0], img.rows]) # left section of image
23
+ boundaries.set(1, [t[2], 0, img.columns - t[2], img.rows]) # right section of image
24
24
  boundaries.complete = true
25
25
 
26
26
  when :horizontal_split
27
27
  t = Sqed::BoundaryFinder.color_boundary_finder(image: img, scan: :columns, boundary_color: @boundary_color) # set to detect horizontal division, (green line)
28
28
  return if t.nil?
29
- boundaries.coordinates[0] = [0, 0, img.columns, t[0]] # upper section of image
30
- boundaries.coordinates[1] = [0, t[2], img.columns, img.rows - t[2]] # lower section of image
29
+ boundaries.set(0, [0, 0, img.columns, t[0]]) # upper section of image
30
+ boundaries.set(1, [0, t[2], img.columns, img.rows - t[2]]) # lower section of image
31
31
  boundaries.complete = true
32
32
  # boundaries.coordinates[2] = [0, 0, img.columns, t[1]] # upper section of image
33
33
  # boundaries.coordinates[3] = [0, t[1], img.columns, img.rows - t[1]] # lower section of image
@@ -35,45 +35,52 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
35
35
  when :right_t # only 3 zones expected, with horizontal division in right-side of vertical division
36
36
  t = Sqed::BoundaryFinder.color_boundary_finder(image: img, boundary_color: @boundary_color) #defaults to detect vertical division, green line
37
37
  return if t.nil?
38
- boundaries.coordinates[0] = [0, 0, t[0], img.rows] # left section of image
39
- boundaries.coordinates[1] = [t[2], 0, img.columns - t[2], img.rows] # left section of image
38
+
39
+ left = [0, 0, t[0], img.rows]
40
+ right = [t[2], 0, img.columns - t[2], img.rows]
41
+
42
+ boundaries.set(0, left) # left section of image
40
43
 
41
44
  # now subdivide right side
42
- irt = img.crop(*boundaries.coordinates[1], true)
45
+ irt = img.crop(*right, true)
43
46
  rt = Sqed::BoundaryFinder.color_boundary_finder(image: irt, scan: :columns, boundary_color: @boundary_color) # set to detect horizontal division, (green line)
44
47
  return if rt.nil?
45
- boundaries.coordinates[1] = [t[2], 0, img.columns - t[2], rt[0]] # upper section of image
46
- boundaries.coordinates[2] = [t[2], rt[2], img.columns - t[2], img.rows - rt[2]] # lower section of image
48
+ boundaries.set(1, [t[2], 0, img.columns - t[2], rt[0]]) # upper section of image
49
+ boundaries.set(2, [t[2], rt[2], img.columns - t[2], img.rows - rt[2]]) # lower section of image
47
50
  boundaries.complete = true
48
51
  # will return 1, 2, or 3
49
52
 
50
53
  when :offset_cross # 4 zones expected, with horizontal division in right- and left- sides of vertical division
51
54
  t = Sqed::BoundaryFinder.color_boundary_finder(image: img, boundary_color: @boundary_color) # defaults to detect vertical division, green line
52
55
  raise if t.nil?
53
- boundaries.coordinates[0] = [0, 0, t[0], img.rows] # left section of image
54
- boundaries.coordinates[1] = [t[2], 0, img.columns - t[2], img.rows] # right section of image
56
+
57
+ left = [0, 0, t[0], img.rows] # left section of image
58
+ right = [t[2], 0, img.columns - t[2], img.rows] # right section of image
55
59
 
56
60
  # now subdivide left side
57
- ilt = img.crop(*boundaries.coordinates[0], true)
61
+ ilt = img.crop(*left, true)
58
62
 
59
63
  lt = Sqed::BoundaryFinder.color_boundary_finder(image: ilt, scan: :columns, boundary_color: @boundary_color) # set to detect horizontal division, (green line)
64
+
60
65
  if !lt.nil?
61
- boundaries.coordinates[0] = [0, 0, t[0], lt[0]] # upper section of image
62
- boundaries.coordinates[3] = [0, lt[2], t[0], img.rows - lt[2]] # lower section of image
66
+ boundaries.set(0, [0, 0, left[2], lt[0]]) # upper section of image
67
+ boundaries.set(3, [0, lt[2], left[2], img.rows - lt[2]]) # lower section of image
63
68
  end
64
69
 
65
70
  # now subdivide right side
66
- irt = img.crop(*boundaries.coordinates[1], true)
71
+ irt = img.crop(*right, true)
67
72
  rt = Sqed::BoundaryFinder.color_boundary_finder(image: irt, scan: :columns, boundary_color: @boundary_color) # set to detect horizontal division, (green line)
68
73
  return if rt.nil?
69
74
 
70
- boundaries.coordinates[1] = [t[2], 0, img.columns - t[2], rt[0]] # upper section of image
71
- boundaries.coordinates[2] = [t[2], rt[2], img.columns - t[2], img.rows - rt[2]] # lower section of image
75
+ boundaries.set(1, [t[2], 0, img.columns - t[2], rt[0]]) # upper section of image
76
+ boundaries.set(2, [t[2], rt[2], img.columns - t[2], img.rows - rt[2]]) # lower section of image
72
77
  # will return 1, 2, 3, or 4 //// does not handle staggered vertical boundary case
73
- boundaries.complete = true
78
+
79
+ boundaries.complete = true if boundaries.populated?
74
80
 
75
81
  else
76
- boundaries.coordinates[0] = [0, 0, img.columns, img.rows] # totality of image as default
82
+ boundaries.set(0, [0, 0, img.columns, img.rows]) # totality of image as default
83
+ # TODO: boundaries.complete status here?
77
84
  return # return original image boundary if no method implemented
78
85
  end
79
86
 
@@ -52,9 +52,9 @@ class Sqed::BoundaryFinder
52
52
  (0..samples_to_take).each do |s|
53
53
  # Create a sample image a single pixel tall
54
54
  if scan == :rows
55
- j = image.crop(0, s * sample_subdivision_size, image.columns, 1)
55
+ j = image.crop(0, s * sample_subdivision_size, image.columns, 1, true)
56
56
  elsif scan == :columns
57
- j = image.crop(s * sample_subdivision_size, 0, 1, image.rows)
57
+ j = image.crop(s * sample_subdivision_size, 0, 1, image.rows, true)
58
58
  else
59
59
  raise
60
60
  end
@@ -1,45 +1,61 @@
1
1
  require 'RMagick'
2
2
 
3
- # An Extractor takes Boundries object and a layout pattern and returns a Sqed::Result
3
+ # An Extractor takes a Boundaries object and a metadata_map pattern and returns a Sqed::Result
4
4
  #
5
5
  class Sqed::Extractor
6
6
 
7
- attr_accessor :boundaries, :layout, :image
7
+ # a Sqed::Boundaries instance
8
+ attr_accessor :boundaries
8
9
 
9
- def initialize(boundaries: boundaries, layout: layout, image: image)
10
- raise if boundaries.nil? || !boundaries.class == Sqed::Boundaries
11
- raise if layout.nil? || !layout.class == Hash
10
+ # a metadata_map hash from EXTRACTION_PATTERNS like:
11
+ # {0 => :annotated_specimen, 1 => :identifier, 2 =>:image_registration }
12
+ attr_accessor :metadata_map
12
13
 
13
- @layout = layout
14
+ # a Magick::Image file
15
+ attr_accessor :image
16
+
17
+ def initialize(boundaries: boundaries, metadata_map: metadata_map, image: image)
18
+ raise 'boundaries not provided or provided boundary is not a Sqed::Boundaries' if boundaries.nil? || !boundaries.class == Sqed::Boundaries
19
+ raise 'metadata_map not provided or metadata_map not a Hash' if metadata_map.nil? || !metadata_map.class == Hash
20
+ raise 'image not provided' if image.nil? || !image.class == Magick::Image
21
+
22
+ @metadata_map = metadata_map
14
23
  @boundaries = boundaries
15
24
  @image = image
16
25
  end
17
26
 
18
27
  def result
19
28
  r = Sqed::Result.new()
20
-
29
+
21
30
  # assign the images to the result
22
- boundaries.each do |section, coords|
23
- r.send("#{LAYOUT_SECTION_TYPES[section]}=", extract_image(coords))
31
+ boundaries.each do |section_index, coords|
32
+ image_setter = "#{metadata_map[section_index]}_image="
33
+ r.send(image_setter, extract_image(coords))
24
34
  end
25
35
 
26
36
  # assign the metadata to the result
27
- layout.keys.each do |section_index, section_type|
37
+ metadata_map.each do |section_index, section_type|
28
38
  # only extract data if a parser exists
29
- if parser = SECTION_PARSERS[section_type]
30
- r.send("#{section_type}=", parser.new(image: r.send(section_type + "_image").text) )
39
+ if parsers = SqedConfig::SECTION_PARSERS[section_type]
40
+
41
+ section_image = r.send("#{section_type}_image")
42
+ updated = r.send(section_type)
43
+
44
+ parsers.each do |p|
45
+ parsed_result = p.new(section_image).text
46
+ updated.merge!(p::TYPE => parsed_result) if parsed_result
47
+ end
48
+
49
+ r.send("#{section_type}=", updated)
31
50
  end
32
51
  end
33
52
 
34
53
  r
35
54
  end
36
55
 
37
- # coords are x1, y1, x2, y2
56
+ # crop takes x, y, width, height
38
57
  def extract_image(coords)
39
- # crop takes x, y, width, height
40
- # @image.crop(coords[0], coords[1], coords[2] - coords[0], coords[3] - coords[1] )
41
- bp = 0
42
- @image.crop(coords[0], coords[1], coords[2], coords[3], true)
58
+ i = @image.crop(*coords, true)
43
59
  end
44
60
 
45
61
  end
@@ -1,27 +1,43 @@
1
1
  # Given an image, return an ordered array of detectable barcodes
2
2
 
3
+
4
+
3
5
  class Sqed::Parser::BarcodeParser < Sqed::Parser
4
- attr_accessor :barcodes
6
+
7
+ TYPE = :barcode
8
+
9
+ attr_accessor :image
10
+
11
+ attr_accessor :barcode
5
12
 
6
13
  def initialize(image)
7
14
  super
8
- @barcodes = bar_codes
15
+ @image = image
9
16
  end
10
17
 
11
- def bar_codes
12
- # process the images, spit out the barcodes
13
- # return ZXing.decode_all(@image) #['ABC 123', 'DEF 456']
14
- # a = `/usr/local/Cellar/zbar/0.10_1/bin/zbarimg ~/src/sqed/spec/support/files/test_barcode.JPG`
15
- # b = a.split("\n")
16
- f = 'SessionID_BarcodeImage.JPG'
17
- i = @image[:image]
18
- if i.nil?
19
- i = @image
20
- end
21
- i.write("tmp/#{f}")
22
- c = `/usr/local/Cellar/zbar/0.10_1/bin/zbarimg #{f}`
23
- d = c.split("\n")
24
- return d
18
+ def barcode
19
+ @barcode ||= get_barcode
20
+ @barcode
21
+ end
22
+
23
+ # Uses the same engine as zbarimg that you can install with brew (zbarimg)
24
+ #
25
+ def get_code_128
26
+ ZXing.decode @image.filename
27
+ end
28
+
29
+ # try a bunch of options, organized by most common, give the first hit
30
+ def get_barcode
31
+ [get_code_128].compact.first
32
+ end
33
+
34
+ #def get_datamatrix
35
+ # https://github.com/srijan/ruby-dmtx
36
+ #end
37
+
38
+ # alias to a universal method
39
+ def text
40
+ barcode
25
41
  end
26
42
 
27
43
  end
@@ -7,8 +7,13 @@
7
7
  require 'rtesseract'
8
8
 
9
9
  class Sqed::Parser::OcrParser < Sqed::Parser
10
- attr_accessor :text
11
10
 
11
+ TYPE = :text
12
+
13
+ # the text extracted from the image
14
+ attr_accessor :text
15
+
16
+ # https://code.google.com/p/tesseract-ocr/wiki/FAQ
12
17
  def text
13
18
  img = @image #.white_threshold(245)
14
19
 
@@ -19,9 +24,6 @@ class Sqed::Parser::OcrParser < Sqed::Parser
19
24
  # img = img.scale(2)
20
25
  # img.write('foo0.jpg.jpg')
21
26
  # img = img.enhance
22
- # img = img.enhance
23
- # img = img.enhance
24
- # img = img.enhance
25
27
  # img.write('foo1.jpg')
26
28
  # img = img.quantize(8, Magick::GRAYColorspace)
27
29
  # img.write('foo1.jpg')
@@ -39,12 +41,28 @@ class Sqed::Parser::OcrParser < Sqed::Parser
39
41
  #
40
42
  # img.write('foo.jpg') # for debugging purposes, this is the image that is sent to OCR
41
43
 
42
- r = RTesseract.new(img, lang: 'eng', psm: 3)
43
44
 
45
+ # From https://code.google.com/p/tesseract-ocr/wiki/FAQ
46
+ # " There is a minimum text size for reasonable accuracy. You have to consider resolution as well as point size. Accuracy drops off below 10pt x 300dpi, rapidly below 8pt x 300dpi. A quick check is to count the pixels of the x-height of your characters. (X-height is the height of the lower case x.) At 10pt x 300dpi x-heights are typically about 20 pixels, although this can vary dramatically from font to font. Below an x-height of 10 pixels, you have very little chance of accurate results, and below about 8 pixels, most of the text will be "noise removed".
47
+
48
+
49
+ # http://www.sk-spell.sk.cx/tesseract-ocr-parameters-in-302-version
50
+ # doesn't support outputbase
51
+ r = RTesseract.new(img, lang: 'eng', psm: 1,
52
+ load_system_dawg: 0,
53
+ tessedit_debug_quality_metrics: 1,
54
+ load_freq_dawg: 1 ,
55
+ chop_enable: 1,
56
+ tessedit_write_images: 1,
57
+ equationdetect_save_merged_image: 1,
58
+ tessedit_dump_pageseg_images: 1,
59
+ equationdetect_save_bi_image: 1,
60
+ load_unambig_dawg: 0,
61
+ tessedit_write_params_to_file: 'tmp/ocr_config_file.txt' ) # psm: 3,
44
62
 
45
63
  # img = img.white_threshold(245)
46
64
 
47
- @text = r.to_s
65
+ @text = r.to_s.strip
48
66
  end
49
67
 
50
68
  # Need to provide tuning methods here, i.e. image transformations that facilitate OCR
data/lib/sqed/parser.rb CHANGED
@@ -7,5 +7,12 @@ class Sqed::Parser
7
7
 
8
8
  def initialize(image)
9
9
  @image = image
10
+ raise 'no image provided to parser' if @image && !(@image.class.name == 'Magick::Image')
10
11
  end
12
+
13
+ # must be provided in subclasses
14
+ def text
15
+ nil
16
+ end
17
+
11
18
  end
data/lib/sqed/result.rb CHANGED
@@ -1,15 +1,63 @@
1
-
2
- # A Sqed::Result is a wrapper for the results of the
3
- # full process of data extraction from an image.
4
- #
5
- #
1
+ # A Sqed::Result is a container for the results of the
2
+ # data extraction for the full stage
6
3
  #
7
4
  class Sqed::Result
8
5
 
9
6
  SqedConfig::LAYOUT_SECTION_TYPES.each do |k|
10
- attr_accessor k
11
7
  attr_accessor "#{k}_image".to_sym
8
+ attr_accessor k
9
+ end
10
+
11
+ def initialize
12
+ SqedConfig::LAYOUT_SECTION_TYPES.each do |k|
13
+ send("#{k}=", {})
14
+ end
15
+ end
16
+
17
+ # return [String, nil]
18
+ # the text derived from the OCR parsing of the section
19
+ def text_for(section)
20
+ send(section)[:text]
12
21
  end
22
+
23
+ # return [String, nil]
24
+ # the text derived from the barcode parsing of the section
25
+ def barcode_text_for(section)
26
+ send(section)[:barcode]
27
+ end
28
+
29
+ # return [Hash]
30
+ # a map of layout_section_type => value (if there is a value),
31
+ # i.e. all possible parsed text values returned from the parser
32
+ def text
33
+ result = {}
34
+ SqedConfig::LAYOUT_SECTION_TYPES.each do |k|
35
+ v = self.send(k)
36
+ result.merge!(k => v) if v[:barcode] || v[:text]
37
+ end
38
+ result
39
+ end
40
+
41
+ # return [Hash]
42
+ # a map of layout_section_type => Image
43
+ def images
44
+ result = {}
45
+ SqedConfig::LAYOUT_SECTION_TYPES.each do |k|
46
+ image = self.send("#{k}_image")
47
+ result.merge!(k => image) if image
48
+ end
49
+ result
50
+ end
51
+
52
+ # return [True]
53
+ # write the images in #images to tmp/
54
+ def write_images
55
+ images.each do |k, img|
56
+ img.write("tmp/#{k}.jpg")
57
+ end
58
+ true
59
+ end
60
+
13
61
  end
14
62
 
15
63
 
data/lib/sqed/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class Sqed
2
- VERSION = "0.0.4"
2
+ VERSION = "0.1.0"
3
3
  end
data/lib/sqed.rb CHANGED
@@ -4,7 +4,7 @@ recent_ruby = RUBY_VERSION >= '2.1.1'
4
4
  raise "IMPORTANT: sqed gem requires ruby >= 2.1.1" unless recent_ruby
5
5
 
6
6
  require "RMagick"
7
- # include Magick
7
+ include Magick
8
8
 
9
9
  # Instances take the following
10
10
  # 1) A base image @image
@@ -28,19 +28,27 @@ class Sqed
28
28
  attr_accessor :pattern
29
29
 
30
30
  # the image that is the cropped content for parsing
31
- attr_accessor :stage_image, :stage_boundary, :boundaries, :auto_detect_border, :boundary_color
31
+ attr_accessor :stage_image
32
+
33
+ # a Sqed::Boundaries instance that stores the coordinates of the stage
34
+ attr_accessor :stage_boundary
35
+
36
+ # a Sqed::Boundaries instance that contains the coordinates of the internal stage sections
37
+ attr_accessor :boundaries
38
+
39
+ # Boolean, whether to detect the border on initialization, i.e. new()
40
+ attr_accessor :auto_detect_border
41
+
42
+ # a symbol, :red, :green, :blue, describing the boundary color within the stage
43
+ attr_accessor :boundary_color
32
44
 
33
45
  def initialize(image: image, pattern: pattern, auto_detect_border: true, boundary_color: :green)
34
46
  @image = image
35
-
36
47
  @boundaries = nil
37
- @stage_boundary = Sqed::Boundaries.new(:internal_box) # a.k.a. stage
38
-
48
+ @stage_boundary = Sqed::Boundaries.new(:internal_box)
39
49
  @auto_detect_border = auto_detect_border
40
-
41
50
  @pattern = pattern
42
51
  @pattern ||= :standard_cross
43
-
44
52
  @boundary_color = boundary_color
45
53
 
46
54
  set_stage_boundary if @auto_detect_border && @image
@@ -60,6 +68,8 @@ class Sqed
60
68
  @boundaries
61
69
  end
62
70
 
71
+ # Return [Sqed::Boundaries instance]
72
+ # a boundaries instance that has the original image (prior to cropping stage) coordinates
63
73
  def native_boundaries
64
74
  # check for @boundaries.complete first? OR handle partial detections ?!
65
75
  if @boundaries.complete
@@ -69,14 +79,18 @@ class Sqed
69
79
  end
70
80
  end
71
81
 
82
+ # return [Image]
83
+ # crops the image if not already done
72
84
  def stage_image
73
85
  crop_image if @stage_boundary.complete && @stage_image.nil?
74
86
  @stage_image
75
87
  end
76
88
 
89
+ # return [Image]
90
+ # crops the stage if not done, then sets/returns @stage_image
77
91
  def crop_image
78
92
  if @stage_boundary.complete
79
- @stage_image = @image.crop(*@stage_boundary.for(SqedConfig.index_for_section_type(:stage, :stage)))
93
+ @stage_image = @image.crop(*@stage_boundary.for(SqedConfig.index_for_section_type(:stage, :stage)), true)
80
94
  else
81
95
  @stage_image = @image
82
96
  end
@@ -84,14 +98,24 @@ class Sqed
84
98
 
85
99
  def result
86
100
  return false if @image.nil? || @pattern.nil?
87
- crop_image
88
101
  extractor = Sqed::Extractor.new(
89
- boundaries: @boundaries,
90
- layout: SqedConfig::EXTRACTION_PATTERNS[@pattern][:layout],
91
- image: @stage_image)
102
+ boundaries: boundaries,
103
+ metadata_map: SqedConfig::EXTRACTION_PATTERNS[@pattern][:metadata_map],
104
+ image: stage_image)
92
105
  extractor.result
93
106
  end
94
107
 
108
+ def attributes
109
+ {
110
+ image: @image,
111
+ boundaries: @boundaries,
112
+ stage_boundary: @stage_boundary,
113
+ auto_detect_border: @auto_detect_border,
114
+ pattern: @pattern,
115
+ boundary_color: @boundary_color
116
+ }
117
+ end
118
+
95
119
  protected
96
120
 
97
121
  def set_stage_boundary
@@ -103,6 +127,7 @@ class Sqed
103
127
 
104
128
  def get_section_boundaries
105
129
  boundary_finder_class = SqedConfig::EXTRACTION_PATTERNS[@pattern][:boundary_finder]
130
+
106
131
  options = {image: stage_image}
107
132
  options.merge!( layout: SqedConfig::EXTRACTION_PATTERNS[@pattern][:layout] ) unless boundary_finder_class.name == 'Sqed::BoundaryFinder::CrossFinder'
108
133
  options.merge!( boundary_color: @boundary_color) if boundary_finder_class.name == 'Sqed::BoundaryFinder::ColorLineFinder'
data/lib/sqed_config.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  # encoding: UTF-8
2
2
 
3
+ require 'zxing'
4
+
3
5
  require_relative "sqed/parser"
4
6
  require_relative "sqed/parser/ocr_parser"
5
7
  require_relative "sqed/parser/barcode_parser"
@@ -51,7 +53,6 @@ module SqedConfig
51
53
 
52
54
  # Hash values are used to stub out
53
55
  # the Sqed::Boundaries instance.
54
- #
55
56
  LAYOUTS = {
56
57
  cross: [0,1,2,3],
57
58
  offset_cross: [0,1,2,3],
@@ -62,7 +63,7 @@ module SqedConfig
62
63
  internal_box: [0]
63
64
  }
64
65
 
65
- # Each element of the layout is a "section".
66
+ # Each element of the layout is a "section".
66
67
  LAYOUT_SECTION_TYPES = [
67
68
  :stage, # the image contains the full stage
68
69
  :specimen, # the specimen only, no metadata should be present
@@ -70,26 +71,31 @@ module SqedConfig
70
71
  :determination_labels, # the section contains text that determines the specimen
71
72
  :labels, # the section contains collecting event and non-determination labels
72
73
  :identifier, # the section contains an identifier (e.g. barcode or unique number)
73
- :image_registration # the section contains only image registration information
74
+ :image_registration, # the section contains only image registration information,
75
+ :curator_metadata, # the section contains text with curator metadata
76
+ :nothing # section is empty
74
77
  ]
75
78
 
76
79
  # Links section types to data parsers
77
80
  SECTION_PARSERS = {
78
- labels: Sqed::Parser::OcrParser,
79
- identifier: Sqed::Parser::BarcodeParser,
80
- deterimination_labels: Sqed::Parser::OcrParser
81
+ labels: [ Sqed::Parser::OcrParser ],
82
+ identifier: [ Sqed::Parser::BarcodeParser, Sqed::Parser::OcrParser ],
83
+ deterimination_labels: [ Sqed::Parser::OcrParser ],
84
+ curator_metadata: [ Sqed::Parser::OcrParser ],
85
+ specimen: [ Sqed::Parser::OcrParser ],
81
86
  }
82
87
 
83
88
  EXTRACTION_PATTERNS = {
84
89
  right_t: {
85
90
  boundary_finder: Sqed::BoundaryFinder::ColorLineFinder,
86
91
  layout: :right_t,
87
- metadata_map: {0 => :annotated_specimen, 1 => :identifiers, 2 =>:image_registration }
92
+ metadata_map: {0 => :annotated_specimen, 1 => :identifier, 2 =>:image_registration }
88
93
  },
89
94
  offset_cross: {
90
95
  boundary_finder: Sqed::BoundaryFinder::ColorLineFinder,
91
96
  layout: :offset_cross,
92
- metadata_map: {0 => :annotated_specimen, 1 => :identifiers, 2 =>:image_registration }
97
+ metadata_map: {0 => :curator_metadata, 1 => :identifier, 2 => :image_registration, 3 => :specimen }
98
+ # metadata_map: {0 => :annotated_specimen, 1 => :identifier, 2 =>:image_registration }
93
99
  },
94
100
  standard_cross: {
95
101
  boundary_finder: Sqed::BoundaryFinder::CrossFinder,