sqed 0.0.4 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/README.md +2 -2
  4. data/lib/sqed/boundaries.rb +22 -7
  5. data/lib/sqed/boundary_finder/color_line_finder.rb +26 -19
  6. data/lib/sqed/boundary_finder.rb +2 -2
  7. data/lib/sqed/extractor.rb +33 -17
  8. data/lib/sqed/parser/barcode_parser.rb +32 -16
  9. data/lib/sqed/parser/ocr_parser.rb +24 -6
  10. data/lib/sqed/parser.rb +7 -0
  11. data/lib/sqed/result.rb +54 -6
  12. data/lib/sqed/version.rb +1 -1
  13. data/lib/sqed.rb +37 -12
  14. data/lib/sqed_config.rb +14 -8
  15. data/spec/lib/sqed/boundaries_spec.rb +26 -1
  16. data/spec/lib/sqed/boundary_finder_spec.rb +89 -1
  17. data/spec/lib/sqed/extractor_spec.rb +50 -76
  18. data/spec/lib/sqed/parser/barcode_spec.rb +25 -0
  19. data/spec/lib/sqed/parser/ocr_spec.rb +16 -0
  20. data/spec/lib/sqed/parser_spec.rb +6 -1
  21. data/spec/lib/sqed/result_spec.rb +24 -0
  22. data/spec/lib/sqed_spec.rb +58 -144
  23. data/spec/support/files/barcode_images/code_128_barcode.png +0 -0
  24. data/spec/support/files/barcode_images/datamatrix_barcode.png +0 -0
  25. data/spec/support/files/{2Dbarcode.png → barcode_images/osuc_datamatrix_barcode.png} +0 -0
  26. data/spec/support/files/label_images/basic1.png +0 -0
  27. data/spec/support/files/label_images/basic2.png +0 -0
  28. data/spec/support/files/label_images/readme.png +0 -0
  29. data/spec/support/files/{types_21.jpg → misc_images/types_21.jpg} +0 -0
  30. data/spec/support/files/{types_8.jpg → misc_images/types_8.jpg} +0 -0
  31. data/spec/support/files/{CrossyBlackLinesSpecimen.jpg → stage_images/CrossyBlackLinesSpecimen.jpg} +0 -0
  32. data/spec/support/files/{CrossyGreenLinesSpecimen.jpg → stage_images/CrossyGreenLinesSpecimen.jpg} +0 -0
  33. data/spec/support/files/{black_stage_green_line_specimen.jpg → stage_images/black_stage_green_line_specimen.jpg} +0 -0
  34. data/spec/support/files/{boundary_cross_green.jpg → stage_images/boundary_cross_green.jpg} +0 -0
  35. data/spec/support/files/{boundary_left_t_yellow.jpg → stage_images/boundary_left_t_yellow.jpg} +0 -0
  36. data/spec/support/files/{boundary_offset_cross_red.jpg → stage_images/boundary_offset_cross_red.jpg} +0 -0
  37. data/spec/support/files/{boundary_right_t_green.jpg → stage_images/boundary_right_t_green.jpg} +0 -0
  38. data/spec/support/files/stage_images/frost_stage.jpg +0 -0
  39. data/spec/support/files/{greenlineimage.jpg → stage_images/greenlineimage.jpg} +0 -0
  40. data/spec/support/files/test4.jpg +0 -0
  41. data/spec/support/image_helpers.rb +61 -33
  42. data/sqed.gemspec +3 -2
  43. metadata +58 -34
  44. data/spec/support/files/Quadrant_2_3.jpg +0 -0
  45. data/spec/support/files/test4OLD.jpg +0 -0
  46. data/spec/support/files/test_barcode.JPG +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bed58b448dbb4cd6681353d53ecf41a5d5bc86d3
4
- data.tar.gz: 2a59cd7ffd1033a230d37eca146f40c9ced0bb77
3
+ metadata.gz: 4600b58a8dca4c59c21e0fcaa08c63e7564a56f4
4
+ data.tar.gz: f1f96f8e93e988bd53ca354c4fe3a67f359b1368
5
5
  SHA512:
6
- metadata.gz: 0bf133d7ff4b70b1f064a3450b36f9db622c84d0a104e1ae965e40c23c5d8e8b1711c6724dc7af297a5bf864fecaa2d27a9c6f2ec8e40c812bdf5049b5ac8c45
7
- data.tar.gz: c7b7ad499e5a39daa8f71144fa363a8af69c5dec95842ee7de35bcf4d6e72fbebca88f4f84313cfb7c5f87e9467fe828ce79539e175057c07f4e77d6cd0875fb
6
+ metadata.gz: d7cb582b1d6db468617a132ce67a0dccab2315911eb650468ef6d0e756333cfcca710969d9d845dbe8c3149059499c486845072a053f5674412af268928e2f72
7
+ data.tar.gz: 9a24446ca6fb5caf9d7a9c436013e31cc6ceeb09e4c06f1968ee4cb5a2d544bd939d7b59e3bda023b2cfea38c4eb0cb1182e0971290738c44c8135a33d046b08
data/.gitignore CHANGED
@@ -24,4 +24,6 @@ local/
24
24
  foo*.jpg
25
25
  SessionID_BarcodeImage.JPG
26
26
 
27
+ tess*
28
+
27
29
  /*.jpg
data/README.md CHANGED
@@ -3,7 +3,7 @@
3
3
 
4
4
  # Sqed
5
5
 
6
- Stub for a gem that supports specimen digitization from images.
6
+ Sqed is a gem that facilitates metadata extraction from images of staged collection objects.
7
7
 
8
8
  ## Installation
9
9
 
@@ -21,7 +21,7 @@ Or install it yourself as:
21
21
 
22
22
  ## Usage
23
23
 
24
- TODO: Write usage instructions here
24
+ For the time being, see specs.
25
25
 
26
26
  ## Contributing
27
27
 
@@ -12,13 +12,15 @@ class Sqed::Boundaries
12
12
  # 0 => [10,10,40,40]
13
13
  attr_reader :coordinates
14
14
 
15
- # An Sqed::Config::EXTRACTION_PATTERN layout
15
+ # A symbol from Sqed::Config::LAYOUTS.keys
16
+ # :right_t
16
17
  attr_accessor :layout
17
18
 
18
- # Whether or not the last method to populate this object passed fully
19
+ # Boolean, whether or not the last method to populate this object passed fully
19
20
  attr_accessor :complete
20
21
 
21
22
  def initialize(layout = nil)
23
+ raise 'unrecognized layout' if layout && !SqedConfig::LAYOUTS.include?(layout)
22
24
  @complete = false
23
25
 
24
26
  @layout = layout
@@ -35,11 +37,12 @@ class Sqed::Boundaries
35
37
  def offset(boundary)
36
38
  b = Sqed::Boundaries.new() # the idea here is to create a deep copy of self, offsetting by boundary as we go
37
39
  (0..self.coordinates.length - 1).each do |i|
38
- b.coordinates[i] = [] # create the instance of the i-th coordinate, then populate it
39
- b.coordinates[i][0] = self.x_for(i) + boundary.x_for(0)
40
- b.coordinates[i][1] = self.y_for(i) + boundary.y_for(0)
41
- b.coordinates[i][2] = self.width_for(i)
42
- b.coordinates[i][3] = self.height_for(i)
40
+ b.set(i,
41
+ [(self.x_for(i) + boundary.x_for(0)),
42
+ (self.y_for(i) + boundary.y_for(0)),
43
+ self.width_for(i),
44
+ self.height_for(i)]
45
+ )
43
46
  end
44
47
  b.complete = self.complete
45
48
  b
@@ -76,4 +79,16 @@ class Sqed::Boundaries
76
79
  @coordinates[index][3]
77
80
  end
78
81
 
82
+ def set(index, coordinates)
83
+ @coordinates[index] = coordinates
84
+ end
85
+
86
+ def populated?
87
+ @coordinates.each do |c|
88
+ return false if c[0].nil?
89
+ end
90
+ true
91
+ end
92
+
93
+
79
94
  end
@@ -19,15 +19,15 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
19
19
  when :vertical_split # can vertical and horizontal split be re-used to do cross cases?
20
20
  t = Sqed::BoundaryFinder.color_boundary_finder(image: img, boundary_color: @boundary_color) #detect vertical division, green line
21
21
  return if t.nil?
22
- boundaries.coordinates[0] = [0, 0, t[0], img.rows] # left section of image
23
- boundaries.coordinates[1] = [t[2], 0, img.columns - t[2], img.rows] # right section of image
22
+ boundaries.set(0, [0, 0, t[0], img.rows]) # left section of image
23
+ boundaries.set(1, [t[2], 0, img.columns - t[2], img.rows]) # right section of image
24
24
  boundaries.complete = true
25
25
 
26
26
  when :horizontal_split
27
27
  t = Sqed::BoundaryFinder.color_boundary_finder(image: img, scan: :columns, boundary_color: @boundary_color) # set to detect horizontal division, (green line)
28
28
  return if t.nil?
29
- boundaries.coordinates[0] = [0, 0, img.columns, t[0]] # upper section of image
30
- boundaries.coordinates[1] = [0, t[2], img.columns, img.rows - t[2]] # lower section of image
29
+ boundaries.set(0, [0, 0, img.columns, t[0]]) # upper section of image
30
+ boundaries.set(1, [0, t[2], img.columns, img.rows - t[2]]) # lower section of image
31
31
  boundaries.complete = true
32
32
  # boundaries.coordinates[2] = [0, 0, img.columns, t[1]] # upper section of image
33
33
  # boundaries.coordinates[3] = [0, t[1], img.columns, img.rows - t[1]] # lower section of image
@@ -35,45 +35,52 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
35
35
  when :right_t # only 3 zones expected, with horizontal division in right-side of vertical division
36
36
  t = Sqed::BoundaryFinder.color_boundary_finder(image: img, boundary_color: @boundary_color) #defaults to detect vertical division, green line
37
37
  return if t.nil?
38
- boundaries.coordinates[0] = [0, 0, t[0], img.rows] # left section of image
39
- boundaries.coordinates[1] = [t[2], 0, img.columns - t[2], img.rows] # left section of image
38
+
39
+ left = [0, 0, t[0], img.rows]
40
+ right = [t[2], 0, img.columns - t[2], img.rows]
41
+
42
+ boundaries.set(0, left) # left section of image
40
43
 
41
44
  # now subdivide right side
42
- irt = img.crop(*boundaries.coordinates[1], true)
45
+ irt = img.crop(*right, true)
43
46
  rt = Sqed::BoundaryFinder.color_boundary_finder(image: irt, scan: :columns, boundary_color: @boundary_color) # set to detect horizontal division, (green line)
44
47
  return if rt.nil?
45
- boundaries.coordinates[1] = [t[2], 0, img.columns - t[2], rt[0]] # upper section of image
46
- boundaries.coordinates[2] = [t[2], rt[2], img.columns - t[2], img.rows - rt[2]] # lower section of image
48
+ boundaries.set(1, [t[2], 0, img.columns - t[2], rt[0]]) # upper section of image
49
+ boundaries.set(2, [t[2], rt[2], img.columns - t[2], img.rows - rt[2]]) # lower section of image
47
50
  boundaries.complete = true
48
51
  # will return 1, 2, or 3
49
52
 
50
53
  when :offset_cross # 4 zones expected, with horizontal division in right- and left- sides of vertical division
51
54
  t = Sqed::BoundaryFinder.color_boundary_finder(image: img, boundary_color: @boundary_color) # defaults to detect vertical division, green line
52
55
  raise if t.nil?
53
- boundaries.coordinates[0] = [0, 0, t[0], img.rows] # left section of image
54
- boundaries.coordinates[1] = [t[2], 0, img.columns - t[2], img.rows] # right section of image
56
+
57
+ left = [0, 0, t[0], img.rows] # left section of image
58
+ right = [t[2], 0, img.columns - t[2], img.rows] # right section of image
55
59
 
56
60
  # now subdivide left side
57
- ilt = img.crop(*boundaries.coordinates[0], true)
61
+ ilt = img.crop(*left, true)
58
62
 
59
63
  lt = Sqed::BoundaryFinder.color_boundary_finder(image: ilt, scan: :columns, boundary_color: @boundary_color) # set to detect horizontal division, (green line)
64
+
60
65
  if !lt.nil?
61
- boundaries.coordinates[0] = [0, 0, t[0], lt[0]] # upper section of image
62
- boundaries.coordinates[3] = [0, lt[2], t[0], img.rows - lt[2]] # lower section of image
66
+ boundaries.set(0, [0, 0, left[2], lt[0]]) # upper section of image
67
+ boundaries.set(3, [0, lt[2], left[2], img.rows - lt[2]]) # lower section of image
63
68
  end
64
69
 
65
70
  # now subdivide right side
66
- irt = img.crop(*boundaries.coordinates[1], true)
71
+ irt = img.crop(*right, true)
67
72
  rt = Sqed::BoundaryFinder.color_boundary_finder(image: irt, scan: :columns, boundary_color: @boundary_color) # set to detect horizontal division, (green line)
68
73
  return if rt.nil?
69
74
 
70
- boundaries.coordinates[1] = [t[2], 0, img.columns - t[2], rt[0]] # upper section of image
71
- boundaries.coordinates[2] = [t[2], rt[2], img.columns - t[2], img.rows - rt[2]] # lower section of image
75
+ boundaries.set(1, [t[2], 0, img.columns - t[2], rt[0]]) # upper section of image
76
+ boundaries.set(2, [t[2], rt[2], img.columns - t[2], img.rows - rt[2]]) # lower section of image
72
77
  # will return 1, 2, 3, or 4 //// does not handle staggered vertical boundary case
73
- boundaries.complete = true
78
+
79
+ boundaries.complete = true if boundaries.populated?
74
80
 
75
81
  else
76
- boundaries.coordinates[0] = [0, 0, img.columns, img.rows] # totality of image as default
82
+ boundaries.set(0, [0, 0, img.columns, img.rows]) # totality of image as default
83
+ # TODO: boundaries.complete status here?
77
84
  return # return original image boundary if no method implemented
78
85
  end
79
86
 
@@ -52,9 +52,9 @@ class Sqed::BoundaryFinder
52
52
  (0..samples_to_take).each do |s|
53
53
  # Create a sample image a single pixel tall
54
54
  if scan == :rows
55
- j = image.crop(0, s * sample_subdivision_size, image.columns, 1)
55
+ j = image.crop(0, s * sample_subdivision_size, image.columns, 1, true)
56
56
  elsif scan == :columns
57
- j = image.crop(s * sample_subdivision_size, 0, 1, image.rows)
57
+ j = image.crop(s * sample_subdivision_size, 0, 1, image.rows, true)
58
58
  else
59
59
  raise
60
60
  end
@@ -1,45 +1,61 @@
1
1
  require 'RMagick'
2
2
 
3
- # An Extractor takes Boundries object and a layout pattern and returns a Sqed::Result
3
+ # An Extractor takes a Boundaries object and a metadata_map pattern and returns a Sqed::Result
4
4
  #
5
5
  class Sqed::Extractor
6
6
 
7
- attr_accessor :boundaries, :layout, :image
7
+ # a Sqed::Boundaries instance
8
+ attr_accessor :boundaries
8
9
 
9
- def initialize(boundaries: boundaries, layout: layout, image: image)
10
- raise if boundaries.nil? || !boundaries.class == Sqed::Boundaries
11
- raise if layout.nil? || !layout.class == Hash
10
+ # a metadata_map hash from EXTRACTION_PATTERNS like:
11
+ # {0 => :annotated_specimen, 1 => :identifier, 2 =>:image_registration }
12
+ attr_accessor :metadata_map
12
13
 
13
- @layout = layout
14
+ # a Magick::Image file
15
+ attr_accessor :image
16
+
17
+ def initialize(boundaries: boundaries, metadata_map: metadata_map, image: image)
18
+ raise 'boundaries not provided or provided boundary is not a Sqed::Boundaries' if boundaries.nil? || !boundaries.class == Sqed::Boundaries
19
+ raise 'metadata_map not provided or metadata_map not a Hash' if metadata_map.nil? || !metadata_map.class == Hash
20
+ raise 'image not provided' if image.nil? || !image.class == Magick::Image
21
+
22
+ @metadata_map = metadata_map
14
23
  @boundaries = boundaries
15
24
  @image = image
16
25
  end
17
26
 
18
27
  def result
19
28
  r = Sqed::Result.new()
20
-
29
+
21
30
  # assign the images to the result
22
- boundaries.each do |section, coords|
23
- r.send("#{LAYOUT_SECTION_TYPES[section]}=", extract_image(coords))
31
+ boundaries.each do |section_index, coords|
32
+ image_setter = "#{metadata_map[section_index]}_image="
33
+ r.send(image_setter, extract_image(coords))
24
34
  end
25
35
 
26
36
  # assign the metadata to the result
27
- layout.keys.each do |section_index, section_type|
37
+ metadata_map.each do |section_index, section_type|
28
38
  # only extract data if a parser exists
29
- if parser = SECTION_PARSERS[section_type]
30
- r.send("#{section_type}=", parser.new(image: r.send(section_type + "_image").text) )
39
+ if parsers = SqedConfig::SECTION_PARSERS[section_type]
40
+
41
+ section_image = r.send("#{section_type}_image")
42
+ updated = r.send(section_type)
43
+
44
+ parsers.each do |p|
45
+ parsed_result = p.new(section_image).text
46
+ updated.merge!(p::TYPE => parsed_result) if parsed_result
47
+ end
48
+
49
+ r.send("#{section_type}=", updated)
31
50
  end
32
51
  end
33
52
 
34
53
  r
35
54
  end
36
55
 
37
- # coords are x1, y1, x2, y2
56
+ # crop takes x, y, width, height
38
57
  def extract_image(coords)
39
- # crop takes x, y, width, height
40
- # @image.crop(coords[0], coords[1], coords[2] - coords[0], coords[3] - coords[1] )
41
- bp = 0
42
- @image.crop(coords[0], coords[1], coords[2], coords[3], true)
58
+ i = @image.crop(*coords, true)
43
59
  end
44
60
 
45
61
  end
@@ -1,27 +1,43 @@
1
1
  # Given an image, return an ordered array of detectable barcodes
2
2
 
3
+
4
+
3
5
  class Sqed::Parser::BarcodeParser < Sqed::Parser
4
- attr_accessor :barcodes
6
+
7
+ TYPE = :barcode
8
+
9
+ attr_accessor :image
10
+
11
+ attr_accessor :barcode
5
12
 
6
13
  def initialize(image)
7
14
  super
8
- @barcodes = bar_codes
15
+ @image = image
9
16
  end
10
17
 
11
- def bar_codes
12
- # process the images, spit out the barcodes
13
- # return ZXing.decode_all(@image) #['ABC 123', 'DEF 456']
14
- # a = `/usr/local/Cellar/zbar/0.10_1/bin/zbarimg ~/src/sqed/spec/support/files/test_barcode.JPG`
15
- # b = a.split("\n")
16
- f = 'SessionID_BarcodeImage.JPG'
17
- i = @image[:image]
18
- if i.nil?
19
- i = @image
20
- end
21
- i.write("tmp/#{f}")
22
- c = `/usr/local/Cellar/zbar/0.10_1/bin/zbarimg #{f}`
23
- d = c.split("\n")
24
- return d
18
+ def barcode
19
+ @barcode ||= get_barcode
20
+ @barcode
21
+ end
22
+
23
+ # Uses the same engine as zbarimg that you can install with brew (zbarimg)
24
+ #
25
+ def get_code_128
26
+ ZXing.decode @image.filename
27
+ end
28
+
29
+ # try a bunch of options, organized by most common, give the first hit
30
+ def get_barcode
31
+ [get_code_128].compact.first
32
+ end
33
+
34
+ #def get_datamatrix
35
+ # https://github.com/srijan/ruby-dmtx
36
+ #end
37
+
38
+ # alias to a universal method
39
+ def text
40
+ barcode
25
41
  end
26
42
 
27
43
  end
@@ -7,8 +7,13 @@
7
7
  require 'rtesseract'
8
8
 
9
9
  class Sqed::Parser::OcrParser < Sqed::Parser
10
- attr_accessor :text
11
10
 
11
+ TYPE = :text
12
+
13
+ # the text extracted from the image
14
+ attr_accessor :text
15
+
16
+ # https://code.google.com/p/tesseract-ocr/wiki/FAQ
12
17
  def text
13
18
  img = @image #.white_threshold(245)
14
19
 
@@ -19,9 +24,6 @@ class Sqed::Parser::OcrParser < Sqed::Parser
19
24
  # img = img.scale(2)
20
25
  # img.write('foo0.jpg.jpg')
21
26
  # img = img.enhance
22
- # img = img.enhance
23
- # img = img.enhance
24
- # img = img.enhance
25
27
  # img.write('foo1.jpg')
26
28
  # img = img.quantize(8, Magick::GRAYColorspace)
27
29
  # img.write('foo1.jpg')
@@ -39,12 +41,28 @@ class Sqed::Parser::OcrParser < Sqed::Parser
39
41
  #
40
42
  # img.write('foo.jpg') # for debugging purposes, this is the image that is sent to OCR
41
43
 
42
- r = RTesseract.new(img, lang: 'eng', psm: 3)
43
44
 
45
+ # From https://code.google.com/p/tesseract-ocr/wiki/FAQ
46
+ # " There is a minimum text size for reasonable accuracy. You have to consider resolution as well as point size. Accuracy drops off below 10pt x 300dpi, rapidly below 8pt x 300dpi. A quick check is to count the pixels of the x-height of your characters. (X-height is the height of the lower case x.) At 10pt x 300dpi x-heights are typically about 20 pixels, although this can vary dramatically from font to font. Below an x-height of 10 pixels, you have very little chance of accurate results, and below about 8 pixels, most of the text will be "noise removed".
47
+
48
+
49
+ # http://www.sk-spell.sk.cx/tesseract-ocr-parameters-in-302-version
50
+ # doesn't support outputbase
51
+ r = RTesseract.new(img, lang: 'eng', psm: 1,
52
+ load_system_dawg: 0,
53
+ tessedit_debug_quality_metrics: 1,
54
+ load_freq_dawg: 1 ,
55
+ chop_enable: 1,
56
+ tessedit_write_images: 1,
57
+ equationdetect_save_merged_image: 1,
58
+ tessedit_dump_pageseg_images: 1,
59
+ equationdetect_save_bi_image: 1,
60
+ load_unambig_dawg: 0,
61
+ tessedit_write_params_to_file: 'tmp/ocr_config_file.txt' ) # psm: 3,
44
62
 
45
63
  # img = img.white_threshold(245)
46
64
 
47
- @text = r.to_s
65
+ @text = r.to_s.strip
48
66
  end
49
67
 
50
68
  # Need to provide tuning methods here, i.e. image transformations that facilitate OCR
data/lib/sqed/parser.rb CHANGED
@@ -7,5 +7,12 @@ class Sqed::Parser
7
7
 
8
8
  def initialize(image)
9
9
  @image = image
10
+ raise 'no image provided to parser' if @image && !(@image.class.name == 'Magick::Image')
10
11
  end
12
+
13
+ # must be provided in subclasses
14
+ def text
15
+ nil
16
+ end
17
+
11
18
  end
data/lib/sqed/result.rb CHANGED
@@ -1,15 +1,63 @@
1
-
2
- # A Sqed::Result is a wrapper for the results of the
3
- # full process of data extraction from an image.
4
- #
5
- #
1
+ # A Sqed::Result is a container for the results of the
2
+ # data extraction for the full stage
6
3
  #
7
4
  class Sqed::Result
8
5
 
9
6
  SqedConfig::LAYOUT_SECTION_TYPES.each do |k|
10
- attr_accessor k
11
7
  attr_accessor "#{k}_image".to_sym
8
+ attr_accessor k
9
+ end
10
+
11
+ def initialize
12
+ SqedConfig::LAYOUT_SECTION_TYPES.each do |k|
13
+ send("#{k}=", {})
14
+ end
15
+ end
16
+
17
+ # return [String, nil]
18
+ # the text derived from the OCR parsing of the section
19
+ def text_for(section)
20
+ send(section)[:text]
12
21
  end
22
+
23
+ # return [String, nil]
24
+ # the text derived from the barcode parsing of the section
25
+ def barcode_text_for(section)
26
+ send(section)[:barcode]
27
+ end
28
+
29
+ # return [Hash]
30
+ # a map of layout_section_type => value (if there is a value),
31
+ # i.e. all possible parsed text values returned from the parser
32
+ def text
33
+ result = {}
34
+ SqedConfig::LAYOUT_SECTION_TYPES.each do |k|
35
+ v = self.send(k)
36
+ result.merge!(k => v) if v[:barcode] || v[:text]
37
+ end
38
+ result
39
+ end
40
+
41
+ # return [Hash]
42
+ # a map of layout_section_type => Image
43
+ def images
44
+ result = {}
45
+ SqedConfig::LAYOUT_SECTION_TYPES.each do |k|
46
+ image = self.send("#{k}_image")
47
+ result.merge!(k => image) if image
48
+ end
49
+ result
50
+ end
51
+
52
+ # return [True]
53
+ # write the images in #images to tmp/
54
+ def write_images
55
+ images.each do |k, img|
56
+ img.write("tmp/#{k}.jpg")
57
+ end
58
+ true
59
+ end
60
+
13
61
  end
14
62
 
15
63
 
data/lib/sqed/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class Sqed
2
- VERSION = "0.0.4"
2
+ VERSION = "0.1.0"
3
3
  end
data/lib/sqed.rb CHANGED
@@ -4,7 +4,7 @@ recent_ruby = RUBY_VERSION >= '2.1.1'
4
4
  raise "IMPORTANT: sqed gem requires ruby >= 2.1.1" unless recent_ruby
5
5
 
6
6
  require "RMagick"
7
- # include Magick
7
+ include Magick
8
8
 
9
9
  # Instances take the following
10
10
  # 1) A base image @image
@@ -28,19 +28,27 @@ class Sqed
28
28
  attr_accessor :pattern
29
29
 
30
30
  # the image that is the cropped content for parsing
31
- attr_accessor :stage_image, :stage_boundary, :boundaries, :auto_detect_border, :boundary_color
31
+ attr_accessor :stage_image
32
+
33
+ # a Sqed::Boundaries instance that stores the coordinates of the stage
34
+ attr_accessor :stage_boundary
35
+
36
+ # a Sqed::Boundaries instance that contains the coordinates of the internal stage sections
37
+ attr_accessor :boundaries
38
+
39
+ # Boolean, whether to detect the border on initialization, i.e. new()
40
+ attr_accessor :auto_detect_border
41
+
42
+ # a symbol, :red, :green, :blue, describing the boundary color within the stage
43
+ attr_accessor :boundary_color
32
44
 
33
45
  def initialize(image: image, pattern: pattern, auto_detect_border: true, boundary_color: :green)
34
46
  @image = image
35
-
36
47
  @boundaries = nil
37
- @stage_boundary = Sqed::Boundaries.new(:internal_box) # a.k.a. stage
38
-
48
+ @stage_boundary = Sqed::Boundaries.new(:internal_box)
39
49
  @auto_detect_border = auto_detect_border
40
-
41
50
  @pattern = pattern
42
51
  @pattern ||= :standard_cross
43
-
44
52
  @boundary_color = boundary_color
45
53
 
46
54
  set_stage_boundary if @auto_detect_border && @image
@@ -60,6 +68,8 @@ class Sqed
60
68
  @boundaries
61
69
  end
62
70
 
71
+ # Return [Sqed::Boundaries instance]
72
+ # a boundaries instance that has the original image (prior to cropping stage) coordinates
63
73
  def native_boundaries
64
74
  # check for @boundaries.complete first? OR handle partial detections ?!
65
75
  if @boundaries.complete
@@ -69,14 +79,18 @@ class Sqed
69
79
  end
70
80
  end
71
81
 
82
+ # return [Image]
83
+ # crops the image if not already done
72
84
  def stage_image
73
85
  crop_image if @stage_boundary.complete && @stage_image.nil?
74
86
  @stage_image
75
87
  end
76
88
 
89
+ # return [Image]
90
+ # crops the stage if not done, then sets/returns @stage_image
77
91
  def crop_image
78
92
  if @stage_boundary.complete
79
- @stage_image = @image.crop(*@stage_boundary.for(SqedConfig.index_for_section_type(:stage, :stage)))
93
+ @stage_image = @image.crop(*@stage_boundary.for(SqedConfig.index_for_section_type(:stage, :stage)), true)
80
94
  else
81
95
  @stage_image = @image
82
96
  end
@@ -84,14 +98,24 @@ class Sqed
84
98
 
85
99
  def result
86
100
  return false if @image.nil? || @pattern.nil?
87
- crop_image
88
101
  extractor = Sqed::Extractor.new(
89
- boundaries: @boundaries,
90
- layout: SqedConfig::EXTRACTION_PATTERNS[@pattern][:layout],
91
- image: @stage_image)
102
+ boundaries: boundaries,
103
+ metadata_map: SqedConfig::EXTRACTION_PATTERNS[@pattern][:metadata_map],
104
+ image: stage_image)
92
105
  extractor.result
93
106
  end
94
107
 
108
+ def attributes
109
+ {
110
+ image: @image,
111
+ boundaries: @boundaries,
112
+ stage_boundary: @stage_boundary,
113
+ auto_detect_border: @auto_detect_border,
114
+ pattern: @pattern,
115
+ boundary_color: @boundary_color
116
+ }
117
+ end
118
+
95
119
  protected
96
120
 
97
121
  def set_stage_boundary
@@ -103,6 +127,7 @@ class Sqed
103
127
 
104
128
  def get_section_boundaries
105
129
  boundary_finder_class = SqedConfig::EXTRACTION_PATTERNS[@pattern][:boundary_finder]
130
+
106
131
  options = {image: stage_image}
107
132
  options.merge!( layout: SqedConfig::EXTRACTION_PATTERNS[@pattern][:layout] ) unless boundary_finder_class.name == 'Sqed::BoundaryFinder::CrossFinder'
108
133
  options.merge!( boundary_color: @boundary_color) if boundary_finder_class.name == 'Sqed::BoundaryFinder::ColorLineFinder'
data/lib/sqed_config.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  # encoding: UTF-8
2
2
 
3
+ require 'zxing'
4
+
3
5
  require_relative "sqed/parser"
4
6
  require_relative "sqed/parser/ocr_parser"
5
7
  require_relative "sqed/parser/barcode_parser"
@@ -51,7 +53,6 @@ module SqedConfig
51
53
 
52
54
  # Hash values are used to stub out
53
55
  # the Sqed::Boundaries instance.
54
- #
55
56
  LAYOUTS = {
56
57
  cross: [0,1,2,3],
57
58
  offset_cross: [0,1,2,3],
@@ -62,7 +63,7 @@ module SqedConfig
62
63
  internal_box: [0]
63
64
  }
64
65
 
65
- # Each element of the layout is a "section".
66
+ # Each element of the layout is a "section".
66
67
  LAYOUT_SECTION_TYPES = [
67
68
  :stage, # the image contains the full stage
68
69
  :specimen, # the specimen only, no metadata should be present
@@ -70,26 +71,31 @@ module SqedConfig
70
71
  :determination_labels, # the section contains text that determines the specimen
71
72
  :labels, # the section contains collecting event and non-determination labels
72
73
  :identifier, # the section contains an identifier (e.g. barcode or unique number)
73
- :image_registration # the section contains only image registration information
74
+ :image_registration, # the section contains only image registration information,
75
+ :curator_metadata, # the section contains text with curator metadata
76
+ :nothing # section is empty
74
77
  ]
75
78
 
76
79
  # Links section types to data parsers
77
80
  SECTION_PARSERS = {
78
- labels: Sqed::Parser::OcrParser,
79
- identifier: Sqed::Parser::BarcodeParser,
80
- deterimination_labels: Sqed::Parser::OcrParser
81
+ labels: [ Sqed::Parser::OcrParser ],
82
+ identifier: [ Sqed::Parser::BarcodeParser, Sqed::Parser::OcrParser ],
83
+ deterimination_labels: [ Sqed::Parser::OcrParser ],
84
+ curator_metadata: [ Sqed::Parser::OcrParser ],
85
+ specimen: [ Sqed::Parser::OcrParser ],
81
86
  }
82
87
 
83
88
  EXTRACTION_PATTERNS = {
84
89
  right_t: {
85
90
  boundary_finder: Sqed::BoundaryFinder::ColorLineFinder,
86
91
  layout: :right_t,
87
- metadata_map: {0 => :annotated_specimen, 1 => :identifiers, 2 =>:image_registration }
92
+ metadata_map: {0 => :annotated_specimen, 1 => :identifier, 2 =>:image_registration }
88
93
  },
89
94
  offset_cross: {
90
95
  boundary_finder: Sqed::BoundaryFinder::ColorLineFinder,
91
96
  layout: :offset_cross,
92
- metadata_map: {0 => :annotated_specimen, 1 => :identifiers, 2 =>:image_registration }
97
+ metadata_map: {0 => :curator_metadata, 1 => :identifier, 2 => :image_registration, 3 => :specimen }
98
+ # metadata_map: {0 => :annotated_specimen, 1 => :identifier, 2 =>:image_registration }
93
99
  },
94
100
  standard_cross: {
95
101
  boundary_finder: Sqed::BoundaryFinder::CrossFinder,