RubyGems - sqed - Versions diffs - 0.3.2 → 0.4.0 - Mend

sqed 0.3.2 → 0.4.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (28)

checksums.yaml +4 -4
data/.ruby-version +1 -1
data/Guardfile +66 -0
data/lib/sqed.rb +120 -68
data/lib/sqed/boundaries.rb +30 -25
data/lib/sqed/boundary_finder.rb +221 -212
data/lib/sqed/boundary_finder/color_line_finder.rb +50 -42
data/lib/sqed/boundary_finder/cross_finder.rb +3 -3
data/lib/sqed/boundary_finder/stage_finder.rb +8 -3
data/lib/sqed/extractor.rb +23 -25
data/lib/sqed/parser.rb +4 -7
data/lib/sqed/parser/barcode_parser.rb +5 -5
data/lib/sqed/parser/ocr_parser.rb +46 -46
data/lib/sqed/result.rb +60 -57
data/lib/sqed/version.rb +1 -1
data/lib/sqed_config.rb +52 -56
data/spec/lib/sqed/boundaries_spec.rb +1 -1
data/spec/lib/sqed/boundary_finder/color_line_finder_spec.rb +24 -24
data/spec/lib/sqed/boundary_finder/cross_finder_spec.rb +1 -1
data/spec/lib/sqed/boundary_finder/stage_finder_spec.rb +1 -1
data/spec/lib/sqed/boundary_finder_spec.rb +73 -45
data/spec/lib/sqed/extractor_spec.rb +4 -4
data/spec/lib/sqed/parser/ocr_spec.rb +2 -2
data/spec/lib/sqed_spec.rb +39 -39
data/spec/lib/stage_handling/seven_slot_spec.rb +45 -9
data/spec/support/files/stage_images/inhs_7_slot2.jpg +0 -0
data/spec/support/image_helpers.rb +10 -9
metadata +6 -3

data/lib/sqed/boundary_finder/color_line_finder.rb CHANGED

@@ -4,15 +4,24 @@ require 'rmagick'
 #
 class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
-  def initialize(target_image: image, target_layout: layout, boundary_color: :green, use_thumbnail: true)
-    super(target_image: target_image, target_layout: target_layout, use_thumbnail: use_thumbnail)
+  attr_accessor :boundary_color
+  def initialize(**opts)
+    # image: image, layout: layout, boundary_color: :green, use_thumbnail: true)
+    image = opts[:image]
+    layout = opts[:layout]
+    use_thumbnail = opts[:use_thumbnail]
+    @boundary_color = opts[:boundary_color] || :green
+    super(image: image, layout: layout, use_thumbnail: use_thumbnail)
     raise 'No layout provided.' if @layout.nil?
     @boundary_color = boundary_color
-   if use_thumbnail
-     @original_image = @image.copy
-     @image = thumbnail
-   end
+    if use_thumbnail
+      @original_image = @image.copy
+      @image = thumbnail
+    end
     find_bands
   end
@@ -21,77 +30,78 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
   def find_bands
     case layout    # boundaries.coordinates are referenced from stage image
-    # No specs for this yet
+      # No specs for this yet
     when :seven_slot
-      top_bottom_split = Sqed::BoundaryFinder.color_boundary_finder(target_image: image, scan: :columns, boundary_color: @boundary_color)              # detect vertical division [array]
-      left_right_split = Sqed::BoundaryFinder.color_boundary_finder(target_image: image, sample_subdivision_size: 2, boundary_color: @boundary_color)  # detect horizontal division [array]
+      top_bottom_split = Sqed::BoundaryFinder.color_boundary_finder(image: image, scan: :columns, boundary_color: boundary_color)              # detect vertical division [array]
+      left_right_split = Sqed::BoundaryFinder.color_boundary_finder(image: image, sample_subdivision_size: 2, boundary_color: boundary_color)  # detect horizontal division [array]
-      boundaries.set(0, [0, 0, left_right_split[0], top_bottom_split[0] ])
-      boundaries.set(6, [0, top_bottom_split[2], left_right_split[0], image.rows - top_bottom_split[2] ] )
+      boundaries.set(0, [0, 0, left_right_split[0], top_bottom_split[0]])
+      boundaries.set(6, [0, top_bottom_split[2], left_right_split[0], image.rows - top_bottom_split[2]] )
       right_top_image = image.crop( left_right_split[2], 0, image.columns - left_right_split[2], top_bottom_split[0] , true) # sections 1,2
       right_bottom_image = image.crop(left_right_split[2], top_bottom_split[2], image.columns - left_right_split[2], image.rows - top_bottom_split[2], true)  # sections 3,4,5
-      right_top_split = corrected_frequency(Sqed::BoundaryFinder.color_boundary_finder(target_image: right_top_image, boundary_color: @boundary_color)) # vertical line b/w 1 & 2, use "corrected_frequency" to account for color bleed from previous crop
+      right_top_split = corrected_frequency(Sqed::BoundaryFinder.color_boundary_finder(image: right_top_image, boundary_color: boundary_color)) # vertical line b/w 1 & 2, use "corrected_frequency" to account for color bleed from previous crop
       boundaries.set(1, [left_right_split[2], 0, right_top_split[0], top_bottom_split[0] ])
-      boundaries.set(2, [left_right_split[2] + right_top_split[2], 0, right_top_image.columns - right_top_split[2], top_bottom_split[0] ]   )
+      boundaries.set(2, [left_right_split[2] + right_top_split[2], 0, right_top_image.columns - right_top_split[2], top_bottom_split[0]])
+      right_bottom_split = corrected_frequency(Sqed::BoundaryFinder.color_boundary_finder(image: right_bottom_image, scan: :columns, sample_subdivision_size: 2, boundary_color: boundary_color)) # horizontal line b/w (5,3) & 4, use "corrected_frequency" to account for color bleed from previous crop
+      bottom_right_top_image = right_bottom_image.crop(0,0, image.columns - left_right_split[2], right_bottom_split[0], true) # 3,5
-      right_bottom_split = corrected_frequency(Sqed::BoundaryFinder.color_boundary_finder(target_image: right_bottom_image, scan: :columns, sample_subdivision_size: 2, boundary_color: @boundary_color)) # horizontal line b/w (5,3) & 4, use "corrected_frequency" to account for color bleed from previous crop
-      bottom_right_top_image = right_bottom_image.crop(0,0, image.columns - left_right_split[2], right_bottom_split[2], true) # 3,5
+      boundaries.set(3, [ left_right_split[2] + right_top_split[2], top_bottom_split[2], left_right_split[2] + right_top_split[2], bottom_right_top_image.rows ])
+      boundaries.set(5, [ left_right_split[2], top_bottom_split[2], right_top_split[0], bottom_right_top_image.rows])
-      boundaries.set(3, [ left_right_split[2] + right_top_split[2], top_bottom_split[2], left_right_split[2] + right_top_split[2], bottom_right_top_image.rows ] )
-      boundaries.set(5, [ left_right_split[2], top_bottom_split[2], right_top_split[0], bottom_right_top_image.rows ] )
-      boundaries.set(4, [ left_right_split[2], top_bottom_split[2] + right_top_split[2], image.columns - left_right_split[2],  right_bottom_image.rows - right_top_split[2] ] )
+      # ! not high enough
+      boundaries.set(4, [left_right_split[2], top_bottom_split[2] + right_bottom_split[2], image.columns - left_right_split[2], right_bottom_image.rows ])
     when :vertical_split
-      t = Sqed::BoundaryFinder.color_boundary_finder(target_image: image, boundary_color: @boundary_color)  #detect vertical division
+      t = Sqed::BoundaryFinder.color_boundary_finder(image: image, boundary_color: boundary_color)  #detect vertical division
       return if t.nil?
       boundaries.set(0, [0, 0, t[0], image.rows])  # left section of image
       boundaries.set(1, [t[2], 0, image.columns - t[2], image.rows])  # right section of image
     when :horizontal_split
-      t = Sqed::BoundaryFinder.color_boundary_finder(target_image: image, scan: :columns, boundary_color: @boundary_color)  # set to detect horizontal division
+      t = Sqed::BoundaryFinder.color_boundary_finder(image: image, scan: :columns, boundary_color: boundary_color)  # set to detect horizontal division
       return if t.nil?
       boundaries.set(0, [0, 0, image.columns, t[0]])  # upper section of image
       boundaries.set(1, [0, t[2], image.columns, image.rows - t[2]])  # lower section of image
     when :right_t # only 3 zones expected, with horizontal division in right-side of vertical division
-      vertical = self.class.new(target_image: @image, target_layout: :vertical_split, boundary_color: @boundary_color, use_thumbnail: false ).boundaries
+      vertical = self.class.new(image: @image, layout: :vertical_split, boundary_color: boundary_color, use_thumbnail: false ).boundaries
       irt = image.crop(*vertical.for(1), true)
-      right = self.class.new(target_image: irt, target_layout: :horizontal_split, boundary_color: @boundary_color, use_thumbnail: false ).boundaries
+      right = self.class.new(image: irt, layout: :horizontal_split, boundary_color: boundary_color, use_thumbnail: false ).boundaries
       boundaries.set(0, vertical.for(0))
       boundaries.set(1, [ vertical.x_for(1), 0, right.width_for(0), right.height_for(0) ] )
       boundaries.set(2, [ vertical.x_for(1), right.y_for(1), right.width_for(1), right.height_for(1)] )
     when :vertical_offset_cross   # 4 zones expected, with (varying) horizontal division in left- and right- sides of vertical division
-      vertical = self.class.new(target_image: @image, target_layout: :vertical_split, boundary_color: @boundary_color, use_thumbnail: false).boundaries
+      vertical = self.class.new(image: @image, layout: :vertical_split, boundary_color: boundary_color, use_thumbnail: false).boundaries
       ilt = image.crop(*vertical.for(0), true)
       irt = image.crop(*vertical.for(1), true)
-      left = self.class.new(target_image: ilt, target_layout: :horizontal_split, boundary_color: @boundary_color, use_thumbnail: false).boundaries   # fails
-      right = self.class.new(target_image: irt, target_layout: :horizontal_split, boundary_color: @boundary_color, use_thumbnail: false ).boundaries # OK
+      left = self.class.new(image: ilt, layout: :horizontal_split, boundary_color: boundary_color, use_thumbnail: false).boundaries   # fails
+      right = self.class.new(image: irt, layout: :horizontal_split, boundary_color: boundary_color, use_thumbnail: false ).boundaries # OK
       boundaries.set(0, [0, 0, left.width_for(0), left.height_for(0) ])
       boundaries.set(1, [vertical.x_for(1), 0, right.width_for(0), right.height_for(0) ])
       boundaries.set(2, [vertical.x_for(1), right.y_for(1), right.width_for(1), right.height_for(1) ])
       boundaries.set(3, [0, left.y_for(1), left.width_for(1), left.height_for(1) ])
-    # No specs for this yet
+      # No specs for this yet
     when :horizontal_offset_cross
-      horizontal = self.class.new(target_image: @image, target_layout: :horizontal_split, boundary_color: @boundary_color, use_thumbnail: false ).boundaries
+      horizontal = self.class.new(image: @image, layout: :horizontal_split, boundary_color: boundary_color, use_thumbnail: false ).boundaries
       itop = image.crop(*horizontal.for(0), true)
       ibottom = image.crop(*horizontal.for(1), true)
-      top = self.class.new(target_image: ilt, target_layout: :vertical_split, boundary_color: @boundary_color, use_thumbnail: false ).boundaries
-      bottom = self.class.new(target_image: irt, target_layout: :vertical_split, boundary_color: @boundary_color, use_thumbnail: false ).boundaries
+      top = self.class.new(image: ilt, layout: :vertical_split, boundary_color: boundary_color, use_thumbnail: false ).boundaries
+      bottom = self.class.new(image: irt, layout: :vertical_split, boundary_color: boundary_color, use_thumbnail: false ).boundaries
       boundaries.set(0, [0, 0, top.width_for(0), top.height_for(0) ])
       boundaries.set(1, [top.x_for(1), 0, top.width_for(1), top.height_for(1) ])
@@ -99,9 +109,9 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
       boundaries.set(3, [0, horizontal.y_for(1), bottom.width_for(0), bottom.height_for(0) ])
     when :cross # 4 zones, with perfectly intersected horizontal and vertical division
-      v = self.class.new(target_image: @image, target_layout: :vertical_split, boundary_color: @boundary_color, use_thumbnail: false ).boundaries
-      h = self.class.new(target_image: @image, target_layout: :horizontal_split, boundary_color: @boundary_color, use_thumbnail: false).boundaries
+      v = self.class.new(image: @image, layout: :vertical_split, boundary_color: boundary_color, use_thumbnail: false ).boundaries
+      h = self.class.new(image: @image, layout: :horizontal_split, boundary_color: boundary_color, use_thumbnail: false).boundaries
       return if v.nil? || h.nil?
       boundaries.set(0, [0,0, v.width_for(0), h.height_for(0) ])
@@ -116,13 +126,11 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
     boundaries.complete = true if boundaries.populated?
-   if use_thumbnail
-     @image = @original_image
-     zoom_boundaries
-     @original_image = nil
-   end
+    if use_thumbnail
+      @image = @original_image
+      zoom_boundaries
+      @original_image = nil
+    end
   end
 end

data/lib/sqed/boundary_finder/cross_finder.rb CHANGED

@@ -4,9 +4,9 @@ require 'rmagick'
 #
 class Sqed::BoundaryFinder::CrossFinder < Sqed::BoundaryFinder
-  def initialize(target_image: image)
-    @image = target_image
-    find_edges
+  def initialize(**opts)
+    @image = opts[:image]
+    find_edges
   end
   def find_edges

data/lib/sqed/boundary_finder/stage_finder.rb CHANGED

@@ -15,8 +15,13 @@ class Sqed::BoundaryFinder::StageFinder < Sqed::BoundaryFinder
   attr_reader :x0, :y0, :x1, :y1, :min_width, :min_height, :rows, :columns
-  def initialize(target_image: image, is_border_proc: nil, min_ratio: MIN_CROP_RATIO)
-    super(target_image: target_image, target_layout: :internal_box)
+  def initialize(**opts)
+    image = opts[:image]
+    is_border_proc = opts[:is_border_proc]
+    min_ratio = opts[:min_ratio]
+    min_ratio ||= MIN_CROP_RATIO
+    super(image: image, layout: :internal_box)
     @min_ratio = min_ratio
@@ -26,7 +31,7 @@ class Sqed::BoundaryFinder::StageFinder < Sqed::BoundaryFinder
     @min_width, @min_height = image.columns * @min_ratio, image.rows * @min_ratio # minimum resultant area
     @columns, @rows = image.columns, image.rows
     # We need a border finder proc. Provide one if none was given.
     @is_border = is_border_proc || self.class.default_border_finder(image) # if no proc specified, use default below

data/lib/sqed/extractor.rb CHANGED

@@ -1,62 +1,59 @@
 require 'rmagick'
-# An Extractor takes Boundries object and a metadata_map and returns a Sqed::Result
-#
-class Sqed::Extractor
+class Sqed
+  # An Extractor takes Boundaries object and a metadata_map and returns a Sqed::Result
+  #
+  class Extractor
   class Error < StandardError; end;
-  # a Sqed::Boundaries instance
+  # a Sqed::Boundaries instance
   attr_accessor :boundaries
+  # @return [Hash] like `{0 => :annotated_specimen, 1 => :identifier, 2 => :image_registration }`
   # a metadata_map hash from EXTRACTION_PATTERNS like:
-  #   {0 => :annotated_specimen, 1 => :identifier, 2 =>:image_registration }
   attr_accessor :metadata_map
-  # a Magick::Image file
+  # @return [Magick::Image file]
   attr_accessor :image
-  def initialize(target_boundaries: nil, target_metadata_map: nil, target_image: nil)
-    raise Error, 'target_boundaries not provided or provided boundary is not a Sqed::Boundaries' if target_boundaries.nil? || !target_boundaries.class == Sqed::Boundaries
-    raise Error, 'target_metadata_map not provided or target_metadata_map not a Hash' if target_metadata_map.nil? || !target_metadata_map.class == Hash
-    raise Error, 'target_image not provided' if target_image.nil? || !target_image.class.name == 'Magick::Image'
+  def initialize(**opts)
+    @metadata_map = opts[:metadata_map]
+    @boundaries = opts[:boundaries]
+    @image = opts[:image]
-    @metadata_map = target_metadata_map
-    @boundaries = target_boundaries
-    @image = target_image
+    raise Error, 'boundaries not provided or provided boundary is not a Sqed::Boundaries' if boundaries.nil? || !boundaries.class == Sqed::Boundaries
+    raise Error, 'metadata_map not provided or metadata_map not a Hash' if metadata_map.nil? || !metadata_map.class == Hash
+    raise Error, 'image not provided' if image.nil? || !image.class.name == 'Magick::Image'
   end
   def result
-    r = Sqed::Result.new()
+    r = Sqed::Result.new
     r.sections = metadata_map.values.sort
     # assign the images to the result
     boundaries.each do |section_index, coords|
       section_type = metadata_map[section_index]
-      # TODO: raise this higher up the chain
-      raise Error, "invalid section_type [#{section_type}]" if !SqedConfig::LAYOUT_SECTION_TYPES.include?(section_type)
       r.send("#{section_type}_image=", extract_image(coords))
       r.boundary_coordinates[section_type] = coords
-    end
+    end
     # assign the metadata to the result
     metadata_map.each do |section_index, section_type|
       # only extract data if a parser exists
       if parsers = SqedConfig::SECTION_PARSERS[section_type]
         section_image = r.send("#{section_type}_image")
         updated = r.send(section_type)
         parsers.each do |p|
-          parsed_result = p.new(section_image).text(section_type: section_type)
-          updated.merge!(p::TYPE => parsed_result) if parsed_result
+          parsed_result = p.new(section_image).get_text(section_type: section_type)
+          updated[p::TYPE] = parsed_result if parsed_result && parsed_result.length > 0
         end
-        r.send("#{section_type}=", updated)
+        r.send("#{section_type}=", updated)
       end
     end
@@ -65,7 +62,8 @@ class Sqed::Extractor
   # crop takes x, y, width, height
   def extract_image(coords)
-    i = @image.crop(*coords, true)
+    @image.crop(*coords, true)
   end
+  end
 end

data/lib/sqed/parser.rb CHANGED

@@ -3,17 +3,14 @@
 # Base class for Parsers
 #
 class Sqed::Parser
   attr_accessor :image
+  attr_accessor :extracted_text
   def initialize(image)
-    @image = image
+    @image = image
     raise 'no image provided to parser' if @image && !(@image.class.name == 'Magick::Image')
   end
-  # TODO: is this required?!j
-  # must be provided in subclasses
-  def text(section_type: :default)
-    nil
-  end
 end

data/lib/sqed/parser/barcode_parser.rb CHANGED

@@ -1,12 +1,11 @@
 # Given an image, return an ordered array of detectable barcodes
-#
+#
+# !! DOES NOTHING !!
 #
 class Sqed::Parser::BarcodeParser < Sqed::Parser
   TYPE = :barcode
-  attr_accessor :image
   attr_accessor :barcode
   def initialize(image)
@@ -28,15 +27,16 @@ class Sqed::Parser::BarcodeParser < Sqed::Parser
   # try a bunch of options, organized by most common,  give the first hit
   def get_barcode
     [get_code_128].compact.first
-  end
+  end
  #def get_datamatrix
  #  https://github.com/srijan/ruby-dmtx
  #end
   # alias to a universal method
-  def text(section_type: :default)
+  def get_text(section_type: :default)
     barcode
   end
 end

data/lib/sqed/parser/ocr_parser.rb CHANGED

@@ -1,3 +1,5 @@
+require 'rtesseract'
 # encoding: UTF-8
 #
 # Given a single image return all text in that image.
@@ -17,49 +19,51 @@
 # Below an x-height of 10 pixels, you have very little chance of accurate results,
 # and below about 8 pixels, most of the text will be "noise removed".
 #
-require 'rtesseract'
 class Sqed::Parser::OcrParser < Sqed::Parser
   TYPE = :text
+  # Other experimented with default params
+  #      classify_debug_level: 5,
+  #      lang: 'eng',
+  #      load_system_dawg: 0,
+  #      load_unambig_dawg: 0,
+  #      load_freq_dawg: 0,
+  #      load_fixed_length_dawgs: 0,
+  #      load_number_dawg: 0,
+  #      load_punc_dawg: 1, ## important
+  #      load_unambig_dawg: 1,
+  #      chop_enable: 0,
+  #     enable_new_segsearch: 1,
+  #     tessedit_debug_quality_metrics: 1,
+  #     tessedit_write_params_to_file: 'tmp/ocr_config_file.txt',
+  #     tessedit_write_images: 1,
+  #     equationdetect_save_merged_image: 1,
+  #     tessedit_dump_pageseg_images: 1,
+  #     equationdetect_save_bi_image: 1
   # Tesseract parameters default/specific to section type,
   # default is merged into the type
   SECTION_PARAMS = {
     default: {
-      psm: 3,
-#      classify_debug_level: 5,
-#      lang: 'eng',
-#      load_system_dawg: 0,
-#      load_unambig_dawg: 0,
-#      load_freq_dawg: 0,
-#      load_fixed_length_dawgs: 0,
-#      load_number_dawg: 0,
-#      load_punc_dawg: 1, ## important
-#      load_unambig_dawg: 1,
-#      chop_enable: 0,
-#     enable_new_segsearch: 1,
-#     tessedit_debug_quality_metrics: 1,
-#     tessedit_write_params_to_file: 'tmp/ocr_config_file.txt',
-#     tessedit_write_images: 1,
-#     equationdetect_save_merged_image: 1,
-#     tessedit_dump_pageseg_images: 1,
-#     equationdetect_save_bi_image: 1
+      psm: 3
     },
     annotated_specimen: {
-      edges_children_count_limit: 3000 # was 45, significantly improves annotated_specimen for odontates
+      # was 45, significantly improves annotated_specimen for odontates
+      edges_children_count_limit: 3000
     },
     identifier: {
       psm: 1,
       # tessedit_char_whitelist: '0123456789'
       #  edges_children_count_limit: 4000
-    },
+    },
     curator_metadata: {
+      psm: 3
     },
     labels: {
       psm: 3, # may need to be 6
     },
-    deterimination_labels: {
+    determination_labels: {
       psm: 3
     },
     other_labels: {
@@ -68,12 +72,7 @@ class Sqed::Parser::OcrParser < Sqed::Parser
     collecting_event_labels: {
       psm: 3
     }
-  }
-  # the text extracted from the image
-  attr_accessor :text
+  }.freeze
   # future consideration
   # def enhance_image(img)
@@ -102,35 +101,36 @@ class Sqed::Parser::OcrParser < Sqed::Parser
   # img = img.white_threshold(245)
   # img
   # end
   # @return [String]
-  #   the ocr text
-  def text(section_type: :default)
-    img = @image
+  #   the ocr text
+  def get_text(section_type: :default)
+    img = image
     # resample if an image 4"x4" is less than 300dpi
     if img.columns * img.rows < 144000
       img = img.resample(300)
     end
-    params = SECTION_PARAMS[:default].merge(SECTION_PARAMS[section_type])
-    r = RTesseract.new(img, params)
-    @text = r.to_s.strip
-    if @text == ""
+    params = SECTION_PARAMS[:default].dup
+    params.merge!(SECTION_PARAMS[section_type])
+    r = RTesseract.new(img, params)
+    @extracted_text = r.to_s.strip
+    if @extracted_text == ''
       img = img.white_threshold(245)
-      r = RTesseract.new(img, params)
-      @text = r.to_s.strip
+      r = RTesseract.new(img, params)
+      @extracted_text = r.to_s.strip
     end
-    if @text == ""
+    if @extracted_text == ''
       img = img.quantize(256,Magick::GRAYColorspace)
-      r = RTesseract.new(img, params)
-      @text = r.to_s.strip
+      r = RTesseract.new(img, params)
+      @extracted_text = r.to_s.strip
     end
-    @text
+    @extracted_text
   end
 end