sqed 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +27 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +18 -0
  5. data/Gemfile +7 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +36 -0
  8. data/Rakefile +9 -0
  9. data/lib/sqed.rb +111 -0
  10. data/lib/sqed/boundaries.rb +79 -0
  11. data/lib/sqed/boundary_finder.rb +150 -0
  12. data/lib/sqed/boundary_finder/color_line_finder.rb +83 -0
  13. data/lib/sqed/boundary_finder/cross_finder.rb +23 -0
  14. data/lib/sqed/boundary_finder/stage_finder.rb +139 -0
  15. data/lib/sqed/extractor.rb +45 -0
  16. data/lib/sqed/parser.rb +11 -0
  17. data/lib/sqed/parser/barcode_parser.rb +27 -0
  18. data/lib/sqed/parser/ocr_parser.rb +52 -0
  19. data/lib/sqed/result.rb +15 -0
  20. data/lib/sqed/version.rb +3 -0
  21. data/lib/sqed_config.rb +112 -0
  22. data/spec/lib/sqed/boundaries_spec.rb +35 -0
  23. data/spec/lib/sqed/boundary_finder/color_line_finder_spec.rb +167 -0
  24. data/spec/lib/sqed/boundary_finder/cross_finder_spec.rb +28 -0
  25. data/spec/lib/sqed/boundary_finder/stage_finder_spec.rb +9 -0
  26. data/spec/lib/sqed/boundary_finder_spec.rb +108 -0
  27. data/spec/lib/sqed/extractor_spec.rb +82 -0
  28. data/spec/lib/sqed/parser_spec.rb +6 -0
  29. data/spec/lib/sqed/result_spec.rb +17 -0
  30. data/spec/lib/sqed_spec.rb +200 -0
  31. data/spec/spec_helper.rb +34 -0
  32. data/spec/support/files/2Dbarcode.png +0 -0
  33. data/spec/support/files/CrossyBlackLinesSpecimen.jpg +0 -0
  34. data/spec/support/files/CrossyGreenLinesSpecimen.jpg +0 -0
  35. data/spec/support/files/Quadrant_2_3.jpg +0 -0
  36. data/spec/support/files/black_stage_green_line_specimen.jpg +0 -0
  37. data/spec/support/files/boundary_cross_green.jpg +0 -0
  38. data/spec/support/files/boundary_left_t_yellow.jpg +0 -0
  39. data/spec/support/files/boundary_offset_cross_red.jpg +0 -0
  40. data/spec/support/files/boundary_right_t_green.jpg +0 -0
  41. data/spec/support/files/greenlineimage.jpg +0 -0
  42. data/spec/support/files/label_images/black_stage_green_line_specimen_label.jpg +0 -0
  43. data/spec/support/files/test0.jpg +0 -0
  44. data/spec/support/files/test1.jpg +0 -0
  45. data/spec/support/files/test2.jpg +0 -0
  46. data/spec/support/files/test3.jpg +0 -0
  47. data/spec/support/files/test4.jpg +0 -0
  48. data/spec/support/files/test4OLD.jpg +0 -0
  49. data/spec/support/files/test_barcode.JPG +0 -0
  50. data/spec/support/files/test_ocr0.jpg +0 -0
  51. data/spec/support/files/types_21.jpg +0 -0
  52. data/spec/support/files/types_8.jpg +0 -0
  53. data/spec/support/image_helpers.rb +78 -0
  54. data/sqed.gemspec +31 -0
  55. metadata +244 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5e49c9eab8df51ee536b44949c954468cb3d7705
4
+ data.tar.gz: 342dd83674453cc664ddff07ed5dad5b1f897507
5
+ SHA512:
6
+ metadata.gz: dfc0fd88f66f86da1e0f1a848c91fa13afdf678a7b07dcea1cd1cfbab8a3962a862399f67c09379f6939b38de51f21d69ffd948c4b89e6ffa97fe76a4c8177bf
7
+ data.tar.gz: 2b3eb4475971484a0052c2ca000962f6d9818394fff227edb6a5140c437822166fc81845d592f295bd82b65fb2bbd9d4be70ee6f46b9340991fa63df90d11c6b
data/.gitignore ADDED
@@ -0,0 +1,27 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+
19
+ *~
20
+ .DS_Store
21
+ *.swp
22
+ .idea/
23
+ local/
24
+ foo*.jpg
25
+ SessionID_BarcodeImage.JPG
26
+
27
+ /*.jpg
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
data/.travis.yml ADDED
@@ -0,0 +1,18 @@
1
+
2
+ #bundler_args: --without development
3
+ language: ruby
4
+ rvm:
5
+ - 2.1.2
6
+ #before_install:
7
+ # - sudo add-apt-repository -y ppa:moti-p/cc
8
+ # - sudo apt-get update
9
+ # - sudo apt-get -y --reinstall install imagemagick
10
+ # - printf "\n" | pecl install imagick-beta
11
+ branches:
12
+ only:
13
+ - master
14
+ notifications:
15
+ email:
16
+ - diapriid@gmail.com
17
+ - jrflood@illinois.edu
18
+ on_failure: change
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+
2
+ source 'https://rubygems.org'
3
+
4
+ # Specify your gem's dependencies in sqed.gemspec
5
+ gemspec
6
+
7
+
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Matt Yoder
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,36 @@
1
+
2
+ [![Continuous Integration Status][1]][2]
3
+
4
+ # Sqed
5
+
6
+ Stub for a gem that supports specimen digitization from images.
7
+
8
+ ## Installation
9
+
10
+ Add this line to your application's Gemfile:
11
+
12
+ gem 'sqed'
13
+
14
+ And then execute:
15
+
16
+ $ bundle
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install sqed
21
+
22
+ ## Usage
23
+
24
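+ Create a `Sqed` instance from an image and an extraction pattern, then ask it for the result, e.g. `Sqed.new(pattern: :right_t, image: image).result`, which returns a `Sqed::Result` instance.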
25
+
26
+ ## Contributing
27
+
28
+ 1. Fork it ( http://github.com/SpeciesFileGroup/sqed/fork )
29
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
30
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
31
+ 4. Push to the branch (`git push origin my-new-feature`)
32
+ 5. Create a new Pull Request
33
+
34
+ [1]: https://secure.travis-ci.org/SpeciesFileGroup/sqed.png?branch=master
35
+ [2]: http://travis-ci.org/SpeciesFileGroup/sqed?branch=master
36
+
data/Rakefile ADDED
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+
4
+ require 'rspec/core/rake_task'
5
+
6
+ RSpec::Core::RakeTask.new('spec')
7
+
8
+ task :default => :spec
9
+
data/lib/sqed.rb ADDED
@@ -0,0 +1,111 @@
1
+ # encoding: UTF-8
2
+
# Guard against unsupported interpreters. Versions are compared with
# Gem::Version rather than as plain Strings: String comparison is
# lexicographic, so e.g. '10.0.0' >= '2.1.1' would be false.
recent_ruby = Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('2.1.1')
raise "IMPORTANT: sqed gem requires ruby >= 2.1.1" unless recent_ruby
5
+
6
+ require "RMagick"
7
+ include Magick
8
+ require_relative 'sqed_config'
9
+ require_relative "sqed/extractor"
10
+ require_relative "sqed/result"
11
+
# Instances take the following
# 1) A base image @image
# 2) A target extraction pattern
#
# Return a Sqed::Result
#
#   a = Sqed.new(pattern: :right_t, image: image)
#   b = a.result # => Sqed::Result instance
#
class Sqed
  # initial image which is an instance of ImageMagick::image, containing background and stage, or just stage.
  # The writer is defined explicitly below (#image=) because assigning an image also re-detects the stage.
  attr_reader :image

  # the particular arrangement of the content, a symbol taken from SqedConfig::EXTRACTION_PATTERNS
  attr_accessor :pattern

  # stage_boundary: a Sqed::Boundaries describing where the stage sits within @image
  # auto_detect_border: when truthy the stage boundary is detected whenever an image is assigned
  # boundary_color: forwarded to the boundary finder when it is a ColorLineFinder
  attr_accessor :stage_boundary, :auto_detect_border, :boundary_color

  # stage_image is the image that is the cropped content for parsing; boundaries are the section
  # boundaries found on it. Their readers are defined below because both are computed lazily.
  attr_writer :stage_image, :boundaries

  # @param image
  #   the base image, may be omitted and assigned later with #image=
  # @param pattern
  #   a key of SqedConfig::EXTRACTION_PATTERNS, defaults to :standard_cross
  # @param auto_detect_border
  #   when truthy (default) the stage boundary is detected as soon as an image is available
  # @param boundary_color
  #   passed to the boundary finder when the pattern uses a ColorLineFinder
  def initialize(image: nil, pattern: nil, auto_detect_border: true, boundary_color: :green)
    @image = image

    @boundaries = nil
    @stage_image = nil
    @stage_boundary = Sqed::Boundaries.new(:internal_box) # a.k.a. stage

    @auto_detect_border = auto_detect_border
    @pattern = pattern || :standard_cross
    @boundary_color = boundary_color

    set_stage_boundary if @auto_detect_border && @image
  end

  # This handles the case of
  #   s = Sqed.new() # no image: @some_image on init
  #   s.image = @some_image
  #
  def image=(value)
    @image = value

    # anything cached was derived from the previous image
    @stage_image = nil
    @boundaries = nil

    set_stage_boundary if @auto_detect_border && @image
  end

  # @return
  #   the section boundaries for the stage image, computed on first use
  #
  # @param force
  #   when true the boundaries are recomputed even if already cached
  def boundaries(force = false)
    @boundaries = get_section_boundaries if @boundaries.nil? || force
    @boundaries
  end

  # @return
  #   the cached section boundaries offset by the stage boundary, or nil when
  #   no boundaries have been computed yet or they are not complete
  def native_boundaries
    return nil unless @boundaries && @boundaries.complete
    @boundaries.offset(@stage_boundary)
  end

  # @return
  #   the stage image, cropping it from @image on first use
  #   (#crop_image falls back to the whole image when no complete stage boundary exists)
  def stage_image
    crop_image if @stage_image.nil? && @image
    @stage_image
  end

  # Sets @stage_image to the stage cropped out of @image, or to @image itself
  # when the stage boundary is not complete.
  def crop_image
    @stage_image =
      if @stage_boundary.complete
        @image.crop(*@stage_boundary.for(SqedConfig.index_for_section_type(:stage, :stage)))
      else
        @image
      end
  end

  # @return
  #   false when there is no image or pattern, otherwise whatever Sqed::Extractor#result returns
  def result
    return false if @image.nil? || @pattern.nil?
    crop_image

    # use the lazy reader so that calling #result straight after .new has boundaries to work with
    Sqed::Extractor.new(
      boundaries: boundaries,
      layout: SqedConfig::EXTRACTION_PATTERNS[@pattern][:layout],
      image: @stage_image
    ).result
  end

  protected

  # Detects the stage on @image; when detection is not complete the whole image
  # is recorded as section 0 instead.
  def set_stage_boundary
    @stage_boundary = Sqed::BoundaryFinder::StageFinder.new(image: @image).boundaries
    unless @stage_boundary.complete
      @stage_boundary.coordinates[0] = [0, 0, @image.columns, @image.rows]
    end
  end

  # Instantiates the boundary finder configured for @pattern and returns its boundaries.
  def get_section_boundaries
    pattern_config = SqedConfig::EXTRACTION_PATTERNS[@pattern]
    boundary_finder_class = pattern_config[:boundary_finder]

    options = { image: stage_image }
    options[:layout] = pattern_config[:layout] unless boundary_finder_class.name == 'Sqed::BoundaryFinder::CrossFinder'
    options[:boundary_color] = @boundary_color if boundary_finder_class.name == 'Sqed::BoundaryFinder::ColorLineFinder'

    # double-splat: the finders take keyword arguments, and a positional Hash is
    # no longer converted to keywords on Ruby 3
    boundary_finder_class.new(**options).boundaries
  end

end
@@ -0,0 +1,79 @@
# An Sqed::Boundaries is a simple wrapper for a hash that contains the co-ordinates for each section of a layout.
#
# Layouts are looked up in SqedConfig::LAYOUTS by the symbol passed to .new.
#
class Sqed::Boundaries
  include Enumerable

  # stores a hash
  # References the section by integer index!
  # In the pattern integer => [x1,y1, width, height] (ImageMagick convention rectangle descriptors)
  # e.g.
  #   0 => [10,10,40,40]
  attr_reader :coordinates

  # The layout this instance was initialized with (a key of SqedConfig::LAYOUTS), or nil
  attr_accessor :layout

  # Whether or not the last method to populate this object passed fully
  attr_accessor :complete

  # @param layout
  #   optional key of SqedConfig::LAYOUTS; when given, one empty
  #   ([nil, nil, nil, nil]) coordinate entry is created per section
  def initialize(layout = nil)
    @complete = false

    @layout = layout
    @coordinates = {}
    initialize_coordinates unless @layout.nil?
  end

  # Creates an empty coordinate entry for every element of SqedConfig::LAYOUTS[@layout].
  def initialize_coordinates
    SqedConfig::LAYOUTS[@layout].each do |section_index|
      @coordinates[section_index] = [nil, nil, nil, nil]
    end
  end

  # @return
  #   a new Sqed::Boundaries whose sections are those of self, with x and y
  #   shifted by the x and y of section 0 of the passed boundary;
  #   widths, heights and the complete flag are copied unchanged
  #
  # @param boundary
  #   a Sqed::Boundaries, only its section 0 is read
  def offset(boundary)
    shifted = Sqed::Boundaries.new
    # walk the real keys rather than assuming they are exactly 0..n-1
    @coordinates.each_key do |i|
      shifted.coordinates[i] = [
        x_for(i) + boundary.x_for(0),
        y_for(i) + boundary.y_for(0),
        width_for(i),
        height_for(i)
      ]
    end
    shifted.complete = complete
    shifted
  end

  # @return
  #   the [x, y, width, height] Array stored for the section, nil if there is none
  def for(section)
    @coordinates[section]
  end

  # Yields a single [section_index, coordinates] Array per section.
  # Returns an Enumerator when called without a block.
  def each(&block)
    return enum_for(:each) unless block_given?
    @coordinates.each do |section_index, coords|
      block.call([section_index, coords])
    end
  end

  # Overrides Enumerable: with no argument and no block this is the number of
  # sections; the item and block forms are passed on to Enumerable#count.
  def count(*args, &block)
    return @coordinates.length if args.empty? && block.nil?
    super
  end

  def x_for(index)
    @coordinates[index][0]
  end

  def y_for(index)
    @coordinates[index][1]
  end

  def width_for(index)
    @coordinates[index][2]
  end

  def height_for(index)
    @coordinates[index][3]
  end

end
@@ -0,0 +1,150 @@
1
+ require 'RMagick'
2
+
# Sqed Boundary Finders find boundaries on images and return co-ordinates of those boundaries. They do not
# return derivative images. Finders operate on cropped images, i.e. only the "stage".
#
class Sqed::BoundaryFinder
  # the passed image
  attr_reader :img

  # a symbol from SqedConfig::LAYOUTS
  attr_reader :layout

  # @param image
  #   a Magick::Image, required
  # @param layout
  #   a symbol from SqedConfig::LAYOUTS, required
  #
  # Raises a RuntimeError when either is missing or the image is not a Magick::Image.
  def initialize(image: nil, layout: nil)
    raise 'No layout provided.' if layout.nil?
    raise 'No image provided.' unless image.is_a?(Magick::Image)

    @layout = layout
    @img = image
  end

  # Returns a Sqed::Boundaries instance initialized to the number of sections in the passed layout.
  # The instance is created on first use and stores the coordinates of all of the layout sections.
  def boundaries
    @boundaries ||= Sqed::Boundaries.new(@layout)
  end

  # Samples single-pixel-wide strips of the image and counts, per position along the strip,
  # how many strips have a pixel of the boundary color there.
  #
  # @return
  #   nil when fewer than two positions were hit, otherwise the result of .frequency_stats:
  #   an Array [first, middle, last] of the positions whose hit count reached the cutoff, or nil
  #
  # @param image
  #   the image to sample
  #
  # @param sample_subdivision_size
  #   an Integer, the distance in pixels b/w samples
  #
  # @param sample_cutoff_factor: (0.0-1.0)
  #   if provided over-rides the default cutoff calculation by reducing the number of pixels required to be considered a border hit
  #   - the cutoff is ((dimension / sample_subdivision_size) - 1) * sample_cutoff_factor, truncated to an Integer;
  #     for example, an image of height 100 pixels, a sample_subdivision_size of 10 and a sample_cutoff_factor of .8
  #     gives ((100/10) - 1) * .8 = 7.2, so only positions with 7 or more hits are kept
  #   - NOTE(review): ten strips are sampled in that example, while the cutoff is derived from nine; confirm this is intended
  #   - when nil the cutoff defaults to the difference between the largest and smallest hit counts
  #
  # @param scan
  #   (:rows|:columns), :rows finds vertical borders, :columns finds horizontal borders
  #
  # @param boundary_color
  #   selects the pixel test, "is_<boundary_color>?" is called for every sampled pixel
  #
  def self.color_boundary_finder(image: nil, sample_subdivision_size: 10, sample_cutoff_factor: nil, scan: :rows, boundary_color: :green)
    raise 'No image provided.' if image.nil?
    raise 'scan must be :rows or :columns' unless [:rows, :columns].include?(scan)

    border_hits = Hash.new(0)
    samples_to_take = (image.send(scan) / sample_subdivision_size).to_i - 1
    color_test = "is_#{boundary_color}?"

    (0..samples_to_take).each do |s|
      position = s * sample_subdivision_size

      # Create a sample image a single pixel tall (:rows) or a single pixel wide (:columns)
      strip =
        if scan == :rows
          image.crop(0, position, image.columns, 1)
        else
          image.crop(position, 0, 1, image.rows)
        end

      strip.each_pixel do |pixel, c, r|
        # Our hit metric is dirt simple: every pixel passing the color test counts + 1 for its position
        next unless send(color_test, pixel)
        border_hits[scan == :rows ? c : r] += 1
      end
    end

    return nil if border_hits.length < 2

    cutoff =
      if sample_cutoff_factor.nil?
        max_difference(border_hits.values)
      else
        (samples_to_take * sample_cutoff_factor).to_i
      end

    frequency_stats(border_hits, cutoff)
  end

  # The is_<color>? tests below require the named channel to exceed each of the
  # other two channels multiplied by 1.2.

  def self.is_green?(pixel)
    (pixel.green > pixel.red*1.2) && (pixel.green > pixel.blue*1.2)
  end

  def self.is_blue?(pixel)
    (pixel.blue > pixel.red*1.2) && (pixel.blue > pixel.green*1.2)
  end

  def self.is_red?(pixel)
    (pixel.red > pixel.blue*1.2) && (pixel.red > pixel.green*1.2)
  end

  # True when all three channels are below 15% of 65535.
  # NOTE(review): presumably assumes 16-bit channel values; confirm against the ImageMagick build in use.
  def self.is_black?(pixel)
    black_threshold = 65535*0.15 #tune for black
    (pixel.red < black_threshold) && (pixel.blue < black_threshold) && (pixel.green < black_threshold)
  end

  # Takes a frequency hash of position => count key/values and returns
  # [first, middle, last] of the sorted positions that have a count greater than or equal to the cutoff.
  # Returns nil when the cutoff is nil or less than 1 (which includes the default of 0),
  # or when fewer than three positions qualify.
  def self.frequency_stats(frequency_hash, sample_cutoff = 0)
    return nil if sample_cutoff.nil? || sample_cutoff < 1

    # sort, because the positions come from a hash in the order they were first hit
    hit_ranges = frequency_hash.select { |_position, count| count >= sample_cutoff }.keys.sort

    return nil if hit_ranges.size < 3

    # the middle element is the one at index length / 2 (integer division)
    [hit_ranges.first, hit_ranges[hit_ranges.length / 2], hit_ranges.last]
  end

  # Returns an Integer, the maximum of the pairwise differences of the values in the array
  # For example, given
  #   [1,2,3,9,6,2,0]
  # The resulting pairwise array is
  #   [1,1,6,3,4,2]
  # The max (value returned) is
  #   6
  # Returns nil when the array has fewer than two elements.
  def self.max_pairwise_difference(array)
    array.each_cons(2).map { |a, b| (a - b).abs }.max
  end

  # Returns the difference between the largest and the smallest value in the array.
  def self.max_difference(array)
    array.max - array.min
  end

  # Returns, for each pair of neighbours, -1, 0 or 1 according to whether the values fall, stay equal or rise.
  def self.derivative_signs(array)
    array.each_cons(2).map { |a, b| (b - a) <=> 0 }
  end

  # Returns the differences between each value and the one before it.
  def self.derivative(array)
    array.each_cons(2).map { |a, b| b - a }
  end

end