sqed 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +27 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +18 -0
  5. data/Gemfile +7 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +36 -0
  8. data/Rakefile +9 -0
  9. data/lib/sqed.rb +111 -0
  10. data/lib/sqed/boundaries.rb +79 -0
  11. data/lib/sqed/boundary_finder.rb +150 -0
  12. data/lib/sqed/boundary_finder/color_line_finder.rb +83 -0
  13. data/lib/sqed/boundary_finder/cross_finder.rb +23 -0
  14. data/lib/sqed/boundary_finder/stage_finder.rb +139 -0
  15. data/lib/sqed/extractor.rb +45 -0
  16. data/lib/sqed/parser.rb +11 -0
  17. data/lib/sqed/parser/barcode_parser.rb +27 -0
  18. data/lib/sqed/parser/ocr_parser.rb +52 -0
  19. data/lib/sqed/result.rb +15 -0
  20. data/lib/sqed/version.rb +3 -0
  21. data/lib/sqed_config.rb +112 -0
  22. data/spec/lib/sqed/boundaries_spec.rb +35 -0
  23. data/spec/lib/sqed/boundary_finder/color_line_finder_spec.rb +167 -0
  24. data/spec/lib/sqed/boundary_finder/cross_finder_spec.rb +28 -0
  25. data/spec/lib/sqed/boundary_finder/stage_finder_spec.rb +9 -0
  26. data/spec/lib/sqed/boundary_finder_spec.rb +108 -0
  27. data/spec/lib/sqed/extractor_spec.rb +82 -0
  28. data/spec/lib/sqed/parser_spec.rb +6 -0
  29. data/spec/lib/sqed/result_spec.rb +17 -0
  30. data/spec/lib/sqed_spec.rb +200 -0
  31. data/spec/spec_helper.rb +34 -0
  32. data/spec/support/files/2Dbarcode.png +0 -0
  33. data/spec/support/files/CrossyBlackLinesSpecimen.jpg +0 -0
  34. data/spec/support/files/CrossyGreenLinesSpecimen.jpg +0 -0
  35. data/spec/support/files/Quadrant_2_3.jpg +0 -0
  36. data/spec/support/files/black_stage_green_line_specimen.jpg +0 -0
  37. data/spec/support/files/boundary_cross_green.jpg +0 -0
  38. data/spec/support/files/boundary_left_t_yellow.jpg +0 -0
  39. data/spec/support/files/boundary_offset_cross_red.jpg +0 -0
  40. data/spec/support/files/boundary_right_t_green.jpg +0 -0
  41. data/spec/support/files/greenlineimage.jpg +0 -0
  42. data/spec/support/files/label_images/black_stage_green_line_specimen_label.jpg +0 -0
  43. data/spec/support/files/test0.jpg +0 -0
  44. data/spec/support/files/test1.jpg +0 -0
  45. data/spec/support/files/test2.jpg +0 -0
  46. data/spec/support/files/test3.jpg +0 -0
  47. data/spec/support/files/test4.jpg +0 -0
  48. data/spec/support/files/test4OLD.jpg +0 -0
  49. data/spec/support/files/test_barcode.JPG +0 -0
  50. data/spec/support/files/test_ocr0.jpg +0 -0
  51. data/spec/support/files/types_21.jpg +0 -0
  52. data/spec/support/files/types_8.jpg +0 -0
  53. data/spec/support/image_helpers.rb +78 -0
  54. data/sqed.gemspec +31 -0
  55. metadata +244 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5e49c9eab8df51ee536b44949c954468cb3d7705
4
+ data.tar.gz: 342dd83674453cc664ddff07ed5dad5b1f897507
5
+ SHA512:
6
+ metadata.gz: dfc0fd88f66f86da1e0f1a848c91fa13afdf678a7b07dcea1cd1cfbab8a3962a862399f67c09379f6939b38de51f21d69ffd948c4b89e6ffa97fe76a4c8177bf
7
+ data.tar.gz: 2b3eb4475971484a0052c2ca000962f6d9818394fff227edb6a5140c437822166fc81845d592f295bd82b65fb2bbd9d4be70ee6f46b9340991fa63df90d11c6b
data/.gitignore ADDED
@@ -0,0 +1,27 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+
19
+ *~
20
+ .DS_Store
21
+ *.swp
22
+ .idea/
23
+ local/
24
+ foo*.jpg
25
+ SessionID_BarcodeImage.JPG
26
+
27
+ /*.jpg
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
data/.travis.yml ADDED
@@ -0,0 +1,18 @@
1
+
2
+ #bundler_args: --without development
3
+ language: ruby
4
+ rvm:
5
+ - 2.1.2
6
+ #before_install:
7
+ # - sudo add-apt-repository -y ppa:moti-p/cc
8
+ # - sudo apt-get update
9
+ # - sudo apt-get -y --reinstall install imagemagick
10
+ # - printf "\n" | pecl install imagick-beta
11
+ branches:
12
+ only:
13
+ - master
14
+ notifications:
15
+ email:
16
+ - diapriid@gmail.com
17
+ - jrflood@illinois.edu
18
+ on_failure: change
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+
2
+ source 'https://rubygems.org'
3
+
4
+ # Specify your gem's dependencies in sqed.gemspec
5
+ gemspec
6
+
7
+
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Matt Yoder
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,36 @@
1
+
2
+ [![Continuous Integration Status][1]][2]
3
+
4
+ # Sqed
5
+
6
+ Stub for a gem that supports specimen digitization from images.
7
+
8
+ ## Installation
9
+
10
+ Add this line to your application's Gemfile:
11
+
12
+ gem 'sqed'
13
+
14
+ And then execute:
15
+
16
+ $ bundle
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install sqed
21
+
22
+ ## Usage
23
+
24
+ TODO: Write usage instructions here
25
+
26
+ ## Contributing
27
+
28
+ 1. Fork it ( http://github.com/SpeciesFileGroup/sqed/fork )
29
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
30
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
31
+ 4. Push to the branch (`git push origin my-new-feature`)
32
+ 5. Create new Pull Request
33
+
34
+ [1]: https://secure.travis-ci.org/SpeciesFileGroup/sqed.png?branch=master
35
+ [2]: http://travis-ci.org/SpeciesFileGroup/sqed?branch=master
36
+
data/Rakefile ADDED
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+
4
+ require 'rspec/core/rake_task'
5
+
6
+ RSpec::Core::RakeTask.new('spec')
7
+
8
+ task :default => :spec
9
+
data/lib/sqed.rb ADDED
@@ -0,0 +1,111 @@
1
+ # encoding: UTF-8
2
+
3
# Guard against unsupported interpreters. The versions are compared as
# Gem::Version objects rather than Strings: a lexical String comparison would
# wrongly reject e.g. '10.0.0' because "1" sorts before "2".
recent_ruby = Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('2.1.1')
raise "IMPORTANT: sqed gem requires ruby >= 2.1.1" unless recent_ruby
5
+
6
+ require "RMagick"
7
+ include Magick
8
+ require_relative 'sqed_config'
9
+ require_relative "sqed/extractor"
10
+ require_relative "sqed/result"
11
+
12
+ # Instances take the following
13
+ # 1) A base image @image
14
+ # 2) A target extraction pattern
15
+ #
16
+ # Return a Sqed::Result
17
+ #
18
+ # a = Sqed.new(pattern: :right_t, image: image)
19
+ # b = a.result # => Sqed::Result instance
20
+ #
21
# Entry point of the gem: pairs a base image with an extraction pattern,
# locates the stage and its sections, and hands them to a Sqed::Extractor.
class Sqed
  # The initial image (an RMagick image), containing background and stage, or just the stage.
  # Assign through #image= so that derived state is refreshed.
  attr_reader :image

  # The particular arrangement of the content, a symbol taken from SqedConfig::EXTRACTION_PATTERNS
  attr_accessor :pattern

  # stage_boundary: boundaries of the stage within the image
  # auto_detect_border: when truthy the stage is located automatically whenever an image is set
  # boundary_color: the color of the lines dividing the stage, forwarded to the ColorLineFinder
  attr_accessor :stage_boundary, :auto_detect_border, :boundary_color

  # Writers only; the matching readers below compute their value lazily.
  attr_writer :stage_image, :boundaries

  # @param image
  #   the image to process, may be omitted and assigned later with #image=
  #
  # @param pattern
  #   a key of SqedConfig::EXTRACTION_PATTERNS, defaults to :standard_cross
  #
  # @param auto_detect_border
  #   when true the stage boundary is detected as soon as an image is available
  #
  # @param boundary_color
  #   the color of the section dividing lines
  #
  def initialize(image: nil, pattern: nil, auto_detect_border: true, boundary_color: :green)
    @image = image
    @boundaries = nil
    @stage_boundary = Sqed::Boundaries.new(:internal_box) # a.k.a. stage
    @auto_detect_border = auto_detect_border
    @pattern = pattern || :standard_cross
    @boundary_color = boundary_color

    set_stage_boundary if @auto_detect_border && @image
  end

  # This handles the case of
  #   s = Sqed.new()   # no image: @some_image on init
  #   s.image = @some_image
  #
  # Assigning nil simply clears the image; no stage detection is attempted.
  def image=(value)
    @image = value
    @stage_image = nil # the cached crop was taken from the previous image
    set_stage_boundary if @auto_detect_border && @image
  end

  # @return
  #   the section boundaries, as returned by the pattern's boundary finder.
  #   They are computed on first use and cached.
  #
  # @param force
  #   when true the cached value is discarded and the boundaries are re-computed
  #
  def boundaries(force = false)
    @boundaries = get_section_boundaries if @boundaries.nil? || force
    @boundaries
  end

  # @return
  #   the section boundaries offset by the stage boundary, or nil when the
  #   section boundaries are not complete
  def native_boundaries
    # go through the lazy reader, the raw ivar is nil until #boundaries has been called
    found = boundaries
    return nil unless found.complete
    found.offset(@stage_boundary)
  end

  # @return
  #   the image used for parsing: the crop to the stage when the stage
  #   boundary is complete, the whole image otherwise (see #crop_image)
  def stage_image
    crop_image if @stage_image.nil?
    @stage_image
  end

  # Sets (and returns) @stage_image: the image cropped to the stage when the
  # stage boundary is complete, the untouched image otherwise.
  def crop_image
    @stage_image =
      if @stage_boundary.complete
        @image.crop(*@stage_boundary.for(SqedConfig.index_for_section_type(:stage, :stage)))
      else
        @image
      end
  end

  # @return
  #   false when there is no image or no pattern, otherwise the result of a
  #   Sqed::Extractor run over the stage image
  def result
    return false if @image.nil? || @pattern.nil?
    crop_image
    extractor = Sqed::Extractor.new(
      boundaries: boundaries, # lazy reader, so the extractor never receives nil
      layout: SqedConfig::EXTRACTION_PATTERNS[@pattern][:layout],
      image: @stage_image)
    extractor.result
  end

  protected

  # Detects the stage within @image. When detection is not complete the stage
  # coordinates fall back to the full extent of the image.
  def set_stage_boundary
    @stage_boundary = Sqed::BoundaryFinder::StageFinder.new(image: @image).boundaries
    unless @stage_boundary.complete
      @stage_boundary.coordinates[0] = [0, 0, @image.columns, @image.rows]
    end
  end

  # Instantiates the boundary finder configured for @pattern and returns its boundaries.
  # The CrossFinder is not given a layout; only the ColorLineFinder is given a boundary color.
  def get_section_boundaries
    pattern_config = SqedConfig::EXTRACTION_PATTERNS[@pattern]
    boundary_finder_class = pattern_config[:boundary_finder]

    options = {image: stage_image}
    unless boundary_finder_class.name == 'Sqed::BoundaryFinder::CrossFinder'
      options[:layout] = pattern_config[:layout]
    end
    if boundary_finder_class.name == 'Sqed::BoundaryFinder::ColorLineFinder'
      options[:boundary_color] = @boundary_color
    end

    boundary_finder_class.new(options).boundaries
  end

end
@@ -0,0 +1,79 @@
1
+ # An Sqed::Boundaries is a simple wrapper for a hash that contains the co-ordinates for each section of a layout.
2
+
3
+ # Layouts are Hashes defined in EXTRACTION_PATTERNS[<pattern>][<layout>]
4
+ #
5
# Defined in nested form so that this file does not depend on Sqed having been loaded first.
class Sqed
  # A simple wrapper for a hash that contains the co-ordinates for each section of a layout.
  class Boundaries
    include Enumerable

    # stores a hash
    # References the section by integer index!
    # In the pattern integer => [x1,y1, width, height] (ImageMagick convention rectangle descriptors)
    # e.g.
    #   0 => [10,10,40,40]
    attr_reader :coordinates

    # An Sqed::Config::EXTRACTION_PATTERN layout
    attr_accessor :layout

    # Whether or not the last method to populate this object passed fully
    attr_accessor :complete

    # @param layout
    #   a key of SqedConfig::LAYOUTS; when given, every section of that layout
    #   is pre-populated with an empty ([nil, nil, nil, nil]) rectangle
    def initialize(layout = nil)
      @complete = false
      @layout = layout
      @coordinates = {}
      initialize_coordinates unless @layout.nil?
    end

    # Resets every section listed in SqedConfig::LAYOUTS[@layout] to an empty rectangle.
    def initialize_coordinates
      SqedConfig::LAYOUTS[@layout].each do |section_index|
        @coordinates[section_index] = [nil, nil, nil, nil]
      end
    end

    # @return
    #   a new Sqed::Boundaries, a copy of self in which the x/y of every section
    #   is shifted by the x/y of section 0 of boundary. Widths and heights are unchanged.
    #
    # @param boundary
    #   a Sqed::Boundaries whose section 0 supplies the offset
    #
    def offset(boundary)
      shifted = self.class.new # layout is copied below, so the sections are not pre-populated
      shifted.layout = @layout
      # iterate over the stored keys, they are not guaranteed to be 0..n-1
      @coordinates.each_key do |i|
        shifted.coordinates[i] = [
          x_for(i) + boundary.x_for(0),
          y_for(i) + boundary.y_for(0),
          width_for(i),
          height_for(i)
        ]
      end
      shifted.complete = complete
      shifted
    end

    # @return
    #   the [x, y, width, height] rectangle stored for section, nil when there is none
    def for(section)
      @coordinates[section]
    end

    # Yields a [section_index, coordinates] pair for each section.
    # Returns an Enumerator when called without a block.
    def each
      return to_enum(:each) unless block_given?
      @coordinates.each do |section_index, coords|
        yield [section_index, coords]
      end
    end

    # Overrides Enumerable: without arguments this is the number of sections.
    # With an item or a block it behaves like Enumerable#count.
    def count(*args, &block)
      return super unless args.empty? && block.nil?
      @coordinates.length
    end

    # The accessors below return one element of the rectangle stored for the section at index.
    def x_for(index)
      @coordinates[index][0]
    end

    def y_for(index)
      @coordinates[index][1]
    end

    def width_for(index)
      @coordinates[index][2]
    end

    def height_for(index)
      @coordinates[index][3]
    end

  end
end
@@ -0,0 +1,150 @@
1
+ require 'RMagick'
2
+
3
+ # Sqed Boundary Finders find boundaries on images and return co-ordinates of those boundaries. They do not
4
+ # return derivative images. Finders operate on cropped images, i.e. only the "stage".
5
+ #
6
# Defined in nested form so that this file does not depend on Sqed having been loaded first.
class Sqed
  # Finds boundaries on images and returns the co-ordinates of those boundaries.
  class BoundaryFinder
    # the passed image
    attr_reader :img

    # a symbol from SqedConfig::LAYOUTS
    attr_reader :layout

    # @param image
    #   a Magick::Image, required
    #
    # @param layout
    #   a symbol from SqedConfig::LAYOUTS, required
    #
    # Raises a RuntimeError when either is missing, or when image is not a Magick::Image.
    def initialize(image: nil, layout: nil)
      raise 'No layout provided.' if layout.nil?
      raise 'No image provided.' unless image.is_a?(Magick::Image)

      @layout = layout
      @img = image
    end

    # Returns a Sqed::Boundaries instance initialized to the number of sections in the passed layout.
    def boundaries
      @boundaries ||= Sqed::Boundaries.new(@layout)
    end

    # @return
    #   nil when no boundary could be found, otherwise an Array of three positions
    #   [first, middle, last] spanning the boundary line (see frequency_stats).
    #   Positions are x values when scanning :rows and y values when scanning :columns.
    #
    # @param image
    #   the image to sample
    #
    # @param sample_subdivision_size
    #   an Integer, the distance in pixels b/w samples
    #
    # @param sample_cutoff_factor: (0.0-1.0)
    #   if provided over-rides the default cutoff calculation by reducing the number of pixels required to be considered a border hit
    #   - for example, if you have an image of height 100 pixels, and a sample_subdivision_size of 10, and a sample_cutoff_factor of .8
    #     then only positions with 8 ((100/10)*.8) or more hits are kept
    #   - when nil the cutoff defaults to the difference between the largest and the smallest hit count
    #
    # @param scan
    #   (:rows|:columns), :rows finds vertical borders, :columns finds horizontal borders
    #
    # @param boundary_color
    #   a symbol, the pixel test used is the class method is_<boundary_color>?
    #
    def self.color_boundary_finder(image: nil, sample_subdivision_size: 10, sample_cutoff_factor: nil, scan: :rows, boundary_color: :green)
      raise 'No image provided.' if image.nil?
      raise "scan must be :rows or :columns, got #{scan.inspect}" unless [:rows, :columns].include?(scan)

      color_test = "is_#{boundary_color}?"

      # The number of one pixel wide strips actually examined. The cutoff below is
      # derived from this same number so that it agrees with the documented example.
      sample_count = (image.send(scan) / sample_subdivision_size).to_i
      border_hits = Hash.new(0)

      sample_count.times do |s|
        sample_offset = s * sample_subdivision_size

        # Create a sample image a single pixel tall (:rows) or a single pixel wide (:columns)
        strip =
          if scan == :rows
            image.crop(0, sample_offset, image.columns, 1)
          else
            image.crop(sample_offset, 0, 1, image.rows)
          end

        strip.each_pixel do |pixel, c, r|
          # Our hit metric is dirt simple, if the pixel passes the color test count + 1 for its position
          next unless send(color_test, pixel)
          border_hits[scan == :rows ? c : r] += 1
        end
      end

      return nil if border_hits.length < 2

      cutoff =
        if sample_cutoff_factor.nil?
          max_difference(border_hits.values)
        else
          (sample_count * sample_cutoff_factor).to_i
        end

      frequency_stats(border_hits, cutoff)
    end

    # The color tests below treat a pixel as a given color when that channel
    # exceeds each of the other two by more than 20%.
    def self.is_green?(pixel)
      (pixel.green > pixel.red * 1.2) && (pixel.green > pixel.blue * 1.2)
    end

    def self.is_blue?(pixel)
      (pixel.blue > pixel.red * 1.2) && (pixel.blue > pixel.green * 1.2)
    end

    def self.is_red?(pixel)
      (pixel.red > pixel.blue * 1.2) && (pixel.red > pixel.green * 1.2)
    end

    # A pixel is black when all three channels are below 15% of 65535.
    # NOTE(review): 65535 presumably is the maximum channel value of a 16 bit ImageMagick build, confirm.
    def self.is_black?(pixel)
      black_threshold = 65535 * 0.15 # tune for black
      (pixel.red < black_threshold) && (pixel.blue < black_threshold) && (pixel.green < black_threshold)
    end

    # Takes a frequency hash of position => count key/values and returns
    # [first, middle, last] of the sorted positions whose count is greater than
    # or equal to the cutoff.
    #
    # Returns nil when the cutoff is nil or less than 1, or when fewer than
    # three positions qualify.
    def self.frequency_stats(frequency_hash, sample_cutoff = 0)
      return nil if sample_cutoff.nil? || sample_cutoff < 1

      # sorted because the positions come unordered from a hash
      hit_ranges = frequency_hash.select { |_position, count| count >= sample_cutoff }.keys.sort
      return nil if hit_ranges.size < 3

      [hit_ranges.first, hit_ranges[hit_ranges.length / 2], hit_ranges.last]
    end

    # Returns an Integer, the maximum of the pairwise differences of the values in the array
    # For example, given
    #   [1,2,3,9,6,2,0]
    # The resulting pairwise array is
    #   [1,1,6,3,4,2]
    # The max (value returned) is
    #   6
    # Returns nil for arrays with fewer than two values.
    def self.max_pairwise_difference(array)
      array.each_cons(2).map { |a, b| (a - b).abs }.max
    end

    # Returns the difference between the largest and the smallest value of the array.
    def self.max_difference(array)
      array.max - array.min
    end

    # Returns the sign (-1, 0 or 1) of each difference between consecutive values.
    def self.derivative_signs(array)
      array.each_cons(2).map { |a, b| (b - a) <=> 0 }
    end

    # Returns the differences between consecutive values.
    def self.derivative(array)
      array.each_cons(2).map { |a, b| b - a }
    end

  end
end