sqed 0.4.4 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: df36b8e8052dc45504b1a8cfb4f511b7d5af680f
4
- data.tar.gz: 2557f8b06d2c521893d4bbace2c9433be420e376
2
+ SHA256:
3
+ metadata.gz: d78e636e0c209d3cfd4de9f698bafd575686e1d393fd8148f3ae491d0396d140
4
+ data.tar.gz: 51c75913c74ccebe5a172ed64408ce176d5a04d08f788d3be46aeef1a9595e8f
5
5
  SHA512:
6
- metadata.gz: a190d03073872ad472a422d96b8f62fce283c2936a18d53d6b90f7409e07acfc6f407a5125d88350a04ccecccef3709e7c4c82fa2059ba806f52685cb86948eb
7
- data.tar.gz: d29c8d94a3d04c4a8461e11573fafdb7be8383d305ccb7222a05b9cb123fd4e18d135f27df551eee96970552324a91e67688e81754cad37c631472bd41dd0fb3
6
+ metadata.gz: 459da0e33636600618502410797123154988ce2a03f514cdf7dec9829ca6c747c5d26952ca2b247a613cd8968eb7cb5c0bcc3a6711db495e06175ec64c57da77
7
+ data.tar.gz: 49fc0de43216358f44e2d89089a5605ec46a5ef526b6464588ea867301719932433841fd5a071b4cc41ef7ce8013a2eb5ee7f5aaac69786d4e7b46308c0e0f1e
@@ -16,6 +16,8 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
16
16
  super(image: image, layout: layout, use_thumbnail: use_thumbnail)
17
17
 
18
18
  raise 'No layout provided.' if @layout.nil?
19
+
20
+ # !@#? why this
19
21
  @boundary_color = boundary_color
20
22
 
21
23
  if use_thumbnail
@@ -95,8 +97,6 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
95
97
  max_width: right_top_image.columns
96
98
  ) # vertical line b/w 1 & 2, use "corrected_frequency" to account for color bleed from previous crop
97
99
 
98
-
99
-
100
100
  boundaries.set(1, [left_right_split[2], 0, right_top_split[0], top_bottom_split[0]] )
101
101
  boundaries.set(2, [left_right_split[2] + right_top_split[2], 0, right_top_image.columns - right_top_split[2], top_bottom_split[0]])
102
102
 
@@ -6,64 +6,64 @@ class Sqed
6
6
  #
7
7
  class Extractor
8
8
 
9
- class Error < StandardError; end;
9
+ class Error < StandardError; end;
10
10
 
11
- # a Sqed::Boundaries instance
12
- attr_accessor :boundaries
11
+ # a Sqed::Boundaries instance
12
+ attr_accessor :boundaries
13
13
 
14
- # @return [Hash] like `{0 => :annotated_specimen, 1 => :identifier, 2 => :image_registration }`
15
- # a metadata_map hash from EXTRACTION_PATTERNS like:
16
- attr_accessor :metadata_map
14
+ # @return [Hash] like `{0 => :annotated_specimen, 1 => :identifier, 2 => :image_registration }`
15
+ # a metadata_map hash from EXTRACTION_PATTERNS like:
16
+ attr_accessor :metadata_map
17
17
 
18
- # @return [Magick::Image file]
19
- attr_accessor :image
18
+ # @return [Magick::Image file]
19
+ attr_accessor :image
20
20
 
21
- def initialize(**opts)
22
- @metadata_map = opts[:metadata_map]
23
- @boundaries = opts[:boundaries]
24
- @image = opts[:image]
21
+ def initialize(**opts)
22
+ @metadata_map = opts[:metadata_map]
23
+ @boundaries = opts[:boundaries]
24
+ @image = opts[:image]
25
25
 
26
- raise Error, 'boundaries not provided or provided boundary is not a Sqed::Boundaries' if boundaries.nil? || !boundaries.class == Sqed::Boundaries
27
- raise Error, 'metadata_map not provided or metadata_map not a Hash' if metadata_map.nil? || !metadata_map.class == Hash
28
- raise Error, 'image not provided' if image.nil? || !image.class.name == 'Magick::Image'
29
- end
26
+ raise Error, 'boundaries not provided or provided boundary is not a Sqed::Boundaries' if boundaries.nil? || !boundaries.class == Sqed::Boundaries
27
+ raise Error, 'metadata_map not provided or metadata_map not a Hash' if metadata_map.nil? || !metadata_map.class == Hash
28
+ raise Error, 'image not provided' if image.nil? || !image.class.name == 'Magick::Image'
29
+ end
30
30
 
31
- def result
32
- r = Sqed::Result.new
31
+ def result
32
+ r = Sqed::Result.new
33
33
 
34
- r.sections = metadata_map.values.sort
34
+ r.sections = metadata_map.keys.sort.collect{|k| metadata_map[k]}
35
35
 
36
- # assign the images to the result
37
- boundaries.each do |section_index, coords|
38
- section_type = metadata_map[section_index]
36
+ # assign the images to the result
37
+ boundaries.each do |section_index, coords|
38
+ section_type = metadata_map[section_index]
39
39
 
40
- r.send("#{section_type}_image=", extract_image(coords))
41
- r.boundary_coordinates[section_type] = coords
42
- end
40
+ r.send("#{section_type}_image=", extract_image(coords))
41
+ r.boundary_coordinates[section_type] = coords
42
+ end
43
43
 
44
- # assign the metadata to the result
45
- metadata_map.each do |section_index, section_type|
46
- # only extract data if a parser exists
47
- if parsers = SqedConfig::SECTION_PARSERS[section_type]
48
- section_image = r.send("#{section_type}_image")
49
- updated = r.send(section_type)
44
+ # assign the metadata to the result
45
+ metadata_map.each do |section_index, section_type|
46
+ # only extract data if a parser exists
47
+ if parsers = SqedConfig::SECTION_PARSERS[section_type]
48
+ section_image = r.send("#{section_type}_image")
49
+ updated = r.send(section_type)
50
50
 
51
- parsers.each do |p|
52
- parsed_result = p.new(section_image).get_text(section_type: section_type)
53
- updated[p::TYPE] = parsed_result if parsed_result && parsed_result.length > 0
54
- end
51
+ parsers.each do |p|
52
+ parsed_result = p.new(section_image).get_text(section_type: section_type)
53
+ updated[p::TYPE] = parsed_result if parsed_result && parsed_result.length > 0
54
+ end
55
55
 
56
- r.send("#{section_type}=", updated)
56
+ r.send("#{section_type}=", updated)
57
+ end
57
58
  end
58
- end
59
59
 
60
- r
61
- end
60
+ r
61
+ end
62
62
 
63
- # crop takes x, y, width, height
64
- def extract_image(coords)
65
- @image.crop(*coords, true)
66
- end
63
+ # crop takes x, y, width, height
64
+ def extract_image(coords)
65
+ @image.crop(*coords, true)
66
+ end
67
67
 
68
68
  end
69
69
  end
@@ -18,7 +18,7 @@ class Sqed::Parser::BarcodeParser < Sqed::Parser
18
18
  @barcode
19
19
  end
20
20
 
21
- # Uses the same enging as zbarimg that you can install with brew (zbarimg)
21
+ # Uses the same engine as zbarimg that you can install with brew (zbarimg)
22
22
  #
23
23
  def get_code_128
24
24
  nil # ZXing.decode @image.filename
@@ -29,9 +29,9 @@ class Sqed::Parser::BarcodeParser < Sqed::Parser
29
29
  [get_code_128].compact.first
30
30
  end
31
31
 
32
- #def get_datamatrix
33
- # https://github.com/srijan/ruby-dmtx
34
- #end
32
+ #def get_datamatrix
33
+ # https://github.com/srijan/ruby-dmtx
34
+ #end
35
35
 
36
36
  # alias to a universal method
37
37
  def get_text(section_type: :default)
@@ -1,5 +1,9 @@
1
1
  require 'rtesseract'
2
2
 
3
+ # We use Tempfile because RTesseract apparently no longer works directly with a Magick::Image
4
+ # https://ruby-doc.org/stdlib-2.6.1/libdoc/tempfile/rdoc/Tempfile.html
5
+ require 'tempfile'
6
+
3
7
  # encoding: UTF-8
4
8
  #
5
9
  # Given a single image return all text in that image.
@@ -115,19 +119,42 @@ class Sqed::Parser::OcrParser < Sqed::Parser
115
119
  params = SECTION_PARAMS[:default].dup
116
120
  params.merge!(SECTION_PARAMS[section_type])
117
121
 
118
- r = RTesseract.new(img, params)
119
- @extracted_text = r.to_s.strip
122
+ # It may be possible to avoid this hacky kludge by passing `processor:` to RTesseract.new
123
+ file = Tempfile.new('foo1')
124
+ begin
125
+ file.write(image.to_blob)
126
+ file.rewind
127
+ @extracted_text = RTesseract.new(file.path, params).to_s&.strip
128
+ file.close
129
+ ensure
130
+ file.close
131
+ file.unlink # deletes the temp file
132
+ end
120
133
 
121
134
  if @extracted_text == ''
122
- img = img.white_threshold(245)
123
- r = RTesseract.new(img, params)
124
- @extracted_text = r.to_s.strip
135
+ file = Tempfile.new('foo2')
136
+ begin
137
+ file.write(img.dup.white_threshold(245).to_blob)
138
+ file.rewind
139
+ @extracted_text = RTesseract.new(file.path, params).to_s&.strip
140
+ file.close
141
+ ensure
142
+ file.close
143
+ file.unlink # deletes the temp file
144
+ end
125
145
  end
126
146
 
127
147
  if @extracted_text == ''
128
- img = img.quantize(256,Magick::GRAYColorspace)
129
- r = RTesseract.new(img, params)
130
- @extracted_text = r.to_s.strip
148
+ file = Tempfile.new('foo3')
149
+ begin
150
+ file.write(img.dup.quantize(256,Magick::GRAYColorspace).to_blob)
151
+ file.rewind
152
+ @extracted_text = RTesseract.new(file.path, params).to_s&.strip
153
+ file.close
154
+ ensure
155
+ file.close
156
+ file.unlink # deletes the temp file
157
+ end
131
158
  end
132
159
 
133
160
  @extracted_text
@@ -1,3 +1,3 @@
1
1
  class Sqed
2
- VERSION = '0.4.4'.freeze
2
+ VERSION = '0.5.0'.freeze
3
3
  end
@@ -106,7 +106,7 @@ module SqedConfig
106
106
  collecting_event_labels: [Sqed::Parser::OcrParser],
107
107
  curator_metadata: [Sqed::Parser::OcrParser],
108
108
  determination_labels: [Sqed::Parser::OcrParser],
109
- identifier: [Sqed::Parser::BarcodeParser, Sqed::Parser::OcrParser],
109
+ identifier: [Sqed::Parser::OcrParser, Sqed::Parser::BarcodeParser],
110
110
  image_registration: [],
111
111
  labels: [Sqed::Parser::OcrParser],
112
112
  nothing: [],
@@ -123,15 +123,15 @@ module SqedConfig
123
123
  },
124
124
 
125
125
  vertical_offset_cross: {
126
- boundary_finder: Sqed::BoundaryFinder::ColorLineFinder,
127
- layout: :vertical_offset_cross,
128
- metadata_map: { 0 => :curator_metadata, 1 => :identifier, 2 => :image_registration, 3 => :annotated_specimen }
126
+ boundary_finder: Sqed::BoundaryFinder::ColorLineFinder,
127
+ layout: :vertical_offset_cross,
128
+ metadata_map: { 0 => :curator_metadata, 1 => :identifier, 2 => :image_registration, 3 => :annotated_specimen }
129
129
  },
130
130
 
131
131
  equal_cross: {
132
- boundary_finder: Sqed::BoundaryFinder::CrossFinder,
133
- layout: :equal_cross,
134
- metadata_map: { 0 => :curator_metadata, 1 => :identifier, 2 => :image_registration, 3 => :annotated_specimen }
132
+ boundary_finder: Sqed::BoundaryFinder::CrossFinder,
133
+ layout: :equal_cross,
134
+ metadata_map: { 0 => :curator_metadata, 1 => :identifier, 2 => :image_registration, 3 => :annotated_specimen }
135
135
  },
136
136
 
137
137
  cross: {
@@ -159,9 +159,23 @@ module SqedConfig
159
159
  }
160
160
  }.freeze
161
161
 
162
+
163
+ BOUNDARY_COLORS = [:red, :green, :blue, :black].freeze
164
+
162
165
  DEFAULT_TMP_DIR = '/tmp'.freeze
163
166
 
164
167
  def self.index_for_section_type(pattern, section_type)
165
168
  EXTRACTION_PATTERNS[pattern][:metadata_map].invert[section_type]
166
169
  end
170
+
171
+ # Format to return JSON
172
+ def self.metadata
173
+ return {
174
+ boundary_colors: BOUNDARY_COLORS,
175
+ extraction_patterns: EXTRACTION_PATTERNS,
176
+ section_parsers: SECTION_PARSERS,
177
+ layout_section_types: LAYOUT_SECTION_TYPES,
178
+ layouts: LAYOUTS
179
+ }
180
+ end
167
181
  end
@@ -4,7 +4,7 @@ describe Sqed::Extractor do
4
4
  let(:metadata_map) {
5
5
  {0 => :specimen, 1 => :identifier, 2 => :nothing, 3 => :image_registration }
6
6
  }
7
-
7
+
8
8
  let(:image) { ImageHelpers.crossy_green_line_specimen }
9
9
 
10
10
  let(:boundaries) {
@@ -29,7 +29,7 @@ describe Sqed::Extractor do
29
29
  specify '#metadata_map' do
30
30
  expect(e).to respond_to(:metadata_map)
31
31
  end
32
-
32
+
33
33
  specify '#boundaries' do
34
34
  expect(e).to respond_to(:boundaries)
35
35
  end
@@ -37,7 +37,7 @@ describe Sqed::Extractor do
37
37
 
38
38
  context 'extracting to a #result' do
39
39
  let(:r) { e.result }
40
-
40
+
41
41
  specify '#result retuns a Sqed::Result' do
42
42
  expect(r.class.name).to eq('Sqed::Result')
43
43
  end
@@ -52,7 +52,7 @@ describe Sqed::Extractor do
52
52
  end
53
53
 
54
54
  specify '#sections is populated with section_types' do
55
- expect(r.sections).to eq( [ :identifier, :image_registration, :nothing, :specimen ] )
55
+ expect(r.sections).to eq( [ :specimen, :identifier, :nothing, :image_registration ] )
56
56
  end
57
57
 
58
58
  specify '#boundary_coordinates is populated with coordinates' do
@@ -0,0 +1,9 @@
1
+ require 'spec_helper'
2
+
3
+ describe SqedConfig do
4
+
5
+ specify '.metadata' do
6
+ expect(SqedConfig.metadata.keys).to contain_exactly(:boundary_colors, :extraction_patterns, :section_parsers, :layout_section_types, :layouts)
7
+ end
8
+
9
+ end
@@ -113,7 +113,7 @@ describe Sqed do
113
113
  end
114
114
 
115
115
  specify '#text_for a :curator_metadata section' do
116
- expect(rz.text_for(:curator_metadata)).to match('Frost Entomological Museum')
116
+ expect(rz.text_for(:curator_metadata)).to match(/Frost\s*Entomological\s*Museum/)
117
117
  end
118
118
  end
119
119
  end
@@ -20,15 +20,15 @@ Gem::Specification.new do |spec|
20
20
 
21
21
  spec.add_dependency 'rake', '~> 12.3'
22
22
  spec.add_dependency 'rmagick', '~> 2.16'
23
- spec.add_dependency 'rtesseract', '~> 2.2.0'
23
+ spec.add_dependency 'rtesseract', '~> 3.0.2'
24
24
 
25
25
  # A qrcode reader, too many problems with compiling, dependencies
26
26
  # spec.add_dependency 'zxing_cpp', '~> 0.1.0'
27
27
 
28
28
  spec.add_development_dependency 'rspec', '~> 3.8'
29
- spec.add_development_dependency 'bundler', '~> 1.5'
29
+ spec.add_development_dependency 'bundler', '~> 2.0'
30
30
  # spec.add_development_dependency 'did_you_mean', '~> 0.9'
31
- spec.add_development_dependency 'byebug', '~> 10.0.0'
31
+ spec.add_development_dependency 'byebug', '~> 10.0'
32
32
  spec.add_development_dependency 'awesome_print', '~> 1.8'
33
33
  end
34
34
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sqed
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.4
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Yoder
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2018-09-17 00:00:00.000000000 Z
12
+ date: 2019-02-06 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -45,14 +45,14 @@ dependencies:
45
45
  requirements:
46
46
  - - "~>"
47
47
  - !ruby/object:Gem::Version
48
- version: 2.2.0
48
+ version: 3.0.2
49
49
  type: :runtime
50
50
  prerelease: false
51
51
  version_requirements: !ruby/object:Gem::Requirement
52
52
  requirements:
53
53
  - - "~>"
54
54
  - !ruby/object:Gem::Version
55
- version: 2.2.0
55
+ version: 3.0.2
56
56
  - !ruby/object:Gem::Dependency
57
57
  name: rspec
58
58
  requirement: !ruby/object:Gem::Requirement
@@ -73,28 +73,28 @@ dependencies:
73
73
  requirements:
74
74
  - - "~>"
75
75
  - !ruby/object:Gem::Version
76
- version: '1.5'
76
+ version: '2.0'
77
77
  type: :development
78
78
  prerelease: false
79
79
  version_requirements: !ruby/object:Gem::Requirement
80
80
  requirements:
81
81
  - - "~>"
82
82
  - !ruby/object:Gem::Version
83
- version: '1.5'
83
+ version: '2.0'
84
84
  - !ruby/object:Gem::Dependency
85
85
  name: byebug
86
86
  requirement: !ruby/object:Gem::Requirement
87
87
  requirements:
88
88
  - - "~>"
89
89
  - !ruby/object:Gem::Version
90
- version: 10.0.0
90
+ version: '10.0'
91
91
  type: :development
92
92
  prerelease: false
93
93
  version_requirements: !ruby/object:Gem::Requirement
94
94
  requirements:
95
95
  - - "~>"
96
96
  - !ruby/object:Gem::Version
97
- version: 10.0.0
97
+ version: '10.0'
98
98
  - !ruby/object:Gem::Dependency
99
99
  name: awesome_print
100
100
  requirement: !ruby/object:Gem::Requirement
@@ -150,6 +150,7 @@ files:
150
150
  - spec/lib/sqed/parser/ocr_spec.rb
151
151
  - spec/lib/sqed/parser_spec.rb
152
152
  - spec/lib/sqed/result_spec.rb
153
+ - spec/lib/sqed_config_spec.rb
153
154
  - spec/lib/sqed_spec.rb
154
155
  - spec/lib/sqed_utils_spec.rb
155
156
  - spec/lib/stage_handling/lep_stage_spec.rb
@@ -207,8 +208,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
207
208
  - !ruby/object:Gem::Version
208
209
  version: '0'
209
210
  requirements: []
210
- rubyforge_project:
211
- rubygems_version: 2.6.14
211
+ rubygems_version: 3.0.2
212
212
  signing_key:
213
213
  specification_version: 4
214
214
  summary: Specimens Quickly extracted and Digitized, or just "squid". A ruby gem for
@@ -224,6 +224,7 @@ test_files:
224
224
  - spec/lib/sqed/parser/ocr_spec.rb
225
225
  - spec/lib/sqed/parser_spec.rb
226
226
  - spec/lib/sqed/result_spec.rb
227
+ - spec/lib/sqed_config_spec.rb
227
228
  - spec/lib/sqed_spec.rb
228
229
  - spec/lib/sqed_utils_spec.rb
229
230
  - spec/lib/stage_handling/lep_stage_spec.rb