sqed 0.4.4 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: df36b8e8052dc45504b1a8cfb4f511b7d5af680f
4
- data.tar.gz: 2557f8b06d2c521893d4bbace2c9433be420e376
2
+ SHA256:
3
+ metadata.gz: d78e636e0c209d3cfd4de9f698bafd575686e1d393fd8148f3ae491d0396d140
4
+ data.tar.gz: 51c75913c74ccebe5a172ed64408ce176d5a04d08f788d3be46aeef1a9595e8f
5
5
  SHA512:
6
- metadata.gz: a190d03073872ad472a422d96b8f62fce283c2936a18d53d6b90f7409e07acfc6f407a5125d88350a04ccecccef3709e7c4c82fa2059ba806f52685cb86948eb
7
- data.tar.gz: d29c8d94a3d04c4a8461e11573fafdb7be8383d305ccb7222a05b9cb123fd4e18d135f27df551eee96970552324a91e67688e81754cad37c631472bd41dd0fb3
6
+ metadata.gz: 459da0e33636600618502410797123154988ce2a03f514cdf7dec9829ca6c747c5d26952ca2b247a613cd8968eb7cb5c0bcc3a6711db495e06175ec64c57da77
7
+ data.tar.gz: 49fc0de43216358f44e2d89089a5605ec46a5ef526b6464588ea867301719932433841fd5a071b4cc41ef7ce8013a2eb5ee7f5aaac69786d4e7b46308c0e0f1e
@@ -16,6 +16,8 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
16
16
  super(image: image, layout: layout, use_thumbnail: use_thumbnail)
17
17
 
18
18
  raise 'No layout provided.' if @layout.nil?
19
+
20
+ # TODO: unclear why this is needed
19
21
  @boundary_color = boundary_color
20
22
 
21
23
  if use_thumbnail
@@ -95,8 +97,6 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
95
97
  max_width: right_top_image.columns
96
98
  ) # vertical line b/w 1 & 2, use "corrected_frequency" to account for color bleed from previous crop
97
99
 
98
-
99
-
100
100
  boundaries.set(1, [left_right_split[2], 0, right_top_split[0], top_bottom_split[0]] )
101
101
  boundaries.set(2, [left_right_split[2] + right_top_split[2], 0, right_top_image.columns - right_top_split[2], top_bottom_split[0]])
102
102
 
@@ -6,64 +6,64 @@ class Sqed
6
6
  #
7
7
  class Extractor
8
8
 
9
- class Error < StandardError; end;
9
+ class Error < StandardError; end;
10
10
 
11
- # a Sqed::Boundaries instance
12
- attr_accessor :boundaries
11
+ # a Sqed::Boundaries instance
12
+ attr_accessor :boundaries
13
13
 
14
- # @return [Hash] like `{0 => :annotated_specimen, 1 => :identifier, 2 => :image_registration }`
15
- # a metadata_map hash from EXTRACTION_PATTERNS like:
16
- attr_accessor :metadata_map
14
+ # @return [Hash] like `{0 => :annotated_specimen, 1 => :identifier, 2 => :image_registration }`
15
+ # a metadata_map hash from EXTRACTION_PATTERNS like:
16
+ attr_accessor :metadata_map
17
17
 
18
- # @return [Magick::Image file]
19
- attr_accessor :image
18
+ # @return [Magick::Image file]
19
+ attr_accessor :image
20
20
 
21
- def initialize(**opts)
22
- @metadata_map = opts[:metadata_map]
23
- @boundaries = opts[:boundaries]
24
- @image = opts[:image]
21
+ def initialize(**opts)
22
+ @metadata_map = opts[:metadata_map]
23
+ @boundaries = opts[:boundaries]
24
+ @image = opts[:image]
25
25
 
26
- raise Error, 'boundaries not provided or provided boundary is not a Sqed::Boundaries' if boundaries.nil? || !boundaries.class == Sqed::Boundaries
27
- raise Error, 'metadata_map not provided or metadata_map not a Hash' if metadata_map.nil? || !metadata_map.class == Hash
28
- raise Error, 'image not provided' if image.nil? || !image.class.name == 'Magick::Image'
29
- end
26
+ raise Error, 'boundaries not provided or provided boundary is not a Sqed::Boundaries' if boundaries.nil? || !boundaries.class == Sqed::Boundaries
27
+ raise Error, 'metadata_map not provided or metadata_map not a Hash' if metadata_map.nil? || !metadata_map.class == Hash
28
+ raise Error, 'image not provided' if image.nil? || !image.class.name == 'Magick::Image'
29
+ end
30
30
 
31
- def result
32
- r = Sqed::Result.new
31
+ def result
32
+ r = Sqed::Result.new
33
33
 
34
- r.sections = metadata_map.values.sort
34
+ r.sections = metadata_map.keys.sort.collect{|k| metadata_map[k]}
35
35
 
36
- # assign the images to the result
37
- boundaries.each do |section_index, coords|
38
- section_type = metadata_map[section_index]
36
+ # assign the images to the result
37
+ boundaries.each do |section_index, coords|
38
+ section_type = metadata_map[section_index]
39
39
 
40
- r.send("#{section_type}_image=", extract_image(coords))
41
- r.boundary_coordinates[section_type] = coords
42
- end
40
+ r.send("#{section_type}_image=", extract_image(coords))
41
+ r.boundary_coordinates[section_type] = coords
42
+ end
43
43
 
44
- # assign the metadata to the result
45
- metadata_map.each do |section_index, section_type|
46
- # only extract data if a parser exists
47
- if parsers = SqedConfig::SECTION_PARSERS[section_type]
48
- section_image = r.send("#{section_type}_image")
49
- updated = r.send(section_type)
44
+ # assign the metadata to the result
45
+ metadata_map.each do |section_index, section_type|
46
+ # only extract data if a parser exists
47
+ if parsers = SqedConfig::SECTION_PARSERS[section_type]
48
+ section_image = r.send("#{section_type}_image")
49
+ updated = r.send(section_type)
50
50
 
51
- parsers.each do |p|
52
- parsed_result = p.new(section_image).get_text(section_type: section_type)
53
- updated[p::TYPE] = parsed_result if parsed_result && parsed_result.length > 0
54
- end
51
+ parsers.each do |p|
52
+ parsed_result = p.new(section_image).get_text(section_type: section_type)
53
+ updated[p::TYPE] = parsed_result if parsed_result && parsed_result.length > 0
54
+ end
55
55
 
56
- r.send("#{section_type}=", updated)
56
+ r.send("#{section_type}=", updated)
57
+ end
57
58
  end
58
- end
59
59
 
60
- r
61
- end
60
+ r
61
+ end
62
62
 
63
- # crop takes x, y, width, height
64
- def extract_image(coords)
65
- @image.crop(*coords, true)
66
- end
63
+ # crop takes x, y, width, height
64
+ def extract_image(coords)
65
+ @image.crop(*coords, true)
66
+ end
67
67
 
68
68
  end
69
69
  end
@@ -18,7 +18,7 @@ class Sqed::Parser::BarcodeParser < Sqed::Parser
18
18
  @barcode
19
19
  end
20
20
 
21
- # Uses the same enging as zbarimg that you can install with brew (zbarimg)
21
+ # Uses the same engine as zbarimg that you can install with brew (zbarimg)
22
22
  #
23
23
  def get_code_128
24
24
  nil # ZXing.decode @image.filename
@@ -29,9 +29,9 @@ class Sqed::Parser::BarcodeParser < Sqed::Parser
29
29
  [get_code_128].compact.first
30
30
  end
31
31
 
32
- #def get_datamatrix
33
- # https://github.com/srijan/ruby-dmtx
34
- #end
32
+ #def get_datamatrix
33
+ # https://github.com/srijan/ruby-dmtx
34
+ #end
35
35
 
36
36
  # alias to a universal method
37
37
  def get_text(section_type: :default)
@@ -1,5 +1,9 @@
1
1
  require 'rtesseract'
2
2
 
3
+ # We use a Tempfile because RTesseract doesn't work directly with Magick::Image (any longer... apparently, maybe)
4
+ # https://ruby-doc.org/stdlib-2.6.1/libdoc/tempfile/rdoc/Tempfile.html
5
+ require 'tempfile'
6
+
3
7
  # encoding: UTF-8
4
8
  #
5
9
  # Given a single image return all text in that image.
@@ -115,19 +119,42 @@ class Sqed::Parser::OcrParser < Sqed::Parser
115
119
  params = SECTION_PARAMS[:default].dup
116
120
  params.merge!(SECTION_PARAMS[section_type])
117
121
 
118
- r = RTesseract.new(img, params)
119
- @extracted_text = r.to_s.strip
122
+ # May be able to replace this hacky kludge by providing `processor:` to `RTesseract.new`
123
+ file = Tempfile.new('foo1')
124
+ begin
125
+ file.write(image.to_blob)
126
+ file.rewind
127
+ @extracted_text = RTesseract.new(file.path, params).to_s&.strip
128
+ file.close
129
+ ensure
130
+ file.close
131
+ file.unlink # deletes the temp file
132
+ end
120
133
 
121
134
  if @extracted_text == ''
122
- img = img.white_threshold(245)
123
- r = RTesseract.new(img, params)
124
- @extracted_text = r.to_s.strip
135
+ file = Tempfile.new('foo2')
136
+ begin
137
+ file.write(img.dup.white_threshold(245).to_blob)
138
+ file.rewind
139
+ @extracted_text = RTesseract.new(file.path, params).to_s&.strip
140
+ file.close
141
+ ensure
142
+ file.close
143
+ file.unlink # deletes the temp file
144
+ end
125
145
  end
126
146
 
127
147
  if @extracted_text == ''
128
- img = img.quantize(256,Magick::GRAYColorspace)
129
- r = RTesseract.new(img, params)
130
- @extracted_text = r.to_s.strip
148
+ file = Tempfile.new('foo3')
149
+ begin
150
+ file.write(img.dup.quantize(256,Magick::GRAYColorspace).to_blob)
151
+ file.rewind
152
+ @extracted_text = RTesseract.new(file.path, params).to_s&.strip
153
+ file.close
154
+ ensure
155
+ file.close
156
+ file.unlink # deletes the temp file
157
+ end
131
158
  end
132
159
 
133
160
  @extracted_text
@@ -1,3 +1,3 @@
1
1
  class Sqed
2
- VERSION = '0.4.4'.freeze
2
+ VERSION = '0.5.0'.freeze
3
3
  end
@@ -106,7 +106,7 @@ module SqedConfig
106
106
  collecting_event_labels: [Sqed::Parser::OcrParser],
107
107
  curator_metadata: [Sqed::Parser::OcrParser],
108
108
  determination_labels: [Sqed::Parser::OcrParser],
109
- identifier: [Sqed::Parser::BarcodeParser, Sqed::Parser::OcrParser],
109
+ identifier: [Sqed::Parser::OcrParser, Sqed::Parser::BarcodeParser],
110
110
  image_registration: [],
111
111
  labels: [Sqed::Parser::OcrParser],
112
112
  nothing: [],
@@ -123,15 +123,15 @@ module SqedConfig
123
123
  },
124
124
 
125
125
  vertical_offset_cross: {
126
- boundary_finder: Sqed::BoundaryFinder::ColorLineFinder,
127
- layout: :vertical_offset_cross,
128
- metadata_map: { 0 => :curator_metadata, 1 => :identifier, 2 => :image_registration, 3 => :annotated_specimen }
126
+ boundary_finder: Sqed::BoundaryFinder::ColorLineFinder,
127
+ layout: :vertical_offset_cross,
128
+ metadata_map: { 0 => :curator_metadata, 1 => :identifier, 2 => :image_registration, 3 => :annotated_specimen }
129
129
  },
130
130
 
131
131
  equal_cross: {
132
- boundary_finder: Sqed::BoundaryFinder::CrossFinder,
133
- layout: :equal_cross,
134
- metadata_map: { 0 => :curator_metadata, 1 => :identifier, 2 => :image_registration, 3 => :annotated_specimen }
132
+ boundary_finder: Sqed::BoundaryFinder::CrossFinder,
133
+ layout: :equal_cross,
134
+ metadata_map: { 0 => :curator_metadata, 1 => :identifier, 2 => :image_registration, 3 => :annotated_specimen }
135
135
  },
136
136
 
137
137
  cross: {
@@ -159,9 +159,23 @@ module SqedConfig
159
159
  }
160
160
  }.freeze
161
161
 
162
+
163
+ BOUNDARY_COLORS = [:red, :green, :blue, :black].freeze
164
+
162
165
  DEFAULT_TMP_DIR = '/tmp'.freeze
163
166
 
164
167
  def self.index_for_section_type(pattern, section_type)
165
168
  EXTRACTION_PATTERNS[pattern][:metadata_map].invert[section_type]
166
169
  end
170
+
171
+ # @return [Hash] the configuration constants, in a format suitable for returning as JSON
172
+ def self.metadata
173
+ return {
174
+ boundary_colors: BOUNDARY_COLORS,
175
+ extraction_patterns: EXTRACTION_PATTERNS,
176
+ section_parsers: SECTION_PARSERS,
177
+ layout_section_types: LAYOUT_SECTION_TYPES,
178
+ layouts: LAYOUTS
179
+ }
180
+ end
167
181
  end
@@ -4,7 +4,7 @@ describe Sqed::Extractor do
4
4
  let(:metadata_map) {
5
5
  {0 => :specimen, 1 => :identifier, 2 => :nothing, 3 => :image_registration }
6
6
  }
7
-
7
+
8
8
  let(:image) { ImageHelpers.crossy_green_line_specimen }
9
9
 
10
10
  let(:boundaries) {
@@ -29,7 +29,7 @@ describe Sqed::Extractor do
29
29
  specify '#metadata_map' do
30
30
  expect(e).to respond_to(:metadata_map)
31
31
  end
32
-
32
+
33
33
  specify '#boundaries' do
34
34
  expect(e).to respond_to(:boundaries)
35
35
  end
@@ -37,7 +37,7 @@ describe Sqed::Extractor do
37
37
 
38
38
  context 'extracting to a #result' do
39
39
  let(:r) { e.result }
40
-
40
+
41
41
  specify '#result retuns a Sqed::Result' do
42
42
  expect(r.class.name).to eq('Sqed::Result')
43
43
  end
@@ -52,7 +52,7 @@ describe Sqed::Extractor do
52
52
  end
53
53
 
54
54
  specify '#sections is populated with section_types' do
55
- expect(r.sections).to eq( [ :identifier, :image_registration, :nothing, :specimen ] )
55
+ expect(r.sections).to eq( [ :specimen, :identifier, :nothing, :image_registration ] )
56
56
  end
57
57
 
58
58
  specify '#boundary_coordinates is populated with coordinates' do
@@ -0,0 +1,9 @@
1
+ require 'spec_helper'
2
+
3
+ describe SqedConfig do
4
+
5
+ specify '.metadata' do
6
+ expect(SqedConfig.metadata.keys).to contain_exactly(:boundary_colors, :extraction_patterns, :section_parsers, :layout_section_types, :layouts)
7
+ end
8
+
9
+ end
@@ -113,7 +113,7 @@ describe Sqed do
113
113
  end
114
114
 
115
115
  specify '#text_for a :curator_metadata section' do
116
- expect(rz.text_for(:curator_metadata)).to match('Frost Entomological Museum')
116
+ expect(rz.text_for(:curator_metadata)).to match(/Frost\s*Entomological\s*Museum/)
117
117
  end
118
118
  end
119
119
  end
@@ -20,15 +20,15 @@ Gem::Specification.new do |spec|
20
20
 
21
21
  spec.add_dependency 'rake', '~> 12.3'
22
22
  spec.add_dependency 'rmagick', '~> 2.16'
23
- spec.add_dependency 'rtesseract', '~> 2.2.0'
23
+ spec.add_dependency 'rtesseract', '~> 3.0.2'
24
24
 
25
25
  # A qrcode reader, too many problems with compiling, dependencies
26
26
  # spec.add_dependency 'zxing_cpp', '~> 0.1.0'
27
27
 
28
28
  spec.add_development_dependency 'rspec', '~> 3.8'
29
- spec.add_development_dependency 'bundler', '~> 1.5'
29
+ spec.add_development_dependency 'bundler', '~> 2.0'
30
30
  # spec.add_development_dependency 'did_you_mean', '~> 0.9'
31
- spec.add_development_dependency 'byebug', '~> 10.0.0'
31
+ spec.add_development_dependency 'byebug', '~> 10.0'
32
32
  spec.add_development_dependency 'awesome_print', '~> 1.8'
33
33
  end
34
34
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sqed
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.4
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Yoder
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2018-09-17 00:00:00.000000000 Z
12
+ date: 2019-02-06 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -45,14 +45,14 @@ dependencies:
45
45
  requirements:
46
46
  - - "~>"
47
47
  - !ruby/object:Gem::Version
48
- version: 2.2.0
48
+ version: 3.0.2
49
49
  type: :runtime
50
50
  prerelease: false
51
51
  version_requirements: !ruby/object:Gem::Requirement
52
52
  requirements:
53
53
  - - "~>"
54
54
  - !ruby/object:Gem::Version
55
- version: 2.2.0
55
+ version: 3.0.2
56
56
  - !ruby/object:Gem::Dependency
57
57
  name: rspec
58
58
  requirement: !ruby/object:Gem::Requirement
@@ -73,28 +73,28 @@ dependencies:
73
73
  requirements:
74
74
  - - "~>"
75
75
  - !ruby/object:Gem::Version
76
- version: '1.5'
76
+ version: '2.0'
77
77
  type: :development
78
78
  prerelease: false
79
79
  version_requirements: !ruby/object:Gem::Requirement
80
80
  requirements:
81
81
  - - "~>"
82
82
  - !ruby/object:Gem::Version
83
- version: '1.5'
83
+ version: '2.0'
84
84
  - !ruby/object:Gem::Dependency
85
85
  name: byebug
86
86
  requirement: !ruby/object:Gem::Requirement
87
87
  requirements:
88
88
  - - "~>"
89
89
  - !ruby/object:Gem::Version
90
- version: 10.0.0
90
+ version: '10.0'
91
91
  type: :development
92
92
  prerelease: false
93
93
  version_requirements: !ruby/object:Gem::Requirement
94
94
  requirements:
95
95
  - - "~>"
96
96
  - !ruby/object:Gem::Version
97
- version: 10.0.0
97
+ version: '10.0'
98
98
  - !ruby/object:Gem::Dependency
99
99
  name: awesome_print
100
100
  requirement: !ruby/object:Gem::Requirement
@@ -150,6 +150,7 @@ files:
150
150
  - spec/lib/sqed/parser/ocr_spec.rb
151
151
  - spec/lib/sqed/parser_spec.rb
152
152
  - spec/lib/sqed/result_spec.rb
153
+ - spec/lib/sqed_config_spec.rb
153
154
  - spec/lib/sqed_spec.rb
154
155
  - spec/lib/sqed_utils_spec.rb
155
156
  - spec/lib/stage_handling/lep_stage_spec.rb
@@ -207,8 +208,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
207
208
  - !ruby/object:Gem::Version
208
209
  version: '0'
209
210
  requirements: []
210
- rubyforge_project:
211
- rubygems_version: 2.6.14
211
+ rubygems_version: 3.0.2
212
212
  signing_key:
213
213
  specification_version: 4
214
214
  summary: Specimens Quickly extracted and Digitized, or just "squid". A ruby gem for
@@ -224,6 +224,7 @@ test_files:
224
224
  - spec/lib/sqed/parser/ocr_spec.rb
225
225
  - spec/lib/sqed/parser_spec.rb
226
226
  - spec/lib/sqed/result_spec.rb
227
+ - spec/lib/sqed_config_spec.rb
227
228
  - spec/lib/sqed_spec.rb
228
229
  - spec/lib/sqed_utils_spec.rb
229
230
  - spec/lib/stage_handling/lep_stage_spec.rb