sqed 0.4.4 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/lib/sqed/boundary_finder/color_line_finder.rb +2 -2
- data/lib/sqed/extractor.rb +43 -43
- data/lib/sqed/parser/barcode_parser.rb +4 -4
- data/lib/sqed/parser/ocr_parser.rb +35 -8
- data/lib/sqed/version.rb +1 -1
- data/lib/sqed_config.rb +21 -7
- data/spec/lib/sqed/extractor_spec.rb +4 -4
- data/spec/lib/sqed_config_spec.rb +9 -0
- data/spec/lib/sqed_spec.rb +1 -1
- data/sqed.gemspec +3 -3
- metadata +11 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: d78e636e0c209d3cfd4de9f698bafd575686e1d393fd8148f3ae491d0396d140
|
4
|
+
data.tar.gz: 51c75913c74ccebe5a172ed64408ce176d5a04d08f788d3be46aeef1a9595e8f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 459da0e33636600618502410797123154988ce2a03f514cdf7dec9829ca6c747c5d26952ca2b247a613cd8968eb7cb5c0bcc3a6711db495e06175ec64c57da77
|
7
|
+
data.tar.gz: 49fc0de43216358f44e2d89089a5605ec46a5ef526b6464588ea867301719932433841fd5a071b4cc41ef7ce8013a2eb5ee7f5aaac69786d4e7b46308c0e0f1e
|
@@ -16,6 +16,8 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
|
16
16
|
super(image: image, layout: layout, use_thumbnail: use_thumbnail)
|
17
17
|
|
18
18
|
raise 'No layout provided.' if @layout.nil?
|
19
|
+
|
20
|
+
# !@#? why this
|
19
21
|
@boundary_color = boundary_color
|
20
22
|
|
21
23
|
if use_thumbnail
|
@@ -95,8 +97,6 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
|
95
97
|
max_width: right_top_image.columns
|
96
98
|
) # vertical line b/w 1 & 2, use "corrected_frequency" to account for color bleed from previous crop
|
97
99
|
|
98
|
-
|
99
|
-
|
100
100
|
boundaries.set(1, [left_right_split[2], 0, right_top_split[0], top_bottom_split[0]] )
|
101
101
|
boundaries.set(2, [left_right_split[2] + right_top_split[2], 0, right_top_image.columns - right_top_split[2], top_bottom_split[0]])
|
102
102
|
|
data/lib/sqed/extractor.rb
CHANGED
@@ -6,64 +6,64 @@ class Sqed
|
|
6
6
|
#
|
7
7
|
class Extractor
|
8
8
|
|
9
|
-
|
9
|
+
class Error < StandardError; end;
|
10
10
|
|
11
|
-
|
12
|
-
|
11
|
+
# a Sqed::Boundaries instance
|
12
|
+
attr_accessor :boundaries
|
13
13
|
|
14
|
-
|
15
|
-
|
16
|
-
|
14
|
+
# @return [Hash] like `{0 => :annotated_specimen, 1 => :identifier, 2 => :image_registration }`
|
15
|
+
# a metadata_map hash from EXTRACTION_PATTERNS like:
|
16
|
+
attr_accessor :metadata_map
|
17
17
|
|
18
|
-
|
19
|
-
|
18
|
+
# @return [Magick::Image file]
|
19
|
+
attr_accessor :image
|
20
20
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
21
|
+
def initialize(**opts)
|
22
|
+
@metadata_map = opts[:metadata_map]
|
23
|
+
@boundaries = opts[:boundaries]
|
24
|
+
@image = opts[:image]
|
25
25
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
26
|
+
raise Error, 'boundaries not provided or provided boundary is not a Sqed::Boundaries' if boundaries.nil? || !boundaries.class == Sqed::Boundaries
|
27
|
+
raise Error, 'metadata_map not provided or metadata_map not a Hash' if metadata_map.nil? || !metadata_map.class == Hash
|
28
|
+
raise Error, 'image not provided' if image.nil? || !image.class.name == 'Magick::Image'
|
29
|
+
end
|
30
30
|
|
31
|
-
|
32
|
-
|
31
|
+
def result
|
32
|
+
r = Sqed::Result.new
|
33
33
|
|
34
|
-
|
34
|
+
r.sections = metadata_map.keys.sort.collect{|k| metadata_map[k]}
|
35
35
|
|
36
|
-
|
37
|
-
|
38
|
-
|
36
|
+
# assign the images to the result
|
37
|
+
boundaries.each do |section_index, coords|
|
38
|
+
section_type = metadata_map[section_index]
|
39
39
|
|
40
|
-
|
41
|
-
|
42
|
-
|
40
|
+
r.send("#{section_type}_image=", extract_image(coords))
|
41
|
+
r.boundary_coordinates[section_type] = coords
|
42
|
+
end
|
43
43
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
44
|
+
# assign the metadata to the result
|
45
|
+
metadata_map.each do |section_index, section_type|
|
46
|
+
# only extract data if a parser exists
|
47
|
+
if parsers = SqedConfig::SECTION_PARSERS[section_type]
|
48
|
+
section_image = r.send("#{section_type}_image")
|
49
|
+
updated = r.send(section_type)
|
50
50
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
51
|
+
parsers.each do |p|
|
52
|
+
parsed_result = p.new(section_image).get_text(section_type: section_type)
|
53
|
+
updated[p::TYPE] = parsed_result if parsed_result && parsed_result.length > 0
|
54
|
+
end
|
55
55
|
|
56
|
-
|
56
|
+
r.send("#{section_type}=", updated)
|
57
|
+
end
|
57
58
|
end
|
58
|
-
end
|
59
59
|
|
60
|
-
|
61
|
-
|
60
|
+
r
|
61
|
+
end
|
62
62
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
63
|
+
# crop takes x, y, width, height
|
64
|
+
def extract_image(coords)
|
65
|
+
@image.crop(*coords, true)
|
66
|
+
end
|
67
67
|
|
68
68
|
end
|
69
69
|
end
|
@@ -18,7 +18,7 @@ class Sqed::Parser::BarcodeParser < Sqed::Parser
|
|
18
18
|
@barcode
|
19
19
|
end
|
20
20
|
|
21
|
-
# Uses the same
|
21
|
+
# Uses the same engine as zbarimg that you can install with brew (zbarimg)
|
22
22
|
#
|
23
23
|
def get_code_128
|
24
24
|
nil # ZXing.decode @image.filename
|
@@ -29,9 +29,9 @@ class Sqed::Parser::BarcodeParser < Sqed::Parser
|
|
29
29
|
[get_code_128].compact.first
|
30
30
|
end
|
31
31
|
|
32
|
-
|
33
|
-
|
34
|
-
|
32
|
+
#def get_datamatrix
|
33
|
+
# https://github.com/srijan/ruby-dmtx
|
34
|
+
#end
|
35
35
|
|
36
36
|
# alias to a universal method
|
37
37
|
def get_text(section_type: :default)
|
@@ -1,5 +1,9 @@
|
|
1
1
|
require 'rtesseract'
|
2
2
|
|
3
|
+
# We use tempfile because Rtesseract doesn't work directly with ImageMagic::Image (any longer... apparently, maybe)
|
4
|
+
# https://ruby-doc.org/stdlib-2.6.1/libdoc/tempfile/rdoc/Tempfile.html
|
5
|
+
require 'tempfile'
|
6
|
+
|
3
7
|
# encoding: UTF-8
|
4
8
|
#
|
5
9
|
# Given a single image return all text in that image.
|
@@ -115,19 +119,42 @@ class Sqed::Parser::OcrParser < Sqed::Parser
|
|
115
119
|
params = SECTION_PARAMS[:default].dup
|
116
120
|
params.merge!(SECTION_PARAMS[section_type])
|
117
121
|
|
118
|
-
|
119
|
-
|
122
|
+
# May be able to overcome this hacky kludge messe with providing `processor:` to new
|
123
|
+
file = Tempfile.new('foo1')
|
124
|
+
begin
|
125
|
+
file.write(image.to_blob)
|
126
|
+
file.rewind
|
127
|
+
@extracted_text = RTesseract.new(file.path, params).to_s&.strip
|
128
|
+
file.close
|
129
|
+
ensure
|
130
|
+
file.close
|
131
|
+
file.unlink # deletes the temp file
|
132
|
+
end
|
120
133
|
|
121
134
|
if @extracted_text == ''
|
122
|
-
|
123
|
-
|
124
|
-
|
135
|
+
file = Tempfile.new('foo2')
|
136
|
+
begin
|
137
|
+
file.write(img.dup.white_threshold(245).to_blob)
|
138
|
+
file.rewind
|
139
|
+
@extracted_text = RTesseract.new(file.path, params).to_s&.strip
|
140
|
+
file.close
|
141
|
+
ensure
|
142
|
+
file.close
|
143
|
+
file.unlink # deletes the temp file
|
144
|
+
end
|
125
145
|
end
|
126
146
|
|
127
147
|
if @extracted_text == ''
|
128
|
-
|
129
|
-
|
130
|
-
|
148
|
+
file = Tempfile.new('foo3')
|
149
|
+
begin
|
150
|
+
file.write(img.dup.quantize(256,Magick::GRAYColorspace).to_blob)
|
151
|
+
file.rewind
|
152
|
+
@extracted_text = RTesseract.new(file.path, params).to_s&.strip
|
153
|
+
file.close
|
154
|
+
ensure
|
155
|
+
file.close
|
156
|
+
file.unlink # deletes the temp file
|
157
|
+
end
|
131
158
|
end
|
132
159
|
|
133
160
|
@extracted_text
|
data/lib/sqed/version.rb
CHANGED
data/lib/sqed_config.rb
CHANGED
@@ -106,7 +106,7 @@ module SqedConfig
|
|
106
106
|
collecting_event_labels: [Sqed::Parser::OcrParser],
|
107
107
|
curator_metadata: [Sqed::Parser::OcrParser],
|
108
108
|
determination_labels: [Sqed::Parser::OcrParser],
|
109
|
-
identifier: [Sqed::Parser::
|
109
|
+
identifier: [Sqed::Parser::OcrParser, Sqed::Parser::BarcodeParser],
|
110
110
|
image_registration: [],
|
111
111
|
labels: [Sqed::Parser::OcrParser],
|
112
112
|
nothing: [],
|
@@ -123,15 +123,15 @@ module SqedConfig
|
|
123
123
|
},
|
124
124
|
|
125
125
|
vertical_offset_cross: {
|
126
|
-
|
127
|
-
|
128
|
-
|
126
|
+
boundary_finder: Sqed::BoundaryFinder::ColorLineFinder,
|
127
|
+
layout: :vertical_offset_cross,
|
128
|
+
metadata_map: { 0 => :curator_metadata, 1 => :identifier, 2 => :image_registration, 3 => :annotated_specimen }
|
129
129
|
},
|
130
130
|
|
131
131
|
equal_cross: {
|
132
|
-
|
133
|
-
|
134
|
-
|
132
|
+
boundary_finder: Sqed::BoundaryFinder::CrossFinder,
|
133
|
+
layout: :equal_cross,
|
134
|
+
metadata_map: { 0 => :curator_metadata, 1 => :identifier, 2 => :image_registration, 3 => :annotated_specimen }
|
135
135
|
},
|
136
136
|
|
137
137
|
cross: {
|
@@ -159,9 +159,23 @@ module SqedConfig
|
|
159
159
|
}
|
160
160
|
}.freeze
|
161
161
|
|
162
|
+
|
163
|
+
BOUNDARY_COLORS = [:red, :green, :blue, :black].freeze
|
164
|
+
|
162
165
|
DEFAULT_TMP_DIR = '/tmp'.freeze
|
163
166
|
|
164
167
|
def self.index_for_section_type(pattern, section_type)
|
165
168
|
EXTRACTION_PATTERNS[pattern][:metadata_map].invert[section_type]
|
166
169
|
end
|
170
|
+
|
171
|
+
# Format to return JSON
|
172
|
+
def self.metadata
|
173
|
+
return {
|
174
|
+
boundary_colors: BOUNDARY_COLORS,
|
175
|
+
extraction_patterns: EXTRACTION_PATTERNS,
|
176
|
+
section_parsers: SECTION_PARSERS,
|
177
|
+
layout_section_types: LAYOUT_SECTION_TYPES,
|
178
|
+
layouts: LAYOUTS
|
179
|
+
}
|
180
|
+
end
|
167
181
|
end
|
@@ -4,7 +4,7 @@ describe Sqed::Extractor do
|
|
4
4
|
let(:metadata_map) {
|
5
5
|
{0 => :specimen, 1 => :identifier, 2 => :nothing, 3 => :image_registration }
|
6
6
|
}
|
7
|
-
|
7
|
+
|
8
8
|
let(:image) { ImageHelpers.crossy_green_line_specimen }
|
9
9
|
|
10
10
|
let(:boundaries) {
|
@@ -29,7 +29,7 @@ describe Sqed::Extractor do
|
|
29
29
|
specify '#metadata_map' do
|
30
30
|
expect(e).to respond_to(:metadata_map)
|
31
31
|
end
|
32
|
-
|
32
|
+
|
33
33
|
specify '#boundaries' do
|
34
34
|
expect(e).to respond_to(:boundaries)
|
35
35
|
end
|
@@ -37,7 +37,7 @@ describe Sqed::Extractor do
|
|
37
37
|
|
38
38
|
context 'extracting to a #result' do
|
39
39
|
let(:r) { e.result }
|
40
|
-
|
40
|
+
|
41
41
|
specify '#result retuns a Sqed::Result' do
|
42
42
|
expect(r.class.name).to eq('Sqed::Result')
|
43
43
|
end
|
@@ -52,7 +52,7 @@ describe Sqed::Extractor do
|
|
52
52
|
end
|
53
53
|
|
54
54
|
specify '#sections is populated with section_types' do
|
55
|
-
expect(r.sections).to eq( [ :
|
55
|
+
expect(r.sections).to eq( [ :specimen, :identifier, :nothing, :image_registration ] )
|
56
56
|
end
|
57
57
|
|
58
58
|
specify '#boundary_coordinates is populated with coordinates' do
|
data/spec/lib/sqed_spec.rb
CHANGED
@@ -113,7 +113,7 @@ describe Sqed do
|
|
113
113
|
end
|
114
114
|
|
115
115
|
specify '#text_for a :curator_metadata section' do
|
116
|
-
expect(rz.text_for(:curator_metadata)).to match(
|
116
|
+
expect(rz.text_for(:curator_metadata)).to match(/Frost\s*Entomological\s*Museum/)
|
117
117
|
end
|
118
118
|
end
|
119
119
|
end
|
data/sqed.gemspec
CHANGED
@@ -20,15 +20,15 @@ Gem::Specification.new do |spec|
|
|
20
20
|
|
21
21
|
spec.add_dependency 'rake', '~> 12.3'
|
22
22
|
spec.add_dependency 'rmagick', '~> 2.16'
|
23
|
-
spec.add_dependency 'rtesseract', '~>
|
23
|
+
spec.add_dependency 'rtesseract', '~> 3.0.2'
|
24
24
|
|
25
25
|
# A qrcode reader, too many problems with compiling, dependencies
|
26
26
|
# spec.add_dependency 'zxing_cpp', '~> 0.1.0'
|
27
27
|
|
28
28
|
spec.add_development_dependency 'rspec', '~> 3.8'
|
29
|
-
spec.add_development_dependency 'bundler', '~>
|
29
|
+
spec.add_development_dependency 'bundler', '~> 2.0'
|
30
30
|
# spec.add_development_dependency 'did_you_mean', '~> 0.9'
|
31
|
-
spec.add_development_dependency 'byebug', '~> 10.0
|
31
|
+
spec.add_development_dependency 'byebug', '~> 10.0'
|
32
32
|
spec.add_development_dependency 'awesome_print', '~> 1.8'
|
33
33
|
end
|
34
34
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sqed
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matt Yoder
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2019-02-06 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -45,14 +45,14 @@ dependencies:
|
|
45
45
|
requirements:
|
46
46
|
- - "~>"
|
47
47
|
- !ruby/object:Gem::Version
|
48
|
-
version:
|
48
|
+
version: 3.0.2
|
49
49
|
type: :runtime
|
50
50
|
prerelease: false
|
51
51
|
version_requirements: !ruby/object:Gem::Requirement
|
52
52
|
requirements:
|
53
53
|
- - "~>"
|
54
54
|
- !ruby/object:Gem::Version
|
55
|
-
version:
|
55
|
+
version: 3.0.2
|
56
56
|
- !ruby/object:Gem::Dependency
|
57
57
|
name: rspec
|
58
58
|
requirement: !ruby/object:Gem::Requirement
|
@@ -73,28 +73,28 @@ dependencies:
|
|
73
73
|
requirements:
|
74
74
|
- - "~>"
|
75
75
|
- !ruby/object:Gem::Version
|
76
|
-
version: '
|
76
|
+
version: '2.0'
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
79
|
version_requirements: !ruby/object:Gem::Requirement
|
80
80
|
requirements:
|
81
81
|
- - "~>"
|
82
82
|
- !ruby/object:Gem::Version
|
83
|
-
version: '
|
83
|
+
version: '2.0'
|
84
84
|
- !ruby/object:Gem::Dependency
|
85
85
|
name: byebug
|
86
86
|
requirement: !ruby/object:Gem::Requirement
|
87
87
|
requirements:
|
88
88
|
- - "~>"
|
89
89
|
- !ruby/object:Gem::Version
|
90
|
-
version: 10.0
|
90
|
+
version: '10.0'
|
91
91
|
type: :development
|
92
92
|
prerelease: false
|
93
93
|
version_requirements: !ruby/object:Gem::Requirement
|
94
94
|
requirements:
|
95
95
|
- - "~>"
|
96
96
|
- !ruby/object:Gem::Version
|
97
|
-
version: 10.0
|
97
|
+
version: '10.0'
|
98
98
|
- !ruby/object:Gem::Dependency
|
99
99
|
name: awesome_print
|
100
100
|
requirement: !ruby/object:Gem::Requirement
|
@@ -150,6 +150,7 @@ files:
|
|
150
150
|
- spec/lib/sqed/parser/ocr_spec.rb
|
151
151
|
- spec/lib/sqed/parser_spec.rb
|
152
152
|
- spec/lib/sqed/result_spec.rb
|
153
|
+
- spec/lib/sqed_config_spec.rb
|
153
154
|
- spec/lib/sqed_spec.rb
|
154
155
|
- spec/lib/sqed_utils_spec.rb
|
155
156
|
- spec/lib/stage_handling/lep_stage_spec.rb
|
@@ -207,8 +208,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
207
208
|
- !ruby/object:Gem::Version
|
208
209
|
version: '0'
|
209
210
|
requirements: []
|
210
|
-
|
211
|
-
rubygems_version: 2.6.14
|
211
|
+
rubygems_version: 3.0.2
|
212
212
|
signing_key:
|
213
213
|
specification_version: 4
|
214
214
|
summary: Specimens Quickly extracted and Digitized, or just "squid". A ruby gem for
|
@@ -224,6 +224,7 @@ test_files:
|
|
224
224
|
- spec/lib/sqed/parser/ocr_spec.rb
|
225
225
|
- spec/lib/sqed/parser_spec.rb
|
226
226
|
- spec/lib/sqed/result_spec.rb
|
227
|
+
- spec/lib/sqed_config_spec.rb
|
227
228
|
- spec/lib/sqed_spec.rb
|
228
229
|
- spec/lib/sqed_utils_spec.rb
|
229
230
|
- spec/lib/stage_handling/lep_stage_spec.rb
|