sqed 0.4.4 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/lib/sqed/boundary_finder/color_line_finder.rb +2 -2
- data/lib/sqed/extractor.rb +43 -43
- data/lib/sqed/parser/barcode_parser.rb +4 -4
- data/lib/sqed/parser/ocr_parser.rb +35 -8
- data/lib/sqed/version.rb +1 -1
- data/lib/sqed_config.rb +21 -7
- data/spec/lib/sqed/extractor_spec.rb +4 -4
- data/spec/lib/sqed_config_spec.rb +9 -0
- data/spec/lib/sqed_spec.rb +1 -1
- data/sqed.gemspec +3 -3
- metadata +11 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: d78e636e0c209d3cfd4de9f698bafd575686e1d393fd8148f3ae491d0396d140
|
4
|
+
data.tar.gz: 51c75913c74ccebe5a172ed64408ce176d5a04d08f788d3be46aeef1a9595e8f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 459da0e33636600618502410797123154988ce2a03f514cdf7dec9829ca6c747c5d26952ca2b247a613cd8968eb7cb5c0bcc3a6711db495e06175ec64c57da77
|
7
|
+
data.tar.gz: 49fc0de43216358f44e2d89089a5605ec46a5ef526b6464588ea867301719932433841fd5a071b4cc41ef7ce8013a2eb5ee7f5aaac69786d4e7b46308c0e0f1e
|
@@ -16,6 +16,8 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
|
16
16
|
super(image: image, layout: layout, use_thumbnail: use_thumbnail)
|
17
17
|
|
18
18
|
raise 'No layout provided.' if @layout.nil?
|
19
|
+
|
20
|
+
# !@#? why this
|
19
21
|
@boundary_color = boundary_color
|
20
22
|
|
21
23
|
if use_thumbnail
|
@@ -95,8 +97,6 @@ class Sqed::BoundaryFinder::ColorLineFinder < Sqed::BoundaryFinder
|
|
95
97
|
max_width: right_top_image.columns
|
96
98
|
) # vertical line b/w 1 & 2, use "corrected_frequency" to account for color bleed from previous crop
|
97
99
|
|
98
|
-
|
99
|
-
|
100
100
|
boundaries.set(1, [left_right_split[2], 0, right_top_split[0], top_bottom_split[0]] )
|
101
101
|
boundaries.set(2, [left_right_split[2] + right_top_split[2], 0, right_top_image.columns - right_top_split[2], top_bottom_split[0]])
|
102
102
|
|
data/lib/sqed/extractor.rb
CHANGED
@@ -6,64 +6,64 @@ class Sqed
|
|
6
6
|
#
|
7
7
|
class Extractor
|
8
8
|
|
9
|
-
|
9
|
+
class Error < StandardError; end;
|
10
10
|
|
11
|
-
|
12
|
-
|
11
|
+
# a Sqed::Boundaries instance
|
12
|
+
attr_accessor :boundaries
|
13
13
|
|
14
|
-
|
15
|
-
|
16
|
-
|
14
|
+
# @return [Hash] like `{0 => :annotated_specimen, 1 => :identifier, 2 => :image_registration }`
|
15
|
+
# a metadata_map hash from EXTRACTION_PATTERNS like:
|
16
|
+
attr_accessor :metadata_map
|
17
17
|
|
18
|
-
|
19
|
-
|
18
|
+
# @return [Magick::Image file]
|
19
|
+
attr_accessor :image
|
20
20
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
21
|
+
def initialize(**opts)
|
22
|
+
@metadata_map = opts[:metadata_map]
|
23
|
+
@boundaries = opts[:boundaries]
|
24
|
+
@image = opts[:image]
|
25
25
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
26
|
+
raise Error, 'boundaries not provided or provided boundary is not a Sqed::Boundaries' if boundaries.nil? || !boundaries.class == Sqed::Boundaries
|
27
|
+
raise Error, 'metadata_map not provided or metadata_map not a Hash' if metadata_map.nil? || !metadata_map.class == Hash
|
28
|
+
raise Error, 'image not provided' if image.nil? || !image.class.name == 'Magick::Image'
|
29
|
+
end
|
30
30
|
|
31
|
-
|
32
|
-
|
31
|
+
def result
|
32
|
+
r = Sqed::Result.new
|
33
33
|
|
34
|
-
|
34
|
+
r.sections = metadata_map.keys.sort.collect{|k| metadata_map[k]}
|
35
35
|
|
36
|
-
|
37
|
-
|
38
|
-
|
36
|
+
# assign the images to the result
|
37
|
+
boundaries.each do |section_index, coords|
|
38
|
+
section_type = metadata_map[section_index]
|
39
39
|
|
40
|
-
|
41
|
-
|
42
|
-
|
40
|
+
r.send("#{section_type}_image=", extract_image(coords))
|
41
|
+
r.boundary_coordinates[section_type] = coords
|
42
|
+
end
|
43
43
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
44
|
+
# assign the metadata to the result
|
45
|
+
metadata_map.each do |section_index, section_type|
|
46
|
+
# only extract data if a parser exists
|
47
|
+
if parsers = SqedConfig::SECTION_PARSERS[section_type]
|
48
|
+
section_image = r.send("#{section_type}_image")
|
49
|
+
updated = r.send(section_type)
|
50
50
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
51
|
+
parsers.each do |p|
|
52
|
+
parsed_result = p.new(section_image).get_text(section_type: section_type)
|
53
|
+
updated[p::TYPE] = parsed_result if parsed_result && parsed_result.length > 0
|
54
|
+
end
|
55
55
|
|
56
|
-
|
56
|
+
r.send("#{section_type}=", updated)
|
57
|
+
end
|
57
58
|
end
|
58
|
-
end
|
59
59
|
|
60
|
-
|
61
|
-
|
60
|
+
r
|
61
|
+
end
|
62
62
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
63
|
+
# crop takes x, y, width, height
|
64
|
+
def extract_image(coords)
|
65
|
+
@image.crop(*coords, true)
|
66
|
+
end
|
67
67
|
|
68
68
|
end
|
69
69
|
end
|
@@ -18,7 +18,7 @@ class Sqed::Parser::BarcodeParser < Sqed::Parser
|
|
18
18
|
@barcode
|
19
19
|
end
|
20
20
|
|
21
|
-
# Uses the same
|
21
|
+
# Uses the same engine as zbarimg that you can install with brew (zbarimg)
|
22
22
|
#
|
23
23
|
def get_code_128
|
24
24
|
nil # ZXing.decode @image.filename
|
@@ -29,9 +29,9 @@ class Sqed::Parser::BarcodeParser < Sqed::Parser
|
|
29
29
|
[get_code_128].compact.first
|
30
30
|
end
|
31
31
|
|
32
|
-
|
33
|
-
|
34
|
-
|
32
|
+
#def get_datamatrix
|
33
|
+
# https://github.com/srijan/ruby-dmtx
|
34
|
+
#end
|
35
35
|
|
36
36
|
# alias to a universal method
|
37
37
|
def get_text(section_type: :default)
|
@@ -1,5 +1,9 @@
|
|
1
1
|
require 'rtesseract'
|
2
2
|
|
3
|
+
# We use Tempfile because RTesseract doesn't work directly with Magick::Image (any longer... apparently, maybe)
|
4
|
+
# https://ruby-doc.org/stdlib-2.6.1/libdoc/tempfile/rdoc/Tempfile.html
|
5
|
+
require 'tempfile'
|
6
|
+
|
3
7
|
# encoding: UTF-8
|
4
8
|
#
|
5
9
|
# Given a single image return all text in that image.
|
@@ -115,19 +119,42 @@ class Sqed::Parser::OcrParser < Sqed::Parser
|
|
115
119
|
params = SECTION_PARAMS[:default].dup
|
116
120
|
params.merge!(SECTION_PARAMS[section_type])
|
117
121
|
|
118
|
-
|
119
|
-
|
122
|
+
# May be able to overcome this hacky kludge by providing `processor:` to RTesseract.new
|
123
|
+
file = Tempfile.new('foo1')
|
124
|
+
begin
|
125
|
+
file.write(image.to_blob)
|
126
|
+
file.rewind
|
127
|
+
@extracted_text = RTesseract.new(file.path, params).to_s&.strip
|
128
|
+
file.close
|
129
|
+
ensure
|
130
|
+
file.close
|
131
|
+
file.unlink # deletes the temp file
|
132
|
+
end
|
120
133
|
|
121
134
|
if @extracted_text == ''
|
122
|
-
|
123
|
-
|
124
|
-
|
135
|
+
file = Tempfile.new('foo2')
|
136
|
+
begin
|
137
|
+
file.write(img.dup.white_threshold(245).to_blob)
|
138
|
+
file.rewind
|
139
|
+
@extracted_text = RTesseract.new(file.path, params).to_s&.strip
|
140
|
+
file.close
|
141
|
+
ensure
|
142
|
+
file.close
|
143
|
+
file.unlink # deletes the temp file
|
144
|
+
end
|
125
145
|
end
|
126
146
|
|
127
147
|
if @extracted_text == ''
|
128
|
-
|
129
|
-
|
130
|
-
|
148
|
+
file = Tempfile.new('foo3')
|
149
|
+
begin
|
150
|
+
file.write(img.dup.quantize(256,Magick::GRAYColorspace).to_blob)
|
151
|
+
file.rewind
|
152
|
+
@extracted_text = RTesseract.new(file.path, params).to_s&.strip
|
153
|
+
file.close
|
154
|
+
ensure
|
155
|
+
file.close
|
156
|
+
file.unlink # deletes the temp file
|
157
|
+
end
|
131
158
|
end
|
132
159
|
|
133
160
|
@extracted_text
|
data/lib/sqed/version.rb
CHANGED
data/lib/sqed_config.rb
CHANGED
@@ -106,7 +106,7 @@ module SqedConfig
|
|
106
106
|
collecting_event_labels: [Sqed::Parser::OcrParser],
|
107
107
|
curator_metadata: [Sqed::Parser::OcrParser],
|
108
108
|
determination_labels: [Sqed::Parser::OcrParser],
|
109
|
-
identifier: [Sqed::Parser::
|
109
|
+
identifier: [Sqed::Parser::OcrParser, Sqed::Parser::BarcodeParser],
|
110
110
|
image_registration: [],
|
111
111
|
labels: [Sqed::Parser::OcrParser],
|
112
112
|
nothing: [],
|
@@ -123,15 +123,15 @@ module SqedConfig
|
|
123
123
|
},
|
124
124
|
|
125
125
|
vertical_offset_cross: {
|
126
|
-
|
127
|
-
|
128
|
-
|
126
|
+
boundary_finder: Sqed::BoundaryFinder::ColorLineFinder,
|
127
|
+
layout: :vertical_offset_cross,
|
128
|
+
metadata_map: { 0 => :curator_metadata, 1 => :identifier, 2 => :image_registration, 3 => :annotated_specimen }
|
129
129
|
},
|
130
130
|
|
131
131
|
equal_cross: {
|
132
|
-
|
133
|
-
|
134
|
-
|
132
|
+
boundary_finder: Sqed::BoundaryFinder::CrossFinder,
|
133
|
+
layout: :equal_cross,
|
134
|
+
metadata_map: { 0 => :curator_metadata, 1 => :identifier, 2 => :image_registration, 3 => :annotated_specimen }
|
135
135
|
},
|
136
136
|
|
137
137
|
cross: {
|
@@ -159,9 +159,23 @@ module SqedConfig
|
|
159
159
|
}
|
160
160
|
}.freeze
|
161
161
|
|
162
|
+
|
163
|
+
BOUNDARY_COLORS = [:red, :green, :blue, :black].freeze
|
164
|
+
|
162
165
|
DEFAULT_TMP_DIR = '/tmp'.freeze
|
163
166
|
|
164
167
|
def self.index_for_section_type(pattern, section_type)
|
165
168
|
EXTRACTION_PATTERNS[pattern][:metadata_map].invert[section_type]
|
166
169
|
end
|
170
|
+
|
171
|
+
# Format to return JSON
|
172
|
+
def self.metadata
|
173
|
+
return {
|
174
|
+
boundary_colors: BOUNDARY_COLORS,
|
175
|
+
extraction_patterns: EXTRACTION_PATTERNS,
|
176
|
+
section_parsers: SECTION_PARSERS,
|
177
|
+
layout_section_types: LAYOUT_SECTION_TYPES,
|
178
|
+
layouts: LAYOUTS
|
179
|
+
}
|
180
|
+
end
|
167
181
|
end
|
@@ -4,7 +4,7 @@ describe Sqed::Extractor do
|
|
4
4
|
let(:metadata_map) {
|
5
5
|
{0 => :specimen, 1 => :identifier, 2 => :nothing, 3 => :image_registration }
|
6
6
|
}
|
7
|
-
|
7
|
+
|
8
8
|
let(:image) { ImageHelpers.crossy_green_line_specimen }
|
9
9
|
|
10
10
|
let(:boundaries) {
|
@@ -29,7 +29,7 @@ describe Sqed::Extractor do
|
|
29
29
|
specify '#metadata_map' do
|
30
30
|
expect(e).to respond_to(:metadata_map)
|
31
31
|
end
|
32
|
-
|
32
|
+
|
33
33
|
specify '#boundaries' do
|
34
34
|
expect(e).to respond_to(:boundaries)
|
35
35
|
end
|
@@ -37,7 +37,7 @@ describe Sqed::Extractor do
|
|
37
37
|
|
38
38
|
context 'extracting to a #result' do
|
39
39
|
let(:r) { e.result }
|
40
|
-
|
40
|
+
|
41
41
|
specify '#result retuns a Sqed::Result' do
|
42
42
|
expect(r.class.name).to eq('Sqed::Result')
|
43
43
|
end
|
@@ -52,7 +52,7 @@ describe Sqed::Extractor do
|
|
52
52
|
end
|
53
53
|
|
54
54
|
specify '#sections is populated with section_types' do
|
55
|
-
expect(r.sections).to eq( [ :
|
55
|
+
expect(r.sections).to eq( [ :specimen, :identifier, :nothing, :image_registration ] )
|
56
56
|
end
|
57
57
|
|
58
58
|
specify '#boundary_coordinates is populated with coordinates' do
|
data/spec/lib/sqed_spec.rb
CHANGED
@@ -113,7 +113,7 @@ describe Sqed do
|
|
113
113
|
end
|
114
114
|
|
115
115
|
specify '#text_for a :curator_metadata section' do
|
116
|
-
expect(rz.text_for(:curator_metadata)).to match(
|
116
|
+
expect(rz.text_for(:curator_metadata)).to match(/Frost\s*Entomological\s*Museum/)
|
117
117
|
end
|
118
118
|
end
|
119
119
|
end
|
data/sqed.gemspec
CHANGED
@@ -20,15 +20,15 @@ Gem::Specification.new do |spec|
|
|
20
20
|
|
21
21
|
spec.add_dependency 'rake', '~> 12.3'
|
22
22
|
spec.add_dependency 'rmagick', '~> 2.16'
|
23
|
-
spec.add_dependency 'rtesseract', '~>
|
23
|
+
spec.add_dependency 'rtesseract', '~> 3.0.2'
|
24
24
|
|
25
25
|
# A qrcode reader, too many problems with compiling, dependencies
|
26
26
|
# spec.add_dependency 'zxing_cpp', '~> 0.1.0'
|
27
27
|
|
28
28
|
spec.add_development_dependency 'rspec', '~> 3.8'
|
29
|
-
spec.add_development_dependency 'bundler', '~>
|
29
|
+
spec.add_development_dependency 'bundler', '~> 2.0'
|
30
30
|
# spec.add_development_dependency 'did_you_mean', '~> 0.9'
|
31
|
-
spec.add_development_dependency 'byebug', '~> 10.0
|
31
|
+
spec.add_development_dependency 'byebug', '~> 10.0'
|
32
32
|
spec.add_development_dependency 'awesome_print', '~> 1.8'
|
33
33
|
end
|
34
34
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sqed
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matt Yoder
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2019-02-06 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -45,14 +45,14 @@ dependencies:
|
|
45
45
|
requirements:
|
46
46
|
- - "~>"
|
47
47
|
- !ruby/object:Gem::Version
|
48
|
-
version:
|
48
|
+
version: 3.0.2
|
49
49
|
type: :runtime
|
50
50
|
prerelease: false
|
51
51
|
version_requirements: !ruby/object:Gem::Requirement
|
52
52
|
requirements:
|
53
53
|
- - "~>"
|
54
54
|
- !ruby/object:Gem::Version
|
55
|
-
version:
|
55
|
+
version: 3.0.2
|
56
56
|
- !ruby/object:Gem::Dependency
|
57
57
|
name: rspec
|
58
58
|
requirement: !ruby/object:Gem::Requirement
|
@@ -73,28 +73,28 @@ dependencies:
|
|
73
73
|
requirements:
|
74
74
|
- - "~>"
|
75
75
|
- !ruby/object:Gem::Version
|
76
|
-
version: '
|
76
|
+
version: '2.0'
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
79
|
version_requirements: !ruby/object:Gem::Requirement
|
80
80
|
requirements:
|
81
81
|
- - "~>"
|
82
82
|
- !ruby/object:Gem::Version
|
83
|
-
version: '
|
83
|
+
version: '2.0'
|
84
84
|
- !ruby/object:Gem::Dependency
|
85
85
|
name: byebug
|
86
86
|
requirement: !ruby/object:Gem::Requirement
|
87
87
|
requirements:
|
88
88
|
- - "~>"
|
89
89
|
- !ruby/object:Gem::Version
|
90
|
-
version: 10.0
|
90
|
+
version: '10.0'
|
91
91
|
type: :development
|
92
92
|
prerelease: false
|
93
93
|
version_requirements: !ruby/object:Gem::Requirement
|
94
94
|
requirements:
|
95
95
|
- - "~>"
|
96
96
|
- !ruby/object:Gem::Version
|
97
|
-
version: 10.0
|
97
|
+
version: '10.0'
|
98
98
|
- !ruby/object:Gem::Dependency
|
99
99
|
name: awesome_print
|
100
100
|
requirement: !ruby/object:Gem::Requirement
|
@@ -150,6 +150,7 @@ files:
|
|
150
150
|
- spec/lib/sqed/parser/ocr_spec.rb
|
151
151
|
- spec/lib/sqed/parser_spec.rb
|
152
152
|
- spec/lib/sqed/result_spec.rb
|
153
|
+
- spec/lib/sqed_config_spec.rb
|
153
154
|
- spec/lib/sqed_spec.rb
|
154
155
|
- spec/lib/sqed_utils_spec.rb
|
155
156
|
- spec/lib/stage_handling/lep_stage_spec.rb
|
@@ -207,8 +208,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
207
208
|
- !ruby/object:Gem::Version
|
208
209
|
version: '0'
|
209
210
|
requirements: []
|
210
|
-
|
211
|
-
rubygems_version: 2.6.14
|
211
|
+
rubygems_version: 3.0.2
|
212
212
|
signing_key:
|
213
213
|
specification_version: 4
|
214
214
|
summary: Specimens Quickly extracted and Digitized, or just "squid". A ruby gem for
|
@@ -224,6 +224,7 @@ test_files:
|
|
224
224
|
- spec/lib/sqed/parser/ocr_spec.rb
|
225
225
|
- spec/lib/sqed/parser_spec.rb
|
226
226
|
- spec/lib/sqed/result_spec.rb
|
227
|
+
- spec/lib/sqed_config_spec.rb
|
227
228
|
- spec/lib/sqed_spec.rb
|
228
229
|
- spec/lib/sqed_utils_spec.rb
|
229
230
|
- spec/lib/stage_handling/lep_stage_spec.rb
|