sqed 0.2.4 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/sqed.rb +8 -6
- data/lib/sqed/extractor.rb +4 -4
- data/lib/sqed/parser/ocr_parser.rb +21 -1
- data/lib/sqed/version.rb +1 -1
- data/lib/sqed_config.rb +7 -7
- data/spec/lib/sqed_spec.rb +5 -0
- data/spec/lib/stage_handling/seven_slot_spec.rb +10 -1
- data/sqed.gemspec +5 -5
- metadata +15 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8c81b996a2e7dcb26492461ff434ecf900701b0b
|
4
|
+
data.tar.gz: 1773eba85c994fa5f75b2da35cf55d3d5a14c3b1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8a83a73acb05b279b5ec5e7ac6730c8c750b79c88e22ee5b0dcfcd7ce2d502a254544607b771f262de97dab14611e850bdc07ce3804bd835d927c6ca2e7eda29
|
7
|
+
data.tar.gz: d2d0ff721d4c20698065574128a9cae9d95bdab144fcde852042205cc365af798861fa29f7f439d9dbf1ce9524c8bc5dccb2d3937ca9c2fe7e20c19179c9eed0
|
data/lib/sqed.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
recent_ruby = RUBY_VERSION >= '2.1.1'
|
4
4
|
raise "IMPORTANT: sqed gem requires ruby >= 2.1.1" unless recent_ruby
|
5
5
|
|
6
|
-
require "
|
6
|
+
require "RMagick"
|
7
7
|
|
8
8
|
# Instances take the following
|
9
9
|
# 1) A base image @image
|
@@ -27,6 +27,7 @@ class Sqed
|
|
27
27
|
# Legal values are symbols taken from SqedConfig::EXTRACTION_PATTERNS.
|
28
28
|
# DO NOT pass pattern outside of a sqed instance!
|
29
29
|
#
|
30
|
+
# !! Always passed as an option :target_pattern, and persisted as :pattern
|
30
31
|
attr_accessor :pattern
|
31
32
|
|
32
33
|
# Provide a specific layout, overrides layout metadata taken from pattern
|
@@ -50,13 +51,14 @@ class Sqed
|
|
50
51
|
# Boolean, whether to do the boundary detection (not stage detection at present) against a thumbnail version of the passed image (faster, less accurate, true by default)
|
51
52
|
attr_accessor :use_thumbnail
|
52
53
|
|
53
|
-
# Provide a metadata map, overrides metadata taken from pattern
|
54
|
+
# Provide a metadata map, overrides metadata taken from pattern.
|
55
|
+
# !! Always passed as an option :target_metadata_map, and persisted as :metadata_map
|
54
56
|
attr_accessor :metadata_map
|
55
57
|
|
56
58
|
# Provide a boundary_finder, overrides metadata taken from pattern
|
57
59
|
attr_accessor :boundary_finder
|
58
60
|
|
59
|
-
def initialize(target_image: nil, target_pattern: nil, target_layout: nil, has_border: true, boundary_color: :green, use_thumbnail: true, boundary_finder: nil,
|
61
|
+
def initialize(target_image: nil, target_pattern: nil, target_layout: nil, has_border: true, boundary_color: :green, use_thumbnail: true, boundary_finder: nil, target_metadata_map: nil)
|
60
62
|
raise 'extraction pattern not defined' if target_pattern && !SqedConfig::EXTRACTION_PATTERNS.keys.include?(target_pattern)
|
61
63
|
|
62
64
|
# data, and stubs for results
|
@@ -65,14 +67,14 @@ class Sqed
|
|
65
67
|
@stage_boundary = Sqed::Boundaries.new(:internal_box)
|
66
68
|
|
67
69
|
# extraction metadata
|
68
|
-
@pattern = target_pattern # not required if target_layout,
|
70
|
+
@pattern = target_pattern # not required if target_layout, target_metadata_map, and boundary_finder are provided
|
69
71
|
|
70
72
|
@has_border = has_border
|
71
73
|
@boundary_finder = boundary_finder.constantize if boundary_finder
|
72
74
|
@layout = target_layout
|
73
75
|
@layout ||= SqedConfig::EXTRACTION_PATTERNS[pattern][:layout] if pattern
|
74
76
|
|
75
|
-
@metadata_map =
|
77
|
+
@metadata_map = target_metadata_map
|
76
78
|
@boundary_color = boundary_color
|
77
79
|
@use_thumbnail = use_thumbnail
|
78
80
|
|
@@ -147,7 +149,7 @@ class Sqed
|
|
147
149
|
|
148
150
|
extractor = Sqed::Extractor.new(
|
149
151
|
target_boundaries: boundaries,
|
150
|
-
target_metadata_map: extraction_metadata[:
|
152
|
+
target_metadata_map: extraction_metadata[:target_metadata_map],
|
151
153
|
target_image: stage_image)
|
152
154
|
extractor.result
|
153
155
|
end
|
data/lib/sqed/extractor.rb
CHANGED
@@ -16,10 +16,10 @@ class Sqed::Extractor
|
|
16
16
|
# a Magick::Image file
|
17
17
|
attr_accessor :image
|
18
18
|
|
19
|
-
def initialize(target_boundaries:
|
20
|
-
raise Error, '
|
21
|
-
raise Error, '
|
22
|
-
raise Error, '
|
19
|
+
def initialize(target_boundaries: nil, target_metadata_map: nil, target_image: nil)
|
20
|
+
raise Error, 'target_boundaries not provided or provided boundary is not a Sqed::Boundaries' if target_boundaries.nil? || !target_boundaries.class == Sqed::Boundaries
|
21
|
+
raise Error, 'target_metadata_map not provided or target_metadata_map not a Hash' if target_metadata_map.nil? || !target_metadata_map.class == Hash
|
22
|
+
raise Error, 'target_image not provided' if target_image.nil? || !target_image.class.name == 'Magick::Image'
|
23
23
|
|
24
24
|
@metadata_map = target_metadata_map
|
25
25
|
@boundaries = target_boundaries
|
data/lib/sqed/parser/ocr_parser.rb
CHANGED
@@ -107,9 +107,29 @@ class Sqed::Parser::OcrParser < Sqed::Parser
|
|
107
107
|
# the ocr text
|
108
108
|
def text(section_type: :default)
|
109
109
|
img = @image
|
110
|
+
|
111
|
+
# resample if an image 4"x4" is less than 300dpi
|
112
|
+
if img.columns * img.rows < 144000
|
113
|
+
img = img.resample(300)
|
114
|
+
end
|
115
|
+
|
110
116
|
params = SECTION_PARAMS[:default].merge(SECTION_PARAMS[section_type])
|
111
117
|
r = RTesseract.new(img, params)
|
112
|
-
@text = r.to_s.strip
|
118
|
+
@text = r.to_s.strip
|
119
|
+
|
120
|
+
if @text == ""
|
121
|
+
img = img.white_threshold(245)
|
122
|
+
r = RTesseract.new(img, params)
|
123
|
+
@text = r.to_s.strip
|
124
|
+
end
|
125
|
+
|
126
|
+
if @text == ""
|
127
|
+
img = img.quantize(256,Magick::GRAYColorspace)
|
128
|
+
r = RTesseract.new(img, params)
|
129
|
+
@text = r.to_s.strip
|
130
|
+
end
|
131
|
+
|
132
|
+
@text
|
113
133
|
end
|
114
134
|
|
115
135
|
|
data/lib/sqed/version.rb
CHANGED
data/lib/sqed_config.rb
CHANGED
@@ -115,43 +115,43 @@ module SqedConfig
|
|
115
115
|
right_t: {
|
116
116
|
boundary_finder: Sqed::BoundaryFinder::ColorLineFinder,
|
117
117
|
layout: :right_t,
|
118
|
-
|
118
|
+
target_metadata_map: {0 => :annotated_specimen, 1 => :identifier, 2 =>:image_registration }
|
119
119
|
},
|
120
120
|
|
121
121
|
vertical_offset_cross: {
|
122
122
|
boundary_finder: Sqed::BoundaryFinder::ColorLineFinder,
|
123
123
|
layout: :vertical_offset_cross,
|
124
|
-
|
124
|
+
target_metadata_map: {0 => :curator_metadata, 1 => :identifier, 2 => :image_registration, 3 => :annotated_specimen }
|
125
125
|
},
|
126
126
|
|
127
127
|
equal_cross: {
|
128
128
|
boundary_finder: Sqed::BoundaryFinder::CrossFinder,
|
129
129
|
layout: :equal_cross,
|
130
|
-
|
130
|
+
target_metadata_map: {0 => :curator_metadata, 1 => :identifier, 2 => :image_registration, 3 => :annotated_specimen }
|
131
131
|
},
|
132
132
|
|
133
133
|
cross: {
|
134
134
|
boundary_finder: Sqed::BoundaryFinder::ColorLineFinder,
|
135
135
|
layout: :cross,
|
136
|
-
|
136
|
+
target_metadata_map: {0 => :curator_metadata, 1 => :identifier, 2 => :image_registration, 3 => :annotated_specimen }
|
137
137
|
},
|
138
138
|
|
139
139
|
stage: {
|
140
140
|
boundary_finder: Sqed::BoundaryFinder::StageFinder,
|
141
141
|
layout: :internal_box,
|
142
|
-
|
142
|
+
target_metadata_map: {0 => :stage}
|
143
143
|
},
|
144
144
|
|
145
145
|
seven_slot: {
|
146
146
|
boundary_finder: Sqed::BoundaryFinder::ColorLineFinder,
|
147
147
|
layout: :seven_slot,
|
148
|
-
|
148
|
+
target_metadata_map: {0 => :collecting_event_labels, 1 => :determination_labels, 2 => :other_labels, 3 => :image_registration, 4 => :curator_metadata, 5 => :identifier, 6 => :specimen }
|
149
149
|
}
|
150
150
|
}
|
151
151
|
|
152
152
|
DEFAULT_TMP_DIR = "/tmp"
|
153
153
|
|
154
154
|
def self.index_for_section_type(pattern, section_type)
|
155
|
-
EXTRACTION_PATTERNS[pattern][:
|
155
|
+
EXTRACTION_PATTERNS[pattern][:target_metadata_map].invert[section_type]
|
156
156
|
end
|
157
157
|
end
|
data/spec/lib/sqed_spec.rb
CHANGED
data/spec/lib/stage_handling/seven_slot_spec.rb
CHANGED
@@ -10,7 +10,16 @@ describe 'handling 7 slot stages' do
|
|
10
10
|
expect( sqed ).to be_truthy
|
11
11
|
end
|
12
12
|
|
13
|
-
specify '
|
13
|
+
specify 'get result without errors' do
|
14
|
+
expect( sqed.result ).to be_truthy
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
context 'without target_pattern' do
|
19
|
+
let(:m) { {"0" => "collecting_event_labels", "1" => "determination_labels", "2" => "other_labels", "3" => "image_registration", "4" => "curator_metadata", "5" => "identifier", "6" => "specimen" } }
|
20
|
+
let(:s) { Sqed.new(target_image: image, metadata_map: m, target_layout: :seven_slot, boundary_color: :red, has_border: false ) }
|
21
|
+
|
22
|
+
specify 'get result without errors' do
|
14
23
|
expect( sqed.result ).to be_truthy
|
15
24
|
end
|
16
25
|
end
|
data/sqed.gemspec
CHANGED
@@ -19,16 +19,16 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
21
|
spec.add_dependency 'rake', '~> 11.1.2'
|
22
|
-
spec.add_dependency 'rmagick', '~> 2.
|
23
|
-
spec.add_dependency 'rtesseract', '~> 1.
|
22
|
+
spec.add_dependency 'rmagick', '~> 2.16'
|
23
|
+
spec.add_dependency 'rtesseract', '~> 2.1.0'
|
24
24
|
|
25
25
|
# A qrcode reader, too many problems with compiling, dependencies
|
26
26
|
# spec.add_dependency 'zxing_cpp', '~> 0.1.0'
|
27
27
|
|
28
|
-
spec.add_development_dependency 'rspec', '~> 3.
|
28
|
+
spec.add_development_dependency 'rspec', '~> 3.6'
|
29
29
|
spec.add_development_dependency 'bundler', '~> 1.5'
|
30
30
|
# spec.add_development_dependency 'did_you_mean', '~> 0.9'
|
31
|
-
spec.add_development_dependency 'byebug'
|
32
|
-
spec.add_development_dependency 'awesome_print', '~> 1.
|
31
|
+
spec.add_development_dependency 'byebug', '~> 9.0.6'
|
32
|
+
spec.add_development_dependency 'awesome_print', '~> 1.8'
|
33
33
|
end
|
34
34
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sqed
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matt Yoder
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2017-07-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -31,42 +31,42 @@ dependencies:
|
|
31
31
|
requirements:
|
32
32
|
- - "~>"
|
33
33
|
- !ruby/object:Gem::Version
|
34
|
-
version: '2.
|
34
|
+
version: '2.16'
|
35
35
|
type: :runtime
|
36
36
|
prerelease: false
|
37
37
|
version_requirements: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
39
|
- - "~>"
|
40
40
|
- !ruby/object:Gem::Version
|
41
|
-
version: '2.
|
41
|
+
version: '2.16'
|
42
42
|
- !ruby/object:Gem::Dependency
|
43
43
|
name: rtesseract
|
44
44
|
requirement: !ruby/object:Gem::Requirement
|
45
45
|
requirements:
|
46
46
|
- - "~>"
|
47
47
|
- !ruby/object:Gem::Version
|
48
|
-
version: 1.
|
48
|
+
version: 2.1.0
|
49
49
|
type: :runtime
|
50
50
|
prerelease: false
|
51
51
|
version_requirements: !ruby/object:Gem::Requirement
|
52
52
|
requirements:
|
53
53
|
- - "~>"
|
54
54
|
- !ruby/object:Gem::Version
|
55
|
-
version: 1.
|
55
|
+
version: 2.1.0
|
56
56
|
- !ruby/object:Gem::Dependency
|
57
57
|
name: rspec
|
58
58
|
requirement: !ruby/object:Gem::Requirement
|
59
59
|
requirements:
|
60
60
|
- - "~>"
|
61
61
|
- !ruby/object:Gem::Version
|
62
|
-
version: '3.
|
62
|
+
version: '3.6'
|
63
63
|
type: :development
|
64
64
|
prerelease: false
|
65
65
|
version_requirements: !ruby/object:Gem::Requirement
|
66
66
|
requirements:
|
67
67
|
- - "~>"
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version: '3.
|
69
|
+
version: '3.6'
|
70
70
|
- !ruby/object:Gem::Dependency
|
71
71
|
name: bundler
|
72
72
|
requirement: !ruby/object:Gem::Requirement
|
@@ -85,30 +85,30 @@ dependencies:
|
|
85
85
|
name: byebug
|
86
86
|
requirement: !ruby/object:Gem::Requirement
|
87
87
|
requirements:
|
88
|
-
- - "
|
88
|
+
- - "~>"
|
89
89
|
- !ruby/object:Gem::Version
|
90
|
-
version:
|
90
|
+
version: 9.0.6
|
91
91
|
type: :development
|
92
92
|
prerelease: false
|
93
93
|
version_requirements: !ruby/object:Gem::Requirement
|
94
94
|
requirements:
|
95
|
-
- - "
|
95
|
+
- - "~>"
|
96
96
|
- !ruby/object:Gem::Version
|
97
|
-
version:
|
97
|
+
version: 9.0.6
|
98
98
|
- !ruby/object:Gem::Dependency
|
99
99
|
name: awesome_print
|
100
100
|
requirement: !ruby/object:Gem::Requirement
|
101
101
|
requirements:
|
102
102
|
- - "~>"
|
103
103
|
- !ruby/object:Gem::Version
|
104
|
-
version: '1.
|
104
|
+
version: '1.8'
|
105
105
|
type: :development
|
106
106
|
prerelease: false
|
107
107
|
version_requirements: !ruby/object:Gem::Requirement
|
108
108
|
requirements:
|
109
109
|
- - "~>"
|
110
110
|
- !ruby/object:Gem::Version
|
111
|
-
version: '1.
|
111
|
+
version: '1.8'
|
112
112
|
description: A utility gem to aid in the processing of images taken in the process
|
113
113
|
of digitizing natural history collections.
|
114
114
|
email:
|
@@ -201,7 +201,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
201
201
|
version: '0'
|
202
202
|
requirements: []
|
203
203
|
rubyforge_project:
|
204
|
-
rubygems_version: 2.6.
|
204
|
+
rubygems_version: 2.6.12
|
205
205
|
signing_key:
|
206
206
|
specification_version: 4
|
207
207
|
summary: Specimens Quickly Extracted and Digitized, or just "squid". A ruby gem for
|