sqed 0.2.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/sqed.rb +8 -6
- data/lib/sqed/extractor.rb +4 -4
- data/lib/sqed/parser/ocr_parser.rb +21 -1
- data/lib/sqed/version.rb +1 -1
- data/lib/sqed_config.rb +7 -7
- data/spec/lib/sqed_spec.rb +5 -0
- data/spec/lib/stage_handling/seven_slot_spec.rb +10 -1
- data/sqed.gemspec +5 -5
- metadata +15 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8c81b996a2e7dcb26492461ff434ecf900701b0b
|
4
|
+
data.tar.gz: 1773eba85c994fa5f75b2da35cf55d3d5a14c3b1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8a83a73acb05b279b5ec5e7ac6730c8c750b79c88e22ee5b0dcfcd7ce2d502a254544607b771f262de97dab14611e850bdc07ce3804bd835d927c6ca2e7eda29
|
7
|
+
data.tar.gz: d2d0ff721d4c20698065574128a9cae9d95bdab144fcde852042205cc365af798861fa29f7f439d9dbf1ce9524c8bc5dccb2d3937ca9c2fe7e20c19179c9eed0
|
data/lib/sqed.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
recent_ruby = RUBY_VERSION >= '2.1.1'
|
4
4
|
raise "IMPORTANT: sqed gem requires ruby >= 2.1.1" unless recent_ruby
|
5
5
|
|
6
|
-
require "
|
6
|
+
require "RMagick"
|
7
7
|
|
8
8
|
# Instances take the following
|
9
9
|
# 1) A base image @image
|
@@ -27,6 +27,7 @@ class Sqed
|
|
27
27
|
# Legal values are symbols taken from SqedConfig::EXTRACTION_PATTERNS.
|
28
28
|
# DO NOT pass pattern outside of a sqed instance!
|
29
29
|
#
|
30
|
+
# !! Always passed as an option :target_pattern, and persisted as :pattern
|
30
31
|
attr_accessor :pattern
|
31
32
|
|
32
33
|
# Provide a specific layout, overrides layout metadata taken from pattern
|
@@ -50,13 +51,14 @@ class Sqed
|
|
50
51
|
# Boolean, whether to do the boundary detection (not stage detection at present) against a thumbnail version of the passed image (faster, less accurate, true by default)
|
51
52
|
attr_accessor :use_thumbnail
|
52
53
|
|
53
|
-
# Provide a metadata map, overrides metadata taken from pattern
|
54
|
+
# Provide a metadata map, overrides metadata taken from pattern.
|
55
|
+
# !! Always passed as an option :target_metadata_map, and persisted as :metadata_map
|
54
56
|
attr_accessor :metadata_map
|
55
57
|
|
56
58
|
# Provide a boundary_finder, overrides metadata taken from pattern
|
57
59
|
attr_accessor :boundary_finder
|
58
60
|
|
59
|
-
def initialize(target_image: nil, target_pattern: nil, target_layout: nil, has_border: true, boundary_color: :green, use_thumbnail: true, boundary_finder: nil,
|
61
|
+
def initialize(target_image: nil, target_pattern: nil, target_layout: nil, has_border: true, boundary_color: :green, use_thumbnail: true, boundary_finder: nil, target_metadata_map: nil)
|
60
62
|
raise 'extraction pattern not defined' if target_pattern && !SqedConfig::EXTRACTION_PATTERNS.keys.include?(target_pattern)
|
61
63
|
|
62
64
|
# data, and stubs for results
|
@@ -65,14 +67,14 @@ class Sqed
|
|
65
67
|
@stage_boundary = Sqed::Boundaries.new(:internal_box)
|
66
68
|
|
67
69
|
# extraction metadata
|
68
|
-
@pattern = target_pattern # not required if target_layout,
|
70
|
+
@pattern = target_pattern # not required if target_layout, target_metadata_map, and boundary_finder are provided
|
69
71
|
|
70
72
|
@has_border = has_border
|
71
73
|
@boundary_finder = boundary_finder.constantize if boundary_finder
|
72
74
|
@layout = target_layout
|
73
75
|
@layout ||= SqedConfig::EXTRACTION_PATTERNS[pattern][:layout] if pattern
|
74
76
|
|
75
|
-
@metadata_map =
|
77
|
+
@metadata_map = target_metadata_map
|
76
78
|
@boundary_color = boundary_color
|
77
79
|
@use_thumbnail = use_thumbnail
|
78
80
|
|
@@ -147,7 +149,7 @@ class Sqed
|
|
147
149
|
|
148
150
|
extractor = Sqed::Extractor.new(
|
149
151
|
target_boundaries: boundaries,
|
150
|
-
target_metadata_map: extraction_metadata[:
|
152
|
+
target_metadata_map: extraction_metadata[:target_metadata_map],
|
151
153
|
target_image: stage_image)
|
152
154
|
extractor.result
|
153
155
|
end
|
data/lib/sqed/extractor.rb
CHANGED
@@ -16,10 +16,10 @@ class Sqed::Extractor
|
|
16
16
|
# a Magick::Image file
|
17
17
|
attr_accessor :image
|
18
18
|
|
19
|
-
def initialize(target_boundaries:
|
20
|
-
raise Error, '
|
21
|
-
raise Error, '
|
22
|
-
raise Error, '
|
19
|
+
def initialize(target_boundaries: nil, target_metadata_map: nil, target_image: nil)
|
20
|
+
raise Error, 'target_boundaries not provided or provided boundary is not a Sqed::Boundaries' if target_boundaries.nil? || !target_boundaries.class == Sqed::Boundaries
|
21
|
+
raise Error, 'target_metadata_map not provided or target_metadata_map not a Hash' if target_metadata_map.nil? || !target_metadata_map.class == Hash
|
22
|
+
raise Error, 'target_image not provided' if target_image.nil? || !target_image.class.name == 'Magick::Image'
|
23
23
|
|
24
24
|
@metadata_map = target_metadata_map
|
25
25
|
@boundaries = target_boundaries
|
@@ -107,9 +107,29 @@ class Sqed::Parser::OcrParser < Sqed::Parser
|
|
107
107
|
# the ocr text
|
108
108
|
def text(section_type: :default)
|
109
109
|
img = @image
|
110
|
+
|
111
|
+
# resample if an image 4"x4" is less than 300dpi
|
112
|
+
if img.columns * img.rows < 144000
|
113
|
+
img = img.resample(300)
|
114
|
+
end
|
115
|
+
|
110
116
|
params = SECTION_PARAMS[:default].merge(SECTION_PARAMS[section_type])
|
111
117
|
r = RTesseract.new(img, params)
|
112
|
-
@text = r.to_s.strip
|
118
|
+
@text = r.to_s.strip
|
119
|
+
|
120
|
+
if @text == ""
|
121
|
+
img = img.white_threshold(245)
|
122
|
+
r = RTesseract.new(img, params)
|
123
|
+
@text = r.to_s.strip
|
124
|
+
end
|
125
|
+
|
126
|
+
if @text == ""
|
127
|
+
img = img.quantize(256,Magick::GRAYColorspace)
|
128
|
+
r = RTesseract.new(img, params)
|
129
|
+
@text = r.to_s.strip
|
130
|
+
end
|
131
|
+
|
132
|
+
@text
|
113
133
|
end
|
114
134
|
|
115
135
|
|
data/lib/sqed/version.rb
CHANGED
data/lib/sqed_config.rb
CHANGED
@@ -115,43 +115,43 @@ module SqedConfig
|
|
115
115
|
right_t: {
|
116
116
|
boundary_finder: Sqed::BoundaryFinder::ColorLineFinder,
|
117
117
|
layout: :right_t,
|
118
|
-
|
118
|
+
target_metadata_map: {0 => :annotated_specimen, 1 => :identifier, 2 =>:image_registration }
|
119
119
|
},
|
120
120
|
|
121
121
|
vertical_offset_cross: {
|
122
122
|
boundary_finder: Sqed::BoundaryFinder::ColorLineFinder,
|
123
123
|
layout: :vertical_offset_cross,
|
124
|
-
|
124
|
+
target_metadata_map: {0 => :curator_metadata, 1 => :identifier, 2 => :image_registration, 3 => :annotated_specimen }
|
125
125
|
},
|
126
126
|
|
127
127
|
equal_cross: {
|
128
128
|
boundary_finder: Sqed::BoundaryFinder::CrossFinder,
|
129
129
|
layout: :equal_cross,
|
130
|
-
|
130
|
+
target_metadata_map: {0 => :curator_metadata, 1 => :identifier, 2 => :image_registration, 3 => :annotated_specimen }
|
131
131
|
},
|
132
132
|
|
133
133
|
cross: {
|
134
134
|
boundary_finder: Sqed::BoundaryFinder::ColorLineFinder,
|
135
135
|
layout: :cross,
|
136
|
-
|
136
|
+
target_metadata_map: {0 => :curator_metadata, 1 => :identifier, 2 => :image_registration, 3 => :annotated_specimen }
|
137
137
|
},
|
138
138
|
|
139
139
|
stage: {
|
140
140
|
boundary_finder: Sqed::BoundaryFinder::StageFinder,
|
141
141
|
layout: :internal_box,
|
142
|
-
|
142
|
+
target_metadata_map: {0 => :stage}
|
143
143
|
},
|
144
144
|
|
145
145
|
seven_slot: {
|
146
146
|
boundary_finder: Sqed::BoundaryFinder::ColorLineFinder,
|
147
147
|
layout: :seven_slot,
|
148
|
-
|
148
|
+
target_metadata_map: {0 => :collecting_event_labels, 1 => :determination_labels, 2 => :other_labels, 3 => :image_registration, 4 => :curator_metadata, 5 => :identifier, 6 => :specimen }
|
149
149
|
}
|
150
150
|
}
|
151
151
|
|
152
152
|
DEFAULT_TMP_DIR = "/tmp"
|
153
153
|
|
154
154
|
def self.index_for_section_type(pattern, section_type)
|
155
|
-
EXTRACTION_PATTERNS[pattern][:
|
155
|
+
EXTRACTION_PATTERNS[pattern][:target_metadata_map].invert[section_type]
|
156
156
|
end
|
157
157
|
end
|
data/spec/lib/sqed_spec.rb
CHANGED
@@ -10,7 +10,16 @@ describe 'handling 7 slot stages' do
|
|
10
10
|
expect( sqed ).to be_truthy
|
11
11
|
end
|
12
12
|
|
13
|
-
specify '
|
13
|
+
specify 'get result without errors' do
|
14
|
+
expect( sqed.result ).to be_truthy
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
context 'without target_pattern' do
|
19
|
+
let(:m) { {"0" => "collecting_event_labels", "1" => "determination_labels", "2" => "other_labels", "3" => "image_registration", "4" => "curator_metadata", "5" => "identifier", "6" => "specimen" } }
|
20
|
+
let(:s) { Sqed.new(target_image: image, metadata_map: m, target_layout: :seven_slot, boundary_color: :red, has_border: false ) }
|
21
|
+
|
22
|
+
specify 'get result without errors' do
|
14
23
|
expect( sqed.result ).to be_truthy
|
15
24
|
end
|
16
25
|
end
|
data/sqed.gemspec
CHANGED
@@ -19,16 +19,16 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
21
|
spec.add_dependency 'rake', '~> 11.1.2'
|
22
|
-
spec.add_dependency 'rmagick', '~> 2.
|
23
|
-
spec.add_dependency 'rtesseract', '~> 1.
|
22
|
+
spec.add_dependency 'rmagick', '~> 2.16'
|
23
|
+
spec.add_dependency 'rtesseract', '~> 2.1.0'
|
24
24
|
|
25
25
|
# A qrcode reader, too many problems with compiling, dependencies
|
26
26
|
# spec.add_dependency 'zxing_cpp', '~> 0.1.0'
|
27
27
|
|
28
|
-
spec.add_development_dependency 'rspec', '~> 3.
|
28
|
+
spec.add_development_dependency 'rspec', '~> 3.6'
|
29
29
|
spec.add_development_dependency 'bundler', '~> 1.5'
|
30
30
|
# spec.add_development_dependency 'did_you_mean', '~> 0.9'
|
31
|
-
spec.add_development_dependency 'byebug'
|
32
|
-
spec.add_development_dependency 'awesome_print', '~> 1.
|
31
|
+
spec.add_development_dependency 'byebug', '~> 9.0.6'
|
32
|
+
spec.add_development_dependency 'awesome_print', '~> 1.8'
|
33
33
|
end
|
34
34
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sqed
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matt Yoder
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2017-07-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -31,42 +31,42 @@ dependencies:
|
|
31
31
|
requirements:
|
32
32
|
- - "~>"
|
33
33
|
- !ruby/object:Gem::Version
|
34
|
-
version: '2.
|
34
|
+
version: '2.16'
|
35
35
|
type: :runtime
|
36
36
|
prerelease: false
|
37
37
|
version_requirements: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
39
|
- - "~>"
|
40
40
|
- !ruby/object:Gem::Version
|
41
|
-
version: '2.
|
41
|
+
version: '2.16'
|
42
42
|
- !ruby/object:Gem::Dependency
|
43
43
|
name: rtesseract
|
44
44
|
requirement: !ruby/object:Gem::Requirement
|
45
45
|
requirements:
|
46
46
|
- - "~>"
|
47
47
|
- !ruby/object:Gem::Version
|
48
|
-
version: 1.
|
48
|
+
version: 2.1.0
|
49
49
|
type: :runtime
|
50
50
|
prerelease: false
|
51
51
|
version_requirements: !ruby/object:Gem::Requirement
|
52
52
|
requirements:
|
53
53
|
- - "~>"
|
54
54
|
- !ruby/object:Gem::Version
|
55
|
-
version: 1.
|
55
|
+
version: 2.1.0
|
56
56
|
- !ruby/object:Gem::Dependency
|
57
57
|
name: rspec
|
58
58
|
requirement: !ruby/object:Gem::Requirement
|
59
59
|
requirements:
|
60
60
|
- - "~>"
|
61
61
|
- !ruby/object:Gem::Version
|
62
|
-
version: '3.
|
62
|
+
version: '3.6'
|
63
63
|
type: :development
|
64
64
|
prerelease: false
|
65
65
|
version_requirements: !ruby/object:Gem::Requirement
|
66
66
|
requirements:
|
67
67
|
- - "~>"
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version: '3.
|
69
|
+
version: '3.6'
|
70
70
|
- !ruby/object:Gem::Dependency
|
71
71
|
name: bundler
|
72
72
|
requirement: !ruby/object:Gem::Requirement
|
@@ -85,30 +85,30 @@ dependencies:
|
|
85
85
|
name: byebug
|
86
86
|
requirement: !ruby/object:Gem::Requirement
|
87
87
|
requirements:
|
88
|
-
- - "
|
88
|
+
- - "~>"
|
89
89
|
- !ruby/object:Gem::Version
|
90
|
-
version:
|
90
|
+
version: 9.0.6
|
91
91
|
type: :development
|
92
92
|
prerelease: false
|
93
93
|
version_requirements: !ruby/object:Gem::Requirement
|
94
94
|
requirements:
|
95
|
-
- - "
|
95
|
+
- - "~>"
|
96
96
|
- !ruby/object:Gem::Version
|
97
|
-
version:
|
97
|
+
version: 9.0.6
|
98
98
|
- !ruby/object:Gem::Dependency
|
99
99
|
name: awesome_print
|
100
100
|
requirement: !ruby/object:Gem::Requirement
|
101
101
|
requirements:
|
102
102
|
- - "~>"
|
103
103
|
- !ruby/object:Gem::Version
|
104
|
-
version: '1.
|
104
|
+
version: '1.8'
|
105
105
|
type: :development
|
106
106
|
prerelease: false
|
107
107
|
version_requirements: !ruby/object:Gem::Requirement
|
108
108
|
requirements:
|
109
109
|
- - "~>"
|
110
110
|
- !ruby/object:Gem::Version
|
111
|
-
version: '1.
|
111
|
+
version: '1.8'
|
112
112
|
description: A utility gem to aid in the processing of images taken in the process
|
113
113
|
of digitizing natural history collections.
|
114
114
|
email:
|
@@ -201,7 +201,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
201
201
|
version: '0'
|
202
202
|
requirements: []
|
203
203
|
rubyforge_project:
|
204
|
-
rubygems_version: 2.6.
|
204
|
+
rubygems_version: 2.6.12
|
205
205
|
signing_key:
|
206
206
|
specification_version: 4
|
207
207
|
summary: Specimens Quickly Extracted and Digitized, or just "squid". A ruby gem for
|