shear 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/workflows/main.yml +13 -0
- data/.github/workflows/publisher.yml +18 -0
- data/.gitignore +56 -0
- data/.hound.yml +4 -0
- data/.rspec +2 -0
- data/.rubocop.yml +491 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +11 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +173 -0
- data/Guardfile +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +45 -0
- data/Rakefile +1 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/shear.rb +13 -0
- data/lib/shear/bounding_box_utils.rb +64 -0
- data/lib/shear/template.rb +187 -0
- data/lib/shear/template_match.rb +36 -0
- data/lib/shear/version.rb +3 -0
- data/lib/shear/word_collection.rb +157 -0
- data/lib/stencils/base_stencil.rb +20 -0
- data/lib/stencils/example_back_stencil.rb +37 -0
- data/lib/stencils/example_front_stencil.rb +44 -0
- data/lib/stencils/stencil_group.rb +40 -0
- data/lib/utils/vision_utils.rb +101 -0
- data/shear.gemspec +37 -0
- metadata +250 -0
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.4
|
data/CHANGELOG.md
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,173 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
shear (0.1.1)
|
5
|
+
activesupport (>= 4.2)
|
6
|
+
faraday (~> 0.17)
|
7
|
+
faraday_middleware (~> 0.14)
|
8
|
+
google-cloud-vision (~> 1.0)
|
9
|
+
patron (~> 0.6)
|
10
|
+
rake (>= 10, < 14)
|
11
|
+
|
12
|
+
GEM
|
13
|
+
remote: https://rubygems.org/
|
14
|
+
specs:
|
15
|
+
activesupport (5.2.4.3)
|
16
|
+
concurrent-ruby (~> 1.0, >= 1.0.2)
|
17
|
+
i18n (>= 0.7, < 2)
|
18
|
+
minitest (~> 5.1)
|
19
|
+
tzinfo (~> 1.1)
|
20
|
+
addressable (2.7.0)
|
21
|
+
public_suffix (>= 2.0.2, < 5.0)
|
22
|
+
coderay (1.1.3)
|
23
|
+
concurrent-ruby (1.1.7)
|
24
|
+
coveralls (0.8.23)
|
25
|
+
json (>= 1.8, < 3)
|
26
|
+
simplecov (~> 0.16.1)
|
27
|
+
term-ansicolor (~> 1.3)
|
28
|
+
thor (>= 0.19.4, < 2.0)
|
29
|
+
tins (~> 1.6)
|
30
|
+
crack (0.4.3)
|
31
|
+
safe_yaml (~> 1.0.0)
|
32
|
+
diff-lcs (1.4.4)
|
33
|
+
docile (1.3.2)
|
34
|
+
faraday (0.17.3)
|
35
|
+
multipart-post (>= 1.2, < 3)
|
36
|
+
faraday_middleware (0.14.0)
|
37
|
+
faraday (>= 0.7.4, < 1.0)
|
38
|
+
ffi (1.13.1)
|
39
|
+
formatador (0.2.5)
|
40
|
+
gapic-common (0.3.4)
|
41
|
+
google-protobuf (~> 3.12, >= 3.12.2)
|
42
|
+
googleapis-common-protos (>= 1.3.9, < 2.0)
|
43
|
+
googleapis-common-protos-types (>= 1.0.4, < 2.0)
|
44
|
+
googleauth (~> 0.9)
|
45
|
+
grpc (~> 1.25)
|
46
|
+
google-cloud-core (1.5.0)
|
47
|
+
google-cloud-env (~> 1.0)
|
48
|
+
google-cloud-errors (~> 1.0)
|
49
|
+
google-cloud-env (1.3.3)
|
50
|
+
faraday (>= 0.17.3, < 2.0)
|
51
|
+
google-cloud-errors (1.0.1)
|
52
|
+
google-cloud-vision (1.0.0)
|
53
|
+
google-cloud-core (~> 1.5)
|
54
|
+
google-cloud-vision-v1 (~> 0.0)
|
55
|
+
google-cloud-vision-v1p3beta1 (~> 0.0)
|
56
|
+
google-cloud-vision-v1 (0.2.5)
|
57
|
+
gapic-common (~> 0.3)
|
58
|
+
google-cloud-errors (~> 1.0)
|
59
|
+
google-cloud-vision-v1p3beta1 (0.2.5)
|
60
|
+
gapic-common (~> 0.3)
|
61
|
+
google-cloud-errors (~> 1.0)
|
62
|
+
google-protobuf (3.12.4)
|
63
|
+
googleapis-common-protos (1.3.10)
|
64
|
+
google-protobuf (~> 3.11)
|
65
|
+
googleapis-common-protos-types (>= 1.0.5, < 2.0)
|
66
|
+
grpc (~> 1.27)
|
67
|
+
googleapis-common-protos-types (1.0.5)
|
68
|
+
google-protobuf (~> 3.11)
|
69
|
+
googleauth (0.13.1)
|
70
|
+
faraday (>= 0.17.3, < 2.0)
|
71
|
+
jwt (>= 1.4, < 3.0)
|
72
|
+
memoist (~> 0.16)
|
73
|
+
multi_json (~> 1.11)
|
74
|
+
os (>= 0.9, < 2.0)
|
75
|
+
signet (~> 0.14)
|
76
|
+
grpc (1.30.2)
|
77
|
+
google-protobuf (~> 3.12)
|
78
|
+
googleapis-common-protos-types (~> 1.0)
|
79
|
+
guard (2.16.2)
|
80
|
+
formatador (>= 0.2.4)
|
81
|
+
listen (>= 2.7, < 4.0)
|
82
|
+
lumberjack (>= 1.0.12, < 2.0)
|
83
|
+
nenv (~> 0.1)
|
84
|
+
notiffany (~> 0.0)
|
85
|
+
pry (>= 0.9.12)
|
86
|
+
shellany (~> 0.0)
|
87
|
+
thor (>= 0.18.1)
|
88
|
+
guard-compat (1.2.1)
|
89
|
+
guard-rspec (4.7.3)
|
90
|
+
guard (~> 2.1)
|
91
|
+
guard-compat (~> 1.1)
|
92
|
+
rspec (>= 2.99.0, < 4.0)
|
93
|
+
hashdiff (1.0.1)
|
94
|
+
i18n (1.8.5)
|
95
|
+
concurrent-ruby (~> 1.0)
|
96
|
+
json (2.3.1)
|
97
|
+
jwt (2.2.1)
|
98
|
+
listen (3.2.1)
|
99
|
+
rb-fsevent (~> 0.10, >= 0.10.3)
|
100
|
+
rb-inotify (~> 0.9, >= 0.9.10)
|
101
|
+
lumberjack (1.2.7)
|
102
|
+
memoist (0.16.2)
|
103
|
+
method_source (1.0.0)
|
104
|
+
minitest (5.14.1)
|
105
|
+
multi_json (1.15.0)
|
106
|
+
multipart-post (2.1.1)
|
107
|
+
nenv (0.3.0)
|
108
|
+
notiffany (0.1.3)
|
109
|
+
nenv (~> 0.1)
|
110
|
+
shellany (~> 0.0)
|
111
|
+
os (1.1.1)
|
112
|
+
patron (0.13.3)
|
113
|
+
pry (0.13.1)
|
114
|
+
coderay (~> 1.1)
|
115
|
+
method_source (~> 1.0)
|
116
|
+
public_suffix (4.0.5)
|
117
|
+
rake (13.0.1)
|
118
|
+
rb-fsevent (0.10.4)
|
119
|
+
rb-inotify (0.10.1)
|
120
|
+
ffi (~> 1.0)
|
121
|
+
rspec (3.9.0)
|
122
|
+
rspec-core (~> 3.9.0)
|
123
|
+
rspec-expectations (~> 3.9.0)
|
124
|
+
rspec-mocks (~> 3.9.0)
|
125
|
+
rspec-core (3.9.2)
|
126
|
+
rspec-support (~> 3.9.3)
|
127
|
+
rspec-expectations (3.9.2)
|
128
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
129
|
+
rspec-support (~> 3.9.0)
|
130
|
+
rspec-mocks (3.9.1)
|
131
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
132
|
+
rspec-support (~> 3.9.0)
|
133
|
+
rspec-support (3.9.3)
|
134
|
+
safe_yaml (1.0.5)
|
135
|
+
shellany (0.0.1)
|
136
|
+
signet (0.14.0)
|
137
|
+
addressable (~> 2.3)
|
138
|
+
faraday (>= 0.17.3, < 2.0)
|
139
|
+
jwt (>= 1.5, < 3.0)
|
140
|
+
multi_json (~> 1.10)
|
141
|
+
simplecov (0.16.1)
|
142
|
+
docile (~> 1.1)
|
143
|
+
json (>= 1.8, < 3)
|
144
|
+
simplecov-html (~> 0.10.0)
|
145
|
+
simplecov-html (0.10.2)
|
146
|
+
sync (0.5.0)
|
147
|
+
term-ansicolor (1.7.1)
|
148
|
+
tins (~> 1.0)
|
149
|
+
thor (1.0.1)
|
150
|
+
thread_safe (0.3.6)
|
151
|
+
tins (1.25.0)
|
152
|
+
sync
|
153
|
+
tzinfo (1.2.7)
|
154
|
+
thread_safe (~> 0.1)
|
155
|
+
webmock (3.8.3)
|
156
|
+
addressable (>= 2.3.6)
|
157
|
+
crack (>= 0.3.2)
|
158
|
+
hashdiff (>= 0.4.0, < 2.0.0)
|
159
|
+
|
160
|
+
PLATFORMS
|
161
|
+
ruby
|
162
|
+
|
163
|
+
DEPENDENCIES
|
164
|
+
bundler (~> 2.1)
|
165
|
+
coveralls (~> 0.8)
|
166
|
+
guard-rspec (~> 4.7)
|
167
|
+
pry (~> 0.13)
|
168
|
+
rspec (~> 3.4)
|
169
|
+
shear!
|
170
|
+
webmock (~> 3.8)
|
171
|
+
|
172
|
+
BUNDLED WITH
|
173
|
+
2.1.4
|
data/Guardfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright 2020 Buda.com SpA
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# Shear
|
2
|
+
|
3
|
+
A tool for extracting data from an image of a structured document.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
```bash
|
8
|
+
$ gem install shear
|
9
|
+
```
|
10
|
+
|
11
|
+
Or add to your Gemfile:
|
12
|
+
|
13
|
+
```ruby
|
14
|
+
gem "shear"
|
15
|
+
```
|
16
|
+
|
17
|
+
```bash
|
18
|
+
bundle install
|
19
|
+
```
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
You can check the [wiki](https://github.com/budacom/shear/wiki) to learn how to use Shear.
|
24
|
+
|
25
|
+
## Demo
|
26
|
+
|
27
|
+
Check the [official shear-demo](https://shear-demo.herokuapp.com/) for a live example of shear. Also check the demo's code on the [shear-demo repository](https://github.com/budacom/shear-demo).
|
28
|
+
|
29
|
+
## Contributing
|
30
|
+
|
31
|
+
1. Fork it
|
32
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
33
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
34
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
35
|
+
5. Create new Pull Request
|
36
|
+
|
37
|
+
## Credits
|
38
|
+
|
39
|
+
This gem was created by [Antonio López](https://github.com/alopez7) and [Buda.com SpA](https://www.buda.com/).
|
40
|
+
|
41
|
+
Shear is maintained by [Buda.com SpA](https://www.buda.com/).
|
42
|
+
|
43
|
+
## License
|
44
|
+
|
45
|
+
Shear is free software and may be redistributed under the terms specified in the LICENSE file.
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "shear"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/setup
ADDED
data/lib/shear.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'shear/bounding_box_utils'
|
2
|
+
require 'shear/template'
|
3
|
+
require 'shear/template_match'
|
4
|
+
require "shear/version"
|
5
|
+
require 'shear/word_collection'
|
6
|
+
require 'utils/vision_utils'
|
7
|
+
require 'stencils/base_stencil'
|
8
|
+
require 'stencils/example_back_stencil'
|
9
|
+
require 'stencils/example_front_stencil'
|
10
|
+
require 'stencils/stencil_group'
|
11
|
+
|
12
|
+
module Shear
|
13
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
module Shear
  # Geometry helpers for testing a polygon against an axis-aligned bounding
  # box (AABB).
  #
  # Conventions used by every method, as read from the indexing in the code:
  # * a vertex is an indexable pair: vertex[0] is x, vertex[1] is y;
  # * an edge is a pair of vertices: [start_vertex, end_vertex];
  # * an AABB is a Hash with :min and :max keys, each holding a vertex;
  # * a box is an ordered Array of vertices; consecutive vertices (and the
  #   last with the first) form its edges.
  module BoundingBoxUtils
    extend self

    # Tells whether +vertex+ lies inside +aabb+. Points exactly on the border
    # count as inside.
    #
    # @param aabb [Hash] AABB with :min and :max vertices
    # @param vertex [Array<Numeric>] the [x, y] point to test
    # @return [Boolean]
    def vertex_in_aabb?(aabb, vertex)
      return false if vertex[0] < aabb[:min][0] || vertex[0] > aabb[:max][0]
      return false if vertex[1] < aabb[:min][1] || vertex[1] > aabb[:max][1]

      true
    end

    # Tells whether two line segments intersect (touching end points count).
    # Parallel or degenerate segments are reported as not touching, even when
    # they overlap.
    #
    # @param edge1 [Array] segment A-B as [[ax, ay], [bx, by]]
    # @param edge2 [Array] segment C-D as [[cx, cy], [dx, dy]]
    # @return [Boolean]
    def edges_touch?(edge1, edge2) # rubocop:disable AbcSize, MethodLength
      ax = edge1[0][0].to_f
      ay = edge1[0][1].to_f
      bx = edge1[1][0].to_f
      by = edge1[1][1].to_f

      cx = edge2[0][0].to_f
      cy = edge2[0][1].to_f
      dx = edge2[1][0].to_f
      dy = edge2[1][1].to_f

      # Cross product of the two direction vectors; zero means the segments
      # are parallel (or one of them has no length), so no single crossing
      # point exists.
      denominator = (cx - dx) * (ay - by) - (cy - dy) * (ax - bx)
      return false if denominator.zero?

      # alpha / beta locate the crossing point along each segment; the
      # segments meet only when both parameters fall within [0, 1].
      alpha = ((by - dy) * (cx - dx) - (bx - dx) * (cy - dy)) / -denominator
      beta = ((dy - by) * (ax - bx) - (dx - bx) * (ay - by)) / denominator

      return false if alpha.negative? || alpha > 1
      return false if beta.negative? || beta > 1

      true
    end

    # Tells whether +box+ overlaps +aabb+: either one of the box vertices is
    # inside the AABB or one of the box edges crosses an AABB edge.
    #
    # The case where the AABB lies completely inside the box is deliberately
    # ignored, since it is considered highly improbable.
    #
    # @param aabb [Hash] AABB with :min and :max vertices
    # @param box [Array<Array<Numeric>>] ordered polygon vertices (any count)
    # @return [Boolean]
    def collides?(aabb, box) # rubocop:disable AbcSize
      return true if box.any? { |vertex| vertex_in_aabb?(aabb, vertex) }

      min = aabb[:min]
      max = aabb[:max]
      # The four sides of the AABB do not depend on the box edge being
      # tested, so build them once.
      aabb_edges = [
        [min, [min[0], max[1]]],
        [min, [max[0], min[1]]],
        [max, [max[0], min[1]]],
        [max, [min[0], max[1]]]
      ]

      box.each_index.any? do |index|
        # Wrap around using the real vertex count so that polygons that are
        # not quadrilaterals are closed correctly.
        box_edge = [box[index], box[(index + 1) % box.length]]
        aabb_edges.any? { |aabb_edge| edges_touch?(box_edge, aabb_edge) }
      end
    end
  end
end
|
@@ -0,0 +1,187 @@
|
|
1
|
+
require 'active_support'
|
2
|
+
require 'active_support/core_ext'
|
3
|
+
|
4
|
+
module Shear
  # Describes the expected layout of a document as a list of fixtures and
  # matches it against a collection of detected words.
  #
  # Each fixture is stored as a four element Array:
  #   [word, at, label, filter]
  # where +at+ is the expected [x, y] location of the word in template space,
  # +label+ is an optional name for the resulting point, and +filter+ is one
  # of nil, 'discard', 'unique', 'big' or 'confidence'.
  #
  # Words are read as Hashes with (at least) the keys :tl_word,
  # :original_bounding_box and :conf.
  class Template
    # Creates a template, yields it to the block for configuration and
    # returns it sealed.
    #
    # @yieldparam template [Template] the template being configured
    # @return [Template] the sealed template
    def self.build(&_block)
      template = new
      _block.call(template)
      template.seal
      template
    end

    attr_reader :sealed, :fixtures, :exclusions

    def initialize
      @sealed = false
      @fixtures = []
      @exclusions = {}
    end

    # Registers a fixture word.
    #
    # @param _word [String] text expected to appear in the document
    # @param at [Array<Numeric>, nil] expected [x, y] location of the word
    # @param label [Object, nil] key under which the matched location is
    #   reported; unlabeled fixtures are not reported
    # @param filter [String, nil] 'discard', 'unique', 'big' or 'confidence'
    # @raise [RuntimeError] if the template is already sealed
    def set(_word, at: nil, label: nil, filter: nil)
      raise 'template sealed' if @sealed

      @fixtures << [_word, at, label, filter]
    end

    # Adds a word to the exclusion set of a field. The word is stored
    # transliterated and upcased.
    #
    # @param _field_name [Object] key identifying the field
    # @param _excluded_word [String] word to exclude
    # @raise [RuntimeError] if the template is already sealed
    def set_exclusion(_field_name, _excluded_word)
      raise 'template sealed' if @sealed

      @exclusions[_field_name] ||= Set[]
      @exclusions[_field_name] << I18n.transliterate(_excluded_word).upcase
    end

    # @param _field_name [Object] key identifying the field
    # @return [Set] the exclusions registered for the field, or an empty Set
    def get_exclusions(_field_name)
      @exclusions.fetch(_field_name) { Set[] }
    end

    # Prevents any further call to #set or #set_exclusion.
    def seal
      @sealed = true
    end

    # Tries to fit the template to the given words.
    #
    # @param _word_collection [WordCollection] detected words
    # @return [TemplateMatch, nil] the candidate with the lowest error, or nil
    #   when the template is discarded or no candidate could be evaluated
    # @raise [RuntimeError] when the three anchor words of a candidate are
    #   collinear
    def match(_word_collection)
      filtered_words_collection = filter_words(_word_collection)
      return nil if should_discard_stencil?(filtered_words_collection.words)

      recursive_match(filtered_words_collection, [], 0)
    end

    private

    # Transliterated, upcased words of every fixture using the given filter.
    def fixture_words_tl(_filter)
      @fixtures.select { |f| f[3] == _filter }.map { |f| I18n.transliterate(f[0]).upcase }
    end

    def should_discard_stencil?(_words)
      should_discard_stencil_by_discard_fixture?(_words) ||
        should_discard_stencil_by_unique_fixture?(_words)
    end

    # True when any word equals one of the 'discard' fixtures.
    def should_discard_stencil_by_discard_fixture?(_words)
      discard_fixtures_tl = fixture_words_tl('discard')
      _words.any? { |w| discard_fixtures_tl.include?(w[:tl_word]) }
    end

    # True when a 'unique' fixture word shows up more than once.
    def should_discard_stencil_by_unique_fixture?(_words)
      unique_fixtures_tl = fixture_words_tl('unique')
      unique_fixture_words =
        _words.map { |w| w[:tl_word] }.select { |tl_w| unique_fixtures_tl.include?(tl_w) }
      unique_fixture_words.uniq.length != unique_fixture_words.length
    end

    # Applies the de-duplication filters and wraps the surviving words in a
    # new WordCollection.
    def filter_words(_word_collection)
      filtered_words = reject_words_with_duplicate_bounding_box(_word_collection.words)
      filtered_words = select_big_words_with_larger_bounding_box(filtered_words)
      filtered_words = select_words_with_high_confidence(filtered_words)
      WordCollection.new(filtered_words)
    end

    def reject_words_with_duplicate_bounding_box(_words)
      _words.uniq { |w| [w[:original_bounding_box], w[:tl_word]] }
    end

    # For 'big' fixtures keeps, per word, the occurrence whose first two
    # bounding box vertices are farthest apart.
    def select_big_words_with_larger_bounding_box(_words)
      keep_best_occurrence(_words, 'big') do |w|
        distance(w[:original_bounding_box][0], w[:original_bounding_box][1])
      end
    end

    # For 'confidence' fixtures keeps, per word, the occurrence with the
    # highest :conf value.
    def select_words_with_high_confidence(_words)
      keep_best_occurrence(_words, 'confidence') { |w| w[:conf] }
    end

    # Among the words matching fixtures with the given filter, keeps only the
    # highest scoring occurrence of each distinct word (score given by the
    # block). Other words are returned untouched, ahead of the kept ones.
    def keep_best_occurrence(_words, _filter)
      fixtures_tl = fixture_words_tl(_filter)
      candidates = _words.select { |w| fixtures_tl.include?(w[:tl_word]) }
      best = candidates.sort_by { |w| yield(w) }.reverse.uniq { |w| w[:tl_word] }
      (_words - candidates) + best
    end

    # Walks the fixtures in order, pairing each one with every word index
    # returned by _words.search, and returns the candidate with the lowest
    # error.
    #
    # _result accumulates [word_index, fixture_index] pairs.
    def recursive_match(_words, _result, _fixture_index) # rubocop:disable all
      word_indexes = []
      while word_indexes.empty?
        return calculate_match(_words, _result) if _fixture_index == @fixtures.length

        word, _, _, word_filter = @fixtures[_fixture_index]
        # 'discard' fixtures only serve to reject a template; they must never
        # be paired with a word, so they are skipped without keeping any hits.
        word_indexes = word_filter == 'discard' ? [] : _words.search(word)
        _fixture_index += 1 if word_indexes.empty?
      end

      word_indexes.inject(nil) do |best_match, word_index|
        new_result = _result + [[word_index, _fixture_index]]
        match = recursive_match(_words, new_result, _fixture_index + 1)
        next match if best_match.nil?
        next best_match if match.nil?

        match.error < best_match.error ? match : best_match
      end
    end

    # Builds a TemplateMatch for one candidate pairing.
    #
    # The first three pairs are used to solve T in W * T = F (word locations
    # to fixture locations); the remaining pairs measure how well T fits.
    # Returns nil when there are not more than three pairs, because no pair
    # would be left to measure the error.
    def calculate_match(_words, _result) # rubocop:disable AbcSize, MethodLength
      return nil if _result.length <= 3

      anchors = _result.first(3)
      # Each location gets a trailing 1 appended before being used as a
      # matrix row.
      matrix_w = Matrix[
        *anchors.map { |word_index, _| _words.original_location(word_index) + [1] }
      ]
      matrix_f = Matrix[
        *anchors.map { |_, fixture_index| @fixtures[fixture_index][1] + [1] }
      ]

      raise "Found locations are collinear" if matrix_w.singular?

      transform = (matrix_w.inverse * matrix_f).transpose

      # transform! is applied to a clone so the given collection is not the
      # receiver of the mutating call.
      norm_words = _words.clone.transform! transform

      errors = _result.drop(3).map do |word_index, fixture_index|
        distance(norm_words.location(word_index), @fixtures[fixture_index][1])
      end

      TemplateMatch.new(
        load_labeled_points(_result, norm_words), norm_words, mean_sq_error(errors), transform
      )
    end

    # Maps each fixture label to the location of the word paired with it;
    # pairs whose fixture has no label are left out.
    def load_labeled_points(_result, _words)
      labeled = _result.map do |word_index, fixture_index|
        fixture_label = @fixtures[fixture_index][2]
        word_location = _words.location(word_index)
        [fixture_label, word_location] if fixture_label
      end
      labeled.compact.to_h
    end

    # Euclidean distance between two [x, y] points.
    def distance(_pt1, _pt2)
      Math.sqrt((_pt2[0].to_d - _pt1[0])**2 + (_pt2[1].to_d - _pt1[1])**2)
    end

    # Mean of the squared errors. Callers must pass at least one error.
    def mean_sq_error(_errors)
      _errors.sum { |e| e * e } / _errors.count
    end
  end
end
|