kvg_character_recognition 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d60d7f44902345773b5fff377783a490a41c7f06
4
- data.tar.gz: 6b8e90de99d64c80ce576f969114cd02432fea58
3
+ metadata.gz: 3561d546950211fe53ebcb63610de390341134c8
4
+ data.tar.gz: 0e96434c9cb5e5d724bec72b4c0046d055b6c7e5
5
5
  SHA512:
6
- metadata.gz: 0ae3dda7c2114311a4613ed39b6e48e2e13a4b51619fb791c9476a0070a0dd49e60468fd63d7a7cb660ea52ea803e05cef48fbdd61f0c7a23c5c74141a54fc47
7
- data.tar.gz: 6c43c6da5be2c25b6cf75ed364de496f8f87fb6f9bc0d8e2aaa0161e0e0ac159d2bdcf8805a36ff77f0ec52eab85fee697a093635a530dbfd779e7163f00f029
6
+ metadata.gz: 3129719a4702919436175246fa579ac3f28638fc49fc897941f5dfbba8f13cb820904058e194427791898411af5d46e4bf875a9acedebcc81478fac7110a7227
7
+ data.tar.gz: dff06615f6bedfac5bd6521572a5f367462f1fed073c6b4809a818b13f3b1ffb36a58361668b132d3e432861b6c0c56532852ce079eb27c3ccc709305ace8143
@@ -26,7 +26,7 @@ Gem::Specification.new do |spec|
26
26
  raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
27
27
  end
28
28
 
29
- spec.files = `git ls-files -z`.split("\x0").reject { |f| f == 'kvg_character_recognition-0.1.2.gem' || f.match(%r{^(test|spec|features)/}) }
29
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f == 'kvg_character_recognition-0.1.3.gem' || f.match(%r{^(test|spec|features)/}) }
30
30
  spec.bindir = "exe"
31
31
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
32
32
  spec.require_paths = ["lib"]
@@ -1,35 +1,22 @@
1
- require 'bundler'
2
- Bundler.require
1
+ require 'nokogiri'
3
2
  require 'yaml'
3
+ require 'json'
4
+ require 'matrix'
4
5
  #require all files in ./lib/
5
- Dir[File.join(File.dirname(__FILE__), '/kvg_character_recognition/*.rb')].each {|file| require file }
6
+ require File.join(File.dirname(__FILE__), '/kvg_character_recognition/utils.rb')
7
+ require File.join(File.dirname(__FILE__), '/kvg_character_recognition/normalization.rb')
8
+ require File.join(File.dirname(__FILE__), '/kvg_character_recognition/preprocessor.rb')
9
+ require File.join(File.dirname(__FILE__), '/kvg_character_recognition/non_structural_feature.rb')
10
+ require File.join(File.dirname(__FILE__), '/kvg_character_recognition/heatmap_feature.rb')
11
+ require File.join(File.dirname(__FILE__), '/kvg_character_recognition/kvg_parser.rb')
12
+ require File.join(File.dirname(__FILE__), '/kvg_character_recognition/datastore.rb')
13
+ require File.join(File.dirname(__FILE__), '/kvg_character_recognition/trainer.rb')
14
+ require File.join(File.dirname(__FILE__), '/kvg_character_recognition/template.rb')
15
+ require File.join(File.dirname(__FILE__), '/kvg_character_recognition/recognizer.rb')
6
16
 
7
17
  module KvgCharacterRecognition
8
-
9
- CONFIG = {
10
- size: 109, #fixed canvas size of kanjivg data
11
- downsample_interval: 4,
12
- interpolate_distance: 0.8,
13
- heatmap_coarse_grid: 17,
14
- heatmap_granular_grid: 17,
15
- }
16
- VALID_KEYS = CONFIG.keys
17
-
18
- #Configure through hash
19
- def self.configure(opts = {})
20
- opts.each {|k,v| CONFIG[k.to_sym] = v if VALID_KEYS.include? k.to_sym}
21
- end
22
-
23
- #Configure with yaml
24
- def self.configure_with(yml)
25
- begin
26
- config = YAML::load(IO.read(yml))
27
- rescue Errno::ENOENT
28
- log(:warning, "YAML configuration file couldn't be found. Using defaults."); return
29
- rescue Psych::SyntaxError
30
- log(:warning, "YAML configuration file contains invalid syntax. Using defaults."); return
31
- end
32
-
33
- configure(config)
18
# Creates a JSONDatastore for +filename+ and passes it, together with the
# +xml+ path, to Template.parse_from_xml.
#
# Params:
# +filename+:: path handed to JSONDatastore.new (default "characters.json")
# +xml+:: path handed to Template.parse_from_xml (default "kanjivg-20150615-2.xml")
#
# Returns whatever Template.parse_from_xml returns.
def self.init_datastore(filename = "characters.json", xml = "kanjivg-20150615-2.xml")
  store = JSONDatastore.new(filename)
  Template.parse_from_xml(xml, store)
end
35
22
  end
@@ -1,5 +1,3 @@
1
- require 'json'
2
-
3
1
  module KvgCharacterRecognition
4
2
  class JSONDatastore
5
3
  def initialize filename = 'characters.json'
@@ -0,0 +1,50 @@
1
module KvgCharacterRecognition
  # Builds a three-channel heatmap from three point sets describing the same
  # character and smooths it with NonStructuralFeature#smooth.
  #
  # Each point set is an array of strokes; each stroke is an array of [x, y]
  # points. NOTE(review): the names suggest the sets are three differently
  # normalized versions of one character -- confirm against the preprocessor.
  class HeatmapFeature
    include NonStructuralFeature
    attr_accessor :size, :weights, :number_of_grids, :heatmaps

    #Params:
    #+bi_normed+:: point set counted into channel 0; also defines the point total
    #+ld_normed+:: point set counted into channel 1
    #+pd_normed+:: point set counted into channel 2
    #+size+:: edge length of the (square) canvas the points live on
    #+number_of_grids+:: number of grid cells per canvas edge
    #+weights+:: nine weights used by smooth
    def initialize bi_normed, ld_normed, pd_normed, size, number_of_grids, weights=[1/9.0, 1/9.0, 1/9.0, 1/9.0, 1/9.0, 1/9.0, 1/9.0, 1/9.0, 1/9.0]
      @size = size
      @number_of_grids = number_of_grids
      # All three channels are scaled by the point count of bi_normed.
      @number_of_points = bi_normed.flatten(1).count
      @weights = weights
      @heatmaps = smooth(generate_heatmaps(bi_normed, ld_normed, pd_normed))
    end

    # Counts, per grid cell, the share of points falling into it.
    # Returns a Map whose cells are [channel0, channel1, channel2].
    # Points outside the grid (on either side) are ignored.
    def generate_heatmaps bi_normed, ld_normed, pd_normed
      grid_size = size / @number_of_grids.to_f
      increment = (1 / @number_of_points.to_f).round(4)
      map = Map.new @number_of_grids, @number_of_grids, [0, 0, 0]

      [bi_normed, ld_normed, pd_normed].each_with_index do |strokes, channel|
        strokes.each do |stroke|
          stroke.each do |point|
            i = (point[0] / grid_size).floor
            j = (point[1] / grid_size).floor
            # A negative index would otherwise wrap around to the far side
            # of the map, so both bounds have to be checked.
            next unless inside_grid?(i) && inside_grid?(j)

            cell = map[i, j].dup
            cell[channel] += increment
            map[i, j] = cell
          end
        end
      end

      map
    end

    private

    # True when +index+ addresses an existing grid cell along one axis.
    def inside_grid?(index)
      index >= 0 && index < @number_of_grids
    end
  end
end
@@ -0,0 +1,272 @@
1
module KvgCharacterRecognition
  #This module contains classes which can be used to parse a svg command
  #The code is copied from https://github.com/rogerbraun/KVG-Tools
  #Methods for generating sexp or xml outputs are removed
  module KvgParser
    # A point in the plane, tagged with a color.
    class Point
      attr_accessor :x, :y, :color

      def initialize(x, y, color = :black)
        @x, @y, @color = x, y, color
      end

      # Basic point arithmetics. Results are new Points with the default color.
      def +(p2)
        Point.new(@x + p2.x, @y + p2.y)
      end

      def -(p2)
        Point.new(@x - p2.x, @y - p2.y)
      end

      # Euclidean distance to +p2+.
      def dist(p2)
        Math.sqrt((p2.x - @x)**2 + (p2.y - @y)**2)
      end

      # Scales both coordinates by +number+.
      def *(number)
        Point.new(@x * number, @y * number)
      end

      # [x, y], each rounded to two decimals.
      def to_a
        [@x.round(2), @y.round(2)]
      end
    end

    # SVG_M represents the moveto command.
    # SVG Syntax is:
    # m x y
    # It sets the current cursor to the point (x,y).
    # As always, capitalization denotes absolute values.
    # Takes a Point as argument.
    # If given 2 Points, the second argument is treated as the current cursor.
    class SVG_M
      def initialize(p1, p2 = Point.new(0, 0))
        @p = p1 + p2
      end

      # A move draws nothing.
      def to_points
        []
      end

      def current_cursor
        @p
      end
    end

    # SVG_C represents the cubic Bézier curveto command.
    # Syntax is:
    # c x1 y1 x2 y2 x y
    # It sets the current cursor to the point (x,y).
    # As always, capitalization denotes absolute values.
    # Takes 4 Points as argument, the fourth being the current cursor
    # If constructed using SVG_C.relative, the current cursor is added to every
    # point.
    class SVG_C
      def initialize(c1, c2, p, current_cursor)
        @c1, @c2, @p, @current_cursor = c1, c2, p, current_cursor
        # Class-wide color state; it is reset every time a curve is created.
        @@c_color = :green
      end

      def self.relative(c1, c2, p, current_cursor)
        SVG_C.new(c1 + current_cursor, c2 + current_cursor, p + current_cursor, current_cursor)
      end

      # The second control point; a following smooth curve reflects it.
      def second_point
        @c2
      end

      # This implements the algorithm found here:
      # http://www.cubic.org/docs/bezier.htm
      # Takes 2 Points and a factor between 0 and 1
      def linear_interpolation(a, b, factor)
        xr = a.x + ((b.x - a.x) * factor)
        yr = a.y + ((b.y - a.y) * factor)

        Point.new(xr, yr)
      end

      # Cycles the class-wide color green -> red -> purple -> green.
      def switch_color
        @@c_color = case @@c_color
                    when :green then :red
                    when :red then :purple
                    else :green
                    end
      end

      # Point on the curve for a curve parameter +factor+ (De Casteljau).
      def make_curvepoint(factor)
        ab = linear_interpolation(@current_cursor, @c1, factor)
        bc = linear_interpolation(@c1, @c2, factor)
        cd = linear_interpolation(@c2, @p, factor)

        abbc = linear_interpolation(ab, bc, factor)
        bccd = linear_interpolation(bc, cd, factor)
        linear_interpolation(abbc, bccd, factor)
      end

      # Approximates the curve length by a polyline with +points+ segments.
      def length(points)
        old_point = @current_cursor
        length = 0.0
        factor = points.to_f

        (1..points).each do |point|
          new_point = make_curvepoint(point / factor)
          length += old_point.dist(new_point)
          old_point = new_point
        end
        length
      end

      # This gives back an array of points on the curve. The argument is a
      # sampling density: roughly +distance+ points are produced per unit of
      # curve length (the name is kept for compatibility).
      def make_curvepoint_array(distance)
        steps = length(20) * distance
        # A curve of length zero would otherwise divide 0 by 0.0 and yield a
        # NaN point; it is represented by its start point instead.
        return [make_curvepoint(0.0)] if steps.zero?

        result = []
        (0..steps).each do |step|
          result.push(make_curvepoint(step / steps.to_f))
        end
        result
      end

      def to_points
        make_curvepoint_array(0.3)
      end

      def current_cursor
        @p
      end
    end

    # SVG_S represents the smooth curveto command.
    # Syntax is:
    # s x2 y2 x y
    # It sets the current cursor to the point (x,y).
    # As always, capitalization denotes absolute values.
    # Takes 3 Points as argument, the third being the current cursor
    # If constructed using SVG_S.relative, the current cursor is added to every
    # point.
    class SVG_S < SVG_C
      def initialize(c2, p, current_cursor, previous_point)
        super(SVG_S.reflect(previous_point, current_cursor), c2, p, current_cursor)
      end

      # The reflection in this case is rather tricky. Using SVG_C.relative, the
      # offset of current_cursor is added to all the positions (except current_cursor).
      # The reflected point, however is already calculated in absolute values.
      # Because of this, we have to subtract the current_cursor from the reflected
      # point, as it is already added later.
      # Note that this returns an SVG_C, not an SVG_S.
      def self.relative(c2, p, current_cursor, previous_point)
        SVG_C.relative(SVG_S.reflect(previous_point, current_cursor) - current_cursor, c2, p, current_cursor)
      end

      # Mirrors +p+ at +mirror+.
      def self.reflect(p, mirror)
        mirror + (mirror - p)
      end
    end

    # Stroke represent one stroke, which is a series of SVG commands.
    class Stroke
      COMMANDS = ["M", "C", "c", "s", "S"]

      def initialize(stroke_as_code)
        @command_list = parse(stroke_as_code)
      end

      # All sampled Points of all commands, in drawing order.
      def to_points
        @command_list.flat_map { |element| element.to_points }
      end

      #to array
      #TODO: better implementation using composite pattern
      def to_a
        to_points.map { |point| point.to_a }
      end

      # Splits path data into tokens: command letters and numbers.
      # Commas are inserted around command letters and in front of minus
      # signs, then everything is split on commas. A relative "m" is turned
      # into an absolute "M".
      def split_elements(line)
        line.gsub("-",",-").gsub("s",",s,").gsub("S",",S,").
          gsub("c",",c,").gsub("C",",C,").gsub("m", "M").gsub("M","M,").
          gsub("[","").gsub(";",",;,").gsub(",,",",").
          gsub(" ,", ",").gsub(", ", ",").gsub(" ", ",").split(/,/)
      end

      # Turns path data into a list of SVG_M / SVG_C / SVG_S commands.
      # Unknown tokens are skipped. Missing numbers are read as 0.0.
      def parse(stroke_as_code)
        elements = split_elements(stroke_as_code).delete_if { |e| e == "" }
        command_list = []
        current_cursor = Point.new(0, 0)

        until elements.empty?
          command = elements.shift

          case command
          when "M"
            target, = take_points(elements, 1)
            segment = SVG_M.new(target)
          when "C"
            c1, c2, target = take_points(elements, 3)
            segment = SVG_C.new(c1, c2, target, current_cursor)
          when "c"
            c1, c2, target = take_points(elements, 3)
            segment = SVG_C.relative(c1, c2, target, current_cursor)
          when "S"
            c2, target = take_points(elements, 2)
            previous = previous_control_point(command_list.last, current_cursor)
            segment = SVG_S.new(c2, target, current_cursor, previous)
          when "s"
            c2, target = take_points(elements, 2)
            previous = previous_control_point(command_list.last, current_cursor)
            segment = SVG_S.relative(c2, target, current_cursor, previous)
          else
            # not a known command: drop the token
            next
          end

          current_cursor = segment.current_cursor
          command_list.push(segment)

          #handle polybezier: further numbers repeat the same curve command
          if command != "M" && !elements.empty? && !COMMANDS.include?(elements.first)
            elements.unshift(command)
          end
        end

        command_list
      end

      private

      # Removes +count+ coordinate pairs from the front of +elements+ and
      # returns them as Points.
      def take_points(elements, count)
        Array.new(count) do
          x, y = elements.shift(2)
          Point.new(x.to_f, y.to_f)
        end
      end

      # Control point a smooth curve has to reflect. When the previous command
      # is not a curve (or there is none), the current cursor is used, so that
      # the reflected control point coincides with the cursor.
      def previous_control_point(previous_command, current_cursor)
        if previous_command.respond_to?(:second_point)
          previous_command.second_point
        else
          current_cursor
        end
      end
    end
  end
end
@@ -0,0 +1,72 @@
1
# Helpers for grid based (non structural) features: a small matrix class and
# a 3x3 smoothing filter. Classes mixing this in must provide
# @number_of_grids and @weights (nine numbers) for smooth.
module NonStructuralFeature
  # Offsets [di, dj] of the 3x3 neighbourhood. The position in this list is
  # the index of the weight applied to that neighbour.
  NEIGHBOUR_OFFSETS = [[1, -1], [1, 0], [1, 1],
                       [0, -1], [0, 0], [0, 1],
                       [-1, -1], [-1, 0], [-1, 1]].freeze

  #This class can be used for storing heatmap count and directional feature densities
  #basically it is a nxm matrix with an initial value in each cell
  class Map
    attr_accessor :initial_value
    #Make a new map with
    #Params:
    #+n+:: row length
    #+m+:: column length
    #+initial_value+:: value put into every cell, e.g. [0, 0, 0]
    def initialize n, m, initial_value
      # Every cell gets its own copy of an Array initial value; otherwise all
      # cells (and initial_value itself) would be one shared object.
      @array = Array.new(n * m) do
        initial_value.is_a?(Array) ? initial_value.dup : initial_value
      end
      @n = n
      @m = m
      @initial_value = initial_value
    end

    #Access value in the cell of i-th row and j-th column
    #e.g. map[i,j]
    #No bounds check is done: indices are mapped to j*n + i.
    def [](i, j)
      @array[j*@n + i]
    end

    #Store value in the cell of i-th row and j-th column
    #e.g. map[i,j] = value
    def []=(i, j, value)
      @array[j*@n + i] = value
    end

    # The cells as one flat array.
    def to_a
      @array
    end

    #Normaly n is the same as m
    def size
      @n
    end
  end

  # Applies a 3x3 weighted filter to every cell of +map+ and returns the
  # result as a new Map; +map+ itself is not changed.
  #
  # Neighbours outside the grid contribute 0. Every channel of a cell is
  # filtered on its own and rounded to three decimals. The number of channels
  # is taken from map.initial_value.
  #
  #weights alternative
  # = [1/16, 2/16, 1/16];
  #   [2/16, 4/16, 2/16];
  #   [1/16, 2/16, 1/16]
  #
  #weights = [1/9, 1/9, 1/9];
  #          [1/9, 1/9, 1/9];
  #          [1/9, 1/9, 1/9]
  def smooth map
    last_index = @number_of_grids - 1
    channels = map.initial_value.size
    new_map = Map.new(@number_of_grids, @number_of_grids, map.initial_value)

    (0..last_index).each do |i|
      (0..last_index).each do |j|
        contributions = NEIGHBOUR_OFFSETS.each_with_index.map do |(di, dj), k|
          ni = i + di
          nj = j + dj
          if ni.between?(0, last_index) && nj.between?(0, last_index)
            map[ni, nj].map { |e| e * @weights[k] }
          else
            Array.new(channels, 0)
          end
        end

        # Summed left to right in neighbour order.
        new_map[i, j] = Array.new(channels) do |channel|
          contributions.map { |weighted| weighted[channel] }.reduce(:+).round(3)
        end
      end
    end

    new_map
  end
end