kvg_character_recognition 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d60d7f44902345773b5fff377783a490a41c7f06
4
- data.tar.gz: 6b8e90de99d64c80ce576f969114cd02432fea58
3
+ metadata.gz: 3561d546950211fe53ebcb63610de390341134c8
4
+ data.tar.gz: 0e96434c9cb5e5d724bec72b4c0046d055b6c7e5
5
5
  SHA512:
6
- metadata.gz: 0ae3dda7c2114311a4613ed39b6e48e2e13a4b51619fb791c9476a0070a0dd49e60468fd63d7a7cb660ea52ea803e05cef48fbdd61f0c7a23c5c74141a54fc47
7
- data.tar.gz: 6c43c6da5be2c25b6cf75ed364de496f8f87fb6f9bc0d8e2aaa0161e0e0ac159d2bdcf8805a36ff77f0ec52eab85fee697a093635a530dbfd779e7163f00f029
6
+ metadata.gz: 3129719a4702919436175246fa579ac3f28638fc49fc897941f5dfbba8f13cb820904058e194427791898411af5d46e4bf875a9acedebcc81478fac7110a7227
7
+ data.tar.gz: dff06615f6bedfac5bd6521572a5f367462f1fed073c6b4809a818b13f3b1ffb36a58361668b132d3e432861b6c0c56532852ce079eb27c3ccc709305ace8143
@@ -26,7 +26,7 @@ Gem::Specification.new do |spec|
26
26
  raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
27
27
  end
28
28
 
29
- spec.files = `git ls-files -z`.split("\x0").reject { |f| f == 'kvg_character_recognition-0.1.2.gem' || f.match(%r{^(test|spec|features)/}) }
29
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f == 'kvg_character_recognition-0.1.3.gem' || f.match(%r{^(test|spec|features)/}) }
30
30
  spec.bindir = "exe"
31
31
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
32
32
  spec.require_paths = ["lib"]
@@ -1,35 +1,22 @@
1
- require 'bundler'
2
- Bundler.require
1
+ require 'nokogiri'
3
2
  require 'yaml'
3
+ require 'json'
4
+ require 'matrix'
4
5
  #require all files in ./lib/
5
- Dir[File.join(File.dirname(__FILE__), '/kvg_character_recognition/*.rb')].each {|file| require file }
6
+ require File.join(File.dirname(__FILE__), '/kvg_character_recognition/utils.rb')
7
+ require File.join(File.dirname(__FILE__), '/kvg_character_recognition/normalization.rb')
8
+ require File.join(File.dirname(__FILE__), '/kvg_character_recognition/preprocessor.rb')
9
+ require File.join(File.dirname(__FILE__), '/kvg_character_recognition/non_structural_feature.rb')
10
+ require File.join(File.dirname(__FILE__), '/kvg_character_recognition/heatmap_feature.rb')
11
+ require File.join(File.dirname(__FILE__), '/kvg_character_recognition/kvg_parser.rb')
12
+ require File.join(File.dirname(__FILE__), '/kvg_character_recognition/datastore.rb')
13
+ require File.join(File.dirname(__FILE__), '/kvg_character_recognition/trainer.rb')
14
+ require File.join(File.dirname(__FILE__), '/kvg_character_recognition/template.rb')
15
+ require File.join(File.dirname(__FILE__), '/kvg_character_recognition/recognizer.rb')
6
16
 
7
17
  module KvgCharacterRecognition
8
-
9
- CONFIG = {
10
- size: 109, #fixed canvas size of kanjivg data
11
- downsample_interval: 4,
12
- interpolate_distance: 0.8,
13
- heatmap_coarse_grid: 17,
14
- heatmap_granular_grid: 17,
15
- }
16
- VALID_KEYS = CONFIG.keys
17
-
18
- #Configure through hash
19
- def self.configure(opts = {})
20
- opts.each {|k,v| CONFIG[k.to_sym] = v if VALID_KEYS.include? k.to_sym}
21
- end
22
-
23
- #Configure with yaml
24
- def self.configure_with(yml)
25
- begin
26
- config = YAML::load(IO.read(yml))
27
- rescue Errno::ENOENT
28
- log(:warning, "YAML configuration file couldn't be found. Using defaults."); return
29
- rescue Psych::SyntaxError
30
- log(:warning, "YAML configuration file contains invalid syntax. Using defaults."); return
31
- end
32
-
33
- configure(config)
18
+ def self.init_datastore filename="characters.json", xml="kanjivg-20150615-2.xml"
19
+ datastore = JSONDatastore.new(filename)
20
+ Template.parse_from_xml xml, datastore
34
21
  end
35
22
  end
@@ -1,5 +1,3 @@
1
- require 'json'
2
-
3
1
  module KvgCharacterRecognition
4
2
  class JSONDatastore
5
3
  def initialize filename = 'characters.json'
@@ -0,0 +1,50 @@
1
+ module KvgCharacterRecognition
2
+ class HeatmapFeature
3
+ include NonStructuralFeature
4
+ attr_accessor :size, :weights, :number_of_grids, :heatmaps
5
+ def initialize bi_normed, ld_normed, pd_normed, size, number_of_grids, weights=[1/9.0, 1/9.0, 1/9.0, 1/9.0, 1/9.0, 1/9.0, 1/9.0, 1/9.0, 1/9.0]
6
+ @size = size
7
+ @number_of_grids = number_of_grids
8
+ @number_of_points = bi_normed.flatten(1).count
9
+ @weights = weights
10
+ @heatmaps = smooth(generate_heatmaps(bi_normed, ld_normed, pd_normed))
11
+ end
12
+
13
+ def generate_heatmaps bi_normed, ld_normed, pd_normed
14
+
15
+ grid_size = size / @number_of_grids.to_f
16
+
17
+ map = Map.new @number_of_grids, @number_of_grids, [0, 0, 0]
18
+
19
+ #fill the heatmap
20
+ bi_normed.each do |stroke|
21
+ stroke.each do |point|
22
+ grid1 = [(point[0] / grid_size).floor, (point[1] / grid_size).floor]
23
+
24
+ map[grid1[0], grid1[1]] = [map[grid1[0], grid1[1]][0] + (1 / @number_of_points.to_f).round(4),
25
+ map[grid1[0], grid1[1]][1],
26
+ map[grid1[0], grid1[1]][2]] if grid1[0] < @number_of_grids && grid1[1] < @number_of_grids
27
+ end
28
+ end
29
+ ld_normed.each do |stroke|
30
+ stroke.each do |point|
31
+ grid2 = [(point[0] / grid_size).floor, (point[1] / grid_size).floor]
32
+
33
+ map[grid2[0], grid2[1]] = [map[grid2[0], grid2[1]][0],
34
+ map[grid2[0], grid2[1]][1] + (1 / @number_of_points.to_f).round(4),
35
+ map[grid2[0], grid2[1]][2]] if grid2[0] < @number_of_grids && grid2[1] < @number_of_grids
36
+ end
37
+ end
38
+ pd_normed.each do |stroke|
39
+ stroke.each do |point|
40
+ grid4 = [(point[0] / grid_size).floor, (point[1] / grid_size).floor]
41
+
42
+ map[grid4[0], grid4[1]] = [map[grid4[0], grid4[1]][0],
43
+ map[grid4[0], grid4[1]][1],
44
+ map[grid4[0], grid4[1]][2] + (1 / @number_of_points.to_f).round(4)] if grid4[0] < @number_of_grids && grid4[1] < @number_of_grids
45
+ end
46
+ end
47
+ map
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,272 @@
1
+ module KvgCharacterRecognition
2
+ #This module contains classes which can be used to parse an SVG path command
3
+ #The code is copied from https://github.com/rogerbraun/KVG-Tools
4
+ #Methods for generating sexp or xml outputs are removed
5
+ module KvgParser
6
+ #A Point
7
+ class Point
8
+ attr_accessor :x, :y, :color
9
+
10
+ def initialize(x,y, color = :black)
11
+ @x,@y, @color = x, y, color
12
+ end
13
+
14
+ #Basic point arithmetics
15
+ def +(p2)
16
+ return Point.new(@x + p2.x, @y + p2.y)
17
+ end
18
+
19
+ def -(p2)
20
+ return Point.new(@x - p2.x, @y - p2.y)
21
+ end
22
+
23
+ def dist(p2)
24
+ return Math.sqrt((p2.x - @x)**2 + (p2.y - @y)**2)
25
+ end
26
+
27
+ def *(number)
28
+ return Point.new(@x * number, @y * number)
29
+ end
30
+
31
+ #to array
32
+ def to_a
33
+ [@x.round(2), @y.round(2)]
34
+ end
35
+
36
+ end
37
+
38
+ # SVG_M represents the moveto command.
39
+ # SVG Syntax is:
40
+ # m x y
41
+ # It sets the current cursor to the point (x,y).
42
+ # As always, capitalization denotes absolute values.
43
+ # Takes a Point as argument.
44
+ # If given 2 Points, the second argument is treated as the current cursor.
45
+ class SVG_M
46
+
47
+ def initialize(p1, p2 = Point.new(0,0))
48
+ @p = p1 + p2
49
+ end
50
+
51
+ def to_points
52
+ return []
53
+ end
54
+
55
+ def current_cursor
56
+ return @p
57
+ end
58
+
59
+ end
60
+
61
+ # SVG_C represents the cubic Bézier curveto command.
62
+ # Syntax is:
63
+ # c x1 y1 x2 y2 x y
64
+ # It sets the current cursor to the point (x,y).
65
+ # As always, capitalization denotes absolute values.
66
+ # Takes 4 Points as argument, the fourth being the current cursor
67
+ # If constructed using SVG_C.relative, the current cursor is added to every
68
+ # point.
69
+ class SVG_C
70
+
71
+ def initialize(c1,c2,p,current_cursor)
72
+ @c1,@c2,@p,@current_cursor = c1,c2,p,current_cursor
73
+ @@c_color = :green
74
+ end
75
+
76
+ def SVG_C.relative(c1,c2,p,current_cursor)
77
+ SVG_C.new(c1 + current_cursor, c2 + current_cursor, p + current_cursor, current_cursor)
78
+ end
79
+
80
+ def second_point
81
+ @c2
82
+ end
83
+
84
+ # This implements the algorithm found here:
85
+ # http://www.cubic.org/docs/bezier.htm
86
+ # Takes 2 Points and a factor between 0 and 1
87
+ def linear_interpolation(a,b,factor)
88
+
89
+ xr = a.x + ((b.x - a.x) * factor)
90
+ yr = a.y + ((b.y - a.y) * factor)
91
+
92
+ return Point.new(xr,yr);
93
+
94
+ end
95
+
96
+ def switch_color
97
+ if @@c_color == :green
98
+ @@c_color = :red
99
+ elsif @@c_color == :red
100
+ @@c_color = :purple
101
+ else
102
+ @@c_color = :green
103
+ end
104
+ end
105
+
106
+ def make_curvepoint(factor)
107
+ ab = linear_interpolation(@current_cursor,@c1,factor)
108
+ bc = linear_interpolation(@c1,@c2,factor)
109
+ cd = linear_interpolation(@c2,@p,factor)
110
+
111
+ abbc = linear_interpolation(ab,bc,factor)
112
+ bccd = linear_interpolation(bc,cd,factor)
113
+ return linear_interpolation(abbc,bccd,factor)
114
+ end
115
+
116
+ def length(points)
117
+ old_point = @current_cursor;
118
+ length = 0.0
119
+ factor = points.to_f
120
+
121
+ (1..points).each {|point|
122
+ new_point = make_curvepoint(point/(factor.to_f))
123
+ length += old_point.dist(new_point)
124
+ old_point = new_point
125
+ }
126
+ return length
127
+ end
128
+
129
+ # This gives back an array of points on the curve. The argument given
130
+ # is a density: the number of points generated per unit of curve length.
131
+ def make_curvepoint_array(distance)
132
+ result = Array.new
133
+
134
+ l = length(20)
135
+ points = l * distance
136
+ factor = points.to_f
137
+
138
+ (0..points).each {|point|
139
+ result.push(make_curvepoint(point/(factor.to_f)))
140
+ }
141
+
142
+ return result
143
+ end
144
+
145
+
146
+ def to_points
147
+ return make_curvepoint_array(0.3)
148
+ end
149
+
150
+ def current_cursor
151
+ @p
152
+ end
153
+
154
+ end
155
+
156
+ # SVG_S represents the smooth curveto command.
157
+ # Syntax is:
158
+ # s x2 y2 x y
159
+ # It sets the current cursor to the point (x,y).
160
+ # As always, capitalization denotes absolute values.
161
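+ # Takes 4 Points as arguments: the third is the current cursor and the fourth is the second control point of the previous command, which is reflected about the cursor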
162
+ # If constructed using SVG_S.relative, the current cursor is added to every
163
+ # point.
164
+ class SVG_S < SVG_C
165
+
166
+ def initialize(c2, p, current_cursor,previous_point)
167
+ super(SVG_S.reflect(previous_point,current_cursor), c2, p, current_cursor)
168
+ end
169
+
170
+ # The reflection in this case is rather tricky. Using SVG_C.relative, the
171
+ # offset of current_cursor is added to all the positions (except current_cursor).
172
+ # The reflected point, however is already calculated in absolute values.
173
+ # Because of this, we have to subtract the current_cursor from the reflected
174
+ # point, as it is already added later. I think I got the classes somewhat wrong.
175
+ # Maybe points should get a field whether they are absolute or relative?
176
+ # Don't know yet. It works now, though!
177
+ def SVG_S.relative(c2, p, current_cursor, previous_point)
178
+ SVG_C.relative(SVG_S.reflect(previous_point,current_cursor) - current_cursor, c2, p, current_cursor)
179
+ end
180
+
181
+ def SVG_S.reflect(p, mirror)
182
+ return mirror + (mirror - p)
183
+ end
184
+
185
+ end
186
+
187
+
188
+ # Stroke represents one stroke, which is a series of SVG commands.
189
+ class Stroke
190
+ COMMANDS = ["M", "C", "c", "s", "S"]
191
+
192
+ def initialize(stroke_as_code)
193
+ @command_list = parse(stroke_as_code)
194
+ end
195
+
196
+ def to_points
197
+ return @command_list.map{|element| element.to_points}.flatten
198
+ end
199
+
200
+ #to array
201
+ #TODO: better implementation using composite pattern
202
+ def to_a
203
+ to_points.map{|point| point.to_a}
204
+ end
205
+
206
+ def split_elements(line)
207
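+ # This is magic: put commas around command letters and before minus signs, turn "m" into "M", turn spaces into commas, then split on commas.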
208
+ return line.gsub("-",",-").gsub("s",",s,").gsub("S",",S,").gsub("c",",c,").gsub("C",",C,").gsub("m", "M").gsub("M","M,").gsub("[","").gsub(";",",;,").gsub(",,",",").gsub(" ,", ",").gsub(", ", ",").gsub(" ", ",").split(/,/);
209
+ end
210
+
211
+ def parse(stroke_as_code)
212
+ elements = split_elements(stroke_as_code).delete_if{ |e| e == "" }
213
+ command_list = Array.new
214
+ current_cursor = Point.new(0,0);
215
+
216
+ while elements != [] do
217
+
218
+ case elements.slice!(0)
219
+ when "M"
220
+ x,y = elements.slice!(0..1)
221
+ m = SVG_M.new(Point.new(x.to_f,y.to_f))
222
+ current_cursor = m.current_cursor
223
+ command_list.push(m)
224
+
225
+ when "C"
226
+ x1,y1,x2,y2,x,y = elements.slice!(0..5)
227
+ c = SVG_C.new(Point.new(x1.to_f,y1.to_f), Point.new(x2.to_f,y2.to_f), Point.new(x.to_f,y.to_f), current_cursor)
228
+ current_cursor = c.current_cursor
229
+ command_list.push(c)
230
+
231
+ #handle polybezier
232
+ unless elements.empty? || COMMANDS.include?(elements.first)
233
+ elements.unshift("C")
234
+ end
235
+ when "c"
236
+ x1,y1,x2,y2,x,y = elements.slice!(0..5)
237
+ c = SVG_C.relative(Point.new(x1.to_f,y1.to_f), Point.new(x2.to_f,y2.to_f), Point.new(x.to_f,y.to_f), current_cursor)
238
+ current_cursor = c.current_cursor
239
+ command_list.push(c)
240
+
241
+ #handle polybezier
242
+ unless elements.empty? || COMMANDS.include?(elements.first)
243
+ elements.unshift("c")
244
+ end
245
+
246
+ when "s"
247
+ x2,y2,x,y = elements.slice!(0..3)
248
+ reflected_point = command_list[-1].second_point
249
+ s = SVG_S.relative(Point.new(x2.to_f,y2.to_f), Point.new(x.to_f,y.to_f), current_cursor, reflected_point)
250
+ current_cursor = s.current_cursor
251
+ command_list.push(s)
252
+
253
+ when "S"
254
+ x2,y2,x,y = elements.slice!(0..3)
255
+ reflected_point = command_list[-1].second_point
256
+ s = SVG_S.new(Point.new(x2.to_f,y2.to_f), Point.new(x.to_f,y.to_f), current_cursor,reflected_point)
257
+ current_cursor = s.current_cursor
258
+ command_list.push(s)
259
+
260
+ else
261
+ #print "You should not be here\n"
262
+
263
+ end
264
+
265
+ end
266
+
267
+ return command_list
268
+ end
269
+
270
+ end
271
+ end
272
+ end
@@ -0,0 +1,72 @@
1
+ module NonStructuralFeature
2
+ #This class can be used for storing heatmap count and directional feature densities
3
+ #basically it is a nxm matrix with an initial value in each cell
4
+ class Map
5
+ attr_accessor :initial_value
6
+ #Make a new map with
7
+ #Params:
8
+ #+n+:: row length
9
+ #+m+:: column length
10
+ #+initial_value+:: for heatmap initial_value = 0 and for directional feature densities initial_value = [0, 0, 0, 0] <= [weight in e1, weight in e2, ...]
11
+ def initialize n, m, initial_value
12
+ @array = Array.new(n * m, initial_value)
13
+ @n = n
14
+ @m = m
15
+ @initial_value = initial_value
16
+ end
17
+
18
+ #Access value in the cell of i-th row and j-th column
19
+ #e.g. map[i,j]
20
+ def [](i, j)
21
+ @array[j*@n + i]
22
+ end
23
+
24
+ #Store value in the cell of i-th row and j-th column
25
+ #e.g. map[i,j] = value
26
+ def []=(i, j, value)
27
+ @array[j*@n + i] = value
28
+ end
29
+
30
+ def to_a
31
+ @array
32
+ end
33
+
34
+ #Normally n is the same as m
35
+ def size
36
+ @n
37
+ end
38
+ end
39
+
40
+ def smooth map
41
+ new_map = Map.new(@number_of_grids, @number_of_grids, map.initial_value)
42
+
43
+ (0..(@number_of_grids - 1)).each do |i|
44
+ (0..(@number_of_grids - 1)).each do |j|
45
+ #weights alternative
46
+ # = [1/16, 2/16, 1/16];
47
+ # [2/16, 4/16, 2/16];
48
+ # [1/16, 2/16, 1/16]
49
+ #
50
+ #weights = [1/9, 1/9, 1/9];
51
+ # [1/9, 1/9, 1/9];
52
+ # [1/9, 1/9, 1/9]
53
+ #
54
+ w11 = (0..(@number_of_grids-1)).cover?(i+1) && (0..(@number_of_grids-1)).cover?(j-1)? map[i+1,j-1].map{|e| e * @weights[0]} : [0, 0, 0]
55
+ w12 = (0..(@number_of_grids-1)).cover?(i+1) && (0..(@number_of_grids-1)).cover?(j)? map[i+1,j].map{|e| e * @weights[1]} : [0, 0, 0]
56
+ w13 = (0..(@number_of_grids-1)).cover?(i+1) && (0..(@number_of_grids-1)).cover?(j+1)? map[i+1,j+1].map{|e| e * @weights[2]} : [0, 0, 0]
57
+ w21 = (0..(@number_of_grids-1)).cover?(i) && (0..(@number_of_grids-1)).cover?(j-1)? map[i,j-1].map{|e| e * @weights[3]} : [0, 0, 0]
58
+ w22 = (0..(@number_of_grids-1)).cover?(i) && (0..(@number_of_grids-1)).cover?(j)? map[i,j].map{|e| e * @weights[4]} : [0, 0, 0]
59
+ w23 = (0..(@number_of_grids-1)).cover?(i) && (0..(@number_of_grids-1)).cover?(j+1)? map[i,j+1].map{|e| e * @weights[5]} : [0, 0, 0]
60
+ w31 = (0..(@number_of_grids-1)).cover?(i-1) && (0..(@number_of_grids-1)).cover?(j-1)? map[i-1,j-1].map{|e| e * @weights[6]} : [0, 0, 0]
61
+ w32 = (0..(@number_of_grids-1)).cover?(i-1) && (0..(@number_of_grids-1)).cover?(j)? map[i-1,j].map{|e| e * @weights[7]} : [0, 0, 0]
62
+ w33 = (0..(@number_of_grids-1)).cover?(i-1) && (0..(@number_of_grids-1)).cover?(j+1)? map[i-1,j+1].map{|e| e * @weights[8]} : [0, 0, 0]
63
+
64
+ new_map[i,j] = [(w11[0] + w12[0] + w13[0] + w21[0] + w22[0] + w23[0] + w31[0] + w32[0] + w33[0]).round(3),
65
+ (w11[1] + w12[1] + w13[1] + w21[1] + w22[1] + w23[1] + w31[1] + w32[1] + w33[1]).round(3),
66
+ (w11[2] + w12[2] + w13[2] + w21[2] + w22[2] + w23[2] + w31[2] + w32[2] + w33[2]).round(3)]
67
+ end
68
+ end
69
+
70
+ new_map
71
+ end
72
+ end