kvg_character_recognition 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a14886233c4152851248456913d18768d9c8472a
4
+ data.tar.gz: f29d6c47b40ce1eb68a4db14c3b03961c275197c
5
+ SHA512:
6
+ metadata.gz: 339db4a0b7e01108b9d105f688dfb4bba6ea81420bb4255b70f1ff8ef51c5ae04cab3424464e453632f2ea17727f2e1629a95148d06379720f5e1cae448fa4a8
7
+ data.tar.gz: 2f57226b5dac0f9311bf0c5dec2750354aa6c40c5348e2f1df858b52a470aed242aa380be1d4aadfd2d9a1753e5fbad0a1a621a232d28d5e81b3dde4b2c929d9
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.travis.yml ADDED
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2.3
4
+ before_install: gem install bundler -v 1.10.6
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in kvg_character_recognition.gemspec
4
+ gemspec
5
+
6
+ gem 'sequel'
7
+ gem 'sqlite3'
8
+ gem 'nokogiri'
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 Jiayi Zheng
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,96 @@
1
+ # KvgCharacterRecognition
2
+ The KvgCharacterRecognition module contains a CJK character recognition engine which uses pattern/template matching techniques to recognize handwritten character patterns independently of stroke order and stroke number. Input patterns have the format [stroke1, stroke2 ...].
3
+ A stroke is an array of points in the format [[x1, y1], [x2, y2], ...].
4
+ For templates, we use svg data from the [KanjiVG project](http://kanjivg.tagaini.net/)
5
+
6
+ The engine takes 3 steps to perform the recognition of an input pattern.
7
+ 1. Preprocessing
8
+ The preprocessing step consists of smoothing, normalizing, interpolating and downsampling of the data points.
9
+ 2. Feature Extraction
10
+ Smoothed heatmap, significant points and directional feature densities are used as features.
11
+ A heatmap divides the input pattern in small grids and stores the number of data points in each grid.
12
+ Significant points are defined as start and end point of a stroke, points on curve or edge.
13
+ Directional feature densities are introduced in the paper "On-line Recognition of Freely Handwritten Japanese Characters Using Directional Feature Densities" by Akinori Kawamura et al.
14
+ 3. Matching
15
+ We use the significant points to perform a coarse recognition of the input pattern, that filters out template patterns with great distance to the input pattern. Next, a mixed distance score of directional feature density and smoothed heatmap is calculated.
16
+ ## Installation
17
+
18
+ Add this line to your application's Gemfile:
19
+
20
+ ```ruby
21
+ gem 'kvg_character_recognition'
22
+ ```
23
+
24
+ And then execute:
25
+
26
+ $ bundle
27
+
28
+ Or install it yourself as:
29
+
30
+ $ gem install kvg_character_recognition
31
+
32
+ ## Usage
33
+
34
+ 1. Create a database(e.g. using sqlite3 data.db)
35
+
36
+ 2. Setup the characters table in the database and populate it with kanjivg templates from the [xml release](https://github.com/KanjiVG/kanjivg/releases)
37
+ ```ruby
38
+ require 'kvg_character_recognition'
39
+
40
+ KvgCharacterRecognition::Database.setup
41
+
42
+ KvgCharacterRecognition::Database.populate_from_xml "kanjivg-20150615-2.xml"
43
+ ```
44
+
45
+ 3. Recognition
46
+
47
+ Use an input field of size 300x300 for the best recognition accuracy. The input pattern in the example is the character 二, drawn on a 300x300 html canvas using mouse.
48
+ ```ruby
49
+ strokes = [[[99.0, 108.0], [100.0, 108.0], [101.0, 108.0], [101.0, 108.0], [103.0, 108.0], [105.0, 107.0], [107.0, 107.0], [108.0, 107.0], [111.0, 106.0], [111.0, 106.0], [112.0, 106.0], [113.0, 106.0], [114.0, 106.0], [115.0, 105.0], [116.0, 105.0], [118.0, 105.0], [120.0, 105.0], [121.0, 104.0], [122.0, 104.0], [122.0, 104.0], [123.0, 104.0], [124.0, 103.0], [125.0, 103.0], [126.0, 103.0], [127.0, 103.0], [129.0, 102.0], [130.0, 102.0], [132.0, 102.0], [132.0, 101.0], [133.0, 101.0], [135.0, 101.0], [136.0, 101.0], [137.0, 101.0], [138.0, 101.0], [140.0, 101.0], [141.0, 100.0], [142.0, 100.0], [143.0, 100.0], [144.0, 100.0], [145.0, 99.0], [148.0, 99.0], [150.0, 99.0], [151.0, 98.0], [152.0, 98.0], [153.0, 98.0], [154.0, 98.0], [156.0, 97.0], [157.0, 97.0], [158.0, 97.0], [159.0, 97.0], [161.0, 97.0], [162.0, 96.0], [162.0, 96.0], [164.0, 96.0], [165.0, 96.0], [166.0, 96.0], [167.0, 96.0], [169.0, 95.0], [170.0, 95.0], [171.0, 95.0], [172.0, 95.0], [173.0, 95.0], [174.0, 95.0]], [[53.0, 190.0], [54.0, 190.0], [56.0, 190.0], [57.0, 190.0], [59.0, 190.0], [61.0, 190.0], [63.0, 189.0], [66.0, 189.0], [67.0, 189.0], [68.0, 189.0], [69.0, 189.0], [71.0, 189.0], [72.0, 188.0], [72.0, 188.0], [74.0, 188.0], [76.0, 187.0], [78.0, 187.0], [80.0, 187.0], [81.0, 187.0], [82.0, 186.0], [84.0, 186.0], [87.0, 186.0], [89.0, 185.0], [91.0, 185.0], [93.0, 185.0], [95.0, 184.0], [98.0, 184.0], [100.0, 183.0], [102.0, 183.0], [104.0, 183.0], [106.0, 183.0], [110.0, 182.0], [111.0, 182.0], [112.0, 182.0], [115.0, 182.0], [118.0, 182.0], [120.0, 182.0], [122.0, 182.0], [125.0, 182.0], [128.0, 181.0], [130.0, 181.0], [133.0, 180.0], [136.0, 180.0], [141.0, 180.0], [143.0, 179.0], [146.0, 179.0], [150.0, 179.0], [152.0, 178.0], [155.0, 178.0], [158.0, 178.0], [159.0, 178.0], [162.0, 177.0], [164.0, 177.0], [167.0, 177.0], [170.0, 177.0], [173.0, 176.0], [176.0, 176.0], [179.0, 176.0], [182.0, 175.0], [187.0, 175.0], [189.0, 174.0], [192.0, 174.0], [194.0, 174.0], [196.0, 173.0], 
[199.0, 173.0], [202.0, 173.0], [204.0, 172.0], [206.0, 172.0], [209.0, 172.0], [211.0, 172.0], [212.0, 172.0], [215.0, 172.0], [217.0, 172.0], [219.0, 171.0], [221.0, 171.0], [221.0, 172.0]]]
50
+
51
+ scores = KvgCharacterRecognition::Recognizer.scores strokes
52
+
53
+ irb(main):004:0> scores.take 10
54
+ => [[1.524079282599697, 60, "二"], [2.8346163809971143, 1373, "工"], [3.0987422100694757, 7, "上"], [3.127346308294038, 365, "冫"], [3.439293212191952, 6, "三"], [3.4890481845638304, 3770, "立"], [3.541524904953307, 2721, "江"], [3.641178875851016, 569, "厂"], [3.6447144433336294, 72, "亠"], [3.7498483818966353, 2706, "氵"]]
55
+ ```
56
+
57
+ ## Configuration
58
+ You can try out different parameters to adapt the extracted features to your input settings, e.g. a different sample rate or canvas size.
59
+ Don't forget to redo the whole database step after changing the configuration.
60
+ ```ruby
61
+ #this is the default configuration
62
+ config = {
63
+ size: 109, #fixed canvas size of kanjivg data
64
+ downsample_interval: 4,
65
+ interpolate_distance: 0.8,
66
+ direction_grid: 15,
67
+ smoothed_heatmap_grid: 20,
68
+ significant_points_heatmap_grid: 3
69
+ }
70
+
71
+ #from hash
72
+ KvgCharacterRecognition.configure(config)
73
+ #from yaml file
74
+ KvgCharacterRecognition.configure_with(path_to_yml)
75
+
76
+ #configure database with yml
77
+ #TODO why is postgres slower than sqlite?
78
+ KvgCharacterRecognition.configure_database(path_to_yml)
79
+ ```
80
+
81
+
82
+ ## Development
83
+
84
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
85
+
86
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
87
+
88
+ ## Contributing
89
+
90
+ Bug reports and pull requests are welcome on GitHub at https://github.com/thebluber/kvg_character_recognition.
91
+
92
+
93
+ ## License
94
+
95
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
96
+
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "kvg_character_recognition"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
data/bin/setup ADDED
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,40 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'kvg_character_recognition/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "kvg_character_recognition"
8
+ spec.version = KvgCharacterRecognition::VERSION
9
+ spec.authors = ["Jiayi Zheng"]
10
+ spec.email = ["thebluber@gmail.com"]
11
+
12
+ spec.summary = "CJK-character recognition using template matching techniques and template data from KanjiVG project"
13
+ spec.description = %q{This gem contains a CJK-character recognition engine using pattern/template matching techniques.
14
+ It can recognize stroke-order and stroke-number free handwritten character patterns in the format [stroke1, stroke2 ...].
15
+ A stroke is an array of points in the format [[x1, y1], [x2, y2], ...].
16
+ KanjiVG data(characters in svg format) from https://github.com/KanjiVG/kanjivg/releases are used as templates.
17
+ }
18
+ spec.homepage = "https://github.com/thebluber/kvg_character_recognition"
19
+ spec.license = "MIT"
20
+
21
+ # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
22
+ # delete this section to allow pushing this gem to any host.
23
+ if spec.respond_to?(:metadata)
24
+ spec.metadata['allowed_push_host'] = 'https://rubygems.org'
25
+ else
26
+ raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
27
+ end
28
+
29
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
30
+ spec.bindir = "exe"
31
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
32
+ spec.require_paths = ["lib"]
33
+
34
+ spec.add_dependency "nokogiri"
35
+ spec.add_dependency "sequel"
36
+ spec.add_dependency "sqlite3"
37
+ spec.add_dependency "bundler", "~> 1.10"
38
+ spec.add_development_dependency "rake", "~> 10.0"
39
+ spec.add_development_dependency "rspec"
40
+ end
@@ -0,0 +1,55 @@
1
+ require 'bundler'
2
+ Bundler.require
3
+ require 'yaml'
4
+ #require all files in ./lib/
5
+ Dir[File.join(File.dirname(__FILE__), '/kvg_character_recognition/*.rb')].each {|file| require file }
6
+
7
#Top-level namespace of the gem.
#It owns the state shared by the other classes:
#- CONFIG: parameters of the preprocessing and feature extraction steps
#- the Sequel database handle returned by KvgCharacterRecognition.db
module KvgCharacterRecognition

  #Default database connection, replaced by configure_database
  @db = Sequel.connect('sqlite://characters.db')

  #Default configuration
  #The hash is updated in place by configure, so every reader of CONFIG sees the new values
  CONFIG = {
    size: 109, #fixed canvas size of kanjivg data
    downsample_interval: 4,
    interpolate_distance: 0.8,
    direction_grid: 15,
    smoothed_heatmap_grid: 20,
    significant_points_heatmap_grid: 3
  }
  VALID_KEYS = CONFIG.keys

  #Configure through hash
  #Keys may be strings or symbols, keys not listed in VALID_KEYS are ignored
  #Params:
  #+opts+:: hash of configuration values
  def self.configure(opts = {})
    opts.each {|k,v| CONFIG[k.to_sym] = v if VALID_KEYS.include? k.to_sym}
  end

  #Configure with yaml
  #A missing, malformed or non-mapping file only produces a warning and leaves CONFIG untouched
  #Params:
  #+yml+:: path to a yaml file containing a mapping of configuration values
  def self.configure_with(yml)
    config = load_yaml(yml)
    return if config.nil?

    unless config.is_a?(Hash)
      #e.g. an empty file; calling configure with it would raise NoMethodError
      warn "YAML configuration file doesn't contain a mapping. Using defaults."
      return
    end

    configure(config)
  end

  #Configure database
  #The parsed content of the yaml file (a connection string or a hash of connection options)
  #is handed to Sequel.connect; previously the path of the yaml file itself was passed
  #A missing, malformed or empty file only produces a warning and keeps the current connection
  #Params:
  #+yml+:: path to a yaml file describing the database connection
  def self.configure_database(yml)
    db_config = load_yaml(yml)
    return unless db_config

    @db = Sequel.connect(db_config)
  end

  #getter
  def self.db
    @db
  end

  #Reads and parses a yaml file
  #Returns the parsed content, or nil after printing a warning if the file is missing or malformed
  #Warnings go through Kernel#warn because no log method is visible in this module
  #NOTE(review): YAML::load may instantiate arbitrary objects on older rubies, only pass trusted files
  def self.load_yaml(path)
    YAML::load(IO.read(path))
  rescue Errno::ENOENT
    warn "YAML configuration file couldn't be found. Using defaults."
    nil
  rescue Psych::SyntaxError
    warn "YAML configuration file contains invalid syntax. Using defaults."
    nil
  end
  private_class_method :load_yaml

end
@@ -0,0 +1,103 @@
1
+ require 'matrix'
2
+ require 'nokogiri'
3
+
4
+ module KvgCharacterRecognition
5
+ #This class contains methods for database interactions
6
+ class Database
7
+
8
+ #This method creates a database table for storing the extracted features of the templates
9
+ #Arrays of points will be serialized and stored as string
10
+ #Following fields are created:
11
+ # - primary_key :id
12
+ # - String :value
13
+ # - Integer :codepoint
14
+ # - String :serialized_strokes i.e. [stroke, x, y]
15
+ # - String :direction_e1
16
+ # - String :direction_e2
17
+ # - String :direction_e3
18
+ # - String :direction_e4
19
+ # - String :heatmap_smoothed
20
+ # - String :heatmap_significant_points
21
+ def self.setup
22
+ KvgCharacterRecognition.db.create_table :characters do
23
+ primary_key :id
24
+ String :value
25
+ Integer :codepoint
26
+ Integer :number_of_strokes
27
+ String :serialized_strokes
28
+ String :direction_e1
29
+ String :direction_e2
30
+ String :direction_e3
31
+ String :direction_e4
32
+ String :heatmap_smoothed
33
+ String :heatmap_significant_points
34
+ end
35
+
36
+ end
37
+
38
+ #Drop created table
39
+ def self.drop
40
+ KvgCharacterRecognition.db.drop_table(:characters) if KvgCharacterRecognition.db.table_exists?(:characters)
41
+ end
42
+
43
+ #This method populates the database table with parsed template patterns from the kanjivg file in xml format
44
+ #Params:
45
+ #+xml+:: download the latest xml release from https://github.com/KanjiVG/kanjivg/releases
46
+ def self.populate_from_xml xml
47
+ file = File.open(xml) { |f| Nokogiri::XML(f) }
48
+
49
+ file.xpath("//kanji").each do |kanji|
50
+ #id has format: "kvg:kanji_CODEPOINT"
51
+ codepoint = kanji.attributes["id"].value.split("_")[1]
52
+ next unless codepoint.hex >= "04e00".hex && codepoint.hex <= "09faf".hex
53
+ puts codepoint
54
+ value = [codepoint.hex].pack("U")
55
+
56
+ #Preprocessing
57
+ #--------------
58
+ #parse strokes
59
+ strokes = kanji.xpath("g//path").map{|p| p.attributes["d"].value }.map{ |stroke| KvgParser::Stroke.new(stroke).to_a }
60
+ #strokes in the format [[[x1, y1], [x2, y2] ...], [[x2, y2], [x3, y3] ...], ...]
61
+ strokes = Preprocessor.preprocess(strokes, CONFIG[:interpolate_distance], CONFIG[:downsample_interval], false)
62
+
63
+ #serialize strokes
64
+ serialized = strokes.map.with_index do |stroke, i|
65
+ stroke.map{ |p| [i, p[0], p[1]] }
66
+ end
67
+
68
+ points = strokes.flatten(1)
69
+
70
+ #Feature Extraction
71
+ #--------------
72
+ #20x20 heatmap smoothed
73
+ heatmap_smoothed = FeatureExtractor.smooth_heatmap(FeatureExtractor.heatmap(points, CONFIG[:smoothed_heatmap_grid], CONFIG[:size]))
74
+
75
+ #directional feature densities
76
+ #transposed from Mx4 to 4xM
77
+ direction = Matrix.columns(FeatureExtractor.spatial_weight_filter(FeatureExtractor.directional_feature_densities(strokes, CONFIG[:direction_grid])).to_a).to_a
78
+
79
+ #significant points
80
+ significant_points = Preprocessor.significant_points(strokes)
81
+
82
+ #3x3 heatmap of significant points for coarse recognition
83
+ heatmap_significant_points = FeatureExtractor.heatmap(significant_points, CONFIG[:significant_points_heatmap_grid], CONFIG[:size])
84
+
85
+
86
+ #Store to database
87
+ #--------------
88
+ KvgCharacterRecognition.db[:characters].insert value: value,
89
+ codepoint: codepoint.hex,
90
+ number_of_strokes: strokes.count,
91
+ serialized_strokes: serialized.join(","),
92
+ direction_e1: direction[0].join(","),
93
+ direction_e2: direction[1].join(","),
94
+ direction_e3: direction[2].join(","),
95
+ direction_e4: direction[3].join(","),
96
+ heatmap_smoothed: heatmap_smoothed.to_a.join(","),
97
+ heatmap_significant_points: heatmap_significant_points.to_a.join(",")
98
+ end
99
+
100
+ end
101
+
102
+ end
103
+ end
@@ -0,0 +1,168 @@
1
+ require 'matrix'
2
+ module KvgCharacterRecognition
3
+ #This class contains a collection of methods for extracting useful features
4
+ class FeatureExtractor
5
+
6
+ #This methods generates a heatmap for the given character pattern
7
+ #A heatmap divides the input character pattern(image of the character) into nxn grids
8
+ #We count the points in each grid and store the number in a map
9
+ #The map array can be used as feature
10
+ #Params:
11
+ #+points+:: flattened strokes i.e. [[x1, y1], [x2, y2]...] because the seperation of points in strokes is irrelevant in this case
12
+ #+grid+:: number of grids
13
+ def self.heatmap points, grid, size
14
+
15
+ grid_size = size / grid.to_f
16
+
17
+ map = Map.new grid, grid, 0
18
+
19
+ #fill the heatmap
20
+ points.each do |point|
21
+ if point[0] < size && point[1] < size
22
+ x_i = (point[0] / grid_size).floor if point[0] < size
23
+ y_i = (point[1] / grid_size).floor if point[1] < size
24
+
25
+ map[y_i, x_i] = map[y_i, x_i] + 1
26
+ end
27
+ end
28
+
29
+ map
30
+ end
31
+
32
+ #This method calculates the directional feature densities and stores them in a map
33
+ #The process and algorithm is described in the paper "On-line Recognition of Freely Handwritten Japanese Characters Using Directional Feature Densities" by Akinori Kawamura and co.
34
+ #Params:
35
+ #+strokes+:: [[[x1, y1], [x2, y2] ...], [[x1, y1], ...]]]
36
+ #+grid+:: number of grids in which the input character pattern should be seperated. Default is 15 as in the paper
37
+ def self.directional_feature_densities strokes, grid
38
+ #initialize a map for storing the weights in each directional space
39
+ map = Map.new grid, grid, [0, 0, 0, 0]
40
+
41
+ #step width
42
+ step = CONFIG[:size] / grid.to_f
43
+
44
+ strokes.each do |stroke|
45
+ current_p = stroke[0]
46
+ stroke.each do |point|
47
+ next if point == current_p
48
+ #map current point coordinate to map index
49
+ #i_x = xth column
50
+ #i_y = yth row
51
+ i_x = (current_p[0] / step).floor
52
+ i_y = (current_p[1] / step).floor
53
+
54
+ #direction vector V_ij = P_ij+1 - P_ij
55
+ v = [point[0] - current_p[0], point[1] - current_p[1]]
56
+ #store the sum of decomposed direction vectors in the corresponding grid
57
+ decomposed = decompose(v)
58
+ map[i_y, i_x] = [map[i_y, i_x][0] + decomposed[0],
59
+ map[i_y, i_x][1] + decomposed[1],
60
+ map[i_y, i_x][2] + decomposed[2],
61
+ map[i_y, i_x][3] + decomposed[3]]
62
+ end
63
+ end
64
+ map
65
+ end
66
+
67
+ #This method is a helper method for calculating directional feature density
68
+ #which decomposes the direction vector into predefined direction spaces
69
+ #- e1: [1, 0]
70
+ #- e2: [1/sqrt(2), 1/sqrt(2)]
71
+ #- e3: [0, 1]
72
+ #- e4: [-1/sqrt(2), 1/sqrt(2)]
73
+ #Params:
74
+ #+v+:: direction vector of 2 adjacent points V_ij = P_ij+1 - P_ij
75
+ def self.decompose v
76
+ e1 = [1, 0]
77
+ e2 = [1/Math.sqrt(2), 1/Math.sqrt(2)]
78
+ e3 = [0, 1]
79
+ e4 = [-1/Math.sqrt(2), 1/Math.sqrt(2)]
80
+ #angle between vector v and e1
81
+ #det = x1*y2 - x2*y1
82
+ #dot = x1*x2 + y1*y2
83
+ #atan2(det, dot) in range 0..180 and 0..-180
84
+ angle = (Math.atan2(v[1], v[0]) / (Math::PI / 180)).floor
85
+ if (0..44).cover?(angle) || (-180..-136).cover?(angle)
86
+ decomposed = [(Matrix.columns([e1, e2]).inverse * Vector.elements(v)).to_a, 0, 0].flatten
87
+ elsif (45..89).cover?(angle) || (-135..-91).cover?(angle)
88
+ decomposed = [0, (Matrix.columns([e2, e3]).inverse * Vector.elements(v)).to_a, 0].flatten
89
+ elsif (90..134).cover?(angle) || (-90..-44).cover?(angle)
90
+ decomposed = [0, 0, (Matrix.columns([e3, e4]).inverse * Vector.elements(v)).to_a].flatten
91
+ elsif (135..179).cover?(angle) || (-45..-1).cover?(angle)
92
+ tmp = (Matrix.columns([e4, e1]).inverse * Vector.elements(v)).to_a
93
+ decomposed = [tmp[0], 0, 0, tmp[1]]
94
+ end
95
+
96
+ decomposed
97
+ end
98
+
99
+ #This methods reduces the dimension of directonal feature densities stored in the map
100
+ #It takes every 2nd grid of directional_feature_densities map and stores the average of the weighted sum of adjacent grids around it
101
+ #weights = [1/16, 2/16, 1/16];
102
+ # [2/16, 4/16, 2/16];
103
+ # [1/16, 2/16, 1/16]
104
+ #Params:
105
+ #+map+:: directional feature densities map i.e. [[e1, e2, e3, e4], [e1, e2, e3, e4] ...] for each grid of input character pattern
106
+ def self.spatial_weight_filter map
107
+ #default grid should be 15
108
+ grid = map.size
109
+ new_grid = (grid / 2.0).ceil
110
+ new_map = Map.new(new_grid, new_grid, [0, 0, 0, 0])
111
+
112
+ (0..(grid - 1)).each_slice(2) do |i, i2|
113
+ (0..(grid - 1)).each_slice(2) do |j, j2|
114
+ #weights = [1/16, 2/16, 1/16];
115
+ # [2/16, 4/16, 2/16];
116
+ # [1/16, 2/16, 1/16]
117
+ w11 = (0..(grid-1)).cover?(i+1) && (0..(grid-1)).cover?(j-1)? map[i+1,j-1].map{|e| e * 1 / 16.0} : [0, 0, 0, 0]
118
+ w12 = (0..(grid-1)).cover?(i+1) && (0..(grid-1)).cover?(j)? map[i+1,j].map{|e| e * 2 / 16.0} : [0, 0, 0, 0]
119
+ w13 = (0..(grid-1)).cover?(i+1) && (0..(grid-1)).cover?(j+1)? map[i+1,j+1].map{|e| e * 1 / 16.0} : [0, 0, 0, 0]
120
+ w21 = (0..(grid-1)).cover?(i) && (0..(grid-1)).cover?(j-1)? map[i,j-1].map{|e| e * 2 / 16.0} : [0, 0, 0, 0]
121
+ w22 = (0..(grid-1)).cover?(i) && (0..(grid-1)).cover?(j)? map[i,j].map{|e| e * 4 / 16.0} : [0, 0, 0, 0]
122
+ w23 = (0..(grid-1)).cover?(i) && (0..(grid-1)).cover?(j+1)? map[i,j+1].map{|e| e * 2 / 16.0} : [0, 0, 0, 0]
123
+ w31 = (0..(grid-1)).cover?(i-1) && (0..(grid-1)).cover?(j-1)? map[i-1,j-1].map{|e| e * 1 / 16.0} : [0, 0, 0, 0]
124
+ w32 = (0..(grid-1)).cover?(i-1) && (0..(grid-1)).cover?(j)? map[i-1,j].map{|e| e * 2 / 16.0} : [0, 0, 0, 0]
125
+ w33 = (0..(grid-1)).cover?(i-1) && (0..(grid-1)).cover?(j+1)? map[i-1,j+1].map{|e| e * 1 / 16.0} : [0, 0, 0, 0]
126
+
127
+ new_map[i/2,j/2] = [w11[0] + w12[0] + w13[0] + w21[0] + w22[0] + w23[0] + w31[0] + w32[0] + w33[0],
128
+ w11[1] + w12[1] + w13[1] + w21[1] + w22[1] + w23[1] + w31[1] + w32[1] + w33[1],
129
+ w11[2] + w12[2] + w13[2] + w21[2] + w22[2] + w23[2] + w31[2] + w32[2] + w33[2],
130
+ w11[3] + w12[3] + w13[3] + w21[3] + w22[3] + w23[3] + w31[3] + w32[3] + w33[3]]
131
+ end
132
+ end
133
+
134
+ new_map
135
+ end
136
+
137
+ #This method smooths a heatmap using spatial_weight_filter technique
138
+ #but instead of taking every 2nd grid, it processes every grid and stores the average of the weighted sum of adjacent grids
139
+ #Params:
140
+ #+map+:: a heatmap
141
+ def self.smooth_heatmap map
142
+ grid = map.size
143
+ #map is a heatmap
144
+ new_map = Map.new(grid, grid, 0)
145
+
146
+ (0..(grid - 1)).each do |i|
147
+ (0..(grid - 1)).each do |j|
148
+ #weights = [1/16, 2/16, 1/16];
149
+ # [2/16, 4/16, 2/16];
150
+ # [1/16, 2/16, 1/16]
151
+ w11 = (0..(grid-1)).cover?(i+1) && (0..(grid-1)).cover?(j-1)? map[i+1,j-1] * 1 / 16.0 : 0
152
+ w12 = (0..(grid-1)).cover?(i+1) && (0..(grid-1)).cover?(j)? map[i+1,j] * 2 / 16.0 : 0
153
+ w13 = (0..(grid-1)).cover?(i+1) && (0..(grid-1)).cover?(j+1)? map[i+1,j+1] * 1 / 16.0 : 0
154
+ w21 = (0..(grid-1)).cover?(i) && (0..(grid-1)).cover?(j-1)? map[i,j-1] * 2 / 16.0 : 0
155
+ w22 = (0..(grid-1)).cover?(i) && (0..(grid-1)).cover?(j)? map[i,j] * 4 / 16.0 : 0
156
+ w23 = (0..(grid-1)).cover?(i) && (0..(grid-1)).cover?(j+1)? map[i,j+1] * 2 / 16.0 : 0
157
+ w31 = (0..(grid-1)).cover?(i-1) && (0..(grid-1)).cover?(j-1)? map[i-1,j-1] * 1 / 16.0 : 0
158
+ w32 = (0..(grid-1)).cover?(i-1) && (0..(grid-1)).cover?(j)? map[i-1,j] * 2 / 16.0 : 0
159
+ w33 = (0..(grid-1)).cover?(i-1) && (0..(grid-1)).cover?(j+1)? map[i-1,j+1] * 1 / 16.0 : 0
160
+
161
+ new_map[i,j] = w11 + w12 + w13 + w21 + w22 + w23 + w31 + w32 + w33
162
+ end
163
+ end
164
+
165
+ new_map
166
+ end
167
+ end
168
+ end
@@ -0,0 +1,214 @@
1
+ module KvgCharacterRecognition
2
+ #This class has a collection of methods for the preprocessing step of character recognition
3
+ class Preprocessor
4
+
5
+ #A simple smooth method using the following formula
6
+ #p'(i) = (w(-M)*p(i-M) + ... + w(0)*p(i) + ... + w(M)*p(i+M)) / S
7
+ #where the smoothed point is a weighted average of its adjacent points.
8
+ #Only the user input should be smoothed, it is not necessary for kvg data.
9
+ #Params:
10
+ #+stroke+:: array of points i.e [[x1, y1], [x2, y2] ...]
11
+ def self.smooth stroke
12
+ weights = [1,3,1]
13
+ offset = weights.length / 2
14
+ wsum = weights.inject{ |sum, x| sum + x}
15
+
16
+ return stroke if stroke.length < weights.length
17
+
18
+ copy = stroke.dup
19
+
20
+ (offset..(stroke.length - offset - 1)).each do |i|
21
+ accum = [0, 0]
22
+
23
+ weights.each_with_index do |w, j|
24
+ accum[0] += w * copy[i + j - offset][0]
25
+ accum[1] += w * copy[i + j - offset][1]
26
+ end
27
+
28
+ stroke[i] = accum.map{ |acc| (acc / wsum.to_f).round(2) }
29
+ end
30
+ stroke
31
+ end
32
+
33
+ #This method executes different preprocessing steps
34
+ #0.Normalize strokes to the size 109x109 and center the coordinates using bi moment normalization method
35
+ #1.Smooth strokes if set to true
36
+ #2.Interpolate points by given distance, in order to equalize the sample rate of input and template
37
+ #3.Downsample by given interval
38
+ def self.preprocess strokes, interpolate_distance=0.8, downsample_interval=4, smooth=true
39
+ means, diffs = means_and_diffs(strokes)
40
+ #normalize strokes
41
+ strokes = bi_moment_normalize(means, diffs, strokes)
42
+
43
+ strokes.map do |stroke|
44
+ stroke = smooth(stroke) if smooth
45
+ interpolated = interpolate(stroke, interpolate_distance)
46
+ downsample(interpolated, downsample_interval)
47
+ end
48
+ end
49
+
50
+ #This method calculates means and diffs of x and y coordinates in the strokes
51
+ #The return values are used in the normalization step
52
+ #means, diffs = means_and_diffs strokes
53
+ #Return values:
54
+ #+means+:: [mean_of_x, mean_of_y]
55
+ #+diffs+:: differences of the x and y coordinates to their means i.e. [[d_x1, d_x2 ...], [d_y1, d_y2 ...]]
56
+ def self.means_and_diffs strokes
57
+ points = strokes.flatten(1)
58
+ sums = points.inject([0, 0]){ |acc, point| acc = [acc[0] + point[0], acc[1] + point[1]] }
59
+ #means = [x_c, y_c]
60
+ means = sums.map{ |sum| (sum / points.length.to_f).round(2) }
61
+
62
+ diffs = points.inject([[], []]){ |acc, point| acc = [acc[0] << point[0] - means[0], acc[1] << point[1] - means[1]] }
63
+ [means, diffs]
64
+ end
65
+
66
+ #This methods normalizes the strokes using bi moment
67
+ #Params:
68
+ #+strokes+:: [[[x1, y1], [x2, y2], ...], [[x1, y1], ...]]
69
+ #+means+:: [x_c, y_c]
70
+ #+diffs+:: [d_x, d_y]; d_x = [d1, d2, ...]
71
+ def self.bi_moment_normalize means, diffs, strokes
72
+
73
+ #calculating delta values
74
+ delta = Proc.new do |diff, operator|
75
+ #d_x or d_y
76
+ #operator: >= or <
77
+ accum = 0
78
+ counter = 0
79
+
80
+ diff.each do |d|
81
+ if d.send operator, 0
82
+ accum += d ** 2
83
+ counter += 1
84
+ end
85
+ end
86
+ accum / counter
87
+ end
88
+
89
+ new_strokes = []
90
+ strokes.each do |stroke|
91
+ new_stroke = []
92
+ stroke.each do |point|
93
+ if point[0] - means[0] >= 0
94
+ new_x = ( CONFIG[:size] * (point[0] - means[0]) / (4 * Math.sqrt(delta.call(diffs[0], :>=))).round(2) ) + CONFIG[:size]/2
95
+ else
96
+ new_x = ( CONFIG[:size] * (point[0] - means[0]) / (4 * Math.sqrt(delta.call(diffs[0], :<))).round(2) ) + CONFIG[:size]/2
97
+ end
98
+ if point[1] - means[1] >= 0
99
+ new_y = ( CONFIG[:size] * (point[1] - means[1]) / (4 * Math.sqrt(delta.call(diffs[1], :>=))).round(2) ) + CONFIG[:size]/2
100
+ else
101
+ new_y = ( CONFIG[:size] * (point[1] - means[1]) / (4 * Math.sqrt(delta.call(diffs[1], :<))).round(2) ) + CONFIG[:size]/2
102
+ end
103
+
104
+ if new_x >= 0 && new_x <= CONFIG[:size] && new_y >= 0 && new_y <= CONFIG[:size]
105
+ new_stroke << [new_x.round(3), new_y.round(3)]
106
+ end
107
+ end
108
+ new_strokes << new_stroke unless new_stroke.empty?
109
+ end
110
+ new_strokes
111
+ end
112
+
113
+ #This method returns the significant points of a given character
114
+ #Significant points are:
115
+ #- Start and end point of a stroke
116
+ #- Point on curve or edge
117
+ #To determine whether a point is on curve or edge, we take the 2 adjacent points and calculate the angle between the 2 vectors
118
+ #If the angle is smaller than 150 degree, then the point should be on curve or edge
119
+ def self.significant_points strokes
120
+ points = []
121
+ strokes.each_with_index do |stroke, i|
122
+ points << stroke[0]
123
+
124
+ #collect edge points
125
+ #determine whether a point is an edge point by the internal angle between vector P_i-1 - P_i and P_i+1 - P_i
126
+ pre = stroke[0]
127
+ (1..(stroke.length - 1)).each do |j|
128
+ current = stroke[j]
129
+ nex = stroke[j+1]
130
+ if nex
131
+ v1 = [pre[0] - current[0], pre[1] - current[1]]
132
+ v2 = [nex[0] - current[0], nex[1] - current[1]]
133
+ det = v1[0] * v2[1] - (v2[0] * v1[1])
134
+ dot = v1[0] * v2[0] + (v2[1] * v1[1])
135
+ angle = Math.atan2(det, dot) / (Math::PI / 180)
136
+
137
+ if angle.abs < 150
138
+ #current point is on a curve or an edge
139
+ points << current
140
+ end
141
+ end
142
+ pre = current
143
+ end
144
+
145
+ points << stroke[stroke.length - 1]
146
+ end
147
+
148
+ points
149
+ end
150
+
151
+ #This methods calculates the euclidean distance between 2 points
152
+ #Params:
153
+ #- p1, p2: [x, y]
154
+ def self.euclidean_distance(p1, p2)
155
+ sum_of_squares = 0
156
+ p1.each_with_index do |p1_coord,index|
157
+ sum_of_squares += (p1_coord - p2[index]) ** 2
158
+ end
159
+ Math.sqrt( sum_of_squares )
160
+ end
161
+
162
+ #This method interpolates points into a stroke with given distance
163
+ #The algorithm is taken from the paper preprocessing techniques for online character recognition
164
+ def self.interpolate stroke, d=0.5
165
+ current = stroke.first
166
+ new_stroke = [current]
167
+
168
+ index = 1
169
+ last_index = 0
170
+ while index < stroke.length do
171
+ point = stroke[index]
172
+
173
+ #only consider point with greater than d distance to current point
174
+ if euclidean_distance(current, point) < d
175
+ index += 1
176
+ else
177
+
178
+ #calculate new point coordinate
179
+ new_point = []
180
+ if point[0] == current[0] # x2 == x1
181
+ if point[1] > current[1] # y2 > y1
182
+ new_point = [current[0], current[1] + d]
183
+ else # y2 < y1
184
+ new_point = [current[0], current[1] - d]
185
+ end
186
+ else # x2 != x1
187
+ slope = (point[1] - current[1]) / (point[0] - current[0]).to_f
188
+ if point[0] > current[0] # x2 > x1
189
+ new_point[0] = current[0] + Math.sqrt(d**2 / (slope**2 + 1))
190
+ else # x2 < x1
191
+ new_point[0] = current[0] - Math.sqrt(d**2 / (slope**2 + 1))
192
+ end
193
+ new_point[1] = slope * new_point[0] + point[1] - (slope * point[0])
194
+ end
195
+
196
+ new_point = new_point.map{ |num| num.round(2) }
197
+ new_stroke << new_point
198
+
199
+ current = new_point
200
+ last_index += ((index - last_index) / 2).floor
201
+ index = last_index + 1
202
+ end
203
+ end
204
+
205
+ new_stroke
206
+ end
207
+
208
+ #This methods downsamples a stroke in given interval
209
+ #The number of points in the stroke will be reduced
210
+ def self.downsample stroke, interval=3
211
+ stroke.each_slice(interval).map(&:first)
212
+ end
213
+ end
214
+ end
@@ -0,0 +1,63 @@
1
+ require 'matrix'
2
+ module KvgCharacterRecognition
3
+ #This class contains methods calculating similarity scores between input pattern and template patterns
4
class Recognizer

  #Picks the templates from the database that are worth a closer look
  #Templates whose number of strokes lies outside the accepted range are left out
  #The range ends 10 strokes above the input; it starts at the input stroke count
  #for inputs of up to 5 strokes and 5 strokes below it for longer inputs
  #NOTE(review): the lower bound jumps from 5 (5 strokes) to 1 (6 strokes) - confirm this is intended
  def self.select_templates strokes
    stroke_count = strokes.count
    lower = stroke_count > 5 ? stroke_count - 5 : stroke_count
    upper = stroke_count + 10
    KvgCharacterRecognition.db[:characters].where(:number_of_strokes => (lower..upper))
  end

  #Coarse recognition based on the heatmap of significant points
  #Params:
  #+strokes+:: strokes should be preprocessed
  #Returns one pair [distance rounded to 3 decimals, template row] per selected template
  def self.coarse_recognize strokes
    significant_points = Preprocessor.significant_points(strokes)
    input_heatmap = FeatureExtractor.heatmap(significant_points, CONFIG[:significant_points_heatmap_grid], CONFIG[:size]).to_a

    select_templates(strokes).map do |template|
      #the template heatmap is stored as a comma separated list of numbers
      template_heatmap = template[:heatmap_significant_points].split(",").map(&:to_f)
      distance = Preprocessor.euclidean_distance(input_heatmap, template_heatmap)
      [distance.round(3), template]
    end
  end

  #Calculates the similarity scores of the input against the better half of the coarse candidates
  #A score mixes
  #1. the euclidean distance of the smoothed heatmaps
  #2. the mean euclidean distance of the 4 directional feature densities, scaled down by 100
  #Params:
  #+strokes+:: strokes are not preprocessed
  #Returns triples [score, template id, template value] in ascending order of score
  def self.scores strokes
    #preprocess strokes
    #with smoothing
    strokes = Preprocessor.preprocess(strokes, CONFIG[:interpolate_distance], CONFIG[:downsample_interval], true)

    #feature extraction
    densities = FeatureExtractor.directional_feature_densities(strokes, CONFIG[:direction_grid])
    weighted_densities = FeatureExtractor.spatial_weight_filter(densities)
    #one column per direction after transposing
    directions = Matrix.columns(weighted_densities.to_a).to_a
    heatmap = FeatureExtractor.heatmap(strokes.flatten(1), CONFIG[:smoothed_heatmap_grid], CONFIG[:size])
    heatmap_smoothed = FeatureExtractor.smooth_heatmap(heatmap).to_a

    #stored features are comma separated lists of numbers
    to_numbers = lambda { |csv| csv.split(",").map(&:to_f) }
    direction_columns = [:direction_e1, :direction_e2, :direction_e3, :direction_e4]

    #dump half of the templates after coarse recognition
    #ranked is in the form [[score, c1], [score, c2] ...]
    ranked = coarse_recognize(strokes).sort{ |left, right| left[0] <=> right[0] }
    shortlist = ranked.take(ranked.count / 2)

    results = shortlist.map do |_coarse_score, template|
      direction_distances = direction_columns.each_with_index.map do |column, position|
        Preprocessor.euclidean_distance(directions[position], to_numbers.call(template[column]))
      end
      direction_score = direction_distances.inject(:+) / 4

      heatmap_score = Preprocessor.euclidean_distance(heatmap_smoothed, to_numbers.call(template[:heatmap_smoothed]))

      mix = (direction_score / 100) + heatmap_score
      [mix/2, template[:id], template[:value]]
    end

    results.sort{ |left, right| left[0] <=> right[0] }
  end

end
63
+ end
@@ -0,0 +1,310 @@
1
+ module KvgCharacterRecognition
2
+
3
+ #This class can be used for storing heatmap count and directional feature densities
4
+ #basically it is a nxm matrix with an initial value in each cell
5
class Map
  #Make a new map with n * m cells
  #Params:
  #+n+:: size of the first dimension (valid range of the index i)
  #+m+:: size of the second dimension (valid range of the index j)
  #+initial_value+:: for heatmap initial_value = 0 and for directional feature densities initial_value = [0, 0, 0, 0] <= [weight in e1, weight in e2, ...]
  def initialize n, m, initial_value
    #Array.new(size, object) would put the very same object into every cell,
    #so changing a mutable initial value in place (e.g. map[i,j][0] += 1)
    #would change all cells at once. Give every cell its own copy instead.
    #Immediate values such as numbers are stored as they are.
    needs_copy = initial_value.is_a?(Array) || initial_value.is_a?(Hash) || initial_value.is_a?(String)
    @array = Array.new(n * m) { needs_copy ? initial_value.dup : initial_value }
    @n = n
    @m = m
  end

  #Access value in the cell addressed by i and j
  #e.g. map[i,j]
  #The cell lives at position j * n + i of the flat array, so i should stay below n
  def [](i, j)
    @array[j*@n + i]
  end

  #Store value in the cell addressed by i and j
  #e.g. map[i,j] = value
  def []=(i, j, value)
    @array[j*@n + i] = value
  end

  #The flat array backing the map (not a copy)
  def to_a
    @array
  end

  #Normally n is the same as m
  def size
    @n
  end
end
38
+
39
+
40
+ #This module contains classes which can be used to parse a svg command
41
+ #The code is copied from https://github.com/rogerbraun/KVG-Tools
42
+ #Methods for generating sexp or xml outputs are removed
43
+ module KvgParser
44
+ #A Point
45
class Point
  attr_accessor :x, :y, :color

  # A point has two coordinates and a color, which is :black unless given.
  def initialize(x,y, color = :black)
    @x = x
    @y = y
    @color = color
  end

  #Basic point arithmetics
  # Every operation builds a new Point with the default color;
  # the operands are left untouched.

  # Coordinate-wise sum.
  def +(p2)
    Point.new(@x + p2.x, @y + p2.y)
  end

  # Coordinate-wise difference.
  def -(p2)
    Point.new(@x - p2.x, @y - p2.y)
  end

  # Euclidean distance to p2.
  def dist(p2)
    delta_x = p2.x - @x
    delta_y = p2.y - @y
    Math.sqrt(delta_x**2 + delta_y**2)
  end

  # Scales both coordinates by number.
  def *(number)
    Point.new(@x * number, @y * number)
  end

  #to array
  # Both coordinates are rounded to 2 decimals.
  def to_a
    [@x, @y].map { |coordinate| coordinate.round(2) }
  end

end
75
+
76
+ # SVG_M represents the moveto command.
77
+ # SVG Syntax is:
78
+ # m x y
79
+ # It sets the current cursor to the point (x,y).
80
+ # As always, capitalization denotes absolute values.
81
+ # Takes a Point as argument.
82
+ # If given 2 Points, the second argument is treated as the current cursor.
83
class SVG_M

  # The position the cursor is moved to.
  attr_reader :current_cursor

  # p1 is the target of the move, p2 the offset it is added to
  # (the origin unless a second point is given).
  def initialize(p1, p2 = Point.new(0,0))
    @current_cursor = p1 + p2
  end

  # A moveto contributes no points to the stroke.
  def to_points
    []
  end

end
98
+
99
+ # SVG_C represents the cubic Bézier curveto command.
100
+ # Syntax is:
101
+ # c x1 y1 x2 y2 x y
102
+ # It sets the current cursor to the point (x,y).
103
+ # As always, capitalization denotes absolute values.
104
+ # Takes 4 Points as argument, the fourth being the current cursor
105
+ # If constructed using SVG_C.relative, the current cursor is added to every
106
+ # point.
107
class SVG_C

  # Params:
  # +c1+:: first control point
  # +c2+:: second control point
  # +p+:: end point of the curve, becomes the cursor afterwards
  # +current_cursor+:: start point of the curve
  # All points are taken as they are; use SVG_C.relative for offsets.
  def initialize(c1,c2,p,current_cursor)
    @c1,@c2,@p,@current_cursor = c1,c2,p,current_cursor
    # NOTE(review): the color state is not read by anything in this class
    # besides switch_color; presumably a leftover of the removed output
    # code. It is kept so the interface stays the same.
    @@c_color = :green
  end

  # Builds a curve from points given relative to the current cursor.
  def SVG_C.relative(c1,c2,p,current_cursor)
    SVG_C.new(c1 + current_cursor, c2 + current_cursor, p + current_cursor, current_cursor)
  end

  # The second control point; a following smooth curveto reflects it.
  def second_point
    @c2
  end

  # This implements the algorithm found here:
  # http://www.cubic.org/docs/bezier.htm
  # Takes 2 Points and a factor between 0 and 1
  # Returns the Point on the line from a to b at the given factor.
  def linear_interpolation(a,b,factor)
    xr = a.x + ((b.x - a.x) * factor)
    yr = a.y + ((b.y - a.y) * factor)

    Point.new(xr,yr)
  end

  # Cycles the shared color through green -> red -> purple -> green.
  def switch_color
    if @@c_color == :green
      @@c_color = :red
    elsif @@c_color == :red
      @@c_color = :purple
    else
      @@c_color = :green
    end
  end

  # Returns the Point on the curve for a factor between 0 (start point)
  # and 1 (end point), found by repeated linear interpolation between the
  # start point, both control points and the end point.
  def make_curvepoint(factor)
    ab = linear_interpolation(@current_cursor,@c1,factor)
    bc = linear_interpolation(@c1,@c2,factor)
    cd = linear_interpolation(@c2,@p,factor)

    abbc = linear_interpolation(ab,bc,factor)
    bccd = linear_interpolation(bc,cd,factor)
    linear_interpolation(abbc,bccd,factor)
  end

  # Approximates the length of the curve by the length of a polyline with
  # the given number of segments.
  def length(points)
    old_point = @current_cursor
    length = 0.0
    factor = points.to_f

    (1..points).each do |point|
      new_point = make_curvepoint(point / factor)
      length += old_point.dist(new_point)
      old_point = new_point
    end
    length
  end

  # This gives back an array of points on the curve. The argument is the
  # number of points per unit of curve length (a density, not a spacing).
  # The factors used are 0, 1/n, 2/n, ... with n = length * distance, so the
  # end point itself is only hit when n is a whole number.
  # A curve of length zero yields just its start point.
  def make_curvepoint_array(distance)
    points = length(20) * distance

    # With nothing to walk along, dividing by the point count would turn
    # the coordinates into NaN, which cannot be rounded later on.
    return [make_curvepoint(0.0)] if points.zero?

    factor = points.to_f
    (0..points).map { |point| make_curvepoint(point / factor) }
  end

  # Points on the curve at a density of 0.3 points per unit of length.
  def to_points
    make_curvepoint_array(0.3)
  end

  # The cursor after drawing the curve, i.e. its end point.
  def current_cursor
    @p
  end

end
193
+
194
+ # SVG_S represents the smooth curveto command.
195
+ # Syntax is:
196
+ # s x2 y2 x y
197
+ # It sets the current cursor to the point (x,y).
198
+ # As always, capitalization denotes absolute values.
199
+ # Takes 4 Points as arguments: the third is the current cursor, the fourth is the point to reflect (the second control point of the previous command)
200
+ # If constructed using SVG_S.relative, the current cursor is added to every
201
+ # point.
202
class SVG_S < SVG_C

  # The first control point is not given for a smooth curveto: it is the
  # previous point mirrored at the current cursor.
  def initialize(c2, p, current_cursor,previous_point)
    first_control = SVG_S.reflect(previous_point, current_cursor)
    super(first_control, c2, p, current_cursor)
  end

  # Variant for coordinates given relative to the current cursor.
  # SVG_C.relative adds the cursor to every point it receives, but the
  # mirrored point is already absolute. The cursor is therefore taken off
  # the mirrored point first, so that adding it again restores the
  # absolute position. Note that the result is built by SVG_C.relative.
  def self.relative(c2, p, current_cursor, previous_point)
    mirrored = SVG_S.reflect(previous_point, current_cursor)
    SVG_C.relative(mirrored - current_cursor, c2, p, current_cursor)
  end

  # Mirrors p at the point mirror.
  def self.reflect(p, mirror)
    offset = mirror - p
    mirror + offset
  end

end
224
+
225
+
226
+ # Stroke represent one stroke, which is a series of SVG commands.
227
class Stroke
  # Tokens that start a new command once the path string has been split.
  COMMANDS = ["M", "C", "c", "s", "S"]

  # Parses the path data of one stroke, e.g. "M10,20c1,2,3,4,5,6".
  def initialize(stroke_as_code)
    @command_list = parse(stroke_as_code)
  end

  # All points of the stroke, command after command.
  def to_points
    @command_list.map{|element| element.to_points}.flatten
  end

  #to array
  #TODO: better implementation using composite pattern
  def to_a
    to_points.map{|point| point.to_a}
  end

  # Splits path data into command letters and number strings.
  # A comma is put in front of every minus sign and around every command
  # letter, a lowercase "m" is turned into "M", "[" is dropped and the
  # remaining separators (spaces, doubled commas) are collapsed before the
  # string is split at the commas.
  def split_elements(line)
    line.gsub("-",",-")
        .gsub("s",",s,").gsub("S",",S,")
        .gsub("c",",c,").gsub("C",",C,")
        .gsub("m", "M").gsub("M","M,")
        .gsub("[","").gsub(";",",;,")
        .gsub(",,",",").gsub(" ,", ",").gsub(", ", ",").gsub(" ", ",")
        .split(/,/)
  end

  # Turns path data into a list of command objects (SVG_M, SVG_C, SVG_S).
  # Tokens that are not a known command are dropped one by one.
  def parse(stroke_as_code)
    elements = split_elements(stroke_as_code).delete_if{ |e| e == "" }
    command_list = Array.new
    current_cursor = Point.new(0,0)

    until elements.empty?

      case elements.slice!(0)
      when "M"
        x,y = elements.slice!(0..1)
        m = SVG_M.new(Point.new(x.to_f,y.to_f))
        current_cursor = m.current_cursor
        command_list.push(m)

      when "C"
        x1,y1,x2,y2,x,y = elements.slice!(0..5)
        c = SVG_C.new(Point.new(x1.to_f,y1.to_f), Point.new(x2.to_f,y2.to_f), Point.new(x.to_f,y.to_f), current_cursor)
        current_cursor = c.current_cursor
        command_list.push(c)

        #handle polybezier
        unless elements.empty? || COMMANDS.include?(elements.first)
          elements.unshift("C")
        end
      when "c"
        x1,y1,x2,y2,x,y = elements.slice!(0..5)
        c = SVG_C.relative(Point.new(x1.to_f,y1.to_f), Point.new(x2.to_f,y2.to_f), Point.new(x.to_f,y.to_f), current_cursor)
        current_cursor = c.current_cursor
        command_list.push(c)

        #handle polybezier
        unless elements.empty? || COMMANDS.include?(elements.first)
          elements.unshift("c")
        end

      when "s"
        x2,y2,x,y = elements.slice!(0..3)
        reflected_point = previous_control_point(command_list, current_cursor)
        s = SVG_S.relative(Point.new(x2.to_f,y2.to_f), Point.new(x.to_f,y.to_f), current_cursor, reflected_point)
        current_cursor = s.current_cursor
        command_list.push(s)

      when "S"
        x2,y2,x,y = elements.slice!(0..3)
        reflected_point = previous_control_point(command_list, current_cursor)
        s = SVG_S.new(Point.new(x2.to_f,y2.to_f), Point.new(x.to_f,y.to_f), current_cursor,reflected_point)
        current_cursor = s.current_cursor
        command_list.push(s)

      else
        # Unknown token: it has already been removed from the list above,
        # so it is simply skipped.
      end

    end

    command_list
  end

  private

  # The point a smooth curveto has to reflect: the second control point of
  # the previous command. If there is no previous command, or it has no
  # second control point (a moveto), the current cursor is used, which makes
  # the reflected control point coincide with the cursor.
  def previous_control_point(command_list, current_cursor)
    previous = command_list.last
    previous.respond_to?(:second_point) ? previous.second_point : current_cursor
  end

end
309
+ end
310
+ end
@@ -0,0 +1,3 @@
1
module KvgCharacterRecognition
  #Version number of the gem
  VERSION = '0.1.0'
end
metadata ADDED
@@ -0,0 +1,152 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: kvg_character_recognition
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Jiayi Zheng
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2016-01-12 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: sequel
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: sqlite3
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.10'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.10'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '10.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '10.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rspec
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: "This gem contains a CJK-character recognition engine using pattern/template
98
+ matching techniques.\n It can recognize stroke-order and stroke-number free handwritten
99
+ character patterns in the format [stroke1, stroke2 ...].\n A stroke is an array
100
+ of points in the format [[x1, y1], [x2, y2], ...].\n KanjiVG data(characters in
101
+ svg format) from https://github.com/KanjiVG/kanjivg/releases are used as templates.\n
102
+ \ "
103
+ email:
104
+ - thebluber@gmail.com
105
+ executables: []
106
+ extensions: []
107
+ extra_rdoc_files: []
108
+ files:
109
+ - ".gitignore"
110
+ - ".rspec"
111
+ - ".travis.yml"
112
+ - Gemfile
113
+ - LICENSE.txt
114
+ - README.md
115
+ - Rakefile
116
+ - bin/console
117
+ - bin/setup
118
+ - kvg_character_recognition.gemspec
119
+ - lib/kvg_character_recognition.rb
120
+ - lib/kvg_character_recognition/database.rb
121
+ - lib/kvg_character_recognition/feature_extractor.rb
122
+ - lib/kvg_character_recognition/preprocessor.rb
123
+ - lib/kvg_character_recognition/recognizer.rb
124
+ - lib/kvg_character_recognition/utils.rb
125
+ - lib/kvg_character_recognition/version.rb
126
+ homepage: https://github.com/thebluber/kvg_character_recognition
127
+ licenses:
128
+ - MIT
129
+ metadata:
130
+ allowed_push_host: https://rubygems.org
131
+ post_install_message:
132
+ rdoc_options: []
133
+ require_paths:
134
+ - lib
135
+ required_ruby_version: !ruby/object:Gem::Requirement
136
+ requirements:
137
+ - - ">="
138
+ - !ruby/object:Gem::Version
139
+ version: '0'
140
+ required_rubygems_version: !ruby/object:Gem::Requirement
141
+ requirements:
142
+ - - ">="
143
+ - !ruby/object:Gem::Version
144
+ version: '0'
145
+ requirements: []
146
+ rubyforge_project:
147
+ rubygems_version: 2.4.5.1
148
+ signing_key:
149
+ specification_version: 4
150
+ summary: CJK-character recognition using template matching techniques and template
151
+ data from KanjiVG project
152
+ test_files: []