hintable_levenshtein 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,35 @@
1
+ = Hintable Levenshtein
2
+
3
+ Levenshtein distances but with extra hints. Perhaps adding or deleting a space is not as big a change as other things, or
4
+ substituting a 'c' for a 'k' is again a cheaper operation than just any arbitrary change.
5
+
6
+ Just an example
7
+
8
+ english_rules = [
9
+ HintableLevenshtein::RuleSet.new(0.3, HintableLevenshtein::Rule.insert(/[\.,!]/)),
10
+ HintableLevenshtein::RuleSet.new(0.3, HintableLevenshtein::Rule.delete(/[\.,!]/)),
11
+ HintableLevenshtein::RuleSet.new(0.4, HintableLevenshtein::Rule.substitute('!' => '.')),
12
+ HintableLevenshtein::RuleSet.new(0.4, HintableLevenshtein::Rule.substitute('!' => ',')),
13
+ HintableLevenshtein::RuleSet.new(0.75, HintableLevenshtein::Rule.insert(' '), HintableLevenshtein::Rule.insert(' ')),
14
+ HintableLevenshtein::RuleSet.new(0.5, HintableLevenshtein::Rule.insert(' ')),
15
+ HintableLevenshtein::RuleSet.new(0.5, HintableLevenshtein::Rule.delete(' ')),
16
+ HintableLevenshtein::RuleSet.new(0.7, HintableLevenshtein::Rule.substitute('z' => 's')),
17
+ HintableLevenshtein::RuleSet.new(0.7, HintableLevenshtein::Rule.substitute('k' => 'c')),
18
+ HintableLevenshtein::RuleSet.new(0.7, HintableLevenshtein::Rule.substitute('u' => 'o')),
19
+ HintableLevenshtein::RuleSet.new(0.7, HintableLevenshtein::Rule.substitute('e' => 'a')),
20
+ HintableLevenshtein::RuleSet.new(0.7, HintableLevenshtein::Rule.substitute('i' => 'y')),
21
+ HintableLevenshtein::RuleSet.new(1, HintableLevenshtein::Rule.delete),
22
+ HintableLevenshtein::RuleSet.new(1, HintableLevenshtein::Rule.insert),
23
+ HintableLevenshtein::RuleSet.new(1, HintableLevenshtein::Rule.substitute)
24
+ ]
25
+
26
+ a = "hello kitten pizza!!"
27
+ b = "hello cittin pissssa.."
28
+
29
+ puts "normal levenshtein: #{HintableLevenshtein.new.distance(a, b)}"
30
+ puts "hinted levenshtein: #{HintableLevenshtein.new(english_rules).distance(a, b)}"
31
+
32
+ Would output:
33
+
34
+ normal levenshtein: 11.0
35
+ hinted levenshtein: 7.15
@@ -0,0 +1,48 @@
1
# Put the project's lib/ directory (resolved against the current working
# directory) at the front of the load path so the library can be required
# by its bare name below.
lib_path = File.expand_path("lib")
$LOAD_PATH.unshift(lib_path) unless $LOAD_PATH.include?(lib_path)

require 'hintable_levenshtein'

# Gem packaging tasks. Jeweler is optional: when it cannot be loaded we only
# print a hint and carry on defining the remaining tasks.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    blurb = "Levenshtein with hints"

    gem.name = "hintable_levenshtein"
    gem.summary = blurb
    gem.description = blurb
    gem.email = "joshbuddy@gmail.com"
    gem.homepage = "http://github.com/joshbuddy/hintable_levenshtein"
    gem.authors = ["Joshua Hull"]
    gem.files = FileList["[A-Z]*", "{lib,spec}/**/*"]
    gem.rubyforge_project = 'joshbuddy-hintable_levenshtein'
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end

# Spec tasks. `rake spec` is an alias for `rake spec:all`.
require 'spec'
require 'spec/rake/spectask'

task :spec => 'spec:all'

namespace :spec do
  Spec::Rake::SpecTask.new(:all) do |spec_task|
    spec_task.spec_opts ||= []
    spec_task.spec_opts.push("-rubygems", "--options", "spec/spec.opts")
    spec_task.spec_files = FileList['spec/**/*_spec.rb']
  end
end

desc "Run all examples with RCov"
Spec::Rake::SpecTask.new('spec_with_rcov') do |spec_task|
  spec_task.spec_files = FileList['spec/**/*.rb']
  spec_task.rcov = true
  spec_task.rcov_opts = ['--exclude', 'spec']
end

# RDoc generation into ./rdoc, with the README as the landing page.
require 'rake/rdoctask'

desc "Generate documentation"
Rake::RDocTask.new do |rdoc|
  rdoc.main = "README.rdoc"
  rdoc.rdoc_files.include("README.rdoc", "lib/**/*.rb")
  rdoc.rdoc_dir = 'rdoc'
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.2
@@ -0,0 +1,129 @@
1
# Levenshtein edit distance in which each edit operation (insert, delete,
# substitute) is priced by a list of RuleSet objects instead of a flat cost
# of 1.
#
# The distance is computed in three stages:
# 1. build the classic unit-cost Levenshtein matrix,
# 2. walk the matrix backwards to recover one sequence of edit events,
# 3. price that sequence with the configured rule sets.
class HintableLevenshtein

  # Event and RuleSet are loaded lazily from sibling files. Rule is defined in
  # the same file as RuleSet, so it is registered against that file as well;
  # without this entry Rule could only be resolved after RuleSet had already
  # been referenced.
  autoload :Event, File.join(File.dirname(__FILE__), 'hintable_levenshtein', 'event')
  autoload :RuleSet, File.join(File.dirname(__FILE__), 'hintable_levenshtein', 'rule_set')
  autoload :Rule, File.join(File.dirname(__FILE__), 'hintable_levenshtein', 'rule_set')

  # Coordinates inside the distance matrix: x indexes the source string,
  # y indexes the target string.
  Position = Struct.new(:x, :y)

  # new_rule_set - optional Array of RuleSet objects appended after any rules
  #                declared by the block.
  # block        - optional; evaluated with instance_eval, so it can call
  #                #delete, #insert and #substitute directly.
  #
  # When no rules end up configured, the three unit-cost catch-all rules are
  # installed, which yields the plain Levenshtein distance.
  def initialize(new_rule_set = nil, &block)
    @rule_set = []

    instance_eval(&block) if block
    rule_set.concat(new_rule_set) if new_rule_set
    return unless rule_set.empty?

    rule_set << RuleSet.new(1, Rule.delete)
    rule_set << RuleSet.new(1, Rule.insert)
    rule_set << RuleSet.new(1, Rule.substitute)
  end

  # Distinct rule-set lengths (number of rules per set), largest first.
  # Memoised; the cache is dropped whenever a rule set is added through
  # #delete, #insert or #substitute.
  def rule_set_sizes
    @rule_set_sizes ||= rule_set.map { |set| set.rules.size }.uniq.sort.reverse
  end

  # Length of the longest configured rule set.
  def largest_rule_size
    rule_set_sizes.first
  end

  # Adds a single-rule set pricing deletions that match +match+ at +score+.
  # Returns the internal rule-set array.
  def delete(score, match)
    register(RuleSet.new(score, Rule.delete(match)))
  end

  # Adds a single-rule set pricing insertions that match +match+ at +score+.
  # Returns the internal rule-set array.
  def insert(score, match)
    register(RuleSet.new(score, Rule.insert(match)))
  end

  # Adds a single-rule set pricing substitutions that match +match+ at
  # +score+. Returns the internal rule-set array.
  def substitute(score, match)
    register(RuleSet.new(score, Rule.substitute(match)))
  end

  # Returns the hinted distance between +s+ and +t+ as a Float.
  def distance(s, t)
    matrix = levenshtein_matrix(s, t)
    steps = calculate_steps(s, t, matrix)
    calculate_score(steps)
  end

  private

  attr_reader :rule_set

  # Appends +set+ and invalidates the memoised size list so that sets added
  # after the first #rule_set_sizes call are still taken into account.
  def register(set)
    @rule_set_sizes = nil
    rule_set << set
  end

  # Builds the (s.size + 1) x (t.size + 1) unit-cost edit-distance matrix.
  # Row 0 and column 0 hold the cost of building a prefix from nothing.
  def levenshtein_matrix(s, t)
    d = Array.new(s.size + 1) { Array.new(t.size + 1) }

    (0..s.size).each do |m|
      (0..t.size).each do |n|
        d[m][n] =
          if m.zero? || n.zero?
            m + n
          else
            [
              d[m - 1][n] + 1,
              d[m][n - 1] + 1,
              d[m - 1][n - 1] + (s[m - 1] == t[n - 1] ? 0 : 1)
            ].min
          end
      end
    end

    d
  end

  # Walks the matrix from the bottom-right corner back to a zero cell and
  # records one Event for every move that lowers the matrix value. Events are
  # therefore collected from the end of the strings towards the start.
  def calculate_steps(s, t, matrix)
    position = Position.new(s.size, t.size)
    value = matrix[position.x][position.y]
    steps = []

    until matrix[position.x][position.y].zero?
      previous_position = position.dup

      if position.x.zero?
        # Top row: only insertions remain. (This branch used to reference an
        # undefined local and raised NameError.)
        position.y -= 1
      elsif position.y.zero?
        # Left column: only deletions remain.
        position.x -= 1
      else
        up = matrix[position.x - 1][position.y]
        left = matrix[position.x][position.y - 1]
        diagonal = matrix[position.x - 1][position.y - 1]

        # Ties prefer the diagonal, then the left neighbour, then the upper one.
        case [up, left, diagonal].min
        when diagonal
          position.x -= 1
          position.y -= 1
        when left
          position.y -= 1
        when up
          position.x -= 1
        end
      end

      # A diagonal move over equal characters costs nothing and is not an edit.
      next if value == matrix[position.x][position.y]

      value = matrix[position.x][position.y]
      moved_x = previous_position.x == position.x + 1
      moved_y = previous_position.y == position.y + 1

      steps << if moved_x && moved_y
        Event.new(:substitute, s[position.x] => t[position.y])
      elsif moved_x
        Event.new(:delete, s[position.x].chr)
      else
        Event.new(:insert, t[position.y].chr)
      end
    end

    steps
  end

  # Prices the recovered events. Rule sets are tried longest first and, within
  # one length, in configuration order; each one is slid over the event list.
  # Matched events are blanked out (set to nil in +steps+, which is mutated)
  # so that no event is charged twice.
  def calculate_score(steps)
    score = 0.0

    rule_set_sizes.each do |set_size|
      candidates = rule_set.select { |set| set.rules.size == set_size }

      candidates.each do |rule|
        (0..(steps.size - set_size)).each do |offset|
          window = steps[offset, set_size]
          next if window.any? { |step| step.nil? }
          next unless rule.match?(window)

          steps.fill(nil, offset, set_size)
          score += rule.score
        end
      end
    end

    score
  end

end
@@ -0,0 +1,31 @@
1
class HintableLevenshtein
  # One edit operation.
  #
  # For :substitute events the payload is a one-pair Hash ({from => to}) and
  # is exposed through #from and #to. For every other type the payload is a
  # single character exposed through #chr. Characters may be given either as
  # Strings or as objects responding to #chr (e.g. Integer character codes);
  # they are stored as Strings.
  class Event

    attr_reader :type, :chr, :from, :to

    # type - Symbol naming the operation (:substitute gets the Hash handling
    #        described above; anything else is treated as a one-character
    #        event).
    # chr  - the payload, or nil for an event without character information.
    #        A nil payload used to raise NoMethodError for non-substitute
    #        events; it now simply leaves #chr nil.
    def initialize(type, chr = nil)
      @type = type

      if type == :substitute
        unless chr.nil?
          @from = stringify(chr.keys.first)
          @to = stringify(chr.values.first)
        end
      else
        @chr = stringify(chr)
      end
    end

    # Human-readable form: "type -> from..to" when a source character is
    # known, "type -> chr" otherwise.
    def to_s
      if from
        "#{type} -> #{from}..#{to}"
      else
        "#{type} -> #{chr}"
      end
    end

    private

    # Strings and nil pass through untouched; anything else is converted via
    # #chr.
    def stringify(value)
      return value if value.nil? || value.is_a?(String)

      value.chr
    end
  end
end
@@ -0,0 +1,70 @@
1
class HintableLevenshtein
  # A score attached to an ordered list of rules. The set matches a run of
  # events when there are exactly as many events as rules and every rule
  # matches the event at the same index.
  class RuleSet

    # NOTE(review): Comparable is mixed in but no <=> is defined in this file,
    # so the ordering operators are not usable unless <=> is supplied elsewhere.
    include Comparable

    attr_reader :rules, :score

    # score - the value returned by #score for this set.
    # rules - zero or more Rule objects, in the order they must match.
    def initialize(score, *rules)
      @rules = rules
      @score = score
    end

    # "score -> rule, rule, ..."
    def to_s
      "#{score} -> #{rules.map { |rule| rule.to_s }.join(', ')}"
    end

    # Returns true when +events+ lines up one-to-one with the rules, false
    # otherwise (always a real boolean).
    def match?(events)
      return false unless events.size == rules.size

      events.zip(rules).all? { |event, rule| rule.match?(event) }
    end
  end

  # A single pattern for one kind of edit event. Instances are built through
  # the factory methods .delete, .insert and .substitute.
  class Rule
    # matcher - nil (any character), a String (the character must occur in
    #           it), an Array (must include the character) or any object
    #           responding to === such as a Regexp.
    def self.delete(matcher = nil)
      new(:delete, matcher)
    end

    # See .delete for the accepted matcher forms.
    def self.insert(matcher = nil)
      new(:insert, matcher)
    end

    # matcher - Hash of character pairs; an empty Hash matches any
    #           substitution. Pairs are matched in either direction.
    def self.substitute(matcher = {})
      new(:substitute, matcher)
    end

    attr_reader :type, :matcher

    # Returns true when +event+ has this rule's type and satisfies the
    # matcher, false otherwise (always a real boolean).
    def match?(event)
      return false unless event.type == type

      case type
      when :delete, :insert
        character_match?(event.chr)
      when :substitute
        matcher.empty? || matcher[event.from] == event.to || matcher[event.to] == event.from
      else
        false
      end
    end

    # "type matcher.inspect"
    def to_s
      "#{type} #{matcher.inspect}"
    end

    private

    def initialize(type, matcher)
      @type = type
      @matcher = matcher
    end

    # Applies the insert/delete matcher to one character.
    def character_match?(chr)
      case matcher
      when nil
        true
      when String
        # A nil character cannot occur in a String; guard it so String#index
        # is never handed nil.
        !chr.nil? && !matcher.index(chr).nil?
      when Array
        matcher.include?(chr)
      else
        matcher === chr ? true : false
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
require 'lib/hintable_levenshtein'

describe HintableLevenshtein do
  # Compares the plain distance with a distance hinted by a set of
  # English-flavoured rules (cheap punctuation, whitespace and
  # similar-sounding letter edits).
  it "should do this thing" do
    # Short local aliases. RuleSet is looked up first, matching the order in
    # which the constants are first referenced in the rule list below.
    set = HintableLevenshtein::RuleSet
    rule = HintableLevenshtein::Rule

    english_rules = [
      # Punctuation is cheap to add, drop or swap.
      set.new(0.3, rule.insert(/[\.,!]/)),
      set.new(0.3, rule.delete(/[\.,!]/)),
      set.new(0.4, rule.substitute('!' => '.')),
      set.new(0.4, rule.substitute('!' => ',')),
      # Two consecutive inserted spaces are priced as one combined edit.
      set.new(0.75, rule.insert(' '), rule.insert(' ')),
      set.new(0.5, rule.insert(' ')),
      set.new(0.5, rule.delete(' ')),
      # Similar-sounding letters.
      set.new(0.7, rule.substitute('z' => 's')),
      set.new(0.7, rule.substitute('k' => 'c')),
      set.new(0.7, rule.substitute('u' => 'o')),
      set.new(0.7, rule.substitute('e' => 'a')),
      set.new(0.7, rule.substitute('i' => 'y')),
      # Unit-cost fallbacks for everything else.
      set.new(1, rule.delete),
      set.new(1, rule.insert),
      set.new(1, rule.substitute)
    ]

    source = "hello kitten pizza!!"
    target = "hello cittin pissssa.."

    plain = HintableLevenshtein.new
    hinted = HintableLevenshtein.new(english_rules)

    plain.distance(source, target).should == 11
    hinted.distance(source, target).should == 7.15
  end
end
@@ -0,0 +1,7 @@
1
+ --colour
2
+ --format
3
+ specdoc
4
+ --loadby
5
+ mtime
6
+ --reverse
7
+ --backtrace
metadata ADDED
@@ -0,0 +1,62 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hintable_levenshtein
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Joshua Hull
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-12-18 00:00:00 -05:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: Levenshtein with hints
17
+ email: joshbuddy@gmail.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README.rdoc
24
+ files:
25
+ - README.rdoc
26
+ - Rakefile
27
+ - VERSION
28
+ - lib/hintable_levenshtein.rb
29
+ - lib/hintable_levenshtein/event.rb
30
+ - lib/hintable_levenshtein/rule_set.rb
31
+ - spec/hl_spec.rb
32
+ - spec/spec.opts
33
+ has_rdoc: true
34
+ homepage: http://github.com/joshbuddy/hintable_levenshtein
35
+ licenses: []
36
+
37
+ post_install_message:
38
+ rdoc_options:
39
+ - --charset=UTF-8
40
+ require_paths:
41
+ - lib
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: "0"
47
+ version:
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: "0"
53
+ version:
54
+ requirements: []
55
+
56
+ rubyforge_project: joshbuddy-hintable_levenshtein
57
+ rubygems_version: 1.3.5
58
+ signing_key:
59
+ specification_version: 3
60
+ summary: Levenshtein with hints
61
+ test_files:
62
+ - spec/hl_spec.rb