komainu 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +5 -0
- data/Gemfile +4 -0
- data/Rakefile +6 -0
- data/komainu.gemspec +23 -0
- data/lib/komainu.rb +4 -0
- data/lib/komainu/levenshtein.rb +51 -0
- data/lib/komainu/search_result.rb +10 -0
- data/lib/komainu/search_results.rb +11 -0
- data/lib/komainu/searches_text.rb +50 -0
- data/lib/komainu/trie_node.rb +20 -0
- data/lib/komainu/version.rb +3 -0
- data/spec/komainu/levenshtein_spec.rb +16 -0
- data/spec/komainu/searches_text_spec.rb +38 -0
- data/spec/komainu/trie_node_spec.rb +28 -0
- data/spec/spec_helper.rb +4 -0
- metadata +75 -0
data/Gemfile
ADDED
data/Rakefile
ADDED
data/komainu.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "komainu/version"
|
4
|
+
|
5
|
+
# Gem specification for komainu: package metadata, the file manifest and the
# gem's dependencies.
Gem::Specification.new do |s|
  s.name        = "komainu"
  s.version     = Komainu::VERSION
  s.authors     = ["Andrew Vos"]
  s.email       = ["andrew.vos@gmail.com"]
  s.homepage    = ""
  # Summary and description were blank; they now describe what the library
  # code in lib/komainu does.
  s.summary     = %q{Simple text search with alternate query suggestions}
  s.description = %q{Searches named pieces of text for a query and suggests an alternate query by matching words within a Levenshtein distance using a trie.}

  s.rubyforge_project = "komainu"

  # The manifest is read from git, so the gem has to be built from inside a
  # git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_development_dependency "minitest"
  # s.add_runtime_dependency "rest-client"
end
|
data/lib/komainu.rb
ADDED
data/lib/komainu/levenshtein.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
module Komainu
  # Finds indexed words that lie within a given Levenshtein (edit) distance of
  # a query word. The words are stored in a trie, and one row of the
  # edit-distance table is computed per trie node visited, so words sharing a
  # prefix share the rows computed for that prefix.
  class Levenshtein
    # words - a collection of Strings to index (anything responding to #each).
    def initialize(words)
      @trie = TrieNode.new
      words.each { |word| @trie.insert(word) }
    end

    # Returns the indexed word with the smallest edit distance to +word+, or
    # nil when no words are indexed. When several words tie, the one reached
    # first while walking the trie wins.
    def best_match(word)
      closest = search(word, Float::INFINITY).min_by { |_, distance| distance }
      closest && closest.first
    end

    # Returns a Hash mapping every indexed word whose edit distance to +word+
    # is at most +maximum_distance+ to that distance.
    def search(word, maximum_distance)
      # Row for the empty prefix: turning "" into word[0, n] costs n insertions.
      first_row = (0..word.length).to_a
      results = {}

      # A word stored on the root node is the empty string; its distance is
      # the last entry of the first row (the full length of +word+).
      if @trie.word && first_row.last <= maximum_distance
        results[@trie.word] = first_row.last
      end

      @trie.children.each do |letter, child|
        search_recursive(child, letter, word, first_row, results, maximum_distance)
      end
      results
    end

    # Computes the edit-distance row for +node+ (reached through +letter+)
    # from its parent's +previous_row+, records node.word in +results+ when it
    # is close enough, then descends into the node's children.
    def search_recursive(node, letter, word, previous_row, results, maximum_distance)
      current_row = [previous_row.first + 1]

      (1..word.length).each do |column|
        insert_cost  = current_row[column - 1] + 1
        delete_cost  = previous_row[column] + 1
        # Substituting is free when the letters already agree.
        replace_cost = previous_row[column - 1] + (word[column - 1] == letter ? 0 : 1)

        current_row << [insert_cost, delete_cost, replace_cost].min
      end

      distance = current_row.last
      results[node.word] = distance if node.word && distance <= maximum_distance

      # Rows further down the trie never have a smaller minimum than this one,
      # so a branch whose best entry already exceeds the limit can be skipped.
      return unless current_row.min <= maximum_distance

      node.children.each do |child_letter, child|
        search_recursive(child, child_letter, word, current_row, results, maximum_distance)
      end
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require "komainu/levenshtein"
|
2
|
+
require "komainu/search_results"
|
3
|
+
|
4
|
+
module Komainu
  # Searches a collection of named texts for a query and proposes an
  # alternate query built from words that occur in those texts.
  class SearchesText
    # data_to_search - a Hash mapping a name to the text stored under it.
    def initialize(data_to_search)
      @data_to_search = data_to_search
    end

    # Returns a SearchResults whose items are the entries whose text contains
    # the whole query, or at least one space-separated word of it
    # (case-insensitively), and whose suggestion is the alternate query.
    def search(query)
      results = SearchResults.new
      @data_to_search.each do |name, text|
        if text_includes_string(text, query) || text_includes_words_from_string(text, query)
          results.items << SearchResult.new(name, text)
        end
      end

      results.suggestion = calculate_suggestion(query)
      results
    end

    private

    # Replaces each word of the query with the closest word found in the
    # searched texts (edit distance of at most 2); words with no such
    # neighbour are kept unchanged.
    def calculate_suggestion(query)
      words = split_into_words(@data_to_search.values.join(" "))
      levenshtein = Levenshtein.new(words)
      suggestion = split_into_words(query).map do |word|
        # Take the match with the smallest distance, not merely the first one
        # the search happened to return.
        closest = levenshtein.search(word, 2).min_by { |_, distance| distance }
        closest ? closest.first : word
      end
      suggestion.join(" ")
    end

    def split_into_words(string)
      string.scan(/\b\w+\b/)
    end

    # Case-insensitive substring test.
    def text_includes_string(text, string)
      text.downcase.include?(string.downcase)
    end

    # True when any space-separated word of +string+ occurs in +text+.
    def text_includes_words_from_string(text, string)
      string.split(" ").any? { |word| text_includes_string(text, word) }
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Komainu
  # One node of a character trie. +children+ maps a single letter to the
  # child node reached through it; +word+ holds the complete word on the node
  # where an inserted word ends and is nil on every other node.
  class TrieNode
    attr_accessor :word, :children

    def initialize
      @children = {}
    end

    # Adds +word+ beneath this node, creating any missing nodes along its
    # letters, and stores the word on the final node.
    def insert(word)
      terminal = word.each_char.inject(self) do |current, letter|
        current.children[letter] ||= TrieNode.new
      end
      terminal.word = word
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "..", "spec_helper"))
|
2
|
+
require "komainu/levenshtein"
|
3
|
+
|
4
|
+
module Komainu
  describe Levenshtein do
    let(:levenshtein) { Levenshtein.new(["hello", "there", "good", "sirs"]) }

    # A maximum of 14 is larger than any distance here, so every word is returned.
    it "finds words with a distance no greater than the maximum distance" do
      levenshtein.search("hell", 14).must_equal({
        "hello" => 1,
        "there" => 3,
        "good" => 4,
        "sirs" => 4
      })
    end

    # With a tight maximum only the near match survives.
    it "leaves out words further away than the maximum distance" do
      levenshtein.search("hell", 1).must_equal({ "hello" => 1 })
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "..", "spec_helper"))
|
2
|
+
require "komainu/searches_text"
|
3
|
+
|
4
|
+
module Komainu
  describe SearchesText do
    # Two named texts; only :item2 mentions Batman.
    let(:data_to_search) do
      { :item1 => "This is some text", :item2 => "Batman has no parents" }
    end

    subject { SearchesText.new(data_to_search) }

    it "finds exact matches" do
      found = subject.search("Batman has").items
      found.size.must_equal 1
      found.first.name.must_equal :item2
    end

    it "finds matches in any case" do
      found = subject.search("BATMAN has").items
      found.size.must_equal 1
      found.first.name.must_equal :item2
    end

    it "finds matches if the text is not in order" do
      found = subject.search("has batman").items
      found.size.must_equal 1
      found.first.name.must_equal :item2
    end

    it "suggests an alternate query" do
      subject.search("btman").suggestion.must_equal "Batman"
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "..", "spec_helper"))
|
2
|
+
require "komainu/trie_node"
|
3
|
+
|
4
|
+
module Komainu
  describe TrieNode do
    # Each example starts from a fresh, empty root node.
    it "stores a word" do
      root = TrieNode.new
      root.insert("abc")

      a_node = root.children["a"]
      a_node.wont_be_nil
      b_node = a_node.children["b"]
      b_node.wont_be_nil
      b_node.children["c"].wont_be_nil
    end

    it "stores multiple words" do
      root = TrieNode.new
      ["ac", "ad"].each { |word| root.insert(word) }

      shared = root.children["a"]
      shared.children["c"].wont_be_nil
      shared.children["d"].wont_be_nil
    end

    it "sets the last node word" do
      root = TrieNode.new
      root.insert("ac")

      last_node = root.children["a"].children["c"]
      last_node.word.must_equal "ac"
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: komainu
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Andrew Vos
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-11-22 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: minitest
|
16
|
+
requirement: &70327818908280 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70327818908280
|
25
|
+
description: ''
|
26
|
+
email:
|
27
|
+
- andrew.vos@gmail.com
|
28
|
+
executables: []
|
29
|
+
extensions: []
|
30
|
+
extra_rdoc_files: []
|
31
|
+
files:
|
32
|
+
- .gitignore
|
33
|
+
- Gemfile
|
34
|
+
- Rakefile
|
35
|
+
- komainu.gemspec
|
36
|
+
- lib/komainu.rb
|
37
|
+
- lib/komainu/levenshtein.rb
|
38
|
+
- lib/komainu/search_result.rb
|
39
|
+
- lib/komainu/search_results.rb
|
40
|
+
- lib/komainu/searches_text.rb
|
41
|
+
- lib/komainu/trie_node.rb
|
42
|
+
- lib/komainu/version.rb
|
43
|
+
- spec/komainu/levenshtein_spec.rb
|
44
|
+
- spec/komainu/searches_text_spec.rb
|
45
|
+
- spec/komainu/trie_node_spec.rb
|
46
|
+
- spec/spec_helper.rb
|
47
|
+
homepage: ''
|
48
|
+
licenses: []
|
49
|
+
post_install_message:
|
50
|
+
rdoc_options: []
|
51
|
+
require_paths:
|
52
|
+
- lib
|
53
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
55
|
+
requirements:
|
56
|
+
- - ! '>='
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: '0'
|
59
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
60
|
+
none: false
|
61
|
+
requirements:
|
62
|
+
- - ! '>='
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '0'
|
65
|
+
requirements: []
|
66
|
+
rubyforge_project: komainu
|
67
|
+
rubygems_version: 1.8.10
|
68
|
+
signing_key:
|
69
|
+
specification_version: 3
|
70
|
+
summary: ''
|
71
|
+
test_files:
|
72
|
+
- spec/komainu/levenshtein_spec.rb
|
73
|
+
- spec/komainu/searches_text_spec.rb
|
74
|
+
- spec/komainu/trie_node_spec.rb
|
75
|
+
- spec/spec_helper.rb
|