komainu 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +5 -0
- data/Gemfile +4 -0
- data/Rakefile +6 -0
- data/komainu.gemspec +23 -0
- data/lib/komainu.rb +4 -0
- data/lib/komainu/levenshtein.rb +51 -0
- data/lib/komainu/search_result.rb +10 -0
- data/lib/komainu/search_results.rb +11 -0
- data/lib/komainu/searches_text.rb +50 -0
- data/lib/komainu/trie_node.rb +20 -0
- data/lib/komainu/version.rb +3 -0
- data/spec/komainu/levenshtein_spec.rb +16 -0
- data/spec/komainu/searches_text_spec.rb +38 -0
- data/spec/komainu/trie_node_spec.rb +28 -0
- data/spec/spec_helper.rb +4 -0
- metadata +75 -0
data/Gemfile
ADDED
data/Rakefile
ADDED
data/komainu.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "komainu/version"
|
4
|
+
|
5
|
+
# Gem specification for komainu. File lists are taken from whatever git
# currently tracks, so this must be evaluated inside the git checkout.
Gem::Specification.new do |spec|
  spec.name        = "komainu"
  spec.version     = Komainu::VERSION
  spec.authors     = ["Andrew Vos"]
  spec.email       = ["andrew.vos@gmail.com"]
  spec.homepage    = ""
  spec.summary     = %q{}
  spec.description = %q{}

  spec.rubyforge_project = "komainu"

  # Everything tracked by git ships with the gem.
  spec.files = `git ls-files`.split("\n")
  # Test files are the tracked files under test/, spec/ or features/.
  spec.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  # Executables are listed by bare file name, not by their bin/ path.
  tracked_binaries = `git ls-files -- bin/*`.split("\n")
  spec.executables = tracked_binaries.map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "minitest"
  # spec.add_runtime_dependency "rest-client"
end
|
data/lib/komainu.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
module Komainu
  # Fuzzy word lookup over a fixed vocabulary.
  #
  # The vocabulary is stored in a trie (TrieNode). A search walks the trie
  # depth-first and computes one row of the Levenshtein (edit distance)
  # matrix per trie node, so words sharing a prefix share the work done for
  # that prefix. Letters are compared with `!=`, so matching is
  # case-sensitive.
  class Levenshtein
    # words - a collection of strings responding to #each; every word is
    #         inserted into the internal trie.
    def initialize(words)
      @trie = TrieNode.new
      words.each { |word| @trie.insert(word) }
    end

    # Returns the vocabulary word with the smallest edit distance to +word+,
    # or nil when no word lies within +maximum_distance+ (which is also the
    # case for an empty vocabulary).
    #
    # maximum_distance - optional upper bound on the accepted distance;
    #                    defaults to no bound at all.
    #
    # When several words share the smallest distance, the one reached first
    # during the trie walk is returned.
    def best_match(word, maximum_distance = Float::INFINITY)
      closest = search(word, maximum_distance).min_by { |_, distance| distance }
      closest && closest.first
    end

    # Returns a Hash mapping every vocabulary word whose edit distance to
    # +word+ is at most +maximum_distance+ to that distance.
    def search(word, maximum_distance)
      # Row for the empty prefix: turning "" into word[0, n] costs n inserts.
      first_row = (0..word.length).to_a
      results = {}
      @trie.children.each do |letter, child|
        search_recursive(child, letter, word, first_row, results, maximum_distance)
      end
      results
    end

    # Computes the edit-distance row for +node+ (reached via +letter+) from
    # its parent's +previous_row+, records the node's word in +results+ when
    # it is close enough, and recurses into the node's children.
    #
    # node             - the trie node being visited.
    # letter           - the character on the edge leading to +node+.
    # word             - the word being searched for.
    # previous_row     - distance row of the parent node.
    # results          - Hash of word => distance, filled in place.
    # maximum_distance - largest distance that still counts as a match.
    def search_recursive(node, letter, word, previous_row, results, maximum_distance)
      columns = word.length + 1
      # Column 0: distance from this trie prefix to the empty string.
      current_row = [previous_row.first + 1]

      (1...columns).each do |column|
        insert_cost = current_row[column - 1] + 1
        delete_cost = previous_row[column] + 1
        substitution = word[column - 1] == letter ? 0 : 1
        replace_cost = previous_row[column - 1] + substitution

        current_row << [insert_cost, delete_cost, replace_cost].min
      end

      # The last cell is the distance between the full prefix and the word;
      # node.word is only set on nodes that terminate an inserted word.
      if current_row.last <= maximum_distance && node.word
        results[node.word] = current_row.last
      end

      # Prune: descend only while some cell of this row is still within the
      # bound.
      if current_row.min <= maximum_distance
        node.children.each do |child_letter, child|
          search_recursive(child, child_letter, word, current_row, results, maximum_distance)
        end
      end
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require "komainu/levenshtein"
|
2
|
+
require "komainu/search_results"
|
3
|
+
|
4
|
+
module Komainu
  # Case-insensitive text search over a name => text collection, with a
  # "did you mean" style suggestion built from the words of the searched
  # texts.
  class SearchesText
    # Largest edit distance at which a word from the searched texts is still
    # offered as a replacement for a query word.
    SUGGESTION_MAX_DISTANCE = 2

    # data_to_search - collection yielding (name, text) pairs from #each and
    #                  exposing the texts via #values (e.g. a Hash).
    def initialize(data_to_search)
      @data_to_search = data_to_search
    end

    # Returns a SearchResults whose items hold one SearchResult per entry
    # that contains the whole query or any single query word (both checks
    # ignore case), and whose suggestion is the query with each word
    # replaced by its closest word from the searched texts.
    def search(query)
      results = SearchResults.new
      @data_to_search.each do |name, text|
        # Both checks are needed: a query with no space-separated words
        # (e.g. "") matches nothing by word but is still a substring.
        matched = text_includes_string(text, query) ||
                  text_includes_words_from_string(text, query)
        results.items << SearchResult.new(name, text) if matched
      end

      results.suggestion = calculate_suggestion(query)
      results
    end

    private

    # Rebuilds the query word by word, swapping each word for the closest
    # word found in the searched texts (within SUGGESTION_MAX_DISTANCE).
    # Words without a close enough match are kept as typed.
    def calculate_suggestion(query)
      vocabulary = split_into_words(@data_to_search.values.join(" "))
      levenshtein = Levenshtein.new(vocabulary)
      suggested_words = split_into_words(query).map do |word|
        matches = levenshtein.search(word, SUGGESTION_MAX_DISTANCE)
        # Pick the smallest distance rather than whichever match the search
        # happened to record first.
        closest = matches.min_by { |_, distance| distance }
        closest ? closest.first : word
      end
      suggested_words.join(" ")
    end

    # Returns the runs of word characters found in +string+.
    def split_into_words(string)
      string.scan(/\b\w+\b/)
    end

    # True when +text+ contains +string+, ignoring case.
    def text_includes_string(text, string)
      text.downcase.include?(string.downcase)
    end

    # True when +text+ contains at least one space-separated word of
    # +string+, ignoring case.
    def text_includes_words_from_string(text, string)
      string.split(" ").any? { |word| text_includes_string(text, word) }
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Komainu
  # One node of a character trie. Child nodes are keyed by single
  # characters; +word+ is set only on the node that terminates an inserted
  # word and is nil everywhere else.
  class TrieNode
    attr_accessor :word, :children

    def initialize
      @children = {}
    end

    # Inserts +word+ below this node, creating any missing child nodes along
    # the way, and stores the word on the final node. Returns +word+.
    def insert(word)
      leaf = word.each_char.inject(self) do |current, character|
        current.children[character] ||= TrieNode.new
      end
      leaf.word = word
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "..", "spec_helper"))
|
2
|
+
require "komainu/levenshtein"
|
3
|
+
|
4
|
+
module Komainu
  # Specs for the trie-backed Levenshtein search.
  describe Levenshtein do
    it "finds words within the maximum distance" do
      levenshtein = Levenshtein.new(["hello", "there", "good", "sirs"])
      # 14 exceeds every possible distance here, so all words are returned.
      levenshtein.search("hell", 14).must_equal({
        "hello" => 1,
        "there" => 3,
        "good" => 4,
        "sirs" => 4
      })
    end

    it "excludes words beyond the maximum distance" do
      levenshtein = Levenshtein.new(["hello", "there", "good", "sirs"])
      # Only "hello" is a single edit away from "hell".
      levenshtein.search("hell", 1).must_equal({ "hello" => 1 })
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "..", "spec_helper"))
|
2
|
+
require "komainu/searches_text"
|
3
|
+
|
4
|
+
module Komainu
  # Specs for SearchesText: matching behaviour and query suggestions.
  describe SearchesText do
    let(:data_to_search) do
      {
        item1: "This is some text",
        item2: "Batman has no parents"
      }
    end

    subject { SearchesText.new(data_to_search) }

    it "finds exact matches" do
      found = subject.search("Batman has")
      found.items.size.must_equal 1
      found.items.first.name.must_equal :item2
    end

    it "finds matches in any case" do
      found = subject.search("BATMAN has")
      found.items.size.must_equal 1
      found.items.first.name.must_equal :item2
    end

    it "finds matches if the text is not in order" do
      found = subject.search("has batman")
      found.items.size.must_equal 1
      found.items.first.name.must_equal :item2
    end

    it "suggests an alternate query" do
      found = subject.search("btman")
      found.suggestion.must_equal "Batman"
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "..", "spec_helper"))
|
2
|
+
require "komainu/trie_node"
|
3
|
+
|
4
|
+
module Komainu
  # Specs for TrieNode#insert.
  describe TrieNode do
    let(:root) { TrieNode.new }

    it "stores a word" do
      root.insert("abc")
      a_node = root.children["a"]
      a_node.wont_be_nil
      a_node.children["b"].wont_be_nil
      a_node.children["b"].children["c"].wont_be_nil
    end

    it "stores multiple words" do
      root.insert("ac")
      root.insert("ad")
      root.children["a"].children["c"].wont_be_nil
      root.children["a"].children["d"].wont_be_nil
    end

    it "sets the last node word" do
      root.insert("ac")
      root.children["a"].children["c"].word.must_equal "ac"
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: komainu
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Andrew Vos
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-11-22 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: minitest
|
16
|
+
requirement: &70327818908280 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70327818908280
|
25
|
+
description: ''
|
26
|
+
email:
|
27
|
+
- andrew.vos@gmail.com
|
28
|
+
executables: []
|
29
|
+
extensions: []
|
30
|
+
extra_rdoc_files: []
|
31
|
+
files:
|
32
|
+
- .gitignore
|
33
|
+
- Gemfile
|
34
|
+
- Rakefile
|
35
|
+
- komainu.gemspec
|
36
|
+
- lib/komainu.rb
|
37
|
+
- lib/komainu/levenshtein.rb
|
38
|
+
- lib/komainu/search_result.rb
|
39
|
+
- lib/komainu/search_results.rb
|
40
|
+
- lib/komainu/searches_text.rb
|
41
|
+
- lib/komainu/trie_node.rb
|
42
|
+
- lib/komainu/version.rb
|
43
|
+
- spec/komainu/levenshtein_spec.rb
|
44
|
+
- spec/komainu/searches_text_spec.rb
|
45
|
+
- spec/komainu/trie_node_spec.rb
|
46
|
+
- spec/spec_helper.rb
|
47
|
+
homepage: ''
|
48
|
+
licenses: []
|
49
|
+
post_install_message:
|
50
|
+
rdoc_options: []
|
51
|
+
require_paths:
|
52
|
+
- lib
|
53
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
55
|
+
requirements:
|
56
|
+
- - ! '>='
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: '0'
|
59
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
60
|
+
none: false
|
61
|
+
requirements:
|
62
|
+
- - ! '>='
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '0'
|
65
|
+
requirements: []
|
66
|
+
rubyforge_project: komainu
|
67
|
+
rubygems_version: 1.8.10
|
68
|
+
signing_key:
|
69
|
+
specification_version: 3
|
70
|
+
summary: ''
|
71
|
+
test_files:
|
72
|
+
- spec/komainu/levenshtein_spec.rb
|
73
|
+
- spec/komainu/searches_text_spec.rb
|
74
|
+
- spec/komainu/trie_node_spec.rb
|
75
|
+
- spec/spec_helper.rb
|