komainu 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
5
+ *.swp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
# Fetch gems over HTTPS so downloads cannot be read or altered in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in komainu.gemspec
gemspec
data/Rakefile ADDED
@@ -0,0 +1,6 @@
require "bundler/gem_tasks"
require "rake/testtask"

# Defines the `spec` task, which runs every *_spec.rb file below spec/.
Rake::TestTask.new(:spec) do |t|
  t.test_files = FileList["spec/**/*_spec.rb"]
end
data/komainu.gemspec ADDED
@@ -0,0 +1,23 @@
# -*- encoding: utf-8 -*-
# Put lib/ on the load path so the version file can be required below.
$LOAD_PATH.push File.expand_path("../lib", __FILE__)
require "komainu/version"

Gem::Specification.new do |spec|
  spec.name        = "komainu"
  spec.version     = Komainu::VERSION
  spec.authors     = ["Andrew Vos"]
  spec.email       = ["andrew.vos@gmail.com"]
  # Homepage, summary and description are still empty placeholders.
  spec.homepage    = ""
  spec.summary     = %q{}
  spec.description = %q{}

  spec.rubyforge_project = "komainu"

  # The packaged file lists are read from git's index at build time.
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "minitest"
  # spec.add_runtime_dependency "rest-client"
end
data/lib/komainu.rb ADDED
@@ -0,0 +1,4 @@
# Entry point of the gem: requiring "komainu" loads the whole library.
# TrieNode is loaded before Levenshtein because Levenshtein builds on it
# but does not require it itself.
require "komainu/version"
require "komainu/trie_node"
require "komainu/levenshtein"
require "komainu/search_result"
require "komainu/search_results"
require "komainu/searches_text"

# Namespace for every class of the komainu gem.
module Komainu
end
@@ -0,0 +1,51 @@
module Komainu
  # Fuzzy word lookup. A list of words is stored in a trie, and #search
  # walks that trie while building one row of the Levenshtein (edit
  # distance) matrix per trie level, so shared prefixes are only computed
  # once.
  #
  # NOTE: this class uses Komainu::TrieNode but this file does not load it;
  # "komainu/trie_node" has to be required before an instance is created.
  class Levenshtein
    # words - the Strings to index; anything responding to #each.
    def initialize(words)
      @trie = TrieNode.new
      words.each { |word| @trie.insert(word) }
    end

    # Returns the indexed word with the smallest edit distance to +word+,
    # or nil when nothing has been indexed. When several words are equally
    # close, the one reached first by the trie walk is returned.
    def best_match(word)
      closest = search(word, Float::INFINITY).min_by { |_, distance| distance }
      closest && closest.first
    end

    # Returns a Hash mapping every indexed word whose edit distance to
    # +word+ is at most +maximum_distance+ to that distance.
    def search(word, maximum_distance)
      # Row for the empty prefix: turning "" into word[0, n] costs n inserts.
      first_row = (0..word.length).to_a
      results = {}

      # The root stands for the empty prefix; it only carries a word when
      # the empty string itself was indexed.
      if @trie.word && first_row.last <= maximum_distance
        results[@trie.word] = first_row.last
      end

      @trie.children.each do |letter, child|
        search_recursive(child, letter, word, first_row, results, maximum_distance)
      end
      results
    end

    # Computes the matrix row for +node+ (reached through +letter+) from
    # +previous_row+, records node.word in +results+ when it is within
    # +maximum_distance+, and descends into the children while a match is
    # still possible.
    def search_recursive(node, letter, word, previous_row, results, maximum_distance)
      current_row = [previous_row.first + 1]

      (1..word.length).each do |column|
        insert_cost = current_row[column - 1] + 1
        delete_cost = previous_row[column] + 1
        replace_cost = previous_row[column - 1]
        replace_cost += 1 unless word[column - 1] == letter

        current_row << [insert_cost, delete_cost, replace_cost].min
      end

      if node.word && current_row.last <= maximum_distance
        results[node.word] = current_row.last
      end

      # Once every entry of the row exceeds the limit, no longer word below
      # this node can come back under it, so the subtree is skipped.
      return unless current_row.min <= maximum_distance

      node.children.each do |next_letter, child|
        search_recursive(child, next_letter, word, current_row, results, maximum_distance)
      end
    end
  end
end
@@ -0,0 +1,10 @@
module Komainu
  # One search hit: a name together with the text stored for it.
  class SearchResult
    attr_accessor :name, :text

    # name - identifier of the hit.
    # text - the text belonging to that identifier.
    def initialize(name, text)
      @name, @text = name, text
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require "komainu/search_result"
2
+
module Komainu
  # Outcome of one search: the collected items plus a suggested query.
  class SearchResults
    attr_accessor :items, :suggestion

    # Starts with an empty item list; the suggestion stays nil until set.
    def initialize
      self.items = []
    end
  end
end
@@ -0,0 +1,50 @@
1
+ require "komainu/levenshtein"
2
+ require "komainu/search_results"
3
+
module Komainu
  # Searches a set of named texts for a query and proposes a corrected
  # query built from the words that occur in those texts.
  class SearchesText
    # data_to_search - Hash mapping a name to the text stored under it.
    def initialize(data_to_search)
      @data_to_search = data_to_search
    end

    # Returns a SearchResults whose items are the entries that contain the
    # whole query, or at least one of its space-separated words, ignoring
    # case. Its suggestion is the query with each word replaced by the
    # closest word found in the searched texts.
    def search(query)
      results = SearchResults.new
      @data_to_search.each do |name, text|
        results.items << SearchResult.new(name, text) if matches?(text, query)
      end

      results.suggestion = calculate_suggestion(query)
      results
    end

    private

    # True when +text+ contains the full query or any single word of it.
    def matches?(text, query)
      text_includes_string(text, query) || text_includes_words_from_string(text, query)
    end

    # Replaces every word of +query+ by the closest word (edit distance of
    # at most 2) from the searched texts; words without such a neighbour
    # are kept unchanged.
    def calculate_suggestion(query)
      dictionary = split_into_words(@data_to_search.values.join(" "))
      levenshtein = Levenshtein.new(dictionary)

      suggestion = split_into_words(query).map do |word|
        # Pick the smallest distance, not merely the first hit, so a word
        # that is already spelled correctly is never swapped for a near miss.
        closest = levenshtein.search(word, 2).min_by { |_, distance| distance }
        closest ? closest.first : word
      end
      suggestion.join(" ")
    end

    def split_into_words(string)
      string.scan(/\b\w+\b/)
    end

    # Case-insensitive substring test.
    def text_includes_string(text, string)
      text.downcase.include?(string.downcase)
    end

    def text_includes_words_from_string(text, string)
      string.split(" ").any? { |word| text_includes_string(text, word) }
    end
  end
end
@@ -0,0 +1,20 @@
module Komainu
  # A node of a character trie. Each node maps single letters to child
  # nodes; the node reached by spelling out an inserted word stores that
  # word in #word.
  class TrieNode
    attr_accessor :word, :children

    def initialize
      @children = {}
    end

    # Adds +word+ below this node, creating missing child nodes letter by
    # letter, and marks the final node with the word. Returns +word+.
    def insert(word)
      leaf = word.each_char.inject(self) do |node, letter|
        node.children[letter] ||= TrieNode.new
      end
      leaf.word = word
    end
  end
end
@@ -0,0 +1,3 @@
module Komainu
  # Release number of the gem; komainu.gemspec uses it as the gem version.
  VERSION = "0.0.1"
end
@@ -0,0 +1,16 @@
require File.expand_path(File.join(File.dirname(__FILE__), "..", "spec_helper"))
# Levenshtein builds on TrieNode, which levenshtein.rb does not load itself;
# requiring it here lets this spec run on its own.
require "komainu/trie_node"
require "komainu/levenshtein"

module Komainu
  describe Levenshtein do
    # The limit of 14 is larger than any distance involved, so every indexed
    # word is expected back together with its edit distance to "hell".
    it "finds words within the maximum distance" do
      levenshtein = Levenshtein.new(["hello", "there", "good", "sirs"])
      levenshtein.search("hell", 14).must_equal({
        "hello" => 1,
        "there" => 3,
        "good" => 4,
        "sirs" => 4
      })
    end
  end
end
@@ -0,0 +1,38 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "spec_helper"))
2
+ require "komainu/searches_text"
3
+
module Komainu
  describe SearchesText do
    # Two named texts; only :item2 mentions Batman.
    let(:data_to_search) do
      { :item1 => "This is some text", :item2 => "Batman has no parents" }
    end

    subject { SearchesText.new(data_to_search) }

    it "finds exact matches" do
      items = subject.search("Batman has").items
      items.size.must_equal 1
      items.first.name.must_equal :item2
    end

    it "finds matches in any case" do
      items = subject.search("BATMAN has").items
      items.size.must_equal 1
      items.first.name.must_equal :item2
    end

    it "finds matches if the text is not in order" do
      items = subject.search("has batman").items
      items.size.must_equal 1
      items.first.name.must_equal :item2
    end

    it "suggests an alternate query" do
      subject.search("btman").suggestion.must_equal "Batman"
    end
  end
end
@@ -0,0 +1,28 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "spec_helper"))
2
+ require "komainu/trie_node"
3
+
module Komainu
  describe TrieNode do
    it "stores a word" do
      root = TrieNode.new
      root.insert("abc")

      # Every letter of the word gets its own nested node.
      a_node = root.children["a"]
      a_node.wont_be_nil
      b_node = a_node.children["b"]
      b_node.wont_be_nil
      b_node.children["c"].wont_be_nil
    end

    it "stores multiple words" do
      root = TrieNode.new
      ["ac", "ad"].each { |word| root.insert(word) }

      # Both words branch off the shared "a" node.
      shared = root.children["a"]
      shared.children["c"].wont_be_nil
      shared.children["d"].wont_be_nil
    end

    it "sets the last node word" do
      root = TrieNode.new
      root.insert("ac")
      root.children["a"].children["c"].word.must_equal "ac"
    end
  end
end
@@ -0,0 +1,4 @@
1
+ $: << File.expand_path(File.join(File.dirname(__FILE__), "..", "lib"))
2
+ require "minitest/pride"
3
+ require "minitest/autorun"
4
+ require "minitest/spec"
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: komainu
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Andrew Vos
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-11-22 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: minitest
16
+ requirement: &70327818908280 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *70327818908280
25
+ description: ''
26
+ email:
27
+ - andrew.vos@gmail.com
28
+ executables: []
29
+ extensions: []
30
+ extra_rdoc_files: []
31
+ files:
32
+ - .gitignore
33
+ - Gemfile
34
+ - Rakefile
35
+ - komainu.gemspec
36
+ - lib/komainu.rb
37
+ - lib/komainu/levenshtein.rb
38
+ - lib/komainu/search_result.rb
39
+ - lib/komainu/search_results.rb
40
+ - lib/komainu/searches_text.rb
41
+ - lib/komainu/trie_node.rb
42
+ - lib/komainu/version.rb
43
+ - spec/komainu/levenshtein_spec.rb
44
+ - spec/komainu/searches_text_spec.rb
45
+ - spec/komainu/trie_node_spec.rb
46
+ - spec/spec_helper.rb
47
+ homepage: ''
48
+ licenses: []
49
+ post_install_message:
50
+ rdoc_options: []
51
+ require_paths:
52
+ - lib
53
+ required_ruby_version: !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ! '>='
57
+ - !ruby/object:Gem::Version
58
+ version: '0'
59
+ required_rubygems_version: !ruby/object:Gem::Requirement
60
+ none: false
61
+ requirements:
62
+ - - ! '>='
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ requirements: []
66
+ rubyforge_project: komainu
67
+ rubygems_version: 1.8.10
68
+ signing_key:
69
+ specification_version: 3
70
+ summary: ''
71
+ test_files:
72
+ - spec/komainu/levenshtein_spec.rb
73
+ - spec/komainu/searches_text_spec.rb
74
+ - spec/komainu/trie_node_spec.rb
75
+ - spec/spec_helper.rb