wordfinder 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/word_finder.rb +5 -33
- data/lib/word_finder/checker.rb +24 -0
- data/lib/word_finder/node.rb +65 -0
- data/lib/word_finder/version.rb +1 -1
- data/spec/wordfinder_spec.rb +8 -0
- metadata +4 -2
data/lib/word_finder.rb
CHANGED
@@ -5,9 +5,9 @@
|
|
5
5
|
# See LICENSE file for details.
|
6
6
|
##
|
7
7
|
|
8
|
-
require "ffi"
|
9
|
-
require "ffi/aspell"
|
10
8
|
require "word_finder/version"
|
9
|
+
require "word_finder/checker"
|
10
|
+
require "word_finder/node"
|
11
11
|
|
12
12
|
module WordFinder
|
13
13
|
class << self
|
@@ -24,37 +24,9 @@ module WordFinder
|
|
24
24
|
# @return [Array<String>]
|
25
25
|
# an Array containing all words found, in order
|
26
26
|
def words_in(phrase, options = {})
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
unmatched = ""
|
31
|
-
|
32
|
-
phrase.each_char do |c|
|
33
|
-
unmatched << c
|
34
|
-
|
35
|
-
tmp, matched = matched, []
|
36
|
-
|
37
|
-
if tmp.empty?
|
38
|
-
if speller.correct?(unmatched)
|
39
|
-
matched.push(unmatched)
|
40
|
-
unmatched = ""
|
41
|
-
end
|
42
|
-
else
|
43
|
-
(0..tmp.length).each do |i|
|
44
|
-
word = tmp[i..-1].join + unmatched
|
45
|
-
|
46
|
-
if speller.correct?(word)
|
47
|
-
matched.push(word)
|
48
|
-
unmatched = ""
|
49
|
-
break
|
50
|
-
elsif tmp[i]
|
51
|
-
matched << tmp[i]
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
matched.reject{|m| m !~ /\w/} # remove punctuation marks
|
27
|
+
# Feed the phrase into a fresh search tree one character at a time, then
# keep only the results that contain at least one word character.
root = Node.new(options)
phrase.each_char { |char| root.insert(char) }
root.words.select { |candidate| candidate =~ /\w/ }
|
58
30
|
end
|
59
31
|
end
|
60
32
|
end
|
data/lib/word_finder/checker.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
##
|
2
|
+
# WordFinder.
|
3
|
+
# Copyright © 2012 Chris Corbyn.
|
4
|
+
#
|
5
|
+
# See LICENSE file for details.
|
6
|
+
##
|
7
|
+
|
8
|
+
require "ffi"
|
9
|
+
require "ffi/aspell"
|
10
|
+
|
11
|
+
module WordFinder
  # Aspell speller that remembers the answer it gave for each string.
  class Checker < FFI::Aspell::Speller
    # Passes every argument straight through to the parent initializer and
    # starts with an empty answer cache.
    def initialize(*)
      super
      @cache = {}
    end

    # Reports whether +str+ is accepted by +correct?+.
    #
    # +correct?+ is called at most once per distinct string; later calls
    # with an equal string are answered from the cache, whatever the stored
    # answer was.
    #
    # @param str [String] the candidate word
    # @return the value +correct?+ returned for +str+
    def word?(str)
      @cache.fetch(str) { @cache[str] = correct?(str) }
    end
  end
end
|
data/lib/word_finder/node.rb
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
##
|
2
|
+
# WordFinder.
|
3
|
+
# Copyright © 2012 Chris Corbyn.
|
4
|
+
#
|
5
|
+
# See LICENSE file for details.
|
6
|
+
##
|
7
|
+
|
8
|
+
module WordFinder
  # One branch of a word search.
  #
  # A search is a tree of nodes. Each node keeps the words it has recognised
  # so far plus a buffer of characters not yet recognised as a word. Whenever
  # a node merges several of its words into one longer word, it first stores
  # a copy of itself as a child, so the unmerged reading keeps being explored.
  class Node
    attr_reader :offset

    # Builds an empty node with its own Checker.
    #
    # @param options [Hash] only +:lang+ is read; it is handed to
    #   +Checker.new+, with "en_US" used when it is missing or falsy
    def initialize(options = {})
      @checker  = Checker.new(options[:lang] || "en_US")
      @offset   = 0
      @children = []
      @buf      = ""
      @words    = []
    end

    # Adds one character to this node and to every child.
    #
    # Children receive the character first. If the buffer then forms a word
    # according to the checker, it is moved onto the word list and +condense+
    # is run.
    #
    # @param char [String] the character to append
    def insert(char)
      @children.each { |child| child.insert(char) }

      @buf << char
      return unless @checker.word?(@buf)

      @words.push(@buf)
      @buf = ""
      condense
    end

    # Tries to merge a run of trailing words into a single longer word.
    #
    # Starting at +offset+, each suffix of the word list (longest first) is
    # joined and checked. On the first hit a copy of the node in its
    # pre-merge state is stored as a child, the suffix is replaced by the
    # joined word, and +offset+ is moved past the end of the list so that
    # later calls do not start from the merged word.
    def condense
      (@offset...(@words.length - 1)).each do |start|
        joined = @words[start..-1].join
        next unless @checker.word?(joined)

        @children.push(dup)
        @words[start..-1] = joined
        @buf    = ""
        @offset = @words.length
        return
      end
    end

    # Returns the shortest word list found in this node or its descendants.
    #
    # A child's list only wins when it is strictly shorter, so on a tie the
    # earlier candidate (this node first, then children in order) is kept.
    #
    # @return [Array<String>]
    def words
      @children.reduce(@words) do |shortest, child|
        candidate = child.words
        candidate.length < shortest.length ? candidate : shortest
      end
    end

    # Hook run on the copy made by +dup+/+clone+.
    #
    # Gives the copy its own word list and buffer and an empty child list.
    # No other instance variable is reassigned here.
    def initialize_copy(parent)
      @words, @buf = %i[@words @buf].map do |ivar|
        parent.instance_variable_get(ivar).dup
      end
      @children = []
    end
  end
end
|
data/lib/word_finder/version.rb
CHANGED
data/spec/wordfinder_spec.rb
CHANGED
@@ -67,5 +67,13 @@ describe WordFinder do
|
|
67
67
|
words.should == %w[I need it now]
|
68
68
|
end
|
69
69
|
end
|
70
|
+
|
71
|
+
# The run-together phrase below is expected to come back as exactly these
# six words.
context "with ambiguous constructions" do
  let(:phrase) { "threelittlepigswenttomarket" }

  it "uses the fewest words possible" do
    expected = ["three", "little", "pigs", "went", "to", "market"]
    words.should == expected
  end
end
|
70
78
|
end
|
71
79
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wordfinder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-12-
|
12
|
+
date: 2012-12-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
@@ -73,6 +73,8 @@ files:
|
|
73
73
|
- README.md
|
74
74
|
- Rakefile
|
75
75
|
- lib/word_finder.rb
|
76
|
+
- lib/word_finder/checker.rb
|
77
|
+
- lib/word_finder/node.rb
|
76
78
|
- lib/word_finder/version.rb
|
77
79
|
- lib/wordfinder.rb
|
78
80
|
- spec/spec_helper.rb
|