tagelizer 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/tagelizer.rb +10 -1
- data/spec/tagelizer_spec.rb +7 -1
- data/tagelizer.gemspec +1 -1
- metadata +3 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.3
|
1
|
+
0.1.4
|
data/lib/tagelizer.rb
CHANGED
@@ -14,7 +14,7 @@ class Tagelizer
|
|
14
14
|
end
|
15
15
|
|
16
16
|
def parse( text )
|
17
|
-
text.split(" ").collect {|i| /(\w*)/.match(i)[1]}.select {|i| i.size > @minwordsize}.collect {|w| corrected_word(w)}
|
17
|
+
remove_duplicates(text.split(" ").collect {|i| /(\w*)/.match(i)[1]}.select {|i| i.size > @minwordsize}.collect {|w| corrected_word(w)})
|
18
18
|
end
|
19
19
|
|
20
20
|
def speller
|
@@ -63,6 +63,15 @@ class Tagelizer
|
|
63
63
|
end
|
64
64
|
end
|
65
65
|
|
66
|
+
def remove_duplicates list
|
67
|
+
if list.empty?
|
68
|
+
[]
|
69
|
+
else
|
70
|
+
tmp = list.pop
|
71
|
+
remove_duplicates(list.select { |word| stemmer.stem(word) != stemmer.stem(tmp) }) + [tmp]
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
66
75
|
|
67
76
|
|
68
77
|
end
|
data/spec/tagelizer_spec.rb
CHANGED
@@ -22,6 +22,12 @@ describe "Tagelizer" do
|
|
22
22
|
it "can use basic form of words" do
|
23
23
|
tagi = Tagelizer.new
|
24
24
|
text = "He reads a book."
|
25
|
-
tagi.parse(text).should == ["
|
25
|
+
tagi.parse(text).should == ["reads", "book"]
|
26
|
+
end
|
27
|
+
|
28
|
+
it "should compare stems" do
|
29
|
+
tagi = Tagelizer.new
|
30
|
+
text = "He reads a book as a reading."
|
31
|
+
tagi.parse(text).should == ["book", "reading"]
|
26
32
|
end
|
27
33
|
end
|
data/tagelizer.gemspec
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tagelizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 19
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 1
|
9
|
-
- 3
|
10
|
-
version: 0.1.3
|
9
|
+
- 4
|
10
|
+
version: 0.1.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jonatan Reiners
|