stopwords-filter 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +3 -0
- data/VERSION +1 -1
- data/lib/stopwords/filter.rb +14 -10
- data/lib/stopwords/snowball/filter.rb +15 -18
- data/lib/stopwords/snowball/wordsieve.rb +17 -13
- data/spec/lib/snowball_filter_spec.rb +12 -1
- metadata +1 -1
data/CHANGELOG
CHANGED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.
|
1
|
+
0.2.1
|
data/lib/stopwords/filter.rb
CHANGED
@@ -1,17 +1,21 @@
|
|
1
|
-
|
1
|
+
module Stopwords
  # Removes a fixed list of stop words from collections of words.
  class Filter
    # @return [Array] the stop-word list this filter was built with
    attr_reader :stopwords

    # @param stopwords [Array] words that should be treated as stop words
    def initialize(stopwords)
      @stopwords = stopwords
    end

    # Returns the given words without any stop word.
    #
    # Implemented as an array difference, so every occurrence of a stop word
    # is dropped and the relative order of the remaining words is kept.
    #
    # @param words [Array] words to be filtered
    # @return [Array] a new array containing only the non stop words
    def filter(words)
      words - @stopwords
    end

    # @param word [Object] candidate word
    # @return [Boolean] true when +word+ is part of the stop-word list
    def stopword?(word)
      stopwords.include?(word)
    end
  end
end
|
@@ -1,19 +1,16 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
1
|
+
module Stopwords
  module Snowball
    # Stop-word filter whose word list is read from a per-locale CSV file
    # stored in the `locales` directory next to this source file.
    class Filter < Stopwords::Filter
      # @return [String, Symbol] the locale this filter was created for
      attr_reader :locale
      # @return [String] path of the CSV file the stop words were read from
      attr_reader :locale_filename

      # @param locale [String, Symbol] locale name; selects `locales/<locale>.csv`
      # @param custom_list [Array<String>] extra stop words appended to the
      #   words read from the locale file
      # @raise [RuntimeError] "Unknown locale" when no CSV file exists for +locale+
      def initialize(locale, custom_list = [])
        @locale = locale
        @locale_filename = "#{File.dirname(__FILE__)}/locales/#{locale}.csv"

        # File.exists? was removed in Ruby 3.2; File.exist? is the supported spelling.
        raise "Unknown locale" unless File.exist?(@locale_filename)

        # Strip each entry so a trailing newline or padding around the commas
        # in the file cannot yield a stop word that never matches a token.
        locale_words = File.read(@locale_filename).split(",").map(&:strip)
        super(locale_words + custom_list)
      end
    end
  end
end
|
@@ -1,16 +1,20 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
1
|
+
module Stopwords
  module Snowball
    # Holds one Snowball filter per bundled locale and dispatches stop-word
    # queries to the filter that matches the requested language.
    class WordSieve
      # Builds a Stopwords::Snowball::Filter for every `locales/*.csv` file
      # found next to this source file, keyed by the file's base name as a Symbol.
      #
      # @param custom_list [Array<String>] extra stop words handed to every filter
      def initialize(custom_list = [])
        pattern = File.join(File.dirname(__FILE__), 'locales', '*.csv')
        @filters = Dir[pattern].each_with_object({}) do |file, filters|
          lang = File.basename(file, '.csv').to_sym
          filters[lang] = Stopwords::Snowball::Filter.new(lang, custom_list)
        end
      end

      # @param args [Hash] expects :lang (locale name) and :word
      # @return [Boolean] whether :word is a stop word for :lang; false when
      #   :lang is missing or no filter is loaded for it
      def stopword?(args = {})
        sieve = filter_for(args[:lang])
        sieve ? sieve.stopword?(args[:word]) : false
      end

      # @param args [Hash] expects :lang (locale name) and :words
      # @return [Array] :words without the stop words of :lang; :words is
      #   returned untouched when :lang is missing or no filter is loaded for it
      def filter(args = {})
        sieve = filter_for(args[:lang])
        sieve ? sieve.filter(args[:words]) : args[:words]
      end

      private

      # Looks up the filter for +lang+. Filters are keyed by Symbol, so the
      # key is normalised first; returns nil instead of raising when +lang+
      # is absent or unknown.
      def filter_for(lang)
        return nil unless lang.respond_to?(:to_sym)

        @filters[lang.to_sym]
      end
    end
  end
end
|
@@ -15,5 +15,16 @@ describe Stopwords::Snowball::Filter do
|
|
15
15
|
|
16
16
|
end
|
17
17
|
|
18
|
+
# Words passed as the custom list must be filtered out together with the
# locale's own stop words.
context "when custom list" do
  let(:filter) { Stopwords::Snowball::Filter.new "es", ["Santurce"] }

  subject { filter }

  it "should remove the stopwords for the list of words to be filtered" do
    filter.filter("desde Santurce a Bilbao".split).should == ["Bilbao"]
  end
end
|
27
|
+
|
28
|
+
|
29
|
+
|
30
|
+
end
|