stopwords-filter 0.2.1 → 0.3.0

This diff compares the contents of two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,3 +1,6 @@
1
+ 0.3.0:
2
+ * Added custom words list to snowball filter [@sbeckeriv]
3
+ * Fixed problem about initialization: https://github.com/brenes/stopwords-filter/issues/3 [@zackxu1]
1
4
  0.2.0:
2
5
  * Added stopword? method [@s2gatev]
3
6
  * Added Sieve class [@s2gatev]
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.0
1
+ 0.2.1
@@ -1,17 +1,21 @@
1
- class Stopwords::Filter
1
+ module Stopwords
2
2
 
3
- attr_reader :stopwords
3
+ class Filter
4
4
 
5
- def initialize stopwords
6
- @stopwords = stopwords
7
- end
5
+ attr_reader :stopwords
8
6
 
9
- def filter words
10
- words - @stopwords
11
- end
7
+ def initialize stopwords
8
+ @stopwords = stopwords
9
+ end
10
+
11
+ def filter words
12
+ words - @stopwords
13
+ end
14
+
15
+ def stopword? word
16
+ stopwords.include? word
17
+ end
12
18
 
13
- def stopword? word
14
- stopwords.include? word
15
19
  end
16
20
 
17
21
  end
@@ -1,19 +1,16 @@
1
- class Stopwords::Snowball::Filter < Stopwords::Filter
2
-
3
-
4
- attr_reader :locale
5
- attr_reader :locale_filename
6
-
7
- def initialize locale
8
-
9
- @locale = locale
10
- @locale_filename = "#{File.dirname(__FILE__)}/locales/#{locale}.csv"
11
-
12
- raise "Unknown locale" unless File.exists?(@locale_filename)
13
-
14
- super File.read(@locale_filename).split(",")
15
-
1
+ module Stopwords
2
+ module Snowball
3
+ class Filter < Stopwords::Filter
4
+ attr_reader :locale
5
+ attr_reader :locale_filename
6
+
7
+ def initialize locale, custom_list = []
8
+ @locale = locale
9
+ @locale_filename = "#{File.dirname(__FILE__)}/locales/#{locale}.csv"
10
+
11
+ raise "Unknown locale" unless File.exists?(@locale_filename)
12
+ super File.read(@locale_filename).split(",") + custom_list
13
+ end
14
+ end
16
15
  end
17
-
18
-
19
- end
16
+ end
@@ -1,16 +1,20 @@
1
- class Stopwords::Snowball::WordSieve
2
- def initialize
3
- @filters = Dir[File.dirname(__FILE__) + '/locales/*.csv'].each_with_object({}) do |file, filters|
4
- lang = File.basename(file, '.csv').to_sym
5
- filters[lang] = Stopwords::Snowball::Filter.new lang
6
- end
7
- end
1
+ module Stopwords
2
+ module Snowball
3
+ class Stopwords::Snowball::WordSieve
4
+ def initialize custom_list = []
5
+ @filters = Dir[File.dirname(__FILE__) + '/locales/*.csv'].each_with_object({}) do |file, filters|
6
+ lang = File.basename(file, '.csv').to_sym
7
+ filters[lang] = Stopwords::Snowball::Filter.new lang, custom_list
8
+ end
9
+ end
8
10
 
9
- def stopword? args={}
10
- args[:lang] ? @filters[args[:lang]].stopword?(args[:word] ) : false
11
- end
11
+ def stopword? args={}
12
+ args[:lang] ? @filters[args[:lang]].stopword?(args[:word] ) : false
13
+ end
12
14
 
13
- def filter args={}
14
- args[:lang] ? @filters[args[:lang]].filter(args[:words] ) : args[:words]
15
+ def filter args={}
16
+ args[:lang] ? @filters[args[:lang]].filter(args[:words] ) : args[:words]
17
+ end
18
+ end
15
19
  end
16
- end
20
+ end
@@ -15,5 +15,16 @@ describe Stopwords::Snowball::Filter do
15
15
 
16
16
  end
17
17
 
18
+ context "when custom list" do
18
19
 
19
- end
20
+ let (:filter) { Stopwords::Snowball::Filter.new "es", ["Santurce"] }
21
+
22
+ subject { filter }
23
+
24
+ it("should remove the stopwords for the list of words to be filtered") { filter.filter("desde Santurce a Bilbao".split).should == ["Bilbao"]}
25
+
26
+ end
27
+
28
+
29
+
30
+ end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: stopwords-filter
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.2.1
5
+ version: 0.3.0
6
6
  platform: ruby
7
7
  authors:
8
8
  - David J. Brenes