highscore 0.3.1 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,7 @@
1
+ == 0.3.2 / 2012-01-27
2
+
3
+ * use custom lists of ignored words (blacklist), fallback to a default list
4
+
1
5
  == 0.3.1 / 2012-01-21
2
6
 
3
7
  * refactored and threw away all that array stuff that nobody wants
data/README.md CHANGED
@@ -29,6 +29,27 @@ Examples
29
29
  end
30
30
 
31
31
 
32
+ Using a custom blacklist to ignore keywords
33
+ -------------------------------------------
34
+
35
+ # setting single words
36
+ blacklist = Highscore::Blacklist.new
37
+ blacklist << 'foo'
38
+
39
+ # load a string/array
40
+ blacklist = Highscore::Blacklist.load "a string"
41
+ blacklist = Highscore::Blacklist.load %w{an array}
42
+
43
+ # loading from a file (separated by whitespace)
44
+ blacklist = Highscore::Blacklist.load_file "blacklist.txt"
45
+
46
+ # loading the default blacklist (used automatically as a fallback if none is explicitly given)
47
+ blacklist = Highscore::Blacklist.load_default_file
48
+
49
+ # inject the blacklist into the content class
50
+ content = Highscore::Content.new "a string", blacklist
51
+
52
+
32
53
  Requirements
33
54
  ------------
34
55
 
data/lib/blacklist.txt ADDED
@@ -0,0 +1,3 @@
1
+ the and that post add not see about using some something under our comments comment run
2
+ you want for will file are with end new this use all but can your just get very data blog
3
+ format out first they posts second
@@ -0,0 +1,72 @@
1
module Highscore

  # A list of blacklisted words that are to be ignored in the resulting
  # keywords.
  #
  # Words are stored exactly as given (apart from the punctuation stripping
  # done by Blacklist.load); no case normalisation is applied here.
  #
  class Blacklist
    include Enumerable

    # Punctuation characters removed from every word handed to Blacklist.load.
    PUNCTUATION = /[!.:,;\-+]/

    # Load a blacklist from a file of whitespace-separated words.
    #
    # file_path - path of the file to read
    #
    # Returns a new Blacklist.
    # Raises Errno::ENOENT (from File.read) if the file does not exist.
    def self.load_file(file_path)
      self.load(File.read(file_path))
    end

    # Load the default blacklist, read from "blacklist.txt" one directory
    # above the directory containing this source file.
    #
    # Returns a new Blacklist.
    def self.load_default_file
      file_path = File.join(File.dirname(__FILE__), %w{.. blacklist.txt})
      self.load_file(file_path)
    end

    # Load a blacklist from a string or an array of words.
    #
    # data - a String (split on whitespace) or an Array of strings;
    #        subclasses of either are accepted as well
    #
    # Punctuation (see PUNCTUATION) is stripped from every word. The given
    # array is never modified: the cleaned words go into a fresh array, so
    # frozen arrays are fine and the caller's data stays untouched.
    #
    # Returns a new Blacklist.
    # Raises ArgumentError if data is neither a String nor an Array.
    def self.load(data)
      words = case data
              when String then data.split(' ')
              when Array  then data
              else
                raise ArgumentError, "don't know how to handle a %s class" % data.class
              end

      self.new(words.map { |word| word.gsub(PUNCTUATION, '') })
    end

    attr_reader :words

    # words - the array of blacklisted words; it is stored as given (not
    #         copied), so later additions via #<< are visible through it
    def initialize(words = [])
      @words = words
    end

    # Iterate over all blacklisted words.
    #
    # Returns an Enumerator when called without a block.
    def each(&block)
      return to_enum(:each) unless block_given?

      @words.each(&block)
    end

    # Number of blacklisted words.
    def length
      @words.length
    end

    # Get an array of the blacklisted words.
    #
    def to_a
      @words.to_a
    end

    # Does the blacklist contain this keyword? The comparison is exact.
    #
    def include?(keyword)
      @words.include?(keyword)
    end

    # Add a new word to the blacklist.
    def <<(word)
      @words << word
    end

  end
end
@@ -5,9 +5,15 @@ module Highscore
5
5
  class Content
6
6
  attr_reader :content
7
7
 
8
- def initialize content
8
+ def initialize(content, blacklist = nil)
9
9
  @content = content
10
10
 
11
+ unless blacklist
12
+ blacklist = Highscore::Blacklist.load_default_file
13
+ end
14
+
15
+ @blacklist = blacklist
16
+
11
17
  @emphasis = {
12
18
  :multiplier => 1.0,
13
19
  :upper_case => 3.0,
@@ -36,7 +42,7 @@ module Highscore
36
42
  def keywords
37
43
  keywords = Keywords.new
38
44
 
39
- Keywords.find_keywords(@content).each do |text|
45
+ Keywords.find_keywords(@content, @blacklist).each do |text|
40
46
  weight = @emphasis[:multiplier]
41
47
 
42
48
  if text.length >= @emphasis[:long_words_threshold]
@@ -9,14 +9,14 @@ module Highscore
9
9
  include Enumerable
10
10
 
11
11
  # find keywords in a piece of content
12
- def self.find_keywords content
12
+ def self.find_keywords content, blacklist
13
13
  keywords = content.scan(/\w+/)
14
14
  keywords.delete_if do |x|
15
15
  x.match(/^[\d]+(\.[\d]+){0,1}$/) or x.length <= 2
16
16
  end
17
17
 
18
18
  keywords.delete_if do |key, value|
19
- %w{the and that post add not see about using some something under our comments comment run you want for will file are with end new this use all but can your just get very data blog format out first they posts second}.include? key.downcase
19
+ blacklist.include?(key.downcase)
20
20
  end
21
21
 
22
22
  keywords.sort
@@ -0,0 +1,2 @@
1
+ this is the test
2
+ blacklist file
@@ -0,0 +1,63 @@
1
+ $:.unshift(File.join(File.dirname(__FILE__), %w{.. .. lib highscore}))
2
+ require "blacklist"
3
+ require "test/unit"
4
+
5
# Exercises Highscore::Blacklist: the file, string and array loaders,
# appending words, and membership lookups.
class TestBlacklist < Test::Unit::TestCase
  # Loading the fixture file is expected to yield six words.
  def test_load_file
    fixture = File.join(File.dirname(__FILE__), %w{.. fixtures blacklist.txt})

    assert_equal 6, Highscore::Blacklist.load_file(fixture).length
  end

  # The default list is expected to contain 42 words.
  def test_load_default_file
    assert_equal 42, Highscore::Blacklist.load_default_file.length
  end

  # A path that does not exist surfaces as Errno::ENOENT.
  def test_load_file_fail
    assert_raises(Errno::ENOENT) { Highscore::Blacklist.load_file('foobar') }
  end

  # A blacklist created without arguments holds no words.
  def test_empty_blacklist
    assert_equal 0, Highscore::Blacklist.new.length
  end

  # Words appended with << show up in #words.
  def test_add_new_word
    list = Highscore::Blacklist.new
    list << 'foo'

    assert_equal ['foo'], list.words
  end

  # Strings are split on whitespace and stripped of punctuation.
  def test_load_string_and_remove_special_chars
    list = Highscore::Blacklist.load "this is an awesome string!"

    assert_equal 5, list.length
    assert_equal ['this', 'is', 'an', 'awesome', 'string'], list.to_a
  end

  # Arrays are taken over word by word.
  def test_load_array
    given = ['foo', 'bar', 'baz']
    list = Highscore::Blacklist.load(given)

    assert_equal given, list.words
  end

  # Anything that is neither a string nor an array is rejected.
  def test_load_unknown_type
    assert_raises(ArgumentError) { Highscore::Blacklist.load(1) }
  end

  # Membership checks find loaded words and reject unknown ones.
  def test_include?
    list = Highscore::Blacklist.load "foobar baz"

    assert list.include?("foobar")
    assert !list.include?("bla")
  end
end
data/version.txt CHANGED
@@ -1 +1 @@
1
- 0.3.1
1
+ 0.3.2
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: highscore
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-01-21 00:00:00.000000000 Z
12
+ date: 2012-01-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bones
16
- requirement: &70191947101320 !ruby/object:Gem::Requirement
16
+ requirement: &70200855600060 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,7 +21,7 @@ dependencies:
21
21
  version: 3.7.3
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *70191947101320
24
+ version_requirements: *70200855600060
25
25
  description: Rank keywords in long texts.
26
26
  email: liebler.dominik@googlemail.com
27
27
  executables:
@@ -30,6 +30,8 @@ extensions: []
30
30
  extra_rdoc_files:
31
31
  - History.txt
32
32
  - bin/highscore
33
+ - lib/blacklist.txt
34
+ - test/fixtures/blacklist.txt
33
35
  files:
34
36
  - .gitignore
35
37
  - History.txt
@@ -37,10 +39,14 @@ files:
37
39
  - Rakefile
38
40
  - bin/highscore
39
41
  - highscore.gemspec
42
+ - lib/blacklist.txt
40
43
  - lib/highscore.rb
44
+ - lib/highscore/blacklist.rb
41
45
  - lib/highscore/content.rb
42
46
  - lib/highscore/keyword.rb
43
47
  - lib/highscore/keywords.rb
48
+ - test/fixtures/blacklist.txt
49
+ - test/highscore/test_blacklist.rb
44
50
  - test/highscore/test_content.rb
45
51
  - test/highscore/test_keyword.rb
46
52
  - test/highscore/test_keywords.rb
@@ -73,6 +79,7 @@ signing_key:
73
79
  specification_version: 3
74
80
  summary: Rank keywords in long texts.
75
81
  test_files:
82
+ - test/highscore/test_blacklist.rb
76
83
  - test/highscore/test_content.rb
77
84
  - test/highscore/test_keyword.rb
78
85
  - test/highscore/test_keywords.rb