highscore 0.4.3 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +7 -0
- data/README.md +10 -0
- data/bin/highscore +12 -2
- data/lib/highscore/blacklist.rb +5 -59
- data/lib/highscore/content.rb +53 -13
- data/lib/highscore/keyword.rb +7 -0
- data/lib/highscore/keywords.rb +28 -9
- data/lib/highscore/string.rb +10 -3
- data/lib/highscore/whitelist.rb +10 -0
- data/lib/highscore/wordlist.rb +76 -0
- data/test/highscore/test_blacklist.rb +10 -58
- data/test/highscore/test_content.rb +8 -0
- data/test/highscore/test_whitelist.rb +17 -0
- data/test/highscore/test_wordlist.rb +59 -0
- data/version.txt +1 -1
- metadata +11 -5
data/History.txt
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
== 0.5.0 / 2012-02-20
|
2
|
+
|
3
|
+
* now supports a whitelist approach
|
4
|
+
* <= 2 char words are ignored by default, but is now configurable
|
5
|
+
* refactored a huge part of the API
|
6
|
+
* added some more inline-documentation
|
7
|
+
|
1
8
|
== 0.4.3 / 2012-02-11
|
2
9
|
|
3
10
|
* rate words based on the amount of vowels and consonants
|
data/README.md
CHANGED
@@ -10,6 +10,7 @@ Features
|
|
10
10
|
* rate based on amount (%) of vowels and consonants in a string
|
11
11
|
* directly get keywords from String objects
|
12
12
|
* blacklist words via a plain text file, String or an Array of words
|
13
|
+
* optionally, configure a whitelist and only words from that list will get ranked
|
13
14
|
* merge together Keywords from multiple sources
|
14
15
|
* contains a CLI tool that operates on STDIN/OUT and is configurable via parameters
|
15
16
|
|
@@ -79,6 +80,15 @@ blacklist = Highscore::Blacklist.load_default_file
|
|
79
80
|
content = Highscore::Content.new "a string", blacklist
|
80
81
|
```
|
81
82
|
|
83
|
+
Using a whitelist instead of ranking all words
|
84
|
+
----------------------------------------------
|
85
|
+
|
86
|
+
```ruby
|
87
|
+
# construct and inject it just like a blacklist
|
88
|
+
whitelist = Highscore::Whitelist.load %w{these are valid keywords}
|
89
|
+
content = Highscore::Content.new "invalid words", whitelist
|
90
|
+
```
|
91
|
+
|
82
92
|
Install
|
83
93
|
-------
|
84
94
|
|
data/bin/highscore
CHANGED
@@ -18,7 +18,12 @@ optparse = OptionParser.new do |opts|
|
|
18
18
|
|
19
19
|
# blacklist file
|
20
20
|
opts.on('-b', '--blacklist FILEPATH', 'specify a blacklist file that should be used instead of the default') do |filepath|
|
21
|
-
options[:
|
21
|
+
options[:wordlist] = Highscore::Blacklist.load_file(filepath)
|
22
|
+
end
|
23
|
+
|
24
|
+
# whitelist file
|
25
|
+
opts.on('-w', '--whitelist FILEPATH', 'specify a whitelist file') do |filepath|
|
26
|
+
options[:wordlist] = Highscore::Whitelist.load_file(filepath)
|
22
27
|
end
|
23
28
|
|
24
29
|
# general multiplier
|
@@ -26,6 +31,11 @@ optparse = OptionParser.new do |opts|
|
|
26
31
|
options[:emphasis][:multiplier] = multiplier.to_f
|
27
32
|
end
|
28
33
|
|
34
|
+
# don't ignore short words
|
35
|
+
opts.on('--no-ignore-short', 'don\'t ignore short words (<= 2 chars)') do
|
36
|
+
options[:emphasis][:ignore_short_words] = false
|
37
|
+
end
|
38
|
+
|
29
39
|
# don't print rank weight
|
30
40
|
opts.on('-s', '--short', 'don\'t print rank weight') do
|
31
41
|
options[:short] = true
|
@@ -64,7 +74,7 @@ end
|
|
64
74
|
optparse.parse!
|
65
75
|
|
66
76
|
text = STDIN.read.to_s
|
67
|
-
keywords = text.keywords(options[:
|
77
|
+
keywords = text.keywords(options[:wordlist]) do |content|
|
68
78
|
options[:emphasis].each do |key,value|
|
69
79
|
content.set key, value
|
70
80
|
end
|
data/lib/highscore/blacklist.rb
CHANGED
@@ -1,72 +1,18 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__))
|
2
|
+
require "wordlist"
|
3
|
+
|
1
4
|
module Highscore
|
2
5
|
|
3
6
|
# blacklisted words to be ignored in the resulting keywords
|
4
7
|
#
|
5
|
-
class Blacklist
|
6
|
-
include Enumerable
|
7
|
-
|
8
|
-
# load a file of keywords
|
9
|
-
def self.load_file file_path
|
10
|
-
words = File.read(file_path).split(' ')
|
11
|
-
self.load(words)
|
12
|
-
end
|
8
|
+
class Blacklist < Wordlist
|
13
9
|
|
14
10
|
# load default file
|
15
11
|
#
|
12
|
+
# @return Highscore::Blacklist
|
16
13
|
def self.load_default_file
|
17
14
|
file_path = File.join(File.dirname(__FILE__), %w{.. blacklist.txt})
|
18
15
|
self.load_file(file_path)
|
19
16
|
end
|
20
|
-
|
21
|
-
# load a file or array of words
|
22
|
-
#
|
23
|
-
def self.load(data)
|
24
|
-
if data.instance_of?(String)
|
25
|
-
words = data.split(' ')
|
26
|
-
elsif data.instance_of? Array
|
27
|
-
words = data
|
28
|
-
else
|
29
|
-
raise ArgumentError, "don't know how to handle a %s class" % data.class
|
30
|
-
end
|
31
|
-
|
32
|
-
words.map! {|x| x.gsub(/[\!\.\:\,\;\-\+]/, '') }
|
33
|
-
|
34
|
-
self.new(words)
|
35
|
-
end
|
36
|
-
|
37
|
-
attr_reader :words
|
38
|
-
|
39
|
-
def initialize(words = [])
|
40
|
-
@words = words
|
41
|
-
end
|
42
|
-
|
43
|
-
# iterate over words
|
44
|
-
#
|
45
|
-
def each
|
46
|
-
@words.each {|x| yield x }
|
47
|
-
end
|
48
|
-
|
49
|
-
# count of ignored words
|
50
|
-
def length
|
51
|
-
@words.length
|
52
|
-
end
|
53
|
-
|
54
|
-
# get an array of blacklisted words
|
55
|
-
#
|
56
|
-
def to_a
|
57
|
-
@words.to_a
|
58
|
-
end
|
59
|
-
|
60
|
-
# does the blacklist contain this keyword?
|
61
|
-
#
|
62
|
-
def include? keyword
|
63
|
-
@words.include? keyword
|
64
|
-
end
|
65
|
-
|
66
|
-
# add a new word to the blacklist
|
67
|
-
def <<(word)
|
68
|
-
@words << word
|
69
|
-
end
|
70
|
-
|
71
17
|
end
|
72
18
|
end
|
data/lib/highscore/content.rb
CHANGED
@@ -5,57 +5,89 @@ module Highscore
|
|
5
5
|
class Content
|
6
6
|
attr_reader :content
|
7
7
|
|
8
|
-
|
8
|
+
# @param content String
|
9
|
+
# @param wordlist Highscore::Wordlist
|
10
|
+
def initialize(content, wordlist = nil)
|
9
11
|
@content = content
|
10
|
-
|
11
|
-
|
12
|
-
|
12
|
+
@whitelist = @blacklist = nil
|
13
|
+
|
14
|
+
if wordlist.nil?
|
15
|
+
@blacklist = Highscore::Blacklist.load_default_file
|
16
|
+
elsif wordlist.kind_of? Highscore::Blacklist
|
17
|
+
@blacklist = wordlist
|
18
|
+
else
|
19
|
+
@whitelist = wordlist
|
13
20
|
end
|
14
21
|
|
15
|
-
@blacklist = blacklist
|
16
|
-
|
17
22
|
@emphasis = {
|
18
23
|
:multiplier => 1.0,
|
19
24
|
:upper_case => 3.0,
|
20
25
|
:long_words => 2.0,
|
21
26
|
:long_words_threshold => 15,
|
22
27
|
:vowels => 0,
|
23
|
-
:consonants => 0
|
28
|
+
:consonants => 0,
|
29
|
+
:ignore_short_words => true
|
24
30
|
}
|
25
31
|
end
|
26
32
|
|
27
33
|
# configure ranking
|
28
34
|
#
|
35
|
+
# @param block
|
29
36
|
def configure(&block)
|
30
37
|
instance_eval(&block)
|
31
38
|
end
|
32
39
|
|
33
40
|
# set emphasis options to rank the content
|
34
41
|
#
|
42
|
+
# @param key Symbol
|
43
|
+
# @param value Object
|
35
44
|
def set(key, value)
|
36
|
-
@emphasis[key.to_sym] = value
|
45
|
+
@emphasis[key.to_sym] = value
|
37
46
|
end
|
38
47
|
|
39
48
|
# get the ranked keywords
|
40
49
|
#
|
41
|
-
#
|
42
|
-
# keywords -> Keywords
|
43
|
-
#
|
50
|
+
# @return Highscore::Keywords
|
44
51
|
def keywords
|
45
52
|
keywords = Keywords.new
|
46
53
|
|
47
|
-
Keywords.find_keywords(@content,
|
54
|
+
Keywords.find_keywords(@content, wordlist).each do |text|
|
48
55
|
text = text.to_s
|
49
|
-
|
56
|
+
|
57
|
+
if not (text.match(/^[\d]+(\.[\d]+){0,1}$/) or text.length <= 2)
|
58
|
+
keywords << Highscore::Keyword.new(text, weight(text))
|
59
|
+
elsif allow_short_words
|
60
|
+
keywords << Highscore::Keyword.new(text, weight(text))
|
61
|
+
end
|
50
62
|
end
|
51
63
|
|
52
64
|
keywords
|
53
65
|
end
|
54
66
|
|
67
|
+
# get the used wordlist
|
68
|
+
#
|
69
|
+
# @return Highscore::Wordlist
|
70
|
+
def wordlist
|
71
|
+
unless @whitelist.nil?
|
72
|
+
@whitelist
|
73
|
+
else
|
74
|
+
@blacklist
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
55
78
|
private
|
56
79
|
|
80
|
+
# allow short words to be rated
|
81
|
+
#
|
82
|
+
# @return TrueClass FalseClass
|
83
|
+
def allow_short_words
|
84
|
+
not @emphasis[:ignore_short_words]
|
85
|
+
end
|
86
|
+
|
57
87
|
# weight a single text keyword
|
58
88
|
#
|
89
|
+
# @param text String
|
90
|
+
# @return Float
|
59
91
|
def weight(text)
|
60
92
|
weight = @emphasis[:multiplier]
|
61
93
|
|
@@ -72,11 +104,19 @@ module Highscore
|
|
72
104
|
weight
|
73
105
|
end
|
74
106
|
|
107
|
+
# weight the vowels on a text
|
108
|
+
#
|
109
|
+
# @param text String
|
110
|
+
# @return Float
|
75
111
|
def vowels(text)
|
76
112
|
percent = text.vowels.length / text.length.to_f
|
77
113
|
percent * @emphasis[:vowels]
|
78
114
|
end
|
79
115
|
|
116
|
+
# weight the consonants on a text
|
117
|
+
#
|
118
|
+
# @param text String
|
119
|
+
# @return Float
|
80
120
|
def consonants(text)
|
81
121
|
percent = text.consonants.length / text.length.to_f
|
82
122
|
percent * @emphasis[:consonants]
|
data/lib/highscore/keyword.rb
CHANGED
@@ -6,17 +6,24 @@ module Highscore
|
|
6
6
|
attr_accessor :weight, :text
|
7
7
|
|
8
8
|
# init a keyword
|
9
|
+
#
|
10
|
+
# @param text String
|
11
|
+
# @param weight Float
|
9
12
|
def initialize(text, weight)
|
10
13
|
@text = text
|
11
14
|
@weight = weight.to_f
|
12
15
|
end
|
13
16
|
|
14
17
|
# sort keywords
|
18
|
+
#
|
19
|
+
# @param other Highscore::Keyword
|
15
20
|
def <=>(other)
|
16
21
|
other.weight <=> @weight
|
17
22
|
end
|
18
23
|
|
19
24
|
# get the string
|
25
|
+
#
|
26
|
+
# @return String
|
20
27
|
def to_s
|
21
28
|
@text
|
22
29
|
end
|
data/lib/highscore/keywords.rb
CHANGED
@@ -9,14 +9,19 @@ module Highscore
|
|
9
9
|
include Enumerable
|
10
10
|
|
11
11
|
# find keywords in a piece of content
|
12
|
-
|
12
|
+
#
|
13
|
+
# @param content String
|
14
|
+
# @param wordlist Highscore::Wordlist
|
15
|
+
# @return Highscore::Keywords
|
16
|
+
def self.find_keywords content, wordlist
|
13
17
|
keywords = content.to_s.scan(/\w+/)
|
14
|
-
keywords.delete_if do |x|
|
15
|
-
x.match(/^[\d]+(\.[\d]+){0,1}$/) or x.length <= 2
|
16
|
-
end
|
17
18
|
|
18
19
|
keywords.delete_if do |key, value|
|
19
|
-
|
20
|
+
if wordlist.kind_of? Highscore::Blacklist
|
21
|
+
wordlist.include?(key.downcase)
|
22
|
+
elsif wordlist.kind_of? Highscore::Whitelist
|
23
|
+
not wordlist.include?(key.downcase)
|
24
|
+
end
|
20
25
|
end
|
21
26
|
|
22
27
|
keywords.sort
|
@@ -31,21 +36,23 @@ module Highscore
|
|
31
36
|
# ranks the keywords and removes keywords that have a low ranking
|
32
37
|
# or are blacklisted
|
33
38
|
#
|
34
|
-
#
|
35
|
-
# rank -> array
|
36
|
-
#
|
39
|
+
# @return Array
|
37
40
|
def rank
|
38
41
|
sort
|
39
42
|
end
|
40
43
|
|
41
44
|
# get the top n keywords
|
42
45
|
#
|
46
|
+
# @param n Fixnum
|
47
|
+
# @return Array
|
43
48
|
def top n = 10
|
44
49
|
rank[0..(n - 1)]
|
45
50
|
end
|
46
51
|
|
47
52
|
# add new keywords
|
48
53
|
#
|
54
|
+
# @param keyword String
|
55
|
+
# @return Highscore::Keywords
|
49
56
|
def <<(keyword)
|
50
57
|
key = Digest::SHA1.hexdigest(keyword.text)
|
51
58
|
|
@@ -54,9 +61,13 @@ module Highscore
|
|
54
61
|
else
|
55
62
|
@keywords[key] = keyword
|
56
63
|
end
|
64
|
+
|
65
|
+
@keywords
|
57
66
|
end
|
58
67
|
|
59
68
|
# sort
|
69
|
+
#
|
70
|
+
# @return Array
|
60
71
|
def sort
|
61
72
|
sorted = @keywords.sort {|a,b| a[1] <=> b[1] }
|
62
73
|
|
@@ -66,26 +77,34 @@ module Highscore
|
|
66
77
|
|
67
78
|
# Enumerable
|
68
79
|
#
|
69
|
-
def each
|
80
|
+
def each
|
70
81
|
@keywords.each {|keyword| yield keyword[1] }
|
71
82
|
end
|
72
83
|
|
73
84
|
# number of Keywords given
|
85
|
+
#
|
86
|
+
# @return Fixnum
|
74
87
|
def length
|
75
88
|
@keywords.length
|
76
89
|
end
|
77
90
|
|
78
91
|
# get the keyword with the highest rank
|
92
|
+
#
|
93
|
+
# @return Highscore::Keyword
|
79
94
|
def first
|
80
95
|
sort.first
|
81
96
|
end
|
82
97
|
|
83
98
|
# get the keyword with the lowest rank
|
99
|
+
#
|
100
|
+
# @return Highscore::Keyword
|
84
101
|
def last
|
85
102
|
sort.reverse.first
|
86
103
|
end
|
87
104
|
|
88
105
|
# merge in another keyword list, operates on self
|
106
|
+
#
|
107
|
+
# @return Highscore::Keywords
|
89
108
|
def merge!(other)
|
90
109
|
other.each do |keyword|
|
91
110
|
self << keyword
|
data/lib/highscore/string.rb
CHANGED
@@ -1,11 +1,14 @@
|
|
1
|
-
# monkey patch to
|
1
|
+
# monkey patch to call custom methods on arbitrary strings
|
2
2
|
#
|
3
3
|
class String
|
4
4
|
|
5
5
|
# get keywords from the string
|
6
6
|
#
|
7
|
-
|
8
|
-
|
7
|
+
# @param wordlist Highscore::Wordlist
|
8
|
+
# @param block Block
|
9
|
+
# @return Highscore::Keywords
|
10
|
+
def keywords(wordlist = nil, &block)
|
11
|
+
content = Highscore::Content.new(self, wordlist)
|
9
12
|
|
10
13
|
if block_given?
|
11
14
|
content.configure do
|
@@ -17,11 +20,15 @@ class String
|
|
17
20
|
end
|
18
21
|
|
19
22
|
# get all vowels from a string
|
23
|
+
#
|
24
|
+
# @return String
|
20
25
|
def vowels
|
21
26
|
gsub(/[^aeiou]/, '')
|
22
27
|
end
|
23
28
|
|
24
29
|
# get all consonants from a string
|
30
|
+
#
|
31
|
+
# @return String
|
25
32
|
def consonants
|
26
33
|
gsub(/[aeiou]/, '')
|
27
34
|
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
module Highscore
|
2
|
+
|
3
|
+
# a basic list of words
|
4
|
+
class Wordlist
|
5
|
+
include Enumerable
|
6
|
+
|
7
|
+
# load a file of keywords
|
8
|
+
#
|
9
|
+
# @param file_path String
|
10
|
+
# @return Highscore::Wordlist
|
11
|
+
def self.load_file file_path
|
12
|
+
words = File.read(file_path).split(' ')
|
13
|
+
self.load(words)
|
14
|
+
end
|
15
|
+
|
16
|
+
# load a string or array of words
|
17
|
+
#
|
18
|
+
# @param data String Array
|
19
|
+
# @return Highscore::Wordlist
|
20
|
+
def self.load(data)
|
21
|
+
if data.instance_of?(String)
|
22
|
+
words = data.split(' ')
|
23
|
+
elsif data.instance_of? Array
|
24
|
+
words = data
|
25
|
+
else
|
26
|
+
raise ArgumentError, "don't know how to handle a %s class" % data.class
|
27
|
+
end
|
28
|
+
|
29
|
+
words.map! {|x| x.gsub(/[\!\.\:\,\;\-\+]/, '') }
|
30
|
+
|
31
|
+
self.new(words)
|
32
|
+
end
|
33
|
+
|
34
|
+
attr_reader :words
|
35
|
+
|
36
|
+
# @param words Array
|
37
|
+
def initialize(words = [])
|
38
|
+
@words = words
|
39
|
+
end
|
40
|
+
|
41
|
+
# iterate over words
|
42
|
+
#
|
43
|
+
def each
|
44
|
+
@words.each {|word| yield word }
|
45
|
+
end
|
46
|
+
|
47
|
+
# count of words in the list
|
48
|
+
#
|
49
|
+
# @return Fixnum
|
50
|
+
def length
|
51
|
+
@words.length
|
52
|
+
end
|
53
|
+
|
54
|
+
# get an array of the words in the list
|
55
|
+
#
|
56
|
+
# @return Array
|
57
|
+
def to_a
|
58
|
+
@words.to_a
|
59
|
+
end
|
60
|
+
|
61
|
+
# does the wordlist contain this keyword?
|
62
|
+
#
|
63
|
+
# @param keyword String
|
64
|
+
# @return true/false
|
65
|
+
def include? keyword
|
66
|
+
@words.include? keyword
|
67
|
+
end
|
68
|
+
|
69
|
+
# add a new word to the wordlist
|
70
|
+
#
|
71
|
+
# @param word String
|
72
|
+
def <<(word)
|
73
|
+
@words << word
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
@@ -3,11 +3,9 @@ require "blacklist"
|
|
3
3
|
require "test/unit"
|
4
4
|
|
5
5
|
class TestBlacklist < Test::Unit::TestCase
|
6
|
-
def
|
7
|
-
|
8
|
-
blacklist
|
9
|
-
|
10
|
-
assert_equal 6, blacklist.length
|
6
|
+
def test_is_a_wordlist
|
7
|
+
blacklist = Highscore::Blacklist.new
|
8
|
+
assert blacklist.kind_of? Highscore::Wordlist
|
11
9
|
end
|
12
10
|
|
13
11
|
def test_load_default_file
|
@@ -15,62 +13,16 @@ class TestBlacklist < Test::Unit::TestCase
|
|
15
13
|
assert_equal 42, blacklist.length
|
16
14
|
end
|
17
15
|
|
18
|
-
def test_load_file_fail
|
19
|
-
assert_raises(Errno::ENOENT) do
|
20
|
-
Highscore::Blacklist.load_file('foobar')
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
def test_empty_blacklist
|
25
|
-
blacklist = Highscore::Blacklist.new
|
26
|
-
assert_equal 0, blacklist.length
|
27
|
-
end
|
28
|
-
|
29
|
-
def test_add_new_word
|
30
|
-
blacklist = Highscore::Blacklist.new
|
31
|
-
blacklist << 'foo'
|
32
|
-
|
33
|
-
assert_equal ['foo'], blacklist.words
|
34
|
-
end
|
35
|
-
|
36
|
-
def test_load_string_and_remove_special_chars
|
37
|
-
blacklist = Highscore::Blacklist.load "this is an awesome string!"
|
38
|
-
assert_equal 5, blacklist.length
|
39
|
-
|
40
|
-
assert_equal ['this', 'is', 'an', 'awesome', 'string'], blacklist.to_a
|
41
|
-
end
|
42
|
-
|
43
|
-
def test_load_array
|
44
|
-
words = ['foo', 'bar', 'baz']
|
45
|
-
|
46
|
-
blacklist = Highscore::Blacklist.load(words)
|
47
|
-
|
48
|
-
assert_equal words, blacklist.words
|
49
|
-
end
|
50
|
-
|
51
|
-
def test_load_unknown_type
|
52
|
-
assert_raises ArgumentError do
|
53
|
-
Highscore::Blacklist.load(1)
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
def test_include?
|
58
|
-
blacklist = Highscore::Blacklist.load "foobar baz"
|
59
|
-
|
60
|
-
assert blacklist.include?("foobar")
|
61
|
-
assert !blacklist.include?("bla")
|
62
|
-
end
|
63
|
-
|
64
16
|
def test_blacklisting_content
|
65
|
-
|
17
|
+
keywords = "Foo bar is not bar baz".keywords(Highscore::Blacklist.load(%w(baz)))
|
66
18
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
19
|
+
keyword_list = []
|
20
|
+
keywords.rank.each do |k|
|
21
|
+
keyword_list << k.text
|
22
|
+
end
|
71
23
|
|
72
|
-
|
24
|
+
expected_keywords = %w(Foo bar not)
|
73
25
|
|
74
|
-
|
26
|
+
assert_equal expected_keywords, keyword_list
|
75
27
|
end
|
76
28
|
end
|
@@ -42,4 +42,12 @@ class TestContent < Test::Unit::TestCase
|
|
42
42
|
assert_equal 3.75, keywords.first.weight
|
43
43
|
assert_equal 3.5, keywords.last.weight
|
44
44
|
end
|
45
|
+
|
46
|
+
def test_rank_short_words
|
47
|
+
keywords = 'be as is foobar'.keywords do
|
48
|
+
set :ignore_short_words, false
|
49
|
+
end
|
50
|
+
|
51
|
+
assert_equal 4, keywords.length
|
52
|
+
end
|
45
53
|
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
$:.unshift(File.join(File.dirname(__FILE__), %w{.. .. lib highscore}))
|
2
|
+
require "whitelist"
|
3
|
+
require "test/unit"
|
4
|
+
|
5
|
+
class TestBlacklist < Test::Unit::TestCase
|
6
|
+
def test_is_wordlist
|
7
|
+
whitelist = Highscore::Whitelist.new
|
8
|
+
assert whitelist.kind_of? Highscore::Wordlist
|
9
|
+
end
|
10
|
+
|
11
|
+
def test_whitelist_content
|
12
|
+
whitelist = Highscore::Whitelist.load %w{foo bar}
|
13
|
+
|
14
|
+
content = Highscore::Content.new "foo baz bar", whitelist
|
15
|
+
assert_equal 2, content.keywords.length
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
$:.unshift(File.join(File.dirname(__FILE__), %w{.. .. lib highscore}))
|
2
|
+
require "wordlist"
|
3
|
+
require "test/unit"
|
4
|
+
|
5
|
+
class TestBlacklist < Test::Unit::TestCase
|
6
|
+
|
7
|
+
def test_load_file
|
8
|
+
file_path = File.join(File.dirname(__FILE__), %w{.. fixtures blacklist.txt})
|
9
|
+
blacklist = Highscore::Wordlist.load_file(file_path)
|
10
|
+
|
11
|
+
assert_equal 6, blacklist.length
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_load_file_fail
|
15
|
+
assert_raises(Errno::ENOENT) do
|
16
|
+
Highscore::Blacklist.load_file('foobar')
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_empty_blacklist
|
21
|
+
blacklist = Highscore::Wordlist.new
|
22
|
+
assert_equal 0, blacklist.length
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_add_new_word
|
26
|
+
blacklist = Highscore::Wordlist.new
|
27
|
+
blacklist << 'foo'
|
28
|
+
|
29
|
+
assert_equal %w(foo), blacklist.words
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_load_string_and_remove_special_chars
|
33
|
+
blacklist = Highscore::Wordlist.load "this is an awesome string!"
|
34
|
+
assert_equal 5, blacklist.length
|
35
|
+
|
36
|
+
assert_equal %w{this is an awesome string}, blacklist.to_a
|
37
|
+
end
|
38
|
+
|
39
|
+
def test_load_array
|
40
|
+
words = %w{foo bar baz}
|
41
|
+
|
42
|
+
blacklist = Highscore::Wordlist.load(words)
|
43
|
+
|
44
|
+
assert_equal words, blacklist.words
|
45
|
+
end
|
46
|
+
|
47
|
+
def test_load_unknown_type
|
48
|
+
assert_raises ArgumentError do
|
49
|
+
Highscore::Wordlist.load(1)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def test_include?
|
54
|
+
blacklist = Highscore::Wordlist.load "foobar baz"
|
55
|
+
|
56
|
+
assert blacklist.include?("foobar")
|
57
|
+
assert !blacklist.include?("bla")
|
58
|
+
end
|
59
|
+
end
|
data/version.txt
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.5.0
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: highscore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02-
|
12
|
+
date: 2012-02-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bones
|
16
|
-
requirement: &
|
16
|
+
requirement: &70127917245680 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
version: 3.7.3
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70127917245680
|
25
25
|
description: Find and rank keywords in long texts.
|
26
26
|
email: liebler.dominik@googlemail.com
|
27
27
|
executables:
|
@@ -46,12 +46,16 @@ files:
|
|
46
46
|
- lib/highscore/keyword.rb
|
47
47
|
- lib/highscore/keywords.rb
|
48
48
|
- lib/highscore/string.rb
|
49
|
+
- lib/highscore/whitelist.rb
|
50
|
+
- lib/highscore/wordlist.rb
|
49
51
|
- test/fixtures/blacklist.txt
|
50
52
|
- test/highscore/test_blacklist.rb
|
51
53
|
- test/highscore/test_content.rb
|
52
54
|
- test/highscore/test_keyword.rb
|
53
55
|
- test/highscore/test_keywords.rb
|
54
56
|
- test/highscore/test_string.rb
|
57
|
+
- test/highscore/test_whitelist.rb
|
58
|
+
- test/highscore/test_wordlist.rb
|
55
59
|
- test/test_highscore.rb
|
56
60
|
- version.txt
|
57
61
|
homepage: http://thewebdev.de
|
@@ -76,7 +80,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
76
80
|
version: '0'
|
77
81
|
requirements: []
|
78
82
|
rubyforge_project: highscore
|
79
|
-
rubygems_version: 1.8.
|
83
|
+
rubygems_version: 1.8.16
|
80
84
|
signing_key:
|
81
85
|
specification_version: 3
|
82
86
|
summary: Find and rank keywords in long texts.
|
@@ -86,4 +90,6 @@ test_files:
|
|
86
90
|
- test/highscore/test_keyword.rb
|
87
91
|
- test/highscore/test_keywords.rb
|
88
92
|
- test/highscore/test_string.rb
|
93
|
+
- test/highscore/test_whitelist.rb
|
94
|
+
- test/highscore/test_wordlist.rb
|
89
95
|
- test/test_highscore.rb
|