highscore 0.4.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +7 -0
- data/README.md +10 -0
- data/bin/highscore +12 -2
- data/lib/highscore/blacklist.rb +5 -59
- data/lib/highscore/content.rb +53 -13
- data/lib/highscore/keyword.rb +7 -0
- data/lib/highscore/keywords.rb +28 -9
- data/lib/highscore/string.rb +10 -3
- data/lib/highscore/whitelist.rb +10 -0
- data/lib/highscore/wordlist.rb +76 -0
- data/test/highscore/test_blacklist.rb +10 -58
- data/test/highscore/test_content.rb +8 -0
- data/test/highscore/test_whitelist.rb +17 -0
- data/test/highscore/test_wordlist.rb +59 -0
- data/version.txt +1 -1
- metadata +11 -5
data/History.txt
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
== 0.5.0 / 2012-02-20
|
2
|
+
|
3
|
+
* now supports a whitelist approach
|
4
|
+
* words of <= 2 chars are ignored by default, but this is now configurable
|
5
|
+
* refactored a huge part of the API
|
6
|
+
* added some more inline-documentation
|
7
|
+
|
1
8
|
== 0.4.3 / 2012-02-11
|
2
9
|
|
3
10
|
* rate words based on the amount of vowels and consonants
|
data/README.md
CHANGED
@@ -10,6 +10,7 @@ Features
|
|
10
10
|
* rate based on amount (%) of vowels and consonants in a string
|
11
11
|
* directly get keywords from String objects
|
12
12
|
* blacklist words via a plain text file, String or an Array of words
|
13
|
+
* optionally, configure a whitelist and only words from that list will get ranked
|
13
14
|
* merge together Keywords from multiple sources
|
14
15
|
* contains a CLI tool that operates on STDIN/OUT and is configurable via parameters
|
15
16
|
|
@@ -79,6 +80,15 @@ blacklist = Highscore::Blacklist.load_default_file
|
|
79
80
|
content = Highscore::Content.new "a string", blacklist
|
80
81
|
```
|
81
82
|
|
83
|
+
Using a whitelist instead of ranking all words
|
84
|
+
----------------------------------------------
|
85
|
+
|
86
|
+
```ruby
|
87
|
+
# construct and inject it just like a blacklist
|
88
|
+
whitelist = Highscore::Whitelist.load %w{these are valid keywords}
|
89
|
+
content = Highscore::Content.new "invalid words", whitelist
|
90
|
+
```
|
91
|
+
|
82
92
|
Install
|
83
93
|
-------
|
84
94
|
|
data/bin/highscore
CHANGED
@@ -18,7 +18,12 @@ optparse = OptionParser.new do |opts|
|
|
18
18
|
|
19
19
|
# blacklist file
|
20
20
|
opts.on('-b', '--blacklist FILEPATH', 'specify a blacklist file that should be used instead of the default') do |filepath|
|
21
|
-
options[:
|
21
|
+
options[:wordlist] = Highscore::Blacklist.load_file(filepath)
|
22
|
+
end
|
23
|
+
|
24
|
+
# whitelist file
|
25
|
+
opts.on('-w', '--whitelist FILEPATH', 'specify a whitelist file') do |filepath|
|
26
|
+
options[:wordlist] = Highscore::Whitelist.load_file(filepath)
|
22
27
|
end
|
23
28
|
|
24
29
|
# general multiplier
|
@@ -26,6 +31,11 @@ optparse = OptionParser.new do |opts|
|
|
26
31
|
options[:emphasis][:multiplier] = multiplier.to_f
|
27
32
|
end
|
28
33
|
|
34
|
+
# don't ignore short words (<= 2 chars)
|
35
|
+
opts.on('--no-ignore-short', 'don\'t ignore short words (<= 2 chars)') do
|
36
|
+
options[:emphasis][:ignore_short_words] = false
|
37
|
+
end
|
38
|
+
|
29
39
|
# don't print rank weight
|
30
40
|
opts.on('-s', '--short', 'don\'t print rank weight') do
|
31
41
|
options[:short] = true
|
@@ -64,7 +74,7 @@ end
|
|
64
74
|
optparse.parse!
|
65
75
|
|
66
76
|
text = STDIN.read.to_s
|
67
|
-
keywords = text.keywords(options[:
|
77
|
+
keywords = text.keywords(options[:wordlist]) do |content|
|
68
78
|
options[:emphasis].each do |key,value|
|
69
79
|
content.set key, value
|
70
80
|
end
|
data/lib/highscore/blacklist.rb
CHANGED
@@ -1,72 +1,18 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__))
|
2
|
+
require "wordlist"
|
3
|
+
|
1
4
|
module Highscore
|
2
5
|
|
3
6
|
# blacklisted words to be ignored in the resulting keywords
|
4
7
|
#
|
5
|
-
class Blacklist
|
6
|
-
include Enumerable
|
7
|
-
|
8
|
-
# load a file of keywords
|
9
|
-
def self.load_file file_path
|
10
|
-
words = File.read(file_path).split(' ')
|
11
|
-
self.load(words)
|
12
|
-
end
|
8
|
+
class Blacklist < Wordlist
|
13
9
|
|
14
10
|
# load default file
|
15
11
|
#
|
12
|
+
# @return Highscore::Blacklist
|
16
13
|
def self.load_default_file
|
17
14
|
file_path = File.join(File.dirname(__FILE__), %w{.. blacklist.txt})
|
18
15
|
self.load_file(file_path)
|
19
16
|
end
|
20
|
-
|
21
|
-
# load a file or array of words
|
22
|
-
#
|
23
|
-
def self.load(data)
|
24
|
-
if data.instance_of?(String)
|
25
|
-
words = data.split(' ')
|
26
|
-
elsif data.instance_of? Array
|
27
|
-
words = data
|
28
|
-
else
|
29
|
-
raise ArgumentError, "don't know how to handle a %s class" % data.class
|
30
|
-
end
|
31
|
-
|
32
|
-
words.map! {|x| x.gsub(/[\!\.\:\,\;\-\+]/, '') }
|
33
|
-
|
34
|
-
self.new(words)
|
35
|
-
end
|
36
|
-
|
37
|
-
attr_reader :words
|
38
|
-
|
39
|
-
def initialize(words = [])
|
40
|
-
@words = words
|
41
|
-
end
|
42
|
-
|
43
|
-
# iterate over words
|
44
|
-
#
|
45
|
-
def each
|
46
|
-
@words.each {|x| yield x }
|
47
|
-
end
|
48
|
-
|
49
|
-
# count of ignored words
|
50
|
-
def length
|
51
|
-
@words.length
|
52
|
-
end
|
53
|
-
|
54
|
-
# get an array of blacklisted words
|
55
|
-
#
|
56
|
-
def to_a
|
57
|
-
@words.to_a
|
58
|
-
end
|
59
|
-
|
60
|
-
# does the blacklist contain this keyword?
|
61
|
-
#
|
62
|
-
def include? keyword
|
63
|
-
@words.include? keyword
|
64
|
-
end
|
65
|
-
|
66
|
-
# add a new word to the blacklist
|
67
|
-
def <<(word)
|
68
|
-
@words << word
|
69
|
-
end
|
70
|
-
|
71
17
|
end
|
72
18
|
end
|
data/lib/highscore/content.rb
CHANGED
@@ -5,57 +5,89 @@ module Highscore
|
|
5
5
|
class Content
|
6
6
|
attr_reader :content
|
7
7
|
|
8
|
-
|
8
|
+
# @param content String
|
9
|
+
# @param wordlist Highscore::Wordlist
|
10
|
+
def initialize(content, wordlist = nil)
|
9
11
|
@content = content
|
10
|
-
|
11
|
-
|
12
|
-
|
12
|
+
@whitelist = @blacklist = nil
|
13
|
+
|
14
|
+
if wordlist.nil?
|
15
|
+
@blacklist = Highscore::Blacklist.load_default_file
|
16
|
+
elsif wordlist.kind_of? Highscore::Blacklist
|
17
|
+
@blacklist = wordlist
|
18
|
+
else
|
19
|
+
@whitelist = wordlist
|
13
20
|
end
|
14
21
|
|
15
|
-
@blacklist = blacklist
|
16
|
-
|
17
22
|
@emphasis = {
|
18
23
|
:multiplier => 1.0,
|
19
24
|
:upper_case => 3.0,
|
20
25
|
:long_words => 2.0,
|
21
26
|
:long_words_threshold => 15,
|
22
27
|
:vowels => 0,
|
23
|
-
:consonants => 0
|
28
|
+
:consonants => 0,
|
29
|
+
:ignore_short_words => true
|
24
30
|
}
|
25
31
|
end
|
26
32
|
|
27
33
|
# configure ranking
|
28
34
|
#
|
35
|
+
# @param block
|
29
36
|
def configure(&block)
|
30
37
|
instance_eval(&block)
|
31
38
|
end
|
32
39
|
|
33
40
|
# set emphasis options to rank the content
|
34
41
|
#
|
42
|
+
# @param key Symbol
|
43
|
+
# @param value Object
|
35
44
|
def set(key, value)
|
36
|
-
@emphasis[key.to_sym] = value
|
45
|
+
@emphasis[key.to_sym] = value
|
37
46
|
end
|
38
47
|
|
39
48
|
# get the ranked keywords
|
40
49
|
#
|
41
|
-
#
|
42
|
-
# keywords -> Keywords
|
43
|
-
#
|
50
|
+
# @return Highscore::Keywords
|
44
51
|
def keywords
|
45
52
|
keywords = Keywords.new
|
46
53
|
|
47
|
-
Keywords.find_keywords(@content,
|
54
|
+
Keywords.find_keywords(@content, wordlist).each do |text|
|
48
55
|
text = text.to_s
|
49
|
-
|
56
|
+
|
57
|
+
if not (text.match(/^[\d]+(\.[\d]+){0,1}$/) or text.length <= 2)
|
58
|
+
keywords << Highscore::Keyword.new(text, weight(text))
|
59
|
+
elsif allow_short_words
|
60
|
+
keywords << Highscore::Keyword.new(text, weight(text))
|
61
|
+
end
|
50
62
|
end
|
51
63
|
|
52
64
|
keywords
|
53
65
|
end
|
54
66
|
|
67
|
+
# get the used wordlist
|
68
|
+
#
|
69
|
+
# @return Highscore::Wordlist
|
70
|
+
def wordlist
|
71
|
+
unless @whitelist.nil?
|
72
|
+
@whitelist
|
73
|
+
else
|
74
|
+
@blacklist
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
55
78
|
private
|
56
79
|
|
80
|
+
# allow short words to be rated
|
81
|
+
#
|
82
|
+
# @return TrueClass FalseClass
|
83
|
+
def allow_short_words
|
84
|
+
not @emphasis[:ignore_short_words]
|
85
|
+
end
|
86
|
+
|
57
87
|
# weight a single text keyword
|
58
88
|
#
|
89
|
+
# @param text String
|
90
|
+
# @return Float
|
59
91
|
def weight(text)
|
60
92
|
weight = @emphasis[:multiplier]
|
61
93
|
|
@@ -72,11 +104,19 @@ module Highscore
|
|
72
104
|
weight
|
73
105
|
end
|
74
106
|
|
107
|
+
# weight the vowels on a text
|
108
|
+
#
|
109
|
+
# @param text String
|
110
|
+
# @return Float
|
75
111
|
def vowels(text)
|
76
112
|
percent = text.vowels.length / text.length.to_f
|
77
113
|
percent * @emphasis[:vowels]
|
78
114
|
end
|
79
115
|
|
116
|
+
# weight the consonants on a text
|
117
|
+
#
|
118
|
+
# @param text String
|
119
|
+
# @return Float
|
80
120
|
def consonants(text)
|
81
121
|
percent = text.consonants.length / text.length.to_f
|
82
122
|
percent * @emphasis[:consonants]
|
data/lib/highscore/keyword.rb
CHANGED
@@ -6,17 +6,24 @@ module Highscore
|
|
6
6
|
attr_accessor :weight, :text
|
7
7
|
|
8
8
|
# init a keyword
|
9
|
+
#
|
10
|
+
# @param text String
|
11
|
+
# @param weight Float
|
9
12
|
def initialize(text, weight)
|
10
13
|
@text = text
|
11
14
|
@weight = weight.to_f
|
12
15
|
end
|
13
16
|
|
14
17
|
# sort keywords
|
18
|
+
#
|
19
|
+
# @param other Highscore::Keyword
|
15
20
|
def <=>(other)
|
16
21
|
other.weight <=> @weight
|
17
22
|
end
|
18
23
|
|
19
24
|
# get the string
|
25
|
+
#
|
26
|
+
# @return String
|
20
27
|
def to_s
|
21
28
|
@text
|
22
29
|
end
|
data/lib/highscore/keywords.rb
CHANGED
@@ -9,14 +9,19 @@ module Highscore
|
|
9
9
|
include Enumerable
|
10
10
|
|
11
11
|
# find keywords in a piece of content
|
12
|
-
|
12
|
+
#
|
13
|
+
# @param content String
|
14
|
+
# @param wordlist Highscore::Wordlist
|
15
|
+
# @return Highscore::Keywords
|
16
|
+
def self.find_keywords content, wordlist
|
13
17
|
keywords = content.to_s.scan(/\w+/)
|
14
|
-
keywords.delete_if do |x|
|
15
|
-
x.match(/^[\d]+(\.[\d]+){0,1}$/) or x.length <= 2
|
16
|
-
end
|
17
18
|
|
18
19
|
keywords.delete_if do |key, value|
|
19
|
-
|
20
|
+
if wordlist.kind_of? Highscore::Blacklist
|
21
|
+
wordlist.include?(key.downcase)
|
22
|
+
elsif wordlist.kind_of? Highscore::Whitelist
|
23
|
+
not wordlist.include?(key.downcase)
|
24
|
+
end
|
20
25
|
end
|
21
26
|
|
22
27
|
keywords.sort
|
@@ -31,21 +36,23 @@ module Highscore
|
|
31
36
|
# ranks the keywords and removes keywords that have a low ranking
|
32
37
|
# or are blacklisted
|
33
38
|
#
|
34
|
-
#
|
35
|
-
# rank -> array
|
36
|
-
#
|
39
|
+
# @return Array
|
37
40
|
def rank
|
38
41
|
sort
|
39
42
|
end
|
40
43
|
|
41
44
|
# get the top n keywords
|
42
45
|
#
|
46
|
+
# @param n Fixnum
|
47
|
+
# @return Array
|
43
48
|
def top n = 10
|
44
49
|
rank[0..(n - 1)]
|
45
50
|
end
|
46
51
|
|
47
52
|
# add new keywords
|
48
53
|
#
|
54
|
+
# @param keyword String
|
55
|
+
# @return Highscore::Keywords
|
49
56
|
def <<(keyword)
|
50
57
|
key = Digest::SHA1.hexdigest(keyword.text)
|
51
58
|
|
@@ -54,9 +61,13 @@ module Highscore
|
|
54
61
|
else
|
55
62
|
@keywords[key] = keyword
|
56
63
|
end
|
64
|
+
|
65
|
+
@keywords
|
57
66
|
end
|
58
67
|
|
59
68
|
# sort
|
69
|
+
#
|
70
|
+
# @return Array
|
60
71
|
def sort
|
61
72
|
sorted = @keywords.sort {|a,b| a[1] <=> b[1] }
|
62
73
|
|
@@ -66,26 +77,34 @@ module Highscore
|
|
66
77
|
|
67
78
|
# Enumerable
|
68
79
|
#
|
69
|
-
def each
|
80
|
+
def each
|
70
81
|
@keywords.each {|keyword| yield keyword[1] }
|
71
82
|
end
|
72
83
|
|
73
84
|
# number of Keywords given
|
85
|
+
#
|
86
|
+
# @return Fixnum
|
74
87
|
def length
|
75
88
|
@keywords.length
|
76
89
|
end
|
77
90
|
|
78
91
|
# get the keyword with the highest rank
|
92
|
+
#
|
93
|
+
# @return Highscore::Keyword
|
79
94
|
def first
|
80
95
|
sort.first
|
81
96
|
end
|
82
97
|
|
83
98
|
# get the keyword with the lowest rank
|
99
|
+
#
|
100
|
+
# @return Highscore::Keyword
|
84
101
|
def last
|
85
102
|
sort.reverse.first
|
86
103
|
end
|
87
104
|
|
88
105
|
# merge in another keyword list, operates on self
|
106
|
+
#
|
107
|
+
# @return Highscore::Keywords
|
89
108
|
def merge!(other)
|
90
109
|
other.each do |keyword|
|
91
110
|
self << keyword
|
data/lib/highscore/string.rb
CHANGED
@@ -1,11 +1,14 @@
|
|
1
|
-
# monkey patch to
|
1
|
+
# monkey patch to call custom methods on arbitrary strings
|
2
2
|
#
|
3
3
|
class String
|
4
4
|
|
5
5
|
# get keywords from the string
|
6
6
|
#
|
7
|
-
|
8
|
-
|
7
|
+
# @param wordlist Highscore::Wordlist
|
8
|
+
# @param block Block
|
9
|
+
# @return Highscore::Keywords
|
10
|
+
def keywords(wordlist = nil, &block)
|
11
|
+
content = Highscore::Content.new(self, wordlist)
|
9
12
|
|
10
13
|
if block_given?
|
11
14
|
content.configure do
|
@@ -17,11 +20,15 @@ class String
|
|
17
20
|
end
|
18
21
|
|
19
22
|
# get all vowels from a string
|
23
|
+
#
|
24
|
+
# @return String
|
20
25
|
def vowels
|
21
26
|
gsub(/[^aeiou]/, '')
|
22
27
|
end
|
23
28
|
|
24
29
|
# get all consonants from a string
|
30
|
+
#
|
31
|
+
# @return String
|
25
32
|
def consonants
|
26
33
|
gsub(/[aeiou]/, '')
|
27
34
|
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
module Highscore
|
2
|
+
|
3
|
+
# a basic list of words
|
4
|
+
class Wordlist
|
5
|
+
include Enumerable
|
6
|
+
|
7
|
+
# load a file of keywords
|
8
|
+
#
|
9
|
+
# @param file_path String
|
10
|
+
# @return Highscore::Wordlist
|
11
|
+
def self.load_file file_path
|
12
|
+
words = File.read(file_path).split(' ')
|
13
|
+
self.load(words)
|
14
|
+
end
|
15
|
+
|
16
|
+
# load a string or array of words
|
17
|
+
#
|
18
|
+
# @param data String Array
|
19
|
+
# @return Highscore::Wordlist
|
20
|
+
def self.load(data)
|
21
|
+
if data.instance_of?(String)
|
22
|
+
words = data.split(' ')
|
23
|
+
elsif data.instance_of? Array
|
24
|
+
words = data
|
25
|
+
else
|
26
|
+
raise ArgumentError, "don't know how to handle a %s class" % data.class
|
27
|
+
end
|
28
|
+
|
29
|
+
words.map! {|x| x.gsub(/[\!\.\:\,\;\-\+]/, '') }
|
30
|
+
|
31
|
+
self.new(words)
|
32
|
+
end
|
33
|
+
|
34
|
+
attr_reader :words
|
35
|
+
|
36
|
+
# @param words Array
|
37
|
+
def initialize(words = [])
|
38
|
+
@words = words
|
39
|
+
end
|
40
|
+
|
41
|
+
# iterate over words
|
42
|
+
#
|
43
|
+
def each
|
44
|
+
@words.each {|word| yield word }
|
45
|
+
end
|
46
|
+
|
47
|
+
# count of words in the list
|
48
|
+
#
|
49
|
+
# @return Fixnum
|
50
|
+
def length
|
51
|
+
@words.length
|
52
|
+
end
|
53
|
+
|
54
|
+
# get an array of the words in the list
|
55
|
+
#
|
56
|
+
# @return Array
|
57
|
+
def to_a
|
58
|
+
@words.to_a
|
59
|
+
end
|
60
|
+
|
61
|
+
# does the wordlist contain this keyword?
|
62
|
+
#
|
63
|
+
# @param keyword String
|
64
|
+
# @return true/false
|
65
|
+
def include? keyword
|
66
|
+
@words.include? keyword
|
67
|
+
end
|
68
|
+
|
69
|
+
# add a new word to the wordlist
|
70
|
+
#
|
71
|
+
# @param word String
|
72
|
+
def <<(word)
|
73
|
+
@words << word
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
@@ -3,11 +3,9 @@ require "blacklist"
|
|
3
3
|
require "test/unit"
|
4
4
|
|
5
5
|
class TestBlacklist < Test::Unit::TestCase
|
6
|
-
def
|
7
|
-
|
8
|
-
blacklist
|
9
|
-
|
10
|
-
assert_equal 6, blacklist.length
|
6
|
+
def test_is_a_wordlist
|
7
|
+
blacklist = Highscore::Blacklist.new
|
8
|
+
assert blacklist.kind_of? Highscore::Wordlist
|
11
9
|
end
|
12
10
|
|
13
11
|
def test_load_default_file
|
@@ -15,62 +13,16 @@ class TestBlacklist < Test::Unit::TestCase
|
|
15
13
|
assert_equal 42, blacklist.length
|
16
14
|
end
|
17
15
|
|
18
|
-
def test_load_file_fail
|
19
|
-
assert_raises(Errno::ENOENT) do
|
20
|
-
Highscore::Blacklist.load_file('foobar')
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
def test_empty_blacklist
|
25
|
-
blacklist = Highscore::Blacklist.new
|
26
|
-
assert_equal 0, blacklist.length
|
27
|
-
end
|
28
|
-
|
29
|
-
def test_add_new_word
|
30
|
-
blacklist = Highscore::Blacklist.new
|
31
|
-
blacklist << 'foo'
|
32
|
-
|
33
|
-
assert_equal ['foo'], blacklist.words
|
34
|
-
end
|
35
|
-
|
36
|
-
def test_load_string_and_remove_special_chars
|
37
|
-
blacklist = Highscore::Blacklist.load "this is an awesome string!"
|
38
|
-
assert_equal 5, blacklist.length
|
39
|
-
|
40
|
-
assert_equal ['this', 'is', 'an', 'awesome', 'string'], blacklist.to_a
|
41
|
-
end
|
42
|
-
|
43
|
-
def test_load_array
|
44
|
-
words = ['foo', 'bar', 'baz']
|
45
|
-
|
46
|
-
blacklist = Highscore::Blacklist.load(words)
|
47
|
-
|
48
|
-
assert_equal words, blacklist.words
|
49
|
-
end
|
50
|
-
|
51
|
-
def test_load_unknown_type
|
52
|
-
assert_raises ArgumentError do
|
53
|
-
Highscore::Blacklist.load(1)
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
def test_include?
|
58
|
-
blacklist = Highscore::Blacklist.load "foobar baz"
|
59
|
-
|
60
|
-
assert blacklist.include?("foobar")
|
61
|
-
assert !blacklist.include?("bla")
|
62
|
-
end
|
63
|
-
|
64
16
|
def test_blacklisting_content
|
65
|
-
|
17
|
+
keywords = "Foo bar is not bar baz".keywords(Highscore::Blacklist.load(%w(baz)))
|
66
18
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
19
|
+
keyword_list = []
|
20
|
+
keywords.rank.each do |k|
|
21
|
+
keyword_list << k.text
|
22
|
+
end
|
71
23
|
|
72
|
-
|
24
|
+
expected_keywords = %w(Foo bar not)
|
73
25
|
|
74
|
-
|
26
|
+
assert_equal expected_keywords, keyword_list
|
75
27
|
end
|
76
28
|
end
|
@@ -42,4 +42,12 @@ class TestContent < Test::Unit::TestCase
|
|
42
42
|
assert_equal 3.75, keywords.first.weight
|
43
43
|
assert_equal 3.5, keywords.last.weight
|
44
44
|
end
|
45
|
+
|
46
|
+
def test_rank_short_words
|
47
|
+
keywords = 'be as is foobar'.keywords do
|
48
|
+
set :ignore_short_words, false
|
49
|
+
end
|
50
|
+
|
51
|
+
assert_equal 4, keywords.length
|
52
|
+
end
|
45
53
|
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
$:.unshift(File.join(File.dirname(__FILE__), %w{.. .. lib highscore}))
|
2
|
+
require "whitelist"
|
3
|
+
require "test/unit"
|
4
|
+
|
5
|
+
class TestBlacklist < Test::Unit::TestCase
|
6
|
+
def test_is_wordlist
|
7
|
+
whitelist = Highscore::Whitelist.new
|
8
|
+
assert whitelist.kind_of? Highscore::Wordlist
|
9
|
+
end
|
10
|
+
|
11
|
+
def test_whitelist_content
|
12
|
+
whitelist = Highscore::Whitelist.load %w{foo bar}
|
13
|
+
|
14
|
+
content = Highscore::Content.new "foo baz bar", whitelist
|
15
|
+
assert_equal 2, content.keywords.length
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
$:.unshift(File.join(File.dirname(__FILE__), %w{.. .. lib highscore}))
|
2
|
+
require "wordlist"
|
3
|
+
require "test/unit"
|
4
|
+
|
5
|
+
class TestBlacklist < Test::Unit::TestCase
|
6
|
+
|
7
|
+
def test_load_file
|
8
|
+
file_path = File.join(File.dirname(__FILE__), %w{.. fixtures blacklist.txt})
|
9
|
+
blacklist = Highscore::Wordlist.load_file(file_path)
|
10
|
+
|
11
|
+
assert_equal 6, blacklist.length
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_load_file_fail
|
15
|
+
assert_raises(Errno::ENOENT) do
|
16
|
+
Highscore::Blacklist.load_file('foobar')
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_empty_blacklist
|
21
|
+
blacklist = Highscore::Wordlist.new
|
22
|
+
assert_equal 0, blacklist.length
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_add_new_word
|
26
|
+
blacklist = Highscore::Wordlist.new
|
27
|
+
blacklist << 'foo'
|
28
|
+
|
29
|
+
assert_equal %w(foo), blacklist.words
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_load_string_and_remove_special_chars
|
33
|
+
blacklist = Highscore::Wordlist.load "this is an awesome string!"
|
34
|
+
assert_equal 5, blacklist.length
|
35
|
+
|
36
|
+
assert_equal %w{this is an awesome string}, blacklist.to_a
|
37
|
+
end
|
38
|
+
|
39
|
+
def test_load_array
|
40
|
+
words = %w{foo bar baz}
|
41
|
+
|
42
|
+
blacklist = Highscore::Wordlist.load(words)
|
43
|
+
|
44
|
+
assert_equal words, blacklist.words
|
45
|
+
end
|
46
|
+
|
47
|
+
def test_load_unknown_type
|
48
|
+
assert_raises ArgumentError do
|
49
|
+
Highscore::Wordlist.load(1)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def test_include?
|
54
|
+
blacklist = Highscore::Wordlist.load "foobar baz"
|
55
|
+
|
56
|
+
assert blacklist.include?("foobar")
|
57
|
+
assert !blacklist.include?("bla")
|
58
|
+
end
|
59
|
+
end
|
data/version.txt
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.5.0
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: highscore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02-
|
12
|
+
date: 2012-02-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bones
|
16
|
-
requirement: &
|
16
|
+
requirement: &70127917245680 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
version: 3.7.3
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70127917245680
|
25
25
|
description: Find and rank keywords in long texts.
|
26
26
|
email: liebler.dominik@googlemail.com
|
27
27
|
executables:
|
@@ -46,12 +46,16 @@ files:
|
|
46
46
|
- lib/highscore/keyword.rb
|
47
47
|
- lib/highscore/keywords.rb
|
48
48
|
- lib/highscore/string.rb
|
49
|
+
- lib/highscore/whitelist.rb
|
50
|
+
- lib/highscore/wordlist.rb
|
49
51
|
- test/fixtures/blacklist.txt
|
50
52
|
- test/highscore/test_blacklist.rb
|
51
53
|
- test/highscore/test_content.rb
|
52
54
|
- test/highscore/test_keyword.rb
|
53
55
|
- test/highscore/test_keywords.rb
|
54
56
|
- test/highscore/test_string.rb
|
57
|
+
- test/highscore/test_whitelist.rb
|
58
|
+
- test/highscore/test_wordlist.rb
|
55
59
|
- test/test_highscore.rb
|
56
60
|
- version.txt
|
57
61
|
homepage: http://thewebdev.de
|
@@ -76,7 +80,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
76
80
|
version: '0'
|
77
81
|
requirements: []
|
78
82
|
rubyforge_project: highscore
|
79
|
-
rubygems_version: 1.8.
|
83
|
+
rubygems_version: 1.8.16
|
80
84
|
signing_key:
|
81
85
|
specification_version: 3
|
82
86
|
summary: Find and rank keywords in long texts.
|
@@ -86,4 +90,6 @@ test_files:
|
|
86
90
|
- test/highscore/test_keyword.rb
|
87
91
|
- test/highscore/test_keywords.rb
|
88
92
|
- test/highscore/test_string.rb
|
93
|
+
- test/highscore/test_whitelist.rb
|
94
|
+
- test/highscore/test_wordlist.rb
|
89
95
|
- test/test_highscore.rb
|