highscore 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/README.md +21 -0
- data/lib/blacklist.txt +3 -0
- data/lib/highscore/blacklist.rb +72 -0
- data/lib/highscore/content.rb +8 -2
- data/lib/highscore/keywords.rb +2 -2
- data/test/fixtures/blacklist.txt +2 -0
- data/test/highscore/test_blacklist.rb +63 -0
- data/version.txt +1 -1
- metadata +11 -4
data/History.txt
CHANGED
data/README.md
CHANGED
@@ -29,6 +29,27 @@ Examples
|
|
29
29
|
end
|
30
30
|
|
31
31
|
|
32
|
+
Using a custom blacklist to ignore keywords
|
33
|
+
-------------------------------------------
|
34
|
+
|
35
|
+
# setting single words
|
36
|
+
blacklist = Highscore::Blacklist.new
|
37
|
+
blacklist << 'foo'
|
38
|
+
|
39
|
+
# load a string/array
|
40
|
+
blacklist = Highscore::Blacklist.load "a string"
|
41
|
+
blacklist = Highscore::Blacklist.load %w{an array}
|
42
|
+
|
43
|
+
# loading from a file (separated by whitespace)
|
44
|
+
blacklist = Highscore::Blacklist.load_file "blacklist.txt"
|
45
|
+
|
46
|
+
# loading the default blacklist (falls back automatically if not explicit given)
|
47
|
+
blacklist = Highscore::Blacklist.load_default_file
|
48
|
+
|
49
|
+
# inject the blacklist into the content class
|
50
|
+
content = Highscore::Content.new "a string", blacklist
|
51
|
+
|
52
|
+
|
32
53
|
Requirements
|
33
54
|
------------
|
34
55
|
|
data/lib/blacklist.txt
ADDED
data/lib/highscore/blacklist.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
module Highscore
|
2
|
+
|
3
|
+
# blacklisted words to be ignored in the resulting keywords
|
4
|
+
#
|
5
|
+
class Blacklist
|
6
|
+
include Enumerable
|
7
|
+
|
8
|
+
# load a file of keywords
|
9
|
+
def self.load_file file_path
|
10
|
+
words = File.read(file_path).split(' ')
|
11
|
+
self.load(words)
|
12
|
+
end
|
13
|
+
|
14
|
+
# load default file
|
15
|
+
#
|
16
|
+
def self.load_default_file
|
17
|
+
file_path = File.join(File.dirname(__FILE__), %w{.. blacklist.txt})
|
18
|
+
self.load_file(file_path)
|
19
|
+
end
|
20
|
+
|
21
|
+
# load a file or array of words
|
22
|
+
#
|
23
|
+
def self.load(data)
|
24
|
+
if data.instance_of?(String)
|
25
|
+
words = data.split(' ')
|
26
|
+
elsif data.instance_of? Array
|
27
|
+
words = data
|
28
|
+
else
|
29
|
+
raise ArgumentError, "don't know how to handle a %s class" % data.class
|
30
|
+
end
|
31
|
+
|
32
|
+
words.map! {|x| x.gsub(/[\!\.\:\,\;\-\+]/, '') }
|
33
|
+
|
34
|
+
self.new(words)
|
35
|
+
end
|
36
|
+
|
37
|
+
attr_reader :words
|
38
|
+
|
39
|
+
def initialize(words = [])
|
40
|
+
@words = words
|
41
|
+
end
|
42
|
+
|
43
|
+
# iterate over words
|
44
|
+
#
|
45
|
+
def each
|
46
|
+
@words.each {|x| yield x }
|
47
|
+
end
|
48
|
+
|
49
|
+
# count of ignored words
|
50
|
+
def length
|
51
|
+
@words.length
|
52
|
+
end
|
53
|
+
|
54
|
+
# get an array of blacklisted words
|
55
|
+
#
|
56
|
+
def to_a
|
57
|
+
@words.to_a
|
58
|
+
end
|
59
|
+
|
60
|
+
# does the blacklist contain this keyword?
|
61
|
+
#
|
62
|
+
def include? keyword
|
63
|
+
@words.include? keyword
|
64
|
+
end
|
65
|
+
|
66
|
+
# add a new word to the blacklist
|
67
|
+
def <<(word)
|
68
|
+
@words << word
|
69
|
+
end
|
70
|
+
|
71
|
+
end
|
72
|
+
end
|
data/lib/highscore/content.rb
CHANGED
@@ -5,9 +5,15 @@ module Highscore
|
|
5
5
|
class Content
|
6
6
|
attr_reader :content
|
7
7
|
|
8
|
-
def initialize
|
8
|
+
def initialize(content, blacklist = nil)
|
9
9
|
@content = content
|
10
10
|
|
11
|
+
unless blacklist
|
12
|
+
blacklist = Highscore::Blacklist.load_default_file
|
13
|
+
end
|
14
|
+
|
15
|
+
@blacklist = blacklist
|
16
|
+
|
11
17
|
@emphasis = {
|
12
18
|
:multiplier => 1.0,
|
13
19
|
:upper_case => 3.0,
|
@@ -36,7 +42,7 @@ module Highscore
|
|
36
42
|
def keywords
|
37
43
|
keywords = Keywords.new
|
38
44
|
|
39
|
-
Keywords.find_keywords(@content).each do |text|
|
45
|
+
Keywords.find_keywords(@content, @blacklist).each do |text|
|
40
46
|
weight = @emphasis[:multiplier]
|
41
47
|
|
42
48
|
if text.length >= @emphasis[:long_words_threshold]
|
data/lib/highscore/keywords.rb
CHANGED
@@ -9,14 +9,14 @@ module Highscore
|
|
9
9
|
include Enumerable
|
10
10
|
|
11
11
|
# find keywords in a piece of content
|
12
|
-
def self.find_keywords content
|
12
|
+
def self.find_keywords content, blacklist
|
13
13
|
keywords = content.scan(/\w+/)
|
14
14
|
keywords.delete_if do |x|
|
15
15
|
x.match(/^[\d]+(\.[\d]+){0,1}$/) or x.length <= 2
|
16
16
|
end
|
17
17
|
|
18
18
|
keywords.delete_if do |key, value|
|
19
|
-
|
19
|
+
blacklist.include?(key.downcase)
|
20
20
|
end
|
21
21
|
|
22
22
|
keywords.sort
|
data/test/fixtures/blacklist.txt
ADDED
data/test/highscore/test_blacklist.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
$:.unshift(File.join(File.dirname(__FILE__), %w{.. .. lib highscore}))
|
2
|
+
require "blacklist"
|
3
|
+
require "test/unit"
|
4
|
+
|
5
|
+
class TestBlacklist < Test::Unit::TestCase
|
6
|
+
def test_load_file
|
7
|
+
file_path = File.join(File.dirname(__FILE__), %w{.. fixtures blacklist.txt})
|
8
|
+
blacklist = Highscore::Blacklist.load_file(file_path)
|
9
|
+
|
10
|
+
assert_equal 6, blacklist.length
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_load_default_file
|
14
|
+
blacklist = Highscore::Blacklist.load_default_file
|
15
|
+
assert_equal 42, blacklist.length
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_load_file_fail
|
19
|
+
assert_raises(Errno::ENOENT) do
|
20
|
+
Highscore::Blacklist.load_file('foobar')
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_empty_blacklist
|
25
|
+
blacklist = Highscore::Blacklist.new
|
26
|
+
assert_equal 0, blacklist.length
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_add_new_word
|
30
|
+
blacklist = Highscore::Blacklist.new
|
31
|
+
blacklist << 'foo'
|
32
|
+
|
33
|
+
assert_equal ['foo'], blacklist.words
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_load_string_and_remove_special_chars
|
37
|
+
blacklist = Highscore::Blacklist.load "this is an awesome string!"
|
38
|
+
assert_equal 5, blacklist.length
|
39
|
+
|
40
|
+
assert_equal ['this', 'is', 'an', 'awesome', 'string'], blacklist.to_a
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_load_array
|
44
|
+
words = ['foo', 'bar', 'baz']
|
45
|
+
|
46
|
+
blacklist = Highscore::Blacklist.load(words)
|
47
|
+
|
48
|
+
assert_equal words, blacklist.words
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_load_unknown_type
|
52
|
+
assert_raises ArgumentError do
|
53
|
+
Highscore::Blacklist.load(1)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
def test_include?
|
58
|
+
blacklist = Highscore::Blacklist.load "foobar baz"
|
59
|
+
|
60
|
+
assert blacklist.include?("foobar")
|
61
|
+
assert !blacklist.include?("bla")
|
62
|
+
end
|
63
|
+
end
|
data/version.txt
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.3.1
|
1
|
+
0.3.2
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: highscore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-01-
|
12
|
+
date: 2012-01-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bones
|
16
|
-
requirement: &
|
16
|
+
requirement: &70200855600060 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
version: 3.7.3
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70200855600060
|
25
25
|
description: Rank keywords in long texts.
|
26
26
|
email: liebler.dominik@googlemail.com
|
27
27
|
executables:
|
@@ -30,6 +30,8 @@ extensions: []
|
|
30
30
|
extra_rdoc_files:
|
31
31
|
- History.txt
|
32
32
|
- bin/highscore
|
33
|
+
- lib/blacklist.txt
|
34
|
+
- test/fixtures/blacklist.txt
|
33
35
|
files:
|
34
36
|
- .gitignore
|
35
37
|
- History.txt
|
@@ -37,10 +39,14 @@ files:
|
|
37
39
|
- Rakefile
|
38
40
|
- bin/highscore
|
39
41
|
- highscore.gemspec
|
42
|
+
- lib/blacklist.txt
|
40
43
|
- lib/highscore.rb
|
44
|
+
- lib/highscore/blacklist.rb
|
41
45
|
- lib/highscore/content.rb
|
42
46
|
- lib/highscore/keyword.rb
|
43
47
|
- lib/highscore/keywords.rb
|
48
|
+
- test/fixtures/blacklist.txt
|
49
|
+
- test/highscore/test_blacklist.rb
|
44
50
|
- test/highscore/test_content.rb
|
45
51
|
- test/highscore/test_keyword.rb
|
46
52
|
- test/highscore/test_keywords.rb
|
@@ -73,6 +79,7 @@ signing_key:
|
|
73
79
|
specification_version: 3
|
74
80
|
summary: Rank keywords in long texts.
|
75
81
|
test_files:
|
82
|
+
- test/highscore/test_blacklist.rb
|
76
83
|
- test/highscore/test_content.rb
|
77
84
|
- test/highscore/test_keyword.rb
|
78
85
|
- test/highscore/test_keywords.rb
|