highscore 0.3.1 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/README.md +21 -0
- data/lib/blacklist.txt +3 -0
- data/lib/highscore/blacklist.rb +72 -0
- data/lib/highscore/content.rb +8 -2
- data/lib/highscore/keywords.rb +2 -2
- data/test/fixtures/blacklist.txt +2 -0
- data/test/highscore/test_blacklist.rb +63 -0
- data/version.txt +1 -1
- metadata +11 -4
data/History.txt
CHANGED
data/README.md
CHANGED
@@ -29,6 +29,27 @@ Examples
|
|
29
29
|
end
|
30
30
|
|
31
31
|
|
32
|
+
Using a custom blacklist to ignore keywords
|
33
|
+
-------------------------------------------
|
34
|
+
|
35
|
+
# setting single words
|
36
|
+
blacklist = Highscore::Blacklist.new
|
37
|
+
blacklist << 'foo'
|
38
|
+
|
39
|
+
# load a string/array
|
40
|
+
blacklist = Highscore::Blacklist.load "a string"
|
41
|
+
blacklist = Highscore::Blacklist.load %w{an array}
|
42
|
+
|
43
|
+
# loading from a file (separated by whitespace)
|
44
|
+
blacklist = Highscore::Blacklist.load_file "blacklist.txt"
|
45
|
+
|
46
|
+
# loading the default blacklist (used automatically when no blacklist is explicitly given)
|
47
|
+
blacklist = Highscore::Blacklist.load_default_file
|
48
|
+
|
49
|
+
# inject the blacklist into the content class
|
50
|
+
content = Highscore::Content.new "a string", blacklist
|
51
|
+
|
52
|
+
|
32
53
|
Requirements
|
33
54
|
------------
|
34
55
|
|
data/lib/blacklist.txt
ADDED
data/lib/highscore/blacklist.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
module Highscore

  # blacklisted words to be ignored in the resulting keywords
  #
  # Wraps an array of words and exposes it through Enumerable, so a
  # blacklist can be iterated, counted and queried for membership.
  #
  class Blacklist
    include Enumerable

    # load a file of keywords (separated by whitespace)
    #
    # @param file_path [String] path of the file to read
    # @return [Highscore::Blacklist]
    # @raise [Errno::ENOENT] if the file does not exist
    def self.load_file(file_path)
      # the file content is a String, so .load splits it on whitespace
      load(File.read(file_path))
    end

    # load default file
    #
    # The default list is the blacklist.txt located one directory above
    # this source file.
    #
    # @return [Highscore::Blacklist]
    def self.load_default_file
      file_path = File.join(File.dirname(__FILE__), %w{.. blacklist.txt})
      load_file(file_path)
    end

    # load a string or an array of words
    #
    # A String is split on whitespace, an Array is taken word by word.
    # The characters ! . : , ; - + are removed from every word.
    #
    # @param data [String, Array<String>] the words to blacklist
    # @return [Highscore::Blacklist]
    # @raise [ArgumentError] if data is neither a String nor an Array
    def self.load(data)
      words =
        case data
        when String then data.split(' ')
        when Array  then data
        else
          raise ArgumentError, "don't know how to handle a %s class" % data.class
        end

      # map (not map!) builds a fresh array, so the array handed in by the
      # caller is neither modified nor required to be unfrozen
      new(words.map { |word| word.gsub(/[\!\.\:\,\;\-\+]/, '') })
    end

    attr_reader :words

    # @param words [Array<String>] initial list of blacklisted words
    def initialize(words = [])
      @words = words
    end

    # iterate over words
    #
    # Returns an Enumerator when called without a block.
    def each(&block)
      return enum_for(:each) unless block_given?

      @words.each(&block)
    end

    # count of ignored words
    def length
      @words.length
    end

    # get an array of blacklisted words
    #
    def to_a
      @words.to_a
    end

    # does the blacklist contain this keyword?
    #
    # The comparison is an exact (case-sensitive) match against the stored
    # words.
    def include?(keyword)
      @words.include?(keyword)
    end

    # add a new word to the blacklist
    def <<(word)
      @words << word
    end

  end
end
|
data/lib/highscore/content.rb
CHANGED
@@ -5,9 +5,15 @@ module Highscore
|
|
5
5
|
class Content
|
6
6
|
attr_reader :content
|
7
7
|
|
8
|
-
def initialize
|
8
|
+
def initialize(content, blacklist = nil)
|
9
9
|
@content = content
|
10
10
|
|
11
|
+
unless blacklist
|
12
|
+
blacklist = Highscore::Blacklist.load_default_file
|
13
|
+
end
|
14
|
+
|
15
|
+
@blacklist = blacklist
|
16
|
+
|
11
17
|
@emphasis = {
|
12
18
|
:multiplier => 1.0,
|
13
19
|
:upper_case => 3.0,
|
@@ -36,7 +42,7 @@ module Highscore
|
|
36
42
|
def keywords
|
37
43
|
keywords = Keywords.new
|
38
44
|
|
39
|
-
Keywords.find_keywords(@content).each do |text|
|
45
|
+
Keywords.find_keywords(@content, @blacklist).each do |text|
|
40
46
|
weight = @emphasis[:multiplier]
|
41
47
|
|
42
48
|
if text.length >= @emphasis[:long_words_threshold]
|
data/lib/highscore/keywords.rb
CHANGED
@@ -9,14 +9,14 @@ module Highscore
|
|
9
9
|
include Enumerable
|
10
10
|
|
11
11
|
# find keywords in a piece of content
|
12
|
-
def self.find_keywords content
|
12
|
+
def self.find_keywords content, blacklist
|
13
13
|
keywords = content.scan(/\w+/)
|
14
14
|
keywords.delete_if do |x|
|
15
15
|
x.match(/^[\d]+(\.[\d]+){0,1}$/) or x.length <= 2
|
16
16
|
end
|
17
17
|
|
18
18
|
keywords.delete_if do |key, value|
|
19
|
-
|
19
|
+
blacklist.include?(key.downcase)
|
20
20
|
end
|
21
21
|
|
22
22
|
keywords.sort
|
data/test/highscore/test_blacklist.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
$:.unshift(File.join(File.dirname(__FILE__), %w{.. .. lib highscore}))
|
2
|
+
require "blacklist"
|
3
|
+
require "test/unit"
|
4
|
+
|
5
|
+
# Unit tests for Highscore::Blacklist: loading from files, strings and
# arrays, appending words and membership checks.
class TestBlacklist < Test::Unit::TestCase
  # loading the fixture file is expected to give six words
  def test_load_file
    fixture = File.join(File.dirname(__FILE__), %w{.. fixtures blacklist.txt})
    loaded = Highscore::Blacklist.load_file(fixture)

    assert_equal 6, loaded.length
  end

  # the bundled default list is expected to hold 42 words
  def test_load_default_file
    assert_equal 42, Highscore::Blacklist.load_default_file.length
  end

  # a missing file surfaces as Errno::ENOENT
  def test_load_file_fail
    assert_raises(Errno::ENOENT) { Highscore::Blacklist.load_file('foobar') }
  end

  # a blacklist created without arguments has no words
  def test_empty_blacklist
    assert_equal 0, Highscore::Blacklist.new.length
  end

  # << appends a single word
  def test_add_new_word
    list = Highscore::Blacklist.new
    list << 'foo'

    assert_equal ['foo'], list.words
  end

  # strings are split into words and punctuation is stripped
  def test_load_string_and_remove_special_chars
    list = Highscore::Blacklist.load "this is an awesome string!"

    assert_equal 5, list.length
    assert_equal %w[this is an awesome string], list.to_a
  end

  # arrays are taken over word by word
  def test_load_array
    expected = ['foo', 'bar', 'baz']
    list = Highscore::Blacklist.load(expected)

    assert_equal expected, list.words
  end

  # anything but a String or an Array is rejected
  def test_load_unknown_type
    assert_raises(ArgumentError) { Highscore::Blacklist.load(1) }
  end

  # membership check for present and absent words
  def test_include?
    list = Highscore::Blacklist.load "foobar baz"

    assert list.include?("foobar")
    assert !list.include?("bla")
  end
end
|
data/version.txt
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.3.1
|
1
|
+
0.3.2
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: highscore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-01-
|
12
|
+
date: 2012-01-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bones
|
16
|
-
requirement: &
|
16
|
+
requirement: &70200855600060 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
version: 3.7.3
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70200855600060
|
25
25
|
description: Rank keywords in long texts.
|
26
26
|
email: liebler.dominik@googlemail.com
|
27
27
|
executables:
|
@@ -30,6 +30,8 @@ extensions: []
|
|
30
30
|
extra_rdoc_files:
|
31
31
|
- History.txt
|
32
32
|
- bin/highscore
|
33
|
+
- lib/blacklist.txt
|
34
|
+
- test/fixtures/blacklist.txt
|
33
35
|
files:
|
34
36
|
- .gitignore
|
35
37
|
- History.txt
|
@@ -37,10 +39,14 @@ files:
|
|
37
39
|
- Rakefile
|
38
40
|
- bin/highscore
|
39
41
|
- highscore.gemspec
|
42
|
+
- lib/blacklist.txt
|
40
43
|
- lib/highscore.rb
|
44
|
+
- lib/highscore/blacklist.rb
|
41
45
|
- lib/highscore/content.rb
|
42
46
|
- lib/highscore/keyword.rb
|
43
47
|
- lib/highscore/keywords.rb
|
48
|
+
- test/fixtures/blacklist.txt
|
49
|
+
- test/highscore/test_blacklist.rb
|
44
50
|
- test/highscore/test_content.rb
|
45
51
|
- test/highscore/test_keyword.rb
|
46
52
|
- test/highscore/test_keywords.rb
|
@@ -73,6 +79,7 @@ signing_key:
|
|
73
79
|
specification_version: 3
|
74
80
|
summary: Rank keywords in long texts.
|
75
81
|
test_files:
|
82
|
+
- test/highscore/test_blacklist.rb
|
76
83
|
- test/highscore/test_content.rb
|
77
84
|
- test/highscore/test_keyword.rb
|
78
85
|
- test/highscore/test_keywords.rb
|