highscore 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/History.txt +6 -0
- data/README.md +11 -0
- data/bin/highscore +16 -0
- data/lib/highscore.rb +1 -0
- data/lib/highscore/bonuslist.rb +12 -0
- data/lib/highscore/content.rb +17 -2
- data/lib/highscore/string.rb +2 -2
- data/test/highscore/test_bonuslist.rb +61 -0
- data/test/highscore/test_content.rb +8 -0
- data/version.txt +1 -1
- metadata +51 -30
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
ODQ1NGUyMjdjNGUzNjdlYTA4MjFlYjViMmY0NzM1MjBhMTU2ODlhOQ==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
MTVmNzMyOTVhZjFkNjBhMTFjODc5NWM2MzdlN2E1NTljYzNjY2ExMw==
|
7
|
+
!binary "U0hBNTEy":
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
ZGVlOWUwNTgxM2MwMTlkMGMzN2MzYjViYjdhZDkxMjkxYmZiZTNkYTEzOThi
|
10
|
+
Yzg2Y2E4MjdmOGE1NWJhNDMzYjBkZGM4MDMyNzA4ODEwMmFiMTVmYWQyMzk2
|
11
|
+
ODIwOTIwMjRkMzgyN2E1Y2QzNTMxYjFmNzllM2NlOTE2ZjA4ZjM=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
NDZlMzFiYjVjYmNiMDcyOGVlZGI1YWZkYmY1YWZhNWI5ZWRmNzgyNWU1NjZi
|
14
|
+
NzJlZDEzZjNmYWJhOGJhMWM3NTE1NDJiNDhlYWI1MTVkNmI1MjNhNzY0OTg0
|
15
|
+
MDhjZGY5MDkxY2FjZWU4NTU1NzcwYzNiNzA4MGVjZDlhMDVmMGI=
|
data/History.txt
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
== 1.2.0 / 2013-12-06
|
2
|
+
|
3
|
+
* configurable minimum word length
|
4
|
+
* bonus words get rated higher than normal words (configurable just like blacklists)
|
5
|
+
* (Thanks to Tim-B for the new features)
|
6
|
+
|
1
7
|
== 1.1.0 / 2013-04-
|
2
8
|
|
3
9
|
* added support for custom word ignore handlers using lambda functions
|
data/README.md
CHANGED
@@ -15,6 +15,7 @@ Easily find and rank keywords in long texts.
|
|
15
15
|
* merge together Keywords from multiple sources
|
16
16
|
* contains a CLI tool that operates on STDIN/OUT and is configurable via parameters
|
17
17
|
* can use `bloomfilter-rb` gem for better performance (optional)
|
18
|
+
* words on the bonus list will receive a higher score
|
18
19
|
|
19
20
|
## Installation
|
20
21
|
|
@@ -38,6 +39,8 @@ text.configure do
|
|
38
39
|
set :upper_case, 3
|
39
40
|
set :long_words, 2
|
40
41
|
set :long_words_threshold, 15
|
42
|
+
set :short_words_threshold, 3 # => default: 2
|
43
|
+
set :bonus_multiplier, 2 # => default: 3
|
41
44
|
set :vowels, 1 # => default: 0 = not considered
|
42
45
|
set :consonants, 5 # => default: 0 = not considered
|
43
46
|
set :ignore_case, true # => default: false
|
@@ -106,6 +109,14 @@ whitelist = Highscore::Whitelist.load %w{these are valid keywords}
|
|
106
109
|
content = Highscore::Content.new "invalid words", whitelist
|
107
110
|
```
|
108
111
|
|
112
|
+
### Using bonus words
|
113
|
+
|
114
|
+
```ruby
|
115
|
+
# construct and inject it just like a blacklist
|
116
|
+
bonuslist = Highscore::Bonuslist.load %w{bonus words}
|
117
|
+
content = Highscore::Content.new "A string with bonus words in it", bonuslist
|
118
|
+
```
|
119
|
+
|
109
120
|
## I18n
|
110
121
|
|
111
122
|
```ruby
|
data/bin/highscore
CHANGED
@@ -26,11 +26,21 @@ optparse = OptionParser.new do |opts|
|
|
26
26
|
options[:wordlist] = Highscore::Whitelist.load_file(filepath)
|
27
27
|
end
|
28
28
|
|
29
|
+
# bonus word file
|
30
|
+
opts.on('--bonuslist FILEPATH', 'specify a bonus word file') do |filepath|
|
31
|
+
options[:bonus_list] = Highscore::Bonuslist.load_file(filepath)
|
32
|
+
end
|
33
|
+
|
29
34
|
# general multiplier
|
30
35
|
opts.on('-m', '--multiplier MULTIPLIER', 'set the global rank multiplier') do |multiplier|
|
31
36
|
options[:emphasis][:multiplier] = multiplier.to_f
|
32
37
|
end
|
33
38
|
|
39
|
+
# bonus multiplier
|
40
|
+
opts.on('--bonusmultiplier MULTIPLIER', 'set the bonus word multiplier') do |multiplier|
|
41
|
+
options[:emphasis][:bonus_multiplier] = multiplier.to_f
|
42
|
+
end
|
43
|
+
|
34
44
|
opts.on('--no-ignore-short', 'don\'t ignore short words (<= 2 chars)') do
|
35
45
|
options[:emphasis][:ignore_short_words] = false
|
36
46
|
end
|
@@ -68,6 +78,12 @@ optparse = OptionParser.new do |opts|
|
|
68
78
|
options[:emphasis][:long_words_threshold] = u.to_i
|
69
79
|
end
|
70
80
|
|
81
|
+
# short words threshold
|
82
|
+
opts.on('--shortwordsthreshold THRESHOLD', 'threshold for short words (default is 2 chars)') do |u|
|
83
|
+
options[:emphasis][:short_words_threshold] = u.to_i
|
84
|
+
end
|
85
|
+
|
86
|
+
|
71
87
|
opts.on('-n', '--top N', 'show only the top N keywords') do |u|
|
72
88
|
options[:top] = u.to_i
|
73
89
|
end
|
data/lib/highscore.rb
CHANGED
data/lib/highscore/content.rb
CHANGED
@@ -10,13 +10,15 @@ module Highscore
|
|
10
10
|
# @param wordlist Highscore::Wordlist
|
11
11
|
def initialize(content, wordlist = nil)
|
12
12
|
@content = content
|
13
|
-
@whitelist = @blacklist = nil
|
13
|
+
@whitelist = @blacklist = bonuslist = nil
|
14
14
|
@language_wordlists = {}
|
15
15
|
|
16
16
|
if wordlist.nil?
|
17
17
|
@blacklist = Highscore::Blacklist.load_default_file
|
18
18
|
elsif wordlist.kind_of? Highscore::Blacklist
|
19
19
|
@blacklist = wordlist
|
20
|
+
elsif wordlist.kind_of? Highscore::Bonuslist
|
21
|
+
bonuslist = wordlist
|
20
22
|
else
|
21
23
|
@whitelist = wordlist
|
22
24
|
end
|
@@ -25,6 +27,9 @@ module Highscore
|
|
25
27
|
:multiplier => 1.0,
|
26
28
|
:upper_case => 3.0,
|
27
29
|
:long_words => 2.0,
|
30
|
+
:short_words_threshold => 2,
|
31
|
+
:bonus_multiplier => 3.0,
|
32
|
+
:bonus_list => bonuslist,
|
28
33
|
:long_words_threshold => 15,
|
29
34
|
:vowels => 0,
|
30
35
|
:consonants => 0,
|
@@ -34,6 +39,7 @@ module Highscore
|
|
34
39
|
:word_pattern => /\p{Word}+/u,
|
35
40
|
:stemming => false
|
36
41
|
}
|
42
|
+
|
37
43
|
end
|
38
44
|
|
39
45
|
# configure ranking
|
@@ -120,7 +126,7 @@ module Highscore
|
|
120
126
|
#
|
121
127
|
# @return TrueClass FalseClass
|
122
128
|
def ignore? word
|
123
|
-
ignore = word.short?
|
129
|
+
ignore = word.short?(@emphasis[:short_words_threshold])
|
124
130
|
|
125
131
|
# exception: allow short words
|
126
132
|
ignore = (not allow_short_words?) if ignore
|
@@ -172,10 +178,19 @@ module Highscore
|
|
172
178
|
weight *= @emphasis[:upper_case]
|
173
179
|
end
|
174
180
|
|
181
|
+
weight += bonus(text)
|
175
182
|
weight += vowels(text)
|
176
183
|
weight + consonants(text)
|
177
184
|
end
|
178
185
|
|
186
|
+
def bonus(text)
|
187
|
+
return 0 if not @emphasis[:bonus_list].kind_of? Highscore::Bonuslist
|
188
|
+
if @emphasis[:bonus_list].include?(text)
|
189
|
+
return @emphasis[:multiplier] * @emphasis[:bonus_multiplier]
|
190
|
+
end
|
191
|
+
return 0
|
192
|
+
end
|
193
|
+
|
179
194
|
# weight the vowels on a text
|
180
195
|
#
|
181
196
|
# @param text String
|
data/lib/highscore/string.rb
CHANGED
@@ -0,0 +1,61 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../test_highscore'
|
2
|
+
|
3
|
+
class TestBonuslist < Highscore::TestCase
|
4
|
+
def test_is_wordlist
|
5
|
+
bonuslist = Highscore::Bonuslist.new
|
6
|
+
assert bonuslist.kind_of? Highscore::Bonuslist
|
7
|
+
end
|
8
|
+
|
9
|
+
def test_bonus_content
|
10
|
+
bonuslist = Highscore::Bonuslist.load %w{Hacker}
|
11
|
+
|
12
|
+
text = Highscore::Content.new "Cats Cats Cats Cats Ruby Hacker", bonuslist
|
13
|
+
|
14
|
+
results = text.keywords.rank
|
15
|
+
|
16
|
+
assert_equal results[0].text, "Cats"
|
17
|
+
assert_equal results[1].text, "Hacker"
|
18
|
+
assert_equal results[2].text, "Ruby"
|
19
|
+
|
20
|
+
assert_equal results[0].weight, 12.0
|
21
|
+
assert_equal results[1].weight, 6.0
|
22
|
+
assert_equal results[2].weight, 3.0
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_repeated_word
|
26
|
+
bonuslist = Highscore::Bonuslist.load %w{Hacker}
|
27
|
+
|
28
|
+
text = Highscore::Content.new "Cats Hacker Cats Cats Ruby Hacker", bonuslist
|
29
|
+
|
30
|
+
results = text.keywords.rank
|
31
|
+
|
32
|
+
assert_equal results[0].text, "Hacker"
|
33
|
+
assert_equal results[1].text, "Cats"
|
34
|
+
assert_equal results[2].text, "Ruby"
|
35
|
+
|
36
|
+
assert_equal results[0].weight, 12.0
|
37
|
+
assert_equal results[1].weight, 9.0
|
38
|
+
assert_equal results[2].weight, 3.0
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_bonus_option
|
42
|
+
|
43
|
+
bonuslist = Highscore::Bonuslist.load %w{Hacker}
|
44
|
+
|
45
|
+
text = Highscore::Content.new "Cats Hacker Cats Cats Ruby Hacker", bonuslist
|
46
|
+
|
47
|
+
text.configure do
|
48
|
+
set :bonus_multiplier, 4
|
49
|
+
end
|
50
|
+
|
51
|
+
results = text.keywords.rank
|
52
|
+
|
53
|
+
assert_equal results[0].text, "Hacker"
|
54
|
+
assert_equal results[1].text, "Cats"
|
55
|
+
assert_equal results[2].text, "Ruby"
|
56
|
+
|
57
|
+
assert_equal results[0].weight, 14.0
|
58
|
+
assert_equal results[1].weight, 9.0
|
59
|
+
assert_equal results[2].weight, 3.0
|
60
|
+
end
|
61
|
+
end
|
@@ -60,6 +60,14 @@ class TestContent < Highscore::TestCase
|
|
60
60
|
assert_equal 4, keywords.length
|
61
61
|
end
|
62
62
|
|
63
|
+
def test_rank_short_words_limit
|
64
|
+
keywords = '56789 as 444 cat is foobar'.keywords do
|
65
|
+
set :short_words_threshold, 3
|
66
|
+
end
|
67
|
+
|
68
|
+
assert_equal 1, keywords.length
|
69
|
+
end
|
70
|
+
|
63
71
|
def test_ignore_custom
|
64
72
|
keywords = 'foobar a3832'.keywords do
|
65
73
|
set :ignore, lambda { |w| w.gsub(/[^0-9]/, '').length > 2 }
|
data/version.txt
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.
|
1
|
+
1.2.0
|
metadata
CHANGED
@@ -1,38 +1,57 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: highscore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
5
|
-
prerelease:
|
4
|
+
version: 1.2.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Dominik Liebler
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2013-12-06 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: simplecov
|
16
|
-
requirement:
|
17
|
-
none: false
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
18
16
|
requirements:
|
19
17
|
- - ! '>='
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: 0.6.4
|
22
20
|
type: :development
|
23
21
|
prerelease: false
|
24
|
-
version_requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ! '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.6.4
|
25
27
|
- !ruby/object:Gem::Dependency
|
26
28
|
name: whatlanguage
|
27
|
-
requirement:
|
28
|
-
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.0.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
29
37
|
requirements:
|
30
38
|
- - ! '>='
|
31
39
|
- !ruby/object:Gem::Version
|
32
40
|
version: 1.0.0
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bloomfilter-rb
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ! '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 2.1.1
|
33
48
|
type: :runtime
|
34
49
|
prerelease: false
|
35
|
-
version_requirements:
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 2.1.1
|
36
55
|
description: Find and rank keywords in text.
|
37
56
|
email: liebler.dominik@googlemail.com
|
38
57
|
executables:
|
@@ -44,23 +63,25 @@ extra_rdoc_files:
|
|
44
63
|
files:
|
45
64
|
- bin/highscore
|
46
65
|
- lib/blacklist.txt
|
47
|
-
- lib/highscore
|
48
|
-
- lib/highscore/
|
49
|
-
- lib/highscore/keyword.rb
|
66
|
+
- lib/highscore.rb
|
67
|
+
- lib/highscore/wordlist.rb
|
50
68
|
- lib/highscore/keywords.rb
|
51
|
-
- lib/highscore/
|
69
|
+
- lib/highscore/bonuslist.rb
|
52
70
|
- lib/highscore/whitelist.rb
|
53
|
-
- lib/highscore/
|
54
|
-
- lib/highscore.rb
|
55
|
-
-
|
56
|
-
-
|
57
|
-
- test/highscore/
|
58
|
-
- test/highscore/test_keyword.rb
|
59
|
-
- test/highscore/test_keywords.rb
|
71
|
+
- lib/highscore/keyword.rb
|
72
|
+
- lib/highscore/string.rb
|
73
|
+
- lib/highscore/content.rb
|
74
|
+
- lib/highscore/blacklist.rb
|
75
|
+
- test/highscore/test_bonuslist.rb
|
60
76
|
- test/highscore/test_multiple_blacklists.rb
|
61
|
-
- test/highscore/
|
77
|
+
- test/highscore/test_keyword.rb
|
62
78
|
- test/highscore/test_whitelist.rb
|
63
79
|
- test/highscore/test_wordlist.rb
|
80
|
+
- test/highscore/test_string.rb
|
81
|
+
- test/highscore/test_content.rb
|
82
|
+
- test/highscore/test_blacklist.rb
|
83
|
+
- test/highscore/test_keywords.rb
|
84
|
+
- test/fixtures/blacklist.txt
|
64
85
|
- test/test_highscore.rb
|
65
86
|
- README.md
|
66
87
|
- History.txt
|
@@ -68,6 +89,7 @@ files:
|
|
68
89
|
- version.txt
|
69
90
|
homepage: http://domnikl.github.com/highscore
|
70
91
|
licenses: []
|
92
|
+
metadata: {}
|
71
93
|
post_install_message:
|
72
94
|
rdoc_options:
|
73
95
|
- --main
|
@@ -75,31 +97,30 @@ rdoc_options:
|
|
75
97
|
require_paths:
|
76
98
|
- lib
|
77
99
|
required_ruby_version: !ruby/object:Gem::Requirement
|
78
|
-
none: false
|
79
100
|
requirements:
|
80
101
|
- - ! '>='
|
81
102
|
- !ruby/object:Gem::Version
|
82
103
|
version: '0'
|
83
104
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
84
|
-
none: false
|
85
105
|
requirements:
|
86
106
|
- - ! '>='
|
87
107
|
- !ruby/object:Gem::Version
|
88
108
|
version: '0'
|
89
109
|
requirements: []
|
90
110
|
rubyforge_project: highscore
|
91
|
-
rubygems_version:
|
111
|
+
rubygems_version: 2.0.3
|
92
112
|
signing_key:
|
93
113
|
specification_version: 3
|
94
114
|
summary: Easily find and rank keywords in long texts.
|
95
115
|
test_files:
|
96
|
-
- test/
|
97
|
-
- test/highscore/test_blacklist.rb
|
98
|
-
- test/highscore/test_content.rb
|
99
|
-
- test/highscore/test_keyword.rb
|
100
|
-
- test/highscore/test_keywords.rb
|
116
|
+
- test/highscore/test_bonuslist.rb
|
101
117
|
- test/highscore/test_multiple_blacklists.rb
|
102
|
-
- test/highscore/
|
118
|
+
- test/highscore/test_keyword.rb
|
103
119
|
- test/highscore/test_whitelist.rb
|
104
120
|
- test/highscore/test_wordlist.rb
|
121
|
+
- test/highscore/test_string.rb
|
122
|
+
- test/highscore/test_content.rb
|
123
|
+
- test/highscore/test_blacklist.rb
|
124
|
+
- test/highscore/test_keywords.rb
|
125
|
+
- test/fixtures/blacklist.txt
|
105
126
|
- test/test_highscore.rb
|