highscore 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ ODQ1NGUyMjdjNGUzNjdlYTA4MjFlYjViMmY0NzM1MjBhMTU2ODlhOQ==
5
+ data.tar.gz: !binary |-
6
+ MTVmNzMyOTVhZjFkNjBhMTFjODc5NWM2MzdlN2E1NTljYzNjY2ExMw==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ ZGVlOWUwNTgxM2MwMTlkMGMzN2MzYjViYjdhZDkxMjkxYmZiZTNkYTEzOThi
10
+ Yzg2Y2E4MjdmOGE1NWJhNDMzYjBkZGM4MDMyNzA4ODEwMmFiMTVmYWQyMzk2
11
+ ODIwOTIwMjRkMzgyN2E1Y2QzNTMxYjFmNzllM2NlOTE2ZjA4ZjM=
12
+ data.tar.gz: !binary |-
13
+ NDZlMzFiYjVjYmNiMDcyOGVlZGI1YWZkYmY1YWZhNWI5ZWRmNzgyNWU1NjZi
14
+ NzJlZDEzZjNmYWJhOGJhMWM3NTE1NDJiNDhlYWI1MTVkNmI1MjNhNzY0OTg0
15
+ MDhjZGY5MDkxY2FjZWU4NTU1NzcwYzNiNzA4MGVjZDlhMDVmMGI=
@@ -1,3 +1,9 @@
1
+ == 1.2.0 / 2013-12-06
2
+
3
+ * configurable minimum word length
4
+ * bonus words get rated higher than normal words (configurable just like blacklists)
5
+ * (Thanks to Tim-B for the new features)
6
+
1
7
  == 1.1.0 / 2013-04-
2
8
 
3
9
  * added support for custom word ignore handlers using lambda functions
data/README.md CHANGED
@@ -15,6 +15,7 @@ Easily find and rank keywords in long texts.
15
15
  * merge together Keywords from multiple sources
16
16
  * contains a CLI tool that operates on STDIN/OUT and is configurable via parameters
17
17
  * can use `bloomfilter-rb` gem for better performance (optional)
18
+ * words on the bonus list will receive a higher score
18
19
 
19
20
  ## Installation
20
21
 
@@ -38,6 +39,8 @@ text.configure do
38
39
  set :upper_case, 3
39
40
  set :long_words, 2
40
41
  set :long_words_threshold, 15
42
+ set :short_words_threshold, 3 # => default: 2
43
+ set :bonus_multiplier, 2 # => default: 3
41
44
  set :vowels, 1 # => default: 0 = not considered
42
45
  set :consonants, 5 # => default: 0 = not considered
43
46
  set :ignore_case, true # => default: false
@@ -106,6 +109,14 @@ whitelist = Highscore::Whitelist.load %w{these are valid keywords}
106
109
  content = Highscore::Content.new "invalid words", whitelist
107
110
  ```
108
111
 
112
+ ### Using bonus words
113
+
114
+ ```ruby
115
+ # construct and inject it just like a blacklist
116
+ bonuslist = Highscore::Bonuslist.load %w{bonus words}
117
+ content = Highscore::Content.new "A string with bonus words in it", bonuslist
118
+ ```
119
+
109
120
  ## I18n
110
121
 
111
122
  ```ruby
@@ -26,11 +26,21 @@ optparse = OptionParser.new do |opts|
26
26
  options[:wordlist] = Highscore::Whitelist.load_file(filepath)
27
27
  end
28
28
 
29
+ # bonus word file
30
+ opts.on('--bonuslist FILEPATH', 'specify a bonus word file') do |filepath|
31
+ options[:bonus_list] = Highscore::Bonuslist.load_file(filepath)
32
+ end
33
+
29
34
  # general multiplier
30
35
  opts.on('-m', '--multiplier MULTIPLIER', 'set the global rank multiplier') do |multiplier|
31
36
  options[:emphasis][:multiplier] = multiplier.to_f
32
37
  end
33
38
 
39
+ # bonus multiplier
40
+ opts.on('--bonusmultiplier MULTIPLIER', 'set the bonus word multiplier') do |multiplier|
41
+ options[:emphasis][:bonus_multiplier] = multiplier.to_f
42
+ end
43
+
34
44
  opts.on('--no-ignore-short', 'don\'t ignore short words (<= 2 chars)') do
35
45
  options[:emphasis][:ignore_short_words] = false
36
46
  end
@@ -68,6 +78,12 @@ optparse = OptionParser.new do |opts|
68
78
  options[:emphasis][:long_words_threshold] = u.to_i
69
79
  end
70
80
 
81
+ # short words threshold
82
+ opts.on('--shortwordsthreshold THRESHOLD', 'threshold for short words (default is 2 chars)') do |u|
83
+ options[:emphasis][:short_words_threshold] = u.to_i
84
+ end
85
+
86
+
71
87
  opts.on('-n', '--top N', 'show only the top N keywords') do |u|
72
88
  options[:top] = u.to_i
73
89
  end
@@ -14,6 +14,7 @@ module Highscore
14
14
  string
15
15
  whitelist
16
16
  wordlist
17
+ bonuslist
17
18
  )
18
19
 
19
20
  modules.each do |m|
@@ -0,0 +1,12 @@
1
+ $:.unshift(File.dirname(__FILE__))
2
+ require "wordlist"
3
+
4
+ module Highscore
5
+ # Bonus words
6
+ #
7
+ class Bonuslist < Wordlist
8
+ def filter(keywords)
9
+ keywords
10
+ end
11
+ end
12
+ end
@@ -10,13 +10,15 @@ module Highscore
10
10
  # @param wordlist Highscore::Wordlist
11
11
  def initialize(content, wordlist = nil)
12
12
  @content = content
13
- @whitelist = @blacklist = nil
13
+ @whitelist = @blacklist = bonuslist = nil
14
14
  @language_wordlists = {}
15
15
 
16
16
  if wordlist.nil?
17
17
  @blacklist = Highscore::Blacklist.load_default_file
18
18
  elsif wordlist.kind_of? Highscore::Blacklist
19
19
  @blacklist = wordlist
20
+ elsif wordlist.kind_of? Highscore::Bonuslist
21
+ bonuslist = wordlist
20
22
  else
21
23
  @whitelist = wordlist
22
24
  end
@@ -25,6 +27,9 @@ module Highscore
25
27
  :multiplier => 1.0,
26
28
  :upper_case => 3.0,
27
29
  :long_words => 2.0,
30
+ :short_words_threshold => 2,
31
+ :bonus_multiplier => 3.0,
32
+ :bonus_list => bonuslist,
28
33
  :long_words_threshold => 15,
29
34
  :vowels => 0,
30
35
  :consonants => 0,
@@ -34,6 +39,7 @@ module Highscore
34
39
  :word_pattern => /\p{Word}+/u,
35
40
  :stemming => false
36
41
  }
42
+
37
43
  end
38
44
 
39
45
  # configure ranking
@@ -120,7 +126,7 @@ module Highscore
120
126
  #
121
127
  # @return TrueClass FalseClass
122
128
  def ignore? word
123
- ignore = word.short?
129
+ ignore = word.short?(@emphasis[:short_words_threshold])
124
130
 
125
131
  # exception: allow short words
126
132
  ignore = (not allow_short_words?) if ignore
@@ -172,10 +178,19 @@ module Highscore
172
178
  weight *= @emphasis[:upper_case]
173
179
  end
174
180
 
181
+ weight += bonus(text)
175
182
  weight += vowels(text)
176
183
  weight + consonants(text)
177
184
  end
178
185
 
186
+ def bonus(text)
187
+ return 0 if not @emphasis[:bonus_list].kind_of? Highscore::Bonuslist
188
+ if @emphasis[:bonus_list].include?(text)
189
+ return @emphasis[:multiplier] * @emphasis[:bonus_multiplier]
190
+ end
191
+ return 0
192
+ end
193
+
179
194
  # weight the vowels on a text
180
195
  #
181
196
  # @param text String
@@ -38,7 +38,7 @@ class String
38
38
  # is this a short word?
39
39
  #
40
40
  # @return TrueClass|FalseClass
41
- def short?
42
- match(/^[\d]+(\.[\d]+){0,1}$/) or length <= 2
41
+ def short?(limit=2)
42
+ match(/^[\d]+(\.[\d]+){0,#{limit - 1}}$/) or length <= limit
43
43
  end
44
44
  end
@@ -0,0 +1,61 @@
1
+ require File.dirname(__FILE__) + '/../test_highscore'
2
+
3
+ class TestBonuslist < Highscore::TestCase
4
+ def test_is_wordlist
5
+ bonuslist = Highscore::Bonuslist.new
6
+ assert bonuslist.kind_of? Highscore::Bonuslist
7
+ end
8
+
9
+ def test_bonus_content
10
+ bonuslist = Highscore::Bonuslist.load %w{Hacker}
11
+
12
+ text = Highscore::Content.new "Cats Cats Cats Cats Ruby Hacker", bonuslist
13
+
14
+ results = text.keywords.rank
15
+
16
+ assert_equal results[0].text, "Cats"
17
+ assert_equal results[1].text, "Hacker"
18
+ assert_equal results[2].text, "Ruby"
19
+
20
+ assert_equal results[0].weight, 12.0
21
+ assert_equal results[1].weight, 6.0
22
+ assert_equal results[2].weight, 3.0
23
+ end
24
+
25
+ def test_repeated_word
26
+ bonuslist = Highscore::Bonuslist.load %w{Hacker}
27
+
28
+ text = Highscore::Content.new "Cats Hacker Cats Cats Ruby Hacker", bonuslist
29
+
30
+ results = text.keywords.rank
31
+
32
+ assert_equal results[0].text, "Hacker"
33
+ assert_equal results[1].text, "Cats"
34
+ assert_equal results[2].text, "Ruby"
35
+
36
+ assert_equal results[0].weight, 12.0
37
+ assert_equal results[1].weight, 9.0
38
+ assert_equal results[2].weight, 3.0
39
+ end
40
+
41
+ def test_bonus_option
42
+
43
+ bonuslist = Highscore::Bonuslist.load %w{Hacker}
44
+
45
+ text = Highscore::Content.new "Cats Hacker Cats Cats Ruby Hacker", bonuslist
46
+
47
+ text.configure do
48
+ set :bonus_multiplier, 4
49
+ end
50
+
51
+ results = text.keywords.rank
52
+
53
+ assert_equal results[0].text, "Hacker"
54
+ assert_equal results[1].text, "Cats"
55
+ assert_equal results[2].text, "Ruby"
56
+
57
+ assert_equal results[0].weight, 14.0
58
+ assert_equal results[1].weight, 9.0
59
+ assert_equal results[2].weight, 3.0
60
+ end
61
+ end
@@ -60,6 +60,14 @@ class TestContent < Highscore::TestCase
60
60
  assert_equal 4, keywords.length
61
61
  end
62
62
 
63
+ def test_rank_short_words_limit
64
+ keywords = '56789 as 444 cat is foobar'.keywords do
65
+ set :short_words_threshold, 3
66
+ end
67
+
68
+ assert_equal 1, keywords.length
69
+ end
70
+
63
71
  def test_ignore_custom
64
72
  keywords = 'foobar a3832'.keywords do
65
73
  set :ignore, lambda { |w| w.gsub(/[^0-9]/, '').length > 2 }
@@ -1 +1 @@
1
- 1.1.0
1
+ 1.2.0
metadata CHANGED
@@ -1,38 +1,57 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: highscore
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
5
- prerelease:
4
+ version: 1.2.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Dominik Liebler
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2012-12-13 00:00:00.000000000 Z
11
+ date: 2013-12-06 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: simplecov
16
- requirement: &70314466936760 !ruby/object:Gem::Requirement
17
- none: false
15
+ requirement: !ruby/object:Gem::Requirement
18
16
  requirements:
19
17
  - - ! '>='
20
18
  - !ruby/object:Gem::Version
21
19
  version: 0.6.4
22
20
  type: :development
23
21
  prerelease: false
24
- version_requirements: *70314466936760
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ! '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 0.6.4
25
27
  - !ruby/object:Gem::Dependency
26
28
  name: whatlanguage
27
- requirement: &70314466936180 !ruby/object:Gem::Requirement
28
- none: false
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 1.0.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
29
37
  requirements:
30
38
  - - ! '>='
31
39
  - !ruby/object:Gem::Version
32
40
  version: 1.0.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: bloomfilter-rb
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: 2.1.1
33
48
  type: :runtime
34
49
  prerelease: false
35
- version_requirements: *70314466936180
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 2.1.1
36
55
  description: Find and rank keywords in text.
37
56
  email: liebler.dominik@googlemail.com
38
57
  executables:
@@ -44,23 +63,25 @@ extra_rdoc_files:
44
63
  files:
45
64
  - bin/highscore
46
65
  - lib/blacklist.txt
47
- - lib/highscore/blacklist.rb
48
- - lib/highscore/content.rb
49
- - lib/highscore/keyword.rb
66
+ - lib/highscore.rb
67
+ - lib/highscore/wordlist.rb
50
68
  - lib/highscore/keywords.rb
51
- - lib/highscore/string.rb
69
+ - lib/highscore/bonuslist.rb
52
70
  - lib/highscore/whitelist.rb
53
- - lib/highscore/wordlist.rb
54
- - lib/highscore.rb
55
- - test/fixtures/blacklist.txt
56
- - test/highscore/test_blacklist.rb
57
- - test/highscore/test_content.rb
58
- - test/highscore/test_keyword.rb
59
- - test/highscore/test_keywords.rb
71
+ - lib/highscore/keyword.rb
72
+ - lib/highscore/string.rb
73
+ - lib/highscore/content.rb
74
+ - lib/highscore/blacklist.rb
75
+ - test/highscore/test_bonuslist.rb
60
76
  - test/highscore/test_multiple_blacklists.rb
61
- - test/highscore/test_string.rb
77
+ - test/highscore/test_keyword.rb
62
78
  - test/highscore/test_whitelist.rb
63
79
  - test/highscore/test_wordlist.rb
80
+ - test/highscore/test_string.rb
81
+ - test/highscore/test_content.rb
82
+ - test/highscore/test_blacklist.rb
83
+ - test/highscore/test_keywords.rb
84
+ - test/fixtures/blacklist.txt
64
85
  - test/test_highscore.rb
65
86
  - README.md
66
87
  - History.txt
@@ -68,6 +89,7 @@ files:
68
89
  - version.txt
69
90
  homepage: http://domnikl.github.com/highscore
70
91
  licenses: []
92
+ metadata: {}
71
93
  post_install_message:
72
94
  rdoc_options:
73
95
  - --main
@@ -75,31 +97,30 @@ rdoc_options:
75
97
  require_paths:
76
98
  - lib
77
99
  required_ruby_version: !ruby/object:Gem::Requirement
78
- none: false
79
100
  requirements:
80
101
  - - ! '>='
81
102
  - !ruby/object:Gem::Version
82
103
  version: '0'
83
104
  required_rubygems_version: !ruby/object:Gem::Requirement
84
- none: false
85
105
  requirements:
86
106
  - - ! '>='
87
107
  - !ruby/object:Gem::Version
88
108
  version: '0'
89
109
  requirements: []
90
110
  rubyforge_project: highscore
91
- rubygems_version: 1.8.16
111
+ rubygems_version: 2.0.3
92
112
  signing_key:
93
113
  specification_version: 3
94
114
  summary: Easily find and rank keywords in long texts.
95
115
  test_files:
96
- - test/fixtures/blacklist.txt
97
- - test/highscore/test_blacklist.rb
98
- - test/highscore/test_content.rb
99
- - test/highscore/test_keyword.rb
100
- - test/highscore/test_keywords.rb
116
+ - test/highscore/test_bonuslist.rb
101
117
  - test/highscore/test_multiple_blacklists.rb
102
- - test/highscore/test_string.rb
118
+ - test/highscore/test_keyword.rb
103
119
  - test/highscore/test_whitelist.rb
104
120
  - test/highscore/test_wordlist.rb
121
+ - test/highscore/test_string.rb
122
+ - test/highscore/test_content.rb
123
+ - test/highscore/test_blacklist.rb
124
+ - test/highscore/test_keywords.rb
125
+ - test/fixtures/blacklist.txt
105
126
  - test/test_highscore.rb