highscore 1.1.0 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ ODQ1NGUyMjdjNGUzNjdlYTA4MjFlYjViMmY0NzM1MjBhMTU2ODlhOQ==
5
+ data.tar.gz: !binary |-
6
+ MTVmNzMyOTVhZjFkNjBhMTFjODc5NWM2MzdlN2E1NTljYzNjY2ExMw==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ ZGVlOWUwNTgxM2MwMTlkMGMzN2MzYjViYjdhZDkxMjkxYmZiZTNkYTEzOThi
10
+ Yzg2Y2E4MjdmOGE1NWJhNDMzYjBkZGM4MDMyNzA4ODEwMmFiMTVmYWQyMzk2
11
+ ODIwOTIwMjRkMzgyN2E1Y2QzNTMxYjFmNzllM2NlOTE2ZjA4ZjM=
12
+ data.tar.gz: !binary |-
13
+ NDZlMzFiYjVjYmNiMDcyOGVlZGI1YWZkYmY1YWZhNWI5ZWRmNzgyNWU1NjZi
14
+ NzJlZDEzZjNmYWJhOGJhMWM3NTE1NDJiNDhlYWI1MTVkNmI1MjNhNzY0OTg0
15
+ MDhjZGY5MDkxY2FjZWU4NTU1NzcwYzNiNzA4MGVjZDlhMDVmMGI=
@@ -1,3 +1,9 @@
1
+ == 1.2.0 / 2013-12-06
2
+
3
+ * configurable minimum word length
4
+ * bonus words get rated higher than normal words (configurable just like blacklists)
5
+ * (Thanks to Tim-B for the new features)
6
+
1
7
  == 1.1.0 / 2013-04-
2
8
 
3
9
  * added support for custom word ignore handlers using lambda functions
data/README.md CHANGED
@@ -15,6 +15,7 @@ Easily find and rank keywords in long texts.
15
15
  * merge together Keywords from multiple sources
16
16
  * contains a CLI tool that operates on STDIN/OUT and is configurable via parameters
17
17
  * can use `bloomfilter-rb` gem for better performance (optional)
18
+ * words on the bonus list will receive a higher score
18
19
 
19
20
  ## Installation
20
21
 
@@ -38,6 +39,8 @@ text.configure do
38
39
  set :upper_case, 3
39
40
  set :long_words, 2
40
41
  set :long_words_threshold, 15
42
+ set :short_words_threshold, 3 # => default: 2
43
+ set :bonus_multiplier, 2 # => default: 3
41
44
  set :vowels, 1 # => default: 0 = not considered
42
45
  set :consonants, 5 # => default: 0 = not considered
43
46
  set :ignore_case, true # => default: false
@@ -106,6 +109,14 @@ whitelist = Highscore::Whitelist.load %w{these are valid keywords}
106
109
  content = Highscore::Content.new "invalid words", whitelist
107
110
  ```
108
111
 
112
+ ### Using bonus words
113
+
114
+ ```ruby
115
+ # construct and inject it just like a blacklist
116
+ bonuslist = Highscore::Bonuslist.load %w{bonus words}
117
+ content = Highscore::Content.new "A string with bonus words in it", bonuslist
118
+ ```
119
+
109
120
  ## I18n
110
121
 
111
122
  ```ruby
@@ -26,11 +26,21 @@ optparse = OptionParser.new do |opts|
26
26
  options[:wordlist] = Highscore::Whitelist.load_file(filepath)
27
27
  end
28
28
 
29
+ # bonus word file
30
+ opts.on('--bonuslist FILEPATH', 'specify a bonus word file') do |filepath|
31
+ options[:bonus_list] = Highscore::Bonuslist.load_file(filepath)
32
+ end
33
+
29
34
  # general multiplier
30
35
  opts.on('-m', '--multiplier MULTIPLIER', 'set the global rank multiplier') do |multiplier|
31
36
  options[:emphasis][:multiplier] = multiplier.to_f
32
37
  end
33
38
 
39
+ # bonus multiplier
40
+ opts.on('--bonusmultiplier MULTIPLIER', 'set the bonus word multiplier') do |multiplier|
41
+ options[:emphasis][:bonus_multiplier] = multiplier.to_f
42
+ end
43
+
34
44
  opts.on('--no-ignore-short', 'don\'t ignore short words (<= 2 chars)') do
35
45
  options[:emphasis][:ignore_short_words] = false
36
46
  end
@@ -68,6 +78,12 @@ optparse = OptionParser.new do |opts|
68
78
  options[:emphasis][:long_words_threshold] = u.to_i
69
79
  end
70
80
 
81
+ # short words threshold
82
+ opts.on('--shortwordsthreshold THRESHOLD', 'threshold for short words (default is 2 chars)') do |u|
83
+ options[:emphasis][:short_words_threshold] = u.to_i
84
+ end
85
+
86
+
71
87
  opts.on('-n', '--top N', 'show only the top N keywords') do |u|
72
88
  options[:top] = u.to_i
73
89
  end
@@ -14,6 +14,7 @@ module Highscore
14
14
  string
15
15
  whitelist
16
16
  wordlist
17
+ bonuslist
17
18
  )
18
19
 
19
20
  modules.each do |m|
@@ -0,0 +1,12 @@
1
+ $:.unshift(File.dirname(__FILE__))
2
+ require "wordlist"
3
+
4
+ module Highscore
5
+ # Bonus words
6
+ #
7
+ class Bonuslist < Wordlist
8
+ def filter(keywords)
9
+ keywords
10
+ end
11
+ end
12
+ end
@@ -10,13 +10,15 @@ module Highscore
10
10
  # @param wordlist Highscore::Wordlist
11
11
  def initialize(content, wordlist = nil)
12
12
  @content = content
13
- @whitelist = @blacklist = nil
13
+ @whitelist = @blacklist = bonuslist = nil
14
14
  @language_wordlists = {}
15
15
 
16
16
  if wordlist.nil?
17
17
  @blacklist = Highscore::Blacklist.load_default_file
18
18
  elsif wordlist.kind_of? Highscore::Blacklist
19
19
  @blacklist = wordlist
20
+ elsif wordlist.kind_of? Highscore::Bonuslist
21
+ bonuslist = wordlist
20
22
  else
21
23
  @whitelist = wordlist
22
24
  end
@@ -25,6 +27,9 @@ module Highscore
25
27
  :multiplier => 1.0,
26
28
  :upper_case => 3.0,
27
29
  :long_words => 2.0,
30
+ :short_words_threshold => 2,
31
+ :bonus_multiplier => 3.0,
32
+ :bonus_list => bonuslist,
28
33
  :long_words_threshold => 15,
29
34
  :vowels => 0,
30
35
  :consonants => 0,
@@ -34,6 +39,7 @@ module Highscore
34
39
  :word_pattern => /\p{Word}+/u,
35
40
  :stemming => false
36
41
  }
42
+
37
43
  end
38
44
 
39
45
  # configure ranking
@@ -120,7 +126,7 @@ module Highscore
120
126
  #
121
127
  # @return TrueClass FalseClass
122
128
  def ignore? word
123
- ignore = word.short?
129
+ ignore = word.short?(@emphasis[:short_words_threshold])
124
130
 
125
131
  # exception: allow short words
126
132
  ignore = (not allow_short_words?) if ignore
@@ -172,10 +178,19 @@ module Highscore
172
178
  weight *= @emphasis[:upper_case]
173
179
  end
174
180
 
181
+ weight += bonus(text)
175
182
  weight += vowels(text)
176
183
  weight + consonants(text)
177
184
  end
178
185
 
186
+ def bonus(text)
187
+ return 0 if not @emphasis[:bonus_list].kind_of? Highscore::Bonuslist
188
+ if @emphasis[:bonus_list].include?(text)
189
+ return @emphasis[:multiplier] * @emphasis[:bonus_multiplier]
190
+ end
191
+ return 0
192
+ end
193
+
179
194
  # weight the vowels on a text
180
195
  #
181
196
  # @param text String
@@ -38,7 +38,7 @@ class String
38
38
  # is this a short word?
39
39
  #
40
40
  # @return TrueClass|FalseClass
41
- def short?
42
- match(/^[\d]+(\.[\d]+){0,1}$/) or length <= 2
41
+ def short?(limit=2)
42
+ match(/^[\d]+(\.[\d]+){0,#{limit - 1}}$/) or length <= limit
43
43
  end
44
44
  end
@@ -0,0 +1,61 @@
1
+ require File.dirname(__FILE__) + '/../test_highscore'
2
+
3
+ class TestBonuslist < Highscore::TestCase
4
+ def test_is_wordlist
5
+ bonuslist = Highscore::Bonuslist.new
6
+ assert bonuslist.kind_of? Highscore::Bonuslist
7
+ end
8
+
9
+ def test_bonus_content
10
+ bonuslist = Highscore::Bonuslist.load %w{Hacker}
11
+
12
+ text = Highscore::Content.new "Cats Cats Cats Cats Ruby Hacker", bonuslist
13
+
14
+ results = text.keywords.rank
15
+
16
+ assert_equal results[0].text, "Cats"
17
+ assert_equal results[1].text, "Hacker"
18
+ assert_equal results[2].text, "Ruby"
19
+
20
+ assert_equal results[0].weight, 12.0
21
+ assert_equal results[1].weight, 6.0
22
+ assert_equal results[2].weight, 3.0
23
+ end
24
+
25
+ def test_repeated_word
26
+ bonuslist = Highscore::Bonuslist.load %w{Hacker}
27
+
28
+ text = Highscore::Content.new "Cats Hacker Cats Cats Ruby Hacker", bonuslist
29
+
30
+ results = text.keywords.rank
31
+
32
+ assert_equal results[0].text, "Hacker"
33
+ assert_equal results[1].text, "Cats"
34
+ assert_equal results[2].text, "Ruby"
35
+
36
+ assert_equal results[0].weight, 12.0
37
+ assert_equal results[1].weight, 9.0
38
+ assert_equal results[2].weight, 3.0
39
+ end
40
+
41
+ def test_bonus_option
42
+
43
+ bonuslist = Highscore::Bonuslist.load %w{Hacker}
44
+
45
+ text = Highscore::Content.new "Cats Hacker Cats Cats Ruby Hacker", bonuslist
46
+
47
+ text.configure do
48
+ set :bonus_multiplier, 4
49
+ end
50
+
51
+ results = text.keywords.rank
52
+
53
+ assert_equal results[0].text, "Hacker"
54
+ assert_equal results[1].text, "Cats"
55
+ assert_equal results[2].text, "Ruby"
56
+
57
+ assert_equal results[0].weight, 14.0
58
+ assert_equal results[1].weight, 9.0
59
+ assert_equal results[2].weight, 3.0
60
+ end
61
+ end
@@ -60,6 +60,14 @@ class TestContent < Highscore::TestCase
60
60
  assert_equal 4, keywords.length
61
61
  end
62
62
 
63
+ def test_rank_short_words_limit
64
+ keywords = '56789 as 444 cat is foobar'.keywords do
65
+ set :short_words_threshold, 3
66
+ end
67
+
68
+ assert_equal 1, keywords.length
69
+ end
70
+
63
71
  def test_ignore_custom
64
72
  keywords = 'foobar a3832'.keywords do
65
73
  set :ignore, lambda { |w| w.gsub(/[^0-9]/, '').length > 2 }
@@ -1 +1 @@
1
- 1.1.0
1
+ 1.2.0
metadata CHANGED
@@ -1,38 +1,57 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: highscore
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
5
- prerelease:
4
+ version: 1.2.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Dominik Liebler
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2012-12-13 00:00:00.000000000 Z
11
+ date: 2013-12-06 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: simplecov
16
- requirement: &70314466936760 !ruby/object:Gem::Requirement
17
- none: false
15
+ requirement: !ruby/object:Gem::Requirement
18
16
  requirements:
19
17
  - - ! '>='
20
18
  - !ruby/object:Gem::Version
21
19
  version: 0.6.4
22
20
  type: :development
23
21
  prerelease: false
24
- version_requirements: *70314466936760
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ! '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 0.6.4
25
27
  - !ruby/object:Gem::Dependency
26
28
  name: whatlanguage
27
- requirement: &70314466936180 !ruby/object:Gem::Requirement
28
- none: false
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 1.0.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
29
37
  requirements:
30
38
  - - ! '>='
31
39
  - !ruby/object:Gem::Version
32
40
  version: 1.0.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: bloomfilter-rb
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: 2.1.1
33
48
  type: :runtime
34
49
  prerelease: false
35
- version_requirements: *70314466936180
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 2.1.1
36
55
  description: Find and rank keywords in text.
37
56
  email: liebler.dominik@googlemail.com
38
57
  executables:
@@ -44,23 +63,25 @@ extra_rdoc_files:
44
63
  files:
45
64
  - bin/highscore
46
65
  - lib/blacklist.txt
47
- - lib/highscore/blacklist.rb
48
- - lib/highscore/content.rb
49
- - lib/highscore/keyword.rb
66
+ - lib/highscore.rb
67
+ - lib/highscore/wordlist.rb
50
68
  - lib/highscore/keywords.rb
51
- - lib/highscore/string.rb
69
+ - lib/highscore/bonuslist.rb
52
70
  - lib/highscore/whitelist.rb
53
- - lib/highscore/wordlist.rb
54
- - lib/highscore.rb
55
- - test/fixtures/blacklist.txt
56
- - test/highscore/test_blacklist.rb
57
- - test/highscore/test_content.rb
58
- - test/highscore/test_keyword.rb
59
- - test/highscore/test_keywords.rb
71
+ - lib/highscore/keyword.rb
72
+ - lib/highscore/string.rb
73
+ - lib/highscore/content.rb
74
+ - lib/highscore/blacklist.rb
75
+ - test/highscore/test_bonuslist.rb
60
76
  - test/highscore/test_multiple_blacklists.rb
61
- - test/highscore/test_string.rb
77
+ - test/highscore/test_keyword.rb
62
78
  - test/highscore/test_whitelist.rb
63
79
  - test/highscore/test_wordlist.rb
80
+ - test/highscore/test_string.rb
81
+ - test/highscore/test_content.rb
82
+ - test/highscore/test_blacklist.rb
83
+ - test/highscore/test_keywords.rb
84
+ - test/fixtures/blacklist.txt
64
85
  - test/test_highscore.rb
65
86
  - README.md
66
87
  - History.txt
@@ -68,6 +89,7 @@ files:
68
89
  - version.txt
69
90
  homepage: http://domnikl.github.com/highscore
70
91
  licenses: []
92
+ metadata: {}
71
93
  post_install_message:
72
94
  rdoc_options:
73
95
  - --main
@@ -75,31 +97,30 @@ rdoc_options:
75
97
  require_paths:
76
98
  - lib
77
99
  required_ruby_version: !ruby/object:Gem::Requirement
78
- none: false
79
100
  requirements:
80
101
  - - ! '>='
81
102
  - !ruby/object:Gem::Version
82
103
  version: '0'
83
104
  required_rubygems_version: !ruby/object:Gem::Requirement
84
- none: false
85
105
  requirements:
86
106
  - - ! '>='
87
107
  - !ruby/object:Gem::Version
88
108
  version: '0'
89
109
  requirements: []
90
110
  rubyforge_project: highscore
91
- rubygems_version: 1.8.16
111
+ rubygems_version: 2.0.3
92
112
  signing_key:
93
113
  specification_version: 3
94
114
  summary: Easily find and rank keywords in long texts.
95
115
  test_files:
96
- - test/fixtures/blacklist.txt
97
- - test/highscore/test_blacklist.rb
98
- - test/highscore/test_content.rb
99
- - test/highscore/test_keyword.rb
100
- - test/highscore/test_keywords.rb
116
+ - test/highscore/test_bonuslist.rb
101
117
  - test/highscore/test_multiple_blacklists.rb
102
- - test/highscore/test_string.rb
118
+ - test/highscore/test_keyword.rb
103
119
  - test/highscore/test_whitelist.rb
104
120
  - test/highscore/test_wordlist.rb
121
+ - test/highscore/test_string.rb
122
+ - test/highscore/test_content.rb
123
+ - test/highscore/test_blacklist.rb
124
+ - test/highscore/test_keywords.rb
125
+ - test/fixtures/blacklist.txt
105
126
  - test/test_highscore.rb