highscore 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +7 -0
- data/README.md +28 -21
- data/Rakefile +5 -1
- data/lib/highscore.rb +0 -2
- data/lib/highscore/blacklist.rb +12 -0
- data/lib/highscore/content.rb +59 -38
- data/lib/highscore/keyword.rb +8 -1
- data/lib/highscore/keywords.rb +26 -19
- data/lib/highscore/string.rb +7 -0
- data/lib/highscore/whitelist.rb +10 -1
- data/lib/highscore/wordlist.rb +45 -4
- data/test/highscore/test_content.rb +9 -7
- data/test/highscore/test_keyword.rb +8 -0
- data/test/highscore/test_keywords.rb +16 -0
- data/test/highscore/test_string.rb +5 -0
- data/test/highscore/test_wordlist.rb +8 -0
- data/version.txt +1 -1
- metadata +6 -6
data/History.txt
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
== 1.1.0 / 2013-04-
|
2
|
+
|
3
|
+
* added support for custom word ignore handlers using lambda functions
|
4
|
+
* fixed some performance issues with unnecessary ranking by unused criteria (consonants & vowels)
|
5
|
+
* added possibility to use bloomfilters for faster performance instead of Array#include?
|
6
|
+
* removed compatibility with Ruby 1.8.x
|
7
|
+
|
1
8
|
== 1.0.0 / 2013-02-02
|
2
9
|
|
3
10
|
* added per-language support for black- and whitelists (thanks to bobjflong)
|
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Easily find and rank keywords in long texts.
|
4
4
|
|
5
|
-
[](http://travis-ci.org/domnikl/highscore) [](http://travis-ci.org/domnikl/highscore) [](https://codeclimate.com/github/domnikl/highscore)
|
6
6
|
|
7
7
|
## Features
|
8
8
|
|
@@ -11,9 +11,23 @@ Easily find and rank keywords in long texts.
|
|
11
11
|
* directly get keywords from String objects
|
12
12
|
* blacklist words via a plain text file, String or an Array of words (per language if needed)
|
13
13
|
* optionally, configure a whitelist and only words from that list will get ranked
|
14
|
-
*
|
14
|
+
* uses word stemming if necessary (requires the `fast-stemmer` or `stemmer` gem)
|
15
15
|
* merge together Keywords from multiple sources
|
16
16
|
* contains a CLI tool that operates on STDIN/OUT and is configurable via parameters
|
17
|
+
* can use `bloomfilter-rb` gem for better performance (optional)
|
18
|
+
|
19
|
+
## Installation
|
20
|
+
|
21
|
+
* `[sudo] gem install highscore`
|
22
|
+
|
23
|
+
For better blacklist performance, use the `bloomfilter-rb` gem:
|
24
|
+
|
25
|
+
* `[sudo] gem install bloomfilter-rb`
|
26
|
+
|
27
|
+
To use word stemming, you need to have the `fast-stemmer` (C extension) or `stemmer` gem installed:
|
28
|
+
|
29
|
+
* `[sudo] gem install fast-stemmer`
|
30
|
+
* `[sudo] gem install stemmer`
|
17
31
|
|
18
32
|
## Examples
|
19
33
|
|
@@ -37,7 +51,7 @@ text.keywords.top(50).each do |keyword|
|
|
37
51
|
keyword.weight # => rank weight (float)
|
38
52
|
end
|
39
53
|
|
40
|
-
# you could just use a string
|
54
|
+
# you could also just use a string
|
41
55
|
keywords = "Foo bar baz".keywords(blacklist) do
|
42
56
|
set :multiplier, 10
|
43
57
|
end
|
@@ -59,9 +73,11 @@ end
|
|
59
73
|
# not 1.0
|
60
74
|
```
|
61
75
|
|
62
|
-
Have a look at bin/highscore
|
76
|
+
Have a look at `bin/highscore`: you can run highscore on your CLI and feed it text on STDIN.
|
63
77
|
|
64
|
-
##
|
78
|
+
## Blacklisting and Whitelisting
|
79
|
+
|
80
|
+
### Using a custom blacklist to ignore keywords
|
65
81
|
|
66
82
|
```ruby
|
67
83
|
# setting single words
|
@@ -82,7 +98,7 @@ blacklist = Highscore::Blacklist.load_default_file
|
|
82
98
|
content = Highscore::Content.new "a string", blacklist
|
83
99
|
```
|
84
100
|
|
85
|
-
|
101
|
+
### Using a whitelist instead of ranking all words
|
86
102
|
|
87
103
|
```ruby
|
88
104
|
# construct and inject it just like a blacklist
|
@@ -90,7 +106,7 @@ whitelist = Highscore::Whitelist.load %w{these are valid keywords}
|
|
90
106
|
content = Highscore::Content.new "invalid words", whitelist
|
91
107
|
```
|
92
108
|
|
93
|
-
##
|
109
|
+
## I18n
|
94
110
|
|
95
111
|
```ruby
|
96
112
|
# Load a default blacklist
|
@@ -98,34 +114,25 @@ blacklist_default = Highscore::Blacklist.load "mister"
|
|
98
114
|
text = Highscore::Content.new "oui mister interesting", blacklist_default
|
99
115
|
text.keywords.top(3).join " "
|
100
116
|
|
101
|
-
#
|
117
|
+
# => prints "interesting oui"
|
102
118
|
|
103
119
|
# Load a rudimentary blacklist for French
|
104
120
|
blacklist_francais = Highscore::Blacklist.load "oui"
|
105
|
-
text.add_wordlist blacklist_francais,
|
121
|
+
text.add_wordlist blacklist_francais, :french
|
106
122
|
text.keywords(:lang => :fr).top(3).join " "
|
107
123
|
|
108
|
-
#
|
124
|
+
# => prints "interesting mister"
|
109
125
|
```
|
110
126
|
|
111
|
-
## Install
|
112
|
-
|
113
|
-
* `[sudo] gem install highscore`
|
114
|
-
|
115
|
-
To use word stemming, you need to have the `fast-stemmer` (C extension) or `stemmer` gem installed:
|
116
|
-
|
117
|
-
* `[sudo] gem install fast-stemmer`
|
118
|
-
* `[sudo] gem install stemmer`
|
119
|
-
|
120
127
|
## Author
|
121
128
|
|
122
|
-
Original author: Dominik Liebler <liebler.dominik@
|
129
|
+
Original author: Dominik Liebler <liebler.dominik@gmail.com>
|
123
130
|
|
124
131
|
## License
|
125
132
|
|
126
133
|
(The MIT License)
|
127
134
|
|
128
|
-
Copyright (c)
|
135
|
+
Copyright (c) 2013 Dominik Liebler
|
129
136
|
|
130
137
|
Permission is hereby granted, free of charge, to any person obtaining
|
131
138
|
a copy of this software and associated documentation files (the
|
data/Rakefile
CHANGED
@@ -9,18 +9,22 @@ require "highscore"
|
|
9
9
|
gem_name = "highscore-#{Highscore::VERSION}.gem"
|
10
10
|
|
11
11
|
namespace :gem do
|
12
|
+
desc "clean previously generated gems"
|
12
13
|
task :clean do
|
13
14
|
system "rm -f *.gem"
|
14
15
|
end
|
15
|
-
|
16
|
+
|
17
|
+
desc "build gem"
|
16
18
|
task :build => [:clean, :test] do
|
17
19
|
system "gem build highscore.gemspec"
|
18
20
|
end
|
19
21
|
|
22
|
+
desc "install gem"
|
20
23
|
task :install => :build do
|
21
24
|
system "gem install #{gem_name}"
|
22
25
|
end
|
23
26
|
|
27
|
+
desc "release to rubygems.org"
|
24
28
|
task :release => :build do
|
25
29
|
system "gem push #{gem_name}"
|
26
30
|
end
|
data/lib/highscore.rb
CHANGED
data/lib/highscore/blacklist.rb
CHANGED
@@ -14,5 +14,17 @@ module Highscore
|
|
14
14
|
file_path = File.join(File.dirname(__FILE__), %w{.. blacklist.txt})
|
15
15
|
self.load_file(file_path)
|
16
16
|
end
|
17
|
+
|
18
|
+
# filters a given keywords array
|
19
|
+
#
|
20
|
+
# @param Array keywords
|
21
|
+
# @return Array
|
22
|
+
def filter(keywords)
|
23
|
+
keywords.delete_if do |key, value|
|
24
|
+
include?(key.downcase)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
|
17
29
|
end
|
18
30
|
end
|
data/lib/highscore/content.rb
CHANGED
@@ -1,9 +1,6 @@
|
|
1
1
|
$:.unshift(File.dirname(__FILE__))
|
2
2
|
require 'keywords'
|
3
3
|
|
4
|
-
# external gems
|
5
|
-
require 'rubygems'
|
6
|
-
|
7
4
|
module Highscore
|
8
5
|
class Content
|
9
6
|
attr_reader :content
|
@@ -33,13 +30,10 @@ module Highscore
|
|
33
30
|
:consonants => 0,
|
34
31
|
:ignore_short_words => true,
|
35
32
|
:ignore_case => false,
|
33
|
+
:ignore => nil,
|
36
34
|
:word_pattern => /\p{Word}+/u,
|
37
35
|
:stemming => false
|
38
36
|
}
|
39
|
-
|
40
|
-
if RUBY_VERSION =~ /^1\.8/
|
41
|
-
@emphasis[:word_pattern] = /\w+/
|
42
|
-
end
|
43
37
|
end
|
44
38
|
|
45
39
|
# configure ranking
|
@@ -71,6 +65,7 @@ module Highscore
|
|
71
65
|
# @return Highscore::Keywords
|
72
66
|
def keywords(opts = {})
|
73
67
|
used_wordlist = nil
|
68
|
+
|
74
69
|
if opts[:lang]
|
75
70
|
used_wordlist = language_wordlists[opts[:lang].to_sym]
|
76
71
|
else
|
@@ -80,16 +75,9 @@ module Highscore
|
|
80
75
|
@emphasis[:stemming] = use_stemming?
|
81
76
|
|
82
77
|
keywords = Keywords.new
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
text = text.stem if @emphasis[:stemming]
|
87
|
-
|
88
|
-
if not (text.match(/^[\d]+(\.[\d]+){0,1}$/) or text.length <= 2)
|
89
|
-
keywords << Highscore::Keyword.new(text, weight(text))
|
90
|
-
elsif allow_short_words
|
91
|
-
keywords << Highscore::Keyword.new(text, weight(text))
|
92
|
-
end
|
78
|
+
Keywords.find_keywords(processed_content, used_wordlist, word_pattern).each do |word|
|
79
|
+
keyword = extract_keyword(word)
|
80
|
+
keywords << keyword unless keyword.nil?
|
93
81
|
end
|
94
82
|
|
95
83
|
keywords
|
@@ -99,10 +87,10 @@ module Highscore
|
|
99
87
|
#
|
100
88
|
# @return Highscore::Wordlist
|
101
89
|
def wordlist
|
102
|
-
|
103
|
-
@whitelist
|
104
|
-
else
|
90
|
+
if @whitelist.nil?
|
105
91
|
@blacklist
|
92
|
+
else
|
93
|
+
@whitelist
|
106
94
|
end
|
107
95
|
end
|
108
96
|
|
@@ -116,6 +104,35 @@ module Highscore
|
|
116
104
|
|
117
105
|
private
|
118
106
|
|
107
|
+
# extracts a single keyword from a single word
|
108
|
+
#
|
109
|
+
# @return Highscore::Keyword
|
110
|
+
def extract_keyword word
|
111
|
+
word = word.to_s
|
112
|
+
word = word.stem if @emphasis[:stemming]
|
113
|
+
|
114
|
+
unless ignore?(word)
|
115
|
+
Highscore::Keyword.new(word, weight(word))
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
# Internal: should the word be ignored or not?
|
120
|
+
#
|
121
|
+
# @return TrueClass FalseClass
|
122
|
+
def ignore? word
|
123
|
+
ignore = word.short?
|
124
|
+
|
125
|
+
# exception: allow short words
|
126
|
+
ignore = (not allow_short_words?) if ignore
|
127
|
+
|
128
|
+
# exception: custom handler
|
129
|
+
unless @emphasis[:ignore].nil?
|
130
|
+
ignore = @emphasis[:ignore].call(word)
|
131
|
+
end
|
132
|
+
|
133
|
+
ignore
|
134
|
+
end
|
135
|
+
|
119
136
|
# processes the text content applying any necessary transformations
|
120
137
|
#
|
121
138
|
# @return String
|
@@ -128,8 +145,8 @@ module Highscore
|
|
128
145
|
|
129
146
|
# allow short words to be rated
|
130
147
|
#
|
131
|
-
# @return TrueClass
|
132
|
-
def allow_short_words
|
148
|
+
# @return TrueClass|FalseClass
|
149
|
+
def allow_short_words?
|
133
150
|
not @emphasis[:ignore_short_words]
|
134
151
|
end
|
135
152
|
|
@@ -156,8 +173,7 @@ module Highscore
|
|
156
173
|
end
|
157
174
|
|
158
175
|
weight += vowels(text)
|
159
|
-
weight
|
160
|
-
weight
|
176
|
+
weight + consonants(text)
|
161
177
|
end
|
162
178
|
|
163
179
|
# weight the vowels on a text
|
@@ -165,6 +181,8 @@ module Highscore
|
|
165
181
|
# @param text String
|
166
182
|
# @return Float
|
167
183
|
def vowels(text)
|
184
|
+
return 0 if @emphasis[:vowels] == 0
|
185
|
+
|
168
186
|
percent = text.vowels.length / text.length.to_f
|
169
187
|
percent * @emphasis[:vowels]
|
170
188
|
end
|
@@ -174,28 +192,31 @@ module Highscore
|
|
174
192
|
# @param text String
|
175
193
|
# @return Float
|
176
194
|
def consonants(text)
|
195
|
+
return 0 if @emphasis[:consonants] == 0
|
196
|
+
|
177
197
|
percent = text.consonants.length / text.length.to_f
|
178
198
|
percent * @emphasis[:consonants]
|
179
199
|
end
|
180
200
|
|
181
|
-
# using stemming is only possible
|
182
|
-
#
|
201
|
+
# Internal: using stemming is only possible if fast-stemmer or stemmer is installed
|
202
|
+
# doesn't work for JRuby
|
203
|
+
#
|
204
|
+
# @return TrueClass|FalseClass
|
183
205
|
def use_stemming?
|
184
|
-
|
206
|
+
return false unless @emphasis[:stemming]
|
207
|
+
|
208
|
+
gems = %w(fast_stemmer stemmer)
|
209
|
+
|
210
|
+
gems.each do |gem|
|
185
211
|
begin
|
186
|
-
require
|
187
|
-
true
|
212
|
+
require gem
|
213
|
+
return true
|
188
214
|
rescue LoadError
|
189
|
-
|
190
|
-
require 'stemmer'
|
191
|
-
true
|
192
|
-
rescue LoadError
|
193
|
-
false
|
194
|
-
end
|
215
|
+
false
|
195
216
|
end
|
196
|
-
else
|
197
|
-
false
|
198
217
|
end
|
199
218
|
end
|
219
|
+
|
220
|
+
|
200
221
|
end
|
201
|
-
end
|
222
|
+
end
|
data/lib/highscore/keyword.rb
CHANGED
@@ -3,7 +3,7 @@ module Highscore
|
|
3
3
|
# keywords read from the content
|
4
4
|
#
|
5
5
|
class Keyword
|
6
|
-
attr_accessor :weight, :text
|
6
|
+
attr_accessor :weight, :text, :percent
|
7
7
|
|
8
8
|
# init a keyword
|
9
9
|
#
|
@@ -14,6 +14,13 @@ module Highscore
|
|
14
14
|
@weight = weight.to_f
|
15
15
|
end
|
16
16
|
|
17
|
+
# sets a percent value (in the keywords context)
|
18
|
+
#
|
19
|
+
# @param percent Float
|
20
|
+
def percent=(percent)
|
21
|
+
@percent = percent.to_f
|
22
|
+
end
|
23
|
+
|
17
24
|
# sort keywords
|
18
25
|
#
|
19
26
|
# @param other Highscore::Keyword
|
data/lib/highscore/keywords.rb
CHANGED
@@ -1,6 +1,3 @@
|
|
1
|
-
# external
|
2
|
-
require 'digest/sha1'
|
3
|
-
|
4
1
|
module Highscore
|
5
2
|
|
6
3
|
# keywords that were found in content
|
@@ -16,14 +13,11 @@ module Highscore
|
|
16
13
|
# @return Highscore::Keywords
|
17
14
|
def self.find_keywords content, wordlist, pattern=/\w+/
|
18
15
|
keywords = content.to_s.scan(pattern).flatten
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
elsif wordlist.kind_of? Highscore::Whitelist
|
23
|
-
not wordlist.include?(key.downcase)
|
24
|
-
end
|
16
|
+
|
17
|
+
if not wordlist.nil? and wordlist.respond_to? :filter
|
18
|
+
keywords = wordlist.filter(keywords)
|
25
19
|
end
|
26
|
-
|
20
|
+
|
27
21
|
keywords.sort
|
28
22
|
end
|
29
23
|
|
@@ -33,8 +27,7 @@ module Highscore
|
|
33
27
|
@keywords = {}
|
34
28
|
end
|
35
29
|
|
36
|
-
# ranks the keywords
|
37
|
-
# or are blacklisted
|
30
|
+
# ranks the keywords
|
38
31
|
#
|
39
32
|
# @return Array
|
40
33
|
def rank
|
@@ -46,20 +39,26 @@ module Highscore
|
|
46
39
|
# @param n Fixnum
|
47
40
|
# @return Array
|
48
41
|
def top n = 10
|
49
|
-
rank[0..(n - 1)]
|
42
|
+
tops = rank[0..(n - 1)]
|
43
|
+
sum_all = sum(n)
|
44
|
+
|
45
|
+
# set percentage values
|
46
|
+
tops.each do |keyword|
|
47
|
+
keyword.percent = keyword.weight * 100 / sum_all
|
48
|
+
end
|
49
|
+
|
50
|
+
tops
|
50
51
|
end
|
51
52
|
|
52
53
|
# add new keywords
|
53
54
|
#
|
54
|
-
# @param keyword
|
55
|
+
# @param keyword Highscore::Keyword
|
55
56
|
# @return Highscore::Keywords
|
56
57
|
def <<(keyword)
|
57
|
-
|
58
|
-
|
59
|
-
if @keywords.has_key?(key)
|
60
|
-
@keywords[key].weight += keyword.weight
|
58
|
+
if @keywords.has_key?(keyword.text)
|
59
|
+
@keywords[keyword.text].weight += keyword.weight
|
61
60
|
else
|
62
|
-
@keywords[
|
61
|
+
@keywords[keyword.text] = keyword
|
63
62
|
end
|
64
63
|
|
65
64
|
@keywords
|
@@ -102,6 +101,14 @@ module Highscore
|
|
102
101
|
sort.reverse.first
|
103
102
|
end
|
104
103
|
|
104
|
+
# Returns the sum of the weight of the top n keywords
|
105
|
+
#
|
106
|
+
# @return Float
|
107
|
+
def sum(n)
|
108
|
+
top = rank[0..(n - 1)]
|
109
|
+
top.map(&:weight).inject { |sum,weight| sum + weight }
|
110
|
+
end
|
111
|
+
|
105
112
|
# merge in another keyword list, operates on self
|
106
113
|
#
|
107
114
|
# @return Highscore::Keywords
|
data/lib/highscore/string.rb
CHANGED
data/lib/highscore/whitelist.rb
CHANGED
@@ -5,6 +5,15 @@ module Highscore
|
|
5
5
|
# whitelisted words
|
6
6
|
#
|
7
7
|
class Whitelist < Wordlist
|
8
|
-
|
8
|
+
# filters a given keywords array
|
9
|
+
#
|
10
|
+
# @param Array keywords
|
11
|
+
# @return Array
|
12
|
+
def filter(keywords)
|
13
|
+
keywords.delete_if do |key, value|
|
14
|
+
not include?(key.downcase)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
9
18
|
end
|
10
19
|
end
|
data/lib/highscore/wordlist.rb
CHANGED
@@ -8,7 +8,7 @@ module Highscore
|
|
8
8
|
#
|
9
9
|
# @param file_path String
|
10
10
|
# @return Highscore::Wordlist
|
11
|
-
def self.load_file
|
11
|
+
def self.load_file(file_path, use_bloom_filter = true)
|
12
12
|
words = File.read(file_path).split(' ')
|
13
13
|
self.load(words)
|
14
14
|
end
|
@@ -34,8 +34,11 @@ module Highscore
|
|
34
34
|
attr_reader :words
|
35
35
|
|
36
36
|
# @param words Array
|
37
|
-
def initialize(words = [])
|
37
|
+
def initialize(words = [], use_bloom_filter = true)
|
38
38
|
@words = words
|
39
|
+
@bloom_filter = nil
|
40
|
+
|
41
|
+
init_bloom_filter
|
39
42
|
end
|
40
43
|
|
41
44
|
# iterate over words
|
@@ -62,8 +65,12 @@ module Highscore
|
|
62
65
|
#
|
63
66
|
# @param keyword String
|
64
67
|
# @return true/false
|
65
|
-
def include?
|
66
|
-
@
|
68
|
+
def include?(keyword)
|
69
|
+
unless @bloom_filter.nil?
|
70
|
+
@bloom_filter.include? keyword
|
71
|
+
else
|
72
|
+
@words.include? keyword
|
73
|
+
end
|
67
74
|
end
|
68
75
|
|
69
76
|
# add a new word to the blacklist
|
@@ -71,6 +78,40 @@ module Highscore
|
|
71
78
|
# @param word String
|
72
79
|
def <<(word)
|
73
80
|
@words << word
|
81
|
+
@bloom_filter << word unless @bloom_filter.nil?
|
82
|
+
end
|
83
|
+
|
84
|
+
private
|
85
|
+
|
86
|
+
# determine whether bloom filters should be used
|
87
|
+
def use_bloom_filter
|
88
|
+
begin
|
89
|
+
require 'bloomfilter-rb'
|
90
|
+
true
|
91
|
+
rescue LoadError
|
92
|
+
false
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
# build a bloom filter out of this wordlist to determine faster
|
97
|
+
# if words should be black- or whitelisted
|
98
|
+
#
|
99
|
+
def init_bloom_filter
|
100
|
+
return unless use_bloom_filter
|
101
|
+
|
102
|
+
n = length # number of filter elements
|
103
|
+
|
104
|
+
if n > 0
|
105
|
+
b = 4 # bits per bucket
|
106
|
+
m = n * b * 10 # number of filter buckets
|
107
|
+
|
108
|
+
k = (0.7 * (m / n)).to_i # number of hash functions
|
109
|
+
k = 1 if k <= 1
|
110
|
+
|
111
|
+
@bloom_filter = BloomFilter::Native.new(:size => m, :bucket => b, :raise => true, :hashes => k)
|
112
|
+
each { |w| @bloom_filter.insert(w) }
|
113
|
+
end
|
74
114
|
end
|
115
|
+
|
75
116
|
end
|
76
117
|
end
|
@@ -37,13 +37,7 @@ class TestContent < Highscore::TestCase
|
|
37
37
|
content = 'Schöne Grüße, caractères, русский'
|
38
38
|
|
39
39
|
content = Highscore::Content.new content
|
40
|
-
|
41
|
-
if RUBY_VERSION =~ /^1\.8/
|
42
|
-
# Ruby 1.8 doesn't support correct tokenization
|
43
|
-
assert_equal 3, content.keywords.length
|
44
|
-
else
|
45
|
-
assert_equal 4, content.keywords.length
|
46
|
-
end
|
40
|
+
assert_equal 4, content.keywords.length
|
47
41
|
end
|
48
42
|
|
49
43
|
def test_vowels_and_consonants
|
@@ -66,6 +60,14 @@ class TestContent < Highscore::TestCase
|
|
66
60
|
assert_equal 4, keywords.length
|
67
61
|
end
|
68
62
|
|
63
|
+
def test_ignore_custom
|
64
|
+
keywords = 'foobar a3832'.keywords do
|
65
|
+
set :ignore, lambda { |w| w.gsub(/[^0-9]/, '').length > 2 }
|
66
|
+
end
|
67
|
+
|
68
|
+
assert_equal 1, keywords.length
|
69
|
+
end
|
70
|
+
|
69
71
|
def test_word_pattern
|
70
72
|
keywords = 'foo Ruby foo Ruby'.keywords do
|
71
73
|
set :word_pattern, /(?=(\b\w+\s\w+\b))/
|
@@ -30,4 +30,12 @@ class TestKeyword < Highscore::TestCase
|
|
30
30
|
@keyword.weight = 10.123
|
31
31
|
assert_equal 10.123, @keyword.weight
|
32
32
|
end
|
33
|
+
|
34
|
+
def test_percent
|
35
|
+
# per default, percent is not used => nil
|
36
|
+
assert_nil @keyword.percent
|
37
|
+
|
38
|
+
@keyword.percent = 50.1
|
39
|
+
assert_equal 50.1, @keyword.percent
|
40
|
+
end
|
33
41
|
end
|
@@ -51,8 +51,24 @@ class TestKeywords < Highscore::TestCase
|
|
51
51
|
def test_top
|
52
52
|
top = @keywords.top(1)
|
53
53
|
|
54
|
+
assert_equal(1, top.length)
|
54
55
|
assert_equal('the', top[0].text)
|
55
56
|
assert_equal(10.0, top[0].weight)
|
57
|
+
assert_equal(100.0, top[0].percent)
|
58
|
+
end
|
59
|
+
|
60
|
+
def test_percent
|
61
|
+
top = @keywords.top(10)
|
62
|
+
|
63
|
+
assert_equal 4, top.length
|
64
|
+
assert_equal 62.5, top[0].percent
|
65
|
+
assert_equal 18.75, top[1].percent
|
66
|
+
assert_equal 12.5, top[2].percent
|
67
|
+
assert_equal 6.25, top[3].percent
|
68
|
+
end
|
69
|
+
|
70
|
+
def test_sum
|
71
|
+
assert_equal(16, @keywords.sum(50))
|
56
72
|
end
|
57
73
|
|
58
74
|
def test_top_empty
|
@@ -54,4 +54,12 @@ class TestBlacklist < Highscore::TestCase
|
|
54
54
|
assert blacklist.include?("foobar")
|
55
55
|
assert !blacklist.include?("bla")
|
56
56
|
end
|
57
|
+
|
58
|
+
def test_each
|
59
|
+
blacklist = Highscore::Wordlist.load "foo bar baz"
|
60
|
+
|
61
|
+
a = []
|
62
|
+
blacklist.each { |x| a << x }
|
63
|
+
assert_equal ['foo', 'bar', 'baz'], a
|
64
|
+
end
|
57
65
|
end
|
data/version.txt
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.
|
1
|
+
1.1.0
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: highscore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2012-12-13 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: simplecov
|
16
|
-
requirement: &
|
16
|
+
requirement: &70314466936760 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 0.6.4
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70314466936760
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: whatlanguage
|
27
|
-
requirement: &
|
27
|
+
requirement: &70314466936180 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
version: 1.0.0
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70314466936180
|
36
36
|
description: Find and rank keywords in text.
|
37
37
|
email: liebler.dominik@googlemail.com
|
38
38
|
executables:
|
@@ -66,7 +66,7 @@ files:
|
|
66
66
|
- History.txt
|
67
67
|
- Rakefile
|
68
68
|
- version.txt
|
69
|
-
homepage:
|
69
|
+
homepage: http://domnikl.github.com/highscore
|
70
70
|
licenses: []
|
71
71
|
post_install_message:
|
72
72
|
rdoc_options:
|