highscore 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +7 -0
- data/README.md +28 -21
- data/Rakefile +5 -1
- data/lib/highscore.rb +0 -2
- data/lib/highscore/blacklist.rb +12 -0
- data/lib/highscore/content.rb +59 -38
- data/lib/highscore/keyword.rb +8 -1
- data/lib/highscore/keywords.rb +26 -19
- data/lib/highscore/string.rb +7 -0
- data/lib/highscore/whitelist.rb +10 -1
- data/lib/highscore/wordlist.rb +45 -4
- data/test/highscore/test_content.rb +9 -7
- data/test/highscore/test_keyword.rb +8 -0
- data/test/highscore/test_keywords.rb +16 -0
- data/test/highscore/test_string.rb +5 -0
- data/test/highscore/test_wordlist.rb +8 -0
- data/version.txt +1 -1
- metadata +6 -6
data/History.txt
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
== 1.1.0 / 2013-04-
|
2
|
+
|
3
|
+
* added support for custom word ignore handlers using lambda functions
|
4
|
+
* fixed some performance issues with unnecessary ranking by unused criteria (consonants & vowels)
|
5
|
+
* added possibility to use bloomfilters for faster performance instead of Array#include?
|
6
|
+
* removed compatibility with Ruby 1.8.x
|
7
|
+
|
1
8
|
== 1.0.0 / 2013-02-02
|
2
9
|
|
3
10
|
* added per-language support for black- and whitelists (thanks to bobjflong)
|
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Easily find and rank keywords in long texts.
|
4
4
|
|
5
|
-
[![Build Status](https://secure.travis-ci.org/domnikl/highscore.png?branch=develop)](http://travis-ci.org/domnikl/highscore) [![Code Climate](https://codeclimate.com/
|
5
|
+
[![Build Status](https://secure.travis-ci.org/domnikl/highscore.png?branch=develop)](http://travis-ci.org/domnikl/highscore) [![Code Climate](https://codeclimate.com/github/domnikl/highscore.png)](https://codeclimate.com/github/domnikl/highscore)
|
6
6
|
|
7
7
|
## Features
|
8
8
|
|
@@ -11,9 +11,23 @@ Easily find and rank keywords in long texts.
|
|
11
11
|
* directly get keywords from String objects
|
12
12
|
* blacklist words via a plain text file, String or an Array of words (per language if needed)
|
13
13
|
* optionally, configure a whitelist and only words from that list will get ranked
|
14
|
-
*
|
14
|
+
* uses word stemming if necessary (requires the `fast-stemmer` or `stemmer` gem)
|
15
15
|
* merge together Keywords from multiple sources
|
16
16
|
* contains a CLI tool that operates on STDIN/OUT and is configurable via parameters
|
17
|
+
* can use `bloomfilter-rb` gem for better performance (optional)
|
18
|
+
|
19
|
+
## Installation
|
20
|
+
|
21
|
+
* `[sudo] gem install highscore`
|
22
|
+
|
23
|
+
For better blacklist performance, use the `bloomfilter-rb` gem:
|
24
|
+
|
25
|
+
* `[sudo] gem install bloomfilter-rb`
|
26
|
+
|
27
|
+
To use word stemming, you need to have the `fast-stemmer` (C extension) or `stemmer` gem installed:
|
28
|
+
|
29
|
+
* `[sudo] gem install fast-stemmer`
|
30
|
+
* `[sudo] gem install stemmer`
|
17
31
|
|
18
32
|
## Examples
|
19
33
|
|
@@ -37,7 +51,7 @@ text.keywords.top(50).each do |keyword|
|
|
37
51
|
keyword.weight # => rank weight (float)
|
38
52
|
end
|
39
53
|
|
40
|
-
# you could just use a string
|
54
|
+
# you could also just use a string
|
41
55
|
keywords = "Foo bar baz".keywords(blacklist) do
|
42
56
|
set :multiplier, 10
|
43
57
|
end
|
@@ -59,9 +73,11 @@ end
|
|
59
73
|
# not 1.0
|
60
74
|
```
|
61
75
|
|
62
|
-
Have a look at bin/highscore
|
76
|
+
Have a look at `bin/highscore`; you can run highscore on your CLI and feed it with text on STDIN.
|
63
77
|
|
64
|
-
##
|
78
|
+
## Blacklisting and Whitelisting
|
79
|
+
|
80
|
+
### Using a custom blacklist to ignore keywords
|
65
81
|
|
66
82
|
```ruby
|
67
83
|
# setting single words
|
@@ -82,7 +98,7 @@ blacklist = Highscore::Blacklist.load_default_file
|
|
82
98
|
content = Highscore::Content.new "a string", blacklist
|
83
99
|
```
|
84
100
|
|
85
|
-
|
101
|
+
### Using a whitelist instead of ranking all words
|
86
102
|
|
87
103
|
```ruby
|
88
104
|
# construct and inject it just like a blacklist
|
@@ -90,7 +106,7 @@ whitelist = Highscore::Whitelist.load %w{these are valid keywords}
|
|
90
106
|
content = Highscore::Content.new "invalid words", whitelist
|
91
107
|
```
|
92
108
|
|
93
|
-
##
|
109
|
+
## I18n
|
94
110
|
|
95
111
|
```ruby
|
96
112
|
# Load a default blacklist
|
@@ -98,34 +114,25 @@ blacklist_default = Highscore::Blacklist.load "mister"
|
|
98
114
|
text = Highscore::Content.new "oui mister interesting", blacklist_default
|
99
115
|
text.keywords.top(3).join " "
|
100
116
|
|
101
|
-
#
|
117
|
+
# => prints "interesting oui"
|
102
118
|
|
103
119
|
# Load a rudimentary blacklist for French
|
104
120
|
blacklist_francais = Highscore::Blacklist.load "oui"
|
105
|
-
text.add_wordlist blacklist_francais,
|
121
|
+
text.add_wordlist blacklist_francais, :french
|
106
122
|
text.keywords(:lang => :fr).top(3).join " "
|
107
123
|
|
108
|
-
#
|
124
|
+
# => prints "interesting mister"
|
109
125
|
```
|
110
126
|
|
111
|
-
## Install
|
112
|
-
|
113
|
-
* `[sudo] gem install highscore`
|
114
|
-
|
115
|
-
To use word stemming, you need to have the `fast-stemmer` (C extension) or `stemmer` gem installed:
|
116
|
-
|
117
|
-
* `[sudo] gem install fast-stemmer`
|
118
|
-
* `[sudo] gem install stemmer`
|
119
|
-
|
120
127
|
## Author
|
121
128
|
|
122
|
-
Original author: Dominik Liebler <liebler.dominik@
|
129
|
+
Original author: Dominik Liebler <liebler.dominik@gmail.com>
|
123
130
|
|
124
131
|
## License
|
125
132
|
|
126
133
|
(The MIT License)
|
127
134
|
|
128
|
-
Copyright (c)
|
135
|
+
Copyright (c) 2013 Dominik Liebler
|
129
136
|
|
130
137
|
Permission is hereby granted, free of charge, to any person obtaining
|
131
138
|
a copy of this software and associated documentation files (the
|
data/Rakefile
CHANGED
@@ -9,18 +9,22 @@ require "highscore"
|
|
9
9
|
gem_name = "highscore-#{Highscore::VERSION}.gem"
|
10
10
|
|
11
11
|
namespace :gem do
|
12
|
+
desc "clean previously generated gems"
|
12
13
|
task :clean do
|
13
14
|
system "rm -f *.gem"
|
14
15
|
end
|
15
|
-
|
16
|
+
|
17
|
+
desc "build gem"
|
16
18
|
task :build => [:clean, :test] do
|
17
19
|
system "gem build highscore.gemspec"
|
18
20
|
end
|
19
21
|
|
22
|
+
desc "install gem"
|
20
23
|
task :install => :build do
|
21
24
|
system "gem install #{gem_name}"
|
22
25
|
end
|
23
26
|
|
27
|
+
desc "release to rubygems.org"
|
24
28
|
task :release => :build do
|
25
29
|
system "gem push #{gem_name}"
|
26
30
|
end
|
data/lib/highscore.rb
CHANGED
data/lib/highscore/blacklist.rb
CHANGED
@@ -14,5 +14,17 @@ module Highscore
|
|
14
14
|
file_path = File.join(File.dirname(__FILE__), %w{.. blacklist.txt})
|
15
15
|
self.load_file(file_path)
|
16
16
|
end
|
17
|
+
|
18
|
+
# filters a given keywords array
|
19
|
+
#
|
20
|
+
# @param Array keywords
|
21
|
+
# @return Array
|
22
|
+
def filter(keywords)
|
23
|
+
keywords.delete_if do |key, value|
|
24
|
+
include?(key.downcase)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
|
17
29
|
end
|
18
30
|
end
|
data/lib/highscore/content.rb
CHANGED
@@ -1,9 +1,6 @@
|
|
1
1
|
$:.unshift(File.dirname(__FILE__))
|
2
2
|
require 'keywords'
|
3
3
|
|
4
|
-
# external gems
|
5
|
-
require 'rubygems'
|
6
|
-
|
7
4
|
module Highscore
|
8
5
|
class Content
|
9
6
|
attr_reader :content
|
@@ -33,13 +30,10 @@ module Highscore
|
|
33
30
|
:consonants => 0,
|
34
31
|
:ignore_short_words => true,
|
35
32
|
:ignore_case => false,
|
33
|
+
:ignore => nil,
|
36
34
|
:word_pattern => /\p{Word}+/u,
|
37
35
|
:stemming => false
|
38
36
|
}
|
39
|
-
|
40
|
-
if RUBY_VERSION =~ /^1\.8/
|
41
|
-
@emphasis[:word_pattern] = /\w+/
|
42
|
-
end
|
43
37
|
end
|
44
38
|
|
45
39
|
# configure ranking
|
@@ -71,6 +65,7 @@ module Highscore
|
|
71
65
|
# @return Highscore::Keywords
|
72
66
|
def keywords(opts = {})
|
73
67
|
used_wordlist = nil
|
68
|
+
|
74
69
|
if opts[:lang]
|
75
70
|
used_wordlist = language_wordlists[opts[:lang].to_sym]
|
76
71
|
else
|
@@ -80,16 +75,9 @@ module Highscore
|
|
80
75
|
@emphasis[:stemming] = use_stemming?
|
81
76
|
|
82
77
|
keywords = Keywords.new
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
text = text.stem if @emphasis[:stemming]
|
87
|
-
|
88
|
-
if not (text.match(/^[\d]+(\.[\d]+){0,1}$/) or text.length <= 2)
|
89
|
-
keywords << Highscore::Keyword.new(text, weight(text))
|
90
|
-
elsif allow_short_words
|
91
|
-
keywords << Highscore::Keyword.new(text, weight(text))
|
92
|
-
end
|
78
|
+
Keywords.find_keywords(processed_content, used_wordlist, word_pattern).each do |word|
|
79
|
+
keyword = extract_keyword(word)
|
80
|
+
keywords << keyword unless keyword.nil?
|
93
81
|
end
|
94
82
|
|
95
83
|
keywords
|
@@ -99,10 +87,10 @@ module Highscore
|
|
99
87
|
#
|
100
88
|
# @return Highscore::Wordlist
|
101
89
|
def wordlist
|
102
|
-
|
103
|
-
@whitelist
|
104
|
-
else
|
90
|
+
if @whitelist.nil?
|
105
91
|
@blacklist
|
92
|
+
else
|
93
|
+
@whitelist
|
106
94
|
end
|
107
95
|
end
|
108
96
|
|
@@ -116,6 +104,35 @@ module Highscore
|
|
116
104
|
|
117
105
|
private
|
118
106
|
|
107
|
+
# extracts a single keyword from a single word
|
108
|
+
#
|
109
|
+
# @return Highscore::Keyword
|
110
|
+
def extract_keyword word
|
111
|
+
word = word.to_s
|
112
|
+
word = word.stem if @emphasis[:stemming]
|
113
|
+
|
114
|
+
unless ignore?(word)
|
115
|
+
Highscore::Keyword.new(word, weight(word))
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
# Internal: should the word be ignored or not?
|
120
|
+
#
|
121
|
+
# @return TrueClass FalseClass
|
122
|
+
def ignore? word
|
123
|
+
ignore = word.short?
|
124
|
+
|
125
|
+
# exception: allow short words
|
126
|
+
ignore = (not allow_short_words?) if ignore
|
127
|
+
|
128
|
+
# exception: custom handler
|
129
|
+
unless @emphasis[:ignore].nil?
|
130
|
+
ignore = @emphasis[:ignore].call(word)
|
131
|
+
end
|
132
|
+
|
133
|
+
ignore
|
134
|
+
end
|
135
|
+
|
119
136
|
# processes the text content applying any necessary transformations
|
120
137
|
#
|
121
138
|
# @return String
|
@@ -128,8 +145,8 @@ module Highscore
|
|
128
145
|
|
129
146
|
# allow short words to be rated
|
130
147
|
#
|
131
|
-
# @return TrueClass
|
132
|
-
def allow_short_words
|
148
|
+
# @return TrueClass|FalseClass
|
149
|
+
def allow_short_words?
|
133
150
|
not @emphasis[:ignore_short_words]
|
134
151
|
end
|
135
152
|
|
@@ -156,8 +173,7 @@ module Highscore
|
|
156
173
|
end
|
157
174
|
|
158
175
|
weight += vowels(text)
|
159
|
-
weight
|
160
|
-
weight
|
176
|
+
weight + consonants(text)
|
161
177
|
end
|
162
178
|
|
163
179
|
# weight the vowels on a text
|
@@ -165,6 +181,8 @@ module Highscore
|
|
165
181
|
# @param text String
|
166
182
|
# @return Float
|
167
183
|
def vowels(text)
|
184
|
+
return 0 if @emphasis[:vowels] == 0
|
185
|
+
|
168
186
|
percent = text.vowels.length / text.length.to_f
|
169
187
|
percent * @emphasis[:vowels]
|
170
188
|
end
|
@@ -174,28 +192,31 @@ module Highscore
|
|
174
192
|
# @param text String
|
175
193
|
# @return Float
|
176
194
|
def consonants(text)
|
195
|
+
return 0 if @emphasis[:consonants] == 0
|
196
|
+
|
177
197
|
percent = text.consonants.length / text.length.to_f
|
178
198
|
percent * @emphasis[:consonants]
|
179
199
|
end
|
180
200
|
|
181
|
-
# using stemming is only possible
|
182
|
-
#
|
201
|
+
# Internal: using stemming is only possible if fast-stemmer is installed
|
202
|
+
# doesn't work for JRuby
|
203
|
+
#
|
204
|
+
# @return TrueClass|FalseClass
|
183
205
|
def use_stemming?
|
184
|
-
|
206
|
+
return false unless @emphasis[:stemming]
|
207
|
+
|
208
|
+
gems = %w(fast_stemmer stemmer)
|
209
|
+
|
210
|
+
gems.each do |gem|
|
185
211
|
begin
|
186
|
-
require
|
187
|
-
true
|
212
|
+
require gem
|
213
|
+
return true
|
188
214
|
rescue LoadError
|
189
|
-
|
190
|
-
require 'stemmer'
|
191
|
-
true
|
192
|
-
rescue LoadError
|
193
|
-
false
|
194
|
-
end
|
215
|
+
false
|
195
216
|
end
|
196
|
-
else
|
197
|
-
false
|
198
217
|
end
|
199
218
|
end
|
219
|
+
|
220
|
+
|
200
221
|
end
|
201
|
-
end
|
222
|
+
end
|
data/lib/highscore/keyword.rb
CHANGED
@@ -3,7 +3,7 @@ module Highscore
|
|
3
3
|
# keywords read from the content
|
4
4
|
#
|
5
5
|
class Keyword
|
6
|
-
attr_accessor :weight, :text
|
6
|
+
attr_accessor :weight, :text, :percent
|
7
7
|
|
8
8
|
# init a keyword
|
9
9
|
#
|
@@ -14,6 +14,13 @@ module Highscore
|
|
14
14
|
@weight = weight.to_f
|
15
15
|
end
|
16
16
|
|
17
|
+
# sets a percent value (in the keywords context)
|
18
|
+
#
|
19
|
+
# @param percent Float
|
20
|
+
def percent=(percent)
|
21
|
+
@percent = percent.to_f
|
22
|
+
end
|
23
|
+
|
17
24
|
# sort keywords
|
18
25
|
#
|
19
26
|
# @param other Highscore::Keyword
|
data/lib/highscore/keywords.rb
CHANGED
@@ -1,6 +1,3 @@
|
|
1
|
-
# external
|
2
|
-
require 'digest/sha1'
|
3
|
-
|
4
1
|
module Highscore
|
5
2
|
|
6
3
|
# keywords that were found in content
|
@@ -16,14 +13,11 @@ module Highscore
|
|
16
13
|
# @return Highscore::Keywords
|
17
14
|
def self.find_keywords content, wordlist, pattern=/\w+/
|
18
15
|
keywords = content.to_s.scan(pattern).flatten
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
elsif wordlist.kind_of? Highscore::Whitelist
|
23
|
-
not wordlist.include?(key.downcase)
|
24
|
-
end
|
16
|
+
|
17
|
+
if not wordlist.nil? and wordlist.respond_to? :filter
|
18
|
+
keywords = wordlist.filter(keywords)
|
25
19
|
end
|
26
|
-
|
20
|
+
|
27
21
|
keywords.sort
|
28
22
|
end
|
29
23
|
|
@@ -33,8 +27,7 @@ module Highscore
|
|
33
27
|
@keywords = {}
|
34
28
|
end
|
35
29
|
|
36
|
-
# ranks the keywords
|
37
|
-
# or are blacklisted
|
30
|
+
# ranks the keywords
|
38
31
|
#
|
39
32
|
# @return Array
|
40
33
|
def rank
|
@@ -46,20 +39,26 @@ module Highscore
|
|
46
39
|
# @param n Fixnum
|
47
40
|
# @return Array
|
48
41
|
def top n = 10
|
49
|
-
rank[0..(n - 1)]
|
42
|
+
tops = rank[0..(n - 1)]
|
43
|
+
sum_all = sum(n)
|
44
|
+
|
45
|
+
# set percentage values
|
46
|
+
tops.each do |keyword|
|
47
|
+
keyword.percent = keyword.weight * 100 / sum_all
|
48
|
+
end
|
49
|
+
|
50
|
+
tops
|
50
51
|
end
|
51
52
|
|
52
53
|
# add new keywords
|
53
54
|
#
|
54
|
-
# @param keyword
|
55
|
+
# @param keyword Highscore::Keyword
|
55
56
|
# @return Highscore::Keywords
|
56
57
|
def <<(keyword)
|
57
|
-
|
58
|
-
|
59
|
-
if @keywords.has_key?(key)
|
60
|
-
@keywords[key].weight += keyword.weight
|
58
|
+
if @keywords.has_key?(keyword.text)
|
59
|
+
@keywords[keyword.text].weight += keyword.weight
|
61
60
|
else
|
62
|
-
@keywords[
|
61
|
+
@keywords[keyword.text] = keyword
|
63
62
|
end
|
64
63
|
|
65
64
|
@keywords
|
@@ -102,6 +101,14 @@ module Highscore
|
|
102
101
|
sort.reverse.first
|
103
102
|
end
|
104
103
|
|
104
|
+
# Returns the sum of the weight of the top n keywords
|
105
|
+
#
|
106
|
+
# @return Float
|
107
|
+
def sum(n)
|
108
|
+
top = rank[0..(n - 1)]
|
109
|
+
top.map(&:weight).inject { |sum,weight| sum + weight }
|
110
|
+
end
|
111
|
+
|
105
112
|
# merge in another keyword list, operates on self
|
106
113
|
#
|
107
114
|
# @return Highscore::Keywords
|
data/lib/highscore/string.rb
CHANGED
data/lib/highscore/whitelist.rb
CHANGED
@@ -5,6 +5,15 @@ module Highscore
|
|
5
5
|
# whitelisted words
|
6
6
|
#
|
7
7
|
class Whitelist < Wordlist
|
8
|
-
|
8
|
+
# filters a given keywords array
|
9
|
+
#
|
10
|
+
# @param Array keywords
|
11
|
+
# @return Array
|
12
|
+
def filter(keywords)
|
13
|
+
keywords.delete_if do |key, value|
|
14
|
+
not include?(key.downcase)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
9
18
|
end
|
10
19
|
end
|
data/lib/highscore/wordlist.rb
CHANGED
@@ -8,7 +8,7 @@ module Highscore
|
|
8
8
|
#
|
9
9
|
# @param file_path String
|
10
10
|
# @return Highscore::Wordlist
|
11
|
-
def self.load_file
|
11
|
+
def self.load_file(file_path, use_bloom_filter = true)
|
12
12
|
words = File.read(file_path).split(' ')
|
13
13
|
self.load(words)
|
14
14
|
end
|
@@ -34,8 +34,11 @@ module Highscore
|
|
34
34
|
attr_reader :words
|
35
35
|
|
36
36
|
# @param words Array
|
37
|
-
def initialize(words = [])
|
37
|
+
def initialize(words = [], use_bloom_filter = true)
|
38
38
|
@words = words
|
39
|
+
@bloom_filter = nil
|
40
|
+
|
41
|
+
init_bloom_filter
|
39
42
|
end
|
40
43
|
|
41
44
|
# iterate over words
|
@@ -62,8 +65,12 @@ module Highscore
|
|
62
65
|
#
|
63
66
|
# @param keyword String
|
64
67
|
# @return true/false
|
65
|
-
def include?
|
66
|
-
@
|
68
|
+
def include?(keyword)
|
69
|
+
unless @bloom_filter.nil?
|
70
|
+
@bloom_filter.include? keyword
|
71
|
+
else
|
72
|
+
@words.include? keyword
|
73
|
+
end
|
67
74
|
end
|
68
75
|
|
69
76
|
# add a new word to the blacklist
|
@@ -71,6 +78,40 @@ module Highscore
|
|
71
78
|
# @param word String
|
72
79
|
def <<(word)
|
73
80
|
@words << word
|
81
|
+
@bloom_filter << word unless @bloom_filter.nil?
|
82
|
+
end
|
83
|
+
|
84
|
+
private
|
85
|
+
|
86
|
+
# determine whether bloom filters should be used
|
87
|
+
def use_bloom_filter
|
88
|
+
begin
|
89
|
+
require 'bloomfilter-rb'
|
90
|
+
true
|
91
|
+
rescue LoadError
|
92
|
+
false
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
# build a bloom filter out of this wordlist to determine faster
|
97
|
+
# if words should be black- or whitelisted
|
98
|
+
#
|
99
|
+
def init_bloom_filter
|
100
|
+
return unless use_bloom_filter
|
101
|
+
|
102
|
+
n = length # number of filter elements
|
103
|
+
|
104
|
+
if n > 0
|
105
|
+
b = 4 # bits per bucket
|
106
|
+
m = n * b * 10 # number of filter buckets
|
107
|
+
|
108
|
+
k = (0.7 * (m / n)).to_i # number of hash functions
|
109
|
+
k = 1 if k <= 1
|
110
|
+
|
111
|
+
@bloom_filter = BloomFilter::Native.new(:size => m, :bucket => b, :raise => true, :hashes => k)
|
112
|
+
each { |w| @bloom_filter.insert(w) }
|
113
|
+
end
|
74
114
|
end
|
115
|
+
|
75
116
|
end
|
76
117
|
end
|
@@ -37,13 +37,7 @@ class TestContent < Highscore::TestCase
|
|
37
37
|
content = 'Schöne Grüße, caractères, русский'
|
38
38
|
|
39
39
|
content = Highscore::Content.new content
|
40
|
-
|
41
|
-
if RUBY_VERSION =~ /^1\.8/
|
42
|
-
# Ruby 1.8 doesn't support correct tokenization
|
43
|
-
assert_equal 3, content.keywords.length
|
44
|
-
else
|
45
|
-
assert_equal 4, content.keywords.length
|
46
|
-
end
|
40
|
+
assert_equal 4, content.keywords.length
|
47
41
|
end
|
48
42
|
|
49
43
|
def test_vowels_and_consonants
|
@@ -66,6 +60,14 @@ class TestContent < Highscore::TestCase
|
|
66
60
|
assert_equal 4, keywords.length
|
67
61
|
end
|
68
62
|
|
63
|
+
def test_ignore_custom
|
64
|
+
keywords = 'foobar a3832'.keywords do
|
65
|
+
set :ignore, lambda { |w| w.gsub(/[^0-9]/, '').length > 2 }
|
66
|
+
end
|
67
|
+
|
68
|
+
assert_equal 1, keywords.length
|
69
|
+
end
|
70
|
+
|
69
71
|
def test_word_pattern
|
70
72
|
keywords = 'foo Ruby foo Ruby'.keywords do
|
71
73
|
set :word_pattern, /(?=(\b\w+\s\w+\b))/
|
@@ -30,4 +30,12 @@ class TestKeyword < Highscore::TestCase
|
|
30
30
|
@keyword.weight = 10.123
|
31
31
|
assert_equal 10.123, @keyword.weight
|
32
32
|
end
|
33
|
+
|
34
|
+
def test_percent
|
35
|
+
# per default, percent is not used => nil
|
36
|
+
assert_nil @keyword.percent
|
37
|
+
|
38
|
+
@keyword.percent = 50.1
|
39
|
+
assert_equal 50.1, @keyword.percent
|
40
|
+
end
|
33
41
|
end
|
@@ -51,8 +51,24 @@ class TestKeywords < Highscore::TestCase
|
|
51
51
|
def test_top
|
52
52
|
top = @keywords.top(1)
|
53
53
|
|
54
|
+
assert_equal(1, top.length)
|
54
55
|
assert_equal('the', top[0].text)
|
55
56
|
assert_equal(10.0, top[0].weight)
|
57
|
+
assert_equal(100.0, top[0].percent)
|
58
|
+
end
|
59
|
+
|
60
|
+
def test_percent
|
61
|
+
top = @keywords.top(10)
|
62
|
+
|
63
|
+
assert_equal 4, top.length
|
64
|
+
assert_equal 62.5, top[0].percent
|
65
|
+
assert_equal 18.75, top[1].percent
|
66
|
+
assert_equal 12.5, top[2].percent
|
67
|
+
assert_equal 6.25, top[3].percent
|
68
|
+
end
|
69
|
+
|
70
|
+
def test_sum
|
71
|
+
assert_equal(16, @keywords.sum(50))
|
56
72
|
end
|
57
73
|
|
58
74
|
def test_top_empty
|
@@ -54,4 +54,12 @@ class TestBlacklist < Highscore::TestCase
|
|
54
54
|
assert blacklist.include?("foobar")
|
55
55
|
assert !blacklist.include?("bla")
|
56
56
|
end
|
57
|
+
|
58
|
+
def test_each
|
59
|
+
blacklist = Highscore::Wordlist.load "foo bar baz"
|
60
|
+
|
61
|
+
a = []
|
62
|
+
blacklist.each { |x| a << x }
|
63
|
+
assert_equal ['foo', 'bar', 'baz'], a
|
64
|
+
end
|
57
65
|
end
|
data/version.txt
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.
|
1
|
+
1.1.0
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: highscore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2012-12-13 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: simplecov
|
16
|
-
requirement: &
|
16
|
+
requirement: &70314466936760 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 0.6.4
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70314466936760
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: whatlanguage
|
27
|
-
requirement: &
|
27
|
+
requirement: &70314466936180 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
version: 1.0.0
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70314466936180
|
36
36
|
description: Find and rank keywords in text.
|
37
37
|
email: liebler.dominik@googlemail.com
|
38
38
|
executables:
|
@@ -66,7 +66,7 @@ files:
|
|
66
66
|
- History.txt
|
67
67
|
- Rakefile
|
68
68
|
- version.txt
|
69
|
-
homepage:
|
69
|
+
homepage: http://domnikl.github.com/highscore
|
70
70
|
licenses: []
|
71
71
|
post_install_message:
|
72
72
|
rdoc_options:
|