highscore 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.travis.yml ADDED
@@ -0,0 +1,10 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.8.7
4
+ - 1.9.2
5
+ - 1.9.3
6
+ - jruby-18mode # JRuby in 1.8 mode
7
+ - jruby-19mode # JRuby in 1.9 mode
8
+ - rbx-18mode
9
+ # - rbx-19mode # currently in active development, may or may not work for your project
10
+ before_install: gem install bones
data/History.txt CHANGED
@@ -1,3 +1,8 @@
1
+ == 0.4.3 / 2012-02-11
2
+
3
+ * rate words based on the amount of vowels and consonants
4
+ * added a CLI tool that uses all of the API (via parameters)
5
+
1
6
  == 0.4.2 / 2012-02-07
2
7
 
3
8
  * added a simple example that also shows return values in README.md
data/README.md CHANGED
@@ -7,9 +7,11 @@ Features
7
7
  --------
8
8
 
9
9
  * configurable to rank different types of words differently (uppercase, long words, etc.)
10
+ * rate based on amount (%) of vowels and consonants in a string
10
11
  * directly get keywords from String objects
11
12
  * blacklist words via a plain text file, String or an Array of words
12
13
  * merge together Keywords from multiple sources
14
+ * contains a CLI tool that operates on STDIN/STDOUT and is configurable via parameters
13
15
 
14
16
  Examples
15
17
  --------
@@ -21,11 +23,10 @@ text.configure do
21
23
  set :upper_case, 3
22
24
  set :long_words, 2
23
25
  set :long_words_threshold, 15
26
+ set :vowels, 1 # => default = 0 = not considered
27
+ set :consonants, 5 # => default = 0 = not considered
24
28
  end
25
29
 
26
- # get all keywords
27
- text.keywords.rank => Array
28
-
29
30
  # get only the top 50 keywords
30
31
  text.keywords.top(50).each do |keyword|
31
32
  keyword.text # => keyword text
@@ -44,7 +45,7 @@ can iterate over. Each object in there is a Highscore::Keyword that has methods
44
45
  ```ruby
45
46
  keywords = "Foo bar is not bar baz".keywords(Highscore::Blacklist.load(['baz']))
46
47
 
47
- keywords.each do |k|
48
+ keywords.rank.each do |k|
48
49
  puts "#{k.text} #{k.weight}"
49
50
  end
50
51
 
@@ -54,6 +55,8 @@ end
54
55
  # not 1.0
55
56
  ```
56
57
 
58
+ Have a look at bin/highscore: you can run highscore from the command line and feed it text on STDIN.
59
+
57
60
  Using a custom blacklist to ignore keywords
58
61
  -------------------------------------------
59
62
 
data/bin/highscore CHANGED
@@ -1,7 +1,83 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
+ require 'optparse'
4
+
3
5
  root = File.expand_path('../..', __FILE__)
4
6
  require File.join(root, %w[lib highscore])
5
7
 
6
- # Put your code here
8
+ options = {
9
+ :emphasis => {},
10
+ :short => false
11
+ }
12
+
13
+ optparse = OptionParser.new do |opts|
14
+ opts.on('-h', '--help', 'display this help message') do
15
+ puts opts
16
+ exit
17
+ end
18
+
19
+ # blacklist file
20
+ opts.on('-b', '--blacklist FILEPATH', 'specify a blacklist file that should be used instead of the default') do |filepath|
21
+ options[:blacklist] = Highscore::Blacklist.load_file(filepath)
22
+ end
23
+
24
+ # general multiplier
25
+ opts.on('-m', '--multiplier MULTIPLIER', 'set the global rank multiplier') do |multiplier|
26
+ options[:emphasis][:multiplier] = multiplier.to_f
27
+ end
28
+
29
+ # don't print rank weight
30
+ opts.on('-s', '--short', 'don\'t print rank weight') do
31
+ options[:short] = true
32
+ end
33
+
34
+ # upper case
35
+ opts.on('-u', '--uppercase MULTIPLIER', 'rank multiplier for uppercase words (default is 3.0)') do |u|
36
+ options[:emphasis][:upper_case] = u.to_f
37
+ end
38
+
39
+ # vowels
40
+ opts.on('--vowels MULTIPLIER', 'multiplier for vowels') do |u|
41
+ options[:emphasis][:vowels] = u.to_i
42
+ end
43
+
44
+ # consonants
45
+ opts.on('--consonants MULTIPLIER', 'multiplier for consonants') do |u|
46
+ options[:emphasis][:consonants] = u.to_i
47
+ end
48
+
49
+ # long words
50
+ opts.on('-l', '--longwords MULTIPLIER', 'rank multiplier for long words (default is 2.0)') do |u|
51
+ options[:emphasis][:long_words] = u.to_f
52
+ end
53
+
54
+ # long words threshold
55
+ opts.on('-t', '--longwordsthreshold THRESHOLD', 'threshold for long words (default is 15 chars)') do |u|
56
+ options[:emphasis][:long_words_threshold] = u.to_i
57
+ end
58
+
59
+ opts.on('-n', '--top N', 'show only the top N keywords') do |u|
60
+ options[:top] = u.to_i
61
+ end
62
+ end
63
+
64
+ optparse.parse!
65
+
66
+ text = STDIN.read.to_s
67
+ keywords = text.keywords(options[:blacklist]) do |content|
68
+ options[:emphasis].each do |key,value|
69
+ content.set key, value
70
+ end
71
+ end
72
+
73
+ if options[:top]
74
+ keywords = keywords.top(options[:top])
75
+ else
76
+ keywords = keywords.rank
77
+ end
7
78
 
79
+ keywords.each do |keyword|
80
+ print keyword.text
81
+ print " #{keyword.weight}" unless options[:short]
82
+ puts
83
+ end
@@ -18,7 +18,9 @@ module Highscore
18
18
  :multiplier => 1.0,
19
19
  :upper_case => 3.0,
20
20
  :long_words => 2.0,
21
- :long_words_threshold => 15
21
+ :long_words_threshold => 15,
22
+ :vowels => 0,
23
+ :consonants => 0
22
24
  }
23
25
  end
24
26
 
@@ -43,21 +45,41 @@ module Highscore
43
45
  keywords = Keywords.new
44
46
 
45
47
  Keywords.find_keywords(@content, @blacklist).each do |text|
46
- weight = @emphasis[:multiplier]
47
48
  text = text.to_s
49
+ keywords << Highscore::Keyword.new(text, weight(text))
50
+ end
51
+
52
+ keywords
53
+ end
48
54
 
49
- if text.length >= @emphasis[:long_words_threshold]
50
- weight *= @emphasis[:long_words]
51
- end
55
+ private
52
56
 
53
- if text[0,1] == text[0,1].upcase
54
- weight *= @emphasis[:upper_case]
55
- end
57
+ # weight a single text keyword
58
+ #
59
+ def weight(text)
60
+ weight = @emphasis[:multiplier]
56
61
 
57
- keywords << Highscore::Keyword.new(text, weight)
62
+ if text.length >= @emphasis[:long_words_threshold]
63
+ weight *= @emphasis[:long_words]
58
64
  end
59
65
 
60
- keywords
66
+ if text[0,1] == text[0,1].upcase
67
+ weight *= @emphasis[:upper_case]
68
+ end
69
+
70
+ weight += vowels(text)
71
+ weight += consonants(text)
72
+ weight
73
+ end
74
+
75
+ def vowels(text)
76
+ percent = text.vowels.length / text.length.to_f
77
+ percent * @emphasis[:vowels]
78
+ end
79
+
80
+ def consonants(text)
81
+ percent = text.consonants.length / text.length.to_f
82
+ percent * @emphasis[:consonants]
61
83
  end
62
84
  end
63
85
  end
@@ -15,5 +15,10 @@ module Highscore
15
15
  def <=>(other)
16
16
  other.weight <=> @weight
17
17
  end
18
+
19
+ # get the string
20
+ def to_s
21
+ @text
22
+ end
18
23
  end
19
24
  end
@@ -15,4 +15,14 @@ class String
15
15
 
16
16
  content.keywords
17
17
  end
18
+
19
+ # get all vowels from a string
20
+ def vowels
21
+ gsub(/[^aeiou]/, '')
22
+ end
23
+
24
+ # get all consonants from a string
25
+ def consonants
26
+ gsub(/[aeiou]/, '')
27
+ end
18
28
  end
@@ -65,7 +65,7 @@ class TestBlacklist < Test::Unit::TestCase
65
65
  keywords = "Foo bar is not bar baz".keywords(Highscore::Blacklist.load(['baz']))
66
66
 
67
67
  keyword_list = []
68
- keywords.each do |k|
68
+ keywords.rank.each do |k|
69
69
  keyword_list << k.text
70
70
  end
71
71
 
@@ -30,4 +30,16 @@ class TestContent < Test::Unit::TestCase
30
30
  content = Highscore::Content.new content
31
31
  assert_equal 1, content.keywords.length
32
32
  end
33
+
34
+ def test_vowels_and_consonants
35
+ keywords = 'foobar RubyGems'.keywords do
36
+ set :vowels, 2
37
+ set :consonants, 3
38
+ set :upper_case, 1
39
+ set :long_words, 1
40
+ end
41
+
42
+ assert_equal 3.75, keywords.first.weight
43
+ assert_equal 3.5, keywords.last.weight
44
+ end
33
45
  end
@@ -22,6 +22,10 @@ class TestKeyword < Test::Unit::TestCase
22
22
  assert_equal 'Foobar', @keyword.text
23
23
  end
24
24
 
25
+ def test_to_s
26
+ assert_equal 'Ruby', @keyword.to_s
27
+ end
28
+
25
29
  def test_weight
26
30
  assert_equal 2, @keyword.weight
27
31
 
@@ -13,6 +13,14 @@ class TestString < Test::Unit::TestCase
13
13
  assert_equal 0, keywords.length
14
14
  end
15
15
 
16
+ def test_vowels
17
+ assert_equal("eoaiu", "feobariu".vowels)
18
+ end
19
+
20
+ def test_consonants
21
+ assert_equal("fbr", "feobariu".consonants)
22
+ end
23
+
16
24
  def test_default_blacklist
17
25
  keywords = "the Ruby Ruby Ruby Hacker".keywords
18
26
  assert_equal 2, keywords.length
data/version.txt CHANGED
@@ -1 +1 @@
1
- 0.4.2
1
+ 0.4.3
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: highscore
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: 0.4.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-07 00:00:00.000000000 Z
12
+ date: 2012-02-11 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bones
16
- requirement: &70117716241580 !ruby/object:Gem::Requirement
16
+ requirement: &70111402827500 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,7 +21,7 @@ dependencies:
21
21
  version: 3.7.3
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *70117716241580
24
+ version_requirements: *70111402827500
25
25
  description: Find and rank keywords in long texts.
26
26
  email: liebler.dominik@googlemail.com
27
27
  executables:
@@ -34,6 +34,7 @@ extra_rdoc_files:
34
34
  - test/fixtures/blacklist.txt
35
35
  files:
36
36
  - .gitignore
37
+ - .travis.yml
37
38
  - History.txt
38
39
  - README.md
39
40
  - Rakefile