highscore 0.4.2 → 0.4.3

Sign up to get free protection for your applications and to get access to all the features.
data/.travis.yml ADDED
@@ -0,0 +1,10 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.8.7
4
+ - 1.9.2
5
+ - 1.9.3
6
+ - jruby-18mode # JRuby in 1.8 mode
7
+ - jruby-19mode # JRuby in 1.9 mode
8
+ - rbx-18mode
9
+ # - rbx-19mode # currently in active development, may or may not work for your project
10
+ before_install: gem install bones
data/History.txt CHANGED
@@ -1,3 +1,8 @@
1
+ == 0.4.3 / 2012-02-11
2
+
3
+ * rate words based on the amount of vowels and consonants
4
+ * added a CLI tool that uses all of the API (via parameters)
5
+
1
6
  == 0.4.2 / 2012-02-07
2
7
 
3
8
  * added a simple example that also shows return values in README.md
data/README.md CHANGED
@@ -7,9 +7,11 @@ Features
7
7
  --------
8
8
 
9
9
  * configurable to rank different types of words differently (uppercase, long words, etc.)
10
+ * rate words based on the percentage of vowels and consonants they contain
10
11
  * directly get keywords from String objects
11
12
  * blacklist words via a plain text file, String or an Array of words
12
13
  * merge together Keywords from multiple sources
14
+ * includes a CLI tool that reads text from STDIN, writes keywords to STDOUT and is configurable via parameters
13
15
 
14
16
  Examples
15
17
  --------
@@ -21,11 +23,10 @@ text.configure do
21
23
  set :upper_case, 3
22
24
  set :long_words, 2
23
25
  set :long_words_threshold, 15
26
+ set :vowels, 1 # => default = 0 = not considered
27
+ set :consonants, 5 # => default = 0 = not considered
24
28
  end
25
29
 
26
- # get all keywords
27
- text.keywords.rank => Array
28
-
29
30
  # get only the top 50 keywords
30
31
  text.keywords.top(50).each do |keyword|
31
32
  keyword.text # => keyword text
@@ -44,7 +45,7 @@ can iterate over. Each object in there is a Highscore::Keyword that has methods
44
45
  ```ruby
45
46
  keywords = "Foo bar is not bar baz".keywords(Highscore::Blacklist.load(['baz']))
46
47
 
47
- keywords.each do |k|
48
+ keywords.rank.each do |k|
48
49
  puts "#{k.text} #{k.weight}"
49
50
  end
50
51
 
@@ -54,6 +55,8 @@ end
54
55
  # not 1.0
55
56
  ```
56
57
 
58
+ Have a look at bin/highscore: you can run highscore from the command line and feed it text on STDIN.
59
+
57
60
  Using a custom blacklist to ignore keywords
58
61
  -------------------------------------------
59
62
 
data/bin/highscore CHANGED
@@ -1,7 +1,83 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
+ require 'optparse'
4
+
3
5
  root = File.expand_path('../..', __FILE__)
4
6
  require File.join(root, %w[lib highscore])
5
7
 
6
- # Put your code here
8
+ options = {
9
+ :emphasis => {},
10
+ :short => false
11
+ }
12
+
13
+ optparse = OptionParser.new do |opts|
14
+ opts.on('-h', '--help', 'display this help message') do
15
+ puts opts
16
+ exit
17
+ end
18
+
19
+ # blacklist file
20
+ opts.on('-b', '--blacklist FILEPATH', 'specify a blacklist file that should be used instead of the default') do |filepath|
21
+ options[:blacklist] = Highscore::Blacklist.load_file(filepath)
22
+ end
23
+
24
+ # general multiplier
25
+ opts.on('-m', '--multiplier MULTIPLIER', 'set the global rank multiplier') do |multiplier|
26
+ options[:emphasis][:multiplier] = multiplier.to_f
27
+ end
28
+
29
+ # don't print rank weight
30
+ opts.on('-s', '--short', 'don\'t print rank weight') do
31
+ options[:short] = true
32
+ end
33
+
34
+ # upper case
35
+ opts.on('-u', '--uppercase MULTIPLIER', 'rank multiplier for uppercase words (default is 3.0)') do |u|
36
+ options[:emphasis][:upper_case] = u.to_f
37
+ end
38
+
39
+ # vowels
40
+ opts.on('--vowels MULTIPLIER', 'multiplier for vowels') do |u|
41
+ options[:emphasis][:vowels] = u.to_i
42
+ end
43
+
44
+ # consonants
45
+ opts.on('--consonants MULTIPLIER', 'multiplier for consonants') do |u|
46
+ options[:emphasis][:consonants] = u.to_i
47
+ end
48
+
49
+ # long words
50
+ opts.on('-l', '--longwords MULTIPLIER', 'rank multiplier for long words (default is 2.0)') do |u|
51
+ options[:emphasis][:long_words] = u.to_f
52
+ end
53
+
54
+ # long words threshold
55
+ opts.on('-t', '--longwordsthreshold THRESHOLD', 'threshold for long words (default is 15 chars)') do |u|
56
+ options[:emphasis][:long_words_threshold] = u.to_i
57
+ end
58
+
59
+ opts.on('-n', '--top N', 'show only the top N keywords') do |u|
60
+ options[:top] = u.to_i
61
+ end
62
+ end
63
+
64
+ optparse.parse!
65
+
66
+ text = STDIN.read.to_s
67
+ keywords = text.keywords(options[:blacklist]) do |content|
68
+ options[:emphasis].each do |key,value|
69
+ content.set key, value
70
+ end
71
+ end
72
+
73
+ if options[:top]
74
+ keywords = keywords.top(options[:top])
75
+ else
76
+ keywords = keywords.rank
77
+ end
7
78
 
79
+ keywords.each do |keyword|
80
+ print keyword.text
81
+ print " #{keyword.weight}" unless options[:short]
82
+ puts
83
+ end
@@ -18,7 +18,9 @@ module Highscore
18
18
  :multiplier => 1.0,
19
19
  :upper_case => 3.0,
20
20
  :long_words => 2.0,
21
- :long_words_threshold => 15
21
+ :long_words_threshold => 15,
22
+ :vowels => 0,
23
+ :consonants => 0
22
24
  }
23
25
  end
24
26
 
@@ -43,21 +45,41 @@ module Highscore
43
45
  keywords = Keywords.new
44
46
 
45
47
  Keywords.find_keywords(@content, @blacklist).each do |text|
46
- weight = @emphasis[:multiplier]
47
48
  text = text.to_s
49
+ keywords << Highscore::Keyword.new(text, weight(text))
50
+ end
51
+
52
+ keywords
53
+ end
48
54
 
49
- if text.length >= @emphasis[:long_words_threshold]
50
- weight *= @emphasis[:long_words]
51
- end
55
+ private
52
56
 
53
- if text[0,1] == text[0,1].upcase
54
- weight *= @emphasis[:upper_case]
55
- end
57
+ # weight a single text keyword
58
+ #
59
+ def weight(text)
60
+ weight = @emphasis[:multiplier]
56
61
 
57
- keywords << Highscore::Keyword.new(text, weight)
62
+ if text.length >= @emphasis[:long_words_threshold]
63
+ weight *= @emphasis[:long_words]
58
64
  end
59
65
 
60
- keywords
66
+ if text[0,1] == text[0,1].upcase
67
+ weight *= @emphasis[:upper_case]
68
+ end
69
+
70
+ weight += vowels(text)
71
+ weight += consonants(text)
72
+ weight
73
+ end
74
+
75
+ def vowels(text)
76
+ percent = text.vowels.length / text.length.to_f
77
+ percent * @emphasis[:vowels]
78
+ end
79
+
80
+ def consonants(text)
81
+ percent = text.consonants.length / text.length.to_f
82
+ percent * @emphasis[:consonants]
61
83
  end
62
84
  end
63
85
  end
@@ -15,5 +15,10 @@ module Highscore
15
15
  def <=>(other)
16
16
  other.weight <=> @weight
17
17
  end
18
+
19
+ # get the string
20
+ def to_s
21
+ @text
22
+ end
18
23
  end
19
24
  end
@@ -15,4 +15,14 @@ class String
15
15
 
16
16
  content.keywords
17
17
  end
18
+
19
+ # get all vowels from a string
20
+ def vowels
21
+ gsub(/[^aeiou]/, '')
22
+ end
23
+
24
+ # get all consonants from a string
25
+ def consonants
26
+ gsub(/[aeiou]/, '')
27
+ end
18
28
  end
@@ -65,7 +65,7 @@ class TestBlacklist < Test::Unit::TestCase
65
65
  keywords = "Foo bar is not bar baz".keywords(Highscore::Blacklist.load(['baz']))
66
66
 
67
67
  keyword_list = []
68
- keywords.each do |k|
68
+ keywords.rank.each do |k|
69
69
  keyword_list << k.text
70
70
  end
71
71
 
@@ -30,4 +30,16 @@ class TestContent < Test::Unit::TestCase
30
30
  content = Highscore::Content.new content
31
31
  assert_equal 1, content.keywords.length
32
32
  end
33
+
34
+ def test_vowels_and_consonants
35
+ keywords = 'foobar RubyGems'.keywords do
36
+ set :vowels, 2
37
+ set :consonants, 3
38
+ set :upper_case, 1
39
+ set :long_words, 1
40
+ end
41
+
42
+ assert_equal 3.75, keywords.first.weight
43
+ assert_equal 3.5, keywords.last.weight
44
+ end
33
45
  end
@@ -22,6 +22,10 @@ class TestKeyword < Test::Unit::TestCase
22
22
  assert_equal 'Foobar', @keyword.text
23
23
  end
24
24
 
25
+ def test_to_s
26
+ assert_equal 'Ruby', @keyword.to_s
27
+ end
28
+
25
29
  def test_weight
26
30
  assert_equal 2, @keyword.weight
27
31
 
@@ -13,6 +13,14 @@ class TestString < Test::Unit::TestCase
13
13
  assert_equal 0, keywords.length
14
14
  end
15
15
 
16
+ def test_vowels
17
+ assert_equal("eoaiu", "feobariu".vowels)
18
+ end
19
+
20
+ def test_consonants
21
+ assert_equal("fbr", "feobariu".consonants)
22
+ end
23
+
16
24
  def test_default_blacklist
17
25
  keywords = "the Ruby Ruby Ruby Hacker".keywords
18
26
  assert_equal 2, keywords.length
data/version.txt CHANGED
@@ -1 +1 @@
1
- 0.4.2
1
+ 0.4.3
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: highscore
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: 0.4.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-07 00:00:00.000000000 Z
12
+ date: 2012-02-11 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bones
16
- requirement: &70117716241580 !ruby/object:Gem::Requirement
16
+ requirement: &70111402827500 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,7 +21,7 @@ dependencies:
21
21
  version: 3.7.3
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *70117716241580
24
+ version_requirements: *70111402827500
25
25
  description: Find and rank keywords in long texts.
26
26
  email: liebler.dominik@googlemail.com
27
27
  executables:
@@ -34,6 +34,7 @@ extra_rdoc_files:
34
34
  - test/fixtures/blacklist.txt
35
35
  files:
36
36
  - .gitignore
37
+ - .travis.yml
37
38
  - History.txt
38
39
  - README.md
39
40
  - Rakefile