highscore 0.2.1 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,7 @@
1
+ == 0.3.1 / 2012-01-21
2
+
3
+ * refactored and threw away all that array stuff that nobody wants
4
+
1
5
  == 0.2.1 / 2012-01-15
2
6
 
3
7
  * added parameter multiplier to set a general multiplier for the ranking
data/README.md CHANGED
@@ -19,8 +19,15 @@ Examples
19
19
  set :long_words_threshold, 15
20
20
  end
21
21
 
22
- text.keywords # => Hash
23
- text.keywords.top(50) # => Array
22
+ # get all keywords
23
+ text.keywords.rank # => Array
24
+
25
+ # get the top 50 keywords
26
+ text.keywords.top(50).each do |keyword|
27
+ keyword.text # => keyword text
28
+ keyword.weight # => rank weight (float)
29
+ end
30
+
24
31
 
25
32
  Requirements
26
33
  ------------
data/highscore.gemspec ADDED
@@ -0,0 +1,34 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ Gem::Specification.new do |s|
4
+ s.name = "highscore"
5
+ s.version = "0.3.0"
6
+
7
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
+ s.authors = ["Dominik Liebler"]
9
+ s.date = "2012-01-21"
10
+ s.description = "Rank keywords in long texts."
11
+ s.email = "liebler.dominik@googlemail.com"
12
+ s.executables = ["highscore"]
13
+ s.extra_rdoc_files = ["History.txt", "bin/highscore"]
14
+ s.files = [".gitignore", "History.txt", "README.md", "Rakefile", "bin/highscore", "lib/highscore.rb", "lib/highscore/content.rb", "lib/highscore/keyword.rb", "lib/highscore/keywords.rb", "test/highscore/test_content.rb", "test/highscore/test_keyword.rb", "test/highscore/test_keywords.rb", "test/test_highscore.rb", "version.txt"]
15
+ s.homepage = "http://thewebdev.de"
16
+ s.rdoc_options = ["--main", "README.md"]
17
+ s.require_paths = ["lib"]
18
+ s.rubyforge_project = "highscore"
19
+ s.rubygems_version = "1.8.11"
20
+ s.summary = "Rank keywords in long texts."
21
+ s.test_files = ["test/highscore/test_content.rb", "test/highscore/test_keyword.rb", "test/highscore/test_keywords.rb", "test/test_highscore.rb"]
22
+
23
+ if s.respond_to? :specification_version then
24
+ s.specification_version = 3
25
+
26
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
27
+ s.add_development_dependency(%q<bones>, [">= 3.7.3"])
28
+ else
29
+ s.add_dependency(%q<bones>, [">= 3.7.3"])
30
+ end
31
+ else
32
+ s.add_dependency(%q<bones>, [">= 3.7.3"])
33
+ end
34
+ end
@@ -34,36 +34,23 @@ module Highscore
34
34
  # keywords -> Keywords
35
35
  #
36
36
  def keywords
37
- keywords = Keywords.new(0)
37
+ keywords = Keywords.new
38
38
 
39
- find_keywords.each do |k|
39
+ Keywords.find_keywords(@content).each do |text|
40
40
  weight = @emphasis[:multiplier]
41
41
 
42
- if k.length >= @emphasis[:long_words_threshold]
42
+ if text.length >= @emphasis[:long_words_threshold]
43
43
  weight *= @emphasis[:long_words]
44
44
  end
45
45
 
46
- if k[0] == k[0].upcase
46
+ if text[0] == text[0].upcase
47
47
  weight *= @emphasis[:upper_case]
48
48
  end
49
49
 
50
- keywords[k] += weight
50
+ keywords << Highscore::Keyword.new(text, weight)
51
51
  end
52
52
 
53
53
  keywords
54
54
  end
55
-
56
- private
57
-
58
- # find keywords in the content and rate them
59
- #
60
- def find_keywords
61
- keywords = @content.scan(/\w+/)
62
- keywords.delete_if do |x|
63
- x.match(/^[\d]+(\.[\d]+){0,1}$/) or x.length <= 2
64
- end
65
-
66
- keywords.sort
67
- end
68
55
  end
69
56
  end
@@ -0,0 +1,19 @@
1
module Highscore

  # a single keyword taken from the content, together with its rank weight
  #
  class Keyword
    attr_accessor :text
    attr_accessor :weight

    # build a keyword from its text and its weight
    #
    # the weight passed in is converted with to_f before it is stored
    #
    def initialize(text, weight)
      @text, @weight = text, weight.to_f
    end

    # compare two keywords by weight
    #
    # the operands are swapped on purpose: the other keyword's weight is
    # compared against this one's, so an ascending sort lists the keyword
    # with the highest weight first
    #
    def <=>(other)
      other.weight <=> weight
    end
  end
end
@@ -1,7 +1,32 @@
1
+ # external
2
+ require 'digest/sha1'
3
+
1
4
  module Highscore
5
+
2
6
  # keywords that were found in content
3
7
  #
4
- class Keywords < Hash
8
+ class Keywords
9
+ include Enumerable
10
+
11
# find keywords in a piece of content
#
# The content is split into word tokens (/\w+/). Tokens are dropped when they
# are numeric, when they are two characters long or shorter, or when their
# downcased form is on the stop-word blacklist.
#
# :call-seq:
#   find_keywords(content) -> array
#
# Returns the remaining tokens as a sorted Array of Strings. A word that
# occurs several times in the content appears once per occurrence.
#
def self.find_keywords(content)
  # built once per call rather than once per token
  blacklist = %w{the and that post add not see about using some something under our comments comment run you want for will file are with end new this use all but can your just get very data blog format out first they posts second}

  keywords = content.scan(/\w+/).reject do |word|
    word.length <= 2 ||
      word =~ /^[\d]+(\.[\d]+){0,1}$/ ||
      blacklist.include?(word.downcase)
  end

  keywords.sort
end
24
+
25
# set up a keyword collection that starts out empty
#
def initialize
  @keywords = Hash.new
end
5
30
 
6
31
  # ranks the keywords and removes keywords that have a low ranking
7
32
  # or are blacklisted
@@ -10,50 +35,44 @@ module Highscore
10
35
  # rank -> array
11
36
  #
12
37
  def rank
13
- filter
14
- sort_it
38
+ sort
15
39
  end
16
40
 
17
41
  # get the top n keywords
18
42
  #
19
43
  def top n = 10
20
- filter
21
44
  rank[0..(n - 1)]
22
45
  end
23
46
 
24
- # sorts the keywords and returns a array of arrays
25
- #
26
- # :call-seq:
27
- # sort_it -> array
47
+ # add new keywords
28
48
  #
29
- def sort_it
30
- sort {|x,y| y[1] <=> x[1]}
49
# add a keyword to the collection
#
# Entries are stored under the SHA1 hex digest of the keyword's text. When
# an entry for that text already exists, the incoming keyword's weight is
# added to the stored keyword and the new weight is returned; otherwise the
# keyword itself is stored and returned.
#
def <<(keyword)
  digest = Digest::SHA1.hexdigest(keyword.text)
  return @keywords[digest] = keyword unless @keywords.key?(digest)

  known = @keywords[digest]
  known.weight += keyword.weight
end
32
58
 
33
- private
59
+ # sort
60
+ def sort
61
+ sorted = @keywords.sort {|a,b| a[1] <=> b[1] }
34
62
 
35
- # filter out unwanted results
36
- #
37
- def filter
38
- run_blacklist
39
- filter_low
63
+ # convert Array from sort back to Array of Keyword objects
64
+ sorted.collect {|x| x[1]}
40
65
  end
41
66
 
42
- # filter low ranked keywords
67
+ # Enumerable
43
68
  #
44
- def filter_low
45
- delete_if do |key, value|
46
- value <= 0
47
- end
69
# yield every stored keyword to the given block
#
# @keywords is a Hash, so only its values are handed to the block; iterating
# with a plain Hash#each would pass [key, keyword] pairs instead.
#
def each(&block)
  @keywords.each_value(&block)
end
49
72
 
50
- # remove blacklisted keywords
51
- #
52
- def run_blacklist
53
- # FIXME: add more keywords!
54
- delete_if do |key, value|
55
- %w{the and that post add not see about using some something under our comments comment run you want for will file are with end new this use all but can your just get very data blog format out first they posts second}.include? key.downcase
56
- end
73
# how many entries the collection currently holds
def length
  @keywords.size
end
58
77
  end
59
78
  end
@@ -15,4 +15,11 @@ class TestContent < Test::Unit::TestCase
15
15
  def test_keywords
16
16
  assert_instance_of(Highscore::Keywords, @content.keywords)
17
17
  end
18
+
19
+ def test_multiple_keywords
20
+ content = 'Ruby Ruby Ruby and so forth ...'
21
+
22
+ content = Highscore::Content.new content
23
+ assert_equal 2, content.keywords.length
24
+ end
18
25
  end
@@ -0,0 +1,31 @@
1
+ $:.unshift(File.join(File.dirname(__FILE__), %w{.. .. lib highscore}))
2
+ require 'keyword'
3
+ require "test/unit"
4
+
5
+ class TestKeyword < Test::Unit::TestCase
6
+ def setup
7
+ @keyword = Highscore::Keyword.new('Ruby', 2)
8
+ end
9
+
10
+ def test_init
11
+
12
+ # don't allow 'empty' keywords
13
+ assert_raise(ArgumentError) do
14
+ Highscore::Keyword.new
15
+ end
16
+ end
17
+
18
+ def test_text
19
+ assert_equal 'Ruby', @keyword.text
20
+
21
+ @keyword.text = 'Foobar'
22
+ assert_equal 'Foobar', @keyword.text
23
+ end
24
+
25
+ def test_weight
26
+ assert_equal 2, @keyword.weight
27
+
28
+ @keyword.weight = 10.123
29
+ assert_equal 10.123, @keyword.weight
30
+ end
31
+ end
@@ -1,14 +1,15 @@
1
1
  $:.unshift(File.join(File.dirname(__FILE__), %w{.. .. lib highscore}))
2
- require "keywords"
2
+ require 'keywords'
3
+ require 'keyword'
3
4
  require "test/unit"
4
5
 
5
6
  class TestKeywords < Test::Unit::TestCase
6
7
  def setup
7
8
  @keywords = Highscore::Keywords.new
8
- @keywords['Ruby'] = 2
9
- @keywords['Sinatra'] = 3
10
- @keywords['Highscore'] = 1
11
- @keywords['the'] = 10
9
+ @keywords << Highscore::Keyword.new('Ruby', 2)
10
+ @keywords << Highscore::Keyword.new('Sinatra', 3)
11
+ @keywords << Highscore::Keyword.new('Highscore', 1)
12
+ @keywords << Highscore::Keyword.new('the', 10)
12
13
  end
13
14
 
14
15
  def test_init
@@ -20,10 +21,16 @@ class TestKeywords < Test::Unit::TestCase
20
21
 
21
22
  ranked = @keywords.rank
22
23
 
23
- assert_instance_of(Array, ranked)
24
+ ranked_texts = []
25
+ ranked.each do |keyword|
26
+ assert(keyword.instance_of?(Highscore::Keyword),
27
+ "keywords must be instances of Highscore::Keyword, #{keyword.class} given")
28
+ ranked_texts << keyword.text
29
+ end
24
30
 
25
- should_rank = [['Sinatra', 3], ['Ruby', 2], ['Highscore', 1]]
26
- assert_equal should_rank, ranked
31
+ should_rank = %w{the Sinatra Ruby Highscore}
32
+
33
+ assert_equal should_rank, ranked_texts
27
34
  end
28
35
 
29
36
  def test_rank_empty
@@ -31,18 +38,13 @@ class TestKeywords < Test::Unit::TestCase
31
38
  end
32
39
 
33
40
  def test_top
34
- assert_equal [['Sinatra', 3]], @keywords.top(1)
41
+ top = @keywords.top(1)
42
+
43
+ assert_equal('the', top[0].text)
44
+ assert_equal(10.0, top[0].weight)
35
45
  end
36
46
 
37
47
  def test_top_empty
38
48
  assert_equal [], Highscore::Keywords.new.top(0)
39
49
  end
40
-
41
- def test_sort
42
- keywords = Highscore::Keywords.new
43
- keywords['Test'] = 1
44
- keywords['Foobar'] = 2
45
-
46
- assert_equal [['Foobar', 2], ['Test', 1]], keywords.sort_it
47
- end
48
50
  end
data/version.txt CHANGED
@@ -1 +1 @@
1
- 0.2.1
1
+ 0.3.1
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: highscore
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-01-18 00:00:00.000000000 Z
12
+ date: 2012-01-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bones
16
- requirement: &70306539324720 !ruby/object:Gem::Requirement
16
+ requirement: &70191947101320 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,7 +21,7 @@ dependencies:
21
21
  version: 3.7.3
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *70306539324720
24
+ version_requirements: *70191947101320
25
25
  description: Rank keywords in long texts.
26
26
  email: liebler.dominik@googlemail.com
27
27
  executables:
@@ -36,10 +36,13 @@ files:
36
36
  - README.md
37
37
  - Rakefile
38
38
  - bin/highscore
39
+ - highscore.gemspec
39
40
  - lib/highscore.rb
40
41
  - lib/highscore/content.rb
42
+ - lib/highscore/keyword.rb
41
43
  - lib/highscore/keywords.rb
42
44
  - test/highscore/test_content.rb
45
+ - test/highscore/test_keyword.rb
43
46
  - test/highscore/test_keywords.rb
44
47
  - test/test_highscore.rb
45
48
  - version.txt
@@ -71,5 +74,6 @@ specification_version: 3
71
74
  summary: Rank keywords in long texts.
72
75
  test_files:
73
76
  - test/highscore/test_content.rb
77
+ - test/highscore/test_keyword.rb
74
78
  - test/highscore/test_keywords.rb
75
79
  - test/test_highscore.rb