highscore 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,11 @@
1
+ # general
2
+ .DS_Store
3
+
4
+ # RubyMine
5
+ .idea
6
+
7
+ # bones
8
+ announcement.txt
9
+ coverage
10
+ doc
11
+ pkg
@@ -0,0 +1,12 @@
1
+ == 0.2.1 / 2012-01-15
2
+
3
+ * added parameter multiplier to set a general multiplier for the ranking
4
+
5
+ == 0.2.0 / 2012-01-14
6
+
7
+ * added configure() and parameters to influence emphasizing
8
+
9
+ == 0.1.0 / 2012-01-14
10
+
11
+ * 1 major enhancement
12
+ * Birthday!
@@ -0,0 +1,64 @@
1
+ highscore
2
+ ===========
3
+
4
+ Rank keywords in long texts.
5
+
6
+ Features
7
+ --------
8
+
9
+ * configurable to rank different types of words differently (uppercase, long words, etc.)
10
+
11
+ Examples
12
+ --------
13
+
14
+ text = Highscore::Content.new "foo bar"
15
+ text.configure do
16
+ set :multiplier, 2
17
+ set :upper_case, 3
18
+ set :long_words, 2
19
+ set :long_words_threshold, 15
20
+ end
21
+
22
+ text.keywords # => Hash
23
+ text.keywords.top(50) # => Array
24
+
25
+ Requirements
26
+ ------------
27
+
28
+ (none)
29
+
30
+ Install
31
+ -------
32
+
33
+ * sudo gem install highscore
34
+
35
+ Author
36
+ ------
37
+
38
+ Original author: Dominik Liebler <liebler.dominik@googlemail.com>
39
+
40
+ License
41
+ -------
42
+
43
+ (The MIT License)
44
+
45
+ Copyright (c) 2012 Dominik Liebler
46
+
47
+ Permission is hereby granted, free of charge, to any person obtaining
48
+ a copy of this software and associated documentation files (the
49
+ 'Software'), to deal in the Software without restriction, including
50
+ without limitation the rights to use, copy, modify, merge, publish,
51
+ distribute, sublicense, and/or sell copies of the Software, and to
52
+ permit persons to whom the Software is furnished to do so, subject to
53
+ the following conditions:
54
+
55
+ The above copyright notice and this permission notice shall be
56
+ included in all copies or substantial portions of the Software.
57
+
58
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
59
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
60
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
61
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
62
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
63
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
64
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,18 @@
1
+
2
# The tasks below rely on the "bones" gem; stop with a hint when it cannot
# be loaded.
begin
  require 'bones'
rescue LoadError
  abort '### Please install the "bones" gem ###'
end

# 'test:run' is the default task and a prerequisite of 'gem:release'.
task default: 'test:run'
task 'gem:release' => 'test:run'

# Project settings handed to Bones.
# NOTE(review): ignore_file presumably makes Bones reuse the patterns from
# .gitignore - confirm against the Bones documentation.
Bones do
  name        'highscore'
  authors     'Dominik Liebler'
  email       'liebler.dominik@googlemail.com'
  url         'http://thewebdev.de'
  ignore_file '.gitignore'
end
18
+
@@ -0,0 +1,7 @@
1
#!/usr/bin/env ruby

# Load the library relative to this script: the project root is the
# directory above the one that holds this file.
root = File.expand_path(File.join('..', '..'), __FILE__)
require File.join(root, 'lib', 'highscore')

# Put your code here
7
+
@@ -0,0 +1,60 @@
1
+
2
module Highscore

  # :stopdoc:
  LIBPATH = ::File.expand_path('..', __FILE__) + ::File::SEPARATOR
  PATH = ::File.dirname(LIBPATH) + ::File::SEPARATOR
  VERSION = ::File.read(PATH + 'version.txt').strip
  # :startdoc:

  # Returns the library path for the module. If any arguments are given,
  # they will be joined to the end of the library path using
  # <tt>File.join</tt>.
  #
  # When a block is given, LIBPATH is put at the front of $LOAD_PATH while
  # the block runs and the block's value is returned instead of the path.
  #
  def self.libpath(*args)
    joined = args.empty? ? LIBPATH : ::File.join(LIBPATH, args.flatten)
    return joined unless block_given?

    begin
      $LOAD_PATH.unshift LIBPATH
      yield
    ensure
      # drop the first $LOAD_PATH entry again, even if the block raised
      $LOAD_PATH.shift
    end
  end

  # Returns the path for the module. If any arguments are given,
  # they will be joined to the end of the path using
  # <tt>File.join</tt>.
  #
  # When a block is given, PATH is put at the front of $LOAD_PATH while
  # the block runs and the block's value is returned instead of the path.
  #
  def self.path(*args)
    joined = args.empty? ? PATH : ::File.join(PATH, args.flatten)
    return joined unless block_given?

    begin
      $LOAD_PATH.unshift PATH
      yield
    ensure
      # drop the first $LOAD_PATH entry again, even if the block raised
      $LOAD_PATH.shift
    end
  end

  # Utility method used to require all files ending in .rb that lie in the
  # directory below this file that has the same name as the filename passed
  # in. Optionally, a specific _directory_ name can be passed in such that
  # the _filename_ does not have to be equivalent to the directory.
  #
  # Files are required in sorted path order.
  #
  def self.require_all_libs_relative_to(fname, dir = nil)
    dir ||= ::File.basename(fname, '.*')
    pattern = ::File.join(::File.dirname(fname), dir, '**', '*.rb')

    Dir.glob(::File.expand_path(pattern)).sort.each { |rb| require rb }
  end

end # module Highscore
58
+
59
+ Highscore.require_all_libs_relative_to(__FILE__)
60
+
@@ -0,0 +1,69 @@
1
+ $:.unshift(File.join(File.dirname(__FILE__)))
2
+ require 'keywords'
3
+
4
module Highscore
  # Wraps a text and ranks the words found in it.
  #
  # Every occurrence of a word adds a weight to that word's entry in the
  # resulting Keywords collection. The weight starts at the +:multiplier+
  # emphasis, is multiplied by +:long_words+ when the word has at least
  # +:long_words_threshold+ characters, and by +:upper_case+ when the word
  # starts with an upper-case letter.
  class Content
    attr_reader :content

    # content:: the text to analyse (it is searched with +scan+)
    def initialize(content)
      @content = content

      @emphasis = {
        :multiplier => 1.0,
        :upper_case => 3.0,
        :long_words => 2.0,
        :long_words_threshold => 15
      }
    end

    # configure ranking
    #
    # The block is evaluated with instance_eval, so +set+ can be called
    # inside it without a receiver. Returns the value of the block.
    #
    def configure(&block)
      instance_eval(&block)
    end

    # set emphasis options to rank the content
    #
    # key::   option name, converted with +to_sym+
    # value:: new setting, stored as a Float (+to_f+)
    #
    def set(key, value)
      @emphasis[key.to_sym] = value.to_f
    end

    # get the ranked keywords
    #
    # :call-seq:
    #   keywords -> Keywords
    #
    def keywords
      ranked = Keywords.new(0)

      find_keywords.each do |word|
        weight = @emphasis[:multiplier]
        weight *= @emphasis[:long_words] if word.length >= @emphasis[:long_words_threshold]
        # Only a real upper-case letter counts. Comparing the first character
        # with its upcased form would also match digits and underscores,
        # which have no lower-case variant.
        weight *= @emphasis[:upper_case] if word =~ /\A[[:upper:]]/

        ranked[word] += weight
      end

      ranked
    end

    private

    # find keywords in the content
    #
    # Returns the sorted list of word tokens, one entry per occurrence,
    # without purely numeric tokens and tokens of two characters or less.
    #
    def find_keywords
      words = @content.scan(/\w+/)
      # tokens produced by \w+ never contain a dot or a newline, so an
      # all-digits check is enough to recognise numbers
      words.delete_if { |word| word =~ /\A\d+\z/ || word.length <= 2 }
      words.sort
    end
  end
end
@@ -0,0 +1,59 @@
1
module Highscore
  # keywords that were found in content
  #
  # A Hash that maps each keyword (String) to its numeric weight.
  #
  class Keywords < Hash

    # words that are never reported as keywords; keys are compared in
    # lower case
    # FIXME: add more keywords!
    BLACKLIST = %w[
      the and that post add not see about using some something under our
      comments comment run you want for will file are with end new this use
      all but can your just get very data blog format out first they posts
      second
    ].freeze

    # ranks the keywords and removes keywords that have a low ranking
    # or are blacklisted
    #
    # The removal is destructive: filtered keywords are deleted from the
    # receiver itself.
    #
    # :call-seq:
    #   rank -> array
    #
    def rank
      filter
      sort_it
    end

    # get the top n keywords
    #
    # Returns at most +n+ [keyword, weight] pairs, highest weight first.
    # A non-positive +n+ yields an empty array (a range-based slice would
    # return the whole ranking for 0). Filters the receiver like +rank+.
    #
    def top(n = 10)
      ranked = rank # rank already filters, no separate filter call needed
      n > 0 ? ranked.first(n) : []
    end

    # sorts the keywords and returns an array of arrays
    #
    # Each element is a [keyword, weight] pair; pairs are ordered by
    # descending weight.
    #
    # :call-seq:
    #   sort_it -> array
    #
    def sort_it
      sort { |a, b| b[1] <=> a[1] }
    end

    private

    # filter out unwanted results
    #
    def filter
      run_blacklist
      filter_low
    end

    # filter low ranked keywords (weight of zero or less)
    #
    def filter_low
      delete_if { |_key, value| value <= 0 }
    end

    # remove blacklisted keywords
    #
    def run_blacklist
      delete_if { |key, _value| BLACKLIST.include?(key.downcase) }
    end
  end
end
@@ -0,0 +1,18 @@
1
+ $:.unshift(File.join(File.dirname(__FILE__), %w{.. .. lib highscore}))
2
+ require "content"
3
+ require "test/unit"
4
+
5
# Unit tests for Highscore::Content.
class TestContent < Test::Unit::TestCase
  # Creates a Content object around a short sample sentence before each test.
  def setup
    @text = "This is some text"
    @content = Highscore::Content.new(@text)
  end

  # The reader returns the text that was passed to the constructor.
  def test_content
    stored = @content.content
    assert_equal(@text, stored)
  end

  # keywords hands back a Highscore::Keywords instance.
  def test_keywords
    result = @content.keywords
    assert_instance_of(Highscore::Keywords, result)
  end
end
@@ -0,0 +1,48 @@
1
+ $:.unshift(File.join(File.dirname(__FILE__), %w{.. .. lib highscore}))
2
+ require "keywords"
3
+ require "test/unit"
4
+
5
# Unit tests for Highscore::Keywords.
class TestKeywords < Test::Unit::TestCase
  # Fills a keyword collection with three regular words and the
  # blacklisted word "the" before each test.
  def setup
    @keywords = Highscore::Keywords.new
    { 'Ruby' => 2, 'Sinatra' => 3, 'Highscore' => 1, 'the' => 10 }.each do |word, weight|
      @keywords[word] = weight
    end
  end

  # A new collection starts out empty.
  def test_init
    assert_equal(0, Highscore::Keywords.new.length)
  end

  # rank returns an array ordered by descending weight without "the".
  def test_rank
    assert_equal(4, @keywords.length)

    ranked = @keywords.rank
    assert_instance_of(Array, ranked)

    expected = [['Sinatra', 3], ['Ruby', 2], ['Highscore', 1]]
    assert_equal(expected, ranked)
  end

  # Ranking an empty collection gives an empty array.
  def test_rank_empty
    assert_equal([], Highscore::Keywords.new.rank)
  end

  # top(1) returns only the highest ranked keyword.
  def test_top
    assert_equal([['Sinatra', 3]], @keywords.top(1))
  end

  # top(0) on an empty collection gives an empty array.
  def test_top_empty
    assert_equal([], Highscore::Keywords.new.top(0))
  end

  # sort_it orders the pairs by descending weight.
  def test_sort
    keywords = Highscore::Keywords.new
    keywords['Test'] = 1
    keywords['Foobar'] = 2

    expected = [['Foobar', 2], ['Test', 1]]
    assert_equal(expected, keywords.sort_it)
  end
end
File without changes
@@ -0,0 +1 @@
1
+ 0.2.1
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: highscore
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Dominik Liebler
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-01-18 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bones
16
+ requirement: &70306539324720 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 3.7.3
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *70306539324720
25
+ description: Rank keywords in long texts.
26
+ email: liebler.dominik@googlemail.com
27
+ executables:
28
+ - highscore
29
+ extensions: []
30
+ extra_rdoc_files:
31
+ - History.txt
32
+ - bin/highscore
33
+ files:
34
+ - .gitignore
35
+ - History.txt
36
+ - README.md
37
+ - Rakefile
38
+ - bin/highscore
39
+ - lib/highscore.rb
40
+ - lib/highscore/content.rb
41
+ - lib/highscore/keywords.rb
42
+ - test/highscore/test_content.rb
43
+ - test/highscore/test_keywords.rb
44
+ - test/test_highscore.rb
45
+ - version.txt
46
+ homepage: http://thewebdev.de
47
+ licenses: []
48
+ post_install_message:
49
+ rdoc_options:
50
+ - --main
51
+ - README.md
52
+ require_paths:
53
+ - lib
54
+ required_ruby_version: !ruby/object:Gem::Requirement
55
+ none: false
56
+ requirements:
57
+ - - ! '>='
58
+ - !ruby/object:Gem::Version
59
+ version: '0'
60
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ requirements: []
67
+ rubyforge_project: highscore
68
+ rubygems_version: 1.8.11
69
+ signing_key:
70
+ specification_version: 3
71
+ summary: Rank keywords in long texts.
72
+ test_files:
73
+ - test/highscore/test_content.rb
74
+ - test/highscore/test_keywords.rb
75
+ - test/test_highscore.rb