mscharkow-textstats 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Michael Scharkow
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,7 @@
1
+ = textstats
2
+
3
+ Extends Ruby's String class with some basic text statistics like average word or sentence length, word frequencies and readability indices.
4
+
5
+ == Copyright
6
+
7
+ Copyright (c) 2009 Michael Scharkow <michael@underused.org>. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,56 @@
1
require 'rubygems'
require 'rake'

# Gem packaging tasks are provided by Jeweler when it is installed; without
# it only a hint is printed and the remaining tasks stay usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "textstats"
    gem.summary = %Q{Extends Ruby's String class with basic text statistics}
    gem.email = "michael@underused.org"
    gem.homepage = "http://github.com/underused/textstats"
    gem.authors = ["Michael Scharkow"]
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end

rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

# Unit tests: every test/**/*_test.rb file, with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# Coverage via RCov when available; otherwise the :rcov task aborts with a hint.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end


task :default => :test

# API documentation. Newer toolchains ship the task as RDoc::Task; fall back
# to the legacy Rake::RDocTask when rdoc/task cannot be loaded.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # The project ships a plain-text VERSION file; VERSION.yml is still
  # honoured for checkouts that use the older YAML layout.
  if File.exist?('VERSION')
    version = File.read('VERSION').strip
  elsif File.exist?('VERSION.yml')
    require 'yaml'
    config = YAML.load(File.read('VERSION.yml'))
    version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
  else
    version = ""
  end

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "textstats #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
56
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.2
data/lib/textstats.rb ADDED
@@ -0,0 +1,58 @@
1
# Extends String with basic text statistics: word and sentence counts,
# average lengths, punctuation and type-token ratios, word frequencies and
# two readability indices.
class String
  # Returns the words of the text as an Array of Strings. A word starts with
  # a word character and may continue with word characters, apostrophes or
  # hyphens (so "It's" and "github-textstats" each count as one word).
  def words
    scan(/\w[\w\'\-]*/)
  end

  # Returns the mean word length in characters as a Float, or 0.0 when the
  # text contains no words.
  def avg_word_length
    tokens = words
    return 0.0 if tokens.empty?
    tokens.inject(0) { |total, word| total + word.size }.to_f / tokens.size
  end

  # Returns the number of sentences as an Integer (not the sentences
  # themselves). A sentence end is one of . ? ! : followed either by
  # whitespace and a capital A-Z letter or by the end of a line. Text
  # without any such match counts as a single sentence.
  def sentences
    count = scan(/(\.|\?|\!|:)(\s+[A-Z]|$)/).size
    count > 0 ? count : 1
  end

  # Returns the mean number of words per sentence as a Float.
  def avg_sentence_length
    # Convert before dividing: Integer division would truncate the average.
    words.size.to_f / sentences
  end

  # Returns the punctuation marks found in the text: commas, semicolons,
  # parentheses, double quotes and dashes surrounded by whitespace. Because
  # the pattern has one capture group, each match is a one-element Array.
  def puncts
    scan(/(\s-\s|,|;|\(|\)|\")/)
  end

  # Returns the number of punctuation marks per character as a Float, or
  # 0.0 for an empty string.
  def punct_ratio
    return 0.0 if empty?
    puncts.size.to_f / size
  end

  # Returns the ratio of distinct words to all words as a Float, or 0.0 when
  # the text contains no words. Pass true to compare words case-insensitively.
  def type_token_ratio(downcase=false)
    tokens = downcase ? words.map { |word| word.downcase } : words
    return 0.0 if tokens.empty?
    tokens.uniq.size.to_f / tokens.size
  end

  # Returns an Array of [word, count] pairs ordered from the most to the
  # least frequent word. Words are counted case-sensitively.
  def word_freqs
    counts = Hash.new(0)
    words.each { |word| counts[word] += 1 }
    counts.sort { |a, b| a[1] <=> b[1] }.reverse
  end


  # Returns the words that are at least +size+ characters long (default 6),
  # in order of appearance and including repetitions.
  def long_words(size=6)
    words.select { |word| word.size >= size }
  end

  # Returns the Automated Readability Index computed from the average word
  # and sentence lengths.
  def ari
    (4.71 * avg_word_length) + (0.5 * avg_sentence_length) - 21.43
  end

  # Returns a Coleman-Liau style readability index. The result is not
  # meaningful for text without words (the sentence term divides by the
  # average sentence length).
  # NOTE(review): published versions of the Coleman-Liau formula weight the
  # sentence term with roughly 0.3 rather than 0.4 - confirm the coefficient.
  def clf
    (5.89 * avg_word_length) - (0.4 * 100.0/avg_sentence_length) - 15.8
  end


end
data/test/test_helper.rb ADDED
@@ -0,0 +1,10 @@
1
require 'rubygems'
require 'test/unit'
require 'shoulda'

# Make both the library under test (../lib) and this test directory
# loadable by bare name before requiring the library itself.
test_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(test_dir, '..', 'lib'))
$LOAD_PATH.unshift(test_dir)
require 'textstats'

# Reopened as a hook for shared test helpers; currently adds nothing.
class Test::Unit::TestCase
end
data/test/textstats_test.rb ADDED
@@ -0,0 +1,34 @@
1
require 'test_helper'

# Tests for the text statistics added to String. Assertions follow the
# assert_equal(expected, actual) order so failure messages read correctly.
class TextstatsTest < Test::Unit::TestCase
  # Fixtures: a plain sentence, one with an apostrophe and a hyphenated
  # word, and a two-line text with varied punctuation.
  def setup
    @simple = "This is a test text."
    @apos = "It's meant for testing the github-textstats gem."
    @punct = "It contains commas, full stops, exclamation; marks and other (special) chars.\n" \
             "Does it? It really does: not!"
  end

  should "count words correctly" do
    assert_equal 5, @simple.words.size
    assert_equal 7, @apos.words.size
  end

  should "count sentences correctly" do
    assert_equal 3, @punct.sentences
  end

  should "count special chars correctly" do
    assert_equal 5, @punct.puncts.size
  end

  should "compute type-token ratio correctly" do
    assert_equal 1, @simple.type_token_ratio
    assert_equal @apos.type_token_ratio, @apos.type_token_ratio(true)
    # "It" and "it" are distinct types unless the words are downcased.
    assert @punct.type_token_ratio > @punct.type_token_ratio(true)
  end

  should "count long words correctly" do
    assert_equal 5, @punct.long_words(6).size
  end

end
data/textstats.gemspec ADDED
@@ -0,0 +1,47 @@
1
# -*- encoding: utf-8 -*-

# Gem specification for textstats. The gem declares no dependencies, so no
# RubyGems-version-specific dependency branches are needed.
Gem::Specification.new do |s|
  s.name = %q{textstats}
  s.version = "0.0.2"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Michael Scharkow"]
  s.date = %q{2009-07-01}
  s.email = %q{michael@underused.org}
  s.extra_rdoc_files = [
    "LICENSE",
    "README.rdoc"
  ]
  s.files = [
    ".document",
    ".gitignore",
    "LICENSE",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "lib/textstats.rb",
    "test/test_helper.rb",
    "test/textstats_test.rb",
    "textstats.gemspec"
  ]
  # has_rdoc is a legacy setting; only set it where the writer still exists.
  s.has_rdoc = true if s.respond_to? :has_rdoc=
  s.homepage = %q{http://github.com/underused/textstats}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.1}
  s.summary = %q{Extends Ruby's String class with basic text statistics}
  s.test_files = [
    "test/test_helper.rb",
    "test/textstats_test.rb"
  ]

  s.specification_version = 2 if s.respond_to? :specification_version
end
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mscharkow-textstats
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Michael Scharkow
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-07-01 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: michael@underused.org
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - LICENSE
24
+ - README.rdoc
25
+ files:
26
+ - .document
27
+ - .gitignore
28
+ - LICENSE
29
+ - README.rdoc
30
+ - Rakefile
31
+ - VERSION
32
+ - lib/textstats.rb
33
+ - test/test_helper.rb
34
+ - test/textstats_test.rb
35
+ - textstats.gemspec
36
+ has_rdoc: true
37
+ homepage: http://github.com/underused/textstats
38
+ licenses:
39
+ post_install_message:
40
+ rdoc_options:
41
+ - --charset=UTF-8
42
+ require_paths:
43
+ - lib
44
+ required_ruby_version: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: "0"
49
+ version:
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: "0"
55
+ version:
56
+ requirements: []
57
+
58
+ rubyforge_project:
59
+ rubygems_version: 1.3.5
60
+ signing_key:
61
+ specification_version: 2
62
+ summary: Extends Ruby's String class with basic text statistics
63
+ test_files:
64
+ - test/test_helper.rb
65
+ - test/textstats_test.rb