textstats 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
@@ -0,0 +1,5 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Michael Scharkow
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,7 @@
1
+ = textstats
2
+
3
+ Extends Ruby's String class with basic text statistics such as average word and sentence length, word frequencies, and readability indices.
4
+
5
+ == Copyright
6
+
7
+ Copyright (c) 2009 Michael Scharkow <michael@underused.org>. See LICENSE for details.
@@ -0,0 +1,56 @@
1
require 'rubygems'
require 'rake'

# Returns the version string used in the RDoc title.
#
# Reads the plain-text VERSION file when it exists, otherwise falls back to a
# VERSION.yml holding :major, :minor and :patch keys, and returns "" when
# neither file is present. Paths are resolved against the current directory.
def textstats_version
  if File.exist?('VERSION')
    File.read('VERSION').strip
  elsif File.exist?('VERSION.yml')
    require 'yaml' # YAML is only needed on this fallback path
    config = YAML.load(File.read('VERSION.yml'))
    "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
  else
    ""
  end
end

# Gem packaging is delegated to Jeweler; a missing Jeweler only prints a hint
# so that the remaining tasks stay usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "textstats"
    gem.summary = "Important stuff"
    gem.email = "michael@underused.org"
    gem.homepage = "http://github.com/mscharkow/textstats"
    gem.authors = ["Michael Scharkow"]
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

# Unit tests: every test/**/*_test.rb file, with lib and test on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# Coverage: when RCov cannot be loaded, define a stand-in :rcov task that
# aborts with installation instructions.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

task :default => :test

# Newer Rake releases no longer ship rake/rdoctask; prefer the task class
# provided by RDoc and fall back to the legacy one on old installations.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "textstats #{textstats_version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
56
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.2
@@ -0,0 +1,58 @@
1
# Extends the core String class with simple text statistics: word and
# sentence counts, punctuation and vocabulary ratios, word frequencies and
# two readability scores.
class String
  # Returns the words of the string as an Array of Strings.
  #
  # A word starts with a word character and may continue with word
  # characters, apostrophes and hyphens, so "It's" and "github-textstats"
  # each count as a single word.
  def words
    scan(/\w[\w\'\-]*/)
  end

  # Returns the mean number of characters per word as a Float.
  #
  # Returns 0.0 when the string contains no words.
  def avg_word_length
    tokens = words
    return 0.0 if tokens.empty?
    Float(tokens.inject(0) { |total, word| total + word.size }) / tokens.size
  end

  # Returns the estimated number of sentences as an Integer (never below 1).
  #
  # A sentence end is one of . ? ! : followed either by whitespace and an
  # ASCII capital letter or by the end of a line.
  def sentences
    count = scan(/(\.|\?|\!|:)(\s+[A-Z]|$)/).size
    count > 0 ? count : 1
  end

  # Returns the mean number of words per sentence as a Float.
  def avg_sentence_length
    # Convert before dividing; Integer division would truncate the result.
    Float(words.size) / sentences
  end

  # Returns the punctuation marks found in the string.
  #
  # Matches a hyphen surrounded by whitespace, commas, semicolons, round
  # brackets and double quotes. Because the pattern has a capture group, the
  # result is an Array of one-element Arrays.
  def puncts
    scan(/(\s-\s|,|;|\(|\)|\")/)
  end

  # Returns the number of punctuation marks (see #puncts) divided by the
  # length of the string, as a Float. Returns 0.0 for an empty string.
  def punct_ratio
    return 0.0 if empty?
    Float(puncts.size) / size
  end

  # Returns the number of distinct words divided by the total number of
  # words, as a Float. Returns 0.0 when the string contains no words.
  #
  # downcase:: when true, words are lower-cased before comparison, so "It"
  #            and "it" count as the same type.
  def type_token_ratio(downcase=false)
    tokens = downcase ? words.map { |word| word.downcase } : words
    return 0.0 if tokens.empty?
    Float(tokens.uniq.size) / tokens.size
  end

  # Returns an Array of [word, count] pairs ordered from most to least
  # frequent. Counting is case-sensitive.
  def word_freqs
    counts = Hash.new(0)
    words.each { |word| counts[word] += 1 }
    counts.sort { |a, b| a[1] <=> b[1] }.reverse
  end

  # Returns the words that are at least +size+ characters long, in order of
  # appearance (duplicates included).
  def long_words(size=6)
    words.select { |word| word.size >= size }
  end

  # Returns a readability score computed as
  # 4.71 * average word length + 0.5 * average sentence length - 21.43
  # (the Automated Readability Index formula).
  def ari
    (4.71 * avg_word_length) + (0.5 * avg_sentence_length) - 21.43
  end

  # Returns a readability score computed as
  # 5.89 * average word length - 40 / average sentence length - 15.8.
  #
  # NOTE(review): presumably the Coleman-Liau formula; the 0.4 factor differs
  # from commonly published variants (about 0.3) - confirm before relying on it.
  def clf
    (5.89 * avg_word_length) - (0.4 * 100.0/avg_sentence_length) - 15.8
  end
end
@@ -0,0 +1,10 @@
1
require 'rubygems'
require 'test/unit'
require 'shoulda'

# Prepend the project's lib directory and this test directory to the load
# path so the library under test can be required by name.
test_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(test_dir, '..', 'lib'))
$LOAD_PATH.unshift(test_dir)
require 'textstats'

# Reopened with an empty body; nothing is added to the base test case here.
class Test::Unit::TestCase
end
@@ -0,0 +1,34 @@
1
+ require 'test_helper'
2
+
3
# Tests for the statistics methods that textstats adds to String.
#
# assert_equal takes the expected value first and the actual value second;
# the assertions below follow that order so failure messages read correctly.
class TextstatsTest < Test::Unit::TestCase
  # Fixture strings: a plain sentence, one with an apostrophe and a hyphenated
  # word, and a two-line text with mixed punctuation.
  def setup
    @simple = "This is a test text."
    @apos = "It's meant for testing the github-textstats gem."
    @punct= "It contains commas, full stops, exclamation; marks and other (special) chars.
Does it? It really does: not!"
  end

  should "count words correctly" do
    assert_equal 5, @simple.words.size
    assert_equal 7, @apos.words.size
  end

  should "count sentences correctly" do
    assert_equal 3, @punct.sentences
  end

  should "count special chars correctly" do
    assert_equal 5, @punct.puncts.size
  end

  should "compute type-token ratio correctly" do
    assert_equal 1, @simple.type_token_ratio
    assert_equal @apos.type_token_ratio(true), @apos.type_token_ratio
    # "It"/"it" and "Does"/"does" collapse once lower-cased, lowering the ratio.
    assert @punct.type_token_ratio > @punct.type_token_ratio(true)
  end

  should "count long words correctly" do
    assert_equal 5, @punct.long_words(6).size
  end
end
@@ -0,0 +1,50 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
# Gem specification for textstats 0.0.2.
Gem::Specification.new do |s|
  s.name = %q{textstats}
  s.version = "0.0.2"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Michael Scharkow"]
  s.date = %q{2010-01-06}
  s.email = %q{michael@underused.org}
  s.extra_rdoc_files = [
    "LICENSE",
    "README.rdoc"
  ]
  s.files = [
    ".document",
    ".gitignore",
    "LICENSE",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "lib/textstats.rb",
    "test/test_helper.rb",
    "test/textstats_test.rb",
    "textstats.gemspec"
  ]
  s.homepage = %q{http://github.com/mscharkow/textstats}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{Important stuff}
  s.test_files = [
    "test/test_helper.rb",
    "test/textstats_test.rb"
  ]

  # The gem declares no dependencies, so the former version check had empty
  # branches. It also referenced Gem::RubyGemsVersion, which current RubyGems
  # no longer defines, making the file fail to load; only the
  # specification version assignment is kept.
  s.specification_version = 3 if s.respond_to? :specification_version
end
50
+
metadata ADDED
@@ -0,0 +1,78 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: textstats
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 2
10
+ version: 0.0.2
11
+ platform: ruby
12
+ authors:
13
+ - Michael Scharkow
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-01-06 00:00:00 +01:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description:
23
+ email: michael@underused.org
24
+ executables: []
25
+
26
+ extensions: []
27
+
28
+ extra_rdoc_files:
29
+ - LICENSE
30
+ - README.rdoc
31
+ files:
32
+ - .document
33
+ - .gitignore
34
+ - LICENSE
35
+ - README.rdoc
36
+ - Rakefile
37
+ - VERSION
38
+ - lib/textstats.rb
39
+ - test/test_helper.rb
40
+ - test/textstats_test.rb
41
+ - textstats.gemspec
42
+ has_rdoc: true
43
+ homepage: http://github.com/mscharkow/textstats
44
+ licenses: []
45
+
46
+ post_install_message:
47
+ rdoc_options:
48
+ - --charset=UTF-8
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ hash: 3
57
+ segments:
58
+ - 0
59
+ version: "0"
60
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ hash: 3
66
+ segments:
67
+ - 0
68
+ version: "0"
69
+ requirements: []
70
+
71
+ rubyforge_project:
72
+ rubygems_version: 1.3.7
73
+ signing_key:
74
+ specification_version: 3
75
+ summary: Important stuff
76
+ test_files:
77
+ - test/test_helper.rb
78
+ - test/textstats_test.rb