vocab_counter 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Julian Burgess
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,31 @@
1
+ = vocab_counter
2
+
3
+ A simple script to count the number of unique terms in a document (case insensitive). Produces csv output.
4
+
5
+ It's also a way for me to practise creating a gem :)
6
+
7
+ == Example
8
+
9
+ The cat sat on the mat
10
+
11
+ the,2
12
+ cat,1
13
+ sat,1
14
+ on,1
15
+ mat,1
16
+
17
+ == Note on Patches/Pull Requests
18
+
19
+ * Fork the project.
20
+ * Make your feature addition or bug fix.
21
+ * Add tests for it. This is important so I don't break it in a
22
+ future version unintentionally.
23
+ * Commit, do not mess with rakefile, version, or history.
24
+ (if you want to have your own version, that is fine, but
25
+ bump the version in a commit by itself so I can ignore it when I pull)
26
+ * Send me a pull request. Bonus points for topic branches.
27
+
28
+ == Copyright
29
+
30
+ Copyright (c) 2009 Julian Burgess. See LICENSE for details.
31
+
data/Rakefile ADDED
@@ -0,0 +1,46 @@
1
require 'rubygems'
require 'rake'

# Gem packaging/release tasks. Jeweler is optional: when it cannot be loaded
# a hint is printed and the remaining tasks below are still defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "vocab_counter"
    gem.summary = %Q{Count the number of unique terms in a given text}
    gem.description = %Q{Produces a CSV sorted by the number of times each term appears (case insensitive, lowercase output).}
    gem.email = "aubergene@gmail.com"
    gem.homepage = "http://github.com/aubergene/vocab_counter"
    gem.authors = ["Julian Burgess"]
    gem.add_development_dependency "rspec", ">= 1.2.9"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

require 'spec/rake/spectask'

# rake spec: run every *_spec.rb file under spec/.
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

# rake rcov: run the same specs with rcov coverage enabled.
Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

# This file does not define check_dependencies itself (presumably Jeweler
# provides it -- TODO confirm). Only depend on it when it exists, so that
# `rake spec` and the default task keep working after the LoadError rescue above.
task(:spec => :check_dependencies) if Rake::Task.task_defined?(:check_dependencies)

task :default => :spec

require 'rake/rdoctask'

# rake rdoc: build API documentation from the README and lib/ into rdoc/.
Rake::RDocTask.new do |rdoc|
  # strip: the VERSION file content may end with a newline, which does not
  # belong in the documentation title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "vocab_counter #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
46
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
data/bin/vocab_counter ADDED
@@ -0,0 +1,12 @@
1
#!/usr/bin/env ruby
# Command-line entry point: prints the term-count CSV for the file named by
# the first argument.

# Locate the library relative to this script so the command works from any
# working directory, not only from the gem's root.
$LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib')))
require 'vocab_counter'

path = ARGV[0].to_s

# File.file? rejects a missing argument, a missing path and a directory alike.
unless File.file?(path)
  # Report on stderr with a failing status so the message is never mistaken
  # for CSV output by whatever consumes stdout.
  warn "Vocab counter: please specify a file"
  exit 1
end

# File.read opens, reads and closes the file in one step (no leaked handle).
puts VocabCounter.count(File.read(path))
12
+
@@ -0,0 +1,20 @@
1
# Counts how often each term occurs in a piece of text.
class VocabCounter

  # Builds a CSV listing of term frequencies.
  #
  # The text is lower-cased, every character other than a-z, 0-9 and the
  # apostrophe is treated as a separator, and an apostrophe that touches
  # whitespace is dropped so only apostrophes inside a word survive.
  # An apostrophe at the very start or end of the input has no adjacent
  # whitespace and is therefore kept (a trailing "cats'" is counted as "cats'").
  #
  # input   - String of text to analyse. It is not modified, so frozen
  #           strings are accepted.
  #
  # Returns a String with one "term,count\n" line per distinct term, ordered
  # by descending count; terms with equal counts keep the order in which they
  # first appear in the text. Returns "" when the text contains no terms.
  def self.count(input)
    # Non-destructive calls: work on copies instead of editing the caller's string.
    text = input.downcase
    text = text.gsub(/[^a-z0-9']/, ' ')
    text = text.gsub(/\s'|'\s/, ' ')

    # String keys rather than symbols: terms come from arbitrary input.
    tally = Hash.new(0)
    # split(' ') skips leading whitespace, so no empty term is ever counted.
    text.split(' ').each { |term| tally[term] += 1 }

    # The position is the tie-breaker that makes the ordering deterministic.
    ranked = tally.each_with_index.sort_by do |(_term, occurrences), position|
      [-occurrences, position]
    end

    ranked.map { |(term, occurrences), _position| "#{term},#{occurrences}\n" }.join
  end

end
20
+
data/spec/spec.opts ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,9 @@
1
# Shared spec setup: makes spec/ and lib/ requirable, then loads the library
# under test and the spec framework.
spec_dir = File.dirname(__FILE__)
lib_dir = File.join(File.dirname(__FILE__), '..', 'lib')

# Unshifted in this order so lib/ ends up ahead of spec/ on the load path.
[spec_dir, lib_dir].each { |dir| $LOAD_PATH.unshift(dir) }

require 'vocab_counter'
require 'spec'
require 'spec/autorun'

# No custom runner configuration yet.
Spec::Runner.configure do |config|
end
@@ -0,0 +1,33 @@
1
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')

# Examples for VocabCounter.count. Each example checks that the expected
# "term,count" rows occur as substrings of the returned CSV text.
describe "VocabCounter" do

  it "should count the number of terms in a string" do
    csv = VocabCounter.count("The cat sat on the mat")
    ["the,2", "cat,1"].each { |row| csv.should include(row) }
  end

  it "should ignore case when counting terms" do
    csv = VocabCounter.count("The the THE tHe CAT sat on the mat")
    ["the,5", "cat,1"].each { |row| csv.should include(row) }
  end

  it "should ignore an non ['a-z0-9] characters when counting terms" do
    csv = VocabCounter.count("The %cat$ sat-on, the mat. Brian's cat. The cat?s sat on, the mat! ")
    ["the,4", "cat,3", "brian's,1"].each { |row| csv.should include(row) }
  end

  it "should only include apostrophes which appear within a word" do
    csv = VocabCounter.count("Brian's Brian Brian' Brian's cat's cats cat' cats'")
    ["brian's,2", "brian,2", "cat,1", "cats,1"].each { |row| csv.should include(row) }
  end

end
33
+
@@ -0,0 +1,58 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{vocab_counter}
8
+ s.version = "0.1.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Julian Burgess"]
12
+ s.date = %q{2009-11-07}
13
+ s.default_executable = %q{vocab_counter}
14
+ s.description = %q{Produces a CSV sorted by the number of times each term appears (case insensitive, lowercase output).}
15
+ s.email = %q{aubergene@gmail.com}
16
+ s.executables = ["vocab_counter"]
17
+ s.extra_rdoc_files = [
18
+ "LICENSE",
19
+ "README.rdoc"
20
+ ]
21
+ s.files = [
22
+ ".document",
23
+ ".gitignore",
24
+ "LICENSE",
25
+ "README.rdoc",
26
+ "Rakefile",
27
+ "VERSION",
28
+ "bin/vocab_counter",
29
+ "lib/vocab_counter.rb",
30
+ "spec/spec.opts",
31
+ "spec/spec_helper.rb",
32
+ "spec/vocab_counter_spec.rb",
33
+ "vocab_counter.gemspec"
34
+ ]
35
+ s.homepage = %q{http://github.com/aubergene/vocab_counter}
36
+ s.rdoc_options = ["--charset=UTF-8"]
37
+ s.require_paths = ["lib"]
38
+ s.rubygems_version = %q{1.3.5}
39
+ s.summary = %q{Count the number of unique terms in a given text}
40
+ s.test_files = [
41
+ "spec/spec_helper.rb",
42
+ "spec/vocab_counter_spec.rb"
43
+ ]
44
+
45
+ if s.respond_to? :specification_version then
46
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
47
+ s.specification_version = 3
48
+
49
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
50
+ s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
51
+ else
52
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
53
+ end
54
+ else
55
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
56
+ end
57
+ end
58
+
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: vocab_counter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Julian Burgess
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-11-07 00:00:00 +00:00
13
+ default_executable: vocab_counter
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rspec
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.2.9
24
+ version:
25
+ description: Produces a CSV sorted by the number of times each term appears (case insensitive, lowercase output).
26
+ email: aubergene@gmail.com
27
+ executables:
28
+ - vocab_counter
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - LICENSE
33
+ - README.rdoc
34
+ files:
35
+ - .document
36
+ - .gitignore
37
+ - LICENSE
38
+ - README.rdoc
39
+ - Rakefile
40
+ - VERSION
41
+ - bin/vocab_counter
42
+ - lib/vocab_counter.rb
43
+ - spec/spec.opts
44
+ - spec/spec_helper.rb
45
+ - spec/vocab_counter_spec.rb
46
+ - vocab_counter.gemspec
47
+ has_rdoc: true
48
+ homepage: http://github.com/aubergene/vocab_counter
49
+ licenses: []
50
+
51
+ post_install_message:
52
+ rdoc_options:
53
+ - --charset=UTF-8
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: "0"
61
+ version:
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: "0"
67
+ version:
68
+ requirements: []
69
+
70
+ rubyforge_project:
71
+ rubygems_version: 1.3.5
72
+ signing_key:
73
+ specification_version: 3
74
+ summary: Count the number of unique terms in a given text
75
+ test_files:
76
+ - spec/spec_helper.rb
77
+ - spec/vocab_counter_spec.rb