vocab_counter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Julian Burgess
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,31 @@
1
+ = vocab_counter
2
+
3
+ A simple script that counts how many times each unique term appears in a document (case insensitive). Produces CSV output.
4
+
5
+ It's also a way for me to practise creating a gem :)
6
+
7
+ == Example
8
+
9
+ The cat sat on the mat
10
+
11
+ the,2
12
+ cat,1
13
+ sat,1
14
+ on,1
15
+ mat,1
16
+
17
+ == Note on Patches/Pull Requests
18
+
19
+ * Fork the project.
20
+ * Make your feature addition or bug fix.
21
+ * Add tests for it. This is important so I don't break it in a
22
+ future version unintentionally.
23
+ * Commit, do not mess with rakefile, version, or history.
24
+ (if you want to have your own version, that is fine but
25
+ bump version in a commit by itself I can ignore when I pull)
26
+ * Send me a pull request. Bonus points for topic branches.
27
+
28
+ == Copyright
29
+
30
+ Copyright (c) 2009 Julian Burgess. See LICENSE for details.
31
+
data/Rakefile ADDED
@@ -0,0 +1,46 @@
1
require 'rubygems'
require 'rake'

# Gem packaging tasks. Jeweler is optional: when it cannot be loaded the
# remaining tasks in this file are still defined and a hint is printed.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "vocab_counter"
    gem.summary = %Q{Count the number of unique terms in a given text}
    gem.description = %Q{Produces a CSV sorted by the number of times each term appears (case insensitive, lowercase output).}
    gem.email = "aubergene@gmail.com"
    gem.homepage = "http://github.com/aubergene/vocab_counter"
    gem.authors = ["Julian Burgess"]
    gem.add_development_dependency "rspec", ">= 1.2.9"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new

  # :check_dependencies is not defined anywhere in this Rakefile; it is
  # expected to be provided by the Jeweler tasks above (TODO confirm for the
  # Jeweler version in use). Declaring the prerequisite only when Jeweler
  # loaded keeps `rake spec` usable without it instead of failing on an
  # unknown task.
  task :spec => :check_dependencies
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

# Runs every *_spec.rb file under spec/.
require 'spec/rake/spectask'
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

# Same spec run with rcov coverage enabled.
Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

task :default => :spec

# Generates RDoc for the README and the library into rdoc/.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # strip removes the trailing newline of the VERSION file so it does not end
  # up inside the documentation title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "vocab_counter #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
46
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
data/bin/vocab_counter ADDED
@@ -0,0 +1,12 @@
1
#!/usr/bin/env ruby
# Command-line entry point: prints the term-count CSV for the file named by
# the first argument.
#
# Usage: vocab_counter FILE

# Locate the library relative to this script instead of the current working
# directory, so the command works from any directory and when installed as a
# gem executable.
$LOAD_PATH.unshift(File.expand_path('../../lib', __FILE__))
require 'vocab_counter'

# to_s turns a missing argument (nil) into "", which is never a file.
path = ARGV[0].to_s

# File.file? is available on every Ruby version (File.exists? was removed in
# Ruby 3.2) and also rejects directories, which cannot be read as text.
unless File.file?(path)
  # abort prints the message to stderr and exits with a non-zero status, so
  # callers and shell pipelines can detect the failure.
  abort "Vocab counter: please specify a file"
end

# File.read opens, reads and closes the file in one call, leaving no open
# handle behind.
puts VocabCounter.count(File.read(path))
12
+
@@ -0,0 +1,20 @@
1
# Counts how often each term occurs in a piece of text and renders the
# tallies as CSV.
class VocabCounter

  # Builds a "term,count" CSV listing for +input+.
  #
  # The text is lower-cased and every character other than a-z, 0-9 and the
  # apostrophe is treated as a separator. An apostrophe that sits directly
  # next to whitespace is dropped as well, so "brian's" stays one term while
  # the apostrophe in "cats' tails" is removed. An apostrophe at the very
  # start or end of the text has no neighbouring whitespace and is kept.
  #
  # input - String of text to analyse. It is not modified, so frozen strings
  #         are accepted.
  #
  # Returns a String with one "term,count\n" line per distinct term, highest
  # count first. Terms with the same count keep the order in which they first
  # appeared. Returns "" when the text contains no terms.
  def self.count(input)
    # Non-destructive calls: the caller's string must not change under them.
    text = input.downcase.gsub(/[^a-z0-9']/, ' ').gsub(/\s'|'\s/, ' ')

    # String keys rather than symbols: terms come from arbitrary input and
    # there is no reason to intern each of them.
    tally = Hash.new(0)
    # split(' ') ignores leading whitespace, so text that starts with a
    # separator does not produce an empty term.
    text.split(' ').each { |term| tally[term] += 1 }

    # The index tie-breaker makes the order of equal counts deterministic
    # (first appearance wins); a plain sort gives no such guarantee.
    ranked = tally.sort_by.with_index { |(_term, occurrences), position| [-occurrences, position] }

    ranked.map { |term, occurrences| "#{term},#{occurrences}\n" }.join
  end

end
20
+
data/spec/spec.opts ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,9 @@
1
# Spec bootstrap: puts the spec directory and the project's lib directory on
# the load path, then loads the library and the spec runner.
spec_dir = File.dirname(__FILE__)
lib_dir = File.join(spec_dir, '..', 'lib')

# Each directory is prepended in turn, so lib ends up ahead of spec.
[spec_dir, lib_dir].each { |dir| $LOAD_PATH.unshift(dir) }

require 'vocab_counter'
require 'spec'
require 'spec/autorun'

# No runner customisation is needed; the block is kept as the place to add it.
Spec::Runner.configure do |_config|
end
@@ -0,0 +1,33 @@
1
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')

# Specs for VocabCounter.count.
#
# Each example splits the CSV into rows before asserting, so an expectation
# has to equal a complete "term,count" row. Matching against the raw string
# would also accept rows that merely contain the text (for example "cat,1"
# inside "bobcat,1" or "cat,10").
describe "VocabCounter" do

  it "should count the number of terms in a string" do
    rows = VocabCounter.count("The cat sat on the mat").split("\n")
    rows.should include("the,2")
    rows.should include("cat,1")
  end

  it "should ignore case when counting terms" do
    rows = VocabCounter.count("The the THE tHe CAT sat on the mat").split("\n")
    rows.should include("the,5")
    rows.should include("cat,1")
  end

  it "should ignore any non ['a-z0-9] characters when counting terms" do
    rows = VocabCounter.count("The %cat$ sat-on, the mat. Brian's cat. The cat?s sat on, the mat! ").split("\n")
    rows.should include("the,4")
    rows.should include("cat,3")
    rows.should include("brian's,1")
  end

  it "should only include apostrophes which appear within a word" do
    rows = VocabCounter.count("Brian's Brian Brian' Brian's cat's cats cat' cats'").split("\n")
    rows.should include("brian's,2")
    rows.should include("brian,2")
    rows.should include("cat,1")
    rows.should include("cats,1")
  end

end
33
+
@@ -0,0 +1,58 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{vocab_counter}
8
+ s.version = "0.1.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Julian Burgess"]
12
+ s.date = %q{2009-11-07}
13
+ s.default_executable = %q{vocab_counter}
14
+ s.description = %q{Produces a CSV sorted by the number of times each term appears (case insensitive, lowercase output).}
15
+ s.email = %q{aubergene@gmail.com}
16
+ s.executables = ["vocab_counter"]
17
+ s.extra_rdoc_files = [
18
+ "LICENSE",
19
+ "README.rdoc"
20
+ ]
21
+ s.files = [
22
+ ".document",
23
+ ".gitignore",
24
+ "LICENSE",
25
+ "README.rdoc",
26
+ "Rakefile",
27
+ "VERSION",
28
+ "bin/vocab_counter",
29
+ "lib/vocab_counter.rb",
30
+ "spec/spec.opts",
31
+ "spec/spec_helper.rb",
32
+ "spec/vocab_counter_spec.rb",
33
+ "vocab_counter.gemspec"
34
+ ]
35
+ s.homepage = %q{http://github.com/aubergene/vocab_counter}
36
+ s.rdoc_options = ["--charset=UTF-8"]
37
+ s.require_paths = ["lib"]
38
+ s.rubygems_version = %q{1.3.5}
39
+ s.summary = %q{Count the number of unique terms in a given text}
40
+ s.test_files = [
41
+ "spec/spec_helper.rb",
42
+ "spec/vocab_counter_spec.rb"
43
+ ]
44
+
45
+ if s.respond_to? :specification_version then
46
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
47
+ s.specification_version = 3
48
+
49
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
50
+ s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
51
+ else
52
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
53
+ end
54
+ else
55
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
56
+ end
57
+ end
58
+
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: vocab_counter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Julian Burgess
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-11-07 00:00:00 +00:00
13
+ default_executable: vocab_counter
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rspec
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.2.9
24
+ version:
25
+ description: Produces a CSV sorted by the number of times each term appears (case insensitive, lowercase output).
26
+ email: aubergene@gmail.com
27
+ executables:
28
+ - vocab_counter
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - LICENSE
33
+ - README.rdoc
34
+ files:
35
+ - .document
36
+ - .gitignore
37
+ - LICENSE
38
+ - README.rdoc
39
+ - Rakefile
40
+ - VERSION
41
+ - bin/vocab_counter
42
+ - lib/vocab_counter.rb
43
+ - spec/spec.opts
44
+ - spec/spec_helper.rb
45
+ - spec/vocab_counter_spec.rb
46
+ - vocab_counter.gemspec
47
+ has_rdoc: true
48
+ homepage: http://github.com/aubergene/vocab_counter
49
+ licenses: []
50
+
51
+ post_install_message:
52
+ rdoc_options:
53
+ - --charset=UTF-8
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: "0"
61
+ version:
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: "0"
67
+ version:
68
+ requirements: []
69
+
70
+ rubyforge_project:
71
+ rubygems_version: 1.3.5
72
+ signing_key:
73
+ specification_version: 3
74
+ summary: Count the number of unique terms in a given text
75
+ test_files:
76
+ - spec/spec_helper.rb
77
+ - spec/vocab_counter_spec.rb