vocab_counter 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/LICENSE +20 -0
- data/README.rdoc +31 -0
- data/Rakefile +46 -0
- data/VERSION +1 -0
- data/bin/vocab_counter +12 -0
- data/lib/vocab_counter.rb +20 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/vocab_counter_spec.rb +33 -0
- data/vocab_counter.gemspec +58 -0
- metadata +77 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Julian Burgess
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
= vocab_counter
|
2
|
+
|
3
|
+
A simple script to count how many times each unique term appears in a document (case insensitive). Produces CSV output.
|
4
|
+
|
5
|
+
It's also a way for me to practise creating a gem :)
|
6
|
+
|
7
|
+
== Example
|
8
|
+
|
9
|
+
The cat sat on the mat
|
10
|
+
|
11
|
+
the,2
|
12
|
+
cat,1
|
13
|
+
sat,1
|
14
|
+
on,1
|
15
|
+
mat,1
|
16
|
+
|
17
|
+
== Note on Patches/Pull Requests
|
18
|
+
|
19
|
+
* Fork the project.
|
20
|
+
* Make your feature addition or bug fix.
|
21
|
+
* Add tests for it. This is important so I don't break it in a
|
22
|
+
future version unintentionally.
|
23
|
+
* Commit, do not mess with rakefile, version, or history.
|
24
|
+
(if you want to have your own version, that is fine but
|
25
|
+
bump version in a commit by itself I can ignore when I pull)
|
26
|
+
* Send me a pull request. Bonus points for topic branches.
|
27
|
+
|
28
|
+
== Copyright
|
29
|
+
|
30
|
+
Copyright (c) 2009 Julian Burgess. See LICENSE for details.
|
31
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'rubygems'
require 'rake'

# Gem packaging/release tasks. Jeweler is treated as optional: when it (or one
# of its dependencies) cannot be loaded we only print a hint and carry on, so
# the remaining tasks in this file can still be defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "vocab_counter"
    gem.summary = %Q{Count the number of unique terms in a given text}
    gem.description = %Q{Produces a CSV sorted by the number of times each term appears (case insensitive, lowercase output).}
    gem.email = "aubergene@gmail.com"
    gem.homepage = "http://github.com/aubergene/vocab_counter"
    gem.authors = ["Julian Burgess"]
    gem.add_development_dependency "rspec", ">= 1.2.9"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

# `rake spec` runs every *_spec.rb under spec/ with lib/ and spec/ on the load path.
require 'spec/rake/spectask'
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

# `rake rcov` runs the same specs with rcov enabled.
Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

# :check_dependencies is not defined anywhere in this file (presumably it is
# provided by Jeweler::Tasks above -- confirm). Only hook it in when it exists,
# otherwise `rake spec` would abort with an unknown-task error in exactly the
# situation the LoadError rescue above is meant to tolerate.
task :spec => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :spec

# `rake rdoc` builds API docs from the README and everything under lib/.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # strip: a VERSION file normally ends with a newline, which must not end up
  # inside the generated documentation title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "vocab_counter #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
46
|
+
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
data/bin/vocab_counter
ADDED
data/lib/vocab_counter.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# Counts how often each term occurs in a piece of text.
class VocabCounter

  # Builds a CSV report of term frequencies.
  #
  # The text is lower-cased, every character other than a-z, 0-9 and the
  # apostrophe is treated as a separator, and an apostrophe that touches
  # whitespace is dropped so that only apostrophes inside a word survive.
  #
  # input:: the text to analyse (String). It is never modified, so frozen
  #         strings are accepted.
  #
  # Returns a String with one "term,count\n" line per distinct term, highest
  # count first; terms with equal counts keep the order in which they first
  # appeared in the text. Returns "" when the text contains no terms.
  def self.count(input)
    # Work on copies: the non-bang methods leave the caller's string alone.
    normalized = input.downcase.gsub(/[^a-z0-9']/, ' ')

    # NOTE(review): an apostrophe at the very start or end of the text has no
    # neighbouring whitespace and is therefore kept (e.g. a trailing "cats'").
    # The bundled spec currently depends on this, so it is left unchanged here.
    normalized = normalized.gsub(/\s'|'\s/, ' ')

    occurrences = Hash.new(0)
    # split(' ') ignores leading/trailing whitespace, so text that starts with
    # a separator no longer produces an empty "term".
    normalized.split(' ').each { |term| occurrences[term] += 1 }

    # Sort on [-count, first-seen position] to make the order of ties
    # deterministic instead of relying on an unstable sort.
    ranked = occurrences.each_with_index.sort_by { |(_term, hits), position| [-hits, position] }

    ranked.map { |(term, hits), _position| "#{term},#{hits}\n" }.join
  end

end
|
20
|
+
|
data/spec/spec.opts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/spec/spec_helper.rb
ADDED
data/spec/vocab_counter_spec.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')

# Behavioural examples for VocabCounter.count, which turns free text into
# "term,count" CSV lines. Each example only checks that the expected lines
# occur somewhere in the output; line order is not asserted.
describe "VocabCounter" do

  it "should count the number of terms in a string" do
    out = VocabCounter.count("The cat sat on the mat")
    out.should include("the,2")
    out.should include("cat,1")
  end

  it "should ignore case when counting terms" do
    out = VocabCounter.count("The the THE tHe CAT sat on the mat")
    out.should include("the,5")
    out.should include("cat,1")
  end

  # Punctuation acts as a separator, so "%cat$", "cat." and "cat?s" each
  # contribute one "cat".
  it "should ignore any non ['a-z0-9] characters when counting terms" do
    out = VocabCounter.count("The %cat$ sat-on, the mat. Brian's cat. The cat?s sat on, the mat! ")
    out.should include("the,4")
    out.should include("cat,3")
    out.should include("brian's,1")
  end

  # Apostrophes followed by whitespace are dropped ("Brian'" counts as "brian",
  # "cat'" as "cat"). The final "cats'" sits at the very end of the input with
  # no whitespace after it and keeps its apostrophe, which is why "cats" is
  # expected only once here.
  it "should only include apostrophes which appear within a word" do
    out = VocabCounter.count("Brian's Brian Brian' Brian's cat's cats cat' cats'")
    out.should include("brian's,2")
    out.should include("brian,2")
    out.should include("cat,1")
    out.should include("cats,1")
  end

end
|
33
|
+
|
data/vocab_counter.gemspec
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# Generated by jeweler
# DO NOT EDIT THIS FILE DIRECTLY
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
# -*- encoding: utf-8 -*-
#
# NOTE(review): the capability checks below replace a comparison against
# Gem::RubyGemsVersion, a constant current RubyGems releases do not define;
# regenerating this file with an up-to-date generator is the long-term fix.

Gem::Specification.new do |s|
  s.name = %q{vocab_counter}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Julian Burgess"]
  s.date = %q{2009-11-07}
  # Guarded like required_rubygems_version= above, so a RubyGems without this
  # writer does not abort loading the gemspec.
  s.default_executable = %q{vocab_counter} if s.respond_to? :default_executable=
  s.description = %q{Produces a CSV sorted by the number of times each term appears (case insensitive, lowercase output).}
  s.email = %q{aubergene@gmail.com}
  s.executables = ["vocab_counter"]
  s.extra_rdoc_files = [
    "LICENSE",
    "README.rdoc"
  ]
  s.files = [
    ".document",
    ".gitignore",
    "LICENSE",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "bin/vocab_counter",
    "lib/vocab_counter.rb",
    "spec/spec.opts",
    "spec/spec_helper.rb",
    "spec/vocab_counter_spec.rb",
    "vocab_counter.gemspec"
  ]
  s.homepage = %q{http://github.com/aubergene/vocab_counter}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{Count the number of unique terms in a given text}
  s.test_files = [
    "spec/spec_helper.rb",
    "spec/vocab_counter_spec.rb"
  ]

  s.specification_version = 3 if s.respond_to? :specification_version=

  # rspec is only needed to run the specs. Declare it as a development
  # dependency where the installed RubyGems supports that, and fall back to a
  # regular dependency otherwise (same fallback as the generated version check).
  if s.respond_to? :add_development_dependency
    s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
  else
    s.add_dependency(%q<rspec>, [">= 1.2.9"])
  end
end
|
58
|
+
|
metadata
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: vocab_counter
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Julian Burgess
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-11-07 00:00:00 +00:00
|
13
|
+
default_executable: vocab_counter
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: rspec
|
17
|
+
type: :development
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 1.2.9
|
24
|
+
version:
|
25
|
+
description: Produces a CSV sorted by the number of times each term appears (case insensitive, lowercase output).
|
26
|
+
email: aubergene@gmail.com
|
27
|
+
executables:
|
28
|
+
- vocab_counter
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files:
|
32
|
+
- LICENSE
|
33
|
+
- README.rdoc
|
34
|
+
files:
|
35
|
+
- .document
|
36
|
+
- .gitignore
|
37
|
+
- LICENSE
|
38
|
+
- README.rdoc
|
39
|
+
- Rakefile
|
40
|
+
- VERSION
|
41
|
+
- bin/vocab_counter
|
42
|
+
- lib/vocab_counter.rb
|
43
|
+
- spec/spec.opts
|
44
|
+
- spec/spec_helper.rb
|
45
|
+
- spec/vocab_counter_spec.rb
|
46
|
+
- vocab_counter.gemspec
|
47
|
+
has_rdoc: true
|
48
|
+
homepage: http://github.com/aubergene/vocab_counter
|
49
|
+
licenses: []
|
50
|
+
|
51
|
+
post_install_message:
|
52
|
+
rdoc_options:
|
53
|
+
- --charset=UTF-8
|
54
|
+
require_paths:
|
55
|
+
- lib
|
56
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: "0"
|
61
|
+
version:
|
62
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: "0"
|
67
|
+
version:
|
68
|
+
requirements: []
|
69
|
+
|
70
|
+
rubyforge_project:
|
71
|
+
rubygems_version: 1.3.5
|
72
|
+
signing_key:
|
73
|
+
specification_version: 3
|
74
|
+
summary: Count the number of unique terms in a given text
|
75
|
+
test_files:
|
76
|
+
- spec/spec_helper.rb
|
77
|
+
- spec/vocab_counter_spec.rb
|