wordfreq 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ pkg/*
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --colour
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
source 'https://rubygems.org'

# Pull in whatever dependencies wordfreq.gemspec declares.
gemspec

# Test framework for the spec suite.
gem 'rspec'
@@ -0,0 +1,24 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ wordfreq (0.1.4)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ diff-lcs (1.1.3)
10
+ rspec (2.11.0)
11
+ rspec-core (~> 2.11.0)
12
+ rspec-expectations (~> 2.11.0)
13
+ rspec-mocks (~> 2.11.0)
14
+ rspec-core (2.11.1)
15
+ rspec-expectations (2.11.3)
16
+ diff-lcs (~> 1.1.3)
17
+ rspec-mocks (2.11.3)
18
+
19
+ PLATFORMS
20
+ ruby
21
+
22
+ DEPENDENCIES
23
+ rspec
24
+ wordfreq!
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Tobi Lehman
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,84 @@
1
+ # wordfreq
2
+
3
+ `wordfreq` is a simple command-line tool that calculates word frequencies.
4
+ It also supports character frequencies, with the `-c` option.
5
+
6
+ ## Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ gem 'wordfreq'
11
+
12
+ And then execute:
13
+
14
+ $ bundle
15
+
16
+ Or install it yourself as:
17
+
18
+ $ gem install wordfreq
19
+
20
+ ## Usage
21
+
22
+ `wordfreq` is used in a similar manner to the common Unix utilities, such as `wc`:
23
+
24
+ ### Use with a filename argument
25
+
26
+ $ wordfreq filename.txt
27
+ foo 17
28
+ bar 11
29
+ is 10
30
+ vim 4
31
+ baz 3
32
+ something 1
33
+
34
+ ### Use with standard input
35
+
36
+ $ echo 'this is what it is not' | wordfreq
37
+ is 2
38
+ this 1
39
+ what 1
40
+ it 1
41
+ not 1
42
+
43
+ ### Use for character frequency
44
+
45
+ $ wordfreq -c filename.txt
46
+ e 29366
47
+ s 20795
48
+ i 19379
49
+ a 19092
50
+ r 17799
51
+ t 16180
52
+ o 15789
53
+ n 15547
54
+ l 12466
55
+ d 9860
56
+ u 9082
57
+ c 8945
58
+ p 7045
59
+ m 6870
60
+ g 6818
61
+ h 6667
62
+ b 5130
63
+ f 4439
64
+ y 4103
65
+ w 2976
66
+ k 2763
67
+ v 2174
68
+ x 1176
69
+ z 801
70
+ j 552
71
+ q 529
72
+
73
+ ## TODO
74
+
75
+ - profile program to find bottlenecks
76
+
77
+ ## Contributing
78
+
79
+ 1. Fork it
80
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
81
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
82
+ 4. Push to the branch (`git push origin my-new-feature`)
83
+ 5. Create new Pull Request
84
+ 6. Buy me coffee, or beer. Either is good.
@@ -0,0 +1,8 @@
1
+ require "bundler/gem_tasks"
2
+
3
# Run the spec suite by shelling out to the rspec executable.
task(:spec) { sh 'rspec spec' }

# A bare `rake` runs the specs.
task default: :spec
8
+
@@ -0,0 +1,26 @@
1
#!/usr/bin/env ruby
# Command-line front end for Wordfreq: prints every token with its count,
# most frequent first, for the files named on the command line or, when no
# file is given, for standard input.
require 'wordfreq'

# `-c` selects character counting instead of word counting. It is removed
# from ARGV so that ARGF does not try to open it as a file name.
delim = ARGV.delete('-c') ? :chars : :words

# ARGF reads the files whose names remain in ARGV, or STDIN if there are none.
content = ARGF.read

wf = Wordfreq.calculate(content, delim).sort_by { |_token, count| -count.to_i }

# Size the first column from the longest token (not merely the most frequent
# one) so that no token can run into its count. An empty result has no
# tokens, in which case the width falls back to zero.
max_len = wf.map { |token, _count| token.length }.max || 0
width = max_len + 2

wf.each do |token, count|
  begin
    $stdout.puts(token.ljust(width + 10) + count.to_s)
  rescue Errno::EPIPE
    # The reading end of the pipe was closed; stop printing quietly.
    break
  end
end
@@ -0,0 +1,26 @@
1
+ require "wordfreq/version"
2
+
3
# Counts how often each word or each character occurs in a piece of text.
module Wordfreq
  # Maps each supported counting mode to the pattern the text is split on:
  # word boundaries for :words, the empty pattern (every character) for :chars.
  DELIMITERS = {:words => /\b/, :chars => //}.freeze

  # Count the tokens of +content+, ignoring letter case.
  #
  # In :words mode only tokens made up entirely of the letters a-z (after
  # down-casing) are counted. In :chars mode every character except
  # whitespace is counted.
  #
  # @param content [String] the text to analyse
  # @param delimiter [Symbol] a key of DELIMITERS (:words or :chars)
  # @return [Hash{String => Integer}] each token mapped to its number of
  #   occurrences, in order of first appearance
  # @raise [KeyError] if +delimiter+ is not a key of DELIMITERS
  def self.calculate(content, delimiter)
    delim = DELIMITERS.fetch(delimiter)

    # Split the down-cased text, not the raw text: the word filter below only
    # accepts a-z, so capitalised words would otherwise be dropped entirely.
    pieces = content.downcase.split(delim)

    tokens = if delimiter == :words
      pieces.select { |s| s =~ /\A[a-z]+\z/ } # wordlike?
    else
      pieces.reject { |c| c =~ /\s/ }
    end

    # A plain hash (no default value) is returned, so looking up a token that
    # never occurred still yields nil.
    tokens.each_with_object({}) do |token, wf|
      wf[token] = wf.fetch(token, 0) + 1
    end
  end
end
@@ -0,0 +1,3 @@
1
module Wordfreq
  # Release number of the gem. Frozen so the constant cannot be mutated in place.
  VERSION = "0.1.4".freeze
end
@@ -0,0 +1,24 @@
1
+ require_relative '../../lib/wordfreq'
2
+
3
describe Wordfreq do
  let(:content) { "it is in a while that it is not around it so it is and it is not" }

  it "calculates the frequency of words" do
    Wordfreq::calculate(content, :words).should == {"it"=>5, "is"=>4, "in"=>1,
                                                    "a"=>1, "while"=>1, "that"=>1,
                                                    "not"=>2, "around"=>1, "so"=>1,
                                                    "and"=>1}
  end

  it "calculates the frequency of characters" do
    # Whitespace is dropped in :chars mode, so the expectation must not
    # contain an entry for the space character.
    Wordfreq::calculate(content, :chars).should == {"i"=>11, "t"=>9,
                                                    "s"=>5, "n"=>5, "a"=>4,
                                                    "w"=>1, "h"=>2, "l"=>1,
                                                    "e"=>1, "o"=>4, "r"=>1,
                                                    "u"=>1, "d"=>2}
  end

  it "raises a key error when delimiter is not supported" do
    lambda { Wordfreq::calculate(content, :bogus) }.should raise_error KeyError
  end
end
@@ -0,0 +1,19 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for wordfreq.

# Make lib/ loadable so the version constant can be read before installation.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'wordfreq/version'

Gem::Specification.new do |gem|
  gem.name          = "wordfreq"
  gem.version       = Wordfreq::VERSION
  gem.authors       = ["Tobi Lehman"]
  gem.email         = ["tobi.lehman@gmail.com"]
  gem.description   = %q{a command-line word frequency calculator}
  gem.summary       = %q{Count word or character frequencies in files or standard input}
  gem.homepage      = ""
  # Matches the MIT text shipped in LICENSE.txt.
  gem.license       = "MIT"

  # Package every file tracked by git; executables and test files are
  # picked out of that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(spec|features)/})
  gem.require_paths = ["lib"]
end
metadata ADDED
@@ -0,0 +1,59 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wordfreq
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.4
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Tobi Lehman
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-12-30 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: a command-line word frequency calculator
15
+ email:
16
+ - tobi.lehman@gmail.com
17
+ executables:
18
+ - wordfreq
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - .gitignore
23
+ - .rspec
24
+ - Gemfile
25
+ - Gemfile.lock
26
+ - LICENSE.txt
27
+ - README.md
28
+ - Rakefile
29
+ - bin/wordfreq
30
+ - lib/wordfreq.rb
31
+ - lib/wordfreq/version.rb
32
+ - spec/lib/wordfreq_spec.rb
33
+ - wordfreq.gemspec
34
+ homepage: ''
35
+ licenses: []
36
+ post_install_message:
37
+ rdoc_options: []
38
+ require_paths:
39
+ - lib
40
+ required_ruby_version: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ! '>='
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ requirements: []
53
+ rubyforge_project:
54
+ rubygems_version: 1.8.24
55
+ signing_key:
56
+ specification_version: 3
57
+ summary: what even is a summary
58
+ test_files:
59
+ - spec/lib/wordfreq_spec.rb