wordfreq 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1 @@
1
+ pkg/*
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --colour
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
# Gems are fetched from the public RubyGems index.
source 'https://rubygems.org'

# Test framework used by the specs.
gem 'rspec'

# The gem's own dependencies are declared in wordfreq.gemspec.
gemspec
@@ -0,0 +1,24 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ wordfreq (0.1.4)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ diff-lcs (1.1.3)
10
+ rspec (2.11.0)
11
+ rspec-core (~> 2.11.0)
12
+ rspec-expectations (~> 2.11.0)
13
+ rspec-mocks (~> 2.11.0)
14
+ rspec-core (2.11.1)
15
+ rspec-expectations (2.11.3)
16
+ diff-lcs (~> 1.1.3)
17
+ rspec-mocks (2.11.3)
18
+
19
+ PLATFORMS
20
+ ruby
21
+
22
+ DEPENDENCIES
23
+ rspec
24
+ wordfreq!
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Tobi Lehman
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,84 @@
1
+ # wordfreq
2
+
3
+ `wordfreq` is a simple command-line tool that calculates word frequencies.
4
+ It also supports character frequencies, with the `-c` option.
5
+
6
+ ## Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ gem 'wordfreq'
11
+
12
+ And then execute:
13
+
14
+ $ bundle
15
+
16
+ Or install it yourself as:
17
+
18
+ $ gem install wordfreq
19
+
20
+ ## Usage
21
+
22
+ `wordfreq` is used in a similar manner to the common Unix utilities, such as `wc`:
23
+
24
+ ### Use with a filename argument
25
+
26
+ $ wordfreq filename.txt
27
+ foo 17
28
+ bar 11
29
+ is 10
30
+ vim 4
31
+ baz 3
32
+ something 1
33
+
34
+ ### Use with standard input
35
+
36
+ $ echo 'this is what it is not' | wordfreq
37
+ is 2
38
+ this 1
39
+ what 1
40
+ it 1
41
+ not 1
42
+
43
+ ### Use for character frequency
44
+
45
+ $ wordfreq -c filename.txt
46
+ e 29366
47
+ s 20795
48
+ i 19379
49
+ a 19092
50
+ r 17799
51
+ t 16180
52
+ o 15789
53
+ n 15547
54
+ l 12466
55
+ d 9860
56
+ u 9082
57
+ c 8945
58
+ p 7045
59
+ m 6870
60
+ g 6818
61
+ h 6667
62
+ b 5130
63
+ f 4439
64
+ y 4103
65
+ w 2976
66
+ k 2763
67
+ v 2174
68
+ x 1176
69
+ z 801
70
+ j 552
71
+ q 529
72
+
73
+ ## TODO
74
+
75
+ - profile program to find bottlenecks
76
+
77
+ ## Contributing
78
+
79
+ 1. Fork it
80
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
81
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
82
+ 4. Push to the branch (`git push origin my-new-feature`)
83
+ 5. Create new Pull Request
84
+ 6. Buy me coffee, or beer. Either is good.
@@ -0,0 +1,8 @@
1
+ require "bundler/gem_tasks"
2
+
3
# Runs the RSpec suite found under spec/ by shelling out to the rspec binary.
task(:spec) { sh "rspec spec" }

# Running plain `rake` runs the specs.
task :default => :spec
8
+
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env ruby
2
+ require 'wordfreq'
3
+
4
# Pick the tokenisation mode: `-c` switches from words to characters.
# Array#delete removes every occurrence of the flag, so ARGF below does not
# mistake it for a filename.
delim = ARGV.delete('-c') ? :chars : :words

# ARGF reads the files whose names were passed in, or STDIN when none remain.
content = ARGF.read

# Most frequent tokens first.
wf = Wordfreq.calculate(content, delim).sort_by { |_token, count| -count }

# Size the first column from the LONGEST token, not the most frequent one,
# so that every count lines up. An empty result yields a width of 0 + 2.
max_len = wf.map { |token, _count| token.length }.max || 0
width = max_len + 2

wf.each do |token, count|
  begin
    $stdout.puts(token.ljust(width + 10) + count.to_s)
  rescue Errno::EPIPE
    # The reader (e.g. `head`) closed the pipe early; stop writing quietly.
    break
  end
end
@@ -0,0 +1,26 @@
1
+ require "wordfreq/version"
2
+
3
# Counts how often each word or character occurs in a piece of text.
module Wordfreq
  # Maps each supported tokenisation mode to the pattern the text is split on.
  DELIMITERS = { :words => /\b/, :chars => // }.freeze

  # Builds a frequency table for +content+.
  #
  # content   - the text to analyse; nil is treated as empty text.
  # delimiter - :words to count words, :chars to count non-whitespace
  #             characters.
  #
  # Counting is case-insensitive: the text is lower-cased before it is split,
  # so "The" and "the" are the same token. In :words mode only tokens made up
  # entirely of the letters a-z are kept.
  #
  # Returns a Hash mapping each token to the number of times it occurs.
  # Raises KeyError when +delimiter+ is not a key of DELIMITERS.
  def self.calculate(content, delimiter)
    delim = DELIMITERS.fetch(delimiter)
    pieces = content.to_s.downcase.split(delim)

    tokens = if delimiter == :words
      # \A and \z anchor the whole token rather than a single line of it.
      pieces.select { |piece| piece =~ /\A[a-z]+\z/ }
    else
      pieces.reject { |char| char =~ /\s/ }
    end

    counts = {}
    tokens.each { |token| counts[token] = counts.fetch(token, 0) + 1 }
    counts
  end
end
@@ -0,0 +1,3 @@
1
module Wordfreq
  # Release number of the gem; frozen so it cannot be mutated at runtime.
  VERSION = "0.1.4".freeze
end
@@ -0,0 +1,24 @@
1
+ require_relative '../../lib/wordfreq'
2
+
3
# Examples for Wordfreq.calculate in both tokenisation modes.
describe Wordfreq do
  let(:content) { "it is in a while that it is not around it so it is and it is not" }

  it "calculates the frequency of words" do
    Wordfreq.calculate(content, :words).should == {
      "it" => 5, "is" => 4, "in" => 1,
      "a" => 1, "while" => 1, "that" => 1,
      "not" => 2, "around" => 1, "so" => 1,
      "and" => 1
    }
  end

  it "calculates the frequency of characters" do
    # calculate discards whitespace in :chars mode, so the 17 spaces in the
    # sample text must not show up in the expected table.
    Wordfreq.calculate(content, :chars).should == {
      "i" => 11, "t" => 9,
      "s" => 5, "n" => 5, "a" => 4,
      "w" => 1, "h" => 2, "l" => 1,
      "e" => 1, "o" => 4, "r" => 1,
      "u" => 1, "d" => 2
    }
  end

  it "raises a key error when delimiter is not supported" do
    lambda { Wordfreq.calculate(content, :bogus) }.should raise_error(KeyError)
  end
end
@@ -0,0 +1,19 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'wordfreq/version'
5
+
6
# Packaging metadata for the wordfreq gem.
Gem::Specification.new do |gem|
  gem.name          = "wordfreq"
  gem.version       = Wordfreq::VERSION
  gem.authors       = ["Tobi Lehman"]
  gem.email         = ["tobi.lehman@gmail.com"]
  gem.description   = %q{a command-line word frequency calculator}
  gem.summary       = %q{Counts word or character frequencies from files or standard input}
  # TODO: fill in the project's public URL; it is not recorded anywhere in the gem.
  gem.homepage      = ""
  # Matches the MIT text shipped in LICENSE.txt.
  gem.license       = "MIT"

  # Package exactly what git tracks; anything under bin/ becomes an executable
  # and anything under spec/ or features/ is listed as a test file.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(spec|features)/})
  gem.require_paths = ["lib"]
end
metadata ADDED
@@ -0,0 +1,59 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wordfreq
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.4
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Tobi Lehman
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-12-30 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: a command-line word frequency calculator
15
+ email:
16
+ - tobi.lehman@gmail.com
17
+ executables:
18
+ - wordfreq
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - .gitignore
23
+ - .rspec
24
+ - Gemfile
25
+ - Gemfile.lock
26
+ - LICENSE.txt
27
+ - README.md
28
+ - Rakefile
29
+ - bin/wordfreq
30
+ - lib/wordfreq.rb
31
+ - lib/wordfreq/version.rb
32
+ - spec/lib/wordfreq_spec.rb
33
+ - wordfreq.gemspec
34
+ homepage: ''
35
+ licenses: []
36
+ post_install_message:
37
+ rdoc_options: []
38
+ require_paths:
39
+ - lib
40
+ required_ruby_version: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ! '>='
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ requirements: []
53
+ rubyforge_project:
54
+ rubygems_version: 1.8.24
55
+ signing_key:
56
+ specification_version: 3
57
+ summary: what even is a summary
58
+ test_files:
59
+ - spec/lib/wordfreq_spec.rb