word_counter 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 95867ff9818214b5df3ff16612478e0eb6486488
4
+ data.tar.gz: 5184bffd7a37d26afb7b9d4d49fe76fe3047a85e
5
+ SHA512:
6
+ metadata.gz: 9ec9744709d1b8adf0a92955626cb18174311c646af7d72c9ef42c2518a2f4ca6d1c002353e162cc397af36fc769c12b94feed180e56fafce629ed15843d6f41
7
+ data.tar.gz: 31923b381313a371ef1d78b26633a629f8226f00b164a84d3192bbb81454c91d6687308d94af474fa0b71a9d31b86ac5a433aed0289640e118e3da594acc24ee
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in word_counter.gemspec
4
+ gemspec
data/Guardfile ADDED
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/ruby
2
+ # A sample Guardfile
3
+ # More info at https://github.com/guard/guard#readme
4
+
5
+ guard :rspec, failed_mode: :none do
6
+ watch(%r{^spec/.+_spec\.rb$})
7
+ watch(%r{^spec/.+\.txt$}) { |m| "spec" }
8
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
9
+ # watch('spec/spec_helper.rb') { "spec" }
10
+ end
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 wulftone
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,44 @@
1
+ # WordCounter
2
+
3
+ Counts words from either a file or a website, and prints a report to stdout.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'word_counter'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install word_counter
18
+
19
+ ## Usage
20
+
21
+ To count a website's words:
22
+
23
+ $ word_counter www.example.com
24
+
25
+ To count a file's words:
26
+
27
+ $ word_counter ./path/to/my/file.txt
28
+
29
+ Use the `-s` switch to also report which lines contained each counted word (this can produce a lot of text output, so you might want to pipe it to `less`):
30
+
31
+ $ word_counter www.example.com -s | less
32
+
33
+ ## Roadmap
34
+
35
+ - Color
36
+ - More flexible options
37
+
38
+ ## Contributing
39
+
40
+ 1. Fork it ( http://github.com/wulftone/word_counter/fork )
41
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
42
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
43
+ 4. Push to the branch (`git push origin my-new-feature`)
44
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/bin/word_counter ADDED
@@ -0,0 +1,7 @@
1
#!/usr/bin/ruby

require 'word_counter'

# Command-line entry point.
#
# Usage: word_counter <file-or-url> [-s]
#
# The first non-switch argument is the target to analyze. The `-s` switch asks
# the report to also list the lines each word was found on; it is accepted
# before or after the target (the original only recognised it as the second
# argument, so `word_counter -s TARGET` treated "-s" as the target).
args = ARGV.dup
show_sentences = !args.delete('-s').nil?
target = args.first

# Without a target there is nothing to analyze; fail with a usage hint instead
# of passing nil down to WordCounter.
abort 'Usage: word_counter <file-or-url> [-s]' if target.nil?

wc = WordCounter.new target, show_sentences
wc.report
@@ -0,0 +1,3 @@
1
+ class WordCounter
2
+ VERSION = "0.0.1"
3
+ end
@@ -0,0 +1,100 @@
1
+ require "word_counter/version"
2
+ require 'curb'
3
+ require 'nokogiri'
4
+
5
+ class NoFileError < StandardError; end
6
+
7
class WordCounter

  ##
  # Analyzes +arg+ and keeps the resulting word table for #report.
  #
  # +arg+ is first tried as the path of a local file. When analyze_file raises
  # NoFileError (no such file), +arg+ is handed to analyze_website instead.
  #
  # @param arg [String] The path of the file, or the URL of the website, to analyze
  # @param show_sentences [Boolean] When truthy, #report also prints the lines
  #   each word was found on
  def initialize arg, show_sentences = false
    # Normalize to a strict boolean so show_sentences? never returns nil.
    @show_sentences = show_sentences ? true : false

    begin
      # try to open it as a file
      analyze_file arg
    rescue NoFileError
      # try to analyze it as a website, so curl it
      analyze_website arg
    end
  end


  ##
  # Fetches +url+ with Curl, parses the body with Nokogiri, removes all
  # script, meta and style elements and counts the words of the remaining text.
  #
  # @param url [String] The address to fetch
  # @return [Hash] The word table built by WordCounter.hashify_words
  def analyze_website url
    http = Curl.get url
    html = Nokogiri::HTML http.body_str

    %w[script meta style].each { |tag| html.search(tag).remove }

    @hashified_words = WordCounter.hashify_words html.text
  end


  ##
  # Counts the words of the file at +filename+.
  #
  # @param filename [String] The path and filename of the file to analyze
  # @return [Hash] The word table built by WordCounter.hashify_words
  # @raise [NoFileError] When +filename+ does not exist
  def analyze_file filename
    raise NoFileError, "File does not exist!" unless File.exist? filename

    # The block form closes the file for us; its value is the word table.
    File.open filename do |file|
      @hashified_words = WordCounter.hashify_words file
    end
  end

  ##
  # Builds the data structures we use for our analysis.
  #
  # Each line is split on runs of non-word characters (/\W+/). Every word is
  # stored under its Symbol with the number of occurrences and the distinct,
  # sorted, whitespace-stripped lines it appeared on.
  #
  # @param file [#each_line] The text we're analyzing: an open File, a String
  #   or anything else that yields lines from +each_line+
  # @return [Hash{Symbol => Hash}] word => { count: Integer, lines: Array<String> }
  def self.hashify_words file
    hash = {}

    file.each_line do |line|
      stripped = line.strip

      line.split(/\W+/).each do |word|
        # A line starting with a non-word character yields a leading "".
        next if word.empty?

        entry = (hash[word.to_sym] ||= { count: 0, lines: [] })
        entry[:count] += 1
        entry[:lines] << stripped
      end
    end

    # De-duplicate and sort once per word at the end, rather than after every
    # single occurrence.
    hash.each_value { |entry| entry[:lines] = entry[:lines].uniq.sort }

    hash
  end


  ##
  # @return [Boolean] Whether #report also prints the lines for each word
  def show_sentences?
    @show_sentences
  end


  ##
  # Prints a report to stdout: one "<count> <word>" line per word, most
  # frequent first and alphabetical among equal counts. When show_sentences?
  # is true, each word is followed by the lines it appeared on (already sorted
  # by hashify_words).
  def report
    sorted_words = @hashified_words.sort_by { |word, data| [-data[:count], word] }

    sorted_words.each do |word, data|
      puts "#{data[:count]} #{word}"
      puts " #{data[:lines].join("\n ")}" if show_sentences?
    end
  end
end
data/spec/test.txt ADDED
@@ -0,0 +1,5 @@
1
+ sniff sniff honk
2
+ honk woof bark
3
+ bark woof woof snort
4
+ snort bark woof
5
+
@@ -0,0 +1,105 @@
1
+ require_relative '../lib/word_counter'
2
+
3
+ module CaptureStdout
4
+ def capture_stdout(&blk)
5
+ old = $stdout
6
+ $stdout = fake = StringIO.new
7
+ blk.call
8
+ fake.string
9
+ ensure
10
+ $stdout = old
11
+ end
12
+
13
+ def capture_stderr(&blk)
14
+ old = $stderr
15
+ $stderr = fake = StringIO.new
16
+ blk.call
17
+ fake.string
18
+ ensure
19
+ $stderr = old
20
+ end
21
+ end
22
+
23
+ describe WordCounter do
24
+ include CaptureStdout
25
+ let(:test_file) { './spec/test.txt' }
26
+ let(:hash) {
27
+ {
28
+ :bark => {:count=>3, :lines=>["bark woof woof snort", "honk woof bark", "snort bark woof"]},
29
+ :honk => {:count=>2, :lines=>["honk woof bark", "sniff sniff honk"]},
30
+ :sniff => {:count=>2, :lines=>["sniff sniff honk"]},
31
+ :snort => {:count=>2, :lines=>["bark woof woof snort", "snort bark woof"]},
32
+ :woof => {:count=>4, :lines=>["bark woof woof snort", "honk woof bark", "snort bark woof"]}
33
+ }
34
+ }
35
+
36
+ it '#report' do
37
+ wc = WordCounter.new test_file, true
38
+
39
+ printed = capture_stdout do
40
+ wc.report
41
+ end
42
+
43
+ printed.should eq "4 woof
44
+ bark woof woof snort
45
+ honk woof bark
46
+ snort bark woof
47
+ 3 bark
48
+ bark woof woof snort
49
+ honk woof bark
50
+ snort bark woof
51
+ 2 honk
52
+ honk woof bark
53
+ sniff sniff honk
54
+ 2 sniff
55
+ sniff sniff honk
56
+ 2 snort
57
+ bark woof woof snort
58
+ snort bark woof
59
+ "
60
+ end
61
+
62
+
63
+ it '.hashify_words' do
64
+ File.open test_file do |file|
65
+ result = WordCounter.hashify_words file
66
+ result.should eq hash
67
+ end
68
+ end
69
+
70
+ it '#analyze_website' do
71
+ wc = WordCounter.new 'www.example.com'
72
+
73
+ printed = capture_stdout do
74
+ wc.report
75
+ end
76
+
77
+ printed.should eq "2 Domain
78
+ 2 Example
79
+ 2 domain
80
+ 2 examples
81
+ 2 for
82
+ 2 in
83
+ 1 More
84
+ 1 This
85
+ 1 You
86
+ 1 asking
87
+ 1 be
88
+ 1 coordination
89
+ 1 documents
90
+ 1 established
91
+ 1 illustrative
92
+ 1 information
93
+ 1 is
94
+ 1 may
95
+ 1 or
96
+ 1 permission
97
+ 1 prior
98
+ 1 this
99
+ 1 to
100
+ 1 use
101
+ 1 used
102
+ 1 without
103
+ "
104
+ end
105
+ end
data/word_counter ADDED
@@ -0,0 +1,2 @@
1
+ #!/bin/bash
2
+ ruby -Ilib ./bin/word_counter "$@"
@@ -0,0 +1,29 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'word_counter/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "word_counter"
8
+ spec.version = WordCounter::VERSION
9
+ spec.authors = ["trevor bortins"]
10
+ spec.email = ["trevor.bortins@gmail.com"]
11
+ spec.summary = %q{Counts words in a file and prints them out in interesting ways.}
12
+ spec.description = %q{Counts words in a file and prints them out in interesting ways.}
13
+ spec.homepage = "https://github.com/wulftone/word_counter"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_development_dependency "bundler", "~> 1.5"
22
+ spec.add_development_dependency "rake"
23
+ spec.add_development_dependency "rspec"
24
+ spec.add_development_dependency "guard-rspec"
25
+ spec.add_development_dependency "pry-debugger"
26
+
27
+ spec.add_dependency "curb"
28
+ spec.add_dependency "nokogiri"
29
+ end
metadata ADDED
@@ -0,0 +1,160 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: word_counter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - trevor bortins
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-03-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.5'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: guard-rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry-debugger
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: curb
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: nokogiri
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ description: Counts words in a file and prints them out in interesting ways.
112
+ email:
113
+ - trevor.bortins@gmail.com
114
+ executables:
115
+ - word_counter
116
+ extensions: []
117
+ extra_rdoc_files: []
118
+ files:
119
+ - ".gitignore"
120
+ - ".rspec"
121
+ - Gemfile
122
+ - Guardfile
123
+ - LICENSE.txt
124
+ - README.md
125
+ - Rakefile
126
+ - bin/word_counter
127
+ - lib/word_counter.rb
128
+ - lib/word_counter/version.rb
129
+ - spec/test.txt
130
+ - spec/word_counter_spec.rb
131
+ - word_counter
132
+ - word_counter.gemspec
133
+ homepage: https://github.com/wulftone/word_counter
134
+ licenses:
135
+ - MIT
136
+ metadata: {}
137
+ post_install_message:
138
+ rdoc_options: []
139
+ require_paths:
140
+ - lib
141
+ required_ruby_version: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ required_rubygems_version: !ruby/object:Gem::Requirement
147
+ requirements:
148
+ - - ">="
149
+ - !ruby/object:Gem::Version
150
+ version: '0'
151
+ requirements: []
152
+ rubyforge_project:
153
+ rubygems_version: 2.2.2
154
+ signing_key:
155
+ specification_version: 4
156
+ summary: Counts words in a file and prints them out in interesting ways.
157
+ test_files:
158
+ - spec/test.txt
159
+ - spec/word_counter_spec.rb
160
+ has_rdoc: