word_counter 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +7 -0
- data/README.md +2 -0
- data/lib/word_counter/version.rb +1 -1
- data/lib/word_counter.rb +98 -45
- data/spec/word_counter_spec.rb +12 -7
- data/word_counter.gemspec +0 -1
- metadata +3 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 369c75efe673d26c303a3a337ec9ac52374f224a
|
4
|
+
data.tar.gz: 8851cd6e31789b9ffce17a9e202ed168a8e6ffcc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e4a8b6d8b6347e2389f9da6f2d380094663f65e57a6e364fd7c5b52fb2c08ef4595202ff04c879426a9886e23bd562d46701ba8da432993e4059406bb45522ae
|
7
|
+
data.tar.gz: 8b808a9111151250694eb470ff8472cc9cf5203c101b090e76eeb6dfbe3f4f601b5c6a48a32b42f7c9b946822f3ed48670c71ae1d8ab9e4bb78d7fe12415ac77
|
data/.travis.yml
ADDED
data/README.md
CHANGED
@@ -2,6 +2,8 @@
|
|
2
2
|
|
3
3
|
Counts words from either a file or a website, and prints a report to stdout.
|
4
4
|
|
5
|
+
[Build Status](http://travis-ci.org/wulftone/word_counter)
|
6
|
+
|
5
7
|
## Installation
|
6
8
|
|
7
9
|
Add this line to your application's Gemfile:
|
data/lib/word_counter/version.rb
CHANGED
data/lib/word_counter.rb
CHANGED
@@ -1,61 +1,137 @@
|
|
1
1
|
require "word_counter/version"
|
2
|
-
require 'curb'
|
2
|
+
require "net/http"
|
3
3
|
require 'nokogiri'
|
4
4
|
|
5
5
|
class NoFileError < StandardError; end
|
6
|
+
class NoWebsiteError < StandardError; end
|
6
7
|
|
7
8
|
class WordCounter
|
8
9
|
|
9
10
|
|
10
11
|
##
|
12
|
+
# WordCounter!
|
13
|
+
#
|
11
14
|
# @param arg [String] The path and filename of the file, or the URL of the website, to analyze
|
15
|
+
# @param show_sentences [Boolean] (default: false) If true, WordCounter will print out the sentences which contain the counted word in question
|
12
16
|
def initialize arg, show_sentences = false
|
13
17
|
raise ArgumentError, "Please supply a URL or file path." unless arg
|
14
18
|
@show_sentences = true if show_sentences
|
15
19
|
|
16
20
|
begin
|
17
21
|
# try to open it as a file
|
18
|
-
analyze_file arg
|
22
|
+
@hashified_words = WordCounter.analyze_file arg
|
19
23
|
rescue NoFileError => e
|
20
24
|
# try to analyze it as a website, so curl it
|
21
|
-
analyze_website arg
|
25
|
+
@hashified_words = WordCounter.analyze_website arg
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
|
30
|
+
##
|
31
|
+
# Helper method
|
32
|
+
def show_sentences?
|
33
|
+
@show_sentences
|
34
|
+
end
|
35
|
+
|
36
|
+
|
37
|
+
##
|
38
|
+
# Prints a report to stdout
|
39
|
+
def report
|
40
|
+
hashified_words_with_sorted_lines = @hashified_words.each do |word, data|
|
41
|
+
# data[:lines].sort
|
42
|
+
end
|
43
|
+
|
44
|
+
sorted_hash = hashified_words_with_sorted_lines.sort_by { |word, data|
|
45
|
+
[-data[:count], word]
|
46
|
+
}
|
47
|
+
|
48
|
+
sorted_hash.each do |word, data|
|
49
|
+
puts "#{data[:count]} #{word}"
|
50
|
+
puts " #{data[:lines].join("\n ")}" if show_sentences?
|
22
51
|
end
|
23
52
|
end
|
24
53
|
|
25
54
|
|
26
|
-
|
27
|
-
|
28
|
-
|
55
|
+
##
|
56
|
+
# Fetch a url
|
57
|
+
#
|
58
|
+
# @param uri_str [String] A URI
|
59
|
+
def self.fetch(uri_str, limit = 10)
|
60
|
+
raise ArgumentError, 'too many HTTP redirects' if limit == 0
|
61
|
+
|
62
|
+
uri = URI uri_str
|
63
|
+
response = Net::HTTP.get_response uri
|
64
|
+
|
65
|
+
case response
|
66
|
+
when Net::HTTPSuccess then
|
67
|
+
response
|
68
|
+
when Net::HTTPRedirection then
|
69
|
+
location = response['location']
|
70
|
+
warn "redirected to #{location}"
|
71
|
+
fetch(location, limit - 1)
|
72
|
+
else
|
73
|
+
response.value
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
|
78
|
+
##
|
79
|
+
# Prepends an http:// if there isn't one.
|
80
|
+
#
|
81
|
+
# @param arg [String]
|
82
|
+
def self.urlize arg
|
83
|
+
if arg =~ /^(http:\/\/|https:\/\/)/
|
84
|
+
arg
|
85
|
+
else
|
86
|
+
"http://#{arg}"
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
29
90
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
91
|
+
##
|
92
|
+
# Visits a website and analyzes it
|
93
|
+
#
|
94
|
+
# @param arg [String] A website URL
|
95
|
+
def self.analyze_website arg
|
96
|
+
url = WordCounter.urlize arg
|
97
|
+
res = WordCounter.fetch url
|
98
|
+
raise NoWebsiteError unless res.code == '200'
|
99
|
+
|
100
|
+
doc = Nokogiri::HTML res.body
|
101
|
+
doc.search('script').remove
|
102
|
+
doc.search('meta').remove
|
103
|
+
doc.search('style').remove
|
104
|
+
text = doc.text
|
105
|
+
hashify_words text
|
35
106
|
end
|
36
107
|
|
37
108
|
|
38
109
|
|
39
|
-
|
40
|
-
|
110
|
+
##
|
111
|
+
# Opens a file and analyzes it
|
112
|
+
#
|
113
|
+
# @param file [String] A path to a file
|
114
|
+
def self.analyze_file file
|
115
|
+
raise NoFileError, "File does not exist!" unless File.exist? file
|
116
|
+
|
117
|
+
hashified_words = nil
|
41
118
|
|
42
|
-
@file = File.open
|
43
|
-
|
119
|
+
@file = File.open file do |file|
|
120
|
+
hashified_words = hashify_words file
|
44
121
|
end
|
122
|
+
|
123
|
+
hashified_words
|
45
124
|
end
|
46
125
|
|
47
126
|
##
|
48
127
|
# Builds the data structures we use for our analysis.
|
49
128
|
#
|
50
|
-
# @param
|
51
|
-
def self.hashify_words
|
129
|
+
# @param string [String, File] The string we're analyzing (notice: can also be a File object, because `each_line` also works with Files.)
|
130
|
+
def self.hashify_words string
|
52
131
|
hash = {}
|
53
132
|
|
54
|
-
|
55
|
-
|
56
|
-
words = line.split(/\W+/)
|
57
|
-
|
58
|
-
words.reject! { |w| w.empty? }
|
133
|
+
string.each_line do |line|
|
134
|
+
words = line.split(/\W+/).reject { |w| w.empty? }
|
59
135
|
|
60
136
|
words.each do |word|
|
61
137
|
sym = word.to_sym
|
@@ -75,27 +151,4 @@ class WordCounter
|
|
75
151
|
|
76
152
|
hash
|
77
153
|
end
|
78
|
-
|
79
|
-
|
80
|
-
def show_sentences?
|
81
|
-
@show_sentences
|
82
|
-
end
|
83
|
-
|
84
|
-
|
85
|
-
##
|
86
|
-
# Prints a report to stdout
|
87
|
-
def report
|
88
|
-
hashified_words_with_sorted_lines = @hashified_words.each do |word, data|
|
89
|
-
data[:lines].sort
|
90
|
-
end
|
91
|
-
|
92
|
-
sorted_hash = hashified_words_with_sorted_lines.sort_by { |word, data|
|
93
|
-
[-data[:count], word]
|
94
|
-
}
|
95
|
-
|
96
|
-
sorted_hash.each do |word, data|
|
97
|
-
puts "#{data[:count]} #{word}"
|
98
|
-
puts " #{data[:lines].join("\n ")}" if show_sentences?
|
99
|
-
end
|
100
|
-
end
|
101
154
|
end
|
data/spec/word_counter_spec.rb
CHANGED
@@ -60,13 +60,6 @@ describe WordCounter do
|
|
60
60
|
end
|
61
61
|
|
62
62
|
|
63
|
-
it '.hashify_words' do
|
64
|
-
File.open test_file do |file|
|
65
|
-
result = WordCounter.hashify_words file
|
66
|
-
result.should eq hash
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
63
|
it '#analyze_website' do
|
71
64
|
wc = WordCounter.new 'www.example.com'
|
72
65
|
|
@@ -102,4 +95,16 @@ describe WordCounter do
|
|
102
95
|
1 without
|
103
96
|
"
|
104
97
|
end
|
98
|
+
|
99
|
+
|
100
|
+
it '.urlize' do
|
101
|
+
url = WordCounter.urlize 'example.com'
|
102
|
+
url.should eq 'http://example.com'
|
103
|
+
url = WordCounter.urlize 'https://example.com'
|
104
|
+
url.should eq 'https://example.com'
|
105
|
+
url = WordCounter.urlize 'http://example.com'
|
106
|
+
url.should eq 'http://example.com'
|
107
|
+
end
|
108
|
+
|
109
|
+
|
105
110
|
end
|
data/word_counter.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: word_counter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- trevor bortins
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-03-
|
11
|
+
date: 2014-03-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -80,20 +80,6 @@ dependencies:
|
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: curb
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - ">="
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '0'
|
90
|
-
type: :runtime
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - ">="
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '0'
|
97
83
|
- !ruby/object:Gem::Dependency
|
98
84
|
name: nokogiri
|
99
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -118,6 +104,7 @@ extra_rdoc_files: []
|
|
118
104
|
files:
|
119
105
|
- ".gitignore"
|
120
106
|
- ".rspec"
|
107
|
+
- ".travis.yml"
|
121
108
|
- Gemfile
|
122
109
|
- Guardfile
|
123
110
|
- LICENSE.txt
|