word_counter 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +7 -0
- data/README.md +2 -0
- data/lib/word_counter/version.rb +1 -1
- data/lib/word_counter.rb +98 -45
- data/spec/word_counter_spec.rb +12 -7
- data/word_counter.gemspec +0 -1
- metadata +3 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 369c75efe673d26c303a3a337ec9ac52374f224a
|
4
|
+
data.tar.gz: 8851cd6e31789b9ffce17a9e202ed168a8e6ffcc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e4a8b6d8b6347e2389f9da6f2d380094663f65e57a6e364fd7c5b52fb2c08ef4595202ff04c879426a9886e23bd562d46701ba8da432993e4059406bb45522ae
|
7
|
+
data.tar.gz: 8b808a9111151250694eb470ff8472cc9cf5203c101b090e76eeb6dfbe3f4f601b5c6a48a32b42f7c9b946822f3ed48670c71ae1d8ab9e4bb78d7fe12415ac77
|
data/.travis.yml
ADDED
data/README.md
CHANGED
@@ -2,6 +2,8 @@
|
|
2
2
|
|
3
3
|
Counts words from either a file or a website, and prints a report to stdout.
|
4
4
|
|
5
|
+
[![Build Status](https://api.travis-ci.org/wulftone/word_counter.svg?branch=master)](http://travis-ci.org/wulftone/word_counter)
|
6
|
+
|
5
7
|
## Installation
|
6
8
|
|
7
9
|
Add this line to your application's Gemfile:
|
data/lib/word_counter/version.rb
CHANGED
data/lib/word_counter.rb
CHANGED
@@ -1,61 +1,137 @@
|
|
1
1
|
require "word_counter/version"
|
2
|
-
require 'curb'
|
2
|
+
require "net/http"
|
3
3
|
require 'nokogiri'
|
4
4
|
|
5
5
|
class NoFileError < StandardError; end
|
6
|
+
class NoWebsiteError < StandardError; end
|
6
7
|
|
7
8
|
class WordCounter
|
8
9
|
|
9
10
|
|
10
11
|
##
|
12
|
+
# WordCounter!
|
13
|
+
#
|
11
14
|
# @param filename [String] The path and filename of the file to analyze
|
15
|
+
# @param show_sentences [Boolean] (default: false) If true, WordCounter will print out the sentences which contain the counted word in question
|
12
16
|
def initialize arg, show_sentences = false
|
13
17
|
raise ArgumentError, "Please supply a URL or file path." unless arg
|
14
18
|
@show_sentences = true if show_sentences
|
15
19
|
|
16
20
|
begin
|
17
21
|
# try to open it as a file
|
18
|
-
analyze_file arg
|
22
|
+
@hashified_words = WordCounter.analyze_file arg
|
19
23
|
rescue NoFileError => e
|
20
24
|
# try to analyze it as a website, so curl it
|
21
|
-
analyze_website arg
|
25
|
+
@hashified_words = WordCounter.analyze_website arg
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
|
30
|
+
##
|
31
|
+
# Helper method
|
32
|
+
def show_sentences?
|
33
|
+
@show_sentences
|
34
|
+
end
|
35
|
+
|
36
|
+
|
37
|
+
##
|
38
|
+
# Prints a report to stdout
|
39
|
+
def report
|
40
|
+
hashified_words_with_sorted_lines = @hashified_words.each do |word, data|
|
41
|
+
# data[:lines].sort
|
42
|
+
end
|
43
|
+
|
44
|
+
sorted_hash = hashified_words_with_sorted_lines.sort_by { |word, data|
|
45
|
+
[-data[:count], word]
|
46
|
+
}
|
47
|
+
|
48
|
+
sorted_hash.each do |word, data|
|
49
|
+
puts "#{data[:count]} #{word}"
|
50
|
+
puts " #{data[:lines].join("\n ")}" if show_sentences?
|
22
51
|
end
|
23
52
|
end
|
24
53
|
|
25
54
|
|
26
|
-
|
27
|
-
|
28
|
-
|
55
|
+
##
|
56
|
+
# Fetch a url
|
57
|
+
#
|
58
|
+
# @param uri_str [String] A URI
|
59
|
+
def self.fetch(uri_str, limit = 10)
|
60
|
+
raise ArgumentError, 'too many HTTP redirects' if limit == 0
|
61
|
+
|
62
|
+
uri = URI uri_str
|
63
|
+
response = Net::HTTP.get_response uri
|
64
|
+
|
65
|
+
case response
|
66
|
+
when Net::HTTPSuccess then
|
67
|
+
response
|
68
|
+
when Net::HTTPRedirection then
|
69
|
+
location = response['location']
|
70
|
+
warn "redirected to #{location}"
|
71
|
+
fetch(location, limit - 1)
|
72
|
+
else
|
73
|
+
response.value
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
|
78
|
+
##
|
79
|
+
# Prepends an http:// if there isn't one.
|
80
|
+
#
|
81
|
+
# @param arg [String]
|
82
|
+
def self.urlize arg
|
83
|
+
if arg =~ /^(http:\/\/|https:\/\/)/
|
84
|
+
arg
|
85
|
+
else
|
86
|
+
"http://#{arg}"
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
29
90
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
91
|
+
##
|
92
|
+
# Visits a website and analyzes it
|
93
|
+
#
|
94
|
+
# @param arg [String] A website URL
|
95
|
+
def self.analyze_website arg
|
96
|
+
url = WordCounter.urlize arg
|
97
|
+
res = WordCounter.fetch url
|
98
|
+
raise NoWebsiteError unless res.code == '200'
|
99
|
+
|
100
|
+
doc = Nokogiri::HTML res.body
|
101
|
+
doc.search('script').remove
|
102
|
+
doc.search('meta').remove
|
103
|
+
doc.search('style').remove
|
104
|
+
text = doc.text
|
105
|
+
hashify_words text
|
35
106
|
end
|
36
107
|
|
37
108
|
|
38
109
|
|
39
|
-
|
40
|
-
|
110
|
+
##
|
111
|
+
# Opens a file and analyzes it
|
112
|
+
#
|
113
|
+
# @param file [String] A path to a file
|
114
|
+
def self.analyze_file file
|
115
|
+
raise NoFileError, "File does not exist!" unless File.exist? file
|
116
|
+
|
117
|
+
hashified_words = nil
|
41
118
|
|
42
|
-
@file = File.open
|
43
|
-
|
119
|
+
@file = File.open file do |file|
|
120
|
+
hashified_words = hashify_words file
|
44
121
|
end
|
122
|
+
|
123
|
+
hashified_words
|
45
124
|
end
|
46
125
|
|
47
126
|
##
|
48
127
|
# Builds the data structures we use for our analysis.
|
49
128
|
#
|
50
|
-
# @param
|
51
|
-
def self.hashify_words
|
129
|
+
# @param string [File] The string we're analyzing (notice: can also be a File object, because `each_line` also works with Files.)
|
130
|
+
def self.hashify_words string
|
52
131
|
hash = {}
|
53
132
|
|
54
|
-
|
55
|
-
|
56
|
-
words = line.split(/\W+/)
|
57
|
-
|
58
|
-
words.reject! { |w| w.empty? }
|
133
|
+
string.each_line do |line|
|
134
|
+
words = line.split(/\W+/).reject { |w| w.empty? }
|
59
135
|
|
60
136
|
words.each do |word|
|
61
137
|
sym = word.to_sym
|
@@ -75,27 +151,4 @@ class WordCounter
|
|
75
151
|
|
76
152
|
hash
|
77
153
|
end
|
78
|
-
|
79
|
-
|
80
|
-
def show_sentences?
|
81
|
-
@show_sentences
|
82
|
-
end
|
83
|
-
|
84
|
-
|
85
|
-
##
|
86
|
-
# Prints a report to stdout
|
87
|
-
def report
|
88
|
-
hashified_words_with_sorted_lines = @hashified_words.each do |word, data|
|
89
|
-
data[:lines].sort
|
90
|
-
end
|
91
|
-
|
92
|
-
sorted_hash = hashified_words_with_sorted_lines.sort_by { |word, data|
|
93
|
-
[-data[:count], word]
|
94
|
-
}
|
95
|
-
|
96
|
-
sorted_hash.each do |word, data|
|
97
|
-
puts "#{data[:count]} #{word}"
|
98
|
-
puts " #{data[:lines].join("\n ")}" if show_sentences?
|
99
|
-
end
|
100
|
-
end
|
101
154
|
end
|
data/spec/word_counter_spec.rb
CHANGED
@@ -60,13 +60,6 @@ describe WordCounter do
|
|
60
60
|
end
|
61
61
|
|
62
62
|
|
63
|
-
it '.hashify_words' do
|
64
|
-
File.open test_file do |file|
|
65
|
-
result = WordCounter.hashify_words file
|
66
|
-
result.should eq hash
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
63
|
it '#analyze_website' do
|
71
64
|
wc = WordCounter.new 'www.example.com'
|
72
65
|
|
@@ -102,4 +95,16 @@ describe WordCounter do
|
|
102
95
|
1 without
|
103
96
|
"
|
104
97
|
end
|
98
|
+
|
99
|
+
|
100
|
+
it '.urlize' do
|
101
|
+
url = WordCounter.urlize 'example.com'
|
102
|
+
url.should eq 'http://example.com'
|
103
|
+
url = WordCounter.urlize 'https://example.com'
|
104
|
+
url.should eq 'https://example.com'
|
105
|
+
url = WordCounter.urlize 'http://example.com'
|
106
|
+
url.should eq 'http://example.com'
|
107
|
+
end
|
108
|
+
|
109
|
+
|
105
110
|
end
|
data/word_counter.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: word_counter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- trevor bortins
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-03-
|
11
|
+
date: 2014-03-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -80,20 +80,6 @@ dependencies:
|
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: curb
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - ">="
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '0'
|
90
|
-
type: :runtime
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - ">="
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '0'
|
97
83
|
- !ruby/object:Gem::Dependency
|
98
84
|
name: nokogiri
|
99
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -118,6 +104,7 @@ extra_rdoc_files: []
|
|
118
104
|
files:
|
119
105
|
- ".gitignore"
|
120
106
|
- ".rspec"
|
107
|
+
- ".travis.yml"
|
121
108
|
- Gemfile
|
122
109
|
- Guardfile
|
123
110
|
- LICENSE.txt
|