word_counter 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5c147e993b823171dd15fde073ea9b756e90f9f2
4
- data.tar.gz: ac1b7049265125eecc4e8bc2ba0667aa240480be
3
+ metadata.gz: 369c75efe673d26c303a3a337ec9ac52374f224a
4
+ data.tar.gz: 8851cd6e31789b9ffce17a9e202ed168a8e6ffcc
5
5
  SHA512:
6
- metadata.gz: 08f066c3857a101ef68d26ff4d83a80ca0cf3857a38761abc5c9b862debddd31d0771293ef4b08a80ac84a540254f125c5c3792aaa0030dfa159645ae3c00ebd
7
- data.tar.gz: 4dffa4cf7ff6ccc62df4b4181ff142555b780fbe5a02dc175a29358790bbefdfca99800ce0023a9efff271f76c7a29b3c51a854c6a5546b55a555c305313dbbc
6
+ metadata.gz: e4a8b6d8b6347e2389f9da6f2d380094663f65e57a6e364fd7c5b52fb2c08ef4595202ff04c879426a9886e23bd562d46701ba8da432993e4059406bb45522ae
7
+ data.tar.gz: 8b808a9111151250694eb470ff8472cc9cf5203c101b090e76eeb6dfbe3f4f601b5c6a48a32b42f7c9b946822f3ed48670c71ae1d8ab9e4bb78d7fe12415ac77
data/.travis.yml ADDED
@@ -0,0 +1,7 @@
1
+ language: ruby
2
+ rvm:
3
+ - "1.9.3"
4
+ - "2.0.0"
5
+ - "2.1.0"
6
+ - "2.1.1"
7
+ script: bundle exec rspec spec
data/README.md CHANGED
@@ -2,6 +2,8 @@
2
2
 
3
3
  Counts words from either a file or a website, and prints a report to stdout.
4
4
 
5
+ [![Build Status](https://api.travis-ci.org/wulftone/word_counter.svg?branch=master)](http://travis-ci.org/wulftone/word_counter)
6
+
5
7
  ## Installation
6
8
 
7
9
  Add this line to your application's Gemfile:
@@ -1,3 +1,3 @@
1
1
  class WordCounter
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
data/lib/word_counter.rb CHANGED
@@ -1,61 +1,137 @@
1
1
  require "word_counter/version"
2
- require 'curb'
2
+ require "net/http"
3
3
  require 'nokogiri'
4
4
 
5
5
  class NoFileError < StandardError; end
6
+ class NoWebsiteError < StandardError; end
6
7
 
7
8
  class WordCounter
8
9
 
9
10
 
10
11
  ##
12
+ # WordCounter!
13
+ #
11
14
  # @param filename [String] The path and filename of the file to analyze
15
+ # @param show_sentences [Boolean] (default: false) If true, WordCounter will print out the sentences which contain the counted word in question
12
16
  def initialize arg, show_sentences = false
13
17
  raise ArgumentError, "Please supply a URL or file path." unless arg
14
18
  @show_sentences = true if show_sentences
15
19
 
16
20
  begin
17
21
  # try to open it as a file
18
- analyze_file arg
22
+ @hashified_words = WordCounter.analyze_file arg
19
23
  rescue NoFileError => e
20
24
  # try to analyze it as a website, so curl it
21
- analyze_website arg
25
+ @hashified_words = WordCounter.analyze_website arg
26
+ end
27
+ end
28
+
29
+
30
+ ##
31
+ # Whether the report should also print the sentences containing each counted word
32
+ def show_sentences?
33
+ @show_sentences
34
+ end
35
+
36
+
37
+ ##
38
+ # Prints a report to stdout
39
+ def report
40
+ hashified_words_with_sorted_lines = @hashified_words.each do |word, data|
41
+ # data[:lines].sort
42
+ end
43
+
44
+ sorted_hash = hashified_words_with_sorted_lines.sort_by { |word, data|
45
+ [-data[:count], word]
46
+ }
47
+
48
+ sorted_hash.each do |word, data|
49
+ puts "#{data[:count]} #{word}"
50
+ puts " #{data[:lines].join("\n ")}" if show_sentences?
22
51
  end
23
52
  end
24
53
 
25
54
 
26
- def analyze_website url
27
- http = Curl.get url
28
- html = Nokogiri::HTML http.body_str
55
+ ##
56
+ # Fetch a url
57
+ #
58
+ # @param uri_str [String] A URI
59
+ def self.fetch(uri_str, limit = 10)
60
+ raise ArgumentError, 'too many HTTP redirects' if limit == 0
61
+
62
+ uri = URI uri_str
63
+ response = Net::HTTP.get_response uri
64
+
65
+ case response
66
+ when Net::HTTPSuccess then
67
+ response
68
+ when Net::HTTPRedirection then
69
+ location = response['location']
70
+ warn "redirected to #{location}"
71
+ fetch(location, limit - 1)
72
+ else
73
+ response.value
74
+ end
75
+ end
76
+
77
+
78
+ ##
79
+ # Prepends an http:// if there isn't one.
80
+ #
81
+ # @param arg [String]
82
+ def self.urlize arg
83
+ if arg =~ /^(http:\/\/|https:\/\/)/
84
+ arg
85
+ else
86
+ "http://#{arg}"
87
+ end
88
+ end
89
+
29
90
 
30
- html.search('script').remove
31
- html.search('meta').remove
32
- html.search('style').remove
33
- text = html.text
34
- @hashified_words = WordCounter.hashify_words text
91
+ ##
92
+ # Visits a website and analyzes it
93
+ #
94
+ # @param arg [String] A website URL
95
+ def self.analyze_website arg
96
+ url = WordCounter.urlize arg
97
+ res = WordCounter.fetch url
98
+ raise NoWebsiteError unless res.code == '200'
99
+
100
+ doc = Nokogiri::HTML res.body
101
+ doc.search('script').remove
102
+ doc.search('meta').remove
103
+ doc.search('style').remove
104
+ text = doc.text
105
+ hashify_words text
35
106
  end
36
107
 
37
108
 
38
109
 
39
- def analyze_file filename
40
- raise NoFileError, "File does not exist!" unless File.exist? filename
110
+ ##
111
+ # Opens a file and analyzes it
112
+ #
113
+ # @param file [String] A path to a file
114
+ def self.analyze_file file
115
+ raise NoFileError, "File does not exist!" unless File.exist? file
116
+
117
+ hashified_words = nil
41
118
 
42
- @file = File.open filename do |file|
43
- @hashified_words = WordCounter.hashify_words file
119
+ @file = File.open file do |file|
120
+ hashified_words = hashify_words file
44
121
  end
122
+
123
+ hashified_words
45
124
  end
46
125
 
47
126
  ##
48
127
  # Builds the data structures we use for our analysis.
49
128
  #
50
- # @param file [File] The file we're analyzing
51
- def self.hashify_words file
129
+ # @param string [String, File] The string we're analyzing (note: this can also be a File object, because `each_line` works with Files too.)
130
+ def self.hashify_words string
52
131
  hash = {}
53
132
 
54
- file.each_line do |line|
55
- # words = line.split
56
- words = line.split(/\W+/)
57
-
58
- words.reject! { |w| w.empty? }
133
+ string.each_line do |line|
134
+ words = line.split(/\W+/).reject { |w| w.empty? }
59
135
 
60
136
  words.each do |word|
61
137
  sym = word.to_sym
@@ -75,27 +151,4 @@ class WordCounter
75
151
 
76
152
  hash
77
153
  end
78
-
79
-
80
- def show_sentences?
81
- @show_sentences
82
- end
83
-
84
-
85
- ##
86
- # Prints a report to stdout
87
- def report
88
- hashified_words_with_sorted_lines = @hashified_words.each do |word, data|
89
- data[:lines].sort
90
- end
91
-
92
- sorted_hash = hashified_words_with_sorted_lines.sort_by { |word, data|
93
- [-data[:count], word]
94
- }
95
-
96
- sorted_hash.each do |word, data|
97
- puts "#{data[:count]} #{word}"
98
- puts " #{data[:lines].join("\n ")}" if show_sentences?
99
- end
100
- end
101
154
  end
@@ -60,13 +60,6 @@ describe WordCounter do
60
60
  end
61
61
 
62
62
 
63
- it '.hashify_words' do
64
- File.open test_file do |file|
65
- result = WordCounter.hashify_words file
66
- result.should eq hash
67
- end
68
- end
69
-
70
63
  it '#analyze_website' do
71
64
  wc = WordCounter.new 'www.example.com'
72
65
 
@@ -102,4 +95,16 @@ describe WordCounter do
102
95
  1 without
103
96
  "
104
97
  end
98
+
99
+
100
+ it '.urlize' do
101
+ url = WordCounter.urlize 'example.com'
102
+ url.should eq 'http://example.com'
103
+ url = WordCounter.urlize 'https://example.com'
104
+ url.should eq 'https://example.com'
105
+ url = WordCounter.urlize 'http://example.com'
106
+ url.should eq 'http://example.com'
107
+ end
108
+
109
+
105
110
  end
data/word_counter.gemspec CHANGED
@@ -24,6 +24,5 @@ Gem::Specification.new do |spec|
24
24
  spec.add_development_dependency "guard-rspec"
25
25
  spec.add_development_dependency "pry-debugger"
26
26
 
27
- spec.add_dependency "curb"
28
27
  spec.add_dependency "nokogiri"
29
28
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: word_counter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - trevor bortins
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-03-25 00:00:00.000000000 Z
11
+ date: 2014-03-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -80,20 +80,6 @@ dependencies:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
- - !ruby/object:Gem::Dependency
84
- name: curb
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - ">="
88
- - !ruby/object:Gem::Version
89
- version: '0'
90
- type: :runtime
91
- prerelease: false
92
- version_requirements: !ruby/object:Gem::Requirement
93
- requirements:
94
- - - ">="
95
- - !ruby/object:Gem::Version
96
- version: '0'
97
83
  - !ruby/object:Gem::Dependency
98
84
  name: nokogiri
99
85
  requirement: !ruby/object:Gem::Requirement
@@ -118,6 +104,7 @@ extra_rdoc_files: []
118
104
  files:
119
105
  - ".gitignore"
120
106
  - ".rspec"
107
+ - ".travis.yml"
121
108
  - Gemfile
122
109
  - Guardfile
123
110
  - LICENSE.txt