utterson 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/utterson +1 -1
- data/lib/utterson.rb +2 -105
- data/lib/utterson/base.rb +49 -0
- data/lib/utterson/html_check.rb +98 -0
- data/lib/utterson/version.rb +3 -0
- data/spec/utterson/base_spec.rb +45 -0
- data/spec/utterson/html_check_spec.rb +156 -0
- metadata +9 -4
- data/spec/utterson_spec.rb +0 -174
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b284cf9e9272e1093bd7c4172ef0036750de59e8
|
|
4
|
+
data.tar.gz: 512987cf376f785eb0621c7e374a002c3385cdb8
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e1aec6ceb0f9748d1f5e378592159e322335214a6f96987bd253e38f2c4c42b6fe9a6c52791a0b9402228e71b5653bd9dfa0760360f3bab733e3a439eeb520c3
|
|
7
|
+
data.tar.gz: 64d65499f25e4c614995eced88b5a052c4623dbd8bbba7024abdfd512b696ecbe4d124b919eca19775e78ffaf2961e8281b2d17147732b5dbfaa9793e84131e4
|
data/bin/utterson
CHANGED
data/lib/utterson.rb
CHANGED
|
@@ -1,105 +1,2 @@
|
|
|
1
|
-
require 'nokogiri'
|
|
2
|
-
|
|
3
|
-
require 'net/http'
|
|
4
|
-
require 'timeout'
|
|
5
|
-
|
|
6
|
-
class Utterson
|
|
7
|
-
attr_reader :errors
|
|
8
|
-
|
|
9
|
-
def initialize(opts={})
|
|
10
|
-
@dir = opts[:dir] || './'
|
|
11
|
-
@root = opts[:root] || @dir
|
|
12
|
-
@errors = {}
|
|
13
|
-
@checked_urls = {}
|
|
14
|
-
@stats = {errors: 0, files: 0, urls: 0}
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
def check
|
|
18
|
-
Dir.glob(File.join(@dir, '**/*.{html,htm}')) do |f|
|
|
19
|
-
@stats[:files] += 1
|
|
20
|
-
puts "Checking #{f}"
|
|
21
|
-
collect_uris_from(f).each do |u|
|
|
22
|
-
@stats[:urls] += 1
|
|
23
|
-
check_uri(u, f)
|
|
24
|
-
end
|
|
25
|
-
end
|
|
26
|
-
print_results
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
def collect_uris_from(f)
|
|
30
|
-
ret = []
|
|
31
|
-
doc = Nokogiri::HTML(File.read(f))
|
|
32
|
-
doc.traverse do |el|
|
|
33
|
-
ret << el['src'] unless el['src'].nil?
|
|
34
|
-
ret << el['href'] unless el['href'].nil?
|
|
35
|
-
end
|
|
36
|
-
ret
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
def check_uri(url, file)
|
|
40
|
-
return if @checked_urls[url]
|
|
41
|
-
|
|
42
|
-
if url =~ /^(https?:)?\/\//
|
|
43
|
-
check_remote_uri url, file
|
|
44
|
-
else
|
|
45
|
-
check_local_uri url, file
|
|
46
|
-
end
|
|
47
|
-
@checked_urls[url] = true
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
def check_remote_uri(url, file)
|
|
51
|
-
begin
|
|
52
|
-
uri = URI(url.gsub(/^\/\//, 'http://'))
|
|
53
|
-
rescue URI::InvalidURIError => e
|
|
54
|
-
return add_error(file, uri.to_s, e.message)
|
|
55
|
-
end
|
|
56
|
-
begin
|
|
57
|
-
response = Net::HTTP.start(uri.host, uri.port,
|
|
58
|
-
:use_ssl => uri.scheme == 'https') do |http|
|
|
59
|
-
p = uri.path.empty? ? "/" : uri.path
|
|
60
|
-
http.head(p)
|
|
61
|
-
end
|
|
62
|
-
if response.code =~ /^[^23]/
|
|
63
|
-
add_error(file, uri.to_s, response)
|
|
64
|
-
end
|
|
65
|
-
rescue Timeout::Error
|
|
66
|
-
add_error(file, uri.to_s, "Reading buffer timed out")
|
|
67
|
-
rescue Errno::ETIMEDOUT
|
|
68
|
-
add_error(file, uri.to_s, "Connection timed out")
|
|
69
|
-
rescue SocketError => e
|
|
70
|
-
add_error(file, uri.to_s, e.message)
|
|
71
|
-
end
|
|
72
|
-
end
|
|
73
|
-
|
|
74
|
-
def check_local_uri(url, file)
|
|
75
|
-
url.gsub!(/\?.*$/, '')
|
|
76
|
-
if url =~ /^\//
|
|
77
|
-
path = File.expand_path(".#{url}", @root)
|
|
78
|
-
else
|
|
79
|
-
path = File.expand_path(url, File.dirname(file))
|
|
80
|
-
end
|
|
81
|
-
add_error(file, url, "File not found") unless File.exists? path
|
|
82
|
-
end
|
|
83
|
-
|
|
84
|
-
def add_error(file, url, response)
|
|
85
|
-
@stats[:errors] += 1
|
|
86
|
-
@errors[file] = {} if @errors[file].nil?
|
|
87
|
-
@errors[file][url] = response
|
|
88
|
-
end
|
|
89
|
-
|
|
90
|
-
def print_results
|
|
91
|
-
@errors.each do |file, info|
|
|
92
|
-
puts file
|
|
93
|
-
info.each do |url, response|
|
|
94
|
-
s = response.respond_to?(:code) ? "HTTP #{response.code}" : response
|
|
95
|
-
puts "\t#{url}\n\t\t#{s}"
|
|
96
|
-
end
|
|
97
|
-
end
|
|
98
|
-
if @stats[:errors] == 0
|
|
99
|
-
puts "#{@stats[:files]} files with #{@stats[:urls]} urls checked."
|
|
100
|
-
else
|
|
101
|
-
puts "#{@stats[:files]} files with #{@stats[:urls]} urls checked and #{@stats[:errors]} errors found."
|
|
102
|
-
end
|
|
103
|
-
end
|
|
104
|
-
|
|
105
|
-
end
|
|
1
|
+
require 'utterson/base'
|
|
2
|
+
require 'utterson/version'
|
data/lib/utterson/base.rb
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
require 'utterson/html_check'
|
|
2
|
+
|
|
3
|
+
module Utterson
|
|
4
|
+
class Base
|
|
5
|
+
attr_reader :errors
|
|
6
|
+
|
|
7
|
+
def initialize(opts={})
|
|
8
|
+
@dir = opts[:dir] || './'
|
|
9
|
+
@root = opts[:root] || @dir
|
|
10
|
+
@errors = {}
|
|
11
|
+
@checked_urls = {}
|
|
12
|
+
@stats = {errors: 0, files: 0, urls: 0}
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def check
|
|
16
|
+
threads = []
|
|
17
|
+
Dir.glob(File.join(@dir, '**/*.{html,htm}')) do |f|
|
|
18
|
+
@stats[:files] += 1
|
|
19
|
+
puts "Checking file #{f}"
|
|
20
|
+
c = HtmlCheck.new(file: f, root: @root)
|
|
21
|
+
c.when_done do |r|
|
|
22
|
+
@stats[:urls] = r[:urls]
|
|
23
|
+
@errors.merge! r[:errors]
|
|
24
|
+
puts "Check done with #{f}"
|
|
25
|
+
end
|
|
26
|
+
threads << c.run
|
|
27
|
+
end
|
|
28
|
+
threads.each {|t| t.join}
|
|
29
|
+
print_results
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def print_results
|
|
33
|
+
count = 0
|
|
34
|
+
@errors.each do |file, info|
|
|
35
|
+
puts file
|
|
36
|
+
info.each do |url, response|
|
|
37
|
+
s = response.respond_to?(:code) ? "HTTP #{response.code}" : response
|
|
38
|
+
puts "\t#{url}\n\t\t#{s}"
|
|
39
|
+
count += 1
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
if count == 0
|
|
43
|
+
puts "#{@stats[:files]} files with #{@stats[:urls]} urls checked."
|
|
44
|
+
else
|
|
45
|
+
puts "#{@stats[:files]} files with #{@stats[:urls]} urls checked and #{count} errors found."
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
end
|
data/lib/utterson/html_check.rb
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
require 'nokogiri'
|
|
2
|
+
|
|
3
|
+
require 'net/http'
|
|
4
|
+
require 'timeout'
|
|
5
|
+
require 'thread'
|
|
6
|
+
|
|
7
|
+
module Utterson
|
|
8
|
+
class HtmlCheck
|
|
9
|
+
attr_reader :errors
|
|
10
|
+
|
|
11
|
+
@@semaphore = Mutex.new
|
|
12
|
+
@@checked_urls = {}
|
|
13
|
+
|
|
14
|
+
def initialize(opts={})
|
|
15
|
+
@file = opts[:file]
|
|
16
|
+
@root = opts[:root]
|
|
17
|
+
@errors = {}
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def when_done(&handler)
|
|
21
|
+
@result_handler = handler
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def run
|
|
25
|
+
Thread.new do
|
|
26
|
+
collect_uris_from(@file).each do |u|
|
|
27
|
+
check_uri(u, @file)
|
|
28
|
+
end
|
|
29
|
+
unless @result_handler.nil?
|
|
30
|
+
@@semaphore.synchronize do
|
|
31
|
+
@result_handler.call(errors: @errors, urls: @@checked_urls.count)
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def collect_uris_from(f)
|
|
38
|
+
ret = []
|
|
39
|
+
doc = Nokogiri::HTML(File.read(f))
|
|
40
|
+
doc.traverse do |el|
|
|
41
|
+
ret << el['src'] unless el['src'].nil?
|
|
42
|
+
ret << el['href'] unless el['href'].nil?
|
|
43
|
+
end
|
|
44
|
+
ret
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def check_uri(url, file)
|
|
48
|
+
@@semaphore.synchronize do
|
|
49
|
+
if @@checked_urls[url]
|
|
50
|
+
return
|
|
51
|
+
else
|
|
52
|
+
@@checked_urls[url] = true
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
if url =~ /^(https?:)?\/\//
|
|
57
|
+
check_remote_uri url, file
|
|
58
|
+
else
|
|
59
|
+
check_local_uri url, file
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def check_remote_uri(url, file)
|
|
64
|
+
begin
|
|
65
|
+
uri = URI(url.gsub(/^\/\//, 'http://'))
|
|
66
|
+
rescue URI::InvalidURIError => e
|
|
67
|
+
return add_error(file, uri.to_s, e.message)
|
|
68
|
+
end
|
|
69
|
+
begin
|
|
70
|
+
response = Net::HTTP.start(uri.host, uri.port,
|
|
71
|
+
:use_ssl => uri.scheme == 'https') do |http|
|
|
72
|
+
p = uri.path.empty? ? "/" : uri.path
|
|
73
|
+
http.head(p)
|
|
74
|
+
end
|
|
75
|
+
if response.code =~ /^[^23]/
|
|
76
|
+
add_error(file, uri.to_s, response)
|
|
77
|
+
end
|
|
78
|
+
rescue => e
|
|
79
|
+
add_error(file, uri.to_s, e.message)
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
def check_local_uri(url, file)
|
|
84
|
+
url.gsub!(/\?.*$/, '')
|
|
85
|
+
if url =~ /^\//
|
|
86
|
+
path = File.expand_path(".#{url}", @root)
|
|
87
|
+
else
|
|
88
|
+
path = File.expand_path(url, File.dirname(file))
|
|
89
|
+
end
|
|
90
|
+
add_error(file, url, "File not found") unless File.exists? path
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def add_error(file, url, response)
|
|
94
|
+
@errors[file] = {} if @errors[file].nil?
|
|
95
|
+
@errors[file][url] = response
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
end
|
data/spec/utterson/base_spec.rb
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
module Utterson
|
|
4
|
+
describe Base do
|
|
5
|
+
it "should go through all htm and html files in target dir" do
|
|
6
|
+
dir = "spec/fixtures/dir-structure"
|
|
7
|
+
u = Base.new(dir: dir)
|
|
8
|
+
HtmlCheck.stub(:new) {double(when_done: {}, run: double(join: {}))}
|
|
9
|
+
|
|
10
|
+
["spec/fixtures/dir-structure/1.htm",
|
|
11
|
+
"spec/fixtures/dir-structure/2.html",
|
|
12
|
+
"spec/fixtures/dir-structure/a/3.htm",
|
|
13
|
+
"spec/fixtures/dir-structure/a/b/4.html"].each do |file|
|
|
14
|
+
HtmlCheck.should_receive(:new).with(file: file, root: dir)
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
u.check
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
describe "#print_results" do
|
|
21
|
+
it "should output only basic stats if no errors" do
|
|
22
|
+
u = Base.new(dir: "spec/fixtures/dir-structure")
|
|
23
|
+
output = capture_stdout do
|
|
24
|
+
u.check
|
|
25
|
+
end
|
|
26
|
+
output.should match(/4 files with 0 urls checked/)
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
it "should output error information" do
|
|
30
|
+
stub_request(:head, "http://example.com/").
|
|
31
|
+
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
|
32
|
+
to_return(:status => 404, :body => "", :headers => {})
|
|
33
|
+
u = Base.new(dir: "spec/fixtures")
|
|
34
|
+
output = capture_stdout do
|
|
35
|
+
u.check
|
|
36
|
+
end
|
|
37
|
+
output.should match("spec/fixtures/sample.html\n\tstyle.css\n\t\tFile not found")
|
|
38
|
+
output.should match("script.js\n\t\tFile not found")
|
|
39
|
+
output.should match("image.jpg\n\t\tFile not found")
|
|
40
|
+
output.should match("http://example.com\n\t\tHTTP 404")
|
|
41
|
+
output.should match("5 files with 4 urls checked and 4 errors found")
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
end
|
data/spec/utterson/html_check_spec.rb
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
module Utterson
|
|
4
|
+
describe HtmlCheck do
|
|
5
|
+
before(:each) do
|
|
6
|
+
HtmlCheck.class_variable_set(:@@checked_urls, {})
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
let(:sample_file) {"spec/fixtures/sample.html"}
|
|
10
|
+
|
|
11
|
+
it "should check all urls which are found" do
|
|
12
|
+
h = HtmlCheck.new(dir: "spec/fixtures", file: "spec/fixtures/sample.html")
|
|
13
|
+
h.stub(:check_uri) {}
|
|
14
|
+
h.should_receive(:check_uri).exactly(4).times
|
|
15
|
+
h.run.join
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
it "should find all uris from sample document" do
|
|
19
|
+
h = HtmlCheck.new(file: sample_file)
|
|
20
|
+
uris = h.collect_uris_from(sample_file)
|
|
21
|
+
uris.should include("script.js")
|
|
22
|
+
uris.should include("style.css")
|
|
23
|
+
uris.should include("http://example.com")
|
|
24
|
+
uris.should include("image.jpg")
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
describe "#check_uri" do
|
|
28
|
+
let(:h) {HtmlCheck.new}
|
|
29
|
+
let(:html_file) {"file.html"}
|
|
30
|
+
|
|
31
|
+
it "should check same url only once" do
|
|
32
|
+
url = "http://example.com"
|
|
33
|
+
h.stub(:check_remote_uri) {}
|
|
34
|
+
h.should_receive(:check_remote_uri).once.with(url, html_file)
|
|
35
|
+
h.check_uri(url, html_file)
|
|
36
|
+
h.check_uri(url, html_file)
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
it "should use remote checking for http protocol" do
|
|
40
|
+
url = "http://example.com"
|
|
41
|
+
h.stub(:check_remote_uri) {}
|
|
42
|
+
h.should_receive(:check_remote_uri).with(url, html_file)
|
|
43
|
+
h.check_uri(url, html_file)
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
it "should use remote checking for https protocol" do
|
|
47
|
+
url = "https://example.com"
|
|
48
|
+
h.stub(:check_remote_uri) {}
|
|
49
|
+
h.should_receive(:check_remote_uri).with(url, html_file)
|
|
50
|
+
h.check_uri(url, html_file)
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
it "should use remote checking when only // is specified" do
|
|
54
|
+
url = "//example.com"
|
|
55
|
+
h.stub(:check_remote_uri) {}
|
|
56
|
+
h.should_receive(:check_remote_uri).with(url, html_file)
|
|
57
|
+
h.check_uri(url, html_file)
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
it "should use local checking for relative uris" do
|
|
61
|
+
url = "../file.html"
|
|
62
|
+
h.stub(:check_local_uri) {}
|
|
63
|
+
h.should_receive(:check_local_uri).with(url, html_file)
|
|
64
|
+
h.check_uri(url, html_file)
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
describe "#check_local_uri" do
|
|
69
|
+
let(:h) {HtmlCheck.new(root: "spec/fixtures/dir-structure")}
|
|
70
|
+
let(:html_file) {"spec/fixtures/dir-structure/1.htm"}
|
|
71
|
+
|
|
72
|
+
it "should not assign error info if file exists" do
|
|
73
|
+
h.check_local_uri("../sample.html", html_file)
|
|
74
|
+
h.errors.should be_empty
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
it "should assign error info if file doesn't exist" do
|
|
78
|
+
url = "../sample_not_found.html"
|
|
79
|
+
h.check_local_uri(url, html_file)
|
|
80
|
+
h.errors[html_file].should == {url => "File not found"}
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
it "should use root directory when urls start with /" do
|
|
84
|
+
h2 = HtmlCheck.new(file: html_file,
|
|
85
|
+
root: "spec/fixtures")
|
|
86
|
+
h2.check_local_uri("/sample.html", html_file)
|
|
87
|
+
h2.errors.should be_empty
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
it "should handle target directory as root for urls starting with / if root is no available" do
|
|
91
|
+
h.check_local_uri("/2.html", html_file)
|
|
92
|
+
h.errors.should be_empty
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
it "should ignore query string when checking local files" do
|
|
96
|
+
h.check_local_uri("2.html?queryparam=value", html_file)
|
|
97
|
+
h.errors.should be_empty
|
|
98
|
+
end
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
describe "#check_remote_uri" do
|
|
102
|
+
let(:h) {HtmlCheck.new(file: "test.html")}
|
|
103
|
+
let(:html_file) {"test.html"}
|
|
104
|
+
let(:url) {"http://example.com/index.html"}
|
|
105
|
+
|
|
106
|
+
it "should not assign error info if request is successfull" do
|
|
107
|
+
stub_request(:head, url).
|
|
108
|
+
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
|
109
|
+
to_return(:status => 200, :body => "", :headers => {})
|
|
110
|
+
h.check_remote_uri(url, html_file)
|
|
111
|
+
h.errors.should be_empty
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
it "should assign error info if there is error response" do
|
|
115
|
+
stub_request(:head, url).
|
|
116
|
+
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
|
117
|
+
to_return(:status => 404, :body => "", :headers => {})
|
|
118
|
+
h.check_remote_uri(url, html_file)
|
|
119
|
+
puts h.errors.inspect
|
|
120
|
+
h.errors[html_file].should_not be_empty
|
|
121
|
+
h.errors[html_file][url].instance_of?(Net::HTTPNotFound).should be_true
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
it "should add error status from buffer timeouts" do
|
|
125
|
+
stub_request(:head, url).to_timeout
|
|
126
|
+
h.check_remote_uri(url, html_file)
|
|
127
|
+
h.errors.should_not be_empty
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
it "should add error status from connection timeouts" do
|
|
131
|
+
stub_request(:head, url).to_raise(Errno::ETIMEDOUT)
|
|
132
|
+
h.check_remote_uri(url, html_file)
|
|
133
|
+
h.errors.should_not be_empty
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
it "should add error status from 'No route to host' errors" do
|
|
137
|
+
stub_request(:head, url).to_raise(Errno::EHOSTUNREACH)
|
|
138
|
+
h.check_remote_uri(url, html_file)
|
|
139
|
+
h.errors.should_not be_empty
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
it "shoud add error status from name resolution errors" do
|
|
143
|
+
stub_request(:head, url).
|
|
144
|
+
to_raise(SocketError.new('getaddrinfo: Name or service not known'))
|
|
145
|
+
h.check_remote_uri(url, html_file)
|
|
146
|
+
h.errors.should_not be_empty
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
it "shoud add error status when invalid URI" do
|
|
150
|
+
URI.stub(:new).and_raise(URI::InvalidURIError)
|
|
151
|
+
h.check_remote_uri("http://invalid_uri", html_file)
|
|
152
|
+
h.errors.should_not be_empty
|
|
153
|
+
end
|
|
154
|
+
end
|
|
155
|
+
end
|
|
156
|
+
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: utterson
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.2
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Juhamatti Niemelä
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2013-11-
|
|
11
|
+
date: 2013-11-07 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: trollop
|
|
@@ -103,15 +103,19 @@ extensions: []
|
|
|
103
103
|
extra_rdoc_files: []
|
|
104
104
|
files:
|
|
105
105
|
- bin/utterson
|
|
106
|
+
- lib/utterson/html_check.rb
|
|
107
|
+
- lib/utterson/version.rb
|
|
108
|
+
- lib/utterson/base.rb
|
|
106
109
|
- lib/utterson.rb
|
|
107
110
|
- README.md
|
|
111
|
+
- spec/utterson/base_spec.rb
|
|
112
|
+
- spec/utterson/html_check_spec.rb
|
|
108
113
|
- spec/spec_helper.rb
|
|
109
114
|
- spec/fixtures/dir-structure/a/3.htm
|
|
110
115
|
- spec/fixtures/dir-structure/a/b/4.html
|
|
111
116
|
- spec/fixtures/dir-structure/2.html
|
|
112
117
|
- spec/fixtures/dir-structure/1.htm
|
|
113
118
|
- spec/fixtures/sample.html
|
|
114
|
-
- spec/utterson_spec.rb
|
|
115
119
|
homepage: https://github.com/iiska/utterson
|
|
116
120
|
licenses:
|
|
117
121
|
- MIT
|
|
@@ -137,11 +141,12 @@ signing_key:
|
|
|
137
141
|
specification_version: 4
|
|
138
142
|
summary: Friendly HTML crawler and url checker
|
|
139
143
|
test_files:
|
|
144
|
+
- spec/utterson/base_spec.rb
|
|
145
|
+
- spec/utterson/html_check_spec.rb
|
|
140
146
|
- spec/spec_helper.rb
|
|
141
147
|
- spec/fixtures/dir-structure/a/3.htm
|
|
142
148
|
- spec/fixtures/dir-structure/a/b/4.html
|
|
143
149
|
- spec/fixtures/dir-structure/2.html
|
|
144
150
|
- spec/fixtures/dir-structure/1.htm
|
|
145
151
|
- spec/fixtures/sample.html
|
|
146
|
-
- spec/utterson_spec.rb
|
|
147
152
|
has_rdoc:
|
data/spec/utterson_spec.rb
DELETED
|
@@ -1,174 +0,0 @@
|
|
|
1
|
-
require 'spec_helper'
|
|
2
|
-
|
|
3
|
-
describe Utterson do
|
|
4
|
-
it "should go through all htm and html files in target dir" do
|
|
5
|
-
u = Utterson.new(dir: "spec/fixtures/dir-structure")
|
|
6
|
-
u.stub(:collect_uris_from) {[]}
|
|
7
|
-
|
|
8
|
-
["spec/fixtures/dir-structure/1.htm",
|
|
9
|
-
"spec/fixtures/dir-structure/2.html",
|
|
10
|
-
"spec/fixtures/dir-structure/a/3.htm",
|
|
11
|
-
"spec/fixtures/dir-structure/a/b/4.html"].each do |file|
|
|
12
|
-
u.should_receive(:collect_uris_from).with(file)
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
u.check
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
it "should check all urls which are found" do
|
|
19
|
-
u = Utterson.new(dir: "spec/fixtures")
|
|
20
|
-
u.stub(:check_uri) {}
|
|
21
|
-
u.should_receive(:check_uri).exactly(4).times
|
|
22
|
-
|
|
23
|
-
u.check
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
it "should find all uris from sample document" do
|
|
27
|
-
u = Utterson.new
|
|
28
|
-
uris = u.collect_uris_from("spec/fixtures/sample.html")
|
|
29
|
-
uris.should include("script.js")
|
|
30
|
-
uris.should include("style.css")
|
|
31
|
-
uris.should include("http://example.com")
|
|
32
|
-
uris.should include("image.jpg")
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
describe "#check_uri" do
|
|
36
|
-
let(:u) {Utterson.new}
|
|
37
|
-
let(:html_file) {"file.html"}
|
|
38
|
-
|
|
39
|
-
it "should use remote checking for http protocol" do
|
|
40
|
-
url = "http://example.com"
|
|
41
|
-
u.stub(:check_remote_uri) {}
|
|
42
|
-
u.should_receive(:check_remote_uri).with(url, html_file)
|
|
43
|
-
u.check_uri(url, html_file)
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
it "should use remote checking for https protocol" do
|
|
47
|
-
url = "https://example.com"
|
|
48
|
-
u.stub(:check_remote_uri) {}
|
|
49
|
-
u.should_receive(:check_remote_uri).with(url, html_file)
|
|
50
|
-
u.check_uri(url, html_file)
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
it "should use remote checking when only // is specified" do
|
|
54
|
-
url = "//example.com"
|
|
55
|
-
u.stub(:check_remote_uri) {}
|
|
56
|
-
u.should_receive(:check_remote_uri).with(url, html_file)
|
|
57
|
-
u.check_uri(url, html_file)
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
it "should use local checking for relative uris" do
|
|
61
|
-
url = "../file.html"
|
|
62
|
-
u.stub(:check_local_uri) {}
|
|
63
|
-
u.should_receive(:check_local_uri).with(url, html_file)
|
|
64
|
-
u.check_uri(url, html_file)
|
|
65
|
-
end
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
describe "#check_local_uri" do
|
|
69
|
-
let(:u) {Utterson.new(dir: "spec/fixtures/dir-structure")}
|
|
70
|
-
let(:html_file) {"spec/fixtures/dir-structure/1.htm"}
|
|
71
|
-
|
|
72
|
-
it "should not assign error info if file exists" do
|
|
73
|
-
u.check_local_uri("../sample.html", html_file)
|
|
74
|
-
u.errors.should be_empty
|
|
75
|
-
end
|
|
76
|
-
|
|
77
|
-
it "should assign error info if file doesn't exist" do
|
|
78
|
-
url = "../sample_not_found.html"
|
|
79
|
-
u.check_local_uri(url, html_file)
|
|
80
|
-
u.errors[html_file].should == {url => "File not found"}
|
|
81
|
-
end
|
|
82
|
-
|
|
83
|
-
it "should use root directory when urls start with /" do
|
|
84
|
-
u2 = Utterson.new(dir: "spec/fixtures/dir-structure",
|
|
85
|
-
root: "spec/fixtures")
|
|
86
|
-
u2.check_local_uri("/sample.html", html_file)
|
|
87
|
-
u2.errors.should be_empty
|
|
88
|
-
end
|
|
89
|
-
|
|
90
|
-
it "should handle target directory as root for urls starting with / if root is no available" do
|
|
91
|
-
u.check_local_uri("/2.html", html_file)
|
|
92
|
-
u.errors.should be_empty
|
|
93
|
-
end
|
|
94
|
-
|
|
95
|
-
it "should ignore query string when checking local files" do
|
|
96
|
-
u.check_local_uri("2.html?queryparam=value", html_file)
|
|
97
|
-
u.errors.should be_empty
|
|
98
|
-
end
|
|
99
|
-
end
|
|
100
|
-
|
|
101
|
-
describe "#check_remote_uri" do
|
|
102
|
-
let(:u) {Utterson.new}
|
|
103
|
-
let(:html_file) {"test.html"}
|
|
104
|
-
let(:url) {"http://example.com/index.html"}
|
|
105
|
-
|
|
106
|
-
it "should not assign error info if request is successfull" do
|
|
107
|
-
stub_request(:head, url).
|
|
108
|
-
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
|
109
|
-
to_return(:status => 200, :body => "", :headers => {})
|
|
110
|
-
u.check_remote_uri(url, html_file)
|
|
111
|
-
u.errors.should be_empty
|
|
112
|
-
end
|
|
113
|
-
|
|
114
|
-
it "should assign error info if there is error response" do
|
|
115
|
-
stub_request(:head, url).
|
|
116
|
-
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
|
117
|
-
to_return(:status => 404, :body => "", :headers => {})
|
|
118
|
-
u.check_remote_uri(url, html_file)
|
|
119
|
-
puts u.errors.inspect
|
|
120
|
-
u.errors[html_file].should_not be_empty
|
|
121
|
-
u.errors[html_file][url].instance_of?(Net::HTTPNotFound).should be_true
|
|
122
|
-
end
|
|
123
|
-
|
|
124
|
-
it "should add error status from buffer timeouts" do
|
|
125
|
-
stub_request(:head, url).to_timeout
|
|
126
|
-
u.check_remote_uri(url, html_file)
|
|
127
|
-
u.errors.should_not be_empty
|
|
128
|
-
end
|
|
129
|
-
|
|
130
|
-
it "should add error status from connection timeouts" do
|
|
131
|
-
stub_request(:head, url).to_raise(Errno::ETIMEDOUT)
|
|
132
|
-
u.check_remote_uri(url, html_file)
|
|
133
|
-
u.errors.should_not be_empty
|
|
134
|
-
end
|
|
135
|
-
|
|
136
|
-
it "shoud add error status from name resolution errors" do
|
|
137
|
-
stub_request(:head, url).
|
|
138
|
-
to_raise(SocketError.new('getaddrinfo: Name or service not known'))
|
|
139
|
-
u.check_remote_uri(url, html_file)
|
|
140
|
-
u.errors.should_not be_empty
|
|
141
|
-
end
|
|
142
|
-
|
|
143
|
-
it "shoud add error status when invalid URI" do
|
|
144
|
-
URI.stub(:new).and_raise(URI::InvalidURIError)
|
|
145
|
-
u.check_remote_uri("http://invalid_uri", html_file)
|
|
146
|
-
u.errors.should_not be_empty
|
|
147
|
-
end
|
|
148
|
-
end
|
|
149
|
-
|
|
150
|
-
describe "#print_results" do
|
|
151
|
-
it "should output only basic stats if no errors" do
|
|
152
|
-
u = Utterson.new(dir: "spec/fixtures/dir-structure")
|
|
153
|
-
output = capture_stdout do
|
|
154
|
-
u.check
|
|
155
|
-
end
|
|
156
|
-
output.should match(/4 files with 0 urls checked/)
|
|
157
|
-
end
|
|
158
|
-
|
|
159
|
-
it "should output error information" do
|
|
160
|
-
stub_request(:head, "http://example.com/").
|
|
161
|
-
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
|
162
|
-
to_return(:status => 404, :body => "", :headers => {})
|
|
163
|
-
u = Utterson.new(dir: "spec/fixtures")
|
|
164
|
-
output = capture_stdout do
|
|
165
|
-
u.check
|
|
166
|
-
end
|
|
167
|
-
output.should match("spec/fixtures/sample.html\n\tstyle.css\n\t\tFile not found")
|
|
168
|
-
output.should match("script.js\n\t\tFile not found")
|
|
169
|
-
output.should match("image.jpg\n\t\tFile not found")
|
|
170
|
-
output.should match("http://example.com\n\t\tHTTP 404")
|
|
171
|
-
output.should match("5 files with 4 urls checked and 4 errors found")
|
|
172
|
-
end
|
|
173
|
-
end
|
|
174
|
-
end
|