validate-website 0.8.1 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/validate_website/core.rb +53 -55
- data/lib/validate_website/option_parser.rb +0 -9
- data/man/man1/validate-website-static.1 +3 -3
- data/man/man1/validate-website.1 +4 -9
- data/spec/core_spec.rb +7 -6
- data/spec/spec_helper.rb +1 -1
- data/spec/validator_spec.rb +6 -6
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7c6d263126db73e28c33fddb7ba58358f522d305
|
4
|
+
data.tar.gz: bc4d5405e1872d425dea1691456159d523b9c2d2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3039cb7c6d5082a2d69915ddf2c1d102362049e5297899eb2aeba0b8b493edc32301aa3803a20411bbfb82bbb9fe05cbebc514083ae1aef9ae954769cd308521
|
7
|
+
data.tar.gz: 14682a11d7e68083aefecd1ab2ca978de5c3d257f0a8d6d5bcacf4427586f5814947f7ea0c3b52bd821d72c3e4dbc0803ae9c950b629576fa10bf05facec1758
|
data/lib/validate_website/core.rb
CHANGED
@@ -7,7 +7,7 @@ require 'validate_website/option_parser'
|
|
7
7
|
require 'validate_website/validator'
|
8
8
|
require 'validate_website/colorful_messages'
|
9
9
|
|
10
|
-
require '
|
10
|
+
require 'spidr'
|
11
11
|
|
12
12
|
module ValidateWebsite
|
13
13
|
|
@@ -15,7 +15,7 @@ module ValidateWebsite
|
|
15
15
|
class Core
|
16
16
|
|
17
17
|
attr_accessor :site
|
18
|
-
attr_reader :options, :
|
18
|
+
attr_reader :options, :crawler
|
19
19
|
|
20
20
|
include ColorfulMessages
|
21
21
|
|
@@ -52,29 +52,35 @@ module ValidateWebsite
|
|
52
52
|
#
|
53
53
|
def crawl(opts={})
|
54
54
|
opts = @options.merge(opts)
|
55
|
+
opts.merge!(:ignore_links => Regexp.new(opts[:exclude])) if opts[:exclude]
|
56
|
+
|
55
57
|
puts color(:note, "validating #{@site}", opts[:color]) unless opts[:quiet]
|
56
58
|
puts color(:warning, "No internet connection") unless internet_connection?
|
57
59
|
|
58
|
-
@
|
59
|
-
|
60
|
+
@crawler = Spidr.site(@site, opts) do |crawler|
|
61
|
+
crawler.every_css_page do |page|
|
62
|
+
extract_urls_from_css(page).each do |u|
|
63
|
+
crawler.enqueue(u)
|
64
|
+
end
|
65
|
+
end
|
60
66
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
67
|
+
crawler.every_html_page do |page|
|
68
|
+
extract_imgs_from_page(page).each do |i|
|
69
|
+
crawler.enqueue(i)
|
70
|
+
end
|
65
71
|
|
66
|
-
|
67
|
-
|
68
|
-
if opts[:markup_validation] && page.html? && page.fetched?
|
69
|
-
validate(page.doc, page.body, url, opts)
|
72
|
+
if opts[:markup_validation] && page.html?
|
73
|
+
validate(page.doc, page.body, page.url, opts)
|
70
74
|
end
|
75
|
+
end
|
71
76
|
|
72
|
-
|
77
|
+
crawler.every_failed_url do |url|
|
78
|
+
if opts[:not_found]
|
73
79
|
@not_found_error = true
|
74
|
-
puts color(:error, "%s linked
|
80
|
+
puts color(:error, "%s linked but not exist" % [url], opts[:color])
|
75
81
|
to_file(url)
|
76
82
|
end
|
77
|
-
|
83
|
+
end
|
78
84
|
end
|
79
85
|
end
|
80
86
|
|
@@ -93,17 +99,14 @@ module ValidateWebsite
|
|
93
99
|
files.each do |f|
|
94
100
|
next unless File.file?(f)
|
95
101
|
|
96
|
-
|
97
|
-
|
98
|
-
:headers => {'content-type' => ['text/html', 'application/xhtml+xml']})
|
102
|
+
response = fake_http_response(open(f).read)
|
103
|
+
page = Spidr::Page.new(URI.parse(opts[:site] + URI.encode(f)), response)
|
99
104
|
|
100
105
|
if opts[:markup_validation]
|
101
106
|
validate(page.doc, page.body, f)
|
102
107
|
end
|
103
108
|
if opts[:not_found]
|
104
|
-
|
105
|
-
links.concat extract_urls_from_img_script_iframe_link(page)
|
106
|
-
check_static_not_found(links.uniq)
|
109
|
+
check_static_not_found(page.links)
|
107
110
|
end
|
108
111
|
end
|
109
112
|
end
|
@@ -128,13 +131,6 @@ module ValidateWebsite
|
|
128
131
|
end
|
129
132
|
end
|
130
133
|
|
131
|
-
def get_url(page, elem, attrname)
|
132
|
-
u = elem.attributes[attrname].to_s
|
133
|
-
return if u.nil? || u.empty?
|
134
|
-
abs = page.to_absolute(u) rescue nil
|
135
|
-
abs if abs && page.in_domain?(abs)
|
136
|
-
end
|
137
|
-
|
138
134
|
# check files linked on static document
|
139
135
|
# see lib/validate_website/runner.rb
|
140
136
|
def check_static_not_found(links, opts={})
|
@@ -143,8 +139,8 @@ module ValidateWebsite
|
|
143
139
|
file_location = URI.parse(File.join(Dir.getwd, l.path)).path
|
144
140
|
# Check CSS url()
|
145
141
|
if File.exists?(file_location) && File.extname(file_location) == '.css'
|
146
|
-
|
147
|
-
|
142
|
+
response = fake_http_response(open(file_location).read, ['text/css'])
|
143
|
+
css_page = Spidr::Page.new(l, response)
|
148
144
|
links.concat extract_urls_from_css(css_page)
|
149
145
|
links.uniq!
|
150
146
|
end
|
@@ -156,42 +152,29 @@ module ValidateWebsite
|
|
156
152
|
end
|
157
153
|
end
|
158
154
|
|
159
|
-
# Extract urls from img script iframe and link element
|
160
|
-
#
|
161
|
-
# @param [Anemone::Page] an Anemone::Page object
|
162
|
-
# @return [Array] Lists of urls
|
163
|
-
#
|
164
|
-
def extract_urls_from_img_script_iframe_link(page)
|
165
|
-
links = Set.new
|
166
|
-
page.doc.css('img, script, iframe, link').each do |elem|
|
167
|
-
if elem.name == 'link'
|
168
|
-
url = get_url(page, elem, "href")
|
169
|
-
else
|
170
|
-
url = get_url(page, elem, "src")
|
171
|
-
end
|
172
|
-
links << url unless url.nil? || url.to_s.empty?
|
173
|
-
end
|
174
|
-
links
|
175
|
-
end
|
176
|
-
|
177
155
|
# Extract urls from CSS page
|
178
156
|
#
|
179
|
-
# @param [
|
157
|
+
# @param [Spidr::Page] a Spidr::Page object
|
180
158
|
# @return [Array] Lists of urls
|
181
159
|
#
|
182
160
|
def extract_urls_from_css(page)
|
183
|
-
page.body.scan(/url\((['".\/\w-]+)\)/).inject([]) do |result, url|
|
161
|
+
page.body.scan(/url\((['".\/\w-]+)\)/).inject(Set[]) do |result, url|
|
184
162
|
url = url.first.gsub("'", "").gsub('"', '')
|
185
163
|
abs = page.to_absolute(URI.parse(url))
|
186
164
|
result << abs
|
187
165
|
end
|
188
166
|
end
|
189
167
|
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
168
|
+
# Extract image URLs from page
|
169
|
+
#
|
170
|
+
# @param [Spidr::Page] a Spidr::Page object
|
171
|
+
# @return [Array] Lists of urls
|
172
|
+
#
|
173
|
+
def extract_imgs_from_page(page)
|
174
|
+
page.doc.search('//img[@src]').inject(Set[]) do |result, elem|
|
175
|
+
u = elem.attributes['src']
|
176
|
+
result << page.to_absolute(URI.parse(u))
|
177
|
+
end
|
195
178
|
end
|
196
179
|
|
197
180
|
##
|
@@ -220,5 +203,20 @@ module ValidateWebsite
|
|
220
203
|
end
|
221
204
|
end
|
222
205
|
|
206
|
+
# Fake http response for Spidr static crawling
|
207
|
+
# see https://github.com/ruby/ruby/blob/trunk/lib/net/http/response.rb
|
208
|
+
#
|
209
|
+
# @param [String] response body
|
210
|
+
# @param [Array] content types
|
211
|
+
# @return [Net::HTTPResponse] fake http response
|
212
|
+
def fake_http_response(body, content_types=['text/html', 'text/xhtml+xml'])
|
213
|
+
response = Net::HTTPResponse.new '1.1', 200, 'OK'
|
214
|
+
response.instance_variable_set(:@read, true)
|
215
|
+
response.body = body
|
216
|
+
content_types.each do |c|
|
217
|
+
response.add_field('content-type', c)
|
218
|
+
end
|
219
|
+
response
|
220
|
+
end
|
223
221
|
end
|
224
222
|
end
|
data/lib/validate_website/option_parser.rb
CHANGED
@@ -16,11 +16,6 @@ module ValidateWebsite
|
|
16
16
|
:color => true,
|
17
17
|
# internal verbose for ValidateWebsite
|
18
18
|
:validate_verbose => false,
|
19
|
-
# Anemone options see anemone/lib/anemone/core.rb
|
20
|
-
:verbose => false,
|
21
|
-
:cookies => nil,
|
22
|
-
:accept_cookies => true,
|
23
|
-
:redirect_limit => 0,
|
24
19
|
}
|
25
20
|
|
26
21
|
DEFAULT_OPTS_CRAWL = {
|
@@ -101,10 +96,6 @@ module ValidateWebsite
|
|
101
96
|
"Only report errors (Default: #{@@default_opts[:quiet]})") { |v|
|
102
97
|
options[:quiet] = v
|
103
98
|
}
|
104
|
-
o.on("-d", "--debug",
|
105
|
-
"Show anemone log (Default: #{@@default_opts[:verbose]})") { |v|
|
106
|
-
options[:verbose] = v
|
107
|
-
}
|
108
99
|
|
109
100
|
o.separator ""
|
110
101
|
o.on_tail("-h", "--help", "Show this help message.") { puts o; exit }
|
data/man/man1/validate-website-static.1
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
'\" t
|
2
2
|
.\" Title: validate-website-static
|
3
3
|
.\" Author: [see the "AUTHOR" section]
|
4
|
-
.\" Generator: DocBook XSL Stylesheets v1.
|
5
|
-
.\" Date:
|
4
|
+
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
|
5
|
+
.\" Date: 09/20/2014
|
6
6
|
.\" Manual: \ \&
|
7
7
|
.\" Source: \ \&
|
8
8
|
.\" Language: English
|
9
9
|
.\"
|
10
|
-
.TH "VALIDATE\-WEBSITE\-S" "1" "
|
10
|
+
.TH "VALIDATE\-WEBSITE\-S" "1" "09/20/2014" "\ \&" "\ \&"
|
11
11
|
.\" -----------------------------------------------------------------
|
12
12
|
.\" * Define some portability stuff
|
13
13
|
.\" -----------------------------------------------------------------
|
data/man/man1/validate-website.1
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
'\" t
|
2
2
|
.\" Title: validate-website
|
3
3
|
.\" Author: [see the "AUTHOR" section]
|
4
|
-
.\" Generator: DocBook XSL Stylesheets v1.
|
5
|
-
.\" Date:
|
4
|
+
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
|
5
|
+
.\" Date: 09/20/2014
|
6
6
|
.\" Manual: \ \&
|
7
7
|
.\" Source: \ \&
|
8
8
|
.\" Language: English
|
9
9
|
.\"
|
10
|
-
.TH "VALIDATE\-WEBSITE" "1" "
|
10
|
+
.TH "VALIDATE\-WEBSITE" "1" "09/20/2014" "\ \&" "\ \&"
|
11
11
|
.\" -----------------------------------------------------------------
|
12
12
|
.\" * Define some portability stuff
|
13
13
|
.\" -----------------------------------------------------------------
|
@@ -45,7 +45,7 @@ http://localhost:3000/)
|
|
45
45
|
.PP
|
46
46
|
\fB\-u\fR, \fB\-\-user\-agent\fR \fIUSERAGENT\fR
|
47
47
|
.RS 4
|
48
|
-
Change user agent (Default:
|
48
|
+
Change user agent (Default: Spidr\&.user_agent)
|
49
49
|
.RE
|
50
50
|
.PP
|
51
51
|
\fB\-e\fR, \fB\-\-exclude\fR \fIEXCLUDE\fR
|
@@ -95,11 +95,6 @@ Show detail of validator errors (Default: false)\&.
|
|
95
95
|
Only report errors (Default: false)\&.
|
96
96
|
.RE
|
97
97
|
.PP
|
98
|
-
\fB\-d\fR, \fB\-\-debug\fR
|
99
|
-
.RS 4
|
100
|
-
Show anemone log (Default: false)
|
101
|
-
.RE
|
102
|
-
.PP
|
103
98
|
\fB\-h\fR, \fB\-\-help\fR
|
104
99
|
.RS 4
|
105
100
|
Show help message and exit\&.
|
data/spec/core_spec.rb
CHANGED
@@ -5,7 +5,8 @@ describe ValidateWebsite::Core do
|
|
5
5
|
|
6
6
|
before do
|
7
7
|
WebMock.reset!
|
8
|
-
stub_request(:get, ValidateWebsite::Core::PING_URL).
|
8
|
+
stub_request(:get, ValidateWebsite::Core::PING_URL).to_return(:status => 200)
|
9
|
+
stub_request(:get, /#{SPEC_DOMAIN}/).to_return(:status => 200)
|
9
10
|
@validate_website = ValidateWebsite::Core.new(:color => false)
|
10
11
|
end
|
11
12
|
|
@@ -18,7 +19,7 @@ describe ValidateWebsite::Core do
|
|
18
19
|
:content_type => 'text/html')
|
19
20
|
@validate_website.site = page.url
|
20
21
|
@validate_website.crawl(:quiet => true)
|
21
|
-
@validate_website.
|
22
|
+
@validate_website.crawler.history.size.must_equal 5
|
22
23
|
end
|
23
24
|
|
24
25
|
it 'extract link' do
|
@@ -29,7 +30,7 @@ describe ValidateWebsite::Core do
|
|
29
30
|
:content_type => 'text/html')
|
30
31
|
@validate_website.site = page.url
|
31
32
|
@validate_website.crawl(:quiet => true)
|
32
|
-
@validate_website.
|
33
|
+
@validate_website.crawler.history.size.must_equal 98
|
33
34
|
end
|
34
35
|
end
|
35
36
|
|
@@ -43,7 +44,7 @@ describe ValidateWebsite::Core do
|
|
43
44
|
:content_type => 'text/css')
|
44
45
|
@validate_website.site = page.url
|
45
46
|
@validate_website.crawl(:quiet => true)
|
46
|
-
@validate_website.
|
47
|
+
@validate_website.crawler.history.size.must_equal 5
|
47
48
|
end
|
48
49
|
|
49
50
|
it "should extract url with single quote" do
|
@@ -52,7 +53,7 @@ describe ValidateWebsite::Core do
|
|
52
53
|
:content_type => 'text/css')
|
53
54
|
@validate_website.site = page.url
|
54
55
|
@validate_website.crawl(:quiet => true)
|
55
|
-
@validate_website.
|
56
|
+
@validate_website.crawler.history.size.must_equal 2
|
56
57
|
end
|
57
58
|
|
58
59
|
it "should extract url with double quote" do
|
@@ -61,7 +62,7 @@ describe ValidateWebsite::Core do
|
|
61
62
|
:content_type => 'text/css')
|
62
63
|
@validate_website.site = page.url
|
63
64
|
@validate_website.crawl(:quiet => true)
|
64
|
-
@validate_website.
|
65
|
+
@validate_website.crawler.history.size.must_equal 2
|
65
66
|
end
|
66
67
|
end
|
67
68
|
|
data/spec/spec_helper.rb
CHANGED
data/spec/validator_spec.rb
CHANGED
@@ -4,7 +4,7 @@ require File.expand_path('../spec_helper', __FILE__)
|
|
4
4
|
describe ValidateWebsite::Validator do
|
5
5
|
before do
|
6
6
|
WebMock.reset!
|
7
|
-
@http =
|
7
|
+
@http = Spidr::Agent.new
|
8
8
|
end
|
9
9
|
|
10
10
|
describe("xhtml1") do
|
@@ -15,7 +15,7 @@ describe ValidateWebsite::Validator do
|
|
15
15
|
page = FakePage.new(name,
|
16
16
|
:body => open(file).read,
|
17
17
|
:content_type => 'text/html')
|
18
|
-
@xhtml1_page = @http.
|
18
|
+
@xhtml1_page = @http.get_page(page.url)
|
19
19
|
validator = ValidateWebsite::Validator.new(@xhtml1_page.doc, @xhtml1_page.body)
|
20
20
|
validator.dtd.system_id.must_equal dtd_uri
|
21
21
|
validator.namespace.must_equal name
|
@@ -36,7 +36,7 @@ describe ValidateWebsite::Validator do
|
|
36
36
|
page = FakePage.new(name,
|
37
37
|
:body => open(file).read,
|
38
38
|
:content_type => 'text/html')
|
39
|
-
@html5_page = @http.
|
39
|
+
@html5_page = @http.get_page(page.url)
|
40
40
|
validator = ValidateWebsite::Validator.new(@html5_page.doc, @html5_page.body)
|
41
41
|
validator.valid?.must_equal true
|
42
42
|
end
|
@@ -46,7 +46,7 @@ describe ValidateWebsite::Validator do
|
|
46
46
|
page = FakePage.new(name,
|
47
47
|
:body => open(file).read,
|
48
48
|
:content_type => 'text/html')
|
49
|
-
@html5_page = @http.
|
49
|
+
@html5_page = @http.get_page(page.url)
|
50
50
|
validator = ValidateWebsite::Validator.new(@html5_page.doc, @html5_page.body)
|
51
51
|
validator.valid?.must_equal true
|
52
52
|
end
|
@@ -61,7 +61,7 @@ describe ValidateWebsite::Validator do
|
|
61
61
|
page = FakePage.new(name,
|
62
62
|
:body => open(file).read,
|
63
63
|
:content_type => 'text/html')
|
64
|
-
@html5_page = @http.
|
64
|
+
@html5_page = @http.get_page(page.url)
|
65
65
|
end
|
66
66
|
|
67
67
|
it 'should have an array of errors' do
|
@@ -85,7 +85,7 @@ describe ValidateWebsite::Validator do
|
|
85
85
|
page = FakePage.new(name,
|
86
86
|
:body => open(file).read,
|
87
87
|
:content_type => 'text/html')
|
88
|
-
@html4_strict_page = @http.
|
88
|
+
@html4_strict_page = @http.get_page(page.url)
|
89
89
|
validator = ValidateWebsite::Validator.new(@html4_strict_page.doc, @html4_strict_page.body)
|
90
90
|
validator.valid?.must_equal true
|
91
91
|
end
|
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: validate-website
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Laurent Arnoud
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-09-
|
11
|
+
date: 2014-09-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: spidr
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '0.
|
19
|
+
version: '0.4'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '0.
|
26
|
+
version: '0.4'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: paint
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -250,7 +250,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
250
250
|
- !ruby/object:Gem::Version
|
251
251
|
version: '0'
|
252
252
|
requirements:
|
253
|
-
-
|
253
|
+
- spidr
|
254
254
|
- rainbow
|
255
255
|
- multipart_body
|
256
256
|
rubyforge_project:
|