validate-website 0.8.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/validate_website/core.rb +53 -55
- data/lib/validate_website/option_parser.rb +0 -9
- data/man/man1/validate-website-static.1 +3 -3
- data/man/man1/validate-website.1 +4 -9
- data/spec/core_spec.rb +7 -6
- data/spec/spec_helper.rb +1 -1
- data/spec/validator_spec.rb +6 -6
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7c6d263126db73e28c33fddb7ba58358f522d305
|
4
|
+
data.tar.gz: bc4d5405e1872d425dea1691456159d523b9c2d2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3039cb7c6d5082a2d69915ddf2c1d102362049e5297899eb2aeba0b8b493edc32301aa3803a20411bbfb82bbb9fe05cbebc514083ae1aef9ae954769cd308521
|
7
|
+
data.tar.gz: 14682a11d7e68083aefecd1ab2ca978de5c3d257f0a8d6d5bcacf4427586f5814947f7ea0c3b52bd821d72c3e4dbc0803ae9c950b629576fa10bf05facec1758
|
@@ -7,7 +7,7 @@ require 'validate_website/option_parser'
|
|
7
7
|
require 'validate_website/validator'
|
8
8
|
require 'validate_website/colorful_messages'
|
9
9
|
|
10
|
-
require '
|
10
|
+
require 'spidr'
|
11
11
|
|
12
12
|
module ValidateWebsite
|
13
13
|
|
@@ -15,7 +15,7 @@ module ValidateWebsite
|
|
15
15
|
class Core
|
16
16
|
|
17
17
|
attr_accessor :site
|
18
|
-
attr_reader :options, :
|
18
|
+
attr_reader :options, :crawler
|
19
19
|
|
20
20
|
include ColorfulMessages
|
21
21
|
|
@@ -52,29 +52,35 @@ module ValidateWebsite
|
|
52
52
|
#
|
53
53
|
def crawl(opts={})
|
54
54
|
opts = @options.merge(opts)
|
55
|
+
opts.merge!(:ignore_links => Regexp.new(opts[:exclude])) if opts[:exclude]
|
56
|
+
|
55
57
|
puts color(:note, "validating #{@site}", opts[:color]) unless opts[:quiet]
|
56
58
|
puts color(:warning, "No internet connection") unless internet_connection?
|
57
59
|
|
58
|
-
@
|
59
|
-
|
60
|
+
@crawler = Spidr.site(@site, opts) do |crawler|
|
61
|
+
crawler.every_css_page do |page|
|
62
|
+
extract_urls_from_css(page).each do |u|
|
63
|
+
crawler.enqueue(u)
|
64
|
+
end
|
65
|
+
end
|
60
66
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
67
|
+
crawler.every_html_page do |page|
|
68
|
+
extract_imgs_from_page(page).each do |i|
|
69
|
+
crawler.enqueue(i)
|
70
|
+
end
|
65
71
|
|
66
|
-
|
67
|
-
|
68
|
-
if opts[:markup_validation] && page.html? && page.fetched?
|
69
|
-
validate(page.doc, page.body, url, opts)
|
72
|
+
if opts[:markup_validation] && page.html?
|
73
|
+
validate(page.doc, page.body, page.url, opts)
|
70
74
|
end
|
75
|
+
end
|
71
76
|
|
72
|
-
|
77
|
+
crawler.every_failed_url do |url|
|
78
|
+
if opts[:not_found]
|
73
79
|
@not_found_error = true
|
74
|
-
puts color(:error, "%s linked
|
80
|
+
puts color(:error, "%s linked but not exist" % [url], opts[:color])
|
75
81
|
to_file(url)
|
76
82
|
end
|
77
|
-
|
83
|
+
end
|
78
84
|
end
|
79
85
|
end
|
80
86
|
|
@@ -93,17 +99,14 @@ module ValidateWebsite
|
|
93
99
|
files.each do |f|
|
94
100
|
next unless File.file?(f)
|
95
101
|
|
96
|
-
|
97
|
-
|
98
|
-
:headers => {'content-type' => ['text/html', 'application/xhtml+xml']})
|
102
|
+
response = fake_http_response(open(f).read)
|
103
|
+
page = Spidr::Page.new(URI.parse(opts[:site] + URI.encode(f)), response)
|
99
104
|
|
100
105
|
if opts[:markup_validation]
|
101
106
|
validate(page.doc, page.body, f)
|
102
107
|
end
|
103
108
|
if opts[:not_found]
|
104
|
-
|
105
|
-
links.concat extract_urls_from_img_script_iframe_link(page)
|
106
|
-
check_static_not_found(links.uniq)
|
109
|
+
check_static_not_found(page.links)
|
107
110
|
end
|
108
111
|
end
|
109
112
|
end
|
@@ -128,13 +131,6 @@ module ValidateWebsite
|
|
128
131
|
end
|
129
132
|
end
|
130
133
|
|
131
|
-
def get_url(page, elem, attrname)
|
132
|
-
u = elem.attributes[attrname].to_s
|
133
|
-
return if u.nil? || u.empty?
|
134
|
-
abs = page.to_absolute(u) rescue nil
|
135
|
-
abs if abs && page.in_domain?(abs)
|
136
|
-
end
|
137
|
-
|
138
134
|
# check files linked on static document
|
139
135
|
# see lib/validate_website/runner.rb
|
140
136
|
def check_static_not_found(links, opts={})
|
@@ -143,8 +139,8 @@ module ValidateWebsite
|
|
143
139
|
file_location = URI.parse(File.join(Dir.getwd, l.path)).path
|
144
140
|
# Check CSS url()
|
145
141
|
if File.exists?(file_location) && File.extname(file_location) == '.css'
|
146
|
-
|
147
|
-
|
142
|
+
response = fake_http_response(open(file_location).read, ['text/css'])
|
143
|
+
css_page = Spidr::Page.new(l, response)
|
148
144
|
links.concat extract_urls_from_css(css_page)
|
149
145
|
links.uniq!
|
150
146
|
end
|
@@ -156,42 +152,29 @@ module ValidateWebsite
|
|
156
152
|
end
|
157
153
|
end
|
158
154
|
|
159
|
-
# Extract urls from img script iframe and link element
|
160
|
-
#
|
161
|
-
# @param [Anemone::Page] an Anemone::Page object
|
162
|
-
# @return [Array] Lists of urls
|
163
|
-
#
|
164
|
-
def extract_urls_from_img_script_iframe_link(page)
|
165
|
-
links = Set.new
|
166
|
-
page.doc.css('img, script, iframe, link').each do |elem|
|
167
|
-
if elem.name == 'link'
|
168
|
-
url = get_url(page, elem, "href")
|
169
|
-
else
|
170
|
-
url = get_url(page, elem, "src")
|
171
|
-
end
|
172
|
-
links << url unless url.nil? || url.to_s.empty?
|
173
|
-
end
|
174
|
-
links
|
175
|
-
end
|
176
|
-
|
177
155
|
# Extract urls from CSS page
|
178
156
|
#
|
179
|
-
# @param [
|
157
|
+
# @param [Spidr::Page] a Spidr::Page object
|
180
158
|
# @return [Array] Lists of urls
|
181
159
|
#
|
182
160
|
def extract_urls_from_css(page)
|
183
|
-
page.body.scan(/url\((['".\/\w-]+)\)/).inject([]) do |result, url|
|
161
|
+
page.body.scan(/url\((['".\/\w-]+)\)/).inject(Set[]) do |result, url|
|
184
162
|
url = url.first.gsub("'", "").gsub('"', '')
|
185
163
|
abs = page.to_absolute(URI.parse(url))
|
186
164
|
result << abs
|
187
165
|
end
|
188
166
|
end
|
189
167
|
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
168
|
+
# Extract imgs urls from page
|
169
|
+
#
|
170
|
+
# @param [Spidr::Page] a Spidr::Page object
|
171
|
+
# @return [Array] Lists of urls
|
172
|
+
#
|
173
|
+
def extract_imgs_from_page(page)
|
174
|
+
page.doc.search('//img[@src]').inject(Set[]) do |result, elem|
|
175
|
+
u = elem.attributes['src']
|
176
|
+
result << page.to_absolute(URI.parse(u))
|
177
|
+
end
|
195
178
|
end
|
196
179
|
|
197
180
|
##
|
@@ -220,5 +203,20 @@ module ValidateWebsite
|
|
220
203
|
end
|
221
204
|
end
|
222
205
|
|
206
|
+
# Fake http response for Spidr static crawling
|
207
|
+
# see https://github.com/ruby/ruby/blob/trunk/lib/net/http/response.rb
|
208
|
+
#
|
209
|
+
# @param [String] response body
|
210
|
+
# @param [Array] content types
|
211
|
+
# @return [Net::HTTPResponse] fake http response
|
212
|
+
def fake_http_response(body, content_types=['text/html', 'text/xhtml+xml'])
|
213
|
+
response = Net::HTTPResponse.new '1.1', 200, 'OK'
|
214
|
+
response.instance_variable_set(:@read, true)
|
215
|
+
response.body = body
|
216
|
+
content_types.each do |c|
|
217
|
+
response.add_field('content-type', c)
|
218
|
+
end
|
219
|
+
response
|
220
|
+
end
|
223
221
|
end
|
224
222
|
end
|
@@ -16,11 +16,6 @@ module ValidateWebsite
|
|
16
16
|
:color => true,
|
17
17
|
# internal verbose for ValidateWebsite
|
18
18
|
:validate_verbose => false,
|
19
|
-
# Anemone options see anemone/lib/anemone/core.rb
|
20
|
-
:verbose => false,
|
21
|
-
:cookies => nil,
|
22
|
-
:accept_cookies => true,
|
23
|
-
:redirect_limit => 0,
|
24
19
|
}
|
25
20
|
|
26
21
|
DEFAULT_OPTS_CRAWL = {
|
@@ -101,10 +96,6 @@ module ValidateWebsite
|
|
101
96
|
"Only report errors (Default: #{@@default_opts[:quiet]})") { |v|
|
102
97
|
options[:quiet] = v
|
103
98
|
}
|
104
|
-
o.on("-d", "--debug",
|
105
|
-
"Show anemone log (Default: #{@@default_opts[:verbose]})") { |v|
|
106
|
-
options[:verbose] = v
|
107
|
-
}
|
108
99
|
|
109
100
|
o.separator ""
|
110
101
|
o.on_tail("-h", "--help", "Show this help message.") { puts o; exit }
|
@@ -1,13 +1,13 @@
|
|
1
1
|
'\" t
|
2
2
|
.\" Title: validate-website-static
|
3
3
|
.\" Author: [see the "AUTHOR" section]
|
4
|
-
.\" Generator: DocBook XSL Stylesheets v1.
|
5
|
-
.\" Date:
|
4
|
+
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
|
5
|
+
.\" Date: 09/20/2014
|
6
6
|
.\" Manual: \ \&
|
7
7
|
.\" Source: \ \&
|
8
8
|
.\" Language: English
|
9
9
|
.\"
|
10
|
-
.TH "VALIDATE\-WEBSITE\-S" "1" "
|
10
|
+
.TH "VALIDATE\-WEBSITE\-S" "1" "09/20/2014" "\ \&" "\ \&"
|
11
11
|
.\" -----------------------------------------------------------------
|
12
12
|
.\" * Define some portability stuff
|
13
13
|
.\" -----------------------------------------------------------------
|
data/man/man1/validate-website.1
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
'\" t
|
2
2
|
.\" Title: validate-website
|
3
3
|
.\" Author: [see the "AUTHOR" section]
|
4
|
-
.\" Generator: DocBook XSL Stylesheets v1.
|
5
|
-
.\" Date:
|
4
|
+
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
|
5
|
+
.\" Date: 09/20/2014
|
6
6
|
.\" Manual: \ \&
|
7
7
|
.\" Source: \ \&
|
8
8
|
.\" Language: English
|
9
9
|
.\"
|
10
|
-
.TH "VALIDATE\-WEBSITE" "1" "
|
10
|
+
.TH "VALIDATE\-WEBSITE" "1" "09/20/2014" "\ \&" "\ \&"
|
11
11
|
.\" -----------------------------------------------------------------
|
12
12
|
.\" * Define some portability stuff
|
13
13
|
.\" -----------------------------------------------------------------
|
@@ -45,7 +45,7 @@ http://localhost:3000/)
|
|
45
45
|
.PP
|
46
46
|
\fB\-u\fR, \fB\-\-user\-agent\fR \fIUSERAGENT\fR
|
47
47
|
.RS 4
|
48
|
-
Change user agent (Default:
|
48
|
+
Change user agent (Default: Spidr\&.user_agent)
|
49
49
|
.RE
|
50
50
|
.PP
|
51
51
|
\fB\-e\fR, \fB\-\-exclude\fR \fIEXCLUDE\fR
|
@@ -95,11 +95,6 @@ Show detail of validator errors (Default: false)\&.
|
|
95
95
|
Only report errors (Default: false)\&.
|
96
96
|
.RE
|
97
97
|
.PP
|
98
|
-
\fB\-d\fR, \fB\-\-debug\fR
|
99
|
-
.RS 4
|
100
|
-
Show anemone log (Default: false)
|
101
|
-
.RE
|
102
|
-
.PP
|
103
98
|
\fB\-h\fR, \fB\-\-help\fR
|
104
99
|
.RS 4
|
105
100
|
Show help message and exit\&.
|
data/spec/core_spec.rb
CHANGED
@@ -5,7 +5,8 @@ describe ValidateWebsite::Core do
|
|
5
5
|
|
6
6
|
before do
|
7
7
|
WebMock.reset!
|
8
|
-
stub_request(:get, ValidateWebsite::Core::PING_URL).
|
8
|
+
stub_request(:get, ValidateWebsite::Core::PING_URL).to_return(:status => 200)
|
9
|
+
stub_request(:get, /#{SPEC_DOMAIN}/).to_return(:status => 200)
|
9
10
|
@validate_website = ValidateWebsite::Core.new(:color => false)
|
10
11
|
end
|
11
12
|
|
@@ -18,7 +19,7 @@ describe ValidateWebsite::Core do
|
|
18
19
|
:content_type => 'text/html')
|
19
20
|
@validate_website.site = page.url
|
20
21
|
@validate_website.crawl(:quiet => true)
|
21
|
-
@validate_website.
|
22
|
+
@validate_website.crawler.history.size.must_equal 5
|
22
23
|
end
|
23
24
|
|
24
25
|
it 'extract link' do
|
@@ -29,7 +30,7 @@ describe ValidateWebsite::Core do
|
|
29
30
|
:content_type => 'text/html')
|
30
31
|
@validate_website.site = page.url
|
31
32
|
@validate_website.crawl(:quiet => true)
|
32
|
-
@validate_website.
|
33
|
+
@validate_website.crawler.history.size.must_equal 98
|
33
34
|
end
|
34
35
|
end
|
35
36
|
|
@@ -43,7 +44,7 @@ describe ValidateWebsite::Core do
|
|
43
44
|
:content_type => 'text/css')
|
44
45
|
@validate_website.site = page.url
|
45
46
|
@validate_website.crawl(:quiet => true)
|
46
|
-
@validate_website.
|
47
|
+
@validate_website.crawler.history.size.must_equal 5
|
47
48
|
end
|
48
49
|
|
49
50
|
it "should extract url with single quote" do
|
@@ -52,7 +53,7 @@ describe ValidateWebsite::Core do
|
|
52
53
|
:content_type => 'text/css')
|
53
54
|
@validate_website.site = page.url
|
54
55
|
@validate_website.crawl(:quiet => true)
|
55
|
-
@validate_website.
|
56
|
+
@validate_website.crawler.history.size.must_equal 2
|
56
57
|
end
|
57
58
|
|
58
59
|
it "should extract url with double quote" do
|
@@ -61,7 +62,7 @@ describe ValidateWebsite::Core do
|
|
61
62
|
:content_type => 'text/css')
|
62
63
|
@validate_website.site = page.url
|
63
64
|
@validate_website.crawl(:quiet => true)
|
64
|
-
@validate_website.
|
65
|
+
@validate_website.crawler.history.size.must_equal 2
|
65
66
|
end
|
66
67
|
end
|
67
68
|
|
data/spec/spec_helper.rb
CHANGED
data/spec/validator_spec.rb
CHANGED
@@ -4,7 +4,7 @@ require File.expand_path('../spec_helper', __FILE__)
|
|
4
4
|
describe ValidateWebsite::Validator do
|
5
5
|
before do
|
6
6
|
WebMock.reset!
|
7
|
-
@http =
|
7
|
+
@http = Spidr::Agent.new
|
8
8
|
end
|
9
9
|
|
10
10
|
describe("xhtml1") do
|
@@ -15,7 +15,7 @@ describe ValidateWebsite::Validator do
|
|
15
15
|
page = FakePage.new(name,
|
16
16
|
:body => open(file).read,
|
17
17
|
:content_type => 'text/html')
|
18
|
-
@xhtml1_page = @http.
|
18
|
+
@xhtml1_page = @http.get_page(page.url)
|
19
19
|
validator = ValidateWebsite::Validator.new(@xhtml1_page.doc, @xhtml1_page.body)
|
20
20
|
validator.dtd.system_id.must_equal dtd_uri
|
21
21
|
validator.namespace.must_equal name
|
@@ -36,7 +36,7 @@ describe ValidateWebsite::Validator do
|
|
36
36
|
page = FakePage.new(name,
|
37
37
|
:body => open(file).read,
|
38
38
|
:content_type => 'text/html')
|
39
|
-
@html5_page = @http.
|
39
|
+
@html5_page = @http.get_page(page.url)
|
40
40
|
validator = ValidateWebsite::Validator.new(@html5_page.doc, @html5_page.body)
|
41
41
|
validator.valid?.must_equal true
|
42
42
|
end
|
@@ -46,7 +46,7 @@ describe ValidateWebsite::Validator do
|
|
46
46
|
page = FakePage.new(name,
|
47
47
|
:body => open(file).read,
|
48
48
|
:content_type => 'text/html')
|
49
|
-
@html5_page = @http.
|
49
|
+
@html5_page = @http.get_page(page.url)
|
50
50
|
validator = ValidateWebsite::Validator.new(@html5_page.doc, @html5_page.body)
|
51
51
|
validator.valid?.must_equal true
|
52
52
|
end
|
@@ -61,7 +61,7 @@ describe ValidateWebsite::Validator do
|
|
61
61
|
page = FakePage.new(name,
|
62
62
|
:body => open(file).read,
|
63
63
|
:content_type => 'text/html')
|
64
|
-
@html5_page = @http.
|
64
|
+
@html5_page = @http.get_page(page.url)
|
65
65
|
end
|
66
66
|
|
67
67
|
it 'should have an array of errors' do
|
@@ -85,7 +85,7 @@ describe ValidateWebsite::Validator do
|
|
85
85
|
page = FakePage.new(name,
|
86
86
|
:body => open(file).read,
|
87
87
|
:content_type => 'text/html')
|
88
|
-
@html4_strict_page = @http.
|
88
|
+
@html4_strict_page = @http.get_page(page.url)
|
89
89
|
validator = ValidateWebsite::Validator.new(@html4_strict_page.doc, @html4_strict_page.body)
|
90
90
|
validator.valid?.must_equal true
|
91
91
|
end
|
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: validate-website
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Laurent Arnoud
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-09-
|
11
|
+
date: 2014-09-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: spidr
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '0.
|
19
|
+
version: '0.4'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '0.
|
26
|
+
version: '0.4'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: paint
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -250,7 +250,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
250
250
|
- !ruby/object:Gem::Version
|
251
251
|
version: '0'
|
252
252
|
requirements:
|
253
|
-
-
|
253
|
+
- spidr
|
254
254
|
- rainbow
|
255
255
|
- multipart_body
|
256
256
|
rubyforge_project:
|