validate-website 0.8.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3c732e2a061a486000368013967fdd598da0b29a
4
- data.tar.gz: 9977f374877aa34f18a3740faf79e8f77f8ed474
3
+ metadata.gz: 7c6d263126db73e28c33fddb7ba58358f522d305
4
+ data.tar.gz: bc4d5405e1872d425dea1691456159d523b9c2d2
5
5
  SHA512:
6
- metadata.gz: 70fab85d94c458bf16a36a4780249ba3d7cfd2d591208a3266f74535520084ce3f9a922c633c46fd47e41ce73b3f113f3f1aef88571d0767e3d452fe33e83b83
7
- data.tar.gz: 3416c45ea279abf768d44cefeece27f2e8eb0bcb0d6552f67e0d8e91d914533270a72687fe4136f27b64ca0f72dd215de59dbedc64b851d9daff11ff81475834
6
+ metadata.gz: 3039cb7c6d5082a2d69915ddf2c1d102362049e5297899eb2aeba0b8b493edc32301aa3803a20411bbfb82bbb9fe05cbebc514083ae1aef9ae954769cd308521
7
+ data.tar.gz: 14682a11d7e68083aefecd1ab2ca978de5c3d257f0a8d6d5bcacf4427586f5814947f7ea0c3b52bd821d72c3e4dbc0803ae9c950b629576fa10bf05facec1758
@@ -7,7 +7,7 @@ require 'validate_website/option_parser'
7
7
  require 'validate_website/validator'
8
8
  require 'validate_website/colorful_messages'
9
9
 
10
- require 'anemone'
10
+ require 'spidr'
11
11
 
12
12
  module ValidateWebsite
13
13
 
@@ -15,7 +15,7 @@ module ValidateWebsite
15
15
  class Core
16
16
 
17
17
  attr_accessor :site
18
- attr_reader :options, :anemone
18
+ attr_reader :options, :crawler
19
19
 
20
20
  include ColorfulMessages
21
21
 
@@ -52,29 +52,35 @@ module ValidateWebsite
52
52
  #
53
53
  def crawl(opts={})
54
54
  opts = @options.merge(opts)
55
+ opts.merge!(:ignore_links => Regexp.new(opts[:exclude])) if opts[:exclude]
56
+
55
57
  puts color(:note, "validating #{@site}", opts[:color]) unless opts[:quiet]
56
58
  puts color(:warning, "No internet connection") unless internet_connection?
57
59
 
58
- @anemone = Anemone.crawl(@site, opts) do |anemone|
59
- anemone.skip_links_like Regexp.new(opts[:exclude]) if opts[:exclude]
60
+ @crawler = Spidr.site(@site, opts) do |crawler|
61
+ crawler.every_css_page do |page|
62
+ extract_urls_from_css(page).each do |u|
63
+ crawler.enqueue(u)
64
+ end
65
+ end
60
66
 
61
- # select the links on each page to follow (iframe, link, css url)
62
- anemone.focus_crawl { |page|
63
- page.links.concat(extract_urls(page))
64
- }
67
+ crawler.every_html_page do |page|
68
+ extract_imgs_from_page(page).each do |i|
69
+ crawler.enqueue(i)
70
+ end
65
71
 
66
- anemone.on_every_page { |page|
67
- url = page.url.to_s
68
- if opts[:markup_validation] && page.html? && page.fetched?
69
- validate(page.doc, page.body, url, opts)
72
+ if opts[:markup_validation] && page.html?
73
+ validate(page.doc, page.body, page.url, opts)
70
74
  end
75
+ end
71
76
 
72
- if opts[:not_found] && page.not_found?
77
+ crawler.every_failed_url do |url|
78
+ if opts[:not_found]
73
79
  @not_found_error = true
74
- puts color(:error, "%s linked in %s but not exist" % [url, page.referer], opts[:color])
80
+ puts color(:error, "%s linked but not exist" % [url], opts[:color])
75
81
  to_file(url)
76
82
  end
77
- }
83
+ end
78
84
  end
79
85
  end
80
86
 
@@ -93,17 +99,14 @@ module ValidateWebsite
93
99
  files.each do |f|
94
100
  next unless File.file?(f)
95
101
 
96
- page = Anemone::Page.new(URI.parse(opts[:site] + URI.encode(f)),
97
- :body => open(f).read,
98
- :headers => {'content-type' => ['text/html', 'application/xhtml+xml']})
102
+ response = fake_http_response(open(f).read)
103
+ page = Spidr::Page.new(URI.parse(opts[:site] + URI.encode(f)), response)
99
104
 
100
105
  if opts[:markup_validation]
101
106
  validate(page.doc, page.body, f)
102
107
  end
103
108
  if opts[:not_found]
104
- links = page.links
105
- links.concat extract_urls_from_img_script_iframe_link(page)
106
- check_static_not_found(links.uniq)
109
+ check_static_not_found(page.links)
107
110
  end
108
111
  end
109
112
  end
@@ -128,13 +131,6 @@ module ValidateWebsite
128
131
  end
129
132
  end
130
133
 
131
- def get_url(page, elem, attrname)
132
- u = elem.attributes[attrname].to_s
133
- return if u.nil? || u.empty?
134
- abs = page.to_absolute(u) rescue nil
135
- abs if abs && page.in_domain?(abs)
136
- end
137
-
138
134
  # check files linked on static document
139
135
  # see lib/validate_website/runner.rb
140
136
  def check_static_not_found(links, opts={})
@@ -143,8 +139,8 @@ module ValidateWebsite
143
139
  file_location = URI.parse(File.join(Dir.getwd, l.path)).path
144
140
  # Check CSS url()
145
141
  if File.exists?(file_location) && File.extname(file_location) == '.css'
146
- css_page = Anemone::Page.new(l, :body => File.read(file_location),
147
- :headers => {'content-type' => ['text/css']})
142
+ response = fake_http_response(open(file_location).read, ['text/css'])
143
+ css_page = Spidr::Page.new(l, response)
148
144
  links.concat extract_urls_from_css(css_page)
149
145
  links.uniq!
150
146
  end
@@ -156,42 +152,29 @@ module ValidateWebsite
156
152
  end
157
153
  end
158
154
 
159
- # Extract urls from img script iframe and link element
160
- #
161
- # @param [Anemone::Page] an Anemone::Page object
162
- # @return [Array] Lists of urls
163
- #
164
- def extract_urls_from_img_script_iframe_link(page)
165
- links = Set.new
166
- page.doc.css('img, script, iframe, link').each do |elem|
167
- if elem.name == 'link'
168
- url = get_url(page, elem, "href")
169
- else
170
- url = get_url(page, elem, "src")
171
- end
172
- links << url unless url.nil? || url.to_s.empty?
173
- end
174
- links
175
- end
176
-
177
155
  # Extract urls from CSS page
178
156
  #
179
- # @param [Anemone::Page] an Anemone::Page object
157
+ # @param [Spidr::Page] an Spidr::Page object
180
158
  # @return [Array] Lists of urls
181
159
  #
182
160
  def extract_urls_from_css(page)
183
- page.body.scan(/url\((['".\/\w-]+)\)/).inject([]) do |result, url|
161
+ page.body.scan(/url\((['".\/\w-]+)\)/).inject(Set[]) do |result, url|
184
162
  url = url.first.gsub("'", "").gsub('"', '')
185
163
  abs = page.to_absolute(URI.parse(url))
186
164
  result << abs
187
165
  end
188
166
  end
189
167
 
190
- def extract_urls(page)
191
- links = Set.new
192
- links.merge extract_urls_from_img_script_iframe_link(page) if page.html?
193
- links.merge extract_urls_from_css(page) if page.content_type == 'text/css'
194
- links.to_a
168
+ # Extract imgs urls from page
169
+ #
170
+ # @param [Spidr::Page] an Spidr::Page object
171
+ # @return [Array] Lists of urls
172
+ #
173
+ def extract_imgs_from_page(page)
174
+ page.doc.search('//img[@src]').inject(Set[]) do |result, elem|
175
+ u = elem.attributes['src']
176
+ result << page.to_absolute(URI.parse(u))
177
+ end
195
178
  end
196
179
 
197
180
  ##
@@ -220,5 +203,20 @@ module ValidateWebsite
220
203
  end
221
204
  end
222
205
 
206
+ # Fake http response for Spidr static crawling
207
+ # see https://github.com/ruby/ruby/blob/trunk/lib/net/http/response.rb
208
+ #
209
+ # @param [String] response body
210
+ # @param [Array] content types
211
+ # @return [Net::HTTPResponse] fake http response
212
+ def fake_http_response(body, content_types=['text/html', 'text/xhtml+xml'])
213
+ response = Net::HTTPResponse.new '1.1', 200, 'OK'
214
+ response.instance_variable_set(:@read, true)
215
+ response.body = body
216
+ content_types.each do |c|
217
+ response.add_field('content-type', c)
218
+ end
219
+ response
220
+ end
223
221
  end
224
222
  end
@@ -16,11 +16,6 @@ module ValidateWebsite
16
16
  :color => true,
17
17
  # internal verbose for ValidateWebsite
18
18
  :validate_verbose => false,
19
- # Anemone options see anemone/lib/anemone/core.rb
20
- :verbose => false,
21
- :cookies => nil,
22
- :accept_cookies => true,
23
- :redirect_limit => 0,
24
19
  }
25
20
 
26
21
  DEFAULT_OPTS_CRAWL = {
@@ -101,10 +96,6 @@ module ValidateWebsite
101
96
  "Only report errors (Default: #{@@default_opts[:quiet]})") { |v|
102
97
  options[:quiet] = v
103
98
  }
104
- o.on("-d", "--debug",
105
- "Show anemone log (Default: #{@@default_opts[:verbose]})") { |v|
106
- options[:verbose] = v
107
- }
108
99
 
109
100
  o.separator ""
110
101
  o.on_tail("-h", "--help", "Show this help message.") { puts o; exit }
@@ -1,13 +1,13 @@
1
1
  '\" t
2
2
  .\" Title: validate-website-static
3
3
  .\" Author: [see the "AUTHOR" section]
4
- .\" Generator: DocBook XSL Stylesheets v1.76.1 <http://docbook.sf.net/>
5
- .\" Date: 04/05/2012
4
+ .\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
5
+ .\" Date: 09/20/2014
6
6
  .\" Manual: \ \&
7
7
  .\" Source: \ \&
8
8
  .\" Language: English
9
9
  .\"
10
- .TH "VALIDATE\-WEBSITE\-S" "1" "04/05/2012" "\ \&" "\ \&"
10
+ .TH "VALIDATE\-WEBSITE\-S" "1" "09/20/2014" "\ \&" "\ \&"
11
11
  .\" -----------------------------------------------------------------
12
12
  .\" * Define some portability stuff
13
13
  .\" -----------------------------------------------------------------
@@ -1,13 +1,13 @@
1
1
  '\" t
2
2
  .\" Title: validate-website
3
3
  .\" Author: [see the "AUTHOR" section]
4
- .\" Generator: DocBook XSL Stylesheets v1.76.1 <http://docbook.sf.net/>
5
- .\" Date: 04/05/2012
4
+ .\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
5
+ .\" Date: 09/20/2014
6
6
  .\" Manual: \ \&
7
7
  .\" Source: \ \&
8
8
  .\" Language: English
9
9
  .\"
10
- .TH "VALIDATE\-WEBSITE" "1" "04/05/2012" "\ \&" "\ \&"
10
+ .TH "VALIDATE\-WEBSITE" "1" "09/20/2014" "\ \&" "\ \&"
11
11
  .\" -----------------------------------------------------------------
12
12
  .\" * Define some portability stuff
13
13
  .\" -----------------------------------------------------------------
@@ -45,7 +45,7 @@ http://localhost:3000/)
45
45
  .PP
46
46
  \fB\-u\fR, \fB\-\-user\-agent\fR \fIUSERAGENT\fR
47
47
  .RS 4
48
- Change user agent (Default: Anemone/VERSION)
48
+ Change user agent (Default: Spidr\&.user_agent)
49
49
  .RE
50
50
  .PP
51
51
  \fB\-e\fR, \fB\-\-exclude\fR \fIEXCLUDE\fR
@@ -95,11 +95,6 @@ Show detail of validator errors (Default: false)\&.
95
95
  Only report errors (Default: false)\&.
96
96
  .RE
97
97
  .PP
98
- \fB\-d\fR, \fB\-\-debug\fR
99
- .RS 4
100
- Show anemone log (Default: false)
101
- .RE
102
- .PP
103
98
  \fB\-h\fR, \fB\-\-help\fR
104
99
  .RS 4
105
100
  Show help message and exit\&.
@@ -5,7 +5,8 @@ describe ValidateWebsite::Core do
5
5
 
6
6
  before do
7
7
  WebMock.reset!
8
- stub_request(:get, ValidateWebsite::Core::PING_URL).with(:status => 200)
8
+ stub_request(:get, ValidateWebsite::Core::PING_URL).to_return(:status => 200)
9
+ stub_request(:get, /#{SPEC_DOMAIN}/).to_return(:status => 200)
9
10
  @validate_website = ValidateWebsite::Core.new(:color => false)
10
11
  end
11
12
 
@@ -18,7 +19,7 @@ describe ValidateWebsite::Core do
18
19
  :content_type => 'text/html')
19
20
  @validate_website.site = page.url
20
21
  @validate_website.crawl(:quiet => true)
21
- @validate_website.anemone.pages.size.must_equal 5
22
+ @validate_website.crawler.history.size.must_equal 5
22
23
  end
23
24
 
24
25
  it 'extract link' do
@@ -29,7 +30,7 @@ describe ValidateWebsite::Core do
29
30
  :content_type => 'text/html')
30
31
  @validate_website.site = page.url
31
32
  @validate_website.crawl(:quiet => true)
32
- @validate_website.anemone.pages.size.must_equal 98
33
+ @validate_website.crawler.history.size.must_equal 98
33
34
  end
34
35
  end
35
36
 
@@ -43,7 +44,7 @@ describe ValidateWebsite::Core do
43
44
  :content_type => 'text/css')
44
45
  @validate_website.site = page.url
45
46
  @validate_website.crawl(:quiet => true)
46
- @validate_website.anemone.pages.size.must_equal 5
47
+ @validate_website.crawler.history.size.must_equal 5
47
48
  end
48
49
 
49
50
  it "should extract url with single quote" do
@@ -52,7 +53,7 @@ describe ValidateWebsite::Core do
52
53
  :content_type => 'text/css')
53
54
  @validate_website.site = page.url
54
55
  @validate_website.crawl(:quiet => true)
55
- @validate_website.anemone.pages.size.must_equal 2
56
+ @validate_website.crawler.history.size.must_equal 2
56
57
  end
57
58
 
58
59
  it "should extract url with double quote" do
@@ -61,7 +62,7 @@ describe ValidateWebsite::Core do
61
62
  :content_type => 'text/css')
62
63
  @validate_website.site = page.url
63
64
  @validate_website.crawl(:quiet => true)
64
- @validate_website.anemone.pages.size.must_equal 2
65
+ @validate_website.crawler.history.size.must_equal 2
65
66
  end
66
67
  end
67
68
 
@@ -1,7 +1,7 @@
1
1
  # encoding: UTF-8
2
2
  require 'minitest/autorun'
3
3
  require_relative 'webmock_helper'
4
- require 'anemone'
4
+ require 'spidr'
5
5
  require 'pry'
6
6
 
7
7
  require 'validate_website/core'
@@ -4,7 +4,7 @@ require File.expand_path('../spec_helper', __FILE__)
4
4
  describe ValidateWebsite::Validator do
5
5
  before do
6
6
  WebMock.reset!
7
- @http = Anemone::HTTP.new
7
+ @http = Spidr::Agent.new
8
8
  end
9
9
 
10
10
  describe("xhtml1") do
@@ -15,7 +15,7 @@ describe ValidateWebsite::Validator do
15
15
  page = FakePage.new(name,
16
16
  :body => open(file).read,
17
17
  :content_type => 'text/html')
18
- @xhtml1_page = @http.fetch_page(page.url)
18
+ @xhtml1_page = @http.get_page(page.url)
19
19
  validator = ValidateWebsite::Validator.new(@xhtml1_page.doc, @xhtml1_page.body)
20
20
  validator.dtd.system_id.must_equal dtd_uri
21
21
  validator.namespace.must_equal name
@@ -36,7 +36,7 @@ describe ValidateWebsite::Validator do
36
36
  page = FakePage.new(name,
37
37
  :body => open(file).read,
38
38
  :content_type => 'text/html')
39
- @html5_page = @http.fetch_page(page.url)
39
+ @html5_page = @http.get_page(page.url)
40
40
  validator = ValidateWebsite::Validator.new(@html5_page.doc, @html5_page.body)
41
41
  validator.valid?.must_equal true
42
42
  end
@@ -46,7 +46,7 @@ describe ValidateWebsite::Validator do
46
46
  page = FakePage.new(name,
47
47
  :body => open(file).read,
48
48
  :content_type => 'text/html')
49
- @html5_page = @http.fetch_page(page.url)
49
+ @html5_page = @http.get_page(page.url)
50
50
  validator = ValidateWebsite::Validator.new(@html5_page.doc, @html5_page.body)
51
51
  validator.valid?.must_equal true
52
52
  end
@@ -61,7 +61,7 @@ describe ValidateWebsite::Validator do
61
61
  page = FakePage.new(name,
62
62
  :body => open(file).read,
63
63
  :content_type => 'text/html')
64
- @html5_page = @http.fetch_page(page.url)
64
+ @html5_page = @http.get_page(page.url)
65
65
  end
66
66
 
67
67
  it 'should have an array of errors' do
@@ -85,7 +85,7 @@ describe ValidateWebsite::Validator do
85
85
  page = FakePage.new(name,
86
86
  :body => open(file).read,
87
87
  :content_type => 'text/html')
88
- @html4_strict_page = @http.fetch_page(page.url)
88
+ @html4_strict_page = @http.get_page(page.url)
89
89
  validator = ValidateWebsite::Validator.new(@html4_strict_page.doc, @html4_strict_page.body)
90
90
  validator.valid?.must_equal true
91
91
  end
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: validate-website
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.1
4
+ version: 0.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Laurent Arnoud
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-09-18 00:00:00.000000000 Z
11
+ date: 2014-09-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: anemone
14
+ name: spidr
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0.7'
19
+ version: '0.4'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0.7'
26
+ version: '0.4'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: paint
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -250,7 +250,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
250
250
  - !ruby/object:Gem::Version
251
251
  version: '0'
252
252
  requirements:
253
- - anemone
253
+ - spidr
254
254
  - rainbow
255
255
  - multipart_body
256
256
  rubyforge_project: