validate-website 0.7.0 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -62,6 +62,13 @@ HTML5 support with Validator.nu Web Service.
62
62
  66::
63
63
  There is invalid markup and there are pages not found.
64
64
 
65
+ == In your application ==
66
+
67
+ require 'validate_website/validator'
68
+ body = '<!DOCTYPE html><html></html>'
69
+ v = ValidateWebsite::Validator.new(Nokogiri::HTML(body), body)
70
+ v.valid? # => false
71
+
65
72
  == REQUIREMENTS
66
73
 
67
74
  See validate-website.gemspec file.
data/Rakefile CHANGED
@@ -1,9 +1,9 @@
1
- require 'rake/rdoctask'
2
- require "rspec/core/rake_task" # RSpec 2.0
1
+ require 'rdoc/task'
2
+ require 'rake/testtask'
3
3
 
4
4
  task :default => [:test]
5
5
 
6
- Rake::RDocTask.new do |rd|
6
+ RDoc::Task.new do |rd|
7
7
  rd.main = "README.rdoc"
8
8
  rd.rdoc_files.include("README.rdoc", "lib/**/*.rb")
9
9
  end
@@ -13,9 +13,7 @@ task :manpage do
13
13
  system('find doc/ -type f -exec a2x -f manpage -D man/man1 {} \;')
14
14
  end
15
15
 
16
- # RSpec 2.0
17
- RSpec::Core::RakeTask.new(:test) do |spec|
18
- spec.pattern = 'spec/*_spec.rb'
19
- spec.rspec_opts = ['--backtrace']
16
+ Rake::TestTask.new do |t|
17
+ t.pattern = "spec/*_spec.rb"
20
18
  end
21
19
  task :spec => :test
data/bin/validate-website CHANGED
@@ -1,14 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
2
  # encoding: utf-8
3
3
 
4
- developer_mode = false
5
- developer_mode = true if __FILE__ == $0
6
- if developer_mode
7
- lib_dir = File.join(File.dirname(__FILE__), '..', 'lib')
8
- $:.unshift(File.expand_path(lib_dir))
9
- require 'rubygems'
10
- end
11
-
12
4
  require 'validate_website/runner'
13
5
  exit_status = ValidateWebsite::Runner.run_crawl(ARGV)
14
6
  exit(exit_status)
@@ -1,14 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
2
  # encoding: utf-8
3
3
 
4
- developer_mode = false
5
- developer_mode = true if __FILE__ == $0
6
- if developer_mode
7
- lib_dir = File.join(File.dirname(__FILE__), '..', 'lib')
8
- $:.unshift(File.expand_path(lib_dir))
9
- require 'rubygems'
10
- end
11
-
12
4
  require 'validate_website/runner'
13
5
  exit_status = ValidateWebsite::Runner.run_static(ARGV)
14
6
  exit(exit_status)
@@ -2,8 +2,8 @@
2
2
  This is the HTML 4.01 Frameset DTD, which should be
3
3
  used for documents with frames. This DTD is identical
4
4
  to the HTML 4.01 Transitional DTD except for the
5
- content model of the "HTML" element: in frameset
6
- documents, the "FRAMESET" element replaces the "BODY"
5
+ content model of the "HTML" element: in frameset
6
+ documents, the "FRAMESET" element replaces the "BODY"
7
7
  element.
8
8
 
9
9
  Draft: $Date: 1999/12/24 23:37:45 $
@@ -56,7 +56,7 @@
56
56
  presentation control for user agents that don't (adequately)
57
57
  support style sheets.
58
58
 
59
- If you are writing a document that includes frames, use
59
+ If you are writing a document that includes frames, use
60
60
  the following FPI:
61
61
 
62
62
  "-//W3C//DTD HTML 4.01 Frameset//EN"
@@ -65,7 +65,7 @@
65
65
 
66
66
  http://www.w3.org/TR/1999/REC-html401-19991224/frameset.dtd
67
67
 
68
- Use the following (relative) URIs to refer to
68
+ Use the following (relative) URIs to refer to
69
69
  the DTDs and entity definitions of this specification:
70
70
 
71
71
  "strict.dtd"
@@ -102,7 +102,7 @@
102
102
  -->
103
103
 
104
104
  <!ENTITY % Character "CDATA"
105
- -- a single character from [ISO10646]
105
+ -- a single character from [ISO10646]
106
106
  -->
107
107
 
108
108
  <!ENTITY % LinkTypes "CDATA"
@@ -427,8 +427,8 @@
427
427
 
428
428
  <!ENTITY % IAlign "(top|middle|bottom|left|right)" -- center? -->
429
429
 
430
- <!-- To avoid problems with text-only UAs as well as
431
- to make image content understandable and navigable
430
+ <!-- To avoid problems with text-only UAs as well as
431
+ to make image content understandable and navigable
432
432
  to users of non-visual UAs, you need to provide
433
433
  a description with ALT, and avoid server-side image maps -->
434
434
  <!ELEMENT IMG - O EMPTY -- Embedded image -->
@@ -454,7 +454,7 @@
454
454
 
455
455
  <!--==================== OBJECT ======================================-->
456
456
  <!--
457
- OBJECT is used to embed objects as part of HTML pages
457
+ OBJECT is used to embed objects as part of HTML pages
458
458
  PARAM elements should precede other content. SGML mixed content
459
459
  model technicality precludes specifying this formally ...
460
460
  -->
@@ -820,7 +820,7 @@
820
820
  -->
821
821
 
822
822
  <!ENTITY % TRules "(none | groups | rows | cols | all)">
823
-
823
+
824
824
  <!-- horizontal placement of table relative to document -->
825
825
  <!ENTITY % TAlign "(left|center|right)">
826
826
 
@@ -964,8 +964,8 @@ several semantically related columns together.
964
964
  default: 100% (1 row) --
965
965
  cols %MultiLengths; #IMPLIED -- list of lengths,
966
966
  default: 100% (1 col) --
967
- onload %Script; #IMPLIED -- all the frames have been loaded --
968
- onunload %Script; #IMPLIED -- all the frames have been removed --
967
+ onload %Script; #IMPLIED -- all the frames have been loaded --
968
+ onunload %Script; #IMPLIED -- all the frames have been removed --
969
969
  >
970
970
  ]]>
971
971
 
@@ -1,10 +1,10 @@
1
1
  <!--
2
- This is HTML 4.01 Strict DTD, which excludes the presentation
3
- attributes and elements that W3C expects to phase out as
2
+ This is HTML 4.01 Strict DTD, which excludes the presentation
3
+ attributes and elements that W3C expects to phase out as
4
4
  support for style sheets matures. Authors should use the Strict
5
5
  DTD when possible, but may use the Transitional DTD when support
6
6
  for presentation attribute and elements is required.
7
-
7
+
8
8
  HTML 4 includes mechanisms for style sheets, scripting,
9
9
  embedding objects, improved support for right to left and mixed
10
10
  direction text, and enhancements to forms for improved
@@ -52,7 +52,7 @@
52
52
 
53
53
  http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd
54
54
 
55
- If you are writing a document that includes frames, use
55
+ If you are writing a document that includes frames, use
56
56
  the following FPI:
57
57
 
58
58
  "-//W3C//DTD HTML 4.01 Frameset//EN"
@@ -61,7 +61,7 @@
61
61
 
62
62
  http://www.w3.org/TR/1999/REC-html401-19991224/frameset.dtd
63
63
 
64
- Use the following (relative) URIs to refer to
64
+ Use the following (relative) URIs to refer to
65
65
  the DTDs and entity definitions of this specification:
66
66
 
67
67
  "strict.dtd"
@@ -98,7 +98,7 @@
98
98
  -->
99
99
 
100
100
  <!ENTITY % Character "CDATA"
101
- -- a single character from [ISO10646]
101
+ -- a single character from [ISO10646]
102
102
  -->
103
103
 
104
104
  <!ENTITY % LinkTypes "CDATA"
@@ -367,8 +367,8 @@
367
367
  <!ENTITY % Pixels "CDATA" -- integer representing length in pixels -->
368
368
 
369
369
 
370
- <!-- To avoid problems with text-only UAs as well as
371
- to make image content understandable and navigable
370
+ <!-- To avoid problems with text-only UAs as well as
371
+ to make image content understandable and navigable
372
372
  to users of non-visual UAs, you need to provide
373
373
  a description with ALT, and avoid server-side image maps -->
374
374
  <!ELEMENT IMG - O EMPTY -- Embedded image -->
@@ -390,7 +390,7 @@
390
390
 
391
391
  <!--==================== OBJECT ======================================-->
392
392
  <!--
393
- OBJECT is used to embed objects as part of HTML pages
393
+ OBJECT is used to embed objects as part of HTML pages
394
394
  PARAM elements should precede other content. SGML mixed content
395
395
  model technicality precludes specifying this formally ...
396
396
  -->
@@ -686,7 +686,7 @@
686
686
  -->
687
687
 
688
688
  <!ENTITY % TRules "(none | groups | rows | cols | all)">
689
-
689
+
690
690
  <!-- horizontal placement of table relative to document -->
691
691
  <!ENTITY % TAlign "(left|center|right)">
692
692
 
@@ -14,10 +14,10 @@
14
14
  for Informatics and Mathematics, Keio University).
15
15
  All Rights Reserved.
16
16
 
17
- Permission to use, copy, modify and distribute the XHTML DTD and its
18
- accompanying documentation for any purpose and without fee is hereby
19
- granted in perpetuity, provided that the above copyright notice and
20
- this paragraph appear in all copies. The copyright holders make no
17
+ Permission to use, copy, modify and distribute the XHTML DTD and its
18
+ accompanying documentation for any purpose and without fee is hereby
19
+ granted in perpetuity, provided that the above copyright notice and
20
+ this paragraph appear in all copies. The copyright holders make no
21
21
  representation about the suitability of the DTD for any purpose.
22
22
 
23
23
  It is provided "as is" without expressed or implied warranty.
@@ -18,7 +18,7 @@
18
18
  existing ISO 8879 entity names. ISO 10646 character numbers
19
19
  are given for each character, in hex. values are decimal
20
20
  conversions of the ISO 10646 values and refer to the document
21
- character set. Names are Unicode names.
21
+ character set. Names are Unicode names.
22
22
  -->
23
23
 
24
24
  <!-- C0 Controls and Basic Latin -->
@@ -18,7 +18,7 @@
18
18
  existing ISO 8879 entity names. ISO 10646 character numbers
19
19
  are given for each character, in hex. values are decimal
20
20
  conversions of the ISO 10646 values and refer to the document
21
- character set. Names are Unicode names.
21
+ character set. Names are Unicode names.
22
22
  -->
23
23
 
24
24
  <!-- Latin Extended-B -->
@@ -144,7 +144,7 @@
144
144
  <!ENTITY uArr "&#8657;"> <!-- upwards double arrow, U+21D1 ISOamsa -->
145
145
  <!ENTITY rArr "&#8658;"> <!-- rightwards double arrow,
146
146
  U+21D2 ISOtech -->
147
- <!-- Unicode does not say this is the 'implies' character but does not have
147
+ <!-- Unicode does not say this is the 'implies' character but does not have
148
148
  another character with this function so rArr can be used for 'implies'
149
149
  as ISOtech suggests -->
150
150
  <!ENTITY dArr "&#8659;"> <!-- downwards double arrow, U+21D3 ISOamsa -->
@@ -217,11 +217,11 @@
217
217
  <!ENTITY rfloor "&#8971;"> <!-- right floor, U+230B ISOamsc -->
218
218
  <!ENTITY lang "&#9001;"> <!-- left-pointing angle bracket = bra,
219
219
  U+2329 ISOtech -->
220
- <!-- lang is NOT the same character as U+003C 'less than sign'
220
+ <!-- lang is NOT the same character as U+003C 'less than sign'
221
221
  or U+2039 'single left-pointing angle quotation mark' -->
222
222
  <!ENTITY rang "&#9002;"> <!-- right-pointing angle bracket = ket,
223
223
  U+232A ISOtech -->
224
- <!-- rang is NOT the same character as U+003E 'greater than sign'
224
+ <!-- rang is NOT the same character as U+003E 'greater than sign'
225
225
  or U+203A 'single right-pointing angle quotation mark' -->
226
226
 
227
227
  <!-- Geometric Shapes -->
@@ -9,7 +9,7 @@
9
9
  For further information, see: http://www.w3.org/TR/xhtml1
10
10
 
11
11
  Copyright (c) 1998-2002 W3C (MIT, INRIA, Keio),
12
- All Rights Reserved.
12
+ All Rights Reserved.
13
13
 
14
14
  This DTD module is identified by the PUBLIC and SYSTEM identifiers:
15
15
 
@@ -199,7 +199,7 @@
199
199
 
200
200
  <!ENTITY % special.extra
201
201
  "object | applet | img | map | iframe">
202
-
202
+
203
203
  <!ENTITY % special.basic
204
204
  "br | span | bdo">
205
205
 
@@ -304,7 +304,7 @@
304
304
  window title. Exactly one title is required per document.
305
305
  -->
306
306
  <!ELEMENT title (#PCDATA)>
307
- <!ATTLIST title
307
+ <!ATTLIST title
308
308
  %i18n;
309
309
  id ID #IMPLIED
310
310
  >
@@ -550,14 +550,14 @@
550
550
  start %Number; #IMPLIED
551
551
  >
552
552
 
553
- <!-- single column list (DEPRECATED) -->
553
+ <!-- single column list (DEPRECATED) -->
554
554
  <!ELEMENT menu (li)+>
555
555
  <!ATTLIST menu
556
556
  %attrs;
557
557
  compact (compact) #IMPLIED
558
558
  >
559
559
 
560
- <!-- multiple column list (DEPRECATED) -->
560
+ <!-- multiple column list (DEPRECATED) -->
561
561
  <!ELEMENT dir (li)+>
562
562
  <!ATTLIST dir
563
563
  %attrs;
@@ -616,7 +616,7 @@
616
616
 
617
617
  <!--=================== Preformatted Text ================================-->
618
618
 
619
- <!-- content is %Inline; excluding
619
+ <!-- content is %Inline; excluding
620
620
  "img|object|applet|big|small|sub|sup|font|basefont" -->
621
621
 
622
622
  <!ELEMENT pre %pre.content;>
@@ -1034,7 +1034,7 @@
1034
1034
 
1035
1035
  <!--
1036
1036
  Content is %Flow; excluding a, form, form controls, iframe
1037
- -->
1037
+ -->
1038
1038
  <!ELEMENT button %button.content;> <!-- push button -->
1039
1039
  <!ATTLIST button
1040
1040
  %attrs;
@@ -1075,7 +1075,7 @@
1075
1075
  -->
1076
1076
 
1077
1077
  <!ENTITY % TRules "(none | groups | rows | cols | all)">
1078
-
1078
+
1079
1079
  <!-- horizontal placement of table relative to document -->
1080
1080
  <!ENTITY % TAlign "(left|center|right)">
1081
1081
 
@@ -9,7 +9,7 @@
9
9
  For further information, see: http://www.w3.org/TR/xhtml1
10
10
 
11
11
  Copyright (c) 1998-2002 W3C (MIT, INRIA, Keio),
12
- All Rights Reserved.
12
+ All Rights Reserved.
13
13
 
14
14
  This DTD module is identified by the PUBLIC and SYSTEM identifiers:
15
15
 
@@ -263,7 +263,7 @@
263
263
  window title. Exactly one title is required per document.
264
264
  -->
265
265
  <!ELEMENT title (#PCDATA)>
266
- <!ATTLIST title
266
+ <!ATTLIST title
267
267
  %i18n;
268
268
  id ID #IMPLIED
269
269
  >
@@ -801,7 +801,7 @@
801
801
 
802
802
  <!--
803
803
  Content is %Flow; excluding a, form and form controls
804
- -->
804
+ -->
805
805
  <!ELEMENT button %button.content;> <!-- push button -->
806
806
  <!ATTLIST button
807
807
  %attrs;
@@ -834,7 +834,7 @@
834
834
  -->
835
835
 
836
836
  <!ENTITY % TRules "(none | groups | rows | cols | all)">
837
-
837
+
838
838
  <!-- horizontal alignment attributes for cell contents
839
839
 
840
840
  char alignment char, e.g. char=':'
@@ -9,7 +9,7 @@
9
9
  For further information, see: http://www.w3.org/TR/xhtml1
10
10
 
11
11
  Copyright (c) 1998-2002 W3C (MIT, INRIA, Keio),
12
- All Rights Reserved.
12
+ All Rights Reserved.
13
13
 
14
14
  This DTD module is identified by the PUBLIC and SYSTEM identifiers:
15
15
 
@@ -196,7 +196,7 @@
196
196
 
197
197
  <!ENTITY % special.extra
198
198
  "object | applet | img | map | iframe">
199
-
199
+
200
200
  <!ENTITY % special.basic
201
201
  "br | span | bdo">
202
202
 
@@ -299,7 +299,7 @@
299
299
  window title. Exactly one title is required per document.
300
300
  -->
301
301
  <!ELEMENT title (#PCDATA)>
302
- <!ATTLIST title
302
+ <!ATTLIST title
303
303
  %i18n;
304
304
  id ID #IMPLIED
305
305
  >
@@ -517,14 +517,14 @@
517
517
  start %Number; #IMPLIED
518
518
  >
519
519
 
520
- <!-- single column list (DEPRECATED) -->
520
+ <!-- single column list (DEPRECATED) -->
521
521
  <!ELEMENT menu (li)+>
522
522
  <!ATTLIST menu
523
523
  %attrs;
524
524
  compact (compact) #IMPLIED
525
525
  >
526
526
 
527
- <!-- multiple column list (DEPRECATED) -->
527
+ <!-- multiple column list (DEPRECATED) -->
528
528
  <!ELEMENT dir (li)+>
529
529
  <!ATTLIST dir
530
530
  %attrs;
@@ -583,7 +583,7 @@
583
583
 
584
584
  <!--=================== Preformatted Text ================================-->
585
585
 
586
- <!-- content is %Inline; excluding
586
+ <!-- content is %Inline; excluding
587
587
  "img|object|applet|big|small|sub|sup|font|basefont" -->
588
588
 
589
589
  <!ELEMENT pre %pre.content;>
@@ -1000,7 +1000,7 @@
1000
1000
 
1001
1001
  <!--
1002
1002
  Content is %Flow; excluding a, form, form controls, iframe
1003
- -->
1003
+ -->
1004
1004
  <!ELEMENT button %button.content;> <!-- push button -->
1005
1005
  <!ATTLIST button
1006
1006
  %attrs;
@@ -1041,7 +1041,7 @@
1041
1041
  -->
1042
1042
 
1043
1043
  <!ENTITY % TRules "(none | groups | rows | cols | all)">
1044
-
1044
+
1045
1045
  <!-- horizontal placement of table relative to document -->
1046
1046
  <!ENTITY % TAlign "(left|center|right)">
1047
1047
 
@@ -37,9 +37,18 @@ module ValidateWebsite
37
37
  @site = @options[:site]
38
38
  end
39
39
 
40
+ ##
41
+ #
42
+ # @param [Hash] options
43
+ # :quiet [Boolean] no output (true, false)
44
+ # :color [Boolean] color output (true, false)
45
+ # :exclude [String] a String used by Regexp.new
46
+ # :markup_validation [Boolean] Check the markup validity
47
+ # :not_found [Boolean] Check for not found page (404)
48
+ #
40
49
  def crawl(opts={})
41
50
  opts = @options.merge(opts)
42
- puts color(:note, "validating #{@site}", opts[:color])
51
+ puts color(:note, "validating #{@site}", opts[:color]) unless opts[:quiet]
43
52
 
44
53
  @anemone = Anemone.crawl(@site, opts) do |anemone|
45
54
  anemone.skip_links_like Regexp.new(opts[:exclude]) if opts[:exclude]
@@ -87,7 +96,8 @@ module ValidateWebsite
87
96
  files.each do |f|
88
97
  next unless File.file?(f)
89
98
 
90
- page = Anemone::Page.new(URI.parse(opts[:site] + f), :body => open(f).read,
99
+ page = Anemone::Page.new(URI.parse(opts[:site] + URI.encode(f)),
100
+ :body => open(f).read,
91
101
  :headers => {'content-type' => ['text/html', 'application/xhtml+xml']})
92
102
 
93
103
  if opts[:markup_validation]
@@ -180,6 +190,14 @@ module ValidateWebsite
180
190
  end
181
191
  end
182
192
 
193
+ ##
194
+ # @param [Nokogiri::HTML::Document] doc
195
+ # @param [String] body The raw HTTP response body of the page
196
+ # @param [String] url
197
+ # @param [Hash] options
198
+ # :quiet no output (true, false)
199
+ # :color color output (true, false)
200
+ #
183
201
  def validate(doc, body, url, opts={})
184
202
  opts = @options.merge(opts)
185
203
  validator = Validator.new(doc, body)
@@ -1,4 +1,6 @@
1
1
  # encoding: utf-8
2
+ require 'uri'
3
+ require 'nokogiri'
2
4
 
3
5
  module ValidateWebsite
4
6
  class Validator
@@ -6,6 +8,9 @@ module ValidateWebsite
6
8
 
7
9
  attr_reader :original_doc, :body, :dtd, :doc, :namespace, :xsd, :errors
8
10
 
11
+ ##
12
+ # @param [Nokogiri::HTML::Document] original_doc
13
+ # @param [String] body The raw HTTP response body of the page
9
14
  def initialize(original_doc, body)
10
15
  @original_doc = original_doc
11
16
  @body = body
@@ -37,6 +42,7 @@ module ValidateWebsite
37
42
  @errors = @xsd.validate(@doc)
38
43
  elsif document =~ /^\<!DOCTYPE html\>/i
39
44
  # TODO: use a local Java, Python parser... write a Ruby HTML5 parser ?
45
+ require 'net/http'
40
46
  require 'multipart_body'
41
47
  url = URI.parse('http://validator.nu/')
42
48
  multipart = MultipartBody.new(:content => document)
@@ -63,6 +69,8 @@ module ValidateWebsite
63
69
  @errors << e
64
70
  end
65
71
 
72
+ ##
73
+ # @return [Boolean]
66
74
  def valid?
67
75
  @errors.length == 0
68
76
  end
data/spec/core_spec.rb CHANGED
@@ -1,56 +1,65 @@
1
- require File.expand_path(File.join(File.dirname(__FILE__), 'spec_helper'))
1
+ require File.expand_path('../spec_helper', __FILE__)
2
2
 
3
- module ValidateWebsite
4
- describe Core do
3
+ describe ValidateWebsite::Core do
5
4
 
6
- before(:each) do
7
- FakeWeb.clean_registry
8
- @validate_website = ValidateWebsite::Core.new(:color => false)
9
- end
5
+ before do
6
+ FakeWeb.clean_registry
7
+ @validate_website = ValidateWebsite::Core.new(:color => false)
8
+ end
10
9
 
11
- context('html') do
12
- it "should extract url" do
13
- name = 'xhtml1-strict'
14
- file = File.join('spec', 'data', "#{name}.html")
15
- page = FakePage.new(name,
16
- :body => open(file).read,
17
- :content_type => 'text/html')
18
- @validate_website.site = page.url
19
- @validate_website.crawl(:quiet => true)
20
- @validate_website.anemone.should have(5).pages
21
- end
10
+ describe('html') do
11
+ it "extract url" do
12
+ name = 'xhtml1-strict'
13
+ file = File.join('spec', 'data', "#{name}.html")
14
+ page = FakePage.new(name,
15
+ :body => open(file).read,
16
+ :content_type => 'text/html')
17
+ @validate_website.site = page.url
18
+ @validate_website.crawl(:quiet => true)
19
+ @validate_website.anemone.pages.size.must_equal 5
22
20
  end
21
+ end
23
22
 
24
- context('css') do
25
- it "should crawl css and extract url" do
26
- page = FakePage.new('test.css',
27
- :body => ".test {background-image: url(pouet);}
23
+ describe('css') do
24
+ it "crawl css and extract url" do
25
+ page = FakePage.new('test.css',
26
+ :body => ".test {background-image: url(pouet);}
28
27
  .tests {background-image: url(/image/pouet.png)}
29
28
  .tests {background-image: url(/image/pouet_42.png)}
30
29
  .tests {background-image: url(/image/pouet)}",
31
30
  :content_type => 'text/css')
32
- @validate_website.site = page.url
33
- @validate_website.crawl
34
- @validate_website.anemone.should have(5).pages
35
- end
31
+ @validate_website.site = page.url
32
+ @validate_website.crawl(:quiet => true)
33
+ @validate_website.anemone.pages.size.must_equal 5
34
+ end
36
35
 
37
- it "should extract url with single quote" do
38
- page = FakePage.new('test.css',
39
- :body => ".test {background-image: url('pouet');}",
40
- :content_type => 'text/css')
41
- @validate_website.site = page.url
42
- @validate_website.crawl
43
- @validate_website.anemone.should have(2).pages
44
- end
36
+ it "should extract url with single quote" do
37
+ page = FakePage.new('test.css',
38
+ :body => ".test {background-image: url('pouet');}",
39
+ :content_type => 'text/css')
40
+ @validate_website.site = page.url
41
+ @validate_website.crawl(:quiet => true)
42
+ @validate_website.anemone.pages.size.must_equal 2
43
+ end
44
+
45
+ it "should extract url with double quote" do
46
+ page = FakePage.new('test.css',
47
+ :body => ".test {background-image: url(\"pouet\");}",
48
+ :content_type => 'text/css')
49
+ @validate_website.site = page.url
50
+ @validate_website.crawl(:quiet => true)
51
+ @validate_website.anemone.pages.size.must_equal 2
52
+ end
53
+ end
45
54
 
46
- it "should extract url with double quote" do
47
- page = FakePage.new('test.css',
48
- :body => ".test {background-image: url(\"pouet\");}",
49
- :content_type => 'text/css')
50
- @validate_website.site = page.url
51
- @validate_website.crawl
52
- @validate_website.anemone.should have(2).pages
53
- end
55
+ describe('static') do
56
+ it 'no space in directory name' do
57
+ pattern = File.join(File.dirname(__FILE__), 'example/**/*.html')
58
+ @validate_website.crawl_static(:pattern => pattern,
59
+ :site => 'http://dev.af83.com/',
60
+ :markup_validation => false,
61
+ :not_found => false,
62
+ :quiet => true)
54
63
  end
55
64
  end
56
65
  end
@@ -0,0 +1,22 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
3
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
4
+ <html xmlns="http://www.w3.org/1999/xhtml">
5
+ <head>
6
+ <title>title</title>
7
+ </head>
8
+ <body>
9
+ <h1>Title 1</h1>
10
+ <p>Paragraphe.</p>
11
+
12
+ <h2>Title 2</h2>
13
+ <ul>
14
+ <li><a href="/my-url1" title="title">my url</a></li>
15
+ <li><a href="/my-url2" title="title">my url</a></li>
16
+ <li><a href="/my-url1" title="title">my url</a></li>
17
+ </ul>
18
+ <p><img src="http://test.com/img.png" alt="non local img" /></p>
19
+ <p><img src="http://www.example.com/img1.png" alt="local img with absolute uri" /></p>
20
+ <p><img src="/img2.png" alt="local img with non absolute uri" /></p>
21
+ </body>
22
+ </html>
data/spec/spec_helper.rb CHANGED
@@ -1,9 +1,7 @@
1
+ require 'minitest/autorun'
1
2
  require File.expand_path(File.join(File.dirname(__FILE__), 'fakeweb_helper'))
2
3
  require 'anemone'
3
4
 
4
- lib_dir = File.join(File.dirname(__FILE__), '..', 'lib')
5
- $:.unshift(File.expand_path(lib_dir))
6
-
7
5
  require 'validate_website/core'
8
6
 
9
7
  SPEC_DOMAIN = 'http://www.example.com/'
@@ -1,69 +1,67 @@
1
- require File.expand_path(File.join(File.dirname(__FILE__), 'spec_helper'))
1
+ require File.expand_path('../spec_helper', __FILE__)
2
2
 
3
- module ValidateWebsite
4
- describe Validator do
5
- before(:each) do
6
- FakeWeb.clean_registry
7
- @http = Anemone::HTTP.new
3
+ describe ValidateWebsite::Validator do
4
+ before do
5
+ FakeWeb.clean_registry
6
+ @http = Anemone::HTTP.new
7
+ end
8
+
9
+ describe("xhtml1") do
10
+ it "xhtml1-strict should be valid" do
11
+ name = 'xhtml1-strict'
12
+ dtd_uri = 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
13
+ file = File.join('spec', 'data', "#{name}.html")
14
+ page = FakePage.new(name,
15
+ :body => open(file).read,
16
+ :content_type => 'text/html')
17
+ @xhtml1_page = @http.fetch_page(page.url)
18
+ validator = ValidateWebsite::Validator.new(@xhtml1_page.doc, @xhtml1_page.body)
19
+ validator.dtd.system_id.must_equal dtd_uri
20
+ validator.namespace.must_equal name
21
+ validator.valid?.must_equal true
8
22
  end
23
+ end
9
24
 
10
- describe("xhtml1") do
11
- it "xhtml1-strict should be valid" do
12
- name = 'xhtml1-strict'
13
- dtd_uri = 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
25
+ describe('html5') do
26
+ describe('when valid') do
27
+ before do
28
+ validator_res = File.join('spec', 'data', 'validator.nu-success.html')
29
+ FakeWeb.register_uri(:any, 'http://validator.nu/',
30
+ :body => open(validator_res).read)
31
+ end
32
+ it "html5 should be valid" do
33
+ name = 'html5'
14
34
  file = File.join('spec', 'data', "#{name}.html")
15
35
  page = FakePage.new(name,
16
36
  :body => open(file).read,
17
37
  :content_type => 'text/html')
18
- @xhtml1_page = @http.fetch_page(page.url)
19
- validator = Validator.new(@xhtml1_page.doc, @xhtml1_page.body)
20
- validator.dtd.system_id.should == dtd_uri
21
- validator.namespace.should == name
22
- validator.should be_valid
23
- end
24
- end
25
-
26
- describe('html5') do
27
- context('when valid') do
28
- before do
29
- validator_res = File.join('spec', 'data', 'validator.nu-success.html')
30
- FakeWeb.register_uri(:any, 'http://validator.nu/',
31
- :body => open(validator_res).read)
32
- end
33
- it "html5 should be valid" do
34
- name = 'html5'
35
- file = File.join('spec', 'data', "#{name}.html")
36
- page = FakePage.new(name,
37
- :body => open(file).read,
38
- :content_type => 'text/html')
39
- @html5_page = @http.fetch_page(page.url)
40
- validator = Validator.new(@html5_page.doc, @html5_page.body)
41
- validator.should be_valid
42
- end
43
- it "with DLFP" do
44
- name = 'html5'
45
- file = File.join('spec', 'data', "#{name}-linuxfr.html")
46
- page = FakePage.new(name,
47
- :body => open(file).read,
48
- :content_type => 'text/html')
49
- @html5_page = @http.fetch_page(page.url)
50
- validator = Validator.new(@html5_page.doc, @html5_page.body)
51
- validator.should be_valid
52
- end
38
+ @html5_page = @http.fetch_page(page.url)
39
+ validator = ValidateWebsite::Validator.new(@html5_page.doc, @html5_page.body)
40
+ validator.valid?.must_equal true
53
41
  end
54
- end
55
-
56
- describe('html4') do
57
- it 'should validate html4' do
58
- name = 'html4-strict'
59
- file = File.join('spec', 'data', "#{name}.html")
42
+ it "with DLFP" do
43
+ name = 'html5'
44
+ file = File.join('spec', 'data', "#{name}-linuxfr.html")
60
45
  page = FakePage.new(name,
61
46
  :body => open(file).read,
62
47
  :content_type => 'text/html')
63
- @html4_strict_page = @http.fetch_page(page.url)
64
- validator = Validator.new(@html4_strict_page.doc, @html4_strict_page.body)
65
- validator.should be_valid
48
+ @html5_page = @http.fetch_page(page.url)
49
+ validator = ValidateWebsite::Validator.new(@html5_page.doc, @html5_page.body)
50
+ validator.valid?.must_equal true
66
51
  end
67
52
  end
68
53
  end
54
+
55
+ describe('html4') do
56
+ it 'should validate html4' do
57
+ name = 'html4-strict'
58
+ file = File.join('spec', 'data', "#{name}.html")
59
+ page = FakePage.new(name,
60
+ :body => open(file).read,
61
+ :content_type => 'text/html')
62
+ @html4_strict_page = @http.fetch_page(page.url)
63
+ validator = ValidateWebsite::Validator.new(@html4_strict_page.doc, @html4_strict_page.body)
64
+ validator.valid?.must_equal true
65
+ end
66
+ end
69
67
  end
metadata CHANGED
@@ -1,128 +1,91 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: validate-website
3
- version: !ruby/object:Gem::Version
4
- hash: 3
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.7.1
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 7
9
- - 0
10
- version: 0.7.0
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Laurent Arnoud
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2011-06-06 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
12
+ date: 2011-12-25 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
21
15
  name: anemone
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: &14219840 !ruby/object:Gem::Requirement
24
17
  none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- hash: 5
29
- segments:
30
- - 0
31
- - 6
32
- - 1
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
33
21
  version: 0.6.1
34
22
  type: :runtime
35
- version_requirements: *id001
36
- - !ruby/object:Gem::Dependency
37
- name: rainbow
38
23
  prerelease: false
39
- requirement: &id002 !ruby/object:Gem::Requirement
24
+ version_requirements: *14219840
25
+ - !ruby/object:Gem::Dependency
26
+ name: rainbow
27
+ requirement: &14218980 !ruby/object:Gem::Requirement
40
28
  none: false
41
- requirements:
42
- - - ">="
43
- - !ruby/object:Gem::Version
44
- hash: 17
45
- segments:
46
- - 1
47
- - 1
48
- - 1
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
49
32
  version: 1.1.1
50
33
  type: :runtime
51
- version_requirements: *id002
52
- - !ruby/object:Gem::Dependency
53
- name: multipart_body
54
34
  prerelease: false
55
- requirement: &id003 !ruby/object:Gem::Requirement
35
+ version_requirements: *14218980
36
+ - !ruby/object:Gem::Dependency
37
+ name: multipart_body
38
+ requirement: &14218400 !ruby/object:Gem::Requirement
56
39
  none: false
57
- requirements:
58
- - - ">="
59
- - !ruby/object:Gem::Version
60
- hash: 21
61
- segments:
62
- - 0
63
- - 2
64
- - 1
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
65
43
  version: 0.2.1
66
44
  type: :runtime
67
- version_requirements: *id003
68
- - !ruby/object:Gem::Dependency
69
- name: rake
70
45
  prerelease: false
71
- requirement: &id004 !ruby/object:Gem::Requirement
46
+ version_requirements: *14218400
47
+ - !ruby/object:Gem::Dependency
48
+ name: rake
49
+ requirement: &14217660 !ruby/object:Gem::Requirement
72
50
  none: false
73
- requirements:
74
- - - ">="
75
- - !ruby/object:Gem::Version
76
- hash: 49
77
- segments:
78
- - 0
79
- - 8
80
- - 7
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
81
54
  version: 0.8.7
82
55
  type: :development
83
- version_requirements: *id004
84
- - !ruby/object:Gem::Dependency
85
- name: rspec
86
56
  prerelease: false
87
- requirement: &id005 !ruby/object:Gem::Requirement
57
+ version_requirements: *14217660
58
+ - !ruby/object:Gem::Dependency
59
+ name: minitest
60
+ requirement: &14232420 !ruby/object:Gem::Requirement
88
61
  none: false
89
- requirements:
90
- - - ">="
91
- - !ruby/object:Gem::Version
92
- hash: 23
93
- segments:
94
- - 2
95
- - 6
96
- - 0
97
- version: 2.6.0
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: 2.1.0
98
66
  type: :development
99
- version_requirements: *id005
100
- - !ruby/object:Gem::Dependency
101
- name: fakeweb
102
67
  prerelease: false
103
- requirement: &id006 !ruby/object:Gem::Requirement
68
+ version_requirements: *14232420
69
+ - !ruby/object:Gem::Dependency
70
+ name: fakeweb
71
+ requirement: &14230680 !ruby/object:Gem::Requirement
104
72
  none: false
105
- requirements:
106
- - - ">="
107
- - !ruby/object:Gem::Version
108
- hash: 27
109
- segments:
110
- - 1
111
- - 3
112
- - 0
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
113
76
  version: 1.3.0
114
77
  type: :development
115
- version_requirements: *id006
116
- description: validate-website is a web crawler for checking the markup validity with XML Schema / DTD and not found urls.
78
+ prerelease: false
79
+ version_requirements: *14230680
80
+ description: validate-website is a web crawler for checking the markup validity with
81
+ XML Schema / DTD and not found urls.
117
82
  email: laurent@spkdev.net
118
- executables:
83
+ executables:
119
84
  - validate-website
120
85
  - validate-website-static
121
86
  extensions: []
122
-
123
87
  extra_rdoc_files: []
124
-
125
- files:
88
+ files:
126
89
  - README.rdoc
127
90
  - Rakefile
128
91
  - LICENSE
@@ -134,6 +97,16 @@ files:
134
97
  - lib/validate_website.rb
135
98
  - man/man1/validate-website.1
136
99
  - man/man1/validate-website-static.1
100
+ - spec/spec_helper.rb
101
+ - spec/data/html5.html
102
+ - spec/data/html4-strict.html
103
+ - spec/data/validator.nu-success.html
104
+ - spec/data/html5-linuxfr.html
105
+ - spec/data/xhtml1-strict.html
106
+ - spec/example/ruby smalltalk/blockcamp-paris-le-28-novembre.html
107
+ - spec/core_spec.rb
108
+ - spec/fakeweb_helper.rb
109
+ - spec/validator_spec.rb
137
110
  - data/schemas/xhtml-basic10-module-redefines-1.xsd
138
111
  - data/schemas/xhtml-frames-1.xsd
139
112
  - data/schemas/xhtml-basic11.xsd
@@ -224,44 +197,28 @@ files:
224
197
  - data/schemas/xhtml-ssismap-1.xsd
225
198
  - data/schemas/xhtml-list-1.xsd
226
199
  - data/schemas/xhtml-charent-1.xsd
227
- - spec/spec_helper.rb
228
- - spec/data/html5.html
229
- - spec/data/html4-strict.html
230
- - spec/data/validator.nu-success.html
231
- - spec/data/html5-linuxfr.html
232
- - spec/data/xhtml1-strict.html
233
- - spec/core_spec.rb
234
- - spec/fakeweb_helper.rb
235
- - spec/validator_spec.rb
236
200
  - bin/validate-website
237
201
  - bin/validate-website-static
238
202
  homepage: http://github.com/spk/validate-website
239
- licenses:
203
+ licenses:
240
204
  - MIT
241
205
  post_install_message:
242
206
  rdoc_options: []
243
-
244
- require_paths:
207
+ require_paths:
245
208
  - lib
246
- required_ruby_version: !ruby/object:Gem::Requirement
209
+ required_ruby_version: !ruby/object:Gem::Requirement
247
210
  none: false
248
- requirements:
249
- - - ">="
250
- - !ruby/object:Gem::Version
251
- hash: 3
252
- segments:
253
- - 0
254
- version: "0"
255
- required_rubygems_version: !ruby/object:Gem::Requirement
211
+ requirements:
212
+ - - ! '>='
213
+ - !ruby/object:Gem::Version
214
+ version: '0'
215
+ required_rubygems_version: !ruby/object:Gem::Requirement
256
216
  none: false
257
- requirements:
258
- - - ">="
259
- - !ruby/object:Gem::Version
260
- hash: 3
261
- segments:
262
- - 0
263
- version: "0"
264
- requirements:
217
+ requirements:
218
+ - - ! '>='
219
+ - !ruby/object:Gem::Version
220
+ version: '0'
221
+ requirements:
265
222
  - anemone
266
223
  - rainbow
267
224
  - multipart_body
@@ -270,14 +227,7 @@ rubygems_version: 1.7.2
270
227
  signing_key:
271
228
  specification_version: 3
272
229
  summary: Web crawler for checking the validity of your documents
273
- test_files:
274
- - spec/spec_helper.rb
275
- - spec/data/html5.html
276
- - spec/data/html4-strict.html
277
- - spec/data/validator.nu-success.html
278
- - spec/data/html5-linuxfr.html
279
- - spec/data/xhtml1-strict.html
230
+ test_files:
280
231
  - spec/core_spec.rb
281
- - spec/fakeweb_helper.rb
282
232
  - spec/validator_spec.rb
283
233
  has_rdoc: