validate-website 0.5.3 → 0.5.7

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -29,8 +29,6 @@ found urls.
29
29
  Url to exclude (ex: redirect|news)
30
30
  -f, --file FILE
31
31
  Save not well formed or not found (with -n used) urls
32
- -a, --authorization USER,PASS
33
- Basic http authentification
34
32
  -c, --cookies COOKIES
35
33
  Set defaults cookies
36
34
  -m, --[no-]markup-validation
data/Rakefile CHANGED
@@ -7,7 +7,7 @@ require "rspec/core/rake_task" # RSpec 2.0
7
7
  # Globals
8
8
 
9
9
  PKG_NAME = 'validate-website'
10
- PKG_VERSION = '0.5.3'
10
+ PKG_VERSION = '0.5.7'
11
11
 
12
12
  PKG_FILES = ['README.rdoc', 'Rakefile', 'LICENSE']
13
13
  Find.find('bin/', 'lib/', 'man/', 'spec/') do |f|
@@ -43,15 +43,15 @@ spec = Gem::Specification.new do |s|
43
43
  s.summary = 'Web crawler for checking the validity of your documents'
44
44
  s.name = PKG_NAME
45
45
  s.version = PKG_VERSION
46
- s.requirements << 'spk-anemone' << 'rainbow' << 'spk-html5'
47
- s.add_dependency('spk-anemone', '>= 0.4.0')
46
+ s.requirements << 'anemone' << 'rainbow'
47
+ s.add_dependency('anemone', '>= 0.5.0')
48
48
  s.add_dependency('rainbow', '>= 1.1')
49
- s.add_dependency('spk-html5', '= 0.10.1')
50
49
  s.add_development_dependency('rspec', '>= 2.0.0')
51
50
  s.add_development_dependency('fakeweb', '>= 1.3.0')
52
51
  s.require_path = 'lib'
53
52
  s.bindir = 'bin'
54
53
  s.executables << 'validate-website'
54
+ s.executables << 'validate-website-static'
55
55
  s.files = PKG_FILES
56
56
  s.description = 'validate-website is a web crawler for checking the markup' +
57
57
  'validity and not found urls.'
data/bin/validate-website CHANGED
@@ -11,19 +11,6 @@ require 'rubygems' if developer_mode
11
11
  require 'validate_website'
12
12
 
13
13
  validate_website = ValidateWebsite.new(ARGV)
14
- options = validate_website.options
15
-
16
- validate_website.crawl options[:site],
17
- :markup_validation => options[:markup_validation],
18
- :user_agent => options[:user_agent],
19
- :exclude => options[:exclude],
20
- :file => options[:file],
21
- :authorization => options[:authorization],
22
- :not_found => options[:not_found],
23
- :cookies => options[:cookies],
24
- :accept_cookies => options[:accept_cookies],
25
- :verbose => options[:debug],
26
- :validate_verbose => options[:verbose],
27
- :quiet => options[:quiet]
14
+ validate_website.crawl
28
15
 
29
16
  exit(validate_website.exit_status)
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ lib_dir = File.join(File.dirname(__FILE__), '..', 'lib')
5
+ $:.unshift(File.expand_path(lib_dir))
6
+
7
+ developer_mode = false
8
+ developer_mode = true if __FILE__ == $0
9
+ require 'rubygems' if developer_mode
10
+
11
+ require 'validate_website'
12
+
13
+ validate_website = ValidateWebsite.new(ARGV)
14
+
15
+ files = Dir.glob(File.join("**", "*.html"))
16
+ files.each do |f|
17
+ next unless File.file?(f)
18
+
19
+ body = open(f).read
20
+ doc = Nokogiri::HTML(body)
21
+
22
+ validate_website.validate(doc, body, f)
23
+ end
24
+
25
+ exit(validate_website.exit_status)
@@ -2,12 +2,15 @@
2
2
 
3
3
  require 'optparse'
4
4
  require 'open-uri'
5
+
5
6
  require 'validator'
6
- require 'anemone'
7
7
  require 'colorful_messages'
8
8
 
9
+ require 'anemone'
10
+
9
11
  class ValidateWebsite
10
12
 
13
+ attr_accessor :site
11
14
  attr_reader :options, :anemone
12
15
 
13
16
  include ColorfulMessages
@@ -17,34 +20,42 @@ class ValidateWebsite
17
20
  EXIT_FAILURE_NOT_FOUND = 65
18
21
  EXIT_FAILURE_MARKUP_NOT_FOUND = 66
19
22
 
20
- def initialize(args=[])
23
+ def initialize(args=[], validation_type = :crawl)
21
24
  @markup_error = nil
22
25
  @not_found_error = nil
23
26
 
24
- @options = {
27
+ @options_crawl = {
25
28
  :site => 'http://localhost:3000/',
26
29
  :markup_validation => true,
27
- :user_agent => Anemone::Core::DEFAULT_OPTS[:user_agent],
28
30
  :exclude => nil,
29
31
  :file => nil,
30
- :authorization => nil,
31
32
  # log not found url (404 status code)
32
33
  :not_found => false,
34
+ # internal verbose for ValidateWebsite
35
+ :validate_verbose => false,
36
+ :quiet => false,
37
+
38
+ # Anemone options see anemone/lib/anemone/core.rb
39
+ :verbose => false,
40
+ :user_agent => Anemone::Core::DEFAULT_OPTS[:user_agent],
33
41
  :cookies => nil,
34
42
  :accept_cookies => true,
35
- :verbose => false,
36
- :debug => false,
37
- :quiet => false,
43
+ :redirect_limit => 0,
38
44
  }
39
- parse(args)
45
+ send("parse_#{validation_type}_options", args)
40
46
 
41
- # truncate file
42
- if options[:file]
43
- open(options[:file], 'w').write('')
47
+ @file = @options[:file]
48
+ if @file
49
+ # truncate file
50
+ open(@file, 'w').write('')
44
51
  end
52
+
53
+ @site = @options[:site]
45
54
  end
46
55
 
47
- def parse(args)
56
+ def parse_crawl_options(args)
57
+ @options = @options_crawl
58
+
48
59
  opts = OptionParser.new do |o|
49
60
  o.set_summary_indent(' ')
50
61
  o.banner = 'Usage: validate-website [OPTIONS]'
@@ -66,12 +77,10 @@ class ValidateWebsite
66
77
  }
67
78
  o.on("-f", "--file 'FILE'", String,
68
79
  "Save not well formed or not found urls") { |v| @options[:file] = v }
69
- o.on("-a", "--authorization 'USER,PASS'", Array,
70
- "Basic http authentification") { |v|
71
- @options[:authorization] = v
72
- }
80
+
73
81
  o.on("-c", "--cookies 'COOKIES'", String,
74
82
  "Set defaults cookies") { |v| @options[:cookies] = v }
83
+
75
84
  o.on("-m", "--[no-]markup-validation",
76
85
  "Markup validation (Default: #{@options[:markup_validation]})") { |v|
77
86
  @options[:markup_validation] = v
@@ -81,16 +90,16 @@ class ValidateWebsite
81
90
  @options[:not_found] = v
82
91
  }
83
92
  o.on("-v", "--verbose",
84
- "Show validator errors (Default: #{@options[:verbose]})") { |v|
85
- @options[:verbose] = v
93
+ "Show validator errors (Default: #{@options[:validate_verbose]})") { |v|
94
+ @options[:validate_verbose] = v
86
95
  }
87
96
  o.on("-q", "--quiet",
88
97
  "Only report errors (Default: #{@options[:quiet]})") { |v|
89
98
  @options[:quiet] = v
90
99
  }
91
100
  o.on("-d", "--debug",
92
- "Show anemone log (Default: #{@options[:debug]})") { |v|
93
- @options[:debug] = v
101
+ "Show anemone log (Default: #{@options[:verbose]})") { |v|
102
+ @options[:verbose] = v
94
103
  }
95
104
 
96
105
  o.separator ""
@@ -99,12 +108,32 @@ class ValidateWebsite
99
108
  opts.parse!(args)
100
109
  end
101
110
 
102
- def crawl(site, opts={})
103
- puts note("Validating #{site}") if opts[:validate_verbose]
111
+ def validate(doc, body, url, opts={})
112
+ opts = @options.merge(opts)
113
+ validator = Validator.new(doc, body)
114
+ msg = " well formed? %s" % validator.valid?
115
+ if validator.valid?
116
+ unless opts[:quiet]
117
+ print info(url)
118
+ puts success(msg)
119
+ end
120
+ else
121
+ @markup_error = true
122
+ print info(url)
123
+ puts error(msg)
124
+ puts error(validator.errors.join(", ")) if opts[:validate_verbose]
125
+ to_file(url)
126
+ end
127
+ end
104
128
 
105
- @anemone = Anemone.crawl(site, opts) do |anemone|
129
+ def crawl(opts={})
130
+ opts = @options.merge(opts)
131
+ puts note("Validating #{@site}") if opts[:validate_verbose]
132
+
133
+ @anemone = Anemone.crawl(@site, opts) do |anemone|
106
134
  anemone.skip_links_like Regexp.new(opts[:exclude]) if opts[:exclude]
107
135
 
136
+ # select the links on each page to follow (iframe, link, css url)
108
137
  anemone.focus_crawl { |p|
109
138
  links = []
110
139
  if p.html?
@@ -134,20 +163,7 @@ class ValidateWebsite
134
163
  if opts[:markup_validation]
135
164
  # validate html/html+xml
136
165
  if page.html? && page.fetched?
137
- validator = Validator.new(page)
138
- msg = " well formed? %s" % validator.valid?
139
- if validator.valid?
140
- unless opts[:quiet]
141
- print info(url)
142
- puts success(msg)
143
- end
144
- else
145
- @markup_error = true
146
- print info(url)
147
- puts error(msg)
148
- puts error(validator.errors.join(", ")) if opts[:validate_verbose]
149
- to_file(url)
150
- end
166
+ validate(page.doc, page.body, url, opts)
151
167
  end
152
168
  end
153
169
 
@@ -156,6 +172,9 @@ class ValidateWebsite
156
172
  puts error("%s linked in %s but not exist" % [url, page.referer])
157
173
  to_file(url)
158
174
  end
175
+
176
+ # throw away the page (hope this saves memory)
177
+ page = nil
159
178
  }
160
179
  end
161
180
  end
@@ -174,7 +193,9 @@ class ValidateWebsite
174
193
 
175
194
  private
176
195
  def to_file(msg)
177
- open(options[:file], 'a').write("#{msg}\n") if options[:file]
196
+ if @file && File.exist?(@file)
197
+ open(@file, 'a').write("#{msg}\n")
198
+ end
178
199
  end
179
200
 
180
201
  def get_url(page, elem, attrname)
data/lib/validator.rb CHANGED
@@ -3,19 +3,21 @@
3
3
  class Validator
4
4
  XHTML_PATH = File.join(File.dirname(__FILE__), '..', 'lib', 'xhtml')
5
5
 
6
- attr_reader :page, :dtd, :doc, :namespace, :xsd, :errors
6
+ attr_reader :original_doc, :body, :dtd, :doc, :namespace, :xsd, :errors
7
7
 
8
- def initialize(page)
9
- @page = page
10
- @dtd = @page.doc.internal_subset
8
+ def initialize(original_doc, body)
9
+ @original_doc = original_doc
10
+ @body = body
11
+ @dtd = @original_doc.internal_subset
11
12
  init_namespace(@dtd)
12
13
  @errors = []
14
+ @errors << 'Unknown document' if @namespace.nil?
13
15
 
14
- if @namespace
15
- if @dtd_uri && @page.body.match(@dtd_uri.to_s)
16
- document = @page.body.sub(@dtd_uri.to_s, @namespace + '.dtd')
16
+ if @errors.empty?
17
+ if @dtd_uri && @body.match(@dtd_uri.to_s)
18
+ document = @body.sub(@dtd_uri.to_s, @namespace + '.dtd')
17
19
  else
18
- document = @page.body
20
+ document = @body
19
21
  end
20
22
  @doc = Dir.chdir(XHTML_PATH) do
21
23
  Nokogiri::XML(document) { |cfg|
@@ -40,19 +42,8 @@ class Validator
40
42
  end
41
43
  @errors = @doc.errors
42
44
  end
43
- elsif @page.body =~ /^\<!DOCTYPE html\>/i
44
- # html5 doctype
45
- # http://dev.w3.org/html5/spec/Overview.html#the-doctype
46
- require 'html5'
47
- require 'html5/filters/validator'
48
- html5_parser = HTML5::HTMLParser.new(:tokenizer => HTMLConformanceChecker)
49
- html5_parser.parse(@page.body)
50
- @errors = html5_parser.errors.collect do |er|
51
- "#{er[1]} line #{er[0][0]}"
52
- end
53
- else
54
- @errors << 'Unknown Document'
55
45
  end
46
+
56
47
  rescue Nokogiri::XML::SyntaxError => e
57
48
  # http://nokogiri.org/tutorials/ensuring_well_formed_markup.html
58
49
  @errors << e
@@ -2,12 +2,12 @@
2
2
  .\" Title: validate-website
3
3
  .\" Author: [see the "AUTHOR" section]
4
4
  .\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
5
- .\" Date: 12/05/2010
5
+ .\" Date: 12/10/2010
6
6
  .\" Manual: \ \&
7
7
  .\" Source: \ \&
8
8
  .\" Language: English
9
9
  .\"
10
- .TH "VALIDATE\-WEBSITE" "1" "12/05/2010" "\ \&" "\ \&"
10
+ .TH "VALIDATE\-WEBSITE" "1" "12/10/2010" "\ \&" "\ \&"
11
11
  .\" -----------------------------------------------------------------
12
12
  .\" * Define some portability stuff
13
13
  .\" -----------------------------------------------------------------
@@ -59,11 +59,6 @@ Url to exclude (ex:
59
59
  Save not well formed or not found (with \-n used) urls
60
60
  .RE
61
61
  .PP
62
- \fB\-a\fR, \fB\-\-authorization\fR \fIUSER,PASS\fR
63
- .RS 4
64
- Basic http authentification
65
- .RE
66
- .PP
67
62
  \fB\-c\fR, \fB\-\-cookies\fR \fICOOKIES\fR
68
63
  .RS 4
69
64
  Set defaults cookies
@@ -0,0 +1,243 @@
1
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
2
+ <html lang="cs">
3
+ <head>
4
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-2">
5
+ <title>Debian -- Univerz�ln� opera�n� syst�m </title>
6
+ <link rev="made" href="mailto:webmaster@debian.org">
7
+ <link rel="shortcut icon" href="favicon.ico">
8
+ <meta name="Keywords" content="debian, GNU, linux, unix, open source, svobodn�, DFSG">
9
+ <meta name="Description" content="Debian GNU/Linux je svobodn� distribuce opera�n�ho syst�mu GNU/Linux. Je spravov�n a udr�ov�n za pomoci mnoha dobrovoln�k�, kte�� mu v�nuj� sv�j �as a zku�enosti.">
10
+ <meta name="Generator" content="WML 2.0.11 (19-Aug-2006)">
11
+ <meta name="Modified" content="2010-12-02 11:26:39">
12
+ <link rel="alternate" type="application/rss+xml"
13
+ title="Bezpe�nostn� zpr�vy Debianu (pouze nadpisy)" href="security/dsa">
14
+ <link rel="alternate" type="application/rss+xml"
15
+ title="Bezpe�nostn� zpr�vy Debianu (souhrn)" href="security/dsa-long">
16
+ <link href="./debian.css" rel="stylesheet" type="text/css">
17
+ <link href="./debian-cs.css" rel="stylesheet" type="text/css" media="all">
18
+ </head>
19
+ <body>
20
+ <div id="header">
21
+ <div id="upperheader">
22
+ <div id="logo">
23
+ <a href="./"><img src="./logos/openlogo-nd-50.png" width="50" height="61" alt=""></a>
24
+ <a href="./" rel="start"><img src="Pics/debian.png" width="179" height="61" alt="Projekt Debian"></a>
25
+ </div> <!-- end logo -->
26
+ </div> <!-- end upperheader -->
27
+ <!--UdmComment-->
28
+ <div id="navbar">
29
+ <p class="hidecss"><a href="#inner">P�esko�it Quicknav</a></p>
30
+ <ul>
31
+ <li><a href="intro/about">O&nbsp;Debianu</a></li>
32
+ <li><a href="./News/">Novinky</a></li>
33
+ <li><a href="distrib/">Jak&nbsp;z�skat&nbsp;Debian</a></li>
34
+ <li><a href="./support">Podpora</a></li>
35
+ <li><a href="./devel/">V�voj��sk�&nbsp;koutek</a></li>
36
+ <li><a href="./sitemap">P�ehled str�nek</a></li>
37
+ <li><a href="http://search.debian.org/">Hledat</a></li>
38
+ </ul>
39
+ </div> <!-- end navbar -->
40
+ </div> <!-- end header -->
41
+ <!--/UdmComment-->
42
+ <div id="outer">
43
+ <div id="inner">
44
+ <div id="leftcol">
45
+ <!--UdmComment-->
46
+ <ul>
47
+ <li><a href="intro/about">O&nbsp;Debianu</a>
48
+ <ul>
49
+ <li><a href="./social_contract">Spole�ensk�&nbsp;smlouva</a></li>
50
+ <li><a href="./intro/free">Svobodn�&nbsp;software</a></li>
51
+ <li><a href="./partners/">Partne�i</a></li>
52
+ <li><a href="./donations">Dary</a></li>
53
+ <li><a href="./contact">Napi�te&nbsp;n�m</a></li>
54
+ </ul>
55
+ </li>
56
+ <li><a href="./News/">Novinky</a>
57
+ <ul>
58
+ <li><a href="./News/project/">Novinky&nbsp;projektu</a></li>
59
+ <li><a href="./events/">Ud�losti</a></li>
60
+ </ul>
61
+ </li>
62
+ <li><a href="distrib/">Jak&nbsp;z�skat&nbsp;Debian</a>
63
+ <ul>
64
+ <li><a href="CD/vendors/">Prodejci CD</a></li>
65
+ <li><a href="CD/">Obrazy CD</a></li>
66
+ <li><a href="distrib/netinst">S��ov� instalace</a></li>
67
+ <li><a href="distrib/pre-installed">P�edinstalovan�</a></li>
68
+ </ul>
69
+ </li>
70
+ <li><a href="distrib/packages">Debian�&nbsp;bal��ky</a></li>
71
+ <li><a href="doc/">Dokumentace</a>
72
+ <ul>
73
+ <li><a href="./releases/">Informace&nbsp;k&nbsp;verz�m</a></li>
74
+ <li><a href="./releases/stable/installmanual">Instala�n�&nbsp;p��ru�ka</a></li>
75
+ <li><a href="doc/books">Knihy&nbsp;o&nbsp;Debianu</a></li>
76
+ </ul>
77
+ </li>
78
+ <li><a href="./support">Podpora</a>
79
+ <ul>
80
+ <li><a href="./international/">Debian&nbsp;a&nbsp;jazyky</a></li>
81
+ <li><a href="./security/">Bezpe�nostn�&nbsp;informace</a></li>
82
+ <li><a href="Bugs/">Hl�en� chyb</a></li>
83
+ <li><a href="MailingLists/">Konference</a></li>
84
+ <li><a href="http://lists.debian.org/">Archivy&nbsp;konferenc�</a></li>
85
+ <li><a href="./ports/">Porty/architektury</a></li>
86
+ </ul>
87
+ </li>
88
+ <li><a href="misc/">Ostatn�</a></li>
89
+ <li><a href="./intro/help">Pomozte Debianu</a></li>
90
+ <li><a href="./devel/">V�voj��sk�&nbsp;koutek</a></li>
91
+ <li><a href="./sitemap">P�ehled str�nek</a></li>
92
+ <li><a href="http://search.debian.org/">Hledat</a></li>
93
+ </ul>
94
+ <form method="get" action="http://search.debian.org/">
95
+ <p>
96
+ <input type="text" name="q" size="12">
97
+ </p>
98
+ </form>
99
+ <p>
100
+ <a href="./sponsor.html"><img src="sponsor_img.jpg" alt="Nav�tivte sponzora str�nek" width="102" height="60"></a>
101
+ </p>
102
+ <p>
103
+ <a href="http://validator.w3.org/check/referer"><img src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!" width="88" height="31"></a>
104
+ </p>
105
+ <p>
106
+ <img src="http://jigsaw.w3.org/css-validator/images/vcss"
107
+ alt="Valid CSS!" width="88" height="31">
108
+ </p>
109
+ <!--/UdmComment-->
110
+ </div> <!-- end leftcol -->
111
+ <div id="maincol">
112
+ <a href="./News/2009/20090214"><img src="Pics/lennybanner_indexed.png" alt="Debian 5.0 - univerz�ln� opera�n� syst�m" width="380" height="310" style="margin-right: 10px; float: left;"></a>
113
+ <h2>Co je Debian?</h2>
114
+ <p><a href="http://www.debian.org/">Debian</a>
115
+ je <a href="intro/free">svobodn�</a> opera�n� syst�m (OS) ur�en� k&nbsp;provozu
116
+ na mnoha r�zn�ch typech po��ta��. Opera�n� syst�m se skl�d� ze z�kladn�ho
117
+ programov�ho vybaven� a&nbsp;dal��ch n�stroj�, kter�ch je k&nbsp;provozu
118
+ po��ta�e t�eba. Vlastn�m z�kladem OS je j�dro. Jeliko� Debian pou��v� j�dro
119
+ <a href="http://www.kernel.org/">Linux</a> a&nbsp;v�t�ina z�kladn�ch syst�mov�ch
120
+ program� byla vytvo�ena v&nbsp;r�mci <a href="http://www.gnu.org/">projektu GNU</a>,
121
+ nese syst�m ozna�en� GNU/Linux.</p>
122
+ <p>Debian GNU/Linux je v�ak v�ce ne� jen samotn� opera�n� syst�m. Obsahuje p�es
123
+ 25000 <a href="distrib/packages">bal��k�</a>
124
+ s&nbsp;(p�edkompilovan�mi) programy a&nbsp;dokumentac�, p�ipraven�ch
125
+ pro snadnou instalaci.</p>
126
+ <p><a href="intro/about">Podrobnosti...</a></p>
127
+ <hr>
128
+ <h2>Za��n�me</h2>
129
+ <p><a href="releases/stable/">Posledn� stabiln� verze Debianu</a> m� ��slo
130
+ 5.0. Tato verze byla naposledy aktualizov�na
131
+ 27. listopadu 2010. P�e�t�te si v�ce
132
+ o&nbsp;<a href="releases/">dostupn�ch verz�ch Debianu</a>.</p>
133
+ <p>Chcete-li za��t pou��vat Debian, m��ete jej z�skat na
134
+ <a href="distrib/">distribu�n� str�nce</a> a&nbsp;pot� nainstalovat za pomoci
135
+ <a href="releases/stable/installmanual">instala�n� p��ru�ky</a>.</p>
136
+ <p>Pokud aktualizujete na nejnov�j�� stabiln� verzi z&nbsp;verze p�edchoz�,
137
+ p�e�tete si pros�m nejprve
138
+ <a href="releases/stable/releasenotes">pozn�mky k&nbsp;vyd�n�</a>.</p>
139
+ <p>Pot�ebujete-li pomoc p�i pou��v�n� nebo nastaven� Debianu, pod�vejte se do
140
+ <a href="doc/">dokumentace</a> nebo na str�nky
141
+ s&nbsp;<a href="support">u�ivatelskou podporou</a>.</p>
142
+ <p>Pro u�ivatele z&nbsp;neanglicky mluv�c�ch zem� jsou tu str�nky
143
+ v�nuj�c� se podpo�e <a href="international/">ostatn�ch jazyk�</a>.</p>
144
+ <p>U�ivatel� pracuj�c� s&nbsp;po��ta�i jin�ch hardwarov�ch platforem,
145
+ ne� je Intel x86, najdou informace na str�nk�ch v�nuj�c�ch se
146
+ <a href="ports/">ostatn�m platform�m</a>.</p>
147
+ <hr>
148
+ <h2>Novinky</h2>
149
+ <p><tt>[27.11.2010]</tt> <strong><a href="News/2010/20101127">Updated Debian GNU/Linux: 5.0.7 released</a></strong><br>
150
+ <tt>[16.11.2010]</tt> <strong><a href="News/2010/20101116b">Debian 6.0 <q>Squeeze</q>: Call for Upgrade and Installation tests</a></strong><br>
151
+ <tt>[16.11.2010]</tt> <strong><a href="News/2010/20101116a">Debian Women IRC Training Sessions</a></strong><br>
152
+ <tt>[02.11.2010]</tt> <strong><a href="News/2010/20101102">Mini-DebConf in Ho Chi Minh City, Vietnam during FOSSASIA 2010</a></strong><br>
153
+ <tt>[19.10.2010]</tt> <strong><a href="News/2010/20101019">Debian to officially welcome non-packaging contributors</a></strong><br>
154
+ <tt>[07.10.2010]</tt> <strong><a href="News/2010/20101007">Debian to be at the Society for Neuroscience meeting</a></strong><br>
155
+ </p>
156
+ <p>Star�� zpr�vy naleznete v&nbsp;<a href="./News/">archivu novinek</a>.
157
+ Pokud chcete b�t o&nbsp;novink�ch ze sv�ta Debianu informov�ni elektronickou
158
+ po�tou, p�ihlaste se do konference
159
+ <a href="MailingLists/debian-announce">debian-announce</a>.</p>
160
+ <hr>
161
+ <h2>Bezpe�nostn� zpr�vy</h2>
162
+ <p><tt>[01.12.2010]</tt> <strong><a href="security/2010/dsa-2129">DSA-2129 krb5</a></strong> - checksum verification weakness <br>
163
+ <tt>[01.12.2010]</tt> <strong><a href="security/2010/dsa-2128">DSA-2128 libxml2</a></strong> - invalid memory access <br>
164
+ <tt>[28.11.2010]</tt> <strong><a href="security/2010/dsa-2127">DSA-2127 wireshark</a></strong> - denial of service <br>
165
+ <tt>[26.11.2010]</tt> <strong><a href="security/2010/dsa-2126">DSA-2126 linux-2.6</a></strong> - privilege escalation/denial of service/information leak <br>
166
+ <tt>[22.11.2010]</tt> <strong><a href="security/2010/dsa-2125">DSA-2125 openssl</a></strong> - buffer overflow <br>
167
+ <tt>[01.11.2010]</tt> <strong><a href="security/2010/dsa-2124">DSA-2124 xulrunner</a></strong> - several vulnerabilities <br>
168
+ <tt>[01.11.2010]</tt> <strong><a href="security/2010/dsa-2123">DSA-2123 nss</a></strong> - several vulnerabilities <br>
169
+ <tt>[22.10.2010]</tt> <strong><a href="security/2010/dsa-2122">DSA-2122 glibc</a></strong> - missing input sanitization <br>
170
+ <tt>[19.10.2010]</tt> <strong><a href="security/2010/dsa-2121">DSA-2121 typo3-src</a></strong> - several vulnerabilities <br>
171
+ <tt>[12.10.2010]</tt> <strong><a href="security/2010/dsa-2120">DSA-2120 postgresql-8.3</a></strong> - privilege escalation <br>
172
+ <tt>[12.10.2010]</tt> <strong><a href="security/2010/dsa-2119">DSA-2119 poppler</a></strong> - several vulnerabilities <br>
173
+ </p>
174
+ <p>Star�� bezpe�nostn� zpr�vy se nach�z� na str�nce
175
+ s&nbsp;<a href="./security/">bezpe�nostn�mi informacemi</a>.
176
+ Pokud chcete b�t na potenci�ln� bezpe�nostn� rizika upozor�ov�ni
177
+ elektronickou po�tou co nejd��ve po jejich ohl�en�, p�ihlaste se do
178
+ konference
179
+ <a href="http://lists.debian.org/debian-security-announce/">debian-security-announce</a>.</p>
180
+ </div> <!-- end maincol -->
181
+ <div class="clr"></div>
182
+ </div> <!-- end inner -->
183
+ <div id="footer">
184
+ <hr class="hidecss">
185
+ <!--UdmComment-->
186
+ <p>
187
+ Tato str�nka je tak� dostupn� v n�sleduj�c�ch jazyc�ch:
188
+ </p><p class="navpara">
189
+ <a href="index.ar.html" title="arab�tina" hreflang="ar" lang="ar" rel="alternate">&#1593;&#1585;&#1576;&#1610;&#1577;&nbsp;(Arabiya)</a>
190
+ <a href="index.bg.html" title="bulhar�tina" hreflang="bg" lang="bg" rel="alternate">&#1041;&#1098;&#1083;&#1075;&#1072;&#1088;&#1089;&#1082;&#1080;&nbsp;(B&#601;lgarski)</a>
191
+ <a href="index.ca.html" title="katal�n�tina" hreflang="ca" lang="ca" rel="alternate">catal&agrave;</a>
192
+ <a href="index.da.html" title="d�n�tina" hreflang="da" lang="da" rel="alternate">dansk</a>
193
+ <a href="index.de.html" title="n�m�ina" hreflang="de" lang="de" rel="alternate">Deutsch</a>
194
+ <a href="index.el.html" title="�e�tina" hreflang="el" lang="el" rel="alternate">&#917;&#955;&#955;&#951;&#957;&#953;&#954;&#940;&nbsp;(Ellinika)</a>
195
+ <a href="index.en.html" title="angli�tina" hreflang="en" lang="en" rel="alternate">English</a>
196
+ <a href="index.es.html" title="�pan�l�tina" hreflang="es" lang="es" rel="alternate">espa&ntilde;ol</a>
197
+ <a href="index.eo.html" title="esperanto" hreflang="eo" lang="eo" rel="alternate">Esperanto</a>
198
+ <a href="index.fr.html" title="francouz�tina" hreflang="fr" lang="fr" rel="alternate">fran&ccedil;ais</a>
199
+ <a href="index.ko.html" title="korej�tina" hreflang="ko" lang="ko" rel="alternate">&#54620;&#44397;&#50612;&nbsp;(Hangul)</a>
200
+ <a href="index.hy.html" title="arm�n�tina" hreflang="hy" lang="hy" rel="alternate">&#1344;&#1377;&#1397;&#1381;&#1408;&#1381;&#1398;&nbsp;(hayeren)</a>
201
+ <a href="index.hr.html" title="chorvat�tina" hreflang="hr" lang="hr" rel="alternate">hrvatski</a>
202
+ <a href="index.id.html" title="indon�tina" hreflang="id" lang="id" rel="alternate">Indonesia</a>
203
+ <a href="index.it.html" title="ital�tina" hreflang="it" lang="it" rel="alternate">Italiano</a>
204
+ <a href="index.he.html" title="hebrej�tina" hreflang="he" lang="he" rel="alternate">&#1506;&#1489;&#1512;&#1497;&#1514;&nbsp;(ivrit)</a>
205
+ <a href="index.lt.html" title="litev�tina" hreflang="lt" lang="lt" rel="alternate">Lietuvi&#371;</a>
206
+ <a href="index.hu.html" title="ma�ar�tina" hreflang="hu" lang="hu" rel="alternate">magyar</a>
207
+ <a href="index.nl.html" title="holand�tina" hreflang="nl" lang="nl" rel="alternate">Nederlands</a>
208
+ <a href="index.ja.html" title="japon�tina" hreflang="ja" lang="ja" rel="alternate">&#26085;&#26412;&#35486;&nbsp;(Nihongo)</a>
209
+ <a href="index.nb.html" title="nor�tina" hreflang="nb" lang="nb" rel="alternate">norsk&nbsp;(bokm&aring;l)</a>
210
+ <a href="index.pl.html" title="pol�tina" hreflang="pl" lang="pl" rel="alternate">polski</a>
211
+ <a href="index.pt.html" title="portugal�tina" hreflang="pt" lang="pt" rel="alternate">Portugu&ecirc;s</a>
212
+ <a href="index.ro.html" title="rumun�tina" hreflang="ro" lang="ro" rel="alternate">rom&acirc;n&#259;</a>
213
+ <a href="index.ru.html" title="ru�tina" hreflang="ru" lang="ru" rel="alternate">&#1056;&#1091;&#1089;&#1089;&#1082;&#1080;&#1081;&nbsp;(Russkij)</a>
214
+ <a href="index.sk.html" title="sloven�tina" hreflang="sk" lang="sk" rel="alternate">slovensky</a>
215
+ <a href="index.fi.html" title="fin�tina" hreflang="fi" lang="fi" rel="alternate">suomi</a>
216
+ <a href="index.sv.html" title="�v�d�tina" hreflang="sv" lang="sv" rel="alternate">svenska</a>
217
+ <a href="index.ta.html" title="tamil�tina" hreflang="ta" lang="ta" rel="alternate">&#2980;&#2990;&#3007;&#2996;&#3021;&nbsp;(Tamil)</a>
218
+ <a href="index.tr.html" title="ture�tina" hreflang="tr" lang="tr" rel="alternate">T&uuml;rk&ccedil;e</a>
219
+ <a href="index.uk.html" title="ukrajin�tina" hreflang="uk" lang="uk" rel="alternate">&#1091;&#1082;&#1088;&#1072;&#1111;&#1085;&#1089;&#1100;&#1082;&#1072;&nbsp;(ukrajins'ka)</a>
220
+ <a href="index.zh-cn.html" title="��n�tina (��na)" hreflang="zh-CN" lang="zh-CN" rel="alternate">&#20013;&#25991;(&#31616;)</a>
221
+ <a href="index.zh-hk.html" title="��n�tina (Hong Kong)" hreflang="zh-HK" lang="zh-HK" rel="alternate">&#20013;&#25991;(HK)</a>
222
+ <a href="index.zh-tw.html" title="��n�tina (Tchai-wan)" hreflang="zh-TW" lang="zh-TW" rel="alternate">&#20013;&#25991;(&#32321;)</a>
223
+ </p><p>
224
+ Jak nastavit <a href="./intro/cn">v�choz� jazyk</a>
225
+ </p>
226
+ <!--/UdmComment-->
227
+ <hr>
228
+ <!--UdmComment-->
229
+ <div id="fineprint">
230
+ <p>Probl�my t�kaj�c� se webov�ch str�nek oznamujte anglicky na e-mail <a href="mailto:debian-www@lists.debian.org">debian-www@lists.debian.org</a>. Dal�� kontakty z�sk�te na str�nce s&nbsp;<a href="./contact">kontakty</a>.</p>
231
+ <p>
232
+ Posledn� zm�na: �t, 2. pro 11:26:39 UTC 2010
233
+ <br>
234
+ Copyright &copy; 1997-2010
235
+ <a href="http://www.spi-inc.org/">SPI</a>; P�e�tete si <a href="./license" rel="copyright">licen�n� podm�nky</a><br>
236
+ Debian je registrovan� <a href="./trademark">obchodn� zn�mka</a> organizace Software in the Public Interest, Inc.
237
+ </p>
238
+ </div>
239
+ <!--/UdmComment-->
240
+ </div> <!-- end footer -->
241
+ </div> <!-- end outer -->
242
+ </body>
243
+ </html>
@@ -14,7 +14,8 @@ describe ValidateWebsite do
14
14
  page = FakePage.new(name,
15
15
  :body => open(file).read,
16
16
  :content_type => 'text/html')
17
- @validate_website.crawl(page.url)
17
+ @validate_website.site = page.url
18
+ @validate_website.crawl(:quiet => true)
18
19
  @validate_website.anemone.should have(3).pages
19
20
  end
20
21
  end
@@ -27,7 +28,8 @@ describe ValidateWebsite do
27
28
  .tests {background-image: url(/image/pouet_42.png)}
28
29
  .tests {background-image: url(/image/pouet)}",
29
30
  :content_type => 'text/css')
30
- @validate_website.crawl(page.url)
31
+ @validate_website.site = page.url
32
+ @validate_website.crawl
31
33
  @validate_website.anemone.should have(5).pages
32
34
  end
33
35
 
@@ -35,7 +37,8 @@ describe ValidateWebsite do
35
37
  page = FakePage.new('test.css',
36
38
  :body => ".test {background-image: url('pouet');}",
37
39
  :content_type => 'text/css')
38
- @validate_website.crawl(page.url)
40
+ @validate_website.site = page.url
41
+ @validate_website.crawl
39
42
  @validate_website.anemone.should have(2).pages
40
43
  end
41
44
 
@@ -43,7 +46,8 @@ describe ValidateWebsite do
43
46
  page = FakePage.new('test.css',
44
47
  :body => ".test {background-image: url(\"pouet\");}",
45
48
  :content_type => 'text/css')
46
- @validate_website.crawl(page.url)
49
+ @validate_website.site = page.url
50
+ @validate_website.crawl
47
51
  @validate_website.anemone.should have(2).pages
48
52
  end
49
53
  end
@@ -15,7 +15,7 @@ describe Validator do
15
15
  :body => open(file).read,
16
16
  :content_type => 'text/html')
17
17
  @xhtml1_page = @http.fetch_page(page.url)
18
- validator = Validator.new(@xhtml1_page)
18
+ validator = Validator.new(@xhtml1_page.doc, @xhtml1_page.body)
19
19
  validator.dtd.system_id.should == dtd_uri
20
20
  validator.namespace.should == name
21
21
  validator.should be_valid
@@ -25,13 +25,14 @@ describe Validator do
25
25
  describe('html5') do
26
26
  context('when valid') do
27
27
  it "html5 should be valid" do
28
+ pending("need update html5lib")
28
29
  name = 'html5'
29
30
  file = File.join('spec', 'data', "#{name}.html")
30
31
  page = FakePage.new(name,
31
32
  :body => open(file).read,
32
33
  :content_type => 'text/html')
33
34
  @html5_page = @http.fetch_page(page.url)
34
- validator = Validator.new(@html5_page)
35
+ validator = Validator.new(@html5_page.doc, @html5_page.body)
35
36
  validator.should be_valid
36
37
  end
37
38
  end
@@ -44,7 +45,7 @@ describe Validator do
44
45
  :body => open(file).read,
45
46
  :content_type => 'text/html')
46
47
  @html5_page = @http.fetch_page(page.url)
47
- validator = Validator.new(@html5_page)
48
+ validator = Validator.new(@html5_page.doc, @html5_page.body)
48
49
  validator.should be_valid
49
50
  end
50
51
  end
@@ -58,7 +59,7 @@ describe Validator do
58
59
  :body => open(file).read,
59
60
  :content_type => 'text/html')
60
61
  @html4_strict_page = @http.fetch_page(page.url)
61
- validator = Validator.new(@html4_strict_page)
62
+ validator = Validator.new(@html4_strict_page.doc, @html4_strict_page.body)
62
63
  validator.should be_valid
63
64
  end
64
65
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: validate-website
3
3
  version: !ruby/object:Gem::Version
4
- hash: 13
4
+ hash: 5
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 5
9
- - 3
10
- version: 0.5.3
9
+ - 7
10
+ version: 0.5.7
11
11
  platform: ruby
12
12
  authors:
13
13
  - Laurent Arnoud
@@ -15,23 +15,23 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-12-05 00:00:00 +01:00
18
+ date: 2010-12-10 00:00:00 +01:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
22
- name: spk-anemone
22
+ name: anemone
23
23
  prerelease: false
24
24
  requirement: &id001 !ruby/object:Gem::Requirement
25
25
  none: false
26
26
  requirements:
27
27
  - - ">="
28
28
  - !ruby/object:Gem::Version
29
- hash: 15
29
+ hash: 11
30
30
  segments:
31
31
  - 0
32
- - 4
32
+ - 5
33
33
  - 0
34
- version: 0.4.0
34
+ version: 0.5.0
35
35
  type: :runtime
36
36
  version_requirements: *id001
37
37
  - !ruby/object:Gem::Dependency
@@ -49,26 +49,10 @@ dependencies:
49
49
  version: "1.1"
50
50
  type: :runtime
51
51
  version_requirements: *id002
52
- - !ruby/object:Gem::Dependency
53
- name: spk-html5
54
- prerelease: false
55
- requirement: &id003 !ruby/object:Gem::Requirement
56
- none: false
57
- requirements:
58
- - - "="
59
- - !ruby/object:Gem::Version
60
- hash: 53
61
- segments:
62
- - 0
63
- - 10
64
- - 1
65
- version: 0.10.1
66
- type: :runtime
67
- version_requirements: *id003
68
52
  - !ruby/object:Gem::Dependency
69
53
  name: rspec
70
54
  prerelease: false
71
- requirement: &id004 !ruby/object:Gem::Requirement
55
+ requirement: &id003 !ruby/object:Gem::Requirement
72
56
  none: false
73
57
  requirements:
74
58
  - - ">="
@@ -80,11 +64,11 @@ dependencies:
80
64
  - 0
81
65
  version: 2.0.0
82
66
  type: :development
83
- version_requirements: *id004
67
+ version_requirements: *id003
84
68
  - !ruby/object:Gem::Dependency
85
69
  name: fakeweb
86
70
  prerelease: false
87
- requirement: &id005 !ruby/object:Gem::Requirement
71
+ requirement: &id004 !ruby/object:Gem::Requirement
88
72
  none: false
89
73
  requirements:
90
74
  - - ">="
@@ -96,11 +80,12 @@ dependencies:
96
80
  - 0
97
81
  version: 1.3.0
98
82
  type: :development
99
- version_requirements: *id005
83
+ version_requirements: *id004
100
84
  description: validate-website is a web crawler for checking the markupvalidity and not found urls.
101
85
  email: laurent@spkdev.net
102
86
  executables:
103
87
  - validate-website
88
+ - validate-website-static
104
89
  extensions: []
105
90
 
106
91
  extra_rdoc_files: []
@@ -109,6 +94,7 @@ files:
109
94
  - README.rdoc
110
95
  - Rakefile
111
96
  - LICENSE
97
+ - bin/validate-website-static
112
98
  - bin/validate-website
113
99
  - lib/validator.rb
114
100
  - lib/colorful_messages.rb
@@ -207,6 +193,7 @@ files:
207
193
  - spec/validator_spec.rb
208
194
  - spec/spec_helper.rb
209
195
  - spec/data/html5-linuxfr.html
196
+ - spec/data/index.cs.html
210
197
  - spec/data/html4-strict.html
211
198
  - spec/data/html5.html
212
199
  - spec/data/xhtml1-strict.html
@@ -240,9 +227,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
240
227
  - 0
241
228
  version: "0"
242
229
  requirements:
243
- - spk-anemone
230
+ - anemone
244
231
  - rainbow
245
- - spk-html5
246
232
  rubyforge_project:
247
233
  rubygems_version: 1.3.7
248
234
  signing_key: