validate-website 0.5.3 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -29,8 +29,6 @@ found urls.
29
29
  Url to exclude (ex: redirect|news)
30
30
  -f, --file FILE
31
31
  Save not well formed or not found (with -n used) urls
32
- -a, --authorization USER,PASS
33
- Basic http authentification
34
32
  -c, --cookies COOKIES
35
33
  Set defaults cookies
36
34
  -m, --[no-]markup-validation
data/Rakefile CHANGED
@@ -7,7 +7,7 @@ require "rspec/core/rake_task" # RSpec 2.0
7
7
  # Globals
8
8
 
9
9
  PKG_NAME = 'validate-website'
10
- PKG_VERSION = '0.5.3'
10
+ PKG_VERSION = '0.5.7'
11
11
 
12
12
  PKG_FILES = ['README.rdoc', 'Rakefile', 'LICENSE']
13
13
  Find.find('bin/', 'lib/', 'man/', 'spec/') do |f|
@@ -43,15 +43,15 @@ spec = Gem::Specification.new do |s|
43
43
  s.summary = 'Web crawler for checking the validity of your documents'
44
44
  s.name = PKG_NAME
45
45
  s.version = PKG_VERSION
46
- s.requirements << 'spk-anemone' << 'rainbow' << 'spk-html5'
47
- s.add_dependency('spk-anemone', '>= 0.4.0')
46
+ s.requirements << 'anemone' << 'rainbow'
47
+ s.add_dependency('anemone', '>= 0.5.0')
48
48
  s.add_dependency('rainbow', '>= 1.1')
49
- s.add_dependency('spk-html5', '= 0.10.1')
50
49
  s.add_development_dependency('rspec', '>= 2.0.0')
51
50
  s.add_development_dependency('fakeweb', '>= 1.3.0')
52
51
  s.require_path = 'lib'
53
52
  s.bindir = 'bin'
54
53
  s.executables << 'validate-website'
54
+ s.executables << 'validate-website-static'
55
55
  s.files = PKG_FILES
56
56
  s.description = 'validate-website is a web crawler for checking the markup' +
57
57
  'validity and not found urls.'
data/bin/validate-website CHANGED
@@ -11,19 +11,6 @@ require 'rubygems' if developer_mode
11
11
  require 'validate_website'
12
12
 
13
13
  validate_website = ValidateWebsite.new(ARGV)
14
- options = validate_website.options
15
-
16
- validate_website.crawl options[:site],
17
- :markup_validation => options[:markup_validation],
18
- :user_agent => options[:user_agent],
19
- :exclude => options[:exclude],
20
- :file => options[:file],
21
- :authorization => options[:authorization],
22
- :not_found => options[:not_found],
23
- :cookies => options[:cookies],
24
- :accept_cookies => options[:accept_cookies],
25
- :verbose => options[:debug],
26
- :validate_verbose => options[:verbose],
27
- :quiet => options[:quiet]
14
+ validate_website.crawl
28
15
 
29
16
  exit(validate_website.exit_status)
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ lib_dir = File.join(File.dirname(__FILE__), '..', 'lib')
5
+ $:.unshift(File.expand_path(lib_dir))
6
+
7
+ developer_mode = false
8
+ developer_mode = true if __FILE__ == $0
9
+ require 'rubygems' if developer_mode
10
+
11
+ require 'validate_website'
12
+
13
+ validate_website = ValidateWebsite.new(ARGV)
14
+
15
+ files = Dir.glob(File.join("**", "*.html"))
16
+ files.each do |f|
17
+ next unless File.file?(f)
18
+
19
+ body = open(f).read
20
+ doc = Nokogiri::HTML(body)
21
+
22
+ validate_website.validate(doc, body, f)
23
+ end
24
+
25
+ exit(validate_website.exit_status)
@@ -2,12 +2,15 @@
2
2
 
3
3
  require 'optparse'
4
4
  require 'open-uri'
5
+
5
6
  require 'validator'
6
- require 'anemone'
7
7
  require 'colorful_messages'
8
8
 
9
+ require 'anemone'
10
+
9
11
  class ValidateWebsite
10
12
 
13
+ attr_accessor :site
11
14
  attr_reader :options, :anemone
12
15
 
13
16
  include ColorfulMessages
@@ -17,34 +20,42 @@ class ValidateWebsite
17
20
  EXIT_FAILURE_NOT_FOUND = 65
18
21
  EXIT_FAILURE_MARKUP_NOT_FOUND = 66
19
22
 
20
- def initialize(args=[])
23
+ def initialize(args=[], validation_type = :crawl)
21
24
  @markup_error = nil
22
25
  @not_found_error = nil
23
26
 
24
- @options = {
27
+ @options_crawl = {
25
28
  :site => 'http://localhost:3000/',
26
29
  :markup_validation => true,
27
- :user_agent => Anemone::Core::DEFAULT_OPTS[:user_agent],
28
30
  :exclude => nil,
29
31
  :file => nil,
30
- :authorization => nil,
31
32
  # log not found url (404 status code)
32
33
  :not_found => false,
34
+ # internal verbose for ValidateWebsite
35
+ :validate_verbose => false,
36
+ :quiet => false,
37
+
38
+ # Anemone options see anemone/lib/anemone/core.rb
39
+ :verbose => false,
40
+ :user_agent => Anemone::Core::DEFAULT_OPTS[:user_agent],
33
41
  :cookies => nil,
34
42
  :accept_cookies => true,
35
- :verbose => false,
36
- :debug => false,
37
- :quiet => false,
43
+ :redirect_limit => 0,
38
44
  }
39
- parse(args)
45
+ send("parse_#{validation_type}_options", args)
40
46
 
41
- # truncate file
42
- if options[:file]
43
- open(options[:file], 'w').write('')
47
+ @file = @options[:file]
48
+ if @file
49
+ # truncate file
50
+ open(@file, 'w').write('')
44
51
  end
52
+
53
+ @site = @options[:site]
45
54
  end
46
55
 
47
- def parse(args)
56
+ def parse_crawl_options(args)
57
+ @options = @options_crawl
58
+
48
59
  opts = OptionParser.new do |o|
49
60
  o.set_summary_indent(' ')
50
61
  o.banner = 'Usage: validate-website [OPTIONS]'
@@ -66,12 +77,10 @@ class ValidateWebsite
66
77
  }
67
78
  o.on("-f", "--file 'FILE'", String,
68
79
  "Save not well formed or not found urls") { |v| @options[:file] = v }
69
- o.on("-a", "--authorization 'USER,PASS'", Array,
70
- "Basic http authentification") { |v|
71
- @options[:authorization] = v
72
- }
80
+
73
81
  o.on("-c", "--cookies 'COOKIES'", String,
74
82
  "Set defaults cookies") { |v| @options[:cookies] = v }
83
+
75
84
  o.on("-m", "--[no-]markup-validation",
76
85
  "Markup validation (Default: #{@options[:markup_validation]})") { |v|
77
86
  @options[:markup_validation] = v
@@ -81,16 +90,16 @@ class ValidateWebsite
81
90
  @options[:not_found] = v
82
91
  }
83
92
  o.on("-v", "--verbose",
84
- "Show validator errors (Default: #{@options[:verbose]})") { |v|
85
- @options[:verbose] = v
93
+ "Show validator errors (Default: #{@options[:validate_verbose]})") { |v|
94
+ @options[:validate_verbose] = v
86
95
  }
87
96
  o.on("-q", "--quiet",
88
97
  "Only report errors (Default: #{@options[:quiet]})") { |v|
89
98
  @options[:quiet] = v
90
99
  }
91
100
  o.on("-d", "--debug",
92
- "Show anemone log (Default: #{@options[:debug]})") { |v|
93
- @options[:debug] = v
101
+ "Show anemone log (Default: #{@options[:verbose]})") { |v|
102
+ @options[:verbose] = v
94
103
  }
95
104
 
96
105
  o.separator ""
@@ -99,12 +108,32 @@ class ValidateWebsite
99
108
  opts.parse!(args)
100
109
  end
101
110
 
102
- def crawl(site, opts={})
103
- puts note("Validating #{site}") if opts[:validate_verbose]
111
+ def validate(doc, body, url, opts={})
112
+ opts = @options.merge(opts)
113
+ validator = Validator.new(doc, body)
114
+ msg = " well formed? %s" % validator.valid?
115
+ if validator.valid?
116
+ unless opts[:quiet]
117
+ print info(url)
118
+ puts success(msg)
119
+ end
120
+ else
121
+ @markup_error = true
122
+ print info(url)
123
+ puts error(msg)
124
+ puts error(validator.errors.join(", ")) if opts[:validate_verbose]
125
+ to_file(url)
126
+ end
127
+ end
104
128
 
105
- @anemone = Anemone.crawl(site, opts) do |anemone|
129
+ def crawl(opts={})
130
+ opts = @options.merge(opts)
131
+ puts note("Validating #{@site}") if opts[:validate_verbose]
132
+
133
+ @anemone = Anemone.crawl(@site, opts) do |anemone|
106
134
  anemone.skip_links_like Regexp.new(opts[:exclude]) if opts[:exclude]
107
135
 
136
+ # select the links on each page to follow (iframe, link, css url)
108
137
  anemone.focus_crawl { |p|
109
138
  links = []
110
139
  if p.html?
@@ -134,20 +163,7 @@ class ValidateWebsite
134
163
  if opts[:markup_validation]
135
164
  # validate html/html+xml
136
165
  if page.html? && page.fetched?
137
- validator = Validator.new(page)
138
- msg = " well formed? %s" % validator.valid?
139
- if validator.valid?
140
- unless opts[:quiet]
141
- print info(url)
142
- puts success(msg)
143
- end
144
- else
145
- @markup_error = true
146
- print info(url)
147
- puts error(msg)
148
- puts error(validator.errors.join(", ")) if opts[:validate_verbose]
149
- to_file(url)
150
- end
166
+ validate(page.doc, page.body, url, opts)
151
167
  end
152
168
  end
153
169
 
@@ -156,6 +172,9 @@ class ValidateWebsite
156
172
  puts error("%s linked in %s but not exist" % [url, page.referer])
157
173
  to_file(url)
158
174
  end
175
+
176
+ # throw away the page (hope this saves memory)
177
+ page = nil
159
178
  }
160
179
  end
161
180
  end
@@ -174,7 +193,9 @@ class ValidateWebsite
174
193
 
175
194
  private
176
195
  def to_file(msg)
177
- open(options[:file], 'a').write("#{msg}\n") if options[:file]
196
+ if @file && File.exist?(@file)
197
+ open(@file, 'a').write("#{msg}\n")
198
+ end
178
199
  end
179
200
 
180
201
  def get_url(page, elem, attrname)
data/lib/validator.rb CHANGED
@@ -3,19 +3,21 @@
3
3
  class Validator
4
4
  XHTML_PATH = File.join(File.dirname(__FILE__), '..', 'lib', 'xhtml')
5
5
 
6
- attr_reader :page, :dtd, :doc, :namespace, :xsd, :errors
6
+ attr_reader :original_doc, :body, :dtd, :doc, :namespace, :xsd, :errors
7
7
 
8
- def initialize(page)
9
- @page = page
10
- @dtd = @page.doc.internal_subset
8
+ def initialize(original_doc, body)
9
+ @original_doc = original_doc
10
+ @body = body
11
+ @dtd = @original_doc.internal_subset
11
12
  init_namespace(@dtd)
12
13
  @errors = []
14
+ @errors << 'Unknown document' if @namespace.nil?
13
15
 
14
- if @namespace
15
- if @dtd_uri && @page.body.match(@dtd_uri.to_s)
16
- document = @page.body.sub(@dtd_uri.to_s, @namespace + '.dtd')
16
+ if @errors.empty?
17
+ if @dtd_uri && @body.match(@dtd_uri.to_s)
18
+ document = @body.sub(@dtd_uri.to_s, @namespace + '.dtd')
17
19
  else
18
- document = @page.body
20
+ document = @body
19
21
  end
20
22
  @doc = Dir.chdir(XHTML_PATH) do
21
23
  Nokogiri::XML(document) { |cfg|
@@ -40,19 +42,8 @@ class Validator
40
42
  end
41
43
  @errors = @doc.errors
42
44
  end
43
- elsif @page.body =~ /^\<!DOCTYPE html\>/i
44
- # html5 doctype
45
- # http://dev.w3.org/html5/spec/Overview.html#the-doctype
46
- require 'html5'
47
- require 'html5/filters/validator'
48
- html5_parser = HTML5::HTMLParser.new(:tokenizer => HTMLConformanceChecker)
49
- html5_parser.parse(@page.body)
50
- @errors = html5_parser.errors.collect do |er|
51
- "#{er[1]} line #{er[0][0]}"
52
- end
53
- else
54
- @errors << 'Unknown Document'
55
45
  end
46
+
56
47
  rescue Nokogiri::XML::SyntaxError => e
57
48
  # http://nokogiri.org/tutorials/ensuring_well_formed_markup.html
58
49
  @errors << e
@@ -2,12 +2,12 @@
2
2
  .\" Title: validate-website
3
3
  .\" Author: [see the "AUTHOR" section]
4
4
  .\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
5
- .\" Date: 12/05/2010
5
+ .\" Date: 12/10/2010
6
6
  .\" Manual: \ \&
7
7
  .\" Source: \ \&
8
8
  .\" Language: English
9
9
  .\"
10
- .TH "VALIDATE\-WEBSITE" "1" "12/05/2010" "\ \&" "\ \&"
10
+ .TH "VALIDATE\-WEBSITE" "1" "12/10/2010" "\ \&" "\ \&"
11
11
  .\" -----------------------------------------------------------------
12
12
  .\" * Define some portability stuff
13
13
  .\" -----------------------------------------------------------------
@@ -59,11 +59,6 @@ Url to exclude (ex:
59
59
  Save not well formed or not found (with \-n used) urls
60
60
  .RE
61
61
  .PP
62
- \fB\-a\fR, \fB\-\-authorization\fR \fIUSER,PASS\fR
63
- .RS 4
64
- Basic http authentification
65
- .RE
66
- .PP
67
62
  \fB\-c\fR, \fB\-\-cookies\fR \fICOOKIES\fR
68
63
  .RS 4
69
64
  Set defaults cookies
@@ -0,0 +1,243 @@
1
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
2
+ <html lang="cs">
3
+ <head>
4
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-2">
5
+ <title>Debian -- Univerz�ln� opera�n� syst�m </title>
6
+ <link rev="made" href="mailto:webmaster@debian.org">
7
+ <link rel="shortcut icon" href="favicon.ico">
8
+ <meta name="Keywords" content="debian, GNU, linux, unix, open source, svobodn�, DFSG">
9
+ <meta name="Description" content="Debian GNU/Linux je svobodn� distribuce opera�n�ho syst�mu GNU/Linux. Je spravov�n a udr�ov�n za pomoci mnoha dobrovoln�k�, kte�� mu v�nuj� sv�j �as a zku�enosti.">
10
+ <meta name="Generator" content="WML 2.0.11 (19-Aug-2006)">
11
+ <meta name="Modified" content="2010-12-02 11:26:39">
12
+ <link rel="alternate" type="application/rss+xml"
13
+ title="Bezpe�nostn� zpr�vy Debianu (pouze nadpisy)" href="security/dsa">
14
+ <link rel="alternate" type="application/rss+xml"
15
+ title="Bezpe�nostn� zpr�vy Debianu (souhrn)" href="security/dsa-long">
16
+ <link href="./debian.css" rel="stylesheet" type="text/css">
17
+ <link href="./debian-cs.css" rel="stylesheet" type="text/css" media="all">
18
+ </head>
19
+ <body>
20
+ <div id="header">
21
+ <div id="upperheader">
22
+ <div id="logo">
23
+ <a href="./"><img src="./logos/openlogo-nd-50.png" width="50" height="61" alt=""></a>
24
+ <a href="./" rel="start"><img src="Pics/debian.png" width="179" height="61" alt="Projekt Debian"></a>
25
+ </div> <!-- end logo -->
26
+ </div> <!-- end upperheader -->
27
+ <!--UdmComment-->
28
+ <div id="navbar">
29
+ <p class="hidecss"><a href="#inner">P�esko�it Quicknav</a></p>
30
+ <ul>
31
+ <li><a href="intro/about">O&nbsp;Debianu</a></li>
32
+ <li><a href="./News/">Novinky</a></li>
33
+ <li><a href="distrib/">Jak&nbsp;z�skat&nbsp;Debian</a></li>
34
+ <li><a href="./support">Podpora</a></li>
35
+ <li><a href="./devel/">V�voj��sk�&nbsp;koutek</a></li>
36
+ <li><a href="./sitemap">P�ehled str�nek</a></li>
37
+ <li><a href="http://search.debian.org/">Hledat</a></li>
38
+ </ul>
39
+ </div> <!-- end navbar -->
40
+ </div> <!-- end header -->
41
+ <!--/UdmComment-->
42
+ <div id="outer">
43
+ <div id="inner">
44
+ <div id="leftcol">
45
+ <!--UdmComment-->
46
+ <ul>
47
+ <li><a href="intro/about">O&nbsp;Debianu</a>
48
+ <ul>
49
+ <li><a href="./social_contract">Spole�ensk�&nbsp;smlouva</a></li>
50
+ <li><a href="./intro/free">Svobodn�&nbsp;software</a></li>
51
+ <li><a href="./partners/">Partne�i</a></li>
52
+ <li><a href="./donations">Dary</a></li>
53
+ <li><a href="./contact">Napi�te&nbsp;n�m</a></li>
54
+ </ul>
55
+ </li>
56
+ <li><a href="./News/">Novinky</a>
57
+ <ul>
58
+ <li><a href="./News/project/">Novinky&nbsp;projektu</a></li>
59
+ <li><a href="./events/">Ud�losti</a></li>
60
+ </ul>
61
+ </li>
62
+ <li><a href="distrib/">Jak&nbsp;z�skat&nbsp;Debian</a>
63
+ <ul>
64
+ <li><a href="CD/vendors/">Prodejci CD</a></li>
65
+ <li><a href="CD/">Obrazy CD</a></li>
66
+ <li><a href="distrib/netinst">S��ov� instalace</a></li>
67
+ <li><a href="distrib/pre-installed">P�edinstalovan�</a></li>
68
+ </ul>
69
+ </li>
70
+ <li><a href="distrib/packages">Debian�&nbsp;bal��ky</a></li>
71
+ <li><a href="doc/">Dokumentace</a>
72
+ <ul>
73
+ <li><a href="./releases/">Informace&nbsp;k&nbsp;verz�m</a></li>
74
+ <li><a href="./releases/stable/installmanual">Instala�n�&nbsp;p��ru�ka</a></li>
75
+ <li><a href="doc/books">Knihy&nbsp;o&nbsp;Debianu</a></li>
76
+ </ul>
77
+ </li>
78
+ <li><a href="./support">Podpora</a>
79
+ <ul>
80
+ <li><a href="./international/">Debian&nbsp;a&nbsp;jazyky</a></li>
81
+ <li><a href="./security/">Bezpe�nostn�&nbsp;informace</a></li>
82
+ <li><a href="Bugs/">Hl�en� chyb</a></li>
83
+ <li><a href="MailingLists/">Konference</a></li>
84
+ <li><a href="http://lists.debian.org/">Archivy&nbsp;konferenc�</a></li>
85
+ <li><a href="./ports/">Porty/architektury</a></li>
86
+ </ul>
87
+ </li>
88
+ <li><a href="misc/">Ostatn�</a></li>
89
+ <li><a href="./intro/help">Pomozte Debianu</a></li>
90
+ <li><a href="./devel/">V�voj��sk�&nbsp;koutek</a></li>
91
+ <li><a href="./sitemap">P�ehled str�nek</a></li>
92
+ <li><a href="http://search.debian.org/">Hledat</a></li>
93
+ </ul>
94
+ <form method="get" action="http://search.debian.org/">
95
+ <p>
96
+ <input type="text" name="q" size="12">
97
+ </p>
98
+ </form>
99
+ <p>
100
+ <a href="./sponsor.html"><img src="sponsor_img.jpg" alt="Nav�tivte sponzora str�nek" width="102" height="60"></a>
101
+ </p>
102
+ <p>
103
+ <a href="http://validator.w3.org/check/referer"><img src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!" width="88" height="31"></a>
104
+ </p>
105
+ <p>
106
+ <img src="http://jigsaw.w3.org/css-validator/images/vcss"
107
+ alt="Valid CSS!" width="88" height="31">
108
+ </p>
109
+ <!--/UdmComment-->
110
+ </div> <!-- end leftcol -->
111
+ <div id="maincol">
112
+ <a href="./News/2009/20090214"><img src="Pics/lennybanner_indexed.png" alt="Debian 5.0 - univerz�ln� opera�n� syst�m" width="380" height="310" style="margin-right: 10px; float: left;"></a>
113
+ <h2>Co je Debian?</h2>
114
+ <p><a href="http://www.debian.org/">Debian</a>
115
+ je <a href="intro/free">svobodn�</a> opera�n� syst�m (OS) ur�en� k&nbsp;provozu
116
+ na mnoha r�zn�ch typech po��ta��. Opera�n� syst�m se skl�d� ze z�kladn�ho
117
+ programov�ho vybaven� a&nbsp;dal��ch n�stroj�, kter�ch je k&nbsp;provozu
118
+ po��ta�e t�eba. Vlastn�m z�kladem OS je j�dro. Jeliko� Debian pou��v� j�dro
119
+ <a href="http://www.kernel.org/">Linux</a> a&nbsp;v�t�ina z�kladn�ch syst�mov�ch
120
+ program� byla vytvo�ena v&nbsp;r�mci <a href="http://www.gnu.org/">projektu GNU</a>,
121
+ nese syst�m ozna�en� GNU/Linux.</p>
122
+ <p>Debian GNU/Linux je v�ak v�ce ne� jen samotn� opera�n� syst�m. Obsahuje p�es
123
+ 25000 <a href="distrib/packages">bal��k�</a>
124
+ s&nbsp;(p�edkompilovan�mi) programy a&nbsp;dokumentac�, p�ipraven�ch
125
+ pro snadnou instalaci.</p>
126
+ <p><a href="intro/about">Podrobnosti...</a></p>
127
+ <hr>
128
+ <h2>Za��n�me</h2>
129
+ <p><a href="releases/stable/">Posledn� stabiln� verze Debianu</a> m� ��slo
130
+ 5.0. Tato verze byla naposledy aktualizov�na
131
+ 27. listopadu 2010. P�e�t�te si v�ce
132
+ o&nbsp;<a href="releases/">dostupn�ch verz�ch Debianu</a>.</p>
133
+ <p>Chcete-li za��t pou��vat Debian, m��ete jej z�skat na
134
+ <a href="distrib/">distribu�n� str�nce</a> a&nbsp;pot� nainstalovat za pomoci
135
+ <a href="releases/stable/installmanual">instala�n� p��ru�ky</a>.</p>
136
+ <p>Pokud aktualizujete na nejnov�j�� stabiln� verzi z&nbsp;verze p�edchoz�,
137
+ p�e�tete si pros�m nejprve
138
+ <a href="releases/stable/releasenotes">pozn�mky k&nbsp;vyd�n�</a>.</p>
139
+ <p>Pot�ebujete-li pomoc p�i pou��v�n� nebo nastaven� Debianu, pod�vejte se do
140
+ <a href="doc/">dokumentace</a> nebo na str�nky
141
+ s&nbsp;<a href="support">u�ivatelskou podporou</a>.</p>
142
+ <p>Pro u�ivatele z&nbsp;neanglicky mluv�c�ch zem� jsou tu str�nky
143
+ v�nuj�c� se podpo�e <a href="international/">ostatn�ch jazyk�</a>.</p>
144
+ <p>U�ivatel� pracuj�c� s&nbsp;po��ta�i jin�ch hardwarov�ch platforem,
145
+ ne� je Intel x86, najdou informace na str�nk�ch v�nuj�c�ch se
146
+ <a href="ports/">ostatn�m platform�m</a>.</p>
147
+ <hr>
148
+ <h2>Novinky</h2>
149
+ <p><tt>[27.11.2010]</tt> <strong><a href="News/2010/20101127">Updated Debian GNU/Linux: 5.0.7 released</a></strong><br>
150
+ <tt>[16.11.2010]</tt> <strong><a href="News/2010/20101116b">Debian 6.0 <q>Squeeze</q>: Call for Upgrade and Installation tests</a></strong><br>
151
+ <tt>[16.11.2010]</tt> <strong><a href="News/2010/20101116a">Debian Women IRC Training Sessions</a></strong><br>
152
+ <tt>[02.11.2010]</tt> <strong><a href="News/2010/20101102">Mini-DebConf in Ho Chi Minh City, Vietnam during FOSSASIA 2010</a></strong><br>
153
+ <tt>[19.10.2010]</tt> <strong><a href="News/2010/20101019">Debian to officially welcome non-packaging contributors</a></strong><br>
154
+ <tt>[07.10.2010]</tt> <strong><a href="News/2010/20101007">Debian to be at the Society for Neuroscience meeting</a></strong><br>
155
+ </p>
156
+ <p>Star�� zpr�vy naleznete v&nbsp;<a href="./News/">archivu novinek</a>.
157
+ Pokud chcete b�t o&nbsp;novink�ch ze sv�ta Debianu informov�ni elektronickou
158
+ po�tou, p�ihlaste se do konference
159
+ <a href="MailingLists/debian-announce">debian-announce</a>.</p>
160
+ <hr>
161
+ <h2>Bezpe�nostn� zpr�vy</h2>
162
+ <p><tt>[01.12.2010]</tt> <strong><a href="security/2010/dsa-2129">DSA-2129 krb5</a></strong> - checksum verification weakness <br>
163
+ <tt>[01.12.2010]</tt> <strong><a href="security/2010/dsa-2128">DSA-2128 libxml2</a></strong> - invalid memory access <br>
164
+ <tt>[28.11.2010]</tt> <strong><a href="security/2010/dsa-2127">DSA-2127 wireshark</a></strong> - denial of service <br>
165
+ <tt>[26.11.2010]</tt> <strong><a href="security/2010/dsa-2126">DSA-2126 linux-2.6</a></strong> - privilege escalation/denial of service/information leak <br>
166
+ <tt>[22.11.2010]</tt> <strong><a href="security/2010/dsa-2125">DSA-2125 openssl</a></strong> - buffer overflow <br>
167
+ <tt>[01.11.2010]</tt> <strong><a href="security/2010/dsa-2124">DSA-2124 xulrunner</a></strong> - several vulnerabilities <br>
168
+ <tt>[01.11.2010]</tt> <strong><a href="security/2010/dsa-2123">DSA-2123 nss</a></strong> - several vulnerabilities <br>
169
+ <tt>[22.10.2010]</tt> <strong><a href="security/2010/dsa-2122">DSA-2122 glibc</a></strong> - missing input sanitization <br>
170
+ <tt>[19.10.2010]</tt> <strong><a href="security/2010/dsa-2121">DSA-2121 typo3-src</a></strong> - several vulnerabilities <br>
171
+ <tt>[12.10.2010]</tt> <strong><a href="security/2010/dsa-2120">DSA-2120 postgresql-8.3</a></strong> - privilege escalation <br>
172
+ <tt>[12.10.2010]</tt> <strong><a href="security/2010/dsa-2119">DSA-2119 poppler</a></strong> - several vulnerabilities <br>
173
+ </p>
174
+ <p>Star�� bezpe�nostn� zpr�vy se nach�z� na str�nce
175
+ s&nbsp;<a href="./security/">bezpe�nostn�mi informacemi</a>.
176
+ Pokud chcete b�t na potenci�ln� bezpe�nostn� rizika upozor�ov�ni
177
+ elektronickou po�tou co nejd��ve po jejich ohl�en�, p�ihlaste se do
178
+ konference
179
+ <a href="http://lists.debian.org/debian-security-announce/">debian-security-announce</a>.</p>
180
+ </div> <!-- end maincol -->
181
+ <div class="clr"></div>
182
+ </div> <!-- end inner -->
183
+ <div id="footer">
184
+ <hr class="hidecss">
185
+ <!--UdmComment-->
186
+ <p>
187
+ Tato str�nka je tak� dostupn� v n�sleduj�c�ch jazyc�ch:
188
+ </p><p class="navpara">
189
+ <a href="index.ar.html" title="arab�tina" hreflang="ar" lang="ar" rel="alternate">&#1593;&#1585;&#1576;&#1610;&#1577;&nbsp;(Arabiya)</a>
190
+ <a href="index.bg.html" title="bulhar�tina" hreflang="bg" lang="bg" rel="alternate">&#1041;&#1098;&#1083;&#1075;&#1072;&#1088;&#1089;&#1082;&#1080;&nbsp;(B&#601;lgarski)</a>
191
+ <a href="index.ca.html" title="katal�n�tina" hreflang="ca" lang="ca" rel="alternate">catal&agrave;</a>
192
+ <a href="index.da.html" title="d�n�tina" hreflang="da" lang="da" rel="alternate">dansk</a>
193
+ <a href="index.de.html" title="n�m�ina" hreflang="de" lang="de" rel="alternate">Deutsch</a>
194
+ <a href="index.el.html" title="�e�tina" hreflang="el" lang="el" rel="alternate">&#917;&#955;&#955;&#951;&#957;&#953;&#954;&#940;&nbsp;(Ellinika)</a>
195
+ <a href="index.en.html" title="angli�tina" hreflang="en" lang="en" rel="alternate">English</a>
196
+ <a href="index.es.html" title="�pan�l�tina" hreflang="es" lang="es" rel="alternate">espa&ntilde;ol</a>
197
+ <a href="index.eo.html" title="esperanto" hreflang="eo" lang="eo" rel="alternate">Esperanto</a>
198
+ <a href="index.fr.html" title="francouz�tina" hreflang="fr" lang="fr" rel="alternate">fran&ccedil;ais</a>
199
+ <a href="index.ko.html" title="korej�tina" hreflang="ko" lang="ko" rel="alternate">&#54620;&#44397;&#50612;&nbsp;(Hangul)</a>
200
+ <a href="index.hy.html" title="arm�n�tina" hreflang="hy" lang="hy" rel="alternate">&#1344;&#1377;&#1397;&#1381;&#1408;&#1381;&#1398;&nbsp;(hayeren)</a>
201
+ <a href="index.hr.html" title="chorvat�tina" hreflang="hr" lang="hr" rel="alternate">hrvatski</a>
202
+ <a href="index.id.html" title="indon�tina" hreflang="id" lang="id" rel="alternate">Indonesia</a>
203
+ <a href="index.it.html" title="ital�tina" hreflang="it" lang="it" rel="alternate">Italiano</a>
204
+ <a href="index.he.html" title="hebrej�tina" hreflang="he" lang="he" rel="alternate">&#1506;&#1489;&#1512;&#1497;&#1514;&nbsp;(ivrit)</a>
205
+ <a href="index.lt.html" title="litev�tina" hreflang="lt" lang="lt" rel="alternate">Lietuvi&#371;</a>
206
+ <a href="index.hu.html" title="ma�ar�tina" hreflang="hu" lang="hu" rel="alternate">magyar</a>
207
+ <a href="index.nl.html" title="holand�tina" hreflang="nl" lang="nl" rel="alternate">Nederlands</a>
208
+ <a href="index.ja.html" title="japon�tina" hreflang="ja" lang="ja" rel="alternate">&#26085;&#26412;&#35486;&nbsp;(Nihongo)</a>
209
+ <a href="index.nb.html" title="nor�tina" hreflang="nb" lang="nb" rel="alternate">norsk&nbsp;(bokm&aring;l)</a>
210
+ <a href="index.pl.html" title="pol�tina" hreflang="pl" lang="pl" rel="alternate">polski</a>
211
+ <a href="index.pt.html" title="portugal�tina" hreflang="pt" lang="pt" rel="alternate">Portugu&ecirc;s</a>
212
+ <a href="index.ro.html" title="rumun�tina" hreflang="ro" lang="ro" rel="alternate">rom&acirc;n&#259;</a>
213
+ <a href="index.ru.html" title="ru�tina" hreflang="ru" lang="ru" rel="alternate">&#1056;&#1091;&#1089;&#1089;&#1082;&#1080;&#1081;&nbsp;(Russkij)</a>
214
+ <a href="index.sk.html" title="sloven�tina" hreflang="sk" lang="sk" rel="alternate">slovensky</a>
215
+ <a href="index.fi.html" title="fin�tina" hreflang="fi" lang="fi" rel="alternate">suomi</a>
216
+ <a href="index.sv.html" title="�v�d�tina" hreflang="sv" lang="sv" rel="alternate">svenska</a>
217
+ <a href="index.ta.html" title="tamil�tina" hreflang="ta" lang="ta" rel="alternate">&#2980;&#2990;&#3007;&#2996;&#3021;&nbsp;(Tamil)</a>
218
+ <a href="index.tr.html" title="ture�tina" hreflang="tr" lang="tr" rel="alternate">T&uuml;rk&ccedil;e</a>
219
+ <a href="index.uk.html" title="ukrajin�tina" hreflang="uk" lang="uk" rel="alternate">&#1091;&#1082;&#1088;&#1072;&#1111;&#1085;&#1089;&#1100;&#1082;&#1072;&nbsp;(ukrajins'ka)</a>
220
+ <a href="index.zh-cn.html" title="��n�tina (��na)" hreflang="zh-CN" lang="zh-CN" rel="alternate">&#20013;&#25991;(&#31616;)</a>
221
+ <a href="index.zh-hk.html" title="��n�tina (Hong Kong)" hreflang="zh-HK" lang="zh-HK" rel="alternate">&#20013;&#25991;(HK)</a>
222
+ <a href="index.zh-tw.html" title="��n�tina (Tchai-wan)" hreflang="zh-TW" lang="zh-TW" rel="alternate">&#20013;&#25991;(&#32321;)</a>
223
+ </p><p>
224
+ Jak nastavit <a href="./intro/cn">v�choz� jazyk</a>
225
+ </p>
226
+ <!--/UdmComment-->
227
+ <hr>
228
+ <!--UdmComment-->
229
+ <div id="fineprint">
230
+ <p>Probl�my t�kaj�c� se webov�ch str�nek oznamujte anglicky na e-mail <a href="mailto:debian-www@lists.debian.org">debian-www@lists.debian.org</a>. Dal�� kontakty z�sk�te na str�nce s&nbsp;<a href="./contact">kontakty</a>.</p>
231
+ <p>
232
+ Posledn� zm�na: �t, 2. pro 11:26:39 UTC 2010
233
+ <br>
234
+ Copyright &copy; 1997-2010
235
+ <a href="http://www.spi-inc.org/">SPI</a>; P�e�tete si <a href="./license" rel="copyright">licen�n� podm�nky</a><br>
236
+ Debian je registrovan� <a href="./trademark">obchodn� zn�mka</a> organizace Software in the Public Interest, Inc.
237
+ </p>
238
+ </div>
239
+ <!--/UdmComment-->
240
+ </div> <!-- end footer -->
241
+ </div> <!-- end outer -->
242
+ </body>
243
+ </html>
@@ -14,7 +14,8 @@ describe ValidateWebsite do
14
14
  page = FakePage.new(name,
15
15
  :body => open(file).read,
16
16
  :content_type => 'text/html')
17
- @validate_website.crawl(page.url)
17
+ @validate_website.site = page.url
18
+ @validate_website.crawl(:quiet => true)
18
19
  @validate_website.anemone.should have(3).pages
19
20
  end
20
21
  end
@@ -27,7 +28,8 @@ describe ValidateWebsite do
27
28
  .tests {background-image: url(/image/pouet_42.png)}
28
29
  .tests {background-image: url(/image/pouet)}",
29
30
  :content_type => 'text/css')
30
- @validate_website.crawl(page.url)
31
+ @validate_website.site = page.url
32
+ @validate_website.crawl
31
33
  @validate_website.anemone.should have(5).pages
32
34
  end
33
35
 
@@ -35,7 +37,8 @@ describe ValidateWebsite do
35
37
  page = FakePage.new('test.css',
36
38
  :body => ".test {background-image: url('pouet');}",
37
39
  :content_type => 'text/css')
38
- @validate_website.crawl(page.url)
40
+ @validate_website.site = page.url
41
+ @validate_website.crawl
39
42
  @validate_website.anemone.should have(2).pages
40
43
  end
41
44
 
@@ -43,7 +46,8 @@ describe ValidateWebsite do
43
46
  page = FakePage.new('test.css',
44
47
  :body => ".test {background-image: url(\"pouet\");}",
45
48
  :content_type => 'text/css')
46
- @validate_website.crawl(page.url)
49
+ @validate_website.site = page.url
50
+ @validate_website.crawl
47
51
  @validate_website.anemone.should have(2).pages
48
52
  end
49
53
  end
@@ -15,7 +15,7 @@ describe Validator do
15
15
  :body => open(file).read,
16
16
  :content_type => 'text/html')
17
17
  @xhtml1_page = @http.fetch_page(page.url)
18
- validator = Validator.new(@xhtml1_page)
18
+ validator = Validator.new(@xhtml1_page.doc, @xhtml1_page.body)
19
19
  validator.dtd.system_id.should == dtd_uri
20
20
  validator.namespace.should == name
21
21
  validator.should be_valid
@@ -25,13 +25,14 @@ describe Validator do
25
25
  describe('html5') do
26
26
  context('when valid') do
27
27
  it "html5 should be valid" do
28
+ pending("need update html5lib")
28
29
  name = 'html5'
29
30
  file = File.join('spec', 'data', "#{name}.html")
30
31
  page = FakePage.new(name,
31
32
  :body => open(file).read,
32
33
  :content_type => 'text/html')
33
34
  @html5_page = @http.fetch_page(page.url)
34
- validator = Validator.new(@html5_page)
35
+ validator = Validator.new(@html5_page.doc, @html5_page.body)
35
36
  validator.should be_valid
36
37
  end
37
38
  end
@@ -44,7 +45,7 @@ describe Validator do
44
45
  :body => open(file).read,
45
46
  :content_type => 'text/html')
46
47
  @html5_page = @http.fetch_page(page.url)
47
- validator = Validator.new(@html5_page)
48
+ validator = Validator.new(@html5_page.doc, @html5_page.body)
48
49
  validator.should be_valid
49
50
  end
50
51
  end
@@ -58,7 +59,7 @@ describe Validator do
58
59
  :body => open(file).read,
59
60
  :content_type => 'text/html')
60
61
  @html4_strict_page = @http.fetch_page(page.url)
61
- validator = Validator.new(@html4_strict_page)
62
+ validator = Validator.new(@html4_strict_page.doc, @html4_strict_page.body)
62
63
  validator.should be_valid
63
64
  end
64
65
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: validate-website
3
3
  version: !ruby/object:Gem::Version
4
- hash: 13
4
+ hash: 5
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 5
9
- - 3
10
- version: 0.5.3
9
+ - 7
10
+ version: 0.5.7
11
11
  platform: ruby
12
12
  authors:
13
13
  - Laurent Arnoud
@@ -15,23 +15,23 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-12-05 00:00:00 +01:00
18
+ date: 2010-12-10 00:00:00 +01:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
22
- name: spk-anemone
22
+ name: anemone
23
23
  prerelease: false
24
24
  requirement: &id001 !ruby/object:Gem::Requirement
25
25
  none: false
26
26
  requirements:
27
27
  - - ">="
28
28
  - !ruby/object:Gem::Version
29
- hash: 15
29
+ hash: 11
30
30
  segments:
31
31
  - 0
32
- - 4
32
+ - 5
33
33
  - 0
34
- version: 0.4.0
34
+ version: 0.5.0
35
35
  type: :runtime
36
36
  version_requirements: *id001
37
37
  - !ruby/object:Gem::Dependency
@@ -49,26 +49,10 @@ dependencies:
49
49
  version: "1.1"
50
50
  type: :runtime
51
51
  version_requirements: *id002
52
- - !ruby/object:Gem::Dependency
53
- name: spk-html5
54
- prerelease: false
55
- requirement: &id003 !ruby/object:Gem::Requirement
56
- none: false
57
- requirements:
58
- - - "="
59
- - !ruby/object:Gem::Version
60
- hash: 53
61
- segments:
62
- - 0
63
- - 10
64
- - 1
65
- version: 0.10.1
66
- type: :runtime
67
- version_requirements: *id003
68
52
  - !ruby/object:Gem::Dependency
69
53
  name: rspec
70
54
  prerelease: false
71
- requirement: &id004 !ruby/object:Gem::Requirement
55
+ requirement: &id003 !ruby/object:Gem::Requirement
72
56
  none: false
73
57
  requirements:
74
58
  - - ">="
@@ -80,11 +64,11 @@ dependencies:
80
64
  - 0
81
65
  version: 2.0.0
82
66
  type: :development
83
- version_requirements: *id004
67
+ version_requirements: *id003
84
68
  - !ruby/object:Gem::Dependency
85
69
  name: fakeweb
86
70
  prerelease: false
87
- requirement: &id005 !ruby/object:Gem::Requirement
71
+ requirement: &id004 !ruby/object:Gem::Requirement
88
72
  none: false
89
73
  requirements:
90
74
  - - ">="
@@ -96,11 +80,12 @@ dependencies:
96
80
  - 0
97
81
  version: 1.3.0
98
82
  type: :development
99
- version_requirements: *id005
83
+ version_requirements: *id004
100
84
  description: validate-website is a web crawler for checking the markupvalidity and not found urls.
101
85
  email: laurent@spkdev.net
102
86
  executables:
103
87
  - validate-website
88
+ - validate-website-static
104
89
  extensions: []
105
90
 
106
91
  extra_rdoc_files: []
@@ -109,6 +94,7 @@ files:
109
94
  - README.rdoc
110
95
  - Rakefile
111
96
  - LICENSE
97
+ - bin/validate-website-static
112
98
  - bin/validate-website
113
99
  - lib/validator.rb
114
100
  - lib/colorful_messages.rb
@@ -207,6 +193,7 @@ files:
207
193
  - spec/validator_spec.rb
208
194
  - spec/spec_helper.rb
209
195
  - spec/data/html5-linuxfr.html
196
+ - spec/data/index.cs.html
210
197
  - spec/data/html4-strict.html
211
198
  - spec/data/html5.html
212
199
  - spec/data/xhtml1-strict.html
@@ -240,9 +227,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
240
227
  - 0
241
228
  version: "0"
242
229
  requirements:
243
- - spk-anemone
230
+ - anemone
244
231
  - rainbow
245
- - spk-html5
246
232
  rubyforge_project:
247
233
  rubygems_version: 1.3.7
248
234
  signing_key: