validate-website 0.5.7 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (109) hide show
  1. data/README.rdoc +5 -4
  2. data/Rakefile +3 -3
  3. data/bin/validate-website +9 -11
  4. data/bin/validate-website-static +7 -18
  5. data/lib/validate_website.rb +1 -210
  6. data/lib/validate_website/colorful_messages.rb +28 -0
  7. data/lib/validate_website/core.rb +141 -0
  8. data/lib/validate_website/option_parser.rb +133 -0
  9. data/lib/validate_website/runner.rb +35 -0
  10. data/lib/validate_website/validator.rb +69 -0
  11. data/man/man1/validate-website-static.1 +82 -0
  12. data/{lib/xhtml → share/schemas}/frameset.dtd +0 -0
  13. data/{lib/xhtml → share/schemas}/loose.dtd +0 -0
  14. data/{lib/xhtml → share/schemas}/strict.dtd +0 -0
  15. data/{lib/xhtml → share/schemas}/xframes-1.xsd +0 -0
  16. data/{lib/xhtml → share/schemas}/xhtml-access-1.xsd +0 -0
  17. data/{lib/xhtml → share/schemas}/xhtml-applet-1.xsd +0 -0
  18. data/{lib/xhtml → share/schemas}/xhtml-attribs-1.xsd +0 -0
  19. data/{lib/xhtml → share/schemas}/xhtml-base-1.xsd +0 -0
  20. data/{lib/xhtml → share/schemas}/xhtml-basic-form-1.xsd +0 -0
  21. data/{lib/xhtml → share/schemas}/xhtml-basic-table-1.xsd +0 -0
  22. data/{lib/xhtml → share/schemas}/xhtml-basic10-model-1.xsd +0 -0
  23. data/{lib/xhtml → share/schemas}/xhtml-basic10-module-redefines-1.xsd +0 -0
  24. data/{lib/xhtml → share/schemas}/xhtml-basic10-modules-1.xsd +0 -0
  25. data/{lib/xhtml → share/schemas}/xhtml-basic10.xsd +0 -0
  26. data/{lib/xhtml → share/schemas}/xhtml-basic11-model-1.xsd +0 -0
  27. data/{lib/xhtml → share/schemas}/xhtml-basic11-modules-1.xsd +0 -0
  28. data/{lib/xhtml → share/schemas}/xhtml-basic11.dtd +0 -0
  29. data/{lib/xhtml → share/schemas}/xhtml-basic11.xsd +0 -0
  30. data/{lib/xhtml → share/schemas}/xhtml-bdo-1.xsd +0 -0
  31. data/{lib/xhtml → share/schemas}/xhtml-blkphras-1.xsd +0 -0
  32. data/{lib/xhtml → share/schemas}/xhtml-blkpres-1.xsd +0 -0
  33. data/{lib/xhtml → share/schemas}/xhtml-blkstruct-1.xsd +0 -0
  34. data/{lib/xhtml → share/schemas}/xhtml-charent-1.xsd +0 -0
  35. data/{lib/xhtml → share/schemas}/xhtml-copyright-1.xsd +0 -0
  36. data/{lib/xhtml → share/schemas}/xhtml-csismap-1.xsd +0 -0
  37. data/{lib/xhtml → share/schemas}/xhtml-datatypes-1.xsd +0 -0
  38. data/{lib/xhtml → share/schemas}/xhtml-edit-1.xsd +0 -0
  39. data/{lib/xhtml → share/schemas}/xhtml-events-1.xsd +0 -0
  40. data/{lib/xhtml → share/schemas}/xhtml-form-1.xsd +0 -0
  41. data/{lib/xhtml → share/schemas}/xhtml-frames-1.xsd +0 -0
  42. data/{lib/xhtml → share/schemas}/xhtml-framework-1.xsd +0 -0
  43. data/{lib/xhtml → share/schemas}/xhtml-hypertext-1.xsd +0 -0
  44. data/{lib/xhtml → share/schemas}/xhtml-iframe-1.xsd +0 -0
  45. data/{lib/xhtml → share/schemas}/xhtml-image-1.xsd +0 -0
  46. data/{lib/xhtml → share/schemas}/xhtml-inlphras-1.xsd +0 -0
  47. data/{lib/xhtml → share/schemas}/xhtml-inlpres-1.xsd +0 -0
  48. data/{lib/xhtml → share/schemas}/xhtml-inlstruct-1.xsd +0 -0
  49. data/{lib/xhtml → share/schemas}/xhtml-inlstyle-1.xsd +0 -0
  50. data/{lib/xhtml → share/schemas}/xhtml-inputmode-1.xsd +0 -0
  51. data/{lib/xhtml → share/schemas}/xhtml-lat1.ent +0 -0
  52. data/{lib/xhtml → share/schemas}/xhtml-legacy-1.xsd +0 -0
  53. data/{lib/xhtml → share/schemas}/xhtml-link-1.xsd +0 -0
  54. data/{lib/xhtml → share/schemas}/xhtml-list-1.xsd +0 -0
  55. data/{lib/xhtml → share/schemas}/xhtml-meta-1.xsd +0 -0
  56. data/{lib/xhtml → share/schemas}/xhtml-metaAttributes-1.xsd +0 -0
  57. data/{lib/xhtml → share/schemas}/xhtml-misc-1.xsd +0 -0
  58. data/{lib/xhtml → share/schemas}/xhtml-nameident-1.xsd +0 -0
  59. data/{lib/xhtml → share/schemas}/xhtml-notations-1.xsd +0 -0
  60. data/{lib/xhtml → share/schemas}/xhtml-object-1.xsd +0 -0
  61. data/{lib/xhtml → share/schemas}/xhtml-param-1.xsd +0 -0
  62. data/{lib/xhtml → share/schemas}/xhtml-pres-1.xsd +0 -0
  63. data/{lib/xhtml → share/schemas}/xhtml-print-1.xsd +0 -0
  64. data/{lib/xhtml → share/schemas}/xhtml-print-model-1.xsd +0 -0
  65. data/{lib/xhtml → share/schemas}/xhtml-print-modules-1.xsd +0 -0
  66. data/{lib/xhtml → share/schemas}/xhtml-rdfa-1.dtd +0 -0
  67. data/{lib/xhtml → share/schemas}/xhtml-rdfa-1.xsd +0 -0
  68. data/{lib/xhtml → share/schemas}/xhtml-rdfa-model-1.xsd +0 -0
  69. data/{lib/xhtml → share/schemas}/xhtml-rdfa-modules-1.xsd +0 -0
  70. data/{lib/xhtml → share/schemas}/xhtml-ruby-1.xsd +0 -0
  71. data/{lib/xhtml → share/schemas}/xhtml-ruby-basic-1.xsd +0 -0
  72. data/{lib/xhtml → share/schemas}/xhtml-script-1.xsd +0 -0
  73. data/{lib/xhtml → share/schemas}/xhtml-special.ent +0 -0
  74. data/{lib/xhtml → share/schemas}/xhtml-ssismap-1.xsd +0 -0
  75. data/{lib/xhtml → share/schemas}/xhtml-struct-1.xsd +0 -0
  76. data/{lib/xhtml → share/schemas}/xhtml-style-1.xsd +0 -0
  77. data/{lib/xhtml → share/schemas}/xhtml-symbol.ent +0 -0
  78. data/{lib/xhtml → share/schemas}/xhtml-table-1.xsd +0 -0
  79. data/{lib/xhtml → share/schemas}/xhtml-target-1.xsd +0 -0
  80. data/{lib/xhtml → share/schemas}/xhtml-text-1.xsd +0 -0
  81. data/{lib/xhtml → share/schemas}/xhtml1-frameset.dtd +0 -0
  82. data/{lib/xhtml → share/schemas}/xhtml1-frameset.xsd +0 -0
  83. data/{lib/xhtml → share/schemas}/xhtml1-strict.dtd +0 -0
  84. data/{lib/xhtml → share/schemas}/xhtml1-strict.xsd +0 -0
  85. data/{lib/xhtml → share/schemas}/xhtml1-transitional.dtd +0 -0
  86. data/{lib/xhtml → share/schemas}/xhtml1-transitional.xsd +0 -0
  87. data/{lib/xhtml → share/schemas}/xhtml11-model-1.xsd +0 -0
  88. data/{lib/xhtml → share/schemas}/xhtml11-module-redefines-1.xsd +0 -0
  89. data/{lib/xhtml → share/schemas}/xhtml11-modules-1.xsd +0 -0
  90. data/{lib/xhtml → share/schemas}/xhtml11.xsd +0 -0
  91. data/{lib/xhtml → share/schemas}/xhtml2.xsd +0 -0
  92. data/{lib/xhtml → share/schemas}/xml-events-1.xsd +0 -0
  93. data/{lib/xhtml → share/schemas}/xml-events-2.xsd +0 -0
  94. data/{lib/xhtml → share/schemas}/xml-events-attribs-1.xsd +0 -0
  95. data/{lib/xhtml → share/schemas}/xml-events-attribs-2.xsd +0 -0
  96. data/{lib/xhtml → share/schemas}/xml-events-copyright-1.xsd +0 -0
  97. data/{lib/xhtml → share/schemas}/xml-events-copyright-2.xsd +0 -0
  98. data/{lib/xhtml → share/schemas}/xml-handlers-1.xsd +0 -0
  99. data/{lib/xhtml → share/schemas}/xml-handlers-2.xsd +0 -0
  100. data/{lib/xhtml → share/schemas}/xml-script-1.xsd +0 -0
  101. data/{lib/xhtml → share/schemas}/xml.xsd +0 -0
  102. data/spec/core_spec.rb +56 -0
  103. data/spec/spec_helper.rb +1 -1
  104. data/spec/validator_spec.rb +3 -1
  105. metadata +102 -99
  106. data/lib/colorful_messages.rb +0 -28
  107. data/lib/validator.rb +0 -67
  108. data/spec/data/index.cs.html +0 -243
  109. data/spec/validate_website_spec.rb +0 -54
data/README.rdoc CHANGED
@@ -12,14 +12,16 @@
12
12
 
13
13
  == SYNOPSIS
14
14
 
15
- validate-website [OPTIONS]
15
+ validate-website [OPTIONS]
16
+ validate-website-static [OPTIONS]
16
17
 
17
18
  == DESCRIPTION
18
19
 
19
20
  validate-website is a web crawler for checking the markup validity and not
20
21
  found urls.
22
+ validate-website-static check the markup validity of your local documents.
21
23
 
22
- == OPTIONS
24
+ == VALIDATE WEBSITE OPTIONS
23
25
 
24
26
  -s, --site SITE
25
27
  Website to crawl (Default: http://localhost:3000/)
@@ -56,9 +58,8 @@ found urls.
56
58
 
57
59
  == REQUIREMENTS
58
60
 
59
- * spk-anemone, '>= 0.4.0'
61
+ * anemone, '>= 0.5.0'
60
62
  * rainbow, '>= 1.1'
61
- * html5, '= 0.10.0'
62
63
 
63
64
  == CREDITS
64
65
 
data/Rakefile CHANGED
@@ -7,10 +7,10 @@ require "rspec/core/rake_task" # RSpec 2.0
7
7
  # Globals
8
8
 
9
9
  PKG_NAME = 'validate-website'
10
- PKG_VERSION = '0.5.7'
10
+ PKG_VERSION = '0.6.0'
11
11
 
12
12
  PKG_FILES = ['README.rdoc', 'Rakefile', 'LICENSE']
13
- Find.find('bin/', 'lib/', 'man/', 'spec/') do |f|
13
+ Find.find('bin/', 'lib/', 'man/', 'spec/', 'share/') do |f|
14
14
  if FileTest.directory?(f) and f =~ /\.svn|\.git/
15
15
  Find.prune
16
16
  else
@@ -64,7 +64,7 @@ end
64
64
 
65
65
  desc 'Update manpage from asciidoc file'
66
66
  task :manpage do
67
- system('a2x -f manpage -D man/man1 doc/validate-website.txt')
67
+ system('find doc/ -type f -exec a2x -f manpage -D man/man1 {} \;')
68
68
  end
69
69
 
70
70
  # RSpec 2.0
data/bin/validate-website CHANGED
@@ -1,16 +1,14 @@
1
1
  #!/usr/bin/env ruby
2
2
  # encoding: utf-8
3
3
 
4
- lib_dir = File.join(File.dirname(__FILE__), '..', 'lib')
5
- $:.unshift(File.expand_path(lib_dir))
6
-
7
4
  developer_mode = false
8
5
  developer_mode = true if __FILE__ == $0
9
- require 'rubygems' if developer_mode
10
-
11
- require 'validate_website'
12
-
13
- validate_website = ValidateWebsite.new(ARGV)
14
- validate_website.crawl
15
-
16
- exit(validate_website.exit_status)
6
+ if developer_mode
7
+ lib_dir = File.join(File.dirname(__FILE__), '..', 'lib')
8
+ $:.unshift(File.expand_path(lib_dir))
9
+ require 'rubygems'
10
+ end
11
+
12
+ require 'validate_website/runner'
13
+ exit_status = ValidateWebsite::Runner.run_crawl(ARGV)
14
+ exit(exit_status)
data/bin/validate-website-static CHANGED
@@ -1,25 +1,14 @@
1
1
  #!/usr/bin/env ruby
2
2
  # encoding: utf-8
3
3
 
4
- lib_dir = File.join(File.dirname(__FILE__), '..', 'lib')
5
- $:.unshift(File.expand_path(lib_dir))
6
-
7
4
  developer_mode = false
8
5
  developer_mode = true if __FILE__ == $0
9
- require 'rubygems' if developer_mode
10
-
11
- require 'validate_website'
12
-
13
- validate_website = ValidateWebsite.new(ARGV)
14
-
15
- files = Dir.glob(File.join("**", "*.html"))
16
- files.each do |f|
17
- next unless File.file?(f)
18
-
19
- body = open(f).read
20
- doc = Nokogiri::HTML(body)
21
-
22
- validate_website.validate(doc, body, f)
6
+ if developer_mode
7
+ lib_dir = File.join(File.dirname(__FILE__), '..', 'lib')
8
+ $:.unshift(File.expand_path(lib_dir))
9
+ require 'rubygems'
23
10
  end
24
11
 
25
- exit(validate_website.exit_status)
12
+ require 'validate_website/runner'
13
+ exit_status = ValidateWebsite::Runner.run_static(ARGV)
14
+ exit(exit_status)
data/lib/validate_website.rb CHANGED
@@ -1,211 +1,2 @@
1
1
  # encoding: utf-8
2
-
3
- require 'optparse'
4
- require 'open-uri'
5
-
6
- require 'validator'
7
- require 'colorful_messages'
8
-
9
- require 'anemone'
10
-
11
- class ValidateWebsite
12
-
13
- attr_accessor :site
14
- attr_reader :options, :anemone
15
-
16
- include ColorfulMessages
17
-
18
- EXIT_SUCCESS = 0
19
- EXIT_FAILURE_MARKUP = 64
20
- EXIT_FAILURE_NOT_FOUND = 65
21
- EXIT_FAILURE_MARKUP_NOT_FOUND = 66
22
-
23
- def initialize(args=[], validation_type = :crawl)
24
- @markup_error = nil
25
- @not_found_error = nil
26
-
27
- @options_crawl = {
28
- :site => 'http://localhost:3000/',
29
- :markup_validation => true,
30
- :exclude => nil,
31
- :file => nil,
32
- # log not found url (404 status code)
33
- :not_found => false,
34
- # internal verbose for ValidateWebsite
35
- :validate_verbose => false,
36
- :quiet => false,
37
-
38
- # Anemone options see anemone/lib/anemone/core.rb
39
- :verbose => false,
40
- :user_agent => Anemone::Core::DEFAULT_OPTS[:user_agent],
41
- :cookies => nil,
42
- :accept_cookies => true,
43
- :redirect_limit => 0,
44
- }
45
- send("parse_#{validation_type}_options", args)
46
-
47
- @file = @options[:file]
48
- if @file
49
- # truncate file
50
- open(@file, 'w').write('')
51
- end
52
-
53
- @site = @options[:site]
54
- end
55
-
56
- def parse_crawl_options(args)
57
- @options = @options_crawl
58
-
59
- opts = OptionParser.new do |o|
60
- o.set_summary_indent(' ')
61
- o.banner = 'Usage: validate-website [OPTIONS]'
62
- o.define_head 'validate-website - Web crawler for checking the validity'+
63
- ' of your documents'
64
- o.separator ''
65
-
66
- o.on("-s", "--site 'SITE'", String,
67
- "Website to crawl (Default: #{@options[:site]})") { |v|
68
- @options[:site] = v
69
- }
70
- o.on("-u", "--user-agent 'USERAGENT'", String,
71
- "Change user agent (Default: #{@options[:user_agent]})") { |v|
72
- @options[:user_agent] = v
73
- }
74
- o.on("-e", "--exclude 'EXCLUDE'", String,
75
- "Url to exclude (ex: 'redirect|news')") { |v|
76
- @options[:exclude] = v
77
- }
78
- o.on("-f", "--file 'FILE'", String,
79
- "Save not well formed or not found urls") { |v| @options[:file] = v }
80
-
81
- o.on("-c", "--cookies 'COOKIES'", String,
82
- "Set defaults cookies") { |v| @options[:cookies] = v }
83
-
84
- o.on("-m", "--[no-]markup-validation",
85
- "Markup validation (Default: #{@options[:markup_validation]})") { |v|
86
- @options[:markup_validation] = v
87
- }
88
- o.on("-n", "--not-found",
89
- "Log not found url (Default: #{@options[:not_found]})") { |v|
90
- @options[:not_found] = v
91
- }
92
- o.on("-v", "--verbose",
93
- "Show validator errors (Default: #{@options[:validate_verbose]})") { |v|
94
- @options[:validate_verbose] = v
95
- }
96
- o.on("-q", "--quiet",
97
- "Only report errors (Default: #{@options[:quiet]})") { |v|
98
- @options[:quiet] = v
99
- }
100
- o.on("-d", "--debug",
101
- "Show anemone log (Default: #{@options[:verbose]})") { |v|
102
- @options[:verbose] = v
103
- }
104
-
105
- o.separator ""
106
- o.on_tail("-h", "--help", "Show this help message.") { puts o; exit }
107
- end
108
- opts.parse!(args)
109
- end
110
-
111
- def validate(doc, body, url, opts={})
112
- opts = @options.merge(opts)
113
- validator = Validator.new(doc, body)
114
- msg = " well formed? %s" % validator.valid?
115
- if validator.valid?
116
- unless opts[:quiet]
117
- print info(url)
118
- puts success(msg)
119
- end
120
- else
121
- @markup_error = true
122
- print info(url)
123
- puts error(msg)
124
- puts error(validator.errors.join(", ")) if opts[:validate_verbose]
125
- to_file(url)
126
- end
127
- end
128
-
129
- def crawl(opts={})
130
- opts = @options.merge(opts)
131
- puts note("Validating #{@site}") if opts[:validate_verbose]
132
-
133
- @anemone = Anemone.crawl(@site, opts) do |anemone|
134
- anemone.skip_links_like Regexp.new(opts[:exclude]) if opts[:exclude]
135
-
136
- # select the links on each page to follow (iframe, link, css url)
137
- anemone.focus_crawl { |p|
138
- links = []
139
- if p.html?
140
- p.doc.css('img, script, iframe').each do |elem|
141
- url = get_url(p, elem, "src")
142
- links << url unless url.nil?
143
- end
144
- p.doc.css('link').each do |link|
145
- url = get_url(p, link, "href")
146
- links << url unless url.nil?
147
- end
148
- end
149
- if p.content_type == 'text/css'
150
- p.body.scan(/url\((['".\/\w-]+)\)/).each do |url|
151
- url = url.first.gsub("'", "").gsub('"', '')
152
- abs = p.to_absolute(URI(url))
153
- links << abs
154
- end
155
- end
156
- links.uniq!
157
- p.links.concat(links)
158
- }
159
-
160
- anemone.on_every_page { |page|
161
- url = page.url.to_s
162
-
163
- if opts[:markup_validation]
164
- # validate html/html+xml
165
- if page.html? && page.fetched?
166
- validate(page.doc, page.body, url, opts)
167
- end
168
- end
169
-
170
- if opts[:not_found] && page.not_found?
171
- @not_found_error = true
172
- puts error("%s linked in %s but not exist" % [url, page.referer])
173
- to_file(url)
174
- end
175
-
176
- # throw away the page (hope this saves memory)
177
- page = nil
178
- }
179
- end
180
- end
181
-
182
- def exit_status
183
- if @markup_error && @not_found_error
184
- EXIT_FAILURE_MARKUP_NOT_FOUND
185
- elsif @markup_error
186
- EXIT_FAILURE_MARKUP
187
- elsif @not_found_error
188
- EXIT_FAILURE_NOT_FOUND
189
- else
190
- EXIT_SUCCESS
191
- end
192
- end
193
-
194
- private
195
- def to_file(msg)
196
- if @file && File.exist?(@file)
197
- open(@file, 'a').write("#{msg}\n")
198
- end
199
- end
200
-
201
- def get_url(page, elem, attrname)
202
- u = elem.attributes[attrname] if elem.attributes[attrname]
203
- return if u.nil?
204
- begin
205
- abs = page.to_absolute(URI(u))
206
- rescue
207
- abs = nil
208
- end
209
- return abs if abs && page.in_domain?(abs)
210
- end
211
- end
2
+ require 'validate_website/core'
data/lib/validate_website/colorful_messages.rb ADDED
@@ -0,0 +1,28 @@
1
+ # encoding: utf-8
2
+ require 'rainbow'
3
+
4
+ module ValidateWebsite
5
+ module ColorfulMessages
6
+ def error(message)
7
+ message.to_s.foreground(:red)
8
+ end
9
+
10
+ def warning(message)
11
+ message.to_s.foreground(:yellow)
12
+ end
13
+
14
+ def success(message)
15
+ message.to_s.foreground(:green)
16
+ end
17
+
18
+ alias_method :message, :success
19
+
20
+ def note(message)
21
+ message.to_s.foreground(:magenta)
22
+ end
23
+
24
+ def info(message)
25
+ message.to_s.foreground(:blue)
26
+ end
27
+ end
28
+ end
data/lib/validate_website/core.rb ADDED
@@ -0,0 +1,141 @@
1
+ # encoding: utf-8
2
+
3
+ require 'open-uri'
4
+
5
+ require 'validate_website/option_parser'
6
+ require 'validate_website/validator'
7
+ require 'validate_website/colorful_messages'
8
+
9
+ require 'anemone'
10
+
11
+ module ValidateWebsite
12
+
13
+ class Core
14
+
15
+ attr_accessor :site
16
+ attr_reader :options, :anemone
17
+
18
+ include ColorfulMessages
19
+
20
+ EXIT_SUCCESS = 0
21
+ EXIT_FAILURE_MARKUP = 64
22
+ EXIT_FAILURE_NOT_FOUND = 65
23
+ EXIT_FAILURE_MARKUP_NOT_FOUND = 66
24
+
25
+ def initialize(options={}, validation_type = :crawl)
26
+ @markup_error = nil
27
+ @not_found_error = nil
28
+
29
+ @options = Parser.parse(options, validation_type)
30
+
31
+ @file = @options[:file]
32
+ if @file
33
+ # truncate file
34
+ open(@file, 'w').write('')
35
+ end
36
+
37
+ @site = @options[:site]
38
+ end
39
+
40
+ def validate(doc, body, url, opts={})
41
+ opts = @options.merge(opts)
42
+ validator = Validator.new(doc, body)
43
+ msg = " well formed? %s" % validator.valid?
44
+ if validator.valid?
45
+ unless opts[:quiet]
46
+ print info(url)
47
+ puts success(msg)
48
+ end
49
+ else
50
+ @markup_error = true
51
+ print info(url)
52
+ puts error(msg)
53
+ puts error(validator.errors.join(", ")) if opts[:validate_verbose]
54
+ to_file(url)
55
+ end
56
+ end
57
+
58
+ def crawl(opts={})
59
+ opts = @options.merge(opts)
60
+ puts note("Validating #{@site}") if opts[:validate_verbose]
61
+
62
+ @anemone = Anemone.crawl(@site, opts) do |anemone|
63
+ anemone.skip_links_like Regexp.new(opts[:exclude]) if opts[:exclude]
64
+
65
+ # select the links on each page to follow (iframe, link, css url)
66
+ anemone.focus_crawl { |p|
67
+ links = []
68
+ if p.html?
69
+ p.doc.css('img, script, iframe').each do |elem|
70
+ url = get_url(p, elem, "src")
71
+ links << url unless url.nil?
72
+ end
73
+ p.doc.css('link').each do |link|
74
+ url = get_url(p, link, "href")
75
+ links << url unless url.nil?
76
+ end
77
+ end
78
+ if p.content_type == 'text/css'
79
+ p.body.scan(/url\((['".\/\w-]+)\)/).each do |url|
80
+ url = url.first.gsub("'", "").gsub('"', '')
81
+ abs = p.to_absolute(URI(url))
82
+ links << abs
83
+ end
84
+ end
85
+ links.uniq!
86
+ p.links.concat(links)
87
+ }
88
+
89
+ anemone.on_every_page { |page|
90
+ url = page.url.to_s
91
+
92
+ if opts[:markup_validation]
93
+ # validate html/html+xml
94
+ if page.html? && page.fetched?
95
+ validate(page.doc, page.body, url, opts)
96
+ end
97
+ end
98
+
99
+ if opts[:not_found] && page.not_found?
100
+ @not_found_error = true
101
+ puts error("%s linked in %s but not exist" % [url, page.referer])
102
+ to_file(url)
103
+ end
104
+
105
+ # throw away the page (hope this saves memory)
106
+ page = nil
107
+ }
108
+ end
109
+ end
110
+
111
+ def exit_status
112
+ if @markup_error && @not_found_error
113
+ EXIT_FAILURE_MARKUP_NOT_FOUND
114
+ elsif @markup_error
115
+ EXIT_FAILURE_MARKUP
116
+ elsif @not_found_error
117
+ EXIT_FAILURE_NOT_FOUND
118
+ else
119
+ EXIT_SUCCESS
120
+ end
121
+ end
122
+
123
+ private
124
+ def to_file(msg)
125
+ if @file && File.exist?(@file)
126
+ open(@file, 'a').write("#{msg}\n")
127
+ end
128
+ end
129
+
130
+ def get_url(page, elem, attrname)
131
+ u = elem.attributes[attrname] if elem.attributes[attrname]
132
+ return if u.nil?
133
+ begin
134
+ abs = page.to_absolute(URI(u))
135
+ rescue
136
+ abs = nil
137
+ end
138
+ return abs if abs && page.in_domain?(abs)
139
+ end
140
+ end
141
+ end