validate-website 0.5.7 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. data/README.rdoc +5 -4
  2. data/Rakefile +3 -3
  3. data/bin/validate-website +9 -11
  4. data/bin/validate-website-static +7 -18
  5. data/lib/validate_website.rb +1 -210
  6. data/lib/validate_website/colorful_messages.rb +28 -0
  7. data/lib/validate_website/core.rb +141 -0
  8. data/lib/validate_website/option_parser.rb +133 -0
  9. data/lib/validate_website/runner.rb +35 -0
  10. data/lib/validate_website/validator.rb +69 -0
  11. data/man/man1/validate-website-static.1 +82 -0
  12. data/{lib/xhtml → share/schemas}/frameset.dtd +0 -0
  13. data/{lib/xhtml → share/schemas}/loose.dtd +0 -0
  14. data/{lib/xhtml → share/schemas}/strict.dtd +0 -0
  15. data/{lib/xhtml → share/schemas}/xframes-1.xsd +0 -0
  16. data/{lib/xhtml → share/schemas}/xhtml-access-1.xsd +0 -0
  17. data/{lib/xhtml → share/schemas}/xhtml-applet-1.xsd +0 -0
  18. data/{lib/xhtml → share/schemas}/xhtml-attribs-1.xsd +0 -0
  19. data/{lib/xhtml → share/schemas}/xhtml-base-1.xsd +0 -0
  20. data/{lib/xhtml → share/schemas}/xhtml-basic-form-1.xsd +0 -0
  21. data/{lib/xhtml → share/schemas}/xhtml-basic-table-1.xsd +0 -0
  22. data/{lib/xhtml → share/schemas}/xhtml-basic10-model-1.xsd +0 -0
  23. data/{lib/xhtml → share/schemas}/xhtml-basic10-module-redefines-1.xsd +0 -0
  24. data/{lib/xhtml → share/schemas}/xhtml-basic10-modules-1.xsd +0 -0
  25. data/{lib/xhtml → share/schemas}/xhtml-basic10.xsd +0 -0
  26. data/{lib/xhtml → share/schemas}/xhtml-basic11-model-1.xsd +0 -0
  27. data/{lib/xhtml → share/schemas}/xhtml-basic11-modules-1.xsd +0 -0
  28. data/{lib/xhtml → share/schemas}/xhtml-basic11.dtd +0 -0
  29. data/{lib/xhtml → share/schemas}/xhtml-basic11.xsd +0 -0
  30. data/{lib/xhtml → share/schemas}/xhtml-bdo-1.xsd +0 -0
  31. data/{lib/xhtml → share/schemas}/xhtml-blkphras-1.xsd +0 -0
  32. data/{lib/xhtml → share/schemas}/xhtml-blkpres-1.xsd +0 -0
  33. data/{lib/xhtml → share/schemas}/xhtml-blkstruct-1.xsd +0 -0
  34. data/{lib/xhtml → share/schemas}/xhtml-charent-1.xsd +0 -0
  35. data/{lib/xhtml → share/schemas}/xhtml-copyright-1.xsd +0 -0
  36. data/{lib/xhtml → share/schemas}/xhtml-csismap-1.xsd +0 -0
  37. data/{lib/xhtml → share/schemas}/xhtml-datatypes-1.xsd +0 -0
  38. data/{lib/xhtml → share/schemas}/xhtml-edit-1.xsd +0 -0
  39. data/{lib/xhtml → share/schemas}/xhtml-events-1.xsd +0 -0
  40. data/{lib/xhtml → share/schemas}/xhtml-form-1.xsd +0 -0
  41. data/{lib/xhtml → share/schemas}/xhtml-frames-1.xsd +0 -0
  42. data/{lib/xhtml → share/schemas}/xhtml-framework-1.xsd +0 -0
  43. data/{lib/xhtml → share/schemas}/xhtml-hypertext-1.xsd +0 -0
  44. data/{lib/xhtml → share/schemas}/xhtml-iframe-1.xsd +0 -0
  45. data/{lib/xhtml → share/schemas}/xhtml-image-1.xsd +0 -0
  46. data/{lib/xhtml → share/schemas}/xhtml-inlphras-1.xsd +0 -0
  47. data/{lib/xhtml → share/schemas}/xhtml-inlpres-1.xsd +0 -0
  48. data/{lib/xhtml → share/schemas}/xhtml-inlstruct-1.xsd +0 -0
  49. data/{lib/xhtml → share/schemas}/xhtml-inlstyle-1.xsd +0 -0
  50. data/{lib/xhtml → share/schemas}/xhtml-inputmode-1.xsd +0 -0
  51. data/{lib/xhtml → share/schemas}/xhtml-lat1.ent +0 -0
  52. data/{lib/xhtml → share/schemas}/xhtml-legacy-1.xsd +0 -0
  53. data/{lib/xhtml → share/schemas}/xhtml-link-1.xsd +0 -0
  54. data/{lib/xhtml → share/schemas}/xhtml-list-1.xsd +0 -0
  55. data/{lib/xhtml → share/schemas}/xhtml-meta-1.xsd +0 -0
  56. data/{lib/xhtml → share/schemas}/xhtml-metaAttributes-1.xsd +0 -0
  57. data/{lib/xhtml → share/schemas}/xhtml-misc-1.xsd +0 -0
  58. data/{lib/xhtml → share/schemas}/xhtml-nameident-1.xsd +0 -0
  59. data/{lib/xhtml → share/schemas}/xhtml-notations-1.xsd +0 -0
  60. data/{lib/xhtml → share/schemas}/xhtml-object-1.xsd +0 -0
  61. data/{lib/xhtml → share/schemas}/xhtml-param-1.xsd +0 -0
  62. data/{lib/xhtml → share/schemas}/xhtml-pres-1.xsd +0 -0
  63. data/{lib/xhtml → share/schemas}/xhtml-print-1.xsd +0 -0
  64. data/{lib/xhtml → share/schemas}/xhtml-print-model-1.xsd +0 -0
  65. data/{lib/xhtml → share/schemas}/xhtml-print-modules-1.xsd +0 -0
  66. data/{lib/xhtml → share/schemas}/xhtml-rdfa-1.dtd +0 -0
  67. data/{lib/xhtml → share/schemas}/xhtml-rdfa-1.xsd +0 -0
  68. data/{lib/xhtml → share/schemas}/xhtml-rdfa-model-1.xsd +0 -0
  69. data/{lib/xhtml → share/schemas}/xhtml-rdfa-modules-1.xsd +0 -0
  70. data/{lib/xhtml → share/schemas}/xhtml-ruby-1.xsd +0 -0
  71. data/{lib/xhtml → share/schemas}/xhtml-ruby-basic-1.xsd +0 -0
  72. data/{lib/xhtml → share/schemas}/xhtml-script-1.xsd +0 -0
  73. data/{lib/xhtml → share/schemas}/xhtml-special.ent +0 -0
  74. data/{lib/xhtml → share/schemas}/xhtml-ssismap-1.xsd +0 -0
  75. data/{lib/xhtml → share/schemas}/xhtml-struct-1.xsd +0 -0
  76. data/{lib/xhtml → share/schemas}/xhtml-style-1.xsd +0 -0
  77. data/{lib/xhtml → share/schemas}/xhtml-symbol.ent +0 -0
  78. data/{lib/xhtml → share/schemas}/xhtml-table-1.xsd +0 -0
  79. data/{lib/xhtml → share/schemas}/xhtml-target-1.xsd +0 -0
  80. data/{lib/xhtml → share/schemas}/xhtml-text-1.xsd +0 -0
  81. data/{lib/xhtml → share/schemas}/xhtml1-frameset.dtd +0 -0
  82. data/{lib/xhtml → share/schemas}/xhtml1-frameset.xsd +0 -0
  83. data/{lib/xhtml → share/schemas}/xhtml1-strict.dtd +0 -0
  84. data/{lib/xhtml → share/schemas}/xhtml1-strict.xsd +0 -0
  85. data/{lib/xhtml → share/schemas}/xhtml1-transitional.dtd +0 -0
  86. data/{lib/xhtml → share/schemas}/xhtml1-transitional.xsd +0 -0
  87. data/{lib/xhtml → share/schemas}/xhtml11-model-1.xsd +0 -0
  88. data/{lib/xhtml → share/schemas}/xhtml11-module-redefines-1.xsd +0 -0
  89. data/{lib/xhtml → share/schemas}/xhtml11-modules-1.xsd +0 -0
  90. data/{lib/xhtml → share/schemas}/xhtml11.xsd +0 -0
  91. data/{lib/xhtml → share/schemas}/xhtml2.xsd +0 -0
  92. data/{lib/xhtml → share/schemas}/xml-events-1.xsd +0 -0
  93. data/{lib/xhtml → share/schemas}/xml-events-2.xsd +0 -0
  94. data/{lib/xhtml → share/schemas}/xml-events-attribs-1.xsd +0 -0
  95. data/{lib/xhtml → share/schemas}/xml-events-attribs-2.xsd +0 -0
  96. data/{lib/xhtml → share/schemas}/xml-events-copyright-1.xsd +0 -0
  97. data/{lib/xhtml → share/schemas}/xml-events-copyright-2.xsd +0 -0
  98. data/{lib/xhtml → share/schemas}/xml-handlers-1.xsd +0 -0
  99. data/{lib/xhtml → share/schemas}/xml-handlers-2.xsd +0 -0
  100. data/{lib/xhtml → share/schemas}/xml-script-1.xsd +0 -0
  101. data/{lib/xhtml → share/schemas}/xml.xsd +0 -0
  102. data/spec/core_spec.rb +56 -0
  103. data/spec/spec_helper.rb +1 -1
  104. data/spec/validator_spec.rb +3 -1
  105. metadata +102 -99
  106. data/lib/colorful_messages.rb +0 -28
  107. data/lib/validator.rb +0 -67
  108. data/spec/data/index.cs.html +0 -243
  109. data/spec/validate_website_spec.rb +0 -54
data/README.rdoc CHANGED
@@ -12,14 +12,16 @@
12
12
 
13
13
  == SYNOPSIS
14
14
 
15
- validate-website [OPTIONS]
15
+ validate-website [OPTIONS]
16
+ validate-website-static [OPTIONS]
16
17
 
17
18
  == DESCRIPTION
18
19
 
19
20
  validate-website is a web crawler for checking the markup validity and not
20
21
  found urls.
22
+ validate-website-static check the markup validity of your local documents.
21
23
 
22
- == OPTIONS
24
+ == VALIDATE WEBSITE OPTIONS
23
25
 
24
26
  -s, --site SITE
25
27
  Website to crawl (Default: http://localhost:3000/)
@@ -56,9 +58,8 @@ found urls.
56
58
 
57
59
  == REQUIREMENTS
58
60
 
59
- * spk-anemone, '>= 0.4.0'
61
+ * anemone, '>= 0.5.0'
60
62
  * rainbow, '>= 1.1'
61
- * html5, '= 0.10.0'
62
63
 
63
64
  == CREDITS
64
65
 
data/Rakefile CHANGED
@@ -7,10 +7,10 @@ require "rspec/core/rake_task" # RSpec 2.0
7
7
  # Globals
8
8
 
9
9
  PKG_NAME = 'validate-website'
10
- PKG_VERSION = '0.5.7'
10
+ PKG_VERSION = '0.6.0'
11
11
 
12
12
  PKG_FILES = ['README.rdoc', 'Rakefile', 'LICENSE']
13
- Find.find('bin/', 'lib/', 'man/', 'spec/') do |f|
13
+ Find.find('bin/', 'lib/', 'man/', 'spec/', 'share/') do |f|
14
14
  if FileTest.directory?(f) and f =~ /\.svn|\.git/
15
15
  Find.prune
16
16
  else
@@ -64,7 +64,7 @@ end
64
64
 
65
65
  desc 'Update manpage from asciidoc file'
66
66
  task :manpage do
67
- system('a2x -f manpage -D man/man1 doc/validate-website.txt')
67
+ system('find doc/ -type f -exec a2x -f manpage -D man/man1 {} \;')
68
68
  end
69
69
 
70
70
  # RSpec 2.0
data/bin/validate-website CHANGED
@@ -1,16 +1,14 @@
1
1
  #!/usr/bin/env ruby
2
2
  # encoding: utf-8
3
3
 
4
- lib_dir = File.join(File.dirname(__FILE__), '..', 'lib')
5
- $:.unshift(File.expand_path(lib_dir))
6
-
7
4
  developer_mode = false
8
5
  developer_mode = true if __FILE__ == $0
9
- require 'rubygems' if developer_mode
10
-
11
- require 'validate_website'
12
-
13
- validate_website = ValidateWebsite.new(ARGV)
14
- validate_website.crawl
15
-
16
- exit(validate_website.exit_status)
6
+ if developer_mode
7
+ lib_dir = File.join(File.dirname(__FILE__), '..', 'lib')
8
+ $:.unshift(File.expand_path(lib_dir))
9
+ require 'rubygems'
10
+ end
11
+
12
+ require 'validate_website/runner'
13
+ exit_status = ValidateWebsite::Runner.run_crawl(ARGV)
14
+ exit(exit_status)
data/bin/validate-website-static CHANGED
@@ -1,25 +1,14 @@
1
1
  #!/usr/bin/env ruby
2
2
  # encoding: utf-8
3
3
 
4
- lib_dir = File.join(File.dirname(__FILE__), '..', 'lib')
5
- $:.unshift(File.expand_path(lib_dir))
6
-
7
4
  developer_mode = false
8
5
  developer_mode = true if __FILE__ == $0
9
- require 'rubygems' if developer_mode
10
-
11
- require 'validate_website'
12
-
13
- validate_website = ValidateWebsite.new(ARGV)
14
-
15
- files = Dir.glob(File.join("**", "*.html"))
16
- files.each do |f|
17
- next unless File.file?(f)
18
-
19
- body = open(f).read
20
- doc = Nokogiri::HTML(body)
21
-
22
- validate_website.validate(doc, body, f)
6
+ if developer_mode
7
+ lib_dir = File.join(File.dirname(__FILE__), '..', 'lib')
8
+ $:.unshift(File.expand_path(lib_dir))
9
+ require 'rubygems'
23
10
  end
24
11
 
25
- exit(validate_website.exit_status)
12
+ require 'validate_website/runner'
13
+ exit_status = ValidateWebsite::Runner.run_static(ARGV)
14
+ exit(exit_status)
data/lib/validate_website.rb CHANGED
@@ -1,211 +1,2 @@
1
1
  # encoding: utf-8
2
-
3
- require 'optparse'
4
- require 'open-uri'
5
-
6
- require 'validator'
7
- require 'colorful_messages'
8
-
9
- require 'anemone'
10
-
11
- class ValidateWebsite
12
-
13
- attr_accessor :site
14
- attr_reader :options, :anemone
15
-
16
- include ColorfulMessages
17
-
18
- EXIT_SUCCESS = 0
19
- EXIT_FAILURE_MARKUP = 64
20
- EXIT_FAILURE_NOT_FOUND = 65
21
- EXIT_FAILURE_MARKUP_NOT_FOUND = 66
22
-
23
- def initialize(args=[], validation_type = :crawl)
24
- @markup_error = nil
25
- @not_found_error = nil
26
-
27
- @options_crawl = {
28
- :site => 'http://localhost:3000/',
29
- :markup_validation => true,
30
- :exclude => nil,
31
- :file => nil,
32
- # log not found url (404 status code)
33
- :not_found => false,
34
- # internal verbose for ValidateWebsite
35
- :validate_verbose => false,
36
- :quiet => false,
37
-
38
- # Anemone options see anemone/lib/anemone/core.rb
39
- :verbose => false,
40
- :user_agent => Anemone::Core::DEFAULT_OPTS[:user_agent],
41
- :cookies => nil,
42
- :accept_cookies => true,
43
- :redirect_limit => 0,
44
- }
45
- send("parse_#{validation_type}_options", args)
46
-
47
- @file = @options[:file]
48
- if @file
49
- # truncate file
50
- open(@file, 'w').write('')
51
- end
52
-
53
- @site = @options[:site]
54
- end
55
-
56
- def parse_crawl_options(args)
57
- @options = @options_crawl
58
-
59
- opts = OptionParser.new do |o|
60
- o.set_summary_indent(' ')
61
- o.banner = 'Usage: validate-website [OPTIONS]'
62
- o.define_head 'validate-website - Web crawler for checking the validity'+
63
- ' of your documents'
64
- o.separator ''
65
-
66
- o.on("-s", "--site 'SITE'", String,
67
- "Website to crawl (Default: #{@options[:site]})") { |v|
68
- @options[:site] = v
69
- }
70
- o.on("-u", "--user-agent 'USERAGENT'", String,
71
- "Change user agent (Default: #{@options[:user_agent]})") { |v|
72
- @options[:user_agent] = v
73
- }
74
- o.on("-e", "--exclude 'EXCLUDE'", String,
75
- "Url to exclude (ex: 'redirect|news')") { |v|
76
- @options[:exclude] = v
77
- }
78
- o.on("-f", "--file 'FILE'", String,
79
- "Save not well formed or not found urls") { |v| @options[:file] = v }
80
-
81
- o.on("-c", "--cookies 'COOKIES'", String,
82
- "Set defaults cookies") { |v| @options[:cookies] = v }
83
-
84
- o.on("-m", "--[no-]markup-validation",
85
- "Markup validation (Default: #{@options[:markup_validation]})") { |v|
86
- @options[:markup_validation] = v
87
- }
88
- o.on("-n", "--not-found",
89
- "Log not found url (Default: #{@options[:not_found]})") { |v|
90
- @options[:not_found] = v
91
- }
92
- o.on("-v", "--verbose",
93
- "Show validator errors (Default: #{@options[:validate_verbose]})") { |v|
94
- @options[:validate_verbose] = v
95
- }
96
- o.on("-q", "--quiet",
97
- "Only report errors (Default: #{@options[:quiet]})") { |v|
98
- @options[:quiet] = v
99
- }
100
- o.on("-d", "--debug",
101
- "Show anemone log (Default: #{@options[:verbose]})") { |v|
102
- @options[:verbose] = v
103
- }
104
-
105
- o.separator ""
106
- o.on_tail("-h", "--help", "Show this help message.") { puts o; exit }
107
- end
108
- opts.parse!(args)
109
- end
110
-
111
- def validate(doc, body, url, opts={})
112
- opts = @options.merge(opts)
113
- validator = Validator.new(doc, body)
114
- msg = " well formed? %s" % validator.valid?
115
- if validator.valid?
116
- unless opts[:quiet]
117
- print info(url)
118
- puts success(msg)
119
- end
120
- else
121
- @markup_error = true
122
- print info(url)
123
- puts error(msg)
124
- puts error(validator.errors.join(", ")) if opts[:validate_verbose]
125
- to_file(url)
126
- end
127
- end
128
-
129
- def crawl(opts={})
130
- opts = @options.merge(opts)
131
- puts note("Validating #{@site}") if opts[:validate_verbose]
132
-
133
- @anemone = Anemone.crawl(@site, opts) do |anemone|
134
- anemone.skip_links_like Regexp.new(opts[:exclude]) if opts[:exclude]
135
-
136
- # select the links on each page to follow (iframe, link, css url)
137
- anemone.focus_crawl { |p|
138
- links = []
139
- if p.html?
140
- p.doc.css('img, script, iframe').each do |elem|
141
- url = get_url(p, elem, "src")
142
- links << url unless url.nil?
143
- end
144
- p.doc.css('link').each do |link|
145
- url = get_url(p, link, "href")
146
- links << url unless url.nil?
147
- end
148
- end
149
- if p.content_type == 'text/css'
150
- p.body.scan(/url\((['".\/\w-]+)\)/).each do |url|
151
- url = url.first.gsub("'", "").gsub('"', '')
152
- abs = p.to_absolute(URI(url))
153
- links << abs
154
- end
155
- end
156
- links.uniq!
157
- p.links.concat(links)
158
- }
159
-
160
- anemone.on_every_page { |page|
161
- url = page.url.to_s
162
-
163
- if opts[:markup_validation]
164
- # validate html/html+xml
165
- if page.html? && page.fetched?
166
- validate(page.doc, page.body, url, opts)
167
- end
168
- end
169
-
170
- if opts[:not_found] && page.not_found?
171
- @not_found_error = true
172
- puts error("%s linked in %s but not exist" % [url, page.referer])
173
- to_file(url)
174
- end
175
-
176
- # throw away the page (hope this saves memory)
177
- page = nil
178
- }
179
- end
180
- end
181
-
182
- def exit_status
183
- if @markup_error && @not_found_error
184
- EXIT_FAILURE_MARKUP_NOT_FOUND
185
- elsif @markup_error
186
- EXIT_FAILURE_MARKUP
187
- elsif @not_found_error
188
- EXIT_FAILURE_NOT_FOUND
189
- else
190
- EXIT_SUCCESS
191
- end
192
- end
193
-
194
- private
195
- def to_file(msg)
196
- if @file && File.exist?(@file)
197
- open(@file, 'a').write("#{msg}\n")
198
- end
199
- end
200
-
201
- def get_url(page, elem, attrname)
202
- u = elem.attributes[attrname] if elem.attributes[attrname]
203
- return if u.nil?
204
- begin
205
- abs = page.to_absolute(URI(u))
206
- rescue
207
- abs = nil
208
- end
209
- return abs if abs && page.in_domain?(abs)
210
- end
211
- end
2
+ require 'validate_website/core'
data/lib/validate_website/colorful_messages.rb ADDED
@@ -0,0 +1,28 @@
1
+ # encoding: utf-8
2
+ require 'rainbow'
3
+
4
+ module ValidateWebsite
5
+ module ColorfulMessages
6
+ def error(message)
7
+ message.to_s.foreground(:red)
8
+ end
9
+
10
+ def warning(message)
11
+ message.to_s.foreground(:yellow)
12
+ end
13
+
14
+ def success(message)
15
+ message.to_s.foreground(:green)
16
+ end
17
+
18
+ alias_method :message, :success
19
+
20
+ def note(message)
21
+ message.to_s.foreground(:magenta)
22
+ end
23
+
24
+ def info(message)
25
+ message.to_s.foreground(:blue)
26
+ end
27
+ end
28
+ end
data/lib/validate_website/core.rb ADDED
@@ -0,0 +1,141 @@
1
+ # encoding: utf-8
2
+
3
+ require 'open-uri'
4
+
5
+ require 'validate_website/option_parser'
6
+ require 'validate_website/validator'
7
+ require 'validate_website/colorful_messages'
8
+
9
+ require 'anemone'
10
+
11
+ module ValidateWebsite
12
+
13
+ class Core
14
+
15
+ attr_accessor :site
16
+ attr_reader :options, :anemone
17
+
18
+ include ColorfulMessages
19
+
20
+ EXIT_SUCCESS = 0
21
+ EXIT_FAILURE_MARKUP = 64
22
+ EXIT_FAILURE_NOT_FOUND = 65
23
+ EXIT_FAILURE_MARKUP_NOT_FOUND = 66
24
+
25
+ def initialize(options={}, validation_type = :crawl)
26
+ @markup_error = nil
27
+ @not_found_error = nil
28
+
29
+ @options = Parser.parse(options, validation_type)
30
+
31
+ @file = @options[:file]
32
+ if @file
33
+ # truncate file
34
+ open(@file, 'w').write('')
35
+ end
36
+
37
+ @site = @options[:site]
38
+ end
39
+
40
+ def validate(doc, body, url, opts={})
41
+ opts = @options.merge(opts)
42
+ validator = Validator.new(doc, body)
43
+ msg = " well formed? %s" % validator.valid?
44
+ if validator.valid?
45
+ unless opts[:quiet]
46
+ print info(url)
47
+ puts success(msg)
48
+ end
49
+ else
50
+ @markup_error = true
51
+ print info(url)
52
+ puts error(msg)
53
+ puts error(validator.errors.join(", ")) if opts[:validate_verbose]
54
+ to_file(url)
55
+ end
56
+ end
57
+
58
+ def crawl(opts={})
59
+ opts = @options.merge(opts)
60
+ puts note("Validating #{@site}") if opts[:validate_verbose]
61
+
62
+ @anemone = Anemone.crawl(@site, opts) do |anemone|
63
+ anemone.skip_links_like Regexp.new(opts[:exclude]) if opts[:exclude]
64
+
65
+ # select the links on each page to follow (iframe, link, css url)
66
+ anemone.focus_crawl { |p|
67
+ links = []
68
+ if p.html?
69
+ p.doc.css('img, script, iframe').each do |elem|
70
+ url = get_url(p, elem, "src")
71
+ links << url unless url.nil?
72
+ end
73
+ p.doc.css('link').each do |link|
74
+ url = get_url(p, link, "href")
75
+ links << url unless url.nil?
76
+ end
77
+ end
78
+ if p.content_type == 'text/css'
79
+ p.body.scan(/url\((['".\/\w-]+)\)/).each do |url|
80
+ url = url.first.gsub("'", "").gsub('"', '')
81
+ abs = p.to_absolute(URI(url))
82
+ links << abs
83
+ end
84
+ end
85
+ links.uniq!
86
+ p.links.concat(links)
87
+ }
88
+
89
+ anemone.on_every_page { |page|
90
+ url = page.url.to_s
91
+
92
+ if opts[:markup_validation]
93
+ # validate html/html+xml
94
+ if page.html? && page.fetched?
95
+ validate(page.doc, page.body, url, opts)
96
+ end
97
+ end
98
+
99
+ if opts[:not_found] && page.not_found?
100
+ @not_found_error = true
101
+ puts error("%s linked in %s but not exist" % [url, page.referer])
102
+ to_file(url)
103
+ end
104
+
105
+ # throw away the page (hope this saves memory)
106
+ page = nil
107
+ }
108
+ end
109
+ end
110
+
111
+ def exit_status
112
+ if @markup_error && @not_found_error
113
+ EXIT_FAILURE_MARKUP_NOT_FOUND
114
+ elsif @markup_error
115
+ EXIT_FAILURE_MARKUP
116
+ elsif @not_found_error
117
+ EXIT_FAILURE_NOT_FOUND
118
+ else
119
+ EXIT_SUCCESS
120
+ end
121
+ end
122
+
123
+ private
124
+ def to_file(msg)
125
+ if @file && File.exist?(@file)
126
+ open(@file, 'a').write("#{msg}\n")
127
+ end
128
+ end
129
+
130
+ def get_url(page, elem, attrname)
131
+ u = elem.attributes[attrname] if elem.attributes[attrname]
132
+ return if u.nil?
133
+ begin
134
+ abs = page.to_absolute(URI(u))
135
+ rescue
136
+ abs = nil
137
+ end
138
+ return abs if abs && page.in_domain?(abs)
139
+ end
140
+ end
141
+ end