validate-website 0.2 → 0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +3 -3
- data/bin/validate-website +27 -50
- data/lib/validate_website.rb +55 -0
- metadata +109 -86
data/Rakefile
CHANGED
@@ -7,7 +7,7 @@ require 'find'
|
|
7
7
|
# Globals
|
8
8
|
|
9
9
|
PKG_NAME = 'validate-website'
|
10
|
-
PKG_VERSION = '0.2'
|
10
|
+
PKG_VERSION = '0.3'
|
11
11
|
|
12
12
|
PKG_FILES = ['README', 'Rakefile']
|
13
13
|
Find.find('lib/', 'bin/') do |f|
|
@@ -58,8 +58,8 @@ spec = Gem::Specification.new do |s|
|
|
58
58
|
s.name = PKG_NAME
|
59
59
|
s.version = PKG_VERSION
|
60
60
|
s.requirements << 'spk-anemone' << 'rainbow'
|
61
|
-
s.add_dependency('spk-anemone', '>= 0.
|
62
|
-
s.add_dependency('rainbow', '>= 1.
|
61
|
+
s.add_dependency('spk-anemone', '>= 0.4.0')
|
62
|
+
s.add_dependency('rainbow', '>= 1.1')
|
63
63
|
s.require_path = 'lib'
|
64
64
|
s.bindir = 'bin'
|
65
65
|
s.executables << 'validate-website'
|
data/bin/validate-website
CHANGED
@@ -8,67 +8,44 @@ require 'rubygems' if developer_mode
|
|
8
8
|
require 'validator'
|
9
9
|
require 'anemone'
|
10
10
|
require 'colorful_messages'
|
11
|
-
require '
|
11
|
+
require 'validate_website'
|
12
12
|
|
13
13
|
include ColorfulMessages
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
:site => 'http://localhost:3000/',
|
18
|
-
:useragent => Anemone::Core::DEFAULT_OPTS[:user_agent],
|
19
|
-
:exclude => nil,
|
20
|
-
:file => nil,
|
21
|
-
:auth => nil,
|
22
|
-
}
|
15
|
+
validate_website = ValidateWebsite.new(ARGV)
|
16
|
+
options = validate_website.options
|
23
17
|
|
24
|
-
|
25
|
-
script_name = File.basename($0)
|
26
|
-
o.set_summary_indent(' ')
|
27
|
-
o.banner = "Usage: #{script_name} [OPTIONS]"
|
28
|
-
o.define_head "#{script_name} - Web crawler for checking the validity of " +
|
29
|
-
'your documents'
|
30
|
-
o.separator ""
|
18
|
+
exit_code = 0
|
31
19
|
|
32
|
-
|
33
|
-
|
20
|
+
Anemone.crawl(options[:site],
|
21
|
+
:user_agent => options[:useragent],
|
22
|
+
:authorization => options[:auth]) do |anemone|
|
34
23
|
|
35
|
-
|
36
|
-
"Default: #{OPTIONS[:useragent]}") { |v| OPTIONS[:useragent] = v }
|
37
|
-
o.on("-e", "--exclude=val", String,
|
38
|
-
"Url to exclude") { |v| OPTIONS[:exclude] = v }
|
39
|
-
o.on("-f", "--file=val", String,
|
40
|
-
"save not well formed urls") { |v| OPTIONS[:file] = v }
|
41
|
-
o.on("--auth=[user,pass]", Array,
|
42
|
-
"Basic http authentification") { |v| OPTIONS[:auth] = v }
|
43
|
-
|
44
|
-
o.separator ""
|
45
|
-
o.on_tail("-h", "--help", "Show this help message.") { puts o; exit }
|
46
|
-
o.parse!
|
47
|
-
end
|
48
|
-
|
49
|
-
if OPTIONS[:file]
|
50
|
-
file = OPTIONS[:file]
|
51
|
-
open(file, 'w').write('')
|
52
|
-
end
|
53
|
-
|
54
|
-
Anemone.crawl(OPTIONS[:site],
|
55
|
-
:user_agent => OPTIONS[:useragent],
|
56
|
-
:authorization => OPTIONS[:auth]) do |anemone|
|
57
|
-
|
58
|
-
anemone.skip_links_like Regexp.new(OPTIONS[:exclude]) if OPTIONS[:exclude]
|
24
|
+
anemone.skip_links_like Regexp.new(options[:exclude]) if options[:exclude]
|
59
25
|
|
60
26
|
anemone.on_every_page { |page|
|
61
|
-
next unless page.html?
|
62
27
|
url = page.url.to_s
|
63
28
|
print info(url)
|
64
29
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
30
|
+
# validate html/html+xml
|
31
|
+
if page.html? && page.fetched?
|
32
|
+
validator = Validator.new(page)
|
33
|
+
msg = " well formed? %s" % validator.valid?
|
34
|
+
if validator.valid?
|
35
|
+
puts success(msg)
|
36
|
+
else
|
37
|
+
exit_code = 1
|
38
|
+
puts error(msg)
|
39
|
+
validate_website.to_file(url)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
if options[:not_found] && page.not_found?
|
44
|
+
exit_code = 1
|
45
|
+
puts error("%s linked in %s but not exist" % [url, page.referer])
|
46
|
+
validate_website.to_file(url)
|
72
47
|
end
|
73
48
|
}
|
74
49
|
end
|
50
|
+
|
51
|
+
exit(exit_code)
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
require 'open-uri'
|
3
|
+
|
4
|
+
class ValidateWebsite
|
5
|
+
|
6
|
+
attr_reader :options
|
7
|
+
|
8
|
+
def initialize(args)
|
9
|
+
@options = {
|
10
|
+
:site => 'http://localhost:3000/',
|
11
|
+
:useragent => Anemone::Core::DEFAULT_OPTS[:user_agent],
|
12
|
+
:exclude => nil,
|
13
|
+
:file => nil,
|
14
|
+
:auth => nil,
|
15
|
+
# log not found url (404 status code)
|
16
|
+
:not_found => false,
|
17
|
+
}
|
18
|
+
parse(args)
|
19
|
+
|
20
|
+
# truncate file
|
21
|
+
if options[:file]
|
22
|
+
open(options[:file], 'w').write('')
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def parse(args)
|
27
|
+
opts = OptionParser.new do |o|
|
28
|
+
o.set_summary_indent(' ')
|
29
|
+
o.banner = "Usage: validate-website [OPTIONS]"
|
30
|
+
o.define_head "validate-website - Web crawler for checking the validity of your documents"
|
31
|
+
o.separator ""
|
32
|
+
|
33
|
+
o.on("-s", "--site=val", String,
|
34
|
+
"Default: #{@options[:site]}") { |v| @options[:site] = v }
|
35
|
+
|
36
|
+
o.on("-u", "--useragent=val", String,
|
37
|
+
"Default: #{@options[:useragent]}") { |v| @options[:useragent] = v }
|
38
|
+
o.on("-e", "--exclude=val", String,
|
39
|
+
"Url to exclude") { |v| @options[:exclude] = v }
|
40
|
+
o.on("-f", "--file=val", String,
|
41
|
+
"save not well formed urls") { |v| @options[:file] = v }
|
42
|
+
o.on("--auth=[user,pass]", Array,
|
43
|
+
"Basic http authentification") { |v| @options[:auth] = v }
|
44
|
+
o.on("-n", "--not-found", "Log not found url") { |v| @options[:not_found] = v }
|
45
|
+
|
46
|
+
o.separator ""
|
47
|
+
o.on_tail("-h", "--help", "Show this help message.") { puts o; exit }
|
48
|
+
end
|
49
|
+
opts.parse!(args)
|
50
|
+
end
|
51
|
+
|
52
|
+
def to_file(msg)
|
53
|
+
open(options[:file], 'a').write("#{msg}\n") if options[:file]
|
54
|
+
end
|
55
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: validate-website
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
hash: 13
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 3
|
9
|
+
version: "0.3"
|
5
10
|
platform: ruby
|
6
11
|
authors:
|
7
12
|
- spk
|
@@ -9,29 +14,40 @@ autorequire:
|
|
9
14
|
bindir: bin
|
10
15
|
cert_chain: []
|
11
16
|
|
12
|
-
date:
|
17
|
+
date: 2010-08-18 00:00:00 +02:00
|
13
18
|
default_executable:
|
14
19
|
dependencies:
|
15
20
|
- !ruby/object:Gem::Dependency
|
16
21
|
name: spk-anemone
|
17
|
-
|
18
|
-
|
19
|
-
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
20
25
|
requirements:
|
21
26
|
- - ">="
|
22
27
|
- !ruby/object:Gem::Version
|
23
|
-
|
24
|
-
|
28
|
+
hash: 15
|
29
|
+
segments:
|
30
|
+
- 0
|
31
|
+
- 4
|
32
|
+
- 0
|
33
|
+
version: 0.4.0
|
34
|
+
type: :runtime
|
35
|
+
version_requirements: *id001
|
25
36
|
- !ruby/object:Gem::Dependency
|
26
37
|
name: rainbow
|
27
|
-
|
28
|
-
|
29
|
-
|
38
|
+
prerelease: false
|
39
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
30
41
|
requirements:
|
31
42
|
- - ">="
|
32
43
|
- !ruby/object:Gem::Version
|
33
|
-
|
34
|
-
|
44
|
+
hash: 13
|
45
|
+
segments:
|
46
|
+
- 1
|
47
|
+
- 1
|
48
|
+
version: "1.1"
|
49
|
+
type: :runtime
|
50
|
+
version_requirements: *id002
|
35
51
|
description: Web crawler for checking the validity of your documents
|
36
52
|
email: spk@tuxfamily.org
|
37
53
|
executables:
|
@@ -43,94 +59,95 @@ extra_rdoc_files: []
|
|
43
59
|
files:
|
44
60
|
- README
|
45
61
|
- Rakefile
|
46
|
-
- lib/colorful_messages.rb
|
47
62
|
- lib/validator.rb
|
48
|
-
- lib/
|
63
|
+
- lib/colorful_messages.rb
|
64
|
+
- lib/xhtml/xhtml-blkphras-1.xsd
|
65
|
+
- lib/xhtml/xhtml1-transitional.xsd
|
66
|
+
- lib/xhtml/xhtml1-strict.xsd
|
67
|
+
- lib/xhtml/xhtml-blkstruct-1.xsd
|
49
68
|
- lib/xhtml/xhtml11-modules-1.xsd
|
50
|
-
- lib/xhtml/xml-events-attribs-1.xsd
|
51
|
-
- lib/xhtml/xhtml-pres-1.xsd
|
52
|
-
- lib/xhtml/xml.xsd
|
53
|
-
- lib/xhtml/xhtml-rdfa-1.xsd
|
54
|
-
- lib/xhtml/xhtml-base-1.xsd
|
55
|
-
- lib/xhtml/xhtml-inlphras-1.xsd
|
56
|
-
- lib/xhtml/xhtml-basic11.xsd
|
57
|
-
- lib/xhtml/xhtml-attribs-1.xsd
|
58
|
-
- lib/xhtml/xhtml-table-1.xsd
|
59
|
-
- lib/xhtml/xhtml11-module-redefines-1.xsd
|
60
|
-
- lib/xhtml/xml-events-1.xsd
|
61
|
-
- lib/xhtml/xhtml-inlstyle-1.xsd
|
62
|
-
- lib/xhtml/xhtml-special.ent
|
63
|
-
- lib/xhtml/xhtml-list-1.xsd
|
64
|
-
- lib/xhtml/xml-events-copyright-1.xsd
|
65
|
-
- lib/xhtml/xhtml-ruby-basic-1.xsd
|
66
|
-
- lib/xhtml/xhtml-text-1.xsd
|
67
|
-
- lib/xhtml/xhtml-lat1.ent
|
68
|
-
- lib/xhtml/xhtml-object-1.xsd
|
69
69
|
- lib/xhtml/xhtml-blkpres-1.xsd
|
70
|
-
- lib/xhtml/xhtml-
|
71
|
-
- lib/xhtml/
|
70
|
+
- lib/xhtml/xhtml-inlpres-1.xsd
|
71
|
+
- lib/xhtml/xhtml-inlphras-1.xsd
|
72
|
+
- lib/xhtml/xhtml-misc-1.xsd
|
72
73
|
- lib/xhtml/xhtml-symbol.ent
|
73
|
-
- lib/xhtml/
|
74
|
+
- lib/xhtml/xhtml1-frameset.xsd
|
75
|
+
- lib/xhtml/xhtml-meta-1.xsd
|
76
|
+
- lib/xhtml/xhtml1-frameset.dtd
|
77
|
+
- lib/xhtml/xml-events-copyright-2.xsd
|
78
|
+
- lib/xhtml/xml-handlers-2.xsd
|
79
|
+
- lib/xhtml/xhtml-object-1.xsd
|
80
|
+
- lib/xhtml/xhtml-bdo-1.xsd
|
81
|
+
- lib/xhtml/xml-events-1.xsd
|
74
82
|
- lib/xhtml/xhtml-basic10-model-1.xsd
|
75
|
-
- lib/xhtml/
|
76
|
-
- lib/xhtml/
|
83
|
+
- lib/xhtml/xhtml-table-1.xsd
|
84
|
+
- lib/xhtml/xhtml2.xsd
|
85
|
+
- lib/xhtml/xhtml-print-model-1.xsd
|
86
|
+
- lib/xhtml/xhtml-rdfa-1.xsd
|
87
|
+
- lib/xhtml/xhtml-basic-form-1.xsd
|
77
88
|
- lib/xhtml/xhtml-inputmode-1.xsd
|
89
|
+
- lib/xhtml/xhtml-ssismap-1.xsd
|
90
|
+
- lib/xhtml/xhtml-basic10-modules-1.xsd
|
91
|
+
- lib/xhtml/xhtml-events-1.xsd
|
92
|
+
- lib/xhtml/xhtml-form-1.xsd
|
78
93
|
- lib/xhtml/xhtml-struct-1.xsd
|
79
|
-
- lib/xhtml/
|
80
|
-
- lib/xhtml/xhtml-blkphras-1.xsd
|
81
|
-
- lib/xhtml/xhtml11.xsd
|
82
|
-
- lib/xhtml/xhtml-bdo-1.xsd
|
83
|
-
- lib/xhtml/xhtml-notations-1.xsd
|
84
|
-
- lib/xhtml/xhtml-rdfa-model-1.xsd
|
85
|
-
- lib/xhtml/xhtml-inlpres-1.xsd
|
86
|
-
- lib/xhtml/xhtml-hypertext-1.xsd
|
87
|
-
- lib/xhtml/xhtml-print-model-1.xsd
|
94
|
+
- lib/xhtml/xml-script-1.xsd
|
88
95
|
- lib/xhtml/xhtml-print-modules-1.xsd
|
96
|
+
- lib/xhtml/xhtml-basic10.xsd
|
97
|
+
- lib/xhtml/xframes-1.xsd
|
98
|
+
- lib/xhtml/xhtml-rdfa-1.dtd
|
99
|
+
- lib/xhtml/xhtml-datatypes-1.xsd
|
100
|
+
- lib/xhtml/xhtml-pres-1.xsd
|
101
|
+
- lib/xhtml/xhtml-charent-1.xsd
|
102
|
+
- lib/xhtml/xhtml1-transitional.dtd
|
89
103
|
- lib/xhtml/xhtml-access-1.xsd
|
90
|
-
- lib/xhtml/xhtml-
|
104
|
+
- lib/xhtml/xhtml-target-1.xsd
|
105
|
+
- lib/xhtml/xhtml-param-1.xsd
|
106
|
+
- lib/xhtml/xhtml-special.ent
|
91
107
|
- lib/xhtml/xhtml-legacy-1.xsd
|
92
|
-
- lib/xhtml/xhtml-
|
93
|
-
- lib/xhtml/xhtml-applet-1.xsd
|
108
|
+
- lib/xhtml/xhtml-frames-1.xsd
|
94
109
|
- lib/xhtml/xhtml11-model-1.xsd
|
95
|
-
- lib/xhtml/xhtml-
|
96
|
-
- lib/xhtml/xhtml-
|
97
|
-
- lib/xhtml/
|
98
|
-
- lib/xhtml/xhtml-
|
99
|
-
- lib/xhtml/xhtml-rdfa-modules-1.xsd
|
100
|
-
- lib/xhtml/xhtml-basic-table-1.xsd
|
101
|
-
- lib/xhtml/xml-events-copyright-2.xsd
|
102
|
-
- lib/xhtml/xml-handlers-2.xsd
|
110
|
+
- lib/xhtml/xhtml-inlstyle-1.xsd
|
111
|
+
- lib/xhtml/xhtml-text-1.xsd
|
112
|
+
- lib/xhtml/xhtml1-strict.dtd
|
113
|
+
- lib/xhtml/xhtml-list-1.xsd
|
103
114
|
- lib/xhtml/xhtml-framework-1.xsd
|
115
|
+
- lib/xhtml/xml-events-copyright-1.xsd
|
116
|
+
- lib/xhtml/xhtml-print-1.xsd
|
117
|
+
- lib/xhtml/xml-events-2.xsd
|
118
|
+
- lib/xhtml/xhtml-applet-1.xsd
|
119
|
+
- lib/xhtml/xhtml-style-1.xsd
|
120
|
+
- lib/xhtml/xhtml-base-1.xsd
|
121
|
+
- lib/xhtml/xhtml-nameident-1.xsd
|
104
122
|
- lib/xhtml/xhtml-iframe-1.xsd
|
105
|
-
- lib/xhtml/xhtml1-frameset.xsd
|
106
|
-
- lib/xhtml/xhtml1-strict.dtd
|
107
123
|
- lib/xhtml/xhtml-inlstruct-1.xsd
|
108
|
-
- lib/xhtml/xhtml-
|
124
|
+
- lib/xhtml/xhtml-notations-1.xsd
|
125
|
+
- lib/xhtml/xhtml11-module-redefines-1.xsd
|
126
|
+
- lib/xhtml/xhtml-basic11-modules-1.xsd
|
127
|
+
- lib/xhtml/xml-events-attribs-2.xsd
|
128
|
+
- lib/xhtml/xhtml-rdfa-model-1.xsd
|
129
|
+
- lib/xhtml/xhtml-script-1.xsd
|
130
|
+
- lib/xhtml/xhtml-hypertext-1.xsd
|
131
|
+
- lib/xhtml/xhtml-rdfa-modules-1.xsd
|
109
132
|
- lib/xhtml/xhtml-image-1.xsd
|
110
|
-
- lib/xhtml/
|
111
|
-
- lib/xhtml/xhtml-ruby-1.xsd
|
112
|
-
- lib/xhtml/
|
113
|
-
- lib/xhtml/xhtml-frames-1.xsd
|
114
|
-
- lib/xhtml/xhtml1-transitional.dtd
|
115
|
-
- lib/xhtml/xhtml-meta-1.xsd
|
116
|
-
- lib/xhtml/xhtml-basic10-module-redefines-1.xsd
|
117
|
-
- lib/xhtml/xhtml-basic10.xsd
|
133
|
+
- lib/xhtml/xml.xsd
|
134
|
+
- lib/xhtml/xhtml-ruby-basic-1.xsd
|
135
|
+
- lib/xhtml/xhtml-basic11.xsd
|
118
136
|
- lib/xhtml/xml-handlers-1.xsd
|
119
|
-
- lib/xhtml/
|
120
|
-
- lib/xhtml/xhtml-
|
121
|
-
- lib/xhtml/xhtml-script-1.xsd
|
122
|
-
- lib/xhtml/xhtml2.xsd
|
123
|
-
- lib/xhtml/xhtml-basic-form-1.xsd
|
124
|
-
- lib/xhtml/xhtml-rdfa-1.dtd
|
125
|
-
- lib/xhtml/xhtml-print-1.xsd
|
137
|
+
- lib/xhtml/xml-events-attribs-1.xsd
|
138
|
+
- lib/xhtml/xhtml-link-1.xsd
|
126
139
|
- lib/xhtml/xhtml-metaAttributes-1.xsd
|
127
|
-
- lib/xhtml/xhtml-
|
128
|
-
- lib/xhtml/xhtml-
|
129
|
-
- lib/xhtml/xhtml-
|
130
|
-
- lib/xhtml/
|
131
|
-
- lib/xhtml/
|
132
|
-
- lib/xhtml/xhtml-
|
133
|
-
- lib/xhtml/
|
140
|
+
- lib/xhtml/xhtml-basic-table-1.xsd
|
141
|
+
- lib/xhtml/xhtml-csismap-1.xsd
|
142
|
+
- lib/xhtml/xhtml-basic11-model-1.xsd
|
143
|
+
- lib/xhtml/xhtml-copyright-1.xsd
|
144
|
+
- lib/xhtml/xhtml-basic10-module-redefines-1.xsd
|
145
|
+
- lib/xhtml/xhtml-edit-1.xsd
|
146
|
+
- lib/xhtml/xhtml11.xsd
|
147
|
+
- lib/xhtml/xhtml-lat1.ent
|
148
|
+
- lib/xhtml/xhtml-attribs-1.xsd
|
149
|
+
- lib/xhtml/xhtml-ruby-1.xsd
|
150
|
+
- lib/validate_website.rb
|
134
151
|
- bin/validate-website
|
135
152
|
has_rdoc: true
|
136
153
|
homepage:
|
@@ -142,22 +159,28 @@ rdoc_options: []
|
|
142
159
|
require_paths:
|
143
160
|
- lib
|
144
161
|
required_ruby_version: !ruby/object:Gem::Requirement
|
162
|
+
none: false
|
145
163
|
requirements:
|
146
164
|
- - ">="
|
147
165
|
- !ruby/object:Gem::Version
|
166
|
+
hash: 3
|
167
|
+
segments:
|
168
|
+
- 0
|
148
169
|
version: "0"
|
149
|
-
version:
|
150
170
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
171
|
+
none: false
|
151
172
|
requirements:
|
152
173
|
- - ">="
|
153
174
|
- !ruby/object:Gem::Version
|
175
|
+
hash: 3
|
176
|
+
segments:
|
177
|
+
- 0
|
154
178
|
version: "0"
|
155
|
-
version:
|
156
179
|
requirements:
|
157
180
|
- spk-anemone
|
158
181
|
- rainbow
|
159
182
|
rubyforge_project:
|
160
|
-
rubygems_version: 1.3.
|
183
|
+
rubygems_version: 1.3.7
|
161
184
|
signing_key:
|
162
185
|
specification_version: 3
|
163
186
|
summary: Web crawler for checking the validity of your documents
|