validate-website 0.2 → 0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4)
  1. data/Rakefile +3 -3
  2. data/bin/validate-website +27 -50
  3. data/lib/validate_website.rb +55 -0
  4. metadata +109 -86
data/Rakefile CHANGED
@@ -7,7 +7,7 @@ require 'find'
7
7
  # Globals
8
8
 
9
9
  PKG_NAME = 'validate-website'
10
- PKG_VERSION = '0.2'
10
+ PKG_VERSION = '0.3'
11
11
 
12
12
  PKG_FILES = ['README', 'Rakefile']
13
13
  Find.find('lib/', 'bin/') do |f|
@@ -58,8 +58,8 @@ spec = Gem::Specification.new do |s|
58
58
  s.name = PKG_NAME
59
59
  s.version = PKG_VERSION
60
60
  s.requirements << 'spk-anemone' << 'rainbow'
61
- s.add_dependency('spk-anemone', '>= 0.2.4')
62
- s.add_dependency('rainbow', '>= 1.0.4')
61
+ s.add_dependency('spk-anemone', '>= 0.4.0')
62
+ s.add_dependency('rainbow', '>= 1.1')
63
63
  s.require_path = 'lib'
64
64
  s.bindir = 'bin'
65
65
  s.executables << 'validate-website'
data/bin/validate-website CHANGED
@@ -8,67 +8,44 @@ require 'rubygems' if developer_mode
8
8
  require 'validator'
9
9
  require 'anemone'
10
10
  require 'colorful_messages'
11
- require 'optparse'
11
+ require 'validate_website'
12
12
 
13
13
  include ColorfulMessages
14
14
 
15
- # default options
16
- OPTIONS = {
17
- :site => 'http://localhost:3000/',
18
- :useragent => Anemone::Core::DEFAULT_OPTS[:user_agent],
19
- :exclude => nil,
20
- :file => nil,
21
- :auth => nil,
22
- }
15
+ validate_website = ValidateWebsite.new(ARGV)
16
+ options = validate_website.options
23
17
 
24
- ARGV.options do |o|
25
- script_name = File.basename($0)
26
- o.set_summary_indent(' ')
27
- o.banner = "Usage: #{script_name} [OPTIONS]"
28
- o.define_head "#{script_name} - Web crawler for checking the validity of " +
29
- 'your documents'
30
- o.separator ""
18
+ exit_code = 0
31
19
 
32
- o.on("-s", "--site=val", String,
33
- "Default: #{OPTIONS[:site]}") { |v| OPTIONS[:site] = v }
20
+ Anemone.crawl(options[:site],
21
+ :user_agent => options[:useragent],
22
+ :authorization => options[:auth]) do |anemone|
34
23
 
35
- o.on("-u", "--useragent=val", String,
36
- "Default: #{OPTIONS[:useragent]}") { |v| OPTIONS[:useragent] = v }
37
- o.on("-e", "--exclude=val", String,
38
- "Url to exclude") { |v| OPTIONS[:exclude] = v }
39
- o.on("-f", "--file=val", String,
40
- "save not well formed urls") { |v| OPTIONS[:file] = v }
41
- o.on("--auth=[user,pass]", Array,
42
- "Basic http authentification") { |v| OPTIONS[:auth] = v }
43
-
44
- o.separator ""
45
- o.on_tail("-h", "--help", "Show this help message.") { puts o; exit }
46
- o.parse!
47
- end
48
-
49
- if OPTIONS[:file]
50
- file = OPTIONS[:file]
51
- open(file, 'w').write('')
52
- end
53
-
54
- Anemone.crawl(OPTIONS[:site],
55
- :user_agent => OPTIONS[:useragent],
56
- :authorization => OPTIONS[:auth]) do |anemone|
57
-
58
- anemone.skip_links_like Regexp.new(OPTIONS[:exclude]) if OPTIONS[:exclude]
24
+ anemone.skip_links_like Regexp.new(options[:exclude]) if options[:exclude]
59
25
 
60
26
  anemone.on_every_page { |page|
61
- next unless page.html?
62
27
  url = page.url.to_s
63
28
  print info(url)
64
29
 
65
- validator = Validator.new(page)
66
- msg = " well formed? %s" % validator.valid?
67
- if validator.valid?
68
- puts success(msg)
69
- else
70
- puts error(msg)
71
- open(file, 'a').write("#{url}\n") if OPTIONS[:file]
30
+ # validate html/html+xml
31
+ if page.html? && page.fetched?
32
+ validator = Validator.new(page)
33
+ msg = " well formed? %s" % validator.valid?
34
+ if validator.valid?
35
+ puts success(msg)
36
+ else
37
+ exit_code = 1
38
+ puts error(msg)
39
+ validate_website.to_file(url)
40
+ end
41
+ end
42
+
43
+ if options[:not_found] && page.not_found?
44
+ exit_code = 1
45
+ puts error("%s linked in %s but not exist" % [url, page.referer])
46
+ validate_website.to_file(url)
72
47
  end
73
48
  }
74
49
  end
50
+
51
+ exit(exit_code)
data/lib/validate_website.rb ADDED
@@ -0,0 +1,55 @@
1
+ require 'optparse'
2
+ require 'open-uri'
3
+
4
+ class ValidateWebsite
5
+
6
+ attr_reader :options
7
+
8
+ def initialize(args)
9
+ @options = {
10
+ :site => 'http://localhost:3000/',
11
+ :useragent => Anemone::Core::DEFAULT_OPTS[:user_agent],
12
+ :exclude => nil,
13
+ :file => nil,
14
+ :auth => nil,
15
+ # log not found url (404 status code)
16
+ :not_found => false,
17
+ }
18
+ parse(args)
19
+
20
+ # truncate file
21
+ if options[:file]
22
+ open(options[:file], 'w').write('')
23
+ end
24
+ end
25
+
26
+ def parse(args)
27
+ opts = OptionParser.new do |o|
28
+ o.set_summary_indent(' ')
29
+ o.banner = "Usage: validate-website [OPTIONS]"
30
+ o.define_head "validate-website - Web crawler for checking the validity of your documents"
31
+ o.separator ""
32
+
33
+ o.on("-s", "--site=val", String,
34
+ "Default: #{@options[:site]}") { |v| @options[:site] = v }
35
+
36
+ o.on("-u", "--useragent=val", String,
37
+ "Default: #{@options[:useragent]}") { |v| @options[:useragent] = v }
38
+ o.on("-e", "--exclude=val", String,
39
+ "Url to exclude") { |v| @options[:exclude] = v }
40
+ o.on("-f", "--file=val", String,
41
+ "save not well formed urls") { |v| @options[:file] = v }
42
+ o.on("--auth=[user,pass]", Array,
43
+ "Basic http authentification") { |v| @options[:auth] = v }
44
+ o.on("-n", "--not-found", "Log not found url") { |v| @options[:not_found] = v }
45
+
46
+ o.separator ""
47
+ o.on_tail("-h", "--help", "Show this help message.") { puts o; exit }
48
+ end
49
+ opts.parse!(args)
50
+ end
51
+
52
+ def to_file(msg)
53
+ open(options[:file], 'a').write("#{msg}\n") if options[:file]
54
+ end
55
+ end
metadata CHANGED
@@ -1,7 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: validate-website
3
3
  version: !ruby/object:Gem::Version
4
- version: "0.2"
4
+ hash: 13
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 3
9
+ version: "0.3"
5
10
  platform: ruby
6
11
  authors:
7
12
  - spk
@@ -9,29 +14,40 @@ autorequire:
9
14
  bindir: bin
10
15
  cert_chain: []
11
16
 
12
- date: 2009-12-02 00:00:00 +01:00
17
+ date: 2010-08-18 00:00:00 +02:00
13
18
  default_executable:
14
19
  dependencies:
15
20
  - !ruby/object:Gem::Dependency
16
21
  name: spk-anemone
17
- type: :runtime
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
20
25
  requirements:
21
26
  - - ">="
22
27
  - !ruby/object:Gem::Version
23
- version: 0.2.4
24
- version:
28
+ hash: 15
29
+ segments:
30
+ - 0
31
+ - 4
32
+ - 0
33
+ version: 0.4.0
34
+ type: :runtime
35
+ version_requirements: *id001
25
36
  - !ruby/object:Gem::Dependency
26
37
  name: rainbow
27
- type: :runtime
28
- version_requirement:
29
- version_requirements: !ruby/object:Gem::Requirement
38
+ prerelease: false
39
+ requirement: &id002 !ruby/object:Gem::Requirement
40
+ none: false
30
41
  requirements:
31
42
  - - ">="
32
43
  - !ruby/object:Gem::Version
33
- version: 1.0.4
34
- version:
44
+ hash: 13
45
+ segments:
46
+ - 1
47
+ - 1
48
+ version: "1.1"
49
+ type: :runtime
50
+ version_requirements: *id002
35
51
  description: Web crawler for checking the validity of your documents
36
52
  email: spk@tuxfamily.org
37
53
  executables:
@@ -43,94 +59,95 @@ extra_rdoc_files: []
43
59
  files:
44
60
  - README
45
61
  - Rakefile
46
- - lib/colorful_messages.rb
47
62
  - lib/validator.rb
48
- - lib/xhtml/xml-events-2.xsd
63
+ - lib/colorful_messages.rb
64
+ - lib/xhtml/xhtml-blkphras-1.xsd
65
+ - lib/xhtml/xhtml1-transitional.xsd
66
+ - lib/xhtml/xhtml1-strict.xsd
67
+ - lib/xhtml/xhtml-blkstruct-1.xsd
49
68
  - lib/xhtml/xhtml11-modules-1.xsd
50
- - lib/xhtml/xml-events-attribs-1.xsd
51
- - lib/xhtml/xhtml-pres-1.xsd
52
- - lib/xhtml/xml.xsd
53
- - lib/xhtml/xhtml-rdfa-1.xsd
54
- - lib/xhtml/xhtml-base-1.xsd
55
- - lib/xhtml/xhtml-inlphras-1.xsd
56
- - lib/xhtml/xhtml-basic11.xsd
57
- - lib/xhtml/xhtml-attribs-1.xsd
58
- - lib/xhtml/xhtml-table-1.xsd
59
- - lib/xhtml/xhtml11-module-redefines-1.xsd
60
- - lib/xhtml/xml-events-1.xsd
61
- - lib/xhtml/xhtml-inlstyle-1.xsd
62
- - lib/xhtml/xhtml-special.ent
63
- - lib/xhtml/xhtml-list-1.xsd
64
- - lib/xhtml/xml-events-copyright-1.xsd
65
- - lib/xhtml/xhtml-ruby-basic-1.xsd
66
- - lib/xhtml/xhtml-text-1.xsd
67
- - lib/xhtml/xhtml-lat1.ent
68
- - lib/xhtml/xhtml-object-1.xsd
69
69
  - lib/xhtml/xhtml-blkpres-1.xsd
70
- - lib/xhtml/xhtml-csismap-1.xsd
71
- - lib/xhtml/xhtml1-frameset.dtd
70
+ - lib/xhtml/xhtml-inlpres-1.xsd
71
+ - lib/xhtml/xhtml-inlphras-1.xsd
72
+ - lib/xhtml/xhtml-misc-1.xsd
72
73
  - lib/xhtml/xhtml-symbol.ent
73
- - lib/xhtml/xhtml-basic10-modules-1.xsd
74
+ - lib/xhtml/xhtml1-frameset.xsd
75
+ - lib/xhtml/xhtml-meta-1.xsd
76
+ - lib/xhtml/xhtml1-frameset.dtd
77
+ - lib/xhtml/xml-events-copyright-2.xsd
78
+ - lib/xhtml/xml-handlers-2.xsd
79
+ - lib/xhtml/xhtml-object-1.xsd
80
+ - lib/xhtml/xhtml-bdo-1.xsd
81
+ - lib/xhtml/xml-events-1.xsd
74
82
  - lib/xhtml/xhtml-basic10-model-1.xsd
75
- - lib/xhtml/xhtml1-transitional.xsd
76
- - lib/xhtml/xhtml-edit-1.xsd
83
+ - lib/xhtml/xhtml-table-1.xsd
84
+ - lib/xhtml/xhtml2.xsd
85
+ - lib/xhtml/xhtml-print-model-1.xsd
86
+ - lib/xhtml/xhtml-rdfa-1.xsd
87
+ - lib/xhtml/xhtml-basic-form-1.xsd
77
88
  - lib/xhtml/xhtml-inputmode-1.xsd
89
+ - lib/xhtml/xhtml-ssismap-1.xsd
90
+ - lib/xhtml/xhtml-basic10-modules-1.xsd
91
+ - lib/xhtml/xhtml-events-1.xsd
92
+ - lib/xhtml/xhtml-form-1.xsd
78
93
  - lib/xhtml/xhtml-struct-1.xsd
79
- - lib/xhtml/xhtml-nameident-1.xsd
80
- - lib/xhtml/xhtml-blkphras-1.xsd
81
- - lib/xhtml/xhtml11.xsd
82
- - lib/xhtml/xhtml-bdo-1.xsd
83
- - lib/xhtml/xhtml-notations-1.xsd
84
- - lib/xhtml/xhtml-rdfa-model-1.xsd
85
- - lib/xhtml/xhtml-inlpres-1.xsd
86
- - lib/xhtml/xhtml-hypertext-1.xsd
87
- - lib/xhtml/xhtml-print-model-1.xsd
94
+ - lib/xhtml/xml-script-1.xsd
88
95
  - lib/xhtml/xhtml-print-modules-1.xsd
96
+ - lib/xhtml/xhtml-basic10.xsd
97
+ - lib/xhtml/xframes-1.xsd
98
+ - lib/xhtml/xhtml-rdfa-1.dtd
99
+ - lib/xhtml/xhtml-datatypes-1.xsd
100
+ - lib/xhtml/xhtml-pres-1.xsd
101
+ - lib/xhtml/xhtml-charent-1.xsd
102
+ - lib/xhtml/xhtml1-transitional.dtd
89
103
  - lib/xhtml/xhtml-access-1.xsd
90
- - lib/xhtml/xhtml-form-1.xsd
104
+ - lib/xhtml/xhtml-target-1.xsd
105
+ - lib/xhtml/xhtml-param-1.xsd
106
+ - lib/xhtml/xhtml-special.ent
91
107
  - lib/xhtml/xhtml-legacy-1.xsd
92
- - lib/xhtml/xhtml-misc-1.xsd
93
- - lib/xhtml/xhtml-applet-1.xsd
108
+ - lib/xhtml/xhtml-frames-1.xsd
94
109
  - lib/xhtml/xhtml11-model-1.xsd
95
- - lib/xhtml/xhtml-events-1.xsd
96
- - lib/xhtml/xhtml-basic11-model-1.xsd
97
- - lib/xhtml/xhtml-link-1.xsd
98
- - lib/xhtml/xhtml-blkstruct-1.xsd
99
- - lib/xhtml/xhtml-rdfa-modules-1.xsd
100
- - lib/xhtml/xhtml-basic-table-1.xsd
101
- - lib/xhtml/xml-events-copyright-2.xsd
102
- - lib/xhtml/xml-handlers-2.xsd
110
+ - lib/xhtml/xhtml-inlstyle-1.xsd
111
+ - lib/xhtml/xhtml-text-1.xsd
112
+ - lib/xhtml/xhtml1-strict.dtd
113
+ - lib/xhtml/xhtml-list-1.xsd
103
114
  - lib/xhtml/xhtml-framework-1.xsd
115
+ - lib/xhtml/xml-events-copyright-1.xsd
116
+ - lib/xhtml/xhtml-print-1.xsd
117
+ - lib/xhtml/xml-events-2.xsd
118
+ - lib/xhtml/xhtml-applet-1.xsd
119
+ - lib/xhtml/xhtml-style-1.xsd
120
+ - lib/xhtml/xhtml-base-1.xsd
121
+ - lib/xhtml/xhtml-nameident-1.xsd
104
122
  - lib/xhtml/xhtml-iframe-1.xsd
105
- - lib/xhtml/xhtml1-frameset.xsd
106
- - lib/xhtml/xhtml1-strict.dtd
107
123
  - lib/xhtml/xhtml-inlstruct-1.xsd
108
- - lib/xhtml/xhtml-ssismap-1.xsd
124
+ - lib/xhtml/xhtml-notations-1.xsd
125
+ - lib/xhtml/xhtml11-module-redefines-1.xsd
126
+ - lib/xhtml/xhtml-basic11-modules-1.xsd
127
+ - lib/xhtml/xml-events-attribs-2.xsd
128
+ - lib/xhtml/xhtml-rdfa-model-1.xsd
129
+ - lib/xhtml/xhtml-script-1.xsd
130
+ - lib/xhtml/xhtml-hypertext-1.xsd
131
+ - lib/xhtml/xhtml-rdfa-modules-1.xsd
109
132
  - lib/xhtml/xhtml-image-1.xsd
110
- - lib/xhtml/xhtml-target-1.xsd
111
- - lib/xhtml/xhtml-ruby-1.xsd
112
- - lib/xhtml/xhtml1-strict.xsd
113
- - lib/xhtml/xhtml-frames-1.xsd
114
- - lib/xhtml/xhtml1-transitional.dtd
115
- - lib/xhtml/xhtml-meta-1.xsd
116
- - lib/xhtml/xhtml-basic10-module-redefines-1.xsd
117
- - lib/xhtml/xhtml-basic10.xsd
133
+ - lib/xhtml/xml.xsd
134
+ - lib/xhtml/xhtml-ruby-basic-1.xsd
135
+ - lib/xhtml/xhtml-basic11.xsd
118
136
  - lib/xhtml/xml-handlers-1.xsd
119
- - lib/xhtml/xhtml-charent-1.xsd
120
- - lib/xhtml/xhtml-copyright-1.xsd
121
- - lib/xhtml/xhtml-script-1.xsd
122
- - lib/xhtml/xhtml2.xsd
123
- - lib/xhtml/xhtml-basic-form-1.xsd
124
- - lib/xhtml/xhtml-rdfa-1.dtd
125
- - lib/xhtml/xhtml-print-1.xsd
137
+ - lib/xhtml/xml-events-attribs-1.xsd
138
+ - lib/xhtml/xhtml-link-1.xsd
126
139
  - lib/xhtml/xhtml-metaAttributes-1.xsd
127
- - lib/xhtml/xhtml-datatypes-1.xsd
128
- - lib/xhtml/xhtml-param-1.xsd
129
- - lib/xhtml/xhtml-style-1.xsd
130
- - lib/xhtml/xml-script-1.xsd
131
- - lib/xhtml/xml-events-attribs-2.xsd
132
- - lib/xhtml/xhtml-basic11-modules-1.xsd
133
- - lib/xhtml/xframes-1.xsd
140
+ - lib/xhtml/xhtml-basic-table-1.xsd
141
+ - lib/xhtml/xhtml-csismap-1.xsd
142
+ - lib/xhtml/xhtml-basic11-model-1.xsd
143
+ - lib/xhtml/xhtml-copyright-1.xsd
144
+ - lib/xhtml/xhtml-basic10-module-redefines-1.xsd
145
+ - lib/xhtml/xhtml-edit-1.xsd
146
+ - lib/xhtml/xhtml11.xsd
147
+ - lib/xhtml/xhtml-lat1.ent
148
+ - lib/xhtml/xhtml-attribs-1.xsd
149
+ - lib/xhtml/xhtml-ruby-1.xsd
150
+ - lib/validate_website.rb
134
151
  - bin/validate-website
135
152
  has_rdoc: true
136
153
  homepage:
@@ -142,22 +159,28 @@ rdoc_options: []
142
159
  require_paths:
143
160
  - lib
144
161
  required_ruby_version: !ruby/object:Gem::Requirement
162
+ none: false
145
163
  requirements:
146
164
  - - ">="
147
165
  - !ruby/object:Gem::Version
166
+ hash: 3
167
+ segments:
168
+ - 0
148
169
  version: "0"
149
- version:
150
170
  required_rubygems_version: !ruby/object:Gem::Requirement
171
+ none: false
151
172
  requirements:
152
173
  - - ">="
153
174
  - !ruby/object:Gem::Version
175
+ hash: 3
176
+ segments:
177
+ - 0
154
178
  version: "0"
155
- version:
156
179
  requirements:
157
180
  - spk-anemone
158
181
  - rainbow
159
182
  rubyforge_project:
160
- rubygems_version: 1.3.5
183
+ rubygems_version: 1.3.7
161
184
  signing_key:
162
185
  specification_version: 3
163
186
  summary: Web crawler for checking the validity of your documents