validate-website 0.2 → 0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. data/Rakefile +3 -3
  2. data/bin/validate-website +27 -50
  3. data/lib/validate_website.rb +55 -0
  4. metadata +109 -86
data/Rakefile CHANGED
@@ -7,7 +7,7 @@ require 'find'
7
7
  # Globals
8
8
 
9
9
  PKG_NAME = 'validate-website'
10
- PKG_VERSION = '0.2'
10
+ PKG_VERSION = '0.3'
11
11
 
12
12
  PKG_FILES = ['README', 'Rakefile']
13
13
  Find.find('lib/', 'bin/') do |f|
@@ -58,8 +58,8 @@ spec = Gem::Specification.new do |s|
58
58
  s.name = PKG_NAME
59
59
  s.version = PKG_VERSION
60
60
  s.requirements << 'spk-anemone' << 'rainbow'
61
- s.add_dependency('spk-anemone', '>= 0.2.4')
62
- s.add_dependency('rainbow', '>= 1.0.4')
61
+ s.add_dependency('spk-anemone', '>= 0.4.0')
62
+ s.add_dependency('rainbow', '>= 1.1')
63
63
  s.require_path = 'lib'
64
64
  s.bindir = 'bin'
65
65
  s.executables << 'validate-website'
data/bin/validate-website CHANGED
@@ -8,67 +8,44 @@ require 'rubygems' if developer_mode
8
8
  require 'validator'
9
9
  require 'anemone'
10
10
  require 'colorful_messages'
11
- require 'optparse'
11
+ require 'validate_website'
12
12
 
13
13
  include ColorfulMessages
14
14
 
15
- # default options
16
- OPTIONS = {
17
- :site => 'http://localhost:3000/',
18
- :useragent => Anemone::Core::DEFAULT_OPTS[:user_agent],
19
- :exclude => nil,
20
- :file => nil,
21
- :auth => nil,
22
- }
15
+ validate_website = ValidateWebsite.new(ARGV)
16
+ options = validate_website.options
23
17
 
24
- ARGV.options do |o|
25
- script_name = File.basename($0)
26
- o.set_summary_indent(' ')
27
- o.banner = "Usage: #{script_name} [OPTIONS]"
28
- o.define_head "#{script_name} - Web crawler for checking the validity of " +
29
- 'your documents'
30
- o.separator ""
18
+ exit_code = 0
31
19
 
32
- o.on("-s", "--site=val", String,
33
- "Default: #{OPTIONS[:site]}") { |v| OPTIONS[:site] = v }
20
+ Anemone.crawl(options[:site],
21
+ :user_agent => options[:useragent],
22
+ :authorization => options[:auth]) do |anemone|
34
23
 
35
- o.on("-u", "--useragent=val", String,
36
- "Default: #{OPTIONS[:useragent]}") { |v| OPTIONS[:useragent] = v }
37
- o.on("-e", "--exclude=val", String,
38
- "Url to exclude") { |v| OPTIONS[:exclude] = v }
39
- o.on("-f", "--file=val", String,
40
- "save not well formed urls") { |v| OPTIONS[:file] = v }
41
- o.on("--auth=[user,pass]", Array,
42
- "Basic http authentification") { |v| OPTIONS[:auth] = v }
43
-
44
- o.separator ""
45
- o.on_tail("-h", "--help", "Show this help message.") { puts o; exit }
46
- o.parse!
47
- end
48
-
49
- if OPTIONS[:file]
50
- file = OPTIONS[:file]
51
- open(file, 'w').write('')
52
- end
53
-
54
- Anemone.crawl(OPTIONS[:site],
55
- :user_agent => OPTIONS[:useragent],
56
- :authorization => OPTIONS[:auth]) do |anemone|
57
-
58
- anemone.skip_links_like Regexp.new(OPTIONS[:exclude]) if OPTIONS[:exclude]
24
+ anemone.skip_links_like Regexp.new(options[:exclude]) if options[:exclude]
59
25
 
60
26
  anemone.on_every_page { |page|
61
- next unless page.html?
62
27
  url = page.url.to_s
63
28
  print info(url)
64
29
 
65
- validator = Validator.new(page)
66
- msg = " well formed? %s" % validator.valid?
67
- if validator.valid?
68
- puts success(msg)
69
- else
70
- puts error(msg)
71
- open(file, 'a').write("#{url}\n") if OPTIONS[:file]
30
+ # validate html/html+xml
31
+ if page.html? && page.fetched?
32
+ validator = Validator.new(page)
33
+ msg = " well formed? %s" % validator.valid?
34
+ if validator.valid?
35
+ puts success(msg)
36
+ else
37
+ exit_code = 1
38
+ puts error(msg)
39
+ validate_website.to_file(url)
40
+ end
41
+ end
42
+
43
+ if options[:not_found] && page.not_found?
44
+ exit_code = 1
45
+ puts error("%s linked in %s but not exist" % [url, page.referer])
46
+ validate_website.to_file(url)
72
47
  end
73
48
  }
74
49
  end
50
+
51
+ exit(exit_code)
@@ -0,0 +1,55 @@
1
+ require 'optparse'
2
+ require 'open-uri'
3
+
4
+ class ValidateWebsite
5
+
6
+ attr_reader :options
7
+
8
+ def initialize(args)
9
+ @options = {
10
+ :site => 'http://localhost:3000/',
11
+ :useragent => Anemone::Core::DEFAULT_OPTS[:user_agent],
12
+ :exclude => nil,
13
+ :file => nil,
14
+ :auth => nil,
15
+ # log not found url (404 status code)
16
+ :not_found => false,
17
+ }
18
+ parse(args)
19
+
20
+ # truncate file
21
+ if options[:file]
22
+ open(options[:file], 'w').write('')
23
+ end
24
+ end
25
+
26
+ def parse(args)
27
+ opts = OptionParser.new do |o|
28
+ o.set_summary_indent(' ')
29
+ o.banner = "Usage: validate-website [OPTIONS]"
30
+ o.define_head "validate-website - Web crawler for checking the validity of your documents"
31
+ o.separator ""
32
+
33
+ o.on("-s", "--site=val", String,
34
+ "Default: #{@options[:site]}") { |v| @options[:site] = v }
35
+
36
+ o.on("-u", "--useragent=val", String,
37
+ "Default: #{@options[:useragent]}") { |v| @options[:useragent] = v }
38
+ o.on("-e", "--exclude=val", String,
39
+ "Url to exclude") { |v| @options[:exclude] = v }
40
+ o.on("-f", "--file=val", String,
41
+ "save not well formed urls") { |v| @options[:file] = v }
42
+ o.on("--auth=[user,pass]", Array,
43
+ "Basic http authentification") { |v| @options[:auth] = v }
44
+ o.on("-n", "--not-found", "Log not found url") { |v| @options[:not_found] = v }
45
+
46
+ o.separator ""
47
+ o.on_tail("-h", "--help", "Show this help message.") { puts o; exit }
48
+ end
49
+ opts.parse!(args)
50
+ end
51
+
52
+ def to_file(msg)
53
+ open(options[:file], 'a').write("#{msg}\n") if options[:file]
54
+ end
55
+ end
metadata CHANGED
@@ -1,7 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: validate-website
3
3
  version: !ruby/object:Gem::Version
4
- version: "0.2"
4
+ hash: 13
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 3
9
+ version: "0.3"
5
10
  platform: ruby
6
11
  authors:
7
12
  - spk
@@ -9,29 +14,40 @@ autorequire:
9
14
  bindir: bin
10
15
  cert_chain: []
11
16
 
12
- date: 2009-12-02 00:00:00 +01:00
17
+ date: 2010-08-18 00:00:00 +02:00
13
18
  default_executable:
14
19
  dependencies:
15
20
  - !ruby/object:Gem::Dependency
16
21
  name: spk-anemone
17
- type: :runtime
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
20
25
  requirements:
21
26
  - - ">="
22
27
  - !ruby/object:Gem::Version
23
- version: 0.2.4
24
- version:
28
+ hash: 15
29
+ segments:
30
+ - 0
31
+ - 4
32
+ - 0
33
+ version: 0.4.0
34
+ type: :runtime
35
+ version_requirements: *id001
25
36
  - !ruby/object:Gem::Dependency
26
37
  name: rainbow
27
- type: :runtime
28
- version_requirement:
29
- version_requirements: !ruby/object:Gem::Requirement
38
+ prerelease: false
39
+ requirement: &id002 !ruby/object:Gem::Requirement
40
+ none: false
30
41
  requirements:
31
42
  - - ">="
32
43
  - !ruby/object:Gem::Version
33
- version: 1.0.4
34
- version:
44
+ hash: 13
45
+ segments:
46
+ - 1
47
+ - 1
48
+ version: "1.1"
49
+ type: :runtime
50
+ version_requirements: *id002
35
51
  description: Web crawler for checking the validity of your documents
36
52
  email: spk@tuxfamily.org
37
53
  executables:
@@ -43,94 +59,95 @@ extra_rdoc_files: []
43
59
  files:
44
60
  - README
45
61
  - Rakefile
46
- - lib/colorful_messages.rb
47
62
  - lib/validator.rb
48
- - lib/xhtml/xml-events-2.xsd
63
+ - lib/colorful_messages.rb
64
+ - lib/xhtml/xhtml-blkphras-1.xsd
65
+ - lib/xhtml/xhtml1-transitional.xsd
66
+ - lib/xhtml/xhtml1-strict.xsd
67
+ - lib/xhtml/xhtml-blkstruct-1.xsd
49
68
  - lib/xhtml/xhtml11-modules-1.xsd
50
- - lib/xhtml/xml-events-attribs-1.xsd
51
- - lib/xhtml/xhtml-pres-1.xsd
52
- - lib/xhtml/xml.xsd
53
- - lib/xhtml/xhtml-rdfa-1.xsd
54
- - lib/xhtml/xhtml-base-1.xsd
55
- - lib/xhtml/xhtml-inlphras-1.xsd
56
- - lib/xhtml/xhtml-basic11.xsd
57
- - lib/xhtml/xhtml-attribs-1.xsd
58
- - lib/xhtml/xhtml-table-1.xsd
59
- - lib/xhtml/xhtml11-module-redefines-1.xsd
60
- - lib/xhtml/xml-events-1.xsd
61
- - lib/xhtml/xhtml-inlstyle-1.xsd
62
- - lib/xhtml/xhtml-special.ent
63
- - lib/xhtml/xhtml-list-1.xsd
64
- - lib/xhtml/xml-events-copyright-1.xsd
65
- - lib/xhtml/xhtml-ruby-basic-1.xsd
66
- - lib/xhtml/xhtml-text-1.xsd
67
- - lib/xhtml/xhtml-lat1.ent
68
- - lib/xhtml/xhtml-object-1.xsd
69
69
  - lib/xhtml/xhtml-blkpres-1.xsd
70
- - lib/xhtml/xhtml-csismap-1.xsd
71
- - lib/xhtml/xhtml1-frameset.dtd
70
+ - lib/xhtml/xhtml-inlpres-1.xsd
71
+ - lib/xhtml/xhtml-inlphras-1.xsd
72
+ - lib/xhtml/xhtml-misc-1.xsd
72
73
  - lib/xhtml/xhtml-symbol.ent
73
- - lib/xhtml/xhtml-basic10-modules-1.xsd
74
+ - lib/xhtml/xhtml1-frameset.xsd
75
+ - lib/xhtml/xhtml-meta-1.xsd
76
+ - lib/xhtml/xhtml1-frameset.dtd
77
+ - lib/xhtml/xml-events-copyright-2.xsd
78
+ - lib/xhtml/xml-handlers-2.xsd
79
+ - lib/xhtml/xhtml-object-1.xsd
80
+ - lib/xhtml/xhtml-bdo-1.xsd
81
+ - lib/xhtml/xml-events-1.xsd
74
82
  - lib/xhtml/xhtml-basic10-model-1.xsd
75
- - lib/xhtml/xhtml1-transitional.xsd
76
- - lib/xhtml/xhtml-edit-1.xsd
83
+ - lib/xhtml/xhtml-table-1.xsd
84
+ - lib/xhtml/xhtml2.xsd
85
+ - lib/xhtml/xhtml-print-model-1.xsd
86
+ - lib/xhtml/xhtml-rdfa-1.xsd
87
+ - lib/xhtml/xhtml-basic-form-1.xsd
77
88
  - lib/xhtml/xhtml-inputmode-1.xsd
89
+ - lib/xhtml/xhtml-ssismap-1.xsd
90
+ - lib/xhtml/xhtml-basic10-modules-1.xsd
91
+ - lib/xhtml/xhtml-events-1.xsd
92
+ - lib/xhtml/xhtml-form-1.xsd
78
93
  - lib/xhtml/xhtml-struct-1.xsd
79
- - lib/xhtml/xhtml-nameident-1.xsd
80
- - lib/xhtml/xhtml-blkphras-1.xsd
81
- - lib/xhtml/xhtml11.xsd
82
- - lib/xhtml/xhtml-bdo-1.xsd
83
- - lib/xhtml/xhtml-notations-1.xsd
84
- - lib/xhtml/xhtml-rdfa-model-1.xsd
85
- - lib/xhtml/xhtml-inlpres-1.xsd
86
- - lib/xhtml/xhtml-hypertext-1.xsd
87
- - lib/xhtml/xhtml-print-model-1.xsd
94
+ - lib/xhtml/xml-script-1.xsd
88
95
  - lib/xhtml/xhtml-print-modules-1.xsd
96
+ - lib/xhtml/xhtml-basic10.xsd
97
+ - lib/xhtml/xframes-1.xsd
98
+ - lib/xhtml/xhtml-rdfa-1.dtd
99
+ - lib/xhtml/xhtml-datatypes-1.xsd
100
+ - lib/xhtml/xhtml-pres-1.xsd
101
+ - lib/xhtml/xhtml-charent-1.xsd
102
+ - lib/xhtml/xhtml1-transitional.dtd
89
103
  - lib/xhtml/xhtml-access-1.xsd
90
- - lib/xhtml/xhtml-form-1.xsd
104
+ - lib/xhtml/xhtml-target-1.xsd
105
+ - lib/xhtml/xhtml-param-1.xsd
106
+ - lib/xhtml/xhtml-special.ent
91
107
  - lib/xhtml/xhtml-legacy-1.xsd
92
- - lib/xhtml/xhtml-misc-1.xsd
93
- - lib/xhtml/xhtml-applet-1.xsd
108
+ - lib/xhtml/xhtml-frames-1.xsd
94
109
  - lib/xhtml/xhtml11-model-1.xsd
95
- - lib/xhtml/xhtml-events-1.xsd
96
- - lib/xhtml/xhtml-basic11-model-1.xsd
97
- - lib/xhtml/xhtml-link-1.xsd
98
- - lib/xhtml/xhtml-blkstruct-1.xsd
99
- - lib/xhtml/xhtml-rdfa-modules-1.xsd
100
- - lib/xhtml/xhtml-basic-table-1.xsd
101
- - lib/xhtml/xml-events-copyright-2.xsd
102
- - lib/xhtml/xml-handlers-2.xsd
110
+ - lib/xhtml/xhtml-inlstyle-1.xsd
111
+ - lib/xhtml/xhtml-text-1.xsd
112
+ - lib/xhtml/xhtml1-strict.dtd
113
+ - lib/xhtml/xhtml-list-1.xsd
103
114
  - lib/xhtml/xhtml-framework-1.xsd
115
+ - lib/xhtml/xml-events-copyright-1.xsd
116
+ - lib/xhtml/xhtml-print-1.xsd
117
+ - lib/xhtml/xml-events-2.xsd
118
+ - lib/xhtml/xhtml-applet-1.xsd
119
+ - lib/xhtml/xhtml-style-1.xsd
120
+ - lib/xhtml/xhtml-base-1.xsd
121
+ - lib/xhtml/xhtml-nameident-1.xsd
104
122
  - lib/xhtml/xhtml-iframe-1.xsd
105
- - lib/xhtml/xhtml1-frameset.xsd
106
- - lib/xhtml/xhtml1-strict.dtd
107
123
  - lib/xhtml/xhtml-inlstruct-1.xsd
108
- - lib/xhtml/xhtml-ssismap-1.xsd
124
+ - lib/xhtml/xhtml-notations-1.xsd
125
+ - lib/xhtml/xhtml11-module-redefines-1.xsd
126
+ - lib/xhtml/xhtml-basic11-modules-1.xsd
127
+ - lib/xhtml/xml-events-attribs-2.xsd
128
+ - lib/xhtml/xhtml-rdfa-model-1.xsd
129
+ - lib/xhtml/xhtml-script-1.xsd
130
+ - lib/xhtml/xhtml-hypertext-1.xsd
131
+ - lib/xhtml/xhtml-rdfa-modules-1.xsd
109
132
  - lib/xhtml/xhtml-image-1.xsd
110
- - lib/xhtml/xhtml-target-1.xsd
111
- - lib/xhtml/xhtml-ruby-1.xsd
112
- - lib/xhtml/xhtml1-strict.xsd
113
- - lib/xhtml/xhtml-frames-1.xsd
114
- - lib/xhtml/xhtml1-transitional.dtd
115
- - lib/xhtml/xhtml-meta-1.xsd
116
- - lib/xhtml/xhtml-basic10-module-redefines-1.xsd
117
- - lib/xhtml/xhtml-basic10.xsd
133
+ - lib/xhtml/xml.xsd
134
+ - lib/xhtml/xhtml-ruby-basic-1.xsd
135
+ - lib/xhtml/xhtml-basic11.xsd
118
136
  - lib/xhtml/xml-handlers-1.xsd
119
- - lib/xhtml/xhtml-charent-1.xsd
120
- - lib/xhtml/xhtml-copyright-1.xsd
121
- - lib/xhtml/xhtml-script-1.xsd
122
- - lib/xhtml/xhtml2.xsd
123
- - lib/xhtml/xhtml-basic-form-1.xsd
124
- - lib/xhtml/xhtml-rdfa-1.dtd
125
- - lib/xhtml/xhtml-print-1.xsd
137
+ - lib/xhtml/xml-events-attribs-1.xsd
138
+ - lib/xhtml/xhtml-link-1.xsd
126
139
  - lib/xhtml/xhtml-metaAttributes-1.xsd
127
- - lib/xhtml/xhtml-datatypes-1.xsd
128
- - lib/xhtml/xhtml-param-1.xsd
129
- - lib/xhtml/xhtml-style-1.xsd
130
- - lib/xhtml/xml-script-1.xsd
131
- - lib/xhtml/xml-events-attribs-2.xsd
132
- - lib/xhtml/xhtml-basic11-modules-1.xsd
133
- - lib/xhtml/xframes-1.xsd
140
+ - lib/xhtml/xhtml-basic-table-1.xsd
141
+ - lib/xhtml/xhtml-csismap-1.xsd
142
+ - lib/xhtml/xhtml-basic11-model-1.xsd
143
+ - lib/xhtml/xhtml-copyright-1.xsd
144
+ - lib/xhtml/xhtml-basic10-module-redefines-1.xsd
145
+ - lib/xhtml/xhtml-edit-1.xsd
146
+ - lib/xhtml/xhtml11.xsd
147
+ - lib/xhtml/xhtml-lat1.ent
148
+ - lib/xhtml/xhtml-attribs-1.xsd
149
+ - lib/xhtml/xhtml-ruby-1.xsd
150
+ - lib/validate_website.rb
134
151
  - bin/validate-website
135
152
  has_rdoc: true
136
153
  homepage:
@@ -142,22 +159,28 @@ rdoc_options: []
142
159
  require_paths:
143
160
  - lib
144
161
  required_ruby_version: !ruby/object:Gem::Requirement
162
+ none: false
145
163
  requirements:
146
164
  - - ">="
147
165
  - !ruby/object:Gem::Version
166
+ hash: 3
167
+ segments:
168
+ - 0
148
169
  version: "0"
149
- version:
150
170
  required_rubygems_version: !ruby/object:Gem::Requirement
171
+ none: false
151
172
  requirements:
152
173
  - - ">="
153
174
  - !ruby/object:Gem::Version
175
+ hash: 3
176
+ segments:
177
+ - 0
154
178
  version: "0"
155
- version:
156
179
  requirements:
157
180
  - spk-anemone
158
181
  - rainbow
159
182
  rubyforge_project:
160
- rubygems_version: 1.3.5
183
+ rubygems_version: 1.3.7
161
184
  signing_key:
162
185
  specification_version: 3
163
186
  summary: Web crawler for checking the validity of your documents