w3scraper 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/w3scraper +6 -0
- data/lib/w3scraper.rb +80 -0
- metadata +48 -0
data/bin/w3scraper
ADDED
data/lib/w3scraper.rb
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'date'
|
5
|
+
require 'optparse'
|
6
|
+
|
7
|
+
|
8
|
+
# Command-line front end for the W3C markup validator.
#
# Uploads a single local file to the validator with +curl+, scrapes the
# returned HTML for error entries and prints one line per error.
#
# Usage (as done by the gem's executable, presumably):
#   W3scraper.new(ARGV, STDIN).run
class W3scraper
  VERSION = '0.0.1'

  # Endpoint the target file is uploaded to.
  VALIDATOR_URL = 'http://validator.w3.org/check'

  # Extracts the line and column numbers from the position text of an error.
  POSITION_PATTERN = /Line (\d+).*Column (\d+)/m

  # @param arguments [Array<String>] command-line arguments; option switches
  #   are removed from this array in place while parsing
  # @param stdin [IO] stored in @stdin; not read anywhere in this class
  #
  # Exits the process (status 0 for -h/-v, status 1 for bad usage) when the
  # arguments do not name exactly one file to validate.
  def initialize(arguments, stdin)
    @arguments = arguments
    @stdin = stdin
    @options = {}
    parse_options
  end

  # Validates the target file and prints every reported error to stdout.
  # Aborts with a message on stderr when the file cannot be read or curl fails.
  def run
    print_errors(parse_errors(w3_parser_output))
  end

  private

  # Raw HTML returned by the validator, fetched once and memoized.
  #
  # @return [String]
  def w3_parser_output
    @w3_parser_output ||= fetch_validator_output
  end

  # Runs curl and returns its stdout.
  #
  # The command is passed as an argv array, so no shell is involved and the
  # file name cannot be interpreted as shell syntax. Failures abort instead of
  # returning empty output, because empty output would parse as "no errors".
  #
  # @return [String]
  def fetch_validator_output
    abort "w3scraper: cannot read #{@target_file}" unless File.readable?(@target_file)

    output = IO.popen(curl_command) { |io| io.read }
    abort "w3scraper: curl exited with status #{$?.exitstatus}" unless $?.success?
    output
  rescue Errno::ENOENT
    abort 'w3scraper: curl executable not found'
  end

  # @return [Array<String>] argv for the curl upload request
  def curl_command
    [
      'curl', '-s',
      '-F', 'output=text',
      '-F', "uploaded_file=@#{curl_file_reference};type=text/html",
      VALIDATOR_URL
    ]
  end

  # File name as it must appear after "@" in a curl -F value.
  #
  # curl treats ";" and "," as separators inside a form value, so names
  # containing them (or a double quote) are wrapped in double quotes with
  # embedded quotes/backslashes escaped. Other names are passed unchanged.
  #
  # @return [String]
  def curl_file_reference
    return @target_file unless @target_file =~ /[;,"]/

    escaped = @target_file.gsub(/["\\]/) { |char| "\\#{char}" }
    "\"#{escaped}\""
  end

  # Prints one "Line L Col C: message" row per error to stdout.
  # A missing line or column is shown as "?".
  #
  # @param errors [Array<Hash>] hashes with :line, :col and :msg keys
  def print_errors(errors)
    errors.each do |result|
      puts "Line #{result[:line] || '?'} Col #{result[:col] || '?'}: #{result[:msg]}"
    end
  end

  # Scrapes the validator's HTML for error entries.
  #
  # @param output [String] HTML document returned by the validator
  # @return [Array<Hash>] one hash per "#error_loop .msg_err" node with
  #   :line and :col (String, or nil when no position was found) and
  #   :msg (String, or nil when the node has no ".msg" child)
  def parse_errors(output)
    doc = Nokogiri::HTML(output)

    doc.css('#error_loop .msg_err').map do |node|
      # nil.to_s is "", so a missing <em> simply yields no match.
      position = node.css('em').first.to_s.match(POSITION_PATTERN)
      message = node.css('.msg').first

      {
        :line => position && position[1],
        :col => position && position[2],
        :msg => message && message.content
      }
    end
  end

  # Parses @arguments and sets @target_file.
  #
  # -v/--version and -h/--help print to stdout and exit 0. Unknown switches
  # or anything other than exactly one remaining argument print the usage to
  # stderr and exit 1.
  def parse_options
    opts = OptionParser.new
    # $PROGRAM_NAME names the executable the user ran; __FILE__ would name
    # this library file instead.
    opts.banner = "Usage: #{File.basename($PROGRAM_NAME)} [options] [file to validate]"

    opts.on('-v', '--version') do
      puts "version: #{VERSION}"
      exit 0
    end

    opts.on('-h', '--help') do
      puts opts
      exit 0
    end

    begin
      opts.parse!(@arguments)
    rescue OptionParser::ParseError => e
      $stderr.puts e.message
      $stderr.puts opts
      exit 1
    end

    if @arguments.size != 1
      $stderr.puts opts
      exit 1
    end

    @target_file = @arguments.first
  end
end
|
metadata
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: w3scraper
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Martin Grenfell
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-05-02 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: Sends a file to http://validator.w3.org and parses/outputs any errors.
|
15
|
+
email: martin.grenfell@gmail.com
|
16
|
+
executables:
|
17
|
+
- w3scraper
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- lib/w3scraper.rb
|
22
|
+
- bin/w3scraper
|
23
|
+
homepage: http://rubygems.org/gems/w3scraper
|
24
|
+
licenses: []
|
25
|
+
post_install_message:
|
26
|
+
rdoc_options: []
|
27
|
+
require_paths:
|
28
|
+
- lib
|
29
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
30
|
+
none: false
|
31
|
+
requirements:
|
32
|
+
- - ! '>='
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
36
|
+
none: false
|
37
|
+
requirements:
|
38
|
+
- - ! '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
requirements: []
|
42
|
+
rubyforge_project:
|
43
|
+
rubygems_version: 1.8.10
|
44
|
+
signing_key:
|
45
|
+
specification_version: 3
|
46
|
+
summary: Command line interface to http://validator.w3.org
|
47
|
+
test_files: []
|
48
|
+
has_rdoc:
|