w3scraper 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/w3scraper +6 -0
- data/lib/w3scraper.rb +80 -0
- metadata +48 -0
data/bin/w3scraper
ADDED
data/lib/w3scraper.rb
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'date'
|
5
|
+
require 'optparse'
|
6
|
+
|
7
|
+
|
8
|
+
# Command-line client for the W3C markup validator.
#
# Takes exactly one file path from the argument list, uploads that file to
# the validator with `curl`, parses the returned HTML report with Nokogiri
# and prints one line per reported error.
class W3scraper
  VERSION = '0.0.1'

  # Endpoint the file is uploaded to.
  VALIDATOR_URL = 'http://validator.w3.org/check'

  # Extracts the line and column numbers from the <em> element of an error
  # entry; /m lets the two numbers sit on different lines of the markup.
  POSITION_PATTERN = /Line (\d*).*Column (\d*)/m

  # arguments - Array of command-line arguments (option switches are
  #             consumed from it in place by the option parser).
  # stdin     - stored in @stdin; not read anywhere in this class.
  #
  # May terminate the process: -v/-h exit with status 0, a wrong number of
  # positional arguments exits with status 1 (see #parse_options).
  def initialize(arguments, stdin)
    @arguments = arguments
    @stdin = stdin
    @options = {}
    parse_options
  end

  # Validates the target file and prints every error found to stdout.
  def run
    errors = parse_errors(w3_parser_output)
    print_errors(errors)
  end

  protected

  # Returns the raw validator response for the target file (memoized).
  #
  # curl is spawned directly from an argument vector instead of through a
  # shell command string, so a file name containing spaces, quotes or shell
  # metacharacters is passed to curl verbatim and cannot be interpreted by
  # a shell.
  def w3_parser_output
    @w3_parser_output ||= IO.popen(curl_command) { |io| io.read }
  end

  # Builds the argv used to upload the target file to the validator.
  #
  # Returns an Array of Strings; the first element is the program name.
  def curl_command
    [
      'curl', '-s',
      '-F', 'output=text',
      '-F', "uploaded_file=@#{@target_file};type=text/html",
      VALIDATOR_URL
    ]
  end

  # Prints one "Line L Col C: message" line per error hash.
  #
  # errors - Array of Hashes with :line, :col and :msg keys, as returned by
  #          #parse_errors.
  def print_errors(errors)
    errors.each do |result|
      puts "Line #{result[:line]} Col #{result[:col]}: #{result[:msg]}"
    end
  end

  # Scrapes the error entries out of the validator's HTML report.
  #
  # output - the HTML document returned by the validator.
  #
  # Returns an Array of Hashes with :line, :col and :msg keys. A field is
  # nil when the corresponding element is missing from an entry or does not
  # have the expected shape, so one odd entry no longer aborts the whole
  # report with a NoMethodError.
  def parse_errors(output)
    doc = Nokogiri::HTML(output)

    doc.css('#error_loop .msg_err').map do |entry|
      # `first` is nil when there is no <em>; nil.to_s is "" and simply
      # fails to match, leaving position nil.
      position = entry.css('em').first.to_s.match(POSITION_PATTERN)
      message = entry.css('.msg').first

      {
        :line => position && position[1],
        :col => position && position[2],
        :msg => message && message.content
      }
    end
  end

  # Parses @arguments and stores the single positional argument in
  # @target_file.
  #
  # -v/--version prints the version and exits 0; -h/--help prints the usage
  # text and exits 0; anything other than exactly one remaining argument
  # prints the usage text and exits 1.
  def parse_options
    opts = OptionParser.new
    opts.banner = "Usage: #{File.basename(__FILE__)} [options] [file to validate]"

    opts.on('-v', '--version') do
      puts "version: #{VERSION}"
      exit 0
    end

    opts.on('-h', '--help') do
      puts opts
      exit 0
    end

    # parse! removes recognised switches, leaving only positional arguments.
    opts.parse!(@arguments)

    if @arguments.size != 1
      puts opts
      exit 1
    end

    @target_file = @arguments.first
  end
end
|
metadata
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: w3scraper
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Martin Grenfell
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-05-02 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: Sends a file to http://validator.w3.org and parses/outputs any errors.
|
15
|
+
email: martin.grenfell@gmail.com
|
16
|
+
executables:
|
17
|
+
- w3scraper
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- lib/w3scraper.rb
|
22
|
+
- bin/w3scraper
|
23
|
+
homepage: http://rubygems.org/gems/w3scraper
|
24
|
+
licenses: []
|
25
|
+
post_install_message:
|
26
|
+
rdoc_options: []
|
27
|
+
require_paths:
|
28
|
+
- lib
|
29
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
30
|
+
none: false
|
31
|
+
requirements:
|
32
|
+
- - ! '>='
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
36
|
+
none: false
|
37
|
+
requirements:
|
38
|
+
- - ! '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
requirements: []
|
42
|
+
rubyforge_project:
|
43
|
+
rubygems_version: 1.8.10
|
44
|
+
signing_key:
|
45
|
+
specification_version: 3
|
46
|
+
summary: Command line interface to http://validator.w3.org
|
47
|
+
test_files: []
|
48
|
+
has_rdoc:
|