w3scraper 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/bin/w3scraper +6 -0
  2. data/lib/w3scraper.rb +80 -0
  3. metadata +48 -0
data/bin/w3scraper ADDED
@@ -0,0 +1,6 @@
1
#!/usr/bin/env ruby
#
# Command-line launcher: builds a W3scraper from the process arguments and
# standard input, then runs it.
require 'w3scraper'

W3scraper.new(ARGV, STDIN).run
data/lib/w3scraper.rb ADDED
@@ -0,0 +1,80 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'nokogiri'
4
+ require 'date'
5
+ require 'optparse'
6
+
7
+
8
# Uploads a single HTML file to the W3C markup validator with curl, extracts
# the reported errors from the response using Nokogiri, and prints one line
# per error.
#
# Constructing an instance parses the command-line arguments straight away
# and may terminate the process (for --help, --version, or bad usage).
class W3scraper
  VERSION = '0.0.1'

  # Endpoint the file is uploaded to.
  VALIDATOR_URL = 'http://validator.w3.org/check'

  # Captures the line and column numbers from an error's position text.
  POSITION_PATTERN = /Line (\d*).*Column (\d*)/m

  # @param arguments [Array<String>] command-line arguments; option flags are
  #   removed from this array in place while parsing
  # @param stdin [IO] stored on the instance; not read by any method here
  def initialize(arguments, stdin)
    @arguments = arguments
    @stdin = stdin
    @options = {}
    parse_options
  end

  # Validates the target file and prints every reported error.
  #
  # Exits with status 1 when the target file cannot be read; without this
  # check curl fails silently and the empty response looks like "no errors".
  def run
    unless File.file?(@target_file) && File.readable?(@target_file)
      warn "w3scraper: cannot read file: #{@target_file}"
      exit 1
    end

    print_errors(parse_errors(w3_parser_output))
  end

  protected

  # Builds the curl invocation as an argument vector.
  #
  # Returning an array (instead of one interpolated command string) lets the
  # caller start curl without a shell, so quotes, backticks, `$(...)` and
  # similar characters in the file name are never interpreted.
  # NOTE(review): curl itself gives `;` and `,` a meaning inside -F values, so
  # file names containing those characters may still be misread - confirm
  # against the curl manual before relying on such names.
  #
  # @return [Array<String>]
  def curl_command
    [
      'curl', '-s',
      '-F', 'output=text',
      '-F', "uploaded_file=@#{@target_file};type=text/html",
      VALIDATOR_URL
    ]
  end

  # @return [String] the validator's response body (fetched once, then cached)
  def w3_parser_output
    @w3_parser_output ||= fetch_validator_output
  end

  # Runs curl and returns what it wrote to standard output.
  # Exits with status 1 when curl reports failure, so that an empty response
  # is not mistaken for a document without errors.
  def fetch_validator_output
    output = IO.popen(curl_command) { |io| io.read }

    unless $?.success?
      warn "w3scraper: curl failed with exit status #{$?.exitstatus}"
      exit 1
    end

    output
  end

  # Prints one "Line L Col C: message" row per error to standard output.
  # A missing line or column is shown as "?".
  #
  # @param errors [Array<Hash>] hashes with :line, :col and :msg keys
  def print_errors(errors)
    errors.each do |result|
      line = result[:line] || '?'
      col = result[:col] || '?'
      puts "Line #{line} Col #{col}: #{result[:msg]}"
    end
  end

  # Extracts the error entries from the validator's HTML response.
  #
  # @param output [String] response body returned by the validator
  # @return [Array<Hash>] one hash per error with :line and :col (strings, or
  #   nil when no position could be found) and :msg (empty string when the
  #   entry has no message node)
  def parse_errors(output)
    doc = Nokogiri::HTML(output)

    doc.css("#error_loop .msg_err").map do |entry|
      # The position lives in the entry's first <em>; nil.to_s is "" so an
      # entry without one simply fails to match instead of raising.
      position = entry.css("em").first.to_s.match(POSITION_PATTERN)
      message_node = entry.css(".msg").first

      {
        :line => position ? position[1] : nil,
        :col => position ? position[2] : nil,
        :msg => message_node ? message_node.content : ''
      }
    end
  end

  # Parses the command line and stores the file to validate in @target_file.
  #
  # Exits 0 after printing the version (-v) or usage (-h); exits 1 after
  # printing usage when an option is invalid or when the number of remaining
  # arguments is not exactly one.
  def parse_options
    opts = OptionParser.new
    # $PROGRAM_NAME names the executable the user ran; __FILE__ would print
    # this library file's name instead.
    opts.banner = "Usage: #{File.basename($PROGRAM_NAME)} [options] [file to validate]"

    opts.on('-v', '--version') do
      puts "version: #{VERSION}"
      exit 0
    end

    opts.on('-h', '--help') do
      puts opts
      exit 0
    end

    begin
      opts.parse!(@arguments)
    rescue OptionParser::ParseError => e
      # Report bad flags as a usage error rather than an uncaught exception.
      warn e.message
      puts opts
      exit 1
    end

    if @arguments.size != 1
      puts opts
      exit 1
    end

    @target_file = @arguments.first
  end
end
metadata ADDED
@@ -0,0 +1,48 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: w3scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Martin Grenfell
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-05-02 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Sends a file to http://validator.w3.org and parses/outputs any errors.
15
+ email: martin.grenfell@gmail.com
16
+ executables:
17
+ - w3scraper
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - lib/w3scraper.rb
22
+ - bin/w3scraper
23
+ homepage: http://rubygems.org/gems/w3scraper
24
+ licenses: []
25
+ post_install_message:
26
+ rdoc_options: []
27
+ require_paths:
28
+ - lib
29
+ required_ruby_version: !ruby/object:Gem::Requirement
30
+ none: false
31
+ requirements:
32
+ - - ! '>='
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ required_rubygems_version: !ruby/object:Gem::Requirement
36
+ none: false
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ requirements: []
42
+ rubyforge_project:
43
+ rubygems_version: 1.8.10
44
+ signing_key:
45
+ specification_version: 3
46
+ summary: Command line interface to http://validator.w3.org
47
+ test_files: []
48
+ has_rdoc: