w3scraper 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. data/bin/w3scraper +6 -0
  2. data/lib/w3scraper.rb +80 -0
  3. metadata +48 -0
data/bin/w3scraper ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ require 'w3scraper'
4
+
5
# Build the command-line application from the process arguments and standard
# input, then run it immediately.
W3scraper.new(ARGV, STDIN).run
data/lib/w3scraper.rb ADDED
@@ -0,0 +1,80 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'nokogiri'
4
+ require 'date'
5
+ require 'optparse'
6
+
7
+
8
# Command-line front end that uploads one file to http://validator.w3.org/check
# via the external `curl` program, parses the returned markup for error
# entries, and prints one line per error.
class W3scraper
  VERSION = '0.0.1'

  # @param arguments [Array<String>] command-line arguments; option switches
  #   are consumed in place by OptionParser, leaving the file to validate.
  # @param stdin [IO] stored on the instance; not read by this class.
  #
  # Exits the process (status 0 for -v/-h, status 1 for bad usage) when the
  # arguments do not name exactly one file.
  def initialize(arguments, stdin)
    @arguments = arguments
    @stdin = stdin
    @options = {}
    parse_options
  end

  # Validates the target file and prints every reported error to stdout.
  def run
    errors = parse_errors(w3_parser_output)
    print_errors(errors)
  end

  protected

  # Argument vector for the curl upload. Kept as an array so the file name is
  # handed to curl as a single argument and is never interpreted by a shell.
  #
  # @return [Array<String>]
  def curl_command
    [
      'curl', '-s',
      '-F', 'output=text',
      '-F', "uploaded_file=@#{@target_file};type=text/html",
      'http://validator.w3.org/check'
    ]
  end

  # Runs curl once and memoizes the response body.
  #
  # @return [String] whatever curl wrote to stdout (may be empty on failure)
  # @raise [Errno::ENOENT] if the curl executable cannot be found
  def w3_parser_output
    @w3_parser_output ||= begin
      output = IO.popen(curl_command) { |io| io.read }
      # `curl -s` hides its own diagnostics, so without this a failed upload
      # would be indistinguishable from a document with no errors.
      warn "w3scraper: curl exited with status #{$?.exitstatus}" unless $?.success?
      output
    end
  end

  # Prints one "Line L Col C: message" row per error.
  #
  # @param errors [Array<Hash>] hashes with :line, :col and :msg keys, as
  #   returned by #parse_errors; a nil :line or :col is shown as "?".
  def print_errors(errors)
    errors.each do |result|
      puts "Line #{result[:line] || '?'} Col #{result[:col] || '?'}: #{result[:msg]}"
    end
  end

  # Extracts the error entries from the validator response.
  #
  # @param output [String] markup returned by the validator
  # @return [Array<Hash>] one hash per "#error_loop .msg_err" element with
  #   :line and :col (digit strings, or nil when the position text is missing
  #   or does not match) and :msg (text of the first ".msg" node, or nil when
  #   there is none).
  def parse_errors(output)
    doc = Nokogiri::HTML(output)

    doc.css("#error_loop .msg_err").map do |e|
      # Position is embedded in the first <em> as "Line N, Column M".
      pos_match = e.css("em").first.to_s.match(/Line (\d*).*Column (\d*)/m)
      msg_node = e.css(".msg").first

      {
        :line => pos_match && pos_match[1],
        :col => pos_match && pos_match[2],
        :msg => msg_node && msg_node.content
      }
    end
  end

  # Parses @arguments, handling -v/--version and -h/--help, and stores the
  # single remaining argument in @target_file. Prints usage and exits with
  # status 1 on an unknown option or when the argument count is not exactly one.
  def parse_options
    opts = OptionParser.new
    # Use the invoked program name so the banner matches what the user typed.
    opts.banner = "Usage: #{File.basename($PROGRAM_NAME)} [options] [file to validate]"

    opts.on('-v', '--version') do
      puts "version: #{VERSION}"
      exit 0
    end

    opts.on('-h', '--help') do
      puts opts
      exit 0
    end

    begin
      opts.parse!(@arguments)
    rescue OptionParser::ParseError => e
      # Unknown or malformed switch: report it instead of a stack trace.
      warn e.message
      puts opts
      exit 1
    end

    if @arguments.size != 1
      puts opts
      exit 1
    end

    @target_file = @arguments.first
  end
end
metadata ADDED
@@ -0,0 +1,48 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: w3scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Martin Grenfell
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-05-02 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Sends a file to http://validator.w3.org and parses/outputs any errors.
15
+ email: martin.grenfell@gmail.com
16
+ executables:
17
+ - w3scraper
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - lib/w3scraper.rb
22
+ - bin/w3scraper
23
+ homepage: http://rubygems.org/gems/w3scraper
24
+ licenses: []
25
+ post_install_message:
26
+ rdoc_options: []
27
+ require_paths:
28
+ - lib
29
+ required_ruby_version: !ruby/object:Gem::Requirement
30
+ none: false
31
+ requirements:
32
+ - - ! '>='
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ required_rubygems_version: !ruby/object:Gem::Requirement
36
+ none: false
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ requirements: []
42
+ rubyforge_project:
43
+ rubygems_version: 1.8.10
44
+ signing_key:
45
+ specification_version: 3
46
+ summary: Command line interface to http://validator.w3.org
47
+ test_files: []
48
+ has_rdoc: