blinkr 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +66 -0
- data/lib/blinkr.rb +10 -6
- data/lib/blinkr/config.rb +46 -0
- data/lib/blinkr/engine.rb +142 -0
- data/lib/blinkr/extensions/a_title.rb +17 -0
- data/lib/blinkr/extensions/empty_a_href.rb +19 -0
- data/lib/blinkr/extensions/img_alt.rb +17 -0
- data/lib/blinkr/extensions/inline_css.rb +21 -0
- data/lib/blinkr/extensions/javascript.rb +17 -0
- data/lib/blinkr/extensions/links.rb +53 -0
- data/lib/blinkr/extensions/meta.html.slim +66 -0
- data/lib/blinkr/extensions/meta.rb +84 -0
- data/lib/blinkr/extensions/pipeline.rb +39 -0
- data/lib/blinkr/extensions/resources.rb +20 -0
- data/lib/blinkr/http_utils.rb +49 -0
- data/lib/blinkr/phantomjs_wrapper.rb +65 -0
- data/lib/blinkr/report.html.slim +216 -127
- data/lib/blinkr/report.rb +35 -3
- data/lib/blinkr/sitemap.rb +20 -0
- data/lib/blinkr/snap.js +2 -0
- data/lib/blinkr/typhoeus_wrapper.rb +98 -0
- data/lib/blinkr/version.rb +1 -1
- metadata +18 -3
- data/lib/blinkr/check.rb +0 -231
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8f345eab68efa8b4884096359a7bf64c0286687f
|
4
|
+
data.tar.gz: 2366eee2265b2033197e68341ded32dad7c68195
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 455e68a0cf36690b202beced966708b49aeca94d77ccbd46d01cff174731630175eb6e1e17b7437df9fedb5266ad9f4ba2fe1b20aa7ac7f73f98c24318fc40c0
|
7
|
+
data.tar.gz: 538f12426478bf7cc97208d3c244016ec6176c7d932a399c87555f13e5d69dbe56a065a620b929e0b47d37d050a51cd597724ab49297ca8c4a69ab81fe79ef17
|
data/README.md
CHANGED
@@ -87,6 +87,8 @@ ignore_fragments: true
|
|
87
87
|
# Control the number of threads used to run phantomjs. By default 8.
|
88
88
|
phantomjs_threads: 8
|
89
89
|
|
90
|
+
# Export the report to phantomjs
|
91
|
+
|
90
92
|
````
|
91
93
|
|
92
94
|
You can specify a custom config file on the command line:
|
@@ -114,6 +116,70 @@ mode (this is very verbose, so normally used with `-s`):
|
|
114
116
|
blinkr -c my_blinkr.yaml -s http://www.acme.com/corp -v
|
115
117
|
````
|
116
118
|
|
119
|
+
## Extending Blinkr
|
120
|
+
|
121
|
+
Blinkr is based around a pipeline. Issues with the pages are *collected*,
|
122
|
+
*analysed*, and then passed to the report for *transformation* and rendering.
|
123
|
+
Additional sections may be *appended* to the report.
|
124
|
+
|
125
|
+
To add extensions to blinkr, you need to define a custom pipeline. The pipeline
|
126
|
+
is defined in a ruby file (e.g. `blinkr.rb`)
|
127
|
+
|
128
|
+
````
|
129
|
+
require 'acme/spellcheck'
|
130
|
+
|
131
|
+
Blinkr::Extensions::Pipeline.new do |config|
|
132
|
+
# define the default extensions
|
133
|
+
extension Blinkr::Extensions::Links.new config
|
134
|
+
extension Blinkr::Extensions::JavaScript.new config
|
135
|
+
extension Blinkr::Extensions::Resources.new config
|
136
|
+
|
137
|
+
# define custom extensions
|
138
|
+
extension ACME::Extensions::SpellCheck.new config
|
139
|
+
end
|
140
|
+
````
|
141
|
+
|
142
|
+
NOTE: You must add the default extensions to a custom pipeline, for them to be
|
143
|
+
executed.
|
144
|
+
|
145
|
+
The pipeline is defined in `blinkr.yaml`:
|
146
|
+
|
147
|
+
````
|
148
|
+
# Use a custom pipeline
|
149
|
+
pipeline: blinkr.rb
|
150
|
+
````
|
151
|
+
|
152
|
+
An extension is just a standard Ruby class. It should declare an
|
153
|
+
`initialize(config)` method, and may declare one or more of:
|
154
|
+
|
155
|
+
* `collect(page)`
|
156
|
+
* `analyze(context, typhoeus)`
|
157
|
+
* `transform(page, error, default_html)`
|
158
|
+
* `append(context)`
|
159
|
+
|
160
|
+
Each method is called as the pipeline progresses. Arguments passed are:
|
161
|
+
|
162
|
+
* `page` - an object containing the typhoeus `response`, the page `body` (as a
|
163
|
+
Nokogiri HTML document), an array of `errors` for the page, any
|
164
|
+
`resource_errors` which occurred when the page was loaded, and any
|
165
|
+
`javascript_errors` which occurred when the page was loaded
|
166
|
+
* `context` - a map of `url` => `page`s which are being analysed. After the
|
167
|
+
analyze phase, and before the transform phase, any pages with no errors
|
168
|
+
are removed from the context
|
169
|
+
* `typhoeus` - a wrapper around typhoeus, defining a `process` method and
|
170
|
+
a `process_all` method, both of which take a `url` and a `retry` limit, and
|
171
|
+
accept a block to execute when a response is returned.
|
172
|
+
* `error` - an individual error, consisting of a `type`, a `url`, a `title`, a
|
173
|
+
`code`, a `message`, a `detail`, a `snippet` and a Font Awesome `icon` class
|
174
|
+
* `default_html` - the default HTML used to display the error
|
175
|
+
|
176
|
+
`transform` should return the HTML used to display the error. `append` should
|
177
|
+
return any HTML to be appended to the report. A templating language, such as
|
178
|
+
slim or haml may be used to generate the HTML.
|
179
|
+
|
180
|
+
The built-in extensions, in `lib/blinkr/extensions`, are good examples of how
|
181
|
+
extensions can perform broken link analysis, or collect and format resource
|
182
|
+
loading and javascript execution errors.
|
117
183
|
|
118
184
|
## Contributing
|
119
185
|
|
data/lib/blinkr.rb
CHANGED
@@ -1,21 +1,25 @@
|
|
1
1
|
require 'blinkr/version'
|
2
|
-
require 'blinkr/
|
2
|
+
require 'blinkr/engine'
|
3
3
|
require 'blinkr/report'
|
4
|
+
require 'blinkr/config'
|
5
|
+
require 'blinkr/typhoeus_wrapper'
|
4
6
|
require 'yaml'
|
5
7
|
|
6
8
|
module Blinkr
  # Builds the configuration and starts either a full run or a single-URL
  # debug session.
  #
  # base_url - value stored in the config under :base_url
  # config   - path to a YAML config file (default 'blinkr.yaml'); when it is
  #            nil or the file does not exist, a config built only from the
  #            arguments and Blinkr::Config defaults is used
  # single   - when nil the whole engine is run; otherwise this value is
  #            handed to Blinkr::TyphoeusWrapper#debug
  # verbose  - stored in the config under :verbose
  # vverbose - stored in the config under :vverbose
  #
  # Returns whatever Engine#run or TyphoeusWrapper#debug returns.
  def self.run(base_url, config = 'blinkr.yaml', single, verbose, vverbose)
    args = { :base_url => base_url, :verbose => verbose, :vverbose => vverbose }

    # File.exist? is used because File.exists? was deprecated and later
    # removed from Ruby (3.2).
    config = if !config.nil? && File.exist?(config)
               Blinkr::Config.read config, args
             else
               Blinkr::Config.new args
             end

    if single.nil?
      Blinkr::Engine.new(config).run
    else
      Blinkr::TyphoeusWrapper.new(config).debug(single)
    end
  end

end
|
25
|
+
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
|
3
|
+
module Blinkr
|
4
|
+
class Config < OpenStruct
|
5
|
+
|
6
|
+
def self.read file, args
|
7
|
+
raise "Cannot read #{file}" unless File.exists? file
|
8
|
+
Config.new(YAML.load_file(file).merge(args).merge({ :config_file => file }))
|
9
|
+
end
|
10
|
+
|
11
|
+
DEFAULTS = {:skips => [], :ignores => [], :max_retrys => 3, :browser => 'typhoeus', :viewport => 1200, :phantomjs_threads => 8, :report => 'blinkr.html'}
|
12
|
+
|
13
|
+
def initialize(hash={})
|
14
|
+
super(DEFAULTS.merge(hash))
|
15
|
+
end
|
16
|
+
|
17
|
+
def validate
|
18
|
+
ignores.each {|ignore| raise "An ignore must be a hash" unless ignore.is_a? Hash}
|
19
|
+
raise "Must specify base_url" if base_url.nil?
|
20
|
+
raise "Must specify sitemap" if sitemap.nil?
|
21
|
+
self
|
22
|
+
end
|
23
|
+
|
24
|
+
def sitemap
|
25
|
+
if super.nil?
|
26
|
+
URI.join(base_url, 'sitemap.xml').to_s
|
27
|
+
else
|
28
|
+
super
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
def max_page_retrys
|
33
|
+
@max_page_retrys || @max_retrys
|
34
|
+
end
|
35
|
+
|
36
|
+
def ignored? url, code, message
|
37
|
+
ignores.any? { |ignore| ( !url.nil? && ignore.has_key?('url') ? !ignore['url'].match(url).nil? : true ) && ( !code.nil? && ignore.has_key?('code') ? ignore['code'] == code : true ) && ( !message.nil? && ignore.has_key?('message') ? !ignore['message'].match(message).nil? : true ) }
|
38
|
+
end
|
39
|
+
|
40
|
+
def skipped? url
|
41
|
+
skips.any? { |regex| regex.match(url) }
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
@@ -0,0 +1,142 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'blinkr/phantomjs_wrapper'
|
3
|
+
require 'blinkr/typhoeus_wrapper'
|
4
|
+
require 'blinkr/http_utils'
|
5
|
+
require 'blinkr/sitemap'
|
6
|
+
require 'blinkr/report'
|
7
|
+
require 'blinkr/extensions/links'
|
8
|
+
require 'blinkr/extensions/javascript'
|
9
|
+
require 'blinkr/extensions/resources'
|
10
|
+
require 'blinkr/extensions/pipeline'
|
11
|
+
require 'json'
|
12
|
+
require 'ostruct'
|
13
|
+
|
14
|
+
# Monkeypatch OpenStruct: JSON output leaves out the members listed in EXCEPT.
class OpenStruct

  EXCEPT = %i[response body resource_errors javascript_errors]

  # Serializes the members to JSON, dropping every key named in EXCEPT.
  # Any arguments are forwarded to Hash#to_json.
  def to_json(*args)
    exportable = to_h.reject { |key, _value| EXCEPT.include?(key) }
    exportable.to_json(*args)
  end

end
|
24
|
+
|
25
|
+
module Blinkr
  # Drives a run: loads the pages listed in the sitemap, passes each page to
  # the extensions' collect step, runs their analyze step, optionally exports
  # the result as JSON, and renders the report.
  class Engine
    include HttpUtils
    include Sitemap

    # config - a configuration object; it is validated here, and its
    #          `pipeline` setting decides which extensions are loaded.
    def initialize config
      @config = config.validate
      @extensions = []
      load_pipeline
    end

    # Executes the whole pipeline and renders the report.
    def run
      context = OpenStruct.new({:pages => {}})
      # The typhoeus wrapper is always needed for the analyze phase and is
      # also the default page loader. (Destructuring a single object with
      # `typhoeus, browser = ...` leaves `browser` nil unless that object
      # implements to_ary, so both names are assigned explicitly.)
      typhoeus = browser = TyphoeusWrapper.new(@config, context)
      browser = PhantomJSWrapper.new(@config, context) if @config.browser == 'phantomjs'
      page_count = 0
      browser.process_all(sitemap_locations, @config.max_page_retrys) do |response, resource_errors, javascript_errors|
        # Resolved up front so the failure message below can name the page.
        url = response.request.base_url
        if response.success?
          puts "Loaded page #{url}" if @config.verbose
          body = Nokogiri::HTML(response.body)
          page = OpenStruct.new({ :response => response, :body => body, :errors => ErrorArray.new(@config), :resource_errors => resource_errors || [], :javascript_errors => javascript_errors || [] })
          context.pages[url] = page
          collect page
          page_count += 1
        else
          puts "#{response.code} #{response.status_message} Unable to load page #{url} #{'(' + response.return_message + ')' unless response.return_message.nil?}"
        end
      end
      typhoeus.hydra.run if @config.browser == 'typhoeus'
      analyze context, typhoeus
      puts "Loaded #{page_count} pages using #{browser.name}. Performed #{typhoeus.count} requests using typhoeus."
      # Only pages that still carry errors are exported and reported.
      context.pages.reject! { |url, page| page.errors.empty? }
      unless @config.export.nil?
        File.open(@config.export, 'w') do |file|
          file.write(context.to_json)
        end
      end
      Blinkr::Report.new(context, self, @config).render
    end

    # Calls `append(context)` on every extension that defines it.
    # Returns an Array of the results.
    def append context
      exec :append, context
    end

    # Lets extensions replace the HTML used to display an error.
    #
    # The block must return the default HTML. It is passed to every
    # extension's `transform(page, error, default)`; when no extension
    # responds, the default is returned, otherwise the results are joined.
    def transform page, error, &block
      default = yield
      result = exec(:transform, page, error, default)
      if result.empty?
        default
      else
        result.join
      end
    end

    # Calls `analyze(context, typhoeus)` on every extension that defines it.
    def analyze context, typhoeus
      exec :analyze, context, typhoeus
    end

    # Calls `collect(page)` on every extension that defines it.
    def collect page
      exec :collect, page
    end

    private

    # Array of errors that silently drops any error the config says to
    # ignore (see `ignored?` on the config).
    class ErrorArray < Array

      def initialize config
        super()
        @config = config
      end

      # Appends +error+ unless it is ignored; always returns self so that
      # `<<` calls can still be chained.
      def << error
        unless @config.ignored?(error.url, error.code, error.message)
          super
        else
          self
        end
      end

    end

    # Registers an extension instance with this engine.
    def extension ext
      @extensions << ext
    end

    # Registers the built-in extensions used when no custom pipeline is
    # configured.
    def default_pipeline
      extension Blinkr::Extensions::Links.new @config
      extension Blinkr::Extensions::JavaScript.new @config
      extension Blinkr::Extensions::Resources.new @config
    end

    # Sends +method+ with +args+ to every extension that responds to it.
    # Returns an Array with one result per responding extension.
    def exec method, *args
      result = []
      @extensions.each do |e|
        result << e.send(method, *args) if e.respond_to? method
      end
      result
    end

    # Loads the extensions from the configured pipeline file (resolved
    # relative to the config file's directory), or falls back to the default
    # pipeline when no pipeline is configured.
    #
    # Raises RuntimeError when the configured pipeline file is missing.
    def load_pipeline
      unless @config.pipeline.nil?
        pipeline_file = File.join(File.dirname(@config.config_file), @config.pipeline)
        if File.exist?( pipeline_file )
          # NOTE(review): the pipeline file is evaluated as Ruby code with
          # full privileges; only point `pipeline` at files you trust.
          pipeline = eval(File.read( pipeline_file ), nil, pipeline_file, 1).load @config
          pipeline.extensions.each do |e|
            extension( e )
          end
        else
          raise "Cannot find pipeline file #{pipeline_file}"
        end
      else
        default_pipeline
      end
    end

  end
end
|
142
|
+
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Blinkr
  module Extensions
    # Extension that reports anchors which have no title attribute.
    class ATitle

      def initialize config
        @config = config
      end

      # Adds one 'info' error to page.errors for every element matched by
      # the CSS selector `a:not([title])` in page.body.
      def collect page
        untitled = page.body.css('a:not([title])')
        untitled.each do |anchor|
          page.errors << OpenStruct.new(
            :severity => 'info',
            :category => 'SEO',
            :type     => '<a title=""> missing',
            :title    => "#{anchor['href']} (line #{anchor.line})",
            :message  => '<a title=""> missing',
            :snippet  => anchor.to_s,
            :icon     => 'fa-info'
          )
        end
      end

    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Blinkr
  module Extensions
    # Extension that reports anchors whose href attribute is present but
    # empty.
    class EmptyAHref

      def initialize config
        @config = config
      end

      # Adds one 'info' error to page.errors for every `a[href]` element in
      # page.body whose href value is the empty string.
      def collect page
        page.body.css('a[href]').each do |anchor|
          next unless anchor['href'].empty?

          page.errors << OpenStruct.new(
            :severity => 'info',
            :category => 'HTML Compatibility/Correctness',
            :type     => '<a href=""> empty',
            :title    => %Q{<a href=""> empty (line #{anchor.line})},
            :message  => %Q{<a href=""> empty},
            :snippet  => anchor.to_s,
            :icon     => 'fa-info'
          )
        end
      end

    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Blinkr
  module Extensions
    # Extension that reports images which have no alt attribute.
    class ImgAlt

      def initialize config
        @config = config
      end

      # Adds one 'warning' error to page.errors for every element matched by
      # the CSS selector `img:not([alt])` in page.body.
      def collect page
        images = page.body.css('img:not([alt])')
        images.each do |image|
          page.errors << OpenStruct.new(
            :severity => 'warning',
            :category => 'SEO',
            :type     => '<img alt=""> missing',
            :title    => "#{image['src']} (line #{image.line})",
            :message  => '<img alt=""> missing',
            :snippet  => image.to_s,
            :icon     => 'fa-info'
          )
        end
      end

    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Blinkr
  module Extensions
    # Extension that reports elements carrying a style attribute, telling
    # empty attributes apart from real inline CSS.
    class InlineCss

      def initialize config
        @config = config
      end

      # Adds one 'info' error to page.errors for every `[style]` element in
      # page.body. The type and message depend on whether the attribute value
      # is the empty string.
      def collect page
        page.body.css('[style]').each do |element|
          type, message =
            if element['style'] == ""
              ['style attribute is empty', 'style attribute is empty']
            else
              ['Inline CSS detected', 'inline style']
            end

          page.errors << OpenStruct.new(
            :severity => 'info',
            :category => 'HTML Compatibility/Correctness',
            :type     => type,
            :title    => %Q{"#{element['style']}" (line #{element.line})},
            :message  => message,
            :snippet  => element.to_s,
            :icon     => 'fa-info'
          )
        end
      end

    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Blinkr
  module Extensions
    # Extension that turns the JavaScript errors recorded for a page into
    # report errors.
    class JavaScript

      def initialize config
        @config = config
      end

      # Adds one 'danger' error to page.errors for every entry in
      # page.javascript_errors, using the entry's `msg` as the title and its
      # `trace` as the snippet.
      def collect page
        page.javascript_errors.each do |js_error|
          page.errors << OpenStruct.new(
            :severity => 'danger',
            :category => 'JavaScript',
            :type     => 'JavaScript error',
            :title    => js_error.msg,
            :snippet  => js_error.trace,
            :icon     => 'fa-gears'
          )
        end
      end

    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'blinkr/http_utils'
|
2
|
+
|
3
|
+
module Blinkr
  module Extensions
    # Extension that gathers every link found on the loaded pages and then
    # requests each distinct target, reporting those that cannot be loaded.
    class Links
      include HttpUtils

      def initialize config
        @config = config
        @links = {}
      end

      # Records every `a[href]` of the page, keyed by the sanitized target
      # URL. Targets for which `sanitize` returns nil, or which the config
      # marks as skipped, are left out.
      def collect page
        page.body.css('a[href]').each do |anchor|
          href = anchor.attribute('href')
          source = page.response.effective_url
          target = sanitize href.value, source
          next if target.nil? || @config.skipped?(target)

          (@links[target] ||= []) << {:page => page, :line => href.line, :snippet => href.parent.to_s}
        end
      end

      # Queues a request for every collected link and runs the hydra. For
      # each response that is neither successful nor a 200, a 'danger' error
      # is added to every page that referenced the link.
      def analyze context, typhoeus
        if @config.verbose
          puts "----------------------"
          puts " #{@links.length} links to check "
          puts "----------------------"
        end

        @links.each do |url, metadata|
          typhoeus.process(url, @config.max_retrys) do |resp|
            puts "Loaded #{url} via typhoeus #{'(cached)' if resp.cached?}" if @config.verbose
            next if resp.success? || resp.code == 200

            metadata.each do |src|
              # A nil or zero code is reported as no code at all.
              code = resp.code.to_i unless resp.code.nil? || resp.code == 0
              # Without a status message the return message is the main
              # message; otherwise it becomes the detail (unless it only
              # says "No error").
              message, detail =
                if resp.status_message.nil?
                  [resp.return_message, nil]
                else
                  [resp.status_message, (resp.return_message unless resp.return_message == "No error")]
                end

              src[:page].errors << OpenStruct.new(
                :severity => 'danger',
                :category => 'Resources missing',
                :type     => '<a href=""> target cannot be loaded',
                :url      => url,
                :title    => "#{url} (line #{src[:line]})",
                :code     => code,
                :message  => message,
                :detail   => detail,
                :snippet  => src[:snippet],
                :icon     => 'fa-bookmark-o'
              )
            end
          end
        end
        typhoeus.hydra.run
      end

    end
  end
end
|
53
|
+
|