blinkr 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +66 -0
- data/lib/blinkr.rb +10 -6
- data/lib/blinkr/config.rb +46 -0
- data/lib/blinkr/engine.rb +142 -0
- data/lib/blinkr/extensions/a_title.rb +17 -0
- data/lib/blinkr/extensions/empty_a_href.rb +19 -0
- data/lib/blinkr/extensions/img_alt.rb +17 -0
- data/lib/blinkr/extensions/inline_css.rb +21 -0
- data/lib/blinkr/extensions/javascript.rb +17 -0
- data/lib/blinkr/extensions/links.rb +53 -0
- data/lib/blinkr/extensions/meta.html.slim +66 -0
- data/lib/blinkr/extensions/meta.rb +84 -0
- data/lib/blinkr/extensions/pipeline.rb +39 -0
- data/lib/blinkr/extensions/resources.rb +20 -0
- data/lib/blinkr/http_utils.rb +49 -0
- data/lib/blinkr/phantomjs_wrapper.rb +65 -0
- data/lib/blinkr/report.html.slim +216 -127
- data/lib/blinkr/report.rb +35 -3
- data/lib/blinkr/sitemap.rb +20 -0
- data/lib/blinkr/snap.js +2 -0
- data/lib/blinkr/typhoeus_wrapper.rb +98 -0
- data/lib/blinkr/version.rb +1 -1
- metadata +18 -3
- data/lib/blinkr/check.rb +0 -231
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8f345eab68efa8b4884096359a7bf64c0286687f
|
4
|
+
data.tar.gz: 2366eee2265b2033197e68341ded32dad7c68195
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 455e68a0cf36690b202beced966708b49aeca94d77ccbd46d01cff174731630175eb6e1e17b7437df9fedb5266ad9f4ba2fe1b20aa7ac7f73f98c24318fc40c0
|
7
|
+
data.tar.gz: 538f12426478bf7cc97208d3c244016ec6176c7d932a399c87555f13e5d69dbe56a065a620b929e0b47d37d050a51cd597724ab49297ca8c4a69ab81fe79ef17
|
data/README.md
CHANGED
@@ -87,6 +87,8 @@ ignore_fragments: true
|
|
87
87
|
# Control the number of threads used to run phantomjs. By default 8.
|
88
88
|
phantomjs_threads: 8
|
89
89
|
|
90
|
+
# Export the report to phantomjs
|
91
|
+
|
90
92
|
````
|
91
93
|
|
92
94
|
You can specify a custom config file on the command line:
|
@@ -114,6 +116,70 @@ mode (this is very verbose, so normally used with `-s`):
|
|
114
116
|
blinkr -c my_blinkr.yaml -s http://www.acme.com/corp -v
|
115
117
|
````
|
116
118
|
|
119
|
+
## Extending Blinkr
|
120
|
+
|
121
|
+
Blinkr is based around a pipeline. Issues with the pages are *collected*,
|
122
|
+
*analysed*, and then passed to the report for *transformation* and rendering.
|
123
|
+
Additional sections may be *appended* to the report.
|
124
|
+
|
125
|
+
To add extensions to blinkr, you need to define a custom pipeline. The pipeline
|
126
|
+
is defined in a ruby file (e.g. `blinkr.rb`)
|
127
|
+
|
128
|
+
````
|
129
|
+
require 'acme/spellcheck'
|
130
|
+
|
131
|
+
Blinkr::Extensions::Pipeline.new do |config|
|
132
|
+
# define the default extensions
|
133
|
+
extension Blinkr::Extensions::Links.new config
|
134
|
+
extension Blinkr::Extensions::JavaScript.new config
|
135
|
+
extension Blinkr::Extensions::Resources.new config
|
136
|
+
|
137
|
+
# define custom extensions
|
138
|
+
extension ACME::Extensions::SpellCheck.new config
|
139
|
+
end
|
140
|
+
````
|
141
|
+
|
142
|
+
NOTE: You must add the default extensions to a custom pipeline, for them to be
|
143
|
+
executed.
|
144
|
+
|
145
|
+
The pipeline is defined in `blinkr.yaml`:
|
146
|
+
|
147
|
+
````
|
148
|
+
# Use a custom pipeline
|
149
|
+
pipeline: blinkr.rb
|
150
|
+
````
|
151
|
+
|
152
|
+
An extension is just a standard Ruby class. It should declare an
|
153
|
+
`initialize(config)` method, and may declare one or more of:
|
154
|
+
|
155
|
+
* `collect(page)`
|
156
|
+
* `analyze(context, typhoeus)`
|
157
|
+
* `transform(page, error, default_html)`
|
158
|
+
* `append(context)`
|
159
|
+
|
160
|
+
Each method is called as the pipeline progresses. Arguments passed are:
|
161
|
+
|
162
|
+
* `page` - an object containing the typhoeus `response`, the page `body` (as a
|
163
|
+
Nokogiri HTML document), an array of `errors` for the page, any
|
164
|
+
`resource_errors` which occurred when the page was loaded, and any
|
165
|
+
`javascript_errors` which occurred when the page was loaded
|
166
|
+
* `context` - a map of `url` => `page`s which are being analysed. After the
|
167
|
+
analyze phase, and before the transform phase, any pages with no errors
|
168
|
+
are removed from the context
|
169
|
+
* `typhoeus` - a wrapper around typhoeus, defining a `process` method and
|
170
|
+
a `process_all` method, both of which take a `url` and a `retry` limit, and
|
171
|
+
accept a block to execute when a response is returned.
|
172
|
+
* `error` - an individual error, consisting of a `type`, a `url`, a `title`, a
|
173
|
+
`code`, a `message`, a `detail`, a `snippet` and a fontawesome `icon` class
|
174
|
+
* `default_html` - the default HTML used to display the error
|
175
|
+
|
176
|
+
`transform` should return the HTML used to display the error. `append` should
|
177
|
+
return any HTML to be appended to the report. A templating language, such as
|
178
|
+
slim or haml may be used to generate the HTML.
|
179
|
+
|
180
|
+
The built-in extensions, in `lib/blinkr/extensions`, are good examples of how
|
181
|
+
extensions can perform broken link analysis, or collect and format resource
|
182
|
+
loading and javascript execution errors.
|
117
183
|
|
118
184
|
## Contributing
|
119
185
|
|
data/lib/blinkr.rb
CHANGED
@@ -1,21 +1,25 @@
|
|
1
1
|
require 'blinkr/version'
|
2
|
-
require 'blinkr/
|
2
|
+
require 'blinkr/engine'
|
3
3
|
require 'blinkr/report'
|
4
|
+
require 'blinkr/config'
|
5
|
+
require 'blinkr/typhoeus_wrapper'
|
4
6
|
require 'yaml'
|
5
7
|
|
6
8
|
module Blinkr
  # Entry point for a blinkr run.
  #
  # base_url - base URL of the site to check; passed into the config.
  # config   - path to a YAML config file (defaults to 'blinkr.yaml'). When it
  #            is nil or the file does not exist, a default config is built
  #            from the arguments alone.
  # single   - when nil, the whole engine is run; otherwise this value is
  #            handed to TyphoeusWrapper#debug.
  # verbose  - stored in the config as :verbose.
  # vverbose - stored in the config as :vverbose.
  def self.run(base_url, config = 'blinkr.yaml', single, verbose, vverbose)
    args = { :base_url => base_url, :verbose => verbose, :vverbose => vverbose }

    # File.exist? is used because File.exists? was removed in Ruby 3.2.
    config = if !config.nil? && File.exist?(config)
               Blinkr::Config.read config, args
             else
               Blinkr::Config.new args
             end

    if single.nil?
      Blinkr::Engine.new(config).run
    else
      Blinkr::TyphoeusWrapper.new(config).debug(single)
    end
  end

end
|
25
|
+
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
|
3
|
+
module Blinkr
|
4
|
+
class Config < OpenStruct
|
5
|
+
|
6
|
+
def self.read file, args
|
7
|
+
raise "Cannot read #{file}" unless File.exists? file
|
8
|
+
Config.new(YAML.load_file(file).merge(args).merge({ :config_file => file }))
|
9
|
+
end
|
10
|
+
|
11
|
+
DEFAULTS = {:skips => [], :ignores => [], :max_retrys => 3, :browser => 'typhoeus', :viewport => 1200, :phantomjs_threads => 8, :report => 'blinkr.html'}
|
12
|
+
|
13
|
+
def initialize(hash={})
|
14
|
+
super(DEFAULTS.merge(hash))
|
15
|
+
end
|
16
|
+
|
17
|
+
def validate
|
18
|
+
ignores.each {|ignore| raise "An ignore must be a hash" unless ignore.is_a? Hash}
|
19
|
+
raise "Must specify base_url" if base_url.nil?
|
20
|
+
raise "Must specify sitemap" if sitemap.nil?
|
21
|
+
self
|
22
|
+
end
|
23
|
+
|
24
|
+
def sitemap
|
25
|
+
if super.nil?
|
26
|
+
URI.join(base_url, 'sitemap.xml').to_s
|
27
|
+
else
|
28
|
+
super
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
def max_page_retrys
|
33
|
+
@max_page_retrys || @max_retrys
|
34
|
+
end
|
35
|
+
|
36
|
+
def ignored? url, code, message
|
37
|
+
ignores.any? { |ignore| ( !url.nil? && ignore.has_key?('url') ? !ignore['url'].match(url).nil? : true ) && ( !code.nil? && ignore.has_key?('code') ? ignore['code'] == code : true ) && ( !message.nil? && ignore.has_key?('message') ? !ignore['message'].match(message).nil? : true ) }
|
38
|
+
end
|
39
|
+
|
40
|
+
def skipped? url
|
41
|
+
skips.any? { |regex| regex.match(url) }
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
@@ -0,0 +1,142 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'blinkr/phantomjs_wrapper'
|
3
|
+
require 'blinkr/typhoeus_wrapper'
|
4
|
+
require 'blinkr/http_utils'
|
5
|
+
require 'blinkr/sitemap'
|
6
|
+
require 'blinkr/report'
|
7
|
+
require 'blinkr/extensions/links'
|
8
|
+
require 'blinkr/extensions/javascript'
|
9
|
+
require 'blinkr/extensions/resources'
|
10
|
+
require 'blinkr/extensions/pipeline'
|
11
|
+
require 'json'
|
12
|
+
require 'ostruct'
|
13
|
+
|
14
|
+
# Monkeypatch OpenStruct
|
15
|
+
class OpenStruct

  # Attribute names left out of the JSON produced by #to_json.
  EXCEPT = [:response, :body, :resource_errors, :javascript_errors].freeze

  # Serialise the struct's attributes as JSON, leaving out those named in
  # EXCEPT. Arguments are passed straight through to Hash#to_json.
  def to_json(*args)
    to_h.reject { |key, _value| EXCEPT.include?(key) }.to_json(*args)
  end

end
|
24
|
+
|
25
|
+
module Blinkr
  # Drives a blinkr run: loads every page listed by the sitemap, passes each
  # page through the extension pipeline (collect, analyze), optionally exports
  # the result as JSON, and renders the report.
  class Engine
    include HttpUtils
    include Sitemap

    # config - a Blinkr::Config; it is validated before use.
    #
    # Loads the extension pipeline named by the config, or the default one.
    def initialize config
      @config = config.validate
      @extensions = []
      load_pipeline
    end

    # Run the whole pipeline and render the report.
    def run
      context = OpenStruct.new({:pages => {}})
      typhoeus = TyphoeusWrapper.new(@config, context)
      # Pages are loaded with phantomjs when configured, otherwise with the
      # same typhoeus wrapper that the analyze phase uses.
      browser = @config.browser == 'phantomjs' ? PhantomJSWrapper.new(@config, context) : typhoeus
      page_count = 0
      browser.process_all(sitemap_locations, @config.max_page_retrys) do |response, resource_errors, javascript_errors|
        # Needed by both branches, so resolved before the success check.
        url = response.request.base_url
        if response.success?
          puts "Loaded page #{url}" if @config.verbose
          body = Nokogiri::HTML(response.body)
          page = OpenStruct.new({ :response => response, :body => body, :errors => ErrorArray.new(@config), :resource_errors => resource_errors || [], :javascript_errors => javascript_errors || [] })
          context.pages[url] = page
          collect page
          page_count += 1
        else
          puts "#{response.code} #{response.status_message} Unable to load page #{url} #{'(' + response.return_message + ')' unless response.return_message.nil?}"
        end
      end
      typhoeus.hydra.run if @config.browser == 'typhoeus'
      analyze context, typhoeus
      puts "Loaded #{page_count} pages using #{browser.name}. Performed #{typhoeus.count} requests using typhoeus."
      # Only pages that ended up with errors are exported and reported.
      context.pages.reject! { |url, page| page.errors.empty? }
      unless @config.export.nil?
        File.open(@config.export, 'w') do |file|
          file.write(context.to_json)
        end
      end
      Blinkr::Report.new(context, self, @config).render
    end

    # Call #append(context) on every extension that defines it.
    #
    # Returns an array with one result per responding extension.
    def append context
      exec :append, context
    end

    # Ask the extensions to render +error+ for +page+.
    #
    # The block must return the default HTML for the error. Returns that
    # default when no extension defines #transform, otherwise the extensions'
    # results joined together.
    def transform page, error, &block
      default = yield
      result = exec(:transform, page, error, default)
      if result.empty?
        default
      else
        result.join
      end
    end

    # Call #analyze(context, typhoeus) on every extension that defines it.
    def analyze context, typhoeus
      exec :analyze, context, typhoeus
    end

    # Call #collect(page) on every extension that defines it.
    def collect page
      exec :collect, page
    end

    private

    # Array of page errors that silently drops any error the config says
    # should be ignored.
    class ErrorArray < Array

      def initialize config
        @config = config
      end

      # Append +error+ unless it is ignored; returns self when it is ignored.
      def << error
        unless @config.ignored?(error.url, error.code, error.message)
          super
        else
          self
        end
      end

    end

    # Register an extension instance with the pipeline.
    def extension ext
      @extensions << ext
    end

    # Register the extensions used when no custom pipeline is configured.
    def default_pipeline
      extension Blinkr::Extensions::Links.new @config
      extension Blinkr::Extensions::JavaScript.new @config
      extension Blinkr::Extensions::Resources.new @config
    end

    # Send +method+ with +args+ to every extension that responds to it and
    # collect the return values.
    def exec method, *args
      result = []
      @extensions.each do |e|
        result << e.send(method, *args) if e.respond_to? method
      end
      result
    end

    # Populate @extensions from the configured pipeline file, which is
    # resolved relative to the directory of the config file, or from the
    # default pipeline when none is configured.
    #
    # Raises RuntimeError when the configured pipeline file cannot be found.
    def load_pipeline
      unless @config.pipeline.nil?
        pipeline_file = File.join(File.dirname(@config.config_file), @config.pipeline)
        # File.exist? is used because File.exists? was removed in Ruby 3.2.
        if File.exist?( pipeline_file )
          # NOTE(review): this evaluates arbitrary Ruby from the file named in
          # the config; only use pipeline files you trust.
          pipeline = eval(File.read( pipeline_file ), nil, pipeline_file, 1).load @config
          pipeline.extensions.each do |e|
            extension( e )
          end
        else
          raise "Cannot find pipeline file #{pipeline_file}"
        end
      else
        default_pipeline
      end
    end

  end
end
|
142
|
+
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Blinkr
  module Extensions
    # Reports every <a> element that has no title attribute.
    class ATitle

      def initialize config
        @config = config
      end

      # Add one 'info' error to page.errors for each anchor without a title.
      def collect page
        untitled = page.body.css('a:not([title])')
        untitled.each do |anchor|
          details = {
            :severity => 'info',
            :category => 'SEO',
            :type => '<a title=""> missing',
            :title => "#{anchor['href']} (line #{anchor.line})",
            :message => '<a title=""> missing',
            :snippet => anchor.to_s,
            :icon => 'fa-info'
          }
          page.errors << OpenStruct.new(details)
        end
      end

    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Blinkr
  module Extensions
    # Reports every <a> element whose href attribute is present but empty.
    class EmptyAHref

      def initialize config
        @config = config
      end

      # Add one 'info' error to page.errors for each anchor with an empty href.
      def collect page
        page.body.css('a[href]').each do |anchor|
          next unless anchor['href'].empty?

          details = {
            :severity => 'info',
            :category => 'HTML Compatibility/Correctness',
            :type => '<a href=""> empty',
            :title => %Q{<a href=""> empty (line #{anchor.line})},
            :message => %Q{<a href=""> empty},
            :snippet => anchor.to_s,
            :icon => 'fa-info'
          }
          page.errors << OpenStruct.new(details)
        end
      end

    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Blinkr
  module Extensions
    # Reports every <img> element that has no alt attribute.
    class ImgAlt

      def initialize config
        @config = config
      end

      # Add one 'warning' error to page.errors for each image without alt text.
      def collect page
        images = page.body.css('img:not([alt])')
        images.each do |image|
          details = {
            :severity => 'warning',
            :category => 'SEO',
            :type => '<img alt=""> missing',
            :title => "#{image['src']} (line #{image.line})",
            :message => '<img alt=""> missing',
            :snippet => image.to_s,
            :icon => 'fa-info'
          }
          page.errors << OpenStruct.new(details)
        end
      end

    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Blinkr
  module Extensions
    # Reports every element carrying a style attribute, distinguishing an
    # empty attribute from real inline CSS.
    class InlineCss

      def initialize config
        @config = config
      end

      # Add one 'info' error to page.errors for each element with a style
      # attribute.
      def collect page
        page.body.css('[style]').each do |node|
          blank = node['style'] == ""
          details = {
            :severity => 'info',
            :category => 'HTML Compatibility/Correctness',
            :type => blank ? 'style attribute is empty' : 'Inline CSS detected',
            :title => %Q{"#{node['style']}" (line #{node.line})},
            :message => blank ? 'style attribute is empty' : 'inline style',
            :snippet => node.to_s,
            :icon => 'fa-info'
          }
          page.errors << OpenStruct.new(details)
        end
      end

    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Blinkr
  module Extensions
    # Turns the JavaScript errors recorded for a page into report errors.
    class JavaScript

      def initialize config
        @config = config
      end

      # Add one 'danger' error to page.errors for each entry in
      # page.javascript_errors, using its msg as title and trace as snippet.
      def collect page
        page.javascript_errors.each do |js_error|
          details = {
            :severity => 'danger',
            :category => 'JavaScript',
            :type => 'JavaScript error',
            :title => js_error.msg,
            :snippet => js_error.trace,
            :icon => 'fa-gears'
          }
          page.errors << OpenStruct.new(details)
        end
      end

    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'blinkr/http_utils'
|
2
|
+
|
3
|
+
module Blinkr
  module Extensions
    # Broken-link check: gathers the href of every anchor during collect, then
    # requests each distinct URL during analyze and records an error on every
    # page that links to a URL which could not be loaded.
    class Links
      include HttpUtils

      def initialize config
        @config = config
        # url => array of { :page, :line, :snippet } describing each use
        @links = {}
      end

      # Remember every link on +page+ whose sanitized URL is neither nil nor
      # skipped by the config.
      def collect page
        page.body.css('a[href]').each do |anchor|
          href = anchor.attribute('href')
          origin = page.response.effective_url
          target = sanitize href.value, origin
          next if target.nil? || @config.skipped?(target)

          usage = {:page => page, :line => href.line, :snippet => href.parent.to_s}
          (@links[target] ||= []) << usage
        end
      end

      # Queue a request for every collected URL, attach an error to each
      # linking page when the response is not successful, then run the queue.
      def analyze context, typhoeus
        if @config.verbose
          puts "----------------------"
          puts " #{@links.length} links to check "
          puts "----------------------"
        end
        @links.each do |url, usages|
          typhoeus.process(url, @config.max_retrys) do |resp|
            puts "Loaded #{url} via typhoeus #{'(cached)' if resp.cached?}" if @config.verbose
            next if resp.success? || resp.code == 200

            usages.each do |usage|
              code, message, detail = failure_details resp
              usage[:page].errors << OpenStruct.new({
                :severity => 'danger',
                :category => 'Resources missing',
                :type => '<a href=""> target cannot be loaded',
                :url => url,
                :title => "#{url} (line #{usage[:line]})",
                :code => code,
                :message => message,
                :detail => detail,
                :snippet => usage[:snippet],
                :icon => 'fa-bookmark-o'
              })
            end
          end
        end
        typhoeus.hydra.run
      end

      private

      # Returns [code, message, detail] for a failed response: code is nil
      # when the response code is nil or 0; message is the status message, or
      # the return message when there is no status message; detail is the
      # return message when a status message exists and the return message is
      # not "No error".
      def failure_details resp
        code = (resp.code.nil? || resp.code == 0) ? nil : resp.code.to_i
        status = resp.status_message
        if status.nil?
          [code, resp.return_message, nil]
        else
          detail = resp.return_message == "No error" ? nil : resp.return_message
          [code, status, detail]
        end
      end

    end
  end
end
|
53
|
+
|