site_health 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +3 -0
  3. data/.rubocop.yml +7 -0
  4. data/.ruby-style-guide.yml +263 -0
  5. data/.travis.yml +3 -2
  6. data/CHANGELOG.md +10 -0
  7. data/Gemfile +4 -2
  8. data/LICENSE.txt +1 -1
  9. data/README.md +165 -21
  10. data/Rakefile +5 -3
  11. data/bin/console +4 -10
  12. data/bin/setup +0 -2
  13. data/exe/site_health +75 -0
  14. data/lib/site_health.rb +89 -113
  15. data/lib/site_health/check_data.rb +35 -0
  16. data/lib/site_health/checkers/checker.rb +152 -0
  17. data/lib/site_health/checkers/facebook_share_link.rb +125 -0
  18. data/lib/site_health/checkers/google_page_speed.rb +55 -0
  19. data/lib/site_health/checkers/html_proofer.rb +67 -0
  20. data/lib/site_health/checkers/json_syntax.rb +28 -0
  21. data/lib/site_health/checkers/missing_description.rb +50 -0
  22. data/lib/site_health/checkers/missing_title.rb +41 -0
  23. data/lib/site_health/checkers/page_not_found.rb +30 -0
  24. data/lib/site_health/checkers/redirect.rb +16 -0
  25. data/lib/site_health/checkers/w3c_css.rb +37 -0
  26. data/lib/site_health/checkers/w3c_html.rb +37 -0
  27. data/lib/site_health/checkers/xml.rb +27 -0
  28. data/lib/site_health/configuration/configuration.rb +84 -0
  29. data/lib/site_health/configuration/html_proofer_configuration.rb +88 -0
  30. data/lib/site_health/configuration/w3c_validators_configuration.rb +23 -0
  31. data/lib/site_health/event_emitter.rb +70 -0
  32. data/lib/site_health/issue.rb +125 -0
  33. data/lib/site_health/issues.rb +43 -0
  34. data/lib/site_health/issues_report.rb +52 -0
  35. data/lib/site_health/key_struct.rb +6 -3
  36. data/lib/site_health/link.rb +32 -0
  37. data/lib/site_health/null_logger.rb +14 -0
  38. data/lib/site_health/nurse.rb +167 -0
  39. data/lib/site_health/summarizers/page_size_summarizer.rb +77 -0
  40. data/lib/site_health/timer.rb +47 -0
  41. data/lib/site_health/url_map.rb +41 -0
  42. data/lib/site_health/version.rb +10 -1
  43. data/lib/site_health/{journals/w3c_journal.rb → w3c_journal_builder.rb} +5 -1
  44. data/site_health.gemspec +28 -17
  45. metadata +144 -21
  46. data/lib/site_health/checkers/css_page.rb +0 -36
  47. data/lib/site_health/checkers/html_page.rb +0 -41
  48. data/lib/site_health/checkers/xml_page.rb +0 -21
  49. data/lib/site_health/journals/css_journal.rb +0 -12
  50. data/lib/site_health/journals/html_journal.rb +0 -16
  51. data/lib/site_health/journals/xml_journal.rb +0 -8
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteHealth
  # Enumerable collection of issues that share a default name (see #id).
  class Issues
    include Enumerable

    # @return [Object] the id given at construction; used as the default
    #   issue name in #add
    attr_reader :id

    # @param [Object] id default name for issues built from keywords
    def initialize(id)
      @id = id
      @issues = []
    end

    # Adds an issue
    # @param [Issue] issue if nil, the issue is built from the keyword arguments
    # @return [Array<Issue>] the current list of issues
    # @see Issue#initialize for supported keyword arguments
    def add(issue = nil, **args)
      unless issue
        args[:name] ||= id
        # Splat the hash so Issue#initialize receives real keyword arguments;
        # since Ruby 3.0 a positional Hash is no longer converted to keywords.
        issue = Issue.new(**args)
      end

      @issues << issue
    end
    alias_method :<<, :add

    # @return [TrueClass, FalseClass] true if there are no issues
    def empty?
      @issues.empty?
    end

    # Enumerates over every issue.
    #
    # @yieldparam [Issue] issue
    #
    # @return [Enumerator]
    #   If no block is given, an enumerator object will be returned.
    def each(&block)
      @issues.each(&block)
    end
  end
end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'csv'
4
+ require 'json'
5
+ require 'site_health/issue'
6
+
7
module SiteHealth
  # Renders a list of issues as an array of hashes, CSV or JSON, optionally
  # restricted to chosen fields and filtered by a custom block.
  class IssuesReport
    # @param [#each] issues objects responding to #to_h
    # @yieldparam [IssuesReport] report self, so fields/selection can be
    #   configured at construction time
    def initialize(issues)
      @issues = issues
      @fields = Issue.fields
      @select_block = proc { true }
      yield(self) if block_given?
    end

    # @param [Array<String, Symbol>] fields the fields to include; also the
    #   CSV column order
    def fields=(fields)
      @fields = fields.map(&:to_sym)
    end

    # Sets the filter: only issues for which the block returns a truthy value
    # are included in the report.
    # @yieldparam [Issue] issue
    def select(&block)
      @select_block = block
    end

    # @return [Array<Hash>] one hash per selected issue, limited to the
    #   configured fields
    def to_a
      rows = []
      each { |row| rows << row }
      rows
    end

    # @return [String] CSV with a header row followed by one row per issue
    def to_csv
      CSV.generate do |csv|
        csv << @fields
        each { |row| csv << row.values_at(*@fields) }
      end
    end

    # @param [Array] args generator arguments; the JSON library passes its
    #   generator state here when the report is nested inside another object
    #   being serialized, so they must be accepted rather than rejected.
    # @return [String] the report as a JSON array
    def to_json(*args)
      return JSON.dump(to_a) if args.empty?

      to_a.to_json(*args)
    end

    private

    # Yields the field-limited hash of every issue passing the select block.
    def each
      @issues.each do |issue|
        next unless @select_block.call(issue)

        yield(issue.to_h.select { |key| @fields.include?(key) })
      end
    end
  end
end
@@ -1,12 +1,15 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module SiteHealth
4
+ # Enhance Struct to work with keywords
2
5
  class KeyStruct < Struct
3
6
  def initialize(**keyword_args)
4
7
  keyword_args.each do |key, value|
5
- if members.include?(key)
6
- self[key] = value
7
- else
8
+ unless members.include?(key)
8
9
  raise ArgumentError, "Unknown key struct member: #{key}"
9
10
  end
11
+
12
+ self[key] = value
10
13
  end
11
14
  end
12
15
  end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'uri'
4
+
5
module SiteHealth
  # Wraps a URL and records whether it parses as an absolute URI.
  class Link
    class << self
      # Shortcut for building a link and asking whether it is valid.
      # @return [Boolean]
      def valid?(*args)
        new(*args).valid?
      end
    end

    attr_reader :url, :uri

    # @param [String] url
    def initialize(url)
      @url = url
      @uri = safe_parse_url(url)
      @valid = @uri.absolute?
    end

    # @return [Boolean] true if the URL parsed into an absolute URI
    def valid?
      @valid
    end

    private

    # @param [String] url
    # @return [URI::Generic] the parsed URL, or an empty URI when it cannot
    #   be parsed
    def safe_parse_url(url)
      begin
        URI.parse(url)
      rescue URI::InvalidURIError
        URI.parse('')
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'logger'
4
+
5
module SiteHealth
  # Don't log anything / send the logs to the abyss
  class NullLogger < Logger
    # Accepts and ignores any and all params.
    #
    # Logger's own initializer is still run, without a log device, so that
    # inherited methods which read its state (e.g. #level, #debug?) work
    # instead of raising on uninitialized instance variables.
    def initialize(*_args)
      super(nil)
    end

    # Accepts any and all params and doesn't do anything.
    # @return [nil]
    def add(*_args, &_block); end
  end
end
@@ -0,0 +1,167 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'site_health/url_map'
4
+ require 'site_health/event_emitter'
5
+ require 'site_health/timer'
6
+
7
module SiteHealth
  # Runs the configured checkers against pages and collects the per-URL
  # journal, the failed URLs and the issues that were found.
  class Nurse
    attr_reader :config, :failures, :checkers

    # @return [Array<Issue>] found issues
    attr_reader :issues

    # @param config configuration object; must respond to #checkers
    def initialize(config: SiteHealth.config)
      @config = config
      @checkers = config.checkers
      @pages_journal = UrlMap.new { {} }
      @failures = []
      @issues = []
      @clerk = nil
      @punched_out = false
    end

    # Runs the post-shift analysis the first time it is called.
    # @return [Nurse] returns self
    def punch_out!
      unless @punched_out
        post_shift_analysis
        @punched_out = true
      end

      self
    end

    # @return [Hash] check results
    def journal
      { checked_urls: @pages_journal.to_h, internal_server_error_urls: failures }
    end

    # Emits the failed URL event and records the URL.
    # @return [Array] all URLs that have failed
    def check_failed_url(url)
      clerk.emit_failed_url(url)
      failures << url
    end

    # The emitter is built on first use; a given block is only yielded to at
    # that point, before the emitter is memoized.
    # @return [Object] the event emitter
    # @yieldparam [Object] the event emitter
    def clerk
      return @clerk if @clerk

      event_names = %w[journal failed_url check page issue] + checkers.map(&:name)
      emitter = EventEmitter.define(*event_names).new
      yield(emitter) if block_given?
      @clerk = emitter
    end

    # Records page metadata, link relations and checker results for the page.
    # @return [Hash] result data
    def check_page(page)
      entry = @pages_journal[page.url]
      timer = Timer.start
      clerk.emit_page(page)

      entry[:started_at] = timer.started_at
      entry[:checked] = true
      entry[:url] = page.url
      entry[:content_type] = page.content_type
      entry[:http_status] = page.code
      entry[:redirect] = page.redirect?
      entry[:title] = page.title
      entry[:links_to] = page.each_url.map do |linked_url|
        # Also register the reverse relation on the linked page's entry.
        (@pages_journal[linked_url][:links_from] ||= []) << page.url
        linked_url.to_s
      end

      entry[:checks] = lab_results(page)

      timer.finish

      entry[:finished_at] = timer.finished_at
      entry[:runtime_in_seconds] = timer.diff.round(1)

      clerk.emit_journal(entry, page)
      entry
    end

    # Runs every applicable checker, collecting its issues and emitting the
    # check, checker-name and issue events.
    # @return [Hash] results of all checkers for page
    def lab_results(page)
      checkers.each_with_object({}) do |checker_klass, results|
        checker = checker_klass.new(page, config: config)
        next unless checker.should_check?

        checker.call

        found = checker.issues
        @issues.concat(found.to_a)

        clerk.emit_check(checker)
        clerk.emit(checker.name, checker)
        clerk.emit_each_issue(found)

        results[checker.name.to_sym] = checker.to_h
      end
    end

    # Provides transparent access to the methods in {#clerk}.
    # @param [Symbol] method
    #   The name of the missing method.
    # @param [Array] args
    #   Additional arguments for the missing method.
    # @raise [NoMethodError]
    #   The missing method did not map to a method in {#clerk}.
    # @see #clerk
    def method_missing(method, *args, &block)
      return super unless clerk.respond_to?(method)

      clerk.public_send(method, *args, &block)
    end

    # @param [Symbol] method
    #   The name of the missing method.
    # @param [Boolean] include_private optional (default: false)
    #   Whether to include private methods
    # @return [Boolean]
    #   true if it can respond to method name, false otherwise
    def respond_to_missing?(method, include_private = false)
      return true if clerk.respond_to?(method, include_private)

      super
    end

    private

    # Builds, emits and stores the issues that can only be derived once the
    # not-found pages are known.
    def post_shift_analysis
      late_issues = links_to_page_not_found_issues
      clerk.emit_each_issue(late_issues)
      @issues.concat(late_issues)
    end

    # @return [Array<Issue>] one issue per recorded link to a not-found page
    def links_to_page_not_found_issues
      not_found_urls = @issues
        .select { |issue| issue.code == :not_found }
        .map { |issue| issue.url.to_s }

      not_found_urls.flat_map do |not_found_url|
        referrers = @pages_journal[not_found_url][:links_from] || []
        referrers.map do |referrer|
          build_links_to_not_found_issue(referrer, not_found_url)
        end
      end
    end

    # @param url the page containing the link
    # @param not_found_url the link target that was not found
    # @return [Issue]
    def build_links_to_not_found_issue(url, not_found_url)
      Issue.new(
        name: 'links_to_page_not_found',
        code: :links_to_not_found,
        title: 'Links to page not found',
        detail: "Links to #{not_found_url} that is 404 page not found",
        severity: :major,
        priority: :high,
        url: url
      )
    end
  end
end
@@ -0,0 +1,77 @@
1
+ # frozen_string_literal: true
2
+
3
require 'csv'

module SiteHealth
  # Summarizes the Google PageSpeed results of a journal as a matrix or CSV.
  class PageSpeedSummarizer
    # @param [Hash] data journal; its :checked_urls entry maps each URL to
    #   that page's journal hash
    def initialize(data)
      @data = data[:checked_urls]
    end

    # @return [String] the matrix as CSV, header first, no trailing newline
    def to_csv
      # CSV.generate_line quotes fields containing commas, quotes or line
      # breaks (URLs may legally contain commas); a plain join(',') would
      # silently shift the columns of such rows.
      to_matrix.map { |row| CSV.generate_line(row).chomp }.join("\n")
    end

    # @return [Array<Array>] header row followed by one row per page that has
    #   PageSpeed data
    def to_matrix
      header = %w[
        url
        total_speed_score
        css_kb
        html_kb
        image_kb
        javascript_kb
        other_kb
        total_kbytes
        number_hosts
        number_js_resources
        number_css_resources
        number_resources
        number_static_resources
        started_at
        finished_at
        runtime_in_seconds
      ]
      rows = @data.map do |_, page_journal|
        # Entries without a :google_page_speed check (e.g. URLs that were
        # never checked) are skipped instead of raising on nil.
        pagespeed_data = page_journal.dig(:checks, :google_page_speed)&.data
        next unless pagespeed_data

        build_row(
          page_journal[:url],
          page_journal[:runtime_in_seconds],
          page_journal[:started_at],
          page_journal[:finished_at],
          pagespeed_data
        )
      end.compact

      [header] + rows
    end

    # @param url the page URL
    # @param runtime_in_seconds how long the page check took
    # @param started_at when the page check started
    # @param finished_at when the page check finished
    # @param pagespeed_data PageSpeed result; read via [:page_stats] and
    #   dig(:rule_groups, :SPEED, :score)
    # @return [Array] one matrix row, in header order
    def build_row(url, runtime_in_seconds, started_at, finished_at, pagespeed_data)
      stats = pagespeed_data[:page_stats]

      kbytes_columns = [
        bytes_to_kb(stats[:css_response_bytes]),
        bytes_to_kb(stats[:html_response_bytes]),
        bytes_to_kb(stats[:image_response_bytes]),
        bytes_to_kb(stats[:javascript_response_bytes]),
        bytes_to_kb(stats[:other_response_bytes]),
      ]
      # Total is the sum of the already rounded per-type columns.
      kbytes_columns << kbytes_columns.sum.round(1)

      host_columns = [
        stats[:number_hosts],
        stats[:number_js_resources],
        stats[:number_css_resources],
        stats[:number_resources],
        stats[:number_static_resources],
      ]

      total_speed_score = pagespeed_data.dig(:rule_groups, :SPEED, :score)

      [url, total_speed_score] + kbytes_columns + host_columns +
        [started_at, finished_at, runtime_in_seconds]
    end

    # @param [Numeric, nil] bytes a missing (nil) counter is treated as 0
    # @param [Integer] round number of decimals
    # @return [Float] the size in kilobytes
    def bytes_to_kb(bytes, round: 1)
      ((bytes || 0) / 1024.0).round(round)
    end
  end
end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'time'
4
+
5
module SiteHealth
  # Measures elapsed time with the monotonic clock and also records the
  # wall-clock start and finish timestamps.
  class Timer
    # @return [Time, nil] wall-clock time of the last #start / #finish
    attr_reader :started_at, :finished_at

    # @return [Timer] a new, already started timer
    def self.start
      new.tap(&:start)
    end

    # @return [Timer] a new timer that has measured the given block
    def self.measure(&block)
      new.tap { |timer| timer.measure(&block) }
    end

    # Starts the timer, runs the block and finishes the timer.
    # @return [Timer] self
    def measure
      start
      yield
      self
    ensure
      # Finish even when the block raises so the timer does not keep running.
      finish
    end

    # (Re)starts the timer.
    # @return [Time] the wall-clock start time
    def start
      # Clear any previous finish so a restarted timer reports neither a
      # stale finished_at nor a negative diff.
      @finished = nil
      @finished_at = nil
      @started = high_precision_time
      @started_at = Time.now
    end

    # Stops the timer.
    # @return [Time] the wall-clock finish time
    def finish
      @finished = high_precision_time
      @finished_at = Time.now
    end

    # @return [Float] seconds between start and finish, or between start and
    #   now when the timer has not been finished
    # @raise [StandardError] if the timer was never started
    def diff
      raise StandardError, 'timer must be started' unless @started

      finished = @finished || high_precision_time
      finished - @started
    end

    private

    # Monotonic clock reading; unaffected by wall-clock adjustments.
    def high_precision_time
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end
  end
end