marty 1.1.5 → 1.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/app/controllers/marty/diagnostic_controller.rb +15 -416
- data/app/models/diagnostic/aws/ec2_instance.rb +100 -0
- data/app/models/diagnostic/base.rb +69 -0
- data/app/models/diagnostic/base_collection.rb +10 -0
- data/app/models/diagnostic/collection.rb +10 -0
- data/app/models/diagnostic/delayed_job.rb +48 -0
- data/app/models/diagnostic/env.rb +35 -0
- data/app/models/diagnostic/environment.rb +35 -0
- data/app/models/diagnostic/fatal.rb +12 -0
- data/app/models/diagnostic/helper.rb +11 -0
- data/app/models/diagnostic/nodes.rb +18 -0
- data/app/models/diagnostic/reporter.rb +108 -0
- data/app/models/diagnostic/request.rb +28 -0
- data/app/models/diagnostic/version.rb +17 -0
- data/app/models/marty/helper.rb +0 -8
- data/app/views/marty/diagnostic/diag.html.erb +15 -19
- data/app/views/marty/diagnostic/op.html.erb +18 -19
- data/delorean/diagnostics.dl +1 -1
- data/lib/diagnostic/database.rb +28 -0
- data/lib/diagnostic/node.rb +35 -0
- data/lib/diagnostic/packer.rb +47 -0
- data/lib/marty/version.rb +1 -1
- data/spec/controllers/diagnostic_controller_spec.rb +18 -175
- data/spec/models/diagnostic/base_spec.rb +98 -0
- data/spec/models/diagnostic/collection_spec.rb +32 -0
- data/spec/models/diagnostic/delayed_job_spec.rb +46 -0
- data/spec/models/diagnostic/reporter_spec.rb +319 -0
- metadata +21 -1
@@ -0,0 +1,100 @@
|
|
1
|
+
# Snapshot of the EC2 instance this process runs on.
#
# Building an instance issues HTTP requests to the instance meta-data host
# (id, identity document, IAM role, credentials) and signed requests to the
# EC2 query API (Name tag, private IPs of instances sharing that tag).
class Diagnostic::Aws::Ec2Instance
  attr_reader :id, :doc, :role, :creds, :version, :host, :tag, :nodes

  # aws reserved host used to get instance meta-data
  META_DATA_HOST = '169.254.169.254'

  # Returns true when the meta-data host answers with a non-blank body.
  # Any StandardError raised by the request is treated as "not on AWS".
  def self.is_aws?
    get("http://#{META_DATA_HOST}").present?
  rescue StandardError
    false
  end

  # Performs a plain HTTP GET and returns the response body.
  #
  # url - String URL to fetch.
  #
  # The timeout (seconds) comes from DIAG_TIMEOUT, defaulting to 5. ENV values
  # are Strings, so the value is converted with Float before it is handed to
  # Net::HTTP; a non-numeric DIAG_TIMEOUT raises ArgumentError.
  #
  # This is a singleton method, so a bare `private` does not apply to it; it
  # must stay public because instance methods call it as `self.class.get`.
  def self.get(url)
    uri = URI.parse(url)
    request = Net::HTTP.new(uri.host, uri.port)
    request.read_timeout = request.open_timeout =
      Float(ENV['DIAG_TIMEOUT'] || 5)
    request.start { |http| http.get(uri.to_s) }.body
  end

  # Order matters: credentials need @role, @host needs @doc, the tag lookup
  # needs @id/@creds/@host/@version, and the node lookup needs @tag.
  def initialize
    @id = get_instance_id
    @doc = get_document
    @role = get_role
    @creds = get_credentials
    @host = "ec2.#{@doc['region']}.amazonaws.com"
    @version = '2016-11-15'
    @tag = get_tag
    @nodes = get_private_ips
  end

  private

  # Fetches a path below /latest/meta-data/ from the meta-data host.
  def query_meta_data(query)
    self.class.get("http://#{META_DATA_HOST}/latest/meta-data/#{query}/")
  end

  # Fetches a path below /latest/dynamic/ from the meta-data host.
  def query_dynamic(query)
    self.class.get("http://#{META_DATA_HOST}/latest/dynamic/#{query}/")
  end

  def get_instance_id
    query_meta_data('instance-id').to_s
  end

  def get_role
    query_meta_data('iam/security-credentials').to_s
  end

  # Parsed JSON credentials for @role; read below via the 'AccessKeyId',
  # 'SecretAccessKey' and 'Token' keys.
  def get_credentials
    JSON.parse(query_meta_data("iam/security-credentials/#{@role}"))
  end

  # Parsed JSON identity document; 'region' is the only key used here.
  def get_document
    JSON.parse(query_dynamic('instance-identity/document'))
  end

  # Sends a presigned GET to the EC2 query API and returns the parsed
  # "<action>Response" element of the XML reply.
  #
  # action - String API action name, e.g. 'DescribeTags'.
  # params - Hash of additional query parameters.
  def ec2_request(action, params = {})
    default = {
      'Action' => action,
      'Version' => @version
    }

    # NOTE(review): Hash#+ is not core Ruby; presumably a project-provided
    # merge -- confirm. Values are interpolated without URL-encoding.
    query = (default + params).map { |key, value| "#{key}=#{value}" }.join('&')
    url = "https://#{@host}/?" + query

    signer = Aws::Sigv4::Signer.new(service: 'ec2',
                                    region: @doc['region'],
                                    access_key_id: @creds['AccessKeyId'],
                                    secret_access_key: @creds['SecretAccessKey'],
                                    session_token: @creds['Token'])
    signed_url = signer.presign_url(http_method: 'GET', url: url)

    # The request goes through Net::HTTP.get with the signed URL; the
    # separately constructed Net::HTTP object was never used and is gone.
    Hash.from_xml(Net::HTTP.get(signed_url))["#{action}Response"]
  end

  # Value of this instance's 'Name' tag.
  def get_tag
    params = {'Filter.1.Name' => 'resource-id',
              'Filter.1.Value.1' => @id, # already fetched in initialize
              'Filter.2.Name' => 'key',
              'Filter.2.Value.1' => 'Name'}
    ec2_request('DescribeTags', params)['tagSet']['item']['value']
  end

  # DescribeInstances response filtered to instances having @tag as a
  # tag value.
  def get_instances
    params = {'Filter.1.Name' => 'tag-value',
              'Filter.1.Value.1' => @tag}
    ec2_request('DescribeInstances', params)
  end

  # Flat list of private IP addresses from every reservation returned by
  # get_instances.
  def get_private_ips
    reservation_set = get_instances['reservationSet']
    reservations = as_list(reservation_set && reservation_set['item'])
    reservations.flat_map do |reservation|
      as_list(reservation['instancesSet']['item']).map do |instance|
        instance['privateIpAddress']
      end
    end
  end

  # The parsed XML yields a Hash for a single <item> and an Array for
  # several; normalise both (and a missing element) to an Array.
  def as_list(value)
    return [] if value.nil?
    value.is_a?(Array) ? value : [value]
  end
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# Common behaviour for diagnostics: subclasses implement `generate`; this
# class supplies consistency checking across nodes and HTML rendering.
class Diagnostic::Base < Diagnostic::Request
  extend Diagnostic::Packer
  include ActionView::Helpers::TextHelper

  # aggregatable - defaults to true; subclasses may set it to false.
  # status_only  - when true, comparisons between nodes consider only each
  #                test's 'status' (see process_status_only).
  class_attribute :aggregatable, :status_only
  self.aggregatable = true
  self.status_only = false

  # NOTE(review): @@read_only is not read anywhere in this class; kept
  # because class variables are visible to subclasses -- confirm usage.
  @@read_only = Marty::Util.db_in_recovery?
  @@template = ActionController::Base.new.lookup_context.
               find_template("marty/diagnostic/diag").identifier

  # Subclasses must override; the base implementation always raises.
  def self.generate
    raise "generate has not been defined for #{name}"
  end

  def self.fatal?
    name == 'Diagnostic::Fatal'
  end

  # Returns infos unchanged unless status_only is set, in which case each
  # {test => info} hash is reduced to {test => info['status']}.
  def self.process_status_only(infos)
    return infos unless status_only
    infos.map { |info| info.map { |test, result| [test, result['status']] }.to_h }
  end

  # Names of the tests whose merged values contain more than one distinct
  # value.
  #
  # data - Hash of node => {test => info}.
  def self.get_difference(data)
    values = process_status_only(data.values)
    Marty::DataExporter.hash_array_merge(values, true).map { |test, results|
      test if results.uniq.count > 1
    }.compact
  end

  # Returns a copy of data in which every test info carries a 'consistent'
  # flag: false when the test is listed by get_difference, true otherwise.
  def self.apply_consistency(data)
    diff = get_difference(data)
    data.each_with_object({}) do |(node, diagnostic), new_data|
      new_data[node] = diagnostic.each_with_object({}) do |(test, info), new_diagnostic|
        # NOTE(review): Hash#+ is not core Ruby; presumably a
        # project-provided merge -- confirm.
        new_diagnostic[test] = info + {'consistent' => !diff.include?(test)}
      end
    end
  end

  # True when every node's (status-reduced) result is identical.
  def self.consistent?(data)
    process_status_only(data.values).uniq.count == 1
  end

  # Renders the diagnostic template with this method's binding.
  #
  # `consistent` and `success` are not used in this method body; presumably
  # the template reads them (and `data`) through `binding` -- confirm before
  # renaming any of them.
  def self.display(data)
    consistent = consistent?(data)
    success = consistent && !fatal?
    # File.read closes the file; File.open(...).read left the handle open.
    ERB.new(File.read(@@template)).result(binding)
  end

  # CSS class for one test result: 'inconsistent' for a missing info or a
  # passing-but-inconsistent one, 'error' for a falsy status, else 'passed'.
  def self.display_info_css(info)
    return 'inconsistent' if info.nil? || (info['status'] &&
                                           info['consistent'] == false)
    return 'error' unless info['status']
    'passed'
  end

  # Description formatted with simple_format; 'N/A' when info is missing.
  def self.display_info_description(info)
    new.simple_format(info ? info['description'] : 'N/A')
  end
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# Groups several diagnostics so they are reported as a single diagnostic.
#
# NOTE(review): Diagnostic::Collection is also defined in this diff with
# Diagnostic::Base as its superclass -- confirm which definition is meant
# to be loaded.
class Diagnostic::Collection < Diagnostic::ByStatus
  class_attribute :diagnostics
  self.diagnostics = []

  # Runs every assigned diagnostic and deep-merges the results.
  # Raises when no diagnostics have been assigned.
  def self.generate
    raise 'No diagnostics assigned to collection.' if diagnostics.empty?

    results = diagnostics.map(&:generate)
    results.reduce { |merged, result| merged.deep_merge(result) }
  end
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# A diagnostic made of other diagnostics; nodes are compared on status only.
class Diagnostic::Collection < Diagnostic::Base
  class_attribute :diagnostics
  self.diagnostics = []
  self.status_only = true

  # Runs every assigned diagnostic and deep-merges the results.
  # Raises when no diagnostics have been assigned.
  def self.generate
    raise 'No diagnostics assigned to collection.' if diagnostics.empty?

    results = diagnostics.map(&:generate)
    results.reduce { |merged, result| merged.deep_merge(result) }
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# DelayedJob is a diagnostic that creates a series of delayed jobs in the
# hope that enough nodes pick them up to tell whether the delayed job
# workers are running the latest application version.
#
# The `DELAYED_VER` environment variable should be set in the delayed
# jobs initializer.
#
class Diagnostic::DelayedJob < Diagnostic::Base
  self.aggregatable = false

  # Returns {node => {'Version' => info}}; a node passes only when all of
  # its reported versions are the same and equal ENV['DELAYED_VER'].
  # Raises for local scope, a missing DELAYED_VER, or no workers.
  def self.generate
    raise 'DelayedJob cannot be called with local scope.' if scope == 'local'

    if ENV['DELAYED_VER'].nil?
      raise 'DELAYED_VER environment variable has not been initialized.'
    end

    workers = delayed_worker_count
    raise 'No delayed jobs are running.' if workers.zero?

    # only iterate over half of the workers (at least one) to avoid
    # excess use of delayed job time
    sample = [workers / 2, 1].max

    engine = Marty::ScriptSet.new.get_engine("Diagnostics")
    rows = engine.evaluate('VersionDelay', 'result', {'count' => sample - 1})

    # each row is indexed as [node, version]; collect versions per node
    reports = rows.group_by { |row| row[0] }.map do |node, node_rows|
      versions = node_rows.map { |row| row[1] }.uniq
      status = versions.count == 1 && versions[0] == ENV['DELAYED_VER']

      {node => {'Version' => create_info(versions.join("\n"), status)}}
    end

    reports.reduce(:deep_merge)
  end

  # Number of postgres connections to this database whose application_name
  # contains 'delayed_job'.
  def self.delayed_worker_count
    connections = Diagnostic::Node.
                  get_postgres_connections[Diagnostic::Database.db_name]
    connections.count do |conn|
      conn['application_name'].include?('delayed_job')
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# Reports the process environment variables so nodes can be compared.
class Diagnostic::Env < Diagnostic::Base
  # Returns a sorted Hash of environment variables.
  #
  # filter - String; only names containing it are returned (default '',
  #          which matches every name).
  #
  # Names containing 'PASSWORD' or 'DEBUG' are always left out.
  def self.environment_variables(filter = '')
    # ENV.to_h gives a detached Hash. ENV.clone does not: writes through
    # the clone change the real process environment, so the truncation and
    # deletions below would have altered SECRET_KEY_BASE / SCRIPT_* for the
    # whole process (and newer Rubies refuse ENV.clone altogether).
    env = ENV.to_h

    # obfuscate SECRET_KEY_BASE for comparison
    env['SECRET_KEY_BASE'] = env['SECRET_KEY_BASE'][0, 4] if
      env['SECRET_KEY_BASE']

    # remove SCRIPT_URI, SCRIPT_URL as calling node differs
    ['SCRIPT_URI', 'SCRIPT_URL'].each { |k| env.delete(k) }

    to_block = ['PASSWORD', 'DEBUG']
    env.sort.each_with_object({}) do |(k, v), h|
      next if to_block.any? { |b| k.include?(b) }
      h[k] = v if k.include?(filter)
    end
  end

  def self.generate
    pack do
      environment_variables
    end
  end

  # Overridden to return only inconsistent data: tests that do not differ
  # between nodes are dropped, the rest are marked 'consistent' => false.
  def self.apply_consistency(data)
    diff = get_difference(data)
    data.each_with_object({}) do |(node, diagnostic), new_data|
      new_data[node] = diagnostic.each_with_object({}) do |(test, info), new_diagnostic|
        # NOTE(review): Hash#+ is not core Ruby; presumably a
        # project-provided merge -- confirm.
        new_diagnostic[test] = info + {'consistent' => false} if
          diff.include?(test)
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# Reports versions of the runtime, framework and database.
class Diagnostic::Environment < Diagnostic::Base
  def self.generate
    pack do
      ruby_version = "#{RUBY_VERSION}-p#{RUBY_PATCHLEVEL} (#{RUBY_PLATFORM})"

      {
        'Environment' => Rails.env,
        'Rails' => Rails.version,
        'Netzke Core' => Netzke::Core::VERSION,
        'Netzke Basepack' => Netzke::Basepack::VERSION,
        'Ruby' => ruby_version,
        'RubyGems' => Gem::VERSION,
        'Database Adapter' => Diagnostic::Database.db_adapter_name,
        'Database Server' => Diagnostic::Database.db_server_name,
        'Database Version' => db_version,
        'Database Schema Version' => db_schema
      }
    end
  end

  # Database version, or the result of error(message) when the lookup
  # raises a StandardError.
  def self.db_version
    Diagnostic::Database.db_version
  rescue => e
    error(e.message)
  end

  # Database schema version, or the result of error(message) when the
  # lookup raises a StandardError.
  def self.db_schema
    Diagnostic::Database.db_schema
  rescue => e
    error(e.message)
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
# Diagnostic used to carry failures raised while running other diagnostics.
class Diagnostic::Fatal < Diagnostic::Base
  # HTML banner text for a failed run.
  def self.display_alert_message
    '<h3 class="error">Something went wrong.</br>' +
      'Consistency is checked between remaining nodes if applicable.</h3>'
  end

  # Wraps msg as {class name => {node => {type => error(msg)}}}.
  #
  # msg  - error text.
  # opts - Hash; :node defaults to Diagnostic::Node.my_ip and :type to
  #        'RuntimeError'.
  def self.message(msg, opts = {})
    node_key = opts[:node] || Diagnostic::Node.my_ip
    type_key = opts[:type] || 'RuntimeError'
    failure = {type_key => error(msg)}

    {name => {node_key => failure}}
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Compares the nodes connected to Postgres with those found through AWS EC2.
class Diagnostic::Nodes < Diagnostic::Base
  def self.generate
    pack do
      # aws_nodes stays nil when not on AWS; a failed lookup is turned into
      # a one-element list holding the error message, which then shows up
      # as a discrepancy below.
      aws_nodes = nil
      begin
        if Diagnostic::Aws::Ec2Instance.is_aws?
          aws_nodes = Diagnostic::Aws::Ec2Instance.new.nodes.sort
        end
      rescue => e
        aws_nodes = [e.message]
      end

      pg_nodes = Diagnostic::Node.get_nodes.sort

      if aws_nodes.nil? || pg_nodes == aws_nodes
        pg_nodes.join("\n")
      else
        error("There is a discrepancy between nodes connected to "\
              "Postgres and those discovered through AWS EC2.\n"\
              "Postgres: \n#{pg_nodes.join("\n")}\n"\
              "AWS: \n#{aws_nodes.join("\n")}")
      end
    end
  end
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
# Runs the diagnostics named by a request, either on this node only
# (scope 'local') or aggregated with the results of the other nodes.
class Diagnostic::Reporter < Diagnostic::Request
  # reports     - Hash of report name => list of diagnostic names.
  # diagnostics - diagnostic classes selected by the last call to run.
  class_attribute :reports, :diagnostics

  self.reports = {}
  self.diagnostics = []

  # Resolves the requested ops into diagnostic classes and runs them.
  #
  # request - stored on the class via `request=`.
  #
  # Returns the result of generate for scope 'local', else of aggregate.
  def self.run(request)
    self.request = request

    # the reporter itself is removed from the requested ops
    ops = op.split(/,\s*/).uniq - [unresolve_diagnostic(self)]
    reps = ops.select { |o| reports.key?(o) }

    # report names are replaced by the diagnostics they list
    expanded = (ops - reps) + reps.map { |r| reports[r] }.flatten
    self.diagnostics = expanded.uniq.map { |d| resolve_diagnostic(d) }

    scope == 'local' ? generate : aggregate
  end

  # The methods below were preceded by a bare `private`, which does not
  # apply to `def self.` methods; they are, and remain, public.

  # Class for a diagnostic name: a name containing 'Diagnostic::' is used
  # as given, anything else is camelized under the Diagnostic namespace.
  def self.resolve_diagnostic(name)
    return name.constantize if name.include?('Diagnostic::')
    ('Diagnostic::' + name.downcase.camelize).constantize
  end

  # Short underscored name of a diagnostic class.
  def self.unresolve_diagnostic(klass)
    klass.name.demodulize.underscore
  end

  # Hash of diagnostic class name => generated result. A diagnostic that
  # raises is recorded as a Diagnostic::Fatal message instead.
  def self.generate
    diagnostics.each_with_object({}) do |d, h|
      begin
        h[d.name] = d.generate
      rescue => e
        h.deep_merge!(Diagnostic::Fatal.message(e.message, type: d.name))
      end
    end
  end

  # Local plus remote results with consistency applied, and the subset of
  # entries selected by errors.
  def self.aggregate
    data = consistency([generate, get_remote_diagnostics].reduce(:deep_merge))
    {'data' => data, 'errors' => errors(data)}
  end

  # Applies each diagnostic class's apply_consistency to its result.
  def self.consistency(data)
    data.each_with_object({}) do |(klass, result), h|
      h[klass] = resolve_diagnostic(klass).apply_consistency(result)
    end
  end

  # Copy of data keeping only test infos whose status is falsy or, outside
  # local scope, whose 'consistent' flag is falsy. Nodes and diagnostics
  # left empty are removed.
  def self.errors(data)
    data.each_with_object({}) do |(klass, result), new_data|
      new_data[klass] = result.each_with_object({}) do |(node, diagnostic), new_result|
        new_result[node] = diagnostic.each_with_object({}) do |(test, info), new_diagnostic|
          new_diagnostic[test] = info unless
            info['status'] && (scope == 'local' || info['consistent'])
        end
        new_result.delete(node) if new_result[node].empty?
      end
      new_data.delete(klass) if new_data[klass].empty?
    end
  end

  # Concatenated display output of every diagnostic in result.
  # NOTE(review): relies on `sum` accepting Strings -- confirm for the
  # Ruby / ActiveSupport versions in use.
  def self.displays(result)
    result.map { |d, r| resolve_diagnostic(d).display(r) }.sum
  end

  # Requests the aggregatable diagnostics from every other node, one
  # thread per node, and deep-merges the replies. A non-200 reply or a
  # raised error becomes a Diagnostic::Fatal message for that node.
  def self.get_remote_diagnostics
    ops = diagnostics.map { |d| unresolve_diagnostic(d) if d.aggregatable }.compact
    return {} if ops.empty?

    # ENV values are Strings; Net::HTTP timeouts must be numeric. A
    # non-numeric DIAG_TIMEOUT raises ArgumentError here.
    timeout = Float(ENV['DIAG_TIMEOUT'] || 10)

    nodes = Diagnostic::Node.get_nodes - [Diagnostic::Node.my_ip]
    remote = nodes.sort.map do |n|
      Thread.new do
        uri = Addressable::URI.new(host: n, port: request.port)
        uri.scheme = ssl? ? 'https' : 'http'
        uri.path = '/marty/diag.json'
        uri.query_values = {
          scope: 'local',
          op: ops.join(','),
        }
        req = Net::HTTP.new(uri.host, uri.port)
        req.use_ssl = ssl?
        req.read_timeout = req.open_timeout = timeout
        # certificate verification is switched off for these requests
        req.verify_mode = OpenSSL::SSL::VERIFY_NONE

        begin
          response = req.start { |http| http.get(uri.to_s) }
          next JSON.parse(response.body) if response.code == "200"

          Diagnostic::Fatal.message(response.body,
                                    type: response.message,
                                    node: uri.host)
        rescue => e
          Diagnostic::Fatal.message(e.message,
                                    type: e.class,
                                    node: uri.host)
        end
      end
    end

    remote.empty? ? {} : remote.map(&:join).map(&:value).reduce(:deep_merge)
  end
end
|