marty 1.1.5 → 1.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/app/controllers/marty/diagnostic_controller.rb +15 -416
- data/app/models/diagnostic/aws/ec2_instance.rb +100 -0
- data/app/models/diagnostic/base.rb +69 -0
- data/app/models/diagnostic/base_collection.rb +10 -0
- data/app/models/diagnostic/collection.rb +10 -0
- data/app/models/diagnostic/delayed_job.rb +48 -0
- data/app/models/diagnostic/env.rb +35 -0
- data/app/models/diagnostic/environment.rb +35 -0
- data/app/models/diagnostic/fatal.rb +12 -0
- data/app/models/diagnostic/helper.rb +11 -0
- data/app/models/diagnostic/nodes.rb +18 -0
- data/app/models/diagnostic/reporter.rb +108 -0
- data/app/models/diagnostic/request.rb +28 -0
- data/app/models/diagnostic/version.rb +17 -0
- data/app/models/marty/helper.rb +0 -8
- data/app/views/marty/diagnostic/diag.html.erb +15 -19
- data/app/views/marty/diagnostic/op.html.erb +18 -19
- data/delorean/diagnostics.dl +1 -1
- data/lib/diagnostic/database.rb +28 -0
- data/lib/diagnostic/node.rb +35 -0
- data/lib/diagnostic/packer.rb +47 -0
- data/lib/marty/version.rb +1 -1
- data/spec/controllers/diagnostic_controller_spec.rb +18 -175
- data/spec/models/diagnostic/base_spec.rb +98 -0
- data/spec/models/diagnostic/collection_spec.rb +32 -0
- data/spec/models/diagnostic/delayed_job_spec.rb +46 -0
- data/spec/models/diagnostic/reporter_spec.rb +319 -0
- metadata +21 -1
@@ -0,0 +1,100 @@
|
|
1
|
+
# Describes the EC2 instance the current process runs on. Data is gathered
# from the instance meta-data service and from signed EC2 query API calls.
# `nodes` holds the private IPs of all instances matched by this instance's
# `Name` tag value.
class Diagnostic::Aws::Ec2Instance
  attr_reader :id, :doc, :role, :creds, :version, :host, :tag, :nodes

  # aws reserved host used to get instance meta-data
  META_DATA_HOST = '169.254.169.254'.freeze

  # timeout in seconds used when DIAG_TIMEOUT is not set
  DEFAULT_TIMEOUT = 5

  # Best-effort probe for the meta-data service.
  #
  # Returns true when the meta-data host answers with a non-blank body.
  # Any StandardError raised while requesting it (including an unparsable
  # DIAG_TIMEOUT) is treated as "not running on AWS".
  def self.is_aws?
    response = begin
      get("http://#{META_DATA_HOST}")
    rescue StandardError
      nil
    end
    response.present?
  end

  # Performs a plain HTTP GET on +url+ and returns the response body.
  #
  # This method has to stay public: the instance helpers below call it with
  # an explicit receiver (`self.class.get`). A bare `private` keyword would
  # not affect a `def self.` method anyway.
  #
  # Raises ArgumentError when DIAG_TIMEOUT is set but is not numeric.
  def self.get(url)
    uri = URI.parse(url)
    request = Net::HTTP.new(uri.host, uri.port)
    # ENV values are Strings, Net::HTTP needs numeric timeouts
    request.read_timeout = request.open_timeout =
      Float(ENV['DIAG_TIMEOUT'] || DEFAULT_TIMEOUT)
    request.start { |http| http.get(uri.to_s) }.body
  end

  # The assignment order matters: credentials need the role, the host needs
  # the identity document, the tag lookup needs id/credentials/host/version,
  # and the node lookup needs the tag.
  def initialize
    @id = get_instance_id
    @doc = get_document
    @role = get_role
    @creds = get_credentials
    @host = "ec2.#{@doc['region']}.amazonaws.com"
    @version = '2016-11-15'
    @tag = get_tag
    @nodes = get_private_ips
  end

  private

  # GETs `/latest/meta-data/<query>/` from the meta-data host.
  def query_meta_data(query)
    self.class.get("http://#{META_DATA_HOST}/latest/meta-data/#{query}/")
  end

  # GETs `/latest/dynamic/<query>/` from the meta-data host.
  def query_dynamic(query)
    self.class.get("http://#{META_DATA_HOST}/latest/dynamic/#{query}/")
  end

  def get_instance_id
    query_meta_data('instance-id').to_s
  end

  def get_role
    query_meta_data('iam/security-credentials').to_s
  end

  # Parsed JSON credentials for @role (read below via the 'AccessKeyId',
  # 'SecretAccessKey' and 'Token' keys).
  def get_credentials
    JSON.parse(query_meta_data("iam/security-credentials/#{@role}"))
  end

  # Parsed JSON instance identity document (read for its 'region' key).
  def get_document
    JSON.parse(query_dynamic('instance-identity/document'))
  end

  # Percent-encodes a query-string component using only the RFC 3986
  # unreserved characters, so that values such as tag names containing
  # spaces still produce a valid URL.
  def escape(value)
    URI.encode_www_form_component(value.to_s).
      gsub('+', '%20').gsub('*', '%2A').gsub('%7E', '~')
  end

  # Calls the EC2 query API +action+ with +params+ through a presigned GET
  # URL and returns the content of the "<action>Response" element of the
  # XML reply as a Hash.
  def ec2_request(action, params = {})
    query = {'Action' => action, 'Version' => @version}.merge(params).
            map { |key, value| "#{escape(key)}=#{escape(value)}" }.join('&')
    url = "https://#{@host}/?#{query}"

    signer = Aws::Sigv4::Signer.new(service: 'ec2',
                                    region: @doc['region'],
                                    access_key_id: @creds['AccessKeyId'],
                                    secret_access_key: @creds['SecretAccessKey'],
                                    session_token: @creds['Token'])
    signed_url = signer.presign_url(http_method: 'GET', url: url)

    # Net::HTTP.get turns on TLS by itself for an https URI, so no separate
    # connection object has to be configured here.
    Hash.from_xml(Net::HTTP.get(signed_url))["#{action}Response"]
  end

  # Value of the `Name` tag attached to this instance.
  def get_tag
    params = {'Filter.1.Name' => 'resource-id',
              # reuse the id fetched in initialize instead of asking the
              # meta-data service a second time
              'Filter.1.Value.1' => @id,
              'Filter.2.Name' => 'key',
              'Filter.2.Value.1' => 'Name'}
    ec2_request('DescribeTags', params)['tagSet']['item']['value']
  end

  # DescribeInstances response filtered by tag value @tag.
  def get_instances
    params = {'Filter.1.Name' => 'tag-value',
              'Filter.1.Value.1' => @tag}
    ec2_request('DescribeInstances', params)
  end

  # Hash.from_xml yields a Hash for a single <item> and an Array for
  # several; normalize both shapes to an Array.
  def as_list(item)
    item.is_a?(Array) ? item : [item]
  end

  # Private IP addresses of every instance returned by get_instances.
  # Instances without a private IP are skipped.
  def get_private_ips
    reservations = as_list(get_instances['reservationSet']['item'])
    reservations.flat_map do |reservation|
      as_list(reservation['instancesSet']['item']).
        map { |instance| instance['privateIpAddress'] }
    end.compact
  end
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# Base class of all diagnostics. Subclasses implement `generate`; this class
# supplies the cross-node consistency checks and the rendering helpers.
#
# The `data` arguments below are hashes of the form
#   {node => {test => {'status' => ..., 'description' => ...}}}
# as far as the accessors in this class show.
class Diagnostic::Base < Diagnostic::Request
  extend Diagnostic::Packer
  include ActionView::Helpers::TextHelper

  # all diagnostics have `aggregatable` set to true.
  # aggregatable indicates to the reporting mechanism that a diagnostic
  # should be aggregated, as some types of diagnostics are
  # aggregated differently (or not at all).
  #
  # status_only makes the consistency checks compare only the 'status' of
  # each test instead of the whole info hash.
  class_attribute :aggregatable, :status_only
  self.aggregatable = true
  self.status_only = false

  # NOTE(review): class variables are shared with every subclass and are
  # evaluated once, when this class is loaded. @@read_only is not read in
  # this class - presumably subclasses use it; confirm before touching it.
  @@read_only = Marty::Util.db_in_recovery?
  @@template = ActionController::Base.new.lookup_context.
               find_template("marty/diagnostic/diag").identifier

  # Must be overridden by each diagnostic.
  def self.generate
    raise "generate has not been defined for #{name}"
  end

  def self.fatal?
    name == 'Diagnostic::Fatal'
  end

  # Reduces every info hash to its 'status' when status_only is set;
  # otherwise returns +infos+ untouched.
  def self.process_status_only(infos)
    return infos unless status_only

    infos.map do |info|
      info.map { |test, result| [test, result['status']] }.to_h
    end
  end

  # Names of the tests whose values are not identical on all nodes.
  def self.get_difference(data)
    values = process_status_only(data.values)
    Marty::DataExporter.hash_array_merge(values, true).map do |test, results|
      test if results.uniq.count > 1
    end.compact
  end

  # Returns a copy of +data+ in which every test info carries a
  # 'consistent' flag (false when the test differs between nodes).
  def self.apply_consistency(data)
    diff = get_difference(data)
    data.each_with_object({}) do |(node, diagnostic), new_data|
      new_data[node] = diagnostic.each_with_object({}) do |(test, info), new_diagnostic|
        # plain Hash#merge; Hash#+ is not part of core Ruby
        new_diagnostic[test] = info.merge('consistent' => !diff.include?(test))
      end
    end
  end

  # True when all nodes report the same (status-reduced) diagnostic.
  def self.consistent?(data)
    process_status_only(data.values).uniq.count == 1
  end

  # Renders the diagnostic template with this method's binding.
  #
  # `data`, `consistent` and `success` are kept as locals because they are
  # exposed to the template through `binding` - presumably the template
  # reads them; do not rename without checking diag.html.erb.
  def self.display(data)
    consistent = consistent?(data)
    success = consistent && !fatal?
    # File.read closes the file, unlike File.open(...).read
    ERB.new(File.read(@@template)).result(binding)
  end

  # CSS class for a test result: 'inconsistent' for a missing info or a
  # passing-but-inconsistent one, 'error' for a falsy status, else 'passed'.
  def self.display_info_css(info)
    return 'inconsistent' if info.nil? || (info['status'] &&
                                           info['consistent'] == false)
    return 'error' unless info['status']

    'passed'
  end

  # Description of +info+ run through simple_format ('N/A' when missing).
  def self.display_info_description(info)
    new.simple_format(info ? info['description'] : 'N/A')
  end
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# Groups separate diagnostics so that they are reported as one diagnostic.
class Diagnostic::Collection < Diagnostic::ByStatus
  class_attribute :diagnostics
  self.diagnostics = []

  # Runs `generate` on every assigned diagnostic and deep-merges the
  # individual results. Raises when the collection is empty.
  def self.generate
    if diagnostics.empty?
      raise 'No diagnostics assigned to collection.'
    end

    results = diagnostics.map(&:generate)
    results.reduce(:deep_merge)
  end
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# A diagnostic made of several member diagnostics. Only the status of each
# test takes part in consistency checks (status_only is switched on).
class Diagnostic::Collection < Diagnostic::Base
  class_attribute :diagnostics
  self.diagnostics = []
  self.status_only = true

  # Deep-merges the `generate` output of all member diagnostics.
  # Raises when no member has been assigned.
  def self.generate
    raise 'No diagnostics assigned to collection.' if diagnostics.empty?

    generated = diagnostics.map(&:generate)
    generated.inject(:deep_merge)
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# DelayedJob is a special diagnostic: it schedules a series of delayed jobs
# hoping that enough nodes pick them up to tell whether the delayed job
# workers run the latest application version.
#
# The `DELAYED_VER` environment variable should be set in the
# delayed jobs initializer.
#
class Diagnostic::DelayedJob < Diagnostic::Base
  self.aggregatable = false

  # Evaluates the `VersionDelay` node of the "Diagnostics" script and
  # returns one 'Version' info per node that answered. A node passes when
  # it reported exactly one version and that version equals DELAYED_VER.
  #
  # Raises when called with local scope, when DELAYED_VER is unset or when
  # no delayed job worker is connected.
  def self.generate
    raise 'DelayedJob cannot be called with local scope.' if scope == 'local'

    if ENV['DELAYED_VER'].nil?
      raise 'DELAYED_VER environment variable has not been initialized.'
    end

    worker_count = delayed_worker_count
    raise 'No delayed jobs are running.' if worker_count.zero?

    # only half of the delayed workers (but at least one) are used, to
    # avoid excess use of delayed job time
    half = worker_count / 2
    iterations = half.zero? ? 1 : half

    engine = Marty::ScriptSet.new.get_engine("Diagnostics")
    res = engine.evaluate('VersionDelay', 'result',
                          {'count' => iterations - 1})

    # collect the reported versions per node
    versions_by_node = res.each_with_object({}) do |r, acc|
      acc[r[0]] ||= []
      acc[r[0]] << r[1]
    end

    # drop duplicates and build the "aggregate" object
    infos = versions_by_node.map do |node, reported|
      versions = reported.uniq
      status = versions.count == 1 && versions[0] == ENV['DELAYED_VER']

      {node => {'Version' => create_info(versions.join("\n"), status)}}
    end
    infos.reduce(:deep_merge)
  end

  # Number of connections to the current database whose application name
  # contains 'delayed_job'.
  def self.delayed_worker_count
    connections =
      Diagnostic::Node.get_postgres_connections[Diagnostic::Database.db_name]
    connections.count do |conn|
      conn['application_name'].include?('delayed_job')
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# Reports the environment variables of the node so that they can be
# compared between nodes.
class Diagnostic::Env < Diagnostic::Base
  # variables that differ per calling node and are therefore left out
  NODE_SPECIFIC = %w[SCRIPT_URI SCRIPT_URL].freeze

  # variables whose name contains one of these fragments are never reported
  BLOCKED = %w[PASSWORD DEBUG].freeze

  # Returns a name-sorted Hash of the environment variables whose name
  # contains +filter+, minus the node specific and blocked ones.
  # SECRET_KEY_BASE is cut down to its first four characters.
  def self.environment_variables(filter = '')
    # Work on a real copy. ENV.clone is not a copy: on older Rubies the
    # clone still writes to the process environment (so the statements
    # below would truncate SECRET_KEY_BASE and delete SCRIPT_URI/SCRIPT_URL
    # for the whole process), and newer Rubies raise TypeError on it.
    env = ENV.to_h

    # obfuscate SECRET_KEY_BASE for comparison
    secret = env['SECRET_KEY_BASE']
    env['SECRET_KEY_BASE'] = secret[0, 4] if secret

    # remove SCRIPT_URI, SCRIPT_URL as calling node differs
    NODE_SPECIFIC.each { |key| env.delete(key) }

    env.sort.each_with_object({}) do |(key, value), result|
      next unless BLOCKED.none? { |fragment| key.include?(fragment) }

      result[key] = value if key.include?(filter)
    end
  end

  def self.generate
    pack do
      environment_variables
    end
  end

  # overridden to only return inconsistent data: tests that are identical
  # on all nodes are dropped, the remaining ones are flagged inconsistent
  def self.apply_consistency(data)
    diff = get_difference(data)
    data.each_with_object({}) do |(node, diagnostic), new_data|
      new_data[node] = diagnostic.each_with_object({}) do |(test, info), new_diagnostic|
        next unless diff.include?(test)

        # plain Hash#merge; Hash#+ is not part of core Ruby
        new_diagnostic[test] = info.merge('consistent' => false)
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# Reports the versions of the runtime, the main frameworks and the database
# the application runs on.
class Diagnostic::Environment < Diagnostic::Base
  def self.generate
    pack do
      ruby_version = "#{RUBY_VERSION}-p#{RUBY_PATCHLEVEL} (#{RUBY_PLATFORM})"
      {
        'Environment'             => Rails.env,
        'Rails'                   => Rails.version,
        'Netzke Core'             => Netzke::Core::VERSION,
        'Netzke Basepack'         => Netzke::Basepack::VERSION,
        'Ruby'                    => ruby_version,
        'RubyGems'                => Gem::VERSION,
        'Database Adapter'        => Diagnostic::Database.db_adapter_name,
        'Database Server'         => Diagnostic::Database.db_server_name,
        'Database Version'        => db_version,
        'Database Schema Version' => db_schema
      }
    end
  end

  # Database version, or the result of `error` with the exception message
  # when the lookup raises.
  def self.db_version
    Diagnostic::Database.db_version
  rescue => e
    error(e.message)
  end

  # Database schema version, or the result of `error` with the exception
  # message when the lookup raises.
  def self.db_schema
    Diagnostic::Database.db_schema
  rescue => e
    error(e.message)
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
# Diagnostic used to report failures raised while running other diagnostics.
class Diagnostic::Fatal < Diagnostic::Base
  # HTML banner shown when something went wrong.
  def self.display_alert_message
    [
      '<h3 class="error">Something went wrong.</br>',
      'Consistency is checked between remaining nodes if applicable.</h3>'
    ].join
  end

  # Wraps +msg+ in the nested result layout {class name => {node => {type =>
  # info}}}. opts[:node] defaults to this node's IP and opts[:type] to
  # 'RuntimeError'.
  def self.message(msg, opts = {})
    reporting_node = opts[:node] || Diagnostic::Node.my_ip
    error_type = opts[:type] || 'RuntimeError'
    by_type = {error_type => error(msg)}
    {name => {reporting_node => by_type}}
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Compares the nodes connected to Postgres with the nodes discovered through
# AWS EC2 (the latter only when running on AWS).
class Diagnostic::Nodes < Diagnostic::Base
  def self.generate
    pack do
      # nil when not on AWS; a one-element list holding the error message
      # when the EC2 lookup fails
      aws_nodes = begin
        if Diagnostic::Aws::Ec2Instance.is_aws?
          Diagnostic::Aws::Ec2Instance.new.nodes.sort
        end
      rescue => e
        [e.message]
      end

      pg_nodes = Diagnostic::Node.get_nodes.sort

      if aws_nodes.nil? || pg_nodes == aws_nodes
        pg_nodes.join("\n")
      else
        error("There is a discrepancy between nodes connected to "\
              "Postgres and those discovered through AWS EC2.\n"\
              "Postgres: \n#{pg_nodes.join("\n")}\n"\
              "AWS: \n#{aws_nodes.join("\n")}")
      end
    end
  end
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
# Runs the diagnostics named in a request, either for the local node only or
# aggregated over all nodes, and renders the results.
class Diagnostic::Reporter < Diagnostic::Request
  # reports maps a report name to the list of diagnostics it stands for;
  # diagnostics holds the diagnostic classes resolved for the current run.
  class_attribute :reports, :diagnostics

  self.reports = {}
  self.diagnostics = []

  # timeout in seconds for remote nodes when DIAG_TIMEOUT is not set
  DEFAULT_TIMEOUT = 10

  # Resolves the comma separated `op` list of +request+ (report names are
  # expanded to their diagnostics) and returns the local result when scope
  # is 'local', the aggregated result otherwise.
  def self.run(request)
    self.request = request

    ops = op.split(/,\s*/).uniq - [unresolve_diagnostic(self)]
    reps = ops.select { |o| reports.key?(o) }

    names = (ops - reps) + reps.map { |r| reports[r] }.flatten
    self.diagnostics = names.uniq.map { |d| resolve_diagnostic(d) }

    scope == 'local' ? generate : aggregate
  end

  # All methods below are public class methods. The original file had a
  # bare `private` here, which has no effect on `def self.` methods.

  # Turns a diagnostic name ('delayed_job' or 'Diagnostic::DelayedJob')
  # into its class.
  #
  # NOTE(review): +name+ originates from the request and goes through
  # constantize; consider checking it against a list of known diagnostics.
  def self.resolve_diagnostic(name)
    return name.constantize if name.include?('Diagnostic::')

    ('Diagnostic::' + name.downcase.camelize).constantize
  end

  # Inverse of resolve_diagnostic: Diagnostic::DelayedJob => 'delayed_job'.
  def self.unresolve_diagnostic(klass)
    klass.name.demodulize.underscore
  end

  # Runs every resolved diagnostic on this node. A diagnostic that raises is
  # reported through Diagnostic::Fatal instead of aborting the run.
  def self.generate
    diagnostics.each_with_object({}) do |d, h|
      begin
        h[d.name] = d.generate
      rescue => e
        h.deep_merge!(Diagnostic::Fatal.message(e.message, type: d.name))
      end
    end
  end

  # Local plus remote results with consistency flags applied, together with
  # the subset of failing or inconsistent tests.
  def self.aggregate
    data = consistency(generate.deep_merge(get_remote_diagnostics))
    {'data' => data, 'errors' => errors(data)}
  end

  # Lets every diagnostic class flag its own results for consistency.
  def self.consistency(data)
    data.each_with_object({}) do |(klass, result), h|
      h[klass] = resolve_diagnostic(klass).apply_consistency(result)
    end
  end

  # Keeps only the tests that failed or (outside local scope) are
  # inconsistent; nodes and diagnostics left empty are removed.
  def self.errors(data)
    data.each_with_object({}) do |(klass, result), new_data|
      new_data[klass] = result.each_with_object({}) do |(node, diagnostic), new_result|
        new_result[node] = diagnostic.each_with_object({}) do |(test, info), new_diagnostic|
          passed = info['status'] && (scope == 'local' || info['consistent'])
          new_diagnostic[test] = info unless passed
        end
        new_result.delete(node) if new_result[node].empty?
      end
      new_data.delete(klass) if new_data[klass].empty?
    end
  end

  # Concatenated display output of all diagnostics in +result+.
  # `join` is used rather than `sum`: core Array#sum cannot add Strings
  # and an empty result now yields '' instead of 0.
  def self.displays(result)
    result.map { |d, r| resolve_diagnostic(d).display(r) }.join
  end

  # Requests the aggregatable diagnostics from every other node (one thread
  # per node) and deep-merges the answers. A node that fails or does not
  # answer with 200 is represented by a Diagnostic::Fatal message.
  #
  # Raises ArgumentError when DIAG_TIMEOUT is set but is not numeric.
  def self.get_remote_diagnostics
    ops = diagnostics.map { |d| unresolve_diagnostic(d) if d.aggregatable }.compact
    return {} if ops.empty?

    # values that are the same for every node are computed once
    use_ssl = ssl?
    port = request.port
    # ENV values are Strings, Net::HTTP needs numeric timeouts
    timeout = Float(ENV['DIAG_TIMEOUT'] || DEFAULT_TIMEOUT)

    nodes = Diagnostic::Node.get_nodes - [Diagnostic::Node.my_ip]
    remote = nodes.sort.map do |n|
      Thread.new do
        uri = Addressable::URI.new(host: n, port: port)
        uri.scheme = use_ssl ? 'https' : 'http'
        uri.path = '/marty/diag.json'
        uri.query_values = {
          scope: 'local',
          op: ops.join(','),
        }
        req = Net::HTTP.new(uri.host, uri.port)
        req.use_ssl = use_ssl
        req.read_timeout = req.open_timeout = timeout
        # NOTE(review): certificate verification is switched off -
        # presumably because nodes are addressed by IP; confirm this is
        # acceptable for the deployment.
        req.verify_mode = OpenSSL::SSL::VERIFY_NONE

        begin
          response = req.start { |http| http.get(uri.to_s) }
          if response.code == "200"
            JSON.parse(response.body)
          else
            Diagnostic::Fatal.message(response.body,
                                      type: response.message,
                                      node: uri.host)
          end
        rescue => e
          Diagnostic::Fatal.message(e.message,
                                    type: e.class,
                                    node: uri.host)
        end
      end
    end

    # Thread#value joins the thread before returning its result
    remote.empty? ? {} : remote.map(&:value).reduce(:deep_merge)
  end
end
|