watchtower_job 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +2 -0
- data/.travis.yml +5 -0
- data/Gemfile +6 -0
- data/Guardfile +17 -0
- data/README.md +36 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/exe/watchtower_job +63 -0
- data/initd_script_basic +42 -0
- data/initd_script_counts +42 -0
- data/lib/watchtower_job.rb +90 -0
- data/lib/watchtower_job/application.rb +153 -0
- data/lib/watchtower_job/data_feed.rb +92 -0
- data/lib/watchtower_job/event_collector.rb +45 -0
- data/lib/watchtower_job/execution_failure.rb +79 -0
- data/lib/watchtower_job/interface_execution.rb +72 -0
- data/lib/watchtower_job/kinesis_reader.rb +43 -0
- data/lib/watchtower_job/version.rb +3 -0
- data/lib/watchtower_job/vine_site_services.rb +43 -0
- data/lib/watchtower_job/workflow_counts.rb +54 -0
- data/send_an_event_for_each_failure_type.rb +30 -0
- data/watchtower_job.gemspec +39 -0
- metadata +282 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: b074e4831eec4322589f4562fd25bd2a364f45cd
|
4
|
+
data.tar.gz: aaf0d321d18885196a6efe189ac9f38ea7bd52c3
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: df6b2d939a5524cb29afa118f37c3921fece0ee090fada993aaeff3112763a0f64aedc532c2a2bf0117af2e546107b06fa7f127167d5acedfbde4b3108b64c28
|
7
|
+
data.tar.gz: e4849ce069b3700028c0d6c7884a237fbe1c789752f57d8ac496ca27b6cdbd1e354787742c8ff40bd7c1202b76217c05d9d45f49091d90030692dfdf41e212b5
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Guardfile
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# Guard configuration: re-runs RSpec while files change during development.
# More info at https://github.com/guard/guard#readme

guard :rspec, cmd: "bundle exec rspec --order rand" do
  require "guard/rspec/dsl"
  dsl = Guard::RSpec::Dsl.new(self)

  # Feel free to open issues for suggestions and improvements

  # RSpec files: a change to the helper or support files maps to the whole
  # spec directory; a change to a spec file maps to that file.
  rspec = dsl.rspec
  watch(rspec.spec_helper) { rspec.spec_dir }
  watch(rspec.spec_support) { rspec.spec_dir }
  watch(rspec.spec_files)

  # Library files map to the spec with the same relative name. The pattern is
  # anchored at the end so editor leftovers such as lib/foo.rb~ or
  # lib/foo.rb.swp no longer match.
  watch(%r{^lib/(.*)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
end
|
data/README.md
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# WatchtowerJob
|
2
|
+
|
3
|
+
This gem will read from an Interface Execution event Kinesis stream and post events to Einstein.
|
4
|
+
|
5
|
+
Alarm conditions described here: https://wiki.sdlc.appriss.com/display/EINSTEIN/WatchTower+Alarm+Types
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'watchtower_job'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install watchtower_job
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
This is expected to be a long-running process. Run the command in /exe. Use the -h command line arg to understand what options can be used with the running of this gem for choosing the NT2 environment and which Einstein instance to use.
|
26
|
+
|
27
|
+
## Development
|
28
|
+
|
29
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
30
|
+
|
31
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
32
|
+
|
33
|
+
## Contributing
|
34
|
+
|
35
|
+
This source is kept in Appriss's BitBucket repository.
|
36
|
+
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Development console: loads the bundle and the gem, then opens an IRB session.
require 'bundler/setup'
require 'watchtower_job'
require 'irb'

# Fixtures and/or initialization code can be added here to make experimenting
# with the gem easier.
#
# A different console may be used instead of IRB, e.g. Pry (remember to add
# pry to the Gemfile first):
#   require 'pry'
#   Pry.start

IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/exe/watchtower_job
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
#!/usr/bin/env ruby
# -*- mode: ruby -*-
#
# Command-line launcher for the Watchtower job. It parses the options, refuses
# to start when another instance holds the pid-file lock, configures logging
# and the NT2 / Einstein environments, and then runs
# WatchtowerJob::Application (which is expected to run indefinitely).
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")

require "watchtower_job"
require 'slop'
require 'einstein_environment'
require 'syslog/logger'

opts = Slop.parse(ARGV, :help => true) do
  banner <<-EOS
*****************
watchtower_job listens to the NT2 Interface Execution Kinesis stream and posts Data Feed NT2 events to Einstein. This app runs indefinitely.
Version: #{WatchtowerJob::VERSION}
*****************
Usage:
watchtower_job [options]
where [options] are:
  EOS
  on :l, :log_level=, "Set to debug, info, warn, error, or fatal.", :default => "debug"
  on :o, :log_file=, "File to log output to. By default logs to stdout.", :default => nil
  on :n, :nt2_env=, "Environment from which to read NT2 Interface Execution Kinesis stream", :default => Einstein::Environment::Environment.get_appriss_environment
  on :e, :einstein_env=, "Environment of Einstein for posting events", :default => Einstein::Environment::Environment.get_appriss_environment
  on :c, :counts_only, 'Do not send errors, only counts of successful runs', :default => false
  on :p, :pid_file=, "File to which the current pid will be written", :default => nil
  on :s, :sequence_file=, "File from which the last read squence_number will be read and written", :default => nil
  on :d, :datafeed_state_file=, "File from which the data feed state will be read and written", :default => nil
end
exit 0 if opts.help?

if pid_file_name = opts[:pid_file]
  # Open (creating if necessary) and lock the pid file on every start. The
  # previous code only locked a file that already existed, so the very first
  # instance never held the lock and a second instance could start beside it.
  # The handle is deliberately never closed: the advisory lock lives as long
  # as this process does.
  pid_lock = File.open(pid_file_name, File::RDWR | File::CREAT, 0644)
  pid_lock.flock(File::LOCK_EX | File::LOCK_NB) or abort "WatchTower is already running using #{pid_file_name} to track it's status."
  WatchtowerJob.write_pid(pid_file_name)
end

#setup logging
# NOTE(review): the -o value is handed to Syslog::Logger as the syslog program
# name, not opened as a file, although the option help calls it a file.
log_name = opts[:log_file]
log = log_name ? Syslog::Logger.new(log_name) : Logger.new($stdout)

# Only accept real severity names; const_get on arbitrary input could resolve
# an unrelated Logger constant or fail with an unhelpful NameError.
level_name = opts[:log_level].to_s.upcase
unless %w[DEBUG INFO WARN ERROR FATAL UNKNOWN].include?(level_name)
  abort "Unknown log level #{opts[:log_level].inspect}. Set to debug, info, warn, error, or fatal."
end
log.level = Logger.const_get(level_name)

log.formatter =
  if log_name
    proc do |severity, datetime, progname, msg|
      datetime = datetime.strftime('%m/%d/%yT%H:%M')
      "[#{datetime}] #{$$} #{severity} -- #{msg}\n"
    end
  else
    # Give a more concise format when logging to STDOUT
    proc do |severity, datetime, progname, msg|
      "#{severity}: #{msg}\n"
    end
  end

WatchtowerJob.set_logger(log)
WatchtowerJob.set_nt2_environment(opts[:nt2_env])
WatchtowerJob.set_einstein_environment(opts[:einstein_env])

WatchtowerJob::Application.new(skip_errors:opts[:counts_only], last_run_file: opts[:sequence_file], datafeed_state_file: opts[:datafeed_state_file]).run
|
data/initd_script_basic
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
#!/bin/sh
# This script must be located within the /etc/init.d directory
# this should be copied as watchtower_job
#
# AWS credentials are no longer embedded in this script. They are read from
# /etc/sysconfig/watchtower_job (owner root, mode 0600), which must define:
#   AWS_ACCESS_KEY_ID=...
#   AWS_SECRET_ACCESS_KEY=...
# NOTE(review): the key pair previously hard-coded here shipped inside the
# gem; treat it as compromised and rotate it.

### BEGIN INIT INFO
# Provides: watchtower_job
# Required-Start: $all
# Required-Stop: $local_fs $remote_fs $network
# Default-Start: 3 5
# Default-Stop: 0 1 2 6
# Short-Description: WatchTower Job
# Description: Generates Einstein events based on interface executions
### END INIT INFO

. /etc/rc.status
rc_reset

DUSER=root
DGROUP=root
CREDENTIALS_FILE=/etc/sysconfig/watchtower_job
PID_FILE=/var/tmp/watchtower_job.pid

case "$1" in
  start)
    echo -n 'Starting WatchTower Job: '
    if [ -r "$CREDENTIALS_FILE" ]; then
      . "$CREDENTIALS_FILE"
      export AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY
      nohup watchtower_job -o watchtower_job -n prod -p "$PID_FILE" -s /var/tmp/watchtower_job.last -d /var/tmp/watchtower_job_datafeed_state.yml > /dev/null 2>&1 &
    else
      echo "cannot read $CREDENTIALS_FILE" >&2
      # 6 is the LSB "program is not configured" status
      rc_failed 6
    fi
    rc_status -v
    ;;
  stop)
    echo -n 'Stopping WatchTower Job: '
    # Without a pid file there is nothing to signal; report failure instead of
    # calling kill with no arguments.
    if [ -f "$PID_FILE" ]; then
      kill `cat "$PID_FILE"`
    else
      rc_failed 1
    fi
    rc_status -v
    ;;
  status)
    if ! ps -p `cat "$PID_FILE"` > /dev/null 2>&1; then echo "not running">&2; else echo "running">&2; fi
    ;;
  restart)
    $0 stop
    $0 start
    ;;
  *)
    echo "Usage: $0 {start|stop|restart|status}"
    exit 1
esac
rc_exit
|
data/initd_script_counts
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
#!/bin/sh
# This script must be located within the /etc/init.d directory
# this should be copied as watchtower_job_prep_counts
#
# AWS credentials are no longer embedded in this script. They are read from
# /etc/sysconfig/watchtower_job (owner root, mode 0600), which must define:
#   AWS_ACCESS_KEY_ID=...
#   AWS_SECRET_ACCESS_KEY=...
# NOTE(review): the key pair previously hard-coded here shipped inside the
# gem; treat it as compromised and rotate it.

### BEGIN INIT INFO
# Provides: watchtower_job_prep_counts
# Required-Start: $all
# Required-Stop: $local_fs $remote_fs $network
# Default-Start: 3 5
# Default-Stop: 0 1 2 6
# Short-Description: WatchTower Job Prep Counts
# Description: Generates Einstein events based on interface executions
### END INIT INFO

. /etc/rc.status
rc_reset

DUSER=root
DGROUP=root
CREDENTIALS_FILE=/etc/sysconfig/watchtower_job
PID_FILE=/var/tmp/watchtower_job_prep_counts.pid

case "$1" in
  start)
    echo -n 'Starting WatchTower Job Prep Counts: '
    if [ -r "$CREDENTIALS_FILE" ]; then
      . "$CREDENTIALS_FILE"
      export AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY
      nohup watchtower_job -o watchtower_job_prep_counts -n prep -p "$PID_FILE" -s /var/tmp/watchtower_job_prep_counts.last -c > /dev/null 2>&1 &
    else
      echo "cannot read $CREDENTIALS_FILE" >&2
      # 6 is the LSB "program is not configured" status
      rc_failed 6
    fi
    rc_status -v
    ;;
  stop)
    echo -n 'Stopping WatchTower Job Prep Counts: '
    # Without a pid file there is nothing to signal; report failure instead of
    # calling kill with no arguments.
    if [ -f "$PID_FILE" ]; then
      kill `cat "$PID_FILE"`
    else
      rc_failed 1
    fi
    rc_status -v
    ;;
  status)
    if ! ps -p `cat "$PID_FILE"` > /dev/null 2>&1; then echo "not running">&2; else echo "running">&2; fi
    ;;
  restart)
    $0 stop
    $0 start
    ;;
  *)
    echo "Usage: $0 {start|stop|restart|status}"
    exit 1
esac
rc_exit
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'watchtower_job/version'
|
2
|
+
require 'watchtower_job/data_feed'
|
3
|
+
require 'watchtower_job/execution_failure'
|
4
|
+
require 'watchtower_job/interface_execution'
|
5
|
+
require 'watchtower_job/event_collector'
|
6
|
+
require 'watchtower_job/vine_site_services'
|
7
|
+
require 'watchtower_job/kinesis_reader'
|
8
|
+
require 'watchtower_job/workflow_counts'
|
9
|
+
require 'watchtower_job/application'
|
10
|
+
require 'net/https'
|
11
|
+
require 'logger'
|
12
|
+
require 'json'
|
13
|
+
|
14
|
+
# Process-wide configuration and lazily built service handles for the
# Watchtower job: the NT2 / Einstein environment names, the shared logger, and
# memoized clients derived from those environments. Changing an environment
# discards the handles that were built for the previous one.
module WatchtowerJob
  # Raised by DataFeed when a feed's implementation status is not "Completed".
  class DataFeedNotCompleted < RuntimeError; end

  # DFR2 API base URL for each known NT2 environment.
  DFR2_API_URLS = {
    'dev'  => 'https://dfr2-api.dev.awstools.appriss.com',
    'qa'   => 'https://dfr2-api.dev.awstools.appriss.com',
    'prep' => 'https://dfr2-api.prod.awstools.appriss.com',
    'prod' => 'https://dfr2-api.prod.awstools.appriss.com',
    'test' => 'https://dfr2-api.test.awstools.appriss.com'
  }.freeze

  @@nt2_env = nil
  @@nt2_service = nil
  @@log = nil
  @@event_collector_url = nil # not read anywhere in this module
  @@einstein_env = nil
  @@dfr2_api_url = nil
  # Declared up front like the other handles; previously these two only came
  # into existence on first assignment.
  @@vine_services = nil
  @@event_collector = nil

  # Writes the current process id to +file_name+, replacing its contents.
  def self.write_pid(file_name)
    File.open(file_name, 'w') { |file| file.write(Process.pid) }
  end

  # Memoized HTTPS client for the NT2 web-external host of the configured
  # environment.
  #
  # @raise [RuntimeError] when no NT2 environment has been set
  def self.nt2_service
    return @@nt2_service unless @@nt2_service.nil?
    raise 'Set NT2 Environment!' if @@nt2_env.nil?
    uri = URI.parse "https://web-external-#{@@nt2_env}.nt2.appriss.com"
    @@nt2_service = Net::HTTP.new(uri.host, uri.port)
    @@nt2_service.use_ssl = true
    @@nt2_service
  end

  # Base URL of the DFR2 API serving the configured NT2 environment.
  #
  # @raise [RuntimeError] when no NT2 environment has been set
  # @raise [ArgumentError] when the environment has no known DFR2 API; this
  #   used to return nil, which only surfaced later as a malformed request URL
  def self.dfr2_api_url
    raise 'Set NT2 Environment!' if @@nt2_env.nil?
    @@dfr2_api_url ||= DFR2_API_URLS.fetch(@@nt2_env) do
      raise ArgumentError, "No DFR2 API is known for NT2 environment #{@@nt2_env.inspect}"
    end
  end

  # Memoized VineSiteServices instance.
  #
  # @raise [RuntimeError] when no NT2 environment has been set
  def self.vine_site_service
    raise 'Set NT2 Environment!' if @@nt2_env.nil?
    @@vine_services ||= VineSiteServices.new
  end

  # The configured NT2 environment name (lower case), or nil when unset.
  def self.nt2_env
    @@nt2_env
  end

  # Sets the NT2 environment (stored lower-cased) and drops every handle that
  # was derived from the previous environment.
  def self.set_nt2_environment(env)
    @@nt2_service = nil
    @@vine_services = nil
    @@dfr2_api_url = nil
    @@nt2_env = env.downcase
  end

  # Memoized EventCollector pointed at the configured Einstein environment.
  #
  # @raise [RuntimeError] when no Einstein environment has been set
  def self.einstein_event_collector
    raise 'Set Einstein Environment!' if @@einstein_env.nil?
    @@event_collector ||= EventCollector.new("https://events-watchtower-job.#{@@einstein_env}.awstools.appriss.com/")
  end

  # Sets the Einstein environment (stored lower-cased) and drops the collector
  # built for the previous one.
  def self.set_einstein_environment(env)
    @@event_collector = nil
    @@einstein_env = env.downcase
  end

  # The shared logger. When none has been installed, a DEBUG-level stdout
  # logger with a terse "SEVERITY: message" format is created and kept.
  def self.log
    return @@log unless @@log.nil?
    #default to stdout
    @@log = Logger.new($stdout)
    @@log.level = Logger::DEBUG
    @@log.formatter = proc do |severity, datetime, progname, msg|
      "#{severity}: #{msg}\n"
    end
    @@log
  end

  # Installs +logger+ as the shared logger.
  def self.set_logger(logger)
    @@log = logger
  end
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
require 'einstein_aws_keys'
|
2
|
+
require 'aws-sdk'
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
module WatchtowerJob
  # Long-running consumer of the NT2 Interface Execution Kinesis stream.
  #
  # Every stream message is parsed into an InterfaceExecution. Successful runs
  # that ended after startup are counted per workflow and, unless errors are
  # being skipped, the owning DataFeed decides which alarm events to post. A
  # check-in event is posted at startup and again whenever more than
  # CHECK_IN_FREQUENCY seconds have passed since the last one.
  class Application
    CHECK_IN_FREQUENCY = 5 * 60 #5 minutes

    # @param skip_errors [Boolean] when true only workflow counts are kept;
    #   data feeds are never looked up and no failure handling happens
    # @param sleep_time [Integer] seconds to sleep after an empty read
    # @param last_run_file [String, nil] file holding the last processed
    #   stream sequence number
    # @param datafeed_state_file [String, nil] YAML file holding the ids of
    #   data feeds that were in a cleared state at the last shutdown
    def initialize(skip_errors:false,sleep_time:30,last_run_file:nil,datafeed_state_file:nil)
      @datafeed_state_file = datafeed_state_file
      @data_feeds = {}
      get_datafeed_state
      @sleep_time = sleep_time
      @skip_errors = skip_errors
      @last_run_file = last_run_file
      s3 = Aws::S3::Resource.new(Einstein::AwsKeys::AwsKeys.get_keys.merge(region:'us-east-1'))
      config = YAML.load s3.bucket('cttools-watchtower').object("configs/kinesis_keys.yaml").get.body.string
      @stream = KinesisReader.new(config[WatchtowerJob.nt2_env], last_sequence_number)
      @workflow_counter = WorkflowCounts.new
    end

    # Reads the stream until a signal or an error interrupts it; in either
    # case the last sequence number and the data feed state are saved first.
    def run
      startup_time = Time.now
      post_check_in(@last_check_in = startup_time)
      @stream.read do |message|
        check_in
        if message.nil?
          WatchtowerJob.log.debug "Read nothing, sleep for #{@sleep_time}"
          sleep @sleep_time
          next
        end
        message_data = JSON.parse(message['data'])

        if (InterfaceExecution.required_values - message_data.keys).length != 0
          WatchtowerJob.log.info "Ignoring non-InterfaceExecution message: #{message}"
          next
        end

        interface_execution = InterfaceExecution.new(message_data)
        if interface_execution.success? && interface_execution.end_time >= startup_time #prevent sending large counts at startup
          @workflow_counter.count(interface_execution.workflow)
        end

        next if @skip_errors

        data_feed = get_feed(interface_execution.data_feed_id)
        if data_feed.nil?
          #not implemented, do nothing with this execution
          WatchtowerJob.log.debug "Data Feed #{interface_execution.data_feed_id} implementation status is not \"Completed\""
          next
        end
        WatchtowerJob.log.debug "Read Interface Execution for #{interface_execution.data_feed_id}"
        data_feed.handle_latest_execution(interface_execution)
      end
    rescue SignalException => e
      WatchtowerJob.log.fatal "Received #{e.message} Signal.... Shutting down..."
      persist_state
    rescue => e
      WatchtowerJob.log.fatal "Crashed! - #{e.class}: #{e.message}"
      # Double quotes matter here: '\n' in single quotes is a literal
      # backslash followed by "n", which put the whole backtrace on one line.
      WatchtowerJob.log.debug "Backtrace:\n#{Array(e.backtrace).join("\n")}"
      persist_state
    end

    # Writes the stream's last processed sequence number to the last-run file.
    # Does nothing when no file was configured.
    def keep_last_sequence_number
      return unless @last_run_file
      File.open(@last_run_file, 'w') { |file| file.write(@stream.last_sequence_number) }
      WatchtowerJob.log.info "Saved last sequence number. (#{@stream.last_sequence_number})"
    end

    # Reads the sequence number saved by a previous run.
    #
    # @return [String, nil] the saved number, or nil when no file is
    #   configured, the file is missing, or the file is blank. A blank file is
    #   what keep_last_sequence_number leaves behind when nothing had been
    #   read yet; returning "" for it would be treated as a real sequence
    #   number by the stream reader.
    def last_sequence_number(file=@last_run_file)
      return nil if file.nil?
      unless File.exist?(file)
        WatchtowerJob.log.info "#{file} does not exist -- will not use a last_sequence"
        return nil
      end
      last_sequence = File.read(file).strip
      if last_sequence.empty?
        WatchtowerJob.log.info "#{file} is empty -- will not use a last_sequence"
        return nil
      end
      WatchtowerJob.log.info "Stream's last sequence number: (#{last_sequence})"
      last_sequence
    end

    # Saves the ids of data feeds that currently have no errors and have been
    # cleared at least once. Does nothing when no state file was configured.
    def keep_datafeed_state
      return unless @datafeed_state_file
      cleared_list = @data_feeds.select do |id,data_feed|
        data_feed.errors == 0 && !data_feed.never_been_cleared
      end
      File.open(@datafeed_state_file, 'w') { |file| file.write(cleared_list.keys.to_yaml) }
      WatchtowerJob.log.info "Saved datafeed state: (#{cleared_list.keys})"
    end

    # Re-creates the data feeds listed in the state file, marking each as
    # already cleared. The list is split over three threads.
    def get_datafeed_state(file=@datafeed_state_file)
      return nil if file.nil?
      if File.exist?(file)
        # An empty file parses to false/nil rather than a list.
        datafeed_list = YAML.load(File.read(file)) || []
        WatchtowerJob.log.info "found #{datafeed_list.count} datafeeds, beginning load..."
        threads = []
        in_groups(datafeed_list,3).each_with_index do |list,t_index|
          threads << Thread.new do
            list.each_with_index do |id,index|
              WatchtowerJob.log.info "thread:#{(t_index + 1)} Fetching(#{(t_index + 1) * index}): #{id} ..."
              get_feed(id,false)
            end
          end
        end
        threads.map(&:join)
        WatchtowerJob.log.info "datafeed state is: (#{@data_feeds.keys})"
      else
        WatchtowerJob.log.info "#{file} does not exist -- will start with empty state"
      end
    end

    # Returns the cached DataFeed for +id+, creating it on first use.
    #
    # @return [DataFeed, nil] nil when the feed raised DataFeedNotCompleted
    def get_feed(id,never_been_cleared=true)
      @data_feeds[id] ||= DataFeed.new(id,never_been_cleared)
    rescue DataFeedNotCompleted
      WatchtowerJob.log.info "data feed not in completed state, ignoring"
      return nil
    end

    # Posts a check-in event and sends the workflow counts once more than
    # CHECK_IN_FREQUENCY seconds have passed since the previous check-in.
    def check_in
      check_in_time = Time.now
      if (check_in_time - @last_check_in).to_i > CHECK_IN_FREQUENCY
        post_check_in(check_in_time)
        @workflow_counter.send_counts
        @last_check_in = check_in_time
      end
    end

    # Posts the "<ENV>.check_in" event for the Watchtower Job component.
    def post_check_in(check_in_time)
      event_type = "#{WatchtowerJob.nt2_env.upcase}.check_in"
      data = WatchtowerJob.einstein_event_collector.event_body(
        config_item_type: 'Einstein Component',
        config_item_name: 'Watchtower Job',
        event_type: event_type,
        value: 'set',
        start_time: check_in_time
      )
      WatchtowerJob.einstein_event_collector.post_event(data)
    end

    # Splits +array+ into +number+ consecutive groups whose sizes differ by at
    # most one; the earlier groups receive the extra elements.
    #
    # @example
    #   in_groups([1, 2, 3, 4, 5, 6, 7], 3) # => [[1, 2, 3], [4, 5], [6, 7]]
    def in_groups(array,number)
      group_size = array.size / number
      leftovers = array.size % number
      groups = []
      start = 0
      number.times do |index|
        length = group_size + (leftovers > 0 && leftovers > index ? 1 : 0)
        groups << array.slice(start, length)
        start += length
      end
      groups
    end

    private

    # Saves everything needed to resume after a restart.
    def persist_state
      keep_last_sequence_number
      keep_datafeed_state
    end
  end
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
module WatchtowerJob
  # Alarm state for a single NT2 data feed.
  #
  # Keeps a count of consecutive failed executions and the failure currently
  # alarmed in Einstein. A successful execution resets the count and clears
  # the alarm; a failed execution may raise a new alarm, chosen by
  # determine_error.
  class DataFeed
    ERROR_THRESHOLD = 7
    MAESTRO_ERROR_THRESHOLD = 4

    attr_reader :errors, :id, :never_been_cleared

    # Fetches the feed's details from the DFR2 API.
    #
    # @param id the data feed id
    # @param never_been_cleared [Boolean] false when the feed is known to have
    #   been cleared before (restored from saved state)
    # @raise [DataFeedNotCompleted] when the implementation status for the
    #   current NT2 environment is not "Completed". A feed with no entry for
    #   that environment is treated the same way; it used to fail with a
    #   NoMethodError on nil.
    def initialize(id,never_been_cleared=true)
      @id = id
      details = JSON.parse retrieve_data_feed_details(id)
      @data_feed_attributes = details['data']['attributes']
      environments = @data_feed_attributes['deployed-environments'] || {}
      deployment = environments[WatchtowerJob.nt2_env] || {}
      raise DataFeedNotCompleted unless deployment['implementation-status-cd'] == 'Completed'
      @errors = 0
      @current_failure = nil
      @never_been_cleared = never_been_cleared
    end

    # Updates the alarm state from the most recent execution.
    def handle_latest_execution(execution)
      WatchtowerJob.log.debug "Run #{execution.success? ? 'successful' : 'failed'}."
      if execution.success?
        @errors = 0
        if @never_been_cleared
          first_clear
        else
          clear_current_failure
        end
      else
        @errors += 1
        return unless failure = determine_error(execution)
        set_current_failure(failure)
      end
    end

    # Replaces the current alarm with +failure+ when +failure+ compares
    # greater than it (see ExecutionFailure#>): the old alarm is cleared and a
    # start event is sent for the new one.
    def set_current_failure(failure)
      if failure > @current_failure
        WatchtowerJob.log.debug "Set alarm to #{failure.name}"
        clear_current_failure
        @current_failure = failure
        failure.send_start_event
      end
    end

    # First success seen for this feed: sends a clear for every failure type,
    # since it is unknown which alarms may already be set.
    def first_clear
      WatchtowerJob.log.debug "Clearing All Possible Failures"
      @current_failure = nil
      @never_been_cleared = false
      ExecutionFailure.clear_all_failures_possible(id)
    end

    # Sends the clear event for the current alarm, if there is one.
    def clear_current_failure
      if @current_failure
        WatchtowerJob.log.debug "Clearing #{@current_failure.name}"
        failure = @current_failure
        # Forget the alarm before posting so a failed post does not leave it set.
        @current_failure = nil
        failure.send_clear_event
      end
    end

    # Picks the failure type for a failed execution; the first matching rule
    # wins.
    #
    # @return [ExecutionFailure, nil] nil when no rule matches
    def determine_error(execution)
      #please make sure you update the wiki as you make changes here
      if execution.error_messages_include?(/Failed during Getsend/, -2)
        return ExecutionFailure.new_getsend_minus_two(execution)
      elsif execution.error_messages_include?(/Could not start interface\. Error communicating with the gateway\./)
        return ExecutionFailure.new_gateway_error_failure(execution)
      elsif execution.message =~ /Incomplete previous run detected/ #&& execution.sherlock_feed?
        return ExecutionFailure.new_incomplete_previous_run(execution)
      elsif execution.error_messages_include?(/Failed during Getsend/, -7) && execution.imports_enabled?
        return ExecutionFailure.new_getsend_minus_seven(execution)
      elsif @errors >= MAESTRO_ERROR_THRESHOLD && execution.error_messages_include?(/Interface was killed automatically, run time exceeded/) && photo_feed?
        return ExecutionFailure.new_maestro_max_runtime_exceeded(execution)
      elsif execution.message =~ /Failed during Import/
        return ExecutionFailure.new_failed_import(execution)
      elsif @errors >= ERROR_THRESHOLD
        return ExecutionFailure.new_repeated_errors(execution)
      end
    end

    # True when the feed's only data type is "Photo".
    def photo_feed?
      # defined? rather than ||= so a false answer is remembered too.
      return @is_photo_feed if defined?(@is_photo_feed)
      @is_photo_feed = @data_feed_attributes['data-types'] == ['Photo']
    end

    # GETs the feed's details from the DFR2 API. After a failure it retries up
    # to four more times, sleeping retries**2 seconds before each attempt, and
    # re-raises the last error once the retries are used up.
    #
    # @return the RestClient response, which initialize parses as JSON
    def retrieve_data_feed_details(id)
      retries ||= 0
      sleep(retries ** 2)
      RestClient.get "#{WatchtowerJob.dfr2_api_url}/data_feeds/#{id}?include_config=false", :content_type => 'application/json'
    rescue => e
      retry if (retries += 1) <= 4
      WatchtowerJob.log.error e.message
      WatchtowerJob.log.error "error with GET from dfr2-api, retry #{retries}"
      raise e
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'einstein_aws_keys'
|
2
|
+
|
3
|
+
module WatchtowerJob
  # Client for the Einstein event collector: builds event payloads and POSTs
  # them to the collector's /events endpoint using HTTP basic auth.
  class EventCollector
    # Basic-auth user name under which this component posts events.
    COMPONENT_USER = 'watchtower_job'

    # @param collector_url [String] base URL of the collector; SSL is enabled
    #   when its scheme is https
    def initialize(collector_url)
      uri = URI.parse(collector_url)
      @collector_service = Net::HTTP.new(uri.host, uri.port)
      @collector_service.use_ssl = (uri.scheme == 'https')
      @component_password = Einstein::AwsKeys::AwsKeys.get_component_password
    end

    # POSTs +event_body+ as JSON. After a failure (including any non-200
    # response) it retries up to four more times, sleeping retries**2 seconds
    # before each attempt, and re-raises the last error once retries run out.
    #
    # @param event_body [Hash] payload, normally built by #event_body
    # @return the HTTP response of the successful post
    def post_event(event_body)
      retries ||= 0
      WatchtowerJob.log.debug "Event post to #{event_body[:config_item_type]}:#{event_body[:config_item_name]}, type: #{event_body[:event_type]}, value: #{event_body[:value]}"
      post = Net::HTTP::Post.new("/events", {'Content-Type' =>'application/json'})
      post.body = event_body.to_json
      # Authenticate with the password fetched in the constructor instead of a
      # literal committed to source; the fetched value was previously unused.
      # NOTE(review): confirm get_component_password returns the collector
      # password for this user, and rotate the previously hard-coded one.
      post.basic_auth(COMPONENT_USER, @component_password)
      sleep(retries ** 2)
      response = @collector_service.start {|https| https.request(post)}
      raise "Einstein Event failed to post! Status: #{response.code} Body: #{response.body}" unless response.code == "200"
      response
    rescue => e
      retry if (retries += 1) <= 4
      WatchtowerJob.log.error e.message
      WatchtowerJob.log.error "error with POST to einstein events, retry #{retries}"
      raise e
    end

    # Builds the payload hash for one event.
    #
    # @param start_time [Time] stored under :start_ts; defaults to now
    # @param alternate_key [String, nil] when given, stored under
    #   :alternate_name_key; otherwise that key is omitted
    # @param info [Hash] stored under :info
    # @return [Hash] symbol-keyed payload whose :source is 'watch_tower_job'
    def event_body(config_item_type:,config_item_name:,event_type:,value:,start_time:Time.now,alternate_key:nil,info:{})
      body = {
        :source => 'watch_tower_job',
        :config_item_type => config_item_type,
        :config_item_name => config_item_name,
        :event_type => event_type,
        :start_ts => start_time,
        :value => value,
        :info => info
      }
      body[:alternate_name_key] = alternate_key if alternate_key
      body
    end
  end
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
module WatchtowerJob
  # One alarm type for one data feed, together with the values sent to
  # Einstein to start and clear it. Failures are ordered by +priority+; every
  # failure compares greater than nil.
  class ExecutionFailure
    attr_reader :name, :data_feed_id, :failure_start, :priority, :start_value, :clear_value

    def initialize(name,data_feed_id,failure_start,priority,start_value='set',clear_value='clear')
      @name = name
      @data_feed_id = data_feed_id
      @failure_start = failure_start
      @priority = priority
      @start_value = start_value
      @clear_value = clear_value
    end

    # Posts the start value, timestamped with the failure's start time.
    def send_start_event
      send_to_einstein(start_value,@failure_start)
    end

    # Posts the clear value, timestamped with +clear_time+ (now by default).
    def send_clear_event(clear_time=Time.now)
      send_to_einstein(clear_value, clear_time)
    end

    # Compares by priority; nil always sorts below a failure.
    def <=>(other)
      if other.nil?
        1
      else
        @priority <=> other.priority
      end
    end

    def >(other)
      comparison = self <=> other
      comparison == 1
    end

    # Sends a clear event for every failure type that has a new_* factory.
    def self.clear_all_failures_possible(data_feed_id)
      ExecutionFailure.failure_helper_methods.each do |factory|
        ExecutionFailure.send(factory, data_feed_id: data_feed_id, failure_start: nil).send_clear_event
      end
    end

    # Names of the class-level factories, i.e. methods starting with "new_".
    def self.failure_helper_methods
      methods.grep(/^new_/)
    end

    # --- Factories ---------------------------------------------------------
    # Each accepts either an interface execution (supplying the feed id and
    # end time) or explicit data_feed_id: / failure_start: keywords.

    def self.new_getsend_minus_two(interface_execution=nil,data_feed_id:interface_execution.data_feed_id,failure_start:interface_execution.end_time)
      name, priority = 'Getsend -2', 0
      ExecutionFailure.new(name, data_feed_id, failure_start, priority)
    end

    def self.new_gateway_error_failure(interface_execution=nil,data_feed_id:interface_execution.data_feed_id,failure_start:interface_execution.end_time)
      name, priority = 'Gateway Error Alarm', 10
      ExecutionFailure.new(name, data_feed_id, failure_start, priority)
    end

    def self.new_getsend_minus_seven(interface_execution=nil,data_feed_id:interface_execution.data_feed_id,failure_start:interface_execution.end_time)
      name, priority = 'Getsend -7', 20
      ExecutionFailure.new(name, data_feed_id, failure_start, priority)
    end

    def self.new_incomplete_previous_run(interface_execution=nil,data_feed_id:interface_execution.data_feed_id,failure_start:interface_execution.end_time)
      name, priority = 'Incomplete Previous Run', 30
      ExecutionFailure.new(name, data_feed_id, failure_start, priority)
    end

    def self.new_maestro_max_runtime_exceeded(interface_execution=nil,data_feed_id:interface_execution.data_feed_id,failure_start:interface_execution.end_time)
      name, priority = 'Maestro Max Runtime Exceeded', 50
      ExecutionFailure.new(name, data_feed_id, failure_start, priority)
    end

    def self.new_failed_import(interface_execution=nil,data_feed_id:interface_execution.data_feed_id,failure_start:interface_execution.end_time)
      name, priority = 'Failed Import', 70
      ExecutionFailure.new(name, data_feed_id, failure_start, priority)
    end

    def self.new_repeated_errors(interface_execution=nil,data_feed_id:interface_execution.data_feed_id,failure_start:interface_execution.end_time)
      name, priority = 'Repeated Errors', 100
      ExecutionFailure.new(name, data_feed_id, failure_start, priority)
    end

    private

    # Builds and posts a 'Data Feed NT2' event whose type is the lower-cased
    # failure name.
    def send_to_einstein(value,time)
      collector = WatchtowerJob.einstein_event_collector
      data = collector.event_body(
        config_item_type:'Data Feed NT2',
        config_item_name:@data_feed_id,
        alternate_key:'data_feed_id',
        event_type:@name.downcase,
        value:value,
        start_time:time
      )
      WatchtowerJob.einstein_event_collector.post_event(data)
    end

  end
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
module WatchtowerJob
  # One interface execution as reported on the stream, with helpers for
  # inspecting its error records and the import settings of its site/agency.
  class InterfaceExecution
    # Keys a stream message must contain to be treated as an interface execution.
    def self.required_values
      %w(datafeedId runEndTime interfaceExecutionId interfaceId interfaceVersion site type message success)
    end

    attr_reader :end_time, :data_feed_id, :interface_version, :message, :workflow

    # @param execution_message [Hash] parsed message body with string keys;
    #   'runEndTime' is divided by 1000 before being handed to Time.at
    def initialize(execution_message)
      @data_feed_id = execution_message['datafeedId']
      @interface_version = execution_message['interfaceVersion']
      @message = execution_message['message']
      @workflow = execution_message['type']
      @success = execution_message['success']
      @site = execution_message['site']
      @agency = execution_message['agency']
      @interface_id = execution_message['interfaceId']
      @execution_id = execution_message['interfaceExecutionId']
      @end_time = Time.at(execution_message['runEndTime']/1000)
    end

    def success?
      @success
    end

    # Parsed error records for this execution, fetched on first use.
    def errors
      @errors ||= JSON.parse(retreive_errors)
    end

    # GETs the execution's errors from the NT2 service. After a failure
    # (including any non-200 response) it retries up to four more times,
    # sleeping retries**2 seconds before each attempt, then re-raises.
    #
    # @return [String] the response body
    def retreive_errors
      retries ||= 0
      sleep(retries ** 2)
      path = "/interface_execution/#{@execution_id}/errors"
      response = WatchtowerJob.nt2_service.start { |https| https.request(Net::HTTP::Get.new(path)) }
      raise "GET of NT2 errors failed. Status: #{response.code} Body: #{response.body}" unless response.code == "200"
      response.body
    rescue => e
      retry if (retries += 1) <= 4
      WatchtowerJob.log.error e.message
      WatchtowerJob.log.error "error with GET from NT2 errors, retry #{retries}"
      raise e
    end

    # True when some error's description matches +regex+ and, if +error_code+
    # is given, that same error also carries exactly that code.
    def error_messages_include?(regex,error_code=nil)
      errors.any? do |error|
        next false unless error['errorDescription'] =~ regex
        #if error_code is available, it must also match
        !error_code || error['errorCode'] == error_code
      end
    end

    def sherlock_feed?
      %w(SHERLOCK SHERLOCK_FILE).include?(@workflow)
    end

    # Checks the site's vine operationals and, when an agency is present, the
    # agency's as well; both must support imports.
    def imports_enabled?
      service = WatchtowerJob.vine_site_service
      site_operationals = service.get_details(@site)['operationals']['vine']
      if !operationals_support_imports?(site_operationals)
        false
      elsif !@agency
        true
      else
        agency_operationals = WatchtowerJob.vine_site_service.get_details(@site,@agency)['operationals']['vine']
        operationals_support_imports?(agency_operationals)
      end
    end

    # Imports are supported when master is available and not disabled, and
    # import is available and not disabled.
    def operationals_support_imports?(vine_operationals)
      master_usable = !vine_operationals['master']['disabled'] && vine_operationals['master']['available']
      return false unless master_usable
      vine_operationals['import']['available'] && !vine_operationals['import']['disabled']
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'aws-sdk'
|
2
|
+
|
3
|
+
module WatchtowerJob
  # Reads records from a Kinesis stream and hands each one to a caller block.
  #
  # Only the first shard reported by describe_stream is read. The sequence
  # number of the last record handed to the block is kept in
  # #last_sequence_number so reading can later resume after it.
  class KinesisReader
    attr_reader :last_sequence_number

    # @param config [Object] options handed unchanged to Aws::Kinesis::Client.new
    # @param last_sequence_number [Object, nil] sequence number to resume after;
    #   nil means start from the oldest available record (TRIM_HORIZON)
    def initialize(config, last_sequence_number = nil)
      @config = config
      @last_sequence_number = last_sequence_number
    end

    # Builds a shard iterator for the first shard of the stream.
    #
    # @return [Object] the shard_iterator value returned by get_shard_iterator
    def get_iterator
      description = client.describe_stream(stream_name: stream_name)
      start_shard = description.stream_description.shards.first
      request = { stream_name: stream_name, shard_id: start_shard.shard_id }
      if @last_sequence_number
        request[:shard_iterator_type] = "AFTER_SEQUENCE_NUMBER"
        request[:starting_sequence_number] = @last_sequence_number
      else
        request[:shard_iterator_type] = "TRIM_HORIZON"
      end
      client.get_shard_iterator(request).shard_iterator
    end

    # Polls the shard in batches of up to 10 records and yields each record
    # (a dup of it) to the block. When a poll returns no records, nil is
    # yielded once so the caller gets a turn on an idle stream.
    #
    # The loop ends when there is no further shard iterator: get_records
    # reports a nil next_shard_iterator once a shard has been closed, and
    # asking for more records with a nil iterator would only raise.
    #
    # @param iterator [Object] shard iterator to start from
    # @yieldparam message [Object, nil] a record copy, or nil for an empty poll
    # @return [false] once the shard has no further iterator
    def read(iterator = get_iterator, &block)
      while iterator
        records = client.get_records(shard_iterator: iterator, limit: 10)
        break unless records

        iterator = records[:next_shard_iterator]
        batch = records[:records]
        batch.each do |message|
          WatchtowerJob.log.debug "Stream Message: #{message}"
          yield message.dup
          # Recorded only after the block returns, so a record whose handler
          # raised is not marked as consumed.
          @last_sequence_number = message['sequence_number']
        end
        yield nil if batch.empty?
      end
      false # it's done, but it should never be done!
    end

    # @return [Aws::Kinesis::Client] client built from the config, created once
    def client
      @client ||= Aws::Kinesis::Client.new(@config)
    end

    # @return [String, nil] first listed stream whose name matches the NT2
    #   environment name; nil when none matches
    def stream_name
      @stream_name ||= client.list_streams[:stream_names].find { |name| name =~ /#{WatchtowerJob.nt2_env}/ }
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'hawk'
|
2
|
+
require 'rest-client'
|
3
|
+
|
4
|
+
module WatchtowerJob
  # Fetches site and agency details from the VINE site service over HTTPS,
  # signing each GET with a Hawk authorization header.
  class VineSiteServices
    SERVICES_ID = 'mock'
    SERVICES_KEY = 'mock'
    SERVICES_USER = 'watchtower'

    # @return [String] service host name for the current NT2 environment
    def host
      "site-#{WatchtowerJob.nt2_env}.vineserv.appriss.com"
    end

    # Builds the Hawk Authorization header value for a GET of +path+.
    #
    # @param path [String] request path, used as the Hawk request URI
    # @return [Object] whatever Hawk::Client.build_authorization_header returns
    def build_hawk_header(path)
      credentials = { id: SERVICES_ID, key: SERVICES_KEY, algorithm: 'sha256' }
      # Time#to_f is in seconds, so this is whole epoch seconds (rounded).
      epoch_seconds = Time.now.to_f.round
      Hawk::Client.build_authorization_header(
        credentials: credentials,
        ts: epoch_seconds,
        method: 'GET',
        request_uri: path,
        host: host,
        port: 443,
        ext: "userId=#{SERVICES_USER}"
      )
    end

    # Fetches and parses the details of a site, or of an agency within it.
    #
    # @param site [Object] site identifier placed in the path
    # @param agency [Object, nil] agency identifier; when given, the
    #   agency-level resource under the site is requested instead
    # @return [Object] the parsed JSON response
    def get_details(site, agency = nil)
      path = agency ? "/sites/#{site}/agencies/#{agency}" : "/sites/#{site}"
      JSON.parse(retrieve_agency_details(path, build_hawk_header(path)))
    end

    # Performs the GET, retrying on any StandardError. Before each try the
    # method sleeps attempts**2 seconds (0, 1, 4, 9, 16); after the fifth
    # failure the error is logged and re-raised.
    #
    # @param path [String] request path appended to the host
    # @param hawk_auth [Object] value sent as the Authorization header
    # @return [Object] the RestClient.get result
    # @raise [StandardError] the last failure once the retries are used up
    def retrieve_agency_details(path, hawk_auth)
      attempts ||= 0 # keeps its value across `retry`, which re-runs the method body
      sleep(attempts**2)
      RestClient.get("https://#{host}#{path}", content_type: 'application/json', Authorization: hawk_auth)
    rescue => e
      attempts += 1
      retry if attempts <= 4
      WatchtowerJob.log.error e.message
      WatchtowerJob.log.error "error with GET from vine services, retry #{attempts}"
      raise e
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module WatchtowerJob
  # Tallies runs per workflow config item and posts the tallies as events.
  class WorkflowCounts
    # these are the only ones to monitor
    WORKFLOWS = %w(GETSEND FTPPROC SHERLOCK SHERLOCK_FILE).freeze

    def initialize
      @workflows = {}
      reset_workflows_hash
    end

    # Adds one to the tally of the config item that +workflow_type+ maps to.
    # Unsupported types are logged as a warning and not counted.
    #
    # @param workflow_type [String] workflow name, expected to be in WORKFLOWS
    # @return [Integer, nil] the new tally, or nil for an unsupported type
    def count(workflow_type)
      unless WORKFLOWS.include?(workflow_type)
        WatchtowerJob.log.warn("#{workflow_type} is unsupported workflow type!")
        return nil
      end
      @workflows[lookup_ci_name(workflow_type)] += 1
    end

    # Posts one 'successful runs' event per config item carrying its current
    # tally, then resets every tally to zero.
    def send_counts
      collector = WatchtowerJob.einstein_event_collector
      @workflows.each do |config_item_name, count|
        collector.post_event(
          collector.event_body(
            config_item_type: 'Service',
            config_item_name: config_item_name,
            event_type: 'successful runs',
            value: count
          )
        )
      end
      reset_workflows_hash
    end

    # Maps a workflow name to its config item name. Both Sherlock workflows
    # share one item; in the 'prep' environment the name gets a 'PREP.' prefix.
    #
    # @param workflow [String] workflow name
    # @return [String, nil] config item name, or nil for an unknown workflow
    def lookup_ci_name(workflow)
      name =
        case workflow
        when 'GETSEND' then 'NT2.GetsendWorkflow'
        when 'FTPPROC' then 'NT2.FTPProcWorkflow'
        when 'SHERLOCK', 'SHERLOCK_FILE' then 'NT2.SherlockWorkflow'
        end
      return nil unless name

      WatchtowerJob.nt2_env == 'prep' ? "PREP.#{name}" : name
    end

    private

    # Sets the tally of every monitored config item back to zero.
    def reset_workflows_hash
      WORKFLOWS.map { |wf| lookup_ci_name(wf) }.uniq.each do |ci|
        @workflows[ci] = 0
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'watchtower_job'
|
2
|
+
|
3
|
+
# Manual script: sends a start event and then a clear event for every failure
# type that WatchtowerJob::ExecutionFailure can build, using a fake execution.
# The Einstein environment comes from the first command-line argument and
# defaults to 'dev'.
WatchtowerJob.set_einstein_environment(ARGV.shift || 'dev')

# Wrap EventCollector#event_body so every event sent by this script carries a
# note saying it was generated by hand.
class WatchtowerJob::EventCollector
  alias original_event_body event_body
  def event_body(**args)
    # Forward as keywords: handing the collected hash over positionally raises
    # ArgumentError on Ruby 3 when the wrapped method declares keyword
    # parameters. NOTE(review): the wrapped signature is not visible here;
    # the splat also works if it takes a single options hash.
    body = original_event_body(**args)
    body[:info]['einstein-metadata'] = {note: 'This was generated manually to get events into Einstein. If you have questions, please contact pbecker.'}
    body
  end
end

fake_execution_run = {
  'datafeedId' => 1700000002, # this is "Charles Test Sherlock"
  'runEndTime' => (Time.now - 60).to_i * 1000, # one minute ago, in milliseconds
  'interfaceExecutionId' => 0,
  'interfaceId' => 1700000002,
  'interfaceVersion' => 0,
  'site' => 'FAKE',
  'type' => 'Watchtower Test',
  'message' => 'Watchtower Test',
  'success' => true
}

ie = WatchtowerJob::InterfaceExecution.new(fake_execution_run)

# Every class-level method of ExecutionFailure whose name starts with "new"
# (beyond those every Object has) is treated as a failure factory.
methods_to_generate_failures = (WatchtowerJob::ExecutionFailure.methods - Object.methods).select { |sym| sym.to_s =~ /^new/ }
execution_failures = methods_to_generate_failures.map { |method_name| WatchtowerJob::ExecutionFailure.send(method_name, ie) }

# Called for their side effects only, so iterate with each rather than map.
execution_failures.each(&:send_start_event)
execution_failures.each(&:send_clear_event)
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# Put this gem's lib directory on the load path so the version constant can be
# required before the gem is installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'watchtower_job/version'

Gem::Specification.new do |spec|
  spec.name    = 'watchtower_job'
  spec.version = WatchtowerJob::VERSION
  spec.authors = ['Philip Becker']
  spec.email   = ['pbecker@appriss.com']

  spec.summary     = %q{WatchTower observes the health of Data Feed interface executions.}
  spec.description = %q{Reads the interface execution kinesis stream for NT2. Then it uses that execution info combined with information from VINE Services to determine if this is an error state. If it is, it sends a message to Einstein. It also clears Einstein alarms related to tracked errors. State is not tracked between runs, so if the job fails it won't manage previously created errors.}

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject { |f| f.match(%r{^(test|spec|features)/}) }

  # Every packaged file under exe/ is installed as an executable.
  spec.bindir        = 'exe'
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']

  # Development-only dependencies.
  spec.add_development_dependency 'bundler', '~> 1.14'
  spec.add_development_dependency 'rake', '~> 10.0'
  spec.add_development_dependency 'rspec', '~> 3.0'
  spec.add_development_dependency 'guard'
  spec.add_development_dependency 'guard-rspec'
  spec.add_development_dependency 'webmock'
  spec.add_development_dependency 'byebug'
  spec.add_development_dependency 'awesome_print'

  # Runtime dependencies.
  spec.add_dependency 'aws-sdk', '~> 2.7.15'
  spec.add_dependency 'hawk-auth'
  spec.add_dependency 'rest-client'
  spec.add_dependency 'einstein_aws_keys'
  spec.add_dependency 'einstein_environment'
  spec.add_dependency 'slop', '~> 3.6.0'
  spec.add_dependency 'SyslogLogger', '~> 2.0'
end
|
metadata
ADDED
@@ -0,0 +1,282 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: watchtower_job
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.3.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Philip Becker
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-04-17 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.14'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.14'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: guard
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: guard-rspec
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: webmock
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: byebug
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: awesome_print
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: aws-sdk
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 2.7.15
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: 2.7.15
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: hawk-auth
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - ">="
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0'
|
146
|
+
type: :runtime
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: rest-client
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - ">="
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
type: :runtime
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - ">="
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '0'
|
167
|
+
- !ruby/object:Gem::Dependency
|
168
|
+
name: einstein_aws_keys
|
169
|
+
requirement: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - ">="
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '0'
|
174
|
+
type: :runtime
|
175
|
+
prerelease: false
|
176
|
+
version_requirements: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - ">="
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: '0'
|
181
|
+
- !ruby/object:Gem::Dependency
|
182
|
+
name: einstein_environment
|
183
|
+
requirement: !ruby/object:Gem::Requirement
|
184
|
+
requirements:
|
185
|
+
- - ">="
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: '0'
|
188
|
+
type: :runtime
|
189
|
+
prerelease: false
|
190
|
+
version_requirements: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - ">="
|
193
|
+
- !ruby/object:Gem::Version
|
194
|
+
version: '0'
|
195
|
+
- !ruby/object:Gem::Dependency
|
196
|
+
name: slop
|
197
|
+
requirement: !ruby/object:Gem::Requirement
|
198
|
+
requirements:
|
199
|
+
- - "~>"
|
200
|
+
- !ruby/object:Gem::Version
|
201
|
+
version: 3.6.0
|
202
|
+
type: :runtime
|
203
|
+
prerelease: false
|
204
|
+
version_requirements: !ruby/object:Gem::Requirement
|
205
|
+
requirements:
|
206
|
+
- - "~>"
|
207
|
+
- !ruby/object:Gem::Version
|
208
|
+
version: 3.6.0
|
209
|
+
- !ruby/object:Gem::Dependency
|
210
|
+
name: SyslogLogger
|
211
|
+
requirement: !ruby/object:Gem::Requirement
|
212
|
+
requirements:
|
213
|
+
- - "~>"
|
214
|
+
- !ruby/object:Gem::Version
|
215
|
+
version: '2.0'
|
216
|
+
type: :runtime
|
217
|
+
prerelease: false
|
218
|
+
version_requirements: !ruby/object:Gem::Requirement
|
219
|
+
requirements:
|
220
|
+
- - "~>"
|
221
|
+
- !ruby/object:Gem::Version
|
222
|
+
version: '2.0'
|
223
|
+
description: Reads the interface execution kinesis stream for NT2. Then it uses that
|
224
|
+
execution info combined with information from VINE Services to determine if this
|
225
|
+
is an error state. If it is, it sends a message to Einstein. It also clears Einstein
|
226
|
+
alarms related to tracked errors. State is not tracked between runs, so if the
|
227
|
+
job fails it won't manage previously created errors.
|
228
|
+
email:
|
229
|
+
- pbecker@appriss.com
|
230
|
+
executables:
|
231
|
+
- watchtower_job
|
232
|
+
extensions: []
|
233
|
+
extra_rdoc_files: []
|
234
|
+
files:
|
235
|
+
- ".gitignore"
|
236
|
+
- ".rspec"
|
237
|
+
- ".travis.yml"
|
238
|
+
- Gemfile
|
239
|
+
- Guardfile
|
240
|
+
- README.md
|
241
|
+
- Rakefile
|
242
|
+
- bin/console
|
243
|
+
- bin/setup
|
244
|
+
- exe/watchtower_job
|
245
|
+
- initd_script_basic
|
246
|
+
- initd_script_counts
|
247
|
+
- lib/watchtower_job.rb
|
248
|
+
- lib/watchtower_job/application.rb
|
249
|
+
- lib/watchtower_job/data_feed.rb
|
250
|
+
- lib/watchtower_job/event_collector.rb
|
251
|
+
- lib/watchtower_job/execution_failure.rb
|
252
|
+
- lib/watchtower_job/interface_execution.rb
|
253
|
+
- lib/watchtower_job/kinesis_reader.rb
|
254
|
+
- lib/watchtower_job/version.rb
|
255
|
+
- lib/watchtower_job/vine_site_services.rb
|
256
|
+
- lib/watchtower_job/workflow_counts.rb
|
257
|
+
- send_an_event_for_each_failure_type.rb
|
258
|
+
- watchtower_job.gemspec
|
259
|
+
homepage:
|
260
|
+
licenses: []
|
261
|
+
metadata: {}
|
262
|
+
post_install_message:
|
263
|
+
rdoc_options: []
|
264
|
+
require_paths:
|
265
|
+
- lib
|
266
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
267
|
+
requirements:
|
268
|
+
- - ">="
|
269
|
+
- !ruby/object:Gem::Version
|
270
|
+
version: '0'
|
271
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
272
|
+
requirements:
|
273
|
+
- - ">="
|
274
|
+
- !ruby/object:Gem::Version
|
275
|
+
version: '0'
|
276
|
+
requirements: []
|
277
|
+
rubyforge_project:
|
278
|
+
rubygems_version: 2.5.1
|
279
|
+
signing_key:
|
280
|
+
specification_version: 4
|
281
|
+
summary: WatchTower observes the health of Data Feed interface executions.
|
282
|
+
test_files: []
|