ace-eye 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +38 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +6 -0
  5. data/CHANGES.md +77 -0
  6. data/Gemfile +6 -0
  7. data/LICENSE +22 -0
  8. data/README.md +212 -0
  9. data/Rakefile +35 -0
  10. data/bin/eye +5 -0
  11. data/bin/loader_eye +72 -0
  12. data/bin/runner +16 -0
  13. data/examples/dependency.eye +17 -0
  14. data/examples/notify.eye +19 -0
  15. data/examples/plugin/README.md +15 -0
  16. data/examples/plugin/main.eye +15 -0
  17. data/examples/plugin/plugin.rb +63 -0
  18. data/examples/process_thin.rb +29 -0
  19. data/examples/processes/em.rb +57 -0
  20. data/examples/processes/forking.rb +20 -0
  21. data/examples/processes/sample.rb +144 -0
  22. data/examples/processes/thin.ru +12 -0
  23. data/examples/puma.eye +29 -0
  24. data/examples/rbenv.eye +11 -0
  25. data/examples/sidekiq.eye +23 -0
  26. data/examples/test.eye +87 -0
  27. data/examples/thin-farm.eye +30 -0
  28. data/examples/unicorn.eye +39 -0
  29. data/eye.gemspec +40 -0
  30. data/lib/eye.rb +28 -0
  31. data/lib/eye/application.rb +73 -0
  32. data/lib/eye/checker.rb +258 -0
  33. data/lib/eye/checker/children_count.rb +44 -0
  34. data/lib/eye/checker/children_memory.rb +12 -0
  35. data/lib/eye/checker/cpu.rb +17 -0
  36. data/lib/eye/checker/cputime.rb +13 -0
  37. data/lib/eye/checker/file_ctime.rb +24 -0
  38. data/lib/eye/checker/file_size.rb +34 -0
  39. data/lib/eye/checker/file_touched.rb +15 -0
  40. data/lib/eye/checker/http.rb +96 -0
  41. data/lib/eye/checker/memory.rb +17 -0
  42. data/lib/eye/checker/nop.rb +6 -0
  43. data/lib/eye/checker/runtime.rb +18 -0
  44. data/lib/eye/checker/socket.rb +159 -0
  45. data/lib/eye/child_process.rb +101 -0
  46. data/lib/eye/cli.rb +185 -0
  47. data/lib/eye/cli/commands.rb +78 -0
  48. data/lib/eye/cli/render.rb +130 -0
  49. data/lib/eye/cli/server.rb +93 -0
  50. data/lib/eye/client.rb +32 -0
  51. data/lib/eye/config.rb +91 -0
  52. data/lib/eye/control.rb +2 -0
  53. data/lib/eye/controller.rb +54 -0
  54. data/lib/eye/controller/commands.rb +88 -0
  55. data/lib/eye/controller/helpers.rb +101 -0
  56. data/lib/eye/controller/load.rb +224 -0
  57. data/lib/eye/controller/options.rb +18 -0
  58. data/lib/eye/controller/send_command.rb +177 -0
  59. data/lib/eye/controller/status.rb +72 -0
  60. data/lib/eye/dsl.rb +53 -0
  61. data/lib/eye/dsl/application_opts.rb +39 -0
  62. data/lib/eye/dsl/chain.rb +12 -0
  63. data/lib/eye/dsl/child_process_opts.rb +13 -0
  64. data/lib/eye/dsl/config_opts.rb +55 -0
  65. data/lib/eye/dsl/group_opts.rb +32 -0
  66. data/lib/eye/dsl/helpers.rb +20 -0
  67. data/lib/eye/dsl/main.rb +51 -0
  68. data/lib/eye/dsl/opts.rb +151 -0
  69. data/lib/eye/dsl/process_opts.rb +36 -0
  70. data/lib/eye/dsl/pure_opts.rb +121 -0
  71. data/lib/eye/dsl/validation.rb +88 -0
  72. data/lib/eye/group.rb +140 -0
  73. data/lib/eye/group/chain.rb +81 -0
  74. data/lib/eye/loader.rb +10 -0
  75. data/lib/eye/local.rb +100 -0
  76. data/lib/eye/logger.rb +104 -0
  77. data/lib/eye/notify.rb +118 -0
  78. data/lib/eye/notify/jabber.rb +30 -0
  79. data/lib/eye/notify/mail.rb +48 -0
  80. data/lib/eye/process.rb +85 -0
  81. data/lib/eye/process/children.rb +60 -0
  82. data/lib/eye/process/commands.rb +280 -0
  83. data/lib/eye/process/config.rb +81 -0
  84. data/lib/eye/process/controller.rb +73 -0
  85. data/lib/eye/process/data.rb +78 -0
  86. data/lib/eye/process/monitor.rb +108 -0
  87. data/lib/eye/process/notify.rb +32 -0
  88. data/lib/eye/process/scheduler.rb +82 -0
  89. data/lib/eye/process/states.rb +86 -0
  90. data/lib/eye/process/states_history.rb +66 -0
  91. data/lib/eye/process/system.rb +97 -0
  92. data/lib/eye/process/trigger.rb +34 -0
  93. data/lib/eye/process/validate.rb +33 -0
  94. data/lib/eye/process/watchers.rb +66 -0
  95. data/lib/eye/reason.rb +20 -0
  96. data/lib/eye/server.rb +60 -0
  97. data/lib/eye/sigar.rb +5 -0
  98. data/lib/eye/system.rb +139 -0
  99. data/lib/eye/system_resources.rb +99 -0
  100. data/lib/eye/trigger.rb +136 -0
  101. data/lib/eye/trigger/check_dependency.rb +30 -0
  102. data/lib/eye/trigger/flapping.rb +41 -0
  103. data/lib/eye/trigger/stop_children.rb +17 -0
  104. data/lib/eye/trigger/transition.rb +15 -0
  105. data/lib/eye/trigger/wait_dependency.rb +49 -0
  106. data/lib/eye/utils.rb +45 -0
  107. data/lib/eye/utils/alive_array.rb +57 -0
  108. data/lib/eye/utils/celluloid_chain.rb +71 -0
  109. data/lib/eye/utils/celluloid_klass.rb +5 -0
  110. data/lib/eye/utils/leak_19.rb +10 -0
  111. data/lib/eye/utils/mini_active_support.rb +111 -0
  112. data/lib/eye/utils/pmap.rb +7 -0
  113. data/lib/eye/utils/tail.rb +20 -0
  114. metadata +398 -0
@@ -0,0 +1,108 @@
1
# Liveness checks for a process: compares the tracked pid with what is
# actually running and with the content of the pid_file, and reacts to crashes.
module Eye::Process::Monitor

private

  # Returns true when the tracked pid is running. Otherwise logs a warning
  # and returns the result of re-reading the pid from the pid_file.
  def check_alive_with_refresh_pid_if_needed
    return true if process_really_running?

    warn 'process not really running'
    try_update_pid_from_file
  end

  # Re-reads the pid_file. When it holds a pid different from the tracked one,
  # adopts it and returns whether that pid is running; returns false when the
  # file still holds the same value.
  def try_update_pid_from_file
    fresh_pid = load_pid_from_file

    if fresh_pid == self.pid
      debug 'process was not found'
      return false
    end

    # the pid file was rewritten
    info "process <#{self.pid}> changed pid to <#{fresh_pid}>, updating..." if self.pid
    self.pid = fresh_pid

    return true if process_really_running?

    warn "process <#{fresh_pid}> was not found"
    false
  end

  # Does nothing unless the process is in the up state.
  #
  # If the tracked pid is not running: warns, notifies, removes the pid_file
  # (only when eye controls it and it still contains our pid) and switches to
  # :crashed.
  #
  # If it is running, the pid_file content is compared with the tracked pid.
  # On mismatch the file is rewritten, the new pid is adopted, or the change
  # is ignored, and the decision is logged as a single warning.
  def check_alive
    return unless up?

    unless process_really_running?
      warn "check_alive: process <#{self.pid}> not found"
      notify :info, 'crashed!'
      clear_pid_file if control_pid? && self.pid && load_pid_from_file == self.pid

      return switch(:crashed, Eye::Reason.new(:crashed))
    end

    # the process is alive; make sure the pid_file still agrees with us
    file_pid = failsafe_load_pid
    return if file_pid == self.pid

    note = "check_alive: pid_file (#{self[:pid_file]}) changed by itself (<#{self.pid}> => <#{file_pid}>)"

    if control_pid?
      note += ", reverting to <#{self.pid}> (the pid_file is controlled by eye)"
      note += ", pid_file write failed! O_o" unless failsafe_save_pid
    else
      # seconds since the pid_file was last changed
      age = Time.now - pid_file_ctime

      if file_pid.nil?
        note += ", reverting to <#{self.pid}> (the pid_file is empty)"
        note += ", pid_file write failed! O_o" unless failsafe_save_pid

      elsif age > self[:auto_update_pidfile_grace] && process_pid_running?(file_pid)
        # the message is built before the tracked pid is replaced
        note += ", trusting this change, and now monitor <#{file_pid}>"
        self.pid = file_pid

      elsif age > self[:revert_fuckup_pidfile_grace]
        note += " over #{self[:revert_fuckup_pidfile_grace]}s ago, reverting to <#{self.pid}>, because <#{file_pid}> not alive"
        note += ", pid_file write failed! O_o" unless failsafe_save_pid

      else
        note += ', ignoring self-managed pid change'
      end
    end

    warn note
  end

  # When the process is down: schedules :restore (immediately, or after
  # self[:restore_in] seconds when set) if keep_alive is on, otherwise
  # schedules :unmonitor. In any other state only a debug line is written.
  def check_crash
    return debug('check crashed: skipped, process is not in down') unless down?

    unless self[:keep_alive]
      warn 'check crashed: process without keep_alive'
      return schedule(:unmonitor, Eye::Reason.new(:crashed))
    end

    warn 'check crashed: process is down'

    delay = self[:restore_in]
    if delay
      schedule_in delay.to_f, :restore, Eye::Reason.new(:crashed)
    else
      schedule :restore, Eye::Reason.new(:crashed)
    end
  end

  # Starts the process again, but only while it is still down.
  def restore
    return unless down?

    start
  end

end
@@ -0,0 +1,32 @@
1
# Delivery of process events to the configured contacts.
module Eye::Process::Notify

  # Situations that are reported to the user, with their levels:
  # 1) the process crashed by itself and gets restarted  [:info]
  # 2) a checker decided to restart the process          [:warn]
  # 3) flapping, followed by a switch to unmonitored     [:error]

  # Numeric weight of every known level name.
  LEVELS = { debug: 0, info: 1, warn: 2, error: 3, fatal: 4 }

  # Logs +msg+ as an error when +level+ is above :info, then sends it to each
  # contact from self[:notify] whose configured level is not above +level+.
  #
  # level - Symbol, one of the LEVELS keys.
  # msg   - String with the text of the notification.
  def notify(level, msg)
    severity = ilevel(level)

    # logging it
    error "NOTIFY: #{msg}" if severity > ilevel(:info)

    contacts = self[:notify]
    return unless contacts.present?

    payload = {
      message: msg,
      name: name,
      full_name: full_name,
      pid: pid,
      host: Eye::Local.host,
      level: level,
      at: Time.now
    }

    # send notifies
    contacts.each do |contact, threshold|
      Eye::Notify.notify(contact, payload) if severity >= ilevel(threshold)
    end
  end

private

  # Integer weight of +level+; names missing from LEVELS weigh 0.
  def ilevel(level)
    LEVELS.fetch(level, 0)
  end

end
@@ -0,0 +1,82 @@
1
# Queues commands for a process through a Eye::Utils::CelluloidChain and
# remembers what was executed last.
module Eye::Process::Scheduler

  # Registers :remove_scheduler as a finalizer on the including class.
  def self.included(base)
    base.finalizer :remove_scheduler
  end

  attr_accessor :current_scheduled_command,
                :last_scheduled_command, :last_scheduled_reason, :last_scheduled_at

  # Puts +command+ into the scheduler chain.
  #
  # A trailing Eye::Reason (or subclass) in +args+ is taken out and used as
  # the reason. Nothing is queued when the scheduler is not alive or when the
  # receiver does not respond to +command+ (private methods count).
  #
  # ex: schedule :update_config, config, "reason: update_config"
  def schedule(command, *args, &block)
    return unless scheduler.alive?

    unless respond_to?(command, true)
      warn ":#{command} scheduling is unsupported"
      return
    end

    reason = args.pop if args.present? && args.last.kind_of?(Eye::Reason)

    info "schedule :#{command} #{reason ? "(reason: #{reason})" : nil}"

    options = { args: args, reason: reason }

    if reason.instance_of?(Eye::Reason)
      # auto reasons (exactly Eye::Reason):
      # skip already running commands and all in chain
      scheduler.add_wo_dups_current(:scheduled_action, command, options, &block)
    else
      # manual, or without reason:
      # skip only for last in chain
      scheduler.add_wo_dups(:scheduled_action, command, options, &block)
    end
  end

  # Calls #schedule with the same arguments after +interval+ seconds.
  def schedule_in(interval, command, *args, &block)
    debug "schedule_in #{interval} :#{command} #{args}"

    after(interval.to_f) do
      debug "scheduled_in #{interval} :#{command} #{args}"
      schedule(command, *args, &block)
    end
  end

  # Runs +command+ with h[:args], recording it as the current/last scheduled
  # command and appending it to schedule_history once it has returned.
  # Note: the :reason key is removed from +h+.
  def scheduled_action(command, h = {}, &block)
    reason = h.delete(:reason)
    call_args = h[:args]

    args_text = call_args.present? ? "#{call_args * ','}" : nil
    reason_text = reason ? "(reason: #{reason})" : nil
    info "=> #{command} #{args_text} #{reason_text}"

    @current_scheduled_command = command
    @last_scheduled_command = command
    @last_scheduled_reason = reason
    @last_scheduled_at = Time.now

    send(command, *call_args, &block)
    @current_scheduled_command = nil
    info "<= #{command}"

    schedule_history.push(command, reason, @last_scheduled_at.to_i)
  end

  # First argument of every entry in the scheduler list; entries that cannot
  # be read that way are left out.
  def scheduler_actions_list
    names = scheduler.list.map do |entry|
      begin
        entry[:args].first
      rescue
        nil
      end
    end

    names.compact
  end

  # Delegates to the scheduler's clear_pending_list.
  def scheduler_clear_pending_list
    scheduler.clear_pending_list
  end

  # Lazily created history (constructed with 50) of executed commands.
  def schedule_history
    @schedule_history ||= Eye::Process::StatesHistory.new(50)
  end

private

  # Terminates the scheduler if it was created and is still alive.
  def remove_scheduler
    @scheduler.terminate if @scheduler && @scheduler.alive?
  end

  # Lazily created command chain bound to the current actor.
  def scheduler
    @scheduler ||= Eye::Utils::CelluloidChain.new(current_actor)
  end

end
@@ -0,0 +1,86 @@
1
+ require 'state_machine'
2
+ require 'state_machine/version'
3
+
4
# State handling of a process: the state machine definition and the
# callbacks attached to its transitions.
class Eye::Process
  class StateError < Exception; end

  # Fires the state machine event +name+ (its bang version).
  # The reason stored for the transition is +reason+, or the reason of the
  # last scheduled command when none is given.
  def switch(name, reason = nil)
    @state_reason = reason || last_scheduled_reason
    send("#{name}!")
  end

  state_machine :state, initial: :unmonitored do
    state :unmonitored, :up, :down
    state :starting, :stopping, :restarting

    event :starting do
      transition([:unmonitored, :down] => :starting)
    end

    event :already_running do
      transition([:unmonitored, :down, :up] => :up)
    end

    event :started do
      transition(starting: :up)
    end

    event :crashed do
      transition([:starting, :restarting, :up] => :down)
    end

    event :stopping do
      transition([:up, :restarting] => :stopping)
    end

    event :stopped do
      transition(stopping: :down)
    end

    event :cant_kill do
      transition(stopping: :up)
    end

    event :restarting do
      transition([:unmonitored, :up, :down] => :restarting)
    end

    event :restarted do
      transition(restarting: :up)
    end

    event :unmonitoring do
      transition(any => :unmonitored)
    end

    # callbacks are declared in the same order as before
    after_transition(any => any, do: :log_transition)
    after_transition(any => any, do: :check_triggers)

    after_transition(any => :unmonitored, do: :on_unmonitored)

    after_transition((any - :up) => :up, do: :add_watchers)
    after_transition(:up => (any - :up), do: :remove_watchers)

    after_transition((any - :up) => :up, do: :add_children)
    after_transition(any => [:unmonitored, :down], do: :remove_children)

    after_transition(on: :crashed, do: :on_crashed)
  end

  # Callback of the :crashed event: schedules :check_crash.
  def on_crashed
    schedule :check_crash, Eye::Reason.new(:crashed)
  end

  # Callback for entering :unmonitored: forgets the tracked pid.
  def on_unmonitored
    self.pid = nil
  end

  # Records the transition in the states history and logs it. Transitions
  # that keep the same state are recorded only when the stored reason is a
  # Eye::Reason::User.
  def log_transition(transition)
    to = transition.to_name
    from = transition.from_name
    return unless to != from || @state_reason.is_a?(Eye::Reason::User)

    @states_history.push to, @state_reason

    reason_text = @state_reason ? "(reason: #{@state_reason})" : nil
    info "switch :#{transition.event} [:#{from} => :#{to}] #{reason_text}"
  end

end
@@ -0,0 +1,66 @@
1
# History of states (or executed commands) built on Eye::Utils::Tail.
# Every entry is a hash with the keys :state, :at (integer timestamp)
# and :reason.
class Eye::Process::StatesHistory < Eye::Utils::Tail

  # Appends an entry.
  #
  # state  - the state (or command) name.
  # reason - optional reason stored with the entry.
  # tm     - time of the entry, anything responding to to_i (default: now).
  def push(state, reason = nil, tm = Time.now)
    super(state: state, at: tm.to_i, reason: reason)
  end

  # All recorded states, oldest first.
  def states
    map { |entry| entry[:state] }
  end

  # States recorded during the last +period+ seconds, and not before
  # +from_time+ when it is given.
  def states_for_period(period, from_time = nil)
    cutoff = Time.now - period
    cutoff = [cutoff, from_time].max if from_time
    cutoff = cutoff.to_f

    select { |entry| entry[:at] >= cutoff }.map { |entry| entry[:state] }
  end

  # State of the newest entry; raises when the history is empty.
  def last_state
    last[:state]
  end

  # Reason of the newest entry, or nil when it cannot be read
  # (for example when the history is empty).
  def last_reason
    last[:reason] rescue nil
  end

  # Time of the newest entry; raises when the history is empty.
  def last_state_changed_at
    Time.at(last[:at])
  end

  # True when +seq+ occurs somewhere in the history as a contiguous run.
  # Names are compared as whole elements (symbols and strings are
  # interchangeable), so :start no longer matches inside :restart, which the
  # previous comma-joined substring search allowed. An empty +seq+ matches.
  def seq?(*seq)
    wanted = names_of(seq.flatten)
    return true if wanted.empty?

    names_of(states).each_cons(wanted.size).any? { |window| window == wanted }
  end

  # True when the history ends with +seq+. Compared element by element, like
  # #seq?. An empty +seq+ matches.
  def end?(*seq)
    wanted = names_of(seq.flatten)
    names_of(states).last(wanted.size) == wanted
  end

  # True when at least one recorded state is listed in +seq+.
  def any?(*seq)
    wanted = seq.flatten
    states.any? { |st| wanted.include?(st) }
  end

  # True when no recorded state is listed in +seq+.
  # (Previously implemented as "not all", which returned true even when some
  # of the recorded states were listed.)
  def noone?(*seq)
    wanted = seq.flatten
    states.none? { |st| wanted.include?(st) }
  end

  # True when every recorded state is listed in +seq+.
  def all?(*seq)
    wanted = seq.flatten
    states.all? { |st| wanted.include?(st) }
  end

  # How many times +state+ was recorded.
  def state_count(state)
    states.count(state)
  end

private

  # String form of every name, so symbols and strings compare equal.
  def names_of(list)
    list.map { |name| name.to_s }
  end

end
@@ -0,0 +1,97 @@
1
+ require 'timeout'
2
+
3
# Low level helpers of a process: pid_file handling, liveness checks,
# signals and command execution.
module Eye::Process::System

  # Pid stored in the pid_file.
  # Returns nil when the file is missing, disappears while being read, or
  # does not contain a positive integer.
  def load_pid_from_file
    path = self[:pid_file_ex]
    return unless File.exist?(path) # File.exists? is gone since Ruby 3.2

    value = File.read(path).to_i
    value > 0 ? value : nil
  rescue Errno::ENOENT
    # the file was removed between the existence check and the read
    nil
  end

  # Replaces the tracked pid with the pid_file content (possibly nil).
  def set_pid_from_file
    self.pid = load_pid_from_file
  end

  # Writes the tracked pid into the pid_file.
  # Returns true when written, false when no pid is tracked. File errors are
  # not rescued here.
  def save_pid_to_file
    return false unless self.pid

    File.open(self[:pid_file_ex], 'w') { |f| f.write self.pid }
    true
  end

  # Deletes the pid_file. Returns true on success, nil on any error.
  def clear_pid_file
    info "delete pid_file: #{self[:pid_file_ex]}"
    File.unlink(self[:pid_file_ex])
    true
  rescue
    nil
  end

  # Change time of the pid_file; falls back to the current time when it
  # cannot be read.
  def pid_file_ctime
    File.ctime(self[:pid_file_ex])
  rescue
    Time.now
  end

  # Whether the tracked pid is running.
  def process_really_running?
    process_pid_running?(self.pid)
  end

  # Whether +pid+ is running, according to Eye::System.check_pid_alive.
  def process_pid_running?(pid)
    res = Eye::System.check_pid_alive(pid)
    debug "process_really_running?: (#{pid}) #{res.inspect}"
    !!res[:result]
  end

  # Sends signal +code+ to the tracked pid and logs the attempt.
  # Returns true when Eye::System reports :ok.
  def send_signal(code)
    res = Eye::System.send_signal(self.pid, code)

    msg = "send_signal #{code} to #{self.pid}"
    msg += ", error<#{res[:error]}>" if res[:error]
    info msg

    res[:result] == :ok
  end

  # Calls the block every +step+ seconds until it returns a truthy value,
  # which is then returned. Returns false once more than +timeout+ seconds
  # were spent. The block always runs at least once.
  def wait_for_condition(timeout, step = 0.1, &block)
    elapsed = 0

    loop do
      started = Time.now
      res = yield # note that yield can block actor here and timeout can be overhead
      return res if res

      sleep step.to_f
      elapsed += Time.now - started
      return false if elapsed > timeout
    end
  end

  # Runs Eye::System.execute(cmd, cfg) inside defer.
  def execute(cmd, cfg = {})
    defer { Eye::System.execute(cmd, cfg) }
  end

  # Like load_pid_from_file, but retries once after 0.1s when nothing was
  # read (the pid_file may be a symlink that is being switched).
  def failsafe_load_pid
    found = load_pid_from_file
    return found if found

    sleep 0.1
    load_pid_from_file
  end

  # Saves the pid_file without raising. Returns false (after logging the
  # exception) when the write raised; otherwise true, even when
  # save_pid_to_file wrote nothing because no pid is tracked.
  def failsafe_save_pid
    save_pid_to_file
    true
  rescue => ex
    log_ex(ex)
    false
  end

end