ace-eye 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +38 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +6 -0
  5. data/CHANGES.md +77 -0
  6. data/Gemfile +6 -0
  7. data/LICENSE +22 -0
  8. data/README.md +212 -0
  9. data/Rakefile +35 -0
  10. data/bin/eye +5 -0
  11. data/bin/loader_eye +72 -0
  12. data/bin/runner +16 -0
  13. data/examples/dependency.eye +17 -0
  14. data/examples/notify.eye +19 -0
  15. data/examples/plugin/README.md +15 -0
  16. data/examples/plugin/main.eye +15 -0
  17. data/examples/plugin/plugin.rb +63 -0
  18. data/examples/process_thin.rb +29 -0
  19. data/examples/processes/em.rb +57 -0
  20. data/examples/processes/forking.rb +20 -0
  21. data/examples/processes/sample.rb +144 -0
  22. data/examples/processes/thin.ru +12 -0
  23. data/examples/puma.eye +29 -0
  24. data/examples/rbenv.eye +11 -0
  25. data/examples/sidekiq.eye +23 -0
  26. data/examples/test.eye +87 -0
  27. data/examples/thin-farm.eye +30 -0
  28. data/examples/unicorn.eye +39 -0
  29. data/eye.gemspec +40 -0
  30. data/lib/eye.rb +28 -0
  31. data/lib/eye/application.rb +73 -0
  32. data/lib/eye/checker.rb +258 -0
  33. data/lib/eye/checker/children_count.rb +44 -0
  34. data/lib/eye/checker/children_memory.rb +12 -0
  35. data/lib/eye/checker/cpu.rb +17 -0
  36. data/lib/eye/checker/cputime.rb +13 -0
  37. data/lib/eye/checker/file_ctime.rb +24 -0
  38. data/lib/eye/checker/file_size.rb +34 -0
  39. data/lib/eye/checker/file_touched.rb +15 -0
  40. data/lib/eye/checker/http.rb +96 -0
  41. data/lib/eye/checker/memory.rb +17 -0
  42. data/lib/eye/checker/nop.rb +6 -0
  43. data/lib/eye/checker/runtime.rb +18 -0
  44. data/lib/eye/checker/socket.rb +159 -0
  45. data/lib/eye/child_process.rb +101 -0
  46. data/lib/eye/cli.rb +185 -0
  47. data/lib/eye/cli/commands.rb +78 -0
  48. data/lib/eye/cli/render.rb +130 -0
  49. data/lib/eye/cli/server.rb +93 -0
  50. data/lib/eye/client.rb +32 -0
  51. data/lib/eye/config.rb +91 -0
  52. data/lib/eye/control.rb +2 -0
  53. data/lib/eye/controller.rb +54 -0
  54. data/lib/eye/controller/commands.rb +88 -0
  55. data/lib/eye/controller/helpers.rb +101 -0
  56. data/lib/eye/controller/load.rb +224 -0
  57. data/lib/eye/controller/options.rb +18 -0
  58. data/lib/eye/controller/send_command.rb +177 -0
  59. data/lib/eye/controller/status.rb +72 -0
  60. data/lib/eye/dsl.rb +53 -0
  61. data/lib/eye/dsl/application_opts.rb +39 -0
  62. data/lib/eye/dsl/chain.rb +12 -0
  63. data/lib/eye/dsl/child_process_opts.rb +13 -0
  64. data/lib/eye/dsl/config_opts.rb +55 -0
  65. data/lib/eye/dsl/group_opts.rb +32 -0
  66. data/lib/eye/dsl/helpers.rb +20 -0
  67. data/lib/eye/dsl/main.rb +51 -0
  68. data/lib/eye/dsl/opts.rb +151 -0
  69. data/lib/eye/dsl/process_opts.rb +36 -0
  70. data/lib/eye/dsl/pure_opts.rb +121 -0
  71. data/lib/eye/dsl/validation.rb +88 -0
  72. data/lib/eye/group.rb +140 -0
  73. data/lib/eye/group/chain.rb +81 -0
  74. data/lib/eye/loader.rb +10 -0
  75. data/lib/eye/local.rb +100 -0
  76. data/lib/eye/logger.rb +104 -0
  77. data/lib/eye/notify.rb +118 -0
  78. data/lib/eye/notify/jabber.rb +30 -0
  79. data/lib/eye/notify/mail.rb +48 -0
  80. data/lib/eye/process.rb +85 -0
  81. data/lib/eye/process/children.rb +60 -0
  82. data/lib/eye/process/commands.rb +280 -0
  83. data/lib/eye/process/config.rb +81 -0
  84. data/lib/eye/process/controller.rb +73 -0
  85. data/lib/eye/process/data.rb +78 -0
  86. data/lib/eye/process/monitor.rb +108 -0
  87. data/lib/eye/process/notify.rb +32 -0
  88. data/lib/eye/process/scheduler.rb +82 -0
  89. data/lib/eye/process/states.rb +86 -0
  90. data/lib/eye/process/states_history.rb +66 -0
  91. data/lib/eye/process/system.rb +97 -0
  92. data/lib/eye/process/trigger.rb +34 -0
  93. data/lib/eye/process/validate.rb +33 -0
  94. data/lib/eye/process/watchers.rb +66 -0
  95. data/lib/eye/reason.rb +20 -0
  96. data/lib/eye/server.rb +60 -0
  97. data/lib/eye/sigar.rb +5 -0
  98. data/lib/eye/system.rb +139 -0
  99. data/lib/eye/system_resources.rb +99 -0
  100. data/lib/eye/trigger.rb +136 -0
  101. data/lib/eye/trigger/check_dependency.rb +30 -0
  102. data/lib/eye/trigger/flapping.rb +41 -0
  103. data/lib/eye/trigger/stop_children.rb +17 -0
  104. data/lib/eye/trigger/transition.rb +15 -0
  105. data/lib/eye/trigger/wait_dependency.rb +49 -0
  106. data/lib/eye/utils.rb +45 -0
  107. data/lib/eye/utils/alive_array.rb +57 -0
  108. data/lib/eye/utils/celluloid_chain.rb +71 -0
  109. data/lib/eye/utils/celluloid_klass.rb +5 -0
  110. data/lib/eye/utils/leak_19.rb +10 -0
  111. data/lib/eye/utils/mini_active_support.rb +111 -0
  112. data/lib/eye/utils/pmap.rb +7 -0
  113. data/lib/eye/utils/tail.rb +20 -0
  114. metadata +398 -0
@@ -0,0 +1,108 @@
1
+ module Eye::Process::Monitor
2
+
3
+ private
4
+
5
# Returns true when the currently tracked pid is alive. Otherwise logs a
# warning and returns the result of try_update_pid_from_file.
def check_alive_with_refresh_pid_if_needed
  return true if process_really_running?

  warn 'process not really running'
  try_update_pid_from_file
end
14
+
15
# Re-reads the pid file and, when it holds a different pid than the one
# currently tracked, adopts it.
#
# Returns true only if the pid changed and the new pid is running;
# returns false when the pid is unchanged or the new pid is not running.
def try_update_pid_from_file
  fresh_pid = load_pid_from_file

  # pid file was not rewritten: nothing to adopt
  if fresh_pid == self.pid
    debug 'process was not found'
    return false
  end

  info "process <#{self.pid}> changed pid to <#{fresh_pid}>, updating..." if self.pid
  self.pid = fresh_pid
  return true if process_really_running?

  warn "process <#{fresh_pid}> was not found"
  false
end
33
+
34
# Periodic liveness check; does nothing unless the process is in the :up state.
#
# * If the tracked pid is not running: notifies, removes the pid file when
#   eye controls it and it still holds our pid, and switches to :crashed.
# * If the pid is running but the pid file holds a different value, decides
#   whether to rewrite the file or adopt the new pid, and logs the decision.
def check_alive
  return unless up?

  # the tracked pid is gone
  unless process_really_running?
    warn "check_alive: process <#{self.pid}> not found"
    notify :info, 'crashed!'
    clear_pid_file if control_pid? && self.pid && load_pid_from_file == self.pid

    return switch(:crashed, Eye::Reason.new(:crashed))
  end

  # the process is alive: make sure the pid file still matches it
  file_pid = failsafe_load_pid
  return if file_pid == self.pid

  msg = "check_alive: pid_file (#{self[:pid_file]}) changed by itself (<#{self.pid}> => <#{file_pid}>)"

  if control_pid?
    msg += ", reverting to <#{self.pid}> (the pid_file is controlled by eye)"
    msg += ", pid_file write failed! O_o" unless failsafe_save_pid
  else
    changed_ago = Time.now - pid_file_ctime

    if file_pid.nil?
      msg += ", reverting to <#{self.pid}> (the pid_file is empty)"
      msg += ", pid_file write failed! O_o" unless failsafe_save_pid
    elsif changed_ago > self[:auto_update_pidfile_grace] && process_pid_running?(file_pid)
      msg += ", trusting this change, and now monitor <#{file_pid}>"
      self.pid = file_pid
    elsif changed_ago > self[:revert_fuckup_pidfile_grace]
      msg += " over #{self[:revert_fuckup_pidfile_grace]}s ago, reverting to <#{self.pid}>, because <#{file_pid}> not alive"
      msg += ", pid_file write failed! O_o" unless failsafe_save_pid
    else
      msg += ', ignoring self-managed pid change'
    end
  end

  warn msg
end
84
+
85
# Reacts to a crash once the process is in the :down state:
# with :keep_alive it schedules :restore (delayed by :restore_in when set),
# without it schedules :unmonitor. Skipped (with a debug line) otherwise.
def check_crash
  return debug('check crashed: skipped, process is not in down') unless down?

  unless self[:keep_alive]
    warn 'check crashed: process without keep_alive'
    return schedule(:unmonitor, Eye::Reason.new(:crashed))
  end

  warn 'check crashed: process is down'

  delay = self[:restore_in]
  if delay
    schedule_in delay.to_f, :restore, Eye::Reason.new(:crashed)
  else
    schedule :restore, Eye::Reason.new(:crashed)
  end
end
103
+
104
# Starts the process again, but only when it is currently down.
def restore
  return unless down?

  start
end
107
+
108
+ end
@@ -0,0 +1,32 @@
1
+ module Eye::Process::Notify
2
+
3
+ # notify to user:
4
+ # 1) process crashed by itself, and we restart it [:info]
5
+ # 2) a checker decided to restart the process [:warn]
6
+ # 3) flapping + switch to unmonitored [:error]
7
+
8
# Numeric rank of each notification level; a higher number is more severe.
# Frozen so the shared lookup table cannot be mutated at runtime.
LEVELS = { :debug => 0, :info => 1, :warn => 2, :error => 3, :fatal => 4 }.freeze
9
+
10
# Logs the message (as an error line) when the level ranks above :info, then
# sends it to every contact configured in self[:notify] whose own threshold
# is not higher than `level`.
#
# level - Symbol key of LEVELS (unknown levels rank as 0).
# msg   - String text of the notification.
def notify(level, msg)
  severity = ilevel(level)

  # logging it
  error "NOTIFY: #{msg}" if severity > ilevel(:info)

  # send notifies
  contacts = self[:notify]
  return unless contacts.present?

  payload = {
    :message => msg,
    :name => name,
    :full_name => full_name,
    :pid => pid,
    :host => Eye::Local.host,
    :level => level,
    :at => Time.now
  }

  contacts.each do |contact, min_level|
    Eye::Notify.notify(contact, payload) if severity >= ilevel(min_level)
  end
end
25
+
26
+ private
27
+
28
# Integer rank of a level from LEVELS; levels missing from the table rank 0.
def ilevel(level)
  rank = LEVELS[level]
  rank.to_i
end
31
+
32
+ end
@@ -0,0 +1,82 @@
1
+ module Eye::Process::Scheduler
2
+
3
+ # ex: schedule :update_config, config, "reason: update_config"
4
# Queues `command` on the scheduler chain, if the scheduler is alive and the
# object responds to the command (private methods included).
#
# A trailing Eye::Reason in args is removed from the arguments and carried
# separately as the reason of the scheduled action.
def schedule(command, *args, &block)
  return unless scheduler.alive?

  unless self.respond_to?(command, true)
    warn ":#{command} scheduling is unsupported"
    return
  end

  reason = args.pop if args.present? && args[-1].kind_of?(Eye::Reason)

  info "schedule :#{command} #{reason ? "(reason: #{reason})" : nil}"

  payload = { :args => args, :reason => reason }

  if reason.class == Eye::Reason
    # for auto reasons
    # skip already running commands and all in chain
    scheduler.add_wo_dups_current(:scheduled_action, command, payload, &block)
  else
    # for manual, or without reason
    # skip only for last in chain
    scheduler.add_wo_dups(:scheduled_action, command, payload, &block)
  end
end
28
+
29
# Schedules `command` after a delay: registers an `after` timer for
# interval.to_f and calls schedule with the same arguments when it fires.
def schedule_in(interval, command, *args, &block)
  debug "schedule_in #{interval} :#{command} #{args}"

  delay = interval.to_f
  after(delay) do
    debug "scheduled_in #{interval} :#{command} #{args}"
    schedule(command, *args, &block)
  end
end
36
+
37
# Runs a previously scheduled command.
#
# Removes :reason from `h`, records the command as the current and last
# scheduled one, invokes it via send with h[:args], clears the current
# command and appends an entry to schedule_history.
def scheduled_action(command, h = {}, &block)
  reason = h.delete(:reason)
  call_args = h[:args]

  args_note = call_args.present? ? "#{call_args.join(',')}" : nil
  reason_note = reason ? "(reason: #{reason})" : nil
  info "=> #{command} #{args_note} #{reason_note}"

  @current_scheduled_command = command
  @last_scheduled_command = command
  @last_scheduled_reason = reason
  @last_scheduled_at = Time.now

  send(command, *call_args, &block)
  @current_scheduled_command = nil
  info "<= #{command}"

  schedule_history.push(command, reason, @last_scheduled_at.to_i)
end
52
+
53
# First argument of every entry in the scheduler list; entries for which
# that lookup raises or yields nil are dropped.
def scheduler_actions_list
  firsts = scheduler.list.map do |entry|
    begin
      entry[:args].first
    rescue
      nil
    end
  end

  firsts.compact
end
56
+
57
# Delegates to the scheduler chain's clear_pending_list.
def scheduler_clear_pending_list
  chain = scheduler
  chain.clear_pending_list
end
60
+
61
# Hook run when the module is included: registers :remove_scheduler as the
# finalizer of the including class.
def self.included(base)
  base.finalizer(:remove_scheduler)
end
64
+
65
# Bookkeeping written by scheduled_action: the command being executed right
# now (nil when idle) and details of the most recently executed one.
attr_accessor :current_scheduled_command,
              :last_scheduled_command,
              :last_scheduled_reason,
              :last_scheduled_at
67
+
68
# Lazily created history of executed scheduled commands
# (an Eye::Process::StatesHistory constructed with 50).
def schedule_history
  return @schedule_history if @schedule_history

  @schedule_history = Eye::Process::StatesHistory.new(50)
end
71
+
72
+ private
73
+
74
# Terminates the scheduler chain if one was created and is still alive.
def remove_scheduler
  chain = @scheduler
  return unless chain && chain.alive?

  chain.terminate
end
77
+
78
# Lazily created Eye::Utils::CelluloidChain built from current_actor.
def scheduler
  @scheduler = Eye::Utils::CelluloidChain.new(current_actor) unless @scheduler
  @scheduler
end
81
+
82
+ end
@@ -0,0 +1,86 @@
1
+ require 'state_machine'
2
+ require 'state_machine/version'
3
+
4
+ class Eye::Process
5
# Error type for process state problems. Derives from StandardError (not
# Exception) so that a plain `rescue` can handle it like any other
# application error.
class StateError < StandardError; end
6
+
7
+ # do transition
8
# Fires the state machine event `name` through its bang method ("name!").
# The given reason, or last_scheduled_reason when none is given, is stored
# in @state_reason before the event is fired.
def switch(name, reason = nil)
  @state_reason = reason || last_scheduled_reason

  event_method = "#{name}!"
  self.send(event_method)
end
12
+
13
# State machine over the :state attribute; a process starts as :unmonitored.
state_machine :state, initial: :unmonitored do
  state :unmonitored, :up, :down
  state :starting, :stopping, :restarting

  # events and the transitions they are allowed to perform
  event(:starting)        { transition [:unmonitored, :down] => :starting }
  event(:already_running) { transition [:unmonitored, :down, :up] => :up }
  event(:started)         { transition :starting => :up }
  event(:crashed)         { transition [:starting, :restarting, :up] => :down }
  event(:stopping)        { transition [:up, :restarting] => :stopping }
  event(:stopped)         { transition :stopping => :down }
  event(:cant_kill)       { transition :stopping => :up }
  event(:restarting)      { transition [:unmonitored, :up, :down] => :restarting }
  event(:restarted)       { transition :restarting => :up }
  event(:unmonitoring)    { transition any => :unmonitored }

  # every transition is logged and passed to the triggers
  after_transition any => any, do: :log_transition
  after_transition any => any, do: :check_triggers

  after_transition any => :unmonitored, do: :on_unmonitored

  # watchers are added on entering :up and removed on leaving it
  after_transition any - :up => :up, do: :add_watchers
  after_transition :up => any - :up, do: :remove_watchers

  # children are added on entering :up, removed on :unmonitored / :down
  after_transition any - :up => :up, do: :add_children
  after_transition any => [:unmonitored, :down], do: :remove_children

  after_transition on: :crashed, do: :on_crashed
end
70
+
71
# Callback after the :crashed event: schedules :check_crash with a
# :crashed reason.
def on_crashed
  crash_reason = Eye::Reason.new(:crashed)
  schedule(:check_crash, crash_reason)
end
74
+
75
# Callback after a transition into :unmonitored: forgets the tracked pid.
def on_unmonitored
  self.pid = nil
end
78
+
79
# Records a transition in @states_history and logs it. Transitions that stay
# in the same state are skipped unless the stored reason is an
# Eye::Reason::User.
def log_transition(transition)
  from = transition.from_name
  to = transition.to_name
  return unless to != from || @state_reason.is_a?(Eye::Reason::User)

  @states_history.push to, @state_reason

  reason_note = @state_reason ? "(reason: #{@state_reason})" : nil
  info "switch :#{transition.event} [:#{from} => :#{to}] #{reason_note}"
end
85
+
86
+ end
@@ -0,0 +1,66 @@
1
+ class Eye::Process::StatesHistory < Eye::Utils::Tail
2
+
3
# Appends a history entry (a Hash with :state, :at and :reason) through the
# parent class's push. `tm` is stored as integer seconds.
def push(state, reason = nil, tm = Time.now)
  entry = { state: state, at: tm.to_i, reason: reason }
  super(entry)
end
6
+
7
# The :state value of every recorded entry, in stored order.
def states
  map { |entry| entry[:state] }
end
10
+
11
# States of the entries recorded within the last `period` seconds. When
# `from_time` is given, entries older than it are excluded as well (the
# later of the two bounds is used).
def states_for_period(period, from_time = nil)
  lower_bound = Time.now - period
  lower_bound = [lower_bound, from_time].max if from_time
  threshold = lower_bound.to_f

  recent = self.select { |entry| entry[:at] >= threshold }
  recent.map { |entry| entry[:state] }
end
17
+
18
# The :state of the most recent entry (raises when the history is empty,
# since there is no last entry to read from).
def last_state
  newest = last
  newest[:state]
end
21
+
22
# The :reason of the most recent entry, or nil when it cannot be read
# (for example when the history is empty).
def last_reason
  begin
    last[:reason]
  rescue
    nil
  end
end
25
+
26
# Time of the most recent entry, rebuilt from its stored :at value.
def last_state_changed_at
  stamp = last[:at]
  Time.at(stamp)
end
29
+
30
# True when the given states occur consecutively somewhere in the history.
# Implemented as a substring test on the comma-joined state names.
def seq?(*seq)
  haystack = states.join(',')
  needle = seq.flatten.join(',')
  haystack.include?(needle)
end
35
+
36
# True when the history ends with the given states, in order.
# Implemented as a suffix test on the comma-joined state names.
def end?(*seq)
  haystack = states.join(',')
  tail = seq.flatten.join(',')
  haystack.end_with?(tail)
end
41
+
42
# True when at least one recorded state is among the given states.
def any?(*seq)
  wanted = seq.flatten
  states.any? { |st| wanted.include?(st) }
end
47
+
48
# True when none of the recorded states is among the given states.
#
# The previous implementation negated `all?`, which answered "not every
# state is listed" and therefore returned true even when a listed state was
# present in the history.
def noone?(*seq)
  wanted = seq.flatten
  states.none? { |st| wanted.include?(st) }
end
53
+
54
# True when every recorded state is among the given states.
def all?(*seq)
  allowed = seq.flatten
  states.all? { |st| allowed.include?(st) }
end
59
+
60
# Number of recorded entries whose state equals `state`.
def state_count(state)
  states.count { |recorded| recorded == state }
end
65
+
66
+ end
@@ -0,0 +1,97 @@
1
+ require 'timeout'
2
+
3
+ module Eye::Process::System
4
+
5
# Reads the pid stored in the file at self[:pid_file_ex].
#
# Returns the pid as a positive Integer, or nil when the file does not
# exist, disappears before it can be read, or does not hold a positive
# number.
def load_pid_from_file
  path = self[:pid_file_ex]
  # File.exists? is deprecated and no longer available on Ruby 3.2+
  return nil unless File.exist?(path)

  value = File.read(path).to_i
  value > 0 ? value : nil
rescue Errno::ENOENT
  # the file was removed between the existence check and the read
  nil
end
11
+
12
# Replaces the tracked pid with whatever load_pid_from_file returns
# (which may be nil).
def set_pid_from_file
  pid_from_file = load_pid_from_file
  self.pid = pid_from_file
end
15
+
16
# Writes the tracked pid to the file at self[:pid_file_ex], replacing its
# content. Returns true after writing, false when there is no pid to write.
def save_pid_to_file
  return false unless self.pid

  File.open(self[:pid_file_ex], 'w') { |file| file.write self.pid }
  true
end
26
+
27
# Deletes the pid file at self[:pid_file_ex]. Returns true on success and
# nil when anything goes wrong (for example the file does not exist).
def clear_pid_file
  begin
    info "delete pid_file: #{self[:pid_file_ex]}"
    File.unlink(self[:pid_file_ex])
    true
  rescue StandardError
    nil
  end
end
34
+
35
# ctime of the pid file at self[:pid_file_ex]; falls back to the current
# time when it cannot be determined (for example the file is missing).
def pid_file_ctime
  begin
    File.ctime(self[:pid_file_ex])
  rescue
    Time.now
  end
end
38
+
39
# Whether the currently tracked pid is running (see process_pid_running?).
def process_really_running?
  tracked_pid = self.pid
  process_pid_running?(tracked_pid)
end
42
+
43
# Asks Eye::System.check_pid_alive about `pid`, logs the raw answer and
# returns its :result coerced to true/false.
def process_pid_running?(pid)
  check = Eye::System.check_pid_alive(pid)
  debug "process_really_running?: (#{pid}) #{check.inspect}"
  check[:result] ? true : false
end
48
+
49
# Sends signal `code` to the tracked pid through Eye::System.send_signal and
# logs the attempt (with the error, when one is reported).
# Returns true only when the reported :result is :ok.
def send_signal(code)
  outcome = Eye::System.send_signal(self.pid, code)
  failure = outcome[:error]

  line = "send_signal #{code} to #{self.pid}"
  line += ", error<#{failure}>" if failure
  info line

  outcome[:result] == :ok
end
58
+
59
# Polls the given block until it returns a truthy value or `timeout`
# seconds have been spent.
#
# timeout - Numeric limit in seconds.
# step    - pause between attempts in seconds (default 0.1).
#
# Returns the block's truthy value, or false on timeout. The time budget
# includes the time spent inside the block, so a slow block can overrun it.
def wait_for_condition(timeout, step = 0.1, &block)
  elapsed = 0

  loop do
    attempt_started = Time.now
    outcome = yield # note that yield can block actor here and timeout can be overhead
    return outcome if outcome

    sleep step.to_f
    elapsed += Time.now - attempt_started
    return false if elapsed > timeout
  end
end
72
+
73
# Runs Eye::System.execute(cmd, cfg) inside a `defer` block and returns
# what `defer` returns.
def execute(cmd, cfg = {})
  defer do
    Eye::System.execute(cmd, cfg)
  end
end
76
+
77
# Loads the pid from the pid file, retrying once after 0.1s when the first
# read yields nothing (the file may be in the middle of being replaced,
# e.g. a symlink switch).
def failsafe_load_pid
  first_read = load_pid_from_file
  return first_read if first_read

  # this can be the symlink-changed case
  sleep 0.1
  load_pid_from_file
end
88
+
89
# Calls save_pid_to_file without letting errors escape.
# Returns true when no error was raised (whatever save_pid_to_file itself
# returned); on error, logs it via log_ex and returns false.
def failsafe_save_pid
  saved = true

  begin
    save_pid_to_file
  rescue => ex
    log_ex(ex)
    saved = false
  end

  saved
end
96
+
97
+ end