tengine_job_agent 0.3.17

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,22 @@
1
+ # -*- coding: utf-8 -*-
2
+ source "http://rubygems.org"
3
+
4
+ # Add dependencies required to use your gem here.
5
+ # Example:
6
+ # gem "activesupport", ">= 2.3.5"
7
+
8
+ gem "tengine_support", "~> 0.3.0"
9
+ gem "tengine_event", "~> 0.4.0"
10
+
11
+ # Add dependencies to develop your gem here.
12
+ # Include everything needed to run rake, tests, features, etc.
13
+ group :development do
14
+ gem "rspec", "~> 2.6.0"
15
+ gem "yard", "~> 0.7.2"
16
+ gem "bundler", "~> 1.0.18"
17
+ gem "jeweler", "~> 1.6.4"
18
+ # gem "rcov", ">= 0"
19
+ gem "simplecov", "~> 0.5.3"
20
+ gem "ZenTest", "~> 4.6.2"
21
+ gem "ci_reporter", "~>1.6.5"
22
+ end
@@ -0,0 +1,67 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ ZenTest (4.6.2)
5
+ activesupport (3.2.1)
6
+ i18n (~> 0.6)
7
+ multi_json (~> 1.0)
8
+ amq-client (0.8.7)
9
+ amq-protocol (>= 0.8.4)
10
+ eventmachine
11
+ amq-protocol (0.8.4)
12
+ amqp (0.8.4)
13
+ amq-client (~> 0.8.7)
14
+ amq-protocol (~> 0.8.4)
15
+ eventmachine
16
+ builder (3.0.0)
17
+ ci_reporter (1.6.9)
18
+ builder (>= 2.1.2)
19
+ diff-lcs (1.1.3)
20
+ eventmachine (0.12.10)
21
+ git (1.2.5)
22
+ i18n (0.6.0)
23
+ jeweler (1.6.4)
24
+ bundler (~> 1.0)
25
+ git (>= 1.2.5)
26
+ rake
27
+ macaddr (1.5.0)
28
+ systemu (>= 2.4.0)
29
+ multi_json (1.0.4)
30
+ rake (0.9.2.2)
31
+ rspec (2.6.0)
32
+ rspec-core (~> 2.6.0)
33
+ rspec-expectations (~> 2.6.0)
34
+ rspec-mocks (~> 2.6.0)
35
+ rspec-core (2.6.4)
36
+ rspec-expectations (2.6.0)
37
+ diff-lcs (~> 1.1.2)
38
+ rspec-mocks (2.6.0)
39
+ simplecov (0.5.4)
40
+ multi_json (~> 1.0.3)
41
+ simplecov-html (~> 0.5.3)
42
+ simplecov-html (0.5.3)
43
+ systemu (2.4.2)
44
+ tengine_event (0.4.6)
45
+ activesupport (>= 3.0.0)
46
+ amqp (~> 0.8.0)
47
+ tengine_support (>= 0.3.24)
48
+ uuid (~> 2.3.4)
49
+ tengine_support (0.3.24)
50
+ activesupport (>= 3.0.0)
51
+ uuid (2.3.5)
52
+ macaddr (~> 1.0)
53
+ yard (0.7.5)
54
+
55
+ PLATFORMS
56
+ ruby
57
+
58
+ DEPENDENCIES
59
+ ZenTest (~> 4.6.2)
60
+ bundler (~> 1.0.18)
61
+ ci_reporter (~> 1.6.5)
62
+ jeweler (~> 1.6.4)
63
+ rspec (~> 2.6.0)
64
+ simplecov (~> 0.5.3)
65
+ tengine_event (~> 0.4.0)
66
+ tengine_support (~> 0.3.0)
67
+ yard (~> 0.7.2)
@@ -0,0 +1,20 @@
1
+ = tengine_job_agent
2
+
3
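+ tengine_job_agent invokes a job, watches it, and notifies the tengine server when it finishes. It works together with tengine_job.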
4
+
5
+ == Contributing to tengine_job_agent
6
+
7
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
8
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
9
+ * Fork the project
10
+ * Start a feature/bugfix branch
11
+ * Commit and push until you are happy with your contribution
12
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
13
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate the change in its own commit so I can cherry-pick around it.
14
+
15
+ == License
16
+ tengine_job_agent is distributed under the MPL2.0 or LGPLv3 or the dual license of MPL2.0/LGPLv3
17
+
18
+ == Copyright
19
+
20
+ Copyright (c) 2011 nautilus-technologies.com
@@ -0,0 +1,42 @@
1
+ # encoding: utf-8
2
+
3
require 'rubygems'
require 'bundler'

# Activate the default and development gem groups before anything else is
# loaded; bail out with Bundler's own status code when gems are missing.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end
require 'rake'

require 'jeweler'
# Gem packaging tasks. The block argument is a Gem::Specification; see
# http://docs.rubygems.org/read/chapter/20 for more options.
# Runtime dependencies are defined in the Gemfile.
Jeweler::Tasks.new do |gemspec|
  gemspec.name        = "tengine_job_agent"
  gemspec.homepage    = "http://github.com/tengine/tengine_job_agent"
  gemspec.license     = "MPL2.0/LGPLv3"
  gemspec.summary     = "tengine_job_agent invoke job, watches it and notify its finish to tengine server"
  gemspec.description = "tengine_job_agent works with tengine_job"
  gemspec.email       = "tengine@nautilus-technologies.com"
  gemspec.authors     = ["taigou", "totty", "g-morita", "shyouhei", "akm"]
end
Jeweler::RubygemsDotOrgTasks.new

require 'rspec/core'
require 'rspec/core/rake_task'

# `rake spec` runs every spec file under spec/.
RSpec::Core::RakeTask.new(:spec) do |task|
  task.pattern = FileList['spec/**/*_spec.rb']
end

# `rake rcov` runs the same specs with rcov enabled.
RSpec::Core::RakeTask.new(:rcov) do |task|
  task.pattern = 'spec/**/*_spec.rb'
  task.rcov = true
end

task :default => :spec

require 'yard'
YARD::Rake::YardocTask.new
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.3.17
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+
4
# Usage: tengine_job_agent_kill TARGET_PID INTERVAL SIGNAL[,SIGNAL...] [SIGNAL...]
#
# Sends each signal in turn to the process group whose id is TARGET_PID,
# waiting INTERVAL seconds between attempts. Once a signal has been delivered
# successfully, the first "no such process" error ends the script, because a
# previous signal must have terminated the group.

target_pid = ARGV.shift
interval = [ARGV.shift.to_i, 0].max # seconds; a negative value would make sleep raise
# Signals may be given comma separated and/or as separate arguments.
# Blank entries (e.g. from "TERM,,KILL") are dropped because Process.kill
# rejects an empty signal name.
signals = ARGV.join(',').split(/,/).map(&:strip).reject(&:empty?)

# A missing or non-numeric PID must never be turned into 0: kill(sig, 0)
# signals every process in the caller's own process group, and a target of 1
# (negated to -1) signals every process the user is allowed to signal.
process_group_id =
  begin
    Integer(target_pid)
  rescue ArgumentError, TypeError
    nil
  end
unless process_group_id && process_group_id > 1
  abort "invalid target PID: #{target_pid.inspect} (must be an integer greater than 1)"
end

first_time = true
last_index = signals.size - 1
signals.each_with_index do |signal, index|
  begin
    result = Process.kill(signal, -process_group_id) # negative pid => whole process group
    puts "result: #{result.inspect}"
    first_time = false
  rescue Errno::ESRCH => e
    if first_time
      # Nothing has been delivered yet: report it and keep trying the
      # remaining signals, as before.
      puts "#{e.message}."
    else
      # previous signal must have succeeded to kill the process
      break
    end
  end
  # No point in waiting once the last signal has been sent.
  sleep(interval) unless index == last_index
end
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+
4
# Make the bundled lib/ directory loadable when the script is run straight
# from a checkout, then hand the command line over to the Run command and use
# its result as the process exit status.
lib_dir = File.expand_path("../lib", File.dirname(__FILE__))
$LOAD_PATH.push(lib_dir)
require "tengine_job_agent"

exit(TengineJobAgent::Run.process(*ARGV))
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+
4
# Process.daemon and Process.spawn are required, hence the version guard.
unless RUBY_VERSION >= "1.9.2"
  raise "RUBY_VERSION must be >= 1.9.2 but was #{RUBY_VERSION}"
end

# Detach from the caller but stay in the current working directory
# (first argument is nochdir).
# Process.daemon(true, true)
Process.daemon(true)

$LOAD_PATH << File.expand_path("../lib", File.dirname(__FILE__))
require "tengine_job_agent"

exit_code = TengineJobAgent::Watchdog.process(*ARGV)
# Kernel#exit accepts only true, false or an Integer and raises TypeError for
# anything else (nil in particular). Watchdog.process returns false when it
# rescued an error; every other non-Integer value is treated as success.
exit(exit_code.is_a?(Integer) ? exit_code : exit_code != false)
@@ -0,0 +1,10 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# Namespace of the gem. Its components are registered for autoloading, so
# each file is only required the first time its constant is referenced.
module TengineJobAgent
  [
    [:Run,          "tengine_job_agent/run"],
    [:Watchdog,     "tengine_job_agent/watchdog"],
    [:CommandUtils, "tengine_job_agent/command_utils"],
  ].each do |constant_name, feature|
    autoload(constant_name, feature)
  end
end
@@ -0,0 +1,38 @@
1
+ require 'tengine_job_agent'
2
+ require 'logger'
3
+ require 'yaml'
4
+ require 'tengine/support/yaml_with_erb'
5
+
6
# Shared entry-point plumbing for the agent commands. Including this module
# extends the including class with ClassMethods, which loads the
# configuration, builds a logger and runs an instance of the class.
module TengineJobAgent::CommandUtils
  # Locations searched for the configuration file, in glob expansion order.
  CONFIG_GLOB = "{.,./config,/etc}/tengine_job_agent{.yml,.yml.erb}"

  def self.included(mod)
    mod.extend(ClassMethods)
  end

  module ClassMethods
    # Loads the first configuration file matching CONFIG_GLOB.
    #
    # @return [Hash] the parsed configuration ({} for an empty file)
    # @raise [Errno::ENOENT] when no configuration file exists
    def load_config
      config_path = Dir[CONFIG_GLOB].first
      # Without this guard YAML.load_file(nil) fails with a TypeError that
      # does not tell the operator which file is missing.
      raise Errno::ENOENT, "tengine_job_agent config (searched #{CONFIG_GLOB})" unless config_path
      # NOTE(review): ".yml.erb" files presumably rely on the
      # tengine/support/yaml_with_erb extension required by this file - confirm.
      # An empty YAML document parses to a non-Hash value; fall back to {} so
      # the config['...'] lookups done by callers keep working.
      YAML.load_file(config_path) || {}
    end

    # Runs the command: builds an instance with (logger, args, config) and
    # calls its #process.
    #
    # @param args [Array<String>] command line arguments
    # @return [Object, false] whatever the instance's #process returns, or
    #   false when it raised
    def process(*args)
      config = load_config
      logger = new_logger(config)
      begin
        new(logger, args, config).process
      rescue Exception => e
        # Deliberately broad: this is the top-level boundary of the command,
        # and every failure must end up in the log file.
        logger.error("error: [#{e.class.name}] #{e.message}\n " << Array(e.backtrace).join("\n"))
        false
      end
    end

    # Builds the logger for this command.
    #
    # Uses config['logfile'] when given; otherwise logs to
    # "<downcased class name>-<pid>.log" resolved against config['log_dir'].
    #
    # @param config [Hash]
    # @return [Logger]
    def new_logger(config)
      logfile = config['logfile']
      unless logfile
        prefix = self.name.split('::').last.downcase
        logfile = File.expand_path("#{prefix}-#{Process.pid}.log", config['log_dir'])
      end
      Logger.new(logfile)
    end
  end
end
@@ -0,0 +1,74 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'tengine_job_agent'
3
+ require 'timeout'
4
+ require 'rbconfig'
5
+
6
# Starts the watchdog in a separate process and reports the first line the
# watchdog writes to the pid file: the PID of the job on success, or an error
# message on failure.
class TengineJobAgent::Run
  include TengineJobAgent::CommandUtils

  # @param logger [Logger] destination for progress and error messages
  # @param args [Array<String>] program and arguments handed to the watchdog
  # @param config [Hash] settings; reads 'log_dir', 'timeout' and
  #   'timeout_alert' (String or Symbol keys)
  def initialize(logger, args, config = {})
    @logger = logger
    @pid_output = STDOUT
    @error_output = STDERR
    @args = args
    @config = config
    @pid_path = File.expand_path("pid_for_#{Process.pid}", @config['log_dir'])
    # Timeouts are in seconds. Every other setting in this file is read with
    # a String key, so look the timeouts up that way too; Symbol keys are
    # still honoured for callers that pass them.
    @timeout       = (config_value('timeout')       || ENV["MM_SYSTEM_AGENT_RUN_TIMEOUT"]       || 600).to_i
    @timeout_alert = (config_value('timeout_alert') || ENV["MM_SYSTEM_AGENT_RUN_TIMEOUT_ALERT"] || 30).to_i
  end

  # Spawns the watchdog and waits, for at most @timeout seconds, until it has
  # written its first line to the pid file.
  #
  # @return [Boolean] true when the line was a PID (written to @pid_output),
  #   false when it was an error message (written to @error_output)
  # @raise [Timeout::Error] when the watchdog does not answer in time
  def process
    validate_environment
    begin
      # Timeout.timeout instead of the bare Kernel#timeout, which is
      # deprecated and missing from newer Ruby versions.
      Timeout.timeout(@timeout) do
        @logger.info("watchdog process spawning for #{@args.join(' ')}")
        pid = spawn_watchdog # start the watchdog process
        @logger.info("watchdog process spawned. PID: #{pid.inspect}")
        File.open(@pid_path, "r") do |f|
          line = nil
          # Poll until the watchdog has appended its first line.
          sleep(0.1) until line = f.gets
          @logger.info("watchdog process returned first result: #{line.inspect}")
          if line =~ /\A\d+\n?\Z/ # digits and a newline only: this must be the PID
            job_pid = line.strip
            @pid_output.puts(job_pid)
            # Log the PID that was actually returned, not the watchdog's own.
            @logger.info("return PID: #{job_pid}")
            true
          else
            f.rewind
            msg = f.read
            @logger.error("error occurred:\n#{msg}")
            @error_output.puts(msg)
            return false
          end
        end
      end
    rescue Timeout::Error => e
      @error_output.puts("[#{e.class.name}] #{e.message}")
      # Re-raised so that TengineJobAgent::Run.process writes it to the
      # logger; nothing else is done here.
      raise
    end
  end

  # Starts the watchdog, passing @pid_path as its first argument.
  #
  # @return [Integer] PID of the spawned watchdog
  def spawn_watchdog
    @logger.info("pid file creating: #{@pid_path}")
    File.open(@pid_path, "w"){ } # create or truncate the file
    @logger.info("pid file created: #{@pid_path}")
    # Based on http://doc.ruby-lang.org/ja/1.9.2/method/Kernel/m/spawn.html
    watchdog = File.expand_path("../../bin/tengine_job_agent_watchdog", File.dirname(__FILE__))
    @logger.info("spawning watchdog: #{@pid_path}")
    pid = Process.spawn(RbConfig.ruby, watchdog, @pid_path, *@args)
    @logger.info("spawned watchdog: #{pid}")
    pid
  end

  # Fails with a clear message when run on a Ruby older than 1.9.2.
  #
  # @raise [RuntimeError] when RUBY_VERSION is older than 1.9.2
  def validate_environment
    if RUBY_VERSION >= "1.9.2"
      @logger.info("RUBY_VERSION is #{RUBY_VERSION}")
    else
      raise "RUBY_VERSION must be >= 1.9.2 but was #{RUBY_VERSION}"
    end
  end

  private

  # Reads a setting by its String key, falling back to the Symbol key.
  def config_value(name)
    @config[name] || @config[name.to_sym]
  end

end
@@ -0,0 +1,162 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'tengine_job_agent'
3
+
4
+ require 'fileutils'
5
+ require 'tempfile'
6
+ require 'tengine_event'
7
+ require 'eventmachine'
8
+ require 'uuid'
9
+
10
# Spawns the job program in its own process group, appends the job's PID (or
# an error message) to the pid file, fires heartbeat events while the job
# runs, and fires a "finished" event once it has exited.
class TengineJobAgent::Watchdog
  include TengineJobAgent::CommandUtils

  # @param logger [Logger] destination for progress and error messages
  # @param args [Array<String>] pid file path, program, then program arguments
  # @param config [Hash] settings; reads 'pid_output', 'log_dir' and
  #   'heartbeat' => 'job' => 'interval', and is also passed to the sender
  def initialize(logger, args, config = {})
    @uuid = UUID.new.generate
    @logger = logger
    @pid_output = config['pid_output'] || STDOUT
    @pid_path, @program, *@args = *args
    @config = config
  end

  # Runs the reactor: once the connection is available, spawns the job and
  # watches it until the finished event has been fired.
  #
  # @return [Boolean] false when spawning or detaching raised, true otherwise.
  #   A boolean is returned explicitly because the result is passed on to
  #   Kernel#exit, which rejects nil.
  def process
    failed = false
    with_tmp_outs do
      EM.run do
        sender.mq_suite.send :ensures, :connection do
          sender.wait_for_connection do
            begin
              pid = spawn_process
              File.open(@pid_path, "a"){|f| f.puts(pid)} # report the spawned PID to the caller
              detach_and_wait_process(pid)
            rescue Exception => e
              failed = true
              # The caller reads the pid file, so the error is reported there too.
              File.open(@pid_path, "a"){|f| f.puts("[#{e.class.name}] #{e.message}")}
              @logger.error("[#{e.class.name}] #{e.message}")
              EM.stop
            end
          end
        end
      end
    end
    !failed
  end

  # Spawns the job as the leader of a new process group, with stdout and
  # stderr redirected to the temporary files set up by with_tmp_outs.
  #
  # @return [Integer] PID of the job
  def spawn_process
    @logger.info("spawning process " << command_line)
    options = {
      :out => @stdout.path,
      :err => @stderr.path,
      :pgroup => true}
    pid = Process.spawn(@program, *(@args + [options]))
    @logger.info("spawned process PID: #{pid}")
    pid
  end

  # Fires a first heartbeat; its callback starts the periodic heartbeat (when
  # an interval is configured) and waits for the job on a deferred thread,
  # firing the finished event when it exits.
  #
  # @param pid [Integer] PID of the job
  def detach_and_wait_process(pid)
    @logger.info("detaching process PID: #{pid}")
    fire_heartbeat pid do
      timer = nil
      interval = heartbeat_interval
      if interval
        timer = EM.add_periodic_timer interval do
          fire_heartbeat pid do end # <- pointless block, only there to keep rspec quiet
        end
      end
      EM.defer(lambda { Process.waitpid2 pid }, lambda {|a|
          @logger.info("process finished: " << a[1].exitstatus.inspect)
          EM.cancel_timer timer if timer
          fire_finished(*a)
        })
    end
  end

  # Copies the captured output into the log directory, fires the
  # "finished.process.job.tengine" event and stops the sender.
  #
  # @param pid [Integer] PID of the finished job
  # @param process_status [Process::Status] status returned by waitpid2
  def fire_finished(pid, process_status)
    exit_status = process_status.exitstatus # may be nil when the process was killed
    level_key = exit_status == 0 ? :info : :error
    @logger.info("fire_finished starting #{pid} #{level_key}(#{exit_status})")
    event_properties = {
      "execution_id"     => ENV['MM_SCHEDULE_ID'],
      "root_jobnet_id"   => ENV['MM_ROOT_JOBNET_ID'],
      "target_jobnet_id" => ENV['MM_TARGET_JOBNET_ID'],
      "target_job_id"    => ENV['MM_ACTUAL_JOB_ID'],
      "pid"              => pid,
      "exit_status"      => exit_status,
      "command"          => command_line,
    }
    # The temporary files vanish with this process, so keep copies that the
    # event can point to.
    user_stdout_path = output_filepath("stdout", pid)
    user_stderr_path = output_filepath("stderr", pid)
    FileUtils.cp(@stdout.path, user_stdout_path)
    FileUtils.cp(@stderr.path, user_stderr_path)
    event_properties[:stdout_log] = user_stdout_path
    event_properties[:stderr_log] = user_stderr_path
    if level_key == :error
      event_properties[:message] =
        "Job process failed. STDOUT and STDERR were redirected to files.\n" <<
        "You can see them at '#{user_stdout_path}' and '#{user_stderr_path}'\n" <<
        "on the server '#{ENV['MM_SERVER_NAME']}'"
    end
    sender.fire("finished.process.job.tengine", {
        :key => @uuid,
        :level_key => level_key,
        :source_name => source_name(pid),
        :sender_name => sender_name,
        :properties => event_properties,
      })
    @logger.info("fire_finished complete")
    sender.stop
  end

  # Fires a "job.heartbeat.tengine" event for the job, keeping the connection
  # open. The block is forwarded to the sender.
  # NOTE(review): the block is presumably called once the event has been
  # sent - confirm against Tengine::Event::Sender#fire.
  #
  # @param pid [Integer] PID of the job
  def fire_heartbeat(pid, &block)
    sender.fire("job.heartbeat.tengine", {
        :key => @uuid,
        :level_key => :debug,
        :sender_name => sender_name,
        :source_name => source_name(pid),
        :occurred_at => Time.now,
        :properties => {
          "root_jobnet_id"   => ENV['MM_ROOT_JOBNET_ID'],
          "target_jobnet_id" => ENV['MM_TARGET_JOBNET_ID'],
          "target_job_id"    => ENV['MM_ACTUAL_JOB_ID'],
          "pid"              => pid,
          "command"          => command_line,
        },
        :keep_connection => true,
        :retry_count => 0,
      }, &block)
    @logger.debug("fire_heartbeat #{pid}")
  end

  # @return [Tengine::Event::Sender] the memoized event sender built from @config
  def sender
    @sender ||= Tengine::Event::Sender.new(@config)
  end

  private

  def sender_name
    @sender_name ||= sprintf "agent:%s/%d/tengine_job_agent", Tengine::Event.host_name, Process.pid
  end

  def source_name(pid)
    sprintf "job:%s/%d/%s/%s", ENV['MM_SERVER_NAME'], pid, ENV['MM_ROOT_JOBNET_ID'], ENV['MM_ACTUAL_JOB_ID']
  end

  # Program and arguments joined into the single string used in logs and events.
  def command_line
    [@program, @args].flatten.join(" ")
  end

  # Returns the configured heartbeat interval, or nil when periodic
  # heartbeats are disabled. A missing "heartbeat"/"job" section counts as
  # disabled instead of raising NoMethodError inside a reactor callback.
  def heartbeat_interval
    heartbeat = @config["heartbeat"]
    job = heartbeat.is_a?(Hash) ? heartbeat["job"] : nil
    interval = job.is_a?(Hash) ? job["interval"] : nil
    (interval.is_a?(Numeric) && interval > 0) ? interval : nil
  end

  # Path of the persistent copy of a captured stream inside config['log_dir'].
  def output_filepath(prefix, pid)
    File.expand_path("#{prefix}-#{pid}.log", @config['log_dir'])
  end

  # Opens temporary files for the job's stdout and stderr, exposes them as
  # @stdout / @stderr while the block runs, and clears both afterwards.
  def with_tmp_outs
    Tempfile.open("stdout-#{Process.pid}.log") do |tmp_stdout|
      @stdout = tmp_stdout
      begin
        Tempfile.open("stderr-#{Process.pid}.log") do |tmp_stderr|
          @stderr = tmp_stderr
          begin
            yield
          ensure
            @stderr = nil
          end
        end
      ensure
        @stdout = nil
      end
    end
  end

end