timocratic-skynet 0.9.4
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +152 -0
- data/License.txt +20 -0
- data/Manifest.txt +144 -0
- data/README.txt +178 -0
- data/Rakefile +5 -0
- data/app_generators/skynet_install/USAGE +5 -0
- data/app_generators/skynet_install/skynet_install_generator.rb +94 -0
- data/app_generators/skynet_install/templates/migration.rb +43 -0
- data/app_generators/skynet_install/templates/skynet_config.rb +50 -0
- data/app_generators/skynet_install/templates/skynet_initializer.rb +1 -0
- data/app_generators/skynet_install/templates/skynet_mysql_schema.sql +33 -0
- data/bin/skynet +71 -0
- data/bin/skynet_install +36 -0
- data/bin/skynet_tuplespace_server +74 -0
- data/config/hoe.rb +75 -0
- data/config/requirements.rb +17 -0
- data/examples/dgrep/README +70 -0
- data/examples/dgrep/config/skynet_config.rb +26 -0
- data/examples/dgrep/data/shakespeare/README +2 -0
- data/examples/dgrep/data/shakespeare/poetry/loverscomplaint +381 -0
- data/examples/dgrep/data/shakespeare/poetry/rapeoflucrece +2199 -0
- data/examples/dgrep/data/shakespeare/poetry/sonnets +2633 -0
- data/examples/dgrep/data/shakespeare/poetry/various +640 -0
- data/examples/dgrep/data/shakespeare/poetry/venusandadonis +1423 -0
- data/examples/dgrep/data/testfile1.txt +1 -0
- data/examples/dgrep/data/testfile2.txt +1 -0
- data/examples/dgrep/data/testfile3.txt +1 -0
- data/examples/dgrep/data/testfile4.txt +1 -0
- data/examples/dgrep/lib/dgrep.rb +59 -0
- data/examples/dgrep/lib/mapreduce_test.rb +32 -0
- data/examples/dgrep/lib/most_common_words.rb +45 -0
- data/examples/dgrep/script/dgrep +75 -0
- data/examples/rails_mysql_example/README +66 -0
- data/examples/rails_mysql_example/Rakefile +10 -0
- data/examples/rails_mysql_example/app/controllers/application.rb +10 -0
- data/examples/rails_mysql_example/app/helpers/application_helper.rb +3 -0
- data/examples/rails_mysql_example/app/models/user.rb +21 -0
- data/examples/rails_mysql_example/app/models/user_favorite.rb +5 -0
- data/examples/rails_mysql_example/app/models/user_mailer.rb +12 -0
- data/examples/rails_mysql_example/app/views/user_mailer/welcome.erb +5 -0
- data/examples/rails_mysql_example/config/boot.rb +109 -0
- data/examples/rails_mysql_example/config/database.yml +42 -0
- data/examples/rails_mysql_example/config/environment.rb +59 -0
- data/examples/rails_mysql_example/config/environments/development.rb +18 -0
- data/examples/rails_mysql_example/config/environments/production.rb +19 -0
- data/examples/rails_mysql_example/config/environments/test.rb +22 -0
- data/examples/rails_mysql_example/config/initializers/inflections.rb +10 -0
- data/examples/rails_mysql_example/config/initializers/mime_types.rb +5 -0
- data/examples/rails_mysql_example/config/initializers/skynet.rb +1 -0
- data/examples/rails_mysql_example/config/routes.rb +35 -0
- data/examples/rails_mysql_example/config/skynet_config.rb +36 -0
- data/examples/rails_mysql_example/db/migrate/001_create_skynet_tables.rb +43 -0
- data/examples/rails_mysql_example/db/migrate/002_create_users.rb +16 -0
- data/examples/rails_mysql_example/db/migrate/003_create_user_favorites.rb +14 -0
- data/examples/rails_mysql_example/db/schema.rb +85 -0
- data/examples/rails_mysql_example/db/skynet_mysql_schema.sql +33 -0
- data/examples/rails_mysql_example/doc/README_FOR_APP +2 -0
- data/examples/rails_mysql_example/lib/tasks/rails_mysql_example.rake +20 -0
- data/examples/rails_mysql_example/public/.htaccess +40 -0
- data/examples/rails_mysql_example/public/404.html +30 -0
- data/examples/rails_mysql_example/public/422.html +30 -0
- data/examples/rails_mysql_example/public/500.html +30 -0
- data/examples/rails_mysql_example/public/dispatch.cgi +10 -0
- data/examples/rails_mysql_example/public/dispatch.fcgi +24 -0
- data/examples/rails_mysql_example/public/dispatch.rb +10 -0
- data/examples/rails_mysql_example/public/favicon.ico +0 -0
- data/examples/rails_mysql_example/public/images/rails.png +0 -0
- data/examples/rails_mysql_example/public/index.html +277 -0
- data/examples/rails_mysql_example/public/javascripts/application.js +2 -0
- data/examples/rails_mysql_example/public/javascripts/controls.js +963 -0
- data/examples/rails_mysql_example/public/javascripts/dragdrop.js +972 -0
- data/examples/rails_mysql_example/public/javascripts/effects.js +1120 -0
- data/examples/rails_mysql_example/public/javascripts/prototype.js +4225 -0
- data/examples/rails_mysql_example/public/robots.txt +5 -0
- data/examples/rails_mysql_example/script/about +3 -0
- data/examples/rails_mysql_example/script/console +3 -0
- data/examples/rails_mysql_example/script/destroy +3 -0
- data/examples/rails_mysql_example/script/generate +3 -0
- data/examples/rails_mysql_example/script/performance/benchmarker +3 -0
- data/examples/rails_mysql_example/script/performance/profiler +3 -0
- data/examples/rails_mysql_example/script/performance/request +3 -0
- data/examples/rails_mysql_example/script/plugin +3 -0
- data/examples/rails_mysql_example/script/process/inspector +3 -0
- data/examples/rails_mysql_example/script/process/reaper +3 -0
- data/examples/rails_mysql_example/script/process/spawner +3 -0
- data/examples/rails_mysql_example/script/runner +3 -0
- data/examples/rails_mysql_example/script/server +3 -0
- data/examples/rails_mysql_example/test/fixtures/user_favorites.yml +9 -0
- data/examples/rails_mysql_example/test/fixtures/users.yml +11 -0
- data/examples/rails_mysql_example/test/test_helper.rb +38 -0
- data/examples/rails_mysql_example/test/unit/user_favorite_test.rb +8 -0
- data/examples/rails_mysql_example/test/unit/user_test.rb +8 -0
- data/extras/README +7 -0
- data/extras/init.d/skynet +87 -0
- data/extras/nagios/check_skynet.sh +121 -0
- data/extras/rails/controllers/skynet_controller.rb +43 -0
- data/extras/rails/views/skynet/index.rhtml +137 -0
- data/lib/skynet.rb +95 -0
- data/lib/skynet/mapreduce_helper.rb +74 -0
- data/lib/skynet/mapreduce_test.rb +56 -0
- data/lib/skynet/message_queue_adapters/message_queue_adapter.rb +70 -0
- data/lib/skynet/message_queue_adapters/mysql.rb +509 -0
- data/lib/skynet/message_queue_adapters/tuple_space.rb +316 -0
- data/lib/skynet/skynet_active_record_extensions.rb +280 -0
- data/lib/skynet/skynet_config.rb +232 -0
- data/lib/skynet/skynet_console.rb +50 -0
- data/lib/skynet/skynet_console_helper.rb +66 -0
- data/lib/skynet/skynet_debugger.rb +138 -0
- data/lib/skynet/skynet_guid_generator.rb +68 -0
- data/lib/skynet/skynet_job.rb +892 -0
- data/lib/skynet/skynet_launcher.rb +40 -0
- data/lib/skynet/skynet_logger.rb +62 -0
- data/lib/skynet/skynet_manager.rb +706 -0
- data/lib/skynet/skynet_message.rb +359 -0
- data/lib/skynet/skynet_message_queue.rb +136 -0
- data/lib/skynet/skynet_partitioners.rb +96 -0
- data/lib/skynet/skynet_ruby_extensions.rb +53 -0
- data/lib/skynet/skynet_task.rb +118 -0
- data/lib/skynet/skynet_tuplespace_server.rb +83 -0
- data/lib/skynet/skynet_worker.rb +451 -0
- data/lib/skynet/version.rb +9 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/txt2html +74 -0
- data/setup.rb +1585 -0
- data/tasks/deployment.rake +34 -0
- data/tasks/environment.rake +7 -0
- data/tasks/website.rake +17 -0
- data/test/test_active_record_extensions.rb +138 -0
- data/test/test_generator_helper.rb +20 -0
- data/test/test_helper.rb +10 -0
- data/test/test_mysql_message_queue_adapter.rb +263 -0
- data/test/test_skynet.rb +19 -0
- data/test/test_skynet_install_generator.rb +49 -0
- data/test/test_skynet_job.rb +717 -0
- data/test/test_skynet_manager.rb +157 -0
- data/test/test_skynet_message.rb +229 -0
- data/test/test_skynet_task.rb +24 -0
- data/test/test_tuplespace_message_queue.rb +174 -0
- data/website/index.html +181 -0
- data/website/index.txt +98 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +138 -0
- data/website/template.rhtml +48 -0
- metadata +247 -0
@@ -0,0 +1,40 @@
|
|
1
|
+
# FIXME: should be a module
|
2
|
+
class Skynet
  include SkynetDebugger

  # Entry point used by the launcher script.
  #
  # First probes the message queue. If that raises Skynet::ConnectionError
  # and the adapter is :tuplespace, a skynet_tuplespace_server process is
  # spawned and we wait Skynet::CONFIG[:TS_SERVER_START_DELAY] seconds for it.
  # Afterwards ARGV / +options+ decide what is started:
  #
  #   'console' in ARGV                        -> Skynet::Console.start
  #   options[:worker_type] or a worker_type
  #   argument in ARGV                         -> Skynet::Worker.start(options)
  #   'stop' in ARGV                           -> Skynet::Manager.stop(options)
  #   anything else                            -> Skynet::Manager.start(options)
  #                                               (daemonized when 'start' is in ARGV)
  #
  # options - Hash handed through to the component that is started;
  #           :script_path is always overwritten with
  #           Skynet::CONFIG[:LAUNCHER_PATH].
  #
  # Returns whatever the started component returns.
  def self.start(options={})
    begin
      # Only used as a connectivity probe; the instance itself is not kept.
      Skynet::MessageQueue.new
    rescue Skynet::ConnectionError
      if Skynet::MessageQueue.adapter == :tuplespace
        ts_port = Skynet::CONFIG[:TS_SERVER_HOSTS].first.split(':').last
        cmd = "skynet_tuplespace_server --port=#{ts_port} --logfile=#{Skynet.config.logfile_location} --piddir=#{Skynet.config.skynet_pid_dir} --use_ringserver=#{Skynet.config.ts_use_ringserver} --drburi=#{Skynet.config.ts_drburi} start"
        pid = fork { exec(cmd) }
        # Nobody waits for the launcher child; detach it so it is reaped
        # instead of lingering as a zombie for the lifetime of this process.
        Process.detach(pid)
        sleep Skynet::CONFIG[:TS_SERVER_START_DELAY]
      end
    end

    options[:script_path] = Skynet::CONFIG[:LAUNCHER_PATH]

    if ARGV.include?('console')
      ARGV.delete('console')
      Skynet::Console.start
    elsif options[:worker_type] || ARGV.detect { |a| a =~ /worker_type/ }
      Skynet::Worker.start(options)
    elsif ARGV.include?('stop')
      Skynet::Manager.stop(options)
    else
      options["daemonize"] = true if ARGV.include?('start')
      Skynet::Manager.start(options)
    end
  end

  # Deprecated alias for Skynet.start; emits a warning and delegates.
  def self.new(options={})
    warn("Skynet.new is deprecated, please use Skynet.start instead")
    start(options)
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# http://darwinweb.net/article/Undoing_Rails_Monkey_Patch_To_Logger
|
2
|
+
|
3
|
+
require 'logger'
|
4
|
+
|
5
|
+
class Skynet

  # Error class defined directly in the Skynet namespace.
  class Error < StandardError
  end

  # ::Logger subclass that keeps one shared, lazily created instance.
  class Logger < ::Logger
    # Restore the formatter saved under +old_format_message+ when such a
    # method exists (presumably left behind by the Rails Logger patch that the
    # article linked at the top of this file describes -- TODO confirm).
    # The check has to look at instance methods (public or private) of the
    # class; asking the class itself via respond_to? never matches.
    if method_defined?(:old_format_message) || private_method_defined?(:old_format_message)
      alias format_message old_format_message
    end

    @@log = nil

    # Returns the shared logger, creating it on first use from
    # Skynet::Config#logfile_location with the level taken from
    # Skynet::CONFIG[:SKYNET_LOG_LEVEL].
    def self.get
      unless @@log
        @@log = new(Skynet::Config.new.logfile_location)
        @@log.level = Skynet::CONFIG[:SKYNET_LOG_LEVEL]
      end
      @@log
    end

    # Replaces the shared logger with +log+.
    def self.log=(log)
      @@log = log
    end

    # Writes +args+ to the shared logger at the UNKNOWN severity.
    def printlog(*args)
      self.class.get.unknown(*args)
    end

  end


  # This module can be mixed in to add logging methods to your class.
  # Every method takes no arguments and returns the shared Skynet::Logger.
  module Loggable
    %w[debug info warn error fatal unknown].each do |severity|
      define_method(severity) { Skynet::Logger.get }
    end
  end
end
|
@@ -0,0 +1,706 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
class Skynet
|
4
|
+
class Manager
|
5
|
+
|
6
|
+
# Error raised by the manager, e.g. for a missing script path or an
# unreadable stats file.
class Error < StandardError; end
|
8
|
+
|
9
|
+
include SkynetDebugger
|
10
|
+
|
11
|
+
# Default share of task-only and master-only workers; add_worker multiplies
# the number of workers it spawns by these fractions.  Values already present
# in Skynet::CONFIG win.
Skynet::CONFIG[:PERCENTAGE_OF_TASK_ONLY_WORKERS]   ||= 0.7
Skynet::CONFIG[:PERCENTAGE_OF_MASTER_ONLY_WORKERS] ||= 0.2
|
13
|
+
|
14
|
+
# Short label for this class (presumably consumed by SkynetDebugger when it
# prefixes log lines -- verify there).
def self.debug_class_desc
  "MANAGER"
end
|
17
|
+
|
18
|
+
# required_libs and queue_id may be changed after construction; the config
# object, the worker status table and the notification queue are read-only.
attr_accessor :required_libs, :queue_id
attr_reader   :config, :worker_queue, :wqts
|
20
|
+
|
21
|
+
# Builds a manager with empty worker bookkeeping.
#
# options - Hash; keys read here:
#   :script_path   - launcher script used to spawn workers.  When absent,
#                    Skynet::CONFIG[:LAUNCHER_PATH] is used instead.
#   :workers       - number of workers requested (default 4).
#   :required_libs - libraries handed to each worker via -r (default []).
#   :queue_id      - id of the queue the workers are started for (default 0).
#
# Raises Error when neither the option nor the configured launcher path
# yields a script path.
def initialize(options)
  # Resolve the fallback first: checking options[:script_path] before
  # applying the default made the configured launcher path unreachable.
  @script_path = options[:script_path] || Skynet::CONFIG[:LAUNCHER_PATH]
  raise Error.new("You must provide a script path to Skynet::Manager.new.") unless @script_path
  # info "Skynet Launcher Path: [#{@script_path}]"
  @workers_requested = options[:workers] || 4
  @required_libs = options[:required_libs] || []
  @queue_id = options[:queue_id] || 0
  @number_of_workers = 0
  @workers_by_type = {:master => [], :task => [], :any => []}
  @signaled_workers = []
  @worker_queue = {}
  @workers_restarting = 0
  @all_workers_started = false
  @config = Skynet::Config.new
  @mutex = Mutex.new
  @wqts = Queue.new
end
|
38
|
+
|
39
|
+
# Enqueues a worker status +item+ for the worker queue thread
# (see start_worker_queue_thread, which pops from the same queue).
def worker_notify(item)
  @wqts << item
end
|
42
|
+
|
43
|
+
# Starts the background thread that consumes worker status notifications.
#
# Each item popped from @wqts is wrapped in a Skynet::WorkerStatusMessage,
# its started_at is truncated to an Integer and it is stored in @worker_queue
# under its worker_id (guarded by @mutex).  When more than 60 seconds have
# passed since the last save, the table is written to the stats file.
# Any exception is logged and the loop keeps going.
#
# Returns the Thread.
def start_worker_queue_thread
  Thread.new do
    saved_at = Time.now
    loop do
      item = @wqts.pop
      begin
        message = Skynet::WorkerStatusMessage.new(item)
        message.started_at = message.started_at.to_i
        @mutex.synchronize { @worker_queue[message.worker_id] = message }
        if Time.now - saved_at > 60
          save_worker_queue_to_file
          saved_at = Time.now
        end
      rescue Exception => e
        error "Error in worker queue thread #{e.inspect} #{e.backtrace.join("\n")}"
      end
    end
  end
end
|
64
|
+
|
65
|
+
# Boot sequence for the worker pool: reload the saved worker table, start the
# status thread, install signal handlers and then spawn however many workers
# are still missing according to workers_to_start.
#
# Returns the result of add_worker.
def start_workers
  load_worker_queue_from_file
  start_worker_queue_thread

  setup_signals

  missing = workers_to_start(@workers_requested)
  already_running = @workers_requested - missing
  warn "Starting #{missing} workers. QUEUE: #{config.queue_name_by_id(queue_id)} #{already_running} already running."
  add_worker(missing)
end
|
75
|
+
|
76
|
+
### maybe workers_to_start should be a method
|
77
|
+
# Works out how many new workers must be spawned to reach the requested
# total, given the workers already listed in the worker table.
#
# Every listed pid that is still alive counts towards the total (and bumps
# @number_of_workers); dead ones are marked as stopped.  Scanning stops as
# soon as nothing more is needed.
#
# workers_to_start - number of workers wanted.
#
# Returns the number still to be started; 0 once the live workers cover the
# request.
def workers_to_start(workers_to_start)
  remaining = workers_to_start
  worker_pids.each do |pid|
    if worker_alive?(pid)
      @number_of_workers += 1
      remaining -= 1
    else
      mark_worker_as_stopped(pid)
    end
    return 0 if remaining < 1
  end
  remaining
end
|
91
|
+
|
92
|
+
# Waits for freshly spawned workers to show up in the worker table.
#
# Polls up to 100 times; each round sleeps one second per worker that has
# not reported yet.  Errors during the wait are logged and otherwise
# ignored.  Afterwards @all_workers_started is set, and if more workers
# reported than expected, @number_of_workers is raised to the reported count.
def check_started_workers
  begin
    100.times do |attempt|
      warn "Checking started workers, #{active_workers.size} out of #{@number_of_workers} after the #{(attempt+1)}th try..."
      outstanding = @number_of_workers - active_workers.size
      break if outstanding <= 0
      sleep outstanding
    end
  rescue Exception => e
    fatal "Something bad happened #{e.inspect} #{e.backtrace.join("\n")}"
  end

  @all_workers_started = true

  printlog "FINISHED STARTING ALL #{active_workers.size} WORKERS"
  if active_workers.size > @number_of_workers
    warn "EXPECTED #{@number_of_workers}"
    @number_of_workers = active_workers.size
  end
end
|
111
|
+
|
112
|
+
# the main application loop
|
113
|
+
# The main application loop.
#
# While workers are still being started nothing is checked.  Otherwise the
# workers are checked, followed by a pause of
# Skynet::CONFIG[:WORKER_CHECK_DELAY] seconds.  SystemExit / Interrupt end
# the process; every other exception is logged and the loop continues.
def run
  loop do
    unless @all_workers_started
      # Yield the CPU while workers are starting; a bare `next` here spins
      # at full speed and starves the threads doing the actual start-up.
      sleep 0.1
      next
    end
    begin
      check_workers
      sleep Skynet::CONFIG[:WORKER_CHECK_DELAY]
    rescue SystemExit, Interrupt => e
      printlog "Manager Exiting!"
      exit
    rescue Exception => e
      fatal "Something bad happened #{e.inspect} #{e.backtrace.join("\n")}"
    end
  end
end
|
127
|
+
|
128
|
+
# One health-check pass: prune dead pids, then reconcile the worker count.
# The debug line is suppressed while shutting down.  Always returns true.
def check_workers
  unless @shutdown
    debug "Checking on #{@number_of_workers} workers..."
  end
  check_running_pids
  check_number_of_workers
  true
end
|
134
|
+
|
135
|
+
# Removes workers from the table whose process is no longer alive and
# lowers @number_of_workers accordingly.  During shutdown a vanished worker
# is logged at info level, otherwise as an error.
#
# Returns the pids still listed afterwards.
def check_running_pids
  worker_pids.each do |wpid|
    next if worker_alive?(wpid)

    if @shutdown
      info "Worker #{wpid} shut down gracefully. Removing from queue."
    else
      error "Worker #{wpid} was in queue and but was not running. Removing from queue."
    end
    mark_worker_as_stopped(wpid)
    @number_of_workers -= 1
  end
  worker_pids
end
|
149
|
+
|
150
|
+
# Reconciles the number of listed workers with what is expected.
#
# * During shutdown: drives worker_shutdown and exits the process once no
#   worker pid is left.
# * While workers are restarting: logs progress and recomputes
#   @workers_restarting as requested minus listed workers.
# * Otherwise, when the listed count differs from @number_of_workers: logs
#   the mismatch, adopts the listed count and, if fewer than 85% of the
#   requested workers are listed, starts the missing ones.
def check_number_of_workers
  if @shutdown
    worker_shutdown
    exit if worker_pids.size < 1
    return
  end

  if @workers_restarting > 0
    running = worker_pids.size
    pending = @workers_requested - running
    message = "RESTART MODE: Expected #{@number_of_workers} workers. #{running} running."
    message += " #{pending} are still restarting" unless pending == 0
    warn message
    return @workers_restarting = pending
  end

  running = worker_pids.size
  return if running == @number_of_workers

  if running.to_f / @workers_requested.to_f < 0.85
    starting = @workers_requested - running
    error "Expected #{@number_of_workers} workers. #{running} running. Starting #{starting}"
    @number_of_workers = running
    add_worker(starting)
  else
    error "Expected #{@number_of_workers} workers. #{running} running."
    @number_of_workers = running
  end
end
|
179
|
+
|
180
|
+
# Advances the shutdown one step; meant to be called repeatedly from
# check_number_of_workers while @shutdown is set.
#
# Phase 1 (until @masters_dead): workers whose map_or_reduce is "master" and
# whose process is alive are removed via remove_worker.  As long as any
# master is still listed the method returns and waits for the next call.
# Once none is left, every worker is sent TERM and @masters_dead is set.
#
# Phase 2: logs how many workers are still running.
def worker_shutdown
  unless @masters_dead
    # Every element of active_workers trivially matches itself, so asking
    # whether its process is alive is all the original nested lookup did.
    master_pids = active_workers.select { |w|
      w.map_or_reduce == "master" and worker_alive?(w.process_id)
    }.collect { |w| w.process_id }
    warn "Shutting down masters. #{worker_pids.size} workers still running." if worker_pids.size > 0

    unless master_pids.empty?
      warn "FOUND MORE RUNNING MASTERS WE HAVEN'T KILLED:", master_pids
      remove_worker(master_pids)
    end

    # Masters still listed: come back on the next check cycle.  Re-entering
    # check_number_of_workers from here would immediately call this method
    # again and recurse without bound while a master takes time to exit.
    return if active_workers.detect { |w| w.map_or_reduce == "master" }

    signal_workers("TERM")
    @masters_dead = true
  end

  if worker_pids.size < 1
    info "No more workers running."
  else
    warn "Shutting down. #{worker_pids.size} workers still running." if worker_pids.size > 0
  end
end
|
206
|
+
|
207
|
+
# Whether the process with +worker_pid+ is alive, as reported by
# Skynet.process_alive?.
def worker_alive?(worker_pid)
  Skynet.process_alive?(worker_pid)
end
|
210
|
+
|
211
|
+
# Plural spelling of add_worker; forwards all arguments unchanged.
def add_workers(*args)
  add_worker(*args)
end
|
214
|
+
|
215
|
+
# Spawns +workers+ new worker processes.
#
# The first workers * PERCENTAGE_OF_MASTER_ONLY_WORKERS (rounded down) become
# master-only workers, the next workers * PERCENTAGE_OF_TASK_ONLY_WORKERS
# (rounded down) task-only workers, and the rest accept both.  Each worker
# is launched through Skynet.fork_and_exec with the launcher script, its
# pid is recorded under its type and @number_of_workers is incremented.
#
# Returns the result of check_started_workers, which waits for the new
# workers to report in.
def add_worker(workers=1)
  num_task_only_workers   = (workers * Skynet::CONFIG[:PERCENTAGE_OF_TASK_ONLY_WORKERS]).to_i
  num_master_only_workers = (workers * Skynet::CONFIG[:PERCENTAGE_OF_MASTER_ONLY_WORKERS]).to_i
  warn "Adding #{workers} WORKERS. Task Workers: #{num_task_only_workers}, Master Workers: #{num_master_only_workers} Master & Task Workers: #{workers - num_task_only_workers - num_master_only_workers}"

  @all_workers_started = false
  worker_types = {:task => 0, :master => 0, :any => 0}
  last_master_slot = num_master_only_workers
  last_task_slot   = num_master_only_workers + num_task_only_workers

  1.upto(workers) do |ii|
    worker_type =
      if ii <= last_master_slot
        :master
      elsif ii <= last_task_slot
        :task
      else
        :any
      end
    worker_types[worker_type] += 1

    parts = ["#{@script_path} --worker_type=#{worker_type}"]
    parts << " --config='#{Skynet::CONFIG[:CONFIG_FILE]}'" if Skynet::CONFIG[:CONFIG_FILE]
    parts << " --queue_id=#{queue_id}"
    parts << " -r #{required_libs.join(' -r ')}" if required_libs and not required_libs.empty?

    wpid = Skynet.fork_and_exec(parts.join)
    Skynet.close_console
    (@workers_by_type[worker_type] ||= []) << wpid
    warn "Adding Worker ##{ii} PID: #{wpid} QUEUE: #{queue_id}, WORKER_TYPE?:#{worker_type}"
    @mutex.synchronize { @number_of_workers += 1 }
    sleep 0.01
  end

  info "Worker Distribution", worker_types
  check_started_workers
end
|
251
|
+
|
252
|
+
# Removes the first +workers+ listed workers via remove_worker.
def remove_workers(workers=1)
  remove_worker(worker_pids.slice(0...workers))
end
|
256
|
+
|
257
|
+
# Asks the given workers to stop by sending each an INT signal.
#
# pids - a single pid, an Array of pids, or nil (nothing to do).
#
# For every pid the worker is marked as stopped (which only takes effect
# when its process is already gone), @number_of_workers is decremented and
# the pid is remembered in @signaled_workers.  A pid whose process no longer
# exists is logged and otherwise treated the same, matching signal_workers.
#
# Returns the Array of pids that were processed.
def remove_worker(pids = nil)
  # Array(nil) is [], so the nil default no longer ends up as
  # Process.kill("INT", nil).
  pids = Array(pids)
  info "Removing workers #{pids.join(",")} from worker queue. They will die gracefully when they finish what they're doing."
  pids.each do |wpid|
    begin
      Process.kill("INT",wpid)
    rescue Errno::ESRCH
      warn "Tried to kill a process that didn't exist #{wpid}"
    end
    mark_worker_as_stopped(wpid)
    @number_of_workers -= 1
    warn "REMOVING WORKER #{wpid}"
    @signaled_workers << wpid
  end
  pids
end
|
269
|
+
|
270
|
+
# Drops the worker with process id +wpid+ from the worker table, but only
# when such a worker is listed and its process is no longer alive.
#
# The removed status object gets started_at set to the current time and
# its process_id cleared, for anyone still holding a reference to it.
def mark_worker_as_stopped(wpid)
  worker = @worker_queue.values.detect {|status| status.process_id == wpid}
  if worker and not worker_alive?(wpid)
    @worker_queue.delete_if {|worker_id, status| status.process_id == wpid }
    # worker_pids is rebuilt from @worker_queue on every call, so deleting
    # from its return value changed nothing; removing the table entries
    # above is what takes the pid out of worker_pids.
    worker.started_at = Time.now.to_f
    worker.process_id = nil
  end
end
|
279
|
+
|
280
|
+
# Sends +signal+ to listed workers.
#
# signal      - signal name (or number) accepted by Process.kill.
# worker_type - optional type or Array of types restricting the targets:
#               :idle selects workers without a task_id, any other value
#               selects workers whose pid was recorded under that key of
#               @workers_by_type.  A worker is signalled when it matches
#               at least one of the types; with no type every listed
#               worker is signalled.
#
# Pids that no longer exist are logged and skipped over.  Every targeted pid
# is appended to @signaled_workers.
def signal_workers(signal,worker_type=[])
  worker_types = [worker_type].flatten.compact
  active_workers.each do |worker|
    # The filter has to skip the *worker*.  A `next` inside a nested
    # worker_types.each only skips that inner iteration, which let every
    # worker through regardless of the requested types.
    targeted = worker_types.empty? || worker_types.any? { |type|
      if type == :idle
        !worker.task_id
      else
        (@workers_by_type[type] || []).include?(worker.process_id)
      end
    }
    next unless targeted

    warn "SHUTTING DOWN #{worker.process_id} MR: #{worker.map_or_reduce} SIG: #{signal}"
    begin
      Process.kill(signal,worker.process_id)
    rescue Errno::ESRCH
      warn "Tried to kill a process that didn't exist #{worker.process_id}"
    end
    # mark_worker_as_stopped(worker.process_id)
    @signaled_workers << worker.process_id
  end
end
|
300
|
+
|
301
|
+
# Forceful restart: TERM for every worker, then INT for the :master and :any
# groups, a pause of one second per expected worker, and finally a wait for
# workers to report in again.  Sets @restart.
def hard_restart_workers
  @all_workers_started = false
  signal_workers("TERM")
  @restart = true
  [:master, :any].each { |group| signal_workers("INT", group) }
  sleep @number_of_workers
  check_started_workers
end
|
310
|
+
|
311
|
+
# ===========================
|
312
|
+
# = XXX THIS IS A HORRIBLE HACK =
|
313
|
+
# ===========================
|
314
|
+
# Restarts a single worker: sends it HUP, marks it as stopped and counts it
# in @workers_restarting (all under @mutex), then pauses for
# Skynet::CONFIG[:WORKER_CHECK_DELAY] seconds.
def restart_worker(wpid)
  info "RESTARTING WORKER #{wpid}"
  @mutex.synchronize {
    Process.kill("HUP", wpid)
    mark_worker_as_stopped(wpid)
    @workers_restarting += 1
  }
  delay = Skynet::CONFIG[:WORKER_CHECK_DELAY]
  sleep delay
end
|
323
|
+
|
324
|
+
# Sends HUP to every listed worker, pauses one second per expected worker
# and then waits for the workers to report in again.
def restart_workers
  @all_workers_started = false
  signal_workers("HUP")
  pause = @number_of_workers
  sleep pause
  check_started_workers
end
|
330
|
+
|
331
|
+
# Installs the manager's signal handlers:
#
#   HUP  - restart the workers.
#   TERM - first time: remember it in @term and begin a shutdown;
#          any later time: terminate.
#   INT  - terminate if a shutdown is already under way (@shutdown),
#          otherwise begin one.
def setup_signals
  Signal.trap("HUP") { restart_workers }

  Signal.trap("TERM") do
    next terminate if @term
    @term=true
    shutdown
  end

  Signal.trap("INT") { @shutdown ? terminate : shutdown }
end
|
352
|
+
|
353
|
+
# Begins a shutdown: sets @shutdown and sends TERM via signal_workers for
# the :idle, :master and :any groups.
def shutdown
  info(:shutdown)
  @shutdown = true
  groups = [:idle,:master,:any]
  signal_workers("TERM", groups)
end
|
358
|
+
|
359
|
+
# Immediate stop: sends KILL to every listed worker, waits one second and
# exits the manager process.
def terminate
  info(:terminate)
  signal_workers("KILL")
  sleep 1
  exit
end
|
365
|
+
|
366
|
+
# Writes the worker table as YAML to the manager stats file
# (Skynet.config.manager_statfile_location), replacing its contents.
def save_worker_queue_to_file
  debug "Writing worker queue to file #{Skynet.config.manager_statfile_location}"
  File.open(Skynet.config.manager_statfile_location,"w") { |file| file.write(YAML.dump(@worker_queue)) }
end
|
372
|
+
|
373
|
+
# Restores the worker table from the manager stats file, if that file exists.
#
# When the file cannot be parsed or does not contain a Hash, the problem is
# logged, the table is reset to {} and the file is rewritten.
#
# NOTE(review): YAML.load is used on this file; it should only ever contain
# data written by save_worker_queue_to_file.  Depending on the YAML library
# version, loading custom status objects may be refused and end up in the
# reset path -- verify on the Ruby version in use.
def load_worker_queue_from_file
  statfile = Skynet.config.manager_statfile_location
  # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
  return unless File.exist?(statfile)

  File.open(statfile,"r") do |f|
    begin
      @worker_queue = YAML.load(f.read)
      raise Error.new("Bad Manager File returned type #{@worker_queue.class}") unless @worker_queue.is_a?(Hash)
    rescue Exception => e
      # Deliberately broad: some YAML parsers raise errors that are not
      # StandardErrors.  Report the path rather than the File object.
      error "Error loading manager stats file: #{statfile}", e
      @worker_queue = {}
      save_worker_queue_to_file
    end
  end
end
|
387
|
+
|
388
|
+
# Deletes every entry from the worker table whose process_id is not an
# integer (i.e. workers without a pid) and returns fresh stats.
def prune_inactive_worker_stats
  # Integer instead of Fixnum: the Fixnum constant is gone in Ruby 3.2+,
  # and Integer also matches on older Rubies.
  @worker_queue.delete_if{|worker_id, worker| !worker.process_id.is_a?(Integer) }
  stats
end
|
392
|
+
|
393
|
+
# Collects stats from the managers running on +manager_hosts+ over DRb and
# adds them up.
#
# manager_hosts - Array of host names; defaults to
#                 Skynet::CONFIG[:MANAGER_HOSTS], or ["localhost"].
#
# Returns a Hash: integer values reported by the managers are summed per
# key, :servers maps each reachable host to its own stats, :hosts holds the
# list of hosts that were asked and :time the time of the call.  Hosts that
# cannot be reached are logged and left out.
def self.stats_for_hosts(manager_hosts=nil)
  manager_hosts ||= Skynet::CONFIG[:MANAGER_HOSTS] || ["localhost"]
  stats = {
    :servers           => {},
    :processed         => 0,
    :number_of_workers => 0,
    :active_workers    => 0,
    :idle_workers      => 0,
    :hosts             => 0,
    :masters           => 0,
    :taskworkers       => 0,
    :time              => Time.now.to_f
  }
  servers = {}
  manager_hosts.each do |manager_host|
    begin
      manager = DRbObject.new(nil,"druby://#{manager_host}:#{Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_PORT]}")
      manager_stats = manager.stats
      servers[manager_host] = manager_stats
      manager_stats.each do |key,value|
        # Integer instead of Fixnum: the Fixnum constant is gone in
        # Ruby 3.2+, and Integer also matches on older Rubies.
        next unless value.is_a?(Integer)
        stats[key] ||= 0
        stats[key] += value
      end
    rescue DRb::DRbConnError, Errno::ECONNREFUSED => e
      warn "Couldn't get stats from manager at druby://#{manager_host}:#{Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_PORT]}"
    end
  end
  stats[:servers] = servers
  stats[:hosts]   = manager_hosts
  stats
end
|
425
|
+
|
426
|
+
# Summarises the worker table of this manager.
#
# Workers with a pid count as listed ("number_of_workers"); of those, the
# ones with a map_or_reduce value count as active and the others as idle.
# Counts are also broken down by tasktype ("master", "any", "task") for the
# listed, active and idle groups.  The *_update entries are the smallest and
# largest started_at values, over all entries and over listed workers.
#
# Returns a Hash keyed by Symbol.
def stats
  everyone = @worker_queue.values
  listed   = active_workers
  busy, idle = listed.partition { |worker| worker.map_or_reduce }

  all_times    = everyone.collect { |worker| worker.started_at }.sort
  listed_times = listed.collect { |worker| worker.started_at }.sort
  of_type      = lambda { |group, type| group.select { |worker| worker.tasktype.to_s == type }.size }

  summary = {
    :hostname                    => hostname,
    :earliest_update             => all_times.first,
    :latest_update               => all_times.last,
    :active_earliest_update      => listed_times.first,
    :active_latest_update        => listed_times.last,
    :processed                   => everyone.inject(0) { |total, worker| total + worker.processed },
    :processed_by_active_workers => listed.inject(0) { |total, worker| total + worker.processed },
    :number_of_workers           => listed.size,
    :idle_workers                => idle.size,
    :shutdown_workers            => inactive_workers.size,
  }
  summary[:active_workers] = busy.size

  [["", listed], ["active_", busy], ["idle_", idle]].each do |prefix, group|
    summary[:"#{prefix}masters"]                = of_type.call(group, "master")
    summary[:"#{prefix}master_or_task_workers"] = of_type.call(group, "any")
    summary[:"#{prefix}taskworkers"]            = of_type.call(group, "task")
  end
  summary
end
|
459
|
+
|
460
|
+
# Status entries of the worker table that carry an integer process id.
def active_workers
  # Integer instead of Fixnum: the Fixnum constant is gone in Ruby 3.2+,
  # and Integer also matches on older Rubies.
  @worker_queue.values.select{|status| status.process_id.is_a?(Integer) }
end
|
463
|
+
|
464
|
+
# Status entries of the worker table that carry no integer process id.
def inactive_workers
  # Integer instead of Fixnum: the Fixnum constant is gone in Ruby 3.2+,
  # and Integer also matches on older Rubies.
  @worker_queue.values.select{|status| !status.process_id.is_a?(Integer) }
end
|
467
|
+
|
468
|
+
# Process ids of all active_workers, as a newly built Array.
def worker_pids
  active_workers.map { |status| status.process_id }
end
|
471
|
+
|
472
|
+
# Process id of the process this manager runs in.
def parent_pid
  Process.pid
end
|
475
|
+
|
476
|
+
# Host name of this machine; looked up once and then cached in @machine_name.
def hostname
  return @machine_name if @machine_name
  @machine_name = Socket.gethostname
end
|
479
|
+
|
480
|
+
# Always answers true (presumably so remote callers can check that the
# manager is reachable -- verify against callers).
def ping
  true
end
|
483
|
+
|
484
|
+
# druby:// URI of the manager on this machine, using the port from
# Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_PORT].
def self.local_manager_uri
  port = Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_PORT]
  "druby://localhost:#{port}"
end
|
487
|
+
|
488
|
+
# Returns a DRb proxy object pointing at local_manager_uri.
def self.get
  uri = local_manager_uri
  DRbObject.new(nil, uri)
end
|
491
|
+
|
492
|
+
# Command-line entry point for the manager.
#
# Parses ARGV with OptionParser, then, in order:
#   1. resolves the queue id (from --queue via the config, or --queue_id,
#      defaulting to 0),
#   2. requires every library given with -r/--required,
#   3. requires the config file (--config or Skynet::CONFIG[:CONFIG_FILE]),
#   4. fills in the :workers, :pid_file and :script_path defaults,
#   5. either asks an already running manager (over DRb) to add/remove
#      workers and exits, or checks that a MessageQueue is reachable,
#      handles an existing pidfile and runs a new manager.
#
# The --restart-all-workers, --restart-workers and
# --increment-worker-version flags act immediately during parsing and exit.
#
# options - Hash of settings; the keys read here are :add_workers,
#           :remove_workers, :use_rails, :required_libs, :workers,
#           :config_file, :queue, :queue_id, :pid_file, :script_path and
#           the String key "daemonize" (fork and detach when truthy).
#
# Raises Skynet::Error when both a queue name and a queue id are supplied.
# Raises Skynet::ConnectionError when no MessageQueue can be obtained.
def self.start(options = {})
  options[:add_workers]    ||= nil
  options[:remove_workers] ||= nil
  options[:use_rails]      ||= false
  options[:required_libs]  ||= []

  config = Skynet::Config.new

  OptionParser.new do |opt|
    opt.banner = %{Usage:
> skynet [options]

OR to daemonize

> skynet [options] start
> skynet stop

You can also run:
> skynet console [options]
}
    opt.on('--restart-all-workers', 'Restart All Workers') do
      puts "Restarting ALL workers on ALL machines."
      begin
        manager = self.get
        manager.restart_all_workers
        exit
      rescue DRb::DRbConnError => e
        puts "No manager running at #{local_manager_uri} ERROR: #{e.inspect}"
        exit
      end
    end
    opt.on('--restart-workers', 'Restart Workers') do
      puts "Restarting workers on this machine."
      begin
        manager = self.get
        manager.restart_workers
        exit
      rescue DRb::DRbConnError => e
        puts "No manager running at #{local_manager_uri} ERROR: #{e.inspect}"
        exit
      end
    end
    opt.on('--increment-worker-version', 'Increment Worker Version') do
      ver = Skynet::MessageQueue.new.increment_worker_version
      puts "Incrementing Worker Version to #{ver}"
      exit
    end
    opt.on('--add-workers=WORKERS', 'Number of workers to add.') do |v|
      options[:add_workers] = v.to_i
    end
    opt.on('--remove-workers=WORKERS', 'Number of workers to remove.') do |v|
      options[:remove_workers] = v.to_i
    end
    opt.on('--workers=WORKERS', 'Number of workers to start.') do |v|
      options[:workers] = v.to_i
    end
    opt.on('-r', '--required LIBRARY', 'Require the specified libraries') do |v|
      options[:required_libs] << File.expand_path(v)
    end
    opt.on('--config=CONFIG_FILE', 'Where to find the skynet.rb config file') do |v|
      options[:config_file] = File.expand_path(v)
    end
    opt.on('--queue=QUEUE_NAME', 'Which queue should these workers use (default "default").') do |v|
      options[:queue] = v
    end
    opt.on('--queue_id=queue_id', 'Which queue should these workers use (default 0).') do |v|
      options[:queue_id] = v.to_i
    end
    opt.parse!(ARGV)
  end

  # A queue may be named or given by id, never both.
  if options[:queue]
    if options[:queue_id]
      raise Skynet::Error.new("You may either provide a queue_id or a queue, but not both.")
    end
    options[:queue_id] = config.queue_id_by_name(options[:queue])
  else
    options[:queue_id] ||= 0
  end

  options[:required_libs].each do |adlib|
    begin
      require adlib
    rescue LoadError => e
      # LoadError is what `require` raises for a missing file. The previous
      # MissingSourceFile constant only exists when ActiveSupport is loaded.
      error "The included lib #{adlib} was not found: #{e.inspect}"
      exit
    end
  end

  options[:config_file] ||= Skynet::CONFIG[:CONFIG_FILE]
  if options[:config_file]
    begin
      require options[:config_file]
    rescue LoadError => e
      error "The config file at #{options[:config_file]} was not found: #{e.inspect}"
      exit
    end
  elsif Skynet::CONFIG[:SYSTEM_RUNNER]
    # Only reported; startup continues without a config file.
    error "Config file missing. Please add a config/skynet_config.rb before starting."
  end

  options[:workers]     ||= Skynet::CONFIG[:NUMBER_OF_WORKERS] || 4
  options[:pid_file]    ||= Skynet::Config.pidfile_location
  options[:script_path] ||= Skynet::CONFIG[:LAUNCHER_PATH]

  # Handle add or remove workers
  if options[:add_workers] || options[:remove_workers]
    begin
      manager = self.get
      # When both are given, only the add request is carried out.
      if options[:add_workers]
        pids = manager.add_worker(options[:add_workers])
        warn "ADDING #{options[:add_workers]} workers PIDS: #{pids.inspect}"
      elsif options[:remove_workers]
        pids = manager.remove_workers(options[:remove_workers])
        warn "REMOVING #{options[:remove_workers]} workers PIDS: #{pids.inspect}"
      end
    rescue DRb::DRbConnError => e
      warn "Couldnt add or remove workers. There are probably no workers running. At least I couldn't find a skynet_manager around at #{local_manager_uri} #{e.inspect}"
    rescue Exception => e
      # Deliberate catch-all: any failure is reported and then we exit below.
      warn "Couldnt add or remove workers #{e.inspect} #{e.backtrace.join("\n")}"
    end
    exit

  else

    begin
      debug "Making sure there's an available MessageQueue"
      # Constructed only to verify that a queue can be reached.
      Skynet::MessageQueue.new
    rescue Skynet::ConnectionError => e
      fatal "Couldn't get MessageQueue! #{e.message}"
      raise Skynet::ConnectionError.new("ERROR! Couldn't get MessageQueue! #{e.message}")
    end

    debug "CONTINUING TO START : There IS an available MessageQueue", options

    begin
      if oldpid = read_pid_file
        if Skynet.process_alive?(oldpid)
          errmsg = "Another Skynet Manager is running at pid: #{oldpid}"
          warn errmsg
          stderr errmsg
          # The SystemExit raised here is swallowed by the rescue below, so
          # the method returns without starting a second manager.
          exit
        else
          errmsg = "Deleting stale pidfile #{Skynet::Config.pidfile_location}"
          warn errmsg
          stderr errmsg
          File.unlink(Skynet::Config.pidfile_location) if File.exist?(Skynet::Config.pidfile_location)
        end
      end

      printlog "STARTING THE MANAGER!!!!!!!!!!! port: #{Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_PORT]}"
      puts "Starting Skynet..."
      # NOTE(review): "Skynet Stopped" is logged before the manager has run;
      # kept as-is, but this looks misplaced - confirm intent.
      printlog "Skynet Stopped"
      if options["daemonize"]
        Skynet.safefork do
          # Detach from the controlling terminal before running the manager.
          Process.setsid
          write_pid_file
          Skynet.close_console
          run_manager(options)
          exit!
        end
      else
        write_pid_file
        run_manager(options)
      end
    rescue SystemExit, Interrupt
      # Normal shutdown paths: nothing to report.
    rescue Exception => e
      # Deliberate top-level catch-all so any manager failure gets logged.
      fatal("Error in Manager. Manager Dying. #{e.inspect} #{e.backtrace}")
    end
  end
end
|
663
|
+
|
664
|
+
# Creates a Skynet::Manager from +options+, publishes it through DRb on
# the port configured in Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_PORT], starts
# its workers and then hands control to the manager's run method.
#
# options - passed straight through to Skynet::Manager.new.
def self.run_manager(options)
  @manager = Skynet::Manager.new(options)

  service_uri  = "druby://:#{Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_PORT]}"
  @drb_manager = DRb.start_service(service_uri, @manager)

  @manager.start_workers
  info "MANAGER STARTED ON PORT: #{Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_PORT]}"

  @manager.run
end
|
671
|
+
|
672
|
+
# stop the daemon, nicely at first, and then forcefully if necessary
|
673
|
+
# Stops the manager whose pid is recorded in the pidfile.
#
# Sends TERM and polls the process once a second for up to 180 seconds,
# sends TERM again and polls for another 180 seconds, and finally sends
# KILL. Polling uses signal 0; as soon as the process is gone Process.kill
# raises Errno::ESRCH, which ends the sequence and is logged as
# "Skynet Stopped". The pidfile is removed on every exit path.
#
# A missing pid, or a non-positive one (an empty or corrupt pidfile reads
# as 0), is treated as "not running": signalling pid 0 would target the
# caller's own process group.
#
# options - accepted for interface compatibility; not read here.
def self.stop(options = {})
  pid = read_pid_file
  unless pid.is_a?(Integer) && pid > 0
    puts "The Skynet Manager is not running. No PID found in #{Skynet::Config.pidfile_location}"
    exit
  end
  $stdout.puts "Stopping Skynet"
  printlog "Stopping Skynet"
  Process.kill("TERM", pid)
  180.times { Process.kill(0, pid); sleep(1) }
  Process.kill("TERM", pid)
  180.times { Process.kill(0, pid); sleep(1) }
  $stdout.puts("using kill -9 #{pid}")
  Process.kill("KILL", pid)
rescue Errno::ESRCH
  # The target process no longer exists: it has stopped.
  printlog "Skynet Stopped"
ensure
  File.unlink(Skynet::Config.pidfile_location) if File.exist?(Skynet::Config.pidfile_location)
end
|
692
|
+
|
693
|
+
# Reads the manager pid from the file at Skynet::Config.pidfile_location.
#
# Returns the pid as a positive Integer, or nil when the file is missing,
# disappears while being read, or does not contain a positive number.
# String#to_i turns empty or garbage content into 0, and 0 is truthy in
# Ruby, so it is mapped to nil here rather than being mistaken for a pid.
def self.read_pid_file
  pidfile = Skynet::Config.pidfile_location
  return nil unless File.exist?(pidfile)

  pid = File.read(pidfile).to_i
  pid > 0 ? pid : nil
rescue Errno::ENOENT
  # The file was removed between the existence check and the read.
  nil
end
|
697
|
+
|
698
|
+
# Writes the current process id, followed by a newline, to the file at
# Skynet::Config.pidfile_location and registers an at_exit hook that
# deletes the file again, but only if it still holds this process's pid.
#
# File.open is used rather than Kernel#open, which would treat a path
# beginning with "|" as a command to run.
def self.write_pid_file
  pidfile = Skynet::Config.pidfile_location
  info "Writing PIDFILE to #{pidfile}"
  File.open(pidfile, "w") { |f| f << Process.pid << "\n" }
  at_exit { File.unlink(pidfile) if read_pid_file == Process.pid }
end
|
704
|
+
|
705
|
+
end
|
706
|
+
end
|