timocratic-skynet 0.9.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (145) hide show
  1. data/History.txt +152 -0
  2. data/License.txt +20 -0
  3. data/Manifest.txt +144 -0
  4. data/README.txt +178 -0
  5. data/Rakefile +5 -0
  6. data/app_generators/skynet_install/USAGE +5 -0
  7. data/app_generators/skynet_install/skynet_install_generator.rb +94 -0
  8. data/app_generators/skynet_install/templates/migration.rb +43 -0
  9. data/app_generators/skynet_install/templates/skynet_config.rb +50 -0
  10. data/app_generators/skynet_install/templates/skynet_initializer.rb +1 -0
  11. data/app_generators/skynet_install/templates/skynet_mysql_schema.sql +33 -0
  12. data/bin/skynet +71 -0
  13. data/bin/skynet_install +36 -0
  14. data/bin/skynet_tuplespace_server +74 -0
  15. data/config/hoe.rb +75 -0
  16. data/config/requirements.rb +17 -0
  17. data/examples/dgrep/README +70 -0
  18. data/examples/dgrep/config/skynet_config.rb +26 -0
  19. data/examples/dgrep/data/shakespeare/README +2 -0
  20. data/examples/dgrep/data/shakespeare/poetry/loverscomplaint +381 -0
  21. data/examples/dgrep/data/shakespeare/poetry/rapeoflucrece +2199 -0
  22. data/examples/dgrep/data/shakespeare/poetry/sonnets +2633 -0
  23. data/examples/dgrep/data/shakespeare/poetry/various +640 -0
  24. data/examples/dgrep/data/shakespeare/poetry/venusandadonis +1423 -0
  25. data/examples/dgrep/data/testfile1.txt +1 -0
  26. data/examples/dgrep/data/testfile2.txt +1 -0
  27. data/examples/dgrep/data/testfile3.txt +1 -0
  28. data/examples/dgrep/data/testfile4.txt +1 -0
  29. data/examples/dgrep/lib/dgrep.rb +59 -0
  30. data/examples/dgrep/lib/mapreduce_test.rb +32 -0
  31. data/examples/dgrep/lib/most_common_words.rb +45 -0
  32. data/examples/dgrep/script/dgrep +75 -0
  33. data/examples/rails_mysql_example/README +66 -0
  34. data/examples/rails_mysql_example/Rakefile +10 -0
  35. data/examples/rails_mysql_example/app/controllers/application.rb +10 -0
  36. data/examples/rails_mysql_example/app/helpers/application_helper.rb +3 -0
  37. data/examples/rails_mysql_example/app/models/user.rb +21 -0
  38. data/examples/rails_mysql_example/app/models/user_favorite.rb +5 -0
  39. data/examples/rails_mysql_example/app/models/user_mailer.rb +12 -0
  40. data/examples/rails_mysql_example/app/views/user_mailer/welcome.erb +5 -0
  41. data/examples/rails_mysql_example/config/boot.rb +109 -0
  42. data/examples/rails_mysql_example/config/database.yml +42 -0
  43. data/examples/rails_mysql_example/config/environment.rb +59 -0
  44. data/examples/rails_mysql_example/config/environments/development.rb +18 -0
  45. data/examples/rails_mysql_example/config/environments/production.rb +19 -0
  46. data/examples/rails_mysql_example/config/environments/test.rb +22 -0
  47. data/examples/rails_mysql_example/config/initializers/inflections.rb +10 -0
  48. data/examples/rails_mysql_example/config/initializers/mime_types.rb +5 -0
  49. data/examples/rails_mysql_example/config/initializers/skynet.rb +1 -0
  50. data/examples/rails_mysql_example/config/routes.rb +35 -0
  51. data/examples/rails_mysql_example/config/skynet_config.rb +36 -0
  52. data/examples/rails_mysql_example/db/migrate/001_create_skynet_tables.rb +43 -0
  53. data/examples/rails_mysql_example/db/migrate/002_create_users.rb +16 -0
  54. data/examples/rails_mysql_example/db/migrate/003_create_user_favorites.rb +14 -0
  55. data/examples/rails_mysql_example/db/schema.rb +85 -0
  56. data/examples/rails_mysql_example/db/skynet_mysql_schema.sql +33 -0
  57. data/examples/rails_mysql_example/doc/README_FOR_APP +2 -0
  58. data/examples/rails_mysql_example/lib/tasks/rails_mysql_example.rake +20 -0
  59. data/examples/rails_mysql_example/public/.htaccess +40 -0
  60. data/examples/rails_mysql_example/public/404.html +30 -0
  61. data/examples/rails_mysql_example/public/422.html +30 -0
  62. data/examples/rails_mysql_example/public/500.html +30 -0
  63. data/examples/rails_mysql_example/public/dispatch.cgi +10 -0
  64. data/examples/rails_mysql_example/public/dispatch.fcgi +24 -0
  65. data/examples/rails_mysql_example/public/dispatch.rb +10 -0
  66. data/examples/rails_mysql_example/public/favicon.ico +0 -0
  67. data/examples/rails_mysql_example/public/images/rails.png +0 -0
  68. data/examples/rails_mysql_example/public/index.html +277 -0
  69. data/examples/rails_mysql_example/public/javascripts/application.js +2 -0
  70. data/examples/rails_mysql_example/public/javascripts/controls.js +963 -0
  71. data/examples/rails_mysql_example/public/javascripts/dragdrop.js +972 -0
  72. data/examples/rails_mysql_example/public/javascripts/effects.js +1120 -0
  73. data/examples/rails_mysql_example/public/javascripts/prototype.js +4225 -0
  74. data/examples/rails_mysql_example/public/robots.txt +5 -0
  75. data/examples/rails_mysql_example/script/about +3 -0
  76. data/examples/rails_mysql_example/script/console +3 -0
  77. data/examples/rails_mysql_example/script/destroy +3 -0
  78. data/examples/rails_mysql_example/script/generate +3 -0
  79. data/examples/rails_mysql_example/script/performance/benchmarker +3 -0
  80. data/examples/rails_mysql_example/script/performance/profiler +3 -0
  81. data/examples/rails_mysql_example/script/performance/request +3 -0
  82. data/examples/rails_mysql_example/script/plugin +3 -0
  83. data/examples/rails_mysql_example/script/process/inspector +3 -0
  84. data/examples/rails_mysql_example/script/process/reaper +3 -0
  85. data/examples/rails_mysql_example/script/process/spawner +3 -0
  86. data/examples/rails_mysql_example/script/runner +3 -0
  87. data/examples/rails_mysql_example/script/server +3 -0
  88. data/examples/rails_mysql_example/test/fixtures/user_favorites.yml +9 -0
  89. data/examples/rails_mysql_example/test/fixtures/users.yml +11 -0
  90. data/examples/rails_mysql_example/test/test_helper.rb +38 -0
  91. data/examples/rails_mysql_example/test/unit/user_favorite_test.rb +8 -0
  92. data/examples/rails_mysql_example/test/unit/user_test.rb +8 -0
  93. data/extras/README +7 -0
  94. data/extras/init.d/skynet +87 -0
  95. data/extras/nagios/check_skynet.sh +121 -0
  96. data/extras/rails/controllers/skynet_controller.rb +43 -0
  97. data/extras/rails/views/skynet/index.rhtml +137 -0
  98. data/lib/skynet.rb +95 -0
  99. data/lib/skynet/mapreduce_helper.rb +74 -0
  100. data/lib/skynet/mapreduce_test.rb +56 -0
  101. data/lib/skynet/message_queue_adapters/message_queue_adapter.rb +70 -0
  102. data/lib/skynet/message_queue_adapters/mysql.rb +509 -0
  103. data/lib/skynet/message_queue_adapters/tuple_space.rb +316 -0
  104. data/lib/skynet/skynet_active_record_extensions.rb +280 -0
  105. data/lib/skynet/skynet_config.rb +232 -0
  106. data/lib/skynet/skynet_console.rb +50 -0
  107. data/lib/skynet/skynet_console_helper.rb +66 -0
  108. data/lib/skynet/skynet_debugger.rb +138 -0
  109. data/lib/skynet/skynet_guid_generator.rb +68 -0
  110. data/lib/skynet/skynet_job.rb +892 -0
  111. data/lib/skynet/skynet_launcher.rb +40 -0
  112. data/lib/skynet/skynet_logger.rb +62 -0
  113. data/lib/skynet/skynet_manager.rb +706 -0
  114. data/lib/skynet/skynet_message.rb +359 -0
  115. data/lib/skynet/skynet_message_queue.rb +136 -0
  116. data/lib/skynet/skynet_partitioners.rb +96 -0
  117. data/lib/skynet/skynet_ruby_extensions.rb +53 -0
  118. data/lib/skynet/skynet_task.rb +118 -0
  119. data/lib/skynet/skynet_tuplespace_server.rb +83 -0
  120. data/lib/skynet/skynet_worker.rb +451 -0
  121. data/lib/skynet/version.rb +9 -0
  122. data/script/destroy +14 -0
  123. data/script/generate +14 -0
  124. data/script/txt2html +74 -0
  125. data/setup.rb +1585 -0
  126. data/tasks/deployment.rake +34 -0
  127. data/tasks/environment.rake +7 -0
  128. data/tasks/website.rake +17 -0
  129. data/test/test_active_record_extensions.rb +138 -0
  130. data/test/test_generator_helper.rb +20 -0
  131. data/test/test_helper.rb +10 -0
  132. data/test/test_mysql_message_queue_adapter.rb +263 -0
  133. data/test/test_skynet.rb +19 -0
  134. data/test/test_skynet_install_generator.rb +49 -0
  135. data/test/test_skynet_job.rb +717 -0
  136. data/test/test_skynet_manager.rb +157 -0
  137. data/test/test_skynet_message.rb +229 -0
  138. data/test/test_skynet_task.rb +24 -0
  139. data/test/test_tuplespace_message_queue.rb +174 -0
  140. data/website/index.html +181 -0
  141. data/website/index.txt +98 -0
  142. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  143. data/website/stylesheets/screen.css +138 -0
  144. data/website/template.rhtml +48 -0
  145. metadata +247 -0
@@ -0,0 +1,40 @@
1
+ # FIXME: should be a module
2
+ class Skynet
3
+ include SkynetDebugger
4
# Entry point used by the launcher script.
#
# First makes sure a message queue can be reached. When the connection fails
# and the tuplespace adapter is configured, a skynet_tuplespace_server process
# is spawned and we wait Skynet::CONFIG[:TS_SERVER_START_DELAY] seconds for it.
# Afterwards ARGV / options decide what to run:
#   * 'console' in ARGV                    -> Skynet::Console.start
#   * options[:worker_type] or an ARGV
#     entry matching /worker_type/         -> Skynet::Worker.start(options)
#   * 'stop' in ARGV                       -> Skynet::Manager.stop(options)
#   * anything else                        -> Skynet::Manager.start(options),
#     with options["daemonize"] set when 'start' is in ARGV
#
# options - Hash handed on to the started component. options[:script_path] is
#           always overwritten with Skynet::CONFIG[:LAUNCHER_PATH].
#
# Returns whatever the selected component's start/stop method returns.
def self.start(options={})
  begin
    # Only the connection attempt matters here; the queue object is not kept.
    Skynet::MessageQueue.new
  rescue Skynet::ConnectionError
    if Skynet::MessageQueue.adapter == :tuplespace
      ts_port = Skynet::CONFIG[:TS_SERVER_HOSTS].first.split(':').last
      cmd = "skynet_tuplespace_server --port=#{ts_port} --logfile=#{Skynet.config.logfile_location} --piddir=#{Skynet.config.skynet_pid_dir} --use_ringserver=#{Skynet.config.ts_use_ringserver} --drburi=#{Skynet.config.ts_drburi} start"
      pid = fork { exec(cmd) }
      # Reap the child once it exits so it does not linger as a zombie.
      Process.detach(pid)
      sleep Skynet::CONFIG[:TS_SERVER_START_DELAY]
    end
  end

  options[:script_path] = Skynet::CONFIG[:LAUNCHER_PATH]

  if ARGV.include?('console')
    ARGV.delete('console')
    Skynet::Console.start
  elsif options[:worker_type] || ARGV.detect { |a| a =~ /worker_type/ }
    Skynet::Worker.start(options)
  elsif ARGV.include?('stop')
    Skynet::Manager.stop(options)
  else
    # String key on purpose: Skynet::Manager.start reads options["daemonize"].
    options["daemonize"] = true if ARGV.include?('start')
    Skynet::Manager.start(options)
  end
end
35
+
36
# Deprecated alias for Skynet.start: emits a deprecation warning and then
# delegates, returning whatever start returns.
def self.new(options = {})
  warn "Skynet.new is deprecated, please use Skynet.start instead"
  start(options)
end
40
+ end
@@ -0,0 +1,62 @@
1
+ # http://darwinweb.net/article/Undoing_Rails_Monkey_Patch_To_Logger
2
+
3
+ require 'logger'
4
+
5
class Skynet

  # Error class defined in the Skynet namespace.
  class Error < StandardError
  end

  # Logger with a lazily created, process-wide shared instance.
  class Logger < ::Logger
    # Restore the stock formatter when something has moved it aside under the
    # name old_format_message (presumably the Rails patch described at the URL
    # at the top of this file -- confirm). The guard has to look at instance
    # methods: the previous `respond_to?(:format_message)` asked the class
    # object itself and was therefore always false.
    if method_defined?(:old_format_message) || private_method_defined?(:old_format_message)
      alias format_message old_format_message
    end

    @@log = nil

    # Returns the shared logger, creating it on first use from
    # Skynet::Config#logfile_location and Skynet::CONFIG[:SKYNET_LOG_LEVEL].
    def self.get
      unless @@log
        @@log = new(Skynet::Config.new.logfile_location)
        @@log.level = Skynet::CONFIG[:SKYNET_LOG_LEVEL]
      end
      @@log
    end

    # Replaces the shared logger with +log+.
    def self.log=(log)
      @@log = log
    end

    # Writes +args+ to the shared logger at UNKNOWN severity.
    def printlog(*args)
      self.class.get.unknown(*args)
    end

  end


  # This module can be mixed in to add logging methods to your class.
  # Every method takes no arguments and simply returns the shared
  # Skynet::Logger instance.
  module Loggable
    def debug
      Skynet::Logger.get
    end

    def info
      Skynet::Logger.get
    end

    def warn
      Skynet::Logger.get
    end

    def error
      Skynet::Logger.get
    end

    def fatal
      Skynet::Logger.get
    end

    def unknown
      Skynet::Logger.get
    end
  end
end
@@ -0,0 +1,706 @@
1
+ require 'yaml'
2
+
3
+ class Skynet
4
+ class Manager
5
+
6
+ class Error < StandardError
7
+ end
8
+
9
+ include SkynetDebugger
10
+
11
+ Skynet::CONFIG[:PERCENTAGE_OF_TASK_ONLY_WORKERS] ||= 0.7
12
+ Skynet::CONFIG[:PERCENTAGE_OF_MASTER_ONLY_WORKERS] ||= 0.2
13
+
14
# Fixed label for this class.
# NOTE(review): presumably read by SkynetDebugger when tagging log lines -- confirm.
def self.debug_class_desc
  'MANAGER'
end
17
+
18
+ attr_accessor :required_libs, :queue_id
19
+ attr_reader :config, :worker_queue, :wqts
20
+
21
# Builds a manager and its bookkeeping state.
#
# options - Hash:
#   :script_path   - command used to launch a worker; falls back to
#                    Skynet::CONFIG[:LAUNCHER_PATH]
#   :workers       - number of workers wanted (default 4)
#   :required_libs - libraries passed to each worker with -r (default [])
#   :queue_id      - queue the workers are started with (default 0)
#
# Raises Error when no script path is available from either source.
def initialize(options)
  # Resolve the fallback first; previously the raise ran before the `||`, so
  # the configured launcher path could never be used.
  @script_path = options[:script_path] || Skynet::CONFIG[:LAUNCHER_PATH]
  raise Error.new("You must provide a script path to Skynet::Manager.new.") unless @script_path
  # info "Skynet Launcher Path: [#{@script_path}]"
  @workers_requested   = options[:workers] || 4
  @required_libs       = options[:required_libs] || []
  @queue_id            = options[:queue_id] || 0
  @number_of_workers   = 0
  @workers_by_type     = {:master => [], :task => [], :any => []}
  @signaled_workers    = []
  @worker_queue        = {}      # worker_id => status message
  @workers_restarting  = 0
  @all_workers_started = false
  @config              = Skynet::Config.new
  @mutex               = Mutex.new
  @wqts                = Queue.new   # filled by worker_notify
end
38
+
39
# Enqueues a status item for the worker queue thread, which pops from @wqts.
def worker_notify(item)
  @wqts << item
end
42
+
43
# Starts the thread that drains @wqts. Each popped item is wrapped in a
# Skynet::WorkerStatusMessage, its started_at is truncated to an integer and
# it is stored in @worker_queue under its worker_id (guarded by @mutex). The
# queue is written to disk when more than 60 seconds have passed since the
# last save. Errors are logged and the loop carries on.
#
# Returns the Thread.
def start_worker_queue_thread
  Thread.new do
    saved_at = Time.now
    loop do
      message = @wqts.pop
      begin
        status = Skynet::WorkerStatusMessage.new(message)
        status.started_at = status.started_at.to_i
        @mutex.synchronize { @worker_queue[status.worker_id] = status }
        if Time.now - saved_at > 60
          save_worker_queue_to_file
          saved_at = Time.now
        end
      rescue Exception => e
        error "Error in worker queue thread #{e.inspect} #{e.backtrace.join("\n")}"
      end
    end
  end
end
64
+
65
# Boot sequence: reload the saved worker queue, start the queue thread,
# install the signal handlers, work out how many workers are still missing
# and launch them.
#
# Returns the result of add_worker.
def start_workers
  load_worker_queue_from_file
  start_worker_queue_thread

  setup_signals

  missing = workers_to_start(@workers_requested)
  already_running = @workers_requested - missing
  warn "Starting #{missing} workers. QUEUE: #{config.queue_name_by_id(queue_id)} #{already_running} already running."
  add_worker(missing)
end
75
+
76
+ ### maybe workers_to_start should be a method
77
# Works out how many new workers must be launched to reach the requested
# count. Every known pid that is still alive is counted as running
# (@number_of_workers is incremented); dead ones are marked as stopped.
#
# workers_to_start - number of workers wanted.
#
# Returns the number still missing, or 0 as soon as enough live workers
# have been found.
def workers_to_start(workers_to_start)
  worker_pids.each do |pid|
    if worker_alive?(pid)
      @number_of_workers += 1
      workers_to_start -= 1
    else
      mark_worker_as_stopped(pid)
    end
    return 0 if workers_to_start < 1
  end
  workers_to_start
end
91
+
92
# Waits, for at most 100 attempts, until as many workers have reported in as
# were started, sleeping one second per missing worker between attempts.
# Then marks start-up as finished and, if more workers are active than
# expected, adopts the larger number.
def check_started_workers
  begin
    1.upto(100) do |attempt|
      warn "Checking started workers, #{active_workers.size} out of #{@number_of_workers} after the #{attempt}th try..."
      break if active_workers.size >= @number_of_workers
      sleep(@number_of_workers - active_workers.size)
    end
  rescue Exception => e
    fatal "Something bad happened #{e.inspect} #{e.backtrace.join("\n")}"
  end

  @all_workers_started = true

  printlog "FINISHED STARTING ALL #{active_workers.size} WORKERS"
  if active_workers.size > @number_of_workers
    warn "EXPECTED #{@number_of_workers}"
    @number_of_workers = active_workers.size
  end
end
111
+
112
+ # the main application loop
113
# The main application loop. Once all workers have been started it calls
# check_workers and then sleeps Skynet::CONFIG[:WORKER_CHECK_DELAY] seconds,
# forever. SystemExit / Interrupt end the process; any other exception is
# logged and the loop continues.
def run
  loop do
    unless @all_workers_started
      # Workers are still starting: pause briefly rather than spinning the
      # CPU in a tight loop until the flag flips.
      sleep 0.1
      next
    end
    begin
      check_workers
      sleep Skynet::CONFIG[:WORKER_CHECK_DELAY]
    rescue SystemExit, Interrupt
      printlog "Manager Exiting!"
      exit
    rescue Exception => e
      fatal "Something bad happened #{e.inspect} #{e.backtrace.join("\n")}"
    end
  end
end
127
+
128
# One monitoring pass: drop workers whose process has gone, then reconcile
# the number of workers. Always returns true.
def check_workers
  debug("Checking on #{@number_of_workers} workers...") if !@shutdown
  check_running_pids
  check_number_of_workers
  true
end
134
+
135
# Removes every registered worker whose process is no longer alive, logging
# at info level during shutdown and at error level otherwise, and lowers
# @number_of_workers for each one.
#
# Returns the worker pids as they are after the clean-up.
def check_running_pids
  worker_pids.each do |pid|
    next if worker_alive?(pid)
    if @shutdown
      info "Worker #{pid} shut down gracefully. Removing from queue."
    else
      error "Worker #{pid} was in queue and but was not running. Removing from queue."
    end
    mark_worker_as_stopped(pid)
    @number_of_workers -= 1
  end
  worker_pids
end
149
+
150
# Reconciles the number of running workers with what is expected.
#   * shutting down: continue the shutdown and exit once no worker is left
#   * restart mode:  report progress and recompute @workers_restarting
#   * otherwise:     when the count differs from @number_of_workers, adopt
#                    the real count and, if fewer than 85% of the requested
#                    workers are running, start the missing ones
def check_number_of_workers
  if @shutdown
    worker_shutdown
    exit if worker_pids.size < 1
  elsif @workers_restarting > 0
    outstanding = @workers_requested - worker_pids.size
    if outstanding != 0
      warn "RESTART MODE: Expected #{@number_of_workers} workers. #{worker_pids.size} running. #{outstanding} are still restarting"
    else
      warn "RESTART MODE: Expected #{@number_of_workers} workers. #{worker_pids.size} running."
    end
    @workers_restarting = @workers_requested - worker_pids.size
  elsif worker_pids.size != @number_of_workers
    running_ratio = worker_pids.size.to_f / @workers_requested.to_f
    if running_ratio < 0.85
      missing = @workers_requested - worker_pids.size
      error "Expected #{@number_of_workers} workers. #{worker_pids.size} running. Starting #{missing}"
      @number_of_workers = worker_pids.size
      add_worker(missing)
    else
      error "Expected #{@number_of_workers} workers. #{worker_pids.size} running."
      @number_of_workers = worker_pids.size
    end
  end
end
179
+
180
# One step of the staged shutdown.
#
# While masters are still listed as active, every master whose process is
# alive is asked to stop via remove_worker and the method returns; the
# caller is expected to invoke it again on its next check. Once no master is
# listed, all remaining workers get TERM and @masters_dead is set. From then
# on the method only reports how many workers are still running.
#
# The previous version called check_number_of_workers from here, which calls
# back into this method; with a master that stayed in the active list that
# recursion never ended.
def worker_shutdown
  unless @masters_dead
    live_master_pids = active_workers.select { |w|
      w.map_or_reduce == "master" && worker_alive?(w.process_id)
    }.collect { |w| w.process_id }
    warn "Shutting down masters. #{worker_pids.size} workers still running." if worker_pids.size > 0

    unless live_master_pids.empty?
      warn "FOUND MORE RUNNING MASTERS WE HAVEN'T KILLED:", live_master_pids
      remove_worker(live_master_pids)
    end

    # Masters still registered: try again on the next pass instead of recursing.
    return if active_workers.detect { |w| w.map_or_reduce == "master" }

    signal_workers("TERM")
    @masters_dead = true
  end

  if worker_pids.size < 1
    info "No more workers running."
  else
    warn "Shutting down. #{worker_pids.size} workers still running."
  end
end
206
+
207
# True when Skynet.process_alive? reports the given pid as running.
def worker_alive?(worker_pid)
  Skynet.process_alive?(worker_pid)
end
210
+
211
# Plural alias: forwards all arguments to add_worker and returns its result.
def add_workers(*args)
  add_worker(*args)
end
214
+
215
# Launches +workers+ new worker processes.
#
# The first (workers * PERCENTAGE_OF_MASTER_ONLY_WORKERS).to_i workers are
# started as :master, the next (workers * PERCENTAGE_OF_TASK_ONLY_WORKERS).to_i
# as :task and the remainder as :any. Each one is started through
# Skynet.fork_and_exec, recorded in @workers_by_type and counted in
# @number_of_workers. Finally check_started_workers waits for them to report.
#
# workers - number of workers to launch (default 1).
#
# Returns an Array with the pids of the launched workers. (Previously the
# pid list was discarded and the result of check_started_workers leaked out,
# although Manager.start logs the return value as the new PIDs.)
def add_worker(workers=1)
  num_task_only_workers   = (workers * Skynet::CONFIG[:PERCENTAGE_OF_TASK_ONLY_WORKERS]).to_i
  num_master_only_workers = (workers * Skynet::CONFIG[:PERCENTAGE_OF_MASTER_ONLY_WORKERS]).to_i
  warn "Adding #{workers} WORKERS. Task Workers: #{num_task_only_workers}, Master Workers: #{num_master_only_workers} Master & Task Workers: #{workers - num_task_only_workers - num_master_only_workers}"

  @all_workers_started = false
  worker_types = {:task => 0, :master => 0, :any => 0}
  pids = (1..workers).collect do |ii|
    worker_type =
      if ii <= num_master_only_workers
        :master
      elsif ii <= num_master_only_workers + num_task_only_workers
        :task
      else
        :any
      end
    worker_types[worker_type] += 1

    cmd = "#{@script_path} --worker_type=#{worker_type}"
    cmd << " --config='#{Skynet::CONFIG[:CONFIG_FILE]}'" if Skynet::CONFIG[:CONFIG_FILE]
    cmd << " --queue_id=#{queue_id}"
    cmd << " -r #{required_libs.join(' -r ')}" if required_libs and not required_libs.empty?
    wpid = Skynet.fork_and_exec(cmd)
    Skynet.close_console
    (@workers_by_type[worker_type] ||= []) << wpid
    warn "Adding Worker ##{ii} PID: #{wpid} QUEUE: #{queue_id}, WORKER_TYPE?:#{worker_type}"
    @mutex.synchronize { @number_of_workers += 1 }
    sleep 0.01
    wpid
  end
  info "Worker Distribution", worker_types
  check_started_workers
  pids
end
251
+
252
# Asks the first +workers+ registered workers (default 1) to stop.
#
# Returns the result of remove_worker for those pids.
def remove_workers(workers=1)
  remove_worker(worker_pids[0...workers])
end
256
+
257
# Sends INT to each given worker, marks it as stopped, lowers
# @number_of_workers and records the pid in @signaled_workers.
#
# pids - a single pid, an Array of pids, or nil (treated as "none"; the old
#        code turned nil into [nil] and crashed in Process.kill).
#
# A pid whose process no longer exists is still cleaned up; the ESRCH error
# is logged, as signal_workers does.
#
# Returns the Array of pids that were processed.
def remove_worker(pids = nil)
  pids = Array(pids)
  info "Removing workers #{pids.join(",")} from worker queue. They will die gracefully when they finish what they're doing."
  pids.each do |wpid|
    begin
      Process.kill("INT", wpid)
    rescue Errno::ESRCH
      warn "Tried to kill a process that didn't exist #{wpid}"
    end
    mark_worker_as_stopped(wpid)
    @number_of_workers -= 1
    warn "REMOVING WORKER #{wpid}"
    @signaled_workers << wpid
  end
  pids
end
269
+
270
# Forgets a worker once its process is gone: when a status entry exists for
# +wpid+ and the process is no longer alive, every entry with that pid is
# removed from @worker_queue, and the status object found gets a fresh
# started_at and a nil process_id. Does nothing when the worker is unknown
# or still alive.
#
# The former `worker_pids.delete(...)` call was dropped: worker_pids builds
# a new Array on every call, so deleting from it had no effect.
def mark_worker_as_stopped(wpid)
  worker = @worker_queue.values.detect { |status| status.process_id == wpid }
  return unless worker and not worker_alive?(wpid)

  @worker_queue.delete_if { |_worker_id, status| status.process_id == wpid }
  worker.started_at = Time.now.to_f
  worker.process_id = nil
end
279
+
280
# Sends +signal+ to active workers.
#
# signal      - signal name, e.g. "TERM".
# worker_type - optional type or Array of types restricting the targets:
#               :idle selects workers without a task_id; any other value
#               selects workers whose pid is listed in @workers_by_type
#               under that type. A worker is signalled when it matches at
#               least one of the given types. With no type, every active
#               worker is signalled.
#
# The earlier implementation called `next` inside the inner loop over the
# types, which only skipped to the next type, so the filter never excluded
# a worker.
#
# A worker whose process no longer exists is logged and still recorded in
# @signaled_workers.
def signal_workers(signal,worker_type=[])
  wanted_types = [worker_type].flatten.compact
  active_workers.each do |worker|
    unless wanted_types.empty?
      matches = wanted_types.any? do |type|
        if type == :idle
          !worker.task_id
        else
          Array(@workers_by_type[type]).include?(worker.process_id)
        end
      end
      next unless matches
    end

    warn "SHUTTING DOWN #{worker.process_id} MR: #{worker.map_or_reduce} SIG: #{signal}"
    begin
      Process.kill(signal,worker.process_id)
    rescue Errno::ESRCH
      warn "Tried to kill a process that didn't exist #{worker.process_id}"
    end
    # mark_worker_as_stopped(worker.process_id)
    @signaled_workers << worker.process_id
  end
end
300
+
301
# Forceful restart: TERM to every worker, then INT to the :master and :any
# workers, wait one second per expected worker and re-run the start-up check.
def hard_restart_workers
  @all_workers_started = false
  signal_workers("TERM")
  @restart = true
  [:master, :any].each { |type| signal_workers("INT", type) }
  sleep @number_of_workers
  check_started_workers
end
310
+
311
+ # ===========================
312
+ # = XXX THIS IS A HORRIBLE HACK =
313
+ # ===========================
314
# Restarts a single worker: under @mutex, sends it HUP, marks it as stopped
# and bumps @workers_restarting; afterwards sleeps
# Skynet::CONFIG[:WORKER_CHECK_DELAY] seconds.
def restart_worker(wpid)
  info "RESTARTING WORKER #{wpid}"
  @mutex.synchronize {
    Process.kill("HUP", wpid)
    mark_worker_as_stopped(wpid)
    @workers_restarting += 1
  }
  sleep Skynet::CONFIG[:WORKER_CHECK_DELAY]
end
323
+
324
# Sends HUP to every active worker, waits one second per expected worker
# and then re-runs the start-up check.
def restart_workers
  @all_workers_started = false
  signal_workers("HUP")
  sleep(@number_of_workers)
  check_started_workers
end
330
+
331
# Installs the manager's signal handlers:
#   HUP  - restart the workers
#   TERM - first time: graceful shutdown; once @term is set: terminate
#   INT  - graceful shutdown, or terminate when a shutdown is already running
def setup_signals
  Signal.trap("HUP") { restart_workers }

  Signal.trap("TERM") do
    if @term
      terminate
    else
      @term = true
      shutdown
    end
  end

  Signal.trap("INT") do
    @shutdown ? terminate : shutdown
  end
end
352
+
353
# Begins a graceful shutdown: sets @shutdown and sends TERM via
# signal_workers with the types :idle, :master and :any.
def shutdown
  info :shutdown
  @shutdown = true
  signal_workers("TERM", [:idle, :master, :any])
end
358
+
359
# Immediate stop: sends KILL to every active worker, waits one second and
# exits the process.
def terminate
  info :terminate
  signal_workers("KILL")
  sleep(1)
  exit
end
365
+
366
# Dumps @worker_queue as YAML into the manager stat file
# (Skynet.config.manager_statfile_location), replacing its contents.
def save_worker_queue_to_file
  statfile = Skynet.config.manager_statfile_location
  debug "Writing worker queue to file #{statfile}"
  File.open(statfile, "w") { |f| f.write(YAML.dump(@worker_queue)) }
end
372
+
373
# Restores @worker_queue from the manager stat file, if that file exists.
# When the file cannot be parsed, or does not contain a Hash, the problem is
# logged, the queue is reset to {} and the file is rewritten.
#
# Uses File.exist? (File.exists? is gone in current Ruby, and the rest of
# this file already uses exist?), names the path rather than the File object
# in the error message, and rescues StandardError so that signals and exit
# requests are not swallowed.
#
# NOTE(review): YAML.load is applied to a file on local disk; if that file
# can ever be written by an untrusted party, switch to a safe loader.
def load_worker_queue_from_file
  statfile = Skynet.config.manager_statfile_location
  return unless File.exist?(statfile)

  File.open(statfile,"r") do |f|
    begin
      @worker_queue = YAML.load(f.read)
      raise Error.new("Bad Manager File returned type #{@worker_queue.class}") unless @worker_queue.is_a?(Hash)
    rescue StandardError => e
      error "Error loading manager stats file: #{statfile}", e
      @worker_queue = {}
      save_worker_queue_to_file
    end
  end
end
387
+
388
# Drops every entry of @worker_queue whose process_id is not an integer
# (i.e. workers that were marked as stopped) and returns fresh stats.
#
# Integer replaces the removed Fixnum constant.
def prune_inactive_worker_stats
  @worker_queue.delete_if { |_worker_id, worker| !worker.process_id.is_a?(Integer) }
  stats
end
392
+
393
# Collects and sums the stats of the managers running on several hosts.
#
# manager_hosts - Array of host names; defaults to
#                 Skynet::CONFIG[:MANAGER_HOSTS] or ["localhost"].
#
# Each manager is contacted over DRb on
# Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_PORT]. Every integer value in a
# manager's stats is added to the total under the same key. Hosts that
# cannot be reached are logged and skipped.
#
# Returns a Hash with the totals, plus :servers (host => raw stats),
# :hosts (the list of hosts asked) and :time.
def self.stats_for_hosts(manager_hosts=nil)
  manager_hosts ||= Skynet::CONFIG[:MANAGER_HOSTS] || ["localhost"]
  stats = {
    :servers           => {},
    :processed         => 0,
    :number_of_workers => 0,
    :active_workers    => 0,
    :idle_workers      => 0,
    :hosts             => 0,
    :masters           => 0,
    :taskworkers       => 0,
    :time              => Time.now.to_f
  }
  servers = {}
  manager_hosts.each do |manager_host|
    begin
      manager = DRbObject.new(nil,"druby://#{manager_host}:#{Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_PORT]}")
      manager_stats = manager.stats
      servers[manager_host] = manager_stats
      manager_stats.each do |key,value|
        # Integer instead of the removed Fixnum constant.
        next unless value.is_a?(Integer)
        stats[key] ||= 0
        stats[key] += value
      end
    rescue DRb::DRbConnError, Errno::ECONNREFUSED
      warn "Couldn't get stats from manager at druby://#{manager_host}:#{Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_PORT]}"
    end
  end
  stats[:servers] = servers
  stats[:hosts]   = manager_hosts
  stats
end
425
+
426
# Summarises the state of this manager's workers.
#
# Returns a Hash with the host name, earliest/latest started_at values (for
# all known workers and for active ones), processed counters, and worker
# counts broken down by tasktype ("master", "any", "task") for all active
# workers, for those with a map_or_reduce value ("active") and for those
# without one ("idle").
def stats
  all_started    = @worker_queue.values.collect { |w| w.started_at }.sort
  active_started = active_workers.collect { |w| w.started_at }.sort
  stats = {
    :hostname                    => hostname,
    :earliest_update             => all_started.first,
    :latest_update               => all_started.last,
    :active_earliest_update      => active_started.first,
    :active_latest_update        => active_started.last,
    :processed                   => 0,
    :processed_by_active_workers => 0,
    :number_of_workers           => 0,
    :idle_workers                => 0,
    :shutdown_workers            => 0,
  }
  @worker_queue.values.each { |w| stats[:processed] += w.processed }
  active_workers.each { |w| stats[:processed_by_active_workers] += w.processed }

  busy, idle = active_workers.partition { |w| w.map_or_reduce }
  stats[:number_of_workers] = active_workers.size
  stats[:active_workers]    = busy.size
  stats[:idle_workers]      = idle.size
  stats[:shutdown_workers]  = inactive_workers.size

  # Number of workers in +list+ whose tasktype, as a string, equals +type+.
  count_of = lambda { |list, type| list.select { |w| w.tasktype.to_s == type }.size }

  stats[:masters]                       = count_of.call(active_workers, "master")
  stats[:master_or_task_workers]        = count_of.call(active_workers, "any")
  stats[:taskworkers]                   = count_of.call(active_workers, "task")
  stats[:active_masters]                = count_of.call(busy, "master")
  stats[:active_master_or_task_workers] = count_of.call(busy, "any")
  stats[:active_taskworkers]            = count_of.call(busy, "task")
  stats[:idle_masters]                  = count_of.call(idle, "master")
  stats[:idle_master_or_task_workers]   = count_of.call(idle, "any")
  stats[:idle_taskworkers]              = count_of.call(idle, "task")
  stats
end
459
+
460
# Status entries of @worker_queue whose process_id is an integer.
# Integer replaces the removed Fixnum constant.
def active_workers
  @worker_queue.values.select { |status| status.process_id.is_a?(Integer) }
end
463
+
464
# Status entries of @worker_queue whose process_id is not an integer.
# Integer replaces the removed Fixnum constant.
def inactive_workers
  @worker_queue.values.reject { |status| status.process_id.is_a?(Integer) }
end
467
+
468
# Process ids of all active workers, as a new Array on every call.
def worker_pids
  active_workers.map { |status| status.process_id }
end
471
+
472
# Process id of the process this method is called in.
def parent_pid
  Process.pid
end
475
+
476
# Host name of this machine, looked up once and cached in @machine_name.
def hostname
  @machine_name = Socket.gethostname unless @machine_name
  @machine_name
end
479
+
480
# Always returns true.
# NOTE(review): presumably a liveness probe for remote (DRb) callers -- confirm.
def ping
  true
end
483
+
484
# DRb URI of the manager on this machine, built from
# Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_PORT].
def self.local_manager_uri
  port = Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_PORT]
  "druby://localhost:#{port}"
end
487
+
488
# Returns a DRbObject proxy pointing at local_manager_uri.
def self.get
  uri = local_manager_uri
  DRbObject.new(nil, uri)
end
491
+
492
# Command-line entry point for the manager.
#
# Parses ARGV (consuming the options it recognises), loads the required
# libraries and the config file, and then either
#   * talks to an already running manager to add or remove workers and
#     exits, or
#   * checks that a message queue is reachable, refuses to start when the
#     pid file points at a live process, writes the pid file and runs a new
#     manager -- in a forked, detached session when options["daemonize"]
#     is set.
#
# options - Hash of presets; recognised keys are :add_workers,
#           :remove_workers, :use_rails, :required_libs, :config_file,
#           :queue, :queue_id, :workers, :pid_file, :script_path and the
#           String key "daemonize".
#
# Raises Skynet::Error when both a queue name and a queue id are given and
# Skynet::ConnectionError when no message queue can be reached.
#
# Changes from the previous version:
#   * load failures are rescued as LoadError; MissingSourceFile is not
#     defined in this file. NOTE(review): it is believed to be a Rails-era
#     LoadError alias/subclass -- confirm.
#   * the stray "Skynet Stopped" log entry written during start-up is gone.
#   * unused locals were removed.
def self.start(options={})
  options[:add_workers]    ||= nil
  options[:remove_workers] ||= nil
  options[:use_rails]      ||= false
  options[:required_libs]  ||= []

  config = Skynet::Config.new

  OptionParser.new do |opt|
    opt.banner = %{Usage:
> skynet [options]

OR to daemonize

> skynet [options] start
> skynet stop

You can also run:
> skynet console [options]
}
    opt.on('--restart-all-workers', 'Restart All Workers') do |v|
      puts "Restarting ALL workers on ALL machines."
      begin
        manager = self.get
        manager.restart_all_workers
        exit
      rescue DRb::DRbConnError => e
        puts "No manager running at #{local_manager_uri} ERROR: #{e.inspect}"
        exit
      end
    end
    opt.on('--restart-workers', 'Restart Workers') do |v|
      puts "Restarting workers on this machine."
      begin
        manager = self.get
        manager.restart_workers
        exit
      rescue DRb::DRbConnError => e
        puts "No manager running at #{local_manager_uri} ERROR: #{e.inspect}"
        exit
      end
    end
    opt.on('--increment-worker-version', 'Increment Worker Version') do |v|
      ver = Skynet::MessageQueue.new.increment_worker_version
      puts "Incrementing Worker Version to #{ver}"
      exit
    end
    opt.on('--add-workers=WORKERS', 'Number of workers to add.') do |v|
      options[:add_workers] = v.to_i
    end
    opt.on('--remove-workers=WORKERS', 'Number of workers to remove.') do |v|
      options[:remove_workers] = v.to_i
    end
    opt.on('--workers=WORKERS', 'Number of workers to start.') do |v|
      options[:workers] = v.to_i
    end
    opt.on('-r', '--required LIBRARY', 'Require the specified libraries') do |v|
      options[:required_libs] << File.expand_path(v)
    end
    opt.on('--config=CONFIG_FILE', 'Where to find the skynet.rb config file') do |v|
      options[:config_file] = File.expand_path(v)
    end
    opt.on('--queue=QUEUE_NAME', 'Which queue should these workers use (default "default").') do |v|
      options[:queue] = v
    end
    opt.on('--queue_id=queue_id', 'Which queue should these workers use (default 0).') do |v|
      options[:queue_id] = v.to_i
    end
    opt.parse!(ARGV)
  end

  if options[:queue]
    if options[:queue_id]
      raise Skynet::Error.new("You may either provide a queue_id or a queue, but not both.")
    end
    options[:queue_id] = config.queue_id_by_name(options[:queue])
  else
    options[:queue_id] ||= 0
  end

  options[:required_libs].each do |adlib|
    begin
      require adlib
    rescue LoadError => e
      error "The included lib #{adlib} was not found: #{e.inspect}"
      exit
    end
  end

  options[:config_file] ||= Skynet::CONFIG[:CONFIG_FILE]
  if options[:config_file]
    begin
      require options[:config_file]
    rescue LoadError => e
      error "The config file at #{options[:config_file]} was not found: #{e.inspect}"
      exit
    end
  elsif Skynet::CONFIG[:SYSTEM_RUNNER]
    error "Config file missing. Please add a config/skynet_config.rb before starting."
  end

  options[:workers]     ||= Skynet::CONFIG[:NUMBER_OF_WORKERS] || 4
  options[:pid_file]    ||= Skynet::Config.pidfile_location
  options[:script_path] ||= Skynet::CONFIG[:LAUNCHER_PATH]

  # Handle add or remove workers
  if options[:add_workers] or options[:remove_workers]
    begin
      manager = self.get
      if options[:add_workers]
        pids = manager.add_worker(options[:add_workers])
        warn "ADDING #{options[:add_workers]} workers PIDS: #{pids.inspect}"
      elsif options[:remove_workers]
        pids = manager.remove_workers(options[:remove_workers])
        warn "REMOVING #{options[:remove_workers]} workers PIDS: #{pids.inspect}"
      end
    rescue DRb::DRbConnError => e
      warn "Couldnt add or remove workers. There are probably no workers running. At least I couldn't find a skynet_manager around at #{local_manager_uri} #{e.inspect}"
    rescue Exception => e
      warn "Couldnt add or remove workers #{e.inspect} #{e.backtrace.join("\n")}"
    end
    exit

  else

    begin
      debug "Making sure there's an available MessageQueue"
      Skynet::MessageQueue.new
    rescue Skynet::ConnectionError => e
      fatal "Couldn't get MessageQueue! #{e.message}"
      raise Skynet::ConnectionError.new("ERROR! Couldn't get MessageQueue! #{e.message}")
    end

    debug "CONTINUING TO START : There IS an available MessageQueue", options

    begin
      if oldpid = read_pid_file
        errmsg = nil
        if Skynet.process_alive?(oldpid)
          errmsg = "Another Skynet Manager is running at pid: #{oldpid}"
          warn errmsg
          stderr errmsg
          exit
        else
          errmsg = "Deleting stale pidfile #{Skynet::Config.pidfile_location}"
          warn errmsg
          stderr errmsg
          File.unlink(Skynet::Config.pidfile_location) if File.exist?(Skynet::Config.pidfile_location)
        end
      end

      printlog "STARTING THE MANAGER!!!!!!!!!!! port: #{Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_PORT]}"
      puts "Starting Skynet..."
      if options["daemonize"]
        Skynet.safefork do
          Process.setsid
          write_pid_file
          Skynet.close_console
          run_manager(options)
          exit!
        end
      else
        write_pid_file
        run_manager(options)
      end
    rescue SystemExit, Interrupt
    rescue Exception => e
      fatal("Error in Manager. Manager Dying. #{e.inspect} #{e.backtrace}")
    end
  end
end
663
+
664
# Creates the manager, publishes it over DRb on
# Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_PORT], starts its workers and enters
# its main loop.
def self.run_manager(options)
  port = Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_PORT]
  @manager     = Skynet::Manager.new(options)
  @drb_manager = DRb.start_service("druby://:#{port}", @manager)
  @manager.start_workers
  info "MANAGER STARTED ON PORT: #{Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_PORT]}"
  @manager.run
end
671
+
672
+ # stop the daemon, nicely at first, and then forcefully if necessary
673
# Stops the daemon, nicely at first and then forcefully if necessary.
#
# Reads the pid from the pid file (exits when there is none), sends TERM and
# polls the process once a second for up to 180 seconds, repeats that once,
# and finally sends KILL. As soon as the process is gone (Errno::ESRCH) the
# stop is logged. The pid file is removed in every case.
#
# options - accepted but not used.
def self.stop(options = {})
  pid = read_pid_file
  unless pid
    puts "The Skynet Manager is not running. No PID found in #{Skynet::Config.pidfile_location}"
    exit
  end
  $stdout.puts "Stopping Skynet"
  printlog "Stopping Skynet"
  2.times do
    Process.kill("TERM", pid)
    180.times do
      Process.kill(0, pid)
      sleep(1)
    end
  end
  $stdout.puts("using kill -9 #{pid}")
  Process.kill("KILL", pid)
rescue Errno::ESRCH => e
  printlog "Skynet Stopped"
ensure
  File.unlink(Skynet::Config.pidfile_location) if File.exist?(Skynet::Config.pidfile_location)
end
692
+
693
# Reads the manager pid from the pid file.
#
# Returns the pid as a positive Integer, or nil when the file is missing or
# does not contain a positive number. Rejecting 0 matters: String#to_i turns
# an empty or garbled file into 0, and 0 is truthy in Ruby, so callers would
# go on to signal pid 0 -- which addresses the caller's own process group.
def self.read_pid_file
  pidfile = Skynet::Config.pidfile_location
  return nil unless File.exist?(pidfile)

  pid = File.read(pidfile).to_i
  pid > 0 ? pid : nil
end
697
+
698
# Writes the current process id (followed by a newline) to the pid file and
# registers an at_exit hook that removes the file again, provided it still
# holds this process's pid.
#
# File.open is used instead of Kernel#open, which would treat a path
# starting with "|" as a command to run.
def self.write_pid_file
  pidfile = Skynet::Config.pidfile_location
  info "Writing PIDFILE to #{pidfile}"
  File.open(pidfile, "w") { |f| f << Process.pid << "\n" }
  at_exit { File.unlink(pidfile) if read_pid_file == Process.pid }
end
704
+
705
+ end
706
+ end