timocratic-skynet 0.9.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (145) hide show
  1. data/History.txt +152 -0
  2. data/License.txt +20 -0
  3. data/Manifest.txt +144 -0
  4. data/README.txt +178 -0
  5. data/Rakefile +5 -0
  6. data/app_generators/skynet_install/USAGE +5 -0
  7. data/app_generators/skynet_install/skynet_install_generator.rb +94 -0
  8. data/app_generators/skynet_install/templates/migration.rb +43 -0
  9. data/app_generators/skynet_install/templates/skynet_config.rb +50 -0
  10. data/app_generators/skynet_install/templates/skynet_initializer.rb +1 -0
  11. data/app_generators/skynet_install/templates/skynet_mysql_schema.sql +33 -0
  12. data/bin/skynet +71 -0
  13. data/bin/skynet_install +36 -0
  14. data/bin/skynet_tuplespace_server +74 -0
  15. data/config/hoe.rb +75 -0
  16. data/config/requirements.rb +17 -0
  17. data/examples/dgrep/README +70 -0
  18. data/examples/dgrep/config/skynet_config.rb +26 -0
  19. data/examples/dgrep/data/shakespeare/README +2 -0
  20. data/examples/dgrep/data/shakespeare/poetry/loverscomplaint +381 -0
  21. data/examples/dgrep/data/shakespeare/poetry/rapeoflucrece +2199 -0
  22. data/examples/dgrep/data/shakespeare/poetry/sonnets +2633 -0
  23. data/examples/dgrep/data/shakespeare/poetry/various +640 -0
  24. data/examples/dgrep/data/shakespeare/poetry/venusandadonis +1423 -0
  25. data/examples/dgrep/data/testfile1.txt +1 -0
  26. data/examples/dgrep/data/testfile2.txt +1 -0
  27. data/examples/dgrep/data/testfile3.txt +1 -0
  28. data/examples/dgrep/data/testfile4.txt +1 -0
  29. data/examples/dgrep/lib/dgrep.rb +59 -0
  30. data/examples/dgrep/lib/mapreduce_test.rb +32 -0
  31. data/examples/dgrep/lib/most_common_words.rb +45 -0
  32. data/examples/dgrep/script/dgrep +75 -0
  33. data/examples/rails_mysql_example/README +66 -0
  34. data/examples/rails_mysql_example/Rakefile +10 -0
  35. data/examples/rails_mysql_example/app/controllers/application.rb +10 -0
  36. data/examples/rails_mysql_example/app/helpers/application_helper.rb +3 -0
  37. data/examples/rails_mysql_example/app/models/user.rb +21 -0
  38. data/examples/rails_mysql_example/app/models/user_favorite.rb +5 -0
  39. data/examples/rails_mysql_example/app/models/user_mailer.rb +12 -0
  40. data/examples/rails_mysql_example/app/views/user_mailer/welcome.erb +5 -0
  41. data/examples/rails_mysql_example/config/boot.rb +109 -0
  42. data/examples/rails_mysql_example/config/database.yml +42 -0
  43. data/examples/rails_mysql_example/config/environment.rb +59 -0
  44. data/examples/rails_mysql_example/config/environments/development.rb +18 -0
  45. data/examples/rails_mysql_example/config/environments/production.rb +19 -0
  46. data/examples/rails_mysql_example/config/environments/test.rb +22 -0
  47. data/examples/rails_mysql_example/config/initializers/inflections.rb +10 -0
  48. data/examples/rails_mysql_example/config/initializers/mime_types.rb +5 -0
  49. data/examples/rails_mysql_example/config/initializers/skynet.rb +1 -0
  50. data/examples/rails_mysql_example/config/routes.rb +35 -0
  51. data/examples/rails_mysql_example/config/skynet_config.rb +36 -0
  52. data/examples/rails_mysql_example/db/migrate/001_create_skynet_tables.rb +43 -0
  53. data/examples/rails_mysql_example/db/migrate/002_create_users.rb +16 -0
  54. data/examples/rails_mysql_example/db/migrate/003_create_user_favorites.rb +14 -0
  55. data/examples/rails_mysql_example/db/schema.rb +85 -0
  56. data/examples/rails_mysql_example/db/skynet_mysql_schema.sql +33 -0
  57. data/examples/rails_mysql_example/doc/README_FOR_APP +2 -0
  58. data/examples/rails_mysql_example/lib/tasks/rails_mysql_example.rake +20 -0
  59. data/examples/rails_mysql_example/public/.htaccess +40 -0
  60. data/examples/rails_mysql_example/public/404.html +30 -0
  61. data/examples/rails_mysql_example/public/422.html +30 -0
  62. data/examples/rails_mysql_example/public/500.html +30 -0
  63. data/examples/rails_mysql_example/public/dispatch.cgi +10 -0
  64. data/examples/rails_mysql_example/public/dispatch.fcgi +24 -0
  65. data/examples/rails_mysql_example/public/dispatch.rb +10 -0
  66. data/examples/rails_mysql_example/public/favicon.ico +0 -0
  67. data/examples/rails_mysql_example/public/images/rails.png +0 -0
  68. data/examples/rails_mysql_example/public/index.html +277 -0
  69. data/examples/rails_mysql_example/public/javascripts/application.js +2 -0
  70. data/examples/rails_mysql_example/public/javascripts/controls.js +963 -0
  71. data/examples/rails_mysql_example/public/javascripts/dragdrop.js +972 -0
  72. data/examples/rails_mysql_example/public/javascripts/effects.js +1120 -0
  73. data/examples/rails_mysql_example/public/javascripts/prototype.js +4225 -0
  74. data/examples/rails_mysql_example/public/robots.txt +5 -0
  75. data/examples/rails_mysql_example/script/about +3 -0
  76. data/examples/rails_mysql_example/script/console +3 -0
  77. data/examples/rails_mysql_example/script/destroy +3 -0
  78. data/examples/rails_mysql_example/script/generate +3 -0
  79. data/examples/rails_mysql_example/script/performance/benchmarker +3 -0
  80. data/examples/rails_mysql_example/script/performance/profiler +3 -0
  81. data/examples/rails_mysql_example/script/performance/request +3 -0
  82. data/examples/rails_mysql_example/script/plugin +3 -0
  83. data/examples/rails_mysql_example/script/process/inspector +3 -0
  84. data/examples/rails_mysql_example/script/process/reaper +3 -0
  85. data/examples/rails_mysql_example/script/process/spawner +3 -0
  86. data/examples/rails_mysql_example/script/runner +3 -0
  87. data/examples/rails_mysql_example/script/server +3 -0
  88. data/examples/rails_mysql_example/test/fixtures/user_favorites.yml +9 -0
  89. data/examples/rails_mysql_example/test/fixtures/users.yml +11 -0
  90. data/examples/rails_mysql_example/test/test_helper.rb +38 -0
  91. data/examples/rails_mysql_example/test/unit/user_favorite_test.rb +8 -0
  92. data/examples/rails_mysql_example/test/unit/user_test.rb +8 -0
  93. data/extras/README +7 -0
  94. data/extras/init.d/skynet +87 -0
  95. data/extras/nagios/check_skynet.sh +121 -0
  96. data/extras/rails/controllers/skynet_controller.rb +43 -0
  97. data/extras/rails/views/skynet/index.rhtml +137 -0
  98. data/lib/skynet.rb +95 -0
  99. data/lib/skynet/mapreduce_helper.rb +74 -0
  100. data/lib/skynet/mapreduce_test.rb +56 -0
  101. data/lib/skynet/message_queue_adapters/message_queue_adapter.rb +70 -0
  102. data/lib/skynet/message_queue_adapters/mysql.rb +509 -0
  103. data/lib/skynet/message_queue_adapters/tuple_space.rb +316 -0
  104. data/lib/skynet/skynet_active_record_extensions.rb +280 -0
  105. data/lib/skynet/skynet_config.rb +232 -0
  106. data/lib/skynet/skynet_console.rb +50 -0
  107. data/lib/skynet/skynet_console_helper.rb +66 -0
  108. data/lib/skynet/skynet_debugger.rb +138 -0
  109. data/lib/skynet/skynet_guid_generator.rb +68 -0
  110. data/lib/skynet/skynet_job.rb +892 -0
  111. data/lib/skynet/skynet_launcher.rb +40 -0
  112. data/lib/skynet/skynet_logger.rb +62 -0
  113. data/lib/skynet/skynet_manager.rb +706 -0
  114. data/lib/skynet/skynet_message.rb +359 -0
  115. data/lib/skynet/skynet_message_queue.rb +136 -0
  116. data/lib/skynet/skynet_partitioners.rb +96 -0
  117. data/lib/skynet/skynet_ruby_extensions.rb +53 -0
  118. data/lib/skynet/skynet_task.rb +118 -0
  119. data/lib/skynet/skynet_tuplespace_server.rb +83 -0
  120. data/lib/skynet/skynet_worker.rb +451 -0
  121. data/lib/skynet/version.rb +9 -0
  122. data/script/destroy +14 -0
  123. data/script/generate +14 -0
  124. data/script/txt2html +74 -0
  125. data/setup.rb +1585 -0
  126. data/tasks/deployment.rake +34 -0
  127. data/tasks/environment.rake +7 -0
  128. data/tasks/website.rake +17 -0
  129. data/test/test_active_record_extensions.rb +138 -0
  130. data/test/test_generator_helper.rb +20 -0
  131. data/test/test_helper.rb +10 -0
  132. data/test/test_mysql_message_queue_adapter.rb +263 -0
  133. data/test/test_skynet.rb +19 -0
  134. data/test/test_skynet_install_generator.rb +49 -0
  135. data/test/test_skynet_job.rb +717 -0
  136. data/test/test_skynet_manager.rb +157 -0
  137. data/test/test_skynet_message.rb +229 -0
  138. data/test/test_skynet_task.rb +24 -0
  139. data/test/test_tuplespace_message_queue.rb +174 -0
  140. data/website/index.html +181 -0
  141. data/website/index.txt +98 -0
  142. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  143. data/website/stylesheets/screen.css +138 -0
  144. data/website/template.rhtml +48 -0
  145. metadata +247 -0
@@ -0,0 +1,96 @@
1
+ class Skynet
2
+ # Collection of partitioning utilities
3
+ class Partitioners
4
+ include SkynetDebugger
5
+
6
+ # Split one block of data into partitions
7
+ #
8
+ class SimplePartitionData < Partitioners
9
+
10
# Splits one flat array into at most +partitions+ sub-arrays.
#
# data       - Array to split. nil or an empty array is returned unchanged.
# partitions - Desired number of partitions (expected to be a positive Integer).
#
# Three strategies are used:
# * partitions >= data.length: one single-element partition per datum.
# * data.length >= 2 * partitions: contiguous slices of
#   ceil(length / partitions) elements; the last slice may be shorter.
# * otherwise: round-robin distribution, which spreads elements evenly.
#
# Returns an Array of non-empty Arrays. Fewer than +partitions+ entries are
# returned when there is not enough data to fill them; the result never
# contains nil or empty placeholder partitions.
def self.reduce_partition(data, partitions)
  return data if (not data) or data.empty?

  if partitions >= data.length
    data.map { |datum| [datum] }
  elsif data.length >= partitions * 2
    # Quicker but less "fair" method: fixed-size contiguous slices.
    size = data.length / partitions
    size += 1 if data.length % partitions != 0 # room for the leftovers

    # each_slice stops at the end of the data, so rounding the slice size
    # up can never yield slices that start beyond the last element.
    data.each_slice(size).to_a
  else
    # Slower method, but partitions evenly.
    buckets = Array.new(partitions) { [] }
    data.each_with_index { |datum, i| buckets[i % partitions] << datum }
    buckets
  end
end
46
+ end
47
+
48
+
49
+ class RecombineAndSplit < Partitioners
50
+ # Tries to be smart about what kind of data it's getting, whether an array of arrays or an array of arrays of arrays.
51
+ #
52
# Concatenates the per-mapper result arrays into one flat list (one level
# only) and hands it to SimplePartitionData to be re-split into
# +new_partitions+ partitions.
#
# Input that is not a non-empty Array whose first element is an Array is
# returned untouched.
def self.reduce_partition(post_map_data,new_partitions)
  recombinable = post_map_data.is_a?(Array) &&
                 !post_map_data.empty? &&
                 post_map_data.first.is_a?(Array)
  return post_map_data unless recombinable

  ### A full flatten was tried here originally and "breaks badly", so only
  ### the outermost level of nesting is merged.
  combined = Array.new
  post_map_data.each { |part| combined += part }

  repartitioned = Skynet::Partitioners::SimplePartitionData.reduce_partition(combined, new_partitions)
  debug "POST PARTITIONED DATA_SIZE", repartitioned.size
  debug "POST PARTITIONED DATA", repartitioned
  repartitioned
end
67
+ end
68
+
69
+ class ArrayDataSplitByFirstEntry < Partitioners
70
+ # Smarter partitioner for array data, generates simple sum of array[0]
71
+ # and ensures that all arrays sharing that key go into the same partition.
72
+ #
73
# Distributes [key, value, ...] arrays over +new_partitions+ buckets so that
# arrays sharing the same Integer or String key land in the same bucket.
#
# post_map_data  - Array of partitions, each an Array of candidate arrays.
#                  Entries that are not Arrays of at least two elements are
#                  skipped.
# new_partitions - Number of buckets to produce.
#
# Bucket selection:
# * Integer key: key % new_partitions
# * String key:  String#sum checksum % new_partitions
# * other keys:  round-robin (no same-key guarantee)
#
# Returns an Array of +new_partitions+ Arrays (some may be empty).
def self.reduce_partition(post_map_data, new_partitions)
  partitions = (0...new_partitions).map { Array.new }
  cnt = 0
  post_map_data.each do |partition|
    partition.each do |array|
      next unless array.is_a?(Array) and array.size >= 2

      # Integer (rather than the removed Fixnum constant) keeps this working
      # on current Rubies and also keys arbitrarily large integers.
      key = case array[0]
            when Integer
              array[0] % new_partitions
            when String
              array[0].sum % new_partitions
            else
              cnt += 1
              cnt % new_partitions
            end
      partitions[key] << array
    end
  end
  partitions
end
93
+ end
94
+
95
+ end
96
+ end
@@ -0,0 +1,53 @@
1
module Enumerable
  # Runs this collection through a Skynet map/reduce job and returns whatever
  # the job's #run returns.
  #
  # klass   - Optional map/reduce class; its to_s is passed as
  #           :map_reduce_class and used in the job names.
  # options - Job options merged over the defaults below.
  # block   - Optional map step; when given it is passed as :map.
  #
  # A Hash receiver is converted into an array of single-pair hashes; any
  # other receiver is passed as the map data itself. A Skynet::AsyncJob is
  # used only when options[:async] is set and no block is given; otherwise a
  # Skynet::Job is built with :local_master => true.
  def mapreduce(klass=nil,options={},&block)
    map_data = is_a?(Hash) ? map { |key, value| {key => value} } : self

    jobopts = {
      :mappers               => 20000,
      :map_data              => map_data,
      :name                  => "#{klass} Enumerable MASTER",
      :map_name              => "#{klass} Enumerable MAP",
      :reduce_name           => "#{klass} Enumerable REDUCE",
      :map_timeout           => 3600,
      :reduce_timeout        => 3600,
      :master_timeout        => 3600,
      :master_result_timeout => 3600
    }
    jobopts[:map_reduce_class] = klass.to_s if klass
    options.each { |key, value| jobopts[key] = value }
    jobopts[:map] = block if block_given?

    run_async = jobopts[:async] && !block_given?
    job = if run_async
            Skynet::AsyncJob.new(jobopts)
          else
            Skynet::Job.new(jobopts.merge(:local_master => true))
          end
    job.run
  end
end
36
+
37
class String
  ### Adapted from ActiveSupport::Inflector.
  # Looks up the constant named by this string.
  #
  # Raises NameError when the string is not a syntactically valid constant
  # path (CamelCase segments joined by "::", optionally with a leading "::")
  # or when the constant is not defined.
  #
  # Examples
  #   "Module".constantize #=> Module
  #   "Class".constantize  #=> Class
  def constantize
    unless /\A(?:::)?([A-Z]\w*(?:::[A-Z]\w*)*)\z/ =~ self
      # The receiver itself is the offending name; the helper this was copied
      # from took it as a `camel_cased_word` argument, which does not exist here.
      raise NameError, "#{inspect} is not a valid constant name!"
    end

    # Only evaluated after the regexp has restricted the text to a constant path.
    Object.module_eval("::#{$1}", __FILE__, __LINE__)
  end
end
@@ -0,0 +1,118 @@
1
+ class Skynet
2
+ class Task
3
+ include SkynetDebugger
4
+
5
+ class ConstructorError < StandardError; end
6
+ class TimeoutError < StandardError; end
7
+
8
+ attr_reader :data, :process, :result, :map_or_reduce, :marshalable
9
+ attr_accessor :name, :result_timeout, :retry
10
+
11
+ @@log = nil
12
+
13
+ def self.debug_class_desc
14
+ "TASK"
15
+ end
16
+
17
# Builds the task that runs +job+ as a master.
#
# A copy of the job is created with :async => false and
# :local_master => true; every other Skynet::Job::FIELDS entry that has a
# truthy value on +job+ is carried over. The returned task carries the
# copy's hash form as its process, with :map_or_reduce => :master.
def self.master_task(job)
  options = {}
  options[:async]        = false
  options[:local_master] = true
  options[:map_name]     = job.map_name || job.name
  options[:reduce_name]  = job.reduce_name || job.name

  Skynet::Job::FIELDS.each do |field|
    # Values fixed above win over the job's own; unset fields are skipped.
    next if options.has_key?(field) || !job.send(field)
    options[field] = job.send(field)
  end

  master_job = Skynet::Job.new(options)
  retries = master_job.master_retry || Skynet::CONFIG[:DEFAULT_MASTER_RETRY]

  new(
    :task_id        => master_job.task_id,
    :data           => nil,
    :process        => master_job.to_h,
    :map_or_reduce  => :master,
    :name           => master_job.name,
    :result_timeout => master_job.master_timeout,
    :retry          => retries
  )
end
41
+
42
# Creates a task from an options hash.
#
# Required keys: :task_id, :process, :map_or_reduce (each must be truthy).
# Optional keys: :data, :name, :result_timeout, :retry.
#
# Raises ConstructorError when a required key is missing.
def initialize(opts = {})
  incomplete = [:task_id, :process, :map_or_reduce].any? { |key| !opts[key] }
  if incomplete
    raise ConstructorError.new("Must provide task_id, process and map_or_reduce")
  end

  # Must be set before the process writer runs: the writer flips it to
  # false when the process is a Proc.
  @marshalable    = true
  @task_id        = opts[:task_id].to_i
  @data           = opts[:data]
  self.process    = opts[:process]
  @name           = opts[:name]
  @map_or_reduce  = opts[:map_or_reduce]
  @result_timeout = opts[:result_timeout]
  @retry          = opts[:retry]
end
55
+
56
# Stores the work to perform and marks the task as non-marshalable when
# that work is a Proc.
def process=(process)
  @marshalable = false if process.is_a?(Proc)
  @process = process
end
62
+
63
+ def can_marshal?
64
+ @marshalable
65
+ end
66
+
67
# Returns :master for master tasks and :task for everything else.
def task_or_master
  @map_or_reduce == :master ? @map_or_reduce : :task
end
74
+
75
# The task's identifier, always coerced to an Integer.
def task_id
  Integer === @task_id ? @task_id : @task_id.to_i
end
78
+
79
# Executes the task within its result timeout and returns the result.
#
# iteration - Optional retry/iteration counter, only used in the timeout
#             error message.
#
# Dispatch:
# * Proc process:      called with the task data.
# * :master task:      the process hash is turned into a Skynet::Job and run.
# * String process:    constantized and sent the map_or_reduce message
#                      with the task data.
# Any other combination does nothing and returns nil.
#
# Raises TimeoutError (this class's own) when the timeout elapses. Any other
# exception is logged and re-raised unchanged.
def run(iteration=nil)
  info "running task #{name} TIMEOUT: #{result_timeout} task_id:#{task_id} MorR:#{map_or_reduce} PROCESS CLASS: #{@process.class}"
  begin
    Timeout::timeout(@result_timeout) do
      if @process.class == Proc
        debug " - #{@map_or_reduce} using Proc"
        @process.call @data
      elsif @map_or_reduce == :master
        debug " - as master"
        job = Skynet::Job.new(@process)
        job.run
      elsif @process.class == String
        debug " - #{@map_or_reduce} using class #{@process}"
        @process.constantize.send(@map_or_reduce,@data)
      end
    end
  rescue Timeout::Error => e
    # ==========
    # = XXX NEWSFEED HACK
    # = The data hashes are printed in the error below, but they pick up a
    # = lot of extra content while running through newsfeed. Seeing what was
    # = added is useful, yet :event_object is too much, so it is dropped.
    # ==========
    if @data.is_a?(Array) and @data.first.is_a?(Hash)
      # Guard each element: a single non-Hash entry must not raise here and
      # hide the timeout behind an unrelated error.
      @data.each { |h| h.delete(:event_object) if h.is_a?(Hash) }
    end
    raise TimeoutError.new("TASK TIMED OUT! #{name} IT:[#{iteration}] timeout:#{@result_timeout} #{e.inspect} DATA: #{@data.inspect} #{e.backtrace.join("\n")}")

  # ==========
  # = XXX This rescue block is probably not necessary. Just for debugging for now. =
  # ==========
  rescue Exception => e
    error "Error running task #{e.inspect} TASK:", self, e.backtrace.join("\n")
    raise
  end
end
116
+
117
+ end ## END class Task
118
+ end
@@ -0,0 +1,83 @@
1
+ #!/usr/bin/env ruby -w
2
+
3
+ require 'rinda/ring'
4
+ require 'rinda/tuplespace'
5
+ require 'rubygems'
6
+ require 'logger'
7
+ require 'optparse'
8
+ require 'pp'
9
+
10
class Rinda::TupleSpaceProxy
  # Replacement for the stock #take: asks the wrapped tuple space to move the
  # matching tuple with no port (nil) and returns the moved tuple directly.
  #
  # tuple - Template to match.
  # sec   - Optional timeout passed through to the tuple space.
  # block - Optional block passed through to the tuple space.
  def take(tuple, sec=nil, &block)
    moved = @ts.move(nil, tuple, sec, &block)
    moved
  end
end
17
+
18
class Rinda::Tuple

  require 'ostruct'

  # Initializes the tuple from an array-like object.
  #
  # When DRb could not unmarshal the payload on receipt it hands over a
  # DRb::DRbUnknown wrapper instead. In that case the raw buffer is
  # unmarshalled again here and, if that works, the recovered object is used
  # as the tuple contents.
  #
  # Raises Rinda::RindaError, with the marshal failure and its backtrace,
  # when the buffer still cannot be loaded.
  def init_with_ary(ary)
    if ary.instance_of?(DRb::DRbUnknown)
      begin
        # NOTE(review): Marshal.load on a buffer received over DRb is only
        # safe between trusted peers.
        ary = Marshal.load(ary.buf)
      rescue StandardError => e
        raise Rinda::RindaError.new("DRb couldn't marshall tuple of type #{ary.name}, it was turned into a DRb::DRbUnknown object.\nMarshal exception #{e.inspect}\nOriginal object:\n\t#{ary.buf}.\n\nStacktrace:\n#{Array(e.backtrace).join("\n")}")
      end
    end

    @tuple = Array.new(ary.size)
    @tuple.size.times do |i|
      @tuple[i] = ary[i]
    end
  end
end
37
+
38
+ class Skynet
39
+ class Task
40
+ end
41
+ class Message
42
+ class Payload
43
+ end
44
+ end
45
+
46
+ class AsyncJob
47
+ end
48
+
49
+ class Job
50
+ end
51
+
52
+ class TuplespaceServer
53
+
54
# Starts a Rinda tuple space server and blocks until the DRb thread ends.
#
# options - Hash:
#   :port           - Ring server port (defaults to 7647; written back
#                     into the hash).
#   :logfile        - Log destination handed to Logger.new.
#   :loglevel       - Optional Logger severity name, e.g. "debug" or "WARN".
#   :use_ringserver - When truthy, publish the tuple space via a RingServer.
#   :drburi         - When set, serve the tuple space directly on this URI.
#
# SystemExit and Interrupt end the server quietly; any other exception is
# logged as fatal and the method returns.
def self.start(options)
  options[:port] ||= 7647
  log = Logger.new(options[:logfile])
  # Resolve the severity by constant lookup rather than evaluating the
  # option text as Ruby code.
  log.level = Logger.const_get(options[:loglevel].to_s.upcase) if options[:loglevel]
  log.info "STARTING TUPLESPACE SERVER ON PORT: #{options[:port]} Logging to #{options[:logfile]}"

  # Create a TupleSpace to hold named services, and start running
  ts = Rinda::TupleSpace.new
  begin
    if options[:use_ringserver] and options[:port]
      DRb.start_service
      tuple = [:name,:TupleSpace, ts, 'Tuple Space']
      renewer = Rinda::SimpleRenewer.new
      ring_ts = Rinda::TupleSpace.new
      ring_ts.write(tuple, renewer)

      # NOTE(review): newer Rinda versions take (ts, addresses, port);
      # confirm the argument order against the Ruby version in use.
      Rinda::RingServer.new(ring_ts, options[:port])
    end
    if options[:drburi]
      DRb.start_service(options[:drburi], ts)
    end
    DRb.thread.join
  rescue SystemExit, Interrupt
  rescue Exception => e
    log.fatal "Couldn't start Skynet Server #{e.inspect}"
  end

end
82
+ end
83
+ end
@@ -0,0 +1,451 @@
1
+ class Skynet
2
+ class Worker
3
+
4
+ include SkynetDebugger
5
+ include Skynet::GuidGenerator
6
+
7
+ RETRY_TIME = 2
8
+ Skynet::CONFIG[:WORKER_VERSION_CHECK_DELAY] ||= 30
9
+
10
+ Skynet::CONFIG[:WORKER_MAX_MEMORY] ||= 500
11
+
12
+ MEMORY_CHECK_DELAY = 30
13
+ MANAGER_PING_INTERVAL = 60
14
+
15
+ attr_accessor :message,:task, :mq, :processed
16
+ attr_reader :worker_id, :worker_info, :worker_type, :queue_id
17
+
18
+ class Error < StandardError; end
19
+ class RespawnWorker < Skynet::Error; end
20
+ class ConnectionFailure < Skynet::Error; end
21
+ class NoManagerError < Skynet::Error; end
22
+
23
# Label for this class's log lines: "WORKER-" followed by the current
# process id.
def self.debug_class_desc
  "WORKER-#{Process.pid}"
end
26
+
27
# Sets up a worker of the given type and connects it to the message queue.
#
# worker_type - Kind of work to take; stored as a Symbol.
# options     - Hash; :queue_id selects the queue (default 0). All options
#               are also merged into the worker_info status hash.
def initialize(worker_type, options = {})
  @worker_id   = get_unique_id(1).to_i
  @worker_type = worker_type.to_sym
  @queue_id    = options[:queue_id] || 0
  @processed   = 0
  @in_process  = false
  @mq          = Skynet::MessageQueue.new

  debug "THIS WORKER TAKES #{worker_type}"

  base_info = {
    :tasktype    => worker_type,
    :hostname    => hostname,
    :process_id  => process_id,
    :worker_type => payload_type,
    :worker_id   => worker_id,
    :version     => mq.get_worker_version,
  }
  @worker_info = base_info
  @worker_info.merge!(options)
end
47
+
48
# The operating-system process id of this worker.
def process_id
  Process.pid
end
51
+
52
# The local host name, looked up once and then cached.
def hostname
  @machine_name = Socket.gethostname unless @machine_name
  @machine_name
end
55
+
56
+ def version
57
+ @curver
58
+ end
59
+
60
# Tells whether this worker should restart because the worker version
# published in the message queue has moved on.
#
# The first call only records the current version (falling back to 1 when
# the queue has none) and returns false. Later calls re-check at most once
# per Skynet::CONFIG[:WORKER_VERSION_CHECK_DELAY] seconds and return true
# when the queue reports a different version and the worker's own version is
# no longer active for its queue. If the queue has no version at that point,
# version 1 is written back and false is returned.
def new_version_respawn?
  unless @verchecktime
    @verchecktime = Time.now
    begin
      @curver = mq.get_worker_version
      debug "FINDING INITIAL VER #{@curver}"
    rescue Skynet::RequestExpiredError => e
      warn "NO INITIAL VER IN MQ using 1"
      @curver = 1
    end
    return false
  end

  # Still inside the check interval: nothing to do yet.
  return false if Time.now < (@verchecktime + Skynet::CONFIG[:WORKER_VERSION_CHECK_DELAY])

  @verchecktime = Time.now
  begin
    newver = mq.get_worker_version
    # debug "CURVER #{@curver} NEWVER: #{newver}"
    if newver != @curver && !mq.version_active?(@curver, queue_id)
      info "RESTARTING WORKER ON PID #{$$}"
      return true
    end
  rescue Skynet::RequestExpiredError => e
    warn "NO CURRENT WORKER REV IN MQ still using 1"
    mq.set_worker_version(1)
    return false
  end

  false
end
91
+
92
# Reports this worker to the manager as idle, with a zero processed count
# and the current time as its start time.
def notify_worker_started
  status = @worker_info.merge(
    :name       => "waiting for #{@worker_type}",
    :processed  => 0,
    :started_at => Time.now.to_i
  )
  write_worker_status(status)
end
101
+
102
# Marks the worker as busy and reports the task it has picked up.
#
# task - Hash describing the task; :processed and :started_at are added to
#        it in place before it is merged over the worker info.
def notify_task_begun(task)
  task.update(:processed => @processed, :started_at => Time.now.to_i)
  @in_process = true
  status = @worker_info.merge(task)
  write_worker_status(status)
end
108
+
109
# Counts the finished task, marks the worker as idle again and reports the
# reset status (no task, no job) to the manager.
def notify_task_complete
  @processed += 1
  @in_process = false

  idle_status = {
    :task_id       => 0,
    :job_id        => 0,
    :name          => "waiting for #{@worker_type}",
    :processed     => @processed,
    :map_or_reduce => nil,
    :started_at    => Time.now.to_i
  }
  write_worker_status(@worker_info.merge(idle_status))
end
124
+
125
# Logs that the worker is stopping and reports a final status to the
# manager with the process id cleared.
def notify_worker_stop
  info "Worker #{process_id} stopping..."

  final_status = {
    :task_id       => 0,
    :job_id        => 0,
    :name          => "waiting for #{@worker_type}",
    :processed     => @processed,
    :process_id    => nil,
    :map_or_reduce => nil,
    :started_at    => Time.now.to_i
  }
  write_worker_status(@worker_info.merge(final_status))
end
139
+
140
# Calls +method+ on the manager with +args+ on a best-effort basis.
#
# Connection problems and any other StandardError are logged and
# swallowed, so a missing or misbehaving manager never stops the worker.
# Process-control exceptions (SystemExit, Interrupt and other
# non-StandardError exceptions) are deliberately NOT rescued, so an exit
# requested while a manager call is in flight still takes effect.
#
# Returns the manager's result, or the logger's return value on failure.
def manager_send(method,*args)
  begin
    manager.send(method,*args)
  rescue DRb::DRbConnError, Errno::ECONNREFUSED => e
    error "Worker could not connect to manager to call #{method} on manager #{e.inspect}"
  rescue StandardError => e
    error "Worker could not connect call #{method} on manager #{e.inspect} args:", args
  end
end
149
+
150
+ def write_worker_status(status)
151
+ manager_send(:worker_notify,status)
152
+ end
153
+
154
+ def manager
155
+ Skynet::Manager.get
156
+ end
157
+
158
# The payload type used when asking the queue for work: nil for an :any
# worker (no restriction), otherwise the worker type itself.
def payload_type
  worker_type == :any ? nil : worker_type
end
162
+
163
# Handles a stop request (used by the TERM and INT traps).
#
# The first request sets the @die flag; if no task is running the worker
# reports that it is stopping and exits at once, otherwise it returns so
# the running task can finish. A second request exits immediately.
def interrupt
  exit if @die

  @die = true
  return if @in_process

  notify_worker_stop
  exit
end
174
+
175
# Main worker loop: installs signal handlers, then repeatedly takes a task
# message from the queue, runs it and writes the result back.
#
# Signals: HUP requests a respawn (raised immediately when idle), TERM and
# INT go through #interrupt.
#
# Raises Skynet::Worker::RespawnWorker when the worker should be restarted
# (new worker version, HUP, processed-task limit or memory limit reached),
# and re-raises the connection error after more than 20 consecutive
# connection failures. Returns normally on Interrupt/SystemExit, on
# NoManagerError, or after more than 1000 unexpected exceptions.
def start
  exceptions = 0
  conerror = 0
  @curver = nil

  # setup signal handlers for manager
  Signal.trap("HUP") do
    @respawn = true
    raise Skynet::Worker::RespawnWorker.new if not @in_process
  end
  Signal.trap("TERM") { interrupt }
  Signal.trap("INT") { interrupt }

  raise Skynet::Worker::RespawnWorker.new if new_version_respawn?

  printlog "STARTING WORKER @ VER:#{@curver} type:#{@worker_type} QUEUE_ID:#{queue_id}"

  notify_worker_started

  message = nil
  task = nil

  loop do
    # Reset both per iteration so the generic error handler below never
    # reports against a message or task left over from an earlier pass.
    message = nil
    task = nil
    begin
      max_processed = Skynet::CONFIG[:WORKER_MAX_PROCESSED]
      if max_processed and max_processed > 0 and @processed >= max_processed
        raise Skynet::Worker::RespawnWorker.new("WORKER OVER MAX PROCESSED AT: #{@processed} MAX: #{max_processed}")
      end
      if @die
        exit
      elsif @respawn
        raise Skynet::Worker::RespawnWorker.new()
      end

      local_mem = max_memory_reached?
      if local_mem
        raise Skynet::Worker::RespawnWorker.new("WORKER OVER MAX MEM AT: #{local_mem} MAX: #{Skynet::CONFIG[:WORKER_MAX_MEMORY]}")
      end

      if conerror > 0
        @mq = Skynet::MessageQueue.new
        warn "WORKER RECONNECTED AFTER #{conerror} tries"
        conerror = 0
      end

      message = mq.take_next_task(@curver, 0.00001, payload_type, queue_id)

      next unless message.respond_to?(:payload)

      task = message.payload
      unless task.respond_to?(:map_or_reduce)
        # Report the unusable payload against its message and move on,
        # instead of dereferencing it and failing with a NoMethodError.
        error "BAD MESSAGE", task
        mq.write_error(message, "BAD MESSAGE: payload #{task.inspect} is not a runnable task", 200)
        next
      end

      info "STEP 2 GOT MESSAGE #{message.name} type:#{task.map_or_reduce}, jobid: #{message.job_id}, taskid:#{message.task_id} it: #{message.iteration}"
      debug "STEP 2.1 message=", message.to_a

      info "STEP 4 RUNNING TASK #{message.name} jobid: #{message.job_id} taskid: #{task.task_id}"
      notify_task_begun({
        :job_id => message.job_id,
        :task_id => message.task_id,
        :iteration => message.iteration,
        :name => message.name,
        :map_or_reduce => task.map_or_reduce
      })
      result = task.run(message.iteration)

      info "STEP 5 GOT RESULT FROM RUN TASK #{message.name} jobid: #{message.job_id} taskid: #{task.task_id}"
      debug "STEP 5.1 RESULT DATA:", result

      mq.write_result(message,result,task.result_timeout)
      info "STEP 6 WROTE RESULT MESSAGE #{message.name} jobid: #{message.job_id} taskid: #{task.task_id}"
      notify_task_complete

    rescue Skynet::Task::TimeoutError => e
      error "Task timed out while executing #{e.inspect} #{e.backtrace.join("\n")}"
      @in_process = false
      next

    rescue Skynet::Worker::RespawnWorker => e
      info "Respawning and taking worker status #{e.message}"
      notify_worker_stop
      raise e

    rescue Skynet::RequestExpiredError => e
      # No work was available: use the idle moment to check for a new
      # worker version, then back off briefly.
      if new_version_respawn?
        notify_worker_stop
        manager_send(:restart_worker,$$)
      end
      sleep 1
      next

    rescue Skynet::ConnectionError, DRb::DRbConnError => e
      conerror += 1
      retry_time = conerror > 6 ? RETRY_TIME * 3 : RETRY_TIME
      error "#{e.message}, RETRY #{conerror} in #{retry_time} seconds !!"
      @mq = nil
      sleep retry_time
      if conerror > 20
        fatal "TOO MANY RECONNECTION EXCEPTIONS #{e.message}"
        notify_worker_stop
        raise e
      end
      next

    rescue NoManagerError => e
      fatal e.message
      break
    rescue Interrupt, SystemExit => e
      info "Exiting..."
      notify_worker_stop
      break
    rescue Exception => e
      error "skynet_worker.rb:#{__LINE__} #{e.inspect} #{e.backtrace.join("\n")}"
      exceptions += 1
      break if exceptions > 1000
      if message
        mq.write_error(message,"#{e.inspect} #{e.backtrace.join("\n")}",(task.respond_to?(:result_timeout) ? task.result_timeout : 200))
      end
      @in_process = false
      next
    end
  end
end
312
+
313
+ @@ok_to_mem_check = false
314
+ @@lastmem = nil
315
+ @@memct = 0
316
+
317
# Checks, at most once per MEMORY_CHECK_DELAY seconds, whether the process
# has grown beyond Skynet::CONFIG[:WORKER_MAX_MEMORY].
#
# Returns the measured size (truthy) when the limit is exceeded and a falsy
# value (false or nil) otherwise: memory checks unavailable, first call,
# still inside the delay window, or under the limit.
def max_memory_reached?
  return false unless ok_to_mem_check?

  unless @memchecktime
    # First call only starts the clock.
    @memchecktime = Time.now
    return false
  end

  return false unless Time.now > (@memchecktime + MEMORY_CHECK_DELAY)

  @memchecktime = Time.now
  local_mem = get_memory_size.to_i
  local_mem > Skynet::CONFIG[:WORKER_MAX_MEMORY] ? local_mem : nil
end
330
+
331
# Reads the VmSize entry from a /proc-style status file.
#
# file   - Path of the status file.
# format - :pretty returns a String truncated to 5 decimals (uses
#          BigDecimal, which must already be loaded); anything else returns
#          a Float.
#
# The value is the kB figure from the VmSize line divided by 1000.
# Returns the String '0' when the file cannot be read or contains no VmSize
# line; callers convert the result with to_i.
def find_pid_size(file, format=:notpretty)
  # File.foreach closes the file even on the early returns below.
  File.foreach(file) do |line|
    next unless line.index('VmSize')

    # Strip the "VmSize:" label (7 chars) and the trailing " kB\n".
    temp = line[7..-5].strip.to_f/1000
    return BigDecimal(temp.to_s).truncate(5).to_s('F') if format == :pretty
    return temp
  end
  '0'
rescue StandardError => e
  warn "ERROR #{e.inspect}"
  '0'
end
345
+
346
+ def get_memory_size
347
+ find_pid_size("/proc/self/status")
348
+ end
349
+
350
# Tells whether memory checks are possible on this system, i.e. whether
# /proc/self/status exists. The answer is computed once and cached in a
# class variable (true, or :notok when unavailable); the first successful
# probe also records the current memory size.
def ok_to_mem_check?
  return true if @@ok_to_mem_check == true
  return false if @@ok_to_mem_check == :notok

  # File.exist? is the supported spelling; File.exists? no longer exists
  # on current Rubies.
  if File.exist?('/proc/self/status')
    @@lastmem ||= get_memory_size.to_i
    return @@ok_to_mem_check = true
  else
    @@ok_to_mem_check = :notok
    return false
  end
end
361
+
362
# Command-line entry point: parses worker options from ARGV, loads the
# required libraries and the config file, then runs a worker.
#
# options - Hash of presets; ARGV switches are parsed on top of them:
#   :worker_type   - "any", "master" or "task" (default :any).
#   :required_libs - Libraries to require before starting (-r).
#   :config_file   - Config file to require (--config), defaulting to
#                    Skynet::CONFIG[:CONFIG_FILE].
#   :queue         - Queue name (--queue); resolved to :queue_id.
#   :queue_id      - Queue id (--queue_id).
#
# Raises Skynet::Error for an invalid worker type or when both a queue name
# and a queue id are given. Exits the process when a required library or
# the config file cannot be loaded. When the worker asks to be respawned, a
# replacement process is launched and this one exits.
def self.start(options={})
  options[:worker_type] ||= :any
  options[:required_libs] ||= []

  OptionParser.new do |opt|
    opt.banner = "Usage: worker [options]"
    opt.on('-r', '--required LIBRARY', 'Include the specified libraries') do |v|
      options[:required_libs] << v
    end
    opt.on('--worker_type=WORKERTYPE', "master, task or any") do |v|
      if ["any","master","task"].include?(v)
        options[:worker_type] = v
      else
        raise Skynet::Error.new("#{v} is not a valid worker_type")
      end
    end
    opt.on('--config=CONFIG_FILE', 'Where to find the skynet.rb config file') do |v|
      options[:config_file] = File.expand_path(v)
    end
    opt.on('--queue=QUEUE_NAME', 'Which queue should these workers use (default "default").') do |v|
      options[:queue] = v
    end
    opt.on('--queue_id=queue_id', 'Which queue should these workers use (default 0).') do |v|
      options[:queue_id] = v.to_i
    end
    opt.parse!(ARGV)
  end

  if options[:queue]
    if options[:queue_id]
      raise Skynet::Error.new("You may either provide a queue_id or a queue, but not both.")
    end
    # Same lookup object as the queue_name_by_id call further down.
    options[:queue_id] = Skynet::Config.new.queue_id_by_name(options[:queue])
  end

  options[:required_libs].each do |adlib|
    begin
      require adlib
    rescue LoadError => e
      # LoadError is what `require` raises for a missing file; the previous
      # MissingSourceFile name is only defined when ActiveSupport is loaded.
      error "The included lib #{adlib} was not found: #{e.inspect}"
      exit
    end
  end

  options[:config_file] ||= Skynet::CONFIG[:CONFIG_FILE]
  if options[:config_file]
    begin
      require options[:config_file]
    rescue LoadError => e
      error "The config file at #{options[:config_file]} was not found: #{e.inspect}"
      exit
    end
  else
    error "Config file missing. Please add a config/skynet_config.rb before starting."
    exit
  end

  debug "WORKER STARTING WORKER_TYPE?:#{options[:worker_type]}. QUEUE: #{Skynet::Config.new.queue_name_by_id(options[:queue_id])}"

  begin
    worker = Skynet::Worker.new(options[:worker_type], options)
    worker.start
  rescue Skynet::Worker::NoManagerError => e
    fatal e.message
    exit
  rescue Skynet::Worker::RespawnWorker => e
    warn "WORKER #{$$} SCRIPT CAUGHT RESPAWN. RESTARTING #{e.message}"
    # NOTE(review): option values are interpolated into a command line
    # unescaped; they are assumed to come from a trusted operator.
    cmd = "ruby #{Skynet::CONFIG[:LAUNCHER_PATH]} --worker_type=#{options[:worker_type]} --queue_id=#{options[:queue_id]} "
    cmd << "--config=#{options[:config_file]} "
    cmd << "-r #{options[:required_libs].join(' -r ')}" if options[:required_libs] and not options[:required_libs].empty?
    Skynet.fork_and_exec(cmd)
    exit
  rescue SystemExit
    info "WORKER #{$$} EXITING GRACEFULLY"
  rescue Exception => e
    fatal "WORKER #{$$} DYING #{e.class} #{e.message} #{e.backtrace}"
    report = ExceptionReport.new(e)
    report.save
  end
end
442
+ end
443
+ end
444
+
445
+ class ExceptionReport
446
+ def initialize(*args)
447
+ end
448
+
449
+ def save
450
+ end
451
+ end