simple_map_reduce 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE.md +1 -0
- data/.github/PULL_REQUEST_TEMPLATE.md +1 -0
- data/README.md +3 -0
- data/lib/simple_map_reduce.rb +4 -0
- data/lib/simple_map_reduce/data_store_factory.rb +22 -0
- data/lib/simple_map_reduce/data_stores/default_data_store.rb +15 -0
- data/lib/simple_map_reduce/data_stores/remote_data_store.rb +42 -0
- data/lib/simple_map_reduce/server/job.rb +17 -2
- data/lib/simple_map_reduce/server/job_tracker.rb +30 -4
- data/lib/simple_map_reduce/server/job_worker.rb +40 -5
- data/lib/simple_map_reduce/server/worker.rb +18 -2
- data/lib/simple_map_reduce/version.rb +1 -1
- data/lib/simple_map_reduce/worker/polling_workers_status_worker.rb +50 -0
- data/lib/simple_map_reduce/worker/register_map_task_worker.rb +0 -3
- data/lib/simple_map_reduce/worker/run_map_task_worker.rb +28 -28
- data/lib/simple_map_reduce/worker/run_reduce_task_worker.rb +2 -6
- metadata +8 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f96ea39ae49bed4226271a6ab428e2eb7258cc921704fb747839b8fe2c096ae3
|
4
|
+
data.tar.gz: b3e60d6060546c708811b9b893007cd3ff1816e4b951015cf83f4294ca822351
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 16f03783a4eb7f7ed62987de371828d33debde02c8c0b23ec24c9c80e531f1a8053a1d6682542efd682e2f180237fb74e1ed29056d11eb6cba15b3af090cf4cc
|
7
|
+
data.tar.gz: 6f5e703dedf1c432bcc1e16a652ffa6713584d7d082b23b9253840e68ccdeef7167737ab8b51aef1c7c303743eaa501cd161b117d3d0672e8e9a76abf780471f
|
@@ -0,0 +1 @@
|
|
1
|
+
# What is this issue about ?
|
@@ -0,0 +1 @@
|
|
1
|
+
# What will this PR change ?
|
data/README.md
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
[](https://travis-ci.org/serihiro/simple_map_reduce)
|
2
|
+
[](https://badge.fury.io/rb/simple_map_reduce)
|
3
|
+
|
1
4
|
# SimpleMapReduce
|
2
5
|
|
3
6
|
- This is a [MapReduce](https://research.google.com/archive/mapreduce.html) implementation distributed framework written in ruby.
|
data/lib/simple_map_reduce.rb
CHANGED
@@ -19,6 +19,9 @@ end
|
|
19
19
|
|
20
20
|
require 'simple_map_reduce/version'
|
21
21
|
require 'simple_map_reduce/s3_client'
|
22
|
+
require 'simple_map_reduce/data_stores/default_data_store'
|
23
|
+
require 'simple_map_reduce/data_stores/remote_data_store'
|
24
|
+
require 'simple_map_reduce/data_store_factory'
|
22
25
|
require 'simple_map_reduce/driver/config'
|
23
26
|
require 'simple_map_reduce/driver/job'
|
24
27
|
require 'simple_map_reduce/server/confg'
|
@@ -30,3 +33,4 @@ require 'simple_map_reduce/server/job_worker'
|
|
30
33
|
require 'simple_map_reduce/worker/register_map_task_worker'
|
31
34
|
require 'simple_map_reduce/worker/run_map_task_worker'
|
32
35
|
require 'simple_map_reduce/worker/run_reduce_task_worker'
|
36
|
+
require 'simple_map_reduce/worker/polling_workers_status_worker'
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SimpleMapReduce
  # Builds the data store object that matches a data store type name.
  class DataStoreFactory
    # Names of the supported data store types.
    TYPES = %w[default remote].freeze

    class << self
      # Instantiates the data store for the given type.
      #
      # @param data_store_type [String, Symbol] one of TYPES; symbols are
      #   accepted and compared by their string form
      # @param options [Hash] passed unchanged to the data store constructor
      # @return [Object] a new DefaultDataStore or RemoteDataStore instance
      # @raise [ArgumentError] when the type is not one of TYPES
      def create(data_store_type, options = {})
        case data_store_type.to_s
        when 'default'
          SimpleMapReduce::DataStores::DefaultDataStore.new(options)
        when 'remote'
          SimpleMapReduce::DataStores::RemoteDataStore.new(options)
        else
          # Raising from the else branch guarantees that an unknown type can
          # never fall through and return nil.
          raise ArgumentError, "Unsupported data_store_type: `#{data_store_type}`"
        end
      end
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SimpleMapReduce
  module DataStores
    # Data store that reports state events to a remote server: every event is
    # sent as a JSON body in an HTTP PUT to `/<resource_name>/<resource_id>`
    # on the configured server URL.
    class RemoteDataStore
      # Headers attached to every request issued by this store.
      HTTP_JSON_HEADER = {
        'Accept' => 'application/json',
        'Content-Type' => 'application/json'
      }.freeze

      # @param options [Hash] reads :resource_name, :resource_id and :server_url
      def initialize(options)
        @resource_name, @resource_id, @server_url =
          options.values_at(:resource_name, :resource_id, :server_url)
      end

      # Sends the event to the remote resource with an HTTP PUT.
      #
      # @param event the event to report; serialized as `{ event: event }`
      # @return the response object returned by the HTTP client
      def save_state(event)
        http_client.put do |request|
          request.url("/#{@resource_name}/#{@resource_id}")
          request.body = { event: event }.to_json
        end
      end

      private

      # Memoized connection, built on first use.
      def http_client
        @http_client ||= build_http_client
      end

      # Builds the Faraday connection with response logging, error raising
      # and the timeouts used by this store.
      def build_http_client
        timeouts = { open_timeout: 10, timeout: 15 }
        ::Faraday.new(url: @server_url, headers: HTTP_JSON_HEADER, request: timeouts) do |connection|
          connection.response :logger
          connection.response :raise_error
          connection.adapter Faraday.default_adapter
        end
      end
    end
  end
end
|
@@ -19,6 +19,8 @@ module SimpleMapReduce
|
|
19
19
|
alias state current_state
|
20
20
|
|
21
21
|
aasm do
|
22
|
+
before_all_events :save_state
|
23
|
+
|
22
24
|
state :ready, initial: true
|
23
25
|
state :in_process
|
24
26
|
state :succeeded
|
@@ -47,7 +49,8 @@ module SimpleMapReduce
|
|
47
49
|
job_output_bucket_name:,
|
48
50
|
job_output_directory_path:,
|
49
51
|
map_worker_url: nil,
|
50
|
-
map_worker: nil
|
52
|
+
map_worker: nil,
|
53
|
+
data_store_type: 'default')
|
51
54
|
|
52
55
|
@id = id
|
53
56
|
@map_script = map_script&.strip
|
@@ -62,6 +65,10 @@ module SimpleMapReduce
|
|
62
65
|
if @map_worker.nil? && map_worker_url
|
63
66
|
@map_worker = SimpleMapReduce::Server::Worker.new(url: map_worker_url)
|
64
67
|
end
|
68
|
+
@data_store = SimpleMapReduce::DataStoreFactory.create(data_store_type,
|
69
|
+
server_url: SimpleMapReduce.job_tracker_url,
|
70
|
+
resource_name: 'jobs',
|
71
|
+
resource_id: @id)
|
65
72
|
|
66
73
|
unless valid?
|
67
74
|
raise ArgumentError, 'invalid Job parameters are detected'
|
@@ -121,9 +128,17 @@ module SimpleMapReduce
|
|
121
128
|
|
122
129
|
class << self
|
123
130
|
# Rebuilds an instance from its MessagePack-serialized form.
#
# @param data [String] MessagePack payload whose keys name the initializer's
#   keyword arguments
# @return a new instance configured with the 'remote' data store type
def deserialize(data)
  params = MessagePack.unpack(data).map { |key, value| [key.to_sym, value] }.to_h
  # Deserialized instances always report their state through the remote store.
  params[:data_store_type] = 'remote'
  # Double splat is required: the initializer takes keyword arguments, and
  # Ruby 3 no longer converts a positional Hash into keywords implicitly.
  new(**params)
end
|
126
135
|
end
|
136
|
+
|
137
|
+
private
|
138
|
+
|
139
|
+
def save_state
|
140
|
+
@data_store.save_state(aasm.current_event)
|
141
|
+
end
|
127
142
|
end
|
128
143
|
end
|
129
144
|
end
|
@@ -173,6 +173,7 @@ module SimpleMapReduce
|
|
173
173
|
check_s3_access
|
174
174
|
create_s3_buckets_if_not_existing
|
175
175
|
job_manager
|
176
|
+
start_polling_workers
|
176
177
|
logger.info('All setup process is done successfully. The job tracker is operation ready.')
|
177
178
|
logger.info("This job tracker url: #{SimpleMapReduce.job_tracker_url}")
|
178
179
|
end
|
@@ -234,7 +235,7 @@ module SimpleMapReduce
|
|
234
235
|
end
|
235
236
|
|
236
237
|
def register_worker(url:)
|
237
|
-
worker = ::SimpleMapReduce::Server::Worker.new(url: url)
|
238
|
+
worker = ::SimpleMapReduce::Server::Worker.new(url: url, data_store_type: 'remote')
|
238
239
|
if @workers.nil?
|
239
240
|
@workers = {}
|
240
241
|
end
|
@@ -255,9 +256,16 @@ module SimpleMapReduce
|
|
255
256
|
ready_workers = ready_workers.keys.take(worker_size)
|
256
257
|
|
257
258
|
ready_workers.map do |retry_worker_id|
|
258
|
-
|
259
|
-
|
260
|
-
|
259
|
+
begin
|
260
|
+
@workers[retry_worker_id].reserve!
|
261
|
+
rescue => e
|
262
|
+
logger.error("Failed to transit the worker state: `#{@workers[retry_worker_id]}`")
|
263
|
+
logger.error(e.inspect)
|
264
|
+
nil
|
265
|
+
else
|
266
|
+
@workers[retry_worker_id]
|
267
|
+
end
|
268
|
+
end.compact
|
261
269
|
else
|
262
270
|
return []
|
263
271
|
end
|
@@ -277,6 +285,22 @@ module SimpleMapReduce
|
|
277
285
|
mutex.unlock
|
278
286
|
end
|
279
287
|
|
288
|
+
POLLING_INTERVAL = 10
|
289
|
+
|
290
|
+
# Starts the background thread that enqueues a PollingWorkersStatusWorker
# job for the registered workers every POLLING_INTERVAL seconds. The loop
# ends once @keep_polling_workers is set to false.
#
# @return [Thread] the polling thread
def start_polling_workers
  @keep_polling_workers = true

  # Thread.new begins executing the block immediately, so no explicit
  # Thread#run is needed; calling it on a thread that has already died
  # would raise ThreadError.
  @polling_workers_thread = Thread.new do
    loop do
      break unless @keep_polling_workers

      job_manager.enqueue_job!(SimpleMapReduce::Worker::PollingWorkersStatusWorker, args: @workers || {})
      sleep(POLLING_INTERVAL)
    end
  end
end
|
303
|
+
|
280
304
|
def job_manager
|
281
305
|
@job_manager ||= ::Rasteira::EmbedWorker::Manager.run
|
282
306
|
end
|
@@ -295,6 +319,8 @@ module SimpleMapReduce
|
|
295
319
|
|
296
320
|
# @override
|
297
321
|
def quit!
  # Ask the polling loop to stop, then kill its thread in case it is sleeping.
  @keep_polling_workers = false
  # Safe navigation: the thread is only assigned by start_polling_workers,
  # so it is nil when polling was never started.
  @polling_workers_thread&.kill
  job_manager.shutdown_workers!
  super
end
|
@@ -18,7 +18,9 @@ module SimpleMapReduce
|
|
18
18
|
post '/map_tasks' do
|
19
19
|
raw_body = request.body.read
|
20
20
|
job = SimpleMapReduce::Server::Job.deserialize(raw_body)
|
21
|
-
self.class.
|
21
|
+
self.class.worker.work!
|
22
|
+
job.start!
|
23
|
+
self.class.job_manager.enqueue_job!(SimpleMapReduce::Worker::RunMapTaskWorker, args: [job, self.class.worker])
|
22
24
|
|
23
25
|
json(succeeded: true, job_id: job.id)
|
24
26
|
end
|
@@ -26,14 +28,42 @@ module SimpleMapReduce
|
|
26
28
|
post '/reduce_tasks' do
|
27
29
|
raw_body = request.body.read
|
28
30
|
task = SimpleMapReduce::Server::Task.deserialize(raw_body)
|
29
|
-
|
30
|
-
self.class.job_manager.enqueue_job!(SimpleMapReduce::Worker::RunReduceTaskWorker, args: [task, self.class.
|
31
|
+
self.class.worker.work!
|
32
|
+
self.class.job_manager.enqueue_job!(SimpleMapReduce::Worker::RunReduceTaskWorker, args: [task, self.class.worker])
|
31
33
|
|
32
34
|
json(succeeded: true, job_id: task.job_id, task_id: task.id)
|
33
35
|
end
|
34
36
|
|
37
|
+
# Applies an update to this worker.
# Responds 404 when the id does not match this worker, 400 when the update
# (or parsing the request body) fails, and the dumped worker otherwise.
put '/workers/:id' do
  if params[:id] != self.class.worker_id
    status 404
    # The JSON must be the value of the route block; a bare `return` after it
    # would discard the error body.
    json(succeeded: false, error_message: 'The specified worker id was not found.')
  else
    begin
      # Parsed inside the begin block so a malformed body is reported as 400.
      body = JSON.parse(request.body.read, symbolize_names: true)
      self.class.worker.update!(body)
      json(succeeded: true, worker: self.class.worker.dump)
    rescue => e
      puts e.inspect
      status 400
      json(succeeded: false, error_class: e.class.to_s, error_message: e.message)
    end
  end
end
|
54
|
+
|
55
|
+
get '/workers/:id' do
|
56
|
+
if params[:id] != self.class.worker_id
|
57
|
+
status 404
|
58
|
+
json(succeeded: false, error_message: 'The specified worker id was not found.')
|
59
|
+
else
|
60
|
+
json(succeeded: true, worker: self.class.worker.dump)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
35
64
|
class << self
|
36
|
-
|
65
|
+
attr_reader :worker_id
|
66
|
+
attr_reader :worker
|
37
67
|
|
38
68
|
def setup_worker
|
39
69
|
check_s3_access
|
@@ -41,6 +71,7 @@ module SimpleMapReduce
|
|
41
71
|
job_manager
|
42
72
|
logger.info('All setup process is done successfully. This worker is operation ready.')
|
43
73
|
logger.info("This job worker url: #{SimpleMapReduce.job_worker_url}, id: #{worker_id}")
|
74
|
+
logger.info("This job worker status url: #{SimpleMapReduce.job_worker_url}/workers/#{worker_id}")
|
44
75
|
logger.info("The job tracker url: #{SimpleMapReduce.job_tracker_url}")
|
45
76
|
end
|
46
77
|
|
@@ -56,7 +87,11 @@ module SimpleMapReduce
|
|
56
87
|
end
|
57
88
|
|
58
89
|
body = JSON.parse(response.body, symbolize_names: true)
|
59
|
-
|
90
|
+
@worker_id = body[:id]
|
91
|
+
@worker = SimpleMapReduce::Server::Worker.new(
|
92
|
+
id: @worker_id,
|
93
|
+
url: SimpleMapReduce.job_worker_url
|
94
|
+
)
|
60
95
|
logger.info("[OK] registering this worker to the job_tracker #{SimpleMapReduce.job_worker_url}")
|
61
96
|
end
|
62
97
|
|
@@ -15,7 +15,11 @@ module SimpleMapReduce
|
|
15
15
|
delegate current_state: :aasm
|
16
16
|
alias state current_state
|
17
17
|
|
18
|
+
STATES = %i(ready reserved working).freeze
|
19
|
+
|
18
20
|
aasm do
|
21
|
+
before_all_events :save_state
|
22
|
+
|
19
23
|
state :ready, initial: true
|
20
24
|
state :reserved
|
21
25
|
state :working
|
@@ -29,12 +33,20 @@ module SimpleMapReduce
|
|
29
33
|
end
|
30
34
|
|
31
35
|
event :work do
|
32
|
-
transitions from:
|
36
|
+
transitions from: %i(reserved working), to: :working
|
33
37
|
end
|
34
38
|
end
|
35
39
|
|
36
|
-
def initialize(url:)
|
40
|
+
def initialize(url:, id: nil, state: nil, data_store_type: 'default')
|
37
41
|
@url = url
|
42
|
+
@id = id
|
43
|
+
if STATES.include?(state)
|
44
|
+
aasm_write_state_without_persistence(state)
|
45
|
+
end
|
46
|
+
@data_store = SimpleMapReduce::DataStoreFactory.create(data_store_type,
|
47
|
+
server_url: url,
|
48
|
+
resource_name: 'workers',
|
49
|
+
resource_id: self.id)
|
38
50
|
unless valid?
|
39
51
|
raise ArgumentError, 'invalid url'
|
40
52
|
end
|
@@ -71,6 +83,10 @@ module SimpleMapReduce
|
|
71
83
|
def valid?
|
72
84
|
!@url.to_s.empty? && @url =~ URI::DEFAULT_PARSER.make_regexp
|
73
85
|
end
|
86
|
+
|
87
|
+
def save_state
|
88
|
+
@data_store.save_state(aasm.current_event)
|
89
|
+
end
|
74
90
|
end
|
75
91
|
end
|
76
92
|
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SimpleMapReduce
  module Worker
    # Job that asks every given worker for its current state over HTTP and
    # copies that state onto the local worker object. Workers whose poll
    # fails for any reason are removed from the given collection.
    class PollingWorkersStatusWorker
      # Headers attached to every polling request.
      HTTP_JSON_HEADER = {
        'Accept' => 'application/json',
        'Content-Type' => 'application/json'
      }.freeze

      # Polls each worker once.
      #
      # @param workers [Hash] worker id => worker object; mutated in place
      #   (failed workers are deleted)
      def perform(workers)
        logger.debug("begin polling workers: #{workers.keys}")
        # Iterate over a snapshot because failed workers are deleted from
        # +workers+ inside the loop.
        workers.to_a.each do |id, worker|
          begin
            response = http_client(worker.url).get("/workers/#{worker.id}")
            body = JSON.parse(response.body, symbolize_names: true)[:worker]
            worker.aasm.current_state = body[:state].to_sym
          rescue => e
            logger.error(e.inspect)
            # Only some errors (e.g. HTTP client errors) carry a response;
            # calling it blindly would raise from inside this rescue and
            # abort the poll for all remaining workers.
            logger.error(e.response.inspect) if e.respond_to?(:response)
            logger.info("Worker #{worker.id} is removed from workers")
            workers.delete(id)
          end
        end
        logger.debug("finish polling workers: #{workers.keys}")
      end

      private

      # Builds a connection to the given worker URL with response logging,
      # error raising and short timeouts.
      def http_client(url)
        ::Faraday.new(
          url: url,
          headers: HTTP_JSON_HEADER,
          request: {
            open_timeout: 5,
            timeout: 10
          }
        ) do |faraday|
          faraday.response :logger
          faraday.response :raise_error
          faraday.adapter Faraday.default_adapter
        end
      end

      def logger
        SimpleMapReduce.logger
      end
    end
  end
end
|
@@ -3,16 +3,16 @@
|
|
3
3
|
module SimpleMapReduce
|
4
4
|
module Worker
|
5
5
|
class RunMapTaskWorker
|
6
|
-
|
6
|
+
class InvalidMapTaskError < StandardError; end
|
7
|
+
|
8
|
+
def perform(job, map_worker)
|
7
9
|
task_wrapper_class_name = "TaskWrapper#{job.id.delete('-')}"
|
8
10
|
self.class.class_eval("class #{task_wrapper_class_name}; end", 'Task Wrapper Class')
|
9
11
|
task_wrapper_class = self.class.const_get(task_wrapper_class_name)
|
10
12
|
task_wrapper_class.class_eval(job.map_script, 'Map task script')
|
11
13
|
map_task = task_wrapper_class.const_get(job.map_class_name, false).new
|
12
14
|
unless map_task.respond_to?(:map)
|
13
|
-
|
14
|
-
logger.error('no map method')
|
15
|
-
return
|
15
|
+
raise InvalidMapTaskError, 'no map method'
|
16
16
|
end
|
17
17
|
logger.info('map task start')
|
18
18
|
|
@@ -45,31 +45,37 @@ module SimpleMapReduce
|
|
45
45
|
logger.debug(response.body)
|
46
46
|
|
47
47
|
# {"succeeded":true,"workers":[{"id":70157882164440,"url":"http://localhost:4569","state":'reserved'}]}
|
48
|
-
reserved_workers = JSON.parse(response.body, symbolize_names: true)[:reserved_workers]
|
48
|
+
reserved_workers = JSON.parse(response.body, symbolize_names: true)[:reserved_workers].map do |worker|
|
49
|
+
SimpleMapReduce::Server::Worker.new(
|
50
|
+
id: worker[:id],
|
51
|
+
url: worker[:url],
|
52
|
+
state: worker[:state].to_sym,
|
53
|
+
data_store_type: 'remote'
|
54
|
+
)
|
55
|
+
end
|
49
56
|
if reserved_workers.count == 0
|
50
57
|
# keep working with same worker
|
51
|
-
reserved_workers <<
|
58
|
+
reserved_workers << map_worker
|
52
59
|
end
|
53
60
|
|
54
61
|
shuffle(job, reserved_workers, local_output_cache)
|
55
62
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
63
|
+
if reserved_workers.all? { |w| w.id != map_worker.id }
|
64
|
+
begin
|
65
|
+
map_worker.ready!
|
66
|
+
rescue => notify_error
|
67
|
+
logger.fatal(notify_error.inspect)
|
68
|
+
logger.fatal(notify_error.backtrace.take(50))
|
60
69
|
end
|
61
|
-
logger.debug(response.body)
|
62
70
|
end
|
63
71
|
rescue => e
|
64
72
|
logger.error(e.inspect)
|
65
73
|
logger.error(e.backtrace.take(50))
|
74
|
+
job.failed!
|
66
75
|
# TODO: notifying to job_tracker that this task have failed
|
67
76
|
ensure
|
68
77
|
local_input_cache&.delete
|
69
78
|
local_output_cache&.delete
|
70
|
-
reserved_workers&.each do |worker|
|
71
|
-
worker[:shuffled_local_output]&.delete
|
72
|
-
end
|
73
79
|
if self.class.const_defined?(task_wrapper_class_name.to_sym)
|
74
80
|
self.class.send(:remove_const, task_wrapper_class_name.to_sym)
|
75
81
|
end
|
@@ -106,14 +112,11 @@ module SimpleMapReduce
|
|
106
112
|
workers_count = workers.count
|
107
113
|
raise 'No workers' unless workers_count > 0
|
108
114
|
|
109
|
-
|
110
|
-
worker[:shuffled_local_output] = Tempfile.new
|
111
|
-
end
|
112
|
-
|
115
|
+
# Block form so that every partition gets its own Tempfile;
# Array.new(size, object) would put the same single file in every slot.
shuffled_local_outputs = Array.new(workers_count) { Tempfile.new }
|
113
116
|
local_output_cache.each_line(rs: "\n") do |raw_line|
|
114
117
|
output = JSON.parse(raw_line, symbolize_names: true)
|
115
118
|
partition_id = output[:key].hash % workers_count
|
116
|
-
|
119
|
+
shuffled_local_outputs[partition_id].puts(output.to_json)
|
117
120
|
end
|
118
121
|
|
119
122
|
task_script = job.reduce_script
|
@@ -134,7 +137,7 @@ module SimpleMapReduce
|
|
134
137
|
task_output_directory_path: task_output_directory_path
|
135
138
|
)
|
136
139
|
|
137
|
-
local_output_cache =
|
140
|
+
local_output_cache = shuffled_local_outputs[partition_id]
|
138
141
|
local_output_cache.rewind
|
139
142
|
s3_client.put_object(
|
140
143
|
body: local_output_cache.read,
|
@@ -142,18 +145,15 @@ module SimpleMapReduce
|
|
142
145
|
key: reduce_task.task_input_file_path
|
143
146
|
)
|
144
147
|
|
145
|
-
response = http_client(worker
|
148
|
+
response = http_client(worker.url).post do |request|
|
146
149
|
request.url('/reduce_tasks')
|
147
150
|
request.body = reduce_task.serialize
|
148
151
|
end
|
149
152
|
logger.debug(response.body)
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
request.body = { event: 'work' }.to_json
|
155
|
-
end
|
156
|
-
logger.debug(response.body)
|
153
|
+
end
|
154
|
+
ensure
|
155
|
+
shuffled_local_outputs&.each do |output|
|
156
|
+
output.delete
|
157
157
|
end
|
158
158
|
end
|
159
159
|
end
|
@@ -3,7 +3,7 @@
|
|
3
3
|
module SimpleMapReduce
|
4
4
|
module Worker
|
5
5
|
class RunReduceTaskWorker
|
6
|
-
def perform(task,
|
6
|
+
def perform(task, reduce_worker)
|
7
7
|
task_wrapper_class_name = "TaskWrapper#{task.id.delete('-')}"
|
8
8
|
self.class.class_eval("class #{task_wrapper_class_name}; end", 'Task Wrapper Class')
|
9
9
|
task_wrapper_class = self.class.const_get(task_wrapper_class_name)
|
@@ -54,11 +54,7 @@ module SimpleMapReduce
|
|
54
54
|
end
|
55
55
|
|
56
56
|
begin
|
57
|
-
|
58
|
-
request.url("/workers/#{reduce_worker_id}")
|
59
|
-
request.body = { event: 'ready' }.to_json
|
60
|
-
end
|
61
|
-
logger.debug(response.body)
|
57
|
+
reduce_worker.ready!
|
62
58
|
rescue => notify_error
|
63
59
|
logger.fatal(notify_error.inspect)
|
64
60
|
logger.fatal(notify_error.backtrace.take(50))
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_map_reduce
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kazuhiro Serizawa
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-02-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -228,6 +228,8 @@ executables:
|
|
228
228
|
extensions: []
|
229
229
|
extra_rdoc_files: []
|
230
230
|
files:
|
231
|
+
- ".github/ISSUE_TEMPLATE.md"
|
232
|
+
- ".github/PULL_REQUEST_TEMPLATE.md"
|
231
233
|
- ".gitignore"
|
232
234
|
- ".rspec"
|
233
235
|
- ".rubocop.yml"
|
@@ -249,6 +251,9 @@ files:
|
|
249
251
|
- docker-compose.yml
|
250
252
|
- exe/simple_map_reduce
|
251
253
|
- lib/simple_map_reduce.rb
|
254
|
+
- lib/simple_map_reduce/data_store_factory.rb
|
255
|
+
- lib/simple_map_reduce/data_stores/default_data_store.rb
|
256
|
+
- lib/simple_map_reduce/data_stores/remote_data_store.rb
|
252
257
|
- lib/simple_map_reduce/driver/config.rb
|
253
258
|
- lib/simple_map_reduce/driver/job.rb
|
254
259
|
- lib/simple_map_reduce/s3_client.rb
|
@@ -259,6 +264,7 @@ files:
|
|
259
264
|
- lib/simple_map_reduce/server/task.rb
|
260
265
|
- lib/simple_map_reduce/server/worker.rb
|
261
266
|
- lib/simple_map_reduce/version.rb
|
267
|
+
- lib/simple_map_reduce/worker/polling_workers_status_worker.rb
|
262
268
|
- lib/simple_map_reduce/worker/register_map_task_worker.rb
|
263
269
|
- lib/simple_map_reduce/worker/run_map_task_worker.rb
|
264
270
|
- lib/simple_map_reduce/worker/run_reduce_task_worker.rb
|