simple_map_reduce 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE.md +1 -0
- data/.github/PULL_REQUEST_TEMPLATE.md +1 -0
- data/README.md +3 -0
- data/lib/simple_map_reduce.rb +4 -0
- data/lib/simple_map_reduce/data_store_factory.rb +22 -0
- data/lib/simple_map_reduce/data_stores/default_data_store.rb +15 -0
- data/lib/simple_map_reduce/data_stores/remote_data_store.rb +42 -0
- data/lib/simple_map_reduce/server/job.rb +17 -2
- data/lib/simple_map_reduce/server/job_tracker.rb +30 -4
- data/lib/simple_map_reduce/server/job_worker.rb +40 -5
- data/lib/simple_map_reduce/server/worker.rb +18 -2
- data/lib/simple_map_reduce/version.rb +1 -1
- data/lib/simple_map_reduce/worker/polling_workers_status_worker.rb +50 -0
- data/lib/simple_map_reduce/worker/register_map_task_worker.rb +0 -3
- data/lib/simple_map_reduce/worker/run_map_task_worker.rb +28 -28
- data/lib/simple_map_reduce/worker/run_reduce_task_worker.rb +2 -6
- metadata +8 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f96ea39ae49bed4226271a6ab428e2eb7258cc921704fb747839b8fe2c096ae3
|
4
|
+
data.tar.gz: b3e60d6060546c708811b9b893007cd3ff1816e4b951015cf83f4294ca822351
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 16f03783a4eb7f7ed62987de371828d33debde02c8c0b23ec24c9c80e531f1a8053a1d6682542efd682e2f180237fb74e1ed29056d11eb6cba15b3af090cf4cc
|
7
|
+
data.tar.gz: 6f5e703dedf1c432bcc1e16a652ffa6713584d7d082b23b9253840e68ccdeef7167737ab8b51aef1c7c303743eaa501cd161b117d3d0672e8e9a76abf780471f
|
data/.github/ISSUE_TEMPLATE.md
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# What is this issue about ?
|
data/.github/PULL_REQUEST_TEMPLATE.md
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# What will this PR change ?
|
data/README.md
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
[![Build Status](https://travis-ci.org/serihiro/simple_map_reduce.svg?branch=master)](https://travis-ci.org/serihiro/simple_map_reduce)
|
2
|
+
[![Gem Version](https://badge.fury.io/rb/simple_map_reduce.svg)](https://badge.fury.io/rb/simple_map_reduce)
|
3
|
+
|
1
4
|
# SimpleMapReduce
|
2
5
|
|
3
6
|
- This is a [MapReduce](https://research.google.com/archive/mapreduce.html) implementation distributed framework written in ruby.
|
data/lib/simple_map_reduce.rb
CHANGED
@@ -19,6 +19,9 @@ end
|
|
19
19
|
|
20
20
|
require 'simple_map_reduce/version'
|
21
21
|
require 'simple_map_reduce/s3_client'
|
22
|
+
require 'simple_map_reduce/data_stores/default_data_store'
|
23
|
+
require 'simple_map_reduce/data_stores/remote_data_store'
|
24
|
+
require 'simple_map_reduce/data_store_factory'
|
22
25
|
require 'simple_map_reduce/driver/config'
|
23
26
|
require 'simple_map_reduce/driver/job'
|
24
27
|
require 'simple_map_reduce/server/confg'
|
@@ -30,3 +33,4 @@ require 'simple_map_reduce/server/job_worker'
|
|
30
33
|
require 'simple_map_reduce/worker/register_map_task_worker'
|
31
34
|
require 'simple_map_reduce/worker/run_map_task_worker'
|
32
35
|
require 'simple_map_reduce/worker/run_reduce_task_worker'
|
36
|
+
require 'simple_map_reduce/worker/polling_workers_status_worker'
|
data/lib/simple_map_reduce/data_store_factory.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SimpleMapReduce
|
4
|
+
class DataStoreFactory
|
5
|
+
TYPES = %w(default remote).freeze
|
6
|
+
|
7
|
+
class << self
|
8
|
+
def create(data_store_type, options = {})
|
9
|
+
unless TYPES.include?(data_store_type)
|
10
|
+
raise ArgumentError, "Unsupported data_store_type: `#{data_store_type}`"
|
11
|
+
end
|
12
|
+
|
13
|
+
case data_store_type
|
14
|
+
when 'default'
|
15
|
+
SimpleMapReduce::DataStores::DefaultDataStore.new(options)
|
16
|
+
when 'remote'
|
17
|
+
SimpleMapReduce::DataStores::RemoteDataStore.new(options)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
data/lib/simple_map_reduce/data_stores/remote_data_store.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SimpleMapReduce
|
4
|
+
module DataStores
|
5
|
+
class RemoteDataStore
|
6
|
+
def initialize(options)
|
7
|
+
@resource_name = options[:resource_name]
|
8
|
+
@resource_id = options[:resource_id]
|
9
|
+
@server_url = options[:server_url]
|
10
|
+
end
|
11
|
+
|
12
|
+
def save_state(event)
|
13
|
+
http_client.put do |request|
|
14
|
+
request.url("/#{@resource_name}/#{@resource_id}")
|
15
|
+
request.body = { event: event }.to_json
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
private
|
20
|
+
|
21
|
+
HTTP_JSON_HEADER = {
|
22
|
+
'Accept' => 'application/json',
|
23
|
+
'Content-Type' => 'application/json'
|
24
|
+
}.freeze
|
25
|
+
|
26
|
+
def http_client
|
27
|
+
@http_client ||= ::Faraday.new(
|
28
|
+
url: @server_url,
|
29
|
+
headers: HTTP_JSON_HEADER,
|
30
|
+
request: {
|
31
|
+
open_timeout: 10,
|
32
|
+
timeout: 15
|
33
|
+
}
|
34
|
+
) do |faraday|
|
35
|
+
faraday.response :logger
|
36
|
+
faraday.response :raise_error
|
37
|
+
faraday.adapter Faraday.default_adapter
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
data/lib/simple_map_reduce/server/job.rb
CHANGED
@@ -19,6 +19,8 @@ module SimpleMapReduce
|
|
19
19
|
alias state current_state
|
20
20
|
|
21
21
|
aasm do
|
22
|
+
before_all_events :save_state
|
23
|
+
|
22
24
|
state :ready, initial: true
|
23
25
|
state :in_process
|
24
26
|
state :succeeded
|
@@ -47,7 +49,8 @@ module SimpleMapReduce
|
|
47
49
|
job_output_bucket_name:,
|
48
50
|
job_output_directory_path:,
|
49
51
|
map_worker_url: nil,
|
50
|
-
map_worker: nil
|
52
|
+
map_worker: nil,
|
53
|
+
data_store_type: 'default')
|
51
54
|
|
52
55
|
@id = id
|
53
56
|
@map_script = map_script&.strip
|
@@ -62,6 +65,10 @@ module SimpleMapReduce
|
|
62
65
|
if @map_worker.nil? && map_worker_url
|
63
66
|
@map_worker = SimpleMapReduce::Server::Worker.new(url: map_worker_url)
|
64
67
|
end
|
68
|
+
@data_store = SimpleMapReduce::DataStoreFactory.create(data_store_type,
|
69
|
+
server_url: SimpleMapReduce.job_tracker_url,
|
70
|
+
resource_name: 'jobs',
|
71
|
+
resource_id: @id)
|
65
72
|
|
66
73
|
unless valid?
|
67
74
|
raise ArgumentError, 'invalid Job parameters are detected'
|
@@ -121,9 +128,17 @@ module SimpleMapReduce
|
|
121
128
|
|
122
129
|
class << self
|
123
130
|
def deserialize(data)
|
124
|
-
|
131
|
+
params = Hash[MessagePack.unpack(data).map { |k, v| [k.to_sym, v] }]
|
132
|
+
params[:data_store_type] = 'remote'
|
133
|
+
new(params)
|
125
134
|
end
|
126
135
|
end
|
136
|
+
|
137
|
+
private
|
138
|
+
|
139
|
+
def save_state
|
140
|
+
@data_store.save_state(aasm.current_event)
|
141
|
+
end
|
127
142
|
end
|
128
143
|
end
|
129
144
|
end
|
data/lib/simple_map_reduce/server/job_tracker.rb
CHANGED
@@ -173,6 +173,7 @@ module SimpleMapReduce
|
|
173
173
|
check_s3_access
|
174
174
|
create_s3_buckets_if_not_existing
|
175
175
|
job_manager
|
176
|
+
start_polling_workers
|
176
177
|
logger.info('All setup process is done successfully. The job tracker is operation ready.')
|
177
178
|
logger.info("This job tracker url: #{SimpleMapReduce.job_tracker_url}")
|
178
179
|
end
|
@@ -234,7 +235,7 @@ module SimpleMapReduce
|
|
234
235
|
end
|
235
236
|
|
236
237
|
def register_worker(url:)
|
237
|
-
worker = ::SimpleMapReduce::Server::Worker.new(url: url)
|
238
|
+
worker = ::SimpleMapReduce::Server::Worker.new(url: url, data_store_type: 'remote')
|
238
239
|
if @workers.nil?
|
239
240
|
@workers = {}
|
240
241
|
end
|
@@ -255,9 +256,16 @@ module SimpleMapReduce
|
|
255
256
|
ready_workers = ready_workers.keys.take(worker_size)
|
256
257
|
|
257
258
|
ready_workers.map do |retry_worker_id|
|
258
|
-
|
259
|
-
|
260
|
-
|
259
|
+
begin
|
260
|
+
@workers[retry_worker_id].reserve!
|
261
|
+
rescue => e
|
262
|
+
logger.error("Failed to transit the worker state: `#{@workers[retry_worker_id]}`")
|
263
|
+
logger.error(e.inspect)
|
264
|
+
nil
|
265
|
+
else
|
266
|
+
@workers[retry_worker_id]
|
267
|
+
end
|
268
|
+
end.compact
|
261
269
|
else
|
262
270
|
return []
|
263
271
|
end
|
@@ -277,6 +285,22 @@ module SimpleMapReduce
|
|
277
285
|
mutex.unlock
|
278
286
|
end
|
279
287
|
|
288
|
+
POLLING_INTERVAL = 10
|
289
|
+
|
290
|
+
def start_polling_workers
|
291
|
+
@keep_polling_workers = true
|
292
|
+
|
293
|
+
@polling_workers_thread = Thread.new do
|
294
|
+
loop do
|
295
|
+
break unless @keep_polling_workers
|
296
|
+
|
297
|
+
job_manager.enqueue_job!(SimpleMapReduce::Worker::PollingWorkersStatusWorker, args: @workers || {})
|
298
|
+
sleep(POLLING_INTERVAL)
|
299
|
+
end
|
300
|
+
end
|
301
|
+
@polling_workers_thread.run
|
302
|
+
end
|
303
|
+
|
280
304
|
def job_manager
|
281
305
|
@job_manager ||= ::Rasteira::EmbedWorker::Manager.run
|
282
306
|
end
|
@@ -295,6 +319,8 @@ module SimpleMapReduce
|
|
295
319
|
|
296
320
|
# @override
|
297
321
|
def quit!
|
322
|
+
@keep_polling_workers = false
|
323
|
+
@polling_workers_thread.kill
|
298
324
|
job_manager.shutdown_workers!
|
299
325
|
super
|
300
326
|
end
|
data/lib/simple_map_reduce/server/job_worker.rb
CHANGED
@@ -18,7 +18,9 @@ module SimpleMapReduce
|
|
18
18
|
post '/map_tasks' do
|
19
19
|
raw_body = request.body.read
|
20
20
|
job = SimpleMapReduce::Server::Job.deserialize(raw_body)
|
21
|
-
self.class.
|
21
|
+
self.class.worker.work!
|
22
|
+
job.start!
|
23
|
+
self.class.job_manager.enqueue_job!(SimpleMapReduce::Worker::RunMapTaskWorker, args: [job, self.class.worker])
|
22
24
|
|
23
25
|
json(succeeded: true, job_id: job.id)
|
24
26
|
end
|
@@ -26,14 +28,42 @@ module SimpleMapReduce
|
|
26
28
|
post '/reduce_tasks' do
|
27
29
|
raw_body = request.body.read
|
28
30
|
task = SimpleMapReduce::Server::Task.deserialize(raw_body)
|
29
|
-
|
30
|
-
self.class.job_manager.enqueue_job!(SimpleMapReduce::Worker::RunReduceTaskWorker, args: [task, self.class.
|
31
|
+
self.class.worker.work!
|
32
|
+
self.class.job_manager.enqueue_job!(SimpleMapReduce::Worker::RunReduceTaskWorker, args: [task, self.class.worker])
|
31
33
|
|
32
34
|
json(succeeded: true, job_id: task.job_id, task_id: task.id)
|
33
35
|
end
|
34
36
|
|
37
|
+
put '/workers/:id' do
|
38
|
+
body = JSON.parse(request.body.read, symbolize_names: true)
|
39
|
+
if params[:id] != self.class.worker_id
|
40
|
+
status 404
|
41
|
+
json(succeeded: false, error_message: 'The specified worker id was not found.')
|
42
|
+
return
|
43
|
+
end
|
44
|
+
|
45
|
+
begin
|
46
|
+
self.class.worker.update!(body)
|
47
|
+
json(succeeded: true, worker: self.class.worker.dump)
|
48
|
+
rescue => e
|
49
|
+
puts e.inspect
|
50
|
+
status 400
|
51
|
+
json(succeeded: false, error_class: e.class.to_s, error_message: e.message)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
get '/workers/:id' do
|
56
|
+
if params[:id] != self.class.worker_id
|
57
|
+
status 404
|
58
|
+
json(succeeded: false, error_message: 'The specified worker id was not found.')
|
59
|
+
else
|
60
|
+
json(succeeded: true, worker: self.class.worker.dump)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
35
64
|
class << self
|
36
|
-
|
65
|
+
attr_reader :worker_id
|
66
|
+
attr_reader :worker
|
37
67
|
|
38
68
|
def setup_worker
|
39
69
|
check_s3_access
|
@@ -41,6 +71,7 @@ module SimpleMapReduce
|
|
41
71
|
job_manager
|
42
72
|
logger.info('All setup process is done successfully. This worker is operation ready.')
|
43
73
|
logger.info("This job worker url: #{SimpleMapReduce.job_worker_url}, id: #{worker_id}")
|
74
|
+
logger.info("This job worker status url: #{SimpleMapReduce.job_worker_url}/workers/#{worker_id}")
|
44
75
|
logger.info("The job tracker url: #{SimpleMapReduce.job_tracker_url}")
|
45
76
|
end
|
46
77
|
|
@@ -56,7 +87,11 @@ module SimpleMapReduce
|
|
56
87
|
end
|
57
88
|
|
58
89
|
body = JSON.parse(response.body, symbolize_names: true)
|
59
|
-
|
90
|
+
@worker_id = body[:id]
|
91
|
+
@worker = SimpleMapReduce::Server::Worker.new(
|
92
|
+
id: @worker_id,
|
93
|
+
url: SimpleMapReduce.job_worker_url
|
94
|
+
)
|
60
95
|
logger.info("[OK] registering this worker to the job_tracker #{SimpleMapReduce.job_worker_url}")
|
61
96
|
end
|
62
97
|
|
data/lib/simple_map_reduce/server/worker.rb
CHANGED
@@ -15,7 +15,11 @@ module SimpleMapReduce
|
|
15
15
|
delegate current_state: :aasm
|
16
16
|
alias state current_state
|
17
17
|
|
18
|
+
STATES = %i(ready reserved working).freeze
|
19
|
+
|
18
20
|
aasm do
|
21
|
+
before_all_events :save_state
|
22
|
+
|
19
23
|
state :ready, initial: true
|
20
24
|
state :reserved
|
21
25
|
state :working
|
@@ -29,12 +33,20 @@ module SimpleMapReduce
|
|
29
33
|
end
|
30
34
|
|
31
35
|
event :work do
|
32
|
-
transitions from:
|
36
|
+
transitions from: %i(reserved working), to: :working
|
33
37
|
end
|
34
38
|
end
|
35
39
|
|
36
|
-
def initialize(url:)
|
40
|
+
def initialize(url:, id: nil, state: nil, data_store_type: 'default')
|
37
41
|
@url = url
|
42
|
+
@id = id
|
43
|
+
if STATES.include?(state)
|
44
|
+
aasm_write_state_without_persistence(state)
|
45
|
+
end
|
46
|
+
@data_store = SimpleMapReduce::DataStoreFactory.create(data_store_type,
|
47
|
+
server_url: url,
|
48
|
+
resource_name: 'workers',
|
49
|
+
resource_id: self.id)
|
38
50
|
unless valid?
|
39
51
|
raise ArgumentError, 'invalid url'
|
40
52
|
end
|
@@ -71,6 +83,10 @@ module SimpleMapReduce
|
|
71
83
|
def valid?
|
72
84
|
!@url.to_s.empty? && @url =~ URI::DEFAULT_PARSER.make_regexp
|
73
85
|
end
|
86
|
+
|
87
|
+
def save_state
|
88
|
+
@data_store.save_state(aasm.current_event)
|
89
|
+
end
|
74
90
|
end
|
75
91
|
end
|
76
92
|
end
|
data/lib/simple_map_reduce/worker/polling_workers_status_worker.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SimpleMapReduce
|
4
|
+
module Worker
|
5
|
+
class PollingWorkersStatusWorker
|
6
|
+
def perform(workers)
|
7
|
+
logger.debug("begin polling workers: #{workers.keys}")
|
8
|
+
workers.each do |id, worker|
|
9
|
+
begin
|
10
|
+
response = http_client(worker.url).get("/workers/#{worker.id}")
|
11
|
+
body = JSON.parse(response.body, symbolize_names: true)[:worker]
|
12
|
+
worker.aasm.current_state = body[:state].to_sym
|
13
|
+
rescue => e
|
14
|
+
logger.error(e.inspect)
|
15
|
+
logger.error(e&.response&.inspect)
|
16
|
+
logger.info("Worker #{worker.id} is removed from workers")
|
17
|
+
workers.delete(id)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
logger.debug("finish polling workers: #{workers.keys}")
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
HTTP_JSON_HEADER = {
|
26
|
+
'Accept' => 'application/json',
|
27
|
+
'Content-Type' => 'application/json'
|
28
|
+
}.freeze
|
29
|
+
|
30
|
+
def http_client(url)
|
31
|
+
::Faraday.new(
|
32
|
+
url: url,
|
33
|
+
headers: HTTP_JSON_HEADER,
|
34
|
+
request: {
|
35
|
+
open_timeout: 5,
|
36
|
+
timeout: 10
|
37
|
+
}
|
38
|
+
) do |faraday|
|
39
|
+
faraday.response :logger
|
40
|
+
faraday.response :raise_error
|
41
|
+
faraday.adapter Faraday.default_adapter
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def logger
|
46
|
+
SimpleMapReduce.logger
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
data/lib/simple_map_reduce/worker/run_map_task_worker.rb
CHANGED
@@ -3,16 +3,16 @@
|
|
3
3
|
module SimpleMapReduce
|
4
4
|
module Worker
|
5
5
|
class RunMapTaskWorker
|
6
|
-
|
6
|
+
class InvalidMapTaskError < StandardError; end
|
7
|
+
|
8
|
+
def perform(job, map_worker)
|
7
9
|
task_wrapper_class_name = "TaskWrapper#{job.id.delete('-')}"
|
8
10
|
self.class.class_eval("class #{task_wrapper_class_name}; end", 'Task Wrapper Class')
|
9
11
|
task_wrapper_class = self.class.const_get(task_wrapper_class_name)
|
10
12
|
task_wrapper_class.class_eval(job.map_script, 'Map task script')
|
11
13
|
map_task = task_wrapper_class.const_get(job.map_class_name, false).new
|
12
14
|
unless map_task.respond_to?(:map)
|
13
|
-
|
14
|
-
logger.error('no map method')
|
15
|
-
return
|
15
|
+
raise InvalidMapTaskError, 'no map method'
|
16
16
|
end
|
17
17
|
logger.info('map task start')
|
18
18
|
|
@@ -45,31 +45,37 @@ module SimpleMapReduce
|
|
45
45
|
logger.debug(response.body)
|
46
46
|
|
47
47
|
# {"succeeded":true,"workers":[{"id":70157882164440,"url":"http://localhost:4569","state":'reserved'}]}
|
48
|
-
reserved_workers = JSON.parse(response.body, symbolize_names: true)[:reserved_workers]
|
48
|
+
reserved_workers = JSON.parse(response.body, symbolize_names: true)[:reserved_workers].map do |worker|
|
49
|
+
SimpleMapReduce::Server::Worker.new(
|
50
|
+
id: worker[:id],
|
51
|
+
url: worker[:url],
|
52
|
+
state: worker[:state].to_sym,
|
53
|
+
data_store_type: 'remote'
|
54
|
+
)
|
55
|
+
end
|
49
56
|
if reserved_workers.count == 0
|
50
57
|
# keep working with same worker
|
51
|
-
reserved_workers <<
|
58
|
+
reserved_workers << map_worker
|
52
59
|
end
|
53
60
|
|
54
61
|
shuffle(job, reserved_workers, local_output_cache)
|
55
62
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
63
|
+
if reserved_workers.all? { |w| w.id != map_worker.id }
|
64
|
+
begin
|
65
|
+
map_worker.ready!
|
66
|
+
rescue => notify_error
|
67
|
+
logger.fatal(notify_error.inspect)
|
68
|
+
logger.fatal(notify_error.backtrace.take(50))
|
60
69
|
end
|
61
|
-
logger.debug(response.body)
|
62
70
|
end
|
63
71
|
rescue => e
|
64
72
|
logger.error(e.inspect)
|
65
73
|
logger.error(e.backtrace.take(50))
|
74
|
+
job.failed!
|
66
75
|
# TODO: notifying to job_tracker that this task have failed
|
67
76
|
ensure
|
68
77
|
local_input_cache&.delete
|
69
78
|
local_output_cache&.delete
|
70
|
-
reserved_workers&.each do |worker|
|
71
|
-
worker[:shuffled_local_output]&.delete
|
72
|
-
end
|
73
79
|
if self.class.const_defined?(task_wrapper_class_name.to_sym)
|
74
80
|
self.class.send(:remove_const, task_wrapper_class_name.to_sym)
|
75
81
|
end
|
@@ -106,14 +112,11 @@ module SimpleMapReduce
|
|
106
112
|
workers_count = workers.count
|
107
113
|
raise 'No workers' unless workers_count > 0
|
108
114
|
|
109
|
-
|
110
|
-
worker[:shuffled_local_output] = Tempfile.new
|
111
|
-
end
|
112
|
-
|
115
|
+
shuffled_local_outputs = Array.new(workers_count, Tempfile.new)
|
113
116
|
local_output_cache.each_line(rs: "\n") do |raw_line|
|
114
117
|
output = JSON.parse(raw_line, symbolize_names: true)
|
115
118
|
partition_id = output[:key].hash % workers_count
|
116
|
-
|
119
|
+
shuffled_local_outputs[partition_id].puts(output.to_json)
|
117
120
|
end
|
118
121
|
|
119
122
|
task_script = job.reduce_script
|
@@ -134,7 +137,7 @@ module SimpleMapReduce
|
|
134
137
|
task_output_directory_path: task_output_directory_path
|
135
138
|
)
|
136
139
|
|
137
|
-
local_output_cache =
|
140
|
+
local_output_cache = shuffled_local_outputs[partition_id]
|
138
141
|
local_output_cache.rewind
|
139
142
|
s3_client.put_object(
|
140
143
|
body: local_output_cache.read,
|
@@ -142,18 +145,15 @@ module SimpleMapReduce
|
|
142
145
|
key: reduce_task.task_input_file_path
|
143
146
|
)
|
144
147
|
|
145
|
-
response = http_client(worker
|
148
|
+
response = http_client(worker.url).post do |request|
|
146
149
|
request.url('/reduce_tasks')
|
147
150
|
request.body = reduce_task.serialize
|
148
151
|
end
|
149
152
|
logger.debug(response.body)
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
request.body = { event: 'work' }.to_json
|
155
|
-
end
|
156
|
-
logger.debug(response.body)
|
153
|
+
end
|
154
|
+
ensure
|
155
|
+
shuffled_local_outputs&.each do |output|
|
156
|
+
output.delete
|
157
157
|
end
|
158
158
|
end
|
159
159
|
end
|
data/lib/simple_map_reduce/worker/run_reduce_task_worker.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
module SimpleMapReduce
|
4
4
|
module Worker
|
5
5
|
class RunReduceTaskWorker
|
6
|
-
def perform(task,
|
6
|
+
def perform(task, reduce_worker)
|
7
7
|
task_wrapper_class_name = "TaskWrapper#{task.id.delete('-')}"
|
8
8
|
self.class.class_eval("class #{task_wrapper_class_name}; end", 'Task Wrapper Class')
|
9
9
|
task_wrapper_class = self.class.const_get(task_wrapper_class_name)
|
@@ -54,11 +54,7 @@ module SimpleMapReduce
|
|
54
54
|
end
|
55
55
|
|
56
56
|
begin
|
57
|
-
|
58
|
-
request.url("/workers/#{reduce_worker_id}")
|
59
|
-
request.body = { event: 'ready' }.to_json
|
60
|
-
end
|
61
|
-
logger.debug(response.body)
|
57
|
+
reduce_worker.ready!
|
62
58
|
rescue => notify_error
|
63
59
|
logger.fatal(notify_error.inspect)
|
64
60
|
logger.fatal(notify_error.backtrace.take(50))
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_map_reduce
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kazuhiro Serizawa
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-02-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -228,6 +228,8 @@ executables:
|
|
228
228
|
extensions: []
|
229
229
|
extra_rdoc_files: []
|
230
230
|
files:
|
231
|
+
- ".github/ISSUE_TEMPLATE.md"
|
232
|
+
- ".github/PULL_REQUEST_TEMPLATE.md"
|
231
233
|
- ".gitignore"
|
232
234
|
- ".rspec"
|
233
235
|
- ".rubocop.yml"
|
@@ -249,6 +251,9 @@ files:
|
|
249
251
|
- docker-compose.yml
|
250
252
|
- exe/simple_map_reduce
|
251
253
|
- lib/simple_map_reduce.rb
|
254
|
+
- lib/simple_map_reduce/data_store_factory.rb
|
255
|
+
- lib/simple_map_reduce/data_stores/default_data_store.rb
|
256
|
+
- lib/simple_map_reduce/data_stores/remote_data_store.rb
|
252
257
|
- lib/simple_map_reduce/driver/config.rb
|
253
258
|
- lib/simple_map_reduce/driver/job.rb
|
254
259
|
- lib/simple_map_reduce/s3_client.rb
|
@@ -259,6 +264,7 @@ files:
|
|
259
264
|
- lib/simple_map_reduce/server/task.rb
|
260
265
|
- lib/simple_map_reduce/server/worker.rb
|
261
266
|
- lib/simple_map_reduce/version.rb
|
267
|
+
- lib/simple_map_reduce/worker/polling_workers_status_worker.rb
|
262
268
|
- lib/simple_map_reduce/worker/register_map_task_worker.rb
|
263
269
|
- lib/simple_map_reduce/worker/run_map_task_worker.rb
|
264
270
|
- lib/simple_map_reduce/worker/run_reduce_task_worker.rb
|