simple_map_reduce 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,97 @@
1
+ # frozen_string_literal: true
2
+
3
module SimpleMapReduce
  module Worker
    # Runs one reduce task on this worker.
    #
    # The task's script is evaluated inside a throwaway wrapper class, the
    # task input is downloaded from S3 into a temp file, the task object's
    # `reduce(input, output)` is invoked, the output temp file is uploaded to
    # S3, and the input object is deleted. Whatever happens, the job tracker
    # is finally told that this worker is ready again.
    class RunReduceTaskWorker
      # Headers attached to every request sent to the job tracker.
      HTTP_MSGPACK_HEADER = {
        'Accept' => 'application/x-msgpack',
        'Content-Type' => 'application/x-msgpack'
      }.freeze

      # @param task [#id, #job_id, #task_script, #task_class_name,
      #   #task_input_bucket_name, #task_input_file_path,
      #   #task_output_bucket_name, #task_output_directory_path]
      #   description of the reduce task to run
      # @param reduce_worker_id [#to_s] identifier interpolated into the
      #   `/workers/:id` path of the "ready" notification
      # @return [void] failures are logged, not raised; the return value is
      #   not meaningful
      def perform(task, reduce_worker_id)
        task_wrapper_class_name = "TaskWrapper#{task.id.delete('-')}"
        # SECURITY: `task.id` and `task.task_script` are evaluated as Ruby
        # code below. They must only ever come from a trusted job tracker.
        self.class.class_eval("class #{task_wrapper_class_name}; end", 'Task Wrapper Class')
        task_wrapper_class = self.class.const_get(task_wrapper_class_name)
        task_wrapper_class.class_eval(task.task_script, 'Reduce task script')
        reduce_task = task_wrapper_class.const_get(task.task_class_name, false).new
        unless reduce_task.respond_to?(:reduce)
          # TODO: notify the job tracker that this task has failed
          logger.error('no reduce method')
          return
        end

        logger.info('reduce task start')

        local_input_cache = Tempfile.new
        s3_client.get_object(
          response_target: local_input_cache.path,
          bucket: task.task_input_bucket_name,
          key: task.task_input_file_path
        )
        local_input_cache.rewind

        local_output_cache = Tempfile.new
        reduce_task.reduce(local_input_cache, local_output_cache)

        local_output_cache.rewind
        s3_client.put_object(
          body: local_output_cache.read,
          bucket: task.task_output_bucket_name,
          key: "#{task.task_output_directory_path}/#{task.job_id}/#{task.id}_reduce_task_output.txt"
        )

        s3_client.delete_object(
          bucket: task.task_input_bucket_name,
          key: task.task_input_file_path
        )

        # TODO: Notify the task succeeded
      rescue StandardError, ScriptError => e
        # ScriptError is listed explicitly: a malformed task script raises
        # SyntaxError, which a bare `rescue` would let escape unlogged.
        logger.error(e.inspect)
        logger.error(Array(e.backtrace).take(50))

        # TODO: Notify the task failed
      ensure
        # `close!` closes the descriptor and unlinks the file; `delete` alone
        # would leave the descriptor open until the object is collected.
        local_input_cache&.close!
        local_output_cache&.close!
        remove_task_wrapper(task_wrapper_class_name)
        notify_worker_ready(reduce_worker_id)

        logger.info('reduce task end')
      end

      private

      def s3_client
        SimpleMapReduce::S3Client.instance.client
      end

      def logger
        SimpleMapReduce.logger
      end

      # Removes the per-task wrapper class constant, if it was ever defined.
      # `class_name` is nil when the failure happened before the name was built.
      def remove_task_wrapper(class_name)
        return if class_name.nil?
        # inherit = false: remove_const only removes constants owned by this class.
        return unless self.class.const_defined?(class_name, false)

        self.class.send(:remove_const, class_name)
      rescue NameError => e
        # A malformed constant name must not prevent the "ready" notification.
        logger.error(e.inspect)
      end

      # Tells the job tracker this worker can take another task. Errors are
      # logged at fatal level and swallowed so they never mask the task result.
      def notify_worker_ready(reduce_worker_id)
        response = http_client(SimpleMapReduce.job_tracker_url).put do |request|
          request.url("/workers/#{reduce_worker_id}")
          # NOTE(review): the connection sends msgpack Content-Type headers
          # while this body is JSON - confirm the job tracker accepts that.
          request.body = { event: 'ready' }.to_json
        end
        logger.debug(response.body)
      rescue => notify_error
        logger.fatal(notify_error.inspect)
        logger.fatal(Array(notify_error.backtrace).take(50))
      end

      # Builds a Faraday connection to `url` with response logging and
      # raise-on-error middleware.
      def http_client(url)
        ::Faraday.new(
          url: url,
          headers: HTTP_MSGPACK_HEADER
        ) do |faraday|
          faraday.response :logger
          faraday.response :raise_error
          faraday.adapter Faraday.default_adapter
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rasteira'
4
+ require 'faraday'
5
+
6
# Top-level namespace of the gem. Configuration values are kept as
# module-level readers/writers on SimpleMapReduce itself.
module SimpleMapReduce
  class << self
    # For the details of s3_config, see
    # https://github.com/aws/aws-sdk-ruby/blob/v2.10.100/aws-sdk-resources/lib/aws-sdk-resources/services/s3/encryption/client.rb#L182-L219
    attr_accessor :s3_config,
                  :job_tracker_url,
                  :job_worker_url,
                  :s3_input_bucket_name,
                  :s3_output_bucket_name,
                  :s3_intermediate_bucket_name,
                  :logger
  end
end
19
+
20
+ require 'simple_map_reduce/version'
21
+ require 'simple_map_reduce/s3_client'
22
+ require 'simple_map_reduce/driver/config'
23
+ require 'simple_map_reduce/driver/job'
24
+ require 'simple_map_reduce/server/confg'
25
+ require 'simple_map_reduce/server/job'
26
+ require 'simple_map_reduce/server/task'
27
+ require 'simple_map_reduce/server/worker'
28
+ require 'simple_map_reduce/server/job_tracker'
29
+ require 'simple_map_reduce/server/job_worker'
30
+ require 'simple_map_reduce/worker/register_map_task_worker'
31
+ require 'simple_map_reduce/worker/run_map_task_worker'
32
+ require 'simple_map_reduce/worker/run_reduce_task_worker'
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ lib = File.expand_path('../lib', __FILE__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require 'simple_map_reduce/version'
6
+
7
# Packaging manifest for the simple_map_reduce gem.
Gem::Specification.new do |gem|
  gem.name    = 'simple_map_reduce'
  gem.version = SimpleMapReduce::VERSION
  gem.authors = ['Kazuhiro Serizawa']
  gem.email   = ['nserihiro@gmail.com']

  gem.summary     = 'Simple MapReduce framework'
  gem.description = 'Simple MapReduce framework'
  gem.homepage    = 'https://github.com/serihiro/simple_map_reduce'
  gem.license     = 'MIT'

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  gem.bindir = 'exe'
  gem.executables = gem.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  gem.require_paths = ['lib']
  gem.required_ruby_version = '>= 2.3.0'

  # Dependency name => version requirements (an empty list means any version).
  {
    'bundler'     => ['~> 1.16.0'],
    'factory_bot' => ['~> 4.8.0'],
    'faker'       => [],
    'rack-test'   => ['~> 0.8.0'],
    'rake'        => ['~> 10.0'],
    'rspec'       => ['~> 3.0'],
    'rubocop'     => ['0.52.1']
  }.each do |name, requirements|
    gem.add_development_dependency(name, *requirements)
  end

  {
    'aasm'            => ['~> 4.12.0'],
    'aws-sdk'         => ['~> 3.0.0'],
    'faraday'         => ['~> 0.13.0'],
    'msgpack'         => ['~> 1.2.0'],
    'rasteira'        => ['~> 0.1.0'],
    'sinatra'         => ['~> 2.0.0'],
    'sinatra-contrib' => ['~> 2.0.0'],
    'thor'            => ['~> 0.20.0']
  }.each do |name, requirements|
    gem.add_runtime_dependency(name, *requirements)
  end
end
metadata ADDED
@@ -0,0 +1,290 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: simple_map_reduce
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Kazuhiro Serizawa
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2018-01-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 1.16.0
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 1.16.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: factory_bot
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 4.8.0
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 4.8.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: faker
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rack-test
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 0.8.0
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 0.8.0
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '10.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '10.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rspec
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '3.0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '3.0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rubocop
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '='
102
+ - !ruby/object:Gem::Version
103
+ version: 0.52.1
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - '='
109
+ - !ruby/object:Gem::Version
110
+ version: 0.52.1
111
+ - !ruby/object:Gem::Dependency
112
+ name: aasm
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: 4.12.0
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: 4.12.0
125
+ - !ruby/object:Gem::Dependency
126
+ name: aws-sdk
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: 3.0.0
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: 3.0.0
139
+ - !ruby/object:Gem::Dependency
140
+ name: faraday
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: 0.13.0
146
+ type: :runtime
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: 0.13.0
153
+ - !ruby/object:Gem::Dependency
154
+ name: msgpack
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: 1.2.0
160
+ type: :runtime
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: 1.2.0
167
+ - !ruby/object:Gem::Dependency
168
+ name: rasteira
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: 0.1.0
174
+ type: :runtime
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - "~>"
179
+ - !ruby/object:Gem::Version
180
+ version: 0.1.0
181
+ - !ruby/object:Gem::Dependency
182
+ name: sinatra
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - "~>"
186
+ - !ruby/object:Gem::Version
187
+ version: 2.0.0
188
+ type: :runtime
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - "~>"
193
+ - !ruby/object:Gem::Version
194
+ version: 2.0.0
195
+ - !ruby/object:Gem::Dependency
196
+ name: sinatra-contrib
197
+ requirement: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - "~>"
200
+ - !ruby/object:Gem::Version
201
+ version: 2.0.0
202
+ type: :runtime
203
+ prerelease: false
204
+ version_requirements: !ruby/object:Gem::Requirement
205
+ requirements:
206
+ - - "~>"
207
+ - !ruby/object:Gem::Version
208
+ version: 2.0.0
209
+ - !ruby/object:Gem::Dependency
210
+ name: thor
211
+ requirement: !ruby/object:Gem::Requirement
212
+ requirements:
213
+ - - "~>"
214
+ - !ruby/object:Gem::Version
215
+ version: 0.20.0
216
+ type: :runtime
217
+ prerelease: false
218
+ version_requirements: !ruby/object:Gem::Requirement
219
+ requirements:
220
+ - - "~>"
221
+ - !ruby/object:Gem::Version
222
+ version: 0.20.0
223
+ description: Simple MapReduce framework
224
+ email:
225
+ - nserihiro@gmail.com
226
+ executables:
227
+ - simple_map_reduce
228
+ extensions: []
229
+ extra_rdoc_files: []
230
+ files:
231
+ - ".gitignore"
232
+ - ".rspec"
233
+ - ".rubocop.yml"
234
+ - ".ruby-version"
235
+ - ".travis.yml"
236
+ - CODE_OF_CONDUCT.md
237
+ - Dockerfile
238
+ - Gemfile
239
+ - LICENSE.txt
240
+ - README.md
241
+ - Rakefile
242
+ - bin/console
243
+ - bin/job_tracker
244
+ - bin/job_worker1
245
+ - bin/job_worker2
246
+ - bin/job_worker3
247
+ - bin/register_word_count_job
248
+ - bin/setup
249
+ - docker-compose.yml
250
+ - exe/simple_map_reduce
251
+ - lib/simple_map_reduce.rb
252
+ - lib/simple_map_reduce/driver/config.rb
253
+ - lib/simple_map_reduce/driver/job.rb
254
+ - lib/simple_map_reduce/s3_client.rb
255
+ - lib/simple_map_reduce/server/confg.rb
256
+ - lib/simple_map_reduce/server/job.rb
257
+ - lib/simple_map_reduce/server/job_tracker.rb
258
+ - lib/simple_map_reduce/server/job_worker.rb
259
+ - lib/simple_map_reduce/server/task.rb
260
+ - lib/simple_map_reduce/server/worker.rb
261
+ - lib/simple_map_reduce/version.rb
262
+ - lib/simple_map_reduce/worker/register_map_task_worker.rb
263
+ - lib/simple_map_reduce/worker/run_map_task_worker.rb
264
+ - lib/simple_map_reduce/worker/run_reduce_task_worker.rb
265
+ - simple_map_reduce.gemspec
266
+ homepage: https://github.com/serihiro/simple_map_reduce
267
+ licenses:
268
+ - MIT
269
+ metadata: {}
270
+ post_install_message:
271
+ rdoc_options: []
272
+ require_paths:
273
+ - lib
274
+ required_ruby_version: !ruby/object:Gem::Requirement
275
+ requirements:
276
+ - - ">="
277
+ - !ruby/object:Gem::Version
278
+ version: 2.3.0
279
+ required_rubygems_version: !ruby/object:Gem::Requirement
280
+ requirements:
281
+ - - ">="
282
+ - !ruby/object:Gem::Version
283
+ version: '0'
284
+ requirements: []
285
+ rubyforge_project:
286
+ rubygems_version: 2.7.3
287
+ signing_key:
288
+ specification_version: 4
289
+ summary: Simple MapReduce framework
290
+ test_files: []