bramble 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +14 -9
- data/lib/bramble/base_job.rb +5 -0
- data/lib/bramble/begin_job.rb +13 -0
- data/lib/bramble/keys.rb +16 -2
- data/lib/bramble/map.rb +23 -16
- data/lib/bramble/map_job.rb +3 -3
- data/lib/bramble/reduce.rb +13 -5
- data/lib/bramble/reduce_job.rb +4 -5
- data/lib/bramble/result.rb +46 -0
- data/lib/bramble/serialize.rb +24 -0
- data/lib/bramble/state.rb +56 -0
- data/lib/bramble/storage/memory_storage.rb +4 -0
- data/lib/bramble/storage.rb +8 -44
- data/lib/bramble/version.rb +1 -1
- data/lib/bramble.rb +21 -8
- metadata +6 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c78abeda625df09bdbbfbaa059768757a54380f9
|
4
|
+
data.tar.gz: b641d188443171568abe011b22fc5b3ccf7083e6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3d5e01178d77d2cd06c99634318835143dbdcb3ba203ecc6477cb7bb60d32019521d531660bc1a8f6e7eab258d0d55ec617113ee61e112c3e31a74a9094b655a
|
7
|
+
data.tar.gz: a54391b20995074b7b16382d75823d247f9fb267f27bae2804db4ed5e97c3fc2fbd331cbf666e8a5d42817aafabe8a26c3f194148bf29cc6010876cd2763c4d4
|
data/README.md
CHANGED
@@ -15,10 +15,15 @@ Map-reduce with ActiveJob
|
|
15
15
|
end
|
16
16
|
```
|
17
17
|
|
18
|
-
- Define a module with `map` and `
|
18
|
+
- Define a module with `map`, `reduce` and `items(options = {})` functions:
|
19
19
|
|
20
20
|
```ruby
|
21
21
|
module LetterCount
|
22
|
+
# Generate a list of items based on some input
|
23
|
+
def self.items(filepath)
|
24
|
+
File.read(filepath).split(" ")
|
25
|
+
end
|
26
|
+
|
22
27
|
# .map is called with each item in the input
|
23
28
|
def self.map(word)
|
24
29
|
letters = word.upcase.each_char
|
@@ -38,24 +43,26 @@ Map-reduce with ActiveJob
|
|
38
43
|
end
|
39
44
|
```
|
40
45
|
|
41
|
-
- Start a job with a handle, module, and
|
46
|
+
- Start a job with a handle, module, and an (optional) argument for finding data:
|
42
47
|
|
43
48
|
```ruby
|
44
49
|
# used for fetching the result later:
|
45
50
|
handle = "shakespeare-letter-count"
|
46
51
|
|
47
|
-
#
|
48
|
-
|
52
|
+
# will be sent to `.items(filepath)`
|
53
|
+
hamlet_path = "./shakespeare/hamlet.txt"
|
49
54
|
|
50
55
|
# Begin the process:
|
51
|
-
Bramble.map_reduce(handle, LetterCount,
|
56
|
+
Bramble.map_reduce(handle, LetterCount, hamlet_path)
|
52
57
|
```
|
53
58
|
|
54
59
|
- Later, fetch the result using the handle:
|
55
60
|
|
56
61
|
```ruby
|
57
|
-
result = Bramble.
|
58
|
-
#
|
62
|
+
result = Bramble.get("shakespeare-letter-count")
|
63
|
+
result.running? # => false
|
64
|
+
result.finished? # => true
|
65
|
+
result.data # => { "A" => 100, "B" => 100, ... }
|
59
66
|
```
|
60
67
|
|
61
68
|
- Delete the saved result:
|
@@ -66,8 +73,6 @@ Map-reduce with ActiveJob
|
|
66
73
|
|
67
74
|
## Todo
|
68
75
|
|
69
|
-
- Use `Storage` as gateway to `config.storage`
|
70
|
-
- Job convenience class?
|
71
76
|
- `.fetch` to find-or-calculate?
|
72
77
|
- Adapters: Memcache, ActiveRecord
|
73
78
|
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Bramble
|
2
|
+
# This exists to call `implementation.items` in the background.
|
3
|
+
# It might take a long time to fetch, so let's background it.
|
4
|
+
#
|
5
|
+
# Then it starts the map-reduce job.
|
6
|
+
class BeginJob < Bramble::BaseJob
|
7
|
+
def perform(handle, implementation_name, items_options)
|
8
|
+
implementation = implementation_name.constantize
|
9
|
+
all_items = implementation.items(items_options)
|
10
|
+
Bramble::Map.perform(handle, implementation, all_items)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
data/lib/bramble/keys.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
module Bramble
|
2
2
|
module Keys
|
3
|
+
module_function
|
4
|
+
|
3
5
|
def namespace(handle)
|
4
6
|
"#{Bramble.config.namespace}:#{handle}"
|
5
7
|
end
|
@@ -12,8 +14,12 @@ module Bramble
|
|
12
14
|
"#{namespace(handle)}:keys"
|
13
15
|
end
|
14
16
|
|
15
|
-
def
|
16
|
-
"#{namespace(handle)}:
|
17
|
+
def map_finished_count_key(handle)
|
18
|
+
"#{namespace(handle)}:map_finished_count"
|
19
|
+
end
|
20
|
+
|
21
|
+
def reduce_finished_count_key(handle)
|
22
|
+
"#{namespace(handle)}:reduce_finished_count"
|
17
23
|
end
|
18
24
|
|
19
25
|
def total_count_key(handle)
|
@@ -23,5 +29,13 @@ module Bramble
|
|
23
29
|
def result_key(handle)
|
24
30
|
"#{namespace(handle)}:result"
|
25
31
|
end
|
32
|
+
|
33
|
+
def job_id_key(handle)
|
34
|
+
"#{namespace(handle)}:job_id"
|
35
|
+
end
|
36
|
+
|
37
|
+
def status_key(handle)
|
38
|
+
"#{namespace(handle)}:status"
|
39
|
+
end
|
26
40
|
end
|
27
41
|
end
|
data/lib/bramble/map.rb
CHANGED
@@ -5,25 +5,32 @@ module Bramble
|
|
5
5
|
module_function
|
6
6
|
|
7
7
|
def perform(handle, implementation, values)
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
8
|
+
Bramble::State.running?(handle) do
|
9
|
+
storage.set(total_count_key(handle), values.length)
|
10
|
+
values.each do |value|
|
11
|
+
Bramble::MapJob.perform_later(handle, implementation.name, Bramble::Serialize.dump(value))
|
12
|
+
end
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
16
16
|
def perform_map(handle, implementation, value)
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
Bramble::
|
17
|
+
Bramble::State.running?(handle) do
|
18
|
+
impl_keys_key = keys_key(handle)
|
19
|
+
implementation.map(value) do |map_key, map_val|
|
20
|
+
Bramble::State.running?(handle) do
|
21
|
+
raw_key = Bramble::Serialize.dump(map_key)
|
22
|
+
storage.map_keys_push(impl_keys_key, raw_key)
|
23
|
+
storage.map_result_push(data_key(handle, raw_key), Bramble::Serialize.dump(map_val))
|
24
|
+
end
|
25
|
+
end
|
26
|
+
Bramble::State.running?(handle) do
|
27
|
+
finished = storage.increment(map_finished_count_key(handle))
|
28
|
+
total = storage.get(total_count_key(handle)).to_i
|
29
|
+
if finished == total
|
30
|
+
Bramble::Reduce.perform(handle, implementation)
|
31
|
+
Bramble::State.clear_map(handle)
|
32
|
+
end
|
33
|
+
end
|
27
34
|
end
|
28
35
|
end
|
29
36
|
|
@@ -32,7 +39,7 @@ module Bramble
|
|
32
39
|
module_function
|
33
40
|
|
34
41
|
def storage
|
35
|
-
Bramble
|
42
|
+
Bramble::Storage
|
36
43
|
end
|
37
44
|
end
|
38
45
|
end
|
data/lib/bramble/map_job.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
module Bramble
|
2
|
-
class MapJob <
|
3
|
-
|
4
|
-
def perform(handle, mapper_name, value)
|
2
|
+
class MapJob < Bramble::BaseJob
|
3
|
+
def perform(handle, mapper_name, raw_value)
|
5
4
|
mapper = mapper_name.constantize
|
5
|
+
value = Bramble::Serialize.load(raw_value)
|
6
6
|
Bramble::Map.perform_map(handle, mapper, value)
|
7
7
|
end
|
8
8
|
end
|
data/lib/bramble/reduce.rb
CHANGED
@@ -12,10 +12,18 @@ module Bramble
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def perform_reduce(handle, implementation, raw_key)
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
15
|
+
if Bramble::State.running?(handle)
|
16
|
+
raw_values = storage.map_result_get(data_key(handle, raw_key))
|
17
|
+
values = Bramble::Serialize.load(raw_values)
|
18
|
+
key = Bramble::Serialize.load(raw_key)
|
19
|
+
reduced_value = implementation.reduce(key, values)
|
20
|
+
Bramble::State.running?(handle) do
|
21
|
+
storage.reduce_result_set(result_key(handle), raw_key, Bramble::Serialize.dump(reduced_value))
|
22
|
+
storage.increment(reduce_finished_count_key(handle))
|
23
|
+
end
|
24
|
+
else
|
25
|
+
Bramble::State.clear_reduce(handle)
|
26
|
+
end
|
19
27
|
end
|
20
28
|
|
21
29
|
private
|
@@ -23,7 +31,7 @@ module Bramble
|
|
23
31
|
module_function
|
24
32
|
|
25
33
|
def storage
|
26
|
-
Bramble
|
34
|
+
Bramble::Storage
|
27
35
|
end
|
28
36
|
end
|
29
37
|
end
|
data/lib/bramble/reduce_job.rb
CHANGED
@@ -1,9 +1,8 @@
|
|
1
1
|
module Bramble
|
2
|
-
class ReduceJob <
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
Bramble::Reduce.perform_reduce(handle, reducer, key)
|
2
|
+
class ReduceJob < Bramble::BaseJob
|
3
|
+
def perform(handle, implementation_name, key)
|
4
|
+
implementation = implementation_name.constantize
|
5
|
+
Bramble::Reduce.perform_reduce(handle, implementation, key)
|
7
6
|
end
|
8
7
|
end
|
9
8
|
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Bramble
|
2
|
+
# This class exposes the data and some info about the state of the task
|
3
|
+
class Result
|
4
|
+
|
5
|
+
attr_reader :handle
|
6
|
+
|
7
|
+
def initialize(handle)
|
8
|
+
job_id = storage.get(Bramble::Keys.job_id_key(handle))
|
9
|
+
@handle = "#{handle}:#{job_id}"
|
10
|
+
end
|
11
|
+
|
12
|
+
def data
|
13
|
+
@data ||= begin
|
14
|
+
key = Bramble::Keys.result_key(handle)
|
15
|
+
results = storage.reduce_result_get(key)
|
16
|
+
Bramble::Serialize.load(results)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def finished?
|
21
|
+
@finished ||= finished_count > 0 && total_count == finished_count
|
22
|
+
end
|
23
|
+
|
24
|
+
def running?
|
25
|
+
@running ||= started? && !finished?
|
26
|
+
end
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
def total_count
|
31
|
+
@total_count ||= storage.get(Bramble::Keys.total_count_key(handle)).to_i
|
32
|
+
end
|
33
|
+
|
34
|
+
def finished_count
|
35
|
+
@finished_count ||= storage.get(Bramble::Keys.reduce_finished_count_key(handle)).to_i
|
36
|
+
end
|
37
|
+
|
38
|
+
def started?
|
39
|
+
@started ||= !!storage.get(Bramble::Keys.status_key(handle))
|
40
|
+
end
|
41
|
+
|
42
|
+
def storage
|
43
|
+
Bramble::Storage
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Bramble
|
2
|
+
# e.g., Redis stores only strings, so use this module to serialize values before storage and deserialize them when read back
|
3
|
+
module Serialize
|
4
|
+
# prepare an object for storage
|
5
|
+
def self.dump(obj)
|
6
|
+
Marshal.dump(obj)
|
7
|
+
end
|
8
|
+
|
9
|
+
# reload an object from storage
|
10
|
+
def self.load(stored_obj)
|
11
|
+
case stored_obj
|
12
|
+
when Array
|
13
|
+
stored_obj.map { |obj| load(obj) }
|
14
|
+
when Hash
|
15
|
+
stored_obj.inject({}) do |memo, (k, v)|
|
16
|
+
memo[load(k)] = load(v)
|
17
|
+
memo
|
18
|
+
end
|
19
|
+
else
|
20
|
+
Marshal.load(stored_obj)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module Bramble
|
2
|
+
module State
|
3
|
+
extend Bramble::Keys
|
4
|
+
SEPARATOR = ":"
|
5
|
+
module_function
|
6
|
+
|
7
|
+
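# Yield to the block (if given) and return true if the `job_id` is still active. NOTE(review): the block runs even when the job is no longer active — confirm this is intended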
|
8
|
+
def running?(handle)
|
9
|
+
handle_name, job_id = handle.split(SEPARATOR)
|
10
|
+
is_running = storage.get(job_id_key(handle_name)) == job_id
|
11
|
+
if block_given?
|
12
|
+
yield
|
13
|
+
end
|
14
|
+
is_running
|
15
|
+
end
|
16
|
+
|
17
|
+
def start_job(handle)
|
18
|
+
handle_name, job_id = handle.split(SEPARATOR)
|
19
|
+
previous_job_id = storage.get(job_id_key(handle_name))
|
20
|
+
if previous_job_id
|
21
|
+
clear_job("#{handle_name}:#{previous_job_id}")
|
22
|
+
end
|
23
|
+
storage.set(status_key(handle), "started")
|
24
|
+
storage.set(job_id_key(handle_name), job_id)
|
25
|
+
end
|
26
|
+
|
27
|
+
def clear_job(handle)
|
28
|
+
handle_name, job_id = handle.split(SEPARATOR)
|
29
|
+
storage.delete(job_id_key(handle_name))
|
30
|
+
storage.delete(status_key(handle))
|
31
|
+
clear_reduce(handle)
|
32
|
+
clear_map(handle)
|
33
|
+
end
|
34
|
+
|
35
|
+
def clear_map(handle)
|
36
|
+
map_group_keys = storage.map_keys_get(keys_key(handle))
|
37
|
+
map_group_keys.each do |group_key|
|
38
|
+
storage.delete(data_key(handle, group_key))
|
39
|
+
end
|
40
|
+
storage.delete(keys_key(handle))
|
41
|
+
storage.delete(map_finished_count_key(handle))
|
42
|
+
end
|
43
|
+
|
44
|
+
def clear_reduce(handle)
|
45
|
+
storage.delete(total_count_key(handle))
|
46
|
+
storage.delete(reduce_finished_count_key(handle))
|
47
|
+
storage.delete(result_key(handle))
|
48
|
+
end
|
49
|
+
|
50
|
+
private
|
51
|
+
|
52
|
+
def self.storage
|
53
|
+
Bramble::Storage
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
data/lib/bramble/storage.rb
CHANGED
@@ -1,56 +1,20 @@
|
|
1
|
+
require "forwardable"
|
1
2
|
require "bramble/storage/redis_storage"
|
2
3
|
require "bramble/storage/memory_storage"
|
3
4
|
|
4
5
|
module Bramble
|
5
6
|
module Storage
|
6
|
-
extend
|
7
|
+
extend SingleForwardable
|
7
8
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
# Wipe out the results for this handle
|
15
|
-
def self.delete(handle)
|
16
|
-
# Reset counts
|
17
|
-
storage.delete(total_count_key(handle))
|
18
|
-
storage.delete(finished_count_key(handle))
|
19
|
-
# Reset result
|
20
|
-
storage.delete(result_key(handle))
|
21
|
-
|
22
|
-
# Reset dangling map data
|
23
|
-
map_group_keys = storage.map_keys_get(keys_key(handle))
|
24
|
-
map_group_keys.each do |group_key|
|
25
|
-
storage.delete(data_key(handle, group_key))
|
26
|
-
end
|
27
|
-
storage.delete(keys_key(handle))
|
28
|
-
end
|
29
|
-
|
30
|
-
|
31
|
-
# prepare an object for storage
|
32
|
-
def self.dump(obj)
|
33
|
-
Marshal.dump(obj)
|
34
|
-
end
|
35
|
-
|
36
|
-
# reload an object from storage
|
37
|
-
def self.load(stored_obj)
|
38
|
-
case stored_obj
|
39
|
-
when Array
|
40
|
-
stored_obj.map { |obj| load(obj) }
|
41
|
-
when Hash
|
42
|
-
stored_obj.inject({}) do |memo, (k, v)|
|
43
|
-
memo[load(k)] = load(v)
|
44
|
-
memo
|
45
|
-
end
|
46
|
-
else
|
47
|
-
Marshal.load(stored_obj)
|
48
|
-
end
|
49
|
-
end
|
9
|
+
def_delegators :storage_instance,
|
10
|
+
:set, :get, :delete, :increment,
|
11
|
+
:map_result_push, :map_result_get,
|
12
|
+
:reduce_result_set, :reduce_result_get,
|
13
|
+
:map_keys_push, :map_keys_get
|
50
14
|
|
51
15
|
private
|
52
16
|
|
53
|
-
def self.
|
17
|
+
def self.storage_instance
|
54
18
|
Bramble.config.storage
|
55
19
|
end
|
56
20
|
end
|
data/lib/bramble/version.rb
CHANGED
data/lib/bramble.rb
CHANGED
@@ -1,10 +1,15 @@
|
|
1
1
|
require "ostruct"
|
2
2
|
require "active_job"
|
3
|
+
require "bramble/base_job"
|
4
|
+
require "bramble/begin_job"
|
3
5
|
require "bramble/keys"
|
4
6
|
require "bramble/map"
|
5
7
|
require "bramble/map_job"
|
6
8
|
require "bramble/reduce"
|
7
9
|
require "bramble/reduce_job"
|
10
|
+
require "bramble/result"
|
11
|
+
require "bramble/serialize"
|
12
|
+
require "bramble/state"
|
8
13
|
require "bramble/storage"
|
9
14
|
require "bramble/version"
|
10
15
|
require "bramble/conf"
|
@@ -19,19 +24,27 @@ module Bramble
|
|
19
24
|
end
|
20
25
|
|
21
26
|
# @param handle [String] This string will be used to store the result
|
22
|
-
# @param implementation [
|
23
|
-
# @param
|
24
|
-
def self.map_reduce(handle, implementation,
|
25
|
-
|
27
|
+
# @param implementation [.map, .reduce, .name, .items(options)] The container of map and reduce methods
|
28
|
+
# @param items_options [Object] will be passed to .items
|
29
|
+
def self.map_reduce(handle, implementation, items_options = {})
|
30
|
+
# Secret feature: the implementation can provide a job_id
|
31
|
+
job_id = if implementation.respond_to?(:job_id)
|
32
|
+
implementation.job_id
|
33
|
+
else
|
34
|
+
Time.now.strftime("%s%6N")
|
35
|
+
end
|
36
|
+
handle = "#{handle}:#{job_id}"
|
37
|
+
Bramble::State.start_job(handle)
|
38
|
+
Bramble::BeginJob.perform_later(handle, implementation.name, items_options)
|
26
39
|
end
|
27
40
|
|
28
|
-
#
|
29
|
-
def self.
|
30
|
-
Bramble::
|
41
|
+
# @return [Bramble::Result] Status & data for this handle
|
42
|
+
def self.get(handle)
|
43
|
+
Bramble::Result.new(handle)
|
31
44
|
end
|
32
45
|
|
33
46
|
# Remove results for `handle`, if there are any
|
34
47
|
def self.delete(handle)
|
35
|
-
Bramble::
|
48
|
+
Bramble::State.clear_job(handle)
|
36
49
|
end
|
37
50
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bramble
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Robert Mosolgo
|
@@ -103,12 +103,17 @@ extra_rdoc_files: []
|
|
103
103
|
files:
|
104
104
|
- README.md
|
105
105
|
- lib/bramble.rb
|
106
|
+
- lib/bramble/base_job.rb
|
107
|
+
- lib/bramble/begin_job.rb
|
106
108
|
- lib/bramble/conf.rb
|
107
109
|
- lib/bramble/keys.rb
|
108
110
|
- lib/bramble/map.rb
|
109
111
|
- lib/bramble/map_job.rb
|
110
112
|
- lib/bramble/reduce.rb
|
111
113
|
- lib/bramble/reduce_job.rb
|
114
|
+
- lib/bramble/result.rb
|
115
|
+
- lib/bramble/serialize.rb
|
116
|
+
- lib/bramble/state.rb
|
112
117
|
- lib/bramble/storage.rb
|
113
118
|
- lib/bramble/storage/memory_storage.rb
|
114
119
|
- lib/bramble/storage/redis_storage.rb
|