bramble 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +14 -9
- data/lib/bramble/base_job.rb +5 -0
- data/lib/bramble/begin_job.rb +13 -0
- data/lib/bramble/keys.rb +16 -2
- data/lib/bramble/map.rb +23 -16
- data/lib/bramble/map_job.rb +3 -3
- data/lib/bramble/reduce.rb +13 -5
- data/lib/bramble/reduce_job.rb +4 -5
- data/lib/bramble/result.rb +46 -0
- data/lib/bramble/serialize.rb +24 -0
- data/lib/bramble/state.rb +56 -0
- data/lib/bramble/storage/memory_storage.rb +4 -0
- data/lib/bramble/storage.rb +8 -44
- data/lib/bramble/version.rb +1 -1
- data/lib/bramble.rb +21 -8
- metadata +6 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c78abeda625df09bdbbfbaa059768757a54380f9
|
4
|
+
data.tar.gz: b641d188443171568abe011b22fc5b3ccf7083e6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3d5e01178d77d2cd06c99634318835143dbdcb3ba203ecc6477cb7bb60d32019521d531660bc1a8f6e7eab258d0d55ec617113ee61e112c3e31a74a9094b655a
|
7
|
+
data.tar.gz: a54391b20995074b7b16382d75823d247f9fb267f27bae2804db4ed5e97c3fc2fbd331cbf666e8a5d42817aafabe8a26c3f194148bf29cc6010876cd2763c4d4
|
data/README.md
CHANGED
@@ -15,10 +15,15 @@ Map-reduce with ActiveJob
|
|
15
15
|
end
|
16
16
|
```
|
17
17
|
|
18
|
-
- Define a module with `map` and `
|
18
|
+
- Define a module with `map`, `reduce` and `items(options = {})` functions:
|
19
19
|
|
20
20
|
```ruby
|
21
21
|
module LetterCount
|
22
|
+
# Generate a list of items based on some input
|
23
|
+
def self.items(filepath)
|
24
|
+
File.read(filepath).split(" ")
|
25
|
+
end
|
26
|
+
|
22
27
|
# .map is called with each item in the input
|
23
28
|
def self.map(word)
|
24
29
|
letters = word.upcase.each_char
|
@@ -38,24 +43,26 @@ Map-reduce with ActiveJob
|
|
38
43
|
end
|
39
44
|
```
|
40
45
|
|
41
|
-
- Start a job with a handle, module, and
|
46
|
+
- Start a job with a handle, module, and an (optional) argument for finding data:
|
42
47
|
|
43
48
|
```ruby
|
44
49
|
# used for fetching the result later:
|
45
50
|
handle = "shakespeare-letter-count"
|
46
51
|
|
47
|
-
#
|
48
|
-
|
52
|
+
# will be sent to `.items(filepath)`
|
53
|
+
hamlet_path = "./shakespeare/hamlet.txt"
|
49
54
|
|
50
55
|
# Begin the process:
|
51
|
-
Bramble.map_reduce(handle, LetterCount,
|
56
|
+
Bramble.map_reduce(handle, LetterCount, hamlet_path)
|
52
57
|
```
|
53
58
|
|
54
59
|
- Later, fetch the result using the handle:
|
55
60
|
|
56
61
|
```ruby
|
57
|
-
result = Bramble.
|
58
|
-
#
|
62
|
+
result = Bramble.get("shakespeare-letter-count")
|
63
|
+
result.running? # => false
|
64
|
+
result.finished? # => true
|
65
|
+
result.data # => { "A" => 100, "B" => 100, ... }
|
59
66
|
```
|
60
67
|
|
61
68
|
- Delete the saved result:
|
@@ -66,8 +73,6 @@ Map-reduce with ActiveJob
|
|
66
73
|
|
67
74
|
## Todo
|
68
75
|
|
69
|
-
- Use `Storage` as gateway to `config.storage`
|
70
|
-
- Job convenience class?
|
71
76
|
- `.fetch` to find-or-calculate?
|
72
77
|
- Adapters: Memcache, ActiveRecord
|
73
78
|
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Bramble
  # Background entry point for a map-reduce run.
  #
  # Fetching the items might take a long time, so `implementation.items`
  # is called from inside a job; once the items are available the
  # map-reduce work is started through `Bramble::Map.perform`.
  class BeginJob < Bramble::BaseJob
    # @param handle [String] identifier forwarded unchanged to `Bramble::Map.perform`
    # @param implementation_name [String] constant name of the implementation, resolved with `constantize`
    # @param items_options [Object] passed as-is to `implementation.items`
    def perform(handle, implementation_name, items_options)
      job_implementation = implementation_name.constantize
      Bramble::Map.perform(handle, job_implementation, job_implementation.items(items_options))
    end
  end
end
|
data/lib/bramble/keys.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
module Bramble
|
2
2
|
module Keys
|
3
|
+
module_function
|
4
|
+
|
3
5
|
def namespace(handle)
|
4
6
|
"#{Bramble.config.namespace}:#{handle}"
|
5
7
|
end
|
@@ -12,8 +14,12 @@ module Bramble
|
|
12
14
|
"#{namespace(handle)}:keys"
|
13
15
|
end
|
14
16
|
|
15
|
-
def
|
16
|
-
"#{namespace(handle)}:
|
17
|
+
def map_finished_count_key(handle)
|
18
|
+
"#{namespace(handle)}:map_finished_count"
|
19
|
+
end
|
20
|
+
|
21
|
+
def reduce_finished_count_key(handle)
|
22
|
+
"#{namespace(handle)}:reduce_finished_count"
|
17
23
|
end
|
18
24
|
|
19
25
|
def total_count_key(handle)
|
@@ -23,5 +29,13 @@ module Bramble
|
|
23
29
|
def result_key(handle)
|
24
30
|
"#{namespace(handle)}:result"
|
25
31
|
end
|
32
|
+
|
33
|
+
def job_id_key(handle)
|
34
|
+
"#{namespace(handle)}:job_id"
|
35
|
+
end
|
36
|
+
|
37
|
+
def status_key(handle)
|
38
|
+
"#{namespace(handle)}:status"
|
39
|
+
end
|
26
40
|
end
|
27
41
|
end
|
data/lib/bramble/map.rb
CHANGED
@@ -5,25 +5,32 @@ module Bramble
|
|
5
5
|
module_function
|
6
6
|
|
7
7
|
def perform(handle, implementation, values)
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
8
|
+
Bramble::State.running?(handle) do
|
9
|
+
storage.set(total_count_key(handle), values.length)
|
10
|
+
values.each do |value|
|
11
|
+
Bramble::MapJob.perform_later(handle, implementation.name, Bramble::Serialize.dump(value))
|
12
|
+
end
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
16
16
|
def perform_map(handle, implementation, value)
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
Bramble::
|
17
|
+
Bramble::State.running?(handle) do
|
18
|
+
impl_keys_key = keys_key(handle)
|
19
|
+
implementation.map(value) do |map_key, map_val|
|
20
|
+
Bramble::State.running?(handle) do
|
21
|
+
raw_key = Bramble::Serialize.dump(map_key)
|
22
|
+
storage.map_keys_push(impl_keys_key, raw_key)
|
23
|
+
storage.map_result_push(data_key(handle, raw_key), Bramble::Serialize.dump(map_val))
|
24
|
+
end
|
25
|
+
end
|
26
|
+
Bramble::State.running?(handle) do
|
27
|
+
finished = storage.increment(map_finished_count_key(handle))
|
28
|
+
total = storage.get(total_count_key(handle)).to_i
|
29
|
+
if finished == total
|
30
|
+
Bramble::Reduce.perform(handle, implementation)
|
31
|
+
Bramble::State.clear_map(handle)
|
32
|
+
end
|
33
|
+
end
|
27
34
|
end
|
28
35
|
end
|
29
36
|
|
@@ -32,7 +39,7 @@ module Bramble
|
|
32
39
|
module_function
|
33
40
|
|
34
41
|
def storage
|
35
|
-
Bramble
|
42
|
+
Bramble::Storage
|
36
43
|
end
|
37
44
|
end
|
38
45
|
end
|
data/lib/bramble/map_job.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
module Bramble
|
2
|
-
class MapJob <
|
3
|
-
|
4
|
-
def perform(handle, mapper_name, value)
|
2
|
+
class MapJob < Bramble::BaseJob
|
3
|
+
def perform(handle, mapper_name, raw_value)
|
5
4
|
mapper = mapper_name.constantize
|
5
|
+
value = Bramble::Serialize.load(raw_value)
|
6
6
|
Bramble::Map.perform_map(handle, mapper, value)
|
7
7
|
end
|
8
8
|
end
|
data/lib/bramble/reduce.rb
CHANGED
@@ -12,10 +12,18 @@ module Bramble
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def perform_reduce(handle, implementation, raw_key)
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
15
|
+
if Bramble::State.running?(handle)
|
16
|
+
raw_values = storage.map_result_get(data_key(handle, raw_key))
|
17
|
+
values = Bramble::Serialize.load(raw_values)
|
18
|
+
key = Bramble::Serialize.load(raw_key)
|
19
|
+
reduced_value = implementation.reduce(key, values)
|
20
|
+
Bramble::State.running?(handle) do
|
21
|
+
storage.reduce_result_set(result_key(handle), raw_key, Bramble::Serialize.dump(reduced_value))
|
22
|
+
storage.increment(reduce_finished_count_key(handle))
|
23
|
+
end
|
24
|
+
else
|
25
|
+
Bramble::State.clear_reduce(handle)
|
26
|
+
end
|
19
27
|
end
|
20
28
|
|
21
29
|
private
|
@@ -23,7 +31,7 @@ module Bramble
|
|
23
31
|
module_function
|
24
32
|
|
25
33
|
def storage
|
26
|
-
Bramble
|
34
|
+
Bramble::Storage
|
27
35
|
end
|
28
36
|
end
|
29
37
|
end
|
data/lib/bramble/reduce_job.rb
CHANGED
@@ -1,9 +1,8 @@
|
|
1
1
|
module Bramble
|
2
|
-
class ReduceJob <
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
Bramble::Reduce.perform_reduce(handle, reducer, key)
|
2
|
+
class ReduceJob < Bramble::BaseJob
|
3
|
+
def perform(handle, implementation_name, key)
|
4
|
+
implementation = implementation_name.constantize
|
5
|
+
Bramble::Reduce.perform_reduce(handle, implementation, key)
|
7
6
|
end
|
8
7
|
end
|
9
8
|
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Bramble
  # This class exposes the data and some info about the state of the task.
  #
  # Every value read from storage is memoized on the instance, so a Result
  # reflects what storage held when each value was first asked for.
  class Result
    # @return [String] the caller's handle joined with the job id read from storage at construction time
    attr_reader :handle

    # @param handle [String] the handle the job was registered under (without a job id)
    def initialize(handle)
      job_id = storage.get(Bramble::Keys.job_id_key(handle))
      @handle = "#{handle}:#{job_id}"
    end

    # @return [Object] whatever `Bramble::Serialize.load` returns for the stored reduce results
    def data
      @data ||= begin
        key = Bramble::Keys.result_key(handle)
        results = storage.reduce_result_get(key)
        Bramble::Serialize.load(results)
      end
    end

    # @return [Boolean] true when at least one reduce has finished and the
    #   finished count equals the total count
    def finished?
      # `||=` cannot memoize `false`, so check for the ivar explicitly.
      return @finished if defined?(@finished)
      @finished = finished_count > 0 && total_count == finished_count
    end

    # @return [Boolean] true when a status is stored for this job and it is not finished
    def running?
      return @running if defined?(@running)
      @running = started? && !finished?
    end

    private

    # Stored total count, coerced with `to_i` (a missing value becomes 0).
    def total_count
      @total_count ||= storage.get(Bramble::Keys.total_count_key(handle)).to_i
    end

    # Stored count of finished reduces, coerced with `to_i` (a missing value becomes 0).
    def finished_count
      @finished_count ||= storage.get(Bramble::Keys.reduce_finished_count_key(handle)).to_i
    end

    # True when any status value is stored for this job.
    def started?
      # Memoize `false` as well, so storage is read at most once.
      return @started if defined?(@started)
      @started = !!storage.get(Bramble::Keys.status_key(handle))
    end

    def storage
      Bramble::Storage
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Bramble
  # eg, Redis uses strings only, so use this module to freeze and thaw values from storage
  module Serialize
    # Prepare an object for storage.
    #
    # @param obj [Object] anything `Marshal.dump` accepts
    # @return [String] the marshaled representation
    def self.dump(obj)
      Marshal.dump(obj)
    end

    # Reload an object from storage.
    #
    # Arrays and Hashes are treated as containers of dumped values: each
    # element (and each hash key) is loaded recursively. `nil` (nothing
    # stored) is returned as `nil` instead of raising from `Marshal.load`.
    #
    # NOTE(security): `Marshal.load` can instantiate arbitrary objects, so
    # this must only be fed data written by `dump` into trusted storage.
    #
    # @param stored_obj [String, Array, Hash, nil] value(s) as read from storage
    # @return [Object, nil] the reconstructed object(s)
    def self.load(stored_obj)
      case stored_obj
      when nil
        nil
      when Array
        stored_obj.map { |item| load(item) }
      when Hash
        stored_obj.each_with_object({}) do |(raw_key, raw_value), loaded|
          loaded[load(raw_key)] = load(raw_value)
        end
      else
        Marshal.load(stored_obj)
      end
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module Bramble
  # Tracks which job currently owns a handle and removes a job's stored data.
  #
  # Unless noted otherwise, `handle` is the full "<handle_name>:<job_id>"
  # string; it is split on SEPARATOR to recover the two parts.
  module State
    extend Bramble::Keys
    SEPARATOR = ":"
    module_function

    # Check whether the `job_id` in `handle` is still the active one for its
    # handle name. When a block is given it is run only while the job is
    # still active, so work for a superseded job is skipped.
    #
    # @param handle [String] "<handle_name>:<job_id>"
    # @return [Boolean] true if the stored job id matches the one in `handle`
    def running?(handle)
      handle_name, job_id = handle.split(SEPARATOR)
      is_running = storage.get(job_id_key(handle_name)) == job_id
      yield if is_running && block_given?
      is_running
    end

    # Register `handle` as the active job for its handle name. If a job id
    # is already stored for that name, that job's data is cleared first.
    #
    # @param handle [String] "<handle_name>:<job_id>"
    def start_job(handle)
      handle_name, job_id = handle.split(SEPARATOR)
      previous_job_id = storage.get(job_id_key(handle_name))
      if previous_job_id
        clear_job("#{handle_name}:#{previous_job_id}")
      end
      storage.set(status_key(handle), "started")
      storage.set(job_id_key(handle_name), job_id)
    end

    # Delete the stored job id for the handle name, plus the status, reduce
    # data and map data stored under `handle`.
    #
    # @param handle [String] "<handle_name>:<job_id>"
    def clear_job(handle)
      handle_name = handle.split(SEPARATOR).first
      storage.delete(job_id_key(handle_name))
      storage.delete(status_key(handle))
      clear_reduce(handle)
      clear_map(handle)
    end

    # Delete every per-key map result, the list of map keys and the
    # map-finished counter stored under `handle`.
    def clear_map(handle)
      map_group_keys = storage.map_keys_get(keys_key(handle))
      map_group_keys.each do |group_key|
        storage.delete(data_key(handle, group_key))
      end
      storage.delete(keys_key(handle))
      storage.delete(map_finished_count_key(handle))
    end

    # Delete the total count, the reduce-finished counter and the result
    # stored under `handle`.
    def clear_reduce(handle)
      storage.delete(total_count_key(handle))
      storage.delete(reduce_finished_count_key(handle))
      storage.delete(result_key(handle))
    end

    # Storage gateway used by the methods above. Defined on the module
    # itself; a bare `private` would not apply to a `def self.` method, so
    # none is used here and its visibility is unchanged.
    def self.storage
      Bramble::Storage
    end
  end
end
|
data/lib/bramble/storage.rb
CHANGED
@@ -1,56 +1,20 @@
|
|
1
|
+
require "forwardable"
|
1
2
|
require "bramble/storage/redis_storage"
|
2
3
|
require "bramble/storage/memory_storage"
|
3
4
|
|
4
5
|
module Bramble
|
5
6
|
module Storage
|
6
|
-
extend
|
7
|
+
extend SingleForwardable
|
7
8
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
# Wipe out the results for this handle
|
15
|
-
def self.delete(handle)
|
16
|
-
# Reset counts
|
17
|
-
storage.delete(total_count_key(handle))
|
18
|
-
storage.delete(finished_count_key(handle))
|
19
|
-
# Reset result
|
20
|
-
storage.delete(result_key(handle))
|
21
|
-
|
22
|
-
# Reset dangling map data
|
23
|
-
map_group_keys = storage.map_keys_get(keys_key(handle))
|
24
|
-
map_group_keys.each do |group_key|
|
25
|
-
storage.delete(data_key(handle, group_key))
|
26
|
-
end
|
27
|
-
storage.delete(keys_key(handle))
|
28
|
-
end
|
29
|
-
|
30
|
-
|
31
|
-
# prepare an object for storage
|
32
|
-
def self.dump(obj)
|
33
|
-
Marshal.dump(obj)
|
34
|
-
end
|
35
|
-
|
36
|
-
# reload an object from storage
|
37
|
-
def self.load(stored_obj)
|
38
|
-
case stored_obj
|
39
|
-
when Array
|
40
|
-
stored_obj.map { |obj| load(obj) }
|
41
|
-
when Hash
|
42
|
-
stored_obj.inject({}) do |memo, (k, v)|
|
43
|
-
memo[load(k)] = load(v)
|
44
|
-
memo
|
45
|
-
end
|
46
|
-
else
|
47
|
-
Marshal.load(stored_obj)
|
48
|
-
end
|
49
|
-
end
|
9
|
+
def_delegators :storage_instance,
|
10
|
+
:set, :get, :delete, :increment,
|
11
|
+
:map_result_push, :map_result_get,
|
12
|
+
:reduce_result_set, :reduce_result_get,
|
13
|
+
:map_keys_push, :map_keys_get
|
50
14
|
|
51
15
|
private
|
52
16
|
|
53
|
-
def self.
|
17
|
+
def self.storage_instance
|
54
18
|
Bramble.config.storage
|
55
19
|
end
|
56
20
|
end
|
data/lib/bramble/version.rb
CHANGED
data/lib/bramble.rb
CHANGED
@@ -1,10 +1,15 @@
|
|
1
1
|
require "ostruct"
|
2
2
|
require "active_job"
|
3
|
+
require "bramble/base_job"
|
4
|
+
require "bramble/begin_job"
|
3
5
|
require "bramble/keys"
|
4
6
|
require "bramble/map"
|
5
7
|
require "bramble/map_job"
|
6
8
|
require "bramble/reduce"
|
7
9
|
require "bramble/reduce_job"
|
10
|
+
require "bramble/result"
|
11
|
+
require "bramble/serialize"
|
12
|
+
require "bramble/state"
|
8
13
|
require "bramble/storage"
|
9
14
|
require "bramble/version"
|
10
15
|
require "bramble/conf"
|
@@ -19,19 +24,27 @@ module Bramble
|
|
19
24
|
end
|
20
25
|
|
21
26
|
# @param handle [String] This string will be used to store the result
|
22
|
-
# @param implementation [
|
23
|
-
# @param
|
24
|
-
def self.map_reduce(handle, implementation,
|
25
|
-
|
27
|
+
# @param implementation [.map, .reduce, .name, .items(options)] The container of map and reduce methods
|
28
|
+
# @param items_options [Object] will be passed to .items
|
29
|
+
def self.map_reduce(handle, implementation, items_options = {})
|
30
|
+
# Secret feature: the implementation can provide a job_id
|
31
|
+
job_id = if implementation.respond_to?(:job_id)
|
32
|
+
implementation.job_id
|
33
|
+
else
|
34
|
+
Time.now.strftime("%s%6N")
|
35
|
+
end
|
36
|
+
handle = "#{handle}:#{job_id}"
|
37
|
+
Bramble::State.start_job(handle)
|
38
|
+
Bramble::BeginJob.perform_later(handle, implementation.name, items_options)
|
26
39
|
end
|
27
40
|
|
28
|
-
#
|
29
|
-
def self.
|
30
|
-
Bramble::
|
41
|
+
# @return [Bramble::Result] Status & data for this handle
|
42
|
+
def self.get(handle)
|
43
|
+
Bramble::Result.new(handle)
|
31
44
|
end
|
32
45
|
|
33
46
|
# Remove results for `handle`, if there are any
|
34
47
|
def self.delete(handle)
|
35
|
-
Bramble::
|
48
|
+
Bramble::State.clear_job(handle)
|
36
49
|
end
|
37
50
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bramble
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Robert Mosolgo
|
@@ -103,12 +103,17 @@ extra_rdoc_files: []
|
|
103
103
|
files:
|
104
104
|
- README.md
|
105
105
|
- lib/bramble.rb
|
106
|
+
- lib/bramble/base_job.rb
|
107
|
+
- lib/bramble/begin_job.rb
|
106
108
|
- lib/bramble/conf.rb
|
107
109
|
- lib/bramble/keys.rb
|
108
110
|
- lib/bramble/map.rb
|
109
111
|
- lib/bramble/map_job.rb
|
110
112
|
- lib/bramble/reduce.rb
|
111
113
|
- lib/bramble/reduce_job.rb
|
114
|
+
- lib/bramble/result.rb
|
115
|
+
- lib/bramble/serialize.rb
|
116
|
+
- lib/bramble/state.rb
|
112
117
|
- lib/bramble/storage.rb
|
113
118
|
- lib/bramble/storage/memory_storage.rb
|
114
119
|
- lib/bramble/storage/redis_storage.rb
|