bramble 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +12 -1
- data/lib/bramble.rb +1 -0
- data/lib/bramble/base_job.rb +2 -0
- data/lib/bramble/begin_job.rb +1 -0
- data/lib/bramble/error_handling.rb +11 -0
- data/lib/bramble/keys.rb +1 -0
- data/lib/bramble/map.rb +14 -6
- data/lib/bramble/map_job.rb +1 -0
- data/lib/bramble/reduce.rb +16 -2
- data/lib/bramble/reduce_job.rb +1 -0
- data/lib/bramble/result.rb +9 -0
- data/lib/bramble/state.rb +7 -0
- data/lib/bramble/storage.rb +2 -0
- data/lib/bramble/version.rb +1 -1
- metadata +7 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 61f945ee023bed4fa6464c90b8568060a1768824
|
4
|
+
data.tar.gz: 3d2b138f12b2fe5421914732cbbaf04b94a8b40a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 151e9f40aec216edbadbec3ea225633957c4f294a145ced9fd2c90ac70ba02f52d2014eb09f83011b29eba0224ad0085a2a059a3b297d460ea102325f4068cca
|
7
|
+
data.tar.gz: a13af709e45714aba91eee42ab8a6600f02c4c76574f39b7df9e5591fcf6c6f2234883a68eff619bd22a6e60bd9b4294aebc10cf9366930c750b53e151b81b9c
|
data/README.md
CHANGED
@@ -44,6 +44,16 @@ We have some staff-only views that expose stats about how people use our app. Ev
|
|
44
44
|
# observations => [1, 1, 1, 1, 1]
|
45
45
|
observations.length
|
46
46
|
end
|
47
|
+
|
48
|
+
# If a .map or .reduce hits an error,
|
49
|
+
# it will be rescued and passed here.
|
50
|
+
# To cause the job to fail, raise it again.
|
51
|
+
# Otherwise, let it pass
|
52
|
+
def self.on_error(err)
|
53
|
+
Bugsnag.notify(err)
|
54
|
+
# Or, to trigger a failure:
|
55
|
+
# raise(err)
|
56
|
+
end
|
47
57
|
end
|
48
58
|
```
|
49
59
|
|
@@ -83,7 +93,8 @@ We have some staff-only views that expose stats about how people use our app. Ev
|
|
83
93
|
|
84
94
|
## Todo
|
85
95
|
|
86
|
-
-
|
96
|
+
- Write some more adapters: Memcached, ActiveRecord
|
97
|
+
- Warn if no storage is configured
|
87
98
|
- Do we have atomicity issues? Occasional test failures
|
88
99
|
- Consolidate storage in Redis to a single key? (Could some keys be evicted while others remain?)
|
89
100
|
|
data/lib/bramble.rb
CHANGED
data/lib/bramble/base_job.rb
CHANGED
data/lib/bramble/begin_job.rb
CHANGED
@@ -4,6 +4,7 @@ module Bramble
|
|
4
4
|
#
|
5
5
|
# Then it starts the map-reduce job.
|
6
6
|
class BeginJob < Bramble::BaseJob
|
7
|
+
# Call the defined `.items(options)` function
|
7
8
|
def perform(handle, implementation_name, items_options)
|
8
9
|
implementation = implementation_name.constantize
|
9
10
|
all_items = implementation.items(items_options)
|
@@ -0,0 +1,11 @@
|
|
1
|
+
module Bramble
|
2
|
+
module ErrorHandling
|
3
|
+
# If an error is raised during the block,
|
4
|
+
# pass it to the implementation's `on_error` function.
|
5
|
+
def self.rescuing(implementation)
|
6
|
+
yield
|
7
|
+
rescue StandardError => err
|
8
|
+
implementation.on_error(err)
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
data/lib/bramble/keys.rb
CHANGED
data/lib/bramble/map.rb
CHANGED
@@ -4,6 +4,7 @@ module Bramble
|
|
4
4
|
|
5
5
|
module_function
|
6
6
|
|
7
|
+
# For each of `values`, queue up a job to call the map function
|
7
8
|
def perform(handle, implementation, values)
|
8
9
|
Bramble::State.running?(handle) do
|
9
10
|
storage.set(map_total_count_key(handle), values.length)
|
@@ -13,18 +14,25 @@ module Bramble
|
|
13
14
|
end
|
14
15
|
end
|
15
16
|
|
17
|
+
# Call `.map` on `value`, storing the result for `.reduce` and handling any error.
|
16
18
|
def perform_map(handle, implementation, value)
|
17
19
|
Bramble::State.running?(handle) do
|
18
20
|
impl_keys_key = keys_key(handle)
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
21
|
+
|
22
|
+
Bramble::ErrorHandling.rescuing(implementation) do
|
23
|
+
# Execute the provided map function
|
24
|
+
implementation.map(value) do |map_key, map_val|
|
25
|
+
Bramble::State.running?(handle) do
|
26
|
+
raw_key = Bramble::Serialize.dump(map_key)
|
27
|
+
raw_value = Bramble::Serialize.dump(map_val)
|
28
|
+
# Push the result to be reduced
|
29
|
+
storage.map_keys_push(impl_keys_key, raw_key)
|
30
|
+
storage.map_result_push(data_key(handle, raw_key), raw_value)
|
31
|
+
end
|
25
32
|
end
|
26
33
|
end
|
27
34
|
|
35
|
+
# Mark this item as mapped (even if there was an error)
|
28
36
|
Bramble::State.running?(handle) do
|
29
37
|
finished = storage.increment(map_finished_count_key(handle))
|
30
38
|
total = storage.get(map_total_count_key(handle)).to_i
|
data/lib/bramble/map_job.rb
CHANGED
data/lib/bramble/reduce.rb
CHANGED
@@ -4,24 +4,38 @@ module Bramble
|
|
4
4
|
|
5
5
|
module_function
|
6
6
|
|
7
|
+
# Queue up a job to call `.reduce` on keys for `handle`
|
7
8
|
def perform(handle, implementation)
|
8
9
|
Bramble::State.running?(handle) do
|
10
|
+
# Set how many reduce calls we expect
|
9
11
|
all_raw_keys = storage.map_keys_get(keys_key(handle))
|
10
12
|
storage.set(reduce_total_count_key(handle), all_raw_keys.length)
|
13
|
+
# Enqueue a job for each reduce call
|
11
14
|
all_raw_keys.each do |raw_key|
|
12
15
|
Bramble::ReduceJob.perform_later(handle, implementation.name, raw_key)
|
13
16
|
end
|
14
17
|
end
|
15
18
|
end
|
16
19
|
|
20
|
+
# Perform `.reduce` on `raw_key`, handling errors and saving the result
|
17
21
|
def perform_reduce(handle, implementation, raw_key)
|
18
22
|
if Bramble::State.running?(handle)
|
19
23
|
raw_values = storage.map_result_get(data_key(handle, raw_key))
|
20
24
|
values = Bramble::Serialize.load(raw_values)
|
21
25
|
key = Bramble::Serialize.load(raw_key)
|
22
|
-
reduced_value =
|
26
|
+
reduced_value = nil
|
27
|
+
|
28
|
+
Bramble::ErrorHandling.rescuing(implementation) do
|
29
|
+
# Run the defined .reduce function
|
30
|
+
reduced_value = implementation.reduce(key, values)
|
31
|
+
# Store the result
|
32
|
+
Bramble::State.running?(handle) do
|
33
|
+
storage.reduce_result_set(result_key(handle), raw_key, Bramble::Serialize.dump(reduced_value))
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
# Mark this key as reduced, check if we're finished
|
23
38
|
Bramble::State.running?(handle) do
|
24
|
-
storage.reduce_result_set(result_key(handle), raw_key, Bramble::Serialize.dump(reduced_value))
|
25
39
|
storage.increment(reduce_finished_count_key(handle))
|
26
40
|
if Bramble::State.percent_reduced(handle) >= 1
|
27
41
|
storage.set(finished_at_key(handle), Time.now.to_i)
|
data/lib/bramble/reduce_job.rb
CHANGED
data/lib/bramble/result.rb
CHANGED
@@ -4,6 +4,9 @@ module Bramble
|
|
4
4
|
|
5
5
|
attr_reader :handle, :percent_mapped, :percent_reduced, :finished_at
|
6
6
|
|
7
|
+
# Read the state for `handle` and store it in this object
|
8
|
+
# The state for `handle` may change during this time, but you won't
|
9
|
+
# see the changes until you get a new result.
|
7
10
|
def initialize(handle)
|
8
11
|
job_id = storage.get(Bramble::Keys.job_id_key(handle))
|
9
12
|
@handle = "#{handle}:#{job_id}"
|
@@ -17,6 +20,7 @@ module Bramble
|
|
17
20
|
end
|
18
21
|
end
|
19
22
|
|
23
|
+
# @return [Hash<Any, Any>] The `key => value` results of `.reduce`
|
20
24
|
def data
|
21
25
|
@data ||= begin
|
22
26
|
key = Bramble::Keys.result_key(handle)
|
@@ -25,15 +29,20 @@ module Bramble
|
|
25
29
|
end
|
26
30
|
end
|
27
31
|
|
32
|
+
# @return [Boolean] True if all data has been mapped and reduced
|
28
33
|
def finished?
|
29
34
|
# Possible to be greater than 1 because of floating-point arithmetic
|
30
35
|
percent_finished >= 1
|
31
36
|
end
|
32
37
|
|
38
|
+
# @return [Boolean] True if the job has been started but it isn't finished yet
|
33
39
|
def running?
|
34
40
|
started? && !finished?
|
35
41
|
end
|
36
42
|
|
43
|
+
# How far along is this job?
|
44
|
+
# `.map` is considered 50%, `.reduce` is considered 50%
|
45
|
+
# @return [Float] Percent progress for this job
|
37
46
|
def percent_finished
|
38
47
|
(percent_mapped + percent_reduced) / 2
|
39
48
|
end
|
data/lib/bramble/state.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
module Bramble
|
2
|
+
# Helpers for detecting and managing the state of a given `handle` while the job is (or isn't) running
|
2
3
|
module State
|
3
4
|
extend Bramble::Keys
|
4
5
|
SEPARATOR = ":"
|
@@ -14,6 +15,7 @@ module Bramble
|
|
14
15
|
is_running
|
15
16
|
end
|
16
17
|
|
18
|
+
# Mark `handle` as started
|
17
19
|
def start_job(handle)
|
18
20
|
handle_name, job_id = handle.split(SEPARATOR)
|
19
21
|
previous_job_id = storage.get(job_id_key(handle_name))
|
@@ -24,6 +26,7 @@ module Bramble
|
|
24
26
|
storage.set(job_id_key(handle_name), job_id)
|
25
27
|
end
|
26
28
|
|
29
|
+
# Clear the state of `handle`
|
27
30
|
def clear_job(handle)
|
28
31
|
handle_name, job_id = handle.split(SEPARATOR)
|
29
32
|
storage.delete(job_id_key(handle_name))
|
@@ -32,6 +35,7 @@ module Bramble
|
|
32
35
|
clear_map(handle)
|
33
36
|
end
|
34
37
|
|
38
|
+
# How many values of `handle` have been sent to `.map`?
|
35
39
|
def percent_mapped(handle)
|
36
40
|
percent_between_keys(
|
37
41
|
map_total_count_key(handle),
|
@@ -39,6 +43,7 @@ module Bramble
|
|
39
43
|
)
|
40
44
|
end
|
41
45
|
|
46
|
+
# How many values of `handle` have been sent to `.reduce`?
|
42
47
|
def percent_reduced(handle)
|
43
48
|
percent_between_keys(
|
44
49
|
reduce_total_count_key(handle),
|
@@ -46,6 +51,7 @@ module Bramble
|
|
46
51
|
)
|
47
52
|
end
|
48
53
|
|
54
|
+
# Clear all traces of the `.map` operation for `handle`
|
49
55
|
def clear_map(handle)
|
50
56
|
map_group_keys = storage.map_keys_get(keys_key(handle))
|
51
57
|
map_group_keys.each do |group_key|
|
@@ -56,6 +62,7 @@ module Bramble
|
|
56
62
|
storage.delete(map_finished_count_key(handle))
|
57
63
|
end
|
58
64
|
|
65
|
+
# Clear all traces of the `.reduce` operation for `handle`
|
59
66
|
def clear_reduce(handle)
|
60
67
|
storage.delete(reduce_total_count_key(handle))
|
61
68
|
storage.delete(reduce_finished_count_key(handle))
|
data/lib/bramble/storage.rb
CHANGED
@@ -3,6 +3,8 @@ require "bramble/storage/redis_storage"
|
|
3
3
|
require "bramble/storage/memory_storage"
|
4
4
|
|
5
5
|
module Bramble
|
6
|
+
# A single access point to the storage selected by `Bramble.config.storage`.
|
7
|
+
# All methods are delegated to that storage adapter
|
6
8
|
module Storage
|
7
9
|
extend SingleForwardable
|
8
10
|
|
data/lib/bramble/version.rb
CHANGED
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bramble
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Robert Mosolgo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-05
|
11
|
+
date: 2016-07-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activejob
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '4.2'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '4.2'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: minitest
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -106,6 +106,7 @@ files:
|
|
106
106
|
- lib/bramble/base_job.rb
|
107
107
|
- lib/bramble/begin_job.rb
|
108
108
|
- lib/bramble/conf.rb
|
109
|
+
- lib/bramble/error_handling.rb
|
109
110
|
- lib/bramble/keys.rb
|
110
111
|
- lib/bramble/map.rb
|
111
112
|
- lib/bramble/map_job.rb
|