bramble 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -1
- data/lib/bramble.rb +1 -0
- data/lib/bramble/base_job.rb +2 -0
- data/lib/bramble/begin_job.rb +1 -0
- data/lib/bramble/error_handling.rb +11 -0
- data/lib/bramble/keys.rb +1 -0
- data/lib/bramble/map.rb +14 -6
- data/lib/bramble/map_job.rb +1 -0
- data/lib/bramble/reduce.rb +16 -2
- data/lib/bramble/reduce_job.rb +1 -0
- data/lib/bramble/result.rb +9 -0
- data/lib/bramble/state.rb +7 -0
- data/lib/bramble/storage.rb +2 -0
- data/lib/bramble/version.rb +1 -1
- metadata +7 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 61f945ee023bed4fa6464c90b8568060a1768824
|
4
|
+
data.tar.gz: 3d2b138f12b2fe5421914732cbbaf04b94a8b40a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 151e9f40aec216edbadbec3ea225633957c4f294a145ced9fd2c90ac70ba02f52d2014eb09f83011b29eba0224ad0085a2a059a3b297d460ea102325f4068cca
|
7
|
+
data.tar.gz: a13af709e45714aba91eee42ab8a6600f02c4c76574f39b7df9e5591fcf6c6f2234883a68eff619bd22a6e60bd9b4294aebc10cf9366930c750b53e151b81b9c
|
data/README.md
CHANGED
@@ -44,6 +44,16 @@ We have some staff-only views that expose stats about how people use our app. Ev
|
|
44
44
|
# observations => [1, 1, 1, 1, 1]
|
45
45
|
observations.length
|
46
46
|
end
|
47
|
+
|
48
|
+
# If a .map or .reduce hits an error,
|
49
|
+
# it will be rescued and passed here.
|
50
|
+
# To cause the job to fail, raise it again.
|
51
|
+
# Otherwise, let it pass
|
52
|
+
def self.on_error(err)
|
53
|
+
Bugsnag.notify(err)
|
54
|
+
# Or, to trigger a failure:
|
55
|
+
# raise(err)
|
56
|
+
end
|
47
57
|
end
|
48
58
|
```
|
49
59
|
|
@@ -83,7 +93,8 @@ We have some staff-only views that expose stats about how people use our app. Ev
|
|
83
93
|
|
84
94
|
## Todo
|
85
95
|
|
86
|
-
-
|
96
|
+
- Write some more adapters: Memcached, ActiveRecord
|
97
|
+
- Warn if no storage is configured
|
87
98
|
- Do we have atomicity issues? Occasional test failures
|
88
99
|
- Consolidate storage in Redis to a single key? (Could some keys be evicted while others remain?)
|
89
100
|
|
data/lib/bramble.rb
CHANGED
data/lib/bramble/base_job.rb
CHANGED
data/lib/bramble/begin_job.rb
CHANGED
@@ -4,6 +4,7 @@ module Bramble
|
|
4
4
|
#
|
5
5
|
# Then it starts the map-reduce job.
|
6
6
|
class BeginJob < Bramble::BaseJob
|
7
|
+
# Call the defined `.items(options)` function
|
7
8
|
def perform(handle, implementation_name, items_options)
|
8
9
|
implementation = implementation_name.constantize
|
9
10
|
all_items = implementation.items(items_options)
|
@@ -0,0 +1,11 @@
|
|
1
|
+
module Bramble
|
2
|
+
module ErrorHandling
|
3
|
+
# If an error is raised during the block,
|
4
|
+
# pass it to the implementation's `on_error` function.
|
5
|
+
def self.rescuing(implementation)
|
6
|
+
yield
|
7
|
+
rescue StandardError => err
|
8
|
+
implementation.on_error(err)
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
data/lib/bramble/keys.rb
CHANGED
data/lib/bramble/map.rb
CHANGED
@@ -4,6 +4,7 @@ module Bramble
|
|
4
4
|
|
5
5
|
module_function
|
6
6
|
|
7
|
+
# For each of `values`, queue up a job to call the map function
|
7
8
|
def perform(handle, implementation, values)
|
8
9
|
Bramble::State.running?(handle) do
|
9
10
|
storage.set(map_total_count_key(handle), values.length)
|
@@ -13,18 +14,25 @@ module Bramble
|
|
13
14
|
end
|
14
15
|
end
|
15
16
|
|
17
|
+
# Call `.map` on `value`, storing the result for `.reduce` and handling any error.
|
16
18
|
def perform_map(handle, implementation, value)
|
17
19
|
Bramble::State.running?(handle) do
|
18
20
|
impl_keys_key = keys_key(handle)
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
21
|
+
|
22
|
+
Bramble::ErrorHandling.rescuing(implementation) do
|
23
|
+
# Execute the provided map function
|
24
|
+
implementation.map(value) do |map_key, map_val|
|
25
|
+
Bramble::State.running?(handle) do
|
26
|
+
raw_key = Bramble::Serialize.dump(map_key)
|
27
|
+
raw_value = Bramble::Serialize.dump(map_val)
|
28
|
+
# Push the result to be reduced
|
29
|
+
storage.map_keys_push(impl_keys_key, raw_key)
|
30
|
+
storage.map_result_push(data_key(handle, raw_key), raw_value)
|
31
|
+
end
|
25
32
|
end
|
26
33
|
end
|
27
34
|
|
35
|
+
# Mark this item as mapped (even if there was an error)
|
28
36
|
Bramble::State.running?(handle) do
|
29
37
|
finished = storage.increment(map_finished_count_key(handle))
|
30
38
|
total = storage.get(map_total_count_key(handle)).to_i
|
data/lib/bramble/map_job.rb
CHANGED
data/lib/bramble/reduce.rb
CHANGED
@@ -4,24 +4,38 @@ module Bramble
|
|
4
4
|
|
5
5
|
module_function
|
6
6
|
|
7
|
+
# Queue up a job to call `.reduce` on keys for `handle`
|
7
8
|
def perform(handle, implementation)
|
8
9
|
Bramble::State.running?(handle) do
|
10
|
+
# Set how many reduce calls we expect
|
9
11
|
all_raw_keys = storage.map_keys_get(keys_key(handle))
|
10
12
|
storage.set(reduce_total_count_key(handle), all_raw_keys.length)
|
13
|
+
# Enqueue a job for each reduce call
|
11
14
|
all_raw_keys.each do |raw_key|
|
12
15
|
Bramble::ReduceJob.perform_later(handle, implementation.name, raw_key)
|
13
16
|
end
|
14
17
|
end
|
15
18
|
end
|
16
19
|
|
20
|
+
# Perform `.reduce` on `raw_key`, handling errors and saving the result
|
17
21
|
def perform_reduce(handle, implementation, raw_key)
|
18
22
|
if Bramble::State.running?(handle)
|
19
23
|
raw_values = storage.map_result_get(data_key(handle, raw_key))
|
20
24
|
values = Bramble::Serialize.load(raw_values)
|
21
25
|
key = Bramble::Serialize.load(raw_key)
|
22
|
-
reduced_value =
|
26
|
+
reduced_value = nil
|
27
|
+
|
28
|
+
Bramble::ErrorHandling.rescuing(implementation) do
|
29
|
+
# Run the defined .reduce function
|
30
|
+
reduced_value = implementation.reduce(key, values)
|
31
|
+
# Store the result
|
32
|
+
Bramble::State.running?(handle) do
|
33
|
+
storage.reduce_result_set(result_key(handle), raw_key, Bramble::Serialize.dump(reduced_value))
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
# Mark this key as reduced, check if we're finished
|
23
38
|
Bramble::State.running?(handle) do
|
24
|
-
storage.reduce_result_set(result_key(handle), raw_key, Bramble::Serialize.dump(reduced_value))
|
25
39
|
storage.increment(reduce_finished_count_key(handle))
|
26
40
|
if Bramble::State.percent_reduced(handle) >= 1
|
27
41
|
storage.set(finished_at_key(handle), Time.now.to_i)
|
data/lib/bramble/reduce_job.rb
CHANGED
data/lib/bramble/result.rb
CHANGED
@@ -4,6 +4,9 @@ module Bramble
|
|
4
4
|
|
5
5
|
attr_reader :handle, :percent_mapped, :percent_reduced, :finished_at
|
6
6
|
|
7
|
+
# Read the state for `handle` and store it in this object
|
8
|
+
# The state for `handle` may change during this time, but you won't
|
9
|
+
# see the changes until you get a new result.
|
7
10
|
def initialize(handle)
|
8
11
|
job_id = storage.get(Bramble::Keys.job_id_key(handle))
|
9
12
|
@handle = "#{handle}:#{job_id}"
|
@@ -17,6 +20,7 @@ module Bramble
|
|
17
20
|
end
|
18
21
|
end
|
19
22
|
|
23
|
+
# @return [Hash<Any, Any>] The `key => value` results of `.reduce`
|
20
24
|
def data
|
21
25
|
@data ||= begin
|
22
26
|
key = Bramble::Keys.result_key(handle)
|
@@ -25,15 +29,20 @@ module Bramble
|
|
25
29
|
end
|
26
30
|
end
|
27
31
|
|
32
|
+
# @return [Boolean] True if all data has been mapped and reduced
|
28
33
|
def finished?
|
29
34
|
# Possible to be greater than 1 because of floating-point arithmetic
|
30
35
|
percent_finished >= 1
|
31
36
|
end
|
32
37
|
|
38
|
+
# @return [Boolean] True if the job has been started but it isn't finished yet
|
33
39
|
def running?
|
34
40
|
started? && !finished?
|
35
41
|
end
|
36
42
|
|
43
|
+
# How far along is this job?
|
44
|
+
# `.map` is considered 50%, `.reduce` is considered 50%
|
45
|
+
# @return [Float] Percent progress for this job
|
37
46
|
def percent_finished
|
38
47
|
(percent_mapped + percent_reduced) / 2
|
39
48
|
end
|
data/lib/bramble/state.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
module Bramble
|
2
|
+
# Helpers for detecting and managing the state of a given `handle` while the job is (or isn't) running
|
2
3
|
module State
|
3
4
|
extend Bramble::Keys
|
4
5
|
SEPARATOR = ":"
|
@@ -14,6 +15,7 @@ module Bramble
|
|
14
15
|
is_running
|
15
16
|
end
|
16
17
|
|
18
|
+
# Mark `handle` as started
|
17
19
|
def start_job(handle)
|
18
20
|
handle_name, job_id = handle.split(SEPARATOR)
|
19
21
|
previous_job_id = storage.get(job_id_key(handle_name))
|
@@ -24,6 +26,7 @@ module Bramble
|
|
24
26
|
storage.set(job_id_key(handle_name), job_id)
|
25
27
|
end
|
26
28
|
|
29
|
+
# Clear the state of `handle`
|
27
30
|
def clear_job(handle)
|
28
31
|
handle_name, job_id = handle.split(SEPARATOR)
|
29
32
|
storage.delete(job_id_key(handle_name))
|
@@ -32,6 +35,7 @@ module Bramble
|
|
32
35
|
clear_map(handle)
|
33
36
|
end
|
34
37
|
|
38
|
+
# How many values of `handle` have been sent to `.map`?
|
35
39
|
def percent_mapped(handle)
|
36
40
|
percent_between_keys(
|
37
41
|
map_total_count_key(handle),
|
@@ -39,6 +43,7 @@ module Bramble
|
|
39
43
|
)
|
40
44
|
end
|
41
45
|
|
46
|
+
# How many values of `handle` have been sent to `.reduce`?
|
42
47
|
def percent_reduced(handle)
|
43
48
|
percent_between_keys(
|
44
49
|
reduce_total_count_key(handle),
|
@@ -46,6 +51,7 @@ module Bramble
|
|
46
51
|
)
|
47
52
|
end
|
48
53
|
|
54
|
+
# Clear all traces of the `.map` operation for `handle`
|
49
55
|
def clear_map(handle)
|
50
56
|
map_group_keys = storage.map_keys_get(keys_key(handle))
|
51
57
|
map_group_keys.each do |group_key|
|
@@ -56,6 +62,7 @@ module Bramble
|
|
56
62
|
storage.delete(map_finished_count_key(handle))
|
57
63
|
end
|
58
64
|
|
65
|
+
# Clear all traces of the `.reduce` operation for `handle`
|
59
66
|
def clear_reduce(handle)
|
60
67
|
storage.delete(reduce_total_count_key(handle))
|
61
68
|
storage.delete(reduce_finished_count_key(handle))
|
data/lib/bramble/storage.rb
CHANGED
@@ -3,6 +3,8 @@ require "bramble/storage/redis_storage"
|
|
3
3
|
require "bramble/storage/memory_storage"
|
4
4
|
|
5
5
|
module Bramble
|
6
|
+
# A single access point to the storage selected by `Bramble.config.storage`.
|
7
|
+
# All methods are delegated to that storage adapter
|
6
8
|
module Storage
|
7
9
|
extend SingleForwardable
|
8
10
|
|
data/lib/bramble/version.rb
CHANGED
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bramble
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Robert Mosolgo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-05
|
11
|
+
date: 2016-07-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activejob
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '4.2'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '4.2'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: minitest
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -106,6 +106,7 @@ files:
|
|
106
106
|
- lib/bramble/base_job.rb
|
107
107
|
- lib/bramble/begin_job.rb
|
108
108
|
- lib/bramble/conf.rb
|
109
|
+
- lib/bramble/error_handling.rb
|
109
110
|
- lib/bramble/keys.rb
|
110
111
|
- lib/bramble/map.rb
|
111
112
|
- lib/bramble/map_job.rb
|