bramble 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 138b78b6cd5ccf7de287e447453cb010de725eb7
4
- data.tar.gz: 554476911d84da387cc7017b4bb0041810507fe7
3
+ metadata.gz: 61f945ee023bed4fa6464c90b8568060a1768824
4
+ data.tar.gz: 3d2b138f12b2fe5421914732cbbaf04b94a8b40a
5
5
  SHA512:
6
- metadata.gz: c78a8f5a1eaf795df61ef62439ee77d2de522edea38f9c8ee77538662b91410615b362ea25e7b387f333cc2461d8b32a631130c1c277b1852befdd3062bcef9d
7
- data.tar.gz: f112df8555c290fbf037ec7c401014f4d5c6ec1794a0351317baf568632c211b0ca794c75f7ac6505ae40f2213b40c2603df59394de5b223796ab8bb3e429f27
6
+ metadata.gz: 151e9f40aec216edbadbec3ea225633957c4f294a145ced9fd2c90ac70ba02f52d2014eb09f83011b29eba0224ad0085a2a059a3b297d460ea102325f4068cca
7
+ data.tar.gz: a13af709e45714aba91eee42ab8a6600f02c4c76574f39b7df9e5591fcf6c6f2234883a68eff619bd22a6e60bd9b4294aebc10cf9366930c750b53e151b81b9c
data/README.md CHANGED
@@ -44,6 +44,16 @@ We have some staff-only views that expose stats about how people use our app. Ev
44
44
  # observations => [1, 1, 1, 1, 1]
45
45
  observations.length
46
46
  end
47
+
48
+ # If a .map or .reduce hits an error,
49
+ # it will be rescued and passed here.
50
+ # To cause the job to fail, raise it again.
51
+ # Otherwise, let it pass
52
+ def self.on_error(err)
53
+ Bugsnag.notify(err)
54
+ # Or, to trigger a failure:
55
+ # raise(err)
56
+ end
47
57
  end
48
58
  ```
49
59
 
@@ -83,7 +93,8 @@ We have some staff-only views that expose stats about how people use our app. Ev
83
93
 
84
94
  ## Todo
85
95
 
86
- - Adapters: Memcached, ActiveRecord
96
+ - Write some more adapters: Memcached, ActiveRecord
97
+ - Warn if no storage is configured
87
98
  - Do we have atomicity issues? Occasional test failures
88
99
  - Consolidate storage in Redis to a single key? (Could some keys be evicted while others remain?)
89
100
 
data/lib/bramble.rb CHANGED
@@ -2,6 +2,7 @@ require "ostruct"
2
2
  require "active_job"
3
3
  require "bramble/base_job"
4
4
  require "bramble/begin_job"
5
+ require "bramble/error_handling"
5
6
  require "bramble/keys"
6
7
  require "bramble/map"
7
8
  require "bramble/map_job"
@@ -1,4 +1,6 @@
1
1
  module Bramble
2
+ # This is the parent class for all Bramble jobs.
3
+ # It sets the queue based on your config.
2
4
  class BaseJob < ActiveJob::Base
3
5
  queue_as { Bramble.config.queue_as }
4
6
  end
@@ -4,6 +4,7 @@ module Bramble
4
4
  #
5
5
  # Then it starts the map-reduce job.
6
6
  class BeginJob < Bramble::BaseJob
7
+ # Call the defined `.items(options)` function
7
8
  def perform(handle, implementation_name, items_options)
8
9
  implementation = implementation_name.constantize
9
10
  all_items = implementation.items(items_options)
@@ -0,0 +1,11 @@
1
+ module Bramble
2
+ module ErrorHandling
3
+ # If an error is raised during the block,
4
+ # pass it to the implementation's `on_error` function.
5
+ def self.rescuing(implementation)
6
+ yield
7
+ rescue StandardError => err
8
+ implementation.on_error(err)
9
+ end
10
+ end
11
+ end
data/lib/bramble/keys.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  module Bramble
2
+ # A collection of helpers for getting string keys in the storage adapter.
2
3
  module Keys
3
4
  module_function
4
5
 
data/lib/bramble/map.rb CHANGED
@@ -4,6 +4,7 @@ module Bramble
4
4
 
5
5
  module_function
6
6
 
7
+ # For each of `values`, queue up a job to call the map function
7
8
  def perform(handle, implementation, values)
8
9
  Bramble::State.running?(handle) do
9
10
  storage.set(map_total_count_key(handle), values.length)
@@ -13,18 +14,25 @@ module Bramble
13
14
  end
14
15
  end
15
16
 
17
+ # Call `.map` on `value`, storing the result for `.reduce` and handling any error.
16
18
  def perform_map(handle, implementation, value)
17
19
  Bramble::State.running?(handle) do
18
20
  impl_keys_key = keys_key(handle)
19
- implementation.map(value) do |map_key, map_val|
20
- Bramble::State.running?(handle) do
21
- raw_key = Bramble::Serialize.dump(map_key)
22
- raw_value = Bramble::Serialize.dump(map_val)
23
- storage.map_keys_push(impl_keys_key, raw_key)
24
- storage.map_result_push(data_key(handle, raw_key), raw_value)
21
+
22
+ Bramble::ErrorHandling.rescuing(implementation) do
23
+ # Execute the provided map function
24
+ implementation.map(value) do |map_key, map_val|
25
+ Bramble::State.running?(handle) do
26
+ raw_key = Bramble::Serialize.dump(map_key)
27
+ raw_value = Bramble::Serialize.dump(map_val)
28
+ # Push the result to be reduced
29
+ storage.map_keys_push(impl_keys_key, raw_key)
30
+ storage.map_result_push(data_key(handle, raw_key), raw_value)
31
+ end
25
32
  end
26
33
  end
27
34
 
35
+ # Mark this item as mapped (even if there was an error)
28
36
  Bramble::State.running?(handle) do
29
37
  finished = storage.increment(map_finished_count_key(handle))
30
38
  total = storage.get(map_total_count_key(handle)).to_i
@@ -1,4 +1,5 @@
1
1
  module Bramble
2
+ # Calls the `.map(value)` function
2
3
  class MapJob < Bramble::BaseJob
3
4
  def perform(handle, mapper_name, raw_value)
4
5
  mapper = mapper_name.constantize
@@ -4,24 +4,38 @@ module Bramble
4
4
 
5
5
  module_function
6
6
 
7
+ # Queue up a job to call `.reduce` on keys for `handle`
7
8
  def perform(handle, implementation)
8
9
  Bramble::State.running?(handle) do
10
+ # Set how many reduce calls we expect
9
11
  all_raw_keys = storage.map_keys_get(keys_key(handle))
10
12
  storage.set(reduce_total_count_key(handle), all_raw_keys.length)
13
+ # Enqueue a job for each reduce call
11
14
  all_raw_keys.each do |raw_key|
12
15
  Bramble::ReduceJob.perform_later(handle, implementation.name, raw_key)
13
16
  end
14
17
  end
15
18
  end
16
19
 
20
+ # Perform `.reduce` on `raw_key`, handling errors and saving the result
17
21
  def perform_reduce(handle, implementation, raw_key)
18
22
  if Bramble::State.running?(handle)
19
23
  raw_values = storage.map_result_get(data_key(handle, raw_key))
20
24
  values = Bramble::Serialize.load(raw_values)
21
25
  key = Bramble::Serialize.load(raw_key)
22
- reduced_value = implementation.reduce(key, values)
26
+ reduced_value = nil
27
+
28
+ Bramble::ErrorHandling.rescuing(implementation) do
29
+ # Run the defined .reduce function
30
+ reduced_value = implementation.reduce(key, values)
31
+ # Store the result
32
+ Bramble::State.running?(handle) do
33
+ storage.reduce_result_set(result_key(handle), raw_key, Bramble::Serialize.dump(reduced_value))
34
+ end
35
+ end
36
+
37
+ # Mark this key as reduced, check if we're finished
23
38
  Bramble::State.running?(handle) do
24
- storage.reduce_result_set(result_key(handle), raw_key, Bramble::Serialize.dump(reduced_value))
25
39
  storage.increment(reduce_finished_count_key(handle))
26
40
  if Bramble::State.percent_reduced(handle) >= 1
27
41
  storage.set(finished_at_key(handle), Time.now.to_i)
@@ -1,4 +1,5 @@
1
1
  module Bramble
2
+ # Call `.reduce` for `key`
2
3
  class ReduceJob < Bramble::BaseJob
3
4
  def perform(handle, implementation_name, key)
4
5
  implementation = implementation_name.constantize
@@ -4,6 +4,9 @@ module Bramble
4
4
 
5
5
  attr_reader :handle, :percent_mapped, :percent_reduced, :finished_at
6
6
 
7
+ # Read the state for `handle` and store it in this object
8
+ # The state for `handle` may change during this time, but you won't
9
+ # see the changes until you get a new result.
7
10
  def initialize(handle)
8
11
  job_id = storage.get(Bramble::Keys.job_id_key(handle))
9
12
  @handle = "#{handle}:#{job_id}"
@@ -17,6 +20,7 @@ module Bramble
17
20
  end
18
21
  end
19
22
 
23
+ # @return [Hash<Any, Any>] The `key => value` results of `.reduce`
20
24
  def data
21
25
  @data ||= begin
22
26
  key = Bramble::Keys.result_key(handle)
@@ -25,15 +29,20 @@ module Bramble
25
29
  end
26
30
  end
27
31
 
32
+ # @return [Boolean] True if all data has been mapped and reduced
28
33
  def finished?
29
34
  # Possible to be greater than 1 because of floating-point arithmetic
30
35
  percent_finished >= 1
31
36
  end
32
37
 
38
+ # @return [Boolean] True if the job has been started but it isn't finished yet
33
39
  def running?
34
40
  started? && !finished?
35
41
  end
36
42
 
43
+ # How far along is this job?
44
+ # `.map` is considered 50%, `.reduce` is considered 50%
45
+ # @return [Float] Percent progress for this job
37
46
  def percent_finished
38
47
  (percent_mapped + percent_reduced) / 2
39
48
  end
data/lib/bramble/state.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  module Bramble
2
+ # Helpers for detecting and managing the state of a given `handle` while the job is (or isn't) running
2
3
  module State
3
4
  extend Bramble::Keys
4
5
  SEPARATOR = ":"
@@ -14,6 +15,7 @@ module Bramble
14
15
  is_running
15
16
  end
16
17
 
18
+ # Mark `handle` as started
17
19
  def start_job(handle)
18
20
  handle_name, job_id = handle.split(SEPARATOR)
19
21
  previous_job_id = storage.get(job_id_key(handle_name))
@@ -24,6 +26,7 @@ module Bramble
24
26
  storage.set(job_id_key(handle_name), job_id)
25
27
  end
26
28
 
29
+ # Clear the state of `handle`
27
30
  def clear_job(handle)
28
31
  handle_name, job_id = handle.split(SEPARATOR)
29
32
  storage.delete(job_id_key(handle_name))
@@ -32,6 +35,7 @@ module Bramble
32
35
  clear_map(handle)
33
36
  end
34
37
 
38
+ # How many values of `handle` have been sent to `.map`?
35
39
  def percent_mapped(handle)
36
40
  percent_between_keys(
37
41
  map_total_count_key(handle),
@@ -39,6 +43,7 @@ module Bramble
39
43
  )
40
44
  end
41
45
 
46
+ # How many values of `handle` have been sent to `.reduce`?
42
47
  def percent_reduced(handle)
43
48
  percent_between_keys(
44
49
  reduce_total_count_key(handle),
@@ -46,6 +51,7 @@ module Bramble
46
51
  )
47
52
  end
48
53
 
54
+ # Clear all traces of the `.map` operation for `handle`
49
55
  def clear_map(handle)
50
56
  map_group_keys = storage.map_keys_get(keys_key(handle))
51
57
  map_group_keys.each do |group_key|
@@ -56,6 +62,7 @@ module Bramble
56
62
  storage.delete(map_finished_count_key(handle))
57
63
  end
58
64
 
65
+ # Clear all traces of the `.reduce` operation for `handle`
59
66
  def clear_reduce(handle)
60
67
  storage.delete(reduce_total_count_key(handle))
61
68
  storage.delete(reduce_finished_count_key(handle))
@@ -3,6 +3,8 @@ require "bramble/storage/redis_storage"
3
3
  require "bramble/storage/memory_storage"
4
4
 
5
5
  module Bramble
6
+ # A single access point to the storage selected by `Bramble.config.storage`.
7
+ # All methods are delegated to that storage adapter
6
8
  module Storage
7
9
  extend SingleForwardable
8
10
 
@@ -1,3 +1,3 @@
1
1
  module Bramble
2
- VERSION = "0.4.0"
2
+ VERSION = "0.5.0"
3
3
  end
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bramble
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Robert Mosolgo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-16 00:00:00.000000000 Z
11
+ date: 2016-07-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activejob
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '4.2'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '4.2'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: minitest
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -106,6 +106,7 @@ files:
106
106
  - lib/bramble/base_job.rb
107
107
  - lib/bramble/begin_job.rb
108
108
  - lib/bramble/conf.rb
109
+ - lib/bramble/error_handling.rb
109
110
  - lib/bramble/keys.rb
110
111
  - lib/bramble/map.rb
111
112
  - lib/bramble/map_job.rb