bramble 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 044266e293f683cdeabe0d4f9d7789d49324195c
4
- data.tar.gz: 6b516203cd8fc97eb20810245a05d08a96c3e898
3
+ metadata.gz: c78abeda625df09bdbbfbaa059768757a54380f9
4
+ data.tar.gz: b641d188443171568abe011b22fc5b3ccf7083e6
5
5
  SHA512:
6
- metadata.gz: 77f06d4145ce1ad5622cca85c687531ca3b76bcb9c94831d0015e15118cf34a110828de0cd0c2e706f403503f44b16f3ffb858713380090ad2bd99a08025b27c
7
- data.tar.gz: d7b4d8f7c716d1a7b70a1fd4b57ac6be24c166494f2be03e0d86b1b25336bd542c56b5ad0adcc03bf359bacd6197764322f206e5575842213294d8f3d1ffc564
6
+ metadata.gz: 3d5e01178d77d2cd06c99634318835143dbdcb3ba203ecc6477cb7bb60d32019521d531660bc1a8f6e7eab258d0d55ec617113ee61e112c3e31a74a9094b655a
7
+ data.tar.gz: a54391b20995074b7b16382d75823d247f9fb267f27bae2804db4ed5e97c3fc2fbd331cbf666e8a5d42817aafabe8a26c3f194148bf29cc6010876cd2763c4d4
data/README.md CHANGED
@@ -15,10 +15,15 @@ Map-reduce with ActiveJob
15
15
  end
16
16
  ```
17
17
 
18
- - Define a module with `map` and `reduce` functions:
18
+ - Define a module with `map`, `reduce` and `items(options = {})` functions:
19
19
 
20
20
  ```ruby
21
21
  module LetterCount
22
+ # Generate a list of items based on some input
23
+ def self.items(filepath)
24
+ File.read(filepath).split(" ")
25
+ end
26
+
22
27
  # .map is called with each item in the input
23
28
  def self.map(word)
24
29
  letters = word.upcase.each_char
@@ -38,24 +43,26 @@ Map-reduce with ActiveJob
38
43
  end
39
44
  ```
40
45
 
41
- - Start a job with a handle, module, and some data:
46
+ - Start a job with a handle, module, and an (optional) argument for finding data:
42
47
 
43
48
  ```ruby
44
49
  # used for fetching the result later:
45
50
  handle = "shakespeare-letter-count"
46
51
 
47
- # Something that responds to #each:
48
- data = hamlet.split(" ")
52
+ # will be sent to `.items(filepath)`
53
+ hamlet_path = "./shakespeare/hamlet.txt"
49
54
 
50
55
  # Begin the process:
51
- Bramble.map_reduce(handle, LetterCount, words_in_hamlet)
56
+ Bramble.map_reduce(handle, LetterCount, hamlet_path)
52
57
  ```
53
58
 
54
59
  - Later, fetch the result using the handle:
55
60
 
56
61
  ```ruby
57
- result = Bramble.read("shakespeare-letter-count")
58
- # { "A" => 100, "B" => 100, ... }
62
+ result = Bramble.get("shakespeare-letter-count")
63
+ result.running? # => false
64
+ result.finished? # => true
65
+ result.data # => { "A" => 100, "B" => 100, ... }
59
66
  ```
60
67
 
61
68
  - Delete the saved result:
@@ -66,8 +73,6 @@ Map-reduce with ActiveJob
66
73
 
67
74
  ## Todo
68
75
 
69
- - Use `Storage` as gateway to `config.storage`
70
- - Job convenience class?
71
76
  - `.fetch` to find-or-calculate?
72
77
  - Adapters: Memcache, ActiveRecord
73
78
 
@@ -0,0 +1,5 @@
1
+ module Bramble
2
+ class BaseJob < ActiveJob::Base
3
+ queue_as { Bramble.config.queue_as }
4
+ end
5
+ end
@@ -0,0 +1,13 @@
1
+ module Bramble
2
+ # This exists to call `implementation.items` in the background.
3
+ # It might take a long time to fetch, so let's background it.
4
+ #
5
+ # Then it starts the map-reduce job.
6
+ class BeginJob < Bramble::BaseJob
7
+ def perform(handle, implementation_name, items_options)
8
+ implementation = implementation_name.constantize
9
+ all_items = implementation.items(items_options)
10
+ Bramble::Map.perform(handle, implementation, all_items)
11
+ end
12
+ end
13
+ end
data/lib/bramble/keys.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  module Bramble
2
2
  module Keys
3
+ module_function
4
+
3
5
  def namespace(handle)
4
6
  "#{Bramble.config.namespace}:#{handle}"
5
7
  end
@@ -12,8 +14,12 @@ module Bramble
12
14
  "#{namespace(handle)}:keys"
13
15
  end
14
16
 
15
- def finished_count_key(handle)
16
- "#{namespace(handle)}:finished_count"
17
+ def map_finished_count_key(handle)
18
+ "#{namespace(handle)}:map_finished_count"
19
+ end
20
+
21
+ def reduce_finished_count_key(handle)
22
+ "#{namespace(handle)}:reduce_finished_count"
17
23
  end
18
24
 
19
25
  def total_count_key(handle)
@@ -23,5 +29,13 @@ module Bramble
23
29
  def result_key(handle)
24
30
  "#{namespace(handle)}:result"
25
31
  end
32
+
33
+ def job_id_key(handle)
34
+ "#{namespace(handle)}:job_id"
35
+ end
36
+
37
+ def status_key(handle)
38
+ "#{namespace(handle)}:status"
39
+ end
26
40
  end
27
41
  end
data/lib/bramble/map.rb CHANGED
@@ -5,25 +5,32 @@ module Bramble
5
5
  module_function
6
6
 
7
7
  def perform(handle, implementation, values)
8
- # TODO: make sure there isn't one going on right now
9
- Bramble::Storage.delete(handle)
10
- storage.set(total_count_key(handle), values.length)
11
- values.each do |value|
12
- Bramble::MapJob.perform_later(handle, implementation.name, value)
8
+ Bramble::State.running?(handle) do
9
+ storage.set(total_count_key(handle), values.length)
10
+ values.each do |value|
11
+ Bramble::MapJob.perform_later(handle, implementation.name, Bramble::Serialize.dump(value))
12
+ end
13
13
  end
14
14
  end
15
15
 
16
16
  def perform_map(handle, implementation, value)
17
- impl_keys_key = keys_key(handle)
18
- implementation.map(value) do |map_key, map_val|
19
- raw_key = Bramble::Storage.dump(map_key)
20
- storage.map_keys_push(impl_keys_key, raw_key)
21
- storage.map_result_push(data_key(handle, raw_key), Bramble::Storage.dump(map_val))
22
- end
23
- finished = storage.increment(finished_count_key(handle))
24
- total = storage.get(total_count_key(handle)).to_i
25
- if finished == total
26
- Bramble::Reduce.perform(handle, implementation)
17
+ Bramble::State.running?(handle) do
18
+ impl_keys_key = keys_key(handle)
19
+ implementation.map(value) do |map_key, map_val|
20
+ Bramble::State.running?(handle) do
21
+ raw_key = Bramble::Serialize.dump(map_key)
22
+ storage.map_keys_push(impl_keys_key, raw_key)
23
+ storage.map_result_push(data_key(handle, raw_key), Bramble::Serialize.dump(map_val))
24
+ end
25
+ end
26
+ Bramble::State.running?(handle) do
27
+ finished = storage.increment(map_finished_count_key(handle))
28
+ total = storage.get(total_count_key(handle)).to_i
29
+ if finished == total
30
+ Bramble::Reduce.perform(handle, implementation)
31
+ Bramble::State.clear_map(handle)
32
+ end
33
+ end
27
34
  end
28
35
  end
29
36
 
@@ -32,7 +39,7 @@ module Bramble
32
39
  module_function
33
40
 
34
41
  def storage
35
- Bramble.config.storage
42
+ Bramble::Storage
36
43
  end
37
44
  end
38
45
  end
@@ -1,8 +1,8 @@
1
1
  module Bramble
2
- class MapJob < ActiveJob::Base
3
- queue_as { Bramble.config.queue_as }
4
- def perform(handle, mapper_name, value)
2
+ class MapJob < Bramble::BaseJob
3
+ def perform(handle, mapper_name, raw_value)
5
4
  mapper = mapper_name.constantize
5
+ value = Bramble::Serialize.load(raw_value)
6
6
  Bramble::Map.perform_map(handle, mapper, value)
7
7
  end
8
8
  end
@@ -12,10 +12,18 @@ module Bramble
12
12
  end
13
13
 
14
14
  def perform_reduce(handle, implementation, raw_key)
15
- values = storage.map_result_get(data_key(handle, raw_key))
16
- values = Bramble::Storage.load(values)
17
- reduced_value = implementation.reduce(Bramble::Storage.load(raw_key), values)
18
- storage.reduce_result_set(result_key(handle), raw_key, Bramble::Storage.dump(reduced_value))
15
+ if Bramble::State.running?(handle)
16
+ raw_values = storage.map_result_get(data_key(handle, raw_key))
17
+ values = Bramble::Serialize.load(raw_values)
18
+ key = Bramble::Serialize.load(raw_key)
19
+ reduced_value = implementation.reduce(key, values)
20
+ Bramble::State.running?(handle) do
21
+ storage.reduce_result_set(result_key(handle), raw_key, Bramble::Serialize.dump(reduced_value))
22
+ storage.increment(reduce_finished_count_key(handle))
23
+ end
24
+ else
25
+ Bramble::State.clear_reduce(handle)
26
+ end
19
27
  end
20
28
 
21
29
  private
@@ -23,7 +31,7 @@ module Bramble
23
31
  module_function
24
32
 
25
33
  def storage
26
- Bramble.config.storage
34
+ Bramble::Storage
27
35
  end
28
36
  end
29
37
  end
@@ -1,9 +1,8 @@
1
1
  module Bramble
2
- class ReduceJob < ActiveJob::Base
3
- queue_as { Bramble.config.queue_as }
4
- def perform(handle, reducer_name, key)
5
- reducer = reducer_name.constantize
6
- Bramble::Reduce.perform_reduce(handle, reducer, key)
2
+ class ReduceJob < Bramble::BaseJob
3
+ def perform(handle, implementation_name, key)
4
+ implementation = implementation_name.constantize
5
+ Bramble::Reduce.perform_reduce(handle, implementation, key)
7
6
  end
8
7
  end
9
8
  end
@@ -0,0 +1,46 @@
1
+ module Bramble
2
+ # This class exposes the data and some info about the state of the task
3
+ class Result
4
+
5
+ attr_reader :handle
6
+
7
+ def initialize(handle)
8
+ job_id = storage.get(Bramble::Keys.job_id_key(handle))
9
+ @handle = "#{handle}:#{job_id}"
10
+ end
11
+
12
+ def data
13
+ @data ||= begin
14
+ key = Bramble::Keys.result_key(handle)
15
+ results = storage.reduce_result_get(key)
16
+ Bramble::Serialize.load(results)
17
+ end
18
+ end
19
+
20
+ def finished?
21
+ @finished ||= finished_count > 0 && total_count == finished_count
22
+ end
23
+
24
+ def running?
25
+ @running ||= started? && !finished?
26
+ end
27
+
28
+ private
29
+
30
+ def total_count
31
+ @total_count ||= storage.get(Bramble::Keys.total_count_key(handle)).to_i
32
+ end
33
+
34
+ def finished_count
35
+ @finished_count ||= storage.get(Bramble::Keys.reduce_finished_count_key(handle)).to_i
36
+ end
37
+
38
+ def started?
39
+ @started ||= !!storage.get(Bramble::Keys.status_key(handle))
40
+ end
41
+
42
+ def storage
43
+ Bramble::Storage
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,24 @@
1
+ module Bramble
2
+ # e.g., Redis stores only strings, so use this module to serialize values before storage and deserialize them after retrieval
3
+ module Serialize
4
+ # prepare an object for storage
5
+ def self.dump(obj)
6
+ Marshal.dump(obj)
7
+ end
8
+
9
+ # reload an object from storage
10
+ def self.load(stored_obj)
11
+ case stored_obj
12
+ when Array
13
+ stored_obj.map { |obj| load(obj) }
14
+ when Hash
15
+ stored_obj.inject({}) do |memo, (k, v)|
16
+ memo[load(k)] = load(v)
17
+ memo
18
+ end
19
+ else
20
+ Marshal.load(stored_obj)
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,56 @@
1
+ module Bramble
2
+ module State
3
+ extend Bramble::Keys
4
+ SEPARATOR = ":"
5
+ module_function
6
+
7
+ # Yield to the block (whenever one is given, regardless of state) and return true if the `job_id` is still active
8
+ def running?(handle)
9
+ handle_name, job_id = handle.split(SEPARATOR)
10
+ is_running = storage.get(job_id_key(handle_name)) == job_id
11
+ if block_given?
12
+ yield
13
+ end
14
+ is_running
15
+ end
16
+
17
+ def start_job(handle)
18
+ handle_name, job_id = handle.split(SEPARATOR)
19
+ previous_job_id = storage.get(job_id_key(handle_name))
20
+ if previous_job_id
21
+ clear_job("#{handle_name}:#{previous_job_id}")
22
+ end
23
+ storage.set(status_key(handle), "started")
24
+ storage.set(job_id_key(handle_name), job_id)
25
+ end
26
+
27
+ def clear_job(handle)
28
+ handle_name, job_id = handle.split(SEPARATOR)
29
+ storage.delete(job_id_key(handle_name))
30
+ storage.delete(status_key(handle))
31
+ clear_reduce(handle)
32
+ clear_map(handle)
33
+ end
34
+
35
+ def clear_map(handle)
36
+ map_group_keys = storage.map_keys_get(keys_key(handle))
37
+ map_group_keys.each do |group_key|
38
+ storage.delete(data_key(handle, group_key))
39
+ end
40
+ storage.delete(keys_key(handle))
41
+ storage.delete(map_finished_count_key(handle))
42
+ end
43
+
44
+ def clear_reduce(handle)
45
+ storage.delete(total_count_key(handle))
46
+ storage.delete(reduce_finished_count_key(handle))
47
+ storage.delete(result_key(handle))
48
+ end
49
+
50
+ private
51
+
52
+ def self.storage
53
+ Bramble::Storage
54
+ end
55
+ end
56
+ end
@@ -9,6 +9,10 @@ module Bramble
9
9
 
10
10
  module_function
11
11
 
12
+ def transaction
13
+ yield
14
+ end
15
+
12
16
  def set(key, value)
13
17
  STORAGE[key] = value
14
18
  end
@@ -1,56 +1,20 @@
1
+ require "forwardable"
1
2
  require "bramble/storage/redis_storage"
2
3
  require "bramble/storage/memory_storage"
3
4
 
4
5
  module Bramble
5
6
  module Storage
6
- extend Bramble::Keys
7
+ extend SingleForwardable
7
8
 
8
- def self.read(handle)
9
- key = result_key(handle)
10
- results = storage.reduce_result_get(key)
11
- load(results)
12
- end
13
-
14
- # Wipe out the results for this handle
15
- def self.delete(handle)
16
- # Reset counts
17
- storage.delete(total_count_key(handle))
18
- storage.delete(finished_count_key(handle))
19
- # Reset result
20
- storage.delete(result_key(handle))
21
-
22
- # Reset dangling map data
23
- map_group_keys = storage.map_keys_get(keys_key(handle))
24
- map_group_keys.each do |group_key|
25
- storage.delete(data_key(handle, group_key))
26
- end
27
- storage.delete(keys_key(handle))
28
- end
29
-
30
-
31
- # prepare an object for storage
32
- def self.dump(obj)
33
- Marshal.dump(obj)
34
- end
35
-
36
- # reload an object from storage
37
- def self.load(stored_obj)
38
- case stored_obj
39
- when Array
40
- stored_obj.map { |obj| load(obj) }
41
- when Hash
42
- stored_obj.inject({}) do |memo, (k, v)|
43
- memo[load(k)] = load(v)
44
- memo
45
- end
46
- else
47
- Marshal.load(stored_obj)
48
- end
49
- end
9
+ def_delegators :storage_instance,
10
+ :set, :get, :delete, :increment,
11
+ :map_result_push, :map_result_get,
12
+ :reduce_result_set, :reduce_result_get,
13
+ :map_keys_push, :map_keys_get
50
14
 
51
15
  private
52
16
 
53
- def self.storage
17
+ def self.storage_instance
54
18
  Bramble.config.storage
55
19
  end
56
20
  end
@@ -1,3 +1,3 @@
1
1
  module Bramble
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
data/lib/bramble.rb CHANGED
@@ -1,10 +1,15 @@
1
1
  require "ostruct"
2
2
  require "active_job"
3
+ require "bramble/base_job"
4
+ require "bramble/begin_job"
3
5
  require "bramble/keys"
4
6
  require "bramble/map"
5
7
  require "bramble/map_job"
6
8
  require "bramble/reduce"
7
9
  require "bramble/reduce_job"
10
+ require "bramble/result"
11
+ require "bramble/serialize"
12
+ require "bramble/state"
8
13
  require "bramble/storage"
9
14
  require "bramble/version"
10
15
  require "bramble/conf"
@@ -19,19 +24,27 @@ module Bramble
19
24
  end
20
25
 
21
26
  # @param handle [String] This string will be used to store the result
22
- # @param implementation [#map, #reduce, #name] The container of map and reduce methods
23
- # @param items [Array] List of items to map over
24
- def self.map_reduce(handle, implementation, items)
25
- Bramble::Map.perform(handle, implementation, items)
27
+ # @param implementation [.map, .reduce, .name, .items(options)] The container of map and reduce methods
28
+ # @param items_options [Object] will be passed to .items
29
+ def self.map_reduce(handle, implementation, items_options = {})
30
+ # Secret feature: the implementation can provide a job_id
31
+ job_id = if implementation.respond_to?(:job_id)
32
+ implementation.job_id
33
+ else
34
+ Time.now.strftime("%s%6N")
35
+ end
36
+ handle = "#{handle}:#{job_id}"
37
+ Bramble::State.start_job(handle)
38
+ Bramble::BeginJob.perform_later(handle, implementation.name, items_options)
26
39
  end
27
40
 
28
- # Get results for `handle`, if they exist
29
- def self.read(handle)
30
- Bramble::Storage.read(handle)
41
+ # @return [Bramble::Result] Status & data for this handle
42
+ def self.get(handle)
43
+ Bramble::Result.new(handle)
31
44
  end
32
45
 
33
46
  # Remove results for `handle`, if there are any
34
47
  def self.delete(handle)
35
- Bramble::Storage.delete(handle)
48
+ Bramble::State.clear_job(handle)
36
49
  end
37
50
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bramble
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Robert Mosolgo
@@ -103,12 +103,17 @@ extra_rdoc_files: []
103
103
  files:
104
104
  - README.md
105
105
  - lib/bramble.rb
106
+ - lib/bramble/base_job.rb
107
+ - lib/bramble/begin_job.rb
106
108
  - lib/bramble/conf.rb
107
109
  - lib/bramble/keys.rb
108
110
  - lib/bramble/map.rb
109
111
  - lib/bramble/map_job.rb
110
112
  - lib/bramble/reduce.rb
111
113
  - lib/bramble/reduce_job.rb
114
+ - lib/bramble/result.rb
115
+ - lib/bramble/serialize.rb
116
+ - lib/bramble/state.rb
112
117
  - lib/bramble/storage.rb
113
118
  - lib/bramble/storage/memory_storage.rb
114
119
  - lib/bramble/storage/redis_storage.rb