bramble 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 044266e293f683cdeabe0d4f9d7789d49324195c
4
- data.tar.gz: 6b516203cd8fc97eb20810245a05d08a96c3e898
3
+ metadata.gz: c78abeda625df09bdbbfbaa059768757a54380f9
4
+ data.tar.gz: b641d188443171568abe011b22fc5b3ccf7083e6
5
5
  SHA512:
6
- metadata.gz: 77f06d4145ce1ad5622cca85c687531ca3b76bcb9c94831d0015e15118cf34a110828de0cd0c2e706f403503f44b16f3ffb858713380090ad2bd99a08025b27c
7
- data.tar.gz: d7b4d8f7c716d1a7b70a1fd4b57ac6be24c166494f2be03e0d86b1b25336bd542c56b5ad0adcc03bf359bacd6197764322f206e5575842213294d8f3d1ffc564
6
+ metadata.gz: 3d5e01178d77d2cd06c99634318835143dbdcb3ba203ecc6477cb7bb60d32019521d531660bc1a8f6e7eab258d0d55ec617113ee61e112c3e31a74a9094b655a
7
+ data.tar.gz: a54391b20995074b7b16382d75823d247f9fb267f27bae2804db4ed5e97c3fc2fbd331cbf666e8a5d42817aafabe8a26c3f194148bf29cc6010876cd2763c4d4
data/README.md CHANGED
@@ -15,10 +15,15 @@ Map-reduce with ActiveJob
15
15
  end
16
16
  ```
17
17
 
18
- - Define a module with `map` and `reduce` functions:
18
+ - Define a module with `map`, `reduce` and `items(options = {})` functions:
19
19
 
20
20
  ```ruby
21
21
  module LetterCount
22
+ # Generate a list of items based on some input
23
+ def self.items(filepath)
24
+ File.read(filepath).split(" ")
25
+ end
26
+
22
27
  # .map is called with each item in the input
23
28
  def self.map(word)
24
29
  letters = word.upcase.each_char
@@ -38,24 +43,26 @@ Map-reduce with ActiveJob
38
43
  end
39
44
  ```
40
45
 
41
- - Start a job with a handle, module, and some data:
46
+ - Start a job with a handle, module, and an (optional) argument for finding data:
42
47
 
43
48
  ```ruby
44
49
  # used for fetching the result later:
45
50
  handle = "shakespeare-letter-count"
46
51
 
47
- # Something that responds to #each:
48
- data = hamlet.split(" ")
52
+ # will be sent to `.items(filepath)`
53
+ hamlet_path = "./shakespeare/hamlet.txt"
49
54
 
50
55
  # Begin the process:
51
- Bramble.map_reduce(handle, LetterCount, words_in_hamlet)
56
+ Bramble.map_reduce(handle, LetterCount, hamlet_path)
52
57
  ```
53
58
 
54
59
  - Later, fetch the result using the handle:
55
60
 
56
61
  ```ruby
57
- result = Bramble.read("shakespeare-letter-count")
58
- # { "A" => 100, "B" => 100, ... }
62
+ result = Bramble.get("shakespeare-letter-count")
63
+ result.running? # => false
64
+ result.finished? # => true
65
+ result.data # => { "A" => 100, "B" => 100, ... }
59
66
  ```
60
67
 
61
68
  - Delete the saved result:
@@ -66,8 +73,6 @@ Map-reduce with ActiveJob
66
73
 
67
74
  ## Todo
68
75
 
69
- - Use `Storage` as gateway to `config.storage`
70
- - Job convenience class?
71
76
  - `.fetch` to find-or-calculate?
72
77
  - Adapters: Memcache, ActiveRecord
73
78
 
@@ -0,0 +1,5 @@
1
+ module Bramble
2
+ class BaseJob < ActiveJob::Base
3
+ queue_as { Bramble.config.queue_as }
4
+ end
5
+ end
@@ -0,0 +1,13 @@
1
+ module Bramble
2
+ # This exists to call `implementation.items` in the background.
3
+ # Fetching the items might take a long time, so it is done in a background job.
4
+ #
5
+ # Then it starts the map-reduce job.
6
+ class BeginJob < Bramble::BaseJob
7
+ def perform(handle, implementation_name, items_options)
8
+ implementation = implementation_name.constantize
9
+ all_items = implementation.items(items_options)
10
+ Bramble::Map.perform(handle, implementation, all_items)
11
+ end
12
+ end
13
+ end
data/lib/bramble/keys.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  module Bramble
2
2
  module Keys
3
+ module_function
4
+
3
5
  def namespace(handle)
4
6
  "#{Bramble.config.namespace}:#{handle}"
5
7
  end
@@ -12,8 +14,12 @@ module Bramble
12
14
  "#{namespace(handle)}:keys"
13
15
  end
14
16
 
15
- def finished_count_key(handle)
16
- "#{namespace(handle)}:finished_count"
17
+ def map_finished_count_key(handle)
18
+ "#{namespace(handle)}:map_finished_count"
19
+ end
20
+
21
+ def reduce_finished_count_key(handle)
22
+ "#{namespace(handle)}:reduce_finished_count"
17
23
  end
18
24
 
19
25
  def total_count_key(handle)
@@ -23,5 +29,13 @@ module Bramble
23
29
  def result_key(handle)
24
30
  "#{namespace(handle)}:result"
25
31
  end
32
+
33
+ def job_id_key(handle)
34
+ "#{namespace(handle)}:job_id"
35
+ end
36
+
37
+ def status_key(handle)
38
+ "#{namespace(handle)}:status"
39
+ end
26
40
  end
27
41
  end
data/lib/bramble/map.rb CHANGED
@@ -5,25 +5,32 @@ module Bramble
5
5
  module_function
6
6
 
7
7
  def perform(handle, implementation, values)
8
- # TODO: make sure there isn't one going on right now
9
- Bramble::Storage.delete(handle)
10
- storage.set(total_count_key(handle), values.length)
11
- values.each do |value|
12
- Bramble::MapJob.perform_later(handle, implementation.name, value)
8
+ Bramble::State.running?(handle) do
9
+ storage.set(total_count_key(handle), values.length)
10
+ values.each do |value|
11
+ Bramble::MapJob.perform_later(handle, implementation.name, Bramble::Serialize.dump(value))
12
+ end
13
13
  end
14
14
  end
15
15
 
16
16
  def perform_map(handle, implementation, value)
17
- impl_keys_key = keys_key(handle)
18
- implementation.map(value) do |map_key, map_val|
19
- raw_key = Bramble::Storage.dump(map_key)
20
- storage.map_keys_push(impl_keys_key, raw_key)
21
- storage.map_result_push(data_key(handle, raw_key), Bramble::Storage.dump(map_val))
22
- end
23
- finished = storage.increment(finished_count_key(handle))
24
- total = storage.get(total_count_key(handle)).to_i
25
- if finished == total
26
- Bramble::Reduce.perform(handle, implementation)
17
+ Bramble::State.running?(handle) do
18
+ impl_keys_key = keys_key(handle)
19
+ implementation.map(value) do |map_key, map_val|
20
+ Bramble::State.running?(handle) do
21
+ raw_key = Bramble::Serialize.dump(map_key)
22
+ storage.map_keys_push(impl_keys_key, raw_key)
23
+ storage.map_result_push(data_key(handle, raw_key), Bramble::Serialize.dump(map_val))
24
+ end
25
+ end
26
+ Bramble::State.running?(handle) do
27
+ finished = storage.increment(map_finished_count_key(handle))
28
+ total = storage.get(total_count_key(handle)).to_i
29
+ if finished == total
30
+ Bramble::Reduce.perform(handle, implementation)
31
+ Bramble::State.clear_map(handle)
32
+ end
33
+ end
27
34
  end
28
35
  end
29
36
 
@@ -32,7 +39,7 @@ module Bramble
32
39
  module_function
33
40
 
34
41
  def storage
35
- Bramble.config.storage
42
+ Bramble::Storage
36
43
  end
37
44
  end
38
45
  end
@@ -1,8 +1,8 @@
1
1
  module Bramble
2
- class MapJob < ActiveJob::Base
3
- queue_as { Bramble.config.queue_as }
4
- def perform(handle, mapper_name, value)
2
+ class MapJob < Bramble::BaseJob
3
+ def perform(handle, mapper_name, raw_value)
5
4
  mapper = mapper_name.constantize
5
+ value = Bramble::Serialize.load(raw_value)
6
6
  Bramble::Map.perform_map(handle, mapper, value)
7
7
  end
8
8
  end
@@ -12,10 +12,18 @@ module Bramble
12
12
  end
13
13
 
14
14
  def perform_reduce(handle, implementation, raw_key)
15
- values = storage.map_result_get(data_key(handle, raw_key))
16
- values = Bramble::Storage.load(values)
17
- reduced_value = implementation.reduce(Bramble::Storage.load(raw_key), values)
18
- storage.reduce_result_set(result_key(handle), raw_key, Bramble::Storage.dump(reduced_value))
15
+ if Bramble::State.running?(handle)
16
+ raw_values = storage.map_result_get(data_key(handle, raw_key))
17
+ values = Bramble::Serialize.load(raw_values)
18
+ key = Bramble::Serialize.load(raw_key)
19
+ reduced_value = implementation.reduce(key, values)
20
+ Bramble::State.running?(handle) do
21
+ storage.reduce_result_set(result_key(handle), raw_key, Bramble::Serialize.dump(reduced_value))
22
+ storage.increment(reduce_finished_count_key(handle))
23
+ end
24
+ else
25
+ Bramble::State.clear_reduce(handle)
26
+ end
19
27
  end
20
28
 
21
29
  private
@@ -23,7 +31,7 @@ module Bramble
23
31
  module_function
24
32
 
25
33
  def storage
26
- Bramble.config.storage
34
+ Bramble::Storage
27
35
  end
28
36
  end
29
37
  end
@@ -1,9 +1,8 @@
1
1
  module Bramble
2
- class ReduceJob < ActiveJob::Base
3
- queue_as { Bramble.config.queue_as }
4
- def perform(handle, reducer_name, key)
5
- reducer = reducer_name.constantize
6
- Bramble::Reduce.perform_reduce(handle, reducer, key)
2
+ class ReduceJob < Bramble::BaseJob
3
+ def perform(handle, implementation_name, key)
4
+ implementation = implementation_name.constantize
5
+ Bramble::Reduce.perform_reduce(handle, implementation, key)
7
6
  end
8
7
  end
9
8
  end
@@ -0,0 +1,46 @@
1
+ module Bramble
2
+ # This class exposes the data and some info about the state of the task
3
+ class Result
4
+
5
+ attr_reader :handle
6
+
7
+ def initialize(handle)
8
+ job_id = storage.get(Bramble::Keys.job_id_key(handle))
9
+ @handle = "#{handle}:#{job_id}"
10
+ end
11
+
12
+ def data
13
+ @data ||= begin
14
+ key = Bramble::Keys.result_key(handle)
15
+ results = storage.reduce_result_get(key)
16
+ Bramble::Serialize.load(results)
17
+ end
18
+ end
19
+
20
+ def finished?
21
+ @finished ||= finished_count > 0 && total_count == finished_count
22
+ end
23
+
24
+ def running?
25
+ @running ||= started? && !finished?
26
+ end
27
+
28
+ private
29
+
30
+ def total_count
31
+ @total_count ||= storage.get(Bramble::Keys.total_count_key(handle)).to_i
32
+ end
33
+
34
+ def finished_count
35
+ @finished_count ||= storage.get(Bramble::Keys.reduce_finished_count_key(handle)).to_i
36
+ end
37
+
38
+ def started?
39
+ @started ||= !!storage.get(Bramble::Keys.status_key(handle))
40
+ end
41
+
42
+ def storage
43
+ Bramble::Storage
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,24 @@
1
+ module Bramble
2
+ # e.g., Redis stores only strings, so use this module to serialize values before storage and deserialize them after
3
+ module Serialize
4
+ # prepare an object for storage
5
+ def self.dump(obj)
6
+ Marshal.dump(obj)
7
+ end
8
+
9
+ # reload an object from storage
10
+ def self.load(stored_obj)
11
+ case stored_obj
12
+ when Array
13
+ stored_obj.map { |obj| load(obj) }
14
+ when Hash
15
+ stored_obj.inject({}) do |memo, (k, v)|
16
+ memo[load(k)] = load(v)
17
+ memo
18
+ end
19
+ else
20
+ Marshal.load(stored_obj)
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,56 @@
1
+ module Bramble
2
+ module State
3
+ extend Bramble::Keys
4
+ SEPARATOR = ":"
5
+ module_function
6
+
7
+ # Return true if the `job_id` in `handle` is still the active one; a given block is yielded either way
8
+ def running?(handle)
9
+ handle_name, job_id = handle.split(SEPARATOR)
10
+ is_running = storage.get(job_id_key(handle_name)) == job_id
11
+ if block_given?
12
+ yield
13
+ end
14
+ is_running
15
+ end
16
+
17
+ def start_job(handle)
18
+ handle_name, job_id = handle.split(SEPARATOR)
19
+ previous_job_id = storage.get(job_id_key(handle_name))
20
+ if previous_job_id
21
+ clear_job("#{handle_name}:#{previous_job_id}")
22
+ end
23
+ storage.set(status_key(handle), "started")
24
+ storage.set(job_id_key(handle_name), job_id)
25
+ end
26
+
27
+ def clear_job(handle)
28
+ handle_name, job_id = handle.split(SEPARATOR)
29
+ storage.delete(job_id_key(handle_name))
30
+ storage.delete(status_key(handle))
31
+ clear_reduce(handle)
32
+ clear_map(handle)
33
+ end
34
+
35
+ def clear_map(handle)
36
+ map_group_keys = storage.map_keys_get(keys_key(handle))
37
+ map_group_keys.each do |group_key|
38
+ storage.delete(data_key(handle, group_key))
39
+ end
40
+ storage.delete(keys_key(handle))
41
+ storage.delete(map_finished_count_key(handle))
42
+ end
43
+
44
+ def clear_reduce(handle)
45
+ storage.delete(total_count_key(handle))
46
+ storage.delete(reduce_finished_count_key(handle))
47
+ storage.delete(result_key(handle))
48
+ end
49
+
50
+ private
51
+
52
+ def self.storage
53
+ Bramble::Storage
54
+ end
55
+ end
56
+ end
@@ -9,6 +9,10 @@ module Bramble
9
9
 
10
10
  module_function
11
11
 
12
+ def transaction
13
+ yield
14
+ end
15
+
12
16
  def set(key, value)
13
17
  STORAGE[key] = value
14
18
  end
@@ -1,56 +1,20 @@
1
+ require "forwardable"
1
2
  require "bramble/storage/redis_storage"
2
3
  require "bramble/storage/memory_storage"
3
4
 
4
5
  module Bramble
5
6
  module Storage
6
- extend Bramble::Keys
7
+ extend SingleForwardable
7
8
 
8
- def self.read(handle)
9
- key = result_key(handle)
10
- results = storage.reduce_result_get(key)
11
- load(results)
12
- end
13
-
14
- # Wipe out the results for this handle
15
- def self.delete(handle)
16
- # Reset counts
17
- storage.delete(total_count_key(handle))
18
- storage.delete(finished_count_key(handle))
19
- # Reset result
20
- storage.delete(result_key(handle))
21
-
22
- # Reset dangling map data
23
- map_group_keys = storage.map_keys_get(keys_key(handle))
24
- map_group_keys.each do |group_key|
25
- storage.delete(data_key(handle, group_key))
26
- end
27
- storage.delete(keys_key(handle))
28
- end
29
-
30
-
31
- # prepare an object for storage
32
- def self.dump(obj)
33
- Marshal.dump(obj)
34
- end
35
-
36
- # reload an object from storage
37
- def self.load(stored_obj)
38
- case stored_obj
39
- when Array
40
- stored_obj.map { |obj| load(obj) }
41
- when Hash
42
- stored_obj.inject({}) do |memo, (k, v)|
43
- memo[load(k)] = load(v)
44
- memo
45
- end
46
- else
47
- Marshal.load(stored_obj)
48
- end
49
- end
9
+ def_delegators :storage_instance,
10
+ :set, :get, :delete, :increment,
11
+ :map_result_push, :map_result_get,
12
+ :reduce_result_set, :reduce_result_get,
13
+ :map_keys_push, :map_keys_get
50
14
 
51
15
  private
52
16
 
53
- def self.storage
17
+ def self.storage_instance
54
18
  Bramble.config.storage
55
19
  end
56
20
  end
@@ -1,3 +1,3 @@
1
1
  module Bramble
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
data/lib/bramble.rb CHANGED
@@ -1,10 +1,15 @@
1
1
  require "ostruct"
2
2
  require "active_job"
3
+ require "bramble/base_job"
4
+ require "bramble/begin_job"
3
5
  require "bramble/keys"
4
6
  require "bramble/map"
5
7
  require "bramble/map_job"
6
8
  require "bramble/reduce"
7
9
  require "bramble/reduce_job"
10
+ require "bramble/result"
11
+ require "bramble/serialize"
12
+ require "bramble/state"
8
13
  require "bramble/storage"
9
14
  require "bramble/version"
10
15
  require "bramble/conf"
@@ -19,19 +24,27 @@ module Bramble
19
24
  end
20
25
 
21
26
  # @param handle [String] This string will be used to store the result
22
- # @param implementation [#map, #reduce, #name] The container of map and reduce methods
23
- # @param items [Array] List of items to map over
24
- def self.map_reduce(handle, implementation, items)
25
- Bramble::Map.perform(handle, implementation, items)
27
+ # @param implementation [.map, .reduce, .name, .items(options)] The container of map and reduce methods
28
+ # @param items_options [Object] will be passed to .items
29
+ def self.map_reduce(handle, implementation, items_options = {})
30
+ # Secret feature: the implementation can provide a job_id
31
+ job_id = if implementation.respond_to?(:job_id)
32
+ implementation.job_id
33
+ else
34
+ Time.now.strftime("%s%6N")
35
+ end
36
+ handle = "#{handle}:#{job_id}"
37
+ Bramble::State.start_job(handle)
38
+ Bramble::BeginJob.perform_later(handle, implementation.name, items_options)
26
39
  end
27
40
 
28
- # Get results for `handle`, if they exist
29
- def self.read(handle)
30
- Bramble::Storage.read(handle)
41
+ # @return [Bramble::Result] Status & data for this handle
42
+ def self.get(handle)
43
+ Bramble::Result.new(handle)
31
44
  end
32
45
 
33
46
  # Remove results for `handle`, if there are any
34
47
  def self.delete(handle)
35
- Bramble::Storage.delete(handle)
48
+ Bramble::State.clear_job(handle)
36
49
  end
37
50
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bramble
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Robert Mosolgo
@@ -103,12 +103,17 @@ extra_rdoc_files: []
103
103
  files:
104
104
  - README.md
105
105
  - lib/bramble.rb
106
+ - lib/bramble/base_job.rb
107
+ - lib/bramble/begin_job.rb
106
108
  - lib/bramble/conf.rb
107
109
  - lib/bramble/keys.rb
108
110
  - lib/bramble/map.rb
109
111
  - lib/bramble/map_job.rb
110
112
  - lib/bramble/reduce.rb
111
113
  - lib/bramble/reduce_job.rb
114
+ - lib/bramble/result.rb
115
+ - lib/bramble/serialize.rb
116
+ - lib/bramble/state.rb
112
117
  - lib/bramble/storage.rb
113
118
  - lib/bramble/storage/memory_storage.rb
114
119
  - lib/bramble/storage/redis_storage.rb