bramble 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 044266e293f683cdeabe0d4f9d7789d49324195c
4
+ data.tar.gz: 6b516203cd8fc97eb20810245a05d08a96c3e898
5
+ SHA512:
6
+ metadata.gz: 77f06d4145ce1ad5622cca85c687531ca3b76bcb9c94831d0015e15118cf34a110828de0cd0c2e706f403503f44b16f3ffb858713380090ad2bd99a08025b27c
7
+ data.tar.gz: d7b4d8f7c716d1a7b70a1fd4b57ac6be24c166494f2be03e0d86b1b25336bd542c56b5ad0adcc03bf359bacd6197764322f206e5575842213294d8f3d1ffc564
data/README.md ADDED
@@ -0,0 +1,76 @@
1
+ # Bramble [![Build Status](https://travis-ci.org/rmosolgo/bramble.svg?branch=master)](https://travis-ci.org/rmosolgo/bramble)
2
+
3
+ Map-reduce with ActiveJob
4
+
5
+ ## Usage
6
+
7
+ - Set up ActiveJob with a queue named `:bramble`
8
+
9
+ - Set up Redis and give Bramble a connection object:
10
+
11
+ ```ruby
12
+ my_redis_connection = Redis.new # Your connection settings here!
13
+ Bramble.config do |conf|
14
+ conf.redis_conn = my_redis_connection
15
+ end
16
+ ```
17
+
18
+ - Define a module with `map` and `reduce` functions:
19
+
20
+ ```ruby
21
+ module LetterCount
22
+ # .map is called with each item in the input
23
+ def self.map(word)
24
+ letters = word.upcase.each_char
25
+
26
+ # call `yield` to emit a key-value pair for processing
27
+ letters.each { |letter| yield(letter, 1) }
28
+ end
29
+
30
+ # .reduce is called with
31
+ # - `yield` key (first argument)
32
+ # - array of `yield` values (second argument)
33
+ def self.reduce(letter, observations)
34
+ # letter => "A"
35
+ # observations => [1, 1, 1, 1, 1]
36
+ observations.length
37
+ end
38
+ end
39
+ ```
40
+
41
+ - Start a job with a handle, module, and some data:
42
+
43
+ ```ruby
44
+ # used for fetching the result later:
45
+ handle = "shakespeare-letter-count"
46
+
47
+ # Something that responds to #each:
48
+ data = hamlet.split(" ")
49
+
50
+ # Begin the process:
51
+ Bramble.map_reduce(handle, LetterCount, data)
52
+ ```
53
+
54
+ - Later, fetch the result using the handle:
55
+
56
+ ```ruby
57
+ result = Bramble.read("shakespeare-letter-count")
58
+ # { "A" => 100, "B" => 100, ... }
59
+ ```
60
+
61
+ - Delete the saved result:
62
+
63
+ ```ruby
64
+ Bramble.delete("shakespeare-letter-count")
65
+ ```
66
+
67
+ ## Todo
68
+
69
+ - Use `Storage` as gateway to `config.storage`
70
+ - Job convenience class?
71
+ - `.fetch` to find-or-calculate?
72
+ - Adapters: Memcache, ActiveRecord
73
+
74
+ ## Development
75
+
76
+ - `rake test`
data/lib/bramble.rb ADDED
@@ -0,0 +1,37 @@
1
+ require "ostruct"
2
+ require "active_job"
3
+ require "bramble/keys"
4
+ require "bramble/map"
5
+ require "bramble/map_job"
6
+ require "bramble/reduce"
7
+ require "bramble/reduce_job"
8
+ require "bramble/storage"
9
+ require "bramble/version"
10
+ require "bramble/conf"
11
+
12
+ module Bramble # Public entry points: configuration, starting a map-reduce, reading and deleting results
13
+ def self.config # With a block: yields Bramble::CONF to it. Without a block: returns Bramble::CONF
14
+ if block_given?
15
+ yield(Bramble::CONF)
16
+ else
17
+ Bramble::CONF
18
+ end
19
+ end
20
+
21
+ # @param handle [String] This string will be used to store the result
22
+ # @param implementation [#map, #reduce, #name] The container of map and reduce methods
23
+ # @param items [Array] List of items to map over (Bramble::Map.perform calls #length and #each on it)
24
+ def self.map_reduce(handle, implementation, items) # Delegates to Bramble::Map.perform
25
+ Bramble::Map.perform(handle, implementation, items)
26
+ end
27
+
28
+ # Get results for `handle`, if they exist
29
+ def self.read(handle) # Delegates to Bramble::Storage.read
30
+ Bramble::Storage.read(handle)
31
+ end
32
+
33
+ # Remove results for `handle`, if there are any
34
+ def self.delete(handle) # Delegates to Bramble::Storage.delete
35
+ Bramble::Storage.delete(handle)
36
+ end
37
+ end
@@ -0,0 +1,8 @@
1
+ module Bramble
2
+ CONF = OpenStruct.new( # Settings object exposed through Bramble.config
3
+ redis_conn: nil, # connection used by RedisStorage; nil until the application assigns one
4
+ namespace: "Bramble", # prefix of every storage key built by Bramble::Keys#namespace
5
+ queue_as: :bramble, # queue name read by MapJob and ReduceJob
6
+ storage: Bramble::Storage::RedisStorage # backend module used by Map, Reduce and Storage
7
+ )
8
+ end
@@ -0,0 +1,27 @@
1
+ module Bramble
2
+ module Keys # Builds the storage key strings for a job handle; extended by Map, Reduce and Storage
3
+ def namespace(handle) # => "<Bramble.config.namespace>:<handle>", the prefix shared by all keys below
4
+ "#{Bramble.config.namespace}:#{handle}"
5
+ end
6
+
7
+ def data_key(handle, key) # Key holding the values emitted by map for one (dumped) map key
8
+ "#{namespace(handle)}:data:#{key}"
9
+ end
10
+
11
+ def keys_key(handle) # Key holding the collection of (dumped) map keys seen for this handle
12
+ "#{namespace(handle)}:keys"
13
+ end
14
+
15
+ def finished_count_key(handle) # Key of the counter incremented once per completed perform_map
16
+ "#{namespace(handle)}:finished_count"
17
+ end
18
+
19
+ def total_count_key(handle) # Key holding the number of input items recorded by Map.perform
20
+ "#{namespace(handle)}:total_count"
21
+ end
22
+
23
+ def result_key(handle) # Key holding the reduce results for this handle
24
+ "#{namespace(handle)}:result"
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,38 @@
1
+ module Bramble
2
+ module Map # Map phase: fans input values out to MapJob and records the emitted key/value pairs
3
+ extend Bramble::Keys
4
+
5
+ module_function
6
+
7
+ def perform(handle, implementation, values) # Clears old data for handle, stores the item count, enqueues one MapJob per value
8
+ # TODO: make sure there isn't one going on right now
9
+ Bramble::Storage.delete(handle)
10
+ storage.set(total_count_key(handle), values.length)
11
+ values.each do |value|
12
+ Bramble::MapJob.perform_later(handle, implementation.name, value) # the implementation is passed by name; MapJob resolves it again
13
+ end
14
+ end
15
+
16
+ def perform_map(handle, implementation, value) # Runs implementation.map on one value and stores every pair it yields
17
+ impl_keys_key = keys_key(handle)
18
+ implementation.map(value) do |map_key, map_val|
19
+ raw_key = Bramble::Storage.dump(map_key) # keys are stored in dumped form
20
+ storage.map_keys_push(impl_keys_key, raw_key)
21
+ storage.map_result_push(data_key(handle, raw_key), Bramble::Storage.dump(map_val))
22
+ end
23
+ finished = storage.increment(finished_count_key(handle))
24
+ total = storage.get(total_count_key(handle)).to_i
25
+ if finished == total # the call that brings the finished count up to the total starts the reduce phase
26
+ Bramble::Reduce.perform(handle, implementation)
27
+ end
28
+ end
29
+
30
+ private # NOTE(review): the module_function call below still defines `storage` as a public module method
31
+
32
+ module_function
33
+
34
+ def storage # The configured storage backend (Bramble.config.storage)
35
+ Bramble.config.storage
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,9 @@
1
+ module Bramble
2
+ class MapJob < ActiveJob::Base # Background job that maps a single input value
3
+ queue_as { Bramble.config.queue_as } # block form: the queue name is read from the config, not fixed here
4
+ def perform(handle, mapper_name, value) # mapper_name is the implementation's name as passed by Map.perform
5
+ mapper = mapper_name.constantize # resolve the name back to the implementation constant
6
+ Bramble::Map.perform_map(handle, mapper, value)
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,29 @@
1
+ module Bramble
2
+ module Reduce # Reduce phase: one ReduceJob per map key, each writing its result under result_key(handle)
3
+ extend Bramble::Keys
4
+
5
+ module_function
6
+
7
+ def perform(handle, implementation) # Enqueues a ReduceJob for every (dumped) key recorded during the map phase
8
+ all_raw_keys = storage.map_keys_get(keys_key(handle))
9
+ all_raw_keys.each do |raw_key|
10
+ Bramble::ReduceJob.perform_later(handle, implementation.name, raw_key)
11
+ end
12
+ end
13
+
14
+ def perform_reduce(handle, implementation, raw_key) # Reduces the values stored for one key and saves the result
15
+ values = storage.map_result_get(data_key(handle, raw_key)) # dumped values pushed by Map.perform_map
16
+ values = Bramble::Storage.load(values)
17
+ reduced_value = implementation.reduce(Bramble::Storage.load(raw_key), values) # reduce receives the loaded key and loaded values
18
+ storage.reduce_result_set(result_key(handle), raw_key, Bramble::Storage.dump(reduced_value)) # stored under the dumped key, with a dumped value
19
+ end
20
+
21
+ private # NOTE(review): the module_function call below still defines `storage` as a public module method
22
+
23
+ module_function
24
+
25
+ def storage # The configured storage backend (Bramble.config.storage)
26
+ Bramble.config.storage
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,9 @@
1
+ module Bramble
2
+ class ReduceJob < ActiveJob::Base # Background job that reduces the values collected for a single key
3
+ queue_as { Bramble.config.queue_as } # block form: the queue name is read from the config, not fixed here
4
+ def perform(handle, reducer_name, key) # key is the dumped map key as passed by Reduce.perform
5
+ reducer = reducer_name.constantize # resolve the name back to the implementation constant
6
+ Bramble::Reduce.perform_reduce(handle, reducer, key)
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,57 @@
1
+ require "bramble/storage/redis_storage"
2
+ require "bramble/storage/memory_storage"
3
+
4
+ module Bramble
5
+ module Storage # Reads and deletes results through the configured backend; also owns dump/load serialization
6
+ extend Bramble::Keys
7
+
8
+ def self.read(handle) # Returns the stored reduce results for handle, passed through .load
9
+ key = result_key(handle)
10
+ results = storage.reduce_result_get(key)
11
+ load(results)
12
+ end
13
+
14
+ # Wipe out the results for this handle
15
+ def self.delete(handle)
16
+ # Reset counts
17
+ storage.delete(total_count_key(handle))
18
+ storage.delete(finished_count_key(handle))
19
+ # Reset result
20
+ storage.delete(result_key(handle))
21
+
22
+ # Reset dangling map data
23
+ map_group_keys = storage.map_keys_get(keys_key(handle)) # must be read before keys_key itself is deleted below
24
+ map_group_keys.each do |group_key|
25
+ storage.delete(data_key(handle, group_key))
26
+ end
27
+ storage.delete(keys_key(handle))
28
+ end
29
+
30
+
31
+ # prepare an object for storage
32
+ def self.dump(obj)
33
+ Marshal.dump(obj)
34
+ end
35
+
36
+ # reload an object from storage
37
+ def self.load(stored_obj) # Arrays and Hashes are walked recursively; anything else goes to Marshal.load
38
+ case stored_obj
39
+ when Array
40
+ stored_obj.map { |obj| load(obj) }
41
+ when Hash
42
+ stored_obj.inject({}) do |memo, (k, v)|
43
+ memo[load(k)] = load(v) # both keys and values are stored in dumped form
44
+ memo
45
+ end
46
+ else
47
+ Marshal.load(stored_obj) # NOTE(review): Marshal.load is unsafe on untrusted data; assumes the backend only holds values written by .dump
48
+ end
49
+ end
50
+
51
+ private # NOTE(review): `private` does not apply to `def self.` methods, so Storage.storage remains public
52
+
53
+ def self.storage # The configured storage backend (Bramble.config.storage)
54
+ Bramble.config.storage
55
+ end
56
+ end
57
+ end
@@ -0,0 +1,57 @@
1
+ require "set"
2
+
3
+ module Bramble
4
+ module Storage
5
+ # ☠ This is for single-threaded, single-process Ruby only!
6
+ # If you try to use this in production, you're going to have a bad time.
7
+ module MemoryStorage # Storage backend that keeps everything in one in-process Hash
8
+ STORAGE = {} # shared by every handle; keys are the strings built by Bramble::Keys
9
+
10
+ module_function
11
+
12
+ def set(key, value) # Stores value under key, replacing any previous entry
13
+ STORAGE[key] = value
14
+ end
15
+
16
+ def get(key) # Returns the entry for key, or nil when absent
17
+ STORAGE[key]
18
+ end
19
+
20
+ def delete(key) # Removes the entry for key
21
+ STORAGE.delete(key)
22
+ end
23
+
24
+ def increment(key) # Adds 1 to the counter at key (starting from 0) and returns the new value
25
+ STORAGE[key] ||= 0
26
+ STORAGE[key] += 1
27
+ end
28
+
29
+ def map_result_push(key, value) # Appends value to the Array at key, creating it on first use
30
+ STORAGE[key] ||= []
31
+ STORAGE[key] << value
32
+ end
33
+
34
+ def map_result_get(key) # Returns the Array at key, or an empty Array when absent
35
+ STORAGE[key] || []
36
+ end
37
+
38
+ def reduce_result_set(storage_key, reduce_key, value) # Sets reduce_key => value in the Hash at storage_key, creating it on first use
39
+ STORAGE[storage_key] ||= {}
40
+ STORAGE[storage_key][reduce_key] = value
41
+ end
42
+
43
+ def reduce_result_get(storage_key) # Returns the Hash at storage_key, or an empty Hash when absent
44
+ STORAGE[storage_key] || {}
45
+ end
46
+
47
+ def map_keys_push(key, value) # Adds value to the Set at key, so each map key is recorded once
48
+ STORAGE[key] ||= Set.new
49
+ STORAGE[key] << value
50
+ end
51
+
52
+ def map_keys_get(key) # Returns the Set at key, or an empty Set when absent
53
+ STORAGE[key] || Set.new
54
+ end
55
+ end
56
+ end
57
+ end
@@ -0,0 +1,55 @@
1
+ module Bramble
2
+ module Storage
3
+ module RedisStorage # Storage backend that forwards each operation to Bramble.config.redis_conn
4
+ module_function
5
+
6
+ def set(key, value) # Forwards to redis_conn.set
7
+ redis_conn.set(key, value)
8
+ end
9
+
10
+ def get(key) # Forwards to redis_conn.get
11
+ redis_conn.get(key)
12
+ end
13
+
14
+ def delete(key) # Forwards to redis_conn.del
15
+ redis_conn.del(key)
16
+ end
17
+
18
+ def increment(key) # Forwards to redis_conn.incr; Map.perform_map compares the return value with the total count
19
+ redis_conn.incr(key)
20
+ end
21
+
22
+ def map_result_push(key, value) # Appends value to the list at key (rpush)
23
+ redis_conn.rpush(key, value)
24
+ end
25
+
26
+ def map_result_get(key) # Reads the whole list at key (lrange 0..-1)
27
+ redis_conn.lrange(key, 0, -1)
28
+ end
29
+
30
+ def reduce_result_set(storage_key, reduce_key, value) # Sets field reduce_key in the hash at storage_key (hset)
31
+ redis_conn.hset(storage_key, reduce_key, value)
32
+ end
33
+
34
+ def reduce_result_get(storage_key) # Reads every field of the hash at storage_key (hgetall)
35
+ redis_conn.hgetall(storage_key)
36
+ end
37
+
38
+ def map_keys_push(key, value) # Adds value to the set at key (sadd)
39
+ redis_conn.sadd(key, value)
40
+ end
41
+
42
+ def map_keys_get(key) # Reads all members of the set at key (smembers)
43
+ redis_conn.smembers(key)
44
+ end
45
+
46
+ private # NOTE(review): the module_function call below still defines `redis_conn` as a public module method
47
+
48
+ module_function
49
+
50
+ def redis_conn # The connection object assigned to Bramble.config.redis_conn
51
+ Bramble.config.redis_conn
52
+ end
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,3 @@
1
+ module Bramble
2
+ VERSION = "0.1.0" # Gem version string
3
+ end
metadata ADDED
@@ -0,0 +1,140 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bramble
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Robert Mosolgo
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-05-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activejob
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: minitest
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: minitest-focus
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest-reporters
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: redis
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: Distribute map-reduce tasks with ActiveJob, storing the results in Redis
98
+ (or another backend)
99
+ email: rdmosolgo@gmail.com
100
+ executables: []
101
+ extensions: []
102
+ extra_rdoc_files: []
103
+ files:
104
+ - README.md
105
+ - lib/bramble.rb
106
+ - lib/bramble/conf.rb
107
+ - lib/bramble/keys.rb
108
+ - lib/bramble/map.rb
109
+ - lib/bramble/map_job.rb
110
+ - lib/bramble/reduce.rb
111
+ - lib/bramble/reduce_job.rb
112
+ - lib/bramble/storage.rb
113
+ - lib/bramble/storage/memory_storage.rb
114
+ - lib/bramble/storage/redis_storage.rb
115
+ - lib/bramble/version.rb
116
+ homepage: https://github.com/rmosolgo/bramble
117
+ licenses:
118
+ - MIT
119
+ metadata: {}
120
+ post_install_message:
121
+ rdoc_options: []
122
+ require_paths:
123
+ - lib
124
+ required_ruby_version: !ruby/object:Gem::Requirement
125
+ requirements:
126
+ - - ">="
127
+ - !ruby/object:Gem::Version
128
+ version: 2.0.0
129
+ required_rubygems_version: !ruby/object:Gem::Requirement
130
+ requirements:
131
+ - - ">="
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ requirements: []
135
+ rubyforge_project:
136
+ rubygems_version: 2.5.1
137
+ signing_key:
138
+ specification_version: 4
139
+ summary: Map-reduce, backed by ActiveJob
140
+ test_files: []