bramble 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +76 -0
- data/lib/bramble.rb +37 -0
- data/lib/bramble/conf.rb +8 -0
- data/lib/bramble/keys.rb +27 -0
- data/lib/bramble/map.rb +38 -0
- data/lib/bramble/map_job.rb +9 -0
- data/lib/bramble/reduce.rb +29 -0
- data/lib/bramble/reduce_job.rb +9 -0
- data/lib/bramble/storage.rb +57 -0
- data/lib/bramble/storage/memory_storage.rb +57 -0
- data/lib/bramble/storage/redis_storage.rb +55 -0
- data/lib/bramble/version.rb +3 -0
- metadata +140 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 044266e293f683cdeabe0d4f9d7789d49324195c
|
4
|
+
data.tar.gz: 6b516203cd8fc97eb20810245a05d08a96c3e898
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 77f06d4145ce1ad5622cca85c687531ca3b76bcb9c94831d0015e15118cf34a110828de0cd0c2e706f403503f44b16f3ffb858713380090ad2bd99a08025b27c
|
7
|
+
data.tar.gz: d7b4d8f7c716d1a7b70a1fd4b57ac6be24c166494f2be03e0d86b1b25336bd542c56b5ad0adcc03bf359bacd6197764322f206e5575842213294d8f3d1ffc564
|
data/README.md
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
# Bramble [Build Status](https://travis-ci.org/rmosolgo/bramble)
|
2
|
+
|
3
|
+
Map-reduce with ActiveJob
|
4
|
+
|
5
|
+
## Usage
|
6
|
+
|
7
|
+
- Set up ActiveJob with a queue named `:bramble`
|
8
|
+
|
9
|
+
- Set up Redis and give Bramble a connection object:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
my_redis_connection = Redis.new # Your connection settings here!
|
13
|
+
Bramble.config do |conf|
|
14
|
+
conf.redis_conn = my_redis_connection
|
15
|
+
end
|
16
|
+
```
|
17
|
+
|
18
|
+
- Define a module with `map` and `reduce` functions:
|
19
|
+
|
20
|
+
```ruby
|
21
|
+
module LetterCount
|
22
|
+
# .map is called with each item in the input
|
23
|
+
def self.map(word)
|
24
|
+
letters = word.upcase.each_char
|
25
|
+
|
26
|
+
# call `yield` to emit a key-value pair for processing
|
27
|
+
letters.each { |letter| yield(letter, 1) }
|
28
|
+
end
|
29
|
+
|
30
|
+
# .reduce is called with
|
31
|
+
# - `yield` key (first argument)
|
32
|
+
# - array of `yield` values (second argument)
|
33
|
+
def self.reduce(letter, observations)
|
34
|
+
# letter => "A"
|
35
|
+
# observations => [1, 1, 1, 1, 1]
|
36
|
+
observations.length
|
37
|
+
end
|
38
|
+
end
|
39
|
+
```
|
40
|
+
|
41
|
+
- Start a job with a handle, module, and some data:
|
42
|
+
|
43
|
+
```ruby
|
44
|
+
# used for fetching the result later:
|
45
|
+
handle = "shakespeare-letter-count"
|
46
|
+
|
47
|
+
# An Array of items (Bramble calls #length and #each on it):
|
48
|
+
data = hamlet.split(" ")
|
49
|
+
|
50
|
+
# Begin the process:
|
51
|
+
Bramble.map_reduce(handle, LetterCount, data)
|
52
|
+
```
|
53
|
+
|
54
|
+
- Later, fetch the result using the handle:
|
55
|
+
|
56
|
+
```ruby
|
57
|
+
result = Bramble.read("shakespeare-letter-count")
|
58
|
+
# { "A" => 100, "B" => 100, ... }
|
59
|
+
```
|
60
|
+
|
61
|
+
- Delete the saved result:
|
62
|
+
|
63
|
+
```ruby
|
64
|
+
Bramble.delete("shakespeare-letter-count")
|
65
|
+
```
|
66
|
+
|
67
|
+
## Todo
|
68
|
+
|
69
|
+
- Use `Storage` as gateway to `config.storage`
|
70
|
+
- Job convenience class?
|
71
|
+
- `.fetch` to find-or-calculate?
|
72
|
+
- Adapters: Memcache, ActiveRecord
|
73
|
+
|
74
|
+
## Development
|
75
|
+
|
76
|
+
- `rake test`
|
data/lib/bramble.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
require "ostruct"
|
2
|
+
require "active_job"
|
3
|
+
require "bramble/keys"
|
4
|
+
require "bramble/map"
|
5
|
+
require "bramble/map_job"
|
6
|
+
require "bramble/reduce"
|
7
|
+
require "bramble/reduce_job"
|
8
|
+
require "bramble/storage"
|
9
|
+
require "bramble/version"
|
10
|
+
require "bramble/conf"
|
11
|
+
|
12
|
+
# Public entry points: configuration access, starting a map-reduce run,
# and reading or deleting the stored result for a handle.
module Bramble
  class << self
    # Access the shared configuration object.
    #
    # With a block, yields `Bramble::CONF` and returns whatever the block
    # returns; without a block, returns `Bramble::CONF` itself.
    def config
      return Bramble::CONF unless block_given?

      yield(Bramble::CONF)
    end

    # Start a map-reduce run by delegating to `Bramble::Map.perform`.
    #
    # @param handle [String] This string will be used to store the result
    # @param implementation [#map, #reduce, #name] The container of map and reduce methods
    # @param items [Array] List of items to map over
    def map_reduce(handle, implementation, items)
      Bramble::Map.perform(handle, implementation, items)
    end

    # Get results for `handle`, if they exist
    # (delegates to `Bramble::Storage.read`).
    def read(handle)
      Bramble::Storage.read(handle)
    end

    # Remove results for `handle`, if there are any
    # (delegates to `Bramble::Storage.delete`).
    def delete(handle)
      Bramble::Storage.delete(handle)
    end
  end
end
|
data/lib/bramble/conf.rb
ADDED
data/lib/bramble/keys.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
module Bramble
  # Builders for the storage keys used by a map-reduce run.
  # Every key is prefixed with the configured namespace and the run's handle.
  module Keys
    # Root prefix for everything stored under `handle`:
    # "<Bramble.config.namespace>:<handle>".
    def namespace(handle)
      configured = Bramble.config.namespace
      "#{configured}:#{handle}"
    end

    # Key for the values emitted for one map key within `handle`.
    def data_key(handle, key)
      prefix = namespace(handle)
      "#{prefix}:data:#{key}"
    end

    # Key for the collection of map keys seen for `handle`.
    def keys_key(handle)
      prefix = namespace(handle)
      "#{prefix}:keys"
    end

    # Key for the counter of finished map steps for `handle`.
    def finished_count_key(handle)
      prefix = namespace(handle)
      "#{prefix}:finished_count"
    end

    # Key for the total number of map steps expected for `handle`.
    def total_count_key(handle)
      prefix = namespace(handle)
      "#{prefix}:total_count"
    end

    # Key for the reduced results of `handle`.
    def result_key(handle)
      prefix = namespace(handle)
      "#{prefix}:result"
    end
  end
end
|
data/lib/bramble/map.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
module Bramble
  # Map phase: fans the input out to one `Bramble::MapJob` per item and
  # records the key/value pairs each item emits.
  module Map
    extend Bramble::Keys

    module_function

    # Reset any stored state for `handle`, record how many items will be
    # mapped, then enqueue one MapJob per item.
    #
    # @param handle [String] identifies this run in storage
    # @param implementation [#name] enqueued by name rather than by reference
    # @param values [#length, #each] items to map over
    def perform(handle, implementation, values)
      # TODO: make sure there isn't one going on right now
      Bramble::Storage.delete(handle)
      storage.set(total_count_key(handle), values.length)
      values.each { |value| Bramble::MapJob.perform_later(handle, implementation.name, value) }
    end

    # Run `implementation.map` on a single value, storing every emitted
    # pair, then start the reduce phase once the finished counter equals
    # the stored total.
    #
    # @param handle [String] identifies this run in storage
    # @param implementation [#map] yields (key, value) pairs for `value`
    # @param value [Object] one item of the original input
    def perform_map(handle, implementation, value)
      group_keys_key = keys_key(handle)
      implementation.map(value) do |map_key, map_val|
        raw_key = Bramble::Storage.dump(map_key)
        storage.map_keys_push(group_keys_key, raw_key)
        values_key = data_key(handle, raw_key)
        storage.map_result_push(values_key, Bramble::Storage.dump(map_val))
      end

      finished = storage.increment(finished_count_key(handle))
      # `to_i` because the stored total may come back in another form
      # (presumably a String from Redis; verify against the backend).
      total = storage.get(total_count_key(handle)).to_i
      Bramble::Reduce.perform(handle, implementation) if finished == total
    end

    # Configured storage backend. Defined under `module_function`, so it is
    # also callable as `Bramble::Map.storage`.
    def storage
      Bramble.config.storage
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Bramble
  # Reduce phase: fans out one `Bramble::ReduceJob` per map key and stores
  # each reduced value under the run's result key.
  module Reduce
    extend Bramble::Keys

    module_function

    # Enqueue one ReduceJob for every raw map key recorded for `handle`.
    #
    # @param handle [String] identifies this run in storage
    # @param implementation [#name] enqueued by name rather than by reference
    def perform(handle, implementation)
      storage.map_keys_get(keys_key(handle)).each do |raw_key|
        Bramble::ReduceJob.perform_later(handle, implementation.name, raw_key)
      end
    end

    # Reduce all values emitted for one key and store the dumped result.
    #
    # @param handle [String] identifies this run in storage
    # @param implementation [#reduce] called with the loaded key and values
    # @param raw_key [String] the dumped map key, as stored by the map phase
    def perform_reduce(handle, implementation, raw_key)
      raw_values = storage.map_result_get(data_key(handle, raw_key))
      values = Bramble::Storage.load(raw_values)
      reduce_key = Bramble::Storage.load(raw_key)
      reduced_value = implementation.reduce(reduce_key, values)
      # The result is filed under the raw (still-dumped) key.
      storage.reduce_result_set(result_key(handle), raw_key, Bramble::Storage.dump(reduced_value))
    end

    # Configured storage backend. Defined under `module_function`, so it is
    # also callable as `Bramble::Reduce.storage`.
    def storage
      Bramble.config.storage
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require "bramble/storage/redis_storage"
|
2
|
+
require "bramble/storage/memory_storage"
|
3
|
+
|
4
|
+
module Bramble
  # Gateway for reading and clearing a run's results, plus the
  # serialization helpers used for everything written to the backend.
  module Storage
    extend Bramble::Keys

    class << self
      # Fetch the reduced results stored for `handle` and deserialize them.
      def read(handle)
        raw_results = storage.reduce_result_get(result_key(handle))
        load(raw_results)
      end

      # Wipe out the results for this handle
      def delete(handle)
        # Reset counts
        storage.delete(total_count_key(handle))
        storage.delete(finished_count_key(handle))
        # Reset result
        storage.delete(result_key(handle))

        # Reset dangling map data: one entry per recorded map key, then the
        # key collection itself.
        group_keys_key = keys_key(handle)
        storage.map_keys_get(group_keys_key).each do |group_key|
          storage.delete(data_key(handle, group_key))
        end
        storage.delete(group_keys_key)
      end

      # prepare an object for storage
      def dump(obj)
        Marshal.dump(obj)
      end

      # reload an object from storage
      #
      # Arrays and Hashes are walked recursively (both keys and values of a
      # Hash are loaded); anything else is passed to `Marshal.load`.
      #
      # NOTE(review): `Marshal.load` is unsafe on untrusted data; this
      # assumes the backend only holds values written through `dump`.
      def load(stored_obj)
        case stored_obj
        when Hash
          stored_obj.each_with_object({}) do |(raw_key, raw_value), loaded|
            loaded[load(raw_key)] = load(raw_value)
          end
        when Array
          stored_obj.map { |item| load(item) }
        else
          Marshal.load(stored_obj)
        end
      end

      # Configured storage backend. Remains publicly callable as
      # `Bramble::Storage.storage`.
      def storage
        Bramble.config.storage
      end
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require "set"
|
2
|
+
|
3
|
+
module Bramble
  module Storage
    # ☠ This is for single-threaded, single-process Ruby only!
    # If you try to use this in production, you're going to have a bad time.
    #
    # Keeps everything in one process-wide Hash (`STORAGE`).
    module MemoryStorage
      STORAGE = {}

      module_function

      # Store `value` under `key`; returns `value`.
      def set(key, value)
        STORAGE.store(key, value)
      end

      # Return the value stored under `key`, or nil.
      def get(key)
        STORAGE[key]
      end

      # Remove `key`; returns the removed value, or nil.
      def delete(key)
        STORAGE.delete(key)
      end

      # Add one to the counter at `key` (starting from 0) and return it.
      def increment(key)
        STORAGE[key] = (STORAGE[key] || 0) + 1
      end

      # Append `value` to the list at `key`, creating the list on first use.
      def map_result_push(key, value)
        (STORAGE[key] ||= []) << value
      end

      # Return the list at `key`, or an empty Array.
      def map_result_get(key)
        STORAGE[key] || []
      end

      # Set `reduce_key => value` in the Hash at `storage_key`,
      # creating the Hash on first use.
      def reduce_result_set(storage_key, reduce_key, value)
        (STORAGE[storage_key] ||= {})[reduce_key] = value
      end

      # Return the Hash at `storage_key`, or an empty Hash.
      def reduce_result_get(storage_key)
        STORAGE[storage_key] || {}
      end

      # Add `value` to the Set at `key`, creating the Set on first use.
      def map_keys_push(key, value)
        (STORAGE[key] ||= Set.new) << value
      end

      # Return the Set at `key`, or an empty Set.
      def map_keys_get(key)
        STORAGE[key] || Set.new
      end
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Bramble
  module Storage
    # Storage backend that forwards every operation to the connection held
    # in `Bramble.config.redis_conn`.
    module RedisStorage
      module_function

      # Connection object taken from the Bramble configuration. Defined
      # under `module_function`, so it is also callable on the module.
      def redis_conn
        Bramble.config.redis_conn
      end

      # --- plain values and counters ---

      # Forward to `get` on the connection.
      def get(key)
        redis_conn.get(key)
      end

      # Forward to `set` on the connection.
      def set(key, value)
        redis_conn.set(key, value)
      end

      # Forward to `del` on the connection.
      def delete(key)
        redis_conn.del(key)
      end

      # Forward to `incr` on the connection.
      def increment(key)
        redis_conn.incr(key)
      end

      # --- emitted map values (list commands) ---

      # Append `value` with `rpush`.
      def map_result_push(key, value)
        redis_conn.rpush(key, value)
      end

      # Read the whole list with `lrange key 0 -1`.
      def map_result_get(key)
        redis_conn.lrange(key, 0, -1)
      end

      # --- reduced results (hash commands) ---

      # Store `value` in field `reduce_key` with `hset`.
      def reduce_result_set(storage_key, reduce_key, value)
        redis_conn.hset(storage_key, reduce_key, value)
      end

      # Read every field with `hgetall`.
      def reduce_result_get(storage_key)
        redis_conn.hgetall(storage_key)
      end

      # --- seen map keys (set commands) ---

      # Add `value` with `sadd`.
      def map_keys_push(key, value)
        redis_conn.sadd(key, value)
      end

      # Read every member with `smembers`.
      def map_keys_get(key)
        redis_conn.smembers(key)
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,140 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bramble
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Robert Mosolgo
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-05-13 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activejob
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: minitest
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: minitest-focus
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: minitest-reporters
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rake
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: redis
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
description: Distribute map-reduce tasks with ActiveJob, storing the results in Redis
|
98
|
+
(or another backend)
|
99
|
+
email: rdmosolgo@gmail.com
|
100
|
+
executables: []
|
101
|
+
extensions: []
|
102
|
+
extra_rdoc_files: []
|
103
|
+
files:
|
104
|
+
- README.md
|
105
|
+
- lib/bramble.rb
|
106
|
+
- lib/bramble/conf.rb
|
107
|
+
- lib/bramble/keys.rb
|
108
|
+
- lib/bramble/map.rb
|
109
|
+
- lib/bramble/map_job.rb
|
110
|
+
- lib/bramble/reduce.rb
|
111
|
+
- lib/bramble/reduce_job.rb
|
112
|
+
- lib/bramble/storage.rb
|
113
|
+
- lib/bramble/storage/memory_storage.rb
|
114
|
+
- lib/bramble/storage/redis_storage.rb
|
115
|
+
- lib/bramble/version.rb
|
116
|
+
homepage: https://github.com/rmosolgo/bramble
|
117
|
+
licenses:
|
118
|
+
- MIT
|
119
|
+
metadata: {}
|
120
|
+
post_install_message:
|
121
|
+
rdoc_options: []
|
122
|
+
require_paths:
|
123
|
+
- lib
|
124
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
125
|
+
requirements:
|
126
|
+
- - ">="
|
127
|
+
- !ruby/object:Gem::Version
|
128
|
+
version: 2.0.0
|
129
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
130
|
+
requirements:
|
131
|
+
- - ">="
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: '0'
|
134
|
+
requirements: []
|
135
|
+
rubyforge_project:
|
136
|
+
rubygems_version: 2.5.1
|
137
|
+
signing_key:
|
138
|
+
specification_version: 4
|
139
|
+
summary: Map-reduce, backed by ActiveJob
|
140
|
+
test_files: []
|