bramble 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 044266e293f683cdeabe0d4f9d7789d49324195c
4
+ data.tar.gz: 6b516203cd8fc97eb20810245a05d08a96c3e898
5
+ SHA512:
6
+ metadata.gz: 77f06d4145ce1ad5622cca85c687531ca3b76bcb9c94831d0015e15118cf34a110828de0cd0c2e706f403503f44b16f3ffb858713380090ad2bd99a08025b27c
7
+ data.tar.gz: d7b4d8f7c716d1a7b70a1fd4b57ac6be24c166494f2be03e0d86b1b25336bd542c56b5ad0adcc03bf359bacd6197764322f206e5575842213294d8f3d1ffc564
data/README.md ADDED
@@ -0,0 +1,76 @@
1
+ # Bramble [![Build Status](https://travis-ci.org/rmosolgo/bramble.svg?branch=master)](https://travis-ci.org/rmosolgo/bramble)
2
+
3
+ Map-reduce with ActiveJob
4
+
5
+ ## Usage
6
+
7
+ - Set up ActiveJob with a queue named `:bramble`
8
+
9
+ - Set up Redis and give Bramble a connection object:
10
+
11
+ ```ruby
12
+ my_redis_connection = Redis.new # Your connection settings here!
13
+ Bramble.config do |conf|
14
+ conf.redis_conn = my_redis_connection
15
+ end
16
+ ```
17
+
18
+ - Define a module with `map` and `reduce` functions:
19
+
20
+ ```ruby
21
+ module LetterCount
22
+ # .map is called with each item in the input
23
+ def self.map(word)
24
+ letters = word.upcase.each_char
25
+
26
+ # call `yield` to emit a key-value pair for processing
27
+ letters.each { |letter| yield(letter, 1) }
28
+ end
29
+
30
+ # .reduce is called once for each distinct key, with:
31
+ # - the key that was passed to `yield` (first argument)
32
+ # - an array of all values yielded with that key (second argument)
33
+ def self.reduce(letter, observations)
34
+ # letter => "A"
35
+ # observations => [1, 1, 1, 1, 1]
36
+ observations.length
37
+ end
38
+ end
39
+ ```
40
+
41
+ - Start a job with a handle, module, and some data:
42
+
43
+ ```ruby
44
+ # used for fetching the result later:
45
+ handle = "shakespeare-letter-count"
46
+
47
+ # Something that responds to #each and #length (e.g. an Array):
48
+ data = hamlet.split(" ")
49
+
50
+ # Begin the process:
51
+ Bramble.map_reduce(handle, LetterCount, data)
52
+ ```
53
+
54
+ - Later, fetch the result using the handle:
55
+
56
+ ```ruby
57
+ result = Bramble.read("shakespeare-letter-count")
58
+ # { "A" => 100, "B" => 100, ... }
59
+ ```
60
+
61
+ - Delete the saved result:
62
+
63
+ ```ruby
64
+ Bramble.delete("shakespeare-letter-count")
65
+ ```
66
+
67
+ ## Todo
68
+
69
+ - Use `Storage` as gateway to `config.storage`
70
+ - Job convenience class?
71
+ - `.fetch` to find-or-calculate?
72
+ - Adapters: Memcache, ActiveRecord
73
+
74
+ ## Development
75
+
76
+ - `rake test`
data/lib/bramble.rb ADDED
@@ -0,0 +1,37 @@
1
+ require "ostruct"
2
+ require "active_job"
3
+ require "bramble/keys"
4
+ require "bramble/map"
5
+ require "bramble/map_job"
6
+ require "bramble/reduce"
7
+ require "bramble/reduce_job"
8
+ require "bramble/storage"
9
+ require "bramble/version"
10
+ require "bramble/conf"
11
+
12
# Public entry points for Bramble: configuration access plus starting,
# reading and deleting map-reduce runs.
module Bramble
  class << self
    # Access the shared configuration object, `Bramble::CONF`.
    #
    # With a block, the configuration is yielded and the block's value is
    # returned; without one, the configuration itself is returned.
    def config
      return Bramble::CONF unless block_given?

      yield(Bramble::CONF)
    end

    # Start a map-reduce run by handing everything to `Bramble::Map.perform`.
    #
    # @param handle [String] This string will be used to store the result
    # @param implementation [#map, #reduce, #name] The container of map and reduce methods
    # @param items [Array] List of items to map over
    def map_reduce(handle, implementation, items)
      Bramble::Map.perform(handle, implementation, items)
    end

    # Get results for `handle`, if they exist (delegates to `Bramble::Storage.read`).
    def read(handle)
      Bramble::Storage.read(handle)
    end

    # Remove results for `handle`, if there are any (delegates to `Bramble::Storage.delete`).
    def delete(handle)
      Bramble::Storage.delete(handle)
    end
  end
end
@@ -0,0 +1,8 @@
1
module Bramble
  # Shared, mutable settings object returned by `Bramble.config`.
  #
  # - redis_conn: connection object read by RedisStorage (nil until assigned)
  # - namespace:  prefix used when building storage keys
  # - queue_as:   queue name used by the map and reduce jobs
  # - storage:    storage backend module; RedisStorage unless reassigned
  CONF = OpenStruct.new.tap do |conf|
    conf.redis_conn = nil
    conf.namespace = "Bramble"
    conf.queue_as = :bramble
    conf.storage = Bramble::Storage::RedisStorage
  end
end
@@ -0,0 +1,27 @@
1
module Bramble
  # Builders for the storage keys that belong to a single run.
  # Every key starts with "<configured namespace>:<handle>".
  module Keys
    # Common prefix for all keys of `handle`.
    def namespace(handle)
      "#{Bramble.config.namespace}:#{handle}"
    end

    # Key holding the values emitted for one map key (`key`) of `handle`.
    def data_key(handle, key)
      "#{namespace(handle)}:data:#{key}"
    end

    # Key holding the collection of map keys seen for `handle`.
    def keys_key(handle)
      suffixed_key(handle, "keys")
    end

    # Key holding the number of finished map steps for `handle`.
    def finished_count_key(handle)
      suffixed_key(handle, "finished_count")
    end

    # Key holding the number of expected map steps for `handle`.
    def total_count_key(handle)
      suffixed_key(handle, "total_count")
    end

    # Key holding the reduced results for `handle`.
    def result_key(handle)
      suffixed_key(handle, "result")
    end

    private

    # Append ":<suffix>" to the handle's namespace prefix.
    def suffixed_key(handle, suffix)
      "#{namespace(handle)}:#{suffix}"
    end
  end
end
@@ -0,0 +1,38 @@
1
module Bramble
  # Map phase of a run: enqueues one MapJob per input value, records what each
  # mapper call emits, and starts the reduce phase once the finished count
  # equals the stored total.
  module Map
    extend Bramble::Keys

    module_function

    # Wipe any stored data for `handle`, store the number of input values and
    # enqueue a Bramble::MapJob for each of them.
    #
    # @param handle [String] identifies this run in storage
    # @param implementation [#name] its name is passed to the jobs
    # @param values [#length, #each] the inputs to map over
    def perform(handle, implementation, values)
      # TODO: make sure there isn't one going on right now
      Bramble::Storage.delete(handle)
      storage.set(total_count_key(handle), values.length)
      values.each { |value| Bramble::MapJob.perform_later(handle, implementation.name, value) }
    end

    # Run `implementation.map` on one value and persist every yielded pair:
    # the dumped key goes into the handle's key collection and the dumped value
    # is appended under that key's data key. Afterwards the finished counter is
    # incremented; when it equals the stored total, the reduce phase begins.
    def perform_map(handle, implementation, value)
      key_collection = keys_key(handle)
      implementation.map(value) do |emitted_key, emitted_value|
        dumped_key = Bramble::Storage.dump(emitted_key)
        storage.map_keys_push(key_collection, dumped_key)
        storage.map_result_push(data_key(handle, dumped_key), Bramble::Storage.dump(emitted_value))
      end

      finished_count = storage.increment(finished_count_key(handle))
      expected_count = storage.get(total_count_key(handle)).to_i
      Bramble::Reduce.perform(handle, implementation) if finished_count == expected_count
    end

    # The configured storage backend. Defined while `module_function` is still
    # in effect, so it is callable as `Bramble::Map.storage`.
    def storage
      Bramble.config.storage
    end
  end
end
@@ -0,0 +1,9 @@
1
module Bramble
  # Job that runs the map step for one input value.
  class MapJob < ActiveJob::Base
    # The queue name is taken from Bramble.config.queue_as.
    queue_as { Bramble.config.queue_as }

    # @param handle [String] identifies the run
    # @param mapper_name [String] name of the implementation; resolved to a
    #   constant with `constantize`
    # @param value [Object] the single input to map
    def perform(handle, mapper_name, value)
      Bramble::Map.perform_map(handle, mapper_name.constantize, value)
    end
  end
end
@@ -0,0 +1,29 @@
1
module Bramble
  # Reduce phase of a run: enqueues one ReduceJob per stored map key and
  # stores each reduced value under the handle's result key.
  module Reduce
    extend Bramble::Keys

    module_function

    # Enqueue a Bramble::ReduceJob for every raw key recorded for `handle`.
    def perform(handle, implementation)
      storage.map_keys_get(keys_key(handle)).each do |raw_key|
        Bramble::ReduceJob.perform_later(handle, implementation.name, raw_key)
      end
    end

    # Load the values stored for `raw_key`, pass the loaded key and values to
    # `implementation.reduce`, and store the dumped outcome under `raw_key`
    # in the handle's result.
    def perform_reduce(handle, implementation, raw_key)
      stored_values = storage.map_result_get(data_key(handle, raw_key))
      loaded_values = Bramble::Storage.load(stored_values)
      loaded_key = Bramble::Storage.load(raw_key)
      outcome = implementation.reduce(loaded_key, loaded_values)
      storage.reduce_result_set(result_key(handle), raw_key, Bramble::Storage.dump(outcome))
    end

    # The configured storage backend. Defined while `module_function` is still
    # in effect, so it is callable as `Bramble::Reduce.storage`.
    def storage
      Bramble.config.storage
    end
  end
end
@@ -0,0 +1,9 @@
1
module Bramble
  # Job that runs the reduce step for one stored key.
  class ReduceJob < ActiveJob::Base
    # The queue name is taken from Bramble.config.queue_as.
    queue_as { Bramble.config.queue_as }

    # @param handle [String] identifies the run
    # @param reducer_name [String] name of the implementation; resolved to a
    #   constant with `constantize`
    # @param key [String] the raw (dumped) key whose values are reduced
    def perform(handle, reducer_name, key)
      Bramble::Reduce.perform_reduce(handle, reducer_name.constantize, key)
    end
  end
end
@@ -0,0 +1,57 @@
1
+ require "bramble/storage/redis_storage"
2
+ require "bramble/storage/memory_storage"
3
+
4
module Bramble
  # Backend-independent helpers: read or delete everything stored for a
  # handle, and serialize values with Marshal. The actual reads and writes go
  # to the backend in `Bramble.config.storage`.
  module Storage
    extend Bramble::Keys

    class << self
      # Fetch the reduced results stored for `handle` and deserialize them.
      def read(handle)
        load(storage.reduce_result_get(result_key(handle)))
      end

      # Wipe out the results for this handle
      def delete(handle)
        # Reset both counters and the result
        [total_count_key(handle), finished_count_key(handle), result_key(handle)].each do |key|
          storage.delete(key)
        end

        # Reset dangling map data: each key's data first, then the key collection
        storage.map_keys_get(keys_key(handle)).each do |group_key|
          storage.delete(data_key(handle, group_key))
        end
        storage.delete(keys_key(handle))
      end

      # prepare an object for storage
      def dump(obj)
        Marshal.dump(obj)
      end

      # reload an object from storage
      #
      # Arrays are loaded element by element and hashes key by key and value
      # by value; anything else is handed to Marshal.load.
      # NOTE(review): Marshal.load can instantiate arbitrary objects, so this
      # presumes the backend only holds data written by `dump` - confirm.
      def load(stored_obj)
        case stored_obj
        when Array
          stored_obj.map { |item| load(item) }
        when Hash
          stored_obj.each_with_object({}) do |(stored_key, stored_value), loaded|
            loaded[load(stored_key)] = load(stored_value)
          end
        else
          Marshal.load(stored_obj)
        end
      end

      # The configured storage backend. Publicly callable as
      # `Bramble::Storage.storage`.
      def storage
        Bramble.config.storage
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
+ require "set"
2
+
3
module Bramble
  module Storage
    # ☠ This is for single-threaded, single-process Ruby only!
    # If you try to use this in production, you're going to have a bad time.
    #
    # Storage backend that keeps everything in the STORAGE hash of the
    # current process.
    module MemoryStorage
      STORAGE = {}

      module_function

      # Store `value` under `key`; returns `value`.
      def set(key, value)
        STORAGE.store(key, value)
      end

      # Return whatever is stored under `key` (nil when absent).
      def get(key)
        STORAGE[key]
      end

      # Remove `key`; returns the removed value (nil when absent).
      def delete(key)
        STORAGE.delete(key)
      end

      # Add one to the counter at `key`, starting from 0; returns the new count.
      def increment(key)
        STORAGE[key] = (STORAGE[key] || 0) + 1
      end

      # Append `value` to the array stored at `key`, creating it if needed.
      def map_result_push(key, value)
        (STORAGE[key] ||= []) << value
      end

      # Return the array stored at `key`, or an empty array.
      def map_result_get(key)
        STORAGE[key] || []
      end

      # Set `reduce_key => value` in the hash stored at `storage_key`,
      # creating the hash if needed.
      def reduce_result_set(storage_key, reduce_key, value)
        (STORAGE[storage_key] ||= {})[reduce_key] = value
      end

      # Return the hash stored at `storage_key`, or an empty hash.
      def reduce_result_get(storage_key)
        STORAGE[storage_key] || {}
      end

      # Add `value` to the Set stored at `key`, creating it if needed.
      def map_keys_push(key, value)
        (STORAGE[key] ||= Set.new) << value
      end

      # Return the Set stored at `key`, or an empty Set.
      def map_keys_get(key)
        STORAGE[key] || Set.new
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
module Bramble
  module Storage
    # Storage backend that forwards every operation to the connection object
    # in `Bramble.config.redis_conn`.
    # NOTE(review): presumably a redis-rb style client - confirm against caller.
    module RedisStorage
      module_function

      # The connection object every operation below is sent to. Defined while
      # `module_function` is in effect, so it is callable as
      # `RedisStorage.redis_conn`.
      def redis_conn
        Bramble.config.redis_conn
      end

      # --- plain values and counters ---

      # Forward to `set(key, value)` on the connection.
      def set(key, value)
        redis_conn.set(key, value)
      end

      # Forward to `get(key)` on the connection.
      def get(key)
        redis_conn.get(key)
      end

      # Forward to `del(key)` on the connection.
      def delete(key)
        redis_conn.del(key)
      end

      # Forward to `incr(key)` on the connection.
      def increment(key)
        redis_conn.incr(key)
      end

      # --- values emitted by the map step ---

      # Append `value` at `key` via `rpush`.
      def map_result_push(key, value)
        redis_conn.rpush(key, value)
      end

      # Read the full range (index 0 through -1) at `key` via `lrange`.
      def map_result_get(key)
        redis_conn.lrange(key, 0, -1)
      end

      # --- keys emitted by the map step ---

      # Add `value` at `key` via `sadd`.
      def map_keys_push(key, value)
        redis_conn.sadd(key, value)
      end

      # Read all members at `key` via `smembers`.
      def map_keys_get(key)
        redis_conn.smembers(key)
      end

      # --- reduced results ---

      # Set field `reduce_key` to `value` at `storage_key` via `hset`.
      def reduce_result_set(storage_key, reduce_key, value)
        redis_conn.hset(storage_key, reduce_key, value)
      end

      # Read every field and value at `storage_key` via `hgetall`.
      def reduce_result_get(storage_key)
        redis_conn.hgetall(storage_key)
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Bramble
  # Version string of the bramble gem.
  VERSION = "0.1.0"
end
metadata ADDED
@@ -0,0 +1,140 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bramble
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Robert Mosolgo
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-05-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activejob
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: minitest
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: minitest-focus
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest-reporters
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: redis
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: Distribute map-reduce tasks with ActiveJob, storing the results in Redis
98
+ (or another backend)
99
+ email: rdmosolgo@gmail.com
100
+ executables: []
101
+ extensions: []
102
+ extra_rdoc_files: []
103
+ files:
104
+ - README.md
105
+ - lib/bramble.rb
106
+ - lib/bramble/conf.rb
107
+ - lib/bramble/keys.rb
108
+ - lib/bramble/map.rb
109
+ - lib/bramble/map_job.rb
110
+ - lib/bramble/reduce.rb
111
+ - lib/bramble/reduce_job.rb
112
+ - lib/bramble/storage.rb
113
+ - lib/bramble/storage/memory_storage.rb
114
+ - lib/bramble/storage/redis_storage.rb
115
+ - lib/bramble/version.rb
116
+ homepage: https://github.com/rmosolgo/bramble
117
+ licenses:
118
+ - MIT
119
+ metadata: {}
120
+ post_install_message:
121
+ rdoc_options: []
122
+ require_paths:
123
+ - lib
124
+ required_ruby_version: !ruby/object:Gem::Requirement
125
+ requirements:
126
+ - - ">="
127
+ - !ruby/object:Gem::Version
128
+ version: 2.0.0
129
+ required_rubygems_version: !ruby/object:Gem::Requirement
130
+ requirements:
131
+ - - ">="
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ requirements: []
135
+ rubyforge_project:
136
+ rubygems_version: 2.5.1
137
+ signing_key:
138
+ specification_version: 4
139
+ summary: Map-reduce, backed by ActiveJob
140
+ test_files: []