bramble 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +18 -7
- data/lib/bramble/keys.rb +4 -0
- data/lib/bramble/map.rb +3 -1
- data/lib/bramble/reduce.rb +3 -0
- data/lib/bramble/result.rb +15 -10
- data/lib/bramble/serialize.rb +5 -2
- data/lib/bramble/state.rb +25 -0
- data/lib/bramble/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 138b78b6cd5ccf7de287e447453cb010de725eb7
|
4
|
+
data.tar.gz: 554476911d84da387cc7017b4bb0041810507fe7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c78a8f5a1eaf795df61ef62439ee77d2de522edea38f9c8ee77538662b91410615b362ea25e7b387f333cc2461d8b32a631130c1c277b1852befdd3062bcef9d
|
7
|
+
data.tar.gz: f112df8555c290fbf037ec7c401014f4d5c6ec1794a0351317baf568632c211b0ca794c75f7ac6505ae40f2213b40c2603df59394de5b223796ab8bb3e429f27
|
data/README.md
CHANGED
@@ -1,6 +1,10 @@
|
|
1
|
-
# Bramble [![Build Status](https://travis-ci.org/rmosolgo/bramble.svg?branch=master)](https://travis-ci.org/rmosolgo/bramble)
|
1
|
+
# Bramble [![Build Status](https://travis-ci.org/rmosolgo/bramble.svg?branch=master)](https://travis-ci.org/rmosolgo/bramble) [![Gem Version](https://badge.fury.io/rb/bramble.svg)](https://badge.fury.io/rb/bramble)
|
2
2
|
|
3
|
-
Map-reduce with ActiveJob
|
3
|
+
Map-reduce with ActiveJob + database
|
4
|
+
|
5
|
+
## Rationale
|
6
|
+
|
7
|
+
We have some staff-only views that expose stats about how people use our app. Eventually, our tables grew so large that MySQL couldn't aggregate them all at once, so we use Bramble to generate those stats over time.
|
4
8
|
|
5
9
|
## Usage
|
6
10
|
|
@@ -43,6 +47,8 @@ Map-reduce with ActiveJob
|
|
43
47
|
end
|
44
48
|
```
|
45
49
|
|
50
|
+
Inputs and outputs are serialized as __JSON__, so some Ruby types are not preserved (e.g., Symbols come back as Strings).
|
51
|
+
|
46
52
|
- Start a job with a handle, module, and an (optional) argument for finding data:
|
47
53
|
|
48
54
|
```ruby
|
@@ -60,9 +66,13 @@ Map-reduce with ActiveJob
|
|
60
66
|
|
61
67
|
```ruby
|
62
68
|
result = Bramble.get("shakespeare-letter-count")
|
63
|
-
result.running?
|
64
|
-
result.finished?
|
65
|
-
result.data
|
69
|
+
result.running? # => false
|
70
|
+
result.finished? # => true
|
71
|
+
result.data # => { "A" => 100, "B" => 100, ... }
|
72
|
+
result.percent_finished # => 1.0
|
73
|
+
result.percent_mapped # => 1.0
|
74
|
+
result.percent_reduced # => 1.0
|
75
|
+
result.finished_at # => 2016-05-16 12:31:00 UTC
|
66
76
|
```
|
67
77
|
|
68
78
|
- Delete the saved result:
|
@@ -73,8 +83,9 @@ Map-reduce with ActiveJob
|
|
73
83
|
|
74
84
|
## Todo
|
75
85
|
|
76
|
-
-
|
77
|
-
-
|
86
|
+
- Adapters: Memcached, ActiveRecord
|
87
|
+
- Do we have atomicity issues? (There are occasional test failures.)
|
88
|
+
- Consolidate storage in Redis to a single key? (Could some keys be evicted while others remain?)
|
78
89
|
|
79
90
|
## Development
|
80
91
|
|
data/lib/bramble/keys.rb
CHANGED
data/lib/bramble/map.rb
CHANGED
@@ -19,10 +19,12 @@ module Bramble
|
|
19
19
|
implementation.map(value) do |map_key, map_val|
|
20
20
|
Bramble::State.running?(handle) do
|
21
21
|
raw_key = Bramble::Serialize.dump(map_key)
|
22
|
+
raw_value = Bramble::Serialize.dump(map_val)
|
22
23
|
storage.map_keys_push(impl_keys_key, raw_key)
|
23
|
-
storage.map_result_push(data_key(handle, raw_key),
|
24
|
+
storage.map_result_push(data_key(handle, raw_key), raw_value)
|
24
25
|
end
|
25
26
|
end
|
27
|
+
|
26
28
|
Bramble::State.running?(handle) do
|
27
29
|
finished = storage.increment(map_finished_count_key(handle))
|
28
30
|
total = storage.get(map_total_count_key(handle)).to_i
|
data/lib/bramble/reduce.rb
CHANGED
@@ -23,6 +23,9 @@ module Bramble
|
|
23
23
|
Bramble::State.running?(handle) do
|
24
24
|
storage.reduce_result_set(result_key(handle), raw_key, Bramble::Serialize.dump(reduced_value))
|
25
25
|
storage.increment(reduce_finished_count_key(handle))
|
26
|
+
if Bramble::State.percent_reduced(handle) >= 1
|
27
|
+
storage.set(finished_at_key(handle), Time.now.to_i)
|
28
|
+
end
|
26
29
|
end
|
27
30
|
else
|
28
31
|
Bramble::State.clear_reduce(handle)
|
data/lib/bramble/result.rb
CHANGED
@@ -2,11 +2,19 @@ module Bramble
|
|
2
2
|
# This class exposes the data and some info about the state of the task
|
3
3
|
class Result
|
4
4
|
|
5
|
-
attr_reader :handle
|
5
|
+
attr_reader :handle, :percent_mapped, :percent_reduced, :finished_at
|
6
6
|
|
7
7
|
def initialize(handle)
|
8
8
|
job_id = storage.get(Bramble::Keys.job_id_key(handle))
|
9
9
|
@handle = "#{handle}:#{job_id}"
|
10
|
+
@percent_mapped = Bramble::State.percent_mapped(@handle)
|
11
|
+
@percent_reduced = Bramble::State.percent_reduced(@handle)
|
12
|
+
if finished?
|
13
|
+
finished_at_ms = storage.get(Bramble::Keys.finished_at_key(@handle)).to_i
|
14
|
+
@finished_at = Time.at(finished_at_ms)
|
15
|
+
else
|
16
|
+
@finished_at = nil
|
17
|
+
end
|
10
18
|
end
|
11
19
|
|
12
20
|
def data
|
@@ -18,22 +26,19 @@ module Bramble
|
|
18
26
|
end
|
19
27
|
|
20
28
|
def finished?
|
21
|
-
|
29
|
+
# Possible to be greater than 1 because of floating-point arithmetic
|
30
|
+
percent_finished >= 1
|
22
31
|
end
|
23
32
|
|
24
33
|
def running?
|
25
|
-
|
34
|
+
started? && !finished?
|
26
35
|
end
|
27
36
|
|
28
|
-
|
29
|
-
|
30
|
-
def total_count
|
31
|
-
@total_count ||= storage.get(Bramble::Keys.reduce_total_count_key(handle)).to_i
|
37
|
+
def percent_finished
|
38
|
+
(percent_mapped + percent_reduced) / 2
|
32
39
|
end
|
33
40
|
|
34
|
-
|
35
|
-
@finished_count ||= storage.get(Bramble::Keys.reduce_finished_count_key(handle)).to_i
|
36
|
-
end
|
41
|
+
private
|
37
42
|
|
38
43
|
def started?
|
39
44
|
@started ||= !!storage.get(Bramble::Keys.status_key(handle))
|
data/lib/bramble/serialize.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
|
+
require "json"
|
1
2
|
module Bramble
|
2
3
|
# eg, Redis uses strings only, so use this module to freeze and thaw values from storage
|
3
4
|
module Serialize
|
4
5
|
# prepare an object for storage
|
5
6
|
def self.dump(obj)
|
6
|
-
|
7
|
+
JSON.dump(obj)
|
7
8
|
end
|
8
9
|
|
9
10
|
# reload an object from storage
|
@@ -16,8 +17,10 @@ module Bramble
|
|
16
17
|
memo[load(k)] = load(v)
|
17
18
|
memo
|
18
19
|
end
|
20
|
+
when nil
|
21
|
+
nil
|
19
22
|
else
|
20
|
-
|
23
|
+
JSON.load(stored_obj)
|
21
24
|
end
|
22
25
|
end
|
23
26
|
end
|
data/lib/bramble/state.rb
CHANGED
@@ -32,6 +32,20 @@ module Bramble
|
|
32
32
|
clear_map(handle)
|
33
33
|
end
|
34
34
|
|
35
|
+
def percent_mapped(handle)
|
36
|
+
percent_between_keys(
|
37
|
+
map_total_count_key(handle),
|
38
|
+
map_finished_count_key(handle)
|
39
|
+
)
|
40
|
+
end
|
41
|
+
|
42
|
+
def percent_reduced(handle)
|
43
|
+
percent_between_keys(
|
44
|
+
reduce_total_count_key(handle),
|
45
|
+
reduce_finished_count_key(handle)
|
46
|
+
)
|
47
|
+
end
|
48
|
+
|
35
49
|
def clear_map(handle)
|
36
50
|
map_group_keys = storage.map_keys_get(keys_key(handle))
|
37
51
|
map_group_keys.each do |group_key|
|
@@ -46,6 +60,7 @@ module Bramble
|
|
46
60
|
storage.delete(reduce_total_count_key(handle))
|
47
61
|
storage.delete(reduce_finished_count_key(handle))
|
48
62
|
storage.delete(result_key(handle))
|
63
|
+
storage.delete(finished_at_key(handle))
|
49
64
|
end
|
50
65
|
|
51
66
|
private
|
@@ -53,5 +68,15 @@ module Bramble
|
|
53
68
|
def self.storage
|
54
69
|
Bramble::Storage
|
55
70
|
end
|
71
|
+
|
72
|
+
def self.percent_between_keys(total_key, finished_key)
|
73
|
+
total = storage.get(total_key).to_f
|
74
|
+
if total == 0
|
75
|
+
0
|
76
|
+
else
|
77
|
+
finished = storage.get(finished_key).to_i
|
78
|
+
finished / total
|
79
|
+
end
|
80
|
+
end
|
56
81
|
end
|
57
82
|
end
|
data/lib/bramble/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bramble
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Robert Mosolgo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-05-
|
11
|
+
date: 2016-05-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activejob
|