jobba 1.5.0 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +19 -0
- data/Rakefile +1 -1
- data/lib/jobba.rb +23 -0
- data/lib/jobba/clause.rb +85 -1
- data/lib/jobba/clause_factory.rb +4 -1
- data/lib/jobba/id_clause.rb +8 -0
- data/lib/jobba/query.rb +108 -57
- data/lib/jobba/utils.rb +41 -0
- data/lib/jobba/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9d8bc9c17314ba74b80cf4c79c5d638f9d35b83b
|
4
|
+
data.tar.gz: e5f612733209502da66c0200cc924fb941d8f773
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e185dfac6ec0e3d5d1daad73b62abc307dd8098251c1889f8427dbf1137970a9bb6e24ccdfda10eeacbd7ddd29648ee28cc7a7a8b1e4ced5b1c664208a307527
|
7
|
+
data.tar.gz: 259cd22746fb655039ca5e4b11c7095567c725ba03bb238445ef702ad24e07aa540eddab71613e21990508006050ddde68ec0173b6afc4cd32ecfdd3491ba541
|
data/README.md
CHANGED
@@ -439,6 +439,17 @@ Jobba.where(...).run.count # These pull data back to Ruby and count in Ruby
|
|
439
439
|
Jobba.where(...).run.empty?
|
440
440
|
```
|
441
441
|
|
442
|
+
## Pagination
|
443
|
+
|
444
|
+
Pagination is supported with an ActiveRecord-like interface. You can call `.limit(x)` and `.offset(y)` on
|
445
|
+
queries, e.g.
|
446
|
+
|
447
|
+
```ruby
|
448
|
+
Jobba.where(state: :succeeded).limit(10).offset(20).to_a
|
449
|
+
```
|
450
|
+
|
451
|
+
Specifying a limit does not guarantee that you'll get that many elements back, as there may not be that many left in the result.
|
452
|
+
|
442
453
|
## Notes
|
443
454
|
|
444
455
|
### Times
|
@@ -449,6 +460,8 @@ Note that, in operations having to do with time, this gem ignores anything beyon
|
|
449
460
|
|
450
461
|
Jobba strives to do all of its operations as efficiently as possible using built-in Redis operations. If you find a place where the efficiency can be improved, please submit an issue or a pull request.
|
451
462
|
|
463
|
+
Single-clause queries (those with one `where` call) have been optimized. `Jobba.all` is a single-clause query. If you have lots of IDs, try to get by with single-clause queries. Multi-clause queries (including `count`) have to copy sets into temporary working sets where query clauses are ANDed together. This can be expensive for large datasets.
|
464
|
+
|
452
465
|
### Write from one; Read from many
|
453
466
|
|
454
467
|
Jobba assumes that any job is being run at one time by only one worker. Jobba makes no accommodations for multiple processes updating a Status at the same time; multiple processes reading a Status is fine, of course.
|
@@ -463,6 +476,12 @@ $> USE_REAL_REDIS=true rspec
|
|
463
476
|
|
464
477
|
Travis runs the specs with both `fakeredis` and real Redis.
|
465
478
|
|
479
|
+
Clauses need to implement three methods:
|
480
|
+
|
481
|
+
1. `to_new_set` - puts the IDs indicated by the clause into a new sorted set in redis
|
482
|
+
2. `result_ids` - used to get the IDs indicated by the clause when the clause is the only one in the query
|
483
|
+
3. `result_count` - used to get the count of IDs indicated by the clause when the clause is the only one in the query
|
484
|
+
|
466
485
|
## TODO
|
467
486
|
|
468
487
|
1. Provide job min, max, and average durations.
|
data/Rakefile
CHANGED
data/lib/jobba.rb
CHANGED
@@ -38,4 +38,27 @@ module Jobba
|
|
38
38
|
)
|
39
39
|
end
|
40
40
|
|
41
|
+
# Clears the whole shebang! USE WITH CARE!
|
42
|
+
# Fetches every key visible through `Jobba.redis` and deletes them in slices
# of 1000, so each `DEL` call carries at most 1000 keys.
#
# NOTE(review): `keys("*")` matches everything the connection can see --
# confirm `Jobba.redis` is namespaced before running this on a shared Redis.
def self.clear_all_jobba_data!
  all_keys = Jobba.redis.keys("*")
  all_keys.each_slice(1000) { |batch| Jobba.redis.del(*batch) }
end
|
48
|
+
|
49
|
+
# Deletes, in batches, every job whose `recorded_at` is earlier than
# `seconds_ago` seconds before the moment this method starts.
#
# seconds_ago - age threshold in seconds (defaults to 60*60*24*30*12).
# batch_size  - maximum number of jobs fetched and deleted per pass; must be
#               positive, otherwise the loop below could never terminate.
#
# Returns the total number of jobs deleted.
# Raises ArgumentError if `batch_size` is not positive.
def self.cleanup(seconds_ago: 60*60*24*30*12, batch_size: 1000)
  raise ArgumentError, "`batch_size` must be positive" unless batch_size > 0

  delete_before = Jobba::Time.now - seconds_ago
  jobs_count = 0

  loop do
    jobs = where(recorded_at: { before: delete_before }).limit(batch_size).to_a
    jobs.each(&:delete!)
    jobs_count += jobs.size

    # A short batch means nothing older than the cutoff is left to fetch.
    break if jobs.size < batch_size
  end

  jobs_count
end
|
63
|
+
|
41
64
|
end
|
data/lib/jobba/clause.rb
CHANGED
@@ -4,7 +4,9 @@ class Jobba::Clause
|
|
4
4
|
include Jobba::Common
|
5
5
|
|
6
6
|
# if `keys` or `suffixes` is an array, all entries will be included in the resulting set
|
7
|
-
def initialize(prefix: nil, suffixes: nil, keys: nil, min: nil, max: nil
|
7
|
+
def initialize(prefix: nil, suffixes: nil, keys: nil, min: nil, max: nil,
|
8
|
+
keys_contain_only_unique_ids: false)
|
9
|
+
|
8
10
|
if keys.nil? && prefix.nil? && suffixes.nil?
|
9
11
|
raise ArgumentError, "Either `keys` or both `prefix` and `suffix` must be specified.", caller
|
10
12
|
end
|
@@ -22,6 +24,8 @@ class Jobba::Clause
|
|
22
24
|
|
23
25
|
@min = min
|
24
26
|
@max = max
|
27
|
+
|
28
|
+
@keys_contain_only_unique_ids = keys_contain_only_unique_ids
|
25
29
|
end
|
26
30
|
|
27
31
|
def to_new_set
|
@@ -41,4 +45,84 @@ class Jobba::Clause
|
|
41
45
|
new_key
|
42
46
|
end
|
43
47
|
|
48
|
+
# Returns the IDs selected by this clause; used when the clause is the only
# one in the query.
#
# offset, limit - optional paging; applied only when BOTH are given.
#
# With exactly one key, paging is forwarded to `get_members`, which reports
# through `:is_limited` whether it already applied it. With several keys the
# members of every key are gathered, sorted and (unless the keys are known to
# hold disjoint IDs) de-duplicated before any paging happens.
#
# Always returns an Array (empty when `offset` is past the end).
def result_ids(offset: nil, limit: nil)
  id_data =
    if @keys.one?
      get_members(key: @keys.first, offset: offset, limit: limit)
    else
      # No per-key limiting: paging only makes sense on the merged list.
      ids = @keys.flat_map { |key| get_members(key: key)[:ids] }
      ids.sort!
      ids.uniq! unless @keys_contain_only_unique_ids

      {ids: ids, is_limited: false}
    end

  return id_data[:ids] if offset.nil? || limit.nil? || id_data[:is_limited]

  # Array#slice returns nil when `offset` is beyond the end of the array;
  # callers expect a (possibly empty) list of IDs.
  id_data[:ids].slice(offset, limit) || []
end
|
74
|
+
|
75
|
+
# Reads the member IDs stored under `key`.
#
# key           - redis key to read.
# offset, limit - optional paging; only honored for sorted keys and only when
#                 BOTH are given.
#
# Keys for which `sorted_key?` is truthy are read with ZRANGEBYSCORE, bounded
# by @min/@max (open-ended where nil). Other keys are read with SMEMBERS and
# sorted in Ruby; paging is never applied to them here.
#
# Returns a Hash `{ids: Array, is_limited: Boolean}`; `is_limited` tells the
# caller whether offset/limit were already applied by redis.
def get_members(key:, offset: nil, limit: nil)
  unless sorted_key?(key)
    return {ids: redis.smembers(key).sort, is_limited: false}
  end

  min = @min.nil? ? "-inf" : @min.to_s
  max = @max.nil? ? "+inf" : @max.to_s

  is_limited = !offset.nil? && !limit.nil?
  options = {}
  options[:limit] = [offset, limit] if is_limited

  # Splat the options: redis-rb versions that declare `limit:` as a keyword
  # argument reject a positional Hash on Ruby 3, while older versions that
  # take a trailing options Hash accept the splatted form as well.
  ids = redis.zrangebyscore(key, min, max, **options)

  {ids: ids, is_limited: is_limited}
end
|
97
|
+
|
98
|
+
# Returns how many IDs this clause selects, honoring `offset`/`limit`; used
# when the clause is the only one in the query.
#
# When there is a single key, or the keys are known to hold disjoint IDs,
# each key is counted with a redis cardinality command (SCARD, ZCARD or
# ZCOUNT) and the totals are summed; paging is then applied arithmetically
# via `Jobba::Utils.limited_count`. Otherwise duplicates across keys must be
# collapsed, so the IDs themselves are fetched and counted.
def result_count(offset: nil, limit: nil)
  unless @keys.one? || @keys_contain_only_unique_ids
    # `result_ids` can hand back nil when `offset` is past the end of the
    # list; treat that as "no IDs" instead of calling `count` on nil.
    return (result_ids(offset: offset, limit: limit) || []).count
  end

  per_key_counts = @keys.map do |key|
    if !sorted_key?(key)
      redis.scard(key)
    elsif @min.nil? && @max.nil?
      redis.zcard(key)
    else
      min = @min.nil? ? "-inf" : @min.to_s
      max = @max.nil? ? "+inf" : @max.to_s
      redis.zcount(key, min, max)
    end
  end

  # Seed with 0 so an empty key list counts as zero rather than nil.
  nonlimited_count = per_key_counts.reduce(0, :+)

  Jobba::Utils.limited_count(nonlimited_count: nonlimited_count,
                             offset: offset, limit: limit)
end
|
123
|
+
|
124
|
+
# Whether `key` is treated as a sorted set: true exactly when its name ends
# in "_at". Callers in this class use sorted-set commands for such keys and
# plain-set commands for the rest.
#
# Returns true or false. A suffix check is used instead of a `$`-anchored
# regexp, which would also match before a trailing newline.
def sorted_key?(key)
  key.to_s.end_with?("_at")
end
|
127
|
+
|
44
128
|
end
|
data/lib/jobba/clause_factory.rb
CHANGED
@@ -65,7 +65,10 @@ class Jobba::ClauseFactory
|
|
65
65
|
}.uniq
|
66
66
|
|
67
67
|
validate_state_name!(state)
|
68
|
-
|
68
|
+
|
69
|
+
# An ID is in only one state at a time, so we can tell `Clause` that
|
70
|
+
# info via `keys_contain_only_unique_ids` -- helps it be more efficient
|
71
|
+
Jobba::Clause.new(keys: state, keys_contain_only_unique_ids: true)
|
69
72
|
end
|
70
73
|
|
71
74
|
def self.validate_state_name!(state_name)
|
data/lib/jobba/id_clause.rb
CHANGED
@@ -11,4 +11,12 @@ class Jobba::IdClause
|
|
11
11
|
redis.zadd(new_key, @ids.collect{|id| [0, id]}) if @ids.any?
|
12
12
|
new_key
|
13
13
|
end
|
14
|
+
|
15
|
+
# Returns this clause's IDs as strings, in the order they were given,
# optionally paged.
#
# offset - number of leading IDs to skip (defaults to 0).
# limit  - maximum number of IDs to return (defaults to all of them).
#
# Always returns an Array; it is empty when `offset` is past the end.
def result_ids(offset: nil, limit: nil)
  all_ids = @ids.map(&:to_s)

  # Array#slice returns nil when the start index is beyond the array's end.
  all_ids.slice(offset || 0, limit || all_ids.size) || []
end
|
18
|
+
|
19
|
+
# Number of IDs this clause yields once `offset`/`limit` are taken into
# account; the arithmetic is delegated to `Jobba::Utils.limited_count`.
def result_count(offset: nil, limit: nil)
  total = @ids.count
  Jobba::Utils.limited_count(limit: limit, offset: offset, nonlimited_count: total)
end
|
14
22
|
end
|
data/lib/jobba/query.rb
CHANGED
@@ -6,6 +6,8 @@ class Jobba::Query
|
|
6
6
|
|
7
7
|
include Jobba::Common
|
8
8
|
|
9
|
+
attr_reader :_limit, :_offset
|
10
|
+
|
9
11
|
def where(options)
|
10
12
|
options.each do |kk,vv|
|
11
13
|
clauses.push(Jobba::ClauseFactory.new_clause(kk,vv))
|
@@ -14,8 +16,19 @@ class Jobba::Query
|
|
14
16
|
self
|
15
17
|
end
|
16
18
|
|
19
|
+
# Records the maximum number of results to return and, if no offset has been
# chosen yet, defaults the offset to 0. Returns the query itself so calls can
# be chained.
def limit(number)
  tap do
    @_limit = number
    @_offset ||= 0
  end
end
|
24
|
+
|
25
|
+
# Records how many leading results to skip. Returns the query itself so calls
# can be chained.
def offset(number)
  tap { @_offset = number }
end
|
29
|
+
|
17
30
|
def count
|
18
|
-
_run(
|
31
|
+
_run(CountStatuses.new(self))
|
19
32
|
end
|
20
33
|
|
21
34
|
def empty?
|
@@ -39,7 +52,7 @@ class Jobba::Query
|
|
39
52
|
end
|
40
53
|
|
41
54
|
def run
|
42
|
-
_run(
|
55
|
+
_run(GetStatuses.new(self))
|
43
56
|
end
|
44
57
|
|
45
58
|
protected
|
@@ -50,73 +63,111 @@ class Jobba::Query
|
|
50
63
|
@clauses = []
|
51
64
|
end
|
52
65
|
|
53
|
-
class
|
54
|
-
attr_reader :
|
66
|
+
class Operations
|
67
|
+
attr_reader :query, :redis
|
68
|
+
|
69
|
+
def initialize(query)
|
70
|
+
@query = query
|
71
|
+
@redis = query.redis
|
72
|
+
end
|
55
73
|
|
56
|
-
|
57
|
-
|
58
|
-
|
74
|
+
# Standalone method that gives the final result when the query is one clause
|
75
|
+
def handle_single_clause(clause)
|
76
|
+
raise "AbstractMethod"
|
59
77
|
end
|
60
78
|
|
61
|
-
|
62
|
-
|
79
|
+
# When the query is multiple clauses, this method is called on the final set
|
80
|
+
# that represents the ANDing of all clauses. It is called inside a `redis.multi`
|
81
|
+
# block.
|
82
|
+
def multi_clause_last_redis_op(result_set)
|
83
|
+
raise "AbstractMethod"
|
84
|
+
end
|
85
|
+
|
86
|
+
# Called on the output from the redis multi block for multi-clause queries.
|
87
|
+
def multi_clause_postprocess(redis_output)
|
88
|
+
raise "AbstractMethod"
|
63
89
|
end
|
64
90
|
end
|
65
91
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
},
|
70
|
-
->(ids) {
|
92
|
+
class GetStatuses < Operations
|
93
|
+
def handle_single_clause(clause)
|
94
|
+
ids = clause.result_ids(limit: query._limit, offset: query._offset)
|
71
95
|
Jobba::Statuses.new(ids)
|
72
|
-
|
73
|
-
)
|
74
|
-
|
75
|
-
COUNT_STATUSES = RunBlocks.new(
|
76
|
-
->(working_set, redis) {
|
77
|
-
redis.zcard(working_set)
|
78
|
-
},
|
79
|
-
nil
|
80
|
-
)
|
81
|
-
|
82
|
-
def _run(run_blocks)
|
83
|
-
# Each clause in a query is converted to a sorted set (which may be filtered,
|
84
|
-
# e.g. in the case of timestamp clauses) and then the sets are successively
|
85
|
-
# intersected.
|
86
|
-
#
|
87
|
-
# Different users of this method have different uses for the final "working"
|
88
|
-
# set. Because we want to bundle all of the creations and intersections of
|
89
|
-
# clause sets into one call to Redis (via a `multi` block), we have users
|
90
|
-
# of this method provide a final block to run on the working set within
|
91
|
-
# Redis (and within the `multi` call) and then another block to run on
|
92
|
-
# the output of the first block.
|
93
|
-
|
94
|
-
multi_result = redis.multi do
|
95
|
-
load_default_clause if clauses.empty?
|
96
|
-
working_set = nil
|
97
|
-
|
98
|
-
clauses.each do |clause|
|
99
|
-
clause_set = clause.to_new_set
|
100
|
-
|
101
|
-
if working_set.nil?
|
102
|
-
working_set = clause_set
|
103
|
-
else
|
104
|
-
redis.zinterstore(working_set, [working_set, clause_set], weights: [0, 0])
|
105
|
-
redis.del(clause_set)
|
106
|
-
end
|
107
|
-
end
|
96
|
+
end
|
108
97
|
|
109
|
-
|
110
|
-
|
98
|
+
# Reads the requested page of IDs from `result_set` with a single ZRANGE.
#
# The page starts at the query's offset (0 when unset) and spans the query's
# limit; with no limit it runs through the last element. Exactly one redis
# command is issued on every path.
def multi_clause_last_redis_op(result_set)
  start = query._offset || 0
  limit = query._limit

  # A limit of 0 must yield nothing. Computing `stop` as start + 0 - 1 gives
  # -1 when start is 0, which ZRANGE reads as "through the last element" and
  # would return the whole set. A range whose start exceeds its stop is empty.
  return redis.zrange(result_set, 1, 0) if limit == 0

  stop = limit.nil? ? -1 : start + limit - 1
  redis.zrange(result_set, start, stop)
end
|
111
103
|
|
112
|
-
|
104
|
+
def multi_clause_postprocess(ids)
|
105
|
+
Jobba::Statuses.new(ids)
|
113
106
|
end
|
107
|
+
end
|
114
108
|
|
115
|
-
|
109
|
+
# Operations used by `Query#count`: yields a number of matching statuses
# rather than the statuses themselves.
class CountStatuses < Operations
  # Single-clause path: the clause computes its own offset/limit-aware count.
  def handle_single_clause(clause)
    paging = { offset: query._offset, limit: query._limit }
    clause.result_count(**paging)
  end

  # Multi-clause path, run on the final ANDed set: ask redis for its
  # cardinality.
  def multi_clause_last_redis_op(result_set)
    redis.zcard(result_set)
  end

  # Multi-clause path, run on the redis output: apply the query's offset and
  # limit arithmetically to the raw cardinality.
  def multi_clause_postprocess(redis_output)
    Jobba::Utils.limited_count(
      nonlimited_count: redis_output,
      offset: query._offset,
      limit: query._limit
    )
  end
end
|
122
|
+
|
123
|
+
def _run(operations)
|
124
|
+
if _limit.nil? && !_offset.nil?
|
125
|
+
raise ArgumentError, "`limit` must be set if `offset` is set", caller
|
126
|
+
end
|
116
127
|
|
117
|
-
|
118
|
-
|
119
|
-
|
128
|
+
load_default_clause if clauses.empty?
|
129
|
+
|
130
|
+
if clauses.one?
|
131
|
+
# We can make specialized calls that don't need intermediate copies of sets
|
132
|
+
# to be made (which are costly)
|
133
|
+
operations.handle_single_clause(clauses.first)
|
134
|
+
else
|
135
|
+
# Each clause in a query is converted to a sorted set (which may be filtered,
|
136
|
+
# e.g. in the case of timestamp clauses) and then the sets are successively
|
137
|
+
# intersected.
|
138
|
+
#
|
139
|
+
# Different users of this method have different uses for the final "working"
|
140
|
+
# set. Because we want to bundle all of the creations and intersections of
|
141
|
+
# clause sets into one call to Redis (via a `multi` block), we have users
|
142
|
+
# of this method provide a final block to run on the working set within
|
143
|
+
# Redis (and within the `multi` call) and then another block to run on
|
144
|
+
# the output of the first block.
|
145
|
+
#
|
146
|
+
# This code also works for the single clause case, but it is less efficient
|
147
|
+
|
148
|
+
multi_result = redis.multi do
|
149
|
+
|
150
|
+
working_set = nil
|
151
|
+
|
152
|
+
clauses.each do |clause|
|
153
|
+
clause_set = clause.to_new_set
|
154
|
+
|
155
|
+
if working_set.nil?
|
156
|
+
working_set = clause_set
|
157
|
+
else
|
158
|
+
redis.zinterstore(working_set, [working_set, clause_set], weights: [0, 0])
|
159
|
+
redis.del(clause_set)
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
# This is later accessed as `multi_result[-2]` since it is the second to last output
|
164
|
+
operations.multi_clause_last_redis_op(working_set)
|
165
|
+
|
166
|
+
redis.del(working_set)
|
167
|
+
end
|
168
|
+
|
169
|
+
operations.multi_clause_postprocess(multi_result[-2])
|
170
|
+
end
|
120
171
|
end
|
121
172
|
|
122
173
|
def load_default_clause
|
data/lib/jobba/utils.rb
CHANGED
@@ -26,4 +26,45 @@ module Jobba::Utils
|
|
26
26
|
"temp:#{SecureRandom.hex(10)}"
|
27
27
|
end
|
28
28
|
|
29
|
+
# Converts a count taken WITHOUT paging (`nonlimited_count`) into the count
# that would have resulted had `offset` and `limit` been applied.
#
# Useful when it is cheaper to count everything and adjust afterwards.
#
# Example: array = [a b c d e f g], offset = 4, limit = 5.
# nonlimited_count is 7. Skipping 4 elements leaves [e f g]; the limit of 5
# exceeds what is left, so the limited count is 3.
#
# nonlimited_count - total number of elements before paging.
# offset           - elements to skip (nil is treated as 0).
# limit            - maximum elements to keep (nil means no cap).
#
# Raises ArgumentError if `limit` or `offset` is negative.
# Returns the limited count, never less than 0.
def self.limited_count(nonlimited_count:, offset:, limit:)
  unless limit.nil?
    raise(ArgumentError, "`limit` cannot be negative") if limit < 0
  end
  unless offset.nil?
    raise(ArgumentError, "`offset` cannot be negative") if offset < 0
  end

  skipped = offset || 0
  wanted = limit || nonlimited_count

  # What is left after skipping may be negative when the offset overshoots
  # the end; clamp the final answer at zero.
  remaining = nonlimited_count - skipped
  [[wanted, remaining].min, 0].max
end
|
69
|
+
|
29
70
|
end
|
data/lib/jobba/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jobba
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- JP Slavinsky
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-09-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: redis
|