jobba 1.5.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +19 -0
- data/Rakefile +1 -1
- data/lib/jobba.rb +23 -0
- data/lib/jobba/clause.rb +85 -1
- data/lib/jobba/clause_factory.rb +4 -1
- data/lib/jobba/id_clause.rb +8 -0
- data/lib/jobba/query.rb +108 -57
- data/lib/jobba/utils.rb +41 -0
- data/lib/jobba/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9d8bc9c17314ba74b80cf4c79c5d638f9d35b83b
|
4
|
+
data.tar.gz: e5f612733209502da66c0200cc924fb941d8f773
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e185dfac6ec0e3d5d1daad73b62abc307dd8098251c1889f8427dbf1137970a9bb6e24ccdfda10eeacbd7ddd29648ee28cc7a7a8b1e4ced5b1c664208a307527
|
7
|
+
data.tar.gz: 259cd22746fb655039ca5e4b11c7095567c725ba03bb238445ef702ad24e07aa540eddab71613e21990508006050ddde68ec0173b6afc4cd32ecfdd3491ba541
|
data/README.md
CHANGED
@@ -439,6 +439,17 @@ Jobba.where(...).run.count # These pull data back to Ruby and count in Ruby
|
|
439
439
|
Jobba.where(...).run.empty?
|
440
440
|
```
|
441
441
|
|
442
|
+
## Pagination
|
443
|
+
|
444
|
+
Pagination is supported with an ActiveRecord-like interface. You can call `.limit(x)` and `.offset(y)` on
|
445
|
+
queries, e.g.
|
446
|
+
|
447
|
+
```ruby
|
448
|
+
Jobba.where(state: :succeeded).limit(10).offset(20).to_a
|
449
|
+
```
|
450
|
+
|
451
|
+
Specifying a limit does not guarantee that you'll get that many elements back, as there may not be that many left in the result.
|
452
|
+
|
442
453
|
## Notes
|
443
454
|
|
444
455
|
### Times
|
@@ -449,6 +460,8 @@ Note that, in operations having to do with time, this gem ignores anything beyon
|
|
449
460
|
|
450
461
|
Jobba strives to do all of its operations as efficiently as possible using built-in Redis operations. If you find a place where the efficiency can be improved, please submit an issue or a pull request.
|
451
462
|
|
463
|
+
Single-clause queries (those with a single `where` condition) have been optimized. `Jobba.all` is a single-clause query. If you have lots of IDs, try to get by with single-clause queries. Multi-clause queries (including `count`) have to copy sets into temporary working sets where query clauses are ANDed together. This can be expensive for large datasets.
|
464
|
+
|
452
465
|
### Write from one; Read from many
|
453
466
|
|
454
467
|
Jobba assumes that any job is being run at one time by only one worker. Jobba makes no accommodations for multiple processes updating a Status at the same time; multiple processes reading a Status is fine, of course.
|
@@ -463,6 +476,12 @@ $> USE_REAL_REDIS=true rspec
|
|
463
476
|
|
464
477
|
Travis runs the specs with both `fakeredis` and real Redis.
|
465
478
|
|
479
|
+
Clauses need to implement three methods:
|
480
|
+
|
481
|
+
1. `to_new_set` - puts the IDs indicated by the clause into a new sorted set in redis
|
482
|
+
2. `result_ids` - used to get the IDs indicated by the clause when the clause is the only one in the query
|
483
|
+
3. `result_count` - used to get the count of IDs indicated by the clause when the clause is the only one in the query
|
484
|
+
|
466
485
|
## TODO
|
467
486
|
|
468
487
|
1. Provide job min, max, and average durations.
|
data/Rakefile
CHANGED
data/lib/jobba.rb
CHANGED
@@ -38,4 +38,27 @@ module Jobba
|
|
38
38
|
)
|
39
39
|
end
|
40
40
|
|
41
|
+
# Clears the whole shebang! USE WITH CARE!
|
42
|
+
def self.clear_all_jobba_data!
|
43
|
+
keys = Jobba.redis.keys("*")
|
44
|
+
keys.each_slice(1000) do |some_keys|
|
45
|
+
Jobba.redis.del(*some_keys)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
def self.cleanup(seconds_ago: 60*60*24*30*12, batch_size: 1000)
|
50
|
+
start_time = Jobba::Time.now
|
51
|
+
delete_before = start_time - seconds_ago
|
52
|
+
|
53
|
+
jobs_count = 0
|
54
|
+
loop do
|
55
|
+
jobs = where(recorded_at: { before: delete_before }).limit(batch_size).to_a
|
56
|
+
jobs.each(&:delete!)
|
57
|
+
|
58
|
+
num_jobs = jobs.size
|
59
|
+
jobs_count += num_jobs
|
60
|
+
break if jobs.size < batch_size
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
41
64
|
end
|
data/lib/jobba/clause.rb
CHANGED
@@ -4,7 +4,9 @@ class Jobba::Clause
|
|
4
4
|
include Jobba::Common
|
5
5
|
|
6
6
|
# if `keys` or `suffixes` is an array, all entries will be included in the resulting set
|
7
|
-
def initialize(prefix: nil, suffixes: nil, keys: nil, min: nil, max: nil
|
7
|
+
def initialize(prefix: nil, suffixes: nil, keys: nil, min: nil, max: nil,
|
8
|
+
keys_contain_only_unique_ids: false)
|
9
|
+
|
8
10
|
if keys.nil? && prefix.nil? && suffixes.nil?
|
9
11
|
raise ArgumentError, "Either `keys` or both `prefix` and `suffix` must be specified.", caller
|
10
12
|
end
|
@@ -22,6 +24,8 @@ class Jobba::Clause
|
|
22
24
|
|
23
25
|
@min = min
|
24
26
|
@max = max
|
27
|
+
|
28
|
+
@keys_contain_only_unique_ids = keys_contain_only_unique_ids
|
25
29
|
end
|
26
30
|
|
27
31
|
def to_new_set
|
@@ -41,4 +45,84 @@ class Jobba::Clause
|
|
41
45
|
new_key
|
42
46
|
end
|
43
47
|
|
48
|
+
def result_ids(offset: nil, limit: nil)
|
49
|
+
# If we have one key and it is sorted, we can let redis return limited IDs,
|
50
|
+
# so handle that case specially.
|
51
|
+
|
52
|
+
id_data =
|
53
|
+
if @keys.one?
|
54
|
+
# offset and limit may or may not be used, so have to do again below
|
55
|
+
get_members(key: @keys.first, offset: offset, limit: limit)
|
56
|
+
else
|
57
|
+
ids = @keys.flat_map do |key|
|
58
|
+
# don't do limiting here -- doesn't make sense til we collect all the members
|
59
|
+
get_members(key: key)[:ids]
|
60
|
+
end
|
61
|
+
|
62
|
+
ids.sort!
|
63
|
+
ids.uniq! unless @keys_contain_only_unique_ids
|
64
|
+
|
65
|
+
{ids: ids, is_limited: false}
|
66
|
+
end
|
67
|
+
|
68
|
+
if !offset.nil? && !limit.nil? && id_data[:is_limited] == false
|
69
|
+
id_data[:ids].slice(offset, limit)
|
70
|
+
else
|
71
|
+
id_data[:ids]
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
def get_members(key:, offset: nil, limit: nil)
|
76
|
+
if sorted_key?(key)
|
77
|
+
min = @min.nil? ? "-inf" : "#{@min}"
|
78
|
+
max = @max.nil? ? "+inf" : "#{@max}"
|
79
|
+
|
80
|
+
options = {}
|
81
|
+
is_limited = false
|
82
|
+
|
83
|
+
if !offset.nil? && !limit.nil?
|
84
|
+
options[:limit] = [offset, limit]
|
85
|
+
is_limited = true
|
86
|
+
end
|
87
|
+
|
88
|
+
ids = redis.zrangebyscore(key, min, max, options)
|
89
|
+
|
90
|
+
{ids: ids, is_limited: is_limited}
|
91
|
+
else
|
92
|
+
ids = redis.smembers(key)
|
93
|
+
ids.sort!
|
94
|
+
{ids: ids, is_limited: false}
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
def result_count(offset: nil, limit: nil)
|
99
|
+
if @keys.one? || @keys_contain_only_unique_ids
|
100
|
+
# can count each key on its own using fast redis ops and add them up
|
101
|
+
nonlimited_count = @keys.map do |key|
|
102
|
+
if sorted_key?(key)
|
103
|
+
if @min.nil? && @max.nil?
|
104
|
+
redis.zcard(key)
|
105
|
+
else
|
106
|
+
min = @min.nil? ? "-inf" : "#{@min}"
|
107
|
+
max = @max.nil? ? "+inf" : "#{@max}"
|
108
|
+
|
109
|
+
redis.zcount(key, min, max)
|
110
|
+
end
|
111
|
+
else
|
112
|
+
redis.scard(key)
|
113
|
+
end
|
114
|
+
end.reduce(:+)
|
115
|
+
|
116
|
+
Jobba::Utils.limited_count(nonlimited_count: nonlimited_count,
|
117
|
+
offset: offset, limit: limit)
|
118
|
+
else
|
119
|
+
# Because we need to get a count of uniq members, have to do a full query
|
120
|
+
result_ids(offset: offset, limit: limit).count
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
def sorted_key?(key)
|
125
|
+
key.match(/_at$/)
|
126
|
+
end
|
127
|
+
|
44
128
|
end
|
data/lib/jobba/clause_factory.rb
CHANGED
@@ -65,7 +65,10 @@ class Jobba::ClauseFactory
|
|
65
65
|
}.uniq
|
66
66
|
|
67
67
|
validate_state_name!(state)
|
68
|
-
|
68
|
+
|
69
|
+
# An ID is in only one state at a time, so we can tell `Clause` that
|
70
|
+
# info via `keys_contain_only_unique_ids` -- helps it be more efficient
|
71
|
+
Jobba::Clause.new(keys: state, keys_contain_only_unique_ids: true)
|
69
72
|
end
|
70
73
|
|
71
74
|
def self.validate_state_name!(state_name)
|
data/lib/jobba/id_clause.rb
CHANGED
@@ -11,4 +11,12 @@ class Jobba::IdClause
|
|
11
11
|
redis.zadd(new_key, @ids.collect{|id| [0, id]}) if @ids.any?
|
12
12
|
new_key
|
13
13
|
end
|
14
|
+
|
15
|
+
def result_ids(offset: nil, limit: nil)
|
16
|
+
@ids.map(&:to_s).slice(offset || 0, limit || @ids.count)
|
17
|
+
end
|
18
|
+
|
19
|
+
def result_count(offset: nil, limit: nil)
|
20
|
+
Jobba::Utils.limited_count(nonlimited_count: @ids.count, offset: offset, limit: limit)
|
21
|
+
end
|
14
22
|
end
|
data/lib/jobba/query.rb
CHANGED
@@ -6,6 +6,8 @@ class Jobba::Query
|
|
6
6
|
|
7
7
|
include Jobba::Common
|
8
8
|
|
9
|
+
attr_reader :_limit, :_offset
|
10
|
+
|
9
11
|
def where(options)
|
10
12
|
options.each do |kk,vv|
|
11
13
|
clauses.push(Jobba::ClauseFactory.new_clause(kk,vv))
|
@@ -14,8 +16,19 @@ class Jobba::Query
|
|
14
16
|
self
|
15
17
|
end
|
16
18
|
|
19
|
+
def limit(number)
|
20
|
+
@_limit = number
|
21
|
+
@_offset ||= 0
|
22
|
+
self
|
23
|
+
end
|
24
|
+
|
25
|
+
def offset(number)
|
26
|
+
@_offset = number
|
27
|
+
self
|
28
|
+
end
|
29
|
+
|
17
30
|
def count
|
18
|
-
_run(
|
31
|
+
_run(CountStatuses.new(self))
|
19
32
|
end
|
20
33
|
|
21
34
|
def empty?
|
@@ -39,7 +52,7 @@ class Jobba::Query
|
|
39
52
|
end
|
40
53
|
|
41
54
|
def run
|
42
|
-
_run(
|
55
|
+
_run(GetStatuses.new(self))
|
43
56
|
end
|
44
57
|
|
45
58
|
protected
|
@@ -50,73 +63,111 @@ class Jobba::Query
|
|
50
63
|
@clauses = []
|
51
64
|
end
|
52
65
|
|
53
|
-
class
|
54
|
-
attr_reader :
|
66
|
+
class Operations
|
67
|
+
attr_reader :query, :redis
|
68
|
+
|
69
|
+
def initialize(query)
|
70
|
+
@query = query
|
71
|
+
@redis = query.redis
|
72
|
+
end
|
55
73
|
|
56
|
-
|
57
|
-
|
58
|
-
|
74
|
+
# Standalone method that gives the final result when the query is one clause
|
75
|
+
def handle_single_clause(clause)
|
76
|
+
raise "AbstractMethod"
|
59
77
|
end
|
60
78
|
|
61
|
-
|
62
|
-
|
79
|
+
# When the query is multiple clauses, this method is called on the final set
|
80
|
+
# that represents the ANDing of all clauses. It is called inside a `redis.multi`
|
81
|
+
# block.
|
82
|
+
def multi_clause_last_redis_op(result_set)
|
83
|
+
raise "AbstractMethod"
|
84
|
+
end
|
85
|
+
|
86
|
+
# Called on the output from the redis multi block for multi-clause queries.
|
87
|
+
def multi_clause_postprocess(redis_output)
|
88
|
+
raise "AbstractMethod"
|
63
89
|
end
|
64
90
|
end
|
65
91
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
},
|
70
|
-
->(ids) {
|
92
|
+
class GetStatuses < Operations
|
93
|
+
def handle_single_clause(clause)
|
94
|
+
ids = clause.result_ids(limit: query._limit, offset: query._offset)
|
71
95
|
Jobba::Statuses.new(ids)
|
72
|
-
|
73
|
-
)
|
74
|
-
|
75
|
-
COUNT_STATUSES = RunBlocks.new(
|
76
|
-
->(working_set, redis) {
|
77
|
-
redis.zcard(working_set)
|
78
|
-
},
|
79
|
-
nil
|
80
|
-
)
|
81
|
-
|
82
|
-
def _run(run_blocks)
|
83
|
-
# Each clause in a query is converted to a sorted set (which may be filtered,
|
84
|
-
# e.g. in the case of timestamp clauses) and then the sets are successively
|
85
|
-
# intersected.
|
86
|
-
#
|
87
|
-
# Different users of this method have different uses for the final "working"
|
88
|
-
# set. Because we want to bundle all of the creations and intersections of
|
89
|
-
# clause sets into one call to Redis (via a `multi` block), we have users
|
90
|
-
# of this method provide a final block to run on the working set within
|
91
|
-
# Redis (and within the `multi` call) and then another block to run on
|
92
|
-
# the output of the first block.
|
93
|
-
|
94
|
-
multi_result = redis.multi do
|
95
|
-
load_default_clause if clauses.empty?
|
96
|
-
working_set = nil
|
97
|
-
|
98
|
-
clauses.each do |clause|
|
99
|
-
clause_set = clause.to_new_set
|
100
|
-
|
101
|
-
if working_set.nil?
|
102
|
-
working_set = clause_set
|
103
|
-
else
|
104
|
-
redis.zinterstore(working_set, [working_set, clause_set], weights: [0, 0])
|
105
|
-
redis.del(clause_set)
|
106
|
-
end
|
107
|
-
end
|
96
|
+
end
|
108
97
|
|
109
|
-
|
110
|
-
|
98
|
+
def multi_clause_last_redis_op(result_set)
|
99
|
+
start = query._offset || 0
|
100
|
+
stop = query._limit.nil? ? -1 : start + query._limit - 1
|
101
|
+
redis.zrange(result_set, start, stop)
|
102
|
+
end
|
111
103
|
|
112
|
-
|
104
|
+
def multi_clause_postprocess(ids)
|
105
|
+
Jobba::Statuses.new(ids)
|
113
106
|
end
|
107
|
+
end
|
114
108
|
|
115
|
-
|
109
|
+
class CountStatuses < Operations
|
110
|
+
def handle_single_clause(clause)
|
111
|
+
clause.result_count(limit: query._limit, offset: query._offset)
|
112
|
+
end
|
113
|
+
|
114
|
+
def multi_clause_last_redis_op(result_set)
|
115
|
+
redis.zcard(result_set)
|
116
|
+
end
|
117
|
+
|
118
|
+
def multi_clause_postprocess(redis_output)
|
119
|
+
Jobba::Utils.limited_count(nonlimited_count: redis_output, offset: query._offset, limit: query._limit)
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
def _run(operations)
|
124
|
+
if _limit.nil? && !_offset.nil?
|
125
|
+
raise ArgumentError, "`limit` must be set if `offset` is set", caller
|
126
|
+
end
|
116
127
|
|
117
|
-
|
118
|
-
|
119
|
-
|
128
|
+
load_default_clause if clauses.empty?
|
129
|
+
|
130
|
+
if clauses.one?
|
131
|
+
# We can make specialized calls that don't need intermediate copies of sets
|
132
|
+
# to be made (which are costly)
|
133
|
+
operations.handle_single_clause(clauses.first)
|
134
|
+
else
|
135
|
+
# Each clause in a query is converted to a sorted set (which may be filtered,
|
136
|
+
# e.g. in the case of timestamp clauses) and then the sets are successively
|
137
|
+
# intersected.
|
138
|
+
#
|
139
|
+
# Different users of this method have different uses for the final "working"
|
140
|
+
# set. Because we want to bundle all of the creations and intersections of
|
141
|
+
# clause sets into one call to Redis (via a `multi` block), we have users
|
142
|
+
# of this method provide a final block to run on the working set within
|
143
|
+
# Redis (and within the `multi` call) and then another block to run on
|
144
|
+
# the output of the first block.
|
145
|
+
#
|
146
|
+
# This code also works for the single clause case, but it is less efficient
|
147
|
+
|
148
|
+
multi_result = redis.multi do
|
149
|
+
|
150
|
+
working_set = nil
|
151
|
+
|
152
|
+
clauses.each do |clause|
|
153
|
+
clause_set = clause.to_new_set
|
154
|
+
|
155
|
+
if working_set.nil?
|
156
|
+
working_set = clause_set
|
157
|
+
else
|
158
|
+
redis.zinterstore(working_set, [working_set, clause_set], weights: [0, 0])
|
159
|
+
redis.del(clause_set)
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
# This is later accessed as `multi_result[-2]` since it is the second to last output
|
164
|
+
operations.multi_clause_last_redis_op(working_set)
|
165
|
+
|
166
|
+
redis.del(working_set)
|
167
|
+
end
|
168
|
+
|
169
|
+
operations.multi_clause_postprocess(multi_result[-2])
|
170
|
+
end
|
120
171
|
end
|
121
172
|
|
122
173
|
def load_default_clause
|
data/lib/jobba/utils.rb
CHANGED
@@ -26,4 +26,45 @@ module Jobba::Utils
|
|
26
26
|
"temp:#{SecureRandom.hex(10)}"
|
27
27
|
end
|
28
28
|
|
29
|
+
def self.limited_count(nonlimited_count:, offset:, limit:)
|
30
|
+
raise(ArgumentError, "`limit` cannot be negative") if !limit.nil? && limit < 0
|
31
|
+
raise(ArgumentError, "`offset` cannot be negative") if !offset.nil? && offset < 0
|
32
|
+
|
33
|
+
# If we get a count of an array or set that doesn't take into account
|
34
|
+
# specified offsets and limits (what we call a `nonlimited_count`), but
|
35
|
+
# we need the count to effectively have been done with an offset and
|
36
|
+
# limit, this method calculates that limited count.
|
37
|
+
#
|
38
|
+
# This can happen when it is more efficient to calculate an unlimited
|
39
|
+
# count and then limit it after the fact.
|
40
|
+
#
|
41
|
+
# E.g.
|
42
|
+
#
|
43
|
+
# Get count of
|
44
|
+
# array = [a b c d e f g]
|
45
|
+
# where
|
46
|
+
# offset = 4
|
47
|
+
# limit = 5
|
48
|
+
#
|
49
|
+
# nonlimited_count = 7
|
50
|
+
#
|
51
|
+
# The limited array includes the highlighted (^) elements
|
52
|
+
# array = [a b c d e f g]
|
53
|
+
# ^ ^ ^ ^ ^
|
54
|
+
# Element `e` is the first element indicated by an offset of 4. The
|
55
|
+
# limit of 5 then causes us to take the rest of the elements in the array.
|
56
|
+
# The limit here is effectively 3 since there are only 3 elements left.
|
57
|
+
#
|
58
|
+
# So the limited_count is 3.
|
59
|
+
|
60
|
+
first_position_counted = offset || 0
|
61
|
+
|
62
|
+
# The `min` here is to make sure we don't go beyond the end of the array. The `- 1`
|
63
|
+
# is because we are getting a zero-indexed position from a count.
|
64
|
+
last_position_counted = [first_position_counted + (limit || nonlimited_count), nonlimited_count].min - 1
|
65
|
+
|
66
|
+
# Guard against first position being after last position by forcing min of 0
|
67
|
+
[last_position_counted - first_position_counted + 1, 0].max
|
68
|
+
end
|
69
|
+
|
29
70
|
end
|
data/lib/jobba/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jobba
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- JP Slavinsky
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-09-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: redis
|