redis_dedupe 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +40 -0
- data/Rakefile +3 -0
- data/lib/redis_dedupe.rb +64 -0
- data/lib/redis_dedupe/version.rb +3 -0
- data/redis_dedupe.gemspec +24 -0
- data/spec/redis_dedupe_spec.rb +56 -0
- data/spec/spec_helper.rb +0 -0
- data/tasks/rspec.rake +3 -0
- metadata +120 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 75665204b3b1c72866624a2cb948eea5e7ec59db
|
4
|
+
data.tar.gz: d8472403d0a7711f7255b3662697dc58aa2131c2
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 848327df6eb7833b69bc55575a0c33b5d0c82ae9d09fc3dcf2ab20cf35d6d24554fd82d23f71400e647a7e71201d480b6fd4572ee9e3168e6aa3f46921299b4c
|
7
|
+
data.tar.gz: 8672ea3ee0be153a2fe786d9143d23ef6e998eef2dd9501389d5dd0eb3f12df287e8f22810d3d61ca345465f14194feaf8226d993b79d0484c3e44a2b71df9bd
|
data/.gitignore
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
.bundle
|
4
|
+
.config
|
5
|
+
.yardoc
|
6
|
+
Gemfile.lock
|
7
|
+
InstalledFiles
|
8
|
+
_yardoc
|
9
|
+
coverage
|
10
|
+
doc/
|
11
|
+
lib/bundler/man
|
12
|
+
pkg
|
13
|
+
rdoc
|
14
|
+
spec/reports
|
15
|
+
test/tmp
|
16
|
+
test/version_tmp
|
17
|
+
tmp
|
18
|
+
*.bundle
|
19
|
+
*.so
|
20
|
+
*.o
|
21
|
+
*.a
|
22
|
+
mkmf.log
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2015 Andy Huynh
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# RedisDedupe
|
2
|
+
|
3
|
+
This is a weak deduper that makes things like bulk email safer to run. It is not a lock that is safe for financial or security-critical needs, because it uses a weak Redis locking pattern that can have race conditions.
|
4
|
+
|
5
|
+
However, imagine a bulk email job that loops over 100 users, and enqueues a background email for each user. If the job fails at iteration 50, a retry would enqueue all the users again and many will receive dupes. This would continue multiple times as the parent job continued to rerun. By marking that a subjob has been enqueued, we can let that isolated job handle its own failures, and the batch enqueue job can run multiple times without re-enqueueing the same subjobs.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
gem 'redis_dedupe'
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install redis_dedupe
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
comment_id = 42
|
25
|
+
dedupe = RedisDedupe::Set.new($redis, "comment:#{comment_id}:notification")
|
26
|
+
|
27
|
+
users.each do |user|
|
28
|
+
dedupe.check(user.id) do
|
29
|
+
send_email_to(user, comment_id)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
```
|
33
|
+
|
34
|
+
## Contributing
|
35
|
+
|
36
|
+
1. Fork it ( https://github.com/[my-github-username]/redis_dedupe/fork )
|
37
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
38
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
39
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
40
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/lib/redis_dedupe.rb
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'redis_dedupe/version'
|
2
|
+
|
3
|
+
# Weak, Redis-set-backed deduplication helpers.
#
# A RedisDedupe::Set remembers which members have already been processed under
# a key so that a retried batch job does not repeat work for the same member.
module RedisDedupe
  class << self
    # Redis client handed to the RedisDedupe::Set built by Helpers#dedupe.
    attr_accessor :client
  end

  # Tracks processed members in a Redis set stored at +key+.
  class Set
    SEVEN_DAYS = 7 * 24 * 60 * 60

    attr_reader :key, :expires_in

    # redis      - client object responding to pipelined, sadd, expire,
    #              expireat and del.
    # key        - name of the Redis set that records processed members.
    # expires_in - either a number of seconds (relative TTL) or a Time
    #              (absolute deadline). Defaults to seven days from now.
    def initialize(redis, key, expires_in = Time.now + SEVEN_DAYS)
      @redis = redis
      @key = key
      @expires_in = expires_in
    end

    # Adds +member+ to the set, refreshes the key's expiry, and yields only
    # when the member was not already present.
    #
    # Returns the block's value, or nil when the member had been seen before.
    def check(member)
      results = redis.pipelined do |pipeline|
        # Newer clients yield a dedicated pipeline object that must receive
        # the commands; clients that yield nothing fall back to the client.
        connection = pipeline || redis
        connection.sadd(key, member)
        schedule_expiry(connection)
      end

      yield if newly_added?(results[0])
    end

    # Deletes the whole set, freeing the memory once the batch is done.
    def finish
      redis.del(key)
    end

    private

    def redis
      @redis
    end

    # EXPIRE takes a TTL in seconds while EXPIREAT takes a Unix timestamp, so
    # a Time deadline (including the default) has to go through EXPIREAT.
    def schedule_expiry(connection)
      if expires_in.is_a?(Time)
        connection.expireat(key, expires_in.to_i)
      else
        connection.expire(key, expires_in)
      end
    end

    # SADD replies with a boolean on some clients and with the count of added
    # members (0 or 1) on others; Integer 0 is truthy in Ruby, so compare
    # against the two "added" values explicitly.
    def newly_added?(reply)
      reply == true || reply == 1
    end
  end

  # Mixin giving the including object a memoized, private +dedupe+ set whose
  # key is "<dedupe_namespace>:<dedupe_id>".
  module Helpers
    private

    def dedupe
      @dedupe ||= RedisDedupe::Set.new(RedisDedupe.client, [dedupe_namespace, dedupe_id].join(':'))
    end

    # Implement in class, should return an integer or string:
    #
    # Ex.
    #
    #   def dedupe_id
    #     @announcement.id # => 42
    #   end
    #
    def dedupe_id
      raise NotImplementedError
    end

    # Key prefix; defaults to the including object's class name.
    def dedupe_namespace
      self.class.name
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for redis_dedupe. The version constant is loaded from
# lib/, so that directory is put on the load path first.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.include?(lib_dir) || $LOAD_PATH.unshift(lib_dir)
require 'redis_dedupe/version'

Gem::Specification.new do |gem|
  # Identity and authorship.
  gem.name     = "redis_dedupe"
  gem.version  = RedisDedupe::VERSION
  gem.authors  = ["Andy Huynh"]
  gem.email    = ["andy4thehuynh@gmail.com"]
  gem.homepage = ""
  gem.license  = "MIT"

  # Human-readable blurbs.
  gem.summary     = %q{ A weak deduper to make things like bulk email run safer. }
  gem.description = %q{ This is a weak deduper to make things like bulk email run safer. It is not a lock safe for financial/security needs because it uses a weak redis locking pattern that can have race conditions. However, imagine a bulk email job that loops over 100 users, and enqueues a background email for each user. If the job fails at iteration 50, a retry would enqueue all the users again and many will receive dupes. This would continue multiple times as the parent job continued to rerun. By marking that a subjob has been enqueued, we can let that isolated job handle its own failures, and the batch enqueue job can run multiple times without re-enqueueing the same subjobs. }

  # Package every file tracked by git; anything under bin/ is an executable.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  # Development-only tooling.
  gem.add_development_dependency "bundler", "~> 1.6"
  ["rake", "rspec", "mock_redis"].each do |tool|
    gem.add_development_dependency tool
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'redis_dedupe'
|
2
|
+
require 'mock_redis'
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
# Construction behaviour of RedisDedupe::Set: stored key and expiry handling.
describe RedisDedupe::Set do
  it "is initialized with a redis client and key" do
    dedupe = RedisDedupe::Set.new(:redis, :key)
    expect(dedupe.key).to eq(:key)
  end

  it "defaults expires_in to 7 days" do
    dedupe = RedisDedupe::Set.new(:redis, :key)
    seven_days_from_now = (Time.now + (7*24*60*60)).to_i

    # The default is computed from Time.now inside the constructor, so allow a
    # one-second tolerance in case the clock ticks between the two readings.
    expect(dedupe.expires_in.to_i).to be_within(1).of(seven_days_from_now)
  end

  it "optionally receives an expires_in time" do
    # Capture the value once so the expectation cannot drift from the input.
    expires_in = (Time.now + (7*24*60)).to_i
    dedupe = RedisDedupe::Set.new(:redis, :key, expires_in)

    expect(dedupe.expires_in.to_i).to eq(expires_in)
  end
end
|
21
|
+
|
22
|
+
# Behaviour of RedisDedupe::Set#check: per-key deduplication and key expiry.
describe RedisDedupe::Set, "#check" do
  it "prevents a block from yielding multiple times for the same member" do
    first_set  = RedisDedupe::Set.new(MockRedis.new, 'spec_key:1')
    second_set = RedisDedupe::Set.new(MockRedis.new, 'spec_key:2')
    yielded = []

    # Same member twice on one set, then once on an independent set.
    first_set.check('1') { yielded << 'A' }
    first_set.check('1') { yielded << 'B' }
    second_set.check('1') { yielded << 'C' }

    expect(yielded).to eq(['A', 'C'])
  end

  it "sets the set to expire so it cleans up if the process never completes" do
    client = MockRedis.new
    subject_set = RedisDedupe::Set.new(client, 'spec_key:1', 10)

    subject_set.check('1') { }

    remaining = client.ttl('spec_key:1')
    expect(remaining).to be_within(1).of(10)
  end
end
|
45
|
+
|
46
|
+
# Behaviour of RedisDedupe::Set#finish: the backing key is deleted.
describe RedisDedupe::Set, "#finish" do
  it "removes the set to free up memory" do
    client = MockRedis.new
    subject_set = RedisDedupe::Set.new(client, 'spec_key:1')

    # Populate the set first so there is something to remove.
    subject_set.check('1') { }
    subject_set.finish

    still_there = client.exists('spec_key:1')
    expect(still_there).to be(false)
  end
end
|
data/spec/spec_helper.rb
ADDED
File without changes
|
data/tasks/rspec.rake
ADDED
metadata
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: redis_dedupe
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andy Huynh
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-03-25 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.6'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: mock_redis
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: " This is a weak deduper to make things like bulk email run safer. It
|
70
|
+
is not a lock safe for financial/security needs because it uses a weak redis locking
|
71
|
+
pattern that can have race conditions. However, imagine a bulk email job that loops
|
72
|
+
over 100 users, and enqueues a background email for each user. If the job fails
|
73
|
+
at iteration 50, a retry would enqueue all the users again and many will receive
|
74
|
+
dupes. This would continue multiple times as the parent job continued to rerun.
|
75
|
+
By marking that a subjob has been enqueued, we can let that isolated job handle
|
76
|
+
its own failures, and the batch enqueue job can run multiple times without re-enqueueing
|
77
|
+
the same subjobs. "
|
78
|
+
email:
|
79
|
+
- andy4thehuynh@gmail.com
|
80
|
+
executables: []
|
81
|
+
extensions: []
|
82
|
+
extra_rdoc_files: []
|
83
|
+
files:
|
84
|
+
- ".gitignore"
|
85
|
+
- Gemfile
|
86
|
+
- LICENSE.txt
|
87
|
+
- README.md
|
88
|
+
- Rakefile
|
89
|
+
- lib/redis_dedupe.rb
|
90
|
+
- lib/redis_dedupe/version.rb
|
91
|
+
- redis_dedupe.gemspec
|
92
|
+
- spec/redis_dedupe_spec.rb
|
93
|
+
- spec/spec_helper.rb
|
94
|
+
- tasks/rspec.rake
|
95
|
+
homepage: ''
|
96
|
+
licenses:
|
97
|
+
- MIT
|
98
|
+
metadata: {}
|
99
|
+
post_install_message:
|
100
|
+
rdoc_options: []
|
101
|
+
require_paths:
|
102
|
+
- lib
|
103
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
104
|
+
requirements:
|
105
|
+
- - ">="
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: '0'
|
108
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
109
|
+
requirements:
|
110
|
+
- - ">="
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
version: '0'
|
113
|
+
requirements: []
|
114
|
+
rubyforge_project:
|
115
|
+
rubygems_version: 2.2.2
|
116
|
+
signing_key:
|
117
|
+
specification_version: 4
|
118
|
+
summary: A weak deduper to make things like bulk email run safer.
|
119
|
+
test_files: []
|
120
|
+
has_rdoc:
|