de-dupe 0.0.1.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +36 -0
- data/.standard.yml +3 -0
- data/LICENSE.txt +21 -0
- data/README.md +345 -0
- data/Rakefile +10 -0
- data/lib/de-dupe.rb +3 -0
- data/lib/de_dupe/config.rb +23 -0
- data/lib/de_dupe/dataset.rb +103 -0
- data/lib/de_dupe/lock.rb +64 -0
- data/lib/de_dupe/lock_key.rb +19 -0
- data/lib/de_dupe/redis_pool.rb +27 -0
- data/lib/de_dupe/version.rb +5 -0
- data/lib/de_dupe.rb +68 -0
- data/lib/dedupe.rb +3 -0
- data/mise.toml +2 -0
- metadata +114 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 87750b01e05633001daa17f7b5984b2dc6defaa9987b0bc67c028590794a0cee
|
|
4
|
+
data.tar.gz: 27af945d2ab33a01c9d27eb0050c067dd3970c0dba98fd28d90bc87203709a4b
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: '09edd34f392f56e7c3eb34fbf506725e9713a056c526c468a11f773702e3834bf836c867ce0d4173ee5d0586f26af661e0ca65aba523072b388c5f52c61128d4'
|
|
7
|
+
data.tar.gz: 0c02cbada386781ff4fb9cb61e1906573ecdf5a74be99b3a164692a9e683df4aea498a597e5d5c05d57b94351202a02a925f5b6fd8f57ea5c3dda94d2a085714
|
data/.rspec
ADDED
data/.rubocop.yml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
inherit_mode:
|
|
2
|
+
merge:
|
|
3
|
+
- Exclude
|
|
4
|
+
|
|
5
|
+
require:
|
|
6
|
+
- rubocop-performance
|
|
7
|
+
- rubocop-rspec
|
|
8
|
+
- standard/cop/block_single_line_braces
|
|
9
|
+
|
|
10
|
+
inherit_gem:
|
|
11
|
+
standard: config/base.yml
|
|
12
|
+
|
|
13
|
+
AllCops:
|
|
14
|
+
TargetRubyVersion: 2.7
|
|
15
|
+
SuggestExtensions: false
|
|
16
|
+
Exclude:
|
|
17
|
+
- "db/**/*"
|
|
18
|
+
- "tmp/**/*"
|
|
19
|
+
- "vendor/**/*"
|
|
20
|
+
- "spec/support/hooks/notification.rb" # I'll refactor this rspec matcher to allow use things like an_instance_of
|
|
21
|
+
NewCops: enable
|
|
22
|
+
|
|
23
|
+
RSpec/MultipleExpectations:
|
|
24
|
+
Enabled: false
|
|
25
|
+
|
|
26
|
+
RSpec/ExampleLength:
|
|
27
|
+
Enabled: false
|
|
28
|
+
|
|
29
|
+
RSpec/MultipleMemoizedHelpers:
|
|
30
|
+
Enabled: false
|
|
31
|
+
|
|
32
|
+
RSpec/MessageSpies:
|
|
33
|
+
Enabled: false
|
|
34
|
+
|
|
35
|
+
RSpec/StubbedMock:
|
|
36
|
+
Enabled: false
|
data/.standard.yml
ADDED
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Marcos G. Zimmermann
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
# DeDupe
|
|
2
|
+
|
|
3
|
+
A Ruby gem for distributed deduplication and locking using Redis Sorted Sets. DeDupe provides a simple and efficient way to prevent duplicate execution of tasks across multiple processes or servers, with automatic TTL-based expiration and cleanup.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- 🔒 **Distributed Locking**: Prevent duplicate execution across multiple processes/servers
|
|
8
|
+
- ⏱️ **TTL-based Expiration**: Automatic expiration of locks with configurable time-to-live
|
|
9
|
+
- 🧹 **Automatic Cleanup**: Expired entries are automatically removed
|
|
10
|
+
- 📦 **Multiple ID Management**: Handle multiple locks/IDs within the same namespace
|
|
11
|
+
- 🚀 **Simple API**: Easy-to-use interface with block support
|
|
12
|
+
- 🔄 **Redis-backed**: Uses Redis Sorted Sets for efficient storage and querying
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
Add this line to your application's Gemfile:
|
|
17
|
+
|
|
18
|
+
```ruby
|
|
19
|
+
gem "de-dupe"
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
And then execute:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
$ bundle install
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Or install it yourself as:
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
$ gem install de-dupe
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Configuration
|
|
35
|
+
|
|
36
|
+
Configure DeDupe with your Redis connection:
|
|
37
|
+
|
|
38
|
+
```ruby
|
|
39
|
+
require "de-dupe"
|
|
40
|
+
require "connection_pool"
|
|
41
|
+
|
|
42
|
+
DeDupe.configure do |config|
|
|
43
|
+
# Redis connection (supports ConnectionPool or Redis instance)
|
|
44
|
+
config.redis = ConnectionPool.new(size: 10, timeout: 1) do
|
|
45
|
+
Redis.new(url: ENV.fetch("REDIS_URL", "redis://localhost:6379"))
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
# Namespace for all keys (default: "de-dupe")
|
|
49
|
+
config.namespace = "my-app"
|
|
50
|
+
|
|
51
|
+
# Default TTL in seconds (default: 300 = 5 minutes)
|
|
52
|
+
config.expires_in = 3600 # 1 hour
|
|
53
|
+
end
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### Configuration Options
|
|
57
|
+
|
|
58
|
+
- `redis`: Redis connection instance or ConnectionPool (required)
|
|
59
|
+
- `namespace`: Prefix for all Redis keys (default: `"de-dupe"`)
|
|
60
|
+
- `expires_in`: Default TTL in seconds for locks (default: `300`)
|
|
61
|
+
|
|
62
|
+
## Usage
|
|
63
|
+
|
|
64
|
+
### Simple Locking with `DeDupe.acquire`
|
|
65
|
+
|
|
66
|
+
The simplest way to use DeDupe is with the `acquire` method:
|
|
67
|
+
|
|
68
|
+
```ruby
|
|
69
|
+
# Prevent duplicate execution of a long-running job
|
|
70
|
+
DeDupe.acquire("import", "user-12345", ttl: 3600) do
|
|
71
|
+
# This block will only execute if the lock can be acquired
|
|
72
|
+
# If another process is already running this, the block won't execute
|
|
73
|
+
import_user_data("user-12345")
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
# With multiple namespace levels
|
|
77
|
+
DeDupe.acquire("import", "users", "batch-2024-01-01", ttl: 7200) do
|
|
78
|
+
process_batch("batch-2024-01-01")
|
|
79
|
+
end
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
**Note**: The last argument is the lock ID, all previous arguments form the namespace.
|
|
83
|
+
|
|
84
|
+
### Using the Lock Class
|
|
85
|
+
|
|
86
|
+
For more control, use the `Lock` class directly:
|
|
87
|
+
|
|
88
|
+
```ruby
|
|
89
|
+
# Create a lock
|
|
90
|
+
lock = DeDupe::Lock.new(
|
|
91
|
+
lock_key: "import:users",
|
|
92
|
+
lock_id: "user-12345",
|
|
93
|
+
ttl: 3600 # optional, uses config default if not provided
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
# Check if locked
|
|
97
|
+
if lock.locked?
|
|
98
|
+
puts "Already processing"
|
|
99
|
+
else
|
|
100
|
+
# Acquire the lock
|
|
101
|
+
if lock.acquire
|
|
102
|
+
begin
|
|
103
|
+
# Do work
|
|
104
|
+
process_user("user-12345")
|
|
105
|
+
ensure
|
|
106
|
+
# Always release
|
|
107
|
+
lock.release
|
|
108
|
+
end
|
|
109
|
+
end
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
# Or use with_lock for automatic release
|
|
113
|
+
lock.with_lock do
|
|
114
|
+
# Block only executes if lock can be acquired
|
|
115
|
+
# Lock is automatically released after block execution (even on error)
|
|
116
|
+
process_user("user-12345")
|
|
117
|
+
end
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### Using the Dataset Class
|
|
121
|
+
|
|
122
|
+
The `Dataset` class allows you to manage multiple IDs within the same namespace:
|
|
123
|
+
|
|
124
|
+
```ruby
|
|
125
|
+
# Create a dataset
|
|
126
|
+
dataset = DeDupe::Dataset.new("import:users", ttl: 3600)
|
|
127
|
+
|
|
128
|
+
# Acquire multiple IDs at once
|
|
129
|
+
if dataset.acquire("user-1", "user-2", "user-3")
|
|
130
|
+
puts "All IDs acquired"
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
# Check which IDs are locked
|
|
134
|
+
locked = dataset.locked_members("user-1", "user-2", "user-3")
|
|
135
|
+
# => ["user-1", "user-2", "user-3"]
|
|
136
|
+
|
|
137
|
+
# Check which IDs are unlocked
|
|
138
|
+
unlocked = dataset.unlocked_members("user-1", "user-2", "user-3")
|
|
139
|
+
# => []
|
|
140
|
+
|
|
141
|
+
# Release specific IDs
|
|
142
|
+
dataset.release("user-1", "user-2")
|
|
143
|
+
|
|
144
|
+
# Check lock status for multiple IDs
|
|
145
|
+
if dataset.locked?("user-1", "user-2", "user-3")
|
|
146
|
+
puts "At least one ID is locked"
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
# Flush all entries
|
|
150
|
+
dataset.flush
|
|
151
|
+
|
|
152
|
+
# Manually clean up expired entries
|
|
153
|
+
dataset.flush_expired_members
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
## API Reference
|
|
157
|
+
|
|
158
|
+
### `DeDupe.acquire(*keys, ttl: nil, &block)`
|
|
159
|
+
|
|
160
|
+
Convenience method for acquiring a lock and executing a block.
|
|
161
|
+
|
|
162
|
+
- `*keys`: Namespace components (last one becomes the lock_id)
|
|
163
|
+
- `ttl`: Optional TTL in seconds (uses config default if not provided)
|
|
164
|
+
- `&block`: Block to execute if lock is acquired
|
|
165
|
+
|
|
166
|
+
**Returns**: Result of the block, or `nil` if lock couldn't be acquired
|
|
167
|
+
|
|
168
|
+
### `DeDupe::Lock`
|
|
169
|
+
|
|
170
|
+
#### `new(lock_key:, lock_id:, ttl: nil)`
|
|
171
|
+
|
|
172
|
+
Create a new lock instance.
|
|
173
|
+
|
|
174
|
+
- `lock_key`: Namespace/group for the lock
|
|
175
|
+
- `lock_id`: Unique identifier within the namespace
|
|
176
|
+
- `ttl`: Optional TTL in seconds
|
|
177
|
+
|
|
178
|
+
#### `acquire` / `lock`
|
|
179
|
+
|
|
180
|
+
Attempt to acquire the lock. Returns `true` if acquired, `false` if it already exists.
|
|
181
|
+
|
|
182
|
+
#### `release` / `unlock`
|
|
183
|
+
|
|
184
|
+
Release the lock. Returns `true` if released, `false` if lock didn't exist.
|
|
185
|
+
|
|
186
|
+
#### `locked?`
|
|
187
|
+
|
|
188
|
+
Check if the lock is currently active (exists and not expired). Returns `true` or `false`.
|
|
189
|
+
|
|
190
|
+
#### `with_lock(&block)`
|
|
191
|
+
|
|
192
|
+
Acquire lock, execute block, and automatically release lock. Block only executes if lock can be acquired.
|
|
193
|
+
|
|
194
|
+
- Returns block result if lock acquired, `nil` otherwise
|
|
195
|
+
- Always releases lock, even if block raises an exception
|
|
196
|
+
- Propagates exceptions from the block
|
|
197
|
+
|
|
198
|
+
### `DeDupe::Dataset`
|
|
199
|
+
|
|
200
|
+
#### `new(lock_key, ttl: nil)`
|
|
201
|
+
|
|
202
|
+
Create a new dataset instance.
|
|
203
|
+
|
|
204
|
+
- `lock_key`: Namespace for the dataset
|
|
205
|
+
- `ttl`: Optional TTL in seconds
|
|
206
|
+
|
|
207
|
+
#### `acquire(*ids)` / `lock(*ids)`
|
|
208
|
+
|
|
209
|
+
Acquire locks for multiple IDs. Returns `true` if at least one ID was acquired.
|
|
210
|
+
|
|
211
|
+
#### `release(*ids)` / `unlock(*ids)`
|
|
212
|
+
|
|
213
|
+
Release locks for multiple IDs. Returns `true` if at least one ID was released.
|
|
214
|
+
|
|
215
|
+
#### `locked?(*ids)`
|
|
216
|
+
|
|
217
|
+
Check if any of the given IDs are locked. Returns `true` if at least one is locked.
|
|
218
|
+
|
|
219
|
+
#### `locked_members(*ids)`
|
|
220
|
+
|
|
221
|
+
Return array of IDs that are currently locked (not expired).
|
|
222
|
+
|
|
223
|
+
#### `unlocked_members(*ids)`
|
|
224
|
+
|
|
225
|
+
Return array of IDs that are unlocked (don't exist or expired).
|
|
226
|
+
|
|
227
|
+
#### `flush`
|
|
228
|
+
|
|
229
|
+
Remove all entries from the dataset.
|
|
230
|
+
|
|
231
|
+
#### `flush_expired_members`
|
|
232
|
+
|
|
233
|
+
Remove all expired entries from the dataset (automatically called during `acquire`).
|
|
234
|
+
|
|
235
|
+
## Examples
|
|
236
|
+
|
|
237
|
+
### Preventing Duplicate Job Execution
|
|
238
|
+
|
|
239
|
+
```ruby
|
|
240
|
+
# In a background job processor
|
|
241
|
+
class ImportUserJob
|
|
242
|
+
def perform(user_id)
|
|
243
|
+
DeDupe.acquire("import", "user-#{user_id}", ttl: 3600) do
|
|
244
|
+
# Only one instance of this job will run per user_id
|
|
245
|
+
UserImporter.new(user_id).import
|
|
246
|
+
end
|
|
247
|
+
end
|
|
248
|
+
end
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
### Batch Processing with Deduplication
|
|
252
|
+
|
|
253
|
+
```ruby
|
|
254
|
+
# Process a batch, ensuring each item is only processed once
|
|
255
|
+
dataset = DeDupe::Dataset.new("batch:process-#{batch_id}", ttl: 7200)
|
|
256
|
+
|
|
257
|
+
items_to_process.each do |item|
|
|
258
|
+
next if dataset.locked?(item.id) # Skip if already processing
|
|
259
|
+
|
|
260
|
+
if dataset.acquire(item.id)
|
|
261
|
+
begin
|
|
262
|
+
process_item(item)
|
|
263
|
+
ensure
|
|
264
|
+
dataset.release(item.id)
|
|
265
|
+
end
|
|
266
|
+
end
|
|
267
|
+
end
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
### Rate Limiting with Locking
|
|
271
|
+
|
|
272
|
+
```ruby
|
|
273
|
+
# Allow only one in-flight API call per user (with_lock releases the lock as soon as the block returns; the 60s TTL is only a safety net)
|
|
274
|
+
user_id = current_user.id
|
|
275
|
+
lock = DeDupe::Lock.new(
|
|
276
|
+
lock_key: "api:rate-limit",
|
|
277
|
+
lock_id: "user-#{user_id}",
|
|
278
|
+
ttl: 60 # 1 minute
|
|
279
|
+
)
|
|
280
|
+
|
|
281
|
+
unless lock.locked?
|
|
282
|
+
lock.with_lock do
|
|
283
|
+
make_api_call(user_id)
|
|
284
|
+
end
|
|
285
|
+
else
|
|
286
|
+
puts "Rate limit: Please wait before making another request"
|
|
287
|
+
end
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
### Parallel Processing Safety
|
|
291
|
+
|
|
292
|
+
```ruby
|
|
293
|
+
# Ensure only one worker processes a task, even in parallel environments
|
|
294
|
+
task_id = "task-12345"
|
|
295
|
+
|
|
296
|
+
DeDupe.acquire("workers", "process", task_id, ttl: 300) do
|
|
297
|
+
# This will only execute in one worker, even if multiple workers
|
|
298
|
+
# try to process the same task simultaneously
|
|
299
|
+
process_task(task_id)
|
|
300
|
+
end
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
## How It Works
|
|
304
|
+
|
|
305
|
+
DeDupe uses Redis Sorted Sets to store locks with expiration timestamps as scores:
|
|
306
|
+
|
|
307
|
+
1. **Lock Acquisition**: When you acquire a lock, it's stored in Redis with a score of `current_time + ttl`
|
|
308
|
+
2. **Lock Checking**: A lock is considered active if its score (expiration time) is greater than the current time
|
|
309
|
+
3. **Automatic Cleanup**: Expired entries (where score <= current_time) are automatically removed
|
|
310
|
+
4. **Distributed**: Since it uses Redis, locks work across multiple processes and servers
|
|
311
|
+
|
|
312
|
+
## Requirements
|
|
313
|
+
|
|
314
|
+
- Ruby >= 2.7
|
|
315
|
+
- Redis server
|
|
316
|
+
- `redis` gem
|
|
317
|
+
- `zeitwerk` gem
|
|
318
|
+
|
|
319
|
+
## Development
|
|
320
|
+
|
|
321
|
+
After checking out the repo, run:
|
|
322
|
+
|
|
323
|
+
```bash
|
|
324
|
+
bundle install
|
|
325
|
+
```
|
|
326
|
+
|
|
327
|
+
Run tests:
|
|
328
|
+
|
|
329
|
+
```bash
|
|
330
|
+
bundle exec rspec
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
Run RuboCop:
|
|
334
|
+
|
|
335
|
+
```bash
|
|
336
|
+
bundle exec rubocop
|
|
337
|
+
```
|
|
338
|
+
|
|
339
|
+
## Contributing
|
|
340
|
+
|
|
341
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/marcosgz/de-dupe.
|
|
342
|
+
|
|
343
|
+
## License
|
|
344
|
+
|
|
345
|
+
The gem is available as open source under the terms of the [MIT License](LICENSE.txt).
|
data/Rakefile
ADDED
data/lib/de-dupe.rb
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "redis"
|
|
4
|
+
require "yaml"
|
|
5
|
+
|
|
6
|
+
module DeDupe
  # Runtime settings for DeDupe: the Redis connection, the key namespace and
  # the default lock TTL. An instance is exposed through +DeDupe.config+ and
  # is normally populated with +DeDupe.configure+.
  class Config
    # Namespace prefix used when none is configured.
    DEFAULT_NAMESPACE = "de-dupe"

    # Default lock TTL in seconds (5 minutes).
    DEFAULT_EXPIRES_IN = 5 * 60

    # A Redis or ConnectionPool instance, or the options used to build a new
    # Redis connection.
    attr_accessor :redis

    # Namespace used to group the data stored by this package.
    attr_accessor :namespace

    # Default TTL, in seconds, for locks created without an explicit +ttl+.
    # NOTE(review): Dataset reads this value with +to_i+, so +nil+ results in
    # a 0-second TTL rather than "never expire" -- confirm the intended
    # meaning of +nil+ before relying on it.
    attr_accessor :expires_in

    # @param redis [Object, nil] connection, pool or connection options
    # @param namespace [String] prefix for every key written by the gem
    # @param expires_in [Integer, nil] default lock TTL in seconds
    def initialize(redis: nil, namespace: DEFAULT_NAMESPACE, expires_in: DEFAULT_EXPIRES_IN)
      @redis = redis
      @namespace = namespace
      @expires_in = expires_in
    end
  end
end
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module DeDupe
  # Distributed deduplication dataset backed by a Redis Sorted Set.
  # Handles multiple member IDs stored under the same key (+lock_key+).
  #
  # Each member's score is its expiration time (time of acquisition + ttl).
  # A member is considered locked only while its score is strictly greater
  # than the current time; a score equal to or below the current time is
  # expired. Every query and the cleanup use this same rule, so a member is
  # never reported as both locked and unlocked.
  class Dataset
    attr_reader :lock_key, :ttl

    # @param lock_key [String] The key to store the dataset
    # @param ttl [Integer, nil] The time to live for the members in seconds
    #   (falls back to +DeDupe.config.expires_in+)
    def initialize(lock_key, ttl: nil)
      @lock_key = lock_key.to_s
      @ttl = ttl || DeDupe.config.expires_in.to_i
    end

    # Adds the given IDs that are not already present (ZADD with NX).
    # Expired members are flushed first so they can be acquired again.
    #
    # @param ids [Array] member IDs to lock
    # @return [Boolean] true when at least one ID was added
    def acquire(*ids)
      return false if ids.empty?

      flush_expired_members # Keep the dataset clean
      score = expiration_score
      entries = ids.map { |id| [score, id] }
      added = redis_pool.with { |conn| conn.zadd(lock_key, entries, nx: true) }
      added > 0
    end
    alias_method :lock, :acquire

    # Removes the given IDs from the dataset.
    #
    # @param ids [Array] member IDs to unlock
    # @return [Boolean] true when at least one ID was removed
    def release(*ids)
      return false if ids.empty?

      removed = redis_pool.with { |conn| conn.zrem(lock_key, ids) }
      removed > 0
    end
    alias_method :unlock, :release

    # @param ids [Array] member IDs to check
    # @return [Boolean] true when at least one of the IDs is locked
    def locked?(*ids)
      return false if ids.empty?

      scores = scores_for(ids)
      current_time = now
      scores.any? { |score| active?(score, current_time) }
    end

    # @param ids [Array] member IDs to check
    # @return [Array] the given IDs that are missing or expired
    def unlocked_members(*ids)
      return [] if ids.empty?

      scores = scores_for(ids)
      return ids if scores.empty?

      current_time = now
      ids.zip(scores)
        .reject { |_id, score| active?(score, current_time) }
        .map { |id, _score| id }
    end

    # @param ids [Array] member IDs to check
    # @return [Array] the given IDs that are currently locked
    def locked_members(*ids)
      return [] if ids.empty?

      scores = scores_for(ids)
      current_time = now
      ids.zip(scores)
        .select { |_id, score| active?(score, current_time) }
        .map { |id, _score| id }
    end

    # Deletes the whole dataset key.
    #
    # @return [Boolean] true when the key existed
    def flush
      redis_pool.with { |conn| conn.del(lock_key) } > 0
    end

    # Removes every member whose score is at or below the current time.
    # The upper bound is inclusive so that it agrees with the "locked only
    # while score > now" rule used by the query methods.
    #
    # @return [Boolean] true when at least one member was removed
    def flush_expired_members
      removed = redis_pool.with { |conn| conn.zremrangebyscore(lock_key, "-inf", now.to_s) }
      removed > 0
    end

    private

    # Score stored for newly acquired members: current time plus the TTL.
    def expiration_score
      now(append_seconds: ttl)
    end

    # Current wall-clock time as a Float, optionally shifted forward.
    def now(append_seconds: 0)
      Time.now.to_f + append_seconds
    end

    def redis_pool
      DeDupe.redis_pool
    end

    # Fetches the score of each ID via ZMSCORE; the result is positionally
    # aligned with +ids+ and holds nil for members that do not exist.
    def scores_for(ids)
      Array(redis_pool.with { |conn| conn.zmscore(lock_key, *ids) })
    end

    # A member is active while its expiration score is strictly in the future.
    def active?(score, current_time)
      !score.nil? && score.to_f > current_time
    end
  end
end
|
data/lib/de_dupe/lock.rb
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module DeDupe
  # Distributed deduplication lock using Redis Sorted Set.
  # Allows multiple independent locks under the same namespace (lock_key).
  # A Lock is a single member (+lock_id+) of a Dataset keyed by +lock_key+.
  class Lock
    extend Forwardable

    def_delegators :dataset, :lock_key, :ttl

    attr_reader :dataset, :lock_id

    # @param lock_key [String] Namespace/group for similar jobs (e.g., "import:users")
    # @param lock_id [String] Unique identifier within the namespace (e.g., digest)
    # @param ttl [Integer, nil] Lock duration in seconds (defaults to config.expires_in)
    def initialize(lock_key:, lock_id:, ttl: nil)
      @lock_id = lock_id.to_s
      @dataset = Dataset.new(lock_key, ttl: ttl)
    end

    # Tries to take the lock.
    #
    # @return [Boolean] the result of +Dataset#acquire+ for this lock_id
    def acquire
      dataset.acquire(lock_id)
    end
    alias_method :lock, :acquire

    # Drops the lock.
    #
    # @return [Boolean] the result of +Dataset#release+ for this lock_id
    def release
      dataset.release(lock_id)
    end
    alias_method :unlock, :release

    # @return [Boolean] true when the member exists and its expiration score
    #   is still in the future
    def locked?
      score = redis_pool.with { |conn| conn.zscore(lock_key, lock_id) }
      !score.nil? && score.to_f > now
    end

    # Runs the block only when the lock can be acquired and always releases
    # the lock afterwards, even when the block raises.
    #
    # +acquire+ is the only check performed: it already refuses to take a
    # lock that is held, so a separate +locked?+ probe beforehand would just
    # add a round trip and a check-then-act gap.
    #
    # NOTE(review): the release is unconditional. If the TTL elapses while
    # the block is still running and another process acquires the same id,
    # this release removes that newer lock -- confirm TTLs are chosen to
    # outlast the protected work.
    #
    # @yield the work to protect
    # @return [Object, nil] the block result, or nil when the lock is held
    def with_lock(&block)
      return unless acquire

      begin
        yield
      ensure
        release
      end
    end

    private

    def redis_pool
      DeDupe.redis_pool
    end

    # Current wall-clock time as a Float.
    def now
      Time.now.to_f
    end
  end
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module DeDupe
  # Builds the Redis key for a lock namespace by joining the configured
  # namespace with the given key segments.
  class LockKey
    SEPARATOR = ":"

    # @param keys [Array<#to_s>] key segments; each one is stringified,
    #   stripped and downcased
    def initialize(*keys)
      @keys = keys.map { |key| key.to_s.strip.downcase }
    end

    # Joins the configured namespace and the segments with SEPARATOR.
    # Missing parts (a nil namespace, nil or blank segments) are skipped so
    # the key never contains empty segments such as "ns::id".
    #
    # @return [String]
    def to_s
      [DeDupe.config.namespace, *keys]
        .compact
        .map(&:to_s)
        .reject(&:empty?)
        .join(SEPARATOR)
    end

    private

    attr_reader :keys
  end
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module DeDupe
  # Uniform +with { |conn| ... }+ access to Redis, whether the configured
  # connection is a pool, a single Redis client, or connection options.
  class RedisPool
    extend Forwardable

    def_delegator :@connection, :with

    # Mixin that gives an object a pool-like +with+ yielding itself.
    # No longer applied by RedisPool (see SingleConnection); retained so
    # existing references to this constant keep working.
    module ConnectionPoolLike
      def with
        yield self
      end
    end

    # Wraps a single connection behind the +with+ interface. Used instead of
    # extending the caller's object, so the supplied Redis instance is left
    # unmodified (and frozen instances are accepted).
    class SingleConnection
      def initialize(connection)
        @connection = connection
      end

      def with
        yield @connection
      end
    end
    private_constant :SingleConnection

    # @param connection [Object, nil] one of:
    #   - an object responding to +with+ (e.g. a ConnectionPool): used as is;
    #   - an object responding to +client+ (treated as a Redis client):
    #     wrapped so that +with+ yields it;
    #   - anything else: passed to +Redis.new+ as its argument (nil means no
    #     argument) and the new client is wrapped.
    def initialize(connection)
      @connection =
        if connection.respond_to?(:with)
          connection
        elsif connection.respond_to?(:client)
          SingleConnection.new(connection)
        else
          SingleConnection.new(::Redis.new(*[connection].compact))
        end
    end
  end
end
|
data/lib/de_dupe.rb
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "redis"
|
|
4
|
+
require "forwardable"
|
|
5
|
+
require "zeitwerk"
|
|
6
|
+
|
|
7
|
+
loader = Zeitwerk::Loader.for_gem
|
|
8
|
+
loader.inflector.inflect "version" => "VERSION"
|
|
9
|
+
loader.ignore("#{__dir__}/de-dupe.rb")
|
|
10
|
+
loader.ignore("#{__dir__}/dedupe.rb")
|
|
11
|
+
loader.log! if ENV["DEBUG"]
|
|
12
|
+
loader.setup
|
|
13
|
+
|
|
14
|
+
# Entry point of the gem: configuration, the shared Redis pool, the
# +acquire+ convenience helper and namespace-wide maintenance helpers.
module DeDupe
  class Error < StandardError; end

  module_function

  # @return [Config] the memoized configuration object
  def config
    @config ||= Config.new
  end

  # Evaluates the block against the configuration (the block runs with the
  # Config as +self+ and also receives it as its argument), then drops the
  # memoized Redis pool so the next use is built from the new settings.
  #
  # @return [Config, nil] the configuration, or nil when no block is given
  def configure(&block)
    return unless block

    config.instance_eval(&block)
    @redis_pool = nil
    config
  end

  # Runs the block under a lock. The last positional argument is the lock
  # id; the preceding ones form the namespace.
  #
  # @param keys [Array] namespace segments followed by the lock id
  # @param kwargs [Hash] forwarded to +Lock.new+ (e.g. +ttl:+)
  # @return [Object, nil] the result of +Lock#with_lock+
  # @raise [DeDupe::Error] when fewer than two positional arguments are given
  def acquire(*keys, **kwargs, &block)
    *namespace_parts, id = keys
    if namespace_parts.empty?
      raise Error, <<~ERROR.strip
        You must provide the namespace + the identifier for the lock.

        Example:
          DeDupe.acquire("long-running-job", "1234567890", ttl: 50) do
            # code to execute
          end
      ERROR
    end

    namespace = LockKey.new(*namespace_parts).to_s
    Lock.new(lock_key: namespace, lock_id: id, **kwargs).with_lock(&block)
  end

  # @return [RedisPool] memoized pool built from +config.redis+
  def redis_pool
    @redis_pool ||= RedisPool.new(config.redis)
  end

  # Deletes every key under the configured namespace.
  #
  # @return [Integer] sum of the DEL results
  def flush_all
    keys.sum { |key, cli| cli.del(key) }
  end

  # Lazily enumerates the keys under the configured namespace.
  # Iterates with SCAN (+scan_each+) instead of KEYS so that a large
  # keyspace is walked incrementally rather than in one blocking command.
  #
  # @return [Enumerator] yields +key, connection+ pairs
  def keys
    pattern = "#{config.namespace}:*"
    Enumerator.new do |yielder|
      redis_pool.with do |cli|
        cli.scan_each(match: pattern) { |key| yielder.yield(key, cli) }
      end
    end
  end
end
|
|
67
|
+
|
|
68
|
+
loader.eager_load
|
data/lib/dedupe.rb
ADDED
data/mise.toml
ADDED
metadata
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: de-dupe
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.1.beta1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Marcos G. Zimmermann
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: exe
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2025-12-24 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: redis
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - ">="
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '0'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - ">="
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '0'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: zeitwerk
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - ">="
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '0'
|
|
34
|
+
type: :runtime
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - ">="
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '0'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: standard
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - ">="
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '0'
|
|
48
|
+
type: :development
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - ">="
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '0'
|
|
55
|
+
description: |
|
|
56
|
+
DeDupe is a Ruby gem for distributed deduplication and locking using Redis Sorted Sets.
|
|
57
|
+
It provides a simple and efficient way to prevent duplicate execution of tasks across
|
|
58
|
+
multiple processes or servers, with automatic TTL-based expiration and cleanup.
|
|
59
|
+
|
|
60
|
+
Features:
|
|
61
|
+
- Distributed locking across multiple processes/servers
|
|
62
|
+
- TTL-based automatic expiration
|
|
63
|
+
- Automatic cleanup of expired entries
|
|
64
|
+
- Multiple ID management within namespaces
|
|
65
|
+
- Simple API with block support
|
|
66
|
+
email:
|
|
67
|
+
- mgzmaster@gmail.com
|
|
68
|
+
executables: []
|
|
69
|
+
extensions: []
|
|
70
|
+
extra_rdoc_files: []
|
|
71
|
+
files:
|
|
72
|
+
- ".rspec"
|
|
73
|
+
- ".rubocop.yml"
|
|
74
|
+
- ".standard.yml"
|
|
75
|
+
- LICENSE.txt
|
|
76
|
+
- README.md
|
|
77
|
+
- Rakefile
|
|
78
|
+
- lib/de-dupe.rb
|
|
79
|
+
- lib/de_dupe.rb
|
|
80
|
+
- lib/de_dupe/config.rb
|
|
81
|
+
- lib/de_dupe/dataset.rb
|
|
82
|
+
- lib/de_dupe/lock.rb
|
|
83
|
+
- lib/de_dupe/lock_key.rb
|
|
84
|
+
- lib/de_dupe/redis_pool.rb
|
|
85
|
+
- lib/de_dupe/version.rb
|
|
86
|
+
- lib/dedupe.rb
|
|
87
|
+
- mise.toml
|
|
88
|
+
homepage: https://github.com/marcosgz/de-dupe
|
|
89
|
+
licenses:
|
|
90
|
+
- MIT
|
|
91
|
+
metadata:
|
|
92
|
+
homepage_uri: https://github.com/marcosgz/de-dupe
|
|
93
|
+
source_code_uri: https://github.com/marcosgz/de-dupe
|
|
94
|
+
changelog_uri: https://github.com/marcosgz/de-dupe/blob/main/CHANGELOG.md
|
|
95
|
+
post_install_message:
|
|
96
|
+
rdoc_options: []
|
|
97
|
+
require_paths:
|
|
98
|
+
- lib
|
|
99
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
100
|
+
requirements:
|
|
101
|
+
- - ">="
|
|
102
|
+
- !ruby/object:Gem::Version
|
|
103
|
+
version: '2.7'
|
|
104
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
105
|
+
requirements:
|
|
106
|
+
- - ">"
|
|
107
|
+
- !ruby/object:Gem::Version
|
|
108
|
+
version: 1.3.1
|
|
109
|
+
requirements: []
|
|
110
|
+
rubygems_version: 3.4.22
|
|
111
|
+
signing_key:
|
|
112
|
+
specification_version: 4
|
|
113
|
+
summary: Distributed deduplication and locking using Redis Sorted Sets
|
|
114
|
+
test_files: []
|