unique_content_set 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +22 -0
- data/README.md +42 -0
- data/Rakefile +135 -0
- data/lib/unique_content_set.rb +71 -0
- data/test/unique_content_set_test.rb +42 -0
- data/unique_content_set.gemspec +76 -0
- metadata +96 -0
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) Rick Olson
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
22
|
+
|
data/README.md
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# Unique Content Set
|
2
|
+
|
3
|
+
Check for uniquely created content in a Redis set.
|
4
|
+
|
5
|
+
## INSTALL
|
6
|
+
|
7
|
+
gem install unique_content_set
|
8
|
+
|
9
|
+
## USAGE
|
10
|
+
|
11
|
+
# uses a Redis sorted set named something like "unique:5:messages"
|
12
|
+
set = UniqueContentSet.new user.id, :messages
|
13
|
+
|
14
|
+
if set.add(@message.body, @message.created_at)
|
15
|
+
# this is the first time this message body has been posted, do something!
|
16
|
+
end
|
17
|
+
|
18
|
+
if set.exist?(@message.body)
|
19
|
+
# this message body has been posted before, do something!
|
20
|
+
end
|
21
|
+
|
22
|
+
# Purge old message content.
|
23
|
+
set.delete_before(1.month.ago)
|
24
|
+
|
25
|
+
## Contribute
|
26
|
+
|
27
|
+
If you'd like to hack on UniqueContentSet, start by forking the repo on GitHub:
|
28
|
+
|
29
|
+
`https://github.com/technoweenie/unique_content_set`
|
30
|
+
|
31
|
+
The best way to get your changes merged back into core is as follows:
|
32
|
+
|
33
|
+
* Clone down your fork
|
34
|
+
* Create a thoughtfully named topic branch to contain your change
|
35
|
+
* Hack away
|
36
|
+
* Add tests and make sure everything still passes by running rake
|
37
|
+
* If you are adding new functionality, document it in the README
|
38
|
+
* Do not change the version number, I will do that on my end
|
39
|
+
* If necessary, rebase your commits into logical chunks, without errors
|
40
|
+
* Push the branch up to GitHub
|
41
|
+
* Send a pull request to the `technoweenie/unique_content_set` project.
|
42
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'date'
|
4
|
+
|
5
|
+
#############################################################################
|
6
|
+
#
|
7
|
+
# Helper functions
|
8
|
+
#
|
9
|
+
#############################################################################
|
10
|
+
|
11
|
+
# Derives the gem name from the gemspec in the current directory.
#
# The name is the gemspec's filename without its ".gemspec" extension, so a
# dotted name such as "foo.bar.gemspec" yields "foo.bar" instead of being cut
# off at the first dot. The result is memoized in @name.
#
# Returns the String gem name.
# Raises RuntimeError if the current directory contains no *.gemspec file.
def name
  @name ||= begin
    gemspec_path = Dir['*.gemspec'].first
    raise "No .gemspec file found in #{Dir.pwd}" unless gemspec_path
    File.basename(gemspec_path, '.gemspec')
  end
end
|
14
|
+
|
15
|
+
# Reads the gem version out of lib/<name>.rb.
#
# Looks for the first line of the form `VERSION = '...'` (single or double
# quotes) and returns the text between the opening quote and the next matching
# quote, so a trailing comment containing quotes does not leak into the result.
#
# Returns the String version.
# Raises RuntimeError if the file has no quoted VERSION assignment.
def version
  path = "lib/#{name}.rb"
  found = File.read(path).match(/^\s*VERSION\s*=\s*(['"])(.*?)\1/)
  raise "No VERSION constant found in #{path}" unless found
  found[2]
end
|
19
|
+
|
20
|
+
# Today's date as a "YYYY-MM-DD" String, written into the gemspec's date field.
def date
  Date.today.iso8601
end
|
23
|
+
|
24
|
+
# The value written into the gemspec's rubyforge_project field; it is simply
# the gem name.
def rubyforge_project
  name
end
|
27
|
+
|
28
|
+
# Filename of the gemspec: the gem name with a ".gemspec" extension.
def gemspec_file
  [name, 'gemspec'].join('.')
end
|
31
|
+
|
32
|
+
# Filename of the built gem package, "<name>-<version>.gem".
def gem_file
  format('%s-%s.gem', name, version)
end
|
35
|
+
|
36
|
+
# Rewrites one single-quoted header assignment inside the gemspec text.
#
# head        - String gemspec header; it is modified in place.
# header_name - Symbol naming both the gemspec field (".<header_name> = '...'")
#               and the helper method whose return value becomes the new value.
#
# Returns head if a substitution was made, or nil if the field was not found.
def replace_header(head, header_name)
  assignment = /(\.#{header_name}\s*= ').*'/
  head.sub!(assignment) do
    prefix = Regexp.last_match(1)
    "#{prefix}#{send(header_name)}'"
  end
end
|
39
|
+
|
40
|
+
#############################################################################
|
41
|
+
#
|
42
|
+
# Standard tasks
|
43
|
+
#
|
44
|
+
#############################################################################
|
45
|
+
|
46
|
+
# Running `rake` with no arguments runs the test suite.
task :default => :test

require 'rake/testtask'

# Defines the :test task. It picks up every test/**/*_test.rb file and runs
# verbosely, with 'lib' and 'test' appended to the task's libs list.
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.pattern = 'test/**/*_test.rb'
  t.verbose = true
end
|
54
|
+
|
55
|
+
desc "Open an irb session preloaded with this library"
task :console do
  # The old `-rubygems` flag is gone on purpose: it required ubygems.rb, which
  # Ruby 2.5 removed, so irb would fail to start. The library itself only
  # needs the standard library, so nothing is lost on older Rubies either.
  sh "irb -r ./lib/#{name}.rb"
end
|
59
|
+
|
60
|
+
#############################################################################
|
61
|
+
#
|
62
|
+
# Custom tasks (add your own tasks here)
|
63
|
+
#
|
64
|
+
#############################################################################
|
65
|
+
|
66
|
+
|
67
|
+
|
68
|
+
#############################################################################
|
69
|
+
#
|
70
|
+
# Packaging tasks
|
71
|
+
#
|
72
|
+
#############################################################################
|
73
|
+
|
74
|
+
desc "Create tag v#{version} and build and push #{gem_file} to Rubygems"
task :release => :build do
  # Releases are only cut from master; bail out immediately anywhere else.
  on_master = `git branch` =~ /^\* master$/
  unless on_master
    puts "You must be on the master branch to release!"
    exit!
  end

  # Commit, tag, push the branch and the tag, then publish the built gem.
  # `sh` raises on a failing command, so the sequence stops at the first error.
  [
    "git commit --allow-empty -a -m 'Release #{version}'",
    "git tag v#{version}",
    "git push origin master",
    "git push origin v#{version}",
    "gem push pkg/#{name}-#{version}.gem"
  ].each { |command| sh command }
end
|
86
|
+
|
87
|
+
desc "Build #{gem_file} into the pkg directory"
task :build => :gemspec do
  # Make sure pkg/ exists, build the gem in place, then move it into pkg/.
  [
    "mkdir -p pkg",
    "gem build #{gemspec_file}",
    "mv #{gem_file} pkg"
  ].each { |command| sh command }
end
|
93
|
+
|
94
|
+
desc "Generate #{gemspec_file}"
task :gemspec => :validate do
  marker = " # = MANIFEST =\n"

  # The gemspec is three sections separated by the marker line: the header,
  # the generated file manifest, and the tail. The old manifest is discarded.
  head, _old_manifest, tail = File.read(gemspec_file).split(marker)

  # Refresh name, version and date in the header. Drop :rubyforge_project from
  # this list if your rubyforge_project has a different name.
  [:name, :version, :date, :rubyforge_project].each do |field|
    replace_header(head, field)
  end

  # Build the file list from git ls-files, skipping dotfiles and any path that
  # starts with "rdoc" or "pkg".
  tracked = `git ls-files`.split("\n").sort
  listed = tracked.reject { |path| path =~ /^(?:\.|rdoc|pkg)/ }
  files = listed.map { |path| " #{path}" }.join("\n")

  # Stitch the three sections back together and write the file.
  manifest = " s.files = %w[\n#{files}\n ]\n"
  File.open(gemspec_file, 'w') do |io|
    io.write([head, manifest, tail].join(marker))
  end
  puts "Updated #{gemspec_file}"
end
|
122
|
+
|
123
|
+
desc "Validate #{gemspec_file}"
task :validate do
  # lib/ may hold only lib/<name>.rb and a lib/<name> directory.
  allowed = ["lib/#{name}.rb", "lib/#{name}"]
  extras = Dir['lib/*'] - allowed
  if extras.any?
    puts "Directory `lib` should only contain a `#{name}.rb` file and `#{name}` dir."
    exit!
  end

  # The version lives in the library source, not in a top-level VERSION file.
  if Dir['VERSION*'].any?
    puts "A `VERSION` file at root level violates Gem best practices."
    exit!
  end
end
|
135
|
+
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require 'digest/sha1'
|
2
|
+
|
3
|
+
# Tracks which pieces of content have already been seen. Each piece of content
# is reduced to a short fingerprint and kept in a Redis sorted set, scored by
# the time it was created.
class UniqueContentSet
  VERSION = '0.0.1'

  class << self
    # The Redis client that every newly created set uses.
    attr_accessor :redis

    # String placed at the front of every Redis key.
    attr_accessor :redis_prefix
  end

  self.redis_prefix = "unique"

  # The String Redis key of this set.
  attr_reader :key

  # Public: Builds a set whose Redis key is the class-wide prefix followed by
  # the given pieces, joined with ':'.
  #
  # args - Any number of key pieces. nil pieces are dropped; the rest are
  #        converted with #to_s.
  def initialize(*args)
    @redis = self.class.redis
    pieces = [self.class.redis_prefix, *args].compact
    @key = pieces.map { |piece| piece.to_s }.join(":")
  end

  # Public: Adds the given content to the current set, scored by the
  # given time.
  #
  # content - String content to add to the set.
  # time    - The Time the content was created (anything responding to #to_i).
  #           Defaults to the current time.
  #
  # Returns true if this is the first occurrence of the content, or false.
  def add(content, time = Time.now)
    score = time.to_i
    @redis.zadd(@key, score, member_from(content))
  end

  # Public: Looks for the given content in the current set.
  #
  # content - String content that is being checked.
  #
  # Returns true if the content is a member of the set, or false.
  def exist?(content)
    @redis.zscore(@key, member_from(content)) ? true : false
  end

  # Public: Removes content posted up to the given time.
  #
  # time - The latest Time that should be purged from the set.
  #
  # NOTE: the score range handed to Redis runs from 0 to time.to_i + 1, so an
  # entry scored one second after `time` is removed as well.
  #
  # Returns the number of removed entries.
  def delete_before(time)
    upper_bound = time.to_i + 1
    @redis.zremrangebyscore(@key, 0, upper_bound)
  end

  private

  # Encodes the content into a value that can be used to quickly check
  # uniqueness in the set.
  #
  # content - The String content (converted with #to_s).
  #
  # Returns a String made of the size of the content and the SHA1 hex digest
  # of the content, separated by a colon.
  def member_from(content)
    text = content.to_s
    "#{text.size}:#{Digest::SHA1.hexdigest(text)}"
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require File.expand_path("../../lib/unique_content_set", __FILE__)
|
2
|
+
require 'test/unit'
|
3
|
+
require 'rubygems'
|
4
|
+
require 'redis'
|
5
|
+
|
6
|
+
# Point the library at a Redis connection and switch it to the database named
# by REDIS_DB (7 when unset); the tests flush that database before each run.
UniqueContentSet.redis = Redis.new
UniqueContentSet.redis.select(ENV.fetch('REDIS_DB', 7))
|
8
|
+
|
9
|
+
class UniqueContentSetTest < Test::Unit::TestCase
  # Every test starts from a flushed database holding only 'abc', scored 1.
  def setup
    UniqueContentSet.redis.flushdb
    @set = UniqueContentSet.new(:abc)
    @set.add('abc', 1)
  end

  def test_adding_unique_content
    assert_equal true, @set.add('def')
  end

  def test_adding_repeated_content
    assert_equal false, @set.add('abc')
  end

  def test_checking_content_existence
    assert @set.exist?('abc')
    assert !@set.exist?('def')
  end

  # delete_before(1) removes the entries scored 1 and 2 but keeps the one
  # scored 3.
  def test_removing_old_content
    [['def', 2], ['ghi', 3]].each { |content, score| @set.add(content, score) }
    %w[abc def ghi].each { |content| assert @set.exist?(content) }

    @set.delete_before(1)

    %w[abc def].each { |content| assert !@set.exist?(content) }
    assert @set.exist?('ghi')
  end
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
## This is the rakegem gemspec template. Make sure you read and understand
|
2
|
+
## all of the comments. Some sections require modification, and others can
|
3
|
+
## be deleted if you don't need them. Once you understand the contents of
|
4
|
+
## this file, feel free to delete any comments that begin with two hash marks.
|
5
|
+
## You can find comprehensive Gem::Specification documentation, at
|
6
|
+
## http://docs.rubygems.org/read/chapter/20
|
7
|
+
Gem::Specification.new do |s|
  s.specification_version = 2 if s.respond_to? :specification_version=
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.rubygems_version = '1.3.5'

  ## Leave these as is they will be modified for you by the rake gemspec task.
  ## If your rubyforge_project name is different, then edit it and comment out
  ## the sub! line in the Rakefile
  s.name = 'unique_content_set'
  s.version = '0.0.1'
  s.date = '2011-03-28'
  # rubyforge_project= is deprecated in newer RubyGems, so only set it where
  # the writer still exists.
  s.rubyforge_project = 'unique_content_set' if s.respond_to?(:rubyforge_project=)

  ## Make sure your summary is short. The description may be as long
  ## as you like.
  s.summary = "Check for uniquely created content in a Redis set."
  s.description = "Stores fingerprints of content in a Redis sorted set, scored by creation time, so an application can tell whether new content has been seen before and purge old entries."

  ## List the primary authors. If there are a bunch of authors, it's probably
  ## better to set the email to an email list or something. If you don't have
  ## a custom homepage, consider using your GitHub URL or the like.
  s.authors = ["Rick Olson"]
  s.email = 'technoweenie@gmail.com'
  s.homepage = 'http://github.com/technoweenie/unique_content_set'

  ## This gets added to the $LOAD_PATH so that 'lib/NAME.rb' can be required as
  ## require 'NAME.rb' or '/lib/NAME/file.rb' can be as require 'NAME/file.rb'
  s.require_paths = %w[lib]

  ## This section is only necessary if you have C extensions.
  #s.require_paths << 'ext'
  #s.extensions = %w[ext/extconf.rb]

  ## If your gem includes any executables, list them here.
  #s.executables = ["name"]
  #s.default_executable = 'name'

  ## Specify any RDoc options here. You'll want to add your README and
  ## LICENSE files to the extra_rdoc_files list.
  s.rdoc_options = ["--charset=UTF-8"]
  s.extra_rdoc_files = %w[README.md LICENSE]

  ## List your runtime dependencies here. Runtime dependencies are those
  ## that are needed for an end user to actually USE your code.
  # Accept any redis 2.0.x or 2.1.x release. The previous pair of constraints
  # ("~> 2.1.0" and "~> 2.0.0") are ANDed by RubyGems and their ranges do not
  # overlap, so no version of redis could ever satisfy them.
  s.add_dependency('redis', [">= 2.0.0", "< 2.2.0"])

  ## List your development dependencies here. Development dependencies are
  ## those that are only needed during development
  #s.add_development_dependency('DEVDEPNAME', [">= 1.1.0", "< 2.0.0"])

  ## Leave this section as-is. It will be automatically generated from the
  ## contents of your Git repository via the gemspec task. DO NOT REMOVE
  ## THE MANIFEST COMMENTS, they are used as delimiters by the task.
  # = MANIFEST =
  s.files = %w[
    LICENSE
    README.md
    Rakefile
    lib/unique_content_set.rb
    test/unique_content_set_test.rb
    unique_content_set.gemspec
  ]
  # = MANIFEST =

  ## Test files will be grabbed from the file list. Make sure the path glob
  ## matches what you actually use.
  s.test_files = s.files.select { |path| path =~ /^test\/.*_test\.rb/ }
end
|
75
|
+
|
76
|
+
|
metadata
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: unique_content_set
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Rick Olson
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-03-28 00:00:00 -07:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: redis
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 11
|
30
|
+
segments:
|
31
|
+
- 2
|
32
|
+
- 1
|
33
|
+
- 0
|
34
|
+
version: 2.1.0
|
35
|
+
- - ~>
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
hash: 15
|
38
|
+
segments:
|
39
|
+
- 2
|
40
|
+
- 0
|
41
|
+
- 0
|
42
|
+
version: 2.0.0
|
43
|
+
type: :runtime
|
44
|
+
version_requirements: *id001
|
45
|
+
description: Long description. Maybe copied from the README.
|
46
|
+
email: technoweenie@gmail.com
|
47
|
+
executables: []
|
48
|
+
|
49
|
+
extensions: []
|
50
|
+
|
51
|
+
extra_rdoc_files:
|
52
|
+
- README.md
|
53
|
+
- LICENSE
|
54
|
+
files:
|
55
|
+
- LICENSE
|
56
|
+
- README.md
|
57
|
+
- Rakefile
|
58
|
+
- lib/unique_content_set.rb
|
59
|
+
- test/unique_content_set_test.rb
|
60
|
+
- unique_content_set.gemspec
|
61
|
+
has_rdoc: true
|
62
|
+
homepage: http://github.com/technoweenie/unique_content_set
|
63
|
+
licenses: []
|
64
|
+
|
65
|
+
post_install_message:
|
66
|
+
rdoc_options:
|
67
|
+
- --charset=UTF-8
|
68
|
+
require_paths:
|
69
|
+
- lib
|
70
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
hash: 3
|
76
|
+
segments:
|
77
|
+
- 0
|
78
|
+
version: "0"
|
79
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
80
|
+
none: false
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
hash: 3
|
85
|
+
segments:
|
86
|
+
- 0
|
87
|
+
version: "0"
|
88
|
+
requirements: []
|
89
|
+
|
90
|
+
rubyforge_project: unique_content_set
|
91
|
+
rubygems_version: 1.3.7
|
92
|
+
signing_key:
|
93
|
+
specification_version: 2
|
94
|
+
summary: Short description used in Gem listings.
|
95
|
+
test_files:
|
96
|
+
- test/unique_content_set_test.rb
|