unique_content_set 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +22 -0
- data/README.md +42 -0
- data/Rakefile +135 -0
- data/lib/unique_content_set.rb +71 -0
- data/test/unique_content_set_test.rb +42 -0
- data/unique_content_set.gemspec +76 -0
- metadata +96 -0
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) Rick Olson
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
22
|
+
|
data/README.md
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# Unique Content Set
|
2
|
+
|
3
|
+
Check for uniquely created content in a Redis set.
|
4
|
+
|
5
|
+
## INSTALL
|
6
|
+
|
7
|
+
gem install unique_content_set
|
8
|
+
|
9
|
+
## USAGE
|
10
|
+
|
11
|
+
# uses a Redis Set named something like "unique:5:messages"
|
12
|
+
set = UniqueContentSet.new user.id, :messages
|
13
|
+
|
14
|
+
if set.add(@message.body, @message.created_at)
|
15
|
+
# this is the first time this message body has been posted, do something!
|
16
|
+
end
|
17
|
+
|
18
|
+
if set.exist?(@message.body)
|
19
|
+
# this message body has been posted before, do something!
|
20
|
+
end
|
21
|
+
|
22
|
+
# Purge old message content.
|
23
|
+
set.delete_before(1.month.ago)
|
24
|
+
|
25
|
+
## Contribute
|
26
|
+
|
27
|
+
If you'd like to hack on UniqueContentSet, start by forking the repo on GitHub:
|
28
|
+
|
29
|
+
`https://github.com/technoweenie/unique_content_set`
|
30
|
+
|
31
|
+
The best way to get your changes merged back into core is as follows:
|
32
|
+
|
33
|
+
* Clone down your fork
|
34
|
+
* Create a thoughtfully named topic branch to contain your change
|
35
|
+
* Hack away
|
36
|
+
* Add tests and make sure everything still passes by running rake
|
37
|
+
* If you are adding new functionality, document it in the README
|
38
|
+
* Do not change the version number, I will do that on my end
|
39
|
+
* If necessary, rebase your commits into logical chunks, without errors
|
40
|
+
* Push the branch up to GitHub
|
41
|
+
* Send a pull request to the `technoweenie/unique_content_set` project.
|
42
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'date'
|
4
|
+
|
5
|
+
#############################################################################
|
6
|
+
#
|
7
|
+
# Helper functions
|
8
|
+
#
|
9
|
+
#############################################################################
|
10
|
+
|
11
|
+
def name
|
12
|
+
@name ||= Dir['*.gemspec'].first.split('.').first
|
13
|
+
end
|
14
|
+
|
15
|
+
def version
|
16
|
+
line = File.read("lib/#{name}.rb")[/^\s*VERSION\s*=\s*.*/]
|
17
|
+
line.match(/.*VERSION\s*=\s*['"](.*)['"]/)[1]
|
18
|
+
end
|
19
|
+
|
20
|
+
def date
|
21
|
+
Date.today.to_s
|
22
|
+
end
|
23
|
+
|
24
|
+
def rubyforge_project
|
25
|
+
name
|
26
|
+
end
|
27
|
+
|
28
|
+
def gemspec_file
|
29
|
+
"#{name}.gemspec"
|
30
|
+
end
|
31
|
+
|
32
|
+
def gem_file
|
33
|
+
"#{name}-#{version}.gem"
|
34
|
+
end
|
35
|
+
|
36
|
+
def replace_header(head, header_name)
|
37
|
+
head.sub!(/(\.#{header_name}\s*= ').*'/) { "#{$1}#{send(header_name)}'"}
|
38
|
+
end
|
39
|
+
|
40
|
+
#############################################################################
|
41
|
+
#
|
42
|
+
# Standard tasks
|
43
|
+
#
|
44
|
+
#############################################################################
|
45
|
+
|
46
|
+
task :default => :test
|
47
|
+
|
48
|
+
require 'rake/testtask'
|
49
|
+
Rake::TestTask.new(:test) do |test|
|
50
|
+
test.libs << 'lib' << 'test'
|
51
|
+
test.pattern = 'test/**/*_test.rb'
|
52
|
+
test.verbose = true
|
53
|
+
end
|
54
|
+
|
55
|
+
desc "Open an irb session preloaded with this library"
|
56
|
+
task :console do
|
57
|
+
sh "irb -rubygems -r ./lib/#{name}.rb"
|
58
|
+
end
|
59
|
+
|
60
|
+
#############################################################################
|
61
|
+
#
|
62
|
+
# Custom tasks (add your own tasks here)
|
63
|
+
#
|
64
|
+
#############################################################################
|
65
|
+
|
66
|
+
|
67
|
+
|
68
|
+
#############################################################################
|
69
|
+
#
|
70
|
+
# Packaging tasks
|
71
|
+
#
|
72
|
+
#############################################################################
|
73
|
+
|
74
|
+
desc "Create tag v#{version} and build and push #{gem_file} to Rubygems"
|
75
|
+
task :release => :build do
|
76
|
+
unless `git branch` =~ /^\* master$/
|
77
|
+
puts "You must be on the master branch to release!"
|
78
|
+
exit!
|
79
|
+
end
|
80
|
+
sh "git commit --allow-empty -a -m 'Release #{version}'"
|
81
|
+
sh "git tag v#{version}"
|
82
|
+
sh "git push origin master"
|
83
|
+
sh "git push origin v#{version}"
|
84
|
+
sh "gem push pkg/#{name}-#{version}.gem"
|
85
|
+
end
|
86
|
+
|
87
|
+
desc "Build #{gem_file} into the pkg directory"
|
88
|
+
task :build => :gemspec do
|
89
|
+
sh "mkdir -p pkg"
|
90
|
+
sh "gem build #{gemspec_file}"
|
91
|
+
sh "mv #{gem_file} pkg"
|
92
|
+
end
|
93
|
+
|
94
|
+
desc "Generate #{gemspec_file}"
|
95
|
+
task :gemspec => :validate do
|
96
|
+
# read spec file and split out manifest section
|
97
|
+
spec = File.read(gemspec_file)
|
98
|
+
head, manifest, tail = spec.split(" # = MANIFEST =\n")
|
99
|
+
|
100
|
+
# replace name, version, and date
|
101
|
+
replace_header(head, :name)
|
102
|
+
replace_header(head, :version)
|
103
|
+
replace_header(head, :date)
|
104
|
+
#comment this out if your rubyforge_project has a different name
|
105
|
+
replace_header(head, :rubyforge_project)
|
106
|
+
|
107
|
+
# determine file list from git ls-files
|
108
|
+
files = `git ls-files`.
|
109
|
+
split("\n").
|
110
|
+
sort.
|
111
|
+
reject { |file| file =~ /^\./ }.
|
112
|
+
reject { |file| file =~ /^(rdoc|pkg)/ }.
|
113
|
+
map { |file| " #{file}" }.
|
114
|
+
join("\n")
|
115
|
+
|
116
|
+
# piece file back together and write
|
117
|
+
manifest = " s.files = %w[\n#{files}\n ]\n"
|
118
|
+
spec = [head, manifest, tail].join(" # = MANIFEST =\n")
|
119
|
+
File.open(gemspec_file, 'w') { |io| io.write(spec) }
|
120
|
+
puts "Updated #{gemspec_file}"
|
121
|
+
end
|
122
|
+
|
123
|
+
desc "Validate #{gemspec_file}"
|
124
|
+
task :validate do
|
125
|
+
libfiles = Dir['lib/*'] - ["lib/#{name}.rb", "lib/#{name}"]
|
126
|
+
unless libfiles.empty?
|
127
|
+
puts "Directory `lib` should only contain a `#{name}.rb` file and `#{name}` dir."
|
128
|
+
exit!
|
129
|
+
end
|
130
|
+
unless Dir['VERSION*'].empty?
|
131
|
+
puts "A `VERSION` file at root level violates Gem best practices."
|
132
|
+
exit!
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require 'digest/sha1'
|
2
|
+
|
3
|
+
# Stores a set of unique content. This is used to check if new content being
|
4
|
+
# added has been seen already.
|
5
|
+
class UniqueContentSet
|
6
|
+
VERSION = '0.0.1'
|
7
|
+
|
8
|
+
class << self
|
9
|
+
# Sets are stored in Redis.
|
10
|
+
attr_accessor :redis,
|
11
|
+
|
12
|
+
# Prefix all Redis keys with a certain value.
|
13
|
+
:redis_prefix
|
14
|
+
end
|
15
|
+
|
16
|
+
self.redis_prefix = "unique"
|
17
|
+
|
18
|
+
attr_reader :key
|
19
|
+
|
20
|
+
def initialize(*args)
|
21
|
+
@redis = self.class.redis
|
22
|
+
|
23
|
+
# Turn the given args into a redis key, with pieces separated by ':'.
|
24
|
+
args.unshift self.class.redis_prefix
|
25
|
+
args.compact!
|
26
|
+
args.map! { |a| a.to_s }
|
27
|
+
@key = args * ":"
|
28
|
+
end
|
29
|
+
|
30
|
+
# Public: Adds the given content to the current set, scored by the
|
31
|
+
# given time.
|
32
|
+
#
|
33
|
+
# content - String content to add to the set.
|
34
|
+
# time - The Time the content was created (default: Time.now).
|
35
|
+
#
|
36
|
+
# Returns true if this is the first occurrence of the content, or false.
|
37
|
+
def add(content, time = Time.now)
|
38
|
+
@redis.zadd(@key, time.to_i, member_from(content))
|
39
|
+
end
|
40
|
+
|
41
|
+
# Public: Looks for the given content in the current set.
|
42
|
+
#
|
43
|
+
# content - String content that is being checked.
|
44
|
+
#
|
45
|
+
# Returns true if the content is a member of the set, or false.
|
46
|
+
def exist?(content)
|
47
|
+
!!@redis.zscore(@key, member_from(content))
|
48
|
+
end
|
49
|
+
|
50
|
+
# Public: Removes content posted before the given time.
|
51
|
+
#
|
52
|
+
# time - The cutoff Time; entries scored from 0 up to and including time.to_i + 1 are purged from the set.
|
53
|
+
#
|
54
|
+
# Returns a Fixnum of the number of removed entries.
|
55
|
+
def delete_before(time)
|
56
|
+
@redis.zremrangebyscore(@key, 0, time.to_i+1)
|
57
|
+
end
|
58
|
+
|
59
|
+
private
|
60
|
+
# Encodes the content into a value that can be used to quickly check
|
61
|
+
# uniqueness in the set.
|
62
|
+
#
|
63
|
+
# content - The String content.
|
64
|
+
#
|
65
|
+
# Returns a String of the size of the content, plus a SHA of the content,
|
66
|
+
# separated by a colon.
|
67
|
+
def member_from(content)
|
68
|
+
content = content.to_s
|
69
|
+
'%d:%s' % [content.size, Digest::SHA1.hexdigest(content)]
|
70
|
+
end
|
71
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require File.expand_path("../../lib/unique_content_set", __FILE__)
|
2
|
+
require 'test/unit'
|
3
|
+
require 'rubygems'
|
4
|
+
require 'redis'
|
5
|
+
|
6
|
+
UniqueContentSet.redis = Redis.new
|
7
|
+
UniqueContentSet.redis.select ENV['REDIS_DB'] || 7
|
8
|
+
|
9
|
+
class UniqueContentSetTest < Test::Unit::TestCase
|
10
|
+
def setup
|
11
|
+
UniqueContentSet.redis.flushdb
|
12
|
+
@set = UniqueContentSet.new :abc
|
13
|
+
@set.add 'abc', 1
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_adding_unique_content
|
17
|
+
assert_equal true, @set.add('def')
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_adding_repeated_content
|
21
|
+
assert_equal false, @set.add('abc')
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_checking_content_existence
|
25
|
+
assert @set.exist?('abc')
|
26
|
+
assert !@set.exist?('def')
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_removing_old_content
|
30
|
+
@set.add 'def', 2
|
31
|
+
@set.add 'ghi', 3
|
32
|
+
assert @set.exist?('abc')
|
33
|
+
assert @set.exist?('def')
|
34
|
+
assert @set.exist?('ghi')
|
35
|
+
|
36
|
+
@set.delete_before(1)
|
37
|
+
|
38
|
+
assert !@set.exist?('abc')
|
39
|
+
assert !@set.exist?('def')
|
40
|
+
assert @set.exist?('ghi')
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
## This is the rakegem gemspec template. Make sure you read and understand
|
2
|
+
## all of the comments. Some sections require modification, and others can
|
3
|
+
## be deleted if you don't need them. Once you understand the contents of
|
4
|
+
## this file, feel free to delete any comments that begin with two hash marks.
|
5
|
+
## You can find comprehensive Gem::Specification documentation at
|
6
|
+
## http://docs.rubygems.org/read/chapter/20
|
7
|
+
Gem::Specification.new do |s|
|
8
|
+
s.specification_version = 2 if s.respond_to? :specification_version=
|
9
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
10
|
+
s.rubygems_version = '1.3.5'
|
11
|
+
|
12
|
+
## Leave these as is; they will be modified for you by the rake gemspec task.
|
13
|
+
## If your rubyforge_project name is different, then edit it and comment out
|
14
|
+
## the sub! line in the Rakefile
|
15
|
+
s.name = 'unique_content_set'
|
16
|
+
s.version = '0.0.1'
|
17
|
+
s.date = '2011-03-28'
|
18
|
+
s.rubyforge_project = 'unique_content_set'
|
19
|
+
|
20
|
+
## Make sure your summary is short. The description may be as long
|
21
|
+
## as you like.
|
22
|
+
s.summary = "Short description used in Gem listings."
|
23
|
+
s.description = "Long description. Maybe copied from the README."
|
24
|
+
|
25
|
+
## List the primary authors. If there are a bunch of authors, it's probably
|
26
|
+
## better to set the email to an email list or something. If you don't have
|
27
|
+
## a custom homepage, consider using your GitHub URL or the like.
|
28
|
+
s.authors = ["Rick Olson"]
|
29
|
+
s.email = 'technoweenie@gmail.com'
|
30
|
+
s.homepage = 'http://github.com/technoweenie/unique_content_set'
|
31
|
+
|
32
|
+
## This gets added to the $LOAD_PATH so that 'lib/NAME.rb' can be required as
|
33
|
+
## require 'NAME.rb' or 'lib/NAME/file.rb' can be required as require 'NAME/file.rb'
|
34
|
+
s.require_paths = %w[lib]
|
35
|
+
|
36
|
+
## This section is only necessary if you have C extensions.
|
37
|
+
#s.require_paths << 'ext'
|
38
|
+
#s.extensions = %w[ext/extconf.rb]
|
39
|
+
|
40
|
+
## If your gem includes any executables, list them here.
|
41
|
+
#s.executables = ["name"]
|
42
|
+
#s.default_executable = 'name'
|
43
|
+
|
44
|
+
## Specify any RDoc options here. You'll want to add your README and
|
45
|
+
## LICENSE files to the extra_rdoc_files list.
|
46
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
47
|
+
s.extra_rdoc_files = %w[README.md LICENSE]
|
48
|
+
|
49
|
+
## List your runtime dependencies here. Runtime dependencies are those
|
50
|
+
## that are needed for an end user to actually USE your code.
|
51
|
+
s.add_dependency('redis', ["~> 2.1.0", "~> 2.0.0"])
|
52
|
+
|
53
|
+
## List your development dependencies here. Development dependencies are
|
54
|
+
## those that are only needed during development
|
55
|
+
#s.add_development_dependency('DEVDEPNAME', [">= 1.1.0", "< 2.0.0"])
|
56
|
+
|
57
|
+
## Leave this section as-is. It will be automatically generated from the
|
58
|
+
## contents of your Git repository via the gemspec task. DO NOT REMOVE
|
59
|
+
## THE MANIFEST COMMENTS, they are used as delimiters by the task.
|
60
|
+
# = MANIFEST =
|
61
|
+
s.files = %w[
|
62
|
+
LICENSE
|
63
|
+
README.md
|
64
|
+
Rakefile
|
65
|
+
lib/unique_content_set.rb
|
66
|
+
test/unique_content_set_test.rb
|
67
|
+
unique_content_set.gemspec
|
68
|
+
]
|
69
|
+
# = MANIFEST =
|
70
|
+
|
71
|
+
## Test files will be grabbed from the file list. Make sure the path glob
|
72
|
+
## matches what you actually use.
|
73
|
+
s.test_files = s.files.select { |path| path =~ /^test\/.*_test\.rb/ }
|
74
|
+
end
|
75
|
+
|
76
|
+
|
metadata
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: unique_content_set
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Rick Olson
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-03-28 00:00:00 -07:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: redis
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 11
|
30
|
+
segments:
|
31
|
+
- 2
|
32
|
+
- 1
|
33
|
+
- 0
|
34
|
+
version: 2.1.0
|
35
|
+
- - ~>
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
hash: 15
|
38
|
+
segments:
|
39
|
+
- 2
|
40
|
+
- 0
|
41
|
+
- 0
|
42
|
+
version: 2.0.0
|
43
|
+
type: :runtime
|
44
|
+
version_requirements: *id001
|
45
|
+
description: Long description. Maybe copied from the README.
|
46
|
+
email: technoweenie@gmail.com
|
47
|
+
executables: []
|
48
|
+
|
49
|
+
extensions: []
|
50
|
+
|
51
|
+
extra_rdoc_files:
|
52
|
+
- README.md
|
53
|
+
- LICENSE
|
54
|
+
files:
|
55
|
+
- LICENSE
|
56
|
+
- README.md
|
57
|
+
- Rakefile
|
58
|
+
- lib/unique_content_set.rb
|
59
|
+
- test/unique_content_set_test.rb
|
60
|
+
- unique_content_set.gemspec
|
61
|
+
has_rdoc: true
|
62
|
+
homepage: http://github.com/technoweenie/unique_content_set
|
63
|
+
licenses: []
|
64
|
+
|
65
|
+
post_install_message:
|
66
|
+
rdoc_options:
|
67
|
+
- --charset=UTF-8
|
68
|
+
require_paths:
|
69
|
+
- lib
|
70
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
hash: 3
|
76
|
+
segments:
|
77
|
+
- 0
|
78
|
+
version: "0"
|
79
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
80
|
+
none: false
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
hash: 3
|
85
|
+
segments:
|
86
|
+
- 0
|
87
|
+
version: "0"
|
88
|
+
requirements: []
|
89
|
+
|
90
|
+
rubyforge_project: unique_content_set
|
91
|
+
rubygems_version: 1.3.7
|
92
|
+
signing_key:
|
93
|
+
specification_version: 2
|
94
|
+
summary: Short description used in Gem listings.
|
95
|
+
test_files:
|
96
|
+
- test/unique_content_set_test.rb
|