canonicurl 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +2 -0
- data/canonicurl.gemspec +23 -0
- data/lib/canonicurl/cache.rb +123 -0
- data/lib/canonicurl/version.rb +3 -0
- data/lib/canonicurl.rb +5 -0
- metadata +105 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Rakefile
ADDED
data/canonicurl.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "canonicurl/version"
|
4
|
+
|
5
|
+
# Gem specification for canonicurl: identity, packaged files and runtime
# dependencies. File lists are taken from what git tracks in the checkout.
Gem::Specification.new do |spec|
  # Identity
  spec.name        = 'canonicurl'
  spec.version     = Canonicurl::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ['David Dai']
  spec.email       = ['david.github@gmail.com']
  spec.homepage    = 'https://github.com/newtonapple/canonicurl'
  spec.summary     = 'A Canonical URL cache using Redis'
  spec.description = ''

  spec.rubyforge_project = 'canonicurl'

  # Packaged files: everything tracked by git; tests and executables are
  # the tracked files under the conventional directories.
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ['lib']

  # Runtime dependencies
  spec.add_dependency 'redis', '>= 2.2.0'
  spec.add_dependency 'em-http-request', '>= 1.0.0.beta.3'
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
require 'digest/md5'
|
2
|
+
require 'redis'
|
3
|
+
require 'em-http'
|
4
|
+
|
5
|
+
module Canonicurl
  # Redis-backed cache that maps a URL to its canonical URL.
  #
  # Every URL is stored under "<key_prefix><MD5 hex digest of the URL>".
  # The stored value is either a canonical URL (always longer than one
  # character) or a one-character status code:
  #
  #   CANONICAL ('C') - the URL is itself canonical
  #   ERROR     ('E') - the HTTP request errored or an exception was raised
  #   LOCKED    ('L') - placeholder written by #fetch for an unseen URL
  #   RESOLVING ('R') - an HTTP resolution has been started
  #   a digit         - first digit of a non-2xx HTTP status ('4', '5', ...)
  #
  # NOTE(review): canonical results are written with SETEX (@ttl), but the
  # status codes above are written with SET/SETNX and therefore never expire.
  # Confirm whether error/resolving states are meant to be permanent.
  class Cache
    CANONICAL = 'C'
    ERROR     = 'E'
    LOCKED    = 'L'
    RESOLVING = 'R'

    TTL                = 60 * 60 * 24 * 90 # 90 days ~ 3 months
    REDIRECTS          = 5
    CONNECTION_TIMEOUT = 5
    KEY_PREFIX         = 'curl:'

    attr_accessor :db, :ttl, :timeout, :redirects
    attr_reader :key_prefix

    # Tells a cached URL apart from a one-character status code.
    #
    # code_or_url - a value as returned by #get or #fetch (may be nil).
    #
    # Returns code_or_url when it is longer than one character, otherwise nil.
    # (The fallback used to call this method again without arguments, which
    # raised ArgumentError for every status code and for nil.)
    def self.url(code_or_url)
      code_or_url && code_or_url.size > 1 ? code_or_url : nil
    end

    # options - Hash of overrides (all optional):
    #           :db         - Redis-like client responding to get, set,
    #                         setex and setnx (default: a new connection)
    #           :ttl        - expiry in seconds for resolved entries
    #           :timeout    - HTTP connection timeout in seconds; the
    #                         inactivity timeout is twice this value
    #           :redirects  - maximum number of redirects to follow
    #           :key_prefix - prefix for every Redis key
    def initialize(options={})
      @db         = options[:db]         || default_connection
      @ttl        = options[:ttl]        || TTL
      @timeout    = options[:timeout]    || CONNECTION_TIMEOUT
      @redirects  = options[:redirects]  || REDIRECTS
      @key_prefix = options[:key_prefix] || KEY_PREFIX
    end

    # Returns the raw cached value for url (canonical URL, status code or nil)
    # without triggering a resolution.
    def get(url)
      @db.get key(url)
    end

    # Looks up url and starts an HTTP resolution when it has not been seen.
    #
    # url       - the URL to look up.
    # callbacks - Hash of optional callables invoked by the resolution:
    #             :resolved  - called with (canonical_url, http) on a 2xx
    #             :failed    - called with (http) on any other status
    #             :error     - called with (http) when the request errors
    #             :exception - called with (exception) when resolving raises
    #
    # Returns the cached canonical URL when there is one. When url itself is
    # canonical, returns the block's value if a block is given, otherwise url.
    # Returns RESOLVING when a resolution was started by this call, and the
    # stored status code in every other case.
    def fetch(url, callbacks={})
      k = key(url)
      @db.setnx(k, LOCKED) # lock it if key doesn't exist

      result = @db.get(k)
      return result if result && result.size > 1 # a cached canonical URL

      case result
      when CANONICAL
        # Guarded: a bare yield raises LocalJumpError when no block is given.
        block_given? ? yield(url) : url
      when LOCKED
        resolve(url, k, callbacks)
        RESOLVING
      else
        result
      end
    end

    # Records canonical_url as the canonical form of url, expiring after @ttl.
    #
    # url_key - precomputed key for url; derived with #key when nil.
    #
    # When url is already canonical only the CANONICAL marker is stored;
    # otherwise the mapping is stored and canonical_url is marked CANONICAL too.
    def set(url, canonical_url, url_key=nil)
      url_key ||= key(url)
      if url == canonical_url
        @db.setex(url_key, @ttl, CANONICAL)
      else
        @db.setex(url_key, @ttl, canonical_url)
        @db.setex(key(canonical_url), @ttl, CANONICAL) # preemptively set the canonical_url
      end
    end

    # Returns the Redis key for url: the key prefix followed by its MD5 hex digest.
    def key(url)
      "#{@key_prefix}#{Digest::MD5.hexdigest(url)}"
    end

    private

    # Builds the default Redis client. Redis.connect is used when the
    # installed client still provides it; otherwise fall back to Redis.new.
    def default_connection
      Redis.respond_to?(:connect) ? Redis.connect : Redis.new
    end

    # Issues a GET for url (following up to @redirects redirects) and stores
    # the outcome under url_key, invoking the matching callback.
    #
    # If no EventMachine reactor is running, one is started for the request
    # and stopped again from the request's callback/errback.
    def resolve(url, url_key, callbacks)
      em_already_running = true # assume we do not own the reactor until #em says otherwise
      @db.set(url_key, RESOLVING)
      em do |running|
        em_already_running = running
        http = EM::HttpRequest.new(url,
                                   :connection_timeout => @timeout,
                                   :inactivity_timeout => @timeout * 2).get(:redirects => @redirects)
        http.callback {
          status = http.response_header.status.to_i
          case status
          when 200...300
            canonical_url = http.last_effective_url.to_s
            set url, canonical_url, url_key
            callbacks[:resolved].call(canonical_url, http) if callbacks[:resolved]
          else
            # Only the status class is kept, e.g. 404 -> '4'.
            @db.set url_key, (status / 100).to_s
            callbacks[:failed].call(http) if callbacks[:failed]
          end
          EM.stop unless em_already_running
        }
        http.errback {
          @db.set(url_key, ERROR)
          callbacks[:error].call(http) if callbacks[:error]
          EM.stop unless em_already_running
        }
      end
    rescue StandardError => e
      # StandardError rather than Exception, so interrupts and exit requests
      # are not swallowed and recorded as cache errors.
      @db.set(url_key, ERROR)
      callbacks[:exception].call(e) if callbacks[:exception]
      # Only stop a reactor we started, and only if it is still running;
      # the failure may already have taken the event loop down.
      EM.stop if !em_already_running && EM.reactor_running?
    end

    # Yields inside an EventMachine reactor. The block receives true when a
    # reactor was already running, false when one was started for this call
    # (in which case this method blocks until the reactor is stopped).
    def em
      if EM.reactor_running?
        yield true
      else
        EM.run do
          yield false
        end
      end
    end
  end
end
|
data/lib/canonicurl.rb
ADDED
metadata
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: canonicurl
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 2
|
10
|
+
version: 0.0.2
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- David Dai
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-04-06 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: redis
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
hash: 7
|
29
|
+
segments:
|
30
|
+
- 2
|
31
|
+
- 2
|
32
|
+
- 0
|
33
|
+
version: 2.2.0
|
34
|
+
type: :runtime
|
35
|
+
version_requirements: *id001
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: em-http-request
|
38
|
+
prerelease: false
|
39
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
hash: 62196357
|
45
|
+
segments:
|
46
|
+
- 1
|
47
|
+
- 0
|
48
|
+
- 0
|
49
|
+
- beta
|
50
|
+
- 3
|
51
|
+
version: 1.0.0.beta.3
|
52
|
+
type: :runtime
|
53
|
+
version_requirements: *id002
|
54
|
+
description: ""
|
55
|
+
email:
|
56
|
+
- david.github@gmail.com
|
57
|
+
executables: []
|
58
|
+
|
59
|
+
extensions: []
|
60
|
+
|
61
|
+
extra_rdoc_files: []
|
62
|
+
|
63
|
+
files:
|
64
|
+
- .gitignore
|
65
|
+
- Gemfile
|
66
|
+
- Rakefile
|
67
|
+
- canonicurl.gemspec
|
68
|
+
- lib/canonicurl.rb
|
69
|
+
- lib/canonicurl/cache.rb
|
70
|
+
- lib/canonicurl/version.rb
|
71
|
+
homepage: https://github.com/newtonapple/canonicurl
|
72
|
+
licenses: []
|
73
|
+
|
74
|
+
post_install_message:
|
75
|
+
rdoc_options: []
|
76
|
+
|
77
|
+
require_paths:
|
78
|
+
- lib
|
79
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
80
|
+
none: false
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
hash: 3
|
85
|
+
segments:
|
86
|
+
- 0
|
87
|
+
version: "0"
|
88
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
hash: 3
|
94
|
+
segments:
|
95
|
+
- 0
|
96
|
+
version: "0"
|
97
|
+
requirements: []
|
98
|
+
|
99
|
+
rubyforge_project: canonicurl
|
100
|
+
rubygems_version: 1.7.2
|
101
|
+
signing_key:
|
102
|
+
specification_version: 3
|
103
|
+
summary: A Canonical URL cache using Redis
|
104
|
+
test_files: []
|
105
|
+
|