canonicurl 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +2 -0
- data/canonicurl.gemspec +23 -0
- data/lib/canonicurl/cache.rb +123 -0
- data/lib/canonicurl/version.rb +3 -0
- data/lib/canonicurl.rb +5 -0
- metadata +105 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Rakefile
ADDED
data/canonicurl.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "canonicurl/version"
|
4
|
+
|
5
|
+
# Gem specification for canonicurl. The version string is read from
# Canonicurl::VERSION, which is loaded by the require above.
Gem::Specification.new do |spec|
  # Identity and authorship.
  spec.name        = "canonicurl"
  spec.version     = Canonicurl::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ["David Dai"]
  spec.email       = ["david.github@gmail.com"]
  spec.homepage    = "https://github.com/newtonapple/canonicurl"
  spec.summary     = "A Canonical URL cache using Redis"
  spec.description = ""

  spec.rubyforge_project = "canonicurl"

  # Packaged files are whatever git currently tracks; executables are listed
  # by base name only.
  tracked_files     = `git ls-files`.split("\n")
  tracked_tests     = `git ls-files -- {test,spec,features}/*`.split("\n")
  tracked_binaries  = `git ls-files -- bin/*`.split("\n")

  spec.files         = tracked_files
  spec.test_files    = tracked_tests
  spec.executables   = tracked_binaries.map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime dependencies.
  spec.add_dependency 'redis', '>= 2.2.0'
  spec.add_dependency 'em-http-request', '>= 1.0.0.beta.3'
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
require 'digest/md5'
|
2
|
+
require 'redis'
|
3
|
+
require 'em-http'
|
4
|
+
|
5
|
+
module Canonicurl
  # Redis-backed cache that maps a URL to its canonical URL, i.e. the last
  # effective URL reported by em-http after following redirects.
  #
  # Every URL is stored under "<key_prefix><MD5 hex digest of the URL>". The
  # stored value is either a canonical URL (always longer than one character)
  # or a one-character state code: one of the constants below, or the leading
  # digit of a non-2xx HTTP status (written by #resolve).
  class Cache
    CANONICAL = 'C' # the URL is its own canonical form
    ERROR     = 'E' # the request failed or an exception was raised
    LOCKED    = 'L' # key claimed by #fetch, resolution not started yet
    RESOLVING = 'R' # #resolve has taken over the key

    TTL = 60 * 60 * 24 * 90 # 90 days ~ 3 months
    REDIRECTS = 5
    CONNECTION_TIMEOUT = 5
    KEY_PREFIX = 'curl:'

    attr_accessor :db, :ttl, :timeout, :redirects
    attr_reader :key_prefix

    # Filters a value returned by #get / #fetch down to an actual URL.
    #
    # code_or_url - a cached value: a URL, a one-character state code, or nil.
    #
    # Returns code_or_url when it is longer than one character, otherwise nil.
    # (The previous else branch called this method without arguments and
    # therefore raised ArgumentError for every state code.)
    def self.url(code_or_url)
      code_or_url if code_or_url && code_or_url.size > 1
    end

    # options - Hash of settings; every key is optional:
    #           :db         - Redis client (default: Redis.connect)
    #           :ttl        - expiry in seconds for resolved entries (default: TTL)
    #           :timeout    - connection timeout; the inactivity timeout is
    #                         twice this value (default: CONNECTION_TIMEOUT)
    #           :redirects  - maximum redirects to follow (default: REDIRECTS)
    #           :key_prefix - prefix for every Redis key (default: KEY_PREFIX)
    def initialize(options={})
      @db         = options[:db]         || Redis.connect
      @ttl        = options[:ttl]        || TTL
      @timeout    = options[:timeout]    || CONNECTION_TIMEOUT
      @redirects  = options[:redirects]  || REDIRECTS
      @key_prefix = options[:key_prefix] || KEY_PREFIX
    end

    # Returns the raw cached value for url (canonical URL, state code or nil)
    # without triggering any resolution.
    def get(url)
      @db.get key(url)
    end

    # Looks up url and starts resolving it when nothing is known yet.
    #
    # url       - the URL to look up.
    # callbacks - optional Hash of callables handed to #resolve:
    #             :resolved, :failed, :error, :exception.
    #
    # Returns the cached canonical URL when one is stored. When url is cached
    # as CANONICAL, yields url and returns the block's value, or returns url
    # itself when no block is given. When the key is LOCKED, runs #resolve and
    # returns RESOLVING. Any other stored state code is returned as is.
    def fetch(url, callbacks={})
      k = key(url)
      @db.setnx(k, LOCKED) # lock it if key doesn't exist

      result = @db.get(k)
      return result if result && result.size > 1 # a cached canonical URL

      case result
      when CANONICAL
        # A bare `yield` here raised LocalJumpError for callers without a block.
        block_given? ? yield(url) : url
      when LOCKED
        resolve(url, k, callbacks)
        RESOLVING
      else
        result
      end
    end

    # Records canonical_url as the canonical form of url, expiring after @ttl.
    #
    # url_key - precomputed key for url; computed with #key when nil.
    #
    # When both URLs are equal only the CANONICAL marker is stored. Otherwise
    # the mapping is stored and canonical_url itself is marked CANONICAL too.
    def set(url, canonical_url, url_key=nil)
      url_key = url_key || key(url)
      if url == canonical_url
        @db.setex(url_key, @ttl, CANONICAL)
      else
        @db.setex(url_key, @ttl, canonical_url)
        @db.setex(key(canonical_url), @ttl, CANONICAL) # preemptively set the canonical_url
      end
    end

    # Returns the Redis key for url: the key prefix followed by the MD5 hex
    # digest of url.
    def key(url)
      "#{@key_prefix}#{Digest::MD5.hexdigest(url)}"
    end

    private

    # Issues a GET for url through em-http and stores the outcome under url_key.
    #
    # Outcomes:
    #   2xx status    - #set with the last effective URL, then
    #                   callbacks[:resolved].call(canonical_url, http)
    #   other status  - stores the first status digit (plain SET, no expiry),
    #                   then callbacks[:failed].call(http)
    #   errback       - stores ERROR, then callbacks[:error].call(http)
    #   StandardError - stores ERROR, then callbacks[:exception].call(e)
    #
    # Exceptions outside StandardError (SystemExit, Interrupt, ...) are no
    # longer swallowed; they propagate to the caller.
    def resolve(url, url_key, callbacks)
      # Stays true until #em reports otherwise, so a failure before the
      # reactor is entered never tries to stop it.
      em_already_running = true
      @db.set(url_key, RESOLVING)
      em do |running|
        em_already_running = running
        http = EM::HttpRequest.new(url,
                                   :connection_timeout => @timeout,
                                   :inactivity_timeout => @timeout * 2).get(:redirects => @redirects)
        http.callback do
          status = http.response_header.status.to_i
          case status
          when 200...300
            canonical_url = http.last_effective_url.to_s
            set url, canonical_url, url_key
            callbacks[:resolved].call(canonical_url, http) if callbacks[:resolved]
          else
            @db.set url_key, (status / 100).to_s
            callbacks[:failed].call(http) if callbacks[:failed]
          end
          # Only stop a reactor that this call started.
          EM.stop unless em_already_running
        end
        http.errback do
          @db.set(url_key, ERROR)
          callbacks[:error].call(http) if callbacks[:error]
          EM.stop unless em_already_running
        end
      end
    rescue StandardError => e
      @db.set(url_key, ERROR)
      callbacks[:exception].call(e) if callbacks[:exception]
      # If the exception escaped from an EM.run started by #em, the reactor
      # has already unwound; stopping it again is expected to raise, so check
      # that it is still running first.
      EM.stop if !em_already_running && EM.reactor_running?
    end

    # Runs the block inside an EventMachine reactor.
    #
    # Yields true when a reactor was already running (the caller must leave it
    # running), or false from within a freshly started EM.run (the caller is
    # responsible for calling EM.stop).
    def em
      if EM.reactor_running?
        yield true
      else
        EM.run do
          yield false
        end
      end
    end
  end
end
|
data/lib/canonicurl.rb
ADDED
metadata
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: canonicurl
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 2
|
10
|
+
version: 0.0.2
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- David Dai
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-04-06 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: redis
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
hash: 7
|
29
|
+
segments:
|
30
|
+
- 2
|
31
|
+
- 2
|
32
|
+
- 0
|
33
|
+
version: 2.2.0
|
34
|
+
type: :runtime
|
35
|
+
version_requirements: *id001
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: em-http-request
|
38
|
+
prerelease: false
|
39
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
hash: 62196357
|
45
|
+
segments:
|
46
|
+
- 1
|
47
|
+
- 0
|
48
|
+
- 0
|
49
|
+
- beta
|
50
|
+
- 3
|
51
|
+
version: 1.0.0.beta.3
|
52
|
+
type: :runtime
|
53
|
+
version_requirements: *id002
|
54
|
+
description: ""
|
55
|
+
email:
|
56
|
+
- david.github@gmail.com
|
57
|
+
executables: []
|
58
|
+
|
59
|
+
extensions: []
|
60
|
+
|
61
|
+
extra_rdoc_files: []
|
62
|
+
|
63
|
+
files:
|
64
|
+
- .gitignore
|
65
|
+
- Gemfile
|
66
|
+
- Rakefile
|
67
|
+
- canonicurl.gemspec
|
68
|
+
- lib/canonicurl.rb
|
69
|
+
- lib/canonicurl/cache.rb
|
70
|
+
- lib/canonicurl/version.rb
|
71
|
+
homepage: https://github.com/newtonapple/canonicurl
|
72
|
+
licenses: []
|
73
|
+
|
74
|
+
post_install_message:
|
75
|
+
rdoc_options: []
|
76
|
+
|
77
|
+
require_paths:
|
78
|
+
- lib
|
79
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
80
|
+
none: false
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
hash: 3
|
85
|
+
segments:
|
86
|
+
- 0
|
87
|
+
version: "0"
|
88
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
hash: 3
|
94
|
+
segments:
|
95
|
+
- 0
|
96
|
+
version: "0"
|
97
|
+
requirements: []
|
98
|
+
|
99
|
+
rubyforge_project: canonicurl
|
100
|
+
rubygems_version: 1.7.2
|
101
|
+
signing_key:
|
102
|
+
specification_version: 3
|
103
|
+
summary: A Canonical URL cache using Redis
|
104
|
+
test_files: []
|
105
|
+
|