canonicurl 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over HTTPS rather than plain HTTP so that downloads are
# protected in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in canonicurl.gemspec
gemspec
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
# Load Bundler, then let its gem helper register its Rake tasks
# for this gem.
require "bundler"

Bundler::GemHelper.install_tasks
@@ -0,0 +1,23 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for canonicurl.
#
# The gem's lib directory is put on the load path first so the version
# constant can be read from lib/canonicurl/version.rb.
$LOAD_PATH.push File.expand_path("../lib", __FILE__)
require "canonicurl/version"

Gem::Specification.new do |s|
  s.name        = "canonicurl"
  s.version     = Canonicurl::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["David Dai"]
  s.email       = ["david.github@gmail.com"]
  s.homepage    = "https://github.com/newtonapple/canonicurl"
  s.summary     = %q{A Canonical URL cache using Redis}
  # A non-empty description: the original was blank.
  s.description = %q{Caches the canonical (post-redirect) form of URLs in Redis, resolving URLs that are not cached yet with em-http-request.}

  # Only set the RubyForge project where the installed RubyGems still
  # provides the setter.
  s.rubyforge_project = "canonicurl" if s.respond_to?(:rubyforge_project=)

  # The file lists are taken from git, so the gem must be built from a
  # git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_dependency 'redis', '>= 2.2.0'
  s.add_dependency 'em-http-request', '>= 1.0.0.beta.3'
end
@@ -0,0 +1,123 @@
1
+ require 'digest/md5'
2
+ require 'redis'
3
+ require 'em-http'
4
+
5
module Canonicurl
  # Cache of canonical URLs kept in a Redis-like store.
  #
  # Each URL is stored under "<key_prefix><MD5 hex digest of the URL>".
  # The stored value is either a full canonical URL (more than one
  # character) or a one-character state code:
  #
  # * CANONICAL -- the URL is its own canonical form
  # * ERROR     -- the HTTP request failed or an exception was raised
  # * LOCKED    -- the key was just created and has not been resolved yet
  # * RESOLVING -- an HTTP request for the URL has been started
  # * a digit   -- the hundreds digit of a non-2xx HTTP status ("3", "4", ...)
  class Cache
    CANONICAL = 'C'
    ERROR     = 'E'
    LOCKED    = 'L'
    RESOLVING = 'R'

    TTL                = 60 * 60 * 24 * 90 # 90 days ~ 3 months
    REDIRECTS          = 5
    CONNECTION_TIMEOUT = 5
    KEY_PREFIX         = 'curl:'

    attr_accessor :db, :ttl, :timeout, :redirects
    attr_reader :key_prefix

    # Turns a value returned by #get / #fetch into a URL.
    #
    # code_or_url - a cached value: a canonical URL or a one-character code.
    # url         - optional fallback (for instance the URL that was looked
    #               up) returned when code_or_url is nil or a state code.
    #
    # Returns code_or_url when it is longer than one character, otherwise
    # the fallback (nil by default).
    def self.url(code_or_url, url = nil)
      code_or_url && code_or_url.size > 1 ? code_or_url : url
    end

    # options - Hash of settings, all optional:
    #           :db         - store client responding to get, set, setnx and
    #                         setex (default: a new Redis connection)
    #           :ttl        - expiry in seconds for resolved entries
    #           :timeout    - HTTP connection timeout in seconds; the
    #                         inactivity timeout is twice this value
    #           :redirects  - maximum number of redirects to follow
    #           :key_prefix - prefix prepended to every key
    def initialize(options = {})
      @db         = options[:db] || default_db
      @ttl        = options[:ttl] || TTL
      @timeout    = options[:timeout] || CONNECTION_TIMEOUT
      @redirects  = options[:redirects] || REDIRECTS
      @key_prefix = options[:key_prefix] || KEY_PREFIX
    end

    # Returns the raw cached value for url (a canonical URL, a state code,
    # or nil when nothing is stored). Never triggers a resolution.
    def get(url)
      @db.get key(url)
    end

    # Looks up url and starts resolving it when it has not been seen before.
    #
    # url       - the URL to look up.
    # callbacks - Hash of optional callables invoked by the resolution:
    #             :resolved  - called with (canonical_url, http) on a 2xx
    #             :failed    - called with (http) on any other status
    #             :error     - called with (http) when the request errors
    #             :exception - called with (exception) when resolving raises
    #
    # Returns:
    # * the cached canonical URL when one is stored;
    # * when url itself is canonical: the block's result if a block is
    #   given, otherwise url;
    # * RESOLVING when this call started the resolution. If no EventMachine
    #   reactor is running, one is run for the duration of the request, so
    #   the call blocks until the request finishes;
    # * any other stored state code unchanged.
    def fetch(url, callbacks = {})
      k = key(url)
      @db.setnx(k, LOCKED) # lock it if key doesn't exist

      result = @db.get(k)
      return result if result && result.size > 1

      case result
      when CANONICAL
        # Only yield when a block was actually supplied; without this
        # check a block-less call raises LocalJumpError.
        block_given? ? yield(url) : url
      when LOCKED
        resolve(url, k, callbacks)
        RESOLVING
      else
        result
      end
    end

    # Records canonical_url as the canonical form of url, expiring after
    # the configured ttl.
    #
    # url_key - precomputed key for url; computed with #key when nil.
    #
    # When the two URLs differ, the canonical URL's own entry is also
    # marked CANONICAL.
    def set(url, canonical_url, url_key = nil)
      url_key ||= key(url)
      if url == canonical_url
        @db.setex(url_key, @ttl, CANONICAL)
      else
        @db.setex(url_key, @ttl, canonical_url)
        @db.setex(key(canonical_url), @ttl, CANONICAL) # preemptively set the canonical_url
      end
    end

    # Returns the store key for url: the key prefix followed by the MD5 hex
    # digest of the URL.
    def key(url)
      "#{@key_prefix}#{Digest::MD5.hexdigest(url)}"
    end

    private

    # Builds the default store connection. Redis.connect is used when the
    # installed redis client provides it; otherwise Redis.new is used.
    def default_db
      Redis.respond_to?(:connect) ? Redis.connect : Redis.new
    end

    # Issues a GET for url (following up to @redirects redirects) and stores
    # the outcome under url_key:
    #
    # * 2xx      -> the last effective URL, via #set (with ttl)
    # * other    -> the hundreds digit of the status, without expiry
    # * failure  -> ERROR, without expiry
    #
    # The matching callback from callbacks is invoked after the store is
    # updated. A reactor started by this call is stopped once the request
    # has completed.
    def resolve(url, url_key, callbacks)
      # Assume the reactor is not ours until #em tells us otherwise, so an
      # early failure never stops a reactor somebody else started.
      em_already_running = true
      @db.set(url_key, RESOLVING)
      em do |running|
        em_already_running = running
        http = EM::HttpRequest.new(url,
                                   :connection_timeout => @timeout,
                                   :inactivity_timeout => @timeout * 2).get(:redirects => @redirects)
        http.callback do
          status = http.response_header.status.to_i
          case status
          when 200...300
            canonical_url = http.last_effective_url.to_s
            set url, canonical_url, url_key
            callbacks[:resolved].call(canonical_url, http) if callbacks[:resolved]
          else
            @db.set url_key, (status / 100).to_s
            callbacks[:failed].call(http) if callbacks[:failed]
          end
          stop_own_reactor(em_already_running)
        end
        http.errback do
          @db.set(url_key, ERROR)
          callbacks[:error].call(http) if callbacks[:error]
          stop_own_reactor(em_already_running)
        end
      end
    rescue Exception => e
      # Every failure marks the key and notifies the caller, but only
      # ordinary errors are swallowed: Interrupt, SystemExit and the like
      # are re-raised so the process can still be stopped.
      @db.set(url_key, ERROR)
      callbacks[:exception].call(e) if callbacks[:exception]
      stop_own_reactor(em_already_running)
      raise unless e.is_a?(StandardError)
    end

    # Stops the reactor only when this object started it and it is still
    # running (it may already be gone when an exception unwound EM.run).
    def stop_own_reactor(em_already_running)
      EM.stop if !em_already_running && EM.reactor_running?
    end

    # Yields inside an EventMachine reactor. The block receives true when a
    # reactor was already running and false when one was started for it; in
    # the latter case this method returns only after the reactor stops.
    def em
      if EM.reactor_running?
        yield true
      else
        EM.run do
          yield false
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Canonicurl
  # Version of the canonicurl gem; frozen so the shared constant cannot be
  # mutated in place.
  VERSION = "0.0.2".freeze
end
data/lib/canonicurl.rb ADDED
@@ -0,0 +1,5 @@
1
# Entry point of the gem: declares the Canonicurl namespace first, then
# loads each of the library's files.
module Canonicurl; end

%w[cache version].each { |part| require "canonicurl/#{part}" }
metadata ADDED
@@ -0,0 +1,105 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: canonicurl
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 2
10
+ version: 0.0.2
11
+ platform: ruby
12
+ authors:
13
+ - David Dai
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-04-06 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: redis
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 7
29
+ segments:
30
+ - 2
31
+ - 2
32
+ - 0
33
+ version: 2.2.0
34
+ type: :runtime
35
+ version_requirements: *id001
36
+ - !ruby/object:Gem::Dependency
37
+ name: em-http-request
38
+ prerelease: false
39
+ requirement: &id002 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ hash: 62196357
45
+ segments:
46
+ - 1
47
+ - 0
48
+ - 0
49
+ - beta
50
+ - 3
51
+ version: 1.0.0.beta.3
52
+ type: :runtime
53
+ version_requirements: *id002
54
+ description: ""
55
+ email:
56
+ - david.github@gmail.com
57
+ executables: []
58
+
59
+ extensions: []
60
+
61
+ extra_rdoc_files: []
62
+
63
+ files:
64
+ - .gitignore
65
+ - Gemfile
66
+ - Rakefile
67
+ - canonicurl.gemspec
68
+ - lib/canonicurl.rb
69
+ - lib/canonicurl/cache.rb
70
+ - lib/canonicurl/version.rb
71
+ homepage: https://github.com/newtonapple/canonicurl
72
+ licenses: []
73
+
74
+ post_install_message:
75
+ rdoc_options: []
76
+
77
+ require_paths:
78
+ - lib
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ hash: 3
85
+ segments:
86
+ - 0
87
+ version: "0"
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ hash: 3
94
+ segments:
95
+ - 0
96
+ version: "0"
97
+ requirements: []
98
+
99
+ rubyforge_project: canonicurl
100
+ rubygems_version: 1.7.2
101
+ signing_key:
102
+ specification_version: 3
103
+ summary: A Canonical URL cache using Redis
104
+ test_files: []
105
+