canonicurl 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over HTTPS so dependency downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in canonicurl.gemspec
gemspec
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
# Loads Bundler and installs the Rake tasks provided by Bundler::GemHelper.
require "bundler"

Bundler::GemHelper.install_tasks
@@ -0,0 +1,23 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for canonicurl.
# lib/ is added to the load path so the version constant can be required
# before the gem itself is installed.
$:.push File.expand_path("../lib", __FILE__)
require "canonicurl/version"

Gem::Specification.new do |s|
  s.name        = "canonicurl"
  s.version     = Canonicurl::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["David Dai"]
  s.email       = ["david.github@gmail.com"]
  s.homepage    = "https://github.com/newtonapple/canonicurl"
  s.summary     = %q{A Canonical URL cache using Redis}
  # A non-empty description replaces the empty template placeholder.
  s.description = %q{Resolves URLs to their canonical form by following HTTP redirects with em-http-request and caches the results in Redis.}

  s.rubyforge_project = "canonicurl"

  # The packaged file lists are taken from what git tracks in the working tree.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Runtime dependencies: the Redis client and the EventMachine HTTP client.
  s.add_dependency 'redis',           '>= 2.2.0'
  s.add_dependency 'em-http-request', '>= 1.0.0.beta.3'
end
@@ -0,0 +1,123 @@
1
+ require 'digest/md5'
2
+ require 'redis'
3
+ require 'em-http'
4
+
5
module Canonicurl
  # Cache that maps a URL to its canonical URL.
  #
  # Each URL is stored under "<key_prefix><MD5 hex digest of the URL>". The
  # stored value is either a canonical URL (always longer than one character)
  # or a one-character status code:
  #
  # * CANONICAL - the URL is itself canonical
  # * ERROR     - resolution failed (connection error or raised exception)
  # * LOCKED    - placeholder written by #fetch before resolution starts
  # * RESOLVING - an HTTP request for the URL has been started
  # * "1".."5"  - first digit of a non-2xx HTTP status returned for the URL
  class Cache
    CANONICAL = 'C'
    ERROR     = 'E'
    LOCKED    = 'L'
    RESOLVING = 'R'

    TTL                = 60 * 60 * 24 * 90 # 90 days ~ 3 months
    REDIRECTS          = 5
    CONNECTION_TIMEOUT = 5
    KEY_PREFIX         = 'curl:'

    attr_accessor :db, :ttl, :timeout, :redirects
    attr_reader   :key_prefix

    # Turns a cached value into a URL.
    #
    # @param code_or_url [String, nil] value returned by #get or #fetch
    # @param url [String, nil] URL to fall back to when +code_or_url+ is a
    #   one-character status code or nil
    # @return [String, nil] +code_or_url+ when it is longer than one
    #   character, otherwise +url+
    def self.url(code_or_url, url = nil)
      code_or_url && code_or_url.size > 1 ? code_or_url : url
    end


    # @param options [Hash] optional settings
    #   :db         - object used as the store (defaults to a new Redis connection)
    #   :ttl        - expiry in seconds passed to SETEX (default TTL)
    #   :timeout    - HTTP connection timeout (default CONNECTION_TIMEOUT);
    #                 the inactivity timeout is twice this value
    #   :redirects  - number of redirects to follow (default REDIRECTS)
    #   :key_prefix - prefix for every key (default KEY_PREFIX)
    def initialize(options={})
      @db         = options[:db]         || default_db
      @ttl        = options[:ttl]        || TTL
      @timeout    = options[:timeout]    || CONNECTION_TIMEOUT
      @redirects  = options[:redirects]  || REDIRECTS
      @key_prefix = options[:key_prefix] || KEY_PREFIX
    end


    # Reads the raw cached value for +url+ without triggering resolution.
    #
    # @return [String, nil] canonical URL, status code, or nil when absent
    def get(url)
      @db.get key(url)
    end


    # Looks up +url+ and starts resolving it when it has not been seen yet.
    #
    # @param url [String] URL to look up
    # @param callbacks [Hash] optional callables invoked during resolution:
    #   :resolved  - called with (canonical_url, http) on a 2xx response
    #   :failed    - called with (http) on a non-2xx response
    #   :error     - called with (http) when the request errors out
    #   :exception - called with (exception) when resolution raises
    # @yield [url] when +url+ is cached as canonical
    # @return [Object] the cached canonical URL; the block's value (or
    #   CANONICAL when no block is given) for a canonical URL; RESOLVING
    #   when resolution was started; otherwise the stored status code
    def fetch(url, callbacks={})
      k = key(url)
      @db.setnx(k, LOCKED) # lock it if key doesn't exist

      result = @db.get(k)
      # Status codes are one character long, so anything longer is a cached URL.
      return result if result && result.size > 1

      case result
      when CANONICAL
        # Without a block there is nothing to yield to; report the status code.
        block_given? ? yield(url) : CANONICAL
      when LOCKED
        resolve(url, k, callbacks)
        RESOLVING
      else
        result
      end
    end


    # Stores the canonical URL for +url+ with the configured TTL.
    #
    # @param url [String] original URL
    # @param canonical_url [String] URL that +url+ resolves to
    # @param url_key [String, nil] precomputed key for +url+
    def set(url, canonical_url, url_key=nil)
      url_key = url_key || key(url)
      if url == canonical_url
        @db.setex(url_key, @ttl, CANONICAL)
      else
        @db.setex(url_key, @ttl, canonical_url)
        @db.setex(key(canonical_url), @ttl, CANONICAL) # preemptively set the canonical_url
      end
    end


    # @return [String] the key under which +url+ is stored
    def key(url)
      "#{@key_prefix}#{Digest::MD5.hexdigest(url)}"
    end


    private

    # Opens the default Redis connection. Redis.connect is used when the
    # installed client provides it; otherwise Redis.new is used.
    def default_db
      Redis.respond_to?(:connect) ? Redis.connect : Redis.new
    end


    # Issues a GET for +url+, following redirects, and records the outcome
    # under +url_key+. The reactor is stopped afterwards only when this call
    # started it.
    def resolve(url, url_key, callbacks)
      # Assume the reactor is not ours until #em says otherwise, so a failure
      # before the reactor starts never tries to stop it.
      em_already_running = true
      @db.set(url_key, RESOLVING)
      em do |running|
        em_already_running = running
        http = EM::HttpRequest.new(url,
                                   :connection_timeout => @timeout,
                                   :inactivity_timeout => @timeout * 2).get(:redirects => @redirects)
        http.callback {
          status = http.response_header.status.to_i
          case status
          when 200...300
            canonical_url = http.last_effective_url.to_s
            set url, canonical_url, url_key
            callbacks[:resolved].call(canonical_url, http) if callbacks[:resolved]
          else
            # Keep only the status class, e.g. 404 is stored as "4".
            @db.set url_key, (status / 100).to_s
            callbacks[:failed].call(http) if callbacks[:failed]
          end
          stop_reactor unless em_already_running
        }
        http.errback {
          @db.set(url_key, ERROR)
          callbacks[:error].call(http) if callbacks[:error]
          stop_reactor unless em_already_running
        }
      end
    rescue StandardError => e
      # StandardError only: signals and SystemExit must keep propagating.
      @db.set(url_key, ERROR)
      callbacks[:exception].call(e) if callbacks[:exception]
      stop_reactor unless em_already_running
    end


    # Stops the reactor if it is still running; a reactor that has already
    # shut down is left alone.
    def stop_reactor
      EM.stop if EM.reactor_running?
    end


    # Yields true when a reactor is already running; otherwise starts one and
    # yields false inside it.
    def em
      if EM.reactor_running?
        yield true
      else
        EM.run do
          yield false
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
# Namespace of the canonicurl gem.
module Canonicurl
  # Version string of this release.
  VERSION = '0.0.2'
end
data/lib/canonicurl.rb ADDED
@@ -0,0 +1,5 @@
1
# Entry point of the gem: declares the namespace, then loads its parts.
module Canonicurl; end

require "canonicurl/cache"
require "canonicurl/version"
metadata ADDED
@@ -0,0 +1,105 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: canonicurl
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 2
10
+ version: 0.0.2
11
+ platform: ruby
12
+ authors:
13
+ - David Dai
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-04-06 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: redis
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 7
29
+ segments:
30
+ - 2
31
+ - 2
32
+ - 0
33
+ version: 2.2.0
34
+ type: :runtime
35
+ version_requirements: *id001
36
+ - !ruby/object:Gem::Dependency
37
+ name: em-http-request
38
+ prerelease: false
39
+ requirement: &id002 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ hash: 62196357
45
+ segments:
46
+ - 1
47
+ - 0
48
+ - 0
49
+ - beta
50
+ - 3
51
+ version: 1.0.0.beta.3
52
+ type: :runtime
53
+ version_requirements: *id002
54
+ description: ""
55
+ email:
56
+ - david.github@gmail.com
57
+ executables: []
58
+
59
+ extensions: []
60
+
61
+ extra_rdoc_files: []
62
+
63
+ files:
64
+ - .gitignore
65
+ - Gemfile
66
+ - Rakefile
67
+ - canonicurl.gemspec
68
+ - lib/canonicurl.rb
69
+ - lib/canonicurl/cache.rb
70
+ - lib/canonicurl/version.rb
71
+ homepage: https://github.com/newtonapple/canonicurl
72
+ licenses: []
73
+
74
+ post_install_message:
75
+ rdoc_options: []
76
+
77
+ require_paths:
78
+ - lib
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ hash: 3
85
+ segments:
86
+ - 0
87
+ version: "0"
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ hash: 3
94
+ segments:
95
+ - 0
96
+ version: "0"
97
+ requirements: []
98
+
99
+ rubyforge_project: canonicurl
100
+ rubygems_version: 1.7.2
101
+ signing_key:
102
+ specification_version: 3
103
+ summary: A Canonical URL cache using Redis
104
+ test_files: []
105
+