uri-meta 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +5 -0
- data/LICENSE +20 -0
- data/README.markdown +68 -0
- data/Rakefile +57 -0
- data/VERSION +1 -0
- data/benchmark.rb +36 -0
- data/lib/uri/meta.rb +136 -0
- data/test/test_helper.rb +10 -0
- data/test/uri-meta_test.rb +395 -0
- data/uri-meta.gemspec +60 -0
- metadata +96 -0
data/.document
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Stateless Systems
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.markdown
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
# uri-meta: Get meta information about your URI
|
2
|
+
|
3
|
+
uri-meta is a ruby interface to the [metauri.com](http://www.metauri.com/) service.
|
4
|
+
|
5
|
+
[metauri.com](http://www.metauri.com/) provides two things:
|
6
|
+
|
7
|
+
* follows your URI to the end point where there is actual content instead of redirects
|
8
|
+
* obtains meta information (title etc) about that end URI
|
9
|
+
|
10
|
+
## Examples
|
11
|
+
|
12
|
+
require 'uri'
|
13
|
+
require 'uri/meta'
|
14
|
+
uri = URI.parse('http://www.google.com/')
|
15
|
+
puts uri.meta.title
|
16
|
+
# Google
|
17
|
+
puts uri.meta.status
|
18
|
+
# 200
|
19
|
+
puts uri.meta(:headers => 1).headers
|
20
|
+
# HTTP/1.1 .... etc
|
21
|
+
|
22
|
+
uri = URI.parse('http://bit.ly/PBzu')
|
23
|
+
puts uri.meta.content_type
|
24
|
+
# image/gif
|
25
|
+
|
26
|
+
meta = URI.parse('http://bit.ly/PBzu').meta(:max_redirects => 2)
|
27
|
+
puts(meta.last_effective_uri) unless meta.errors?
|
28
|
+
# http://clipart.tiu.edu/offcampus/animated/bd13644_.gif
|
29
|
+
|
30
|
+
|
31
|
+
URI::Meta.multi(['http://www.google.com/', 'http://bit.ly/PBzu'], :max_redirects => 10) do |meta|
|
32
|
+
# Don't rely on these being processed in the same order they were listed!
|
33
|
+
if meta.redirect?
|
34
|
+
puts "## #{meta.uri} -> #{meta.last_effective_uri}"
|
35
|
+
else
|
36
|
+
puts "## #{meta.uri} did not redirect and its title was #{meta.title}"
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
## Caching
|
41
|
+
|
42
|
+
uri-meta uses in-memory caching via [wycats-moneta](http://github.com/wycats/moneta), so it
|
43
|
+
should be relatively straightforward for you to use whatever other caching mechanism you want,
|
44
|
+
provided it's supported by moneta.
|
45
|
+
|
46
|
+
require 'uri'
|
47
|
+
require 'uri/meta'
|
48
|
+
|
49
|
+
# Memcached
|
50
|
+
require 'moneta/memcache'
|
51
|
+
URI::Meta::Cache.cache = Moneta::Memcache.new(:server => 'localhost', :namespace => 'uri_meta')
|
52
|
+
URI::Meta::Cache.expires_in = (60 * 60 * 24 * 7) # 1 week
|
53
|
+
|
54
|
+
# No caching (for testing I guess)
|
55
|
+
URI::Meta::Cache.cache = nil
|
56
|
+
|
57
|
+
## Known Issues
|
58
|
+
|
59
|
+
* Redirects that aren't handled by the webserver (302), such as javascript or
|
60
|
+
<meta> tag redirects are not supported yet.
|
61
|
+
* Framed redirects, such as stumbleupon are not resolved yet, as these are
|
62
|
+
technically full pages it could be difficult to know that it's not really
|
63
|
+
the end URI.
|
64
|
+
* No RDOC as yet.
|
65
|
+
|
66
|
+
# Copyright
|
67
|
+
|
68
|
+
Copyright (c) 2009 Stateless Systems. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
# Define the gem packaging tasks when Jeweler is installed. If Jeweler (or one
# of its dependencies) cannot be loaded, print an install hint instead of
# failing so the rest of the Rakefile still loads.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "uri-meta"
    gem.summary = %Q{Meta information for a URI}
    # The service is metauri.com (see README and URI::Meta.service_host).
    gem.description = %Q{Retrieves meta information for a URI from the metauri.com service.}
    gem.email = "production@statelesssystems.com"
    gem.homepage = "http://github.com/stateless-systems/uri-meta"
    gem.authors = ["Stateless Systems"]
    gem.add_dependency "taf2-curb"
    gem.add_dependency "wycats-moneta"
    gem.add_development_dependency "thoughtbot-shoulda"
  end
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end
|
20
|
+
|
21
|
+
require 'rake/testtask'
|
22
|
+
Rake::TestTask.new(:test) do |test|
|
23
|
+
test.libs << 'lib' << 'test'
|
24
|
+
test.pattern = 'test/**/*_test.rb'
|
25
|
+
test.verbose = true
|
26
|
+
end
|
27
|
+
|
28
|
+
begin
|
29
|
+
require 'rcov/rcovtask'
|
30
|
+
Rcov::RcovTask.new do |test|
|
31
|
+
test.libs << 'test'
|
32
|
+
test.pattern = 'test/**/*_test.rb'
|
33
|
+
test.verbose = true
|
34
|
+
end
|
35
|
+
rescue LoadError
|
36
|
+
task :rcov do
|
37
|
+
abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
task :test => :check_dependencies
|
42
|
+
|
43
|
+
task :default => :test
|
44
|
+
|
45
|
+
require 'rake/rdoctask'
|
46
|
+
Rake::RDocTask.new do |rdoc|
|
47
|
+
if File.exist?('VERSION')
|
48
|
+
version = File.read('VERSION')
|
49
|
+
else
|
50
|
+
version = ""
|
51
|
+
end
|
52
|
+
|
53
|
+
rdoc.rdoc_dir = 'rdoc'
|
54
|
+
rdoc.title = "uri-meta #{version}"
|
55
|
+
rdoc.rdoc_files.include('README*')
|
56
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
57
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.9.1
|
data/benchmark.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), 'lib'))
|
4
|
+
|
5
|
+
require 'rubygems'
|
6
|
+
require 'benchmark'
|
7
|
+
require 'curb'
|
8
|
+
require 'uri/meta'
|
9
|
+
|
10
|
+
URI::Meta::Cache.cache = nil
|
11
|
+
|
12
|
+
cached_uris = uncached_uris = []
|
13
|
+
uncached_uris = []
|
14
|
+
|
15
|
+
delete = Curl::Multi.new
|
16
|
+
|
17
|
+
(1..50).each do |x|
|
18
|
+
cached_uris << URI.parse('http://tigris.id.au/')
|
19
|
+
uncached_uris << URI.parse("http://tigris.id.au/#{x}")
|
20
|
+
c = Curl::Easy.new("http://www.metauri.com/delete?uri=#{uncached_uris.last.to_s}")
|
21
|
+
c.on_complete{|curl| print '.'}
|
22
|
+
delete.add(c)
|
23
|
+
end
|
24
|
+
|
25
|
+
print ' performing cache clear '
|
26
|
+
clear = Benchmark.realtime{ delete.perform }
|
27
|
+
puts " #{clear}"
|
28
|
+
|
29
|
+
## TODO: figure out why uncached is faster when X > pool size, but way less when X < pool size
|
30
|
+
print ' calculating cached time '
|
31
|
+
cached = Benchmark.realtime{ URI::Meta.multi(cached_uris){|m| print '.'}}
|
32
|
+
puts " #{cached}"
|
33
|
+
|
34
|
+
print 'calculating uncached time '
|
35
|
+
uncached = Benchmark.realtime{ URI::Meta.multi(uncached_uris){|m| print '.'}}
|
36
|
+
puts " #{uncached}"
|
data/lib/uri/meta.rb
ADDED
@@ -0,0 +1,136 @@
|
|
1
|
+
require 'uri'
|
2
|
+
require 'curb'
|
3
|
+
require 'yaml'
|
4
|
+
require 'moneta'
|
5
|
+
require 'moneta/memory'
|
6
|
+
require 'digest/sha1'
|
7
|
+
|
8
|
+
module URI
|
9
|
+
class Meta
|
10
|
+
attr_accessor :headers, :uri, :title, :feed, :last_modified, :content_type, :charset, :last_effective_uri, :status, :errors
|
11
|
+
@@service_host = 'www.metauri.com'
|
12
|
+
@@user_agent = 'uri-meta rubygem'
|
13
|
+
|
14
|
+
UNSAFE = Regexp.new("[#{URI::REGEXP::PATTERN::RESERVED} #%]", false, 'N').freeze
|
15
|
+
|
16
|
+
def self.service_host
|
17
|
+
@@service_host
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.service_host=(service_host)
|
21
|
+
@@service_host = service_host
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.user_agent
|
25
|
+
@@user_agent
|
26
|
+
end
|
27
|
+
|
28
|
+
def self.user_agent=(user_agent)
|
29
|
+
@@user_agent = user_agent
|
30
|
+
end
|
31
|
+
|
32
|
+
def initialize(options = {})
|
33
|
+
self.errors = []
|
34
|
+
options.each do |k, v|
|
35
|
+
case k
|
36
|
+
when :last_effective_uri, :uri, :feed then send("#{k}=", v.to_s == '' ? nil : (URI.parse(v.to_s) rescue nil))
|
37
|
+
when :error, :errors then self.errors.push(*[v].flatten)
|
38
|
+
else send("#{k}=", v) if respond_to?("#{k}=")
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def redirect?
|
44
|
+
uri != last_effective_uri
|
45
|
+
end
|
46
|
+
|
47
|
+
def errors?
|
48
|
+
!errors.empty?
|
49
|
+
end
|
50
|
+
|
51
|
+
# Fetch the meta information for a single URI.
#
# uri     - anything whose #to_s yields a URI string (String, URI, ...).
# options - Hash of request options passed through to URI::Meta.multi.
#
# Returns the first element of the Array produced by URI::Meta.multi.
# Raises ArgumentError when the argument cannot be parsed as a URI.
# Raises NotImplementedError when the parsed URI is not a URI::HTTP.
def self.get(uri, options = {})
  parsed = begin
    URI.parse(uri.to_s)
  rescue StandardError
    nil
  end
  # Report the class of what the caller actually passed; the parse result is
  # nil at this point and would always read "NilClass".
  raise ArgumentError.new("Can't coerce #{uri.class} to URI") unless parsed.is_a?(URI)
  raise NotImplementedError.new('Only HTTP is supported so far.') unless parsed.is_a?(URI::HTTP)
  URI::Meta.multi([parsed], options).first
end
|
57
|
+
|
58
|
+
def self.cache_key(uri, options = {})
|
59
|
+
# Make sure the key includes the options used to retrieve the meta
|
60
|
+
uid = uri.to_s + options.to_a.sort{|a,b| a[0].to_s <=> b[0].to_s}.to_s
|
61
|
+
Digest::SHA1.hexdigest(uid)
|
62
|
+
end
|
63
|
+
|
64
|
+
#--
|
65
|
+
# TODO: Chunk uri's through a pre-warmed pool of curl easy instances?
|
66
|
+
# Fetch meta information for several URIs through one Curl::Multi handle.
#
# uris    - Enumerable of URIs (or URI strings).
# options - Hash of request options; forms part of the cache key and is sent
#           to the service as query parameters. It is not modified.
# block   - optional; called with each URI::Meta as it becomes available
#           (cache hits first, then fetched results as requests complete).
#
# Returns an Array of URI::Meta objects, in completion order rather than the
# order of +uris+.
def self.multi(uris, options = {}, &block)
  metas = []
  multi = Curl::Multi.new
  uris.each do |uri|
    # Compute the key once, before the request is built, so the lookup and the
    # later store always agree even if the request builder touches its options.
    key = cache_key(uri, options)
    cached = URI::Meta::Cache.get(key)
    if cached
      metas << cached
      # Storing again on a hit mirrors the original behaviour.
      URI::Meta::Cache.store(key, cached)
      block.call(cached) if block
    else
      # Hand over a copy so the caller's options hash is never altered.
      easy = curl(uri, options.dup)
      easy.on_complete do |done|
        # NOTE(review): YAML.load is applied to a remote response body; consider
        # a restricted loader if the service cannot be fully trusted.
        args = begin
          YAML.load(done.body_str)
        rescue StandardError => e
          {:errors => "YAML Error, #{e.message}"}
        end
        args = {:errors => "YAML Error, server returned unknown format."} unless args.is_a?(Hash)

        fetched = URI::Meta.new({:uri => uri}.update(args))
        metas << fetched
        URI::Meta::Cache.store(key, fetched)
        block.call(fetched) if block
      end
      multi.add(easy)
    end
  end
  multi.perform
  metas
end
|
90
|
+
|
91
|
+
protected
|
92
|
+
#--
|
93
|
+
# Required because the URI option must be verbatim. If '+' and others are not escaped Merb, Rack or something
|
94
|
+
# helpfully converts them to spaces on metauri.com
|
95
|
+
# Build the Curl::Easy request that asks the service for +uri+'s meta.
#
# uri     - the URI to look up; sent as the "uri" query parameter.
# options - Hash of extra query parameters. It is left untouched: a merged
#           copy is used, so callers can reuse the hash (e.g. for cache keys).
#
# Every value is escaped with UNSAFE so reserved characters such as '+' reach
# the service verbatim.
#
# Returns a Curl::Easy with the User-Agent header set (not yet performed).
def self.curl(uri, options = {})
  params = options.merge(:uri => uri)
  query = params.map { |k, v| "#{k}=" + URI.escape(v.to_s, UNSAFE) }.join('&')
  c = Curl::Easy.new("http://#{service_host}/show.yaml?#{query}")
  c.headers['User-Agent'] = user_agent
  c
end
|
102
|
+
|
103
|
+
# Adds #meta to URI objects (included into URI::Generic and URI::HTTP below).
module Mixin
  # Meta information for this URI, fetched through URI::Meta.get.
  #
  # options - Hash of request options passed to URI::Meta.get.
  #
  # Results are memoised per distinct options hash, so uri.meta and
  # uri.meta(:headers => 1) each trigger their own lookup instead of the
  # second call silently returning the first call's result.
  #
  # Returns whatever URI::Meta.get returns for this URI and options.
  def meta(options = {})
    @meta_by_options ||= {}
    # Key on a copy so later changes to the caller's hash cannot disturb
    # the memo table.
    key = options.dup
    @meta_by_options[key] ||= URI::Meta.get(self, options)
  end
end
|
108
|
+
|
109
|
+
class Cache
|
110
|
+
@@cache = Moneta::Memory.new
|
111
|
+
@@expires_in = 86_400 # 24 hours
|
112
|
+
|
113
|
+
class << self
|
114
|
+
def store(uid, obj)
|
115
|
+
@@cache.store(uid, obj, :expires_in => @@expires_in) unless @@cache.nil?
|
116
|
+
end
|
117
|
+
|
118
|
+
def get(id)
|
119
|
+
@@cache[id] unless @@cache.nil?
|
120
|
+
end
|
121
|
+
|
122
|
+
def cache=(cache)
|
123
|
+
warn 'Turning off caching is poor form, for longer processes consider using moneta/memcached' if cache.nil?
|
124
|
+
@@cache = cache
|
125
|
+
end
|
126
|
+
|
127
|
+
def expires_in=(seconds)
|
128
|
+
@@expires_in = seconds
|
129
|
+
end
|
130
|
+
end
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
URI::Generic.send(:include, URI::Meta::Mixin)
|
135
|
+
URI::HTTP.send(:include, URI::Meta::Mixin)
|
136
|
+
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,395 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'test_helper')
|
2
|
+
require 'uri'
|
3
|
+
require 'curb'
|
4
|
+
require 'timeout'
|
5
|
+
|
6
|
+
class UriMetaTest < Test::Unit::TestCase
|
7
|
+
# First things first. Purge all test URIs on the metauri service so we don't
|
8
|
+
# get issues from old cached URIs.
|
9
|
+
[
|
10
|
+
'garbage',
|
11
|
+
'http://bit.ly/PBzu',
|
12
|
+
'http://bit.ly/rvQhW',
|
13
|
+
'http://img11.yfrog.com/i/vaix.jpg/',
|
14
|
+
'http://rss.slashdot.org/Slashdot/slashdot',
|
15
|
+
'http://slashdot.org/',
|
16
|
+
'http://taptaptap.com/+MqN',
|
17
|
+
"http://#{URI::Meta.service_host}/",
|
18
|
+
"http://#{URI::Meta.service_host}/double_redirect_test",
|
19
|
+
"http://#{URI::Meta.service_host}/#foo",
|
20
|
+
"http://#{URI::Meta.service_host}/foo%5Bbar%5D",
|
21
|
+
"http://#{URI::Meta.service_host}/meta_redirect_test",
|
22
|
+
"http://#{URI::Meta.service_host}/redirect_test",
|
23
|
+
'http://www.facebook.com/home.php',
|
24
|
+
'http://www.facebook.com/pages/Bronx-NY/Career-and-Transfer-Services-at-BCC/113334355068',
|
25
|
+
'http://www.google.com:666/',
|
26
|
+
'http://www.stumbleupon.com/s/#4sDy2p/sivers.org/hellyeah',
|
27
|
+
'http://www.taobao.com/',
|
28
|
+
'http://www.youtube.com/das_captcha?next=/watch%3Fv%3DQ1rdsFuNIMc',
|
29
|
+
].each{|uri| Curl::Easy.http_get("http://#{URI::Meta.service_host}/delete?uri=#{URI.escape(uri.to_s, URI::Meta::UNSAFE)}") }
|
30
|
+
|
31
|
+
context %Q(URI.parse('http://#{URI::Meta.service_host}/')) do
|
32
|
+
setup do
|
33
|
+
@uri = URI.parse("http://#{URI::Meta.service_host}/")
|
34
|
+
end
|
35
|
+
|
36
|
+
should 'respond_to :meta' do
|
37
|
+
assert_respond_to @uri, :meta
|
38
|
+
end
|
39
|
+
|
40
|
+
context '.meta' do
|
41
|
+
setup do
|
42
|
+
@meta = @uri.meta
|
43
|
+
end
|
44
|
+
|
45
|
+
should 'be a URI::Meta object' do
|
46
|
+
assert_kind_of URI::Meta, @meta
|
47
|
+
end
|
48
|
+
|
49
|
+
context '.uri' do
|
50
|
+
should 'be a URI object' do
|
51
|
+
assert_kind_of URI, @meta.uri
|
52
|
+
end
|
53
|
+
|
54
|
+
should 'be the same as the original URI' do
|
55
|
+
assert_equal @uri.to_s, @meta.uri.to_s
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
context '.last_effective_uri' do
|
60
|
+
should 'be a URI object' do
|
61
|
+
assert_kind_of URI, @meta.last_effective_uri
|
62
|
+
end
|
63
|
+
|
64
|
+
should 'not have been a redirect' do
|
65
|
+
assert_equal @uri.to_s, @meta.last_effective_uri.to_s
|
66
|
+
assert !@meta.redirect?
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
context '.title' do
|
71
|
+
should 'be Meta URI' do
|
72
|
+
assert_equal 'Meta URI', @meta.title
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
context '.status' do
|
77
|
+
should 'be 200' do
|
78
|
+
assert_equal 200, @meta.status
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
context '.headers' do
|
83
|
+
should 'be nil' do
|
84
|
+
assert_nil @meta.headers
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
context '.meta(:headers => 1)' do
|
90
|
+
setup do
|
91
|
+
@meta = URI.parse("http://#{URI::Meta.service_host}/").meta(:headers => 1)
|
92
|
+
end
|
93
|
+
|
94
|
+
context '.headers' do
|
95
|
+
should 'be populated' do
|
96
|
+
assert_not_nil @meta.headers
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
context %Q(URI.parse('http://#{URI::Meta.service_host}/redirect_test')) do
|
103
|
+
setup do
|
104
|
+
@uri = URI.parse("http://#{URI::Meta.service_host}/redirect_test")
|
105
|
+
end
|
106
|
+
|
107
|
+
context '.meta' do
|
108
|
+
context '.last_effective_uri' do
|
109
|
+
should 'be a redirect' do
|
110
|
+
assert_not_equal @uri.to_s, @uri.meta.last_effective_uri.to_s
|
111
|
+
assert @uri.meta.redirect?
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
context %Q(URI.parse('http://#{URI::Meta.service_host}/double_redirect_test')) do
|
118
|
+
setup do
|
119
|
+
@uri = URI.parse("http://#{URI::Meta.service_host}/double_redirect_test")
|
120
|
+
end
|
121
|
+
|
122
|
+
context '.meta(:max_redirects => 1)' do
|
123
|
+
should 'error on too many redirects' do
|
124
|
+
meta = @uri.meta(:max_redirects => 1)
|
125
|
+
assert meta.errors?
|
126
|
+
assert_kind_of String, meta.errors.first
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
context %q{URI.parse('http://bit.ly/rvQhW').meta} do
|
132
|
+
should 'raise nothing' do
|
133
|
+
assert_nothing_raised do
|
134
|
+
URI.parse('http://bit.ly/rvQhW').meta
|
135
|
+
end
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
context %q(URI.parse('garbage').meta) do
|
140
|
+
should 'raise errors' do
|
141
|
+
assert_raise NotImplementedError do
|
142
|
+
URI.parse('garbage').meta
|
143
|
+
end
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
# A bit.ly short link must be reported as a redirect to a different URI.
context %q(URI.parse('http://bit.ly/PBzu').meta) do
  setup do
    @meta = URI.parse('http://bit.ly/PBzu').meta
  end

  should 'be a redirect' do
    assert @meta.redirect?
    # last_effective_uri is a URI object; compare its string form, since a
    # String never equals a URI and the assertion would otherwise be vacuous.
    assert_not_equal 'http://bit.ly/PBzu', @meta.last_effective_uri.to_s
  end
end
|
157
|
+
|
158
|
+
context %q(URI.parse('http://taptaptap.com/+MqN').meta) do
|
159
|
+
setup do
|
160
|
+
@uri = URI.parse('http://taptaptap.com/+MqN')
|
161
|
+
end
|
162
|
+
|
163
|
+
should 'escape the + symbol' do
|
164
|
+
assert_nothing_raised do
|
165
|
+
@meta = @uri.meta
|
166
|
+
end
|
167
|
+
assert !@meta.errors?
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
context %Q(URI::Meta.multi(['http://www.google.com/', "http://#{URI::Meta.service_host}/"])) do
|
172
|
+
setup do
|
173
|
+
@metas = URI::Meta.multi(['http://www.google.com/', "http://#{URI::Meta.service_host}/"])
|
174
|
+
end
|
175
|
+
|
176
|
+
should 'return an array' do
|
177
|
+
assert_kind_of Array, @metas
|
178
|
+
end
|
179
|
+
|
180
|
+
should 'all be URI::Meta objects' do
|
181
|
+
assert @metas.all?{|m| m.kind_of? URI::Meta}
|
182
|
+
end
|
183
|
+
|
184
|
+
should 'contain a google meta' do
|
185
|
+
assert @metas.any?{|m| m.title == 'Google'}
|
186
|
+
end
|
187
|
+
end
|
188
|
+
|
189
|
+
context %Q(URI::Meta.multi(['http://www.google.com/', "http://#{URI::Meta.service_host}/"]) {}) do
|
190
|
+
setup do
|
191
|
+
@block_metas = []
|
192
|
+
@return_metas = URI::Meta.multi(['http://www.google.com/', "http://#{URI::Meta.service_host}/"]) do |meta|
|
193
|
+
@block_metas << meta
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
197
|
+
should 'return an array of 2' do
|
198
|
+
assert_kind_of Array, @return_metas
|
199
|
+
assert_equal 2, @return_metas.size
|
200
|
+
end
|
201
|
+
|
202
|
+
should 'all be URI::Meta objects' do
|
203
|
+
assert @return_metas.all?{|m| m.kind_of? URI::Meta}
|
204
|
+
end
|
205
|
+
|
206
|
+
should 'contain a google meta' do
|
207
|
+
assert @return_metas.any?{|m| m.title == 'Google'}
|
208
|
+
end
|
209
|
+
|
210
|
+
# Checks the metas collected by the block passed to URI::Meta.multi
# (@block_metas), as opposed to the method's return value.
context 'yielded in block' do
  should '2 URI::Meta objects' do
    assert @block_metas.all?{|m| m.kind_of? URI::Meta}
    # Count the yielded metas; all? is true for an empty array, so without
    # this the test would pass even if the block was never called.
    assert_equal 2, @block_metas.size
  end

  should 'a google meta' do
    assert @block_metas.any?{|m| m.title == 'Google'}
  end
end
|
220
|
+
end
|
221
|
+
|
222
|
+
context %q(URI.parse('http://www.google.com:666/')) do
|
223
|
+
setup do
|
224
|
+
@uri = URI.parse('http://www.google.com:666/')
|
225
|
+
end
|
226
|
+
|
227
|
+
context '.meta' do
|
228
|
+
should 'not return within 5 seconds' do
|
229
|
+
begin
|
230
|
+
timeout(5) do
|
231
|
+
meta = @uri.meta
|
232
|
+
assert false
|
233
|
+
end
|
234
|
+
rescue Timeout::Error => e
|
235
|
+
assert true
|
236
|
+
end
|
237
|
+
end
|
238
|
+
end
|
239
|
+
|
240
|
+
context '.meta(:connect_timeout => 1)' do
|
241
|
+
should 'return before 5 seconds' do
|
242
|
+
begin
|
243
|
+
timeout(5) do
|
244
|
+
meta = @uri.meta(:connect_timeout => 1)
|
245
|
+
assert true
|
246
|
+
end
|
247
|
+
rescue Timeout::Error => e
|
248
|
+
assert false
|
249
|
+
end
|
250
|
+
end
|
251
|
+
|
252
|
+
should 'contain timeout errors' do
|
253
|
+
assert @uri.meta(:connect_timeout => 1).errors?
|
254
|
+
end
|
255
|
+
end
|
256
|
+
end
|
257
|
+
|
258
|
+
context %Q(URI.parse('http://#{URI::Meta.service_host}/#foo').meta) do
|
259
|
+
setup do
|
260
|
+
@uri = URI.parse("http://#{URI::Meta.service_host}/#foo")
|
261
|
+
@meta = @uri.meta
|
262
|
+
end
|
263
|
+
|
264
|
+
should 'keep # info intact' do
|
265
|
+
assert_equal @uri.to_s, @meta.uri.to_s
|
266
|
+
end
|
267
|
+
|
268
|
+
should 'not have a feed' do
|
269
|
+
assert_nil @meta.feed
|
270
|
+
end
|
271
|
+
end
|
272
|
+
|
273
|
+
context %q(URI.parse('http://www.taobao.com/').meta) do
|
274
|
+
setup do
|
275
|
+
@uri = URI.parse('http://www.taobao.com/')
|
276
|
+
end
|
277
|
+
|
278
|
+
should 'not die from UTF8 issues' do
|
279
|
+
assert_nothing_raised do
|
280
|
+
@meta = @uri.meta
|
281
|
+
end
|
282
|
+
assert !@meta.errors?
|
283
|
+
end
|
284
|
+
end
|
285
|
+
|
286
|
+
# A StumbleUpon framed link should resolve to the framed page, not to
# stumbleupon.com itself.
context %q(URI.parse('http://www.stumbleupon.com/s/#4sDy2p/sivers.org/hellyeah').meta) do
  setup do
    @uri = URI.parse('http://www.stumbleupon.com/s/#4sDy2p/sivers.org/hellyeah')
    @meta = @uri.meta
  end

  should 'be a redirect' do
    assert @meta.redirect?
  end

  should 'not end at stumble upon' do
    # Match against the string form: last_effective_uri is a URI object, and
    # `uri !~ /regex/` is true for any non-String, so it could never fail.
    assert_no_match(/stumble/, @meta.last_effective_uri.to_s)
  end
end
|
300
|
+
|
301
|
+
context %q(URI.parse('http://www.youtube.com/das_captcha?next=/watch%3Fv%3DQ1rdsFuNIMc')) do
|
302
|
+
setup do
|
303
|
+
@uri = URI.parse('http://www.youtube.com/das_captcha?next=/watch%3Fv%3DQ1rdsFuNIMc')
|
304
|
+
@meta = @uri.meta
|
305
|
+
end
|
306
|
+
|
307
|
+
should 'obtain the correct title through captcha' do
|
308
|
+
assert_equal 'YouTube - Legolibrium', @meta.title
|
309
|
+
end
|
310
|
+
|
311
|
+
should 'not have changed the last_effective_uri' do
|
312
|
+
assert_equal @uri.to_s, @meta.uri.to_s
|
313
|
+
end
|
314
|
+
end
|
315
|
+
|
316
|
+
context %q(URI.parse('http://www.facebook.com/home.php')) do
|
317
|
+
setup do
|
318
|
+
@meta = URI.parse('http://www.facebook.com/home.php').meta
|
319
|
+
end
|
320
|
+
|
321
|
+
should 'correctly return 403' do
|
322
|
+
assert_equal 403, @meta.status
|
323
|
+
end
|
324
|
+
end
|
325
|
+
|
326
|
+
context %Q(URI.parse("http://#{URI::Meta.service_host}/foo%5Bbar]")) do
|
327
|
+
setup do
|
328
|
+
@uri = URI.parse("http://#{URI::Meta.service_host}/foo%5Bbar%5D")
|
329
|
+
@meta = @uri.meta
|
330
|
+
end
|
331
|
+
|
332
|
+
should 'keep encoded square brackets intact' do
|
333
|
+
assert_equal @uri.to_s, @meta.uri.to_s
|
334
|
+
end
|
335
|
+
end
|
336
|
+
|
337
|
+
context %q(URI.parse('http://img11.yfrog.com/i/vaix.jpg/').meta) do
|
338
|
+
setup do
|
339
|
+
@meta = URI.parse('http://img11.yfrog.com/i/vaix.jpg/').meta
|
340
|
+
end
|
341
|
+
|
342
|
+
should 'have a content type' do
|
343
|
+
assert_not_nil @meta.content_type
|
344
|
+
end
|
345
|
+
|
346
|
+
should 'have a title' do
|
347
|
+
assert_not_nil @meta.title
|
348
|
+
assert_not_equal '', @meta.title
|
349
|
+
end
|
350
|
+
end
|
351
|
+
|
352
|
+
context %q(URI.parse('http://www.facebook.com/pages/Bronx-NY/Career-and-Transfer-Services-at-BCC/113334355068').meta) do
|
353
|
+
setup do
|
354
|
+
@meta = URI.parse('http://www.facebook.com/pages/Bronx-NY/Career-and-Transfer-Services-at-BCC/113334355068').meta
|
355
|
+
end
|
356
|
+
|
357
|
+
should 'have a title' do
|
358
|
+
assert_not_nil @meta.title
|
359
|
+
assert_not_equal '', @meta.title
|
360
|
+
end
|
361
|
+
end
|
362
|
+
|
363
|
+
context %Q(URI.parse("http://#{URI::Meta.service_host}/meta_redirect_test").meta) do
|
364
|
+
setup do
|
365
|
+
@uri = URI.parse("http://#{URI::Meta.service_host}/meta_redirect_test")
|
366
|
+
@meta = @uri.meta
|
367
|
+
end
|
368
|
+
|
369
|
+
should 'be a redirect' do
|
370
|
+
assert @meta.redirect?
|
371
|
+
end
|
372
|
+
|
373
|
+
should 'keep the original URL intact' do
|
374
|
+
assert_equal @uri.to_s, @meta.uri.to_s
|
375
|
+
end
|
376
|
+
end
|
377
|
+
|
378
|
+
context %Q(URI.parse('http://slashdot.org/').meta) do
|
379
|
+
setup do
|
380
|
+
@meta = URI.parse('http://slashdot.org/').meta
|
381
|
+
end
|
382
|
+
|
383
|
+
should 'have a feed' do
|
384
|
+
assert_equal 'http://rss.slashdot.org/Slashdot/slashdot', @meta.feed.to_s
|
385
|
+
end
|
386
|
+
end
|
387
|
+
|
388
|
+
context %Q(URI.parse('http://rss.slashdot.org/Slashdot/slashdot').meta) do
|
389
|
+
setup do
|
390
|
+
@meta = URI.parse('http://rss.slashdot.org/Slashdot/slashdot').meta
|
391
|
+
end
|
392
|
+
|
393
|
+
should 'have a feed equal to itself'
|
394
|
+
end
|
395
|
+
end
|
data/uri-meta.gemspec
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{uri-meta}
|
8
|
+
s.version = "0.9.1"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Stateless Systems"]
|
12
|
+
s.date = %q{2009-10-13}
|
13
|
+
s.description = %q{Retrieves meta information for a URI from the meturi.com service.}
|
14
|
+
s.email = %q{production@statelesssystems.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README.markdown"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
".gitignore",
|
22
|
+
"LICENSE",
|
23
|
+
"README.markdown",
|
24
|
+
"Rakefile",
|
25
|
+
"VERSION",
|
26
|
+
"benchmark.rb",
|
27
|
+
"lib/uri/meta.rb",
|
28
|
+
"test/test_helper.rb",
|
29
|
+
"test/uri-meta_test.rb",
|
30
|
+
"uri-meta.gemspec"
|
31
|
+
]
|
32
|
+
s.homepage = %q{http://github.com/stateless-systems/uri-meta}
|
33
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
34
|
+
s.require_paths = ["lib"]
|
35
|
+
s.rubygems_version = %q{1.3.5}
|
36
|
+
s.summary = %q{Meta information for a URI}
|
37
|
+
s.test_files = [
|
38
|
+
"test/test_helper.rb",
|
39
|
+
"test/uri-meta_test.rb"
|
40
|
+
]
|
41
|
+
|
42
|
+
if s.respond_to? :specification_version then
|
43
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
44
|
+
s.specification_version = 3
|
45
|
+
|
46
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
47
|
+
s.add_runtime_dependency(%q<taf2-curb>, [">= 0"])
|
48
|
+
s.add_runtime_dependency(%q<wycats-moneta>, [">= 0"])
|
49
|
+
s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
50
|
+
else
|
51
|
+
s.add_dependency(%q<taf2-curb>, [">= 0"])
|
52
|
+
s.add_dependency(%q<wycats-moneta>, [">= 0"])
|
53
|
+
s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
54
|
+
end
|
55
|
+
else
|
56
|
+
s.add_dependency(%q<taf2-curb>, [">= 0"])
|
57
|
+
s.add_dependency(%q<wycats-moneta>, [">= 0"])
|
58
|
+
s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
59
|
+
end
|
60
|
+
end
|
metadata
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: uri-meta
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.9.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Stateless Systems
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-10-13 00:00:00 +11:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: taf2-curb
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: wycats-moneta
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "0"
|
34
|
+
version:
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: thoughtbot-shoulda
|
37
|
+
type: :development
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: "0"
|
44
|
+
version:
|
45
|
+
description: Retrieves meta information for a URI from the meturi.com service.
|
46
|
+
email: production@statelesssystems.com
|
47
|
+
executables: []
|
48
|
+
|
49
|
+
extensions: []
|
50
|
+
|
51
|
+
extra_rdoc_files:
|
52
|
+
- LICENSE
|
53
|
+
- README.markdown
|
54
|
+
files:
|
55
|
+
- .document
|
56
|
+
- .gitignore
|
57
|
+
- LICENSE
|
58
|
+
- README.markdown
|
59
|
+
- Rakefile
|
60
|
+
- VERSION
|
61
|
+
- benchmark.rb
|
62
|
+
- lib/uri/meta.rb
|
63
|
+
- test/test_helper.rb
|
64
|
+
- test/uri-meta_test.rb
|
65
|
+
- uri-meta.gemspec
|
66
|
+
has_rdoc: true
|
67
|
+
homepage: http://github.com/stateless-systems/uri-meta
|
68
|
+
licenses: []
|
69
|
+
|
70
|
+
post_install_message:
|
71
|
+
rdoc_options:
|
72
|
+
- --charset=UTF-8
|
73
|
+
require_paths:
|
74
|
+
- lib
|
75
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: "0"
|
80
|
+
version:
|
81
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
82
|
+
requirements:
|
83
|
+
- - ">="
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: "0"
|
86
|
+
version:
|
87
|
+
requirements: []
|
88
|
+
|
89
|
+
rubyforge_project:
|
90
|
+
rubygems_version: 1.3.5
|
91
|
+
signing_key:
|
92
|
+
specification_version: 3
|
93
|
+
summary: Meta information for a URI
|
94
|
+
test_files:
|
95
|
+
- test/test_helper.rb
|
96
|
+
- test/uri-meta_test.rb
|