uri-meta 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +5 -0
- data/LICENSE +20 -0
- data/README.markdown +68 -0
- data/Rakefile +57 -0
- data/VERSION +1 -0
- data/benchmark.rb +36 -0
- data/lib/uri/meta.rb +136 -0
- data/test/test_helper.rb +10 -0
- data/test/uri-meta_test.rb +395 -0
- data/uri-meta.gemspec +60 -0
- metadata +96 -0
data/.document
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Stateless Systems
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.markdown
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
# uri-meta: Get meta information about your URI
|
2
|
+
|
3
|
+
uri-meta is a ruby interface to the [metauri.com](http://www.metauri.com/) service.
|
4
|
+
|
5
|
+
[metauri.com](http://www.metauri.com/) provides two things:
|
6
|
+
|
7
|
+
* follows your URI to the end point where there is actual content instead of redirects
|
8
|
+
* obtains meta information (title etc) about that end URI
|
9
|
+
|
10
|
+
## Examples
|
11
|
+
|
12
|
+
require 'uri'
|
13
|
+
require 'uri/meta'
|
14
|
+
uri = URI.parse('http://www.google.com/')
|
15
|
+
puts uri.meta.title
|
16
|
+
# Google
|
17
|
+
puts uri.meta.status
|
18
|
+
# 200
|
19
|
+
puts uri.meta(:headers => 1).headers
|
20
|
+
# HTTP/1.1 .... etc
|
21
|
+
|
22
|
+
uri = URI.parse('http://bit.ly/PBzu')
|
23
|
+
puts uri.meta.content_type
|
24
|
+
# image/gif
|
25
|
+
|
26
|
+
meta = URI.parse('http://bit.ly/PBzu').meta(:max_redirects => 2)
|
27
|
+
puts(meta.last_effective_uri) unless meta.errors?
|
28
|
+
# http://clipart.tiu.edu/offcampus/animated/bd13644_.gif
|
29
|
+
|
30
|
+
|
31
|
+
URI::Meta.multi(['http://www.google.com/', 'http://bit.ly/PBzu'], :max_redirects => 10) do |meta|
|
32
|
+
# Don't rely on these being processed in the same order they were listed!
|
33
|
+
if meta.redirect?
|
34
|
+
puts "## #{meta.uri} -> #{meta.last_effective_uri}"
|
35
|
+
else
|
36
|
+
puts "## #{meta.uri} did not redirect and its title was #{meta.title}"
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
## Caching
|
41
|
+
|
42
|
+
uri-meta uses in-memory caching via [wycats-moneta](http://github.com/wycats/moneta), so it
|
43
|
+
should be relatively straightforward for you to use whatever other caching mechanism you want,
|
44
|
+
provided it's supported by moneta.
|
45
|
+
|
46
|
+
require 'uri'
|
47
|
+
require 'uri/meta'
|
48
|
+
|
49
|
+
# Memcached
|
50
|
+
require 'moneta/memcache'
|
51
|
+
URI::Meta::Cache.cache = Moneta::Memcache.new(:server => 'localhost', :namespace => 'uri_meta')
|
52
|
+
URI::Meta::Cache.expires_in = (60 * 60 * 24 * 7) # 1 week
|
53
|
+
|
54
|
+
# No caching (for testing I guess)
|
55
|
+
URI::Meta::Cache.cache = nil
|
56
|
+
|
57
|
+
## Known Issues
|
58
|
+
|
59
|
+
* Redirects that aren't handled by the webserver (302), such as javascript or
|
60
|
+
<meta> tag redirects are not supported yet.
|
61
|
+
* Framed redirects, such as stumbleupon are not resolved yet, as these are
|
62
|
+
technically full pages it could be difficult to know that it's not really
|
63
|
+
the end URI.
|
64
|
+
* No RDOC as yet.
|
65
|
+
|
66
|
+
# Copyright
|
67
|
+
|
68
|
+
Copyright (c) 2009 Stateless Systems. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
begin
|
5
|
+
require 'jeweler'
|
6
|
+
Jeweler::Tasks.new do |gem|
|
7
|
+
gem.name = "uri-meta"
|
8
|
+
gem.summary = %Q{Meta information for a URI}
|
9
|
+
gem.description = %Q{Retrieves meta information for a URI from the meturi.com service.}
|
10
|
+
gem.email = "production@statelesssystems.com"
|
11
|
+
gem.homepage = "http://github.com/stateless-systems/uri-meta"
|
12
|
+
gem.authors = ["Stateless Systems"]
|
13
|
+
gem.add_dependency "taf2-curb"
|
14
|
+
gem.add_dependency "wycats-moneta"
|
15
|
+
gem.add_development_dependency "thoughtbot-shoulda"
|
16
|
+
end
|
17
|
+
rescue LoadError
|
18
|
+
puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
|
19
|
+
end
|
20
|
+
|
21
|
+
require 'rake/testtask'
|
22
|
+
Rake::TestTask.new(:test) do |test|
|
23
|
+
test.libs << 'lib' << 'test'
|
24
|
+
test.pattern = 'test/**/*_test.rb'
|
25
|
+
test.verbose = true
|
26
|
+
end
|
27
|
+
|
28
|
+
begin
|
29
|
+
require 'rcov/rcovtask'
|
30
|
+
Rcov::RcovTask.new do |test|
|
31
|
+
test.libs << 'test'
|
32
|
+
test.pattern = 'test/**/*_test.rb'
|
33
|
+
test.verbose = true
|
34
|
+
end
|
35
|
+
rescue LoadError
|
36
|
+
task :rcov do
|
37
|
+
abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
task :test => :check_dependencies
|
42
|
+
|
43
|
+
task :default => :test
|
44
|
+
|
45
|
+
require 'rake/rdoctask'
|
46
|
+
Rake::RDocTask.new do |rdoc|
|
47
|
+
if File.exist?('VERSION')
|
48
|
+
version = File.read('VERSION')
|
49
|
+
else
|
50
|
+
version = ""
|
51
|
+
end
|
52
|
+
|
53
|
+
rdoc.rdoc_dir = 'rdoc'
|
54
|
+
rdoc.title = "uri-meta #{version}"
|
55
|
+
rdoc.rdoc_files.include('README*')
|
56
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
57
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.9.1
|
data/benchmark.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), 'lib'))
|
4
|
+
|
5
|
+
require 'rubygems'
|
6
|
+
require 'benchmark'
|
7
|
+
require 'curb'
|
8
|
+
require 'uri/meta'
|
9
|
+
|
10
|
+
URI::Meta::Cache.cache = nil
|
11
|
+
|
12
|
+
cached_uris = uncached_uris = []
|
13
|
+
uncached_uris = []
|
14
|
+
|
15
|
+
delete = Curl::Multi.new
|
16
|
+
|
17
|
+
(1..50).each do |x|
|
18
|
+
cached_uris << URI.parse('http://tigris.id.au/')
|
19
|
+
uncached_uris << URI.parse("http://tigris.id.au/#{x}")
|
20
|
+
c = Curl::Easy.new("http://www.metauri.com/delete?uri=#{uncached_uris.last.to_s}")
|
21
|
+
c.on_complete{|curl| print '.'}
|
22
|
+
delete.add(c)
|
23
|
+
end
|
24
|
+
|
25
|
+
print ' performing cache clear '
|
26
|
+
clear = Benchmark.realtime{ delete.perform }
|
27
|
+
puts " #{clear}"
|
28
|
+
|
29
|
+
## TODO: figure out why uncached is faster when X > pool size, but way less when X < pool size
|
30
|
+
print ' calculating cached time '
|
31
|
+
cached = Benchmark.realtime{ URI::Meta.multi(cached_uris){|m| print '.'}}
|
32
|
+
puts " #{cached}"
|
33
|
+
|
34
|
+
print 'calculating uncached time '
|
35
|
+
uncached = Benchmark.realtime{ URI::Meta.multi(uncached_uris){|m| print '.'}}
|
36
|
+
puts " #{uncached}"
|
data/lib/uri/meta.rb
ADDED
@@ -0,0 +1,136 @@
|
|
1
|
+
require 'uri'
|
2
|
+
require 'curb'
|
3
|
+
require 'yaml'
|
4
|
+
require 'moneta'
|
5
|
+
require 'moneta/memory'
|
6
|
+
require 'digest/sha1'
|
7
|
+
|
8
|
+
module URI
|
9
|
+
class Meta
|
10
|
+
attr_accessor :headers, :uri, :title, :feed, :last_modified, :content_type, :charset, :last_effective_uri, :status, :errors
|
11
|
+
@@service_host = 'www.metauri.com'
|
12
|
+
@@user_agent = 'uri-meta rubygem'
|
13
|
+
|
14
|
+
UNSAFE = Regexp.new("[#{URI::REGEXP::PATTERN::RESERVED} #%]", false, 'N').freeze
|
15
|
+
|
16
|
+
def self.service_host
|
17
|
+
@@service_host
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.service_host=(service_host)
|
21
|
+
@@service_host = service_host
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.user_agent
|
25
|
+
@@user_agent
|
26
|
+
end
|
27
|
+
|
28
|
+
def self.user_agent=(user_agent)
|
29
|
+
@@user_agent = user_agent
|
30
|
+
end
|
31
|
+
|
32
|
+
def initialize(options = {})
|
33
|
+
self.errors = []
|
34
|
+
options.each do |k, v|
|
35
|
+
case k
|
36
|
+
when :last_effective_uri, :uri, :feed then send("#{k}=", v.to_s == '' ? nil : (URI.parse(v.to_s) rescue nil))
|
37
|
+
when :error, :errors then self.errors.push(*[v].flatten)
|
38
|
+
else send("#{k}=", v) if respond_to?("#{k}=")
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def redirect?
|
44
|
+
uri != last_effective_uri
|
45
|
+
end
|
46
|
+
|
47
|
+
def errors?
|
48
|
+
!errors.empty?
|
49
|
+
end
|
50
|
+
|
51
|
+
def self.get(uri, options = {})
|
52
|
+
uri = URI.parse(uri.to_s) rescue nil
|
53
|
+
raise ArgumentError.new("Can't coerce #{uri.class} to URI") unless uri.is_a?(URI)
|
54
|
+
raise NotImplementedError.new('Only HTTP is supported so far.') unless uri.is_a?(URI::HTTP)
|
55
|
+
URI::Meta.multi([uri], options).first
|
56
|
+
end
|
57
|
+
|
58
|
+
def self.cache_key(uri, options = {})
|
59
|
+
# Make sure the key includes the options used to retrieve the meta
|
60
|
+
uid = uri.to_s + options.to_a.sort{|a,b| a[0].to_s <=> b[0].to_s}.to_s
|
61
|
+
Digest::SHA1.hexdigest(uid)
|
62
|
+
end
|
63
|
+
|
64
|
+
#--
|
65
|
+
# TODO: Chunk uri's through a pre-warmed pool of curl easy instances?
|
66
|
+
def self.multi(uris, options = {}, &block)
|
67
|
+
metas = []
|
68
|
+
multi = Curl::Multi.new
|
69
|
+
uris.each do |uri|
|
70
|
+
if meta = URI::Meta::Cache.get(cache_key(uri, options))
|
71
|
+
metas << meta
|
72
|
+
URI::Meta::Cache.store(cache_key(uri, options), meta)
|
73
|
+
block.call(meta) if block
|
74
|
+
else
|
75
|
+
easy = curl(uri, options)
|
76
|
+
easy.on_complete do |curl|
|
77
|
+
args = YAML.load(curl.body_str) rescue {:errors => "YAML Error, #{$!.message}"}
|
78
|
+
args = {:errors => "YAML Error, server returned unknown format."} unless args.is_a?(Hash)
|
79
|
+
|
80
|
+
metas << meta = URI::Meta.new({:uri => uri}.update(args))
|
81
|
+
URI::Meta::Cache.store(cache_key(uri, options), meta)
|
82
|
+
block.call(meta) if block
|
83
|
+
end
|
84
|
+
multi.add(easy)
|
85
|
+
end
|
86
|
+
end
|
87
|
+
multi.perform
|
88
|
+
metas
|
89
|
+
end
|
90
|
+
|
91
|
+
protected
|
92
|
+
#--
|
93
|
+
# Required because the URI option must be verbatim. If '+' and others are not escaped Merb, Rack or something
|
94
|
+
# helpfully converts them to spaces on metauri.com
|
95
|
+
# Builds (but does not perform) the Curl::Easy request that asks the
# metauri service for the meta information of +uri+.
#
# uri     - the URI (or String) to look up; sent as the +uri+ query parameter.
# options - Hash of extra query parameters, forwarded as given.
#
# Returns a Curl::Easy aimed at show.yaml on the configured service host,
# with the User-Agent header set to the configured user agent.
def self.curl(uri, options = {})
  # merge, not update: the caller's hash must stay untouched. multi derives
  # its cache key from the same hash both before and after this call, so a
  # mutation here makes the stored key differ from the looked-up key.
  params = options.merge(:uri => uri)
  # Every value is escaped with UNSAFE so characters such as '+' reach the
  # service verbatim instead of being decoded on the way.
  query = params.map{|k, v| "#{k}=" + URI.escape(v.to_s, UNSAFE)}.join('&')
  c = Curl::Easy.new("http://#{service_host}/show.yaml?#{query}")
  c.headers['User-Agent'] = user_agent
  c
end
|
102
|
+
|
103
|
+
module Mixin
  # Meta information for this URI, obtained through URI::Meta.get.
  #
  # options - Hash of options handed on to URI::Meta.get.
  #
  # The result is memoized on the receiver separately for each distinct
  # options hash, so uri.meta followed by uri.meta(:headers => 1) performs a
  # second lookup instead of returning the first, header-less answer.
  def meta(options = {})
    @metas ||= {}
    # Key on a snapshot: the memo key must not change if the hash passed in
    # is modified after this call.
    key = options.dup
    @metas[key] ||= URI::Meta.get(self, options)
  end
end
|
108
|
+
|
109
|
+
class Cache
|
110
|
+
@@cache = Moneta::Memory.new
|
111
|
+
@@expires_in = 86_400 # 24 hours
|
112
|
+
|
113
|
+
class << self
|
114
|
+
def store(uid, obj)
|
115
|
+
@@cache.store(uid, obj, :expires_in => @@expires_in) unless @@cache.nil?
|
116
|
+
end
|
117
|
+
|
118
|
+
def get(id)
|
119
|
+
@@cache[id] unless @@cache.nil?
|
120
|
+
end
|
121
|
+
|
122
|
+
def cache=(cache)
|
123
|
+
warn 'Turning off caching is poor form, for longer processes consider using moneta/memcached' if cache.nil?
|
124
|
+
@@cache = cache
|
125
|
+
end
|
126
|
+
|
127
|
+
def expires_in=(seconds)
|
128
|
+
@@expires_in = seconds
|
129
|
+
end
|
130
|
+
end
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
URI::Generic.send(:include, URI::Meta::Mixin)
|
135
|
+
URI::HTTP.send(:include, URI::Meta::Mixin)
|
136
|
+
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,395 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'test_helper')
|
2
|
+
require 'uri'
|
3
|
+
require 'curb'
|
4
|
+
require 'timeout'
|
5
|
+
|
6
|
+
class UriMetaTest < Test::Unit::TestCase
|
7
|
+
# First things first. Purge all test URIs on the metauri service so we don't
|
8
|
+
# get issues from old cached URIs.
|
9
|
+
[
|
10
|
+
'garbage',
|
11
|
+
'http://bit.ly/PBzu',
|
12
|
+
'http://bit.ly/rvQhW',
|
13
|
+
'http://img11.yfrog.com/i/vaix.jpg/',
|
14
|
+
'http://rss.slashdot.org/Slashdot/slashdot',
|
15
|
+
'http://slashdot.org/',
|
16
|
+
'http://taptaptap.com/+MqN',
|
17
|
+
"http://#{URI::Meta.service_host}/",
|
18
|
+
"http://#{URI::Meta.service_host}/double_redirect_test",
|
19
|
+
"http://#{URI::Meta.service_host}/#foo",
|
20
|
+
"http://#{URI::Meta.service_host}/foo%5Bbar%5D",
|
21
|
+
"http://#{URI::Meta.service_host}/meta_redirect_test",
|
22
|
+
"http://#{URI::Meta.service_host}/redirect_test",
|
23
|
+
'http://www.facebook.com/home.php',
|
24
|
+
'http://www.facebook.com/pages/Bronx-NY/Career-and-Transfer-Services-at-BCC/113334355068',
|
25
|
+
'http://www.google.com:666/',
|
26
|
+
'http://www.stumbleupon.com/s/#4sDy2p/sivers.org/hellyeah',
|
27
|
+
'http://www.taobao.com/',
|
28
|
+
'http://www.youtube.com/das_captcha?next=/watch%3Fv%3DQ1rdsFuNIMc',
|
29
|
+
].each{|uri| Curl::Easy.http_get("http://#{URI::Meta.service_host}/delete?uri=#{URI.escape(uri.to_s, URI::Meta::UNSAFE)}") }
|
30
|
+
|
31
|
+
context %Q(URI.parse('http://#{URI::Meta.service_host}/')) do
|
32
|
+
setup do
|
33
|
+
@uri = URI.parse("http://#{URI::Meta.service_host}/")
|
34
|
+
end
|
35
|
+
|
36
|
+
should 'respond_to :meta' do
|
37
|
+
assert_respond_to @uri, :meta
|
38
|
+
end
|
39
|
+
|
40
|
+
context '.meta' do
|
41
|
+
setup do
|
42
|
+
@meta = @uri.meta
|
43
|
+
end
|
44
|
+
|
45
|
+
should 'be a URI::Meta object' do
|
46
|
+
assert_kind_of URI::Meta, @meta
|
47
|
+
end
|
48
|
+
|
49
|
+
context '.uri' do
|
50
|
+
should 'be a URI object' do
|
51
|
+
assert_kind_of URI, @meta.uri
|
52
|
+
end
|
53
|
+
|
54
|
+
should 'be the same as the original URI' do
|
55
|
+
assert_equal @uri.to_s, @meta.uri.to_s
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
context '.last_effective_uri' do
|
60
|
+
should 'be a URI object' do
|
61
|
+
assert_kind_of URI, @meta.last_effective_uri
|
62
|
+
end
|
63
|
+
|
64
|
+
should 'not have been a redirect' do
|
65
|
+
assert_equal @uri.to_s, @meta.last_effective_uri.to_s
|
66
|
+
assert !@meta.redirect?
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
context '.title' do
|
71
|
+
should 'be Meta URI' do
|
72
|
+
assert_equal 'Meta URI', @meta.title
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
context '.status' do
|
77
|
+
should 'be 200' do
|
78
|
+
assert_equal 200, @meta.status
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
context '.headers' do
|
83
|
+
should 'be nil' do
|
84
|
+
assert_nil @meta.headers
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
context '.meta(:headers => 1)' do
|
90
|
+
setup do
|
91
|
+
@meta = URI.parse("http://#{URI::Meta.service_host}/").meta(:headers => 1)
|
92
|
+
end
|
93
|
+
|
94
|
+
context '.headers' do
|
95
|
+
should 'be populated' do
|
96
|
+
assert_not_nil @meta.headers
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
context %Q(URI.parse('http://#{URI::Meta.service_host}/redirect_test')) do
|
103
|
+
setup do
|
104
|
+
@uri = URI.parse("http://#{URI::Meta.service_host}/redirect_test")
|
105
|
+
end
|
106
|
+
|
107
|
+
context '.meta' do
|
108
|
+
context '.last_effective_uri' do
|
109
|
+
should 'be a redirect' do
|
110
|
+
assert_not_equal @uri.to_s, @uri.meta.last_effective_uri.to_s
|
111
|
+
assert @uri.meta.redirect?
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
context %Q(URI.parse('http://#{URI::Meta.service_host}/double_redirect_test')) do
|
118
|
+
setup do
|
119
|
+
@uri = URI.parse("http://#{URI::Meta.service_host}/double_redirect_test")
|
120
|
+
end
|
121
|
+
|
122
|
+
context '.meta(:max_redirects => 1)' do
|
123
|
+
should 'error on too many redirects' do
|
124
|
+
meta = @uri.meta(:max_redirects => 1)
|
125
|
+
assert meta.errors?
|
126
|
+
assert_kind_of String, meta.errors.first
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
context %q{URI.parse('http://bit.ly/rvQhW').meta} do
|
132
|
+
should 'raise nothing' do
|
133
|
+
assert_nothing_raised do
|
134
|
+
URI.parse('http://bit.ly/rvQhW').meta
|
135
|
+
end
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
context %q(URI.parse('garbage').meta) do
|
140
|
+
should 'raise errors' do
|
141
|
+
assert_raise NotImplementedError do
|
142
|
+
URI.parse('garbage').meta
|
143
|
+
end
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
context %q(URI.parse('http://bit.ly/PBzu').meta) do
|
148
|
+
setup do
|
149
|
+
@meta = URI.parse('http://bit.ly/PBzu').meta
|
150
|
+
end
|
151
|
+
|
152
|
+
should 'be a redirect' do
|
153
|
+
assert @meta.redirect?
|
154
|
+
assert_not_equal 'http://bit.ly/PBzu', @meta.last_effective_uri
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
context %q(URI.parse('http://taptaptap.com/+MqN').meta) do
|
159
|
+
setup do
|
160
|
+
@uri = URI.parse('http://taptaptap.com/+MqN')
|
161
|
+
end
|
162
|
+
|
163
|
+
should 'escape the + symbol' do
|
164
|
+
assert_nothing_raised do
|
165
|
+
@meta = @uri.meta
|
166
|
+
end
|
167
|
+
assert !@meta.errors?
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
context %Q(URI::Meta.multi(['http://www.google.com/', "http://#{URI::Meta.service_host}/"])) do
|
172
|
+
setup do
|
173
|
+
@metas = URI::Meta.multi(['http://www.google.com/', "http://#{URI::Meta.service_host}/"])
|
174
|
+
end
|
175
|
+
|
176
|
+
should 'return an array' do
|
177
|
+
assert_kind_of Array, @metas
|
178
|
+
end
|
179
|
+
|
180
|
+
should 'all be URI::Meta objects' do
|
181
|
+
assert @metas.all?{|m| m.kind_of? URI::Meta}
|
182
|
+
end
|
183
|
+
|
184
|
+
should 'contain a google meta' do
|
185
|
+
assert @metas.any?{|m| m.title == 'Google'}
|
186
|
+
end
|
187
|
+
end
|
188
|
+
|
189
|
+
context %Q(URI::Meta.multi(['http://www.google.com/', "http://#{URI::Meta.service_host}/"]) {}) do
|
190
|
+
setup do
|
191
|
+
@block_metas = []
|
192
|
+
@return_metas = URI::Meta.multi(['http://www.google.com/', "http://#{URI::Meta.service_host}/"]) do |meta|
|
193
|
+
@block_metas << meta
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
197
|
+
should 'return an array of 2' do
|
198
|
+
assert_kind_of Array, @return_metas
|
199
|
+
assert_equal 2, @return_metas.size
|
200
|
+
end
|
201
|
+
|
202
|
+
should 'all be URI::Meta objects' do
|
203
|
+
assert @return_metas.all?{|m| m.kind_of? URI::Meta}
|
204
|
+
end
|
205
|
+
|
206
|
+
should 'contain a google meta' do
|
207
|
+
assert @return_metas.any?{|m| m.title == 'Google'}
|
208
|
+
end
|
209
|
+
|
210
|
+
context 'yielded in block' do
|
211
|
+
should '2 URI::Meta objects' do
|
212
|
+
assert @block_metas.all?{|m| m.kind_of? URI::Meta}
|
213
|
+
assert_equal 2, @return_metas.size
|
214
|
+
end
|
215
|
+
|
216
|
+
should 'a google meta' do
|
217
|
+
assert @block_metas.any?{|m| m.title == 'Google'}
|
218
|
+
end
|
219
|
+
end
|
220
|
+
end
|
221
|
+
|
222
|
+
context %q(URI.parse('http://www.google.com:666/')) do
|
223
|
+
setup do
|
224
|
+
@uri = URI.parse('http://www.google.com:666/')
|
225
|
+
end
|
226
|
+
|
227
|
+
context '.meta' do
|
228
|
+
should 'not return within 5 seconds' do
|
229
|
+
begin
|
230
|
+
timeout(5) do
|
231
|
+
meta = @uri.meta
|
232
|
+
assert false
|
233
|
+
end
|
234
|
+
rescue Timeout::Error => e
|
235
|
+
assert true
|
236
|
+
end
|
237
|
+
end
|
238
|
+
end
|
239
|
+
|
240
|
+
context '.meta(:connect_timeout => 1)' do
|
241
|
+
should 'return before 5 seconds' do
|
242
|
+
begin
|
243
|
+
timeout(5) do
|
244
|
+
meta = @uri.meta(:connect_timeout => 1)
|
245
|
+
assert true
|
246
|
+
end
|
247
|
+
rescue Timeout::Error => e
|
248
|
+
assert false
|
249
|
+
end
|
250
|
+
end
|
251
|
+
|
252
|
+
should 'contain timeout errors' do
|
253
|
+
assert @uri.meta(:connect_timeout => 1).errors?
|
254
|
+
end
|
255
|
+
end
|
256
|
+
end
|
257
|
+
|
258
|
+
context %Q(URI.parse('http://#{URI::Meta.service_host}/#foo').meta) do
|
259
|
+
setup do
|
260
|
+
@uri = URI.parse("http://#{URI::Meta.service_host}/#foo")
|
261
|
+
@meta = @uri.meta
|
262
|
+
end
|
263
|
+
|
264
|
+
should 'keep # info intact' do
|
265
|
+
assert_equal @uri.to_s, @meta.uri.to_s
|
266
|
+
end
|
267
|
+
|
268
|
+
should 'not have a feed' do
|
269
|
+
assert_nil @meta.feed
|
270
|
+
end
|
271
|
+
end
|
272
|
+
|
273
|
+
context %q(URI.parse('http://www.taobao.com/').meta) do
|
274
|
+
setup do
|
275
|
+
@uri = URI.parse('http://www.taobao.com/')
|
276
|
+
end
|
277
|
+
|
278
|
+
should 'not die from UTF8 issues' do
|
279
|
+
assert_nothing_raised do
|
280
|
+
@meta = @uri.meta
|
281
|
+
end
|
282
|
+
assert !@meta.errors?
|
283
|
+
end
|
284
|
+
end
|
285
|
+
|
286
|
+
context %q(URI.parse('http://www.stumbleupon.com/s/#4sDy2p/sivers.org/hellyeah').meta) do
|
287
|
+
setup do
|
288
|
+
@uri = URI.parse('http://www.stumbleupon.com/s/#4sDy2p/sivers.org/hellyeah')
|
289
|
+
@meta = @uri.meta
|
290
|
+
end
|
291
|
+
|
292
|
+
should 'be a redirect' do
|
293
|
+
assert @meta.redirect?
|
294
|
+
end
|
295
|
+
|
296
|
+
should 'not end at stumble upon' do
|
297
|
+
assert @meta.last_effective_uri !~ /stumble/
|
298
|
+
end
|
299
|
+
end
|
300
|
+
|
301
|
+
context %q(URI.parse('http://www.youtube.com/das_captcha?next=/watch%3Fv%3DQ1rdsFuNIMc')) do
|
302
|
+
setup do
|
303
|
+
@uri = URI.parse('http://www.youtube.com/das_captcha?next=/watch%3Fv%3DQ1rdsFuNIMc')
|
304
|
+
@meta = @uri.meta
|
305
|
+
end
|
306
|
+
|
307
|
+
should 'obtain the correct title through captcha' do
|
308
|
+
assert_equal 'YouTube - Legolibrium', @meta.title
|
309
|
+
end
|
310
|
+
|
311
|
+
should 'not have changed the last_effective_uri' do
|
312
|
+
assert_equal @uri.to_s, @meta.uri.to_s
|
313
|
+
end
|
314
|
+
end
|
315
|
+
|
316
|
+
context %q(URI.parse('http://www.facebook.com/home.php')) do
|
317
|
+
setup do
|
318
|
+
@meta = URI.parse('http://www.facebook.com/home.php').meta
|
319
|
+
end
|
320
|
+
|
321
|
+
should 'correctly return 403' do
|
322
|
+
assert_equal 403, @meta.status
|
323
|
+
end
|
324
|
+
end
|
325
|
+
|
326
|
+
context %Q(URI.parse("http://#{URI::Meta.service_host}/foo%5Bbar]")) do
|
327
|
+
setup do
|
328
|
+
@uri = URI.parse("http://#{URI::Meta.service_host}/foo%5Bbar%5D")
|
329
|
+
@meta = @uri.meta
|
330
|
+
end
|
331
|
+
|
332
|
+
should 'keep encoded square brackets intact' do
|
333
|
+
assert_equal @uri.to_s, @meta.uri.to_s
|
334
|
+
end
|
335
|
+
end
|
336
|
+
|
337
|
+
context %q(URI.parse('http://img11.yfrog.com/i/vaix.jpg/').meta) do
|
338
|
+
setup do
|
339
|
+
@meta = URI.parse('http://img11.yfrog.com/i/vaix.jpg/').meta
|
340
|
+
end
|
341
|
+
|
342
|
+
should 'have a content type' do
|
343
|
+
assert_not_nil @meta.content_type
|
344
|
+
end
|
345
|
+
|
346
|
+
should 'have a title' do
|
347
|
+
assert_not_nil @meta.title
|
348
|
+
assert_not_equal '', @meta.title
|
349
|
+
end
|
350
|
+
end
|
351
|
+
|
352
|
+
context %q(URI.parse('http://www.facebook.com/pages/Bronx-NY/Career-and-Transfer-Services-at-BCC/113334355068').meta) do
|
353
|
+
setup do
|
354
|
+
@meta = URI.parse('http://www.facebook.com/pages/Bronx-NY/Career-and-Transfer-Services-at-BCC/113334355068').meta
|
355
|
+
end
|
356
|
+
|
357
|
+
should 'have a title' do
|
358
|
+
assert_not_nil @meta.title
|
359
|
+
assert_not_equal '', @meta.title
|
360
|
+
end
|
361
|
+
end
|
362
|
+
|
363
|
+
context %Q(URI.parse("http://#{URI::Meta.service_host}/meta_redirect_test").meta) do
|
364
|
+
setup do
|
365
|
+
@uri = URI.parse("http://#{URI::Meta.service_host}/meta_redirect_test")
|
366
|
+
@meta = @uri.meta
|
367
|
+
end
|
368
|
+
|
369
|
+
should 'be a redirect' do
|
370
|
+
assert @meta.redirect?
|
371
|
+
end
|
372
|
+
|
373
|
+
should 'keep the original URL intact' do
|
374
|
+
assert_equal @uri.to_s, @meta.uri.to_s
|
375
|
+
end
|
376
|
+
end
|
377
|
+
|
378
|
+
context %Q(URI.parse('http://slashdot.org/').meta) do
|
379
|
+
setup do
|
380
|
+
@meta = URI.parse('http://slashdot.org/').meta
|
381
|
+
end
|
382
|
+
|
383
|
+
should 'have a feed' do
|
384
|
+
assert_equal 'http://rss.slashdot.org/Slashdot/slashdot', @meta.feed.to_s
|
385
|
+
end
|
386
|
+
end
|
387
|
+
|
388
|
+
context %Q(URI.parse('http://rss.slashdot.org/Slashdot/slashdot').meta) do
|
389
|
+
setup do
|
390
|
+
@meta = URI.parse('http://rss.slashdot.org/Slashdot/slashdot').meta
|
391
|
+
end
|
392
|
+
|
393
|
+
should 'have a feed equal to itself'
|
394
|
+
end
|
395
|
+
end
|
data/uri-meta.gemspec
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{uri-meta}
|
8
|
+
s.version = "0.9.1"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Stateless Systems"]
|
12
|
+
s.date = %q{2009-10-13}
|
13
|
+
s.description = %q{Retrieves meta information for a URI from the meturi.com service.}
|
14
|
+
s.email = %q{production@statelesssystems.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README.markdown"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
".gitignore",
|
22
|
+
"LICENSE",
|
23
|
+
"README.markdown",
|
24
|
+
"Rakefile",
|
25
|
+
"VERSION",
|
26
|
+
"benchmark.rb",
|
27
|
+
"lib/uri/meta.rb",
|
28
|
+
"test/test_helper.rb",
|
29
|
+
"test/uri-meta_test.rb",
|
30
|
+
"uri-meta.gemspec"
|
31
|
+
]
|
32
|
+
s.homepage = %q{http://github.com/stateless-systems/uri-meta}
|
33
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
34
|
+
s.require_paths = ["lib"]
|
35
|
+
s.rubygems_version = %q{1.3.5}
|
36
|
+
s.summary = %q{Meta information for a URI}
|
37
|
+
s.test_files = [
|
38
|
+
"test/test_helper.rb",
|
39
|
+
"test/uri-meta_test.rb"
|
40
|
+
]
|
41
|
+
|
42
|
+
if s.respond_to? :specification_version then
|
43
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
44
|
+
s.specification_version = 3
|
45
|
+
|
46
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
47
|
+
s.add_runtime_dependency(%q<taf2-curb>, [">= 0"])
|
48
|
+
s.add_runtime_dependency(%q<wycats-moneta>, [">= 0"])
|
49
|
+
s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
50
|
+
else
|
51
|
+
s.add_dependency(%q<taf2-curb>, [">= 0"])
|
52
|
+
s.add_dependency(%q<wycats-moneta>, [">= 0"])
|
53
|
+
s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
54
|
+
end
|
55
|
+
else
|
56
|
+
s.add_dependency(%q<taf2-curb>, [">= 0"])
|
57
|
+
s.add_dependency(%q<wycats-moneta>, [">= 0"])
|
58
|
+
s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
59
|
+
end
|
60
|
+
end
|
metadata
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: uri-meta
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.9.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Stateless Systems
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-10-13 00:00:00 +11:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: taf2-curb
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: wycats-moneta
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "0"
|
34
|
+
version:
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: thoughtbot-shoulda
|
37
|
+
type: :development
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: "0"
|
44
|
+
version:
|
45
|
+
description: Retrieves meta information for a URI from the meturi.com service.
|
46
|
+
email: production@statelesssystems.com
|
47
|
+
executables: []
|
48
|
+
|
49
|
+
extensions: []
|
50
|
+
|
51
|
+
extra_rdoc_files:
|
52
|
+
- LICENSE
|
53
|
+
- README.markdown
|
54
|
+
files:
|
55
|
+
- .document
|
56
|
+
- .gitignore
|
57
|
+
- LICENSE
|
58
|
+
- README.markdown
|
59
|
+
- Rakefile
|
60
|
+
- VERSION
|
61
|
+
- benchmark.rb
|
62
|
+
- lib/uri/meta.rb
|
63
|
+
- test/test_helper.rb
|
64
|
+
- test/uri-meta_test.rb
|
65
|
+
- uri-meta.gemspec
|
66
|
+
has_rdoc: true
|
67
|
+
homepage: http://github.com/stateless-systems/uri-meta
|
68
|
+
licenses: []
|
69
|
+
|
70
|
+
post_install_message:
|
71
|
+
rdoc_options:
|
72
|
+
- --charset=UTF-8
|
73
|
+
require_paths:
|
74
|
+
- lib
|
75
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: "0"
|
80
|
+
version:
|
81
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
82
|
+
requirements:
|
83
|
+
- - ">="
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: "0"
|
86
|
+
version:
|
87
|
+
requirements: []
|
88
|
+
|
89
|
+
rubyforge_project:
|
90
|
+
rubygems_version: 1.3.5
|
91
|
+
signing_key:
|
92
|
+
specification_version: 3
|
93
|
+
summary: Meta information for a URI
|
94
|
+
test_files:
|
95
|
+
- test/test_helper.rb
|
96
|
+
- test/uri-meta_test.rb
|