uri-meta 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Stateless Systems
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.markdown ADDED
@@ -0,0 +1,68 @@
1
+ # uri-meta: Get meta information about your URI
2
+
3
+ uri-meta is a ruby interface to the [metauri.com](http://www.metauri.com/) service.
4
+
5
+ [metauri.com](http://www.metauri.com/) provides two things:
6
+
7
+ * follows your URI to the end point where there is actual content instead of redirects
8
+ * obtains meta information (title etc) about that end URI
9
+
10
+ ## Examples
11
+
12
+ require 'uri'
13
+ require 'uri/meta'
14
+ uri = URI.parse('http://www.google.com/')
15
+ puts uri.meta.title
16
+ # Google
17
+ puts uri.meta.status
18
+ # 200
19
+ puts uri.meta(:headers => 1).headers
20
+ # HTTP/1.1 .... etc
21
+
22
+ uri = URI.parse('http://bit.ly/PBzu')
23
+ puts uri.meta.content_type
24
+ # image/gif
25
+
26
+ meta = URI.parse('http://bit.ly/PBzu').meta(:max_redirects => 2)
27
+ puts(meta.last_effective_uri) unless meta.errors?
28
+ # http://clipart.tiu.edu/offcampus/animated/bd13644_.gif
29
+
30
+
31
+ URI::Meta.multi(['http://www.google.com/', 'http://bit.ly/PBzu'], :max_redirects => 10) do |meta|
32
+ # Don't rely on these being processed in the same order they were listed!
33
+ if meta.redirect?
34
+ puts "## #{meta.uri} -> #{meta.last_effective_uri}"
35
+ else
36
+ puts "## #{meta.uri} did not redirect and its title was #{meta.title}"
37
+ end
38
+ end
39
+
40
+ ## Caching
41
+
42
+ uri-meta uses in-memory caching via [wycats-moneta](http://github.com/wycats/moneta), so it
43
+ should be relatively straightforward for you to use whatever other caching mechanism you want,
44
+ provided it's supported by moneta.
45
+
46
+ require 'uri'
47
+ require 'uri/meta'
48
+
49
+ # Memcached
50
+ require 'moneta/memcache'
51
+ URI::Meta::Cache.cache = Moneta::Memcache.new(:server => 'localhost', :namespace => 'uri_meta')
52
+ URI::Meta::Cache.expires_in = (60 * 60 * 24 * 7) # 1 week
53
+
54
+ # No caching (for testing I guess)
55
+ URI::Meta::Cache.cache = nil
56
+
57
+ ## Known Issues
58
+
59
+ * Redirects that aren't handled by the webserver (302), such as javascript or
60
+ <meta> tag redirects are not supported yet.
61
+ * Framed redirects, such as stumbleupon are not resolved yet, as these are
62
+ technically full pages it could be difficult to know that it's not really
63
+ the end URI.
64
+ * No RDOC as yet.
65
+
66
+ # Copyright
67
+
68
+ Copyright (c) 2009 Stateless Systems. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,57 @@
1
# Rake tasks for the uri-meta gem: packaging (via Jeweler, when installed),
# unit tests, optional coverage (via RCov, when installed) and RDoc.
require 'rubygems'
require 'rake'

# Packaging tasks. Jeweler is optional: if it cannot be loaded we print a hint
# and carry on so the remaining tasks stay usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "uri-meta"
    gem.summary = %Q{Meta information for a URI}
    # The service is metauri.com (the description previously read "meturi.com").
    gem.description = %Q{Retrieves meta information for a URI from the metauri.com service.}
    gem.email = "production@statelesssystems.com"
    gem.homepage = "http://github.com/stateless-systems/uri-meta"
    gem.authors = ["Stateless Systems"]
    gem.add_dependency "taf2-curb"
    gem.add_dependency "wycats-moneta"
    gem.add_development_dependency "thoughtbot-shoulda"
  end
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

# Unit tests: everything matching test/**/*_test.rb, with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# Coverage. When RCov is missing, define a stub :rcov task that explains how to install it.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is not defined anywhere in this Rakefile (presumably it
# comes from Jeweler's tasks -- TODO confirm). Only depend on it when it
# exists, so `rake test` still works after the LoadError fallback above.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# API documentation, titled with the contents of the VERSION file when present.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  if File.exist?('VERSION')
    version = File.read('VERSION')
  else
    version = ""
  end

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "uri-meta #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.9.1
data/benchmark.rb ADDED
@@ -0,0 +1,36 @@
1
#!/usr/bin/env ruby
#
# Rough timing comparison of URI::Meta.multi for 50 requests of one repeated
# URI ("cached") versus 50 distinct URIs that were just deleted from the
# service ("uncached"). Performs real network requests.

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), 'lib'))

require 'rubygems'
require 'benchmark'
require 'curb'
require 'uri/meta'

# Switch off the library's local cache so every lookup goes over the wire.
URI::Meta::Cache.cache = nil

# Two independent lists (previously created through a chained assignment that
# briefly aliased both names to the same array).
cached_uris = []
uncached_uris = []

delete = Curl::Multi.new

(1..50).each do |x|
  cached_uris << URI.parse('http://tigris.id.au/')
  uncached_uris << URI.parse("http://tigris.id.au/#{x}")
  # Queue a delete for the distinct URI. Host and escaping follow the same
  # form the test suite uses for the delete endpoint.
  target = URI.escape(uncached_uris.last.to_s, URI::Meta::UNSAFE)
  c = Curl::Easy.new("http://#{URI::Meta.service_host}/delete?uri=#{target}")
  c.on_complete{|curl| print '.'}
  delete.add(c)
end

print ' performing cache clear '
clear = Benchmark.realtime{ delete.perform }
puts " #{clear}"

## TODO: figure out why uncached is faster when X > pool size, but way less when X < pool size
print ' calculating cached time '
cached = Benchmark.realtime{ URI::Meta.multi(cached_uris){|m| print '.'}}
puts " #{cached}"

print 'calculating uncached time '
uncached = Benchmark.realtime{ URI::Meta.multi(uncached_uris){|m| print '.'}}
puts " #{uncached}"
data/lib/uri/meta.rb ADDED
@@ -0,0 +1,136 @@
1
+ require 'uri'
2
+ require 'curb'
3
+ require 'yaml'
4
+ require 'moneta'
5
+ require 'moneta/memory'
6
+ require 'digest/sha1'
7
+
8
# Adds URI::Meta (a client for the metauri.com lookup service) to the URI
# namespace and mixes a #meta helper into URI::Generic and URI::HTTP.
module URI
  # Meta information about a single URI as reported by the service: title,
  # status, content type, the last effective (post-redirect) URI, and so on.
  class Meta
    attr_accessor :headers, :uri, :title, :feed, :last_modified, :content_type, :charset, :last_effective_uri, :status, :errors
    @@service_host = 'www.metauri.com'
    @@user_agent = 'uri-meta rubygem'

    # Characters that are percent-escaped in query values sent to the service.
    UNSAFE = Regexp.new("[#{URI::REGEXP::PATTERN::RESERVED} #%]", false, 'N').freeze

    # Host name of the service queried by .curl.
    def self.service_host
      @@service_host
    end

    # Point the client at a different service host.
    def self.service_host=(service_host)
      @@service_host = service_host
    end

    # Value sent as the User-Agent header on service requests.
    def self.user_agent
      @@user_agent
    end

    # Override the User-Agent header value.
    def self.user_agent=(user_agent)
      @@user_agent = user_agent
    end

    # Build a Meta from a hash of attributes.
    #
    # * :uri, :last_effective_uri and :feed are parsed into URI objects; blank
    #   or unparsable values become nil.
    # * :error / :errors are appended (flattened) to #errors.
    # * any other key is assigned only if a matching writer exists; unknown
    #   keys are ignored.
    def initialize(options = {})
      self.errors = []
      options.each do |k, v|
        case k
          when :last_effective_uri, :uri, :feed then send("#{k}=", v.to_s == '' ? nil : (URI.parse(v.to_s) rescue nil))
          when :error, :errors then self.errors.push(*[v].flatten)
          else send("#{k}=", v) if respond_to?("#{k}=")
        end
      end
    end

    # True when the requested URI differs from the last effective URI.
    def redirect?
      uri != last_effective_uri
    end

    # True when at least one error was recorded.
    def errors?
      !errors.empty?
    end

    # Fetch the meta for a single URI.
    #
    # uri     - a URI or anything whose #to_s can be parsed as one.
    # options - extra query parameters passed through to the service.
    #
    # Raises ArgumentError if the value cannot be parsed as a URI and
    # NotImplementedError if it is not an HTTP URI.
    def self.get(uri, options = {})
      # Keep the caller's value in `uri` so the error message can name its
      # class (it used to be overwritten first and always reported NilClass).
      parsed = URI.parse(uri.to_s) rescue nil
      raise ArgumentError.new("Can't coerce #{uri.class} to URI") unless parsed.is_a?(URI)
      raise NotImplementedError.new('Only HTTP is supported so far.') unless parsed.is_a?(URI::HTTP)
      URI::Meta.multi([parsed], options).first
    end

    # SHA1 hex digest identifying a (uri, options) pair in the cache.
    def self.cache_key(uri, options = {})
      # Make sure the key includes the options used to retrieve the meta
      uid = uri.to_s + options.to_a.sort{|a,b| a[0].to_s <=> b[0].to_s}.to_s
      Digest::SHA1.hexdigest(uid)
    end

    #--
    # TODO: Chunk uri's through a pre-warmed pool of curl easy instances?
    #++
    # Fetch metas for several URIs through one Curl::Multi handle.
    #
    # Cache hits are returned (and yielded) immediately; everything else is
    # requested from the service and added as each request completes, so the
    # result order is not guaranteed to match the input order.
    #
    # Returns an Array of URI::Meta. Yields each meta to the block, if given.
    def self.multi(uris, options = {}, &block)
      metas = []
      multi = Curl::Multi.new
      uris.each do |uri|
        # Computed once per URI so lookup and store always use the same key.
        key = cache_key(uri, options)
        if meta = URI::Meta::Cache.get(key)
          metas << meta
          # Storing again on a hit presumably refreshes the expiry -- TODO confirm.
          URI::Meta::Cache.store(key, meta)
          block.call(meta) if block
        else
          easy = curl(uri, options)
          easy.on_complete do |curl|
            # NOTE(review): YAML.load is applied to a body fetched over the
            # network; consider a restricted loader if the service host is
            # not fully trusted.
            args = YAML.load(curl.body_str) rescue {:errors => "YAML Error, #{$!.message}"}
            args = {:errors => "YAML Error, server returned unknown format."} unless args.is_a?(Hash)

            metas << meta = URI::Meta.new({:uri => uri}.update(args))
            URI::Meta::Cache.store(key, meta)
            block.call(meta) if block
          end
          multi.add(easy)
        end
      end
      multi.perform
      metas
    end

    #--
    # Required because the URI option must be verbatim. If '+' and others are not escaped Merb, Rack or something
    # helpfully converts them to spaces on metauri.com
    #++
    # Build the Curl::Easy request for one URI. The bare `protected` that used
    # to precede this definition was dropped: it has no effect on methods
    # defined with `def self.`, so visibility is unchanged.
    def self.curl(uri, options = {})
      # merge, not update: the caller's hash is shared across URIs in .multi
      # and feeds cache_key, so it must not pick up a :uri entry here.
      params = options.merge(:uri => uri)
      query = params.map{|k, v| "#{k}=" + URI.escape(v.to_s, UNSAFE)}.join('&')
      c = Curl::Easy.new("http://#{service_host}/show.yaml?#{query}")
      c.headers['User-Agent'] = user_agent
      c
    end

    # Adds #meta to URI instances.
    module Mixin
      # Meta information for this URI, memoized per distinct options hash so a
      # later call with different options (e.g. :headers => 1) is not answered
      # with the result of an earlier call.
      def meta(options = {})
        @meta ||= {}
        # dup the key so later mutation of the caller's hash cannot corrupt the lookup.
        @meta[options.dup] ||= URI::Meta.get(self, options)
      end
    end

    # Process-wide cache for fetched metas; any object with a Moneta-style
    # #store / #[] interface can be plugged in, or nil to disable caching.
    class Cache
      @@cache = Moneta::Memory.new
      @@expires_in = 86_400 # 24 hours

      class << self
        # Store obj under uid with the configured expiry; no-op when caching is off.
        def store(uid, obj)
          @@cache.store(uid, obj, :expires_in => @@expires_in) unless @@cache.nil?
        end

        # Look up id; returns nil when caching is off.
        def get(id)
          @@cache[id] unless @@cache.nil?
        end

        # Replace the cache backend. Passing nil disables caching (with a warning).
        def cache=(cache)
          warn 'Turning off caching is poor form, for longer processes consider using moneta/memcached' if cache.nil?
          @@cache = cache
        end

        # Expiry, in seconds, applied to entries stored from now on.
        def expires_in=(seconds)
          @@expires_in = seconds
        end
      end
    end
  end

  URI::Generic.send(:include, URI::Meta::Mixin)
  URI::HTTP.send(:include, URI::Meta::Mixin)
end
@@ -0,0 +1,10 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'shoulda'
4
+
5
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
7
+ require 'uri/meta'
8
+
9
+ class Test::Unit::TestCase
10
+ end
@@ -0,0 +1,395 @@
1
+ require File.join(File.dirname(__FILE__), 'test_helper')
2
+ require 'uri'
3
+ require 'curb'
4
+ require 'timeout'
5
+
6
+ class UriMetaTest < Test::Unit::TestCase
7
+ # First things first. Purge all test URIs on the metauri service so we don't
8
+ # get issues from old cached URIs.
9
+ [
10
+ 'garbage',
11
+ 'http://bit.ly/PBzu',
12
+ 'http://bit.ly/rvQhW',
13
+ 'http://img11.yfrog.com/i/vaix.jpg/',
14
+ 'http://rss.slashdot.org/Slashdot/slashdot',
15
+ 'http://slashdot.org/',
16
+ 'http://taptaptap.com/+MqN',
17
+ "http://#{URI::Meta.service_host}/",
18
+ "http://#{URI::Meta.service_host}/double_redirect_test",
19
+ "http://#{URI::Meta.service_host}/#foo",
20
+ "http://#{URI::Meta.service_host}/foo%5Bbar%5D",
21
+ "http://#{URI::Meta.service_host}/meta_redirect_test",
22
+ "http://#{URI::Meta.service_host}/redirect_test",
23
+ 'http://www.facebook.com/home.php',
24
+ 'http://www.facebook.com/pages/Bronx-NY/Career-and-Transfer-Services-at-BCC/113334355068',
25
+ 'http://www.google.com:666/',
26
+ 'http://www.stumbleupon.com/s/#4sDy2p/sivers.org/hellyeah',
27
+ 'http://www.taobao.com/',
28
+ 'http://www.youtube.com/das_captcha?next=/watch%3Fv%3DQ1rdsFuNIMc',
29
+ ].each{|uri| Curl::Easy.http_get("http://#{URI::Meta.service_host}/delete?uri=#{URI.escape(uri.to_s, URI::Meta::UNSAFE)}") }
30
+
31
+ context %Q(URI.parse('http://#{URI::Meta.service_host}/')) do
32
+ setup do
33
+ @uri = URI.parse("http://#{URI::Meta.service_host}/")
34
+ end
35
+
36
+ should 'respond_to :meta' do
37
+ assert_respond_to @uri, :meta
38
+ end
39
+
40
+ context '.meta' do
41
+ setup do
42
+ @meta = @uri.meta
43
+ end
44
+
45
+ should 'be a URI::Meta object' do
46
+ assert_kind_of URI::Meta, @meta
47
+ end
48
+
49
+ context '.uri' do
50
+ should 'be a URI object' do
51
+ assert_kind_of URI, @meta.uri
52
+ end
53
+
54
+ should 'be the same as the original URI' do
55
+ assert_equal @uri.to_s, @meta.uri.to_s
56
+ end
57
+ end
58
+
59
+ context '.last_effective_uri' do
60
+ should 'be a URI object' do
61
+ assert_kind_of URI, @meta.last_effective_uri
62
+ end
63
+
64
+ should 'not have been a redirect' do
65
+ assert_equal @uri.to_s, @meta.last_effective_uri.to_s
66
+ assert !@meta.redirect?
67
+ end
68
+ end
69
+
70
+ context '.title' do
71
+ should 'be Meta URI' do
72
+ assert_equal 'Meta URI', @meta.title
73
+ end
74
+ end
75
+
76
+ context '.status' do
77
+ should 'be 200' do
78
+ assert_equal 200, @meta.status
79
+ end
80
+ end
81
+
82
+ context '.headers' do
83
+ should 'be nil' do
84
+ assert_nil @meta.headers
85
+ end
86
+ end
87
+ end
88
+
89
+ context '.meta(:headers => 1)' do
90
+ setup do
91
+ @meta = URI.parse("http://#{URI::Meta.service_host}/").meta(:headers => 1)
92
+ end
93
+
94
+ context '.headers' do
95
+ should 'be populated' do
96
+ assert_not_nil @meta.headers
97
+ end
98
+ end
99
+ end
100
+ end
101
+
102
+ context %Q(URI.parse('http://#{URI::Meta.service_host}/redirect_test')) do
103
+ setup do
104
+ @uri = URI.parse("http://#{URI::Meta.service_host}/redirect_test")
105
+ end
106
+
107
+ context '.meta' do
108
+ context '.last_effective_uri' do
109
+ should 'be a redirect' do
110
+ assert_not_equal @uri.to_s, @uri.meta.last_effective_uri.to_s
111
+ assert @uri.meta.redirect?
112
+ end
113
+ end
114
+ end
115
+ end
116
+
117
+ context %Q(URI.parse('http://#{URI::Meta.service_host}/double_redirect_test')) do
118
+ setup do
119
+ @uri = URI.parse("http://#{URI::Meta.service_host}/double_redirect_test")
120
+ end
121
+
122
+ context '.meta(:max_redirects => 1)' do
123
+ should 'error on too many redirects' do
124
+ meta = @uri.meta(:max_redirects => 1)
125
+ assert meta.errors?
126
+ assert_kind_of String, meta.errors.first
127
+ end
128
+ end
129
+ end
130
+
131
+ context %q{URI.parse('http://bit.ly/rvQhW').meta} do
132
+ should 'raise nothing' do
133
+ assert_nothing_raised do
134
+ URI.parse('http://bit.ly/rvQhW').meta
135
+ end
136
+ end
137
+ end
138
+
139
+ context %q(URI.parse('garbage').meta) do
140
+ should 'raise errors' do
141
+ assert_raise NotImplementedError do
142
+ URI.parse('garbage').meta
143
+ end
144
+ end
145
+ end
146
+
147
# A bit.ly short link must be reported as a redirect to some other location.
context %q(URI.parse('http://bit.ly/PBzu').meta) do
  setup do
    @meta = URI.parse('http://bit.ly/PBzu').meta
  end

  should 'be a redirect' do
    assert @meta.redirect?
    # Compare string forms: last_effective_uri is a URI object (or nil), so
    # comparing it directly against a String could never fail.
    assert_not_equal 'http://bit.ly/PBzu', @meta.last_effective_uri.to_s
  end
end
157
+
158
+ context %q(URI.parse('http://taptaptap.com/+MqN').meta) do
159
+ setup do
160
+ @uri = URI.parse('http://taptaptap.com/+MqN')
161
+ end
162
+
163
+ should 'escape the + symbol' do
164
+ assert_nothing_raised do
165
+ @meta = @uri.meta
166
+ end
167
+ assert !@meta.errors?
168
+ end
169
+ end
170
+
171
+ context %Q(URI::Meta.multi(['http://www.google.com/', "http://#{URI::Meta.service_host}/"])) do
172
+ setup do
173
+ @metas = URI::Meta.multi(['http://www.google.com/', "http://#{URI::Meta.service_host}/"])
174
+ end
175
+
176
+ should 'return an array' do
177
+ assert_kind_of Array, @metas
178
+ end
179
+
180
+ should 'all be URI::Meta objects' do
181
+ assert @metas.all?{|m| m.kind_of? URI::Meta}
182
+ end
183
+
184
+ should 'contain a google meta' do
185
+ assert @metas.any?{|m| m.title == 'Google'}
186
+ end
187
+ end
188
+
189
# URI::Meta.multi called with a block: every meta must be both yielded to the
# block and present in the returned array.
context %Q(URI::Meta.multi(['http://www.google.com/', "http://#{URI::Meta.service_host}/"]) {}) do
  setup do
    @block_metas = []
    @return_metas = URI::Meta.multi(['http://www.google.com/', "http://#{URI::Meta.service_host}/"]) do |meta|
      @block_metas << meta
    end
  end

  should 'return an array of 2' do
    assert_kind_of Array, @return_metas
    assert_equal 2, @return_metas.size
  end

  should 'all be URI::Meta objects' do
    assert @return_metas.all?{|m| m.kind_of? URI::Meta}
  end

  should 'contain a google meta' do
    assert @return_metas.any?{|m| m.title == 'Google'}
  end

  context 'yielded in block' do
    should '2 URI::Meta objects' do
      assert @block_metas.all?{|m| m.kind_of? URI::Meta}
      # Count what the block received; this previously re-checked
      # @return_metas and so never verified the yielded metas.
      assert_equal 2, @block_metas.size
    end

    should 'a google meta' do
      assert @block_metas.any?{|m| m.title == 'Google'}
    end
  end
end
221
+
222
+ context %q(URI.parse('http://www.google.com:666/')) do
223
+ setup do
224
+ @uri = URI.parse('http://www.google.com:666/')
225
+ end
226
+
227
+ context '.meta' do
228
+ should 'not return within 5 seconds' do
229
+ begin
230
+ timeout(5) do
231
+ meta = @uri.meta
232
+ assert false
233
+ end
234
+ rescue Timeout::Error => e
235
+ assert true
236
+ end
237
+ end
238
+ end
239
+
240
+ context '.meta(:connect_timeout => 1)' do
241
+ should 'return before 5 seconds' do
242
+ begin
243
+ timeout(5) do
244
+ meta = @uri.meta(:connect_timeout => 1)
245
+ assert true
246
+ end
247
+ rescue Timeout::Error => e
248
+ assert false
249
+ end
250
+ end
251
+
252
+ should 'contain timeout errors' do
253
+ assert @uri.meta(:connect_timeout => 1).errors?
254
+ end
255
+ end
256
+ end
257
+
258
+ context %Q(URI.parse('http://#{URI::Meta.service_host}/#foo').meta) do
259
+ setup do
260
+ @uri = URI.parse("http://#{URI::Meta.service_host}/#foo")
261
+ @meta = @uri.meta
262
+ end
263
+
264
+ should 'keep # info intact' do
265
+ assert_equal @uri.to_s, @meta.uri.to_s
266
+ end
267
+
268
+ should 'not have a feed' do
269
+ assert_nil @meta.feed
270
+ end
271
+ end
272
+
273
+ context %q(URI.parse('http://www.taobao.com/').meta) do
274
+ setup do
275
+ @uri = URI.parse('http://www.taobao.com/')
276
+ end
277
+
278
+ should 'not die from UTF8 issues' do
279
+ assert_nothing_raised do
280
+ @meta = @uri.meta
281
+ end
282
+ assert !@meta.errors?
283
+ end
284
+ end
285
+
286
# A StumbleUpon framed link should resolve to the framed page rather than
# ending on stumbleupon.com itself.
context %q(URI.parse('http://www.stumbleupon.com/s/#4sDy2p/sivers.org/hellyeah').meta) do
  setup do
    @uri = URI.parse('http://www.stumbleupon.com/s/#4sDy2p/sivers.org/hellyeah')
    @meta = @uri.meta
  end

  should 'be a redirect' do
    assert @meta.redirect?
  end

  should 'not end at stumble upon' do
    # Match against the string form; last_effective_uri is a URI object, not
    # a String, so the pattern must be applied to #to_s.
    assert @meta.last_effective_uri.to_s !~ /stumble/
  end
end
300
+
301
+ context %q(URI.parse('http://www.youtube.com/das_captcha?next=/watch%3Fv%3DQ1rdsFuNIMc')) do
302
+ setup do
303
+ @uri = URI.parse('http://www.youtube.com/das_captcha?next=/watch%3Fv%3DQ1rdsFuNIMc')
304
+ @meta = @uri.meta
305
+ end
306
+
307
+ should 'obtain the correct title through captcha' do
308
+ assert_equal 'YouTube - Legolibrium', @meta.title
309
+ end
310
+
311
+ should 'not have changed the last_effective_uri' do
312
+ assert_equal @uri.to_s, @meta.uri.to_s
313
+ end
314
+ end
315
+
316
+ context %q(URI.parse('http://www.facebook.com/home.php')) do
317
+ setup do
318
+ @meta = URI.parse('http://www.facebook.com/home.php').meta
319
+ end
320
+
321
+ should 'correctly return 403' do
322
+ assert_equal 403, @meta.status
323
+ end
324
+ end
325
+
326
+ context %Q(URI.parse("http://#{URI::Meta.service_host}/foo%5Bbar]")) do
327
+ setup do
328
+ @uri = URI.parse("http://#{URI::Meta.service_host}/foo%5Bbar%5D")
329
+ @meta = @uri.meta
330
+ end
331
+
332
+ should 'keep encoded square brackets intact' do
333
+ assert_equal @uri.to_s, @meta.uri.to_s
334
+ end
335
+ end
336
+
337
+ context %q(URI.parse('http://img11.yfrog.com/i/vaix.jpg/').meta) do
338
+ setup do
339
+ @meta = URI.parse('http://img11.yfrog.com/i/vaix.jpg/').meta
340
+ end
341
+
342
+ should 'have a content type' do
343
+ assert_not_nil @meta.content_type
344
+ end
345
+
346
+ should 'have a title' do
347
+ assert_not_nil @meta.title
348
+ assert_not_equal '', @meta.title
349
+ end
350
+ end
351
+
352
+ context %q(URI.parse('http://www.facebook.com/pages/Bronx-NY/Career-and-Transfer-Services-at-BCC/113334355068').meta) do
353
+ setup do
354
+ @meta = URI.parse('http://www.facebook.com/pages/Bronx-NY/Career-and-Transfer-Services-at-BCC/113334355068').meta
355
+ end
356
+
357
+ should 'have a title' do
358
+ assert_not_nil @meta.title
359
+ assert_not_equal '', @meta.title
360
+ end
361
+ end
362
+
363
+ context %Q(URI.parse("http://#{URI::Meta.service_host}/meta_redirect_test").meta) do
364
+ setup do
365
+ @uri = URI.parse("http://#{URI::Meta.service_host}/meta_redirect_test")
366
+ @meta = @uri.meta
367
+ end
368
+
369
+ should 'be a redirect' do
370
+ assert @meta.redirect?
371
+ end
372
+
373
+ should 'keep the original URL intact' do
374
+ assert_equal @uri.to_s, @meta.uri.to_s
375
+ end
376
+ end
377
+
378
+ context %Q(URI.parse('http://slashdot.org/').meta) do
379
+ setup do
380
+ @meta = URI.parse('http://slashdot.org/').meta
381
+ end
382
+
383
+ should 'have a feed' do
384
+ assert_equal 'http://rss.slashdot.org/Slashdot/slashdot', @meta.feed.to_s
385
+ end
386
+ end
387
+
388
+ context %Q(URI.parse('http://rss.slashdot.org/Slashdot/slashdot').meta) do
389
+ setup do
390
+ @meta = URI.parse('http://rss.slashdot.org/Slashdot/slashdot').meta
391
+ end
392
+
393
+ should 'have a feed equal to itself'
394
+ end
395
+ end
data/uri-meta.gemspec ADDED
@@ -0,0 +1,60 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{uri-meta}
8
+ s.version = "0.9.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Stateless Systems"]
12
+ s.date = %q{2009-10-13}
13
+ s.description = %q{Retrieves meta information for a URI from the meturi.com service.}
14
+ s.email = %q{production@statelesssystems.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.markdown"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".gitignore",
22
+ "LICENSE",
23
+ "README.markdown",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "benchmark.rb",
27
+ "lib/uri/meta.rb",
28
+ "test/test_helper.rb",
29
+ "test/uri-meta_test.rb",
30
+ "uri-meta.gemspec"
31
+ ]
32
+ s.homepage = %q{http://github.com/stateless-systems/uri-meta}
33
+ s.rdoc_options = ["--charset=UTF-8"]
34
+ s.require_paths = ["lib"]
35
+ s.rubygems_version = %q{1.3.5}
36
+ s.summary = %q{Meta information for a URI}
37
+ s.test_files = [
38
+ "test/test_helper.rb",
39
+ "test/uri-meta_test.rb"
40
+ ]
41
+
42
+ if s.respond_to? :specification_version then
43
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
44
+ s.specification_version = 3
45
+
46
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
47
+ s.add_runtime_dependency(%q<taf2-curb>, [">= 0"])
48
+ s.add_runtime_dependency(%q<wycats-moneta>, [">= 0"])
49
+ s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
50
+ else
51
+ s.add_dependency(%q<taf2-curb>, [">= 0"])
52
+ s.add_dependency(%q<wycats-moneta>, [">= 0"])
53
+ s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
54
+ end
55
+ else
56
+ s.add_dependency(%q<taf2-curb>, [">= 0"])
57
+ s.add_dependency(%q<wycats-moneta>, [">= 0"])
58
+ s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
59
+ end
60
+ end
metadata ADDED
@@ -0,0 +1,96 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: uri-meta
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.9.1
5
+ platform: ruby
6
+ authors:
7
+ - Stateless Systems
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-10-13 00:00:00 +11:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: taf2-curb
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: wycats-moneta
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: thoughtbot-shoulda
37
+ type: :development
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: "0"
44
+ version:
45
+ description: Retrieves meta information for a URI from the meturi.com service.
46
+ email: production@statelesssystems.com
47
+ executables: []
48
+
49
+ extensions: []
50
+
51
+ extra_rdoc_files:
52
+ - LICENSE
53
+ - README.markdown
54
+ files:
55
+ - .document
56
+ - .gitignore
57
+ - LICENSE
58
+ - README.markdown
59
+ - Rakefile
60
+ - VERSION
61
+ - benchmark.rb
62
+ - lib/uri/meta.rb
63
+ - test/test_helper.rb
64
+ - test/uri-meta_test.rb
65
+ - uri-meta.gemspec
66
+ has_rdoc: true
67
+ homepage: http://github.com/stateless-systems/uri-meta
68
+ licenses: []
69
+
70
+ post_install_message:
71
+ rdoc_options:
72
+ - --charset=UTF-8
73
+ require_paths:
74
+ - lib
75
+ required_ruby_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: "0"
80
+ version:
81
+ required_rubygems_version: !ruby/object:Gem::Requirement
82
+ requirements:
83
+ - - ">="
84
+ - !ruby/object:Gem::Version
85
+ version: "0"
86
+ version:
87
+ requirements: []
88
+
89
+ rubyforge_project:
90
+ rubygems_version: 1.3.5
91
+ signing_key:
92
+ specification_version: 3
93
+ summary: Meta information for a URI
94
+ test_files:
95
+ - test/test_helper.rb
96
+ - test/uri-meta_test.rb