uri-meta 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Stateless Systems
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.markdown ADDED
@@ -0,0 +1,68 @@
1
+ # uri-meta: Get meta information about your URI
2
+
3
+ uri-meta is a ruby interface to the [metauri.com](http://www.metauri.com/) service.
4
+
5
+ [metauri.com](http://www.metauri.com/) provides two things:
6
+
7
+ * follows your URI to the end point where there is actual content instead of redirects
8
+ * obtains meta information (title etc) about that end URI
9
+
10
+ ## Examples
11
+
12
+ require 'uri'
13
+ require 'uri/meta'
14
+ uri = URI.parse('http://www.google.com/')
15
+ puts uri.meta.title
16
+ # Google
17
+ puts uri.meta.status
18
+ # 200
19
+ puts uri.meta(:headers => 1).headers
20
+ # HTTP/1.1 .... etc
21
+
22
+ uri = URI.parse('http://bit.ly/PBzu')
23
+ puts uri.meta.content_type
24
+ # image/gif
25
+
26
+ meta = URI.parse('http://bit.ly/PBzu').meta(:max_redirects => 2)
27
+ puts(meta.last_effective_uri) unless meta.errors?
28
+ # http://clipart.tiu.edu/offcampus/animated/bd13644_.gif
29
+
30
+
31
+ URI::Meta.multi(['http://www.google.com/', 'http://bit.ly/PBzu'], :max_redirects => 10) do |meta|
32
+ # Don't rely on these being processed in the same order they were listed!
33
+ if meta.redirect?
34
+ puts "## #{meta.uri} -> #{meta.last_effective_uri}"
35
+ else
36
+ puts "## #{meta.uri} did not redirect and its title was #{meta.title}"
37
+ end
38
+ end
39
+
40
+ ## Caching
41
+
42
+ uri-meta uses in-memory caching via [wycats-moneta](http://github.com/wycats/moneta), so it
43
+ should be relatively straightforward for you to use whatever other caching mechanism you want,
44
+ provided it's supported by moneta.
45
+
46
+ require 'uri'
47
+ require 'uri/meta'
48
+
49
+ # Memcached
50
+ require 'moneta/memcache'
51
+ URI::Meta::Cache.cache = Moneta::Memcache.new(:server => 'localhost', :namespace => 'uri_meta')
52
+ URI::Meta::Cache.expires_in = (60 * 60 * 24 * 7) # 1 week
53
+
54
+ # No caching (for testing I guess)
55
+ URI::Meta::Cache.cache = nil
56
+
57
+ ## Known Issues
58
+
59
+ * Redirects that aren't handled by the webserver (302), such as javascript or
60
+ <meta> tag redirects are not supported yet.
61
+ * Framed redirects, such as stumbleupon are not resolved yet, as these are
62
+ technically full pages it could be difficult to know that it's not really
63
+ the end URI.
64
+ * No RDOC as yet.
65
+
66
+ ## Copyright
67
+
68
+ Copyright (c) 2009 Stateless Systems. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,57 @@
1
require 'rubygems'
require 'rake'

# Gem packaging tasks. Jeweler is treated as optional: when it cannot be
# loaded a hint is printed and the remaining tasks are still defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "uri-meta"
    gem.summary = %Q{Meta information for a URI}
    gem.description = %Q{Retrieves meta information for a URI from the metauri.com service.}
    gem.email = "production@statelesssystems.com"
    gem.homepage = "http://github.com/stateless-systems/uri-meta"
    gem.authors = ["Stateless Systems"]
    gem.add_dependency "taf2-curb"
    gem.add_dependency "wycats-moneta"
    gem.add_development_dependency "thoughtbot-shoulda"
  end
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

# Unit tests: every test/**/*_test.rb file, with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# Coverage task. When rcov is missing, a stub :rcov task explains how to get it.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# Only hook the dependency check in when some task library has defined it
# (presumably Jeweler::Tasks above -- confirm). Without this guard `rake test`
# aborts on an unknown prerequisite whenever Jeweler failed to load.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # The VERSION file is optional; fall back to an empty version string.
  version = File.exist?('VERSION') ? File.read('VERSION') : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "uri-meta #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.9.1
data/benchmark.rb ADDED
@@ -0,0 +1,36 @@
1
#!/usr/bin/env ruby

# Rough timing comparison between looking up one URI fifty times and looking
# up fifty distinct URIs, with the local URI::Meta cache switched off.

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), 'lib'))

require 'rubygems'
require 'benchmark'
require 'curb'
require 'uri/meta'

URI::Meta::Cache.cache = nil

# Two independent lists: the same URI repeated, and fifty distinct URIs.
cached_uris   = []
uncached_uris = []

delete = Curl::Multi.new

(1..50).each do |x|
  cached_uris << URI.parse('http://tigris.id.au/')
  uncached_uris << URI.parse("http://tigris.id.au/#{x}")
  # Escape the target the same way the library does before placing it in the
  # query string, and purge it from whichever host the library is talking to.
  target = URI.escape(uncached_uris.last.to_s, URI::Meta::UNSAFE)
  c = Curl::Easy.new("http://#{URI::Meta.service_host}/delete?uri=#{target}")
  c.on_complete{|curl| print '.'}
  delete.add(c)
end

print '  performing cache clear '
clear = Benchmark.realtime{ delete.perform }
puts " #{clear}"

## TODO: figure out why uncached is faster when X > pool size, but way less when X < pool size
print '  calculating cached time '
cached = Benchmark.realtime{ URI::Meta.multi(cached_uris){|m| print '.'}}
puts " #{cached}"

print 'calculating uncached time '
uncached = Benchmark.realtime{ URI::Meta.multi(uncached_uris){|m| print '.'}}
puts " #{uncached}"
data/lib/uri/meta.rb ADDED
@@ -0,0 +1,136 @@
1
+ require 'uri'
2
+ require 'curb'
3
+ require 'yaml'
4
+ require 'moneta'
5
+ require 'moneta/memory'
6
+ require 'digest/sha1'
7
+
8
module URI
  # Meta information about a single URI, as reported by the remote service
  # configured in URI::Meta.service_host.
  #
  # Instances are plain attribute holders; the class methods (get, multi) do
  # the actual lookups and go through URI::Meta::Cache.
  class Meta
    attr_accessor :headers, :uri, :title, :feed, :last_modified, :content_type, :charset, :last_effective_uri, :status, :errors
    @@service_host = 'www.metauri.com'
    @@user_agent   = 'uri-meta rubygem'

    # Characters that are percent-escaped when a value is placed in the
    # service query string (see .curl).
    UNSAFE = Regexp.new("[#{URI::REGEXP::PATTERN::RESERVED} #%]", false, 'N').freeze

    # Host name of the service that lookups are sent to.
    def self.service_host
      @@service_host
    end

    def self.service_host=(service_host)
      @@service_host = service_host
    end

    # Value sent in the User-Agent header of every service request.
    def self.user_agent
      @@user_agent
    end

    def self.user_agent=(user_agent)
      @@user_agent = user_agent
    end

    # Builds a meta object from a hash of attributes.
    #
    # * :uri, :last_effective_uri and :feed are parsed into URI objects; blank
    #   or unparsable values become nil.
    # * :error / :errors values are appended to #errors.
    # * Any other key is assigned only when a matching writer exists; unknown
    #   keys are ignored.
    def initialize(options = {})
      self.errors = []
      options.each do |k, v|
        case k
          when :last_effective_uri, :uri, :feed then send("#{k}=", v.to_s == '' ? nil : (URI.parse(v.to_s) rescue nil))
          when :error, :errors                  then self.errors.push(*[v].flatten)
          else send("#{k}=", v) if respond_to?("#{k}=")
        end
      end
    end

    # True when the URI that was asked for differs from the last effective URI.
    def redirect?
      uri != last_effective_uri
    end

    # True when at least one error was recorded.
    def errors?
      !errors.empty?
    end

    # Looks up the meta information for a single URI.
    #
    # uri::     anything whose #to_s can be parsed by URI.parse
    # options:: extra query parameters passed on to the service
    #
    # Raises ArgumentError when +uri+ cannot be parsed and NotImplementedError
    # when it is not an HTTP URI.
    def self.get(uri, options = {})
      parsed = begin
        URI.parse(uri.to_s)
      rescue StandardError
        nil
      end
      # Report the class of what the caller passed in, not of the failed parse.
      raise ArgumentError.new("Can't coerce #{uri.class} to URI") unless parsed.is_a?(URI)
      raise NotImplementedError.new('Only HTTP is supported so far.') unless parsed.is_a?(URI::HTTP)
      URI::Meta.multi([parsed], options).first
    end

    # SHA1 hex digest identifying a (uri, options) pair in the cache.
    def self.cache_key(uri, options = {})
      # Make sure the key includes the options used to retrieve the meta
      uid = uri.to_s + options.to_a.sort{|a,b| a[0].to_s <=> b[0].to_s}.to_s
      Digest::SHA1.hexdigest(uid)
    end

    # Looks up several URIs through one Curl::Multi handle.
    #
    # Each resulting URI::Meta is yielded to the optional block as soon as it
    # is available and is also collected into the returned array. Cached
    # results are yielded immediately, so neither the block calls nor the
    # returned array are guaranteed to follow the order of +uris+.
    #
    #--
    # TODO: Chunk uri's through a pre-warmed pool of curl easy instances?
    def self.multi(uris, options = {}, &block)
      metas = []
      multi = Curl::Multi.new
      uris.each do |uri|
        # Computed once so that lookup and store always agree on the key.
        key = cache_key(uri, options)
        if meta = URI::Meta::Cache.get(key)
          metas << meta
          # Store again, as the original code did (presumably to refresh the
          # expiry -- confirm against the moneta backend in use).
          URI::Meta::Cache.store(key, meta)
          block.call(meta) if block
        else
          easy = curl(uri, options)
          easy.on_complete do |done|
            # NOTE(review): YAML.load is applied to a body received over the
            # network; consider a restricted loader if the host is not trusted.
            args = begin
              YAML.load(done.body_str)
            rescue StandardError => e
              {:errors => "YAML Error, #{e.message}"}
            end
            args = {:errors => "YAML Error, server returned unknown format."} unless args.is_a?(Hash)

            metas << meta = URI::Meta.new({:uri => uri}.update(args))
            URI::Meta::Cache.store(key, meta)
            block.call(meta) if block
          end
          multi.add(easy)
        end
      end
      multi.perform
      metas
    end

    # Builds the Curl::Easy request for one URI.
    #
    # This stays a public class method: the `protected` keyword that used to
    # precede it never applied to `def self.` methods.
    #
    #--
    # Required because the URI option must be verbatim. If '+' and others are not escaped Merb, Rack or something
    # helpfully converts them to spaces on metauri.com
    def self.curl(uri, options = {})
      # merge, not update: the caller's hash is reused for cache keys and for
      # the next URI in .multi, so it must not pick up a :uri entry here.
      params = options.merge(:uri => uri)
      query  = params.map{|k, v| "#{k}=" + URI.escape(v.to_s, UNSAFE)}.join('&')
      c = Curl::Easy.new("http://#{service_host}/show.yaml?#{query}")
      c.headers['User-Agent'] = user_agent
      c
    end

    # Adds #meta to URI objects (included into URI::Generic and URI::HTTP below).
    module Mixin
      # Meta information for this URI, memoized per URI string and options so
      # that e.g. meta(:headers => 1) is not answered with an earlier meta()
      # result that was fetched without headers.
      def meta(options = {})
        @meta ||= {}
        @meta[URI::Meta.cache_key(self, options)] ||= URI::Meta.get(self, options)
      end
    end

    # Process-wide cache of URI::Meta objects, backed by a moneta store.
    # Assigning nil to .cache turns caching off.
    class Cache
      @@cache      = Moneta::Memory.new
      @@expires_in = 86_400 # 24 hours

      class << self
        # Stores +obj+ under +uid+ with the configured expiry; no-op when
        # caching is off.
        def store(uid, obj)
          @@cache.store(uid, obj, :expires_in => @@expires_in) unless @@cache.nil?
        end

        # Returns the object stored under +id+, or nil when caching is off.
        def get(id)
          @@cache[id] unless @@cache.nil?
        end

        # Replaces the backing store; nil disables caching (with a warning).
        def cache=(cache)
          warn 'Turning off caching is poor form, for longer processes consider using moneta/memcached' if cache.nil?
          @@cache = cache
        end

        # Expiry, in seconds, passed along with every subsequent store.
        def expires_in=(seconds)
          @@expires_in = seconds
        end
      end
    end
  end

  URI::Generic.send(:include, URI::Meta::Mixin)
  URI::HTTP.send(:include, URI::Meta::Mixin)
end
@@ -0,0 +1,10 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'shoulda'
4
+
5
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
7
+ require 'uri/meta'
8
+
9
# Placeholder reopening of the base test case: shared helpers for the suite
# can be added here. It currently adds nothing.
class Test::Unit::TestCase; end
@@ -0,0 +1,395 @@
1
+ require File.join(File.dirname(__FILE__), 'test_helper')
2
+ require 'uri'
3
+ require 'curb'
4
+ require 'timeout'
5
+
6
# Integration tests for URI::Meta. They talk to the live service at
# URI::Meta.service_host and to third-party sites, so they need network access.
class UriMetaTest < Test::Unit::TestCase
  # First things first. Purge all test URIs on the metauri service so we don't
  # get issues from old cached URIs.
  [
    'garbage',
    'http://bit.ly/PBzu',
    'http://bit.ly/rvQhW',
    'http://img11.yfrog.com/i/vaix.jpg/',
    'http://rss.slashdot.org/Slashdot/slashdot',
    'http://slashdot.org/',
    'http://taptaptap.com/+MqN',
    "http://#{URI::Meta.service_host}/",
    "http://#{URI::Meta.service_host}/double_redirect_test",
    "http://#{URI::Meta.service_host}/#foo",
    "http://#{URI::Meta.service_host}/foo%5Bbar%5D",
    "http://#{URI::Meta.service_host}/meta_redirect_test",
    "http://#{URI::Meta.service_host}/redirect_test",
    'http://www.facebook.com/home.php',
    'http://www.facebook.com/pages/Bronx-NY/Career-and-Transfer-Services-at-BCC/113334355068',
    'http://www.google.com:666/',
    'http://www.stumbleupon.com/s/#4sDy2p/sivers.org/hellyeah',
    'http://www.taobao.com/',
    'http://www.youtube.com/das_captcha?next=/watch%3Fv%3DQ1rdsFuNIMc',
  ].each{|uri| Curl::Easy.http_get("http://#{URI::Meta.service_host}/delete?uri=#{URI.escape(uri.to_s, URI::Meta::UNSAFE)}") }

  context %Q(URI.parse('http://#{URI::Meta.service_host}/')) do
    setup do
      @uri = URI.parse("http://#{URI::Meta.service_host}/")
    end

    should 'respond_to :meta' do
      assert_respond_to @uri, :meta
    end

    context '.meta' do
      setup do
        @meta = @uri.meta
      end

      should 'be a URI::Meta object' do
        assert_kind_of URI::Meta, @meta
      end

      context '.uri' do
        should 'be a URI object' do
          assert_kind_of URI, @meta.uri
        end

        should 'be the same as the original URI' do
          assert_equal @uri.to_s, @meta.uri.to_s
        end
      end

      context '.last_effective_uri' do
        should 'be a URI object' do
          assert_kind_of URI, @meta.last_effective_uri
        end

        should 'not have been a redirect' do
          assert_equal @uri.to_s, @meta.last_effective_uri.to_s
          assert !@meta.redirect?
        end
      end

      context '.title' do
        should 'be Meta URI' do
          assert_equal 'Meta URI', @meta.title
        end
      end

      context '.status' do
        should 'be 200' do
          assert_equal 200, @meta.status
        end
      end

      context '.headers' do
        should 'be nil' do
          assert_nil @meta.headers
        end
      end
    end

    context '.meta(:headers => 1)' do
      setup do
        @meta = URI.parse("http://#{URI::Meta.service_host}/").meta(:headers => 1)
      end

      context '.headers' do
        should 'be populated' do
          assert_not_nil @meta.headers
        end
      end
    end
  end

  context %Q(URI.parse('http://#{URI::Meta.service_host}/redirect_test')) do
    setup do
      @uri = URI.parse("http://#{URI::Meta.service_host}/redirect_test")
    end

    context '.meta' do
      context '.last_effective_uri' do
        should 'be a redirect' do
          assert_not_equal @uri.to_s, @uri.meta.last_effective_uri.to_s
          assert @uri.meta.redirect?
        end
      end
    end
  end

  context %Q(URI.parse('http://#{URI::Meta.service_host}/double_redirect_test')) do
    setup do
      @uri = URI.parse("http://#{URI::Meta.service_host}/double_redirect_test")
    end

    context '.meta(:max_redirects => 1)' do
      should 'error on too many redirects' do
        meta = @uri.meta(:max_redirects => 1)
        assert meta.errors?
        assert_kind_of String, meta.errors.first
      end
    end
  end

  context %q{URI.parse('http://bit.ly/rvQhW').meta} do
    should 'raise nothing' do
      assert_nothing_raised do
        URI.parse('http://bit.ly/rvQhW').meta
      end
    end
  end

  context %q(URI.parse('garbage').meta) do
    should 'raise errors' do
      assert_raise NotImplementedError do
        URI.parse('garbage').meta
      end
    end
  end

  context %q(URI.parse('http://bit.ly/PBzu').meta) do
    setup do
      @meta = URI.parse('http://bit.ly/PBzu').meta
    end

    should 'be a redirect' do
      assert @meta.redirect?
      # Compare strings: a String never equals a URI object, so the previous
      # form of this assertion could not fail.
      assert_not_equal 'http://bit.ly/PBzu', @meta.last_effective_uri.to_s
    end
  end

  context %q(URI.parse('http://taptaptap.com/+MqN').meta) do
    setup do
      @uri = URI.parse('http://taptaptap.com/+MqN')
    end

    should 'escape the + symbol' do
      assert_nothing_raised do
        @meta = @uri.meta
      end
      assert !@meta.errors?
    end
  end

  context %Q(URI::Meta.multi(['http://www.google.com/', "http://#{URI::Meta.service_host}/"])) do
    setup do
      @metas = URI::Meta.multi(['http://www.google.com/', "http://#{URI::Meta.service_host}/"])
    end

    should 'return an array' do
      assert_kind_of Array, @metas
    end

    should 'all be URI::Meta objects' do
      assert @metas.all?{|m| m.kind_of? URI::Meta}
    end

    should 'contain a google meta' do
      assert @metas.any?{|m| m.title == 'Google'}
    end
  end

  context %Q(URI::Meta.multi(['http://www.google.com/', "http://#{URI::Meta.service_host}/"]) {}) do
    setup do
      @block_metas  = []
      @return_metas = URI::Meta.multi(['http://www.google.com/', "http://#{URI::Meta.service_host}/"]) do |meta|
        @block_metas << meta
      end
    end

    should 'return an array of 2' do
      assert_kind_of Array, @return_metas
      assert_equal 2, @return_metas.size
    end

    should 'all be URI::Meta objects' do
      assert @return_metas.all?{|m| m.kind_of? URI::Meta}
    end

    should 'contain a google meta' do
      assert @return_metas.any?{|m| m.title == 'Google'}
    end

    context 'yielded in block' do
      should '2 URI::Meta objects' do
        assert @block_metas.all?{|m| m.kind_of? URI::Meta}
        # Count what the block received, not what multi returned.
        assert_equal 2, @block_metas.size
      end

      should 'a google meta' do
        assert @block_metas.any?{|m| m.title == 'Google'}
      end
    end
  end

  context %q(URI.parse('http://www.google.com:666/')) do
    setup do
      @uri = URI.parse('http://www.google.com:666/')
    end

    context '.meta' do
      should 'not return within 5 seconds' do
        assert_raise Timeout::Error do
          Timeout.timeout(5) { @uri.meta }
        end
      end
    end

    context '.meta(:connect_timeout => 1)' do
      should 'return before 5 seconds' do
        finished = begin
          Timeout.timeout(5) { @uri.meta(:connect_timeout => 1) }
          true
        rescue Timeout::Error
          false
        end
        assert finished
      end

      should 'contain timeout errors' do
        assert @uri.meta(:connect_timeout => 1).errors?
      end
    end
  end

  context %Q(URI.parse('http://#{URI::Meta.service_host}/#foo').meta) do
    setup do
      @uri  = URI.parse("http://#{URI::Meta.service_host}/#foo")
      @meta = @uri.meta
    end

    should 'keep # info intact' do
      assert_equal @uri.to_s, @meta.uri.to_s
    end

    should 'not have a feed' do
      assert_nil @meta.feed
    end
  end

  context %q(URI.parse('http://www.taobao.com/').meta) do
    setup do
      @uri = URI.parse('http://www.taobao.com/')
    end

    should 'not die from UTF8 issues' do
      assert_nothing_raised do
        @meta = @uri.meta
      end
      assert !@meta.errors?
    end
  end

  context %q(URI.parse('http://www.stumbleupon.com/s/#4sDy2p/sivers.org/hellyeah').meta) do
    setup do
      @uri  = URI.parse('http://www.stumbleupon.com/s/#4sDy2p/sivers.org/hellyeah')
      @meta = @uri.meta
    end

    should 'be a redirect' do
      assert @meta.redirect?
    end

    should 'not end at stumble upon' do
      # Match against the string form; a URI object does not implement =~.
      assert @meta.last_effective_uri.to_s !~ /stumble/
    end
  end

  context %q(URI.parse('http://www.youtube.com/das_captcha?next=/watch%3Fv%3DQ1rdsFuNIMc')) do
    setup do
      @uri  = URI.parse('http://www.youtube.com/das_captcha?next=/watch%3Fv%3DQ1rdsFuNIMc')
      @meta = @uri.meta
    end

    should 'obtain the correct title through captcha' do
      assert_equal 'YouTube - Legolibrium', @meta.title
    end

    # NOTE(review): the name mentions last_effective_uri but the assertion
    # checks uri -- confirm which of the two was intended.
    should 'not have changed the last_effective_uri' do
      assert_equal @uri.to_s, @meta.uri.to_s
    end
  end

  context %q(URI.parse('http://www.facebook.com/home.php')) do
    setup do
      @meta = URI.parse('http://www.facebook.com/home.php').meta
    end

    should 'correctly return 403' do
      assert_equal 403, @meta.status
    end
  end

  context %Q(URI.parse("http://#{URI::Meta.service_host}/foo%5Bbar%5D")) do
    setup do
      @uri  = URI.parse("http://#{URI::Meta.service_host}/foo%5Bbar%5D")
      @meta = @uri.meta
    end

    should 'keep encoded square brackets intact' do
      assert_equal @uri.to_s, @meta.uri.to_s
    end
  end

  context %q(URI.parse('http://img11.yfrog.com/i/vaix.jpg/').meta) do
    setup do
      @meta = URI.parse('http://img11.yfrog.com/i/vaix.jpg/').meta
    end

    should 'have a content type' do
      assert_not_nil @meta.content_type
    end

    should 'have a title' do
      assert_not_nil @meta.title
      assert_not_equal '', @meta.title
    end
  end

  context %q(URI.parse('http://www.facebook.com/pages/Bronx-NY/Career-and-Transfer-Services-at-BCC/113334355068').meta) do
    setup do
      @meta = URI.parse('http://www.facebook.com/pages/Bronx-NY/Career-and-Transfer-Services-at-BCC/113334355068').meta
    end

    should 'have a title' do
      assert_not_nil @meta.title
      assert_not_equal '', @meta.title
    end
  end

  context %Q(URI.parse("http://#{URI::Meta.service_host}/meta_redirect_test").meta) do
    setup do
      @uri  = URI.parse("http://#{URI::Meta.service_host}/meta_redirect_test")
      @meta = @uri.meta
    end

    should 'be a redirect' do
      assert @meta.redirect?
    end

    should 'keep the original URL intact' do
      assert_equal @uri.to_s, @meta.uri.to_s
    end
  end

  context %Q(URI.parse('http://slashdot.org/').meta) do
    setup do
      @meta = URI.parse('http://slashdot.org/').meta
    end

    should 'have a feed' do
      assert_equal 'http://rss.slashdot.org/Slashdot/slashdot', @meta.feed.to_s
    end
  end

  context %Q(URI.parse('http://rss.slashdot.org/Slashdot/slashdot').meta) do
    setup do
      @meta = URI.parse('http://rss.slashdot.org/Slashdot/slashdot').meta
    end

    # Declared without a block, i.e. not implemented yet.
    should 'have a feed equal to itself'
  end
end
data/uri-meta.gemspec ADDED
@@ -0,0 +1,60 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{uri-meta}
8
+ s.version = "0.9.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Stateless Systems"]
12
+ s.date = %q{2009-10-13}
13
+ s.description = %q{Retrieves meta information for a URI from the meturi.com service.}
14
+ s.email = %q{production@statelesssystems.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.markdown"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".gitignore",
22
+ "LICENSE",
23
+ "README.markdown",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "benchmark.rb",
27
+ "lib/uri/meta.rb",
28
+ "test/test_helper.rb",
29
+ "test/uri-meta_test.rb",
30
+ "uri-meta.gemspec"
31
+ ]
32
+ s.homepage = %q{http://github.com/stateless-systems/uri-meta}
33
+ s.rdoc_options = ["--charset=UTF-8"]
34
+ s.require_paths = ["lib"]
35
+ s.rubygems_version = %q{1.3.5}
36
+ s.summary = %q{Meta information for a URI}
37
+ s.test_files = [
38
+ "test/test_helper.rb",
39
+ "test/uri-meta_test.rb"
40
+ ]
41
+
42
+ if s.respond_to? :specification_version then
43
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
44
+ s.specification_version = 3
45
+
46
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
47
+ s.add_runtime_dependency(%q<taf2-curb>, [">= 0"])
48
+ s.add_runtime_dependency(%q<wycats-moneta>, [">= 0"])
49
+ s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
50
+ else
51
+ s.add_dependency(%q<taf2-curb>, [">= 0"])
52
+ s.add_dependency(%q<wycats-moneta>, [">= 0"])
53
+ s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
54
+ end
55
+ else
56
+ s.add_dependency(%q<taf2-curb>, [">= 0"])
57
+ s.add_dependency(%q<wycats-moneta>, [">= 0"])
58
+ s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
59
+ end
60
+ end
metadata ADDED
@@ -0,0 +1,96 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: uri-meta
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.9.1
5
+ platform: ruby
6
+ authors:
7
+ - Stateless Systems
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-10-13 00:00:00 +11:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: taf2-curb
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: wycats-moneta
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: thoughtbot-shoulda
37
+ type: :development
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: "0"
44
+ version:
45
+ description: Retrieves meta information for a URI from the meturi.com service.
46
+ email: production@statelesssystems.com
47
+ executables: []
48
+
49
+ extensions: []
50
+
51
+ extra_rdoc_files:
52
+ - LICENSE
53
+ - README.markdown
54
+ files:
55
+ - .document
56
+ - .gitignore
57
+ - LICENSE
58
+ - README.markdown
59
+ - Rakefile
60
+ - VERSION
61
+ - benchmark.rb
62
+ - lib/uri/meta.rb
63
+ - test/test_helper.rb
64
+ - test/uri-meta_test.rb
65
+ - uri-meta.gemspec
66
+ has_rdoc: true
67
+ homepage: http://github.com/stateless-systems/uri-meta
68
+ licenses: []
69
+
70
+ post_install_message:
71
+ rdoc_options:
72
+ - --charset=UTF-8
73
+ require_paths:
74
+ - lib
75
+ required_ruby_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: "0"
80
+ version:
81
+ required_rubygems_version: !ruby/object:Gem::Requirement
82
+ requirements:
83
+ - - ">="
84
+ - !ruby/object:Gem::Version
85
+ version: "0"
86
+ version:
87
+ requirements: []
88
+
89
+ rubyforge_project:
90
+ rubygems_version: 1.3.5
91
+ signing_key:
92
+ specification_version: 3
93
+ summary: Meta information for a URI
94
+ test_files:
95
+ - test/test_helper.rb
96
+ - test/uri-meta_test.rb