earl 0.3.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +4 -15
- data/.rspec +1 -0
- data/.travis.yml +11 -0
- data/Gemfile +2 -2
- data/Gemfile.lock +60 -0
- data/Guardfile +10 -0
- data/LICENSE +2 -4
- data/README.rdoc +145 -0
- data/Rakefile +35 -2
- data/earl.gemspec +13 -7
- data/lib/earl.rb +7 -22
- data/lib/earl/earl.rb +158 -0
- data/lib/earl/scraper.rb +93 -0
- data/lib/earl/version.rb +2 -2
- data/script/console +10 -0
- data/spec/fixtures/bicycles.html +490 -0
- data/spec/fixtures/bicycles_without_description.html +489 -0
- data/spec/fixtures/bicycles_without_images.html +457 -0
- data/spec/fixtures/page_as_atom.html +161 -0
- data/spec/fixtures/page_as_rss.html +151 -0
- data/spec/fixtures/page_with_atom_feed.html +39 -0
- data/spec/fixtures/page_with_rss_and_atom_feeds.html +40 -0
- data/spec/fixtures/page_with_rss_feed.html +39 -0
- data/spec/fixtures/page_without_feeds.html +36 -0
- data/spec/fixtures/youtube.html +1839 -0
- data/spec/integration/feed_spec.rb +78 -0
- data/spec/integration/oembed_spec.rb +40 -0
- data/spec/spec_helper.rb +18 -28
- data/spec/support/fixtures.rb +10 -0
- data/spec/unit/earl/earl_spec.rb +16 -0
- data/spec/unit/earl/feed_spec.rb +59 -0
- data/spec/unit/earl/oembed_spec.rb +49 -0
- data/spec/unit/earl/scraper_spec.rb +48 -0
- data/spec/unit/earl_spec.rb +65 -0
- metadata +123 -46
- data/.rvmrc +0 -48
- data/README.md +0 -41
- data/lib/earl/email_assembler.rb +0 -11
- data/lib/earl/email_entity.rb +0 -27
- data/lib/earl/email_parser.tt +0 -58
- data/lib/earl/entity_base.rb +0 -37
- data/lib/earl/hash_inquirer.rb +0 -16
- data/lib/earl/string_inquirer.rb +0 -11
- data/lib/earl/url_assembler.rb +0 -15
- data/lib/earl/url_entity.rb +0 -23
- data/lib/earl/url_parser.tt +0 -163
- data/spec/earl/earl_spec.rb +0 -17
- data/spec/earl/email_entity_spec.rb +0 -31
- data/spec/earl/email_parser_spec.rb +0 -29
- data/spec/earl/entity_base_spec.rb +0 -39
- data/spec/earl/hash_inquirer_spec.rb +0 -24
- data/spec/earl/string_inquirer_spec.rb +0 -9
- data/spec/earl/url_entity_spec.rb +0 -45
- data/spec/earl/url_parser_spec.rb +0 -189
data/.document
ADDED
data/.gitignore
CHANGED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/.travis.yml
ADDED
data/Gemfile
CHANGED
data/Gemfile.lock
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
earl (1.0.0)
|
5
|
+
nokogiri (>= 1.4.4)
|
6
|
+
oembedr (>= 1.0.0)
|
7
|
+
|
8
|
+
GEM
|
9
|
+
remote: http://rubygems.org/
|
10
|
+
specs:
|
11
|
+
diff-lcs (1.1.3)
|
12
|
+
faraday (0.8.4)
|
13
|
+
multipart-post (~> 1.1)
|
14
|
+
faraday_middleware (0.8.8)
|
15
|
+
faraday (>= 0.7.4, < 0.9)
|
16
|
+
ffi (1.1.5)
|
17
|
+
guard (1.4.0)
|
18
|
+
listen (>= 0.4.2)
|
19
|
+
thor (>= 0.14.6)
|
20
|
+
guard-rspec (1.2.1)
|
21
|
+
guard (>= 1.1)
|
22
|
+
json (1.7.5)
|
23
|
+
listen (0.5.2)
|
24
|
+
mime-types (1.19)
|
25
|
+
multi_json (1.3.6)
|
26
|
+
multipart-post (1.1.5)
|
27
|
+
nokogiri (1.5.5)
|
28
|
+
oembedr (1.0.0)
|
29
|
+
faraday
|
30
|
+
faraday_middleware
|
31
|
+
multi_json
|
32
|
+
typhoeus
|
33
|
+
rake (0.9.2.2)
|
34
|
+
rb-fsevent (0.9.2)
|
35
|
+
rdoc (3.12)
|
36
|
+
json (~> 1.4)
|
37
|
+
rspec (2.8.0)
|
38
|
+
rspec-core (~> 2.8.0)
|
39
|
+
rspec-expectations (~> 2.8.0)
|
40
|
+
rspec-mocks (~> 2.8.0)
|
41
|
+
rspec-core (2.8.0)
|
42
|
+
rspec-expectations (2.8.0)
|
43
|
+
diff-lcs (~> 1.1.2)
|
44
|
+
rspec-mocks (2.8.0)
|
45
|
+
thor (0.16.0)
|
46
|
+
typhoeus (0.4.2)
|
47
|
+
ffi (~> 1.0)
|
48
|
+
mime-types (~> 1.18)
|
49
|
+
|
50
|
+
PLATFORMS
|
51
|
+
ruby
|
52
|
+
|
53
|
+
DEPENDENCIES
|
54
|
+
bundler (> 1.1.0)
|
55
|
+
earl!
|
56
|
+
guard-rspec (~> 1.2.0)
|
57
|
+
rake (~> 0.9.2.2)
|
58
|
+
rb-fsevent (~> 0.9.1)
|
59
|
+
rdoc (~> 3.11)
|
60
|
+
rspec (~> 2.8.0)
|
data/Guardfile
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
# A sample Guardfile
|
2
|
+
# More info at https://github.com/guard/guard#readme
|
3
|
+
|
4
|
+
guard 'rspec', :version => 2, :all_on_start => false, :all_after_pass => false do
|
5
|
+
watch(%r{^spec/.+_spec\.rb$})
|
6
|
+
watch('spec/spec_helper.rb') { "spec" }
|
7
|
+
watch(%r{^lib/(.+)\.rb$}) { |m| "spec/unit/#{m[1]}_spec.rb" }
|
8
|
+
|
9
|
+
end
|
10
|
+
|
data/LICENSE
CHANGED
@@ -1,6 +1,4 @@
|
|
1
|
-
Copyright (c)
|
2
|
-
|
3
|
-
MIT License
|
1
|
+
Copyright (c) 2009 T.J. VanSlyke
|
4
2
|
|
5
3
|
Permission is hereby granted, free of charge, to any person obtaining
|
6
4
|
a copy of this software and associated documentation files (the
|
@@ -19,4 +17,4 @@ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
19
17
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
18
|
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
19
|
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
-
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,145 @@
|
|
1
|
+
= Earl {<img src="https://secure.travis-ci.org/evendis/earl.png" />}[http://travis-ci.org/evendis/earl]
|
2
|
+
|
3
|
+
Earl wants to help you scrape all the relevant metadata for your favorite web pages so you can be as cool as
|
4
|
+
Facebook when displaying user-submitted link content. Earl returns details like titles, descriptions, content type,
|
5
|
+
associated feeds, and OEmbed definitions if available.
|
6
|
+
|
7
|
+
Earl is based on an original source project called _earl_ by {teejayvanslyke}[https://github.com/teejayvanslyke/earl] (but never released as a gem).
|
8
|
+
The revamp was done by {Paul Gallagher}[https://github.com/tardate], and master source is currently
|
9
|
+
available at https://github.com/evendis/earl.
|
10
|
+
|
11
|
+
The Earl gem is officially named _earl_. Big thanks go to {jeremyruppel}[https://github.com/jeremyruppel] who
|
12
|
+
contributed the ownership of the _earl_ gem name. The original _earl_ gem had a somewhat similar purpose - it is now defunct, but still available up to version 0.3.0 via rubygems. Any _earl_ gem with version 1.0.0 or higher is the new gem release (and is in no way backward compatible with
|
13
|
+
earlier versions).
|
14
|
+
|
15
|
+
== The Earl Cookbook
|
16
|
+
|
17
|
+
=== How do I instantiate Earl?
|
18
|
+
|
19
|
+
Pass any url-like string to Earl:
|
20
|
+
|
21
|
+
my_earl_instance = Earl.new('https://github.com/evendis/earl')
|
22
|
+
#
|
23
|
+
# or using the [] convenience method:
|
24
|
+
my_earl_instance = Earl['https://github.com/evendis/earl']
|
25
|
+
|
26
|
+
|
27
|
+
=== How do I inspect details of the page?
|
28
|
+
|
29
|
+
earl = Earl['https://github.com/evendis/earl']
|
30
|
+
earl.title
|
31
|
+
=> "evendis/earl · GitHub"
|
32
|
+
earl.description
|
33
|
+
=> "earl - URL metadata API for scraping titles, descriptions, images, and videos from URL's."
|
34
|
+
earl.image
|
35
|
+
=> "https://a248.e.akamai.net/assets.github.com/images/modules/header/logov7@4x.png?1340935010"
|
36
|
+
|
37
|
+
=== How do I get oembed details for a link?
|
38
|
+
|
39
|
+
Earl will get oembed details for a link if they are available.
|
40
|
+
|
41
|
+
earl = Earl['http://www.youtube.com/watch?v=g3DCEcSlfhw']
|
42
|
+
earl.oembed
|
43
|
+
=> {"provider_url"=>"http://www.youtube.com/", "thumbnail_url"=>"http://i4.ytimg.com/vi/g3DCEcSlfhw/hqdefault.jpg", "title"=>"'Virtuosos of Guitar 2008' festival, Moscow. Marcin Dylla", "html"=>"<iframe width=\"420\" height=\"315\" src=\"http://www.youtube.com/embed/g3DCEcSlfhw?fs=1&feature=oembed\" frameborder=\"0\" allowfullscreen></iframe>", "author_name"=>"guitarmagnet", "height"=>315, "thumbnail_width"=>480, "width"=>420, "version"=>"1.0", "author_url"=>"http://www.youtube.com/user/guitarmagnet", "provider_name"=>"YouTube", "type"=>"video", "thumbnail_height"=>360}
|
44
|
+
# to get the embed code:
|
45
|
+
earl.oembed_html
|
46
|
+
=> "<iframe width=\"420\" height=\"315\" src=\"http://www.youtube.com/embed/g3DCEcSlfhw?fs=1&feature=oembed\" frameborder=\"0\" allowfullscreen></iframe>"
|
47
|
+
|
48
|
+
=== How do I customise the oembed link?
|
49
|
+
|
50
|
+
Supported oembed parameters may be provided to `Earl.new` or to the `oembed` call:
|
51
|
+
|
52
|
+
earl = Earl.new('http://www.youtube.com/watch?v=g3DCEcSlfhw', {:oembed => {:maxwidth => "200", :maxheight => "320"}})
|
53
|
+
earl.oembed_html
|
54
|
+
=> <iframe width="200" height="150" src="http://www.youtube.com/embed/g3DCEcSlfhw?fs=1&feature=oembed" frameborder="0" allowfullscreen></iframe>
|
55
|
+
earl = Earl.new('http://www.youtube.com/watch?v=g3DCEcSlfhw')
|
56
|
+
earl.oembed({:maxwidth => "100", :maxheight => "120"})
|
57
|
+
earl.oembed_html
|
58
|
+
=> <iframe width="100" height="75" src="http://www.youtube.com/embed/g3DCEcSlfhw?fs=1&feature=oembed" frameborder="0" allowfullscreen></iframe>
|
59
|
+
|
60
|
+
|
61
|
+
=== How do I inspect what attributes are available for a page?
|
62
|
+
|
63
|
+
To see all of the attributes a URL provides, simply ask:
|
64
|
+
|
65
|
+
earl = Earl['https://github.com/evendis/earl']
|
66
|
+
earl.attributes
|
67
|
+
=> [:title, :image, :description, :rss_feed, :atom_feed, :content_type, :base_url, :charset, :content_encoding, :headers, :feed]
|
68
|
+
|
69
|
+
|
70
|
+
=== How can I extend Earl to scrape additional page details?
|
71
|
+
|
72
|
+
Need to scrape additional page details not currently supported by Earl? Implement your own scraper:
|
73
|
+
|
74
|
+
class QotdScraper < Earl::Scraper
|
75
|
+
match /^http\:\/\/www\.quotationspage\.com\/qotd\.html$/
|
76
|
+
|
77
|
+
define_attribute :qotd do |doc|
|
78
|
+
doc.at('dt.quote a').text
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
The define_attribute method will supply you with a Nokogiri document which you can traverse to your heart's content.
|
83
|
+
Use the match method to limit the scope of URLs that your scraper will apply to.
|
84
|
+
|
85
|
+
Your new attribute is now available for use:
|
86
|
+
|
87
|
+
Earl['http://www.quotationspage.com/qotd.html'].qotd
|
88
|
+
=> "Love is a snowmobile racing across the tundra and then suddenly it flips over, pinning you underneath. At night, the ice weasels come."
|
89
|
+
|
90
|
+
|
91
|
+
=== How do I install it for normal use?
|
92
|
+
|
93
|
+
If using bundler, add gem 'earl' to your application's Gemfile and run `bundle`.
|
94
|
+
|
95
|
+
Or install it from the command-line:
|
96
|
+
|
97
|
+
$ gem install earl
|
98
|
+
|
99
|
+
=== How do I install it for gem development?
|
100
|
+
|
101
|
+
To work on enhancements or fix bugs in Earl, fork and clone the github repository.
|
102
|
+
If you are using bundler (recommended), run <tt>bundle</tt> to install development dependencies:
|
103
|
+
|
104
|
+
$ gem install bundler
|
105
|
+
$ bundle
|
106
|
+
|
107
|
+
=== How do I run the tests?
|
108
|
+
|
109
|
+
Once development dependencies are installed, all unit tests are run with just:
|
110
|
+
|
111
|
+
$ rake
|
112
|
+
# or..
|
113
|
+
$ rake spec
|
114
|
+
|
115
|
+
Unit tests exclude a set of integration tests that actually hit the network (therefore not 'nice' to run all the time,
|
116
|
+
and also subject to failures due to network availability or changes in the services accessed). To run integration tests:
|
117
|
+
|
118
|
+
$ rake spec:integration
|
119
|
+
|
120
|
+
To run all tests (unit and integration):
|
121
|
+
|
122
|
+
$ rake spec:all
|
123
|
+
|
124
|
+
|
125
|
+
=== How do I automatically run tests when I modify files?
|
126
|
+
|
127
|
+
Guard is installed as part of the development dependencies. Start a guard process in a terminal window:
|
128
|
+
|
129
|
+
$ bundle exec guard
|
130
|
+
|
131
|
+
It will run all the tests to start with by default. Then whenever you change a file, the associated tests will execute in this terminal window.
|
132
|
+
|
133
|
+
== Contributing to Earl
|
134
|
+
|
135
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
|
136
|
+
* Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
|
137
|
+
* Fork the project
|
138
|
+
* Start a feature/bugfix branch
|
139
|
+
* Commit and push until you are happy with your contribution
|
140
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
141
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate it to its own commit so I can cherry-pick around it.
|
142
|
+
|
143
|
+
== Copyright
|
144
|
+
|
145
|
+
See LICENSE for details.
|
data/Rakefile
CHANGED
@@ -1,5 +1,38 @@
|
|
1
1
|
#!/usr/bin/env rake
|
2
|
-
require
|
2
|
+
require "bundler/gem_tasks"
|
3
|
+
|
4
|
+
require 'rspec'
|
3
5
|
require 'rspec/core/rake_task'
|
4
6
|
|
5
|
-
|
7
|
+
desc "Run only unit test examples"
|
8
|
+
RSpec::Core::RakeTask.new do |t|
|
9
|
+
t.rspec_opts = ["-c", "-f progress"]
|
10
|
+
t.pattern = 'spec/unit/**/*_spec.rb'
|
11
|
+
end
|
12
|
+
|
13
|
+
desc "Run only integration test examples"
|
14
|
+
RSpec::Core::RakeTask.new(:'spec:integration') do |t|
|
15
|
+
t.rspec_opts = ["-c", "-f progress"]
|
16
|
+
t.pattern = 'spec/integration/**/*_spec.rb'
|
17
|
+
end
|
18
|
+
|
19
|
+
desc "Run all test examples including integration tests"
|
20
|
+
RSpec::Core::RakeTask.new(:'spec:all') do |t|
|
21
|
+
t.rspec_opts = ["-c", "-f progress"]
|
22
|
+
t.pattern = 'spec/**/*_spec.rb'
|
23
|
+
end
|
24
|
+
|
25
|
+
task :default => :spec
|
26
|
+
|
27
|
+
require 'rdoc/task'
|
28
|
+
RDoc::Task.new do |rdoc|
|
29
|
+
rdoc.main = "README.rdoc"
|
30
|
+
rdoc.rdoc_dir = 'rdoc'
|
31
|
+
rdoc.title = "earl"
|
32
|
+
rdoc.rdoc_files.include('README*', 'lib/**/*.rb')
|
33
|
+
end
|
34
|
+
|
35
|
+
desc "Open an irb session preloaded with this library"
|
36
|
+
task :console do
|
37
|
+
sh "irb -rubygems -I lib -r earl.rb"
|
38
|
+
end
|
data/earl.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
require File.expand_path('../lib/earl/version', __FILE__)
|
3
3
|
|
4
4
|
Gem::Specification.new do |gem|
|
5
|
-
gem.authors = ["
|
6
|
-
gem.email = ["
|
7
|
-
gem.description = %q{
|
8
|
-
gem.summary = %q{
|
9
|
-
gem.homepage = "https://github.com/
|
5
|
+
gem.authors = ["teejayvanslyke", "Paul Gallagher"]
|
6
|
+
gem.email = ["tj@elctech.com", "gallagher.paul@gmail.com"]
|
7
|
+
gem.description = %q{URL metadata API}
|
8
|
+
gem.summary = %q{URL metadata API for scraping titles, descriptions, images, and videos from URL's}
|
9
|
+
gem.homepage = "https://github.com/evendis/earl"
|
10
10
|
|
11
11
|
gem.files = `git ls-files`.split($\)
|
12
12
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
@@ -15,7 +15,13 @@ Gem::Specification.new do |gem|
|
|
15
15
|
gem.require_paths = ["lib"]
|
16
16
|
gem.version = Earl::VERSION
|
17
17
|
|
18
|
-
gem.
|
18
|
+
gem.add_runtime_dependency(%q<nokogiri>, [">= 1.4.4"])
|
19
|
+
gem.add_runtime_dependency(%q<oembedr>, [">= 1.0.0"])
|
20
|
+
gem.add_development_dependency(%q<bundler>, ["> 1.1.0"])
|
21
|
+
gem.add_development_dependency(%q<rake>, ["~> 0.9.2.2"])
|
22
|
+
gem.add_development_dependency(%q<rspec>, ["~> 2.8.0"])
|
23
|
+
gem.add_development_dependency(%q<rdoc>, ["~> 3.11"])
|
24
|
+
gem.add_development_dependency(%q<guard-rspec>, ["~> 1.2.0"])
|
25
|
+
gem.add_development_dependency(%q<rb-fsevent>, ["~> 0.9.1"])
|
19
26
|
|
20
|
-
gem.add_development_dependency 'rspec', '>= 2.9.0'
|
21
27
|
end
|
data/lib/earl.rb
CHANGED
@@ -1,24 +1,9 @@
|
|
1
|
-
require '
|
2
|
-
|
3
|
-
module Earl
|
4
|
-
autoload :EntityBase, 'earl/entity_base'
|
5
|
-
autoload :URLEntity, 'earl/url_entity'
|
6
|
-
autoload :URLAssembler, 'earl/url_assembler'
|
7
|
-
autoload :EmailEntity, 'earl/email_entity'
|
8
|
-
autoload :EmailAssembler, 'earl/email_assembler'
|
9
|
-
autoload :HashInquirer, 'earl/hash_inquirer'
|
10
|
-
autoload :StringInquirer, 'earl/string_inquirer'
|
11
|
-
|
12
|
-
class << self
|
13
|
-
def URL( source )
|
14
|
-
Earl::URLEntity.new source
|
15
|
-
end
|
16
|
-
def Email( source )
|
17
|
-
Earl::EmailEntity.new source
|
18
|
-
end
|
19
|
-
end
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
20
3
|
|
21
|
-
|
22
|
-
class InvalidURLError < EarlError; end
|
23
|
-
class SubclassError < EarlError; end
|
4
|
+
class Earl
|
24
5
|
end
|
6
|
+
|
7
|
+
require 'earl/version'
|
8
|
+
require 'earl/scraper'
|
9
|
+
require 'earl/earl'
|
data/lib/earl/earl.rb
ADDED
@@ -0,0 +1,158 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'oembedr'
|
3
|
+
|
4
|
+
class Earl
|
5
|
+
|
6
|
+
attr_accessor :url, :options, :oembed
|
7
|
+
|
8
|
+
def initialize(url, options={})
|
9
|
+
@url = url
|
10
|
+
@options = options
|
11
|
+
end
|
12
|
+
|
13
|
+
def to_s
|
14
|
+
url
|
15
|
+
end
|
16
|
+
|
17
|
+
def uri
|
18
|
+
@uri ||= URI.parse(url)
|
19
|
+
end
|
20
|
+
|
21
|
+
def uri_response
|
22
|
+
@uri_response ||= open(uri)
|
23
|
+
end
|
24
|
+
|
25
|
+
# Returns
|
26
|
+
# :base_url - the actual url (resolved after possible redirect) as a String
|
27
|
+
# :content_type - mime type
|
28
|
+
# :charset - returns a charset parameter in Content-Type field. It is downcased for canonicalization.
|
29
|
+
# :content_encoding - returns a list of encodings in Content-Encoding field as an Array of String. The encodings are downcased for canonicalization.
|
30
|
+
# :headers - raw response header metadata
|
31
|
+
# (excluded, since it generally returns a date that is not RFC 2616 compliant: :last_modified - returns a Time which represents the Last-Modified field.)
|
32
|
+
def uri_response_attribute(name)
|
33
|
+
case name
|
34
|
+
when :base_url
|
35
|
+
(uri_response_attribute(:base_uri) || url).to_s
|
36
|
+
when :headers
|
37
|
+
uri_response_attribute(:meta)
|
38
|
+
else
|
39
|
+
uri_response && uri_response.respond_to?(name) && uri_response.send(name)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
protected :uri_response_attribute
|
43
|
+
|
44
|
+
def uri_response_attributes
|
45
|
+
[:content_type,:base_url,:charset,:content_encoding,:headers]
|
46
|
+
end
|
47
|
+
protected :uri_response_attributes
|
48
|
+
|
49
|
+
def scraper
|
50
|
+
@scraper ||= Scraper.for(url,self)
|
51
|
+
end
|
52
|
+
|
53
|
+
def response
|
54
|
+
scraper && scraper.response
|
55
|
+
end
|
56
|
+
|
57
|
+
# Returns a hash of link meta data, including:
|
58
|
+
# :title, :description, :image (all attributes)
|
59
|
+
# :base_url
|
60
|
+
def metadata
|
61
|
+
data = oembed || {}
|
62
|
+
attributes.each do |attribute|
|
63
|
+
if attribute_value = self.send(attribute)
|
64
|
+
data[attribute] ||= attribute_value
|
65
|
+
end
|
66
|
+
end
|
67
|
+
data
|
68
|
+
end
|
69
|
+
|
70
|
+
# Dispatch missing methods if a match for:
|
71
|
+
# - uri_response_attributes
|
72
|
+
# - scraper attributes
|
73
|
+
def method_missing(method, *args)
|
74
|
+
if uri_response_attributes.include?(method)
|
75
|
+
return uri_response_attribute(method)
|
76
|
+
elsif scraper && scraper.has_attribute?(method)
|
77
|
+
return scraper.attribute(method)
|
78
|
+
end
|
79
|
+
super
|
80
|
+
end
|
81
|
+
|
82
|
+
# Returns a full array of attributes available for the link
|
83
|
+
def attributes
|
84
|
+
scraper.attributes.keys + uri_response_attributes + [:feed]
|
85
|
+
end
|
86
|
+
|
87
|
+
# Returns the options to be used for oembed
|
88
|
+
def oembed_options
|
89
|
+
{ :maxwidth => "560", :maxheight => "315" }.merge(options[:oembed]||{})
|
90
|
+
end
|
91
|
+
|
92
|
+
# Returns the oembed meta data hash for the URL (or nil if not defined/available) e.g.
|
93
|
+
# For http://www.youtube.com/watch?v=g3DCEcSlfhw:
|
94
|
+
# {
|
95
|
+
# "provider_url"=>"http://www.youtube.com/",
|
96
|
+
# "thumbnail_url"=>"http://i4.ytimg.com/vi/g3DCEcSlfhw/hqdefault.jpg",
|
97
|
+
# "title"=>"'Virtuosos of Guitar 2008' festival, Moscow. Marcin Dylla",
|
98
|
+
# "html"=>"<iframe width=\"459\" height=\"344\" src=\"http://www.youtube.com/embed/g3DCEcSlfhw?fs=1&feature=oembed\" frameborder=\"0\" allowfullscreen></iframe>",
|
99
|
+
# "author_name"=>"guitarmagnet",
|
100
|
+
# "height"=>344,
|
101
|
+
# "thumbnail_width"=>480,
|
102
|
+
# "width"=>459,
|
103
|
+
# "version"=>"1.0",
|
104
|
+
# "author_url"=>"http://www.youtube.com/user/guitarmagnet",
|
105
|
+
# "provider_name"=>"YouTube",
|
106
|
+
# "type"=>"video",
|
107
|
+
# "thumbnail_height"=>360
|
108
|
+
# }
|
109
|
+
#
|
110
|
+
# +options+ defines a custom oembed options hash and will cause a re-fetch of the oembed metadata
|
111
|
+
def oembed(options=nil)
|
112
|
+
if options # use custom options, refetch oembed metadata
|
113
|
+
@options[:oembed] = options
|
114
|
+
@oembed = nil
|
115
|
+
end
|
116
|
+
begin
|
117
|
+
@oembed ||= if h = Oembedr.fetch(base_url, :params => oembed_options).body
|
118
|
+
h.keys.each do |key| # symbolize_keys!
|
119
|
+
h[(key.to_sym rescue key) || key] = h.delete(key)
|
120
|
+
end
|
121
|
+
h
|
122
|
+
end
|
123
|
+
rescue
|
124
|
+
end
|
125
|
+
@oembed
|
126
|
+
end
|
127
|
+
|
128
|
+
# Returns the oembed code for the url (or nil if not defined/available)
|
129
|
+
def oembed_html
|
130
|
+
oembed && oembed[:html]
|
131
|
+
end
|
132
|
+
|
133
|
+
# Returns true if there is an ATOM or RSS feed associated with this URL.
|
134
|
+
def has_feed?
|
135
|
+
!feed.nil?
|
136
|
+
end
|
137
|
+
|
138
|
+
# Returns the feed URL associated with this URL.
|
139
|
+
# Returns RSS by default, or ATOM if +prefer+ is not :rss.
|
140
|
+
def feed(prefer = :rss)
|
141
|
+
rss = rss_feed
|
142
|
+
atom = atom_feed
|
143
|
+
if rss && atom
|
144
|
+
prefer == :rss ? rss : atom
|
145
|
+
else
|
146
|
+
rss || atom
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
class << self
|
151
|
+
|
152
|
+
def [](url)
|
153
|
+
new(url)
|
154
|
+
end
|
155
|
+
|
156
|
+
end
|
157
|
+
|
158
|
+
end
|