opengraph_parser 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 Huy Ha
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,45 @@
1
+ = OpengraphParser
2
+
3
+ OpengraphParser is a simple Ruby library for parsing Open Graph protocol information from a web site. Learn more about the protocol at:
4
+ http://ogp.me
5
+
6
+ == Installation
7
+ gem install opengraph_parser
8
+
9
+ or add to Gemfile
10
+
11
+ gem "opengraph_parser"
12
+
13
+ == Usage
14
+ og = OpenGraph.new("http://ogp.me")
15
+ og.title # => "Open Graph protocol"
16
+ og.type # => "website"
17
+ og.url # => "http://ogp.me/"
18
+ og.description # => "The Open Graph protocol enables any web page to become a rich object in a social graph."
19
+ og.images # => ["http://ogp.me/logo.png"]
20
+
21
+ You can also get other Open Graph metadata as:
22
+ og.metadata # => {"og:image:type"=>"image/png", "og:image:width"=>"300", "og:image:height"=>"300"}
23
+
24
+ If you try to parse Open Graph information for a website that doesn’t have any Open Graph metadata, the library will try to find equivalent information in the page using the following rules:
25
+ <title> for title
26
+ <meta name="description"> for description
27
+ <link rel="image_src"> or all <img> tags for images
28
+
29
+ You can disable this fallback lookup by passing false as the second argument to the constructor:
30
+ og = OpenGraph.new("http://ogp.me", false)
31
+
32
+ == Contributing to opengraph_parser
33
+
34
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
35
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it.
36
+ * Fork the project.
37
+ * Start a feature/bugfix branch.
38
+ * Commit and push until you are happy with your contribution.
39
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
40
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or a change there is otherwise necessary, that is fine, but please isolate it in its own commit so I can cherry-pick around it.
41
+
42
+ == Copyright
43
+
44
+ Copyright (c) 2012 Huy Ha. See LICENSE.txt for
45
+ further details.
@@ -0,0 +1,85 @@
1
+ require 'nokogiri'
2
+ require 'redirect_follower'
3
+
4
# Extracts Open Graph protocol metadata (http://ogp.me) from a web page.
#
# The page at +src+ is fetched through RedirectFollower and every
# <meta property="og:..."> tag is read: title, url, type and description are
# stored in the same-named attributes, og:image values are collected in
# +images+, and every other og:* property is kept in +metadata+ keyed by its
# full property name.
#
# When +fallback+ is true (the default), missing values are filled in from
# <title>, <meta name="description">, <link rel="image_src"> and <img> tags.
class OpenGraph
  # og:* properties that map directly onto an attribute of the same name.
  BASIC_ATTRIBUTES = %w(title url type description).freeze

  attr_accessor :src, :url, :type, :title, :description, :images, :metadata, :response

  # src      - URL of the page to inspect.
  # fallback - when true, fill missing values from ordinary HTML tags.
  def initialize(src, fallback = true)
    @src = src
    @images = []
    @metadata = {}
    parse_opengraph
    load_fallback if fallback
    check_images_path
  end

  # Fetches the page and reads its og:* meta tags into the attributes.
  # If the fetch raises, title and url are both set to +src+ and parsing stops.
  def parse_opengraph
    begin
      @response = RedirectFollower.new(@src).resolve
    rescue
      # Deliberate best effort: an unreachable page still yields an object.
      @title = @url = @src
      return
    end

    return unless @response && @response.body

    Nokogiri.parse(@response.body).css('meta').each do |meta|
      property = meta.attribute('property').to_s
      # Strip only the leading prefix, with the same case-insensitivity as the
      # match itself, so "OG:title" is treated like "og:title".
      next unless property =~ /\Aog:(.+)/i

      name = Regexp.last_match(1)
      content = meta.attribute('content').to_s.strip
      case name
      when *BASIC_ATTRIBUTES
        instance_variable_set("@#{name}", content) unless content.empty?
      when "image"
        add_image(content)
      else
        @metadata[property] = content
      end
    end
  end

  # Fills in title, url, description and images from regular HTML tags when
  # the Open Graph tags did not provide them. Does nothing without a response.
  def load_fallback
    return unless @response && @response.body

    doc = Nokogiri.parse(@response.body)

    if @title.to_s.empty?
      title_node = doc.xpath("//head/title").first
      @title = title_node.text.to_s.strip if title_node
    end

    @url = @src if @url.to_s.empty?

    if @description.to_s.empty?
      description_meta = doc.xpath("//head/meta[@name='description']").first
      @description = description_meta.attribute("content").to_s.strip if description_meta
    end

    fetch_images(doc, "//head/link[@rel='image_src']", "href") if @images.empty?
    fetch_images(doc, "//img", "src") if @images.empty?
  end

  # Rewrites relative image paths as absolute URLs built from +src+
  # ("scheme://host:port/path"). Images that already have a host or a scheme
  # are kept unchanged; duplicates are dropped.
  def check_images_path
    base = parse_uri(@src)
    candidates = @images.dup
    @images = []
    candidates.each { |img| add_image(absolute_image_url(base, img)) }
  end

  private

  # Appends image_url to +images+ unless it is blank or already present.
  def add_image(image_url)
    @images << image_url unless @images.include?(image_url) || image_url.to_s.empty?
  end

  # Adds the +attr+ value of every node matching +xpath_str+ as an image.
  def fetch_images(doc, xpath_str, attr)
    doc.xpath(xpath_str).each do |link|
      add_image(link.attribute(attr).to_s.strip)
    end
  end

  # Parses +str+ as a URI. Percent-escaping is applied only when the raw
  # string is not a valid URI, so already-encoded URLs are not double-encoded.
  # URI.escape was removed in Ruby 3.0; URI::DEFAULT_PARSER.escape is the
  # equivalent that remains available.
  def parse_uri(str)
    URI.parse(str)
  rescue URI::InvalidURIError
    URI.parse(URI::DEFAULT_PARSER.escape(str))
  end

  # Returns +img+ as an absolute URL relative to +base+ (a parsed URI).
  # A malformed image URL is returned untouched rather than aborting the parse.
  def absolute_image_url(base, img)
    img_uri = parse_uri(img)
    # Anything with a host or a scheme (including data: URIs) is already absolute.
    return img if img_uri.host || img_uri.scheme

    origin = "#{base.scheme}://#{base.host}:#{base.port}"
    return "#{origin}#{img}" if img.start_with?("/")

    # Path-relative image: resolve against the directory of the source page.
    directory = base.path.to_s[%r{\A.*/}] || "/"
    "#{origin}#{directory}#{img}"
  rescue URI::InvalidURIError
    img
  end
end
@@ -0,0 +1 @@
1
+ require 'open_graph'
@@ -0,0 +1,34 @@
1
+ require 'net/http'
2
+
3
# Fetches a URL over HTTP, following redirects up to a fixed limit.
#
# After #resolve, +response+ holds the final Net::HTTP response, +body+ its
# body, and +url+ the last URL that was requested.
class RedirectFollower
  # Raised by #resolve once the redirect budget has been used up.
  class TooManyRedirects < StandardError; end

  attr_accessor :url, :body, :redirect_limit, :response

  # url   - the URL to fetch.
  # limit - maximum number of redirects to follow (default 5).
  def initialize(url, limit = 5)
    @url, @redirect_limit = url, limit
  end

  # Requests +url+, following redirects until a non-redirect response arrives.
  # Returns self. Raises TooManyRedirects when redirect_limit drops below zero.
  def resolve
    loop do
      raise TooManyRedirects if redirect_limit < 0

      self.response = Net::HTTP.get_response(parse_uri(url))
      break unless response.kind_of?(Net::HTTPRedirection)

      self.url = redirect_url
      self.redirect_limit -= 1
    end

    self.body = response.body
    self
  end

  # Returns the redirect target of the current response as an absolute URL.
  # Uses the Location header, or the first <a href="..."> in the body when the
  # header is absent.
  def redirect_url
    target = response['location']
    target = response.body.match(/<a href=\"([^>]+)\">/i)[1] if target.nil?
    absolutize(target)
  end

  private

  # Parses +str+ as a URI. Percent-escaping is applied only when the raw
  # string is not a valid URI, so already-encoded URLs are not double-encoded.
  # URI.escape was removed in Ruby 3.0; URI::DEFAULT_PARSER.escape is the
  # equivalent that remains available.
  def parse_uri(str)
    URI.parse(str)
  rescue URI::InvalidURIError
    URI.parse(URI::DEFAULT_PARSER.escape(str))
  end

  # Resolves a relative redirect target against the URL just requested.
  # Absolute targets, and targets that cannot be resolved, are returned
  # unchanged.
  def absolutize(target)
    target_uri = parse_uri(target)
    return target if target_uri.absolute?

    parse_uri(url).merge(target_uri).to_s
  rescue URI::Error
    target
  end
end
metadata ADDED
@@ -0,0 +1,114 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: opengraph_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Huy Ha
9
+ - Duc Trinh
10
+ autorequire:
11
+ bindir: bin
12
+ cert_chain: []
13
+ date: 2012-11-12 00:00:00.000000000 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rspec
17
+ requirement: &70116370351140 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ! '>='
21
+ - !ruby/object:Gem::Version
22
+ version: '0'
23
+ type: :development
24
+ prerelease: false
25
+ version_requirements: *70116370351140
26
+ - !ruby/object:Gem::Dependency
27
+ name: rdoc
28
+ requirement: &70116370350460 !ruby/object:Gem::Requirement
29
+ none: false
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: *70116370350460
37
+ - !ruby/object:Gem::Dependency
38
+ name: bundler
39
+ requirement: &70116370349400 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ! '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ type: :development
46
+ prerelease: false
47
+ version_requirements: *70116370349400
48
+ - !ruby/object:Gem::Dependency
49
+ name: jeweler
50
+ requirement: &70116370365040 !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ! '>='
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ type: :development
57
+ prerelease: false
58
+ version_requirements: *70116370365040
59
+ - !ruby/object:Gem::Dependency
60
+ name: nokogiri
61
+ requirement: &70116370364240 !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ! '>='
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ type: :development
68
+ prerelease: false
69
+ version_requirements: *70116370364240
70
+ description: A simple Ruby library for parsing Open Graph Protocol information from
71
+ a website. It also includes a fallback solution when the website has no Open Graph
72
+ information.
73
+ email: hhuy424@gmail.com
74
+ executables: []
75
+ extensions: []
76
+ extra_rdoc_files:
77
+ - LICENSE.txt
78
+ - README.rdoc
79
+ files:
80
+ - lib/open_graph.rb
81
+ - lib/opengraph_parser.rb
82
+ - lib/redirect_follower.rb
83
+ - LICENSE.txt
84
+ - README.rdoc
85
+ homepage: http://github.com/huyha85/opengraph_parser
86
+ licenses:
87
+ - MIT
88
+ post_install_message:
89
+ rdoc_options: []
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ none: false
94
+ requirements:
95
+ - - ! '>='
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ segments:
99
+ - 0
100
+ hash: -2116295098415564887
101
+ required_rubygems_version: !ruby/object:Gem::Requirement
102
+ none: false
103
+ requirements:
104
+ - - ! '>='
105
+ - !ruby/object:Gem::Version
106
+ version: '0'
107
+ requirements: []
108
+ rubyforge_project:
109
+ rubygems_version: 1.8.10
110
+ signing_key:
111
+ specification_version: 3
112
+ summary: A simple Ruby library for parsing Open Graph Protocol information from a
113
+ website.
114
+ test_files: []