open_graph_reader 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitmodules +3 -0
- data/.rspec +2 -0
- data/.yardopts +1 -0
- data/lib/open_graph_reader.rb +83 -0
- data/lib/open_graph_reader/base.rb +57 -0
- data/lib/open_graph_reader/builder.rb +100 -0
- data/lib/open_graph_reader/definitions.rb +333 -0
- data/lib/open_graph_reader/fetcher.rb +82 -0
- data/lib/open_graph_reader/object.rb +95 -0
- data/lib/open_graph_reader/object/dsl.rb +130 -0
- data/lib/open_graph_reader/object/dsl/types.rb +71 -0
- data/lib/open_graph_reader/object/registry.rb +54 -0
- data/lib/open_graph_reader/parser.rb +85 -0
- data/lib/open_graph_reader/parser/graph.rb +136 -0
- data/lib/open_graph_reader/version.rb +4 -0
- data/spec/fixtures/examples/apple-touch-icon-precomposed.png +0 -0
- data/spec/fixtures/examples/apple-touch-icon.png +0 -0
- data/spec/fixtures/examples/article-offset.html +25 -0
- data/spec/fixtures/examples/article-utc.html +25 -0
- data/spec/fixtures/examples/article.html +25 -0
- data/spec/fixtures/examples/audio-array.html +27 -0
- data/spec/fixtures/examples/audio-url.html +25 -0
- data/spec/fixtures/examples/audio.html +24 -0
- data/spec/fixtures/examples/book-isbn10.html +27 -0
- data/spec/fixtures/examples/book.html +27 -0
- data/spec/fixtures/examples/canadian.html +16 -0
- data/spec/fixtures/examples/error.html +17 -0
- data/spec/fixtures/examples/errors/article-date.html +25 -0
- data/spec/fixtures/examples/errors/book-author.html +27 -0
- data/spec/fixtures/examples/errors/book.html +27 -0
- data/spec/fixtures/examples/errors/gender.html +20 -0
- data/spec/fixtures/examples/errors/geo.html +23 -0
- data/spec/fixtures/examples/errors/type.html +16 -0
- data/spec/fixtures/examples/errors/video-duration.html +42 -0
- data/spec/fixtures/examples/favicon.ico +0 -0
- data/spec/fixtures/examples/filters/xss-image.html +15 -0
- data/spec/fixtures/examples/image-array.html +26 -0
- data/spec/fixtures/examples/image-toosmall.html +24 -0
- data/spec/fixtures/examples/image-url.html +22 -0
- data/spec/fixtures/examples/image.html +21 -0
- data/spec/fixtures/examples/index.html +67 -0
- data/spec/fixtures/examples/media/audio/1khz.mp3 +0 -0
- data/spec/fixtures/examples/media/audio/250hz.mp3 +0 -0
- data/spec/fixtures/examples/media/images/1.png +0 -0
- data/spec/fixtures/examples/media/images/50.png +0 -0
- data/spec/fixtures/examples/media/images/75.png +0 -0
- data/spec/fixtures/examples/media/images/icon.png +0 -0
- data/spec/fixtures/examples/media/images/logo.png +0 -0
- data/spec/fixtures/examples/media/images/train.jpg +0 -0
- data/spec/fixtures/examples/media/video/train.flv +0 -0
- data/spec/fixtures/examples/media/video/train.mp4 +0 -0
- data/spec/fixtures/examples/media/video/train.webm +0 -0
- data/spec/fixtures/examples/min.html +14 -0
- data/spec/fixtures/examples/nomedia.html +20 -0
- data/spec/fixtures/examples/plain.html +10 -0
- data/spec/fixtures/examples/profile.html +25 -0
- data/spec/fixtures/examples/required.html +20 -0
- data/spec/fixtures/examples/robots.txt +4 -0
- data/spec/fixtures/examples/sitemap.xml +23 -0
- data/spec/fixtures/examples/video-array.html +36 -0
- data/spec/fixtures/examples/video-movie.html +42 -0
- data/spec/fixtures/examples/video.html +26 -0
- data/spec/integration/invalid_examples_spec.rb +69 -0
- data/spec/integration/valid_examples_spec.rb +76 -0
- data/spec/open_graph_reader_spec.rb +94 -0
- data/spec/spec_helper.rb +35 -0
- metadata +247 -0
@@ -0,0 +1,82 @@
|
|
1
|
+
require 'faraday'
|
2
|
+
|
3
|
+
module OpenGraphReader
  # Fetch a URI to retrieve its HTML body, if available.
  #
  # @api private
  class Fetcher
    # Create a new fetcher.
    #
    # The fetcher works on a copy of Faraday's default connection. If
    # FaradayMiddleware is loaded, its FollowRedirects middleware is inserted
    # at the front of the copy's stack unless the stack already contains it.
    #
    # @param [URI] uri the URI to fetch.
    # @raise [ArgumentError] If the given value is not an instance of URI.
    def initialize uri
      raise ArgumentError, "url needs to be an instance of URI" unless uri.is_a? URI
      @uri = uri
      @connection = Faraday.default_connection.dup

      if defined? FaradayMiddleware
        unless @connection.builder.handlers.include? FaradayMiddleware::FollowRedirects
          @connection.builder.insert(0, FaradayMiddleware::FollowRedirects)
        end
      end
    end

    # The URL to fetch
    #
    # @return [String]
    def url
      @uri.to_s
    end

    # Fetch the full page with a GET request and remember the response.
    #
    # @return [Faraday::Response]
    def fetch
      @get_response = @connection.get(@uri)
    end
    alias_method :fetch_body, :fetch

    # Fetch just the headers with a HEAD request and remember the response.
    #
    # @return [Faraday::Response]
    def fetch_headers
      @head_response = @connection.head(@uri)
    end

    # Retrieve the body, fetching the page first if that did not happen yet.
    #
    # @todo Custom error class
    # @raise [ArgumentError] The received content does not seem to be HTML.
    # @return [String]
    def body
      fetch_body unless fetched?
      raise ArgumentError, "Did not receive a HTML site at #{@uri}" unless html?
      @get_response.body
    end

    # Whether the target URI seems to return HTML. Issues a HEAD request
    # if neither the headers nor the full page were fetched before.
    #
    # @return [Bool]
    def html?
      fetch_headers unless fetched_headers?
      # Prefer the GET response when both are available.
      response = @get_response || @head_response
      return false unless response.success?

      content_type = response['content-type']
      return false unless content_type

      # Media types are case-insensitive, so "Text/HTML" has to match too.
      content_type.downcase.include? 'text/html'
    end

    # Whether the target URI was fetched.
    #
    # @return [Bool]
    def fetched?
      !@get_response.nil?
    end
    alias_method :fetched_body?, :fetched?

    # Whether the headers of the target URI were fetched, either on their
    # own or as part of the full page.
    #
    # @return [Bool]
    def fetched_headers?
      fetched? || !@head_response.nil?
    end
  end
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'open_graph_reader/object/registry'
|
2
|
+
require 'open_graph_reader/object/dsl'
|
3
|
+
require 'open_graph_reader/object/dsl/types'
|
4
|
+
|
5
|
+
module OpenGraphReader

  # Base functionality shared by all OpenGraph objects. Including this
  # module extends the including class with the {DSL} methods used to
  # describe the object.
  #
  # @example Define a new object
  #   class MyObject
  #     include OpenGraphReader::Object
  #
  #     namespace :my, :object
  #     content :string
  #     string :name, required: true
  #   end
  module Object
    # @private
    def self.included base
      base.extend(DSL)
    end

    # The value of the namespace this object represents, if it had one.
    # @return [String, nil]
    attr_reader :content

    # Regular properties on this object
    #
    # @api private
    # @return [{String => String, Object}]
    attr_reader :properties

    # Properties on this object that are arrays.
    #
    # @api private
    # @return [{String => Array<String, Object>}]
    attr_reader :children


    # Create a new object. If your class overrides this don't forget to call <tt>super</tt>.
    def initialize
      # Asking for an unknown key yields (and stores) a fresh empty list.
      @children = Hash.new {|hash, key| hash[key] = [] }
      @properties = {}
    end

    # Whether this object has the given property
    #
    # @param [#to_s] name
    # @return [Bool]
    def has_property? name
      known = self.class.available_properties
      known.include?(name.to_s)
    end

    # Set the content for this object in case it is also a property on
    # another object. The value is passed through the content processor
    # of the class before it is stored.
    #
    # @api private
    # @param [String] value
    def content= value
      @content = self.class.content_processor.call(value)
    end

    # Get a property on this object.
    #
    # @api private
    # @param [#to_s] name
    # @todo right error?
    # @raise [InvalidObjectError] If the requested property is undefined.
    # @return [String, Object]
    def [] name
      if has_property? name
        properties[name.to_s]
      else
        raise InvalidObjectError, "Undefined property #{name} on #{inspect}"
      end
    end

    # Set the property to the given value by calling its writer method.
    #
    # @api private
    # @param [#to_s] name
    # @param [String, Object] value
    # @raise [InvalidObjectError] If the requested property is undefined.
    def []= name, value
      if has_property? name
        public_send "#{name}=", value
      else
        raise InvalidObjectError, "Undefined property #{name} on #{inspect}"
      end
    end

    # Returns {#content} if available, the default string representation
    # otherwise.
    #
    # @return [String]
    def to_s
      return content if content
      super
    end
  end
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
require 'open_graph_reader/object/registry'
|
2
|
+
|
3
|
+
module OpenGraphReader
  module Object
    # Methods for describing an object: defining new types and properties
    # and setting further metadata such as the namespace of the object.
    module DSL
      # @!macro define_type_description
      #   @param [Symbol] name the name of the property in the current namespace
      #   @param [{Symbol => Bool, Class, Array<String>}] options additional options
      #   @option options [Bool] :required (false) Make the property required.
      #   @option options [Bool] :collection (false) This property can occur multiple times.
      #   @option options [Class] :to This property maps to the given object (optional).
      #   @option options [Array<String>] :verticals This property
      #     belongs to the given verticals of the object (optional).
      #   @option options [::Object] :default The value the reader returns
      #     while nothing was set (optional).
      #
      # @!macro property
      #   @!attribute [rw] $1

      # @!macro [attach] define_type
      #   @!method $1(name, options={})
      #   @!macro define_type_description
      #
      # Defines a new DSL method for modeling a new type. The given block is
      # stored in {processors} and called by the writer of every property
      # declared with the new DSL method.
      #
      # @yield convert and validate
      # @yieldparam [::Object] value the value to be converted and validated
      # @yieldparam [Array<::Object>] *args any additional arguments
      # @yieldparam [{Symbol => Bool, Class, Array<String>}] options the options hash as last parameter
      def self.define_type(name, &processor)
        processors[name] = processor

        define_method(name) do |property, *args|
          key = property.to_s
          available_properties << key
          # A trailing hash is the options hash, everything before it is
          # handed to the processor as additional arguments.
          options = args.last.is_a?(Hash) ? args.pop : {}

          target = options[:to]
          Registry.register [@namespace, property].join(':'), target if target

          (options[:verticals] || []).each do |vertical|
            verticals[[@namespace, vertical].join('.')] << property
          end

          if options[:collection]
            # Plural reader returning all values.
            define_method("#{property}s") { children[key] }

            # Singular reader returning the first value.
            define_method(property) do
              # TODO raise if required
              first = children[key].first
              # TODO: figure out a sane way to distinguish subobject properties
              # NOTE(review): the result of this call is discarded, so the
              # reader returns the object itself rather than its content.
              # Kept as is - confirm the intended behavior.
              first.content if first.is_a?(Object)
              first || options[:default]
            end
          else
            define_method(property) do
              # TODO raise if required
              properties[key] || options[:default]
            end

            define_method("#{property}=") do |value|
              # TODO: figure out a sane way to distinguish subobject properties
              # Values that already are objects are stored unprocessed.
              value = processor.call(value, *args, options) unless value.is_a? Object
              properties[key] = value
            end
          end
        end
      end

      class << self
        alias_method :define_type_with_args, :define_type
      end

      # @overload namespace
      #   Get the namespace of this object.
      #
      #   @return [String] A colon separated namespace, for example <tt>og:image</tt>.
      # @overload namespace(*names)
      #   Set the namespace of this object and register the object for it
      #   in the {Registry}.
      #
      #   @param [Array<#to_s>] *names The individual parts of the namespace as list
      #   @example
      #     namespace :og, :image
      def namespace *names
        if names.empty?
          @namespace
        else
          @namespace = names.join(':')
          Registry.register @namespace, self
        end
      end

      # Set the type for the content attribute
      #
      # @param [Symbol] type one of the registered types.
      def content type
        @content_processor = DSL.processors[type]
      end

      # The list of defined properties on this object.
      #
      # @return [Array<String>]
      def available_properties
        @available_properties ||= []
      end

      # A map from type names to processing blocks.
      #
      # @api private
      # @return [{Symbol => Proc}]
      def self.processors
        @processors ||= {}
      end

      # The processor for the content attribute. Without a configured type
      # the value is passed through unchanged.
      #
      # @api private
      # @return [Proc]
      def content_processor
        return @content_processor if @content_processor
        proc {|value| value }
      end

      # A map from vertical names to attributes that belong to them.
      #
      # @api private
      # @return [{String => Array<String>}]
      def verticals
        @verticals ||= Hash.new {|hash, key| hash[key] = [] }
      end
    end
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require 'date'
|
2
|
+
|
3
|
+
require 'open_graph_reader/object/dsl'
|
4
|
+
|
5
|
+
module OpenGraphReader
  module Object
    module DSL
      # Converts the value to a string.
      #
      # @see http://ogp.me/#string
      define_type :string do |value|
        value.to_s
      end

      # Converts the value to a string and requires it to start with
      # <tt>http://</tt> or <tt>https://</tt>.
      #
      # @see http://ogp.me/#url
      define_type :url do |value|
        url = value.to_s
        unless url.start_with?('http://', 'https://')
          raise InvalidObjectError, "URL #{url.inspect} does not start with http:// or https://"
        end

        url
      end

      # Requires the value to be one of the allowed values and converts it
      # to a string.
      #
      # @!method enum(name, allowed, options={})
      #   @param [Array<String>] allowed the list of allowed values
      #   @!macro define_type_description
      # @see http://ogp.me/#enum
      define_type_with_args :enum do |value, allowed|
        unless allowed.include? value
          raise InvalidObjectError, "Expected one of #{allowed.inspect} but was #{value.inspect}"
        end

        value.to_s
      end

      # Converts the value to an Integer.
      #
      # @see http://ogp.me/#integer
      define_type :integer do |value|
        begin
          # Without an explicit base Kernel#Integer treats "010" as octal and
          # "0x1A" as hex. A base may only be given for strings.
          value.is_a?(String) ? Integer(value, 10) : Integer(value)
        rescue ArgumentError, TypeError # TypeError: nil and other unconvertible values
          raise InvalidObjectError, "Integer expected, but was #{value.inspect}"
        end
      end

      # Parses the value as an ISO8601 datetime.
      #
      # @see http://ogp.me/#datetime
      define_type :datetime do |value|
        begin
          DateTime.iso8601 value
        rescue ArgumentError, TypeError # TypeError: non-string values such as nil
          raise InvalidObjectError, "ISO8601 datetime expected, but was #{value.inspect}"
        end
      end

      # Maps <tt>true</tt>, <tt>false</tt>, <tt>1</tt> and <tt>0</tt> (as
      # strings) to the corresponding boolean.
      #
      # @see http://ogp.me/#bool
      define_type :boolean do |value|
        bool = {'true' => true, 'false' => false, '1' => true, '0' => false}[value]
        raise InvalidObjectError, "Boolean expected, but was #{value.inspect}" if bool.nil?

        bool
      end

      # Converts the value to a Float.
      #
      # @see http://ogp.me/#float
      define_type :float do |value|
        begin
          Float(value)
        rescue ArgumentError, TypeError # TypeError: nil and other unconvertible values
          raise InvalidObjectError, "Float expected, but was #{value.inspect}"
        end
      end
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'singleton'
|
2
|
+
require 'forwardable'
|
3
|
+
|
4
|
+
module OpenGraphReader
  module Object
    # Process wide registry mapping namespaces to the classes that
    # represent them.
    #
    # @api private
    class Registry
      extend Forwardable
      include Singleton

      class << self
        extend Forwardable

        # @!method register(namespace, klass)
        #   Register a new namespace in the registry.
        #
        #   @param [String] namespace The namespace in colon separated form, for example <tt>og:image</tt>.
        #   @param [Class] klass The class to register. It should include {Object}.
        #   @api private
        def_delegator :instance, :register

        # @!method registered?(namespace)
        #   Check whether a namespace is registered.
        #
        #   @param [String] namespace The namespace in colon separated form, for example <tt>og:image</tt>.
        #   @return [Bool]
        #   @api private
        def_delegator :instance, :registered?

        # @!method [](namespace)
        #   Fetch the class associated with the given namespace
        #
        #   @param [String] namespace The namespace in colon separated form, for example <tt>og:image</tt>.
        #   @return [Class] The matching class.
        #   @raise [ArgumentError] If the given namespace wasn't registered.
        #   @api private
        def_delegator :instance, :[]
      end

      def initialize
        @namespaces = {}
      end

      # Writing and membership checks go straight to the underlying hash,
      # both under the hash's own method names and under the registry names.
      def_delegator :@namespaces, :[]=
      def_delegator :@namespaces, :has_key?
      def_delegator :@namespaces, :[]=, :register
      def_delegator :@namespaces, :has_key?, :registered?

      # @see Registry.[]
      def [] namespace
        return @namespaces[namespace] if registered? namespace

        raise ArgumentError, "#{namespace} is not a registered namespace"
      end
    end
  end
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
require 'open_graph_reader/parser/graph'
|
4
|
+
|
5
|
+
module OpenGraphReader
  # Parse OpenGraph tags in a HTML document into a graph.
  #
  # @api private
  class Parser
    # Namespaces found in the passed documents head tag. Empty until the
    # graph was built.
    #
    # @return [Array<String>]
    attr_reader :additional_namespaces

    # Create a new parser.
    #
    # @param [#to_s, Nokogiri::XML::Node] html the document to parse.
    # @param [String] origin The source the document was obtained from.
    def initialize html, origin=nil
      @doc = to_doc html
      @origin = origin
      @additional_namespaces = []
    end

    # Whether there are any OpenGraph tags at all.
    #
    # @return [Bool]
    def has_tags?
      !graph.empty?
    end

    # Build and return the {Graph}. The graph is built once and then reused.
    #
    # @return [Graph]
    def graph
      @graph ||= build_graph
    end

    private

    # Collect all matching meta tags of the head into a new graph. The
    # colon separated property name of each tag describes the path to the
    # node holding the content of the tag.
    #
    # @raise [NoOpenGraphDataError] If the document has no head tag.
    # @return [Graph]
    def build_graph
      graph = Graph.new
      head = @doc.xpath('/html/head').first

      raise NoOpenGraphDataError, "There's no head tag in #{@doc}" unless head

      head.xpath("meta[#{property_condition(head)}]").each do |tag|
        *path, leaf = tag['property'].split(':')
        parent = path.inject(graph.root) {|node, name| find_or_create_child(node, name) }

        # TODO: make stripping configurable?
        # A tag without a content attribute is treated like an empty one
        # instead of failing with a NoMethodError on nil.
        parent << Graph::Node.new(leaf, tag['content'].to_s.strip)
      end

      graph
    end

    # Build the XPath condition selecting the meta tags to read and remember
    # the namespaces declared in the prefix attribute of the head, if any.
    def property_condition head
      prefixes = ['og']

      if head['prefix']
        @additional_namespaces = head['prefix'].scan(/(\w+):\s*([^ ]+)/).map(&:first)
        prefixes.concat(@additional_namespaces.reject {|namespace| namespace == 'og' })
      end

      # The colon is part of the test so that a prefix only matches its own
      # properties and not those of a longer prefix starting the same way.
      prefixes.map {|prefix| "starts-with(@property, '#{prefix}:')" }.join(' or ')
    end

    # Return the most recently added child of node with the given name,
    # appending a new one if there is none yet.
    def find_or_create_child node, name
      existing = node.children.reverse.find {|child| child.name == name }
      return existing if existing

      Graph::Node.new(name).tap {|child| node << child }
    end

    # Nodes are used as they are, everything else is parsed as HTML.
    def to_doc html
      return html if html.is_a? Nokogiri::XML::Node

      Nokogiri::HTML.parse(html.to_s)
    end
  end
end
|