open_graph_reader 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitmodules +3 -0
- data/.rspec +2 -0
- data/.yardopts +1 -0
- data/lib/open_graph_reader.rb +83 -0
- data/lib/open_graph_reader/base.rb +57 -0
- data/lib/open_graph_reader/builder.rb +100 -0
- data/lib/open_graph_reader/definitions.rb +333 -0
- data/lib/open_graph_reader/fetcher.rb +82 -0
- data/lib/open_graph_reader/object.rb +95 -0
- data/lib/open_graph_reader/object/dsl.rb +130 -0
- data/lib/open_graph_reader/object/dsl/types.rb +71 -0
- data/lib/open_graph_reader/object/registry.rb +54 -0
- data/lib/open_graph_reader/parser.rb +85 -0
- data/lib/open_graph_reader/parser/graph.rb +136 -0
- data/lib/open_graph_reader/version.rb +4 -0
- data/spec/fixtures/examples/apple-touch-icon-precomposed.png +0 -0
- data/spec/fixtures/examples/apple-touch-icon.png +0 -0
- data/spec/fixtures/examples/article-offset.html +25 -0
- data/spec/fixtures/examples/article-utc.html +25 -0
- data/spec/fixtures/examples/article.html +25 -0
- data/spec/fixtures/examples/audio-array.html +27 -0
- data/spec/fixtures/examples/audio-url.html +25 -0
- data/spec/fixtures/examples/audio.html +24 -0
- data/spec/fixtures/examples/book-isbn10.html +27 -0
- data/spec/fixtures/examples/book.html +27 -0
- data/spec/fixtures/examples/canadian.html +16 -0
- data/spec/fixtures/examples/error.html +17 -0
- data/spec/fixtures/examples/errors/article-date.html +25 -0
- data/spec/fixtures/examples/errors/book-author.html +27 -0
- data/spec/fixtures/examples/errors/book.html +27 -0
- data/spec/fixtures/examples/errors/gender.html +20 -0
- data/spec/fixtures/examples/errors/geo.html +23 -0
- data/spec/fixtures/examples/errors/type.html +16 -0
- data/spec/fixtures/examples/errors/video-duration.html +42 -0
- data/spec/fixtures/examples/favicon.ico +0 -0
- data/spec/fixtures/examples/filters/xss-image.html +15 -0
- data/spec/fixtures/examples/image-array.html +26 -0
- data/spec/fixtures/examples/image-toosmall.html +24 -0
- data/spec/fixtures/examples/image-url.html +22 -0
- data/spec/fixtures/examples/image.html +21 -0
- data/spec/fixtures/examples/index.html +67 -0
- data/spec/fixtures/examples/media/audio/1khz.mp3 +0 -0
- data/spec/fixtures/examples/media/audio/250hz.mp3 +0 -0
- data/spec/fixtures/examples/media/images/1.png +0 -0
- data/spec/fixtures/examples/media/images/50.png +0 -0
- data/spec/fixtures/examples/media/images/75.png +0 -0
- data/spec/fixtures/examples/media/images/icon.png +0 -0
- data/spec/fixtures/examples/media/images/logo.png +0 -0
- data/spec/fixtures/examples/media/images/train.jpg +0 -0
- data/spec/fixtures/examples/media/video/train.flv +0 -0
- data/spec/fixtures/examples/media/video/train.mp4 +0 -0
- data/spec/fixtures/examples/media/video/train.webm +0 -0
- data/spec/fixtures/examples/min.html +14 -0
- data/spec/fixtures/examples/nomedia.html +20 -0
- data/spec/fixtures/examples/plain.html +10 -0
- data/spec/fixtures/examples/profile.html +25 -0
- data/spec/fixtures/examples/required.html +20 -0
- data/spec/fixtures/examples/robots.txt +4 -0
- data/spec/fixtures/examples/sitemap.xml +23 -0
- data/spec/fixtures/examples/video-array.html +36 -0
- data/spec/fixtures/examples/video-movie.html +42 -0
- data/spec/fixtures/examples/video.html +26 -0
- data/spec/integration/invalid_examples_spec.rb +69 -0
- data/spec/integration/valid_examples_spec.rb +76 -0
- data/spec/open_graph_reader_spec.rb +94 -0
- data/spec/spec_helper.rb +35 -0
- metadata +247 -0
@@ -0,0 +1,82 @@
|
|
1
|
+
require 'faraday'
|
2
|
+
|
3
|
+
module OpenGraphReader
  # Retrieves a single URI over HTTP and hands out its body, provided the
  # response looks like an HTML document.
  #
  # @api private
  class Fetcher
    # Build a fetcher for one URI.
    #
    # The connection is a copy of Faraday's default connection. When the
    # FaradayMiddleware constant is defined, its FollowRedirects middleware
    # is put at the front of the stack unless it is already present.
    #
    # @param [URI] uri the URI to fetch.
    # @raise [ArgumentError] if the argument is not an URI.
    def initialize(uri)
      unless uri.is_a?(URI)
        raise ArgumentError, "url needs to be an instance of URI"
      end

      @uri = uri
      @connection = Faraday.default_connection.dup

      return unless defined?(FaradayMiddleware)

      stack = @connection.builder
      return if stack.handlers.include?(FaradayMiddleware::FollowRedirects)

      stack.insert(0, FaradayMiddleware::FollowRedirects)
    end

    # The URL this fetcher targets, as a string.
    #
    # @return [String]
    def url
      @uri.to_s
    end

    # Issue a GET request for the URI and remember the response.
    #
    # @return [Faraday::Response]
    def fetch
      @get_response = @connection.get(@uri)
    end
    alias_method :fetch_body, :fetch

    # Issue a HEAD request for the URI and remember the response.
    #
    # @return [Faraday::Response]
    def fetch_headers
      @head_response = @connection.head(@uri)
    end

    # The response body, fetching the page first if that has not happened yet.
    #
    # @todo Custom error class
    # @raise [ArgumentError] The received content does not seem to be HTML.
    # @return [String]
    def body
      fetch_body unless fetched?

      if html?
        @get_response.body
      else
        raise ArgumentError, "Did not receive a HTML site at #{@uri}"
      end
    end

    # Whether the target URI seems to return HTML: the response has to be
    # successful and carry a content type containing <tt>text/html</tt>.
    # Performs a HEAD request if neither headers nor body were fetched so far;
    # a GET response is preferred over a HEAD response when both exist.
    #
    # @return [Bool]
    def html?
      fetch_headers unless fetched_headers?

      response = @get_response || @head_response
      return false unless response.success?

      content_type = response['content-type']
      content_type ? content_type.include?('text/html') : false
    end

    # Whether the full page was requested already.
    #
    # @return [Bool]
    def fetched?
      !@get_response.nil?
    end
    alias_method :fetched_body?, :fetched?

    # Whether headers are available, either from a GET or a HEAD request.
    #
    # @return [Bool]
    def fetched_headers?
      !(@get_response.nil? && @head_response.nil?)
    end
  end
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'open_graph_reader/object/registry'
|
2
|
+
require 'open_graph_reader/object/dsl'
|
3
|
+
require 'open_graph_reader/object/dsl/types'
|
4
|
+
|
5
|
+
module OpenGraphReader
  # Mixin with the behaviour shared by all OpenGraph objects. Including it
  # also extends the including class with the {DSL} methods used to describe
  # the object.
  #
  # @example Define a new object
  #   class MyObject
  #     include OpenGraphReader::Object
  #
  #     namespace :my, :object
  #     content :string
  #     string :name, required: true
  #   end
  module Object
    # @private
    def self.included(base)
      base.extend(DSL)
    end

    # The value of the namespace this object represents, if it had one.
    #
    # @return [String, nil]
    attr_reader :content

    # Regular (single valued) properties of this object.
    #
    # @api private
    # @return [{String => String, Object}]
    attr_reader :properties

    # Properties of this object that are arrays.
    #
    # @api private
    # @return [{String => Array<String, Object>}]
    attr_reader :children

    # Create a new object. If your class overrides this don't forget to
    # call <tt>super</tt>.
    def initialize
      @properties = {}
      # Reading an unknown key stores and returns a fresh empty array.
      @children = Hash.new { |hash, key| hash[key] = [] }
    end

    # Whether the class of this object declares the given property.
    #
    # @param [#to_s] name
    # @return [Bool]
    def has_property?(name)
      declared = self.class.available_properties
      declared.include?(name.to_s)
    end

    # Set the content for this object in case it is also a property on
    # another object. The value is passed through the content processor of
    # the class before it is stored.
    #
    # @api private
    # @param [String] value
    def content=(value)
      @content = self.class.content_processor.call(value)
    end

    # Read a property from {#properties}.
    #
    # @api private
    # @param [#to_s] name
    # @todo right error?
    # @raise [InvalidObjectError] If the requested property is undefined.
    # @return [String, Object]
    def [](name)
      unless has_property?(name)
        raise InvalidObjectError, "Undefined property #{name} on #{inspect}"
      end

      properties[name.to_s]
    end

    # Assign a property by calling its public writer method.
    #
    # @api private
    # @param [#to_s] name
    # @param [String, Object] value
    # @raise [InvalidObjectError] If the requested property is undefined.
    def []=(name, value)
      unless has_property?(name)
        raise InvalidObjectError, "Undefined property #{name} on #{inspect}"
      end

      public_send("#{name}=", value)
    end

    # Returns {#content} if available, the inherited representation otherwise.
    #
    # @return [String]
    def to_s
      text = content
      text ? text : super
    end
  end
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
require 'open_graph_reader/object/registry'
|
2
|
+
|
3
|
+
module OpenGraphReader
  module Object
    # This module provides the methods to define new types and properties,
    # as well as setting other metadata necessary to describe an object, such
    # as its namespace.
    module DSL
      # @!macro define_type_description
      #   @param [Symbol] name the name of the property in the current namespace
      #   @param [{Symbol => Bool, Class, Array<String>}] options additional options
      #   @option options [Bool] :required (false) Make the property required.
      #   @option options [Bool] :collection (false) This property can occur multiple times.
      #   @option options [Class] :to This property maps to the given object (optional).
      #   @option options [Array<String>] :verticals This property
      #     belongs to the given verticals of the object (optional).
      #   @option options [::Object] :default Returned by the reader while no
      #     value is stored (optional).
      #
      # @!macro property
      #   @!attribute [rw] $1

      # @!macro [attach] define_type
      #   @!method $1(name, options={})
      #     @!macro define_type_description
      #
      # Defines a new DSL method for modeling a new type. The processor block
      # is stored in {processors} under the type name. The generated DSL
      # method records the property name, registers the <tt>:to</tt> class
      # under the property's namespace and defines the accessors:
      #
      # * for collections a plural reader (<tt>names</tt>) returning all
      #   values and a singular reader returning the first value;
      # * otherwise a reader and a writer, where the writer runs every value
      #   that is not an {Object} through the processor.
      #
      # @yield convert and validate
      # @yieldparam [::Object] value the value to be converted and validated
      # @yieldparam [Array<::Object>] *args any additional arguments
      # @yieldparam [{Symbol => Bool, Class, Array<String>}] options the options hash as last parameter
      def self.define_type(name, &processor)
        processors[name] = processor

        # The block parameter is deliberately not called +name+ so it does
        # not shadow the type name above.
        define_method(name) do |property, *args|
          available_properties << property.to_s
          options = args.last.is_a?(Hash) ? args.pop : {}

          Registry.register [@namespace, property].join(':'), options[:to] if options[:to]

          if options[:verticals]
            options[:verticals].each do |vertical|
              verticals[[@namespace, vertical].join('.')] << property
            end
          end

          if options[:collection]
            define_method("#{property}s") do
              children[property.to_s]
            end

            define_method(property) do
              # TODO raise if required
              # TODO: figure out a sane way to distinguish subobject properties
              children[property.to_s].first || options[:default]
            end
          else
            define_method(property) do
              # TODO raise if required
              properties[property.to_s] || options[:default]
            end

            define_method("#{property}=") do |value|
              # TODO: figure out a sane way to distinguish subobject properties
              value = processor.call(value, *args, options) unless value.is_a? Object
              properties[property.to_s] = value
            end
          end
        end
      end
      singleton_class.send(:alias_method, :define_type_with_args, :define_type)

      # @overload namespace
      #   Get the namespace of this object.
      #
      #   @return [String] A colon separated namespace, for example <tt>og:image</tt>.
      # @overload namespace(*names)
      #   Set the namespace of this object and register the object under it
      #   in the {Registry}.
      #
      #   @param [Array<#to_s>] *names The individual parts of the namespace as list
      #   @example
      #     namespace :og, :image
      def namespace *names
        return @namespace if names.empty?

        @namespace = names.join(':')
        Registry.register @namespace, self
      end

      # Set the type for the content attribute
      #
      # @param [Symbol] type one of the registered types.
      def content type
        @content_processor = DSL.processors[type]
      end

      # The list of defined properties on this object.
      #
      # @return [Array<String>]
      def available_properties
        @available_properties ||= []
      end

      # A map from type names to processing blocks.
      #
      # @api private
      # @return [{Symbol => Proc}]
      def self.processors
        @processors ||= {}
      end

      # The processor for the content attribute. Falls back to a processor
      # that returns the value unchanged when no content type was set.
      #
      # @api private
      # @return [Proc]
      def content_processor
        @content_processor || proc {|value| value }
      end

      # A map from vertical names to attributes that belong to them.
      #
      # @api private
      # @return [{String => Array<String>}]
      def verticals
        @verticals ||= Hash.new {|h, k| h[k] = [] }
      end
    end
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require 'date'
|
2
|
+
|
3
|
+
require 'open_graph_reader/object/dsl'
|
4
|
+
|
5
|
+
module OpenGraphReader
  module Object
    # Definitions of the basic OpenGraph value types. Every processor
    # receives the raw value and returns the converted value or raises
    # InvalidObjectError if the value does not fit the type.
    module DSL
      # Any value, converted to a string.
      #
      # @see http://ogp.me/#string
      define_type :string do |value|
        value.to_s
      end

      # A string that has to start with <tt>http://</tt> or <tt>https://</tt>.
      #
      # @see http://ogp.me/#url
      define_type :url do |value|
        value.to_s.tap do |url|
          unless url.start_with?('http://', 'https://')
            raise InvalidObjectError, "URL #{url.inspect} does not start with http:// or https://"
          end
        end
      end

      # @!method enum(name, allowed, options={})
      #   @param [Array<String>] allowed the list of allowed values
      #   @!macro define_type_description
      # @see http://ogp.me/#enum
      define_type_with_args :enum do |value, allowed|
        unless allowed.include? value
          raise InvalidObjectError, "Expected one of #{allowed.inspect} but was #{value.inspect}"
        end

        value.to_s
      end

      # A value accepted by Kernel#Integer.
      #
      # @see http://ogp.me/#integer
      define_type :integer do |value|
        begin
          Integer(value)
        rescue ArgumentError, TypeError
          # TypeError is what Integer() raises for nil.
          raise InvalidObjectError, "Integer expected, but was #{value.inspect}"
        end
      end

      # An ISO8601 formatted date and time.
      #
      # @see http://ogp.me/#datetime
      define_type :datetime do |value|
        begin
          DateTime.iso8601 value
        rescue ArgumentError, TypeError
          # TypeError is raised for values that are not strings, such as nil.
          raise InvalidObjectError, "ISO8601 datetime expected, but was #{value.inspect}"
        end
      end

      # One of the strings <tt>true</tt>, <tt>false</tt>, <tt>1</tt> or <tt>0</tt>.
      #
      # @see http://ogp.me/#bool
      define_type :boolean do |value|
        {'true' => true, 'false' => false, '1' => true, '0' => false}.fetch(value) do
          raise InvalidObjectError, "Boolean expected, but was #{value.inspect}"
        end
      end

      # A value accepted by Kernel#Float.
      #
      # @see http://ogp.me/#float
      define_type :float do |value|
        begin
          Float(value)
        rescue ArgumentError, TypeError
          # TypeError is what Float() raises for nil.
          raise InvalidObjectError, "Float expected, but was #{value.inspect}"
        end
      end
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'singleton'
|
2
|
+
require 'forwardable'
|
3
|
+
|
4
|
+
module OpenGraphReader
  module Object
    # Process wide lookup table from namespaces to the classes that
    # represent them. The class methods forward to the singleton instance.
    #
    # @api private
    class Registry
      include Singleton
      extend Forwardable

      class << self
        extend Forwardable

        # @!method register(namespace, klass)
        #   Register a new namespace in the registry.
        #
        #   @param [String] namespace The namespace in colon separated form, for example <tt>og:image</tt>.
        #   @param [Class] klass The class to register. It should include {Object}.
        #   @api private
        #
        # @!method registered?(namespace)
        #   Check whether a namespace is registered.
        #
        #   @param [String] namespace The namespace in colon separated form, for example <tt>og:image</tt>.
        #   @return [Bool]
        #   @api private
        #
        # @!method [](namespace)
        #   Fetch the class associated with the given namespace
        #
        #   @param [String] namespace The namespace in colon separated form, for example <tt>og:image</tt>.
        #   @return [Class] The matching class.
        #   @raise [ArgumentError] If the given namespace wasn't registered.
        #   @api private
        %i[register registered? []].each do |forwarded|
          def_delegator :instance, forwarded
        end
      end

      # Writing and membership checks go straight to the underlying hash,
      # each under its hash name and under a registry specific name.
      def_delegator :@namespaces, :[]=
      def_delegator :@namespaces, :has_key?
      def_delegator :@namespaces, :[]=, :register
      def_delegator :@namespaces, :has_key?, :registered?

      # Start with an empty registry.
      def initialize
        @namespaces = {}
      end

      # @see Registry.[]
      def [](namespace)
        return @namespaces[namespace] if registered?(namespace)

        raise ArgumentError, "#{namespace} is not a registered namespace"
      end
    end
  end
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
require 'open_graph_reader/parser/graph'
|
4
|
+
|
5
|
+
module OpenGraphReader
  # Parse OpenGraph tags in a HTML document into a graph.
  #
  # @api private
  class Parser
    # Namespaces found in the <tt>prefix</tt> attribute of the passed
    # document's head tag. Empty until the graph was built.
    #
    # @return [Array<String>]
    attr_reader :additional_namespaces

    # Create a new parser.
    #
    # @param [#to_s, Nokogiri::XML::Node] html the document to parse.
    # @param [String] origin The source the document was obtained from.
    def initialize html, origin=nil
      @doc = to_doc html
      @origin = origin
      @additional_namespaces = []
    end

    # Whether there are any OpenGraph tags at all.
    #
    # @raise [NoOpenGraphDataError] if the document has no head tag.
    # @return [Bool]
    def has_tags?
      !graph.empty?
    end

    # Build and return the {Graph}. The graph is built once and reused.
    #
    # @raise [NoOpenGraphDataError] if the document has no head tag.
    # @return [Graph]
    def graph
      @graph ||= build_graph
    end

    private

    # Walk all matching meta tags of the head and sort them into a graph:
    # every colon separated part of the property name but the last becomes an
    # inner node, the last part becomes a leaf carrying the tag's content.
    def build_graph
      graph = Graph.new
      head = @doc.xpath('/html/head').first

      raise NoOpenGraphDataError, "There's no head tag in #{@doc}" unless head

      head.xpath("meta[#{xpath_condition(head)}]").each do |tag|
        *path, leaf = tag['property'].split(':')
        parent = path.inject(graph.root) {|current, name| find_or_create_child(current, name) }

        # TODO: make stripping configurable?
        # A meta tag without content attribute yields nil here, treat it
        # as empty content instead of failing with NoMethodError.
        parent << Graph::Node.new(leaf, tag['content'].to_s.strip)
      end

      graph
    end

    # Build the XPath predicate selecting the meta tags of interest: those
    # whose property starts with <tt>og:</tt> or with one of the prefixes
    # declared on the head tag. Stores the declared prefixes in
    # {#additional_namespaces}.
    #
    # The colon is part of every match so a declared prefix <tt>foo</tt>
    # does not select properties of an undeclared prefix <tt>foobar</tt>.
    def xpath_condition head
      prefixes = ['og']

      if head['prefix']
        @additional_namespaces = head['prefix'].scan(/(\w+):\s*([^ ]+)/).map(&:first)
        prefixes.concat @additional_namespaces
      end

      prefixes.uniq.map {|prefix| "starts-with(@property, '#{prefix}:')" }.join(' or ')
    end

    # Return the most recently added child of the node with the given name,
    # adding a new one first if there is none.
    def find_or_create_child node, name
      existing = node.children.reverse_each.find {|child| child.name == name }
      return existing if existing

      Graph::Node.new(name).tap {|created| node << created }
    end

    # Use nodes as they are, parse everything else as HTML.
    def to_doc html
      case html
      when Nokogiri::XML::Node
        html
      else
        Nokogiri::HTML.parse(html.to_s)
      end
    end
  end
end
|