open_graph_reader 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/open_graph_reader.rb +10 -10
- data/lib/open_graph_reader/base.rb +9 -2
- data/lib/open_graph_reader/builder.rb +96 -44
- data/lib/open_graph_reader/configuration.rb +36 -10
- data/lib/open_graph_reader/definitions.rb +32 -33
- data/lib/open_graph_reader/fetcher.rb +13 -18
- data/lib/open_graph_reader/object.rb +7 -9
- data/lib/open_graph_reader/object/dsl.rb +58 -43
- data/lib/open_graph_reader/object/dsl/types.rb +51 -35
- data/lib/open_graph_reader/object/registry.rb +3 -3
- data/lib/open_graph_reader/parser.rb +30 -29
- data/lib/open_graph_reader/parser/graph.rb +18 -6
- data/lib/open_graph_reader/version.rb +1 -1
- data/spec/fixtures/real_world/invalid_article_author.html +299 -0
- data/spec/fixtures/real_world/invalid_datetime.html +301 -0
- data/spec/fixtures/real_world/url_path.html +1871 -0
- data/spec/integration/invalid_examples_spec.rb +21 -21
- data/spec/integration/real_world_spec.rb +335 -72
- data/spec/integration/valid_examples_spec.rb +7 -6
- data/spec/open_graph_reader_spec.rb +6 -6
- data/spec/spec_helper.rb +5 -8
- metadata +9 -3
@@ -1,14 +1,14 @@
|
|
1
|
-
require
|
1
|
+
require "faraday"
|
2
2
|
|
3
3
|
begin
|
4
|
-
require
|
4
|
+
require "faraday_middleware/response/follow_redirects"
|
5
5
|
rescue LoadError; end
|
6
6
|
|
7
7
|
begin
|
8
|
-
require
|
8
|
+
require "faraday/cookie_jar"
|
9
9
|
rescue LoadError; end
|
10
10
|
|
11
|
-
require
|
11
|
+
require "open_graph_reader/version"
|
12
12
|
|
13
13
|
module OpenGraphReader
|
14
14
|
# Fetch a URI to retrieve its HTML body, if available.
|
@@ -16,8 +16,8 @@ module OpenGraphReader
|
|
16
16
|
# @api private
|
17
17
|
class Fetcher
|
18
18
|
HEADERS = {
|
19
|
-
|
20
|
-
|
19
|
+
"Accept" => "text/html",
|
20
|
+
"User-Agent" => "OpenGraphReader/#{OpenGraphReader::VERSION} (+https://github.com/jhass/open_graph_reader)"
|
21
21
|
}.freeze
|
22
22
|
|
23
23
|
# Create a new fetcher.
|
@@ -29,13 +29,8 @@ module OpenGraphReader
|
|
29
29
|
@connection = Faraday.default_connection.dup
|
30
30
|
@connection.headers.replace(HEADERS)
|
31
31
|
|
32
|
-
if defined? Faraday::CookieJar
|
33
|
-
|
34
|
-
end
|
35
|
-
|
36
|
-
if defined? FaradayMiddleware
|
37
|
-
prepend_middleware FaradayMiddleware::FollowRedirects
|
38
|
-
end
|
32
|
+
prepend_middleware Faraday::CookieJar if defined? Faraday::CookieJar
|
33
|
+
prepend_middleware FaradayMiddleware::FollowRedirects if defined? FaradayMiddleware
|
39
34
|
end
|
40
35
|
|
41
36
|
# The URL to fetch
|
@@ -81,8 +76,8 @@ module OpenGraphReader
|
|
81
76
|
response = @get_response || @head_response
|
82
77
|
return false unless response
|
83
78
|
return false unless response.success?
|
84
|
-
return false unless response[
|
85
|
-
response[
|
79
|
+
return false unless response["content-type"]
|
80
|
+
response["content-type"].include? "text/html"
|
86
81
|
end
|
87
82
|
|
88
83
|
# Whether the target URI was fetched.
|
@@ -103,9 +98,9 @@ module OpenGraphReader
|
|
103
98
|
private
|
104
99
|
|
105
100
|
def prepend_middleware middleware
|
106
|
-
|
107
|
-
|
108
|
-
|
101
|
+
return if @connection.builder.handlers.include? middleware
|
102
|
+
|
103
|
+
@connection.builder.insert(0, middleware)
|
109
104
|
end
|
110
105
|
end
|
111
106
|
end
|
@@ -1,9 +1,8 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
1
|
+
require "open_graph_reader/object/registry"
|
2
|
+
require "open_graph_reader/object/dsl"
|
3
|
+
require "open_graph_reader/object/dsl/types"
|
4
4
|
|
5
5
|
module OpenGraphReader
|
6
|
-
|
7
6
|
# This module provides the base functionality for all OpenGraph objects
|
8
7
|
# and makes the {DSL} methods for describing them available when included.
|
9
8
|
#
|
@@ -37,7 +36,6 @@ module OpenGraphReader
|
|
37
36
|
# @return [{String => Array<String, Object>}]
|
38
37
|
attr_reader :children
|
39
38
|
|
40
|
-
|
41
39
|
# Create a new object. If your class overrides this, don't forget to call <tt>super</tt>.
|
42
40
|
def initialize
|
43
41
|
@properties = {}
|
@@ -48,7 +46,7 @@ module OpenGraphReader
|
|
48
46
|
#
|
49
47
|
# @param [#to_s] name
|
50
48
|
# @return [Bool]
|
51
|
-
def
|
49
|
+
def property? name
|
52
50
|
self.class.available_properties.include? name.to_s
|
53
51
|
end
|
54
52
|
|
@@ -70,8 +68,8 @@ module OpenGraphReader
|
|
70
68
|
# @raise [UndefinedPropertyError] If the requested property is undefined.
|
71
69
|
# @return [String, Object]
|
72
70
|
def [] name
|
73
|
-
raise UndefinedPropertyError, "Undefined property #{name} on #{inspect}" unless
|
74
|
-
public_send name.to_s
|
71
|
+
raise UndefinedPropertyError, "Undefined property #{name} on #{inspect}" unless property? name
|
72
|
+
public_send name.to_s
|
75
73
|
end
|
76
74
|
|
77
75
|
# Set the property to the given value.
|
@@ -81,7 +79,7 @@ module OpenGraphReader
|
|
81
79
|
# @param [String, Object] value
|
82
80
|
# @raise [UndefinedPropertyError] If the requested property is undefined.
|
83
81
|
def []= name, value
|
84
|
-
if
|
82
|
+
if property?(name)
|
85
83
|
public_send "#{name}=", value
|
86
84
|
elsif OpenGraphReader.config.strict
|
87
85
|
raise UndefinedPropertyError, "Undefined property #{name} on #{inspect}"
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require "open_graph_reader/object/registry"
|
2
2
|
|
3
3
|
module OpenGraphReader
|
4
4
|
module Object
|
@@ -36,49 +36,72 @@ module OpenGraphReader
|
|
36
36
|
options = args.pop if args.last.is_a? Hash
|
37
37
|
options ||= {}
|
38
38
|
|
39
|
-
|
40
|
-
|
41
|
-
Registry.register [@namespace, name].join(':'), options[:to] if options[:to]
|
42
|
-
|
43
|
-
if options[:verticals]
|
44
|
-
options[:verticals].each do |vertical|
|
45
|
-
vertical = [@namespace, vertical].join('.')
|
46
|
-
verticals[vertical] << name.to_s
|
47
|
-
Registry.verticals << vertical
|
48
|
-
end
|
49
|
-
end
|
39
|
+
register_property name, options
|
40
|
+
register_verticals name, options[:verticals]
|
50
41
|
|
51
42
|
if options[:collection]
|
52
|
-
|
53
|
-
children[name.to_s]
|
54
|
-
end
|
55
|
-
|
56
|
-
define_method(name) do
|
57
|
-
value = children[name.to_s].first
|
58
|
-
# @todo figure out a sane way to distinguish subobject properties
|
59
|
-
value.content if value && value.is_a?(Object)
|
60
|
-
value || options[:default]
|
61
|
-
end
|
43
|
+
define_collection name, options
|
62
44
|
else
|
63
|
-
|
64
|
-
properties[name.to_s] || options[:default]
|
65
|
-
end
|
66
|
-
|
67
|
-
define_method("#{name}=") do |value|
|
68
|
-
# @todo figure out a sane way to distinguish subobject properties
|
69
|
-
unless value.is_a? Object
|
70
|
-
value.downcase! if options[:downcase]
|
71
|
-
value = processor.call(value, *args, options)
|
72
|
-
end
|
73
|
-
properties[name.to_s] = value
|
74
|
-
end
|
45
|
+
define_single name, options, args, processor
|
75
46
|
end
|
76
47
|
end
|
77
48
|
end
|
78
49
|
|
50
|
+
# @api private
|
51
|
+
def register_property name, options
|
52
|
+
available_properties << name.to_s
|
53
|
+
required_properties << name.to_s if options[:required]
|
54
|
+
Registry.register [namespace, name].join(":"), options[:to] if options[:to]
|
55
|
+
end
|
56
|
+
|
57
|
+
# @api private
|
58
|
+
def register_verticals name, assigned_verticals
|
59
|
+
[*assigned_verticals].each do |vertical|
|
60
|
+
vertical = [namespace, vertical].join(".")
|
61
|
+
verticals[vertical] << name.to_s
|
62
|
+
Registry.verticals << vertical
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
# @api private
|
67
|
+
def define_collection name, options
|
68
|
+
define_method("#{name}s") do
|
69
|
+
children[name.to_s]
|
70
|
+
end
|
71
|
+
|
72
|
+
define_method(name) do
|
73
|
+
value = children[name.to_s].first
|
74
|
+
# @todo figure out a sane way to distinguish subobject properties
|
75
|
+
value.content if value && value.is_a?(Object)
|
76
|
+
value || options[:default]
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
# @api private
|
81
|
+
def define_single name, options, args, processor
|
82
|
+
define_method(name) do
|
83
|
+
properties[name.to_s] || options[:default]
|
84
|
+
end
|
85
|
+
|
86
|
+
define_method("#{name}=") do |value|
|
87
|
+
# @todo figure out a sane way to distinguish subobject properties
|
88
|
+
unless value.is_a? Object
|
89
|
+
value.downcase! if options[:downcase]
|
90
|
+
value = processor.call(value, *args, options)
|
91
|
+
end
|
92
|
+
properties[name.to_s] = value
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
79
96
|
# Alias to trick YARD
|
80
97
|
singleton_class.send(:alias_method, :define_type_no_doc, :define_type)
|
81
98
|
|
99
|
+
# The processor for the content attribute.
|
100
|
+
#
|
101
|
+
# @api private
|
102
|
+
# @return [Proc]
|
103
|
+
attr_reader :content_processor
|
104
|
+
|
82
105
|
# @overload namespace
|
83
106
|
# Get the namespace of this object.
|
84
107
|
#
|
@@ -91,7 +114,7 @@ module OpenGraphReader
|
|
91
114
|
# namespace :og, :image
|
92
115
|
def namespace *names
|
93
116
|
return @namespace if names.empty?
|
94
|
-
@namespace = names.join(
|
117
|
+
@namespace = names.join(":")
|
95
118
|
Registry.register @namespace, self
|
96
119
|
end
|
97
120
|
|
@@ -136,14 +159,6 @@ module OpenGraphReader
|
|
136
159
|
@processors ||= {}
|
137
160
|
end
|
138
161
|
|
139
|
-
# The processor for the content attribute.
|
140
|
-
#
|
141
|
-
# @api private
|
142
|
-
# @return [Proc]
|
143
|
-
def content_processor
|
144
|
-
@content_processor
|
145
|
-
end
|
146
|
-
|
147
162
|
# A map from vertical names to attributes that belong to them.
|
148
163
|
#
|
149
164
|
# @api private
|
@@ -1,7 +1,7 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require "date"
|
2
|
+
require "uri"
|
3
3
|
|
4
|
-
require
|
4
|
+
require "open_graph_reader/object/dsl"
|
5
5
|
|
6
6
|
module OpenGraphReader
|
7
7
|
module Object
|
@@ -19,28 +19,33 @@ module OpenGraphReader
|
|
19
19
|
define_type_no_doc :url do |value, options|
|
20
20
|
value = value.to_s
|
21
21
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
raise InvalidObjectError,
|
22
|
+
next value if value.start_with?("http://") || value.start_with?("https://")
|
23
|
+
|
24
|
+
if options[:image] && OpenGraphReader.config.synthesize_image_url || OpenGraphReader.config.synthesize_url
|
25
|
+
unless OpenGraphReader.current_origin
|
26
|
+
next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
|
27
|
+
|
28
|
+
raise ArgumentError, "Enabled image url synthesization but didn't pass an origin"
|
29
|
+
end
|
30
|
+
|
31
|
+
# Synthesize the missing scheme of a scheme-relative URL as https (//example.org/foo/bar.png)
|
32
|
+
next "https:#{value}" if value.start_with?("//") && value.split("/", 4)[2] =~ URI::HOST
|
33
|
+
|
34
|
+
# Synthesize absolute path (/foo/bar.png)
|
35
|
+
begin
|
36
|
+
value = "/#{value}" unless value.start_with? "/" # Normalize to absolute path
|
37
|
+
uri = URI.parse(OpenGraphReader.current_origin)
|
38
|
+
uri.path = value
|
39
|
+
value = uri.to_s
|
40
|
+
rescue
|
41
|
+
next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
|
42
|
+
raise InvalidObjectError,
|
43
|
+
"URL #{value.inspect} does not start with http:// or https:// and failed to "\
|
44
|
+
"synthesize a full URL"
|
43
45
|
end
|
46
|
+
elsif options.has_key?(:to) && OpenGraphReader.config.validate_references
|
47
|
+
next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
|
48
|
+
raise InvalidObjectError, "URL #{value.inspect} does not start with http:// or https://"
|
44
49
|
end
|
45
50
|
|
46
51
|
value
|
@@ -50,46 +55,57 @@ module OpenGraphReader
|
|
50
55
|
# @param [Array<String>] allowed the list of allowed values
|
51
56
|
# @!macro define_type_description
|
52
57
|
# @see http://ogp.me/#enum
|
53
|
-
define_type_no_doc :enum do |value, allowed|
|
58
|
+
define_type_no_doc :enum do |value, allowed, options|
|
59
|
+
value = value.to_s
|
60
|
+
|
54
61
|
unless allowed.include? value
|
62
|
+
next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
|
55
63
|
raise InvalidObjectError, "Expected one of #{allowed.inspect} but was #{value.inspect}"
|
56
64
|
end
|
57
65
|
|
58
|
-
value
|
66
|
+
value
|
59
67
|
end
|
60
68
|
|
61
69
|
# @see http://ogp.me/#integer
|
62
|
-
define_type :integer do |value|
|
70
|
+
define_type :integer do |value, options|
|
63
71
|
begin
|
64
72
|
Integer(value)
|
65
|
-
rescue
|
73
|
+
rescue ArgumentError
|
74
|
+
next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
|
66
75
|
raise InvalidObjectError, "Integer expected, but was #{value.inspect}"
|
67
76
|
end
|
68
77
|
end
|
69
78
|
|
70
79
|
# @see http://ogp.me/#datetime
|
71
|
-
define_type :datetime do |value|
|
80
|
+
define_type :datetime do |value, options|
|
72
81
|
begin
|
73
|
-
|
74
|
-
|
82
|
+
if OpenGraphReader.config.guess_datetime_format
|
83
|
+
DateTime.parse value
|
84
|
+
else
|
85
|
+
DateTime.iso8601 value
|
86
|
+
end
|
87
|
+
rescue ArgumentError
|
88
|
+
next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
|
75
89
|
raise InvalidObjectError, "ISO8601 datetime expected, but was #{value.inspect}"
|
76
90
|
end
|
77
91
|
end
|
78
92
|
|
79
93
|
# @see http://ogp.me/#bool
|
80
|
-
define_type :boolean do |value|
|
81
|
-
{
|
94
|
+
define_type :boolean do |value, options|
|
95
|
+
{"true" => true, "false" => false, "1" => true, "0" => false}[value].tap {|bool|
|
82
96
|
if bool.nil?
|
97
|
+
next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
|
83
98
|
raise InvalidObjectError, "Boolean expected, but was #{value.inspect}"
|
84
99
|
end
|
85
100
|
}
|
86
101
|
end
|
87
102
|
|
88
103
|
# @see http://ogp.me/#float
|
89
|
-
define_type :float do |value|
|
104
|
+
define_type :float do |value, options|
|
90
105
|
begin
|
91
106
|
Float(value)
|
92
|
-
rescue ArgumentError
|
107
|
+
rescue ArgumentError
|
108
|
+
next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
|
93
109
|
raise InvalidObjectError, "Float expected, but was #{value.inspect}"
|
94
110
|
end
|
95
111
|
end
|
@@ -1,6 +1,6 @@
|
|
1
|
-
require
|
1
|
+
require "nokogiri"
|
2
2
|
|
3
|
-
require
|
3
|
+
require "open_graph_reader/parser/graph"
|
4
4
|
|
5
5
|
module OpenGraphReader
|
6
6
|
# Parse OpenGraph tags in an HTML document into a graph.
|
@@ -8,14 +8,14 @@ module OpenGraphReader
|
|
8
8
|
# @api private
|
9
9
|
class Parser
|
10
10
|
# Some helper methods for Nokogiri
|
11
|
-
XPathHelpers
|
11
|
+
module XPathHelpers
|
12
12
|
# Helper to select the nodes whose string value starts with the given string, ignoring case
|
13
|
-
def ci_starts_with node_set, string
|
13
|
+
def self.ci_starts_with node_set, string
|
14
14
|
node_set.select {|node|
|
15
15
|
node.to_s.downcase.start_with? string.downcase
|
16
16
|
}
|
17
17
|
end
|
18
|
-
end
|
18
|
+
end
|
19
19
|
|
20
20
|
# Namespaces found in the passed document's head tag
|
21
21
|
#
|
@@ -35,7 +35,7 @@ module OpenGraphReader
|
|
35
35
|
# Whether there are any OpenGraph tags at all.
|
36
36
|
#
|
37
37
|
# @return [Bool]
|
38
|
-
def
|
38
|
+
def any_tags?
|
39
39
|
!graph.empty?
|
40
40
|
end
|
41
41
|
|
@@ -50,45 +50,46 @@ module OpenGraphReader
|
|
50
50
|
#
|
51
51
|
# @return [String]
|
52
52
|
def title
|
53
|
-
@doc.xpath(
|
53
|
+
@doc.xpath("/html/head/title").first.text
|
54
54
|
end
|
55
55
|
|
56
56
|
private
|
57
57
|
|
58
58
|
def build_graph
|
59
59
|
graph = Graph.new
|
60
|
-
|
60
|
+
|
61
|
+
meta_tags.each do |tag|
|
62
|
+
*path, leaf = tag["property"].downcase.split(":")
|
63
|
+
node = graph.find_or_create_path path
|
64
|
+
|
65
|
+
# @todo make stripping configurable?
|
66
|
+
node << Graph::Node.new(leaf, tag["content"].strip)
|
67
|
+
end
|
68
|
+
|
69
|
+
graph
|
70
|
+
end
|
71
|
+
|
72
|
+
def meta_tags
|
73
|
+
head = @doc.xpath("/html/head").first
|
61
74
|
|
62
75
|
raise NoOpenGraphDataError, "There's no head tag in #{@doc}" unless head
|
63
76
|
|
77
|
+
head.xpath("meta[#{xpath_condition(head)}]", XPathHelpers)
|
78
|
+
end
|
79
|
+
|
80
|
+
def xpath_condition head
|
64
81
|
condition = "ci_starts_with(@property, 'og:')"
|
65
|
-
|
66
|
-
|
82
|
+
|
83
|
+
if head["prefix"]
|
84
|
+
@additional_namespaces = head["prefix"].scan(/(\w+):\s*([^ ]+)/)
|
67
85
|
@additional_namespaces.map! {|prefix, _| prefix.downcase }
|
68
86
|
@additional_namespaces.each do |additional_namespace|
|
69
|
-
next if additional_namespace ==
|
87
|
+
next if additional_namespace == "og"
|
70
88
|
condition << " or ci_starts_with(@property, '#{additional_namespace}')"
|
71
89
|
end
|
72
90
|
end
|
73
91
|
|
74
|
-
|
75
|
-
*path, leaf = tag['property'].downcase.split(':')
|
76
|
-
node = path.inject(graph.root) {|node, name|
|
77
|
-
child = node.children.reverse.find {|child| child.name == name }
|
78
|
-
|
79
|
-
unless child
|
80
|
-
child = Graph::Node.new name
|
81
|
-
node << child
|
82
|
-
end
|
83
|
-
|
84
|
-
child
|
85
|
-
}
|
86
|
-
|
87
|
-
# @todo make stripping configurable?
|
88
|
-
node << Graph::Node.new(leaf, tag['content'].strip)
|
89
|
-
end
|
90
|
-
|
91
|
-
graph
|
92
|
+
condition
|
92
93
|
end
|
93
94
|
|
94
95
|
def to_doc html
|