open_graph_reader 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/open_graph_reader.rb +10 -10
- data/lib/open_graph_reader/base.rb +9 -2
- data/lib/open_graph_reader/builder.rb +96 -44
- data/lib/open_graph_reader/configuration.rb +36 -10
- data/lib/open_graph_reader/definitions.rb +32 -33
- data/lib/open_graph_reader/fetcher.rb +13 -18
- data/lib/open_graph_reader/object.rb +7 -9
- data/lib/open_graph_reader/object/dsl.rb +58 -43
- data/lib/open_graph_reader/object/dsl/types.rb +51 -35
- data/lib/open_graph_reader/object/registry.rb +3 -3
- data/lib/open_graph_reader/parser.rb +30 -29
- data/lib/open_graph_reader/parser/graph.rb +18 -6
- data/lib/open_graph_reader/version.rb +1 -1
- data/spec/fixtures/real_world/invalid_article_author.html +299 -0
- data/spec/fixtures/real_world/invalid_datetime.html +301 -0
- data/spec/fixtures/real_world/url_path.html +1871 -0
- data/spec/integration/invalid_examples_spec.rb +21 -21
- data/spec/integration/real_world_spec.rb +335 -72
- data/spec/integration/valid_examples_spec.rb +7 -6
- data/spec/open_graph_reader_spec.rb +6 -6
- data/spec/spec_helper.rb +5 -8
- metadata +9 -3
@@ -1,14 +1,14 @@
|
|
1
|
-
require
|
1
|
+
require "faraday"
|
2
2
|
|
3
3
|
begin
|
4
|
-
require
|
4
|
+
require "faraday_middleware/response/follow_redirects"
|
5
5
|
rescue LoadError; end
|
6
6
|
|
7
7
|
begin
|
8
|
-
require
|
8
|
+
require "faraday/cookie_jar"
|
9
9
|
rescue LoadError; end
|
10
10
|
|
11
|
-
require
|
11
|
+
require "open_graph_reader/version"
|
12
12
|
|
13
13
|
module OpenGraphReader
|
14
14
|
# Fetch an URI to retrieve its HTML body, if available.
|
@@ -16,8 +16,8 @@ module OpenGraphReader
|
|
16
16
|
# @api private
|
17
17
|
class Fetcher
|
18
18
|
HEADERS = {
|
19
|
-
|
20
|
-
|
19
|
+
"Accept" => "text/html",
|
20
|
+
"User-Agent" => "OpenGraphReader/#{OpenGraphReader::VERSION} (+https://github.com/jhass/open_graph_reader)"
|
21
21
|
}.freeze
|
22
22
|
|
23
23
|
# Create a new fetcher.
|
@@ -29,13 +29,8 @@ module OpenGraphReader
|
|
29
29
|
@connection = Faraday.default_connection.dup
|
30
30
|
@connection.headers.replace(HEADERS)
|
31
31
|
|
32
|
-
if defined? Faraday::CookieJar
|
33
|
-
|
34
|
-
end
|
35
|
-
|
36
|
-
if defined? FaradayMiddleware
|
37
|
-
prepend_middleware FaradayMiddleware::FollowRedirects
|
38
|
-
end
|
32
|
+
prepend_middleware Faraday::CookieJar if defined? Faraday::CookieJar
|
33
|
+
prepend_middleware FaradayMiddleware::FollowRedirects if defined? FaradayMiddleware
|
39
34
|
end
|
40
35
|
|
41
36
|
# The URL to fetch
|
@@ -81,8 +76,8 @@ module OpenGraphReader
|
|
81
76
|
response = @get_response || @head_response
|
82
77
|
return false unless response
|
83
78
|
return false unless response.success?
|
84
|
-
return false unless response[
|
85
|
-
response[
|
79
|
+
return false unless response["content-type"]
|
80
|
+
response["content-type"].include? "text/html"
|
86
81
|
end
|
87
82
|
|
88
83
|
# Whether the target URI was fetched.
|
@@ -103,9 +98,9 @@ module OpenGraphReader
|
|
103
98
|
private
|
104
99
|
|
105
100
|
def prepend_middleware middleware
|
106
|
-
|
107
|
-
|
108
|
-
|
101
|
+
return if @connection.builder.handlers.include? middleware
|
102
|
+
|
103
|
+
@connection.builder.insert(0, middleware)
|
109
104
|
end
|
110
105
|
end
|
111
106
|
end
|
@@ -1,9 +1,8 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
1
|
+
require "open_graph_reader/object/registry"
|
2
|
+
require "open_graph_reader/object/dsl"
|
3
|
+
require "open_graph_reader/object/dsl/types"
|
4
4
|
|
5
5
|
module OpenGraphReader
|
6
|
-
|
7
6
|
# This module provides the base functionality for all OpenGraph objects
|
8
7
|
# and makes the {DSL} methods for describing them available when included.
|
9
8
|
#
|
@@ -37,7 +36,6 @@ module OpenGraphReader
|
|
37
36
|
# @return [{String => Array<String, Object>}]
|
38
37
|
attr_reader :children
|
39
38
|
|
40
|
-
|
41
39
|
# Create a new object. If your class overrides this don't forget to call <tt>super</tt>.
|
42
40
|
def initialize
|
43
41
|
@properties = {}
|
@@ -48,7 +46,7 @@ module OpenGraphReader
|
|
48
46
|
#
|
49
47
|
# @param [#to_s] name
|
50
48
|
# @return [Bool]
|
51
|
-
def
|
49
|
+
def property? name
|
52
50
|
self.class.available_properties.include? name.to_s
|
53
51
|
end
|
54
52
|
|
@@ -70,8 +68,8 @@ module OpenGraphReader
|
|
70
68
|
# @raise [UndefinedPropertyError] If the requested property is undefined.
|
71
69
|
# @return [String, Object]
|
72
70
|
def [] name
|
73
|
-
raise UndefinedPropertyError, "Undefined property #{name} on #{inspect}" unless
|
74
|
-
public_send name.to_s
|
71
|
+
raise UndefinedPropertyError, "Undefined property #{name} on #{inspect}" unless property? name
|
72
|
+
public_send name.to_s
|
75
73
|
end
|
76
74
|
|
77
75
|
# Set the property to the given value.
|
@@ -81,7 +79,7 @@ module OpenGraphReader
|
|
81
79
|
# @param [String, Object] value
|
82
80
|
# @raise [UndefinedPropertyError] If the requested property is undefined.
|
83
81
|
def []= name, value
|
84
|
-
if
|
82
|
+
if property?(name)
|
85
83
|
public_send "#{name}=", value
|
86
84
|
elsif OpenGraphReader.config.strict
|
87
85
|
raise UndefinedPropertyError, "Undefined property #{name} on #{inspect}"
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require "open_graph_reader/object/registry"
|
2
2
|
|
3
3
|
module OpenGraphReader
|
4
4
|
module Object
|
@@ -36,49 +36,72 @@ module OpenGraphReader
|
|
36
36
|
options = args.pop if args.last.is_a? Hash
|
37
37
|
options ||= {}
|
38
38
|
|
39
|
-
|
40
|
-
|
41
|
-
Registry.register [@namespace, name].join(':'), options[:to] if options[:to]
|
42
|
-
|
43
|
-
if options[:verticals]
|
44
|
-
options[:verticals].each do |vertical|
|
45
|
-
vertical = [@namespace, vertical].join('.')
|
46
|
-
verticals[vertical] << name.to_s
|
47
|
-
Registry.verticals << vertical
|
48
|
-
end
|
49
|
-
end
|
39
|
+
register_property name, options
|
40
|
+
register_verticals name, options[:verticals]
|
50
41
|
|
51
42
|
if options[:collection]
|
52
|
-
|
53
|
-
children[name.to_s]
|
54
|
-
end
|
55
|
-
|
56
|
-
define_method(name) do
|
57
|
-
value = children[name.to_s].first
|
58
|
-
# @todo figure out a sane way to distinguish subobject properties
|
59
|
-
value.content if value && value.is_a?(Object)
|
60
|
-
value || options[:default]
|
61
|
-
end
|
43
|
+
define_collection name, options
|
62
44
|
else
|
63
|
-
|
64
|
-
properties[name.to_s] || options[:default]
|
65
|
-
end
|
66
|
-
|
67
|
-
define_method("#{name}=") do |value|
|
68
|
-
# @todo figure out a sane way to distinguish subobject properties
|
69
|
-
unless value.is_a? Object
|
70
|
-
value.downcase! if options[:downcase]
|
71
|
-
value = processor.call(value, *args, options)
|
72
|
-
end
|
73
|
-
properties[name.to_s] = value
|
74
|
-
end
|
45
|
+
define_single name, options, args, processor
|
75
46
|
end
|
76
47
|
end
|
77
48
|
end
|
78
49
|
|
50
|
+
# @api private
|
51
|
+
def register_property name, options
|
52
|
+
available_properties << name.to_s
|
53
|
+
required_properties << name.to_s if options[:required]
|
54
|
+
Registry.register [namespace, name].join(":"), options[:to] if options[:to]
|
55
|
+
end
|
56
|
+
|
57
|
+
# @api private
|
58
|
+
def register_verticals name, assigned_verticals
|
59
|
+
[*assigned_verticals].each do |vertical|
|
60
|
+
vertical = [namespace, vertical].join(".")
|
61
|
+
verticals[vertical] << name.to_s
|
62
|
+
Registry.verticals << vertical
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
# @api private
|
67
|
+
def define_collection name, options
|
68
|
+
define_method("#{name}s") do
|
69
|
+
children[name.to_s]
|
70
|
+
end
|
71
|
+
|
72
|
+
define_method(name) do
|
73
|
+
value = children[name.to_s].first
|
74
|
+
# @todo figure out a sane way to distinguish subobject properties
|
75
|
+
value.content if value && value.is_a?(Object)
|
76
|
+
value || options[:default]
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
# @api private
|
81
|
+
def define_single name, options, args, processor
|
82
|
+
define_method(name) do
|
83
|
+
properties[name.to_s] || options[:default]
|
84
|
+
end
|
85
|
+
|
86
|
+
define_method("#{name}=") do |value|
|
87
|
+
# @todo figure out a sane way to distinguish subobject properties
|
88
|
+
unless value.is_a? Object
|
89
|
+
value.downcase! if options[:downcase]
|
90
|
+
value = processor.call(value, *args, options)
|
91
|
+
end
|
92
|
+
properties[name.to_s] = value
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
79
96
|
# Alias to trick YARD
|
80
97
|
singleton_class.send(:alias_method, :define_type_no_doc, :define_type)
|
81
98
|
|
99
|
+
# The processor for the content attribute.
|
100
|
+
#
|
101
|
+
# @api private
|
102
|
+
# @return [Proc]
|
103
|
+
attr_reader :content_processor
|
104
|
+
|
82
105
|
# @overload namespace
|
83
106
|
# Get the namespace of this object.
|
84
107
|
#
|
@@ -91,7 +114,7 @@ module OpenGraphReader
|
|
91
114
|
# namespace :og, :image
|
92
115
|
def namespace *names
|
93
116
|
return @namespace if names.empty?
|
94
|
-
@namespace = names.join(
|
117
|
+
@namespace = names.join(":")
|
95
118
|
Registry.register @namespace, self
|
96
119
|
end
|
97
120
|
|
@@ -136,14 +159,6 @@ module OpenGraphReader
|
|
136
159
|
@processors ||= {}
|
137
160
|
end
|
138
161
|
|
139
|
-
# The processor for the content attribute.
|
140
|
-
#
|
141
|
-
# @api private
|
142
|
-
# @return [Proc]
|
143
|
-
def content_processor
|
144
|
-
@content_processor
|
145
|
-
end
|
146
|
-
|
147
162
|
# A map from vertical names to attributes that belong to them.
|
148
163
|
#
|
149
164
|
# @api private
|
@@ -1,7 +1,7 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require "date"
|
2
|
+
require "uri"
|
3
3
|
|
4
|
-
require
|
4
|
+
require "open_graph_reader/object/dsl"
|
5
5
|
|
6
6
|
module OpenGraphReader
|
7
7
|
module Object
|
@@ -19,28 +19,33 @@ module OpenGraphReader
|
|
19
19
|
define_type_no_doc :url do |value, options|
|
20
20
|
value = value.to_s
|
21
21
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
raise InvalidObjectError,
|
22
|
+
next value if value.start_with?("http://") || value.start_with?("https://")
|
23
|
+
|
24
|
+
if options[:image] && OpenGraphReader.config.synthesize_image_url || OpenGraphReader.config.synthesize_url
|
25
|
+
unless OpenGraphReader.current_origin
|
26
|
+
next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
|
27
|
+
|
28
|
+
raise ArgumentError, "Enabled image url synthesization but didn't pass an origin"
|
29
|
+
end
|
30
|
+
|
31
|
+
# Synthesize scheme hack to https (//example.org/foo/bar.png)
|
32
|
+
next "https:#{value}" if value.start_with?("//") && value.split("/", 4)[2] =~ URI::HOST
|
33
|
+
|
34
|
+
# Synthesize absolute path (/foo/bar.png)
|
35
|
+
begin
|
36
|
+
value = "/#{value}" unless value.start_with? "/" # Normalize to absolute path
|
37
|
+
uri = URI.parse(OpenGraphReader.current_origin)
|
38
|
+
uri.path = value
|
39
|
+
value = uri.to_s
|
40
|
+
rescue
|
41
|
+
next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
|
42
|
+
raise InvalidObjectError,
|
43
|
+
"URL #{value.inspect} does not start with http:// or https:// and failed to "\
|
44
|
+
"synthesize a full URL"
|
43
45
|
end
|
46
|
+
elsif options.has_key?(:to) && OpenGraphReader.config.validate_references
|
47
|
+
next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
|
48
|
+
raise InvalidObjectError, "URL #{value.inspect} does not start with http:// or https://"
|
44
49
|
end
|
45
50
|
|
46
51
|
value
|
@@ -50,46 +55,57 @@ module OpenGraphReader
|
|
50
55
|
# @param [Array<String>] allowed the list of allowed values
|
51
56
|
# @!macro define_type_description
|
52
57
|
# @see http://ogp.me/#enum
|
53
|
-
define_type_no_doc :enum do |value, allowed|
|
58
|
+
define_type_no_doc :enum do |value, allowed, options|
|
59
|
+
value = value.to_s
|
60
|
+
|
54
61
|
unless allowed.include? value
|
62
|
+
next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
|
55
63
|
raise InvalidObjectError, "Expected one of #{allowed.inspect} but was #{value.inspect}"
|
56
64
|
end
|
57
65
|
|
58
|
-
value
|
66
|
+
value
|
59
67
|
end
|
60
68
|
|
61
69
|
# @see http://ogp.me/#integer
|
62
|
-
define_type :integer do |value|
|
70
|
+
define_type :integer do |value, options|
|
63
71
|
begin
|
64
72
|
Integer(value)
|
65
|
-
rescue
|
73
|
+
rescue ArgumentError
|
74
|
+
next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
|
66
75
|
raise InvalidObjectError, "Integer expected, but was #{value.inspect}"
|
67
76
|
end
|
68
77
|
end
|
69
78
|
|
70
79
|
# @see http://ogp.me/#datetime
|
71
|
-
define_type :datetime do |value|
|
80
|
+
define_type :datetime do |value, options|
|
72
81
|
begin
|
73
|
-
|
74
|
-
|
82
|
+
if OpenGraphReader.config.guess_datetime_format
|
83
|
+
DateTime.parse value
|
84
|
+
else
|
85
|
+
DateTime.iso8601 value
|
86
|
+
end
|
87
|
+
rescue ArgumentError
|
88
|
+
next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
|
75
89
|
raise InvalidObjectError, "ISO8601 datetime expected, but was #{value.inspect}"
|
76
90
|
end
|
77
91
|
end
|
78
92
|
|
79
93
|
# @see http://ogp.me/#bool
|
80
|
-
define_type :boolean do |value|
|
81
|
-
{
|
94
|
+
define_type :boolean do |value, options|
|
95
|
+
{"true" => true, "false" => false, "1" => true, "0" => false}[value].tap {|bool|
|
82
96
|
if bool.nil?
|
97
|
+
next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
|
83
98
|
raise InvalidObjectError, "Boolean expected, but was #{value.inspect}"
|
84
99
|
end
|
85
100
|
}
|
86
101
|
end
|
87
102
|
|
88
103
|
# @see http://ogp.me/#float
|
89
|
-
define_type :float do |value|
|
104
|
+
define_type :float do |value, options|
|
90
105
|
begin
|
91
106
|
Float(value)
|
92
|
-
rescue ArgumentError
|
107
|
+
rescue ArgumentError
|
108
|
+
next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
|
93
109
|
raise InvalidObjectError, "Float expected, but was #{value.inspect}"
|
94
110
|
end
|
95
111
|
end
|
@@ -1,6 +1,6 @@
|
|
1
|
-
require
|
1
|
+
require "nokogiri"
|
2
2
|
|
3
|
-
require
|
3
|
+
require "open_graph_reader/parser/graph"
|
4
4
|
|
5
5
|
module OpenGraphReader
|
6
6
|
# Parse OpenGraph tags in a HTML document into a graph.
|
@@ -8,14 +8,14 @@ module OpenGraphReader
|
|
8
8
|
# @api private
|
9
9
|
class Parser
|
10
10
|
# Some helper methods for Nokogiri
|
11
|
-
XPathHelpers
|
11
|
+
module XPathHelpers
|
12
12
|
# Helper to lowercase all given properties
|
13
|
-
def ci_starts_with node_set, string
|
13
|
+
def self.ci_starts_with node_set, string
|
14
14
|
node_set.select {|node|
|
15
15
|
node.to_s.downcase.start_with? string.downcase
|
16
16
|
}
|
17
17
|
end
|
18
|
-
end
|
18
|
+
end
|
19
19
|
|
20
20
|
# Namespaces found in the passed documents head tag
|
21
21
|
#
|
@@ -35,7 +35,7 @@ module OpenGraphReader
|
|
35
35
|
# Whether there are any OpenGraph tags at all.
|
36
36
|
#
|
37
37
|
# @return [Bool]
|
38
|
-
def
|
38
|
+
def any_tags?
|
39
39
|
!graph.empty?
|
40
40
|
end
|
41
41
|
|
@@ -50,45 +50,46 @@ module OpenGraphReader
|
|
50
50
|
#
|
51
51
|
# @return [String]
|
52
52
|
def title
|
53
|
-
@doc.xpath(
|
53
|
+
@doc.xpath("/html/head/title").first.text
|
54
54
|
end
|
55
55
|
|
56
56
|
private
|
57
57
|
|
58
58
|
def build_graph
|
59
59
|
graph = Graph.new
|
60
|
-
|
60
|
+
|
61
|
+
meta_tags.each do |tag|
|
62
|
+
*path, leaf = tag["property"].downcase.split(":")
|
63
|
+
node = graph.find_or_create_path path
|
64
|
+
|
65
|
+
# @todo make stripping configurable?
|
66
|
+
node << Graph::Node.new(leaf, tag["content"].strip)
|
67
|
+
end
|
68
|
+
|
69
|
+
graph
|
70
|
+
end
|
71
|
+
|
72
|
+
def meta_tags
|
73
|
+
head = @doc.xpath("/html/head").first
|
61
74
|
|
62
75
|
raise NoOpenGraphDataError, "There's no head tag in #{@doc}" unless head
|
63
76
|
|
77
|
+
head.xpath("meta[#{xpath_condition(head)}]", XPathHelpers)
|
78
|
+
end
|
79
|
+
|
80
|
+
def xpath_condition head
|
64
81
|
condition = "ci_starts_with(@property, 'og:')"
|
65
|
-
|
66
|
-
|
82
|
+
|
83
|
+
if head["prefix"]
|
84
|
+
@additional_namespaces = head["prefix"].scan(/(\w+):\s*([^ ]+)/)
|
67
85
|
@additional_namespaces.map! {|prefix, _| prefix.downcase }
|
68
86
|
@additional_namespaces.each do |additional_namespace|
|
69
|
-
next if additional_namespace ==
|
87
|
+
next if additional_namespace == "og"
|
70
88
|
condition << " or ci_starts_with(@property, '#{additional_namespace}')"
|
71
89
|
end
|
72
90
|
end
|
73
91
|
|
74
|
-
|
75
|
-
*path, leaf = tag['property'].downcase.split(':')
|
76
|
-
node = path.inject(graph.root) {|node, name|
|
77
|
-
child = node.children.reverse.find {|child| child.name == name }
|
78
|
-
|
79
|
-
unless child
|
80
|
-
child = Graph::Node.new name
|
81
|
-
node << child
|
82
|
-
end
|
83
|
-
|
84
|
-
child
|
85
|
-
}
|
86
|
-
|
87
|
-
# @todo make stripping configurable?
|
88
|
-
node << Graph::Node.new(leaf, tag['content'].strip)
|
89
|
-
end
|
90
|
-
|
91
|
-
graph
|
92
|
+
condition
|
92
93
|
end
|
93
94
|
|
94
95
|
def to_doc html
|