open_graph_reader 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/open_graph_reader.rb +25 -5
- data/lib/open_graph_reader/base.rb +7 -1
- data/lib/open_graph_reader/builder.rb +68 -40
- data/lib/open_graph_reader/configuration.rb +51 -0
- data/lib/open_graph_reader/definitions.rb +1 -1
- data/lib/open_graph_reader/fetcher.rb +21 -3
- data/lib/open_graph_reader/object.rb +6 -3
- data/lib/open_graph_reader/object/dsl.rb +36 -12
- data/lib/open_graph_reader/object/dsl/types.rb +4 -2
- data/lib/open_graph_reader/object/registry.rb +12 -2
- data/lib/open_graph_reader/parser.rb +17 -6
- data/lib/open_graph_reader/version.rb +1 -1
- data/spec/fixtures/real_world/missing_image.html +985 -0
- data/spec/fixtures/real_world/mixed_case_properties.html +1139 -0
- data/spec/fixtures/real_world/mixed_case_type.html +1008 -0
- data/spec/fixtures/real_world/not_a_reference.html +814 -0
- data/spec/fixtures/real_world/undefined_property.html +491 -0
- data/spec/fixtures/real_world/unknown_namespace.html +2033 -0
- data/spec/fixtures/real_world/unknown_type.html +2032 -0
- data/spec/integration/invalid_examples_spec.rb +42 -3
- data/spec/integration/real_world_spec.rb +121 -0
- data/spec/integration/valid_examples_spec.rb +0 -1
- data/spec/spec_helper.rb +9 -1
- metadata +20 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bea1c38c6567749ce9bd8b369867d5e7c0b7a2b3
|
4
|
+
data.tar.gz: b0805bdd3e5e1efddc7f73c59dd40ad7315b99c3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6d3d1edbaa5b0ac800300c79631d5494e6eebf7cc362b618621ca6006530d303c7de679fa761ae5d6edd3b826fa069f1dde3ff6f76dbd295b3781e68fa4eb29e
|
7
|
+
data.tar.gz: f9ba7d6080e164c75491abbd297994627f2812e907c230c5edb5c4f42681657da27c7e730b83b42df19fc96ddcb9fd9a3cd44b3121f933ac45d8db1635b040c6
|
data/lib/open_graph_reader.rb
CHANGED
@@ -1,18 +1,14 @@
|
|
1
1
|
require 'uri'
|
2
2
|
|
3
|
-
begin
|
4
|
-
require 'faraday_middleware/response/follow_redirects'
|
5
|
-
rescue LoadError; end
|
6
|
-
|
7
3
|
require 'open_graph_reader/base'
|
8
4
|
require 'open_graph_reader/builder'
|
5
|
+
require 'open_graph_reader/configuration'
|
9
6
|
require 'open_graph_reader/definitions'
|
10
7
|
require 'open_graph_reader/fetcher'
|
11
8
|
require 'open_graph_reader/object'
|
12
9
|
require 'open_graph_reader/parser'
|
13
10
|
require 'open_graph_reader/version'
|
14
11
|
|
15
|
-
# @todo quirks mode where invalid attributes don't raise?
|
16
12
|
# @todo 1.1 compatibility mode?
|
17
13
|
# This module provides the main entry to the library. Please see the
|
18
14
|
# {file:README.md} for usage examples.
|
@@ -72,6 +68,24 @@ module OpenGraphReader
|
|
72
68
|
rescue NoOpenGraphDataError, InvalidObjectError
|
73
69
|
end
|
74
70
|
|
71
|
+
# Configure the library, see {Configuration} for the list of available
|
72
|
+
# options and their defaults. Changing configuration at runtime is not
|
73
|
+
# thread safe.
|
74
|
+
#
|
75
|
+
# @yieldparam [Configuration] config the configuration object
|
76
|
+
# @see Configuration
|
77
|
+
def self.configure
|
78
|
+
yield config
|
79
|
+
end
|
80
|
+
|
81
|
+
# Get the current {Configuration} instance
|
82
|
+
#
|
83
|
+
# @api private
|
84
|
+
# @return [Configuration]
|
85
|
+
def self.config
|
86
|
+
Configuration.instance
|
87
|
+
end
|
88
|
+
|
75
89
|
# The target couldn't be fetched, didn't contain any HTML or
|
76
90
|
# any OpenGraph tags.
|
77
91
|
class NoOpenGraphDataError < StandardError
|
@@ -80,4 +94,10 @@ module OpenGraphReader
|
|
80
94
|
# The target did contain OpenGraph tags, but they're not valid.
|
81
95
|
class InvalidObjectError < StandardError
|
82
96
|
end
|
97
|
+
|
98
|
+
# The target defines a namespace we have no definition for
|
99
|
+
#
|
100
|
+
# @api private
|
101
|
+
class UnknownNamespaceError < StandardError
|
102
|
+
end
|
83
103
|
end
|
@@ -19,7 +19,13 @@ module OpenGraphReader
|
|
19
19
|
# @param [String] name The name of the root namespace.
|
20
20
|
# @param [Object] object The corresponding root object.
|
21
21
|
# @api private
|
22
|
-
|
22
|
+
# @!method each
|
23
|
+
# Traverse the available objects
|
24
|
+
#
|
25
|
+
# @yield [Object]
|
26
|
+
# @api private
|
27
|
+
def_delegators :@bases, :[], :[]=, :each_value
|
28
|
+
alias_method :each, :each_value
|
23
29
|
|
24
30
|
# If available, contains the source location of the document the
|
25
31
|
# available objects were parsed from.
|
@@ -1,27 +1,13 @@
|
|
1
1
|
module OpenGraphReader
|
2
2
|
# Convert a {Parser::Graph} into the right hierarchy of {Object}s attached
|
3
|
-
# to a {Base}.
|
3
|
+
# to a {Base}, then validate it.
|
4
4
|
#
|
5
|
-
# @todo validate required, verticals
|
6
5
|
# @api private
|
7
6
|
class Builder
|
8
7
|
# Well-known types from
|
9
8
|
#
|
10
9
|
# @see http://ogp.me
|
11
|
-
KNOWN_TYPES = %w(
|
12
|
-
website
|
13
|
-
music.song
|
14
|
-
music.album
|
15
|
-
music.playlist
|
16
|
-
music.radio_station
|
17
|
-
video.movie
|
18
|
-
video.episode
|
19
|
-
video.tv_show
|
20
|
-
video.other
|
21
|
-
article
|
22
|
-
book
|
23
|
-
profile
|
24
|
-
).freeze
|
10
|
+
KNOWN_TYPES = %w(website article book profile).freeze
|
25
11
|
|
26
12
|
# Create a new builder.
|
27
13
|
#
|
@@ -41,45 +27,53 @@ module OpenGraphReader
|
|
41
27
|
def base
|
42
28
|
base = Base.new
|
43
29
|
|
44
|
-
type = @graph.fetch
|
30
|
+
type = @graph.fetch('og:type', 'website').downcase
|
45
31
|
|
46
32
|
validate_type type
|
47
33
|
|
48
34
|
@graph.each do |property|
|
49
|
-
|
50
|
-
base[root] ||= Object::Registry[root].new
|
51
|
-
object = resolve base[root], root, path
|
52
|
-
|
53
|
-
if object.respond_to? "#{name}s" # Collection # TODO
|
54
|
-
collection = object.public_send "#{name}s" #TODO
|
55
|
-
if Object::Registry.registered? property.fullname # of subobjects
|
56
|
-
object = Object::Registry[property.fullname].new
|
57
|
-
collection << object
|
58
|
-
object.content = property.content
|
59
|
-
else # of type
|
60
|
-
collection << property.content
|
61
|
-
end
|
62
|
-
elsif Object::Registry.registered? property.fullname # Subobject
|
63
|
-
object[name] ||= Object::Registry[property.fullname].new
|
64
|
-
object[name].content = property.content
|
65
|
-
else # Direct attribute
|
66
|
-
object[name] = property.content
|
67
|
-
end
|
35
|
+
build_property base, property
|
68
36
|
end
|
69
37
|
|
38
|
+
validate base
|
39
|
+
|
70
40
|
base
|
71
41
|
end
|
72
42
|
|
73
43
|
private
|
74
44
|
|
45
|
+
def build_property base, property
|
46
|
+
root, *path, name = property.path
|
47
|
+
base[root] ||= Object::Registry[root].new
|
48
|
+
object = resolve base[root], root, path
|
49
|
+
|
50
|
+
if object.has_property?(name) && object.respond_to?("#{name}s") # Collection
|
51
|
+
collection = object.public_send "#{name}s"
|
52
|
+
if Object::Registry.registered? property.fullname # of subobjects
|
53
|
+
object = Object::Registry[property.fullname].new
|
54
|
+
collection << object
|
55
|
+
object.content = property.content
|
56
|
+
else # of type
|
57
|
+
collection << property.content
|
58
|
+
end
|
59
|
+
elsif Object::Registry.registered? property.fullname # Subobject
|
60
|
+
object[name] ||= Object::Registry[property.fullname].new
|
61
|
+
object[name].content = property.content
|
62
|
+
else # Direct attribute
|
63
|
+
object[name] = property.content
|
64
|
+
end
|
65
|
+
rescue UnknownNamespaceError => e
|
66
|
+
raise InvalidObjectError, e.message if OpenGraphReader.config.strict
|
67
|
+
end
|
68
|
+
|
75
69
|
def resolve object, last_namespace, path
|
76
70
|
return object if path.empty?
|
77
71
|
|
78
72
|
next_name = path.shift
|
79
|
-
if object.respond_to?
|
80
|
-
collection = object.public_send("#{next_name}s")
|
73
|
+
if object.has_property?(next_name) && object.respond_to?("#{next_name}s") # collection
|
74
|
+
collection = object.public_send("#{next_name}s")
|
81
75
|
next_object = collection.last
|
82
|
-
if next_object.nil?
|
76
|
+
if next_object.nil? # Final namespace or missing previous declaration, create a new collection item
|
83
77
|
next_object = Object::Registry[[*last_namespace, next_name].join(':')].new
|
84
78
|
collection << next_object
|
85
79
|
end
|
@@ -92,9 +86,43 @@ module OpenGraphReader
|
|
92
86
|
end
|
93
87
|
|
94
88
|
def validate_type type
|
95
|
-
unless
|
89
|
+
return unless OpenGraphReader.config.strict
|
90
|
+
|
91
|
+
unless KNOWN_TYPES.include?(type) ||
|
92
|
+
@additional_namespaces.include?(type) ||
|
93
|
+
Object::Registry.verticals.include?(type)
|
96
94
|
raise InvalidObjectError, "Undefined type #{type}"
|
97
95
|
end
|
98
96
|
end
|
97
|
+
|
98
|
+
def validate base
|
99
|
+
base.each do |object|
|
100
|
+
validate_required object if OpenGraphReader.config.validate_required
|
101
|
+
validate_verticals object, base.og.type
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
def validate_required object
|
106
|
+
object.class.required_properties.each do |property|
|
107
|
+
if object[property].nil?
|
108
|
+
raise InvalidObjectError, "Missing required property #{property} on #{object.inspect}"
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
def validate_verticals object, type
|
114
|
+
return unless type.include? '.'
|
115
|
+
verticals = object.class.verticals
|
116
|
+
if verticals.has_key? type
|
117
|
+
valid_properties = verticals[type]
|
118
|
+
set_properties = object.class.available_properties.select {|property| object[property] }
|
119
|
+
extra_properties = set_properties-valid_properties
|
120
|
+
|
121
|
+
unless extra_properties.empty?
|
122
|
+
raise InvalidObjectError, "Set invalid property #{extra_properties.first} for #{type} " \
|
123
|
+
"in #{object.inspect}, valid properties are #{valid_properties.inspect}"
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
99
127
|
end
|
100
128
|
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'singleton'
|
2
|
+
|
3
|
+
module OpenGraphReader
|
4
|
+
# The behavior of this library can be tweaked with some parameters.
|
5
|
+
#
|
6
|
+
# @example
|
7
|
+
# OpenGraphReader.configure do |config|
|
8
|
+
# config.strict = true
|
9
|
+
# end
|
10
|
+
class Configuration
|
11
|
+
include Singleton
|
12
|
+
|
13
|
+
# Strict mode (default: <tt>false</tt>)
|
14
|
+
#
|
15
|
+
# In strict mode, if the fetched site defines an unknown type
|
16
|
+
# or property, {InvalidObjectError} is raised instead of just ignoring
|
17
|
+
# those.
|
18
|
+
#
|
19
|
+
# @return [Bool]
|
20
|
+
attr_accessor :strict
|
21
|
+
|
22
|
+
# Validate required (default: <tt>true</tt>)
|
23
|
+
#
|
24
|
+
# Validate that required properties exist. If this is enabled and
|
25
|
+
# they do not, {InvalidObjectError} is raised.
|
26
|
+
#
|
27
|
+
# @return [Bool]
|
28
|
+
attr_accessor :validate_required
|
29
|
+
|
30
|
+
# Validate references (default: <tt>true</tt>)
|
31
|
+
#
|
32
|
+
# If an object should be a reference to another object,
|
33
|
+
# validate that it contains a URL. Be careful when turning this off,
|
34
|
+
# an attacker could place things like <tt>javascript:</tt> links there.
|
35
|
+
#
|
36
|
+
# @return [Bool]
|
37
|
+
attr_accessor :validate_references
|
38
|
+
|
39
|
+
# @private
|
40
|
+
def initialize
|
41
|
+
reset_to_defaults!
|
42
|
+
end
|
43
|
+
|
44
|
+
# Reset all configuration options to their defaults
|
45
|
+
def reset_to_defaults!
|
46
|
+
@strict = false
|
47
|
+
@validate_required = true
|
48
|
+
@validate_references = true
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
@@ -1,5 +1,13 @@
|
|
1
1
|
require 'faraday'
|
2
2
|
|
3
|
+
begin
|
4
|
+
require 'faraday_middleware/response/follow_redirects'
|
5
|
+
rescue LoadError; end
|
6
|
+
|
7
|
+
begin
|
8
|
+
require 'faraday/cookie_jar'
|
9
|
+
rescue LoadError; end
|
10
|
+
|
3
11
|
module OpenGraphReader
|
4
12
|
# Fetch a URI to retrieve its HTML body, if available.
|
5
13
|
#
|
@@ -14,9 +22,11 @@ module OpenGraphReader
|
|
14
22
|
@connection = Faraday.default_connection.dup
|
15
23
|
|
16
24
|
if defined? FaradayMiddleware
|
17
|
-
|
18
|
-
|
19
|
-
|
25
|
+
prepend_middleware FaradayMiddleware::FollowRedirects
|
26
|
+
end
|
27
|
+
|
28
|
+
if defined? Faraday::CookieJar
|
29
|
+
prepend_middleware Faraday::CookieJar
|
20
30
|
end
|
21
31
|
end
|
22
32
|
|
@@ -78,5 +88,13 @@ module OpenGraphReader
|
|
78
88
|
def fetched_headers?
|
79
89
|
!@get_response.nil? || !@head_response.nil?
|
80
90
|
end
|
91
|
+
|
92
|
+
private
|
93
|
+
|
94
|
+
def prepend_middleware middleware
|
95
|
+
unless @connection.builder.handlers.include? middleware
|
96
|
+
@connection.builder.insert(0, middleware)
|
97
|
+
end
|
98
|
+
end
|
81
99
|
end
|
82
100
|
end
|
@@ -71,7 +71,7 @@ module OpenGraphReader
|
|
71
71
|
# @return [String, Object]
|
72
72
|
def [] name
|
73
73
|
raise InvalidObjectError, "Undefined property #{name} on #{inspect}" unless has_property? name
|
74
|
-
properties[name.to_s]
|
74
|
+
public_send name.to_s #properties[name.to_s]
|
75
75
|
end
|
76
76
|
|
77
77
|
# Set the property to the given value.
|
@@ -81,8 +81,11 @@ module OpenGraphReader
|
|
81
81
|
# @param [String, Object] value
|
82
82
|
# @raise [InvalidObjectError] If the requested property is undefined.
|
83
83
|
def []= name, value
|
84
|
-
|
85
|
-
|
84
|
+
if has_property?(name)
|
85
|
+
public_send "#{name}=", value
|
86
|
+
elsif OpenGraphReader.config.strict
|
87
|
+
raise InvalidObjectError, "Undefined property #{name} on #{inspect}"
|
88
|
+
end
|
86
89
|
end
|
87
90
|
|
88
91
|
# Returns {#content} if available.
|
@@ -14,6 +14,7 @@ module OpenGraphReader
|
|
14
14
|
# @option options [Class] :to This property maps to the given object (optional).
|
15
15
|
# belongs to the given verticals of the object (optional).
|
16
16
|
# @option options [Array<String>] :verticals This property
|
17
|
+
# @option options [Bool] :downcase (false) Normalize the contents case to lowercase.
|
17
18
|
#
|
18
19
|
# @!macro property
|
19
20
|
# @!attribute [rw] $1
|
@@ -32,15 +33,18 @@ module OpenGraphReader
|
|
32
33
|
processors[name] = processor
|
33
34
|
|
34
35
|
define_method(name) do |name, *args|
|
35
|
-
available_properties << name.to_s
|
36
36
|
options = args.pop if args.last.is_a? Hash
|
37
37
|
options ||= {}
|
38
38
|
|
39
|
+
available_properties << name.to_s
|
40
|
+
required_properties << name.to_s if options[:required]
|
39
41
|
Registry.register [@namespace, name].join(':'), options[:to] if options[:to]
|
40
42
|
|
41
43
|
if options[:verticals]
|
42
44
|
options[:verticals].each do |vertical|
|
43
|
-
|
45
|
+
vertical = [@namespace, vertical].join('.')
|
46
|
+
verticals[vertical] << name.to_s
|
47
|
+
Registry.verticals << vertical
|
44
48
|
end
|
45
49
|
end
|
46
50
|
|
@@ -50,21 +54,22 @@ module OpenGraphReader
|
|
50
54
|
end
|
51
55
|
|
52
56
|
define_method(name) do
|
53
|
-
# TODO raise if required
|
54
57
|
value = children[name.to_s].first
|
55
|
-
#
|
58
|
+
# @todo figure out a sane way to distinguish subobject properties
|
56
59
|
value.content if value && value.is_a?(Object)
|
57
60
|
value || options[:default]
|
58
61
|
end
|
59
62
|
else
|
60
63
|
define_method(name) do
|
61
|
-
# TODO raise if required
|
62
64
|
properties[name.to_s] || options[:default]
|
63
65
|
end
|
64
66
|
|
65
67
|
define_method("#{name}=") do |value|
|
66
|
-
#
|
67
|
-
|
68
|
+
# @todo figure out a sane way to distinguish subobject properties
|
69
|
+
unless value.is_a? Object
|
70
|
+
value.downcase! if options[:downcase]
|
71
|
+
value = processor.call(value, *args, options)
|
72
|
+
end
|
68
73
|
properties[name.to_s] = value
|
69
74
|
end
|
70
75
|
end
|
@@ -88,11 +93,23 @@ module OpenGraphReader
|
|
88
93
|
Registry.register @namespace, self
|
89
94
|
end
|
90
95
|
|
91
|
-
#
|
96
|
+
# @overload content type, *args, options={}
|
92
97
|
#
|
93
|
-
#
|
94
|
-
|
95
|
-
|
98
|
+
# Set the type for the content attribute
|
99
|
+
#
|
100
|
+
# @param [Symbol] type one of the registered types.
|
101
|
+
# @param [Array<Object>] args Additional parameters for the type
|
102
|
+
# @param [Hash] options
|
103
|
+
# @option options [Bool] :downcase (false) Normalize the contents case to lowercase.
|
104
|
+
def content type, *args
|
105
|
+
options = args.pop if args.last.is_a? Hash
|
106
|
+
options ||= {}
|
107
|
+
|
108
|
+
@content_processor = proc {|value|
|
109
|
+
value.downcase! if options[:downcase]
|
110
|
+
options[:to] ||= self
|
111
|
+
DSL.processors[type].call(value, *args, options)
|
112
|
+
}
|
96
113
|
end
|
97
114
|
|
98
115
|
# The list of defined properties on this object.
|
@@ -102,6 +119,13 @@ module OpenGraphReader
|
|
102
119
|
@available_properties ||= []
|
103
120
|
end
|
104
121
|
|
122
|
+
# The list of required properties on this object.
|
123
|
+
#
|
124
|
+
# @return [Array<String>]
|
125
|
+
def required_properties
|
126
|
+
@required_properties ||= []
|
127
|
+
end
|
128
|
+
|
105
129
|
# A map from type names to processing blocks.
|
106
130
|
#
|
107
131
|
# @api private
|
@@ -115,7 +139,7 @@ module OpenGraphReader
|
|
115
139
|
# @api private
|
116
140
|
# @return [Proc]
|
117
141
|
def content_processor
|
118
|
-
@content_processor
|
142
|
+
@content_processor
|
119
143
|
end
|
120
144
|
|
121
145
|
# A map from vertical names to attributes that belong to them.
|