open_graph_reader 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/open_graph_reader.rb +25 -5
- data/lib/open_graph_reader/base.rb +7 -1
- data/lib/open_graph_reader/builder.rb +68 -40
- data/lib/open_graph_reader/configuration.rb +51 -0
- data/lib/open_graph_reader/definitions.rb +1 -1
- data/lib/open_graph_reader/fetcher.rb +21 -3
- data/lib/open_graph_reader/object.rb +6 -3
- data/lib/open_graph_reader/object/dsl.rb +36 -12
- data/lib/open_graph_reader/object/dsl/types.rb +4 -2
- data/lib/open_graph_reader/object/registry.rb +12 -2
- data/lib/open_graph_reader/parser.rb +17 -6
- data/lib/open_graph_reader/version.rb +1 -1
- data/spec/fixtures/real_world/missing_image.html +985 -0
- data/spec/fixtures/real_world/mixed_case_properties.html +1139 -0
- data/spec/fixtures/real_world/mixed_case_type.html +1008 -0
- data/spec/fixtures/real_world/not_a_reference.html +814 -0
- data/spec/fixtures/real_world/undefined_property.html +491 -0
- data/spec/fixtures/real_world/unknown_namespace.html +2033 -0
- data/spec/fixtures/real_world/unknown_type.html +2032 -0
- data/spec/integration/invalid_examples_spec.rb +42 -3
- data/spec/integration/real_world_spec.rb +121 -0
- data/spec/integration/valid_examples_spec.rb +0 -1
- data/spec/spec_helper.rb +9 -1
- metadata +20 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bea1c38c6567749ce9bd8b369867d5e7c0b7a2b3
|
4
|
+
data.tar.gz: b0805bdd3e5e1efddc7f73c59dd40ad7315b99c3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6d3d1edbaa5b0ac800300c79631d5494e6eebf7cc362b618621ca6006530d303c7de679fa761ae5d6edd3b826fa069f1dde3ff6f76dbd295b3781e68fa4eb29e
|
7
|
+
data.tar.gz: f9ba7d6080e164c75491abbd297994627f2812e907c230c5edb5c4f42681657da27c7e730b83b42df19fc96ddcb9fd9a3cd44b3121f933ac45d8db1635b040c6
|
data/lib/open_graph_reader.rb
CHANGED
@@ -1,18 +1,14 @@
|
|
1
1
|
require 'uri'
|
2
2
|
|
3
|
-
begin
|
4
|
-
require 'faraday_middleware/response/follow_redirects'
|
5
|
-
rescue LoadError; end
|
6
|
-
|
7
3
|
require 'open_graph_reader/base'
|
8
4
|
require 'open_graph_reader/builder'
|
5
|
+
require 'open_graph_reader/configuration'
|
9
6
|
require 'open_graph_reader/definitions'
|
10
7
|
require 'open_graph_reader/fetcher'
|
11
8
|
require 'open_graph_reader/object'
|
12
9
|
require 'open_graph_reader/parser'
|
13
10
|
require 'open_graph_reader/version'
|
14
11
|
|
15
|
-
# @todo quirks mode where invalid attributes don't raise?
|
16
12
|
# @todo 1.1 compatibility mode?
|
17
13
|
# This module provides the main entry to the library. Please see the
|
18
14
|
# {file:README.md} for usage examples.
|
@@ -72,6 +68,24 @@ module OpenGraphReader
|
|
72
68
|
rescue NoOpenGraphDataError, InvalidObjectError
|
73
69
|
end
|
74
70
|
|
71
|
+
# Configure the library, see {Configuration} for the list of available
|
72
|
+
# options and their defaults. Changing configuration at runtime is not
|
73
|
+
# thread safe.
|
74
|
+
#
|
75
|
+
# @yieldparam [Configuration] config the configuration object
|
76
|
+
# @see Configuration
|
77
|
+
def self.configure
|
78
|
+
yield config
|
79
|
+
end
|
80
|
+
|
81
|
+
# Get the current {Configuration} instance
|
82
|
+
#
|
83
|
+
# @api private
|
84
|
+
# @return [Configuration]
|
85
|
+
def self.config
|
86
|
+
Configuration.instance
|
87
|
+
end
|
88
|
+
|
75
89
|
# The target couldn't be fetched, didn't contain any HTML or
|
76
90
|
# any OpenGraph tags.
|
77
91
|
class NoOpenGraphDataError < StandardError
|
@@ -80,4 +94,10 @@ module OpenGraphReader
|
|
80
94
|
# The target did contain OpenGraph tags, but they're not valid.
|
81
95
|
class InvalidObjectError < StandardError
|
82
96
|
end
|
97
|
+
|
98
|
+
# The target defines a namespace we have no definition for
|
99
|
+
#
|
100
|
+
# @api private
|
101
|
+
class UnknownNamespaceError < StandardError
|
102
|
+
end
|
83
103
|
end
|
@@ -19,7 +19,13 @@ module OpenGraphReader
|
|
19
19
|
# @param [String] name The name of the root namespace.
|
20
20
|
# @param [Object] object The corresponding root object.
|
21
21
|
# @api private
|
22
|
-
|
22
|
+
# @!method each
|
23
|
+
# Traverse the available objects
|
24
|
+
#
|
25
|
+
# @yield [Object]
|
26
|
+
# @api private
|
27
|
+
def_delegators :@bases, :[], :[]=, :each_value
|
28
|
+
alias_method :each, :each_value
|
23
29
|
|
24
30
|
# If available, contains the source location of the document the
|
25
31
|
# available objects were parsed from.
|
@@ -1,27 +1,13 @@
|
|
1
1
|
module OpenGraphReader
|
2
2
|
# Convert a {Parser::Graph} into the right hierarchy of {Object}s attached
|
3
|
-
# to a {Base}.
|
3
|
+
# to a {Base}, then validate it.
|
4
4
|
#
|
5
|
-
# @todo validate required, verticals
|
6
5
|
# @api private
|
7
6
|
class Builder
|
8
7
|
# Well-known types from
|
9
8
|
#
|
10
9
|
# @see http://ogp.me
|
11
|
-
KNOWN_TYPES = %w(
|
12
|
-
website
|
13
|
-
music.song
|
14
|
-
music.album
|
15
|
-
music.playlist
|
16
|
-
music.radio_station
|
17
|
-
video.movie
|
18
|
-
video.episode
|
19
|
-
video.tv_show
|
20
|
-
video.other
|
21
|
-
article
|
22
|
-
book
|
23
|
-
profile
|
24
|
-
).freeze
|
10
|
+
KNOWN_TYPES = %w(website article book profile).freeze
|
25
11
|
|
26
12
|
# Create a new builder.
|
27
13
|
#
|
@@ -41,45 +27,53 @@ module OpenGraphReader
|
|
41
27
|
def base
|
42
28
|
base = Base.new
|
43
29
|
|
44
|
-
type = @graph.fetch
|
30
|
+
type = @graph.fetch('og:type', 'website').downcase
|
45
31
|
|
46
32
|
validate_type type
|
47
33
|
|
48
34
|
@graph.each do |property|
|
49
|
-
|
50
|
-
base[root] ||= Object::Registry[root].new
|
51
|
-
object = resolve base[root], root, path
|
52
|
-
|
53
|
-
if object.respond_to? "#{name}s" # Collection # TODO
|
54
|
-
collection = object.public_send "#{name}s" #TODO
|
55
|
-
if Object::Registry.registered? property.fullname # of subobjects
|
56
|
-
object = Object::Registry[property.fullname].new
|
57
|
-
collection << object
|
58
|
-
object.content = property.content
|
59
|
-
else # of type
|
60
|
-
collection << property.content
|
61
|
-
end
|
62
|
-
elsif Object::Registry.registered? property.fullname # Subobject
|
63
|
-
object[name] ||= Object::Registry[property.fullname].new
|
64
|
-
object[name].content = property.content
|
65
|
-
else # Direct attribute
|
66
|
-
object[name] = property.content
|
67
|
-
end
|
35
|
+
build_property base, property
|
68
36
|
end
|
69
37
|
|
38
|
+
validate base
|
39
|
+
|
70
40
|
base
|
71
41
|
end
|
72
42
|
|
73
43
|
private
|
74
44
|
|
45
|
+
def build_property base, property
|
46
|
+
root, *path, name = property.path
|
47
|
+
base[root] ||= Object::Registry[root].new
|
48
|
+
object = resolve base[root], root, path
|
49
|
+
|
50
|
+
if object.has_property?(name) && object.respond_to?("#{name}s") # Collection
|
51
|
+
collection = object.public_send "#{name}s"
|
52
|
+
if Object::Registry.registered? property.fullname # of subobjects
|
53
|
+
object = Object::Registry[property.fullname].new
|
54
|
+
collection << object
|
55
|
+
object.content = property.content
|
56
|
+
else # of type
|
57
|
+
collection << property.content
|
58
|
+
end
|
59
|
+
elsif Object::Registry.registered? property.fullname # Subobject
|
60
|
+
object[name] ||= Object::Registry[property.fullname].new
|
61
|
+
object[name].content = property.content
|
62
|
+
else # Direct attribute
|
63
|
+
object[name] = property.content
|
64
|
+
end
|
65
|
+
rescue UnknownNamespaceError => e
|
66
|
+
raise InvalidObjectError, e.message if OpenGraphReader.config.strict
|
67
|
+
end
|
68
|
+
|
75
69
|
def resolve object, last_namespace, path
|
76
70
|
return object if path.empty?
|
77
71
|
|
78
72
|
next_name = path.shift
|
79
|
-
if object.respond_to?
|
80
|
-
collection = object.public_send("#{next_name}s")
|
73
|
+
if object.has_property?(next_name) && object.respond_to?("#{next_name}s") # collection
|
74
|
+
collection = object.public_send("#{next_name}s")
|
81
75
|
next_object = collection.last
|
82
|
-
if next_object.nil?
|
76
|
+
if next_object.nil? # Final namespace or missing previous declaration, create a new collection item
|
83
77
|
next_object = Object::Registry[[*last_namespace, next_name].join(':')].new
|
84
78
|
collection << next_object
|
85
79
|
end
|
@@ -92,9 +86,43 @@ module OpenGraphReader
|
|
92
86
|
end
|
93
87
|
|
94
88
|
def validate_type type
|
95
|
-
unless
|
89
|
+
return unless OpenGraphReader.config.strict
|
90
|
+
|
91
|
+
unless KNOWN_TYPES.include?(type) ||
|
92
|
+
@additional_namespaces.include?(type) ||
|
93
|
+
Object::Registry.verticals.include?(type)
|
96
94
|
raise InvalidObjectError, "Undefined type #{type}"
|
97
95
|
end
|
98
96
|
end
|
97
|
+
|
98
|
+
def validate base
|
99
|
+
base.each do |object|
|
100
|
+
validate_required object if OpenGraphReader.config.validate_required
|
101
|
+
validate_verticals object, base.og.type
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
def validate_required object
|
106
|
+
object.class.required_properties.each do |property|
|
107
|
+
if object[property].nil?
|
108
|
+
raise InvalidObjectError, "Missing required property #{property} on #{object.inspect}"
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
def validate_verticals object, type
|
114
|
+
return unless type.include? '.'
|
115
|
+
verticals = object.class.verticals
|
116
|
+
if verticals.has_key? type
|
117
|
+
valid_properties = verticals[type]
|
118
|
+
set_properties = object.class.available_properties.select {|property| object[property] }
|
119
|
+
extra_properties = set_properties-valid_properties
|
120
|
+
|
121
|
+
unless extra_properties.empty?
|
122
|
+
raise InvalidObjectError, "Set invalid property #{extra_properties.first} for #{type} " \
|
123
|
+
"in #{object.inspect}, valid properties are #{valid_properties.inspect}"
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
99
127
|
end
|
100
128
|
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'singleton'
|
2
|
+
|
3
|
+
module OpenGraphReader
|
4
|
+
# The behavior of this library can be tweaked with some parameters.
|
5
|
+
#
|
6
|
+
# @example
|
7
|
+
# OpenGraphReader.configure do |config|
|
8
|
+
# config.strict = true
|
9
|
+
# end
|
10
|
+
class Configuration
|
11
|
+
include Singleton
|
12
|
+
|
13
|
+
# Strict mode (default: <tt>false</tt>)
|
14
|
+
#
|
15
|
+
# In strict mode, if the fetched site defines an unknown type
|
16
|
+
# or property, {InvalidObjectError} is thrown instead of just ignoring
|
17
|
+
# those.
|
18
|
+
#
|
19
|
+
# @return [Bool]
|
20
|
+
attr_accessor :strict
|
21
|
+
|
22
|
+
# Validate required (default: <tt>true</tt>)
|
23
|
+
#
|
24
|
+
# Validate that required properties exist. If this is enabled and
|
25
|
+
# they do not, {InvalidObjectError} is thrown.
|
26
|
+
#
|
27
|
+
# @return [Bool]
|
28
|
+
attr_accessor :validate_required
|
29
|
+
|
30
|
+
# Validate references (default: <tt>true</tt>)
|
31
|
+
#
|
32
|
+
# If an object should be a reference to another object,
|
33
|
+
# validate that it contains a URL. Be careful in turning this off,
|
34
|
+
# an attacker could place things like <tt>javascript:</tt> links there.
|
35
|
+
#
|
36
|
+
# @return [Bool]
|
37
|
+
attr_accessor :validate_references
|
38
|
+
|
39
|
+
# @private
|
40
|
+
def initialize
|
41
|
+
reset_to_defaults!
|
42
|
+
end
|
43
|
+
|
44
|
+
# Reset the configuration to its defaults
|
45
|
+
def reset_to_defaults!
|
46
|
+
@strict = false
|
47
|
+
@validate_required = true
|
48
|
+
@validate_references = true
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
@@ -1,5 +1,13 @@
|
|
1
1
|
require 'faraday'
|
2
2
|
|
3
|
+
begin
|
4
|
+
require 'faraday_middleware/response/follow_redirects'
|
5
|
+
rescue LoadError; end
|
6
|
+
|
7
|
+
begin
|
8
|
+
require 'faraday/cookie_jar'
|
9
|
+
rescue LoadError; end
|
10
|
+
|
3
11
|
module OpenGraphReader
|
4
12
|
# Fetch an URI to retrieve its HTML body, if available.
|
5
13
|
#
|
@@ -14,9 +22,11 @@ module OpenGraphReader
|
|
14
22
|
@connection = Faraday.default_connection.dup
|
15
23
|
|
16
24
|
if defined? FaradayMiddleware
|
17
|
-
|
18
|
-
|
19
|
-
|
25
|
+
prepend_middleware FaradayMiddleware::FollowRedirects
|
26
|
+
end
|
27
|
+
|
28
|
+
if defined? Faraday::CookieJar
|
29
|
+
prepend_middleware Faraday::CookieJar
|
20
30
|
end
|
21
31
|
end
|
22
32
|
|
@@ -78,5 +88,13 @@ module OpenGraphReader
|
|
78
88
|
def fetched_headers?
|
79
89
|
!@get_response.nil? || !@head_response.nil?
|
80
90
|
end
|
91
|
+
|
92
|
+
private
|
93
|
+
|
94
|
+
def prepend_middleware middleware
|
95
|
+
unless @connection.builder.handlers.include? middleware
|
96
|
+
@connection.builder.insert(0, middleware)
|
97
|
+
end
|
98
|
+
end
|
81
99
|
end
|
82
100
|
end
|
@@ -71,7 +71,7 @@ module OpenGraphReader
|
|
71
71
|
# @return [String, Object]
|
72
72
|
def [] name
|
73
73
|
raise InvalidObjectError, "Undefined property #{name} on #{inspect}" unless has_property? name
|
74
|
-
properties[name.to_s]
|
74
|
+
public_send name.to_s #properties[name.to_s]
|
75
75
|
end
|
76
76
|
|
77
77
|
# Set the property to the given value.
|
@@ -81,8 +81,11 @@ module OpenGraphReader
|
|
81
81
|
# @param [String, Object] value
|
82
82
|
# @raise [InvalidObjectError] If the requested property is undefined.
|
83
83
|
def []= name, value
|
84
|
-
|
85
|
-
|
84
|
+
if has_property?(name)
|
85
|
+
public_send "#{name}=", value
|
86
|
+
elsif OpenGraphReader.config.strict
|
87
|
+
raise InvalidObjectError, "Undefined property #{name} on #{inspect}"
|
88
|
+
end
|
86
89
|
end
|
87
90
|
|
88
91
|
# Returns {#content} if available.
|
@@ -14,6 +14,7 @@ module OpenGraphReader
|
|
14
14
|
# @option options [Class] :to This property maps to the given object (optional).
|
15
15
|
# belongs to the given verticals of the object (optional).
|
16
16
|
# @option options [Array<String>] :verticals This property
|
17
|
+
# @option options [Bool] :downcase (false) Normalize the contents case to lowercase.
|
17
18
|
#
|
18
19
|
# @!macro property
|
19
20
|
# @!attribute [rw] $1
|
@@ -32,15 +33,18 @@ module OpenGraphReader
|
|
32
33
|
processors[name] = processor
|
33
34
|
|
34
35
|
define_method(name) do |name, *args|
|
35
|
-
available_properties << name.to_s
|
36
36
|
options = args.pop if args.last.is_a? Hash
|
37
37
|
options ||= {}
|
38
38
|
|
39
|
+
available_properties << name.to_s
|
40
|
+
required_properties << name.to_s if options[:required]
|
39
41
|
Registry.register [@namespace, name].join(':'), options[:to] if options[:to]
|
40
42
|
|
41
43
|
if options[:verticals]
|
42
44
|
options[:verticals].each do |vertical|
|
43
|
-
|
45
|
+
vertical = [@namespace, vertical].join('.')
|
46
|
+
verticals[vertical] << name.to_s
|
47
|
+
Registry.verticals << vertical
|
44
48
|
end
|
45
49
|
end
|
46
50
|
|
@@ -50,21 +54,22 @@ module OpenGraphReader
|
|
50
54
|
end
|
51
55
|
|
52
56
|
define_method(name) do
|
53
|
-
# TODO raise if required
|
54
57
|
value = children[name.to_s].first
|
55
|
-
#
|
58
|
+
# @todo figure out a sane way to distinguish subobject properties
|
56
59
|
value.content if value && value.is_a?(Object)
|
57
60
|
value || options[:default]
|
58
61
|
end
|
59
62
|
else
|
60
63
|
define_method(name) do
|
61
|
-
# TODO raise if required
|
62
64
|
properties[name.to_s] || options[:default]
|
63
65
|
end
|
64
66
|
|
65
67
|
define_method("#{name}=") do |value|
|
66
|
-
#
|
67
|
-
|
68
|
+
# @todo figure out a sane way to distinguish subobject properties
|
69
|
+
unless value.is_a? Object
|
70
|
+
value.downcase! if options[:downcase]
|
71
|
+
value = processor.call(value, *args, options)
|
72
|
+
end
|
68
73
|
properties[name.to_s] = value
|
69
74
|
end
|
70
75
|
end
|
@@ -88,11 +93,23 @@ module OpenGraphReader
|
|
88
93
|
Registry.register @namespace, self
|
89
94
|
end
|
90
95
|
|
91
|
-
#
|
96
|
+
# @overload content type, *args, options={}
|
92
97
|
#
|
93
|
-
#
|
94
|
-
|
95
|
-
|
98
|
+
# Set the type for the content attribute
|
99
|
+
#
|
100
|
+
# @param [Symbol] type one of the registered types.
|
101
|
+
# @param [Array<Object>] args Additional parameters for the type
|
102
|
+
# @param [Hash] options
|
103
|
+
# @option options [Bool] :downcase (false) Normalize the contents case to lowercase.
|
104
|
+
def content type, *args
|
105
|
+
options = args.pop if args.last.is_a? Hash
|
106
|
+
options ||= {}
|
107
|
+
|
108
|
+
@content_processor = proc {|value|
|
109
|
+
value.downcase! if options[:downcase]
|
110
|
+
options[:to] ||= self
|
111
|
+
DSL.processors[type].call(value, *args, options)
|
112
|
+
}
|
96
113
|
end
|
97
114
|
|
98
115
|
# The list of defined properties on this object.
|
@@ -102,6 +119,13 @@ module OpenGraphReader
|
|
102
119
|
@available_properties ||= []
|
103
120
|
end
|
104
121
|
|
122
|
+
# The list of required properties on this object.
|
123
|
+
#
|
124
|
+
# @return [Array<String>]
|
125
|
+
def required_properties
|
126
|
+
@required_properties ||= []
|
127
|
+
end
|
128
|
+
|
105
129
|
# A map from type names to processing blocks.
|
106
130
|
#
|
107
131
|
# @api private
|
@@ -115,7 +139,7 @@ module OpenGraphReader
|
|
115
139
|
# @api private
|
116
140
|
# @return [Proc]
|
117
141
|
def content_processor
|
118
|
-
@content_processor
|
142
|
+
@content_processor
|
119
143
|
end
|
120
144
|
|
121
145
|
# A map from vertical names to attributes that belong to them.
|