open_graph_reader 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,14 @@
1
- require 'faraday'
1
+ require "faraday"
2
2
 
3
3
  begin
4
- require 'faraday_middleware/response/follow_redirects'
4
+ require "faraday_middleware/response/follow_redirects"
5
5
  rescue LoadError; end
6
6
 
7
7
  begin
8
- require 'faraday/cookie_jar'
8
+ require "faraday/cookie_jar"
9
9
  rescue LoadError; end
10
10
 
11
- require 'open_graph_reader/version'
11
+ require "open_graph_reader/version"
12
12
 
13
13
  module OpenGraphReader
14
14
  # Fetch an URI to retrieve its HTML body, if available.
@@ -16,8 +16,8 @@ module OpenGraphReader
16
16
  # @api private
17
17
  class Fetcher
18
18
  HEADERS = {
19
- 'Accept' => 'text/html',
20
- 'User-Agent' => "OpenGraphReader/#{OpenGraphReader::VERSION} (+https://github.com/jhass/open_graph_reader)"
19
+ "Accept" => "text/html",
20
+ "User-Agent" => "OpenGraphReader/#{OpenGraphReader::VERSION} (+https://github.com/jhass/open_graph_reader)"
21
21
  }.freeze
22
22
 
23
23
  # Create a new fetcher.
@@ -29,13 +29,8 @@ module OpenGraphReader
29
29
  @connection = Faraday.default_connection.dup
30
30
  @connection.headers.replace(HEADERS)
31
31
 
32
- if defined? Faraday::CookieJar
33
- prepend_middleware Faraday::CookieJar
34
- end
35
-
36
- if defined? FaradayMiddleware
37
- prepend_middleware FaradayMiddleware::FollowRedirects
38
- end
32
+ prepend_middleware Faraday::CookieJar if defined? Faraday::CookieJar
33
+ prepend_middleware FaradayMiddleware::FollowRedirects if defined? FaradayMiddleware
39
34
  end
40
35
 
41
36
  # The URL to fetch
@@ -81,8 +76,8 @@ module OpenGraphReader
81
76
  response = @get_response || @head_response
82
77
  return false unless response
83
78
  return false unless response.success?
84
- return false unless response['content-type']
85
- response['content-type'].include? 'text/html'
79
+ return false unless response["content-type"]
80
+ response["content-type"].include? "text/html"
86
81
  end
87
82
 
88
83
  # Whether the target URI was fetched.
@@ -103,9 +98,9 @@ module OpenGraphReader
103
98
  private
104
99
 
105
100
  def prepend_middleware middleware
106
- unless @connection.builder.handlers.include? middleware
107
- @connection.builder.insert(0, middleware)
108
- end
101
+ return if @connection.builder.handlers.include? middleware
102
+
103
+ @connection.builder.insert(0, middleware)
109
104
  end
110
105
  end
111
106
  end
@@ -1,9 +1,8 @@
1
- require 'open_graph_reader/object/registry'
2
- require 'open_graph_reader/object/dsl'
3
- require 'open_graph_reader/object/dsl/types'
1
+ require "open_graph_reader/object/registry"
2
+ require "open_graph_reader/object/dsl"
3
+ require "open_graph_reader/object/dsl/types"
4
4
 
5
5
  module OpenGraphReader
6
-
7
6
  # This module provides the base functionality for all OpenGraph objects
8
7
  # and makes the {DSL} methods for describing them available when included.
9
8
  #
@@ -37,7 +36,6 @@ module OpenGraphReader
37
36
  # @return [{String => Array<String, Object>}]
38
37
  attr_reader :children
39
38
 
40
-
41
39
  # Create a new object. If your class overrides this don't forget to call <tt>super</tt>.
42
40
  def initialize
43
41
  @properties = {}
@@ -48,7 +46,7 @@ module OpenGraphReader
48
46
  #
49
47
  # @param [#to_s] name
50
48
  # @return [Bool]
51
- def has_property? name
49
+ def property? name
52
50
  self.class.available_properties.include? name.to_s
53
51
  end
54
52
 
@@ -70,8 +68,8 @@ module OpenGraphReader
70
68
  # @raise [UndefinedPropertyError] If the requested property is undefined.
71
69
  # @return [String, Object]
72
70
  def [] name
73
- raise UndefinedPropertyError, "Undefined property #{name} on #{inspect}" unless has_property? name
74
- public_send name.to_s #properties[name.to_s]
71
+ raise UndefinedPropertyError, "Undefined property #{name} on #{inspect}" unless property? name
72
+ public_send name.to_s
75
73
  end
76
74
 
77
75
  # Set the property to the given value.
@@ -81,7 +79,7 @@ module OpenGraphReader
81
79
  # @param [String, Object] value
82
80
  # @raise [UndefinedPropertyError] If the requested property is undefined.
83
81
  def []= name, value
84
- if has_property?(name)
82
+ if property?(name)
85
83
  public_send "#{name}=", value
86
84
  elsif OpenGraphReader.config.strict
87
85
  raise UndefinedPropertyError, "Undefined property #{name} on #{inspect}"
@@ -1,4 +1,4 @@
1
- require 'open_graph_reader/object/registry'
1
+ require "open_graph_reader/object/registry"
2
2
 
3
3
  module OpenGraphReader
4
4
  module Object
@@ -36,49 +36,72 @@ module OpenGraphReader
36
36
  options = args.pop if args.last.is_a? Hash
37
37
  options ||= {}
38
38
 
39
- available_properties << name.to_s
40
- required_properties << name.to_s if options[:required]
41
- Registry.register [@namespace, name].join(':'), options[:to] if options[:to]
42
-
43
- if options[:verticals]
44
- options[:verticals].each do |vertical|
45
- vertical = [@namespace, vertical].join('.')
46
- verticals[vertical] << name.to_s
47
- Registry.verticals << vertical
48
- end
49
- end
39
+ register_property name, options
40
+ register_verticals name, options[:verticals]
50
41
 
51
42
  if options[:collection]
52
- define_method("#{name}s") do
53
- children[name.to_s]
54
- end
55
-
56
- define_method(name) do
57
- value = children[name.to_s].first
58
- # @todo figure out a sane way to distinguish subobject properties
59
- value.content if value && value.is_a?(Object)
60
- value || options[:default]
61
- end
43
+ define_collection name, options
62
44
  else
63
- define_method(name) do
64
- properties[name.to_s] || options[:default]
65
- end
66
-
67
- define_method("#{name}=") do |value|
68
- # @todo figure out a sane way to distinguish subobject properties
69
- unless value.is_a? Object
70
- value.downcase! if options[:downcase]
71
- value = processor.call(value, *args, options)
72
- end
73
- properties[name.to_s] = value
74
- end
45
+ define_single name, options, args, processor
75
46
  end
76
47
  end
77
48
  end
78
49
 
50
+ # @api private
51
+ def register_property name, options
52
+ available_properties << name.to_s
53
+ required_properties << name.to_s if options[:required]
54
+ Registry.register [namespace, name].join(":"), options[:to] if options[:to]
55
+ end
56
+
57
+ # @api private
58
+ def register_verticals name, assigned_verticals
59
+ [*assigned_verticals].each do |vertical|
60
+ vertical = [namespace, vertical].join(".")
61
+ verticals[vertical] << name.to_s
62
+ Registry.verticals << vertical
63
+ end
64
+ end
65
+
66
+ # @api private
67
+ def define_collection name, options
68
+ define_method("#{name}s") do
69
+ children[name.to_s]
70
+ end
71
+
72
+ define_method(name) do
73
+ value = children[name.to_s].first
74
+ # @todo figure out a sane way to distinguish subobject properties
75
+ value.content if value && value.is_a?(Object)
76
+ value || options[:default]
77
+ end
78
+ end
79
+
80
+ # @api private
81
+ def define_single name, options, args, processor
82
+ define_method(name) do
83
+ properties[name.to_s] || options[:default]
84
+ end
85
+
86
+ define_method("#{name}=") do |value|
87
+ # @todo figure out a sane way to distinguish subobject properties
88
+ unless value.is_a? Object
89
+ value.downcase! if options[:downcase]
90
+ value = processor.call(value, *args, options)
91
+ end
92
+ properties[name.to_s] = value
93
+ end
94
+ end
95
+
79
96
  # Alias to trick YARD
80
97
  singleton_class.send(:alias_method, :define_type_no_doc, :define_type)
81
98
 
99
+ # The processor for the content attribute.
100
+ #
101
+ # @api private
102
+ # @return [Proc]
103
+ attr_reader :content_processor
104
+
82
105
  # @overload namespace
83
106
  # Get the namespace of this object.
84
107
  #
@@ -91,7 +114,7 @@ module OpenGraphReader
91
114
  # namespace :og, :image
92
115
  def namespace *names
93
116
  return @namespace if names.empty?
94
- @namespace = names.join(':')
117
+ @namespace = names.join(":")
95
118
  Registry.register @namespace, self
96
119
  end
97
120
 
@@ -136,14 +159,6 @@ module OpenGraphReader
136
159
  @processors ||= {}
137
160
  end
138
161
 
139
- # The processor for the content attribute.
140
- #
141
- # @api private
142
- # @return [Proc]
143
- def content_processor
144
- @content_processor
145
- end
146
-
147
162
  # A map from vertical names to attributes that belong to them.
148
163
  #
149
164
  # @api private
@@ -1,7 +1,7 @@
1
- require 'date'
2
- require 'uri'
1
+ require "date"
2
+ require "uri"
3
3
 
4
- require 'open_graph_reader/object/dsl'
4
+ require "open_graph_reader/object/dsl"
5
5
 
6
6
  module OpenGraphReader
7
7
  module Object
@@ -19,28 +19,33 @@ module OpenGraphReader
19
19
  define_type_no_doc :url do |value, options|
20
20
  value = value.to_s
21
21
 
22
- unless value.start_with?('http://') || value.start_with?('https://')
23
- if options[:image] && OpenGraphReader.config.synthesize_image_url
24
- unless OpenGraphReader.current_origin
25
- raise ArgumentError, "Enabled image url synthesization but didn't pass an origin"
26
- end
27
-
28
- # Synthesize scheme hack to https (//example.org/foo/bar.png)
29
- if value.start_with?('//') && value.split('/', 4)[2] =~ URI::HOST
30
- value = "https:#{value}"
31
- else # Synthesize absolute path (/foo/bar.png)
32
- begin
33
- value = "/#{value}" unless value.start_with? '/' # Normalize to absolute path
34
- uri = URI.parse(OpenGraphReader.current_origin)
35
- uri.path = value
36
- value = uri.to_s
37
- rescue
38
- raise InvalidObjectError, "URL #{value.inspect} does not start with http:// or https:// and failed to synthesize a full URL"
39
- end
40
- end
41
- elsif options.has_key?(:to) && OpenGraphReader.config.validate_references
42
- raise InvalidObjectError, "URL #{value.inspect} does not start with http:// or https://"
22
+ next value if value.start_with?("http://") || value.start_with?("https://")
23
+
24
+ if options[:image] && OpenGraphReader.config.synthesize_image_url || OpenGraphReader.config.synthesize_url
25
+ unless OpenGraphReader.current_origin
26
+ next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
27
+
28
+ raise ArgumentError, "Enabled image url synthesization but didn't pass an origin"
29
+ end
30
+
31
+ # Synthesize scheme hack to https (//example.org/foo/bar.png)
32
+ next "https:#{value}" if value.start_with?("//") && value.split("/", 4)[2] =~ URI::HOST
33
+
34
+ # Synthesize absolute path (/foo/bar.png)
35
+ begin
36
+ value = "/#{value}" unless value.start_with? "/" # Normalize to absolute path
37
+ uri = URI.parse(OpenGraphReader.current_origin)
38
+ uri.path = value
39
+ value = uri.to_s
40
+ rescue
41
+ next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
42
+ raise InvalidObjectError,
43
+ "URL #{value.inspect} does not start with http:// or https:// and failed to "\
44
+ "synthesize a full URL"
43
45
  end
46
+ elsif options.has_key?(:to) && OpenGraphReader.config.validate_references
47
+ next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
48
+ raise InvalidObjectError, "URL #{value.inspect} does not start with http:// or https://"
44
49
  end
45
50
 
46
51
  value
@@ -50,46 +55,57 @@ module OpenGraphReader
50
55
  # @param [Array<String>] allowed the list of allowed values
51
56
  # @!macro define_type_description
52
57
  # @see http://ogp.me/#enum
53
- define_type_no_doc :enum do |value, allowed|
58
+ define_type_no_doc :enum do |value, allowed, options|
59
+ value = value.to_s
60
+
54
61
  unless allowed.include? value
62
+ next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
55
63
  raise InvalidObjectError, "Expected one of #{allowed.inspect} but was #{value.inspect}"
56
64
  end
57
65
 
58
- value.to_s
66
+ value
59
67
  end
60
68
 
61
69
  # @see http://ogp.me/#integer
62
- define_type :integer do |value|
70
+ define_type :integer do |value, options|
63
71
  begin
64
72
  Integer(value)
65
- rescue ArgumentError => e
73
+ rescue ArgumentError
74
+ next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
66
75
  raise InvalidObjectError, "Integer expected, but was #{value.inspect}"
67
76
  end
68
77
  end
69
78
 
70
79
  # @see http://ogp.me/#datetime
71
- define_type :datetime do |value|
80
+ define_type :datetime do |value, options|
72
81
  begin
73
- DateTime.iso8601 value
74
- rescue ArgumentError => e
82
+ if OpenGraphReader.config.guess_datetime_format
83
+ DateTime.parse value
84
+ else
85
+ DateTime.iso8601 value
86
+ end
87
+ rescue ArgumentError
88
+ next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
75
89
  raise InvalidObjectError, "ISO8601 datetime expected, but was #{value.inspect}"
76
90
  end
77
91
  end
78
92
 
79
93
  # @see http://ogp.me/#bool
80
- define_type :boolean do |value|
81
- {'true' => true, 'false' => false, '1' => true, '0' => false}[value].tap {|bool|
94
+ define_type :boolean do |value, options|
95
+ {"true" => true, "false" => false, "1" => true, "0" => false}[value].tap {|bool|
82
96
  if bool.nil?
97
+ next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
83
98
  raise InvalidObjectError, "Boolean expected, but was #{value.inspect}"
84
99
  end
85
100
  }
86
101
  end
87
102
 
88
103
  # @see http://ogp.me/#float
89
- define_type :float do |value|
104
+ define_type :float do |value, options|
90
105
  begin
91
106
  Float(value)
92
- rescue ArgumentError => e
107
+ rescue ArgumentError
108
+ next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
93
109
  raise InvalidObjectError, "Float expected, but was #{value.inspect}"
94
110
  end
95
111
  end
@@ -1,6 +1,6 @@
1
- require 'singleton'
2
- require 'forwardable'
3
- require 'set'
1
+ require "singleton"
2
+ require "forwardable"
3
+ require "set"
4
4
 
5
5
  module OpenGraphReader
6
6
  module Object
@@ -1,6 +1,6 @@
1
- require 'nokogiri'
1
+ require "nokogiri"
2
2
 
3
- require 'open_graph_reader/parser/graph'
3
+ require "open_graph_reader/parser/graph"
4
4
 
5
5
  module OpenGraphReader
6
6
  # Parse OpenGraph tags in a HTML document into a graph.
@@ -8,14 +8,14 @@ module OpenGraphReader
8
8
  # @api private
9
9
  class Parser
10
10
  # Some helper methods for Nokogiri
11
- XPathHelpers = Class.new do
11
+ module XPathHelpers
12
12
  # Helper to lowercase all given properties
13
- def ci_starts_with node_set, string
13
+ def self.ci_starts_with node_set, string
14
14
  node_set.select {|node|
15
15
  node.to_s.downcase.start_with? string.downcase
16
16
  }
17
17
  end
18
- end.new
18
+ end
19
19
 
20
20
  # Namespaces found in the passed documents head tag
21
21
  #
@@ -35,7 +35,7 @@ module OpenGraphReader
35
35
  # Whether there are any OpenGraph tags at all.
36
36
  #
37
37
  # @return [Bool]
38
- def has_tags?
38
+ def any_tags?
39
39
  !graph.empty?
40
40
  end
41
41
 
@@ -50,45 +50,46 @@ module OpenGraphReader
50
50
  #
51
51
  # @return [String]
52
52
  def title
53
- @doc.xpath('/html/head/title').first.text
53
+ @doc.xpath("/html/head/title").first.text
54
54
  end
55
55
 
56
56
  private
57
57
 
58
58
  def build_graph
59
59
  graph = Graph.new
60
- head = @doc.xpath('/html/head').first
60
+
61
+ meta_tags.each do |tag|
62
+ *path, leaf = tag["property"].downcase.split(":")
63
+ node = graph.find_or_create_path path
64
+
65
+ # @todo make stripping configurable?
66
+ node << Graph::Node.new(leaf, tag["content"].strip)
67
+ end
68
+
69
+ graph
70
+ end
71
+
72
+ def meta_tags
73
+ head = @doc.xpath("/html/head").first
61
74
 
62
75
  raise NoOpenGraphDataError, "There's no head tag in #{@doc}" unless head
63
76
 
77
+ head.xpath("meta[#{xpath_condition(head)}]", XPathHelpers)
78
+ end
79
+
80
+ def xpath_condition head
64
81
  condition = "ci_starts_with(@property, 'og:')"
65
- if head['prefix']
66
- @additional_namespaces = head['prefix'].scan(/(\w+):\s*([^ ]+)/)
82
+
83
+ if head["prefix"]
84
+ @additional_namespaces = head["prefix"].scan(/(\w+):\s*([^ ]+)/)
67
85
  @additional_namespaces.map! {|prefix, _| prefix.downcase }
68
86
  @additional_namespaces.each do |additional_namespace|
69
- next if additional_namespace == 'og'
87
+ next if additional_namespace == "og"
70
88
  condition << " or ci_starts_with(@property, '#{additional_namespace}')"
71
89
  end
72
90
  end
73
91
 
74
- head.xpath("meta[#{condition}]", XPathHelpers).each do |tag|
75
- *path, leaf = tag['property'].downcase.split(':')
76
- node = path.inject(graph.root) {|node, name|
77
- child = node.children.reverse.find {|child| child.name == name }
78
-
79
- unless child
80
- child = Graph::Node.new name
81
- node << child
82
- end
83
-
84
- child
85
- }
86
-
87
- # @todo make stripping configurable?
88
- node << Graph::Node.new(leaf, tag['content'].strip)
89
- end
90
-
91
- graph
92
+ condition
92
93
  end
93
94
 
94
95
  def to_doc html