open_graph_reader 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,14 +1,14 @@
1
- require 'faraday'
1
+ require "faraday"
2
2
 
3
3
  begin
4
- require 'faraday_middleware/response/follow_redirects'
4
+ require "faraday_middleware/response/follow_redirects"
5
5
  rescue LoadError; end
6
6
 
7
7
  begin
8
- require 'faraday/cookie_jar'
8
+ require "faraday/cookie_jar"
9
9
  rescue LoadError; end
10
10
 
11
- require 'open_graph_reader/version'
11
+ require "open_graph_reader/version"
12
12
 
13
13
  module OpenGraphReader
14
14
  # Fetch an URI to retrieve its HTML body, if available.
@@ -16,8 +16,8 @@ module OpenGraphReader
16
16
  # @api private
17
17
  class Fetcher
18
18
  HEADERS = {
19
- 'Accept' => 'text/html',
20
- 'User-Agent' => "OpenGraphReader/#{OpenGraphReader::VERSION} (+https://github.com/jhass/open_graph_reader)"
19
+ "Accept" => "text/html",
20
+ "User-Agent" => "OpenGraphReader/#{OpenGraphReader::VERSION} (+https://github.com/jhass/open_graph_reader)"
21
21
  }.freeze
22
22
 
23
23
  # Create a new fetcher.
@@ -29,13 +29,8 @@ module OpenGraphReader
29
29
  @connection = Faraday.default_connection.dup
30
30
  @connection.headers.replace(HEADERS)
31
31
 
32
- if defined? Faraday::CookieJar
33
- prepend_middleware Faraday::CookieJar
34
- end
35
-
36
- if defined? FaradayMiddleware
37
- prepend_middleware FaradayMiddleware::FollowRedirects
38
- end
32
+ prepend_middleware Faraday::CookieJar if defined? Faraday::CookieJar
33
+ prepend_middleware FaradayMiddleware::FollowRedirects if defined? FaradayMiddleware
39
34
  end
40
35
 
41
36
  # The URL to fetch
@@ -81,8 +76,8 @@ module OpenGraphReader
81
76
  response = @get_response || @head_response
82
77
  return false unless response
83
78
  return false unless response.success?
84
- return false unless response['content-type']
85
- response['content-type'].include? 'text/html'
79
+ return false unless response["content-type"]
80
+ response["content-type"].include? "text/html"
86
81
  end
87
82
 
88
83
  # Whether the target URI was fetched.
@@ -103,9 +98,9 @@ module OpenGraphReader
103
98
  private
104
99
 
105
100
  def prepend_middleware middleware
106
- unless @connection.builder.handlers.include? middleware
107
- @connection.builder.insert(0, middleware)
108
- end
101
+ return if @connection.builder.handlers.include? middleware
102
+
103
+ @connection.builder.insert(0, middleware)
109
104
  end
110
105
  end
111
106
  end
@@ -1,9 +1,8 @@
1
- require 'open_graph_reader/object/registry'
2
- require 'open_graph_reader/object/dsl'
3
- require 'open_graph_reader/object/dsl/types'
1
+ require "open_graph_reader/object/registry"
2
+ require "open_graph_reader/object/dsl"
3
+ require "open_graph_reader/object/dsl/types"
4
4
 
5
5
  module OpenGraphReader
6
-
7
6
  # This module provides the base functionality for all OpenGraph objects
8
7
  # and makes the {DSL} methods for describing them available when included.
9
8
  #
@@ -37,7 +36,6 @@ module OpenGraphReader
37
36
  # @return [{String => Array<String, Object>}]
38
37
  attr_reader :children
39
38
 
40
-
41
39
  # Create a new object. If your class overrides this don't forget to call <tt>super</tt>.
42
40
  def initialize
43
41
  @properties = {}
@@ -48,7 +46,7 @@ module OpenGraphReader
48
46
  #
49
47
  # @param [#to_s] name
50
48
  # @return [Bool]
51
- def has_property? name
49
+ def property? name
52
50
  self.class.available_properties.include? name.to_s
53
51
  end
54
52
 
@@ -70,8 +68,8 @@ module OpenGraphReader
70
68
  # @raise [UndefinedPropertyError] If the requested property is undefined.
71
69
  # @return [String, Object]
72
70
  def [] name
73
- raise UndefinedPropertyError, "Undefined property #{name} on #{inspect}" unless has_property? name
74
- public_send name.to_s #properties[name.to_s]
71
+ raise UndefinedPropertyError, "Undefined property #{name} on #{inspect}" unless property? name
72
+ public_send name.to_s
75
73
  end
76
74
 
77
75
  # Set the property to the given value.
@@ -81,7 +79,7 @@ module OpenGraphReader
81
79
  # @param [String, Object] value
82
80
  # @raise [UndefinedPropertyError] If the requested property is undefined.
83
81
  def []= name, value
84
- if has_property?(name)
82
+ if property?(name)
85
83
  public_send "#{name}=", value
86
84
  elsif OpenGraphReader.config.strict
87
85
  raise UndefinedPropertyError, "Undefined property #{name} on #{inspect}"
@@ -1,4 +1,4 @@
1
- require 'open_graph_reader/object/registry'
1
+ require "open_graph_reader/object/registry"
2
2
 
3
3
  module OpenGraphReader
4
4
  module Object
@@ -36,49 +36,72 @@ module OpenGraphReader
36
36
  options = args.pop if args.last.is_a? Hash
37
37
  options ||= {}
38
38
 
39
- available_properties << name.to_s
40
- required_properties << name.to_s if options[:required]
41
- Registry.register [@namespace, name].join(':'), options[:to] if options[:to]
42
-
43
- if options[:verticals]
44
- options[:verticals].each do |vertical|
45
- vertical = [@namespace, vertical].join('.')
46
- verticals[vertical] << name.to_s
47
- Registry.verticals << vertical
48
- end
49
- end
39
+ register_property name, options
40
+ register_verticals name, options[:verticals]
50
41
 
51
42
  if options[:collection]
52
- define_method("#{name}s") do
53
- children[name.to_s]
54
- end
55
-
56
- define_method(name) do
57
- value = children[name.to_s].first
58
- # @todo figure out a sane way to distinguish subobject properties
59
- value.content if value && value.is_a?(Object)
60
- value || options[:default]
61
- end
43
+ define_collection name, options
62
44
  else
63
- define_method(name) do
64
- properties[name.to_s] || options[:default]
65
- end
66
-
67
- define_method("#{name}=") do |value|
68
- # @todo figure out a sane way to distinguish subobject properties
69
- unless value.is_a? Object
70
- value.downcase! if options[:downcase]
71
- value = processor.call(value, *args, options)
72
- end
73
- properties[name.to_s] = value
74
- end
45
+ define_single name, options, args, processor
75
46
  end
76
47
  end
77
48
  end
78
49
 
50
+ # @api private
51
+ def register_property name, options
52
+ available_properties << name.to_s
53
+ required_properties << name.to_s if options[:required]
54
+ Registry.register [namespace, name].join(":"), options[:to] if options[:to]
55
+ end
56
+
57
+ # @api private
58
+ def register_verticals name, assigned_verticals
59
+ [*assigned_verticals].each do |vertical|
60
+ vertical = [namespace, vertical].join(".")
61
+ verticals[vertical] << name.to_s
62
+ Registry.verticals << vertical
63
+ end
64
+ end
65
+
66
+ # @api private
67
+ def define_collection name, options
68
+ define_method("#{name}s") do
69
+ children[name.to_s]
70
+ end
71
+
72
+ define_method(name) do
73
+ value = children[name.to_s].first
74
+ # @todo figure out a sane way to distinguish subobject properties
75
+ value.content if value && value.is_a?(Object)
76
+ value || options[:default]
77
+ end
78
+ end
79
+
80
+ # @api private
81
+ def define_single name, options, args, processor
82
+ define_method(name) do
83
+ properties[name.to_s] || options[:default]
84
+ end
85
+
86
+ define_method("#{name}=") do |value|
87
+ # @todo figure out a sane way to distinguish subobject properties
88
+ unless value.is_a? Object
89
+ value.downcase! if options[:downcase]
90
+ value = processor.call(value, *args, options)
91
+ end
92
+ properties[name.to_s] = value
93
+ end
94
+ end
95
+
79
96
  # Alias to trick YARD
80
97
  singleton_class.send(:alias_method, :define_type_no_doc, :define_type)
81
98
 
99
+ # The processor for the content attribute.
100
+ #
101
+ # @api private
102
+ # @return [Proc]
103
+ attr_reader :content_processor
104
+
82
105
  # @overload namespace
83
106
  # Get the namespace of this object.
84
107
  #
@@ -91,7 +114,7 @@ module OpenGraphReader
91
114
  # namespace :og, :image
92
115
  def namespace *names
93
116
  return @namespace if names.empty?
94
- @namespace = names.join(':')
117
+ @namespace = names.join(":")
95
118
  Registry.register @namespace, self
96
119
  end
97
120
 
@@ -136,14 +159,6 @@ module OpenGraphReader
136
159
  @processors ||= {}
137
160
  end
138
161
 
139
- # The processor for the content attribute.
140
- #
141
- # @api private
142
- # @return [Proc]
143
- def content_processor
144
- @content_processor
145
- end
146
-
147
162
  # A map from vertical names to attributes that belong to them.
148
163
  #
149
164
  # @api private
@@ -1,7 +1,7 @@
1
- require 'date'
2
- require 'uri'
1
+ require "date"
2
+ require "uri"
3
3
 
4
- require 'open_graph_reader/object/dsl'
4
+ require "open_graph_reader/object/dsl"
5
5
 
6
6
  module OpenGraphReader
7
7
  module Object
@@ -19,28 +19,33 @@ module OpenGraphReader
19
19
  define_type_no_doc :url do |value, options|
20
20
  value = value.to_s
21
21
 
22
- unless value.start_with?('http://') || value.start_with?('https://')
23
- if options[:image] && OpenGraphReader.config.synthesize_image_url
24
- unless OpenGraphReader.current_origin
25
- raise ArgumentError, "Enabled image url synthesization but didn't pass an origin"
26
- end
27
-
28
- # Synthesize scheme hack to https (//example.org/foo/bar.png)
29
- if value.start_with?('//') && value.split('/', 4)[2] =~ URI::HOST
30
- value = "https:#{value}"
31
- else # Synthesize absolute path (/foo/bar.png)
32
- begin
33
- value = "/#{value}" unless value.start_with? '/' # Normalize to absolute path
34
- uri = URI.parse(OpenGraphReader.current_origin)
35
- uri.path = value
36
- value = uri.to_s
37
- rescue
38
- raise InvalidObjectError, "URL #{value.inspect} does not start with http:// or https:// and failed to synthesize a full URL"
39
- end
40
- end
41
- elsif options.has_key?(:to) && OpenGraphReader.config.validate_references
42
- raise InvalidObjectError, "URL #{value.inspect} does not start with http:// or https://"
22
+ next value if value.start_with?("http://") || value.start_with?("https://")
23
+
24
+ if options[:image] && OpenGraphReader.config.synthesize_image_url || OpenGraphReader.config.synthesize_url
25
+ unless OpenGraphReader.current_origin
26
+ next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
27
+
28
+ raise ArgumentError, "Enabled image url synthesization but didn't pass an origin"
29
+ end
30
+
31
+ # Synthesize scheme hack to https (//example.org/foo/bar.png)
32
+ next "https:#{value}" if value.start_with?("//") && value.split("/", 4)[2] =~ URI::HOST
33
+
34
+ # Synthesize absolute path (/foo/bar.png)
35
+ begin
36
+ value = "/#{value}" unless value.start_with? "/" # Normalize to absolute path
37
+ uri = URI.parse(OpenGraphReader.current_origin)
38
+ uri.path = value
39
+ value = uri.to_s
40
+ rescue
41
+ next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
42
+ raise InvalidObjectError,
43
+ "URL #{value.inspect} does not start with http:// or https:// and failed to "\
44
+ "synthesize a full URL"
43
45
  end
46
+ elsif options.has_key?(:to) && OpenGraphReader.config.validate_references
47
+ next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
48
+ raise InvalidObjectError, "URL #{value.inspect} does not start with http:// or https://"
44
49
  end
45
50
 
46
51
  value
@@ -50,46 +55,57 @@ module OpenGraphReader
50
55
  # @param [Array<String>] allowed the list of allowed values
51
56
  # @!macro define_type_description
52
57
  # @see http://ogp.me/#enum
53
- define_type_no_doc :enum do |value, allowed|
58
+ define_type_no_doc :enum do |value, allowed, options|
59
+ value = value.to_s
60
+
54
61
  unless allowed.include? value
62
+ next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
55
63
  raise InvalidObjectError, "Expected one of #{allowed.inspect} but was #{value.inspect}"
56
64
  end
57
65
 
58
- value.to_s
66
+ value
59
67
  end
60
68
 
61
69
  # @see http://ogp.me/#integer
62
- define_type :integer do |value|
70
+ define_type :integer do |value, options|
63
71
  begin
64
72
  Integer(value)
65
- rescue ArgumentError => e
73
+ rescue ArgumentError
74
+ next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
66
75
  raise InvalidObjectError, "Integer expected, but was #{value.inspect}"
67
76
  end
68
77
  end
69
78
 
70
79
  # @see http://ogp.me/#datetime
71
- define_type :datetime do |value|
80
+ define_type :datetime do |value, options|
72
81
  begin
73
- DateTime.iso8601 value
74
- rescue ArgumentError => e
82
+ if OpenGraphReader.config.guess_datetime_format
83
+ DateTime.parse value
84
+ else
85
+ DateTime.iso8601 value
86
+ end
87
+ rescue ArgumentError
88
+ next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
75
89
  raise InvalidObjectError, "ISO8601 datetime expected, but was #{value.inspect}"
76
90
  end
77
91
  end
78
92
 
79
93
  # @see http://ogp.me/#bool
80
- define_type :boolean do |value|
81
- {'true' => true, 'false' => false, '1' => true, '0' => false}[value].tap {|bool|
94
+ define_type :boolean do |value, options|
95
+ {"true" => true, "false" => false, "1" => true, "0" => false}[value].tap {|bool|
82
96
  if bool.nil?
97
+ next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
83
98
  raise InvalidObjectError, "Boolean expected, but was #{value.inspect}"
84
99
  end
85
100
  }
86
101
  end
87
102
 
88
103
  # @see http://ogp.me/#float
89
- define_type :float do |value|
104
+ define_type :float do |value, options|
90
105
  begin
91
106
  Float(value)
92
- rescue ArgumentError => e
107
+ rescue ArgumentError
108
+ next unless options[:required] || !OpenGraphReader.config.discard_invalid_optional_properties
93
109
  raise InvalidObjectError, "Float expected, but was #{value.inspect}"
94
110
  end
95
111
  end
@@ -1,6 +1,6 @@
1
- require 'singleton'
2
- require 'forwardable'
3
- require 'set'
1
+ require "singleton"
2
+ require "forwardable"
3
+ require "set"
4
4
 
5
5
  module OpenGraphReader
6
6
  module Object
@@ -1,6 +1,6 @@
1
- require 'nokogiri'
1
+ require "nokogiri"
2
2
 
3
- require 'open_graph_reader/parser/graph'
3
+ require "open_graph_reader/parser/graph"
4
4
 
5
5
  module OpenGraphReader
6
6
  # Parse OpenGraph tags in a HTML document into a graph.
@@ -8,14 +8,14 @@ module OpenGraphReader
8
8
  # @api private
9
9
  class Parser
10
10
  # Some helper methods for Nokogiri
11
- XPathHelpers = Class.new do
11
+ module XPathHelpers
12
12
  # Helper to lowercase all given properties
13
- def ci_starts_with node_set, string
13
+ def self.ci_starts_with node_set, string
14
14
  node_set.select {|node|
15
15
  node.to_s.downcase.start_with? string.downcase
16
16
  }
17
17
  end
18
- end.new
18
+ end
19
19
 
20
20
  # Namespaces found in the passed documents head tag
21
21
  #
@@ -35,7 +35,7 @@ module OpenGraphReader
35
35
  # Whether there are any OpenGraph tags at all.
36
36
  #
37
37
  # @return [Bool]
38
- def has_tags?
38
+ def any_tags?
39
39
  !graph.empty?
40
40
  end
41
41
 
@@ -50,45 +50,46 @@ module OpenGraphReader
50
50
  #
51
51
  # @return [String]
52
52
  def title
53
- @doc.xpath('/html/head/title').first.text
53
+ @doc.xpath("/html/head/title").first.text
54
54
  end
55
55
 
56
56
  private
57
57
 
58
58
  def build_graph
59
59
  graph = Graph.new
60
- head = @doc.xpath('/html/head').first
60
+
61
+ meta_tags.each do |tag|
62
+ *path, leaf = tag["property"].downcase.split(":")
63
+ node = graph.find_or_create_path path
64
+
65
+ # @todo make stripping configurable?
66
+ node << Graph::Node.new(leaf, tag["content"].strip)
67
+ end
68
+
69
+ graph
70
+ end
71
+
72
+ def meta_tags
73
+ head = @doc.xpath("/html/head").first
61
74
 
62
75
  raise NoOpenGraphDataError, "There's no head tag in #{@doc}" unless head
63
76
 
77
+ head.xpath("meta[#{xpath_condition(head)}]", XPathHelpers)
78
+ end
79
+
80
+ def xpath_condition head
64
81
  condition = "ci_starts_with(@property, 'og:')"
65
- if head['prefix']
66
- @additional_namespaces = head['prefix'].scan(/(\w+):\s*([^ ]+)/)
82
+
83
+ if head["prefix"]
84
+ @additional_namespaces = head["prefix"].scan(/(\w+):\s*([^ ]+)/)
67
85
  @additional_namespaces.map! {|prefix, _| prefix.downcase }
68
86
  @additional_namespaces.each do |additional_namespace|
69
- next if additional_namespace == 'og'
87
+ next if additional_namespace == "og"
70
88
  condition << " or ci_starts_with(@property, '#{additional_namespace}')"
71
89
  end
72
90
  end
73
91
 
74
- head.xpath("meta[#{condition}]", XPathHelpers).each do |tag|
75
- *path, leaf = tag['property'].downcase.split(':')
76
- node = path.inject(graph.root) {|node, name|
77
- child = node.children.reverse.find {|child| child.name == name }
78
-
79
- unless child
80
- child = Graph::Node.new name
81
- node << child
82
- end
83
-
84
- child
85
- }
86
-
87
- # @todo make stripping configurable?
88
- node << Graph::Node.new(leaf, tag['content'].strip)
89
- end
90
-
91
- graph
92
+ condition
92
93
  end
93
94
 
94
95
  def to_doc html