richurls 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7e1fcf6a779cb196b1b72fe63156af98b340fc8be86b2b78a4e233a479c47b9d
4
- data.tar.gz: 4dd73e4c5c52b1bdcbba13d612158f2827d8d770a67b0ac7229d3a331032b1a5
3
+ metadata.gz: 42cff335ffa60073310cdec6c1b16f2a1f1a4cfce0bde60835a76538a0f138aa
4
+ data.tar.gz: cdded0800498b21f24b5c89910da73fd5150492c4f9c7ccedcb5499d538b1423
5
5
  SHA512:
6
- metadata.gz: bf8186d4e4dc41447c9bc3a0adad8575f8893eebd64702703371f0ad612e9eab2ffd5eb06161d7c8248a0c21e25cddc2b48af3d13527f5a381bf08c5ba427f29
7
- data.tar.gz: 8cf6e808d2df2f97b9912d67b79396c9f11e541d94cb6f48e015028c630e2991003757c1aacf9036656befd2a1d31509fdc55fbce494d2991a3bef8211dc18cf
6
+ metadata.gz: a5633b6852996d4938f002e2e1e4a90e1b25037ace20ba5da9303abc4dc073efa9ba5819134080d6c467dbe905733226cd66554e99edcd9ab57fbfd49bdc4a57
7
+ data.tar.gz: '08bce91b2a1c2a301b3eb288883621f7f7d607e56e36765ee00d5d09a84348f281cff0e48f61e074c9c394d5478b9c7140ca9a4f02eb37778e129c3ec8ed6277'
data/.rubocop.yml CHANGED
@@ -27,6 +27,12 @@ Style/FrozenStringLiteralComment:
27
27
  Enabled: false
28
28
  Style/GuardClause:
29
29
  Enabled: false
30
+ Style/HashEachMethods:
31
+ Enabled: true
32
+ Style/HashTransformKeys:
33
+ Enabled: true
34
+ Style/HashTransformValues:
35
+ Enabled: true
30
36
  Naming/MemoizedInstanceVariableName:
31
37
  Enabled: false
32
38
  Style/RegexpLiteral:
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- richurls (0.3.0)
4
+ richurls (0.4.0)
5
5
  oj (~> 3)
6
6
  ox (~> 2)
7
7
  patron (~> 0.13)
@@ -2,26 +2,30 @@ require 'ox'
2
2
 
3
3
  require_relative 'xml_handler'
4
4
  require_relative 'url_helper'
5
- require_relative 'parsers/title_parser'
6
- require_relative 'parsers/description_parser'
7
- require_relative 'parsers/image_parser'
5
+ require_relative 'parsers/property'
6
+ require_relative 'parsers/url'
8
7
  require_relative 'parsers/embed_parser'
9
8
  require_relative 'parsers/provider_display_parser'
10
- require_relative 'parsers/favicon_parser'
11
9
 
12
10
  module RichUrls
13
11
  class BodyDecorator
14
12
  NoXMLError = Class.new(StandardError)
15
13
 
16
14
  PARSERS = {
17
- 'title' => Parsers::TitleParser,
18
- 'description' => Parsers::DescriptionParser,
19
- 'image' => Parsers::ImageParser,
15
+ 'title' => Parsers::Property,
16
+ 'description' => Parsers::Property,
17
+ 'image' => Parsers::Url,
18
+ 'favicon' => Parsers::Url,
20
19
  'provider_display' => Parsers::ProviderDisplayParser,
21
- 'favicon' => Parsers::FaviconParser,
22
20
  'embed' => Parsers::EmbedParser
23
21
  }.freeze
24
22
 
23
+ def self.decorate(url, body)
24
+ new(url, body).decorate
25
+ end
26
+
27
+ private_class_method :new
28
+
25
29
  def initialize(url, body)
26
30
  @url = url
27
31
  @xml = XMLHandler.new
@@ -37,7 +41,7 @@ module RichUrls
37
41
 
38
42
  def decorate
39
43
  PARSERS.each_with_object({}) do |(key, parser), object|
40
- object[key] = parser.call(@xml, @url)&.force_encoding('UTF-8')
44
+ object[key] = parser.call(@xml.properties[key], @url)
41
45
  end
42
46
  end
43
47
  end
data/lib/el.rb ADDED
@@ -0,0 +1,41 @@
1
+ module RichUrls
2
+ class El
3
+ MAX_TEXT_LENGTH = 1000
4
+
5
+ attr_reader :tag, :open, :attributes
6
+
7
+ def initialize(tag)
8
+ @tag = tag
9
+ @open = true
10
+ @attributes = {}
11
+ end
12
+
13
+ def add(key, value)
14
+ return if @attributes[key]
15
+
16
+ @attributes[key] = value
17
+ end
18
+
19
+ def append_text(str)
20
+ @attributes[:text] ||= ''
21
+
22
+ str = str.strip
23
+ length = @attributes[:text].length
24
+
25
+ if length <= MAX_TEXT_LENGTH
26
+ end_slice = MAX_TEXT_LENGTH - length
27
+ sliced = str[0...end_slice]
28
+
29
+ @attributes[:text] << sliced + ' '
30
+ end
31
+ end
32
+
33
+ def text
34
+ @attributes[:text].strip
35
+ end
36
+
37
+ def close!
38
+ @open = false
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,14 @@
1
+ module RichUrls
2
+ module Finders
3
+ module Favicon
4
+ KEYWORDS = ['shortcut icon', 'icon shortcut', 'icon'].freeze
5
+
6
+ def self.call(elem)
7
+ return unless elem.tag == :link &&
8
+ KEYWORDS.include?(elem.attributes[:rel])
9
+
10
+ elem.attributes[:href]
11
+ end
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,12 @@
1
+ module RichUrls
2
+ module Finders
3
+ module MetaDescription
4
+ def self.call(elem)
5
+ return unless elem.tag == :meta &&
6
+ elem.attributes[:property] == 'og:description'
7
+
8
+ elem.attributes[:content]
9
+ end
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,12 @@
1
+ module RichUrls
2
+ module Finders
3
+ module MetaImage
4
+ def self.call(elem)
5
+ return unless elem.tag == :meta &&
6
+ elem.attributes[:property] == 'og:image'
7
+
8
+ elem.attributes[:content]
9
+ end
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,12 @@
1
+ module RichUrls
2
+ module Finders
3
+ module MetaTitle
4
+ def self.call(elem)
5
+ return unless elem.tag == :meta &&
6
+ elem.attributes[:property] == 'og:title'
7
+
8
+ elem.attributes[:content]
9
+ end
10
+ end
11
+ end
12
+ end
data/lib/finders.rb ADDED
@@ -0,0 +1,7 @@
1
+ module RichUrls
2
+ module Finders
3
+ Description = ->(elem) { elem.tag == :p && elem.text }
4
+ Image = ->(elem) { elem.tag == :img && elem.attributes[:src] }
5
+ Title = ->(elem) { elem.tag == :title && elem.text }
6
+ end
7
+ end
@@ -1,28 +1,30 @@
1
- module Parsers
2
- class EmbedParser
3
- require_relative 'embed_parsers/base'
4
- require_relative 'embed_parsers/youtube'
5
- require_relative 'embed_parsers/youtube_short'
6
- require_relative 'embed_parsers/paste'
7
- require_relative 'embed_parsers/spotify'
1
+ module RichUrls
2
+ module Parsers
3
+ class EmbedParser
4
+ require_relative 'embed_parsers/base'
5
+ require_relative 'embed_parsers/youtube'
6
+ require_relative 'embed_parsers/youtube_short'
7
+ require_relative 'embed_parsers/paste'
8
+ require_relative 'embed_parsers/spotify'
8
9
 
9
- PARSERS = [
10
- Youtube,
11
- YoutubeShort,
12
- Paste,
13
- Spotify
14
- ].freeze
10
+ PARSERS = [
11
+ Youtube,
12
+ YoutubeShort,
13
+ Paste,
14
+ Spotify
15
+ ].freeze
15
16
 
16
- def self.call(_, url)
17
- uri = URI(url)
17
+ def self.call(_, url)
18
+ uri = URI(url)
18
19
 
19
- PARSERS.each do |parser|
20
- embed_parser = parser.new(uri)
20
+ PARSERS.each do |parser|
21
+ embed_parser = parser.new(uri)
21
22
 
22
- return embed_parser.parse if embed_parser.match?
23
- end
23
+ return embed_parser.parse if embed_parser.match?
24
+ end
24
25
 
25
- nil
26
+ nil
27
+ end
26
28
  end
27
29
  end
28
30
  end
@@ -1,14 +1,16 @@
1
- module Parsers
2
- class EmbedParser
3
- class Base
4
- def initialize(uri)
5
- @uri = uri
6
- end
1
+ module RichUrls
2
+ module Parsers
3
+ class EmbedParser
4
+ class Base
5
+ def initialize(uri)
6
+ @uri = uri
7
+ end
7
8
 
8
- private
9
+ private
9
10
 
10
- def query
11
- @query ||= Hash[URI.decode_www_form(@uri.query)]
11
+ def query
12
+ @query ||= Hash[URI.decode_www_form(@uri.query)]
13
+ end
12
14
  end
13
15
  end
14
16
  end
@@ -1,18 +1,20 @@
1
- module Parsers
2
- class EmbedParser
3
- class Paste < Base
4
- IFRAME = '<iframe src="https://pasteapp.com/p/%s/embed?view=%s" '\
5
- 'width="480" height="480" scrolling="no" '\
6
- 'frameborder="0" allowfullscreen></iframe>'.freeze
1
+ module RichUrls
2
+ module Parsers
3
+ class EmbedParser
4
+ class Paste < Base
5
+ IFRAME = '<iframe src="https://pasteapp.com/p/%s/embed?view=%s" '\
6
+ 'width="480" height="480" scrolling="no" '\
7
+ 'frameborder="0" allowfullscreen></iframe>'.freeze
7
8
 
8
- def match?
9
- @uri.host == 'pasteapp.com' && @uri.path =~ /\/p\/[a-zA-Z0-9]+/
10
- end
9
+ def match?
10
+ @uri.host == 'pasteapp.com' && @uri.path =~ /\/p\/[a-zA-Z0-9]+/
11
+ end
11
12
 
12
- def parse
13
- path_id = @uri.path.sub(/\/p\//, '')
13
+ def parse
14
+ path_id = @uri.path.sub(/\/p\//, '')
14
15
 
15
- IFRAME % [path_id, query.fetch('view')]
16
+ IFRAME % [path_id, query.fetch('view')]
17
+ end
16
18
  end
17
19
  end
18
20
  end
@@ -1,30 +1,32 @@
1
- module Parsers
2
- class EmbedParser
3
- class Spotify < Base
4
- IFRAME = '<iframe src="https://open.spotify.com/embed/%s/%s" '\
5
- 'width="300" height="380" frameborder="0" '\
6
- 'allowtransparency="true" allow="encrypted-media">'\
7
- '</iframe>'.freeze
1
+ module RichUrls
2
+ module Parsers
3
+ class EmbedParser
4
+ class Spotify < Base
5
+ IFRAME = '<iframe src="https://open.spotify.com/embed/%s/%s" '\
6
+ 'width="300" height="380" frameborder="0" '\
7
+ 'allowtransparency="true" allow="encrypted-media">'\
8
+ '</iframe>'.freeze
8
9
 
9
- SCOPES = %w[
10
- album
11
- track
12
- playlist
13
- ].freeze
10
+ SCOPES = %w[
11
+ album
12
+ track
13
+ playlist
14
+ ].freeze
14
15
 
15
- def match?
16
- valid_path = SCOPES.any? do |path|
17
- @uri.path.start_with?("/#{path}")
18
- end
16
+ def match?
17
+ valid_path = SCOPES.any? do |path|
18
+ @uri.path.start_with?("/#{path}")
19
+ end
19
20
 
20
- @uri.host == 'open.spotify.com' && valid_path
21
- end
21
+ @uri.host == 'open.spotify.com' && valid_path
22
+ end
22
23
 
23
- def parse
24
- path = @uri.path
25
- path[0] = ''
24
+ def parse
25
+ path = @uri.path
26
+ path[0] = ''
26
27
 
27
- IFRAME % path.split('/')
28
+ IFRAME % path.split('/')
29
+ end
28
30
  end
29
31
  end
30
32
  end
@@ -1,20 +1,22 @@
1
- module Parsers
2
- class EmbedParser
3
- class Youtube < Base
4
- IFRAME = '<iframe width="560" height="315" '\
5
- 'src="https://www.youtube.com/embed/%s" frameborder="0" '\
6
- 'allow="accelerometer; autoplay; encrypted-media; '\
7
- 'gyroscope; picture-in-picture" allowfullscreen>'\
8
- '</iframe>'.freeze
1
+ module RichUrls
2
+ module Parsers
3
+ class EmbedParser
4
+ class Youtube < Base
5
+ IFRAME = '<iframe width="560" height="315" '\
6
+ 'src="https://www.youtube.com/embed/%s" frameborder="0" '\
7
+ 'allow="accelerometer; autoplay; encrypted-media; '\
8
+ 'gyroscope; picture-in-picture" allowfullscreen>'\
9
+ '</iframe>'.freeze
9
10
 
10
- def match?
11
- @uri.host == 'www.youtube.com' &&
12
- @uri.path == '/watch' &&
13
- query.key?('v')
14
- end
11
+ def match?
12
+ @uri.host == 'www.youtube.com' &&
13
+ @uri.path == '/watch' &&
14
+ query.key?('v')
15
+ end
15
16
 
16
- def parse
17
- IFRAME % query.fetch('v')
17
+ def parse
18
+ IFRAME % query.fetch('v')
19
+ end
18
20
  end
19
21
  end
20
22
  end
@@ -1,15 +1,17 @@
1
- module Parsers
2
- class EmbedParser
3
- class YoutubeShort < Base
4
- def match?
5
- @uri.host == 'youtu.be'
6
- end
1
+ module RichUrls
2
+ module Parsers
3
+ class EmbedParser
4
+ class YoutubeShort < Base
5
+ def match?
6
+ @uri.host == 'youtu.be'
7
+ end
7
8
 
8
- def parse
9
- path = @uri.path
10
- path[0] = ''
9
+ def parse
10
+ path = @uri.path
11
+ path[0] = ''
11
12
 
12
- Youtube::IFRAME % path
13
+ Youtube::IFRAME % path
14
+ end
13
15
  end
14
16
  end
15
17
  end
@@ -0,0 +1,5 @@
1
+ module RichUrls
2
+ module Parsers
3
+ Property = ->(property, _) { property&.force_encoding('UTF-8') }
4
+ end
5
+ end
@@ -1,3 +1,5 @@
1
- module Parsers
2
- ProviderDisplayParser = ->(_, url) { URI(url).host }
1
+ module RichUrls
2
+ module Parsers
3
+ ProviderDisplayParser = ->(_, url) { URI(url).host }
4
+ end
3
5
  end
@@ -0,0 +1,5 @@
1
+ module RichUrls
2
+ module Parsers
3
+ Url = ->(content, url) { UrlHelper.url_for(url, content) }
4
+ end
5
+ end
data/lib/url_fetcher.rb CHANGED
@@ -38,7 +38,7 @@ module RichUrls
38
38
  response = session.get(@url)
39
39
 
40
40
  if response.status < 400
41
- decorated = BodyDecorator.new(response.url, response.body).decorate
41
+ decorated = BodyDecorator.decorate(response.url, response.body)
42
42
  RichUrls.cache.set(digest, Oj.dump(decorated))
43
43
  decorated
44
44
  else
data/lib/xml_handler.rb CHANGED
@@ -1,12 +1,18 @@
1
+ require_relative 'el'
2
+ require_relative 'finders'
3
+ require_relative 'finders/meta_title'
4
+ require_relative 'finders/meta_description'
5
+ require_relative 'finders/meta_image'
6
+ require_relative 'finders/favicon'
7
+
1
8
  module RichUrls
2
9
  class XMLHandler < ::Ox::Sax
3
10
  WHITELISTED_EL_NAMES = %i[
4
- html
5
- head
6
11
  title
7
12
  meta
8
13
  link
9
14
  img
15
+ p
10
16
  ].freeze
11
17
 
12
18
  WHITELISTED_ATTRS = %i[
@@ -17,48 +23,97 @@ module RichUrls
17
23
  src
18
24
  ].freeze
19
25
 
26
+ FALLBACK_ELEMENTS = {
27
+ img: 'og:image',
28
+ p: 'og:description',
29
+ title: 'og:title'
30
+ }.freeze
31
+
32
+ FINDERS = {
33
+ Finders::MetaTitle => 'title',
34
+ Finders::MetaDescription => 'description',
35
+ Finders::MetaImage => 'image',
36
+ Finders::Favicon => 'favicon',
37
+ Finders::Title => 'title',
38
+ Finders::Description => 'description',
39
+ Finders::Image => 'image'
40
+ }.freeze
41
+
20
42
  StopParsingError = Class.new(StandardError)
21
- El = Struct.new(:name, :attributes)
22
43
 
23
- attr_accessor :elements
44
+ attr_reader :elements, :properties
24
45
 
25
46
  def initialize
26
47
  @elements = []
48
+ @counts = Set.new
49
+ @properties = {
50
+ 'title' => nil,
51
+ 'description' => nil,
52
+ 'image' => nil,
53
+ 'favicon' => nil
54
+ }
27
55
  end
28
56
 
29
- def find(name, attrs = {})
57
+ def find(tag, attrs = {})
30
58
  @elements.detect do |el|
31
59
  matching_attributes = attrs.all? { |k, v| el.attributes[k] == v }
32
60
 
33
- el.name == name && matching_attributes
61
+ el.tag == tag && matching_attributes
34
62
  end
35
63
  end
36
64
 
37
- def start_element(element_name)
38
- return unless WHITELISTED_EL_NAMES.include?(element_name)
65
+ def start_element(tag)
66
+ return unless WHITELISTED_EL_NAMES.include?(tag)
39
67
 
40
- @elements << El.new(element_name, {})
68
+ @elements << El.new(tag) if add_element?(tag)
41
69
  end
42
70
 
43
- def attr(name, str)
44
- el = @elements.last
71
+ def end_element(tag)
72
+ return unless WHITELISTED_EL_NAMES.include?(tag)
45
73
 
46
- return unless el && WHITELISTED_ATTRS.include?(name)
74
+ el = @elements.reverse_each.detect { |e| e.open && e.tag == tag }
75
+ return unless el
47
76
 
48
- el.attributes[name] = str
77
+ el.close!
78
+ find_element(el)
49
79
 
50
- raise StopParsingError if stop?(name, el)
80
+ raise StopParsingError if @properties.values.all?
51
81
  end
52
82
 
53
- def text(str)
83
+ def attr(key, value)
84
+ return unless WHITELISTED_ATTRS.include?(key)
85
+
54
86
  el = @elements.last
55
- el && el.attributes[:text].nil? && el.attributes[:text] = str
87
+ el&.add(key, value)
88
+ end
89
+
90
+ def text(str)
91
+ el = @elements.detect(&:open)
92
+ el&.append_text(str)
56
93
  end
57
94
 
58
95
  private
59
96
 
60
- def stop?(name, elem)
61
- name == WHITELISTED_ATTRS.last && elem.name == :img
97
+ def find_element(elem)
98
+ FINDERS.each_pair do |finder, attribute|
99
+ next if @properties[attribute]
100
+
101
+ content = finder.call(elem)
102
+
103
+ if content
104
+ @properties[attribute] = content
105
+ break
106
+ end
107
+ end
108
+ end
109
+
110
+ def add_element?(tag)
111
+ return true unless FALLBACK_ELEMENTS.keys.include?(tag)
112
+ return false if @counts.include?(tag)
113
+
114
+ @counts.add(tag)
115
+
116
+ !find(:meta, property: FALLBACK_ELEMENTS.fetch(tag))
62
117
  end
63
118
  end
64
119
  end
data/richurls.gemspec CHANGED
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
3
 
4
4
  Gem::Specification.new do |spec|
5
5
  spec.name = 'richurls'
6
- spec.version = '0.3.0'
6
+ spec.version = '0.4.0'
7
7
  spec.authors = ['grdw']
8
8
  spec.email = ['gerard@wetransfer.com']
9
9
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: richurls
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - grdw
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-03-17 00:00:00.000000000 Z
11
+ date: 2020-03-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: oj
@@ -124,17 +124,21 @@ files:
124
124
  - README.md
125
125
  - lib/body_decorator.rb
126
126
  - lib/cache.rb
127
- - lib/parsers/description_parser.rb
127
+ - lib/el.rb
128
+ - lib/finders.rb
129
+ - lib/finders/favicon.rb
130
+ - lib/finders/meta_description.rb
131
+ - lib/finders/meta_image.rb
132
+ - lib/finders/meta_title.rb
128
133
  - lib/parsers/embed_parser.rb
129
134
  - lib/parsers/embed_parsers/base.rb
130
135
  - lib/parsers/embed_parsers/paste.rb
131
136
  - lib/parsers/embed_parsers/spotify.rb
132
137
  - lib/parsers/embed_parsers/youtube.rb
133
138
  - lib/parsers/embed_parsers/youtube_short.rb
134
- - lib/parsers/favicon_parser.rb
135
- - lib/parsers/image_parser.rb
139
+ - lib/parsers/property.rb
136
140
  - lib/parsers/provider_display_parser.rb
137
- - lib/parsers/title_parser.rb
141
+ - lib/parsers/url.rb
138
142
  - lib/richurls.rb
139
143
  - lib/url_fetcher.rb
140
144
  - lib/url_helper.rb
@@ -1,7 +0,0 @@
1
- module Parsers
2
- DescriptionParser = lambda do |document, _|
3
- meta_el = document.find(:meta, property: 'og:description')
4
-
5
- meta_el && meta_el.attributes[:content]
6
- end
7
- end
@@ -1,18 +0,0 @@
1
- module Parsers
2
- module FaviconParser
3
- KEYWORDS = ['shortcut icon', 'icon shortcut', 'icon'].freeze
4
-
5
- def self.call(document, url)
6
- KEYWORDS.each do |rel|
7
- found_document = document.find(:link, rel: rel)
8
-
9
- if found_document
10
- @el = found_document
11
- break
12
- end
13
- end
14
-
15
- @el && UrlHelper.url_for(url, @el.attributes[:href])
16
- end
17
- end
18
- end
@@ -1,14 +0,0 @@
1
- module Parsers
2
- class ImageParser
3
- def self.call(document, url)
4
- meta_image = document.find(:meta, property: 'og:image')
5
- image_tag = document.find(:img)
6
-
7
- image_source =
8
- (meta_image && meta_image.attributes[:content]) ||
9
- (image_tag && image_tag.attributes[:src])
10
-
11
- image_source && UrlHelper.url_for(url, image_source)
12
- end
13
- end
14
- end
@@ -1,9 +0,0 @@
1
- module Parsers
2
- TitleParser = lambda do |document, _|
3
- meta_el = document.find(:meta, property: 'og:title')
4
- title_el = document.find(:title)
5
-
6
- meta_el && meta_el.attributes[:content] ||
7
- title_el && title_el.attributes[:text]
8
- end
9
- end