richurls 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +6 -0
- data/Gemfile.lock +1 -1
- data/lib/body_decorator.rb +13 -9
- data/lib/el.rb +41 -0
- data/lib/finders/favicon.rb +14 -0
- data/lib/finders/meta_description.rb +12 -0
- data/lib/finders/meta_image.rb +12 -0
- data/lib/finders/meta_title.rb +12 -0
- data/lib/finders.rb +7 -0
- data/lib/parsers/embed_parser.rb +22 -20
- data/lib/parsers/embed_parsers/base.rb +11 -9
- data/lib/parsers/embed_parsers/paste.rb +14 -12
- data/lib/parsers/embed_parsers/spotify.rb +24 -22
- data/lib/parsers/embed_parsers/youtube.rb +17 -15
- data/lib/parsers/embed_parsers/youtube_short.rb +12 -10
- data/lib/parsers/property.rb +5 -0
- data/lib/parsers/provider_display_parser.rb +4 -2
- data/lib/parsers/url.rb +5 -0
- data/lib/url_fetcher.rb +1 -1
- data/lib/xml_handler.rb +73 -18
- data/richurls.gemspec +1 -1
- metadata +10 -6
- data/lib/parsers/description_parser.rb +0 -7
- data/lib/parsers/favicon_parser.rb +0 -18
- data/lib/parsers/image_parser.rb +0 -14
- data/lib/parsers/title_parser.rb +0 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 42cff335ffa60073310cdec6c1b16f2a1f1a4cfce0bde60835a76538a0f138aa
|
4
|
+
data.tar.gz: cdded0800498b21f24b5c89910da73fd5150492c4f9c7ccedcb5499d538b1423
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a5633b6852996d4938f002e2e1e4a90e1b25037ace20ba5da9303abc4dc073efa9ba5819134080d6c467dbe905733226cd66554e99edcd9ab57fbfd49bdc4a57
|
7
|
+
data.tar.gz: '08bce91b2a1c2a301b3eb288883621f7f7d607e56e36765ee00d5d09a84348f281cff0e48f61e074c9c394d5478b9c7140ca9a4f02eb37778e129c3ec8ed6277'
|
data/.rubocop.yml
CHANGED
@@ -27,6 +27,12 @@ Style/FrozenStringLiteralComment:
|
|
27
27
|
Enabled: false
|
28
28
|
Style/GuardClause:
|
29
29
|
Enabled: false
|
30
|
+
Style/HashEachMethods:
|
31
|
+
Enabled: true
|
32
|
+
Style/HashTransformKeys:
|
33
|
+
Enabled: true
|
34
|
+
Style/HashTransformValues:
|
35
|
+
Enabled: true
|
30
36
|
Naming/MemoizedInstanceVariableName:
|
31
37
|
Enabled: false
|
32
38
|
Style/RegexpLiteral:
|
data/Gemfile.lock
CHANGED
data/lib/body_decorator.rb
CHANGED
@@ -2,26 +2,30 @@ require 'ox'
|
|
2
2
|
|
3
3
|
require_relative 'xml_handler'
|
4
4
|
require_relative 'url_helper'
|
5
|
-
require_relative 'parsers/
|
6
|
-
require_relative 'parsers/
|
7
|
-
require_relative 'parsers/image_parser'
|
5
|
+
require_relative 'parsers/property'
|
6
|
+
require_relative 'parsers/url'
|
8
7
|
require_relative 'parsers/embed_parser'
|
9
8
|
require_relative 'parsers/provider_display_parser'
|
10
|
-
require_relative 'parsers/favicon_parser'
|
11
9
|
|
12
10
|
module RichUrls
|
13
11
|
class BodyDecorator
|
14
12
|
NoXMLError = Class.new(StandardError)
|
15
13
|
|
16
14
|
PARSERS = {
|
17
|
-
'title' => Parsers::
|
18
|
-
'description' => Parsers::
|
19
|
-
'image' => Parsers::
|
15
|
+
'title' => Parsers::Property,
|
16
|
+
'description' => Parsers::Property,
|
17
|
+
'image' => Parsers::Url,
|
18
|
+
'favicon' => Parsers::Url,
|
20
19
|
'provider_display' => Parsers::ProviderDisplayParser,
|
21
|
-
'favicon' => Parsers::FaviconParser,
|
22
20
|
'embed' => Parsers::EmbedParser
|
23
21
|
}.freeze
|
24
22
|
|
23
|
+
def self.decorate(url, body)
|
24
|
+
new(url, body).decorate
|
25
|
+
end
|
26
|
+
|
27
|
+
private_class_method :new
|
28
|
+
|
25
29
|
def initialize(url, body)
|
26
30
|
@url = url
|
27
31
|
@xml = XMLHandler.new
|
@@ -37,7 +41,7 @@ module RichUrls
|
|
37
41
|
|
38
42
|
def decorate
|
39
43
|
PARSERS.each_with_object({}) do |(key, parser), object|
|
40
|
-
object[key] = parser.call(@xml, @url)
|
44
|
+
object[key] = parser.call(@xml.properties[key], @url)
|
41
45
|
end
|
42
46
|
end
|
43
47
|
end
|
data/lib/el.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
module RichUrls
|
2
|
+
class El
|
3
|
+
MAX_TEXT_LENGTH = 1000
|
4
|
+
|
5
|
+
attr_reader :tag, :open, :attributes
|
6
|
+
|
7
|
+
def initialize(tag)
|
8
|
+
@tag = tag
|
9
|
+
@open = true
|
10
|
+
@attributes = {}
|
11
|
+
end
|
12
|
+
|
13
|
+
def add(key, value)
|
14
|
+
return if @attributes[key]
|
15
|
+
|
16
|
+
@attributes[key] = value
|
17
|
+
end
|
18
|
+
|
19
|
+
def append_text(str)
|
20
|
+
@attributes[:text] ||= ''
|
21
|
+
|
22
|
+
str = str.strip
|
23
|
+
length = @attributes[:text].length
|
24
|
+
|
25
|
+
if length <= MAX_TEXT_LENGTH
|
26
|
+
end_slice = MAX_TEXT_LENGTH - length
|
27
|
+
sliced = str[0...end_slice]
|
28
|
+
|
29
|
+
@attributes[:text] << sliced + ' '
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
def text
|
34
|
+
@attributes[:text].strip
|
35
|
+
end
|
36
|
+
|
37
|
+
def close!
|
38
|
+
@open = false
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module RichUrls
|
2
|
+
module Finders
|
3
|
+
module Favicon
|
4
|
+
KEYWORDS = ['shortcut icon', 'icon shortcut', 'icon'].freeze
|
5
|
+
|
6
|
+
def self.call(elem)
|
7
|
+
return unless elem.tag == :link &&
|
8
|
+
KEYWORDS.include?(elem.attributes[:rel])
|
9
|
+
|
10
|
+
elem.attributes[:href]
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
data/lib/finders.rb
ADDED
data/lib/parsers/embed_parser.rb
CHANGED
@@ -1,28 +1,30 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
1
|
+
module RichUrls
|
2
|
+
module Parsers
|
3
|
+
class EmbedParser
|
4
|
+
require_relative 'embed_parsers/base'
|
5
|
+
require_relative 'embed_parsers/youtube'
|
6
|
+
require_relative 'embed_parsers/youtube_short'
|
7
|
+
require_relative 'embed_parsers/paste'
|
8
|
+
require_relative 'embed_parsers/spotify'
|
8
9
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
10
|
+
PARSERS = [
|
11
|
+
Youtube,
|
12
|
+
YoutubeShort,
|
13
|
+
Paste,
|
14
|
+
Spotify
|
15
|
+
].freeze
|
15
16
|
|
16
|
-
|
17
|
-
|
17
|
+
def self.call(_, url)
|
18
|
+
uri = URI(url)
|
18
19
|
|
19
|
-
|
20
|
-
|
20
|
+
PARSERS.each do |parser|
|
21
|
+
embed_parser = parser.new(uri)
|
21
22
|
|
22
|
-
|
23
|
-
|
23
|
+
return embed_parser.parse if embed_parser.match?
|
24
|
+
end
|
24
25
|
|
25
|
-
|
26
|
+
nil
|
27
|
+
end
|
26
28
|
end
|
27
29
|
end
|
28
30
|
end
|
@@ -1,14 +1,16 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
class
|
4
|
-
|
5
|
-
|
6
|
-
|
1
|
+
module RichUrls
|
2
|
+
module Parsers
|
3
|
+
class EmbedParser
|
4
|
+
class Base
|
5
|
+
def initialize(uri)
|
6
|
+
@uri = uri
|
7
|
+
end
|
7
8
|
|
8
|
-
|
9
|
+
private
|
9
10
|
|
10
|
-
|
11
|
-
|
11
|
+
def query
|
12
|
+
@query ||= Hash[URI.decode_www_form(@uri.query)]
|
13
|
+
end
|
12
14
|
end
|
13
15
|
end
|
14
16
|
end
|
@@ -1,18 +1,20 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
class
|
4
|
-
|
5
|
-
|
6
|
-
|
1
|
+
module RichUrls
|
2
|
+
module Parsers
|
3
|
+
class EmbedParser
|
4
|
+
class Paste < Base
|
5
|
+
IFRAME = '<iframe src="https://pasteapp.com/p/%s/embed?view=%s" '\
|
6
|
+
'width="480" height="480" scrolling="no" '\
|
7
|
+
'frameborder="0" allowfullscreen></iframe>'.freeze
|
7
8
|
|
8
|
-
|
9
|
-
|
10
|
-
|
9
|
+
def match?
|
10
|
+
@uri.host == 'pasteapp.com' && @uri.path =~ /\/p\/[a-zA-Z0-9]+/
|
11
|
+
end
|
11
12
|
|
12
|
-
|
13
|
-
|
13
|
+
def parse
|
14
|
+
path_id = @uri.path.sub(/\/p\//, '')
|
14
15
|
|
15
|
-
|
16
|
+
IFRAME % [path_id, query.fetch('view')]
|
17
|
+
end
|
16
18
|
end
|
17
19
|
end
|
18
20
|
end
|
@@ -1,30 +1,32 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
class
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
1
|
+
module RichUrls
|
2
|
+
module Parsers
|
3
|
+
class EmbedParser
|
4
|
+
class Spotify < Base
|
5
|
+
IFRAME = '<iframe src="https://open.spotify.com/embed/%s/%s" '\
|
6
|
+
'width="300" height="380" frameborder="0" '\
|
7
|
+
'allowtransparency="true" allow="encrypted-media">'\
|
8
|
+
'</iframe>'.freeze
|
8
9
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
10
|
+
SCOPES = %w[
|
11
|
+
album
|
12
|
+
track
|
13
|
+
playlist
|
14
|
+
].freeze
|
14
15
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
16
|
+
def match?
|
17
|
+
valid_path = SCOPES.any? do |path|
|
18
|
+
@uri.path.start_with?("/#{path}")
|
19
|
+
end
|
19
20
|
|
20
|
-
|
21
|
-
|
21
|
+
@uri.host == 'open.spotify.com' && valid_path
|
22
|
+
end
|
22
23
|
|
23
|
-
|
24
|
-
|
25
|
-
|
24
|
+
def parse
|
25
|
+
path = @uri.path
|
26
|
+
path[0] = ''
|
26
27
|
|
27
|
-
|
28
|
+
IFRAME % path.split('/')
|
29
|
+
end
|
28
30
|
end
|
29
31
|
end
|
30
32
|
end
|
@@ -1,20 +1,22 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
class
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
1
|
+
module RichUrls
|
2
|
+
module Parsers
|
3
|
+
class EmbedParser
|
4
|
+
class Youtube < Base
|
5
|
+
IFRAME = '<iframe width="560" height="315" '\
|
6
|
+
'src="https://www.youtube.com/embed/%s" frameborder="0" '\
|
7
|
+
'allow="accelerometer; autoplay; encrypted-media; '\
|
8
|
+
'gyroscope; picture-in-picture" allowfullscreen>'\
|
9
|
+
'</iframe>'.freeze
|
9
10
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
11
|
+
def match?
|
12
|
+
@uri.host == 'www.youtube.com' &&
|
13
|
+
@uri.path == '/watch' &&
|
14
|
+
query.key?('v')
|
15
|
+
end
|
15
16
|
|
16
|
-
|
17
|
-
|
17
|
+
def parse
|
18
|
+
IFRAME % query.fetch('v')
|
19
|
+
end
|
18
20
|
end
|
19
21
|
end
|
20
22
|
end
|
@@ -1,15 +1,17 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
class
|
4
|
-
|
5
|
-
|
6
|
-
|
1
|
+
module RichUrls
|
2
|
+
module Parsers
|
3
|
+
class EmbedParser
|
4
|
+
class YoutubeShort < Base
|
5
|
+
def match?
|
6
|
+
@uri.host == 'youtu.be'
|
7
|
+
end
|
7
8
|
|
8
|
-
|
9
|
-
|
10
|
-
|
9
|
+
def parse
|
10
|
+
path = @uri.path
|
11
|
+
path[0] = ''
|
11
12
|
|
12
|
-
|
13
|
+
Youtube::IFRAME % path
|
14
|
+
end
|
13
15
|
end
|
14
16
|
end
|
15
17
|
end
|
data/lib/parsers/url.rb
ADDED
data/lib/url_fetcher.rb
CHANGED
@@ -38,7 +38,7 @@ module RichUrls
|
|
38
38
|
response = session.get(@url)
|
39
39
|
|
40
40
|
if response.status < 400
|
41
|
-
decorated = BodyDecorator.
|
41
|
+
decorated = BodyDecorator.decorate(response.url, response.body)
|
42
42
|
RichUrls.cache.set(digest, Oj.dump(decorated))
|
43
43
|
decorated
|
44
44
|
else
|
data/lib/xml_handler.rb
CHANGED
@@ -1,12 +1,18 @@
|
|
1
|
+
require_relative 'el'
|
2
|
+
require_relative 'finders'
|
3
|
+
require_relative 'finders/meta_title'
|
4
|
+
require_relative 'finders/meta_description'
|
5
|
+
require_relative 'finders/meta_image'
|
6
|
+
require_relative 'finders/favicon'
|
7
|
+
|
1
8
|
module RichUrls
|
2
9
|
class XMLHandler < ::Ox::Sax
|
3
10
|
WHITELISTED_EL_NAMES = %i[
|
4
|
-
html
|
5
|
-
head
|
6
11
|
title
|
7
12
|
meta
|
8
13
|
link
|
9
14
|
img
|
15
|
+
p
|
10
16
|
].freeze
|
11
17
|
|
12
18
|
WHITELISTED_ATTRS = %i[
|
@@ -17,48 +23,97 @@ module RichUrls
|
|
17
23
|
src
|
18
24
|
].freeze
|
19
25
|
|
26
|
+
FALLBACK_ELEMENTS = {
|
27
|
+
img: 'og:image',
|
28
|
+
p: 'og:description',
|
29
|
+
title: 'og:title'
|
30
|
+
}.freeze
|
31
|
+
|
32
|
+
FINDERS = {
|
33
|
+
Finders::MetaTitle => 'title',
|
34
|
+
Finders::MetaDescription => 'description',
|
35
|
+
Finders::MetaImage => 'image',
|
36
|
+
Finders::Favicon => 'favicon',
|
37
|
+
Finders::Title => 'title',
|
38
|
+
Finders::Description => 'description',
|
39
|
+
Finders::Image => 'image'
|
40
|
+
}.freeze
|
41
|
+
|
20
42
|
StopParsingError = Class.new(StandardError)
|
21
|
-
El = Struct.new(:name, :attributes)
|
22
43
|
|
23
|
-
|
44
|
+
attr_reader :elements, :properties
|
24
45
|
|
25
46
|
def initialize
|
26
47
|
@elements = []
|
48
|
+
@counts = Set.new
|
49
|
+
@properties = {
|
50
|
+
'title' => nil,
|
51
|
+
'description' => nil,
|
52
|
+
'image' => nil,
|
53
|
+
'favicon' => nil
|
54
|
+
}
|
27
55
|
end
|
28
56
|
|
29
|
-
def find(
|
57
|
+
def find(tag, attrs = {})
|
30
58
|
@elements.detect do |el|
|
31
59
|
matching_attributes = attrs.all? { |k, v| el.attributes[k] == v }
|
32
60
|
|
33
|
-
el.
|
61
|
+
el.tag == tag && matching_attributes
|
34
62
|
end
|
35
63
|
end
|
36
64
|
|
37
|
-
def start_element(
|
38
|
-
return unless WHITELISTED_EL_NAMES.include?(
|
65
|
+
def start_element(tag)
|
66
|
+
return unless WHITELISTED_EL_NAMES.include?(tag)
|
39
67
|
|
40
|
-
@elements << El.new(
|
68
|
+
@elements << El.new(tag) if add_element?(tag)
|
41
69
|
end
|
42
70
|
|
43
|
-
def
|
44
|
-
|
71
|
+
def end_element(tag)
|
72
|
+
return unless WHITELISTED_EL_NAMES.include?(tag)
|
45
73
|
|
46
|
-
|
74
|
+
el = @elements.reverse_each.detect { |e| e.open && e.tag == tag }
|
75
|
+
return unless el
|
47
76
|
|
48
|
-
el.
|
77
|
+
el.close!
|
78
|
+
find_element(el)
|
49
79
|
|
50
|
-
raise StopParsingError if
|
80
|
+
raise StopParsingError if @properties.values.all?
|
51
81
|
end
|
52
82
|
|
53
|
-
def
|
83
|
+
def attr(key, value)
|
84
|
+
return unless WHITELISTED_ATTRS.include?(key)
|
85
|
+
|
54
86
|
el = @elements.last
|
55
|
-
el
|
87
|
+
el&.add(key, value)
|
88
|
+
end
|
89
|
+
|
90
|
+
def text(str)
|
91
|
+
el = @elements.detect(&:open)
|
92
|
+
el&.append_text(str)
|
56
93
|
end
|
57
94
|
|
58
95
|
private
|
59
96
|
|
60
|
-
def
|
61
|
-
|
97
|
+
def find_element(elem)
|
98
|
+
FINDERS.each_pair do |finder, attribute|
|
99
|
+
next if @properties[attribute]
|
100
|
+
|
101
|
+
content = finder.call(elem)
|
102
|
+
|
103
|
+
if content
|
104
|
+
@properties[attribute] = content
|
105
|
+
break
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
def add_element?(tag)
|
111
|
+
return true unless FALLBACK_ELEMENTS.keys.include?(tag)
|
112
|
+
return false if @counts.include?(tag)
|
113
|
+
|
114
|
+
@counts.add(tag)
|
115
|
+
|
116
|
+
!find(:meta, property: FALLBACK_ELEMENTS.fetch(tag))
|
62
117
|
end
|
63
118
|
end
|
64
119
|
end
|
data/richurls.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: richurls
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- grdw
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-03-
|
11
|
+
date: 2020-03-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: oj
|
@@ -124,17 +124,21 @@ files:
|
|
124
124
|
- README.md
|
125
125
|
- lib/body_decorator.rb
|
126
126
|
- lib/cache.rb
|
127
|
-
- lib/
|
127
|
+
- lib/el.rb
|
128
|
+
- lib/finders.rb
|
129
|
+
- lib/finders/favicon.rb
|
130
|
+
- lib/finders/meta_description.rb
|
131
|
+
- lib/finders/meta_image.rb
|
132
|
+
- lib/finders/meta_title.rb
|
128
133
|
- lib/parsers/embed_parser.rb
|
129
134
|
- lib/parsers/embed_parsers/base.rb
|
130
135
|
- lib/parsers/embed_parsers/paste.rb
|
131
136
|
- lib/parsers/embed_parsers/spotify.rb
|
132
137
|
- lib/parsers/embed_parsers/youtube.rb
|
133
138
|
- lib/parsers/embed_parsers/youtube_short.rb
|
134
|
-
- lib/parsers/
|
135
|
-
- lib/parsers/image_parser.rb
|
139
|
+
- lib/parsers/property.rb
|
136
140
|
- lib/parsers/provider_display_parser.rb
|
137
|
-
- lib/parsers/
|
141
|
+
- lib/parsers/url.rb
|
138
142
|
- lib/richurls.rb
|
139
143
|
- lib/url_fetcher.rb
|
140
144
|
- lib/url_helper.rb
|
@@ -1,18 +0,0 @@
|
|
1
|
-
module Parsers
|
2
|
-
module FaviconParser
|
3
|
-
KEYWORDS = ['shortcut icon', 'icon shortcut', 'icon'].freeze
|
4
|
-
|
5
|
-
def self.call(document, url)
|
6
|
-
KEYWORDS.each do |rel|
|
7
|
-
found_document = document.find(:link, rel: rel)
|
8
|
-
|
9
|
-
if found_document
|
10
|
-
@el = found_document
|
11
|
-
break
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
@el && UrlHelper.url_for(url, @el.attributes[:href])
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
data/lib/parsers/image_parser.rb
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
module Parsers
|
2
|
-
class ImageParser
|
3
|
-
def self.call(document, url)
|
4
|
-
meta_image = document.find(:meta, property: 'og:image')
|
5
|
-
image_tag = document.find(:img)
|
6
|
-
|
7
|
-
image_source =
|
8
|
-
(meta_image && meta_image.attributes[:content]) ||
|
9
|
-
(image_tag && image_tag.attributes[:src])
|
10
|
-
|
11
|
-
image_source && UrlHelper.url_for(url, image_source)
|
12
|
-
end
|
13
|
-
end
|
14
|
-
end
|
data/lib/parsers/title_parser.rb
DELETED