richurls 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +36 -5
- data/lib/body_decorator.rb +14 -5
- data/lib/cache.rb +4 -4
- data/lib/richurls.rb +2 -2
- data/lib/url_fetcher.rb +11 -7
- data/lib/xml_handler.rb +12 -7
- data/richurls.gemspec +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f524681ea31f85291124dcf29aeff080bba7e2a6afb94cf376888fd8449d8e9a
|
|
4
|
+
data.tar.gz: 3d271748e1894aedc12c7579308f9fa6520c26ed01defd313160a8c67abb6ca0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 599e985858d4bafca9865f9880b9e89b0ec4a690da1bda7a07eafa0f685d5b8c0478e838268e6a839690d777588e84ffde9d2d90b1f505c3ebe9c69d246c66f3
|
|
7
|
+
data.tar.gz: ed33ba734fdf4636c2e996f04eb6e9ab1f2df3a7b7cfebf2ff8b0f564c38186f71fb3842f7b38cf3b10d6ca7c9336c78fdd76940c8b1eb90a0e876e9667f66df
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
|
@@ -9,6 +9,8 @@ gem install richurls
|
|
|
9
9
|
|
|
10
10
|
**Usage:**
|
|
11
11
|
|
|
12
|
+
Default usage:
|
|
13
|
+
|
|
12
14
|
```ruby
|
|
13
15
|
require 'richurls'
|
|
14
16
|
|
|
@@ -25,6 +27,19 @@ RichUrls.enrich('https://wetransfer.com')
|
|
|
25
27
|
# }
|
|
26
28
|
```
|
|
27
29
|
|
|
30
|
+
Partial attributes:
|
|
31
|
+
|
|
32
|
+
```ruby
|
|
33
|
+
require 'richurls'
|
|
34
|
+
|
|
35
|
+
RichUrls.enrich('https://wetransfer.com', filter: %w[title])
|
|
36
|
+
|
|
37
|
+
# Returns:
|
|
38
|
+
# {
|
|
39
|
+
# "title"=>"WeTransfer"
|
|
40
|
+
# }
|
|
41
|
+
```
|
|
42
|
+
|
|
28
43
|
**Caching:**
|
|
29
44
|
|
|
30
45
|
By default caching is turned off. Caching can be enabled by writing a cache wrapper as such:
|
|
@@ -39,12 +54,14 @@ class CustomCache < RichUrls::Cache::Wrapper
|
|
|
39
54
|
# Callback for fetching a cache entry
|
|
40
55
|
end
|
|
41
56
|
|
|
42
|
-
def set(key, value)
|
|
43
|
-
# Callback for setting a value in a cache to a certain key
|
|
57
|
+
def set(key, value, time)
|
|
58
|
+
# Callback for setting a value in a cache to a certain key for a certain
|
|
59
|
+
# `time`*.
|
|
44
60
|
end
|
|
45
61
|
|
|
46
|
-
def extend(key)
|
|
47
|
-
# Callback for extending a cached value
|
|
62
|
+
def extend(key, time)
|
|
63
|
+
# Callback for extending a cached value for a certain key for a certain
|
|
64
|
+
# `time`*.
|
|
48
65
|
end
|
|
49
66
|
end
|
|
50
67
|
```
|
|
@@ -52,5 +69,19 @@ end
|
|
|
52
69
|
Finally you can enable the `CustomCache` by adding:
|
|
53
70
|
|
|
54
71
|
```ruby
|
|
55
|
-
RichUrls.cache = CustomCache.new(time:
|
|
72
|
+
RichUrls.cache = CustomCache.new(time: 7200)
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
**\* About custom cache time:**
|
|
76
|
+
|
|
77
|
+
If you have caching enabled and would like to deviate from the default cache time
|
|
78
|
+
per URL you enrich, it's possible to do so. You'd have to pass a `cache_time`
|
|
79
|
+
parameter to the URL enricher as such:
|
|
80
|
+
|
|
81
|
+
```ruby
|
|
82
|
+
RichUrls.enrich('https://wetransfer.com', cache_time: 3600)
|
|
56
83
|
```
|
|
84
|
+
|
|
85
|
+
This `cache_time` will be accessible through the `time` parameters in the `set`
|
|
86
|
+
and `extend` methods on the `Cache::Wrapper`-instance and can be used as you
|
|
87
|
+
please.
|
data/lib/body_decorator.rb
CHANGED
|
@@ -20,15 +20,16 @@ module RichUrls
|
|
|
20
20
|
'embed' => Parsers::EmbedParser
|
|
21
21
|
}.freeze
|
|
22
22
|
|
|
23
|
-
def self.decorate(url, body)
|
|
24
|
-
new(url, body).decorate
|
|
23
|
+
def self.decorate(url, body, filter = [])
|
|
24
|
+
new(url, body, filter).decorate
|
|
25
25
|
end
|
|
26
26
|
|
|
27
27
|
private_class_method :new
|
|
28
28
|
|
|
29
|
-
def initialize(url, body)
|
|
29
|
+
def initialize(url, body, filter)
|
|
30
30
|
@url = url
|
|
31
|
-
@xml = XMLHandler.new
|
|
31
|
+
@filter = filter
|
|
32
|
+
@xml = XMLHandler.new(filter)
|
|
32
33
|
|
|
33
34
|
Ox.sax_html(@xml, StringIO.new(body))
|
|
34
35
|
|
|
@@ -40,9 +41,17 @@ module RichUrls
|
|
|
40
41
|
end
|
|
41
42
|
|
|
42
43
|
def decorate
|
|
43
|
-
|
|
44
|
+
parsers.each_with_object({}) do |(key, parser), object|
|
|
44
45
|
object[key] = parser.call(@xml.properties[key], @url)
|
|
45
46
|
end
|
|
46
47
|
end
|
|
48
|
+
|
|
49
|
+
private
|
|
50
|
+
|
|
51
|
+
def parsers
|
|
52
|
+
return PARSERS if @filter.empty?
|
|
53
|
+
|
|
54
|
+
PARSERS.slice(*@filter)
|
|
55
|
+
end
|
|
47
56
|
end
|
|
48
57
|
end
|
data/lib/cache.rb
CHANGED
|
@@ -5,11 +5,11 @@ module RichUrls
|
|
|
5
5
|
raise NotImplementedError, 'wrapper needs `get` method'
|
|
6
6
|
end
|
|
7
7
|
|
|
8
|
-
def set(_key, _value)
|
|
8
|
+
def set(_key, _value, _time)
|
|
9
9
|
raise NotImplementedError, 'wrapper needs `set` method'
|
|
10
10
|
end
|
|
11
11
|
|
|
12
|
-
def extend(_key)
|
|
12
|
+
def extend(_key, _time)
|
|
13
13
|
raise NotImplementedError, 'wrapper needs `extend` method'
|
|
14
14
|
end
|
|
15
15
|
end
|
|
@@ -17,9 +17,9 @@ module RichUrls
|
|
|
17
17
|
class None < Wrapper
|
|
18
18
|
def get(_); end
|
|
19
19
|
|
|
20
|
-
def set(_, _); end
|
|
20
|
+
def set(_, _, _); end
|
|
21
21
|
|
|
22
|
-
def extend(_); end
|
|
22
|
+
def extend(_, _); end
|
|
23
23
|
end
|
|
24
24
|
end
|
|
25
25
|
end
|
data/lib/richurls.rb
CHANGED
|
@@ -22,11 +22,11 @@ module RichUrls
|
|
|
22
22
|
@cache ||= wrapper
|
|
23
23
|
end
|
|
24
24
|
|
|
25
|
-
def self.enrich(url)
|
|
25
|
+
def self.enrich(url, filter: [], cache_time: nil)
|
|
26
26
|
unless URI::DEFAULT_PARSER.make_regexp.match?(url)
|
|
27
27
|
raise MalformedURLError, "this url is malformed: #{url}"
|
|
28
28
|
end
|
|
29
29
|
|
|
30
|
-
UrlFetcher.fetch(url)
|
|
30
|
+
UrlFetcher.fetch(url, filter, cache_time)
|
|
31
31
|
end
|
|
32
32
|
end
|
data/lib/url_fetcher.rb
CHANGED
|
@@ -6,21 +6,23 @@ module RichUrls
|
|
|
6
6
|
|
|
7
7
|
class UrlFetcherError < StandardError; end
|
|
8
8
|
|
|
9
|
-
def self.fetch(url)
|
|
10
|
-
new(url).fetch
|
|
9
|
+
def self.fetch(url, attributes = [], cache_time = nil)
|
|
10
|
+
new(url, attributes, cache_time).fetch
|
|
11
11
|
end
|
|
12
12
|
|
|
13
13
|
private_class_method :new
|
|
14
14
|
|
|
15
|
-
def initialize(url)
|
|
15
|
+
def initialize(url, attributes, cache_time)
|
|
16
16
|
@url = url
|
|
17
|
+
@attributes = attributes
|
|
18
|
+
@cache_time = cache_time
|
|
17
19
|
end
|
|
18
20
|
|
|
19
21
|
def fetch
|
|
20
22
|
cached = RichUrls.cache.get(digest)
|
|
21
23
|
|
|
22
24
|
if cached
|
|
23
|
-
RichUrls.cache.extend(digest)
|
|
25
|
+
RichUrls.cache.extend(digest, @cache_time)
|
|
24
26
|
Oj.load(cached)
|
|
25
27
|
else
|
|
26
28
|
patron_call
|
|
@@ -30,7 +32,7 @@ module RichUrls
|
|
|
30
32
|
private
|
|
31
33
|
|
|
32
34
|
def digest
|
|
33
|
-
@digest ||= Digest::MD5.hexdigest(@url)
|
|
35
|
+
@digest ||= Digest::MD5.hexdigest(@url + @attributes.sort.join('-'))
|
|
34
36
|
end
|
|
35
37
|
|
|
36
38
|
def patron_call
|
|
@@ -38,8 +40,10 @@ module RichUrls
|
|
|
38
40
|
response = session.get(@url)
|
|
39
41
|
|
|
40
42
|
if response.status < 400
|
|
41
|
-
decorated = BodyDecorator.decorate(
|
|
42
|
-
|
|
43
|
+
decorated = BodyDecorator.decorate(
|
|
44
|
+
response.url, response.body, @attributes
|
|
45
|
+
)
|
|
46
|
+
RichUrls.cache.set(digest, Oj.dump(decorated), @cache_time)
|
|
43
47
|
decorated
|
|
44
48
|
else
|
|
45
49
|
raise UrlFetcherError, 'url cannot be found'
|
data/lib/xml_handler.rb
CHANGED
|
@@ -43,15 +43,11 @@ module RichUrls
|
|
|
43
43
|
|
|
44
44
|
attr_reader :elements, :properties
|
|
45
45
|
|
|
46
|
-
def initialize
|
|
46
|
+
def initialize(filter = [])
|
|
47
|
+
@filter = filter
|
|
47
48
|
@elements = []
|
|
48
49
|
@counts = Set.new
|
|
49
|
-
@properties = {
|
|
50
|
-
'title' => nil,
|
|
51
|
-
'description' => nil,
|
|
52
|
-
'image' => nil,
|
|
53
|
-
'favicon' => nil
|
|
54
|
-
}
|
|
50
|
+
@properties = filtered_properties(filter)
|
|
55
51
|
end
|
|
56
52
|
|
|
57
53
|
def find(tag, attrs = {})
|
|
@@ -115,5 +111,14 @@ module RichUrls
|
|
|
115
111
|
|
|
116
112
|
!find(:meta, property: FALLBACK_ELEMENTS.fetch(tag))
|
|
117
113
|
end
|
|
114
|
+
|
|
115
|
+
# Turns a set of filtered properties into a Hash where
|
|
116
|
+
# the default value is `nil`
|
|
117
|
+
def filtered_properties(filter)
|
|
118
|
+
keys = FINDERS.values.uniq
|
|
119
|
+
keys &= filter if filter.any?
|
|
120
|
+
|
|
121
|
+
Hash[keys.zip([])]
|
|
122
|
+
end
|
|
118
123
|
end
|
|
119
124
|
end
|
data/richurls.gemspec
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: richurls
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.0
|
|
4
|
+
version: 0.5.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- grdw
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2020-03-
|
|
11
|
+
date: 2020-03-25 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: oj
|