richurls 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +36 -5
- data/lib/body_decorator.rb +14 -5
- data/lib/cache.rb +4 -4
- data/lib/richurls.rb +2 -2
- data/lib/url_fetcher.rb +11 -7
- data/lib/xml_handler.rb +12 -7
- data/richurls.gemspec +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f524681ea31f85291124dcf29aeff080bba7e2a6afb94cf376888fd8449d8e9a
|
4
|
+
data.tar.gz: 3d271748e1894aedc12c7579308f9fa6520c26ed01defd313160a8c67abb6ca0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 599e985858d4bafca9865f9880b9e89b0ec4a690da1bda7a07eafa0f685d5b8c0478e838268e6a839690d777588e84ffde9d2d90b1f505c3ebe9c69d246c66f3
|
7
|
+
data.tar.gz: ed33ba734fdf4636c2e996f04eb6e9ab1f2df3a7b7cfebf2ff8b0f564c38186f71fb3842f7b38cf3b10d6ca7c9336c78fdd76940c8b1eb90a0e876e9667f66df
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -9,6 +9,8 @@ gem install richurls
|
|
9
9
|
|
10
10
|
**Usage:**
|
11
11
|
|
12
|
+
Default usage:
|
13
|
+
|
12
14
|
```ruby
|
13
15
|
require 'richurls'
|
14
16
|
|
@@ -25,6 +27,19 @@ RichUrls.enrich('https://wetransfer.com')
|
|
25
27
|
# }
|
26
28
|
```
|
27
29
|
|
30
|
+
Partial attributes:
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
require 'richurls'
|
34
|
+
|
35
|
+
RichUrls.enrich('https://wetransfer.com', filter: %w[title])
|
36
|
+
|
37
|
+
# Returns:
|
38
|
+
# {
|
39
|
+
# "title"=>"WeTransfer"
|
40
|
+
# }
|
41
|
+
```
|
42
|
+
|
28
43
|
**Caching:**
|
29
44
|
|
30
45
|
By default caching is turned off. Caching can be enabled by writing a cache wrapper as such:
|
@@ -39,12 +54,14 @@ class CustomCache < RichUrls::Cache::Wrapper
|
|
39
54
|
# Callback for fetching a cache entry
|
40
55
|
end
|
41
56
|
|
42
|
-
def set(key, value)
|
43
|
-
# Callback for setting a value in a cache to a certain key
|
57
|
+
def set(key, value, time)
|
58
|
+
# Callback for setting a value in a cache to a certain key for a certain
|
59
|
+
# `time`*.
|
44
60
|
end
|
45
61
|
|
46
|
-
def extend(key)
|
47
|
-
# Callback for extending a cached value
|
62
|
+
def extend(key, time)
|
63
|
+
# Callback for extending a cached value for a certain key for a certain
|
64
|
+
# `time`*.
|
48
65
|
end
|
49
66
|
end
|
50
67
|
```
|
@@ -52,5 +69,19 @@ end
|
|
52
69
|
Finally you can enable the `CustomCache` by adding:
|
53
70
|
|
54
71
|
```ruby
|
55
|
-
RichUrls.cache = CustomCache.new(time:
|
72
|
+
RichUrls.cache = CustomCache.new(time: 7200)
|
73
|
+
```
|
74
|
+
|
75
|
+
**\* About custom cache time:**
|
76
|
+
|
77
|
+
If you have caching enabled and would like to deviate from the default cache time
|
78
|
+
per URL you enrich, it's possible to do so. You'd have to pass a `cache_time`
|
79
|
+
parameter to the URL enricher as such:
|
80
|
+
|
81
|
+
```ruby
|
82
|
+
RichUrls.enrich('https://wetransfer.com', cache_time: 3600)
|
56
83
|
```
|
84
|
+
|
85
|
+
This `cache_time` will be accessible through the `time` parameters in the `set`
|
86
|
+
and `extend` methods on the `Cache::Wrapper`-instance and can be used as you
|
87
|
+
please.
|
data/lib/body_decorator.rb
CHANGED
@@ -20,15 +20,16 @@ module RichUrls
|
|
20
20
|
'embed' => Parsers::EmbedParser
|
21
21
|
}.freeze
|
22
22
|
|
23
|
-
def self.decorate(url, body)
|
24
|
-
new(url, body).decorate
|
23
|
+
def self.decorate(url, body, filter = [])
|
24
|
+
new(url, body, filter).decorate
|
25
25
|
end
|
26
26
|
|
27
27
|
private_class_method :new
|
28
28
|
|
29
|
-
def initialize(url, body)
|
29
|
+
def initialize(url, body, filter)
|
30
30
|
@url = url
|
31
|
-
@xml = XMLHandler.new
|
31
|
+
@filter = filter
|
32
|
+
@xml = XMLHandler.new(filter)
|
32
33
|
|
33
34
|
Ox.sax_html(@xml, StringIO.new(body))
|
34
35
|
|
@@ -40,9 +41,17 @@ module RichUrls
|
|
40
41
|
end
|
41
42
|
|
42
43
|
def decorate
|
43
|
-
|
44
|
+
parsers.each_with_object({}) do |(key, parser), object|
|
44
45
|
object[key] = parser.call(@xml.properties[key], @url)
|
45
46
|
end
|
46
47
|
end
|
48
|
+
|
49
|
+
private
|
50
|
+
|
51
|
+
def parsers
|
52
|
+
return PARSERS if @filter.empty?
|
53
|
+
|
54
|
+
PARSERS.slice(*@filter)
|
55
|
+
end
|
47
56
|
end
|
48
57
|
end
|
data/lib/cache.rb
CHANGED
@@ -5,11 +5,11 @@ module RichUrls
|
|
5
5
|
raise NotImplementedError, 'wrapper needs `get` method'
|
6
6
|
end
|
7
7
|
|
8
|
-
def set(_key, _value)
|
8
|
+
def set(_key, _value, _time)
|
9
9
|
raise NotImplementedError, 'wrapper needs `set` method'
|
10
10
|
end
|
11
11
|
|
12
|
-
def extend(_key)
|
12
|
+
def extend(_key, _time)
|
13
13
|
raise NotImplementedError, 'wrapper needs `extend` method'
|
14
14
|
end
|
15
15
|
end
|
@@ -17,9 +17,9 @@ module RichUrls
|
|
17
17
|
class None < Wrapper
|
18
18
|
def get(_); end
|
19
19
|
|
20
|
-
def set(_, _); end
|
20
|
+
def set(_, _, _); end
|
21
21
|
|
22
|
-
def extend(_); end
|
22
|
+
def extend(_, _); end
|
23
23
|
end
|
24
24
|
end
|
25
25
|
end
|
data/lib/richurls.rb
CHANGED
@@ -22,11 +22,11 @@ module RichUrls
|
|
22
22
|
@cache ||= wrapper
|
23
23
|
end
|
24
24
|
|
25
|
-
def self.enrich(url)
|
25
|
+
def self.enrich(url, filter: [], cache_time: nil)
|
26
26
|
unless URI::DEFAULT_PARSER.make_regexp.match?(url)
|
27
27
|
raise MalformedURLError, "this url is malformed: #{url}"
|
28
28
|
end
|
29
29
|
|
30
|
-
UrlFetcher.fetch(url)
|
30
|
+
UrlFetcher.fetch(url, filter, cache_time)
|
31
31
|
end
|
32
32
|
end
|
data/lib/url_fetcher.rb
CHANGED
@@ -6,21 +6,23 @@ module RichUrls
|
|
6
6
|
|
7
7
|
class UrlFetcherError < StandardError; end
|
8
8
|
|
9
|
-
def self.fetch(url)
|
10
|
-
new(url).fetch
|
9
|
+
def self.fetch(url, attributes = [], cache_time = nil)
|
10
|
+
new(url, attributes, cache_time).fetch
|
11
11
|
end
|
12
12
|
|
13
13
|
private_class_method :new
|
14
14
|
|
15
|
-
def initialize(url)
|
15
|
+
def initialize(url, attributes, cache_time)
|
16
16
|
@url = url
|
17
|
+
@attributes = attributes
|
18
|
+
@cache_time = cache_time
|
17
19
|
end
|
18
20
|
|
19
21
|
def fetch
|
20
22
|
cached = RichUrls.cache.get(digest)
|
21
23
|
|
22
24
|
if cached
|
23
|
-
RichUrls.cache.extend(digest)
|
25
|
+
RichUrls.cache.extend(digest, @cache_time)
|
24
26
|
Oj.load(cached)
|
25
27
|
else
|
26
28
|
patron_call
|
@@ -30,7 +32,7 @@ module RichUrls
|
|
30
32
|
private
|
31
33
|
|
32
34
|
def digest
|
33
|
-
@digest ||= Digest::MD5.hexdigest(@url)
|
35
|
+
@digest ||= Digest::MD5.hexdigest(@url + @attributes.sort.join('-'))
|
34
36
|
end
|
35
37
|
|
36
38
|
def patron_call
|
@@ -38,8 +40,10 @@ module RichUrls
|
|
38
40
|
response = session.get(@url)
|
39
41
|
|
40
42
|
if response.status < 400
|
41
|
-
decorated = BodyDecorator.decorate(response.url, response.body)
|
42
|
-
|
43
|
+
decorated = BodyDecorator.decorate(
|
44
|
+
response.url, response.body, @attributes
|
45
|
+
)
|
46
|
+
RichUrls.cache.set(digest, Oj.dump(decorated), @cache_time)
|
43
47
|
decorated
|
44
48
|
else
|
45
49
|
raise UrlFetcherError, 'url cannot be found'
|
data/lib/xml_handler.rb
CHANGED
@@ -43,15 +43,11 @@ module RichUrls
|
|
43
43
|
|
44
44
|
attr_reader :elements, :properties
|
45
45
|
|
46
|
-
def initialize
|
46
|
+
def initialize(filter = [])
|
47
|
+
@filter = filter
|
47
48
|
@elements = []
|
48
49
|
@counts = Set.new
|
49
|
-
@properties = {
|
50
|
-
'title' => nil,
|
51
|
-
'description' => nil,
|
52
|
-
'image' => nil,
|
53
|
-
'favicon' => nil
|
54
|
-
}
|
50
|
+
@properties = filtered_properties(filter)
|
55
51
|
end
|
56
52
|
|
57
53
|
def find(tag, attrs = {})
|
@@ -115,5 +111,14 @@ module RichUrls
|
|
115
111
|
|
116
112
|
!find(:meta, property: FALLBACK_ELEMENTS.fetch(tag))
|
117
113
|
end
|
114
|
+
|
115
|
+
# Turns a set of filtered properties into a Hash where
|
116
|
+
# the default value is `nil`
|
117
|
+
def filtered_properties(filter)
|
118
|
+
keys = FINDERS.values.uniq
|
119
|
+
keys &= filter if filter.any?
|
120
|
+
|
121
|
+
Hash[keys.zip([])]
|
122
|
+
end
|
118
123
|
end
|
119
124
|
end
|
data/richurls.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: richurls
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- grdw
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-03-
|
11
|
+
date: 2020-03-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: oj
|