richurls 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 42cff335ffa60073310cdec6c1b16f2a1f1a4cfce0bde60835a76538a0f138aa
4
- data.tar.gz: cdded0800498b21f24b5c89910da73fd5150492c4f9c7ccedcb5499d538b1423
3
+ metadata.gz: f524681ea31f85291124dcf29aeff080bba7e2a6afb94cf376888fd8449d8e9a
4
+ data.tar.gz: 3d271748e1894aedc12c7579308f9fa6520c26ed01defd313160a8c67abb6ca0
5
5
  SHA512:
6
- metadata.gz: a5633b6852996d4938f002e2e1e4a90e1b25037ace20ba5da9303abc4dc073efa9ba5819134080d6c467dbe905733226cd66554e99edcd9ab57fbfd49bdc4a57
7
- data.tar.gz: '08bce91b2a1c2a301b3eb288883621f7f7d607e56e36765ee00d5d09a84348f281cff0e48f61e074c9c394d5478b9c7140ca9a4f02eb37778e129c3ec8ed6277'
6
+ metadata.gz: 599e985858d4bafca9865f9880b9e89b0ec4a690da1bda7a07eafa0f685d5b8c0478e838268e6a839690d777588e84ffde9d2d90b1f505c3ebe9c69d246c66f3
7
+ data.tar.gz: ed33ba734fdf4636c2e996f04eb6e9ab1f2df3a7b7cfebf2ff8b0f564c38186f71fb3842f7b38cf3b10d6ca7c9336c78fdd76940c8b1eb90a0e876e9667f66df
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- richurls (0.4.0)
4
+ richurls (0.5.0)
5
5
  oj (~> 3)
6
6
  ox (~> 2)
7
7
  patron (~> 0.13)
data/README.md CHANGED
@@ -9,6 +9,8 @@ gem install richurls
9
9
 
10
10
  **Usage:**
11
11
 
12
+ Default usage:
13
+
12
14
  ```ruby
13
15
  require 'richurls'
14
16
 
@@ -25,6 +27,19 @@ RichUrls.enrich('https://wetransfer.com')
25
27
  # }
26
28
  ```
27
29
 
30
+ Partial attributes:
31
+
32
+ ```ruby
33
+ require 'richurls'
34
+
35
+ RichUrls.enrich('https://wetransfer.com', filter: %w[title])
36
+
37
+ # Returns:
38
+ # {
39
+ # "title"=>"WeTransfer"
40
+ # }
41
+ ```
42
+
28
43
  **Caching:**
29
44
 
30
45
  By default, caching is turned off. It can be enabled by writing a cache wrapper like this:
@@ -39,12 +54,14 @@ class CustomCache < RichUrls::Cache::Wrapper
39
54
  # Callback for fetching a cache entry
40
55
  end
41
56
 
42
- def set(key, value)
43
- # Callback for setting a value in a cache to a certain key
57
+ def set(key, value, time)
58
+ # Callback for setting a value in a cache to a certain key for a certain
59
+ # `time`*.
44
60
  end
45
61
 
46
- def extend(key)
47
- # Callback for extending a cached value by key
62
+ def extend(key, time)
63
+ # Callback for extending a cached value for a certain key for a certain
64
+ # `time`*.
48
65
  end
49
66
  end
50
67
  ```
@@ -52,5 +69,19 @@ end
52
69
  Finally, you can enable the `CustomCache` by adding:
53
70
 
54
71
  ```ruby
55
- RichUrls.cache = CustomCache.new(time: 2.hours)
72
+ RichUrls.cache = CustomCache.new(time: 7200)
73
+ ```
74
+
75
+ **\* About custom cache time:**
76
+
77
+ If you have caching enabled and would like to deviate from the default cache time
78
+ per URL you enrich, it's possible to do so. You'd have to pass a `cache_time`
79
+ parameter to the URL enricher as such:
80
+
81
+ ```ruby
82
+ RichUrls.enrich('https://wetransfer.com', cache_time: 3600)
56
83
  ```
84
+
85
+ This `cache_time` will be accessible through the `time` parameter of the `set`
86
+ and `extend` methods on the `Cache::Wrapper`-instance and can be used as you
87
+ please.
@@ -20,15 +20,16 @@ module RichUrls
20
20
  'embed' => Parsers::EmbedParser
21
21
  }.freeze
22
22
 
23
- def self.decorate(url, body)
24
- new(url, body).decorate
23
+ def self.decorate(url, body, filter = [])
24
+ new(url, body, filter).decorate
25
25
  end
26
26
 
27
27
  private_class_method :new
28
28
 
29
- def initialize(url, body)
29
+ def initialize(url, body, filter)
30
30
  @url = url
31
- @xml = XMLHandler.new
31
+ @filter = filter
32
+ @xml = XMLHandler.new(filter)
32
33
 
33
34
  Ox.sax_html(@xml, StringIO.new(body))
34
35
 
@@ -40,9 +41,17 @@ module RichUrls
40
41
  end
41
42
 
42
43
  def decorate
43
- PARSERS.each_with_object({}) do |(key, parser), object|
44
+ parsers.each_with_object({}) do |(key, parser), object|
44
45
  object[key] = parser.call(@xml.properties[key], @url)
45
46
  end
46
47
  end
48
+
49
+ private
50
+
51
+ def parsers
52
+ return PARSERS if @filter.empty?
53
+
54
+ PARSERS.slice(*@filter)
55
+ end
47
56
  end
48
57
  end
data/lib/cache.rb CHANGED
@@ -5,11 +5,11 @@ module RichUrls
5
5
  raise NotImplementedError, 'wrapper needs `get` method'
6
6
  end
7
7
 
8
- def set(_key, _value)
8
+ def set(_key, _value, _time)
9
9
  raise NotImplementedError, 'wrapper needs `set` method'
10
10
  end
11
11
 
12
- def extend(_key)
12
+ def extend(_key, _time)
13
13
  raise NotImplementedError, 'wrapper needs `extend` method'
14
14
  end
15
15
  end
@@ -17,9 +17,9 @@ module RichUrls
17
17
  class None < Wrapper
18
18
  def get(_); end
19
19
 
20
- def set(_, _); end
20
+ def set(_, _, _); end
21
21
 
22
- def extend(_); end
22
+ def extend(_, _); end
23
23
  end
24
24
  end
25
25
  end
data/lib/richurls.rb CHANGED
@@ -22,11 +22,11 @@ module RichUrls
22
22
  @cache ||= wrapper
23
23
  end
24
24
 
25
- def self.enrich(url)
25
+ def self.enrich(url, filter: [], cache_time: nil)
26
26
  unless URI::DEFAULT_PARSER.make_regexp.match?(url)
27
27
  raise MalformedURLError, "this url is malformed: #{url}"
28
28
  end
29
29
 
30
- UrlFetcher.fetch(url)
30
+ UrlFetcher.fetch(url, filter, cache_time)
31
31
  end
32
32
  end
data/lib/url_fetcher.rb CHANGED
@@ -6,21 +6,23 @@ module RichUrls
6
6
 
7
7
  class UrlFetcherError < StandardError; end
8
8
 
9
- def self.fetch(url)
10
- new(url).fetch
9
+ def self.fetch(url, attributes = [], cache_time = nil)
10
+ new(url, attributes, cache_time).fetch
11
11
  end
12
12
 
13
13
  private_class_method :new
14
14
 
15
- def initialize(url)
15
+ def initialize(url, attributes, cache_time)
16
16
  @url = url
17
+ @attributes = attributes
18
+ @cache_time = cache_time
17
19
  end
18
20
 
19
21
  def fetch
20
22
  cached = RichUrls.cache.get(digest)
21
23
 
22
24
  if cached
23
- RichUrls.cache.extend(digest)
25
+ RichUrls.cache.extend(digest, @cache_time)
24
26
  Oj.load(cached)
25
27
  else
26
28
  patron_call
@@ -30,7 +32,7 @@ module RichUrls
30
32
  private
31
33
 
32
34
  def digest
33
- @digest ||= Digest::MD5.hexdigest(@url)
35
+ @digest ||= Digest::MD5.hexdigest(@url + @attributes.sort.join('-'))
34
36
  end
35
37
 
36
38
  def patron_call
@@ -38,8 +40,10 @@ module RichUrls
38
40
  response = session.get(@url)
39
41
 
40
42
  if response.status < 400
41
- decorated = BodyDecorator.decorate(response.url, response.body)
42
- RichUrls.cache.set(digest, Oj.dump(decorated))
43
+ decorated = BodyDecorator.decorate(
44
+ response.url, response.body, @attributes
45
+ )
46
+ RichUrls.cache.set(digest, Oj.dump(decorated), @cache_time)
43
47
  decorated
44
48
  else
45
49
  raise UrlFetcherError, 'url cannot be found'
data/lib/xml_handler.rb CHANGED
@@ -43,15 +43,11 @@ module RichUrls
43
43
 
44
44
  attr_reader :elements, :properties
45
45
 
46
- def initialize
46
+ def initialize(filter = [])
47
+ @filter = filter
47
48
  @elements = []
48
49
  @counts = Set.new
49
- @properties = {
50
- 'title' => nil,
51
- 'description' => nil,
52
- 'image' => nil,
53
- 'favicon' => nil
54
- }
50
+ @properties = filtered_properties(filter)
55
51
  end
56
52
 
57
53
  def find(tag, attrs = {})
@@ -115,5 +111,14 @@ module RichUrls
115
111
 
116
112
  !find(:meta, property: FALLBACK_ELEMENTS.fetch(tag))
117
113
  end
114
+
115
+ # Turns a set of filtered properties into a Hash where
116
+ # the default value is `nil`
117
+ def filtered_properties(filter)
118
+ keys = FINDERS.values.uniq
119
+ keys &= filter if filter.any?
120
+
121
+ Hash[keys.zip([])]
122
+ end
118
123
  end
119
124
  end
data/richurls.gemspec CHANGED
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
3
 
4
4
  Gem::Specification.new do |spec|
5
5
  spec.name = 'richurls'
6
- spec.version = '0.4.0'
6
+ spec.version = '0.5.0'
7
7
  spec.authors = ['grdw']
8
8
  spec.email = ['gerard@wetransfer.com']
9
9
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: richurls
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - grdw
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-03-23 00:00:00.000000000 Z
11
+ date: 2020-03-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: oj