richurls 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 42cff335ffa60073310cdec6c1b16f2a1f1a4cfce0bde60835a76538a0f138aa
4
- data.tar.gz: cdded0800498b21f24b5c89910da73fd5150492c4f9c7ccedcb5499d538b1423
3
+ metadata.gz: f524681ea31f85291124dcf29aeff080bba7e2a6afb94cf376888fd8449d8e9a
4
+ data.tar.gz: 3d271748e1894aedc12c7579308f9fa6520c26ed01defd313160a8c67abb6ca0
5
5
  SHA512:
6
- metadata.gz: a5633b6852996d4938f002e2e1e4a90e1b25037ace20ba5da9303abc4dc073efa9ba5819134080d6c467dbe905733226cd66554e99edcd9ab57fbfd49bdc4a57
7
- data.tar.gz: '08bce91b2a1c2a301b3eb288883621f7f7d607e56e36765ee00d5d09a84348f281cff0e48f61e074c9c394d5478b9c7140ca9a4f02eb37778e129c3ec8ed6277'
6
+ metadata.gz: 599e985858d4bafca9865f9880b9e89b0ec4a690da1bda7a07eafa0f685d5b8c0478e838268e6a839690d777588e84ffde9d2d90b1f505c3ebe9c69d246c66f3
7
+ data.tar.gz: ed33ba734fdf4636c2e996f04eb6e9ab1f2df3a7b7cfebf2ff8b0f564c38186f71fb3842f7b38cf3b10d6ca7c9336c78fdd76940c8b1eb90a0e876e9667f66df
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- richurls (0.4.0)
4
+ richurls (0.5.0)
5
5
  oj (~> 3)
6
6
  ox (~> 2)
7
7
  patron (~> 0.13)
data/README.md CHANGED
@@ -9,6 +9,8 @@ gem install richurls
9
9
 
10
10
  **Usage:**
11
11
 
12
+ Default usage:
13
+
12
14
  ```ruby
13
15
  require 'richurls'
14
16
 
@@ -25,6 +27,19 @@ RichUrls.enrich('https://wetransfer.com')
25
27
  # }
26
28
  ```
27
29
 
30
+ Partial attributes:
31
+
32
+ ```ruby
33
+ require 'richurls'
34
+
35
+ RichUrls.enrich('https://wetransfer.com', filter: %w[title])
36
+
37
+ # Returns:
38
+ # {
39
+ # "title"=>"WeTransfer"
40
+ # }
41
+ ```
42
+
28
43
  **Caching:**
29
44
 
30
45
  By default, caching is turned off. Caching can be enabled by writing a cache wrapper like this:
@@ -39,12 +54,14 @@ class CustomCache < RichUrls::Cache::Wrapper
39
54
  # Callback for fetching a cache entry
40
55
  end
41
56
 
42
- def set(key, value)
43
- # Callback for setting a value in a cache to a certain key
57
+ def set(key, value, time)
58
+ # Callback for setting a value in a cache to a certain key for a certain
59
+ # `time`*.
44
60
  end
45
61
 
46
- def extend(key)
47
- # Callback for extending a cached value by key
62
+ def extend(key, time)
63
+ # Callback for extending a cached value for a certain key for a certain
64
+ # `time`*.
48
65
  end
49
66
  end
50
67
  ```
@@ -52,5 +69,19 @@ end
52
69
  Finally, you can enable the `CustomCache` by adding:
53
70
 
54
71
  ```ruby
55
- RichUrls.cache = CustomCache.new(time: 2.hours)
72
+ RichUrls.cache = CustomCache.new(time: 7200)
73
+ ```
74
+
75
+ **\* About custom cache time:**
76
+
77
+ If you have caching enabled and would like to deviate from the default cache time
78
+ per URL you enrich, it's possible to do so. You'd have to pass a `cache_time`
79
+ parameter to the URL enricher as such:
80
+
81
+ ```ruby
82
+ RichUrls.enrich('https://wetransfer.com', cache_time: 3600)
56
83
  ```
84
+
85
+ This `cache_time` will be accessible through the `time` parameter of the `set`
86
+ and `extend` methods on the `Cache::Wrapper`-instance and can be used as you
87
+ please.
@@ -20,15 +20,16 @@ module RichUrls
20
20
  'embed' => Parsers::EmbedParser
21
21
  }.freeze
22
22
 
23
- def self.decorate(url, body)
24
- new(url, body).decorate
23
+ def self.decorate(url, body, filter = [])
24
+ new(url, body, filter).decorate
25
25
  end
26
26
 
27
27
  private_class_method :new
28
28
 
29
- def initialize(url, body)
29
+ def initialize(url, body, filter)
30
30
  @url = url
31
- @xml = XMLHandler.new
31
+ @filter = filter
32
+ @xml = XMLHandler.new(filter)
32
33
 
33
34
  Ox.sax_html(@xml, StringIO.new(body))
34
35
 
@@ -40,9 +41,17 @@ module RichUrls
40
41
  end
41
42
 
42
43
  def decorate
43
- PARSERS.each_with_object({}) do |(key, parser), object|
44
+ parsers.each_with_object({}) do |(key, parser), object|
44
45
  object[key] = parser.call(@xml.properties[key], @url)
45
46
  end
46
47
  end
48
+
49
+ private
50
+
51
+ def parsers
52
+ return PARSERS if @filter.empty?
53
+
54
+ PARSERS.slice(*@filter)
55
+ end
47
56
  end
48
57
  end
data/lib/cache.rb CHANGED
@@ -5,11 +5,11 @@ module RichUrls
5
5
  raise NotImplementedError, 'wrapper needs `get` method'
6
6
  end
7
7
 
8
- def set(_key, _value)
8
+ def set(_key, _value, _time)
9
9
  raise NotImplementedError, 'wrapper needs `set` method'
10
10
  end
11
11
 
12
- def extend(_key)
12
+ def extend(_key, _time)
13
13
  raise NotImplementedError, 'wrapper needs `extend` method'
14
14
  end
15
15
  end
@@ -17,9 +17,9 @@ module RichUrls
17
17
  class None < Wrapper
18
18
  def get(_); end
19
19
 
20
- def set(_, _); end
20
+ def set(_, _, _); end
21
21
 
22
- def extend(_); end
22
+ def extend(_, _); end
23
23
  end
24
24
  end
25
25
  end
data/lib/richurls.rb CHANGED
@@ -22,11 +22,11 @@ module RichUrls
22
22
  @cache ||= wrapper
23
23
  end
24
24
 
25
- def self.enrich(url)
25
+ def self.enrich(url, filter: [], cache_time: nil)
26
26
  unless URI::DEFAULT_PARSER.make_regexp.match?(url)
27
27
  raise MalformedURLError, "this url is malformed: #{url}"
28
28
  end
29
29
 
30
- UrlFetcher.fetch(url)
30
+ UrlFetcher.fetch(url, filter, cache_time)
31
31
  end
32
32
  end
data/lib/url_fetcher.rb CHANGED
@@ -6,21 +6,23 @@ module RichUrls
6
6
 
7
7
  class UrlFetcherError < StandardError; end
8
8
 
9
- def self.fetch(url)
10
- new(url).fetch
9
+ def self.fetch(url, attributes = [], cache_time = nil)
10
+ new(url, attributes, cache_time).fetch
11
11
  end
12
12
 
13
13
  private_class_method :new
14
14
 
15
- def initialize(url)
15
+ def initialize(url, attributes, cache_time)
16
16
  @url = url
17
+ @attributes = attributes
18
+ @cache_time = cache_time
17
19
  end
18
20
 
19
21
  def fetch
20
22
  cached = RichUrls.cache.get(digest)
21
23
 
22
24
  if cached
23
- RichUrls.cache.extend(digest)
25
+ RichUrls.cache.extend(digest, @cache_time)
24
26
  Oj.load(cached)
25
27
  else
26
28
  patron_call
@@ -30,7 +32,7 @@ module RichUrls
30
32
  private
31
33
 
32
34
  def digest
33
- @digest ||= Digest::MD5.hexdigest(@url)
35
+ @digest ||= Digest::MD5.hexdigest(@url + @attributes.sort.join('-'))
34
36
  end
35
37
 
36
38
  def patron_call
@@ -38,8 +40,10 @@ module RichUrls
38
40
  response = session.get(@url)
39
41
 
40
42
  if response.status < 400
41
- decorated = BodyDecorator.decorate(response.url, response.body)
42
- RichUrls.cache.set(digest, Oj.dump(decorated))
43
+ decorated = BodyDecorator.decorate(
44
+ response.url, response.body, @attributes
45
+ )
46
+ RichUrls.cache.set(digest, Oj.dump(decorated), @cache_time)
43
47
  decorated
44
48
  else
45
49
  raise UrlFetcherError, 'url cannot be found'
data/lib/xml_handler.rb CHANGED
@@ -43,15 +43,11 @@ module RichUrls
43
43
 
44
44
  attr_reader :elements, :properties
45
45
 
46
- def initialize
46
+ def initialize(filter = [])
47
+ @filter = filter
47
48
  @elements = []
48
49
  @counts = Set.new
49
- @properties = {
50
- 'title' => nil,
51
- 'description' => nil,
52
- 'image' => nil,
53
- 'favicon' => nil
54
- }
50
+ @properties = filtered_properties(filter)
55
51
  end
56
52
 
57
53
  def find(tag, attrs = {})
@@ -115,5 +111,14 @@ module RichUrls
115
111
 
116
112
  !find(:meta, property: FALLBACK_ELEMENTS.fetch(tag))
117
113
  end
114
+
115
+ # Turns a set of filtered properties into a Hash where
116
+ # the default value is `nil`
117
+ def filtered_properties(filter)
118
+ keys = FINDERS.values.uniq
119
+ keys &= filter if filter.any?
120
+
121
+ Hash[keys.zip([])]
122
+ end
118
123
  end
119
124
  end
data/richurls.gemspec CHANGED
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
3
 
4
4
  Gem::Specification.new do |spec|
5
5
  spec.name = 'richurls'
6
- spec.version = '0.4.0'
6
+ spec.version = '0.5.0'
7
7
  spec.authors = ['grdw']
8
8
  spec.email = ['gerard@wetransfer.com']
9
9
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: richurls
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - grdw
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-03-23 00:00:00.000000000 Z
11
+ date: 2020-03-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: oj