richurls 0.1.9 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +10 -8
- data/Gemfile.lock +4 -4
- data/README.md +19 -0
- data/lib/cache.rb +23 -0
- data/lib/parsers/favicon_parser.rb +14 -5
- data/lib/richurls.rb +14 -0
- data/lib/url_fetcher.rb +3 -7
- data/lib/xml_handler.rb +2 -10
- data/richurls.gemspec +2 -2
- metadata +10 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c099711668f878f6935edf351d0c76735e30653f806a06f161c54f56d38e7191
|
4
|
+
data.tar.gz: 4559b4fe4fad8f175384a127a5ffb93f71e3ed92b2c2d54aa0c8e8fecf5f4828
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d8634f5865f47911969e0b7f3c99bfc8d63ab972a1858ea83b817db4cdf32f2d9958eb58b16a162bc81ff2948508163be2cdb46d5d223b69605d983ed13d1c08
|
7
|
+
data.tar.gz: c8a0b54e6adb5bd6e0a87c75745a38849c1c226a562ef3728df48e8f51d3a6fab30aa7930b57eb7fac9d6d445ce0630e4f2953917d36ac19a16eb9dcbc41882d
|
data/.rubocop.yml
CHANGED
@@ -3,29 +3,31 @@ AllCops:
|
|
3
3
|
|
4
4
|
Lint/SuppressedException:
|
5
5
|
Enabled: false
|
6
|
-
|
7
6
|
Lint/AssignmentInCondition:
|
8
7
|
Exclude:
|
9
8
|
- 'spec/**/*.rb'
|
9
|
+
|
10
10
|
Metrics/BlockLength:
|
11
11
|
Exclude:
|
12
12
|
- 'spec/**/*.rb'
|
13
13
|
- '*.gemspec'
|
14
|
-
|
14
|
+
Metrics/MethodLength:
|
15
15
|
Enabled: false
|
16
|
+
|
17
|
+
Style/BlockDelimiters:
|
18
|
+
Exclude:
|
19
|
+
- 'spec/**/*.rb'
|
16
20
|
Style/Documentation:
|
17
21
|
Enabled: false
|
18
|
-
# This is causing breaking changes:
|
19
22
|
Style/ExpandPathArguments:
|
20
23
|
Enabled: false
|
24
|
+
Style/FormatString:
|
25
|
+
Enabled: false
|
21
26
|
Style/FrozenStringLiteralComment:
|
22
27
|
Enabled: false
|
23
28
|
Style/GuardClause:
|
24
29
|
Enabled: false
|
25
|
-
|
30
|
+
Naming/MemoizedInstanceVariableName:
|
26
31
|
Enabled: false
|
27
|
-
Style/
|
28
|
-
Exclude:
|
29
|
-
- 'spec/**/*.rb'
|
30
|
-
Metrics/MethodLength:
|
32
|
+
Style/RegexpLiteral:
|
31
33
|
Enabled: false
|
data/Gemfile.lock
CHANGED
@@ -1,11 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
richurls (0.1.9)
|
4
|
+
richurls (0.2.0)
|
5
5
|
oj (~> 3)
|
6
6
|
ox (~> 2)
|
7
7
|
patron (~> 0.13)
|
8
|
-
redis (~> 4.1)
|
9
8
|
|
10
9
|
GEM
|
11
10
|
remote: https://rubygems.org/
|
@@ -13,8 +12,8 @@ GEM
|
|
13
12
|
ast (2.4.0)
|
14
13
|
diff-lcs (1.3)
|
15
14
|
jaro_winkler (1.5.4)
|
16
|
-
oj (3.10.
|
17
|
-
ox (2.
|
15
|
+
oj (3.10.2)
|
16
|
+
ox (2.13.2)
|
18
17
|
parallel (1.19.1)
|
19
18
|
parser (2.7.0.2)
|
20
19
|
ast (~> 2.4.0)
|
@@ -49,6 +48,7 @@ PLATFORMS
|
|
49
48
|
|
50
49
|
DEPENDENCIES
|
51
50
|
bundler (~> 2.1)
|
51
|
+
redis (~> 4.1)
|
52
52
|
richurls!
|
53
53
|
rspec (~> 3.9)
|
54
54
|
rubocop (~> 0.79)
|
data/README.md
CHANGED
@@ -25,3 +25,22 @@ RichUrls.enrich('https://wetransfer.com')
|
|
25
25
|
# }
|
26
26
|
```
|
27
27
|
|
28
|
+
**Caching:**
|
29
|
+
|
30
|
+
Caching is turned off by default. To enable it, write a cache wrapper as shown below and assign the class with `RichUrls.cache = CustomCache`:
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
class CustomCache < Cache::Wrapper
|
34
|
+
def get(key)
|
35
|
+
# Callback for fetching a cache entry
|
36
|
+
end
|
37
|
+
|
38
|
+
def set(key, value, time)
|
39
|
+
# Callback for setting a value in a cache to a certain key for a certain time
|
40
|
+
end
|
41
|
+
|
42
|
+
def extend(key, time)
|
43
|
+
# Callback for extending a cached value by key by a certain time
|
44
|
+
end
|
45
|
+
end
|
46
|
+
```
|
data/lib/cache.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Pluggable cache layer: subclass Cache::Wrapper and implement `get`, `set`
# and `extend` to provide a cache backend. Cache::None is the no-op wrapper.
module Cache
  # Abstract base class for cache wrappers. Every method raises
  # NotImplementedError until a subclass overrides it.
  #
  # NOTE: `extend` shadows Object#extend on wrapper instances.
  class Wrapper
    # Fetches the cache entry stored under the given key.
    #
    # @raise [NotImplementedError] unless overridden by a subclass
    def get(_key)
      raise NotImplementedError, 'wrapper needs `get` method'
    end

    # Stores a value under the given key for the given time.
    #
    # @raise [NotImplementedError] unless overridden by a subclass
    def set(_key, _value, _time)
      raise NotImplementedError, 'wrapper needs `set` method'
    end

    # Extends the cached value under the given key by the given time.
    #
    # @raise [NotImplementedError] unless overridden by a subclass
    def extend(_key, _time)
      raise NotImplementedError, 'wrapper needs `extend` method'
    end
  end

  # No-op wrapper: stores nothing and every method returns nil.
  class None < Wrapper
    def get(_key); end

    def set(_key, _value, _time); end

    # Accepts (key, time) like Wrapper#extend. The time argument is optional
    # so that existing one-argument calls keep working.
    def extend(_key, _time = nil); end
  end
end
|
data/lib/parsers/favicon_parser.rb
CHANGED
@@ -1,9 +1,18 @@
|
|
1
1
|
module Parsers
|
2
|
-
FaviconParser
|
3
|
-
|
4
|
-
icon_el = document.find(:link, rel: %w[icon])
|
2
|
+
module FaviconParser
|
3
|
+
KEYWORDS = ['shortcut icon', 'icon shortcut', 'icon'].freeze
|
5
4
|
|
6
|
-
|
7
|
-
|
5
|
+
# Resolves the favicon URL of a parsed document.
#
# Tries each rel value in KEYWORDS in order and returns
# UrlHelper.url_for(url, href) for the first matching link element, or nil
# when no link element matches.
#
# The match is held in a local variable rather than a module-level instance
# variable, so a favicon found in one document cannot leak into a later call
# on a document that has none.
def self.call(document, url)
  KEYWORDS.each do |rel|
    link = document.find(:link, rel: rel)
    return UrlHelper.url_for(url, link.attributes[:href]) if link
  end

  nil
end
|
8
17
|
end
|
9
18
|
end
|
data/lib/richurls.rb
CHANGED
@@ -3,12 +3,26 @@ require 'redis'
|
|
3
3
|
require 'uri'
|
4
4
|
require 'digest'
|
5
5
|
|
6
|
+
require_relative 'cache'
|
6
7
|
require_relative 'url_fetcher'
|
7
8
|
require_relative 'body_decorator'
|
8
9
|
|
9
10
|
module RichUrls
|
10
11
|
# Raised by RichUrls.enrich when the url does not match URI::DEFAULT_PARSER.make_regexp.
class MalformedURLError < StandardError; end
|
11
12
|
|
13
|
+
# Returns the configured cache wrapper instance, or a new no-op Cache::None
# when no wrapper has been configured.
def self.cache
  @cache || Cache::None.new
end

# Configures the cache backend from a wrapper class.
#
# @param wrapper [Class] a subclass of Cache::Wrapper; it is instantiated
#   here without arguments
# @raise [ArgumentError] when wrapper is not a subclass of Cache::Wrapper
def self.cache=(wrapper)
  # Check for a Class first: `<` is not defined on arbitrary objects (a
  # wrapper instance, nil, ...) and would raise NoMethodError instead of the
  # ArgumentError below.
  unless wrapper.is_a?(Class) && wrapper < Cache::Wrapper
    raise ArgumentError,
          'caching wrapper needs to be of type Cache::Wrapper'
  end

  # NOTE(review): `||=` keeps the first configured wrapper, so later
  # assignments are silently ignored - confirm this is intended.
  @cache ||= wrapper.new
end
|
25
|
+
|
12
26
|
def self.enrich(url)
|
13
27
|
unless URI::DEFAULT_PARSER.make_regexp.match?(url)
|
14
28
|
raise MalformedURLError, "this url is malformed: #{url}"
|
data/lib/url_fetcher.rb
CHANGED
@@ -18,10 +18,10 @@ module RichUrls
|
|
18
18
|
end
|
19
19
|
|
20
20
|
def fetch
|
21
|
-
cached =
|
21
|
+
cached = RichUrls.cache.get(digest)
|
22
22
|
|
23
23
|
if cached
|
24
|
-
|
24
|
+
RichUrls.cache.extend(digest, CACHE_TIME)
|
25
25
|
Oj.load(cached)
|
26
26
|
else
|
27
27
|
patron_call
|
@@ -30,10 +30,6 @@ module RichUrls
|
|
30
30
|
|
31
31
|
private
|
32
32
|
|
33
|
-
def redis
|
34
|
-
@redis ||= Redis.new
|
35
|
-
end
|
36
|
-
|
37
33
|
# Memoized MD5 hex digest of @url; passed as the key to the RichUrls.cache get/extend/set calls in this file.
def digest
|
38
34
|
@digest ||= Digest::MD5.hexdigest(@url)
|
39
35
|
end
|
@@ -44,7 +40,7 @@ module RichUrls
|
|
44
40
|
|
45
41
|
if response.status < 400
|
46
42
|
decorated = BodyDecorator.new(response.url, response.body).decorate
|
47
|
-
|
43
|
+
RichUrls.cache.set(digest, Oj.dump(decorated), CACHE_TIME)
|
48
44
|
decorated
|
49
45
|
else
|
50
46
|
raise UrlFetcherError, 'url cannot be found'
|
data/lib/xml_handler.rb
CHANGED
@@ -26,17 +26,9 @@ module RichUrls
|
|
26
26
|
@elements = []
|
27
27
|
end
|
28
28
|
|
29
|
-
def find(name,
|
29
|
+
def find(name, attrs = {})
|
30
30
|
@elements.detect do |el|
|
31
|
-
matching_attributes =
|
32
|
-
if val.is_a?(Array)
|
33
|
-
result = el.attributes.fetch(key, '').split(' ')
|
34
|
-
|
35
|
-
val.all? { |sub_val| result.include?(sub_val) }
|
36
|
-
else
|
37
|
-
el.attributes[key] == val
|
38
|
-
end
|
39
|
-
end
|
31
|
+
matching_attributes = attrs.all? { |k, v| el.attributes[k] == v }
|
40
32
|
|
41
33
|
el.name == name && matching_attributes
|
42
34
|
end
|
data/richurls.gemspec
CHANGED
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |spec|
|
5
5
|
spec.name = 'richurls'
|
6
|
-
spec.version = '0.1.9'
|
6
|
+
spec.version = '0.2.0'
|
7
7
|
spec.authors = ['grdw']
|
8
8
|
spec.email = ['gerard@wetransfer.com']
|
9
9
|
|
@@ -31,8 +31,8 @@ Gem::Specification.new do |spec|
|
|
31
31
|
spec.add_dependency 'oj', '~> 3'
|
32
32
|
spec.add_dependency 'ox', '~> 2'
|
33
33
|
spec.add_dependency 'patron', '~> 0.13'
|
34
|
-
spec.add_dependency 'redis', '~> 4.1'
|
35
34
|
spec.add_development_dependency 'bundler', '~> 2.1'
|
35
|
+
spec.add_development_dependency 'redis', '~> 4.1'
|
36
36
|
spec.add_development_dependency 'rspec', '~> 3.9'
|
37
37
|
spec.add_development_dependency 'rubocop', '~> 0.79'
|
38
38
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: richurls
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.9
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- grdw
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-02-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: oj
|
@@ -53,33 +53,33 @@ dependencies:
|
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0.13'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name: redis
|
56
|
+
name: bundler
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '4.1'
|
62
|
-
type: :runtime
|
61
|
+
version: '2.1'
|
62
|
+
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '4.1'
|
68
|
+
version: '2.1'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name: bundler
|
70
|
+
name: redis
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: '2.1'
|
75
|
+
version: '4.1'
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: '2.1'
|
82
|
+
version: '4.1'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: rspec
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -123,6 +123,7 @@ files:
|
|
123
123
|
- LICENSE
|
124
124
|
- README.md
|
125
125
|
- lib/body_decorator.rb
|
126
|
+
- lib/cache.rb
|
126
127
|
- lib/parsers/description_parser.rb
|
127
128
|
- lib/parsers/embed_parser.rb
|
128
129
|
- lib/parsers/embed_parsers/base.rb
|