richurls 0.1.9 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +10 -8
- data/Gemfile.lock +4 -4
- data/README.md +19 -0
- data/lib/cache.rb +23 -0
- data/lib/parsers/favicon_parser.rb +14 -5
- data/lib/richurls.rb +14 -0
- data/lib/url_fetcher.rb +3 -7
- data/lib/xml_handler.rb +2 -10
- data/richurls.gemspec +2 -2
- metadata +10 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c099711668f878f6935edf351d0c76735e30653f806a06f161c54f56d38e7191
|
4
|
+
data.tar.gz: 4559b4fe4fad8f175384a127a5ffb93f71e3ed92b2c2d54aa0c8e8fecf5f4828
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d8634f5865f47911969e0b7f3c99bfc8d63ab972a1858ea83b817db4cdf32f2d9958eb58b16a162bc81ff2948508163be2cdb46d5d223b69605d983ed13d1c08
|
7
|
+
data.tar.gz: c8a0b54e6adb5bd6e0a87c75745a38849c1c226a562ef3728df48e8f51d3a6fab30aa7930b57eb7fac9d6d445ce0630e4f2953917d36ac19a16eb9dcbc41882d
|
data/.rubocop.yml
CHANGED
@@ -3,29 +3,31 @@ AllCops:
|
|
3
3
|
|
4
4
|
Lint/SuppressedException:
|
5
5
|
Enabled: false
|
6
|
-
|
7
6
|
Lint/AssignmentInCondition:
|
8
7
|
Exclude:
|
9
8
|
- 'spec/**/*.rb'
|
9
|
+
|
10
10
|
Metrics/BlockLength:
|
11
11
|
Exclude:
|
12
12
|
- 'spec/**/*.rb'
|
13
13
|
- '*.gemspec'
|
14
|
-
|
14
|
+
Metrics/MethodLength:
|
15
15
|
Enabled: false
|
16
|
+
|
17
|
+
Style/BlockDelimiters:
|
18
|
+
Exclude:
|
19
|
+
- 'spec/**/*.rb'
|
16
20
|
Style/Documentation:
|
17
21
|
Enabled: false
|
18
|
-
# This is causing breaking changes:
|
19
22
|
Style/ExpandPathArguments:
|
20
23
|
Enabled: false
|
24
|
+
Style/FormatString:
|
25
|
+
Enabled: false
|
21
26
|
Style/FrozenStringLiteralComment:
|
22
27
|
Enabled: false
|
23
28
|
Style/GuardClause:
|
24
29
|
Enabled: false
|
25
|
-
|
30
|
+
Naming/MemoizedInstanceVariableName:
|
26
31
|
Enabled: false
|
27
|
-
Style/
|
28
|
-
Exclude:
|
29
|
-
- 'spec/**/*.rb'
|
30
|
-
Metrics/MethodLength:
|
32
|
+
Style/RegexpLiteral:
|
31
33
|
Enabled: false
|
data/Gemfile.lock
CHANGED
@@ -1,11 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
richurls (0.1.9)
|
4
|
+
richurls (0.2.0)
|
5
5
|
oj (~> 3)
|
6
6
|
ox (~> 2)
|
7
7
|
patron (~> 0.13)
|
8
|
-
redis (~> 4.1)
|
9
8
|
|
10
9
|
GEM
|
11
10
|
remote: https://rubygems.org/
|
@@ -13,8 +12,8 @@ GEM
|
|
13
12
|
ast (2.4.0)
|
14
13
|
diff-lcs (1.3)
|
15
14
|
jaro_winkler (1.5.4)
|
16
|
-
oj (3.10.
|
17
|
-
ox (2.
|
15
|
+
oj (3.10.2)
|
16
|
+
ox (2.13.2)
|
18
17
|
parallel (1.19.1)
|
19
18
|
parser (2.7.0.2)
|
20
19
|
ast (~> 2.4.0)
|
@@ -49,6 +48,7 @@ PLATFORMS
|
|
49
48
|
|
50
49
|
DEPENDENCIES
|
51
50
|
bundler (~> 2.1)
|
51
|
+
redis (~> 4.1)
|
52
52
|
richurls!
|
53
53
|
rspec (~> 3.9)
|
54
54
|
rubocop (~> 0.79)
|
data/README.md
CHANGED
@@ -25,3 +25,22 @@ RichUrls.enrich('https://wetransfer.com')
|
|
25
25
|
# }
|
26
26
|
```
|
27
27
|
|
28
|
+
**Caching:**
|
29
|
+
|
30
|
+
By default caching is turned off. Caching can be enabled by writing a cache wrapper as such:
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
class CustomCache < Cache::Wrapper
|
34
|
+
def get(key)
|
35
|
+
# Callback for fetching a cache entry
|
36
|
+
end
|
37
|
+
|
38
|
+
def set(key, value, time)
|
39
|
+
# Callback for setting a value in a cache to a certain key for a certain time
|
40
|
+
end
|
41
|
+
|
42
|
+
def extend(key, time)
|
43
|
+
# Callback for extending a cached value by key by a certain time
|
44
|
+
end
|
45
|
+
end
|
46
|
+
```
|
data/lib/cache.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
module Cache
|
2
|
+
class Wrapper
|
3
|
+
def get(_key)
|
4
|
+
raise NotImplementedError, 'wrapper needs `get` method'
|
5
|
+
end
|
6
|
+
|
7
|
+
def set(_key, _value, _time)
|
8
|
+
raise NotImplementedError, 'wrapper needs `set` method'
|
9
|
+
end
|
10
|
+
|
11
|
+
def extend(_key, _time)
|
12
|
+
raise NotImplementedError, 'wrapper needs `extend` method'
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
class None < Wrapper
|
17
|
+
def get(_); end
|
18
|
+
|
19
|
+
def set(_, _, _); end
|
20
|
+
|
21
|
+
def extend(_); end
|
22
|
+
end
|
23
|
+
end
|
data/lib/parsers/favicon_parser.rb
CHANGED
@@ -1,9 +1,18 @@
|
|
1
1
|
module Parsers
|
2
|
-
FaviconParser
|
3
|
-
|
4
|
-
icon_el = document.find(:link, rel: %w[icon])
|
2
|
+
module FaviconParser
|
3
|
+
KEYWORDS = ['shortcut icon', 'icon shortcut', 'icon'].freeze
|
5
4
|
|
6
|
-
|
7
|
-
|
5
|
+
def self.call(document, url)
|
6
|
+
KEYWORDS.each do |rel|
|
7
|
+
found_document = document.find(:link, rel: rel)
|
8
|
+
|
9
|
+
if found_document
|
10
|
+
@el = found_document
|
11
|
+
break
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
@el && UrlHelper.url_for(url, @el.attributes[:href])
|
16
|
+
end
|
8
17
|
end
|
9
18
|
end
|
data/lib/richurls.rb
CHANGED
@@ -3,12 +3,26 @@ require 'redis'
|
|
3
3
|
require 'uri'
|
4
4
|
require 'digest'
|
5
5
|
|
6
|
+
require_relative 'cache'
|
6
7
|
require_relative 'url_fetcher'
|
7
8
|
require_relative 'body_decorator'
|
8
9
|
|
9
10
|
module RichUrls
|
10
11
|
class MalformedURLError < StandardError; end
|
11
12
|
|
13
|
+
def self.cache
|
14
|
+
@cache || Cache::None.new
|
15
|
+
end
|
16
|
+
|
17
|
+
def self.cache=(wrapper)
|
18
|
+
unless wrapper < Cache::Wrapper
|
19
|
+
raise ArgumentError,
|
20
|
+
'caching wrapper needs to be of type Cache::Wrapper'
|
21
|
+
end
|
22
|
+
|
23
|
+
@cache ||= wrapper.new
|
24
|
+
end
|
25
|
+
|
12
26
|
def self.enrich(url)
|
13
27
|
unless URI::DEFAULT_PARSER.make_regexp.match?(url)
|
14
28
|
raise MalformedURLError, "this url is malformed: #{url}"
|
data/lib/url_fetcher.rb
CHANGED
@@ -18,10 +18,10 @@ module RichUrls
|
|
18
18
|
end
|
19
19
|
|
20
20
|
def fetch
|
21
|
-
cached =
|
21
|
+
cached = RichUrls.cache.get(digest)
|
22
22
|
|
23
23
|
if cached
|
24
|
-
|
24
|
+
RichUrls.cache.extend(digest, CACHE_TIME)
|
25
25
|
Oj.load(cached)
|
26
26
|
else
|
27
27
|
patron_call
|
@@ -30,10 +30,6 @@ module RichUrls
|
|
30
30
|
|
31
31
|
private
|
32
32
|
|
33
|
-
def redis
|
34
|
-
@redis ||= Redis.new
|
35
|
-
end
|
36
|
-
|
37
33
|
def digest
|
38
34
|
@digest ||= Digest::MD5.hexdigest(@url)
|
39
35
|
end
|
@@ -44,7 +40,7 @@ module RichUrls
|
|
44
40
|
|
45
41
|
if response.status < 400
|
46
42
|
decorated = BodyDecorator.new(response.url, response.body).decorate
|
47
|
-
|
43
|
+
RichUrls.cache.set(digest, Oj.dump(decorated), CACHE_TIME)
|
48
44
|
decorated
|
49
45
|
else
|
50
46
|
raise UrlFetcherError, 'url cannot be found'
|
data/lib/xml_handler.rb
CHANGED
@@ -26,17 +26,9 @@ module RichUrls
|
|
26
26
|
@elements = []
|
27
27
|
end
|
28
28
|
|
29
|
-
def find(name,
|
29
|
+
def find(name, attrs = {})
|
30
30
|
@elements.detect do |el|
|
31
|
-
matching_attributes =
|
32
|
-
if val.is_a?(Array)
|
33
|
-
result = el.attributes.fetch(key, '').split(' ')
|
34
|
-
|
35
|
-
val.all? { |sub_val| result.include?(sub_val) }
|
36
|
-
else
|
37
|
-
el.attributes[key] == val
|
38
|
-
end
|
39
|
-
end
|
31
|
+
matching_attributes = attrs.all? { |k, v| el.attributes[k] == v }
|
40
32
|
|
41
33
|
el.name == name && matching_attributes
|
42
34
|
end
|
data/richurls.gemspec
CHANGED
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |spec|
|
5
5
|
spec.name = 'richurls'
|
6
|
-
spec.version = '0.1.9'
|
6
|
+
spec.version = '0.2.0'
|
7
7
|
spec.authors = ['grdw']
|
8
8
|
spec.email = ['gerard@wetransfer.com']
|
9
9
|
|
@@ -31,8 +31,8 @@ Gem::Specification.new do |spec|
|
|
31
31
|
spec.add_dependency 'oj', '~> 3'
|
32
32
|
spec.add_dependency 'ox', '~> 2'
|
33
33
|
spec.add_dependency 'patron', '~> 0.13'
|
34
|
-
spec.add_dependency 'redis', '~> 4.1'
|
35
34
|
spec.add_development_dependency 'bundler', '~> 2.1'
|
35
|
+
spec.add_development_dependency 'redis', '~> 4.1'
|
36
36
|
spec.add_development_dependency 'rspec', '~> 3.9'
|
37
37
|
spec.add_development_dependency 'rubocop', '~> 0.79'
|
38
38
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: richurls
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.9
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- grdw
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-02-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: oj
|
@@ -53,33 +53,33 @@ dependencies:
|
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0.13'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name: redis
|
56
|
+
name: bundler
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '4.1'
|
62
|
-
type: :runtime
|
61
|
+
version: '2.1'
|
62
|
+
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '4.1'
|
68
|
+
version: '2.1'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name: bundler
|
70
|
+
name: redis
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: '2.1'
|
75
|
+
version: '4.1'
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: '2.1'
|
82
|
+
version: '4.1'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: rspec
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -123,6 +123,7 @@ files:
|
|
123
123
|
- LICENSE
|
124
124
|
- README.md
|
125
125
|
- lib/body_decorator.rb
|
126
|
+
- lib/cache.rb
|
126
127
|
- lib/parsers/description_parser.rb
|
127
128
|
- lib/parsers/embed_parser.rb
|
128
129
|
- lib/parsers/embed_parsers/base.rb
|