normalize_url 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -1
- data/lib/normalize_url/normalizer.rb +34 -0
- data/lib/normalize_url/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 540321091211ecaa6d08afffbd12cd43437b648b
|
4
|
+
data.tar.gz: 61ecab0b4e64c33edfe120666f05f7636fbe51b7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f4234b44bc0e43f256885814b39ee38d3a39b37a0953bf14a18c75fb9a0e166789d8877fffdf4e244f8f0686d19276b8aabdf0e77f97c625d5a7bc0c0b77d3e9
|
7
|
+
data.tar.gz: 4de7be9fb6120d2d7a83aa244e32d88510b7fa03d18e15c0c4e8dfbbfb20fadc539c55de21b645e24557c11c8fcd06f210e333b28814c37db622705cd785cf5e
|
data/README.md
CHANGED
@@ -1,4 +1,7 @@
|
|
1
1
|
# NormalizeUrl
|
2
|
+
[Gem Version](https://rubygems.org/gems/normalize_url)
|
3
|
+
[Build Status](http://travis-ci.org/rwz/normalize_url)
|
4
|
+
[Code Climate](https://codeclimate.com/github/rwz/normalize_url)
|
2
5
|
|
3
6
|
This gem can normalize HTTP(S) URLs by applying a certain set of
|
4
7
|
transformations. After normalization, two different URLs that point to the same
|
@@ -45,7 +48,7 @@ Each transformation could be skipped by demand. All you need is to pass it as a
|
|
45
48
|
optional value to `normalize` method:
|
46
49
|
|
47
50
|
```ruby
|
48
|
-
NormalizeUrl.
|
51
|
+
NormalizeUrl.process("http://example.com/foo/", remove_trailing_slash: false) # => "http://example.com/foo/"
|
49
52
|
```
|
50
53
|
|
51
54
|
## Transformations
|
data/lib/normalize_url/normalizer.rb
CHANGED
@@ -1,18 +1,36 @@
|
|
1
|
+
require "set"
|
1
2
|
require "addressable/uri"
|
2
3
|
|
3
4
|
module NormalizeUrl
|
4
5
|
class Normalizer
|
5
6
|
attr_reader :uri, :options
|
6
7
|
|
8
|
+
TRACKING_QUERY_PARAMS = %w[
|
9
|
+
utm_source
|
10
|
+
utm_medium
|
11
|
+
utm_term
|
12
|
+
utm_content
|
13
|
+
utm_campaign
|
14
|
+
sms_ss
|
15
|
+
awesm
|
16
|
+
xtor
|
17
|
+
PHPSESSID
|
18
|
+
].to_set
|
19
|
+
|
7
20
|
def initialize(original_uri, options={})
|
8
21
|
@uri = Addressable::URI.parse(original_uri).normalize
|
9
22
|
@options = options
|
23
|
+
fail_uri "only absolute URLs can be normalized" unless uri.absolute?
|
24
|
+
fail_uri "only HTTP/HTTPS URLs can be normalized" unless uri.scheme =~ /https?/
|
25
|
+
rescue Addressable::URI::InvalidURIError
|
26
|
+
fail_uri "#{original_uri.inspect} is not a URL"
|
10
27
|
end
|
11
28
|
|
12
29
|
def normalize
|
13
30
|
process :remove_trailing_slash
|
14
31
|
process :remove_repeating_slashes
|
15
32
|
process :remove_hash
|
33
|
+
process :remove_tracking
|
16
34
|
process :sort_query
|
17
35
|
uri.to_s
|
18
36
|
end
|
@@ -42,5 +60,21 @@ module NormalizeUrl
|
|
42
60
|
def process_remove_repeating_slashes
|
43
61
|
uri.path = uri.path.squeeze(?/) if uri.host
|
44
62
|
end
|
63
|
+
|
64
|
+
def process_remove_tracking
|
65
|
+
return unless uri.query_values
|
66
|
+
original = uri.query_values
|
67
|
+
cleaned = original.reject{ |key, _| TRACKING_QUERY_PARAMS.include?(key) }
|
68
|
+
|
69
|
+
if cleaned.empty?
|
70
|
+
uri.query_values = nil
|
71
|
+
elsif cleaned != original
|
72
|
+
uri.query_values = cleaned
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
def fail_uri(message)
|
77
|
+
fail ArgumentError, message
|
78
|
+
end
|
45
79
|
end
|
46
80
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: normalize_url
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Pavel Pravosud
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10-
|
11
|
+
date: 2014-10-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|