normalize_url 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -1
- data/lib/normalize_url/normalizer.rb +34 -0
- data/lib/normalize_url/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 540321091211ecaa6d08afffbd12cd43437b648b
|
4
|
+
data.tar.gz: 61ecab0b4e64c33edfe120666f05f7636fbe51b7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f4234b44bc0e43f256885814b39ee38d3a39b37a0953bf14a18c75fb9a0e166789d8877fffdf4e244f8f0686d19276b8aabdf0e77f97c625d5a7bc0c0b77d3e9
|
7
|
+
data.tar.gz: 4de7be9fb6120d2d7a83aa244e32d88510b7fa03d18e15c0c4e8dfbbfb20fadc539c55de21b645e24557c11c8fcd06f210e333b28814c37db622705cd785cf5e
|
data/README.md
CHANGED
@@ -1,4 +1,7 @@
|
|
1
1
|
# NormalizeUrl
|
2
|
+
[![Gem Version](https://img.shields.io/gem/v/normalize_url.svg)](https://rubygems.org/gems/normalize_url)
|
3
|
+
[![Build Status](https://img.shields.io/travis/rwz/normalize_url.svg)](http://travis-ci.org/rwz/normalize_url)
|
4
|
+
[![Code Climate](https://img.shields.io/codeclimate/github/rwz/normalize_url.svg)](https://codeclimate.com/github/rwz/normalize_url)
|
2
5
|
|
3
6
|
This gem can normalize HTTP(S) URLs by applying a certain set of
|
4
7
|
transformations. After normalization, two different URLs that point to the same
|
@@ -45,7 +48,7 @@ Each transformation could be skipped on demand. All you need is to pass it as a
|
|
45
48
|
optional value to `normalize` method:
|
46
49
|
|
47
50
|
```ruby
|
48
|
-
NormalizeUrl.
|
51
|
+
NormalizeUrl.process("http://example.com/foo/", remove_trailing_slash: false) # => "http://example.com/foo/"
|
49
52
|
```
|
50
53
|
|
51
54
|
## Transformations
|
data/lib/normalize_url/normalizer.rb
CHANGED
@@ -1,18 +1,36 @@
|
|
1
|
+
require "set"
|
1
2
|
require "addressable/uri"
|
2
3
|
|
3
4
|
module NormalizeUrl
|
4
5
|
class Normalizer
|
5
6
|
attr_reader :uri, :options
|
6
7
|
|
8
|
+
TRACKING_QUERY_PARAMS = %w[
|
9
|
+
utm_source
|
10
|
+
utm_medium
|
11
|
+
utm_term
|
12
|
+
utm_content
|
13
|
+
utm_campaign
|
14
|
+
sms_ss
|
15
|
+
awesm
|
16
|
+
xtor
|
17
|
+
PHPSESSID
|
18
|
+
].to_set
|
19
|
+
|
7
20
|
def initialize(original_uri, options={})
|
8
21
|
@uri = Addressable::URI.parse(original_uri).normalize
|
9
22
|
@options = options
|
23
|
+
fail_uri "only absolute URLs can be normalized" unless uri.absolute?
|
24
|
+
fail_uri "only HTTP/HTTPS URLs can be normalized" unless uri.scheme =~ /https?/
|
25
|
+
rescue Addressable::URI::InvalidURIError
|
26
|
+
fail_uri "#{original_uri.inspect} is not a URL"
|
10
27
|
end
|
11
28
|
|
12
29
|
def normalize
|
13
30
|
process :remove_trailing_slash
|
14
31
|
process :remove_repeating_slashes
|
15
32
|
process :remove_hash
|
33
|
+
process :remove_tracking
|
16
34
|
process :sort_query
|
17
35
|
uri.to_s
|
18
36
|
end
|
@@ -42,5 +60,21 @@ module NormalizeUrl
|
|
42
60
|
def process_remove_repeating_slashes
|
43
61
|
uri.path = uri.path.squeeze(?/) if uri.host
|
44
62
|
end
|
63
|
+
|
64
|
+
def process_remove_tracking
|
65
|
+
return unless uri.query_values
|
66
|
+
original = uri.query_values
|
67
|
+
cleaned = original.reject{ |key, _| TRACKING_QUERY_PARAMS.include?(key) }
|
68
|
+
|
69
|
+
if cleaned.empty?
|
70
|
+
uri.query_values = nil
|
71
|
+
elsif cleaned != original
|
72
|
+
uri.query_values = cleaned
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
def fail_uri(message)
|
77
|
+
fail ArgumentError, message
|
78
|
+
end
|
45
79
|
end
|
46
80
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: normalize_url
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Pavel Pravosud
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10-
|
11
|
+
date: 2014-10-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|