normalize_url 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8028e7b8023b9a5bd1eedb26cc0a3f827573b0b5
4
- data.tar.gz: abfdc72f52b71bcdb14d654f6f5c42fd9df3e682
3
+ metadata.gz: 540321091211ecaa6d08afffbd12cd43437b648b
4
+ data.tar.gz: 61ecab0b4e64c33edfe120666f05f7636fbe51b7
5
5
  SHA512:
6
- metadata.gz: 8c9d689c0e501cdc118c026842eb40e6f3ff5a546e61e0cbb333f628c6f385af20fa5eba3b0088026ed21d3f7692b3e60e5fa6b0195f798294ddfad4f049da02
7
- data.tar.gz: 43cfa103f3f30a8c5c8b5ea2f11ba2677950c2a19b7d9f5b10a3f883117a6830cfc2ec12352e81a5e4d955e055cc95af07d383438995ae05b747405cede52deb
6
+ metadata.gz: f4234b44bc0e43f256885814b39ee38d3a39b37a0953bf14a18c75fb9a0e166789d8877fffdf4e244f8f0686d19276b8aabdf0e77f97c625d5a7bc0c0b77d3e9
7
+ data.tar.gz: 4de7be9fb6120d2d7a83aa244e32d88510b7fa03d18e15c0c4e8dfbbfb20fadc539c55de21b645e24557c11c8fcd06f210e333b28814c37db622705cd785cf5e
data/README.md CHANGED
@@ -1,4 +1,7 @@
1
1
  # NormalizeUrl
2
+ [![Gem Version](https://img.shields.io/gem/v/normalize_url.svg)](https://rubygems.org/gems/normalize_url)
3
+ [![Build Status](https://img.shields.io/travis/rwz/normalize_url.svg)](http://travis-ci.org/rwz/normalize_url)
4
+ [![Code Climate](https://img.shields.io/codeclimate/github/rwz/normalize_url.svg)](https://codeclimate.com/github/rwz/normalize_url)
2
5
 
3
6
  This gem can normalize HTTP(S) URLs by applying a certain set of
4
7
  transformations. After normalization, two different URLs that point to the same
@@ -45,7 +48,7 @@ Each transformation can be skipped on demand. All you need is to pass it as a
45
48
  optional value to `normalize` method:
46
49
 
47
50
  ```ruby
48
- NormalizeUrl.normalize("http://example.com/foo/", remove_trailing_slash: false) # => "http://example.com/foo/"
51
+ NormalizeUrl.process("http://example.com/foo/", remove_trailing_slash: false) # => "http://example.com/foo/"
49
52
  ```
50
53
 
51
54
  ## Transformations
@@ -1,18 +1,36 @@
1
+ require "set"
1
2
  require "addressable/uri"
2
3
 
3
4
  module NormalizeUrl
4
5
  class Normalizer
5
6
  attr_reader :uri, :options
6
7
 
8
+ TRACKING_QUERY_PARAMS = %w[
9
+ utm_source
10
+ utm_medium
11
+ utm_term
12
+ utm_content
13
+ utm_campaign
14
+ sms_ss
15
+ awesm
16
+ xtor
17
+ PHPSESSID
18
+ ].to_set
19
+
7
20
  def initialize(original_uri, options={})
8
21
  @uri = Addressable::URI.parse(original_uri).normalize
9
22
  @options = options
23
+ fail_uri "only absolute URLs can be normalized" unless uri.absolute?
24
+ fail_uri "only HTTP/HTTPS URLs can be normalized" unless uri.scheme =~ /https?/
25
+ rescue Addressable::URI::InvalidURIError
26
+ fail_uri "#{original_uri.inspect} is not a URL"
10
27
  end
11
28
 
12
29
  def normalize
13
30
  process :remove_trailing_slash
14
31
  process :remove_repeating_slashes
15
32
  process :remove_hash
33
+ process :remove_tracking
16
34
  process :sort_query
17
35
  uri.to_s
18
36
  end
@@ -42,5 +60,21 @@ module NormalizeUrl
42
60
  def process_remove_repeating_slashes
43
61
  uri.path = uri.path.squeeze(?/) if uri.host
44
62
  end
63
+
64
+ def process_remove_tracking
65
+ return unless uri.query_values
66
+ original = uri.query_values
67
+ cleaned = original.reject{ |key, _| TRACKING_QUERY_PARAMS.include?(key) }
68
+
69
+ if cleaned.empty?
70
+ uri.query_values = nil
71
+ elsif cleaned != original
72
+ uri.query_values = cleaned
73
+ end
74
+ end
75
+
76
+ def fail_uri(message)
77
+ fail ArgumentError, message
78
+ end
45
79
  end
46
80
  end
@@ -1,3 +1,3 @@
1
1
  module NormalizeUrl
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: normalize_url
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Pavel Pravosud
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-17 00:00:00.000000000 Z
11
+ date: 2014-10-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable