url-privacy 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +47 -0
  3. data/lib/url_privacy.rb +149 -0
  4. metadata +58 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 13006f4f0e38cbc252c8d2f0dd4e6b2e48c278001c46e4292ecb1af961362c03
4
+ data.tar.gz: 448e7c4b11e2fb4c1c3cbfa58b59613715ffc27facc69106a4fc860560419278
5
+ SHA512:
6
+ metadata.gz: 9c27045c00b8b6142c8d2514b944eb07a3c2c818955467ddd4a6d75d8ad5ff72ba2bd85e767c1f5c5cd4ebdda0f5313b348d48222e065c7ddc8fae1000f7de1a
7
+ data.tar.gz: c466b66f6b0aa0b51d59f214646f45acd6b6a28a651980792df69889a1ee08382ec702e68aa934c13b02bf4ee0c02e7b9ca1e1f4583e26869e0046b47b22647f
@@ -0,0 +1,47 @@
1
+ # url-privacy
2
+
3
+ Removes tracking parameters from URLs.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your Gemfile:
8
+
9
+ ```ruby
10
+ gem 'url-privacy'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install url-privacy
20
+
21
+ ## Usage
22
+
23
+ ```ruby
24
+ UrlPrivacy.clean url
25
+ ```
26
+
27
+ ## Contributing
28
+
29
+ Bug reports and pull requests are welcome on 0xacab.org at
30
+ <https://0xacab.org/sutty/url-privacy>. This project is intended to be
31
+ a safe, welcoming space for collaboration, and contributors are expected
32
+ to adhere to the [Sutty code of
33
+ conduct](https://sutty.nl/en/code-of-conduct/).
34
+
35
+ If you like our plugins, [please consider
36
+ donating](https://donaciones.sutty.nl/en/)!
37
+
38
+ ## License
39
+
40
+ The gem is available as free software under the terms of the LGPL3
41
+ License.
42
+
43
+ ## Code of Conduct
44
+
45
+ Everyone interacting in the url-privacy project’s codebases, issue
46
+ trackers, chat rooms and mailing lists is expected to follow the [code
47
+ of conduct](https://sutty.nl/en/code-of-conduct/).
@@ -0,0 +1,149 @@
1
# frozen_string_literal: true

require 'uri'

# Removes known tracking parameters from the query string of a URL.
#
# Usage:
#
#   UrlPrivacy.clean(url)
class UrlPrivacy
  class << self
    # Remove these params from URLs.  Taken from Neat URL and
    # ClearURLs plus some others manually found.
    #
    # Each entry is either a bare parameter glob (`utm_*`), removed on
    # every host, or `param_glob@host_glob` (`tag@amazon.*`), removed
    # only when the URL's hostname matches `host_glob`.
    #
    # @see {https://github.com/Smile4ever/Neat-URL}
    # @see {https://gitlab.com/anti-tracking/ClearURLs/rules/-/blob/master/data.json}
    # @see {https://github.com/Smile4ever/Neat-URL/issues/235}
    TRACKING_PARAMS = %w[pf_rd_*@imdb.com ref_@imdb.com gclid ref
                         terminal_id igshid tracking_id action_object_map action_type_map
                         action_ref_map spm@*.aliexpress.com scm@*.aliexpress.com
                         aff_platform aff_trace_key algo_expid@*.aliexpress.*
                         algo_pvid@*.aliexpress.* btsid ws_ab_test pd_rd_*@amazon.*
                         _encoding@amazon.* psc@amazon.* tag@amazon.* ref_@amazon.*
                         pf_rd_*@amazon.* pf@amazon.* qid@amazon.* sr@amazon.*
                         srs@amazon.* __mk_*@amazon.* spIA@amazon.* ms3_c@amazon.*
                         ie*@amazon.* refRID@amazon.* colid@amazon.* coliid@amazon.*
                         *adId@amazon.* qualifier@amazon.* _encoding@amazon.*
                         smid@amazon.* field-lbr_brands_browse-bin@amazon.* ved@google.*
                         bi*@google.* gfe_*@google.* ei@google.* source@google.*
                         gs_*@google.* site@google.* oq@google.* esrc@google.*
                         uact@google.* cd@google.* cad@google.* gws_*@google.*
                         atyp@google.* vet@google.* zx@google.* _u@google.* je@google.*
                         dcr@google.* ie@google.* sei@google.* sa@google.* dpr@google.*
                         hl@google.* btn*@google.* sa@google.* usg@google.* cd@google.*
                         cad@google.* uact@google.* src@shutterstock.com
                         snr@steampowered.com wbdcd@tchibo.de smid@nytimes.com
                         spm@youku.com tpa@youku.com xid@prvnizpravy.cz pl@net-parade.it
                         u1@walmart.com* ath*@walmart.com* utm_* ga_source ga_medium
                         ga_term ga_content ga_campaign ga_place yclid _openstat
                         fb_action_ids fb_action_types fb_source fb_ref fbclid
                         action_object_map action_type_map action_ref_map gs_l mkt_tok
                         hmb_campaign hmb_medium hmb_source ref ref_ ref_*@twitter.com
                         src@twitter.com trackId@netflix.* tctx@netflix.* jb*@netflix.*
                         ncid@techcrunch.com sr@techcrunch.com sr_share@techcrunch.com
                         guccounter@techcrunch.com guce_referrer_*@techcrunch.com
                         cvid@bing.com form@bing.com sk@bing.com sp@bing.com sc@bing.com
                         qs@bing.com qp@bing.com nb@tweakers.net u@tweakers.net
                         tt_medium@twitch.* tt_content@twitch.* pk_campaign@vivaldi.com
                         pk_kwd@vivaldi.com from@indeed.com alid@indeed.com
                         *tk@indeed.com vss@hh.ru t@hh.ru swnt@hh.ru grpos@hh.ru
                         ptl@hh.ru stl@hh.ru exp@hh.ru plim@hh.ru _trkparms@ebay.*
                         _trksid@ebay.* _from@ebay.* ftag@cnet.com callback@bilibili.com
                         cvid@bing.com form@bing.com sk@bing.com sp@bing.com sc@bing.com
                         qs@bing.com pq@bing.com mkt_tok trk trkCampaign ga_* gclid
                         gclsrc hmb_campaign hmb_medium hmb_source spReportId spJobID
                         spUserID spMailingID itm_* s_cid elqTrackId elqTrack assetType
                         assetId recipientId campaignId siteId mc_cid mc_eid pk_*
                         sc_campaign sc_channel sc_content sc_medium sc_outcome sc_geo
                         sc_country utm_* nr_email_referer vero_conv vero_id yclid
                         _openstat mbid cmpid cid c_id campaign_id Campaign hash@ebay.*
                         fb_action_ids fb_action_types fb_ref fb_source fbclid
                         refsrc@facebook.com hrc@facebook.com gs_l gs_lcp@google.*
                         ved@google.* ei@google.* sei@google.* gws_rd@google.*
                         gs_gbg@google.* gs_mss@google.* gs_rn@google.* _hsenc _hsmi
                         __hssc __hstc hsCtaTracking source@sourceforge.net
                         position@sourceforge.net tt_medium tt_content lr@yandex.*
                         redircnt@yandex.* feature@youtube.com kw@youtube.com wt_zmc
                         source@google.* iflsig@google.* sclient@google.*
                         sfnsn@facebook.com extid@facebook.com d@facebook.com
                         vh@facebook.com hc_*@facebook.com *ref*@facebook.com
                         __tn__@facebook.com eid@facebook.com __xts__@facebook.com
                         so_medium@stackoverflow.com so_source@stackoverflow.com
                         preview_pb@tiktok.com _d@tiktok.com share_item_id@tiktok.com
                         timestamp@tiktok.com user_id@tiktok.com tt_from@tiktok.com
                         source@tiktok.com].uniq.freeze

    # Clean the given URL.  If the URL can't be parsed, returns the
    # URL unmodified.
    #
    # Results (including unparseable URLs) are memoized per input
    # string, so repeated calls with the same URL are cheap.
    #
    # @param url [String] the URL to clean
    # @return [String] the URL without tracking parameters
    def clean(url)
      @cleaned_urls ||= {}
      @cleaned_urls[url] ||= begin
        uri = URI(url)
        strip_tracking_params(uri) if uri.query
        uri.to_s
      end
    rescue URI::Error
      @cleaned_urls[url] ||= url
    end

    private

    # Drops every tracking parameter from the query of +uri+, in
    # place.
    #
    # The query is handled as an ordered list of pairs rather than a
    # Hash so repeated parameters (`a=1&a=2`) survive.  When nothing
    # is removed the query is left byte-for-byte as it was, and when
    # everything is removed the query is cleared so no dangling `?`
    # is left behind.
    #
    # @param uri [URI::Generic] a URI with a non-nil query
    # @return [void]
    def strip_tracking_params(uri)
      # Relative URLs have no hostname; hostnames are case-insensitive
      # while File.fnmatch is not, so normalize before matching.
      hostname = uri.hostname.to_s.downcase.sub(/\Awww\./, '')
      pairs = URI.decode_www_form(uri.query)
      kept = pairs.reject { |name, _| tracking_param?(name, hostname) }

      return if kept.size == pairs.size

      uri.query = kept.empty? ? nil : URI.encode_www_form(kept)
    end

    # Whether a query parameter is a tracking parameter on the given
    # host.  A literal pattern only glob-matches itself, so exact
    # names and wildcards go through the same check.
    #
    # @param name [String] decoded parameter name
    # @param hostname [String] lowercased hostname without `www.`
    # @return [Boolean]
    def tracking_param?(name, hostname)
      return true if simple_tracking_params.any? { |pattern| File.fnmatch(pattern, name) }

      complex_tracking_params.any? do |host_pattern, param_patterns|
        File.fnmatch(host_pattern, hostname) &&
          param_patterns.any? { |pattern| File.fnmatch(pattern, name) }
      end
    end

    # This is all so we can just copy and paste from Neat URL source
    # code, it produces a hash of hostname => [ params ] that can be
    # glob-matched.
    #
    # @return [Hash{String => Array<String>}]
    def complex_tracking_params
      @complex_tracking_params ||= TRACKING_PARAMS.each_with_object({}) do |entry, by_host|
        next unless entry.include?('@')

        param_pattern, host_pattern = entry.split('@', 2)
        (by_host[host_pattern] ||= []) << param_pattern
      end
    end

    # Parameter globs that apply regardless of hostname.
    #
    # @return [Array<String>]
    def simple_tracking_params
      @simple_tracking_params ||= TRACKING_PARAMS.reject { |entry| entry.include?('@') }
    end
  end
end
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: url-privacy
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - f
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2021-01-31 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email:
15
+ - f@sutty.nl
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files:
19
+ - README.md
20
+ files:
21
+ - README.md
22
+ - lib/url_privacy.rb
23
+ homepage: https://0xacab.org/sutty/url-privacy
24
+ licenses:
25
+ - LGPL-3.0
26
+ metadata:
27
+ bug_tracker_uri: https://0xacab.org/sutty/url-privacy/issues
28
+ homepage_uri: https://0xacab.org/sutty/url-privacy
29
+ source_code_uri: https://0xacab.org/sutty/url-privacy
30
+ changelog_uri: https://0xacab.org/sutty/url-privacy/-/blob/master/CHANGELOG.md
31
+ documentation_uri: https://rubydoc.info/gems/url-privacy
32
+ post_install_message:
33
+ rdoc_options:
34
+ - "--title"
35
+ - url-privacy - Removes tracking parameters from URLs
36
+ - "--main"
37
+ - README.md
38
+ - "--line-numbers"
39
+ - "--inline-source"
40
+ - "--quiet"
41
+ require_paths:
42
+ - lib
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 2.6.0
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ requirements: []
54
+ rubygems_version: 3.1.2
55
+ signing_key:
56
+ specification_version: 4
57
+ summary: Removes tracking parameters from URLs
58
+ test_files: []