url-privacy 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +47 -0
  3. data/lib/url_privacy.rb +149 -0
  4. metadata +58 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 13006f4f0e38cbc252c8d2f0dd4e6b2e48c278001c46e4292ecb1af961362c03
4
+ data.tar.gz: 448e7c4b11e2fb4c1c3cbfa58b59613715ffc27facc69106a4fc860560419278
5
+ SHA512:
6
+ metadata.gz: 9c27045c00b8b6142c8d2514b944eb07a3c2c818955467ddd4a6d75d8ad5ff72ba2bd85e767c1f5c5cd4ebdda0f5313b348d48222e065c7ddc8fae1000f7de1a
7
+ data.tar.gz: c466b66f6b0aa0b51d59f214646f45acd6b6a28a651980792df69889a1ee08382ec702e68aa934c13b02bf4ee0c02e7b9ca1e1f4583e26869e0046b47b22647f
@@ -0,0 +1,47 @@
1
+ # url-privacy
2
+
3
+ Removes tracking parameters from URLs.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your Gemfile:
8
+
9
+ ```ruby
10
+ gem 'url-privacy'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself with:
18
+
19
+ $ gem install url-privacy
20
+
21
+ ## Usage
22
+
23
+ ```ruby
24
+ UrlPrivacy.clean url
25
+ ```
26
+
27
+ ## Contributing
28
+
29
+ Bug reports and pull requests are welcome on 0xacab.org at
30
+ <https://0xacab.org/sutty/url-privacy>. This project is intended to be
31
+ a safe, welcoming space for collaboration, and contributors are expected
32
+ to adhere to the [Sutty code of
33
+ conduct](https://sutty.nl/en/code-of-conduct/).
34
+
35
+ If you like our plugins, [please consider
36
+ donating](https://donaciones.sutty.nl/en/)!
37
+
38
+ ## License
39
+
40
+ The gem is available as free software under the terms of the LGPL3
41
+ License.
42
+
43
+ ## Code of Conduct
44
+
45
+ Everyone interacting in the url-privacy project’s codebases, issue
46
+ trackers, chat rooms and mailing lists is expected to follow the [code
47
+ of conduct](https://sutty.nl/en/code-of-conduct/).
@@ -0,0 +1,149 @@
1
# frozen_string_literal: true

require 'uri'

# Removes known tracking parameters from the query string of a URL.
#
# Usage:
#
#   UrlPrivacy.clean(url)
class UrlPrivacy
  class << self
    # Remove these params from URLs.  Taken from Neat URL and
    # ClearURLs plus some others manually found.
    #
    # Each entry is either a bare parameter glob ("utm_*"), removed on
    # every host, or "param_glob@hostname_glob", removed only when the
    # URL's hostname matches the hostname glob.
    #
    # @see https://github.com/Smile4ever/Neat-URL
    # @see https://gitlab.com/anti-tracking/ClearURLs/rules/-/blob/master/data.json
    # @see https://github.com/Smile4ever/Neat-URL/issues/235
    TRACKING_PARAMS = %w[pf_rd_*@imdb.com ref_@imdb.com gclid ref
                         terminal_id igshid tracking_id action_object_map action_type_map
                         action_ref_map spm@*.aliexpress.com scm@*.aliexpress.com
                         aff_platform aff_trace_key algo_expid@*.aliexpress.*
                         algo_pvid@*.aliexpress.* btsid ws_ab_test pd_rd_*@amazon.*
                         _encoding@amazon.* psc@amazon.* tag@amazon.* ref_@amazon.*
                         pf_rd_*@amazon.* pf@amazon.* qid@amazon.* sr@amazon.*
                         srs@amazon.* __mk_*@amazon.* spIA@amazon.* ms3_c@amazon.*
                         ie*@amazon.* refRID@amazon.* colid@amazon.* coliid@amazon.*
                         *adId@amazon.* qualifier@amazon.* _encoding@amazon.*
                         smid@amazon.* field-lbr_brands_browse-bin@amazon.* ved@google.*
                         bi*@google.* gfe_*@google.* ei@google.* source@google.*
                         gs_*@google.* site@google.* oq@google.* esrc@google.*
                         uact@google.* cd@google.* cad@google.* gws_*@google.*
                         atyp@google.* vet@google.* zx@google.* _u@google.* je@google.*
                         dcr@google.* ie@google.* sei@google.* sa@google.* dpr@google.*
                         hl@google.* btn*@google.* sa@google.* usg@google.* cd@google.*
                         cad@google.* uact@google.* src@shutterstock.com
                         snr@steampowered.com wbdcd@tchibo.de smid@nytimes.com
                         spm@youku.com tpa@youku.com xid@prvnizpravy.cz pl@net-parade.it
                         u1@walmart.com* ath*@walmart.com* utm_* ga_source ga_medium
                         ga_term ga_content ga_campaign ga_place yclid _openstat
                         fb_action_ids fb_action_types fb_source fb_ref fbclid
                         action_object_map action_type_map action_ref_map gs_l mkt_tok
                         hmb_campaign hmb_medium hmb_source ref ref_ ref_*@twitter.com
                         src@twitter.com trackId@netflix.* tctx@netflix.* jb*@netflix.*
                         ncid@techcrunch.com sr@techcrunch.com sr_share@techcrunch.com
                         guccounter@techcrunch.com guce_referrer_*@techcrunch.com
                         cvid@bing.com form@bing.com sk@bing.com sp@bing.com sc@bing.com
                         qs@bing.com qp@bing.com nb@tweakers.net u@tweakers.net
                         tt_medium@twitch.* tt_content@twitch.* pk_campaign@vivaldi.com
                         pk_kwd@vivaldi.com from@indeed.com alid@indeed.com
                         *tk@indeed.com vss@hh.ru t@hh.ru swnt@hh.ru grpos@hh.ru
                         ptl@hh.ru stl@hh.ru exp@hh.ru plim@hh.ru _trkparms@ebay.*
                         _trksid@ebay.* _from@ebay.* ftag@cnet.com callback@bilibili.com
                         cvid@bing.com form@bing.com sk@bing.com sp@bing.com sc@bing.com
                         qs@bing.com pq@bing.com mkt_tok trk trkCampaign ga_* gclid
                         gclsrc hmb_campaign hmb_medium hmb_source spReportId spJobID
                         spUserID spMailingID itm_* s_cid elqTrackId elqTrack assetType
                         assetId recipientId campaignId siteId mc_cid mc_eid pk_*
                         sc_campaign sc_channel sc_content sc_medium sc_outcome sc_geo
                         sc_country utm_* nr_email_referer vero_conv vero_id yclid
                         _openstat mbid cmpid cid c_id campaign_id Campaign hash@ebay.*
                         fb_action_ids fb_action_types fb_ref fb_source fbclid
                         refsrc@facebook.com hrc@facebook.com gs_l gs_lcp@google.*
                         ved@google.* ei@google.* sei@google.* gws_rd@google.*
                         gs_gbg@google.* gs_mss@google.* gs_rn@google.* _hsenc _hsmi
                         __hssc __hstc hsCtaTracking source@sourceforge.net
                         position@sourceforge.net tt_medium tt_content lr@yandex.*
                         redircnt@yandex.* feature@youtube.com kw@youtube.com wt_zmc
                         source@google.* iflsig@google.* sclient@google.*
                         sfnsn@facebook.com extid@facebook.com d@facebook.com
                         vh@facebook.com hc_*@facebook.com *ref*@facebook.com
                         __tn__@facebook.com eid@facebook.com __xts__@facebook.com
                         so_medium@stackoverflow.com so_source@stackoverflow.com
                         preview_pb@tiktok.com _d@tiktok.com share_item_id@tiktok.com
                         timestamp@tiktok.com user_id@tiktok.com tt_from@tiktok.com
                         source@tiktok.com].uniq.freeze

    # Clean the given URL by dropping every query parameter whose name
    # matches TRACKING_PARAMS.  Parameters that are kept stay exactly as
    # they were written (same order, same encoding, duplicates
    # included).  If the URL can't be parsed, returns the URL
    # unmodified.
    #
    # Results are cached per URL in case there are duplicates; the
    # cache is never evicted.
    #
    # @param url [String]
    # @return [String]
    def clean(url)
      @cleaned_urls ||= {}
      @cleaned_urls[url] ||= begin
        uri = URI(url)
        query = uri.query

        if query
          cleaned = clean_query(query, normalize_hostname(uri.hostname))
          # Only touch the query when something was removed, so URLs
          # without tracking params are not rewritten by the setter.
          uri.query = cleaned unless cleaned == query
        end

        uri.to_s
      end
    rescue URI::Error, ArgumentError
      # ArgumentError covers non-URL arguments and malformed escapes,
      # which the URI library does not report as URI::Error.
      @cleaned_urls[url] ||= url
    end

    private

    # Drops the "name=value" segments of a raw query string whose name
    # is a tracking parameter for the given hostname.
    #
    # @param query [String] raw (still encoded) query string
    # @param hostname [String] normalized hostname, may be empty
    # @return [String, nil] the remaining query, or nil when nothing
    #   is left so the URL doesn't end with a dangling "?"
    def clean_query(query, hostname)
      # The -1 limit keeps trailing empty segments so an untouched
      # query can be detected by comparing segment counts.
      segments = query.split('&', -1)
      kept = segments.reject do |segment|
        tracking_param?(param_name(segment), hostname)
      end
      return query if kept.size == segments.size

      remaining = kept.join('&')
      remaining.empty? ? nil : remaining
    end

    # Decoded parameter name of a raw "name=value" query segment.  A
    # name with a malformed percent-escape is returned undecoded.
    #
    # @param segment [String]
    # @return [String]
    def param_name(segment)
      raw_name = segment.partition('=').first
      URI.decode_www_form_component(raw_name)
    rescue ArgumentError
      raw_name
    end

    # Hostnames are case-insensitive and a leading "www." is ignored
    # when matching.  URLs without a host yield an empty string, so
    # only the host-independent params apply to them.
    #
    # @param hostname [String, nil]
    # @return [String]
    def normalize_hostname(hostname)
      hostname.to_s.downcase.sub(/\Awww\./, '')
    end

    # Whether the param must be removed, either on any host or on the
    # given one.
    #
    # @param name [String] decoded parameter name
    # @param hostname [String] normalized hostname
    # @return [Boolean]
    def tracking_param?(name, hostname)
      return true if simple_tracking_params.any? { |pattern| File.fnmatch(pattern, name) }

      complex_tracking_params.any? do |hostname_pattern, param_patterns|
        File.fnmatch(hostname_pattern, hostname) &&
          param_patterns.any? { |pattern| File.fnmatch(pattern, name) }
      end
    end

    # This is all so we can just copy and paste from Neat URL source
    # code, it produces a hash of hostname glob => [ param globs ]
    # built from the "param@hostname" entries.
    #
    # @return [Hash{String => Array<String>}]
    def complex_tracking_params
      @complex_tracking_params ||=
        TRACKING_PARAMS.each_with_object({}) do |entry, by_hostname|
          next unless entry.include?('@')

          param_pattern, hostname_pattern = entry.split('@', 2)
          (by_hostname[hostname_pattern] ||= []) << param_pattern
        end
    end

    # Param globs that apply to every hostname (entries without "@").
    #
    # @return [Array<String>]
    def simple_tracking_params
      @simple_tracking_params ||= TRACKING_PARAMS.reject do |entry|
        entry.include?('@')
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: url-privacy
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - f
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2021-01-31 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email:
15
+ - f@sutty.nl
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files:
19
+ - README.md
20
+ files:
21
+ - README.md
22
+ - lib/url_privacy.rb
23
+ homepage: https://0xacab.org/sutty/url-privacy
24
+ licenses:
25
+ - LGPL-3.0
26
+ metadata:
27
+ bug_tracker_uri: https://0xacab.org/sutty/url-privacy/issues
28
+ homepage_uri: https://0xacab.org/sutty/url-privacy
29
+ source_code_uri: https://0xacab.org/sutty/url-privacy
30
+ changelog_uri: https://0xacab.org/sutty/url-privacy/-/blob/master/CHANGELOG.md
31
+ documentation_uri: https://rubydoc.info/gems/url-privacy
32
+ post_install_message:
33
+ rdoc_options:
34
+ - "--title"
35
+ - url-privacy - Removes tracking parameters from URLs
36
+ - "--main"
37
+ - README.md
38
+ - "--line-numbers"
39
+ - "--inline-source"
40
+ - "--quiet"
41
+ require_paths:
42
+ - lib
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 2.6.0
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ requirements: []
54
+ rubygems_version: 3.1.2
55
+ signing_key:
56
+ specification_version: 4
57
+ summary: Removes tracking parameters from URLs
58
+ test_files: []