url-privacy 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +47 -0
- data/lib/url_privacy.rb +149 -0
- metadata +58 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 13006f4f0e38cbc252c8d2f0dd4e6b2e48c278001c46e4292ecb1af961362c03
|
4
|
+
data.tar.gz: 448e7c4b11e2fb4c1c3cbfa58b59613715ffc27facc69106a4fc860560419278
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9c27045c00b8b6142c8d2514b944eb07a3c2c818955467ddd4a6d75d8ad5ff72ba2bd85e767c1f5c5cd4ebdda0f5313b348d48222e065c7ddc8fae1000f7de1a
|
7
|
+
data.tar.gz: c466b66f6b0aa0b51d59f214646f45acd6b6a28a651980792df69889a1ee08382ec702e68aa934c13b02bf4ee0c02e7b9ca1e1f4583e26869e0046b47b22647f
|
data/README.md
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# url-privacy
|
2
|
+
|
3
|
+
Removes tracking parameters from URLs.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your Gemfile:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'url-privacy'
|
11
|
+
```
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install url-privacy
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
UrlPrivacy.clean url
|
25
|
+
```
|
26
|
+
|
27
|
+
## Contributing
|
28
|
+
|
29
|
+
Bug reports and pull requests are welcome on 0xacab.org at
|
30
|
+
<https://0xacab.org/sutty/url-privacy>. This project is intended to be
|
31
|
+
a safe, welcoming space for collaboration, and contributors are expected
|
32
|
+
to adhere to the [Sutty code of
|
33
|
+
conduct](https://sutty.nl/en/code-of-conduct/).
|
34
|
+
|
35
|
+
If you like our plugins, [please consider
|
36
|
+
donating](https://donaciones.sutty.nl/en/)!
|
37
|
+
|
38
|
+
## License
|
39
|
+
|
40
|
+
The gem is available as free software under the terms of the LGPL3
|
41
|
+
License.
|
42
|
+
|
43
|
+
## Code of Conduct
|
44
|
+
|
45
|
+
Everyone interacting in the url-privacy project’s codebases, issue
|
46
|
+
trackers, chat rooms and mailing lists is expected to follow the [code
|
47
|
+
of conduct](https://sutty.nl/en/code-of-conduct/).
|
data/lib/url_privacy.rb
ADDED
@@ -0,0 +1,149 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'uri'

# Removes known tracking parameters from the query string of a URL.
#
# Usage:
#
#   UrlPrivacy.clean(url)
class UrlPrivacy
  class << self
    # Remove these params from URLs.  Taken from Neat URL and
    # ClearURLs plus some others manually found.
    #
    # Each entry is either a bare parameter name/glob (applies to every
    # host) or "param_glob@host_glob" (applies only to matching hosts).
    # Duplicates in the list are harmless; they are dropped by `uniq`.
    #
    # @see https://github.com/Smile4ever/Neat-URL
    # @see https://gitlab.com/anti-tracking/ClearURLs/rules/-/blob/master/data.json
    # @see https://github.com/Smile4ever/Neat-URL/issues/235
    TRACKING_PARAMS = %w[pf_rd_*@imdb.com ref_@imdb.com gclid ref
                         terminal_id igshid tracking_id action_object_map action_type_map
                         action_ref_map spm@*.aliexpress.com scm@*.aliexpress.com
                         aff_platform aff_trace_key algo_expid@*.aliexpress.*
                         algo_pvid@*.aliexpress.* btsid ws_ab_test pd_rd_*@amazon.*
                         _encoding@amazon.* psc@amazon.* tag@amazon.* ref_@amazon.*
                         pf_rd_*@amazon.* pf@amazon.* qid@amazon.* sr@amazon.*
                         srs@amazon.* __mk_*@amazon.* spIA@amazon.* ms3_c@amazon.*
                         ie*@amazon.* refRID@amazon.* colid@amazon.* coliid@amazon.*
                         *adId@amazon.* qualifier@amazon.* _encoding@amazon.*
                         smid@amazon.* field-lbr_brands_browse-bin@amazon.* ved@google.*
                         bi*@google.* gfe_*@google.* ei@google.* source@google.*
                         gs_*@google.* site@google.* oq@google.* esrc@google.*
                         uact@google.* cd@google.* cad@google.* gws_*@google.*
                         atyp@google.* vet@google.* zx@google.* _u@google.* je@google.*
                         dcr@google.* ie@google.* sei@google.* sa@google.* dpr@google.*
                         hl@google.* btn*@google.* sa@google.* usg@google.* cd@google.*
                         cad@google.* uact@google.* src@shutterstock.com
                         snr@steampowered.com wbdcd@tchibo.de smid@nytimes.com
                         spm@youku.com tpa@youku.com xid@prvnizpravy.cz pl@net-parade.it
                         u1@walmart.com* ath*@walmart.com* utm_* ga_source ga_medium
                         ga_term ga_content ga_campaign ga_place yclid _openstat
                         fb_action_ids fb_action_types fb_source fb_ref fbclid
                         action_object_map action_type_map action_ref_map gs_l mkt_tok
                         hmb_campaign hmb_medium hmb_source ref ref_ ref_*@twitter.com
                         src@twitter.com trackId@netflix.* tctx@netflix.* jb*@netflix.*
                         ncid@techcrunch.com sr@techcrunch.com sr_share@techcrunch.com
                         guccounter@techcrunch.com guce_referrer_*@techcrunch.com
                         cvid@bing.com form@bing.com sk@bing.com sp@bing.com sc@bing.com
                         qs@bing.com qp@bing.com nb@tweakers.net u@tweakers.net
                         tt_medium@twitch.* tt_content@twitch.* pk_campaign@vivaldi.com
                         pk_kwd@vivaldi.com from@indeed.com alid@indeed.com
                         *tk@indeed.com vss@hh.ru t@hh.ru swnt@hh.ru grpos@hh.ru
                         ptl@hh.ru stl@hh.ru exp@hh.ru plim@hh.ru _trkparms@ebay.*
                         _trksid@ebay.* _from@ebay.* ftag@cnet.com callback@bilibili.com
                         cvid@bing.com form@bing.com sk@bing.com sp@bing.com sc@bing.com
                         qs@bing.com pq@bing.com mkt_tok trk trkCampaign ga_* gclid
                         gclsrc hmb_campaign hmb_medium hmb_source spReportId spJobID
                         spUserID spMailingID itm_* s_cid elqTrackId elqTrack assetType
                         assetId recipientId campaignId siteId mc_cid mc_eid pk_*
                         sc_campaign sc_channel sc_content sc_medium sc_outcome sc_geo
                         sc_country utm_* nr_email_referer vero_conv vero_id yclid
                         _openstat mbid cmpid cid c_id campaign_id Campaign hash@ebay.*
                         fb_action_ids fb_action_types fb_ref fb_source fbclid
                         refsrc@facebook.com hrc@facebook.com gs_l gs_lcp@google.*
                         ved@google.* ei@google.* sei@google.* gws_rd@google.*
                         gs_gbg@google.* gs_mss@google.* gs_rn@google.* _hsenc _hsmi
                         __hssc __hstc hsCtaTracking source@sourceforge.net
                         position@sourceforge.net tt_medium tt_content lr@yandex.*
                         redircnt@yandex.* feature@youtube.com kw@youtube.com wt_zmc
                         source@google.* iflsig@google.* sclient@google.*
                         sfnsn@facebook.com extid@facebook.com d@facebook.com
                         vh@facebook.com hc_*@facebook.com *ref*@facebook.com
                         __tn__@facebook.com eid@facebook.com __xts__@facebook.com
                         so_medium@stackoverflow.com so_source@stackoverflow.com
                         preview_pb@tiktok.com _d@tiktok.com share_item_id@tiktok.com
                         timestamp@tiktok.com user_id@tiktok.com tt_from@tiktok.com
                         source@tiktok.com].uniq.freeze

    # Clean the given URL.  If the URL can't be parsed, returns the
    # URL unmodified.
    #
    # Results are memoized per input so repeated URLs are only
    # processed once.  The cache is never evicted.
    #
    # @param url [String]
    # @return [String]
    def clean(url)
      @cleaned_urls ||= {}
      @cleaned_urls[url] ||= strip_tracking_params(url)
    end

    private

    # Parses the URL, drops every query parameter matching a tracking
    # rule and serializes it back.
    #
    # @param url [String]
    # @return [String] the cleaned URL, or `url` itself when it can't
    #   be parsed
    def strip_tracking_params(url)
      uri = URI(url)
      query = uri.query
      return uri.to_s unless query

      # Relative and other host-less URLs have no hostname; an empty
      # string simply matches none of the per-host rules.  Hostnames
      # are case-insensitive, the rules are written in lowercase.
      hostname = uri.hostname.to_s.downcase.sub(/\Awww\./, '')

      # Keep the parameters as a list of pairs so repeated names
      # (a=1&a=2) survive the round trip.
      pairs = URI.decode_www_form(query)
      kept = pairs.reject { |name, _value| tracking_param?(name, hostname) }

      # Nothing to remove: leave the query exactly as it was written
      # instead of re-encoding it.
      return uri.to_s if kept.size == pairs.size

      # A nil query drops the "?" altogether, an empty string wouldn't.
      uri.query = kept.empty? ? nil : URI.encode_www_form(kept)
      uri.to_s
    rescue URI::Error, ArgumentError
      # ArgumentError is what Kernel#URI raises for nil and other
      # values that are neither a String nor a URI.
      url
    end

    # Whether the param is a tracking param, either globally or for
    # the given hostname.
    #
    # @param name [String] decoded query parameter name
    # @param hostname [String] lowercase hostname without leading "www."
    # @return [Boolean]
    def tracking_param?(name, hostname)
      return true if simple_tracking_params.any? { |pattern| File.fnmatch(pattern, name) }

      complex_tracking_params.any? do |host_pattern, param_patterns|
        File.fnmatch(host_pattern, hostname) &&
          param_patterns.any? { |pattern| File.fnmatch(pattern, name) }
      end
    end

    # This is all so we can just copy and paste from Neat URL source
    # code, it produces a hash of hostname => [ params ] that can be
    # glob-matched.
    #
    # @return [Hash{String => Array<String>}]
    def complex_tracking_params
      @complex_tracking_params ||= TRACKING_PARAMS.each_with_object({}) do |rule, by_host|
        next unless rule.include?('@')

        param_pattern, host_pattern = rule.split('@', 2)
        (by_host[host_pattern] ||= []) << param_pattern
      end
    end

    # Rules without a hostname part, they apply to every URL.
    #
    # @return [Array<String>]
    def simple_tracking_params
      @simple_tracking_params ||= TRACKING_PARAMS.reject { |rule| rule.include?('@') }
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: url-privacy
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- f
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2021-01-31 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description:
|
14
|
+
email:
|
15
|
+
- f@sutty.nl
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files:
|
19
|
+
- README.md
|
20
|
+
files:
|
21
|
+
- README.md
|
22
|
+
- lib/url_privacy.rb
|
23
|
+
homepage: https://0xacab.org/sutty/url-privacy
|
24
|
+
licenses:
|
25
|
+
- LGPL-3.0
|
26
|
+
metadata:
|
27
|
+
bug_tracker_uri: https://0xacab.org/sutty/url-privacy/issues
|
28
|
+
homepage_uri: https://0xacab.org/sutty/url-privacy
|
29
|
+
source_code_uri: https://0xacab.org/sutty/url-privacy
|
30
|
+
changelog_uri: https://0xacab.org/sutty/url-privacy/-/blob/master/CHANGELOG.md
|
31
|
+
documentation_uri: https://rubydoc.info/gems/url-privacy
|
32
|
+
post_install_message:
|
33
|
+
rdoc_options:
|
34
|
+
- "--title"
|
35
|
+
- url-privacy - Removes tracking parameters from URLs
|
36
|
+
- "--main"
|
37
|
+
- README.md
|
38
|
+
- "--line-numbers"
|
39
|
+
- "--inline-source"
|
40
|
+
- "--quiet"
|
41
|
+
require_paths:
|
42
|
+
- lib
|
43
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 2.6.0
|
48
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: '0'
|
53
|
+
requirements: []
|
54
|
+
rubygems_version: 3.1.2
|
55
|
+
signing_key:
|
56
|
+
specification_version: 4
|
57
|
+
summary: Removes tracking parameters from URLs
|
58
|
+
test_files: []
|