url-privacy 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +47 -0
- data/lib/url_privacy.rb +149 -0
- metadata +58 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 13006f4f0e38cbc252c8d2f0dd4e6b2e48c278001c46e4292ecb1af961362c03
|
|
4
|
+
data.tar.gz: 448e7c4b11e2fb4c1c3cbfa58b59613715ffc27facc69106a4fc860560419278
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 9c27045c00b8b6142c8d2514b944eb07a3c2c818955467ddd4a6d75d8ad5ff72ba2bd85e767c1f5c5cd4ebdda0f5313b348d48222e065c7ddc8fae1000f7de1a
|
|
7
|
+
data.tar.gz: c466b66f6b0aa0b51d59f214646f45acd6b6a28a651980792df69889a1ee08382ec702e68aa934c13b02bf4ee0c02e7b9ca1e1f4583e26869e0046b47b22647f
|
data/README.md
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# url-privacy
|
|
2
|
+
|
|
3
|
+
Removes tracking parameters from URLs.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
Add this line to your Gemfile:
|
|
8
|
+
|
|
9
|
+
```ruby
|
|
10
|
+
gem 'url-privacy'
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
And then execute:
|
|
14
|
+
|
|
15
|
+
$ bundle
|
|
16
|
+
|
|
17
|
+
Or install it yourself with:
|
|
18
|
+
|
|
19
|
+
$ gem install url-privacy
|
|
20
|
+
|
|
21
|
+
## Usage
|
|
22
|
+
|
|
23
|
+
```ruby
|
|
24
|
+
UrlPrivacy.clean url
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Contributing
|
|
28
|
+
|
|
29
|
+
Bug reports and pull requests are welcome on 0xacab.org at
|
|
30
|
+
<https://0xacab.org/sutty/url-privacy>. This project is intended to be
|
|
31
|
+
a safe, welcoming space for collaboration, and contributors are expected
|
|
32
|
+
to adhere to the [Sutty code of
|
|
33
|
+
conduct](https://sutty.nl/en/code-of-conduct/).
|
|
34
|
+
|
|
35
|
+
If you like our plugins, [please consider
|
|
36
|
+
donating](https://donaciones.sutty.nl/en/)!
|
|
37
|
+
|
|
38
|
+
## License
|
|
39
|
+
|
|
40
|
+
The gem is available as free software under the terms of the LGPL3
|
|
41
|
+
License.
|
|
42
|
+
|
|
43
|
+
## Code of Conduct
|
|
44
|
+
|
|
45
|
+
Everyone interacting in the url-privacy project’s codebases, issue
|
|
46
|
+
trackers, chat rooms and mailing lists is expected to follow the [code
|
|
47
|
+
of conduct](https://sutty.nl/en/code-of-conduct/).
|
data/lib/url_privacy.rb
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'uri'

# Removes known tracking parameters from the query string of a URL.
#
# Usage:
#
#   UrlPrivacy.clean(url)
class UrlPrivacy
  class << self
    # Remove these params from URLs. Taken from Neat URL and
    # ClearURLs plus some others manually found.
    #
    # Entry format:
    #
    # * `param`      - glob matched against the parameter name on any host.
    # * `param@host` - glob matched against the parameter name, but only
    #   when the `host` glob matches the URL hostname (lower-cased, with a
    #   leading "www." removed).
    #
    # @see {https://github.com/Smile4ever/Neat-URL}
    # @see {https://gitlab.com/anti-tracking/ClearURLs/rules/-/blob/master/data.json}
    # @see {https://github.com/Smile4ever/Neat-URL/issues/235}
    TRACKING_PARAMS = %w[pf_rd_*@imdb.com ref_@imdb.com gclid ref
                         terminal_id igshid tracking_id action_object_map action_type_map
                         action_ref_map spm@*.aliexpress.com scm@*.aliexpress.com
                         aff_platform aff_trace_key algo_expid@*.aliexpress.*
                         algo_pvid@*.aliexpress.* btsid ws_ab_test pd_rd_*@amazon.*
                         _encoding@amazon.* psc@amazon.* tag@amazon.* ref_@amazon.*
                         pf_rd_*@amazon.* pf@amazon.* qid@amazon.* sr@amazon.*
                         srs@amazon.* __mk_*@amazon.* spIA@amazon.* ms3_c@amazon.*
                         ie*@amazon.* refRID@amazon.* colid@amazon.* coliid@amazon.*
                         *adId@amazon.* qualifier@amazon.* _encoding@amazon.*
                         smid@amazon.* field-lbr_brands_browse-bin@amazon.* ved@google.*
                         bi*@google.* gfe_*@google.* ei@google.* source@google.*
                         gs_*@google.* site@google.* oq@google.* esrc@google.*
                         uact@google.* cd@google.* cad@google.* gws_*@google.*
                         atyp@google.* vet@google.* zx@google.* _u@google.* je@google.*
                         dcr@google.* ie@google.* sei@google.* sa@google.* dpr@google.*
                         hl@google.* btn*@google.* sa@google.* usg@google.* cd@google.*
                         cad@google.* uact@google.* src@shutterstock.com
                         snr@steampowered.com wbdcd@tchibo.de smid@nytimes.com
                         spm@youku.com tpa@youku.com xid@prvnizpravy.cz pl@net-parade.it
                         u1@walmart.com* ath*@walmart.com* utm_* ga_source ga_medium
                         ga_term ga_content ga_campaign ga_place yclid _openstat
                         fb_action_ids fb_action_types fb_source fb_ref fbclid
                         action_object_map action_type_map action_ref_map gs_l mkt_tok
                         hmb_campaign hmb_medium hmb_source ref ref_ ref_*@twitter.com
                         src@twitter.com trackId@netflix.* tctx@netflix.* jb*@netflix.*
                         ncid@techcrunch.com sr@techcrunch.com sr_share@techcrunch.com
                         guccounter@techcrunch.com guce_referrer_*@techcrunch.com
                         cvid@bing.com form@bing.com sk@bing.com sp@bing.com sc@bing.com
                         qs@bing.com qp@bing.com nb@tweakers.net u@tweakers.net
                         tt_medium@twitch.* tt_content@twitch.* pk_campaign@vivaldi.com
                         pk_kwd@vivaldi.com from@indeed.com alid@indeed.com
                         *tk@indeed.com vss@hh.ru t@hh.ru swnt@hh.ru grpos@hh.ru
                         ptl@hh.ru stl@hh.ru exp@hh.ru plim@hh.ru _trkparms@ebay.*
                         _trksid@ebay.* _from@ebay.* ftag@cnet.com callback@bilibili.com
                         cvid@bing.com form@bing.com sk@bing.com sp@bing.com sc@bing.com
                         qs@bing.com pq@bing.com mkt_tok trk trkCampaign ga_* gclid
                         gclsrc hmb_campaign hmb_medium hmb_source spReportId spJobID
                         spUserID spMailingID itm_* s_cid elqTrackId elqTrack assetType
                         assetId recipientId campaignId siteId mc_cid mc_eid pk_*
                         sc_campaign sc_channel sc_content sc_medium sc_outcome sc_geo
                         sc_country utm_* nr_email_referer vero_conv vero_id yclid
                         _openstat mbid cmpid cid c_id campaign_id Campaign hash@ebay.*
                         fb_action_ids fb_action_types fb_ref fb_source fbclid
                         refsrc@facebook.com hrc@facebook.com gs_l gs_lcp@google.*
                         ved@google.* ei@google.* sei@google.* gws_rd@google.*
                         gs_gbg@google.* gs_mss@google.* gs_rn@google.* _hsenc _hsmi
                         __hssc __hstc hsCtaTracking source@sourceforge.net
                         position@sourceforge.net tt_medium tt_content lr@yandex.*
                         redircnt@yandex.* feature@youtube.com kw@youtube.com wt_zmc
                         source@google.* iflsig@google.* sclient@google.*
                         sfnsn@facebook.com extid@facebook.com d@facebook.com
                         vh@facebook.com hc_*@facebook.com *ref*@facebook.com
                         __tn__@facebook.com eid@facebook.com __xts__@facebook.com
                         so_medium@stackoverflow.com so_source@stackoverflow.com
                         preview_pb@tiktok.com _d@tiktok.com share_item_id@tiktok.com
                         timestamp@tiktok.com user_id@tiktok.com tt_from@tiktok.com
                         source@tiktok.com].uniq.freeze

    # Clean the given URL. If the URL (or its query string) can't be
    # parsed, returns the URL unmodified.
    #
    # Results are cached per input in case there're duplicates. The
    # cache is never evicted, so it grows with every distinct URL seen.
    #
    # @param url [String]
    # @return [String]
    def clean(url)
      @cleaned_urls ||= {}
      @cleaned_urls[url] ||= strip_tracking_params(url)
    end

    private

    # Parses the URL, drops every tracking parameter and serializes it
    # back.
    #
    # @param url [String]
    # @return [String] the cleaned URL, or +url+ itself when it can't be
    #   parsed
    def strip_tracking_params(url)
      uri = URI(url)
      return uri.to_s unless uri.query

      pairs = decode_query(uri.query)
      return url unless pairs

      # Relative URLs have no host; hostnames are case-insensitive.
      hostname = uri.hostname.to_s.downcase.sub(/\Awww\./, '')

      # Work on the list of pairs (not a Hash) so repeated parameters
      # such as "a=1&a=2" survive.
      kept = pairs.reject { |name, _| tracking_param?(name, hostname) }

      # Nothing to strip: leave the query exactly as it was written.
      return uri.to_s if kept.size == pairs.size

      # A nil query avoids leaving a dangling "?" behind.
      uri.query = kept.empty? ? nil : URI.encode_www_form(kept)
      uri.to_s
    rescue URI::Error
      url
    end

    # Decodes a query string into name/value pairs.
    #
    # @param query [String]
    # @return [Array<Array<String>>, nil] nil when the query contains an
    #   invalid percent-encoding
    def decode_query(query)
      URI.decode_www_form(query)
    rescue ArgumentError
      nil
    end

    # Whether the parameter should be removed for the given hostname.
    #
    # @param name [String] parameter name
    # @param hostname [String] normalized hostname, may be empty
    # @return [Boolean]
    def tracking_param?(name, hostname)
      return true if glob_match_any?(simple_tracking_params, name)

      complex_tracking_params.any? do |host_pattern, param_patterns|
        File.fnmatch(host_pattern, hostname) &&
          glob_match_any?(param_patterns, name)
      end
    end

    # @param patterns [Array<String>] glob patterns
    # @param value [String]
    # @return [Boolean] whether any pattern matches the value
    def glob_match_any?(patterns, value)
      patterns.any? { |pattern| File.fnmatch(pattern, value) }
    end

    # This is all so we can just copy and paste from Neat URL source
    # code, it produces a hash of hostname => [ params ] that can be
    # glob-matched.
    #
    # @return [Hash{String => Array<String>}]
    def complex_tracking_params
      @complex_tracking_params ||=
        TRACKING_PARAMS.each_with_object({}) do |entry, by_host|
          next unless entry.include?('@')

          param, host = entry.split('@', 2)
          (by_host[host] ||= []) << param
        end
    end

    # Patterns that apply to every host (entries without "@host").
    #
    # @return [Array<String>]
    def simple_tracking_params
      @simple_tracking_params ||= TRACKING_PARAMS.reject do |entry|
        entry.include?('@')
      end
    end
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: url-privacy
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- f
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2021-01-31 00:00:00.000000000 Z
|
|
12
|
+
dependencies: []
|
|
13
|
+
description:
|
|
14
|
+
email:
|
|
15
|
+
- f@sutty.nl
|
|
16
|
+
executables: []
|
|
17
|
+
extensions: []
|
|
18
|
+
extra_rdoc_files:
|
|
19
|
+
- README.md
|
|
20
|
+
files:
|
|
21
|
+
- README.md
|
|
22
|
+
- lib/url_privacy.rb
|
|
23
|
+
homepage: https://0xacab.org/sutty/url-privacy
|
|
24
|
+
licenses:
|
|
25
|
+
- LGPL-3.0
|
|
26
|
+
metadata:
|
|
27
|
+
bug_tracker_uri: https://0xacab.org/sutty/url-privacy/issues
|
|
28
|
+
homepage_uri: https://0xacab.org/sutty/url-privacy
|
|
29
|
+
source_code_uri: https://0xacab.org/sutty/url-privacy
|
|
30
|
+
changelog_uri: https://0xacab.org/sutty/url-privacy/-/blob/master/CHANGELOG.md
|
|
31
|
+
documentation_uri: https://rubydoc.info/gems/url-privacy
|
|
32
|
+
post_install_message:
|
|
33
|
+
rdoc_options:
|
|
34
|
+
- "--title"
|
|
35
|
+
- url-privacy - Removes tracking parameters from URLs
|
|
36
|
+
- "--main"
|
|
37
|
+
- README.md
|
|
38
|
+
- "--line-numbers"
|
|
39
|
+
- "--inline-source"
|
|
40
|
+
- "--quiet"
|
|
41
|
+
require_paths:
|
|
42
|
+
- lib
|
|
43
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - ">="
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: 2.6.0
|
|
48
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
49
|
+
requirements:
|
|
50
|
+
- - ">="
|
|
51
|
+
- !ruby/object:Gem::Version
|
|
52
|
+
version: '0'
|
|
53
|
+
requirements: []
|
|
54
|
+
rubygems_version: 3.1.2
|
|
55
|
+
signing_key:
|
|
56
|
+
specification_version: 4
|
|
57
|
+
summary: Removes tracking parameters from URLs
|
|
58
|
+
test_files: []
|