udup 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/lib/udup/main.rb +74 -0
  3. data/lib/udup.rb +3 -0
  4. metadata +45 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 89618f0c392c4453c2eb610564b86b4e9cd34c590ccacd9513293f8ed2f3b95b
4
+ data.tar.gz: 9628469e7b5ee5932144129d47570c4242d050cc2d5fee85ce285978735570e2
5
+ SHA512:
6
+ metadata.gz: c41ee00088458438c81b7109bfbee966fe1c17c872d109d3295fdcb29bf3564522b808eed87bf68dfe596487cf6ebef6ec6ec6f204189bd3a9783ebf4c98054a
7
+ data.tar.gz: be12f5d9d105e6287884edf006ec5d8440e55bb65d77acc2fcb118d894517999e83039c8e3e2974e52ec0a00a53d9e924b717a08d6e018270f34adf93c4038ab
data/lib/udup/main.rb ADDED
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'uri'
4
+ require 'set'
5
+
6
# Deduplicates a list of URLs.
#
# URLs are grouped by their query-less form; the query parameters of every URL
# sharing the same base are merged into a single URL. URLs whose path has a
# skipped extension, contains a skipped keyword, or looks like human-written
# content (a path segment with three or more dashes) are dropped.
#
# State is kept on the instance, so successive calls to #filter on the same
# object accumulate URLs.
class Udup
  # File extensions skipped when no :skip_exts option is given.
  DEFAULT_SKIP_EXTS = %w[
    .css .png .jpg .jpeg .svg .ico .webp .ttf .otf .woff .woff2 .gif .pdf .bmp .eot .mp3 .mp4 .avi
  ].freeze

  # Path substrings skipped when no :content_to_skip option is given.
  DEFAULT_CONTENT_TO_SKIP = %w[blog docs post support].freeze

  # @param options [Hash]
  # @option options [Array<String>] :skip_exts extensions (with leading dot) to drop
  # @option options [Array<String>] :content_to_skip path substrings to drop
  def initialize(options = {})
    @valid_urls = {}
    @skip_exts = options[:skip_exts] || DEFAULT_SKIP_EXTS
    @content_to_skip = options[:content_to_skip] || DEFAULT_CONTENT_TO_SKIP
  end

  # Filters and deduplicates the given URLs.
  #
  # Entries that URI.parse rejects are silently skipped.
  #
  # @param urls [Enumerable<String>] candidate URLs
  # @return [Array<String>] unique URLs, one per base URL, with merged query strings
  def filter(urls)
    urls.each do |url|
      begin
        uri = URI.parse(url)
      rescue URI::InvalidURIError
        next
      end

      # Opaque URIs (mailto:, javascript:, tel:, ...) have a nil path.
      path = uri.path.to_s
      next if skip?(path)

      register(url, without_query(uri), uri_params(uri.query))
    end

    @valid_urls.map { |base_url, data| build_url(base_url, data[:params]) }.uniq
  end

  private

  # Records a URL under its base, merging params into an already known base.
  def register(url, base_url, params)
    if @valid_urls.key?(base_url)
      # NOTE(review): when the known base has no params yet, the raw URL is also
      # stored as its own entry. Kept as-is from the original implementation;
      # confirm whether this is intended, since it can yield two URLs for one base.
      @valid_urls[url] = { params: {} } if @valid_urls[base_url][:params].empty?
      @valid_urls[base_url][:params].merge!(params)
    else
      @valid_urls[base_url] = { params: params }
    end
  end

  # Rebuilds the final URL from a base and its (possibly empty) params.
  def build_url(base_url, params)
    return base_url if params.empty?

    "#{base_url}?#{URI.encode_www_form(params)}"
  end

  # True when the path must be dropped for any of the configured reasons.
  def skip?(path)
    @skip_exts.include?(File.extname(path)) || human_content?(path) || content_to_skip?(path)
  end

  # Returns the URI as a string with everything from the first '?' removed.
  def without_query(uri)
    uri.to_s.split('?', 2).first.to_s
  end

  # Parses a query string into a Hash of decoded names and values.
  #
  # Values are decoded because the result is re-encoded by URI.encode_www_form;
  # keeping them encoded would double-encode (%20 -> %2520). Only the first '='
  # separates name from value, so values containing '=' are preserved. A name
  # without '=' maps to nil; empty pairs (as in "a=1&&b=2") are ignored.
  #
  # @param query [String, nil]
  # @return [Hash{String => String, nil}]
  def uri_params(query)
    query.to_s.split('&').each_with_object({}) do |pair, params|
      next if pair.empty?

      name, separator, value = pair.partition('=')
      params[decode_component(name)] = separator.empty? ? nil : decode_component(value)
    end
  end

  # Form-decodes a component, falling back to the raw text on invalid %-encoding.
  def decode_component(component)
    URI.decode_www_form_component(component)
  rescue ArgumentError
    component
  end

  # True when any path segment contains three or more dashes (slug-like content).
  def human_content?(path)
    path.to_s.split('/').any? { |part| part.count('-') >= 3 }
  end

  # True when the path contains any of the configured substrings.
  def content_to_skip?(path)
    text = path.to_s
    @content_to_skip.any? { |content| text.include?(content) }
  end
end
data/lib/udup.rb ADDED
@@ -0,0 +1,3 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'udup/main'
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: udup
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Joshua MARTINELLE
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-01-17 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email:
15
+ - contact@jomar.fr
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/udup.rb
21
+ - lib/udup/main.rb
22
+ homepage: https://rubygems.org/gems/udup
23
+ licenses:
24
+ - MIT
25
+ metadata: {}
26
+ post_install_message:
27
+ rdoc_options: []
28
+ require_paths:
29
+ - lib
30
+ required_ruby_version: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: 2.7.1
35
+ required_rubygems_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ requirements: []
41
+ rubygems_version: 3.1.2
42
+ signing_key:
43
+ specification_version: 4
44
+ summary: URL Deduplication
45
+ test_files: []