udup 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/lib/udup/main.rb +74 -0
  3. data/lib/udup.rb +3 -0
  4. metadata +45 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 89618f0c392c4453c2eb610564b86b4e9cd34c590ccacd9513293f8ed2f3b95b
4
+ data.tar.gz: 9628469e7b5ee5932144129d47570c4242d050cc2d5fee85ce285978735570e2
5
+ SHA512:
6
+ metadata.gz: c41ee00088458438c81b7109bfbee966fe1c17c872d109d3295fdcb29bf3564522b808eed87bf68dfe596487cf6ebef6ec6ec6f204189bd3a9783ebf4c98054a
7
+ data.tar.gz: be12f5d9d105e6287884edf006ec5d8440e55bb65d77acc2fcb118d894517999e83039c8e3e2974e52ec0a00a53d9e924b717a08d6e018270f34adf93c4038ab
data/lib/udup/main.rb ADDED
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'uri'
4
+ require 'set'
5
+
6
# Collapses a list of URLs so that URLs sharing the same location (everything
# before the "?") are reported once, with their query parameters merged.
#
# URLs are dropped entirely when their path ends with one of the skipped
# extensions, contains one of the skipped substrings, or has a segment with
# three or more hyphens.
#
# NOTE: the table of URLs already seen lives on the instance, so calling
# #filter several times on the same object accumulates results; every call
# returns the URLs from all previous calls as well. Use a fresh instance for
# an independent run.
class Udup
  # @param options [Hash]
  # @option options [Array<String>] :skip_exts file extensions (with the
  #   leading dot) whose URLs are discarded; compared case-sensitively
  # @option options [Array<String>] :content_to_skip substrings that cause a
  #   URL to be discarded when found anywhere in its path
  def initialize(options = {})
    @valid_urls = {}
    @skip_exts = options[:skip_exts] || %w[.css .png .jpg .jpeg .svg .ico .webp .ttf .otf .woff .woff2 .gif .pdf .bmp
                                           .eot .mp3 .mp4 .avi]
    @content_to_skip = options[:content_to_skip] || %w[blog docs post support]
  end

  # Filters and de-duplicates +urls+.
  #
  # Strings that URI.parse rejects are silently ignored.
  #
  # @param urls [Enumerable<String>] candidate URLs
  # @return [Array<String>] unique URLs, in first-seen order, with the merged
  #   query string (form-encoded) appended when there are parameters
  def filter(urls)
    urls.each do |url|
      uri = parse_uri(url)
      next unless uri

      # Opaque URIs (e.g. "urn:..." or "mailto:...") have a nil path; treat
      # it as empty so the checks below cannot raise.
      path = uri.path.to_s
      next if @skip_exts.include?(File.extname(path)) || human_content?(path) || content_to_skip?(path)

      record(url, without_query(uri), uri_params(uri.query))
    end

    # A URL stored under its full text and the same URL rebuilt from its base
    # plus parameters can render identically, hence the uniq.
    @valid_urls.map { |base_url, data| build_url(base_url, data[:params]) }.uniq
  end

  private

  # @return [URI::Generic, nil] the parsed URI, or nil when +url+ is invalid
  def parse_uri(url)
    URI.parse(url)
  rescue URI::InvalidURIError
    nil
  end

  # Stores +params+ under +base_url+, merging with parameters seen earlier
  # (later values win for the same key).
  def record(url, base_url, params)
    if @valid_urls.key?(base_url)
      # When the base was first seen without any parameter, the full URL is
      # additionally recorded under its own key before merging.
      @valid_urls[url] = { params: {} } if @valid_urls[base_url][:params].empty?
      @valid_urls[base_url][:params].merge!(params)
    else
      @valid_urls[base_url] = { params: params }
    end
  end

  # Renders a stored entry back into a URL string.
  def build_url(base_url, params)
    return base_url if params.empty?

    "#{base_url}?#{URI.encode_www_form(params)}"
  end

  # @return [String] the textual URI up to, but excluding, the first "?"
  def without_query(uri)
    uri.to_s.split('?', 2).first.to_s
  end

  # Parses a raw query string into a Hash.
  #
  # Keys and values are form-decoded because #filter re-encodes them with
  # URI.encode_www_form; keeping them encoded would double-encode the output
  # ("%20" would become "%2520"). Only the first "=" separates key and value,
  # so values containing "=" survive intact. A parameter without "=" maps to
  # nil, which encode_www_form renders as a bare key. Empty segments
  # (as in "a=1&&b=2") are ignored.
  #
  # @param query [String, nil]
  # @return [Hash{String => String, nil}]
  def uri_params(query)
    query.to_s.split('&').each_with_object({}) do |pair, params|
      next if pair.empty?

      key, value = pair.split('=', 2)
      params[decode_component(key)] = value && decode_component(value)
    end
  end

  # Form-decodes +text+, returning it untouched when it is not validly encoded.
  def decode_component(text)
    URI.decode_www_form_component(text)
  rescue ArgumentError
    text
  end

  # @return [Boolean] true when any "/"-separated segment of +path+ contains
  #   three or more hyphens (presumably a slug of an article-like page)
  def human_content?(path)
    path.to_s.split('/').any? { |part| part.count('-') >= 3 }
  end

  # @return [Boolean] true when +path+ contains any configured substring
  def content_to_skip?(path)
    text = path.to_s
    @content_to_skip.any? { |content| text.include?(content) }
  end
end
data/lib/udup.rb ADDED
@@ -0,0 +1,3 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'udup/main'
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: udup
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Joshua MARTINELLE
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-01-17 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email:
15
+ - contact@jomar.fr
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/udup.rb
21
+ - lib/udup/main.rb
22
+ homepage: https://rubygems.org/gems/udup
23
+ licenses:
24
+ - MIT
25
+ metadata: {}
26
+ post_install_message:
27
+ rdoc_options: []
28
+ require_paths:
29
+ - lib
30
+ required_ruby_version: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: 2.7.1
35
+ required_rubygems_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ requirements: []
41
+ rubygems_version: 3.1.2
42
+ signing_key:
43
+ specification_version: 4
44
+ summary: URL Deduplication
45
+ test_files: []