dry-file 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +16 -3
- data/lib/dry/file/version.rb +1 -1
- data/lib/dry-file.rb +8 -11
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8f0c30f41095a9db669ec7d0c0c2e37b8240ea30516aa1fed27704bdd94bceb4
|
4
|
+
data.tar.gz: 4b47bac06e808deaa5a60bd7e972d8cb46b71d000f186205b3a687dfff4eaedf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a60d28399e5dbeca9f75b7adaa6ad61b9f33cd65094fc61d25ae16e24dcf2adcbd6935de0331ad75d3044469b25fb956d75aef51d5486639f0e2ef5831e5c9ad
|
7
|
+
data.tar.gz: 1d2f01e718768cc1b73a833a5dabd34525abe7f2bba666c28da3072d2cb53af8f2ae727402011daea92dad6ed0936695e3fcde66fb1a608fe35633b4dce350db
|
data/README.md
CHANGED
@@ -1,9 +1,22 @@
|
|
1
1
|
Purpose:
|
2
2
|
|
3
|
-
Remove duplicate URLs from a file, where the URLs only differ
|
3
|
+
Remove duplicate URLs from a file, where the URLs only differ by N characters
|
4
4
|
|
5
5
|
Usage:
|
6
6
|
|
7
|
-
|
7
|
+
# With pipe
|
8
|
+
cat <file> | dry <N>
|
8
9
|
|
9
|
-
|
10
|
+
# With file
|
11
|
+
dry <N> <file> [options]
|
12
|
+
|
13
|
+
Example:
|
14
|
+
|
15
|
+
# Read from a pipe and print to stdout
|
16
|
+
cat file | dry 50
|
17
|
+
|
18
|
+
# Which is the same as
|
19
|
+
dry 50 file
|
20
|
+
|
21
|
+
# Replace file in-place
|
22
|
+
dry 40 $domain/scan/gf/xss.txt -i
|
data/lib/dry/file/version.rb
CHANGED
data/lib/dry-file.rb
CHANGED
@@ -12,30 +12,27 @@ module Dry
|
|
12
12
|
# max_chars: minimum number of different characters to keep a line.
|
13
13
|
# file: input file.
|
14
14
|
# options: '-i' replace file in-place.
|
15
|
-
def run(max_chars, file, *options)
|
15
|
+
def run(max_chars, file=nil, *options)
|
16
16
|
max_chars = max_chars.to_i
|
17
17
|
previous_line = ''
|
18
|
-
|
19
|
-
|
20
|
-
else
|
21
|
-
"dry-#{File.basename(file)}"
|
22
|
-
end
|
18
|
+
new_lines = ''
|
19
|
+
ARGV.clear
|
23
20
|
|
24
|
-
|
21
|
+
lines = file.nil? ? ARGF.read : File.open(file)
|
25
22
|
|
26
|
-
|
23
|
+
lines.each_line do |l|
|
27
24
|
# We compare the line size as well as the line similarity. We could have
|
28
25
|
# different threshold values for each of these comparisons (e.g. use
|
29
26
|
# +max_chars+ for the size comparison and +max_diff_chars+ for the
|
30
27
|
# similarity comparison), but I think using +max_chars+ for both
|
31
28
|
# comparisons is also OK, the results seem pretty good.
|
32
29
|
if (previous_line.size - l.size).abs > max_chars || diff_size(previous_line, l) > max_chars
|
33
|
-
|
30
|
+
new_lines << l
|
34
31
|
previous_line = l
|
35
32
|
end
|
36
33
|
end
|
37
|
-
|
38
|
-
|
34
|
+
|
35
|
+
options.include?('-i') ? File.write(file, new_lines) : puts(new_lines)
|
39
36
|
end
|
40
37
|
|
41
38
|
# Returns the number of differing characters between two lines.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dry-file
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sergio Romero
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-11-21 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Removes almost identical lines based on a similarity parameter.
|
14
14
|
email:
|