dry-file 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +16 -3
- data/lib/dry/file/version.rb +1 -1
- data/lib/dry-file.rb +8 -11
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8f0c30f41095a9db669ec7d0c0c2e37b8240ea30516aa1fed27704bdd94bceb4
|
4
|
+
data.tar.gz: 4b47bac06e808deaa5a60bd7e972d8cb46b71d000f186205b3a687dfff4eaedf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a60d28399e5dbeca9f75b7adaa6ad61b9f33cd65094fc61d25ae16e24dcf2adcbd6935de0331ad75d3044469b25fb956d75aef51d5486639f0e2ef5831e5c9ad
|
7
|
+
data.tar.gz: 1d2f01e718768cc1b73a833a5dabd34525abe7f2bba666c28da3072d2cb53af8f2ae727402011daea92dad6ed0936695e3fcde66fb1a608fe35633b4dce350db
|
data/README.md
CHANGED
@@ -1,9 +1,22 @@
|
|
1
1
|
Purpose:
|
2
2
|
|
3
|
-
Remove duplicate URLs from a file, where the URLs only differ
|
3
|
+
Remove duplicate URLs from a file, where the URLs only differ by N characters
|
4
4
|
|
5
5
|
Usage:
|
6
6
|
|
7
|
-
|
7
|
+
# With pipe
|
8
|
+
cat <file> | dry <N>
|
8
9
|
|
9
|
-
|
10
|
+
# With file
|
11
|
+
dry <N> <file> [options]
|
12
|
+
|
13
|
+
Example:
|
14
|
+
|
15
|
+
# Read from a pipe and print to stdout
|
16
|
+
cat file | dry 50
|
17
|
+
|
18
|
+
# Which is the same as
|
19
|
+
dry 50 file
|
20
|
+
|
21
|
+
# Replace file in-place
|
22
|
+
dry 40 $domain/scan/gf/xss.txt -i
|
data/lib/dry/file/version.rb
CHANGED
data/lib/dry-file.rb
CHANGED
@@ -12,30 +12,27 @@ module Dry
|
|
12
12
|
# max_chars: minimum number of different characters to keep a line.
|
13
13
|
# file: input file.
|
14
14
|
# options: '-i' replace file in-place.
|
15
|
-
def run(max_chars, file, *options)
|
15
|
+
def run(max_chars, file=nil, *options)
|
16
16
|
max_chars = max_chars.to_i
|
17
17
|
previous_line = ''
|
18
|
-
|
19
|
-
|
20
|
-
else
|
21
|
-
"dry-#{File.basename(file)}"
|
22
|
-
end
|
18
|
+
new_lines = ''
|
19
|
+
ARGV.clear
|
23
20
|
|
24
|
-
|
21
|
+
lines = file.nil? ? ARGF.read : File.open(file)
|
25
22
|
|
26
|
-
|
23
|
+
lines.each_line do |l|
|
27
24
|
# We compare the line size as well as the line similarity. We could have
|
28
25
|
# different threshold values for each of these comparisons (e.g. use
|
29
26
|
# +max_chars+ for the size comparison and +max_diff_chars+ for the
|
30
27
|
# similarity comparison), but I think using +max_chars+ for both
|
31
28
|
# comparisons is also OK, the results seem pretty good.
|
32
29
|
if (previous_line.size - l.size).abs > max_chars || diff_size(previous_line, l) > max_chars
|
33
|
-
|
30
|
+
new_lines << l
|
34
31
|
previous_line = l
|
35
32
|
end
|
36
33
|
end
|
37
|
-
|
38
|
-
|
34
|
+
|
35
|
+
options.include?('-i') ? File.write(file, new_lines) : puts(new_lines)
|
39
36
|
end
|
40
37
|
|
41
38
|
# Returns the number of differing characters between two lines.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dry-file
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sergio Romero
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-11-21 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Removes almost identical lines based on a similarity parameter.
|
14
14
|
email:
|