nauktis_utils 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +38 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/exe/datify +19 -0
- data/exe/duplicates +20 -0
- data/exe/rm_duplicates +13 -0
- data/exe/rm_duplicates_in_if_in +14 -0
- data/exe/rm_if_in +20 -0
- data/lib/nauktis_utils.rb +24 -0
- data/lib/nauktis_utils/archiver.rb +153 -0
- data/lib/nauktis_utils/date_extractor.rb +53 -0
- data/lib/nauktis_utils/duplicate.rb +195 -0
- data/lib/nauktis_utils/file_browser.rb +94 -0
- data/lib/nauktis_utils/file_digester.rb +39 -0
- data/lib/nauktis_utils/logging.rb +18 -0
- data/lib/nauktis_utils/tracer.rb +27 -0
- data/lib/nauktis_utils/version.rb +3 -0
- data/nauktis_utils.gemspec +30 -0
- metadata +171 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 567131202960aa60f526851ab99a471f825d41ec
|
4
|
+
data.tar.gz: 955965ab288f72d00717b2c19f19064aa161aac8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: cb75388ea8145c44f36ae874308414591d3c30f1b5d44562c12d15702a053fc0063c383c586f89497148efaa1353ec1f935d43f09ff3c711d505d99843833e52
|
7
|
+
data.tar.gz: 72e74eabb7ac71c16fc48d8b094bc546096f1e685977d1a991903158562d5572cdf1cdedd872c15cd33b8831033599f40da1564e7aeea5d64ecff82272c8763b
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Nauktis
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# NauktisUtils
|
2
|
+
|
3
|
+
This gem contains a collection of utility classes and executables.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'nauktis_utils'
|
11
|
+
```
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install nauktis_utils
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
TODO: Write usage instructions here
|
24
|
+
|
25
|
+
## Development
|
26
|
+
|
27
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
28
|
+
|
29
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
30
|
+
|
31
|
+
## Contributing
|
32
|
+
|
33
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/Nauktis/nauktis_utils.
|
34
|
+
|
35
|
+
|
36
|
+
## License
|
37
|
+
|
38
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "nauktis_utils"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/setup
ADDED
data/exe/datify
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command line tool that prefixes file names with the date extracted from
# their metadata. Takes exactly one argument: a file or a directory path.
# A directory is processed recursively; errors on individual files are
# printed and do not stop the run.

require 'nauktis_utils'

puts "Datify tool."
raise "First argument must be a file or a directory path." unless ARGV.size == 1
date_extractor = NauktisUtils::DateExtractor.new
if File.directory?(ARGV[0])
  NauktisUtils::FileBrowser.each_file(ARGV[0]) do |entry|
    begin
      date_extractor.rename(entry)
    rescue StandardError => e
      puts "Error: #{e}"
    end
  end
  puts "Processed #{date_extractor.counters[:total]} files"
else
  # DateExtractor exposes #rename for single files; there is no #process.
  date_extractor.rename(ARGV[0])
end
|
data/exe/duplicates
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command line tool that reports duplicate files found in the directories
# given as arguments. Uses the Analyse handling strategy, which only counts
# duplicates and never deletes anything.

require 'nauktis_utils'

puts "Duplicates finder tool."
raise "Provide directory paths to the command line." unless ARGV.size > 0

# Every argument must be an existing directory; paths are expanded.
directories = ARGV.map { |dir| NauktisUtils::FileBrowser.ensure_valid_directory(dir) }

handling_strategy = NauktisUtils::Duplicate::HandlingStrategy::Analyse.new
puts "Finding duplicates in #{directories.join(', ')}"

NauktisUtils::Duplicate.new(handling_strategy).clean(directories)
puts "Analysis done."
puts "#{handling_strategy.counters[:pairs].to_s(:delimited)} pairs of duplicates found."
puts "#{handling_strategy.counters[:duplicates].to_s(:delimited)} files are duplicates and could be deleted."
puts "#{handling_strategy.counters[:size].to_s(:human_size)} of space could be saved."
|
data/exe/rm_duplicates
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command line tool that deletes duplicate files inside a single directory,
# keeping one copy of each group of identical files.

require 'nauktis_utils'

puts "Duplicates remover tool."
if ARGV.size != 1
  raise "First argument must be a directory path."
end
target = NauktisUtils::FileBrowser.ensure_valid_directory(ARGV[0])

# Files are really deleted (Simple); the first file of each sorted group is kept (KeepOne).
keep_one = NauktisUtils::Duplicate::HandlingStrategy::KeepOne.new(
  NauktisUtils::Duplicate::DeletingStrategy::Simple.new
)
puts "Keeping one original for all duplicates in #{target}"

cleaner = NauktisUtils::Duplicate.new(keep_one)
cleaner.clean([target])
|
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command line tool taking two directories: files of the first one are
# deleted when an identical file exists in the second one. Nothing is ever
# deleted from the second directory.

require 'nauktis_utils'

puts "Duplicates remover tool."
if ARGV.size != 2
  raise "Two first arguments must be directory paths."
end
purge_dir = NauktisUtils::FileBrowser.ensure_valid_directory(ARGV[0])
reference_dir = NauktisUtils::FileBrowser.ensure_valid_directory(ARGV[1])

# NoDeleteIn protects every file located in the reference directory.
protect_reference = NauktisUtils::Duplicate::HandlingStrategy::NoDeleteIn.new(
  NauktisUtils::Duplicate::DeletingStrategy::Simple.new,
  [reference_dir]
)
puts "Deleting duplicates in #{purge_dir} if present in #{reference_dir}"

cleaner = NauktisUtils::Duplicate.new(protect_reference)
cleaner.clean([purge_dir, reference_dir])
|
data/exe/rm_if_in
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
#!/usr/bin/env ruby
require 'nauktis_utils'
require 'optparse'

# Work-in-progress command: for now it only parses its options and prints
# them, together with the remaining arguments, for inspection.
options = {}
OptionParser.new do |opts|
  # Use the real program name in the usage line instead of a placeholder.
  opts.banner = "Usage: #{File.basename($PROGRAM_NAME)} [options]"

  opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
    options[:verbose] = v
  end

  # May be given several times; every value is appended to the list.
  opts.on("-a", "--algorithms ALGORITHM", "Adds ALGORITHM to the list of algorithms to use.") do |al|
    options[:algorithms] ||= []
    options[:algorithms] << al
  end
end.parse!

p options
p ARGV
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'date'
|
2
|
+
require 'fileutils'
|
3
|
+
require 'find'
|
4
|
+
require 'logger'
|
5
|
+
require 'openssl'
|
6
|
+
require 'tmpdir'
|
7
|
+
|
8
|
+
require 'active_support'
|
9
|
+
require 'active_support/core_ext/numeric'
|
10
|
+
require 'json'
|
11
|
+
require 'sha3'
|
12
|
+
|
13
|
+
require "nauktis_utils/version"
|
14
|
+
require "nauktis_utils/logging"
|
15
|
+
require "nauktis_utils/file_browser"
|
16
|
+
require "nauktis_utils/file_digester"
|
17
|
+
require "nauktis_utils/tracer"
|
18
|
+
require "nauktis_utils/duplicate"
|
19
|
+
require "nauktis_utils/archiver"
|
20
|
+
require "nauktis_utils/date_extractor"
|
21
|
+
|
22
|
+
module NauktisUtils
|
23
|
+
|
24
|
+
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
module NauktisUtils
  # Thin wrapper around the system `tar` command.
  #
  # Options are collected through a small DSL (#add, #name, #destination,
  # #gzip, #bzip2, #verbose, #generate_hash, #paranoid, #check_structure).
  # When a block is given to .new it is instance_eval'ed and the archive is
  # created immediately by calling #tar.
  class Archiver
    include Logging
    attr_reader :options

    def initialize(&block)
      @options = {
        paths: [],
      }
      if block_given?
        instance_eval(&block)
        tar
      end
    end

    # Creates the archive described by the collected options.
    #
    # Exactly one path must have been added and a destination directory must
    # have been set. The archive is named "<today>_<source basename>" unless
    # #name was called. Raises a RuntimeError when tar is unavailable, when the
    # options are incomplete, when tar fails, or when a requested verification
    # (structure / content) fails.
    def tar
      Tracer.info "Creating archive for #{@options[:paths]}"
      raise "TAR is not available" unless command_available?('tar')
      raise "Only one file archiving is supported for now" unless @options[:paths].size == 1
      source_path = File.expand_path(@options[:paths].first)
      raise "#{source_path} doesn't exist" unless File.exist?(source_path)
      # Fail with a readable message instead of a TypeError from File.expand_path(nil).
      raise "No destination directory was set" if @options[:destination].nil?

      destination_path = FileBrowser.ensure_valid_directory(@options[:destination])
      @options[:name] = "#{Time.now.strftime('%Y-%m-%d')}_#{File.basename(source_path)}" if @options[:name].nil?
      @options[:tar_file] = File.join(destination_path, "#{@options[:name]}#{extension}")

      create_archive(source_path)
      check_archive_structure if @options[:check_structure] || @options[:paranoid]
      check_archive_content(source_path) if @options[:paranoid]
      FileDigester.generate_digest_file(@options[:tar_file]) if @options[:generate_hash]
    end

    # Adds a path to archive (only one path is supported by #tar for now).
    def add(filename)
      @options[:paths] << File.expand_path(filename)
    end

    # Sets the name of the archive.
    def name(filename)
      @options[:name] = filename
    end

    # Sets the destination folder for the archive.
    def destination(filename)
      @options[:destination] = filename
    end

    # Compresses the archive with gzip.
    def gzip
      @options[:compression] = :gzip
    end

    # Compresses the archive with bzip2.
    def bzip2
      @options[:compression] = :bzip2
    end

    # Makes tar list the files it processes.
    def verbose
      @options[:verbose] = true
    end

    # Not implemented yet: meant to exclude .DS_Store & .dropbox
    def clever_exclude
    end

    # Writes a digest file next to the archive once it is created.
    def generate_hash
      @options[:generate_hash] = true
    end

    # Untar the archive after creation to make sure everything is there.
    def paranoid
      @options[:paranoid] = true
    end

    # Checks the tar structure after creating the archive.
    def check_structure
      @options[:check_structure] = true
    end

    private

    # Runs tar from the parent directory of the source so that the archive
    # only contains the source's basename, not its absolute path.
    def create_archive(source_path)
      success = Dir.chdir(File.dirname(source_path)) do
        execute_command('tar', *tar_options, '-cf', @options[:tar_file], File.basename(source_path))
      end
      raise "TAR returned an error" unless success
      raise "TAR was not created" unless File.exist?(@options[:tar_file])
      Tracer.debug "#{@options[:tar_file]} created"
    end

    # Lists the archive (output discarded) to make sure tar can read it.
    def check_archive_structure
      Tracer.debug "Checking TAR structure"
      success = execute_command('tar', *tar_options, '-tf', @options[:tar_file], out: File::NULL)
      raise "TAR structure is not correct" unless success
    end

    # Extracts the archive in a temporary directory and compares every file
    # with the original one.
    def check_archive_content(source_path)
      Tracer.debug "Checking TAR content"
      Dir.mktmpdir do |dir|
        temp_dir = File.expand_path(dir)
        success = Dir.chdir(temp_dir) do
          execute_command('tar', *tar_options, '-xf', @options[:tar_file])
        end
        raise "Error while untaring the archive" unless success
        raise "Content doesn't match" unless compare(source_path, File.join(temp_dir, File.basename(source_path)))
      end
    end

    # Returns a truthy value when +cmd+ is found by `which`.
    def command_available?(cmd)
      Kernel.system('which', cmd, out: File::NULL)
    end

    # Flags passed to every tar invocation.
    def tar_options
      s = []
      s << '-v' if @options[:verbose]
      s << '-z' if @options[:compression] == :gzip
      s << '-j' if @options[:compression] == :bzip2
      s
    end

    # File extension matching the selected compression.
    def extension
      s = '.tar'
      s += '.gz' if @options[:compression] == :gzip
      s += '.bz2' if @options[:compression] == :bzip2
      s
    end

    # Runs a command without going through a shell: every argument is passed
    # as-is, so paths containing spaces, quotes or `$` cannot break or alter
    # the command. +spawn_options+ are forwarded to Kernel.system
    # (e.g. out: File::NULL). Returns what Kernel.system returns.
    def execute_command(*command, **spawn_options)
      Tracer.debug("Executing: #{command.join(' ')}")
      Kernel.system(*command, **spawn_options)
    end

    # Returns true when +copy+ has the same content as +original+.
    # For a directory, every regular file found under +original+ must exist
    # under +copy+ with the same digest (extra files in +copy+ are ignored).
    def compare(original, copy)
      a = File.expand_path(original)
      b = File.expand_path(copy)
      raise "Original file #{original} doesn't exist" unless File.exist?(a)
      return same_content?(a, b) unless File.directory?(a)

      Dir.chdir(a) do
        Dir.glob('**/*', File::FNM_DOTMATCH) do |f|
          next unless File.exist?(f) && !File.directory?(f)
          a_file = File.expand_path(File.join(a, f))
          b_file = File.expand_path(File.join(b, f))
          logger.debug("Comparing: #{a_file}, #{b_file}")
          return false unless same_content?(a_file, b_file)
        end
      end
      true
    end

    # True when +b_file+ is an existing non-directory with the same digest as +a_file+.
    def same_content?(a_file, b_file)
      return false unless File.exist?(b_file) && !File.directory?(b_file)
      FileDigester.digest(a_file) == FileDigester.digest(b_file)
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module NauktisUtils
  # Prefixes file names with the creation date found in their metadata,
  # as reported by the external `exiftool` command.
  class DateExtractor
    # Number of files seen by #rename, under the :total key.
    attr_reader :counters
    DATETIME_FORMAT = "%Y-%m-%d_%H-%M-%S_"
    # \A anchors at the very start of the name (^ would also match after a newline).
    DATETIME_REGEX = /\A\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}/

    def initialize
      @counters = Hash.new(0)
    end

    # Returns the DateTime stored in the first available metadata tag
    # (DateTimeOriginal, then MediaCreateDate), or nil when none is present.
    # Raises when the path is not an existing file.
    def extract_datetime(file_path)
      meta = exiftool(FileBrowser.ensure_valid_file(file_path))
      ['DateTimeOriginal', 'MediaCreateDate'].each do |tag|
        return DateTime.parse(meta[tag]) if meta[tag]
      end
      Tracer.warn "Could not extract date from #{file_path}"
      nil
    end

    # Prepends the extracted date to the file name unless the name already
    # starts with a date in the expected format. Counts every file processed.
    def rename(file_path)
      file_path = FileBrowser.ensure_valid_file(file_path)
      @counters[:total] += 1
      unless File.basename(file_path) =~ DATETIME_REGEX
        prepend_date(file_path)
      end
    end

    private

    # Renames the file in place; an existing target file is never overwritten.
    def prepend_date(file_path)
      datetime = extract_datetime(file_path)
      return if datetime.nil?

      prefix = datetime.strftime(DATETIME_FORMAT)
      current_name = File.basename(file_path)
      return if current_name.start_with?(prefix)

      prefixed_name = File.join(File.dirname(file_path), prefix + current_name)
      if File.exist?(prefixed_name)
        Tracer.warn "Cannot rename #{current_name}, #{prefixed_name} already exists."
      else
        File.rename(file_path, prefixed_name)
        Tracer.info "Renamed #{current_name} to #{File.basename(prefixed_name)}."
      end
    end

    # Returns the metadata Hash produced by exiftool for the file
    # (an empty Hash when exiftool reports nothing).
    def exiftool(file_path)
      raise "File #{file_path} does not exist" unless File.exist?(file_path)
      # The array form runs exiftool without a shell, so quotes, `$` or
      # backticks in the file name cannot alter the command.
      command = ['exiftool', '-u', '-d', '%Y-%m-%d %H:%M:%S', '-json', file_path]
      result = IO.popen(command, &:read)
      JSON.parse(result)[0] || {}
    end
  end
end
|
@@ -0,0 +1,195 @@
|
|
1
|
+
module NauktisUtils
  # Class to find and handle duplicate files.
  #
  # Files are grouped successively by size, MD5 and SHA3 digest; every group
  # that still contains several files is handed to the handling strategy,
  # which decides what to keep and delegates deletions to a deleting strategy.
  class Duplicate
    include Logging

    # Grouping functions: each one maps a file path to a comparison key.
    ALGORITHMS = {
      name: proc { |file| File.basename(file).downcase },
      size: proc { |file| File.size(file) },
      md5: proc { |file| FileDigester.digest(file, :md5) },
      sha1: proc { |file| FileDigester.digest(file, :sha1) },
      sha3: proc { |file| FileDigester.digest(file, :sha3) },
    }.freeze

    # Returns the grouping proc registered under +name+ (String or Symbol).
    # Raises KeyError for an unknown name.
    def self.algorithm(name)
      ALGORITHMS.fetch(name.to_sym)
    end

    # True when +path+ is located under +directory+. The separator is part of
    # the comparison so that "/a/b" does not match "/a/bc/file".
    def self.inside?(path, directory)
      prefix = directory.end_with?(File::SEPARATOR) ? directory : "#{directory}#{File::SEPARATOR}"
      path.start_with?(prefix)
    end

    # ========================================
    # Handling Strategies
    # ========================================
    class HandlingStrategy
      # Common base: holds the strategy used to actually delete a duplicate.
      class BaseHandlingStrategy
        attr_accessor :deleting_strategy
        def initialize(deleting_strategy)
          @deleting_strategy = deleting_strategy
        end
      end

      # Keeps the first file (in sorted order) and deletes all the others.
      class KeepOne < BaseHandlingStrategy
        def handle(files)
          files = files.sort
          file_kept = files.shift
          files.each do |duplicate|
            @deleting_strategy.delete_duplicate(duplicate, file_kept)
          end
        end
      end

      # Keeps the first file located in the given directory and deletes the
      # others. Does nothing when no file of the group is in that directory.
      class OriginalFrom < KeepOne
        def initialize(deleting_strategy, directory)
          super(deleting_strategy)
          @directory = File.expand_path(directory)
        end

        def handle(files)
          files = files.sort
          i = files.find_index { |f| Duplicate.inside?(f, @directory) }
          unless i.nil?
            file_kept = files.delete_at(i)
            files.each do |duplicate|
              @deleting_strategy.delete_duplicate(duplicate, file_kept)
            end
          end
        end
      end

      # Never deletes files located in the given directories. Files outside
      # of them are deleted only when a copy exists inside one of them.
      class NoDeleteIn < BaseHandlingStrategy
        def initialize(deleting_strategy, directories)
          super(deleting_strategy)
          @directories = directories.map { |d| File.expand_path(d) }
        end

        def handle(files)
          files = files.sort
          files_kept, files_deleted = files.partition do |e|
            @directories.any? { |d| Duplicate.inside?(e, d) }
          end
          if files_kept.size > 0
            files_deleted.each do |duplicate|
              @deleting_strategy.delete_duplicate(duplicate, files_kept.first)
            end
          end
        end
      end

      # Deletes nothing; only counts groups (:pairs), redundant files
      # (:duplicates) and the bytes they use (:size).
      class Analyse
        attr_reader :counters
        def initialize
          @counters = Hash.new(0)
        end

        def handle(files)
          @counters[:pairs] += 1
          @counters[:duplicates] += (files.size - 1)
          @counters[:size] += ((files.size - 1) * File.size(files.first))
        end
      end

      class OnlyDeleteIn < BaseHandlingStrategy
        # TODO
      end
    end

    # ========================================
    # Deleting Strategies
    # ========================================
    class DeletingStrategy
      class BaseDeletingStrategy
        include Logging
      end

      # Only logs what would be deleted.
      class Simulate < BaseDeletingStrategy
        def delete_duplicate(duplicate, original)
          logger.info "#{duplicate} duplicate of #{original}"
        end
      end

      # Deletes the duplicate.
      class Simple < BaseDeletingStrategy
        def delete_duplicate(duplicate, original)
          logger.info "Deleting #{duplicate}, duplicate of #{original}"
          File.delete(duplicate)
        end
      end

      # Deletes the duplicate only after a byte-by-byte comparison with the original.
      class Safe < Simple
        def delete_duplicate(duplicate, original)
          # FileUtils.compare_file is the stdlib content comparison
          # (FileUtils has no `compare` method).
          if FileUtils.compare_file(duplicate, original)
            super
          else
            logger.warn "Duplicate #{duplicate} was a false positive with #{original}"
          end
        end
      end
    end

    # ========================================
    attr_accessor :handling_strategy

    def initialize(handling_strategy)
      @handling_strategy = handling_strategy
    end

    # Finds the duplicates in +directories+ (searched recursively) and passes
    # each group of identical files to the handling strategy. Logs the total
    # size before and after. The array given by the caller is not modified.
    def clean(directories)
      logger.info "Searching duplicates in #{directories}"
      directories = directories.map { |d| File.expand_path(d) }
      files = files_in(directories)
      logger.info "Number of files: #{files.size.to_s(:delimited)}"
      size_before = size_of(directories)
      logger.info "Total size: #{size_before.to_s(:human_size)}"

      # Cheapest criterion first: digests are only computed for files of equal size.
      @groupings = [self.class.algorithm(:size), self.class.algorithm(:md5), self.class.algorithm(:sha3)]
      multi_group_by(files, 0)

      size_after = size_of(directories)
      logger.info "Total size: #{size_after.to_s(:human_size)}"
      # Avoid 0/0 (NaN) when the directories contain no data.
      reduction_ratio = size_before.zero? ? 0.0 : (100 * (size_before - size_after) / size_before.to_f).round(2)
      logger.info "Size reduced by #{reduction_ratio}% (#{size_after.to_s(:delimited)}/#{size_before.to_s(:delimited)})"
    end

    private

    # Recursively refines the groups with the grouping at +index+; groups that
    # survive every grouping are duplicates.
    def multi_group_by(files, index)
      if index >= @groupings.size
        handle_duplicates(files)
      else
        files.group_by(&@groupings[index]).values.each do |sub|
          multi_group_by(sub, index + 1) if sub.size > 1
        end
      end
    end

    def handle_duplicates(duplicates)
      # For extra safety we check a file doesn't appear twice.
      unless duplicates.uniq == duplicates
        s = "A file appears twice: #{duplicates}"
        logger.error s
        raise s
      end
      handling_strategy.handle(duplicates)
    end

    # Returns the list of files in the directories provided
    # (directories and symbolic links are ignored).
    def files_in(directories)
      files = []
      Find.find(*directories) do |path|
        next if File.directory?(path) || File.symlink?(path)
        files << File.expand_path(path)
      end
      files.uniq
    end

    # Returns the total size of the directories provided
    def size_of(directories)
      files_in(directories).inject(0) { |size, f| size + File.size(f) }
    end
  end
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
module NauktisUtils
  # Provide some utility methods for file handling.
  module FileBrowser
    # Characters interpreted by Dir.glob.
    GLOB_CHARACTERS = ['*', '?', '[', '{'].freeze

    # Returns true if the file provided is a valid (i.e. existing) file.
    def self.valid_file?(filename)
      full_path = File.expand_path(filename)
      File.exist?(full_path) && !File.directory?(full_path)
    end

    # Returns true if the file provided is a valid (i.e. existing) directory.
    def self.valid_directory?(directory)
      full_path = File.expand_path(directory)
      File.exist?(full_path) && File.directory?(full_path)
    end

    # Raises an exception if the path provided is not an existing file.
    # Returns the expanded path of the file
    def self.ensure_valid_file(filename)
      raise "#{filename} is not a valid file." unless self.valid_file?(filename)
      File.expand_path(filename)
    end

    # Raises an exception if the path provided is not an existing directory.
    # Returns the expanded path of the directory
    def self.ensure_valid_directory(directory)
      raise "#{directory} is not a valid directory." unless self.valid_directory?(directory)
      File.expand_path(directory)
    end

    # Returns true if the string provided contains characters that will be interpreted in a glob operation.
    # The check is done on the expanded path.
    def self.contains_glob_character?(path)
      full_path = File.expand_path(path)
      GLOB_CHARACTERS.any? { |s| full_path.include?(s) }
    end

    # Recursively goes through all the files contained in a directory
    # (hidden files included) and yields their expanded path.
    # Raises if the directory path contains glob characters, since they would
    # be interpreted as a pattern. Returns an Enumerator when no block is given.
    def self.each_file(directory)
      return enum_for(:each_file, directory) unless block_given?
      if contains_glob_character?(directory)
        raise "Can't use glob on #{directory}, it contains one of the dangerous characters #{GLOB_CHARACTERS.join(' ')}"
      end
      Dir.glob(File.join(File.expand_path(directory), '**', '*'), File::FNM_DOTMATCH).each do |entry|
        next if File.directory?(entry)
        yield(File.expand_path(entry))
      end
    end

    # Copy a file to destination appending a number if the file already exists at destination.
    def self.copy_file(file, destination_folder)
      destination_folder = self.ensure_valid_directory(destination_folder)
      file_path = self.ensure_valid_file(file)

      file_ext = File.extname(file_path)
      file_basename = File.basename(file_path)
      file_base = File.basename(file_path, file_ext)
      final_file = File.join(destination_folder, file_basename)
      i = 0
      # name.ext, name1.ext, name2.ext, ... until a free name is found.
      while File.exist?(final_file) do
        i += 1
        final_file = File.join(destination_folder, "#{file_base}#{i}#{file_ext}")
      end
      FileUtils.cp(file_path, final_file)
    end

    # Deletes all the .DS_Store
    # Returns the output of the `find` command.
    def self.delete_ds_store(directory)
      # Array form: no shell involved, so spaces or special characters in the
      # directory path are passed to find untouched.
      IO.popen(['find', File.expand_path(directory), '-name', '.DS_Store', '-exec', 'rm', '{}', ';'], &:read)
    end

    # Recursively remove all empty directories
    # Returns the output of the `find` command.
    def self.delete_empty_directories(directory)
      IO.popen(['find', File.expand_path(directory), '-type', 'd', '-empty', '-delete'], &:read)
    end

    # Only keeps word characters, dashes and dots in a String. Also replaces spaces by underscores.
    def self.sanitize_name(name)
      sanitized = name.strip
      sanitized.gsub!(/[^\w\s\-\.]+/, '')
      sanitized.gsub!(/[[:space:]]+/, '_')
      sanitized
    end

    # Sanitizes the base name of a file (see sanitize_name) and downcases its
    # extension. The directory part, if any, is left untouched.
    def self.sanitize_filename(filename)
      extension = File.extname(filename)
      name = self.sanitize_name(File.basename(filename, extension))
      sanitized = "#{name}#{extension.downcase}"
      dirname = File.dirname(filename)
      dirname == '.' ? sanitized : File.join(dirname, sanitized)
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module NauktisUtils
  # Computes and checks file digests.
  module FileDigester
    # Supported algorithms. :sha3 is computed with the sha3 gem, the others with OpenSSL.
    ALGORITHMS = [:md5, :sha1, :sha256, :sha512, :sha3].freeze

    # Returns the hexdigest of the file provided.
    # +algorithm+ may be a Symbol or a String. Raises if the file is not valid
    # or if the algorithm is unknown.
    def self.digest(filename, algorithm = :sha1)
      full_path = FileBrowser.ensure_valid_file(filename)
      algorithm = algorithm.to_sym
      raise "Unknown algorithm #{algorithm}, use #{ALGORITHMS}" unless ALGORITHMS.include?(algorithm)
      if algorithm == :sha3
        SHA3::Digest.file(full_path).hexdigest
      else
        OpenSSL::Digest.new(algorithm.to_s).file(full_path).hexdigest
      end
    end

    # Generates a file next to the file provided containing its digest
    # (named "<file>.<algorithm>").
    def self.generate_digest_file(filename, algorithm = :sha1)
      digest = self.digest(filename, algorithm)
      File.write("#{File.expand_path(filename)}.#{algorithm}", digest)
    end

    # Checks the digest files next to the file provided.
    # Returns true if all the digest files contain proper digest
    # (and therefore also when there is no digest file at all).
    def self.digest_file_valid?(filename)
      full_path = FileBrowser.ensure_valid_file(filename)
      ALGORITHMS.all? do |algorithm|
        digest_file = "#{full_path}.#{algorithm}"
        next true unless FileBrowser.valid_file?(digest_file)
        # Surrounding whitespace is ignored so that a digest file ending with
        # a newline is still accepted.
        self.digest(full_path, algorithm) == File.read(digest_file).strip
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module NauktisUtils
  # Mixin giving access to a single logger shared by the whole library.
  module Logging
    class << self
      # Replaces the shared logger.
      attr_writer :logger

      # Shared logger, created on first use and writing to STDOUT by default.
      def logger
        @logger ||= Logger.new(STDOUT)
      end
    end

    # Instance-level shortcut to the shared logger for classes including this module.
    def logger
      Logging.logger
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module NauktisUtils
  # Class-level logging helpers writing to the shared Logging.logger.
  class Tracer
    class << self
      def debug(message)
        log(Logger::DEBUG, message)
      end

      def info(message)
        log(Logger::INFO, message)
      end

      def warn(message)
        log(Logger::WARN, message)
      end

      def error(message)
        log(Logger::ERROR, message)
      end

      def fatal(message)
        log(Logger::FATAL, message)
      end

      # Sends the message to the shared logger. The program name passed is
      # the second entry of the call stack: when reached through one of the
      # helpers above, that is the code which called the helper.
      def log(severity, message)
        origin = caller[1]
        Logging.logger.add(severity, message, origin)
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for nauktis_utils.

# Make the gem's lib directory loadable so the version file can be required.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'nauktis_utils/version'

Gem::Specification.new do |spec|
  spec.name          = "nauktis_utils"
  spec.version       = NauktisUtils::VERSION
  spec.authors       = ['Nauktis']
  spec.email         = ['']

  spec.summary       = %q{Various ruby utility classes.}
  spec.description   = %q{Various ruby utility classes and tools.}
  spec.homepage      = 'https://github.com/Nauktis/nauktis_utils'
  spec.license       = "MIT"

  # Package every file tracked by git, except tests, specs and features.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  # Every file under exe/ is installed as an executable.
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # Runtime dependencies (no version constraint).
  spec.add_dependency 'activesupport'
  spec.add_dependency 'json'
  spec.add_dependency 'sha3'

  # Development dependencies.
  spec.add_development_dependency "bundler", "~> 1.10"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec"
  spec.add_development_dependency "tmpdir"
end
|
metadata
ADDED
@@ -0,0 +1,171 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: nauktis_utils
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Nauktis
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-04-17 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activesupport
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: json
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: sha3
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: bundler
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.10'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.10'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rake
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '10.0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '10.0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rspec
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: tmpdir
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
description: Various ruby utility classes and tools.
|
112
|
+
email:
|
113
|
+
- ''
|
114
|
+
executables:
|
115
|
+
- datify
|
116
|
+
- duplicates
|
117
|
+
- rm_duplicates
|
118
|
+
- rm_duplicates_in_if_in
|
119
|
+
- rm_if_in
|
120
|
+
extensions: []
|
121
|
+
extra_rdoc_files: []
|
122
|
+
files:
|
123
|
+
- ".gitignore"
|
124
|
+
- ".rspec"
|
125
|
+
- ".travis.yml"
|
126
|
+
- Gemfile
|
127
|
+
- LICENSE.txt
|
128
|
+
- README.md
|
129
|
+
- Rakefile
|
130
|
+
- bin/console
|
131
|
+
- bin/setup
|
132
|
+
- exe/datify
|
133
|
+
- exe/duplicates
|
134
|
+
- exe/rm_duplicates
|
135
|
+
- exe/rm_duplicates_in_if_in
|
136
|
+
- exe/rm_if_in
|
137
|
+
- lib/nauktis_utils.rb
|
138
|
+
- lib/nauktis_utils/archiver.rb
|
139
|
+
- lib/nauktis_utils/date_extractor.rb
|
140
|
+
- lib/nauktis_utils/duplicate.rb
|
141
|
+
- lib/nauktis_utils/file_browser.rb
|
142
|
+
- lib/nauktis_utils/file_digester.rb
|
143
|
+
- lib/nauktis_utils/logging.rb
|
144
|
+
- lib/nauktis_utils/tracer.rb
|
145
|
+
- lib/nauktis_utils/version.rb
|
146
|
+
- nauktis_utils.gemspec
|
147
|
+
homepage: https://github.com/Nauktis/nauktis_utils
|
148
|
+
licenses:
|
149
|
+
- MIT
|
150
|
+
metadata: {}
|
151
|
+
post_install_message:
|
152
|
+
rdoc_options: []
|
153
|
+
require_paths:
|
154
|
+
- lib
|
155
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - ">="
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
161
|
+
requirements:
|
162
|
+
- - ">="
|
163
|
+
- !ruby/object:Gem::Version
|
164
|
+
version: '0'
|
165
|
+
requirements: []
|
166
|
+
rubyforge_project:
|
167
|
+
rubygems_version: 2.5.2
|
168
|
+
signing_key:
|
169
|
+
specification_version: 4
|
170
|
+
summary: Various ruby utility classes.
|
171
|
+
test_files: []
|