storazzo 0.3.8 → 0.4.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +4 -2
- data/Makefile +14 -1
- data/README.md +18 -0
- data/Rakefile +33 -3
- data/VERSION +1 -1
- data/bin/ricdisk-magic +0 -1
- data/bin/stats-with-md5 +394 -0
- data/lib/storazzo/common.rb +28 -5
- data/lib/storazzo/media/abstract_ric_disk.rb +34 -7
- data/lib/storazzo/media/gcs_bucket.rb +4 -2
- data/lib/storazzo/media/local_folder.rb +8 -10
- data/lib/storazzo/media/mount_point.rb +5 -0
- data/lib/storazzo/ric_disk.rb +97 -37
- data/lib/storazzo/ric_disk_config.rb +52 -38
- data/lib/storazzo/ric_disk_sample_config.rb +2 -6
- data/lib/storazzo/ric_disk_statsfile.rb +0 -1
- data/lib/storazzo/ric_disk_ugly.rb +245 -245
- data/lib/storazzo.rb +10 -5
- data/storazzo.gemspec +5 -3
- data/test/media/test_abstract_ric_disk.rb +3 -0
- data/test/{test_gcs_bucket.rb → media/test_gcs_bucket.rb} +21 -22
- data/test/{test_local_folder.rb → media/test_local_folder.rb} +68 -12
- data/test/media/test_mount_point.rb +26 -0
- data/test/test_ric_disk.rb +19 -0
- data/test/test_ric_disk_config.rb +1 -8
- data/var/dumps/file_stat.linux.yaml +15 -0
- data/var/dumps/file_stat.macosx.yaml +15 -0
- data/var/test/disks/disk02-full/Rakefile +13 -0
- metadata +32 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 474eb4be081a7edac52377a5269a24e7c826ed0afbf200ac6b315a220bece1f4
|
4
|
+
data.tar.gz: 9d6bbb34885de5bf25ef075dd4bcce0e2a0aaba6714d5a34bee1b220096be140
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 37ce7a58b0ab2f36dcc681579bb55dfa147c195d37016d1fff9ac5ea390fa469d363e2a0df70a614d0c7acf97fd5db9c6d9eb9275f2926155910d9d8e15d9d6f
|
7
|
+
data.tar.gz: 0d6f2863ee75c3a1e653100c7904371028f0838d627c83a286d4bdd0ccfdc699190f4a331c942b6bb4834f4218e50a37853c73827c69d69868294f71dd1172f6
|
data/Gemfile
CHANGED
@@ -1,6 +1,8 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
source "https://rubygems.org"
|
3
2
|
|
4
3
|
# to troubleshoot/debug things
|
5
4
|
#gem 'rubocop'
|
6
5
|
gem 'rake'
|
6
|
+
#gem 'pry'
|
7
|
+
gem 'pry', :group => :development
|
8
|
+
#gem 'require_all' # yup I'm lazy: https://stackoverflow.com/questions/735073/best-way-to-require-all-files-from-a-directory-in-ruby
|
data/Makefile
CHANGED
@@ -51,4 +51,17 @@ watch-test:
|
|
51
51
|
watch -c make test
|
52
52
|
|
53
53
|
test-gcs-bucket:
|
54
|
-
|
54
|
+
#echo "Warning this uses the INCLUDED gem not the latest one in development'
|
55
|
+
#ruby -I test test/media/test_gcs_bucket.rb
|
56
|
+
rake test TEST=test/media/test_gcs_bucket.rb
|
57
|
+
|
58
|
+
test-local-folder:
|
59
|
+
echo "Warning this uses the INCLUDED gem not the latest one in development"
|
60
|
+
ruby -I test test/media/test_local_folder.rb
|
61
|
+
# https://medium.com/@tegon/quick-guide-to-minitest-arguments-745bf9fe4b3
|
62
|
+
test-media-subfolder:
|
63
|
+
rake test TEST="test/media/*.rb"
|
64
|
+
test-verbose:
|
65
|
+
rake test:verbose --verbose
|
66
|
+
test-single-file-continuously:
|
67
|
+
\watch -n 5 --color rake test TEST="test/media/test_local_folder.rb"
|
data/README.md
CHANGED
@@ -9,6 +9,24 @@
|
|
9
9
|
|
10
10
|
(Latest version is hosted in https://rubygems.org/gems/storazzo)
|
11
11
|
|
12
|
+
# Tests
|
13
|
+
|
14
|
+
I still struggle to enforce the include of LOCAL unchecked code rather than latest required system gem (cmon Ruby!)
|
15
|
+
but I found loads of interesting ways to test my code by googling and StackOverflowing:
|
16
|
+
|
17
|
+
* `rake test TEST="test/sum_test.rb"`
|
18
|
+
* test-gcs-bucket: `ruby -I test test/test_gcs_bucket.rb` (meh - see below)
|
19
|
+
* test-media-subfolder: `rake test TEST="test/media/*.rb"`
|
20
|
+
|
21
|
+
Single test in single file:
|
22
|
+
|
23
|
+
* `rake test TEST="test/sum_test.rb" TESTOPTS="--name=test_returns_two"` (sample)
|
24
|
+
* `rake test TEST="test/media/test_local_folder.rb" TESTOPTS="--name=test_1_first_directory_parsing_actually_works"`
|
25
|
+
* `ruby -I test test/test_local_folder.rb -n test_first_directory_parsing_actually_works` (note this includes `storazzo` latest gem
|
26
|
+
and doesn't benefit from the LATEST code, so it's NOT good for testing: use RAKE for that).
|
27
|
+
|
28
|
+
|
29
|
+
Now to toggle verbosity I believe I need to go into Rakefile (bummer)
|
12
30
|
# Thanks
|
13
31
|
|
14
32
|
Inspiration from:
|
data/Rakefile
CHANGED
@@ -3,15 +3,42 @@
|
|
3
3
|
# from hola: https://guides.rubygems.org/make-your-own-gem/#adding-an-executable
|
4
4
|
require "rake/testtask"
|
5
5
|
|
6
|
-
|
6
|
+
desc "Run Unit tests"
|
7
|
+
Rake::TestTask.new(:test) do |t|
|
7
8
|
t.libs << "test"
|
9
|
+
t.libs << "test/media"
|
10
|
+
# note this is only useful for this: https://chriskottom.com/articles/command-line-flags-for-minitest-in-the-raw/
|
8
11
|
t.verbose = false
|
9
12
|
t.warning = false
|
13
|
+
#puts "[RiccardoOnly]: t.pattern: #{t.pattern}"
|
14
|
+
t.pattern = 'test/**/test_*.rb'
|
10
15
|
end
|
11
16
|
|
12
|
-
desc "Run tests"
|
17
|
+
desc "By default, Run Unit tests"
|
13
18
|
task default: :test
|
14
19
|
|
20
|
+
# Adding test/media directory to rake test.
|
21
|
+
desc "Test tests/media/* code sobenem"
|
22
|
+
namespace :test do
|
23
|
+
desc "Test Verbosely by default since I'm too stupid to toggle via ENV var dammit"
|
24
|
+
Rake::TestTask.new(:verbose) do |t|
|
25
|
+
t.libs << "test"
|
26
|
+
t.libs << "test/media"
|
27
|
+
t.verbose = true
|
28
|
+
t.warning = true
|
29
|
+
t.pattern = 'test/**/test_*.rb'
|
30
|
+
$DEBUG = true
|
31
|
+
end
|
32
|
+
end
|
33
|
+
# namespace :verbose_test do
|
34
|
+
# desc "Test tests/media/* code"
|
35
|
+
# Rake::TestTask.new do |t|
|
36
|
+
# t.libs << "test/media"
|
37
|
+
# # Rails::TestTask.new(media: 'test:prepare') do |t|
|
38
|
+
# t.pattern = 'test/**/test_*.rb'
|
39
|
+
# end
|
40
|
+
# end
|
41
|
+
#Rake::Task['test:run'].enhance ["test:media"]
|
15
42
|
|
16
43
|
# begin
|
17
44
|
# require 'bundler/setup'
|
@@ -29,4 +56,7 @@ task default: :test
|
|
29
56
|
# RuboCop::RakeTask.new do |task|
|
30
57
|
# task.requires << 'rubocop-performance'
|
31
58
|
# task.requires << 'rubocop-rspec'
|
32
|
-
# end
|
59
|
+
# end
|
60
|
+
|
61
|
+
|
62
|
+
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.4.5
|
data/bin/ricdisk-magic
CHANGED
@@ -168,7 +168,6 @@ end
|
|
168
168
|
def main(filename)
|
169
169
|
deb "I'm called by #{white filename}"
|
170
170
|
deb "HISTORY: #{gray HISTORY}"
|
171
|
-
#deb "To remove this shit, just set $DEBUG=false :)"
|
172
171
|
init # Enable this to have command line parsing capabilities!
|
173
172
|
puts white("$DEBUG is #{$DEBUG }. Tu turn on, call me with -d") unless $DEBUG
|
174
173
|
#warn "[warn] template v#{$TEMPLATE_VER }: proviamo il warn che magari depreca il DEB"
|
data/bin/stats-with-md5
ADDED
@@ -0,0 +1,394 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'digest/md5'
|
4
|
+
require 'digest'
|
5
|
+
require 'socket'
|
6
|
+
require 'optparse' # http://ruby.about.com/od/advancedruby/a/optionparser.htm
|
7
|
+
require 'date' # for DateTime
|
8
|
+
require 'tempfile'
|
9
|
+
|
10
|
+
if RUBY_VERSION.split('.')[0] == 1
|
11
|
+
puts "Refusing to launch a script form Ruby 1. Sorry Ric, its 2020 damn it!"
|
12
|
+
exit 2020
|
13
|
+
end
|
14
|
+
|
15
|
+
$PROG_VER = '0.5' # on 0.4 it was copied from palladius/sakura. Now should be maintained here and moved to 'stats-with-md5'
|
16
|
+
$DEBUG = false
|
17
|
+
$reset_autowrite_file_reincarnation = 0
|
18
|
+
DEFAULT_MAX_FILES = 'Infinite'
|
19
|
+
|
20
|
+
=begin
|
21
|
+
|
22
|
+
############################################################
|
23
|
+
@author: Riccardo Carlesso
|
24
|
+
@email: riccardo.carlesso@gmail.com
|
25
|
+
@maturity: development
|
26
|
+
@language: Ruby
|
27
|
+
@tags: development, rcarlesso, test
|
28
|
+
@works_on: Linux (little tested since v0.3), Mac (100% developed here)
|
29
|
+
############################################################
|
30
|
+
|
31
|
+
=end
|
32
|
+
|
33
|
+
|
34
|
+
|
35
|
+
# Prints s prefixed with "#DEB " only when the global $DEBUG flag is truthy; returns nil either way.
def deb(s)
  return unless $DEBUG

  puts "#DEB #{s}"
end
|
36
|
+
# colors 16
|
37
|
+
# Wraps s in the ANSI escape sequence 1;30 (bold black, shown as gray) followed by a reset.
def gray(s)
  "\e[1;30m#{s}\e[0m"
end
|
38
|
+
# Wraps s in the ANSI escape sequence 1;32 (bold green) followed by a reset.
def green(s)
  "\e[1;32m#{s}\e[0m"
end
|
39
|
+
# Wraps s in the ANSI escape sequence 1;31 (bold red) followed by a reset.
def red(s)
  "\e[1;31m#{s}\e[0m"
end
|
40
|
+
# Wraps s in the ANSI escape sequence 1;33 (bold yellow) followed by a reset.
def yellow(s)
  "\e[1;33m#{s}\e[0m"
end
|
41
|
+
# Wraps s in the ANSI escape sequence 1;34 (bold blue) followed by a reset.
def blue(s)
  "\e[1;34m#{s}\e[0m"
end
|
42
|
+
# Wraps s in the ANSI escape sequence 1;35 (bold magenta/purple) followed by a reset.
def purple(s)
  "\e[1;35m#{s}\e[0m"
end
|
43
|
+
# Wraps s in the ANSI escape sequence 1;36 (bold cyan/azure) followed by a reset.
def azure(s)
  "\e[1;36m#{s}\e[0m"
end
|
44
|
+
# Wraps s in the ANSI escape sequence 1;37 (bold white) followed by a reset.
def white(s)
  "\e[1;37m#{s}\e[0m"
end
|
45
|
+
|
46
|
+
# colors 64k
|
47
|
+
# Wraps s in the 256-color ANSI escape sequence 38;5;208 (orange foreground) followed by a reset.
def orange(s)
  "\e[38;5;208m#{s}\e[0m"
end
|
48
|
+
|
49
|
+
# Program constants, automagically picked up by RicLib
|
50
|
+
# More configuration could be written in:
|
51
|
+
# $GIC/etc/ricsvn/<FILENAME>.yml
|
52
|
+
# That would go into the variable '$prog_conf_d'
|
53
|
+
$myconf = {
|
54
|
+
:app_name => "AppName should be sth like #{$0}",
|
55
|
+
:description => "
|
56
|
+
This program gets stats from Ruby File.Stats library
|
57
|
+
plus computes MD5 or CRC32 of the file.
|
58
|
+
And bakes it in a way that can be easily parsed.
|
59
|
+
".strip.gsub(/^\s+/, "").gsub(/\s+$/, ""),
|
60
|
+
}
|
61
|
+
|
62
|
+
def usage(comment=nil)
|
63
|
+
puts white($optparse.banner)
|
64
|
+
puts($optparse.summarize)
|
65
|
+
puts("Description: " + gray($myconf[:description]))
|
66
|
+
puts red(comment) if comment
|
67
|
+
exit 13
|
68
|
+
end
|
69
|
+
|
70
|
+
def reset_autowrite_file()
|
71
|
+
$autowrite_file = "# #{File.basename $0} Autowrite v1.0 #{Time.now} reincarnation=#{$reset_autowrite_file_reincarnation}\n"
|
72
|
+
$reset_autowrite_file_reincarnation += 1 # ($reset_autowrite_file_reincarnation + 1) # rescue 1
|
73
|
+
end
|
74
|
+
|
75
|
+
# include it in main if you want a custom one
|
76
|
+
def init() # see lib_autoinit in lib/util.rb
|
77
|
+
$opts = {}
|
78
|
+
# setting defaults
|
79
|
+
$opts[:verbose] = false
|
80
|
+
$opts[:dryrun] = false
|
81
|
+
$opts[:debug] = false
|
82
|
+
$opts[:autowrite] = false
|
83
|
+
$opts[:color] = true
|
84
|
+
$opts[:max_files] = nil # infinite
|
85
|
+
|
86
|
+
reset_autowrite_file()
|
87
|
+
|
88
|
+
$optparse = OptionParser.new do |opts|
|
89
|
+
opts.banner = "#{$0} v.#{$PROG_VER}\n Usage: #{File.basename $0} [options] file1 file2 ..."
|
90
|
+
opts.on( '-a', '--autowrite', 'automatically writes results to Root Folder (DFLT=false)' ) { $opts[:autowrite] = true }
|
91
|
+
opts.on( '-c', '--no-color', 'disables color (DFLT=false, that is color enabled)' ) { $opts[:color] = false }
|
92
|
+
opts.on( '-d', '--debug', 'enables debug (DFLT=false)' ) { $opts[:debug] = true ; $DEBUG = true }
|
93
|
+
opts.on( '-h', '--help', 'Display this screen' ) { usage }
|
94
|
+
opts.on( '-m', '--max-files NUMBER', "sets max files per Dir to X (DFLT=#{DEFAULT_MAX_FILES})" ) {|nfiles| $opts[:max_files] = nfiles.to_i }
|
95
|
+
opts.on( '-n', '--dryrun', "Don't really execute code" ) { $opts[:dryrun] = true }
|
96
|
+
opts.on( '-l', '--logfile FILE', 'Write log to FILE' ) {|file| $opts[:logfile] = file }
|
97
|
+
opts.on( '-v', '--verbose', 'Output more information' ) { $opts[:verbose] = true}
|
98
|
+
end
|
99
|
+
$optparse.parse!
|
100
|
+
end
|
101
|
+
|
102
|
+
def getSafeCreationTime(file)
|
103
|
+
File.ctime(file)
|
104
|
+
end
|
105
|
+
|
106
|
+
def compute_stats_and_md5(file)
|
107
|
+
ret={}
|
108
|
+
ret[:name] = file
|
109
|
+
|
110
|
+
begin
|
111
|
+
stats = File.stat(file)
|
112
|
+
ret[:stats_object] = stats # TODO deprecate
|
113
|
+
deb("Stats methods: #{stats.methods.sort.join(', ')}")
|
114
|
+
deb(stats.ctime)
|
115
|
+
#puts(stats.birthtime rescue (stats.ctime))
|
116
|
+
# On Mac/Linux: see test/dumps/***.yaml
|
117
|
+
ret[:size] = stats.size
|
118
|
+
ret[:mode] = stats.mode
|
119
|
+
# datetimes
|
120
|
+
ret[:stat_safebirthtime] = getSafeCreationTime(file) # in Mac uses birthtime,but on Linux wont work
|
121
|
+
# defined?(stats.birthtime) ? # rescue stats.mtime # eg, 2022-03-05 21:47:51 +0100 on Mac (not implemented on my Linuix)#
|
122
|
+
# stats.birthtime : # works on Mac
|
123
|
+
# stats.ctime
|
124
|
+
ret[:stat_mtime] = stats.mtime # eg, 2022-03-05 21:47:51 +0100 Returns the modification time of stat.
|
125
|
+
ret[:stat_ctime] = stats.ctime # eg, 2022-03-05 21:47:51 +0100 Returns the change time for stat (that is, the time directory information about the file was changed, not the file itself). Note that on Windows (NTFS), returns creation time (birth time).
|
126
|
+
ret[:stat_atime] = stats.atime # eg, 2022-03-05 21:47:51 +0100 Last Access
|
127
|
+
ret[:stat_gid] = stats.gid # Group Id
|
128
|
+
ret[:stat_uid] = stats.uid # UUID
|
129
|
+
ret[:stat_ftype] = stats.ftype #
|
130
|
+
|
131
|
+
ret[:md5] = '______________NONE______________'
|
132
|
+
if stats.ftype != "directory"
|
133
|
+
file_content = File.read(file)
|
134
|
+
ret[:md5] = Digest::MD5.hexdigest(file_content) # rescue 'none ' #=> string with hexadecimal digits
|
135
|
+
end
|
136
|
+
rescue Errno::EISDIR => e
|
137
|
+
#puts "It's a dir, nothing I can do here except skipping the stuff"
|
138
|
+
ret[:md5] = "_________I'm a dir sorry________"
|
139
|
+
rescue Exception => e
|
140
|
+
ret[:error] = e
|
141
|
+
end
|
142
|
+
ret
|
143
|
+
end
|
144
|
+
|
145
|
+
$print_stats_and_md5_version = "1.1a_220624_F" # files
|
146
|
+
$print_stats_and_md5_counter = 0
|
147
|
+
$print_stats_and_md5_for_gcs_version = "1.1alpha_220628_G" #GCS bucket
|
148
|
+
|
149
|
+
def stats_and_md5_number_of_files_processed()
|
150
|
+
return $print_stats_and_md5_counter
|
151
|
+
end
|
152
|
+
|
153
|
+
def smells_like_gcs?(file)
|
154
|
+
file =~ /gs:\/\//
|
155
|
+
end
|
156
|
+
|
157
|
+
# You give me gs://bucket123/path/to/dir
|
158
|
+
def print_stats_and_md5_for_gcs(mybucket_with_subdir, opts={})
|
159
|
+
# opts_color = opts.fetch :color, true#
|
160
|
+
# opts_verbose = opts.fetch :verbose, false
|
161
|
+
# opts_ping_frequency = opts.fetch :ping_frequency, 50
|
162
|
+
opts_max_files = opts.fetch :max_files, nil # BigDecimal('Infinity')
|
163
|
+
opts_autowrite = opts.fetch :autowrite, false
|
164
|
+
|
165
|
+
raise "Wrong GCS Path: #{mybucket_with_subdir}" unless smells_like_gcs?(mybucket_with_subdir)
|
166
|
+
#puts "0. mybucket_with_subdir: #{mybucket_with_subdir}" # gs://mybucket123/path/to/dir
|
167
|
+
path_split = mybucket_with_subdir.split('/')
|
168
|
+
mybucket = path_split[0,3].join('/') # gs://mybucket123/
|
169
|
+
gcs_subpath = path_split[3,42].join('/') # path/to/dir
|
170
|
+
cleanedup_bucket = path_split[2] # mybucket123
|
171
|
+
#puts "1. mybucket: #{mybucket}"
|
172
|
+
#puts "2. gcs_subpath: #{gcs_subpath}"
|
173
|
+
#puts "3. cleanedup_bucket: #{cleanedup_bucket}"
|
174
|
+
|
175
|
+
puts("[print_stats_and_md5_for_gcs] version=#{$print_stats_and_md5_for_gcs_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on='#{Time.now}' bucket=#{mybucket} subpath=#{gcs_subpath}")
|
176
|
+
begin
|
177
|
+
require "google/cloud/storage"
|
178
|
+
project_id = `gcloud config get project 2>/dev/null`
|
179
|
+
deb "project_id: #{project_id}"
|
180
|
+
storage = Google::Cloud::Storage.new(project_id: project_id)
|
181
|
+
bucket = storage.bucket(cleanedup_bucket)
|
182
|
+
# grabs them ALL if nil, optherwise its an integer.
|
183
|
+
files = opts_max_files.nil? ?
|
184
|
+
bucket.files : # nil -> all of them (I've tried sth more idiomatic like -1 or nil or Infinite. they all failed.)
|
185
|
+
bucket.files.first(opts_max_files) # not nil -> first(N)
|
186
|
+
#puts(files)
|
187
|
+
deb("All Sorted Methods: #{files.first.methods.sort}")
|
188
|
+
files.each do |gcs_file|
|
189
|
+
md5 = Base64.decode64(gcs_file.md5).unpack('H*')[0] # md5 in base64 -> then de-binarizied: Base64.urlsafe_decode64(md5).unpack('H*')[0]
|
190
|
+
size = gcs_file.size # crc rescue :boh
|
191
|
+
time_created = gcs_file.created_at
|
192
|
+
mode = 'XXXXXX' # for compatbility with files
|
193
|
+
file_type = gcs_file.name.to_s =~ /\/$/ ? 'd' : 'f' # if ends with a slash its a DIR :P
|
194
|
+
#puts("[OLD_GCS_v11] #{md5} #{size}\t#{time_created} [#{gcs_file.content_type}]\t#{gcs_file.name}")
|
195
|
+
print_colored_polymoprhic('gcs_v1.2', md5, mode, file_type, size, gcs_file.content_type, time_created, gcs_file.name, opts)
|
196
|
+
deb gcs_file.to_yaml
|
197
|
+
end
|
198
|
+
#puts("Hash Methods: #{files.first.methods.select{|x| x.match /hash/}}")
|
199
|
+
rescue LoadError # require Exception
|
200
|
+
puts "Error with library #{$!}, maybe type: gem install google-cloud-storage" # double quotes so the LoadError message is interpolated
|
201
|
+
rescue Exception # generic Exception
|
202
|
+
puts "print_stats_and_md5_for_gcs(): Generic Error: #{$!}"
|
203
|
+
exit 1
|
204
|
+
end
|
205
|
+
autowrite_to_dir_or_gcs(:gcs, mybucket_with_subdir) if opts_autowrite
|
206
|
+
end
|
207
|
+
|
208
|
+
# Generic polymorpghic coloring, can be GCS. Would have been nice to pass a Dict so easier to extend, but Ruby doesnt support doesblt Dict with magic args. Plus might be a GOOD
|
209
|
+
# thing its hard to change since it breaks Storazzo.
|
210
|
+
# TODO(ricc): make sure the dates are the same. Currently:
|
211
|
+
# GCS Date: 2021-12-20T12:26:13+00:00 DateTime
|
212
|
+
# File Date: 2022-05-30 11:55:42 +0200 Time
|
213
|
+
def print_colored_polymoprhic(entity_type, md5, mode, file_type, size, content_type, creation_time, filename, opts={})
|
214
|
+
opts_color = opts.fetch :color, true
|
215
|
+
opts_verbose = opts.fetch :verbose, false
|
216
|
+
opts_ping_frequency = opts.fetch :ping_frequency, 50
|
217
|
+
opts_autowrite = opts.fetch :autowrite, false
|
218
|
+
|
219
|
+
# Increment counter across all!
|
220
|
+
$print_stats_and_md5_counter += 1 # global counter! I should increment at the END of fucntion.. but you never know if i exit wrongly so i do at beginning. so within this function the real #files is N-1 (i.e., if N is 6, we're processing file %5)
|
221
|
+
# in main i had to put 2 branched to invoke a single thing but if logic changes that sentence might be printed twice. Instead here, the BEGIN line could be nice to do here instead.
|
222
|
+
#$stderr.puts("print_stats_and_md5() Fikst invocation! Consider moving the 2 things in main here :)") if ($print_stats_and_md5_counter == 1)
|
223
|
+
#puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ($print_stats_and_md5_counter == 1)
|
224
|
+
$stderr.puts "-- print_stats_and_md5(v#{$print_stats_and_md5_version}): #{$print_stats_and_md5_counter - 1} files processed --" if (($print_stats_and_md5_counter) % opts_ping_frequency == 1 )
|
225
|
+
|
226
|
+
maybecolored_md5 = opts_color ? red(md5) : md5
|
227
|
+
maybecolored_filename = opts_color ? azure(filename) : filename
|
228
|
+
enlarged_size = sprintf("%7o", size.to_s ) # or SIZE
|
229
|
+
maybecolored_size = opts_color ? yellow(enlarged_size) : enlarged_size
|
230
|
+
maybecolored_file_type = file_type
|
231
|
+
colored_content_type = opts_color ? white(content_type) : content_type
|
232
|
+
if opts_color
|
233
|
+
maybecolored_file_type = case file_type
|
234
|
+
when 'f' then green(file_type)
|
235
|
+
when 'd' then blue(file_type)
|
236
|
+
when 's' then azure(file_type)
|
237
|
+
else red(file_type)
|
238
|
+
end
|
239
|
+
end
|
240
|
+
standardized_creation_time = creation_time.is_a?(DateTime) ?
|
241
|
+
creation_time :
|
242
|
+
DateTime.parse(creation_time.to_s) # if you prefer to use Time since its already supported by Storazzo (but its ugly since it has SPACES! so hard to parse) use tt = Time.parse(d.to_s)
|
243
|
+
# as per https://stackoverflow.com/questions/279769/convert-to-from-datetime-and-time-in-ruby
|
244
|
+
#puts "#{maybecolored_md5} #{mode} #{maybecolored_file_type} #{maybecolored_size}\t#{creation_time}(DEB #{creation_time.class})(DEB2 #{DateTime.parse(creation_time.to_s)}) [#{colored_content_type}] #{maybecolored_filename}"
|
245
|
+
# this mightr be colored
|
246
|
+
str = "[#{entity_type}] #{maybecolored_md5} #{mode} #{maybecolored_file_type} #{standardized_creation_time} #{maybecolored_size} [#{colored_content_type}] #{maybecolored_filename}\n"
|
247
|
+
# this will NEVER be colored. WARNING, now you need to maintain TWO of these beasts :/
|
248
|
+
non_colored_str = "[#{entity_type}] #{md5} #{mode} #{file_type} #{standardized_creation_time} #{enlarged_size} [#{content_type}] #{filename}\n"
|
249
|
+
if(opts_autowrite)
|
250
|
+
# need to guarantee this is NOT colored. The easiest way to do it is TWICVE as expensive but... whatevs.
|
251
|
+
# TODO(ricc): fire me for this lazimness!
|
252
|
+
$autowrite_file += (non_colored_str)
|
253
|
+
end
|
254
|
+
print(str)
|
255
|
+
return str
|
256
|
+
end
|
257
|
+
|
258
|
+
def print_stats_and_md5(file, opts={})
|
259
|
+
return print_stats_and_md5_for_gcs(file, opts) if smells_like_gcs?(file)
|
260
|
+
|
261
|
+
opts_color = opts.fetch :color, true
|
262
|
+
opts_verbose = opts.fetch :verbose, false
|
263
|
+
opts_ping_frequency = opts.fetch :ping_frequency, 50
|
264
|
+
|
265
|
+
|
266
|
+
# $print_stats_and_md5_counter += 1 # global counter! I should increment at the END of fucntion.. but you never know if i exit wrongly so i do at beginning. so within this function the real #files is N-1 (i.e., if N is 6, we're processing file %5)
|
267
|
+
|
268
|
+
# # in main i had to put 2 branched to invoke a single thing but if logic changes that sentence might be printed twice. Instead here, the BEGIN line could be nice to do here instead.
|
269
|
+
# #$stderr.puts("print_stats_and_md5() Fikst invocation! Consider moving the 2 things in main here :)") if ($print_stats_and_md5_counter == 1)
|
270
|
+
# puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ($print_stats_and_md5_counter == 1)
|
271
|
+
# $stderr.puts "-- print_stats_and_md5(v#{$print_stats_and_md5_version}): #{$print_stats_and_md5_counter - 1} files processed --" if (($print_stats_and_md5_counter) % opts_ping_frequency == 1 )
|
272
|
+
|
273
|
+
#puts "print_stats_and_md5: #{file}" if opts_verbose
|
274
|
+
stats = compute_stats_and_md5 file
|
275
|
+
#maybecolored_md5 = opts_color ? red(stats[:md5]) : stats[:md5]
|
276
|
+
#maybecolored_filename = opts_color ? azure(stats[:name]) : stats[:name]
|
277
|
+
#maybecolored_size = opts_color ? white(stats[:size]) : stats[:size]
|
278
|
+
mode = sprintf("%06o", stats[:mode] ) rescue :ERROR # .to_s.right(4) #=> "100644"
|
279
|
+
file_type = stats[:stat_ftype][0] rescue '?'
|
280
|
+
file_type = 's' if File.symlink?(file)
|
281
|
+
content_type = `file --mime-type -b '#{file}' 2>/dev/null`.chomp # .split(': ',2)[1]
|
282
|
+
|
283
|
+
#colored_string = "[COL] #{red stats[:md5]} #{stats[:size]} #{stats[:stat_safebirthtime]} #{white stats[:name]}"
|
284
|
+
#boring_string = "[B/W] #{ stats[:md5]} #{stats[:size]} #{stats[:stat_safebirthtime]} #{ stats[:name]}"
|
285
|
+
#puts(opts_color ? colored_string : boring_string)
|
286
|
+
#puts "[OLDv1.1deprecated] #{maybecolored_md5} #{mode} #{file_type} #{maybecolored_size} #{stats[:stat_safebirthtime]} #{maybecolored_filename}"
|
287
|
+
print_colored_polymoprhic('file_v1.2', stats[:md5], mode, file_type, stats[:size], content_type, stats[:stat_safebirthtime], stats[:name], opts)
|
288
|
+
end
|
289
|
+
|
290
|
+
def autowrite_to_dir_or_gcs(fs_type, path, opts={})
|
291
|
+
autowrite_version = "1.1_28jun22"
|
292
|
+
file_to_save = "stats-with-md5.log"
|
293
|
+
path_to_save = path + "/" + file_to_save
|
294
|
+
puts(red "TODO(ricc): write results [size=#{$autowrite_file.size}] in this Directory: #{path_to_save}") if fs_type == :dir
|
295
|
+
puts(red "TODO(ricc): write results [size=#{$autowrite_file.size}] in this GCS Bucket or Dir: #{path_to_save}") if fs_type == :gcs
|
296
|
+
|
297
|
+
##########################################################################
|
298
|
+
# do the thing: creates file locally and moves to GCS or Dir.
|
299
|
+
##########################################################################
|
300
|
+
$autowrite_file += "# [autowrite_to_dir_or_gcs v#{autowrite_version}]: about to write it now. #{Time.now}"
|
301
|
+
tmpfile = Tempfile.new(path_to_save)
|
302
|
+
tmpfile.write($autowrite_file)
|
303
|
+
cmd = :no_cmd_yet
|
304
|
+
|
305
|
+
if fs_type == :gcs
|
306
|
+
cmd = "gsutil mv '#{tmpfile.path}' #{path}/#{file_to_save}"
|
307
|
+
end
|
308
|
+
if fs_type == :dir
|
309
|
+
cmd = "mv '#{tmpfile.path}' #{path}/#{file_to_save}"
|
310
|
+
end
|
311
|
+
|
312
|
+
puts("TMP File is this big: #{azure tmpfile.size}")
|
313
|
+
puts("About to execute this: #{white cmd}")
|
314
|
+
ret = `#{cmd}`
|
315
|
+
if $?.exitstatus != 0
|
316
|
+
puts "Error to execute: #{red cmd}. Exit: #{red $?.exitstatus}"
|
317
|
+
exit 53
|
318
|
+
end
|
319
|
+
puts "Command returned: '#{$?.exitstatus}'" # - #{$?.class}"
|
320
|
+
# wow: fork { exec("ls") }
|
321
|
+
|
322
|
+
# removes the tmpfile :)
|
323
|
+
tmpfile.close
|
324
|
+
tmpfile.unlink
|
325
|
+
|
326
|
+
#####################################
|
327
|
+
# finished, lets now clean up the file...
|
328
|
+
#####################################
|
329
|
+
reset_autowrite_file()
|
330
|
+
return :ok
|
331
|
+
end
|
332
|
+
|
333
|
+
def print_stats_and_md5_for_directory(directory_to_explore_recursively, opts={})
|
334
|
+
puts "# [print_stats_and_md5_for_directory] DIR to explore (make sure you glob also): #{directory_to_explore_recursively }"
|
335
|
+
puts "# DEB Options: #{opts}"
|
336
|
+
|
337
|
+
opts_autowrite = opts.fetch :autowrite, false
|
338
|
+
|
339
|
+
Dir.glob("#{(directory_to_explore_recursively)}/**/*") do |globbed_filename|
|
340
|
+
# Do work on files & directories ending in .rb
|
341
|
+
#puts "[deb] #{globbed_filename}"
|
342
|
+
print_stats_and_md5(globbed_filename, :color => $opts[:color], :verbose => $opts[:verbose], :autowrite => $opts[:autowrite])
|
343
|
+
end
|
344
|
+
|
345
|
+
autowrite_to_dir_or_gcs(:dir, directory_to_explore_recursively) if opts_autowrite
|
346
|
+
end
|
347
|
+
|
348
|
+
def real_program
|
349
|
+
t0 = Time.now
|
350
|
+
deb "+ Options are: #{gray $opts}"
|
351
|
+
deb "+ Depured args: #{azure ARGV}"
|
352
|
+
# Your code goes here...
|
353
|
+
#puts "Description: '''#{white $myconf[:description] }'''"
|
354
|
+
#puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ARGV.size > 0
|
355
|
+
|
356
|
+
common_opts = {
|
357
|
+
:max_files => $opts[:max_files],
|
358
|
+
:color => $opts[:color],
|
359
|
+
:verbose => $opts[:verbose],
|
360
|
+
:autowrite => $opts[:autowrite],
|
361
|
+
}
|
362
|
+
|
363
|
+
if ARGV.size == 1
|
364
|
+
directory_to_explore_recursively = ARGV[0]
|
365
|
+
if smells_like_gcs?(directory_to_explore_recursively)
|
366
|
+
print_stats_and_md5_for_gcs(directory_to_explore_recursively, common_opts)
|
367
|
+
else # normal file..
|
368
|
+
print_stats_and_md5_for_directory(directory_to_explore_recursively, common_opts)
|
369
|
+
end
|
370
|
+
elsif ARGV.size > 1
|
371
|
+
deb "2. I expect a lot of single files or directories:"
|
372
|
+
for arg in ARGV
|
373
|
+
if File.directory?(arg)
|
374
|
+
print_stats_and_md5_for_directory(arg, common_opts )
|
375
|
+
else
|
376
|
+
print_stats_and_md5(arg, common_opts)
|
377
|
+
end
|
378
|
+
end
|
379
|
+
else
|
380
|
+
puts "No args given. Exiting"
|
381
|
+
exit 41
|
382
|
+
end
|
383
|
+
tf = Time.now
|
384
|
+
puts "# [#{File.basename $0}] Time taken for processing #{stats_and_md5_number_of_files_processed} files: #{tf-t0}"
|
385
|
+
end
|
386
|
+
|
387
|
+
def main(filename)
|
388
|
+
deb "I'm called by #{white filename}"
|
389
|
+
deb "To remove this shit, just set $DEBUG=false :)"
|
390
|
+
init # Enable this to have command line parsing capabilities!
|
391
|
+
real_program
|
392
|
+
end
|
393
|
+
|
394
|
+
main(__FILE__)
|
data/lib/storazzo/common.rb
CHANGED
@@ -1,10 +1,12 @@
|
|
1
1
|
# Ric common stuff! :)
|
2
2
|
#
|
3
|
-
#
|
4
3
|
# Usage:
|
5
|
-
# include Storazzo::Common
|
4
|
+
# include Storazzo::Common (def to def)
|
5
|
+
# or
|
6
|
+
# extend Storazzo::Common (def to def self.XX)
|
6
7
|
#
|
7
8
|
require_relative 'colors'
|
9
|
+
require 'pry'
|
8
10
|
|
9
11
|
module Storazzo::Common
|
10
12
|
|
@@ -12,16 +14,37 @@ module Storazzo::Common
|
|
12
14
|
|
13
15
|
|
14
16
|
def deb(s)
|
15
|
-
puts "[DEB] #{yellow(s)}" if $DEBUG
|
17
|
+
puts "[DEB🐞] #{yellow(s)}" if _debug_true # $DEBUG
|
18
|
+
end
|
19
|
+
# this has a yield
|
20
|
+
def if_deb?()
|
21
|
+
if _debug_true # $DEBUG
|
22
|
+
deb "== yield START =="
|
23
|
+
yield
|
24
|
+
deb "== yield END =="
|
25
|
+
end
|
16
26
|
end
|
17
27
|
def warn(s)
|
18
|
-
puts "[
|
28
|
+
puts "[W⚠️RN] #{azure(s)}"
|
19
29
|
end
|
20
30
|
def err(str)
|
21
|
-
puts "[ERR] #{red(s)}"
|
31
|
+
puts "[ERR⛔] #{red(str)}" # the parameter is named str; s was undefined here
|
22
32
|
end
|
23
33
|
def bug(s)
|
24
34
|
puts "[🐛] #{gray s}"
|
25
35
|
end
|
36
|
+
def pverbose(is_verbose, str)
|
37
|
+
puts "[V📚RB💀S📚] #{gray str}" if is_verbose # honor the is_verbose flag instead of always printing
|
38
|
+
end
|
39
|
+
def ppp(complex_object_to_colorize)
|
40
|
+
# TODO i need to learn to return without printing..
|
41
|
+
Pry::ColorPrinter.pp(complex_object_to_colorize)
|
42
|
+
end
|
43
|
+
|
44
|
+
private
|
45
|
+
def _debug_true
|
46
|
+
$DEBUG or ENV["DEBUG"] == 'true'
|
47
|
+
end
|
26
48
|
|
49
|
+
# puts "[DEBUG ENABLED!]" if _debug_true
|
27
50
|
end
|
@@ -1,18 +1,21 @@
|
|
1
1
|
module Storazzo::Media
|
2
2
|
class Storazzo::Media::AbstractRicDisk
|
3
|
+
#include Storazzo::Common
|
4
|
+
extend Storazzo::Common
|
3
5
|
|
4
|
-
|
6
|
+
#DefaultStatsFilename = Storazzo::RicDiskStatsFile.default_name
|
5
7
|
|
6
8
|
# looks like there's not Abstract Class in Ruby, but also SO says he best
|
7
9
|
# way to do this is this:
|
8
10
|
|
9
11
|
# attr_accessor :name, :description, :ricdisk_file, :local_mountpoint, :wr, :path, :ricdisk_file_empty, :size, :active_dirs
|
10
12
|
|
13
|
+
|
11
14
|
########################
|
12
15
|
# Abstract methods START
|
13
16
|
########################
|
14
17
|
def initialize(local_mount)
|
15
|
-
|
18
|
+
pverbose true, "[AbstractRicDisk.init()] Some child of AbstractRicDisk (#{self}) called me! Yummie." # disable when you dont need me anymore..
|
16
19
|
validate
|
17
20
|
end
|
18
21
|
def self.list_all
|
@@ -50,7 +53,7 @@ module Storazzo::Media
|
|
50
53
|
# if not writeable, I will:
|
51
54
|
# 1. create a dir based on its unique format.
|
52
55
|
# 2. create a file of same look and feel (alternative - used a DASH)
|
53
|
-
return "{get_local_folder}/#{unique_id}::#{self.default_stats_filename}"
|
56
|
+
return "TODO FIXME {get_local_folder}/#{unique_id}::#{self.default_stats_filename}"
|
54
57
|
#"{get_local_folder}"/#{unique_id}/#{Storazzo::RicDiskStatsFile.default_name}"
|
55
58
|
end
|
56
59
|
end
|
@@ -63,6 +66,18 @@ module Storazzo::Media
|
|
63
66
|
"MD5::v1::#{hash}"
|
64
67
|
end
|
65
68
|
|
69
|
+
def to_verbose_s
|
70
|
+
h = {}
|
71
|
+
h[:class] = self.class
|
72
|
+
h[:unique_id] = self.unique_id
|
73
|
+
h[:inspect] = self.inspect
|
74
|
+
h[:to_s] = self.to_s
|
75
|
+
h[:local_mountpoint] = local_mountpoint
|
76
|
+
h[:writeable] = self.writeable?
|
77
|
+
h[:stats_file_smart_fullpath] = stats_file_smart_fullpath
|
78
|
+
return h
|
79
|
+
end
|
80
|
+
|
66
81
|
# # Todo check on instances these 3 methods exist
|
67
82
|
def self.abstract_class_mandatory_methods
|
68
83
|
%W{
|
@@ -92,15 +107,27 @@ module Storazzo::Media
|
|
92
107
|
"#{local_mountpoint}/#{my_stats_filename}"
|
93
108
|
end
|
94
109
|
|
95
|
-
def validate
|
96
|
-
|
110
|
+
def validate(opts={})
|
111
|
+
verbose = opts.fetch(:verbose, true)
|
112
|
+
puts "[VERBOSE] validate(): We're trying to see if your object is valid, across 3 possible sub-classes." if verbose
|
97
113
|
#1. check for
|
98
114
|
raise "Unknown local mount " unless local_mount.is_a?(String)
|
99
115
|
#2. check thaty writeable? is true or false
|
100
116
|
my_writeable = wr
|
101
117
|
raise "Writeable is not boolean" unless (my_writeable.is_a? TrueClass or my_writeable.is_a? FalseClass )
|
102
118
|
end
|
119
|
+
|
120
|
+
# TODO use a proper Factory pattern.
|
121
|
+
def self.DirFactory(path)
|
122
|
+
raise "I need a path/directory string: #{path}" unless path.is_a?(String)
|
123
|
+
|
124
|
+
deb "TODO: if coincides with MountPoint, instance THAT"
|
125
|
+
if path =~ /^gs:\/\//
|
126
|
+
deb "Smells like GCS"
|
127
|
+
return GcsBucket.new(path)
|
128
|
+
end
|
129
|
+
deb "Smells like LocalFolder :)"
|
130
|
+
return LocalFolder.new(path)
|
131
|
+
end
|
103
132
|
end
|
104
|
-
#puts "DEB lib/storazzo/media/abstract_ric_disk Media::ARD inside module"
|
105
133
|
end
|
106
|
-
#puts "DEB lib/storazzo/media/abstract_ric_disk Media::ARD outside module"
|