storazzo 0.3.8 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7af080e9d4d71c7ceb558b4b4cbd1f13345fa3c4dbea5d6060da8fb9d5b21cc0
4
- data.tar.gz: fe4fd90e12756dbe535e96f0c7c42834ce44c452839890d381a0e43a78ce926e
3
+ metadata.gz: 9060103a49d0c2ec75f2f5d58bedd56f4fe0ac05d7c547874200a331480fa157
4
+ data.tar.gz: 1475ec303321f84850e63419b4d845cf282b57a36c384ab4f003000e79912cdb
5
5
  SHA512:
6
- metadata.gz: 7f753c4e14018803a731de9cf992dcfcf4f4cac601fb0b978de6d16c83c8ab0f06430e0cdd2a203b025a714f168c08f0e6107606b9b354fcf792e0041be8d727
7
- data.tar.gz: 072da4e8a2b24fedd83e1cd04b41f695a38a49e623cef5bd1643a11dfc8813604d32c7950976b6b5500839d2a2580293623ab7b32070d9e162db6a753f2d8d02
6
+ metadata.gz: b1ea60a508eae4c245c58214f8d90805ec21a9e06b0e604206ea4dd6b52e17e1547c58d15082e46e7b577f0430ad9211cedfb24cce9ab6674273eaaf6e69ce9e
7
+ data.tar.gz: '09fca42358436467874d6a433210a596b41fc8331a7e4a5ddb4aef1db113108a604a66dde0804dba4f2e886a99636404f6c457647dcc792b39301f3b4aaca61d'
data/Gemfile CHANGED
@@ -4,3 +4,4 @@
4
4
  # to troubleshoot/debug things
5
5
  #gem 'rubocop'
6
6
  gem 'rake'
7
+ gem 'pry'
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.8
1
+ 0.4.1
@@ -0,0 +1,428 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'digest/md5'
4
+ require 'digest'
5
+ require 'socket'
6
+ require 'optparse' # http://ruby.about.com/od/advancedruby/a/optionparser.htm
7
+ require 'date' # for DateTime
8
+ require 'tempfile'
9
+
10
# Refuse to run under Ruby 1.x.
# RUBY_VERSION is a String such as "3.2.2", so its major component has to be
# converted to an Integer before it is compared with 1 (a String never equals
# an Integer, which made the previous form of this guard unreachable).
if RUBY_VERSION.split('.').first.to_i == 1
  puts "Refusing to launch a script form Ruby 1. Sorry Ric, its 2020 damn it!"
  # NOTE(review): exit statuses are usually truncated to 8 bits, so the shell
  # will most likely see 228 rather than 2020 - confirm if anything checks it.
  exit 2020
end
14
+
15
+ $PROG_VER = '0.5' # on 0.4 it was copied from palladius/sakura. Now should be maintained here and moved to 'stats-with-md5'
16
+ $DEBUG = false
17
+ $reset_autowrite_file_reincarnation = 0
18
+ DEFAULT_MAX_FILES = 'Infinite'
19
+
20
+ =begin
21
+
22
+ ############################################################
23
+ @author: Riccardo Carlesso
24
+ @email: riccardo.carlesso@gmail.com
25
+ @maturity: development
26
+ @language: Ruby
27
+ @tags: development, rcarlesso, test
28
+ @works_on: Linux (little tested since v0.3), Mac (100% developed here)
29
+ ############################################################
30
+
31
+ =end
32
+
33
+
34
+
35
# Debug logger: prints +s+ prefixed with "#DEB " on stdout, but only while the
# global $DEBUG flag is truthy. Always returns nil.
def deb(s)
  return unless $DEBUG

  puts "#DEB #{s}"
end
36
+ # colors 16
37
# Wraps +s+ in the ANSI escape sequence selected by +code+ and appends the
# reset sequence, so only +s+ is affected.
def ansi_paint(code, s)
  "\033[#{code}m#{s}\033[0m"
end

# Basic palette (codes 1;30 .. 1;37).
def gray(s)
  ansi_paint('1;30', s)
end

def green(s)
  ansi_paint('1;32', s)
end

def red(s)
  ansi_paint('1;31', s)
end

def yellow(s)
  ansi_paint('1;33', s)
end

def blue(s)
  ansi_paint('1;34', s)
end

def purple(s)
  ansi_paint('1;35', s)
end

def azure(s)
  ansi_paint('1;36', s)
end

def white(s)
  ansi_paint('1;37', s)
end

# Extended palette (38;5;N form).
def orange(s)
  ansi_paint('38;5;208', s)
end
48
+
49
+ # Program constants, automagically picked up by RicLib
50
+ # More configuration could be written in:
51
+ # $GIC/etc/ricsvn/<FILENAME>.yml
52
+ # That would go into the variable '$prog_conf_d'
53
# Program-wide configuration: a display name and a multi-line description
# (one sentence per line, no leading/trailing whitespace).
$myconf = {
  :app_name => "AppName should be sth like #{$0}",
  :description => [
    'This program gets stats from Ruby File.Stats library',
    'plus computes MD5 or CRC32 of the file.',
    'And bakes it in a way that can be easily parsed.'
  ].join("\n"),
}
61
+
62
# Prints the option parser banner and summary, the program description and,
# when given, +comment+ in red; then terminates the process with status 13.
def usage(comment=nil)
  banner = white($optparse.banner)
  puts banner
  puts $optparse.summarize
  puts "Description: #{gray($myconf[:description])}"
  puts(red(comment)) if comment
  exit(13)
end
69
+
70
# Starts a fresh autowrite buffer ($autowrite_file) containing only a header
# line, then bumps the global reincarnation counter. Returns the new counter.
def reset_autowrite_file()
  header = "# #{File.basename $0} Autowrite v1.0 #{Time.now} reincarnation=#{$reset_autowrite_file_reincarnation}\n"
  $autowrite_file = header
  $reset_autowrite_file_reincarnation += 1
end
74
+
75
+ # include it in main if you want a custom one
76
# Sets up the global state used by the script: default options in $opts, a
# fresh autowrite buffer, and the command-line parser in $optparse.
# Parses ARGV in place (recognised switches are removed from it).
def init() # see lib_autoinit in lib/util.rb
  # defaults
  $opts = {
    :verbose => false,
    :dryrun => false,
    :debug => false,
    :autowrite => false,
    :color => true,
    :max_files => nil, # infinite
  }

  reset_autowrite_file()

  $optparse = OptionParser.new do |parser|
    parser.banner = "#{$0} v.#{$PROG_VER}\n Usage: #{File.basename $0} [options] file1 file2 ..."
    parser.on('-a', '--autowrite', 'automatically writes results to Root Folder (DFLT=false)') do
      $opts[:autowrite] = true
    end
    parser.on('-c', '--no-color', 'disables color (DFLT=false, that is color enabled)') do
      $opts[:color] = false
    end
    parser.on('-d', '--debug', 'enables debug (DFLT=false)') do
      $opts[:debug] = true
      $DEBUG = true
    end
    parser.on('-h', '--help', 'Display this screen') do
      usage
    end
    parser.on('-m', '--max-files NUMBER', "sets max files per Dir to X (DFLT=#{DEFAULT_MAX_FILES})") do |nfiles|
      $opts[:max_files] = nfiles.to_i
    end
    parser.on('-n', '--dryrun', "Don't really execute code") do
      $opts[:dryrun] = true
    end
    parser.on('-l', '--logfile FILE', 'Write log to FILE') do |file|
      $opts[:logfile] = file
    end
    parser.on('-v', '--verbose', 'Output more information') do
      $opts[:verbose] = true
    end
  end
  $optparse.parse!
end
101
+
102
# Returns the timestamp used as "creation time" for +file+: the ctime reported
# by File.stat. Raises the usual Errno::* error when the file cannot be stat-ed.
def getSafeCreationTime(file)
  File.stat(file).ctime
end
105
+
106
+ def compute_stats_and_md5(file)
107
+ ret={}
108
+ ret[:name] = file
109
+
110
+ begin
111
+ stats = File.stat(file)
112
+ ret[:stats_object] = stats # TODO deprecate
113
+ deb("Stats methods: #{stats.methods.sort.join(', ')}")
114
+ #deb(stats.ftype)
115
+ #puts(stats.methods)
116
+ deb(stats.ctime)
117
+ #puts(stats.birthtime rescue (stats.ctime))
118
+
119
+ # On Mac:
120
+ #<File::Stat
121
+ # dev=0x100000f,
122
+ # ino=1247768,
123
+ # mode=0100644 (file rw-r--r--),
124
+ # nlink=1,
125
+ # uid=164825 (ricc),
126
+ # gid=89939 (primarygroup),
127
+ # rdev=0x0 (0, 0),
128
+ # size=564,
129
+ # blksize=4096,
130
+ # blocks=8,
131
+ # atime=2022-03-05 22:36:48.373362127 +0100 (1646516208),
132
+ # mtime=2022-03-05 22:36:48.176789949 +0100 (1646516208),
133
+ # ctime=2022-03-05 22:36:48.176789949 +0100 (1646516208)>
134
+
135
+ # On LInux:
136
+ #<File::Stat
137
+ # dev=0xfe01,
138
+ # ino=6293055,
139
+ # mode=0100644 (file rw-r--r--),
140
+ # nlink=1,
141
+ # uid=164825 (ricc),
142
+ # gid=89939 (primarygroup),
143
+ # rdev=0x0 (0, 0),
144
+ # size=7,
145
+ # blksize=4096,
146
+ # blocks=8,
147
+ # atime=2022-06-27 08:49:38.586706861 +0200 (1656312578),
148
+ # mtime=2022-03-23 14:28:45 +0100 (1648042125),
149
+ # ctime=2022-05-30 10:12:10.381629305 +0200 (1653898330)>
150
+ ret[:size] = stats.size
151
+ ret[:mode] = stats.mode
152
+ # datetimes
153
+ ret[:stat_safebirthtime] = getSafeCreationTime(file) # in Mac uses birthtime,but on Linux wont work
154
+ # defined?(stats.birthtime) ? # rescue stats.mtime # eg, 2022-03-05 21:47:51 +0100 on Mac (not implemented on my Linuix)#
155
+ # stats.birthtime : # works on Mac
156
+ # stats.ctime
157
+ ret[:stat_mtime] = stats.mtime # eg, 2022-03-05 21:47:51 +0100 Returns the modification time of stat.
158
+ ret[:stat_ctime] = stats.ctime # eg, 2022-03-05 21:47:51 +0100 Returns the change time for stat (that is, the time directory information about the file was changed, not the file itself). Note that on Windows (NTFS), returns creation time (birth time).
159
+ ret[:stat_atime] = stats.atime # eg, 2022-03-05 21:47:51 +0100 Last Access
160
+ ret[:stat_gid] = stats.gid # Group Id
161
+ ret[:stat_uid] = stats.uid # UUID
162
+ ret[:stat_ftype] = stats.ftype #
163
+
164
+ ret[:md5] = '______________NONE______________'
165
+ if stats.ftype != "directory"
166
+ file_content = File.read(file)
167
+ ret[:md5] = Digest::MD5.hexdigest(file_content) # rescue 'none ' #=> string with hexadecimal digits
168
+ end
169
+ rescue Errno::EISDIR => e
170
+ #puts "It's a dir, nothing I can do here except skipping the stuff"
171
+ ret[:md5] = "_________I'm a dir sorry________"
172
+ rescue Exception => e
173
+ ret[:error] = e
174
+ end
175
+ ret
176
+ end
177
+
178
+ $print_stats_and_md5_version = "1.1a_220624_F" # files
179
+ $print_stats_and_md5_counter = 0
180
+ $print_stats_and_md5_for_gcs_version = "1.1alpha_220628_G" #GCS bucket
181
+
182
# Reads the global counter of entries printed so far.
def stats_and_md5_number_of_files_processed
  $print_stats_and_md5_counter
end
185
+
186
# True when +file+ is a Google Cloud Storage URL, i.e. it *starts* with
# "gs://". The check is anchored on purpose: a local path that merely contains
# the substring (for example "/tmp/logs://x") is not a bucket.
# Returns true/false; nil is treated as "not GCS".
def smells_like_gcs?(file)
  file.to_s.start_with?('gs://')
end
189
+
190
+ # You give me gs://bucket123/path/to/dir
191
# Prints one stats line per object of a GCS bucket.
#
# mybucket_with_subdir - String such as "gs://bucket123/path/to/dir".
# opts                 - :max_files (Integer or nil = all objects),
#                        :autowrite (write the collected lines back when true);
#                        the whole Hash is forwarded to print_colored_polymoprhic.
#
# Raises RuntimeError when the path does not look like a GCS URL.
# Exits the process with status 1 on any other error raised while listing.
def print_stats_and_md5_for_gcs(mybucket_with_subdir, opts={})
  opts_max_files = opts.fetch :max_files, nil
  opts_autowrite = opts.fetch :autowrite, false

  raise "Wrong GCS Path: #{mybucket_with_subdir}" unless smells_like_gcs?(mybucket_with_subdir)

  path_split = mybucket_with_subdir.split('/')
  mybucket = path_split[0, 3].join('/')             # gs://mybucket123
  gcs_subpath = Array(path_split[3..-1]).join('/')  # path/to/dir (any depth, may be empty)
  cleanedup_bucket = path_split[2]                  # mybucket123

  puts("[print_stats_and_md5_for_gcs] version=#{$print_stats_and_md5_for_gcs_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on='#{Time.now}' bucket=#{mybucket} subpath=#{gcs_subpath}")
  begin
    require "google/cloud/storage"
    # The command output ends with a newline, which must not be part of the id.
    project_id = `gcloud config get project 2>/dev/null`.strip
    deb "project_id: #{project_id}"
    storage = Google::Cloud::Storage.new(project_id: project_id)
    bucket = storage.bucket(cleanedup_bucket)
    raise "GCS bucket not found or not accessible: #{cleanedup_bucket}" if bucket.nil?

    # NOTE(review): gcs_subpath is only printed above, it is not used to filter
    # the listing - confirm whether the listing should be restricted to it.
    files = opts_max_files.nil? ? bucket.files : bucket.files.first(opts_max_files)
    deb("All Sorted Methods: #{files.first.methods.sort}") if $DEBUG
    files.each do |gcs_file|
      # The md5 comes base64-encoded; decode it ('m') and hex-dump it ('H*')
      # without depending on the Base64 constant, which this script never requires.
      md5 = gcs_file.md5.unpack('m').first.unpack('H*').first
      size = gcs_file.size
      time_created = gcs_file.created_at
      mode = 'XXXXXX' # for compatibility with local files
      file_type = gcs_file.name.to_s.end_with?('/') ? 'd' : 'f' # a trailing slash marks a "directory"
      print_colored_polymoprhic('gcs_v1.2', md5, mode, file_type, size, gcs_file.content_type, time_created, gcs_file.name, opts)
      deb(gcs_file.to_yaml) if $DEBUG
    end
  rescue LoadError
    puts "Error with library #{$!}, maybe type: gem install google-cloud-storage"
  rescue StandardError
    puts "print_stats_and_md5_for_gcs(): Generic Error: #{$!}"
    exit 1
  end

  autowrite_to_dir_or_gcs(:gcs, mybucket_with_subdir) if opts_autowrite
end
241
+
242
+ # Generic polymorpghic coloring, can be GCS. Would have been nice to pass a Dict so easier to extend, but Ruby doesnt support doesblt Dict with magic args. Plus might be a GOOD
243
+ # thing its hard to change since it breaks Storazzo.
244
+ # TODO(ricc): make sure the dates are the same. Currently:
245
+ # GCS Date: 2021-12-20T12:26:13+00:00 DateTime
246
+ # File Date: 2022-05-30 11:55:42 +0200 Time
247
# Prints (and returns) one result line, shared by local files and GCS objects:
#
#   [entity_type] md5 mode file_type creation_time size [content_type] filename
#
# creation_time may be a DateTime or anything whose to_s DateTime.parse
# understands (e.g. a Time). size is printed right-aligned in 7 columns, in
# decimal. A nil size or creation_time (entry that could not be stat-ed, such
# as a dangling symlink) is printed as '?' instead of raising.
#
# opts - :color (default true), :ping_frequency (default 50; a progress line
#        goes to stderr once every that many entries), :autowrite (default
#        false; appends the uncolored line to $autowrite_file).
#
# Side effect: increments the global $print_stats_and_md5_counter first, so the
# entry is counted even if something fails later.
def print_colored_polymoprhic(entity_type, md5, mode, file_type, size, content_type, creation_time, filename, opts={})
  opts_color = opts.fetch :color, true
  opts_ping_frequency = opts.fetch :ping_frequency, 50
  opts_autowrite = opts.fetch :autowrite, false

  $print_stats_and_md5_counter += 1
  if ($print_stats_and_md5_counter % opts_ping_frequency) == 1
    $stderr.puts "-- print_stats_and_md5(v#{$print_stats_and_md5_version}): #{$print_stats_and_md5_counter - 1} files processed --"
  end

  # '%7s' pads without converting the number to another base.
  enlarged_size = format('%7s', size.nil? ? '?' : size)
  # TODO(ricc): make sure the dates are the same. Currently GCS hands over a
  # DateTime while local files hand over a Time.
  standardized_creation_time =
    if creation_time.nil?
      '?'
    elsif creation_time.is_a?(DateTime)
      creation_time
    else
      DateTime.parse(creation_time.to_s)
    end

  # This one is NEVER colored; it is what gets persisted by autowrite.
  non_colored_str = "[#{entity_type}] #{md5} #{mode} #{file_type} #{standardized_creation_time} #{enlarged_size} [#{content_type}] #{filename}\n"

  str =
    if opts_color
      colored_file_type =
        case file_type
        when 'f' then green(file_type)
        when 'd' then blue(file_type)
        when 's' then azure(file_type)
        else red(file_type)
        end
      "[#{entity_type}] #{red(md5)} #{mode} #{colored_file_type} #{standardized_creation_time} #{yellow(enlarged_size)} [#{white(content_type)}] #{azure(filename)}\n"
    else
      non_colored_str
    end

  $autowrite_file += non_colored_str if opts_autowrite
  print(str)
  str
end
291
+
292
# Asks the `file` utility for the MIME type of +file+.
# The command is run without a shell (argument array), so quotes, spaces or
# other shell metacharacters in the name are harmless. Returns '' when the
# utility is not available.
def mime_type_of(file)
  IO.popen(['file', '--mime-type', '-b', '--', file.to_s], :err => File::NULL, &:read).chomp
rescue SystemCallError
  ''
end

# Prints the stats line for a single local path, or delegates to
# print_stats_and_md5_for_gcs when +file+ is a gs:// URL.
#
# opts is forwarded untouched to the printing function.
# Entries whose stats could not be read are still printed, with :ERROR as mode,
# '?' as type and the usual placeholder as md5.
def print_stats_and_md5(file, opts={})
  return print_stats_and_md5_for_gcs(file, opts) if smells_like_gcs?(file)

  stats = compute_stats_and_md5 file
  mode = stats[:mode].nil? ? :ERROR : format('%06o', stats[:mode]) #=> "100644"
  file_type = stats[:stat_ftype].nil? ? '?' : stats[:stat_ftype][0]
  file_type = 's' if File.symlink?(file)
  md5 = stats.fetch(:md5, '______________NONE______________')
  content_type = mime_type_of(file)

  print_colored_polymoprhic('file_v1.2', md5, mode, file_type, stats[:size], content_type, stats[:stat_safebirthtime], stats[:name], opts)
end
323
+
324
# Persists the autowrite buffer ($autowrite_file) as "stats-with-md5.log"
# inside +path+, which is a local directory (fs_type :dir) or a gs:// location
# (fs_type :gcs). The buffer is written to a temporary file first and then
# moved with `mv` / `gsutil mv`.
#
# Returns :ok. Exits the process with status 53 when the move fails.
# Raises ArgumentError for any other fs_type.
def autowrite_to_dir_or_gcs(fs_type, path, opts={})
  autowrite_version = "1.1_28jun22"
  file_to_save = "stats-with-md5.log"
  path_to_save = path + "/" + file_to_save

  mover =
    case fs_type
    when :gcs then ['gsutil', 'mv']
    when :dir then ['mv']
    else raise ArgumentError, "Unknown fs_type: #{fs_type.inspect} (expected :dir or :gcs)"
    end

  puts(red "TODO(ricc): write results [size=#{$autowrite_file.size}] in this Directory: #{path_to_save}") if fs_type == :dir
  puts(red "TODO(ricc): write results [size=#{$autowrite_file.size}] in this GCS Bucket or Dir: #{path_to_save}") if fs_type == :gcs

  $autowrite_file += "# [autowrite_to_dir_or_gcs v#{autowrite_version}]: about to write it now. #{Time.now}"
  tmpfile = Tempfile.new(file_to_save)
  begin
    tmpfile.write($autowrite_file)
    tmpfile.flush # the mover reads the file by path: the content must be on disk

    # Argument array => no shell involved, so spaces/quotes in paths are safe.
    argv = mover + [tmpfile.path, path_to_save]
    cmd = argv.join(' ') # for display only

    puts("TMP File is this big: #{azure tmpfile.size}")
    puts("About to execute this: #{white cmd}")
    system(*argv)
    status = $?.exitstatus
    if status != 0
      puts "Error to execute: #{red cmd}. Exit: #{red status}"
      exit 53
    end
    puts "Command returned: '#{status}'"
  ensure
    # Runs on success, on failure and on exit: never leave the tmpfile behind.
    tmpfile.close
    tmpfile.unlink
  end

  # finished, start a new empty buffer
  reset_autowrite_file()
  return :ok
end
366
+
367
# Prints the stats line of every entry found below a directory.
#
# directory_to_explore_recursively - local directory, expanded with "**/*"
#                                    (entries starting with a dot are not
#                                    matched by that pattern).
# opts - forwarded as-is to print_stats_and_md5 for every entry, so the caller's
#        options (and not the global $opts) decide color/verbosity/autowrite;
#        :autowrite additionally triggers writing the results into the directory.
def print_stats_and_md5_for_directory(directory_to_explore_recursively, opts={})
  puts "# [print_stats_and_md5_for_directory] DIR to explore (make sure you glob also): #{directory_to_explore_recursively }"
  puts "# DEB Options: #{opts}"

  opts_autowrite = opts.fetch :autowrite, false

  Dir.glob("#{directory_to_explore_recursively}/**/*") do |globbed_filename|
    print_stats_and_md5(globbed_filename, opts)
  end

  autowrite_to_dir_or_gcs(:dir, directory_to_explore_recursively) if opts_autowrite
end
381
+
382
+ def real_program
383
+ t0 = Time.now
384
+ deb "+ Options are: #{gray $opts}"
385
+ deb "+ Depured args: #{azure ARGV}"
386
+ # Your code goes here...
387
+ #puts "Description: '''#{white $myconf[:description] }'''"
388
+ #puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ARGV.size > 0
389
+
390
+ common_opts = {
391
+ :max_files => $opts[:max_files],
392
+ :color => $opts[:color],
393
+ :verbose => $opts[:verbose],
394
+ :autowrite => $opts[:autowrite],
395
+ }
396
+
397
+ if ARGV.size == 1
398
+ directory_to_explore_recursively = ARGV[0]
399
+ if smells_like_gcs?(directory_to_explore_recursively)
400
+ print_stats_and_md5_for_gcs(directory_to_explore_recursively, common_opts)
401
+ else # normal file..
402
+ print_stats_and_md5_for_directory(directory_to_explore_recursively, common_opts)
403
+ end
404
+ elsif ARGV.size > 1
405
+ deb "2. I expect a lot of single files or directories:"
406
+ for arg in ARGV
407
+ if File.directory?(arg)
408
+ print_stats_and_md5_for_directory(arg, common_opts )
409
+ else
410
+ print_stats_and_md5(arg, common_opts)
411
+ end
412
+ end
413
+ else
414
+ puts "No args given. Exiting"
415
+ exit 41
416
+ end
417
+ tf = Time.now
418
+ puts "# [#{File.basename $0}] Time taken for processing #{stats_and_md5_number_of_files_processed} files: #{tf-t0}"
419
+ end
420
+
421
# Entry point: logs who invoked the script (debug only), parses the command
# line via init and then runs real_program, whose result is returned.
def main(filename)
  deb("I'm called by #{white filename}")
  deb('To remove this shit, just set $DEBUG=false :)')
  init # command line parsing
  real_program
end
427
+
428
+ main(__FILE__)
@@ -3,10 +3,12 @@ module Storazzo::Media
3
3
  class Storazzo::RicDisk::GcsBucket
4
4
 
5
5
 
6
- def self.list_all
6
+ def self.list_all(config=nil)
7
7
  # get lists from Config singleton
8
8
  #puts " self.list_all: loading config "
9
- config = Storazzo::RicDiskConfig.instance # # ).get_config
9
+ config ||= Storazzo::RicDiskConfig.instance # # ).get_config
10
+
11
+ config.load # in case I need to load it for the first time
10
12
  #puts config['Config']['AdditionalMountDirs']
11
13
  #puts "TODO see config: #{config}"
12
14
  #[42, 43]
@@ -21,7 +21,7 @@ module Storazzo
21
21
  RicdiskVersion = '2.1'
22
22
  RicdiskHistory = [
23
23
  '2022-07-29 2.1 Added timestamp',
24
- '2022-07-28 2.0 Added tags, siz, unique_hash, cmputation_hostname, wr, ...',
24
+ '2022-07-28 2.0 Added tags, siz, unique_hash, computation_hostname, wr, ...',
25
25
  ]
26
26
  DefaultGemfileTestDiskFolder = Storazzo.root + "/var/test/disks/" # was: @@default_gemfile_test_disks_folder
27
27
  # Immutable
@@ -173,7 +173,7 @@ module Storazzo
173
173
  deb "[CACHE HIT] ricdisk_file (didnt have to recompute it - yay!)"
174
174
  return @ricdisk_file
175
175
  end
176
- warn "RICC_WARNING This requires cmputation I wanna do it almost once"
176
+ deb "[compute_ricdisk_file] RICC_WARNING This requires cmputation I wanna do it almost once"
177
177
  ConfigFiles.each do |papable_config_filename|
178
178
  #return ".ricdisk.yaml" if File.exist?("#{path}/.ricdisk.yaml") #and File.empty?( "#{path}/.ricdisk.yaml")
179
179
  #return ".ricdisk" if File.exist?("#{path}/.ricdisk") # and File.empty?( "#{path}/.ricdisk")
@@ -222,13 +222,13 @@ module Storazzo
222
222
  end
223
223
 
224
224
  # maybe move to a RiccFile class? Maybe even INHERIT from FILE?
225
- def self.obsolescence_seconds file_path
225
# Age of +file_path+ in whole seconds, measured from its ctime (File.stat).
# The clock is read once, so the value written to the debug log and the value
# returned always agree.
# maybe move to a RiccFile class? Maybe even INHERIT from FILE?
def obsolescence_seconds(file_path)
  creation_time = File.stat(file_path).ctime
  age = Time.now - creation_time
  deb("[obsolescence_seconds] File #{file_path}: #{creation_time} - #{age} seconds ago")
  age.to_i
end
230
230
  # maybe move to a RiccFile class? Maybe even INHERIT from FILE?
231
- def self.obsolescence_days(file_path)
231
# Age of +file_path+ in whole days (integer division of its age in seconds).
def obsolescence_days(file_path)
  seconds_per_day = 86400
  obsolescence_seconds(file_path) / seconds_per_day
end
234
234
 
@@ -296,8 +296,32 @@ module Storazzo
296
296
  end
297
297
 
298
298
# Produces (or reuses) the stats file of this disk.
#
# When the stats file already exists and $opts[:force] is exactly false, it is
# reused: its line count and age are printed and it is handed to upload_to_gcs
# (unless opts[:upload_to_gcs] is false). Otherwise the whole directory is
# crunched again through the external `stats-with-md5` command.
#
# NOTE(review): an unset $opts[:force] (nil) is not "== false", so in that case
# the file is always recomputed - confirm this is intended.
# Side effect: changes the process working directory to the disk path.
def compute_stats_files(opts={})
  puts azure("[compute_stats_files] TODO implement natively. Now I'm being lazy")
  #Storazzo::RicDisk.calculate_stats_files(path, opts)
  opts_upload_to_gcs = opts.fetch :upload_to_gcs, true
  dir = path

  full_file_path = "#{dir}/#{$stats_file}"

  puts("compute_stats_files(#{white dir}): #{white full_file_path}")
  puts "TEST1 DIR EXISTS: #{dir} -> #{File.directory? dir}"
  Dir.chdir(dir)
  # File.exist? - the plural alias File.exists? no longer exists in Ruby 3.2+.
  if File.exist?(full_file_path) && ($opts[:force] == false)
    puts "File '#{$stats_file}' exists already." # - now should see if its too old, like more than 1 week old
    # TODO check for file time...
    line_count = `wc -l "#{full_file_path}" `.chomp
    days_old = obsolescence_days(full_file_path)
    print "Lines found: #{yellow line_count}. File obsolescence (days): #{yellow days_old}."
    if days_old > 7
      puts("*** ACHTUNG *** FIle is pretty old. You might consider rotating: #{yellow "mv #{full_file_path} #{full_file_path}_old"}. Or invoke with --force")
    end
    upload_to_gcs(full_file_path) if opts_upload_to_gcs
  else
    puts "Crunching data stats from '#{dir}' into '#{$stats_file}' ... please bear with me.. [maybe file didnt exist, maybe $opts[:force] is true]"
    command = "find . -print0 | xargs -0 stats-with-md5 --no-color | tee '#{full_file_path}'"
    puts("[#{`pwd`.chomp}] Executing: #{azure command}")
    ret = backquote_execute(command)
    puts "Done. #{ret.split("\n").count} files processed."
  end
end
302
326
 
303
327
 
@@ -307,7 +331,7 @@ module Storazzo
307
331
  opts_upload_to_gcs = opts.fetch :upload_to_gcs, true
308
332
 
309
333
  full_file_path = "#{dir}/#{$stats_file}"
310
- return "This refacgtor is for another day"
334
+ return "This refactor is for another day"
311
335
 
312
336
  puts("calculate_stats_files(#{white dir}): #{white full_file_path}")
313
337
  puts "TEST1 DIR EXISTS: #{dir} -> #{File.directory? dir}"
@@ -322,7 +346,7 @@ module Storazzo
322
346
  upload_to_gcs(full_file_path) if opts_upload_to_gcs
323
347
  else
324
348
  puts "Crunching data stats from '#{dir}' into '#{$stats_file}' ... please bear with me.. [maybe file didnt exist, maybe $opts[:force] is true]"
325
- command = "find . -print0 | xargs -0 stats-with-md5.rb --no-color | tee '#{full_file_path}'"
349
+ command = "find . -print0 | xargs -0 stats-with-md5 --no-color | tee '#{full_file_path}'"
326
350
  puts("[#{`pwd`.chomp}] Executing: #{azure command}")
327
351
  ret = backquote_execute command
328
352
  puts "Done. #{ret.split("\n").count} files processed."
@@ -432,25 +456,27 @@ end #/Module
432
456
 
433
457
 
434
458
 
435
- # def self.backquote_execute(cmd)
436
- # # executed a command wrapped by dryrun though
437
- # return "DRYRUN backquote_execute(#{cmd})" if $opts[:dryrun]
438
- # `#{cmd}`
439
- # end
459
# Runs +cmd+ through backticks and returns its standard output.
# With opts[:dryrun] set nothing is executed: a "DRYRUN ..." description of the
# command is returned instead.
def backquote_execute(cmd, opts={})
  if opts.fetch(:dryrun, false)
    "DRYRUN backquote_execute(#{cmd})"
  else
    %x(#{cmd})
  end
end
440
465
 
441
- # def self.upload_to_gcs(file, opts={})
442
- # deb("upload_to_gcs(#{file}). TODO(ricc) after breafast upload to GCS : #{file}")
443
- # mount_name = file.split('/')[-2]
444
- # filename = "#{mount_name}-#{File.basename file}"
445
- # hostname = Socket.gethostname[/^[^.]+/]
446
- # command = "gsutil cp '#{file}' gs://#{$gcs_bucket}/backup/ricdisk-magic/#{ hostname }-#{filename}"
447
- # deb("Command: #{command}")
448
- # ret = backquote_execute(command)
449
- # # if $opts[:debug] do
450
- # # puts "+ Current list of files:"
451
- # # ret = backquote_execute("gsutil ls -al gs://#{$gcs_bucket}/backup/ricdisk-magic/")
452
- # # puts ret
453
- # # end
454
- # ret
455
- # end
466
# Builds the `gsutil cp` command that would copy +file+ into
# gs://$gcs_bucket/backup/ricdisk-magic/, naming the object
# "<short hostname>-<parent directory of file>-<basename of file>".
# The upload itself is currently disabled: the command is only passed to
# backquote_execute in dry-run mode, whose result is returned.
def upload_to_gcs(file, opts={})
  deb("upload_to_gcs(#{file}). TODO(ricc) after breafast upload to GCS : #{file}")
  parent_dir = file.split('/')[-2]
  short_host = Socket.gethostname[/^[^.]+/]
  object_name = "#{short_host}-#{parent_dir}-#{File.basename file}"
  command = "gsutil cp '#{file}' gs://#{$gcs_bucket}/backup/ricdisk-magic/#{object_name}"
  deb("Command: #{command}")
  puts azure("GCS upload disabled until I know if it works :) command='#{command}'")
  backquote_execute(command, :dryrun => true)
end
456
482
 
@@ -116,6 +116,18 @@ public
116
116
  "POLY_#{self.class}_(ver=#{config_ver}, file=#{config_file}), #{white(size)} bytes" # - config_default_folder=#{self.config_default_folder}"
117
117
  end
118
118
 
119
# Returns a Hash (despite the name) with diagnostic details about this object:
# its to_s, class, object id, the source file defining this method, and the
# results of get_bucket_paths and get_local_folders.
def to_verbose_s
  {
    :description => "This is a Verbose Hash describing a RicDiskConfig or its child RicDiskSampleConfig to understand why it keeps failing..",
    :to_s => to_s,
    :class => self.class,
    :file => __FILE__,
    :id => object_id,
    :get_bucket_paths => get_bucket_paths,
    :get_local_folders => get_local_folders,
  }
end
130
+
119
131
  def get_config(opts={})
120
132
  return load(opts) if @config.nil?
121
133
  @config
@@ -160,27 +172,26 @@ public
160
172
  # RicDisk.write_config_yaml_to_disk(dir)
161
173
  # RicDisk.calculate_stats_files(dir) # dir is inutile
162
174
  # } # TODO refactor in option sbrodola_afterwards=true. :)
163
- # else
164
- raise "Wrong input: #{files_list} " unless files_list.is_a?(Array)
165
-
166
- puts "iterate_through_file_list_for_disks(): I consider files_list as a list of directories to parse :)" if verbose
167
-
168
- #dirs = RicDisk.find_active_dirs()
169
- files_list.each do |dir|
170
- dir = File.expand_path(dir)
171
- if File.directory?(dir)
172
- #if dirs.include?(dir)
173
- puts "iterate_through_file_list_for_disks() Legit dir: #{green dir}" if verbose
174
- rd = RicDisk.new(dir)
175
- rd.write_config_yaml_to_disk(dir)
176
- #RicDisk.write_config_yaml_to_disk(dir)
177
- #RicDisk.calculate_stats_files (CLASS) => will become OBJECT compute_stats_files
178
- rd.compute_stats_files() # dir is inutile # TODO
179
- else
180
- deb red("Doesnt seem a legit dir to me: #{dir}")
181
- # deb "Figghiu ri buttana: doesnt exist #{red dir}"
182
- end
175
+ # else
176
+ raise "Wrong input, I need an array here: #{files_list} " unless files_list.is_a?(Array)
177
+ puts "iterate_through_file_list_for_disks(): I consider files_list as a list of directories to parse :)" if verbose
178
+
179
+ #dirs = RicDisk.find_active_dirs()
180
+ files_list.each do |dir|
181
+ dir = File.expand_path(dir)
182
+ if File.directory?(dir)
183
+ #if dirs.include?(dir)
184
+ puts "iterate_through_file_list_for_disks() Legit dir: #{green dir}" if verbose
185
+ rd = RicDisk.new(dir)
186
+ puts "RicDisk: #{rd}"
187
+ rd.write_config_yaml_to_disk(dir)
188
+ #RicDisk.write_config_yaml_to_disk(dir)
189
+ #RicDisk.calculate_stats_files (CLASS) => will become OBJECT compute_stats_files
190
+ rd.compute_stats_files() # dir is inutile # TODO
191
+ else
192
+ raise("Doesnt seem a dir to me, quitting: #{dir}")
183
193
  end
194
+ end
184
195
  #end
185
196
  end #/iterate_through_file_list_for_disks
186
197