storazzo 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/Gemfile +7 -4
  3. data/Makefile +20 -1
  4. data/README.md +23 -0
  5. data/Rakefile +35 -8
  6. data/VERSION +1 -1
  7. data/bin/hello-storazzo +5 -0
  8. data/bin/ricdisk-magic +50 -57
  9. data/bin/stats-with-md5 +268 -297
  10. data/lib/storazzo/colors.rb +43 -45
  11. data/lib/storazzo/common.rb +80 -17
  12. data/lib/storazzo/debug.rb +5 -8
  13. data/lib/storazzo/hashify.rb +44 -43
  14. data/lib/storazzo/main.rb +44 -40
  15. data/lib/storazzo/media/abstract_ric_disk.rb +163 -104
  16. data/lib/storazzo/media/gcs_bucket.rb +51 -15
  17. data/lib/storazzo/media/local_folder.rb +43 -46
  18. data/lib/storazzo/media/mount_point.rb +12 -2
  19. data/lib/storazzo/ric_disk.rb +223 -251
  20. data/lib/storazzo/ric_disk_config.rb +230 -193
  21. data/lib/storazzo/ric_disk_sample_config.rb +12 -16
  22. data/lib/storazzo/ric_disk_statsfile.rb +17 -16
  23. data/lib/storazzo/ric_disk_ugly.rb +35 -38
  24. data/lib/storazzo/version.rb +7 -7
  25. data/lib/storazzo.rb +34 -29
  26. data/storazzo.gemspec +22 -20
  27. data/test/media/test_abstract_ric_disk.rb +19 -0
  28. data/test/media/test_gcs_bucket.rb +58 -0
  29. data/test/media/test_local_folder.rb +145 -0
  30. data/test/media/test_mount_point.rb +25 -0
  31. data/test/test_ric_disk.rb +16 -0
  32. data/test/test_ric_disk_config.rb +20 -29
  33. data/test/test_ric_disk_stats_file.rb +13 -14
  34. data/test/test_storazzo.rb +26 -26
  35. data/var/dumps/file_stat.linux.yaml +15 -0
  36. data/var/dumps/file_stat.macosx.yaml +15 -0
  37. data/var/test/disks/disk02-full/Rakefile +13 -0
  38. data/var/test/disks/ricdisk_stats_v11.rds +11 -0
  39. metadata +38 -10
  40. data/test/test_gcs_bucket.rb +0 -70
  41. data/test/test_local_folder.rb +0 -121
data/bin/stats-with-md5 CHANGED
@@ -3,7 +3,7 @@
3
3
  require 'digest/md5'
4
4
  require 'digest'
5
5
  require 'socket'
6
- require 'optparse' # http://ruby.about.com/od/advancedruby/a/optionparser.htm
6
+ require 'optparse' # http://ruby.about.com/od/advancedruby/a/optionparser.htm
7
7
  require 'date' # for DateTime
8
8
  require 'tempfile'
9
9
 
@@ -30,36 +30,34 @@ DEFAULT_MAX_FILES = 'Infinite'
30
30
 
31
31
  =end
32
32
 
33
-
34
-
35
- def deb(s); puts "#DEB #{s}" if $DEBUG; end
33
+ def deb(s); puts "#DEB #{s}" if $DEBUG; end
36
34
  # colors 16
37
- def gray(s) "\033[1;30m#{s}\033[0m" ; end
38
- def green(s) "\033[1;32m#{s}\033[0m" ; end
39
- def red(s) "\033[1;31m#{s}\033[0m" ; end
40
- def yellow(s) "\033[1;33m#{s}\033[0m" ; end
41
- def blue(s) "\033[1;34m#{s}\033[0m" ; end
42
- def purple(s) "\033[1;35m#{s}\033[0m" ; end
43
- def azure(s) "\033[1;36m#{s}\033[0m" ; end
44
- def white(s) "\033[1;37m#{s}\033[0m" ; end
35
+ def gray(s) "\033[1;30m#{s}\033[0m"; end
36
+ def green(s) "\033[1;32m#{s}\033[0m"; end
37
+ def red(s) "\033[1;31m#{s}\033[0m"; end
38
+ def yellow(s) "\033[1;33m#{s}\033[0m"; end
39
+ def blue(s) "\033[1;34m#{s}\033[0m"; end
40
+ def purple(s) "\033[1;35m#{s}\033[0m"; end
41
+ def azure(s) "\033[1;36m#{s}\033[0m"; end
42
+ def white(s) "\033[1;37m#{s}\033[0m"; end
45
43
 
46
44
  # colors 64k
47
- def orange(s) "\033[38;5;208m#{s}\033[0m" ; end
45
+ def orange(s) "\033[38;5;208m#{s}\033[0m"; end
48
46
 
49
47
  # Program constants, automagically picked up by RicLib
50
48
  # More configuration could be written in:
51
49
  # $GIC/etc/ricsvn/<FILENAME>.yml
52
50
  # That would go into the variable '$prog_conf_d'
53
51
  $myconf = {
54
- :app_name => "AppName should be sth like #{$0}",
55
- :description => "
52
+ :app_name => "AppName should be sth like #{$0}",
53
+ :description => "
56
54
  This program gets stats from Ruby File.Stats library
57
55
  plus computes MD5 or CRC32 of the file.
58
- And bakes it in a way that can be easily parsed.
56
+ And bakes it in a way that can be easily parsed.
59
57
  ".strip.gsub(/^\s+/, "").gsub(/\s+$/, ""),
60
58
  }
61
59
 
62
- def usage(comment=nil)
60
+ def usage(comment = nil)
63
61
  puts white($optparse.banner)
64
62
  puts($optparse.summarize)
65
63
  puts("Description: " + gray($myconf[:description]))
@@ -68,12 +66,12 @@ def usage(comment=nil)
68
66
  end
69
67
 
70
68
  def reset_autowrite_file()
71
- $autowrite_file = "# #{File.basename $0} Autowrite v1.0 #{Time.now} reincarnation=#{$reset_autowrite_file_reincarnation}\n"
72
- $reset_autowrite_file_reincarnation += 1 # ($reset_autowrite_file_reincarnation + 1) # rescue 1
73
- end
69
+ $autowrite_file = "# #{File.basename $0} Autowrite v1.0 #{Time.now} reincarnation=#{$reset_autowrite_file_reincarnation}\n"
70
+ $reset_autowrite_file_reincarnation += 1 # ($reset_autowrite_file_reincarnation + 1) # rescue 1
71
+ end
74
72
 
75
73
  # include it in main if you want a custome one
76
- def init() # see lib_autoinit in lib/util.rb
74
+ def init() # see lib_autoinit in lib/util.rb
77
75
  $opts = {}
78
76
  # setting defaults
79
77
  $opts[:verbose] = false
@@ -87,335 +85,308 @@ def init() # see lib_autoinit in lib/util.rb
87
85
 
88
86
  $optparse = OptionParser.new do |opts|
89
87
  opts.banner = "#{$0} v.#{$PROG_VER}\n Usage: #{File.basename $0} [options] file1 file2 ..."
90
- opts.on( '-a', '--autowrite', 'automatically writes results to Root Folder (DFLT=false)' ) { $opts[:autowrite] = true }
91
- opts.on( '-c', '--no-color', 'disables color (DFLT=false, that is color enabled)' ) { $opts[:color] = false }
92
- opts.on( '-d', '--debug', 'enables debug (DFLT=false)' ) { $opts[:debug] = true ; $DEBUG = true }
93
- opts.on( '-h', '--help', 'Display this screen' ) { usage }
94
- opts.on( '-m', '--max-files NUMBER', "sets max files per Dir to X (DFLT=#{DEFAULT_MAX_FILES})" ) {|nfiles| $opts[:max_files] = nfiles.to_i }
95
- opts.on( '-n', '--dryrun', "Don't really execute code" ) { $opts[:dryrun] = true }
96
- opts.on( '-l', '--logfile FILE', 'Write log to FILE' ) {|file| $opts[:logfile] = file }
97
- opts.on( '-v', '--verbose', 'Output more information' ) { $opts[:verbose] = true}
88
+ opts.on('-a', '--autowrite', 'automatically writes results to Root Folder (DFLT=false)') {
89
+ $opts[:autowrite] = true
90
+ }
91
+ opts.on('-c', '--no-color', 'disables color (DFLT=false, that is color enabled)') { $opts[:color] = false }
92
+ opts.on('-d', '--debug', 'enables debug (DFLT=false)') { $opts[:debug] = true; $DEBUG = true }
93
+ opts.on('-h', '--help', 'Display this screen') { usage }
94
+ opts.on('-m', '--max-files NUMBER', "sets max files per Dir to X (DFLT=#{DEFAULT_MAX_FILES})") { |nfiles|
95
+ $opts[:max_files] = nfiles.to_i
96
+ }
97
+ opts.on('-n', '--dryrun', "Don't really execute code") { $opts[:dryrun] = true }
98
+ opts.on('-l', '--logfile FILE', 'Write log to FILE') { |file| $opts[:logfile] = file }
99
+ opts.on('-v', '--verbose', 'Output more information') { $opts[:verbose] = true }
98
100
  end
99
101
  $optparse.parse!
100
102
  end
101
103
 
102
104
  def getSafeCreationTime(file)
103
- File.ctime(file)
105
+ File.ctime(file)
104
106
  end
105
107
 
106
108
  def compute_stats_and_md5(file)
107
- ret={}
108
- ret[:name] = file
109
-
110
- begin
111
- stats = File.stat(file)
112
- ret[:stats_object] = stats # TODO deprecate
113
- deb("Stats methods: #{stats.methods.sort.join(', ')}")
114
- #deb(stats.ftype)
115
- #puts(stats.methods)
116
- deb(stats.ctime)
117
- #puts(stats.birthtime rescue (stats.ctime))
118
-
119
- # On Mac:
120
- #<File::Stat
121
- # dev=0x100000f,
122
- # ino=1247768,
123
- # mode=0100644 (file rw-r--r--),
124
- # nlink=1,
125
- # uid=164825 (ricc),
126
- # gid=89939 (primarygroup),
127
- # rdev=0x0 (0, 0),
128
- # size=564,
129
- # blksize=4096,
130
- # blocks=8,
131
- # atime=2022-03-05 22:36:48.373362127 +0100 (1646516208),
132
- # mtime=2022-03-05 22:36:48.176789949 +0100 (1646516208),
133
- # ctime=2022-03-05 22:36:48.176789949 +0100 (1646516208)>
134
-
135
- # On LInux:
136
- #<File::Stat
137
- # dev=0xfe01,
138
- # ino=6293055,
139
- # mode=0100644 (file rw-r--r--),
140
- # nlink=1,
141
- # uid=164825 (ricc),
142
- # gid=89939 (primarygroup),
143
- # rdev=0x0 (0, 0),
144
- # size=7,
145
- # blksize=4096,
146
- # blocks=8,
147
- # atime=2022-06-27 08:49:38.586706861 +0200 (1656312578),
148
- # mtime=2022-03-23 14:28:45 +0100 (1648042125),
149
- # ctime=2022-05-30 10:12:10.381629305 +0200 (1653898330)>
150
- ret[:size] = stats.size
151
- ret[:mode] = stats.mode
152
- # datetimes
153
- ret[:stat_safebirthtime] = getSafeCreationTime(file) # in Mac uses birthtime,but on Linux wont work
154
- # defined?(stats.birthtime) ? # rescue stats.mtime # eg, 2022-03-05 21:47:51 +0100 on Mac (not implemented on my Linuix)#
155
- # stats.birthtime : # works on Mac
156
- # stats.ctime
157
- ret[:stat_mtime] = stats.mtime # eg, 2022-03-05 21:47:51 +0100 Returns the modification time of stat.
158
- ret[:stat_ctime] = stats.ctime # eg, 2022-03-05 21:47:51 +0100 Returns the change time for stat (that is, the time directory information about the file was changed, not the file itself). Note that on Windows (NTFS), returns creation time (birth time).
159
- ret[:stat_atime] = stats.atime # eg, 2022-03-05 21:47:51 +0100 Last Access
160
- ret[:stat_gid] = stats.gid # Group Id
161
- ret[:stat_uid] = stats.uid # UUID
162
- ret[:stat_ftype] = stats.ftype #
163
-
164
- ret[:md5] = '______________NONE______________'
165
- if stats.ftype != "directory"
166
- file_content = File.read(file)
167
- ret[:md5] = Digest::MD5.hexdigest(file_content) # rescue 'none ' #=> string with hexadecimal digits
168
- end
169
- rescue Errno::EISDIR => e
170
- #puts "It's a dir, nothing I can do here except skipping the stuff"
171
- ret[:md5] = "_________I'm a dir sorry________"
172
- rescue Exception => e
173
- ret[:error] = e
174
- end
175
- ret
109
+ ret = {}
110
+ ret[:name] = file
111
+
112
+ begin
113
+ stats = File.stat(file)
114
+ ret[:stats_object] = stats # TODO deprecate
115
+ deb("Stats methods: #{stats.methods.sort.join(', ')}")
116
+ deb(stats.ctime)
117
+ # puts(stats.birthtime rescue (stats.ctime))
118
+ # On Mac/Linux: see test/dumps/***.yaml
119
+ ret[:size] = stats.size
120
+ ret[:mode] = stats.mode
121
+ # datetimes
122
+ ret[:stat_safebirthtime] = getSafeCreationTime(file) # in Mac uses birthtime,but on Linux wont work
123
+ # defined?(stats.birthtime) ? # rescue stats.mtime # eg, 2022-03-05 21:47:51 +0100 on Mac (not implemented on my Linuix)#
124
+ # stats.birthtime : # works on Mac
125
+ # stats.ctime
126
+ ret[:stat_mtime] = stats.mtime # eg, 2022-03-05 21:47:51 +0100 Returns the modification time of stat.
127
+ ret[:stat_ctime] = stats.ctime # eg, 2022-03-05 21:47:51 +0100 Returns the change time for stat (that is, the time directory information about the file was changed, not the file itself). Note that on Windows (NTFS), returns creation time (birth time).
128
+ ret[:stat_atime] = stats.atime # eg, 2022-03-05 21:47:51 +0100 Last Access
129
+ ret[:stat_gid] = stats.gid # Group Id
130
+ ret[:stat_uid] = stats.uid # UUID
131
+ ret[:stat_ftype] = stats.ftype #
132
+
133
+ ret[:md5] = '______________NONE______________'
134
+ if stats.ftype != "directory"
135
+ file_content = File.read(file)
136
+ ret[:md5] = Digest::MD5.hexdigest(file_content) # rescue 'none ' #=> string with hexadecimal digits
137
+ end
138
+ rescue Errno::EISDIR => e
139
+ # puts "It's a dir, nothing I can do here except skipping the stuff"
140
+ ret[:md5] = "_________I'm a dir sorry________"
141
+ rescue Exception => e
142
+ ret[:error] = e
143
+ end
144
+ ret
176
145
  end
177
146
 
178
147
  $print_stats_and_md5_version = "1.1a_220624_F" # files
179
148
  $print_stats_and_md5_counter = 0
180
- $print_stats_and_md5_for_gcs_version = "1.1alpha_220628_G" #GCS bucket
149
+ $print_stats_and_md5_for_gcs_version = "1.1alpha_220628_G" # GCS bucket
181
150
 
182
151
  def stats_and_md5_number_of_files_processed()
183
- return $print_stats_and_md5_counter
152
+ return $print_stats_and_md5_counter
184
153
  end
185
154
 
186
155
  def smells_like_gcs?(file)
187
- file =~ /gs:\/\//
156
+ file =~ /gs:\/\//
188
157
  end
189
158
 
190
159
  # You give me gs://bucket123/path/to/dir
191
- def print_stats_and_md5_for_gcs(mybucket_with_subdir, opts={})
192
- # opts_color = opts.fetch :color, true#
193
- # opts_verbose = opts.fetch :verbose, false
194
- # opts_ping_frequency = opts.fetch :ping_frequency, 50
195
- opts_max_files = opts.fetch :max_files, nil # BigDecimal('Infinity')
196
- opts_autowrite = opts.fetch :autowrite, false
197
-
198
- raise "Wrong GCS Path: #{mybucket_with_subdir}" unless smells_like_gcs?(mybucket_with_subdir)
199
- #puts "0. mybucket_with_subdir: #{mybucket_with_subdir}" # gs://mybucket123/path/to/dir
200
- path_split = mybucket_with_subdir.split('/')
201
- mybucket = path_split[0,3].join('/') # gs://mybucket123/
202
- gcs_subpath = path_split[3,42].join('/') # path/to/dir
203
- cleanedup_bucket = path_split[2] # mybucket123
204
- #puts "1. mybucket: #{mybucket}"
205
- #puts "2. gcs_subpath: #{gcs_subpath}"
206
- #puts "3. cleanedup_bucket: #{cleanedup_bucket}"
207
-
208
- puts("[print_stats_and_md5_for_gcs] version=#{$print_stats_and_md5_for_gcs_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on='#{Time.now}' bucket=#{mybucket} subpath=#{gcs_subpath}")
209
- begin
210
- require "google/cloud/storage"
211
- project_id = `gcloud config get project 2>/dev/null`
212
- deb "project_id: #{project_id}"
213
- storage = Google::Cloud::Storage.new(project_id: project_id)
214
- bucket = storage.bucket(cleanedup_bucket)
215
- # grabs them ALL if nil, optherwise its an integer.
216
- files = opts_max_files.nil? ?
217
- bucket.files : # nil -> all of them (I've tried sth more idiomatic like -1 or nil or Infinite. they all failed.)
218
- bucket.files.first(opts_max_files) # not nil -> first(N)
219
- #puts(files)
220
- deb("All Sorted Methods: #{files.first.methods.sort}")
221
- files.each do |gcs_file|
222
- md5 = Base64.decode64(gcs_file.md5).unpack('H*')[0] # md5 in base64 -> then de-binarizied: Base64.urlsafe_decode64(md5).unpack('H*')[0]
223
- size = gcs_file.size # crc rescue :boh
224
- time_created = gcs_file.created_at
225
- mode = 'XXXXXX' # for compatbility with files
226
- file_type = gcs_file.name.to_s =~ /\/$/ ? 'd' : 'f' # if ends with a slash its a DIR :P
227
- #puts("[OLD_GCS_v11] #{md5} #{size}\t#{time_created} [#{gcs_file.content_type}]\t#{gcs_file.name}")
228
- print_colored_polymoprhic('gcs_v1.2', md5, mode, file_type, size, gcs_file.content_type, time_created, gcs_file.name, opts)
229
- deb gcs_file.to_yaml
230
- end
231
- #puts("Hash Methods: #{files.first.methods.select{|x| x.match /hash/}}")
232
- rescue LoadError # require Exception
233
- puts 'Error with library #{$!}, maybe type: gem install google-cloud-storage'
234
- rescue Exception # generic Exception
235
- puts "print_stats_and_md5_for_gcs(): Generic Error: #{$!}"
236
- exit 1
237
- end
238
-
239
- autowrite_to_dir_or_gcs(:gcs, mybucket_with_subdir) if opts_autowrite
240
- end
160
+ def print_stats_and_md5_for_gcs(mybucket_with_subdir, opts = {})
161
+ # opts_color = opts.fetch :color, true#
162
+ # opts_verbose = opts.fetch :verbose, false
163
+ # opts_ping_frequency = opts.fetch :ping_frequency, 50
164
+ opts_max_files = opts.fetch :max_files, nil # BigDecimal('Infinity')
165
+ opts_autowrite = opts.fetch :autowrite, false
166
+
167
+ raise "Wrong GCS Path: #{mybucket_with_subdir}" unless smells_like_gcs?(mybucket_with_subdir)
168
+
169
+ # puts "0. mybucket_with_subdir: #{mybucket_with_subdir}" # gs://mybucket123/path/to/dir
170
+ path_split = mybucket_with_subdir.split('/')
171
+ mybucket = path_split[0, 3].join('/') # gs://mybucket123/
172
+ gcs_subpath = path_split[3, 42].join('/') # path/to/dir
173
+ cleanedup_bucket = path_split[2] # mybucket123
174
+ # puts "1. mybucket: #{mybucket}"
175
+ # puts "2. gcs_subpath: #{gcs_subpath}"
176
+ # puts "3. cleanedup_bucket: #{cleanedup_bucket}"
177
+
178
+ puts("[print_stats_and_md5_for_gcs] version=#{$print_stats_and_md5_for_gcs_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on='#{Time.now}' bucket=#{mybucket} subpath=#{gcs_subpath}")
179
+ begin
180
+ require "google/cloud/storage"
181
+ project_id = `gcloud config get project 2>/dev/null`
182
+ deb "project_id: #{project_id}"
183
+ storage = Google::Cloud::Storage.new(project_id: project_id)
184
+ bucket = storage.bucket(cleanedup_bucket)
185
+ # grabs them ALL if nil, optherwise its an integer.
186
+ files = opts_max_files.nil? ?
187
+ bucket.files : # nil -> all of them (I've tried sth more idiomatic like -1 or nil or Infinite. they all failed.)
188
+ bucket.files.first(opts_max_files) # not nil -> first(N)
189
+ # puts(files)
190
+ deb("All Sorted Methods: #{files.first.methods.sort}")
191
+ files.each do |gcs_file|
192
+ md5 = Base64.decode64(gcs_file.md5).unpack('H*')[0] # md5 in base64 -> then de-binarizied: Base64.urlsafe_decode64(md5).unpack('H*')[0]
193
+ size = gcs_file.size # crc rescue :boh
194
+ time_created = gcs_file.created_at
195
+ mode = 'XXXXXX' # for compatbility with files
196
+ file_type = gcs_file.name.to_s =~ /\/$/ ? 'd' : 'f' # if ends with a slash its a DIR :P
197
+ # puts("[OLD_GCS_v11] #{md5} #{size}\t#{time_created} [#{gcs_file.content_type}]\t#{gcs_file.name}")
198
+ print_colored_polymoprhic('gcs_v1.2', md5, mode, file_type, size, gcs_file.content_type, time_created,
199
+ gcs_file.name, opts)
200
+ deb gcs_file.to_yaml
201
+ end
202
+ # puts("Hash Methods: #{files.first.methods.select{|x| x.match /hash/}}")
203
+ rescue LoadError # require Exception
204
+ puts 'Error with library #{$!}, maybe type: gem install google-cloud-storage'
205
+ rescue Exception # generic Exception
206
+ puts "print_stats_and_md5_for_gcs(): Generic Error: #{$!}"
207
+ exit 1
208
+ end
209
+ autowrite_to_dir_or_gcs(:gcs, mybucket_with_subdir) if opts_autowrite
210
+ end
241
211
 
242
212
  # Generic polymorpghic coloring, can be GCS. Would have been nice to pass a Dict so easier to extend, but Ruby doesnt support doesblt Dict with magic args. Plus might be a GOOD
243
213
  # thing its hard to change since it breaks Storazzo.
244
214
  # TODO(ricc): make sure the dates are the same. Currently:
245
215
  # GCS Date: 2021-12-20T12:26:13+00:00 DateTime
246
216
  # File Date: 2022-05-30 11:55:42 +0200 Time
247
- def print_colored_polymoprhic(entity_type, md5, mode, file_type, size, content_type, creation_time, filename, opts={})
248
- opts_color = opts.fetch :color, true
249
- opts_verbose = opts.fetch :verbose, false
250
- opts_ping_frequency = opts.fetch :ping_frequency, 50
251
- opts_autowrite = opts.fetch :autowrite, false
252
-
253
- # Increment counter across all!
254
- $print_stats_and_md5_counter += 1 # global counter! I should increment at the END of fucntion.. but you never know if i exit wrongly so i do at beginning. so within this function the real #files is N-1 (i.e., if N is 6, we're processing file %5)
255
- # in main i had to put 2 branched to invoke a single thing but if logic changes that sentence might be printed twice. Instead here, the BEGIN line could be nice to do here instead.
256
- #$stderr.puts("print_stats_and_md5() Fikst invocation! Consider moving the 2 things in main here :)") if ($print_stats_and_md5_counter == 1)
257
- #puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ($print_stats_and_md5_counter == 1)
258
- $stderr.puts "-- print_stats_and_md5(v#{$print_stats_and_md5_version}): #{$print_stats_and_md5_counter - 1} files processed --" if (($print_stats_and_md5_counter) % opts_ping_frequency == 1 )
259
-
260
- maybecolored_md5 = opts_color ? red(md5) : md5
261
- maybecolored_filename = opts_color ? azure(filename) : filename
262
- enlarged_size = sprintf("%7o", size.to_s ) # or SIZE
263
- maybecolored_size = opts_color ? yellow(enlarged_size) : enlarged_size
264
- maybecolored_file_type = file_type
265
- colored_content_type = opts_color ? white(content_type) : content_type
266
- if opts_color
267
- maybecolored_file_type = case file_type
268
- when 'f' then green(file_type)
269
- when 'd' then blue(file_type)
270
- when 's' then azure(file_type)
271
- else red(file_type)
272
- end
273
- end
274
- standardized_creation_time = creation_time.is_a?(DateTime) ?
275
- creation_time :
276
- DateTime.parse(creation_time.to_s) # if you prefer to use Time since its already supported by Storazzo (but its ugly since it has SPACES! so hard to parse) use tt = Time.parse(d.to_s)
277
- # as per https://stackoverflow.com/questions/279769/convert-to-from-datetime-and-time-in-ruby
278
- #puts "#{maybecolored_md5} #{mode} #{maybecolored_file_type} #{maybecolored_size}\t#{creation_time}(DEB #{creation_time.class})(DEB2 #{DateTime.parse(creation_time.to_s)}) [#{colored_content_type}] #{maybecolored_filename}"
279
- # this mightr be colored
280
- str = "[#{entity_type}] #{maybecolored_md5} #{mode} #{maybecolored_file_type} #{standardized_creation_time} #{maybecolored_size} [#{colored_content_type}] #{maybecolored_filename}\n"
281
- # this will NEVER be colored. WARNING, now you need to maintain TWO of these beasts :/
282
- non_colored_str = "[#{entity_type}] #{md5} #{mode} #{file_type} #{standardized_creation_time} #{enlarged_size} [#{content_type}] #{filename}\n"
283
- if(opts_autowrite)
284
- # need to guarantee this is NOT colored. The easiest way to do it is TWICVE as expensive but... whatevs.
285
- # TODO(ricc): fire me for this lazimness!
286
- $autowrite_file += (non_colored_str)
287
- end
288
- print(str)
289
- return str
217
+ def print_colored_polymoprhic(entity_type, md5, mode, file_type, size, content_type, creation_time, filename, opts = {})
218
+ opts_color = opts.fetch :color, true
219
+ opts_verbose = opts.fetch :verbose, false
220
+ opts_ping_frequency = opts.fetch :ping_frequency, 50
221
+ opts_autowrite = opts.fetch :autowrite, false
222
+
223
+ # Increment counter across all!
224
+ $print_stats_and_md5_counter += 1 # global counter! I should increment at the END of fucntion.. but you never know if i exit wrongly so i do at beginning. so within this function the real #files is N-1 (i.e., if N is 6, we're processing file %5)
225
+ # in main i had to put 2 branched to invoke a single thing but if logic changes that sentence might be printed twice. Instead here, the BEGIN line could be nice to do here instead.
226
+ # $stderr.puts("print_stats_and_md5() Fikst invocation! Consider moving the 2 things in main here :)") if ($print_stats_and_md5_counter == 1)
227
+ # puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ($print_stats_and_md5_counter == 1)
228
+ $stderr.puts "-- print_stats_and_md5(v#{$print_stats_and_md5_version}): #{$print_stats_and_md5_counter - 1} files processed --" if (($print_stats_and_md5_counter) % opts_ping_frequency == 1)
229
+
230
+ maybecolored_md5 = opts_color ? red(md5) : md5
231
+ maybecolored_filename = opts_color ? azure(filename) : filename
232
+ enlarged_size = sprintf("%7o", size.to_s) # or SIZE
233
+ maybecolored_size = opts_color ? yellow(enlarged_size) : enlarged_size
234
+ maybecolored_file_type = file_type
235
+ colored_content_type = opts_color ? white(content_type) : content_type
236
+ if opts_color
237
+ maybecolored_file_type = case file_type
238
+ when 'f' then green(file_type)
239
+ when 'd' then blue(file_type)
240
+ when 's' then azure(file_type)
241
+ else red(file_type)
242
+ end
243
+ end
244
+ standardized_creation_time = creation_time.is_a?(DateTime) ?
245
+ creation_time :
246
+ DateTime.parse(creation_time.to_s) # if you prefer to use Time since its already supported by Storazzo (but its ugly since it has SPACES! so hard to parse) use tt = Time.parse(d.to_s)
247
+ # as per https://stackoverflow.com/questions/279769/convert-to-from-datetime-and-time-in-ruby
248
+ # puts "#{maybecolored_md5} #{mode} #{maybecolored_file_type} #{maybecolored_size}\t#{creation_time}(DEB #{creation_time.class})(DEB2 #{DateTime.parse(creation_time.to_s)}) [#{colored_content_type}] #{maybecolored_filename}"
249
+ # this mightr be colored
250
+ str = "[#{entity_type}] #{maybecolored_md5} #{mode} #{maybecolored_file_type} #{standardized_creation_time} #{maybecolored_size} [#{colored_content_type}] #{maybecolored_filename}\n"
251
+ # this will NEVER be colored. WARNING, now you need to maintain TWO of these beasts :/
252
+ non_colored_str = "[#{entity_type}] #{md5} #{mode} #{file_type} #{standardized_creation_time} #{enlarged_size} [#{content_type}] #{filename}\n"
253
+ if (opts_autowrite)
254
+ # need to guarantee this is NOT colored. The easiest way to do it is TWICVE as expensive but... whatevs.
255
+ # TODO(ricc): fire me for this lazimness!
256
+ $autowrite_file += (non_colored_str)
257
+ end
258
+ print(str)
259
+ return str
290
260
  end
291
261
 
292
- def print_stats_and_md5(file, opts={})
293
- return print_stats_and_md5_for_gcs(file, opts) if smells_like_gcs?(file)
294
-
295
- opts_color = opts.fetch :color, true
296
- opts_verbose = opts.fetch :verbose, false
297
- opts_ping_frequency = opts.fetch :ping_frequency, 50
298
-
299
-
300
- # $print_stats_and_md5_counter += 1 # global counter! I should increment at the END of fucntion.. but you never know if i exit wrongly so i do at beginning. so within this function the real #files is N-1 (i.e., if N is 6, we're processing file %5)
301
-
302
- # # in main i had to put 2 branched to invoke a single thing but if logic changes that sentence might be printed twice. Instead here, the BEGIN line could be nice to do here instead.
303
- # #$stderr.puts("print_stats_and_md5() Fikst invocation! Consider moving the 2 things in main here :)") if ($print_stats_and_md5_counter == 1)
304
- # puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ($print_stats_and_md5_counter == 1)
305
- # $stderr.puts "-- print_stats_and_md5(v#{$print_stats_and_md5_version}): #{$print_stats_and_md5_counter - 1} files processed --" if (($print_stats_and_md5_counter) % opts_ping_frequency == 1 )
306
-
307
- #puts "print_stats_and_md5: #{file}" if opts_verbose
308
- stats = compute_stats_and_md5 file
309
- #maybecolored_md5 = opts_color ? red(stats[:md5]) : stats[:md5]
310
- #maybecolored_filename = opts_color ? azure(stats[:name]) : stats[:name]
311
- #maybecolored_size = opts_color ? white(stats[:size]) : stats[:size]
312
- mode = sprintf("%06o", stats[:mode] ) rescue :ERROR # .to_s.right(4) #=> "100644"
313
- file_type = stats[:stat_ftype][0] rescue '?'
314
- file_type = 's' if File.symlink?(file)
315
- content_type = `file --mime-type -b '#{file}' 2>/dev/null`.chomp # .split(': ',2)[1]
316
-
317
- #colored_string = "[COL] #{red stats[:md5]} #{stats[:size]} #{stats[:stat_safebirthtime]} #{white stats[:name]}"
318
- #boring_string = "[B/W] #{ stats[:md5]} #{stats[:size]} #{stats[:stat_safebirthtime]} #{ stats[:name]}"
319
- #puts(opts_color ? colored_string : boring_string)
320
- #puts "[OLDv1.1deprecated] #{maybecolored_md5} #{mode} #{file_type} #{maybecolored_size} #{stats[:stat_safebirthtime]} #{maybecolored_filename}"
321
- print_colored_polymoprhic('file_v1.2', stats[:md5], mode, file_type, stats[:size], content_type, stats[:stat_safebirthtime], stats[:name], opts)
262
+ def print_stats_and_md5(file, opts = {})
263
+ return print_stats_and_md5_for_gcs(file, opts) if smells_like_gcs?(file)
264
+
265
+ opts_color = opts.fetch :color, true
266
+ opts_verbose = opts.fetch :verbose, false
267
+ opts_ping_frequency = opts.fetch :ping_frequency, 50
268
+
269
+ # $print_stats_and_md5_counter += 1 # global counter! I should increment at the END of fucntion.. but you never know if i exit wrongly so i do at beginning. so within this function the real #files is N-1 (i.e., if N is 6, we're processing file %5)
270
+
271
+ # # in main i had to put 2 branched to invoke a single thing but if logic changes that sentence might be printed twice. Instead here, the BEGIN line could be nice to do here instead.
272
+ # #$stderr.puts("print_stats_and_md5() Fikst invocation! Consider moving the 2 things in main here :)") if ($print_stats_and_md5_counter == 1)
273
+ # puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ($print_stats_and_md5_counter == 1)
274
+ # $stderr.puts "-- print_stats_and_md5(v#{$print_stats_and_md5_version}): #{$print_stats_and_md5_counter - 1} files processed --" if (($print_stats_and_md5_counter) % opts_ping_frequency == 1 )
275
+
276
+ # puts "print_stats_and_md5: #{file}" if opts_verbose
277
+ stats = compute_stats_and_md5 file
278
+ # maybecolored_md5 = opts_color ? red(stats[:md5]) : stats[:md5]
279
+ # maybecolored_filename = opts_color ? azure(stats[:name]) : stats[:name]
280
+ # maybecolored_size = opts_color ? white(stats[:size]) : stats[:size]
281
+ mode = sprintf("%06o", stats[:mode]) rescue :ERROR # .to_s.right(4) #=> "100644"
282
+ file_type = stats[:stat_ftype][0] rescue '?'
283
+ file_type = 's' if File.symlink?(file)
284
+ content_type = `file --mime-type -b '#{file}' 2>/dev/null`.chomp # .split(': ',2)[1]
285
+
286
+ # colored_string = "[COL] #{red stats[:md5]} #{stats[:size]} #{stats[:stat_safebirthtime]} #{white stats[:name]}"
287
+ # boring_string = "[B/W] #{ stats[:md5]} #{stats[:size]} #{stats[:stat_safebirthtime]} #{ stats[:name]}"
288
+ # puts(opts_color ? colored_string : boring_string)
289
+ # puts "[OLDv1.1deprecated] #{maybecolored_md5} #{mode} #{file_type} #{maybecolored_size} #{stats[:stat_safebirthtime]} #{maybecolored_filename}"
290
+ print_colored_polymoprhic('file_v1.2', stats[:md5], mode, file_type, stats[:size], content_type,
291
+ stats[:stat_safebirthtime], stats[:name], opts)
322
292
  end
323
293
 
324
- def autowrite_to_dir_or_gcs(fs_type, path, opts={})
325
- autowrite_version = "1.1_28jun22"
326
- file_to_save = "stats-with-md5.log"
327
- path_to_save = path + "/" + file_to_save
328
- puts(red "TODO(ricc): write results [size=#{$autowrite_file.size}] in this Directory: #{path_to_save}") if fs_type == :dir
329
- puts(red "TODO(ricc): write results [size=#{$autowrite_file.size}] in this GCS Bucket or Dir: #{path_to_save}") if fs_type == :gcs
330
-
331
- ##########################################################################
332
- # do the thing: creates file locally and moves to GCS or Dir.
333
- ##########################################################################
334
- $autowrite_file += "# [autowrite_to_dir_or_gcs v#{autowrite_version}]: about to write it now. #{Time.now}"
335
- tmpfile = Tempfile.new(path_to_save)
336
- tmpfile.write($autowrite_file)
337
- cmd = :no_cmd_yet
338
-
339
- if fs_type == :gcs
340
- cmd = "gsutil mv '#{tmpfile.path}' #{path}/#{file_to_save}"
341
- end
342
- if fs_type == :dir
343
- cmd = "mv '#{tmpfile.path}' #{path}/#{file_to_save}"
344
- end
345
-
346
- puts("TMP File is this big: #{azure tmpfile.size}")
347
- puts("About to execute this: #{white cmd}")
348
- ret = `#{cmd}`
349
- if $?.exitstatus != 0
350
- puts "Error to execute: #{red cmd}. Exit: #{red $?.exitstatus}"
351
- exit 53
352
- end
353
- puts "Command returned: '#{$?.exitstatus}'" # - #{$?.class}"
354
- # wow: fork { exec("ls") }
355
-
356
- # removes the tmpfile :)
357
- tmpfile.close
358
- tmpfile.unlink
359
-
360
- #####################################
361
- # finished, lets now clean up the file...
362
- #####################################
363
- reset_autowrite_file()
364
- return :ok
294
+ def autowrite_to_dir_or_gcs(fs_type, path, opts = {})
295
+ autowrite_version = "1.1_28jun22"
296
+ file_to_save = "stats-with-md5.log"
297
+ path_to_save = path + "/" + file_to_save
298
+ puts(red "TODO(ricc): write results [size=#{$autowrite_file.size}] in this Directory: #{path_to_save}") if fs_type == :dir
299
+ puts(red "TODO(ricc): write results [size=#{$autowrite_file.size}] in this GCS Bucket or Dir: #{path_to_save}") if fs_type == :gcs
300
+
301
+ ##########################################################################
302
+ # do the thing: creates file locally and moves to GCS or Dir.
303
+ ##########################################################################
304
+ $autowrite_file += "# [autowrite_to_dir_or_gcs v#{autowrite_version}]: about to write it now. #{Time.now}"
305
+ tmpfile = Tempfile.new(path_to_save)
306
+ tmpfile.write($autowrite_file)
307
+ cmd = :no_cmd_yet
308
+
309
+ if fs_type == :gcs
310
+ cmd = "gsutil mv '#{tmpfile.path}' #{path}/#{file_to_save}"
311
+ end
312
+ if fs_type == :dir
313
+ cmd = "mv '#{tmpfile.path}' #{path}/#{file_to_save}"
314
+ end
315
+
316
+ puts("TMP File is this big: #{azure tmpfile.size}")
317
+ puts("About to execute this: #{white cmd}")
318
+ ret = `#{cmd}`
319
+ if $?.exitstatus != 0
320
+ puts "Error to execute: #{red cmd}. Exit: #{red $?.exitstatus}"
321
+ exit 53
322
+ end
323
+ puts "Command returned: '#{$?.exitstatus}'" # - #{$?.class}"
324
+ # wow: fork { exec("ls") }
325
+
326
+ # removes the tmpfile :)
327
+ tmpfile.close
328
+ tmpfile.unlink
329
+
330
+ #####################################
331
+ # finished, lets now clean up the file...
332
+ #####################################
333
+ reset_autowrite_file()
334
+ return :ok
365
335
  end
366
336
 
367
- def print_stats_and_md5_for_directory(directory_to_explore_recursively, opts={})
368
- puts "# [print_stats_and_md5_for_directory] DIR to explore (make sure you glob also): #{directory_to_explore_recursively }"
369
- puts "# DEB Options: #{opts}"
337
+ def print_stats_and_md5_for_directory(directory_to_explore_recursively, opts = {})
338
+ puts "# [print_stats_and_md5_for_directory] DIR to explore (make sure you glob also): #{directory_to_explore_recursively}"
339
+ puts "# DEB Options: #{opts}"
370
340
 
371
- opts_autowrite = opts.fetch :autowrite, false
341
+ opts_autowrite = opts.fetch :autowrite, false
372
342
 
373
- Dir.glob("#{(directory_to_explore_recursively)}/**/*") do |globbed_filename|
374
- # Do work on files & directories ending in .rb
375
- #puts "[deb] #{globbed_filename}"
376
- print_stats_and_md5(globbed_filename, :color => $opts[:color], :verbose => $opts[:verbose], :autowrite => $opts[:autowrite])
377
- end
343
+ Dir.glob("#{(directory_to_explore_recursively)}/**/*") do |globbed_filename|
344
+ # Do work on files & directories ending in .rb
345
+ # puts "[deb] #{globbed_filename}"
346
+ print_stats_and_md5(globbed_filename, :color => $opts[:color], :verbose => $opts[:verbose],
347
+ :autowrite => $opts[:autowrite])
348
+ end
378
349
 
379
- autowrite_to_dir_or_gcs(:dir, directory_to_explore_recursively) if opts_autowrite
380
- end
350
+ autowrite_to_dir_or_gcs(:dir, directory_to_explore_recursively) if opts_autowrite
351
+ end
381
352
 
382
353
  def real_program
383
354
  t0 = Time.now
384
355
  deb "+ Options are: #{gray $opts}"
385
356
  deb "+ Depured args: #{azure ARGV}"
386
357
  # Your code goes here...
387
- #puts "Description: '''#{white $myconf[:description] }'''"
388
- #puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ARGV.size > 0
358
+ # puts "Description: '''#{white $myconf[:description] }'''"
359
+ # puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ARGV.size > 0
389
360
 
390
361
  common_opts = {
391
- :max_files => $opts[:max_files],
392
- :color => $opts[:color],
393
- :verbose => $opts[:verbose],
394
- :autowrite => $opts[:autowrite],
362
+ :max_files => $opts[:max_files],
363
+ :color => $opts[:color],
364
+ :verbose => $opts[:verbose],
365
+ :autowrite => $opts[:autowrite],
395
366
  }
396
367
 
397
368
  if ARGV.size == 1
398
- directory_to_explore_recursively = ARGV[0]
369
+ directory_to_explore_recursively = ARGV[0]
399
370
  if smells_like_gcs?(directory_to_explore_recursively)
400
- print_stats_and_md5_for_gcs(directory_to_explore_recursively, common_opts)
401
- else # normal file..
402
- print_stats_and_md5_for_directory(directory_to_explore_recursively, common_opts)
403
- end
371
+ print_stats_and_md5_for_gcs(directory_to_explore_recursively, common_opts)
372
+ else # normal file..
373
+ print_stats_and_md5_for_directory(directory_to_explore_recursively, common_opts)
374
+ end
404
375
  elsif ARGV.size > 1
405
- deb "2. I expect a lot of single files or directories:"
376
+ deb "2. I expect a lot of single files or directories:"
406
377
  for arg in ARGV
407
- if File.directory?(arg)
408
- print_stats_and_md5_for_directory(arg, common_opts )
409
- else
410
- print_stats_and_md5(arg, common_opts)
411
- end
378
+ if File.directory?(arg)
379
+ print_stats_and_md5_for_directory(arg, common_opts)
380
+ else
381
+ print_stats_and_md5(arg, common_opts)
382
+ end
412
383
  end
413
384
  else
414
- puts "No args given. Exiting"
415
- exit 41
385
+ puts "No args given. Exiting"
386
+ exit 41
416
387
  end
417
388
  tf = Time.now
418
- puts "# [#{File.basename $0}] Time taken for processing #{stats_and_md5_number_of_files_processed} files: #{tf-t0}"
389
+ puts "# [#{File.basename $0}] Time taken for processing #{stats_and_md5_number_of_files_processed} files: #{tf - t0}"
419
390
  end
420
391
 
421
392
  def main(filename)