storazzo 0.4.10 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/bin/stats-with-md5 CHANGED
@@ -3,7 +3,7 @@
3
3
  require 'digest/md5'
4
4
  require 'digest'
5
5
  require 'socket'
6
- require 'optparse' # http://ruby.about.com/od/advancedruby/a/optionparser.htm
6
+ require 'optparse' # http://ruby.about.com/od/advancedruby/a/optionparser.htm
7
7
  require 'date' # for DateTime
8
8
  require 'tempfile'
9
9
 
@@ -30,36 +30,34 @@ DEFAULT_MAX_FILES = 'Infinite'
30
30
 
31
31
  =end
32
32
 
33
-
34
-
35
- def deb(s); puts "#DEB #{s}" if $DEBUG; end
33
+ def deb(s); puts "#DEB #{s}" if $DEBUG; end
36
34
  # colors 16
37
- def gray(s) "\033[1;30m#{s}\033[0m" ; end
38
- def green(s) "\033[1;32m#{s}\033[0m" ; end
39
- def red(s) "\033[1;31m#{s}\033[0m" ; end
40
- def yellow(s) "\033[1;33m#{s}\033[0m" ; end
41
- def blue(s) "\033[1;34m#{s}\033[0m" ; end
42
- def purple(s) "\033[1;35m#{s}\033[0m" ; end
43
- def azure(s) "\033[1;36m#{s}\033[0m" ; end
44
- def white(s) "\033[1;37m#{s}\033[0m" ; end
35
+ def gray(s) "\033[1;30m#{s}\033[0m"; end
36
+ def green(s) "\033[1;32m#{s}\033[0m"; end
37
+ def red(s) "\033[1;31m#{s}\033[0m"; end
38
+ def yellow(s) "\033[1;33m#{s}\033[0m"; end
39
+ def blue(s) "\033[1;34m#{s}\033[0m"; end
40
+ def purple(s) "\033[1;35m#{s}\033[0m"; end
41
+ def azure(s) "\033[1;36m#{s}\033[0m"; end
42
+ def white(s) "\033[1;37m#{s}\033[0m"; end
45
43
 
46
44
  # colors 64k
47
- def orange(s) "\033[38;5;208m#{s}\033[0m" ; end
45
+ def orange(s) "\033[38;5;208m#{s}\033[0m"; end
48
46
 
49
47
  # Program constants, automagically picked up by RicLib
50
48
  # More configuration could be written in:
51
49
  # $GIC/etc/ricsvn/<FILENAME>.yml
52
50
  # That would go into the variable '$prog_conf_d'
53
51
  $myconf = {
54
- :app_name => "AppName should be sth like #{$0}",
55
- :description => "
52
+ :app_name => "AppName should be sth like #{$0}",
53
+ :description => "
56
54
  This program gets stats from Ruby File.Stats library
57
55
  plus computes MD5 or CRC32 of the file.
58
- And bakes it in a way that can be easily parsed.
56
+ And bakes it in a way that can be easily parsed.
59
57
  ".strip.gsub(/^\s+/, "").gsub(/\s+$/, ""),
60
58
  }
61
59
 
62
- def usage(comment=nil)
60
+ def usage(comment = nil)
63
61
  puts white($optparse.banner)
64
62
  puts($optparse.summarize)
65
63
  puts("Description: " + gray($myconf[:description]))
@@ -68,12 +66,12 @@ def usage(comment=nil)
68
66
  end
69
67
 
70
68
  def reset_autowrite_file()
71
- $autowrite_file = "# #{File.basename $0} Autowrite v1.0 #{Time.now} reincarnation=#{$reset_autowrite_file_reincarnation}\n"
72
- $reset_autowrite_file_reincarnation += 1 # ($reset_autowrite_file_reincarnation + 1) # rescue 1
73
- end
69
+ $autowrite_file = "# #{File.basename $0} Autowrite v1.0 #{Time.now} reincarnation=#{$reset_autowrite_file_reincarnation}\n"
70
+ $reset_autowrite_file_reincarnation += 1 # ($reset_autowrite_file_reincarnation + 1) # rescue 1
71
+ end
74
72
 
75
73
  # include it in main if you want a custome one
76
- def init() # see lib_autoinit in lib/util.rb
74
+ def init() # see lib_autoinit in lib/util.rb
77
75
  $opts = {}
78
76
  # setting defaults
79
77
  $opts[:verbose] = false
@@ -87,309 +85,308 @@ def init() # see lib_autoinit in lib/util.rb
87
85
 
88
86
  $optparse = OptionParser.new do |opts|
89
87
  opts.banner = "#{$0} v.#{$PROG_VER}\n Usage: #{File.basename $0} [options] file1 file2 ..."
90
- opts.on( '-a', '--autowrite', 'automatically writes results to Root Folder (DFLT=false)' ) { $opts[:autowrite] = true }
91
- opts.on( '-c', '--no-color', 'disables color (DFLT=false, that is color enabled)' ) { $opts[:color] = false }
92
- opts.on( '-d', '--debug', 'enables debug (DFLT=false)' ) { $opts[:debug] = true ; $DEBUG = true }
93
- opts.on( '-h', '--help', 'Display this screen' ) { usage }
94
- opts.on( '-m', '--max-files NUMBER', "sets max files per Dir to X (DFLT=#{DEFAULT_MAX_FILES})" ) {|nfiles| $opts[:max_files] = nfiles.to_i }
95
- opts.on( '-n', '--dryrun', "Don't really execute code" ) { $opts[:dryrun] = true }
96
- opts.on( '-l', '--logfile FILE', 'Write log to FILE' ) {|file| $opts[:logfile] = file }
97
- opts.on( '-v', '--verbose', 'Output more information' ) { $opts[:verbose] = true}
88
+ opts.on('-a', '--autowrite', 'automatically writes results to Root Folder (DFLT=false)') {
89
+ $opts[:autowrite] = true
90
+ }
91
+ opts.on('-c', '--no-color', 'disables color (DFLT=false, that is color enabled)') { $opts[:color] = false }
92
+ opts.on('-d', '--debug', 'enables debug (DFLT=false)') { $opts[:debug] = true; $DEBUG = true }
93
+ opts.on('-h', '--help', 'Display this screen') { usage }
94
+ opts.on('-m', '--max-files NUMBER', "sets max files per Dir to X (DFLT=#{DEFAULT_MAX_FILES})") { |nfiles|
95
+ $opts[:max_files] = nfiles.to_i
96
+ }
97
+ opts.on('-n', '--dryrun', "Don't really execute code") { $opts[:dryrun] = true }
98
+ opts.on('-l', '--logfile FILE', 'Write log to FILE') { |file| $opts[:logfile] = file }
99
+ opts.on('-v', '--verbose', 'Output more information') { $opts[:verbose] = true }
98
100
  end
99
101
  $optparse.parse!
100
102
  end
101
103
 
102
104
  def getSafeCreationTime(file)
103
- File.ctime(file)
105
+ File.ctime(file)
104
106
  end
105
107
 
106
108
  def compute_stats_and_md5(file)
107
- ret={}
108
- ret[:name] = file
109
-
110
- begin
111
- stats = File.stat(file)
112
- ret[:stats_object] = stats # TODO deprecate
113
- #deb("Stats methods: #{stats.methods.sort.join(', ')}")
114
- #deb(stats.ctime)
115
- #puts(stats.birthtime rescue (stats.ctime))
116
- # On Mac/Linux: see test/dumps/***.yaml
117
- ret[:size] = stats.size
118
- ret[:mode] = stats.mode
119
- # datetimes
120
- ret[:stat_safebirthtime] = getSafeCreationTime(file) # in Mac uses birthtime,but on Linux wont work
121
- # defined?(stats.birthtime) ? # rescue stats.mtime # eg, 2022-03-05 21:47:51 +0100 on Mac (not implemented on my Linuix)#
122
- # stats.birthtime : # works on Mac
123
- # stats.ctime
124
- ret[:stat_mtime] = stats.mtime # eg, 2022-03-05 21:47:51 +0100 Returns the modification time of stat.
125
- ret[:stat_ctime] = stats.ctime # eg, 2022-03-05 21:47:51 +0100 Returns the change time for stat (that is, the time directory information about the file was changed, not the file itself). Note that on Windows (NTFS), returns creation time (birth time).
126
- ret[:stat_atime] = stats.atime # eg, 2022-03-05 21:47:51 +0100 Last Access
127
- ret[:stat_gid] = stats.gid # Group Id
128
- ret[:stat_uid] = stats.uid # UUID
129
- ret[:stat_ftype] = stats.ftype #
130
-
131
- ret[:md5] = '______________NONE______________'
132
- if stats.ftype != "directory"
133
- file_content = File.read(file)
134
- ret[:md5] = Digest::MD5.hexdigest(file_content) # rescue 'none ' #=> string with hexadecimal digits
135
- end
136
- rescue Errno::EISDIR => e
137
- #puts "It's a dir, nothing I can do here except skipping the stuff"
138
- ret[:md5] = "_________I'm a dir sorry________"
139
- rescue Exception => e
140
- ret[:error] = e
141
- end
142
- ret
109
+ ret = {}
110
+ ret[:name] = file
111
+
112
+ begin
113
+ stats = File.stat(file)
114
+ ret[:stats_object] = stats # TODO deprecate
115
+ deb("Stats methods: #{stats.methods.sort.join(', ')}")
116
+ deb(stats.ctime)
117
+ # puts(stats.birthtime rescue (stats.ctime))
118
+ # On Mac/Linux: see test/dumps/***.yaml
119
+ ret[:size] = stats.size
120
+ ret[:mode] = stats.mode
121
+ # datetimes
122
+ ret[:stat_safebirthtime] = getSafeCreationTime(file) # in Mac uses birthtime,but on Linux wont work
123
+ # defined?(stats.birthtime) ? # rescue stats.mtime # eg, 2022-03-05 21:47:51 +0100 on Mac (not implemented on my Linuix)#
124
+ # stats.birthtime : # works on Mac
125
+ # stats.ctime
126
+ ret[:stat_mtime] = stats.mtime # eg, 2022-03-05 21:47:51 +0100 Returns the modification time of stat.
127
+ ret[:stat_ctime] = stats.ctime # eg, 2022-03-05 21:47:51 +0100 Returns the change time for stat (that is, the time directory information about the file was changed, not the file itself). Note that on Windows (NTFS), returns creation time (birth time).
128
+ ret[:stat_atime] = stats.atime # eg, 2022-03-05 21:47:51 +0100 Last Access
129
+ ret[:stat_gid] = stats.gid # Group Id
130
+ ret[:stat_uid] = stats.uid # UUID
131
+ ret[:stat_ftype] = stats.ftype #
132
+
133
+ ret[:md5] = '______________NONE______________'
134
+ if stats.ftype != "directory"
135
+ file_content = File.read(file)
136
+ ret[:md5] = Digest::MD5.hexdigest(file_content) # rescue 'none ' #=> string with hexadecimal digits
137
+ end
138
+ rescue Errno::EISDIR => e
139
+ # puts "It's a dir, nothing I can do here except skipping the stuff"
140
+ ret[:md5] = "_________I'm a dir sorry________"
141
+ rescue Exception => e
142
+ ret[:error] = e
143
+ end
144
+ ret
143
145
  end
144
146
 
145
- $print_stats_and_md5_version = "1.1b_220805_F" # files
147
+ $print_stats_and_md5_version = "1.1a_220624_F" # files
146
148
  $print_stats_and_md5_counter = 0
147
- $print_stats_and_md5_for_gcs_version = "1.1alpha_220628_G" #GCS bucket
149
+ $print_stats_and_md5_for_gcs_version = "1.1alpha_220628_G" # GCS bucket
148
150
 
149
151
  def stats_and_md5_number_of_files_processed()
150
- return $print_stats_and_md5_counter
152
+ return $print_stats_and_md5_counter
151
153
  end
152
154
 
153
155
  def smells_like_gcs?(file)
154
- file =~ /gs:\/\//
156
+ file =~ /gs:\/\//
155
157
  end
156
158
 
157
159
  # You give me gs://bucket123/path/to/dir
158
- def print_stats_and_md5_for_gcs(mybucket_with_subdir, opts={})
159
- # opts_color = opts.fetch :color, true#
160
- # opts_verbose = opts.fetch :verbose, false
161
- # opts_ping_frequency = opts.fetch :ping_frequency, 50
162
- opts_max_files = opts.fetch :max_files, nil # BigDecimal('Infinity')
163
- opts_autowrite = opts.fetch :autowrite, false
164
-
165
- raise "Wrong GCS Path: #{mybucket_with_subdir}" unless smells_like_gcs?(mybucket_with_subdir)
166
- #puts "0. mybucket_with_subdir: #{mybucket_with_subdir}" # gs://mybucket123/path/to/dir
167
- path_split = mybucket_with_subdir.split('/')
168
- mybucket = path_split[0,3].join('/') # gs://mybucket123/
169
- gcs_subpath = path_split[3,42].join('/') # path/to/dir
170
- cleanedup_bucket = path_split[2] # mybucket123
171
- #puts "1. mybucket: #{mybucket}"
172
- #puts "2. gcs_subpath: #{gcs_subpath}"
173
- #puts "3. cleanedup_bucket: #{cleanedup_bucket}"
174
-
175
- puts("[print_stats_and_md5_for_gcs] version=#{$print_stats_and_md5_for_gcs_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on='#{Time.now}' bucket=#{mybucket} subpath=#{gcs_subpath}")
176
- begin
177
- require "google/cloud/storage"
178
- project_id = `gcloud config get project 2>/dev/null`
179
- deb "project_id: #{project_id}"
180
- storage = Google::Cloud::Storage.new(project_id: project_id)
181
- bucket = storage.bucket(cleanedup_bucket)
182
- # grabs them ALL if nil, optherwise its an integer.
183
- files = opts_max_files.nil? ?
184
- bucket.files : # nil -> all of them (I've tried sth more idiomatic like -1 or nil or Infinite. they all failed.)
185
- bucket.files.first(opts_max_files) # not nil -> first(N)
186
- #puts(files)
187
- deb("All Sorted Methods: #{files.first.methods.sort}")
188
- files.each do |gcs_file|
189
- md5 = Base64.decode64(gcs_file.md5).unpack('H*')[0] # md5 in base64 -> then de-binarizied: Base64.urlsafe_decode64(md5).unpack('H*')[0]
190
- size = gcs_file.size # crc rescue :boh
191
- time_created = gcs_file.created_at
192
- mode = 'XXXXXX' # for compatbility with files
193
- file_type = gcs_file.name.to_s =~ /\/$/ ? 'd' : 'f' # if ends with a slash its a DIR :P
194
- #puts("[OLD_GCS_v11] #{md5} #{size}\t#{time_created} [#{gcs_file.content_type}]\t#{gcs_file.name}")
195
- print_colored_polymoprhic('gcs_v1.2', md5, mode, file_type, size, gcs_file.content_type, time_created, gcs_file.name, opts)
196
- deb gcs_file.to_yaml
197
- end
198
- #puts("Hash Methods: #{files.first.methods.select{|x| x.match /hash/}}")
199
- rescue LoadError # require Exception
200
- puts 'Error with library #{$!}, maybe type: gem install google-cloud-storage'
201
- rescue Exception # generic Exception
202
- puts "print_stats_and_md5_for_gcs(): Generic Error: #{$!}"
203
- exit 1
204
- end
205
- autowrite_to_dir_or_gcs(:gcs, mybucket_with_subdir) if opts_autowrite
206
- end
160
+ def print_stats_and_md5_for_gcs(mybucket_with_subdir, opts = {})
161
+ # opts_color = opts.fetch :color, true#
162
+ # opts_verbose = opts.fetch :verbose, false
163
+ # opts_ping_frequency = opts.fetch :ping_frequency, 50
164
+ opts_max_files = opts.fetch :max_files, nil # BigDecimal('Infinity')
165
+ opts_autowrite = opts.fetch :autowrite, false
166
+
167
+ raise "Wrong GCS Path: #{mybucket_with_subdir}" unless smells_like_gcs?(mybucket_with_subdir)
168
+
169
+ # puts "0. mybucket_with_subdir: #{mybucket_with_subdir}" # gs://mybucket123/path/to/dir
170
+ path_split = mybucket_with_subdir.split('/')
171
+ mybucket = path_split[0, 3].join('/') # gs://mybucket123/
172
+ gcs_subpath = path_split[3, 42].join('/') # path/to/dir
173
+ cleanedup_bucket = path_split[2] # mybucket123
174
+ # puts "1. mybucket: #{mybucket}"
175
+ # puts "2. gcs_subpath: #{gcs_subpath}"
176
+ # puts "3. cleanedup_bucket: #{cleanedup_bucket}"
177
+
178
+ puts("[print_stats_and_md5_for_gcs] version=#{$print_stats_and_md5_for_gcs_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on='#{Time.now}' bucket=#{mybucket} subpath=#{gcs_subpath}")
179
+ begin
180
+ require "google/cloud/storage"
181
+ project_id = `gcloud config get project 2>/dev/null`
182
+ deb "project_id: #{project_id}"
183
+ storage = Google::Cloud::Storage.new(project_id: project_id)
184
+ bucket = storage.bucket(cleanedup_bucket)
185
+ # grabs them ALL if nil, optherwise its an integer.
186
+ files = opts_max_files.nil? ?
187
+ bucket.files : # nil -> all of them (I've tried sth more idiomatic like -1 or nil or Infinite. they all failed.)
188
+ bucket.files.first(opts_max_files) # not nil -> first(N)
189
+ # puts(files)
190
+ deb("All Sorted Methods: #{files.first.methods.sort}")
191
+ files.each do |gcs_file|
192
+ md5 = Base64.decode64(gcs_file.md5).unpack('H*')[0] # md5 in base64 -> then de-binarizied: Base64.urlsafe_decode64(md5).unpack('H*')[0]
193
+ size = gcs_file.size # crc rescue :boh
194
+ time_created = gcs_file.created_at
195
+ mode = 'XXXXXX' # for compatbility with files
196
+ file_type = gcs_file.name.to_s =~ /\/$/ ? 'd' : 'f' # if ends with a slash its a DIR :P
197
+ # puts("[OLD_GCS_v11] #{md5} #{size}\t#{time_created} [#{gcs_file.content_type}]\t#{gcs_file.name}")
198
+ print_colored_polymoprhic('gcs_v1.2', md5, mode, file_type, size, gcs_file.content_type, time_created,
199
+ gcs_file.name, opts)
200
+ deb gcs_file.to_yaml
201
+ end
202
+ # puts("Hash Methods: #{files.first.methods.select{|x| x.match /hash/}}")
203
+ rescue LoadError # require Exception
204
+ puts 'Error with library #{$!}, maybe type: gem install google-cloud-storage'
205
+ rescue Exception # generic Exception
206
+ puts "print_stats_and_md5_for_gcs(): Generic Error: #{$!}"
207
+ exit 1
208
+ end
209
+ autowrite_to_dir_or_gcs(:gcs, mybucket_with_subdir) if opts_autowrite
210
+ end
207
211
 
208
212
  # Generic polymorpghic coloring, can be GCS. Would have been nice to pass a Dict so easier to extend, but Ruby doesnt support doesblt Dict with magic args. Plus might be a GOOD
209
213
  # thing its hard to change since it breaks Storazzo.
210
214
  # TODO(ricc): make sure the dates are the same. Currently:
211
215
  # GCS Date: 2021-12-20T12:26:13+00:00 DateTime
212
216
  # File Date: 2022-05-30 11:55:42 +0200 Time
213
- def print_colored_polymoprhic(entity_type, md5, mode, file_type, size, content_type, creation_time, filename, opts={})
214
- opts_color = opts.fetch :color, true
215
- opts_verbose = opts.fetch :verbose, false
216
- opts_ping_frequency = opts.fetch :ping_frequency, 50
217
- opts_autowrite = opts.fetch :autowrite, false
218
-
219
- # Increment counter across all!
220
- $print_stats_and_md5_counter += 1 # global counter! I should increment at the END of fucntion.. but you never know if i exit wrongly so i do at beginning. so within this function the real #files is N-1 (i.e., if N is 6, we're processing file %5)
221
- # in main i had to put 2 branched to invoke a single thing but if logic changes that sentence might be printed twice. Instead here, the BEGIN line could be nice to do here instead.
222
- #$stderr.puts("print_stats_and_md5() Fikst invocation! Consider moving the 2 things in main here :)") if ($print_stats_and_md5_counter == 1)
223
- #puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ($print_stats_and_md5_counter == 1)
224
- $stderr.puts "-- print_stats_and_md5(v#{$print_stats_and_md5_version}): #{$print_stats_and_md5_counter - 1} files processed --" if (($print_stats_and_md5_counter) % opts_ping_frequency == 1 )
225
-
226
- maybecolored_md5 = opts_color ? red(md5) : md5
227
- maybecolored_filename = opts_color ? azure(filename) : filename
228
- enlarged_size = sprintf("%7o", size.to_s ) # or SIZE
229
- maybecolored_size = opts_color ? yellow(enlarged_size) : enlarged_size
230
- maybecolored_file_type = file_type
231
- colored_content_type = opts_color ? white(content_type) : content_type
232
- if opts_color
233
- maybecolored_file_type = case file_type
234
- when 'f' then green(file_type)
235
- when 'd' then blue(file_type)
236
- when 's' then azure(file_type)
237
- else red(file_type)
238
- end
239
- end
240
- standardized_creation_time = creation_time.is_a?(DateTime) ?
241
- creation_time :
242
- DateTime.parse(creation_time.to_s) # if you prefer to use Time since its already supported by Storazzo (but its ugly since it has SPACES! so hard to parse) use tt = Time.parse(d.to_s)
243
- # as per https://stackoverflow.com/questions/279769/convert-to-from-datetime-and-time-in-ruby
244
- #puts "#{maybecolored_md5} #{mode} #{maybecolored_file_type} #{maybecolored_size}\t#{creation_time}(DEB #{creation_time.class})(DEB2 #{DateTime.parse(creation_time.to_s)}) [#{colored_content_type}] #{maybecolored_filename}"
245
- # this mightr be colored
246
- str = "[#{entity_type}] #{maybecolored_md5} #{mode} #{maybecolored_file_type} #{standardized_creation_time} #{maybecolored_size} [#{colored_content_type}] #{maybecolored_filename}\n"
247
- # this will NEVER be colored. WARNING, now you need to maintain TWO of these beasts :/
248
- non_colored_str = "[#{entity_type}] #{md5} #{mode} #{file_type} #{standardized_creation_time} #{enlarged_size} [#{content_type}] #{filename}\n"
249
- if(opts_autowrite)
250
- # need to guarantee this is NOT colored. The easiest way to do it is TWICVE as expensive but... whatevs.
251
- # TODO(ricc): fire me for this lazimness!
252
- $autowrite_file += (non_colored_str)
253
- end
254
- print(str)
255
- return str
217
+ def print_colored_polymoprhic(entity_type, md5, mode, file_type, size, content_type, creation_time, filename, opts = {})
218
+ opts_color = opts.fetch :color, true
219
+ opts_verbose = opts.fetch :verbose, false
220
+ opts_ping_frequency = opts.fetch :ping_frequency, 50
221
+ opts_autowrite = opts.fetch :autowrite, false
222
+
223
+ # Increment counter across all!
224
+ $print_stats_and_md5_counter += 1 # global counter! I should increment at the END of fucntion.. but you never know if i exit wrongly so i do at beginning. so within this function the real #files is N-1 (i.e., if N is 6, we're processing file %5)
225
+ # in main i had to put 2 branched to invoke a single thing but if logic changes that sentence might be printed twice. Instead here, the BEGIN line could be nice to do here instead.
226
+ # $stderr.puts("print_stats_and_md5() Fikst invocation! Consider moving the 2 things in main here :)") if ($print_stats_and_md5_counter == 1)
227
+ # puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ($print_stats_and_md5_counter == 1)
228
+ $stderr.puts "-- print_stats_and_md5(v#{$print_stats_and_md5_version}): #{$print_stats_and_md5_counter - 1} files processed --" if (($print_stats_and_md5_counter) % opts_ping_frequency == 1)
229
+
230
+ maybecolored_md5 = opts_color ? red(md5) : md5
231
+ maybecolored_filename = opts_color ? azure(filename) : filename
232
+ enlarged_size = sprintf("%7o", size.to_s) # or SIZE
233
+ maybecolored_size = opts_color ? yellow(enlarged_size) : enlarged_size
234
+ maybecolored_file_type = file_type
235
+ colored_content_type = opts_color ? white(content_type) : content_type
236
+ if opts_color
237
+ maybecolored_file_type = case file_type
238
+ when 'f' then green(file_type)
239
+ when 'd' then blue(file_type)
240
+ when 's' then azure(file_type)
241
+ else red(file_type)
242
+ end
243
+ end
244
+ standardized_creation_time = creation_time.is_a?(DateTime) ?
245
+ creation_time :
246
+ DateTime.parse(creation_time.to_s) # if you prefer to use Time since its already supported by Storazzo (but its ugly since it has SPACES! so hard to parse) use tt = Time.parse(d.to_s)
247
+ # as per https://stackoverflow.com/questions/279769/convert-to-from-datetime-and-time-in-ruby
248
+ # puts "#{maybecolored_md5} #{mode} #{maybecolored_file_type} #{maybecolored_size}\t#{creation_time}(DEB #{creation_time.class})(DEB2 #{DateTime.parse(creation_time.to_s)}) [#{colored_content_type}] #{maybecolored_filename}"
249
+ # this mightr be colored
250
+ str = "[#{entity_type}] #{maybecolored_md5} #{mode} #{maybecolored_file_type} #{standardized_creation_time} #{maybecolored_size} [#{colored_content_type}] #{maybecolored_filename}\n"
251
+ # this will NEVER be colored. WARNING, now you need to maintain TWO of these beasts :/
252
+ non_colored_str = "[#{entity_type}] #{md5} #{mode} #{file_type} #{standardized_creation_time} #{enlarged_size} [#{content_type}] #{filename}\n"
253
+ if (opts_autowrite)
254
+ # need to guarantee this is NOT colored. The easiest way to do it is TWICVE as expensive but... whatevs.
255
+ # TODO(ricc): fire me for this lazimness!
256
+ $autowrite_file += (non_colored_str)
257
+ end
258
+ print(str)
259
+ return str
256
260
  end
257
261
 
258
- def print_stats_and_md5(file, opts={})
259
- return print_stats_and_md5_for_gcs(file, opts) if smells_like_gcs?(file)
260
-
261
- opts_color = opts.fetch :color, true
262
- opts_verbose = opts.fetch :verbose, false
263
- opts_ping_frequency = opts.fetch :ping_frequency, 50
264
-
265
-
266
- # $print_stats_and_md5_counter += 1 # global counter! I should increment at the END of fucntion.. but you never know if i exit wrongly so i do at beginning. so within this function the real #files is N-1 (i.e., if N is 6, we're processing file %5)
267
-
268
- # # in main i had to put 2 branched to invoke a single thing but if logic changes that sentence might be printed twice. Instead here, the BEGIN line could be nice to do here instead.
269
- # #$stderr.puts("print_stats_and_md5() Fikst invocation! Consider moving the 2 things in main here :)") if ($print_stats_and_md5_counter == 1)
270
- # puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ($print_stats_and_md5_counter == 1)
271
- # $stderr.puts "-- print_stats_and_md5(v#{$print_stats_and_md5_version}): #{$print_stats_and_md5_counter - 1} files processed --" if (($print_stats_and_md5_counter) % opts_ping_frequency == 1 )
272
-
273
- #puts "print_stats_and_md5: #{file}" if opts_verbose
274
- stats = compute_stats_and_md5 file
275
- #maybecolored_md5 = opts_color ? red(stats[:md5]) : stats[:md5]
276
- #maybecolored_filename = opts_color ? azure(stats[:name]) : stats[:name]
277
- #maybecolored_size = opts_color ? white(stats[:size]) : stats[:size]
278
- mode = sprintf("%06o", stats[:mode] ) rescue :ERROR # .to_s.right(4) #=> "100644"
279
- file_type = stats[:stat_ftype][0] rescue '?'
280
- file_type = 's' if File.symlink?(file)
281
- content_type = `file --mime-type -b '#{file}' 2>/dev/null`.chomp # .split(': ',2)[1]
282
-
283
- #colored_string = "[COL] #{red stats[:md5]} #{stats[:size]} #{stats[:stat_safebirthtime]} #{white stats[:name]}"
284
- #boring_string = "[B/W] #{ stats[:md5]} #{stats[:size]} #{stats[:stat_safebirthtime]} #{ stats[:name]}"
285
- #puts(opts_color ? colored_string : boring_string)
286
- #puts "[OLDv1.1deprecated] #{maybecolored_md5} #{mode} #{file_type} #{maybecolored_size} #{stats[:stat_safebirthtime]} #{maybecolored_filename}"
287
- print_colored_polymoprhic('file_v1.2', stats[:md5], mode, file_type, stats[:size], content_type, stats[:stat_safebirthtime], stats[:name], opts)
262
+ def print_stats_and_md5(file, opts = {})
263
+ return print_stats_and_md5_for_gcs(file, opts) if smells_like_gcs?(file)
264
+
265
+ opts_color = opts.fetch :color, true
266
+ opts_verbose = opts.fetch :verbose, false
267
+ opts_ping_frequency = opts.fetch :ping_frequency, 50
268
+
269
+ # $print_stats_and_md5_counter += 1 # global counter! I should increment at the END of fucntion.. but you never know if i exit wrongly so i do at beginning. so within this function the real #files is N-1 (i.e., if N is 6, we're processing file %5)
270
+
271
+ # # in main i had to put 2 branched to invoke a single thing but if logic changes that sentence might be printed twice. Instead here, the BEGIN line could be nice to do here instead.
272
+ # #$stderr.puts("print_stats_and_md5() Fikst invocation! Consider moving the 2 things in main here :)") if ($print_stats_and_md5_counter == 1)
273
+ # puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ($print_stats_and_md5_counter == 1)
274
+ # $stderr.puts "-- print_stats_and_md5(v#{$print_stats_and_md5_version}): #{$print_stats_and_md5_counter - 1} files processed --" if (($print_stats_and_md5_counter) % opts_ping_frequency == 1 )
275
+
276
+ # puts "print_stats_and_md5: #{file}" if opts_verbose
277
+ stats = compute_stats_and_md5 file
278
+ # maybecolored_md5 = opts_color ? red(stats[:md5]) : stats[:md5]
279
+ # maybecolored_filename = opts_color ? azure(stats[:name]) : stats[:name]
280
+ # maybecolored_size = opts_color ? white(stats[:size]) : stats[:size]
281
+ mode = sprintf("%06o", stats[:mode]) rescue :ERROR # .to_s.right(4) #=> "100644"
282
+ file_type = stats[:stat_ftype][0] rescue '?'
283
+ file_type = 's' if File.symlink?(file)
284
+ content_type = `file --mime-type -b '#{file}' 2>/dev/null`.chomp # .split(': ',2)[1]
285
+
286
+ # colored_string = "[COL] #{red stats[:md5]} #{stats[:size]} #{stats[:stat_safebirthtime]} #{white stats[:name]}"
287
+ # boring_string = "[B/W] #{ stats[:md5]} #{stats[:size]} #{stats[:stat_safebirthtime]} #{ stats[:name]}"
288
+ # puts(opts_color ? colored_string : boring_string)
289
+ # puts "[OLDv1.1deprecated] #{maybecolored_md5} #{mode} #{file_type} #{maybecolored_size} #{stats[:stat_safebirthtime]} #{maybecolored_filename}"
290
+ print_colored_polymoprhic('file_v1.2', stats[:md5], mode, file_type, stats[:size], content_type,
291
+ stats[:stat_safebirthtime], stats[:name], opts)
288
292
  end
289
293
 
290
- def autowrite_to_dir_or_gcs(fs_type, path, opts={})
291
- autowrite_version = "1.1_28jun22"
292
- file_to_save = "stats-with-md5.log"
293
- path_to_save = path + "/" + file_to_save
294
- puts(red "TODO(ricc): write results [size=#{$autowrite_file.size}] in this Directory: #{path_to_save}") if fs_type == :dir
295
- puts(red "TODO(ricc): write results [size=#{$autowrite_file.size}] in this GCS Bucket or Dir: #{path_to_save}") if fs_type == :gcs
296
-
297
- ##########################################################################
298
- # do the thing: creates file locally and moves to GCS or Dir.
299
- ##########################################################################
300
- $autowrite_file += "# [autowrite_to_dir_or_gcs v#{autowrite_version}]: about to write it now. #{Time.now}"
301
- tmpfile = Tempfile.new(path_to_save)
302
- tmpfile.write($autowrite_file)
303
- cmd = :no_cmd_yet
304
-
305
- if fs_type == :gcs
306
- cmd = "gsutil mv '#{tmpfile.path}' #{path}/#{file_to_save}"
307
- end
308
- if fs_type == :dir
309
- cmd = "mv '#{tmpfile.path}' #{path}/#{file_to_save}"
310
- end
311
-
312
- puts("TMP File is this big: #{azure tmpfile.size}")
313
- puts("About to execute this: #{white cmd}")
314
- ret = `#{cmd}`
315
- if $?.exitstatus != 0
316
- puts "Error to execute: #{red cmd}. Exit: #{red $?.exitstatus}"
317
- exit 53
318
- end
319
- puts "Command returned: '#{$?.exitstatus}'" # - #{$?.class}"
320
- # wow: fork { exec("ls") }
321
-
322
- # removes the tmpfile :)
323
- tmpfile.close
324
- tmpfile.unlink
325
-
326
- #####################################
327
- # finished, lets now clean up the file...
328
- #####################################
329
- reset_autowrite_file()
330
- return :ok
294
+ def autowrite_to_dir_or_gcs(fs_type, path, opts = {})
295
+ autowrite_version = "1.1_28jun22"
296
+ file_to_save = "stats-with-md5.log"
297
+ path_to_save = path + "/" + file_to_save
298
+ puts(red "TODO(ricc): write results [size=#{$autowrite_file.size}] in this Directory: #{path_to_save}") if fs_type == :dir
299
+ puts(red "TODO(ricc): write results [size=#{$autowrite_file.size}] in this GCS Bucket or Dir: #{path_to_save}") if fs_type == :gcs
300
+
301
+ ##########################################################################
302
+ # do the thing: creates file locally and moves to GCS or Dir.
303
+ ##########################################################################
304
+ $autowrite_file += "# [autowrite_to_dir_or_gcs v#{autowrite_version}]: about to write it now. #{Time.now}"
305
+ tmpfile = Tempfile.new(path_to_save)
306
+ tmpfile.write($autowrite_file)
307
+ cmd = :no_cmd_yet
308
+
309
+ if fs_type == :gcs
310
+ cmd = "gsutil mv '#{tmpfile.path}' #{path}/#{file_to_save}"
311
+ end
312
+ if fs_type == :dir
313
+ cmd = "mv '#{tmpfile.path}' #{path}/#{file_to_save}"
314
+ end
315
+
316
+ puts("TMP File is this big: #{azure tmpfile.size}")
317
+ puts("About to execute this: #{white cmd}")
318
+ ret = `#{cmd}`
319
+ if $?.exitstatus != 0
320
+ puts "Error to execute: #{red cmd}. Exit: #{red $?.exitstatus}"
321
+ exit 53
322
+ end
323
+ puts "Command returned: '#{$?.exitstatus}'" # - #{$?.class}"
324
+ # wow: fork { exec("ls") }
325
+
326
+ # removes the tmpfile :)
327
+ tmpfile.close
328
+ tmpfile.unlink
329
+
330
+ #####################################
331
+ # finished, lets now clean up the file...
332
+ #####################################
333
+ reset_autowrite_file()
334
+ return :ok
331
335
  end
332
336
 
333
- def print_stats_and_md5_for_directory_from_cli(directory_to_explore_recursively, opts={})
334
- puts "# [print_stats_and_md5_for_directory_from_cli] v#{$print_stats_and_md5_version}] DIR to explore (make sure you glob also): #{directory_to_explore_recursively }"
335
- opts_autowrite = opts.fetch :autowrite, false
336
- opts_verbose = opts.fetch( :verbose, $opts[:verbose])
337
- opts_debug = opts.fetch :debug, $opts[:debug]
338
- opts_color = opts.fetch :color, $opts[:color]
339
- if opts_debug
340
- puts "# DEB Options1 opts (FUNC) BAD: s #{opts}" # probably useless since this is invoked by CLI and has ful fledged options.
341
- puts "# DEB Options2 $opts (ARGV) GOOD: #{$opts}" # This is what we ewant in this CLI. TODO(ricc): if you move this function to be used as function invert the priority...
342
- puts("# DEB version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") # if ARGV.size > 0
343
- end
344
-
345
- Dir.glob("#{(directory_to_explore_recursively)}/**/*") do |globbed_filename|
346
- # Do work on files & directories ending in .rb
347
- #puts "[deb] #{globbed_filename}"
348
- print_stats_and_md5(globbed_filename, :color => opts_color, :verbose => opts_verbose, :autowrite => opts_autowrite)
349
- end
350
-
351
- autowrite_to_dir_or_gcs(:dir, directory_to_explore_recursively) if opts_autowrite
352
- end
337
+ def print_stats_and_md5_for_directory(directory_to_explore_recursively, opts = {})
338
+ puts "# [print_stats_and_md5_for_directory] DIR to explore (make sure you glob also): #{directory_to_explore_recursively}"
339
+ puts "# DEB Options: #{opts}"
340
+
341
+ opts_autowrite = opts.fetch :autowrite, false
342
+
343
+ Dir.glob("#{(directory_to_explore_recursively)}/**/*") do |globbed_filename|
344
+ # Do work on files & directories ending in .rb
345
+ # puts "[deb] #{globbed_filename}"
346
+ print_stats_and_md5(globbed_filename, :color => $opts[:color], :verbose => $opts[:verbose],
347
+ :autowrite => $opts[:autowrite])
348
+ end
349
+
350
+ autowrite_to_dir_or_gcs(:dir, directory_to_explore_recursively) if opts_autowrite
351
+ end
353
352
 
354
353
  def real_program
355
354
  t0 = Time.now
356
355
  deb "+ Options are: #{gray $opts}"
357
356
  deb "+ Depured args: #{azure ARGV}"
358
357
  # Your code goes here...
359
- #puts "Description: '''#{white $myconf[:description] }'''"
360
- #puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ARGV.size > 0
358
+ # puts "Description: '''#{white $myconf[:description] }'''"
359
+ # puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ARGV.size > 0
361
360
 
362
361
  common_opts = {
363
- :max_files => $opts[:max_files],
364
- :color => $opts[:color],
365
- :verbose => $opts[:verbose],
366
- :autowrite => $opts[:autowrite],
362
+ :max_files => $opts[:max_files],
363
+ :color => $opts[:color],
364
+ :verbose => $opts[:verbose],
365
+ :autowrite => $opts[:autowrite],
367
366
  }
368
367
 
369
368
  if ARGV.size == 1
370
- directory_to_explore_recursively = ARGV[0]
369
+ directory_to_explore_recursively = ARGV[0]
371
370
  if smells_like_gcs?(directory_to_explore_recursively)
372
- print_stats_and_md5_for_gcs(directory_to_explore_recursively, common_opts)
373
- else # normal file..
374
- print_stats_and_md5_for_directory_from_cli(directory_to_explore_recursively, common_opts)
375
- end
371
+ print_stats_and_md5_for_gcs(directory_to_explore_recursively, common_opts)
372
+ else # normal file..
373
+ print_stats_and_md5_for_directory(directory_to_explore_recursively, common_opts)
374
+ end
376
375
  elsif ARGV.size > 1
377
- deb "2. I expect a lot of single files or directories:"
376
+ deb "2. I expect a lot of single files or directories:"
378
377
  for arg in ARGV
379
- if File.directory?(arg)
380
- print_stats_and_md5_for_directory_from_cli(arg, common_opts )
381
- else
382
- print_stats_and_md5(arg, common_opts)
383
- end
378
+ if File.directory?(arg)
379
+ print_stats_and_md5_for_directory(arg, common_opts)
380
+ else
381
+ print_stats_and_md5(arg, common_opts)
382
+ end
384
383
  end
385
384
  else
386
- puts "No args given. Exiting"
387
- exit 41
385
+ puts "No args given. Exiting"
386
+ exit 41
388
387
  end
389
388
  tf = Time.now
390
- #rounding to 3 decimals
391
- approx_delta = (tf-t0).round(3)
392
- puts "# [#{File.basename $0}] Time taken for processing #{stats_and_md5_number_of_files_processed} files: #{approx_delta} seconds"
389
+ puts "# [#{File.basename $0}] Time taken for processing #{stats_and_md5_number_of_files_processed} files: #{tf - t0}"
393
390
  end
394
391
 
395
392
  def main(filename)