storazzo 0.4.9 → 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
data/bin/stats-with-md5 CHANGED
@@ -3,7 +3,7 @@
3
3
  require 'digest/md5'
4
4
  require 'digest'
5
5
  require 'socket'
6
- require 'optparse' # http://ruby.about.com/od/advancedruby/a/optionparser.htm
6
+ require 'optparse' # http://ruby.about.com/od/advancedruby/a/optionparser.htm
7
7
  require 'date' # for DateTime
8
8
  require 'tempfile'
9
9
 
@@ -30,36 +30,34 @@ DEFAULT_MAX_FILES = 'Infinite'
30
30
 
31
31
  =end
32
32
 
33
-
34
-
35
- def deb(s); puts "#DEB #{s}" if $DEBUG; end
33
+ def deb(s); puts "#DEB #{s}" if $DEBUG; end
36
34
  # colors 16
37
- def gray(s) "\033[1;30m#{s}\033[0m" ; end
38
- def green(s) "\033[1;32m#{s}\033[0m" ; end
39
- def red(s) "\033[1;31m#{s}\033[0m" ; end
40
- def yellow(s) "\033[1;33m#{s}\033[0m" ; end
41
- def blue(s) "\033[1;34m#{s}\033[0m" ; end
42
- def purple(s) "\033[1;35m#{s}\033[0m" ; end
43
- def azure(s) "\033[1;36m#{s}\033[0m" ; end
44
- def white(s) "\033[1;37m#{s}\033[0m" ; end
35
+ def gray(s) "\033[1;30m#{s}\033[0m"; end
36
+ def green(s) "\033[1;32m#{s}\033[0m"; end
37
+ def red(s) "\033[1;31m#{s}\033[0m"; end
38
+ def yellow(s) "\033[1;33m#{s}\033[0m"; end
39
+ def blue(s) "\033[1;34m#{s}\033[0m"; end
40
+ def purple(s) "\033[1;35m#{s}\033[0m"; end
41
+ def azure(s) "\033[1;36m#{s}\033[0m"; end
42
+ def white(s) "\033[1;37m#{s}\033[0m"; end
45
43
 
46
44
  # colors 64k
47
- def orange(s) "\033[38;5;208m#{s}\033[0m" ; end
45
+ def orange(s) "\033[38;5;208m#{s}\033[0m"; end
48
46
 
49
47
  # Program constants, automagically picked up by RicLib
50
48
  # More configuration could be written in:
51
49
  # $GIC/etc/ricsvn/<FILENAME>.yml
52
50
  # That would go into the variable '$prog_conf_d'
53
51
  $myconf = {
54
- :app_name => "AppName should be sth like #{$0}",
55
- :description => "
52
+ :app_name => "AppName should be sth like #{$0}",
53
+ :description => "
56
54
  This program gets stats from Ruby File.Stats library
57
55
  plus computes MD5 or CRC32 of the file.
58
- And bakes it in a way that can be easily parsed.
56
+ And bakes it in a way that can be easily parsed.
59
57
  ".strip.gsub(/^\s+/, "").gsub(/\s+$/, ""),
60
58
  }
61
59
 
62
- def usage(comment=nil)
60
+ def usage(comment = nil)
63
61
  puts white($optparse.banner)
64
62
  puts($optparse.summarize)
65
63
  puts("Description: " + gray($myconf[:description]))
@@ -68,12 +66,12 @@ def usage(comment=nil)
68
66
  end
69
67
 
70
68
  def reset_autowrite_file()
71
- $autowrite_file = "# #{File.basename $0} Autowrite v1.0 #{Time.now} reincarnation=#{$reset_autowrite_file_reincarnation}\n"
72
- $reset_autowrite_file_reincarnation += 1 # ($reset_autowrite_file_reincarnation + 1) # rescue 1
73
- end
69
+ $autowrite_file = "# #{File.basename $0} Autowrite v1.0 #{Time.now} reincarnation=#{$reset_autowrite_file_reincarnation}\n"
70
+ $reset_autowrite_file_reincarnation += 1 # ($reset_autowrite_file_reincarnation + 1) # rescue 1
71
+ end
74
72
 
75
73
  # include it in main if you want a custome one
76
- def init() # see lib_autoinit in lib/util.rb
74
+ def init() # see lib_autoinit in lib/util.rb
77
75
  $opts = {}
78
76
  # setting defaults
79
77
  $opts[:verbose] = false
@@ -87,31 +85,70 @@ def init() # see lib_autoinit in lib/util.rb
87
85
 
88
86
  $optparse = OptionParser.new do |opts|
89
87
  opts.banner = "#{$0} v.#{$PROG_VER}\n Usage: #{File.basename $0} [options] file1 file2 ..."
90
- opts.on( '-a', '--autowrite', 'automatically writes results to Root Folder (DFLT=false)' ) { $opts[:autowrite] = true }
91
- opts.on( '-c', '--no-color', 'disables color (DFLT=false, that is color enabled)' ) { $opts[:color] = false }
92
- opts.on( '-d', '--debug', 'enables debug (DFLT=false)' ) { $opts[:debug] = true ; $DEBUG = true }
93
- opts.on( '-h', '--help', 'Display this screen' ) { usage }
94
- opts.on( '-m', '--max-files NUMBER', "sets max files per Dir to X (DFLT=#{DEFAULT_MAX_FILES})" ) {|nfiles| $opts[:max_files] = nfiles.to_i }
95
- opts.on( '-n', '--dryrun', "Don't really execute code" ) { $opts[:dryrun] = true }
96
- opts.on( '-l', '--logfile FILE', 'Write log to FILE' ) {|file| $opts[:logfile] = file }
97
- opts.on( '-v', '--verbose', 'Output more information' ) { $opts[:verbose] = true}
88
+ opts.on('-a', '--autowrite', 'automatically writes results to Root Folder (DFLT=false)') {
89
+ $opts[:autowrite] = true
90
+ }
91
+ opts.on('-c', '--no-color', 'disables color (DFLT=false, that is color enabled)') { $opts[:color] = false }
92
+ opts.on('-d', '--debug', 'enables debug (DFLT=false)') { $opts[:debug] = true; $DEBUG = true }
93
+ opts.on('-h', '--help', 'Display this screen') { usage }
94
+ opts.on('-m', '--max-files NUMBER', "sets max files per Dir to X (DFLT=#{DEFAULT_MAX_FILES})") { |nfiles|
95
+ $opts[:max_files] = nfiles.to_i
96
+ }
97
+ opts.on('-n', '--dryrun', "Don't really execute code") { $opts[:dryrun] = true }
98
+ opts.on('-l', '--logfile FILE', 'Write log to FILE') { |file| $opts[:logfile] = file }
99
+ opts.on('-v', '--verbose', 'Output more information') { $opts[:verbose] = true }
98
100
  end
99
101
  $optparse.parse!
100
102
  end
101
103
 
102
104
  def getSafeCreationTime(file)
103
- File.ctime(file)
105
+ File.ctime(file)
104
106
  end
105
107
 
106
108
  def compute_stats_and_md5(file)
107
- ret={}
108
- ret[:name] = file
109
-
109
+ ret = {}
110
+ ret[:name] = file
111
+
112
+ <<<<<<< HEAD
113
+ begin
114
+ stats = File.stat(file)
115
+ ret[:stats_object] = stats # TODO deprecate
116
+ deb("Stats methods: #{stats.methods.sort.join(', ')}")
117
+ deb(stats.ctime)
118
+ # puts(stats.birthtime rescue (stats.ctime))
119
+ # On Mac/Linux: see test/dumps/***.yaml
120
+ ret[:size] = stats.size
121
+ ret[:mode] = stats.mode
122
+ # datetimes
123
+ ret[:stat_safebirthtime] = getSafeCreationTime(file) # in Mac uses birthtime,but on Linux wont work
124
+ # defined?(stats.birthtime) ? # rescue stats.mtime # eg, 2022-03-05 21:47:51 +0100 on Mac (not implemented on my Linuix)#
125
+ # stats.birthtime : # works on Mac
126
+ # stats.ctime
127
+ ret[:stat_mtime] = stats.mtime # eg, 2022-03-05 21:47:51 +0100 Returns the modification time of stat.
128
+ ret[:stat_ctime] = stats.ctime # eg, 2022-03-05 21:47:51 +0100 Returns the change time for stat (that is, the time directory information about the file was changed, not the file itself). Note that on Windows (NTFS), returns creation time (birth time).
129
+ ret[:stat_atime] = stats.atime # eg, 2022-03-05 21:47:51 +0100 Last Access
130
+ ret[:stat_gid] = stats.gid # Group Id
131
+ ret[:stat_uid] = stats.uid # UUID
132
+ ret[:stat_ftype] = stats.ftype #
133
+
134
+ ret[:md5] = '______________NONE______________'
135
+ if stats.ftype != "directory"
136
+ file_content = File.read(file)
137
+ ret[:md5] = Digest::MD5.hexdigest(file_content) # rescue 'none ' #=> string with hexadecimal digits
138
+ end
139
+ rescue Errno::EISDIR => e
140
+ # puts "It's a dir, nothing I can do here except skipping the stuff"
141
+ ret[:md5] = "_________I'm a dir sorry________"
142
+ rescue Exception => e
143
+ ret[:error] = e
144
+ end
145
+ ret
146
+ =======
110
147
  begin
111
148
  stats = File.stat(file)
112
149
  ret[:stats_object] = stats # TODO deprecate
113
- deb("Stats methods: #{stats.methods.sort.join(', ')}")
114
- deb(stats.ctime)
150
+ #deb("Stats methods: #{stats.methods.sort.join(', ')}")
151
+ #deb(stats.ctime)
115
152
  #puts(stats.birthtime rescue (stats.ctime))
116
153
  # On Mac/Linux: see test/dumps/***.yaml
117
154
  ret[:size] = stats.size
@@ -140,248 +177,279 @@ def compute_stats_and_md5(file)
140
177
  ret[:error] = e
141
178
  end
142
179
  ret
180
+ >>>>>>> 97a64f1 (perfected CLI)
143
181
  end
144
182
 
145
- $print_stats_and_md5_version = "1.1a_220624_F" # files
183
+ $print_stats_and_md5_version = "1.1b_220805_F" # files
146
184
  $print_stats_and_md5_counter = 0
147
- $print_stats_and_md5_for_gcs_version = "1.1alpha_220628_G" #GCS bucket
185
+ $print_stats_and_md5_for_gcs_version = "1.1alpha_220628_G" # GCS bucket
148
186
 
149
187
  def stats_and_md5_number_of_files_processed()
150
- return $print_stats_and_md5_counter
188
+ return $print_stats_and_md5_counter
151
189
  end
152
190
 
153
191
  def smells_like_gcs?(file)
154
- file =~ /gs:\/\//
192
+ file =~ /gs:\/\//
155
193
  end
156
194
 
157
195
  # You give me gs://bucket123/path/to/dir
158
- def print_stats_and_md5_for_gcs(mybucket_with_subdir, opts={})
159
- # opts_color = opts.fetch :color, true#
160
- # opts_verbose = opts.fetch :verbose, false
161
- # opts_ping_frequency = opts.fetch :ping_frequency, 50
162
- opts_max_files = opts.fetch :max_files, nil # BigDecimal('Infinity')
163
- opts_autowrite = opts.fetch :autowrite, false
164
-
165
- raise "Wrong GCS Path: #{mybucket_with_subdir}" unless smells_like_gcs?(mybucket_with_subdir)
166
- #puts "0. mybucket_with_subdir: #{mybucket_with_subdir}" # gs://mybucket123/path/to/dir
167
- path_split = mybucket_with_subdir.split('/')
168
- mybucket = path_split[0,3].join('/') # gs://mybucket123/
169
- gcs_subpath = path_split[3,42].join('/') # path/to/dir
170
- cleanedup_bucket = path_split[2] # mybucket123
171
- #puts "1. mybucket: #{mybucket}"
172
- #puts "2. gcs_subpath: #{gcs_subpath}"
173
- #puts "3. cleanedup_bucket: #{cleanedup_bucket}"
174
-
175
- puts("[print_stats_and_md5_for_gcs] version=#{$print_stats_and_md5_for_gcs_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on='#{Time.now}' bucket=#{mybucket} subpath=#{gcs_subpath}")
176
- begin
177
- require "google/cloud/storage"
178
- project_id = `gcloud config get project 2>/dev/null`
179
- deb "project_id: #{project_id}"
180
- storage = Google::Cloud::Storage.new(project_id: project_id)
181
- bucket = storage.bucket(cleanedup_bucket)
182
- # grabs them ALL if nil, optherwise its an integer.
183
- files = opts_max_files.nil? ?
184
- bucket.files : # nil -> all of them (I've tried sth more idiomatic like -1 or nil or Infinite. they all failed.)
185
- bucket.files.first(opts_max_files) # not nil -> first(N)
186
- #puts(files)
187
- deb("All Sorted Methods: #{files.first.methods.sort}")
188
- files.each do |gcs_file|
189
- md5 = Base64.decode64(gcs_file.md5).unpack('H*')[0] # md5 in base64 -> then de-binarizied: Base64.urlsafe_decode64(md5).unpack('H*')[0]
190
- size = gcs_file.size # crc rescue :boh
191
- time_created = gcs_file.created_at
192
- mode = 'XXXXXX' # for compatbility with files
193
- file_type = gcs_file.name.to_s =~ /\/$/ ? 'd' : 'f' # if ends with a slash its a DIR :P
194
- #puts("[OLD_GCS_v11] #{md5} #{size}\t#{time_created} [#{gcs_file.content_type}]\t#{gcs_file.name}")
195
- print_colored_polymoprhic('gcs_v1.2', md5, mode, file_type, size, gcs_file.content_type, time_created, gcs_file.name, opts)
196
- deb gcs_file.to_yaml
197
- end
198
- #puts("Hash Methods: #{files.first.methods.select{|x| x.match /hash/}}")
199
- rescue LoadError # require Exception
200
- puts 'Error with library #{$!}, maybe type: gem install google-cloud-storage'
201
- rescue Exception # generic Exception
202
- puts "print_stats_and_md5_for_gcs(): Generic Error: #{$!}"
203
- exit 1
204
- end
205
- autowrite_to_dir_or_gcs(:gcs, mybucket_with_subdir) if opts_autowrite
206
- end
196
+ def print_stats_and_md5_for_gcs(mybucket_with_subdir, opts = {})
197
+ # opts_color = opts.fetch :color, true#
198
+ # opts_verbose = opts.fetch :verbose, false
199
+ # opts_ping_frequency = opts.fetch :ping_frequency, 50
200
+ opts_max_files = opts.fetch :max_files, nil # BigDecimal('Infinity')
201
+ opts_autowrite = opts.fetch :autowrite, false
202
+
203
+ raise "Wrong GCS Path: #{mybucket_with_subdir}" unless smells_like_gcs?(mybucket_with_subdir)
204
+
205
+ # puts "0. mybucket_with_subdir: #{mybucket_with_subdir}" # gs://mybucket123/path/to/dir
206
+ path_split = mybucket_with_subdir.split('/')
207
+ mybucket = path_split[0, 3].join('/') # gs://mybucket123/
208
+ gcs_subpath = path_split[3, 42].join('/') # path/to/dir
209
+ cleanedup_bucket = path_split[2] # mybucket123
210
+ # puts "1. mybucket: #{mybucket}"
211
+ # puts "2. gcs_subpath: #{gcs_subpath}"
212
+ # puts "3. cleanedup_bucket: #{cleanedup_bucket}"
213
+
214
+ puts("[print_stats_and_md5_for_gcs] version=#{$print_stats_and_md5_for_gcs_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on='#{Time.now}' bucket=#{mybucket} subpath=#{gcs_subpath}")
215
+ begin
216
+ require "google/cloud/storage"
217
+ project_id = `gcloud config get project 2>/dev/null`
218
+ deb "project_id: #{project_id}"
219
+ storage = Google::Cloud::Storage.new(project_id: project_id)
220
+ bucket = storage.bucket(cleanedup_bucket)
221
+ # grabs them ALL if nil, optherwise its an integer.
222
+ files = opts_max_files.nil? ?
223
+ bucket.files : # nil -> all of them (I've tried sth more idiomatic like -1 or nil or Infinite. they all failed.)
224
+ bucket.files.first(opts_max_files) # not nil -> first(N)
225
+ # puts(files)
226
+ deb("All Sorted Methods: #{files.first.methods.sort}")
227
+ files.each do |gcs_file|
228
+ md5 = Base64.decode64(gcs_file.md5).unpack('H*')[0] # md5 in base64 -> then de-binarizied: Base64.urlsafe_decode64(md5).unpack('H*')[0]
229
+ size = gcs_file.size # crc rescue :boh
230
+ time_created = gcs_file.created_at
231
+ mode = 'XXXXXX' # for compatbility with files
232
+ file_type = gcs_file.name.to_s =~ /\/$/ ? 'd' : 'f' # if ends with a slash its a DIR :P
233
+ # puts("[OLD_GCS_v11] #{md5} #{size}\t#{time_created} [#{gcs_file.content_type}]\t#{gcs_file.name}")
234
+ print_colored_polymoprhic('gcs_v1.2', md5, mode, file_type, size, gcs_file.content_type, time_created,
235
+ gcs_file.name, opts)
236
+ deb gcs_file.to_yaml
237
+ end
238
+ # puts("Hash Methods: #{files.first.methods.select{|x| x.match /hash/}}")
239
+ rescue LoadError # require Exception
240
+ puts 'Error with library #{$!}, maybe type: gem install google-cloud-storage'
241
+ rescue Exception # generic Exception
242
+ puts "print_stats_and_md5_for_gcs(): Generic Error: #{$!}"
243
+ exit 1
244
+ end
245
+ autowrite_to_dir_or_gcs(:gcs, mybucket_with_subdir) if opts_autowrite
246
+ end
207
247
 
208
248
  # Generic polymorpghic coloring, can be GCS. Would have been nice to pass a Dict so easier to extend, but Ruby doesnt support doesblt Dict with magic args. Plus might be a GOOD
209
249
  # thing its hard to change since it breaks Storazzo.
210
250
  # TODO(ricc): make sure the dates are the same. Currently:
211
251
  # GCS Date: 2021-12-20T12:26:13+00:00 DateTime
212
252
  # File Date: 2022-05-30 11:55:42 +0200 Time
213
- def print_colored_polymoprhic(entity_type, md5, mode, file_type, size, content_type, creation_time, filename, opts={})
214
- opts_color = opts.fetch :color, true
215
- opts_verbose = opts.fetch :verbose, false
216
- opts_ping_frequency = opts.fetch :ping_frequency, 50
217
- opts_autowrite = opts.fetch :autowrite, false
218
-
219
- # Increment counter across all!
220
- $print_stats_and_md5_counter += 1 # global counter! I should increment at the END of fucntion.. but you never know if i exit wrongly so i do at beginning. so within this function the real #files is N-1 (i.e., if N is 6, we're processing file %5)
221
- # in main i had to put 2 branched to invoke a single thing but if logic changes that sentence might be printed twice. Instead here, the BEGIN line could be nice to do here instead.
222
- #$stderr.puts("print_stats_and_md5() Fikst invocation! Consider moving the 2 things in main here :)") if ($print_stats_and_md5_counter == 1)
223
- #puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ($print_stats_and_md5_counter == 1)
224
- $stderr.puts "-- print_stats_and_md5(v#{$print_stats_and_md5_version}): #{$print_stats_and_md5_counter - 1} files processed --" if (($print_stats_and_md5_counter) % opts_ping_frequency == 1 )
225
-
226
- maybecolored_md5 = opts_color ? red(md5) : md5
227
- maybecolored_filename = opts_color ? azure(filename) : filename
228
- enlarged_size = sprintf("%7o", size.to_s ) # or SIZE
229
- maybecolored_size = opts_color ? yellow(enlarged_size) : enlarged_size
230
- maybecolored_file_type = file_type
231
- colored_content_type = opts_color ? white(content_type) : content_type
232
- if opts_color
233
- maybecolored_file_type = case file_type
234
- when 'f' then green(file_type)
235
- when 'd' then blue(file_type)
236
- when 's' then azure(file_type)
237
- else red(file_type)
238
- end
239
- end
240
- standardized_creation_time = creation_time.is_a?(DateTime) ?
241
- creation_time :
242
- DateTime.parse(creation_time.to_s) # if you prefer to use Time since its already supported by Storazzo (but its ugly since it has SPACES! so hard to parse) use tt = Time.parse(d.to_s)
243
- # as per https://stackoverflow.com/questions/279769/convert-to-from-datetime-and-time-in-ruby
244
- #puts "#{maybecolored_md5} #{mode} #{maybecolored_file_type} #{maybecolored_size}\t#{creation_time}(DEB #{creation_time.class})(DEB2 #{DateTime.parse(creation_time.to_s)}) [#{colored_content_type}] #{maybecolored_filename}"
245
- # this mightr be colored
246
- str = "[#{entity_type}] #{maybecolored_md5} #{mode} #{maybecolored_file_type} #{standardized_creation_time} #{maybecolored_size} [#{colored_content_type}] #{maybecolored_filename}\n"
247
- # this will NEVER be colored. WARNING, now you need to maintain TWO of these beasts :/
248
- non_colored_str = "[#{entity_type}] #{md5} #{mode} #{file_type} #{standardized_creation_time} #{enlarged_size} [#{content_type}] #{filename}\n"
249
- if(opts_autowrite)
250
- # need to guarantee this is NOT colored. The easiest way to do it is TWICVE as expensive but... whatevs.
251
- # TODO(ricc): fire me for this lazimness!
252
- $autowrite_file += (non_colored_str)
253
- end
254
- print(str)
255
- return str
253
+ def print_colored_polymoprhic(entity_type, md5, mode, file_type, size, content_type, creation_time, filename, opts = {})
254
+ opts_color = opts.fetch :color, true
255
+ opts_verbose = opts.fetch :verbose, false
256
+ opts_ping_frequency = opts.fetch :ping_frequency, 50
257
+ opts_autowrite = opts.fetch :autowrite, false
258
+
259
+ # Increment counter across all!
260
+ $print_stats_and_md5_counter += 1 # global counter! I should increment at the END of fucntion.. but you never know if i exit wrongly so i do at beginning. so within this function the real #files is N-1 (i.e., if N is 6, we're processing file %5)
261
+ # in main i had to put 2 branched to invoke a single thing but if logic changes that sentence might be printed twice. Instead here, the BEGIN line could be nice to do here instead.
262
+ # $stderr.puts("print_stats_and_md5() Fikst invocation! Consider moving the 2 things in main here :)") if ($print_stats_and_md5_counter == 1)
263
+ # puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ($print_stats_and_md5_counter == 1)
264
+ $stderr.puts "-- print_stats_and_md5(v#{$print_stats_and_md5_version}): #{$print_stats_and_md5_counter - 1} files processed --" if (($print_stats_and_md5_counter) % opts_ping_frequency == 1)
265
+
266
+ maybecolored_md5 = opts_color ? red(md5) : md5
267
+ maybecolored_filename = opts_color ? azure(filename) : filename
268
+ enlarged_size = sprintf("%7o", size.to_s) # or SIZE
269
+ maybecolored_size = opts_color ? yellow(enlarged_size) : enlarged_size
270
+ maybecolored_file_type = file_type
271
+ colored_content_type = opts_color ? white(content_type) : content_type
272
+ if opts_color
273
+ maybecolored_file_type = case file_type
274
+ when 'f' then green(file_type)
275
+ when 'd' then blue(file_type)
276
+ when 's' then azure(file_type)
277
+ else red(file_type)
278
+ end
279
+ end
280
+ standardized_creation_time = creation_time.is_a?(DateTime) ?
281
+ creation_time :
282
+ DateTime.parse(creation_time.to_s) # if you prefer to use Time since its already supported by Storazzo (but its ugly since it has SPACES! so hard to parse) use tt = Time.parse(d.to_s)
283
+ # as per https://stackoverflow.com/questions/279769/convert-to-from-datetime-and-time-in-ruby
284
+ # puts "#{maybecolored_md5} #{mode} #{maybecolored_file_type} #{maybecolored_size}\t#{creation_time}(DEB #{creation_time.class})(DEB2 #{DateTime.parse(creation_time.to_s)}) [#{colored_content_type}] #{maybecolored_filename}"
285
+ # this mightr be colored
286
+ str = "[#{entity_type}] #{maybecolored_md5} #{mode} #{maybecolored_file_type} #{standardized_creation_time} #{maybecolored_size} [#{colored_content_type}] #{maybecolored_filename}\n"
287
+ # this will NEVER be colored. WARNING, now you need to maintain TWO of these beasts :/
288
+ non_colored_str = "[#{entity_type}] #{md5} #{mode} #{file_type} #{standardized_creation_time} #{enlarged_size} [#{content_type}] #{filename}\n"
289
+ if (opts_autowrite)
290
+ # need to guarantee this is NOT colored. The easiest way to do it is TWICVE as expensive but... whatevs.
291
+ # TODO(ricc): fire me for this lazimness!
292
+ $autowrite_file += (non_colored_str)
293
+ end
294
+ print(str)
295
+ return str
256
296
  end
257
297
 
258
- def print_stats_and_md5(file, opts={})
259
- return print_stats_and_md5_for_gcs(file, opts) if smells_like_gcs?(file)
260
-
261
- opts_color = opts.fetch :color, true
262
- opts_verbose = opts.fetch :verbose, false
263
- opts_ping_frequency = opts.fetch :ping_frequency, 50
264
-
265
-
266
- # $print_stats_and_md5_counter += 1 # global counter! I should increment at the END of fucntion.. but you never know if i exit wrongly so i do at beginning. so within this function the real #files is N-1 (i.e., if N is 6, we're processing file %5)
267
-
268
- # # in main i had to put 2 branched to invoke a single thing but if logic changes that sentence might be printed twice. Instead here, the BEGIN line could be nice to do here instead.
269
- # #$stderr.puts("print_stats_and_md5() Fikst invocation! Consider moving the 2 things in main here :)") if ($print_stats_and_md5_counter == 1)
270
- # puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ($print_stats_and_md5_counter == 1)
271
- # $stderr.puts "-- print_stats_and_md5(v#{$print_stats_and_md5_version}): #{$print_stats_and_md5_counter - 1} files processed --" if (($print_stats_and_md5_counter) % opts_ping_frequency == 1 )
272
-
273
- #puts "print_stats_and_md5: #{file}" if opts_verbose
274
- stats = compute_stats_and_md5 file
275
- #maybecolored_md5 = opts_color ? red(stats[:md5]) : stats[:md5]
276
- #maybecolored_filename = opts_color ? azure(stats[:name]) : stats[:name]
277
- #maybecolored_size = opts_color ? white(stats[:size]) : stats[:size]
278
- mode = sprintf("%06o", stats[:mode] ) rescue :ERROR # .to_s.right(4) #=> "100644"
279
- file_type = stats[:stat_ftype][0] rescue '?'
280
- file_type = 's' if File.symlink?(file)
281
- content_type = `file --mime-type -b '#{file}' 2>/dev/null`.chomp # .split(': ',2)[1]
282
-
283
- #colored_string = "[COL] #{red stats[:md5]} #{stats[:size]} #{stats[:stat_safebirthtime]} #{white stats[:name]}"
284
- #boring_string = "[B/W] #{ stats[:md5]} #{stats[:size]} #{stats[:stat_safebirthtime]} #{ stats[:name]}"
285
- #puts(opts_color ? colored_string : boring_string)
286
- #puts "[OLDv1.1deprecated] #{maybecolored_md5} #{mode} #{file_type} #{maybecolored_size} #{stats[:stat_safebirthtime]} #{maybecolored_filename}"
287
- print_colored_polymoprhic('file_v1.2', stats[:md5], mode, file_type, stats[:size], content_type, stats[:stat_safebirthtime], stats[:name], opts)
298
+ def print_stats_and_md5(file, opts = {})
299
+ return print_stats_and_md5_for_gcs(file, opts) if smells_like_gcs?(file)
300
+
301
+ opts_color = opts.fetch :color, true
302
+ opts_verbose = opts.fetch :verbose, false
303
+ opts_ping_frequency = opts.fetch :ping_frequency, 50
304
+
305
+ # $print_stats_and_md5_counter += 1 # global counter! I should increment at the END of fucntion.. but you never know if i exit wrongly so i do at beginning. so within this function the real #files is N-1 (i.e., if N is 6, we're processing file %5)
306
+
307
+ # # in main i had to put 2 branched to invoke a single thing but if logic changes that sentence might be printed twice. Instead here, the BEGIN line could be nice to do here instead.
308
+ # #$stderr.puts("print_stats_and_md5() Fikst invocation! Consider moving the 2 things in main here :)") if ($print_stats_and_md5_counter == 1)
309
+ # puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ($print_stats_and_md5_counter == 1)
310
+ # $stderr.puts "-- print_stats_and_md5(v#{$print_stats_and_md5_version}): #{$print_stats_and_md5_counter - 1} files processed --" if (($print_stats_and_md5_counter) % opts_ping_frequency == 1 )
311
+
312
+ # puts "print_stats_and_md5: #{file}" if opts_verbose
313
+ stats = compute_stats_and_md5 file
314
+ # maybecolored_md5 = opts_color ? red(stats[:md5]) : stats[:md5]
315
+ # maybecolored_filename = opts_color ? azure(stats[:name]) : stats[:name]
316
+ # maybecolored_size = opts_color ? white(stats[:size]) : stats[:size]
317
+ mode = sprintf("%06o", stats[:mode]) rescue :ERROR # .to_s.right(4) #=> "100644"
318
+ file_type = stats[:stat_ftype][0] rescue '?'
319
+ file_type = 's' if File.symlink?(file)
320
+ content_type = `file --mime-type -b '#{file}' 2>/dev/null`.chomp # .split(': ',2)[1]
321
+
322
+ # colored_string = "[COL] #{red stats[:md5]} #{stats[:size]} #{stats[:stat_safebirthtime]} #{white stats[:name]}"
323
+ # boring_string = "[B/W] #{ stats[:md5]} #{stats[:size]} #{stats[:stat_safebirthtime]} #{ stats[:name]}"
324
+ # puts(opts_color ? colored_string : boring_string)
325
+ # puts "[OLDv1.1deprecated] #{maybecolored_md5} #{mode} #{file_type} #{maybecolored_size} #{stats[:stat_safebirthtime]} #{maybecolored_filename}"
326
+ print_colored_polymoprhic('file_v1.2', stats[:md5], mode, file_type, stats[:size], content_type,
327
+ stats[:stat_safebirthtime], stats[:name], opts)
288
328
  end
289
329
 
290
- def autowrite_to_dir_or_gcs(fs_type, path, opts={})
291
- autowrite_version = "1.1_28jun22"
292
- file_to_save = "stats-with-md5.log"
293
- path_to_save = path + "/" + file_to_save
294
- puts(red "TODO(ricc): write results [size=#{$autowrite_file.size}] in this Directory: #{path_to_save}") if fs_type == :dir
295
- puts(red "TODO(ricc): write results [size=#{$autowrite_file.size}] in this GCS Bucket or Dir: #{path_to_save}") if fs_type == :gcs
296
-
297
- ##########################################################################
298
- # do the thing: creates file locally and moves to GCS or Dir.
299
- ##########################################################################
300
- $autowrite_file += "# [autowrite_to_dir_or_gcs v#{autowrite_version}]: about to write it now. #{Time.now}"
301
- tmpfile = Tempfile.new(path_to_save)
302
- tmpfile.write($autowrite_file)
303
- cmd = :no_cmd_yet
304
-
305
- if fs_type == :gcs
306
- cmd = "gsutil mv '#{tmpfile.path}' #{path}/#{file_to_save}"
307
- end
308
- if fs_type == :dir
309
- cmd = "mv '#{tmpfile.path}' #{path}/#{file_to_save}"
310
- end
330
+ def autowrite_to_dir_or_gcs(fs_type, path, opts = {})
331
+ autowrite_version = "1.1_28jun22"
332
+ file_to_save = "stats-with-md5.log"
333
+ path_to_save = path + "/" + file_to_save
334
+ puts(red "TODO(ricc): write results [size=#{$autowrite_file.size}] in this Directory: #{path_to_save}") if fs_type == :dir
335
+ puts(red "TODO(ricc): write results [size=#{$autowrite_file.size}] in this GCS Bucket or Dir: #{path_to_save}") if fs_type == :gcs
336
+
337
+ ##########################################################################
338
+ # do the thing: creates file locally and moves to GCS or Dir.
339
+ ##########################################################################
340
+ $autowrite_file += "# [autowrite_to_dir_or_gcs v#{autowrite_version}]: about to write it now. #{Time.now}"
341
+ tmpfile = Tempfile.new(path_to_save)
342
+ tmpfile.write($autowrite_file)
343
+ cmd = :no_cmd_yet
344
+
345
+ if fs_type == :gcs
346
+ cmd = "gsutil mv '#{tmpfile.path}' #{path}/#{file_to_save}"
347
+ end
348
+ if fs_type == :dir
349
+ cmd = "mv '#{tmpfile.path}' #{path}/#{file_to_save}"
350
+ end
311
351
 
312
- puts("TMP File is this big: #{azure tmpfile.size}")
313
- puts("About to execute this: #{white cmd}")
314
- ret = `#{cmd}`
315
- if $?.exitstatus != 0
316
- puts "Error to execute: #{red cmd}. Exit: #{red $?.exitstatus}"
317
- exit 53
318
- end
319
- puts "Command returned: '#{$?.exitstatus}'" # - #{$?.class}"
320
- # wow: fork { exec("ls") }
321
-
322
- # removes the tmpfile :)
323
- tmpfile.close
324
- tmpfile.unlink
325
-
326
- #####################################
327
- # finished, lets now clean up the file...
328
- #####################################
329
- reset_autowrite_file()
330
- return :ok
352
+ puts("TMP File is this big: #{azure tmpfile.size}")
353
+ puts("About to execute this: #{white cmd}")
354
+ ret = `#{cmd}`
355
+ if $?.exitstatus != 0
356
+ puts "Error to execute: #{red cmd}. Exit: #{red $?.exitstatus}"
357
+ exit 53
358
+ end
359
+ puts "Command returned: '#{$?.exitstatus}'" # - #{$?.class}"
360
+ # wow: fork { exec("ls") }
361
+
362
+ # removes the tmpfile :)
363
+ tmpfile.close
364
+ tmpfile.unlink
365
+
366
+ #####################################
367
+ # finished, lets now clean up the file...
368
+ #####################################
369
+ reset_autowrite_file()
370
+ return :ok
331
371
  end
332
372
 
333
- def print_stats_and_md5_for_directory(directory_to_explore_recursively, opts={})
334
- puts "# [print_stats_and_md5_for_directory] DIR to explore (make sure you glob also): #{directory_to_explore_recursively }"
335
- puts "# DEB Options: #{opts}"
336
373
 
374
+ def print_stats_and_md5_for_directory_from_cli(directory_to_explore_recursively, opts={})
375
+ puts "# [print_stats_and_md5_for_directory_from_cli] v#{$print_stats_and_md5_version}] DIR to explore (make sure you glob also): #{directory_to_explore_recursively }"
337
376
  opts_autowrite = opts.fetch :autowrite, false
377
+ opts_verbose = opts.fetch( :verbose, $opts[:verbose])
378
+ opts_debug = opts.fetch :debug, $opts[:debug]
379
+ opts_color = opts.fetch :color, $opts[:color]
380
+ if opts_debug
381
+ puts "# DEB Options1 opts (FUNC) BAD: s #{opts}" # probably useless since this is invoked by CLI and has ful fledged options.
382
+ puts "# DEB Options2 $opts (ARGV) GOOD: #{$opts}" # This is what we ewant in this CLI. TODO(ricc): if you move this function to be used as function invert the priority...
383
+ puts("# DEB version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") # if ARGV.size > 0
384
+ end
338
385
 
339
386
  Dir.glob("#{(directory_to_explore_recursively)}/**/*") do |globbed_filename|
340
387
  # Do work on files & directories ending in .rb
341
388
  #puts "[deb] #{globbed_filename}"
342
- print_stats_and_md5(globbed_filename, :color => $opts[:color], :verbose => $opts[:verbose], :autowrite => $opts[:autowrite])
389
+ print_stats_and_md5(globbed_filename, :color => opts_color, :verbose => opts_verbose, :autowrite => opts_autowrite)
343
390
  end
344
391
 
345
- autowrite_to_dir_or_gcs(:dir, directory_to_explore_recursively) if opts_autowrite
346
- end
392
+ autowrite_to_dir_or_gcs(:dir, directory_to_explore_recursively) if opts_autowrite
393
+ end
347
394
 
348
395
  def real_program
349
396
  t0 = Time.now
350
397
  deb "+ Options are: #{gray $opts}"
351
398
  deb "+ Depured args: #{azure ARGV}"
352
399
  # Your code goes here...
353
- #puts "Description: '''#{white $myconf[:description] }'''"
354
- #puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ARGV.size > 0
400
+ # puts "Description: '''#{white $myconf[:description] }'''"
401
+ # puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ARGV.size > 0
355
402
 
356
403
  common_opts = {
357
- :max_files => $opts[:max_files],
358
- :color => $opts[:color],
359
- :verbose => $opts[:verbose],
360
- :autowrite => $opts[:autowrite],
404
+ :max_files => $opts[:max_files],
405
+ :color => $opts[:color],
406
+ :verbose => $opts[:verbose],
407
+ :autowrite => $opts[:autowrite],
361
408
  }
362
409
 
363
410
  if ARGV.size == 1
364
- directory_to_explore_recursively = ARGV[0]
411
+ directory_to_explore_recursively = ARGV[0]
365
412
  if smells_like_gcs?(directory_to_explore_recursively)
413
+ <<<<<<< HEAD
414
+ print_stats_and_md5_for_gcs(directory_to_explore_recursively, common_opts)
415
+ else # normal file..
416
+ print_stats_and_md5_for_directory(directory_to_explore_recursively, common_opts)
417
+ end
418
+ =======
366
419
  print_stats_and_md5_for_gcs(directory_to_explore_recursively, common_opts)
367
420
  else # normal file..
368
- print_stats_and_md5_for_directory(directory_to_explore_recursively, common_opts)
421
+ print_stats_and_md5_for_directory_from_cli(directory_to_explore_recursively, common_opts)
369
422
  end
423
+ >>>>>>> 97a64f1 (perfected CLI)
370
424
  elsif ARGV.size > 1
371
- deb "2. I expect a lot of single files or directories:"
425
+ deb "2. I expect a lot of single files or directories:"
372
426
  for arg in ARGV
427
+ <<<<<<< HEAD
428
+ if File.directory?(arg)
429
+ print_stats_and_md5_for_directory(arg, common_opts)
430
+ else
431
+ print_stats_and_md5(arg, common_opts)
432
+ end
433
+ =======
373
434
  if File.directory?(arg)
374
- print_stats_and_md5_for_directory(arg, common_opts )
435
+ print_stats_and_md5_for_directory_from_cli(arg, common_opts )
375
436
  else
376
437
  print_stats_and_md5(arg, common_opts)
377
438
  end
439
+ >>>>>>> 97a64f1 (perfected CLI)
378
440
  end
379
441
  else
380
- puts "No args given. Exiting"
381
- exit 41
442
+ puts "No args given. Exiting"
443
+ exit 41
382
444
  end
383
445
  tf = Time.now
384
- puts "# [#{File.basename $0}] Time taken for processing #{stats_and_md5_number_of_files_processed} files: #{tf-t0}"
446
+ <<<<<<< HEAD
447
+ puts "# [#{File.basename $0}] Time taken for processing #{stats_and_md5_number_of_files_processed} files: #{tf - t0}"
448
+ =======
449
+ #rounding to 3 decimals
450
+ approx_delta = (tf-t0).round(3)
451
+ puts "# [#{File.basename $0}] Time taken for processing #{stats_and_md5_number_of_files_processed} files: #{approx_delta} seconds"
452
+ >>>>>>> 97a64f1 (perfected CLI)
385
453
  end
386
454
 
387
455
  def main(filename)