storazzo 0.4.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +7 -4
- data/Makefile +20 -1
- data/README.md +23 -0
- data/Rakefile +35 -8
- data/VERSION +1 -1
- data/bin/hello-storazzo +5 -0
- data/bin/ricdisk-magic +50 -57
- data/bin/stats-with-md5 +268 -297
- data/lib/storazzo/colors.rb +43 -45
- data/lib/storazzo/common.rb +80 -17
- data/lib/storazzo/debug.rb +5 -8
- data/lib/storazzo/hashify.rb +44 -43
- data/lib/storazzo/main.rb +44 -40
- data/lib/storazzo/media/abstract_ric_disk.rb +163 -104
- data/lib/storazzo/media/gcs_bucket.rb +51 -15
- data/lib/storazzo/media/local_folder.rb +43 -46
- data/lib/storazzo/media/mount_point.rb +12 -2
- data/lib/storazzo/ric_disk.rb +223 -251
- data/lib/storazzo/ric_disk_config.rb +230 -193
- data/lib/storazzo/ric_disk_sample_config.rb +12 -16
- data/lib/storazzo/ric_disk_statsfile.rb +17 -16
- data/lib/storazzo/ric_disk_ugly.rb +35 -38
- data/lib/storazzo/version.rb +7 -7
- data/lib/storazzo.rb +34 -29
- data/storazzo.gemspec +22 -20
- data/test/media/test_abstract_ric_disk.rb +19 -0
- data/test/media/test_gcs_bucket.rb +58 -0
- data/test/media/test_local_folder.rb +145 -0
- data/test/media/test_mount_point.rb +25 -0
- data/test/test_ric_disk.rb +16 -0
- data/test/test_ric_disk_config.rb +20 -29
- data/test/test_ric_disk_stats_file.rb +13 -14
- data/test/test_storazzo.rb +26 -26
- data/var/dumps/file_stat.linux.yaml +15 -0
- data/var/dumps/file_stat.macosx.yaml +15 -0
- data/var/test/disks/disk02-full/Rakefile +13 -0
- data/var/test/disks/ricdisk_stats_v11.rds +11 -0
- metadata +38 -10
- data/test/test_gcs_bucket.rb +0 -70
- data/test/test_local_folder.rb +0 -121
data/bin/stats-with-md5
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
require 'digest/md5'
|
4
4
|
require 'digest'
|
5
5
|
require 'socket'
|
6
|
-
require 'optparse'
|
6
|
+
require 'optparse' # http://ruby.about.com/od/advancedruby/a/optionparser.htm
|
7
7
|
require 'date' # for DateTime
|
8
8
|
require 'tempfile'
|
9
9
|
|
@@ -30,36 +30,34 @@ DEFAULT_MAX_FILES = 'Infinite'
|
|
30
30
|
|
31
31
|
=end
|
32
32
|
|
33
|
-
|
34
|
-
|
35
|
-
def deb(s); puts "#DEB #{s}" if $DEBUG; end
|
33
|
+
def deb(s); puts "#DEB #{s}" if $DEBUG; end
|
36
34
|
# colors 16
|
37
|
-
def gray(s) "\033[1;30m#{s}\033[0m"
|
38
|
-
def green(s) "\033[1;32m#{s}\033[0m"
|
39
|
-
def red(s) "\033[1;31m#{s}\033[0m"
|
40
|
-
def yellow(s) "\033[1;33m#{s}\033[0m"
|
41
|
-
def blue(s)
|
42
|
-
def purple(s) "\033[1;35m#{s}\033[0m"
|
43
|
-
def azure(s) "\033[1;36m#{s}\033[0m"
|
44
|
-
def white(s) "\033[1;37m#{s}\033[0m"
|
35
|
+
def gray(s) "\033[1;30m#{s}\033[0m"; end
|
36
|
+
def green(s) "\033[1;32m#{s}\033[0m"; end
|
37
|
+
def red(s) "\033[1;31m#{s}\033[0m"; end
|
38
|
+
def yellow(s) "\033[1;33m#{s}\033[0m"; end
|
39
|
+
def blue(s) "\033[1;34m#{s}\033[0m"; end
|
40
|
+
def purple(s) "\033[1;35m#{s}\033[0m"; end
|
41
|
+
def azure(s) "\033[1;36m#{s}\033[0m"; end
|
42
|
+
def white(s) "\033[1;37m#{s}\033[0m"; end
|
45
43
|
|
46
44
|
# colors 64k
|
47
|
-
def orange(s)
|
45
|
+
def orange(s) "\033[38;5;208m#{s}\033[0m"; end
|
48
46
|
|
49
47
|
# Program constants, automagically picked up by RicLib
|
50
48
|
# More configuration could be written in:
|
51
49
|
# $GIC/etc/ricsvn/<FILENAME>.yml
|
52
50
|
# That would go into the variable '$prog_conf_d'
|
53
51
|
$myconf = {
|
54
|
-
|
55
|
-
|
52
|
+
:app_name => "AppName should be sth like #{$0}",
|
53
|
+
:description => "
|
56
54
|
This program gets stats from Ruby File.Stats library
|
57
55
|
plus computes MD5 or CRC32 of the file.
|
58
|
-
And bakes it in a way that can be easily parsed.
|
56
|
+
And bakes it in a way that can be easily parsed.
|
59
57
|
".strip.gsub(/^\s+/, "").gsub(/\s+$/, ""),
|
60
58
|
}
|
61
59
|
|
62
|
-
def usage(comment=nil)
|
60
|
+
def usage(comment = nil)
|
63
61
|
puts white($optparse.banner)
|
64
62
|
puts($optparse.summarize)
|
65
63
|
puts("Description: " + gray($myconf[:description]))
|
@@ -68,12 +66,12 @@ def usage(comment=nil)
|
|
68
66
|
end
|
69
67
|
|
70
68
|
def reset_autowrite_file()
|
71
|
-
|
72
|
-
|
73
|
-
end
|
69
|
+
$autowrite_file = "# #{File.basename $0} Autowrite v1.0 #{Time.now} reincarnation=#{$reset_autowrite_file_reincarnation}\n"
|
70
|
+
$reset_autowrite_file_reincarnation += 1 # ($reset_autowrite_file_reincarnation + 1) # rescue 1
|
71
|
+
end
|
74
72
|
|
75
73
|
# include it in main if you want a custome one
|
76
|
-
def init()
|
74
|
+
def init() # see lib_autoinit in lib/util.rb
|
77
75
|
$opts = {}
|
78
76
|
# setting defaults
|
79
77
|
$opts[:verbose] = false
|
@@ -87,335 +85,308 @@ def init() # see lib_autoinit in lib/util.rb
|
|
87
85
|
|
88
86
|
$optparse = OptionParser.new do |opts|
|
89
87
|
opts.banner = "#{$0} v.#{$PROG_VER}\n Usage: #{File.basename $0} [options] file1 file2 ..."
|
90
|
-
opts.on(
|
91
|
-
|
92
|
-
|
93
|
-
opts.on(
|
94
|
-
opts.on(
|
95
|
-
opts.on(
|
96
|
-
opts.on(
|
97
|
-
|
88
|
+
opts.on('-a', '--autowrite', 'automatically writes results to Root Folder (DFLT=false)') {
|
89
|
+
$opts[:autowrite] = true
|
90
|
+
}
|
91
|
+
opts.on('-c', '--no-color', 'disables color (DFLT=false, that is color enabled)') { $opts[:color] = false }
|
92
|
+
opts.on('-d', '--debug', 'enables debug (DFLT=false)') { $opts[:debug] = true; $DEBUG = true }
|
93
|
+
opts.on('-h', '--help', 'Display this screen') { usage }
|
94
|
+
opts.on('-m', '--max-files NUMBER', "sets max files per Dir to X (DFLT=#{DEFAULT_MAX_FILES})") { |nfiles|
|
95
|
+
$opts[:max_files] = nfiles.to_i
|
96
|
+
}
|
97
|
+
opts.on('-n', '--dryrun', "Don't really execute code") { $opts[:dryrun] = true }
|
98
|
+
opts.on('-l', '--logfile FILE', 'Write log to FILE') { |file| $opts[:logfile] = file }
|
99
|
+
opts.on('-v', '--verbose', 'Output more information') { $opts[:verbose] = true }
|
98
100
|
end
|
99
101
|
$optparse.parse!
|
100
102
|
end
|
101
103
|
|
102
104
|
def getSafeCreationTime(file)
|
103
|
-
|
105
|
+
File.ctime(file)
|
104
106
|
end
|
105
107
|
|
106
108
|
def compute_stats_and_md5(file)
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
# rdev=0x0 (0, 0),
|
144
|
-
# size=7,
|
145
|
-
# blksize=4096,
|
146
|
-
# blocks=8,
|
147
|
-
# atime=2022-06-27 08:49:38.586706861 +0200 (1656312578),
|
148
|
-
# mtime=2022-03-23 14:28:45 +0100 (1648042125),
|
149
|
-
# ctime=2022-05-30 10:12:10.381629305 +0200 (1653898330)>
|
150
|
-
ret[:size] = stats.size
|
151
|
-
ret[:mode] = stats.mode
|
152
|
-
# datetimes
|
153
|
-
ret[:stat_safebirthtime] = getSafeCreationTime(file) # in Mac uses birthtime,but on Linux wont work
|
154
|
-
# defined?(stats.birthtime) ? # rescue stats.mtime # eg, 2022-03-05 21:47:51 +0100 on Mac (not implemented on my Linuix)#
|
155
|
-
# stats.birthtime : # works on Mac
|
156
|
-
# stats.ctime
|
157
|
-
ret[:stat_mtime] = stats.mtime # eg, 2022-03-05 21:47:51 +0100 Returns the modification time of stat.
|
158
|
-
ret[:stat_ctime] = stats.ctime # eg, 2022-03-05 21:47:51 +0100 Returns the change time for stat (that is, the time directory information about the file was changed, not the file itself). Note that on Windows (NTFS), returns creation time (birth time).
|
159
|
-
ret[:stat_atime] = stats.atime # eg, 2022-03-05 21:47:51 +0100 Last Access
|
160
|
-
ret[:stat_gid] = stats.gid # Group Id
|
161
|
-
ret[:stat_uid] = stats.uid # UUID
|
162
|
-
ret[:stat_ftype] = stats.ftype #
|
163
|
-
|
164
|
-
ret[:md5] = '______________NONE______________'
|
165
|
-
if stats.ftype != "directory"
|
166
|
-
file_content = File.read(file)
|
167
|
-
ret[:md5] = Digest::MD5.hexdigest(file_content) # rescue 'none ' #=> string with hexadecimal digits
|
168
|
-
end
|
169
|
-
rescue Errno::EISDIR => e
|
170
|
-
#puts "It's a dir, nothing I can do here except skipping the stuff"
|
171
|
-
ret[:md5] = "_________I'm a dir sorry________"
|
172
|
-
rescue Exception => e
|
173
|
-
ret[:error] = e
|
174
|
-
end
|
175
|
-
ret
|
109
|
+
ret = {}
|
110
|
+
ret[:name] = file
|
111
|
+
|
112
|
+
begin
|
113
|
+
stats = File.stat(file)
|
114
|
+
ret[:stats_object] = stats # TODO deprecate
|
115
|
+
deb("Stats methods: #{stats.methods.sort.join(', ')}")
|
116
|
+
deb(stats.ctime)
|
117
|
+
# puts(stats.birthtime rescue (stats.ctime))
|
118
|
+
# On Mac/Linux: see test/dumps/***.yaml
|
119
|
+
ret[:size] = stats.size
|
120
|
+
ret[:mode] = stats.mode
|
121
|
+
# datetimes
|
122
|
+
ret[:stat_safebirthtime] = getSafeCreationTime(file) # in Mac uses birthtime,but on Linux wont work
|
123
|
+
# defined?(stats.birthtime) ? # rescue stats.mtime # eg, 2022-03-05 21:47:51 +0100 on Mac (not implemented on my Linuix)#
|
124
|
+
# stats.birthtime : # works on Mac
|
125
|
+
# stats.ctime
|
126
|
+
ret[:stat_mtime] = stats.mtime # eg, 2022-03-05 21:47:51 +0100 Returns the modification time of stat.
|
127
|
+
ret[:stat_ctime] = stats.ctime # eg, 2022-03-05 21:47:51 +0100 Returns the change time for stat (that is, the time directory information about the file was changed, not the file itself). Note that on Windows (NTFS), returns creation time (birth time).
|
128
|
+
ret[:stat_atime] = stats.atime # eg, 2022-03-05 21:47:51 +0100 Last Access
|
129
|
+
ret[:stat_gid] = stats.gid # Group Id
|
130
|
+
ret[:stat_uid] = stats.uid # UUID
|
131
|
+
ret[:stat_ftype] = stats.ftype #
|
132
|
+
|
133
|
+
ret[:md5] = '______________NONE______________'
|
134
|
+
if stats.ftype != "directory"
|
135
|
+
file_content = File.read(file)
|
136
|
+
ret[:md5] = Digest::MD5.hexdigest(file_content) # rescue 'none ' #=> string with hexadecimal digits
|
137
|
+
end
|
138
|
+
rescue Errno::EISDIR => e
|
139
|
+
# puts "It's a dir, nothing I can do here except skipping the stuff"
|
140
|
+
ret[:md5] = "_________I'm a dir sorry________"
|
141
|
+
rescue Exception => e
|
142
|
+
ret[:error] = e
|
143
|
+
end
|
144
|
+
ret
|
176
145
|
end
|
177
146
|
|
178
147
|
$print_stats_and_md5_version = "1.1a_220624_F" # files
|
179
148
|
$print_stats_and_md5_counter = 0
|
180
|
-
$print_stats_and_md5_for_gcs_version = "1.1alpha_220628_G" #GCS bucket
|
149
|
+
$print_stats_and_md5_for_gcs_version = "1.1alpha_220628_G" # GCS bucket
|
181
150
|
|
182
151
|
def stats_and_md5_number_of_files_processed()
|
183
|
-
|
152
|
+
return $print_stats_and_md5_counter
|
184
153
|
end
|
185
154
|
|
186
155
|
def smells_like_gcs?(file)
|
187
|
-
|
156
|
+
file =~ /gs:\/\//
|
188
157
|
end
|
189
158
|
|
190
159
|
# You give me gs://bucket123/path/to/dir
|
191
|
-
def print_stats_and_md5_for_gcs(mybucket_with_subdir, opts={})
|
192
|
-
# opts_color = opts.fetch :color, true#
|
193
|
-
# opts_verbose = opts.fetch :verbose, false
|
194
|
-
# opts_ping_frequency = opts.fetch :ping_frequency, 50
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
160
|
+
def print_stats_and_md5_for_gcs(mybucket_with_subdir, opts = {})
|
161
|
+
# opts_color = opts.fetch :color, true#
|
162
|
+
# opts_verbose = opts.fetch :verbose, false
|
163
|
+
# opts_ping_frequency = opts.fetch :ping_frequency, 50
|
164
|
+
opts_max_files = opts.fetch :max_files, nil # BigDecimal('Infinity')
|
165
|
+
opts_autowrite = opts.fetch :autowrite, false
|
166
|
+
|
167
|
+
raise "Wrong GCS Path: #{mybucket_with_subdir}" unless smells_like_gcs?(mybucket_with_subdir)
|
168
|
+
|
169
|
+
# puts "0. mybucket_with_subdir: #{mybucket_with_subdir}" # gs://mybucket123/path/to/dir
|
170
|
+
path_split = mybucket_with_subdir.split('/')
|
171
|
+
mybucket = path_split[0, 3].join('/') # gs://mybucket123/
|
172
|
+
gcs_subpath = path_split[3, 42].join('/') # path/to/dir
|
173
|
+
cleanedup_bucket = path_split[2] # mybucket123
|
174
|
+
# puts "1. mybucket: #{mybucket}"
|
175
|
+
# puts "2. gcs_subpath: #{gcs_subpath}"
|
176
|
+
# puts "3. cleanedup_bucket: #{cleanedup_bucket}"
|
177
|
+
|
178
|
+
puts("[print_stats_and_md5_for_gcs] version=#{$print_stats_and_md5_for_gcs_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on='#{Time.now}' bucket=#{mybucket} subpath=#{gcs_subpath}")
|
179
|
+
begin
|
180
|
+
require "google/cloud/storage"
|
181
|
+
project_id = `gcloud config get project 2>/dev/null`
|
182
|
+
deb "project_id: #{project_id}"
|
183
|
+
storage = Google::Cloud::Storage.new(project_id: project_id)
|
184
|
+
bucket = storage.bucket(cleanedup_bucket)
|
185
|
+
# grabs them ALL if nil, optherwise its an integer.
|
186
|
+
files = opts_max_files.nil? ?
|
187
|
+
bucket.files : # nil -> all of them (I've tried sth more idiomatic like -1 or nil or Infinite. they all failed.)
|
188
|
+
bucket.files.first(opts_max_files) # not nil -> first(N)
|
189
|
+
# puts(files)
|
190
|
+
deb("All Sorted Methods: #{files.first.methods.sort}")
|
191
|
+
files.each do |gcs_file|
|
192
|
+
md5 = Base64.decode64(gcs_file.md5).unpack('H*')[0] # md5 in base64 -> then de-binarizied: Base64.urlsafe_decode64(md5).unpack('H*')[0]
|
193
|
+
size = gcs_file.size # crc rescue :boh
|
194
|
+
time_created = gcs_file.created_at
|
195
|
+
mode = 'XXXXXX' # for compatbility with files
|
196
|
+
file_type = gcs_file.name.to_s =~ /\/$/ ? 'd' : 'f' # if ends with a slash its a DIR :P
|
197
|
+
# puts("[OLD_GCS_v11] #{md5} #{size}\t#{time_created} [#{gcs_file.content_type}]\t#{gcs_file.name}")
|
198
|
+
print_colored_polymoprhic('gcs_v1.2', md5, mode, file_type, size, gcs_file.content_type, time_created,
|
199
|
+
gcs_file.name, opts)
|
200
|
+
deb gcs_file.to_yaml
|
201
|
+
end
|
202
|
+
# puts("Hash Methods: #{files.first.methods.select{|x| x.match /hash/}}")
|
203
|
+
rescue LoadError # require Exception
|
204
|
+
puts 'Error with library #{$!}, maybe type: gem install google-cloud-storage'
|
205
|
+
rescue Exception # generic Exception
|
206
|
+
puts "print_stats_and_md5_for_gcs(): Generic Error: #{$!}"
|
207
|
+
exit 1
|
208
|
+
end
|
209
|
+
autowrite_to_dir_or_gcs(:gcs, mybucket_with_subdir) if opts_autowrite
|
210
|
+
end
|
241
211
|
|
242
212
|
# Generic polymorpghic coloring, can be GCS. Would have been nice to pass a Dict so easier to extend, but Ruby doesnt support doesblt Dict with magic args. Plus might be a GOOD
|
243
213
|
# thing its hard to change since it breaks Storazzo.
|
244
214
|
# TODO(ricc): make sure the dates are the same. Currently:
|
245
215
|
# GCS Date: 2021-12-20T12:26:13+00:00 DateTime
|
246
216
|
# File Date: 2022-05-30 11:55:42 +0200 Time
|
247
|
-
def print_colored_polymoprhic(entity_type, md5, mode, file_type, size, content_type, creation_time, filename, opts={})
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
217
|
+
def print_colored_polymoprhic(entity_type, md5, mode, file_type, size, content_type, creation_time, filename, opts = {})
|
218
|
+
opts_color = opts.fetch :color, true
|
219
|
+
opts_verbose = opts.fetch :verbose, false
|
220
|
+
opts_ping_frequency = opts.fetch :ping_frequency, 50
|
221
|
+
opts_autowrite = opts.fetch :autowrite, false
|
222
|
+
|
223
|
+
# Increment counter across all!
|
224
|
+
$print_stats_and_md5_counter += 1 # global counter! I should increment at the END of fucntion.. but you never know if i exit wrongly so i do at beginning. so within this function the real #files is N-1 (i.e., if N is 6, we're processing file %5)
|
225
|
+
# in main i had to put 2 branched to invoke a single thing but if logic changes that sentence might be printed twice. Instead here, the BEGIN line could be nice to do here instead.
|
226
|
+
# $stderr.puts("print_stats_and_md5() Fikst invocation! Consider moving the 2 things in main here :)") if ($print_stats_and_md5_counter == 1)
|
227
|
+
# puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ($print_stats_and_md5_counter == 1)
|
228
|
+
$stderr.puts "-- print_stats_and_md5(v#{$print_stats_and_md5_version}): #{$print_stats_and_md5_counter - 1} files processed --" if (($print_stats_and_md5_counter) % opts_ping_frequency == 1)
|
229
|
+
|
230
|
+
maybecolored_md5 = opts_color ? red(md5) : md5
|
231
|
+
maybecolored_filename = opts_color ? azure(filename) : filename
|
232
|
+
enlarged_size = sprintf("%7o", size.to_s) # or SIZE
|
233
|
+
maybecolored_size = opts_color ? yellow(enlarged_size) : enlarged_size
|
234
|
+
maybecolored_file_type = file_type
|
235
|
+
colored_content_type = opts_color ? white(content_type) : content_type
|
236
|
+
if opts_color
|
237
|
+
maybecolored_file_type = case file_type
|
238
|
+
when 'f' then green(file_type)
|
239
|
+
when 'd' then blue(file_type)
|
240
|
+
when 's' then azure(file_type)
|
241
|
+
else red(file_type)
|
242
|
+
end
|
243
|
+
end
|
244
|
+
standardized_creation_time = creation_time.is_a?(DateTime) ?
|
245
|
+
creation_time :
|
246
|
+
DateTime.parse(creation_time.to_s) # if you prefer to use Time since its already supported by Storazzo (but its ugly since it has SPACES! so hard to parse) use tt = Time.parse(d.to_s)
|
247
|
+
# as per https://stackoverflow.com/questions/279769/convert-to-from-datetime-and-time-in-ruby
|
248
|
+
# puts "#{maybecolored_md5} #{mode} #{maybecolored_file_type} #{maybecolored_size}\t#{creation_time}(DEB #{creation_time.class})(DEB2 #{DateTime.parse(creation_time.to_s)}) [#{colored_content_type}] #{maybecolored_filename}"
|
249
|
+
# this mightr be colored
|
250
|
+
str = "[#{entity_type}] #{maybecolored_md5} #{mode} #{maybecolored_file_type} #{standardized_creation_time} #{maybecolored_size} [#{colored_content_type}] #{maybecolored_filename}\n"
|
251
|
+
# this will NEVER be colored. WARNING, now you need to maintain TWO of these beasts :/
|
252
|
+
non_colored_str = "[#{entity_type}] #{md5} #{mode} #{file_type} #{standardized_creation_time} #{enlarged_size} [#{content_type}] #{filename}\n"
|
253
|
+
if (opts_autowrite)
|
254
|
+
# need to guarantee this is NOT colored. The easiest way to do it is TWICVE as expensive but... whatevs.
|
255
|
+
# TODO(ricc): fire me for this lazimness!
|
256
|
+
$autowrite_file += (non_colored_str)
|
257
|
+
end
|
258
|
+
print(str)
|
259
|
+
return str
|
290
260
|
end
|
291
261
|
|
292
|
-
def print_stats_and_md5(file, opts={})
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
262
|
+
def print_stats_and_md5(file, opts = {})
|
263
|
+
return print_stats_and_md5_for_gcs(file, opts) if smells_like_gcs?(file)
|
264
|
+
|
265
|
+
opts_color = opts.fetch :color, true
|
266
|
+
opts_verbose = opts.fetch :verbose, false
|
267
|
+
opts_ping_frequency = opts.fetch :ping_frequency, 50
|
268
|
+
|
269
|
+
# $print_stats_and_md5_counter += 1 # global counter! I should increment at the END of fucntion.. but you never know if i exit wrongly so i do at beginning. so within this function the real #files is N-1 (i.e., if N is 6, we're processing file %5)
|
270
|
+
|
271
|
+
# # in main i had to put 2 branched to invoke a single thing but if logic changes that sentence might be printed twice. Instead here, the BEGIN line could be nice to do here instead.
|
272
|
+
# #$stderr.puts("print_stats_and_md5() Fikst invocation! Consider moving the 2 things in main here :)") if ($print_stats_and_md5_counter == 1)
|
273
|
+
# puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ($print_stats_and_md5_counter == 1)
|
274
|
+
# $stderr.puts "-- print_stats_and_md5(v#{$print_stats_and_md5_version}): #{$print_stats_and_md5_counter - 1} files processed --" if (($print_stats_and_md5_counter) % opts_ping_frequency == 1 )
|
275
|
+
|
276
|
+
# puts "print_stats_and_md5: #{file}" if opts_verbose
|
277
|
+
stats = compute_stats_and_md5 file
|
278
|
+
# maybecolored_md5 = opts_color ? red(stats[:md5]) : stats[:md5]
|
279
|
+
# maybecolored_filename = opts_color ? azure(stats[:name]) : stats[:name]
|
280
|
+
# maybecolored_size = opts_color ? white(stats[:size]) : stats[:size]
|
281
|
+
mode = sprintf("%06o", stats[:mode]) rescue :ERROR # .to_s.right(4) #=> "100644"
|
282
|
+
file_type = stats[:stat_ftype][0] rescue '?'
|
283
|
+
file_type = 's' if File.symlink?(file)
|
284
|
+
content_type = `file --mime-type -b '#{file}' 2>/dev/null`.chomp # .split(': ',2)[1]
|
285
|
+
|
286
|
+
# colored_string = "[COL] #{red stats[:md5]} #{stats[:size]} #{stats[:stat_safebirthtime]} #{white stats[:name]}"
|
287
|
+
# boring_string = "[B/W] #{ stats[:md5]} #{stats[:size]} #{stats[:stat_safebirthtime]} #{ stats[:name]}"
|
288
|
+
# puts(opts_color ? colored_string : boring_string)
|
289
|
+
# puts "[OLDv1.1deprecated] #{maybecolored_md5} #{mode} #{file_type} #{maybecolored_size} #{stats[:stat_safebirthtime]} #{maybecolored_filename}"
|
290
|
+
print_colored_polymoprhic('file_v1.2', stats[:md5], mode, file_type, stats[:size], content_type,
|
291
|
+
stats[:stat_safebirthtime], stats[:name], opts)
|
322
292
|
end
|
323
293
|
|
324
|
-
def autowrite_to_dir_or_gcs(fs_type, path, opts={})
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
294
|
+
def autowrite_to_dir_or_gcs(fs_type, path, opts = {})
|
295
|
+
autowrite_version = "1.1_28jun22"
|
296
|
+
file_to_save = "stats-with-md5.log"
|
297
|
+
path_to_save = path + "/" + file_to_save
|
298
|
+
puts(red "TODO(ricc): write results [size=#{$autowrite_file.size}] in this Directory: #{path_to_save}") if fs_type == :dir
|
299
|
+
puts(red "TODO(ricc): write results [size=#{$autowrite_file.size}] in this GCS Bucket or Dir: #{path_to_save}") if fs_type == :gcs
|
300
|
+
|
301
|
+
##########################################################################
|
302
|
+
# do the thing: creates file locally and moves to GCS or Dir.
|
303
|
+
##########################################################################
|
304
|
+
$autowrite_file += "# [autowrite_to_dir_or_gcs v#{autowrite_version}]: about to write it now. #{Time.now}"
|
305
|
+
tmpfile = Tempfile.new(path_to_save)
|
306
|
+
tmpfile.write($autowrite_file)
|
307
|
+
cmd = :no_cmd_yet
|
308
|
+
|
309
|
+
if fs_type == :gcs
|
310
|
+
cmd = "gsutil mv '#{tmpfile.path}' #{path}/#{file_to_save}"
|
311
|
+
end
|
312
|
+
if fs_type == :dir
|
313
|
+
cmd = "mv '#{tmpfile.path}' #{path}/#{file_to_save}"
|
314
|
+
end
|
315
|
+
|
316
|
+
puts("TMP File is this big: #{azure tmpfile.size}")
|
317
|
+
puts("About to execute this: #{white cmd}")
|
318
|
+
ret = `#{cmd}`
|
319
|
+
if $?.exitstatus != 0
|
320
|
+
puts "Error to execute: #{red cmd}. Exit: #{red $?.exitstatus}"
|
321
|
+
exit 53
|
322
|
+
end
|
323
|
+
puts "Command returned: '#{$?.exitstatus}'" # - #{$?.class}"
|
324
|
+
# wow: fork { exec("ls") }
|
325
|
+
|
326
|
+
# removes the tmpfile :)
|
327
|
+
tmpfile.close
|
328
|
+
tmpfile.unlink
|
329
|
+
|
330
|
+
#####################################
|
331
|
+
# finished, lets now clean up the file...
|
332
|
+
#####################################
|
333
|
+
reset_autowrite_file()
|
334
|
+
return :ok
|
365
335
|
end
|
366
336
|
|
367
|
-
def print_stats_and_md5_for_directory(directory_to_explore_recursively, opts={})
|
368
|
-
|
369
|
-
|
337
|
+
def print_stats_and_md5_for_directory(directory_to_explore_recursively, opts = {})
|
338
|
+
puts "# [print_stats_and_md5_for_directory] DIR to explore (make sure you glob also): #{directory_to_explore_recursively}"
|
339
|
+
puts "# DEB Options: #{opts}"
|
370
340
|
|
371
|
-
|
341
|
+
opts_autowrite = opts.fetch :autowrite, false
|
372
342
|
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
343
|
+
Dir.glob("#{(directory_to_explore_recursively)}/**/*") do |globbed_filename|
|
344
|
+
# Do work on files & directories ending in .rb
|
345
|
+
# puts "[deb] #{globbed_filename}"
|
346
|
+
print_stats_and_md5(globbed_filename, :color => $opts[:color], :verbose => $opts[:verbose],
|
347
|
+
:autowrite => $opts[:autowrite])
|
348
|
+
end
|
378
349
|
|
379
|
-
|
380
|
-
end
|
350
|
+
autowrite_to_dir_or_gcs(:dir, directory_to_explore_recursively) if opts_autowrite
|
351
|
+
end
|
381
352
|
|
382
353
|
def real_program
|
383
354
|
t0 = Time.now
|
384
355
|
deb "+ Options are: #{gray $opts}"
|
385
356
|
deb "+ Depured args: #{azure ARGV}"
|
386
357
|
# Your code goes here...
|
387
|
-
#puts "Description: '''#{white $myconf[:description] }'''"
|
388
|
-
#puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ARGV.size > 0
|
358
|
+
# puts "Description: '''#{white $myconf[:description] }'''"
|
359
|
+
# puts("[print_stats_and_md5] version=#{$print_stats_and_md5_version} host=#{Socket.gethostname}(#{`uname`.chomp}) created_on=#{Time.now}") if ARGV.size > 0
|
389
360
|
|
390
361
|
common_opts = {
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
362
|
+
:max_files => $opts[:max_files],
|
363
|
+
:color => $opts[:color],
|
364
|
+
:verbose => $opts[:verbose],
|
365
|
+
:autowrite => $opts[:autowrite],
|
395
366
|
}
|
396
367
|
|
397
368
|
if ARGV.size == 1
|
398
|
-
|
369
|
+
directory_to_explore_recursively = ARGV[0]
|
399
370
|
if smells_like_gcs?(directory_to_explore_recursively)
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
371
|
+
print_stats_and_md5_for_gcs(directory_to_explore_recursively, common_opts)
|
372
|
+
else # normal file..
|
373
|
+
print_stats_and_md5_for_directory(directory_to_explore_recursively, common_opts)
|
374
|
+
end
|
404
375
|
elsif ARGV.size > 1
|
405
|
-
|
376
|
+
deb "2. I expect a lot of single files or directories:"
|
406
377
|
for arg in ARGV
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
378
|
+
if File.directory?(arg)
|
379
|
+
print_stats_and_md5_for_directory(arg, common_opts)
|
380
|
+
else
|
381
|
+
print_stats_and_md5(arg, common_opts)
|
382
|
+
end
|
412
383
|
end
|
413
384
|
else
|
414
|
-
|
415
|
-
|
385
|
+
puts "No args given. Exiting"
|
386
|
+
exit 41
|
416
387
|
end
|
417
388
|
tf = Time.now
|
418
|
-
puts "# [#{File.basename $0}] Time taken for processing #{stats_and_md5_number_of_files_processed} files: #{tf-t0}"
|
389
|
+
puts "# [#{File.basename $0}] Time taken for processing #{stats_and_md5_number_of_files_processed} files: #{tf - t0}"
|
419
390
|
end
|
420
391
|
|
421
392
|
def main(filename)
|