rda-python-common 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3118 @@
1
+ #
2
+ ###############################################################################
3
+ #
4
+ # Title : PgFile.py
5
+ # Author : Zaihua Ji, zji@ucar.edu
6
+ # Date : 08/05/2020
7
+ # 2025-01-10 transferred to package rda_python_common from
8
+ # https://github.com/NCAR/rda-shared-libraries.git
9
+ # Purpose : python library module to copy, move and delete data files locally
10
+ # and remotely
11
+ #
12
+ # Github : https://github.com/NCAR/rda-python-common.git
13
+ #
14
+ ###############################################################################
15
+ #
16
+ import sys
17
+ import os
18
+ from os import path as op
19
+ import pwd
20
+ import grp
21
+ import stat
22
+ import re
23
+ import time
24
+ import glob
25
+ import json
26
+ from . import PgLOG
27
+ from . import PgUtil
28
+ from . import PgSIG
29
+ from . import PgDBI
30
+
31
+ CMDBTH = (0x0033) # return both stdout and stderr, 16 + 32 + 2 + 1
32
+ RETBTH = (0x0030) # return both stdout and stderr, 16 + 32
33
+ CMDRET = (0x0110) # return stdout and save error, 16 + 256
34
+ CMDERR = (0x0101) # display command and save error, 1 + 256
35
+ CMDGLB = (0x0313) # return stdout and save error for globus, 1+2+16+256+512
36
+
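+ # The hex values above are bitmasks passed as the third argument of PgLOG.pgsystem()
+ # to control which output streams are captured or logged. An illustrative sketch,
+ # assuming only the flag combinations already used throughout this module:
+ #
+ #   buf = PgLOG.pgsystem("ls -l /tmp", PgLOG.LOGWRN, CMDBTH)   # capture stdout and stderr
+ #   out = PgLOG.pgsystem("hostname", PgLOG.LOGWRN, CMDRET)     # capture stdout, save stderr
+ #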
37
+ PGCMPS = {
38
+ # extension Compress Uncompress ArchiveFormat
39
+ 'Z' : ['compress -f', 'uncompress -f', 'Z'],
40
+ 'zip' : ['zip', 'unzip', 'ZIP'],
41
+ 'gz' : ['gzip', 'gunzip', 'GZ'],
42
+ 'xz' : ['xz', 'unxz', 'XZ'],
43
+ 'bz2' : ['bzip2', 'bunzip2', 'BZ2']
44
+ }
45
+ CMPSTR = '|'.join(PGCMPS)
46
+
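+ # CMPSTR joins the supported extensions into a regex alternation so a file name can be
+ # matched against the known compression types. A minimal illustrative sketch (the local
+ # variable names here are hypothetical, not part of this module):
+ #
+ #   ms = re.match(r'^(.+)\.({})$'.format(CMPSTR), 'data.nc.gz')
+ #   if ms:
+ #       ext = ms.group(2)                  # 'gz'
+ #       uncompress_cmd = PGCMPS[ext][1]    # 'gunzip'
+ #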
47
+ PGTARS = {
48
+ # extension Packing Unpacking ArchiveFormat
49
+ 'tar' : ['tar -cvf', 'tar -xvf', 'TAR'],
50
+ 'tar.Z' : ['tar -Zcvf', 'tar -xvf', 'TAR.Z'],
51
+ 'zip' : ['zip -v', 'unzip -v', 'ZIP'],
52
+ 'tgz' : ['tar -zcvf', 'tar -xvf', 'TGZ'],
53
+ 'tar.gz' : ['tar -zcvf', 'tar -xvf', 'TAR.GZ'],
54
+ 'txz' : ['tar -cvJf', 'tar -xvf', 'TXZ'],
55
+ 'tar.xz' : ['tar -cvJf', 'tar -xvf', 'TAR.XZ'],
56
+ 'tbz2' : ['tar -cvjf', 'tar -xvf', 'TBZ2'],
57
+ 'tar.bz2' : ['tar -cvjf', 'tar -xvf', 'TAR.BZ2']
58
+ }
59
+
60
+ TARSTR = '|'.join(PGTARS)
61
+ DELDIRS = {}
62
+ TASKIDS = {} # cache IDs of unfinished Globus tasks
63
+ MD5CMD = 'md5sum'
64
+ SHA512CMD = 'sha512sum'
65
+ LHOST = "localhost"
66
+ OHOST = PgLOG.PGLOG['OBJCTSTR']
67
+ BHOST = PgLOG.PGLOG['BACKUPNM']
68
+ DHOST = PgLOG.PGLOG['DRDATANM']
69
+ OBJCTCMD = "isd_s3_cli"
70
+ BACKCMD = "dsglobus"
71
+
72
+ HLIMIT = 0 # HTAR file count limit
73
+ BLIMIT = 2 # minimum backup tar file size in DB
74
+ DIRLVLS = 0
75
+
76
+ # record how many consecutive errors have occurred while working with HPSS, local or remote machines
77
+ ECNTS = {'D' : 0, 'H' : 0, 'L' : 0, 'R' : 0, 'O' : 0, 'B' : 0}
78
+ # upper limits for how many consecutive errors are allowed
79
+ ELMTS = {'D' : 20, 'H' : 20, 'L' : 20, 'R' : 20, 'O' : 10, 'B' : 10}
80
+
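+ # ECNTS counts consecutive errors per storage type and ELMTS caps them; errlog() below
+ # forces a fatal exit once a count reaches its limit. A hedged example of relaxing the
+ # limit for the object store before a long-running transfer:
+ #
+ #   reset_error_limit('O', 50)   # allow up to 50 consecutive object-store errors
+ #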
81
+ # down storage hostnames & paths
82
+ DHOSTS = {
83
+ 'G' : PgLOG.PGLOG['GPFSNAME'],
84
+ 'O' : OHOST,
85
+ 'B' : BHOST,
86
+ 'D' : DHOST
87
+ }
88
+
89
+ DPATHS = {
90
+ 'G' : PgLOG.PGLOG['DSSDATA'],
91
+ 'O' : PgLOG.PGLOG['OBJCTBKT'],
92
+ 'B' : '/' + PgLOG.PGLOG['DEFDSID'], # backup globus endpoint
93
+ 'D' : '/' + PgLOG.PGLOG['DEFDSID'] # disaster recovery globus endpoint
94
+ }
95
+
96
+ QSTATS = {
97
+ 'A' : 'ACTIVE',
98
+ 'I' : 'INACTIVE',
99
+ 'S' : 'SUCCEEDED',
100
+ 'F' : 'FAILED',
101
+ }
102
+
103
+ QPOINTS = {
104
+ 'L' : 'gdex-glade',
105
+ 'B' : 'gdex-quasar',
106
+ 'D' : 'gdex-quasar-drdata'
107
+ }
108
+
109
+ QHOSTS = {
110
+ 'gdex-glade' : LHOST,
111
+ 'gdex-quasar' : BHOST,
112
+ 'gdex-quasar-drdata' : DHOST
113
+ }
114
+
115
+ ENDPOINTS = {
116
+ 'gdex-glade' : "NCAR GDEX GLADE",
117
+ 'gdex-quasar' : "NCAR GDEX Quasar",
118
+ 'gdex-quasar-drdata' : "NCAR GDEX Quasar DRDATA"
119
+ }
120
+
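+ # The dictionaries above chain together: a one-letter storage key maps to a Globus
+ # endpoint (QPOINTS), which maps to a host name (QHOSTS) and a display name (ENDPOINTS).
+ # Illustrative lookups (actual values depend on the PgLOG configuration):
+ #
+ #   ep = QPOINTS['B']          # 'gdex-quasar'
+ #   host = QHOSTS[ep]          # PgLOG.PGLOG['BACKUPNM']
+ #   label = ENDPOINTS[ep]      # 'NCAR GDEX Quasar'
+ #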
121
+ BFILES = {} # cache backup file names and dates for each bid
122
+
123
+ #
124
+ # reset the up limit for a specified error type
125
+ #
126
+ def reset_error_limit(etype, lmt):
127
+
128
+ ELMTS[etype] = lmt
129
+
130
+ #
131
+ # wrapper around PgLOG.pglog() to show an error without a fatal exit on the first call, so the action can be retried
132
+ #
133
+ def errlog(msg, etype, retry = 0, logact = 0):
134
+
135
+ bckgrnd = PgLOG.PGLOG['BCKGRND']
136
+ logact |= PgLOG.ERRLOG
137
+ if not retry:
138
+ if msg and not re.search(r'\n$', msg): msg += "\n"
139
+ msg += "[The same execution will be retried in {} Seconds]".format(PgSIG.PGSIG['ETIME'])
140
+ PgLOG.PGLOG['BCKGRND'] = 1
141
+ logact &= ~(PgLOG.EMEROL|PgLOG.EXITLG)
142
+ elif ELMTS[etype]:
143
+ ECNTS[etype] += 1
144
+ if ECNTS[etype] >= ELMTS[etype]:
145
+ logact |= PgLOG.EXITLG
146
+ ECNTS[etype] = 0
147
+
148
+ if PgLOG.PGLOG['DSCHECK'] and logact&PgLOG.EXITLG: PgDBI.record_dscheck_error(msg, logact)
149
+ PgLOG.pglog(msg, logact)
150
+ PgLOG.PGLOG['BCKGRND'] = bckgrnd
151
+ if not retry: time.sleep(PgSIG.PGSIG['ETIME'])
152
+
153
+ return PgLOG.FAILURE
154
+
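+ # errlog() is meant to be called twice per failing operation: the first call (retry == 0)
+ # logs the error, sleeps PgSIG.PGSIG['ETIME'] seconds and suppresses the fatal exit so the
+ # caller can retry; the second call counts the error and may exit. A hedged sketch of the
+ # calling pattern used throughout this module:
+ #
+ #   for loop in range(2):
+ #       if PgLOG.pgsystem(cmd, logact, CMDERR): return PgLOG.SUCCESS
+ #       errlog(PgLOG.PGLOG['SYSERR'], 'L', loop, logact)
+ #   # falls through to failure after the second attempt
+ #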
155
+ #
156
+ # Copy a file from one host (including the local host) to another host (including the local host);
157
+ # copying from a remote host to another remote host is not supported; copying in the background is permitted
158
+ #
159
+ # tofile - target file name
160
+ # fromfile - source file name
161
+ # tohost - target host name, default to LHOST
162
+ # fromhost - original host name, default to LHOST
163
+ #
164
+ # Return 1 if successful, 0 if failed; the error message generated in PgLOG.pgsystem() is cached in PgLOG.PGLOG['SYSERR']
165
+ #
166
+ def copy_gdex_file(tofile, fromfile, tohost = LHOST, fromhost = LHOST, logact = 0):
167
+
168
+ thost = strip_host_name(tohost)
169
+ fhost = strip_host_name(fromhost)
170
+
171
+ if PgUtil.pgcmp(thost, fhost, 1) == 0:
172
+ if PgUtil.pgcmp(thost, LHOST, 1) == 0:
173
+ return local_copy_local(tofile, fromfile, logact)
174
+ elif PgUtil.pgcmp(fhost, LHOST, 1) == 0:
175
+ if PgUtil.pgcmp(thost, OHOST, 1) == 0:
176
+ return local_copy_object(tofile, fromfile, None, None, logact)
177
+ elif PgUtil.pgcmp(thost, BHOST, 1) == 0:
178
+ return local_copy_backup(tofile, fromfile, QPOINTS['B'], logact)
179
+ elif PgUtil.pgcmp(thost, DHOST, 1) == 0:
180
+ return local_copy_backup(tofile, fromfile, QPOINTS['D'], logact)
181
+ else:
182
+ return local_copy_remote(tofile, fromfile, tohost, logact)
183
+ elif PgUtil.pgcmp(thost, LHOST, 1) == 0:
184
+ if PgUtil.pgcmp(fhost, OHOST, 1) == 0:
185
+ return object_copy_local(tofile, fromfile, None, logact)
186
+ elif PgUtil.pgcmp(fhost, BHOST, 1) == 0:
187
+ return backup_copy_local(tofile, fromfile, QPOINTS['B'], logact)
188
+ elif PgUtil.pgcmp(fhost, DHOST, 1) == 0:
189
+ return backup_copy_local(tofile, fromfile, QPOINTS['D'], logact)
190
+ else:
191
+ return remote_copy_local(tofile, fromfile, fromhost)
192
+
193
+ return errlog("{}-{}->{}-{}: Cannot copy file".format(fhost, fromfile, thost, tofile), 'O', 1, PgLOG.LGEREX)
194
+
195
+ copy_rda_file = copy_gdex_file
196
+
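+ # copy_gdex_file() only dispatches on the host names; the per-storage helpers below do
+ # the real work. A hedged usage sketch (file paths and dataset names are hypothetical):
+ #
+ #   # local disk to object store, then object store back to local disk
+ #   copy_gdex_file('ds000.0/myfile.nc', '/data/ds000.0/myfile.nc', OHOST, LHOST)
+ #   copy_gdex_file('/data/ds000.0/copy.nc', 'ds000.0/myfile.nc', LHOST, OHOST)
+ #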
197
+ #
198
+ # Copy a file locally
199
+ #
200
+ # tofile - target file name
201
+ # fromfile - source file name
202
+ #
203
+ def local_copy_local(tofile, fromfile, logact = 0):
204
+
205
+ finfo = check_local_file(fromfile, 0, logact)
206
+ if not finfo:
207
+ if finfo != None: return PgLOG.FAILURE
208
+ return lmsg(fromfile, "{} to copy to {}".format(PgLOG.PGLOG['MISSFILE'], tofile), logact)
209
+
210
+ target = tofile
211
+ ms = re.match(r'^(.+)/$', tofile)
212
+ if ms:
213
+ dir = ms.group(1)
214
+ tofile += op.basename(fromfile)
215
+ else:
216
+ dir = get_local_dirname(tofile)
217
+
218
+ if not make_local_directory(dir, logact): return PgLOG.FAILURE
219
+
220
+ cmd = "cp -{} {} {}".format(('f' if finfo['isfile'] else "rf"), fromfile, target)
221
+ reset = loop = 0
222
+ while((loop-reset) < 2):
223
+ info = None
224
+ PgLOG.PGLOG['ERR2STD'] = ['are the same file']
225
+ ret = PgLOG.pgsystem(cmd, logact, CMDERR)
226
+ PgLOG.PGLOG['ERR2STD'] = []
227
+ if ret:
228
+ info = check_local_file(tofile, 143, logact) # 1+2+4+8+128
229
+ if info:
230
+ if not info['isfile']:
231
+ set_local_mode(tofile, 0, 0, info['mode'], info['logname'], logact)
232
+ return PgLOG.SUCCESS
233
+ elif info['data_size'] == finfo['data_size']:
234
+ set_local_mode(tofile, 1, 0, info['mode'], info['logname'], logact)
235
+ return PgLOG.SUCCESS
236
+ elif info != None:
237
+ break
238
+
239
+ if PgLOG.PGLOG['SYSERR']:
240
+ errmsg = PgLOG.PGLOG['SYSERR']
241
+ else:
242
+ errmsg = "Error of '{}': Miss target file {}".format(cmd, tofile)
243
+ errlog(errmsg, 'L', (loop - reset), logact)
244
+ if loop == 0: reset = reset_local_info(tofile, info, logact)
245
+ loop += 1
246
+
247
+ return PgLOG.FAILURE
248
+
249
+ #
250
+ # Copy a local file to a remote host
251
+ #
252
+ # tofile - target file name
253
+ # fromfile - source file name
254
+ # host - remote host name
255
+ #
256
+ def local_copy_remote(tofile, fromfile, host, logact = 0):
257
+
258
+ finfo = check_local_file(fromfile, 0, logact)
259
+ if not finfo:
260
+ if finfo != None: return PgLOG.FAILURE
261
+ return lmsg(fromfile, "{} to copy to {}-{}".format(PgLOG.PGLOG['MISSFILE'], host, tofile), logact)
262
+
263
+ target = tofile
264
+ ms = re.match(r'^(.+)/$', tofile)
265
+ if ms:
266
+ dir = ms.group(1)
267
+ tofile += op.basename(fromfile)
268
+ else:
269
+ dir = op.dirname(tofile)
270
+
271
+ if not make_remote_directory(dir, host, logact): return PgLOG.FAILURE
272
+
273
+ cmd = PgLOG.get_sync_command(host)
274
+ cmd += " {} {}".format(fromfile, target)
275
+ for loop in range(2):
276
+ if PgLOG.pgsystem(cmd, logact, CMDERR):
277
+ info = check_remote_file(tofile, host, 0, logact)
278
+ if info:
279
+ if not finfo['isfile']:
280
+ set_remote_mode(tofile, 0, host, PgLOG.PGLOG['EXECMODE'])
281
+ return PgLOG.SUCCESS
282
+ elif info['data_size'] == finfo['data_size']:
283
+ set_remote_mode(tofile, 1, host, PgLOG.PGLOG['FILEMODE'])
284
+ return PgLOG.SUCCESS
285
+ elif info != None:
286
+ break
287
+
288
+ errlog(PgLOG.PGLOG['SYSERR'], 'R', loop, logact)
289
+
290
+ return PgLOG.FAILURE
291
+
292
+ #
293
+ # Copy a local file to object store
294
+ #
295
+ # tofile - target file name
296
+ # fromfile - source file name
297
+ # bucket - bucket name on Object store
298
+ # meta - reference to metadata hash
299
+ #
300
+ def local_copy_object(tofile, fromfile, bucket = None, meta = None, logact = 0):
301
+
302
+ if not bucket: bucket = PgLOG.PGLOG['OBJCTBKT']
303
+ if meta is None: meta = {}
304
+ if 'user' not in meta: meta['user'] = PgLOG.PGLOG['CURUID']
305
+ if 'group' not in meta: meta['group'] = PgLOG.PGLOG['GDEXGRP']
306
+ uinfo = json.dumps(meta)
307
+
308
+ finfo = check_local_file(fromfile, 0, logact)
309
+ if not finfo:
310
+ if finfo != None: return PgLOG.FAILURE
311
+ return lmsg(fromfile, "{} to copy to {}-{}".format(PgLOG.PGLOG['MISSFILE'], OHOST, tofile), logact)
312
+
313
+ if not logact&PgLOG.OVRIDE:
314
+ tinfo = check_object_file(tofile, bucket, 0, logact)
315
+ if tinfo and tinfo['data_size'] > 0:
316
+ return PgLOG.pglog("{}-{}-{}: file exists already".format(OHOST, bucket, tofile), logact)
317
+
318
+ cmd = "{} ul -lf {} -b {} -k {} -md '{}'".format(OBJCTCMD, fromfile, bucket, tofile, uinfo)
319
+ for loop in range(2):
320
+ buf = PgLOG.pgsystem(cmd, logact, CMDBTH)
321
+ tinfo = check_object_file(tofile, bucket, 0, logact)
322
+ if tinfo:
323
+ if tinfo['data_size'] == finfo['data_size']:
324
+ return PgLOG.SUCCESS
325
+ elif tinfo != None:
326
+ break
327
+
328
+ errlog("Error Execute: {}\n{}".format(cmd, buf), 'O', loop, logact)
329
+
330
+ return PgLOG.FAILURE
331
+
332
+ #
333
+ # Copy multiple files from a Globus endpoint to another
334
+ # tofiles - target file name list, each name leading with /dsnnn.n/ on Quasar and
335
+ # leading with /data/ or /decsdata/ on local glade disk
336
+ # fromfiles - source file name list, the same format as the tofiles
337
+ # topoint - target endpoint name, 'gdex-glade', 'gdex-quasar' or 'gdex-quasar-drdata'
338
+ # frompoint - source endpoint name, the same choices as the topoint
339
+ #
340
+ def quasar_multiple_trasnfer(tofiles, fromfiles, topoint, frompoint, logact = 0):
341
+
342
+ ret = PgLOG.FAILURE
343
+
344
+ fcnt = len(fromfiles)
345
+ transfer_files = {"files": []}
346
+ for i in range(fcnt):
347
+ transfer_files["files"].append({
348
+ "source_file": fromfiles[i],
349
+ "destination_file": tofiles[i]
350
+ })
351
+ qstr = json.dumps(transfer_files)
352
+
353
+ action = 'transfer'
354
+ source_endpoint = frompoint
355
+ destination_endpoint = topoint
356
+ label = f"{ENDPOINTS[frompoint]} to {ENDPOINTS[topoint]} {action}"
357
+ verify_checksum = True
358
+
359
+ cmd = f'{BACKCMD} {action} -se {source_endpoint} -de {destination_endpoint} --label "{label}"'
360
+ if verify_checksum:
361
+ cmd += ' -vc'
362
+ cmd += ' --batch -'
363
+
364
+ task = submit_globus_task(cmd, topoint, logact, qstr)
365
+ if task['stat'] == 'S':
366
+ ret = PgLOG.SUCCESS
367
+ elif task['stat'] == 'A':
368
+ TASKIDS["{}-{}".format(topoint, tofiles[0])] = task['id']
369
+ ret = PgLOG.FINISH
370
+
371
+ return ret
372
+
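+ # The batch transfer above feeds a JSON document of source/destination pairs to the
+ # dsglobus command via stdin. A hedged sketch of the JSON built for two files
+ # (paths are hypothetical):
+ #
+ #   {"files": [{"source_file": "/data/ds000.0/a.nc", "destination_file": "/ds000.0/a.nc"},
+ #              {"source_file": "/data/ds000.0/b.nc", "destination_file": "/ds000.0/b.nc"}]}
+ #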
373
+ #
374
+ # Copy a file from a Globus endpoint to another
375
+ #
376
+ # tofile - target file name, leading with /dsnnn.n/ on Quasar and
377
+ # leading with /data/ or /decsdata/ on local glade disk
378
+ # fromfile - source file, the same format as the tofile
379
+ # topoint - target endpoint name, 'gdex-glade', 'gdex-quasar' or 'gdex-quasar-drdata'
380
+ # frompoint - source endpoint name, the same choices as the topoint
381
+ #
382
+ def endpoint_copy_endpoint(tofile, fromfile, topoint, frompoint, logact = 0):
383
+
384
+ ret = PgLOG.FAILURE
385
+ finfo = check_globus_file(fromfile, frompoint, 0, logact)
386
+ if not finfo:
387
+ if finfo != None: return ret
388
+ return lmsg(fromfile, "{} to copy {} file to {}-{}".format(PgLOG.PGLOG['MISSFILE'], frompoint, topoint, tofile), logact)
389
+
390
+ if not logact&PgLOG.OVRIDE:
391
+ tinfo = check_globus_file(tofile, topoint, 0, logact)
392
+ if tinfo and tinfo['data_size'] > 0:
393
+ return PgLOG.pglog("{}-{}: file exists already".format(topoint, tofile), logact)
394
+
395
+ action = 'transfer'
396
+ cmd = f'{BACKCMD} {action} -se {frompoint} -de {topoint} -sf {fromfile} -df {tofile} -vc'
397
+
398
+ task = submit_globus_task(cmd, topoint, logact)
399
+ if task['stat'] == 'S':
400
+ ret = PgLOG.SUCCESS
401
+ elif task['stat'] == 'A':
402
+ TASKIDS["{}-{}".format(topoint, tofile)] = task['id']
403
+ ret = PgLOG.FINISH
404
+
405
+ return ret
406
+
407
+ #
408
+ # submit a globus task and return a task id
409
+ #
410
+ def submit_globus_task(cmd, endpoint, logact = 0, qstr = None):
411
+
412
+ task = {'id' : None, 'stat' : 'U'}
413
+ loop = reset = 0
414
+ while (loop-reset) < 2:
415
+ buf = PgLOG.pgsystem(cmd, logact, CMDGLB, qstr)
416
+ syserr = PgLOG.PGLOG['SYSERR']
417
+ if buf and buf.find('a task has been created') > -1:
418
+ ms = re.search(r'Task ID:\s+(\S+)', buf)
419
+ if ms:
420
+ task['id'] = ms.group(1)
421
+ lp = 0
422
+ while lp < 2:
423
+ task['stat'] = check_globus_status(task['id'], endpoint, logact)
424
+ if task['stat'] == 'S': break
425
+ time.sleep(PgSIG.PGSIG['ETIME'])
426
+ lp += 1
427
+ if task['stat'] == 'S' or task['stat'] == 'A': break
428
+ if task['stat'] == 'F' and not syserr: break
429
+
430
+ errmsg = "Error Execute: " + cmd
431
+ if qstr: errmsg += " with stdin:\n" + qstr
432
+ if syserr:
433
+ errmsg += "\n" + syserr
434
+ (hstat, msg) = host_down_status('', QHOSTS[endpoint], 1, logact)
435
+ if hstat: errmsg += "\n" + msg
436
+ errlog(errmsg, 'B', (loop - reset), logact)
437
+ if loop == 0 and syserr and syserr.find('This user has too many pending jobs') > -1: reset = 1
438
+ loop += 1
439
+
440
+ if task['stat'] == 'S' or task['stat'] == 'A': ECNTS['B'] = 0 # reset error count
441
+ return task
442
+
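+ # submit_globus_task() scrapes the task id out of the dsglobus output and then polls
+ # check_globus_status(). A hedged sketch of how callers in this module interpret the
+ # returned dict (the file path in the TASKIDS key is hypothetical):
+ #
+ #   task = submit_globus_task(cmd, 'gdex-quasar', logact)
+ #   if task['stat'] == 'S':        # transfer already SUCCEEDED
+ #       pass
+ #   elif task['stat'] == 'A':      # still ACTIVE; remember the id to check later
+ #       TASKIDS['gdex-quasar-/ds000.0/a.tar'] = task['id']
+ #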
443
+ #
444
+ # check Globus transfer status for given taskid. Cancel the task
445
+ # if PgLOG.NOWAIT is present and Details is neither OK nor Queued
446
+ #
447
+ def check_globus_status(taskid, endpoint = None, logact = 0):
448
+
449
+ ret = 'U'
450
+ if not taskid: return ret
451
+ if not endpoint: endpoint = PgLOG.PGLOG['BACKUPEP']
452
+ mp = r'Status:\s+({})'.format('|'.join(QSTATS.values()))
453
+
454
+ cmd = f"{BACKCMD} get-task {taskid}"
455
+ astats = ['OK', 'Queued']
456
+
457
+ for loop in range(2):
458
+ buf = PgLOG.pgsystem(cmd, logact, CMDRET)
459
+ if buf:
460
+ ms = re.search(mp, buf)
461
+ if ms:
462
+ ret = ms.group(1)[0]
463
+ if ret == 'A':
464
+ ms = re.search(r'Details:\s+(\S+)', buf)
465
+ if ms:
466
+ detail = ms.group(1)
467
+ if detail not in astats:
468
+ if logact&PgLOG.NOWAIT:
469
+ errmsg = "{}: Cancel Task due to {}:\n{}".format(taskid, detail, buf)
470
+ errlog(errmsg, 'B', 1, logact)
471
+ ccmd = f"{BACKCMD} cancel-task {taskid}"
472
+ PgLOG.pgsystem(ccmd, logact, 7)
473
+ else:
474
+ time.sleep(PgSIG.PGSIG['ETIME'])
475
+ continue
476
+ break
477
+
478
+ errmsg = "Error Execute: " + cmd
479
+ if PgLOG.PGLOG['SYSERR']:
480
+ errmsg += "\n" + PgLOG.PGLOG['SYSERR']
481
+ (hstat, msg) = host_down_status('', QHOSTS[endpoint], 1, logact)
482
+ if hstat: errmsg += "\n" + msg
483
+ errlog(errmsg, 'B', loop, logact)
484
+
485
+ if ret == 'S' or ret == 'A': ECNTS['B'] = 0 # reset error count
486
+ return ret
487
+
488
+ #
489
+ # return SUCCESS if Globus transfer is done; FAILURE otherwise
490
+ #
491
+ def check_globus_finished(tofile, topoint, logact = 0):
492
+
493
+ ret = PgLOG.SUCCESS
494
+ ckey = "{}-{}".format(topoint, tofile)
495
+ if ckey in TASKIDS:
496
+ taskid = TASKIDS[ckey]
497
+ else:
498
+ errlog(ckey + ": Miss Task ID to check Status", 'B', 1, logact)
499
+ return PgLOG.FAILURE
500
+
501
+ lp = 0
502
+ if logact&PgLOG.NOWAIT:
503
+ act = logact&(~PgLOG.NOWAIT)
504
+ lps = 2
505
+ else:
506
+ act = logact
507
+ lps = 0
508
+
509
+ while True:
510
+ stat = check_globus_status(taskid, topoint, act)
511
+ if stat == 'A':
512
+ if lps:
513
+ lp += 1
514
+ if lp > lps: act = logact
515
+ time.sleep(PgSIG.PGSIG['ETIME'])
516
+ else:
517
+ if stat == 'S':
518
+ del TASKIDS[ckey]
519
+ else:
520
+ status = QSTATS[stat] if stat in QSTATS else 'UNKNOWN'
521
+ errlog("{}: Status '{}' for Task {}".format(ckey, status, taskid), 'B', 1, logact)
522
+ ret = PgLOG.FAILURE
523
+ break
524
+
525
+ return ret
526
+
527
+ #
528
+ # Copy a local file to Quasar backup tape system
529
+ #
530
+ # tofile - target file name, leading with /dsnnn.n/
531
+ # fromfile - source file name, leading with /data/ or /decsdata/
532
+ # endpoint - endpoint name on Quasar Backup Server
533
+ #
534
+ def local_copy_backup(tofile, fromfile, endpoint = None, logact = 0):
535
+
536
+ if not endpoint: endpoint = PgLOG.PGLOG['BACKUPEP']
537
+ return endpoint_copy_endpoint(tofile, fromfile, endpoint, 'gdex-glade', logact)
538
+
539
+ #
540
+ # Copy a Quasar backup file to local Globus endpoint
541
+ #
542
+ # tofile - target file name, leading with /data/ or /decsdata/
543
+ # fromfile - source file name, leading with /dsnnn.n/
544
+ # endpoint - endpoint name on Quasar Backup Server
545
+ #
546
+ def backup_copy_local(tofile, fromfile, endpoint = None, logact = 0):
547
+
548
+ if not endpoint: endpoint = PgLOG.PGLOG['BACKUPEP']
549
+ return endpoint_copy_endpoint(tofile, fromfile, 'gdex-glade', endpoint, logact)
550
+
551
+ #
552
+ # Copy a remote file to local
553
+ #
554
+ # tofile - target file name
555
+ # fromfile - source file name
556
+ # host - remote host name
557
+ #
558
+ def remote_copy_local(tofile, fromfile, host, logact = 0):
559
+
560
+ cmd = PgLOG.get_sync_command(host)
561
+ finfo = check_remote_file(fromfile, host, 0, logact)
562
+ if not finfo:
563
+ if finfo != None: return PgLOG.FAILURE
564
+ return errlog("{}-{}: {} to copy to {}".format(host, fromfile, PgLOG.PGLOG['MISSFILE'], tofile), 'R', 1, logact)
565
+
566
+ target = tofile
567
+ ms = re.match(r'^(.+)/$', tofile)
568
+ if ms:
569
+ dir = ms.group(1)
570
+ tofile += op.basename(fromfile)
571
+ else:
572
+ dir = get_local_dirname(tofile)
573
+
574
+ if not make_local_directory(dir, logact): return PgLOG.FAILURE
575
+
576
+ cmd += " -g {} {}".format(fromfile, target)
577
+ loop = reset = 0
578
+ while (loop-reset) < 2:
579
+ if PgLOG.pgsystem(cmd, logact, CMDERR):
580
+ info = check_local_file(tofile, 143, logact) # 1+2+4+8+128
581
+ if info:
582
+ if not info['isfile']:
583
+ set_local_mode(tofile, 0, PgLOG.PGLOG['EXECMODE'])
584
+ return PgLOG.SUCCESS
585
+ elif info['data_size'] == finfo['data_size']:
586
+ set_local_mode(tofile, 1, PgLOG.PGLOG['FILEMODE'])
587
+ return PgLOG.SUCCESS
588
+ elif info != None:
589
+ break
590
+
591
+ errlog(PgLOG.PGLOG['SYSERR'], 'L', (loop - reset), logact)
592
+ if loop == 0: reset = reset_local_info(tofile, info, logact)
593
+ loop += 1
594
+
595
+ return PgLOG.FAILURE
596
+
597
+ #
598
+ # Copy an object file to local
599
+ #
600
+ # tofile - target file name
601
+ # fromfile - source file name
602
+ # bucket - bucket name on Object store
603
+ #
604
+ def object_copy_local(tofile, fromfile, bucket = None, logact = 0):
605
+
606
+ ret = PgLOG.FAILURE
607
+ if not bucket: bucket = PgLOG.PGLOG['OBJCTBKT']
608
+ finfo = check_object_file(fromfile, bucket, 0, logact)
609
+ if not finfo:
610
+ if finfo != None: return ret
611
+ return lmsg(fromfile, "{}-{} to copy to {}".format(OHOST, PgLOG.PGLOG['MISSFILE'], tofile), logact)
612
+
613
+ cmd = "{} go -k {} -b {}".format(OBJCTCMD, fromfile, bucket)
614
+ fromname = op.basename(fromfile)
615
+ toname = op.basename(tofile)
616
+ if toname == tofile:
617
+ dir = odir = None
618
+ else:
619
+ dir = op.dirname(tofile)
620
+ odir = change_local_directory(dir, logact)
621
+ loop = reset = 0
622
+ while (loop-reset) < 2:
623
+ buf = PgLOG.pgsystem(cmd, logact, CMDBTH)
624
+ info = check_local_file(fromname, 143, logact) # 1+2+4+8+128
625
+ if info:
626
+ if info['data_size'] == finfo['data_size']:
627
+ set_local_mode(fromfile, info['isfile'], 0, info['mode'], info['logname'], logact)
628
+ if toname == fromname or move_local_file(toname, fromname, logact):
629
+ ret = PgLOG.SUCCESS
630
+ break
631
+
632
+
633
+ elif info != None:
634
+ break
635
+
636
+ errlog("Error Execute: {}\n{}".format(cmd, buf), 'L', (loop - reset), logact)
637
+ if loop == 0: reset = reset_local_info(tofile, info, logact)
638
+ loop += 1
639
+ if odir and odir != dir:
640
+ change_local_directory(odir, logact)
641
+
642
+ return ret
643
+
644
+ #
645
+ # Copy a remote file to object
646
+ #
647
+ # tofile - target object file name
648
+ # fromfile - source remote file name
649
+ # host - remote host name
650
+ # bucket - bucket name on Object store
651
+ # meta - reference to metadata hash
652
+ #
653
+ def remote_copy_object(tofile, fromfile, host, bucket = None, meta = None, logact = 0):
654
+
655
+ if is_local_host(host): return local_copy_object(tofile, fromfile, bucket, meta, logact)
656
+
657
+ locfile = "{}/{}".format(PgLOG.PGLOG['TMPPATH'], op.basename(tofile))
658
+ ret = remote_copy_local(locfile, fromfile, host, logact)
659
+ if ret:
660
+ ret = local_copy_object(tofile, locfile, bucket, meta, logact)
661
+ delete_local_file(locfile, logact)
662
+
663
+ return ret
664
+
665
+ #
666
+ # Copy an object file to remote
667
+ #
668
+ # tofile - target remote file name
669
+ # fromfile - source object file name
670
+ # host - remote host name
671
+ # bucket - bucket name on Object store
672
+ # meta - reference to metadata hash
673
+ #
674
+ def object_copy_remote(tofile, fromfile, host, bucket = None, logact = 0):
675
+
676
+ if is_local_host(host): return object_copy_local(tofile, fromfile, bucket, logact)
677
+
678
+ locfile = "{}/{}".format(PgLOG.PGLOG['TMPPATH'], op.basename(tofile))
679
+ ret = object_copy_local(locfile, fromfile, bucket, logact)
680
+ if ret:
681
+ ret = local_copy_remote(tofile, locfile, host, logact)
682
+ delete_local_file(locfile, logact)
683
+
684
+ return ret
685
+
686
+ #
687
+ # Delete a file/directory on a given host name (including the local host); no background process is used for deleting
688
+ #
689
+ # file - file name to be deleted
690
+ # host - host name the file on, default to LHOST
691
+ #
692
+ # Return 1 if successful, 0 if failed; the error message generated in PgLOG.pgsystem() is cached in PgLOG.PGLOG['SYSERR']
693
+ #
694
+ def delete_gdex_file(file, host, logact = 0):
695
+
696
+ shost = strip_host_name(host)
697
+ if PgUtil.pgcmp(shost, LHOST, 1) == 0:
698
+ return delete_local_file(file, logact)
699
+ elif PgUtil.pgcmp(shost, OHOST, 1) == 0:
700
+ return delete_object_file(file, None, logact)
701
+ else:
702
+ return delete_remote_file(file, host, logact)
703
+
704
+ delete_rda_file = delete_gdex_file
705
+
706
+ #
707
+ # Delete a local file/directory
708
+ #
709
+ def delete_local_file(file, logact = 0):
710
+
711
+ info = check_local_file(file, 0, logact)
712
+ if not info: return PgLOG.FAILURE
713
+ cmd = "rm -rf "
714
+ cmd += file
715
+ loop = reset = 0
716
+ while (loop-reset) < 2:
717
+ if PgLOG.pgsystem(cmd, logact, CMDERR):
718
+ info = check_local_file(file, 14, logact)
719
+ if info is None:
720
+ if DIRLVLS: record_delete_directory(op.dirname(file), LHOST)
721
+ return PgLOG.SUCCESS
722
+ elif not info:
723
+ break # error checking file
724
+
725
+ errlog(PgLOG.PGLOG['SYSERR'], 'L', (loop - reset), logact)
726
+ if loop == 0: reset = reset_local_info(file, info, logact)
727
+ loop += 1
728
+
729
+ return PgLOG.FAILURE
730
+
731
+ #
732
+ # Delete file/directory on a remote host
733
+ #
734
+ def delete_remote_file(file, host, logact = 0):
735
+
736
+ if not check_remote_file(file, host, logact): return PgLOG.FAILURE
737
+
738
+ cmd = PgLOG.get_sync_command(host)
739
+
740
+ for loop in range(2):
741
+ if PgLOG.pgsystem("{} -d {}".format(cmd, file), logact, CMDERR):
742
+ if DIRLVLS: record_delete_directory(op.dirname(file), host)
743
+ return PgLOG.SUCCESS
744
+
745
+ errlog(PgLOG.PGLOG['SYSERR'], 'R', loop, logact)
746
+
747
+ return PgLOG.FAILURE
748
+
749
+ #
750
+ # Delete a file on object store
751
+ #
752
+ def delete_object_file(file, bucket = None, logact = 0):
753
+
754
+ if not bucket: bucket = PgLOG.PGLOG['OBJCTBKT']
755
+ for loop in range(2):
756
+ list = object_glob(file, bucket, 0, logact)
757
+ if not list: return PgLOG.FAILURE
758
+ errmsg = None
759
+ for key in list:
760
+ cmd = "{} dl {} -b {}".format(OBJCTCMD, key, bucket)
761
+ if not PgLOG.pgsystem(cmd, logact, CMDERR):
762
+ errmsg = PgLOG.PGLOG['SYSERR']
763
+ break
764
+
765
+ list = object_glob(file, bucket, 0, logact)
766
+ if not list: return PgLOG.SUCCESS
767
+ if errmsg: errlog(errmsg, 'O', loop, logact)
768
+
769
+ return PgLOG.FAILURE
770
+
771
+ #
772
+ # Delete a backup file on Quasar Server
773
+ #
774
+ def delete_backup_file(file, endpoint = None, logact = 0):
775
+
776
+ if not endpoint: endpoint = PgLOG.PGLOG['BACKUPEP']
777
+ info = check_backup_file(file, endpoint, 0, logact)
778
+ if not info: return PgLOG.FAILURE
779
+
780
+ cmd = f"{BACKCMD} delete -ep {endpoint} -tf {file}"
781
+ task = submit_globus_task(cmd, endpoint, logact)
782
+ if task['stat'] == 'S':
783
+ return PgLOG.SUCCESS
784
+ elif task['stat'] == 'A':
785
+ TASKIDS["{}-{}".format(endpoint, file)] = task['id']
786
+ return PgLOG.FINISH
787
+
788
+ return PgLOG.FAILURE
789
+
790
+ #
791
+ # reset local file/directory information to make them writable for PgLOG.PGLOG['GDEXUSER']
792
+ # file - file name (mandatory)
793
+ # info - gathered file info with option 14; None means the file does not exist
794
+ #
795
+ def reset_local_info(file, info = None, logact = 0):
796
+
797
+ ret = 0
798
+ if info:
799
+ if info['isfile']:
800
+ ret += reset_local_file(file, info, logact)
801
+ dir = get_local_dirname(file)
802
+ info = check_local_file(dir, 14, logact)
803
+ else:
804
+ dir = file
805
+ else:
806
+ dir = get_local_dirname(file)
807
+ info = check_local_file(dir, 14, logact)
808
+
809
+ if info: ret += reset_local_directory(dir, info, logact)
810
+
811
+ return 1 if ret else 0
812
+
813
+ #
814
+ # reset local directory group/mode
815
+ #
816
+ def reset_local_directory(dir, info = None, logact = 0):
817
+
818
+ ret = 0
819
+ if not (info and 'mode' in info and 'group' in info and 'logname' in info):
820
+ info = check_local_file(dir, 14, logact)
821
+ if info:
822
+ if info['mode'] and info['mode'] != 0o775:
823
+ ret += set_local_mode(dir, 0, 0o775, info['mode'], info['logname'], logact)
824
+ if info['group'] and PgLOG.PGLOG['GDEXGRP'] != info['group']:
825
+ ret += change_local_group(dir, PgLOG.PGLOG['GDEXGRP'], info['group'], info['logname'], logact)
826
+
827
+ return 1 if ret else 0
828
+
829
+ #
830
+ # reset local file group/mode
831
+ #
832
+ def reset_local_file(file, info = None, logact = 0):
833
+
834
+ ret = 0
835
+ if not (info and 'mode' in info and 'group' in info and 'logname' in info):
836
+ info = check_local_file(file, 14, logact)
837
+ if info:
838
+ if info['mode'] != 0o664:
839
+ ret += set_local_mode(file, 1, 0o664, info['mode'], info['logname'], logact)
840
+ if PgLOG.PGLOG['GDEXGRP'] != info['group']:
841
+ ret += change_local_group(file, PgLOG.PGLOG['GDEXGRP'], info['group'], info['logname'], logact)
842
+
843
+ return ret
844
+
845
+ #
846
+ # Move a file locally or remotely on the same host; no background process is used for moving
847
+ #
848
+ # tofile - target file name
849
+ # fromfile - original file name
850
+ # host - host name the file is moved on, default to LHOST
851
+ #
852
+ # Return PgLOG.SUCCESS if successful, PgLOG.FAILURE otherwise
853
+ #
854
+ def move_gdex_file(tofile, fromfile, host, logact = 0):
855
+
856
+ shost = strip_host_name(host)
857
+ if PgUtil.pgcmp(shost, LHOST, 1) == 0:
858
+ return move_local_file(tofile, fromfile, logact)
859
+ elif PgUtil.pgcmp(shost, OHOST, 1) == 0:
860
+ return move_object_file(tofile, fromfile, None, None, logact)
861
+ else:
862
+ return move_remote_file(tofile, fromfile, host, logact)
863
+
864
+ move_rda_file = move_gdex_file
865
+
866
+ #
867
+ # Move a file locally
868
+ #
869
+ # tofile - target file name
870
+ # fromfile - source file name
871
+ #
872
+ def move_local_file(tofile, fromfile, logact = 0):
873
+
874
+ dir = get_local_dirname(tofile)
875
+ info = check_local_file(fromfile, 0, logact)
876
+ tinfo = check_local_file(tofile, 0, logact)
877
+ if not info:
878
+ if info != None: return PgLOG.FAILURE
879
+ if tinfo:
880
+ PgLOG.pglog("{}: Moved to {} already".format(fromfile, tofile), PgLOG.LOGWRN)
881
+ return PgLOG.SUCCESS
882
+ else:
883
+ return errlog("{}: {} to move".format(fromfile, PgLOG.PGLOG['MISSFILE']), 'L', 1, logact)
884
+ if tinfo:
885
+ if tinfo['data_size'] > 0 and not logact&PgLOG.OVRIDE:
886
+ return errlog("{}: File exists, cannot move {} to it".format(tofile, fromfile), 'L', 1, logact)
887
+ elif tinfo != None:
888
+ return PgLOG.FAILURE
889
+
890
+ if not make_local_directory(dir, logact): return PgLOG.FAILURE
891
+
892
+ cmd = "mv {} {}".format(fromfile, tofile)
893
+ loop = reset = 0
894
+ while (loop-reset) < 2:
895
+ if PgLOG.pgsystem(cmd, logact, CMDERR):
896
+ if DIRLVLS: record_delete_directory(op.dirname(fromfile), LHOST)
897
+ return PgLOG.SUCCESS
898
+
899
+ errlog(PgLOG.PGLOG['SYSERR'], 'L', (loop - reset), logact)
900
+ if loop == 0: reset = reset_local_info(tofile, info, logact)
901
+ loop += 1
902
+
903
+ return PgLOG.FAILURE
904
+
905
+ #
906
+ # Move a remote file on the same host
907
+ #
908
+ # tofile - target file name
909
+ # fromfile - original file name
910
+ # host - remote host name
911
+ # (a temporary local copy of tofile under PgLOG.PGLOG['TMPPATH'] is used internally)
912
+ #
913
+ def move_remote_file(tofile, fromfile, host, logact = 0):
914
+
915
+ if is_local_host(host): return move_local_file(tofile, fromfile, logact)
916
+
917
+ ret = PgLOG.FAILURE
918
+ dir = op.dirname(tofile)
919
+ info = check_remote_file(fromfile, host, 0, logact)
920
+ tinfo = check_remote_file(tofile, host, 0, logact)
921
+ if not info:
922
+ if info != None: return PgLOG.FAILURE
923
+ if tinfo:
924
+ PgLOG.pglog("{}-{}: Moved to {} already".format(host, fromfile, tofile), PgLOG.LOGWRN)
925
+ return PgLOG.SUCCESS
926
+ else:
927
+ return errlog("{}-{}: {} to move".format(host, fromfile, PgLOG.PGLOG['MISSFILE']), 'R', 1, logact)
928
+ if tinfo:
929
+ if tinfo['data_size'] > 0 and not logact&PgLOG.OVRIDE:
930
+ return errlog("{}-{}: File exists, cannot move {} to it".format(host, tofile, fromfile), 'R', 1, logact)
931
+ elif tinfo != None:
932
+ return PgLOG.FAILURE
933
+
934
+ if make_remote_directory(dir, host, logact):
935
+ locfile = "{}/{}".format(PgLOG.PGLOG['TMPPATH'], op.basename(tofile))
936
+ if remote_copy_local(locfile, fromfile, host, logact):
937
+ ret = local_copy_remote(tofile, locfile, host, logact)
938
+ delete_local_file(locfile, logact)
939
+ if ret:
940
+ ret = delete_remote_file(fromfile, host, logact)
941
+ if DIRLVLS: record_delete_directory(op.dirname(fromfile), host)
942
+
943
+ return ret
944
+
945
+ #
946
+ # Move an object file on Object Store
947
+ #
948
+ # tofile - target file name
949
+ # fromfile - original file name
950
+ # tobucket - target bucket name
951
+ # frombucket - original bucket name
952
+ #
953
+ def move_object_file(tofile, fromfile, tobucket, frombucket, logact = 0):
954
+
955
+ ret = PgLOG.FAILURE
956
+ if not tobucket: tobucket = PgLOG.PGLOG['OBJCTBKT']
957
+ if not frombucket: frombucket = tobucket
958
+ finfo = check_object_file(fromfile, frombucket, 0, logact)
959
+ tinfo = check_object_file(tofile, tobucket, 0, logact)
960
+ if not finfo:
961
+ if finfo != None: return PgLOG.FAILURE
962
+ if tinfo:
963
+ PgLOG.pglog("{}-{}: Moved to {}-{} already".format(frombucket, fromfile, tobucket, tofile), PgLOG.LOGWRN)
964
+ return PgLOG.SUCCESS
965
+ else:
966
+ return errlog("{}-{}: {} to move".format(frombucket, fromfile, PgLOG.PGLOG['MISSFILE']), 'R', 1, logact)
967
+ if tinfo:
968
+ if tinfo['data_size'] > 0 and not logact&PgLOG.OVRIDE:
969
+ return errlog("{}-{}: Object File exists, cannot move {}-{} to it".format(tobucket, tofile, frombucket, fromfile), 'R', 1, logact)
970
+ elif tinfo != None:
971
+ return PgLOG.FAILURE
972
+
973
+ cmd = "{} mv -b {} -db {} -k {} -dk {}".format(OBJCTCMD, frombucket, tobucket, fromfile, tofile)
974
+ ucmd = "{} gm -k {} -b {}".format(OBJCTCMD, fromfile, frombucket)
975
+ ubuf = PgLOG.pgsystem(ucmd, PgLOG.LOGWRN, CMDRET)
976
+ if ubuf and re.match(r'^\{', ubuf): cmd += " -md '{}'".format(ubuf)
977
+
978
+ for loop in range(2):
979
+ buf = PgLOG.pgsystem(cmd, logact, CMDBTH)
980
+ tinfo = check_object_file(tofile, tobucket, 0, logact)
981
+ if tinfo:
982
+ if tinfo['data_size'] == finfo['data_size']:
983
+ return PgLOG.SUCCESS
984
+ elif tinfo != None:
985
+ break
986
+
987
+ errlog("Error Execute: {}\n{}".format(cmd, buf), 'O', loop, logact)
988
+
989
+ return PgLOG.FAILURE
990
+
991
+ #
992
+ # Move an object path on Object Store and all the file keys under it
993
+ #
994
+ # topath - target path name
995
+ # frompath - original path name
996
+ # tobucket - target bucket name
997
+ # frombucket - original bucket name
998
+ #
999
+ def move_object_path(topath, frompath, tobucket, frombucket, logact = 0):
1000
+
1001
+ ret = PgLOG.FAILURE
1002
+ if not tobucket: tobucket = PgLOG.PGLOG['OBJCTBKT']
1003
+ if not frombucket: frombucket = tobucket
1004
+ fcnt = check_object_path(frompath, frombucket, logact)
1005
+ tcnt = check_object_path(topath, tobucket, logact)
1006
+ if not fcnt:
1007
+ if fcnt == None: return PgLOG.FAILURE
1008
+ if tcnt:
1009
+ PgLOG.pglog("{}-{}: Moved to {}-{} already".format(frombucket, frompath, tobucket, topath), PgLOG.LOGWRN)
1010
+ return PgLOG.SUCCESS
1011
+ else:
1012
+ return errlog("{}-{}: {} to move".format(frombucket, frompath, PgLOG.PGLOG['MISSFILE']), 'R', 1, logact)
1013
+
1014
+ cmd = "{} mv -b {} -db {} -k {} -dk {}".format(OBJCTCMD, frombucket, tobucket, frompath, topath)
1015
+
1016
+ for loop in range(2):
1017
+ buf = PgLOG.pgsystem(cmd, logact, CMDBTH)
1018
+ fcnt = check_object_path(frompath, frombucket, logact)
1019
+ if not fcnt: return PgLOG.SUCCESS
1020
+ errlog("Error Execute: {}\n{}".format(cmd, buf), 'O', loop, logact)
1021
+
1022
+ return PgLOG.FAILURE
1023
+
1024
+ #
1025
+ # Move a backup file on Quasar Server
1026
+ #
1027
+ # tofile - target file name
1028
+ # fromfile - source file name
1029
+ # endpoint - Globus endpoint
1030
+ #
1031
+ def move_backup_file(tofile, fromfile, endpoint = None, logact = 0):
1032
+
1033
+ ret = PgLOG.FAILURE
1034
+ if not endpoint: endpoint = PgLOG.PGLOG['BACKUPEP']
1035
+ finfo = check_backup_file(fromfile, endpoint, 0, logact)
1036
+ tinfo = check_backup_file(tofile, endpoint, 0, logact)
1037
+ if not finfo:
1038
+ if finfo != None: return ret
1039
+ if tinfo:
1040
+ PgLOG.pglog("{}: Moved to {} already".format(fromfile, tofile), PgLOG.LOGWRN)
1041
+ return PgLOG.SUCCESS
1042
+ else:
1043
+ return errlog("{}: {} to move".format(fromfile, PgLOG.PGLOG['MISSFILE']), 'B', 1, logact)
1044
+
1045
+ if tinfo:
1046
+ if tinfo['data_size'] > 0 and not logact&PgLOG.OVRIDE:
1047
+ return errlog("{}: File exists, cannot move {} to it".format(tofile, fromfile), 'B', 1, logact)
1048
+ elif tinfo != None:
1049
+ return ret
1050
+
1051
+ cmd = f"{BACKCMD} rename -ep {endpoint} --old-path {fromfile} --new-path {tofile}"
1052
+ loop = 0
1053
+ while loop < 2:
1054
+ buf = PgLOG.pgsystem(cmd, logact, CMDRET)
1055
+ syserr = PgLOG.PGLOG['SYSERR']
1056
+ if buf:
1057
+ if buf.find('File or directory renamed successfully') > -1:
1058
+ ret = PgLOG.SUCCESS
1059
+ break
1060
+ if syserr:
1061
+ if syserr.find("No such file or directory") > -1:
1062
+ if make_backup_directory(op.dirname(tofile), endpoint, logact): continue
1063
+ errmsg = "Error Execute: {}\n{}".format(cmd, syserr)
1064
+ (hstat, msg) = host_down_status('', QHOSTS[endpoint], 1, logact)
1065
+ if hstat: errmsg += "\n" + msg
1066
+ errlog(errmsg, 'B', loop, logact)
1067
+ loop += 1
1068
+
1069
+ if ret == PgLOG.SUCCESS: ECNTS['B'] = 0 # reset error count
1070
+ return ret
1071
+
1072
+ #
1073
+ # Make a directory on a given host name (including local host)
1074
+ #
1075
+ # dir - directory path to be made
1076
+ # host - host name the directory on, default to LHOST
1077
+ #
1078
+ # Return PgLOG.SUCCESS(1) if successful or PgLOG.FAILURE(0) if failed
1079
+ #
1080
+ def make_gdex_directory(dir, host, logact = 0):
1081
+
1082
+ if not dir: return PgLOG.SUCCESS
1083
+ shost = strip_host_name(host)
1084
+ if PgUtil.pgcmp(shost, LHOST, 1) == 0:
1085
+ return make_local_directory(dir, logact)
1086
+ else:
1087
+ return make_remote_directory(dir, host, logact)
1088
+
1089
+ make_rda_directory = make_gdex_directory
1090
+
1091
+ #
1092
+ # Make a local directory
1093
+ #
1094
+ # dir - directory path to be made
1095
+ #
1096
+ def make_local_directory(dir, logact = 0):
1097
+
1098
+ return make_one_local_directory(dir, None, logact)
1099
+
1100
+ #
1101
+ # Make a local directory recursively
1102
+ #
1103
+ def make_one_local_directory(dir, odir = None, logact = 0):
1104
+
1105
+ if not dir or op.isdir(dir): return PgLOG.SUCCESS
1106
+ if op.isfile(dir): return errlog(dir + ": is file, cannot make directory", 'L', 1, logact)
1107
+
1108
+ if not odir: odir = dir
1109
+ if is_root_directory(dir, 'L', LHOST, "make directory " + odir, logact): return PgLOG.FAILURE
1110
+ if not make_one_local_directory(op.dirname(dir), odir, logact): return PgLOG.FAILURE
1111
+
1112
+ loop = reset = 0
1113
+ while (loop-reset) < 2:
1114
+ try:
1115
+ os.mkdir(dir, PgLOG.PGLOG['EXECMODE'])
1116
+ except Exception as e:
1117
+ errmsg = str(e)
1118
+ if errmsg.find('File exists') > -1: return PgLOG.SUCCESS
1119
+ errlog(errmsg, 'L', (loop - reset), logact)
1120
+ if loop == 0: reset = reset_local_info(dir, None, logact)
1121
+ loop += 1
1122
+ else:
1123
+ return PgLOG.SUCCESS
1124
+
1125
+ return PgLOG.FAILURE
1126
+
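+ # make_one_local_directory() walks up the path and creates each missing level with
+ # PgLOG.PGLOG['EXECMODE'], refusing to create root-level directories. Minus the root
+ # check, retry and error logging, it is roughly equivalent to this sketch (dirname is
+ # a hypothetical variable):
+ #
+ #   os.makedirs(dirname, PgLOG.PGLOG['EXECMODE'], exist_ok = True)
+ #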
1127
+ #
1128
+ # Make a directory on a remote host name
1129
+ #
1130
+ # dir - directory path to be made
1131
+ # host - host name the directory on
1132
+ #
1133
+ def make_remote_directory(dir, host, logact = 0):
1134
+
1135
+ return make_one_remote_directory(dir, None, host, logact)
1136
+
1137
+ def make_one_remote_directory(dir, odir, host, logact = 0):
1138
+
1139
+ info = check_remote_file(dir, host, 0, logact)
1140
+ if info:
1141
+ if info['isfile']: return errlog("{}-{}: is file, cannot make directory".format(host, dir), 'R', 1, logact)
1142
+ return PgLOG.SUCCESS
1143
+ elif info != None:
1144
+ return PgLOG.FAILURE
1145
+
1146
+ if not odir: odir = dir
1147
+ if is_root_directory(dir, 'R', host, "make directory {} on {}".format(odir, host), logact): return PgLOG.FAILURE
1148
+
1149
+ if make_one_remote_directory(op.dirname(dir), odir, host, logact):
1150
+ tmpsync = PgLOG.get_tmpsync_path()
1151
+ if PgLOG.pgsystem("{} {} {}".format(PgLOG.get_sync_command(host), tmpsync, dir), logact, 5):
1152
+ set_remote_mode(dir, 0, host, PgLOG.PGLOG['EXECMODE'])
1153
+ return PgLOG.SUCCESS
1154
+
1155
+ return PgLOG.FAILURE
1156
+
1157
+ #
1158
+ # Make a quasar directory
1159
+ #
1160
+ # dir - directory path to be made
1161
+ #
1162
+ def make_backup_directory(dir, endpoint, logact = 0):
1163
+
1164
+ return make_one_backup_directory(dir, None, endpoint, logact)
1165
+
1166
+ #
1167
+ # Make a quasar directory recursively
1168
+ #
1169
+ def make_one_backup_directory(dir, odir, endpoint = None, logact = 0):
1170
+
1171
+ if not dir or dir == '/': return PgLOG.SUCCESS
1172
+ if not endpoint: endpoint = PgLOG.PGLOG['BACKUPEP']
1173
+ info = check_backup_file(dir, endpoint, 0, logact)
1174
+ if info:
1175
+ if info['isfile']: return errlog("{}-{}: is file, cannot make backup directory".format(endpoint, dir), 'B', 1, logact)
1176
+ return PgLOG.SUCCESS
1177
+ elif info != None:
1178
+ return PgLOG.FAILURE
1179
+
1180
+ if not odir: odir = dir
1181
+ if not make_one_backup_directory(op.dirname(dir), odir, endpoint, logact): return PgLOG.FAILURE
1182
+
1183
+ ret = PgLOG.FAILURE
+ cmd = f"{BACKCMD} mkdir -ep {endpoint} -p {dir}"
1184
+ for loop in range(2):
1185
+ buf = PgLOG.pgsystem(cmd, logact, CMDRET)
1186
+ syserr = PgLOG.PGLOG['SYSERR']
1187
+ if buf:
1188
+ if(buf.find('The directory was created successfully') > -1 or
1189
+ buf.find("Path '{}' already exists".format(dir)) > -1):
1190
+ ret = PgLOG.SUCCESS
1191
+ break
1192
+ if syserr:
1193
+ if syserr.find("No such file or directory") > -1:
1194
+ ret = make_one_backup_directory(op.dirname(dir), odir, endpoint, logact)
1195
+ if ret == PgLOG.SUCCESS or loop: break
1196
+ time.sleep(PgSIG.PGSIG['ETIME'])
1197
+ else:
1198
+ errmsg = "Error Execute: {}\n{}".format(cmd, syserr)
1199
+ (hstat, msg) = host_down_status('', QHOSTS[endpoint], 1, logact)
1200
+ if hstat: errmsg += "\n" + msg
1201
+ errlog(errmsg, 'B', loop, logact)
1202
+
1203
+ if ret == PgLOG.SUCCESS: ECNTS['B'] = 0 # reset error count
1204
+ return ret
1205
+
1206
+ #
1207
+ # check and return 1 if a root directory
1208
+ #
1209
+ def is_root_directory(dir, etype, host = None, action = None, logact = 0):
1210
+
1211
+ ret = cnt = 0
1212
+
1213
+ if etype == 'H':
1214
+ ms = re.match(r'^({})(.*)$'.format(PgLOG.PGLOG['ALLROOTS']), dir)
1215
+ if ms:
1216
+ m2 = ms.group(2)
1217
+ if not m2 or m2 == '/': ret = 1
1218
+ else:
1219
+ cnt = 2
1220
+ elif re.match(r'^{}'.format(PgLOG.PGLOG['DSSDATA']), dir):
1221
+ ms = re.match(r'^({})(.*)$'.format(PgLOG.PGLOG['GPFSROOTS']), dir)
1222
+ if ms:
1223
+ m2 = ms.group(2)
1224
+ if not m2 or m2 == '/': ret = 1
1225
+ else:
1226
+ cnt = 4
1227
+ else:
1228
+ ms = re.match(r'^({})(.*)$'.format(PgLOG.PGLOG['HOMEROOTS']), dir)
1229
+ if ms:
1230
+ m2 = ms.group(2)
1231
+ if not m2 or m2 == '/': ret = 1
1232
+ else:
1233
+ cnt = 2
1234
+
1235
+ if cnt and re.match(r'^(/[^/]+){0,%d}(/*)$' % cnt, dir):
1236
+ ret = 1
1237
+
1238
+ if ret and action:
1239
+ cnt = 0
1240
+ errmsg = "{}: Cannot {} from {}".format(dir, action, PgLOG.PGLOG['HOSTNAME'])
1241
+ (hstat, msg) = host_down_status(dir, host, 0, logact)
1242
+ if hstat: errmsg += "\n" + msg
1243
+ errlog(errmsg, etype, 1, logact|PgLOG.ERRLOG)
1244
+
1245
+ return ret
1246
+
1247
+ #
1248
+ # set mode for a given directory/file on a given host (including the local host)
1249
+ #
1250
+ def set_gdex_mode(file, isfile, host, nmode = None, omode = None, logname = None, logact = 0):
1251
+
1252
+ shost = strip_host_name(host)
1253
+ if PgUtil.pgcmp(shost, LHOST, 1) == 0:
1254
+ return set_local_mode(file, isfile, nmode, omode, logname, logact)
1255
+ else:
1256
+ return set_remote_mode(file, isfile, host, nmode, omode, logact)
1257
+
1258
+ set_rda_mode = set_gdex_mode
1259
+
1260
+ #
1261
+ # set mode for given local directory or file
1262
+ #
1263
+ def set_local_mode(file, isfile = 1, nmode = 0, omode = 0, logname = None, logact = 0):
1264
+
1265
+ if not nmode: nmode = (PgLOG.PGLOG['FILEMODE'] if isfile else PgLOG.PGLOG['EXECMODE'])
1266
+ if not (omode and logname):
1267
+ info = check_local_file(file, 6)
1268
+ if not info:
1269
+ if info != None: return PgLOG.FAILURE
1270
+ return lmsg(file, "{} to set mode({})".format(PgLOG.PGLOG['MISSFILE'], PgLOG.int2base(nmode, 8)), logact)
1271
+ omode = info['mode']
1272
+ logname = info['logname']
1273
+
1274
+ if nmode == omode: return PgLOG.SUCCESS
1275
+
1276
+ try:
1277
+ os.chmod(file, nmode)
1278
+ except Exception as e:
1279
+ return errlog(str(e), 'L', 1, logact)
1280
+
1281
+ return PgLOG.SUCCESS
1282
+
1283
+ #
1284
+ # set mode for given directory or file on remote host
1285
+ #
1286
+ def set_remote_mode(file, isfile, host, nmode = 0, omode = 0, logact = 0):
1287
+
1288
+ if not nmode: nmode = (PgLOG.PGLOG['FILEMODE'] if isfile else PgLOG.PGLOG['EXECMODE'])
1289
+ if not omode:
1290
+ info = check_remote_file(file, host, 6)
1291
+ if not info:
1292
+ if info != None: return PgLOG.FAILURE
1293
+ return errlog("{}-{}: {} to set mode({})".format(host, file, PgLOG.PGLOG['MISSFILE'], PgLOG.int2base(nmode, 8)), 'R', 1, logact)
1294
+ omode = info['mode']
1295
+
1296
+ if nmode == omode: return PgLOG.SUCCESS
1297
+ return PgLOG.pgsystem("{} -m {} {}".format(PgLOG.get_sync_command(host), PgLOG.int2base(nmode, 8), file), logact, 5)
1298
+
1299
+ #
1300
+ # change group for given local directory or file
1301
+ #
1302
+ def change_local_group(file, ngrp = None, ogrp = None, logname = None, logact = 0):
1303
+
1304
+ if not ngrp:
1305
+ ngid = PgLOG.PGLOG['GDEXGID']
1306
+ else:
1307
+ ngid = grp.getgrnam(ngrp).gr_gid
1308
+ if logact and logact&PgLOG.EXITLG: logact &=~PgLOG.EXITLG
1309
+ if not (ogrp and logname):
1310
+ info = check_local_file(file, 10, logact)
1311
+ if not info:
1312
+ if info != None: return PgLOG.FAILURE
1313
+ return errlog("{}: {} to change group({})".format(file, PgLOG.PGLOG['MISSFILE'], ngrp), 'L', 1, logact)
1314
+ ogid = info['gid']
1315
+ ouid = info['uid']
1316
+ else:
1317
+ ouid = pwd.getpwnam(logname).pw_uid
1318
+ ogid = grp.getgrnam(logname).gr_gid
1319
+
1320
+ if ngid == ogid: return PgLOG.SUCCESS
1321
+
1322
+ try:
1323
+ os.chown(file, ouid, ngid)
1324
+ except Exception as e:
1325
+ return errlog(str(e), 'L', 1, logact)
1326
+ 
+ return PgLOG.SUCCESS  # chown succeeded
+ 
1327
+ #
1328
+ # Check if given path on a specified host or the host itself are down
1329
+ #
1330
+ # path: path name to be checked
1331
+ # host: host name the file on, default to LHOST
1332
+ # chkopt: 1 - do a file/path check, 0 - do not
1333
+ #
1334
+ # Return array of 2 (hstat, msg)
1335
+ # hstat: 0 if system is up and accessible,
1336
+ # 1 - host is down,
1337
+ # 2 - if path not accessible
1338
+ # negative values if planned system down
1339
+ # msg: None - stat == 0
1340
+ # a non-empty string with the system-down message - stat != 0
1341
+ #
1342
+ def host_down_status(path, host, chkopt = 0, logact = 0):
1343
+
1344
+ shost = strip_host_name(host)
1345
+ hstat = 0
1346
+ rets = [0, None]
1347
+
1348
+ msg = hostname = None
1349
+
1350
+ if PgUtil.pgcmp(shost, LHOST, 1) == 0:
1351
+ if not path or (chkopt and check_local_file(path)): return rets
1352
+ msg = path + ": is not accessible"
1353
+ flag = "L"
1354
+ if re.match(r'^(/{}/|{})'.format(PgLOG.PGLOG['GPFSNAME'], PgLOG.PGLOG['DSSDATA']), path):
1355
+ hstat = 1
1356
+ hostname = PgLOG.PGLOG['GPFSNAME']
1357
+ else:
1358
+ hstat = 2
1359
+
1360
+ elif PgUtil.pgcmp(shost, PgLOG.PGLOG['GPFSNAME'], 1) == 0:
1361
+ if not path or (chkopt and check_local_file(path)): return rets
1362
+ msg = path + ": is not accessible"
1363
+ flag = "L"
1364
+ hstat = 1
1365
+ hostname = PgLOG.PGLOG['GPFSNAME']
1366
+ elif PgUtil.pgcmp(shost, BHOST, 1) == 0:
1367
+ if path:
1368
+ hstat = 2
1369
+ else:
1370
+ hstat = 1
1371
+ path = DPATHS['B']
1372
+
1373
+ if chkopt and check_backup_file(path, QPOINTS['B']): return rets
1374
+ hostname = BHOST
1375
+ msg = "{}-{}: is not accessible".format(hostname, path)
1376
+ flag = "B"
1377
+ elif PgUtil.pgcmp(shost, DHOST, 1) == 0:
1378
+ if path:
1379
+ hstat = 2
1380
+ else:
1381
+ hstat = 1
1382
+ path = DPATHS['D']
1383
+
1384
+ if chkopt and check_backup_file(path, QPOINTS['D']): return rets
1385
+ hostname = DHOST
1386
+ msg = "{}-{}: is not accessible".format(hostname, path)
1387
+ flag = "D"
1388
+ elif PgUtil.pgcmp(shost, OHOST, 1) == 0:
1389
+ if path:
1390
+ hstat = 2
1391
+ else:
1392
+ hstat = 1
1393
+ path = PgLOG.PGLOG['OBJCTBKT']
1394
+
1395
+ if chkopt and check_object_file(path): return rets
1396
+
1397
+ hostname = OHOST
1398
+ msg = "{}-{}: is not accessible".format(hostname, path)
1399
+ flag = "O"
1400
+ elif PgUtil.pgcmp(shost, PgLOG.PGLOG['PGBATCH'], 1):
1401
+ if path and chkopt and check_remote_file(path, host): return rets
1402
+ estat = ping_remote_host(host)
1403
+ if estat:
1404
+ hstat = 1
1405
+ hostname = host
1406
+ else:
1407
+ if not path: return rets
1408
+ if re.match(r'^/{}/'.format(PgLOG.PGLOG['GPFSNAME']), path):
1409
+ hstat = 1
1410
+ hostname = PgLOG.PGLOG['GPFSNAME']
1411
+ else:
1412
+ hstat = 2
1413
+ hostname = host
1414
+
1415
+ flag = "R"
1416
+ msg = "{}-{}: is not accessible".format(host, path)
1417
+ elif PgLOG.get_host(1) == PgLOG.PGLOG['PGBATCH']: # local host is a batch node
1418
+ if not path or (chkopt and check_local_file(path)): return rets
1419
+ msg = path + ": is not accessible"
1420
+ flag = "L"
1421
+ if re.match(r'^(/{}/|{})'.format(PgLOG.PGLOG['GPFSNAME'], PgLOG.PGLOG['DSSDATA']), path):
1422
+ hstat = 1
1423
+ hostname = PgLOG.PGLOG['GPFSNAME']
1424
+ else:
1425
+ hstat = 2
1426
+
1427
+ msg += " at the moment. Checked on " + PgLOG.PGLOG['HOSTNAME']
1428
+
1429
+ if hostname:
1430
+ estat = PgDBI.system_down_message(hostname, path, 0, logact)
1431
+ if estat:
1432
+ hstat = -hstat
1433
+ msg += "\n" + estat
1434
+
1435
+ if logact and (chkopt or hstat < 0): errlog(msg, flag, 1, logact)
1436
+
1437
+ return (hstat, msg)
1438
+
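+ # host_down_status() returns a 2-tuple. A hedged example of using it to decorate an
+ # error message, mirroring the calls elsewhere in this module (the path is hypothetical):
+ #
+ #   (hstat, msg) = host_down_status('/data/ds000.0', LHOST, 1, logact)
+ #   if hstat:               # non-zero: host or path is not accessible
+ #       errmsg += "\n" + msg
+ #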
1439
+ #
1440
+ # Check if given path on a specified host is down or not
1441
+ #
1442
+ # path: path name to be checked
1443
+ # host: host name the file on, default to LHOST
1444
+ #
1445
+ # Return errmsg if not accessible and None otherwise
1446
+ #
1447
+ def check_host_down(path, host, logact = 0):
1448
+
1449
+ (hstat, msg) = host_down_status(path, host, 1, logact)
1450
+
1451
+ return msg if hstat else None
1452
+
1453
+ #
1454
+ # Check if given service name is accessible from a specified host
1455
+ #
1456
+ # sname: service name to be checked
1457
+ # fhost: from host name to connect to service, default to LHOST
1458
+ #
1459
+ # reset the service flag to A or I accordingly
1460
+ #
1461
+ # Return None if accessible, an error message if not, and -1 if the access cannot be checked
1462
+ #
1463
+ def check_service_accessibilty(sname, fhost = None, logact = 0):
1464
+
1465
+ if not fhost: fhost = PgLOG.PGLOG['HOSTNAME']
1466
+ pgrec = PgDBI.pgget("dsservice", "*", "service = '{}' AND hostname = '{}'".format(sname, fhost), logact)
1467
+ if not pgrec:
1468
+ PgLOG.pglog("dsservice: Access {} from {} is not defined in GDEX Configuration".format(sname, fhost), logact)
1469
+ return -1
1470
+
1471
+ path = sname if (pgrec['flag'] == "H" or pgrec['flag'] == "G") else None
1472
+ (hstat, msg) = host_down_status(path, fhost, 1, logact)
1473
+
1474
+ return msg if hstat else None
1475
+
1476
+ #
1477
+ # check if this host is a local host for given host name
1478
+ #
1479
+ def is_local_host(host):
1480
+
1481
+ host = strip_host_name(host)
1482
+ if host == LHOST or PgLOG.valid_batch_host(host): return 1
1483
+
1484
+ return 0
1485
+
1486
+ #
1487
+ # check whether the given host is the local host; if not, log where the action should be performed instead
1488
+ #
1489
+ def local_host_action(host, action, info, logact = 0):
1490
+
1491
+ if is_local_host(host): return 1
1492
+ if not logact: return 0
1493
+
1494
+ if host == "partition":
1495
+ msg = "for individual partition"
1496
+ elif host == "rda_config":
1497
+ msg = "via https://gdex.ucar.edu/rda_pg_config"
1498
+ elif host in PgLOG.BCHCMDS:
1499
+ msg = "on a {} Node".format(host)
1500
+ else:
1501
+ msg = "on " + host
1502
+
1503
+ return PgLOG.pglog("{}: Cannot {}, try {}".format(info, action, msg), logact)
1504
+
1505
+ #
1506
+ # ping a given remote host name
1507
+ #
1508
+ # return None if the system is up, an error message if not
1509
+ #
1510
+ def ping_remote_host(host):
1511
+
1512
+ while True:
1513
+ buf = PgLOG.pgsystem("ping -c 3 " + host, PgLOG.LOGWRN, CMDRET)
1514
+ if buf:
1515
+ ms = re.search(r'3 packets transmitted, (\d)', buf)
1516
+ if ms:
1517
+ if int(ms.group(1)) > 0:
1518
+ return None
1519
+ else:
1520
+ return host + " seems down or not accessible"
1521
+ if PgLOG.PGLOG['SYSERR']:
1522
+ if PgLOG.PGLOG['SYSERR'].find("ping: unknown host") > -1 and host.find('.') > -1:
1523
+ host += ".ucar.edu"
1524
+ continue
1525
+ return PgLOG.PGLOG['SYSERR']
1526
+ else:
1527
+ return "Cannot ping " + host
1528
+
1529
+ #
1530
+ # compare two given host names, return 1 if they are the same and 0 otherwise
1531
+ #
1532
+ def same_hosts(host1, host2):
1533
+
1534
+ host1 = strip_host_name(host1)
1535
+ host2 = strip_host_name(host2)
1536
+
1537
+ return (1 if PgUtil.pgcmp(host1, host2, 1) == 0 else 0)
1538
+
1539
+ #
1540
+ # strip and identify the proper host name
1541
+ #
1542
+ def strip_host_name(host):
1543
+
1544
+ if not host: return LHOST
1545
+
1546
+ ms = re.match(r'^([^\.]+)\.', host)
1547
+ if ms: host = ms.group(1)
1548
+ if PgUtil.pgcmp(host, PgLOG.PGLOG['HOSTNAME'], 1) == 0:
1549
+ return LHOST
1550
+ else:
1551
+ return host
1552
+
1553
+ #
1554
+ # Check file status info on a given host name (including the local host); no background process is used for checking
1555
+ #
1556
+ # file: file name to be checked
1557
+ # host: host name the file on, default to LHOST
1558
+ # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
1559
+ # 1 - get date/time modified (date_modified, time_modified)
1560
+ # 2 - get file owner's login name (logname)
1561
+ # 4 - get permission mode in 3 octal digits (mode)
1562
+ # 8 - get group name (group)
1563
+ # 16 - get week day 0-Sunday, 1-Monday (week_day)
1564
+ # 32 - get checksum (checksum), work for local file only
1565
+ #
1566
+ # Return a dict of file info, or None if file not exists
1567
+ #
1568
+ def check_gdex_file(file, host = LHOST, opt = 0, logact = 0):
1569
+
1570
+ shost = strip_host_name(host)
1571
+
1572
+ if PgUtil.pgcmp(shost, LHOST, 1) == 0:
1573
+ return check_local_file(file, opt, logact)
1574
+ elif PgUtil.pgcmp(shost, OHOST, 1) == 0:
1575
+ return check_object_file(file, None, opt, logact)
1576
+ elif PgUtil.pgcmp(shost, BHOST, 1) == 0:
1577
+ return check_backup_file(file, QPOINTS['B'], opt, logact)
1578
+ elif PgUtil.pgcmp(shost, DHOST, 1) == 0:
1579
+ return check_backup_file(file, QPOINTS['D'], opt, logact)
1580
+ else:
1581
+ return check_remote_file(file, host, opt, logact)
1582
+
1583
+ check_rda_file = check_gdex_file
1584
+
1585
+ #
1586
+ # wrapper around check_local_file() and check_backup_file() to check info for a file
1587
+ # on local or remote Globus endpoints
1588
+ #
1589
+ def check_globus_file(file, endpoint = None, opt = 0, logact = 0):
1590
+
1591
+ if not endpoint: endpoint = PgLOG.PGLOG['BACKUPEP']
1592
+ if endpoint == 'gdex-glade':
1593
+ if re.match(r'^/(data|decsdata)/', file): file = PgLOG.PGLOG['DSSDATA'] + file
1594
+ return check_local_file(file, opt, logact)
1595
+ else:
1596
+ return check_backup_file(file, endpoint, opt, logact)
1597
+
1598
+ #
1599
+ # check and get local file status information
1600
+ #
1601
+ # file: local File name
1602
+ # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
1603
+ # 1 - get date/time modified (date_modified, time_modified)
1604
+ # 2 - get file owner's login name (logname)
1605
+ # 4 - get permission mode in 3 octal digits (mode)
1606
+ # 8 - get group name (group)
1607
+ # 16 - get week day 0-Sunday, 1-Monday (week_day)
1608
+ # 32 - get checksum (checksum)
1609
+ # 64 - remove file too small
1610
+ # 128 - check twice for missing file
1611
+ #
1612
+ # Return: a dict of file info, or None if the file does not exist
1613
+ #
1614
+ def check_local_file(file, opt = 0, logact = 0):
1615
+
1616
+ ret = None
1617
+ if not file: return ret
1618
+ loop = 0
1619
+ while loop < 2:
1620
+ if op.exists(file):
1621
+ try:
1622
+ fstat = os.stat(file)
1623
+ ret = local_file_stat(file, fstat, opt, logact)
1624
+ break
1625
+ except Exception as e:
1626
+ errmsg = "{}: {}".format(file, str(e))
1627
+ (hstat, msg) = host_down_status(file, LHOST, 0, logact)
1628
+ if hstat: errmsg += "\n" + msg
1629
+ errlog(errmsg, 'L', loop, logact)
1630
+ else:
1631
+ if loop > 0 or opt&128 == 0: break
1632
+ PgLOG.pglog(file + ": check it again in a moment", PgLOG.LOGWRN)
1633
+ time.sleep(6)
1634
+ loop += 1
1635
+
1636
+ if loop > 1: return PgLOG.FAILURE
1637
+ ECNTS['L'] = 0 # reset error count
1638
+ return ret
1639
+
1640
+ #
1641
+ # local function to get local file stat
1642
+ #
1643
+ def local_file_stat(file, fstat, opt, logact):
1644
+
1645
+ if not fstat:
1646
+ errlog(file + ": Error check file stat", 'L', 1, logact)
1647
+ return None
1648
+
1649
+ info = {}
1650
+ info['isfile'] = (1 if stat.S_ISREG(fstat.st_mode) else 0)
1651
+ if info['isfile'] == 0 and logact&PgLOG.PFSIZE:
1652
+ info['data_size'] = local_path_size(file)
1653
+ else:
1654
+ info['data_size'] = fstat.st_size
1655
+ info['fname'] = op.basename(file)
1656
+ if not opt: return info
1657
+ if opt&64 and info['isfile'] and info['data_size'] < PgLOG.PGLOG['MINSIZE']:
1658
+ PgLOG.pglog("{}: Remove {} file".format(file, ("Small({}B)".format(info['data_size']) if info['data_size'] else "Empty")), logact&~PgLOG.EXITLG)
1659
+ delete_local_file(file, logact)
1660
+ return None
1661
+
1662
+ if opt&17:
1663
+ mdate, mtime = PgUtil.get_date_time(fstat.st_mtime)
1664
+ if opt&1:
1665
+ info['date_modified'] = mdate
1666
+ info['time_modified'] = mtime
1667
+ cdate, ctime = PgUtil.get_date_time(fstat.st_ctime)
1668
+ info['date_created'] = cdate
1669
+ info['time_created'] = ctime
1670
+ if opt&16: info['week_day'] = PgUtil.get_weekday(mdate)
1671
+
1672
+ if opt&2:
1673
+ info['uid'] = fstat.st_uid
1674
+ info['logname'] = pwd.getpwuid(info['uid']).pw_name
1675
+ if opt&4: info['mode'] = stat.S_IMODE(fstat.st_mode)
1676
+ if opt&8:
1677
+ info['gid'] = fstat.st_gid
1678
+ info['group'] = grp.getgrgid(info['gid']).gr_name
1679
+ if opt&32 and info['isfile']: info['checksum'] = get_md5sum(file, 0, logact)
1680
+
1681
+ return info
1682
+
1683
+ #
1684
+ # get total size of files under a given path
1685
+ #
1686
+ def local_path_size(pname):
1687
+
1688
+ if not pname: pname = '.' # To get size of current directory
1689
+ size = 0
1690
+ for path, dirs, files in os.walk(pname):
1691
+ for f in files:
1692
+ size += os.path.getsize(os.path.join(path, f))
1693
+ return size
1694
+
1695
+ #
1696
+ # check and get file status information of a file on remote host
1697
+ #
1698
+ # file: remote File name
1699
+ # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
1700
+ # 1 - get date/time modified (date_modified, time_modified)
1701
+ # 2 - file owner's login name (logname), assumed 'gdexdata'
1702
+ # 4 - get permission mode in 3 octal digits (mode)
1703
+ # 8 - get group name (group), assumed 'dss'
1704
+ # 16 - get week day 0-Sunday, 1-Monday (week_day)
1705
+ #
1706
+ # Return: a dict of file info, or None if the file does not exist
1707
+ #
1708
+ def check_remote_file(file, host, opt = 0, logact = 0):
1709
+
1710
+ if not file: return None
1711
+ ms = re.match(r'^(.+)/$', file)
1712
+ if ms: file = ms.group(1) # remove ending '/' in case
1713
+ cmd = "{} {}".format(PgLOG.get_sync_command(host), file)
1714
+ loop = 0
1715
+ while loop < 2:
1716
+ buf = PgLOG.pgsystem(cmd, PgLOG.LOGWRN, CMDRET)
1717
+ if buf or not PgLOG.PGLOG['SYSERR'] or PgLOG.PGLOG['SYSERR'].find(PgLOG.PGLOG['MISSFILE']) > -1: break
1718
+ errmsg = PgLOG.PGLOG['SYSERR']
1719
+ (hstat, msg) = host_down_status(file, host, 0, logact)
1720
+ if hstat: errmsg += "\n" + msg
1721
+ errlog(errmsg, 'R', loop, logact)
1722
+ loop += 1
1723
+
1724
+ if loop > 1: return PgLOG.FAILURE
1725
+ ECNTS['R'] = 0 # reset error count
1726
+ if buf:
1727
+ for line in re.split(r'\n', buf):
1728
+ info = remote_file_stat(line, opt)
1729
+ if info: return info
1730
+
1731
+ return None
1732
+
1733
+ #
1734
+ # local function to get remote file stat
1735
+ #
1736
+ def remote_file_stat(line, opt):
1737
+
1738
+ info = {}
1739
+ items = re.split(r'\s+', line)
1740
+ if len(items) < 5 or items[4] == '.': return None
1741
+ ms = re.match(r'^([d\-])([\w\-]{9})$', items[0])
1742
+ info['isfile'] = (1 if ms and ms.group(1) == "-" else 0)
1743
+ if opt&4: info['mode'] = get_file_mode(ms.group(2))
1744
+ fsize = items[1]
1745
+ if fsize.find(',') > -1: fsize = re.sub(r',', '', fsize)
1746
+ info['data_size'] = int(fsize)
1747
+ info['fname'] = op.basename(items[4])
1748
+ if not opt: return info
1749
+ if opt&17:
1750
+ mdate = PgUtil.format_date(items[2], "YYYY-MM-DD", "YYYY/MM/DD")
1751
+ mtime = items[3]
1752
+ if PgLOG.PGLOG['GMTZ']: (mdate, mtime) = PgUtil.addhour(mdate, mtime, PgLOG.PGLOG['GMTZ'])
1753
+ if opt&1:
1754
+ info['date_modified'] = mdate
1755
+ info['time_modified'] = mtime
1756
+ if opt&16: info['week_day'] = PgUtil.get_weekday(mdate)
1757
+
1758
+ if opt&2: info['logname'] = "gdexdata"
1759
+ if opt&8: info['group'] = PgLOG.PGLOG['GDEXGRP']
1760
+
1761
+ return info
1762
+
1763
+ #
1764
+ # check and get object file status information
1765
+ #
1766
+ # file: object store File key name
1767
+ # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
1768
+ # 1 - get date/time modified (date_modified, time_modified)
1769
+ # 2 - get file owner's login name (logname)
1770
+ # 4 - get metadata hash
1771
+ # 8 - get group name (group)
1772
+ # 16 - get week day 0-Sunday, 1-Monday (week_day)
1773
+ # 32 - get checksum (checksum)
1774
+ # 64 - check once, no rechecking
1775
+ #
1776
+ # Return a dict of file info, or None if the file does not exist
1777
+ #
1778
+ def check_object_file(file, bucket = None, opt = 0, logact = 0):
1779
+
1780
+ if not bucket: bucket = PgLOG.PGLOG['OBJCTBKT']
1781
+ ret = None
1782
+ if not file: return ret
1783
+ cmd = "{} lo {} -b {}".format(OBJCTCMD, file, bucket)
1784
+ ucmd = "{} gm -k {} -b {}".format(OBJCTCMD, file, bucket) if opt&14 else None
1785
+ loop = 0
1786
+ while loop < 2:
1787
+ buf = PgLOG.pgsystem(cmd, PgLOG.LOGWRN, CMDRET)
1788
+ if buf:
1789
+ if re.match(r'^\[\]', buf): break
1790
+ if re.match(r'^\[\{', buf):
1791
+ ary = json.loads(buf)
1792
+ cnt = len(ary)
1793
+ if cnt > 1: return PgLOG.pglog("{}-{}: {} records returned\n{}".format(bucket, file, cnt, buf), logact|PgLOG.ERRLOG)
1794
+ hash = ary[0]
1795
+ uhash = None
1796
+ if ucmd:
1797
+ ubuf = PgLOG.pgsystem(ucmd, PgLOG.LOGWRN, CMDRET)
1798
+ if ubuf and re.match(r'^\{', ubuf): uhash = json.loads(ubuf)
1799
+ ret = object_file_stat(hash, uhash, opt)
1800
+ break
1801
+ if opt&64: return PgLOG.FAILURE
1802
+ errmsg = "Error Execute: {}\n{}".format(cmd, PgLOG.PGLOG['SYSERR'])
1803
+ (hstat, msg) = host_down_status(bucket, OHOST, 0, logact)
1804
+ if hstat: errmsg += "\n" + msg
1805
+ errlog(errmsg, 'O', loop, logact)
1806
+ loop += 1
1807
+
1808
+ if loop > 1: return PgLOG.FAILURE
1809
+ ECNTS['O'] = 0 # reset error count
1810
+ return ret
1811
+
1812
+ #
1813
+ # check an object path status information
1814
+ #
1815
+ # path: object store path name
1816
+ #
1817
+ # Return count of object key names, 0 if no file exists; None on checking error
1818
+ #
1819
+ def check_object_path(path, bucket = None, logact = 0):
1820
+
1821
+ if not bucket: bucket = PgLOG.PGLOG['OBJCTBKT']
1822
+ ret = None
1823
+ if not path: return ret
1824
+ cmd = "{} lo {} -ls -b {}".format(OBJCTCMD, path, bucket)
1825
+ loop = 0
1826
+ while loop < 2:
1827
+ buf = PgLOG.pgsystem(cmd, PgLOG.LOGWRN, CMDRET)
1828
+ if buf:
1829
+ ary = json.loads(buf)
1830
+ return len(ary)
1831
+ errmsg = "Error Execute: {}\n{}".format(cmd, PgLOG.PGLOG['SYSERR'])
1832
+ (hstat, msg) = host_down_status(bucket, OHOST, 0, logact)
1833
+ if hstat: errmsg += "\n" + msg
1834
+ errlog(errmsg, 'O', loop, logact)
1835
+ loop += 1
1836
+
1837
+ ECNTS['O'] = 0 # reset error count
1838
+ return ret
1839
+
1840
+ #
1841
+ # object store function to get file stat
1842
+ #
1843
+ def object_file_stat(hash, uhash, opt):
1844
+
1845
+ info = {'isfile' : 1, 'data_size' : int(hash['Size']), 'fname' : op.basename(hash['Key'])}
1846
+ if not opt: return info
1847
+ if opt&17:
1848
+ ms = re.match(r'^(\d+-\d+-\d+)\s+(\d+:\d+:\d+)', hash['LastModified'])
1849
+ if ms:
1850
+ (mdate, mtime) = ms.groups()
1851
+ if PgLOG.PGLOG['GMTZ']: (mdate, mtime) = PgUtil.addhour(mdate, mtime, PgLOG.PGLOG['GMTZ'])
1852
+ if opt&1:
1853
+ info['date_modified'] = mdate
1854
+ info['time_modified'] = mtime
1855
+ if opt&16: info['week_day'] = PgUtil.get_weekday(mdate)
1856
+ if opt&32:
1857
+ ms = re.match(r'"(.+)"', hash['ETag'])
1858
+ if ms: info['checksum'] = ms.group(1)
1859
+ if uhash:
1860
+ if opt&2: info['logname'] = uhash['user']
1861
+ if opt&4: info['meta'] = uhash
1862
+ if opt&8: info['group'] = uhash['group']
1863
+
1864
+ return info
1865
+
1866
+ #
1867
+ # check and get backup file status information
1868
+ #
1869
+ # file: backup File key name
1870
+ # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
1871
+ # 1 - get date/time modified (date_modified, time_modified)
1872
+ # 2 - get file owner's login name (logname)
1873
+ # 4 - get metadata hash
1874
+ # 8 - get group name (group)
1875
+ # 16 - get week day 0-Sunday, 1-Monday (week_day)
1876
+ # 64 - rechecking
1877
+ #
1878
+ # Return a dict of file info, or None if the file does not exist
1879
+ #
1880
+ def check_backup_file(file, endpoint = None, opt = 0, logact = 0):
1881
+
1882
+ ret = None
1883
+ if not file: return ret
1884
+ if not endpoint: endpoint = PgLOG.PGLOG['BACKUPEP']
1885
+ bdir = op.dirname(file)
1886
+ bfile = op.basename(file)
1887
+ cmd = f"{BACKCMD} ls -ep {endpoint} -p {bdir} --filter {bfile}"
1888
+ ccnt = loop = 0
1889
+ while loop < 2:
1890
+ buf = PgLOG.pgsystem(cmd, logact, CMDRET)
1891
+ syserr = PgLOG.PGLOG['SYSERR']
1892
+ if buf:
1893
+ getstat = 0
1894
+ for line in re.split(r'\n', buf):
1895
+ if re.match(r'^(User|-+)\s*\|', line):
1896
+ getstat += 1
1897
+ elif getstat > 1:
1898
+ ret = backup_file_stat(line, opt)
1899
+ if ret: break
1900
+ if ret: break
1901
+ if loop or opt&64 == 0: return ret
1902
+ time.sleep(PgSIG.PGSIG['ETIME'])
1903
+ elif syserr:
1904
+ if syserr.find("Directory '{}' not found on endpoint".format(bdir)) > -1:
1905
+ if loop or opt&64 == 0: return ret
1906
+ time.sleep(PgSIG.PGSIG['ETIME'])
1907
+ elif ccnt < 2 and syserr.find("The connection to the server was broken") > -1:
1908
+ time.sleep(PgSIG.PGSIG['ETIME'])
1909
+ ccnt += 1
1910
+ continue
1911
+ else:
1912
+ if opt&64 == 0: return PgLOG.FAILURE
1913
+ errmsg = "Error Execute: {}\n{}".format(cmd, syserr)
1914
+ (hstat, msg) = host_down_status('', QHOSTS[endpoint], 0, logact)
1915
+ if hstat: errmsg += "\n" + msg
1916
+ errlog(errmsg, 'B', loop, logact)
1917
+ loop += 1
1918
+
1919
+ if ret: ECNTS['B'] = 0 # reset error count
1920
+ return ret
1921
+
1922
+ #
1923
+ # backup store function to get file stat
1924
+ #
1925
+ def backup_file_stat(line, opt):
1926
+
1927
+ info = {}
1928
+ items = re.split(r'[\s\|]+', line)
1929
+ if len(items) < 8: return None
1930
+ info['isfile'] = (1 if items[6] == 'file' else 0)
1931
+ info['data_size'] = int(items[3])
1932
+ info['fname'] = items[7]
1933
+ if not opt: return info
1934
+ if opt&17:
1935
+ mdate = items[4]
1936
+ mtime = items[5]
1937
+ ms = re.match(r'^(\d+:\d+:\d+)', mtime)
1938
+ if ms: mtime = ms.group(1)
1939
+ if PgLOG.PGLOG['GMTZ']: (mdate, mtime) = PgUtil.addhour(mdate, mtime, PgLOG.PGLOG['GMTZ'])
1940
+ if opt&1:
1941
+ info['date_modified'] = mdate
1942
+ info['time_modified'] = mtime
1943
+ if opt&16: info['week_day'] = PgUtil.get_weekday(mdate)
1944
+ if opt&2: info['logname'] = items[0]
1945
+ if opt&4: info['mode'] = get_file_mode(items[2])
1946
+ if opt&8: info['group'] = items[1]
1947
+
1948
+ return info
1949
+
1950
+ #
1951
+ # check and get a file status information inside a tar file
1952
+ #
1953
+ # file: File name to be checked
1954
+ # tfile: the tar file name
1955
+ # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
1956
+ # 1 - get date/time modified (date_modified, time_modified)
1957
+ # 2 - get file owner's login name (logname)
1958
+ # 4 - get permission mode in 3 octal digits (mode)
1959
+ # 8 - get group name (group)
1960
+ # 16 - get week day 0-Sunday, 1-Monday (week_day)
1961
+ #
1962
+ # Return a dict of file info, or None if the file does not exist
1963
+ #
1964
+ def check_tar_file(file, tfile, opt = 0, logact = 0):
1965
+
1966
+ ret = None
1967
+ if not (file and tfile): return ret
1968
+
1969
+ for loop in range(2):
1970
+ buf = PgLOG.pgsystem("tar -tvf {} {}".format(tfile, file), PgLOG.LOGWRN, CMDRET)
1971
+ if buf or not PgLOG.PGLOG['SYSERR'] or PgLOG.PGLOG['SYSERR'].find('Not found in archive') > -1: break
1972
+
1973
+ errmsg = PgLOG.PGLOG['SYSERR']
1974
+ (hstat, msg) = host_down_status(tfile, LHOST, 0, logact)
1975
+ if hstat: errmsg += "\n" + msg
+ errlog(errmsg, 'L', loop, logact)
1976
+
1977
+ if loop > 0: return PgLOG.FAILURE
1978
+ if buf:
1979
+ for line in re.split(r'\n', buf):
1980
+ ret = tar_file_stat(line, opt)
1981
+ if ret: break
1982
+ ECNTS['L'] = 0 # reset error count
1983
+
1984
+ return ret
1985
+
1986
+ #
1987
+ # local function to get file stat in a tar file
1988
+ #
1989
+ def tar_file_stat(line, opt):
1990
+
1991
+ items = re.split(r'\s+', line)
1992
+ if len(items) < 6: return None
1993
+ ms = re.match(r'^([d\-])([\w\-]{9})$', items[0])
1994
+ if not ms: return None
1995
+ info = {}
1996
+ info['isfile'] = (1 if ms and ms.group(1) == "-" else 0)
1997
+ info['data_size'] = int(items[2])
1998
+ info['fname'] = op.basename(items[5])
1999
+ if not opt: return info
2000
+ if opt&4: info['mode'] = get_file_mode(ms.group(2))
2001
+ if opt&17:
2002
+ mdate = items[3]
2003
+ mtime = items[4]
2004
+ if PgLOG.PGLOG['GMTZ']: (mdate, mtime) = PgUtil.addhour(mdate, mtime, PgLOG.PGLOG['GMTZ'])
2005
+ if opt&1:
2006
+ info['date_modified'] = mdate
2007
+ info['time_modified'] = mtime
2008
+ if opt&16: info['week_day'] = PgUtil.get_weekday(mdate)
2009
+
2010
+ if opt&10:
2011
+ ms = re.match(r'^(\w+)/(\w+)', items[1])
2012
+ if ms:
2013
+ if opt&2: info['logname'] = ms.group(1)
2014
+ if opt&8: info['group'] = ms.group(2)
2015
+
2016
+ return info
2017
+
2018
+ #
2019
+ # check and get a file status information on ftp server
2020
+ #
2021
+ # file: File name to be checked
2022
+ # name: login user name
2023
+ # pswd: login password
2024
+ # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
2025
+ # 1 - get date/time modified (date_modified, time_modified)
2026
+ # 2 - get file owner's login name (logname)
2027
+ # 4 - get permission mode in 3 octal digits (mode)
2028
+ # 8 - get group name (group)
2029
+ # 16 - get week day 0-Sunday, 1-Monday (week_day)
2030
+ #
2031
+ # Return a dict of file info, or None if the file does not exist
2032
+ #
2033
+ def check_ftp_file(file, opt = 0, name = None, pswd = None, logact = 0):
2034
+
2035
+ if not file: return None
2036
+
2037
+ ms = re.match(r'^(.+)/$', file)
2038
+ if ms: file = ms.group(1) # remove ending '/' in case
2039
+ cmd = "ncftpls -l "
2040
+ if name: cmd += "-u {} ".format(name)
2041
+ if pswd: cmd += "-p {} ".format(pswd)
2042
+ fname = op.basename(file)
2043
+
2044
+
2045
+ for loop in range(2):
2046
+ buf = PgLOG.pgsystem(cmd + file, PgLOG.LOGWRN, CMDRET)
2047
+ if buf: break
2048
+ if PgLOG.PGLOG['SYSERR']:
2049
+ errlog(PgLOG.PGLOG['SYSERR'], 'O', loop, logact|PgLOG.LOGERR)
2050
+ if loop == 0: file = op.dirname(file) + '/'
2051
+
2052
+ if not buf: return PgLOG.FAILURE
2053
+ for line in re.split(r'\n', buf):
2054
+ if not line or line.find(fname) < 0: continue
2055
+ info = ftp_file_stat(line, opt)
2056
+ if info: return info
2057
+
2058
+ return None
2059
+
2060
+ #
2061
+ # local function to get stat of a file on ftp server
2062
+ #
2063
+ def ftp_file_stat(line, opt):
2064
+
2065
+ items = re.split(r'\s+', line)
2066
+ if len(items) < 9: return None
2067
+ ms = re.match(r'^([d\-])([\w\-]{9})$', items[0])
2068
+ info = {}
2069
+ info['isfile'] = (1 if ms and ms.group(1) == "-" else 0)
2070
+ info['data_size'] = int(items[4])
2071
+ info['fname'] = op.basename(items[8])
2072
+ if not opt: return info
2073
+ if opt&4: info['mode'] = get_file_mode(ms.group(2))
2074
+ if opt&17:
2075
+ dy = int(items[6])
2076
+ mn = PgUtil.get_month(items[5])
2077
+ if re.match(r'^\d+$', items[7]):
2078
+ yr = int(items[7])
2079
+ mtime = "00:00:00"
2080
+ else:
2081
+ mtime = items[7] + ":00"
2082
+ cdate = PgUtil.curdate()
2083
+ ms = re.match(r'^(\d+)-(\d\d)', cdate)
2084
+ if ms:
2085
+ yr = int(ms.group(1))
2086
+ cm = int(ms.group(2)) # current month
2087
+ if cm < mn: yr -= 1 # previous year
2088
+
2089
+ mdate = "{}-{:02}-{:02}".format(yr, mn, dy)
2090
+ if opt&1:
2091
+ info['date_modified'] = mdate
2092
+ info['time_modified'] = mtime
2093
+ if opt&16: info['week_day'] = PgUtil.get_weekday(mdate)
2094
+
2095
+ if opt&2: info['logname'] = items[2]
2096
+ if opt&8: info['group'] = items[3]
2097
+
2098
+ return info
2099
+
2100
+ #
2101
+ # get an array of directories/files under given dir on a given host name (including local host)
2102
+ #
2103
+ # dir: directory name to be listed
2104
+ # host: host name the directory on, default to LHOST
2105
+ # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
2106
+ # 1 - get date/time modified (date_modified, time_modified)
2107
+ # 2 - get file owner's login name (logname)
2108
+ # 4 - get permission mode in 3 octal digits (mode)
2109
+ # 8 - get group name (group)
2110
+ # 16 - get week day 0-Sunday, 1-Monday (week_day)
2111
+ # 32 - get checksum (checksum), work for local file only
2112
+ #
2113
+ # Return: a dict with filenames as keys, or None if the directory is empty
2114
+ #
2115
+ def gdex_glob(dir, host, opt = 0, logact = 0):
2116
+
2117
+ shost = strip_host_name(host)
2118
+ if PgUtil.pgcmp(shost, LHOST, 1) == 0:
2119
+ return local_glob(dir, opt, logact)
2120
+ elif PgUtil.pgcmp(shost, OHOST, 1) == 0:
2121
+ return object_glob(dir, None, opt, logact)
2122
+ elif PgUtil.pgcmp(shost, BHOST, 1) == 0:
2123
+ return backup_glob(dir, None, opt, logact)
2124
+ else:
2125
+ return remote_glob(dir, host, opt, logact)
2126
+
2127
+ rda_glob = gdex_glob
2128
+
2129
+ #
2130
+ # get an array of directories/files under given dir on local host
2131
+ #
2132
+ # dir: directory name to be listed
2133
+ # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
2134
+ # 1 - get date/time modified (date_modified, time_modified)
2135
+ # 2 - get file owner's login name (logname)
2136
+ # 4 - get permission mode in 3 octal digits (mode)
2137
+ # 8 - get group name (group)
2138
+ # 16 - get week day 0-Sunday, 1-Monday (week_day)
2139
+ # 32 - get checksum (checksum), work for local file only
2140
+ #
2141
+ # 256 - get files only and ignore directories
2142
+ #
2143
+ # Return: dict with filenames as keys or None if empty directory
2144
+ #
2145
+
2146
+ def local_glob(dir, opt = 0, logact = 0):
2147
+
2148
+ flist = {}
2149
+ if not re.search(r'[*?]', dir):
2150
+ if op.exists(dir):
2151
+ dir = PgLOG.join_paths(dir, "*")
2152
+ else:
2153
+ dir += "*"
2154
+
2155
+ for file in glob.glob(dir):
2156
+ info = check_local_file(file, opt, logact)
2157
+ if info and (info['isfile'] or not 256&opt): flist[file] = info
2158
+
2159
+ return flist
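+
+ #
+ # An illustrative call of the options above; the directory is hypothetical,
+ # and opt 1|256 lists regular files only along with their modified dates:
+ #
+ #   flist = local_glob('/data/ds540.0/grib', 1|256)
+ #   for fname, finfo in flist.items():
+ #       print(fname, finfo['data_size'], finfo['date_modified'])
+ #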
2160
+
2161
+ #
2162
+ # check and get file status information of a file on remote host
2163
+ #
2164
+ # dir: remote directory name
2165
+ # host: host name the directory on, default to LHOST
2166
+ # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
2167
+ # 1 - get date/time modified (date_modified, time_modified)
2168
+ # 2 - file owner's login name (logname), assumed 'gdexdata'
2169
+ # 4 - get permission mode in 3 octal digits (mode)
2170
+ # 8 - get group name (group), assumed 'dss'
2171
+ # 16 - get week day 0-Sunday, 1-Monday (week_day)
2172
+ #
2173
+ # Return: dict with filenames as keys or None if empty directory
2174
+ #
2175
+ def remote_glob(dir, host, opt = 0, logact = 0):
2176
+
2177
+ flist = {}
2178
+ if not re.search(r'/$', dir): dir += '/'
2179
+ buf = PgLOG.pgsystem("{} {}".format(PgLOG.get_sync_command(host), dir), PgLOG.LOGWRN, CMDRET)
2180
+ if not buf:
2181
+ if PgLOG.PGLOG['SYSERR'] and PgLOG.PGLOG['SYSERR'].find(PgLOG.PGLOG['MISSFILE']) < 0:
2182
+ errlog("{}-{}: Error list directory\n{}".format(host, dir, PgLOG.PGLOG['SYSERR']), 'R', 1, logact)
2183
+ return flist
2184
+
2185
+ for line in re.split(r'\n', buf):
2186
+ info = remote_file_stat(line, opt)
2187
+ if info: flist[dir + info['fname']] = info
2188
+
2189
+ return flist
2190
+
2191
+ #
2192
+ # check and get multiple object store file status information
2193
+ #
2194
+ # dir: object directory name
2195
+ # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
2196
+ # 1 - get date/time modified (date_modified, time_modified)
2197
+ # 2 - get file owner's login name (logname)
2198
+ # 8 - get group name (group)
2199
+ # 16 - get week day 0-Sunday, 1-Monday (week_day)
2200
+ #
2201
+ # Return: a dict with filenames as keys, or None if nothing exists
2202
+ #
2203
+ def object_glob(dir, bucket = None, opt = 0, logact = 0):
2204
+
2205
+ flist = {}
2206
+ if not bucket: bucket = PgLOG.PGLOG['OBJCTBKT']
2207
+ ms = re.match(r'^(.+)/$', dir)
2208
+ if ms: dir = ms.group(1)
2209
+ cmd = "{} lo {} -b {}".format(OBJCTCMD, dir, bucket)
2210
+ ary = err = None
2211
+ buf = PgLOG.pgsystem(cmd, PgLOG.LOGWRN, CMDRET)
2212
+ if buf:
2213
+ if re.match(r'^\[\{', buf):
2214
+ ary = json.loads(buf)
2215
+ elif not re.match(r'^\[\]', buf):
2216
+ err = "{}\n{}".format(PgLOG.PGLOG['SYSERR'], buf)
2217
+ else:
2218
+ err = PgLOG.PGLOG['SYSERR']
2219
+ if not ary:
2220
+ if err:
2221
+ errlog("{}-{}-{}: Error list files\n{}".format(OHOST, bucket, dir, err), 'O', 1, logact)
2222
+ return PgLOG.FAILURE
2223
+ else:
2224
+ return flist
2225
+
2226
+ for hash in ary:
2227
+ uhash = None
2228
+ if opt&10:
2229
+ ucmd = "{} gm -l {} -b {}".format(OBJCTCMD, hash['Key'], bucket)
2230
+ ubuf = PgLOG.pgsystem(ucmd, PgLOG.LOGWRN, CMDRET)
2231
+ if ubuf and re.match(r'^\{.+', ubuf): uhash = json.loads(ubuf)
2232
+ info = object_file_stat(hash, uhash, opt)
2233
+ if info: flist[hash['Key']] = info
2234
+
2235
+ return flist
2236
+
2237
+ #
2238
+ # check and get multiple Quasar backup file status information
2239
+ #
2240
+ # dir: backup path
2241
+ # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
2242
+ # 1 - get date/time modified (date_modified, time_modified)
2243
+ # 2 - get file owner's login name (logname)
2244
+ # 8 - get group name (group)
2245
+ # 16 - get week day 0-Sunday, 1-Monday (week_day)
2246
+ # 64 - rechecking
2247
+ #
2248
+ # Return: a dict with filenames as keys, or None if nothing exists
2249
+ #
2250
+ def backup_glob(dir, endpoint = None, opt = 0, logact = 0):
2251
+
2252
+ if not dir: return None
2253
+ if not endpoint: endpoint = PgLOG.PGLOG['BACKUPEP']
2254
+
2255
+ cmd = f"{BACKCMD} ls -ep {endpoint} -p {dir}"
2256
+ flist = {}
2257
+ for loop in range(2):
2258
+ buf = PgLOG.pgsystem(cmd, logact, CMDRET)
2259
+ syserr = PgLOG.PGLOG['SYSERR']
2260
+ if buf:
2261
+ getstat = 0
2262
+ for line in re.split(r'\n', buf):
2263
+ if re.match(r'^(User|-+)\s*\|', line):
2264
+ getstat += 1
2265
+ elif getstat > 1:
2266
+ info = backup_file_stat(line, opt)
2267
+ if info: flist[info['fname']] = info
2268
+ if flist: break
2269
+ if loop or opt&64 == 0: return None
2270
+ time.sleep(PgSIG.PGSIG['ETIME'])
2271
+ elif syserr:
2272
+ if syserr.find("Directory '{}' not found on endpoint".format(dir)) > -1:
2273
+ if loop or opt&64 == 0: return None
2274
+ time.sleep(PgSIG.PGSIG['ETIME'])
2275
+ else:
2276
+ if opt&64 == 0: return PgLOG.FAILURE
2277
+ errmsg = "Error Execute: {}\n{}".format(cmd, syserr)
2278
+ (hstat, msg) = host_down_status('', QHOSTS[endpoint], 0, logact)
2279
+ if hstat: errmsg += "\n" + msg
2280
+ errlog(errmsg, 'B', loop, logact)
2281
+
2282
+ if flist:
2283
+ ECNTS['B'] = 0 # reset error count
2284
+ return flist
2285
+ else:
2286
+ return PgLOG.FAILURE
2287
+
2288
+ #
2289
+ # local function to get file/directory mode for given permission string, for example, rw-rw-r--
2290
+ #
2291
+ def get_file_mode(perm):
2292
+
2293
+ mbits = [4, 2, 1]
2294
+ mults = [64, 8, 1]
2295
+ plen = len(perm)
2296
+ if plen == 4:
2297
+ perm = perm[1:]
2298
+ plen = 3
2299
+ mode = 0
2300
+ for i in range(3):
2301
+ for j in range(3):
2302
+ pidx = 3*i+j
2303
+ if pidx < plen and perm[pidx] != "-": mode += mults[i]*mbits[j]
2304
+
2305
+ return mode
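+
+ #
+ # A couple of worked examples of the mapping above, assuming the usual rwx layout:
+ #   get_file_mode('rw-rw-r--') -> 436 (octal 664)
+ #   get_file_mode('rwxr-xr-x') -> 493 (octal 755)
+ #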
2306
+
2307
+ #
2308
+ # Evaluate md5 checksum
2309
+ #
2310
+ # file: file name for MD5 checksum
2311
+ # count: defined if file is an array of file names
2312
+ #
2313
+ # Return: one or an array of 128-bit md5 'fingerprints'; None if failed
2314
+ #
2315
+ def get_md5sum(file, count = 0, logact = 0):
2316
+
2317
+ cmd = MD5CMD + ' '
2318
+
2319
+ if count > 0:
2320
+ checksum = [None]*count
2321
+ for i in range(count):
2322
+ if op.isfile(file[i]):
2323
+ chksm = PgLOG.pgsystem(cmd + file[i], logact, 20)
2324
+ if chksm:
2325
+ ms = re.search(r'(\w{32})', chksm)
2326
+ if ms: checksum[i] = ms.group(1)
2327
+ else:
2328
+ checksum = None
2329
+ if op.isfile(file):
2330
+ chksm = PgLOG.pgsystem(cmd + file, logact, 20)
2331
+ if chksm:
2332
+ ms = re.search(r'(\w{32})', chksm)
2333
+ if ms: checksum = ms.group(1)
2334
+
2335
+ return checksum
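+
+ #
+ # Illustrative calls for a single file and for an array of files; the file
+ # names are hypothetical:
+ #
+ #   cksm = get_md5sum('stn01.txt')                     # one 32-character hex string, or None
+ #   cksms = get_md5sum(['stn01.txt', 'stn02.txt'], 2)  # list of two checksums (None for misses)
+ #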
2336
+
2337
+ #
2338
+ # Evaluate md5 checksums and compare them for two given files
2339
+ #
2340
+ # file1, file2: file names
2341
+ #
2342
+ # Return: 0 if same and 1 if not
2343
+ #
2344
+ def compare_md5sum(file1, file2, logact = 0):
2345
+
2346
+ if op.isdir(file1) or op.isdir(file2):
2347
+ files1 = get_directory_files(file1)
2348
+ fcnt1 = len(files1) if files1 else 0
2349
+ files2 = get_directory_files(file2)
2350
+ fcnt2 = len(files2) if files2 else 0
2351
+ if fcnt1 != fcnt2: return 1
2352
+ chksm1 = get_md5sum(files1, fcnt1, logact)
2353
+ chksm1 = ''.join(chksm1)
2354
+ chksm2 = get_md5sum(files2, fcnt2, logact)
2355
+ chksm2 = ''.join(chksm2)
2356
+ else:
2357
+ chksm1 = get_md5sum(file1, 0, logact)
2358
+ chksm2 = get_md5sum(file2, 0, logact)
2359
+
2360
+ return (0 if (chksm1 and chksm2 and chksm1 == chksm2) else 1)
2361
+
2362
+ #
2363
+ # change local directory to todir, and return odir upon success
2364
+ #
2365
+ def change_local_directory(todir, logact = 0):
2366
+
2367
+ if logact:
2368
+ lact = logact&~(PgLOG.EXITLG|PgLOG.ERRLOG)
2369
+ else:
2370
+ logact = lact = PgLOG.LOGWRN
2371
+ if not op.isdir(todir):
2372
+ if op.isfile(todir): return errlog(todir + ": is file, cannot change directory", 'L', 1, logact)
2373
+ if not make_local_directory(todir, logact): return PgLOG.FAILURE
2374
+
2375
+ odir = PgLOG.PGLOG['CURDIR']
2376
+ if todir == odir:
2377
+ PgLOG.pglog(todir + ": in Directory", lact)
2378
+ return odir
2379
+ try:
2380
+ os.chdir(todir)
2381
+ except Exception as e:
2382
+ return errlog(str(e), 'L', 1, logact)
2383
+ else:
2384
+ if not op.isabs(todir): todir = os.getcwd()
2385
+ PgLOG.PGLOG['CURDIR'] = todir
2386
+ PgLOG.pglog(todir + ": Change to Directory", lact)
2387
+
2388
+ return odir
2389
+
2390
+ #
2391
+ # record the directory for the deleted file
2392
+ # pass in dir as None with a numeric val to set DIRLVLS and turn delete-directory recording on
2393
+ #
2394
+ def record_delete_directory(dir, val):
2395
+
2396
+ global DIRLVLS
2397
+
2398
+ if dir is None:
2399
+ if isinstance(val, int):
2400
+ DIRLVLS = val
2401
+ elif re.match(r'^\d+$', val):
2402
+ DIRLVLS = int(val)
2403
+ elif dir and not re.match(r'^(\.|\./|/)$', dir) and dir not in DELDIRS:
2404
+ DELDIRS[dir] = val
2405
+
2406
+ #
2407
+ # remove the recorded delete directory if it is empty
2408
+ #
2409
+ def clean_delete_directory(logact = 0):
2410
+
2411
+ global DIRLVLS, DELDIRS
2412
+
2413
+ if not DIRLVLS: return
2414
+ if logact:
2415
+ lact = logact&~(PgLOG.EXITLG)
2416
+ else:
2417
+ logact = lact = PgLOG.LOGWRN
2418
+ lvl = DIRLVLS
2419
+ DIRLVLS = 0 # set to 0 to stop recording directory
2420
+ while lvl > 0:
2421
+ lvl -= 1
2422
+ dirs = {}
2423
+ for dir in DELDIRS:
2424
+ host = DELDIRS[dir]
2425
+ dinfo = (dir if host == LHOST else "{}-{}".format(host, dir))
2426
+ dstat = gdex_empty_directory(dir, DELDIRS[dir])
2427
+ if dstat == 0:
2428
+ if delete_gdex_file(dir, host, logact):
2429
+ PgLOG.pglog(dinfo + ": Empty directory removed", lact)
2430
+ elif dstat > 0:
2431
+ if dstat == 1 and lvl > 0: PgLOG.pglog(dinfo + ": Directory not empty yet", lact)
2432
+ continue
2433
+
2434
+ if lvl: dirs[op.dirname(dir)] = host
2435
+
2436
+ if not dirs: break
2437
+ DELDIRS = dirs
2438
+
2439
+ DELDIRS = {} # empty cache afterward
2440
+
2441
+ #
2442
+ # remove the given directory and all its subdirectories if they are empty
2443
+ #
2444
+ # return 1 if the empty directory was removed, 0 otherwise
2445
+ #
2446
+ def clean_empty_directory(dir, host, logact = 0):
2447
+
2448
+ if not dir: return 0
2449
+
2450
+ dirs = gdex_glob(dir, host)
2451
+ cnt = 0
2452
+ if logact:
2453
+ lact = logact&~PgLOG.EXITLG
2454
+ else:
2455
+ lact = logact = PgLOG.LOGWRN
2456
+
2457
+ if dirs:
2458
+ for name in dirs:
2459
+ cnt += 1
2460
+ if dirs[name]['isfile']: continue
2461
+ cnt -= clean_empty_directory(name, host, logact)
2462
+
2463
+ dinfo = (dir if same_hosts(host, LHOST) else "{}-{}".format(host, dir))
2464
+ if cnt == 0:
2465
+ if delete_gdex_file(dir, host, logact):
2466
+ PgLOG.pglog(dinfo + ": Empty directory removed", lact)
2467
+ return 1
2468
+ else:
2469
+ PgLOG.pglog(dinfo + ": Directory not empty yet", lact)
2470
+
2471
+ return 0
2472
+
2473
+
2474
+ #
2475
+ # check if given directory is empty
2476
+ #
2477
+ # Return: 0 if empty directory, 1 if not empty and -1 if invalid directory
2478
+ #
2479
+ def gdex_empty_directory(dir, host):
2480
+
2481
+ shost = strip_host_name(host)
2482
+
2483
+ if PgUtil.pgcmp(shost, LHOST, 1) == 0:
2484
+ return local_empty_directory(dir)
2485
+ else:
2486
+ return remote_empty_directory(dir, host)
2487
+
2488
+ rda_empty_directory = gdex_empty_directory
2489
+
2490
+ #
2491
+ # return 0 if the local directory is empty, 1 if not; -1 if it is not a directory, 2 if it is a root directory
2492
+ #
2493
+ def local_empty_directory(dir):
2494
+
2495
+ if not op.isdir(dir): return -1
2496
+ if is_root_directory(dir, 'L'): return 2
2497
+ if not re.search(r'/$', dir): dir += '/'
2498
+ dir += '*'
2499
+ return (1 if glob.glob(dir) else 0)
2500
+
2501
+ #
2502
+ # return 0 if the remote directory is empty, 1 if not; -1 if it cannot be listed, 2 if it is a root directory
2503
+ #
2504
+ def remote_empty_directory(dir, host):
2505
+
2506
+ if is_root_directory(dir, 'R', host): return 2
2507
+ if not re.search(r'/$', dir): dir += '/'
2508
+ buf = PgLOG.pgsystem("{} {}".format(PgLOG.get_sync_command(host), dir), PgLOG.LOGWRN, CMDRET)
2509
+ if not buf: return -1
2510
+
2511
+ for line in re.split(r'\n', buf):
2512
+ if remote_file_stat(line, 0): return 1
2513
+
2514
+ return 0
2515
+
2516
+ #
2517
+ # get sizes of files on a given host
2518
+ #
2519
+ # files: file names to get sizes
2520
+ # host: host name the file on, default to LHOST
2521
+ #
2522
+ # return: array of file sizes; a size of -1 means the file does not exist
2523
+ #
2524
+ def gdex_file_sizes(files, host, logact = 0):
2525
+
2526
+ sizes = []
2527
+ for file in files: sizes.append(gdex_file_size(file, host, 2, logact))
2528
+
2529
+ return sizes
2530
+
2531
+ rda_file_sizes = gdex_file_sizes
2532
+
2533
+ #
2534
+ # get sizes of local files
2535
+ #
2536
+ # files: file names to get sizes
2537
+ #
2538
+ # return: array of file sizes; a size of -1 means the file does not exist
2539
+ #
2540
+ def local_file_sizes(files, logact = 0):
2541
+
2542
+ sizes = []
2543
+ for file in files: sizes.append(local_file_size(file, 6, logact))
2544
+
2545
+ return sizes
2546
+
2547
+ #
2548
+ # check if a file on a given host is empty or too small to be considered valid
2549
+ #
2550
+ # file: file name to be checked
2551
+ # host: host name the file on, default to LHOST
2552
+ # opt: 1 - to remove empty file
2553
+ # 2 - show message for empty file
2554
+ # 4 - show message for non-existing file
2555
+ #
2556
+ # return: file size in unit of byte
2557
+ # 0 - empty file or small file, with size < PgLOG.PGLOG['MINSIZE']
2558
+ # -1 - file not exists
2559
+ # -2 - error check file
2560
+ #
2561
+ def gdex_file_size(file, host, opt = 0, logact = 0):
2562
+
2563
+ info = check_gdex_file(file, host, 0, logact)
2564
+ if info:
2565
+ if info['isfile'] and info['data_size'] < PgLOG.PGLOG['MINSIZE']:
2566
+ if opt:
2567
+ if opt&2: errlog("{}-{}: {} file".format(host, file, ("Too small({}B)".format(info['data_size']) if info['data_size'] > 0 else "Empty")),
2568
+ 'O', 1, logact)
2569
+ if opt&1: delete_gdex_file(file, host, logact)
2570
+ return 0
2571
+ else:
2572
+ return info['data_size'] # if not regular file or not empty
2573
+
2574
+ elif info != None:
2575
+ return -2 # error access
2576
+ else:
2577
+ if opt&4: errlog("{}-{}: {}".format(host, file, PgLOG.PGLOG['MISSFILE']), 'O', 1, logact)
2578
+ return -1 # file not exist
2579
+
2580
+ rda_file_size = gdex_file_size
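+
+ #
+ # A sketch of handling the return convention above; the path is hypothetical:
+ #
+ #   size = gdex_file_size('/data/ds540.0/example.nc', LHOST, 2|4)
+ #   if size > 0:     pass   # valid file of size bytes
+ #   elif size == 0:  pass   # empty, or smaller than PgLOG.PGLOG['MINSIZE']
+ #   elif size == -1: pass   # file does not exist
+ #   else:            pass   # -2, error while checking
+ #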
2581
+
2582
+ #
2583
+ # check if a local file is empty or too small to be considered valid
2584
+ #
2585
+ # file: file name to be checked
2586
+ # opt: 1 - to remove empty file
2587
+ # 2 - show message for empty file
2588
+ # 4 - show message for non-existing file
2589
+ #
2590
+ # return: file size in unit of byte
2591
+ # 0 - empty file or small file, with size < PgLOG.PGLOG['MINSIZE']
2592
+ # -1 - file not exists
2593
+ # -2 - error check file
2594
+ #
2595
+ def local_file_size(file, opt = 0, logact = 0):
2596
+
2597
+ if not op.exists(file):
2598
+ if opt&4: lmsg(file, PgLOG.PGLOG['MISSFILE'], logact)
2599
+ return -1 # file does not exist
2600
+
2601
+ info = check_local_file(file, 0, logact|PgLOG.PFSIZE)
2602
+ if info:
2603
+ if info['isfile'] and info['data_size'] < PgLOG.PGLOG['MINSIZE']:
2604
+ if opt:
2605
+ if opt&2: lmsg(file, ("Too small({}B)".format(info['data_size']) if info['data_size'] > 0 else "Empty file") , logact)
2606
+ if opt&1: delete_local_file(file, logact)
2607
+ return 0
2608
+ else:
2609
+ return info['data_size'] # if not regular file or not empty
2610
+ elif info != None:
2611
+ return -2 # error check file
2612
+
2613
+ #
2614
+ # compress/uncompress a single local file
2615
+ #
2616
+ # ifile: file name to be compressed/uncompressed
2617
+ # fmt: archive format
2618
+ # act: 0 - uncompress
2619
+ # 1 - compress
2620
+ # 2 - get uncompress file name
2621
+ # 3 - get compress file name
2622
+ # return: tuple of the new file name and archive format if changed, otherwise the original ones
2623
+ #
2624
+ def compress_local_file(ifile, fmt = None, act = 0, logact = 0):
2625
+
2626
+ ms = re.match(r'^(.+)\.({})'.format(CMPSTR), ifile)
2627
+ if ms:
2628
+ ofile = ms.group(1)
2629
+ else:
2630
+ ofile = ifile
2631
+
2632
+ if fmt:
2633
+ if act&1:
2634
+ for ext in PGCMPS:
2635
+ if re.search(r'(^|\.)({})(\.|$)'.format(ext), fmt, re.I):
2636
+ ofile += '.' + ext
2637
+ break
2638
+ else:
2639
+ ms = re.search(r'(^|\.)({})$'.format(CMPSTR), fmt, re.I)
2640
+ if ms: fmt = re.sub(r'{}{}$'.format(ms.group(1), ms.group(2)), '', fmt, 1)
2641
+
2642
+ if act < 2 and ifile != ofile: convert_files(ofile, ifile, 0, logact)
2643
+
2644
+ return (ofile, fmt)
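+
+ #
+ # Name-only examples of the act values above (act 2/3 do not touch the
+ # filesystem); the file names are hypothetical:
+ #   compress_local_file('precip.dat.gz', 'TAR.GZ', 2) -> ('precip.dat', 'TAR')
+ #   compress_local_file('precip.dat', 'TAR.GZ', 3)    -> ('precip.dat.gz', 'TAR.GZ')
+ #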
2645
+
2646
+ #
2647
+ # get file archive format from a given file name; None if not found
2648
+ #
2649
+ def get_file_format(fname):
2650
+
2651
+ ms = re.search(r'\.({})$'.format(TARSTR), fname, re.I)
2652
+ if ms: return PGTARS[ms.group(1)][2]
2653
+
2654
+ ms = re.search(r'\.({})$'.format(CMPSTR), fname, re.I)
2655
+ if ms: return PGCMPS[ms.group(1)][2]
2656
+
2657
+ return None
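+
+ #
+ # For instance, with the PGTARS/PGCMPS tables above (file names hypothetical):
+ #   get_file_format('obs2020.tar.gz') -> 'TAR.GZ'
+ #   get_file_format('obs2020.gz')     -> 'GZ'
+ #   get_file_format('obs2020.nc')     -> None
+ #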
2658
+
2659
+ #
2660
+ # tar/untar multiple local files into/from a single tar/tar.gz/tgz/zip file
2661
+ #
2662
+ # tfile: tar file name to be tar/untarred
2663
+ # files: member file names in the tar file
2664
+ # fmt: archive format (defaults to the tar file name extension; must be defined in PGTARS)
2665
+ # act: 0 - untar
2666
+ # 1 - tar
2667
+ # return: PgLOG.SUCCESS upon success, PgLOG.FAILURE otherwise
2668
+ #
2669
+ def tar_local_file(tfile, files, fmt, act, logact = 0):
2670
+
2671
+ if not fmt:
2672
+ ms = re.search(r'\.({})$'.format(TARSTR), tfile, re.I)
2673
+ if ms: fmt = ms.group(1)
2674
+ logact |= PgLOG.ERRLOG
2675
+
2676
+ if not fmt: return PgLOG.pglog(tfile + ": Miss archive format", logact)
2677
+ if fmt not in PGTARS: return PgLOG.pglog("{}: unknown archive format '{}' provided".format(tfile, fmt), logact)
2678
+ tarray = PGTARS[fmt]
2679
+
2680
+ if not act: #untar member files
2681
+ cmd = "{} {}".format(tarray[1], tfile)
2682
+ if files: cmd += ' ' + ' '.join(files)
2683
+ else:
2684
+ if not files: return PgLOG.pglog(tfile + ": Miss member file to archive", logact)
2685
+ cmd = "{} {} {}".format(tarray[0], tfile, ' '.join(files))
2686
+
2687
+ return PgLOG.pgsystem(cmd, logact, 7)
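+
+ #
+ # For example, with the PGTARS table above (file names hypothetical):
+ #   tar_local_file('bundle.tgz', ['a.nc', 'b.nc'], 'tgz', 1)  # runs: tar -zcvf bundle.tgz a.nc b.nc
+ #   tar_local_file('bundle.tgz', None, None, 0)               # runs: tar -xvf bundle.tgz
+ #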
2688
+
2689
+ #
2690
+ # get local file archive format by checking extension of given local file name
2691
+ #
2692
+ # file: local file name
2693
+ #
2694
+ def local_archive_format(file):
2695
+
2696
+ ms = re.search(r'\.({})$'.format(CMPSTR), file)
2697
+ if ms:
2698
+ fmt = ms.group(1)
2699
+ if re.search(r'\.tar\.{}$'.format(fmt), file):
2700
+ return "TAR." + fmt.upper()
2701
+ else:
2702
+ return fmt.upper()
2703
+ elif re.search(r'\.tar$', file):
2704
+ return "TAR"
2705
+
2706
+ return ''
2707
+
2708
+ #
2709
+ # local function to show message with full local file path
2710
+ #
2711
+ def lmsg(file, msg, logact = 0):
2712
+
2713
+ if not op.isabs(file): file = PgLOG.join_paths(os.getcwd(), file)
2714
+
2715
+ return errlog("{}: {}".format(file, msg), 'L', 1, logact)
2716
+
2717
+ #
2718
+ # check if given path is executable locally
2719
+ #
2720
+ # return PgLOG.SUCCESS if yes PgLOG.FAILURE if not
2721
+ #
2722
+ def check_local_executable(path, actstr = '', logact = 0):
2723
+
2724
+ if os.access(path, os.X_OK): return PgLOG.SUCCESS
2725
+ if check_local_accessible(path, actstr, logact):
2726
+ if actstr: actstr += '-'
2727
+ errlog("{}{}: Accessible, but Unexecutable on'{}'".format(actstr, path, PgLOG.PGLOG['HOSTNAME']), 'L', 1, logact)
2728
+
2729
+ return PgLOG.FAILURE
2730
+
2731
+
2732
+ #
2733
+ # check if given path is writable locally
2734
+ #
2735
+ # return PgLOG.SUCCESS if yes PgLOG.FAILURE if not
2736
+ #
2737
+ def check_local_writable(path, actstr = '', logact = 0):
2738
+
2739
+ if os.access(path, os.W_OK): return PgLOG.SUCCESS
2740
+ if check_local_accessible(path, actstr, logact):
2741
+ if actstr: actstr += '-'
2742
+ errlog("{}{}: Accessible, but Unwritable on'{}'".format(actstr, path, PgLOG.PGLOG['HOSTNAME']), 'L', 1, logact)
2743
+
2744
+ return PgLOG.FAILURE
2745
+
2746
+ #
2747
+ # check if given path is accessible locally
2748
+ #
2749
+ # return PgLOG.SUCCESS if yes, PgLOG.FAILURE if not
2750
+ #
2751
+ def check_local_accessible(path, actstr = '', logact = 0):
2752
+
2753
+ if os.access(path, os.F_OK): return PgLOG.SUCCESS
2754
+ if actstr: actstr += '-'
2755
+ errlog("{}{}: Unaccessible on '{}'".format(actstr, path, PgLOG.PGLOG['HOSTNAME']), 'L', 1, logact)
2756
+ return PgLOG.FAILURE
2757
+
2758
+ #
2759
+ # check if given webfile under PgLOG.PGLOG['DSSDATA'] is writable
2760
+ #
2761
+ # return PgLOG.SUCCESS if yes PgLOG.FAILURE if not
2762
+ #
2763
+ def check_webfile_writable(action, wfile, logact = 0):
2764
+
2765
+ ms = re.match(r'^({}/\w+)'.format(PgLOG.PGLOG['DSSDATA']), wfile)
2766
+ if ms:
2767
+ return check_local_writable(ms.group(1), "{} {}".format(action, wfile), logact)
2768
+ else:
2769
+ return PgLOG.SUCCESS # do not need check
2770
+
2771
+ #
2772
+ # convert one file to another via uncompress, move/copy, and/or compress
2773
+ #
2774
+ def convert_files(ofile, ifile, keep = 0, logact = 0):
2775
+
2776
+ if ofile == ifile: return PgLOG.SUCCESS
2777
+ oname = ofile
2778
+ iname = ifile
2779
+
2780
+ if keep: kfile = ifile + ".keep"
2781
+
2782
+ oext = iext = None
2783
+ for ext in PGCMPS:
2784
+ if oext is None:
2785
+ ms = re.match(r'^(.+)\.{}$'.format(ext), ofile)
2786
+ if ms:
2787
+ oname = ms.group(1)
2788
+ oext = ext
2789
+ if iext is None:
2790
+ ms = re.match(r'^(.+)\.{}$'.format(ext), ifile)
2791
+ if ms:
2792
+ iname = ms.group(1)
2793
+ iext = ext
2794
+
2795
+ if iext and oext and oext == iext:
2796
+ oext = iext = None
2797
+ iname = ifile
2798
+ oname = ofile
2799
+
2800
+ if iext: # uncompress
2801
+ if keep:
2802
+ if iext == 'zip':
2803
+ kfile = ifile
2804
+ else:
2805
+ local_copy_local(kfile, ifile, logact)
2806
+
2807
+ if PgLOG.pgsystem("{} {}".format(PGCMPS[iext][1], ifile), logact, 5):
2808
+ if iext == "zip":
2809
+ path = op.dirname(iname)
2810
+ if path and path != '.': move_local_file(iname, op.basename(iname), logact)
2811
+ if not keep: delete_local_file(ifile, logact)
2812
+
2813
+ if oname != iname: # move/copy
2814
+ path = op.dirname(oname)
2815
+ if path and not op.exists(path): make_local_directory(path, logact)
2816
+ if keep and not op.exists(kfile):
2817
+ local_copy_local(oname, iname, logact)
2818
+ kfile = iname
2819
+ else:
2820
+ move_local_file(oname, iname, logact)
2821
+
2822
+ if oext: # compress
2823
+ if keep and not op.exists(kfile):
2824
+ if oext == "zip":
2825
+ kfile = oname
2826
+ else:
2827
+ local_copy_local(kfile, oname, logact)
2828
+
2829
+ if oext == "zip":
2830
+ path = op.dirname(oname)
2831
+ if path:
2832
+ if path != '.': path = change_local_directory(path, logact)
2833
+ bname = op.basename(oname)
2834
+ PgLOG.pgsystem("{} {}.zip {}".format(PGCMPS[oext][0], bname, bname), logact, 5)
2835
+ if path != '.': change_local_directory(path, logact)
2836
+ else:
2837
+ PgLOG.pgsystem("{} {} {}".format(PGCMPS[oext][0], ofile, oname), logact, 5)
2838
+
2839
+ if not keep and op.exists(ofile): delete_local_file(oname, logact)
2840
+ else:
2841
+ PgLOG.pgsystem("{} {}".format(PGCMPS[oext][0], oname), logact, 5)
2842
+
2843
+ if keep and op.exists(kfile) and kfile != ifile:
2844
+ if op.exists(ifile):
2845
+ delete_local_file(kfile, logact)
2846
+ else:
2847
+ move_local_file(ifile, kfile, logact)
2848
+
2849
+ if op.exists(ofile):
2850
+ return PgLOG.SUCCESS
2851
+ else:
2852
+ return errlog("{}: ERROR convert from {}".format(ofile, ifile), 'L', 1, logact)
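+
+ #
+ # Illustrative behavior of the conversion above, assuming the move_local_file(target, source)
+ # and local_copy_local(target, source) semantics used elsewhere in this module;
+ # the paths are hypothetical:
+ #   convert_files('sub/stn.txt.gz', 'stn.txt')  # move ./stn.txt to sub/stn.txt, then gzip it
+ #   convert_files('stn.txt', 'stn.txt.bz2')     # bunzip2 stn.txt.bz2, leaving stn.txt
+ #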
2853
+
2854
+ #
2855
+ # compare two files from two given dicts of file information
2856
+ # return 0 if same, 1 if different, -1 if they cannot be compared
2857
+ #
2858
+ def compare_file_info(ainfo, binfo):
2859
+
2860
+ if not (ainfo and binfo): return -1 # at least one is missing
2861
+
2862
+ return (0 if (ainfo['data_size'] == binfo['data_size'] and
2863
+ ainfo['date_modified'] == binfo['date_modified'] and
2864
+ ainfo['time_modified'] == binfo['time_modified']) else 1)
2865
+
2866
+ #
2867
+ # get local_dirname
2868
+ #
2869
+ def get_local_dirname(file):
2870
+
2871
+ dir = op.dirname(file)
2872
+ if dir == '.': dir = os.getcwd()
2873
+
2874
+ return dir
2875
+
2876
+ #
2877
+ # collect valid file names under a given directory, current directory if empty
2878
+ #
2879
+ def get_directory_files(dir = None, limit = 0, level = 0):
2880
+
2881
+ files = []
2882
+ if dir:
2883
+ if level == 0 and op.isfile(dir):
2884
+ files.append(dir)
2885
+ return files
2886
+ dir += "/*"
2887
+ else:
2888
+ dir = "*"
2889
+
2890
+ for file in glob.glob(dir):
2891
+ if op.isdir(file):
2892
+ if limit == 0 or (limit-level) > 0:
2893
+ fs = get_directory_files(file, limit, level+1)
2894
+ if fs: files.extend(fs)
2895
+ else:
2896
+ files.append(file)
2897
+
2898
+ return files if files else None
2899
+
2900
+ #
2901
+ # reads a local file into a string and returns it
2902
+ #
2903
+ def read_local_file(file, logact = 0):
2904
+
2905
+ try:
2906
+ fd = open(file, 'r')
2907
+ except Exception as e:
2908
+ return errlog("{}: {}".format(file, str(e)), 'L', 1, logact)
2909
+ else:
2910
+ fstr = fd.read()
2911
+ fd.close()
2912
+
2913
+ return fstr
2914
+
2915
+ #
2916
+ # open a local file and return the file handler
2917
+ #
2918
+ def open_local_file(file, mode = 'r', logact = PgLOG.LOGERR):
2919
+
2920
+ try:
2921
+ fd = open(file, mode)
2922
+ except Exception as e:
2923
+ return errlog("{}: {}".format(file, str(e)), 'L', 1, logact)
2924
+
2925
+ return fd
2926
+
2927
+ #
2928
+ # change absolute paths to relative paths
2929
+ #
2930
+ def get_relative_paths(files, cdir, logact = 0):
2931
+
2932
+ cnt = len(files)
2933
+ if cnt == 0: return files
2934
+ if not cdir: cdir = os.getcwd()
2935
+
2936
+ for i in range(cnt):
2937
+ afile = files[i]
2938
+ if op.isabs(afile):
2939
+ files[i] = PgLOG.join_paths(afile, cdir, 1)
2940
+ else:
2941
+ PgLOG.pglog("{}: is not under the working directory '{}'".format(afile, cdir), logact)
2942
+
2943
+ return files
2944
+
2945
+ #
2946
+ # check if the action to path is blocked
2947
+ #
2948
+ def check_block_path(path, act = '', logact = 0):
2949
+
2950
+ blockpath = PgLOG.PGLOG['USRHOME']
2951
+ if not act: act = 'Copy'
2952
+
2953
+ if re.match(r'^{}'.format(blockpath), path):
2954
+ return PgLOG.pglog("{}: {} to {} is blocked".format(path, act, blockpath), logact)
2955
+ else:
2956
+ return 1
2957
+
2958
+ #
2959
+ # join two filenames by using the common prefix/suffix and keeping the different main bodies;
2960
+ # the bodies are separated by sep; replace fext with text if provided
2961
+ #
2962
+ def join_filenames(name1, name2, sep = '-', fext = None, text = None):
2963
+
2964
+ if fext:
2965
+ name1 = remove_file_extention(name1, fext)
2966
+ name2 = remove_file_extention(name2, fext)
2967
+
2968
+ if name1 == name2:
2969
+ fname = name1
2970
+ else:
2971
+ fname = suffix = ''
2972
+ cnt1 = len(name1)
2973
+ cnt2 = len(name2)
2974
+ cnt = (cnt1 if cnt1 < cnt2 else cnt2)
2975
+
2976
+ # get common prefix
2977
+ for pcnt in range(cnt):
2978
+ if name1[pcnt] != name2[pcnt]: break
2979
+
2980
+ # get common suffix
2981
+ cnt -= pcnt
2982
+ for scnt in range(0, cnt):
2983
+ if name1[cnt1-scnt-1] != name2[cnt2-scnt-1]: break
2984
+
2985
+ body1 = name1[pcnt:(cnt1-scnt)]
2986
+ body2 = name2[pcnt:(cnt2-scnt)]
2987
+ if scnt > 0:
2988
+ suffix = name2[(cnt1-scnt):cnt1]
2989
+ if name1[cnt1-scnt-1].isnumeric():
2990
+ ms = re.match(r'^([\d\.-]*\d)', suffix)
2991
+ if ms: body1 += ms.group(1) # include trailing digit chrs to body1
2992
+ if pcnt > 0:
2993
+ fname = name1[0:pcnt]
2994
+ if name2[pcnt].isnumeric():
2995
+ ms = re.search(r'(\d[\d\.-]*)$', fname)
2996
+ if ms: body2 = ms.group(1) + body2 # include leading digit chrs to body2
2997
+
2998
+ fname += body1 + sep + body2
2999
+ if suffix: fname += suffix
3000
+
3001
+ if text: fname += "." + text
3002
+
3003
+ return fname
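+
+ #
+ # A worked example of the joining logic above (file names hypothetical):
+ #   join_filenames('A1990.grb', 'A1995.grb') -> 'A1990-1995.grb'
+ #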
3004
+
3005
+ # remove the given file extension if provided,
3006
+ # otherwise try to remove a predefined compression extension in PGCMPS
3007
+ def remove_file_extention(fname, fext):
3008
+
3009
+ if not fname: return ''
3010
+
3011
+ if fext:
3012
+ fname = re.sub(r'\.{}$'.format(fext), '', fname, 1, re.I)
3013
+ else:
3014
+ for fext in PGCMPS:
3015
+ mp = r'\.{}$'.format(fext)
3016
+ if re.search(mp, fname):
3017
+ fname = re.sub(mp, '', fname, 1, re.I)
3018
+ break
3019
+
3020
+ return fname
3021
+
3022
+ # check if a previously down storage system is up now for the given dflag
3023
+ #
3024
+ # return an error message if the check fails, and None otherwise
3025
+ #
3026
+ def check_storage_down(dflag, dpath, dscheck, logact = 0):
3027
+
3028
+ if dflag not in DHOSTS:
3029
+ if logact: PgLOG.pglog(dflag + ": Unknown Down Flag for Storage Systems", logact)
3030
+ return None
3031
+ dhost = DHOSTS[dflag]
3032
+ if not dpath and dflag in DPATHS: dpath = DPATHS[dflag]
3033
+ for loop in range(2):
3034
+ (stat, msg) = host_down_status(dpath, dhost, 1, logact)
3035
+ if stat < 0: break # stop retry for planned down
3036
+
3037
+ if not dscheck and PgLOG.PGLOG['DSCHECK']: dscheck = PgLOG.PGLOG['DSCHECK']
3038
+ if dscheck:
3039
+ didx = dscheck['dflags'].find(dflag)
3040
+ if msg:
3041
+ if didx < 0: dscheck['dflags'] += dflag
3042
+ else:
3043
+ if didx > -1: dscheck['dflags'] = dscheck['dflags'].replace(dflag, '', 1)
3044
+
3045
+ return msg
3046
+
3047
+ #
3048
+ # check previously down storage systems recorded in the dflags
3049
+ #
3050
+ # return an array of strings for storage systems that are still down,
3051
+ # and empty array if all up
3052
+ #
3053
+ def check_storage_dflags(dflags, dscheck = None, logact = 0):
3054
+
3055
+ if not dflags: return 0
3056
+
3057
+ isdict = isinstance(dflags, dict)
3058
+ msgary = []
3059
+ for dflag in dflags:
3060
+ msg = check_storage_down(dflag, dflags[dflag] if isdict else None, dscheck, logact)
3061
+ if msg: msgary.append(msg)
3062
+
3063
+ if not msgary:
3064
+ if not dscheck and PgLOG.PGLOG['DSCHECK']: dscheck = PgLOG.PGLOG['DSCHECK']
3065
+ cidx = dscheck['cindex'] if dscheck else 0
3066
+ # clean dflags if the down storage systems are all up
3067
+ if cidx: PgDBI.pgexec("UPDATE dscheck SET dflags = '' WHERE cindex = {}".format(cidx), logact)
3068
+
3069
+ return msgary
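+
+ #
+ # A minimal usage sketch, assuming the 'O' (object store) and 'B' (Quasar backup)
+ # flags with their default paths:
+ #
+ #   downmsgs = check_storage_dflags({'O' : None, 'B' : None})
+ #   if downmsgs: PgLOG.pglog("\n".join(downmsgs), PgLOG.LOGWRN)
+ #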
3070
+
3071
+ #
3072
+ # check a GDEX file is backed up or not for given file record;
3073
+ # clear the cached bfile records if frec is None.
3074
+ # return 0 if not yet, 1 if backed up, or -1 if backed up but modified
3075
+ #
3076
+ def file_backup_status(frec, chgdays = 1, logact = 0):
3077
+
3078
+ if frec is None:
3079
+ BFILES.clear()
3080
+ return 0
3081
+
3082
+ bid = frec['bid']
3083
+ if not bid: return 0
3084
+
3085
+ fields = 'bfile, dsid, date_modified'
3086
+ if chgdays > 0: fields += ', note'
3087
+ if bid not in BFILES: BFILES[bid] = PgDBI.pgget('bfile', fields, 'bid = {}'.format(bid), logact)
3088
+ brec = BFILES[bid]
3089
+ if not brec: return 0
3090
+
3091
+ if 'sfile' in frec:
3092
+ fname = frec['sfile']
3093
+ ftype = 'Saved'
3094
+ else:
3095
+ fname = frec['wfile']
3096
+ ftype = 'Web'
3097
+ ret = 1
3098
+ fdate = frec['date_modified']
3099
+ bdate = brec['date_modified']
3100
+ if chgdays > 0 and PgUtil.diffdate(fdate, bdate) >= chgdays:
3101
+ ret = -1
3102
+ if brec['note']:
3103
+ mp = r'{}<:>{}<:>(\d+)<:>(\w+)<:>'.format(fname, frec['type'])
3104
+ ms = re.search(mp, brec['note'])
3105
+ if ms:
3106
+ fsize = int(ms.group(1))
3107
+ cksum = ms.group(2)
3108
+ if cksum and cksum == frec['checksum'] or not cksum and fsize == frec['data_size']:
3109
+ ret = 1
3110
+
3111
+ if logact:
3112
+ if ret == 1:
3113
+ msg = "{}-{}: {} file backed up to /{}/{} by {}".format(frec['dsid'], fname, ftype, brec['dsid'], brec['bfile'], bdate)
3114
+ else:
3115
+ msg = "{}-{}: {} file changed on {}".format(frec['dsid'], fname, ftype, fdate)
3116
+ PgLOG.pglog(msg, logact)
3117
+
3118
+ return ret