rda-python-common 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2462 @@
1
+ #
2
+ ###############################################################################
3
+ #
4
+ # Title : pg_file.py
5
+ # Author : Zaihua Ji, zji@ucar.edu
6
+ # Date : 08/05/2020
7
+ # 2025-01-10 transferred to package rda_python_common from
8
+ # https://github.com/NCAR/rda-shared-libraries.git
9
+ # 2025-12-01 converted to class PgFile
10
+ # Purpose : python library module to copy, move and delete data files locally
11
+ # and remotely
12
+ #
13
+ # Github : https://github.com/NCAR/rda-python-common.git
14
+ #
15
+ ###############################################################################
16
+ #
17
+ import sys
18
+ import os
19
+ from os import path as op
20
+ import pwd
21
+ import grp
22
+ import stat
23
+ import re
24
+ import time
25
+ import glob
26
+ import json
27
+ from .pg_util import PgUtil
28
+ from .pg_sig import PgSIG
29
+
30
+ class PgFile(PgUtil, PgSIG):
31
+
32
+ CMDBTH = (0x0033) # return both stdout and stderr, 16 + 32 + 2 + 1
33
+ RETBTH = (0x0030) # return both stdout and stderr, 16 + 32
34
+ CMDRET = (0x0110) # return stdout and save error, 16 + 256
35
+ CMDERR = (0x0101) # display command and save error, 1 + 256
36
+ CMDGLB = (0x0313) # return stdout and save error for globus, 1+2+16+256+512
37
+
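These hex constants are bitmask options handed to self.pgsystem(); the inline comments give their decompositions, which can be checked with plain arithmetic (a standalone snippet, no package imports needed):

    # sanity check of the bitmask decompositions documented above
    assert 0x0033 == 1 + 2 + 16 + 32           # CMDBTH
    assert 0x0030 == 16 + 32                   # RETBTH
    assert 0x0110 == 16 + 256                  # CMDRET
    assert 0x0101 == 1 + 256                   # CMDERR
    assert 0x0313 == 1 + 2 + 16 + 256 + 512    # CMDGLB
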
38
+ def __init__(self):
39
+ super().__init__() # initialize parent class
40
+ self.PGCMPS = {
41
+ # extension Compress Uncompress ArchiveFormat
42
+ 'Z' : ['compress -f', 'uncompress -f', 'Z'],
43
+ 'zip' : ['zip', 'unzip', 'ZIP'],
44
+ 'gz' : ['gzip', 'gunzip', 'GZ'],
45
+ 'xz' : ['xz', 'unxz', 'XZ'],
46
+ 'bz2' : ['bzip2', 'bunzip2', 'BZ2']
47
+ }
48
+ self.CMPSTR = '|'.join(self.PGCMPS)
49
+ self.PGTARS = {
50
+ # extension Packing Unpacking ArchiveFormat
51
+ 'tar' : ['tar -cvf', 'tar -xvf', 'TAR'],
52
+ 'tar.Z' : ['tar -Zcvf', 'tar -xvf', 'TAR.Z'],
53
+ 'zip' : ['zip -v', 'unzip -v', 'ZIP'],
54
+ 'tgz' : ['tar -zcvf', 'tar -xvf', 'TGZ'],
55
+ 'tar.gz' : ['tar -zcvf', 'tar -xvf', 'TAR.GZ'],
56
+ 'txz' : ['tar -cvJf', 'tar -xvf', 'TXZ'],
57
+ 'tar.xz' : ['tar -cvJf', 'tar -xvf', 'TAR.XZ'],
58
+ 'tbz2' : ['tar -cvjf', 'tar -xvf', 'TBZ2'],
59
+ 'tar.bz2' : ['tar -cvjf', 'tar -xvf', 'TAR.BZ2']
60
+ }
61
+ self.TARSTR = '|'.join(self.PGTARS)
62
+ self.TASKIDS = {} # cache unfinished Globus task IDs
63
+ self.LHOST = "localhost"
64
+ self.OHOST = self.PGLOG['OBJCTSTR']
65
+ self.BHOST = self.PGLOG['BACKUPNM']
66
+ self.DHOST = self.PGLOG['DRDATANM']
67
+ self.OBJCTCMD = "isd_s3_cli"
68
+ self.BACKCMD = "dsglobus"
69
+ self.DIRLVLS = 0
70
+ # count how many consecutive errors happen while working with HPSS, local or remote machines
71
+ self.ECNTS = {'D' : 0, 'H' : 0, 'L' : 0, 'R' : 0, 'O' : 0, 'B' : 0}
72
+ # upper limits on how many consecutive errors are allowed
73
+ self.ELMTS = {'D' : 20, 'H' : 20, 'L' : 20, 'R' : 20, 'O' : 10, 'B' : 10}
74
+ # down storage hostnames & paths
75
+ self.DHOSTS = {
76
+ 'G' : self.PGLOG['GPFSNAME'],
77
+ 'O' : self.OHOST,
78
+ 'B' : self.BHOST,
79
+ 'D' : self.DHOST
80
+ }
81
+ self.DPATHS = {
82
+ 'G' : self.PGLOG['DSSDATA'],
83
+ 'O' : self.PGLOG['OBJCTBKT'],
84
+ 'B' : '/' + self.PGLOG['DEFDSID'], # backup globus endpoint
85
+ 'D' : '/' + self.PGLOG['DEFDSID'] # disaster recovery globus endpoint
86
+ }
87
+ self.QSTATS = {
88
+ 'A' : 'ACTIVE',
89
+ 'I' : 'INACTIVE',
90
+ 'S' : 'SUCCEEDED',
91
+ 'F' : 'FAILED',
92
+ }
93
+ self.QPOINTS = {
94
+ 'L' : 'gdex-glade',
95
+ 'B' : 'gdex-quasar',
96
+ 'D' : 'gdex-quasar-drdata'
97
+ }
98
+ self.QHOSTS = {
99
+ 'gdex-glade' : self.LHOST,
100
+ 'gdex-quasar' : self.BHOST,
101
+ 'gdex-quasar-drdata' : self.DHOST
102
+ }
103
+ self.ENDPOINTS = {
104
+ 'gdex-glade' : "NCAR GDEX GLADE",
105
+ 'gdex-quasar' : "NCAR GDEX Quasar",
106
+ 'gdex-quasar-drdata' : "NCAR GDEX Quasar DRDATA"
107
+ }
108
+ self.BFILES = {} # cache backup file names and dates for each bid
109
+
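A minimal usage sketch of the lookup tables initialized above; the module path is an assumption based on this package's layout, and constructing PgFile requires the PgUtil/PgSIG configuration to be available:

    # minimal sketch; the import path below is assumed from the package layout
    from rda_python_common.pg_file import PgFile

    pf = PgFile()                        # needs the PgUtil/PgSIG configuration to load
    pack_cmd, unpack_cmd, fmt = pf.PGTARS['tar.gz']
    # pack_cmd == 'tar -zcvf', unpack_cmd == 'tar -xvf', fmt == 'TAR.GZ'
    uncompress_cmd = pf.PGCMPS['gz'][1]  # 'gunzip'
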
110
+ # reset the upper limit for a specified error type
111
+ def reset_error_limit(self, etype, lmt):
112
+ self.ELMTS[etype] = lmt
113
+
114
+ # wrapper around self.pglog() to show the error without a fatal exit on the first call so the action can be retried
115
+ def errlog(self, msg, etype, retry = 0, logact = 0):
116
+ bckgrnd = self.PGLOG['BCKGRND']
117
+ logact |= self.ERRLOG
118
+ if not retry:
119
+ if msg and not re.search(r'\n$', msg): msg += "\n"
120
+ msg += "[The same execution will be retried in {} Seconds]".format(self.PGSIG['ETIME'])
121
+ self.PGLOG['BCKGRND'] = 1
122
+ logact &= ~(self.EMEROL|self.EXITLG)
123
+ elif self.ELMTS[etype]:
124
+ self.ECNTS[etype] += 1
125
+ if self.ECNTS[etype] >= self.ELMTS[etype]:
126
+ logact |= self.EXITLG
127
+ self.ECNTS[etype] = 0
128
+ if self.PGLOG['DSCHECK'] and logact&self.EXITLG: self.record_dscheck_error(msg, logact)
129
+ self.pglog(msg, logact)
130
+ self.PGLOG['BCKGRND'] = bckgrnd
131
+ if not retry: time.sleep(self.PGSIG['ETIME'])
132
+ return self.FAILURE
133
+
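errlog() underpins the two-attempt retry idiom used by the copy/move/delete methods below: the first call (retry == 0) logs the error, sleeps self.PGSIG['ETIME'] seconds and returns, while the second call counts toward the per-type limit in ELMTS. A condensed, hypothetical caller-side sketch:

    # hypothetical caller showing the two-attempt retry idiom built on errlog();
    # 'L' selects the local-error counter ECNTS['L'] and limit ELMTS['L']
    def run_with_retry(pf, cmd, logact=0):
        for loop in range(2):
            if pf.pgsystem(cmd, logact, pf.CMDERR):
                return pf.SUCCESS
            # loop 0: log, sleep PGSIG['ETIME'], retry; loop 1: count toward the limit
            pf.errlog(pf.PGLOG['SYSERR'], 'L', loop, logact)
        return pf.FAILURE
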
134
+ # Copy a file from one host (including the local host) to another host (including the local host),
135
+ # excluding copies from a remote host to another remote host; copying in background is permitted
136
+ # tofile - target file name
137
+ # fromfile - source file name
138
+ # tohost - target host name, default to self.LHOST
139
+ # fromhost - original host name, default to self.LHOST
140
+ # Return 1 if successful, 0 if failed; error messages generated in self.pgsystem() are cached in self.PGLOG['SYSERR']
141
+ def copy_gdex_file(self, tofile, fromfile, tohost = None, fromhost = None, logact = 0):
142
+ if tohost is None: tohost = self.LHOST
143
+ if fromhost is None: fromhost = self.LHOST
144
+ thost = self.strip_host_name(tohost)
145
+ fhost = self.strip_host_name(fromhost)
146
+ if self.pgcmp(thost, fhost, 1) == 0:
147
+ if self.pgcmp(thost, self.LHOST, 1) == 0:
148
+ return self.local_copy_local(tofile, fromfile, logact)
149
+ elif self.pgcmp(fhost, self.LHOST, 1) == 0:
150
+ if self.pgcmp(thost, self.OHOST, 1) == 0:
151
+ return self.local_copy_object(tofile, fromfile, None, None, logact)
152
+ elif self.pgcmp(thost, self.BHOST, 1) == 0:
153
+ return self.local_copy_backup(tofile, fromfile, self.QPOINTS['B'], logact)
154
+ elif self.pgcmp(thost, self.DHOST, 1) == 0:
155
+ return self.local_copy_backup(tofile, fromfile, self.QPOINTS['D'], logact)
156
+ else:
157
+ return self.local_copy_remote(tofile, fromfile, tohost, logact)
158
+ elif self.pgcmp(thost, self.LHOST, 1) == 0:
159
+ if self.pgcmp(fhost, self.OHOST, 1) == 0:
160
+ return self.object_copy_local(tofile, fromfile, None, logact)
161
+ elif self.pgcmp(fhost, self.BHOST, 1) == 0:
162
+ return self.backup_copy_local(tofile, fromfile, self.QPOINTS['B'], logact)
163
+ elif self.pgcmp(fhost, self.DHOST, 1) == 0:
164
+ return self.backup_copy_local(tofile, fromfile, self.QPOINTS['D'], logact)
165
+ else:
166
+ return self.remote_copy_local(tofile, fromfile, fromhost, logact)
167
+ return self.errlog("{}-{}->{}-{}: Cannot copy file".format(fhost, fromfile, thost, tofile), 'O', 1, self.LGEREX)
168
+ copy_rda_file = copy_gdex_file
169
+
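A hedged usage sketch of the dispatch above; dataset ids and paths are placeholders, and the object-store and Quasar branches assume isd_s3_cli and dsglobus are configured (pf is a PgFile instance as in the earlier sketch):

    pf = PgFile()

    # local-to-local copy; a trailing '/' on the target keeps the source basename
    pf.copy_gdex_file('/data/ds540.0/', '/tmp/sample.nc')

    # local file to the object store (bucket defaults to PGLOG['OBJCTBKT'])
    pf.copy_gdex_file('ds540.0/sample.nc', '/tmp/sample.nc', pf.OHOST)

    # local file to the Quasar backup endpoint (target leads with /dsnnn.n/)
    pf.copy_gdex_file('/ds540.0/sample.nc', '/data/ds540.0/sample.nc', pf.BHOST)
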
170
+ # Copy a file locally
171
+ # tofile - target file name
172
+ # fromfile - source file name
173
+ def local_copy_local(self, tofile, fromfile, logact = 0):
174
+ finfo = self.check_local_file(fromfile, 0, logact)
175
+ if not finfo:
176
+ if finfo != None: return self.FAILURE
177
+ return self.lmsg(fromfile, "{} to copy to {}".format(self.PGLOG['MISSFILE'], tofile), logact)
178
+ target = tofile
179
+ ms = re.match(r'^(.+)/$', tofile)
180
+ if ms:
181
+ dir = ms.group(1)
182
+ tofile += op.basename(fromfile)
183
+ else:
184
+ dir = self.get_local_dirname(tofile)
185
+ if not self.make_local_directory(dir, logact): return self.FAILURE
186
+
187
+ cmd = "cp -{} {} {}".format(('f' if finfo['isfile'] else "rf"), fromfile, target)
188
+ reset = loop = 0
189
+ while((loop-reset) < 2):
190
+ info = None
191
+ self.PGLOG['ERR2STD'] = ['are the same file']
192
+ ret = self.pgsystem(cmd, logact, self.CMDERR)
193
+ self.PGLOG['ERR2STD'] = []
194
+ if ret:
195
+ info = self.check_local_file(tofile, 143, logact) # 1+2+4+8+128
196
+ if info:
197
+ if not info['isfile']:
198
+ self.set_local_mode(tofile, 0, 0, info['mode'], info['logname'], logact)
199
+ return self.SUCCESS
200
+ elif info['data_size'] == finfo['data_size']:
201
+ self.set_local_mode(tofile, 1, 0, info['mode'], info['logname'], logact)
202
+ return self.SUCCESS
203
+ elif info != None:
204
+ break
205
+ if self.PGLOG['SYSERR']:
206
+ errmsg = self.PGLOG['SYSERR']
207
+ else:
208
+ errmsg = "Error of '{}': Miss target file {}".format(cmd, tofile)
209
+ self.errlog(errmsg, 'L', (loop - reset), logact)
210
+ if loop == 0: reset = self.reset_local_info(tofile, info, logact)
211
+ loop += 1
212
+ return self.FAILURE
213
+
214
+ # Copy a local file to a remote host
215
+ # tofile - target file name
216
+ # fromfile - source file name
217
+ # host - remote host name
218
+ def local_copy_remote(self, tofile, fromfile, host, logact = 0):
219
+ finfo = self.check_local_file(fromfile, 0, logact)
220
+ if not finfo:
221
+ if finfo != None: return self.FAILURE
222
+ return self.lmsg(fromfile, "{} to copy to {}-{}".format(self.PGLOG['MISSFILE'], host, tofile), logact)
223
+ target = tofile
224
+ ms = re.match(r'^(.+)/$', tofile)
225
+ if ms:
226
+ dir = ms.group(1)
227
+ tofile += op.basename(fromfile)
228
+ else:
229
+ dir = op.dirname(tofile)
230
+ if not self.make_remote_directory(dir, host, logact): return self.FAILURE
231
+ cmd = self.get_sync_command(host)
232
+ cmd += " {} {}".format(fromfile, target)
233
+ for loop in range(2):
234
+ if self.pgsystem(cmd, logact, self.CMDERR):
235
+ info = self.check_remote_file(tofile, host, 0, logact)
236
+ if info:
237
+ if not finfo['isfile']:
238
+ self.set_remote_mode(tofile, 0, host, self.PGLOG['EXECMODE'])
239
+ return self.SUCCESS
240
+ elif info['data_size'] == finfo['data_size']:
241
+ self.set_remote_mode(tofile, 1, host, self.PGLOG['FILEMODE'])
242
+ return self.SUCCESS
243
+ elif info != None:
244
+ break
245
+ self.errlog(self.PGLOG['SYSERR'], 'R', loop, logact)
246
+ return self.FAILURE
247
+
248
+ # Copy a local file to object store
249
+ # tofile - target file name
250
+ # fromfile - source file name
251
+ # bucket - bucket name on Object store
252
+ # meta - reference to metadata hash
253
+ def local_copy_object(self, tofile, fromfile, bucket = None, meta = None, logact = 0):
254
+ if not bucket: bucket = self.PGLOG['OBJCTBKT']
255
+ if meta is None: meta = {}
256
+ if 'user' not in meta: meta['user'] = self.PGLOG['CURUID']
257
+ if 'group' not in meta: meta['group'] = self.PGLOG['GDEXGRP']
258
+ uinfo = json.dumps(meta)
259
+ finfo = self.check_local_file(fromfile, 0, logact)
260
+ if not finfo:
261
+ if finfo != None: return self.FAILURE
262
+ return self.lmsg(fromfile, "{} to copy to {}-{}".format(self.PGLOG['MISSFILE'], self.OHOST, tofile), logact)
263
+ if not logact&self.OVRIDE:
264
+ tinfo = self.check_object_file(tofile, bucket, 0, logact)
265
+ if tinfo and tinfo['data_size'] > 0:
266
+ return self.pglog("{}-{}-{}: file exists already".format(self.OHOST, bucket, tofile), logact)
267
+ cmd = "{} ul -lf {} -b {} -k {} -md '{}'".format(self.OBJCTCMD, fromfile, bucket, tofile, uinfo)
268
+ for loop in range(2):
269
+ buf = self.pgsystem(cmd, logact, self.CMDBTH)
270
+ tinfo = self.check_object_file(tofile, bucket, 0, logact)
271
+ if tinfo:
272
+ if tinfo['data_size'] == finfo['data_size']:
273
+ return self.SUCCESS
274
+ elif tinfo != None:
275
+ break
276
+ self.errlog("Error Execute: {}\n{}".format(cmd, buf), 'O', loop, logact)
277
+ return self.FAILURE
278
+
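Calling local_copy_object() directly, a sketch with placeholder names: 'user' and 'group' default to PGLOG values, and the whole meta dict is passed to isd_s3_cli as the JSON '-md' payload:

    meta = {'dsid': 'ds540.0'}                      # placeholder extra metadata key
    pf.local_copy_object('ds540.0/sample.nc',       # object key
                         '/data/ds540.0/sample.nc', # local source file
                         None,                      # bucket: None -> PGLOG['OBJCTBKT']
                         meta,                      # 'user'/'group' filled from PGLOG
                         pf.OVRIDE)                 # allow overwriting an existing object
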
279
+ # Copy multiple files from a Globus endpoint to another
280
+ # tofiles - target file name list, each name leading with /dsnnn.n/ on Quasar and
281
+ # leading with /data/ or /decsdata/ on local glade disk
282
+ # fromfiles - source file name list, the same format as the tofiles
283
+ # topoint - target endpoint name, 'gdex-glade', 'gdex-quasar' or 'gdex-quasar-drdata'
284
+ # frompoint - source endpoint name, the same choices as the topoint
285
+ def quasar_multiple_trasnfer(self, tofiles, fromfiles, topoint, frompoint, logact = 0):
286
+ ret = self.FAILURE
287
+ fcnt = len(fromfiles)
288
+ transfer_files = {"files": []}
289
+ for i in range(fcnt):
290
+ transfer_files["files"].append({
291
+ "source_file": fromfiles[i],
292
+ "destination_file": tofiles[i]
293
+ })
294
+ qstr = json.dumps(transfer_files)
295
+ action = 'transfer'
296
+ source_endpoint = frompoint
297
+ destination_endpoint = topoint
298
+ label = f"{self.ENDPOINTS[frompoint]} to {self.ENDPOINTS[topoint]} {action}"
299
+ verify_checksum = True
300
+ cmd = f'{self.BACKCMD} {action} -se {source_endpoint} -de {destination_endpoint} --label "{label}"'
301
+ if verify_checksum:
302
+ cmd += ' -vc'
303
+ cmd += ' --batch -'
304
+ task = self.submit_globus_task(cmd, topoint, logact, qstr)
305
+ if task['stat'] == 'S':
306
+ ret = self.SUCCESS
307
+ elif task['stat'] == 'A':
308
+ self.TASKIDS["{}-{}".format(topoint, tofiles[0])] = task['id']
309
+ ret = self.FINISH
310
+ return ret
311
+
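For reference, the batch payload assembled above and fed to dsglobus on stdin ('--batch -') has this shape (file names are placeholders):

    import json

    transfer_files = {
        "files": [
            {"source_file": "/data/ds540.0/a.nc", "destination_file": "/ds540.0/a.nc"},
            {"source_file": "/data/ds540.0/b.nc", "destination_file": "/ds540.0/b.nc"},
        ]
    }
    qstr = json.dumps(transfer_files)   # handed to submit_globus_task() as the '--batch -' stdin
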
312
+ # Copy a file from a Globus endpoint to another
313
+ # tofile - target file name, leading with /dsnnn.n/ on Quasar and
314
+ # leading with /data/ or /decsdata/ on local glade disk
315
+ # fromfile - source file, the same format as the tofile
316
+ # topoint - target endpoint name, 'gdex-glade', 'gdex-quasar' or 'gdex-quasar-drdata'
317
+ # frompoint - source endpoint name, the same choices as the topoint
318
+ def endpoint_copy_endpoint(self, tofile, fromfile, topoint, frompoint, logact = 0):
319
+ ret = self.FAILURE
320
+ finfo = self.check_globus_file(fromfile, frompoint, 0, logact)
321
+ if not finfo:
322
+ if finfo != None: return ret
323
+ return self.lmsg(fromfile, "{} to copy {} file to {}-{}".format(self.PGLOG['MISSFILE'], frompoint, topoint, tofile), logact)
324
+ if not logact&self.OVRIDE:
325
+ tinfo = self.check_globus_file(tofile, topoint, 0, logact)
326
+ if tinfo and tinfo['data_size'] > 0:
327
+ return self.pglog("{}-{}: file exists already".format(topoint, tofile), logact)
328
+ action = 'transfer'
329
+ cmd = f'{self.BACKCMD} {action} -se {frompoint} -de {topoint} -sf {fromfile} -df {tofile} -vc'
330
+ task = self.submit_globus_task(cmd, topoint, logact)
331
+ if task['stat'] == 'S':
332
+ ret = self.SUCCESS
333
+ elif task['stat'] == 'A':
334
+ self.TASKIDS["{}-{}".format(topoint, tofile)] = task['id']
335
+ ret = self.FINISH
336
+
337
+ return ret
338
+
339
+ # submit a globus task and return a task id
340
+ def submit_globus_task(self, cmd, endpoint, logact = 0, qstr = None):
341
+ task = {'id' : None, 'stat' : 'U'}
342
+ loop = reset = 0
343
+ while (loop-reset) < 2:
344
+ buf = self.pgsystem(cmd, logact, self.CMDGLB, qstr)
345
+ syserr = self.PGLOG['SYSERR']
346
+ if buf and buf.find('a task has been created') > -1:
347
+ ms = re.search(r'Task ID:\s+(\S+)', buf)
348
+ if ms:
349
+ task['id'] = ms.group(1)
350
+ lp = 0
351
+ while lp < 2:
352
+ task['stat'] = self.check_globus_status(task['id'], endpoint, logact)
353
+ if task['stat'] == 'S': break
354
+ time.sleep(self.PGSIG['ETIME'])
355
+ lp += 1
356
+ if task['stat'] == 'S' or task['stat'] == 'A': break
357
+ if task['stat'] == 'F' and not syserr: break
358
+ errmsg = "Error Execute: " + cmd
359
+ if qstr: errmsg += " with stdin:\n" + qstr
360
+ if syserr:
361
+ errmsg += "\n" + syserr
362
+ (hstat, msg) = self.host_down_status('', self.QHOSTS[endpoint], 1, logact)
363
+ if hstat: errmsg += "\n" + msg
364
+ self.errlog(errmsg, 'B', (loop - reset), logact)
365
+ if loop == 0 and syserr and syserr.find('This user has too many pending jobs') > -1: reset = 1
366
+ loop += 1
367
+ if task['stat'] == 'S' or task['stat'] == 'A': self.ECNTS['B'] = 0 # reset error count
368
+ return task
369
+
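The returned dict carries 'id' (the Globus task id parsed from the dsglobus output, or None) and 'stat' (one letter of QSTATS, or 'U' when unknown); a caller-side sketch with placeholder file names:

    cmd = f"{pf.BACKCMD} transfer -se gdex-glade -de gdex-quasar -sf /data/ds540.0/a.nc -df /ds540.0/a.nc -vc"
    task = pf.submit_globus_task(cmd, 'gdex-quasar')
    if task['stat'] == 'S':
        print('transfer succeeded, task', task['id'])
    elif task['stat'] == 'A':
        print('still active; cache the id and poll later:', task['id'])
    else:
        print('transfer failed or status unknown')
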
370
+ # check Globus transfer status for given taskid. Cancel the task
371
+ # if self.NOWAIT is set and Details is neither OK nor Queued
372
+ def check_globus_status(self, taskid, endpoint = None, logact = 0):
373
+ ret = 'U'
374
+ if not taskid: return ret
375
+ if not endpoint: endpoint = self.PGLOG['BACKUPEP']
376
+ mp = r'Status:\s+({})'.format('|'.join(self.QSTATS.values()))
377
+ cmd = f"{self.BACKCMD} get-task {taskid}"
378
+ astats = ['OK', 'Queued']
379
+ for loop in range(2):
380
+ buf = self.pgsystem(cmd, logact, self.CMDRET)
381
+ if buf:
382
+ ms = re.search(mp, buf)
383
+ if ms:
384
+ ret = ms.group(1)[0]
385
+ if ret == 'A':
386
+ ms = re.search(r'Details:\s+(\S+)', buf)
387
+ if ms:
388
+ detail = ms.group(1)
389
+ if detail not in astats:
390
+ if logact&self.NOWAIT:
391
+ errmsg = "{}: Cancel Task due to {}:\n{}".format(taskid, detail, buf)
392
+ self.errlog(errmsg, 'B', 1, logact)
393
+ ccmd = f"{self.BACKCMD} cancel-task {taskid}"
394
+ self.pgsystem(ccmd, logact, 7)
395
+ else:
396
+ time.sleep(self.PGSIG['ETIME'])
397
+ continue
398
+ break
399
+ errmsg = "Error Execute: " + cmd
400
+ if self.PGLOG['SYSERR']:
401
+ errmsg = "\n" + self.PGLOG['SYSERR']
402
+ (hstat, msg) = self.host_down_status('', self.QHOSTS[endpoint], 1, logact)
403
+ if hstat: errmsg += "\n" + msg
404
+ self.errlog(errmsg, 'B', loop, logact)
405
+ if ret == 'S' or ret == 'A': self.ECNTS['B'] = 0 # reset error count
406
+ return ret
407
+
408
+ # return SUCCESS if Globus transfer is done; FAILURE otherwise
409
+ def check_globus_finished(self, tofile, topoint, logact = 0):
410
+ ret = self.SUCCESS
411
+ ckey = "{}-{}".format(topoint, tofile)
412
+ if ckey in self.TASKIDS:
413
+ taskid = self.TASKIDS[ckey]
414
+ else:
415
+ self.errlog(ckey + ": Miss Task ID to check Status", 'B', 1, logact)
416
+ return self.FAILURE
417
+ lp = 0
418
+ if logact&self.NOWAIT:
419
+ act = logact&(~self.NOWAIT)
420
+ lps = 2
421
+ else:
422
+ act = logact
423
+ lps = 0
424
+ while True:
425
+ stat = self.check_globus_status(taskid, topoint, act)
426
+ if stat == 'A':
427
+ if lps:
428
+ lp += 1
429
+ if lp > lps: act = logact
430
+ time.sleep(self.PGSIG['ETIME'])
431
+ else:
432
+ if stat == 'S':
433
+ del self.TASKIDS[ckey]
434
+ else:
435
+ status = self.QSTATS[stat] if stat in self.QSTATS else 'UNKNOWN'
436
+ self.errlog("{}: Status '{}' for Task {}".format(ckey, status, taskid), 'B', 1, logact)
437
+ ret = self.FAILURE
438
+ break
439
+ return ret
440
+
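Putting the backup pieces together: local_copy_backup() may return FINISH while the Globus task is still ACTIVE, in which case the task id is cached in TASKIDS under the key '<endpoint>-<tofile>' and check_globus_finished() polls it later. A sketch with placeholder paths:

    tofile, fromfile = '/ds540.0/backup.tar', '/data/ds540.0/backup.tar'   # placeholders
    ret = pf.local_copy_backup(tofile, fromfile, pf.QPOINTS['B'])
    if ret == pf.FINISH:
        # task accepted but still ACTIVE; its id is cached under '<endpoint>-<tofile>'
        ret = pf.check_globus_finished(tofile, pf.QPOINTS['B'])
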
441
+ # Copy a local file to Quasar backup tape system
442
+ # tofile - target file name, leading with /dsnnn.n/
443
+ # fromfile - source file name, leading with /data/ or /decsdata/
444
+ # endpoint - endpoint name on Quasar Backup Server
445
+ def local_copy_backup(self, tofile, fromfile, endpoint = None, logact = 0):
446
+ if not endpoint: endpoint = self.PGLOG['BACKUPEP']
447
+ return self.endpoint_copy_endpoint(tofile, fromfile, endpoint, 'gdex-glade', logact)
448
+
449
+ # Copy a Quasar backup file to local Globus endpoint
450
+ # tofile - target file name, leading with /data/ or /decsdata/
451
+ # fromfile - source file name, leading with /dsnnn.n/
452
+ # endpoint - endpoint name on Quasar Backup Server
453
+ def backup_copy_local(self, tofile, fromfile, endpoint = None, logact = 0):
454
+ if not endpoint: endpoint = self.PGLOG['BACKUPEP']
455
+ return self.endpoint_copy_endpoint(tofile, fromfile, 'gdex-glade', endpoint, logact)
456
+
457
+ # Copy a remote file to local
458
+ # tofile - target file name
459
+ # fromfile - source file name
460
+ # host - remote host name
461
+ def remote_copy_local(self, tofile, fromfile, host, logact = 0):
462
+ cmd = self.get_sync_command(host)
463
+ finfo = self.check_remote_file(fromfile, host, 0, logact)
464
+ if not finfo:
465
+ if finfo != None: return self.FAILURE
466
+ return self.errlog("{}-{}: {} to copy to {}".format(host, fromfile, self.PGLOG['MISSFILE'], tofile), 'R', 1, logact)
467
+ target = tofile
468
+ ms = re.match(r'^(.+)/$', tofile)
469
+ if ms:
470
+ dir = ms.group(1)
471
+ tofile += op.basename(fromfile)
472
+ else:
473
+ dir = self.get_local_dirname(tofile)
474
+ if not self.make_local_directory(dir, logact): return self.FAILURE
475
+ cmd += " -g {} {}".format(fromfile, target)
476
+ loop = reset = 0
477
+ while (loop-reset) < 2:
478
+ if self.pgsystem(cmd, logact, self.CMDERR):
479
+ info = self.check_local_file(tofile, 143, logact) # 1+2+4+8+128
480
+ if info:
481
+ if not info['isfile']:
482
+ self.set_local_mode(tofile, 0, self.PGLOG['EXECMODE'])
483
+ return self.SUCCESS
484
+ elif info['data_size'] == finfo['data_size']:
485
+ self.set_local_mode(tofile, 1, self.PGLOG['FILEMODE'])
486
+ return self.SUCCESS
487
+ elif info != None:
488
+ break
489
+ self.errlog(self.PGLOG['SYSERR'], 'L', (loop - reset), logact)
490
+ if loop == 0: reset = self.reset_local_info(tofile, info, logact)
491
+ loop += 1
492
+ return self.FAILURE
493
+
494
+ # Copy an object file to local
495
+ # tofile - target file name
496
+ # fromfile - source file name
497
+ # bucket - bucket name on Object store
498
+ def object_copy_local(self, tofile, fromfile, bucket = None, logact = 0):
499
+ ret = self.FAILURE
500
+ if not bucket: bucket = self.PGLOG['OBJCTBKT']
501
+ finfo = self.check_object_file(fromfile, bucket, 0, logact)
502
+ if not finfo:
503
+ if finfo != None: return ret
504
+ return self.lmsg(fromfile, "{}-{} to copy to {}".format(self.OHOST, self.PGLOG['MISSFILE'], tofile), logact)
505
+ cmd = "{} go -k {} -b {}".format(self.OBJCTCMD, fromfile, bucket)
506
+ fromname = op.basename(fromfile)
507
+ toname = op.basename(tofile)
508
+ if toname == tofile:
509
+ dir = odir = None
510
+ else:
511
+ dir = op.dirname(tofile)
512
+ odir = self.change_local_directory(dir, logact)
513
+ loop = reset = 0
514
+ while (loop-reset) < 2:
515
+ buf = self.pgsystem(cmd, logact, self.CMDBTH)
516
+ info = self.check_local_file(fromname, 143, logact) # 1+2+4+8+128
517
+ if info:
518
+ if info['data_size'] == finfo['data_size']:
519
+ self.set_local_mode(fromfile, info['isfile'], 0, info['mode'], info['logname'], logact)
520
+ if toname == fromname or self.move_local_file(toname, fromname, logact):
521
+ ret = self.SUCCESS
522
+ break
523
+ elif info != None:
524
+ break
525
+ self.errlog("Error Execute: {}\n{}".format(cmd, buf), 'L', (loop - reset), logact)
526
+ if loop == 0: reset = self.reset_local_info(tofile, info, logact)
527
+ loop += 1
528
+ if odir and odir != dir:
529
+ self.change_local_directory(odir, logact)
530
+ return ret
531
+
532
+ # Copy a remote file to object
533
+ # tofile - target object file name
534
+ # fromfile - source remote file name
535
+ # host - remote host name
536
+ # bucket - bucket name on Object store
537
+ # meta - reference to metadata hash
538
+ def remote_copy_object(self, tofile, fromfile, host, bucket = None, meta = None, logact = 0):
539
+ if self.is_local_host(host): return self.local_copy_object(tofile, fromfile, bucket, meta, logact)
540
+ locfile = "{}/{}".format(self.PGLOG['TMPPATH'], op.basename(tofile))
541
+ ret = self.remote_copy_local(locfile, fromfile, host, logact)
542
+ if ret:
543
+ ret = self.local_copy_object(tofile, locfile, bucket, meta, logact)
544
+ self.delete_local_file(locfile, logact)
545
+ return ret
546
+
547
+ # Copy an object file to remote
548
+ # tofile - target remote file name
549
+ # fromfile - source object file name
550
+ # host - remote host name
551
+ # bucket - bucket name on Object store
552
+ # meta - reference to metadata hash
553
+ def object_copy_remote(self, tofile, fromfile, host, bucket = None, logact = 0):
554
+ if self.is_local_host(host): return self.object_copy_local(tofile, fromfile, bucket, logact)
555
+ locfile = "{}/{}".format(self.PGLOG['TMPPATH'], op.basename(tofile))
556
+ ret = self.object_copy_local(locfile, fromfile, bucket, logact)
557
+ if ret:
558
+ ret = self.local_copy_remote(tofile, locfile, host, logact)
559
+ self.delete_local_file(locfile, logact)
560
+ return ret
561
+
562
+ # Delete a file/directory on a given host name (including local host); no background process for deleting
563
+ # file - file name to be deleted
564
+ # host - host name the file on, default to self.LHOST
565
+ # Return 1 if successful, 0 if failed; error messages generated in self.pgsystem() are cached in self.PGLOG['SYSERR']
566
+ def delete_gdex_file(self, file, host, logact = 0):
567
+ shost = self.strip_host_name(host)
568
+ if self.pgcmp(shost, self.LHOST, 1) == 0:
569
+ return self.delete_local_file(file, logact)
570
+ elif self.pgcmp(shost, self.OHOST, 1) == 0:
571
+ return self.delete_object_file(file, None, logact)
572
+ else:
573
+ return self.delete_remote_file(file, host, logact)
574
+ delete_rda_file = delete_gdex_file
575
+
576
+ # Delete a local file/directory
577
+ def delete_local_file(self, file, logact = 0):
578
+ info = self.check_local_file(file, 0, logact)
579
+ if not info: return self.FAILURE
580
+ cmd = "rm -rf "
581
+ cmd += file
582
+ loop = reset = 0
583
+ while (loop-reset) < 2:
584
+ if self.pgsystem(cmd, logact, self.CMDERR):
585
+ info = self.check_local_file(file, 14, logact)
586
+ if info is None:
587
+ if self.DIRLVLS: self.record_delete_directory(op.dirname(file), self.LHOST)
588
+ return self.SUCCESS
589
+ elif not info:
590
+ break # error checking file
591
+ self.errlog(self.PGLOG['SYSERR'], 'L', (loop - reset), logact)
592
+ if loop == 0: reset = self.reset_local_info(file, info, logact)
593
+ loop += 1
594
+ return self.FAILURE
595
+
596
+ # Delete file/directory on a remote host
597
+ def delete_remote_file(self, file, host, logact = 0):
598
+ if not self.check_remote_file(file, host, logact): return self.FAILURE
599
+ cmd = self.get_sync_command(host)
600
+ for loop in range(2):
601
+ if self.pgsystem("{} -d {}".format(cmd, file), logact, self.CMDERR):
602
+ if self.DIRLVLS: self.record_delete_directory(op.dirname(file), host)
603
+ return self.SUCCESS
604
+ self.errlog(self.PGLOG['SYSERR'], 'R', loop, logact)
605
+ return self.FAILURE
606
+
607
+ # Delete a file on object store
608
+ def delete_object_file(self, file, bucket = None, logact = 0):
609
+ if not bucket: bucket = self.PGLOG['OBJCTBKT']
610
+ for loop in range(2):
611
+ list = self.object_glob(file, bucket, 0, logact)
612
+ if not list: return self.FAILURE
613
+ errmsg = None
614
+ for key in list:
615
+ cmd = "{} dl {} -b {}".format(self.OBJCTCMD, key, bucket)
616
+ if not self.pgsystem(cmd, logact, self.CMDERR):
617
+ errmsg = self.PGLOG['SYSERR']
618
+ break
619
+ list = self.object_glob(file, bucket, 0, logact)
620
+ if not list: return self.SUCCESS
621
+ if errmsg: self.errlog(errmsg, 'O', loop, logact)
622
+ return self.FAILURE
623
+
624
+ # Delete a backup file on Quasar Server
625
+ def delete_backup_file(self, file, endpoint = None, logact = 0):
626
+ if not endpoint: endpoint = self.PGLOG['BACKUPEP']
627
+ info = self.check_backup_file(file, endpoint, 0, logact)
628
+ if not info: return self.FAILURE
629
+ cmd = f"{self.BACKCMD} delete -ep {endpoint} -tf {file}"
630
+ task = self.submit_globus_task(cmd, endpoint, logact)
631
+ if task['stat'] == 'S':
632
+ return self.SUCCESS
633
+ elif task['stat'] == 'A':
634
+ self.TASKIDS["{}-{}".format(endpoint, file)] = task['id']
635
+ return self.FINISH
636
+ return self.FAILURE
637
+
638
+ # reset local file/directory information to make them writable for self.PGLOG['GDEXUSER']
639
+ # file - file name (mandatory)
640
+ # info - gathered file info with option 14; None means the file does not exist
641
+ def reset_local_info(self, file, info = None, logact = 0):
642
+ ret = 0
643
+ if info:
644
+ if info['isfile']:
645
+ ret += self.reset_local_file(file, info, logact)
646
+ dir = self.get_local_dirname(file)
647
+ info = self.check_local_file(dir, 14, logact)
648
+ else:
649
+ dir = file
650
+ else:
651
+ dir = self.get_local_dirname(file)
652
+ info = self.check_local_file(dir, 14, logact)
653
+ if info: ret += self.reset_local_directory(dir, info, logact)
654
+ return 1 if ret else 0
655
+
656
+ # reset local directory group/mode
657
+ def reset_local_directory(self, dir, info = None, logact = 0):
658
+ ret = 0
659
+ if not (info and 'mode' in info and 'group' in info and 'logname' in info):
660
+ info = self.check_local_file(dir, 14, logact)
661
+ if info:
662
+ if info['mode'] and info['mode'] != 0o775:
663
+ ret += self.set_local_mode(dir, 0, 0o775, info['mode'], info['logname'], logact)
664
+ if info['group'] and self.PGLOG['GDEXGRP'] != info['group']:
665
+ ret += self.change_local_group(dir, self.PGLOG['GDEXGRP'], info['group'], info['logname'], logact)
666
+ return 1 if ret else 0
667
+
668
+ # reset local file group/mode
669
+ def reset_local_file(self, file, info = None, logact = 0):
670
+ ret = 0
671
+ if not (info and 'mode' in info and 'group' in info and 'logname' in info):
672
+ info = self.check_local_file(file, 14, logact)
673
+ if info:
674
+ if info['mode'] != 0o664:
675
+ ret += self.set_local_mode(file, 1, 0o664, info['mode'], info['logname'], logact)
676
+ if self.PGLOG['GDEXGRP'] != info['group']:
677
+ ret += self.change_local_group(file, self.PGLOG['GDEXGRP'], info['group'], info['logname'], logact)
678
+ return ret
679
+
680
+ # Move a file locally or remotely on the same host; no background process for moving
681
+ # tofile - target file name
682
+ # fromfile - original file name
683
+ # host - host name the file is moved on, default to self.LHOST
684
+ # Return self.SUCCESS if successful, self.FAILURE otherwise
685
+ def move_gdex_file(self, tofile, fromfile, host, logact = 0):
686
+ shost = self.strip_host_name(host)
687
+ if self.pgcmp(shost, self.LHOST, 1) == 0:
688
+ return self.move_local_file(tofile, fromfile, logact)
689
+ elif self.pgcmp(shost, self.OHOST, 1) == 0:
690
+ return self.move_object_file(tofile, fromfile, None, None, logact)
691
+ else:
692
+ return self.move_remote_file(tofile, fromfile, host, logact)
693
+ move_rda_file = move_gdex_file
694
+
695
+ # Move a file locally
696
+ # tofile - target file name
697
+ # fromfile - source file name
698
+ def move_local_file(self, tofile, fromfile, logact = 0):
699
+ dir = self.get_local_dirname(tofile)
700
+ info = self.check_local_file(fromfile, 0, logact)
701
+ tinfo = self.check_local_file(tofile, 0, logact)
702
+ if not info:
703
+ if info != None: return self.FAILURE
704
+ if tinfo:
705
+ self.pglog("{}: Moved to {} already".format(fromfile, tofile), self.LOGWRN)
706
+ return self.SUCCESS
707
+ else:
708
+ return self.errlog("{}: {} to move".format(fromfile, self.PGLOG['MISSFILE']), 'L', 1, logact)
709
+ if tinfo:
710
+ if tinfo['data_size'] > 0 and not logact&self.OVRIDE:
711
+ return self.errlog("{}: File exists, cannot move {} to it".format(tofile, fromfile), 'L', 1, logact)
712
+ elif tinfo != None:
713
+ return self.FAILURE
714
+ if not self.make_local_directory(dir, logact): return self.FAILURE
715
+ cmd = "mv {} {}".format(fromfile, tofile)
716
+ loop = reset = 0
717
+ while (loop-reset) < 2:
718
+ if self.pgsystem(cmd, logact, self.CMDERR):
719
+ if self.DIRLVLS: self.record_delete_directory(op.dirname(fromfile), self.LHOST)
720
+ return self.SUCCESS
721
+ self.errlog(self.PGLOG['SYSERR'], 'L', (loop - reset), logact)
722
+ if loop == 0: reset = self.reset_local_info(tofile, info, logact)
723
+ loop += 1
724
+ return self.FAILURE
725
+
726
+ # Move a remote file on the same host
727
+ # tofile - target file name
728
+ # fromfile - original file name
729
+ # host - remote host name
730
+ # (a temporary local copy of tofile is staged under PGLOG['TMPPATH'] during the move)
731
+ def move_remote_file(self, tofile, fromfile, host, logact = 0):
732
+ if self.is_local_host(host): return self.move_local_file(tofile, fromfile, logact)
733
+ ret = self.FAILURE
734
+ dir = op.dirname(tofile)
735
+ info = self.check_remote_file(fromfile, host, 0, logact)
736
+ tinfo = self.check_remote_file(tofile, host, 0, logact)
737
+ if not info:
738
+ if info != None: return self.FAILURE
739
+ if tinfo:
740
+ self.pglog("{}-{}: Moved to {} already".format(host, fromfile, tofile), self.LOGWRN)
741
+ return self.SUCCESS
742
+ else:
743
+ return self.errlog("{}-{}: {} to move".format(host, fromfile, self.PGLOG['MISSFILE']), 'R', 1, logact)
744
+ if tinfo:
745
+ if tinfo['data_size'] > 0 and not logact&self.OVRIDE:
746
+ return self.errlog("{}-{}: File exists, cannot move {} to it".format(host, tofile, fromfile), 'R', 1, logact)
747
+ elif tinfo != None:
748
+ return self.FAILURE
749
+ if self.make_remote_directory(dir, host, logact):
750
+ locfile = "{}/{}".format(self.PGLOG['TMPPATH'], op.basename(tofile))
751
+ if self.remote_copy_local(locfile, fromfile, host, logact):
752
+ ret = self.local_copy_remote(tofile, locfile, host, logact)
753
+ self.delete_local_file(locfile, logact)
754
+ if ret:
755
+ ret = self.delete_remote_file(fromfile, host, logact)
756
+ if self.DIRLVLS: self.record_delete_directory(op.dirname(fromfile), host)
757
+ return ret
758
+
759
+ # Move an object file on Object Store
760
+ # tofile - target file name
761
+ # fromfile - original file name
762
+ # tobucket - target bucket name
763
+ # frombucket - original bucket name
764
+ def move_object_file(self, tofile, fromfile, tobucket, frombucket, logact = 0):
765
+ ret = self.FAILURE
766
+ if not tobucket: tobucket = self.PGLOG['OBJCTBKT']
767
+ if not frombucket: frombucket = tobucket
768
+ finfo = self.check_object_file(fromfile, frombucket, 0, logact)
769
+ tinfo = self.check_object_file(tofile, tobucket, 0, logact)
770
+ if not finfo:
771
+ if finfo != None: return self.FAILURE
772
+ if tinfo:
773
+ self.pglog("{}-{}: Moved to {}-{} already".format(frombucket, fromfile, tobucket, tofile), self.LOGWRN)
774
+ return self.SUCCESS
775
+ else:
776
+ return self.errlog("{}-{}: {} to move".format(frombucket, fromfile, self.PGLOG['MISSFILE']), 'R', 1, logact)
777
+ if tinfo:
778
+ if tinfo['data_size'] > 0 and not logact&self.OVRIDE:
779
+ return self.errlog("{}-{}: Object File exists, cannot move {}-{} to it".format(tobucket, tofile, frombucket, fromfile), 'R', 1, logact)
780
+ elif tinfo != None:
781
+ return self.FAILURE
782
+ cmd = "{} mv -b {} -db {} -k {} -dk {}".format(self.OBJCTCMD, frombucket, tobucket, fromfile, tofile)
783
+ ucmd = "{} gm -k {} -b {}".format(self.OBJCTCMD, fromfile, frombucket)
784
+ ubuf = self.pgsystem(ucmd, self.LOGWRN, self.CMDRET)
785
+ if ubuf and re.match(r'^\{', ubuf): cmd += " -md '{}'".format(ubuf)
786
+ for loop in range(2):
787
+ buf = self.pgsystem(cmd, logact, self.CMDBTH)
788
+ tinfo = self.check_object_file(tofile, tobucket, 0, logact)
789
+ if tinfo:
790
+ if tinfo['data_size'] == finfo['data_size']:
791
+ return self.SUCCESS
792
+ elif tinfo != None:
793
+ break
794
+ self.errlog("Error Execute: {}\n{}".format(cmd, buf), 'O', loop, logact)
795
+ return self.FAILURE
796
+
797
+ # Move an object path on Object Store and all the file keys under it
798
+ # topath - target path name
799
+ # frompath - original path name
800
+ # tobucket - target bucket name
801
+ # frombucket - original bucket name
802
+ def move_object_path(self, topath, frompath, tobucket, frombucket, logact = 0):
803
+ ret = self.FAILURE
804
+ if not tobucket: tobucket = self.PGLOG['OBJCTBKT']
805
+ if not frombucket: frombucket = tobucket
806
+ fcnt = self.check_object_path(frompath, frombucket, logact)
807
+ tcnt = self.check_object_path(topath, tobucket, logact)
808
+ if not fcnt:
809
+ if fcnt == None: return self.FAILURE
810
+ if tcnt:
811
+ self.pglog("{}-{}: Moved to {}-{} already".format(frombucket, frompath, tobucket, topath), self.LOGWRN)
812
+ return self.SUCCESS
813
+ else:
814
+ return self.errlog("{}-{}: {} to move".format(frombucket, frompath, self.PGLOG['MISSFILE']), 'R', 1, logact)
815
+ cmd = "{} mv -b {} -db {} -k {} -dk {}".format(self.OBJCTCMD, frombucket, tobucket, frompath, topath)
816
+ for loop in range(2):
817
+ buf = self.pgsystem(cmd, logact, self.CMDBTH)
818
+ fcnt = self.check_object_path(frompath, frombucket, logact)
819
+ if not fcnt: return self.SUCCESS
820
+ self.errlog("Error Execute: {}\n{}".format(cmd, buf), 'O', loop, logact)
821
+ return self.FAILURE
822
+
823
+ # Move a backup file on Quasar Server
824
+ # tofile - target file name
825
+ # fromfile - source file name
826
+ # endpoint - Globus endpoint
827
+ def move_backup_file(self, tofile, fromfile, endpoint = None, logact = 0):
828
+ ret = self.FAILURE
829
+ if not endpoint: endpoint = self.PGLOG['BACKUPEP']
830
+ finfo = self.check_backup_file(fromfile, endpoint, 0, logact)
831
+ tinfo = self.check_backup_file(tofile, endpoint, 0, logact)
832
+ if not finfo:
833
+ if finfo != None: return ret
834
+ if tinfo:
835
+ self.pglog("{}: Moved to {} already".format(fromfile, tofile), self.LOGWRN)
836
+ return self.SUCCESS
837
+ else:
838
+ return self.errlog("{}: {} to move".format(fromfile, self.PGLOG['MISSFILE']), 'B', 1, logact)
839
+ if tinfo:
840
+ if tinfo['data_size'] > 0 and not logact&self.OVRIDE:
841
+ return self.errlog("{}: File exists, cannot move {} to it".format(tofile, fromfile), 'B', 1, logact)
842
+ elif tinfo != None:
843
+ return ret
844
+ cmd = f"{self.BACKCMD} rename -ep {endpoint} --old-path {fromfile} --new-path {tofile}"
845
+ loop = 0
846
+ while loop < 2:
847
+ buf = self.pgsystem(cmd, logact, self.CMDRET)
848
+ syserr = self.PGLOG['SYSERR']
849
+ if buf:
850
+ if buf.find('File or directory renamed successfully') > -1:
851
+ ret = self.SUCCESS
852
+ break
853
+ if syserr:
854
+ if syserr.find("No such file or directory") > -1:
855
+ if self.make_backup_directory(op.dirname(tofile), endpoint, logact): continue
856
+ errmsg = "Error Execute: {}\n{}".format(cmd, syserr)
857
+ (hstat, msg) = self.host_down_status('', self.QHOSTS[endpoint], 1, logact)
858
+ if hstat: errmsg += "\n" + msg
859
+ self.errlog(errmsg, 'B', loop, logact)
860
+ loop += 1
861
+ if ret == self.SUCCESS: self.ECNTS['B'] = 0 # reset error count
862
+ return ret
863
+
864
+ # Make a directory on a given host name (including local host)
865
+ # dir - directory path to be made
866
+ # host - host name the directory on, default to self.LHOST
867
+ # Return self.SUCCESS(1) if successful or self.FAILURE(0) if failed
868
+ def make_gdex_directory(self, dir, host, logact = 0):
869
+ if not dir: return self.SUCCESS
870
+ shost = self.strip_host_name(host)
871
+ if self.pgcmp(shost, self.LHOST, 1) == 0:
872
+ return self.make_local_directory(dir, logact)
873
+ else:
874
+ return self.make_remote_directory(dir, host, logact)
875
+ make_rda_directory = make_gdex_directory
876
+
877
+ # Make a local directory
878
+ # dir - directory path to be made
879
+ def make_local_directory(self, dir, logact = 0):
880
+ return self.make_one_local_directory(dir, None, logact)
881
+
882
+ # Make a local directory recursively
883
+ def make_one_local_directory(self, dir, odir = None, logact = 0):
884
+ if not dir or op.isdir(dir): return self.SUCCESS
885
+ if op.isfile(dir): return self.errlog(dir + ": is file, cannot make directory", 'L', 1, logact)
886
+ if not odir: odir = dir
887
+ if self.is_root_directory(dir, 'L', self.LHOST, "make directory " + odir, logact): return self.FAILURE
888
+ if not self.make_one_local_directory(op.dirname(dir), odir, logact): return self.FAILURE
889
+ loop = reset = 0
890
+ while (loop-reset) < 2:
891
+ try:
892
+ os.mkdir(dir, self.PGLOG['EXECMODE'])
893
+ except Exception as e:
894
+ errmsg = str(e)
895
+ if errmsg.find('File exists') > -1: return self.SUCCESS
896
+ self.errlog(errmsg, 'L', (loop - reset), logact)
897
+ if loop == 0: reset = self.reset_local_info(dir, None, logact)
898
+ loop += 1
899
+ else:
900
+ return self.SUCCESS
901
+ return self.FAILURE
902
+
903
+ # Make a directory on a remote host name
904
+ # dir - directory path to be made
905
+ # host - host name the directory on
906
+ def make_remote_directory(self, dir, host, logact = 0):
907
+ return self.make_one_remote_directory(dir, None, host, logact)
908
+
909
+ def make_one_remote_directory(self, dir, odir, host, logact = 0):
910
+ info = self.check_remote_file(dir, host, 0, logact)
911
+ if info:
912
+ if info['isfile']: return self.errlog("{}-{}: is file, cannot make directory".format(host, dir), 'R', 1, logact)
913
+ return self.SUCCESS
914
+ elif info != None:
915
+ return self.FAILURE
916
+ if not odir: odir = dir
917
+ if self.is_root_directory(dir, 'R', host, "make directory {} on {}".format(odir, host), logact): return self.FAILURE
918
+ if self.make_one_remote_directory(op.dirname(dir), odir, host, logact):
919
+ tmpsync = self.get_tmpsync_path()
920
+ if self.pgsystem("{} {} {}".format(self.get_sync_command(host), tmpsync, dir), logact, 5):
921
+ self.set_remote_mode(dir, 0, host, self.PGLOG['EXECMODE'])
922
+ return self.SUCCESS
923
+ return self.FAILURE
924
+
925
+ # Make a quasar directory
926
+ # dir - directory path to be made
927
+ def make_backup_directory(self, dir, endpoint, logact = 0):
928
+ return self.make_one_backup_directory(dir, None, endpoint, logact)
929
+
930
+ # Make a quasar directory recursively
931
+ def make_one_backup_directory(self, dir, odir, endpoint = None, logact = 0):
932
+ if not dir or dir == '/': return self.SUCCESS
933
+ if not endpoint: endpoint = self.PGLOG['BACKUPEP']
934
+ info = self.check_backup_file(dir, endpoint, 0, logact)
935
+ if info:
936
+ if info['isfile']: return self.errlog("{}-{}: is file, cannot make backup directory".format(endpoint, dir), 'B', 1, logact)
937
+ return self.SUCCESS
938
+ elif info != None:
939
+ return self.FAILURE
940
+ if not odir: odir = dir
941
+ if not self.make_one_backup_directory(op.dirname(dir), odir, endpoint, logact): return self.FAILURE
942
+ cmd = f"{self.BACKCMD} mkdir -ep {endpoint} -p {dir}"
943
+ for loop in range(2):
944
+ buf = self.pgsystem(cmd, logact, self.CMDRET)
945
+ syserr = self.PGLOG['SYSERR']
946
+ if buf:
947
+ if(buf.find('The directory was created successfully') > -1 or
948
+ buf.find("Path '{}' already exists".format(dir)) > -1):
949
+ ret = self.SUCCESS
950
+ break
951
+ if syserr:
952
+ if syserr.find("No such file or directory") > -1:
953
+ ret = self.make_one_backup_directory(op.dirname(dir), odir, endpoint, logact)
954
+ if ret == self.SUCCESS or loop: break
955
+ time.sleep(self.PGSIG['ETIME'])
956
+ else:
957
+ errmsg = "Error Execute: {}\n{}".format(cmd, syserr)
958
+ (hstat, msg) = self.host_down_status('', self.QHOSTS[endpoint], 1, logact)
959
+ if hstat: errmsg += "\n" + msg
960
+ self.errlog(errmsg, 'B', loop, logact)
961
+ if ret == self.SUCCESS: self.ECNTS['B'] = 0 # reset error count
962
+ return ret
963
+
964
+ # check and return 1 if the given path is a root directory
965
+ def is_root_directory(self, dir, etype, host = None, action = None, logact = 0):
966
+ ret = cnt = 0
967
+ if re.match(r'^{}'.format(self.PGLOG['DSSDATA']), dir):
968
+ ms = re.match(r'^({})(.*)$'.format(self.PGLOG['GPFSROOTS']), dir)
969
+ if ms:
970
+ m2 = ms.group(2)
971
+ if not m2 or m2 == '/': ret = 1
972
+ else:
973
+ cnt = 4
974
+ else:
975
+ ms = re.match(r'^({})(.*)$'.format(self.PGLOG['HOMEROOTS']), dir)
976
+ if ms:
977
+ m2 = ms.group(2)
978
+ if not m2 or m2 == '/': ret = 1
979
+ else:
980
+ cnt = 2
981
+ if cnt and re.match(r'^(/[^/]+){0,%d}(/*)$' % cnt, dir):
982
+ ret = 1
983
+ if ret and action:
984
+ cnt = 0
985
+ errmsg = "{}: Cannot {} from {}".format(dir, action, self.PGLOG['HOSTNAME'])
986
+ (hstat, msg) = self.host_down_status(dir, host, 0, logact)
987
+ if hstat: errmsg += "\n" + msg
988
+ self.errlog(errmsg, etype, 1, logact|self.ERRLOG)
989
+ return ret
990
+
991
+ # set mode for a given directory/file on a given host (including local host)
992
+ def set_gdex_mode(self, file, isfile, host, nmode = None, omode = None, logname = None, logact = 0):
993
+ shost = self.strip_host_name(host)
994
+ if self.pgcmp(shost, self.LHOST, 1) == 0:
995
+ return self.set_local_mode(file, isfile, nmode, omode, logname, logact)
996
+ else:
997
+ return self.set_remote_mode(file, isfile, host, nmode, omode, logact)
998
+ set_rda_mode = set_gdex_mode
999
+
1000
+ # set mode for given local directory or file
1001
+ def set_local_mode(self, file, isfile = 1, nmode = 0, omode = 0, logname = None, logact = 0):
1002
+ if not nmode: nmode = (self.PGLOG['FILEMODE'] if isfile else self.PGLOG['EXECMODE'])
1003
+ if not (omode and logname):
1004
+ info = self.check_local_file(file, 6)
1005
+ if not info:
1006
+ if info != None: return self.FAILURE
1007
+ return self.lmsg(file, "{} to set mode({})".format(self.PGLOG['MISSFILE'], self.int2base(nmode, 8)), logact)
1008
+ omode = info['mode']
1009
+ logname = info['logname']
1010
+ if nmode == omode: return self.SUCCESS
1011
+ try:
1012
+ os.chmod(file, nmode)
1013
+ except Exception as e:
1014
+ return self.errlog(str(e), 'L', 1, logact)
1015
+ return self.SUCCESS
1016
+
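A usage sketch of set_local_mode() with placeholder paths; passing nmode as 0 falls back to PGLOG['FILEMODE'] or PGLOG['EXECMODE'] depending on isfile:

    pf.set_local_mode('/data/ds540.0/sample.nc', 1, 0o664)   # regular file -> rw-rw-r--
    pf.set_local_mode('/data/ds540.0', 0, 0o775)             # directory    -> rwxrwxr-x
    pf.set_local_mode('/data/ds540.0/other.nc', 1)           # nmode 0 -> PGLOG['FILEMODE']
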
1017
+ # set mode for given directory or file on remote host
1018
+ def set_remote_mode(self, file, isfile, host, nmode = 0, omode = 0, logact = 0):
1019
+ if not nmode: nmode = (self.PGLOG['FILEMODE'] if isfile else self.PGLOG['EXECMODE'])
1020
+ if not omode:
1021
+ info = self.check_remote_file(file, host, 6)
1022
+ if not info:
1023
+ if info != None: return self.FAILURE
1024
+ return self.errlog("{}-{}: {} to set mode({})".format(host, file, self.PGLOG['MISSFILE'], self.int2base(nmode, 8)), 'R', 1, logact)
1025
+ omode = info['mode']
1026
+ if nmode == omode: return self.SUCCESS
1027
+ return self.pgsystem("{} -m {} {}".format(self.get_sync_command(host), self.int2base(nmode, 8), file), logact, 5)
1028
+
1029
+ # change group for given local directory or file
1030
+ def change_local_group(self, file, ngrp = None, ogrp = None, logname = None, logact = 0):
1031
+ if not ngrp:
1032
+ ngid = self.PGLOG['GDEXGID']
1033
+ else:
1034
+ ngid = grp.getgrnam(ngrp).gr_gid
1035
+ if logact and logact&self.EXITLG: logact &=~self.EXITLG
1036
+ if not (ogrp and logname):
1037
+ info = self.check_local_file(file, 10, logact)
1038
+ if not info:
1039
+ if info != None: return self.FAILURE
1040
+ return self.errlog("{}: {} to change group({})".format(file, self.PGLOG['MISSFILE'], ngrp), 'L', 1, logact)
1041
+ ogid = info['gid']
1042
+ ouid = info['uid']
1043
+ else:
1044
+ ouid = pwd.getpwnam(logname).pw_uid
1045
+ ogid = grp.getgrnam(logname).gr_gid
1046
+ if ngid == ogid: return self.SUCCESS
1047
+ try:
1048
+ os.chown(file, ouid, ngid)
1049
+ except Exception as e:
1050
+ return self.errlog(str(e), 'L', 1, logact)
+ return self.SUCCESS
1051
+
1052
+ # Check if a given path on a specified host, or the host itself, is down
1053
+ # path: path name to be checked
1054
+ # host: host name the file on, default to self.LHOST
1055
+ # chkopt: 1 - do a file/path check, 0 - do not
1056
+ # Return a 2-tuple (hstat, msg)
1057
+ # hstat: 0 if system is up and accessible,
1058
+ # 1 - host is down,
1059
+ # 2 - if path not accessible
1060
+ # negative values if planned system down
1061
+ # msg: None - stat == 0
1062
+ # a non-empty string with the system-down message - stat != 0
1063
+ def host_down_status(self, path, host, chkopt = 0, logact = 0):
1064
+ shost = self.strip_host_name(host)
1065
+ hstat = 0
1066
+ rets = [0, None]
1067
+ msg = hostname = None
1068
+ if self.pgcmp(shost, self.LHOST, 1) == 0:
1069
+ if not path or (chkopt and self.check_local_file(path)): return rets
1070
+ msg = path + ": is not accessible"
1071
+ flag = "L"
1072
+ if re.match(r'^(/{}/|{})'.format(self.PGLOG['GPFSNAME'], self.PGLOG['DSSDATA']), path):
1073
+ hstat = 1
1074
+ hostname = self.PGLOG['GPFSNAME']
1075
+ else:
1076
+ hstat = 2
1077
+ elif self.pgcmp(shost, self.PGLOG['GPFSNAME'], 1) == 0:
1078
+ if not path or (chkopt and self.check_local_file(path)): return rets
1079
+ msg = path + ": is not accessible"
1080
+ flag = "L"
1081
+ hstat = 1
1082
+ hostname = self.PGLOG['GPFSNAME']
1083
+ elif self.pgcmp(shost, self.BHOST, 1) == 0:
1084
+ if path:
1085
+ hstat = 2
1086
+ else:
1087
+ hstat = 1
1088
+ path = self.DPATHS['B']
1089
+ if chkopt and self.check_backup_file(path, self.QPOINTS['B']): return rets
1090
+ hostname = self.BHOST
1091
+ msg = "{}-{}: is not accessible".format(hostname, path)
1092
+ flag = "B"
1093
+ elif self.pgcmp(shost, self.DHOST, 1) == 0:
1094
+ if path:
1095
+ hstat = 2
1096
+ else:
1097
+ hstat = 1
1098
+ path = self.DPATHS['D']
1099
+ if chkopt and self.check_backup_file(path, self.QPOINTS['D']): return rets
1100
+ hostname = self.DHOST
1101
+ msg = "{}-{}: is not accessible".format(hostname, path)
1102
+ flag = "D"
1103
+ elif self.pgcmp(shost, self.OHOST, 1) == 0:
1104
+ if path:
1105
+ hstat = 2
1106
+ else:
1107
+ hstat = 1
1108
+ path = self.PGLOG['OBJCTBKT']
1109
+ if chkopt and self.check_object_file(path): return rets
1110
+ hostname = self.OHOST
1111
+ msg = "{}-{}: is not accessible".format(hostname, path)
1112
+ flag = "O"
1113
+ elif self.pgcmp(shost, self.PGLOG['PGBATCH'], 1):
1114
+ if path and chkopt and self.check_remote_file(path, host): return rets
1115
+ estat = self.ping_remote_host(host)
1116
+ if estat:
1117
+ hstat = 1
1118
+ hostname = host
1119
+ else:
1120
+ if not path: return rets
1121
+ if re.match(r'^/{}/'.format(self.PGLOG['GPFSNAME']), path):
1122
+ hstat = 1
1123
+ hostname = self.PGLOG['GPFSNAME']
1124
+ else:
1125
+ hstat = 2
1126
+ hostname = host
1127
+ flag = "R"
1128
+ msg = "{}-{}: is not accessible".format(host, path)
1129
+ elif self.get_host(1) == self.PGLOG['PGBATCH']: # local host is a batch node
1130
+ if not path or (chkopt and self.check_local_file(path)): return rets
1131
+ msg = path + ": is not accessible"
1132
+ flag = "L"
1133
+ if re.match(r'^(/{}/|{})'.format(self.PGLOG['GPFSNAME'], self.PGLOG['DSSDATA']), path):
1134
+ hstat = 1
1135
+ hostname = self.PGLOG['GPFSNAME']
1136
+ else:
1137
+ hstat = 2
1138
+ msg += " at the moment Checked on " + self.PGLOG['HOSTNAME']
1139
+ if hostname:
1140
+ estat = self.system_down_message(hostname, path, 0, logact)
1141
+ if estat:
1142
+ hstat = -hstat
1143
+ msg += "\n" + estat
1144
+ if logact and (chkopt or hstat < 0): self.errlog(msg, flag, 1, logact)
1145
+ return (hstat, msg)
1146
+
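A sketch of interpreting the (hstat, msg) pair returned above (path is a placeholder, pf a PgFile instance):

    hstat, msg = pf.host_down_status('/data/ds540.0', pf.LHOST, 1)
    if hstat == 0:
        pass                                # accessible, msg is None
    elif hstat < 0:
        print('planned downtime:', msg)     # negated when system_down_message() reports an outage
    elif hstat == 1:
        print('host down:', msg)
    else:                                   # hstat == 2
        print('path not accessible:', msg)
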
1147
+ # Check if given path on a specified host is down or not
1148
+ # path: path name to be checked
1149
+ # host: host name the file on, default to self.LHOST
1150
+ # Return errmsg if not accessible and None otherwise
1151
+ def check_host_down(self, path, host, logact = 0):
1152
+ (hstat, msg) = self.host_down_status(path, host, 1, logact)
1153
+ return msg if hstat else None
1154
+
1155
+ # Check if given service name is accessible from a specified host
1156
+ # sname: service name to be checked
1157
+ # fhost: host name from which to connect to the service, default to self.PGLOG['HOSTNAME']
1159
+ # reset the service flag to A or I accordingly
1160
+ # Return None if accessible, an error message if not, and -1 if it cannot be checked
1160
+ def check_service_accessibilty(self, sname, fhost = None, logact = 0):
1161
+ if not fhost: fhost = self.PGLOG['HOSTNAME']
1162
+ pgrec = self.pgget("dsservice", "*", "service = '{}' AND hostname = '{}'".format(sname, fhost), logact)
1163
+ if not pgrec:
1164
+ self.pglog("dsservice: Access {} from {} is not defined in GDEX Configuration".format(sname, fhost), logact)
1165
+ return -1
1166
+ path = sname if (pgrec['flag'] == "H" or pgrec['flag'] == "G") else None
1167
+ (hstat, msg) = self.host_down_status(path, fhost, 1, logact)
1168
+ return msg if hstat else None
1169
+
1170
+ # check if this host is a local host for given host name
1171
+ def is_local_host(self, host):
1172
+ host = self.strip_host_name(host)
1173
+ if host == self.LHOST or self.valid_batch_host(host): return 1
1174
+ return 0
1175
+
1176
+ # check and return action string on a node other than local one
1177
+ def local_host_action(self, host, action, info, logact = 0):
1178
+ if self.is_local_host(host): return 1
1179
+ if not logact: return 0
1180
+ if host == "partition":
1181
+ msg = "for individual partition"
1182
+ elif host == "rda_config":
1183
+ msg = "via https://gdex.ucar.edu/rda_pg_config"
1184
+ elif host in self.BCHCMDS:
1185
+ msg = "on a {} Node".format(host)
1186
+ else:
1187
+ msg = "on " + host
1188
+ return self.pglog("{}: Cannot {}, try {}".format(info, action, msg), logact)
1189
+
1190
+ # ping a given remote host name
1191
+ # return None if the system is up, an error message if not
1192
+ def ping_remote_host(self, host):
1193
+ while True:
1194
+ buf = self.pgsystem("ping -c 3 " + host, self.LOGWRN, self.CMDRET)
1195
+ if buf:
1196
+ ms = re.search(r'3 packets transmitted, (\d)', buf)
1197
+ if ms:
1198
+ if int(ms.group(1)) > 0:
1199
+ return None
1200
+ else:
1201
+ return host + " seems down not accessible"
1202
+ if self.PGLOG['SYSERR']:
1203
+ if self.PGLOG['SYSERR'].find("ping: unknown host") > -1 and host.find('.') > -1:
1204
+ host += ".ucar.edu"
1205
+ continue
1206
+ return self.PGLOG['SYSERR']
1207
+ else:
1208
+ return "Cannot ping " + host
1209
+
1210
+ # compare two given host names, return 1 if they are the same and 0 otherwise
1211
+ def same_hosts(self, host1, host2):
1212
+ host1 = self.strip_host_name(host1)
1213
+ host2 = self.strip_host_name(host2)
1214
+ return (1 if self.pgcmp(host1, host2, 1) == 0 else 0)
1215
+
1216
+ # strip and identify the proper host name
1217
+ def strip_host_name(self, host):
1218
+ if not host: return self.LHOST
1219
+ ms = re.match(r'^([^\.]+)\.', host)
1220
+ if ms: host = ms.group(1)
1221
+ if self.pgcmp(host, self.PGLOG['HOSTNAME'], 1) == 0:
1222
+ return self.LHOST
1223
+ else:
1224
+ return host
1225
+
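For example, with pf a PgFile instance (host names are placeholders):

    pf.strip_host_name(None)                   # -> 'localhost'
    pf.strip_host_name('somehost.ucar.edu')    # -> 'somehost', or 'localhost' if that is PGLOG['HOSTNAME']
    pf.same_hosts('somehost.ucar.edu', 'somehost')   # -> 1
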
1226
+ # Check file status info on a given host name (including local host); no background process for checking
1227
+ # file: file name to be checked
1228
+ # host: host name the file on, default to self.LHOST
1229
+ # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
1230
+ # 1 - get date/time modified (date_modified, time_modified)
1231
+ # 2 - get file owner's login name (logname)
1232
+ # 4 - get permission mode in 3 octal digits (mode)
1233
+ # 8 - get group name (group)
1234
+ # 16 - get week day 0-Sunday, 1-Monday (week_day)
1235
+ # 32 - get checksum (checksum), work for local file only
1236
+ # Return a dict of file info, or None if the file does not exist
1237
+ def check_gdex_file(self, file, host = None, opt = 0, logact = 0):
1238
+ if host is None: host = self.LHOST
1239
+ shost = self.strip_host_name(host)
1240
+ if self.pgcmp(shost, self.LHOST, 1) == 0:
1241
+ return self.check_local_file(file, opt, logact)
1242
+ elif self.pgcmp(shost, self.OHOST, 1) == 0:
1243
+ return self.check_object_file(file, None, opt, logact)
1244
+ elif self.pgcmp(shost, self.BHOST, 1) == 0:
1245
+ return self.check_backup_file(file, self.QPOINTS['B'], opt, logact)
1246
+ elif self.pgcmp(shost, self.DHOST, 1) == 0:
1247
+ return self.check_backup_file(file, self.QPOINTS['D'], opt, logact)
1248
+ else:
1249
+ return self.check_remote_file(file, host, opt, logact)
1250
+ check_rda_file = check_gdex_file
1251
+
1252
+ # wrapper to self.check_local_file() and self.check_backup_file() to check info for a file
1253
+ # on local or remote Globus endpoints
1254
+ def check_globus_file(self, file, endpoint = None, opt = 0, logact = 0):
1255
+ if not endpoint: endpoint = self.PGLOG['BACKUPEP']
1256
+ if endpoint == 'gdex-glade':
1257
+ if re.match(r'^/(data|decsdata)/', file): file = self.PGLOG['DSSDATA'] + file
1258
+ return self.check_local_file(file, opt, logact)
1259
+ else:
1260
+ return self.check_backup_file(file, endpoint, opt, logact)
1261
+
1262
+ # check and get local file status information
1263
+ # file: local File name
1264
+ # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
1265
+ # 1 - get date/time modified (date_modified, time_modified)
1266
+ # 2 - get file owner's login name (logname)
1267
+ # 4 - get permission mode in 3 octal digits (mode)
1268
+ # 8 - get group name (group)
1269
+ # 16 - get week day 0-Sunday, 1-Monday (week_day)
1270
+ # 32 - get checksum (checksum)
1271
+ # 64 - remove the file if it is too small (below PGLOG['MINSIZE'])
1272
+ # 128 - check twice for missing file
1273
+ # Return: a dict of file info, or None if the file does not exist
1274
+ def check_local_file(self, file, opt = 0, logact = 0):
1275
+ ret = None
1276
+ if not file: return ret
1277
+ loop = 0
1278
+ while loop < 2:
1279
+ if op.exists(file):
1280
+ try:
1281
+ fstat = os.stat(file)
1282
+ ret = self.local_file_stat(file, fstat, opt, logact)
1283
+ break
1284
+ except Exception as e:
1285
+ errmsg = "{}: {}".format(file, str(e))
1286
+ (hstat, msg) = self.host_down_status(file, self.LHOST, 0, logact)
1287
+ if hstat: errmsg += "\n" + msg
1288
+ self.errlog(errmsg, 'L', loop, logact)
1289
+ else:
1290
+ if loop > 0 or opt&128 == 0: break
1291
+ self.pglog(file + ": check it again in a moment", self.LOGWRN)
1292
+ time.sleep(6)
1293
+ loop += 1
1294
+ if loop > 1: return self.FAILURE
1295
+ self.ECNTS['L'] = 0 # reset error count
1296
+ return ret
1297
+
1298
+ # local function to get local file stat
1299
+ def local_file_stat(self, file, fstat, opt, logact):
1300
+ if not fstat:
1301
+ self.errlog(file + ": Error check file stat", 'L', 1, logact)
1302
+ return None
1303
+ info = {}
1304
+ info['isfile'] = (1 if stat.S_ISREG(fstat.st_mode) else 0)
1305
+ if info['isfile'] == 0 and logact&self.PFSIZE:
1306
+ info['data_size'] = self.local_path_size(file)
1307
+ else:
1308
+ info['data_size'] = fstat.st_size
1309
+ info['fname'] = op.basename(file)
1310
+ if not opt: return info
1311
+ if opt&64 and info['isfile'] and info['data_size'] < self.PGLOG['MINSIZE']:
1312
+ self.pglog("{}: Remove {} file".format(file, ("Small({}B)".format(info['data_size']) if info['data_size'] else "Empty")), logact&~self.EXITLG)
1313
+ self.delete_local_file(file, logact)
1314
+ return None
1315
+ if opt&17:
1316
+ mdate, mtime = self.get_date_time(fstat.st_mtime)
1317
+ if opt&1:
1318
+ info['date_modified'] = mdate
1319
+ info['time_modified'] = mtime
1320
+ cdate, ctime = self.get_date_time(fstat.st_ctime)
1321
+ info['date_created'] = cdate
1322
+ info['time_created'] = ctime
1323
+ if opt&16: info['week_day'] = self.get_weekday(mdate)
1324
+ if opt&2:
1325
+ info['uid'] = fstat.st_uid
1326
+ info['logname'] = pwd.getpwuid(info['uid']).pw_name
1327
+ if opt&4: info['mode'] = stat.S_IMODE(fstat.st_mode)
1328
+ if opt&8:
1329
+ info['gid'] = fstat.st_gid
1330
+ info['group'] = grp.getgrgid(info['gid']).gr_name
1331
+ if opt&32 and info['isfile']: info['checksum'] = self.get_md5sum(file, 0, logact)
1332
+ return info
1333
+
1334
+ # get total size of files under a given path
1335
+ @staticmethod
1336
+ def local_path_size(pname):
1337
+ if not pname: pname = '.' # To get size of current directory
1338
+ size = 0
1339
+ for path, dirs, files in os.walk(pname):
1340
+ for f in files:
1341
+ size += os.path.getsize(os.path.join(path, f))
1342
+ return size
1343
+
1344
+ # check and get file status information of a file on remote host
1345
+ # file: remote File name
1346
+ # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
1347
+ # 1 - get date/time modified (date_modified, time_modified)
1348
+ # 2 - file owner's login name (logname), assumed 'gdexdata'
1349
+ # 4 - get permission mode in 3 octal digits (mode)
1350
+ # 8 - get group name (group), assumed 'dss'
1351
+ # 16 - get week day 0-Sunday, 1-Monday (week_day)
1352
+ # Return: a dict of file info, or None if the file does not exist
1353
+ def check_remote_file(self, file, host, opt = 0, logact = 0):
1354
+ if not file: return None
1355
+ ms = re.match(r'^(.+)/$', file)
1356
+ if ms: file = ms.group(1) # remove ending '/' in case
1357
+ cmd = "{} {}".format(self.get_sync_command(host), file)
1358
+ loop = 0
1359
+ while loop < 2:
1360
+ buf = self.pgsystem(cmd, self.LOGWRN, self.CMDRET)
1361
+ if buf or not self.PGLOG['SYSERR'] or self.PGLOG['SYSERR'].find(self.PGLOG['MISSFILE']) > -1: break
1362
+ errmsg = self.PGLOG['SYSERR']
1363
+ (hstat, msg) = self.host_down_status(file, host, 0, logact)
1364
+ if hstat: errmsg += "\n" + msg
1365
+ self.errlog(errmsg, 'R', loop, logact)
1366
+ loop += 1
1367
+ if loop > 1: return self.FAILURE
1368
+ self.ECNTS['R'] = 0 # reset error count
1369
+ if buf:
1370
+ for line in re.split(r'\n', buf):
1371
+ info = self.remote_file_stat(line, opt)
1372
+ if info: return info
1373
+ return None
1374
+
1375
+ # local function to get remote file stat
1376
+ def remote_file_stat(self, line, opt):
1377
+ info = {}
1378
+ items = re.split(r'\s+', line)
1379
+ if len(items) < 5 or items[4] == '.': return None
1380
+ ms = re.match(r'^([d\-])([\w\-]{9})$', items[0])
1381
+ info['isfile'] = (1 if ms and ms.group(1) == "-" else 0)
1382
+ if opt&4 and ms: info['mode'] = self.get_file_mode(ms.group(2))
1383
+ fsize = items[1]
1384
+ if fsize.find(',') > -1: fsize = re.sub(r',', '', fsize)
1385
+ info['data_size'] = int(fsize)
1386
+ info['fname'] = op.basename(items[4])
1387
+ if not opt: return info
1388
+ if opt&17:
1389
+ mdate = self.format_date(items[2], "YYYY-MM-DD", "YYYY/MM/DD")
1390
+ mtime = items[3]
1391
+ if self.PGLOG['GMTZ']: (mdate, mtime) = self.addhour(mdate, mtime, self.PGLOG['GMTZ'])
1392
+ if opt&1:
1393
+ info['date_modified'] = mdate
1394
+ info['time_modified'] = mtime
1395
+ if opt&16: info['week_day'] = self.get_weekday(mdate)
1396
+ if opt&2: info['logname'] = "gdexdata"
1397
+ if opt&8: info['group'] = self.PGLOG['GDEXGRP']
1398
+ return info
1399
+
1400
+ # check and get object file status information
1401
+ # file: object store File key name
1402
+ # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
1403
+ # 1 - get date/time modified (date_modified, time_modified)
1404
+ # 2 - get file owner's login name (logname)
1405
+ # 4 - get metadata hash
1406
+ # 8 - get group name (group)
1407
+ # 16 - get week day 0-Sunday, 1-Monday (week_day)
1408
+ # 32 - get checksum (checksum)
1409
+ # 64 - check once, no rechecking
1410
+ # Return a dict of file info, or None if the file does not exist
1411
+ def check_object_file(self, file, bucket = None, opt = 0, logact = 0):
1412
+ if not bucket: bucket = self.PGLOG['OBJCTBKT']
1413
+ ret = None
1414
+ if not file: return ret
1415
+ cmd = "{} lo {} -b {}".format(self.OBJCTCMD, file, bucket)
1416
+ ucmd = "{} gm -k {} -b {}".format(self.OBJCTCMD, file, bucket) if opt&14 else None
1417
+ loop = 0
1418
+ while loop < 2:
1419
+ buf = self.pgsystem(cmd, self.LOGWRN, self.CMDRET)
1420
+ if buf:
1421
+ if re.match(r'^\[\]', buf): break
1422
+ if re.match(r'^\[\{', buf):
1423
+ ary = json.loads(buf)
1424
+ cnt = len(ary)
1425
+ if cnt > 1: return self.pglog("{}-{}: {} records returned\n{}".format(bucket, file, cnt, buf), logact|self.ERRLOG)
1426
+ hash = ary[0]
1427
+ uhash = None
1428
+ if ucmd:
1429
+ ubuf = self.pgsystem(ucmd, self.LOGWRN, self.CMDRET)
1430
+ if ubuf and re.match(r'^\{', ubuf): uhash = json.loads(ubuf)
1431
+ ret = self.object_file_stat(hash, uhash, opt)
1432
+ break
1433
+ if opt&64: return self.FAILURE
1434
+ errmsg = "Error Execute: {}\n{}".format(cmd, self.PGLOG['SYSERR'])
1435
+ (hstat, msg) = self.host_down_status(bucket, self.OHOST, 0, logact)
1436
+ if hstat: errmsg += "\n" + msg
1437
+ self.errlog(errmsg, 'O', loop, logact)
1438
+ loop += 1
1439
+ if loop > 1: return self.FAILURE
1440
+ self.ECNTS['O'] = 0 # reset error count
1441
+ return ret
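+
+ # Usage sketch (illustration only; the key and bucket are hypothetical). Bits 2/4/8
+ # trigger a second metadata lookup, bit 32 returns the ETag as checksum:
+ #   info = pgf.check_object_file('dsnnn.n/example.nc', 'rda-data', 1|2|32)
+ #   if info: print(info['data_size'], info['checksum'], info['logname'])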
1442
+
1443
+ # check object path status information
1444
+ # path: object store path name
1445
+ # Return: count of object key names, 0 if no files exist; None if error checking
1446
+ def check_object_path(self, path, bucket = None, logact = 0):
1447
+ if not bucket: bucket = self.PGLOG['OBJCTBKT']
1448
+ ret = None
1449
+ if not path: return ret
1450
+ cmd = "{} lo {} -ls -b {}".format(self.OBJCTCMD, path, bucket)
1451
+ loop = 0
1452
+ while loop < 2:
1453
+ buf = self.pgsystem(cmd, self.LOGWRN, self.CMDRET)
1454
+ if buf:
1455
+ ary = json.loads(buf)
1456
+ return len(ary)
1457
+ errmsg = "Error Execute: {}\n{}".format(cmd, self.PGLOG['SYSERR'])
1458
+ (hstat, msg) = self.host_down_status(bucket, self.OHOST, 0, logact)
1459
+ if hstat: errmsg += "\n" + msg
1460
+ self.errlog(errmsg, 'O', loop, logact)
1461
+ loop += 1
1462
+ self.ECNTS['O'] = 0 # reset error count
1463
+ return ret
1464
+
1465
+ # object store function to get file stat
1466
+ def object_file_stat(self, hash, uhash, opt):
1467
+ info = {'isfile' : 1, 'data_size' : int(hash['Size']), 'fname' : op.basename(hash['Key'])}
1468
+ if not opt: return info
1469
+ if opt&17:
1470
+ ms = re.match(r'^(\d+-\d+-\d+)\s+(\d+:\d+:\d+)', hash['LastModified'])
1471
+ if ms:
1472
+ (mdate, mtime) = ms.groups()
1473
+ if self.PGLOG['GMTZ']: (mdate, mtime) = self.addhour(mdate, mtime, self.PGLOG['GMTZ'])
1474
+ if opt&1:
1475
+ info['date_modified'] = mdate
1476
+ info['time_modified'] = mtime
1477
+ if opt&16: info['week_day'] = self.get_weekday(mdate)
1478
+ if opt&32:
1479
+ ms = re.match(r'"(.+)"', hash['ETag'])
1480
+ if ms: info['checksum'] = ms.group(1)
1481
+ if uhash:
1482
+ if opt&2: info['logname'] = uhash['user']
1483
+ if opt&4: info['meta'] = uhash
1484
+ if opt&8: info['group'] = uhash['group']
1485
+ return info
1486
+
1487
+ # check and get backup file status information
1488
+ # file: backup File key name
1489
+ # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
1490
+ # 1 - get date/time modified (date_modified, time_modified)
1491
+ # 2 - get file owner's login name (logname)
1492
+ # 4 - get metadata hash
1493
+ # 8 - get group name (group)
1494
+ # 16 - get week day 0-Sunday, 1-Monday (week_day)
1495
+ # 64 - rechecking
1496
+ # Return a dict of file info, or None if the file does not exist
1497
+ def check_backup_file(self, file, endpoint = None, opt = 0, logact = 0):
1498
+ ret = None
1499
+ if not file: return ret
1500
+ if not endpoint: endpoint = self.PGLOG['BACKUPEP']
1501
+ bdir = op.dirname(file)
1502
+ bfile = op.basename(file)
1503
+ cmd = f"{self.BACKCMD} ls -ep {endpoint} -p {bdir} --filter {bfile}"
1504
+ ccnt = loop = 0
1505
+ while loop < 2:
1506
+ buf = self.pgsystem(cmd, logact, self.CMDRET)
1507
+ syserr = self.PGLOG['SYSERR']
1508
+ if buf:
1509
+ getstat = 0
1510
+ for line in re.split(r'\n', buf):
1511
+ if re.match(r'^(User|-+)\s*\|', line):
1512
+ getstat += 1
1513
+ elif getstat > 1:
1514
+ ret = self.backup_file_stat(line, opt)
1515
+ if ret: break
1516
+ if ret: break
1517
+ if loop or opt&64 == 0: return ret
1518
+ time.sleep(self.PGSIG['ETIME'])
1519
+ elif syserr:
1520
+ if syserr.find("Directory '{}' not found on endpoint".format(bdir)) > -1:
1521
+ if loop or opt&64 == 0: return ret
1522
+ time.sleep(self.PGSIG['ETIME'])
1523
+ elif ccnt < 2 and syserr.find("The connection to the server was broken") > -1:
1524
+ time.sleep(self.PGSIG['ETIME'])
1525
+ ccnt += 1
1526
+ continue
1527
+ else:
1528
+ if opt&64 == 0: return self.FAILURE
1529
+ errmsg = "Error Execute: {}\n{}".format(cmd, syserr)
1530
+ (hstat, msg) = self.host_down_status('', self.QHOSTS[endpoint], 0, logact)
1531
+ if hstat: errmsg += "\n" + msg
1532
+ self.errlog(errmsg, 'B', loop, logact)
1533
+ loop += 1
1534
+ if ret: self.ECNTS['B'] = 0 # reset error count
1535
+ return ret
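+
+ # Usage sketch (illustration only; the Quasar path is hypothetical). The file is found
+ # by listing its parent directory on the Globus endpoint; bit 64 retries once if missing:
+ #   info = pgf.check_backup_file('/dsnnn.n/example.tar', None, 1|64)
+ #   # -> dict of file info, None when not found, or self.FAILURE on errors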
1536
+
1537
+ # backup store function to get file stat
1538
+ def backup_file_stat(self, line, opt):
1539
+ info = {}
1540
+ items = re.split(r'[\s\|]+', line)
1541
+ if len(items) < 8: return None
1542
+ info['isfile'] = (1 if items[6] == 'file' else 0)
1543
+ info['data_size'] = int(items[3])
1544
+ info['fname'] = items[7]
1545
+ if not opt: return info
1546
+ if opt&17:
1547
+ mdate = items[4]
1548
+ mtime = items[5]
1549
+ ms = re.match(r'^(\d+:\d+:\d+)', mtime)
1550
+ if ms: mtime = ms.group(1)
1551
+ if self.PGLOG['GMTZ']: (mdate, mtime) = self.addhour(mdate, mtime, self.PGLOG['GMTZ'])
1552
+ if opt&1:
1553
+ info['date_modified'] = mdate
1554
+ info['time_modified'] = mtime
1555
+ if opt&16: info['week_day'] = self.get_weekday(mdate)
1556
+ if opt&2: info['logname'] = items[0]
1557
+ if opt&4: info['mode'] = self.get_file_mode(items[2])
1558
+ if opt&8: info['group'] = items[1]
1559
+ return info
1560
+
1561
+ # check and get file status information for a file inside a tar file
1562
+ # file: File name to be checked
1563
+ # tfile: the tar file name
1564
+ # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
1565
+ # 1 - get date/time modified (date_modified, time_modified)
1566
+ # 2 - get file owner's login name (logname)
1567
+ # 4 - get permission mode in 3 octal digits (mode)
1568
+ # 8 - get group name (group)
1569
+ # 16 - get week day 0-Sunday, 1-Monday (week_day)
1570
+ # Return a dict of file info, or None if the file does not exist
1571
+ def check_tar_file(self, file, tfile, opt = 0, logact = 0):
1572
+ ret = None
1573
+ if not (file and tfile): return ret
1574
+ for loop in range(2):
1575
+ buf = self.pgsystem("tar -tvf {} {}".format(tfile, file), self.LOGWRN, self.CMDRET)
1576
+ if buf or not self.PGLOG['SYSERR'] or self.PGLOG['SYSERR'].find('Not found in archive') > -1: break
1577
+ errmsg = self.PGLOG['SYSERR']
1578
+ (hstat, msg) = self.host_down_status(tfile, self.LHOST, 0, logact)
+ if hstat: errmsg += "\n" + msg
1579
+ self.errlog(errmsg, 'L', loop, logact)
1580
+ if loop > 0: return self.FAILURE
1581
+ if buf:
1582
+ for line in re.split(r'\n', buf):
1583
+ ret = self.tar_file_stat(line, opt)
1584
+ if ret: break
1585
+ self.ECNTS['L'] = 0 # reset error count
1586
+ return ret
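+
+ # Usage sketch (illustration only; hypothetical names). The member is inspected with
+ # 'tar -tvf' without extracting it:
+ #   info = pgf.check_tar_file('member.nc', '/data/example/archive.tar', 1)
+ #   if info: print(info['data_size'], info['date_modified'])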
1587
+
1588
+ # local function to get file stat in a tar file
1589
+ def tar_file_stat(self, line, opt):
1590
+ items = re.split(r'\s+', line)
1591
+ if len(items) < 6: return None
1592
+ ms = re.match(r'^([d\-])([\w\-]{9})$', items[0])
1593
+ if not ms: return None
1594
+ info = {}
1595
+ info['isfile'] = (1 if ms and ms.group(1) == "-" else 0)
1596
+ info['data_size'] = int(items[2])
1597
+ info['fname'] = op.basename(items[5])
1598
+ if not opt: return info
1599
+ if opt&4: info['mode'] = self.get_file_mode(ms.group(2))
1600
+ if opt&17:
1601
+ mdate = items[3]
1602
+ mtime = items[4]
1603
+ if self.PGLOG['GMTZ']: (mdate, mtime) = self.addhour(mdate, mtime, self.PGLOG['GMTZ'])
1604
+ if opt&1:
1605
+ info['date_modified'] = mdate
1606
+ info['time_modified'] = mtime
1607
+ if opt&16: info['week_day'] = self.get_weekday(mdate)
1608
+ if opt&10:
1609
+ ms = re.match(r'^(\w+)/(\w+)', items[1])
1610
+ if ms:
1611
+ if opt&2: info['logname'] = ms.group(1)
1612
+ if opt&8: info['group'] = ms.group(2)
1613
+ return info
1614
+
1615
+ # check and get file status information for a file on an FTP server
1616
+ # file: File name to be checked
1617
+ # name: login user name
1618
+ # pswd: login password
1619
+ # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
1620
+ # 1 - get date/time modified (date_modified, time_modified)
1621
+ # 2 - get file owner's login name (logname)
1622
+ # 4 - get permission mode in 3 octal digits (mode)
1623
+ # 8 - get group name (group)
1624
+ # 16 - get week day 0-Sunday, 1-Monday (week_day)
1625
+ # Return a dict of file info, or None if the file does not exist
1626
+ def check_ftp_file(self, file, opt = 0, name = None, pswd = None, logact = 0):
1627
+ if not file: return None
1628
+ ms = re.match(r'^(.+)/$', file)
1629
+ if ms: file = ms.group(1) # remove ending '/' in case
1630
+ cmd = "ncftpls -l "
1631
+ if name: cmd += "-u {} ".format(name)
1632
+ if pswd: cmd += "-p {} ".format(pswd)
1633
+ fname = op.basename(file)
1634
+ for loop in range(2):
1635
+ buf = self.pgsystem(cmd + file, self.LOGWRN, self.CMDRET)
1636
+ if buf: break
1637
+ if self.PGLOG['SYSERR']:
1638
+ self.errlog(self.PGLOG['SYSERR'], 'O', loop, logact|self.LOGERR)
1639
+ if loop == 0: file = op.dirname(file) + '/'
1640
+ if loop > 1: return self.FAILURE
1641
+ for line in re.split(r'\n', buf):
1642
+ if not line or line.find(fname) < 0: continue
1643
+ info = self.ftp_file_stat(line, opt)
1644
+ if info: return info
1645
+ return None
1646
+
1647
+ # local function to get stat of a file on ftp server
1648
+ def ftp_file_stat(self, line, opt):
1649
+ items = re.split(r'\s+', line)
1650
+ if len(items) < 9: return None
1651
+ ms = re.match(r'^([d\-])([\w\-]{9})$', items[0])
1652
+ info = {}
1653
+ info['isfile'] = (1 if ms and ms.group(1) == "-" else 0)
1654
+ info['data_size'] = int(items[4])
1655
+ info['fname'] = op.basename(items[8])
1656
+ if not opt: return info
1657
+ if opt&4 and ms: info['mode'] = self.get_file_mode(ms.group(2))
1658
+ if opt&17:
1659
+ dy = int(items[6])
1660
+ mn = self.get_month(items[5])
1661
+ if re.match(r'^\d+$', items[7]):
1662
+ yr = int(items[7])
1663
+ mtime = "00:00:00"
1664
+ else:
1665
+ mtime = items[7] + ":00"
1666
+ cdate = self.curdate()
1667
+ ms = re.match(r'^(\d+)-(\d\d)', cdate)
1668
+ if ms:
1669
+ yr = int(ms.group(1))
1670
+ cm = int(ms.group(2)) # current month
1671
+ if cm < mn: yr -= 1 # previous year
1672
+ mdate = "{}-{:02}-{:02}".format(yr, mn, dy)
1673
+ if opt&1:
1674
+ info['date_modified'] = mdate
1675
+ info['time_modified'] = mtime
1676
+ if opt&16: info['week_day'] = self.get_weekday(mdate)
1677
+ if opt&2: info['logname'] = items[2]
1678
+ if opt&8: info['group'] = items[3]
1679
+ return info
1680
+
1681
+ # get an array of directories/files under given dir on a given host name (including local host)
1682
+ # dir: directory name to be listed
1683
+ # host: host name the directory on, default to self.LHOST
1684
+ # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
1685
+ # 1 - get date/time modified (date_modified, time_modified)
1686
+ # 2 - get file owner's login name (logname)
1687
+ # 4 - get permission mode in 3 octal digits (mode)
1688
+ # 8 - get group name (group)
1689
+ # 16 - get week day 0-Sunday, 1-Monday (week_day)
1690
+ # 32 - get checksum (checksum), work for local file only
1691
+ # Return: a dict with filenames as keys, or None if the directory is empty
1692
+ def gdex_glob(self, dir, host, opt = 0, logact = 0):
1693
+ shost = self.strip_host_name(host)
1694
+ if self.pgcmp(shost, self.LHOST, 1) == 0:
1695
+ return self.local_glob(dir, opt, logact)
1696
+ elif self.pgcmp(shost, self.OHOST, 1) == 0:
1697
+ return self.object_glob(dir, None, opt, logact)
1698
+ elif self.pgcmp(shost, self.BHOST, 1) == 0:
1699
+ return self.backup_glob(dir, None, opt, logact)
1700
+ else:
1701
+ return self.remote_glob(dir, host, opt, logact)
1702
+ rda_glob = gdex_glob
1703
+
1704
+ # get an array of directories/files under given dir on local host
1705
+ # dir: directory name to be listed
1706
+ # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
1707
+ # 1 - get date/time modified (date_modified, time_modified)
1708
+ # 2 - get file owner's login name (logname)
1709
+ # 4 - get permission mode in 3 octal digits (mode)
1710
+ # 8 - get group name (group)
1711
+ # 16 - get week day 0-Sunday, 1-Monday (week_day)
1712
+ # 32 - get checksum (checksum), work for local file only
1713
+ # 256 - get files only and ignore directories
1714
+ # Return: dict with filenames as keys or None if empty directory
1715
+ def local_glob(self, dir, opt = 0, logact = 0):
1716
+ flist = {}
1717
+ if not re.search(r'[*?]', dir):
1718
+ if op.exists(dir):
1719
+ dir = self.join_paths(dir, "*")
1720
+ else:
1721
+ dir += "*"
1722
+ for file in glob.glob(dir):
1723
+ info = self.check_local_file(file, opt, logact)
1724
+ if info and (info['isfile'] or not 256&opt): flist[file] = info
1725
+ return flist
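+
+ # Usage sketch (illustration only; hypothetical directory). Bit 256 skips directories
+ # so only regular files are returned:
+ #   flist = pgf.local_glob('/data/example', 1|256)
+ #   for path, info in flist.items(): print(path, info['data_size'], info['date_modified'])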
1726
+
1727
+ # check and get file status information for files under a directory on a remote host
1728
+ # dir: remote directory name
1729
+ # host: host name the directory on, default to self.LHOST
1730
+ # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
1731
+ # 1 - get date/time modified (date_modified, time_modified)
1732
+ # 2 - file owner's login name (logname), assumed 'gdexdata'
1733
+ # 4 - get permission mode in 3 octal digits (mode)
1734
+ # 8 - get group name (group), assumed 'dss'
1735
+ # 16 - get week day 0-Sunday, 1-Monday (week_day)
1736
+ # Return: dict with filenames as keys or None if empty directory
1737
+ def remote_glob(self, dir, host, opt = 0, logact = 0):
1738
+ flist = {}
1739
+ if not re.search(r'/$', dir): dir += '/'
1740
+ buf = self.pgsystem(self.get_sync_command(host) + " " + dir, self.LOGWRN, self.CMDRET)
1741
+ if not buf:
1742
+ if self.PGLOG['SYSERR'] and self.PGLOG['SYSERR'].find(self.PGLOG['MISSFILE']) < 0:
1743
+ self.errlog("{}-{}: Error list directory\n{}".format(host, dir, self.PGLOG['SYSERR']), 'R', 1, logact)
1744
+ return flist
1745
+ for line in re.split(r'\n', buf):
1746
+ info = self.remote_file_stat(line, opt)
1747
+ if info: flist[dir + info['fname']] = info
1748
+ return flist
1749
+
1750
+ # check and get multiple object store file status information
1751
+ # dir: object directory name
1752
+ # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
1753
+ # 1 - get date/time modified (date_modified, time_modified)
1754
+ # 2 - get file owner's login name (logname)
1755
+ # 8 - get group name (group)
1756
+ # 16 - get week day 0-Sunday, 1-Monday (week_day)
1757
+ # Return: a dict with filenames as keys, or None if no files exist
1758
+ def object_glob(self, dir, bucket = None, opt = 0, logact = 0):
1759
+ flist = {}
1760
+ if not bucket: bucket = self.PGLOG['OBJCTBKT']
1761
+ ms = re.match(r'^(.+)/$', dir)
1762
+ if ms: dir = ms.group(1)
1763
+ cmd = "{} lo {} -b {}".format(self.OBJCTCMD, dir, bucket)
1764
+ ary = err = None
1765
+ buf = self.pgsystem(cmd, self.LOGWRN, self.CMDRET)
1766
+ if buf:
1767
+ if re.match(r'^\[\{', buf):
1768
+ ary = json.loads(buf)
1769
+ elif not re.match(r'^\[\]', buf):
1770
+ err = "{}\n{}".format(self.PGLOG['SYSERR'], buf)
1771
+ else:
1772
+ err = self.PGLOG['SYSERR']
1773
+ if not ary:
1774
+ if err:
1775
+ self.errlog("{}-{}-{}: Error list files\n{}".format(self.OHOST, bucket, dir, err), 'O', 1, logact)
1776
+ return self.FAILURE
1777
+ else:
1778
+ return flist
1779
+ for hash in ary:
1780
+ uhash = None
1781
+ if opt&10:
1782
+ ucmd = "{} gm -l {} -b {}".format(self.OBJCTCMD, hash['Key'], bucket)
1783
+ ubuf = self.pgsystem(ucmd, self.LOGWRN, self.CMDRET)
1784
+ if ubuf and re.match(r'^\{.+', ubuf): uhash = json.loads(ubuf)
1785
+ info = self.object_file_stat(hash, uhash, opt)
1786
+ if info: flist[hash['Key']] = info
1787
+ return flist
1788
+
1789
+ # check and get multiple Quasar backup file status information
1790
+ # dir: backup path
1791
+ # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
1792
+ # 1 - get date/time modified (date_modified, time_modified)
1793
+ # 2 - get file owner's login name (logname)
1794
+ # 8 - get group name (group)
1795
+ # 16 - get week day 0-Sunday, 1-Monday (week_day)
1796
+ # 64 - rechecking
1797
+ # Return: a dict with filenames as keys, or None if no files exist
1798
+ def backup_glob(self, dir, endpoint = None, opt = 0, logact = 0):
1799
+ if not dir: return None
1800
+ if not endpoint: endpoint = self.PGLOG['BACKUPEP']
1801
+ cmd = f"{self.BACKCMD} ls -ep {endpoint} -p {dir}"
1802
+ flist = {}
1803
+ for loop in range(2):
1804
+ buf = self.pgsystem(cmd, logact, self.CMDRET)
1805
+ syserr = self.PGLOG['SYSERR']
1806
+ if buf:
1807
+ getstat = 0
1808
+ for line in re.split(r'\n', buf):
1809
+ if re.match(r'^(User|-+)\s*\|', line):
1810
+ getstat += 1
1811
+ elif getstat > 1:
1812
+ info = self.backup_file_stat(line, opt)
1813
+ if info: flist[info['fname']] = info
1814
+ if flist: break
1815
+ if loop or opt&64 == 0: return None
1816
+ time.sleep(self.PGSIG['ETIME'])
1817
+ elif syserr:
1818
+ if syserr.find("Directory '{}' not found on endpoint".format(dir)) > -1:
1819
+ if loop or opt&64 == 0: return None
1820
+ time.sleep(self.PGSIG['ETIME'])
1821
+ else:
1822
+ if opt&64 == 0: return self.FAILURE
1823
+ errmsg = "Error Execute: {}\n{}".format(cmd, syserr)
1824
+ (hstat, msg) = self.host_down_status('', self.QHOSTS[endpoint], 0, logact)
1825
+ if hstat: errmsg += "\n" + msg
1826
+ self.errlog(errmsg, 'B', loop, logact)
1827
+ if flist:
1828
+ self.ECNTS['B'] = 0 # reset error count
1829
+ return flist
1830
+ else:
1831
+ return self.FAILURE
1832
+
1833
+ # local function to get file/directory mode for given permission string, for example, rw-rw-r--
1834
+ @staticmethod
1835
+ def get_file_mode(perm):
1836
+ mbits = [4, 2, 1]
1837
+ mults = [64, 8, 1]
1838
+ plen = len(perm)
1839
+ if plen == 4:
1840
+ perm = perm[1:]
1841
+ plen = 3
1842
+ mode = 0
1843
+ for i in range(3):
1844
+ for j in range(3):
1845
+ pidx = 3*i+j
1846
+ if pidx < plen and perm[pidx] != "-": mode += mults[i]*mbits[j]
1847
+ return mode
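+
+ # Worked examples (static helper, safe to call directly):
+ #   PgFile.get_file_mode('rw-r--r--')   # -> 420 (octal 644)
+ #   PgFile.get_file_mode('rwxr-xr-x')   # -> 493 (octal 755)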
1848
+
1849
+ # Evaluate md5 checksum
1850
+ # file: file name for MD5 checksum
1851
+ # count: array length, defined if file is an array of file names
1852
+ # Return: one or an array of 128-bit md5 'fingerprints'; None if failed
1853
+ def get_md5sum(self, file, count = 0, logact = 0):
1854
+ cmd = 'md5sum '
1855
+ if count > 0:
1856
+ checksum = [None]*count
1857
+ for i in range(count):
1858
+ if op.isfile(file[i]):
1859
+ chksm = self.pgsystem(cmd + file[i], logact, 20)
1860
+ if chksm:
1861
+ ms = re.search(r'(\w{32})', chksm)
1862
+ if ms: checksum[i] = ms.group(1)
1863
+ else:
1864
+ checksum = None
1865
+ if op.isfile(file):
1866
+ chksm = self.pgsystem(cmd + file, logact, 20)
1867
+ if chksm:
1868
+ ms = re.search(r'(\w{32})', chksm)
1869
+ if ms: checksum = ms.group(1)
1870
+ return checksum
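+
+ # Usage sketch (illustration only; hypothetical paths). Pass a single name, or a list
+ # plus its length to get a list of checksums back:
+ #   one = pgf.get_md5sum('/data/example/a.nc')
+ #   many = pgf.get_md5sum(['/data/example/a.nc', '/data/example/b.nc'], 2)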
1871
+
1872
+ # Evaluate md5 checksums and compare them for two given files
1873
+ # file1, file2: file names
1874
+ # Return: 0 if same and 1 if not
1875
+ def compare_md5sum(self, file1, file2, logact = 0):
1876
+ if op.isdir(file1) or op.isdir(file2):
1877
+ files1 = self.get_directory_files(file1)
1878
+ fcnt1 = len(files1) if files1 else 0
1879
+ files2 = self.get_directory_files(file2)
1880
+ fcnt2 = len(files2) if files2 else 0
1881
+ if fcnt1 != fcnt2: return 1
1882
+ chksm1 = self.get_md5sum(files1, fcnt1, logact)
1883
+ chksm1 = ''.join(chksm1)
1884
+ chksm2 = self.get_md5sum(files2, fcnt2, logact)
1885
+ chksm2 = ''.join(chksm2)
1886
+ else:
1887
+ chksm1 = self.get_md5sum(file1, 0, logact)
1888
+ chksm2 = self.get_md5sum(file2, 0, logact)
1889
+ return (0 if (chksm1 and chksm2 and chksm1 == chksm2) else 1)
1890
+
1891
+ # change local directory to todir, and return odir upon success
1892
+ def change_local_directory(self, todir, logact = 0):
1893
+ if logact:
1894
+ lact = logact&~(self.EXITLG|self.ERRLOG)
1895
+ else:
1896
+ logact = lact = self.LOGWRN
1897
+ if not op.isdir(todir):
1898
+ if op.isfile(todir): return self.errlog(todir + ": is file, cannot change directory", 'L', 1, logact)
1899
+ if not self.make_local_directory(todir, logact): return self.FAILURE
1900
+ odir = self.PGLOG['CURDIR']
1901
+ if todir == odir:
1902
+ self.pglog(todir + ": in Directory", lact)
1903
+ return odir
1904
+ try:
1905
+ os.chdir(todir)
1906
+ except Exception as e:
1907
+ return self.errlog(str(e), 'L', 1, logact)
1908
+ else:
1909
+ if not op.isabs(todir): todir = os.getcwd()
1910
+ self.PGLOG['CURDIR'] = todir
1911
+ self.pglog(todir + ": Change to Directory", lact)
1912
+ return odir
1913
+
1914
+ # record the directory for the deleted file
1915
+ # pass in an empty dir to turn on recording of deleted directories
1916
+ def record_delete_directory(self, dir, val):
1917
+ if dir is None:
1918
+ if isinstance(val, int):
1919
+ self.DIRLVLS = val
1920
+ elif re.match(r'^\d+$', val):
1921
+ self.DIRLVLS = int(val)
1922
+ elif dir and not re.match(r'^(\.|\./|/)$', dir) and dir not in self.DELDIRS:
1923
+ self.DELDIRS[dir] = val
1924
+
1925
+ # remove the recorded delete directory if it is empty
1926
+ def clean_delete_directory(self, logact = 0):
1927
+ if not self.DIRLVLS: return
1928
+ if logact:
1929
+ lact = logact&~(self.EXITLG)
1930
+ else:
1931
+ logact = lact = self.LOGWRN
1932
+ lvl = self.DIRLVLS
1933
+ self.DIRLVLS = 0 # set to 0 to stop recording directory
1934
+ while lvl > 0:
1935
+ lvl -= 1
1936
+ dirs = {}
1937
+ for dir in self.DELDIRS:
1938
+ host = self.DELDIRS[dir]
1939
+ dinfo = (dir if host == self.LHOST else "{}-{}".format(host, dir))
1940
+ dstat = self.gdex_empty_directory(dir, self.DELDIRS[dir])
1941
+ if dstat == 0:
1942
+ if self.delete_gdex_file(dir, host, logact):
1943
+ self.pglog(dinfo + ": Empty directory removed", lact)
1944
+ elif dstat > 0:
1945
+ if dstat == 1 and lvl > 0: self.pglog(dinfo + ": Directory not empty yet", lact)
1946
+ continue
1947
+ if lvl: dirs[op.dirname(dir)] = host
1948
+ if not dirs: break
1949
+ self.DELDIRS = dirs
1950
+ self.DELDIRS = {} # empty cache afterward
1951
+
1952
+ # remove the given directory and all its subdirectories if they are empty
1953
+ # return 1 if the empty directory was removed, 0 otherwise
1954
+ def clean_empty_directory(self, dir, host, logact = 0):
1955
+ if not dir: return 0
1956
+ dirs = self.gdex_glob(dir, host)
1957
+ cnt = 0
1958
+ if logact:
1959
+ lact = logact&~self.EXITLG
1960
+ else:
1961
+ lact = logact = self.LOGWRN
1962
+ if dirs:
1963
+ for name in dirs:
1964
+ cnt += 1
1965
+ if dirs[name]['isfile']: continue
1966
+ cnt -= self.clean_empty_directory(name, host, logact)
1967
+
1968
+ dinfo = (dir if self.same_hosts(host, self.LHOST) else "{}-{}".format(host, dir))
1969
+ if cnt == 0:
1970
+ if self.delete_gdex_file(dir, host, logact):
1971
+ self.pglog(dinfo + ": Empty directory removed", lact)
1972
+ return 1
1973
+ else:
1974
+ self.pglog(dinfo + ": Directory not empty yet", lact)
1975
+ return 0
1976
+
1977
+ # check if given directory is empty
1978
+ # Return: 0 if empty directory, 1 if not empty and -1 if invalid directory
1979
+ def gdex_empty_directory(self, dir, host):
1980
+ shost = self.strip_host_name(host)
1981
+ if self.pgcmp(shost, self.LHOST, 1) == 0:
1982
+ return self.local_empty_directory(dir)
1983
+ else:
1984
+ return self.remote_empty_directory(dir, host)
1985
+ rda_empty_directory = gdex_empty_directory
1986
+
1987
+ # return 0 if the local directory is empty, 1 if not; -1 if it is not a directory, 2 if it is a root directory
1988
+ def local_empty_directory(self, dir):
1989
+ if not op.isdir(dir): return -1
1990
+ if self.is_root_directory(dir, 'L'): return 2
1991
+ if not re.search(r'/$', dir): dir += '/'
1992
+ dir += '*'
1993
+ return (1 if glob.glob(dir) else 0)
1994
+
1995
+ # return 0 if the remote directory is empty, 1 if not; -1 if it cannot be listed, 2 if it is a root directory
1996
+ def remote_empty_directory(self, dir, host):
1997
+ if self.is_root_directory(dir, 'R', host): return 2
1998
+ if not re.search(r'/$', dir): dir += '/'
1999
+ buf = self.pgsystem("{} {}".format(self.get_sync_command(host), dir), self.LOGWRN, self.CMDRET)
2000
+ if not buf: return -1
2001
+ for line in re.split(r'\n', buf):
2002
+ if self.remote_file_stat(line, 0): return 1
2003
+ return 0
2004
+
2005
+ # get sizes of files on a given host
2006
+ # files: file names to get sizes
2007
+ # host: host name the file on, default to self.LHOST
2008
+ # return: array of file sizes; size is -1 if a file does not exist
2009
+ def gdex_file_sizes(self, files, host, logact = 0):
2010
+ sizes = []
2011
+ for file in files: sizes.append(self.gdex_file_size(file, host, 2, logact))
2012
+ return sizes
2013
+ rda_file_sizes = gdex_file_sizes
2014
+
2015
+ # get sizes of local files
2016
+ # files: file names to get sizes
2017
+ # return: array of file sizes; size is -1 if a file does not exist
2018
+ def local_file_sizes(self, files, logact = 0):
2019
+ sizes = []
2020
+ for file in files: sizes.append(self.local_file_size(file, 6, logact))
2021
+ return sizes
2022
+
2023
+ # check if a file on a given host is empty or too small to be considered valid
2024
+ # file: file name to be checked
2025
+ # host: host name the file on, default to self.LHOST
2026
+ # opt: 1 - to remove empty file
2027
+ # 2 - show message for empty file
2028
+ # 4 - show message for non-existing file
2029
+ # return: file size in bytes
2030
+ # 0 - empty file or small file, with size < self.PGLOG['MINSIZE']
2031
+ # -1 - file does not exist
2032
+ # -2 - error check file
2033
+ def gdex_file_size(self, file, host, opt = 0, logact = 0):
2034
+ info = self.check_gdex_file(file, host, 0, logact)
2035
+ if info:
2036
+ if info['isfile'] and info['data_size'] < self.PGLOG['MINSIZE']:
2037
+ if opt:
2038
+ if opt&2: self.errlog("{}-{}: {} file".format(host, file, ("Too small({}B)".format(info['data_size']) if info['data_size'] > 0 else "Empty")),
2039
+ 'O', 1, logact)
2040
+ if opt&1: self.delete_gdex_file(file, host, logact)
2041
+ return 0
2042
+ else:
2043
+ return info['data_size'] # if not regular file or not empty
2044
+ elif info is not None:
2045
+ return -2 # error access
2046
+ else:
2047
+ if opt&4: self.errlog("{}-{}: {}".format(host, file, self.PGLOG['MISSFILE']), 'O', 1, logact)
2048
+ return -1 # file does not exist
2049
+ rda_file_size = gdex_file_size
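+
+ # Usage sketch of interpreting the return value (illustration only; hypothetical path):
+ #   size = pgf.gdex_file_size('/data/example/file.nc', None, 2|4)
+ #   # size > 0 : valid file;  0 : empty/too-small;  -1 : missing;  -2 : error checking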
2050
+
2051
+ # check if a local file is empty or too small to be considered valid
2052
+ # file: file name to be checked
2053
+ # opt: 1 - to remove empty file
2054
+ # 2 - show message for empty file
2055
+ # 4 - show message for non-existing file
2056
+ # return: file size in bytes
2057
+ # 0 - empty file or small file, with size < self.PGLOG['MINSIZE']
2058
+ # -1 - file does not exist
2059
+ # -2 - error check file
2060
+ def local_file_size(self, file, opt = 0, logact = 0):
2061
+ if not op.exists(file):
2062
+ if opt&4: self.lmsg(file, self.PGLOG['MISSFILE'], logact)
2063
+ return -1 # file does not exist
2064
+ info = self.check_local_file(file, 0, logact|self.PFSIZE)
2065
+ if info:
2066
+ if info['isfile'] and info['data_size'] < self.PGLOG['MINSIZE']:
2067
+ if opt:
2068
+ if opt&2: self.lmsg(file, ("Too small({}B)".format(info['data_size']) if info['data_size'] > 0 else "Empty file") , logact)
2069
+ if opt&1: self.delete_local_file(file, logact)
2070
+ return 0
2071
+ else:
2072
+ return info['data_size'] # if not regular file or not empty
2073
+ elif info is not None:
2074
+ return -2 # error check file
2075
+
2076
+ # compress/uncompress a single local file
2077
+ # ifile: file name to be compressed/uncompressed
2078
+ # fmt: archive format
2079
+ # act: 0 - uncompress
2080
+ # 1 - compress
2081
+ # 2 - get uncompress file name
2082
+ # 3 - get compress file name
2083
+ # return: tuple of the new file name and archive format if changed, otherwise the original ones
2084
+ def compress_local_file(self, ifile, fmt = None, act = 0, logact = 0):
2085
+ ms = re.match(r'^(.+)\.({})'.format(self.CMPSTR), ifile)
2086
+ if ms:
2087
+ ofile = ms.group(1)
2088
+ else:
2089
+ ofile = ifile
2090
+ if fmt:
2091
+ if act&1:
2092
+ for ext in self.PGCMPS:
2093
+ if re.search(r'(^|\.)({})(\.|$)'.format(ext), fmt, re.I):
2094
+ ofile += '.' + ext
2095
+ break
2096
+ else:
2097
+ ms = re.search(r'(^|\.)({})$'.format(self.CMPSTR), fmt, re.I)
2098
+ if ms: fmt = re.sub(r'{}{}$'.format(ms.group(1), ms.group(2)), '', fmt, 1)
2099
+ if act < 2 and ifile != ofile: self.convert_files(ofile, ifile, 0, logact)
2100
+ return (ofile, fmt)
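+
+ # Usage sketch (illustration only; hypothetical names). act=2/3 only derive names,
+ # while act=0/1 also run the (un)compress command via convert_files():
+ #   pgf.compress_local_file('data.nc', 'GZ', 3)         # -> ('data.nc.gz', 'GZ')
+ #   pgf.compress_local_file('data.nc.gz', 'TAR.GZ', 2)  # -> ('data.nc', 'TAR')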
2101
+
2102
+ # get file archive format from a given file name; None if not found
2103
+ def get_file_format(self, fname):
2104
+ ms = re.search(r'\.({})$'.format(self.TARSTR), fname, re.I)
2105
+ if ms: return self.PGTARS[ms.group(1)][2]
2106
+ ms = re.search(r'\.({})$'.format(self.CMPSTR), fname, re.I)
2107
+ if ms: return self.PGCMPS[ms.group(1)][2]
2108
+ return None
2109
+
2110
+ # tar/untar multiple local files into/from a single tar/tar.gz/tgz/zip file
2111
+ # tfile: tar file name to be tar/untarred
2112
+ # files: member file names in the tar file
2113
+ # fmt: archive format (defaults to the tar file name extension); must be defined in self.PGTARS
2114
+ # act: 0 - untar
2115
+ # 1 - tar
2116
+ # return: self.SUCCESS upon success, self.FAILURE otherwise
2117
+ def tar_local_file(self, tfile, files, fmt, act, logact = 0):
2118
+ if not fmt:
2119
+ ms = re.search(r'\.({})$'.format(self.TARSTR), tfile, re.I)
2120
+ if ms: fmt = ms.group(1)
2121
+ logact |= self.ERRLOG
2122
+ if not fmt: return self.pglog(tfile + ": Missing archive format", logact)
2123
+ if fmt not in self.PGTARS: return self.pglog("{}: unknown archive format '{}'".format(tfile, fmt), logact)
2124
+ tarray = self.PGTARS[fmt]
2125
+ if not act: #untar member files
2126
+ cmd = "{} {}".format(tarray[1], tfile)
2127
+ if files: cmd += ' ' + ' '.join(files)
2128
+ else:
2129
+ if not files: return self.pglog(tfile + ": Missing member file to archive", logact)
2130
+ cmd = "{} {} {}".format(tarray[0], tfile, ' '.join(files))
2131
+ return self.pgsystem(cmd, logact, 7)
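+
+ # Usage sketch (illustration only; hypothetical names). The format is derived from the
+ # tar file extension when fmt is empty:
+ #   pgf.tar_local_file('bundle.tar.gz', ['a.nc', 'b.nc'], None, 1)   # pack members
+ #   pgf.tar_local_file('bundle.tar.gz', None, None, 0)               # unpack all members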
2132
+
2133
+ # get local file archive format by checking extension of given local file name
2134
+ # file: local file name
2135
+ def local_archive_format(self,file):
2136
+ ms = re.search(r'\.({})$'.format(self.CMPSTR), file)
2137
+ if ms:
2138
+ fmt = ms.group(1)
2139
+ if re.search(r'\.tar\.{}$'.format(fmt), file):
2140
+ return "TAR." + fmt.upper()
2141
+ else:
2142
+ return fmt.upper()
2143
+ elif re.search(r'\.tar$', file):
2144
+ return "TAR"
2145
+ return ''
2146
+
2147
+ # local function to show message with full local file path
2148
+ def lmsg(self, file, msg, logact = 0):
2149
+ if not op.isabs(file): file = self.join_paths(os.getcwd(), file)
2150
+ return self.errlog("{}: {}".format(file, msg), 'L', 1, logact)
2151
+
2152
+ # check if given path is executable locally
2153
+ # return self.SUCCESS if yes, self.FAILURE if not
2154
+ def check_local_executable(self, path, actstr = '', logact = 0):
2155
+ if os.access(path, os.X_OK): return self.SUCCESS
2156
+ if self.check_local_accessible(path, actstr, logact):
2157
+ if actstr: actstr += '-'
2158
+ self.errlog("{}{}: Accessible, but Unexecutable on '{}'".format(actstr, path, self.PGLOG['HOSTNAME']), 'L', 1, logact)
2159
+ return self.FAILURE
2160
+
2161
+ # check if given path is writable locally
2162
+ # return self.SUCCESS if yes, self.FAILURE if not
2163
+ def check_local_writable(self, path, actstr = '', logact = 0):
2164
+ if os.access(path, os.W_OK): return self.SUCCESS
2165
+ if self.check_local_accessible(path, actstr, logact):
2166
+ if actstr: actstr += '-'
2167
+ self.errlog("{}{}: Accessible, but Unwritable on '{}'".format(actstr, path, self.PGLOG['HOSTNAME']), 'L', 1, logact)
2168
+ return self.FAILURE
2169
+
2170
+ # check if given path is accessible locally
2171
+ # return self.SUCCESS if yes, self.FAILURE if not
2172
+ def check_local_accessible(self, path, actstr = '', logact = 0):
2173
+ if os.access(path, os.F_OK): return self.SUCCESS
2174
+ if actstr: actstr += '-'
2175
+ self.errlog("{}{}: Unaccessible on '{}'".format(actstr, path, self.PGLOG['HOSTNAME']), 'L', 1, logact)
2176
+ return self.FAILURE
2177
+
2178
+ # check if given webfile under self.PGLOG['DSSDATA'] is writable
2179
+ # return self.SUCCESS if yes, self.FAILURE if not
2180
+ def check_webfile_writable(self, action, wfile, logact = 0):
2181
+ ms = re.match(r'^({}/\w+)'.format(self.PGLOG['DSSDATA']), wfile)
2182
+ if ms:
2183
+ return self.check_local_writable(ms.group(1), "{} {}".format(action, wfile), logact)
2184
+ else:
2185
+ return self.SUCCESS # do not need check
2186
+
2187
+ # convert one file to another via uncompress, move/copy, and/or compress
2188
+ def convert_files(self, ofile, ifile, keep = 0, logact = 0):
2189
+ if ofile == ifile: return self.SUCCESS
2190
+ oname = ofile
2191
+ iname = ifile
2192
+ if keep: kfile = ifile + ".keep"
2193
+ oext = iext = None
2194
+ for ext in self.PGCMPS:
2195
+ if oext is None:
2196
+ ms = re.match(r'^(.+)\.{}$'.format(ext), ofile)
2197
+ if ms:
2198
+ oname = ms.group(1)
2199
+ oext = ext
2200
+ if iext is None:
2201
+ ms = re.match(r'^(.+)\.{}$'.format(ext), ifile)
2202
+ if ms:
2203
+ iname = ms.group(1)
2204
+ iext = ext
2205
+ if iext and oext and oext == iext:
2206
+ oext = iext = None
2207
+ iname = ifile
2208
+ oname = ofile
2209
+ if iext: # uncompress
2210
+ if keep:
2211
+ if iext == 'zip':
2212
+ kfile = ifile
2213
+ else:
2214
+ self.local_copy_local(kfile, ifile, logact)
2215
+ if self.pgsystem("{} {}".format(self.PGCMPS[iext][1], ifile), logact, 5):
2216
+ if iext == "zip":
2217
+ path = op.dirname(iname)
2218
+ if path and path != '.': self.move_local_file(iname, op.basename(iname), logact)
2219
+ if not keep: self.delete_local_file(ifile, logact)
2220
+ if oname != iname: # move/copy
2221
+ path = op.dirname(oname)
2222
+ if path and not op.exists(path): self.make_local_directory(path, logact)
2223
+ if keep and not op.exists(kfile):
2224
+ self.local_copy_local(oname, iname, logact)
2225
+ kfile = iname
2226
+ else:
2227
+ self.move_local_file(oname, iname, logact)
2228
+ if oext: # compress
2229
+ if keep and not op.exists(kfile):
2230
+ if oext == "zip":
2231
+ kfile = oname
2232
+ else:
2233
+ self.local_copy_local(kfile, oname, logact)
2234
+ if oext == "zip":
2235
+ path = op.dirname(oname)
2236
+ if path:
2237
+ if path != '.': path = self.change_local_directory(path, logact)
2238
+ bname = op.basename(oname)
2239
+ self.pgsystem("{} {}.zip {}".format(self.PGCMPS[oext][0], bname, bname), logact, 5)
2240
+ if path != '.': self.change_local_directory(path, logact)
2241
+ else:
2242
+ self.pgsystem("{} {} {}".format(self.PGCMPS[oext][0], ofile, oname), logact, 5)
2243
+ if not keep and op.exists(ofile): self.delete_local_file(oname, logact)
2244
+ else:
2245
+ self.pgsystem("{} {}".format(self.PGCMPS[oext][0], oname), logact, 5)
2246
+ if keep and op.exists(kfile) and kfile != ifile:
2247
+ if op.exists(ifile):
2248
+ self.delete_local_file(kfile, logact)
2249
+ else:
2250
+ self.move_local_file(ifile, kfile, logact)
2251
+ if op.exists(ofile):
2252
+ return self.SUCCESS
2253
+ else:
2254
+ return self.errlog("{}: ERROR convert from {}".format(ofile, ifile), 'L', 1, logact)
2255
+
2256
+ # compare two files using two given hashes of file information
2257
+ # return 0 if same, 1 if different, -1 if cannot compare
2258
+ @staticmethod
2259
+ def compare_file_info(ainfo, binfo):
2260
+ if not (ainfo and binfo): return -1 # at least one is missing
2261
+ return (0 if (ainfo['data_size'] == binfo['data_size'] and
2262
+ ainfo['date_modified'] == binfo['date_modified'] and
2263
+ ainfo['time_modified'] == binfo['time_modified']) else 1)
2264
+
2265
+ # get local_dirname
2266
+ @staticmethod
2267
+ def get_local_dirname(file):
2268
+ dir = op.dirname(file)
2269
+ if dir == '.': dir = os.getcwd()
2270
+ return dir
2271
+
2272
+ # collect valid file names under a given directory, current directory if empty
2273
+ def get_directory_files(self, dir = None, limit = 0, level = 0):
2274
+ files = []
2275
+ if dir:
2276
+ if level == 0 and op.isfile(dir):
2277
+ files.append(dir)
2278
+ return files
2279
+ dir += "/*"
2280
+ else:
2281
+ dir = "*"
2282
+ for file in glob.glob(dir):
2283
+ if op.isdir(file):
2284
+ if limit == 0 or (limit-level) > 0:
2285
+ fs = self.get_directory_files(file, limit, level+1)
2286
+ if fs: files.extend(fs)
2287
+ else:
2288
+ files.append(file)
2289
+ return files if files else None
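+
+ # Usage sketch (illustration only; hypothetical directory). limit caps how many
+ # subdirectory levels are descended (0 means no limit):
+ #   allfiles = pgf.get_directory_files('/data/example')
+ #   shallow = pgf.get_directory_files('/data/example', limit = 1)   # descend one level at most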
2290
+
2291
+ # reads a local file into a string and returns it
2292
+ def read_local_file(self, file, logact = 0):
2293
+ try:
2294
+ fd = open(file, 'r')
2295
+ except Exception as e:
2296
+ return self.errlog("{}: {}".format(file, str(e)), 'L', 1, logact)
2297
+ else:
2298
+ fstr = fd.read()
2299
+ fd.close()
2300
+ return fstr
2301
+
2302
+ # open a local file and return the file handler
2303
+ def open_local_file(self, file, mode = 'r', logact = None):
2304
+ if logact is None: logact = self.LOGERR
2305
+ try:
2306
+ fd = open(file, mode)
2307
+ except Exception as e:
2308
+ return self.errlog("{}: {}".format(file, str(e)), 'L', 1, logact)
2309
+ return fd
2310
+
2311
+ # change absolute paths to relative paths
2312
+ def get_relative_paths(self, files, cdir, logact = 0):
2313
+ cnt = len(files)
2314
+ if cnt == 0: return files
2315
+ if not cdir: cdir = os.getcwd()
2316
+ for i in range(cnt):
2317
+ afile = files[i]
2318
+ if op.isabs(afile):
2319
+ files[i] = self.join_paths(afile, cdir, 1)
2320
+ else:
2321
+ self.pglog("{}: is not under the working directory '{}'".format(afile, cdir), logact)
2322
+ return files
2323
+
2324
+ # check if the action to path is blocked
2325
+ def check_block_path(self, path, act = '', logact = 0):
2326
+ blockpath = self.PGLOG['USRHOME']
2327
+ if not act: act = 'Copy'
2328
+ if re.match(r'^{}'.format(blockpath), path):
2329
+ return self.pglog("{}: {} to {} is blocked".format(path, act, blockpath), logact)
2330
+ else:
2331
+ return 1
2332
+
2333
+ # join two filenames by using the common prefix/suffix and keeping the different main bodies,
2334
+ # the bodies are separated by sep; replace fext with text if provided
2335
+ def join_filenames(self, name1, name2, sep = '-', fext = None, text = None):
2336
+ if fext:
2337
+ name1 = self.remove_file_extention(name1, fext)
2338
+ name2 = self.remove_file_extention(name2, fext)
2339
+ if name1 == name2:
2340
+ fname = name1
2341
+ else:
2342
+ fname = suffix = ''
2343
+ cnt1 = len(name1)
2344
+ cnt2 = len(name2)
2345
+ cnt = (cnt1 if cnt1 < cnt2 else cnt2)
2346
+ # get common prefix
2347
+ for pcnt in range(cnt):
2348
+ if name1[pcnt] != name2[pcnt]: break
2349
+ # get common suffix
2350
+ cnt -= pcnt
2351
+ for scnt in range(0, cnt):
2352
+ if name1[cnt1-scnt-1] != name2[cnt2-scnt-1]: break
2353
+ body1 = name1[pcnt:(cnt1-scnt)]
2354
+ body2 = name2[pcnt:(cnt2-scnt)]
2355
+ if scnt > 0:
2356
+ suffix = name2[(cnt1-scnt):cnt1]
2357
+ if name1[cnt1-scnt-1].isnumeric():
2358
+ ms = re.match(r'^([\d\.-]*\d)', suffix)
2359
+ if ms: body1 += ms.group(1) # include trailing digit chrs to body1
2360
+ if pcnt > 0:
2361
+ fname = name1[0:pcnt]
2362
+ if name2[pcnt].isnumeric():
2363
+ ms = re.search(r'(\d[\d\.-]*)$', fname)
2364
+ if ms: body2 = ms.group(1) + body2 # include leading digit chrs to body2
2365
+ fname += body1 + sep + body2
2366
+ if suffix: fname += suffix
2367
+ if text: fname += "." + text
2368
+ return fname
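+
+ # Worked example (hypothetical names): the common prefix/suffix are kept once and the
+ # differing bodies are joined by sep:
+ #   pgf.join_filenames('data.2000.nc', 'data.2001.nc')   # -> 'data.2000-2001.nc'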
2369
+
2370
+ # remove the given file extension if provided
2371
+ # otherwise try to remove a predefined compression extension in self.PGCMPS
2372
+ def remove_file_extention(self, fname, fext):
2373
+ if not fname: return ''
2374
+ if fext:
2375
+ fname = re.sub(r'\.{}$'.format(fext), '', fname, 1, re.I)
2376
+ else:
2377
+ for fext in self.PGCMPS:
2378
+ mp = r'\.{}$'.format(fext)
2379
+ if re.search(mp, fname):
2380
+ fname = re.sub(mp, '', fname, 1, re.I)
2381
+ break
2382
+ return fname
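+
+ # Worked examples (hypothetical names): with fext empty, a known compression extension
+ # from self.PGCMPS is stripped:
+ #   pgf.remove_file_extention('data.nc.gz', None)   # -> 'data.nc'
+ #   pgf.remove_file_extention('data.nc', 'nc')      # -> 'data'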
2383
+
2384
+ # check if a previous down storage system is up now for given dflag
2385
+ # return error message if failed checking, and None otherwise
2386
+ def check_storage_down(self, dflag, dpath, dscheck, logact = 0):
2387
+ if dflag not in self.DHOSTS:
2388
+ if logact: self.pglog(dflag + ": Unknown Down Flag for Storage Systems", logact)
2389
+ return None
2390
+ dhost = self.DHOSTS[dflag]
2391
+ if not dpath and dflag in self.DPATHS: dpath = self.DPATHS[dflag]
2392
+ for loop in range(2):
2393
+ (stat, msg) = self.host_down_status(dpath, dhost, 1, logact)
2394
+ if stat < 0: break # stop retry for planned down
2395
+
2396
+ if not dscheck and self.PGLOG['DSCHECK']: dscheck = self.PGLOG['DSCHECK']
2397
+ if dscheck:
2398
+ didx = dscheck['dflags'].find(dflag)
2399
+ if msg:
2400
+ if didx < 0: dscheck['dflags'] += dflag
2401
+ else:
2402
+ if didx > -1: dscheck['dflags'] = dscheck['dflags'].replace(dflag, '', 1)
2403
+
2404
+ return msg
2405
+
2406
+ # check if previous down storage systems recorded in the dflags
2407
+ # return an array of strings for storage systems that are still down,
2408
+ # and an empty array if all are up
2409
+ def check_storage_dflags(self, dflags, dscheck = None, logact = 0):
2410
+ if not dflags: return 0
2411
+ isdict = isinstance(dflags, dict)
2412
+ msgary = []
2413
+ for dflag in dflags:
2414
+ msg = self.check_storage_down(dflag, dflags[dflag] if isdict else None, dscheck, logact)
2415
+ if msg: msgary.append(msg)
2416
+ if not msgary:
2417
+ if not dscheck and self.PGLOG['DSCHECK']: dscheck = self.PGLOG['DSCHECK']
2418
+ cidx = dscheck['cindex'] if dscheck else 0
2419
+ # clean dflags if the down storage systems are all up
2420
+ if cidx: self.pgexec("UPDATE dscheck SET dflags = '' WHERE cindex = {}".format(cidx), logact)
2421
+ return msgary
2422
+
2423
+ # check whether a GDEX file is backed up for a given file record;
2424
+ # clear the cached bfile records if frec is None.
2425
+ # return 0 if not yet, 1 if backed up, or -1 if backed up but modified
2426
+ def file_backup_status(self, frec, chgdays = 1, logact = 0):
2427
+ if frec is None:
2428
+ self.BFILES.clear()
2429
+ return 0
2430
+ bid = frec['bid']
2431
+ if not bid: return 0
2432
+ fields = 'bfile, dsid, date_modified'
2433
+ if chgdays > 0: fields += ', note'
2434
+ if bid not in self.BFILES: self.BFILES[bid] = self.pgget('bfile', fields, 'bid = {}'.format(bid), logact)
2435
+ brec = self.BFILES[bid]
2436
+ if not brec: return 0
2437
+ if 'sfile' in frec:
2438
+ fname = frec['sfile']
2439
+ ftype = 'Saved'
2440
+ else:
2441
+ fname = frec['wfile']
2442
+ ftype = 'Web'
2443
+ ret = 1
2444
+ fdate = frec['date_modified']
2445
+ bdate = brec['date_modified']
2446
+ if chgdays > 0 and self.diffdate(fdate, bdate) >= chgdays:
2447
+ ret = -1
2448
+ if brec['note']:
2449
+ mp = r'{}<:>{}<:>(\d+)<:>(\w+)<:>'.format(fname, frec['type'])
2450
+ ms = re.search(mp, brec['note'])
2451
+ if ms:
2452
+ fsize = int(ms.group(1))
2453
+ cksum = ms.group(2)
2454
+ if cksum and cksum == frec['checksum'] or not cksum and fsize == frec['data_size']:
2455
+ ret = 1
2456
+ if logact:
2457
+ if ret == 1:
2458
+ msg = "{}-{}: {} file backed up to /{}/{} by {}".format(frec['dsid'], fname, ftype, brec['dsid'], brec['bfile'], bdate)
2459
+ else:
2460
+ msg = "{}-{}: {} file changed on {}".format(frec['dsid'], fname, ftype, fdate)
2461
+ self.pglog(msg, logact)
2462
+ return ret