rda-python-metrics 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rda-python-metrics might be problematic. Click here for more details.
- rda_python_metrics/PgIPInfo.py +188 -0
- rda_python_metrics/PgView.py +782 -0
- rda_python_metrics/__init__.py +1 -0
- rda_python_metrics/fillawsusage.py +282 -0
- rda_python_metrics/fillawsusage.usg +17 -0
- rda_python_metrics/fillcodusage.py +247 -0
- rda_python_metrics/fillcodusage.usg +21 -0
- rda_python_metrics/fillcountry.py +79 -0
- rda_python_metrics/fillendtime.py +93 -0
- rda_python_metrics/fillglobususage.py +287 -0
- rda_python_metrics/fillglobususage.usg +17 -0
- rda_python_metrics/fillipinfo.py +185 -0
- rda_python_metrics/fillipinfo.usg +18 -0
- rda_python_metrics/filloneorder.py +155 -0
- rda_python_metrics/filloneorder.usg +41 -0
- rda_python_metrics/fillrdadb.py +151 -0
- rda_python_metrics/fillrdadb.usg +32 -0
- rda_python_metrics/filltdsusage.py +289 -0
- rda_python_metrics/filltdsusage.usg +17 -0
- rda_python_metrics/filluser.py +216 -0
- rda_python_metrics/filluser.usg +16 -0
- rda_python_metrics/logarch.py +359 -0
- rda_python_metrics/logarch.usg +27 -0
- rda_python_metrics/pgperson.py +72 -0
- rda_python_metrics/pgusername.py +50 -0
- rda_python_metrics/viewallusage.py +350 -0
- rda_python_metrics/viewallusage.usg +198 -0
- rda_python_metrics/viewcheckusage.py +289 -0
- rda_python_metrics/viewcheckusage.usg +185 -0
- rda_python_metrics/viewcodusage.py +314 -0
- rda_python_metrics/viewcodusage.usg +184 -0
- rda_python_metrics/viewordusage.py +340 -0
- rda_python_metrics/viewordusage.usg +224 -0
- rda_python_metrics/viewrqstusage.py +362 -0
- rda_python_metrics/viewrqstusage.usg +217 -0
- rda_python_metrics/viewtdsusage.py +323 -0
- rda_python_metrics/viewtdsusage.usg +191 -0
- rda_python_metrics/viewwebfile.py +294 -0
- rda_python_metrics/viewwebfile.usg +212 -0
- rda_python_metrics/viewwebusage.py +371 -0
- rda_python_metrics/viewwebusage.usg +211 -0
- rda_python_metrics-1.0.4.dist-info/METADATA +18 -0
- rda_python_metrics-1.0.4.dist-info/RECORD +47 -0
- rda_python_metrics-1.0.4.dist-info/WHEEL +5 -0
- rda_python_metrics-1.0.4.dist-info/entry_points.txt +22 -0
- rda_python_metrics-1.0.4.dist-info/licenses/LICENSE +21 -0
- rda_python_metrics-1.0.4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
###############################################################################
|
|
4
|
+
#
|
|
5
|
+
# Title : fillendtime
|
|
6
|
+
# Author : Zaihua Ji, zji@ucar.edu
|
|
7
|
+
# Date : 04/08/2024
|
|
8
|
+
# 2025-03-26 transferred to package rda_python_metrics from
|
|
9
|
+
# https://github.com/NCAR/rda-database.git
|
|
10
|
+
# Purpose : python program to fill field dlupdt.endtime from enddate/endhour
|
|
11
|
+
#
|
|
12
|
+
# Github : https://github.com/NCAR/rda-python-metrics.git
|
|
13
|
+
#
|
|
14
|
+
###############################################################################
|
|
15
|
+
#
|
|
16
|
+
import sys
|
|
17
|
+
import re
|
|
18
|
+
import glob
|
|
19
|
+
from os import path as op
|
|
20
|
+
from rda_python_common import PgLOG
|
|
21
|
+
from rda_python_common import PgUtil
|
|
22
|
+
from rda_python_common import PgFile
|
|
23
|
+
from rda_python_common import PgDBI
|
|
24
|
+
from . import PgIPInfo
|
|
25
|
+
|
|
26
|
+
# the define options for gathering ipinfo data
|
|
27
|
+
# option bit flags
# NOTE(review): none of these flags is referenced in the visible part of this module
MONTH = 0x02   # fix data usages for given months
YEARS = 0x04   # fix data usages for given years
NDAYS = 0x08   # fix data usages in recent number of days
MULTI = MONTH | YEARS
SINGL = NDAYS

IPINFO = {
    'USGTBL': ['ipinfo', 'allusage', 'tdsusage'],   # known usage table names
    'CDATE': PgUtil.curdate(),                      # current date, evaluated once at import
}
|
|
37
|
+
|
|
38
|
+
#
|
|
39
|
+
# main function to run this program
|
|
40
|
+
#
|
|
41
|
+
def main():
    """Parse the command line and fill dlupdt.endtime for the requested datasets.

    Argument "-b" sets PgLOG.PGLOG['BCKGRND']; every other argument is taken
    as a dataset id.  Without any dataset id all datasets are processed.
    The process is ended with sys.exit(0).
    """
    dataset_ids = []   # stays empty to process all datasets

    for arg in sys.argv[1:]:
        if arg == "-b":
            PgLOG.PGLOG['BCKGRND'] = 1
            continue
        dataset_ids.append(PgUtil.format_dataset_id(arg))

    PgDBI.dssdb_dbname()

    if not dataset_ids:
        fill_endtime()
    else:
        for dataset_id in dataset_ids:
            fill_endtime(dataset_id)

    sys.exit(0)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
#
|
|
65
|
+
# Fill endtime in table dssdb.dlupdt
|
|
66
|
+
#
|
|
67
|
+
def fill_endtime(dsid = None):
    """Fill field dssdb.dlupdt.endtime from fields enddate/endhour.

    dsid: dataset id to restrict the update to; None (default) for all datasets.

    Every dlupdt record with a non-NULL enddate gets its endtime set to
    '<enddate> <endhour>:59:59'; a missing endhour defaults to 23.
    A summary of the updated records is logged at the end.
    """
    cnd = "dsid = '{}' AND ".format(dsid) if dsid else ''
    # a comparison against NULL ('<> NULL') is never true in SQL and would
    # select no record at all; IS NOT NULL is the proper test
    cnd += 'enddate IS NOT NULL ORDER BY dsid, lindex'
    pgrecs = PgDBI.pgmget('dlupdt', 'lindex, dsid, enddate, endhour', cnd)

    cnt = len(pgrecs['lindex']) if pgrecs else 0
    dsids = []   # distinct dataset ids of the updated records
    for i in range(cnt):
        lidx = pgrecs['lindex'][i]
        edate = pgrecs['enddate'][i]
        ehour = pgrecs['endhour'][i]
        rdsid = pgrecs['dsid'][i]
        if rdsid not in dsids: dsids.append(rdsid)
        if ehour is None: ehour = 23   # no end hour given: use the last hour of the day
        etime = "{} {}:59:59".format(edate, ehour)
        PgDBI.pgexec("UPDATE dlupdt SET endtime = '{}' WHERE lindex = {}".format(etime, lidx), PgLOG.LGEREX)

    s = 's' if cnt > 1 else ''
    dscnt = len(dsids)
    dsstr = dsids[0] if dscnt == 1 else '{} datasets'.format(dscnt)
    PgLOG.pglog("{}: {} record{} updated for dssdb.dlupdt.endtime".format(dsstr, cnt, s), PgLOG.LOGWRN)
|
|
89
|
+
|
|
90
|
+
#
|
|
91
|
+
# call main() to start program
|
|
92
|
+
#
|
|
93
|
+
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
###############################################################################
|
|
4
|
+
#
|
|
5
|
+
# Title : fillglobususage
|
|
6
|
+
# Author : Zaihua Ji, zji@ucar.edu
|
|
7
|
+
# Date : 03/11/2022
|
|
8
|
+
# 2025-03-26 transferred to package rda_python_metrics from
|
|
9
|
+
# https://github.com/NCAR/rda-database.git
|
|
10
|
+
# Purpose : python program to retrieve info from Globus logs
|
|
11
|
+
# and fill table wusages in PgSQL database dssdb.
|
|
12
|
+
#
|
|
13
|
+
# Github : https://github.com/NCAR/rda-python-metrics.git
|
|
14
|
+
#
|
|
15
|
+
###############################################################################
|
|
16
|
+
#
|
|
17
|
+
import sys
|
|
18
|
+
import re
|
|
19
|
+
import glob
|
|
20
|
+
from os import path as op
|
|
21
|
+
from rda_python_common import PgLOG
|
|
22
|
+
from rda_python_common import PgUtil
|
|
23
|
+
from rda_python_common import PgFile
|
|
24
|
+
from rda_python_common import PgDBI
|
|
25
|
+
from rda_python_common import PgSplit
|
|
26
|
+
from . import PgIPInfo
|
|
27
|
+
|
|
28
|
+
# settings shared by the functions of this module
USAGE = {
    # base name of the usage tables to fill
    'PGTBL': "wusage",
    # directory holding the Globus (gridftp) access logs
    'GBSDIR': PgLOG.PGLOG["DSSDATA"] + "/work/logs/gridftp/",
    # log file name template; filled with a server-number pattern and a date
    'GBSLOG': "access_log_gridftp0{}_{}",
}
|
|
33
|
+
|
|
34
|
+
#
|
|
35
|
+
# main function to run this program
|
|
36
|
+
#
|
|
37
|
+
def main():
    """Parse the command line, locate the Globus log files and fill the usages.

    Recognized options are -b (sets PgLOG.PGLOG['BCKGRND']) and exactly one
    of -d, -p or -N followed by its parameter values.  The usage is shown
    when the option or its parameters are missing.  Ends with sys.exit(0).
    """
    argv = sys.argv[1:]
    params = []                  # values given for the selected option
    option = None                # selected option letter: d, p or N
    datelimits = [None, None]    # [begin, end] dates, set by get_log_file_names()
    option_pattern = re.compile(r'^-(b|d|p|N)$')

    for arg in argv:
        matched = option_pattern.match(arg)
        if matched:
            letter = matched.group(1)
            if letter == 'b':
                PgLOG.PGLOG['BCKGRND'] = 1
            elif option:
                PgLOG.pglog("{}: Option -{} is present already".format(arg, option), PgLOG.LGWNEX)
            else:
                option = letter
        elif arg.startswith('-'):
            PgLOG.pglog(arg + ": Invalid Option", PgLOG.LGWNEX)
        elif option:
            params.append(arg)
        else:
            PgLOG.pglog(arg + ": Invalid Parameter", PgLOG.LGWNEX)

    if not option or not params:
        PgLOG.show_usage('fillglobususage')

    PgDBI.dssdb_dbname()
    cmdstr = "fillglobususage {}".format(' '.join(argv))
    PgLOG.cmdlog(cmdstr)
    PgFile.change_local_directory(USAGE['GBSDIR'])

    filenames = get_log_file_names(option, params, datelimits)
    if not filenames:
        PgLOG.pglog("No log file found for given command: " + cmdstr, PgLOG.LOGWRN)
    else:
        fill_globus_usages(filenames, datelimits)

    PgLOG.pglog(None, PgLOG.LOGWRN)
    sys.exit(0)
|
|
76
|
+
|
|
77
|
+
#
|
|
78
|
+
# get the names of the log files matching the given dates or period
|
|
79
|
+
#
|
|
80
|
+
def get_log_file_names(option, params, datelimits):
    """Collect the names of the existing Globus log files for the given option.

    option:     'd' for a list of log file dates, 'N' for a number of recent
                days, anything else for a period of begin date [end date]
    params:     the values given for the option
    datelimits: two-element list; for options other than 'd' the begin date,
                and the end date if provided, are stored into it

    Returns the list of matching log file names, sorted per date.
    """
    def logs_of(date):
        # glob the log files of all servers for one date
        fdate = PgUtil.format_date(date, 'MMDDYYYY')
        return sorted(glob.glob(USAGE['GBSLOG'].format('?', fdate)))

    filenames = []

    if option == 'd':
        for pdate in params:
            filenames.extend(logs_of(pdate))
        return filenames

    if option == 'N':
        edate = PgUtil.curdate()
        pdate = datelimits[0] = PgUtil.adddate(edate, 0, 0, -int(params[0]))
    else:
        pdate = datelimits[0] = params[0]
        if len(params) > 1:
            edate = datelimits[1] = params[1]
        else:
            edate = PgUtil.curdate()

    # walk day by day from the begin date through the end date
    while pdate <= edate:
        filenames.extend(logs_of(pdate))
        pdate = PgUtil.adddate(pdate, 0, 0, 1)

    return filenames
|
|
107
|
+
|
|
108
|
+
#
|
|
109
|
+
# Fill Globus usages into the yearly wusage tables of DSS PgSQL database dssdb from globus access logs
|
|
110
|
+
#
|
|
111
|
+
def fill_globus_usages(fnames, datelimits):
    """Parse Globus access logs and add a usage record for each file download.

    fnames:     list of log file names to process
    datelimits: [begin, end] dates; a false entry leaves that side unlimited

    Only successful GET entries (status 200 or 206) of at least 100 bytes
    are considered.  Consecutive partial-content (206) entries for the same
    ip/dataset/file are merged into one record with their sizes summed up.
    """
    entry_pattern = re.compile(r'^([\d\.]+)\s.*\s+\[(\S+).*"GET\s+/(ds\d\d\d\.\d|[a-z]\d{6})/(\S+)\s.*\s(200|206)\s+(\d+)\s+"(\S+)"\s+"(.+)"$')
    cntall = addall = 0

    for logfile in fnames:
        if not op.isfile(logfile):
            PgLOG.pglog("{}: Not exists for Gathering Globus usage".format(logfile), PgLOG.LOGWRN)
            continue
        PgLOG.pglog("Gathering usage info from {} at {}".format(logfile, PgLOG.current_datetime()), PgLOG.LOGWRN)
        globus = PgFile.open_local_file(logfile)
        if not globus: continue
        record = None   # pending merged 206 record, not added yet
        ryear = None    # year the pending record belongs to
        pkey = None     # merge key of the pending record
        cntadd = entcnt = 0
        try:
            while True:
                line = globus.readline()
                if not line: break
                entcnt += 1
                if entcnt%10000 == 0:
                    PgLOG.pglog("{}: {}/{} Globus log entries processed/records added".format(logfile, entcnt, cntadd), PgLOG.WARNLG)

                ms = entry_pattern.match(line)
                if not ms: continue
                size = int(ms.group(6))
                if size < 100: continue  # ignore small files
                ip = ms.group(1)
                dsid = PgUtil.format_dataset_id(ms.group(3))
                wfile = ms.group(4)
                stat = ms.group(5)
                sline = ms.group(7)
                engine = ms.group(8)
                (year, quarter, date, rtime) = get_record_date_time(ms.group(2))
                if datelimits[0] and date < datelimits[0]: continue
                if datelimits[1] and date > datelimits[1]: continue
                locflag = 'O' if re.match(r'^https://stratus\.', sline) else 'G'
                idx = wfile.find('?')
                if idx > -1: wfile = wfile[:idx]   # drop the query string

                if re.match(r'^curl', engine, re.I):
                    method = "CURL"
                elif re.match(r'^wget', engine, re.I):
                    method = "WGET"
                elif re.match(r'^python', engine, re.I):
                    method = "PYTHN"
                else:
                    method = "WEB"

                # only partial-content entries are candidates for merging
                key = "{}:{}:{}".format(ip, dsid, wfile) if stat == '206' else None

                if record:
                    if key == pkey:
                        record['size'] += size
                        continue
                    # flush the pending record under its own year, not the
                    # year of the current entry (they differ at a year change)
                    cntadd += add_file_usage(ryear, record)
                record = {'ip' : ip, 'dsid' : dsid, 'wfile' : wfile, 'date' : date,
                          'time' : rtime, 'quarter' : quarter, 'size' : size,
                          'locflag' : locflag, 'method' : method}
                ryear = year
                pkey = key
                if not pkey:
                    cntadd += add_file_usage(ryear, record)
                    record = None
            if record: cntadd += add_file_usage(ryear, record)
        finally:
            # release the log file even if an entry fails to process
            globus.close()
        cntall += entcnt
        addall += cntadd

    # NOTE(review): totals are logged once after all log files are processed
    PgLOG.pglog("{} Globus usage records added for {} entries at {}".format(addall, cntall, PgLOG.current_datetime()), PgLOG.LOGWRN)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def get_record_date_time(ctime):
    """Split an access log time stamp into year, quarter, date and time.

    ctime: time stamp in form of DD/Mon/YYYY:HH:MM:SS

    Returns the tuple (year string, quarter, 'YYYY-MM-DD', 'HH:MM:SS').
    An invalid time stamp is reported via PgLOG.pglog() with PgLOG.LGEREX.
    Assumes PgUtil.get_month() returns the month as a number 1-12 -- TODO confirm.
    """
    ms = re.search(r'^(\d+)/(\w+)/(\d+):(\d+:\d+:\d+)$', ctime)
    if not ms:
        PgLOG.pglog(ctime + ": Invalid date/time format", PgLOG.LGEREX)
        return None

    d = int(ms.group(1))
    m = PgUtil.get_month(ms.group(2))
    y = ms.group(3)
    t = ms.group(4)
    # floor division is required: '/' yields a float (e.g. 1.67 for March)
    # in Python 3 instead of the integer quarter number
    q = 1 + (m - 1)//3
    return (y, q, "{}-{:02}-{:02}".format(y, m, d), t)
|
|
193
|
+
|
|
194
|
+
#
|
|
195
|
+
# Fill usage of a single online data file into table dssdb.wusage of DSS PgSQL database
|
|
196
|
+
#
|
|
197
|
+
def add_file_usage(year, logrec):
    """Add the usage of a single online data file to the yearly usage tables.

    year:   year of the usage, selecting the yearly tables
    logrec: dict of one gathered log record (ip, dsid, wfile, date, time,
            quarter, size, locflag, method)

    Returns 0 if the file or the user cannot be identified, if the usage is
    recorded already, or if adding to allusage fails; otherwise the result
    of PgDBI.add_yearly_wusage().
    """
    wfrec = get_wfile_wid(logrec['dsid'], logrec['wfile'])
    if not wfrec: return 0

    # skip a usage that is in the yearly table already
    table = "{}_{}".format(USAGE['PGTBL'], year)
    cond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(
        wfrec['wid'], logrec['method'], logrec['date'], logrec['time'])
    if PgDBI.pgget(table, "", cond, PgLOG.LOGWRN): return 0

    wurec = get_wuser_record(logrec['ip'], logrec['date'])
    if not wurec: return 0

    record = {
        'wid' : wfrec['wid'],
        'dsid' : wfrec['dsid'],
        'wuid_read' : wurec['wuid'],
        'date_read' : logrec['date'],
        'time_read' : logrec['time'],
        'size_read' : logrec['size'],
        'method' : logrec['method'],
        'locflag' : logrec['locflag'],
        'ip' : logrec['ip'],
        'quarter' : logrec['quarter'],
    }

    if not add_to_allusage(year, logrec, wurec): return 0
    return PgDBI.add_yearly_wusage(year, record)
|
|
222
|
+
|
|
223
|
+
def add_to_allusage(year, logrec, wurec):
    """Add one usage to the yearly allusage table.

    year:   year of the usage
    logrec: dict of the gathered log record
    wurec:  dict of the user record providing email, org_type and country

    Returns the result of PgDBI.add_yearly_allusage().
    """
    pgrec = {
        'email' : wurec['email'],
        'org_type' : wurec['org_type'],
        'country' : wurec['country'],
        'dsid' : logrec['dsid'],
        'date' : logrec['date'],
        'quarter' : logrec['quarter'],
        'time' : logrec['time'],
        'size' : logrec['size'],
        'method' : logrec['method'],
        'ip' : logrec['ip'],
        'source' : 'W',
    }
    return PgDBI.add_yearly_allusage(year, pgrec)
|
|
235
|
+
|
|
236
|
+
#
|
|
237
|
+
# return the wfile record found for the given dataset file, or a false value if none is found
|
|
238
|
+
#
|
|
239
|
+
def get_wfile_wid(dsid, wfile):
    """Find the record of a web file of a dataset.

    dsid:  dataset id
    wfile: web file name

    The file is looked up in the wfile table of the dataset first, then in
    table wfile_delete, and at last via table wmove.  Returns the record
    found, or the false result of the last lookup.
    """
    wfcond = "wfile = '{}'".format(wfile)

    found = PgSplit.pgget_wfile(dsid, "*", wfcond)
    if found:
        found['dsid'] = dsid
        return found

    found = PgDBI.pgget("wfile_delete", "*", "{} AND dsid = '{}'".format(wfcond, dsid))
    if found: return found

    # try the moved-file table; it gives the dataset id and wid to look up
    found = PgDBI.pgget("wmove", "wid, dsid", wfcond)
    if not found: return found

    found = PgSplit.pgget_wfile(found['dsid'], "*", "wid = {}".format(found['wid']))
    if found: found['dsid'] = dsid
    return found
|
|
254
|
+
|
|
255
|
+
# return wuser record upon success, None otherwise
|
|
256
|
+
def get_wuser_record(ip, date):
    """Get, or add if missing, the wuser record for the user at a given IP.

    ip:   IP address of the user
    date: date of the usage

    The user is identified by the email 'unknown@<hostname>' built from the
    ip info.  The start_date of an existing user is moved to date when
    PgUtil.diffdate(start_date, date) is positive.  Returns the wuser
    record, or None if no ip info is available or the user cannot be added.
    """
    ipinfo = PgIPInfo.set_ipinfo(ip)
    if not ipinfo: return None

    record = {'org_type' : ipinfo['org_type'], 'country' : ipinfo['country']}
    email = 'unknown@' + ipinfo['hostname']
    emcond = "email = '{}'".format(email)

    pgrec = PgDBI.pgget("wuser", 'wuid, email, org_type, country, start_date', emcond, PgLOG.LOGERR)
    if pgrec:
        if PgUtil.diffdate(pgrec['start_date'], date) > 0:
            pgrec['start_date'] = date
            record['start_date'] = date
            PgDBI.pgupdt('wuser', record, emcond)
        return pgrec

    # no such user yet: add one in
    record.update(email=email, stat_flag='A', start_date=date)
    wuid = PgDBI.pgadd("wuser", record, PgLOG.LOGERR|PgLOG.AUTOID)
    if not wuid: return None

    record['wuid'] = wuid
    PgLOG.pglog("{} Added as wuid({})".format(email, wuid), PgLOG.LGWNEM)
    return record
|
|
283
|
+
|
|
284
|
+
#
|
|
285
|
+
# call main() to start program
|
|
286
|
+
#
|
|
287
|
+
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
|
|
2
|
+
Retrieves usage information from Globus Server logs under directory
|
|
3
|
+
/gpfs/fs1/collections/rda/work/logs/gridftp/ to fill table 'wusage' in
|
|
4
|
+
database 'dssdb'.
|
|
5
|
+
|
|
6
|
+
Usage: fillglobususage [-b] [-d LogFileDates] [-N NumberDay] [-p BeginDate [EndDate]]
|
|
7
|
+
|
|
8
|
+
select option, -d, -N or -p to run this application.
|
|
9
|
+
|
|
10
|
+
- Option -b, log process information into logfile only;
|
|
11
|
+
|
|
12
|
+
- Option -d, retrieve usage info from given log file dates;
|
|
13
|
+
|
|
14
|
+
- Option -N, retrieve usage info in recent NumberDay days;
|
|
15
|
+
|
|
16
|
+
- Option -p, retrieve usage info between given period. For missing EndDate,
|
|
17
|
+
it defaults to the current date.
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
###############################################################################
|
|
4
|
+
#
|
|
5
|
+
# Title : fillipinfo
|
|
6
|
+
# Author : Zaihua Ji, zji@ucar.edu
|
|
7
|
+
# Date : 08/26/2023
|
|
8
|
+
# 2025-03-26 transferred to package rda_python_metrics from
|
|
9
|
+
# https://github.com/NCAR/rda-database.git
|
|
10
|
+
# Purpose : python program to retrieve ip info and
|
|
11
|
+
# fill table ipinfo
|
|
12
|
+
#
|
|
13
|
+
# Github : https://github.com/NCAR/rda-python-metrics.git
|
|
14
|
+
#
|
|
15
|
+
###############################################################################
|
|
16
|
+
#
|
|
17
|
+
import sys
|
|
18
|
+
import re
|
|
19
|
+
import glob
|
|
20
|
+
from os import path as op
|
|
21
|
+
from rda_python_common import PgLOG
|
|
22
|
+
from rda_python_common import PgUtil
|
|
23
|
+
from rda_python_common import PgFile
|
|
24
|
+
from rda_python_common import PgDBI
|
|
25
|
+
from . import PgIPInfo
|
|
26
|
+
|
|
27
|
+
# the define options for gathering ipinfo data
|
|
28
|
+
# option bit flags
MONTH = 0x02   # fix data usages for given months
YEARS = 0x04   # fix data usages for given years
NDAYS = 0x08   # fix data usages in recent number of days
MULTI = MONTH | YEARS   # options taking any number of input values
SINGL = NDAYS           # options taking one input value only

IPINFO = {
    'USGTBL': ['ipinfo', 'allusage', 'tdsusage'],   # table names accepted for option -t
    'CDATE': PgUtil.curdate(),                      # current date, evaluated once at import
}
|
|
38
|
+
|
|
39
|
+
#
|
|
40
|
+
# main function to run this program
|
|
41
|
+
#
|
|
42
|
+
def main():
    """Parse the command line and fix the ip info of the selected table.

    Options: -b sets PgLOG.PGLOG['BCKGRND']; -t is followed by the table
    name; the first of -m, -y or -N selects how the input values are
    interpreted.  The usage is shown if the table name or the input values
    are missing.  A number of recent days (-N) is converted into the list
    of months it covers.  Ends with sys.exit(0).
    """
    argv = sys.argv[1:]
    inputs = []          # input values of the selected option
    table = None         # table name, one of IPINFO['USGTBL']
    option = 0
    topt = False         # True while the value of -t is expected
    flags = {'-m': MONTH, '-y': YEARS, '-N': NDAYS}

    for arg in argv:
        if arg == "-b":
            PgLOG.PGLOG['BCKGRND'] = 1
        elif re.match(r'^-[mNy]$', arg) and option == 0:
            option = flags.get(arg, option)
        elif arg == "-t":
            topt = True
        elif arg.startswith('-'):
            PgLOG.pglog(arg + ": Invalid Option", PgLOG.LGWNEX)
        elif topt:
            if arg not in IPINFO['USGTBL']:
                PgLOG.pglog("{}: Invalid Table Name; must be in ({})".format(arg, ','.join(IPINFO['USGTBL'])), PgLOG.LGWNEX)
            table = arg
            topt = False
        elif option&MULTI or (option&SINGL and not inputs):
            inputs.append(arg)
        else:
            PgLOG.pglog(arg + ": Invalid Parameter", PgLOG.LGWNEX)

    if not inputs or not table:
        PgLOG.show_usage('fillipinfo')
    PgDBI.dssdb_dbname()
    PgLOG.cmdlog("fillipinfo {}".format(' '.join(argv)))

    if option&NDAYS:
        # turn the number of days into the months between the limit and today
        curdate = IPINFO['CDATE']
        datelimit = PgUtil.adddate(curdate, 0, 0, -int(inputs[0]))
        option = MONTH
        inputs = []
        while curdate >= datelimit:
            parts = curdate.split('-')
            inputs.append("{}-{}".format(parts[0], parts[1]))
            # step back by the day of month, into the previous month
            curdate = PgUtil.adddate(curdate, 0, 0, -int(parts[2]))

    fill_ip_info(option, inputs, table)

    sys.exit(0)
|
|
91
|
+
|
|
92
|
+
#
|
|
93
|
+
# Fix ip info in the given table (ipinfo, allusage or tdsusage) for the given days, months or years
|
|
94
|
+
#
|
|
95
|
+
def fill_ip_info(option, inputs, table):
    """Fix the ip info of a usage table day by day for the given inputs.

    option: NDAYS, MONTH or YEARS flag telling how to interpret inputs
    inputs: list of numbers of recent days, 'YYYY-MM' months or 'YYYY' years
    table:  name of the table to fix: ipinfo, allusage or tdsusage

    Returns the sum of the counts returned by the table's fix function.
    An unknown table name or option is reported via PgLOG.pglog() with
    PgLOG.LGEREX.
    """
    # explicit dispatch table in place of eval() on a built function name
    fixers = {
        'ipinfo' : fix_ipinfo_records,
        'allusage' : fix_allusage_records,
        'tdsusage' : fix_tdsusage_records,
    }
    fix_records = fixers.get(table)
    if fix_records is None:
        PgLOG.pglog("{}: Invalid Table Name; must be in ({})".format(table, ','.join(fixers)), PgLOG.LGEREX)
        return 0
    if not option&(NDAYS|MONTH|YEARS):
        PgLOG.pglog("{}: Invalid option to fix ip info".format(option), PgLOG.LGEREX)
        return 0

    cntall = 0
    for value in inputs:
        # set the first and last dates of the period to fix
        if option&NDAYS:
            edate = IPINFO['CDATE']
            date = PgUtil.adddate(edate, 0, 0, -int(value))
        elif option&MONTH:
            tms = value.split('-')
            date = "{}-{:02}-01".format(tms[0], int(tms[1]))
            edate = PgUtil.enddate(date, 0, 'M')
        else:
            date = value + "-01-01"
            edate = value + "-12-31"

        while date <= edate:
            cntall += fix_records(date)
            date = PgUtil.adddate(date, 0, 0, 1)

    return cntall
|
|
116
|
+
|
|
117
|
+
def fix_allusage_records(date):
    """Fix org_type, country and missing emails in the yearly allusage table.

    date: date in form of YYYY-MM-DD; its year selects the table

    Processes the records of the date with org_type '-'.  Returns the
    number of records updated.
    """
    year = re.match(r'^(\d+)-', date).group(1)
    table = 'allusage_' + year
    cond = "date = '{}' and org_type = '-'".format(date)
    pgrecs = PgDBI.pgmget(table, 'aidx, email, ip', cond, PgLOG.LGEREX)
    if not pgrecs: return 0

    cnt = len(pgrecs['ip'])
    mcnt = 0
    for aidx, email, ip in zip(pgrecs['aidx'], pgrecs['email'], pgrecs['ip']):
        ipinfo = PgIPInfo.set_ipinfo(ip)
        if not ipinfo: continue
        record = {'org_type' : ipinfo['org_type'], 'country' : ipinfo['country']}
        # an empty email, or one ending with '-', is replaced
        if not email or re.search(r'-$', email):
            record['email'] = 'unknown@' + ipinfo['hostname']
        mcnt += PgDBI.pgupdt(table, record, "aidx = '{}'".format(aidx))

    s = 's' if cnt > 1 else ''
    PgLOG.pglog("{}: {} of {} record{} updated for {}".format(table, mcnt, cnt, s, date), PgLOG.LOGWRN)

    return mcnt
|
|
143
|
+
|
|
144
|
+
def fix_tdsusage_records(date):
    """Fix org_type and country in table tdsusage for one date.

    date: date of the records to fix

    Processes the records of the date with org_type '-'.  Returns the
    number of records updated.
    """
    table = 'tdsusage'
    pgrecs = PgDBI.pgmget(table, 'time, ip', "date = '{}' and org_type = '-'".format(date), PgLOG.LGEREX)
    if not pgrecs: return 0

    cnt = len(pgrecs['ip'])
    mcnt = 0
    for rtime, ip in zip(pgrecs['time'], pgrecs['ip']):
        ipinfo = PgIPInfo.set_ipinfo(ip)
        if not ipinfo: continue
        record = {'org_type' : ipinfo['org_type'], 'country' : ipinfo['country']}
        cond = "date = '{}' AND time = '{}' AND ip = '{}'".format(date, rtime, ip)
        mcnt += PgDBI.pgupdt(table, record, cond)

    s = 's' if cnt > 1 else ''
    PgLOG.pglog("{}: {} of {} record{} updated for {}".format(table, mcnt, cnt, s, date), PgLOG.LOGWRN)

    return mcnt
|
|
164
|
+
|
|
165
|
+
def fix_ipinfo_records(date):
    """Refresh the records of table ipinfo flagged with stat_flag 'M' for one date.

    date: date of the records to fix

    Returns the value of PgIPInfo.IPINFO['IPUPDT'] after processing.
    """
    table = 'ipinfo'
    pgrecs = PgDBI.pgmget(table, 'ip', "stat_flag = 'M' and date = '{}'".format(date), PgLOG.LGEREX)
    if not pgrecs: return 0

    cnt = len(pgrecs['ip'])
    for ip in pgrecs['ip']:
        PgIPInfo.set_ipinfo(ip, True)

    # NOTE(review): IPUPDT looks like a module-wide counter; if it is never
    # reset, this count is cumulative over all dates -- confirm in PgIPInfo
    mcnt = PgIPInfo.IPINFO['IPUPDT']
    s = 's' if cnt > 1 else ''
    PgLOG.pglog("{}: {} of {} record{} updated".format(table, mcnt, cnt, s), PgLOG.LOGWRN)

    return mcnt
|
|
181
|
+
|
|
182
|
+
#
|
|
183
|
+
# call main() to start program
|
|
184
|
+
#
|
|
185
|
+
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
|
|
2
|
+
Check data usage IP related information to fill in usage tables.
|
|
3
|
+
|
|
4
|
+
Usage: fillipinfo [-b] -t TableName [-N NumberDay] [-m YearMonths] [-y Years]
|
|
5
|
+
|
|
6
|
+
select option, -t and -m, -N or -y to run this application.
|
|
7
|
+
|
|
8
|
+
- Option -b, log process information into logfile only;
|
|
9
|
+
|
|
10
|
+
- Option -t, table name, ipinfo, allusage, globususage, or tdsusage,
|
|
11
|
+
to fix IP related information, such as organization names/types,
|
|
12
|
+
emails and country names;
|
|
13
|
+
|
|
14
|
+
- Option -N, fix usage info in recent NumberDay days;
|
|
15
|
+
|
|
16
|
+
- Option -m, fix usage info in given months;
|
|
17
|
+
|
|
18
|
+
- Option -y, fix usage info in given years.
|