rda-python-metrics 1.0.8__py3-none-any.whl → 1.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rda-python-metrics might be problematic. Click here for more details.
- rda_python_metrics/fillawsusage.usg +1 -2
- rda_python_metrics/fillcdgusage.py +408 -0
- rda_python_metrics/fillcdgusage.usg +18 -0
- rda_python_metrics/fillcodusage.usg +1 -1
- rda_python_metrics/fillglobususage.usg +1 -1
- {rda_python_metrics-1.0.8.dist-info → rda_python_metrics-1.0.9.dist-info}/METADATA +1 -1
- {rda_python_metrics-1.0.8.dist-info → rda_python_metrics-1.0.9.dist-info}/RECORD +11 -9
- {rda_python_metrics-1.0.8.dist-info → rda_python_metrics-1.0.9.dist-info}/entry_points.txt +1 -0
- {rda_python_metrics-1.0.8.dist-info → rda_python_metrics-1.0.9.dist-info}/WHEEL +0 -0
- {rda_python_metrics-1.0.8.dist-info → rda_python_metrics-1.0.9.dist-info}/licenses/LICENSE +0 -0
- {rda_python_metrics-1.0.8.dist-info → rda_python_metrics-1.0.9.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
|
|
2
2
|
Retrieves usage information from AWS Server logs under directory
|
|
3
|
-
|
|
4
|
-
database 'dssdb'.
|
|
3
|
+
../rda/transer/AWSera5log/ to fill table 'wusage' in database 'rdadb'.
|
|
5
4
|
|
|
6
5
|
Usage: fillawsusage [-b] [-d LogFileDates] [-N NumberDay] [-p BeginDate [Enddate]]
|
|
7
6
|
|
|
@@ -0,0 +1,408 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
###############################################################################
|
|
4
|
+
#
|
|
5
|
+
# Title : fillcdgusage
|
|
6
|
+
# Author : Zaihua Ji, zji@ucar.edu
|
|
7
|
+
# Date : 2025-04-14
|
|
8
|
+
# Purpose : python program to retrieve info from GDEX Postgres database for GDS
|
|
9
|
+
# file accesses and backup fill table tdsusage in PostgreSQL database dssdb.
|
|
10
|
+
#
|
|
11
|
+
# Github : https://github.com/NCAR/rda-python-metrics.git
|
|
12
|
+
#
|
|
13
|
+
###############################################################################
|
|
14
|
+
#
|
|
15
|
+
import sys
|
|
16
|
+
import re
|
|
17
|
+
import glob
|
|
18
|
+
from os import path as op
|
|
19
|
+
from rda_python_common import PgLOG
|
|
20
|
+
from rda_python_common import PgUtil
|
|
21
|
+
from rda_python_common import PgFile
|
|
22
|
+
from rda_python_common import PgDBI
|
|
23
|
+
from rda_python_common import PgSplit
|
|
24
|
+
from . import PgIPInfo
|
|
25
|
+
|
|
26
|
+
# Program-wide settings: names of the two usage tables this program fills and
# the value returned by PgUtil.curdate() at import time (presumably today's
# date -- confirm against PgUtil).
USAGE = dict(
    TDSTBL="tdsusage",
    WEBTBL="webusage",
    CDATE=PgUtil.curdate(),
)
|
|
31
|
+
|
|
32
|
+
# Maps a CDG dataset short name (as given with option -s) to the list of RDA
# dataset ids recorded for it.
DSIDS = {
    'ucar.cgd.cesm2.cam6.prescribed_sst_amip' : ['d651010'],
    'ucar.cgd.ccsm4.CLM_LAND_ONLY' : ['d651011'],
    'ucar.cgd.artmip' : ['d651012', 'd651016', 'd651017', 'd651018'],
    'tamip' : ['d651013'],
    'ucar.cgd.ccsm4.CLIVAR_LE' : ['d651014'],
    'ucar.cgd.cesm2.Gettelman_CESM2_ECS' : ['d651015'],
    'ucar.cgd.ccsm4.geomip.ssp5' : ['d651024'],
    'ucar.cgd.ccsm4.IOD-PACEMAKER' : ['d651021'],
    # was '651023'; every other id carries the leading 'd'
    'ucar.cgd.ccsm4.past2k_transient' : ['d651023'],
    'ucar.cgd.ccsm4.lowwarming' : ['d651025'],
    'ucar.cgd.ccsm4.CESM_CAM5_BGC_ME' : ['d651000'],
    'ucar.cgd.ccsm4.iTRACE' : ['d651022'],
    'ucar.cgd.ccsm4.so2_geoeng' : ['d651026'],
    'ucar.cgd.ccsm4.cesmLE' : ['d651027'],
    'ucar.cgd.ccsm4.CESM1-CAM5-DP' : ['d651028'],
    # NOTE(review): this key used to be listed twice, first with ['d651031']
    # and later with ['d651035']; Python keeps only the last value, so
    # 'd651031' was never reachable.  The effective value is kept here --
    # confirm which short name 'd651031' really belongs to.
    'ucar.cgd.ccsm4.amv_lens' : ['d651035'],
    'ucar.cgd.ccsm4.ATL-PACEMAKER' : ['d651032'],
    'ucar.cgd.ccsm4.pac-pacemaker' : ['d651033'],
    'ucar.cgd.ccsm4.SD-WACCM-X_v2.1' : ['d651034'],
    'ucar.cgd.cesm2.cism_ismip6' : ['d651036'],
    'ucar.cgd.ccsm4.pliomip2' : ['d651037'],
}
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
#
|
|
59
|
+
# main function to run this program
|
|
60
|
+
#
|
|
61
|
+
def main():
    """Parse the command line, then gather CDG usage for the requested datasets.

    Recognized arguments: -b (sets PgLOG.PGLOG['BCKGRND']), and the value
    options -m, -s, -N and -y, each followed by one or more values.  Option
    -s plus at least one of the other value options are required.  The
    process always ends through sys.exit(0).
    """
    argv = sys.argv[1:]
    params = {}   # option letter -> list of values given after it
    opt = None    # option letter currently collecting values

    for arg in argv:
        if arg == "-b":
            PgLOG.PGLOG['BCKGRND'] = 1
            continue
        if re.match(r'^-[msNy]$', arg):
            opt = arg[1]
            params[opt] = []
            continue
        if arg.startswith('-'):
            PgLOG.pglog(arg + ": Invalid Option", PgLOG.LGWNEX)
            continue
        if opt:
            params[opt].append(arg)
            continue
        PgLOG.pglog(arg + ": Value passed in without leading option", PgLOG.LGWNEX)

    # validate the option combination before doing any work
    if not opt:
        PgLOG.show_usage('fillcdgusage')
    elif 's' not in params:
        PgLOG.pglog("-s: Missing dataset short name to gather CDG metrics", PgLOG.LGWNEX)
    elif len(params) < 2:
        PgLOG.pglog("-(m|N|y): Missing Month, NumberDays or Year to gather CDG metrics", PgLOG.LGWNEX)

    PgLOG.cmdlog("fillcdgusage " + ' '.join(argv))
    dsids = get_dataset_ids(params['s'])
    if dsids:
        # everything left in params after dropping -s is a date option
        del params['s']
        for option, values in params.items():
            fill_cdg_usages(dsids, get_date_ranges(option, values))

    PgLOG.pglog(None, PgLOG.LOGWRN|PgLOG.SNDEML)  # send email out if any

    sys.exit(0)
|
|
99
|
+
|
|
100
|
+
#
|
|
101
|
+
# connect to the gdex database esg-production
|
|
102
|
+
#
|
|
103
|
+
def gdex_dbname():
    """Switch PgDBI to the GDEX metrics database via PgDBI.set_scname().

    The meaning of each positional argument is defined by
    PgDBI.set_scname(); they are passed through unchanged.
    """
    connection_args = ('esg-production', 'metrics', 'gateway-reader', None, 'sagedbprodalma.ucar.edu')
    PgDBI.set_scname(*connection_args)
|
|
105
|
+
|
|
106
|
+
#
|
|
107
|
+
# get datasets
|
|
108
|
+
#
|
|
109
|
+
def get_dataset_ids(dsnames):
    """Resolve CDG dataset short names into [cdg_dataset_id, rda_ids] pairs.

    Each known short name is looked up in table metadata.dataset of the GDEX
    database; the dataset found and all of its descendants (collected by
    recursive_dataset_ids) are appended to the result.

    dsnames -- iterable of CDG dataset short names (keys of DSIDS)

    Returns a list of two-item lists [dataset id, DSIDS[short name]]; the
    list is empty (and a warning is logged) if nothing could be resolved.
    """
    gdex_dbname()
    dsids = []
    tbname = 'metadata.dataset'
    for dsname in dsnames:
        if dsname not in DSIDS:
            PgLOG.pglog(dsname + ": Unknown CDG dataset short name", PgLOG.LOGWRN)
            continue
        rdaid = DSIDS[dsname]
        pgrec = PgDBI.pgget(tbname, 'id', "short_name = '{}'".format(dsname))
        if not (pgrec and pgrec['id']):
            continue
        dsid = pgrec['id']
        # dsids holds [dsid, rdaid] pairs, so compare against the first item
        # of each pair; a plain "dsid in dsids" can never match
        if any(entry[0] == dsid for entry in dsids):
            continue
        dsids.append([dsid, rdaid])
        recursive_dataset_ids(dsid, rdaid, dsids)

    if not dsids:
        PgLOG.pglog("No Dataset Id identified to gather CDG metrics", PgLOG.LOGWRN)

    return dsids
|
|
129
|
+
|
|
130
|
+
#
|
|
131
|
+
# get dsids recursively
|
|
132
|
+
#
|
|
133
|
+
def recursive_dataset_ids(pdsid, rdaid, dsids):
    """Append all descendants of dataset pdsid to dsids, depth first.

    pdsid -- id of the parent dataset in table metadata.dataset
    rdaid -- value stored with every descendant (second item of each pair)
    dsids -- list of [dataset id, rdaid] pairs, extended in place

    A dataset id already present in dsids is skipped, which also keeps the
    recursion from looping forever on cyclic parent links.
    """
    tbname = 'metadata.dataset'
    pgrecs = PgDBI.pgmget(tbname, 'id', "parent_dataset_id = '{}'".format(pdsid))
    if not pgrecs:
        return

    for dsid in pgrecs['id']:
        # dsids holds [dsid, rdaid] pairs, so compare against the first item
        # of each pair; a plain "dsid in dsids" can never match
        if any(entry[0] == dsid for entry in dsids):
            continue
        dsids.append([dsid, rdaid])
        recursive_dataset_ids(dsid, rdaid, dsids)
|
|
143
|
+
|
|
144
|
+
#
|
|
145
|
+
# get the date ranges for given condition
|
|
146
|
+
#
|
|
147
|
+
def get_date_ranges(option, inputs):
    """Convert the values of one date option into [begin, end] date pairs.

    option -- 'N' (number of days back from USAGE['CDATE']),
              'm' (month given as YYYY-MM); any other value is treated as
              a year given as YYYY
    inputs -- list of string values given on the command line for option

    Returns a list with one two-item list [begin date, end date] per input.
    """
    dranges = []
    for value in inputs:
        if option == 'N':
            end = USAGE['CDATE']
            start = PgUtil.adddate(USAGE['CDATE'], 0, 0, -int(value))
        elif option == 'm':
            tms = value.split('-')
            start = PgUtil.fmtdate(int(tms[0]), int(tms[1]), 1)
            end = PgUtil.enddate(start)
        else:
            start = value + "-01-01"
            end = value + "-12-31"
        # build the pair in one go; assigning by index into an empty list
        # raises IndexError
        dranges.append([start, end])

    return dranges
|
|
166
|
+
|
|
167
|
+
#
|
|
168
|
+
# get file download records for given dsid
|
|
169
|
+
#
|
|
170
|
+
def get_dsid_records(dsid, dates):
    """Fetch the completed file-download records of one CDG dataset.

    dsid  -- dataset id matched against column dataset_id
    dates -- pair whose first two items bound date_completed (BETWEEN)

    The query runs against table metrics.file_download after gdex_dbname()
    has been called; PgDBI.dssdb_dbname() is called afterwards.  Returns
    whatever PgDBI.pgmget() returns, ordered by date_completed.
    """
    columns = [
        'date_completed', 'remote_address', 'logical_file_size', 'logical_file_name',
        'file_access_point_uri', 'user_agent_name', 'bytes_sent',
        'subset_file_size', 'range_request', 'dataset_file_size', 'dataset_file_name',
        'dataset_file_file_access_point_uri',
    ]
    begin, end = dates[0], dates[1]
    cond = "dataset_id = '{}' AND completed = True AND date_completed BETWEEN '{}' AND '{}' ORDER BY date_completed".format(dsid, begin, end)

    gdex_dbname()
    downloads = PgDBI.pgmget('metrics.file_download', ', '.join(columns), cond)
    PgDBI.dssdb_dbname()

    return downloads
|
|
181
|
+
|
|
182
|
+
#
|
|
183
|
+
# Fill TDS usages into table dssdb.tdsusage from cdg access records
|
|
184
|
+
#
|
|
185
|
+
def fill_cdg_usages(dsids, dranges):
    """Gather CDG download records and add them as TDS or web usage.

    dsids   -- list of [cdg dataset id, rda id] pairs (see get_dataset_ids)
    dranges -- list of [begin date, end date] pairs (see get_date_ranges)

    For every dataset and date range the download records are read with
    get_dsid_records().  A record whose access URL matches the thredds
    pattern is accumulated per day and per ip/dataset/method/type key and
    added through add_tdsusage_records(); any other record is added through
    add_webfile_usage(), with consecutive partial downloads of the same
    file by the same ip merged into one record.  Records without bytes_sent
    are skipped.  A summary is logged at the end.
    """
    tds_pattern = re.compile(r'^https://tds.ucar.edu/thredds/(\w+)/')
    allcnt = awcnt = atcnt = 0
    for dsid in dsids:
        cdgid = dsid[0]
        rdaid = dsid[1]
        for dates in dranges:
            pgrecs = get_dsid_records(cdgid, dates)
            pgcnt = len(pgrecs['dataset_file_name']) if pgrecs else 0
            if pgcnt == 0:
                PgLOG.pglog("{}: No record found to gather CDG usage between {} and {}".format(rdaid, dates[0], dates[1]), PgLOG.LOGWRN)
                continue
            PgLOG.pglog("{}: gather {} records for CDG usage between {} and {}".format(rdaid, pgcnt, dates[0], dates[1]), PgLOG.LOGWRN)
            tcnt = wcnt = 0
            pwkey = wrec = cdate = None
            # year of the pending TDS batch / pending web record; kept apart
            # from the current record's year so that a flush triggered by a
            # record of the next year still goes to the right yearly table
            cyear = wyear = None
            trecs = {}
            for i in range(pgcnt):
                if (i+1)%20000 == 0:
                    PgLOG.pglog("{}/{}/{} CDG/TDS/WEB records processed to add".format(i+1, tcnt, wcnt), PgLOG.WARNLG)

                # NOTE(review): argument order of PgUtil.onerecord() cannot be
                # verified from this file -- confirm it is (index, records)
                pgrec = PgUtil.onerecord(i, pgrecs)
                dsize = pgrec['bytes_sent']
                if not dsize:
                    continue
                (year, quarter, date, time) = get_record_date_time(pgrec['date_completed'])
                url = pgrec['dataset_file_file_access_point_uri']
                if not url:
                    url = pgrec['file_access_point_uri']
                ip = pgrec['remote_address']
                engine = pgrec['user_agent_name']
                # a record without any URL cannot be a thredds access; treat
                # it as web usage instead of failing on re.search(None)
                ms = tds_pattern.search(url or '')
                if ms:
                    # tds usage
                    method = ms.group(1)
                    if pgrec['subset_file_size']:
                        etype = 'S'
                    elif pgrec['range_request']:
                        etype = 'R'
                    else:
                        etype = 'F'

                    if date != cdate:
                        # a new day starts: flush the finished day first
                        if trecs:
                            tcnt += add_tdsusage_records(cyear, trecs, cdate)
                            trecs = {}
                        cdate = date
                        cyear = year
                    tkey = "{}:{}:{}:{}".format(ip, rdaid, method, etype)
                    if tkey in trecs:
                        trecs[tkey]['size'] += dsize
                        trecs[tkey]['fcount'] += 1
                    else:
                        trecs[tkey] = {'ip' : ip, 'dsid' : rdaid, 'date' : cdate, 'time' : time, 'size' : dsize,
                                       'fcount' : 1, 'method' : method, 'etype' : etype, 'engine' : engine}
                else:
                    # web usage; fall back to the logical_* columns selected
                    # by get_dsid_records() when the dataset_* ones are empty
                    wfile = pgrec['dataset_file_name']
                    if not wfile:
                        wfile = pgrec['logical_file_name']
                    fsize = pgrec['dataset_file_size']
                    if not fsize:
                        fsize = pgrec['logical_file_size']
                    method = 'CDP'
                    # only partial downloads get a key and are merged; an
                    # unknown file size cannot prove a partial download
                    if pgrec['subset_file_size'] or pgrec['range_request'] or (fsize and dsize < fsize):
                        wkey = "{}:{}:{}".format(ip, rdaid, wfile)
                    else:
                        wkey = None

                    if wrec:
                        if wkey == pwkey:
                            wrec['size'] += dsize
                            continue
                        wcnt += add_webfile_usage(wyear, wrec)
                    wrec = {'ip' : ip, 'dsid' : rdaid, 'wfile' : wfile, 'date' : date,
                            'time' : time, 'quarter' : quarter, 'size' : dsize,
                            'locflag' : 'C', 'method' : method}
                    wyear = year
                    pwkey = wkey
                    if not pwkey:
                        # a full download is never merged; add it right away
                        wcnt += add_webfile_usage(wyear, wrec)
                        wrec = None

            # flush whatever is still pending for this dataset/date range
            if trecs:
                tcnt += add_tdsusage_records(cyear, trecs, cdate)
            if wrec:
                wcnt += add_webfile_usage(wyear, wrec)
            atcnt += tcnt
            awcnt += wcnt
            allcnt += pgcnt

    PgLOG.pglog("{}/{} TDS/WEB usage records added for {} CDG entries at {}".format(atcnt, awcnt, allcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def get_record_date_time(ctime):
    """Split a completion timestamp into (year, quarter, date, time).

    str(ctime) must look like 'DD/Mon/YYYY HH:MM:SS', optionally followed by
    a fraction.  The year is returned as matched text, the quarter as an
    int, the date as 'YYYY-MM-DD' and the time as 'HH:MM:SS'.  On any other
    format an error is logged with PgLOG.LGEREX and nothing is returned.
    """
    matched = re.search(r'^(\d+)/(\w+)/(\d+) (\d+:\d+:\d+)(\.|$)', str(ctime))
    if not matched:
        PgLOG.pglog("time: Invalid date format", PgLOG.LGEREX)
        return None

    day_text, month_name, year, clock = matched.group(1, 2, 3, 4)
    day = int(day_text)
    month = PgUtil.get_month(month_name)
    quarter = int((month - 1)/3) + 1
    date = "{}-{:02}-{:02}".format(year, month, day)

    return (year, quarter, date, clock)
|
|
282
|
+
|
|
283
|
+
def add_tdsusage_records(year, records, date):
    """Add the accumulated TDS usage records of one day to table tdsusage.

    year    -- year handed on to add_tds_allusage()
    records -- dict of usage records; only the values are used
    date    -- date of the records, used in the duplicate check and the log

    A record is skipped when a row with the same date, time and ip already
    exists.  org_type, country and email are filled in from
    PgIPInfo.set_ipinfo() when it returns information.  Returns the sum of
    the PgDBI.pgadd() results.
    """
    table = USAGE['TDSTBL']
    added = 0
    for usage in records.values():
        exists_cond = "date = '{}' AND time = '{}' AND ip = '{}'".format(date, usage['time'], usage['ip'])
        if PgDBI.pgget(table, '', exists_cond, PgLOG.LGEREX):
            continue

        # defaults used when no ip information is available
        usage['org_type'] = '-'
        usage['country'] = '-'
        ipinfo = PgIPInfo.set_ipinfo(usage['ip'])
        if ipinfo:
            usage['org_type'] = ipinfo['org_type']
            usage['country'] = ipinfo['country']
            usage['email'] = 'unknown@' + ipinfo['hostname']

        if not add_tds_allusage(year, usage):
            continue
        added += PgDBI.pgadd(table, usage, PgLOG.LOGWRN)

    PgLOG.pglog("{}: {} TDS usage records added at {}".format(date, added, PgLOG.current_datetime()), PgLOG.LOGWRN)

    return added
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def add_tds_allusage(year, pgrec):
    """Add one TDS usage record through PgDBI.add_yearly_allusage().

    The record passed on starts with method 'CDP' and source 'C' and takes
    every field of pgrec except engine, method, etype and fcount.  Returns
    the result of PgDBI.add_yearly_allusage(year, record).
    """
    record = {'method' : 'CDP', 'source' : 'C'}
    record.update({
        fld: value for fld, value in pgrec.items()
        if not re.match(r'^(engine|method|etype|fcount)$', fld)
    })

    return PgDBI.add_yearly_allusage(year, record)
|
|
314
|
+
|
|
315
|
+
#
|
|
316
|
+
# Fill usage of a single online data file into table dssdb.wusage of DSS PgSQL database
|
|
317
|
+
#
|
|
318
|
+
def add_webfile_usage(year, logrec):
    """Add the usage of a single web file through PgDBI.add_yearly_wusage().

    year   -- year used for the yearly table name '<WEBTBL>_<year>'
    logrec -- dict with keys dsid, wfile, ip, date, time, quarter, size,
              method and locflag

    Returns 0 when the file or the user cannot be identified, when the same
    wid/method/date/time is already recorded, or when add_web_allusage()
    fails; otherwise the result of PgDBI.add_yearly_wusage().
    """
    wfrec = get_wfile_wid(logrec['dsid'], logrec['wfile'])
    if not wfrec:
        return 0

    yearly_table = "{}_{}".format(USAGE['WEBTBL'], year)
    dup_cond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(wfrec['wid'], logrec['method'], logrec['date'], logrec['time'])
    if PgDBI.pgget(yearly_table, "", dup_cond, PgLOG.LOGWRN):
        return 0

    wurec = get_wuser_record(logrec['ip'], logrec['date'])
    if not wurec:
        return 0

    record = {
        'wid' : wfrec['wid'],
        'dsid' : wfrec['dsid'],
        'wuid_read' : wurec['wuid'],
        'date_read' : logrec['date'],
        'time_read' : logrec['time'],
        'size_read' : logrec['size'],
        'method' : logrec['method'],
        'locflag' : logrec['locflag'],
        'ip' : logrec['ip'],
        'quarter' : logrec['quarter'],
    }

    # the yearly wusage row is only added once the allusage row went in
    return PgDBI.add_yearly_wusage(year, record) if add_web_allusage(year, logrec, wurec) else 0
|
|
343
|
+
|
|
344
|
+
def add_web_allusage(year, logrec, wurec):
    """Add one web usage record through PgDBI.add_yearly_allusage().

    The user fields (email, org_type, country) come from wurec, the usage
    fields from logrec, and source is set to 'C'.  Returns the result of
    PgDBI.add_yearly_allusage(year, record).
    """
    allrec = {
        'email' : wurec['email'],
        'org_type' : wurec['org_type'],
        'country' : wurec['country'],
        'dsid' : logrec['dsid'],
        'date' : logrec['date'],
        'quarter' : logrec['quarter'],
        'time' : logrec['time'],
        'size' : logrec['size'],
        'method' : logrec['method'],
        'ip' : logrec['ip'],
        'source' : 'C',
    }

    return PgDBI.add_yearly_allusage(year, allrec)
|
|
356
|
+
|
|
357
|
+
#
|
|
358
|
+
# return the matching file record (from wfile, wfile_delete, or wfile via wmove) upon success, a false value otherwise
|
|
359
|
+
#
|
|
360
|
+
def get_wfile_wid(dsid, wfile):
    """Find the file record for web file wfile of dataset dsid.

    The lookup tries, in order: PgSplit.pgget_wfile() for the dataset,
    table wfile_delete, and finally table wmove followed by
    PgSplit.pgget_wfile() with the moved record's dsid and wid.  A record
    found by the first or the last lookup gets its dsid set to the given
    dsid.  Returns the record found, or the false result of the last lookup.
    """
    name_cond = "wfile = '{}'".format(wfile)

    found = PgSplit.pgget_wfile(dsid, "*", name_cond)
    if found:
        found['dsid'] = dsid
        return found

    found = PgDBI.pgget("wfile_delete", "*", "{} AND dsid = '{}'".format(name_cond, dsid))
    if found:
        return found

    moved = PgDBI.pgget("wmove", "wid, dsid", name_cond)
    if not moved:
        return moved

    found = PgSplit.pgget_wfile(moved['dsid'], "*", "wid = {}".format(moved['wid']))
    if found:
        found['dsid'] = dsid

    return found
|
|
375
|
+
|
|
376
|
+
# return wuser record upon success, None otherwise
|
|
377
|
+
def get_wuser_record(ip, date):
    """Return the wuser record for an ip address, adding one when missing.

    ip   -- address handed to PgIPInfo.set_ipinfo()
    date -- usage date; becomes start_date of a new user, and replaces the
            start_date of an existing user when PgUtil.diffdate() of the
            stored start_date and date is positive

    The user is identified by the email 'unknown@<hostname>'.  Returns the
    existing or the newly added record, or None when no ip information is
    available or the new user cannot be added.
    """
    ipinfo = PgIPInfo.set_ipinfo(ip)
    if not ipinfo:
        return None

    record = {'org_type' : ipinfo['org_type'], 'country' : ipinfo['country']}
    email = 'unknown@' + ipinfo['hostname']
    emcond = "email = '{}'".format(email)

    existing = PgDBI.pgget("wuser", 'wuid, email, org_type, country, start_date', emcond, PgLOG.LOGERR)
    if existing:
        if PgUtil.diffdate(existing['start_date'], date) > 0:
            record['start_date'] = date
            existing['start_date'] = date
            PgDBI.pgupdt('wuser', record, emcond)
        return existing

    # now add one in
    record['email'] = email
    record['stat_flag'] = 'A'
    record['start_date'] = date
    wuid = PgDBI.pgadd("wuser", record, PgLOG.LOGERR|PgLOG.AUTOID)
    if not wuid:
        return None

    record['wuid'] = wuid
    PgLOG.pglog("{} Added as wuid({})".format(email, wuid), PgLOG.LGWNEM)

    return record
|
|
404
|
+
|
|
405
|
+
#
|
|
406
|
+
# call main() to start program
|
|
407
|
+
#
|
|
408
|
+
# run main() only when this file is executed as a script
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
|
|
2
|
+
Retrieves CDG usage information from GDEX metrics database to
|
|
3
|
+
fill tables 'tdsusage' and 'webusage' in PostgreSQL database 'rdadb'.
|
|
4
|
+
|
|
5
|
+
Usage: fillcdgusage [-b] -s DatasetShortNames [-m MonthList] [-N NumberDays] [-y YearList]
|
|
6
|
+
|
|
7
|
+
select option -s and one of the options, -m, -N or -y each time to run
|
|
8
|
+
this application.
|
|
9
|
+
|
|
10
|
+
- Option -b, log process information into logfile only;
|
|
11
|
+
|
|
12
|
+
- Option -s, retrieve usage info for given dataset short names;
|
|
13
|
+
|
|
14
|
+
- Option -m, retrieve usage info in given months (YYYY-MM);
|
|
15
|
+
|
|
16
|
+
- Option -N, retrieve usage info in recent Number of days;
|
|
17
|
+
|
|
18
|
+
- Option -y, retrieve usage info in given years (YYYY).
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
|
|
2
2
|
Retrieves usage information from Globus Server logs under directory
|
|
3
3
|
/gpfs/fs1/collections/rda/work/logs/gridftp/ to fill table 'wusage' in
|
|
4
|
-
database '
|
|
4
|
+
database 'rdadb'.
|
|
5
5
|
|
|
6
6
|
Usage: fillglobususage [-b] [-d LogFileDates] [-N NumberDay] [-p BeginDate [Enddate]]
|
|
7
7
|
|
|
@@ -2,13 +2,15 @@ rda_python_metrics/PgIPInfo.py,sha256=NJe5hRwxuflH_CZBZmFCgzYU6XFZXP44PoSbqbpPOw
|
|
|
2
2
|
rda_python_metrics/PgView.py,sha256=r6otb3DjfiaQJdg0z8bZQAOlhr4JnrXJzp9wgWh_8qQ,24369
|
|
3
3
|
rda_python_metrics/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
4
4
|
rda_python_metrics/fillawsusage.py,sha256=dpI3-EFINJECdCSUOm37A97CJNIxOS2nYwA0fB0IpHE,9055
|
|
5
|
-
rda_python_metrics/fillawsusage.usg,sha256=
|
|
5
|
+
rda_python_metrics/fillawsusage.usg,sha256=pD_nYTfg1so9nvVEyPSWdgKvb9gWdtfHJAs3RsT2MMU,609
|
|
6
|
+
rda_python_metrics/fillcdgusage.py,sha256=9sDFLAhzndjAVCahIVHofQ4YeslwW7MGIWEDSAQYdqY,13816
|
|
7
|
+
rda_python_metrics/fillcdgusage.usg,sha256=5lkd4Zdi72nQcha-JtbaLnxl66V4wBDykKwQtUOtMrw,667
|
|
6
8
|
rda_python_metrics/fillcodusage.py,sha256=Hp3VrlIqHBWRI6Zjbi0nxWZPNvPrKsGBSZ1L9qA9_y0,8006
|
|
7
|
-
rda_python_metrics/fillcodusage.usg,sha256=
|
|
9
|
+
rda_python_metrics/fillcodusage.usg,sha256=3B5IkQ4uwylqY8uEfUdnZ_MXqhYudeylMp5ulhUGXH8,678
|
|
8
10
|
rda_python_metrics/fillcountry.py,sha256=7i5LNi3scRoyRCT6t7aeNTGKOpxzJ2mA9tnvUqje2AU,2314
|
|
9
11
|
rda_python_metrics/fillendtime.py,sha256=skZttlpoY19g0dGwqGQI8t_1YPPTPEXwg3EfNlfL90I,2533
|
|
10
12
|
rda_python_metrics/fillglobususage.py,sha256=-cvIipaFju75aw9axHkx6JIe9HWYwQOD8-0blQaxxUM,9442
|
|
11
|
-
rda_python_metrics/fillglobususage.usg,sha256=
|
|
13
|
+
rda_python_metrics/fillglobususage.usg,sha256=1GgmCP22IQZdADwL5Mmkz3v8Ws-G7U3teQ1AxRJfV_4,637
|
|
12
14
|
rda_python_metrics/fillipinfo.py,sha256=xIVJ6nDvVvMOjb7s_6-YDLVRBC09pDFugnjB3Nrmqus,5641
|
|
13
15
|
rda_python_metrics/fillipinfo.usg,sha256=taITqZa6GL0-wxXcMEdMU3ZlQbJ1CsmFclTvXpG5TLg,644
|
|
14
16
|
rda_python_metrics/filloneorder.py,sha256=H4XIskpViaiKgc9StvS4cQT_LpM3VYLCsuuqCm4UqmA,5425
|
|
@@ -42,9 +44,9 @@ rda_python_metrics/viewwebfile.py,sha256=HSMNkQQawonu6W3blV7g9UbJuNy9VAOn9COqgmj
|
|
|
42
44
|
rda_python_metrics/viewwebfile.usg,sha256=lTNi8Yu8BUJuExEDJX-vsJyWUSUIQTS-DiiBEVFo33s,10054
|
|
43
45
|
rda_python_metrics/viewwebusage.py,sha256=jhoHuRPVNtp7Lbjv0l-Jy_vp2p2nWQC7IVZ0P4JUJ4I,16657
|
|
44
46
|
rda_python_metrics/viewwebusage.usg,sha256=IsT72v22xyZf7ng_IodVs0dLTsH1Q4BtFvT-gs0-xJY,9946
|
|
45
|
-
rda_python_metrics-1.0.
|
|
46
|
-
rda_python_metrics-1.0.
|
|
47
|
-
rda_python_metrics-1.0.
|
|
48
|
-
rda_python_metrics-1.0.
|
|
49
|
-
rda_python_metrics-1.0.
|
|
50
|
-
rda_python_metrics-1.0.
|
|
47
|
+
rda_python_metrics-1.0.9.dist-info/licenses/LICENSE,sha256=1dck4EAQwv8QweDWCXDx-4Or0S8YwiCstaso_H57Pno,1097
|
|
48
|
+
rda_python_metrics-1.0.9.dist-info/METADATA,sha256=k5xThxsVVIf1-uVtr5NDQ0VdUPKjZxeNlgSNysXLT24,735
|
|
49
|
+
rda_python_metrics-1.0.9.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
50
|
+
rda_python_metrics-1.0.9.dist-info/entry_points.txt,sha256=YfFLzlE3rdufSV471VsDnfYptnt1lR08aSrxPXlKqlY,1185
|
|
51
|
+
rda_python_metrics-1.0.9.dist-info/top_level.txt,sha256=aoBgbR_o70TP0QmMW0U6inRHYtfKld47OBmnWnLnDOs,19
|
|
52
|
+
rda_python_metrics-1.0.9.dist-info/RECORD,,
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
[console_scripts]
|
|
2
2
|
fillawsusage = rda_python_metrics.fillawsusage:main
|
|
3
|
+
fillcdgusage = rda_python_metrics.fillcdgusage:main
|
|
3
4
|
fillcodusage = rda_python_metrics.fillcodusage:main
|
|
4
5
|
fillcountry = rda_python_metrics.fillcountry:main
|
|
5
6
|
fillendtime = rda_python_metrics.fillendtime:main
|
|
File without changes
|
|
File without changes
|
|
File without changes
|