rda-python-metrics 1.0.9__py3-none-any.whl → 1.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rda-python-metrics might be problematic. Click here for more details.
- rda_python_metrics/fillcdgusage.py +82 -61
- {rda_python_metrics-1.0.9.dist-info → rda_python_metrics-1.0.10.dist-info}/METADATA +1 -1
- {rda_python_metrics-1.0.9.dist-info → rda_python_metrics-1.0.10.dist-info}/RECORD +7 -7
- {rda_python_metrics-1.0.9.dist-info → rda_python_metrics-1.0.10.dist-info}/WHEEL +0 -0
- {rda_python_metrics-1.0.9.dist-info → rda_python_metrics-1.0.10.dist-info}/entry_points.txt +0 -0
- {rda_python_metrics-1.0.9.dist-info → rda_python_metrics-1.0.10.dist-info}/licenses/LICENSE +0 -0
- {rda_python_metrics-1.0.9.dist-info → rda_python_metrics-1.0.10.dist-info}/top_level.txt +0 -0
|
@@ -54,6 +54,10 @@ DSIDS = {
|
|
|
54
54
|
'ucar.cgd.ccsm4.pliomip2' : ['d651037']
|
|
55
55
|
}
|
|
56
56
|
|
|
57
|
+
ALLIDS = list(DSIDS.keys())
|
|
58
|
+
|
|
59
|
+
WFILES = {}
|
|
60
|
+
WUSERS = {}
|
|
57
61
|
|
|
58
62
|
#
|
|
59
63
|
# main function to run this program
|
|
@@ -86,13 +90,9 @@ def main():
|
|
|
86
90
|
|
|
87
91
|
|
|
88
92
|
PgLOG.cmdlog("fillcdgusage {}".format(' '.join(argv)))
|
|
93
|
+
dranges = get_date_ranges(params)
|
|
89
94
|
dsids = get_dataset_ids(params['s'])
|
|
90
|
-
if dsids:
|
|
91
|
-
del params['s']
|
|
92
|
-
for o in params:
|
|
93
|
-
dranges = get_date_ranges(o, params[o])
|
|
94
|
-
fill_cdg_usages(dsids, dranges)
|
|
95
|
-
|
|
95
|
+
if dranges and dsids: fill_cdg_usages(dsids, dranges)
|
|
96
96
|
PgLOG.pglog(None, PgLOG.LOGWRN|PgLOG.SNDEML) # send email out if any
|
|
97
97
|
|
|
98
98
|
sys.exit(0)
|
|
@@ -112,6 +112,7 @@ def get_dataset_ids(dsnames):
|
|
|
112
112
|
dsids = []
|
|
113
113
|
tbname = 'metadata.dataset'
|
|
114
114
|
for dsname in dsnames:
|
|
115
|
+
if re.match(r'^all$', dsname, re.I): return ALLIDS
|
|
115
116
|
if dsname not in DSIDS:
|
|
116
117
|
PgLOG.pglog(dsname + ": Unknown CDG dataset short name", PgLOG.LOGWRN)
|
|
117
118
|
continue
|
|
@@ -144,23 +145,24 @@ def recursive_dataset_ids(pdsid, rdaid, dsids):
|
|
|
144
145
|
#
|
|
145
146
|
# get the date ranges for given condition
|
|
146
147
|
#
|
|
147
|
-
def get_date_ranges(
|
|
148
|
+
def get_date_ranges(inputs):
|
|
148
149
|
|
|
149
150
|
dranges = []
|
|
150
|
-
for
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
151
|
+
for opt in inputs:
|
|
152
|
+
for input in inputs[opt]:
|
|
153
|
+
# get date range
|
|
154
|
+
dates = []
|
|
155
|
+
if opt == 'N':
|
|
156
|
+
dates.append(PgUtil.adddate(USAGE['CDATE'], 0, 0, -int(input)))
|
|
157
|
+
dates.append(USAGE['CDATE'])
|
|
158
|
+
elif opt == 'm':
|
|
159
|
+
tms = input.split('-')
|
|
160
|
+
dates.append(PgUtil.fmtdate(int(tms[0]), int(tms[1]), 1))
|
|
161
|
+
dates.append(PgUtil.enddate(dates[0], 0, 'M'))
|
|
162
|
+
elif opt == 'Y':
|
|
163
|
+
dates.append(input + "-01-01")
|
|
164
|
+
dates.append(input + "-12-31")
|
|
165
|
+
dranges.append(dates)
|
|
164
166
|
|
|
165
167
|
return dranges
|
|
166
168
|
|
|
@@ -174,6 +176,7 @@ def get_dsid_records(dsid, dates):
|
|
|
174
176
|
fields = ('date_completed, remote_address, logical_file_size, logical_file_name, file_access_point_uri, user_agent_name, bytes_sent, '
|
|
175
177
|
'subset_file_size, range_request, dataset_file_size, dataset_file_name, dataset_file_file_access_point_uri')
|
|
176
178
|
cond = "dataset_id = '{}' AND completed = True AND date_completed BETWEEN '{}' AND '{}' ORDER BY date_completed".format(dsid, dates[0], dates[1])
|
|
179
|
+
PgLOG.pglog("{}: Query CDG usage between {} and {} at {}".format(dsid, dates[0], dates[1], PgLOG.current_datetime()), PgLOG.LOGWRN)
|
|
177
180
|
pgrecs = PgDBI.pgmget(tbname, fields, cond)
|
|
178
181
|
PgDBI.dssdb_dbname()
|
|
179
182
|
|
|
@@ -184,17 +187,19 @@ def get_dsid_records(dsid, dates):
|
|
|
184
187
|
#
|
|
185
188
|
def fill_cdg_usages(dsids, dranges):
|
|
186
189
|
|
|
187
|
-
allcnt = awcnt = atcnt = 0
|
|
188
|
-
for
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
190
|
+
allcnt = awcnt = atcnt = lcnt = 0
|
|
191
|
+
for dates in dranges:
|
|
192
|
+
for dsid in dsids:
|
|
193
|
+
lcnt += 1
|
|
194
|
+
cdgid = dsid[0]
|
|
195
|
+
rdaid = dsid[1]
|
|
196
|
+
srdaid = '|'.join(rdaid)
|
|
192
197
|
pgrecs = get_dsid_records(cdgid, dates)
|
|
193
198
|
pgcnt = len(pgrecs['dataset_file_name']) if pgrecs else 0
|
|
194
199
|
if pgcnt == 0:
|
|
195
|
-
PgLOG.pglog("{}: No record found to gather CDG usage between {} and {}".format(
|
|
200
|
+
PgLOG.pglog("{}: No record found to gather CDG usage between {} and {}".format(srdaid, dates[0], dates[1]), PgLOG.LOGWRN)
|
|
196
201
|
continue
|
|
197
|
-
PgLOG.pglog("{}:
|
|
202
|
+
PgLOG.pglog("{}: Process {} records for CDG usage at {}".format(srdaid, pgcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)
|
|
198
203
|
tcnt = wcnt = 0
|
|
199
204
|
pwkey = wrec = cdate = None
|
|
200
205
|
trecs = {}
|
|
@@ -210,6 +215,10 @@ def fill_cdg_usages(dsids, dranges):
|
|
|
210
215
|
if not url: url = pgrec['file_access_point_uri']
|
|
211
216
|
ip = pgrec['remote_address']
|
|
212
217
|
engine = pgrec['user_agent_name']
|
|
218
|
+
wfile = pgrec['dataset_file_name']
|
|
219
|
+
if not wfile: wfile = pgrec['logic_file_name']
|
|
220
|
+
wfrec = get_wfile_record(rdaid, wfile)
|
|
221
|
+
if not wfrec: continue
|
|
213
222
|
ms = re.search(r'^https://tds.ucar.edu/thredds/(\w+)/', url)
|
|
214
223
|
if ms:
|
|
215
224
|
# tds usage
|
|
@@ -231,12 +240,14 @@ def fill_cdg_usages(dsids, dranges):
|
|
|
231
240
|
trecs[tkey]['size'] += dsize
|
|
232
241
|
trecs[tkey]['fcount'] += 1
|
|
233
242
|
else:
|
|
234
|
-
|
|
235
|
-
|
|
243
|
+
wurec = get_wuser_record(ip, cdate, skipwuid = True)
|
|
244
|
+
if not wurec: return 0
|
|
245
|
+
trecs[tkey] = {'ip' : ip, 'dsid' : wfrec['dsid'], 'date' : cdate, 'time' : time, 'size' : dsize,
|
|
246
|
+
'fcount' : 1, 'method' : method, 'etype' : etype, 'engine' : engine,
|
|
247
|
+
'org_type' : wurec['org_type'], 'country' : wurec['country'],
|
|
248
|
+
'email' : wurec['email']}
|
|
236
249
|
else:
|
|
237
250
|
# web usage
|
|
238
|
-
wfile = pgrec['dataset_file_name']
|
|
239
|
-
if not wfile: wfile = pgrec['logic_file_name']
|
|
240
251
|
fsize = pgrec['dataset_file_size']
|
|
241
252
|
if not fsize: fsize = pgrec['logic_file_size']
|
|
242
253
|
method = 'CDP'
|
|
@@ -250,7 +261,9 @@ def fill_cdg_usages(dsids, dranges):
|
|
|
250
261
|
wrec['size'] += dsize
|
|
251
262
|
continue
|
|
252
263
|
wcnt += add_webfile_usage(year, wrec)
|
|
253
|
-
|
|
264
|
+
wurec = get_wuser_record(ip, cdate, skipwuid = False)
|
|
265
|
+
if not wurec: return 0
|
|
266
|
+
wrec = {'ip' : ip, 'dsid' : wfrec['dsid'], 'wid' : wfrec['wid'], 'date' : date,
|
|
254
267
|
'time' : time, 'quarter' : quarter, 'size' : dsize,
|
|
255
268
|
'locflag' : 'C', 'method' : method}
|
|
256
269
|
pwkey = wkey
|
|
@@ -263,8 +276,9 @@ def fill_cdg_usages(dsids, dranges):
|
|
|
263
276
|
atcnt += tcnt
|
|
264
277
|
awcnt += wcnt
|
|
265
278
|
allcnt += pgcnt
|
|
279
|
+
PgLOG.pglog("{}/{} TDS/WEB usage records added for {} CDG entries at {}".format(atcnt, awcnt, allcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)
|
|
266
280
|
|
|
267
|
-
PgLOG.pglog("{}/{} TDS/WEB usage records added for {} CDG entries at {}".format(atcnt, awcnt, allcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)
|
|
281
|
+
if lcnt > 1: PgLOG.pglog("{}/{} TDS/WEB usage records added for {} CDG entries at {}".format(atcnt, awcnt, allcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)
|
|
268
282
|
|
|
269
283
|
|
|
270
284
|
def get_record_date_time(ctime):
|
|
@@ -287,12 +301,6 @@ def add_tdsusage_records(year, records, date):
|
|
|
287
301
|
record = records[key]
|
|
288
302
|
cond = "date = '{}' AND time = '{}' AND ip = '{}'".format(date, record['time'], record['ip'])
|
|
289
303
|
if PgDBI.pgget(USAGE['TDSTBL'], '', cond, PgLOG.LGEREX): continue
|
|
290
|
-
record['org_type'] = record['country'] = '-'
|
|
291
|
-
ipinfo = PgIPInfo.set_ipinfo(record['ip'])
|
|
292
|
-
if ipinfo:
|
|
293
|
-
record['org_type'] = ipinfo['org_type']
|
|
294
|
-
record['country'] = ipinfo['country']
|
|
295
|
-
record['email'] = 'unknown@' + ipinfo['hostname']
|
|
296
304
|
|
|
297
305
|
if add_tds_allusage(year, record):
|
|
298
306
|
cnt += PgDBI.pgadd(USAGE['TDSTBL'], record, PgLOG.LOGWRN)
|
|
@@ -301,7 +309,6 @@ def add_tdsusage_records(year, records, date):
|
|
|
301
309
|
|
|
302
310
|
return cnt
|
|
303
311
|
|
|
304
|
-
|
|
305
312
|
def add_tds_allusage(year, pgrec):
|
|
306
313
|
|
|
307
314
|
record = {'method' : 'CDP', 'source' : 'C'}
|
|
@@ -315,18 +322,13 @@ def add_tds_allusage(year, pgrec):
|
|
|
315
322
|
#
|
|
316
323
|
# Fill usage of a single online data file into table dssdb.wusage of DSS PgSQL database
|
|
317
324
|
#
|
|
318
|
-
def add_webfile_usage(year, logrec):
|
|
319
|
-
|
|
320
|
-
pgrec = get_wfile_wid(logrec['dsid'], logrec['wfile'])
|
|
321
|
-
if not pgrec: return 0
|
|
325
|
+
def add_webfile_usage(year, logrec, wurec):
|
|
322
326
|
|
|
323
327
|
table = "{}_{}".format(USAGE['WEBTBL'], year)
|
|
324
|
-
cond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(
|
|
328
|
+
cond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(logrec['wid'], logrec['method'], logrec['date'], logrec['time'])
|
|
325
329
|
if PgDBI.pgget(table, "", cond, PgLOG.LOGWRN): return 0
|
|
326
330
|
|
|
327
|
-
|
|
328
|
-
if not wurec: return 0
|
|
329
|
-
record = {'wid' : pgrec['wid'], 'dsid' : pgrec['dsid']}
|
|
331
|
+
record = {'wid' : logrec['wid'], 'dsid' : logrec['dsid']}
|
|
330
332
|
record['wuid_read'] = wurec['wuid']
|
|
331
333
|
record['date_read'] = logrec['date']
|
|
332
334
|
record['time_read'] = logrec['time']
|
|
@@ -357,30 +359,47 @@ def add_web_allusage(year, logrec, wurec):
|
|
|
357
359
|
#
|
|
358
360
|
# return wfile.wid upon success, 0 otherwise
|
|
359
361
|
#
|
|
360
|
-
def
|
|
362
|
+
def get_wfile_record(dsids, wfile):
|
|
361
363
|
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
364
|
+
for dsid in dsids:
|
|
365
|
+
wkey = "{}{}".format(dsid, wfile)
|
|
366
|
+
if wkey in WFILES: return WFILES[wkey]
|
|
367
|
+
wfcond = "wfile like '%{}'".format(wfile)
|
|
368
|
+
pgrec = None
|
|
369
|
+
for dsid in dsids:
|
|
370
|
+
pgrec = PgSplit.pgget_wfile(dsid, "wid", wfcond)
|
|
371
|
+
if pgrec:
|
|
372
|
+
pgrec['dsid'] = dsid
|
|
373
|
+
wkey = "{}{}".format(dsid, wfile)
|
|
374
|
+
WFILES[wkey] = pgrec
|
|
375
|
+
return pgrec
|
|
376
|
+
|
|
377
|
+
for dsid in dsids:
|
|
378
|
+
pgrec = PgDBI.pgget("wfile_delete", "wid, dsid", "{} AND dsid = '{}'".format(wfcond, dsid))
|
|
368
379
|
if not pgrec:
|
|
369
|
-
|
|
370
|
-
if
|
|
371
|
-
pgrec = PgSplit.pgget_wfile(
|
|
372
|
-
if pgrec: pgrec['dsid'] = dsid
|
|
380
|
+
mvrec = PgDBI.pgget("wmove", "wid, dsid", wfcond)
|
|
381
|
+
if mvrec:
|
|
382
|
+
pgrec = PgSplit.pgget_wfile(mvrec['dsid'], "wid", "wid = {}".format(pgrec['wid']))
|
|
383
|
+
if pgrec: pgrec['dsid'] = mvrec['dsid']
|
|
373
384
|
|
|
385
|
+
if pgrec: WFILES[wkey] = pgrec
|
|
374
386
|
return pgrec
|
|
375
387
|
|
|
376
388
|
# return wuser record upon success, None otherwise
|
|
377
|
-
def get_wuser_record(ip, date):
|
|
389
|
+
def get_wuser_record(ip, date, skipwuid = True):
|
|
390
|
+
|
|
391
|
+
if ip in WUSERS: return WUSERS[ip]
|
|
378
392
|
|
|
379
393
|
ipinfo = PgIPInfo.set_ipinfo(ip)
|
|
380
394
|
if not ipinfo: return None
|
|
381
395
|
|
|
382
396
|
record = {'org_type' : ipinfo['org_type'], 'country' : ipinfo['country']}
|
|
383
397
|
email = 'unknown@' + ipinfo['hostname']
|
|
398
|
+
if skipwuid:
|
|
399
|
+
record['email'] = email
|
|
400
|
+
WUSERS[ip] = record
|
|
401
|
+
return record
|
|
402
|
+
|
|
384
403
|
emcond = "email = '{}'".format(email)
|
|
385
404
|
flds = 'wuid, email, org_type, country, start_date'
|
|
386
405
|
pgrec = PgDBI.pgget("wuser", flds, emcond, PgLOG.LOGERR)
|
|
@@ -388,6 +407,7 @@ def get_wuser_record(ip, date):
|
|
|
388
407
|
if PgUtil.diffdate(pgrec['start_date'], date) > 0:
|
|
389
408
|
pgrec['start_date'] = record['start_date'] = date
|
|
390
409
|
PgDBI.pgupdt('wuser', record, emcond)
|
|
410
|
+
WUSERS[ip] = pgrec
|
|
391
411
|
return pgrec
|
|
392
412
|
|
|
393
413
|
# now add one in
|
|
@@ -398,6 +418,7 @@ def get_wuser_record(ip, date):
|
|
|
398
418
|
if wuid:
|
|
399
419
|
record['wuid'] = wuid
|
|
400
420
|
PgLOG.pglog("{} Added as wuid({})".format(email, wuid), PgLOG.LGWNEM)
|
|
421
|
+
WUSERS[ip] = record
|
|
401
422
|
return record
|
|
402
423
|
|
|
403
424
|
return None
|
|
@@ -3,7 +3,7 @@ rda_python_metrics/PgView.py,sha256=r6otb3DjfiaQJdg0z8bZQAOlhr4JnrXJzp9wgWh_8qQ,
|
|
|
3
3
|
rda_python_metrics/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
4
4
|
rda_python_metrics/fillawsusage.py,sha256=dpI3-EFINJECdCSUOm37A97CJNIxOS2nYwA0fB0IpHE,9055
|
|
5
5
|
rda_python_metrics/fillawsusage.usg,sha256=pD_nYTfg1so9nvVEyPSWdgKvb9gWdtfHJAs3RsT2MMU,609
|
|
6
|
-
rda_python_metrics/fillcdgusage.py,sha256=
|
|
6
|
+
rda_python_metrics/fillcdgusage.py,sha256=m4zXnx5MaCP-GfNVDRfTOKMlVrcWaFNqwtJeGW7W8Hg,14892
|
|
7
7
|
rda_python_metrics/fillcdgusage.usg,sha256=5lkd4Zdi72nQcha-JtbaLnxl66V4wBDykKwQtUOtMrw,667
|
|
8
8
|
rda_python_metrics/fillcodusage.py,sha256=Hp3VrlIqHBWRI6Zjbi0nxWZPNvPrKsGBSZ1L9qA9_y0,8006
|
|
9
9
|
rda_python_metrics/fillcodusage.usg,sha256=3B5IkQ4uwylqY8uEfUdnZ_MXqhYudeylMp5ulhUGXH8,678
|
|
@@ -44,9 +44,9 @@ rda_python_metrics/viewwebfile.py,sha256=HSMNkQQawonu6W3blV7g9UbJuNy9VAOn9COqgmj
|
|
|
44
44
|
rda_python_metrics/viewwebfile.usg,sha256=lTNi8Yu8BUJuExEDJX-vsJyWUSUIQTS-DiiBEVFo33s,10054
|
|
45
45
|
rda_python_metrics/viewwebusage.py,sha256=jhoHuRPVNtp7Lbjv0l-Jy_vp2p2nWQC7IVZ0P4JUJ4I,16657
|
|
46
46
|
rda_python_metrics/viewwebusage.usg,sha256=IsT72v22xyZf7ng_IodVs0dLTsH1Q4BtFvT-gs0-xJY,9946
|
|
47
|
-
rda_python_metrics-1.0.
|
|
48
|
-
rda_python_metrics-1.0.
|
|
49
|
-
rda_python_metrics-1.0.
|
|
50
|
-
rda_python_metrics-1.0.
|
|
51
|
-
rda_python_metrics-1.0.
|
|
52
|
-
rda_python_metrics-1.0.
|
|
47
|
+
rda_python_metrics-1.0.10.dist-info/licenses/LICENSE,sha256=1dck4EAQwv8QweDWCXDx-4Or0S8YwiCstaso_H57Pno,1097
|
|
48
|
+
rda_python_metrics-1.0.10.dist-info/METADATA,sha256=LUru8PysvjXZNqq6kZzFGpMBReGYJtLwXpqFKB5X6hQ,736
|
|
49
|
+
rda_python_metrics-1.0.10.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
50
|
+
rda_python_metrics-1.0.10.dist-info/entry_points.txt,sha256=YfFLzlE3rdufSV471VsDnfYptnt1lR08aSrxPXlKqlY,1185
|
|
51
|
+
rda_python_metrics-1.0.10.dist-info/top_level.txt,sha256=aoBgbR_o70TP0QmMW0U6inRHYtfKld47OBmnWnLnDOs,19
|
|
52
|
+
rda_python_metrics-1.0.10.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|