rda-python-metrics 1.0.10__py3-none-any.whl → 1.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rda-python-metrics might be problematic. Click here for more details.
- rda_python_metrics/fillcdgusage.py +66 -48
- {rda_python_metrics-1.0.10.dist-info → rda_python_metrics-1.0.11.dist-info}/METADATA +1 -1
- {rda_python_metrics-1.0.10.dist-info → rda_python_metrics-1.0.11.dist-info}/RECORD +7 -7
- {rda_python_metrics-1.0.10.dist-info → rda_python_metrics-1.0.11.dist-info}/WHEEL +0 -0
- {rda_python_metrics-1.0.10.dist-info → rda_python_metrics-1.0.11.dist-info}/entry_points.txt +0 -0
- {rda_python_metrics-1.0.10.dist-info → rda_python_metrics-1.0.11.dist-info}/licenses/LICENSE +0 -0
- {rda_python_metrics-1.0.10.dist-info → rda_python_metrics-1.0.11.dist-info}/top_level.txt +0 -0
|
@@ -25,7 +25,7 @@ from . import PgIPInfo
|
|
|
25
25
|
|
|
26
26
|
USAGE = {
|
|
27
27
|
'TDSTBL' : "tdsusage",
|
|
28
|
-
'WEBTBL' : "
|
|
28
|
+
'WEBTBL' : "wusage",
|
|
29
29
|
'CDATE' : PgUtil.curdate(),
|
|
30
30
|
}
|
|
31
31
|
|
|
@@ -112,35 +112,35 @@ def get_dataset_ids(dsnames):
|
|
|
112
112
|
dsids = []
|
|
113
113
|
tbname = 'metadata.dataset'
|
|
114
114
|
for dsname in dsnames:
|
|
115
|
-
if re.match(r'^all$', dsname, re.I): return ALLIDS
|
|
115
|
+
if re.match(r'^all$', dsname, re.I): return get_dataset_ids(ALLIDS)
|
|
116
116
|
if dsname not in DSIDS:
|
|
117
117
|
PgLOG.pglog(dsname + ": Unknown CDG dataset short name", PgLOG.LOGWRN)
|
|
118
118
|
continue
|
|
119
|
-
rdaid = DSIDS[dsname]
|
|
120
119
|
pgrec = PgDBI.pgget(tbname, 'id', "short_name = '{}'".format(dsname))
|
|
121
120
|
if not (pgrec and pgrec['id']): continue
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
recursive_dataset_ids(
|
|
121
|
+
rdaids = DSIDS[dsname]
|
|
122
|
+
cdgid = pgrec['id']
|
|
123
|
+
cdgids = [cdgid]
|
|
124
|
+
recursive_dataset_ids(cdgid, cdgids)
|
|
125
|
+
dsids.append([dsname, rdaids, cdgids])
|
|
126
126
|
|
|
127
127
|
if not dsids: PgLOG.pglog("No Dataset Id identified to gather CDG metrics", PgLOG.LOGWRN)
|
|
128
128
|
|
|
129
129
|
return dsids
|
|
130
130
|
|
|
131
131
|
#
|
|
132
|
-
# get
|
|
132
|
+
# get cdgids recursively
|
|
133
133
|
#
|
|
134
|
-
def recursive_dataset_ids(
|
|
134
|
+
def recursive_dataset_ids(pcdgid, cdgids):
|
|
135
135
|
|
|
136
136
|
tbname = 'metadata.dataset'
|
|
137
|
-
pgrecs = PgDBI.pgmget(tbname, 'id', "parent_dataset_id = '{}'".format(
|
|
137
|
+
pgrecs = PgDBI.pgmget(tbname, 'id', "parent_dataset_id = '{}'".format(pcdgid))
|
|
138
138
|
if not pgrecs: return
|
|
139
139
|
|
|
140
|
-
for
|
|
141
|
-
if
|
|
142
|
-
|
|
143
|
-
recursive_dataset_ids(
|
|
140
|
+
for cdgid in pgrecs['id']:
|
|
141
|
+
if cdgid in cdgids: continue
|
|
142
|
+
cdgids.append(cdgid)
|
|
143
|
+
recursive_dataset_ids(cdgid, cdgids)
|
|
144
144
|
|
|
145
145
|
#
|
|
146
146
|
# get the date ranges for given condition
|
|
@@ -169,14 +169,21 @@ def get_date_ranges(inputs):
|
|
|
169
169
|
#
|
|
170
170
|
# get file download records for given dsid
|
|
171
171
|
#
|
|
172
|
-
def get_dsid_records(
|
|
172
|
+
def get_dsid_records(cdgids, dates, srdaid):
|
|
173
173
|
|
|
174
174
|
gdex_dbname()
|
|
175
175
|
tbname = 'metrics.file_download'
|
|
176
176
|
fields = ('date_completed, remote_address, logical_file_size, logical_file_name, file_access_point_uri, user_agent_name, bytes_sent, '
|
|
177
177
|
'subset_file_size, range_request, dataset_file_size, dataset_file_name, dataset_file_file_access_point_uri')
|
|
178
|
-
|
|
179
|
-
|
|
178
|
+
dscnt = len(cdgids)
|
|
179
|
+
dscnd = "dataset_id "
|
|
180
|
+
if dscnt == 1:
|
|
181
|
+
dscnd += "= '{}'".format(cdgids[0])
|
|
182
|
+
else:
|
|
183
|
+
dscnd += "IN ('" + "','".join(cdgids) + "')"
|
|
184
|
+
dtcnd = "date_completed BETWEEN '{}' AND '{}'".format(dates[0], dates[1])
|
|
185
|
+
cond = "{} AND completed = True AND {} ORDER BY date_completed".format(dscnd, dtcnd)
|
|
186
|
+
PgLOG.pglog("{}: CDG query for {} at {}".format(srdaid, cond, PgLOG.current_datetime()), PgLOG.LOGWRN)
|
|
180
187
|
pgrecs = PgDBI.pgmget(tbname, fields, cond)
|
|
181
188
|
PgDBI.dssdb_dbname()
|
|
182
189
|
|
|
@@ -191,10 +198,11 @@ def fill_cdg_usages(dsids, dranges):
|
|
|
191
198
|
for dates in dranges:
|
|
192
199
|
for dsid in dsids:
|
|
193
200
|
lcnt += 1
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
201
|
+
dsname = dsid[0]
|
|
202
|
+
rdaids = dsid[1]
|
|
203
|
+
cdgids = dsid[2]
|
|
204
|
+
srdaid = '|'.join(rdaids)
|
|
205
|
+
pgrecs = get_dsid_records(cdgids, dates, srdaid)
|
|
198
206
|
pgcnt = len(pgrecs['dataset_file_name']) if pgrecs else 0
|
|
199
207
|
if pgcnt == 0:
|
|
200
208
|
PgLOG.pglog("{}: No record found to gather CDG usage between {} and {}".format(srdaid, dates[0], dates[1]), PgLOG.LOGWRN)
|
|
@@ -217,8 +225,9 @@ def fill_cdg_usages(dsids, dranges):
|
|
|
217
225
|
engine = pgrec['user_agent_name']
|
|
218
226
|
wfile = pgrec['dataset_file_name']
|
|
219
227
|
if not wfile: wfile = pgrec['logic_file_name']
|
|
220
|
-
wfrec = get_wfile_record(
|
|
228
|
+
wfrec = get_wfile_record(rdaids, wfile)
|
|
221
229
|
if not wfrec: continue
|
|
230
|
+
dsid = wfrec['dsid']
|
|
222
231
|
ms = re.search(r'^https://tds.ucar.edu/thredds/(\w+)/', url)
|
|
223
232
|
if ms:
|
|
224
233
|
# tds usage
|
|
@@ -235,14 +244,14 @@ def fill_cdg_usages(dsids, dranges):
|
|
|
235
244
|
tcnt += add_tdsusage_records(year, trecs, cdate)
|
|
236
245
|
trecs = {}
|
|
237
246
|
cdate = date
|
|
238
|
-
tkey = "{}:{}:{}:{}".format(ip,
|
|
247
|
+
tkey = "{}:{}:{}:{}".format(ip, dsid, method, etype)
|
|
239
248
|
if tkey in trecs:
|
|
240
249
|
trecs[tkey]['size'] += dsize
|
|
241
250
|
trecs[tkey]['fcount'] += 1
|
|
242
251
|
else:
|
|
243
|
-
wurec = get_wuser_record(ip
|
|
252
|
+
wurec = get_wuser_record(ip)
|
|
244
253
|
if not wurec: return 0
|
|
245
|
-
trecs[tkey] = {'ip' : ip, 'dsid' :
|
|
254
|
+
trecs[tkey] = {'ip' : ip, 'dsid' : dsid, 'date' : cdate, 'time' : time, 'size' : dsize,
|
|
246
255
|
'fcount' : 1, 'method' : method, 'etype' : etype, 'engine' : engine,
|
|
247
256
|
'org_type' : wurec['org_type'], 'country' : wurec['country'],
|
|
248
257
|
'email' : wurec['email']}
|
|
@@ -252,7 +261,7 @@ def fill_cdg_usages(dsids, dranges):
|
|
|
252
261
|
if not fsize: fsize = pgrec['logic_file_size']
|
|
253
262
|
method = 'CDP'
|
|
254
263
|
if pgrec['subset_file_size'] or pgrec['range_request'] or dsize < fsize:
|
|
255
|
-
wkey = "{}:{}:{}".format(ip,
|
|
264
|
+
wkey = "{}:{}:{}".format(ip, dsid, wfile)
|
|
256
265
|
else:
|
|
257
266
|
wkey = None
|
|
258
267
|
|
|
@@ -261,9 +270,7 @@ def fill_cdg_usages(dsids, dranges):
|
|
|
261
270
|
wrec['size'] += dsize
|
|
262
271
|
continue
|
|
263
272
|
wcnt += add_webfile_usage(year, wrec)
|
|
264
|
-
|
|
265
|
-
if not wurec: return 0
|
|
266
|
-
wrec = {'ip' : ip, 'dsid' : wfrec['dsid'], 'wid' : wfrec['wid'], 'date' : date,
|
|
273
|
+
wrec = {'ip' : ip, 'dsid' : dsid, 'wid' : wfrec['wid'], 'date' : date,
|
|
267
274
|
'time' : time, 'quarter' : quarter, 'size' : dsize,
|
|
268
275
|
'locflag' : 'C', 'method' : method}
|
|
269
276
|
pwkey = wkey
|
|
@@ -278,9 +285,6 @@ def fill_cdg_usages(dsids, dranges):
|
|
|
278
285
|
allcnt += pgcnt
|
|
279
286
|
PgLOG.pglog("{}/{} TDS/WEB usage records added for {} CDG entries at {}".format(atcnt, awcnt, allcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)
|
|
280
287
|
|
|
281
|
-
if lcnt > 1: PgLOG.pglog("{}/{} TDS/WEB usage records added for {} CDG entries at {}".format(atcnt, awcnt, allcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)
|
|
282
|
-
|
|
283
|
-
|
|
284
288
|
def get_record_date_time(ctime):
|
|
285
289
|
|
|
286
290
|
ms = re.search(r'^(\d+)/(\w+)/(\d+) (\d+:\d+:\d+)(\.|$)', str(ctime))
|
|
@@ -309,33 +313,43 @@ def add_tdsusage_records(year, records, date):
|
|
|
309
313
|
|
|
310
314
|
return cnt
|
|
311
315
|
|
|
312
|
-
def add_tds_allusage(year,
|
|
316
|
+
def add_tds_allusage(year, logrec):
|
|
313
317
|
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
318
|
+
pgrec = {'method' : 'CDP', 'source' : 'C'}
|
|
319
|
+
pgrec['email'] = logrec['email']
|
|
320
|
+
pgrec['org_type'] = logrec['org_type']
|
|
321
|
+
pgrec['country'] = logrec['country']
|
|
322
|
+
pgrec['dsid'] = logrec['dsid']
|
|
323
|
+
pgrec['date'] = logrec['date']
|
|
324
|
+
pgrec['quarter'] = logrec['quarter']
|
|
325
|
+
pgrec['time'] = logrec['time']
|
|
326
|
+
pgrec['size'] = logrec['size']
|
|
327
|
+
pgrec['ip'] = logrec['ip']
|
|
328
|
+
return PgDBI.add_yearly_allusage(year, pgrec)
|
|
319
329
|
|
|
320
|
-
return PgDBI.add_yearly_allusage(year, record)
|
|
321
330
|
|
|
322
331
|
#
|
|
323
332
|
# Fill usage of a single online data file into table dssdb.wusage of DSS PgSQL database
|
|
324
333
|
#
|
|
325
|
-
def add_webfile_usage(year, logrec
|
|
334
|
+
def add_webfile_usage(year, logrec):
|
|
326
335
|
|
|
327
336
|
table = "{}_{}".format(USAGE['WEBTBL'], year)
|
|
328
|
-
|
|
337
|
+
cdate = logrec['date']
|
|
338
|
+
ip = logrec['ip']
|
|
339
|
+
cond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(logrec['wid'], logrec['method'], cdate, logrec['time'])
|
|
329
340
|
if PgDBI.pgget(table, "", cond, PgLOG.LOGWRN): return 0
|
|
330
341
|
|
|
342
|
+
wurec = get_wuser_record(ip, cdate, False)
|
|
343
|
+
if not wurec: return 0
|
|
344
|
+
|
|
331
345
|
record = {'wid' : logrec['wid'], 'dsid' : logrec['dsid']}
|
|
332
346
|
record['wuid_read'] = wurec['wuid']
|
|
333
|
-
record['date_read'] =
|
|
347
|
+
record['date_read'] = cdate
|
|
334
348
|
record['time_read'] = logrec['time']
|
|
335
349
|
record['size_read'] = logrec['size']
|
|
336
350
|
record['method'] = logrec['method']
|
|
337
351
|
record['locflag'] = logrec['locflag']
|
|
338
|
-
record['ip'] =
|
|
352
|
+
record['ip'] = ip
|
|
339
353
|
record['quarter'] = logrec['quarter']
|
|
340
354
|
|
|
341
355
|
if add_web_allusage(year, logrec, wurec):
|
|
@@ -345,7 +359,10 @@ def add_webfile_usage(year, logrec, wurec):
|
|
|
345
359
|
|
|
346
360
|
def add_web_allusage(year, logrec, wurec):
|
|
347
361
|
|
|
348
|
-
pgrec = {'
|
|
362
|
+
pgrec = {'source' : 'C'}
|
|
363
|
+
pgrec['email'] = wurec['email']
|
|
364
|
+
pgrec['org_type'] = wurec['org_type']
|
|
365
|
+
pgrec['country'] = wurec['country']
|
|
349
366
|
pgrec['dsid'] = logrec['dsid']
|
|
350
367
|
pgrec['date'] = logrec['date']
|
|
351
368
|
pgrec['quarter'] = logrec['quarter']
|
|
@@ -353,7 +370,6 @@ def add_web_allusage(year, logrec, wurec):
|
|
|
353
370
|
pgrec['size'] = logrec['size']
|
|
354
371
|
pgrec['method'] = logrec['method']
|
|
355
372
|
pgrec['ip'] = logrec['ip']
|
|
356
|
-
pgrec['source'] = 'C'
|
|
357
373
|
return PgDBI.add_yearly_allusage(year, pgrec)
|
|
358
374
|
|
|
359
375
|
#
|
|
@@ -364,7 +380,7 @@ def get_wfile_record(dsids, wfile):
|
|
|
364
380
|
for dsid in dsids:
|
|
365
381
|
wkey = "{}{}".format(dsid, wfile)
|
|
366
382
|
if wkey in WFILES: return WFILES[wkey]
|
|
367
|
-
wfcond = "wfile
|
|
383
|
+
wfcond = "wfile LIKE '%{}'".format(wfile)
|
|
368
384
|
pgrec = None
|
|
369
385
|
for dsid in dsids:
|
|
370
386
|
pgrec = PgSplit.pgget_wfile(dsid, "wid", wfcond)
|
|
@@ -382,11 +398,13 @@ def get_wfile_record(dsids, wfile):
|
|
|
382
398
|
pgrec = PgSplit.pgget_wfile(mvrec['dsid'], "wid", "wid = {}".format(pgrec['wid']))
|
|
383
399
|
if pgrec: pgrec['dsid'] = mvrec['dsid']
|
|
384
400
|
|
|
385
|
-
if pgrec:
|
|
401
|
+
if pgrec:
|
|
402
|
+
wkey = "{}{}".format(pgrec['dsid'], wfile)
|
|
403
|
+
WFILES[wkey] = pgrec
|
|
386
404
|
return pgrec
|
|
387
405
|
|
|
388
406
|
# return wuser record upon success, None otherwise
|
|
389
|
-
def get_wuser_record(ip, date, skipwuid = True):
|
|
407
|
+
def get_wuser_record(ip, date = None, skipwuid = True):
|
|
390
408
|
|
|
391
409
|
if ip in WUSERS: return WUSERS[ip]
|
|
392
410
|
|
|
@@ -3,7 +3,7 @@ rda_python_metrics/PgView.py,sha256=r6otb3DjfiaQJdg0z8bZQAOlhr4JnrXJzp9wgWh_8qQ,
|
|
|
3
3
|
rda_python_metrics/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
4
4
|
rda_python_metrics/fillawsusage.py,sha256=dpI3-EFINJECdCSUOm37A97CJNIxOS2nYwA0fB0IpHE,9055
|
|
5
5
|
rda_python_metrics/fillawsusage.usg,sha256=pD_nYTfg1so9nvVEyPSWdgKvb9gWdtfHJAs3RsT2MMU,609
|
|
6
|
-
rda_python_metrics/fillcdgusage.py,sha256=
|
|
6
|
+
rda_python_metrics/fillcdgusage.py,sha256=D4PhdVyGoISSr0ykNIpmHVMpEAfRZQB9BvaXdYMcGno,15186
|
|
7
7
|
rda_python_metrics/fillcdgusage.usg,sha256=5lkd4Zdi72nQcha-JtbaLnxl66V4wBDykKwQtUOtMrw,667
|
|
8
8
|
rda_python_metrics/fillcodusage.py,sha256=Hp3VrlIqHBWRI6Zjbi0nxWZPNvPrKsGBSZ1L9qA9_y0,8006
|
|
9
9
|
rda_python_metrics/fillcodusage.usg,sha256=3B5IkQ4uwylqY8uEfUdnZ_MXqhYudeylMp5ulhUGXH8,678
|
|
@@ -44,9 +44,9 @@ rda_python_metrics/viewwebfile.py,sha256=HSMNkQQawonu6W3blV7g9UbJuNy9VAOn9COqgmj
|
|
|
44
44
|
rda_python_metrics/viewwebfile.usg,sha256=lTNi8Yu8BUJuExEDJX-vsJyWUSUIQTS-DiiBEVFo33s,10054
|
|
45
45
|
rda_python_metrics/viewwebusage.py,sha256=jhoHuRPVNtp7Lbjv0l-Jy_vp2p2nWQC7IVZ0P4JUJ4I,16657
|
|
46
46
|
rda_python_metrics/viewwebusage.usg,sha256=IsT72v22xyZf7ng_IodVs0dLTsH1Q4BtFvT-gs0-xJY,9946
|
|
47
|
-
rda_python_metrics-1.0.
|
|
48
|
-
rda_python_metrics-1.0.
|
|
49
|
-
rda_python_metrics-1.0.
|
|
50
|
-
rda_python_metrics-1.0.
|
|
51
|
-
rda_python_metrics-1.0.
|
|
52
|
-
rda_python_metrics-1.0.
|
|
47
|
+
rda_python_metrics-1.0.11.dist-info/licenses/LICENSE,sha256=1dck4EAQwv8QweDWCXDx-4Or0S8YwiCstaso_H57Pno,1097
|
|
48
|
+
rda_python_metrics-1.0.11.dist-info/METADATA,sha256=2D84_3Vy1MxxzM--KjIjJmFitL4QJ0JwkuigW0CErik,736
|
|
49
|
+
rda_python_metrics-1.0.11.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
50
|
+
rda_python_metrics-1.0.11.dist-info/entry_points.txt,sha256=YfFLzlE3rdufSV471VsDnfYptnt1lR08aSrxPXlKqlY,1185
|
|
51
|
+
rda_python_metrics-1.0.11.dist-info/top_level.txt,sha256=aoBgbR_o70TP0QmMW0U6inRHYtfKld47OBmnWnLnDOs,19
|
|
52
|
+
rda_python_metrics-1.0.11.dist-info/RECORD,,
|
|
File without changes
|
{rda_python_metrics-1.0.10.dist-info → rda_python_metrics-1.0.11.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.10.dist-info → rda_python_metrics-1.0.11.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
|
File without changes
|