rda-python-metrics 1.0.9__py3-none-any.whl → 1.0.10__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rda-python-metrics might be problematic. Click here for more details.

@@ -54,6 +54,10 @@ DSIDS = {
54
54
  'ucar.cgd.ccsm4.pliomip2' : ['d651037']
55
55
  }
56
56
 
57
+ ALLIDS = list(DSIDS.keys())
58
+
59
+ WFILES = {}
60
+ WUSERS = {}
57
61
 
58
62
  #
59
63
  # main function to run this program
@@ -86,13 +90,9 @@ def main():
86
90
 
87
91
 
88
92
  PgLOG.cmdlog("fillcdgusage {}".format(' '.join(argv)))
93
+ dranges = get_date_ranges(params)
89
94
  dsids = get_dataset_ids(params['s'])
90
- if dsids:
91
- del params['s']
92
- for o in params:
93
- dranges = get_date_ranges(o, params[o])
94
- fill_cdg_usages(dsids, dranges)
95
-
95
+ if dranges and dsids: fill_cdg_usages(dsids, dranges)
96
96
  PgLOG.pglog(None, PgLOG.LOGWRN|PgLOG.SNDEML) # send email out if any
97
97
 
98
98
  sys.exit(0)
@@ -112,6 +112,7 @@ def get_dataset_ids(dsnames):
112
112
  dsids = []
113
113
  tbname = 'metadata.dataset'
114
114
  for dsname in dsnames:
115
+ if re.match(r'^all$', dsname, re.I): return ALLIDS
115
116
  if dsname not in DSIDS:
116
117
  PgLOG.pglog(dsname + ": Unknown CDG dataset short name", PgLOG.LOGWRN)
117
118
  continue
@@ -144,23 +145,24 @@ def recursive_dataset_ids(pdsid, rdaid, dsids):
144
145
  #
145
146
  # get the date ranges for given condition
146
147
  #
147
- def get_date_ranges(option, inputs):
148
+ def get_date_ranges(inputs):
148
149
 
149
150
  dranges = []
150
- for input in inputs:
151
- # get date range
152
- dates = []
153
- if option == 'N':
154
- dates[1] = USAGE['CDATE']
155
- dates[0] = PgUtil.adddate(USAGE['CDATE'], 0, 0, -int(input))
156
- elif option == 'm':
157
- tms = input.split('-')
158
- dates[0] = PgUtil.fmtdate(int(tms[0]), int(tms[1]), 1)
159
- dates[1] = PgUtil.enddate(dates[0])
160
- else:
161
- dates[0] = input + "-01-01"
162
- dates[1] = input + "-12-31"
163
- dranges.append(dates)
151
+ for opt in inputs:
152
+ for input in inputs[opt]:
153
+ # get date range
154
+ dates = []
155
+ if opt == 'N':
156
+ dates.append(PgUtil.adddate(USAGE['CDATE'], 0, 0, -int(input)))
157
+ dates.append(USAGE['CDATE'])
158
+ elif opt == 'm':
159
+ tms = input.split('-')
160
+ dates.append(PgUtil.fmtdate(int(tms[0]), int(tms[1]), 1))
161
+ dates.append(PgUtil.enddate(dates[0], 0, 'M'))
162
+ elif opt == 'Y':
163
+ dates.append(input + "-01-01")
164
+ dates.append(input + "-12-31")
165
+ dranges.append(dates)
164
166
 
165
167
  return dranges
166
168
 
@@ -174,6 +176,7 @@ def get_dsid_records(dsid, dates):
174
176
  fields = ('date_completed, remote_address, logical_file_size, logical_file_name, file_access_point_uri, user_agent_name, bytes_sent, '
175
177
  'subset_file_size, range_request, dataset_file_size, dataset_file_name, dataset_file_file_access_point_uri')
176
178
  cond = "dataset_id = '{}' AND completed = True AND date_completed BETWEEN '{}' AND '{}' ORDER BY date_completed".format(dsid, dates[0], dates[1])
179
+ PgLOG.pglog("{}: Query CDG usage between {} and {} at {}".format(dsid, dates[0], dates[1], PgLOG.current_datetime()), PgLOG.LOGWRN)
177
180
  pgrecs = PgDBI.pgmget(tbname, fields, cond)
178
181
  PgDBI.dssdb_dbname()
179
182
 
@@ -184,17 +187,19 @@ def get_dsid_records(dsid, dates):
184
187
  #
185
188
  def fill_cdg_usages(dsids, dranges):
186
189
 
187
- allcnt = awcnt = atcnt = 0
188
- for dsid in dsids:
189
- cdgid = dsid[0]
190
- rdaid = dsid[1]
191
- for dates in dranges:
190
+ allcnt = awcnt = atcnt = lcnt = 0
191
+ for dates in dranges:
192
+ for dsid in dsids:
193
+ lcnt += 1
194
+ cdgid = dsid[0]
195
+ rdaid = dsid[1]
196
+ srdaid = '|'.join(rdaid)
192
197
  pgrecs = get_dsid_records(cdgid, dates)
193
198
  pgcnt = len(pgrecs['dataset_file_name']) if pgrecs else 0
194
199
  if pgcnt == 0:
195
- PgLOG.pglog("{}: No record found to gather CDG usage between {} and {}".format(rdaid, dates[0], dates[1]), PgLOG.LOGWRN)
200
+ PgLOG.pglog("{}: No record found to gather CDG usage between {} and {}".format(srdaid, dates[0], dates[1]), PgLOG.LOGWRN)
196
201
  continue
197
- PgLOG.pglog("{}: gather {} records for CDG usage between {} and {}".format(rdaid, pgcnt, dates[0], dates[1]), PgLOG.LOGWRN)
202
+ PgLOG.pglog("{}: Process {} records for CDG usage at {}".format(srdaid, pgcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)
198
203
  tcnt = wcnt = 0
199
204
  pwkey = wrec = cdate = None
200
205
  trecs = {}
@@ -210,6 +215,10 @@ def fill_cdg_usages(dsids, dranges):
210
215
  if not url: url = pgrec['file_access_point_uri']
211
216
  ip = pgrec['remote_address']
212
217
  engine = pgrec['user_agent_name']
218
+ wfile = pgrec['dataset_file_name']
219
+ if not wfile: wfile = pgrec['logic_file_name']
220
+ wfrec = get_wfile_record(rdaid, wfile)
221
+ if not wfrec: continue
213
222
  ms = re.search(r'^https://tds.ucar.edu/thredds/(\w+)/', url)
214
223
  if ms:
215
224
  # tds usage
@@ -231,12 +240,14 @@ def fill_cdg_usages(dsids, dranges):
231
240
  trecs[tkey]['size'] += dsize
232
241
  trecs[tkey]['fcount'] += 1
233
242
  else:
234
- trecs[tkey] = {'ip' : ip, 'dsid' : rdaid, 'date' : cdate, 'time' : time, 'size' : dsize,
235
- 'fcount' : 1, 'method' : method, 'etype' : etype, 'engine' : engine}
243
+ wurec = get_wuser_record(ip, cdate, skipwuid = True)
244
+ if not wurec: return 0
245
+ trecs[tkey] = {'ip' : ip, 'dsid' : wfrec['dsid'], 'date' : cdate, 'time' : time, 'size' : dsize,
246
+ 'fcount' : 1, 'method' : method, 'etype' : etype, 'engine' : engine,
247
+ 'org_type' : wurec['org_type'], 'country' : wurec['country'],
248
+ 'email' : wurec['email']}
236
249
  else:
237
250
  # web usage
238
- wfile = pgrec['dataset_file_name']
239
- if not wfile: wfile = pgrec['logic_file_name']
240
251
  fsize = pgrec['dataset_file_size']
241
252
  if not fsize: fsize = pgrec['logic_file_size']
242
253
  method = 'CDP'
@@ -250,7 +261,9 @@ def fill_cdg_usages(dsids, dranges):
250
261
  wrec['size'] += dsize
251
262
  continue
252
263
  wcnt += add_webfile_usage(year, wrec)
253
- wrec = {'ip' : ip, 'dsid' : rdaid, 'wfile' : wfile, 'date' : date,
264
+ wurec = get_wuser_record(ip, cdate, skipwuid = False)
265
+ if not wurec: return 0
266
+ wrec = {'ip' : ip, 'dsid' : wfrec['dsid'], 'wid' : wfrec['wid'], 'date' : date,
254
267
  'time' : time, 'quarter' : quarter, 'size' : dsize,
255
268
  'locflag' : 'C', 'method' : method}
256
269
  pwkey = wkey
@@ -263,8 +276,9 @@ def fill_cdg_usages(dsids, dranges):
263
276
  atcnt += tcnt
264
277
  awcnt += wcnt
265
278
  allcnt += pgcnt
279
+ PgLOG.pglog("{}/{} TDS/WEB usage records added for {} CDG entries at {}".format(atcnt, awcnt, allcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)
266
280
 
267
- PgLOG.pglog("{}/{} TDS/WEB usage records added for {} CDG entries at {}".format(atcnt, awcnt, allcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)
281
+ if lcnt > 1: PgLOG.pglog("{}/{} TDS/WEB usage records added for {} CDG entries at {}".format(atcnt, awcnt, allcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)
268
282
 
269
283
 
270
284
  def get_record_date_time(ctime):
@@ -287,12 +301,6 @@ def add_tdsusage_records(year, records, date):
287
301
  record = records[key]
288
302
  cond = "date = '{}' AND time = '{}' AND ip = '{}'".format(date, record['time'], record['ip'])
289
303
  if PgDBI.pgget(USAGE['TDSTBL'], '', cond, PgLOG.LGEREX): continue
290
- record['org_type'] = record['country'] = '-'
291
- ipinfo = PgIPInfo.set_ipinfo(record['ip'])
292
- if ipinfo:
293
- record['org_type'] = ipinfo['org_type']
294
- record['country'] = ipinfo['country']
295
- record['email'] = 'unknown@' + ipinfo['hostname']
296
304
 
297
305
  if add_tds_allusage(year, record):
298
306
  cnt += PgDBI.pgadd(USAGE['TDSTBL'], record, PgLOG.LOGWRN)
@@ -301,7 +309,6 @@ def add_tdsusage_records(year, records, date):
301
309
 
302
310
  return cnt
303
311
 
304
-
305
312
  def add_tds_allusage(year, pgrec):
306
313
 
307
314
  record = {'method' : 'CDP', 'source' : 'C'}
@@ -315,18 +322,13 @@ def add_tds_allusage(year, pgrec):
315
322
  #
316
323
  # Fill usage of a single online data file into table dssdb.wusage of DSS PgSQL database
317
324
  #
318
- def add_webfile_usage(year, logrec):
319
-
320
- pgrec = get_wfile_wid(logrec['dsid'], logrec['wfile'])
321
- if not pgrec: return 0
325
+ def add_webfile_usage(year, logrec, wurec):
322
326
 
323
327
  table = "{}_{}".format(USAGE['WEBTBL'], year)
324
- cond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(pgrec['wid'], logrec['method'], logrec['date'], logrec['time'])
328
+ cond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(logrec['wid'], logrec['method'], logrec['date'], logrec['time'])
325
329
  if PgDBI.pgget(table, "", cond, PgLOG.LOGWRN): return 0
326
330
 
327
- wurec = get_wuser_record(logrec['ip'], logrec['date'])
328
- if not wurec: return 0
329
- record = {'wid' : pgrec['wid'], 'dsid' : pgrec['dsid']}
331
+ record = {'wid' : logrec['wid'], 'dsid' : logrec['dsid']}
330
332
  record['wuid_read'] = wurec['wuid']
331
333
  record['date_read'] = logrec['date']
332
334
  record['time_read'] = logrec['time']
@@ -357,30 +359,47 @@ def add_web_allusage(year, logrec, wurec):
357
359
  #
358
360
  # return wfile.wid upon success, 0 otherwise
359
361
  #
360
- def get_wfile_wid(dsid, wfile):
362
+ def get_wfile_record(dsids, wfile):
361
363
 
362
- wfcond = "wfile = '{}'".format(wfile)
363
- pgrec = PgSplit.pgget_wfile(dsid, "*", wfcond)
364
- if pgrec:
365
- pgrec['dsid'] = dsid
366
- else:
367
- pgrec = PgDBI.pgget("wfile_delete", "*", "{} AND dsid = '{}'".format(wfcond, dsid))
364
+ for dsid in dsids:
365
+ wkey = "{}{}".format(dsid, wfile)
366
+ if wkey in WFILES: return WFILES[wkey]
367
+ wfcond = "wfile like '%{}'".format(wfile)
368
+ pgrec = None
369
+ for dsid in dsids:
370
+ pgrec = PgSplit.pgget_wfile(dsid, "wid", wfcond)
371
+ if pgrec:
372
+ pgrec['dsid'] = dsid
373
+ wkey = "{}{}".format(dsid, wfile)
374
+ WFILES[wkey] = pgrec
375
+ return pgrec
376
+
377
+ for dsid in dsids:
378
+ pgrec = PgDBI.pgget("wfile_delete", "wid, dsid", "{} AND dsid = '{}'".format(wfcond, dsid))
368
379
  if not pgrec:
369
- pgrec = PgDBI.pgget("wmove", "wid, dsid", wfcond)
370
- if pgrec:
371
- pgrec = PgSplit.pgget_wfile(pgrec['dsid'], "*", "wid = {}".format(pgrec['wid']))
372
- if pgrec: pgrec['dsid'] = dsid
380
+ mvrec = PgDBI.pgget("wmove", "wid, dsid", wfcond)
381
+ if mvrec:
382
+ pgrec = PgSplit.pgget_wfile(mvrec['dsid'], "wid", "wid = {}".format(pgrec['wid']))
383
+ if pgrec: pgrec['dsid'] = mvrec['dsid']
373
384
 
385
+ if pgrec: WFILES[wkey] = pgrec
374
386
  return pgrec
375
387
 
376
388
  # return wuser record upon success, None otherwise
377
- def get_wuser_record(ip, date):
389
+ def get_wuser_record(ip, date, skipwuid = True):
390
+
391
+ if ip in WUSERS: return WUSERS[ip]
378
392
 
379
393
  ipinfo = PgIPInfo.set_ipinfo(ip)
380
394
  if not ipinfo: return None
381
395
 
382
396
  record = {'org_type' : ipinfo['org_type'], 'country' : ipinfo['country']}
383
397
  email = 'unknown@' + ipinfo['hostname']
398
+ if skipwuid:
399
+ record['email'] = email
400
+ WUSERS[ip] = record
401
+ return record
402
+
384
403
  emcond = "email = '{}'".format(email)
385
404
  flds = 'wuid, email, org_type, country, start_date'
386
405
  pgrec = PgDBI.pgget("wuser", flds, emcond, PgLOG.LOGERR)
@@ -388,6 +407,7 @@ def get_wuser_record(ip, date):
388
407
  if PgUtil.diffdate(pgrec['start_date'], date) > 0:
389
408
  pgrec['start_date'] = record['start_date'] = date
390
409
  PgDBI.pgupdt('wuser', record, emcond)
410
+ WUSERS[ip] = pgrec
391
411
  return pgrec
392
412
 
393
413
  # now add one in
@@ -398,6 +418,7 @@ def get_wuser_record(ip, date):
398
418
  if wuid:
399
419
  record['wuid'] = wuid
400
420
  PgLOG.pglog("{} Added as wuid({})".format(email, wuid), PgLOG.LGWNEM)
421
+ WUSERS[ip] = record
401
422
  return record
402
423
 
403
424
  return None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rda_python_metrics
3
- Version: 1.0.9
3
+ Version: 1.0.10
4
4
  Summary: RDA Python Package to gather and view data usage metrics
5
5
  Author-email: Zaihua Ji <zji@ucar.edu>
6
6
  Project-URL: Homepage, https://github.com/NCAR/rda-python-metrics
@@ -3,7 +3,7 @@ rda_python_metrics/PgView.py,sha256=r6otb3DjfiaQJdg0z8bZQAOlhr4JnrXJzp9wgWh_8qQ,
3
3
  rda_python_metrics/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
4
4
  rda_python_metrics/fillawsusage.py,sha256=dpI3-EFINJECdCSUOm37A97CJNIxOS2nYwA0fB0IpHE,9055
5
5
  rda_python_metrics/fillawsusage.usg,sha256=pD_nYTfg1so9nvVEyPSWdgKvb9gWdtfHJAs3RsT2MMU,609
6
- rda_python_metrics/fillcdgusage.py,sha256=9sDFLAhzndjAVCahIVHofQ4YeslwW7MGIWEDSAQYdqY,13816
6
+ rda_python_metrics/fillcdgusage.py,sha256=m4zXnx5MaCP-GfNVDRfTOKMlVrcWaFNqwtJeGW7W8Hg,14892
7
7
  rda_python_metrics/fillcdgusage.usg,sha256=5lkd4Zdi72nQcha-JtbaLnxl66V4wBDykKwQtUOtMrw,667
8
8
  rda_python_metrics/fillcodusage.py,sha256=Hp3VrlIqHBWRI6Zjbi0nxWZPNvPrKsGBSZ1L9qA9_y0,8006
9
9
  rda_python_metrics/fillcodusage.usg,sha256=3B5IkQ4uwylqY8uEfUdnZ_MXqhYudeylMp5ulhUGXH8,678
@@ -44,9 +44,9 @@ rda_python_metrics/viewwebfile.py,sha256=HSMNkQQawonu6W3blV7g9UbJuNy9VAOn9COqgmj
44
44
  rda_python_metrics/viewwebfile.usg,sha256=lTNi8Yu8BUJuExEDJX-vsJyWUSUIQTS-DiiBEVFo33s,10054
45
45
  rda_python_metrics/viewwebusage.py,sha256=jhoHuRPVNtp7Lbjv0l-Jy_vp2p2nWQC7IVZ0P4JUJ4I,16657
46
46
  rda_python_metrics/viewwebusage.usg,sha256=IsT72v22xyZf7ng_IodVs0dLTsH1Q4BtFvT-gs0-xJY,9946
47
- rda_python_metrics-1.0.9.dist-info/licenses/LICENSE,sha256=1dck4EAQwv8QweDWCXDx-4Or0S8YwiCstaso_H57Pno,1097
48
- rda_python_metrics-1.0.9.dist-info/METADATA,sha256=k5xThxsVVIf1-uVtr5NDQ0VdUPKjZxeNlgSNysXLT24,735
49
- rda_python_metrics-1.0.9.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
50
- rda_python_metrics-1.0.9.dist-info/entry_points.txt,sha256=YfFLzlE3rdufSV471VsDnfYptnt1lR08aSrxPXlKqlY,1185
51
- rda_python_metrics-1.0.9.dist-info/top_level.txt,sha256=aoBgbR_o70TP0QmMW0U6inRHYtfKld47OBmnWnLnDOs,19
52
- rda_python_metrics-1.0.9.dist-info/RECORD,,
47
+ rda_python_metrics-1.0.10.dist-info/licenses/LICENSE,sha256=1dck4EAQwv8QweDWCXDx-4Or0S8YwiCstaso_H57Pno,1097
48
+ rda_python_metrics-1.0.10.dist-info/METADATA,sha256=LUru8PysvjXZNqq6kZzFGpMBReGYJtLwXpqFKB5X6hQ,736
49
+ rda_python_metrics-1.0.10.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
50
+ rda_python_metrics-1.0.10.dist-info/entry_points.txt,sha256=YfFLzlE3rdufSV471VsDnfYptnt1lR08aSrxPXlKqlY,1185
51
+ rda_python_metrics-1.0.10.dist-info/top_level.txt,sha256=aoBgbR_o70TP0QmMW0U6inRHYtfKld47OBmnWnLnDOs,19
52
+ rda_python_metrics-1.0.10.dist-info/RECORD,,