rda-python-metrics 1.0.10__py3-none-any.whl → 1.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rda-python-metrics might be problematic. Click here for more details.

@@ -25,7 +25,7 @@ from . import PgIPInfo
25
25
 
26
26
  USAGE = {
27
27
  'TDSTBL' : "tdsusage",
28
- 'WEBTBL' : "webusage",
28
+ 'WEBTBL' : "wusage",
29
29
  'CDATE' : PgUtil.curdate(),
30
30
  }
31
31
 
@@ -112,35 +112,35 @@ def get_dataset_ids(dsnames):
112
112
  dsids = []
113
113
  tbname = 'metadata.dataset'
114
114
  for dsname in dsnames:
115
- if re.match(r'^all$', dsname, re.I): return ALLIDS
115
+ if re.match(r'^all$', dsname, re.I): return get_dataset_ids(ALLIDS)
116
116
  if dsname not in DSIDS:
117
117
  PgLOG.pglog(dsname + ": Unknown CDG dataset short name", PgLOG.LOGWRN)
118
118
  continue
119
- rdaid = DSIDS[dsname]
120
119
  pgrec = PgDBI.pgget(tbname, 'id', "short_name = '{}'".format(dsname))
121
120
  if not (pgrec and pgrec['id']): continue
122
- dsid = pgrec['id']
123
- if dsid in dsids: continue
124
- dsids.append([dsid, rdaid])
125
- recursive_dataset_ids(dsid, rdaid, dsids)
121
+ rdaids = DSIDS[dsname]
122
+ cdgid = pgrec['id']
123
+ cdgids = [cdgid]
124
+ recursive_dataset_ids(cdgid, cdgids)
125
+ dsids.append([dsname, rdaids, cdgids])
126
126
 
127
127
  if not dsids: PgLOG.pglog("No Dataset Id identified to gather CDG metrics", PgLOG.LOGWRN)
128
128
 
129
129
  return dsids
130
130
 
131
131
  #
132
- # get dsids recursivley
132
+ # get cdgids recursivley
133
133
  #
134
- def recursive_dataset_ids(pdsid, rdaid, dsids):
134
+ def recursive_dataset_ids(pcdgid, cdgids):
135
135
 
136
136
  tbname = 'metadata.dataset'
137
- pgrecs = PgDBI.pgmget(tbname, 'id', "parent_dataset_id = '{}'".format(pdsid))
137
+ pgrecs = PgDBI.pgmget(tbname, 'id', "parent_dataset_id = '{}'".format(pcdgid))
138
138
  if not pgrecs: return
139
139
 
140
- for dsid in pgrecs['id']:
141
- if dsid in dsids: continue
142
- dsids.append([dsid, rdaid])
143
- recursive_dataset_ids(dsid, rdaid, dsids)
140
+ for cdgid in pgrecs['id']:
141
+ if cdgid in cdgids: continue
142
+ cdgids.append(cdgid)
143
+ recursive_dataset_ids(cdgid, cdgids)
144
144
 
145
145
  #
146
146
  # get the date ranges for given condition
@@ -169,14 +169,21 @@ def get_date_ranges(inputs):
169
169
  #
170
170
  # get file download records for given dsid
171
171
  #
172
- def get_dsid_records(dsid, dates):
172
+ def get_dsid_records(cdgids, dates, srdaid):
173
173
 
174
174
  gdex_dbname()
175
175
  tbname = 'metrics.file_download'
176
176
  fields = ('date_completed, remote_address, logical_file_size, logical_file_name, file_access_point_uri, user_agent_name, bytes_sent, '
177
177
  'subset_file_size, range_request, dataset_file_size, dataset_file_name, dataset_file_file_access_point_uri')
178
- cond = "dataset_id = '{}' AND completed = True AND date_completed BETWEEN '{}' AND '{}' ORDER BY date_completed".format(dsid, dates[0], dates[1])
179
- PgLOG.pglog("{}: Query CDG usage between {} and {} at {}".format(dsid, dates[0], dates[1], PgLOG.current_datetime()), PgLOG.LOGWRN)
178
+ dscnt = len(cdgids)
179
+ dscnd = "dataset_id "
180
+ if dscnt == 1:
181
+ dscnd += "= '{}'".format(cdgids[0])
182
+ else:
183
+ dscnd += "IN ('" + "','".join(cdgids) + "')"
184
+ dtcnd = "date_completed BETWEEN '{}' AND '{}'".format(dates[0], dates[1])
185
+ cond = "{} AND completed = True AND {} ORDER BY date_completed".format(dscnd, dtcnd)
186
+ PgLOG.pglog("{}: CDG query for {} at {}".format(srdaid, cond, PgLOG.current_datetime()), PgLOG.LOGWRN)
180
187
  pgrecs = PgDBI.pgmget(tbname, fields, cond)
181
188
  PgDBI.dssdb_dbname()
182
189
 
@@ -191,10 +198,11 @@ def fill_cdg_usages(dsids, dranges):
191
198
  for dates in dranges:
192
199
  for dsid in dsids:
193
200
  lcnt += 1
194
- cdgid = dsid[0]
195
- rdaid = dsid[1]
196
- srdaid = '|'.join(rdaid)
197
- pgrecs = get_dsid_records(cdgid, dates)
201
+ dsname = dsid[0]
202
+ rdaids = dsid[1]
203
+ cdgids = dsid[2]
204
+ srdaid = '|'.join(rdaids)
205
+ pgrecs = get_dsid_records(cdgids, dates, srdaid)
198
206
  pgcnt = len(pgrecs['dataset_file_name']) if pgrecs else 0
199
207
  if pgcnt == 0:
200
208
  PgLOG.pglog("{}: No record found to gather CDG usage between {} and {}".format(srdaid, dates[0], dates[1]), PgLOG.LOGWRN)
@@ -217,8 +225,9 @@ def fill_cdg_usages(dsids, dranges):
217
225
  engine = pgrec['user_agent_name']
218
226
  wfile = pgrec['dataset_file_name']
219
227
  if not wfile: wfile = pgrec['logic_file_name']
220
- wfrec = get_wfile_record(rdaid, wfile)
228
+ wfrec = get_wfile_record(rdaids, wfile)
221
229
  if not wfrec: continue
230
+ dsid = wfrec['dsid']
222
231
  ms = re.search(r'^https://tds.ucar.edu/thredds/(\w+)/', url)
223
232
  if ms:
224
233
  # tds usage
@@ -235,14 +244,14 @@ def fill_cdg_usages(dsids, dranges):
235
244
  tcnt += add_tdsusage_records(year, trecs, cdate)
236
245
  trecs = {}
237
246
  cdate = date
238
- tkey = "{}:{}:{}:{}".format(ip, rdaid, method, etype)
247
+ tkey = "{}:{}:{}:{}".format(ip, dsid, method, etype)
239
248
  if tkey in trecs:
240
249
  trecs[tkey]['size'] += dsize
241
250
  trecs[tkey]['fcount'] += 1
242
251
  else:
243
- wurec = get_wuser_record(ip, cdate, skipwuid = True)
252
+ wurec = get_wuser_record(ip)
244
253
  if not wurec: return 0
245
- trecs[tkey] = {'ip' : ip, 'dsid' : wfrec['dsid'], 'date' : cdate, 'time' : time, 'size' : dsize,
254
+ trecs[tkey] = {'ip' : ip, 'dsid' : dsid, 'date' : cdate, 'time' : time, 'size' : dsize,
246
255
  'fcount' : 1, 'method' : method, 'etype' : etype, 'engine' : engine,
247
256
  'org_type' : wurec['org_type'], 'country' : wurec['country'],
248
257
  'email' : wurec['email']}
@@ -252,7 +261,7 @@ def fill_cdg_usages(dsids, dranges):
252
261
  if not fsize: fsize = pgrec['logic_file_size']
253
262
  method = 'CDP'
254
263
  if pgrec['subset_file_size'] or pgrec['range_request'] or dsize < fsize:
255
- wkey = "{}:{}:{}".format(ip, rdaid, wfile)
264
+ wkey = "{}:{}:{}".format(ip, dsid, wfile)
256
265
  else:
257
266
  wkey = None
258
267
 
@@ -261,9 +270,7 @@ def fill_cdg_usages(dsids, dranges):
261
270
  wrec['size'] += dsize
262
271
  continue
263
272
  wcnt += add_webfile_usage(year, wrec)
264
- wurec = get_wuser_record(ip, cdate, skipwuid = False)
265
- if not wurec: return 0
266
- wrec = {'ip' : ip, 'dsid' : wfrec['dsid'], 'wid' : wfrec['wid'], 'date' : date,
273
+ wrec = {'ip' : ip, 'dsid' : dsid, 'wid' : wfrec['wid'], 'date' : date,
267
274
  'time' : time, 'quarter' : quarter, 'size' : dsize,
268
275
  'locflag' : 'C', 'method' : method}
269
276
  pwkey = wkey
@@ -278,9 +285,6 @@ def fill_cdg_usages(dsids, dranges):
278
285
  allcnt += pgcnt
279
286
  PgLOG.pglog("{}/{} TDS/WEB usage records added for {} CDG entries at {}".format(atcnt, awcnt, allcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)
280
287
 
281
- if lcnt > 1: PgLOG.pglog("{}/{} TDS/WEB usage records added for {} CDG entries at {}".format(atcnt, awcnt, allcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)
282
-
283
-
284
288
  def get_record_date_time(ctime):
285
289
 
286
290
  ms = re.search(r'^(\d+)/(\w+)/(\d+) (\d+:\d+:\d+)(\.|$)', str(ctime))
@@ -309,33 +313,43 @@ def add_tdsusage_records(year, records, date):
309
313
 
310
314
  return cnt
311
315
 
312
- def add_tds_allusage(year, pgrec):
316
+ def add_tds_allusage(year, logrec):
313
317
 
314
- record = {'method' : 'CDP', 'source' : 'C'}
315
-
316
- for fld in pgrec:
317
- if re.match(r'^(engine|method|etype|fcount)$', fld): continue
318
- record[fld] = pgrec[fld]
318
+ pgrec = {'method' : 'CDP', 'source' : 'C'}
319
+ pgrec['email'] = logrec['email']
320
+ pgrec['org_type'] = logrec['org_type']
321
+ pgrec['country'] = logrec['country']
322
+ pgrec['dsid'] = logrec['dsid']
323
+ pgrec['date'] = logrec['date']
324
+ pgrec['quarter'] = logrec['quarter']
325
+ pgrec['time'] = logrec['time']
326
+ pgrec['size'] = logrec['size']
327
+ pgrec['ip'] = logrec['ip']
328
+ return PgDBI.add_yearly_allusage(year, pgrec)
319
329
 
320
- return PgDBI.add_yearly_allusage(year, record)
321
330
 
322
331
  #
323
332
  # Fill usage of a single online data file into table dssdb.wusage of DSS PgSQL database
324
333
  #
325
- def add_webfile_usage(year, logrec, wurec):
334
+ def add_webfile_usage(year, logrec):
326
335
 
327
336
  table = "{}_{}".format(USAGE['WEBTBL'], year)
328
- cond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(logrec['wid'], logrec['method'], logrec['date'], logrec['time'])
337
+ cdate = logrec['date']
338
+ ip = logrec['ip']
339
+ cond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(logrec['wid'], logrec['method'], cdate, logrec['time'])
329
340
  if PgDBI.pgget(table, "", cond, PgLOG.LOGWRN): return 0
330
341
 
342
+ wurec = get_wuser_record(ip, cdate, False)
343
+ if not wurec: return 0
344
+
331
345
  record = {'wid' : logrec['wid'], 'dsid' : logrec['dsid']}
332
346
  record['wuid_read'] = wurec['wuid']
333
- record['date_read'] = logrec['date']
347
+ record['date_read'] = cdate
334
348
  record['time_read'] = logrec['time']
335
349
  record['size_read'] = logrec['size']
336
350
  record['method'] = logrec['method']
337
351
  record['locflag'] = logrec['locflag']
338
- record['ip'] = logrec['ip']
352
+ record['ip'] = ip
339
353
  record['quarter'] = logrec['quarter']
340
354
 
341
355
  if add_web_allusage(year, logrec, wurec):
@@ -345,7 +359,10 @@ def add_webfile_usage(year, logrec, wurec):
345
359
 
346
360
  def add_web_allusage(year, logrec, wurec):
347
361
 
348
- pgrec = {'email' : wurec['email'], 'org_type' : wurec['org_type'], 'country' : wurec['country']}
362
+ pgrec = {'source' : 'C'}
363
+ pgrec['email'] = wurec['email']
364
+ pgrec['org_type'] = wurec['org_type']
365
+ pgrec['country'] = wurec['country']
349
366
  pgrec['dsid'] = logrec['dsid']
350
367
  pgrec['date'] = logrec['date']
351
368
  pgrec['quarter'] = logrec['quarter']
@@ -353,7 +370,6 @@ def add_web_allusage(year, logrec, wurec):
353
370
  pgrec['size'] = logrec['size']
354
371
  pgrec['method'] = logrec['method']
355
372
  pgrec['ip'] = logrec['ip']
356
- pgrec['source'] = 'C'
357
373
  return PgDBI.add_yearly_allusage(year, pgrec)
358
374
 
359
375
  #
@@ -364,7 +380,7 @@ def get_wfile_record(dsids, wfile):
364
380
  for dsid in dsids:
365
381
  wkey = "{}{}".format(dsid, wfile)
366
382
  if wkey in WFILES: return WFILES[wkey]
367
- wfcond = "wfile like '%{}'".format(wfile)
383
+ wfcond = "wfile LIKE '%{}'".format(wfile)
368
384
  pgrec = None
369
385
  for dsid in dsids:
370
386
  pgrec = PgSplit.pgget_wfile(dsid, "wid", wfcond)
@@ -382,11 +398,13 @@ def get_wfile_record(dsids, wfile):
382
398
  pgrec = PgSplit.pgget_wfile(mvrec['dsid'], "wid", "wid = {}".format(pgrec['wid']))
383
399
  if pgrec: pgrec['dsid'] = mvrec['dsid']
384
400
 
385
- if pgrec: WFILES[wkey] = pgrec
401
+ if pgrec:
402
+ wkey = "{}{}".format(pgrec['dsid'], wfile)
403
+ WFILES[wkey] = pgrec
386
404
  return pgrec
387
405
 
388
406
  # return wuser record upon success, None otherwise
389
- def get_wuser_record(ip, date, skipwuid = True):
407
+ def get_wuser_record(ip, date = None, skipwuid = True):
390
408
 
391
409
  if ip in WUSERS: return WUSERS[ip]
392
410
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rda_python_metrics
3
- Version: 1.0.10
3
+ Version: 1.0.11
4
4
  Summary: RDA Python Package to gather and view data usage metrics
5
5
  Author-email: Zaihua Ji <zji@ucar.edu>
6
6
  Project-URL: Homepage, https://github.com/NCAR/rda-python-metrics
@@ -3,7 +3,7 @@ rda_python_metrics/PgView.py,sha256=r6otb3DjfiaQJdg0z8bZQAOlhr4JnrXJzp9wgWh_8qQ,
3
3
  rda_python_metrics/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
4
4
  rda_python_metrics/fillawsusage.py,sha256=dpI3-EFINJECdCSUOm37A97CJNIxOS2nYwA0fB0IpHE,9055
5
5
  rda_python_metrics/fillawsusage.usg,sha256=pD_nYTfg1so9nvVEyPSWdgKvb9gWdtfHJAs3RsT2MMU,609
6
- rda_python_metrics/fillcdgusage.py,sha256=m4zXnx5MaCP-GfNVDRfTOKMlVrcWaFNqwtJeGW7W8Hg,14892
6
+ rda_python_metrics/fillcdgusage.py,sha256=D4PhdVyGoISSr0ykNIpmHVMpEAfRZQB9BvaXdYMcGno,15186
7
7
  rda_python_metrics/fillcdgusage.usg,sha256=5lkd4Zdi72nQcha-JtbaLnxl66V4wBDykKwQtUOtMrw,667
8
8
  rda_python_metrics/fillcodusage.py,sha256=Hp3VrlIqHBWRI6Zjbi0nxWZPNvPrKsGBSZ1L9qA9_y0,8006
9
9
  rda_python_metrics/fillcodusage.usg,sha256=3B5IkQ4uwylqY8uEfUdnZ_MXqhYudeylMp5ulhUGXH8,678
@@ -44,9 +44,9 @@ rda_python_metrics/viewwebfile.py,sha256=HSMNkQQawonu6W3blV7g9UbJuNy9VAOn9COqgmj
44
44
  rda_python_metrics/viewwebfile.usg,sha256=lTNi8Yu8BUJuExEDJX-vsJyWUSUIQTS-DiiBEVFo33s,10054
45
45
  rda_python_metrics/viewwebusage.py,sha256=jhoHuRPVNtp7Lbjv0l-Jy_vp2p2nWQC7IVZ0P4JUJ4I,16657
46
46
  rda_python_metrics/viewwebusage.usg,sha256=IsT72v22xyZf7ng_IodVs0dLTsH1Q4BtFvT-gs0-xJY,9946
47
- rda_python_metrics-1.0.10.dist-info/licenses/LICENSE,sha256=1dck4EAQwv8QweDWCXDx-4Or0S8YwiCstaso_H57Pno,1097
48
- rda_python_metrics-1.0.10.dist-info/METADATA,sha256=LUru8PysvjXZNqq6kZzFGpMBReGYJtLwXpqFKB5X6hQ,736
49
- rda_python_metrics-1.0.10.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
50
- rda_python_metrics-1.0.10.dist-info/entry_points.txt,sha256=YfFLzlE3rdufSV471VsDnfYptnt1lR08aSrxPXlKqlY,1185
51
- rda_python_metrics-1.0.10.dist-info/top_level.txt,sha256=aoBgbR_o70TP0QmMW0U6inRHYtfKld47OBmnWnLnDOs,19
52
- rda_python_metrics-1.0.10.dist-info/RECORD,,
47
+ rda_python_metrics-1.0.11.dist-info/licenses/LICENSE,sha256=1dck4EAQwv8QweDWCXDx-4Or0S8YwiCstaso_H57Pno,1097
48
+ rda_python_metrics-1.0.11.dist-info/METADATA,sha256=2D84_3Vy1MxxzM--KjIjJmFitL4QJ0JwkuigW0CErik,736
49
+ rda_python_metrics-1.0.11.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
50
+ rda_python_metrics-1.0.11.dist-info/entry_points.txt,sha256=YfFLzlE3rdufSV471VsDnfYptnt1lR08aSrxPXlKqlY,1185
51
+ rda_python_metrics-1.0.11.dist-info/top_level.txt,sha256=aoBgbR_o70TP0QmMW0U6inRHYtfKld47OBmnWnLnDOs,19
52
+ rda_python_metrics-1.0.11.dist-info/RECORD,,