rda-python-metrics 1.0.11__py3-none-any.whl → 1.0.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rda-python-metrics might be problematic. Click here for more details.
- rda_python_metrics/fillcdgusage.py +35 -21
- {rda_python_metrics-1.0.11.dist-info → rda_python_metrics-1.0.14.dist-info}/METADATA +1 -1
- {rda_python_metrics-1.0.11.dist-info → rda_python_metrics-1.0.14.dist-info}/RECORD +7 -7
- {rda_python_metrics-1.0.11.dist-info → rda_python_metrics-1.0.14.dist-info}/WHEEL +1 -1
- {rda_python_metrics-1.0.11.dist-info → rda_python_metrics-1.0.14.dist-info}/entry_points.txt +0 -0
- {rda_python_metrics-1.0.11.dist-info → rda_python_metrics-1.0.14.dist-info}/licenses/LICENSE +0 -0
- {rda_python_metrics-1.0.11.dist-info → rda_python_metrics-1.0.14.dist-info}/top_level.txt +0 -0
|
@@ -16,6 +16,7 @@ import sys
|
|
|
16
16
|
import re
|
|
17
17
|
import glob
|
|
18
18
|
from os import path as op
|
|
19
|
+
from time import time as tm
|
|
19
20
|
from rda_python_common import PgLOG
|
|
20
21
|
from rda_python_common import PgUtil
|
|
21
22
|
from rda_python_common import PgFile
|
|
@@ -116,13 +117,18 @@ def get_dataset_ids(dsnames):
|
|
|
116
117
|
if dsname not in DSIDS:
|
|
117
118
|
PgLOG.pglog(dsname + ": Unknown CDG dataset short name", PgLOG.LOGWRN)
|
|
118
119
|
continue
|
|
120
|
+
bt = tm()
|
|
119
121
|
pgrec = PgDBI.pgget(tbname, 'id', "short_name = '{}'".format(dsname))
|
|
120
122
|
if not (pgrec and pgrec['id']): continue
|
|
121
123
|
rdaids = DSIDS[dsname]
|
|
124
|
+
strids = "{}-{}".format(dsname, rdaids)
|
|
122
125
|
cdgid = pgrec['id']
|
|
123
126
|
cdgids = [cdgid]
|
|
124
|
-
|
|
125
|
-
|
|
127
|
+
ccnt = 1
|
|
128
|
+
ccnt += recursive_dataset_ids(cdgid, cdgids)
|
|
129
|
+
dsids.append([dsname, rdaids, cdgids, strids])
|
|
130
|
+
rmsg = PgLOG.seconds_to_string_time(tm() - bt)
|
|
131
|
+
PgLOG.pglog("{}: Found {} CDG dsid/subdsids in {} at {}".format(strids, ccnt, rmsg, PgLOG.current_datetime()), PgLOG.LOGWRN)
|
|
126
132
|
|
|
127
133
|
if not dsids: PgLOG.pglog("No Dataset Id identified to gather CDG metrics", PgLOG.LOGWRN)
|
|
128
134
|
|
|
@@ -135,12 +141,16 @@ def recursive_dataset_ids(pcdgid, cdgids):
|
|
|
135
141
|
|
|
136
142
|
tbname = 'metadata.dataset'
|
|
137
143
|
pgrecs = PgDBI.pgmget(tbname, 'id', "parent_dataset_id = '{}'".format(pcdgid))
|
|
138
|
-
if not pgrecs: return
|
|
144
|
+
if not pgrecs: return 0
|
|
139
145
|
|
|
146
|
+
ccnt = 0
|
|
140
147
|
for cdgid in pgrecs['id']:
|
|
141
148
|
if cdgid in cdgids: continue
|
|
142
149
|
cdgids.append(cdgid)
|
|
143
|
-
|
|
150
|
+
ccnt += 1
|
|
151
|
+
ccnt += recursive_dataset_ids(cdgid, cdgids)
|
|
152
|
+
|
|
153
|
+
return ccnt
|
|
144
154
|
|
|
145
155
|
#
|
|
146
156
|
# get the date ranges for given condition
|
|
@@ -159,17 +169,17 @@ def get_date_ranges(inputs):
|
|
|
159
169
|
tms = input.split('-')
|
|
160
170
|
dates.append(PgUtil.fmtdate(int(tms[0]), int(tms[1]), 1))
|
|
161
171
|
dates.append(PgUtil.enddate(dates[0], 0, 'M'))
|
|
162
|
-
elif opt == '
|
|
172
|
+
elif opt == 'y':
|
|
163
173
|
dates.append(input + "-01-01")
|
|
164
174
|
dates.append(input + "-12-31")
|
|
165
|
-
dranges.append(dates)
|
|
175
|
+
if dates: dranges.append(dates)
|
|
166
176
|
|
|
167
177
|
return dranges
|
|
168
178
|
|
|
169
179
|
#
|
|
170
180
|
# get file download records for given dsid
|
|
171
181
|
#
|
|
172
|
-
def get_dsid_records(cdgids, dates, srdaid):
|
|
182
|
+
def get_dsid_records(cdgids, dates, strids):
|
|
173
183
|
|
|
174
184
|
gdex_dbname()
|
|
175
185
|
tbname = 'metrics.file_download'
|
|
@@ -181,9 +191,9 @@ def get_dsid_records(cdgids, dates, srdaid):
|
|
|
181
191
|
dscnd += "= '{}'".format(cdgids[0])
|
|
182
192
|
else:
|
|
183
193
|
dscnd += "IN ('" + "','".join(cdgids) + "')"
|
|
184
|
-
dtcnd = "date_completed BETWEEN '{}' AND '{}'".format(dates[0], dates[1])
|
|
194
|
+
dtcnd = "date_completed BETWEEN '{} 00:00:00' AND '{} 23:59:59'".format(dates[0], dates[1])
|
|
185
195
|
cond = "{} AND completed = True AND {} ORDER BY date_completed".format(dscnd, dtcnd)
|
|
186
|
-
PgLOG.pglog("{}: CDG
|
|
196
|
+
PgLOG.pglog("{}: Query for {} CDG dsid/subdsids Completed between {} and {} at {}".format(strids, dscnt, dates[0], dates[1], PgLOG.current_datetime()), PgLOG.LOGWRN)
|
|
187
197
|
pgrecs = PgDBI.pgmget(tbname, fields, cond)
|
|
188
198
|
PgDBI.dssdb_dbname()
|
|
189
199
|
|
|
@@ -201,21 +211,24 @@ def fill_cdg_usages(dsids, dranges):
|
|
|
201
211
|
dsname = dsid[0]
|
|
202
212
|
rdaids = dsid[1]
|
|
203
213
|
cdgids = dsid[2]
|
|
204
|
-
|
|
205
|
-
|
|
214
|
+
strids = dsid[3]
|
|
215
|
+
bt = tm()
|
|
216
|
+
pgrecs = get_dsid_records(cdgids, dates, strids)
|
|
206
217
|
pgcnt = len(pgrecs['dataset_file_name']) if pgrecs else 0
|
|
207
218
|
if pgcnt == 0:
|
|
208
|
-
PgLOG.pglog("{}: No record found to gather CDG usage between {} and {}".format(
|
|
219
|
+
PgLOG.pglog("{}: No record found to gather CDG usage between {} and {}".format(strids, dates[0], dates[1]), PgLOG.LOGWRN)
|
|
209
220
|
continue
|
|
210
|
-
|
|
221
|
+
rmsg = PgLOG.seconds_to_string_time(tm() - bt)
|
|
222
|
+
PgLOG.pglog("{}: Got {} records in {} for processing CDG usage at {}".format(strids, pgcnt, rmsg, PgLOG.current_datetime()), PgLOG.LOGWRN)
|
|
211
223
|
tcnt = wcnt = 0
|
|
212
224
|
pwkey = wrec = cdate = None
|
|
213
225
|
trecs = {}
|
|
226
|
+
bt = tm()
|
|
214
227
|
for i in range(pgcnt):
|
|
215
228
|
if (i+1)%20000 == 0:
|
|
216
229
|
PgLOG.pglog("{}/{}/{} CDG/TDS/WEB records processed to add".format(i, tcnt, wcnt), PgLOG.WARNLG)
|
|
217
230
|
|
|
218
|
-
pgrec = PgUtil.onerecord(
|
|
231
|
+
pgrec = PgUtil.onerecord(pgrecs, i)
|
|
219
232
|
dsize = pgrec['bytes_sent']
|
|
220
233
|
if not dsize: continue
|
|
221
234
|
(year, quarter, date, time) = get_record_date_time(pgrec['date_completed'])
|
|
@@ -283,20 +296,21 @@ def fill_cdg_usages(dsids, dranges):
|
|
|
283
296
|
atcnt += tcnt
|
|
284
297
|
awcnt += wcnt
|
|
285
298
|
allcnt += pgcnt
|
|
286
|
-
|
|
299
|
+
rmsg = PgLOG.seconds_to_string_time(tm() - bt)
|
|
300
|
+
PgLOG.pglog("{}: {}/{} TDS/WEB usage records added for {} CDG entries in {}".format(strids, atcnt, awcnt, allcnt, rmsg), PgLOG.LOGWRN)
|
|
287
301
|
|
|
288
302
|
def get_record_date_time(ctime):
|
|
289
303
|
|
|
290
|
-
ms = re.search(r'^(\d+)
|
|
304
|
+
ms = re.search(r'^(\d+)-(\d+)-(\d+) (\d\d:\d\d:\d\d)', str(ctime))
|
|
291
305
|
if ms:
|
|
292
|
-
|
|
293
|
-
m =
|
|
306
|
+
y = ms.group(1)
|
|
307
|
+
m = int(ms.group(2))
|
|
308
|
+
d = ms.group(3)
|
|
294
309
|
q = 1 + int((m-1)/3)
|
|
295
|
-
y = ms.group(3)
|
|
296
310
|
t = ms.group(4)
|
|
297
|
-
return (y, q, "{}-{:02}-{
|
|
311
|
+
return (y, q, "{}-{:02}-{}".format(y, m, d), t)
|
|
298
312
|
else:
|
|
299
|
-
PgLOG.pglog("
|
|
313
|
+
PgLOG.pglog(str(ctime) + ": Invalid time format", PgLOG.LGEREX)
|
|
300
314
|
|
|
301
315
|
def add_tdsusage_records(year, records, date):
|
|
302
316
|
|
|
@@ -3,7 +3,7 @@ rda_python_metrics/PgView.py,sha256=r6otb3DjfiaQJdg0z8bZQAOlhr4JnrXJzp9wgWh_8qQ,
|
|
|
3
3
|
rda_python_metrics/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
4
4
|
rda_python_metrics/fillawsusage.py,sha256=dpI3-EFINJECdCSUOm37A97CJNIxOS2nYwA0fB0IpHE,9055
|
|
5
5
|
rda_python_metrics/fillawsusage.usg,sha256=pD_nYTfg1so9nvVEyPSWdgKvb9gWdtfHJAs3RsT2MMU,609
|
|
6
|
-
rda_python_metrics/fillcdgusage.py,sha256=
|
|
6
|
+
rda_python_metrics/fillcdgusage.py,sha256=jMAE7qFreYKqVw5XFXdGeBiLGGbdkmVQopPffuLQueg,15773
|
|
7
7
|
rda_python_metrics/fillcdgusage.usg,sha256=5lkd4Zdi72nQcha-JtbaLnxl66V4wBDykKwQtUOtMrw,667
|
|
8
8
|
rda_python_metrics/fillcodusage.py,sha256=Hp3VrlIqHBWRI6Zjbi0nxWZPNvPrKsGBSZ1L9qA9_y0,8006
|
|
9
9
|
rda_python_metrics/fillcodusage.usg,sha256=3B5IkQ4uwylqY8uEfUdnZ_MXqhYudeylMp5ulhUGXH8,678
|
|
@@ -44,9 +44,9 @@ rda_python_metrics/viewwebfile.py,sha256=HSMNkQQawonu6W3blV7g9UbJuNy9VAOn9COqgmj
|
|
|
44
44
|
rda_python_metrics/viewwebfile.usg,sha256=lTNi8Yu8BUJuExEDJX-vsJyWUSUIQTS-DiiBEVFo33s,10054
|
|
45
45
|
rda_python_metrics/viewwebusage.py,sha256=jhoHuRPVNtp7Lbjv0l-Jy_vp2p2nWQC7IVZ0P4JUJ4I,16657
|
|
46
46
|
rda_python_metrics/viewwebusage.usg,sha256=IsT72v22xyZf7ng_IodVs0dLTsH1Q4BtFvT-gs0-xJY,9946
|
|
47
|
-
rda_python_metrics-1.0.
|
|
48
|
-
rda_python_metrics-1.0.
|
|
49
|
-
rda_python_metrics-1.0.
|
|
50
|
-
rda_python_metrics-1.0.
|
|
51
|
-
rda_python_metrics-1.0.
|
|
52
|
-
rda_python_metrics-1.0.
|
|
47
|
+
rda_python_metrics-1.0.14.dist-info/licenses/LICENSE,sha256=1dck4EAQwv8QweDWCXDx-4Or0S8YwiCstaso_H57Pno,1097
|
|
48
|
+
rda_python_metrics-1.0.14.dist-info/METADATA,sha256=tsmYaJjL-cWMvnpC8_WI-4IndMKtidtW9695gUlprBM,736
|
|
49
|
+
rda_python_metrics-1.0.14.dist-info/WHEEL,sha256=lTU6B6eIfYoiQJTZNc-fyaR6BpL6ehTzU3xGYxn2n8k,91
|
|
50
|
+
rda_python_metrics-1.0.14.dist-info/entry_points.txt,sha256=YfFLzlE3rdufSV471VsDnfYptnt1lR08aSrxPXlKqlY,1185
|
|
51
|
+
rda_python_metrics-1.0.14.dist-info/top_level.txt,sha256=aoBgbR_o70TP0QmMW0U6inRHYtfKld47OBmnWnLnDOs,19
|
|
52
|
+
rda_python_metrics-1.0.14.dist-info/RECORD,,
|
{rda_python_metrics-1.0.11.dist-info → rda_python_metrics-1.0.14.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.11.dist-info → rda_python_metrics-1.0.14.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.11.dist-info → rda_python_metrics-1.0.14.dist-info}/top_level.txt
RENAMED
|
File without changes
|