rda-python-metrics 1.0.17__py3-none-any.whl → 1.0.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rda-python-metrics might be problematic. Click here for more details.
- rda_python_metrics/PgIPInfo.py +117 -9
- rda_python_metrics/fillawsusage.py +3 -31
- rda_python_metrics/fillcdgusage.py +22 -40
- rda_python_metrics/fillcodusage.py +10 -3
- rda_python_metrics/fillglobususage.py +3 -31
- rda_python_metrics/fillipinfo.py +50 -19
- rda_python_metrics/fillipinfo.usg +1 -1
- rda_python_metrics/filloneorder.py +1 -1
- rda_python_metrics/fillosdfusage.py +3 -31
- rda_python_metrics/filltdsusage.py +8 -7
- rda_python_metrics/viewallusage.py +21 -18
- rda_python_metrics/viewallusage.usg +25 -10
- rda_python_metrics/viewcodusage.py +18 -15
- rda_python_metrics/viewcodusage.usg +6 -3
- rda_python_metrics/viewordusage.py +8 -7
- rda_python_metrics/viewordusage.usg +8 -9
- rda_python_metrics/viewtdsusage.py +17 -14
- rda_python_metrics/viewtdsusage.usg +9 -6
- rda_python_metrics/viewwebusage.py +5 -5
- rda_python_metrics/viewwebusage.usg +3 -3
- {rda_python_metrics-1.0.17.dist-info → rda_python_metrics-1.0.20.dist-info}/METADATA +2 -1
- {rda_python_metrics-1.0.17.dist-info → rda_python_metrics-1.0.20.dist-info}/RECORD +26 -26
- {rda_python_metrics-1.0.17.dist-info → rda_python_metrics-1.0.20.dist-info}/WHEEL +1 -1
- {rda_python_metrics-1.0.17.dist-info → rda_python_metrics-1.0.20.dist-info}/entry_points.txt +0 -0
- {rda_python_metrics-1.0.17.dist-info → rda_python_metrics-1.0.20.dist-info}/licenses/LICENSE +0 -0
- {rda_python_metrics-1.0.17.dist-info → rda_python_metrics-1.0.20.dist-info}/top_level.txt +0 -0
rda_python_metrics/PgIPInfo.py
CHANGED
|
@@ -13,9 +13,12 @@
|
|
|
13
13
|
#
|
|
14
14
|
###############################################################################
|
|
15
15
|
#
|
|
16
|
+
import re
|
|
16
17
|
import geoip2.database as geodb
|
|
17
18
|
import ipinfo
|
|
18
19
|
import socket
|
|
20
|
+
import dns.resolver
|
|
21
|
+
import json
|
|
19
22
|
from rda_python_common import PgLOG
|
|
20
23
|
from rda_python_common import PgDBI
|
|
21
24
|
from rda_python_common import PgUtil
|
|
@@ -28,11 +31,42 @@ IPINFO = {
|
|
|
28
31
|
'IPADD' : 0
|
|
29
32
|
}
|
|
30
33
|
|
|
34
|
+
IPDNS = None
|
|
31
35
|
IPDB = None
|
|
32
36
|
G2DB = None
|
|
33
37
|
IPRECS = {}
|
|
34
38
|
COUNTRIES = {}
|
|
35
39
|
|
|
40
|
+
#
|
|
41
|
+
# get and cache a global dns.resolver.Resolver object
|
|
42
|
+
#
|
|
43
|
+
def get_dns_resolver(forceget = False):
    """Return the module-wide dns.resolver.Resolver, creating it on demand.

    forceget -- when true, always build a new Resolver and store it in the
                IPDNS global, replacing whatever was kept there before.

    Without forceget, the object already held in IPDNS is reused whenever
    it is set (truthy).
    """
    global IPDNS

    # Fast path: reuse the saved resolver unless a fresh one is demanded.
    if not forceget and IPDNS:
        return IPDNS

    IPDNS = dns.resolver.Resolver()
    return IPDNS
|
|
50
|
+
|
|
51
|
+
#
|
|
52
|
+
# Resolve a domain name to an IP address (A record)
|
|
53
|
+
#
|
|
54
|
+
def dns_to_ip(dmname, type = 'A'):
    """Resolve a domain name and return its records as a list of strings.

    dmname -- domain name to look up
    type   -- record type handed to Resolver.resolve() (default 'A')

    Returns the str() of every answer record on success.  When the lookup
    raises NXDOMAIN, Timeout, or any other dns.exception.DNSException, the
    problem is logged through PgLOG.pglog with PgLOG.LOGERR and None is
    returned.
    """
    resolver = get_dns_resolver()

    try:
        # Building the list stays inside the try so that an error raised
        # while reading the answers is handled the same way as a failed
        # resolve() call.
        return list(map(str, resolver.resolve(dmname, type)))
    except dns.resolver.NXDOMAIN:
        PgLOG.pglog(f"{dmname}: the domain name does not exist", PgLOG.LOGERR)
    except dns.resolver.Timeout:
        PgLOG.pglog(f"{dmname}: the domain name request timed out", PgLOG.LOGERR)
    except dns.exception.DNSException as e:
        # Generic handler last: the two exceptions above are caught first.
        PgLOG.pglog(f"{dmname}: error domain name request: {e}", PgLOG.LOGERR)

    return None
|
|
69
|
+
|
|
36
70
|
#
|
|
37
71
|
# Get country token name for given two-character domain id
|
|
38
72
|
#
|
|
@@ -59,6 +93,17 @@ def set_ipinfo_database():
|
|
|
59
93
|
except Exception as e:
|
|
60
94
|
PgLOG.pglog('ipinfo: ' + str(e), PgLOG.LGEREX)
|
|
61
95
|
|
|
96
|
+
#
|
|
97
|
+
# get an ipinfo record for the given domain name
|
|
98
|
+
#
|
|
99
|
+
def domain_ipinfo_record(dmname):
    """Return the ipinfo record for the first address a domain resolves to.

    dmname -- domain name passed to dns_to_ip()

    Only the first entry of the resolved list is handed to set_ipinfo();
    None is returned when dns_to_ip() yields nothing.
    """
    addresses = dns_to_ip(dmname)

    return set_ipinfo(addresses[0]) if addresses else None
|
|
106
|
+
|
|
62
107
|
#
|
|
63
108
|
# get a ipinfo record for given ip address
|
|
64
109
|
#
|
|
@@ -67,14 +112,11 @@ def get_ipinfo_record(ip):
|
|
|
67
112
|
if not IPDB: set_ipinfo_database()
|
|
68
113
|
try:
|
|
69
114
|
iprec = IPDB.getDetails(ip).all
|
|
70
|
-
if 'hostname' not in iprec:
|
|
71
|
-
PgLOG.pglog("ipinfo: {} - ip address is not in the database".format(ip), PgLOG.LOGERR)
|
|
72
|
-
return None
|
|
73
115
|
except Exception as e:
|
|
74
116
|
PgLOG.pglog("ipinfo: {} - {}".format(ip, str(e)), PgLOG.LOGWRN)
|
|
75
117
|
return None
|
|
76
118
|
|
|
77
|
-
record = {'ip' : ip, 'stat_flag' : 'A', 'hostname' : ip}
|
|
119
|
+
record = {'ip' : ip, 'stat_flag' : 'A', 'hostname' : ip, 'org_type' : '-'}
|
|
78
120
|
if 'hostname' in iprec:
|
|
79
121
|
record['hostname'] = iprec['hostname']
|
|
80
122
|
record['org_type'] = PgDBI.get_org_type(None, record['hostname'])
|
|
@@ -82,9 +124,11 @@ def get_ipinfo_record(ip):
|
|
|
82
124
|
record['lon'] = float(iprec['longitude']) if iprec['longitude'] else 0
|
|
83
125
|
if 'org' in iprec: record['org_name'] = iprec['org']
|
|
84
126
|
record['country'] = get_country_record_code(iprec, 'country_name')
|
|
127
|
+
record['region'] = PgLOG.convert_chars(iprec['region']) if 'region' in iprec else None
|
|
85
128
|
if 'city' in iprec: record['city'] = PgLOG.convert_chars(iprec['city'])
|
|
86
129
|
if 'postal' in iprec: record['postal'] = iprec['postal']
|
|
87
130
|
record['timezone'] = iprec['timezone']
|
|
131
|
+
record['ipinfo'] = json.dumps(iprec)
|
|
88
132
|
|
|
89
133
|
return record
|
|
90
134
|
|
|
@@ -108,15 +152,16 @@ def get_geoip2_record(ip):
|
|
|
108
152
|
PgLOG.pglog("geoip2: {} - {}".format(ip, str(e)), PgLOG.LOGWRN)
|
|
109
153
|
return None
|
|
110
154
|
|
|
111
|
-
record = {'ip' : ip, 'stat_flag' : 'M'}
|
|
155
|
+
record = {'ip' : ip, 'stat_flag' : 'M', 'org_type' : '-'}
|
|
112
156
|
record['lat'] = float(city.location.latitude) if city.location.latitude else 0
|
|
113
157
|
record['lon'] = float(city.location.longitude) if city.location.longitude else 0
|
|
114
158
|
record['country'] = get_country_name_code(city.country.name)
|
|
115
159
|
record['city'] = PgLOG.convert_chars(city.city.name)
|
|
160
|
+
record['region'] = PgLOG.convert_chars(city.subdivisions.most_specific.name) if city.subdivisions.most_specific.name else None
|
|
116
161
|
record['postal'] = city.postal.code
|
|
117
162
|
record['timezone'] = city.location.time_zone
|
|
118
163
|
record['hostname'] = ip
|
|
119
|
-
record['
|
|
164
|
+
record['ipinfo'] = json.dumps(object_to_dict(city))
|
|
120
165
|
|
|
121
166
|
try:
|
|
122
167
|
hostrec = socket.gethostbyaddr(ip)
|
|
@@ -128,6 +173,20 @@ def get_geoip2_record(ip):
|
|
|
128
173
|
|
|
129
174
|
return record
|
|
130
175
|
|
|
176
|
+
#
|
|
177
|
+
# change an object to dict recursively
|
|
178
|
+
#
|
|
179
|
+
def object_to_dict(obj):
    """Recursively convert an object tree into plain dicts and lists.

    obj -- any value; typically an object whose attributes are to be dumped

    Conversion rules, checked in this order:
      * an object with a __dict__ becomes a dict of its converted attributes;
      * a dict becomes a new dict with the same keys and converted values;
      * a list, tuple, set or frozenset becomes a list of converted items;
      * anything else is returned unchanged.

    The __dict__ test stays first so results for objects and lists are the
    same as before; the dict/tuple/set branches only affect values that
    were previously passed through untouched, which left nested objects
    inside them unconverted (and sets are not accepted by json.dumps).
    """
    if hasattr(obj, "__dict__"):
        return {key: object_to_dict(value) for key, value in obj.__dict__.items()}

    if isinstance(obj, dict):
        return {key: object_to_dict(value) for key, value in obj.items()}

    if isinstance(obj, (list, tuple, set, frozenset)):
        return [object_to_dict(item) for item in obj]

    return obj
|
|
189
|
+
|
|
131
190
|
#
|
|
132
191
|
# update wuser.email for hostname changed
|
|
133
192
|
#
|
|
@@ -160,7 +219,7 @@ def update_ipinfo_record(record, pgrec = None):
|
|
|
160
219
|
# set ip info into table ipinfo from python module ipinfo
|
|
161
220
|
# if ipopt is True; otherwise, use module geoip2
|
|
162
221
|
#
|
|
163
|
-
def set_ipinfo(ip, ipopt =
|
|
222
|
+
def set_ipinfo(ip, ipopt = True):
|
|
164
223
|
|
|
165
224
|
if ip in IPRECS:
|
|
166
225
|
pgrec = IPRECS[ip]
|
|
@@ -169,8 +228,8 @@ def set_ipinfo(ip, ipopt = False):
|
|
|
169
228
|
pgrec = PgDBI.pgget('ipinfo', '*', "ip = '{}'".format(ip))
|
|
170
229
|
|
|
171
230
|
if not pgrec or ipopt and pgrec['stat_flag'] == 'M':
|
|
172
|
-
record =
|
|
173
|
-
if not
|
|
231
|
+
record = get_ipinfo_record(ip) if ipopt else None
|
|
232
|
+
if not record: record = get_geoip2_record(ip)
|
|
174
233
|
if record and update_ipinfo_record(record, pgrec): pgrec = record
|
|
175
234
|
|
|
176
235
|
IPRECS[ip] = pgrec
|
|
@@ -186,3 +245,52 @@ def get_update_record(nrec, orec):
|
|
|
186
245
|
if nrec[fld] != orec[fld]:
|
|
187
246
|
record[fld] = nrec[fld]
|
|
188
247
|
return record
|
|
248
|
+
|
|
249
|
+
#
|
|
250
|
+
# fill the missing info for given ip
|
|
251
|
+
#
|
|
252
|
+
def get_missing_ipinfo(ip, email = None):
    """Build the org_type/country/region/email fields for an ip or email.

    ip    -- ip address; when empty, the domain part of email is resolved
             through dns_to_ip() and its first address is used instead
    email -- optional email address

    Returns a dict with keys 'org_type', 'country', 'region' and 'email',
    or None when no ip can be determined or set_ipinfo() returns nothing.
    The returned email is 'unknown@<hostname>' when email is empty or ends
    with '-'; otherwise the given email is kept.
    """
    if not ip:
        if email and '@' in email:
            domain = email.split('@')[1]
            # dns_to_ip() returns a list of addresses (or None); pick the
            # first one, since set_ipinfo() expects a single ip string.
            addresses = dns_to_ip(domain) if domain else None
            if addresses: ip = addresses[0]
        if not ip: return None

    # named iprec to avoid shadowing the imported ipinfo module
    iprec = set_ipinfo(ip)
    if not iprec: return None

    record = {'org_type' : iprec['org_type'],
              'country' : iprec['country'],
              'region' : iprec['region']}
    if not email or re.search(r'-$', email):
        record['email'] = 'unknown@' + iprec['hostname']
    else:
        record['email'] = email

    return record
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
# return wuser record upon success, None otherwise
|
|
273
|
+
def get_wuser_record(ip, date, email = None):
    """Find the wuser row for an ip/email, adding one when none exists.

    ip    -- ip address passed to get_missing_ipinfo()
    date  -- usage date; used as start_date for a new row, and written to
             an existing row when diffdate(stored start_date, date) > 0
    email -- optional email address passed to get_missing_ipinfo()

    Returns the existing wuser row (fields wuid, email, org_type, country,
    region, start_date), or the newly added record including its 'wuid'.
    Returns None when get_missing_ipinfo() yields nothing or the insert
    does not return a wuid.
    """
    record = get_missing_ipinfo(ip, email)
    if not record: return None

    # NOTE(review): the condition is built by string formatting, so an
    # email containing a single quote would break it -- confirm whether
    # PgDBI offers an escaped or parameterized alternative.
    emcond = "email = '{}'".format(record['email'])
    flds = 'wuid, email, org_type, country, region, start_date'
    pgrec = PgDBI.pgget("wuser", flds, emcond, PgLOG.LOGERR)
    if pgrec:
        if PgUtil.diffdate(pgrec['start_date'], date) > 0:
            pgrec['start_date'] = record['start_date'] = date
            PgDBI.pgupdt('wuser', record, emcond)
        return pgrec

    # now add one in
    record['stat_flag'] = 'A'
    record['start_date'] = date
    wuid = PgDBI.pgadd("wuser", record, PgLOG.LOGERR|PgLOG.AUTOID)
    if wuid:
        record['wuid'] = wuid
        # Log the email actually stored; the email argument may be None,
        # in which case record['email'] holds the 'unknown@...' address.
        PgLOG.pglog("{} Added as wuid({})".format(record['email'], wuid), PgLOG.LGWNEM)
        return record

    return None
|
|
@@ -201,7 +201,7 @@ def add_file_usage(year, logrec):
|
|
|
201
201
|
cond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(pgrec['wid'], logrec['method'], logrec['date'], logrec['time'])
|
|
202
202
|
if PgDBI.pgget(table, "", cond, PgLOG.LOGWRN): return 0
|
|
203
203
|
|
|
204
|
-
wurec = get_wuser_record(logrec['ip'], logrec['date'])
|
|
204
|
+
wurec = PgIPInfo.get_wuser_record(logrec['ip'], logrec['date'])
|
|
205
205
|
if not wurec: return 0
|
|
206
206
|
record = {'wid' : pgrec['wid'], 'dsid' : pgrec['dsid']}
|
|
207
207
|
record['wuid_read'] = wurec['wuid']
|
|
@@ -220,7 +220,8 @@ def add_file_usage(year, logrec):
|
|
|
220
220
|
|
|
221
221
|
def add_to_allusage(year, logrec, wurec):
|
|
222
222
|
|
|
223
|
-
pgrec = {'email' : wurec['email'], 'org_type' : wurec['org_type'],
|
|
223
|
+
pgrec = {'email' : wurec['email'], 'org_type' : wurec['org_type'],
|
|
224
|
+
'country' : wurec['country'], 'region' : wurec['region']}
|
|
224
225
|
pgrec['dsid'] = logrec['dsid']
|
|
225
226
|
pgrec['date'] = logrec['date']
|
|
226
227
|
pgrec['quarter'] = logrec['quarter']
|
|
@@ -247,35 +248,6 @@ def get_wfile_wid(dsid, wfile):
|
|
|
247
248
|
|
|
248
249
|
return pgrec
|
|
249
250
|
|
|
250
|
-
# return wuser record upon success, None otherwise
|
|
251
|
-
def get_wuser_record(ip, date):
|
|
252
|
-
|
|
253
|
-
ipinfo = PgIPInfo.set_ipinfo(ip)
|
|
254
|
-
if not ipinfo: return None
|
|
255
|
-
|
|
256
|
-
record = {'org_type' : ipinfo['org_type'], 'country' : ipinfo['country']}
|
|
257
|
-
email = 'unknown@' + ipinfo['hostname']
|
|
258
|
-
emcond = "email = '{}'".format(email)
|
|
259
|
-
flds = 'wuid, email, org_type, country, start_date'
|
|
260
|
-
pgrec = PgDBI.pgget("wuser", flds, emcond, PgLOG.LOGERR)
|
|
261
|
-
if pgrec:
|
|
262
|
-
if PgUtil.diffdate(pgrec['start_date'], date) > 0:
|
|
263
|
-
pgrec['start_date'] = record['start_date'] = date
|
|
264
|
-
PgDBI.pgupdt('wuser', record, emcond)
|
|
265
|
-
return pgrec
|
|
266
|
-
|
|
267
|
-
# now add one in
|
|
268
|
-
record['email'] = email
|
|
269
|
-
record['stat_flag'] = 'A'
|
|
270
|
-
record['start_date'] = date
|
|
271
|
-
wuid = PgDBI.pgadd("wuser", record, PgLOG.LOGERR|PgLOG.AUTOID)
|
|
272
|
-
if wuid:
|
|
273
|
-
record['wuid'] = wuid
|
|
274
|
-
PgLOG.pglog("{} Added as wuid({})".format(email, wuid), PgLOG.LGWNEM)
|
|
275
|
-
return record
|
|
276
|
-
|
|
277
|
-
return None
|
|
278
|
-
|
|
279
251
|
#
|
|
280
252
|
# call main() to start program
|
|
281
253
|
#
|
|
@@ -52,13 +52,26 @@ DSIDS = {
|
|
|
52
52
|
'ucar.cgd.ccsm4.SD-WACCM-X_v2.1' : ['d651034'],
|
|
53
53
|
'ucar.cgd.ccsm4.amv_lens' : ['d651035'],
|
|
54
54
|
'ucar.cgd.cesm2.cism_ismip6' : ['d651036'],
|
|
55
|
-
'ucar.cgd.ccsm4.pliomip2' : ['d651037']
|
|
55
|
+
'ucar.cgd.ccsm4.pliomip2' : ['d651037'],
|
|
56
|
+
# new added
|
|
57
|
+
'ucar.cgd.cesm2-waccm.s2s_hindcasts': ['d651040'],
|
|
58
|
+
'ucar.cgd.CESM1.3_SH_storm_tracks': ['d651044'],
|
|
59
|
+
'ucar.cgd.cesm2.waccm6.ssp245': ['d651045'],
|
|
60
|
+
'ucar.cgd.cesm2.CESM21-CISM2-JG-BG': ['d651046'],
|
|
61
|
+
'ucar.cgd.ccsm4.TC-CESM': ['d651047'],
|
|
62
|
+
'ucar.cgd.cesm2.ISSI_OSSE': ['d651048'],
|
|
63
|
+
'ucar.cgd.ccsm4.SOcean_Eddies_mclong': ['d651049'],
|
|
64
|
+
'trace': ['d651050'],
|
|
65
|
+
'ucar.cgd.cesm2.waccm.solar': ['d651051'],
|
|
66
|
+
'ucar.cgd.ccsm4.CESM1-CCSM4_mid-Pliocene' : ['d651042'],
|
|
67
|
+
'ucar.cgd.ccsm4.PaleoIF' : ['d651052'],
|
|
68
|
+
'ucar.cgd.ccsm4.b.e11.B20LE_fixedO3' : ['d651053'],
|
|
69
|
+
'ucar.cgd.cesm2.single.forcing.large.ensemble' : ['d651055']
|
|
56
70
|
}
|
|
57
71
|
|
|
58
72
|
ALLIDS = list(DSIDS.keys())
|
|
59
73
|
|
|
60
74
|
WFILES = {}
|
|
61
|
-
WUSERS = {}
|
|
62
75
|
|
|
63
76
|
#
|
|
64
77
|
# main function to run this program
|
|
@@ -262,12 +275,12 @@ def fill_cdg_usages(dsids, dranges):
|
|
|
262
275
|
trecs[tkey]['size'] += dsize
|
|
263
276
|
trecs[tkey]['fcount'] += 1
|
|
264
277
|
else:
|
|
265
|
-
|
|
266
|
-
if not
|
|
278
|
+
iprec = PgIPInfo.get_missing_ipinfo(ip)
|
|
279
|
+
if not iprec: continue
|
|
267
280
|
trecs[tkey] = {'ip' : ip, 'dsid' : dsid, 'date' : cdate, 'time' : time, 'quarter' : quarter,
|
|
268
281
|
'size' : dsize, 'fcount' : 1, 'method' : method, 'etype' : etype,
|
|
269
|
-
'engine' : engine, 'org_type' :
|
|
270
|
-
'email' :
|
|
282
|
+
'engine' : engine, 'org_type' : iprec['org_type'], 'country' : iprec['country'],
|
|
283
|
+
'region' : iprec['region'], 'email' : iprec['email']}
|
|
271
284
|
else:
|
|
272
285
|
# web usage
|
|
273
286
|
fsize = pgrec['dataset_file_size']
|
|
@@ -333,6 +346,7 @@ def add_tds_allusage(year, logrec):
|
|
|
333
346
|
pgrec['email'] = logrec['email']
|
|
334
347
|
pgrec['org_type'] = logrec['org_type']
|
|
335
348
|
pgrec['country'] = logrec['country']
|
|
349
|
+
pgrec['region'] = logrec['region']
|
|
336
350
|
pgrec['dsid'] = logrec['dsid']
|
|
337
351
|
pgrec['date'] = logrec['date']
|
|
338
352
|
pgrec['quarter'] = logrec['quarter']
|
|
@@ -353,7 +367,7 @@ def add_webfile_usage(year, logrec):
|
|
|
353
367
|
cond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(logrec['wid'], logrec['method'], cdate, logrec['time'])
|
|
354
368
|
if PgDBI.pgget(table, "", cond, PgLOG.LOGWRN): return 0
|
|
355
369
|
|
|
356
|
-
wurec = get_wuser_record(ip, cdate)
|
|
370
|
+
wurec = PgIPInfo.get_wuser_record(ip, cdate)
|
|
357
371
|
if not wurec: return 0
|
|
358
372
|
|
|
359
373
|
record = {'wid' : logrec['wid'], 'dsid' : logrec['dsid']}
|
|
@@ -377,6 +391,7 @@ def add_web_allusage(year, logrec, wurec):
|
|
|
377
391
|
pgrec['email'] = wurec['email']
|
|
378
392
|
pgrec['org_type'] = wurec['org_type']
|
|
379
393
|
pgrec['country'] = wurec['country']
|
|
394
|
+
pgrec['region'] = wurec['region']
|
|
380
395
|
pgrec['dsid'] = logrec['dsid']
|
|
381
396
|
pgrec['date'] = logrec['date']
|
|
382
397
|
pgrec['quarter'] = logrec['quarter']
|
|
@@ -417,39 +432,6 @@ def get_wfile_record(dsids, wfile):
|
|
|
417
432
|
WFILES[wkey] = pgrec
|
|
418
433
|
return pgrec
|
|
419
434
|
|
|
420
|
-
# return wuser record upon success, None otherwise
|
|
421
|
-
def get_wuser_record(ip, date = None):
|
|
422
|
-
|
|
423
|
-
if ip in WUSERS: return WUSERS[ip]
|
|
424
|
-
|
|
425
|
-
ipinfo = PgIPInfo.set_ipinfo(ip)
|
|
426
|
-
if not ipinfo: return None
|
|
427
|
-
|
|
428
|
-
record = {'org_type' : ipinfo['org_type'], 'country' : ipinfo['country']}
|
|
429
|
-
email = 'unknown@' + ipinfo['hostname']
|
|
430
|
-
emcond = "email = '{}'".format(email)
|
|
431
|
-
flds = 'wuid, email, org_type, country, start_date'
|
|
432
|
-
pgrec = PgDBI.pgget("wuser", flds, emcond, PgLOG.LOGERR)
|
|
433
|
-
if pgrec:
|
|
434
|
-
if date and PgUtil.diffdate(pgrec['start_date'], date) > 0:
|
|
435
|
-
pgrec['start_date'] = record['start_date'] = date
|
|
436
|
-
PgDBI.pgupdt('wuser', record, emcond)
|
|
437
|
-
WUSERS[ip] = pgrec
|
|
438
|
-
return pgrec
|
|
439
|
-
|
|
440
|
-
# now add one in
|
|
441
|
-
record['email'] = email
|
|
442
|
-
record['stat_flag'] = 'A'
|
|
443
|
-
record['start_date'] = date
|
|
444
|
-
wuid = PgDBI.pgadd("wuser", record, PgLOG.LOGERR|PgLOG.AUTOID)
|
|
445
|
-
if wuid:
|
|
446
|
-
record['wuid'] = wuid
|
|
447
|
-
PgLOG.pglog("{} Added as wuid({})".format(email, wuid), PgLOG.LGWNEM)
|
|
448
|
-
WUSERS[ip] = record
|
|
449
|
-
return record
|
|
450
|
-
|
|
451
|
-
return None
|
|
452
|
-
|
|
453
435
|
#
|
|
454
436
|
# call main() to start program
|
|
455
437
|
#
|
|
@@ -22,6 +22,7 @@ from rda_python_common import PgLOG
|
|
|
22
22
|
from rda_python_common import PgUtil
|
|
23
23
|
from rda_python_common import PgFile
|
|
24
24
|
from rda_python_common import PgDBI
|
|
25
|
+
from . import PgIPInfo
|
|
25
26
|
|
|
26
27
|
# the define options for gathering COD data usage, one at a time
|
|
27
28
|
MONTH = 0x02 # fet COD data usages for given months
|
|
@@ -182,14 +183,20 @@ def add_usage_records(records, date):
|
|
|
182
183
|
if PgDBI.pgget(USAGE['PGTBL'], '', "aid = '{}' AND date = '{}'".format(aid, date), PgLOG.LGEREX): continue
|
|
183
184
|
record = records[aid]
|
|
184
185
|
if record['email'] == '-':
|
|
185
|
-
|
|
186
|
+
wurec = PgIPInfo.get_wuser_record(record['ip'], date)
|
|
187
|
+
if not wurec: continue
|
|
188
|
+
record['org_type'] = wurec['org_type']
|
|
189
|
+
record['country'] = wurec['country']
|
|
190
|
+
record['region'] = wurec['region']
|
|
191
|
+
record['email'] = 'unknown@' + wurec['hostname']
|
|
186
192
|
else:
|
|
187
193
|
wuid = PgDBI.check_wuser_wuid(record['email'], date)
|
|
188
|
-
if not wuid:
|
|
189
|
-
pgrec = PgDBI.pgget("wuser", "org_type, country", "wuid = {}".format(wuid), PgLOG.LGWNEX)
|
|
194
|
+
if not wuid: continue
|
|
195
|
+
pgrec = PgDBI.pgget("wuser", "org_type, country, region", "wuid = {}".format(wuid), PgLOG.LGWNEX)
|
|
190
196
|
if not pgrec: continue
|
|
191
197
|
record['org_type'] = pgrec['org_type']
|
|
192
198
|
record['country'] = pgrec['country']
|
|
199
|
+
record['region'] = pgrec['region']
|
|
193
200
|
|
|
194
201
|
record['date'] = date
|
|
195
202
|
record['time'] = USERS[aid]['btime']
|
|
@@ -203,7 +203,7 @@ def add_file_usage(year, logrec):
|
|
|
203
203
|
cond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(pgrec['wid'], logrec['method'], logrec['date'], logrec['time'])
|
|
204
204
|
if PgDBI.pgget(table, "", cond, PgLOG.LOGWRN): return 0
|
|
205
205
|
|
|
206
|
-
wurec = get_wuser_record(logrec['ip'], logrec['date'])
|
|
206
|
+
wurec = PgIPInfo.get_wuser_record(logrec['ip'], logrec['date'])
|
|
207
207
|
if not wurec: return 0
|
|
208
208
|
record = {'wid' : pgrec['wid'], 'dsid' : pgrec['dsid']}
|
|
209
209
|
record['wuid_read'] = wurec['wuid']
|
|
@@ -222,7 +222,8 @@ def add_file_usage(year, logrec):
|
|
|
222
222
|
|
|
223
223
|
def add_to_allusage(year, logrec, wurec):
|
|
224
224
|
|
|
225
|
-
pgrec = {'email' : wurec['email'], 'org_type' : wurec['org_type'],
|
|
225
|
+
pgrec = {'email' : wurec['email'], 'org_type' : wurec['org_type'],
|
|
226
|
+
'country' : wurec['country'], 'region' : wurec['region']}
|
|
226
227
|
pgrec['dsid'] = logrec['dsid']
|
|
227
228
|
pgrec['date'] = logrec['date']
|
|
228
229
|
pgrec['quarter'] = logrec['quarter']
|
|
@@ -252,35 +253,6 @@ def get_wfile_wid(dsid, wfile):
|
|
|
252
253
|
|
|
253
254
|
return pgrec
|
|
254
255
|
|
|
255
|
-
# return wuser record upon success, None otherwise
|
|
256
|
-
def get_wuser_record(ip, date):
|
|
257
|
-
|
|
258
|
-
ipinfo = PgIPInfo.set_ipinfo(ip)
|
|
259
|
-
if not ipinfo: return None
|
|
260
|
-
|
|
261
|
-
record = {'org_type' : ipinfo['org_type'], 'country' : ipinfo['country']}
|
|
262
|
-
email = 'unknown@' + ipinfo['hostname']
|
|
263
|
-
emcond = "email = '{}'".format(email)
|
|
264
|
-
flds = 'wuid, email, org_type, country, start_date'
|
|
265
|
-
pgrec = PgDBI.pgget("wuser", flds, emcond, PgLOG.LOGERR)
|
|
266
|
-
if pgrec:
|
|
267
|
-
if PgUtil.diffdate(pgrec['start_date'], date) > 0:
|
|
268
|
-
pgrec['start_date'] = record['start_date'] = date
|
|
269
|
-
PgDBI.pgupdt('wuser', record, emcond)
|
|
270
|
-
return pgrec
|
|
271
|
-
|
|
272
|
-
# now add one in
|
|
273
|
-
record['email'] = email
|
|
274
|
-
record['stat_flag'] = 'A'
|
|
275
|
-
record['start_date'] = date
|
|
276
|
-
wuid = PgDBI.pgadd("wuser", record, PgLOG.LOGERR|PgLOG.AUTOID)
|
|
277
|
-
if wuid:
|
|
278
|
-
record['wuid'] = wuid
|
|
279
|
-
PgLOG.pglog("{} Added as wuid({})".format(email, wuid), PgLOG.LGWNEM)
|
|
280
|
-
return record
|
|
281
|
-
|
|
282
|
-
return None
|
|
283
|
-
|
|
284
256
|
#
|
|
285
257
|
# call main() to start program
|
|
286
258
|
#
|
rda_python_metrics/fillipinfo.py
CHANGED
|
@@ -32,7 +32,7 @@ MULTI = (MONTH|YEARS)
|
|
|
32
32
|
SINGL = (NDAYS)
|
|
33
33
|
|
|
34
34
|
IPINFO = {
|
|
35
|
-
'USGTBL' : ['ipinfo', 'allusage', 'tdsusage'],
|
|
35
|
+
'USGTBL' : ['ipinfo', 'wuser', 'allusage', 'codusage', 'tdsusage'],
|
|
36
36
|
'CDATE' : PgUtil.curdate(),
|
|
37
37
|
}
|
|
38
38
|
|
|
@@ -120,20 +120,14 @@ def fix_allusage_records(date):
|
|
|
120
120
|
ms = re.match(r'^(\d+)-', date)
|
|
121
121
|
year = ms.group(1)
|
|
122
122
|
table = 'allusage_' + year
|
|
123
|
-
cond = "date = '{}'
|
|
123
|
+
cond = "date = '{}' AND region IS NULL".format(date)
|
|
124
124
|
pgrecs = PgDBI.pgmget(table, 'aidx, email, ip', cond, PgLOG.LGEREX)
|
|
125
125
|
if not pgrecs: return 0
|
|
126
126
|
cnt = len(pgrecs['ip']) if pgrecs else 0
|
|
127
127
|
mcnt = 0
|
|
128
128
|
for i in range(cnt):
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
ipinfo = PgIPInfo.set_ipinfo(ip)
|
|
132
|
-
if ipinfo:
|
|
133
|
-
record = {'org_type' : ipinfo['org_type'],
|
|
134
|
-
'country' : ipinfo['country']}
|
|
135
|
-
if not email or re.search(r'-$', email):
|
|
136
|
-
record['email'] = 'unknown@' + ipinfo['hostname']
|
|
129
|
+
record = PgIPInfo.get_missing_ipinfo(pgrecs['ip'][i], pgrecs['email'][i])
|
|
130
|
+
if record:
|
|
137
131
|
mcnt += PgDBI.pgupdt(table, record, "aidx = '{}'".format(pgrecs['aidx'][i]))
|
|
138
132
|
|
|
139
133
|
s = 's' if cnt > 1 else ''
|
|
@@ -144,17 +138,16 @@ def fix_allusage_records(date):
|
|
|
144
138
|
def fix_tdsusage_records(date):
|
|
145
139
|
|
|
146
140
|
table = 'tdsusage'
|
|
147
|
-
cond = "date = '{}'
|
|
148
|
-
pgrecs = PgDBI.pgmget(table, 'time, ip', cond, PgLOG.LGEREX)
|
|
141
|
+
cond = "date = '{}' AND region IS NULL".format(date)
|
|
142
|
+
pgrecs = PgDBI.pgmget(table, 'time, email, ip', cond, PgLOG.LGEREX)
|
|
149
143
|
if not pgrecs: return 0
|
|
150
144
|
cnt = len(pgrecs['ip']) if pgrecs else 0
|
|
151
145
|
mcnt = 0
|
|
152
146
|
for i in range(cnt):
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
cond = "date = '{}' AND time = '{}' AND ip = '{}'".format(date, pgrecs['time'][i], pgrecs['ip'][i])
|
|
147
|
+
ip = pgrecs['ip'][i]
|
|
148
|
+
record = PgIPInfo.get_missing_ipinfo(ip, pgrecs['email'][i])
|
|
149
|
+
if record:
|
|
150
|
+
cond = "date = '{}' AND time = '{}' AND ip = '{}'".format(date, pgrecs['time'][i], ip)
|
|
158
151
|
mcnt += PgDBI.pgupdt(table, record, cond)
|
|
159
152
|
|
|
160
153
|
s = 's' if cnt > 1 else ''
|
|
@@ -162,16 +155,54 @@ def fix_tdsusage_records(date):
|
|
|
162
155
|
|
|
163
156
|
return mcnt
|
|
164
157
|
|
|
158
|
+
def fix_codusage_records(date):
    """Fill in ip-derived fields of codusage rows that have no region.

    date -- date whose codusage rows (with region IS NULL) are processed

    Each selected row is passed to PgIPInfo.get_missing_ipinfo(); rows for
    which a record comes back are updated by codidx.  Returns the sum of
    the PgDBI.pgupdt() results, or 0 when no rows are selected.
    """
    table = 'codusage'
    pgrecs = PgDBI.pgmget(table, 'codidx, email, ip',
                          "date = '{}' AND region IS NULL".format(date), PgLOG.LGEREX)
    if not pgrecs: return 0

    ips = pgrecs['ip']
    cnt = len(ips)
    mcnt = 0
    for i, ip in enumerate(ips):
        record = PgIPInfo.get_missing_ipinfo(ip, pgrecs['email'][i])
        if not record: continue
        mcnt += PgDBI.pgupdt(table, record, "codidx = '{}'".format(pgrecs['codidx'][i]))

    plural = '' if cnt <= 1 else 's'
    PgLOG.pglog("{}: {} of {} record{} updated for {}".format(table, mcnt, cnt, plural, date), PgLOG.LOGWRN)

    return mcnt
|
|
175
|
+
|
|
176
|
+
def fix_wuser_records(date):
    """Fill in ip-derived fields of wuser rows that have no region.

    date -- start_date whose wuser rows (with region IS NULL) are processed

    Each selected row is passed to PgIPInfo.get_missing_ipinfo(); rows for
    which a record comes back are updated by wuid.  Returns the sum of the
    PgDBI.pgupdt() results, or 0 when no rows are selected.
    """
    table = 'wuser'
    pgrecs = PgDBI.pgmget(table, 'wuid, email, ip',
                          "start_date = '{}' AND region IS NULL".format(date), PgLOG.LGEREX)
    if not pgrecs: return 0

    ips = pgrecs['ip']
    cnt = len(ips)
    mcnt = 0
    for i, ip in enumerate(ips):
        record = PgIPInfo.get_missing_ipinfo(ip, pgrecs['email'][i])
        if not record: continue
        mcnt += PgDBI.pgupdt(table, record, "wuid = '{}'".format(pgrecs['wuid'][i]))

    plural = '' if cnt <= 1 else 's'
    PgLOG.pglog("{}: {} of {} record{} updated for {}".format(table, mcnt, cnt, plural, date), PgLOG.LOGWRN)

    return mcnt
|
|
195
|
+
|
|
165
196
|
def fix_ipinfo_records(date):
|
|
166
197
|
|
|
167
198
|
table = 'ipinfo'
|
|
168
|
-
cond = "
|
|
199
|
+
cond = "date = '{}' AND region IS NULL".format(date)
|
|
169
200
|
pgrecs = PgDBI.pgmget(table, 'ip', cond, PgLOG.LGEREX)
|
|
170
201
|
if not pgrecs: return 0
|
|
171
202
|
cnt = len(pgrecs['ip']) if pgrecs else 0
|
|
172
203
|
mcnt = 0
|
|
173
204
|
for i in range(cnt):
|
|
174
|
-
PgIPInfo.set_ipinfo(pgrecs['ip'][i]
|
|
205
|
+
PgIPInfo.set_ipinfo(pgrecs['ip'][i])
|
|
175
206
|
|
|
176
207
|
mcnt = PgIPInfo.IPINFO['IPUPDT']
|
|
177
208
|
s = 's' if cnt > 1 else ''
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
|
|
8
8
|
- Option -b, log process information into logfile only;
|
|
9
9
|
|
|
10
|
-
- Option -t, table name, ipinfo,
|
|
10
|
+
- Option -t, table name, ipinfo, wuser, allusage, codusage or tdsusage,
|
|
11
11
|
to fix IP-related information, such as organization names/types,
|
|
12
12
|
emails and country names;
|
|
13
13
|
|
|
@@ -87,7 +87,7 @@ def add_one_order(params):
|
|
|
87
87
|
|
|
88
88
|
def add_to_allusage(record, year, ctime):
|
|
89
89
|
|
|
90
|
-
pgrec = PgDBI.pgget("wuser", "email, org_type, country",
|
|
90
|
+
pgrec = PgDBI.pgget("wuser", "email, org_type, country, region",
|
|
91
91
|
"wuid = {}".format(record['wuid_request']), PgLOG.LGWNEX)
|
|
92
92
|
if pgrec:
|
|
93
93
|
pgrec['dsid'] = record['dsid']
|
|
@@ -181,7 +181,7 @@ def add_file_usage(year, logrec):
|
|
|
181
181
|
cond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(pgrec['wid'], logrec['method'], logrec['date'], logrec['time'])
|
|
182
182
|
if PgDBI.pgget(table, "", cond, PgLOG.LOGWRN): return 0
|
|
183
183
|
|
|
184
|
-
wurec = get_wuser_record(logrec['ip'], logrec['date'])
|
|
184
|
+
wurec = PgIPInfo.get_wuser_record(logrec['ip'], logrec['date'])
|
|
185
185
|
if not wurec: return 0
|
|
186
186
|
record = {'wid' : pgrec['wid'], 'dsid' : pgrec['dsid']}
|
|
187
187
|
record['wuid_read'] = wurec['wuid']
|
|
@@ -200,7 +200,8 @@ def add_file_usage(year, logrec):
|
|
|
200
200
|
|
|
201
201
|
def add_to_allusage(year, logrec, wurec):
|
|
202
202
|
|
|
203
|
-
pgrec = {'email' : wurec['email'], 'org_type' : wurec['org_type'],
|
|
203
|
+
pgrec = {'email' : wurec['email'], 'org_type' : wurec['org_type'],
|
|
204
|
+
'country' : wurec['country'], 'region' : wurec['region']}
|
|
204
205
|
pgrec['dsid'] = logrec['dsid']
|
|
205
206
|
pgrec['date'] = logrec['date']
|
|
206
207
|
pgrec['quarter'] = logrec['quarter']
|
|
@@ -230,35 +231,6 @@ def get_wfile_wid(dsid, wfile):
|
|
|
230
231
|
|
|
231
232
|
return pgrec
|
|
232
233
|
|
|
233
|
-
# return wuser record upon success, None otherwise
|
|
234
|
-
def get_wuser_record(ip, date):
|
|
235
|
-
|
|
236
|
-
ipinfo = PgIPInfo.set_ipinfo(ip)
|
|
237
|
-
if not ipinfo: return None
|
|
238
|
-
|
|
239
|
-
record = {'org_type' : ipinfo['org_type'], 'country' : ipinfo['country']}
|
|
240
|
-
email = 'unknown@' + ipinfo['hostname']
|
|
241
|
-
emcond = "email = '{}'".format(email)
|
|
242
|
-
flds = 'wuid, email, org_type, country, start_date'
|
|
243
|
-
pgrec = PgDBI.pgget("wuser", flds, emcond, PgLOG.LOGERR)
|
|
244
|
-
if pgrec:
|
|
245
|
-
if PgUtil.diffdate(pgrec['start_date'], date) > 0:
|
|
246
|
-
pgrec['start_date'] = record['start_date'] = date
|
|
247
|
-
PgDBI.pgupdt('wuser', record, emcond)
|
|
248
|
-
return pgrec
|
|
249
|
-
|
|
250
|
-
# now add one in
|
|
251
|
-
record['email'] = email
|
|
252
|
-
record['stat_flag'] = 'A'
|
|
253
|
-
record['start_date'] = date
|
|
254
|
-
wuid = PgDBI.pgadd("wuser", record, PgLOG.LOGERR|PgLOG.AUTOID)
|
|
255
|
-
if wuid:
|
|
256
|
-
record['wuid'] = wuid
|
|
257
|
-
PgLOG.pglog("{} Added as wuid({})".format(email, wuid), PgLOG.LGWNEM)
|
|
258
|
-
return record
|
|
259
|
-
|
|
260
|
-
return None
|
|
261
|
-
|
|
262
234
|
#
|
|
263
235
|
# call main() to start program
|
|
264
236
|
#
|