rda-python-metrics 1.0.17__tar.gz → 1.0.20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rda-python-metrics might be problematic. Click here for more details.

Files changed (59)
  1. {rda_python_metrics-1.0.17/src/rda_python_metrics.egg-info → rda_python_metrics-1.0.20}/PKG-INFO +2 -1
  2. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/pyproject.toml +3 -2
  3. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/PgIPInfo.py +117 -9
  4. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/fillawsusage.py +3 -31
  5. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/fillcdgusage.py +22 -40
  6. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/fillcodusage.py +10 -3
  7. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/fillglobususage.py +3 -31
  8. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/fillipinfo.py +50 -19
  9. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/fillipinfo.usg +1 -1
  10. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/filloneorder.py +1 -1
  11. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/fillosdfusage.py +3 -31
  12. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/filltdsusage.py +8 -7
  13. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/viewallusage.py +21 -18
  14. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/viewallusage.usg +25 -10
  15. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/viewcodusage.py +18 -15
  16. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/viewcodusage.usg +6 -3
  17. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/viewordusage.py +8 -7
  18. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/viewordusage.usg +8 -9
  19. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/viewtdsusage.py +17 -14
  20. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/viewtdsusage.usg +9 -6
  21. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/viewwebusage.py +5 -5
  22. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/viewwebusage.usg +3 -3
  23. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20/src/rda_python_metrics.egg-info}/PKG-INFO +2 -1
  24. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics.egg-info/requires.txt +1 -0
  25. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/LICENSE +0 -0
  26. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/MANIFEST.in +0 -0
  27. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/README.md +0 -0
  28. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/setup.cfg +0 -0
  29. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/PgView.py +0 -0
  30. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/__init__.py +0 -0
  31. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/fillawsusage.usg +0 -0
  32. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/fillcdgusage.usg +0 -0
  33. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/fillcodusage.usg +0 -0
  34. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/fillcountry.py +0 -0
  35. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/fillendtime.py +0 -0
  36. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/fillglobususage.usg +0 -0
  37. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/filloneorder.usg +0 -0
  38. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/fillosdfusage.usg +0 -0
  39. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/fillrdadb.py +0 -0
  40. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/fillrdadb.usg +0 -0
  41. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/filltdsusage.usg +0 -0
  42. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/filluser.py +0 -0
  43. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/filluser.usg +0 -0
  44. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/logarch.py +0 -0
  45. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/logarch.usg +0 -0
  46. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/pgperson.py +0 -0
  47. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/pgsyspath.py +0 -0
  48. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/pgusername.py +0 -0
  49. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/viewcheckusage.py +0 -0
  50. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/viewcheckusage.usg +0 -0
  51. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/viewrqstusage.py +0 -0
  52. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/viewrqstusage.usg +0 -0
  53. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/viewwebfile.py +0 -0
  54. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics/viewwebfile.usg +0 -0
  55. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics.egg-info/SOURCES.txt +0 -0
  56. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics.egg-info/dependency_links.txt +0 -0
  57. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics.egg-info/entry_points.txt +0 -0
  58. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/src/rda_python_metrics.egg-info/top_level.txt +0 -0
  59. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.20}/tests/test_metrics.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rda_python_metrics
3
- Version: 1.0.17
3
+ Version: 1.0.20
4
4
  Summary: RDA Python Package to gather and view data usage metrics
5
5
  Author-email: Zaihua Ji <zji@ucar.edu>
6
6
  Project-URL: Homepage, https://github.com/NCAR/rda-python-metrics
@@ -16,6 +16,7 @@ Requires-Dist: rda_python_setuid
16
16
  Requires-Dist: geoip2
17
17
  Requires-Dist: ipinfo
18
18
  Requires-Dist: httplib2
19
+ Requires-Dist: dnspython
19
20
  Dynamic: license-file
20
21
 
21
22
  RDA Python Package to gather and view data usage metrics.
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
6
6
 
7
7
  [project]
8
8
  name = "rda_python_metrics"
9
- version = "1.0.17"
9
+ version = "1.0.20"
10
10
  authors = [
11
11
  { name="Zaihua Ji", email="zji@ucar.edu" },
12
12
  ]
@@ -24,7 +24,8 @@ dependencies = [
24
24
  "rda_python_setuid",
25
25
  "geoip2",
26
26
  "ipinfo",
27
- "httplib2"
27
+ "httplib2",
28
+ "dnspython"
28
29
  ]
29
30
 
30
31
  [tool.pytest.ini_options]
@@ -13,9 +13,12 @@
13
13
  #
14
14
  ###############################################################################
15
15
  #
16
+ import re
16
17
  import geoip2.database as geodb
17
18
  import ipinfo
18
19
  import socket
20
+ import dns.resolver
21
+ import json
19
22
  from rda_python_common import PgLOG
20
23
  from rda_python_common import PgDBI
21
24
  from rda_python_common import PgUtil
@@ -28,11 +31,42 @@ IPINFO = {
28
31
  'IPADD' : 0
29
32
  }
30
33
 
34
+ IPDNS = None
31
35
  IPDB = None
32
36
  G2DB = None
33
37
  IPRECS = {}
34
38
  COUNTRIES = {}
35
39
 
40
+ #
41
+ # get (creating and caching on first use) a global dns.resolver.Resolver object
42
+ #
43
+ def get_dns_resolver(forceget = False):
44
+
45
+ global IPDNS
46
+
47
+ if forceget or not IPDNS: IPDNS = dns.resolver.Resolver()
48
+
49
+ return IPDNS
50
+
51
+ #
52
+ # Resolve a domain name to a list of IP addresses (A records by default)
53
+ #
54
+ def dns_to_ip(dmname, type = 'A'):
55
+
56
+ ipdns = get_dns_resolver()
57
+
58
+ try:
59
+ answers = ipdns.resolve(dmname, type)
60
+ return [str(rdata) for rdata in answers]
61
+ except dns.resolver.NXDOMAIN:
62
+ PgLOG.pglog(f"{dmname}: the domain name does not exist", PgLOG.LOGERR)
63
+ except dns.resolver.Timeout:
64
+ PgLOG.pglog(f"{dmname}: the domain name request timed out", PgLOG.LOGERR)
65
+ except dns.exception.DNSException as e:
66
+ PgLOG.pglog(f"{dmname}: error domain name request: {e}", PgLOG.LOGERR)
67
+
68
+ return None
69
+
36
70
  #
37
71
  # Get country token name for given two-character domain id
38
72
  #
@@ -59,6 +93,17 @@ def set_ipinfo_database():
59
93
  except Exception as e:
60
94
  PgLOG.pglog('ipinfo: ' + str(e), PgLOG.LGEREX)
61
95
 
96
+ #
97
+ # get an ipinfo record for the first IP address resolved from the given domain name
98
+ #
99
+ def domain_ipinfo_record(dmname):
100
+
101
+ ips = dns_to_ip(dmname)
102
+
103
+ if ips: return set_ipinfo(ips[0])
104
+
105
+ return None
106
+
62
107
  #
63
108
  # get an ipinfo record for the given ip address
64
109
  #
@@ -67,14 +112,11 @@ def get_ipinfo_record(ip):
67
112
  if not IPDB: set_ipinfo_database()
68
113
  try:
69
114
  iprec = IPDB.getDetails(ip).all
70
- if 'hostname' not in iprec:
71
- PgLOG.pglog("ipinfo: {} - ip address is not in the database".format(ip), PgLOG.LOGERR)
72
- return None
73
115
  except Exception as e:
74
116
  PgLOG.pglog("ipinfo: {} - {}".format(ip, str(e)), PgLOG.LOGWRN)
75
117
  return None
76
118
 
77
- record = {'ip' : ip, 'stat_flag' : 'A', 'hostname' : ip}
119
+ record = {'ip' : ip, 'stat_flag' : 'A', 'hostname' : ip, 'org_type' : '-'}
78
120
  if 'hostname' in iprec:
79
121
  record['hostname'] = iprec['hostname']
80
122
  record['org_type'] = PgDBI.get_org_type(None, record['hostname'])
@@ -82,9 +124,11 @@ def get_ipinfo_record(ip):
82
124
  record['lon'] = float(iprec['longitude']) if iprec['longitude'] else 0
83
125
  if 'org' in iprec: record['org_name'] = iprec['org']
84
126
  record['country'] = get_country_record_code(iprec, 'country_name')
127
+ record['region'] = PgLOG.convert_chars(iprec['region']) if 'region' in iprec else None
85
128
  if 'city' in iprec: record['city'] = PgLOG.convert_chars(iprec['city'])
86
129
  if 'postal' in iprec: record['postal'] = iprec['postal']
87
130
  record['timezone'] = iprec['timezone']
131
+ record['ipinfo'] = json.dumps(iprec)
88
132
 
89
133
  return record
90
134
 
@@ -108,15 +152,16 @@ def get_geoip2_record(ip):
108
152
  PgLOG.pglog("geoip2: {} - {}".format(ip, str(e)), PgLOG.LOGWRN)
109
153
  return None
110
154
 
111
- record = {'ip' : ip, 'stat_flag' : 'M'}
155
+ record = {'ip' : ip, 'stat_flag' : 'M', 'org_type' : '-'}
112
156
  record['lat'] = float(city.location.latitude) if city.location.latitude else 0
113
157
  record['lon'] = float(city.location.longitude) if city.location.longitude else 0
114
158
  record['country'] = get_country_name_code(city.country.name)
115
159
  record['city'] = PgLOG.convert_chars(city.city.name)
160
+ record['region'] = PgLOG.convert_chars(city.subdivisions.most_specific.name) if city.subdivisions.most_specific.name else None
116
161
  record['postal'] = city.postal.code
117
162
  record['timezone'] = city.location.time_zone
118
163
  record['hostname'] = ip
119
- record['org_type'] = '-'
164
+ record['ipinfo'] = json.dumps(object_to_dict(city))
120
165
 
121
166
  try:
122
167
  hostrec = socket.gethostbyaddr(ip)
@@ -128,6 +173,20 @@ def get_geoip2_record(ip):
128
173
 
129
174
  return record
130
175
 
176
+ #
177
+ # convert an object to a dict recursively
178
+ #
179
+ def object_to_dict(obj):
180
+ if hasattr(obj, "__dict__"):
181
+ result = {}
182
+ for key, value in obj.__dict__.items():
183
+ result[key] = object_to_dict(value)
184
+ return result
185
+ elif isinstance(obj, list):
186
+ return [object_to_dict(item) for item in obj]
187
+ else:
188
+ return obj
189
+
131
190
  #
132
191
  # update wuser.email for hostname changed
133
192
  #
@@ -160,7 +219,7 @@ def update_ipinfo_record(record, pgrec = None):
160
219
  # set ip info into table ipinfo from python module ipinfo
161
220
  # if ipopt is True; otherwise, use module geoip2
162
221
  #
163
- def set_ipinfo(ip, ipopt = False):
222
+ def set_ipinfo(ip, ipopt = True):
164
223
 
165
224
  if ip in IPRECS:
166
225
  pgrec = IPRECS[ip]
@@ -169,8 +228,8 @@ def set_ipinfo(ip, ipopt = False):
169
228
  pgrec = PgDBI.pgget('ipinfo', '*', "ip = '{}'".format(ip))
170
229
 
171
230
  if not pgrec or ipopt and pgrec['stat_flag'] == 'M':
172
- record = None if ipopt else get_geoip2_record(ip)
173
- if not (record and 'hostname' in record): record = get_ipinfo_record(ip)
231
+ record = get_ipinfo_record(ip) if ipopt else None
232
+ if not record: record = get_geoip2_record(ip)
174
233
  if record and update_ipinfo_record(record, pgrec): pgrec = record
175
234
 
176
235
  IPRECS[ip] = pgrec
@@ -186,3 +245,52 @@ def get_update_record(nrec, orec):
186
245
  if nrec[fld] != orec[fld]:
187
246
  record[fld] = nrec[fld]
188
247
  return record
248
+
249
+ #
250
+ # fill the missing info for given ip
251
+ #
252
+ def get_missing_ipinfo(ip, email = None):
253
+
254
+ if not ip:
255
+ if email and '@' in email: ip = dns_to_ip(email.split('@')[1])
256
+ if not ip: return None
257
+
258
+ ipinfo = set_ipinfo(ip)
259
+ if ipinfo:
260
+ record = {'org_type' : ipinfo['org_type'],
261
+ 'country' : ipinfo['country'],
262
+ 'region' : ipinfo['region']}
263
+ if not email or re.search(r'-$', email):
264
+ record['email'] = 'unknown@' + ipinfo['hostname']
265
+ else:
266
+ record['email'] = email
267
+ return record
268
+ else:
269
+ return None
270
+
271
+
272
+ # return wuser record upon success, None otherwise
273
+ def get_wuser_record(ip, date, email = None):
274
+
275
+ record = get_missing_ipinfo(ip, email)
276
+ if not record: return None
277
+
278
+ emcond = "email = '{}'".format(record['email'])
279
+ flds = 'wuid, email, org_type, country, region, start_date'
280
+ pgrec = PgDBI.pgget("wuser", flds, emcond, PgLOG.LOGERR)
281
+ if pgrec:
282
+ if PgUtil.diffdate(pgrec['start_date'], date) > 0:
283
+ pgrec['start_date'] = record['start_date'] = date
284
+ PgDBI.pgupdt('wuser', record, emcond)
285
+ return pgrec
286
+
287
+ # now add one in
288
+ record['stat_flag'] = 'A'
289
+ record['start_date'] = date
290
+ wuid = PgDBI.pgadd("wuser", record, PgLOG.LOGERR|PgLOG.AUTOID)
291
+ if wuid:
292
+ record['wuid'] = wuid
293
+ PgLOG.pglog("{} Added as wuid({})".format(email, wuid), PgLOG.LGWNEM)
294
+ return record
295
+
296
+ return None
@@ -201,7 +201,7 @@ def add_file_usage(year, logrec):
201
201
  cond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(pgrec['wid'], logrec['method'], logrec['date'], logrec['time'])
202
202
  if PgDBI.pgget(table, "", cond, PgLOG.LOGWRN): return 0
203
203
 
204
- wurec = get_wuser_record(logrec['ip'], logrec['date'])
204
+ wurec = PgIPInfo.get_wuser_record(logrec['ip'], logrec['date'])
205
205
  if not wurec: return 0
206
206
  record = {'wid' : pgrec['wid'], 'dsid' : pgrec['dsid']}
207
207
  record['wuid_read'] = wurec['wuid']
@@ -220,7 +220,8 @@ def add_file_usage(year, logrec):
220
220
 
221
221
  def add_to_allusage(year, logrec, wurec):
222
222
 
223
- pgrec = {'email' : wurec['email'], 'org_type' : wurec['org_type'], 'country' : wurec['country']}
223
+ pgrec = {'email' : wurec['email'], 'org_type' : wurec['org_type'],
224
+ 'country' : wurec['country'], 'region' : wurec['region']}
224
225
  pgrec['dsid'] = logrec['dsid']
225
226
  pgrec['date'] = logrec['date']
226
227
  pgrec['quarter'] = logrec['quarter']
@@ -247,35 +248,6 @@ def get_wfile_wid(dsid, wfile):
247
248
 
248
249
  return pgrec
249
250
 
250
- # return wuser record upon success, None otherwise
251
- def get_wuser_record(ip, date):
252
-
253
- ipinfo = PgIPInfo.set_ipinfo(ip)
254
- if not ipinfo: return None
255
-
256
- record = {'org_type' : ipinfo['org_type'], 'country' : ipinfo['country']}
257
- email = 'unknown@' + ipinfo['hostname']
258
- emcond = "email = '{}'".format(email)
259
- flds = 'wuid, email, org_type, country, start_date'
260
- pgrec = PgDBI.pgget("wuser", flds, emcond, PgLOG.LOGERR)
261
- if pgrec:
262
- if PgUtil.diffdate(pgrec['start_date'], date) > 0:
263
- pgrec['start_date'] = record['start_date'] = date
264
- PgDBI.pgupdt('wuser', record, emcond)
265
- return pgrec
266
-
267
- # now add one in
268
- record['email'] = email
269
- record['stat_flag'] = 'A'
270
- record['start_date'] = date
271
- wuid = PgDBI.pgadd("wuser", record, PgLOG.LOGERR|PgLOG.AUTOID)
272
- if wuid:
273
- record['wuid'] = wuid
274
- PgLOG.pglog("{} Added as wuid({})".format(email, wuid), PgLOG.LGWNEM)
275
- return record
276
-
277
- return None
278
-
279
251
  #
280
252
  # call main() to start program
281
253
  #
@@ -52,13 +52,26 @@ DSIDS = {
52
52
  'ucar.cgd.ccsm4.SD-WACCM-X_v2.1' : ['d651034'],
53
53
  'ucar.cgd.ccsm4.amv_lens' : ['d651035'],
54
54
  'ucar.cgd.cesm2.cism_ismip6' : ['d651036'],
55
- 'ucar.cgd.ccsm4.pliomip2' : ['d651037']
55
+ 'ucar.cgd.ccsm4.pliomip2' : ['d651037'],
56
+ # newly added
57
+ 'ucar.cgd.cesm2-waccm.s2s_hindcasts': ['d651040'],
58
+ 'ucar.cgd.CESM1.3_SH_storm_tracks': ['d651044'],
59
+ 'ucar.cgd.cesm2.waccm6.ssp245': ['d651045'],
60
+ 'ucar.cgd.cesm2.CESM21-CISM2-JG-BG': ['d651046'],
61
+ 'ucar.cgd.ccsm4.TC-CESM': ['d651047'],
62
+ 'ucar.cgd.cesm2.ISSI_OSSE': ['d651048'],
63
+ 'ucar.cgd.ccsm4.SOcean_Eddies_mclong': ['d651049'],
64
+ 'trace': ['d651050'],
65
+ 'ucar.cgd.cesm2.waccm.solar': ['d651051'],
66
+ 'ucar.cgd.ccsm4.CESM1-CCSM4_mid-Pliocene' : ['d651042'],
67
+ 'ucar.cgd.ccsm4.PaleoIF' : ['d651052'],
68
+ 'ucar.cgd.ccsm4.b.e11.B20LE_fixedO3' : ['d651053'],
69
+ 'ucar.cgd.cesm2.single.forcing.large.ensemble' : ['d651055']
56
70
  }
57
71
 
58
72
  ALLIDS = list(DSIDS.keys())
59
73
 
60
74
  WFILES = {}
61
- WUSERS = {}
62
75
 
63
76
  #
64
77
  # main function to run this program
@@ -262,12 +275,12 @@ def fill_cdg_usages(dsids, dranges):
262
275
  trecs[tkey]['size'] += dsize
263
276
  trecs[tkey]['fcount'] += 1
264
277
  else:
265
- wurec = get_wuser_record(ip, cdate)
266
- if not wurec: continue
278
+ iprec = PgIPInfo.get_missing_ipinfo(ip)
279
+ if not iprec: continue
267
280
  trecs[tkey] = {'ip' : ip, 'dsid' : dsid, 'date' : cdate, 'time' : time, 'quarter' : quarter,
268
281
  'size' : dsize, 'fcount' : 1, 'method' : method, 'etype' : etype,
269
- 'engine' : engine, 'org_type' : wurec['org_type'], 'country' : wurec['country'],
270
- 'email' : wurec['email']}
282
+ 'engine' : engine, 'org_type' : iprec['org_type'], 'country' : iprec['country'],
283
+ 'region' : iprec['region'], 'email' : iprec['email']}
271
284
  else:
272
285
  # web usage
273
286
  fsize = pgrec['dataset_file_size']
@@ -333,6 +346,7 @@ def add_tds_allusage(year, logrec):
333
346
  pgrec['email'] = logrec['email']
334
347
  pgrec['org_type'] = logrec['org_type']
335
348
  pgrec['country'] = logrec['country']
349
+ pgrec['region'] = logrec['region']
336
350
  pgrec['dsid'] = logrec['dsid']
337
351
  pgrec['date'] = logrec['date']
338
352
  pgrec['quarter'] = logrec['quarter']
@@ -353,7 +367,7 @@ def add_webfile_usage(year, logrec):
353
367
  cond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(logrec['wid'], logrec['method'], cdate, logrec['time'])
354
368
  if PgDBI.pgget(table, "", cond, PgLOG.LOGWRN): return 0
355
369
 
356
- wurec = get_wuser_record(ip, cdate)
370
+ wurec = PgIPInfo.get_wuser_record(ip, cdate)
357
371
  if not wurec: return 0
358
372
 
359
373
  record = {'wid' : logrec['wid'], 'dsid' : logrec['dsid']}
@@ -377,6 +391,7 @@ def add_web_allusage(year, logrec, wurec):
377
391
  pgrec['email'] = wurec['email']
378
392
  pgrec['org_type'] = wurec['org_type']
379
393
  pgrec['country'] = wurec['country']
394
+ pgrec['region'] = wurec['region']
380
395
  pgrec['dsid'] = logrec['dsid']
381
396
  pgrec['date'] = logrec['date']
382
397
  pgrec['quarter'] = logrec['quarter']
@@ -417,39 +432,6 @@ def get_wfile_record(dsids, wfile):
417
432
  WFILES[wkey] = pgrec
418
433
  return pgrec
419
434
 
420
- # return wuser record upon success, None otherwise
421
- def get_wuser_record(ip, date = None):
422
-
423
- if ip in WUSERS: return WUSERS[ip]
424
-
425
- ipinfo = PgIPInfo.set_ipinfo(ip)
426
- if not ipinfo: return None
427
-
428
- record = {'org_type' : ipinfo['org_type'], 'country' : ipinfo['country']}
429
- email = 'unknown@' + ipinfo['hostname']
430
- emcond = "email = '{}'".format(email)
431
- flds = 'wuid, email, org_type, country, start_date'
432
- pgrec = PgDBI.pgget("wuser", flds, emcond, PgLOG.LOGERR)
433
- if pgrec:
434
- if date and PgUtil.diffdate(pgrec['start_date'], date) > 0:
435
- pgrec['start_date'] = record['start_date'] = date
436
- PgDBI.pgupdt('wuser', record, emcond)
437
- WUSERS[ip] = pgrec
438
- return pgrec
439
-
440
- # now add one in
441
- record['email'] = email
442
- record['stat_flag'] = 'A'
443
- record['start_date'] = date
444
- wuid = PgDBI.pgadd("wuser", record, PgLOG.LOGERR|PgLOG.AUTOID)
445
- if wuid:
446
- record['wuid'] = wuid
447
- PgLOG.pglog("{} Added as wuid({})".format(email, wuid), PgLOG.LGWNEM)
448
- WUSERS[ip] = record
449
- return record
450
-
451
- return None
452
-
453
435
  #
454
436
  # call main() to start program
455
437
  #
@@ -22,6 +22,7 @@ from rda_python_common import PgLOG
22
22
  from rda_python_common import PgUtil
23
23
  from rda_python_common import PgFile
24
24
  from rda_python_common import PgDBI
25
+ from . import PgIPInfo
25
26
 
26
27
  # the define options for gathering COD data usage, one at a time
27
28
  MONTH = 0x02 # fet COD data usages for given months
@@ -182,14 +183,20 @@ def add_usage_records(records, date):
182
183
  if PgDBI.pgget(USAGE['PGTBL'], '', "aid = '{}' AND date = '{}'".format(aid, date), PgLOG.LGEREX): continue
183
184
  record = records[aid]
184
185
  if record['email'] == '-':
185
- record['org_type'] = record['country'] = '-'
186
+ wurec = PgIPInfo.get_wuser_record(record['ip'], date)
187
+ if not wurec: continue
188
+ record['org_type'] = wurec['org_type']
189
+ record['country'] = wurec['country']
190
+ record['region'] = wurec['region']
191
+ record['email'] = 'unknown@' + wurec['hostname']
186
192
  else:
187
193
  wuid = PgDBI.check_wuser_wuid(record['email'], date)
188
- if not wuid: next
189
- pgrec = PgDBI.pgget("wuser", "org_type, country", "wuid = {}".format(wuid), PgLOG.LGWNEX)
194
+ if not wuid: continue
195
+ pgrec = PgDBI.pgget("wuser", "org_type, country, region", "wuid = {}".format(wuid), PgLOG.LGWNEX)
190
196
  if not pgrec: continue
191
197
  record['org_type'] = pgrec['org_type']
192
198
  record['country'] = pgrec['country']
199
+ record['region'] = pgrec['region']
193
200
 
194
201
  record['date'] = date
195
202
  record['time'] = USERS[aid]['btime']
@@ -203,7 +203,7 @@ def add_file_usage(year, logrec):
203
203
  cond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(pgrec['wid'], logrec['method'], logrec['date'], logrec['time'])
204
204
  if PgDBI.pgget(table, "", cond, PgLOG.LOGWRN): return 0
205
205
 
206
- wurec = get_wuser_record(logrec['ip'], logrec['date'])
206
+ wurec = PgIPInfo.get_wuser_record(logrec['ip'], logrec['date'])
207
207
  if not wurec: return 0
208
208
  record = {'wid' : pgrec['wid'], 'dsid' : pgrec['dsid']}
209
209
  record['wuid_read'] = wurec['wuid']
@@ -222,7 +222,8 @@ def add_file_usage(year, logrec):
222
222
 
223
223
  def add_to_allusage(year, logrec, wurec):
224
224
 
225
- pgrec = {'email' : wurec['email'], 'org_type' : wurec['org_type'], 'country' : wurec['country']}
225
+ pgrec = {'email' : wurec['email'], 'org_type' : wurec['org_type'],
226
+ 'country' : wurec['country'], 'region' : wurec['region']}
226
227
  pgrec['dsid'] = logrec['dsid']
227
228
  pgrec['date'] = logrec['date']
228
229
  pgrec['quarter'] = logrec['quarter']
@@ -252,35 +253,6 @@ def get_wfile_wid(dsid, wfile):
252
253
 
253
254
  return pgrec
254
255
 
255
- # return wuser record upon success, None otherwise
256
- def get_wuser_record(ip, date):
257
-
258
- ipinfo = PgIPInfo.set_ipinfo(ip)
259
- if not ipinfo: return None
260
-
261
- record = {'org_type' : ipinfo['org_type'], 'country' : ipinfo['country']}
262
- email = 'unknown@' + ipinfo['hostname']
263
- emcond = "email = '{}'".format(email)
264
- flds = 'wuid, email, org_type, country, start_date'
265
- pgrec = PgDBI.pgget("wuser", flds, emcond, PgLOG.LOGERR)
266
- if pgrec:
267
- if PgUtil.diffdate(pgrec['start_date'], date) > 0:
268
- pgrec['start_date'] = record['start_date'] = date
269
- PgDBI.pgupdt('wuser', record, emcond)
270
- return pgrec
271
-
272
- # now add one in
273
- record['email'] = email
274
- record['stat_flag'] = 'A'
275
- record['start_date'] = date
276
- wuid = PgDBI.pgadd("wuser", record, PgLOG.LOGERR|PgLOG.AUTOID)
277
- if wuid:
278
- record['wuid'] = wuid
279
- PgLOG.pglog("{} Added as wuid({})".format(email, wuid), PgLOG.LGWNEM)
280
- return record
281
-
282
- return None
283
-
284
256
  #
285
257
  # call main() to start program
286
258
  #
@@ -32,7 +32,7 @@ MULTI = (MONTH|YEARS)
32
32
  SINGL = (NDAYS)
33
33
 
34
34
  IPINFO = {
35
- 'USGTBL' : ['ipinfo', 'allusage', 'tdsusage'],
35
+ 'USGTBL' : ['ipinfo', 'wuser', 'allusage', 'codusage', 'tdsusage'],
36
36
  'CDATE' : PgUtil.curdate(),
37
37
  }
38
38
 
@@ -120,20 +120,14 @@ def fix_allusage_records(date):
120
120
  ms = re.match(r'^(\d+)-', date)
121
121
  year = ms.group(1)
122
122
  table = 'allusage_' + year
123
- cond = "date = '{}' and org_type = '-'".format(date)
123
+ cond = "date = '{}' AND region IS NULL".format(date)
124
124
  pgrecs = PgDBI.pgmget(table, 'aidx, email, ip', cond, PgLOG.LGEREX)
125
125
  if not pgrecs: return 0
126
126
  cnt = len(pgrecs['ip']) if pgrecs else 0
127
127
  mcnt = 0
128
128
  for i in range(cnt):
129
- ip = pgrecs['ip'][i]
130
- email = pgrecs['email'][i]
131
- ipinfo = PgIPInfo.set_ipinfo(ip)
132
- if ipinfo:
133
- record = {'org_type' : ipinfo['org_type'],
134
- 'country' : ipinfo['country']}
135
- if not email or re.search(r'-$', email):
136
- record['email'] = 'unknown@' + ipinfo['hostname']
129
+ record = PgIPInfo.get_missing_ipinfo(pgrecs['ip'][i], pgrecs['email'][i])
130
+ if record:
137
131
  mcnt += PgDBI.pgupdt(table, record, "aidx = '{}'".format(pgrecs['aidx'][i]))
138
132
 
139
133
  s = 's' if cnt > 1 else ''
@@ -144,17 +138,16 @@ def fix_allusage_records(date):
144
138
  def fix_tdsusage_records(date):
145
139
 
146
140
  table = 'tdsusage'
147
- cond = "date = '{}' and org_type = '-'".format(date)
148
- pgrecs = PgDBI.pgmget(table, 'time, ip', cond, PgLOG.LGEREX)
141
+ cond = "date = '{}' AND region IS NULL".format(date)
142
+ pgrecs = PgDBI.pgmget(table, 'time, email, ip', cond, PgLOG.LGEREX)
149
143
  if not pgrecs: return 0
150
144
  cnt = len(pgrecs['ip']) if pgrecs else 0
151
145
  mcnt = 0
152
146
  for i in range(cnt):
153
- ipinfo = PgIPInfo.set_ipinfo(pgrecs['ip'][i])
154
- if ipinfo:
155
- record = {'org_type' : ipinfo['org_type'],
156
- 'country' : ipinfo['country']}
157
- cond = "date = '{}' AND time = '{}' AND ip = '{}'".format(date, pgrecs['time'][i], pgrecs['ip'][i])
147
+ ip = pgrecs['ip'][i]
148
+ record = PgIPInfo.get_missing_ipinfo(ip, pgrecs['email'][i])
149
+ if record:
150
+ cond = "date = '{}' AND time = '{}' AND ip = '{}'".format(date, pgrecs['time'][i], ip)
158
151
  mcnt += PgDBI.pgupdt(table, record, cond)
159
152
 
160
153
  s = 's' if cnt > 1 else ''
@@ -162,16 +155,54 @@ def fix_tdsusage_records(date):
162
155
 
163
156
  return mcnt
164
157
 
158
+ def fix_codusage_records(date):
159
+
160
+ table = 'codusage'
161
+ cond = "date = '{}' AND region IS NULL".format(date)
162
+ pgrecs = PgDBI.pgmget(table, 'codidx, email, ip', cond, PgLOG.LGEREX)
163
+ if not pgrecs: return 0
164
+ cnt = len(pgrecs['ip']) if pgrecs else 0
165
+ mcnt = 0
166
+ for i in range(cnt):
167
+ record = PgIPInfo.get_missing_ipinfo(pgrecs['ip'][i], pgrecs['email'][i])
168
+ if record:
169
+ mcnt += PgDBI.pgupdt(table, record, "codidx = '{}'".format(pgrecs['codidx'][i]))
170
+
171
+ s = 's' if cnt > 1 else ''
172
+ PgLOG.pglog("{}: {} of {} record{} updated for {}".format(table, mcnt, cnt, s, date), PgLOG.LOGWRN)
173
+
174
+ return mcnt
175
+
176
+ def fix_wuser_records(date):
177
+
178
+ table = 'wuser'
179
+ cond = "start_date = '{}' AND region IS NULL".format(date)
180
+ pgrecs = PgDBI.pgmget(table, 'wuid, email, ip', cond, PgLOG.LGEREX)
181
+ if not pgrecs: return 0
182
+ cnt = len(pgrecs['ip']) if pgrecs else 0
183
+ mcnt = 0
184
+ for i in range(cnt):
185
+ ip = pgrecs['ip'][i]
186
+ email = pgrecs['email'][i]
187
+ record = PgIPInfo.get_missing_ipinfo(ip, email)
188
+ if record:
189
+ mcnt += PgDBI.pgupdt(table, record, "wuid = '{}'".format(pgrecs['wuid'][i]))
190
+
191
+ s = 's' if cnt > 1 else ''
192
+ PgLOG.pglog("{}: {} of {} record{} updated for {}".format(table, mcnt, cnt, s, date), PgLOG.LOGWRN)
193
+
194
+ return mcnt
195
+
165
196
  def fix_ipinfo_records(date):
166
197
 
167
198
  table = 'ipinfo'
168
- cond = "stat_flag = 'M' and date = '{}'".format(date)
199
+ cond = "date = '{}' AND region IS NULL".format(date)
169
200
  pgrecs = PgDBI.pgmget(table, 'ip', cond, PgLOG.LGEREX)
170
201
  if not pgrecs: return 0
171
202
  cnt = len(pgrecs['ip']) if pgrecs else 0
172
203
  mcnt = 0
173
204
  for i in range(cnt):
174
- PgIPInfo.set_ipinfo(pgrecs['ip'][i], True)
205
+ PgIPInfo.set_ipinfo(pgrecs['ip'][i])
175
206
 
176
207
  mcnt = PgIPInfo.IPINFO['IPUPDT']
177
208
  s = 's' if cnt > 1 else ''
@@ -7,7 +7,7 @@
7
7
 
8
8
  - Option -b, log process information into logfile only;
9
9
 
10
- - Option -t, table name, ipinfo, allusage, globususage, or tdsusage,
10
+ - Option -t, table name, ipinfo, wuser, allusage, codusage or tdsusage,
11
11
  to fix IP related information, such as organization names/types,
12
12
  emails and country names;
13
13
 
@@ -87,7 +87,7 @@ def add_one_order(params):
87
87
 
88
88
  def add_to_allusage(record, year, ctime):
89
89
 
90
- pgrec = PgDBI.pgget("wuser", "email, org_type, country",
90
+ pgrec = PgDBI.pgget("wuser", "email, org_type, country, region",
91
91
  "wuid = {}".format(record['wuid_request']), PgLOG.LGWNEX)
92
92
  if pgrec:
93
93
  pgrec['dsid'] = record['dsid']