rda-python-metrics 1.0.17__tar.gz → 1.0.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rda-python-metrics might be problematic. Click here for more details.

Files changed (59) hide show
  1. {rda_python_metrics-1.0.17/src/rda_python_metrics.egg-info → rda_python_metrics-1.0.18}/PKG-INFO +2 -1
  2. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/pyproject.toml +3 -2
  3. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/PgIPInfo.py +84 -3
  4. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/fillawsusage.py +3 -31
  5. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/fillcdgusage.py +15 -1
  6. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/fillcodusage.py +10 -3
  7. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/fillglobususage.py +3 -31
  8. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/fillipinfo.py +69 -19
  9. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/fillipinfo.usg +1 -1
  10. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/filloneorder.py +1 -1
  11. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/fillosdfusage.py +3 -31
  12. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/filltdsusage.py +8 -7
  13. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/viewallusage.py +21 -18
  14. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/viewallusage.usg +25 -10
  15. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/viewcodusage.py +18 -15
  16. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/viewcodusage.usg +6 -3
  17. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/viewordusage.py +8 -7
  18. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/viewordusage.usg +8 -9
  19. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/viewtdsusage.py +17 -14
  20. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/viewtdsusage.usg +9 -6
  21. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/viewwebusage.py +5 -5
  22. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/viewwebusage.usg +3 -3
  23. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18/src/rda_python_metrics.egg-info}/PKG-INFO +2 -1
  24. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics.egg-info/requires.txt +1 -0
  25. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/LICENSE +0 -0
  26. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/MANIFEST.in +0 -0
  27. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/README.md +0 -0
  28. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/setup.cfg +0 -0
  29. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/PgView.py +0 -0
  30. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/__init__.py +0 -0
  31. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/fillawsusage.usg +0 -0
  32. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/fillcdgusage.usg +0 -0
  33. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/fillcodusage.usg +0 -0
  34. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/fillcountry.py +0 -0
  35. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/fillendtime.py +0 -0
  36. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/fillglobususage.usg +0 -0
  37. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/filloneorder.usg +0 -0
  38. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/fillosdfusage.usg +0 -0
  39. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/fillrdadb.py +0 -0
  40. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/fillrdadb.usg +0 -0
  41. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/filltdsusage.usg +0 -0
  42. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/filluser.py +0 -0
  43. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/filluser.usg +0 -0
  44. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/logarch.py +0 -0
  45. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/logarch.usg +0 -0
  46. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/pgperson.py +0 -0
  47. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/pgsyspath.py +0 -0
  48. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/pgusername.py +0 -0
  49. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/viewcheckusage.py +0 -0
  50. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/viewcheckusage.usg +0 -0
  51. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/viewrqstusage.py +0 -0
  52. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/viewrqstusage.usg +0 -0
  53. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/viewwebfile.py +0 -0
  54. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics/viewwebfile.usg +0 -0
  55. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics.egg-info/SOURCES.txt +0 -0
  56. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics.egg-info/dependency_links.txt +0 -0
  57. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics.egg-info/entry_points.txt +0 -0
  58. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/src/rda_python_metrics.egg-info/top_level.txt +0 -0
  59. {rda_python_metrics-1.0.17 → rda_python_metrics-1.0.18}/tests/test_metrics.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rda_python_metrics
3
- Version: 1.0.17
3
+ Version: 1.0.18
4
4
  Summary: RDA Python Package to gather and view data usage metrics
5
5
  Author-email: Zaihua Ji <zji@ucar.edu>
6
6
  Project-URL: Homepage, https://github.com/NCAR/rda-python-metrics
@@ -16,6 +16,7 @@ Requires-Dist: rda_python_setuid
16
16
  Requires-Dist: geoip2
17
17
  Requires-Dist: ipinfo
18
18
  Requires-Dist: httplib2
19
+ Requires-Dist: dnspython
19
20
  Dynamic: license-file
20
21
 
21
22
  RDA Python Package to gather and view data usage metrics.
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
6
6
 
7
7
  [project]
8
8
  name = "rda_python_metrics"
9
- version = "1.0.17"
9
+ version = "1.0.18"
10
10
  authors = [
11
11
  { name="Zaihua Ji", email="zji@ucar.edu" },
12
12
  ]
@@ -24,7 +24,8 @@ dependencies = [
24
24
  "rda_python_setuid",
25
25
  "geoip2",
26
26
  "ipinfo",
27
- "httplib2"
27
+ "httplib2",
28
+ "dnspython"
28
29
  ]
29
30
 
30
31
  [tool.pytest.ini_options]
@@ -16,6 +16,8 @@
16
16
  import geoip2.database as geodb
17
17
  import ipinfo
18
18
  import socket
19
+ import dns.resolver
20
+ import json
19
21
  from rda_python_common import PgLOG
20
22
  from rda_python_common import PgDBI
21
23
  from rda_python_common import PgUtil
@@ -28,11 +30,44 @@ IPINFO = {
28
30
  'IPADD' : 0
29
31
  }
30
32
 
33
+ IPDNS = None
31
34
  IPDB = None
32
35
  G2DB = None
33
36
  IPRECS = {}
34
37
  COUNTRIES = {}
35
38
 
#
# return the shared dns.resolver.Resolver object kept in the global IPDNS;
# a new one is created on first use, or whenever forceget is True
#
def get_dns_resolver(forceget = False):

    global IPDNS

    # keep the cached resolver only if one exists and no refresh is requested
    if not (IPDNS and not forceget):
        IPDNS = dns.resolver.Resolver()

    return IPDNS
49
+
#
# Resolve a domain name to the list of its DNS records (A records by default)
#
# dmname - domain name to look up
# type   - DNS record type to query; defaults to 'A'
#
# Return a list with the string form of each answer on success; return None
# if the lookup fails, after logging the failure with PgLOG.LOGERR
#
def dns_to_ip(dmname, type = 'A'):

    # the parameter name 'type' hides the builtin, but it is kept unchanged
    # so that callers passing it by keyword continue to work
    ipdns = get_dns_resolver()

    try:
        answers = ipdns.resolve(dmname, type)
    except dns.resolver.NXDOMAIN:
        PgLOG.pglog(f"{dmname}: the domain name does not exist", PgLOG.LOGERR)
    except dns.resolver.Timeout:
        PgLOG.pglog(f"{dmname}: the domain name request timed out", PgLOG.LOGERR)
    except dns.exception.DNSException as e:
        # any other resolver failure is reported with its own message
        PgLOG.pglog(f"{dmname}: error domain name request: {e}", PgLOG.LOGERR)
    else:
        return [str(rdata) for rdata in answers]

    return None
70
+
36
71
  #
37
72
  # Get country token name for given two-character domain id
38
73
  #
@@ -59,6 +94,17 @@ def set_ipinfo_database():
59
94
  except Exception as e:
60
95
  PgLOG.pglog('ipinfo: ' + str(e), PgLOG.LGEREX)
61
96
 
#
# get an ipinfo record for a given domain name: the name is resolved via
# dns_to_ip() and the first address returned is handed to set_ipinfo();
# None is returned if the domain name cannot be resolved
#
def domain_ipinfo_record(dmname):

    addresses = dns_to_ip(dmname)
    if not addresses:
        return None

    return set_ipinfo(addresses[0])
107
+
62
108
  #
63
109
  # get an ipinfo record for given ip address
64
110
  #
@@ -82,9 +128,11 @@ def get_ipinfo_record(ip):
82
128
  record['lon'] = float(iprec['longitude']) if iprec['longitude'] else 0
83
129
  if 'org' in iprec: record['org_name'] = iprec['org']
84
130
  record['country'] = get_country_record_code(iprec, 'country_name')
131
+ if 'region' in iprec: record['region'] = PgLOG.convert_chars(iprec['region'])
85
132
  if 'city' in iprec: record['city'] = PgLOG.convert_chars(iprec['city'])
86
133
  if 'postal' in iprec: record['postal'] = iprec['postal']
87
134
  record['timezone'] = iprec['timezone']
135
+ record['ipinfo'] = json.dumps(iprec)
88
136
 
89
137
  return record
90
138
 
@@ -113,10 +161,12 @@ def get_geoip2_record(ip):
113
161
  record['lon'] = float(city.location.longitude) if city.location.longitude else 0
114
162
  record['country'] = get_country_name_code(city.country.name)
115
163
  record['city'] = PgLOG.convert_chars(city.city.name)
164
+ if city.subdivisions.most_specific.name: record['region'] = PgLOG.convert_chars(city.subdivisions.most_specific.name)
116
165
  record['postal'] = city.postal.code
117
166
  record['timezone'] = city.location.time_zone
118
167
  record['hostname'] = ip
119
168
  record['org_type'] = '-'
169
+ record['ipinfo'] = json.dumps(city.__dict__)
120
170
 
121
171
  try:
122
172
  hostrec = socket.gethostbyaddr(ip)
@@ -160,7 +210,7 @@ def update_ipinfo_record(record, pgrec = None):
160
210
  # set ip info into table ipinfo from python module ipinfo
161
211
  # if ipopt is True; otherwise, use module geoip2
162
212
  #
163
- def set_ipinfo(ip, ipopt = False):
213
+ def set_ipinfo(ip, ipopt = True):
164
214
 
165
215
  if ip in IPRECS:
166
216
  pgrec = IPRECS[ip]
@@ -169,8 +219,8 @@ def set_ipinfo(ip, ipopt = False):
169
219
  pgrec = PgDBI.pgget('ipinfo', '*', "ip = '{}'".format(ip))
170
220
 
171
221
  if not pgrec or ipopt and pgrec['stat_flag'] == 'M':
172
- record = None if ipopt else get_geoip2_record(ip)
173
- if not (record and 'hostname' in record): record = get_ipinfo_record(ip)
222
+ record = get_ipinfo_record(ip) if ipopt else None
223
+ if not record: record = get_geoip2_record(ip)
174
224
  if record and update_ipinfo_record(record, pgrec): pgrec = record
175
225
 
176
226
  IPRECS[ip] = pgrec
@@ -186,3 +236,34 @@ def get_update_record(nrec, orec):
186
236
  if nrec[fld] != orec[fld]:
187
237
  record[fld] = nrec[fld]
188
238
  return record
239
+
#
# find, or add, the wuser record with email 'unknown@<hostname>' for a given ip
#
# ip   - ip address looked up via set_ipinfo()
# date - usage date; used as start_date of a newly added wuser record, and
#        replaces the start_date of an existing record when
#        PgUtil.diffdate(existing start_date, date) is positive
#
# return wuser record upon success, None otherwise
#
def get_wuser_record(ip, date):

    # named iprec rather than ipinfo so the imported ipinfo module is not hidden
    iprec = set_ipinfo(ip)
    if not iprec: return None

    record = {'org_type' : iprec['org_type'],
              'country' : iprec['country'],
              # region is only set conditionally when an ip record is built,
              # so fall back to None instead of raising KeyError
              'region' : iprec['region'] if 'region' in iprec else None}
    email = 'unknown@' + iprec['hostname']
    emcond = "email = '{}'".format(email)
    flds = 'wuid, email, org_type, country, region, start_date'
    pgrec = PgDBI.pgget("wuser", flds, emcond, PgLOG.LOGERR)
    if pgrec:
        # NOTE(review): nesting reconstructed from a diff listing without
        # indentation; confirm the update belongs inside the date check
        if PgUtil.diffdate(pgrec['start_date'], date) > 0:
            pgrec['start_date'] = record['start_date'] = date
            PgDBI.pgupdt('wuser', record, emcond)
        return pgrec

    # now add one in
    record['email'] = email
    record['stat_flag'] = 'A'
    record['start_date'] = date
    wuid = PgDBI.pgadd("wuser", record, PgLOG.LOGERR|PgLOG.AUTOID)
    if wuid:
        record['wuid'] = wuid
        PgLOG.pglog("{} Added as wuid({})".format(email, wuid), PgLOG.LGWNEM)
        return record

    return None
@@ -201,7 +201,7 @@ def add_file_usage(year, logrec):
201
201
  cond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(pgrec['wid'], logrec['method'], logrec['date'], logrec['time'])
202
202
  if PgDBI.pgget(table, "", cond, PgLOG.LOGWRN): return 0
203
203
 
204
- wurec = get_wuser_record(logrec['ip'], logrec['date'])
204
+ wurec = PgIPInfo.get_wuser_record(logrec['ip'], logrec['date'])
205
205
  if not wurec: return 0
206
206
  record = {'wid' : pgrec['wid'], 'dsid' : pgrec['dsid']}
207
207
  record['wuid_read'] = wurec['wuid']
@@ -220,7 +220,8 @@ def add_file_usage(year, logrec):
220
220
 
221
221
  def add_to_allusage(year, logrec, wurec):
222
222
 
223
- pgrec = {'email' : wurec['email'], 'org_type' : wurec['org_type'], 'country' : wurec['country']}
223
+ pgrec = {'email' : wurec['email'], 'org_type' : wurec['org_type'],
224
+ 'country' : wurec['country'], 'region' : wurec['region']}
224
225
  pgrec['dsid'] = logrec['dsid']
225
226
  pgrec['date'] = logrec['date']
226
227
  pgrec['quarter'] = logrec['quarter']
@@ -247,35 +248,6 @@ def get_wfile_wid(dsid, wfile):
247
248
 
248
249
  return pgrec
249
250
 
250
- # return wuser record upon success, None otherwise
251
- def get_wuser_record(ip, date):
252
-
253
- ipinfo = PgIPInfo.set_ipinfo(ip)
254
- if not ipinfo: return None
255
-
256
- record = {'org_type' : ipinfo['org_type'], 'country' : ipinfo['country']}
257
- email = 'unknown@' + ipinfo['hostname']
258
- emcond = "email = '{}'".format(email)
259
- flds = 'wuid, email, org_type, country, start_date'
260
- pgrec = PgDBI.pgget("wuser", flds, emcond, PgLOG.LOGERR)
261
- if pgrec:
262
- if PgUtil.diffdate(pgrec['start_date'], date) > 0:
263
- pgrec['start_date'] = record['start_date'] = date
264
- PgDBI.pgupdt('wuser', record, emcond)
265
- return pgrec
266
-
267
- # now add one in
268
- record['email'] = email
269
- record['stat_flag'] = 'A'
270
- record['start_date'] = date
271
- wuid = PgDBI.pgadd("wuser", record, PgLOG.LOGERR|PgLOG.AUTOID)
272
- if wuid:
273
- record['wuid'] = wuid
274
- PgLOG.pglog("{} Added as wuid({})".format(email, wuid), PgLOG.LGWNEM)
275
- return record
276
-
277
- return None
278
-
279
251
  #
280
252
  # call main() to start program
281
253
  #
@@ -52,7 +52,21 @@ DSIDS = {
52
52
  'ucar.cgd.ccsm4.SD-WACCM-X_v2.1' : ['d651034'],
53
53
  'ucar.cgd.ccsm4.amv_lens' : ['d651035'],
54
54
  'ucar.cgd.cesm2.cism_ismip6' : ['d651036'],
55
- 'ucar.cgd.ccsm4.pliomip2' : ['d651037']
55
+ 'ucar.cgd.ccsm4.pliomip2' : ['d651037'],
56
+ # newly added
57
+ 'ucar.cgd.cesm2-waccm.s2s_hindcasts': ['d651040'],
58
+ 'ucar.cgd.CESM1.3_SH_storm_tracks': ['d651044'],
59
+ 'ucar.cgd.cesm2.waccm6.ssp245': ['d651045'],
60
+ 'ucar.cgd.cesm2.CESM21-CISM2-JG-BG': ['d651046'],
61
+ 'ucar.cgd.ccsm4.TC-CESM': ['d651047'],
62
+ 'ucar.cgd.cesm2.ISSI_OSSE': ['d651048'],
63
+ 'ucar.cgd.ccsm4.SOcean_Eddies_mclong': ['d651049'],
64
+ 'trace': ['d651050'],
65
+ 'ucar.cgd.cesm2.waccm.solar': ['d651051'],
66
+ 'ucar.cgd.ccsm4.CESM1-CCSM4_mid-Pliocene' : ['d651042'],
67
+ 'ucar.cgd.ccsm4.PaleoIF' : ['d651052'],
68
+ 'ucar.cgd.ccsm4.b.e11.B20LE_fixedO3' : ['d651053'],
69
+ 'ucar.cgd.cesm2.single.forcing.large.ensemble' : ['d651055']
56
70
  }
57
71
 
58
72
  ALLIDS = list(DSIDS.keys())
@@ -22,6 +22,7 @@ from rda_python_common import PgLOG
22
22
  from rda_python_common import PgUtil
23
23
  from rda_python_common import PgFile
24
24
  from rda_python_common import PgDBI
25
+ from . import PgIPInfo
25
26
 
26
27
  # the define options for gathering COD data usage, one at a time
27
28
  MONTH = 0x02 # get COD data usages for given months
@@ -182,14 +183,20 @@ def add_usage_records(records, date):
182
183
  if PgDBI.pgget(USAGE['PGTBL'], '', "aid = '{}' AND date = '{}'".format(aid, date), PgLOG.LGEREX): continue
183
184
  record = records[aid]
184
185
  if record['email'] == '-':
185
- record['org_type'] = record['country'] = '-'
186
+ wurec = PgIPInfo.get_wuser_record(record['ip'], date)
187
+ if not wurec: continue
188
+ record['org_type'] = wurec['org_type']
189
+ record['country'] = wurec['country']
190
+ record['region'] = wurec['region']
191
+ record['email'] = 'unknown@' + wurec['hostname']
186
192
  else:
187
193
  wuid = PgDBI.check_wuser_wuid(record['email'], date)
188
- if not wuid: next
189
- pgrec = PgDBI.pgget("wuser", "org_type, country", "wuid = {}".format(wuid), PgLOG.LGWNEX)
194
+ if not wuid: continue
195
+ pgrec = PgDBI.pgget("wuser", "org_type, country, region", "wuid = {}".format(wuid), PgLOG.LGWNEX)
190
196
  if not pgrec: continue
191
197
  record['org_type'] = pgrec['org_type']
192
198
  record['country'] = pgrec['country']
199
+ record['region'] = pgrec['region']
193
200
 
194
201
  record['date'] = date
195
202
  record['time'] = USERS[aid]['btime']
@@ -203,7 +203,7 @@ def add_file_usage(year, logrec):
203
203
  cond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(pgrec['wid'], logrec['method'], logrec['date'], logrec['time'])
204
204
  if PgDBI.pgget(table, "", cond, PgLOG.LOGWRN): return 0
205
205
 
206
- wurec = get_wuser_record(logrec['ip'], logrec['date'])
206
+ wurec = PgIPInfo.get_wuser_record(logrec['ip'], logrec['date'])
207
207
  if not wurec: return 0
208
208
  record = {'wid' : pgrec['wid'], 'dsid' : pgrec['dsid']}
209
209
  record['wuid_read'] = wurec['wuid']
@@ -222,7 +222,8 @@ def add_file_usage(year, logrec):
222
222
 
223
223
  def add_to_allusage(year, logrec, wurec):
224
224
 
225
- pgrec = {'email' : wurec['email'], 'org_type' : wurec['org_type'], 'country' : wurec['country']}
225
+ pgrec = {'email' : wurec['email'], 'org_type' : wurec['org_type'],
226
+ 'country' : wurec['country'], 'region' : wurec['region']}
226
227
  pgrec['dsid'] = logrec['dsid']
227
228
  pgrec['date'] = logrec['date']
228
229
  pgrec['quarter'] = logrec['quarter']
@@ -252,35 +253,6 @@ def get_wfile_wid(dsid, wfile):
252
253
 
253
254
  return pgrec
254
255
 
255
- # return wuser record upon success, None otherwise
256
- def get_wuser_record(ip, date):
257
-
258
- ipinfo = PgIPInfo.set_ipinfo(ip)
259
- if not ipinfo: return None
260
-
261
- record = {'org_type' : ipinfo['org_type'], 'country' : ipinfo['country']}
262
- email = 'unknown@' + ipinfo['hostname']
263
- emcond = "email = '{}'".format(email)
264
- flds = 'wuid, email, org_type, country, start_date'
265
- pgrec = PgDBI.pgget("wuser", flds, emcond, PgLOG.LOGERR)
266
- if pgrec:
267
- if PgUtil.diffdate(pgrec['start_date'], date) > 0:
268
- pgrec['start_date'] = record['start_date'] = date
269
- PgDBI.pgupdt('wuser', record, emcond)
270
- return pgrec
271
-
272
- # now add one in
273
- record['email'] = email
274
- record['stat_flag'] = 'A'
275
- record['start_date'] = date
276
- wuid = PgDBI.pgadd("wuser", record, PgLOG.LOGERR|PgLOG.AUTOID)
277
- if wuid:
278
- record['wuid'] = wuid
279
- PgLOG.pglog("{} Added as wuid({})".format(email, wuid), PgLOG.LGWNEM)
280
- return record
281
-
282
- return None
283
-
284
256
  #
285
257
  # call main() to start program
286
258
  #
@@ -32,7 +32,7 @@ MULTI = (MONTH|YEARS)
32
32
  SINGL = (NDAYS)
33
33
 
34
34
  IPINFO = {
35
- 'USGTBL' : ['ipinfo', 'allusage', 'tdsusage'],
35
+ 'USGTBL' : ['ipinfo', 'wuser', 'allusage', 'codusage', 'tdsusage'],
36
36
  'CDATE' : PgUtil.curdate(),
37
37
  }
38
38
 
@@ -120,20 +120,14 @@ def fix_allusage_records(date):
120
120
  ms = re.match(r'^(\d+)-', date)
121
121
  year = ms.group(1)
122
122
  table = 'allusage_' + year
123
- cond = "date = '{}' and org_type = '-'".format(date)
123
+ cond = "date = '{}' AND region IS NULL".format(date)
124
124
  pgrecs = PgDBI.pgmget(table, 'aidx, email, ip', cond, PgLOG.LGEREX)
125
125
  if not pgrecs: return 0
126
126
  cnt = len(pgrecs['ip']) if pgrecs else 0
127
127
  mcnt = 0
128
128
  for i in range(cnt):
129
- ip = pgrecs['ip'][i]
130
- email = pgrecs['email'][i]
131
- ipinfo = PgIPInfo.set_ipinfo(ip)
132
- if ipinfo:
133
- record = {'org_type' : ipinfo['org_type'],
134
- 'country' : ipinfo['country']}
135
- if not email or re.search(r'-$', email):
136
- record['email'] = 'unknown@' + ipinfo['hostname']
129
+ record = get_missing_info(pgrecs['ip'][i], pgrecs['email'][i])
130
+ if record:
137
131
  mcnt += PgDBI.pgupdt(table, record, "aidx = '{}'".format(pgrecs['aidx'][i]))
138
132
 
139
133
  s = 's' if cnt > 1 else ''
@@ -144,17 +138,16 @@ def fix_allusage_records(date):
144
138
  def fix_tdsusage_records(date):
145
139
 
146
140
  table = 'tdsusage'
147
- cond = "date = '{}' and org_type = '-'".format(date)
148
- pgrecs = PgDBI.pgmget(table, 'time, ip', cond, PgLOG.LGEREX)
141
+ cond = "date = '{}' AND region IS NULL".format(date)
142
+ pgrecs = PgDBI.pgmget(table, 'time, email, ip', cond, PgLOG.LGEREX)
149
143
  if not pgrecs: return 0
150
144
  cnt = len(pgrecs['ip']) if pgrecs else 0
151
145
  mcnt = 0
152
146
  for i in range(cnt):
153
- ipinfo = PgIPInfo.set_ipinfo(pgrecs['ip'][i])
154
- if ipinfo:
155
- record = {'org_type' : ipinfo['org_type'],
156
- 'country' : ipinfo['country']}
157
- cond = "date = '{}' AND time = '{}' AND ip = '{}'".format(date, pgrecs['time'][i], pgrecs['ip'][i])
147
+ ip = pgrecs['ip'][i]
148
+ record = get_missing_info(ip, pgrecs['email'][i])
149
+ if record:
150
+ cond = "date = '{}' AND time = '{}' AND ip = '{}'".format(date, pgrecs['time'][i], ip)
158
151
  mcnt += PgDBI.pgupdt(table, record, cond)
159
152
 
160
153
  s = 's' if cnt > 1 else ''
@@ -162,16 +155,57 @@ def fix_tdsusage_records(date):
162
155
 
163
156
  return mcnt
164
157
 
#
# fill the missing ip related information into the codusage records of a
# given date that have no region set; return the count of records updated
#
def fix_codusage_records(date):

    table = 'codusage'
    cond = "date = '{}' AND region IS NULL".format(date)
    pgrecs = PgDBI.pgmget(table, 'codidx, email, ip', cond, PgLOG.LGEREX)
    if not pgrecs:
        return 0

    ips = pgrecs['ip']
    total = len(ips)
    updated = 0
    for idx, ip in enumerate(ips):
        record = get_missing_info(ip, pgrecs['email'][idx])
        if not record:
            continue
        updated += PgDBI.pgupdt(table, record, "codidx = '{}'".format(pgrecs['codidx'][idx]))

    plural = '' if total <= 1 else 's'
    PgLOG.pglog("{}: {} of {} record{} updated for {}".format(table, updated, total, plural, date), PgLOG.LOGWRN)

    return updated
175
+
#
# fill the missing ip related information into the wuser records whose
# start_date is the given date and that have no region set
#
# a record without an ip falls back to resolving the domain part of its
# email; records for which no ip can be found are skipped
#
# return the count of records updated
#
def fix_wuser_records(date):

    table = 'wuser'
    cond = "start_date = '{}' AND region IS NULL".format(date)
    pgrecs = PgDBI.pgmget(table, 'wuid, email, ip', cond, PgLOG.LGEREX)
    if not pgrecs: return 0
    cnt = len(pgrecs['ip'])
    mcnt = 0
    for i in range(cnt):
        ip = pgrecs['ip'][i]
        email = pgrecs['email'][i]
        if not ip and email and '@' in email:
            # take the text after the last '@' as the domain name
            domain = email.rpartition('@')[2]
            # dns_to_ip() returns a list of addresses, or None on failure;
            # only a single address string can be handed to get_missing_info()
            ips = PgIPInfo.dns_to_ip(domain) if domain else None
            if ips: ip = ips[0]
        if not ip: continue
        record = get_missing_info(ip, email)
        if record:
            mcnt += PgDBI.pgupdt(table, record, "wuid = '{}'".format(pgrecs['wuid'][i]))

    s = 's' if cnt > 1 else ''
    PgLOG.pglog("{}: {} of {} record{} updated for {}".format(table, mcnt, cnt, s, date), PgLOG.LOGWRN)

    return mcnt
198
+
165
199
  def fix_ipinfo_records(date):
166
200
 
167
201
  table = 'ipinfo'
168
- cond = "stat_flag = 'M' and date = '{}'".format(date)
202
+ cond = "date = '{}' AND region IS NULL".format(date)
169
203
  pgrecs = PgDBI.pgmget(table, 'ip', cond, PgLOG.LGEREX)
170
204
  if not pgrecs: return 0
171
205
  cnt = len(pgrecs['ip']) if pgrecs else 0
172
206
  mcnt = 0
173
207
  for i in range(cnt):
174
- PgIPInfo.set_ipinfo(pgrecs['ip'][i], True)
208
+ PgIPInfo.set_ipinfo(pgrecs['ip'][i])
175
209
 
176
210
  mcnt = PgIPInfo.IPINFO['IPUPDT']
177
211
  s = 's' if cnt > 1 else ''
@@ -179,6 +213,22 @@ def fix_ipinfo_records(date):
179
213
 
180
214
  return mcnt
181
215
 
#
# build the field values to fill the missing info for given ip
#
# ip    - ip address passed to PgIPInfo.set_ipinfo()
# email - email currently stored with the record; it is replaced by
#         'unknown@<hostname>' only if it is empty or ends with '-'
#
# return a dict of org_type, country and region (plus email if replaced),
# or None if PgIPInfo.set_ipinfo() gives no ip information
#
def get_missing_info(ip, email):

    ipinfo = PgIPInfo.set_ipinfo(ip)
    if not ipinfo: return None

    record = {'org_type' : ipinfo['org_type'],
              'country' : ipinfo['country'],
              # region is only set conditionally when an ip record is built,
              # so fall back to None instead of raising KeyError
              'region' : ipinfo['region'] if 'region' in ipinfo else None}
    if not email or re.search(r'-$', email):
        record['email'] = 'unknown@' + ipinfo['hostname']

    return record
231
+
182
232
  #
183
233
  # call main() to start program
184
234
  #
@@ -7,7 +7,7 @@
7
7
 
8
8
  - Option -b, log process information into logfile only;
9
9
 
10
- - Option -t, table name, ipinfo, allusage, globususage, or tdsusage,
10
+ - Option -t, table name, ipinfo, wuser, allusage, codusage or tdsusage,
11
11
  to fix IP related information, such as organization names/types,
12
12
  emails and country names;
13
13
 
@@ -87,7 +87,7 @@ def add_one_order(params):
87
87
 
88
88
  def add_to_allusage(record, year, ctime):
89
89
 
90
- pgrec = PgDBI.pgget("wuser", "email, org_type, country",
90
+ pgrec = PgDBI.pgget("wuser", "email, org_type, country, region",
91
91
  "wuid = {}".format(record['wuid_request']), PgLOG.LGWNEX)
92
92
  if pgrec:
93
93
  pgrec['dsid'] = record['dsid']
@@ -181,7 +181,7 @@ def add_file_usage(year, logrec):
181
181
  cond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(pgrec['wid'], logrec['method'], logrec['date'], logrec['time'])
182
182
  if PgDBI.pgget(table, "", cond, PgLOG.LOGWRN): return 0
183
183
 
184
- wurec = get_wuser_record(logrec['ip'], logrec['date'])
184
+ wurec = PgIPInfo.get_wuser_record(logrec['ip'], logrec['date'])
185
185
  if not wurec: return 0
186
186
  record = {'wid' : pgrec['wid'], 'dsid' : pgrec['dsid']}
187
187
  record['wuid_read'] = wurec['wuid']
@@ -200,7 +200,8 @@ def add_file_usage(year, logrec):
200
200
 
201
201
  def add_to_allusage(year, logrec, wurec):
202
202
 
203
- pgrec = {'email' : wurec['email'], 'org_type' : wurec['org_type'], 'country' : wurec['country']}
203
+ pgrec = {'email' : wurec['email'], 'org_type' : wurec['org_type'],
204
+ 'country' : wurec['country'], 'region' : wurec['region']}
204
205
  pgrec['dsid'] = logrec['dsid']
205
206
  pgrec['date'] = logrec['date']
206
207
  pgrec['quarter'] = logrec['quarter']
@@ -230,35 +231,6 @@ def get_wfile_wid(dsid, wfile):
230
231
 
231
232
  return pgrec
232
233
 
233
- # return wuser record upon success, None otherwise
234
- def get_wuser_record(ip, date):
235
-
236
- ipinfo = PgIPInfo.set_ipinfo(ip)
237
- if not ipinfo: return None
238
-
239
- record = {'org_type' : ipinfo['org_type'], 'country' : ipinfo['country']}
240
- email = 'unknown@' + ipinfo['hostname']
241
- emcond = "email = '{}'".format(email)
242
- flds = 'wuid, email, org_type, country, start_date'
243
- pgrec = PgDBI.pgget("wuser", flds, emcond, PgLOG.LOGERR)
244
- if pgrec:
245
- if PgUtil.diffdate(pgrec['start_date'], date) > 0:
246
- pgrec['start_date'] = record['start_date'] = date
247
- PgDBI.pgupdt('wuser', record, emcond)
248
- return pgrec
249
-
250
- # now add one in
251
- record['email'] = email
252
- record['stat_flag'] = 'A'
253
- record['start_date'] = date
254
- wuid = PgDBI.pgadd("wuser", record, PgLOG.LOGERR|PgLOG.AUTOID)
255
- if wuid:
256
- record['wuid'] = wuid
257
- PgLOG.pglog("{} Added as wuid({})".format(email, wuid), PgLOG.LGWNEM)
258
- return record
259
-
260
- return None
261
-
262
234
  #
263
235
  # call main() to start program
264
236
  #
@@ -187,19 +187,20 @@ def add_usage_records(records, date):
187
187
  cond = "date = '{}' AND time = '{}' AND ip = '{}'".format(date, record['time'], record['ip'])
188
188
  if PgDBI.pgget(USAGE['PGTBL'], '', cond, PgLOG.LGEREX): continue
189
189
  if record['email'] == '-':
190
- record['org_type'] = record['country'] = '-'
191
- ipinfo = PgIPInfo.set_ipinfo(record['ip'])
192
- if ipinfo:
193
- record['org_type'] = ipinfo['org_type']
194
- record['country'] = ipinfo['country']
195
- record['email'] = 'unknown@' + ipinfo['hostname']
190
+ wurec = PgIPInfo.get_wuser_record(record['ip'], date)
191
+ if not wurec: continue
192
+ record['org_type'] = wurec['org_type']
193
+ record['country'] = wurec['country']
194
+ record['region'] = wurec['region']
195
+ record['email'] = 'unknown@' + wurec['hostname']
196
196
  else:
197
197
  wuid = PgDBI.check_wuser_wuid(record['email'], date)
198
198
  if not wuid: continue
199
- pgrec = PgDBI.pgget("wuser", "org_type, country", "wuid = {}".format(wuid), PgLOG.LGWNEX)
199
+ pgrec = PgDBI.pgget("wuser", "org_type, country, region", "wuid = {}".format(wuid), PgLOG.LGWNEX)
200
200
  if not pgrec: continue
201
201
  record['org_type'] = pgrec['org_type']
202
202
  record['country'] = pgrec['country']
203
+ record['region'] = pgrec['region']
203
204
 
204
205
  record['quarter'] = quarter
205
206
  record['date'] = date