rda-python-metrics 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rda-python-metrics might be problematic. Click here for more details.

Files changed (47) hide show
  1. rda_python_metrics/PgIPInfo.py +188 -0
  2. rda_python_metrics/PgView.py +782 -0
  3. rda_python_metrics/__init__.py +1 -0
  4. rda_python_metrics/fillawsusage.py +282 -0
  5. rda_python_metrics/fillawsusage.usg +17 -0
  6. rda_python_metrics/fillcodusage.py +247 -0
  7. rda_python_metrics/fillcodusage.usg +21 -0
  8. rda_python_metrics/fillcountry.py +79 -0
  9. rda_python_metrics/fillendtime.py +93 -0
  10. rda_python_metrics/fillglobususage.py +287 -0
  11. rda_python_metrics/fillglobususage.usg +17 -0
  12. rda_python_metrics/fillipinfo.py +185 -0
  13. rda_python_metrics/fillipinfo.usg +18 -0
  14. rda_python_metrics/filloneorder.py +155 -0
  15. rda_python_metrics/filloneorder.usg +41 -0
  16. rda_python_metrics/fillrdadb.py +151 -0
  17. rda_python_metrics/fillrdadb.usg +32 -0
  18. rda_python_metrics/filltdsusage.py +289 -0
  19. rda_python_metrics/filltdsusage.usg +17 -0
  20. rda_python_metrics/filluser.py +216 -0
  21. rda_python_metrics/filluser.usg +16 -0
  22. rda_python_metrics/logarch.py +359 -0
  23. rda_python_metrics/logarch.usg +27 -0
  24. rda_python_metrics/pgperson.py +72 -0
  25. rda_python_metrics/pgusername.py +50 -0
  26. rda_python_metrics/viewallusage.py +350 -0
  27. rda_python_metrics/viewallusage.usg +198 -0
  28. rda_python_metrics/viewcheckusage.py +289 -0
  29. rda_python_metrics/viewcheckusage.usg +185 -0
  30. rda_python_metrics/viewcodusage.py +314 -0
  31. rda_python_metrics/viewcodusage.usg +184 -0
  32. rda_python_metrics/viewordusage.py +340 -0
  33. rda_python_metrics/viewordusage.usg +224 -0
  34. rda_python_metrics/viewrqstusage.py +362 -0
  35. rda_python_metrics/viewrqstusage.usg +217 -0
  36. rda_python_metrics/viewtdsusage.py +323 -0
  37. rda_python_metrics/viewtdsusage.usg +191 -0
  38. rda_python_metrics/viewwebfile.py +294 -0
  39. rda_python_metrics/viewwebfile.usg +212 -0
  40. rda_python_metrics/viewwebusage.py +371 -0
  41. rda_python_metrics/viewwebusage.usg +211 -0
  42. rda_python_metrics-1.0.4.dist-info/METADATA +18 -0
  43. rda_python_metrics-1.0.4.dist-info/RECORD +47 -0
  44. rda_python_metrics-1.0.4.dist-info/WHEEL +5 -0
  45. rda_python_metrics-1.0.4.dist-info/entry_points.txt +22 -0
  46. rda_python_metrics-1.0.4.dist-info/licenses/LICENSE +21 -0
  47. rda_python_metrics-1.0.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,93 @@
1
+ #!/usr/bin/env python3
2
+ #
3
+ ###############################################################################
4
+ #
5
+ # Title : fillendtime
6
+ # Author : Zaihua Ji, zji@ucar.edu
7
+ # Date : 04/08/2024
8
+ # 2025-03-26 transferred to package rda_python_metrics from
9
+ # https://github.com/NCAR/rda-database.git
10
+ # Purpose : python program to fill field dlupdt.endtime from enddate/endhour
11
+ #
12
+ # Github : https://github.com/NCAR/rda-python-metrics.git
13
+ #
14
+ ###############################################################################
15
+ #
16
+ import sys
17
+ import re
18
+ import glob
19
+ from os import path as op
20
+ from rda_python_common import PgLOG
21
+ from rda_python_common import PgUtil
22
+ from rda_python_common import PgFile
23
+ from rda_python_common import PgDBI
24
+ from . import PgIPInfo
25
+
26
# bit flags describing how a time range is given on the command line
# NOTE(review): neither main() nor fill_endtime() in this module refers to
# any of the names below; they look carried over from fillipinfo -- confirm
# with the author before removing them
MONTH = 0x02    # fix data usages for given months
YEARS = 0x04    # fix data usages for given years
NDAYS = 0x08    # fix data usages in recent number of days
MULTI = MONTH | YEARS
SINGL = NDAYS

IPINFO = {
    'USGTBL': ['ipinfo', 'allusage', 'tdsusage'],
    'CDATE': PgUtil.curdate(),    # evaluated once, when the module is loaded
}
37
+
38
+ #
39
+ # main function to run this program
40
+ #
41
def main():
    """Read the command line and fill dlupdt.endtime for the requested datasets.

    Option -b sets PgLOG.PGLOG['BCKGRND']; every other argument is taken as
    a dataset id and normalized via PgUtil.format_dataset_id(). Without any
    dataset id, fill_endtime() is called once with no dataset restriction.
    The program always exits with status 0.
    """
    dsids = []    # stays empty to process all datasets

    for arg in sys.argv[1:]:
        if arg == "-b":
            PgLOG.PGLOG['BCKGRND'] = 1
            continue
        dsids.append(PgUtil.format_dataset_id(arg))

    PgDBI.dssdb_dbname()

    if not dsids:
        fill_endtime()
    else:
        for dsid in dsids:
            fill_endtime(dsid)

    sys.exit(0)
61
+
62
+
63
+
64
+ #
65
+ # Fill endtime in table dssdb.dlupdt
66
+ #
67
def fill_endtime(dsid = None):
    """Fill dlupdt.endtime from enddate/endhour for one dataset or for all.

    dsid: dataset id to restrict the update to; with None (the default)
          every dlupdt record that has an enddate is processed.

    Each selected record gets endtime set to '<enddate> <endhour>:59:59';
    a missing endhour is treated as 23. A summary line with the number of
    updated records is logged at the end. Nothing is returned.
    """
    dsids = []    # distinct dataset ids found among the selected records
    cnd = "dsid = '{}' AND ".format(dsid) if dsid else ''
    # a NULL test must use IS NOT NULL; 'enddate <> NULL' is never true in SQL
    cnd += 'enddate IS NOT NULL ORDER BY dsid, lindex'
    pgrecs = PgDBI.pgmget('dlupdt', 'lindex, dsid, enddate, endhour', cnd)

    cnt = len(pgrecs['lindex']) if pgrecs else 0
    for i in range(cnt):
        lidx = pgrecs['lindex'][i]
        edate = pgrecs['enddate'][i]
        ehour = pgrecs['endhour'][i]
        # separate name so the dsid parameter is not overwritten by the loop
        recdsid = pgrecs['dsid'][i]
        if recdsid not in dsids: dsids.append(recdsid)
        if ehour is None: ehour = 23    # no hour given: use the last hour of the day
        etime = "{} {}:59:59".format(edate, ehour)
        PgDBI.pgexec("UPDATE dlupdt SET endtime = '{}' WHERE lindex = {}".format(etime, lidx), PgLOG.LGEREX)

    s = 's' if cnt > 1 else ''
    dscnt = len(dsids)
    dsstr = dsids[0] if dscnt == 1 else '{} datasets'.format(dscnt)
    PgLOG.pglog("{}: {} record{} updated for dssdb.dlupdt.endtime".format(dsstr, cnt, s), PgLOG.LOGWRN)
89
+
90
+ #
91
+ # call main() to start program
92
+ #
93
+ if __name__ == "__main__": main()
@@ -0,0 +1,287 @@
1
+ #!/usr/bin/env python3
2
+ #
3
+ ###############################################################################
4
+ #
5
+ # Title : fillglobususage
6
+ # Author : Zaihua Ji, zji@ucar.edu
7
+ # Date : 03/11/2022
8
+ # 2025-03-26 transferred to package rda_python_metrics from
9
+ # https://github.com/NCAR/rda-database.git
10
+ # Purpose : python program to retrieve info from Globus logs
11
+ # and fill table wusage in PgSQL database dssdb.
12
+ #
13
+ # Github : https://github.com/NCAR/rda-python-metrics.git
14
+ #
15
+ ###############################################################################
16
+ #
17
+ import sys
18
+ import re
19
+ import glob
20
+ from os import path as op
21
+ from rda_python_common import PgLOG
22
+ from rda_python_common import PgUtil
23
+ from rda_python_common import PgFile
24
+ from rda_python_common import PgDBI
25
+ from rda_python_common import PgSplit
26
+ from . import PgIPInfo
27
+
28
# settings shared by the functions of this program
USAGE = {
    # base name of the usage table; a year is appended when records are added
    'PGTBL': "wusage",
    # directory the program changes into before searching for log files
    'GBSDIR': PgLOG.PGLOG["DSSDATA"] + "/work/logs/gridftp/",
    # log file name template; filled with a '?' wildcard and a MMDDYYYY date
    'GBSLOG': "access_log_gridftp0{}_{}",
}
33
+
34
+ #
35
+ # main function to run this program
36
+ #
37
def main():
    """Parse the command line, find the Globus log files and load their usage.

    Recognized options are -b (sets PgLOG.PGLOG['BCKGRND']) and exactly one
    of -d, -p or -N; the values following the selected option are collected
    as its parameters. The usage text is shown when no option or no
    parameter is given. The program exits with status 0 when done.
    """
    argv = sys.argv[1:]
    params = []                   # values given for the selected option
    option = None                 # one of 'd', 'p' or 'N' once selected
    datelimits = [None, None]     # filled in by get_log_file_names()

    for arg in argv:
        ms = re.match(r'^-(b|d|p|N)$', arg)
        if ms is None:
            if re.match(r'^-', arg):
                PgLOG.pglog(arg + ": Invalid Option", PgLOG.LGWNEX)
            elif option:
                params.append(arg)
            else:
                PgLOG.pglog(arg + ": Invalid Parameter", PgLOG.LGWNEX)
            continue

        opt = ms.group(1)
        if opt == 'b':
            PgLOG.PGLOG['BCKGRND'] = 1
        elif option:
            # only one of -d/-p/-N may be chosen per run
            PgLOG.pglog("{}: Option -{} is present already".format(arg, option), PgLOG.LGWNEX)
        else:
            option = opt

    if not (option and params):
        PgLOG.show_usage('fillglobususage')

    PgDBI.dssdb_dbname()
    cmdstr = "fillglobususage {}".format(' '.join(argv))
    PgLOG.cmdlog(cmdstr)
    PgFile.change_local_directory(USAGE['GBSDIR'])

    filenames = get_log_file_names(option, params, datelimits)
    if not filenames:
        PgLOG.pglog("No log file found for given command: " + cmdstr, PgLOG.LOGWRN)
    else:
        fill_globus_usages(filenames, datelimits)

    PgLOG.pglog(None, PgLOG.LOGWRN)
    sys.exit(0)
76
+
77
+ #
78
+ # get the log file dates
79
+ #
80
def get_log_file_names(option, params, datelimits):
    """Return the names of the log files selected by the given option.

    option:     'd' treats every value in params as one log file date;
                'N' treats params[0] as a number of recent days; any other
                value treats params as a begin date and an optional end date.
    params:     the values given on the command line for the option.
    datelimits: two-element list updated in place; the begin date is stored
                at index 0 for the non-'d' options, and the end date at
                index 1 when an end date is given explicitly.

    Files are looked up with glob in the current directory; the names found
    for each date are appended in sorted order.
    """
    def logs_for(day):
        # all files matching the name template for a single date
        pattern = USAGE['GBSLOG'].format('?', PgUtil.format_date(day, 'MMDDYYYY'))
        return sorted(glob.glob(pattern))

    filenames = []

    if option == 'd':
        for day in params:
            filenames.extend(logs_for(day))
        return filenames

    if option == 'N':
        last = PgUtil.curdate()
        day = datelimits[0] = PgUtil.adddate(last, 0, 0, -int(params[0]))
    else:
        day = datelimits[0] = params[0]
        if len(params) > 1:
            last = datelimits[1] = params[1]
        else:
            last = PgUtil.curdate()

    # walk the period one day at a time
    while day <= last:
        filenames.extend(logs_for(day))
        day = PgUtil.adddate(day, 0, 0, 1)

    return filenames
107
+
108
+ #
109
+ # Fill Globus usages into the yearly wusage and allusage tables of DSS PgSQL database dssdb from Globus access logs
110
+ #
111
def fill_globus_usages(fnames, datelimits):
    """Read the given Globus access logs and add one usage record per download.

    fnames:     names of the log files to process; names that are not
                regular files are reported and skipped.
    datelimits: [begin, end] date strings; an entry dated before a non-empty
                begin or after a non-empty end is ignored.

    Only GET entries for a dataset path with status 200 or 206 and a size
    of at least 100 are used. Consecutive 206 (partial content) entries for
    the same ip, dataset and file are merged into one record whose size is
    the sum of the parts. Records are stored through add_file_usage(); the
    totals are logged at the end. Nothing is returned.
    """
    # compiled once; the same pattern is applied to every line of every file
    entry = re.compile(r'^([\d\.]+)\s.*\s+\[(\S+).*"GET\s+/(ds\d\d\d\.\d|[a-z]\d{6})/(\S+)\s.*\s(200|206)\s+(\d+)\s+"(\S+)"\s+"(.+)"$')
    cntall = addall = 0

    for logfile in fnames:
        if not op.isfile(logfile):
            PgLOG.pglog("{}: Not exists for Gathering Globus usage".format(logfile), PgLOG.LOGWRN)
            continue
        PgLOG.pglog("Gathering usage info from {} at {}".format(logfile, PgLOG.current_datetime()), PgLOG.LOGWRN)
        globus = PgFile.open_local_file(logfile)
        if not globus: continue

        record = None     # pending partial-download record, not stored yet
        ryear = None      # year of the pending record
        pkey = None       # merge key of the pending record
        cntadd = entcnt = 0
        try:
            while True:
                line = globus.readline()
                if not line: break
                entcnt += 1
                if entcnt%10000 == 0:
                    PgLOG.pglog("{}: {}/{} Globus log entries processed/records added".format(logfile, entcnt, cntadd), PgLOG.WARNLG)

                ms = entry.match(line)
                if not ms: continue
                size = int(ms.group(6))
                if size < 100: continue    # ignore small files
                ip = ms.group(1)
                dsid = PgUtil.format_dataset_id(ms.group(3))
                wfile = ms.group(4)
                stat = ms.group(5)
                sline = ms.group(7)
                engine = ms.group(8)
                (year, quarter, date, time) = get_record_date_time(ms.group(2))
                if datelimits[0] and date < datelimits[0]: continue
                if datelimits[1] and date > datelimits[1]: continue
                locflag = 'O' if re.match(r'^https://stratus\.', sline) else 'G'
                idx = wfile.find('?')
                if idx > -1: wfile = wfile[:idx]    # drop the query string

                if re.match(r'^curl', engine, re.I):
                    method = "CURL"
                elif re.match(r'^wget', engine, re.I):
                    method = "WGET"
                elif re.match(r'^python', engine, re.I):
                    method = "PYTHN"
                else:
                    method = "WEB"

                # only partial downloads get a key and may be merged
                key = "{}:{}:{}".format(ip, dsid, wfile) if stat == '206' else None

                if record:
                    if key == pkey:
                        record['size'] += size
                        continue
                    # store the pending record under its own year, which can
                    # differ from the year of the entry being read now
                    cntadd += add_file_usage(ryear, record)
                record = {'ip' : ip, 'dsid' : dsid, 'wfile' : wfile, 'date' : date,
                          'time' : time, 'quarter' : quarter, 'size' : size,
                          'locflag' : locflag, 'method' : method}
                ryear = year
                pkey = key
                if not pkey:
                    # complete download: nothing to merge, store it right away
                    cntadd += add_file_usage(year, record)
                    record = None
            if record: cntadd += add_file_usage(ryear, record)
        finally:
            # release the file handle even when a line fails to process
            globus.close()
        cntall += entcnt
        addall += cntadd

    PgLOG.pglog("{} Globus usage records added for {} entries at {}".format(addall, cntall, PgLOG.current_datetime()), PgLOG.LOGWRN)
179
+
180
+
181
def get_record_date_time(ctime):
    """Split an access-log time stamp into year, quarter, date and time.

    ctime: time stamp text of the form DD/Mon/YYYY:HH:MM:SS.

    Returns the tuple (year, quarter, date, time), where year is the matched
    digit string, quarter is an integer, date is formatted as YYYY-MM-DD and
    time is the HH:MM:SS text. A time stamp of any other form is reported
    through PgLOG.pglog() with PgLOG.LGEREX; None is returned if that call
    comes back.
    """
    ms = re.search(r'^(\d+)/(\w+)/(\d+):(\d+:\d+:\d+)$', ctime)
    if not ms:
        PgLOG.pglog(ctime + ": Invalid date/time format", PgLOG.LGEREX)
        return None

    d = int(ms.group(1))
    # presumably the month number (1-12) for the month name -- confirm in PgUtil
    m = PgUtil.get_month(ms.group(2))
    y = ms.group(3)
    t = ms.group(4)
    # floor division keeps the quarter an integer; '/' would give e.g. 1.33
    q = 1 + (m - 1)//3
    return (y, q, "{}-{:02}-{:02}".format(y, m, d), t)
193
+
194
+ #
195
+ # Fill usage of a single online data file into table dssdb.wusage of DSS PgSQL database
196
+ #
197
def add_file_usage(year, logrec):
    """Store the usage of one online data file; return the number of records added.

    year:   year used to select the yearly usage tables.
    logrec: dict built from a log entry; the keys dsid, wfile, ip, date,
            time, size, method, locflag and quarter are read here.

    Returns 0 when the file is unknown, when a usage record with the same
    wid, method, date and time already exists, when no user record can be
    obtained for the ip, or when adding to allusage fails; otherwise the
    result of PgDBI.add_yearly_wusage() is returned.
    """
    wfrec = get_wfile_wid(logrec['dsid'], logrec['wfile'])
    if not wfrec:
        return 0

    table = "{}_{}".format(USAGE['PGTBL'], year)
    cond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(
        wfrec['wid'], logrec['method'], logrec['date'], logrec['time'])
    if PgDBI.pgget(table, "", cond, PgLOG.LOGWRN):
        return 0    # this usage is recorded already

    wurec = get_wuser_record(logrec['ip'], logrec['date'])
    if not wurec:
        return 0

    record = {
        'wid' : wfrec['wid'],
        'dsid' : wfrec['dsid'],
        'wuid_read' : wurec['wuid'],
        'date_read' : logrec['date'],
        'time_read' : logrec['time'],
        'size_read' : logrec['size'],
        'method' : logrec['method'],
        'locflag' : logrec['locflag'],
        'ip' : logrec['ip'],
        'quarter' : logrec['quarter'],
    }

    # the wusage record is added only after the allusage record went in
    if not add_to_allusage(year, logrec, wurec):
        return 0
    return PgDBI.add_yearly_wusage(year, record)
222
+
223
def add_to_allusage(year, logrec, wurec):
    """Add one record to the yearly allusage table for a web file download.

    year:   year passed on to PgDBI.add_yearly_allusage().
    logrec: dict built from a log entry; the keys dsid, date, quarter, time,
            size, method and ip are copied.
    wurec:  user record; the keys email, org_type and country are copied.

    The record is marked with source 'W'. Returns whatever
    PgDBI.add_yearly_allusage() returns.
    """
    pgrec = {
        'email' : wurec['email'],
        'org_type' : wurec['org_type'],
        'country' : wurec['country'],
        'dsid' : logrec['dsid'],
        'date' : logrec['date'],
        'quarter' : logrec['quarter'],
        'time' : logrec['time'],
        'size' : logrec['size'],
        'method' : logrec['method'],
        'ip' : logrec['ip'],
        'source' : 'W',
    }
    return PgDBI.add_yearly_allusage(year, pgrec)
235
+
236
+ #
237
+ # return the wfile record (including wid and dsid) upon success, a false value otherwise
238
+ #
239
def get_wfile_wid(dsid, wfile):
    """Look up the web file record for a file name found in a log entry.

    dsid:  dataset id the file was requested under.
    wfile: file name relative to the dataset, as taken from the log.

    The lookup is tried in this order: the wfile table of the dataset, then
    table wfile_delete, then table wmove followed by the wfile table of the
    dataset the file was moved to. A record found in a wfile table gets its
    'dsid' set to the requested dsid. Returns the record found, or the
    false value of the last failed lookup.
    """
    # wfile comes from access logs, so a single quote in it must be doubled
    # before the name is placed inside a quoted SQL string
    wfcond = "wfile = '{}'".format(wfile.replace("'", "''"))

    pgrec = PgSplit.pgget_wfile(dsid, "*", wfcond)
    if pgrec:
        pgrec['dsid'] = dsid
        return pgrec

    pgrec = PgDBI.pgget("wfile_delete", "*", "{} AND dsid = '{}'".format(wfcond, dsid))
    if pgrec:
        return pgrec

    pgrec = PgDBI.pgget("wmove", "wid, dsid", wfcond)
    if pgrec:
        pgrec = PgSplit.pgget_wfile(pgrec['dsid'], "*", "wid = {}".format(pgrec['wid']))
        if pgrec: pgrec['dsid'] = dsid

    return pgrec
254
+
255
+ # return wuser record upon success, None otherwise
256
def get_wuser_record(ip, date):
    """Return the wuser record for the host behind an ip, adding one if needed.

    ip:   address passed to PgIPInfo.set_ipinfo().
    date: date of the usage; it becomes the start_date of a new user, and
          replaces the start_date of an existing user when
          PgUtil.diffdate(start_date, date) is positive.

    The user is identified by the email 'unknown@<hostname>'. Returns the
    existing record (wuid, email, org_type, country, start_date), or the
    newly added record including its wuid, or None when no ip information
    is available or the new user cannot be added.
    """
    ipinfo = PgIPInfo.set_ipinfo(ip)
    if not ipinfo:
        return None

    email = 'unknown@' + ipinfo['hostname']
    emcond = "email = '{}'".format(email)
    record = {'org_type' : ipinfo['org_type'], 'country' : ipinfo['country']}

    found = PgDBI.pgget("wuser", 'wuid, email, org_type, country, start_date', emcond, PgLOG.LOGERR)
    if found:
        if PgUtil.diffdate(found['start_date'], date) > 0:
            found['start_date'] = record['start_date'] = date
            PgDBI.pgupdt('wuser', record, emcond)
        return found

    # no such user yet: add one in
    record.update({'email' : email, 'stat_flag' : 'A', 'start_date' : date})
    wuid = PgDBI.pgadd("wuser", record, PgLOG.LOGERR|PgLOG.AUTOID)
    if not wuid:
        return None

    record['wuid'] = wuid
    PgLOG.pglog("{} Added as wuid({})".format(email, wuid), PgLOG.LGWNEM)
    return record
283
+
284
+ #
285
+ # call main() to start program
286
+ #
287
+ if __name__ == "__main__": main()
@@ -0,0 +1,17 @@
1
+
2
+ Retrieves usage information from Globus Server logs under directory
3
+ /gpfs/fs1/collections/rda/work/logs/gridftp/ to fill table 'wusage' in
4
+ database 'dssdb'.
5
+
6
+ Usage: fillglobususage [-b] [-d LogFileDates] [-N NumberDay] [-p BeginDate [EndDate]]
7
+
8
+ select option, -d, -N or -p to run this application.
9
+
10
+ - Option -b, log process information into logfile only;
11
+
12
+ - Option -d, retrieve usage info from given log file dates;
13
+
14
+ - Option -N, retrieve usage info in recent NumberDay days;
15
+
16
+ - Option -p, retrieve usage info between given period. For missing EndDate,
17
+ it defaults to the current date.
@@ -0,0 +1,185 @@
1
+ #!/usr/bin/env python3
2
+ #
3
+ ###############################################################################
4
+ #
5
+ # Title : fillipinfo
6
+ # Author : Zaihua Ji, zji@ucar.edu
7
+ # Date : 08/26/2023
8
+ # 2025-03-26 transferred to package rda_python_metrics from
9
+ # https://github.com/NCAR/rda-database.git
10
+ # Purpose : python program to retrieve ip info and
11
+ # fill table ipinfo
12
+ #
13
+ # Github : https://github.com/NCAR/rda-python-metrics.git
14
+ #
15
+ ###############################################################################
16
+ #
17
+ import sys
18
+ import re
19
+ import glob
20
+ from os import path as op
21
+ from rda_python_common import PgLOG
22
+ from rda_python_common import PgUtil
23
+ from rda_python_common import PgFile
24
+ from rda_python_common import PgDBI
25
+ from . import PgIPInfo
26
+
27
# bit flags telling how the time range to fix is given on the command line
MONTH = 0x02    # fix data usages for given months
YEARS = 0x04    # fix data usages for given years
NDAYS = 0x08    # fix data usages in recent number of days
MULTI = MONTH | YEARS    # options accepting any number of values
SINGL = NDAYS            # options accepting a single value only

IPINFO = {
    'USGTBL': ['ipinfo', 'allusage', 'tdsusage'],    # table names accepted for option -t
    'CDATE': PgUtil.curdate(),    # evaluated once, when the module is loaded
}
38
+
39
+ #
40
+ # main function to run this program
41
+ #
42
def main():
    """Parse the command line and fix the ip information of one usage table.

    Options: -b sets PgLOG.PGLOG['BCKGRND']; -t takes the table name, which
    must be listed in IPINFO['USGTBL']; the first of -m, -y or -N selects
    how the following values are read (months, years or a number of recent
    days). The usage text is shown when no value or no table name is given.
    The program exits with status 0 when done.
    """
    flags = {"-m": MONTH, "-y": YEARS, "-N": NDAYS}
    argv = sys.argv[1:]
    inputs = []     # values given for the -m, -y or -N option
    table = None    # table name given with -t
    option = 0      # flag of the selected time option
    topt = 0        # 1 while the value of -t is expected next

    for arg in argv:
        if arg == "-b":
            PgLOG.PGLOG['BCKGRND'] = 1
        elif option == 0 and re.match(r'^-[mNy]$', arg):
            option = flags.get(arg, option)
        elif arg == "-t":
            topt = 1
        elif re.match(r'^-', arg):
            PgLOG.pglog(arg + ": Invalid Option", PgLOG.LGWNEX)
        elif topt:
            if arg not in IPINFO['USGTBL']:
                PgLOG.pglog("{}: Invalid Table Name; must be in ({})".format(arg, ','.join(IPINFO['USGTBL'])), PgLOG.LGWNEX)
            table = arg
            topt = 0
        elif (option & MULTI) or ((option & SINGL) and not inputs):
            # multi-value options take every value, single-value ones only the first
            inputs.append(arg)
        else:
            PgLOG.pglog(arg + ": Invalid Parameter", PgLOG.LGWNEX)

    if not (inputs and table):
        PgLOG.show_usage('fillipinfo')
    PgDBI.dssdb_dbname()
    PgLOG.cmdlog("fillipinfo {}".format(' '.join(argv)))

    if option & NDAYS:
        # turn the number of days into a list of year-month values and carry
        # on as for option -m; each pass steps back by the day of the month
        curdate = IPINFO['CDATE']
        datelimit = PgUtil.adddate(curdate, 0, 0, -int(inputs[0]))
        option = MONTH
        inputs = []

        while curdate >= datelimit:
            parts = curdate.split('-')
            inputs.append("{}-{}".format(parts[0], parts[1]))
            curdate = PgUtil.adddate(curdate, 0, 0, -int(parts[2]))

    fill_ip_info(option, inputs, table)

    sys.exit(0)
91
+
92
+ #
93
+ # Fill ip info in the given usage table (ipinfo, allusage or tdsusage) of dssdb
94
+ #
95
def fill_ip_info(option, inputs, table):
    """Fix the ip information of one table, day by day, for the given periods.

    option: flag telling how to read inputs: NDAYS (a number of recent
            days), MONTH (year-month values) or YEARS (year values).
    inputs: the period values; each is expanded to a first and a last date.
    table:  'ipinfo', 'allusage' or 'tdsusage'; selects the function that
            fixes the records of a single date.

    Returns the sum of the counts returned by the per-date function. An
    unknown table name is reported through PgLOG.pglog() and gives 0; a
    value is skipped when option carries none of the three flags.
    """
    # explicit dispatch table instead of eval() on a name built from 'table'
    handlers = {
        'ipinfo' : fix_ipinfo_records,
        'allusage' : fix_allusage_records,
        'tdsusage' : fix_tdsusage_records,
    }
    fix_records = handlers.get(table)
    if fix_records is None:
        PgLOG.pglog("{}: Invalid Table Name; must be in ({})".format(table, ','.join(handlers)), PgLOG.LGWNEX)
        return 0

    cntall = 0
    for value in inputs:
        if option&NDAYS:
            edate = IPINFO['CDATE']
            date = PgUtil.adddate(edate, 0, 0, -int(value))
        elif option&MONTH:
            tms = value.split('-')
            date = "{}-{:02}-01".format(tms[0], int(tms[1]))
            edate = PgUtil.enddate(date, 0, 'M')
        elif option&YEARS:
            date = value + "-01-01"
            edate = value + "-12-31"
        else:
            continue    # no period can be derived without a time option

        while date <= edate:
            cntall += fix_records(date)
            date = PgUtil.adddate(date, 0, 0, 1)

    return cntall
116
+
117
def fix_allusage_records(date):
    """Fix org_type, country and missing emails in allusage for one date.

    date: date text starting with the year and a dash; the year selects
          the table allusage_<year>.

    Records of that date whose org_type is '-' are looked up by ip through
    PgIPInfo.set_ipinfo(). When ip information is returned, org_type and
    country are updated, and an email that is empty or ends with '-' is
    replaced by 'unknown@<hostname>'. Returns the sum of the values
    returned by PgDBI.pgupdt().
    """
    year = re.match(r'^(\d+)-', date).group(1)
    table = 'allusage_' + year
    cond = "date = '{}' and org_type = '-'".format(date)
    pgrecs = PgDBI.pgmget(table, 'aidx, email, ip', cond, PgLOG.LGEREX)
    if not pgrecs:
        return 0

    total = len(pgrecs['ip'])
    updated = 0
    for i in range(total):
        ip = pgrecs['ip'][i]
        email = pgrecs['email'][i]
        ipinfo = PgIPInfo.set_ipinfo(ip)
        if not ipinfo:
            continue
        record = {'org_type' : ipinfo['org_type'],
                  'country' : ipinfo['country']}
        if not email or re.search(r'-$', email):
            record['email'] = 'unknown@' + ipinfo['hostname']
        updated += PgDBI.pgupdt(table, record, "aidx = '{}'".format(pgrecs['aidx'][i]))

    plural = 's' if total > 1 else ''
    PgLOG.pglog("{}: {} of {} record{} updated for {}".format(table, updated, total, plural, date), PgLOG.LOGWRN)

    return updated
143
+
144
def fix_tdsusage_records(date):
    """Fix org_type and country in table tdsusage for one date.

    date: date of the records to fix.

    Records of that date whose org_type is '-' are looked up by ip through
    PgIPInfo.set_ipinfo(). When ip information is returned, the records
    matching the same date, time and ip are updated. Returns the sum of the
    values returned by PgDBI.pgupdt().
    """
    table = 'tdsusage'
    pgrecs = PgDBI.pgmget(table, 'time, ip', "date = '{}' and org_type = '-'".format(date), PgLOG.LGEREX)
    if not pgrecs:
        return 0

    total = len(pgrecs['ip'])
    updated = 0
    for i in range(total):
        ipinfo = PgIPInfo.set_ipinfo(pgrecs['ip'][i])
        if not ipinfo:
            continue
        record = {'org_type' : ipinfo['org_type'],
                  'country' : ipinfo['country']}
        cond = "date = '{}' AND time = '{}' AND ip = '{}'".format(date, pgrecs['time'][i], pgrecs['ip'][i])
        updated += PgDBI.pgupdt(table, record, cond)

    plural = 's' if total > 1 else ''
    PgLOG.pglog("{}: {} of {} record{} updated for {}".format(table, updated, total, plural, date), PgLOG.LOGWRN)

    return updated
164
+
165
def fix_ipinfo_records(date):
    """Refresh the ipinfo records of one date that carry stat_flag 'M'.

    date: date of the records to refresh.

    PgIPInfo.set_ipinfo(ip, True) is called for every selected ip. Returns
    the value of PgIPInfo.IPINFO['IPUPDT'] read after the last call, or 0
    when no record is selected.
    """
    table = 'ipinfo'
    pgrecs = PgDBI.pgmget(table, 'ip', "stat_flag = 'M' and date = '{}'".format(date), PgLOG.LGEREX)
    if not pgrecs:
        return 0

    total = len(pgrecs['ip'])
    for i in range(total):
        PgIPInfo.set_ipinfo(pgrecs['ip'][i], True)

    # NOTE(review): if IPUPDT keeps counting across calls, this value (and
    # the sum built from it by the caller) covers earlier dates too -- confirm
    # against PgIPInfo
    updated = PgIPInfo.IPINFO['IPUPDT']
    plural = 's' if total > 1 else ''
    PgLOG.pglog("{}: {} of {} record{} updated".format(table, updated, total, plural), PgLOG.LOGWRN)

    return updated
181
+
182
+ #
183
+ # call main() to start program
184
+ #
185
+ if __name__ == "__main__": main()
@@ -0,0 +1,18 @@
1
+
2
+ Check data usage IP related information to fill in usage tables.
3
+
4
+ Usage: fillipinfo [-b] -t TableName [-N NumberDay] [-m YearMonths] [-y Years]
5
+
6
+ select option, -t and -m, -N or -y to run this application.
7
+
8
+ - Option -b, log process information into logfile only;
9
+
10
+ - Option -t, table name, ipinfo, allusage, or tdsusage,
11
+ to fix IP related information, such as organization names/types,
12
+ emails and country names;
13
+
14
+ - Option -N, fix usage info in recent NumberDay days;
15
+
16
+ - Option -m, fix usage info in given months;
17
+
18
+ - Option -y, fix usage info in given years.