rda-python-metrics 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rda-python-metrics might be problematic. Click here for more details.

Files changed (47) hide show
  1. rda_python_metrics/PgIPInfo.py +188 -0
  2. rda_python_metrics/PgView.py +782 -0
  3. rda_python_metrics/__init__.py +1 -0
  4. rda_python_metrics/fillawsusage.py +282 -0
  5. rda_python_metrics/fillawsusage.usg +17 -0
  6. rda_python_metrics/fillcodusage.py +247 -0
  7. rda_python_metrics/fillcodusage.usg +21 -0
  8. rda_python_metrics/fillcountry.py +79 -0
  9. rda_python_metrics/fillendtime.py +93 -0
  10. rda_python_metrics/fillglobususage.py +287 -0
  11. rda_python_metrics/fillglobususage.usg +17 -0
  12. rda_python_metrics/fillipinfo.py +185 -0
  13. rda_python_metrics/fillipinfo.usg +18 -0
  14. rda_python_metrics/filloneorder.py +155 -0
  15. rda_python_metrics/filloneorder.usg +41 -0
  16. rda_python_metrics/fillrdadb.py +151 -0
  17. rda_python_metrics/fillrdadb.usg +32 -0
  18. rda_python_metrics/filltdsusage.py +289 -0
  19. rda_python_metrics/filltdsusage.usg +17 -0
  20. rda_python_metrics/filluser.py +216 -0
  21. rda_python_metrics/filluser.usg +16 -0
  22. rda_python_metrics/logarch.py +359 -0
  23. rda_python_metrics/logarch.usg +27 -0
  24. rda_python_metrics/pgperson.py +72 -0
  25. rda_python_metrics/pgusername.py +50 -0
  26. rda_python_metrics/viewallusage.py +350 -0
  27. rda_python_metrics/viewallusage.usg +198 -0
  28. rda_python_metrics/viewcheckusage.py +289 -0
  29. rda_python_metrics/viewcheckusage.usg +185 -0
  30. rda_python_metrics/viewcodusage.py +314 -0
  31. rda_python_metrics/viewcodusage.usg +184 -0
  32. rda_python_metrics/viewordusage.py +340 -0
  33. rda_python_metrics/viewordusage.usg +224 -0
  34. rda_python_metrics/viewrqstusage.py +362 -0
  35. rda_python_metrics/viewrqstusage.usg +217 -0
  36. rda_python_metrics/viewtdsusage.py +323 -0
  37. rda_python_metrics/viewtdsusage.usg +191 -0
  38. rda_python_metrics/viewwebfile.py +294 -0
  39. rda_python_metrics/viewwebfile.usg +212 -0
  40. rda_python_metrics/viewwebusage.py +371 -0
  41. rda_python_metrics/viewwebusage.usg +211 -0
  42. rda_python_metrics-1.0.4.dist-info/METADATA +18 -0
  43. rda_python_metrics-1.0.4.dist-info/RECORD +47 -0
  44. rda_python_metrics-1.0.4.dist-info/WHEEL +5 -0
  45. rda_python_metrics-1.0.4.dist-info/entry_points.txt +22 -0
  46. rda_python_metrics-1.0.4.dist-info/licenses/LICENSE +21 -0
  47. rda_python_metrics-1.0.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1 @@
1
+
@@ -0,0 +1,282 @@
1
+ #!/usr/bin/env python3
2
+ #
3
+ ###############################################################################
4
+ #
5
+ # Title : fillawsusage
6
+ # Author : Zaihua Ji, zji@ucar.edu
7
+ # Date : 03/11/2022
8
+ # 2025-03-26 transferred to package rda_python_metrics from
9
+ # https://github.com/NCAR/rda-database.git
10
+ # Purpose : python program to retrieve info from AWS logs
11
+ # and fill table wusages in PgSQL database dssdb.
12
+ #
13
+ # Github : https://github.com/NCAR/rda-python-metrics.git
14
+ #
15
+ ###############################################################################
16
+ #
17
+ import sys
18
+ import re
19
+ import glob
20
+ from os import path as op
21
+ from rda_python_common import PgLOG
22
+ from rda_python_common import PgUtil
23
+ from rda_python_common import PgFile
24
+ from rda_python_common import PgDBI
25
+ from . import PgIPInfo
26
+
27
+ USAGE = {
28
+ 'PGTBL' : "wusage", # base table name; add_file_usage() appends "_<year>"
29
+ 'AWSDIR' : PgLOG.PGLOG["TRANSFER"] + "/AWSera5log", # directory main() changes into before globbing
30
+ 'AWSLOG' : "{}/{}-00-00-00-*", # glob pattern filled with (PFMT-formatted date, date)
31
+ 'PFMT' : "YYYY/MM/DD" # date format used for the directory part of AWSLOG
32
+ }
33
+
34
+ DSIDS = {'nsf-ncar-era5' : PgUtil.format_dataset_id('d633000')} # log entry name (presumably the S3 bucket - TODO confirm) -> dataset id
35
+
36
+ #
37
+ # main function to run this program
38
+ #
39
def main():
    """Parse the command line, find the AWS log files and load their usage.

    One of -d, -N or -p selects how the remaining arguments are read; -b
    sets PgLOG.PGLOG['BCKGRND'].  Usage is shown when no option or no
    parameter is given.  The function ends through sys.exit(0).
    """
    argv = sys.argv[1:]
    option = None
    params = []   # values given after the selected option

    for arg in argv:
        matched = re.match(r'^-(b|d|p|N)$', arg)
        if matched is None:
            if arg.startswith('-'):
                PgLOG.pglog(arg + ": Invalid Option", PgLOG.LGWNEX)
            elif option:
                params.append(arg)
            else:
                PgLOG.pglog(arg + ": Invalid Parameter", PgLOG.LGWNEX)
            continue

        flag = matched.group(1)
        if flag == 'b':
            PgLOG.PGLOG['BCKGRND'] = 1
        elif option:
            # only one of -d, -N and -p is accepted per run
            PgLOG.pglog("{}: Option -{} is present already".format(arg, option), PgLOG.LGWNEX)
        else:
            option = flag

    if not option or not params:
        PgLOG.show_usage('fillawsusage')

    PgDBI.dssdb_dbname()
    cmdstr = "fillawsusage {}".format(' '.join(argv))
    PgLOG.cmdlog(cmdstr)
    PgFile.change_local_directory(USAGE['AWSDIR'])

    filenames = get_log_file_names(option, params)
    if not filenames:
        PgLOG.pglog("No log file found for given command: " + cmdstr, PgLOG.LOGWRN)
    else:
        fill_aws_usages(filenames)

    PgLOG.pglog(None, PgLOG.LOGWRN)
    sys.exit(0)
76
+
77
+ #
78
+ # get the log file names for the given dates or period
79
+ #
80
def get_log_file_names(option, params):
    """Return the AWS log file names found for the requested dates.

    option -- 'd': params is a list of dates;
              'N': params[0] is a number of days back from the current date;
              otherwise params is a begin date and an optional end date,
              the end date defaulting to the current date
    params -- list of values given on the command line

    The names found for each date are appended in sorted order.
    """
    found = []

    def collect(day):
        # glob the log files of one date and keep them in sorted order
        stamp = PgUtil.format_date(day, USAGE['PFMT'])
        matches = glob.glob(USAGE['AWSLOG'].format(stamp, day))
        if matches:
            found.extend(sorted(matches))

    if option == 'd':
        for given in params:
            collect(PgUtil.format_date(given))
        return found

    if option == 'N':
        last = PgUtil.curdate()
        day = PgUtil.adddate(last, 0, 0, -int(params[0]))
    else:
        day = PgUtil.format_date(params[0])
        last = PgUtil.format_date(params[1]) if len(params) > 1 else PgUtil.curdate()

    # walk the period one day at a time, both ends included
    while day <= last:
        collect(day)
        day = PgUtil.adddate(day, 0, 0, 1)

    return found
108
+
109
+ #
110
+ # Fill AWS usages into the yearly dssdb.wusage tables of DSS PgSQL database from AWS access logs
111
+ #
112
def fill_aws_usages(fnames):
    """Load usage records from AWS access logs through add_file_usage().

    Each file in fnames is read line by line.  Only REST.GET.OBJECT entries
    with status 200 or 206 are used, and only when their first captured
    name is a key of DSIDS and the second of the two logged sizes is at
    least 100.  Consecutive 206 entries with the same ip, dataset and file
    are merged into one record whose size is the sum of the first logged
    size of each entry.

    fnames -- list of log file names; names that are not files are skipped
              with a warning
    """
    # compiled once instead of once per log line
    entry = re.compile(r'^\w+ ([\w-]+) \[(\S+).*\] ([\d\.]+) .+ REST\.GET\.OBJECT (\S+) "GET.+" (200|206) - (\d+) (\d+) .* ".+" "(.+)" ')
    # leading text of the last quoted field (matched case-insensitively) -> method code
    engines = (('aiobotocore', "AIOBT"), ('rclone', "RCLON"), ('python', "PYTHN"))

    cntall = addall = 0
    for logfile in fnames:
        if not op.isfile(logfile):
            PgLOG.pglog("{}: Not exists for Gathering AWS usage".format(logfile), PgLOG.LOGWRN)
            continue
        PgLOG.pglog("Gathering usage info from {} at {}".format(logfile, PgLOG.current_datetime()), PgLOG.LOGWRN)
        aws = PgFile.open_local_file(logfile)
        if not aws: continue

        pending = None        # merged 206 record not stored yet
        pending_key = None    # "ip:dsid:wfile" of the pending record
        pending_year = None   # year of the pending record's own date
        cntadd = entcnt = 0
        try:
            while True:
                line = aws.readline()
                if not line: break
                entcnt += 1
                if entcnt%10000 == 0:
                    PgLOG.pglog("{}: {}/{} AWS log entries processed/records added".format(logfile, entcnt, cntadd), PgLOG.WARNLG)

                ms = entry.match(line)
                if not ms: continue
                (name, ctime, ip, wfile, stat, sent, fsize, engine) = ms.groups()
                if name not in DSIDS: continue
                if int(fsize) < 100: continue   # ignore small files
                dsid = DSIDS[name]
                size = int(sent)
                (year, quarter, date, time) = get_record_date_time(ctime)

                method = "WEB"
                for prefix, code in engines:
                    if re.match('^' + prefix, engine, re.I):
                        method = code
                        break

                # only partial downloads (206) are candidates for merging
                key = "{}:{}:{}".format(ip, dsid, wfile) if stat == '206' else None

                if pending:
                    if key == pending_key:
                        pending['size'] += size
                        continue
                    # store the finished record under its own year, not the
                    # year of the entry that ended it
                    cntadd += add_file_usage(pending_year, pending)
                    pending = None

                record = {'ip' : ip, 'dsid' : dsid, 'wfile' : wfile, 'date' : date,
                          'time' : time, 'quarter' : quarter, 'size' : size,
                          'locflag' : 'A', 'method' : method}
                if key:
                    pending, pending_key, pending_year = record, key, year
                else:
                    cntadd += add_file_usage(year, record)

            if pending: cntadd += add_file_usage(pending_year, pending)
        finally:
            # close the log file even when processing stops early
            aws.close()

        cntall += entcnt
        addall += cntadd

    PgLOG.pglog("{} AWS usage records added for {} entries at {}".format(addall, cntall, PgLOG.current_datetime()), PgLOG.LOGWRN)
177
+
178
+
179
def get_record_date_time(ctime):
    """Split a log time stamp into year, quarter, date and time.

    ctime -- time stamp of the form 'day/month/year:HH:MM:SS'; the month
             text is converted by PgUtil.get_month()

    Returns the tuple (year, quarter, date, time): year and time are the
    strings captured from ctime, date is 'year-MM-DD' and quarter is an
    integer.  A stamp of any other form is reported through PgLOG.pglog()
    with PgLOG.LGEREX, and None is returned if that call returns.
    """
    ms = re.search(r'^(\d+)/(\w+)/(\d+):(\d+:\d+:\d+)$', ctime)
    if not ms:
        PgLOG.pglog(ctime + ": Invalid date/time format", PgLOG.LGEREX)
        return None

    day = int(ms.group(1))
    month = PgUtil.get_month(ms.group(2))   # assumes a month number is returned - TODO confirm
    year = ms.group(3)
    # floor division keeps the quarter an integer; "/" gives a float in Python 3
    quarter = 1 + (month - 1) // 3
    return (year, quarter, "{}-{:02}-{:02}".format(year, month, day), ms.group(4))
191
+
192
+ #
193
+ # Fill usage of a single online data file into table dssdb.wusage of DSS PgSQL database
194
+ #
195
def add_file_usage(year, logrec):
    """Store one log record in the yearly wusage table.

    year   -- year used as suffix of the table name
    logrec -- dict with keys ip, dsid, wfile, date, time, quarter, size,
              locflag and method

    Returns 0 when the file is unknown, the usage is recorded already, no
    user record can be obtained, or add_to_allusage() fails; otherwise the
    value returned by PgDBI.add_yearly_wusage().
    """
    wfrec = get_wfile_wid(logrec['dsid'], logrec['wfile'])
    if not wfrec:
        return 0

    # skip usage that is in the yearly table already
    # NOTE(review): conditions are built by string formatting from log values
    yearly = "{}_{}".format(USAGE['PGTBL'], year)
    exists = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(
        wfrec['wid'], logrec['method'], logrec['date'], logrec['time'])
    if PgDBI.pgget(yearly, "", exists, PgLOG.LOGWRN):
        return 0

    user = get_wuser_record(logrec['ip'], logrec['date'])
    if not user:
        return 0

    if not add_to_allusage(year, logrec, user):
        return 0

    usage = {
        'wid' : wfrec['wid'],
        'dsid' : wfrec['dsid'],
        'wuid_read' : user['wuid'],
        'date_read' : logrec['date'],
        'time_read' : logrec['time'],
        'size_read' : logrec['size'],
        'method' : logrec['method'],
        'locflag' : logrec['locflag'],
        'ip' : logrec['ip'],
        'quarter' : logrec['quarter'],
    }
    return PgDBI.add_yearly_wusage(year, usage)
220
+
221
def add_to_allusage(year, logrec, wurec):
    """Add one usage record through PgDBI.add_yearly_allusage().

    year   -- year passed on to PgDBI.add_yearly_allusage()
    logrec -- log record providing dsid, date, quarter, time, size, method
              and ip
    wurec  -- user record providing email, org_type and country

    Returns whatever PgDBI.add_yearly_allusage() returns.
    """
    allrec = {name : wurec[name] for name in ('email', 'org_type', 'country')}
    for name in ('dsid', 'date', 'quarter', 'time', 'size', 'method', 'ip'):
        allrec[name] = logrec[name]
    allrec['source'] = 'A'

    return PgDBI.add_yearly_allusage(year, allrec)
233
+
234
+ #
235
+ # return the wfile record upon success, a false value otherwise
236
+ #
237
def get_wfile_wid(dsid, wfile):
    """Return the wfile record of a file, following table wmove if needed.

    dsid  -- dataset id
    wfile -- file name

    Table wfile is searched first.  When nothing is found there, table
    wmove is searched with the same condition and the wfile record is read
    by the wid found; its dsid is then set to the given one.  A false value
    is returned when no record is found.
    """
    cond = "dsid = '{}' AND wfile = '{}'".format(dsid, wfile)

    direct = PgDBI.pgget("wfile", "*", cond)
    if direct:
        return direct

    moved = PgDBI.pgget("wmove", "wid, dsid", cond)
    if not moved:
        return moved

    current = PgDBI.pgget("wfile", "*", "wid = {}".format(moved['wid']))
    if current:
        current['dsid'] = dsid
    return current
249
+
250
+ # return wuser record upon success, None otherwise
251
def get_wuser_record(ip, date):
    """Return the wuser record for an ip address, adding one when missing.

    ip   -- ip address passed to PgIPInfo.set_ipinfo()
    date -- date of the usage

    The user is identified by the email 'unknown@<hostname>'.  An existing
    record is returned after its org_type, country and start_date are
    updated when PgUtil.diffdate(start_date, date) is positive.  Otherwise
    a new record is added and returned with its wuid.  None is returned
    when no ip information is available or the record cannot be added.
    """
    ipinfo = PgIPInfo.set_ipinfo(ip)
    if not ipinfo:
        return None

    values = {'org_type' : ipinfo['org_type'], 'country' : ipinfo['country']}
    email = 'unknown@' + ipinfo['hostname']
    by_email = "email = '{}'".format(email)

    existing = PgDBI.pgget("wuser", 'wuid, email, org_type, country, start_date', by_email, PgLOG.LOGERR)
    if existing:
        if PgUtil.diffdate(existing['start_date'], date) > 0:
            values['start_date'] = date
            existing['start_date'] = date
            PgDBI.pgupdt('wuser', values, by_email)
        return existing

    # not in table wuser yet: add one in
    values.update(email=email, stat_flag='A', start_date=date)
    wuid = PgDBI.pgadd("wuser", values, PgLOG.LOGERR|PgLOG.AUTOID)
    if not wuid:
        return None

    values['wuid'] = wuid
    PgLOG.pglog("{} Added as wuid({})".format(email, wuid), PgLOG.LGWNEM)
    return values
278
+
279
+ #
280
+ # call main() to start program
281
+ #
282
+ if __name__ == "__main__": main()
@@ -0,0 +1,17 @@
1
+
2
+ Retrieves usage information from AWS Server logs under directory
3
+ /gpfs/fs1/collections/rda/transfer/AWSera5log/ to fill table 'wusage' in
4
+ database 'dssdb'.
5
+
6
+ Usage: fillawsusage [-b] [-d LogFileDates] [-N NumberDay] [-p BeginDate [EndDate]]
7
+
8
+ select option, -d, -N or -p to run this application.
9
+
10
+ - Option -b, log process information into logfile only;
11
+
12
+ - Option -d, retrieve usage info from given log file dates;
13
+
14
+ - Option -N, retrieve usage info in recent NumberDay days;
15
+
16
+ - Option -p, retrieve usage info between given period. For missing EndDate,
17
+ it defaults to the current date.
@@ -0,0 +1,247 @@
1
+ #!/usr/bin/env python3
2
+ #
3
+ ###############################################################################
4
+ #
5
+ # Title : fillcodusage
6
+ # Author : Zaihua Ji, zji@ucar.edu
7
+ # Date : 03/11/2022
8
+ # 2025-03-26 transferred to package rda_python_metrics from
9
+ # https://github.com/NCAR/rda-database.git
10
+ # Purpose : python program to retrieve info from web logs
11
+ # and fill table codusage in PgSQL database dssdb.
12
+ #
13
+ # Github : https://github.com/NCAR/rda-python-metrics.git
14
+ #
15
+ ###############################################################################
16
+ #
17
+ import sys
18
+ import re
19
+ import glob
20
+ from os import path as op
21
+ from rda_python_common import PgLOG
22
+ from rda_python_common import PgUtil
23
+ from rda_python_common import PgFile
24
+ from rda_python_common import PgDBI
25
+
26
+ # option flags for gathering COD data usage; one option is selected per run
27
+ MONTH = 0x02 # get COD data usages for given months
28
+ YEARS = 0x04 # get COD data usages for given years
29
+ NDAYS = 0x08 # get COD data usages in recent number of days
30
+ FILES = 0x10 # get COD data usages from given log file names
31
+ GTALL = 0x20 # get COD data usages from all available logs
32
+ MASKS = (MONTH|YEARS|NDAYS|FILES) # options that are followed by parameter values
33
+
34
+ USAGE = {
35
+ 'OPTION' : 0, # selected option flag; 0 until main() sets one
36
+ 'PGTBL' : "codusage", # table the usage records are added to
37
+ 'WEBLOG' : "/var/log/httpd", # directory holding <yyyymm>/access_log files
38
+ }
39
+
40
+ USERS = {} # cache keyed by aid: dsid and email from cache_users(), btime/etime set while reading logs
41
+
42
+ #
43
+ # main function to run this program
44
+ #
45
def main():
    """Parse the command line and fill COD usage for the selected logs.

    The first of -a, -f, -m, -N and -y selects the option; -b sets
    PgLOG.PGLOG['BCKGRND'].  Option -N is turned into a list of months
    together with a date limit before fill_cod_usages() is called.  The
    function ends through sys.exit(0).
    """
    flags = {"-a" : GTALL, "-f" : FILES, "-m" : MONTH, "-y" : YEARS, "-N" : NDAYS}
    argv = sys.argv[1:]
    params = []      # values given after the selected option
    datelimit = ''

    for arg in argv:
        if arg == "-b":
            PgLOG.PGLOG['BCKGRND'] = 1
            continue

        if USAGE['OPTION'] == 0 and re.match(r'^-[afmNy]$', arg):
            if arg in flags:
                USAGE['OPTION'] = flags[arg]
                if arg == "-a":
                    params = ['']   # one empty value so every log directory is globbed
            continue

        if arg.startswith('-'):
            PgLOG.pglog(arg + ": Invalid Option", PgLOG.LGWNEX)
        elif USAGE['OPTION']&MASKS:
            params.append(arg)
        else:
            PgLOG.pglog(arg + ": Invalid Parameter", PgLOG.LGWNEX)

    if not USAGE['OPTION'] or not params:
        PgLOG.show_usage('fillcodusage')

    PgDBI.dssdb_dbname()
    PgLOG.cmdlog("fillcodusage {}".format(' '.join(argv)))

    if USAGE['OPTION']&NDAYS:
        day = PgUtil.curdate()
        datelimit = PgUtil.adddate(day, 0, 0, -int(params[0]))

        USAGE['OPTION'] = MONTH
        params = []

        # collect each month back to the limit; subtracting the day of
        # month steps to the last day of the previous month
        while day >= datelimit:
            (yr, mn, dd) = day.split('-')
            params.append("{}-{}".format(yr, mn))
            day = PgUtil.adddate(day, 0, 0, -int(dd))

    fill_cod_usages(USAGE['OPTION'], params, datelimit)

    PgLOG.pglog(None, PgLOG.LOGWRN|PgLOG.SNDEML)   # send email out if any

    sys.exit(0)
95
+
96
+ #
97
+ # Fill COD usages into table dssdb.codusage of DSS PgSQL database from cod access logs
98
+ #
99
def fill_cod_usages(option, inputs, datelimit):
    """Gather custom OPeNDAP usage from access logs and store it per day.

    option    -- FILES: inputs are log file names; MONTH: inputs are
                 'year-month' values; otherwise each input is a prefix of
                 the log directory names to glob
    inputs    -- list of values matching option
    datelimit -- entries dated before this value are skipped; a false value
                 disables the check

    Entries are accumulated per aid for one date at a time and handed to
    add_usage_records() when the date changes and at the end of each file.
    """
    # compiled once instead of once per log line
    dods = re.compile(r'GET /opendap/(\w{10})\.dods.*\s200\s+(\d+).{6}([^"]+)')
    stamp = re.compile(r'^([\d\.]+).+\[(\d+)/(\w+)/(\d+):([\d:]+)')

    cntall = cntadd = 0

    for item in inputs:
        # get log file names
        if option&FILES:
            logfiles = [item]
        elif option&MONTH:
            tms = item.split('-')
            yrmn = "{}{:02}".format(tms[0], int(tms[1]))
            logfiles = ["{}/{}/access_log".format(USAGE['WEBLOG'], yrmn)]
        else:   # GTALL | YEARS
            yrmn = item + "*"
            logfiles = glob.glob("{}/{}/access_log".format(USAGE['WEBLOG'], yrmn))

        for logfile in logfiles:
            if not op.isfile(logfile):
                PgLOG.pglog("{}: Not exists for Gathering custom OPeNDAP usage".format(logfile), PgLOG.LOGWRN)
                continue
            PgLOG.pglog("Gathering custom OPeNDAP usage info from {} at {}".format(logfile, PgLOG.current_datetime()), PgLOG.LOGWRN)
            cod = PgFile.open_local_file(logfile)
            if not cod: continue

            pdate = ''      # date the accumulated records belong to
            records = {}    # usage accumulated per aid for pdate
            try:
                while True:
                    line = cod.readline()
                    if not line: break
                    cntall += 1
                    if cntall%20000 == 0:
                        PgLOG.pglog("{}/{} COD log entries processed/records added".format(cntall, cntadd), PgLOG.WARNLG)

                    ms = dods.search(line)
                    if not ms: continue
                    aid = ms.group(1)
                    size = int(ms.group(2))
                    engine = ms.group(3)
                    if not (aid in USERS or cache_users(aid)): continue
                    ms = stamp.match(line)
                    if not ms: continue
                    ip = ms.group(1)
                    ctime = ms.group(5)
                    cdate = "{}-{:02}-{:02}".format(ms.group(4), PgUtil.get_month(ms.group(3)), int(ms.group(2)))
                    if pdate != cdate:
                        if records:
                            # store under the date the records were gathered
                            # for, not the date of the entry that ended it
                            cntadd += add_usage_records(records, pdate)
                            records = {}
                        pdate = cdate

                    if datelimit and cdate < datelimit: continue

                    if aid in records:
                        records[aid]['size'] += size
                        records[aid]['count'] += 1
                        USERS[aid]['etime'] = ctime
                    else:
                        records[aid] = {'ip' : ip, 'count' : 1,
                                        'email' : USERS[aid]['email'],
                                        'dsid' : USERS[aid]['dsid'],
                                        'size' : size, 'engine' : engine}
                        USERS[aid]['etime'] = ctime
                        USERS[aid]['btime'] = ctime

                if records: cntadd += add_usage_records(records, pdate)
            finally:
                # close the log file even when processing stops early
                cod.close()

    PgLOG.pglog("{} COD usage records added for {} entries at {}".format(cntadd, cntall, PgLOG.current_datetime()), PgLOG.LOGWRN)
172
+
173
def add_usage_records(records, date):
    """Store the usage accumulated for one date in table codusage.

    records -- dict keyed by aid; each value holds ip, count, email, dsid,
               size and engine and is completed in place before it is added
    date    -- date of the usage, starting with 'year-month-'

    Returns the sum of the values returned by PgDBI.pgadd(); 0 when date
    does not have the expected form.  An aid is skipped when it is recorded
    for the date already, when no wuser record can be found for its email,
    or when add_to_allusage() fails.
    """
    ms = re.match(r'(\d+)-(\d+)-', date)
    if not ms: return 0
    year = ms.group(1)
    quarter = 1 + (int(ms.group(2)) - 1) // 3
    cnt = 0

    for aid, record in records.items():
        if PgDBI.pgget(USAGE['PGTBL'], '', "aid = '{}' AND date = '{}'".format(aid, date), PgLOG.LGEREX): continue
        if record['email'] == '-':
            record['org_type'] = record['country'] = '-'
        else:
            wuid = PgDBI.check_wuser_wuid(record['email'], date)
            # skip the record; a bare "next" here did nothing and let the
            # query below run with an empty wuid
            if not wuid: continue
            pgrec = PgDBI.pgget("wuser", "org_type, country", "wuid = {}".format(wuid), PgLOG.LGWNEX)
            if not pgrec: continue
            record['org_type'] = pgrec['org_type']
            record['country'] = pgrec['country']

        record['date'] = date
        record['time'] = USERS[aid]['btime']
        record['quarter'] = quarter

        if add_to_allusage(record, year):
            record['aid'] = aid
            record['period'] = access_period(USERS[aid]['etime'], record['time'])
            cnt += PgDBI.pgadd(USAGE['PGTBL'], record, PgLOG.LOGWRN)

    return cnt
204
+
205
+
206
def add_to_allusage(pgrec, year):
    """Add one COD usage record through PgDBI.add_yearly_allusage().

    pgrec -- usage record; its engine and count fields are left out
    year  -- year passed on to PgDBI.add_yearly_allusage()

    Returns whatever PgDBI.add_yearly_allusage() returns.
    """
    allrec = {'method' : 'COD', 'source' : 'C'}
    allrec.update({name : value for name, value in pgrec.items()
                   if not re.match(r'^(engine|count)$', name)})

    return PgDBI.add_yearly_allusage(year, allrec)
215
+
216
def cache_users(aid):
    """Cache dataset id and email of an aid in USERS.

    aid -- ID looked up in metautil.custom_dap_history

    Returns 1 when the record is found and its rinfo holds a 'dsnum=...;'
    value, in which case USERS[aid] is set; 0 otherwise.
    """
    history = PgDBI.pgget("metautil.custom_dap_history", "*", "ID = '{}'".format(aid), PgLOG.LGEREX)
    if not history:
        return 0

    found = re.search(r'dsnum=(\d+\.\d|[a-z]\d{6});', history['rinfo'])
    if not found:
        return 0

    USERS[aid] = {'dsid' : PgUtil.format_dataset_id(found.group(1)),
                  'email' : history['duser']}
    return 1
228
+
229
+
230
def access_period(etime, btime):
    """Return the number of seconds from btime to etime.

    etime -- end time containing 'H:M:S'; 86400 is used when it has none
    btime -- begin time containing 'H:M:S'; nothing is subtracted when it
             has none
    """
    def seconds(text):
        # seconds since midnight of the first H:M:S found, None without one
        found = re.search(r'(\d+):(\d+):(\d+)', text)
        if not found:
            return None
        hour, minute, second = (int(part) for part in found.groups())
        return hour*3600 + minute*60 + second

    end = seconds(etime)
    begin = seconds(btime)

    period = 86400 if end is None else end
    if begin is not None:
        period -= begin

    return period
243
+
244
+ #
245
+ # call main() to start program
246
+ #
247
+ if __name__ == "__main__": main()
@@ -0,0 +1,21 @@
1
+
2
+ Retrieves usage information from RDA Web logs under /var/log to
3
+ fill table 'codusage' in PgSQL database 'dssdb'.
4
+
5
+ Usage: fillcodusage [-a] [-b] [-f LogFileNames] [-m MonthList] [-N NumberDay] [-y YearList]
6
+
7
+ select one of the options, -a, -f, -m, -N or -y each time to run
8
+ this application.
9
+
10
+ - Option -b, log process information into logfile only;
11
+
12
+ - Option -a, retrieve usages for all available logs;
13
+
14
+ - Option -f, retrieve usage info from given log file names;
15
+
16
+ - Option -m, retrieve usage info in given months;
17
+
18
+ - Option -N, retrieve usage info in recent NumberDay days;
19
+
20
+ - Option -y, retrieve usage info in given years.
21
+
@@ -0,0 +1,79 @@
1
+ #!/usr/bin/env python3
2
+ #
3
+ ###############################################################################
4
+ #
5
+ # Title : fillcountry
6
+ # Author : Zaihua Ji, zji@ucar.edu
7
+ # Date : 2022-03-11
8
+ # 2025-03-26 transferred to package rda_python_metrics from
9
+ # https://github.com/NCAR/rda-database.git
10
+ # Purpose : python program to fill missing country field from email info for
11
+ # given table name
12
+ #
13
+ # Github : https://github.com/NCAR/rda-python-metrics.git
14
+ #
15
+ ###############################################################################
16
+ #
17
+ import sys
18
+ import re
19
+ from rda_python_common import PgLOG
20
+ from rda_python_common import PgIMMA
21
+ from rda_python_common import PgUtil
22
+ from rda_python_common import PgDBI
23
+
24
+ #
25
+ # main function to run this program
26
+ #
27
def main():
    """Parse the command line and fill missing countries of one table.

    Accepts -b, which sets PgLOG.PGLOG['BCKGRND'], and a single table name
    out of allusage, user and wuser.  A short usage line is printed when
    no table name is given.  The function ends through sys.exit(0).
    """
    argv = sys.argv[1:]
    allowed = ['allusage', 'user', 'wuser']
    table = None

    # check command line
    for arg in argv:
        if arg == "-b":
            PgLOG.PGLOG['BCKGRND'] = 1
        elif arg.startswith('-'):
            PgLOG.pglog(arg + ": Unknown Option", PgLOG.LGEREX)
        elif table:
            PgLOG.pglog(arg + ": one table name at a time", PgLOG.LGEREX)
        else:
            table = arg

    if not table:
        print("Usage: fillcountry TableName\n")
        sys.exit(0)
    if table not in allowed:
        PgLOG.pglog("{}: table name must be ({})".format(table, '|'.join(allowed)), PgLOG.LGEREX)

    PgDBI.dssdb_dbname()
    PgLOG.cmdlog("fillcountry {}".format(' '.join(argv)))

    process_countries(table)

    sys.exit(0)
56
+
57
def process_countries(table):
    """Set the country of every record in table that has none.

    table -- name of a table with email and country fields

    The country is derived from the email by PgDBI.email_to_country().
    Progress is logged every 500 records and a summary at the end.
    """
    found = PgDBI.pgmget(table, "email", "country IS NULL", PgLOG.LOGWRN)
    emails = found['email'] if found else []
    total = len(emails)

    PgLOG.pglog("Set {} record(s) for missing country in table {}".format(total, table), PgLOG.LOGWRN)
    if total == 0:
        return

    modified = 0
    for done, email in enumerate(emails):
        if done and done % 500 == 0:
            PgLOG.pglog("{}/{} Records modified/processed".format(modified, done), PgLOG.WARNLG)

        values = {'country' : PgDBI.email_to_country(email)}
        modified += PgDBI.pgupdt(table, values, "email = '{}' AND country IS NULL".format(email), PgLOG.LOGWRN)

    PgLOG.pglog("{} Record(s) modified in table '{}'".format(modified, table), PgLOG.LOGWRN)
75
+
76
+ #
77
+ # call main() to start program
78
+ #
79
+ if __name__ == "__main__": main()