rda-python-metrics 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rda-python-metrics might be problematic. Click here for more details.

Files changed (47) hide show
  1. rda_python_metrics/PgIPInfo.py +188 -0
  2. rda_python_metrics/PgView.py +782 -0
  3. rda_python_metrics/__init__.py +1 -0
  4. rda_python_metrics/fillawsusage.py +282 -0
  5. rda_python_metrics/fillawsusage.usg +17 -0
  6. rda_python_metrics/fillcodusage.py +247 -0
  7. rda_python_metrics/fillcodusage.usg +21 -0
  8. rda_python_metrics/fillcountry.py +79 -0
  9. rda_python_metrics/fillendtime.py +93 -0
  10. rda_python_metrics/fillglobususage.py +287 -0
  11. rda_python_metrics/fillglobususage.usg +17 -0
  12. rda_python_metrics/fillipinfo.py +185 -0
  13. rda_python_metrics/fillipinfo.usg +18 -0
  14. rda_python_metrics/filloneorder.py +155 -0
  15. rda_python_metrics/filloneorder.usg +41 -0
  16. rda_python_metrics/fillrdadb.py +151 -0
  17. rda_python_metrics/fillrdadb.usg +32 -0
  18. rda_python_metrics/filltdsusage.py +289 -0
  19. rda_python_metrics/filltdsusage.usg +17 -0
  20. rda_python_metrics/filluser.py +216 -0
  21. rda_python_metrics/filluser.usg +16 -0
  22. rda_python_metrics/logarch.py +359 -0
  23. rda_python_metrics/logarch.usg +27 -0
  24. rda_python_metrics/pgperson.py +72 -0
  25. rda_python_metrics/pgusername.py +50 -0
  26. rda_python_metrics/viewallusage.py +350 -0
  27. rda_python_metrics/viewallusage.usg +198 -0
  28. rda_python_metrics/viewcheckusage.py +289 -0
  29. rda_python_metrics/viewcheckusage.usg +185 -0
  30. rda_python_metrics/viewcodusage.py +314 -0
  31. rda_python_metrics/viewcodusage.usg +184 -0
  32. rda_python_metrics/viewordusage.py +340 -0
  33. rda_python_metrics/viewordusage.usg +224 -0
  34. rda_python_metrics/viewrqstusage.py +362 -0
  35. rda_python_metrics/viewrqstusage.usg +217 -0
  36. rda_python_metrics/viewtdsusage.py +323 -0
  37. rda_python_metrics/viewtdsusage.usg +191 -0
  38. rda_python_metrics/viewwebfile.py +294 -0
  39. rda_python_metrics/viewwebfile.usg +212 -0
  40. rda_python_metrics/viewwebusage.py +371 -0
  41. rda_python_metrics/viewwebusage.usg +211 -0
  42. rda_python_metrics-1.0.4.dist-info/METADATA +18 -0
  43. rda_python_metrics-1.0.4.dist-info/RECORD +47 -0
  44. rda_python_metrics-1.0.4.dist-info/WHEEL +5 -0
  45. rda_python_metrics-1.0.4.dist-info/entry_points.txt +22 -0
  46. rda_python_metrics-1.0.4.dist-info/licenses/LICENSE +21 -0
  47. rda_python_metrics-1.0.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,155 @@
1
+ #!/usr/bin/env python3
2
+ #
3
+ ###############################################################################
4
+ #
5
+ # Title : filloneorder
6
+ # Author : Zaihua Ji, zji@ucar.edu
7
+ # Date : 03/10/2022
8
+ # 2025-03-26 transferred to package rda_python_metrics from
9
+ # https://github.com/NCAR/rda-database.git
10
+ # Purpose : python program to fill one order usage on command line
11
+ #
12
+ # Github : https://github.com/NCAR/rda-python-metrics.git
13
+ #
14
+ ###############################################################################
15
+ #
16
+ import sys
17
+ import re
18
+ from rda_python_common import PgLOG
19
+ from rda_python_common import PgDBI
20
+ from rda_python_common import PgIMMA
21
+ from rda_python_common import PgUtil
22
+
23
+ # -t dsid, -e email, -v request data volume, -i data input volume,
24
+ # -m delivery method, -a amount charged, -p pay method, -d request date, -x close date,
25
+ # -y close time, -c file count, -s specialist login name, -o order id,
26
+ # mandatory options: -t, -e, -v, and -m
27
+
28
+ #
29
+ # main function to run this program
30
+ #
31
def main():
    """Parse the command line and record one special order.

    Every option except -b is a single-letter flag followed by exactly one
    value; the values are collected in a dict keyed by the option letter and
    then validated by check_inputs() and stored by add_one_order().
    """
    option = None   # option letter still waiting for its value
    params = {}
    argv = sys.argv[1:]

    for arg in argv:
        ms = re.match(r'^-(\w)$', arg)
        if ms:
            option = ms.group(1)
            if option == "b":
                # -b is a pure flag and takes no value
                PgLOG.PGLOG['BCKGRND'] = 1
                option = None
            # 'y' (close time) must be accepted here: check_inputs() and
            # add_one_order() both read params['y'], and the usage text
            # documents option -y.
            elif option not in "acdeimopstvxy":
                PgLOG.pglog("-{}: Invalid Option".format(option), PgLOG.LGWNEX)
        elif option and option not in params:
            if option == 't': arg = PgUtil.format_dataset_id(arg)
            params[option] = arg
            option = None
        else:
            PgLOG.pglog(arg + ": parameter passed in without leading option", PgLOG.LGWNEX)

    if not params: PgLOG.show_usage('filloneorder')
    PgDBI.dssdb_dbname()
    PgLOG.cmdlog("filloneorder {}".format(' '.join(argv)))

    check_inputs(params)
    add_one_order(params)

    sys.exit(0)
61
+
62
def add_one_order(params):
    """Build one 'ousage' record from the checked options and store it.

    params is the option dict completed by check_inputs(); the record is
    first mirrored into the yearly allusage table via add_to_allusage() and,
    only if that succeeds, added to table 'ousage'.
    """
    record = {
        'dsid': params['t'],
        'wuid_request': params['u'],
        'dss_uname': params['s'],
        'date_request': params['d'],
        'date_closed': params['x'],
        'method': params['m'],
        'size_request': params['v'],
        'size_input': params['i'],
    }
    # optional fields are only stored when given on the command line
    if 'a' in params:
        record['amount'] = params['a']
    if 'p' in params:
        record['pay_method'] = params['p']
    record['count'] = params.get('c', 0)
    if 'o' in params:
        record['order_number'] = params['o']

    # derive year and quarter from the leading YYYY-MM- of the request date
    year = None
    matched = re.match(r'(\d+)-(\d+)-', record['date_request'])
    if matched:
        year = int(matched.group(1))
        month = int(matched.group(2))
        record['quarter'] = 1 + int((month - 1) / 3)

    added = add_to_allusage(record, year, params['y'])
    if added:
        added = PgDBI.pgadd("ousage", record, PgLOG.LGEREX)

    if added:
        PgLOG.pglog("1 order added for " + params['e'], PgLOG.LOGWRN)
    else:
        PgLOG.pglog("No order added for " + params['e'], PgLOG.LOGWRN)
88
+
89
def add_to_allusage(record, year, ctime):
    """Mirror one order record into the yearly allusage table.

    Looks up the requesting user in table 'wuser' by record['wuid_request'],
    merges the order fields into that user record and hands it to
    PgDBI.add_yearly_allusage(). Returns that call's result, or 0 when the
    user record is not found.
    """
    user_cond = "wuid = {}".format(record['wuid_request'])
    pgrec = PgDBI.pgget("wuser", "email, org_type, country", user_cond, PgLOG.LGWNEX)
    if not pgrec:
        return 0

    pgrec['dsid'] = record['dsid']
    # UCAR users are counted under NCAR
    if pgrec['org_type'] == "UCAR":
        pgrec['org_type'] = "NCAR"
    pgrec['date'] = record['date_request']
    pgrec['time'] = ctime
    pgrec['quarter'] = record['quarter']
    pgrec['size'] = record['size_request']
    pgrec['method'] = record['method']
    pgrec['source'] = 'O'

    return PgDBI.add_yearly_allusage(year, pgrec)
105
+
106
+ #
107
+ # check option inputs and fill up the missing ones for default values
108
+ #
109
def check_inputs(params):
    """Validate the mandatory options and fill defaults for the missing ones.

    params is modified in place: -i defaults to -v, -x/-y default to the
    current date/time, -d defaults to -x, 'u' receives the user id returned
    by PgDBI.check_wuser_wuid() and 's' the specialist chosen by
    check_specialist(). An order matching an existing 'ousage' record is
    reported through PgLOG.pglog() with PgLOG.LGWNEX.
    """
    # mandatory dataset id, which must exist in table 'dataset'
    if 't' not in params:
        PgLOG.pglog("Missing Dataset ID per option -t", PgLOG.LGEREX)

    dataset_cond = "dsid = '{}'".format(params['t'])
    if not PgDBI.pgget("dataset", '', dataset_cond, PgLOG.LGEREX):
        PgLOG.pglog(params['t'] + ": dsid not in RDADB", PgLOG.LGEREX)

    # remaining mandatory options, checked in the original order
    required = (
        ('v', "Missing order data value in Bytes per option -v"),
        ('m', "Missing data delivery method per option -m"),
        ('e', "Missing user email per option -e"),
    )
    for letter, message in required:
        if letter not in params:
            PgLOG.pglog(message, PgLOG.LGEREX)

    # set default values
    today, now = PgUtil.get_date_time()
    if 'i' not in params:
        params['i'] = params['v']
    params.setdefault('x', today)
    params.setdefault('y', now)
    if 'd' not in params:
        params['d'] = params['x']

    params['u'] = PgDBI.check_wuser_wuid(params['e'], params['d'])
    if 's' in params:
        candidate = params['s']
    else:
        candidate = PgLOG.PGLOG['CURUID']
    params['s'] = check_specialist(params['t'], candidate)

    # check if order is recorded already
    ocond = "dsid = '{}' AND wuid_request = {} AND size_request = {} and date_request = '{}'".format(
        params['t'], params['u'], params['v'], params['d'])
    if PgDBI.pgget("ousage", '', ocond, PgLOG.LGEREX):
        PgLOG.pglog("Order of {} Bytes Data from {} for {} on {} recorded on {} already".format(
            params['v'], params['t'], params['e'], params['d'], params['x']), PgLOG.LGWNEX)
141
+
142
+ #
143
+ # return the dataset owner if specialist not given
144
+ #
145
def check_specialist(dsid, specialist):
    """Return the specialist login name to record for an order.

    The given specialist is returned when it is found in table 'dssgrp';
    otherwise the priority-1 owner of dataset dsid is looked up in
    'dsowner'/'dssgrp', and "datahelp" is returned when no owner is found.
    """
    # Look up the actual login name; the previous condition compared logname
    # against the literal word 'specialist', so the given name was never
    # really validated.
    if specialist and PgDBI.pgget("dssgrp", "", "logname = '{}'".format(specialist), PgLOG.LGEREX):
        return specialist

    scond = "specialist = logname AND dsid = '{}' AND priority = 1".format(dsid)
    pgrec = PgDBI.pgget("dsowner, dssgrp", "specialist", scond, PgLOG.LGEREX)
    return pgrec['specialist'] if pgrec else "datahelp"
151
+
152
+ #
153
+ # call main() to start program
154
+ #
155
+ if __name__ == "__main__": main()
@@ -0,0 +1,41 @@
1
+
2
+ Fill usage information for one special order in table 'ousage' in MySQL
3
+ database 'dssdb'.
4
+
5
+ Usage: filloneorder -e UserEmail -m DeliveryMethod -t DatasetID -v DataVolume \
6
+ [-a CostAmount] [-b] [-c FileCount] [-d DateOrdered] \
7
+ [-i InputDataVolume] [-o OrderID] [-p PaymentMethod] \
8
+ [-s SpecialistLoginName] [-x DateClosed -y TimeClosed]
9
+
10
+ select the Mandatory options, -e, -m, -t and -v, and any other Optional
11
+ options to run this application.
12
+
13
+ - Option -b, log process information into logfile only;
14
+
15
+ - Option -a, the amount of dollars if the order is not free ;
16
+
17
+ - Option -c, the number of data files for the order;
18
+
19
+ - Option -d, date the order is opened;
20
+
21
+ - option -e, user email address who ordered the data;
22
+
23
+ - option -i, the data volume involved for processing the order;
24
+
25
+ - Option -m, the delivery method for the ordered data;
26
+
27
+ - option -o, order ID, up to 30 characters;
28
+
29
+ - option -p, the payment method for the amount per option -a;
30
+
31
+ - option -s, the specialist who handles the order. The login name is used
32
+ if it is not specified, and the default dataset owner is
33
+ used if given specialist is not a valid DSS specialist;
34
+
35
+ - option -t, dataset id the data ordered from;
36
+
37
+ - option -v, the final data volume for the order;
38
+
39
+ - Option -x, date the order is closed;
40
+
41
+ - Option -y, time the order is closed.
@@ -0,0 +1,151 @@
1
+ #!/usr/bin/env python3
2
+ #
3
+ ###############################################################################
4
+ #
5
+ # Title : fillrdadb
6
+ # Author : Zaihua Ji, zji@ucar.edu
7
+ # Date : 04/07/2022
8
+ # 2025-03-26 transferred to package rda_python_metrics from
9
+ # https://github.com/NCAR/rda-database.git
10
+ # Purpose : python program to retrieve info from data logs, and fill tables
11
+ # in PostgreSQL database.schema rdadb.dssdb
12
+ #
13
+ # Github : https://github.com/NCAR/rda-python-metrics.git
14
+ #
15
+ ###############################################################################
16
+ #
17
+ import sys
18
+ import re
19
+ from os import path as op
20
+ from rda_python_common import PgLOG
21
+ from rda_python_common import PgFile
22
+ from rda_python_common import PgUtil
23
+ from rda_python_common import PgDBI
24
+
25
+ # the define options for gathering web online file usage, one at a time
26
# option bit flags for gathering web online file usage, one at a time
DATES = 1 << 0   # get web file usages for given dates
MONTH = 1 << 1   # get web file usages for given months
YEARS = 1 << 2   # get web file usages for given years
NDAYS = 1 << 3   # get web file usages in recent number of days
CLNFL = 1 << 4   # clean unused file only
MASKS = DATES | MONTH | YEARS | NDAYS   # every option that needs values

# run-time settings filled in by main() from the command line
RDADB = dict(
    OPTION=0,     # bit flag of the selected option
    OPTVAL='',    # the option string itself, passed on to the fill programs
    DOMAIL=1,     # 0 after option -n: no email notice
    BCKGRND='',   # " -b" after option -b, passed on to the fill programs
)
39
+
40
+ #
41
+ # main function to run this program
42
+ #
43
def main():
    """Parse the command line and run the selected fillrdadb action.

    -b and -n are pure flags; the first of -c, -d, -m, -N, -y selects the
    action, and any later bare argument is collected as a value for it.
    """
    option_flags = {"-c": CLNFL, "-d": DATES, "-m": MONTH, "-y": YEARS, "-N": NDAYS}
    params = []   # array of input values
    argv = sys.argv[1:]

    PgDBI.dssdb_dbname()

    for arg in argv:
        if arg == "-b":
            PgLOG.PGLOG['BCKGRND'] = "-b"
            RDADB['BCKGRND'] = " -b"
        elif arg == "-n":
            RDADB['DOMAIL'] = 0
            PgLOG.PGLOG['LOGMASK'] &= ~(PgLOG.EMLALL)
        elif not RDADB['OPTVAL'] and re.match(r'^-[cdmNy]$', arg):
            # only the first action option is accepted
            RDADB['OPTVAL'] = arg
            RDADB['OPTION'] |= option_flags.get(arg, 0)
        elif arg.startswith('-'):
            PgLOG.pglog(arg + ": Invalid Option", PgLOG.LGWNEX)
        elif RDADB['OPTION'] & MASKS:
            params.append(arg)
        else:
            PgLOG.pglog(arg + ": Invalid Parameter", PgLOG.LGWNEX)

    # an action is required, and every action but -c needs at least one value
    needs_values = RDADB['OPTION'] & MASKS
    if not RDADB['OPTION'] or (needs_values and not params):
        PgLOG.show_usage('fillrdadb')
    PgDBI.dssdb_dbname()
    PgLOG.cmdlog("fillrdadb {}".format(' '.join(argv)))

    if RDADB['OPTION'] & CLNFL:   # clean unused file only
        clean_unused_files()
    elif RDADB['OPTION'] & MASKS:
        fill_rdadb(RDADB['OPTVAL'], params)

    sys.exit(0)
89
+
90
+ #
91
+ # Fill RDADB info for given condition
92
+ #
93
def fill_rdadb(option, params):
    """Run the individual usage-filling programs for one option and its values.

    option is the option string (such as "-m") and params the list of its
    values; both are passed unchanged to each program via PgLOG.pgsystem().
    An email notice is sent afterwards unless RDADB['DOMAIL'] is off.
    """
    filecond = '{} {}'.format(option, ' '.join(params))
    PgLOG.pglog("Filling RDADB info for '{}' at {}".format(filecond, PgLOG.current_datetime()), PgLOG.LOGWRN)

    # custom OPeNDAP, globus web data and AWS web data usages, in this order
    commands = (
        "fillcodusage {} {}",
        "fillglobususage {} {}",
        "fillawsusage {} {}",
    )
    for command in commands:
        PgLOG.pgsystem(command.format(RDADB['BCKGRND'], filecond), PgLOG.LGWNEM, 5)

    if RDADB['DOMAIL']:
        send_email_notice()
    PgLOG.pglog("End Filling RDADB info at {}".format(PgLOG.current_datetime()), PgLOG.LGWNEM)
107
+
108
+ #
109
+ # clean unused MSS and Web files
110
+ #
111
def clean_unused_files():
    """Remove 'wfile' records of deleted web files that have no usage.

    Every record with status 'D' is checked against 'wusage'/'wuser'; it is
    deleted when no usage by a non-DSS user is found.
    """
    PgLOG.pglog("Check and clean deleted Web files that never been used at {}".format(PgLOG.current_datetime()), PgLOG.LOGWRN)
    pgrecs = PgDBI.pgmget("wfile", "wid", "status = 'D'", PgLOG.LGWNEX)

    wids = pgrecs['wid'] if pgrecs else []
    PgLOG.pglog("{} record(s) retrieved from Table 'wfile' at {}".format(len(wids), PgLOG.current_datetime()), PgLOG.LOGWRN)

    delcnt = 0
    fcond = r"wid = {} AND org_type <> 'DSS' AND wuid_read = wuid"
    for procnt, fid in enumerate(wids, 1):
        # progress report every 5000 records
        if procnt % 5000 == 0:
            PgLOG.pglog("{}/{} record(s) processed/removed from Table 'wfile'".format(procnt, delcnt), PgLOG.WARNLG)
        used = PgDBI.pgget("wusage, wuser", "", fcond.format(fid), PgLOG.LGWNEX)
        if used:
            continue
        # deleted web file never been used
        delcnt += PgDBI.pgdel("wfile", "wid = {}".format(fid), PgLOG.LGWNEX)

    PgLOG.pglog("{} record(s) removed from Table 'wfile' at {}".format(delcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)
130
+
131
+ #
132
+ # email notice of job done
133
+ #
134
def send_email_notice():
    """Email a job-done notice to every 'dssgrp' member with email_flag 'Y'.

    Nothing is sent when no such member is found. A non-empty result of
    PgLOG.send_email() is logged.
    """
    greeting = "Hi All,\n\nRDADB weekly data usage gathering is done at {}.\n\n".format(PgLOG.current_datetime())
    msg = greeting + "Please Let me know if you notice any problem.\n\nThanks,\n\nHua\n"

    pgrecs = PgDBI.pgmget("dssgrp", "logname", "email_flag = 'Y'", PgLOG.LGWNEX)
    if not pgrecs:
        return

    # comma-separated list of <logname>@ucar.edu addresses
    receiver = ', '.join(logname + "@ucar.edu" for logname in pgrecs['logname'])

    ret = PgLOG.send_email("RDADB Weekly Data Usage Gathered on " + PgUtil.curdate(), receiver, msg)
    if ret:
        PgLOG.pglog(ret, PgLOG.LOGWRN)
147
+
148
+ #
149
+ # call main() to start program
150
+ #
151
+ if __name__ == "__main__": main()
@@ -0,0 +1,32 @@
1
+
2
+ Collect MSS information from ORACLE server and online file information
3
+ from DSS Web Server logs, and fill tables in MySQL database 'RDADB'.
4
+
5
+ Backup the records in the tables and compress them, and archive the compressed
6
+ table files to MSS according to the options chosen on the command line.
7
+
8
+
9
+ Usage: fillrdadb [-b] [-c] [-d DateList] [-m MonthList]
10
+ [-n] [-N NumberDay] [-S] [-y YearList]
11
+
12
+ Select options, -c, -d, -m, -N, or -y, to run this application.
13
+
14
+ - Option -b, log process information into logfile only;
15
+
16
+ - Option -c, clean unused file names only;
17
+
18
+ - Option -d, get file info recorded on given dates. Date format is
19
+ YYYY-MM-DD, for example, 2004-01-01;
20
+
21
+ - Option -m, get file info recorded in given months. Month format is
22
+ YYYY-MM, for example, 2004-1;
23
+
24
+ - Option -n, do not send email notification after filling info
25
+
26
+ - Option -N, get file info recorded in recent NumberDay days;
27
+
28
+ - Option -y, get file info recorded in given years. Year format is YYYY,
29
+ for example, 2004;
30
+
31
+ This is a wrapper function. Internally it calls 'fillcodusage', 'fillawsusage',
32
+ and 'fillglobususage' with selected options.
@@ -0,0 +1,289 @@
1
+ #!/usr/bin/env python3
2
+ #
3
+ ###############################################################################
4
+ #
5
+ # Title : filltdsusage
6
+ # Author : Zaihua Ji, zji@ucar.edu
7
+ # Date : 03/11/2022
8
+ # 2025-03-26 transferred to package rda_python_metrics from
9
+ # https://github.com/NCAR/rda-database.git
10
+ # Purpose : python program to retrieve info from TDS logs
11
+ # and fill table tdsusage in PostgreSQL database dssdb.
12
+ #
13
+ # Github : https://github.com/NCAR/rda-python-metrics.git
14
+ #
15
+ ###############################################################################
16
+ #
17
+ import sys
18
+ import re
19
+ import glob
20
+ from os import path as op
21
+ from rda_python_common import PgLOG
22
+ from rda_python_common import PgUtil
23
+ from rda_python_common import PgFile
24
+ from rda_python_common import PgDBI
25
+ from . import PgIPInfo
26
+
27
+ # the define options for gathering TDS data usage, one at a time
28
# option bit flags for gathering TDS data usage, one at a time
MONTH = 1 << 1   # get TDS data usages for given months
YEARS = 1 << 2   # get TDS data usages for given years
NDAYS = 1 << 3   # get TDS data usages in recent number of days
MASKS = MONTH | YEARS | NDAYS

# run-time settings shared by the functions of this program
USAGE = dict(
    OPTION=0,                                  # bit flag of the selected option
    PGTBL="tdsusage",                          # table the usage records go into
    TDSLOG="/data/logs/nginx/{}.access.log",   # log file name template
    CDATE=PgUtil.curdate(),                    # current date, taken at import time
)
39
+
40
+ #
41
+ # main function to run this program
42
+ #
43
def main():
    """Parse the command line and fill or fix TDS usage records.

    -b and -f are pure flags; the first of -m, -N, -y selects the period
    type and every later bare argument is a value for it. With -N the
    number of days is turned into a list of months plus a lower date limit
    before the logs are read.
    """
    option_flags = {"-m": MONTH, "-y": YEARS, "-N": NDAYS}
    params = []   # array of input values
    argv = sys.argv[1:]
    datelimit = ''
    fixrec = False

    for arg in argv:
        if arg == "-b":
            PgLOG.PGLOG['BCKGRND'] = 1
        elif arg == "-f":
            fixrec = True
        elif USAGE['OPTION'] == 0 and re.match(r'^-[mNy]$', arg):
            # only the first period option is accepted
            USAGE['OPTION'] = option_flags.get(arg, USAGE['OPTION'])
        elif arg.startswith('-'):
            PgLOG.pglog(arg + ": Invalid Option", PgLOG.LGWNEX)
        elif USAGE['OPTION'] & MASKS:
            params.append(arg)
        else:
            PgLOG.pglog(arg + ": Invalid Parameter", PgLOG.LGWNEX)

    if not USAGE['OPTION'] or not params:
        PgLOG.show_usage('filltdsusage')
    PgDBI.dssdb_dbname()
    PgLOG.cmdlog("filltdsusage {}".format(' '.join(argv)))

    if fixrec:
        fix_tds_usages(USAGE['OPTION'], params)
    else:
        if USAGE['OPTION'] & NDAYS:
            # convert "recent N days" into the months they cover
            day = USAGE['CDATE']
            datelimit = PgUtil.adddate(day, 0, 0, -int(params[0]))
            USAGE['OPTION'] = MONTH
            params = []

            while day >= datelimit:
                year, month, dom = day.split('-')[:3]
                params.append("{}-{}".format(year, month))
                # step back by the day of month, leaving the current month
                day = PgUtil.adddate(day, 0, 0, -int(dom))

        fill_tds_usages(USAGE['OPTION'], params, datelimit)

    PgLOG.pglog(None, PgLOG.LOGWRN|PgLOG.SNDEML)   # send email out if any

    sys.exit(0)
92
+
93
+ #
94
+ # Fill TDS usages into table dssdb.tdsusage from tds access logs
95
+ #
96
def fill_tds_usages(option, inputs, datelimit):
    """Fill table dssdb.tdsusage from the TDS access logs.

    option    -- MONTH when inputs are YYYY-MM strings (the month is
                 zero-padded); otherwise each input is used as the log file
                 name prefix unchanged
    inputs    -- list of period strings selecting the log files
    datelimit -- when not empty, log files dated before it are skipped

    Accepted log entries of one file are combined per ip/dsid/method/etype
    and stored through add_usage_records() under the date taken from the
    log file name.
    """
    # patterns are compiled once instead of once per log line
    date_in_name = re.compile(r'(\d+-\d+-\d+).access.log$')
    skip_entry = re.compile(r'(/thredds/catalog|\sGooglebot/)')
    skip_static = re.compile(r'/thredds/\S+\.(png|jpg|gif|css|htm)')
    usage_entry = re.compile(r'^([\d\.]+)\s.*\s(-|\S+@\S+)\s+\[(\S+).*/thredds/(\w+)(/|/grid/)(aggregations|files).*/(ds\d\d\d.\d|[a-z]\d{6})/.*\s200\s+(\d+)(.*)$')
    engine_in_tail = re.compile(r' "(\w+.*\S+)" ')

    cntall = cntadd = 0

    for period in inputs:
        # get log file names
        if option&MONTH:
            tms = period.split('-')
            yrmn = "{}-{:02}".format(tms[0], int(tms[1]))
        else:
            yrmn = period

        logfiles = glob.glob(USAGE['TDSLOG'].format(yrmn + '*'))
        if not logfiles: PgLOG.pglog("{}: No file found to gather TDS usage".format(yrmn), PgLOG.LOGWRN)
        for logfile in logfiles:
            if not op.isfile(logfile):
                PgLOG.pglog("{}: Not exists to gather TDS usage".format(logfile), PgLOG.LOGWRN)
                continue
            ms = date_in_name.search(logfile)
            if not ms:
                # without a date the records cannot be stored:
                # add_usage_records() needs a date string
                PgLOG.pglog("{}: No date found in file name to gather TDS usage".format(logfile), PgLOG.LOGWRN)
                continue
            fdate = ms.group(1)
            if fdate >= USAGE['CDATE']: continue   # the log of today or later is skipped
            if datelimit and fdate < datelimit: continue
            PgLOG.pglog("Gathering usage info from {} at {}".format(logfile, PgLOG.current_datetime()), PgLOG.LOGWRN)
            tds = PgFile.open_local_file(logfile)
            if not tds: continue
            records = {}
            entcnt = 0
            try:
                while True:
                    line = tds.readline()
                    if not line: break
                    entcnt += 1
                    if entcnt%20000 == 0:
                        PgLOG.pglog("{}/{} TDS log entries processed/records to add".format(entcnt, len(records)), PgLOG.WARNLG)

                    if skip_entry.search(line): continue
                    if skip_static.search(line): continue
                    ms = usage_entry.match(line)
                    if not ms: continue
                    ip = ms.group(1)
                    email = ms.group(2)
                    # the entry date is not stored (the file date is used),
                    # but the call still validates the timestamp format
                    (_edate, etime) = get_record_date_time(ms.group(3))
                    method = ms.group(4)
                    etype = ms.group(6)[0].upper()
                    dsid = PgUtil.format_dataset_id(ms.group(7))
                    size = int(ms.group(8))
                    ms = engine_in_tail.search(ms.group(9))
                    engine = ms.group(1) if ms else 'Unknown'
                    key = "{}:{}:{}:{}".format(ip, dsid, method, etype)

                    if key in records:
                        records[key]['size'] += size
                        records[key]['fcount'] += 1
                    else:
                        records[key] = {'ip' : ip, 'email' : email, 'dsid' : dsid, 'time' : etime, 'size' : size,
                                        'fcount' : 1, 'method' : method, 'etype' : etype, 'engine' : engine}
            finally:
                # close the log even if parsing raises
                tds.close()
            if records: cntadd += add_usage_records(records, fdate)
            cntall += entcnt

    PgLOG.pglog("{} TDS usage records added for {} entries at {}".format(cntadd, cntall, PgLOG.current_datetime()), PgLOG.LOGWRN)
163
+
164
+
165
def get_record_date_time(ctime):
    """Split an access-log timestamp into a date and a time string.

    ctime is expected as DD/Mon/YYYY:HH:MM:SS. Returns the tuple
    ("YYYY-MM-DD", "HH:MM:SS"); the month name is converted by
    PgUtil.get_month(). Any other format is reported through PgLOG.pglog()
    with PgLOG.LGEREX.
    """
    ms = re.search(r'^(\d+)/(\w+)/(\d+):(\d+:\d+:\d+)$', ctime)
    if not ms:
        # report the offending value; the message used to be the fixed
        # text "time: Invalid date format"
        PgLOG.pglog("{}: Invalid date format".format(ctime), PgLOG.LGEREX)
        # NOTE(review): LGEREX presumably ends the program; if pglog()
        # returns, callers that unpack the result will fail on None - confirm
        return None

    d = int(ms.group(1))
    m = PgUtil.get_month(ms.group(2))
    y = ms.group(3)
    t = ms.group(4)
    return ("{}-{:02}-{:02}".format(y, m, d), t)
176
+
177
def add_usage_records(records, date):
    """Store the combined usage records of one date and return the count added.

    records maps a key to a usage dict built by fill_tds_usages(); date is
    the YYYY-MM-DD string of the log file. A record already in the table
    for the same date, time and ip is skipped, as is a record whose user
    cannot be resolved. Each record is also added to the yearly allusage
    table, and only stored in the usage table when that succeeds.
    """
    year = None
    quarter = 0
    cnt = 0
    found = re.search(r'(\d+)-(\d+)-', date)
    if found:
        year = found.group(1)
        quarter = 1 + int((int(found.group(2)) - 1)/3)

    for record in records.values():
        cond = "date = '{}' AND time = '{}' AND ip = '{}'".format(date, record['time'], record['ip'])
        if PgDBI.pgget(USAGE['PGTBL'], '', cond, PgLOG.LGEREX):
            continue

        if record['email'] != '-':
            # user with an email: take org type and country from table wuser
            wuid = PgDBI.check_wuser_wuid(record['email'], date)
            if not wuid:
                continue
            pgrec = PgDBI.pgget("wuser", "org_type, country", "wuid = {}".format(wuid), PgLOG.LGWNEX)
            if not pgrec:
                continue
            record['org_type'] = pgrec['org_type']
            record['country'] = pgrec['country']
        else:
            # no email logged: fall back to what is known about the ip address
            record['org_type'] = record['country'] = '-'
            ipinfo = PgIPInfo.set_ipinfo(record['ip'])
            if ipinfo:
                record['org_type'] = ipinfo['org_type']
                record['country'] = ipinfo['country']
                record['email'] = 'unknown@' + ipinfo['hostname']

        record['quarter'] = quarter
        record['date'] = date

        if add_to_allusage(year, record):
            cnt += PgDBI.pgadd(USAGE['PGTBL'], record, PgLOG.LOGWRN)

    PgLOG.pglog("{}: {} TDS usage records added at {}".format(date, cnt, PgLOG.current_datetime()), PgLOG.LOGWRN)

    return cnt
213
+
214
+
215
def add_to_allusage(year, pgrec):
    """Add one TDS usage record to the yearly allusage table.

    A copy of pgrec without the fields engine, method, etype and fcount is
    passed to PgDBI.add_yearly_allusage() with method 'TDS' and source 'T';
    that call's result is returned.
    """
    excluded = ('engine', 'method', 'etype', 'fcount')
    record = {'method' : 'TDS', 'source' : 'T'}
    record.update((fld, pgrec[fld]) for fld in pgrec if fld not in excluded)

    return PgDBI.add_yearly_allusage(year, record)
224
+
225
+ #
226
+ # Fix TDS usages in table dssdb.tdsusage by combine tds accesses with same ip,dsid,method&etype
227
+ #
228
def fix_tds_usages(option, inputs):
    """Combine stored TDS usage records that share ip, dsid, method and etype.

    option selects how each input is read: a number of recent days (NDAYS),
    a YYYY-MM month (MONTH) or otherwise a year. For every date in the
    resulting range the records with fcount = 0 are read in time order,
    merged per key and handed to fix_usage_records().
    """
    cntall = cntfix = 0

    for period in inputs:
        # first and last date to process for this input
        if option&NDAYS:
            edate = USAGE['CDATE']
            date = PgUtil.adddate(edate, 0, 0, -int(period))
        elif option&MONTH:
            parts = period.split('-')
            date = "{}-{:02}-01".format(parts[0], int(parts[1]))
            edate = PgUtil.enddate(date, 0, 'M')
        else:
            date = period + "-01-01"
            edate = period + "-12-31"

        while date <= edate:
            cond = "date = '{}' and fcount = 0 order by time".format(date)
            pgrecs = PgDBI.pgmget(USAGE['PGTBL'], '*', cond, PgLOG.LGEREX)
            cnt = len(pgrecs['ip']) if pgrecs else 0

            records = {}
            for idx in range(cnt):
                record = PgUtil.onerecord(pgrecs, idx)
                key = "{}:{}:{}:{}".format(record['ip'], record['dsid'], record['method'], record['etype'])
                merged = records.get(key)
                if merged is None:
                    # first record of this key carries the combined values
                    record['fcount'] = 1
                    records[key] = record
                else:
                    merged['size'] += record['size']
                    merged['fcount'] += 1

            if records:
                cntfix += fix_usage_records(records, date)
            cntall += cnt
            date = PgUtil.adddate(date, 0, 0, 1)

    PgLOG.pglog("{} TDS usage records combined into {} at {}".format(cntall, cntfix, PgLOG.current_datetime()), PgLOG.LOGWRN)
264
+
265
def fix_usage_records(records, date):
    """Store the combined records of one date and return the count updated.

    The TDS rows of that date are first deleted from the yearly allusage
    table and re-added from the combined records; each combined record then
    updates its row in the usage table. When at least one row was updated,
    the rows of that date still having fcount = 0 are deleted.
    """
    year = re.match(r'^(\d+)-', date).group(1)
    tname = 'allusage_' + year
    dcnt = PgDBI.pgdel(tname , "date = '{}' AND method = 'TDS'".format(date), PgLOG.LOGWRN)
    PgLOG.pglog("{} TDS usage records deleted for {} from {}".format(dcnt, date, tname), PgLOG.LOGWRN)

    cnt = 0
    for record in records.values():
        cond = "date = '{}' AND time = '{}' AND ip = '{}'".format(date, record['time'], record['ip'])
        if not add_to_allusage(year, record):
            continue
        cnt += PgDBI.pgupdt(USAGE['PGTBL'], record, cond, PgLOG.LOGWRN)

    if not cnt:
        return cnt

    PgLOG.pglog("{} TDS usage records updated for {} in {}".format(cnt, date, USAGE['PGTBL']), PgLOG.LOGWRN)
    # rows left with fcount = 0 were merged into an updated row
    dcnt = PgDBI.pgdel(USAGE['PGTBL'], "date = '{}' and fcount = 0".format(date), PgLOG.LOGWRN)
    PgLOG.pglog("{} TDS usage records deleted for {} from {}".format(dcnt, date, USAGE['PGTBL']), PgLOG.LOGWRN)

    return cnt
285
+
286
+ #
287
+ # call main() to start program
288
+ #
289
+ if __name__ == "__main__": main()
@@ -0,0 +1,17 @@
1
+
2
+ Retrieves usage information from RDA TDS Server logs under ../logs/tomcat to
3
+ fill table 'tdsusage' in PostgreSQL database 'dssdb'.
4
+
5
+ Usage: filltdsusage [-b] [-m MonthList] [-N NumberDay] [-y YearList]
6
+
7
+ select one of the options, -m, -N or -y each time to run
8
+ this application.
9
+
10
+ - Option -b, log process information into logfile only;
11
+
12
+ - Option -m, retrieve usage info in given months;
13
+
14
+ - Option -N, retrieve usage info in recent NumberDay days;
15
+
16
+ - Option -y, retrieve usage info in given years.
17
+