rda-python-metrics 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rda-python-metrics might be problematic. Click here for more details.
- rda_python_metrics/PgIPInfo.py +188 -0
- rda_python_metrics/PgView.py +782 -0
- rda_python_metrics/__init__.py +1 -0
- rda_python_metrics/fillawsusage.py +282 -0
- rda_python_metrics/fillawsusage.usg +17 -0
- rda_python_metrics/fillcodusage.py +247 -0
- rda_python_metrics/fillcodusage.usg +21 -0
- rda_python_metrics/fillcountry.py +79 -0
- rda_python_metrics/fillendtime.py +93 -0
- rda_python_metrics/fillglobususage.py +287 -0
- rda_python_metrics/fillglobususage.usg +17 -0
- rda_python_metrics/fillipinfo.py +185 -0
- rda_python_metrics/fillipinfo.usg +18 -0
- rda_python_metrics/filloneorder.py +155 -0
- rda_python_metrics/filloneorder.usg +41 -0
- rda_python_metrics/fillrdadb.py +151 -0
- rda_python_metrics/fillrdadb.usg +32 -0
- rda_python_metrics/filltdsusage.py +289 -0
- rda_python_metrics/filltdsusage.usg +17 -0
- rda_python_metrics/filluser.py +216 -0
- rda_python_metrics/filluser.usg +16 -0
- rda_python_metrics/logarch.py +359 -0
- rda_python_metrics/logarch.usg +27 -0
- rda_python_metrics/pgperson.py +72 -0
- rda_python_metrics/pgusername.py +50 -0
- rda_python_metrics/viewallusage.py +350 -0
- rda_python_metrics/viewallusage.usg +198 -0
- rda_python_metrics/viewcheckusage.py +289 -0
- rda_python_metrics/viewcheckusage.usg +185 -0
- rda_python_metrics/viewcodusage.py +314 -0
- rda_python_metrics/viewcodusage.usg +184 -0
- rda_python_metrics/viewordusage.py +340 -0
- rda_python_metrics/viewordusage.usg +224 -0
- rda_python_metrics/viewrqstusage.py +362 -0
- rda_python_metrics/viewrqstusage.usg +217 -0
- rda_python_metrics/viewtdsusage.py +323 -0
- rda_python_metrics/viewtdsusage.usg +191 -0
- rda_python_metrics/viewwebfile.py +294 -0
- rda_python_metrics/viewwebfile.usg +212 -0
- rda_python_metrics/viewwebusage.py +371 -0
- rda_python_metrics/viewwebusage.usg +211 -0
- rda_python_metrics-1.0.4.dist-info/METADATA +18 -0
- rda_python_metrics-1.0.4.dist-info/RECORD +47 -0
- rda_python_metrics-1.0.4.dist-info/WHEEL +5 -0
- rda_python_metrics-1.0.4.dist-info/entry_points.txt +22 -0
- rda_python_metrics-1.0.4.dist-info/licenses/LICENSE +21 -0
- rda_python_metrics-1.0.4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
###############################################################################
|
|
4
|
+
#
|
|
5
|
+
# Title : filloneorder
|
|
6
|
+
# Author : Zaihua Ji, zji@ucar.edu
|
|
7
|
+
# Date : 03/10/2022
|
|
8
|
+
# 2025-03-26 transferred to package rda_python_metrics from
|
|
9
|
+
# https://github.com/NCAR/rda-database.git
|
|
10
|
+
# Purpose : python program to fill one order usage on command line
|
|
11
|
+
#
|
|
12
|
+
# Github : https://github.com/NCAR/rda-python-metrics.git
|
|
13
|
+
#
|
|
14
|
+
###############################################################################
|
|
15
|
+
#
|
|
16
|
+
import sys
|
|
17
|
+
import re
|
|
18
|
+
from rda_python_common import PgLOG
|
|
19
|
+
from rda_python_common import PgDBI
|
|
20
|
+
from rda_python_common import PgIMMA
|
|
21
|
+
from rda_python_common import PgUtil
|
|
22
|
+
|
|
23
|
+
# -t dsid, -e email, -v request data volume, -i data input volume,
|
|
24
|
+
# -m delivery method, -a amount charged, -p pay method, -d request date, -x close date,
|
|
25
|
+
# -y close time, -c file count, -s specialist login name, -o order id,
|
|
26
|
+
# mandatory options: -t, -e, -v, and -m
|
|
27
|
+
|
|
28
|
+
#
|
|
29
|
+
# main function to run this program
|
|
30
|
+
#
|
|
31
|
+
def main():
    """Parse the command line, validate the options and record one order.

    Each value-taking option is a single letter (for example ``-t``) followed
    by its value; ``-b`` is a flag that sets PgLOG.PGLOG['BCKGRND'].  The
    collected values are keyed by option letter in ``params`` and handed to
    check_inputs() and add_one_order().
    """
    option = None
    params = {}
    argv = sys.argv[1:]

    for arg in argv:
        ms = re.match(r'^-(\w)$', arg)
        if ms:
            option = ms.group(1)
            if option == "b":
                PgLOG.PGLOG['BCKGRND'] = 1
                option = None
            # 'y' (close time) is documented and consumed by check_inputs(),
            # so it must be accepted here as well
            elif option not in "acdeimopstvxy":
                PgLOG.pglog("-{}: Invalid Option".format(option), PgLOG.LGWNEX)
        elif option and option not in params:
            # dataset ids are normalized before being stored
            if option == 't': arg = PgUtil.format_dataset_id(arg)
            params[option] = arg
            option = None
        else:
            # a value with no pending option, or a repeated option
            PgLOG.pglog(arg + ": parameter passed in without leading option", PgLOG.LGWNEX)

    if not params: PgLOG.show_usage('filloneorder')
    PgDBI.dssdb_dbname()
    PgLOG.cmdlog("filloneorder {}".format(' '.join(argv)))

    check_inputs(params)
    add_one_order(params)

    sys.exit(0)
|
|
61
|
+
|
|
62
|
+
def add_one_order(params):
    """Build the 'ousage' record from the checked options and store it.

    params is the option dictionary completed by check_inputs().  The record
    is first passed to add_to_allusage(); only when that succeeds is it added
    to table 'ousage'.  The outcome is logged either way.
    """
    record = {
        'dsid': params['t'],
        'wuid_request': params['u'],
        'dss_uname': params['s'],
        'date_request': params['d'],
        'date_closed': params['x'],
        'method': params['m'],
        'size_request': params['v'],
        'size_input': params['i'],
    }

    # optional fields are only stored when given on the command line
    for opt, field in (('a', 'amount'), ('p', 'pay_method')):
        if opt in params:
            record[field] = params[opt]
    record['count'] = params.get('c', 0)
    if 'o' in params:
        record['order_number'] = params['o']

    # year and quarter are derived from the leading YYYY-MM- of the request date
    year = None
    matched = re.match(r'(\d+)-(\d+)-', record['date_request'])
    if matched:
        year = int(matched.group(1))
        month = int(matched.group(2))
        record['quarter'] = 1 + int((month - 1) / 3)

    added = add_to_allusage(record, year, params['y']) and PgDBI.pgadd("ousage", record, PgLOG.LGEREX)
    if added:
        PgLOG.pglog("1 order added for " + params['e'], PgLOG.LOGWRN)
    else:
        PgLOG.pglog("No order added for " + params['e'], PgLOG.LOGWRN)
|
|
88
|
+
|
|
89
|
+
def add_to_allusage(record, year, ctime):
    """Add the order to the yearly all-usage table.

    Looks up the requesting user in table 'wuser' by record['wuid_request'],
    merges the order fields into that user record and passes it to
    PgDBI.add_yearly_allusage().  Returns that call's result, or 0 when the
    user is not found.
    """
    user = PgDBI.pgget("wuser", "email, org_type, country",
                       "wuid = {}".format(record['wuid_request']), PgLOG.LGWNEX)
    if not user:
        return 0

    user['dsid'] = record['dsid']
    # UCAR users are recorded under NCAR
    if user['org_type'] == "UCAR":
        user['org_type'] = "NCAR"
    user.update({
        'date': record['date_request'],
        'time': ctime,
        'quarter': record['quarter'],
        'size': record['size_request'],
        'method': record['method'],
        'source': 'O',
    })

    return PgDBI.add_yearly_allusage(year, user)
|
|
105
|
+
|
|
106
|
+
#
|
|
107
|
+
# check option inputs and fill up the missing ones for default values
|
|
108
|
+
#
|
|
109
|
+
def check_inputs(params):
    """Validate the options in params and fill in defaults, in place.

    Mandatory options are -t, -v, -m and -e; a missing one is reported with
    PgLOG.LGEREX.  Defaults: -i from -v, -x and -y from the current date and
    time, -d from -x.  Also sets params['u'] (user id resolved from the
    email) and params['s'] (checked specialist), then reports an order that
    is already recorded in table 'ousage'.
    """
    # mandatory inputs
    if 't' not in params:
        PgLOG.pglog("Missing Dataset ID per option -t", PgLOG.LGEREX)

    dsid = params['t']
    if not PgDBI.pgget("dataset", '', "dsid = '{}'".format(dsid), PgLOG.LGEREX):
        PgLOG.pglog(dsid + ": dsid not in RDADB", PgLOG.LGEREX)

    required = (
        ('v', "Missing order data value in Bytes per option -v"),
        ('m', "Missing data delivery method per option -m"),
        ('e', "Missing user email per option -e"),
    )
    for opt, errmsg in required:
        if opt not in params:
            PgLOG.pglog(errmsg, PgLOG.LGEREX)

    # default values
    (cdate, ctime) = PgUtil.get_date_time()
    if 'i' not in params:
        params['i'] = params['v']
    if 'x' not in params:
        params['x'] = cdate
    if 'y' not in params:
        params['y'] = ctime
    if 'd' not in params:
        params['d'] = params['x']

    params['u'] = PgDBI.check_wuser_wuid(params['e'], params['d'])
    # fall back to the current login name when no specialist is given
    given = params['s'] if 's' in params else PgLOG.PGLOG['CURUID']
    params['s'] = check_specialist(dsid, given)

    # check if order is recorded already
    ocond = "dsid = '{}' AND wuid_request = {} AND size_request = {} and date_request = '{}'".format(
        dsid, params['u'], params['v'], params['d'])
    if PgDBI.pgget("ousage", '', ocond, PgLOG.LGEREX):
        PgLOG.pglog("Order of {} Bytes Data from {} for {} on {} recorded on {} already".format(
            params['v'], dsid, params['e'], params['d'], params['x']), PgLOG.LGWNEX)
|
|
141
|
+
|
|
142
|
+
#
|
|
143
|
+
# return the dataset owner if specialist not given
|
|
144
|
+
#
|
|
145
|
+
def check_specialist(dsid, specialist):
    """Return the specialist login name to record for an order.

    dsid       -- dataset id the order is for
    specialist -- login name given with -s, or the current login name

    The given name is returned when it is found in table 'dssgrp'.
    Otherwise the priority-1 owner of the dataset is returned, or "datahelp"
    when the dataset has no such owner.
    """
    # look the given login name up; the original compared against the
    # literal string 'specialist', so a valid name was never recognized
    if specialist and PgDBI.pgget("dssgrp", "", "logname = '{}'".format(specialist), PgLOG.LGEREX):
        return specialist

    scond = "specialist = logname AND dsid = '{}' AND priority = 1".format(dsid)
    pgrec = PgDBI.pgget("dsowner, dssgrp", "specialist", scond, PgLOG.LGEREX)
    return pgrec['specialist'] if pgrec else "datahelp"
|
|
151
|
+
|
|
152
|
+
#
|
|
153
|
+
# call main() to start program
|
|
154
|
+
#
|
|
155
|
+
if __name__ == "__main__": main()
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
|
|
2
|
+
Fill usage information for one special order in table 'ousage' in MySQL
|
|
3
|
+
database 'dssdb'.
|
|
4
|
+
|
|
5
|
+
Usage: filloneorder -e UserEmail -m DeliveryMethod -t DatasetID -v DataVolume \
|
|
6
|
+
[-a CostAmount] [-b] [-c FileCount] [-d DateOrdered] \
|
|
7
|
+
[-i InputDataVolume] [-o OrderID] [-p PaymentMethod] \
|
|
8
|
+
[-s SpecialistLoginName] [-x DateClosed -y TimeClosed]
|
|
9
|
+
|
|
10
|
+
select the Mandatory options, -e, -m, -t and -v, and any other Optional
|
|
11
|
+
options to run this application.
|
|
12
|
+
|
|
13
|
+
- Option -b, log process information into logfile only;
|
|
14
|
+
|
|
15
|
+
- Option -a, the amount of dollars if the order is not free ;
|
|
16
|
+
|
|
17
|
+
- Option -c, the number of data files for the order;
|
|
18
|
+
|
|
19
|
+
- Option -d, date the order is opened;
|
|
20
|
+
|
|
21
|
+
- option -e, user email address who ordered the data;
|
|
22
|
+
|
|
23
|
+
- option -i, the data volume involved for processing the order;
|
|
24
|
+
|
|
25
|
+
- Option -m, the delivery method for the ordered data;
|
|
26
|
+
|
|
27
|
+
- option -o, order ID, up to 30 characters;
|
|
28
|
+
|
|
29
|
+
- option -p, the payment method for the amount per option -a;
|
|
30
|
+
|
|
31
|
+
- option -s, the specialist who handle the order. The login name is used
|
|
32
|
+
if it is not specified, and the default dataset owner is
|
|
33
|
+
used if given specialist is not a valid DSS specialist;
|
|
34
|
+
|
|
35
|
+
- option -t, dataset id the data ordered from;
|
|
36
|
+
|
|
37
|
+
- option -v, the final data volume for the order;
|
|
38
|
+
|
|
39
|
+
- Option -x, date the order is closed;
|
|
40
|
+
|
|
41
|
+
- Option -y, time the order is closed.
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
###############################################################################
|
|
4
|
+
#
|
|
5
|
+
# Title : fillrdadb
|
|
6
|
+
# Author : Zaihua Ji, zji@ucar.edu
|
|
7
|
+
# Date : 04/07/2022
|
|
8
|
+
# 2025-03-26 transferred to package rda_python_metrics from
|
|
9
|
+
# https://github.com/NCAR/rda-database.git
|
|
10
|
+
# Purpose : python program to retrieve info from data logs, and fill tables
|
|
11
|
+
# in PostgreSQL database.schema rdadb.dssdb
|
|
12
|
+
#
|
|
13
|
+
# Github : https://github.com/NCAR/rda-python-metrics.git
|
|
14
|
+
#
|
|
15
|
+
###############################################################################
|
|
16
|
+
#
|
|
17
|
+
import sys
|
|
18
|
+
import re
|
|
19
|
+
from os import path as op
|
|
20
|
+
from rda_python_common import PgLOG
|
|
21
|
+
from rda_python_common import PgFile
|
|
22
|
+
from rda_python_common import PgUtil
|
|
23
|
+
from rda_python_common import PgDBI
|
|
24
|
+
|
|
25
|
+
# the define options for gathering web online file usage, one at a time;
# each is a distinct bit so they can be tested with '&' against RDADB['OPTION']
DATES = 0x01   # get web file usages for given dates
MONTH = 0x02   # get web file usages for given months
YEARS = 0x04   # get web file usages for given years
NDAYS = 0x08   # get web file usages in recent number of days
CLNFL = 0x10   # clean unused file only
MASKS = (MONTH|YEARS|DATES|NDAYS)   # the options that take parameter values

# settings filled in by main() from the command line
RDADB = {
    'OPTION' : 0,     # OR-ed option flags from above
    'OPTVAL' : '',    # the selecting option string as typed, e.g. "-d"
    'DOMAIL' : 1,     # send the email notice when true; option -n sets it to 0
    'BCKGRND' : ''    # set to " -b" by option -b and inserted into the sub-commands
}
|
|
39
|
+
|
|
40
|
+
#
|
|
41
|
+
# main function to run this program
|
|
42
|
+
#
|
|
43
|
+
def main():
    """Parse the command line and run the requested fillrdadb action.

    -b and -n are flags; one of -c, -d, -m, -N or -y selects the action and
    is remembered in RDADB.  Remaining arguments are collected as values for
    the selected option.  With -c the unused files are cleaned, otherwise
    fill_rdadb() is called with the option and its values.
    """
    params = []   # array of input values
    argv = sys.argv[1:]
    flagbits = {"-c": CLNFL, "-d": DATES, "-m": MONTH, "-y": YEARS, "-N": NDAYS}

    PgDBI.dssdb_dbname()

    for arg in argv:
        if arg == "-b":
            PgLOG.PGLOG['BCKGRND'] = "-b"
            RDADB['BCKGRND'] = " -b"
        elif arg == "-n":
            RDADB['DOMAIL'] = 0
            PgLOG.PGLOG['LOGMASK'] &= ~(PgLOG.EMLALL)
        elif re.match(r'^-[cdmNy]$', arg) and not RDADB['OPTVAL']:
            # only the first selecting option is accepted
            RDADB['OPTVAL'] = arg
            RDADB['OPTION'] |= flagbits.get(arg, 0)
        elif arg.startswith('-'):
            PgLOG.pglog(arg + ": Invalid Option", PgLOG.LGWNEX)
        elif RDADB['OPTION'] & MASKS:
            params.append(arg)
        else:
            PgLOG.pglog(arg + ": Invalid Parameter", PgLOG.LGWNEX)

    # an option is required, and so are values unless it is -c
    if not RDADB['OPTION'] or (RDADB['OPTION'] & MASKS and not params):
        PgLOG.show_usage('fillrdadb')

    PgDBI.dssdb_dbname()
    PgLOG.cmdlog("fillrdadb {}".format(' '.join(argv)))

    if RDADB['OPTION'] & CLNFL:   # clean unused file only
        clean_unused_files()
    elif RDADB['OPTION'] & MASKS:
        fill_rdadb(RDADB['OPTVAL'], params)

    sys.exit(0)
|
|
89
|
+
|
|
90
|
+
#
|
|
91
|
+
# Fill RDADB info for given condition
|
|
92
|
+
#
|
|
93
|
+
def fill_rdadb(option, params):
    """Run the usage-filling commands for the given option and values.

    option -- the selecting option string, e.g. "-m"
    params -- list of values for that option

    Each command is run through PgLOG.pgsystem() with the same option and
    values; an email notice follows when RDADB['DOMAIL'] is set.
    """
    filecond = '{} {}'.format(option, ' '.join(params))
    PgLOG.pglog("Filling RDADB info for '{}' at {}".format(filecond, PgLOG.current_datetime()), PgLOG.LOGWRN)

    # in order: custom OPeNDAP usages, globus web data usages, AWS web data usages
    for command in ("fillcodusage", "fillglobususage", "fillawsusage"):
        PgLOG.pgsystem("{} {} {}".format(command, RDADB['BCKGRND'], filecond), PgLOG.LGWNEM, 5)

    if RDADB['DOMAIL']:
        send_email_notice()
    PgLOG.pglog("End Filling RDADB info at {}".format(PgLOG.current_datetime()), PgLOG.LGWNEM)
|
|
107
|
+
|
|
108
|
+
#
|
|
109
|
+
# clean unused MSS and Web files
|
|
110
|
+
#
|
|
111
|
+
def clean_unused_files():
    """Remove records of deleted web files that have no recorded usage.

    Reads the ids of 'wfile' records with status 'D' and deletes each one for
    which no matching record is found in 'wusage' joined with 'wuser'.
    Progress is logged every 5000 records.
    """
    PgLOG.pglog("Check and clean deleted Web files that never been used at {}".format(PgLOG.current_datetime()), PgLOG.LOGWRN)
    pgrecs = PgDBI.pgmget("wfile", "wid", "status = 'D'", PgLOG.LGWNEX)

    wids = pgrecs['wid'] if pgrecs else []
    PgLOG.pglog("{} record(s) retrieved from Table 'wfile' at {}".format(len(wids), PgLOG.current_datetime()), PgLOG.LOGWRN)

    delcnt = 0
    fcond = r"wid = {} AND org_type <> 'DSS' AND wuid_read = wuid"
    for procnt, fid in enumerate(wids, start=1):
        if procnt % 5000 == 0:
            PgLOG.pglog("{}/{} record(s) processed/removed from Table 'wfile'".format(procnt, delcnt), PgLOG.WARNLG)
        used = PgDBI.pgget("wusage, wuser", "", fcond.format(fid), PgLOG.LGWNEX)
        if not used:
            # deleted web file never been used
            delcnt += PgDBI.pgdel("wfile", "wid = {}".format(fid), PgLOG.LGWNEX)

    PgLOG.pglog("{} record(s) removed from Table 'wfile' at {}".format(delcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)
|
|
130
|
+
|
|
131
|
+
#
|
|
132
|
+
# email notice of job done
|
|
133
|
+
#
|
|
134
|
+
def send_email_notice():
    """Email the "usage gathering is done" notice.

    Recipients are the login names in table 'dssgrp' with email_flag 'Y',
    each with "@ucar.edu" appended.  Nothing is sent when no recipient is
    found.  A non-empty result from PgLOG.send_email() is logged.
    """
    msg = ("Hi All,\n\nRDADB weekly data usage gathering is done at {}.\n\n".format(PgLOG.current_datetime()) +
           "Please Let me know if you notice any problem.\n\nThanks,\n\nHua\n")
    pgrecs = PgDBI.pgmget("dssgrp", "logname", "email_flag = 'Y'", PgLOG.LGWNEX)
    if not pgrecs:
        # no recipients: return before 'receiver' would be used unbound
        return

    receiver = ', '.join(logname + "@ucar.edu" for logname in pgrecs['logname'])

    ret = PgLOG.send_email("RDADB Weekly Data Usage Gathered on " + PgUtil.curdate(), receiver, msg)
    if ret: PgLOG.pglog(ret, PgLOG.LOGWRN)
|
|
147
|
+
|
|
148
|
+
#
|
|
149
|
+
# call main() to start program
|
|
150
|
+
#
|
|
151
|
+
if __name__ == "__main__": main()
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
|
|
2
|
+
Collect MSS information from ORACLE server and online file information
|
|
3
|
+
from DSS Web Server logs, and fill tables in MySQL database 'RDADB'.
|
|
4
|
+
|
|
5
|
+
Backup the records in the tables and compress them, and archive the compressed
|
|
6
|
+
table files to MSS according to the options chosen on the command line.
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
Usage: fillrdadb [-b] [-c] [-d DateList] [-m MonthList]
|
|
10
|
+
[-n] [-N NumberDay] [-S] [-y YearList]
|
|
11
|
+
|
|
12
|
+
Select options, -c, -d, -m, -N, or -y, to run this application.
|
|
13
|
+
|
|
14
|
+
- Option -b, log process information into logfile only;
|
|
15
|
+
|
|
16
|
+
- Option -c, clean unused file names only;
|
|
17
|
+
|
|
18
|
+
- Option -d, get file info recorded on given dates. Date format is
|
|
19
|
+
YYYY-MM-DD, for example, 2004-01-01;
|
|
20
|
+
|
|
21
|
+
- Option -m, get file info recorded in given months. Month format is
|
|
22
|
+
YYYY-MM, for example, 2004-1;
|
|
23
|
+
|
|
24
|
+
- Option -n, do not send email notification after filling info
|
|
25
|
+
|
|
26
|
+
- Option -N, get file info recorded in recent NumberDay days;
|
|
27
|
+
|
|
28
|
+
- Option -y, get file info recorded in given years. Year format is YYYY,
|
|
29
|
+
for example, 2004;
|
|
30
|
+
|
|
31
|
+
This is a wrapping function. Internally it calls 'fillcodusage', 'fillawsusage',
|
|
32
|
+
and 'fillglobususage' with selected options.
|
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
###############################################################################
|
|
4
|
+
#
|
|
5
|
+
# Title : filltdsusage
|
|
6
|
+
# Author : Zaihua Ji, zji@ucar.edu
|
|
7
|
+
# Date : 03/11/2022
|
|
8
|
+
# 2025-03-26 transferred to package rda_python_metrics from
|
|
9
|
+
# https://github.com/NCAR/rda-database.git
|
|
10
|
+
# Purpose : python program to retrieve info from TDS logs
|
|
11
|
+
# and fill table tdsusage in PostgreSQL database dssdb.
|
|
12
|
+
#
|
|
13
|
+
# Github : https://github.com/NCAR/rda-python-metrics.git
|
|
14
|
+
#
|
|
15
|
+
###############################################################################
|
|
16
|
+
#
|
|
17
|
+
import sys
|
|
18
|
+
import re
|
|
19
|
+
import glob
|
|
20
|
+
from os import path as op
|
|
21
|
+
from rda_python_common import PgLOG
|
|
22
|
+
from rda_python_common import PgUtil
|
|
23
|
+
from rda_python_common import PgFile
|
|
24
|
+
from rda_python_common import PgDBI
|
|
25
|
+
from . import PgIPInfo
|
|
26
|
+
|
|
27
|
+
# the define options for gathering TDS data usage, one at a time;
# each is a distinct bit so they can be tested with '&' against USAGE['OPTION']
MONTH = 0x02   # get TDS data usages for given months
YEARS = 0x04   # get TDS data usages for given years
NDAYS = 0x08   # get TDS data usages in recent number of days
MASKS = (MONTH|YEARS|NDAYS)   # any of the options above

# settings shared by the functions in this file
USAGE = {
    'OPTION' : 0,                                  # the option flag chosen on the command line
    'PGTBL' : "tdsusage",                          # table the usage records are stored in
    'TDSLOG' : "/data/logs/nginx/{}.access.log",   # log path template; {} is filled with a date prefix + '*' for glob
    'CDATE' : PgUtil.curdate()                     # current date, evaluated once when the module is loaded
}
|
|
39
|
+
|
|
40
|
+
#
|
|
41
|
+
# main function to run this program
|
|
42
|
+
#
|
|
43
|
+
def main():
    """Parse the command line and fill or fix TDS usage records.

    -b and -f are flags; one of -m, -N or -y selects how the remaining
    arguments are read.  With -f the existing records are fixed through
    fix_tds_usages().  Otherwise usage is gathered with fill_tds_usages();
    for -N the day count is first turned into a list of months plus a date
    limit.
    """
    params = []   # array of input values
    argv = sys.argv[1:]
    datelimit = ''
    fixrec = False
    flags = {"-m": MONTH, "-y": YEARS, "-N": NDAYS}

    for arg in argv:
        if arg == "-b":
            PgLOG.PGLOG['BCKGRND'] = 1
        elif arg == "-f":
            fixrec = True
        elif re.match(r'^-[mNy]$', arg) and USAGE['OPTION'] == 0:
            # only the first selecting option is accepted
            USAGE['OPTION'] = flags.get(arg, 0)
        elif arg.startswith('-'):
            PgLOG.pglog(arg + ": Invalid Option", PgLOG.LGWNEX)
        elif USAGE['OPTION'] & MASKS:
            params.append(arg)
        else:
            PgLOG.pglog(arg + ": Invalid Parameter", PgLOG.LGWNEX)

    if not USAGE['OPTION'] or not params:
        PgLOG.show_usage('filltdsusage')
    PgDBI.dssdb_dbname()
    PgLOG.cmdlog("filltdsusage {}".format(' '.join(argv)))

    if fixrec:
        fix_tds_usages(USAGE['OPTION'], params)
    else:
        if USAGE['OPTION'] & NDAYS:
            # turn "recent N days" into the months they cover plus a date limit
            day = USAGE['CDATE']
            datelimit = PgUtil.adddate(day, 0, 0, -int(params[0]))
            USAGE['OPTION'] = MONTH
            params = []

            while day >= datelimit:
                parts = day.split('-')
                params.append("{}-{}".format(parts[0], parts[1]))
                # step back by the day of month to land in the previous month
                day = PgUtil.adddate(day, 0, 0, -int(parts[2]))

        fill_tds_usages(USAGE['OPTION'], params, datelimit)

    PgLOG.pglog(None, PgLOG.LOGWRN|PgLOG.SNDEML)   # send email out if any

    sys.exit(0)
|
|
92
|
+
|
|
93
|
+
#
|
|
94
|
+
# Fill TDS usages into table dssdb.tdsusage from tds access logs
|
|
95
|
+
#
|
|
96
|
+
def fill_tds_usages(option, inputs, datelimit):
    """Gather TDS usage from the access logs and add it to the usage table.

    option    -- MONTH when each input is 'YYYY-MM' (the month is zero-padded
                 before use); otherwise the input is used as is
    inputs    -- list of period strings used as log file name prefixes
    datelimit -- when non-empty, log files dated before it are skipped

    Log files dated on or after USAGE['CDATE'] are skipped as well.  Within
    one log file, accepted entries with the same ip, dataset id, method and
    entry type are combined into one record (sizes summed, entries counted)
    before being passed to add_usage_records().
    """
    # patterns are compiled once instead of on every log line
    skip_catalog = re.compile(r'(/thredds/catalog|\sGooglebot/)')
    skip_static = re.compile(r'/thredds/\S+\.(png|jpg|gif|css|htm)')
    entry = re.compile(r'^([\d\.]+)\s.*\s(-|\S+@\S+)\s+\[(\S+).*/thredds/(\w+)(/|/grid/)(aggregations|files).*/(ds\d\d\d.\d|[a-z]\d{6})/.*\s200\s+(\d+)(.*)$')
    agent = re.compile(r' "(\w+.*\S+)" ')

    cntall = cntadd = 0

    for period in inputs:
        # get log file names
        if option&MONTH:
            tms = period.split('-')
            yrmn = "{}-{:02}".format(tms[0], int(tms[1]))
        else:
            yrmn = period

        logfiles = glob.glob(USAGE['TDSLOG'].format(yrmn + '*'))
        if not logfiles: PgLOG.pglog("{}: No file found to gather TDS usage".format(yrmn), PgLOG.LOGWRN)
        for logfile in logfiles:
            if not op.isfile(logfile):
                PgLOG.pglog("{}: Not exists to gather TDS usage".format(logfile), PgLOG.LOGWRN)
                continue
            ms = re.search(r'(\d+-\d+-\d+).access.log$', logfile)
            if not ms:
                # without a date the records cannot be stored; the original
                # passed None on and failed inside add_usage_records()
                PgLOG.pglog("{}: No date found in file name to gather TDS usage".format(logfile), PgLOG.LOGWRN)
                continue
            fdate = ms.group(1)
            if fdate >= USAGE['CDATE']: continue
            if datelimit and fdate < datelimit: continue
            PgLOG.pglog("Gathering usage info from {} at {}".format(logfile, PgLOG.current_datetime()), PgLOG.LOGWRN)
            tds = PgFile.open_local_file(logfile)
            if not tds: continue
            records = {}
            entcnt = 0
            try:
                while True:
                    line = tds.readline()
                    if not line: break
                    entcnt += 1
                    if entcnt%20000 == 0:
                        cnt = len(records)
                        PgLOG.pglog("{}/{} TDS log entries processed/records to add".format(entcnt, cnt), PgLOG.WARNLG)

                    # skip catalog browsing, Googlebot and static page content
                    if skip_catalog.search(line): continue
                    if skip_static.search(line): continue
                    ms = entry.match(line)
                    if not ms: continue
                    ip = ms.group(1)
                    email = ms.group(2)
                    # the date part is not used (the file date is), but the
                    # call still checks the time stamp format
                    (_, rtime) = get_record_date_time(ms.group(3))
                    method = ms.group(4)
                    etype = ms.group(6)[0].upper()
                    dsid = PgUtil.format_dataset_id(ms.group(7))
                    size = int(ms.group(8))
                    ebuf = ms.group(9)
                    ms = agent.search(ebuf)
                    engine = ms.group(1) if ms else 'Unknown'
                    key = "{}:{}:{}:{}".format(ip, dsid, method, etype)

                    if key in records:
                        records[key]['size'] += size
                        records[key]['fcount'] += 1
                    else:
                        records[key] = {'ip' : ip, 'email' : email, 'dsid' : dsid, 'time' : rtime, 'size' : size,
                                        'fcount' : 1, 'method' : method, 'etype' : etype, 'engine' : engine}
            finally:
                # close the log file even when a line fails to process
                tds.close()
            if records: cntadd += add_usage_records(records, fdate)
            cntall += entcnt

    PgLOG.pglog("{} TDS usage records added for {} entries at {}".format(cntadd, cntall, PgLOG.current_datetime()), PgLOG.LOGWRN)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def get_record_date_time(ctime):
    """Split an access-log time stamp into a date and a time.

    ctime -- a string of the form DD/Mon/YYYY:HH:MM:SS

    Returns the tuple ('YYYY-MM-DD', 'HH:MM:SS'), with the month name
    converted by PgUtil.get_month().  A string in another form is reported
    with PgLOG.LGEREX, and None is returned if that call comes back.
    """
    ms = re.search(r'^(\d+)/(\w+)/(\d+):(\d+:\d+:\d+)$', ctime)
    if ms:
        d = int(ms.group(1))
        m = PgUtil.get_month(ms.group(2))
        y = ms.group(3)
        t = ms.group(4)
        return ("{}-{:02}-{:02}".format(y, m, d), t)
    else:
        # report the offending value; the original message was the literal "time:"
        PgLOG.pglog("{}: Invalid date format".format(ctime), PgLOG.LGEREX)
|
|
176
|
+
|
|
177
|
+
def add_usage_records(records, date):
    """Add the combined usage records of one date to the usage table.

    records -- dictionary of usage records gathered from one log file
    date    -- 'YYYY-MM-DD' date the records belong to

    A record already present for the same date, time and ip is skipped.
    Organization type and country come from the ip information when the
    email is '-', otherwise from the matching 'wuser' record.  Each record is
    added to the yearly all-usage table first and to USAGE['PGTBL'] only when
    that succeeds.  Returns the count added to USAGE['PGTBL'].
    """
    added = 0
    year = None
    quarter = 0
    matched = re.search(r'(\d+)-(\d+)-', date)
    if matched:
        year = matched.group(1)
        quarter = 1 + int((int(matched.group(2)) - 1)/3)

    for record in records.values():
        cond = "date = '{}' AND time = '{}' AND ip = '{}'".format(date, record['time'], record['ip'])
        if PgDBI.pgget(USAGE['PGTBL'], '', cond, PgLOG.LGEREX):
            continue

        if record['email'] == '-':
            # no email in the log entry: fall back to the ip information
            record['org_type'] = record['country'] = '-'
            ipinfo = PgIPInfo.set_ipinfo(record['ip'])
            if ipinfo:
                record['org_type'] = ipinfo['org_type']
                record['country'] = ipinfo['country']
                record['email'] = 'unknown@' + ipinfo['hostname']
        else:
            wuid = PgDBI.check_wuser_wuid(record['email'], date)
            if not wuid:
                continue
            user = PgDBI.pgget("wuser", "org_type, country", "wuid = {}".format(wuid), PgLOG.LGWNEX)
            if not user:
                continue
            record['org_type'] = user['org_type']
            record['country'] = user['country']

        record['quarter'] = quarter
        record['date'] = date

        if add_to_allusage(year, record):
            added += PgDBI.pgadd(USAGE['PGTBL'], record, PgLOG.LOGWRN)

    PgLOG.pglog("{}: {} TDS usage records added at {}".format(date, added, PgLOG.current_datetime()), PgLOG.LOGWRN)

    return added
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def add_to_allusage(year, pgrec):
    """Add one TDS usage record to the yearly all-usage table.

    Copies every field of pgrec except engine, method, etype and fcount into
    a record with method 'TDS' and source 'T', and returns the result of
    PgDBI.add_yearly_allusage() for it.
    """
    excluded = re.compile(r'^(engine|method|etype|fcount)$')
    record = {'method' : 'TDS', 'source' : 'T'}

    for fld, value in pgrec.items():
        if not excluded.match(fld):
            record[fld] = value

    return PgDBI.add_yearly_allusage(year, record)
|
|
224
|
+
|
|
225
|
+
#
|
|
226
|
+
# Fix TDS usages in table dssdb.tdsusage by combine tds accesses with same ip,dsid,method&etype
|
|
227
|
+
#
|
|
228
|
+
def fix_tds_usages(option, inputs):
    """Combine stored TDS usage records with the same ip, dsid, method and etype.

    option -- NDAYS, MONTH or YEARS; tells how each input is read
    inputs -- day counts, 'YYYY-MM' months or 'YYYY' years

    For every date in each period, the records with fcount = 0 are read in
    time order, combined by key (sizes summed, records counted) and handed to
    fix_usage_records().
    """
    cntall = cntfix = 0

    for period in inputs:
        # work out the first and last date of the period
        if option & NDAYS:
            last = USAGE['CDATE']
            day = PgUtil.adddate(last, 0, 0, -int(period))
        elif option & MONTH:
            parts = period.split('-')
            day = "{}-{:02}-01".format(parts[0], int(parts[1]))
            last = PgUtil.enddate(day, 0, 'M')
        else:
            day = period + "-01-01"
            last = period + "-12-31"

        while day <= last:
            cond = "date = '{}' and fcount = 0 order by time".format(day)
            pgrecs = PgDBI.pgmget(USAGE['PGTBL'], '*', cond, PgLOG.LGEREX)
            cnt = len(pgrecs['ip']) if pgrecs else 0
            combined = {}
            for idx in range(cnt):
                record = PgUtil.onerecord(pgrecs, idx)
                key = "{}:{}:{}:{}".format(record['ip'], record['dsid'], record['method'], record['etype'])
                kept = combined.get(key)
                if kept is None:
                    # the first record of a key is kept and absorbs later ones
                    record['fcount'] = 1
                    combined[key] = record
                else:
                    kept['size'] += record['size']
                    kept['fcount'] += 1

            if combined:
                cntfix += fix_usage_records(combined, day)
            cntall += cnt
            day = PgUtil.adddate(day, 0, 0, 1)

    PgLOG.pglog("{} TDS usage records combined into {} at {}".format(cntall, cntfix, PgLOG.current_datetime()), PgLOG.LOGWRN)
|
|
264
|
+
|
|
265
|
+
def fix_usage_records(records, date):
    """Store the combined TDS records of one date.

    records -- dictionary of combined usage records
    date    -- 'YYYY-MM-DD' date the records belong to

    The TDS records of the date are first deleted from the yearly all-usage
    table.  Each combined record is then added back there and, when that
    succeeds, used to update the stored record with the same date, time and
    ip.  When anything was updated, the records of the date still having
    fcount = 0 are deleted.  Returns the count of updated records.
    """
    year = re.match(r'^(\d+)-', date).group(1)
    tname = 'allusage_' + year
    dcnt = PgDBI.pgdel(tname , "date = '{}' AND method = 'TDS'".format(date), PgLOG.LOGWRN)
    PgLOG.pglog("{} TDS usage records deleted for {} from {}".format(dcnt, date, tname), PgLOG.LOGWRN)

    updated = 0
    for record in records.values():
        cond = "date = '{}' AND time = '{}' AND ip = '{}'".format(date, record['time'], record['ip'])
        if add_to_allusage(year, record):
            updated += PgDBI.pgupdt(USAGE['PGTBL'], record, cond, PgLOG.LOGWRN)

    # NOTE(review): the flattened source does not show whether the delete
    # below sits inside this 'if'; it is kept inside so that records are only
    # removed after a successful update -- confirm against the package source
    if updated:
        PgLOG.pglog("{} TDS usage records updated for {} in {}".format(updated, date, USAGE['PGTBL']), PgLOG.LOGWRN)
        dcnt = PgDBI.pgdel(USAGE['PGTBL'], "date = '{}' and fcount = 0".format(date), PgLOG.LOGWRN)
        PgLOG.pglog("{} TDS usage records deleted for {} from {}".format(dcnt, date, USAGE['PGTBL']), PgLOG.LOGWRN)

    return updated
|
|
285
|
+
|
|
286
|
+
#
|
|
287
|
+
# call main() to start program
|
|
288
|
+
#
|
|
289
|
+
if __name__ == "__main__": main()
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
|
|
2
|
+
Retrieves usage information from RDA TDS Server logs under ../logs/tomcat to
|
|
3
|
+
fill table 'tdsusage' in MySQL database 'dssdb'.
|
|
4
|
+
|
|
5
|
+
Usage: filltdsusage [-b] [-m MonthList] [-N NumberDay] [-y YearList]
|
|
6
|
+
|
|
7
|
+
select one of the options, -m, -N or -y each time to run
|
|
8
|
+
this application.
|
|
9
|
+
|
|
10
|
+
- Option -b, log process information into logfile only;
|
|
11
|
+
|
|
12
|
+
- Option -m, retrieve usage info in given months;
|
|
13
|
+
|
|
14
|
+
- Option -N, retrieve usage info in recent NumberDay days;
|
|
15
|
+
|
|
16
|
+
- Option -y, retrieve usage info in given years.
|
|
17
|
+
|