rda-python-metrics 1.0.51__tar.gz → 2.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rda_python_metrics-1.0.51/src/rda_python_metrics.egg-info → rda_python_metrics-2.0.0}/PKG-INFO +1 -1
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/pyproject.toml +1 -1
- rda_python_metrics-2.0.0/src/rda_python_metrics/fillawsusage.py +191 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/fillcdgusage.py +421 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/fillcodusage.py +220 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/fillcountry.py +71 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/fillendtime.py +68 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/fillglobususage.py +231 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/fillipinfo.py +200 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/filloneorder.py +138 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/fillosdfusage.py +194 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/fillrdadb.py +129 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/filltdsusage.py +213 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/filluser.py +193 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/logarch.py +342 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/pg_ipinfo.py +260 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/pg_view.py +678 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/pgperson.py +74 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/pgusername.py +54 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/viewallusage.py +321 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/viewawsusage.py +286 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/viewcheckusage.py +263 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/viewcodusage.py +286 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/viewordusage.py +306 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/viewosdfusage.py +286 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/viewrqstusage.py +327 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/viewtdsusage.py +291 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/viewwebfile.py +269 -0
- rda_python_metrics-2.0.0/src/rda_python_metrics/viewwebusage.py +336 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0/src/rda_python_metrics.egg-info}/PKG-INFO +1 -1
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics.egg-info/SOURCES.txt +27 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/tests/test_metrics.py +4 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/LICENSE +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/MANIFEST.in +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/README.md +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/setup.cfg +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/PgIPInfo.py +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/PgView.py +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/__init__.py +0 -0
- /rda_python_metrics-1.0.51/src/rda_python_metrics/fillawsusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/fill_awsusage.py +0 -0
- /rda_python_metrics-1.0.51/src/rda_python_metrics/fillcdgusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/fill_cdgusage.py +0 -0
- /rda_python_metrics-1.0.51/src/rda_python_metrics/fillcodusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/fill_codusage.py +0 -0
- /rda_python_metrics-1.0.51/src/rda_python_metrics/fillcountry.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/fill_country.py +0 -0
- /rda_python_metrics-1.0.51/src/rda_python_metrics/fillendtime.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/fill_endtime.py +0 -0
- /rda_python_metrics-1.0.51/src/rda_python_metrics/fillglobususage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/fill_globususage.py +0 -0
- /rda_python_metrics-1.0.51/src/rda_python_metrics/fillipinfo.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/fill_ipinfo.py +0 -0
- /rda_python_metrics-1.0.51/src/rda_python_metrics/filloneorder.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/fill_oneorder.py +0 -0
- /rda_python_metrics-1.0.51/src/rda_python_metrics/fillosdfusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/fill_osdfusage.py +0 -0
- /rda_python_metrics-1.0.51/src/rda_python_metrics/fillrdadb.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/fill_rdadb.py +0 -0
- /rda_python_metrics-1.0.51/src/rda_python_metrics/filltdsusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/fill_tdsusage.py +0 -0
- /rda_python_metrics-1.0.51/src/rda_python_metrics/filluser.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/fill_user.py +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/fillawsusage.usg +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/fillcdgusage.usg +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/fillcodusage.usg +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/fillgdexusage.py +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/fillgdexusage.usg +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/fillglobususage.usg +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/fillipinfo.usg +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/filloneorder.usg +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/fillosdfusage.usg +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/fillrdadb.usg +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/filltdsusage.usg +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/filluser.usg +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/fillzenodousage.py +0 -0
- /rda_python_metrics-1.0.51/src/rda_python_metrics/logarch.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/log_arch.py +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/logarch.usg +0 -0
- /rda_python_metrics-1.0.51/src/rda_python_metrics/pgperson.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/pg_person.py +0 -0
- /rda_python_metrics-1.0.51/src/rda_python_metrics/pgusername.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/pg_username.py +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/pgsyspath.py +0 -0
- /rda_python_metrics-1.0.51/src/rda_python_metrics/viewallusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/view_allusage.py +0 -0
- /rda_python_metrics-1.0.51/src/rda_python_metrics/viewawsusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/view_awsusage.py +0 -0
- /rda_python_metrics-1.0.51/src/rda_python_metrics/viewcheckusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/view_checkusage.py +0 -0
- /rda_python_metrics-1.0.51/src/rda_python_metrics/viewcodusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/view_codusage.py +0 -0
- /rda_python_metrics-1.0.51/src/rda_python_metrics/viewordusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/view_ordusage.py +0 -0
- /rda_python_metrics-1.0.51/src/rda_python_metrics/viewosdfusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/view_osdfusage.py +0 -0
- /rda_python_metrics-1.0.51/src/rda_python_metrics/viewrqstusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/view_rqstusage.py +0 -0
- /rda_python_metrics-1.0.51/src/rda_python_metrics/viewtdsusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/view_tdsusage.py +0 -0
- /rda_python_metrics-1.0.51/src/rda_python_metrics/viewwebfile.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/view_webfile.py +0 -0
- /rda_python_metrics-1.0.51/src/rda_python_metrics/viewwebusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/view_webusage.py +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/viewallusage.usg +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/viewawsusage.usg +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/viewcheckusage.usg +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/viewcodusage.usg +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/viewordusage.usg +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/viewosdfusage.usg +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/viewrqstusage.usg +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/viewtdsusage.usg +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/viewwebfile.usg +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/viewwebusage.usg +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics.egg-info/dependency_links.txt +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics.egg-info/entry_points.txt +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics.egg-info/requires.txt +0 -0
- {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
###############################################################################
|
|
3
|
+
# Title : fillawsusage
|
|
4
|
+
# Author : Zaihua Ji, zji@ucar.edu
|
|
5
|
+
# Date : 03/11/2022
|
|
6
|
+
# 2025-03-26 transferred to package rda_python_metrics from
|
|
7
|
+
# https://github.com/NCAR/rda-database.git
|
|
8
|
+
# 2025-12-16 converted to class FillAWSUsage
|
|
9
|
+
# Purpose : python program to retrieve info from AWS logs
|
|
10
|
+
# and fill table awsusage in PgSQL database dssdb.
|
|
11
|
+
# Github : https://github.com/NCAR/rda-python-metrics.git
|
|
12
|
+
###############################################################################
|
|
13
|
+
import sys
|
|
14
|
+
import re
|
|
15
|
+
import glob
|
|
16
|
+
from os import path as op
|
|
17
|
+
from rda_python_common.pg_file import PgFile
|
|
18
|
+
from .pg_ipinfo import PgIPInfo
|
|
19
|
+
|
|
20
|
+
class FillAWSUsage(PgIPInfo, PgFile):
    """Gather download usage from AWS access logs and add it to table awsusage.

    Typical use (see main()): construct, call read_parameters() to parse
    sys.argv, then start_actions() to locate the log files and fill the table.
    """

    def __init__(self):
        """Set up table/log-path settings and empty command-line state."""
        # Fixed: the original called super().__init(), which is not the
        # initializer (and is name-mangled inside a class body), so creating
        # the object raised AttributeError before any attribute was set.
        super().__init__()
        self.USAGE = {
            'PGTBL'  : "awsusage",
            'AWSDIR' : self.PGLOG["TRANSFER"] + "/AWSera5log",
            'AWSLOG' : "{}/{}-00-00-00-*",   # filled with (PFMT-formatted date, date)
            'PFMT'   : "YYYY/MM/DD"
        }
        # second field of a log entry (presumably the S3 bucket name) -> dataset id
        self.DSIDS = {'nsf-ncar-era5' : 'd633000'}
        self.option = self.cmdstr = None
        self.params = []       # array of input values

    # function to read parameters
    def read_parameters(self):
        """Parse sys.argv into self.option (one of d, p, N) and self.params.

        -b only sets PGLOG['BCKGRND']; a second action option, an unknown
        option, or a value given before any option is reported through pglog
        with LGWNEX.  Shows the usage when no option or no value was given.
        """
        argv = sys.argv[1:]
        for arg in argv:
            ms = re.match(r'^-(b|d|p|N)$', arg)
            if ms:
                opt = ms.group(1)
                if opt == 'b':
                    self.PGLOG['BCKGRND'] = 1
                elif self.option:
                    self.pglog("{}: Option -{} is present already".format(arg, self.option), self.LGWNEX)
                else:
                    self.option = opt
            elif re.match(r'^-', arg):
                self.pglog(arg + ": Invalid Option", self.LGWNEX)
            elif self.option:
                self.params.append(arg)
            else:
                self.pglog(arg + ": Invalid Parameter", self.LGWNEX)
        if not (self.option and self.params): self.show_usage('fillawsusage')
        self.dssdb_dbname()
        self.cmdstr = "fillawsusage {}".format(' '.join(argv))
        self.cmdlog(self.cmdstr)

    # function to start actions
    def start_actions(self):
        """Change into the AWS log directory and fill usages from the matching logs."""
        self.change_local_directory(self.USAGE['AWSDIR'])
        filenames = self.get_log_file_names()
        if filenames:
            self.fill_aws_usages(filenames)
        else:
            self.pglog("No log file found for given command: " + self.cmdstr, self.LOGWRN)
        self.pglog(None, self.LOGWRN)

    # find the log files of one date
    def _find_date_logs(self, pdate):
        """Return the sorted log file paths globbed for date pdate (empty if none)."""
        pd = self.format_date(pdate, self.USAGE['PFMT'])
        return sorted(glob.glob(self.USAGE['AWSLOG'].format(pd, pdate)))

    # get the log file dates
    def get_log_file_names(self):
        """Return a dict mapping each date that has log files to its sorted file list.

        Option d: every value in self.params is one date.
        Option N: the int(params[0]) days before the current date.
        Any other option: from params[0] through params[1] inclusive when a
        second value is given, otherwise up to (not including) the current date.
        """
        filenames = {}
        if self.option == 'd':
            for dt in self.params:
                pdate = self.format_date(dt)
                fnames = self._find_date_logs(pdate)
                if fnames: filenames[pdate] = fnames
        else:
            if self.option == 'N':
                edate = self.curdate()
                pdate = self.adddate(edate, 0, 0, -int(self.params[0]))
            else:
                pdate = self.format_date(self.params[0])
                if len(self.params) > 1:
                    # one day past the given end date so the loop below includes it
                    edate = self.adddate(self.format_date(self.params[1]), 0, 0, 1)
                else:
                    edate = self.curdate()
            while pdate < edate:
                fnames = self._find_date_logs(pdate)
                if fnames: filenames[pdate] = fnames
                pdate = self.adddate(pdate, 0, 0, 1)
        return filenames

    # Fill AWS usages into table dssdb.awsusage of DSS PgSQL database from aws access logs
    def fill_aws_usages(self, filenames):
        """Aggregate GET entries of the given log files and add them as usage records.

        filenames maps a date to its list of log files.  Entries of one date
        are summed per "ip:dsid:method" key (size and file count); the date,
        time and quarter of a record come from the first entry seen for its key.
        """
        # compiled once; the same pattern the original matched per line
        entry = re.compile(r'^\w+ ([\w-]+) \[(\S+).*\] ([\d\.]+) .+ REST\.GET\.OBJECT \S+ "GET.+" \d+ - (\d+) \d+ .* ".+" "(.+)" ')
        year = cntall = addall = 0
        for pdate in filenames:
            fnames = filenames[pdate]
            fcnt = len(fnames)
            self.pglog("{}: Gathering AWS usage info from {} log files at {}".format(pdate, fcnt, self.current_datetime()), self.LOGWRN)
            records = {}
            cntadd = entcnt = 0
            for logfile in fnames:
                aws = self.open_local_file(logfile)
                if not aws: continue
                try:
                    while True:
                        line = aws.readline()
                        if not line: break
                        entcnt += 1
                        if entcnt%20000 == 0:
                            dcnt = len(records)
                            self.pglog("{}: {}/{} AWS log entries processed/records to add".format(pdate, entcnt, dcnt), self.WARNLG)
                        ms = entry.match(line)
                        if not ms: continue
                        values = list(ms.groups())
                        if values[0] not in self.DSIDS: continue
                        dsid = self.DSIDS[values[0]]
                        size = int(values[3])
                        ip = values[2]
                        engine = values[4]
                        # method is the text before the first '/' of the last
                        # quoted field, capped at 20 characters and upper-cased
                        moff = engine.find('/')
                        if moff > 0:
                            if moff > 20: moff = 20
                            method = engine[0:moff].upper()
                        else:
                            method = "AWS"
                        key = "{}:{}:{}".format(ip, dsid, method)
                        if key in records:
                            records[key]['size'] += size
                            records[key]['fcount'] += 1
                        else:
                            (year, quarter, date, time) = self.get_record_date_time(values[1])
                            iprec = self.get_missing_ipinfo(ip)
                            if not iprec: continue
                            records[key] = {'ip' : ip, 'dsid' : dsid, 'date' : date, 'time' : time, 'quarter' : quarter,
                                            'size' : size, 'fcount' : 1, 'method' : method, 'engine' : engine,
                                            'org_type' : iprec['org_type'], 'country' : iprec['country'],
                                            'region' : iprec['region'], 'email' : iprec['email']}
                finally:
                    # close the log even when parsing or an IP lookup raises
                    aws.close()
            if records: cntadd = self.add_usage_records(records, year)
            self.pglog("{}: {} AWS usage records added for {} entries at {}".format(pdate, cntadd, entcnt, self.current_datetime()), self.LOGWRN)
            cntall += entcnt
            if cntadd:
                addall += cntadd
                # NOTE(review): the flattened source does not show whether this
                # running-total message sits here or after the date loop - confirm.
                if addall > cntadd:
                    self.pglog("{} AWS usage records added for {} entries at {}".format(addall, cntall, self.current_datetime()), self.LOGWRN)

    # get date and time from record
    def get_record_date_time(self, ctime):
        """Split a "DD/Mon/YYYY:HH:MM:SS" stamp into (year, quarter, date, time).

        year and time are returned as the matched strings, quarter as an int
        and date as "YYYY-MM-DD".  An unparsable stamp is reported through
        pglog with LGEREX and None is returned if that call returns.
        """
        ms = re.search(r'^(\d+)/(\w+)/(\d+):(\d+:\d+:\d+)$', ctime)
        if ms:
            d = int(ms.group(1))
            m = self.get_month(ms.group(2))
            y = ms.group(3)
            t = ms.group(4)
            q = 1 + int((m-1)/3)
            return (y, q, "{}-{:02}-{:02}".format(y, m, d), t)
        else:
            self.pglog(ctime + ": Invalid date/time format", self.LGEREX)

    # add usage records for year
    def add_usage_records(self, records, year):
        """Add the records not yet in the usage table; return the count added.

        A record is skipped when a row with the same date, time and ip already
        exists, and is added to the usage table only after add_to_allusage()
        returned a true value for it.
        """
        cnt = 0
        for key in records:
            record = records[key]
            cond = "date = '{}' AND time = '{}' AND ip = '{}'".format(record['date'], record['time'], record['ip'])
            if self.pgget(self.USAGE['PGTBL'], '', cond, self.LGEREX): continue
            if self.add_to_allusage(year, record):
                cnt += self.pgadd(self.USAGE['PGTBL'], record, self.LOGWRN)
        return cnt

    # add record to allusage tables
    def add_to_allusage(self, year, pgrec):
        """Pass the allusage subset of pgrec, tagged source 'A', to add_yearly_allusage()."""
        record = {'source' : 'A'}
        flds = ['ip', 'dsid', 'date', 'time', 'quarter', 'size', 'method',
                'org_type', 'country', 'region', 'email']
        for fld in flds:
            record[fld] = pgrec[fld]
        return self.add_yearly_allusage(year, record)
|
|
182
|
+
|
|
183
|
+
# main function to execute this script
|
|
184
|
+
def main():
    """Run one fillawsusage session: parse arguments, fill usages, then exit with 0."""
    filler = FillAWSUsage()
    filler.read_parameters()
    filler.start_actions()
    filler.pgexit(0)

# call main() to start program
if __name__ == "__main__": main()
|
|
@@ -0,0 +1,421 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
###############################################################################
|
|
3
|
+
# Title : fillcdgusage
|
|
4
|
+
# Author : Zaihua Ji, zji@ucar.edu
|
|
5
|
+
# Date : 2025-04-14
|
|
6
|
+
# 2025-12-16 convert to class FillCDGUsage
|
|
7
|
+
# Purpose : python program to retrieve info from GDEX Postgres database for GDS
|
|
8
|
+
# file accesses and backup fill table tdsusage in PostgreSQL database dssdb.
|
|
9
|
+
# Github : https://github.com/NCAR/rda-python-metrics.git
|
|
10
|
+
###############################################################################
|
|
11
|
+
import sys
|
|
12
|
+
import re
|
|
13
|
+
import glob
|
|
14
|
+
from os import path as op
|
|
15
|
+
from time import time as tm
|
|
16
|
+
from rda_python_common.pg_split import PgSplit
|
|
17
|
+
from .pg_ipinfo import PgIPInfo
|
|
18
|
+
|
|
19
|
+
class FillCDGUsage(PgSplit, PgIPInfo):
|
|
20
|
+
|
|
21
|
+
def __init__(self):
    """Set up table names, the CDG short-name -> dataset-id map and input state."""
    # Fixed: the original called super().__init(), which is not the
    # initializer (and is name-mangled inside a class body), so creating the
    # object raised AttributeError before any attribute was set.
    super().__init__()
    self.USAGE = {
        'TDSTBL' : "tdsusage",
        'WEBTBL' : "wusage",
        'CDATE' : self.curdate(),
    }
    # CDG dataset short name -> list of dataset ids
    self.DSIDS = {
        'pi_cesm2_atm_river_analysis' : ['d010073'],
        'na-cordex' : ['d316009'],
        'ucar.cgd.cesm2.cam6.prescribed_sst_amip' : ['d651010'],
        'ucar.cgd.ccsm4.CLM_LAND_ONLY' : ['d651011'],
        'ucar.cgd.artmip' : ['d651012', 'd651016', 'd651017', 'd651018'],
        'tamip' : ['d651013'],
        'ucar.cgd.ccsm4.CLIVAR_LE' : ['d651014'],
        'ucar.cgd.cesm2.Gettelman_CESM2_ECS' : ['d651015'],
        'ucar.cgd.ccsm4.geomip.ssp5' : ['d651024'],
        'ucar.cgd.ccsm4.IOD-PACEMAKER' : ['d651021'],
        # Fixed: was '651023'; every other id carries the leading 'd'
        'ucar.cgd.ccsm4.past2k_transient' : ['d651023'],
        'ucar.cgd.ccsm4.lowwarming' : ['d651025'],
        'ucar.cgd.ccsm4.CESM_CAM5_BGC_ME' : ['d651000'],
        'ucar.cgd.ccsm4.iTRACE' : ['d651022'],
        'ucar.cgd.ccsm4.so2_geoeng' : ['d651026'],
        'ucar.cgd.ccsm4.cesmLE' : ['d651027'],
        'ucar.cgd.ccsm4.CESM1-CAM5-DP' : ['d651028'],
        # NOTE(review): this key was listed twice in the original literal, first
        # with ['d651031'] and later with ['d651035']; the later value silently
        # won, so d651031 was never reachable.  The effective value is kept
        # here - confirm the intended short name for d651031.
        'ucar.cgd.ccsm4.amv_lens' : ['d651035'],
        'ucar.cgd.ccsm4.ATL-PACEMAKER' : ['d651032'],
        'ucar.cgd.ccsm4.pac-pacemaker' : ['d651033'],
        'ucar.cgd.ccsm4.SD-WACCM-X_v2.1' : ['d651034'],
        'ucar.cgd.cesm2.cism_ismip6' : ['d651036'],
        'ucar.cgd.ccsm4.pliomip2' : ['d651037'],
        'ucar.cgd.cesm2-waccm.s2s_hindcasts': ['d651040'],
        'ucar.cgd.CESM1.3_SH_storm_tracks': ['d651044'],
        'ucar.cgd.cesm2.waccm6.ssp245': ['d651045'],
        'ucar.cgd.cesm2.CESM21-CISM2-JG-BG': ['d651046'],
        'ucar.cgd.ccsm4.TC-CESM': ['d651047'],
        'ucar.cgd.cesm2.ISSI_OSSE': ['d651048'],
        'ucar.cgd.ccsm4.SOcean_Eddies_mclong': ['d651049'],
        'ucar.cgd.ccsm.trace': ['d651050'],
        'ucar.cgd.cesm2.waccm.solar': ['d651051'],
        'ucar.cgd.ccsm4.CESM1-CCSM4_mid-Pliocene' : ['d651042'],
        'ucar.cgd.ccsm4.PaleoIF' : ['d651052'],
        'ucar.cgd.ccsm4.b.e11.B20LE_fixedO3' : ['d651053'],
        'ucar.cgd.cesm2.single.forcing.large.ensemble' : ['d651055'],
        'ucar.cgd.cesm2le.output': ['d651056'],
        'ucar.cgd.ccsm4.ARISE-SAI-1.5' : ['d651059'],
        'ucar.cgd.cesm2.s2s_hindcasts': ['d651060'],
        'ucar.cgd.cesm2.s2s_hindcasts.mjo': ['d651061'],
        'ucar.cgd.cesm2.s2s_hindcasts.tc_tracks': ['d651062'],
        'ucar.cgd.cesm2.s2s_hindcasts.cesm2.climo': ['d651063'],
        'ucar.cgd.ccsm4.cesmLME' : ['d651058'],
        'ucar.cgd.ccsm4.GLENS' : ['d651064'],
        'ucar.cgd.ccsm4.CESM2-CISM2-LIGtransient' : ['d651066'],
        'ucar.cgd.cesm2.pacific.pacemaker' : ['d651068'],
        'ucar.cgd.cesm2.tuned.sea.ice.albedo' : ['d651070'],
        'ucar.cgd.cesm2.cmip5.forcing' : ['d651075'],
        'ucar.cgd.cesm2.ssp245.biomass.burning' : ['d651073'],
        'ucar.cgd.cesm2.ssp585.biomass.burning' : ['d651067'],
        'ucar.cgd.cesm1.cldmod': ['d651069'],
        'ucar.cgd.cesm2.marine.biogeochemistry': ['d651071'],
        'ucar.cgd.nw2.mom6': ['d651072'],
        'ucar.cgd.cesm2.cam6.ppe': ['d651076'],
        'ucar.cgd.cesm2.smyle': ['d651065'],
        # new added
        'gridded_precip_and_temp' : ['d010078'],
        '29_newman' : ['d010079'],
        'waccm-x.ion.asymmetry' : ['d010081'],
        'NARCCAP' : ['d316015']
    }
    self.ALLIDS = list(self.DSIDS.keys())   # short names used when 'all' is requested
    self.WFILES = {}
    self.params = {}       # option letter -> list of input values
|
|
94
|
+
|
|
95
|
+
# function to read parameters
|
|
96
|
+
def read_parameters(self):
    """Parse sys.argv into self.params, a dict of option letter -> value list.

    -b only sets PGLOG['BCKGRND'].  Each of -m, -s, -N, -y starts a fresh
    value list that collects the following non-option arguments.  Problems
    (unknown option, value before any option, missing -s, fewer than two
    options) are reported through pglog with LGWNEX; the usage is shown when
    no option was given at all.
    """
    args = sys.argv[1:]
    current = None
    for token in args:
        if token == "-b":
            self.PGLOG['BCKGRND'] = 1
            continue
        if re.match(r'^-[msNy]$', token):
            current = token[1]
            self.params[current] = []
            continue
        if re.match(r'^-', token):
            self.pglog(token + ": Invalid Option", self.LGWNEX)
            continue
        if current:
            self.params[current].append(token)
        else:
            self.pglog(token + ": Value passed in without leading option", self.LGWNEX)

    if not current:
        self.show_usage('fillcdgusage')
    elif 's' not in self.params:
        self.pglog("-s: Missing dataset short name to gather CDG metrics", self.LGWNEX)
    elif len(self.params) < 2:
        self.pglog("-(m|N|y): Missing Month, NumberDays or Year to gather CDG metrics", self.LGWNEX)
    self.cmdlog("fillcdgusage {}".format(' '.join(args)))
|
|
118
|
+
|
|
119
|
+
# function to start actions
|
|
120
|
+
def start_actions(self):
    """Resolve date ranges and datasets from self.params, then fill the usages.

    Nothing is filled when either list comes back empty.  The closing pglog
    call with no message sends out an email if there is any.
    """
    date_ranges = self.get_date_ranges(self.params)
    datasets = self.get_dataset_ids(self.params['s'])
    if date_ranges and datasets:
        self.fill_cdg_usages(datasets, date_ranges)
    flags = self.LOGWRN|self.SNDEML
    self.pglog(None, flags)
|
|
125
|
+
|
|
126
|
+
# connect to the gdex database esg-production
|
|
127
|
+
def gdex_dbname(self):
    """Point the database settings at the gdex database esg-production.

    NOTE(review): the meaning of each positional argument is defined by
    set_scname(), which is not visible here - confirm against it.
    """
    target = ('esg-production', 'metrics', 'gateway-reader', None, 'sagedbprodalma.ucar.edu')
    self.set_scname(*target)
|
|
129
|
+
|
|
130
|
+
# get datasets
|
|
131
|
+
def get_dataset_ids(self, dsnames):
    """Look up the CDG ids for the given dataset short names.

    Returns a list of [short name, dataset ids, cdg ids, label] entries, where
    cdg ids holds the id found in metadata.dataset followed by the ids
    gathered by recursive_dataset_ids().  A name matching 'all' (any case)
    restarts the lookup with every known short name.  Unknown names and names
    without a row are skipped.
    """
    self.gdex_dbname()
    table = 'metadata.dataset'
    found = []
    for name in dsnames:
        if re.match(r'^all$', name, re.I):
            return self.get_dataset_ids(self.ALLIDS)
        if name not in self.DSIDS:
            self.pglog(name + ": Unknown CDG dataset short name", self.LOGWRN)
            continue
        started = tm()
        row = self.pgget(table, 'id', "short_name = '{}'".format(name))
        if not (row and row['id']):
            continue
        rdaids = self.DSIDS[name]
        label = "{}-{}".format(name, rdaids)
        root = row['id']
        cdgids = [root]
        # the root itself plus every sub dataset id appended to cdgids
        total = 1 + self.recursive_dataset_ids(root, cdgids)
        found.append([name, rdaids, cdgids, label])
        elapsed = self.seconds_to_string_time(tm() - started)
        self.pglog("{}: Found {} CDG dsid/subdsids in {} at {}".format(label, total, elapsed, self.current_datetime()), self.LOGWRN)
    if not found:
        self.pglog("No Dataset Id identified to gather CDG metrics", self.LOGWRN)
    return found
|
|
154
|
+
|
|
155
|
+
# get cdgids recursively
|
|
156
|
+
def recursive_dataset_ids(self, pcdgid, cdgids):
    """Append all descendant dataset ids of pcdgid to cdgids; return how many were added.

    Children are the metadata.dataset rows whose parent_dataset_id is pcdgid.
    Ids already present in cdgids are neither added nor descended into.
    """
    children = self.pgmget('metadata.dataset', 'id', "parent_dataset_id = '{}'".format(pcdgid))
    if not children:
        return 0
    added = 0
    for child in children['id']:
        if child not in cdgids:
            cdgids.append(child)
            added += 1 + self.recursive_dataset_ids(child, cdgids)
    return added
|
|
167
|
+
|
|
168
|
+
# get the date ranges for given condition
|
|
169
|
+
def get_date_ranges(self, inputs):
    """Turn the -N, -m and -y option values into a list of [start, end] dates.

    N: the given number of days before USAGE['CDATE'] through CDATE.
    m: "YYYY-MM" -> first day of that month through enddate() of it.
    y: "YYYY" -> January 1 through December 31 of that year.
    Values of any other option produce no range.
    """
    ranges = []
    for opt, values in inputs.items():
        for value in values:
            if opt == 'N':
                today = self.USAGE['CDATE']
                span = [self.adddate(today, 0, 0, -int(value)), today]
            elif opt == 'm':
                parts = value.split('-')
                first = self.fmtdate(int(parts[0]), int(parts[1]), 1)
                span = [first, self.enddate(first, 0, 'M')]
            elif opt == 'y':
                span = [value + "-01-01", value + "-12-31"]
            else:
                span = []
            if span:
                ranges.append(span)
    return ranges
|
|
187
|
+
|
|
188
|
+
# get file download records for given dsid
|
|
189
|
+
def get_dsid_records(self, cdgids, dates, strids):
    """Fetch the metrics.file_download rows of the given CDG ids within a date range.

    cdgids -- non-empty list of dataset ids to match against dataset_id
    dates  -- [start date, end date]; both days are included in full
    strids -- label used only in the log message
    Returns whatever pgmget() returns, ordered by date_completed.  The
    database settings are switched to gdex for the query and back to dssdb
    afterwards.
    """
    tbname = 'metrics.file_download'
    fields = ('date_completed, remote_address, logical_file_size, logical_file_name, file_access_point_uri, user_agent_name, bytes_sent, '
              'subset_file_size, range_request, dataset_file_size, dataset_file_name, dataset_file_file_access_point_uri')
    dscnt = len(cdgids)
    if dscnt == 1:
        dscnd = "dataset_id = '{}'".format(cdgids[0])
    else:
        # Fixed: join() requires strings, while the single-id branch accepted
        # any id type; convert so both branches handle the same inputs.
        dscnd = "dataset_id IN ('" + "','".join(str(cdgid) for cdgid in cdgids) + "')"
    dtcnd = "date_completed BETWEEN '{} 00:00:00' AND '{} 23:59:59'".format(dates[0], dates[1])
    cond = "{} AND {} ORDER BY date_completed".format(dscnd, dtcnd)
    self.gdex_dbname()
    try:
        self.pglog("{}: Query for {} CDG dsid/subdsids between {} and {} at {}".format(strids, dscnt, dates[0], dates[1], self.current_datetime()), self.LOGWRN)
        return self.pgmget(tbname, fields, cond)
    finally:
        # switch back to dssdb even when the query raises
        self.dssdb_dbname()
|
|
206
|
+
|
|
207
|
+
# Fill TDS usages into table dssdb.tdsusage from cdg access records
|
|
208
|
+
def fill_cdg_usages(self, dsids, dranges):
    """Turn CDG download records into TDS usage and web file usage records.

    dsids   -- entries [short name, dataset ids, cdg ids, label] as built by
               get_dataset_ids()
    dranges -- [start date, end date] pairs as built by get_date_ranges()

    A record whose access URL is under tds.ucar.edu/thredds/ is summed per
    "ip:dsid:method:etype" key and flushed through add_tdsusage_records()
    whenever the date changes.  Every other record becomes a web file usage;
    consecutive partial downloads of the same ip/dsid/file are merged into
    one record before add_webfile_usage() is called.
    """
    tds_url = re.compile(r'^https*://tds.ucar.edu/thredds/(\w+)/')
    allcnt = awcnt = atcnt = 0
    for dates in dranges:
        for adsid in dsids:
            rdaids = adsid[1]
            # with several dataset ids the right one is looked up per file
            getdsid = len(rdaids) != 1
            dsid = rdaids[0]
            cdgids = adsid[2]
            strids = adsid[3]
            bt = tm()
            pgrecs = self.get_dsid_records(cdgids, dates, strids)
            pgcnt = len(pgrecs['dataset_file_name']) if pgrecs else 0
            if pgcnt == 0:
                self.pglog("{}: No record found to gather CDG usage between {} and {}".format(strids, dates[0], dates[1]), self.LOGWRN)
                continue
            rmsg = self.seconds_to_string_time(tm() - bt)
            self.pglog("{}: Got {} records in {} for processing CDG usage at {}".format(strids, pgcnt, rmsg, self.current_datetime()), self.LOGWRN)
            tcnt = wcnt = 0
            pwkey = wrec = cdate = None
            # Fixed: remember the year of the buffered records; the original
            # flushed them with the year of whichever record came next.
            tyear = wyear = None
            trecs = {}
            bt = tm()
            for i in range(pgcnt):
                if (i+1)%20000 == 0:
                    self.pglog("{}/{}/{} CDG/TDS/WEB records processed to add".format(i, tcnt, wcnt), self.WARNLG)
                pgrec = self.onerecord(pgrecs, i)
                # Fixed: the queried column is logical_file_name; the original
                # read 'logic_file_name', which is not in the field list.
                wfile = pgrec['dataset_file_name'] or pgrec['logical_file_name']
                if not wfile: continue
                dsize = pgrec['bytes_sent']
                if not dsize: continue
                (year, quarter, date, time) = self.get_record_date_time(pgrec['date_completed'])
                # a record without any URL is treated as web usage instead of
                # raising inside the regex search
                url = pgrec['dataset_file_file_access_point_uri'] or pgrec['file_access_point_uri'] or ''
                ip = pgrec['remote_address']
                engine = pgrec['user_agent_name']
                ms = tds_url.search(url)
                if ms:
                    # tds usage
                    if getdsid:
                        wfrec = self.get_wfile_record(rdaids, wfile)
                        if not wfrec: continue
                        dsid = wfrec['dsid']
                    method = ms.group(1)
                    if pgrec['subset_file_size']:
                        etype = 'S'
                    elif pgrec['range_request']:
                        etype = 'R'
                    else:
                        etype = 'F'
                    if date != cdate:
                        if trecs:
                            tcnt += self.add_tdsusage_records(tyear, trecs, cdate)
                            trecs = {}
                        cdate = date
                        tyear = year
                    tkey = "{}:{}:{}:{}".format(ip, dsid, method, etype)
                    if tkey in trecs:
                        trecs[tkey]['size'] += dsize
                        trecs[tkey]['fcount'] += 1
                    else:
                        iprec = self.get_missing_ipinfo(ip)
                        if not iprec: continue
                        trecs[tkey] = {'ip' : ip, 'dsid' : dsid, 'date' : cdate, 'time' : time, 'quarter' : quarter,
                                       'size' : dsize, 'fcount' : 1, 'method' : method, 'etype' : etype,
                                       'engine' : engine, 'org_type' : iprec['org_type'], 'country' : iprec['country'],
                                       'region' : iprec['region'], 'email' : iprec['email']}
                else:
                    # web usage
                    wfrec = self.get_wfile_record(rdaids, wfile)
                    if not wfrec: continue
                    if getdsid: dsid = wfrec['dsid']
                    # Fixed: 'logic_file_size' is not a queried column either
                    fsize = pgrec['dataset_file_size'] or pgrec['logical_file_size']
                    method = 'CDG'
                    # a partial download gets a key so that following pieces of
                    # the same file merge; an unknown file size no longer
                    # raises on the comparison
                    if pgrec['subset_file_size'] or pgrec['range_request'] or (fsize and dsize < fsize):
                        wkey = "{}:{}:{}".format(ip, dsid, wfile)
                    else:
                        wkey = None
                    if wrec:
                        if wkey == pwkey:
                            wrec['size'] += dsize
                            continue
                        wcnt += self.add_webfile_usage(wyear, wrec)
                    wrec = {'ip' : ip, 'dsid' : dsid, 'wid' : wfrec['wid'], 'date' : date,
                            'time' : time, 'quarter' : quarter, 'size' : dsize,
                            'locflag' : 'C', 'method' : method}
                    wyear = year
                    pwkey = wkey
                    if not pwkey:
                        # a full download is never merged; add it right away
                        wcnt += self.add_webfile_usage(wyear, wrec)
                        wrec = None
            if trecs: tcnt += self.add_tdsusage_records(tyear, trecs, cdate)
            if wrec: wcnt += self.add_webfile_usage(wyear, wrec)
            atcnt += tcnt
            awcnt += wcnt
            allcnt += pgcnt
            rmsg = self.seconds_to_string_time(tm() - bt)
            # NOTE(review): the flattened source does not show whether this
            # cumulative message sits inside the dataset loop or after both
            # loops - confirm.
            self.pglog("{}: {}/{} TDS/WEB usage records added for {} CDG entries in {}".format(strids, atcnt, awcnt, allcnt, rmsg), self.LOGWRN)
|
|
307
|
+
|
|
308
|
+
# get date and time from log record
|
|
309
|
+
def get_record_date_time(self, ctime):
    """Split a log record timestamp into year, quarter, date and time.

    ctime is converted with str() and must start with
    'Y-M-D HH:MM:SS'; anything after the seconds is ignored.

    Returns the tuple (year, quarter, date, time) where year and time are
    the strings found in the timestamp, quarter is an int derived from the
    month (1 for months 1-3 up to 4 for months 10-12) and date is
    'year-month-day' with the month zero-padded to two digits.

    A timestamp that does not match is reported through self.pglog() with
    self.LGEREX; if that call returns, the result is None.
    """
    stamp = str(ctime)
    matched = re.search(r'^(\d+)-(\d+)-(\d+) (\d\d:\d\d:\d\d)', stamp)
    if not matched:
        self.pglog(stamp + ": Invalid time format", self.LGEREX)
        return None

    year, month_text, day, clock = matched.groups()
    month = int(month_text)
    quarter = 1 + int((month - 1) / 3)
    return (year, quarter, "{}-{:02}-{}".format(year, month, day), clock)
|
|
320
|
+
|
|
321
|
+
# add to tds usage records
|
|
322
|
+
def add_tdsusage_records(self, year, records, date):
    """Store the TDS usage records gathered for one date.

    records is a dict whose values are usage record dicts; only the values
    are used.  A record is skipped when self.pgget() returns a true value
    for the table self.USAGE['TDSTBL'] with the given date and the
    record's time and ip (presumably an already stored row), or when
    self.add_tds_allusage() returns a false value for it.  Every other
    record is passed to self.pgadd() for the same table.

    Logs a one-line summary and returns the sum of the self.pgadd()
    results.
    """
    added = 0
    for record in records.values():
        cond = "date = '{}' AND time = '{}' AND ip = '{}'".format(date, record['time'], record['ip'])
        if self.pgget(self.USAGE['TDSTBL'], '', cond, self.LGEREX):
            continue
        if not self.add_tds_allusage(year, record):
            continue
        added += self.pgadd(self.USAGE['TDSTBL'], record, self.LOGWRN)

    summary = "{}: {} TDS usage records added at {}".format(date, added, self.current_datetime())
    self.pglog(summary, self.LOGWRN)
    return added
|
|
332
|
+
|
|
333
|
+
# add a TDS usage record to the allusage tables
|
|
334
|
+
def add_tds_allusage(self, year, logrec):
    """Build an allusage record from a TDS usage record and add it.

    The new record always carries method 'CDG' and source 'C'; the
    remaining fields are copied unchanged from logrec.  Returns whatever
    self.add_yearly_allusage(year, record) returns.
    """
    copied_fields = ('email', 'org_type', 'country', 'region', 'dsid',
                     'date', 'quarter', 'time', 'size', 'ip')
    pgrec = {'method': 'CDG', 'source': 'C'}
    for field in copied_fields:
        pgrec[field] = logrec[field]
    return self.add_yearly_allusage(year, pgrec)
|
|
347
|
+
|
|
348
|
+
# Fill usage of a single online data file into table dssdb.wusage of DSS PgSQL database
|
|
349
|
+
def add_webfile_usage(self, year, logrec):
    """Add the usage of one web file to the yearly web usage table.

    The table name is self.USAGE['WEBTBL'] joined to year with '_'.
    Returns 0 without adding anything when
      * self.pgget() returns a true value for the same wid, method, date
        and time in that table (presumably an existing row),
      * self.get_wuser_record() returns no record for the ip and date, or
      * self.add_web_allusage() returns a false value.
    Otherwise returns the result of self.add_yearly_wusage().
    """
    usage_table = "{}_{}".format(self.USAGE['WEBTBL'], year)
    cdate = logrec['date']
    ip = logrec['ip']

    dup_cond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(logrec['wid'], logrec['method'], cdate, logrec['time'])
    if self.pgget(usage_table, "", dup_cond, self.LOGWRN):
        return 0

    wurec = self.get_wuser_record(ip, cdate)
    if not wurec:
        return 0

    # the usage row is assembled before the allusage call, as in the
    # original statement order
    usage = {
        'wid': logrec['wid'],
        'dsid': logrec['dsid'],
        'wuid_read': wurec['wuid'],
        'date_read': cdate,
        'time_read': logrec['time'],
        'size_read': logrec['size'],
        'method': logrec['method'],
        'locflag': logrec['locflag'],
        'ip': ip,
        'quarter': logrec['quarter'],
    }
    if not self.add_web_allusage(year, logrec, wurec):
        return 0
    return self.add_yearly_wusage(year, usage)
|
|
370
|
+
|
|
371
|
+
# add web record to allusage
|
|
372
|
+
def add_web_allusage(self, year, logrec, wurec):
    """Build an allusage record for a web file read and add it.

    The record has source 'C'; email, org_type, country and region are
    taken from the user record wurec, everything else from logrec.
    Returns whatever self.add_yearly_allusage(year, record) returns.
    """
    user_fields = ('email', 'org_type', 'country', 'region')
    log_fields = ('dsid', 'date', 'quarter', 'time', 'size', 'method', 'ip')

    pgrec = {'source': 'C'}
    for field in user_fields:
        pgrec[field] = wurec[field]
    for field in log_fields:
        pgrec[field] = logrec[field]
    return self.add_yearly_allusage(year, pgrec)
|
|
386
|
+
|
|
387
|
+
# return the web file record (a dict with 'wid' and 'dsid') upon success, a false value otherwise
|
|
388
|
+
def get_wfile_record(self, dsids, wfile):
    """Find the web file record for a file name among candidate datasets.

    dsids is an iterable of dataset ids to try in order and wfile is the
    file name; database lookups match it as a suffix
    (wfile LIKE '%<wfile>').

    Lookup order:
      1. the self.WFILES cache, keyed by dsid followed by wfile;
      2. self.pgget_wfile() for each dsid;
      3. table wfile_delete for each dsid and, when that finds nothing,
         table wmove, whose wid and dsid are then used to fetch the
         record through self.pgget_wfile().

    Returns the first record found, a dict holding at least 'wid' and
    'dsid', after caching it in self.WFILES; returns None when nothing
    matches.
    """
    for dsid in dsids:
        wkey = "{}{}".format(dsid, wfile)
        if wkey in self.WFILES:
            return self.WFILES[wkey]

    # NOTE(review): wfile is interpolated into the SQL condition as-is;
    # confirm callers never pass names containing quote characters.
    wfcond = "wfile LIKE '%{}'".format(wfile)

    for dsid in dsids:
        pgrec = self.pgget_wfile(dsid, "wid", wfcond)
        if pgrec:
            pgrec['dsid'] = dsid
            self.WFILES["{}{}".format(dsid, wfile)] = pgrec
            return pgrec

    wmove_checked = False
    for dsid in dsids:
        pgrec = self.pgget("wfile_delete", "wid, dsid", "{} AND dsid = '{}'".format(wfcond, dsid))
        if not pgrec and not wmove_checked:
            # the wmove lookup does not depend on dsid, so run it only once
            wmove_checked = True
            mvrec = self.pgget("wmove", "wid, dsid", wfcond)
            if mvrec:
                # pgrec is empty in this branch: the wid has to come from
                # the wmove record (the previous code subscripted pgrec
                # here, which raised TypeError)
                pgrec = self.pgget_wfile(mvrec['dsid'], "wid", "wid = {}".format(mvrec['wid']))
                if pgrec:
                    pgrec['dsid'] = mvrec['dsid']
        if pgrec:
            self.WFILES["{}{}".format(pgrec['dsid'], wfile)] = pgrec
            return pgrec

    return None
|
|
412
|
+
|
|
413
|
+
# main function to execute this script
|
|
414
|
+
def main():
    """Run the script: create a FillCDGUsage instance, read its
    parameters, start its actions and finish through pgexit(0)."""
    filler = FillCDGUsage()
    filler.read_parameters()
    filler.start_actions()
    filler.pgexit(0)
|
|
419
|
+
|
|
420
|
+
# call main() to start program
|
|
421
|
+
if __name__ == "__main__":
    # run main() only when this file is executed as a script
    main()
|