rda-python-metrics 1.0.27__tar.gz → 1.0.46__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rda_python_metrics-1.0.27/src/rda_python_metrics.egg-info → rda_python_metrics-1.0.46}/PKG-INFO +2 -1
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/pyproject.toml +8 -3
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/PgIPInfo.py +7 -7
- rda_python_metrics-1.0.46/src/rda_python_metrics/fillawsusage.py +210 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/fillawsusage.usg +1 -1
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/fillcdgusage.py +40 -13
- rda_python_metrics-1.0.46/src/rda_python_metrics/fillgdexusage.py +1032 -0
- rda_python_metrics-1.0.46/src/rda_python_metrics/fillgdexusage.usg +18 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/fillglobususage.py +9 -11
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/fillglobususage.usg +4 -2
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/fillosdfusage.py +56 -88
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/fillosdfusage.usg +1 -1
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/fillrdadb.py +1 -2
- rda_python_metrics-1.0.46/src/rda_python_metrics/fillzenodousage.py +408 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/logarch.py +15 -8
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/pgperson.py +2 -1
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/pgusername.py +2 -1
- rda_python_metrics-1.0.46/src/rda_python_metrics/viewawsusage.py +321 -0
- rda_python_metrics-1.0.46/src/rda_python_metrics/viewawsusage.usg +190 -0
- rda_python_metrics-1.0.46/src/rda_python_metrics/viewosdfusage.py +321 -0
- rda_python_metrics-1.0.46/src/rda_python_metrics/viewosdfusage.usg +190 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46/src/rda_python_metrics.egg-info}/PKG-INFO +2 -1
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics.egg-info/SOURCES.txt +7 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics.egg-info/entry_points.txt +5 -1
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics.egg-info/requires.txt +1 -0
- rda_python_metrics-1.0.27/src/rda_python_metrics/fillawsusage.py +0 -254
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/LICENSE +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/MANIFEST.in +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/README.md +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/setup.cfg +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/PgView.py +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/__init__.py +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/fillcdgusage.usg +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/fillcodusage.py +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/fillcodusage.usg +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/fillcountry.py +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/fillendtime.py +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/fillipinfo.py +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/fillipinfo.usg +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/filloneorder.py +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/filloneorder.usg +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/fillrdadb.usg +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/filltdsusage.py +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/filltdsusage.usg +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/filluser.py +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/filluser.usg +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/logarch.usg +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/pgsyspath.py +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/viewallusage.py +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/viewallusage.usg +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/viewcheckusage.py +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/viewcheckusage.usg +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/viewcodusage.py +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/viewcodusage.usg +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/viewordusage.py +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/viewordusage.usg +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/viewrqstusage.py +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/viewrqstusage.usg +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/viewtdsusage.py +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/viewtdsusage.usg +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/viewwebfile.py +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/viewwebfile.usg +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/viewwebusage.py +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/viewwebusage.usg +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics.egg-info/dependency_links.txt +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics.egg-info/top_level.txt +0 -0
- {rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/tests/test_metrics.py +0 -0
{rda_python_metrics-1.0.27/src/rda_python_metrics.egg-info → rda_python_metrics-1.0.46}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rda_python_metrics
|
|
3
|
-
Version: 1.0.27
|
|
3
|
+
Version: 1.0.46
|
|
4
4
|
Summary: RDA Python Package to gather and view data usage metrics
|
|
5
5
|
Author-email: Zaihua Ji <zji@ucar.edu>
|
|
6
6
|
Project-URL: Homepage, https://github.com/NCAR/rda-python-metrics
|
|
@@ -17,6 +17,7 @@ Requires-Dist: geoip2
|
|
|
17
17
|
Requires-Dist: ipinfo
|
|
18
18
|
Requires-Dist: httplib2
|
|
19
19
|
Requires-Dist: dnspython
|
|
20
|
+
Requires-Dist: unidecode
|
|
20
21
|
Dynamic: license-file
|
|
21
22
|
|
|
22
23
|
RDA Python Package to gather and view data usage metrics.
|
{rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/pyproject.toml
RENAMED
|
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
|
|
|
6
6
|
|
|
7
7
|
[project]
|
|
8
8
|
name = "rda_python_metrics"
|
|
9
|
-
version = "1.0.27"
|
|
9
|
+
version = "1.0.46"
|
|
10
10
|
authors = [
|
|
11
11
|
{ name="Zaihua Ji", email="zji@ucar.edu" },
|
|
12
12
|
]
|
|
@@ -25,7 +25,8 @@ dependencies = [
|
|
|
25
25
|
"geoip2",
|
|
26
26
|
"ipinfo",
|
|
27
27
|
"httplib2",
|
|
28
|
-
"dnspython"
|
|
28
|
+
"dnspython",
|
|
29
|
+
"unidecode"
|
|
29
30
|
]
|
|
30
31
|
|
|
31
32
|
[tool.pytest.ini_options]
|
|
@@ -38,12 +39,14 @@ pythonpath = [
|
|
|
38
39
|
|
|
39
40
|
[project.scripts]
|
|
40
41
|
"logarch.py" = "rda_python_metrics.logarch:main"
|
|
41
|
-
"fillawsusage" = "rda_python_metrics.
|
|
42
|
+
"fillawsusage" = "rda_python_metrics.fillawsusage:main"
|
|
42
43
|
"fillcdgusage" = "rda_python_metrics.fillcdgusage:main"
|
|
43
44
|
"fillcodusage" = "rda_python_metrics.fillcodusage:main"
|
|
44
45
|
"fillcountry" = "rda_python_metrics.fillcountry:main"
|
|
45
46
|
"fillendtime" = "rda_python_metrics.fillendtime:main"
|
|
46
47
|
"fillglobususage" = "rda_python_metrics.fillglobususage:main"
|
|
48
|
+
"fillgdexusage" = "rda_python_metrics.fillgdexusage:main"
|
|
49
|
+
"fillzenodousage" = "rda_python_metrics.fillzenodousage:main"
|
|
47
50
|
"fillipinfo" = "rda_python_metrics.fillipinfo:main"
|
|
48
51
|
"filloneorder" = "rda_python_metrics.filloneorder:main"
|
|
49
52
|
"fillosdfusage" = "rda_python_metrics.fillosdfusage:main"
|
|
@@ -53,9 +56,11 @@ pythonpath = [
|
|
|
53
56
|
"pgperson" = "rda_python_metrics.pgperson:main"
|
|
54
57
|
"pgusername" = "rda_python_metrics.pgusername:main"
|
|
55
58
|
"viewallusage" = "rda_python_metrics.viewallusage:main"
|
|
59
|
+
"viewawsusage" = "rda_python_metrics.viewawsusage:main"
|
|
56
60
|
"viewcheckusage" = "rda_python_metrics.viewcheckusage:main"
|
|
57
61
|
"viewcodusage" = "rda_python_metrics.viewcodusage:main"
|
|
58
62
|
"viewordusage" = "rda_python_metrics.viewordusage:main"
|
|
63
|
+
"viewosdfusage" = "rda_python_metrics.viewosdfusage:main"
|
|
59
64
|
"viewrqstusage" = "rda_python_metrics.viewrqstusage:main"
|
|
60
65
|
"viewtdsusage" = "rda_python_metrics.viewtdsusage:main"
|
|
61
66
|
"viewwebfile" = "rda_python_metrics.viewwebfile:main"
|
{rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/PgIPInfo.py
RENAMED
|
@@ -138,7 +138,10 @@ def get_ipinfo_record(ip):
|
|
|
138
138
|
except Exception as e:
|
|
139
139
|
PgLOG.pglog("ipinfo: {} - {}".format(ip, str(e)), PgLOG.LOGWRN)
|
|
140
140
|
return None
|
|
141
|
-
|
|
141
|
+
|
|
142
|
+
if 'bogon' in iprec and iprec['bogon']:
|
|
143
|
+
PgLOG.pglog(f"ipinfo: {ip} - bogon", PgLOG.LOGWRN)
|
|
144
|
+
return None
|
|
142
145
|
record = {'ip' : ip, 'stat_flag' : 'A', 'hostname' : ip, 'org_type' : '-'}
|
|
143
146
|
get_ip_hostname(ip, iprec, record)
|
|
144
147
|
record['lat'] = float(iprec['latitude']) if iprec['latitude'] else 0
|
|
@@ -234,12 +237,9 @@ def update_ipinfo_record(record, pgrec = None):
|
|
|
234
237
|
#
|
|
235
238
|
def set_ipinfo(ip, ipopt = True):
|
|
236
239
|
|
|
237
|
-
if ip in IPRECS:
|
|
238
|
-
pgrec = IPRECS[ip]
|
|
239
|
-
if pgrec or not ipopt: return pgrec
|
|
240
|
-
else:
|
|
241
|
-
pgrec = PgDBI.pgget('ipinfo', '*', "ip = '{}'".format(ip))
|
|
240
|
+
if ip in IPRECS: return IPRECS[ip]
|
|
242
241
|
|
|
242
|
+
pgrec = PgDBI.pgget('ipinfo', '*', "ip = '{}'".format(ip))
|
|
243
243
|
if not pgrec or ipopt and pgrec['stat_flag'] == 'M':
|
|
244
244
|
record = get_ipinfo_record(ip) if ipopt else None
|
|
245
245
|
if not record: record = get_geoip2_record(ip)
|
|
@@ -303,7 +303,7 @@ def get_wuser_record(ip, date, email = None):
|
|
|
303
303
|
wuid = PgDBI.pgadd("wuser", record, PgLOG.LOGERR|PgLOG.AUTOID)
|
|
304
304
|
if wuid:
|
|
305
305
|
record['wuid'] = wuid
|
|
306
|
-
PgLOG.pglog("{} Added as wuid({})".format(email, wuid), PgLOG.LGWNEM)
|
|
306
|
+
PgLOG.pglog("{} Added as wuid({})".format(record['email'], wuid), PgLOG.LGWNEM)
|
|
307
307
|
return record
|
|
308
308
|
|
|
309
309
|
return None
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
###############################################################################
|
|
4
|
+
#
|
|
5
|
+
# Title : fillawsusage
|
|
6
|
+
# Author : Zaihua Ji, zji@ucar.edu
|
|
7
|
+
# Date : 03/11/2022
|
|
8
|
+
# 2025-03-26 transferred to package rda_python_metrics from
|
|
9
|
+
# https://github.com/NCAR/rda-database.git
|
|
10
|
+
# Purpose : python program to retrieve info from AWS logs
|
|
11
|
+
# and fill table wusages in PgSQL database dssdb.
|
|
12
|
+
#
|
|
13
|
+
# Github : https://github.com/NCAR/rda-pythn-metrics.git
|
|
14
|
+
#
|
|
15
|
+
###############################################################################
|
|
16
|
+
#
|
|
17
|
+
import sys
|
|
18
|
+
import re
|
|
19
|
+
import glob
|
|
20
|
+
from os import path as op
|
|
21
|
+
from rda_python_common import PgLOG
|
|
22
|
+
from rda_python_common import PgUtil
|
|
23
|
+
from rda_python_common import PgFile
|
|
24
|
+
from rda_python_common import PgDBI
|
|
25
|
+
from . import PgIPInfo
|
|
26
|
+
|
|
27
|
+
USAGE = {
|
|
28
|
+
'PGTBL' : "awsusage",
|
|
29
|
+
'AWSDIR' : PgLOG.PGLOG["TRANSFER"] + "/AWSera5log",
|
|
30
|
+
'AWSLOG' : "{}/{}-00-00-00-*",
|
|
31
|
+
'PFMT' : "YYYY/MM/DD"
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
DSIDS = {'nsf-ncar-era5' : 'd633000'}
|
|
35
|
+
|
|
36
|
+
#
|
|
37
|
+
# main function to run this program
|
|
38
|
+
#
|
|
39
|
+
def main():
|
|
40
|
+
|
|
41
|
+
params = [] # array of input values
|
|
42
|
+
argv = sys.argv[1:]
|
|
43
|
+
option = None
|
|
44
|
+
|
|
45
|
+
for arg in argv:
|
|
46
|
+
ms = re.match(r'^-(b|d|p|N)$', arg)
|
|
47
|
+
if ms:
|
|
48
|
+
opt = ms.group(1)
|
|
49
|
+
if opt == 'b':
|
|
50
|
+
PgLOG.PGLOG['BCKGRND'] = 1
|
|
51
|
+
elif option:
|
|
52
|
+
PgLOG.pglog("{}: Option -{} is present already".format(arg, option), PgLOG.LGWNEX)
|
|
53
|
+
else:
|
|
54
|
+
option = opt
|
|
55
|
+
elif re.match(r'^-', arg):
|
|
56
|
+
PgLOG.pglog(arg + ": Invalid Option", PgLOG.LGWNEX)
|
|
57
|
+
elif option:
|
|
58
|
+
params.append(arg)
|
|
59
|
+
else:
|
|
60
|
+
PgLOG.pglog(arg + ": Invalid Parameter", PgLOG.LGWNEX)
|
|
61
|
+
|
|
62
|
+
if not (option and params): PgLOG.show_usage('fillawsusage')
|
|
63
|
+
|
|
64
|
+
PgDBI.dssdb_dbname()
|
|
65
|
+
cmdstr = "fillawsusage {}".format(' '.join(argv))
|
|
66
|
+
PgLOG.cmdlog(cmdstr)
|
|
67
|
+
PgFile.change_local_directory(USAGE['AWSDIR'])
|
|
68
|
+
filenames = get_log_file_names(option, params)
|
|
69
|
+
if filenames:
|
|
70
|
+
fill_aws_usages(filenames)
|
|
71
|
+
else:
|
|
72
|
+
PgLOG.pglog("No log file found for given command: " + cmdstr, PgLOG.LOGWRN)
|
|
73
|
+
|
|
74
|
+
PgLOG.pglog(None, PgLOG.LOGWRN)
|
|
75
|
+
sys.exit(0)
|
|
76
|
+
|
|
77
|
+
#
|
|
78
|
+
# get the log file dates
|
|
79
|
+
#
|
|
80
|
+
def get_log_file_names(option, params):
|
|
81
|
+
|
|
82
|
+
filenames = {}
|
|
83
|
+
if option == 'd':
|
|
84
|
+
for dt in params:
|
|
85
|
+
pdate = PgUtil.format_date(dt)
|
|
86
|
+
pd = PgUtil.format_date(pdate, USAGE['PFMT'])
|
|
87
|
+
fname = USAGE['AWSLOG'].format(pd, pdate)
|
|
88
|
+
fnames = glob.glob(fname)
|
|
89
|
+
if fnames: filenames[pdate] = sorted(fnames)
|
|
90
|
+
else:
|
|
91
|
+
if option == 'N':
|
|
92
|
+
edate = PgUtil.curdate()
|
|
93
|
+
pdate = PgUtil.adddate(edate, 0, 0, -int(params[0]))
|
|
94
|
+
else:
|
|
95
|
+
pdate = PgUtil.format_date(params[0])
|
|
96
|
+
if len(params) > 1:
|
|
97
|
+
edate = PgUtil.adddate(PgUtil.format_date(params[1]), 0, 0, 1)
|
|
98
|
+
else:
|
|
99
|
+
edate = PgUtil.curdate()
|
|
100
|
+
while pdate < edate:
|
|
101
|
+
pd = PgUtil.format_date(pdate, USAGE['PFMT'])
|
|
102
|
+
fname = USAGE['AWSLOG'].format(pd, pdate)
|
|
103
|
+
fnames = glob.glob(fname)
|
|
104
|
+
if fnames: filenames[pdate] = sorted(fnames)
|
|
105
|
+
pdate = PgUtil.adddate(pdate, 0, 0, 1)
|
|
106
|
+
|
|
107
|
+
return filenames
|
|
108
|
+
|
|
109
|
+
#
|
|
110
|
+
# Fill AWS usages into table dssdb.awsusage of DSS PgSQL database from aws access logs
|
|
111
|
+
#
|
|
112
|
+
def fill_aws_usages(filenames):
|
|
113
|
+
|
|
114
|
+
year = cntall = addall = 0
|
|
115
|
+
for pdate in filenames:
|
|
116
|
+
fnames = filenames[pdate]
|
|
117
|
+
fcnt = len(fnames)
|
|
118
|
+
PgLOG.pglog("{}: Gathering AWS usage info from {} log files at {}".format(pdate, fcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)
|
|
119
|
+
records = {}
|
|
120
|
+
cntadd = entcnt = 0
|
|
121
|
+
for logfile in fnames:
|
|
122
|
+
if not op.isfile(logfile):
|
|
123
|
+
PgLOG.pglog("{}: Not exists for Gathering AWS usage".format(logfile), PgLOG.LOGWRN)
|
|
124
|
+
continue
|
|
125
|
+
aws = PgFile.open_local_file(logfile)
|
|
126
|
+
if not aws: continue
|
|
127
|
+
while True:
|
|
128
|
+
line = aws.readline()
|
|
129
|
+
if not line: break
|
|
130
|
+
entcnt += 1
|
|
131
|
+
if entcnt%20000 == 0:
|
|
132
|
+
dcnt = len(records)
|
|
133
|
+
PgLOG.pglog("{}: {}/{} AWS log entries processed/records to add".format(pdate, entcnt, dcnt), PgLOG.WARNLG)
|
|
134
|
+
|
|
135
|
+
ms = re.match(r'^\w+ ([\w-]+) \[(\S+).*\] ([\d\.]+) .+ REST\.GET\.OBJECT \S+ "GET.+" \d+ - (\d+) \d+ .* ".+" "(.+)" ', line)
|
|
136
|
+
if not ms: continue
|
|
137
|
+
values = list(ms.groups())
|
|
138
|
+
if values[0] not in DSIDS: continue
|
|
139
|
+
dsid = DSIDS[values[0]]
|
|
140
|
+
size = int(values[3])
|
|
141
|
+
ip = values[2]
|
|
142
|
+
engine = values[4]
|
|
143
|
+
moff = engine.find('/')
|
|
144
|
+
if moff > 0:
|
|
145
|
+
if moff > 20: moff = 20
|
|
146
|
+
method = engine[0:moff].upper()
|
|
147
|
+
else:
|
|
148
|
+
method = "AWS"
|
|
149
|
+
key = "{}:{}:{}".format(ip, dsid, method)
|
|
150
|
+
if key in records:
|
|
151
|
+
records[key]['size'] += size
|
|
152
|
+
records[key]['fcount'] += 1
|
|
153
|
+
else:
|
|
154
|
+
(year, quarter, date, time) = get_record_date_time(values[1])
|
|
155
|
+
iprec = PgIPInfo.get_missing_ipinfo(ip)
|
|
156
|
+
if not iprec: continue
|
|
157
|
+
records[key] = {'ip' : ip, 'dsid' : dsid, 'date' : date, 'time' : time, 'quarter' : quarter,
|
|
158
|
+
'size' : size, 'fcount' : 1, 'method' : method, 'engine' : engine,
|
|
159
|
+
'org_type' : iprec['org_type'], 'country' : iprec['country'],
|
|
160
|
+
'region' : iprec['region'], 'email' : iprec['email']}
|
|
161
|
+
aws.close()
|
|
162
|
+
if records: cntadd = add_usage_records(records, year)
|
|
163
|
+
PgLOG.pglog("{}: {} AWS usage records added for {} entries at {}".format(pdate, cntadd, entcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)
|
|
164
|
+
cntall += entcnt
|
|
165
|
+
if cntadd:
|
|
166
|
+
addall += cntadd
|
|
167
|
+
if addall > cntadd:
|
|
168
|
+
PgLOG.pglog("{} AWS usage records added for {} entries at {}".format(addall, cntall, PgLOG.current_datetime()), PgLOG.LOGWRN)
|
|
169
|
+
|
|
170
|
+
def get_record_date_time(ctime):
|
|
171
|
+
|
|
172
|
+
ms = re.search(r'^(\d+)/(\w+)/(\d+):(\d+:\d+:\d+)$', ctime)
|
|
173
|
+
if ms:
|
|
174
|
+
d = int(ms.group(1))
|
|
175
|
+
m = PgUtil.get_month(ms.group(2))
|
|
176
|
+
y = ms.group(3)
|
|
177
|
+
t = ms.group(4)
|
|
178
|
+
q = 1 + int((m-1)/3)
|
|
179
|
+
return (y, q, "{}-{:02}-{:02}".format(y, m, d), t)
|
|
180
|
+
else:
|
|
181
|
+
PgLOG.pglog(ctime + ": Invalid date/time format", PgLOG.LGEREX)
|
|
182
|
+
|
|
183
|
+
def add_usage_records(records, year):
|
|
184
|
+
|
|
185
|
+
cnt = 0
|
|
186
|
+
for key in records:
|
|
187
|
+
record = records[key]
|
|
188
|
+
cond = "date = '{}' AND time = '{}' AND ip = '{}'".format(record['date'], record['time'], record['ip'])
|
|
189
|
+
if PgDBI.pgget(USAGE['PGTBL'], '', cond, PgLOG.LGEREX): continue
|
|
190
|
+
if add_to_allusage(year, record):
|
|
191
|
+
cnt += PgDBI.pgadd(USAGE['PGTBL'], record, PgLOG.LOGWRN)
|
|
192
|
+
|
|
193
|
+
return cnt
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def add_to_allusage(year, pgrec):
|
|
197
|
+
|
|
198
|
+
record = {'source' : 'A'}
|
|
199
|
+
flds = ['ip', 'dsid', 'date', 'time', 'quarter', 'size', 'method',
|
|
200
|
+
'org_type', 'country', 'region', 'email']
|
|
201
|
+
|
|
202
|
+
for fld in flds:
|
|
203
|
+
record[fld] = pgrec[fld]
|
|
204
|
+
|
|
205
|
+
return PgDBI.add_yearly_allusage(year, record)
|
|
206
|
+
|
|
207
|
+
#
|
|
208
|
+
# call main() to start program
|
|
209
|
+
#
|
|
210
|
+
if __name__ == "__main__": main()
|
{rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/fillawsusage.usg
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
|
|
2
2
|
Retrieves usage information from AWS Server logs under directory
|
|
3
|
-
../rda/transer/AWSera5log/ to fill table '
|
|
3
|
+
../rda/transer/AWSera5log/ to fill table 'awsusage' in database 'rdadb'.
|
|
4
4
|
|
|
5
5
|
Usage: fillawsusage [-b] [-d LogFileDates] [-N NumberDay] [-p BeginDate [Enddate]]
|
|
6
6
|
|
{rda_python_metrics-1.0.27 → rda_python_metrics-1.0.46}/src/rda_python_metrics/fillcdgusage.py
RENAMED
|
@@ -31,6 +31,8 @@ USAGE = {
|
|
|
31
31
|
}
|
|
32
32
|
|
|
33
33
|
DSIDS = {
|
|
34
|
+
'pi_cesm2_atm_river_analysis' : ['d010073'],
|
|
35
|
+
'na-cordex' : ['d316009'],
|
|
34
36
|
'ucar.cgd.cesm2.cam6.prescribed_sst_amip' : ['d651010'],
|
|
35
37
|
'ucar.cgd.ccsm4.CLM_LAND_ONLY' : ['d651011'],
|
|
36
38
|
'ucar.cgd.artmip' : ['d651012', 'd651016', 'd651017', 'd651018'],
|
|
@@ -53,7 +55,6 @@ DSIDS = {
|
|
|
53
55
|
'ucar.cgd.ccsm4.amv_lens' : ['d651035'],
|
|
54
56
|
'ucar.cgd.cesm2.cism_ismip6' : ['d651036'],
|
|
55
57
|
'ucar.cgd.ccsm4.pliomip2' : ['d651037'],
|
|
56
|
-
# new added
|
|
57
58
|
'ucar.cgd.cesm2-waccm.s2s_hindcasts': ['d651040'],
|
|
58
59
|
'ucar.cgd.CESM1.3_SH_storm_tracks': ['d651044'],
|
|
59
60
|
'ucar.cgd.cesm2.waccm6.ssp245': ['d651045'],
|
|
@@ -61,18 +62,36 @@ DSIDS = {
|
|
|
61
62
|
'ucar.cgd.ccsm4.TC-CESM': ['d651047'],
|
|
62
63
|
'ucar.cgd.cesm2.ISSI_OSSE': ['d651048'],
|
|
63
64
|
'ucar.cgd.ccsm4.SOcean_Eddies_mclong': ['d651049'],
|
|
64
|
-
'trace': ['d651050'],
|
|
65
|
+
'ucar.cgd.ccsm.trace': ['d651050'],
|
|
65
66
|
'ucar.cgd.cesm2.waccm.solar': ['d651051'],
|
|
66
67
|
'ucar.cgd.ccsm4.CESM1-CCSM4_mid-Pliocene' : ['d651042'],
|
|
67
68
|
'ucar.cgd.ccsm4.PaleoIF' : ['d651052'],
|
|
68
69
|
'ucar.cgd.ccsm4.b.e11.B20LE_fixedO3' : ['d651053'],
|
|
69
70
|
'ucar.cgd.cesm2.single.forcing.large.ensemble' : ['d651055'],
|
|
70
71
|
'ucar.cgd.cesm2le.output': ['d651056'],
|
|
72
|
+
'ucar.cgd.ccsm4.ARISE-SAI-1.5' : ['d651059'],
|
|
71
73
|
'ucar.cgd.cesm2.s2s_hindcasts': ['d651060'],
|
|
72
74
|
'ucar.cgd.cesm2.s2s_hindcasts.mjo': ['d651061'],
|
|
73
75
|
'ucar.cgd.cesm2.s2s_hindcasts.tc_tracks': ['d651062'],
|
|
74
76
|
'ucar.cgd.cesm2.s2s_hindcasts.cesm2.climo': ['d651063'],
|
|
75
|
-
'ucar.cgd.ccsm4.
|
|
77
|
+
'ucar.cgd.ccsm4.cesmLME' : ['d651058'],
|
|
78
|
+
'ucar.cgd.ccsm4.GLENS' : ['d651064'],
|
|
79
|
+
'ucar.cgd.ccsm4.CESM2-CISM2-LIGtransient' : ['d651066'],
|
|
80
|
+
'ucar.cgd.cesm2.pacific.pacemaker' : ['d651068'],
|
|
81
|
+
'ucar.cgd.cesm2.tuned.sea.ice.albedo' : ['d651070'],
|
|
82
|
+
'ucar.cgd.cesm2.cmip5.forcing' : ['d651075'],
|
|
83
|
+
'ucar.cgd.cesm2.ssp245.biomass.burning' : ['d651073'],
|
|
84
|
+
'ucar.cgd.cesm2.ssp585.biomass.burning' : ['d651067'],
|
|
85
|
+
'ucar.cgd.cesm1.cldmod': ['d651069'],
|
|
86
|
+
'ucar.cgd.cesm2.marine.biogeochemistry': ['d651071'],
|
|
87
|
+
'ucar.cgd.nw2.mom6': ['d651072'],
|
|
88
|
+
'ucar.cgd.cesm2.cam6.ppe': ['d651076'],
|
|
89
|
+
'ucar.cgd.cesm2.smyle': ['d651065'],
|
|
90
|
+
# new added
|
|
91
|
+
'gridded_precip_and_temp' : ['d010078'],
|
|
92
|
+
'29_newman' : ['d010079'],
|
|
93
|
+
'waccm-x.ion.asymmetry' : ['d010081'],
|
|
94
|
+
'NARCCAP' : ['d316015']
|
|
76
95
|
}
|
|
77
96
|
|
|
78
97
|
ALLIDS = list(DSIDS.keys())
|
|
@@ -225,12 +244,14 @@ def fill_cdg_usages(dsids, dranges):
|
|
|
225
244
|
|
|
226
245
|
allcnt = awcnt = atcnt = lcnt = 0
|
|
227
246
|
for dates in dranges:
|
|
228
|
-
for
|
|
247
|
+
for adsid in dsids:
|
|
229
248
|
lcnt += 1
|
|
230
|
-
dsname =
|
|
231
|
-
rdaids =
|
|
232
|
-
|
|
233
|
-
|
|
249
|
+
dsname = adsid[0]
|
|
250
|
+
rdaids = adsid[1]
|
|
251
|
+
getdsid = False if len(rdaids) == 1 else True
|
|
252
|
+
dsid = rdaids[0]
|
|
253
|
+
cdgids = adsid[2]
|
|
254
|
+
strids = adsid[3]
|
|
234
255
|
bt = tm()
|
|
235
256
|
pgrecs = get_dsid_records(cdgids, dates, strids)
|
|
236
257
|
pgcnt = len(pgrecs['dataset_file_name']) if pgrecs else 0
|
|
@@ -248,6 +269,10 @@ def fill_cdg_usages(dsids, dranges):
|
|
|
248
269
|
PgLOG.pglog("{}/{}/{} CDG/TDS/WEB records processed to add".format(i, tcnt, wcnt), PgLOG.WARNLG)
|
|
249
270
|
|
|
250
271
|
pgrec = PgUtil.onerecord(pgrecs, i)
|
|
272
|
+
wfile = pgrec['dataset_file_name']
|
|
273
|
+
if not wfile:
|
|
274
|
+
wfile = pgrec['logic_file_name']
|
|
275
|
+
if not wfile: continue
|
|
251
276
|
dsize = pgrec['bytes_sent']
|
|
252
277
|
if not dsize: continue
|
|
253
278
|
(year, quarter, date, time) = get_record_date_time(pgrec['date_completed'])
|
|
@@ -255,14 +280,13 @@ def fill_cdg_usages(dsids, dranges):
|
|
|
255
280
|
if not url: url = pgrec['file_access_point_uri']
|
|
256
281
|
ip = pgrec['remote_address']
|
|
257
282
|
engine = pgrec['user_agent_name']
|
|
258
|
-
wfile = pgrec['dataset_file_name']
|
|
259
|
-
if not wfile: wfile = pgrec['logic_file_name']
|
|
260
|
-
wfrec = get_wfile_record(rdaids, wfile)
|
|
261
|
-
if not wfrec: continue
|
|
262
|
-
dsid = wfrec['dsid']
|
|
263
283
|
ms = re.search(r'^https*://tds.ucar.edu/thredds/(\w+)/', url)
|
|
264
284
|
if ms:
|
|
265
285
|
# tds usage
|
|
286
|
+
if getdsid:
|
|
287
|
+
wfrec = get_wfile_record(rdaids, wfile)
|
|
288
|
+
if not wfrec: continue
|
|
289
|
+
dsid = wfrec['dsid']
|
|
266
290
|
method = ms.group(1)
|
|
267
291
|
if pgrec['subset_file_size']:
|
|
268
292
|
etype = 'S'
|
|
@@ -289,6 +313,9 @@ def fill_cdg_usages(dsids, dranges):
|
|
|
289
313
|
'region' : iprec['region'], 'email' : iprec['email']}
|
|
290
314
|
else:
|
|
291
315
|
# web usage
|
|
316
|
+
wfrec = get_wfile_record(rdaids, wfile)
|
|
317
|
+
if not wfrec: continue
|
|
318
|
+
if getdsid: dsid = wfrec['dsid']
|
|
292
319
|
fsize = pgrec['dataset_file_size']
|
|
293
320
|
if not fsize: fsize = pgrec['logic_file_size']
|
|
294
321
|
method = 'CDG'
|