rda-python-metrics 1.0.8__tar.gz → 1.0.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rda-python-metrics might be problematic. Click here for more details.
- rda_python_metrics-1.0.10/MANIFEST.in +1 -0
- {rda_python_metrics-1.0.8/src/rda_python_metrics.egg-info → rda_python_metrics-1.0.10}/PKG-INFO +1 -1
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/pyproject.toml +2 -16
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillawsusage.usg +1 -2
- rda_python_metrics-1.0.10/src/rda_python_metrics/fillcdgusage.py +429 -0
- rda_python_metrics-1.0.10/src/rda_python_metrics/fillcdgusage.usg +18 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillcodusage.usg +1 -1
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillglobususage.usg +1 -1
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10/src/rda_python_metrics.egg-info}/PKG-INFO +1 -1
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics.egg-info/SOURCES.txt +2 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics.egg-info/entry_points.txt +1 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/tests/test_metrics.py +1 -1
- rda_python_metrics-1.0.8/MANIFEST.in +0 -19
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/LICENSE +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/README.md +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/setup.cfg +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/PgIPInfo.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/PgView.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/__init__.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillawsusage.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillcodusage.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillcountry.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillendtime.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillglobususage.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillipinfo.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillipinfo.usg +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/filloneorder.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/filloneorder.usg +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillosdfusage.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillosdfusage.usg +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillrdadb.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillrdadb.usg +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/filltdsusage.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/filltdsusage.usg +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/filluser.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/filluser.usg +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/logarch.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/logarch.usg +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/pgperson.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/pgsyspath.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/pgusername.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewallusage.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewallusage.usg +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewcheckusage.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewcheckusage.usg +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewcodusage.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewcodusage.usg +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewordusage.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewordusage.usg +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewrqstusage.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewrqstusage.usg +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewtdsusage.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewtdsusage.usg +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewwebfile.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewwebfile.usg +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewwebusage.py +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewwebusage.usg +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics.egg-info/dependency_links.txt +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics.egg-info/requires.txt +0 -0
- {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
include src/rda_python_metrics/*.usg
|
|
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
|
|
|
6
6
|
|
|
7
7
|
[project]
|
|
8
8
|
name = "rda_python_metrics"
|
|
9
|
-
version = "1.0.8"
|
|
9
|
+
version = "1.0.10"
|
|
10
10
|
authors = [
|
|
11
11
|
{ name="Zaihua Ji", email="zji@ucar.edu" },
|
|
12
12
|
]
|
|
@@ -27,21 +27,6 @@ dependencies = [
|
|
|
27
27
|
"httplib2"
|
|
28
28
|
]
|
|
29
29
|
|
|
30
|
-
[tool.setuptools]
|
|
31
|
-
include-package-data = true
|
|
32
|
-
|
|
33
|
-
[tool.setuptools.packages.find]
|
|
34
|
-
where = ["src"]
|
|
35
|
-
|
|
36
|
-
[tool.setuptools.package-data]
|
|
37
|
-
"rda_python_metrics" = [
|
|
38
|
-
"logarch.usg", "fillawsusage.usg", "fillcodusage.usg", "fillglobususage.usg",
|
|
39
|
-
"fillipinfo.usg", "filloneorder.usg", "fillosdfusage.usg", "fillrdadb.usg",
|
|
40
|
-
"filltdsusage.usg", "viewallusage.usg", "viewcheckusage.usg", "viewcodusage.usg",
|
|
41
|
-
"viewordusage.usg", "viewrqstusage.usg", "viewtdsusage.usg", "viewwebfile.usg",
|
|
42
|
-
"viewwebusage.usg"
|
|
43
|
-
]
|
|
44
|
-
|
|
45
30
|
[tool.pytest.ini_options]
|
|
46
31
|
pythonpath = [
|
|
47
32
|
"src"
|
|
@@ -53,6 +38,7 @@ pythonpath = [
|
|
|
53
38
|
[project.scripts]
|
|
54
39
|
"logarch.py" = "rda_python_metrics.logarch:main"
|
|
55
40
|
"fillawsusage" = "rda_python_metrics.fillawsusage:main"
|
|
41
|
+
"fillcdgusage" = "rda_python_metrics.fillcdgusage:main"
|
|
56
42
|
"fillcodusage" = "rda_python_metrics.fillcodusage:main"
|
|
57
43
|
"fillcountry" = "rda_python_metrics.fillcountry:main"
|
|
58
44
|
"fillendtime" = "rda_python_metrics.fillendtime:main"
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillawsusage.usg
RENAMED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
|
|
2
2
|
Retrieves usage information from AWS Server logs under directory
|
|
3
|
-
|
|
4
|
-
database 'dssdb'.
|
|
3
|
+
../rda/transer/AWSera5log/ to fill table 'wusage' in database 'rdadb'.
|
|
5
4
|
|
|
6
5
|
Usage: fillawsusage [-b] [-d LogFileDates] [-N NumberDay] [-p BeginDate [Enddate]]
|
|
7
6
|
|
|
@@ -0,0 +1,429 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
###############################################################################
|
|
4
|
+
#
|
|
5
|
+
# Title : fillcdgusage
|
|
6
|
+
# Author : Zaihua Ji, zji@ucar.edu
|
|
7
|
+
# Date : 2025-04-14
|
|
8
|
+
# Purpose : python program to retrieve info from GDEX Postgres database for GDS
|
|
9
|
+
# file accesses and backup fill table tdsusage in PostgreSQL database dssdb.
|
|
10
|
+
#
|
|
11
|
+
# Github : https://github.com/NCAR/rda-python-metrics.git
|
|
12
|
+
#
|
|
13
|
+
###############################################################################
|
|
14
|
+
#
|
|
15
|
+
import sys
|
|
16
|
+
import re
|
|
17
|
+
import glob
|
|
18
|
+
from os import path as op
|
|
19
|
+
from rda_python_common import PgLOG
|
|
20
|
+
from rda_python_common import PgUtil
|
|
21
|
+
from rda_python_common import PgFile
|
|
22
|
+
from rda_python_common import PgDBI
|
|
23
|
+
from rda_python_common import PgSplit
|
|
24
|
+
from . import PgIPInfo
|
|
25
|
+
|
|
26
|
+
# Shared settings: names of the two usage tables this module fills and the
# value of PgUtil.curdate() captured at import time (presumably today's date).
USAGE = dict(
    TDSTBL = "tdsusage",
    WEBTBL = "webusage",
    CDATE = PgUtil.curdate(),
)
|
|
31
|
+
|
|
32
|
+
# Maps each CDG dataset short name to the list of RDA dataset ids it is
# associated with. Every id carries the leading 'd'.
DSIDS = {
    'ucar.cgd.cesm2.cam6.prescribed_sst_amip' : ['d651010'],
    'ucar.cgd.ccsm4.CLM_LAND_ONLY' : ['d651011'],
    'ucar.cgd.artmip' : ['d651012', 'd651016', 'd651017', 'd651018'],
    'tamip' : ['d651013'],
    'ucar.cgd.ccsm4.CLIVAR_LE' : ['d651014'],
    'ucar.cgd.cesm2.Gettelman_CESM2_ECS' : ['d651015'],
    'ucar.cgd.ccsm4.geomip.ssp5' : ['d651024'],
    'ucar.cgd.ccsm4.IOD-PACEMAKER' : ['d651021'],
    # was '651023' (missing the 'd' prefix used by every other id)
    'ucar.cgd.ccsm4.past2k_transient' : ['d651023'],
    'ucar.cgd.ccsm4.lowwarming' : ['d651025'],
    'ucar.cgd.ccsm4.CESM_CAM5_BGC_ME' : ['d651000'],
    'ucar.cgd.ccsm4.iTRACE' : ['d651022'],
    'ucar.cgd.ccsm4.so2_geoeng' : ['d651026'],
    'ucar.cgd.ccsm4.cesmLE' : ['d651027'],
    'ucar.cgd.ccsm4.CESM1-CAM5-DP' : ['d651028'],
    # NOTE(review): this short name was listed twice (d651031 and d651035), so
    # the second entry silently replaced the first. Both ids are kept here;
    # confirm whether d651035 actually belongs to a different short name.
    'ucar.cgd.ccsm4.amv_lens' : ['d651031', 'd651035'],
    'ucar.cgd.ccsm4.ATL-PACEMAKER' : ['d651032'],
    'ucar.cgd.ccsm4.pac-pacemaker' : ['d651033'],
    'ucar.cgd.ccsm4.SD-WACCM-X_v2.1' : ['d651034'],
    'ucar.cgd.cesm2.cism_ismip6' : ['d651036'],
    'ucar.cgd.ccsm4.pliomip2' : ['d651037']
}

# all known CDG dataset short names
ALLIDS = list(DSIDS.keys())

# caches filled by get_wfile_record() and get_wuser_record()
WFILES = {}
WUSERS = {}
|
|
61
|
+
|
|
62
|
+
#
|
|
63
|
+
# main function to run this program
|
|
64
|
+
#
|
|
65
|
+
def main():
    """Parse the command line, then gather and fill the CDG usage.

    Recognized arguments are -b (sets PgLOG.PGLOG['BCKGRND']) and the
    value-taking options -m, -s, -N and -y; every bare argument is appended
    to the most recent value-taking option. Option -s plus at least one other
    value-taking option must be present. The process ends with sys.exit(0).
    """
    cmdargs = sys.argv[1:]
    options = {}      # option letter -> list of values that followed it
    current = None    # most recent value-taking option letter

    for token in cmdargs:
        if token == "-b":
            PgLOG.PGLOG['BCKGRND'] = 1
            continue
        if re.match(r'^-[msNy]$', token):
            current = token[1]
            options[current] = []
            continue
        if re.match(r'^-', token):
            PgLOG.pglog(token + ": Invalid Option", PgLOG.LGWNEX)
            continue
        if current:
            options[current].append(token)
        else:
            PgLOG.pglog(token + ": Value passed in without leading option", PgLOG.LGWNEX)

    if not current:
        PgLOG.show_usage('fillcdgusage')
    elif 's' not in options:
        PgLOG.pglog("-s: Missing dataset short name to gather CDG metrics", PgLOG.LGWNEX)
    elif len(options) < 2:
        PgLOG.pglog("-(m|N|y): Missing Month, NumberDays or Year to gather CDG metrics", PgLOG.LGWNEX)

    PgLOG.cmdlog("fillcdgusage {}".format(' '.join(cmdargs)))
    date_ranges = get_date_ranges(options)
    dataset_ids = get_dataset_ids(options['s'])
    if date_ranges and dataset_ids:
        fill_cdg_usages(dataset_ids, date_ranges)
    PgLOG.pglog(None, PgLOG.LOGWRN|PgLOG.SNDEML)  # send email out if any

    sys.exit(0)
|
|
99
|
+
|
|
100
|
+
#
|
|
101
|
+
# connect to the gdex database esg-production
|
|
102
|
+
#
|
|
103
|
+
def gdex_dbname():
    """Point PgDBI at the GDEX metrics database via PgDBI.set_scname()."""
    target = ('esg-production', 'metrics', 'gateway-reader', None, 'sagedbprodalma.ucar.edu')
    PgDBI.set_scname(*target)
|
|
105
|
+
|
|
106
|
+
#
|
|
107
|
+
# get datasets
|
|
108
|
+
#
|
|
109
|
+
def get_dataset_ids(dsnames):
    """Resolve CDG dataset short names into [cdg id, rda ids] pairs.

    dsnames -- list of dataset short names; the name 'all' (any case) selects
               every short name known in DSIDS.

    Each short name is looked up in table metadata.dataset; the matching id
    and all of its descendants (via recursive_dataset_ids) are collected.
    Returns a list of [dataset id, list of RDA dataset ids] pairs, which is
    empty (after a warning) if nothing could be identified.
    """
    gdex_dbname()
    tbname = 'metadata.dataset'

    # 'all' used to return ALLIDS (bare names) although callers expect
    # [id, rdaids] pairs; expand it to the full list of names instead
    if any(re.match(r'^all$', dsname, re.I) for dsname in dsnames):
        dsnames = ALLIDS

    dsids = []
    for dsname in dsnames:
        if dsname not in DSIDS:
            PgLOG.pglog(dsname + ": Unknown CDG dataset short name", PgLOG.LOGWRN)
            continue
        rdaid = DSIDS[dsname]
        pgrec = PgDBI.pgget(tbname, 'id', "short_name = '{}'".format(dsname))
        if not (pgrec and pgrec['id']): continue
        dsid = pgrec['id']
        # dsids holds [id, rdaids] pairs, so compare against the first element
        if any(known[0] == dsid for known in dsids): continue
        dsids.append([dsid, rdaid])
        recursive_dataset_ids(dsid, rdaid, dsids)

    if not dsids: PgLOG.pglog("No Dataset Id identified to gather CDG metrics", PgLOG.LOGWRN)

    return dsids
|
|
130
|
+
|
|
131
|
+
#
|
|
132
|
+
# get dsids recursively
|
|
133
|
+
#
|
|
134
|
+
def recursive_dataset_ids(pdsid, rdaid, dsids):
    """Append all descendants of dataset pdsid to dsids.

    pdsid -- id of the parent dataset in table metadata.dataset
    rdaid -- list of RDA dataset ids to pair with every descendant
    dsids -- list of [dataset id, rdaid] pairs, extended in place

    A dataset id that is already present in dsids is skipped and not
    descended into again.
    """
    tbname = 'metadata.dataset'
    pgrecs = PgDBI.pgmget(tbname, 'id', "parent_dataset_id = '{}'".format(pdsid))
    if not pgrecs: return

    for dsid in pgrecs['id']:
        # dsids holds [id, rdaid] pairs, so compare against the first element
        if any(known[0] == dsid for known in dsids): continue
        dsids.append([dsid, rdaid])
        recursive_dataset_ids(dsid, rdaid, dsids)
|
|
144
|
+
|
|
145
|
+
#
|
|
146
|
+
# get the date ranges for given condition
|
|
147
|
+
#
|
|
148
|
+
def get_date_ranges(inputs):
    """Build the list of [begin date, end date] ranges from parsed options.

    inputs -- dict of option letter -> list of values:
              'N'        number of days back from USAGE['CDATE']
              'm'        months given as YYYY-MM
              'y' or 'Y' years given as YYYY
              any other option (e.g. 's') contributes no date range

    Returns a list of two-element lists of date strings.
    """
    dranges = []
    for opt in inputs:
        for value in inputs[opt]:
            if opt == 'N':
                dates = [PgUtil.adddate(USAGE['CDATE'], 0, 0, -int(value)), USAGE['CDATE']]
            elif opt == 'm':
                tms = value.split('-')
                bdate = PgUtil.fmtdate(int(tms[0]), int(tms[1]), 1)
                dates = [bdate, PgUtil.enddate(bdate, 0, 'M')]
            elif opt in ('y', 'Y'):
                # the command line accepts -y; 'Y' is kept for compatibility
                dates = [value + "-01-01", value + "-12-31"]
            else:
                # not a date option; do not emit an empty range for it
                continue
            dranges.append(dates)

    return dranges
|
|
168
|
+
|
|
169
|
+
#
|
|
170
|
+
# get file download records for given dsid
|
|
171
|
+
#
|
|
172
|
+
def get_dsid_records(dsid, dates):
    """Query the completed file downloads of one dataset in a date range.

    dsid  -- dataset id matched against metrics.file_download.dataset_id
    dates -- [begin date, end date]

    Switches to the GDEX database for the query and back with
    PgDBI.dssdb_dbname() afterwards. Returns whatever PgDBI.pgmget() returns.
    """
    gdex_dbname()
    (bdate, edate) = (dates[0], dates[1])
    columns = ('date_completed, remote_address, logical_file_size, logical_file_name, file_access_point_uri, user_agent_name, bytes_sent, '
               'subset_file_size, range_request, dataset_file_size, dataset_file_name, dataset_file_file_access_point_uri')
    where = "dataset_id = '{}' AND completed = True AND date_completed BETWEEN '{}' AND '{}' ORDER BY date_completed".format(dsid, bdate, edate)
    PgLOG.pglog("{}: Query CDG usage between {} and {} at {}".format(dsid, bdate, edate, PgLOG.current_datetime()), PgLOG.LOGWRN)
    found = PgDBI.pgmget('metrics.file_download', columns, where)
    PgDBI.dssdb_dbname()

    return found
|
|
184
|
+
|
|
185
|
+
#
|
|
186
|
+
# Fill TDS usages into table dssdb.tdsusage from cdg access records
|
|
187
|
+
#
|
|
188
|
+
def fill_cdg_usages(dsids, dranges):
    """Fill TDS and web usage from the CDG file download records.

    dsids   -- list of [cdg dataset id, list of RDA dataset ids] pairs
    dranges -- list of [begin date, end date] pairs

    For every date range and dataset the download records are fetched with
    get_dsid_records(). Records whose URL starts with the THREDDS prefix are
    accumulated per day and per ip/dataset/method/type key and stored through
    add_tdsusage_records(); all other records are stored as web file usage
    through add_webfile_usage(). Consecutive subset/range/partial web
    downloads with the same ip/dataset/file key are merged into one record.
    Records without bytes sent, without a matching web file record, or without
    a resolvable user record are skipped.
    """
    allcnt = awcnt = atcnt = lcnt = 0
    for dates in dranges:
        for dsid in dsids:
            lcnt += 1
            cdgid = dsid[0]
            rdaid = dsid[1]
            srdaid = '|'.join(rdaid)
            pgrecs = get_dsid_records(cdgid, dates)
            pgcnt = len(pgrecs['dataset_file_name']) if pgrecs else 0
            if pgcnt == 0:
                PgLOG.pglog("{}: No record found to gather CDG usage between {} and {}".format(srdaid, dates[0], dates[1]), PgLOG.LOGWRN)
                continue
            PgLOG.pglog("{}: Process {} records for CDG usage at {}".format(srdaid, pgcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)
            tcnt = wcnt = 0
            cdate = cyear = None                   # day (and its year) of the pending TDS records
            pwkey = wrec = wuser = wyear = None    # pending merged web record and its user/year
            trecs = {}
            for i in range(pgcnt):
                if (i+1)%20000 == 0:
                    PgLOG.pglog("{}/{}/{} CDG/TDS/WEB records processed to add".format(i+1, tcnt, wcnt), PgLOG.WARNLG)

                pgrec = PgUtil.onerecord(i, pgrecs)
                dsize = pgrec['bytes_sent']
                if not dsize: continue
                (year, quarter, date, time) = get_record_date_time(pgrec['date_completed'])
                url = pgrec['dataset_file_file_access_point_uri']
                if not url: url = pgrec['file_access_point_uri']
                ip = pgrec['remote_address']
                engine = pgrec['user_agent_name']
                wfile = pgrec['dataset_file_name']
                # the queried column is logical_file_name (not logic_file_name)
                if not wfile: wfile = pgrec['logical_file_name']
                wfrec = get_wfile_record(rdaid, wfile)
                if not wfrec: continue
                ms = re.search(r'^https://tds.ucar.edu/thredds/(\w+)/', url)
                if ms:
                    # tds usage
                    method = ms.group(1)
                    if pgrec['subset_file_size']:
                        etype = 'S'
                    elif pgrec['range_request']:
                        etype = 'R'
                    else:
                        etype = 'F'

                    if date != cdate:
                        if trecs:
                            # store the finished day under its own year
                            tcnt += add_tdsusage_records(cyear, trecs, cdate)
                            trecs = {}
                        cdate = date
                        cyear = year
                    tkey = "{}:{}:{}:{}".format(ip, rdaid, method, etype)
                    if tkey in trecs:
                        trecs[tkey]['size'] += dsize
                        trecs[tkey]['fcount'] += 1
                    else:
                        wurec = get_wuser_record(ip, cdate, skipwuid = True)
                        # skip this record only; do not abandon the whole run
                        if not wurec: continue
                        trecs[tkey] = {'ip' : ip, 'dsid' : wfrec['dsid'], 'date' : cdate, 'time' : time, 'size' : dsize,
                                       'fcount' : 1, 'method' : method, 'etype' : etype, 'engine' : engine,
                                       'org_type' : wurec['org_type'], 'country' : wurec['country'],
                                       'email' : wurec['email']}
                else:
                    # web usage
                    fsize = pgrec['dataset_file_size']
                    if not fsize: fsize = pgrec['logical_file_size']
                    method = 'CDP'
                    # a key is set only for subset/range/partial downloads, which may be merged
                    if pgrec['subset_file_size'] or pgrec['range_request'] or (fsize and dsize < fsize):
                        wkey = "{}:{}:{}".format(ip, rdaid, wfile)
                    else:
                        wkey = None

                    if wrec:
                        if wkey == pwkey:
                            wrec['size'] += dsize
                            continue
                        # flush the pending merged record with its own year and user
                        wcnt += add_webfile_usage(wyear, wrec, wuser)
                        wrec = None
                    # use this record's date; cdate is only maintained by the tds branch
                    wurec = get_wuser_record(ip, date, skipwuid = False)
                    if not wurec: continue
                    wrec = {'ip' : ip, 'dsid' : wfrec['dsid'], 'wid' : wfrec['wid'], 'date' : date,
                            'time' : time, 'quarter' : quarter, 'size' : dsize,
                            'locflag' : 'C', 'method' : method}
                    wuser = wurec
                    wyear = year
                    pwkey = wkey
                    if not pwkey:
                        # full download: nothing to merge, store it right away
                        wcnt += add_webfile_usage(wyear, wrec, wuser)
                        wrec = None

            if trecs: tcnt += add_tdsusage_records(cyear, trecs, cdate)
            if wrec: wcnt += add_webfile_usage(wyear, wrec, wuser)
            atcnt += tcnt
            awcnt += wcnt
            allcnt += pgcnt
            PgLOG.pglog("{}/{} TDS/WEB usage records added for {} CDG entries at {}".format(atcnt, awcnt, allcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)

    if lcnt > 1: PgLOG.pglog("{}/{} TDS/WEB usage records added for {} CDG entries at {}".format(atcnt, awcnt, allcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def get_record_date_time(ctime):
    """Split a completion time into (year, quarter, date, time).

    ctime -- value whose string form looks like 'DD/Mon/YYYY HH:MM:SS',
             optionally followed by a fraction of a second

    Returns a tuple of the year string, the quarter (1-4), the date as
    'YYYY-MM-DD' and the time as 'HH:MM:SS'. A value in any other format is
    reported through PgLOG.pglog() with PgLOG.LGEREX (presumably exits) and
    None is returned if that call comes back.
    """
    ms = re.search(r'^(\d+)/(\w+)/(\d+) (\d+:\d+:\d+)(\.|$)', str(ctime))
    if not ms:
        # name the offending value instead of the literal word "time"
        PgLOG.pglog("{}: Invalid date format".format(ctime), PgLOG.LGEREX)
        return None

    d = int(ms.group(1))
    m = PgUtil.get_month(ms.group(2))
    q = 1 + (m-1)//3
    y = ms.group(3)
    t = ms.group(4)
    return (y, q, "{}-{:02}-{:02}".format(y, m, d), t)
|
|
296
|
+
|
|
297
|
+
def add_tdsusage_records(year, records, date):
    """Store the accumulated TDS usage records of one day.

    year    -- year passed on to add_tds_allusage()
    records -- dict of key -> usage record (each with 'time' and 'ip')
    date    -- the day the records belong to

    A record is skipped when a row with the same date, time and ip already
    exists in the TDS usage table, or when add_tds_allusage() returns a false
    value. Returns the sum of the PgDBI.pgadd() results.
    """
    added = 0
    for rec in records.values():
        dupcond = "date = '{}' AND time = '{}' AND ip = '{}'".format(date, rec['time'], rec['ip'])
        if PgDBI.pgget(USAGE['TDSTBL'], '', dupcond, PgLOG.LGEREX):
            continue
        if not add_tds_allusage(year, rec):
            continue
        added += PgDBI.pgadd(USAGE['TDSTBL'], rec, PgLOG.LOGWRN)

    PgLOG.pglog("{}: {} TDS usage records added at {}".format(date, added, PgLOG.current_datetime()), PgLOG.LOGWRN)

    return added
|
|
311
|
+
|
|
312
|
+
def add_tds_allusage(year, pgrec):
    """Add one TDS usage record to the yearly all-usage table.

    Copies every field of pgrec except engine, method, etype and fcount into
    a record preset with method 'CDP' and source 'C', and returns the result
    of PgDBI.add_yearly_allusage(year, record).
    """
    tds_only = re.compile(r'^(engine|method|etype|fcount)$')
    record = {'method' : 'CDP', 'source' : 'C'}
    record.update((fld, val) for (fld, val) in pgrec.items() if not tds_only.match(fld))

    return PgDBI.add_yearly_allusage(year, record)
|
|
321
|
+
|
|
322
|
+
#
|
|
323
|
+
# Fill usage of a single online data file into table dssdb.wusage of DSS PgSQL database
|
|
324
|
+
#
|
|
325
|
+
def add_webfile_usage(year, logrec, wurec):
    """Store the usage of one web file for the given year.

    year   -- year used as suffix of the web usage table name
    logrec -- usage record with wid, dsid, date, time, size, method, locflag,
              ip and quarter
    wurec  -- user record with wuid, email, org_type and country

    Returns 0 when a row with the same wid, method, date and time already
    exists or when add_web_allusage() returns a false value; otherwise the
    result of PgDBI.add_yearly_wusage().
    """
    table = "{}_{}".format(USAGE['WEBTBL'], year)
    dupcond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(logrec['wid'], logrec['method'], logrec['date'], logrec['time'])
    if PgDBI.pgget(table, "", dupcond, PgLOG.LOGWRN):
        return 0

    record = {
        'wid' : logrec['wid'],
        'dsid' : logrec['dsid'],
        'wuid_read' : wurec['wuid'],
        'date_read' : logrec['date'],
        'time_read' : logrec['time'],
        'size_read' : logrec['size'],
        'method' : logrec['method'],
        'locflag' : logrec['locflag'],
        'ip' : logrec['ip'],
        'quarter' : logrec['quarter'],
    }

    if not add_web_allusage(year, logrec, wurec):
        return 0
    return PgDBI.add_yearly_wusage(year, record)
|
|
345
|
+
|
|
346
|
+
def add_web_allusage(year, logrec, wurec):
    """Add one web file usage to the yearly all-usage table.

    Combines the email, org_type and country of wurec with the dsid, date,
    quarter, time, size, method and ip of logrec, marks the record with
    source 'C' and returns the result of PgDBI.add_yearly_allusage().
    """
    record = {
        'email' : wurec['email'],
        'org_type' : wurec['org_type'],
        'country' : wurec['country'],
    }
    for fld in ('dsid', 'date', 'quarter', 'time', 'size', 'method', 'ip'):
        record[fld] = logrec[fld]
    record['source'] = 'C'

    return PgDBI.add_yearly_allusage(year, record)
|
|
358
|
+
|
|
359
|
+
#
|
|
360
|
+
# return the wfile record (with 'wid' and 'dsid') upon success, None otherwise
|
|
361
|
+
#
|
|
362
|
+
def get_wfile_record(dsids, wfile):
    """Find the web file record of wfile in one of the given datasets.

    dsids -- list of RDA dataset ids to search, in order
    wfile -- file name; matched as the tail of column wfile

    The WFILES cache is checked first. Otherwise each dataset is searched
    through PgSplit.pgget_wfile(); failing that, table wfile_delete and then
    table wmove are consulted per dataset. The first record found is cached
    under its dataset/file key and returned with 'wid' and 'dsid' set.
    Returns None when nothing is found.
    """
    for dsid in dsids:
        wkey = "{}{}".format(dsid, wfile)
        if wkey in WFILES: return WFILES[wkey]

    wfcond = "wfile like '%{}'".format(wfile)
    for dsid in dsids:
        pgrec = PgSplit.pgget_wfile(dsid, "wid", wfcond)
        if pgrec:
            pgrec['dsid'] = dsid
            WFILES["{}{}".format(dsid, wfile)] = pgrec
            return pgrec

    for dsid in dsids:
        pgrec = PgDBI.pgget("wfile_delete", "wid, dsid", "{} AND dsid = '{}'".format(wfcond, dsid))
        if not pgrec:
            mvrec = PgDBI.pgget("wmove", "wid, dsid", wfcond)
            if mvrec:
                # look up the moved file by the wid of the move record
                # (pgrec is None here and must not be subscripted)
                pgrec = PgSplit.pgget_wfile(mvrec['dsid'], "wid", "wid = {}".format(mvrec['wid']))
                if pgrec: pgrec['dsid'] = mvrec['dsid']

        if pgrec:
            # cache under this dataset's key and stop, so that a later
            # dataset cannot replace the record that was found
            WFILES["{}{}".format(dsid, wfile)] = pgrec
            return pgrec

    return None
|
|
387
|
+
|
|
388
|
+
# return wuser record upon success, None otherwise
|
|
389
|
+
def get_wuser_record(ip, date, skipwuid = True):
    """Get the user record for an IP address, adding a wuser row if needed.

    ip       -- remote address of the access
    date     -- access date; used as start_date of a new wuser row and to move
                the start_date of an existing row back when it is later
    skipwuid -- when True only org_type, country and email are needed and
                table wuser is not touched; when False the returned record
                also carries 'wuid'

    Records are cached per ip in WUSERS. Returns the record, or None when the
    IP information or the wuser row cannot be obtained.
    """
    cached = WUSERS.get(ip)
    # a record cached by a skipwuid call has no 'wuid'; it must not be handed
    # to a caller that needs one
    if cached and (skipwuid or 'wuid' in cached): return cached

    ipinfo = PgIPInfo.set_ipinfo(ip)
    if not ipinfo: return None

    record = {'org_type' : ipinfo['org_type'], 'country' : ipinfo['country']}
    email = 'unknown@' + ipinfo['hostname']
    if skipwuid:
        record['email'] = email
        WUSERS[ip] = record
        return record

    emcond = "email = '{}'".format(email)
    flds = 'wuid, email, org_type, country, start_date'
    pgrec = PgDBI.pgget("wuser", flds, emcond, PgLOG.LOGERR)
    if pgrec:
        if PgUtil.diffdate(pgrec['start_date'], date) > 0:
            pgrec['start_date'] = record['start_date'] = date
            PgDBI.pgupdt('wuser', record, emcond)
        WUSERS[ip] = pgrec
        return pgrec

    # now add one in
    record['email'] = email
    record['stat_flag'] = 'A'
    record['start_date'] = date
    wuid = PgDBI.pgadd("wuser", record, PgLOG.LOGERR|PgLOG.AUTOID)
    if wuid:
        record['wuid'] = wuid
        PgLOG.pglog("{} Added as wuid({})".format(email, wuid), PgLOG.LGWNEM)
        WUSERS[ip] = record
        return record

    return None
|
|
425
|
+
|
|
426
|
+
#
|
|
427
|
+
# call main() to start program
|
|
428
|
+
#
|
|
429
|
+
if __name__ == "__main__": main()
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
|
|
2
|
+
Retrieves CDG usage information from GDEX metrics database to
|
|
3
|
+
fill tables 'tdsusage' and 'webusage' in PostgreSQL database 'rdadb'.
|
|
4
|
+
|
|
5
|
+
Usage: fillcdgusage [-b] -s DatasetShortNames [-m MonthList] [-N NumberDays] [-y YearList]
|
|
6
|
+
|
|
7
|
+
select option -s and one of the options, -m, -N or -y each time to run
|
|
8
|
+
this application.
|
|
9
|
+
|
|
10
|
+
- Option -b, log process information into logfile only;
|
|
11
|
+
|
|
12
|
+
- Option -s, retrieve usage info for given dataset short names;
|
|
13
|
+
|
|
14
|
+
- Option -m, retrieve usage info in given months (YYYY-MM);
|
|
15
|
+
|
|
16
|
+
- Option -N, retrieve usage info in recent Number of days;
|
|
17
|
+
|
|
18
|
+
- Option -y, retrieve usage info in given years (YYYY).
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillglobususage.usg
RENAMED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
|
|
2
2
|
Retrieves usage information from Globus Server logs under directory
|
|
3
3
|
/gpfs/fs1/collections/rda/work/logs/gridftp/ to fill table 'wusage' in
|
|
4
|
-
database 'dssdb'.
|
|
4
|
+
database 'rdadb'.
|
|
5
5
|
|
|
6
6
|
Usage: fillglobususage [-b] [-d LogFileDates] [-N NumberDay] [-p BeginDate [Enddate]]
|
|
7
7
|
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics.egg-info/SOURCES.txt
RENAMED
|
@@ -7,6 +7,8 @@ src/rda_python_metrics/PgView.py
|
|
|
7
7
|
src/rda_python_metrics/__init__.py
|
|
8
8
|
src/rda_python_metrics/fillawsusage.py
|
|
9
9
|
src/rda_python_metrics/fillawsusage.usg
|
|
10
|
+
src/rda_python_metrics/fillcdgusage.py
|
|
11
|
+
src/rda_python_metrics/fillcdgusage.usg
|
|
10
12
|
src/rda_python_metrics/fillcodusage.py
|
|
11
13
|
src/rda_python_metrics/fillcodusage.usg
|
|
12
14
|
src/rda_python_metrics/fillcountry.py
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
[console_scripts]
|
|
2
2
|
fillawsusage = rda_python_metrics.fillawsusage:main
|
|
3
|
+
fillcdgusage = rda_python_metrics.fillcdgusage:main
|
|
3
4
|
fillcodusage = rda_python_metrics.fillcodusage:main
|
|
4
5
|
fillcountry = rda_python_metrics.fillcountry:main
|
|
5
6
|
fillendtime = rda_python_metrics.fillendtime:main
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
include src/rda_python_metrics/fillawsusage.usg
|
|
2
|
-
include src/rda_python_metrics/fillcodusage.usg
|
|
3
|
-
include src/rda_python_metrics/fillglobususage.usg
|
|
4
|
-
include src/rda_python_metrics/fillipinfo.usg
|
|
5
|
-
include src/rda_python_metrics/filloneorder.usg
|
|
6
|
-
include src/rda_python_metrics/fillosdfusage.usg
|
|
7
|
-
include src/rda_python_metrics/fillrdadb.usg
|
|
8
|
-
include src/rda_python_metrics/filltdsusage.usg
|
|
9
|
-
include src/rda_python_metrics/filluser.usg
|
|
10
|
-
include src/rda_python_metrics/logarch.usg
|
|
11
|
-
include src/rda_python_metrics/viewallusage.usg
|
|
12
|
-
include src/rda_python_metrics/viewcheckusage.usg
|
|
13
|
-
include src/rda_python_metrics/viewcodusage.usg
|
|
14
|
-
include src/rda_python_metrics/viewordusage.usg
|
|
15
|
-
include src/rda_python_metrics/viewrqstusage.usg
|
|
16
|
-
include src/rda_python_metrics/viewtdsusage.usg
|
|
17
|
-
include src/rda_python_metrics/viewwebfile.usg
|
|
18
|
-
include src/rda_python_metrics/viewallusage.usg
|
|
19
|
-
include src/rda_python_metrics/viewwebusage.usg
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillawsusage.py
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillcodusage.py
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillcountry.py
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillendtime.py
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillglobususage.py
RENAMED
|
File without changes
|
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillipinfo.usg
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/filloneorder.py
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/filloneorder.usg
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillosdfusage.py
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillosdfusage.usg
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/filltdsusage.py
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/filltdsusage.usg
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewallusage.py
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewallusage.usg
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewcheckusage.py
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewcheckusage.usg
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewcodusage.py
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewcodusage.usg
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewordusage.py
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewordusage.usg
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewrqstusage.py
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewrqstusage.usg
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewtdsusage.py
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewtdsusage.usg
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewwebfile.py
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewwebfile.usg
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewwebusage.py
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewwebusage.usg
RENAMED
|
File without changes
|
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics.egg-info/requires.txt
RENAMED
|
File without changes
|
{rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics.egg-info/top_level.txt
RENAMED
|
File without changes
|