rda-python-metrics 1.0.35__tar.gz → 1.0.37__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rda-python-metrics might be problematic. Click here for more details.
- {rda_python_metrics-1.0.35/src/rda_python_metrics.egg-info → rda_python_metrics-1.0.37}/PKG-INFO +1 -1
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/pyproject.toml +3 -1
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/fillawsusage.py +3 -2
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/fillglobususage.py +4 -8
- rda_python_metrics-1.0.37/src/rda_python_metrics/viewawsusage.py +321 -0
- rda_python_metrics-1.0.37/src/rda_python_metrics/viewawsusage.usg +190 -0
- rda_python_metrics-1.0.37/src/rda_python_metrics/viewosdfusage.py +321 -0
- rda_python_metrics-1.0.37/src/rda_python_metrics/viewosdfusage.usg +190 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37/src/rda_python_metrics.egg-info}/PKG-INFO +1 -1
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics.egg-info/SOURCES.txt +4 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics.egg-info/entry_points.txt +2 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/LICENSE +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/MANIFEST.in +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/README.md +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/setup.cfg +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/PgIPInfo.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/PgView.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/__init__.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/fillawsusage.usg +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/fillcdgusage.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/fillcdgusage.usg +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/fillcodusage.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/fillcodusage.usg +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/fillcountry.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/fillendtime.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/fillgdexusage.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/fillgdexusage.usg +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/fillglobususage.usg +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/fillipinfo.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/fillipinfo.usg +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/filloneorder.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/filloneorder.usg +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/fillosdfusage.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/fillosdfusage.usg +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/fillrdadb.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/fillrdadb.usg +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/filltdsusage.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/filltdsusage.usg +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/filluser.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/filluser.usg +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/logarch.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/logarch.usg +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/pgperson.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/pgsyspath.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/pgusername.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/viewallusage.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/viewallusage.usg +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/viewcheckusage.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/viewcheckusage.usg +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/viewcodusage.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/viewcodusage.usg +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/viewordusage.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/viewordusage.usg +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/viewrqstusage.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/viewrqstusage.usg +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/viewtdsusage.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/viewtdsusage.usg +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/viewwebfile.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/viewwebfile.usg +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/viewwebusage.py +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/viewwebusage.usg +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics.egg-info/dependency_links.txt +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics.egg-info/requires.txt +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics.egg-info/top_level.txt +0 -0
- {rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/tests/test_metrics.py +0 -0
|
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
|
|
|
6
6
|
|
|
7
7
|
[project]
|
|
8
8
|
name = "rda_python_metrics"
|
|
9
|
-
version = "1.0.
|
|
9
|
+
version = "1.0.37"
|
|
10
10
|
authors = [
|
|
11
11
|
{ name="Zaihua Ji", email="zji@ucar.edu" },
|
|
12
12
|
]
|
|
@@ -54,9 +54,11 @@ pythonpath = [
|
|
|
54
54
|
"pgperson" = "rda_python_metrics.pgperson:main"
|
|
55
55
|
"pgusername" = "rda_python_metrics.pgusername:main"
|
|
56
56
|
"viewallusage" = "rda_python_metrics.viewallusage:main"
|
|
57
|
+
"viewawsusage" = "rda_python_metrics.viewawsusage:main"
|
|
57
58
|
"viewcheckusage" = "rda_python_metrics.viewcheckusage:main"
|
|
58
59
|
"viewcodusage" = "rda_python_metrics.viewcodusage:main"
|
|
59
60
|
"viewordusage" = "rda_python_metrics.viewordusage:main"
|
|
61
|
+
"viewosdfusage" = "rda_python_metrics.viewosdfusage:main"
|
|
60
62
|
"viewrqstusage" = "rda_python_metrics.viewrqstusage:main"
|
|
61
63
|
"viewtdsusage" = "rda_python_metrics.viewtdsusage:main"
|
|
62
64
|
"viewwebfile" = "rda_python_metrics.viewwebfile:main"
|
{rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/fillawsusage.py
RENAMED
|
@@ -94,7 +94,7 @@ def get_log_file_names(option, params):
|
|
|
94
94
|
else:
|
|
95
95
|
pdate = PgUtil.format_date(params[0])
|
|
96
96
|
if len(params) > 1:
|
|
97
|
-
edate = PgUtil.format_date(params[1])
|
|
97
|
+
edate = PgUtil.adddate(PgUtil.format_date(params[1]), 0, 0, 1)
|
|
98
98
|
else:
|
|
99
99
|
edate = PgUtil.curdate()
|
|
100
100
|
while pdate < edate:
|
|
@@ -114,13 +114,14 @@ def fill_aws_usages(filenames):
|
|
|
114
114
|
year = cntall = addall = 0
|
|
115
115
|
for pdate in filenames:
|
|
116
116
|
fnames = filenames[pdate]
|
|
117
|
+
fcnt = len(fnames)
|
|
118
|
+
PgLOG.pglog("{}: Gathering AWS usage info from {} log files at {}".format(pdate, fcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)
|
|
117
119
|
records = {}
|
|
118
120
|
cntadd = entcnt = 0
|
|
119
121
|
for logfile in fnames:
|
|
120
122
|
if not op.isfile(logfile):
|
|
121
123
|
PgLOG.pglog("{}: Not exists for Gathering AWS usage".format(logfile), PgLOG.LOGWRN)
|
|
122
124
|
continue
|
|
123
|
-
PgLOG.pglog("Gathering AWS usage info from {} at {}".format(logfile, PgLOG.current_datetime()), PgLOG.LOGWRN)
|
|
124
125
|
aws = PgFile.open_local_file(logfile)
|
|
125
126
|
if not aws: continue
|
|
126
127
|
while True:
|
{rda_python_metrics-1.0.35 → rda_python_metrics-1.0.37}/src/rda_python_metrics/fillglobususage.py
RENAMED
|
@@ -147,18 +147,14 @@ def fill_globus_usages(fnames, datelimits):
|
|
|
147
147
|
locflag = 'O' if re.match(r'^https://stratus\.', sline) else 'G'
|
|
148
148
|
idx = wfile.find('?')
|
|
149
149
|
if idx > -1: wfile = wfile[:idx]
|
|
150
|
-
|
|
151
|
-
if
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
method = "WGET"
|
|
155
|
-
elif re.match(r'^python', engine, re.I):
|
|
156
|
-
method = "PYTHN"
|
|
150
|
+
moff = engine.find('/')
|
|
151
|
+
if moff > 0:
|
|
152
|
+
if moff > 20: moff = 20
|
|
153
|
+
method = engine[0:moff].upper()
|
|
157
154
|
else:
|
|
158
155
|
method = "WEB"
|
|
159
156
|
|
|
160
157
|
key = "{}:{}:{}".format(ip, dsid, wfile) if stat == '206' else None
|
|
161
|
-
|
|
162
158
|
if record:
|
|
163
159
|
if key == pkey:
|
|
164
160
|
record['size'] += size
|
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
###############################################################################
|
|
4
|
+
#
|
|
5
|
+
# Title : viewawsusage
|
|
6
|
+
# Author : Zaihua Ji, zji@ucar.edu
|
|
7
|
+
# Date : 2025-08-13
|
|
8
|
+
# Purpose : python program to view aws usage information
|
|
9
|
+
#
|
|
10
|
+
# Github : https://github.com/NCAR/rda-python-metrics.git
|
|
11
|
+
#
|
|
12
|
+
###############################################################################
|
|
13
|
+
#
|
|
14
|
+
import os
|
|
15
|
+
import re
|
|
16
|
+
import sys
|
|
17
|
+
from rda_python_common import PgLOG
|
|
18
|
+
from rda_python_common import PgUtil
|
|
19
|
+
from rda_python_common import PgDBI
|
|
20
|
+
from . import PgView
|
|
21
|
+
|
|
22
|
+
VUSG = {
|
|
23
|
+
'SNMS' : "ABCDEHIKMNOPQRSTUWY", # all available short field names in %FLDS
|
|
24
|
+
'OPTS' : 'AabcCdDeEhHikLmMnoOqsStTUwyz', # all available options, used for %params
|
|
25
|
+
'NOPT' : 'abhnwz', # stand alone option without inputs
|
|
26
|
+
'ACND' : 'cdeiIkmMoqSty', # available array condition options
|
|
27
|
+
'RCND' : 'DEsT', # available range condition options
|
|
28
|
+
'CNDS' : 'acdDeEiIkmMnoqsStTy', # condition options, ACND, RCND and 'a'
|
|
29
|
+
'ECND' : 'my', # condition options need evaluating
|
|
30
|
+
'SFLD' : 'DEIKNOTUW', # string fields, to be quoted in condition
|
|
31
|
+
'UFLD' : 'NO', # string fields must be in upper case
|
|
32
|
+
'LFLD' : 'EMPT' # string fields must be in lower case
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
# keys %FLDS - short field names
|
|
36
|
+
# column 0 - column title showing in usage view
|
|
37
|
+
# column 1 - field name in format as shown in select clauses
|
|
38
|
+
# column 2 - field name shown in where condition query string
|
|
39
|
+
# column 3 - table name that the field belongs to
|
|
40
|
+
# column 4 - output field length, the longer one of data size and comlun title, determine
|
|
41
|
+
# dynamically if it is 0. Negative values indicate right justification
|
|
42
|
+
# column 5 - precision for floating point value if positive and show total value if not zero
|
|
43
|
+
# column 6 - field flag to indicate it is a group, distinct or sum field
|
|
44
|
+
FLDS = {
|
|
45
|
+
# SHRTNM COLUMNNANE FIELDNAME CNDNAME TBLNAM Size Prc Grp/Sum
|
|
46
|
+
'D' : ['DATE', "date", 'date', 'awsusage', 10, 0, 'G'],
|
|
47
|
+
'E' : ['EMAIL', "awsusage.email", 'awsusage.email', 'awsusage', 0, 0, 'G'],
|
|
48
|
+
'I' : ['IP', "ip", 'ip', 'awsusage', 0, 0, 'G'],
|
|
49
|
+
'M' : ['MONTH', PgDBI.fmtym("date"), 'date', 'awsusage', 7, 0, 'G'],
|
|
50
|
+
'N' : ['COUNTRY', "country", 'country', 'awsusage', 0, 0, 'G'],
|
|
51
|
+
'K' : ['REGION', "region", 'region', 'awsusage', 0, 0, 'G'],
|
|
52
|
+
'O' : ['ORGTYPE', "org_type", 'org_type', 'awsusage', 7, 0, 'G'],
|
|
53
|
+
'P' : ['DSOWNER', "specialist", 'specialist', 'dsowner', 8, 0, 'G'],
|
|
54
|
+
'Q' : ['QUARTER', "quarter", 'quarter', 'awsusage', 7, 0, 'G'],
|
|
55
|
+
'R' : ['DSTITLE', "search.datasets.title", 'search.datasets.title', 'search.datasets', 0, 0, 'G'],
|
|
56
|
+
'S' : ['BYTESIZE', "size", 'size', 'awsusage', -14, -1, 'G'],
|
|
57
|
+
'T' : ['DATASET', "awsusage.dsid", 'awsusage.dsid', 'awsusage', 0, 0, 'G'],
|
|
58
|
+
'W' : ['METHOD', "method", 'method', 'awsusage', 0, 0, 'G'],
|
|
59
|
+
'Y' : ['YEAR', PgDBI.fmtyr("date"), 'date', 'awsusage', 4, 0, 'G'],
|
|
60
|
+
'A' : ['DSCOUNT', "awsusage.dsid", 'A', 'awsusage', -7, -1, 'D'],
|
|
61
|
+
'B' : ['MBYTEREAD', "round(sum(size)/(1000000), 4)", 'B', 'awsusage', -14, 3, 'S'],
|
|
62
|
+
'C' : ['#UNIQUSER', "awsusage.email", 'C', 'awsusage', -9, -1, 'D'],
|
|
63
|
+
'U' : ['#UNIQIP', "awsusage.ip", 'U', 'awsusage', -7, -1, 'D'],
|
|
64
|
+
'H' : ['#ACCESS', "sum(fcount)", 'H', 'awsusage', -8, -1, 'S'],
|
|
65
|
+
'X' : ['INDEX', "", 'X', '', -6, 0, ' ']
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
# keys %EXPAND - short field names allow zero usage
|
|
69
|
+
# column 0 - expand ID for group of fields
|
|
70
|
+
# column 1 - field name shown in where condition query string
|
|
71
|
+
# column 2 - field name in format as shown in select clauses
|
|
72
|
+
# column 3 - table name that the field belongs to
|
|
73
|
+
EXPAND = {
|
|
74
|
+
# SHRTNM EXPID CNDSTR FIELDNAME TBLNAM
|
|
75
|
+
'D' : ["TIME", "dDmy"],
|
|
76
|
+
'M' : ["TIME", "dDmy"],
|
|
77
|
+
'Q' : ["TIME", "dDmy"],
|
|
78
|
+
'Y' : ["TIME", "dDmy"],
|
|
79
|
+
|
|
80
|
+
'E' : ["USER", "ecko", "email", "wuser", "user"],
|
|
81
|
+
'O' : ["USER", "ecko", "org_type", "wuser", "user"],
|
|
82
|
+
'N' : ["USER", "ecko", "country", "wuser", "user"],
|
|
83
|
+
'K' : ["USER", "ecko", "region", "wuser", "user"],
|
|
84
|
+
|
|
85
|
+
'R' : ["DSID", "StT", "search.datasets.title", "search.datasets"],
|
|
86
|
+
'T' : ["DSID", "StT", "dataset.dsid", "dataset"],
|
|
87
|
+
'P' : ["DSID", "StT", "specialist", "dsowner"],
|
|
88
|
+
|
|
89
|
+
'W' : ["METHOD", "M", "method", "awsusage"]
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
# valid options for %params, a hash array of command line parameters
|
|
93
|
+
# a -- 1 to view all usage info available
|
|
94
|
+
# A -- number or records to return
|
|
95
|
+
# c -- array of specified country codes
|
|
96
|
+
# C -- a string of short field names for viewing usages
|
|
97
|
+
# d -- array of specified dates
|
|
98
|
+
# D -- dates range, array of 1 or 2 dates in format of YYYY-MM-DD
|
|
99
|
+
# e -- array of specified email addresses
|
|
100
|
+
# E -- use given date or date range for email notice of data update
|
|
101
|
+
# h -- for give emails, include their histical emails registered before
|
|
102
|
+
# H -- a string of report title to replace the default one
|
|
103
|
+
# i -- array of specified IP addresses
|
|
104
|
+
# I -- use given email IDs for email notice of data update
|
|
105
|
+
# k -- array of specified region names
|
|
106
|
+
# L -- column delimiter for output
|
|
107
|
+
# m -- array of specified months
|
|
108
|
+
# M -- array of specified download methods
|
|
109
|
+
# o -- array of specified orginization types
|
|
110
|
+
# O -- a string of short field names for sorting on
|
|
111
|
+
# q -- array of the specified quarters, normally combined with years
|
|
112
|
+
# s -- size range, arrage of 1 or 2 sizes in unit of MByte
|
|
113
|
+
# S -- array of login names of specialists who owns the datasets
|
|
114
|
+
# t -- array of specified dataset names
|
|
115
|
+
# T -- dataset range, array of 1 or 2 dataset names
|
|
116
|
+
# U -- given unit for file or data sizes
|
|
117
|
+
# w -- generate view without totals
|
|
118
|
+
# y -- array of specified years
|
|
119
|
+
# z -- generate view including entries without usage
|
|
120
|
+
|
|
121
|
+
params = {}
|
|
122
|
+
|
|
123
|
+
# relationship between parameter options and short field names, A option is not
|
|
124
|
+
# related to a field name if it is not in keys %SNS
|
|
125
|
+
SNS = {
|
|
126
|
+
'c' : 'N', 'd' : 'D', 'D' : 'D', 'e' : 'E', 'i' : 'I', 'k' : 'K', 'm' : 'M',
|
|
127
|
+
'M' : 'W', 'o' : 'O', 'q' : 'Q', 's' : 'S', 'S' : 'P', 't' : 'T', 'T' : 'T', 'y' : 'Y'
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
tablenames = fieldnames = condition = ''
|
|
131
|
+
sfields = []
|
|
132
|
+
gfields = []
|
|
133
|
+
dfields = []
|
|
134
|
+
pgname = 'viewawsusage'
|
|
135
|
+
|
|
136
|
+
#
|
|
137
|
+
# main function to run this program
|
|
138
|
+
#
|
|
139
|
+
def main():
|
|
140
|
+
|
|
141
|
+
PgDBI.view_dbinfo()
|
|
142
|
+
argv = sys.argv[1:]
|
|
143
|
+
inputs = []
|
|
144
|
+
option = 'C' # default option
|
|
145
|
+
|
|
146
|
+
for arg in argv:
|
|
147
|
+
if re.match(r'^-.*$', arg):
|
|
148
|
+
curopt = arg[1:2]
|
|
149
|
+
if curopt and VUSG['OPTS'].find(curopt) > -1:
|
|
150
|
+
if VUSG['NOPT'].find(option) > -1:
|
|
151
|
+
params[option] = 1
|
|
152
|
+
elif inputs:
|
|
153
|
+
params[option]= inputs # record input array
|
|
154
|
+
inputs = [] # empty input array
|
|
155
|
+
option = curopt # start a new option
|
|
156
|
+
else:
|
|
157
|
+
PgLOG.pglog(arg + ": Unknown Option", PgLOG.LGWNEX)
|
|
158
|
+
else:
|
|
159
|
+
val = arg
|
|
160
|
+
if val != '!':
|
|
161
|
+
if option == 's':
|
|
162
|
+
val = int(val)*1000000 # convert MBytes to Bytes
|
|
163
|
+
elif option in SNS:
|
|
164
|
+
sfld = SNS[option]
|
|
165
|
+
if VUSG['SFLD'].find(sfld) > -1:
|
|
166
|
+
if VUSG['UFLD'].find(sfld) > -1:
|
|
167
|
+
val = arg.upper() # in case not in upper case
|
|
168
|
+
elif VUSG['LFLD'].find(sfld) > -1:
|
|
169
|
+
val = arg.lower() # in case not in lower case
|
|
170
|
+
if option == 'c':
|
|
171
|
+
val = PgView.get_country_name(val)
|
|
172
|
+
elif option == 't' or option == 'T':
|
|
173
|
+
val = PgUtil.format_dataset_id(val) # add 'ds' if only numbers
|
|
174
|
+
val = "'{}'".format(val)
|
|
175
|
+
inputs.append(val)
|
|
176
|
+
|
|
177
|
+
# record the last option
|
|
178
|
+
if VUSG['NOPT'].find(option) > -1:
|
|
179
|
+
params[option] = 1
|
|
180
|
+
elif inputs:
|
|
181
|
+
params[option] = inputs # record input array
|
|
182
|
+
|
|
183
|
+
if not params:
|
|
184
|
+
PgLOG.show_usage(pgname)
|
|
185
|
+
else:
|
|
186
|
+
check_enough_options()
|
|
187
|
+
|
|
188
|
+
if 'o' not in params:
|
|
189
|
+
if 'e' not in params:
|
|
190
|
+
params['o'] = ['!', "'DSS'"] # default to exclude 'DSS' for organization
|
|
191
|
+
elif params['o'][0] == "'ALL'":
|
|
192
|
+
del params['o']
|
|
193
|
+
|
|
194
|
+
usgtable = "awsusage"
|
|
195
|
+
build_query_strings(usgtable) # build tablenames, fieldnames, and conditions
|
|
196
|
+
records = PgDBI.pgmget(tablenames, fieldnames, condition, PgLOG.UCLWEX)
|
|
197
|
+
if not records: PgLOG.pglog("No Usage Found For Given Conditions", PgLOG.LGWNEX)
|
|
198
|
+
totals = None if 'w' in params else {}
|
|
199
|
+
if dfields or totals != None:
|
|
200
|
+
records = PgView.compact_hash_groups(records, gfields, sfields, dfields, totals)
|
|
201
|
+
if 'z' in params: records = expand_records(records)
|
|
202
|
+
ostr = params['O'][0] if 'O' in params else params['C'][0]
|
|
203
|
+
records = PgView.order_records(records, ostr.replace('X', ''))
|
|
204
|
+
PgView.simple_output(params, FLDS, records, totals)
|
|
205
|
+
|
|
206
|
+
PgLOG.pgexit(0)
|
|
207
|
+
|
|
208
|
+
#
|
|
209
|
+
# check if enough information was entered on the command line to generate a
# view/report; log a fatal error and exit if not
#
def check_enough_options():
    """Validate the command-line options collected in the global 'params'.

    Ensures the field-name string (-C) is present and every short field name
    is valid, auto-inserts Y when Q is requested, rejects -z for fields that
    cannot be expanded to zero usage, checks the -E/-I notice options for
    consistency, and requires at least one condition option.  All violations
    exit via PgLOG.pglog(..., PgLOG.LGWNEX).
    """
    cols = params['C'][0] if 'C' in params else 'X'
    if cols == 'X': PgLOG.pglog("{}: miss field names '{}'".format(pgname, VUSG['SNMS']), PgLOG.LGWNEX)

    if cols.find('Q') > -1 and cols.find('Y') < 0:  # add Y if Q included
        cols = re.sub('Q', 'YQ', cols)
        params['C'][0] = cols

    for sn in cols:
        if sn == 'X': continue  # do not process INDEX field
        if VUSG['SNMS'].find(sn) < 0:
            PgLOG.pglog("{}: Field {} must be in field names '{}X'".format(pgname, sn, VUSG['SNMS']), PgLOG.LGWNEX)
        if 'z' not in params or sn in EXPAND: continue
        fld = FLDS[sn]
        if fld[6] != 'G': continue
        # BUG FIX: original called the non-existent str method 'formt', which
        # would raise AttributeError instead of reporting this error message
        PgLOG.pglog("{}: cannot show zero usage for unexpandable field {} - {}".format(pgname, sn, fld[0]), PgLOG.LGWNEX)

    if 'E' in params or 'I' in params:
        if 'z' in params:
            PgLOG.pglog(pgname + ": option -z and -E/-I can not be present at the same time", PgLOG.LGWNEX)
        elif 't' not in params or len(params['t']) > 1:
            PgLOG.pglog(pgname + ": specify one dataset for viewing usage of notified users", PgLOG.LGWNEX)
        elif 'E' in params and 'I' in params:
            PgLOG.pglog(pgname + ": option -E and -I can not be present at the same time", PgLOG.LGWNEX)

    # at least one condition option must be present
    for opt in params:
        if VUSG['CNDS'].find(opt) > -1: return
    PgLOG.pglog("{}: miss condition options '{}'".format(pgname, VUSG['CNDS']), PgLOG.LGWNEX)
|
|
240
|
+
|
|
241
|
+
#
|
|
242
|
+
# process parameter options to build aws query strings
|
|
243
|
+
# global variables are used directly and nothing passes in and returns back
|
|
244
|
+
#
|
|
245
|
+
def build_query_strings(usgtable):
|
|
246
|
+
|
|
247
|
+
# initialize query strings
|
|
248
|
+
global condition, fieldnames, tablenames
|
|
249
|
+
joins = groupnames = ''
|
|
250
|
+
tablenames = usgtable
|
|
251
|
+
cols = params['C'][0]
|
|
252
|
+
|
|
253
|
+
if 'U' in params: # reset units for file and read sizes
|
|
254
|
+
if cols.find('B') > -1: FLDS['B'] = PgView.set_data_unit(FLDS['B'], params['U'][0], "sum(size)")
|
|
255
|
+
if cols.find('S') > -1: FLDS['S'] = PgView.set_data_unit(FLDS['S'], params['U'][0], "size")
|
|
256
|
+
|
|
257
|
+
if 'e' in params and 'h' in params: params['e'] = PgView.include_historic_emails(params['e'], 3)
|
|
258
|
+
|
|
259
|
+
for opt in params:
|
|
260
|
+
if opt == 'C': # build field, table and group names
|
|
261
|
+
for sn in cols:
|
|
262
|
+
if sn == 'X': continue # do not process INDEX field
|
|
263
|
+
fld = FLDS[sn]
|
|
264
|
+
if fieldnames: fieldnames += ', '
|
|
265
|
+
fieldnames += "{} {}".format(fld[1], sn) # add to field name string
|
|
266
|
+
(tablenames, joins) = PgView.join_query_tables(fld[3], tablenames, joins, usgtable)
|
|
267
|
+
if fld[6] == 'S':
|
|
268
|
+
sfields.append(sn)
|
|
269
|
+
else:
|
|
270
|
+
if groupnames: groupnames += ', '
|
|
271
|
+
groupnames += sn # add to group name string
|
|
272
|
+
if fld[6] == 'D':
|
|
273
|
+
dfields.append(sn)
|
|
274
|
+
else:
|
|
275
|
+
gfields.append(sn)
|
|
276
|
+
elif opt == 'O':
|
|
277
|
+
continue # order records later
|
|
278
|
+
elif VUSG['CNDS'].find(opt) > -1:
|
|
279
|
+
if VUSG['NOPT'].find(opt) > -1: continue
|
|
280
|
+
sn = SNS[opt]
|
|
281
|
+
fld = FLDS[sn]
|
|
282
|
+
# build having and where conditon strings
|
|
283
|
+
cnd = PgView.get_view_condition(opt, sn, fld, params, VUSG)
|
|
284
|
+
if cnd:
|
|
285
|
+
if condition: condition += ' AND '
|
|
286
|
+
condition += cnd
|
|
287
|
+
(tablenames, joins) = PgView.join_query_tables(fld[3], tablenames, joins, usgtable)
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
# append joins, group by, order by, and having strings to condition string
|
|
291
|
+
if 'E' in params or 'I' in params:
|
|
292
|
+
(tablenames, joins) = PgView.join_query_tables("emreceive", tablenames, joins, usgtable)
|
|
293
|
+
if joins:
|
|
294
|
+
if condition:
|
|
295
|
+
condition = "{} AND {}".format(joins, condition)
|
|
296
|
+
else:
|
|
297
|
+
condition = joins
|
|
298
|
+
if 'E' in params or 'I' in params:
|
|
299
|
+
condition += PgView.notice_condition(params['E'], None, params['t'][0])
|
|
300
|
+
if groupnames and sfields: condition += " GROUP BY " + groupnames
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def expand_records(records):
    """Expand the queried usage records to include zero-usage entries (-z).

    For each expandable field group defined in EXPAND (TIME, USER, DSID,
    METHOD), build the full set of possible key values via
    PgView.expand_query, cross the per-group sets together into one combined
    hash, and finally join the actual usage records onto that expanded key
    set so groups with no recorded usage still appear in the output.

    records - hash of query results keyed by short field names
    Returns the joined hash including zero-usage rows.
    """
    # TIME expansion takes no field metadata (dates/months/quarters/years)
    recs = PgView.expand_query("TIME", records, params, EXPAND)

    trecs = PgView.expand_query("USER", records, params, EXPAND, VUSG, SNS, FLDS)
    recs = PgUtil.crosshash(recs, trecs)

    trecs = PgView.expand_query("DSID", records, params, EXPAND, VUSG, SNS, FLDS)
    recs = PgUtil.crosshash(recs, trecs)

    trecs = PgView.expand_query("METHOD", records, params, EXPAND, VUSG, SNS, FLDS)
    recs = PgUtil.crosshash(recs, trecs)

    # join actual usage onto the expanded key set
    return PgUtil.joinhash(records, recs, 0, 1)
|
|
317
|
+
|
|
318
|
+
#
|
|
319
|
+
# call main() to start program
|
|
320
|
+
#
|
|
321
|
+
if __name__ == "__main__": main()
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
|
|
2
|
+
View usage information of AWS Data Services from information
|
|
3
|
+
stored in PostgreSQL database 'RDADB'.
|
|
4
|
+
|
|
5
|
+
Usage: viewawsusage [-C] ColumnNames [-O OrderColumnNames] [-a] \
|
|
6
|
+
[-A RowLimit] [-c CountryCodes] [-d DateList] \
|
|
7
|
+
[-D StartDate [EndDate]] [-e EMailList] -h \
|
|
8
|
+
[-E StartNoticeDate [EndNoticeDate]] \
|
|
9
|
+
[-i IPAddresses] [-I EmailIDList] \
|
|
10
|
+
[-k RegionNames] [-m MonthList] [-M AccessMethods] \
|
|
11
|
+
[-N MinNumberRead [MaxNumberRead]] \
|
|
12
|
+
[-o OrganizationTypes] \
|
|
13
|
+
[-q QuarterList] [-s MinSize [MaxSize]] \
|
|
14
|
+
[-S SpecialistLoginNames] [-t DatasetList] \
|
|
15
|
+
[-T MinDataset [MaxDataset]] [-y YearList] \
|
|
16
|
+
[-H Title] [-L Delimiter] [-U SizeUnit] \
|
|
17
|
+
[-w] [-z] [> OutputFileName] [| lp -d PrinterName]
|
|
18
|
+
|
|
19
|
+
Specify [-C] ColumnNames, refer to Option -C section for detail
|
|
20
|
+
description, and choose at least one of the condition options, -a, -c,
|
|
21
|
+
-d, -D, -e, -E, -i, -I, -k, -m, -M, -N, -o, -q, -s, -S -t, -T, and -y,
|
|
22
|
+
to run this application.
|
|
23
|
+
|
|
24
|
+
For all condition options, except option -a, an '!' sign can be added
|
|
25
|
+
between an option flag and its option values to get an excluding
|
|
26
|
+
condition. For example, choose '-o ! OrganizationTypes' to gather order
|
|
27
|
+
data usage by users from organization types other than the ones given in
|
|
28
|
+
OrganizationTypes. Refer to the example given at the end of this help
|
|
29
|
+
document for how to select excluding condition.
|
|
30
|
+
|
|
31
|
+
String condition options, -c, -e, -g, -i, -k, -M, -o, -S, and -t, allow
|
|
32
|
+
wildcard inputs. '%' matches any number of characters and '_' matches any one
|
|
33
|
+
character. Refer to the example given at the end of this help document
|
|
34
|
+
for how to use wildcard for string condition options.
|
|
35
|
+
|
|
36
|
+
Output of this application is defaulted to page format with a page
|
|
37
|
+
header on each page. A page header includes main title, sub titles and
|
|
38
|
+
column titles according to which column names and options are selected,
|
|
39
|
+
as well as page number and report date. If the output is used directly
|
|
40
|
+
for input of other applications, add option -w to remove page header
|
|
41
|
+
and show only the column titles and the usage information.
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
Column Options:
|
|
45
|
+
- Option -C, the ColumnNames must be present to run this application.
|
|
46
|
+
The flag -C can be omitted if it is the first parameter option on
|
|
47
|
+
the command line. The ColumnNames is a string that includes column
|
|
48
|
+
names listed below:
|
|
49
|
+
|
|
50
|
+
COLUMN - COLUMN - COLUMN
|
|
51
|
+
NAME - TITLE - DESCRIPTION
|
|
52
|
+
GroupColumns:
|
|
53
|
+
D*- DATE - format as YYYY-MM-DD, for example 2004-04-25
|
|
54
|
+
E*- EMAIL - user email address
|
|
55
|
+
I*- IP - user IP address
|
|
56
|
+
M*- MONTH - format as YYYY-MM, for example 2004-04
|
|
57
|
+
N*- COUNTRY - country codes users from
|
|
58
|
+
K*- REGION - region names users from
|
|
59
|
+
O*- ORGTYPE - organization types (DSS, NCAR, UNIV and OTHER)
|
|
60
|
+
P*- DSOWNER - login names of specialists who own the datasets
|
|
61
|
+
Q*- QUARTER - quarter of year, 1, 2, 3, or 4
|
|
62
|
+
R*- DSTITLE - dataset titles
|
|
63
|
+
S - BSIZE - size of data read each time, default to Bytes
|
|
64
|
+
T*- DATASET - format as dsnnn.n, for example d540001
|
|
65
|
+
W*- METHOD - access methods
|
|
66
|
+
Y*- YEAR - format as YYYY, for example 2004
|
|
67
|
+
|
|
68
|
+
* - field names can be processed with zero usages
|
|
69
|
+
SummaryColumns:
|
|
70
|
+
A - DSCOUNT - number of datasets in given GroupColumns
|
|
71
|
+
B - MBREAD - data sizes, default MB, read by given GroupColumns
|
|
72
|
+
C - #UNIQUSER - number of unique users in given GroupColumns
|
|
73
|
+
U - #UNIQIP - number of unique IP addresses in given GroupColumns
|
|
74
|
+
H - #READ - number of reads by given GroupColumns
|
|
75
|
+
|
|
76
|
+
IndexColumn:
|
|
77
|
+
X - INDEX - index of line, it should be the first column
|
|
78
|
+
|
|
79
|
+
The column names are used to build up string of ColumnNames, while
|
|
80
|
+
their associated column titles are shown in view/report output of
|
|
81
|
+
this application. The display order of the column titles is
|
|
82
|
+
determined by the order of the column names in the ColumnNames
|
|
83
|
+
string. At least one of the group and summary columns must be
|
|
84
|
+
selected, in the ColumnNames string, to generate all usage
|
|
85
|
+
view/report;
|
|
86
|
+
|
|
87
|
+
For example, choose '-C EMB' to display column titles of EMAIL,
|
|
88
|
+
MONTH and MBREAD, in the first, second and third columns
|
|
89
|
+
respectively, for numbers of MBytes of data read by each user
|
|
90
|
+
in each month;
|
|
91
|
+
|
|
92
|
+
- Option -O, sort data usage information in ascending or descending
|
|
93
|
+
order based on the column names specified in OrderColumnNames
|
|
94
|
+
string. These column names must be in the selected [-C]
|
|
95
|
+
ColumnNames string. If an column name is in upper case, its
|
|
96
|
+
associated column is sorted in ascending order, and a lower
|
|
97
|
+
case means sorting in descending order;
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
Condition Options:
|
|
101
|
+
- Option -a, for all usage in table 'awsusage';
|
|
102
|
+
|
|
103
|
+
- Option -A, gives a row limit for querying;
|
|
104
|
+
|
|
105
|
+
- Option -c, for files read by users from given country codes;
|
|
106
|
+
|
|
107
|
+
- Option -d, for data read on given dates, in format YYYY-MM-DD;
|
|
108
|
+
|
|
109
|
+
- Option -D, for data read between two given dates, each date
|
|
110
|
+
is in format YYYY-MM-DD. Omit EndDate for no upper limit;
|
|
111
|
+
|
|
112
|
+
- Option -e, for data read by users with given email addresses;
|
|
113
|
+
|
|
114
|
+
- Option -E, for data read by users who have been notified
|
|
115
|
+
data update of a specified dataset between two given dates,
|
|
116
|
+
each date is in format YYYY-MM-DD. Omit EndNoticeDate for
|
|
117
|
+
no upper limit;
|
|
118
|
+
|
|
119
|
+
- Option -h, works with Option -e to include historical user emails
|
|
120
|
+
registered before;
|
|
121
|
+
|
|
122
|
+
- Option -i, for data read from machines with given IP addresses;
|
|
123
|
+
|
|
124
|
+
- Option -k, for files read by users from given region names;
|
|
125
|
+
|
|
126
|
+
- Option -m, for data read in given months, in format YYYY-MM;
|
|
127
|
+
|
|
128
|
+
- Option -M, for data read via access methods;
|
|
129
|
+
|
|
130
|
+
- Option -N, for files for numbers of read by each group between
|
|
131
|
+
MinNumberRead and MaxNumberRead. Omit MaxNumberRead for no
|
|
132
|
+
upper limit;
|
|
133
|
+
|
|
134
|
+
- Option -o, for data read by users from given organization types.
|
|
135
|
+
It defaults to -o ! DSS to exclude usage from DSS specialists;
|
|
136
|
+
Set it to ALL to include all organization types;
|
|
137
|
+
|
|
138
|
+
- Option -q, for data read in given quarters;
|
|
139
|
+
|
|
140
|
+
- Option -s, for data sizes, unit of MByte, between MinSize and MaxSize.
|
|
141
|
+
Omit MaxSize for no upper limit;
|
|
142
|
+
|
|
143
|
+
- Option -S, for login names of specialists who own the datasets;
|
|
144
|
+
|
|
145
|
+
- Option -t, for data associating to given dataset names;
|
|
146
|
+
|
|
147
|
+
- Option -T, for data associating to datasets between
|
|
148
|
+
MinDataset and MaxDataset. Omit MaxDataset for no upper limit.
|
|
149
|
+
For example, -T d540000 d550009, for datasets numbers d540000-d550009;
|
|
150
|
+
|
|
151
|
+
- Option -y, for data read in given years in format YYYY;
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
Miscellaneous Options:
|
|
155
|
+
- Option -w, view data usage in simple format without totals;
|
|
156
|
+
|
|
157
|
+
- Option -z, include datasets without usage
|
|
158
|
+
|
|
159
|
+
- Option -H, use given report title to replace the default one;
|
|
160
|
+
|
|
161
|
+
- Option -L, use given delimiter for output, instead of defaulted spaces;
|
|
162
|
+
|
|
163
|
+
- Option -U, show data sizes in given unit SizeUnit [BKMG].
|
|
164
|
+
B - Byte, K - KiloBytes, M - MegaByte, and G - GigaByte;
|
|
165
|
+
|
|
166
|
+
- Option > OutputFilename, redirect output into an output file,
|
|
167
|
+
for example, ordusage.out, instead of viewing on screen directly;
|
|
168
|
+
|
|
169
|
+
- Option | lp -d PrinterName, redirect output to printer of PrinterName.
|
|
170
|
+
Replace PrinterName with lj100 to print through DSS LaserJet printer.
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
For example:
|
|
174
|
+
To view annual data usage in year 2005 with columns, INDEX(X),
|
|
175
|
+
EMAIL(E), ORGTYPE(O), #READ(H), and MBREAD(B); ordered by ORGTYPE as
|
|
176
|
+
ascending and MBREAD(B) as descending; the command line should be:
|
|
177
|
+
|
|
178
|
+
viewawsusage XEOHB -y 2005 -O Ob
|
|
179
|
+
|
|
180
|
+
For usage by users not in Organization 'DDS', out of the file usage
|
|
181
|
+
gathered above, the command line should be:
|
|
182
|
+
|
|
183
|
+
viewawsusage XEOHB -y 2005 -o ! DSS -O Ob
|
|
184
|
+
|
|
185
|
+
To redirect the previous output to a file named awsusage.out:
|
|
186
|
+
|
|
187
|
+
viewawsusage XEOHB -y 2005 ! DSS -O Ob > awsusage.out
|
|
188
|
+
|
|
189
|
+
Then you can view the file or print it as a report.
|
|
190
|
+
|