rda-python-metrics 1.0.34__py3-none-any.whl → 1.0.36__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rda-python-metrics might be problematic. Click here for more details.

@@ -94,7 +94,7 @@ def get_log_file_names(option, params):
94
94
  else:
95
95
  pdate = PgUtil.format_date(params[0])
96
96
  if len(params) > 1:
97
- edate = PgUtil.format_date(params[1])
97
+ edate = PgUtil.adddate(PgUtil.format_date(params[1]), 0, 0, 1)
98
98
  else:
99
99
  edate = PgUtil.curdate()
100
100
  while pdate < edate:
@@ -114,13 +114,14 @@ def fill_aws_usages(filenames):
114
114
  year = cntall = addall = 0
115
115
  for pdate in filenames:
116
116
  fnames = filenames[pdate]
117
+ fcnt = len(fnames)
118
+ PgLOG.pglog("{}: Gathering AWS usage info from {} log files at {}".format(pdate, fcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)
117
119
  records = {}
118
120
  cntadd = entcnt = 0
119
121
  for logfile in fnames:
120
122
  if not op.isfile(logfile):
121
123
  PgLOG.pglog("{}: Not exists for Gathering AWS usage".format(logfile), PgLOG.LOGWRN)
122
124
  continue
123
- PgLOG.pglog("Gathering AWS usage info from {} at {}".format(logfile, PgLOG.current_datetime()), PgLOG.LOGWRN)
124
125
  aws = PgFile.open_local_file(logfile)
125
126
  if not aws: continue
126
127
  while True:
@@ -184,7 +185,7 @@ def add_usage_records(records, year):
184
185
  cnt = 0
185
186
  for key in records:
186
187
  record = records[key]
187
- cond = "date = '{}' AND time = '{}' AND ip = '{}' AND dsid = '{}'".format(record['date'], record['time'], record['ip'], record['dsid'])
188
+ cond = "date = '{}' AND time = '{}' AND ip = '{}'".format(record['date'], record['time'], record['ip'])
188
189
  if PgDBI.pgget(USAGE['PGTBL'], '', cond, PgLOG.LGEREX): continue
189
190
  if add_to_allusage(year, record):
190
191
  cnt += PgDBI.pgadd(USAGE['PGTBL'], record, PgLOG.LOGWRN)
@@ -147,18 +147,14 @@ def fill_globus_usages(fnames, datelimits):
147
147
  locflag = 'O' if re.match(r'^https://stratus\.', sline) else 'G'
148
148
  idx = wfile.find('?')
149
149
  if idx > -1: wfile = wfile[:idx]
150
-
151
- if re.match(r'^curl', engine, re.I):
152
- method = "CURL"
153
- elif re.match(r'^wget', engine, re.I):
154
- method = "WGET"
155
- elif re.match(r'^python', engine, re.I):
156
- method = "PYTHN"
150
+ moff = engine.find('/')
151
+ if moff > 0:
152
+ if moff > 20: moff = 20
153
+ method = engine[0:moff].upper()
157
154
  else:
158
155
  method = "WEB"
159
156
 
160
157
  key = "{}:{}:{}".format(ip, dsid, wfile) if stat == '206' else None
161
-
162
158
  if record:
163
159
  if key == pkey:
164
160
  record['size'] += size
@@ -185,7 +185,7 @@ def add_usage_records(records, year):
185
185
  cnt = 0
186
186
  for key in records:
187
187
  record = records[key]
188
- cond = "date = '{}' AND time = '{}' AND ip = '{}' AND dsid = '{}'".format(record['date'], record['time'], record['ip'], record['dsid'])
188
+ cond = "date = '{}' AND time = '{}' AND ip = '{}'".format(record['date'], record['time'], record['ip'])
189
189
  if PgDBI.pgget(USAGE['OSDFTBL'], '', cond, PgLOG.LGEREX): continue
190
190
  if add_to_allusage(year, record):
191
191
  cnt += PgDBI.pgadd(USAGE['OSDFTBL'], record, PgLOG.LOGWRN)
@@ -203,69 +203,6 @@ def add_to_allusage(year, pgrec):
203
203
 
204
204
  return PgDBI.add_yearly_allusage(year, record)
205
205
 
206
-
207
- #
208
- # Fill usage of a single online data file into table dssdb.wusage of DSS PgSQL database
209
- #
210
- def add_file_usage(year, logrec):
211
-
212
- pgrec = get_wfile_wid(logrec['dsid'], logrec['wfile'])
213
- if not pgrec: return 0
214
-
215
- table = "{}_{}".format(USAGE['OSDFTBL'], year)
216
- cond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(pgrec['wid'], logrec['method'], logrec['date'], logrec['time'])
217
- if PgDBI.pgget(USAGE['OSDFTBL'], "", cond, PgLOG.LOGWRN): return 0
218
-
219
- wurec = PgIPInfo.get_wuser_record(logrec['ip'], logrec['date'])
220
- if not wurec: return 0
221
- record = {'wid' : pgrec['wid'], 'dsid' : pgrec['dsid']}
222
- record['wuid_read'] = wurec['wuid']
223
- record['date_read'] = logrec['date']
224
- record['time_read'] = logrec['time']
225
- record['size_read'] = logrec['size']
226
- record['method'] = logrec['method']
227
- record['locflag'] = logrec['locflag']
228
- record['ip'] = logrec['ip']
229
- record['quarter'] = logrec['quarter']
230
-
231
- if add_to_allusage(year, logrec, wurec):
232
- return PgDBI.add_yearly_wusage(year, record)
233
- else:
234
- return 0
235
-
236
- def add_to_allusage(year, logrec, wurec):
237
-
238
- pgrec = {'email' : wurec['email'], 'org_type' : wurec['org_type'],
239
- 'country' : wurec['country'], 'region' : wurec['region']}
240
- pgrec['dsid'] = logrec['dsid']
241
- pgrec['date'] = logrec['date']
242
- pgrec['quarter'] = logrec['quarter']
243
- pgrec['time'] = logrec['time']
244
- pgrec['size'] = logrec['size']
245
- pgrec['method'] = logrec['method']
246
- pgrec['ip'] = logrec['ip']
247
- pgrec['source'] = 'P'
248
- return PgDBI.add_yearly_allusage(year, pgrec)
249
-
250
- #
251
- # return wfile.wid upon success, 0 otherwise
252
- #
253
- def get_wfile_wid(dsid, wfile):
254
-
255
- wfcond = "wfile = '{}'".format(wfile)
256
- pgrec = PgSplit.pgget_wfile(dsid, "*", wfcond)
257
- if pgrec:
258
- pgrec['dsid'] = dsid
259
- else:
260
- pgrec = PgDBI.pgget("wfile_delete", "*", "{} AND dsid = '{}'".format(wfcond, dsid))
261
- if not pgrec:
262
- pgrec = PgDBI.pgget("wmove", "wid, dsid", wfcond)
263
- if pgrec:
264
- pgrec = PgSplit.pgget_wfile(pgrec['dsid'], "*", "wid = {}".format(pgrec['wid']))
265
- if pgrec: pgrec['dsid'] = dsid
266
-
267
- return pgrec
268
-
269
206
  #
270
207
  # call main() to start program
271
208
  #
@@ -0,0 +1,321 @@
1
+ #!/usr/bin/env python3
2
+ #
3
+ ###############################################################################
4
+ #
5
+ # Title : viewawsusage
6
+ # Author : Zaihua Ji, zji@ucar.edu
7
+ # Date : 2025-08-13
8
+ # Purpose : python program to view aws usage information
9
+ #
10
+ # Github : https://github.com/NCAR/rda-python-metrics.git
11
+ #
12
+ ###############################################################################
13
+ #
14
+ import os
15
+ import re
16
+ import sys
17
+ from rda_python_common import PgLOG
18
+ from rda_python_common import PgUtil
19
+ from rda_python_common import PgDBI
20
+ from . import PgView
21
+
22
# VUSG holds the option/field metadata strings used to validate and parse
# the command line (consumed by main() and check_enough_options())
VUSG = {
    'SNMS' : "ABCDEHIKMNOPQRSTUWY",           # all available short field names in FLDS
    'OPTS' : 'AabcCdDeEhHikLmMnoOqsStTUwyz',  # all available options, used for params
    'NOPT' : 'abhnwz',                 # stand alone options without inputs
    'ACND' : 'cdeiIkmMoqSty',          # available array condition options
    'RCND' : 'DEsT',                   # available range condition options
    'CNDS' : 'acdDeEiIkmMnoqsStTy',    # condition options, ACND, RCND and 'a'
    'ECND' : 'my',                     # condition options need evaluating
    'SFLD' : 'DEIKNOTUW',              # string fields, to be quoted in condition
    'UFLD' : 'NO',                     # string fields must be in upper case
    'LFLD' : 'EMPT'                    # string fields must be in lower case
}

# keys of FLDS - short field names
# column 0 - column title showing in usage view
# column 1 - field name in format as shown in select clauses
# column 2 - field name shown in where condition query string
# column 3 - table name that the field belongs to
# column 4 - output field length, the longer one of data size and column title, determined
#            dynamically if it is 0. Negative values indicate right justification
# column 5 - precision for floating point value if positive and show total value if not zero
# column 6 - field flag to indicate it is a group, distinct or sum field
FLDS = {
#  SHRTNM  COLUMNNAME    FIELDNAME                        CNDNAME                  TBLNAM             Size Prc Grp/Sum
    'D' : ['DATE',       "date",                          'date',                  'awsusage',         10,  0, 'G'],
    'E' : ['EMAIL',      "awsusage.email",                'awsusage.email',        'awsusage',          0,  0, 'G'],
    'I' : ['IP',         "ip",                            'ip',                    'awsusage',          0,  0, 'G'],
    'M' : ['MONTH',      PgDBI.fmtym("date"),             'date',                  'awsusage',          7,  0, 'G'],
    'N' : ['COUNTRY',    "country",                       'country',               'awsusage',          0,  0, 'G'],
    'K' : ['REGION',     "region",                        'region',                'awsusage',          0,  0, 'G'],
    'O' : ['ORGTYPE',    "org_type",                      'org_type',              'awsusage',          7,  0, 'G'],
    'P' : ['DSOWNER',    "specialist",                    'specialist',            'dsowner',           8,  0, 'G'],
    'Q' : ['QUARTER',    "quarter",                       'quarter',               'awsusage',          7,  0, 'G'],
    'R' : ['DSTITLE',    "search.datasets.title",         'search.datasets.title', 'search.datasets',   0,  0, 'G'],
    'S' : ['BYTESIZE',   "size",                          'size',                  'awsusage',        -14, -1, 'G'],
    'T' : ['DATASET',    "awsusage.dsid",                 'awsusage.dsid',         'awsusage',          0,  0, 'G'],
    'W' : ['METHOD',     "method",                        'method',                'awsusage',          0,  0, 'G'],
    'Y' : ['YEAR',       PgDBI.fmtyr("date"),             'date',                  'awsusage',          4,  0, 'G'],
    'A' : ['DSCOUNT',    "awsusage.dsid",                 'A',                     'awsusage',         -7, -1, 'D'],
    'B' : ['MBYTEREAD',  "round(sum(size)/(1000000), 4)", 'B',                     'awsusage',        -14,  3, 'S'],
    'C' : ['#UNIQUSER',  "awsusage.email",                'C',                     'awsusage',         -9, -1, 'D'],
    'U' : ['#UNIQIP',    "awsusage.ip",                   'U',                     'awsusage',         -7, -1, 'D'],
    'H' : ['#ACCESS',    "sum(fcount)",                   'H',                     'awsusage',         -8, -1, 'S'],
    'X' : ['INDEX',      "",                              'X',                     '',                 -6,  0, ' ']
}

# keys of EXPAND - short field names that allow zero usage
# column 0 - expand ID for group of fields
# column 1 - field name shown in where condition query string
# column 2 - field name in format as shown in select clauses
# column 3 - table name that the field belongs to
EXPAND = {
#  SHRTNM  EXPID     CNDSTR  FIELDNAME  TBLNAM
    'D' : ["TIME",   "dDmy"],
    'M' : ["TIME",   "dDmy"],
    'Q' : ["TIME",   "dDmy"],
    'Y' : ["TIME",   "dDmy"],

    'E' : ["USER",   "ecko", "email",    "wuser", "user"],
    'O' : ["USER",   "ecko", "org_type", "wuser", "user"],
    'N' : ["USER",   "ecko", "country",  "wuser", "user"],
    'K' : ["USER",   "ecko", "region",   "wuser", "user"],

    'R' : ["DSID",   "StT",  "search.datasets.title", "search.datasets"],
    'T' : ["DSID",   "StT",  "dataset.dsid",          "dataset"],
    'P' : ["DSID",   "StT",  "specialist",            "dsowner"],

    'W' : ["METHOD", "M",    "method",   "awsusage"]
}

# valid options for params, a hash array of command line parameters
# a -- 1 to view all usage info available
# A -- number of records to return
# c -- array of specified country codes
# C -- a string of short field names for viewing usages
# d -- array of specified dates
# D -- dates range, array of 1 or 2 dates in format of YYYY-MM-DD
# e -- array of specified email addresses
# E -- use given date or date range for email notice of data update
# h -- for given emails, include their historical emails registered before
# H -- a string of report title to replace the default one
# i -- array of specified IP addresses
# I -- use given email IDs for email notice of data update
# k -- array of specified region names
# L -- column delimiter for output
# m -- array of specified months
# M -- array of specified download methods
# o -- array of specified organization types
# O -- a string of short field names for sorting on
# q -- array of the specified quarters, normally combined with years
# s -- size range, array of 1 or 2 sizes in unit of MByte
# S -- array of login names of specialists who owns the datasets
# t -- array of specified dataset names
# T -- dataset range, array of 1 or 2 dataset names
# U -- given unit for file or data sizes
# w -- generate view without totals
# y -- array of specified years
# z -- generate view including entries without usage

params = {}

# relationship between parameter options and short field names, an option is not
# related to a field name if it is not in keys of SNS
SNS = {
    'c' : 'N', 'd' : 'D', 'D' : 'D', 'e' : 'E', 'i' : 'I', 'k' : 'K', 'm' : 'M',
    'M' : 'W', 'o' : 'O', 'q' : 'Q', 's' : 'S', 'S' : 'P', 't' : 'T', 'T' : 'T', 'y' : 'Y'
}

# module-level query-building state, filled in by build_query_strings()
tablenames = fieldnames = condition = ''
sfields = []   # sum fields
gfields = []   # group fields
dfields = []   # distinct fields
pgname = 'viewawsusage'
135
+
136
+ #
137
+ # main function to run this program
138
+ #
139
def main():
    """Parse command-line options into the global params dict, build the
    query strings and output the AWS usage view/report.

    Exits via PgLOG.show_usage() when no parameter is given and via
    PgLOG.pgexit(0) on success.
    """
    PgDBI.view_dbinfo()
    argv = sys.argv[1:]
    inputs = []
    option = 'C'   # default option

    for arg in argv:
        if re.match(r'^-.*$', arg):   # an option flag such as -d
            curopt = arg[1:2]
            if curopt and VUSG['OPTS'].find(curopt) > -1:
                if VUSG['NOPT'].find(option) > -1:
                    params[option] = 1         # stand-alone option, no inputs
                elif inputs:
                    params[option] = inputs    # record input array
                inputs = []        # empty input array
                option = curopt    # start a new option
            else:
                PgLOG.pglog(arg + ": Unknown Option", PgLOG.LGWNEX)
        else:
            val = arg
            if val != '!':   # '!' is the exclusion marker; keep it verbatim
                if option == 's':
                    val = int(val)*1000000   # convert MBytes to Bytes
                elif option in SNS:
                    sfld = SNS[option]
                    if VUSG['SFLD'].find(sfld) > -1:   # string field: normalize case and quote
                        if VUSG['UFLD'].find(sfld) > -1:
                            val = arg.upper()   # in case not in upper case
                        elif VUSG['LFLD'].find(sfld) > -1:
                            val = arg.lower()   # in case not in lower case
                        if option == 'c':
                            val = PgView.get_country_name(val)
                        elif option == 't' or option == 'T':
                            val = PgUtil.format_dataset_id(val)   # add 'ds' if only numbers
                        val = "'{}'".format(val)
            inputs.append(val)

    # record the last option
    if VUSG['NOPT'].find(option) > -1:
        params[option] = 1
    elif inputs:
        params[option] = inputs   # record input array

    if not params:
        PgLOG.show_usage(pgname)
    else:
        check_enough_options()

    if 'o' not in params:
        if 'e' not in params:
            params['o'] = ['!', "'DSS'"]   # default to exclude 'DSS' for organization
    elif params['o'][0] == "'ALL'":
        del params['o']

    usgtable = "awsusage"
    build_query_strings(usgtable)   # build tablenames, fieldnames, and conditions
    records = PgDBI.pgmget(tablenames, fieldnames, condition, PgLOG.UCLWEX)
    if not records: PgLOG.pglog("No Usage Found For Given Conditions", PgLOG.LGWNEX)
    totals = None if 'w' in params else {}
    # fix: compare against None with 'is not' rather than '!=' (PEP 8)
    if dfields or totals is not None:
        records = PgView.compact_hash_groups(records, gfields, sfields, dfields, totals)
    if 'z' in params: records = expand_records(records)
    ostr = params['O'][0] if 'O' in params else params['C'][0]
    records = PgView.order_records(records, ostr.replace('X', ''))
    PgView.simple_output(params, FLDS, records, totals)

    PgLOG.pgexit(0)
207
+
208
+ #
209
+ # cehck if enough information entered on command line for generate view/report, exit if not
210
+ #
211
def check_enough_options():
    """Validate the command-line options collected in params.

    Exits with a usage error (PgLOG.LGWNEX) when required information is
    missing or incompatible options are combined; returns normally when at
    least one condition option is present.
    """
    cols = params['C'][0] if 'C' in params else 'X'
    if cols == 'X': PgLOG.pglog("{}: miss field names '{}'".format(pgname, VUSG['SNMS']), PgLOG.LGWNEX)

    if cols.find('Q') > -1 and cols.find('Y') < 0:   # add Y if Q included
        cols = re.sub('Q', 'YQ', cols)
        params['C'][0] = cols

    for sn in cols:
        if sn == 'X': continue   # do not process INDEX field
        if VUSG['SNMS'].find(sn) < 0:
            PgLOG.pglog("{}: Field {} must be in field names '{}X'".format(pgname, sn, VUSG['SNMS']), PgLOG.LGWNEX)
        if 'z' not in params or sn in EXPAND: continue
        fld = FLDS[sn]
        if fld[6] != 'G': continue
        # fix: '.formt' was a typo for '.format' and raised AttributeError
        # whenever this error path was reached
        PgLOG.pglog("{}: cannot show zero usage for unexpandable field {} - {}".format(pgname, sn, fld[0]), PgLOG.LGWNEX)

    if 'E' in params or 'I' in params:
        if 'z' in params:
            PgLOG.pglog(pgname + ": option -z and -E/-I can not be present at the same time", PgLOG.LGWNEX)
        elif 't' not in params or len(params['t']) > 1:
            PgLOG.pglog(pgname + ": specify one dataset for viewing usage of notified users", PgLOG.LGWNEX)
        elif 'E' in params and 'I' in params:
            PgLOG.pglog(pgname + ": option -E and -I can not be present at the same time", PgLOG.LGWNEX)

    for opt in params:
        if VUSG['CNDS'].find(opt) > -1: return
    PgLOG.pglog("{}: miss condition options '{}'".format(pgname, VUSG['CNDS']), PgLOG.LGWNEX)
240
+
241
+ #
242
+ # process parameter options to build aws query strings
243
+ # global variables are used directly and nothing passes in and returns back
244
+ #
245
def build_query_strings(usgtable):
    """Process parameter options to build the AWS usage query strings.

    Module-level globals (tablenames, fieldnames, condition, sfields,
    gfields, dfields) are used directly; nothing is passed in or returned.
    """
    # initialize query strings
    global condition, fieldnames, tablenames
    joins = groupnames = ''
    tablenames = usgtable
    cols = params['C'][0]

    if 'U' in params:   # reset units for file and read sizes
        if cols.find('B') > -1: FLDS['B'] = PgView.set_data_unit(FLDS['B'], params['U'][0], "sum(size)")
        if cols.find('S') > -1: FLDS['S'] = PgView.set_data_unit(FLDS['S'], params['U'][0], "size")

    if 'e' in params and 'h' in params: params['e'] = PgView.include_historic_emails(params['e'], 3)

    for opt in params:
        if opt == 'C':   # build field, table and group names
            for sn in cols:
                if sn == 'X': continue   # do not process INDEX field
                fld = FLDS[sn]
                if fieldnames: fieldnames += ', '
                fieldnames += "{} {}".format(fld[1], sn)   # add to field name string
                (tablenames, joins) = PgView.join_query_tables(fld[3], tablenames, joins, usgtable)
                if fld[6] == 'S':
                    sfields.append(sn)
                else:
                    if groupnames: groupnames += ', '
                    groupnames += sn   # add to group name string
                    if fld[6] == 'D':
                        dfields.append(sn)
                    else:
                        gfields.append(sn)
        elif opt == 'O':
            continue   # order records later
        elif VUSG['CNDS'].find(opt) > -1:
            if VUSG['NOPT'].find(opt) > -1: continue
            sn = SNS[opt]
            fld = FLDS[sn]
            # build having and where condition strings
            cnd = PgView.get_view_condition(opt, sn, fld, params, VUSG)
            if cnd:
                if condition: condition += ' AND '
                condition += cnd
                (tablenames, joins) = PgView.join_query_tables(fld[3], tablenames, joins, usgtable)

    # append joins, group by, order by, and having strings to condition string
    if 'E' in params or 'I' in params:
        (tablenames, joins) = PgView.join_query_tables("emreceive", tablenames, joins, usgtable)
    if joins:
        if condition:
            condition = "{} AND {}".format(joins, condition)
        else:
            condition = joins
    if 'E' in params or 'I' in params:
        # fix: use dict.get() so option -I alone no longer raises KeyError on
        # params['E']; presumably notice_condition() takes the notice-date list
        # and the email-ID list — confirm against PgView.notice_condition
        condition += PgView.notice_condition(params.get('E'), params.get('I'), params['t'][0])
    if groupnames and sfields: condition += " GROUP BY " + groupnames
301
+
302
+
303
def expand_records(records):
    """Expand the query results so entries with zero usage appear (option -z)."""
    # TIME expansion needs only the params and EXPAND tables
    merged = PgView.expand_query("TIME", records, params, EXPAND)
    # the remaining expansion groups all take the full lookup-table set
    for group in ("USER", "DSID", "METHOD"):
        extra = PgView.expand_query(group, records, params, EXPAND, VUSG, SNS, FLDS)
        merged = PgUtil.crosshash(merged, extra)
    return PgUtil.joinhash(records, merged, 0, 1)
317
+
318
+ #
319
+ # call main() to start program
320
+ #
321
+ if __name__ == "__main__": main()
@@ -0,0 +1,190 @@
1
+
2
+ View usage information of AWS Data Services from information
3
+ stored in PostgreSQL database 'RDADB'.
4
+
5
+ Usage: viewawsusage [-C] ColumnNames [-O OrderColumnNames] [-a] \
6
+ [-A RowLimit] [-c CountryCodes] [-d DateList] \
7
+ [-D StartDate [EndDate]] [-e EMailList] [-h] \
8
+ [-E StartNoticeDate [EndNoticeDate]] \
9
+ [-i IPAddresses] [-I EmailIDList] \
10
+ [-k RegionNames] [-m MonthList] [-M AccessMethods] \
11
+ [-N MinNumberRead [MaxNumberRead]] \
12
+ [-o OrganizationTypes] \
13
+ [-q QuarterList] [-s MinSize [MaxSize]] \
14
+ [-S SpecialistLoginNames] [-t DatasetList] \
15
+ [-T MinDataset [MaxDataset]] [-y YearList] \
16
+ [-H Title] [-L Delimiter] [-U SizeUnit] \
17
+ [-w] [-z] [> OutputFileName] [| lp -d PrinterName]
18
+
19
+ Specify [-C] ColumnNames, refer to Option -C section for detail
20
+ description, and choose at least one of the condition options, -a, -c,
21
+ -d, -D, -e, -E, -i, -I, -k, -m, -M, -N, -o, -q, -s, -S -t, -T, and -y,
22
+ to run this application.
23
+
24
+ For all condition options, except option -a, an '!' sign can be added
25
+ between an option flag and its option values to get an excluding
26
+ condition. For example, choose '-o ! OrganizationTypes' to gather order
27
+ data usage by users from organization types other than the ones given in
28
+ OrganizationTypes. Refer to the example given at the end of this help
29
+ document for how to select excluding condition.
30
+
31
+ String condition options, -c, -e, -g, -i, -k, -M, -o, -S, and -t, allow
32
+ wildcard inputs. '%' matches any number of characters and '_' matches any one
33
+ character. Refer to the example given at the end of this help document
34
+ for how to use wildcard for string condition options.
35
+
36
+ Output of this application is defaulted to page format with a page
37
+ header on each page. A page header includes main title, sub titles and
38
+ column titles according to which column names and options are selected,
39
+ as well as page number and report date. If the output is used directly
40
+ for input of other applications, add option -w to remove page header
41
+ and show only the column titles and the usage information.
42
+
43
+
44
+ Column Options:
45
+ - Option -C, the ColumnNames must be present to run this application.
46
+ The flag -C can be omitted if it is the first parameter option on
47
+ the command line. The ColumnNames is a string that includes column
48
+ names listed below:
49
+
50
+ COLUMN - COLUMN - COLUMN
51
+ NAME - TITLE - DESCRIPTION
52
+ GroupColumns:
53
+ D*- DATE - format as YYYY-MM-DD, for example 2004-04-25
54
+ E*- EMAIL - user email address
55
+ I*- IP - user IP address
56
+ M*- MONTH - format as YYYY-MM, for example 2004-04
57
+ N*- COUNTRY - country codes users from
58
+ K*- REGION - region names users from
59
+ O*- ORGTYPE - organization types (DSS, NCAR, UNIV and OTHER)
60
+ P*- DSOWNER - login names of specialists who own the datasets
61
+ Q*- QUARTER - quarter of year, 1, 2, 3, or 4
62
+ R*- DSTITLE - dataset titles
63
+ S - BSIZE - size of data read each time, default to Bytes
64
+ T*- DATASET - format as dsnnn.n, for example d540001
65
+ W*- METHOD - access methods
66
+ Y*- YEAR - format as YYYY, for example 2004
67
+
68
+ * - field names can processed with zero usages
69
+ SummaryColumns:
70
+ A - DSCOUNT - number of datasets in given GroupColumns
71
+ B - MBREAD - data sizes, default MB, read by given GroupColumns
72
+ C - #UNIQUSER - number of unique users in given GroupColumns
73
+ U - #UNIQIP - number of unique IP addresses in given GroupColumns
74
+ H - #READ - number of reads by given GroupColumns
75
+
76
+ IndexColumn:
77
+ X - INDEX - index of line, it should be the first column
78
+
79
+ The column names are used to build up string of ColumnNames, while
80
+ their associated column titles are shown in view/report output of
81
+ this application. The display order of the column titles is
82
+ determined by the order of the column names in the ColumnNames
83
+ string. At least one of the group and summary columns must be
84
+ selected, in the ColumnNames string, to generate all usage
85
+ view/report;
86
+
87
+ For example, choose '-C EMB' to display column titles of EMAIL,
88
+ MONTH and MBREAD, in the first, second and third columns
89
+ respectively, for numbers of MBytes of data read by each user
90
+ in each month;
91
+
92
+ - Option -O, sort data usage information in ascending or descending
93
+ order based on the column names specified in OrderColumnNames
94
+ string. These column names must be in the selected [-C]
95
+ ColumnNames string. If an column name is in upper case, its
96
+ associated column is sorted in ascending order, and a lower
97
+ case means sorting in descending order;
98
+
99
+
100
+ Condition Options:
101
+ - Option -a, for all usage in table 'awsusage';
102
+
103
+ - Option -A, gives a row limit for querying;
104
+
105
+ - Option -c, for files read by users from given country codes;
106
+
107
+ - Option -d, for data read on given dates, in format YYYY-MM-DD;
108
+
109
+ - Option -D, for data read between two given dates, each date
110
+ is in format YYYY-MM-DD. Omit EndDate for no upper limit;
111
+
112
+ - Option -e, for data read by users with given email addresses;
113
+
114
+ - Option -E, for data read by users who have been notified
115
+ data update of a specified dataset between two given dates,
116
+ each date is in format YYYY-MM-DD. Omit EndNoticeDate for
117
+ no upper limit;
118
+
119
+ - Option -h, works with Option -e to include historical user emails
120
+ registered before;
121
+
122
+ - Option -i, for data read from machines with given IP addresses;
123
+
124
+ - Option -k, for files read by users from given region names;
125
+
126
+ - Option -m, for data read in given months, in format YYYY-MM;
127
+
128
+ - Option -M, for data read via access methods;
129
+
130
+ - Option -N, for files for numbers of read by each group between
131
+ MinNumberRead and MaxNumberRead. Omit MaxNumberRead for no
132
+ upper limit;
133
+
134
+ - Option -o, for data read by users from given organization types.
135
+ It defaults to -o ! DSS to exclude usage from DSS specialists;
136
+ Set it to ALL to include all organization types;
137
+
138
+ - Option -q, for data read in given quarters;
139
+
140
+ - Option -s, for data sizes, unit of MByte, between MinSize and MaxSize.
141
+ Omit MaxSize for no upper limit;
142
+
143
+ - Option -S, for login names of specialists who own the datasets;
144
+
145
+ - Option -t, for data associating to given dataset names;
146
+
147
+ - Option -T, for data associating to datasets between
148
+ MinDataset and MaxDataset. Omit MaxDataset for no upper limit.
149
+ For example, -T d540000 d550009, for datasets numbers d540000-d550009;
150
+
151
+ - Option -y, for data read in given years in format YYYY;
152
+
153
+
154
+ Miscellaneous Options:
155
+ - Option -w, view data usage in simple format without totals;
156
+
157
+ - Option -z, include datasets without usage;
158
+
159
+ - Option -H, use given report title to replace the default one;
160
+
161
+ - Option -L, use given delimiter for output, instead of defaulted spaces;
162
+
163
+ - Option -U, show data sizes in given unit SizeUnit [BKMG].
164
+ B - Byte, K - KiloBytes, M - MegaByte, and G - GigaByte;
165
+
166
+ - Option > OutputFilename, redirect output into an output file,
167
+ for example, ordusage.out, instead of viewing on screen directly;
168
+
169
+ - Option | lp -d PrinterName, redirect output to printer of PrinterName.
170
+ Replace PrinterName with lj100 to print through DSS LaserJet printer.
171
+
172
+
173
+ For example:
174
+ To view annual data usage in year 2005 with columns, INDEX(X),
175
+ EMAIL(E), ORGTYPE(O), #READ(H), and MBREAD(B); ordered by ORGTYPE as
176
+ ascending and MBREAD(B) as descending; the command line should be:
177
+
178
+ viewawsusage XEOHB -y 2005 -O Ob
179
+
180
+ For usage by users not in Organization 'DSS', out of the file usage
181
+ gathered above, the command line should be:
182
+
183
+ viewawsusage XEOHB -y 2005 -o ! DSS -O Ob
184
+
185
+ To redirect the previous output to a file named awsusage.out:
186
+
187
+ viewawsusage XEOHB -y 2005 -o ! DSS -O Ob > awsusage.out
188
+
189
+ Then you can view the file or print it as a report.
190
+
@@ -0,0 +1,321 @@
1
#!/usr/bin/env python3
#
###############################################################################
#
# Title : viewosdfusage
# Author : Zaihua Ji, zji@ucar.edu
# Date : 2025-08-13
# Purpose : python program to view osdf usage information
#
# Github : https://github.com/NCAR/rda-python-metrics.git
#
###############################################################################
#
import os
import re
import sys
from rda_python_common import PgLOG
from rda_python_common import PgUtil
from rda_python_common import PgDBI
from . import PgView

# control strings that define the valid short field names, the valid command-line
# options, and how option values are treated when query conditions are built
VUSG = {
   'SNMS' : "ABCDEHIKMNOPQRSTUWY", # all available short field names in FLDS
   'OPTS' : 'AabcCdDeEhHikLmMnoOqsStTUwyz', # all available options, used for params
   'NOPT' : 'abhnwz', # stand-alone options without inputs
   'ACND' : 'cdeiIkmMoqSty', # available array condition options
   'RCND' : 'DEsT', # available range condition options
   'CNDS' : 'acdDeEiIkmMnoqsStTy', # condition options, ACND, RCND and 'a'
   'ECND' : 'my', # condition options that need evaluating
   'SFLD' : 'DEIKNOTUW', # string fields, to be quoted in condition
   'UFLD' : 'NO', # string fields that must be in upper case
   'LFLD' : 'EMPT' # string fields that must be in lower case
}

# keys of FLDS - short field names
# column 0 - column title shown in usage view
# column 1 - field name in format as shown in select clauses
# column 2 - field name shown in where condition query string
# column 3 - table name that the field belongs to
# column 4 - output field length, the longer one of data size and column title, determined
#            dynamically if it is 0. Negative values indicate right justification
# column 5 - precision for floating point value if positive; show total value if not zero
# column 6 - field flag to indicate it is a group (G), distinct (D) or sum (S) field
FLDS = {
# SHRTNM COLUMNNAME FIELDNAME CNDNAME TBLNAM Size Prc Grp/Sum
   'D' : ['DATE', "date", 'date', 'osdfusage', 10, 0, 'G'],
   'E' : ['EMAIL', "osdfusage.email", 'osdfusage.email', 'osdfusage', 0, 0, 'G'],
   'I' : ['IP', "ip", 'ip', 'osdfusage', 0, 0, 'G'],
   'M' : ['MONTH', PgDBI.fmtym("date"), 'date', 'osdfusage', 7, 0, 'G'],
   'N' : ['COUNTRY', "country", 'country', 'osdfusage', 0, 0, 'G'],
   'K' : ['REGION', "region", 'region', 'osdfusage', 0, 0, 'G'],
   'O' : ['ORGTYPE', "org_type", 'org_type', 'osdfusage', 7, 0, 'G'],
   'P' : ['DSOWNER', "specialist", 'specialist', 'dsowner', 8, 0, 'G'],
   'Q' : ['QUARTER', "quarter", 'quarter', 'osdfusage', 7, 0, 'G'],
   'R' : ['DSTITLE', "search.datasets.title", 'search.datasets.title', 'search.datasets', 0, 0, 'G'],
   'S' : ['BYTESIZE', "size", 'size', 'osdfusage', -14, -1, 'G'],
   'T' : ['DATASET', "osdfusage.dsid", 'osdfusage.dsid', 'osdfusage', 0, 0, 'G'],
   'W' : ['METHOD', "method", 'method', 'osdfusage', 0, 0, 'G'],
   'Y' : ['YEAR', PgDBI.fmtyr("date"), 'date', 'osdfusage', 4, 0, 'G'],
   'A' : ['DSCOUNT', "osdfusage.dsid", 'A', 'osdfusage', -7, -1, 'D'],
   'B' : ['MBYTEREAD', "round(sum(size)/(1000000), 4)", 'B', 'osdfusage', -14, 3, 'S'],
   'C' : ['#UNIQUSER', "osdfusage.email", 'C', 'osdfusage', -9, -1, 'D'],
   'U' : ['#UNIQIP', "osdfusage.ip", 'U', 'osdfusage', -7, -1, 'D'],
   'H' : ['#ACCESS', "sum(fcount)", 'H', 'osdfusage', -8, -1, 'S'],
   'X' : ['INDEX', "", 'X', '', -6, 0, ' ']
}

# keys of EXPAND - short field names that allow zero usage
# column 0 - expand ID for a group of fields
# column 1 - string of condition options handled by that expand group
# column 2 - field name in format as shown in select clauses (absent for TIME entries)
# column 3 - table name that the field belongs to (absent for TIME entries)
EXPAND = {
# SHRTNM EXPID CNDSTR FIELDNAME TBLNAM
   'D' : ["TIME", "dDmy"],
   'M' : ["TIME", "dDmy"],
   'Q' : ["TIME", "dDmy"],
   'Y' : ["TIME", "dDmy"],

   'E' : ["USER", "ecko", "email", "wuser", "user"],
   'O' : ["USER", "ecko", "org_type", "wuser", "user"],
   'N' : ["USER", "ecko", "country", "wuser", "user"],
   'K' : ["USER", "ecko", "region", "wuser", "user"],

   'R' : ["DSID", "StT", "search.datasets.title", "search.datasets"],
   'T' : ["DSID", "StT", "dataset.dsid", "dataset"],
   'P' : ["DSID", "StT", "specialist", "dsowner"],

   'W' : ["METHOD", "fM", "method", "osdfusage"]
}

# valid options for params, a hash array of command line parameters
# a -- 1 to view all usage info available
# A -- number of records to return
# c -- array of specified country codes
# C -- a string of short field names for viewing usages
# d -- array of specified dates
# D -- date range, array of 1 or 2 dates in format of YYYY-MM-DD
# e -- array of specified email addresses
# E -- use given date or date range for email notice of data update
# h -- for given emails, include their historical emails registered before
# H -- a string of report title to replace the default one
# i -- array of specified IP addresses
# I -- use given email IDs for email notice of data update
# k -- array of specified region names
# L -- column delimiter for output
# m -- array of specified months
# M -- array of specified download methods
# o -- array of specified organization types
# O -- a string of short field names for sorting on
# q -- array of the specified quarters, normally combined with years
# s -- size range, array of 1 or 2 sizes in unit of MByte
# S -- array of login names of specialists who own the datasets
# t -- array of specified dataset names
# T -- dataset range, array of 1 or 2 dataset names
# U -- given unit for file or data sizes
# w -- generate view without totals
# y -- array of specified years
# z -- generate view including entries without usage

params = {}

# relationship between parameter options and short field names; an option is not
# related to a field name if it is not in keys of SNS
SNS = {
   'c' : 'N', 'd' : 'D', 'D' : 'D', 'e' : 'E', 'i' : 'I', 'k' : 'K', 'm' : 'M',
   'M' : 'W', 'o' : 'O', 'q' : 'Q', 's' : 'S', 'S' : 'P', 't' : 'T', 'T' : 'T', 'y' : 'Y'
}

# query-building globals filled in by build_query_strings()
tablenames = fieldnames = condition = ''
sfields = []   # short names of sum fields
gfields = []   # short names of group fields
dfields = []   # short names of distinct-count fields
pgname = 'viewosdfusage'
135
+
136
#
# main function to run this program
#
def main():

    PgDBI.view_dbinfo()
    argv = sys.argv[1:]
    inputs = []          # values accumulated for the current option
    option = 'C'         # default option

    # walk the command line: a leading '-' starts a new option, anything else
    # is a value for the option currently in effect
    for arg in argv:
        if re.match(r'^-.*$', arg):
            curopt = arg[1:2]
            if curopt and VUSG['OPTS'].find(curopt) > -1:
                if VUSG['NOPT'].find(option) > -1:
                    params[option] = 1   # stand-alone flag option takes no values
                elif inputs:
                    params[option]= inputs # record input array
                    inputs = [] # empty input array
                option = curopt # start a new option
            else:
                PgLOG.pglog(arg + ": Unknown Option", PgLOG.LGWNEX)
        else:
            val = arg
            # '!' is kept verbatim: it marks an excluding condition
            if val != '!':
                if option == 's':
                    val = int(val)*1000000 # convert MBytes to Bytes
                elif option in SNS:
                    sfld = SNS[option]
                    if VUSG['SFLD'].find(sfld) > -1:
                        if VUSG['UFLD'].find(sfld) > -1:
                            val = arg.upper() # in case not in upper case
                        elif VUSG['LFLD'].find(sfld) > -1:
                            val = arg.lower() # in case not in lower case
                        if option == 'c':
                            val = PgView.get_country_name(val)
                        elif option == 't' or option == 'T':
                            val = PgUtil.format_dataset_id(val) # add 'ds' if only numbers
                        val = "'{}'".format(val)   # quote string-field values for SQL
            inputs.append(val)

    # record the last option
    if VUSG['NOPT'].find(option) > -1:
        params[option] = 1
    elif inputs:
        params[option] = inputs # record input array

    if not params:
        PgLOG.show_usage(pgname)
    else:
        check_enough_options()

    if 'o' not in params:
        if 'e' not in params:
            params['o'] = ['!', "'DSS'"] # default to exclude 'DSS' for organization
    elif params['o'][0] == "'ALL'":
        del params['o']   # 'ALL' means no organization-type filter at all

    usgtable = "osdfusage"
    build_query_strings(usgtable) # build tablenames, fieldnames, and conditions
    records = PgDBI.pgmget(tablenames, fieldnames, condition, PgLOG.UCLWEX)
    if not records: PgLOG.pglog("No Usage Found For Given Conditions", PgLOG.LGWNEX)
    totals = None if 'w' in params else {}
    if dfields or totals != None:
        records = PgView.compact_hash_groups(records, gfields, sfields, dfields, totals)
    if 'z' in params: records = expand_records(records)   # add zero-usage entries
    ostr = params['O'][0] if 'O' in params else params['C'][0]
    records = PgView.order_records(records, ostr.replace('X', ''))
    PgView.simple_output(params, FLDS, records, totals)

    PgLOG.pgexit(0)
207
+
208
#
# check if enough information was entered on the command line to generate a
# view/report; exit via PgLOG.LGWNEX if not
#
def check_enough_options():

    cols = params['C'][0] if 'C' in params else 'X'
    if cols == 'X': PgLOG.pglog("{}: miss field names '{}'".format(pgname, VUSG['SNMS']), PgLOG.LGWNEX)

    if cols.find('Q') > -1 and cols.find('Y') < 0: # add Y if Q included
        cols = re.sub('Q', 'YQ', cols)
        params['C'][0] = cols

    for sn in cols:
        if sn == 'X': continue # do not process INDEX field
        if VUSG['SNMS'].find(sn) < 0:
            PgLOG.pglog("{}: Field {} must be in field names '{}X'".format(pgname, sn, VUSG['SNMS']), PgLOG.LGWNEX)
        if 'z' not in params or sn in EXPAND: continue
        fld = FLDS[sn]
        if fld[6] != 'G': continue
        # bug fix: the original called str.formt(), which raises AttributeError
        # instead of printing this error message
        PgLOG.pglog("{}: cannot show zero usage for unexpandable field {} - {}".format(pgname, sn, fld[0]), PgLOG.LGWNEX)

    # -E/-I (notified-user views) need exactly one dataset and no -z
    if 'E' in params or 'I' in params:
        if 'z' in params:
            PgLOG.pglog(pgname + ": option -z and -E/-I can not be present at the same time", PgLOG.LGWNEX)
        elif 't' not in params or len(params['t']) > 1:
            PgLOG.pglog(pgname + ": specify one dataset for viewing usage of notified users", PgLOG.LGWNEX)
        elif 'E' in params and 'I' in params:
            PgLOG.pglog(pgname + ": option -E and -I can not be present at the same time", PgLOG.LGWNEX)

    # at least one condition option must be present
    for opt in params:
        if VUSG['CNDS'].find(opt) > -1: return
    PgLOG.pglog("{}: miss condition options '{}'".format(pgname, VUSG['CNDS']), PgLOG.LGWNEX)
240
+
241
#
# process parameter options to build osdf query strings;
# global variables are set directly and nothing is passed back
#
def build_query_strings(usgtable):

    # initialize query strings
    global condition, fieldnames, tablenames
    joins = groupnames = ''
    tablenames = usgtable
    cols = params['C'][0]

    if 'U' in params: # reset units for file and read sizes
        if cols.find('B') > -1: FLDS['B'] = PgView.set_data_unit(FLDS['B'], params['U'][0], "sum(size)")
        if cols.find('S') > -1: FLDS['S'] = PgView.set_data_unit(FLDS['S'], params['U'][0], "size")

    if 'e' in params and 'h' in params: params['e'] = PgView.include_historic_emails(params['e'], 3)

    for opt in params:
        if opt == 'C': # build field, table and group names
            for sn in cols:
                if sn == 'X': continue # do not process INDEX field
                fld = FLDS[sn]
                if fieldnames: fieldnames += ', '
                fieldnames += "{} {}".format(fld[1], sn) # add to field name string
                (tablenames, joins) = PgView.join_query_tables(fld[3], tablenames, joins, usgtable)
                if fld[6] == 'S':
                    sfields.append(sn)
                else:
                    if groupnames: groupnames += ', '
                    groupnames += sn # add to group name string
                    if fld[6] == 'D':
                        dfields.append(sn)
                    else:
                        gfields.append(sn)
        elif opt == 'O':
            continue # order records later
        elif VUSG['CNDS'].find(opt) > -1:
            if VUSG['NOPT'].find(opt) > -1: continue
            sn = SNS[opt]
            fld = FLDS[sn]
            # build having and where condition strings
            cnd = PgView.get_view_condition(opt, sn, fld, params, VUSG)
            if cnd:
                if condition: condition += ' AND '
                condition += cnd
                (tablenames, joins) = PgView.join_query_tables(fld[3], tablenames, joins, usgtable)

    # append joins, group by, order by, and having strings to condition string
    if 'E' in params or 'I' in params:
        (tablenames, joins) = PgView.join_query_tables("emreceive", tablenames, joins, usgtable)
    if joins:
        if condition:
            condition = "{} AND {}".format(joins, condition)
        else:
            condition = joins
    if 'E' in params or 'I' in params:
        # bug fix: the original used params['E'] unconditionally, raising KeyError
        # when only -I was given, and never forwarded the -I email IDs; use .get()
        # so the absent option is passed as None
        condition += PgView.notice_condition(params.get('E'), params.get('I'), params['t'][0])
    if groupnames and sfields: condition += " GROUP BY " + groupnames
301
+
302
+
303
def expand_records(records):
    """Merge zero-usage entries into *records* for every expandable field group.

    Builds the TIME expansion first, then intersects it with the USER, DSID
    and METHOD expansions via PgUtil.crosshash, and finally joins the combined
    expansion back onto the original records with PgUtil.joinhash.
    """
    combined = PgView.expand_query("TIME", records, params, EXPAND)

    # the remaining groups all take the full option/field configuration
    for expid in ("USER", "DSID", "METHOD"):
        expanded = PgView.expand_query(expid, records, params, EXPAND, VUSG, SNS, FLDS)
        combined = PgUtil.crosshash(combined, expanded)

    return PgUtil.joinhash(records, combined, 0, 1)
317
+
318
#
# call main() to start program when run as a script
#
if __name__ == "__main__": main()
@@ -0,0 +1,190 @@
1
+
2
+ View usage information of OSDF Data Services from information
3
+ stored in PostgreSQL database 'RDADB'.
4
+
5
+ Usage: viewosdfusage [-C] ColumnNames [-O OrderColumnNames] [-a] \
6
+ [-A RowLimit] [-c CountryCodes] [-d DateList] \
7
+ [-D StartDate [EndDate]] [-e EMailList] -h \
8
+ [-E StartNoticeDate [EndNoticeDate]] \
9
+ [-i IPAddresses] [-I EmailIDList] \
10
+ [-k RegionNames] [-m MonthList] [-M AccessMethods] \
11
+ [-N MinNumberRead [MaxNumberRead]] \
12
+ [-o OrganizationTypes] \
13
+ [-q QuarterList] [-s MinSize [MaxSize]] \
14
+ [-S SpecialistLoginNames] [-t DatasetList] \
15
+ [-T MinDataset [MaxDataset]] [-y YearList] \
16
+ [-H Title] [-L Delimiter] [-U SizeUnit] \
17
+ [-w] [-z] [> OutputFileName] [| lp -d PrinterName]
18
+
19
+ Specify [-C] ColumnNames, refer to Option -C section for detail
20
+ description, and choose at least one of the condition options, -a, -c,
21
+ -d, -D, -e, -E, -i, -I, -k, -m, -M, -N, -o, -q, -s, -S -t, -T, and -y,
22
+ to run this application.
23
+
24
+ For all condition options, except option -a, an '!' sign can be added
25
+ between an option flag and its option values to get an excluding
26
+ condition. For example, choose '-o ! OrganizationTypes' to gather order
27
+ data usage by users from organization types other than the ones given in
28
+ OrganizationTypes. Refer to the example given at the end of this help
29
+ document for how to select excluding condition.
30
+
31
+ String condition options, -c, -e, -g, -i, -k, -M, -o, -S, and -t, allow
32
+ wildcard inputs. '%' matches any number of characters and '_' matches any one
33
+ character. Refer to the example given at the end of this help document
34
+ for how to use wildcard for string condition options.
35
+
36
+ Output of this application is defaulted to page format with a page
37
+ header on each page. A page header includes main title, sub titles and
38
+ column titles according to which column names and options are selected,
39
+ as well as page number and report date. If the output is used directly
40
+ for input of other applications, add option -w to remove page header
41
+ and show only the column titles and the usage information.
42
+
43
+
44
+ Column Options:
45
+ - Option -C, the ColumnNames must be present to run this application.
46
+ The flag -C can be omitted if it is the first parameter option on
47
+ the command line. The ColumnNames is a string that includes column
48
+ names listed below:
49
+
50
+ COLUMN - COLUMN - COLUMN
51
+ NAME - TITLE - DESCRIPTION
52
+ GroupColumns:
53
+ D*- DATE - format as YYYY-MM-DD, for example 2004-04-25
54
+ E*- EMAIL - user email address
55
+ I*- IP - user IP address
56
+ M*- MONTH - format as YYYY-MM, for example 2004-04
57
+ N*- COUNTRY - country codes users from
58
+ K*- REGION - region names users from
59
+ O*- ORGTYPE - organization types (DSS, NCAR, UNIV and OTHER)
60
+ P*- DSOWNER - login names of specialists who own the datasets
61
+ Q*- QUARTER - quarter of year, 1, 2, 3, or 4
62
+ R*- DSTITLE - dataset titles
63
+ S - BSIZE - size of data read each time, default to Bytes
64
+ T*- DATASET - format as dsnnn.n, for example d540001
65
+ W*- METHOD - access methods
66
+ Y*- YEAR - format as YYYY, for example 2004
67
+
68
+ * - field names can processed with zero usages
69
+ SummaryColumns:
70
+ A - DSCOUNT - number of datasets in given GroupColumns
71
+ B - MBREAD - data sizes, default MB, read by given GroupColumns
72
+ C - #UNIQUSER - number of unique users in in given GroupColumns
73
+ U - #UNIQIP - number of unique users in in given GroupColumns
74
+ H - #READ - number of reads by given GroupColumns
75
+
76
+ IndexColumn:
77
+ X - INDEX - index of line, it should be the first column
78
+
79
+ The column names are used to build up string of ColumnNames, while
80
+ their associated column titles are shown in view/report output of
81
+ this application. The display order of the column titles is
82
+ determined by the order of the column names in the ColumnNames
83
+ string. At least one of the group and summary columns must be
84
+ selected, in the ColumnNames string, to generate all usage
85
+ view/report;
86
+
87
+ For example, choose '-C EMB' to display column titles of EMAIL,
88
+ MONTH and MBREAD, in the first, second and third columns
89
+ respectively, for numbers of MBytes of data read by each user
90
+ in each month;
91
+
92
+ - Option -O, sort data usage information in ascending or descending
93
+ order based on the column names specified in OrderColumnNames
94
+ string. These column names must be in the selected [-C]
95
+ ColumnNames string. If an column name is in upper case, its
96
+ associated column is sorted in ascending order, and a lower
97
+ case means sorting in descending order;
98
+
99
+
100
+ Condition Options:
101
+ - Option -a, for all usage in table 'osdfusage';
102
+
103
+ - Option -A, gives a row limit for querying;
104
+
105
+ - Option -c, for files read by users from given country codes;
106
+
107
+ - Option -d, for data read on given dates, in format YYYY-MM-DD;
108
+
109
+ - Option -D, for data read between two given dates, each date
110
+ is in format YYYY-MM-DD. Omit EndDate for no upper limit;
111
+
112
+ - Option -e, for data read by users with given email addresses;
113
+
114
+ - Option -E, for data read by users who have been notified
115
+ data update of a specified dataset between two given dates,
116
+ each date is in format YYYY-MM-DD. Omit EndNoticeDate for
117
+ no upper limit;
118
+
119
+ - Option -h, works with Option -e to include historical user emails
120
+ registered before;
121
+
122
+ - Option -i, for data read from machines with given IP addresses;
123
+
124
+ - Option -k, for files read by users from given region names;
125
+
126
+ - Option -m, for data read in given months, in format YYYY-MM;
127
+
128
+ - Option -M, for data read via access methods;
129
+
130
+ - Option -N, for files for numbers of read by each group between
131
+ MinNumberRead and MaxNumberRead. Omit MaxNumberRead for no
132
+ upper limit;
133
+
134
+ - Option -o, for data read by users from given organization types.
136
+ It defaults to -o ! DSS to exclude usage from DSS specialists;
137
+ Set it to ALL to include all organization types;
137
+
138
+ - Option -q, for data read in given quarters;
139
+
140
+ - Option -s, for data sizes, unit of MByte, between MinSize and MaxSize.
141
+ Omit MaxSize for no upper limit;
142
+
143
+ - Option -S, for login names of specialists who own the datasets;
144
+
145
+ - Option -t, for data associating to given dataset names;
146
+
147
+ - Option -T, for data associating to datasets between
148
+ MinDataset and MaxDataset. Omit MaxDataset for no upper limit.
149
+ For example, -T d540000 d550009, for datasets numbers d540000-d550009;
150
+
151
+ - Option -y, for data read in given years in format YYYY;
152
+
153
+
154
+ Miscellaneous Options:
155
+ - Option -w, view data usage in simple format without totals;
156
+
157
+ - Option -z, include datasets without usage
158
+
159
+ - Option -H, use given report title to replace the default one;
160
+
161
+ - Option -L, use given delimiter for output, instead of defaulted spaces;
162
+
163
+ - Option -U, show data sizes in given unit SizeUnit [BKMG].
164
+ B - Byte, K - KiloBytes, M - MegaByte, and G - GigaByte;
165
+
166
+ - Option > OutputFilename, redirect output into an output file,
167
+ for example, ordusage.out, instead of viewing on screen directly;
168
+
169
+ - Option | lp -d PrinterName, redirect output to printer of PrinterName.
170
+ Replace PrinterName with lj100 to print through DSS LaserJet printer.
171
+
172
+
173
+ For example:
174
+ To view annual data usage in year 2005 with columns, INDEX(X),
175
+ EMAIL(E), ORGTYPE(O), #READ(H), and MBREAD(B); ordered by ORGTYPE as
176
+ ascending and MBREAD(B) as descending; the command line should be:
177
+
178
+ viewosdfusage XEOHB -y 2005 -O Ob
179
+
180
+ For usage by users not in Organization 'DDS', out of the file usage
181
+ gathered above, the command line should be:
182
+
183
+ viewosdfusage XEOHB -y 2005 -o ! DSS -O Ob
184
+
185
+ To redirect the previous output to a file named awsusage.out:
186
+
187
+ viewosdfusage XEOHB -y 2005 ! DSS -O Ob > osdfusage.out
188
+
189
+ Then you can view the file or print it as a report.
190
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rda_python_metrics
3
- Version: 1.0.34
3
+ Version: 1.0.36
4
4
  Summary: RDA Python Package to gather and view data usage metrics
5
5
  Author-email: Zaihua Ji <zji@ucar.edu>
6
6
  Project-URL: Homepage, https://github.com/NCAR/rda-python-metrics
@@ -1,7 +1,7 @@
1
1
  rda_python_metrics/PgIPInfo.py,sha256=CfcnJxD2zHyAhemhTisdnPB72wHwE8MxS0EH4EAKnfE,9151
2
2
  rda_python_metrics/PgView.py,sha256=r6otb3DjfiaQJdg0z8bZQAOlhr4JnrXJzp9wgWh_8qQ,24369
3
3
  rda_python_metrics/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
4
- rda_python_metrics/fillawsusage.py,sha256=Z4rMXINAA8mM-Wbbj4n5ubp8NJegR-JwZR28BieC5Eg,7040
4
+ rda_python_metrics/fillawsusage.py,sha256=bylk7m8VJyXMZMZwyPkU3rQqIdttjkeo8aTYeneqMDQ,7073
5
5
  rda_python_metrics/fillawsusage.usg,sha256=-lLTRLTaEPL2rSQ4mhpIekhyYrLGahy7NWtaq_8PCDM,611
6
6
  rda_python_metrics/fillcdgusage.py,sha256=jJVxQ4d5at0lXXKmV7TVf2GrPWSrJv5XBiHGMc3uxEA,16706
7
7
  rda_python_metrics/fillcdgusage.usg,sha256=5lkd4Zdi72nQcha-JtbaLnxl66V4wBDykKwQtUOtMrw,667
@@ -11,13 +11,13 @@ rda_python_metrics/fillcountry.py,sha256=7i5LNi3scRoyRCT6t7aeNTGKOpxzJ2mA9tnvUqj
11
11
  rda_python_metrics/fillendtime.py,sha256=skZttlpoY19g0dGwqGQI8t_1YPPTPEXwg3EfNlfL90I,2533
12
12
  rda_python_metrics/fillgdexusage.py,sha256=8KR5Lt30VCTxWOtc9EStLFzq5qa2di8RRQ3qMuIvSYY,37818
13
13
  rda_python_metrics/fillgdexusage.usg,sha256=mVYtK0pIYmvma0skT-wXM-NOEpkN_i3E61UdWgQWLfs,648
14
- rda_python_metrics/fillglobususage.py,sha256=ahz8XnnJdD_AbSYqJ34lWmDuzws_-SNmCR8QE20aovA,8539
14
+ rda_python_metrics/fillglobususage.py,sha256=zuxzoeV9BAMoVUu_VCYECPddYNrBWpV74kkYpyqIQhg,8443
15
15
  rda_python_metrics/fillglobususage.usg,sha256=1GgmCP22IQZdADwL5Mmkz3v8Ws-G7U3teQ1AxRJfV_4,637
16
16
  rda_python_metrics/fillipinfo.py,sha256=BrboxAIs8Q5jhz_4GYW_GibMT5GyEbnQkYfFR8hc_yo,6747
17
17
  rda_python_metrics/fillipinfo.usg,sha256=YeCR58xGv0emqHUZ_9R977HrqaeBwbd6j5QRF2Lc7TA,702
18
18
  rda_python_metrics/filloneorder.py,sha256=ADHbcKCDh9bJunnxYbkbjwU2QpC43hvGlLWaURHNxkg,5433
19
19
  rda_python_metrics/filloneorder.usg,sha256=mtOySKx6-D4k2bbTcmi6cSYtINiycRyHQkHozi0CQu0,1466
20
- rda_python_metrics/fillosdfusage.py,sha256=Mpd2qkLQmOLqfahCI89kXIR9FpOKoQdyfrU86AE_dd0,9257
20
+ rda_python_metrics/fillosdfusage.py,sha256=--c6PzeZ6EZXpb0zxy4lZCDjwwIBb7c6xgP7QCKLoMQ,7087
21
21
  rda_python_metrics/fillosdfusage.usg,sha256=Qc5YdUuOiAH74FfVdkwkrQRDYXcASVbxMdBsVQj4X1k,635
22
22
  rda_python_metrics/fillrdadb.py,sha256=cb6upPApAZloOouUSzZZmjGvOsYT_Nzh9Lz926WE3ZQ,5333
23
23
  rda_python_metrics/fillrdadb.usg,sha256=E_Bf4G8yVABogjRmIOaIbTGgnII2W6RltaFad2XEV2Q,1228
@@ -32,12 +32,16 @@ rda_python_metrics/pgsyspath.py,sha256=DZhFp-r-LzN0qrHtfdCYfnGDnmD_R4ufuEduk0_vR
32
32
  rda_python_metrics/pgusername.py,sha256=VoNJfXBsyzdfz49qwCypnUsqcDm4cUaqOYViJ-jzaKI,1265
33
33
  rda_python_metrics/viewallusage.py,sha256=DlAvY2bieJWrrrYMPhCkKWgqdfSN492_LZCS95BhY0A,15753
34
34
  rda_python_metrics/viewallusage.usg,sha256=Vfwc6aNIkpe1FBdBsV2htyUESgmx6ODhMMwAb1NX8-0,10297
35
+ rda_python_metrics/viewawsusage.py,sha256=jSjudneYBfR5a9565i6Qe0GeKjIxAet9HZfOAfOOjs4,14128
36
+ rda_python_metrics/viewawsusage.usg,sha256=LbolAFqpXYjMdk_dMWKO3SFOTORoGc-LmLv2kI1J_nI,8843
35
37
  rda_python_metrics/viewcheckusage.py,sha256=HougqjDAOVG6pYglFjyHQ-UdLBcYe7v_jzU1-80RqFA,12996
36
38
  rda_python_metrics/viewcheckusage.usg,sha256=KuJFycggGiUcSezQ9vywDbituvu63SZ-ZnNTaMpbc-A,8930
37
39
  rda_python_metrics/viewcodusage.py,sha256=6Shmbzq_DNh0uvT9lPxpB8ic2JnrmQzmR7Bc-9U4gl0,14243
38
40
  rda_python_metrics/viewcodusage.usg,sha256=WH3gSml94_jbm20kqAUFUSnla4JQZrseTZe24mmlDEA,8788
39
41
  rda_python_metrics/viewordusage.py,sha256=cnZMSfxWlCNbh1Ck3LfC0wQxS6HCv6TBVOnOVzRU65E,15484
40
42
  rda_python_metrics/viewordusage.usg,sha256=19tHhPZB9y247BddfwtXA3_K50BnzEJcWUbBNw-3NPU,10568
43
+ rda_python_metrics/viewosdfusage.py,sha256=xpSi1VcjtEmfF9fAwSOeGLYYj3j972bX6kj0fZUL6Q4,14159
44
+ rda_python_metrics/viewosdfusage.usg,sha256=J9dE98j3BUWGej6kIXz46Pl7nntsTBoJvxFcb6mqTb0,8844
41
45
  rda_python_metrics/viewrqstusage.py,sha256=wNH5DTEBYrUQKAms10weBH939r-m3tLXXg5PwS6bzlk,16690
42
46
  rda_python_metrics/viewrqstusage.usg,sha256=Ii5-7h_RO2rkoE9VLxuLhc9klgkEJSqHoDrsOlQOTKo,10481
43
47
  rda_python_metrics/viewtdsusage.py,sha256=nmtH4d7pPqSwLoAlocb5UTk0W38TT57gWmCyuKL4bF8,14505
@@ -46,9 +50,9 @@ rda_python_metrics/viewwebfile.py,sha256=HSMNkQQawonu6W3blV7g9UbJuNy9VAOn9COqgmj
46
50
  rda_python_metrics/viewwebfile.usg,sha256=lTNi8Yu8BUJuExEDJX-vsJyWUSUIQTS-DiiBEVFo33s,10054
47
51
  rda_python_metrics/viewwebusage.py,sha256=ES2lI8NaCeCpTGi94HU-cDRBxHMiUBbplyYsZf2KqF0,16650
48
52
  rda_python_metrics/viewwebusage.usg,sha256=OVDZ78p87E3HLW34ZhasNJ7Zmw8XXjmZPPWZfRhPLXo,9936
49
- rda_python_metrics-1.0.34.dist-info/licenses/LICENSE,sha256=1dck4EAQwv8QweDWCXDx-4Or0S8YwiCstaso_H57Pno,1097
50
- rda_python_metrics-1.0.34.dist-info/METADATA,sha256=1nqVoglplCc6y7KSpCU4ovnakC7eWfz952a5oZOE0bc,761
51
- rda_python_metrics-1.0.34.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
52
- rda_python_metrics-1.0.34.dist-info/entry_points.txt,sha256=ctvX0Gx9zdnKpHx5sjZdl7_sLSR80LKYhPky9qkEpug,1239
53
- rda_python_metrics-1.0.34.dist-info/top_level.txt,sha256=aoBgbR_o70TP0QmMW0U6inRHYtfKld47OBmnWnLnDOs,19
54
- rda_python_metrics-1.0.34.dist-info/RECORD,,
53
+ rda_python_metrics-1.0.36.dist-info/licenses/LICENSE,sha256=1dck4EAQwv8QweDWCXDx-4Or0S8YwiCstaso_H57Pno,1097
54
+ rda_python_metrics-1.0.36.dist-info/METADATA,sha256=JyzrAexSHYqltfa28z8N-MH9-6U0Ve2mBVFlwjX05hk,761
55
+ rda_python_metrics-1.0.36.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
56
+ rda_python_metrics-1.0.36.dist-info/entry_points.txt,sha256=_Cw8KvtYwxLPt3kLJHwQ1r2swsKKbXk-2zYPZAg2czc,1345
57
+ rda_python_metrics-1.0.36.dist-info/top_level.txt,sha256=aoBgbR_o70TP0QmMW0U6inRHYtfKld47OBmnWnLnDOs,19
58
+ rda_python_metrics-1.0.36.dist-info/RECORD,,
@@ -16,9 +16,11 @@ logarch.py = rda_python_metrics.logarch:main
16
16
  pgperson = rda_python_metrics.pgperson:main
17
17
  pgusername = rda_python_metrics.pgusername:main
18
18
  viewallusage = rda_python_metrics.viewallusage:main
19
+ viewawsusage = rda_python_metrics.viewawsusage:main
19
20
  viewcheckusage = rda_python_metrics.viewcheckusage:main
20
21
  viewcodusage = rda_python_metrics.viewcodusage:main
21
22
  viewordusage = rda_python_metrics.viewordusage:main
23
+ viewosdfusage = rda_python_metrics.viewosdfusage:main
22
24
  viewrqstusage = rda_python_metrics.viewrqstusage:main
23
25
  viewtdsusage = rda_python_metrics.viewtdsusage:main
24
26
  viewwebfile = rda_python_metrics.viewwebfile:main