PyPI - rda-python-metrics - Versions diffs - 1.0.34__tar.gz → 1.0.36__tar.gz - Mend

rda-python-metrics 1.0.34tar.gz → 1.0.36tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of rda-python-metrics might be problematic. Click here for more details.

Files changed (65) hide show

{rda_python_metrics-1.0.34/src/rda_python_metrics.egg-info → rda_python_metrics-1.0.36}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rda_python_metrics
-Version: 1.0.34
+Version: 1.0.36
 Summary: RDA Python Package to gather and view data usage metrics
 Author-email: Zaihua Ji <zji@ucar.edu>
 Project-URL: Homepage, https://github.com/NCAR/rda-python-metrics

{rda_python_metrics-1.0.34 → rda_python_metrics-1.0.36}/pyproject.toml RENAMED Viewed

@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "rda_python_metrics"
-version = "1.0.34"
+version = "1.0.36"
 authors = [
   { name="Zaihua Ji",  email="zji@ucar.edu" },
 ]
@@ -54,9 +54,11 @@ pythonpath = [
 "pgperson" = "rda_python_metrics.pgperson:main"
 "pgusername" = "rda_python_metrics.pgusername:main"
 "viewallusage" = "rda_python_metrics.viewallusage:main"
+"viewawsusage" = "rda_python_metrics.viewawsusage:main"
 "viewcheckusage" = "rda_python_metrics.viewcheckusage:main"
 "viewcodusage" = "rda_python_metrics.viewcodusage:main"
 "viewordusage" = "rda_python_metrics.viewordusage:main"
+"viewosdfusage" = "rda_python_metrics.viewosdfusage:main"
 "viewrqstusage" = "rda_python_metrics.viewrqstusage:main"
 "viewtdsusage" = "rda_python_metrics.viewtdsusage:main"
 "viewwebfile" = "rda_python_metrics.viewwebfile:main"

{rda_python_metrics-1.0.34 → rda_python_metrics-1.0.36}/src/rda_python_metrics/fillawsusage.py RENAMED Viewed

@@ -94,7 +94,7 @@ def get_log_file_names(option, params):
       else:
          pdate = PgUtil.format_date(params[0])
          if len(params) > 1:
-            edate = PgUtil.format_date(params[1])
+            edate = PgUtil.adddate(PgUtil.format_date(params[1]), 0, 0, 1)
          else:
             edate = PgUtil.curdate()
       while pdate < edate:
@@ -114,13 +114,14 @@ def fill_aws_usages(filenames):
    year = cntall = addall = 0
    for pdate in filenames:
       fnames = filenames[pdate]
+      fcnt = len(fnames)
+      PgLOG.pglog("{}: Gathering AWS usage info from {} log files at {}".format(pdate, fcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)
       records = {}
       cntadd = entcnt = 0
       for logfile in fnames:
          if not op.isfile(logfile):
             PgLOG.pglog("{}: Not exists for Gathering AWS usage".format(logfile), PgLOG.LOGWRN)
             continue
-         PgLOG.pglog("Gathering AWS usage info from {} at {}".format(logfile, PgLOG.current_datetime()), PgLOG.LOGWRN)
          aws = PgFile.open_local_file(logfile)
          if not aws: continue
          while True:
@@ -184,7 +185,7 @@ def add_usage_records(records, year):
    cnt = 0
    for key in records:
       record = records[key]
-      cond = "date = '{}' AND time = '{}' AND ip = '{}' AND dsid = '{}'".format(record['date'], record['time'], record['ip'], record['dsid'])
+      cond = "date = '{}' AND time = '{}' AND ip = '{}'".format(record['date'], record['time'], record['ip'])
       if PgDBI.pgget(USAGE['PGTBL'], '', cond, PgLOG.LGEREX): continue
       if add_to_allusage(year, record):
          cnt += PgDBI.pgadd(USAGE['PGTBL'], record, PgLOG.LOGWRN)

{rda_python_metrics-1.0.34 → rda_python_metrics-1.0.36}/src/rda_python_metrics/fillglobususage.py RENAMED Viewed

@@ -147,18 +147,14 @@ def fill_globus_usages(fnames, datelimits):
          locflag = 'O' if re.match(r'^https://stratus\.', sline) else 'G'
          idx = wfile.find('?')
          if idx > -1: wfile = wfile[:idx]
-         if re.match(r'^curl', engine, re.I):
-            method = "CURL"
-         elif re.match(r'^wget', engine, re.I):
-            method = "WGET"
-         elif re.match(r'^python', engine, re.I):
-            method = "PYTHN"
+         moff = engine.find('/')
+         if moff > 0:
+            if moff > 20: moff = 20
+            method = engine[0:moff].upper()
          else:
             method = "WEB"
          key = "{}:{}:{}".format(ip, dsid, wfile) if stat == '206' else None
          if record:
             if key == pkey:
                record['size'] += size

{rda_python_metrics-1.0.34 → rda_python_metrics-1.0.36}/src/rda_python_metrics/fillosdfusage.py RENAMED Viewed

@@ -185,7 +185,7 @@ def add_usage_records(records, year):
    cnt = 0
    for key in records:
       record = records[key]
-      cond = "date = '{}' AND time = '{}' AND ip = '{}' AND dsid = '{}'".format(record['date'], record['time'], record['ip'], record['dsid'])
+      cond = "date = '{}' AND time = '{}' AND ip = '{}'".format(record['date'], record['time'], record['ip'])
       if PgDBI.pgget(USAGE['OSDFTBL'], '', cond, PgLOG.LGEREX): continue
       if add_to_allusage(year, record):
          cnt += PgDBI.pgadd(USAGE['OSDFTBL'], record, PgLOG.LOGWRN)
@@ -203,69 +203,6 @@ def add_to_allusage(year, pgrec):
    return PgDBI.add_yearly_allusage(year, record)
-#
-# Fill usage of a single online data file into table dssdb.wusage of DSS PgSQL database
-#
-def add_file_usage(year, logrec):
-   pgrec = get_wfile_wid(logrec['dsid'], logrec['wfile'])
-   if not pgrec: return 0
-   table = "{}_{}".format(USAGE['OSDFTBL'], year)
-   cond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(pgrec['wid'], logrec['method'], logrec['date'], logrec['time'])
-   if PgDBI.pgget(USAGE['OSDFTBL'], "", cond, PgLOG.LOGWRN): return 0
-   wurec =  PgIPInfo.get_wuser_record(logrec['ip'], logrec['date'])
-   if not wurec: return 0
-   record = {'wid' : pgrec['wid'], 'dsid' : pgrec['dsid']}
-   record['wuid_read'] = wurec['wuid']
-   record['date_read'] = logrec['date']
-   record['time_read'] = logrec['time']
-   record['size_read'] = logrec['size']
-   record['method'] = logrec['method']
-   record['locflag'] = logrec['locflag']
-   record['ip'] = logrec['ip']
-   record['quarter'] = logrec['quarter']
-   if add_to_allusage(year, logrec, wurec):
-      return PgDBI.add_yearly_wusage(year, record)
-   else:
-      return 0
-def add_to_allusage(year, logrec, wurec):
-   pgrec = {'email' : wurec['email'], 'org_type' : wurec['org_type'],
-            'country' : wurec['country'], 'region' : wurec['region']}
-   pgrec['dsid'] = logrec['dsid']
-   pgrec['date'] = logrec['date']
-   pgrec['quarter'] = logrec['quarter']
-   pgrec['time'] = logrec['time']
-   pgrec['size'] = logrec['size']
-   pgrec['method'] = logrec['method']
-   pgrec['ip'] = logrec['ip']
-   pgrec['source'] = 'P'
-   return PgDBI.add_yearly_allusage(year, pgrec)
-#
-# return wfile.wid upon success, 0 otherwise
-#
-def get_wfile_wid(dsid, wfile):
-   wfcond = "wfile = '{}'".format(wfile)
-   pgrec = PgSplit.pgget_wfile(dsid, "*", wfcond)
-   if pgrec:
-      pgrec['dsid'] = dsid
-   else:
-      pgrec = PgDBI.pgget("wfile_delete", "*", "{} AND dsid = '{}'".format(wfcond, dsid))
-      if not pgrec:
-         pgrec = PgDBI.pgget("wmove", "wid, dsid", wfcond)
-         if pgrec:
-            pgrec = PgSplit.pgget_wfile(pgrec['dsid'], "*", "wid = {}".format(pgrec['wid']))
-            if pgrec: pgrec['dsid'] = dsid
-   return pgrec
 #
 # call main() to start program
 #

rda_python_metrics-1.0.36/src/rda_python_metrics/viewawsusage.py ADDED Viewed

@@ -0,0 +1,321 @@
+#!/usr/bin/env python3
+#
+###############################################################################
+#
+#     Title : viewawsusage
+#    Author : Zaihua Ji,  zji@ucar.edu
+#      Date : 2025-08-13
+#   Purpose : python program to view aws usage information
+#
+#    Github : https://github.com/NCAR/rda-python-metrics.git
+#
+###############################################################################
+#
+import os
+import re
+import sys
+from rda_python_common import PgLOG
+from rda_python_common import PgUtil
+from rda_python_common import PgDBI
+from . import PgView
+VUSG = {
+   'SNMS' : "ABCDEHIKMNOPQRSTUWY",           # all selectable short field names (keys of FLDS, except INDEX 'X')
+   'OPTS' : 'AabcCdDeEhHikLmMnoOqsStTUwyz',  # all accepted option letters, stored as keys of params; NOTE(review): 'I' is in ACND/CNDS but not here
+   'NOPT' : 'abhnwz',                        # stand-alone options that take no input values
+   'ACND' : 'cdeiIkmMoqSty',                 # available array condition options
+   'RCND' : 'DEsT',                          # available range condition options
+   'CNDS' : 'acdDeEiIkmMnoqsStTy',           # condition options: ACND, RCND, 'a' and 'n'
+   'ECND' : 'my',                            # condition options that need evaluating
+   'SFLD' : 'DEIKNOTUW',                     # string fields, values are quoted in conditions
+   'UFLD' : 'NO',                            # string fields whose values must be in upper case
+   'LFLD' : 'EMPT'                           # string fields whose values must be in lower case
+}
+# keys of FLDS - short field names
+# column 0   - column title shown in the usage view
+# column 1   - field name in the format used in select clauses
+# column 2   - field name used in the where condition query string
+# column 3   - name of the table the field belongs to
+# column 4   - output field length, the longer one of data size and column title, determined
+#              dynamically if it is 0. Negative values indicate right justification
+# column 5   - precision for floating point value if positive and show total value if not zero
+# column 6   - field flag: 'G' for a group field, 'D' for a distinct field, 'S' for a sum field
+FLDS = {
+# SHRTNM COLUMNNAME   FIELDNAME                         CNDNAME       TBLNAM        Size Prc Grp/Sum
+   'D' : ['DATE',      "date",                           'date',       'awsusage',  10,   0,  'G'],
+   'E' : ['EMAIL',     "awsusage.email",        'awsusage.email',      'awsusage',   0,   0,  'G'],
+   'I' : ['IP',        "ip",                             'ip',         'awsusage',   0,   0,  'G'],
+   'M' : ['MONTH',     PgDBI.fmtym("date"),              'date',       'awsusage',   7,   0,  'G'],
+   'N' : ['COUNTRY',   "country",                        'country',    'awsusage',   0,   0,  'G'],
+   'K' : ['REGION',    "region",                         'region',     'awsusage',   0,   0,  'G'],
+   'O' : ['ORGTYPE',   "org_type",                       'org_type',   'awsusage',   7,   0,  'G'],
+   'P' : ['DSOWNER',   "specialist",                     'specialist', 'dsowner',    8,   0,  'G'],
+   'Q' : ['QUARTER',   "quarter",                        'quarter',    'awsusage',   7,   0,  'G'],
+   'R' : ['DSTITLE',   "search.datasets.title", 'search.datasets.title',   'search.datasets',   0,   0,  'G'],
+   'S' : ['BYTESIZE',  "size",                           'size',       'awsusage', -14,  -1,  'G'],
+   'T' : ['DATASET',   "awsusage.dsid",         'awsusage.dsid',       'awsusage',   0,   0,  'G'],
+   'W' : ['METHOD',    "method",                         'method',      'awsusage',  0,   0,  'G'],
+   'Y' : ['YEAR',      PgDBI.fmtyr("date"),              'date',       'awsusage',   4,   0,  'G'],
+   'A' : ['DSCOUNT',   "awsusage.dsid",                  'A',          'awsusage',  -7,  -1,  'D'],
+   'B' : ['MBYTEREAD', "round(sum(size)/(1000000), 4)",  'B',          'awsusage', -14,   3,  'S'],
+   'C' : ['#UNIQUSER', "awsusage.email",                 'C',          'awsusage',  -9,  -1,  'D'],
+   'U' : ['#UNIQIP',   "awsusage.ip",                    'U',          'awsusage',  -7,  -1,  'D'],
+   'H' : ['#ACCESS',   "sum(fcount)",                    'H',          'awsusage',  -8,  -1,  'S'],
+   'X' : ['INDEX',     "",                               'X',          '',          -6,   0,  ' ']
+}
+# keys of EXPAND - short field names that allow zero usage (checked for option -z)
+# column 0   - expand ID for the group of fields (TIME, USER, DSID or METHOD)
+# column 1   - string of option letters (CNDSTR); presumably the condition options tied to the group - TODO confirm in PgView.expand_query
+# column 2   - field name (absent for the TIME group)
+# column 3   - name of the table the field belongs to (absent for the TIME group)
+EXPAND = {
+# SHRTNM EXPID     CNDSTR    FIELDNAME       TBLNAM    (USER rows carry a fifth element "user"; its meaning is not visible here)
+   'D' : ["TIME",   "dDmy"],
+   'M' : ["TIME",   "dDmy"],
+   'Q' : ["TIME",   "dDmy"],
+   'Y' : ["TIME",   "dDmy"],
+   'E' : ["USER",   "ecko",  "email",        "wuser",  "user"],
+   'O' : ["USER",   "ecko",  "org_type",     "wuser",  "user"],
+   'N' : ["USER",   "ecko",  "country",      "wuser",  "user"],
+   'K' : ["USER",   "ecko",  "region",       "wuser",  "user"],
+   'R' : ["DSID",   "StT", "search.datasets.title", "search.datasets"],
+   'T' : ["DSID",   "StT", "dataset.dsid",   "dataset"],
+   'P' : ["DSID",   "StT", "specialist",     "dsowner"],
+   'W' : ["METHOD", "M",      "method",       "awsusage"]
+}
+# valid options for params, a dict of command line parameters keyed by option letter
+#   a -- 1 to view all usage info available
+#   A -- number of records to return
+#   c -- array of specified country codes
+#   C -- a string of short field names for viewing usages
+#   d -- array of specified dates
+#   D -- dates range, array of 1 or 2 dates in format of YYYY-MM-DD
+#   e -- array of specified email addresses
+#   E -- use given date or date range for email notice of data update
+#   h -- for given emails, include their historical emails registered before
+#   H -- a string of report title to replace the default one
+#   i -- array of specified IP addresses
+#   I -- use given email IDs for email notice of data update
+#   k -- array of specified region names
+#   L -- column delimiter for output
+#   m -- array of specified months
+#   M -- array of specified download methods
+#   o -- array of specified organization types
+#   O -- a string of short field names for sorting on
+#   q -- array of the specified quarters, normally combined with years
+#   s -- size range, array of 1 or 2 sizes in unit of MByte
+#   S -- array of login names of specialists who own the datasets
+#   t -- array of specified dataset names
+#   T -- dataset range, array of 1 or 2 dataset names
+#   U -- given unit for file or data sizes
+#   w -- generate view without totals
+#   y -- array of specified years
+#   z -- generate view including entries without usage
+params = {}
+# relationship between parameter options and short field names; an option is not
+# related to a field name if it is not a key of SNS
+SNS = {
+   'c' : 'N', 'd' : 'D', 'D' : 'D', 'e' : 'E', 'i' : 'I', 'k' : 'K', 'm' : 'M',
+   'M' : 'W', 'o' : 'O', 'q' : 'Q', 's' : 'S', 'S' : 'P', 't' : 'T', 'T' : 'T', 'y' : 'Y'
+}
+tablenames = fieldnames = condition = ''   # query strings, filled in by build_query_strings()
+sfields = []   # selected short field names flagged 'S' (sum) in FLDS
+gfields = []   # selected short field names flagged 'G' (group) in FLDS
+dfields = []   # selected short field names flagged 'D' (distinct) in FLDS
+pgname = 'viewawsusage'   # program name used in usage and error messages
+# main function to run this program: parse the command line into params, build the query
+# strings, fetch the awsusage records, then compact, expand (-z), order and print them
+#
+def main():
+   PgDBI.view_dbinfo()
+   argv = sys.argv[1:]
+   inputs = []
+   option = 'C'   # default option: values given before any -X flag are stored under 'C'
+   for arg in argv:
+      if re.match(r'^-.*$', arg):   # an argument starting with '-' is an option flag
+         curopt = arg[1:2]   # only the first character after '-' is used
+         if curopt and VUSG['OPTS'].find(curopt) > -1:
+            if VUSG['NOPT'].find(option) > -1:   # close the previous option before starting the new one
+               params[option] = 1   # NOTE(review): inputs is not cleared here, so values after a stand-alone option carry over
+            elif inputs:
+               params[option]= inputs   # record input array
+               inputs = []      # empty input array
+            option = curopt     # start a new option
+         else:
+            PgLOG.pglog(arg + ": Unknown Option", PgLOG.LGWNEX)
+      else:
+         val = arg
+         if val != '!':   # '!' is stored unchanged; the 'o' default below uses it in front of the excluded value
+            if option == 's':
+               val = int(val)*1000000    # convert MBytes to Bytes
+            elif option in SNS:
+               sfld = SNS[option]   # short field name the option is a condition on
+               if VUSG['SFLD'].find(sfld) > -1:   # string field: normalize case and quote the value
+                  if VUSG['UFLD'].find(sfld) > -1:
+                     val = arg.upper()     # in case not in upper case
+                  elif VUSG['LFLD'].find(sfld) > -1:
+                     val = arg.lower()     # in case not in lower case
+                  if option == 'c':
+                     val = PgView.get_country_name(val)
+                  elif option == 't' or option == 'T':
+                     val = PgUtil.format_dataset_id(val)   # add 'ds' if only numbers
+                  val = "'{}'".format(val)
+         inputs.append(val)
+   # record the last option
+   if VUSG['NOPT'].find(option) > -1:
+      params[option] = 1
+   elif inputs:
+      params[option] = inputs   # record input array
+   if not params:
+      PgLOG.show_usage(pgname)
+   else:
+      check_enough_options()
+   if 'o' not in params:
+      if 'e' not in params:
+         params['o'] = ['!', "'DSS'"]   # default to exclude 'DSS' for organization
+   elif params['o'][0] == "'ALL'":   # -o ALL removes the organization condition altogether
+      del params['o']
+   usgtable = "awsusage"
+   build_query_strings(usgtable)  # build tablenames, fieldnames, and conditions
+   records = PgDBI.pgmget(tablenames, fieldnames, condition, PgLOG.UCLWEX)
+   if not records: PgLOG.pglog("No Usage Found For Given Conditions", PgLOG.LGWNEX)
+   totals = None if 'w' in params else {}   # -w: generate the view without totals
+   if dfields or totals != None:
+      records = PgView.compact_hash_groups(records, gfields, sfields, dfields, totals)
+   if 'z' in params: records = expand_records(records)
+   ostr = params['O'][0] if 'O' in params else params['C'][0]   # sort on -O fields, or on the -C fields by default
+   records = PgView.order_records(records, ostr.replace('X', ''))   # INDEX field 'X' is dropped from the sort fields
+   PgView.simple_output(params, FLDS, records, totals)
+   PgLOG.pgexit(0)
+#
+# cehck if enough information entered on command line for generate view/report, exit if not
+#
+def check_enough_options():
+   cols = params['C'][0] if 'C' in params else 'X'
+   if cols == 'X': PgLOG.pglog("{}: miss field names '{}'".format(pgname, VUSG['SNMS']), PgLOG.LGWNEX)
+   if cols.find('Q') > -1 and cols.find('Y') < 0:   # add Y if Q included
+      cols = re.sub('Q', 'YQ', cols)
+      params['C'][0] = cols
+   for sn in cols:
+      if sn == 'X': continue  # do not process INDEX field
+      if VUSG['SNMS'].find(sn) < 0:
+         PgLOG.pglog("{}: Field {} must be in field names '{}X'".format(pgname, sn, VUSG['SNMS']), PgLOG.LGWNEX)
+      if 'z' not in params or sn in EXPAND: continue
+      fld = FLDS[sn]
+      if fld[6] != 'G': continue
+      PgLOG.pglog("{}: cannot show zero usage for unexpandable field {} - {}".formt(pgname, sn, fld[0]), PgLOG.LGWNEX)
+   if 'E' in params or 'I' in params:
+      if 'z' in params:
+         PgLOG.pglog(pgname + ": option -z and -E/-I can not be present at the same time", PgLOG.LGWNEX)
+      elif 't' not in params or len(params['t']) > 1:
+         PgLOG.pglog(pgname + ": specify one dataset for viewing usage of notified users", PgLOG.LGWNEX)
+      elif 'E' in params and 'I' in params:
+         PgLOG.pglog(pgname + ": option -E and -I can not be present at the same time", PgLOG.LGWNEX)
+   for opt in params:
+      if VUSG['CNDS'].find(opt) > -1: return
+   PgLOG.pglog("{}: miss condition options '{}'".format(pgname, VUSG['CNDS']), PgLOG.LGWNEX)
+#
+# process parameter options to build aws query strings
+# global variables are used directly and nothing passes in and returns back
+#
+def build_query_strings(usgtable):
+   # initialize query strings
+   global condition, fieldnames, tablenames
+   joins = groupnames = ''
+   tablenames = usgtable
+   cols = params['C'][0]
+   if 'U' in params:    # reset units for file and read sizes
+      if cols.find('B') > -1: FLDS['B'] = PgView.set_data_unit(FLDS['B'], params['U'][0], "sum(size)")
+      if cols.find('S') > -1: FLDS['S'] = PgView.set_data_unit(FLDS['S'], params['U'][0], "size")
+   if 'e' in params and 'h' in params: params['e'] = PgView.include_historic_emails(params['e'], 3)
+   for opt in params:
+      if opt == 'C':   # build field, table and group names
+         for sn in cols:
+            if sn == 'X': continue  # do not process INDEX field
+            fld = FLDS[sn]
+            if fieldnames: fieldnames += ', '
+            fieldnames += "{} {}".format(fld[1], sn)   # add to field name string
+            (tablenames, joins) = PgView.join_query_tables(fld[3], tablenames, joins, usgtable)
+            if fld[6] == 'S':
+               sfields.append(sn)
+            else:
+               if groupnames: groupnames += ', '
+               groupnames += sn     # add to group name string
+               if fld[6] == 'D':
+                  dfields.append(sn)
+               else:
+                  gfields.append(sn)
+      elif opt == 'O':
+         continue   # order records later
+      elif VUSG['CNDS'].find(opt) > -1:
+         if VUSG['NOPT'].find(opt) > -1: continue
+         sn = SNS[opt]
+         fld = FLDS[sn]
+         # build having and where conditon strings
+         cnd = PgView.get_view_condition(opt, sn, fld, params, VUSG)
+         if cnd:
+            if condition: condition += ' AND '
+            condition += cnd
+            (tablenames, joins) = PgView.join_query_tables(fld[3], tablenames, joins, usgtable)
+   # append joins, group by, order by, and having strings to condition string
+   if 'E' in params or 'I' in params:
+      (tablenames, joins) = PgView.join_query_tables("emreceive", tablenames, joins, usgtable)
+   if joins:
+      if condition:
+         condition = "{} AND {}".format(joins, condition)
+      else:
+         condition = joins
+   if 'E' in params or 'I' in params:
+      condition += PgView.notice_condition(params['E'], None, params['t'][0])
+   if groupnames and sfields: condition += " GROUP BY " + groupnames
+def expand_records(records):
+   recs = PgView.expand_query("TIME", records, params, EXPAND)
+   trecs = PgView.expand_query("USER", records, params, EXPAND, VUSG, SNS, FLDS)
+   recs = PgUtil.crosshash(recs, trecs)
+   trecs = PgView.expand_query("DSID", records, params, EXPAND, VUSG, SNS, FLDS)
+   recs = PgUtil.crosshash(recs, trecs)
+   trecs = PgView.expand_query("METHOD", records, params, EXPAND, VUSG, SNS, FLDS)
+   recs = PgUtil.crosshash(recs, trecs)
+   return PgUtil.joinhash(records, recs, 0, 1)
+#
+# call main() to start the program when this file is run as a script
+#
+if __name__ == "__main__": main()

rda-python-metrics 1.0.34__tar.gz → 1.0.36__tar.gz

Potentially problematic release.

rda-python-metrics 1.0.34tar.gz → 1.0.36tar.gz