rda-python-metrics 1.0.51__tar.gz → 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. {rda_python_metrics-1.0.51/src/rda_python_metrics.egg-info → rda_python_metrics-2.0.0}/PKG-INFO +1 -1
  2. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/pyproject.toml +1 -1
  3. rda_python_metrics-2.0.0/src/rda_python_metrics/fillawsusage.py +191 -0
  4. rda_python_metrics-2.0.0/src/rda_python_metrics/fillcdgusage.py +421 -0
  5. rda_python_metrics-2.0.0/src/rda_python_metrics/fillcodusage.py +220 -0
  6. rda_python_metrics-2.0.0/src/rda_python_metrics/fillcountry.py +71 -0
  7. rda_python_metrics-2.0.0/src/rda_python_metrics/fillendtime.py +68 -0
  8. rda_python_metrics-2.0.0/src/rda_python_metrics/fillglobususage.py +231 -0
  9. rda_python_metrics-2.0.0/src/rda_python_metrics/fillipinfo.py +200 -0
  10. rda_python_metrics-2.0.0/src/rda_python_metrics/filloneorder.py +138 -0
  11. rda_python_metrics-2.0.0/src/rda_python_metrics/fillosdfusage.py +194 -0
  12. rda_python_metrics-2.0.0/src/rda_python_metrics/fillrdadb.py +129 -0
  13. rda_python_metrics-2.0.0/src/rda_python_metrics/filltdsusage.py +213 -0
  14. rda_python_metrics-2.0.0/src/rda_python_metrics/filluser.py +193 -0
  15. rda_python_metrics-2.0.0/src/rda_python_metrics/logarch.py +342 -0
  16. rda_python_metrics-2.0.0/src/rda_python_metrics/pg_ipinfo.py +260 -0
  17. rda_python_metrics-2.0.0/src/rda_python_metrics/pg_view.py +678 -0
  18. rda_python_metrics-2.0.0/src/rda_python_metrics/pgperson.py +74 -0
  19. rda_python_metrics-2.0.0/src/rda_python_metrics/pgusername.py +54 -0
  20. rda_python_metrics-2.0.0/src/rda_python_metrics/viewallusage.py +321 -0
  21. rda_python_metrics-2.0.0/src/rda_python_metrics/viewawsusage.py +286 -0
  22. rda_python_metrics-2.0.0/src/rda_python_metrics/viewcheckusage.py +263 -0
  23. rda_python_metrics-2.0.0/src/rda_python_metrics/viewcodusage.py +286 -0
  24. rda_python_metrics-2.0.0/src/rda_python_metrics/viewordusage.py +306 -0
  25. rda_python_metrics-2.0.0/src/rda_python_metrics/viewosdfusage.py +286 -0
  26. rda_python_metrics-2.0.0/src/rda_python_metrics/viewrqstusage.py +327 -0
  27. rda_python_metrics-2.0.0/src/rda_python_metrics/viewtdsusage.py +291 -0
  28. rda_python_metrics-2.0.0/src/rda_python_metrics/viewwebfile.py +269 -0
  29. rda_python_metrics-2.0.0/src/rda_python_metrics/viewwebusage.py +336 -0
  30. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0/src/rda_python_metrics.egg-info}/PKG-INFO +1 -1
  31. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics.egg-info/SOURCES.txt +27 -0
  32. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/tests/test_metrics.py +4 -0
  33. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/LICENSE +0 -0
  34. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/MANIFEST.in +0 -0
  35. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/README.md +0 -0
  36. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/setup.cfg +0 -0
  37. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/PgIPInfo.py +0 -0
  38. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/PgView.py +0 -0
  39. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/__init__.py +0 -0
  40. /rda_python_metrics-1.0.51/src/rda_python_metrics/fillawsusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/fill_awsusage.py +0 -0
  41. /rda_python_metrics-1.0.51/src/rda_python_metrics/fillcdgusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/fill_cdgusage.py +0 -0
  42. /rda_python_metrics-1.0.51/src/rda_python_metrics/fillcodusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/fill_codusage.py +0 -0
  43. /rda_python_metrics-1.0.51/src/rda_python_metrics/fillcountry.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/fill_country.py +0 -0
  44. /rda_python_metrics-1.0.51/src/rda_python_metrics/fillendtime.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/fill_endtime.py +0 -0
  45. /rda_python_metrics-1.0.51/src/rda_python_metrics/fillglobususage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/fill_globususage.py +0 -0
  46. /rda_python_metrics-1.0.51/src/rda_python_metrics/fillipinfo.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/fill_ipinfo.py +0 -0
  47. /rda_python_metrics-1.0.51/src/rda_python_metrics/filloneorder.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/fill_oneorder.py +0 -0
  48. /rda_python_metrics-1.0.51/src/rda_python_metrics/fillosdfusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/fill_osdfusage.py +0 -0
  49. /rda_python_metrics-1.0.51/src/rda_python_metrics/fillrdadb.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/fill_rdadb.py +0 -0
  50. /rda_python_metrics-1.0.51/src/rda_python_metrics/filltdsusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/fill_tdsusage.py +0 -0
  51. /rda_python_metrics-1.0.51/src/rda_python_metrics/filluser.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/fill_user.py +0 -0
  52. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/fillawsusage.usg +0 -0
  53. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/fillcdgusage.usg +0 -0
  54. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/fillcodusage.usg +0 -0
  55. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/fillgdexusage.py +0 -0
  56. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/fillgdexusage.usg +0 -0
  57. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/fillglobususage.usg +0 -0
  58. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/fillipinfo.usg +0 -0
  59. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/filloneorder.usg +0 -0
  60. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/fillosdfusage.usg +0 -0
  61. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/fillrdadb.usg +0 -0
  62. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/filltdsusage.usg +0 -0
  63. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/filluser.usg +0 -0
  64. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/fillzenodousage.py +0 -0
  65. /rda_python_metrics-1.0.51/src/rda_python_metrics/logarch.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/log_arch.py +0 -0
  66. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/logarch.usg +0 -0
  67. /rda_python_metrics-1.0.51/src/rda_python_metrics/pgperson.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/pg_person.py +0 -0
  68. /rda_python_metrics-1.0.51/src/rda_python_metrics/pgusername.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/pg_username.py +0 -0
  69. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/pgsyspath.py +0 -0
  70. /rda_python_metrics-1.0.51/src/rda_python_metrics/viewallusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/view_allusage.py +0 -0
  71. /rda_python_metrics-1.0.51/src/rda_python_metrics/viewawsusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/view_awsusage.py +0 -0
  72. /rda_python_metrics-1.0.51/src/rda_python_metrics/viewcheckusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/view_checkusage.py +0 -0
  73. /rda_python_metrics-1.0.51/src/rda_python_metrics/viewcodusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/view_codusage.py +0 -0
  74. /rda_python_metrics-1.0.51/src/rda_python_metrics/viewordusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/view_ordusage.py +0 -0
  75. /rda_python_metrics-1.0.51/src/rda_python_metrics/viewosdfusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/view_osdfusage.py +0 -0
  76. /rda_python_metrics-1.0.51/src/rda_python_metrics/viewrqstusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/view_rqstusage.py +0 -0
  77. /rda_python_metrics-1.0.51/src/rda_python_metrics/viewtdsusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/view_tdsusage.py +0 -0
  78. /rda_python_metrics-1.0.51/src/rda_python_metrics/viewwebfile.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/view_webfile.py +0 -0
  79. /rda_python_metrics-1.0.51/src/rda_python_metrics/viewwebusage.py → /rda_python_metrics-2.0.0/src/rda_python_metrics/view_webusage.py +0 -0
  80. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/viewallusage.usg +0 -0
  81. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/viewawsusage.usg +0 -0
  82. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/viewcheckusage.usg +0 -0
  83. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/viewcodusage.usg +0 -0
  84. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/viewordusage.usg +0 -0
  85. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/viewosdfusage.usg +0 -0
  86. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/viewrqstusage.usg +0 -0
  87. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/viewtdsusage.usg +0 -0
  88. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/viewwebfile.usg +0 -0
  89. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics/viewwebusage.usg +0 -0
  90. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics.egg-info/dependency_links.txt +0 -0
  91. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics.egg-info/entry_points.txt +0 -0
  92. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics.egg-info/requires.txt +0 -0
  93. {rda_python_metrics-1.0.51 → rda_python_metrics-2.0.0}/src/rda_python_metrics.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rda_python_metrics
3
- Version: 1.0.51
3
+ Version: 2.0.0
4
4
  Summary: RDA Python Package to gather and view data usage metrics
5
5
  Author-email: Zaihua Ji <zji@ucar.edu>
6
6
  Project-URL: Homepage, https://github.com/NCAR/rda-python-metrics
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
6
6
 
7
7
  [project]
8
8
  name = "rda_python_metrics"
9
- version = "1.0.51"
9
+ version = "2.0.0"
10
10
  authors = [
11
11
  { name="Zaihua Ji", email="zji@ucar.edu" },
12
12
  ]
@@ -0,0 +1,191 @@
1
+ #!/usr/bin/env python3
2
+ ###############################################################################
3
+ # Title : fillawsusage
4
+ # Author : Zaihua Ji, zji@ucar.edu
5
+ # Date : 03/11/2022
6
+ # 2025-03-26 transferred to package rda_python_metrics from
7
+ # https://github.com/NCAR/rda-database.git
8
+ # 2025-12-16 converted to class FillAWSUsage
9
+ # Purpose : python program to retrieve info from AWS logs
10
+ # and fill table wusages in PgSQL database dssdb.
11
+ # Github : https://github.com/NCAR/rda-python-metrics.git
12
+ ###############################################################################
13
+ import sys
14
+ import re
15
+ import glob
16
+ from os import path as op
17
+ from rda_python_common.pg_file import PgFile
18
+ from .pg_ipinfo import PgIPInfo
19
+
20
+ class FillAWSUsage(PgIPInfo, PgFile):
21
+
22
def __init__(self):
    """Initialize the default settings used to gather AWS usage.

    Fix: the parent initializers are now reached through
    ``super().__init__()``. The previous ``super().__init()`` is name-mangled
    inside a class body (to ``_FillAWSUsage__init``) and raises
    AttributeError as soon as the class is instantiated.
    """
    super().__init__()
    # self.PGLOG is read here, so it is presumably created by a parent
    # initializer -- TODO confirm against PgIPInfo/PgFile.
    self.USAGE = {
        'PGTBL': "awsusage",                                # table the usage records are added to
        'AWSDIR': self.PGLOG["TRANSFER"] + "/AWSera5log",   # directory start_actions() changes into
        'AWSLOG': "{}/{}-00-00-00-*",                       # glob pattern filled by get_log_file_names()
        'PFMT': "YYYY/MM/DD"                                # date format of the first AWSLOG placeholder
    }
    # maps the name captured from each log entry to a dataset id
    self.DSIDS = {'nsf-ncar-era5': 'd633000'}
    self.option = self.cmdstr = None
    self.params = []   # list of values given after the selected option
33
+
34
+ # function to read parameters
35
def read_parameters(self):
    """Parse the command line into ``self.option`` and ``self.params``.

    ``-b`` sets ``PGLOG['BCKGRND']``; one of ``-d``, ``-p`` or ``-N`` is kept
    as the single option and every non-dash argument following it is
    collected as a parameter. Usage is shown when either the option or its
    parameters are missing. The command string is stored and logged last.
    """
    args = sys.argv[1:]
    for token in args:
        matched = re.match(r'^-(b|d|p|N)$', token)
        if matched is None:
            # not a known option: another dash argument, a value, or a stray value
            if re.match(r'^-', token):
                self.pglog(token + ": Invalid Option", self.LGWNEX)
            elif self.option:
                self.params.append(token)
            else:
                self.pglog(token + ": Invalid Parameter", self.LGWNEX)
            continue
        flag = matched.group(1)
        if flag == 'b':
            self.PGLOG['BCKGRND'] = 1
        elif self.option:
            # only one of d/p/N may be given
            self.pglog("{}: Option -{} is present already".format(token, self.option), self.LGWNEX)
        else:
            self.option = flag
    if not self.option or not self.params:
        self.show_usage('fillawsusage')
    self.dssdb_dbname()
    self.cmdstr = "fillawsusage {}".format(' '.join(args))
    self.cmdlog(self.cmdstr)
57
+
58
+ # function to start actions
59
def start_actions(self):
    """Change into the AWS log directory and process the matching log files.

    A warning naming the command string is logged when no log file matches;
    a final ``pglog(None, ...)`` call is always issued.
    """
    self.change_local_directory(self.USAGE['AWSDIR'])
    logs_by_date = self.get_log_file_names()
    if not logs_by_date:
        self.pglog("No log file found for given command: " + self.cmdstr, self.LOGWRN)
    else:
        self.fill_aws_usages(logs_by_date)
    self.pglog(None, self.LOGWRN)
67
+
68
+ # get the log file dates
69
def get_log_file_names(self):
    """Return a dict mapping each requested date to its sorted log file names.

    With option ``d`` every parameter is an individual date. Otherwise a
    range is walked one day at a time: option ``N`` covers the given number
    of days before the current date, and any other option starts at the
    first parameter and stops after the second parameter (or at the current
    date when only one is given). Dates without matching files are omitted.
    """
    def day_sequence():
        # lazily yields the dates to look up, in the order they are requested
        if self.option == 'd':
            for given in self.params:
                yield self.format_date(given)
            return
        if self.option == 'N':
            stop = self.curdate()
            day = self.adddate(stop, 0, 0, -int(self.params[0]))
        else:
            day = self.format_date(self.params[0])
            if len(self.params) > 1:
                # the end date is inclusive, so stop one day after it
                stop = self.adddate(self.format_date(self.params[1]), 0, 0, 1)
            else:
                stop = self.curdate()
        while day < stop:
            yield day
            day = self.adddate(day, 0, 0, 1)

    found = {}
    for day in day_sequence():
        subdir = self.format_date(day, self.USAGE['PFMT'])
        matches = glob.glob(self.USAGE['AWSLOG'].format(subdir, day))
        if matches:
            found[day] = sorted(matches)
    return found
95
+
96
+ # Fill AWS usages into table dssdb.awsusage of DSS PgSQL database from aws access logs
97
def fill_aws_usages(self, filenames):
    """Gather usage records from AWS access logs and add them per date.

    ``filenames`` maps a date to the list of log files for that date. Every
    matching GET entry whose name is listed in ``self.DSIDS`` is accumulated
    per ``ip:dsid:method`` key (size summed, file count incremented); the
    accumulated records of each date are then passed to
    ``add_usage_records``. A grand total is logged when more than one date
    contributed records.
    """
    entry_pattern = r'^\w+ ([\w-]+) \[(\S+).*\] ([\d\.]+) .+ REST\.GET\.OBJECT \S+ "GET.+" \d+ - (\d+) \d+ .* ".+" "(.+)" '

    def entries(handle):
        # yield lines until readline() returns an empty/false value
        while True:
            text = handle.readline()
            if not text:
                return
            yield text

    year = cntall = addall = 0
    for pdate, fnames in filenames.items():
        self.pglog("{}: Gathering AWS usage info from {} log files at {}".format(pdate, len(fnames), self.current_datetime()), self.LOGWRN)
        records = {}
        cntadd = entcnt = 0
        for logfile in fnames:
            aws = self.open_local_file(logfile)
            if not aws:
                continue
            for line in entries(aws):
                entcnt += 1
                if entcnt % 20000 == 0:
                    self.pglog("{}: {}/{} AWS log entries processed/records to add".format(pdate, entcnt, len(records)), self.WARNLG)
                found = re.match(entry_pattern, line)
                if not found:
                    continue
                bucket, stamp, ip, nbytes, engine = found.groups()
                if bucket not in self.DSIDS:
                    continue
                dsid = self.DSIDS[bucket]
                size = int(nbytes)
                # method is the upper-cased agent text before the first '/', capped at 20 chars
                slash = engine.find('/')
                method = engine[0:min(slash, 20)].upper() if slash > 0 else "AWS"
                key = "{}:{}:{}".format(ip, dsid, method)
                if key in records:
                    records[key]['size'] += size
                    records[key]['fcount'] += 1
                    continue
                (year, quarter, date, time) = self.get_record_date_time(stamp)
                iprec = self.get_missing_ipinfo(ip)
                if not iprec:
                    continue
                records[key] = {'ip': ip, 'dsid': dsid, 'date': date, 'time': time, 'quarter': quarter,
                                'size': size, 'fcount': 1, 'method': method, 'engine': engine,
                                'org_type': iprec['org_type'], 'country': iprec['country'],
                                'region': iprec['region'], 'email': iprec['email']}
            aws.close()
        if records:
            cntadd = self.add_usage_records(records, year)
        self.pglog("{}: {} AWS usage records added for {} entries at {}".format(pdate, cntadd, entcnt, self.current_datetime()), self.LOGWRN)
        cntall += entcnt
        if cntadd:
            addall += cntadd
    if addall > cntadd:
        self.pglog("{} AWS usage records added for {} entries at {}".format(addall, cntall, self.current_datetime()), self.LOGWRN)
149
+
150
+ # get date and time from record
151
def get_record_date_time(self, ctime):
    """Split a ``DD/Mon/YYYY:HH:MM:SS`` log time stamp.

    Returns ``(year, quarter, 'YYYY-MM-DD', 'HH:MM:SS')`` where the year is
    the matched string and the month number comes from ``self.get_month``.
    A non-matching value is reported through ``pglog`` with ``LGEREX``.
    """
    parsed = re.search(r'^(\d+)/(\w+)/(\d+):(\d+:\d+:\d+)$', ctime)
    if not parsed:
        self.pglog(ctime + ": Invalid date/time format", self.LGEREX)
        return None
    day, month_name, year, clock = parsed.groups()
    month = self.get_month(month_name)
    quarter = int((month - 1) / 3) + 1
    date = "{}-{:02}-{:02}".format(year, month, int(day))
    return (year, quarter, date, clock)
162
+
163
+ # add usage records for year
164
def add_usage_records(self, records, year):
    """Add the accumulated usage records and return how many were added.

    A record is skipped when a row with the same date, time and ip already
    exists in the usage table, and it is only added to that table after
    ``add_to_allusage`` succeeded for it.
    """
    table = self.USAGE['PGTBL']
    added = 0
    for record in records.values():
        cond = "date = '{}' AND time = '{}' AND ip = '{}'".format(record['date'], record['time'], record['ip'])
        if self.pgget(table, '', cond, self.LGEREX):
            continue
        if not self.add_to_allusage(year, record):
            continue
        added += self.pgadd(table, record, self.LOGWRN)
    return added
173
+
174
+ # add record to allusage tables
175
def add_to_allusage(self, year, pgrec):
    """Copy the shared fields of ``pgrec`` into a record with source 'A'.

    The new record is passed to ``add_yearly_allusage`` together with
    ``year`` and that call's result is returned.
    """
    copied = ('ip', 'dsid', 'date', 'time', 'quarter', 'size', 'method',
              'org_type', 'country', 'region', 'email')
    record = dict(source='A')
    record.update((name, pgrec[name]) for name in copied)
    return self.add_yearly_allusage(year, record)
182
+
183
+ # main function to execute this script
184
def main():
    """Create a FillAWSUsage instance, read parameters, run it and exit with 0."""
    runner = FillAWSUsage()
    runner.read_parameters()
    runner.start_actions()
    runner.pgexit(0)


# call main() to start program
if __name__ == "__main__":
    main()
@@ -0,0 +1,421 @@
1
+ #!/usr/bin/env python3
2
+ ###############################################################################
3
+ # Title : fillcdgusage
4
+ # Author : Zaihua Ji, zji@ucar.edu
5
+ # Date : 2025-04-14
6
+ # 2025-12-16 convert to class FillCDGUsage
7
+ # Purpose : python program to retrieve info from GDEX Postgres database for GDS
8
+ # file accesses and backup fill table tdsusage in PostgreSQL database dssdb.
9
+ # Github : https://github.com/NCAR/rda-python-metrics.git
10
+ ###############################################################################
11
+ import sys
12
+ import re
13
+ import glob
14
+ from os import path as op
15
+ from time import time as tm
16
+ from rda_python_common.pg_split import PgSplit
17
+ from .pg_ipinfo import PgIPInfo
18
+
19
+ class FillCDGUsage(PgSplit, PgIPInfo):
20
+
21
def __init__(self):
    """Initialize the default settings used to gather CDG usage.

    Fixes:
    * the parent initializers are reached through ``super().__init__()``;
      the previous ``super().__init()`` is name-mangled inside a class body
      and raises AttributeError on instantiation;
    * the id for 'ucar.cgd.ccsm4.past2k_transient' gets the leading 'd'
      carried by every other dataset id in the table.
    """
    super().__init__()
    self.USAGE = {
        'TDSTBL': "tdsusage",      # table used by add_tdsusage_records()
        'WEBTBL': "wusage",        # table-name prefix used by add_webfile_usage()
        'CDATE': self.curdate(),   # end date for the -N option in get_date_ranges()
    }
    # CDG dataset short name -> list of dataset ids
    self.DSIDS = {
        'pi_cesm2_atm_river_analysis': ['d010073'],
        'na-cordex': ['d316009'],
        'ucar.cgd.cesm2.cam6.prescribed_sst_amip': ['d651010'],
        'ucar.cgd.ccsm4.CLM_LAND_ONLY': ['d651011'],
        'ucar.cgd.artmip': ['d651012', 'd651016', 'd651017', 'd651018'],
        'tamip': ['d651013'],
        'ucar.cgd.ccsm4.CLIVAR_LE': ['d651014'],
        'ucar.cgd.cesm2.Gettelman_CESM2_ECS': ['d651015'],
        'ucar.cgd.ccsm4.geomip.ssp5': ['d651024'],
        'ucar.cgd.ccsm4.IOD-PACEMAKER': ['d651021'],
        'ucar.cgd.ccsm4.past2k_transient': ['d651023'],
        'ucar.cgd.ccsm4.lowwarming': ['d651025'],
        'ucar.cgd.ccsm4.CESM_CAM5_BGC_ME': ['d651000'],
        'ucar.cgd.ccsm4.iTRACE': ['d651022'],
        'ucar.cgd.ccsm4.so2_geoeng': ['d651026'],
        'ucar.cgd.ccsm4.cesmLE': ['d651027'],
        'ucar.cgd.ccsm4.CESM1-CAM5-DP': ['d651028'],
        # NOTE(review): this key is repeated below with ['d651035'], so this
        # value is silently discarded; one of the two short names is
        # presumably wrong -- confirm the name that belongs to d651031.
        'ucar.cgd.ccsm4.amv_lens': ['d651031'],
        'ucar.cgd.ccsm4.ATL-PACEMAKER': ['d651032'],
        'ucar.cgd.ccsm4.pac-pacemaker': ['d651033'],
        'ucar.cgd.ccsm4.SD-WACCM-X_v2.1': ['d651034'],
        'ucar.cgd.ccsm4.amv_lens': ['d651035'],
        'ucar.cgd.cesm2.cism_ismip6': ['d651036'],
        'ucar.cgd.ccsm4.pliomip2': ['d651037'],
        'ucar.cgd.cesm2-waccm.s2s_hindcasts': ['d651040'],
        'ucar.cgd.CESM1.3_SH_storm_tracks': ['d651044'],
        'ucar.cgd.cesm2.waccm6.ssp245': ['d651045'],
        'ucar.cgd.cesm2.CESM21-CISM2-JG-BG': ['d651046'],
        'ucar.cgd.ccsm4.TC-CESM': ['d651047'],
        'ucar.cgd.cesm2.ISSI_OSSE': ['d651048'],
        'ucar.cgd.ccsm4.SOcean_Eddies_mclong': ['d651049'],
        'ucar.cgd.ccsm.trace': ['d651050'],
        'ucar.cgd.cesm2.waccm.solar': ['d651051'],
        'ucar.cgd.ccsm4.CESM1-CCSM4_mid-Pliocene': ['d651042'],
        'ucar.cgd.ccsm4.PaleoIF': ['d651052'],
        'ucar.cgd.ccsm4.b.e11.B20LE_fixedO3': ['d651053'],
        'ucar.cgd.cesm2.single.forcing.large.ensemble': ['d651055'],
        'ucar.cgd.cesm2le.output': ['d651056'],
        'ucar.cgd.ccsm4.ARISE-SAI-1.5': ['d651059'],
        'ucar.cgd.cesm2.s2s_hindcasts': ['d651060'],
        'ucar.cgd.cesm2.s2s_hindcasts.mjo': ['d651061'],
        'ucar.cgd.cesm2.s2s_hindcasts.tc_tracks': ['d651062'],
        'ucar.cgd.cesm2.s2s_hindcasts.cesm2.climo': ['d651063'],
        'ucar.cgd.ccsm4.cesmLME': ['d651058'],
        'ucar.cgd.ccsm4.GLENS': ['d651064'],
        'ucar.cgd.ccsm4.CESM2-CISM2-LIGtransient': ['d651066'],
        'ucar.cgd.cesm2.pacific.pacemaker': ['d651068'],
        'ucar.cgd.cesm2.tuned.sea.ice.albedo': ['d651070'],
        'ucar.cgd.cesm2.cmip5.forcing': ['d651075'],
        'ucar.cgd.cesm2.ssp245.biomass.burning': ['d651073'],
        'ucar.cgd.cesm2.ssp585.biomass.burning': ['d651067'],
        'ucar.cgd.cesm1.cldmod': ['d651069'],
        'ucar.cgd.cesm2.marine.biogeochemistry': ['d651071'],
        'ucar.cgd.nw2.mom6': ['d651072'],
        'ucar.cgd.cesm2.cam6.ppe': ['d651076'],
        'ucar.cgd.cesm2.smyle': ['d651065'],
        # new added
        'gridded_precip_and_temp': ['d010078'],
        '29_newman': ['d010079'],
        'waccm-x.ion.asymmetry': ['d010081'],
        'NARCCAP': ['d316015']
    }
    self.ALLIDS = list(self.DSIDS.keys())   # short names used for "-s all"
    self.WFILES = {}   # cache of file records found by get_wfile_record()
    self.params = {}   # dict of input values, keyed by option letter
94
+
95
+ # function to read parameters
96
def read_parameters(self):
    """Parse the command line into ``self.params``, keyed by option letter.

    ``-b`` sets ``PGLOG['BCKGRND']``; each of ``-m``, ``-s``, ``-N`` and
    ``-y`` starts a new value list that collects the following non-dash
    arguments. Usage is shown when no option is given; missing ``-s`` or a
    missing second option is reported through ``pglog``. The command line is
    logged last.
    """
    args = sys.argv[1:]
    current = None
    for token in args:
        if token == "-b":
            self.PGLOG['BCKGRND'] = 1
            continue
        if re.match(r'^-[msNy]$', token):
            # start collecting values for this option
            current = token[1]
            self.params[current] = []
            continue
        if re.match(r'^-', token):
            self.pglog(token + ": Invalid Option", self.LGWNEX)
            continue
        if current:
            self.params[current].append(token)
        else:
            self.pglog(token + ": Value passed in without leading option", self.LGWNEX)
    if not current:
        self.show_usage('fillcdgusage')
    elif 's' not in self.params:
        self.pglog("-s: Missing dataset short name to gather CDG metrics", self.LGWNEX)
    elif len(self.params) < 2:
        self.pglog("-(m|N|y): Missing Month, NumberDays or Year to gather CDG metrics", self.LGWNEX)
    self.cmdlog("fillcdgusage {}".format(' '.join(args)))
118
+
119
+ # function to start actions
120
def start_actions(self):
    """Resolve date ranges and dataset ids, then fill the CDG usage.

    Usage is only filled when both lookups return something; the closing
    ``pglog(None, ...)`` call is always made with ``LOGWRN|SNDEML``.
    """
    periods = self.get_date_ranges(self.params)
    datasets = self.get_dataset_ids(self.params['s'])
    if periods and datasets:
        self.fill_cdg_usages(datasets, periods)
    closing_opts = self.LOGWRN | self.SNDEML   # send email out if any
    self.pglog(None, closing_opts)
125
+
126
+ # connect to the gdex database esg-production
127
def gdex_dbname(self):
    """Switch the connection settings to the gdex database esg-production."""
    # positional arguments handed to set_scname() unchanged; their meaning is
    # defined by that helper (presumably db, schema, login, password, host --
    # TODO confirm)
    target = ('esg-production', 'metrics', 'gateway-reader', None, 'sagedbprodalma.ucar.edu')
    self.set_scname(*target)
129
+
130
+ # get datasets
131
def get_dataset_ids(self, dsnames):
    """Resolve CDG dataset short names into id bundles.

    Returns a list of ``[short name, dataset ids, CDG ids, label]`` entries,
    where the CDG ids are the dataset's own id followed by the ids collected
    by ``recursive_dataset_ids``. The name ``all`` (any case) restarts the
    lookup with every known short name. Unknown names are logged and
    skipped; names without a matching row are skipped silently.
    """
    self.gdex_dbname()
    table = 'metadata.dataset'
    bundles = []
    for name in dsnames:
        if re.match(r'^all$', name, re.I):
            return self.get_dataset_ids(self.ALLIDS)
        if name not in self.DSIDS:
            self.pglog(name + ": Unknown CDG dataset short name", self.LOGWRN)
            continue
        started = tm()
        row = self.pgget(table, 'id', "short_name = '{}'".format(name))
        if not row or not row['id']:
            continue
        rdaids = self.DSIDS[name]
        label = "{}-{}".format(name, rdaids)
        top_id = row['id']
        cdgids = [top_id]
        total = 1 + self.recursive_dataset_ids(top_id, cdgids)
        bundles.append([name, rdaids, cdgids, label])
        elapsed = self.seconds_to_string_time(tm() - started)
        self.pglog("{}: Found {} CDG dsid/subdsids in {} at {}".format(label, total, elapsed, self.current_datetime()), self.LOGWRN)
    if not bundles:
        self.pglog("No Dataset Id identified to gather CDG metrics", self.LOGWRN)
    return bundles
154
+
155
+ # get cdgids recursively
156
def recursive_dataset_ids(self, pcdgid, cdgids):
    """Append all descendant dataset ids of ``pcdgid`` to ``cdgids``.

    Children are looked up by ``parent_dataset_id``; ids already present in
    ``cdgids`` are skipped, every new id is appended before its own children
    are visited. Returns the number of ids appended.
    """
    children = self.pgmget('metadata.dataset', 'id', "parent_dataset_id = '{}'".format(pcdgid))
    if not children:
        return 0
    added = 0
    for child in children['id']:
        if child not in cdgids:
            cdgids.append(child)
            added += 1 + self.recursive_dataset_ids(child, cdgids)
    return added
167
+
168
+ # get the date ranges for given condition
169
def get_date_ranges(self, inputs):
    """Turn the option values into a list of ``[start date, end date]`` pairs.

    ``N`` values give the number of days back from ``USAGE['CDATE']``, ``m``
    values are ``year-month`` strings covering that month, and ``y`` values
    are years covering January 1 through December 31. Values of any other
    option produce no range.
    """
    ranges = []
    for opt, values in inputs.items():
        for value in values:
            if opt == 'N':
                last = self.USAGE['CDATE']
                span = [self.adddate(last, 0, 0, -int(value)), last]
            elif opt == 'm':
                parts = value.split('-')
                first = self.fmtdate(int(parts[0]), int(parts[1]), 1)
                span = [first, self.enddate(first, 0, 'M')]
            elif opt == 'y':
                span = [value + "-01-01", value + "-12-31"]
            else:
                span = []
            if span:
                ranges.append(span)
    return ranges
187
+
188
+ # get file download records for given dsid
189
def get_dsid_records(self, cdgids, dates, strids):
    """Query the file download records of the given CDG ids and date range.

    Parameters:
        cdgids: list of CDG dataset ids to select on.
        dates:  two-item sequence holding the first and last date (inclusive).
        strids: label used in the log message.

    Returns whatever ``pgmget`` returns for the query.

    Fixes:
    * ids are converted with ``str()`` before being joined, so the multi-id
      branch accepts the same id types as the single-id branch
      (``str.join`` raises TypeError on non-string items);
    * the connection is switched back with ``dssdb_dbname()`` even when the
      query raises, so later calls do not keep using the gdex settings.
    """
    self.gdex_dbname()
    tbname = 'metrics.file_download'
    fields = ('date_completed, remote_address, logical_file_size, logical_file_name, file_access_point_uri, user_agent_name, bytes_sent, '
              'subset_file_size, range_request, dataset_file_size, dataset_file_name, dataset_file_file_access_point_uri')
    dscnt = len(cdgids)
    if dscnt == 1:
        dscnd = "dataset_id = '{}'".format(cdgids[0])
    else:
        dscnd = "dataset_id IN ('" + "','".join(str(cdgid) for cdgid in cdgids) + "')"
    dtcnd = "date_completed BETWEEN '{} 00:00:00' AND '{} 23:59:59'".format(dates[0], dates[1])
    cond = "{} AND {} ORDER BY date_completed".format(dscnd, dtcnd)
    self.pglog("{}: Query for {} CDG dsid/subdsids between {} and {} at {}".format(strids, dscnt, dates[0], dates[1], self.current_datetime()), self.LOGWRN)
    try:
        pgrecs = self.pgmget(tbname, fields, cond)
    finally:
        self.dssdb_dbname()
    return pgrecs
206
+
207
+ # Fill TDS usages into table dssdb.tdsusage from cdg access records
208
def fill_cdg_usages(self, dsids, dranges):
    """Fill TDS and web usage from the CDG download records.

    Parameters:
        dsids:   list of ``[short name, dataset ids, CDG ids, label]``
                 entries as built by ``get_dataset_ids``.
        dranges: list of ``[start date, end date]`` pairs.

    For every date range and dataset the download records are fetched and
    split by access URL: downloads through the thredds URL are accumulated
    per day and ``ip:dsid:method:etype`` key and added as TDS usage; all
    other downloads are added as web file usage, with consecutive partial
    downloads of the same file by the same ip merged into one record.

    Fixes:
    * the fallback columns are read as ``logical_file_name`` and
      ``logical_file_size``, the names actually selected by
      ``get_dsid_records`` (the former ``logic_file_*`` keys are not part of
      that field list);
    * a record without any access URL no longer crashes ``re.search``; it is
      handled as web usage;
    * an unknown file size no longer crashes the ``dsize < fsize``
      comparison;
    * buffered records are flushed with the year they were recorded in,
      not the year of the record that triggered the flush.
    """
    allcnt = awcnt = atcnt = 0
    for dates in dranges:
        for adsid in dsids:
            rdaids, cdgids, strids = adsid[1], adsid[2], adsid[3]
            getdsid = len(rdaids) != 1   # several candidate ids: look the id up per file
            dsid = rdaids[0]
            bt = tm()
            pgrecs = self.get_dsid_records(cdgids, dates, strids)
            pgcnt = len(pgrecs['dataset_file_name']) if pgrecs else 0
            if pgcnt == 0:
                self.pglog("{}: No record found to gather CDG usage between {} and {}".format(strids, dates[0], dates[1]), self.LOGWRN)
                continue
            rmsg = self.seconds_to_string_time(tm() - bt)
            self.pglog("{}: Got {} records in {} for processing CDG usage at {}".format(strids, pgcnt, rmsg, self.current_datetime()), self.LOGWRN)
            tcnt = wcnt = 0
            pwkey = wrec = cdate = None
            tyear = wyear = None   # years belonging to the buffered trecs / wrec
            trecs = {}
            bt = tm()
            for i in range(pgcnt):
                if (i+1) % 20000 == 0:
                    self.pglog("{}/{}/{} CDG/TDS/WEB records processed to add".format(i, tcnt, wcnt), self.WARNLG)
                pgrec = self.onerecord(pgrecs, i)
                wfile = pgrec['dataset_file_name'] or pgrec['logical_file_name']
                if not wfile:
                    continue
                dsize = pgrec['bytes_sent']
                if not dsize:
                    continue
                (year, quarter, date, time) = self.get_record_date_time(pgrec['date_completed'])
                url = pgrec['dataset_file_file_access_point_uri'] or pgrec['file_access_point_uri']
                ip = pgrec['remote_address']
                engine = pgrec['user_agent_name']
                ms = re.search(r'^https*://tds.ucar.edu/thredds/(\w+)/', url) if url else None
                if ms:
                    # tds usage
                    if getdsid:
                        wfrec = self.get_wfile_record(rdaids, wfile)
                        if not wfrec:
                            continue
                        dsid = wfrec['dsid']
                    method = ms.group(1)
                    if pgrec['subset_file_size']:
                        etype = 'S'
                    elif pgrec['range_request']:
                        etype = 'R'
                    else:
                        etype = 'F'
                    if date != cdate:
                        # a new day starts: flush the records of the previous day
                        if trecs:
                            tcnt += self.add_tdsusage_records(tyear, trecs, cdate)
                            trecs = {}
                        cdate = date
                        tyear = year
                    tkey = "{}:{}:{}:{}".format(ip, dsid, method, etype)
                    if tkey in trecs:
                        trecs[tkey]['size'] += dsize
                        trecs[tkey]['fcount'] += 1
                    else:
                        iprec = self.get_missing_ipinfo(ip)
                        if not iprec:
                            continue
                        trecs[tkey] = {'ip': ip, 'dsid': dsid, 'date': cdate, 'time': time, 'quarter': quarter,
                                       'size': dsize, 'fcount': 1, 'method': method, 'etype': etype,
                                       'engine': engine, 'org_type': iprec['org_type'], 'country': iprec['country'],
                                       'region': iprec['region'], 'email': iprec['email']}
                else:
                    # web usage
                    wfrec = self.get_wfile_record(rdaids, wfile)
                    if not wfrec:
                        continue
                    if getdsid:
                        dsid = wfrec['dsid']
                    fsize = pgrec['dataset_file_size'] or pgrec['logical_file_size']
                    method = 'CDG'
                    partial = (pgrec['subset_file_size'] or pgrec['range_request']
                               or (fsize and dsize < fsize))
                    # only partial downloads get a key, so only they can be merged
                    wkey = "{}:{}:{}".format(ip, dsid, wfile) if partial else None
                    if wrec:
                        if wkey == pwkey:
                            wrec['size'] += dsize
                            continue
                        wcnt += self.add_webfile_usage(wyear, wrec)
                    wrec = {'ip': ip, 'dsid': dsid, 'wid': wfrec['wid'], 'date': date,
                            'time': time, 'quarter': quarter, 'size': dsize,
                            'locflag': 'C', 'method': method}
                    wyear = year
                    pwkey = wkey
                    if not pwkey:
                        # a full download is added right away
                        wcnt += self.add_webfile_usage(wyear, wrec)
                        wrec = None
            if trecs:
                tcnt += self.add_tdsusage_records(tyear, trecs, cdate)
            if wrec:
                wcnt += self.add_webfile_usage(wyear, wrec)
            atcnt += tcnt
            awcnt += wcnt
            allcnt += pgcnt
            rmsg = self.seconds_to_string_time(tm() - bt)
            self.pglog("{}: {}/{} TDS/WEB usage records added for {} CDG entries in {}".format(strids, atcnt, awcnt, allcnt, rmsg), self.LOGWRN)
307
+
308
+ # get date and time from log record
309
def get_record_date_time(self, ctime):
    """Split a ``YYYY-MM-DD HH:MM:SS`` time value (converted with ``str``).

    Returns ``(year, quarter, date, time)`` with year, date and time as
    strings and the quarter as an int. A non-matching value is reported
    through ``pglog`` with ``LGEREX``.
    """
    text = str(ctime)
    parsed = re.search(r'^(\d+)-(\d+)-(\d+) (\d\d:\d\d:\d\d)', text)
    if not parsed:
        self.pglog(text + ": Invalid time format", self.LGEREX)
        return None
    year, month_text, day, clock = parsed.groups()
    month = int(month_text)
    quarter = int((month - 1) / 3) + 1
    return (year, quarter, "{}-{:02}-{}".format(year, month, day), clock)
320
+
321
+ # add to tds usage records
322
def add_tdsusage_records(self, year, records, date):
    """Add one day of accumulated TDS usage records and return the count.

    A record is skipped when a row with the same date, time and ip already
    exists in the TDS table, and it is only added to that table after
    ``add_tds_allusage`` succeeded for it. The count is logged before it is
    returned.
    """
    table = self.USAGE['TDSTBL']
    added = 0
    for record in records.values():
        cond = "date = '{}' AND time = '{}' AND ip = '{}'".format(date, record['time'], record['ip'])
        if self.pgget(table, '', cond, self.LGEREX):
            continue
        if not self.add_tds_allusage(year, record):
            continue
        added += self.pgadd(table, record, self.LOGWRN)
    self.pglog("{}: {} TDS usage records added at {}".format(date, added, self.current_datetime()), self.LOGWRN)
    return added
332
+
333
+ #add to allusage tables
334
def add_tds_allusage(self, year, logrec):
    """Build the allusage record of a TDS usage record and add it.

    The record always carries method 'CDG' and source 'C'; the remaining
    fields are copied from ``logrec``. Returns the result of
    ``add_yearly_allusage``.
    """
    copied = ('email', 'org_type', 'country', 'region', 'dsid',
              'date', 'quarter', 'time', 'size', 'ip')
    pgrec = {'method': 'CDG', 'source': 'C'}
    for name in copied:
        pgrec[name] = logrec[name]
    return self.add_yearly_allusage(year, pgrec)
347
+
348
+ # Fill usage of a single online data file into table dssdb.wusage of DSS PgSQL database
349
def add_webfile_usage(self, year, logrec):
    """Add the usage of a single web file and return the number of rows added.

    Returns 0 when a row with the same wid, method, date and time already
    exists in the yearly table, when no user record is found for the ip and
    date, or when the allusage record could not be added.
    """
    table = "{}_{}".format(self.USAGE['WEBTBL'], year)
    cdate, ip = logrec['date'], logrec['ip']
    cond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(logrec['wid'], logrec['method'], cdate, logrec['time'])
    if self.pgget(table, "", cond, self.LOGWRN):
        return 0
    wurec = self.get_wuser_record(ip, cdate)
    if not wurec:
        return 0
    record = {
        'wid': logrec['wid'],
        'dsid': logrec['dsid'],
        'wuid_read': wurec['wuid'],
        'date_read': cdate,
        'time_read': logrec['time'],
        'size_read': logrec['size'],
        'method': logrec['method'],
        'locflag': logrec['locflag'],
        'ip': ip,
        'quarter': logrec['quarter'],
    }
    if not self.add_web_allusage(year, logrec, wurec):
        return 0
    return self.add_yearly_wusage(year, record)
370
+
371
+ # add web record to allusage
372
def add_web_allusage(self, year, logrec, wurec):
    """Build the allusage record of a web file usage and add it.

    The record carries source 'C', the user fields of ``wurec`` and the
    usage fields of ``logrec``. Returns the result of
    ``add_yearly_allusage``.
    """
    user_fields = ('email', 'org_type', 'country', 'region')
    usage_fields = ('dsid', 'date', 'quarter', 'time', 'size', 'method', 'ip')
    pgrec = {'source': 'C'}
    pgrec.update((name, wurec[name]) for name in user_fields)
    pgrec.update((name, logrec[name]) for name in usage_fields)
    return self.add_yearly_allusage(year, pgrec)
386
+
387
+ # return the wfile record (holding wid and dsid) upon success, None otherwise
388
def get_wfile_record(self, dsids, wfile):
    """Find the web file record of ``wfile`` among the given dataset ids.

    Lookup order: the ``self.WFILES`` cache, the current file records of each
    dataset id, then per dataset id the deleted file records and finally the
    moved file records. A found record gets its ``dsid`` set and is cached
    under ``dsid + wfile``.

    Returns the record (a dict holding at least ``wid`` and ``dsid``), or
    None when the file is not found.

    Fix: the record of a moved file is looked up with the ``wid`` of the
    move record. The previous code indexed ``pgrec['wid']`` inside the
    ``if not pgrec`` branch, which fails whenever that branch finds a move
    record.
    """
    for dsid in dsids:
        wkey = "{}{}".format(dsid, wfile)
        if wkey in self.WFILES:
            return self.WFILES[wkey]
    wfcond = "wfile LIKE '%{}'".format(wfile)
    for dsid in dsids:
        pgrec = self.pgget_wfile(dsid, "wid", wfcond)
        if pgrec:
            pgrec['dsid'] = dsid
            self.WFILES["{}{}".format(dsid, wfile)] = pgrec
            return pgrec
    for dsid in dsids:
        pgrec = self.pgget("wfile_delete", "wid, dsid", "{} AND dsid = '{}'".format(wfcond, dsid))
        if not pgrec:
            mvrec = self.pgget("wmove", "wid, dsid", wfcond)
            if mvrec:
                # follow the move: fetch the file by wid under its recorded dataset id
                pgrec = self.pgget_wfile(mvrec['dsid'], "wid", "wid = {}".format(mvrec['wid']))
                if pgrec:
                    pgrec['dsid'] = mvrec['dsid']
        if pgrec:
            self.WFILES["{}{}".format(pgrec['dsid'], wfile)] = pgrec
            return pgrec
    return None
412
+
413
+ # main function to execute this script
414
def main():
    """Create a FillCDGUsage instance, read parameters, run it and exit with 0."""
    runner = FillCDGUsage()
    runner.read_parameters()
    runner.start_actions()
    runner.pgexit(0)


# call main() to start program
if __name__ == "__main__":
    main()