rda-python-metrics 1.0.8__tar.gz → 1.0.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rda-python-metrics might be problematic. Click here for more details.

Files changed (60) hide show
  1. rda_python_metrics-1.0.10/MANIFEST.in +1 -0
  2. {rda_python_metrics-1.0.8/src/rda_python_metrics.egg-info → rda_python_metrics-1.0.10}/PKG-INFO +1 -1
  3. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/pyproject.toml +2 -16
  4. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillawsusage.usg +1 -2
  5. rda_python_metrics-1.0.10/src/rda_python_metrics/fillcdgusage.py +429 -0
  6. rda_python_metrics-1.0.10/src/rda_python_metrics/fillcdgusage.usg +18 -0
  7. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillcodusage.usg +1 -1
  8. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillglobususage.usg +1 -1
  9. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10/src/rda_python_metrics.egg-info}/PKG-INFO +1 -1
  10. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics.egg-info/SOURCES.txt +2 -0
  11. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics.egg-info/entry_points.txt +1 -0
  12. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/tests/test_metrics.py +1 -1
  13. rda_python_metrics-1.0.8/MANIFEST.in +0 -19
  14. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/LICENSE +0 -0
  15. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/README.md +0 -0
  16. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/setup.cfg +0 -0
  17. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/PgIPInfo.py +0 -0
  18. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/PgView.py +0 -0
  19. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/__init__.py +0 -0
  20. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillawsusage.py +0 -0
  21. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillcodusage.py +0 -0
  22. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillcountry.py +0 -0
  23. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillendtime.py +0 -0
  24. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillglobususage.py +0 -0
  25. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillipinfo.py +0 -0
  26. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillipinfo.usg +0 -0
  27. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/filloneorder.py +0 -0
  28. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/filloneorder.usg +0 -0
  29. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillosdfusage.py +0 -0
  30. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillosdfusage.usg +0 -0
  31. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillrdadb.py +0 -0
  32. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/fillrdadb.usg +0 -0
  33. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/filltdsusage.py +0 -0
  34. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/filltdsusage.usg +0 -0
  35. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/filluser.py +0 -0
  36. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/filluser.usg +0 -0
  37. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/logarch.py +0 -0
  38. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/logarch.usg +0 -0
  39. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/pgperson.py +0 -0
  40. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/pgsyspath.py +0 -0
  41. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/pgusername.py +0 -0
  42. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewallusage.py +0 -0
  43. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewallusage.usg +0 -0
  44. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewcheckusage.py +0 -0
  45. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewcheckusage.usg +0 -0
  46. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewcodusage.py +0 -0
  47. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewcodusage.usg +0 -0
  48. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewordusage.py +0 -0
  49. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewordusage.usg +0 -0
  50. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewrqstusage.py +0 -0
  51. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewrqstusage.usg +0 -0
  52. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewtdsusage.py +0 -0
  53. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewtdsusage.usg +0 -0
  54. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewwebfile.py +0 -0
  55. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewwebfile.usg +0 -0
  56. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewwebusage.py +0 -0
  57. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics/viewwebusage.usg +0 -0
  58. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics.egg-info/dependency_links.txt +0 -0
  59. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics.egg-info/requires.txt +0 -0
  60. {rda_python_metrics-1.0.8 → rda_python_metrics-1.0.10}/src/rda_python_metrics.egg-info/top_level.txt +0 -0
@@ -0,0 +1 @@
1
+ include src/rda_python_metrics/*.usg
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rda_python_metrics
3
- Version: 1.0.8
3
+ Version: 1.0.10
4
4
  Summary: RDA Python Package to gather and view data usage metrics
5
5
  Author-email: Zaihua Ji <zji@ucar.edu>
6
6
  Project-URL: Homepage, https://github.com/NCAR/rda-python-metrics
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
6
6
 
7
7
  [project]
8
8
  name = "rda_python_metrics"
9
- version = "1.0.8"
9
+ version = "1.0.10"
10
10
  authors = [
11
11
  { name="Zaihua Ji", email="zji@ucar.edu" },
12
12
  ]
@@ -27,21 +27,6 @@ dependencies = [
27
27
  "httplib2"
28
28
  ]
29
29
 
30
- [tool.setuptools]
31
- include-package-data = true
32
-
33
- [tool.setuptools.packages.find]
34
- where = ["src"]
35
-
36
- [tool.setuptools.package-data]
37
- "rda_python_metrics" = [
38
- "logarch.usg", "fillawsusage.usg", "fillcodusage.usg", "fillglobususage.usg",
39
- "fillipinfo.usg", "filloneorder.usg", "fillosdfusage.usg", "fillrdadb.usg",
40
- "filltdsusage.usg", "viewallusage.usg", "viewcheckusage.usg", "viewcodusage.usg",
41
- "viewordusage.usg", "viewrqstusage.usg", "viewtdsusage.usg", "viewwebfile.usg",
42
- "viewwebusage.usg"
43
- ]
44
-
45
30
  [tool.pytest.ini_options]
46
31
  pythonpath = [
47
32
  "src"
@@ -53,6 +38,7 @@ pythonpath = [
53
38
  [project.scripts]
54
39
  "logarch.py" = "rda_python_metrics.logarch:main"
55
40
  "fillawsusage" = "rda_python_metrics.fillaswusage:main"
41
+ "fillcdgusage" = "rda_python_metrics.fillcdgusage:main"
56
42
  "fillcodusage" = "rda_python_metrics.fillcodusage:main"
57
43
  "fillcountry" = "rda_python_metrics.fillcountry:main"
58
44
  "fillendtime" = "rda_python_metrics.fillendtime:main"
@@ -1,7 +1,6 @@
1
1
 
2
2
  Retrieves usage information from AWS Server logs under directory
3
- /gpfs/fs1/collections/rda/transer/AWSera5log/ to fill table 'wusage' in
4
- database 'dssdb'.
3
+ ../rda/transer/AWSera5log/ to fill table 'wusage' in database 'rdadb'.
5
4
 
6
5
  Usage: fillawsusage [-b] [-d LogFileDates] [-N NumberDay] [-p BeginDate [Enddate]]
7
6
 
@@ -0,0 +1,429 @@
1
+ #!/usr/bin/env python3
2
+ #
3
+ ###############################################################################
4
+ #
5
+ # Title : fillcdgusage
6
+ # Author : Zaihua Ji, zji@ucar.edu
7
+ # Date : 2025-04-14
8
+ # Purpose : python program to retrieve info from GDEX Postgres database for GDS
9
+ # file accesses and backup fill table tdsusage in PostgreSQL database dssdb.
10
+ #
11
+ # Github : https://github.com/NCAR/rda-python-metrics.git
12
+ #
13
+ ###############################################################################
14
+ #
15
+ import sys
16
+ import re
17
+ import glob
18
+ from os import path as op
19
+ from rda_python_common import PgLOG
20
+ from rda_python_common import PgUtil
21
+ from rda_python_common import PgFile
22
+ from rda_python_common import PgDBI
23
+ from rda_python_common import PgSplit
24
+ from . import PgIPInfo
25
+
26
# Shared settings for this module: the table names used when adding usage
# records, and the current date evaluated once when the module is loaded.
USAGE = dict(
    TDSTBL="tdsusage",
    WEBTBL="webusage",
    CDATE=PgUtil.curdate(),
)
31
+
32
# CDG dataset short name -> list of dataset ids that are searched for the
# downloaded files (callers pass the list to get_wfile_record()).
DSIDS = {
    'ucar.cgd.cesm2.cam6.prescribed_sst_amip' : ['d651010'],
    'ucar.cgd.ccsm4.CLM_LAND_ONLY' : ['d651011'],
    'ucar.cgd.artmip' : ['d651012', 'd651016', 'd651017', 'd651018'],
    'tamip' : ['d651013'],
    'ucar.cgd.ccsm4.CLIVAR_LE' : ['d651014'],
    'ucar.cgd.cesm2.Gettelman_CESM2_ECS' : ['d651015'],
    'ucar.cgd.ccsm4.geomip.ssp5' : ['d651024'],
    'ucar.cgd.ccsm4.IOD-PACEMAKER' : ['d651021'],
    # was '651023'; every other id carries the leading 'd'
    'ucar.cgd.ccsm4.past2k_transient' : ['d651023'],
    'ucar.cgd.ccsm4.lowwarming' : ['d651025'],
    'ucar.cgd.ccsm4.CESM_CAM5_BGC_ME' : ['d651000'],
    'ucar.cgd.ccsm4.iTRACE' : ['d651022'],
    'ucar.cgd.ccsm4.so2_geoeng' : ['d651026'],
    'ucar.cgd.ccsm4.cesmLE' : ['d651027'],
    'ucar.cgd.ccsm4.CESM1-CAM5-DP' : ['d651028'],
    # NOTE(review): this short name used to be listed twice (once for
    # d651031 and once for d651035), so the dict literal silently dropped
    # d651031.  Both ids are kept here so neither is lost; confirm whether
    # one of them actually belongs to a different short name.
    'ucar.cgd.ccsm4.amv_lens' : ['d651031', 'd651035'],
    'ucar.cgd.ccsm4.ATL-PACEMAKER' : ['d651032'],
    'ucar.cgd.ccsm4.pac-pacemaker' : ['d651033'],
    'ucar.cgd.ccsm4.SD-WACCM-X_v2.1' : ['d651034'],
    'ucar.cgd.cesm2.cism_ismip6' : ['d651036'],
    'ucar.cgd.ccsm4.pliomip2' : ['d651037'],
}

# all known CDG dataset short names, in declaration order
ALLIDS = list(DSIDS.keys())

# in-process caches filled by get_wfile_record() and get_wuser_record()
WFILES = {}
WUSERS = {}
61
+
62
+ #
63
+ # main function to run this program
64
+ #
65
def main():
    """Parse the command line and fill CDG usage for the requested datasets.

    '-b' sets PgLOG.PGLOG['BCKGRND']; '-s', '-m', '-N' and '-y' each start a
    list that collects the values following them.  After validation the
    date ranges and dataset ids are resolved, usage is filled when both are
    non-empty, and the program exits with status 0.
    """
    argv = sys.argv[1:]
    params = {}    # option letter -> values given after that option
    opt = None     # option letter currently collecting values

    for arg in argv:
        if arg == "-b":
            PgLOG.PGLOG['BCKGRND'] = 1
            continue
        if re.match(r'^-[msNy]$', arg):
            opt = arg[1]
            params[opt] = []
            continue
        if re.match(r'^-', arg):
            PgLOG.pglog(arg + ": Invalid Option", PgLOG.LGWNEX)
            continue
        if opt:
            params[opt].append(arg)
            continue
        PgLOG.pglog(arg + ": Value passed in without leading option", PgLOG.LGWNEX)

    if not opt:
        PgLOG.show_usage('fillcdgusage')
    elif 's' not in params:
        PgLOG.pglog("-s: Missing dataset short name to gather CDG metrics", PgLOG.LGWNEX)
    elif len(params) < 2:
        PgLOG.pglog("-(m|N|y): Missing Month, NumberDays or Year to gather CDG metrics", PgLOG.LGWNEX)

    PgLOG.cmdlog("fillcdgusage {}".format(' '.join(argv)))
    dranges = get_date_ranges(params)
    dsids = get_dataset_ids(params['s'])
    if dranges and dsids:
        fill_cdg_usages(dsids, dranges)
    PgLOG.pglog(None, PgLOG.LOGWRN|PgLOG.SNDEML)   # send email out if any

    sys.exit(0)
99
+
100
+ #
101
+ # connect to the gdex database esg-production
102
+ #
103
def gdex_dbname():
    """Point PgDBI at the GDEX 'esg-production' database via set_scname()."""
    # positional arguments are passed through to PgDBI.set_scname() unchanged
    scname_args = ('esg-production', 'metrics', 'gateway-reader', None, 'sagedbprodalma.ucar.edu')
    PgDBI.set_scname(*scname_args)
105
+
106
+ #
107
+ # get datasets
108
+ #
109
def get_dataset_ids(dsnames):
    """Resolve CDG dataset short names to [cdg_dataset_id, rda_ids] pairs.

    dsnames: list of short names given with option -s; the single value
             'all' (any case) selects every short name in DSIDS.

    Returns a list of two-item lists [id, rdaid], where id comes from table
    metadata.dataset (including child datasets found recursively) and rdaid
    is the DSIDS entry of the short name.  A warning is logged and an empty
    list is returned when nothing can be resolved.
    """
    gdex_dbname()

    # 'all' used to return the bare short-name strings, which the caller
    # cannot use as [id, rdaid] pairs; expand it to every known short name.
    if any(re.match(r'^all$', dsname, re.I) for dsname in dsnames):
        dsnames = ALLIDS

    dsids = []
    tbname = 'metadata.dataset'
    for dsname in dsnames:
        if dsname not in DSIDS:
            PgLOG.pglog(dsname + ": Unknown CDG dataset short name", PgLOG.LOGWRN)
            continue
        rdaid = DSIDS[dsname]
        pgrec = PgDBI.pgget(tbname, 'id', "short_name = '{}'".format(dsname))
        if not (pgrec and pgrec['id']): continue
        dsid = pgrec['id']
        # dsids holds [id, rdaid] pairs, so compare against the first item
        if any(entry[0] == dsid for entry in dsids): continue
        dsids.append([dsid, rdaid])
        recursive_dataset_ids(dsid, rdaid, dsids)

    if not dsids: PgLOG.pglog("No Dataset Id identified to gather CDG metrics", PgLOG.LOGWRN)

    return dsids
130
+
131
+ #
132
+ # get dsids recursively
133
+ #
134
def recursive_dataset_ids(pdsid, rdaid, dsids):
    """Append every descendant dataset of pdsid to dsids, depth first.

    pdsid: id of the parent dataset in table metadata.dataset
    rdaid: value stored next to each child id that is appended
    dsids: list of [id, rdaid] pairs, extended in place

    A child whose id is already present in dsids is skipped, which also
    stops the recursion if the parent links ever form a cycle.
    """
    tbname = 'metadata.dataset'
    pgrecs = PgDBI.pgmget(tbname, 'id', "parent_dataset_id = '{}'".format(pdsid))
    if not pgrecs: return

    for dsid in pgrecs['id']:
        # entries are [id, rdaid] pairs; 'dsid in dsids' could never match
        if any(entry[0] == dsid for entry in dsids): continue
        dsids.append([dsid, rdaid])
        recursive_dataset_ids(dsid, rdaid, dsids)
144
+
145
+ #
146
+ # get the date ranges for given condition
147
+ #
148
def get_date_ranges(inputs):
    """Build [begin_date, end_date] pairs from the parsed command-line options.

    inputs: dict mapping an option letter to its list of values
        'N' - number of days back from USAGE['CDATE'] up to USAGE['CDATE']
        'm' - month given as YYYY-MM, first through last day of that month
        'y' - year given as YYYY, January 1 through December 31
              ('Y' is accepted as well for backward compatibility)

    Options that do not describe a date (such as 's') are ignored, so every
    item of the returned list has exactly two dates.
    """
    dranges = []
    for opt, values in inputs.items():
        for value in values:
            if opt == 'N':
                dates = [PgUtil.adddate(USAGE['CDATE'], 0, 0, -int(value)), USAGE['CDATE']]
            elif opt == 'm':
                tms = value.split('-')
                bdate = PgUtil.fmtdate(int(tms[0]), int(tms[1]), 1)
                dates = [bdate, PgUtil.enddate(bdate, 0, 'M')]
            elif opt in ('y', 'Y'):
                # the command line only accepts lowercase -y
                dates = [value + "-01-01", value + "-12-31"]
            else:
                # not a date option; appending an empty range here would
                # break the dates[0]/dates[1] accesses of the callers
                continue
            dranges.append(dates)

    return dranges
168
+
169
+ #
170
+ # get file download records for given dsid
171
+ #
172
def get_dsid_records(dsid, dates):
    """Fetch the completed file downloads of one dataset within a date range.

    dsid:  value matched against column dataset_id of metrics.file_download
    dates: two-item sequence, begin and end of the BETWEEN range

    Switches to the GDEX database for the query and calls
    PgDBI.dssdb_dbname() afterwards; returns whatever PgDBI.pgmget() returns.
    """
    columns = [
        'date_completed', 'remote_address', 'logical_file_size', 'logical_file_name',
        'file_access_point_uri', 'user_agent_name', 'bytes_sent', 'subset_file_size',
        'range_request', 'dataset_file_size', 'dataset_file_name',
        'dataset_file_file_access_point_uri',
    ]
    bdate, edate = dates[0], dates[1]

    gdex_dbname()
    where = "dataset_id = '{}' AND completed = True AND date_completed BETWEEN '{}' AND '{}' ORDER BY date_completed".format(dsid, bdate, edate)
    PgLOG.pglog("{}: Query CDG usage between {} and {} at {}".format(dsid, bdate, edate, PgLOG.current_datetime()), PgLOG.LOGWRN)
    records = PgDBI.pgmget('metrics.file_download', ', '.join(columns), where)
    PgDBI.dssdb_dbname()

    return records
184
+
185
+ #
186
+ # Fill TDS usages into table dssdb.tdsusage from cdg access records
187
+ #
188
def fill_cdg_usages(dsids, dranges):
    """Gather CDG download records and add them as TDS or web usage.

    dsids:   list of [cdg_dataset_id, rda_ids] pairs from get_dataset_ids()
    dranges: list of [begin_date, end_date] pairs from get_date_ranges()

    Every dataset is queried for every date range.  Counts are logged per
    query and, when more than one query was made, once more as a total.
    """
    allcnt = awcnt = atcnt = lcnt = 0
    for dates in dranges:
        for dsid in dsids:
            lcnt += 1
            cdgid, rdaid = dsid[0], dsid[1]
            srdaid = '|'.join(rdaid)
            pgrecs = get_dsid_records(cdgid, dates)
            pgcnt = len(pgrecs['dataset_file_name']) if pgrecs else 0
            if pgcnt == 0:
                PgLOG.pglog("{}: No record found to gather CDG usage between {} and {}".format(srdaid, dates[0], dates[1]), PgLOG.LOGWRN)
                continue
            PgLOG.pglog("{}: Process {} records for CDG usage at {}".format(srdaid, pgcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)
            (tcnt, wcnt) = _fill_dataset_usages(rdaid, srdaid, pgrecs, pgcnt)
            atcnt += tcnt
            awcnt += wcnt
            allcnt += pgcnt
            # counts of this query only; the grand total is logged below
            PgLOG.pglog("{}/{} TDS/WEB usage records added for {} CDG entries at {}".format(tcnt, wcnt, pgcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)

    if lcnt > 1: PgLOG.pglog("{}/{} TDS/WEB usage records added for {} CDG entries at {}".format(atcnt, awcnt, allcnt, PgLOG.current_datetime()), PgLOG.LOGWRN)


def _fill_dataset_usages(rdaid, srdaid, pgrecs, pgcnt):
    """Process the pgcnt download records of one query; return (tds_count, web_count)."""
    tcnt = wcnt = 0
    trecs = {}              # pending TDS records of one day, keyed by ip/dataset/method/etype
    tdate = tyear = None    # date and year the pending TDS records belong to
    pending = None          # partial web download still being accumulated

    for i in range(pgcnt):
        if (i+1)%20000 == 0:
            PgLOG.pglog("{}/{}/{} CDG/TDS/WEB records processed to add".format(i+1, tcnt, wcnt), PgLOG.WARNLG)

        pgrec = PgUtil.onerecord(i, pgrecs)
        dsize = pgrec['bytes_sent']
        if not dsize: continue
        (year, quarter, date, rtime) = get_record_date_time(pgrec['date_completed'])
        url = pgrec['dataset_file_file_access_point_uri'] or pgrec['file_access_point_uri']
        ip = pgrec['remote_address']
        engine = pgrec['user_agent_name']
        # the query selects logical_file_name/logical_file_size; the former
        # 'logic_*' keys do not exist in the fetched records
        wfile = pgrec['dataset_file_name'] or pgrec['logical_file_name']
        wfrec = get_wfile_record(rdaid, wfile)
        if not wfrec: continue

        ms = re.search(r'^https://tds.ucar.edu/thredds/(\w+)/', url or '')
        if ms:
            # tds usage
            method = ms.group(1)
            if pgrec['subset_file_size']:
                etype = 'S'
            elif pgrec['range_request']:
                etype = 'R'
            else:
                etype = 'F'

            if date != tdate:
                if trecs:
                    # flush with the year of the pending day, not of this record
                    tcnt += add_tdsusage_records(tyear, trecs, tdate)
                    trecs = {}
                tdate, tyear = date, year
            tkey = "{}:{}:{}:{}".format(ip, srdaid, method, etype)
            if tkey in trecs:
                trecs[tkey]['size'] += dsize
                trecs[tkey]['fcount'] += 1
            else:
                wurec = get_wuser_record(ip, date, skipwuid = True)
                if not wurec: continue   # skip this record instead of aborting the run
                trecs[tkey] = {'ip' : ip, 'dsid' : wfrec['dsid'], 'date' : date, 'time' : rtime, 'size' : dsize,
                               'fcount' : 1, 'method' : method, 'etype' : etype, 'engine' : engine,
                               'org_type' : wurec['org_type'], 'country' : wurec['country'],
                               'email' : wurec['email']}
            continue

        # web usage
        fsize = pgrec['dataset_file_size'] or pgrec['logical_file_size']
        partial = pgrec['subset_file_size'] or pgrec['range_request'] or (fsize and dsize < fsize)
        wkey = "{}:{}:{}".format(ip, srdaid, wfile) if partial else None

        if pending:
            if wkey == pending['key']:
                # another piece of the same partial download
                pending['wrec']['size'] += dsize
                continue
            wcnt += add_webfile_usage(pending['year'], pending['wrec'], pending['wurec'])
            pending = None

        wurec = get_wuser_record(ip, date, skipwuid = False)
        if not wurec: continue   # skip this record instead of aborting the run
        wrec = {'ip' : ip, 'dsid' : wfrec['dsid'], 'wid' : wfrec['wid'], 'date' : date,
                'time' : rtime, 'quarter' : quarter, 'size' : dsize,
                'locflag' : 'C', 'method' : 'CDP'}
        if wkey:
            pending = {'key' : wkey, 'year' : year, 'wrec' : wrec, 'wurec' : wurec}
        else:
            wcnt += add_webfile_usage(year, wrec, wurec)

    if trecs: tcnt += add_tdsusage_records(tyear, trecs, tdate)
    if pending: wcnt += add_webfile_usage(pending['year'], pending['wrec'], pending['wurec'])

    return (tcnt, wcnt)
282
+
283
+
284
def get_record_date_time(ctime):
    """Split a completion timestamp into (year, quarter, date, time).

    ctime: timestamp value; str(ctime) must look like either
           'DD/Mon/YYYY HH:MM:SS' (month resolved through PgUtil.get_month)
           or 'YYYY-MM-DD HH:MM:SS' / 'YYYY-MM-DDTHH:MM:SS'; anything after
           the seconds (fraction, time zone) is ignored for the latter.

    Returns (year string, quarter 1-4, 'YYYY-MM-DD', 'HH:MM:SS').  For any
    other format an error is logged with PgLOG.LGEREX and None is returned
    if that call returns.
    """
    stime = str(ctime)
    ms = re.search(r'^(\d+)/(\w+)/(\d+) (\d+:\d+:\d+)(\.|$)', stime)
    if ms:
        d = int(ms.group(1))
        m = PgUtil.get_month(ms.group(2))
        y = ms.group(3)
        t = ms.group(4)
    else:
        # ISO style, which is what str() of a datetime value yields
        ms = re.search(r'^(\d{4})-(\d{1,2})-(\d{1,2})[ T](\d+:\d+:\d+)', stime)
        if not ms:
            PgLOG.pglog("{}: Invalid date format".format(stime), PgLOG.LGEREX)
            return None
        y = ms.group(1)
        m = int(ms.group(2))
        d = int(ms.group(3))
        t = ms.group(4)

    q = 1 + (m - 1)//3
    return (y, q, "{}-{:02}-{:02}".format(y, m, d), t)
296
+
297
def add_tdsusage_records(year, records, date):
    """Add the pending TDS usage records of one date; return how many were added.

    year:    passed on to add_tds_allusage() for each record
    records: dict of usage records; only the values are used
    date:    date matched against the table and shown in the log line

    A record is skipped when the table already holds a row with the same
    date, time and ip, or when add_tds_allusage() returns a false value.
    """
    added = 0
    for record in records.values():
        where = "date = '{}' AND time = '{}' AND ip = '{}'".format(date, record['time'], record['ip'])
        already_there = PgDBI.pgget(USAGE['TDSTBL'], '', where, PgLOG.LGEREX)
        if not already_there and add_tds_allusage(year, record):
            added += PgDBI.pgadd(USAGE['TDSTBL'], record, PgLOG.LOGWRN)

    PgLOG.pglog("{}: {} TDS usage records added at {}".format(date, added, PgLOG.current_datetime()), PgLOG.LOGWRN)

    return added
311
+
312
def add_tds_allusage(year, pgrec):
    """Add one TDS usage record through PgDBI.add_yearly_allusage().

    The record starts with method 'CDP' and source 'C' and then takes over
    every field of pgrec except engine, method, etype and fcount.  Returns
    the result of PgDBI.add_yearly_allusage().
    """
    tds_only = re.compile(r'^(engine|method|etype|fcount)$')
    record = {'method' : 'CDP', 'source' : 'C'}
    record.update((fld, val) for fld, val in pgrec.items() if not tds_only.match(fld))

    return PgDBI.add_yearly_allusage(year, record)
321
+
322
+ #
323
+ # Fill usage of a single online data file into table dssdb.wusage of DSS PgSQL database
324
+ #
325
def add_webfile_usage(year, logrec, wurec):
    """Add the usage of one web file download; return 0 when nothing is added.

    year:   selects the yearly table "<WEBTBL>_<year>" checked for duplicates
    logrec: download record with wid, dsid, date, time, size, method,
            locflag, ip and quarter
    wurec:  user record providing wuid (and the fields add_web_allusage reads)

    Nothing is added when a row with the same wid, method, date and time
    already exists or when add_web_allusage() returns a false value;
    otherwise the result of PgDBI.add_yearly_wusage() is returned.
    """
    table = "{}_{}".format(USAGE['WEBTBL'], year)
    cond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(logrec['wid'], logrec['method'], logrec['date'], logrec['time'])
    if PgDBI.pgget(table, "", cond, PgLOG.LOGWRN):
        return 0

    record = {
        'wid' : logrec['wid'],
        'dsid' : logrec['dsid'],
        'wuid_read' : wurec['wuid'],
        'date_read' : logrec['date'],
        'time_read' : logrec['time'],
        'size_read' : logrec['size'],
        'method' : logrec['method'],
        'locflag' : logrec['locflag'],
        'ip' : logrec['ip'],
        'quarter' : logrec['quarter'],
    }

    if not add_web_allusage(year, logrec, wurec):
        return 0
    return PgDBI.add_yearly_wusage(year, record)
345
+
346
def add_web_allusage(year, logrec, wurec):
    """Add one web download through PgDBI.add_yearly_allusage().

    The record combines email, org_type and country of wurec with dsid,
    date, quarter, time, size, method and ip of logrec, plus source 'C'.
    Returns the result of PgDBI.add_yearly_allusage().
    """
    pgrec = {'email' : wurec['email'], 'org_type' : wurec['org_type'], 'country' : wurec['country']}
    for fld in ('dsid', 'date', 'quarter', 'time', 'size', 'method', 'ip'):
        pgrec[fld] = logrec[fld]
    pgrec['source'] = 'C'

    return PgDBI.add_yearly_allusage(year, pgrec)
358
+
359
+ #
360
+ # return the wfile record (with wid and dsid) upon success, None otherwise
361
+ #
362
def get_wfile_record(dsids, wfile):
    """Find the web file record whose name ends with wfile.

    dsids: list of dataset ids to search, in order
    wfile: file name taken from the download record

    The lookup order is: the WFILES cache, the wfile table of each dataset
    (PgSplit.pgget_wfile), table wfile_delete for each dataset, and finally
    table wmove, whose wid is then looked up in the dataset the move record
    names.  Returns a record holding at least 'wid' and 'dsid', or None when
    the file cannot be identified.  Found records are cached in WFILES.
    """
    if not wfile: return None   # nothing to match against

    for dsid in dsids:
        wkey = "{}{}".format(dsid, wfile)
        if wkey in WFILES: return WFILES[wkey]

    wfcond = "wfile like '%{}'".format(wfile)
    for dsid in dsids:
        pgrec = PgSplit.pgget_wfile(dsid, "wid", wfcond)
        if pgrec:
            pgrec['dsid'] = dsid
            WFILES["{}{}".format(dsid, wfile)] = pgrec
            return pgrec

    for dsid in dsids:
        pgrec = PgDBI.pgget("wfile_delete", "wid, dsid", "{} AND dsid = '{}'".format(wfcond, dsid))
        if pgrec:
            WFILES["{}{}".format(dsid, wfile)] = pgrec
            return pgrec

    # the wmove query does not depend on dsid, so it is run once only
    mvrec = PgDBI.pgget("wmove", "wid, dsid", wfcond)
    if mvrec:
        # look the file up by the wid of the move record; no other record
        # exists at this point
        pgrec = PgSplit.pgget_wfile(mvrec['dsid'], "wid", "wid = {}".format(mvrec['wid']))
        if pgrec:
            pgrec['dsid'] = mvrec['dsid']
            # cache under a requested dataset id so the check above finds it
            if dsids: WFILES["{}{}".format(dsids[0], wfile)] = pgrec
            return pgrec

    return None
387
+
388
+ # return wuser record upon success, None otherwise
389
def get_wuser_record(ip, date, skipwuid = True):
    """Return the user record for an IP address, or None when it is unavailable.

    ip:       remote address of the download
    date:     date of the download; used as start_date of a new wuser row
              and to move an existing start_date back when it is later
    skipwuid: when True only org_type, country and email are needed and
              table wuser is not touched; when False the record also
              carries 'wuid', adding a wuser row if none exists for the
              email 'unknown@<hostname>'

    Records are cached per ip in WUSERS.  A cached record without 'wuid' is
    not reused for a skipwuid=False request, because callers of that form
    read wurec['wuid'].
    """
    cached = WUSERS.get(ip)
    if cached and (skipwuid or 'wuid' in cached): return cached

    ipinfo = PgIPInfo.set_ipinfo(ip)
    if not ipinfo: return None

    record = {'org_type' : ipinfo['org_type'], 'country' : ipinfo['country']}
    email = 'unknown@' + ipinfo['hostname']
    if skipwuid:
        record['email'] = email
        WUSERS[ip] = record
        return record

    emcond = "email = '{}'".format(email)
    flds = 'wuid, email, org_type, country, start_date'
    pgrec = PgDBI.pgget("wuser", flds, emcond, PgLOG.LOGERR)
    if pgrec:
        if PgUtil.diffdate(pgrec['start_date'], date) > 0:
            pgrec['start_date'] = record['start_date'] = date
            PgDBI.pgupdt('wuser', record, emcond)
        WUSERS[ip] = pgrec
        return pgrec

    # now add one in
    record['email'] = email
    record['stat_flag'] = 'A'
    record['start_date'] = date
    wuid = PgDBI.pgadd("wuser", record, PgLOG.LOGERR|PgLOG.AUTOID)
    if wuid:
        record['wuid'] = wuid
        PgLOG.pglog("{} Added as wuid({})".format(email, wuid), PgLOG.LGWNEM)
        WUSERS[ip] = record
        return record

    return None
425
+
426
+ #
427
+ # call main() to start program
428
+ #
429
# run main() only when this file is executed as a script
if __name__ == "__main__":
    main()
@@ -0,0 +1,18 @@
1
+
2
+ Retrieves CDG usage information from GDEX metrics database to
3
+ fill table 'tdsusage' and 'webusage' in PostgreSQL database 'rdadb'.
4
+
5
+ Usage: fillcdgusage [-b] -s DatasetShortNames [-m MonthList] [-N NumberDays] [-y YearList]
6
+
7
+ select option -s and one of the options, -m, -N or -y each time to run
8
+ this application.
9
+
10
+ - Option -b, log process information into logfile only;
11
+
12
+ - Option -s, retrieve usage info for given dataset short names;
13
+
14
+ - Option -m, retrieve usage info in given months (YYYY-MM);
15
+
16
+ - Option -N, retrieve usage info in recent Number of days;
17
+
18
+ - Option -y, retrieve usage info in given years (YYYY).
@@ -1,6 +1,6 @@
1
1
 
2
2
  Retrieves usage information from RDA Web logs under /var/log to
3
- fill table 'codusage' in MySQL database 'dssdb'.
3
+ fill table 'codusage' in database 'rdadb'.
4
4
 
5
5
  Usage: fillcodusage [-a] [-b] [-f LogFileNames] [-m MonthList] [-N NumberDay] [-y YearList]
6
6
 
@@ -1,7 +1,7 @@
1
1
 
2
2
  Retrieves usage information from Globus Server logs under directory
3
3
  /gpfs/fs1/collections/rda/work/logs/gridftp/ to fill table 'wusage' in
4
- database 'dssdb'.
4
+ database 'rdadb'.
5
5
 
6
6
  Usage: fillglobususage [-b] [-d LogFileDates] [-N NumberDay] [-p BeginDate [Enddate]]
7
7
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rda_python_metrics
3
- Version: 1.0.8
3
+ Version: 1.0.10
4
4
  Summary: RDA Python Package to gather and view data usage metrics
5
5
  Author-email: Zaihua Ji <zji@ucar.edu>
6
6
  Project-URL: Homepage, https://github.com/NCAR/rda-python-metrics
@@ -7,6 +7,8 @@ src/rda_python_metrics/PgView.py
7
7
  src/rda_python_metrics/__init__.py
8
8
  src/rda_python_metrics/fillawsusage.py
9
9
  src/rda_python_metrics/fillawsusage.usg
10
+ src/rda_python_metrics/fillcdgusage.py
11
+ src/rda_python_metrics/fillcdgusage.usg
10
12
  src/rda_python_metrics/fillcodusage.py
11
13
  src/rda_python_metrics/fillcodusage.usg
12
14
  src/rda_python_metrics/fillcountry.py
@@ -1,5 +1,6 @@
1
1
  [console_scripts]
2
2
  fillawsusage = rda_python_metrics.fillaswusage:main
3
+ fillcdgusage = rda_python_metrics.fillcdgusage:main
3
4
  fillcodusage = rda_python_metrics.fillcodusage:main
4
5
  fillcountry = rda_python_metrics.fillcountry:main
5
6
  fillendtime = rda_python_metrics.fillendtime:main
@@ -1,4 +1,4 @@
1
- # test_hello_world.py
1
+ # test_metrics.py
2
2
 
3
3
  import pytest
4
4
 
@@ -1,19 +0,0 @@
1
- include src/rda_python_metrics/fillawsusage.usg
2
- include src/rda_python_metrics/fillcodusage.usg
3
- include src/rda_python_metrics/fillglobususage.usg
4
- include src/rda_python_metrics/fillipinfo.usg
5
- include src/rda_python_metrics/filloneorder.usg
6
- include src/rda_python_metrics/fillosdfusage.usg
7
- include src/rda_python_metrics/fillrdadb.usg
8
- include src/rda_python_metrics/filltdsusage.usg
9
- include src/rda_python_metrics/filluser.usg
10
- include src/rda_python_metrics/logarch.usg
11
- include src/rda_python_metrics/viewallusage.usg
12
- include src/rda_python_metrics/viewcheckusage.usg
13
- include src/rda_python_metrics/viewcodusage.usg
14
- include src/rda_python_metrics/viewordusage.usg
15
- include src/rda_python_metrics/viewrqstusage.usg
16
- include src/rda_python_metrics/viewtdsusage.usg
17
- include src/rda_python_metrics/viewwebfile.usg
18
- include src/rda_python_metrics/viewallusage.usg
19
- include src/rda_python_metrics/viewwebusage.usg