rda-python-metrics 1.0.4__py3-none-any.whl → 1.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rda-python-metrics might be problematic. Click here for more details.

@@ -184,7 +184,7 @@ def get_record_date_time(ctime):
184
184
  m = PgUtil.get_month(ms.group(2))
185
185
  y = ms.group(3)
186
186
  t = ms.group(4)
187
- q = 1 + (m-1)/3
187
+ q = 1 + int((m-1)/3)
188
188
  return (y, q, "{}-{:02}-{:02}".format(y, m, d), t)
189
189
  else:
190
190
  PgLOG.pglog(ctime + ": Invalid date/time format", PgLOG.LGEREX)
@@ -186,7 +186,7 @@ def get_record_date_time(ctime):
186
186
  m = PgUtil.get_month(ms.group(2))
187
187
  y = ms.group(3)
188
188
  t = ms.group(4)
189
- q = 1 + (m-1)/3
189
+ q = 1 + int((m-1)/3)
190
190
  return (y, q, "{}-{:02}-{:02}".format(y, m, d), t)
191
191
  else:
192
192
  PgLOG.pglog(ctime + ": Invalid date/time format", PgLOG.LGEREX)
@@ -0,0 +1,265 @@
1
+ #!/usr/bin/env python3
2
+ #
3
+ ###############################################################################
4
+ #
5
+ # Title : fillosdfusage
6
+ # Author : Zaihua Ji, zji@ucar.edu
7
+ # Date : 2025-04-01
8
+ # Purpose : python program to retrieve info from weekly OSDF logs
9
+ # and fill table wusage in PgSQL database dssdb.
10
+ #
11
+ # Github : https://github.com/NCAR/rda-python-metrics.git
12
+ #
13
+ ###############################################################################
14
+ #
15
+ import sys
16
+ import re
17
+ from rda_python_common import PgLOG
18
+ from rda_python_common import PgUtil
19
+ from rda_python_common import PgFile
20
+ from rda_python_common import PgDBI
21
+ from rda_python_common import PgSplit
22
+ from . import PgIPInfo
23
+
24
# Static settings used while gathering OSDF usage.
#   OSDFTBL - base name of the yearly usage table; the year is appended as "<OSDFTBL>_<year>"
#   OSDFDIR - local working directory the program changes into before fetching logs
#             NOTE(review): the path ends in "gridftp" although these are OSDF logs - confirm
#   OSDFGET - wget command prefix; a log file name is appended to it
#   OSDFLOG - log file name template, filled in with a YYYY-MM-DD date string
USAGE = dict(
    OSDFTBL="wusage",
    OSDFDIR=PgLOG.PGLOG["DSSDATA"] + "/work/logs/gridftp/",
    OSDFGET='wget -m -nH -np -nd https://pelicanplatform.org/pelican-access-logs/ncar-access-log/',
    OSDFLOG="{}.log",   # YYYY-MM-DD.log
)
30
+
31
+ #
32
+ # main function to run this program
33
+ #
34
def main():
    """Parse the command line, then fetch and process the selected OSDF logs.

    Recognized options are -b (sets PgLOG.PGLOG['BCKGRND']) and exactly one of
    -d, -N or -p; every non-option argument following the chosen option is
    collected as one of its values.  The usage text is shown when no option or
    no value was given.  The function ends the process with exit status 0.
    """
    cli_args = sys.argv[1:]
    values = []                  # values given for the chosen option
    chosen = None                # the single -d/-N/-p option letter
    datelimits = [None, None]    # [begin, end]; filled in by get_log_file_names()

    for arg in cli_args:
        known = re.match(r'^-(b|d|p|N)$', arg)
        if known:
            letter = known.group(1)
            if letter == 'b':
                PgLOG.PGLOG['BCKGRND'] = 1
            elif chosen:
                # only one of -d/-N/-p may be given
                PgLOG.pglog("{}: Option -{} is present already".format(arg, chosen), PgLOG.LGWNEX)
            else:
                chosen = letter
            continue

        if re.match(r'^-', arg):
            PgLOG.pglog(arg + ": Invalid Option", PgLOG.LGWNEX)
            continue

        if chosen:
            values.append(arg)
        else:
            # a value showed up before any option
            PgLOG.pglog(arg + ": Invalid Parameter", PgLOG.LGWNEX)

    if not chosen or not values:
        PgLOG.show_usage('fillosdfusage')

    PgDBI.dssdb_dbname()
    cmdstr = "fillosdfusage {}".format(' '.join(cli_args))
    PgLOG.cmdlog(cmdstr)
    PgFile.change_local_directory(USAGE['OSDFDIR'])

    logfiles = get_log_file_names(chosen, values, datelimits)
    if not logfiles:
        PgLOG.pglog("No log file found for given command: " + cmdstr, PgLOG.LOGWRN)
    else:
        fill_osdf_usages(logfiles, datelimits)

    PgLOG.pglog(None, PgLOG.LOGWRN)
    sys.exit(0)
73
+
74
+ #
75
+ # get the names of the weekly log files to process
76
+ #
77
def get_log_file_names(option, params, datelimits):
    """Build the list of weekly OSDF log file names to process.

    Log files are named after a date (USAGE['OSDFLOG']); only dates for which
    PgUtil.get_weekday() returns 0 are used (the skip message below calls the
    others "Non-Sunday").

    option     -- 'd': params are explicit log file dates;
                  'N': params[0] is a number of days back from the current date;
                  otherwise (-p): params[0] is the begin date and the optional
                  params[1] the end date, defaulting to the current date.
    params     -- list of option values as given on the command line.
    datelimits -- two-element list updated in place for -N/-p: [0] receives the
                  begin date, [1] the end date when one was given explicitly.

    Returns the list of log file names, possibly empty.
    """
    if option == 'd':
        filenames = []
        for pdate in params:
            if PgUtil.get_weekday(pdate) > 0:
                PgLOG.pglog(pdate + ": Skip a Non-Sunday date", PgLOG.LOGWRN)
                continue
            filenames.append(USAGE['OSDFLOG'].format(pdate))
        return filenames

    if option == 'N':
        edate = PgUtil.curdate()
        pdate = datelimits[0] = PgUtil.adddate(edate, 0, 0, -int(params[0]))
    else:
        pdate = datelimits[0] = params[0]
        if len(params) > 1:
            edate = datelimits[1] = params[1]
        else:
            edate = PgUtil.curdate()

    # Move a start date that is not a log date forward to the next one.  The
    # offset must be applied to the start date itself: applying it to the end
    # date put the start beyond the end, so no log file was ever selected.
    pdays = PgUtil.get_weekday(pdate)
    if pdays > 0:
        pdate = PgUtil.adddate(pdate, 0, 0, 7 - pdays)

    filenames = []
    while pdate <= edate:
        filenames.append(USAGE['OSDFLOG'].format(pdate))
        pdate = PgUtil.adddate(pdate, 0, 0, 7)   # logs are one week apart

    return filenames
104
+
105
+ #
106
+ # Fill OSDF usages into table dssdb.wusage of DSS PgSQL database from osdf access logs
107
+ #
108
# One OSDF access-log entry.  Capture groups: 1 timestamp, 2 dataset id,
# 3 file path below the dataset, 4 Host value, 5 server value, 6 Read value.
_OSDF_ENTRY = re.compile(
    r'^\[(\S+)\] \[Objectname:\/ncar\/rda\/([a-z]\d{6})\/(\S+)\] '
    r'\[Host:(\S+)\] \[server:(\S+)\] \[Read:(\d+)\]')


def fill_osdf_usages(fnames, datelimits):
    """Download each OSDF log file and record the usage entries found in it.

    fnames     -- list of log file names; each is fetched with the
                  USAGE['OSDFGET'] command before it is read.
    datelimits -- [begin, end] date strings; an entry dated before a set begin
                  or after a set end is skipped.  Either limit may be None.

    Files that are missing, empty or cannot be opened are skipped.  Lines that
    do not look like an OSDF entry, and entries with a Read value under 100,
    are ignored.  Every remaining entry is handed to add_file_usage().  A
    summary with the total counts is logged at the end; nothing is returned.
    """
    cntall = addall = 0

    for logfile in fnames:
        PgLOG.pgsystem(USAGE['OSDFGET'] + logfile, 5, PgLOG.LOGWRN)
        linfo = PgFile.check_local_file(logfile)
        if not linfo:
            PgLOG.pglog("{}: Not exists for Gathering OSDF usage".format(logfile), PgLOG.LOGWRN)
            continue
        if linfo['data_size'] == 0:
            PgLOG.pglog("{}: Empty log for Gathering OSDF usage".format(logfile), PgLOG.LOGWRN)
            continue
        PgLOG.pglog("Gathering usage info from {} at {}".format(logfile, PgLOG.current_datetime()), PgLOG.LOGWRN)
        osdf = PgFile.open_local_file(logfile)
        if not osdf:
            continue

        cntadd = entcnt = 0
        try:
            while True:
                line = osdf.readline()
                if not line:
                    break
                entcnt += 1
                if entcnt % 10000 == 0:
                    PgLOG.pglog("{}: {}/{} OSDF log entries processed/records added".format(logfile, entcnt, cntadd), PgLOG.WARNLG)

                ms = _OSDF_ENTRY.match(line)
                if not ms:
                    continue
                size = int(ms.group(6))
                if size < 100:
                    continue   # ignore small files

                (year, quarter, date, rtime) = get_record_date_time(ms.group(1))
                if datelimits[0] and date < datelimits[0]:
                    continue
                if datelimits[1] and date > datelimits[1]:
                    continue

                record = {'ip': ms.group(4),
                          'dsid': PgUtil.format_dataset_id(ms.group(2)),
                          'wfile': ms.group(3), 'date': date,
                          'time': rtime, 'quarter': quarter, 'size': size,
                          'locflag': 'C', 'method': "OSDF"}
                cntadd += add_file_usage(year, record)
        finally:
            # release the file handle even when processing an entry fails
            osdf.close()

        cntall += entcnt
        addall += cntadd

    PgLOG.pglog("{} OSDF usage records added for {} entries at {}".format(addall, cntall, PgLOG.current_datetime()), PgLOG.LOGWRN)
157
+
158
+
159
def get_record_date_time(ctime):
    """Split a log timestamp into its year, quarter, date and time parts.

    ctime -- timestamp string shaped like "YYYY-MM-DDThh:mm:ss.<fraction>";
             the '.' after the time part is required by the pattern.

    Returns a tuple (year, quarter, date, time): year is the year digits as a
    string, quarter an int computed from the month (1 for months 1-3, ... 4 for
    months 10-12), date a zero-padded "YYYY-MM-DD" string and time the
    "hh:mm:ss" part as a string.

    A timestamp that does not match is reported through PgLOG.pglog() with
    PgLOG.LGEREX (presumably this ends the program - confirm); if that call
    returns, None is returned.
    """
    ms = re.search(r'^(\d+)-(\d+)-(\d+)T([\d:]+)\.', ctime)
    if not ms:
        PgLOG.pglog(ctime + ": Invalid date/time format", PgLOG.LGEREX)
        return None

    year = ms.group(1)
    month = int(ms.group(2))
    day = int(ms.group(3))
    clock = ms.group(4)
    quarter = 1 + (month - 1) // 3   # floor division keeps this in integers
    return (year, quarter, "{}-{:02}-{:02}".format(year, month, day), clock)
171
+
172
+ #
173
+ # Fill usage of a single online data file into table dssdb.wusage of DSS PgSQL database
174
+ #
175
def add_file_usage(year, logrec):
    """Record one file download in the yearly usage table.

    year   -- year string used as the suffix of the usage table name.
    logrec -- dict describing the log entry; the keys read here are 'dsid',
              'wfile', 'ip', 'date', 'time', 'size', 'method', 'locflag' and
              'quarter'.

    Returns 0 when the file is unknown, when a row with the same wid, method,
    date and time already exists, when no user record can be obtained, or when
    add_to_allusage() reports failure; otherwise returns the result of
    PgDBI.add_yearly_wusage().
    """
    wfrec = get_wfile_wid(logrec['dsid'], logrec['wfile'])
    if not wfrec:
        return 0

    # skip an entry that was recorded already
    usgtable = "{}_{}".format(USAGE['OSDFTBL'], year)
    dupcond = "wid = {} AND method = '{}' AND date_read = '{}' AND time_read = '{}'".format(wfrec['wid'], logrec['method'], logrec['date'], logrec['time'])
    if PgDBI.pgget(usgtable, "", dupcond, PgLOG.LOGWRN):
        return 0

    wurec = get_wuser_record(logrec['ip'], logrec['date'])
    if not wurec:
        return 0

    usgrec = {
        'wid': wfrec['wid'],
        'dsid': wfrec['dsid'],
        'wuid_read': wurec['wuid'],
        'date_read': logrec['date'],
        'time_read': logrec['time'],
        'size_read': logrec['size'],
        'method': logrec['method'],
        'locflag': logrec['locflag'],
        'ip': logrec['ip'],
        'quarter': logrec['quarter'],
    }

    # the combined usage row is added first; the wusage row only follows on success
    if not add_to_allusage(year, logrec, wurec):
        return 0
    return PgDBI.add_yearly_wusage(year, usgrec)
200
+
201
def add_to_allusage(year, logrec, wurec):
    """Add one usage entry through PgDBI.add_yearly_allusage().

    year   -- year value passed on to PgDBI.add_yearly_allusage().
    logrec -- log entry dict; 'dsid', 'date', 'quarter', 'time', 'size',
              'method' and 'ip' are copied from it.
    wurec  -- user record dict; 'email', 'org_type' and 'country' are copied
              from it.

    The record's 'source' is always 'W'.  Returns whatever
    PgDBI.add_yearly_allusage() returns.
    """
    allrec = {
        'email': wurec['email'],
        'org_type': wurec['org_type'],
        'country': wurec['country'],
        'dsid': logrec['dsid'],
        'date': logrec['date'],
        'quarter': logrec['quarter'],
        'time': logrec['time'],
        'size': logrec['size'],
        'method': logrec['method'],
        'ip': logrec['ip'],
        'source': 'W',
    }
    return PgDBI.add_yearly_allusage(year, allrec)
213
+
214
+ #
215
+ # return the matching wfile record (with its dsid set) upon success, a false value otherwise
216
+ #
217
def get_wfile_wid(dsid, wfile):
    """Look up the record of a data file by dataset id and file name.

    dsid  -- dataset id the file was requested under.
    wfile -- file name relative to the dataset, as found in the access log.

    The lookup order is: PgSplit.pgget_wfile() for the dataset, then table
    "wfile_delete", then table "wmove" (whose wid/dsid are used for another
    PgSplit.pgget_wfile() lookup).  A record found through pgget_wfile() gets
    its 'dsid' set to the given dsid.

    Returns the record found, or the false value of the last failed lookup.
    """
    # The name comes from log text, so double any single quote to keep the
    # SQL string literal intact.
    wfcond = "wfile = '{}'".format(wfile.replace("'", "''"))

    pgrec = PgSplit.pgget_wfile(dsid, "*", wfcond)
    if pgrec:
        pgrec['dsid'] = dsid
        return pgrec

    pgrec = PgDBI.pgget("wfile_delete", "*", "{} AND dsid = '{}'".format(wfcond, dsid))
    if pgrec:
        return pgrec

    # the file may have been moved; follow the wmove entry to the current record
    pgrec = PgDBI.pgget("wmove", "wid, dsid", wfcond)
    if pgrec:
        pgrec = PgSplit.pgget_wfile(pgrec['dsid'], "*", "wid = {}".format(pgrec['wid']))
        if pgrec:
            pgrec['dsid'] = dsid

    return pgrec
232
+
233
+ # return wuser record upon success, None otherwise
234
def get_wuser_record(ip, date):
    """Find or create the wuser record that stands for a client IP address.

    ip   -- client address, resolved through PgIPInfo.set_ipinfo().
    date -- date string of the usage entry.

    The user is identified by the email 'unknown@<hostname>'.  For an existing
    user whose start_date gives a positive PgUtil.diffdate() against date, the
    start_date, org_type and country are updated in table wuser.  When no user
    exists, one is added with stat_flag 'A'.

    Returns the existing or newly added record (a dict including 'wuid'), or
    None when the address cannot be resolved or the insert fails.
    """
    ipinfo = PgIPInfo.set_ipinfo(ip)
    if not ipinfo:
        return None

    record = {'org_type': ipinfo['org_type'], 'country': ipinfo['country']}
    email = 'unknown@' + ipinfo['hostname']
    emcond = "email = '{}'".format(email)

    found = PgDBI.pgget("wuser", 'wuid, email, org_type, country, start_date', emcond, PgLOG.LOGERR)
    if found:
        if PgUtil.diffdate(found['start_date'], date) > 0:
            found['start_date'] = record['start_date'] = date
            PgDBI.pgupdt('wuser', record, emcond)
        return found

    # no such user yet: add one in
    record['email'] = email
    record['stat_flag'] = 'A'
    record['start_date'] = date
    wuid = PgDBI.pgadd("wuser", record, PgLOG.LOGERR|PgLOG.AUTOID)
    if not wuid:
        return None

    record['wuid'] = wuid
    PgLOG.pglog("{} Added as wuid({})".format(email, wuid), PgLOG.LGWNEM)
    return record
261
+
262
+ #
263
+ # call main() to start program
264
+ #
265
# run main() only when this module is executed as a script
if __name__ == "__main__":
    main()
@@ -0,0 +1,17 @@
1
+
2
+ Retrieves usage information from OSDF Server logs under directory
3
+ /gpfs/fs1/collections/rda/work/zji/osdflogs/ to fill table 'wusage' in
4
+ database 'dssdb'.
5
+
6
+ Usage: fillosdfusage [-b] [-d LogFileDates] [-N NumberDay] [-p BeginDate [EndDate]]
7
+
8
+ Select one option, -d, -N or -p, to run this application.
9
+
10
+ - Option -b, log process information into logfile only;
11
+
12
+ - Option -d, retrieve usage info from given log file dates (non-Sunday dates are skipped);
13
+
14
+ - Option -N, retrieve usage info in recent NumberDay days;
15
+
16
+ - Option -p, retrieve usage info within the given period. If EndDate is
17
+ omitted, it defaults to the current date.
@@ -243,7 +243,7 @@ def check_enough_options():
243
243
  def build_query_strings(usgtable):
244
244
 
245
245
  # initialize query strings
246
- global condition, fieldnames, tablenames, gfields, sfields
246
+ global condition, fieldnames, tablenames
247
247
  joins = having = groupnames = ''
248
248
  tablenames = usgtable
249
249
  cols = params['C'][0]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rda_python_metrics
3
- Version: 1.0.4
3
+ Version: 1.0.5
4
4
  Summary: RDA Python Package to gather and view data usage metrics
5
5
  Author-email: Zaihua Ji <zji@ucar.edu>
6
6
  Project-URL: Homepage, https://github.com/NCAR/rda-python-metrics
@@ -1,18 +1,20 @@
1
1
  rda_python_metrics/PgIPInfo.py,sha256=NJe5hRwxuflH_CZBZmFCgzYU6XFZXP44PoSbqbpPOwM,5727
2
2
  rda_python_metrics/PgView.py,sha256=r6otb3DjfiaQJdg0z8bZQAOlhr4JnrXJzp9wgWh_8qQ,24369
3
3
  rda_python_metrics/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
4
- rda_python_metrics/fillawsusage.py,sha256=WDwdfF0bDBB8j49vjiWE-EM5GZ0d3PGAbEIxp_Nl4vs,9050
4
+ rda_python_metrics/fillawsusage.py,sha256=dpI3-EFINJECdCSUOm37A97CJNIxOS2nYwA0fB0IpHE,9055
5
5
  rda_python_metrics/fillawsusage.usg,sha256=qHdQGMbXlbsqEAoEmM71_zlE3yw704nyhXZ4sw4FIbI,631
6
6
  rda_python_metrics/fillcodusage.py,sha256=Hp3VrlIqHBWRI6Zjbi0nxWZPNvPrKsGBSZ1L9qA9_y0,8006
7
7
  rda_python_metrics/fillcodusage.usg,sha256=QAM6wqycyDb-6t5PhlL2niPiGxZ4Gln0QVdKdyN3ShI,684
8
8
  rda_python_metrics/fillcountry.py,sha256=YYm0lIIfljA7rIAOFgP1fhyhqkBDOY6vdk7g11WFPLc,2359
9
9
  rda_python_metrics/fillendtime.py,sha256=skZttlpoY19g0dGwqGQI8t_1YPPTPEXwg3EfNlfL90I,2533
10
- rda_python_metrics/fillglobususage.py,sha256=5xUbjGNjX8uhNyHDsYlR4cJU2ARPibElSNPUIdrdknc,9437
10
+ rda_python_metrics/fillglobususage.py,sha256=-cvIipaFju75aw9axHkx6JIe9HWYwQOD8-0blQaxxUM,9442
11
11
  rda_python_metrics/fillglobususage.usg,sha256=p-f5hGGDkcM2O8ADEP0Do_lMIIFj8KkiFG1HJ-YgtQM,637
12
12
  rda_python_metrics/fillipinfo.py,sha256=xIVJ6nDvVvMOjb7s_6-YDLVRBC09pDFugnjB3Nrmqus,5641
13
13
  rda_python_metrics/fillipinfo.usg,sha256=taITqZa6GL0-wxXcMEdMU3ZlQbJ1CsmFclTvXpG5TLg,644
14
14
  rda_python_metrics/filloneorder.py,sha256=MhllvtS2PM1SMkf1dHmGTIppIkt__SRMKIUPrv_iRRU,5472
15
15
  rda_python_metrics/filloneorder.usg,sha256=mtOySKx6-D4k2bbTcmi6cSYtINiycRyHQkHozi0CQu0,1466
16
+ rda_python_metrics/fillosdfusage.py,sha256=LVJKdHIhBStUHgmrbXVWhou9kl3kiD8zw6vdLQ_Fun0,8759
17
+ rda_python_metrics/fillosdfusage.usg,sha256=Jlyn6K1LS1ZqDbmOQpR3KNVnp43-kfyvqYlcDt4jPOM,632
16
18
  rda_python_metrics/fillrdadb.py,sha256=MDcl6oM-A1xek9SBP9Gvor_Ouq5dB7mki2Vf_w8VDNk,5199
17
19
  rda_python_metrics/fillrdadb.usg,sha256=E_Bf4G8yVABogjRmIOaIbTGgnII2W6RltaFad2XEV2Q,1228
18
20
  rda_python_metrics/filltdsusage.py,sha256=Hg09ogrqaTfULyBy3eLGTudDs_5JrXEpf9_V5uEGiN0,10148
@@ -27,7 +29,7 @@ rda_python_metrics/viewallusage.py,sha256=B-4s3aoAkAkeB1QM_xfZceRe_QI16vwpI81ekb
27
29
  rda_python_metrics/viewallusage.usg,sha256=ABtOCqGoE6HKE1IPsk02ppC883vNiJILRPBRrpbnzRM,9296
28
30
  rda_python_metrics/viewcheckusage.py,sha256=HougqjDAOVG6pYglFjyHQ-UdLBcYe7v_jzU1-80RqFA,12996
29
31
  rda_python_metrics/viewcheckusage.usg,sha256=KuJFycggGiUcSezQ9vywDbituvu63SZ-ZnNTaMpbc-A,8930
30
- rda_python_metrics/viewcodusage.py,sha256=jYeZz86vARtXlDJqqP_gPdmi1BKWhPMMpNI4RfTunwA,14030
32
+ rda_python_metrics/viewcodusage.py,sha256=ScyZFjMSss1GNZdmXVs9wWRbaPZRahaFXsWG8kIVRP4,14012
31
33
  rda_python_metrics/viewcodusage.usg,sha256=_kgF7Tk2_n1JVf9km2MiwO86vtZRCdu4i8hkWN0eETo,8637
32
34
  rda_python_metrics/viewordusage.py,sha256=9zIJkThKgSOW58qXyQs2Hq8EeEp645lnpD5bstSzR_0,15370
33
35
  rda_python_metrics/viewordusage.usg,sha256=TqZDQk-DzOWC6_uzmFzGyA4F98ojOifANJGv9BCfH1I,10599
@@ -39,9 +41,9 @@ rda_python_metrics/viewwebfile.py,sha256=BqtA_YNhprnrGE6GWEW7n5PDxzNlljfv_MOPezO
39
41
  rda_python_metrics/viewwebfile.usg,sha256=lTNi8Yu8BUJuExEDJX-vsJyWUSUIQTS-DiiBEVFo33s,10054
40
42
  rda_python_metrics/viewwebusage.py,sha256=jhoHuRPVNtp7Lbjv0l-Jy_vp2p2nWQC7IVZ0P4JUJ4I,16657
41
43
  rda_python_metrics/viewwebusage.usg,sha256=IsT72v22xyZf7ng_IodVs0dLTsH1Q4BtFvT-gs0-xJY,9946
42
- rda_python_metrics-1.0.4.dist-info/licenses/LICENSE,sha256=1dck4EAQwv8QweDWCXDx-4Or0S8YwiCstaso_H57Pno,1097
43
- rda_python_metrics-1.0.4.dist-info/METADATA,sha256=omB9dxreEDSDXR4LE90kn3xHXUWBvSxztq2QFtnP6rU,667
44
- rda_python_metrics-1.0.4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
45
- rda_python_metrics-1.0.4.dist-info/entry_points.txt,sha256=UOZhtPc8-DypWpkOP-nnF7OH8Fdp_wk6cuqh19JsmZA,1075
46
- rda_python_metrics-1.0.4.dist-info/top_level.txt,sha256=aoBgbR_o70TP0QmMW0U6inRHYtfKld47OBmnWnLnDOs,19
47
- rda_python_metrics-1.0.4.dist-info/RECORD,,
44
+ rda_python_metrics-1.0.5.dist-info/licenses/LICENSE,sha256=1dck4EAQwv8QweDWCXDx-4Or0S8YwiCstaso_H57Pno,1097
45
+ rda_python_metrics-1.0.5.dist-info/METADATA,sha256=ESuOgR-2a5KXfttLYT7v9TSWRRYWwOT5_XlNs4YKV7c,667
46
+ rda_python_metrics-1.0.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
47
+ rda_python_metrics-1.0.5.dist-info/entry_points.txt,sha256=uoCDpwsZJNKXSxkngolWGgmi7arXp7BM-p6tVHVWvgc,1133
48
+ rda_python_metrics-1.0.5.dist-info/top_level.txt,sha256=aoBgbR_o70TP0QmMW0U6inRHYtfKld47OBmnWnLnDOs,19
49
+ rda_python_metrics-1.0.5.dist-info/RECORD,,
@@ -6,8 +6,9 @@ fillendtime = rda_python_metrics.fillendtime:main
6
6
  fillglobususage = rda_python_metrics.fillglobususage:main
7
7
  fillipinfo = rda_python_metrics.fillipinfo:main
8
8
  filloneorder = rda_python_metrics.filloneorder:main
9
+ fillosdfusage = rda_python_metrics.fillosdfusage:main
9
10
  fillrdadb = rda_python_metrics.fillrdadb:main
10
- filltdsusage = rda_python_metrics.tdsusage:main
11
+ filltdsusage = rda_python_metrics.filltdsusage:main
11
12
  filluser = rda_python_metrics.filluser:main
12
13
  logarch.py = rda_python_metrics.logarch:main
13
14
  pgperson = rda_python_metrics.pgperson:main