rda-python-metrics 1.0.46__py3-none-any.whl → 1.0.47__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -34,7 +34,9 @@ USAGE = {
34
34
  'OPTION' : 0,
35
35
  'PGTBL' : "tdsusage",
36
36
  'TDSLOG' : "/data/logs/nginx/{}.access.log",
37
- 'CDATE' : PgUtil.curdate()
37
+ 'TDSDIR' : PgLOG.PGLOG["GDEXWORK"] + "/zji/tdslogs/",
38
+ 'TDSGET' : 'wget -m -nH -np -nd https://github.com/NCAR/tds-logs/blob/3ffb86d54aa8a164bbd60995247dc1a7e50813b6/logs/',
39
+ 'TDSLOG' : "localhost_access_log.{}.txt" # {} = YYYY-MM-DD
38
40
  }
39
41
 
40
42
  #
@@ -44,120 +46,125 @@ def main():
44
46
 
45
47
  params = [] # array of input values
46
48
  argv = sys.argv[1:]
47
- datelimit = ''
48
- fixrec = False
49
-
49
+ option = None
50
+ datelimits = [None, None]
51
+
50
52
  for arg in argv:
51
- if arg == "-b":
52
- PgLOG.PGLOG['BCKGRND'] = 1
53
- elif arg == "-f":
54
- fixrec = True
55
- elif re.match(r'^-[mNy]$', arg) and USAGE['OPTION'] == 0:
56
- if arg == "-m":
57
- USAGE['OPTION'] = MONTH
58
- elif arg == "-y":
59
- USAGE['OPTION'] = YEARS
60
- elif arg == "-N":
61
- USAGE['OPTION'] = NDAYS
53
+ ms = re.match(r'^-(b|d|p|N)$', arg)
54
+ if ms:
55
+ opt = ms.group(1)
56
+ if opt == 'b':
57
+ PgLOG.PGLOG['BCKGRND'] = 1
58
+ elif option:
59
+ PgLOG.pglog("{}: Option -{} is present already".format(arg, option), PgLOG.LGWNEX)
60
+ else:
61
+ option = opt
62
62
  elif re.match(r'^-', arg):
63
63
  PgLOG.pglog(arg + ": Invalid Option", PgLOG.LGWNEX)
64
- elif USAGE['OPTION']&MASKS:
64
+ elif option:
65
65
  params.append(arg)
66
66
  else:
67
67
  PgLOG.pglog(arg + ": Invalid Parameter", PgLOG.LGWNEX)
68
68
 
69
- if not (USAGE['OPTION'] and params): PgLOG.show_usage('filltdsusage')
70
- PgDBI.dssdb_dbname()
71
- PgLOG.cmdlog("filltdsusage {}".format(' '.join(argv)))
69
+ if not (option and params): PgLOG.show_usage('filltdsusage')
72
70
 
73
- if fixrec:
74
- fix_tds_usages(USAGE['OPTION'], params)
71
+ PgDBI.dssdb_dbname()
72
+ cmdstr = "filltdsusage {}".format(' '.join(argv))
73
+ PgLOG.cmdlog(cmdstr)
74
+ PgFile.change_local_directory(USAGE['TDSDIR'])
75
+ filenames = get_log_file_names(option, params, datelimits)
76
+ if filenames:
77
+ fill_tds_usages(filenames)
75
78
  else:
76
- if USAGE['OPTION']&NDAYS:
77
- curdate = USAGE['CDATE']
78
- datelimit = PgUtil.adddate(curdate, 0, 0, -int(params[0]))
79
- USAGE['OPTION'] = MONTH
80
- params = []
81
-
82
- while curdate >= datelimit:
83
- tms = curdate.split('-')
84
- params.append("{}-{}".format(tms[0], tms[1]))
85
- curdate = PgUtil.adddate(curdate, 0, 0, -int(tms[2]))
86
-
87
- fill_tds_usages(USAGE['OPTION'], params, datelimit)
88
-
89
- PgLOG.pglog(None, PgLOG.LOGWRN|PgLOG.SNDEML) # send email out if any
79
+ PgLOG.pglog("No log file found for given command: " + cmdstr, PgLOG.LOGWRN)
90
80
 
81
+ PgLOG.pglog(None, PgLOG.LOGWRN)
91
82
  sys.exit(0)
92
83
 
93
84
  #
94
- # Fill TDS usages into table dssdb.tdsusage from tds access logs
85
+ # build the list of log file names for the given dates or date range
95
86
  #
96
- def fill_tds_usages(option, inputs, datelimit):
97
-
98
- cntall = cntadd = 0
87
+ def get_log_file_names(option, params, datelimits):
99
88
 
100
- for input in inputs:
101
- # get log file names
102
- if option&MONTH:
103
- tms = input.split('-')
104
- yrmn = "{}-{:02}".format(tms[0], int(tms[1]))
89
+ filenames = []
90
+ if option == 'd':
91
+ for pdate in params:
92
+ filenames.append(USAGE['TDSLOG'].format(pdate))
93
+ else:
94
+ if option == 'N':
95
+ edate = PgUtil.curdate()
96
+ pdate = datelimits[0] = PgUtil.adddate(edate, 0, 0, -int(params[0]))
105
97
  else:
106
- yrmn = input
98
+ pdate = datelimits[0] = params[0]
99
+ if len(params) > 1:
100
+ edate = datelimits[1] = params[1]
101
+ else:
102
+ edate = PgUtil.curdate()
103
+ while pdate <= edate:
104
+ filenames.append(USAGE['TDSLOG'].format(pdate))
105
+ pdate = PgUtil.adddate(pdate, 0, 0, 1)
107
106
 
108
- logfiles = glob.glob(USAGE['TDSLOG'].format(yrmn + '*'))
109
- if not logfiles: PgLOG.pglog("{}: No file found to gather TDS usage".format(yrmn), PgLOG.LOGWRN)
110
- for logfile in logfiles:
111
- if not op.isfile(logfile):
112
- PgLOG.pglog("{}: Not exists to gather TDS usage".format(logfile), PgLOG.LOGWRN)
113
- continue
114
- fdate = None
115
- ms = re.search(r'(\d+-\d+-\d+).access.log$', logfile)
116
- if ms:
117
- fdate = ms.group(1)
118
- if fdate >= USAGE['CDATE']: continue
119
- if datelimit and fdate < datelimit: continue
120
- PgLOG.pglog("Gathering usage info from {} at {}".format(logfile, PgLOG.current_datetime()), PgLOG.LOGWRN)
121
- tds = PgFile.open_local_file(logfile)
122
- if not tds: continue
123
- ptime = ''
124
- records = {}
125
- entcnt = 0
126
- while True:
127
- line = tds.readline()
128
- if not line: break
129
- entcnt += 1
130
- if entcnt%20000 == 0:
131
- cnt = len(records)
132
- PgLOG.pglog("{}/{} TDS log entries processed/records to add".format(entcnt, cnt), PgLOG.WARNLG)
133
-
134
- ms = re.search(r'(/thredds/catalog|\sGooglebot/)', line)
135
- if ms: continue
136
- ms = re.search(r'/thredds/\S+\.(png|jpg|gif|css|htm)', line)
137
- if ms: continue
138
- ms = re.match(r'^([\d\.]+)\s.*\s(-|\S+@\S+)\s+\[(\S+).*/thredds/(\w+)(/|/grid/)(aggregations|files).*/(ds\d\d\d.\d|[a-z]\d{6})/.*\s200\s+(\d+)(.*)$', line)
139
- if not ms: continue
140
- ip = ms.group(1)
141
- email = ms.group(2)
142
- (date, time) = get_record_date_time(ms.group(3))
143
- method = ms.group(4)
144
- etype = ms.group(6)[0].upper()
145
- dsid = PgUtil.format_dataset_id(ms.group(7))
146
- size = int(ms.group(8))
147
- ebuf = ms.group(9)
148
- ms = re.search(r' "(\w+.*\S+)" ', ebuf)
149
- engine = ms.group(1) if ms else 'Unknown'
150
- key = "{}:{}:{}:{}".format(ip, dsid, method, etype)
107
+ return filenames
151
108
 
152
- if key in records:
153
- records[key]['size'] += size
154
- records[key]['fcount'] += 1
155
- else:
156
- records[key] = {'ip' : ip, 'email' : email, 'dsid' : dsid, 'time' : time, 'size' : size,
157
- 'fcount' : 1, 'method' : method, 'etype' : etype, 'engine' : engine}
158
- tds.close()
159
- if records: cntadd += add_usage_records(records, fdate)
160
- cntall += entcnt
109
+ #
110
+ # Fill TDS usages into table dssdb.tdsusage from tds access logs
111
+ #
112
+ def fill_tds_usages(fnames):
113
+
114
+ year = cntall = addall = 0
115
+ for logfile in fnames:
116
+ linfo = PgFile.check_local_file(logfile)
117
+ if not linfo:
118
+ gzfile = logfile + '.gz'
119
+ PgLOG.pgsystem(USAGE['TDSGET'] + gzfile, 5, PgLOG.LOGWRN)
120
+ linfo = PgFile.check_local_file(gzfile)
121
+ if not linfo:
122
+ PgLOG.pglog("{}: Not exists for Gathering TDS usage".format(gzfile), PgLOG.LOGWRN)
123
+ continue
124
+ PgFile.compress_local_file(gzfile)
125
+ linfo = PgFile.check_local_file(logfile)
126
+ if not linfo:
127
+ PgLOG.pglog("{}: Error ungzip TDS usage".format(gzfile), PgLOG.LGEREX)
128
+ PgLOG.pglog("{}: Gathering TDS usage at {}".format(logfile, PgLOG.current_datetime()), PgLOG.LOGWRN)
129
+ tds = PgFile.open_local_file(logfile)
130
+ if not tds: continue
131
+ records = {}
132
+ cntadd = entcnt = 0
133
+ while True:
134
+ line = tds.readline()
135
+ if not line: break
136
+ entcnt += 1
137
+ if entcnt%20000 == 0:
138
+ cnt = len(records)
139
+ PgLOG.pglog("{}/{} TDS log entries processed/records to add".format(entcnt, cnt), PgLOG.WARNLG)
140
+
141
+ ms = re.search(r'(/thredds/catalog|\sGooglebot/)', line)
142
+ if ms: continue
143
+ ms = re.search(r'/thredds/\S+\.(png|jpg|gif|css|htm)', line)
144
+ if ms: continue
145
+ ms = re.match(r'^([\d\.]+)\s.*\s(-|\S+@\S+)\s+\[(\S+).*/thredds/(\w+)(/|/grid/)(aggregations|files).*/(ds\d\d\d.\d|[a-z]\d{6})/.*\s200\s+(\d+)(.*)$', line)
146
+ if not ms: continue
147
+ ip = ms.group(1)
148
+ email = ms.group(2)
149
+ (date, time) = get_record_date_time(ms.group(3))
150
+ method = ms.group(4)
151
+ etype = ms.group(6)[0].upper()
152
+ dsid = PgUtil.format_dataset_id(ms.group(7))
153
+ size = int(ms.group(8))
154
+ ebuf = ms.group(9)
155
+ ms = re.search(r' "(\w+.*\S+)" ', ebuf)
156
+ engine = ms.group(1) if ms else 'Unknown'
157
+ key = "{}:{}:{}:{}".format(ip, dsid, method, etype)
158
+
159
+ if key in records:
160
+ records[key]['size'] += size
161
+ records[key]['fcount'] += 1
162
+ else:
163
+ records[key] = {'ip' : ip, 'email' : email, 'dsid' : dsid, 'time' : time, 'size' : size,
164
+ 'fcount' : 1, 'method' : method, 'etype' : etype, 'engine' : engine}
165
+ tds.close()
166
+ if records: cntadd += add_usage_records(records, date)
167
+ cntall += entcnt
161
168
 
162
169
  PgLOG.pglog("{} TDS usage records added for {} entries at {}".format(cntadd, cntall, PgLOG.current_datetime()), PgLOG.LOGWRN)
163
170
 
@@ -1,17 +1,16 @@
1
1
 
2
- Retrieves usage information from RDA TDS Server logs under ../logs/tomcat to
2
+ Retrieves usage information from RDA TDS Server logs on GitHub to
3
3
  fill table 'tdsusage' in MySQL database 'dssdb'.
4
4
 
5
- Usage: filltdsusage [-b] [-m MonthList] [-N NumberDay] [-y YearList]
5
+ Usage: filltdsusage [-b] [-d LogFileDates] [-N NumberDay] [-p BeginDate [EndDate]]
6
6
 
7
- select one of the options, -m, -N or -y each time to run
8
- this application.
7
+ select one of the options, -d, -N or -p, to run this application.
9
8
 
10
9
  - Option -b, log process information into logfile only;
11
10
 
12
- - Option -m, retrieve usage info in given months;
11
+ - Option -d, retrieve usage info from given log file dates;
13
12
 
14
13
  - Option -N, retrieve usage info in recent NumberDay days;
15
14
 
16
- - Option -y, retrieve usage info in given years.
17
-
15
+ - Option -p, retrieve usage info within the given period. If EndDate is omitted,
16
+ it defaults to the current date.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rda_python_metrics
3
- Version: 1.0.46
3
+ Version: 1.0.47
4
4
  Summary: RDA Python Package to gather and view data usage metrics
5
5
  Author-email: Zaihua Ji <zji@ucar.edu>
6
6
  Project-URL: Homepage, https://github.com/NCAR/rda-python-metrics
@@ -21,8 +21,8 @@ rda_python_metrics/fillosdfusage.py,sha256=osl2jdbDrh5l2H19Aw8LN-drSTV1NAKPc_cKd
21
21
  rda_python_metrics/fillosdfusage.usg,sha256=Qc5YdUuOiAH74FfVdkwkrQRDYXcASVbxMdBsVQj4X1k,635
22
22
  rda_python_metrics/fillrdadb.py,sha256=5OIzLUo-MHWVIA3sQE7CRMQSW8tX9PH7DocDy4rvbYA,5282
23
23
  rda_python_metrics/fillrdadb.usg,sha256=E_Bf4G8yVABogjRmIOaIbTGgnII2W6RltaFad2XEV2Q,1228
24
- rda_python_metrics/filltdsusage.py,sha256=vOwVzAtWUHO4O-FCSJMg0GKxw-Xc5AzSbfqVFktUAlA,10201
25
- rda_python_metrics/filltdsusage.usg,sha256=yqTHRe8WpZWpu3gso_obgt6LO41-JC27rTIDfdZcISo,538
24
+ rda_python_metrics/filltdsusage.py,sha256=fM6wThVrcozWU6HCyPRaRWsNjGobYEsVwQbhzWp_UcI,10392
25
+ rda_python_metrics/filltdsusage.usg,sha256=HhqJnju6iqKrsB7AQfdwDxXnZ_NOykkDA0vsKEso4xY,588
26
26
  rda_python_metrics/filluser.py,sha256=CvaMRaUPaR9nxJAExkLTb3Ci4sD7RQMOpWhWJdbyMF0,8907
27
27
  rda_python_metrics/filluser.usg,sha256=Xgqi0QwA9-4jpYj3L4Q4TISpVwRlsomt2G7T0oYAFak,520
28
28
  rda_python_metrics/fillzenodousage.py,sha256=wZiHs_cD061sUwTM0zujSkddUKtzACR1l2UG7smOioA,13212
@@ -51,9 +51,9 @@ rda_python_metrics/viewwebfile.py,sha256=HSMNkQQawonu6W3blV7g9UbJuNy9VAOn9COqgmj
51
51
  rda_python_metrics/viewwebfile.usg,sha256=lTNi8Yu8BUJuExEDJX-vsJyWUSUIQTS-DiiBEVFo33s,10054
52
52
  rda_python_metrics/viewwebusage.py,sha256=ES2lI8NaCeCpTGi94HU-cDRBxHMiUBbplyYsZf2KqF0,16650
53
53
  rda_python_metrics/viewwebusage.usg,sha256=OVDZ78p87E3HLW34ZhasNJ7Zmw8XXjmZPPWZfRhPLXo,9936
54
- rda_python_metrics-1.0.46.dist-info/licenses/LICENSE,sha256=1dck4EAQwv8QweDWCXDx-4Or0S8YwiCstaso_H57Pno,1097
55
- rda_python_metrics-1.0.46.dist-info/METADATA,sha256=EKyf_eq-mCqY18vi3MiJHlSh9Tqq2z-mLeCUlfFMjcA,786
56
- rda_python_metrics-1.0.46.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
57
- rda_python_metrics-1.0.46.dist-info/entry_points.txt,sha256=fv4wjyEdiLFYNaJzhmet0SxZcF_J6M42koft2Na1XMs,1403
58
- rda_python_metrics-1.0.46.dist-info/top_level.txt,sha256=aoBgbR_o70TP0QmMW0U6inRHYtfKld47OBmnWnLnDOs,19
59
- rda_python_metrics-1.0.46.dist-info/RECORD,,
54
+ rda_python_metrics-1.0.47.dist-info/licenses/LICENSE,sha256=1dck4EAQwv8QweDWCXDx-4Or0S8YwiCstaso_H57Pno,1097
55
+ rda_python_metrics-1.0.47.dist-info/METADATA,sha256=XflHm_AHkQiBCuTdbWTlNsJsnS-elQ85Ebyt7niZQaQ,786
56
+ rda_python_metrics-1.0.47.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
57
+ rda_python_metrics-1.0.47.dist-info/entry_points.txt,sha256=fv4wjyEdiLFYNaJzhmet0SxZcF_J6M42koft2Na1XMs,1403
58
+ rda_python_metrics-1.0.47.dist-info/top_level.txt,sha256=aoBgbR_o70TP0QmMW0U6inRHYtfKld47OBmnWnLnDOs,19
59
+ rda_python_metrics-1.0.47.dist-info/RECORD,,