rda-python-metrics 1.0.44__tar.gz → 1.0.47__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rda-python-metrics might be problematic. Click here for more details.

Files changed (67) hide show
  1. {rda_python_metrics-1.0.44/src/rda_python_metrics.egg-info → rda_python_metrics-1.0.47}/PKG-INFO +2 -1
  2. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/pyproject.toml +3 -2
  3. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/fillcdgusage.py +6 -1
  4. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/fillglobususage.py +5 -3
  5. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/fillglobususage.usg +4 -2
  6. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/fillosdfusage.py +1 -1
  7. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/filltdsusage.py +104 -97
  8. rda_python_metrics-1.0.47/src/rda_python_metrics/filltdsusage.usg +16 -0
  9. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/logarch.py +1 -0
  10. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47/src/rda_python_metrics.egg-info}/PKG-INFO +2 -1
  11. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics.egg-info/requires.txt +1 -0
  12. rda_python_metrics-1.0.44/src/rda_python_metrics/filltdsusage.usg +0 -17
  13. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/LICENSE +0 -0
  14. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/MANIFEST.in +0 -0
  15. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/README.md +0 -0
  16. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/setup.cfg +0 -0
  17. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/PgIPInfo.py +0 -0
  18. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/PgView.py +0 -0
  19. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/__init__.py +0 -0
  20. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/fillawsusage.py +0 -0
  21. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/fillawsusage.usg +0 -0
  22. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/fillcdgusage.usg +0 -0
  23. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/fillcodusage.py +0 -0
  24. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/fillcodusage.usg +0 -0
  25. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/fillcountry.py +0 -0
  26. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/fillendtime.py +0 -0
  27. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/fillgdexusage.py +0 -0
  28. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/fillgdexusage.usg +0 -0
  29. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/fillipinfo.py +0 -0
  30. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/fillipinfo.usg +0 -0
  31. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/filloneorder.py +0 -0
  32. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/filloneorder.usg +0 -0
  33. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/fillosdfusage.usg +0 -0
  34. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/fillrdadb.py +0 -0
  35. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/fillrdadb.usg +0 -0
  36. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/filluser.py +0 -0
  37. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/filluser.usg +0 -0
  38. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/fillzenodousage.py +0 -0
  39. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/logarch.usg +0 -0
  40. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/pgperson.py +0 -0
  41. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/pgsyspath.py +0 -0
  42. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/pgusername.py +0 -0
  43. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/viewallusage.py +0 -0
  44. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/viewallusage.usg +0 -0
  45. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/viewawsusage.py +0 -0
  46. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/viewawsusage.usg +0 -0
  47. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/viewcheckusage.py +0 -0
  48. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/viewcheckusage.usg +0 -0
  49. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/viewcodusage.py +0 -0
  50. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/viewcodusage.usg +0 -0
  51. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/viewordusage.py +0 -0
  52. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/viewordusage.usg +0 -0
  53. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/viewosdfusage.py +0 -0
  54. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/viewosdfusage.usg +0 -0
  55. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/viewrqstusage.py +0 -0
  56. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/viewrqstusage.usg +0 -0
  57. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/viewtdsusage.py +0 -0
  58. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/viewtdsusage.usg +0 -0
  59. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/viewwebfile.py +0 -0
  60. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/viewwebfile.usg +0 -0
  61. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/viewwebusage.py +0 -0
  62. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics/viewwebusage.usg +0 -0
  63. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics.egg-info/SOURCES.txt +0 -0
  64. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics.egg-info/dependency_links.txt +0 -0
  65. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics.egg-info/entry_points.txt +0 -0
  66. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/src/rda_python_metrics.egg-info/top_level.txt +0 -0
  67. {rda_python_metrics-1.0.44 → rda_python_metrics-1.0.47}/tests/test_metrics.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rda_python_metrics
3
- Version: 1.0.44
3
+ Version: 1.0.47
4
4
  Summary: RDA Python Package to gather and view data usage metrics
5
5
  Author-email: Zaihua Ji <zji@ucar.edu>
6
6
  Project-URL: Homepage, https://github.com/NCAR/rda-python-metrics
@@ -17,6 +17,7 @@ Requires-Dist: geoip2
17
17
  Requires-Dist: ipinfo
18
18
  Requires-Dist: httplib2
19
19
  Requires-Dist: dnspython
20
+ Requires-Dist: unidecode
20
21
  Dynamic: license-file
21
22
 
22
23
  RDA Python Package to gather and view data usage metrics.
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
6
6
 
7
7
  [project]
8
8
  name = "rda_python_metrics"
9
- version = "1.0.44"
9
+ version = "1.0.47"
10
10
  authors = [
11
11
  { name="Zaihua Ji", email="zji@ucar.edu" },
12
12
  ]
@@ -25,7 +25,8 @@ dependencies = [
25
25
  "geoip2",
26
26
  "ipinfo",
27
27
  "httplib2",
28
- "dnspython"
28
+ "dnspython",
29
+ "unidecode"
29
30
  ]
30
31
 
31
32
  [tool.pytest.ini_options]
@@ -86,7 +86,12 @@ DSIDS = {
86
86
  'ucar.cgd.cesm2.marine.biogeochemistry': ['d651071'],
87
87
  'ucar.cgd.nw2.mom6': ['d651072'],
88
88
  'ucar.cgd.cesm2.cam6.ppe': ['d651076'],
89
- 'ucar.cgd.cesm2.smyle': ['d651065']
89
+ 'ucar.cgd.cesm2.smyle': ['d651065'],
90
+ # newly added entries
91
+ 'gridded_precip_and_temp' : ['d010078'],
92
+ '29_newman' : ['d010079'],
93
+ 'waccm-x.ion.asymmetry' : ['d010081'],
94
+ 'NARCCAP' : ['d316015']
90
95
  }
91
96
 
92
97
  ALLIDS = list(DSIDS.keys())
@@ -27,7 +27,7 @@ from . import PgIPInfo
27
27
 
28
28
  USAGE = {
29
29
  'PGTBL' : "wusage",
30
- 'GBSDIR' : PgLOG.PGLOG["DSSDATA"] + "/work/logs/gridftp/",
30
+ 'GBSDIR' : PgLOG.PGLOG["GDEXWORK"] + "/logs/gridftp/",
31
31
  'GBSLOG' : "access_log_gridftp0{}_{}",
32
32
  }
33
33
 
@@ -43,7 +43,7 @@ def main():
43
43
 
44
44
 
45
45
  for arg in argv:
46
- ms = re.match(r'^-(b|d|p|N)$', arg)
46
+ ms = re.match(r'^-(b|d|f|p|N)$', arg)
47
47
  if ms:
48
48
  opt = ms.group(1)
49
49
  if opt == 'b':
@@ -80,7 +80,9 @@ def main():
80
80
  def get_log_file_names(option, params, datelimits):
81
81
 
82
82
  filenames = []
83
- if option == 'd':
83
+ if option == 'f':
84
+ filenames = params
85
+ elif option == 'd':
84
86
  for pdate in params:
85
87
  fdate = PgUtil.format_date(pdate, 'MMDDYYYY')
86
88
  fname = USAGE['GBSLOG'].format('?', fdate)
@@ -3,14 +3,16 @@
3
3
  /gpfs/fs1/collections/rda/work/logs/gridftp/ to fill table 'wusage' in
4
4
  database 'rdadb'.
5
5
 
6
- Usage: fillglobususage [-b] [-d LogFileDates] [-N NumberDay] [-p BeginDate [Enddate]]
6
+ Usage: fillglobususage [-b] [-d LogFileDates] [-f LogFileNames] [-N NumberDay] [-p BeginDate [EndDate]]
7
7
 
8
- select option, -d, -N or -p to run this application.
8
+ Select one of the options -d, -f, -N or -p to run this application.
9
9
 
10
10
  - Option -b, log process information into logfile only;
11
11
 
12
12
  - Option -d, retrieve usage info from given log file dates;
13
13
 
14
+ - Option -f, retrieve usage info from given log file names;
15
+
14
16
  - Option -N, retrieve usage info in recent NumberDay days;
15
17
 
16
18
  - Option -p, retrieve usage info between given period. For missing EndDate,
@@ -23,7 +23,7 @@ from . import PgIPInfo
23
23
 
24
24
  USAGE = {
25
25
  'OSDFTBL' : "osdfusage",
26
- 'OSDFDIR' : PgLOG.PGLOG["DSSDATA"] + "/work/zji/osdflogs/",
26
+ 'OSDFDIR' : PgLOG.PGLOG["GDEXWORK"] + "/zji/osdflogs/",
27
27
  'OSDFGET' : 'wget -m -nH -np -nd https://pelicanplatform.org/pelican-access-logs/ncar-access-log/',
28
28
  'OSDFLOG' : "{}-cache.log", # YYYY-MM-DD-cache.log
29
29
  }
@@ -34,7 +34,9 @@ USAGE = {
34
34
  'OPTION' : 0,
35
35
  'PGTBL' : "tdsusage",
36
36
  'TDSLOG' : "/data/logs/nginx/{}.access.log",
37
- 'CDATE' : PgUtil.curdate()
37
+ 'TDSDIR' : PgLOG.PGLOG["GDEXWORK"] + "/zji/tdslogs/",
38
+ 'TDSGET' : 'wget -m -nH -np -nd https://github.com/NCAR/tds-logs/blob/3ffb86d54aa8a164bbd60995247dc1a7e50813b6/logs/',
39
+ 'TDSLOG' : "localhost_access_log.{}.txt" # {} = YYYY-MM-DD
38
40
  }
39
41
 
40
42
  #
@@ -44,120 +46,125 @@ def main():
44
46
 
45
47
  params = [] # array of input values
46
48
  argv = sys.argv[1:]
47
- datelimit = ''
48
- fixrec = False
49
-
49
+ option = None
50
+ datelimits = [None, None]
51
+
50
52
  for arg in argv:
51
- if arg == "-b":
52
- PgLOG.PGLOG['BCKGRND'] = 1
53
- elif arg == "-f":
54
- fixrec = True
55
- elif re.match(r'^-[mNy]$', arg) and USAGE['OPTION'] == 0:
56
- if arg == "-m":
57
- USAGE['OPTION'] = MONTH
58
- elif arg == "-y":
59
- USAGE['OPTION'] = YEARS
60
- elif arg == "-N":
61
- USAGE['OPTION'] = NDAYS
53
+ ms = re.match(r'^-(b|d|p|N)$', arg)
54
+ if ms:
55
+ opt = ms.group(1)
56
+ if opt == 'b':
57
+ PgLOG.PGLOG['BCKGRND'] = 1
58
+ elif option:
59
+ PgLOG.pglog("{}: Option -{} is present already".format(arg, option), PgLOG.LGWNEX)
60
+ else:
61
+ option = opt
62
62
  elif re.match(r'^-', arg):
63
63
  PgLOG.pglog(arg + ": Invalid Option", PgLOG.LGWNEX)
64
- elif USAGE['OPTION']&MASKS:
64
+ elif option:
65
65
  params.append(arg)
66
66
  else:
67
67
  PgLOG.pglog(arg + ": Invalid Parameter", PgLOG.LGWNEX)
68
68
 
69
- if not (USAGE['OPTION'] and params): PgLOG.show_usage('filltdsusage')
70
- PgDBI.dssdb_dbname()
71
- PgLOG.cmdlog("filltdsusage {}".format(' '.join(argv)))
69
+ if not (option and params): PgLOG.show_usage('filltdsusage')
72
70
 
73
- if fixrec:
74
- fix_tds_usages(USAGE['OPTION'], params)
71
+ PgDBI.dssdb_dbname()
72
+ cmdstr = "filltdsusage {}".format(' '.join(argv))
73
+ PgLOG.cmdlog(cmdstr)
74
+ PgFile.change_local_directory(USAGE['TDSDIR'])
75
+ filenames = get_log_file_names(option, params, datelimits)
76
+ if filenames:
77
+ fill_tds_usages(filenames)
75
78
  else:
76
- if USAGE['OPTION']&NDAYS:
77
- curdate = USAGE['CDATE']
78
- datelimit = PgUtil.adddate(curdate, 0, 0, -int(params[0]))
79
- USAGE['OPTION'] = MONTH
80
- params = []
81
-
82
- while curdate >= datelimit:
83
- tms = curdate.split('-')
84
- params.append("{}-{}".format(tms[0], tms[1]))
85
- curdate = PgUtil.adddate(curdate, 0, 0, -int(tms[2]))
86
-
87
- fill_tds_usages(USAGE['OPTION'], params, datelimit)
88
-
89
- PgLOG.pglog(None, PgLOG.LOGWRN|PgLOG.SNDEML) # send email out if any
79
+ PgLOG.pglog("No log file found for given command: " + cmdstr, PgLOG.LOGWRN)
90
80
 
81
+ PgLOG.pglog(None, PgLOG.LOGWRN)
91
82
  sys.exit(0)
92
83
 
93
84
  #
94
- # Fill TDS usages into table dssdb.tdsusage from tds access logs
85
+ # get the log file names for the given option and parameters
95
86
  #
96
- def fill_tds_usages(option, inputs, datelimit):
97
-
98
- cntall = cntadd = 0
87
+ def get_log_file_names(option, params, datelimits):
99
88
 
100
- for input in inputs:
101
- # get log file names
102
- if option&MONTH:
103
- tms = input.split('-')
104
- yrmn = "{}-{:02}".format(tms[0], int(tms[1]))
89
+ filenames = []
90
+ if option == 'd':
91
+ for pdate in params:
92
+ filenames.append(USAGE['TDSLOG'].format(pdate))
93
+ else:
94
+ if option == 'N':
95
+ edate = PgUtil.curdate()
96
+ pdate = datelimits[0] = PgUtil.adddate(edate, 0, 0, -int(params[0]))
105
97
  else:
106
- yrmn = input
98
+ pdate = datelimits[0] = params[0]
99
+ if len(params) > 1:
100
+ edate = datelimits[1] = params[1]
101
+ else:
102
+ edate = PgUtil.curdate()
103
+ while pdate <= edate:
104
+ filenames.append(USAGE['TDSLOG'].format(pdate))
105
+ pdate = PgUtil.adddate(pdate, 0, 0, 1)
107
106
 
108
- logfiles = glob.glob(USAGE['TDSLOG'].format(yrmn + '*'))
109
- if not logfiles: PgLOG.pglog("{}: No file found to gather TDS usage".format(yrmn), PgLOG.LOGWRN)
110
- for logfile in logfiles:
111
- if not op.isfile(logfile):
112
- PgLOG.pglog("{}: Not exists to gather TDS usage".format(logfile), PgLOG.LOGWRN)
113
- continue
114
- fdate = None
115
- ms = re.search(r'(\d+-\d+-\d+).access.log$', logfile)
116
- if ms:
117
- fdate = ms.group(1)
118
- if fdate >= USAGE['CDATE']: continue
119
- if datelimit and fdate < datelimit: continue
120
- PgLOG.pglog("Gathering usage info from {} at {}".format(logfile, PgLOG.current_datetime()), PgLOG.LOGWRN)
121
- tds = PgFile.open_local_file(logfile)
122
- if not tds: continue
123
- ptime = ''
124
- records = {}
125
- entcnt = 0
126
- while True:
127
- line = tds.readline()
128
- if not line: break
129
- entcnt += 1
130
- if entcnt%20000 == 0:
131
- cnt = len(records)
132
- PgLOG.pglog("{}/{} TDS log entries processed/records to add".format(entcnt, cnt), PgLOG.WARNLG)
133
-
134
- ms = re.search(r'(/thredds/catalog|\sGooglebot/)', line)
135
- if ms: continue
136
- ms = re.search(r'/thredds/\S+\.(png|jpg|gif|css|htm)', line)
137
- if ms: continue
138
- ms = re.match(r'^([\d\.]+)\s.*\s(-|\S+@\S+)\s+\[(\S+).*/thredds/(\w+)(/|/grid/)(aggregations|files).*/(ds\d\d\d.\d|[a-z]\d{6})/.*\s200\s+(\d+)(.*)$', line)
139
- if not ms: continue
140
- ip = ms.group(1)
141
- email = ms.group(2)
142
- (date, time) = get_record_date_time(ms.group(3))
143
- method = ms.group(4)
144
- etype = ms.group(6)[0].upper()
145
- dsid = PgUtil.format_dataset_id(ms.group(7))
146
- size = int(ms.group(8))
147
- ebuf = ms.group(9)
148
- ms = re.search(r' "(\w+.*\S+)" ', ebuf)
149
- engine = ms.group(1) if ms else 'Unknown'
150
- key = "{}:{}:{}:{}".format(ip, dsid, method, etype)
107
+ return filenames
151
108
 
152
- if key in records:
153
- records[key]['size'] += size
154
- records[key]['fcount'] += 1
155
- else:
156
- records[key] = {'ip' : ip, 'email' : email, 'dsid' : dsid, 'time' : time, 'size' : size,
157
- 'fcount' : 1, 'method' : method, 'etype' : etype, 'engine' : engine}
158
- tds.close()
159
- if records: cntadd += add_usage_records(records, fdate)
160
- cntall += entcnt
109
+ #
110
+ # Fill TDS usages into table dssdb.tdsusage from tds access logs
111
+ #
112
+ def fill_tds_usages(fnames):
113
+
114
+ year = cntall = addall = 0
115
+ for logfile in fnames:
116
+ linfo = PgFile.check_local_file(logfile)
117
+ if not linfo:
118
+ gzfile = logfile + '.gz'
119
+ PgLOG.pgsystem(USAGE['TDSGET'] + gzfile, 5, PgLOG.LOGWRN)
120
+ linfo = PgFile.check_local_file(gzfile)
121
+ if not linfo:
122
+ PgLOG.pglog("{}: Not exists for Gathering TDS usage".format(gzfile), PgLOG.LOGWRN)
123
+ continue
124
+ PgFile.compress_local_file(gzfile)
125
+ linfo = PgFile.check_local_file(logfile)
126
+ if not linfo:
127
+ PgLOG.pglog("{}: Error ungzip TDS usage".format(gzfile), PgLOG.LGEREX)
128
+ PgLOG.pglog("{}: Gathering TDS usage at {}".format(logfile, PgLOG.current_datetime()), PgLOG.LOGWRN)
129
+ tds = PgFile.open_local_file(logfile)
130
+ if not tds: continue
131
+ records = {}
132
+ cntadd = entcnt = 0
133
+ while True:
134
+ line = tds.readline()
135
+ if not line: break
136
+ entcnt += 1
137
+ if entcnt%20000 == 0:
138
+ cnt = len(records)
139
+ PgLOG.pglog("{}/{} TDS log entries processed/records to add".format(entcnt, cnt), PgLOG.WARNLG)
140
+
141
+ ms = re.search(r'(/thredds/catalog|\sGooglebot/)', line)
142
+ if ms: continue
143
+ ms = re.search(r'/thredds/\S+\.(png|jpg|gif|css|htm)', line)
144
+ if ms: continue
145
+ ms = re.match(r'^([\d\.]+)\s.*\s(-|\S+@\S+)\s+\[(\S+).*/thredds/(\w+)(/|/grid/)(aggregations|files).*/(ds\d\d\d.\d|[a-z]\d{6})/.*\s200\s+(\d+)(.*)$', line)
146
+ if not ms: continue
147
+ ip = ms.group(1)
148
+ email = ms.group(2)
149
+ (date, time) = get_record_date_time(ms.group(3))
150
+ method = ms.group(4)
151
+ etype = ms.group(6)[0].upper()
152
+ dsid = PgUtil.format_dataset_id(ms.group(7))
153
+ size = int(ms.group(8))
154
+ ebuf = ms.group(9)
155
+ ms = re.search(r' "(\w+.*\S+)" ', ebuf)
156
+ engine = ms.group(1) if ms else 'Unknown'
157
+ key = "{}:{}:{}:{}".format(ip, dsid, method, etype)
158
+
159
+ if key in records:
160
+ records[key]['size'] += size
161
+ records[key]['fcount'] += 1
162
+ else:
163
+ records[key] = {'ip' : ip, 'email' : email, 'dsid' : dsid, 'time' : time, 'size' : size,
164
+ 'fcount' : 1, 'method' : method, 'etype' : etype, 'engine' : engine}
165
+ tds.close()
166
+ if records: cntadd += add_usage_records(records, date)
167
+ cntall += entcnt
161
168
 
162
169
  PgLOG.pglog("{} TDS usage records added for {} entries at {}".format(cntadd, cntall, PgLOG.current_datetime()), PgLOG.LOGWRN)
163
170
 
@@ -0,0 +1,16 @@
1
+
2
+ Retrieves usage information from RDA TDS Server logs on GitHub to
3
+ fill table 'tdsusage' in MySQL database 'dssdb'.
4
+
5
+ Usage: filltdsusage [-b] [-d LogFileDates] [-N NumberDay] [-p BeginDate [EndDate]]
6
+
7
+ Select one of the options -d, -N or -p to run this application.
8
+
9
+ - Option -b, log process information into logfile only;
10
+
11
+ - Option -d, retrieve usage info from given log file dates;
12
+
13
+ - Option -N, retrieve usage info in recent NumberDay days;
14
+
15
+ - Option -p, retrieve usage info between given period. For missing EndDate,
16
+ it defaults to the current date.
@@ -264,6 +264,7 @@ def archive_dssdb_log():
264
264
  PgLOG.pgsystem("cp -p -f {} backup/{}".format(file, file), PgLOG.LWEMEX, 5)
265
265
  if info['logname'] != PgLOG.PGLOG['GDEXUSER']: PgLOG.pgsystem("rm -rf " + file)
266
266
  PgLOG.pgsystem("cat /dev/null > " + file, 0, 1024)
267
+ if file == 'gdexls.log': PgLOG.pgsystem("chmod 666 " + file, 0, 1024)
267
268
  if op.exists(logfile):
268
269
  PgLOG.pgsystem("tar -uvf {} -C backup {}".format(logfile, file), PgLOG.LWEMEX, 5)
269
270
  else:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rda_python_metrics
3
- Version: 1.0.44
3
+ Version: 1.0.47
4
4
  Summary: RDA Python Package to gather and view data usage metrics
5
5
  Author-email: Zaihua Ji <zji@ucar.edu>
6
6
  Project-URL: Homepage, https://github.com/NCAR/rda-python-metrics
@@ -17,6 +17,7 @@ Requires-Dist: geoip2
17
17
  Requires-Dist: ipinfo
18
18
  Requires-Dist: httplib2
19
19
  Requires-Dist: dnspython
20
+ Requires-Dist: unidecode
20
21
  Dynamic: license-file
21
22
 
22
23
  RDA Python Package to gather and view data usage metrics.
@@ -4,3 +4,4 @@ geoip2
4
4
  ipinfo
5
5
  httplib2
6
6
  dnspython
7
+ unidecode
@@ -1,17 +0,0 @@
1
-
2
- Retrieves usage information from RDA TDS Server logs under ../logs/tomcat to
3
- fill table 'tdsusage' in MySQL database 'dssdb'.
4
-
5
- Usage: filltdsusage [-b] [-m MonthList] [-N NumberDay] [-y YearList]
6
-
7
- select one of the options, -m, -N or -y each time to run
8
- this application.
9
-
10
- - Option -b, log process information into logfile only;
11
-
12
- - Option -m, retrieve usage info in given months;
13
-
14
- - Option -N, retrieve usage info in recent NumberDay days;
15
-
16
- - Option -y, retrieve usage info in given years.
17
-