rda-python-metrics 1.0.25__tar.gz → 1.0.27__tar.gz

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (59)
  1. {rda_python_metrics-1.0.25/src/rda_python_metrics.egg-info → rda_python_metrics-1.0.27}/PKG-INFO +1 -1
  2. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/pyproject.toml +1 -1
  3. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/PgIPInfo.py +5 -5
  4. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/fillcdgusage.py +7 -1
  5. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/fillipinfo.py +4 -5
  6. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/fillosdfusage.py +32 -28
  7. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/viewallusage.usg +4 -2
  8. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27/src/rda_python_metrics.egg-info}/PKG-INFO +1 -1
  9. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/LICENSE +0 -0
  10. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/MANIFEST.in +0 -0
  11. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/README.md +0 -0
  12. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/setup.cfg +0 -0
  13. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/PgView.py +0 -0
  14. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/__init__.py +0 -0
  15. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/fillawsusage.py +0 -0
  16. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/fillawsusage.usg +0 -0
  17. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/fillcdgusage.usg +0 -0
  18. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/fillcodusage.py +0 -0
  19. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/fillcodusage.usg +0 -0
  20. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/fillcountry.py +0 -0
  21. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/fillendtime.py +0 -0
  22. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/fillglobususage.py +0 -0
  23. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/fillglobususage.usg +0 -0
  24. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/fillipinfo.usg +0 -0
  25. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/filloneorder.py +0 -0
  26. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/filloneorder.usg +0 -0
  27. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/fillosdfusage.usg +0 -0
  28. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/fillrdadb.py +0 -0
  29. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/fillrdadb.usg +0 -0
  30. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/filltdsusage.py +0 -0
  31. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/filltdsusage.usg +0 -0
  32. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/filluser.py +0 -0
  33. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/filluser.usg +0 -0
  34. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/logarch.py +0 -0
  35. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/logarch.usg +0 -0
  36. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/pgperson.py +0 -0
  37. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/pgsyspath.py +0 -0
  38. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/pgusername.py +0 -0
  39. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/viewallusage.py +0 -0
  40. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/viewcheckusage.py +0 -0
  41. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/viewcheckusage.usg +0 -0
  42. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/viewcodusage.py +0 -0
  43. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/viewcodusage.usg +0 -0
  44. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/viewordusage.py +0 -0
  45. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/viewordusage.usg +0 -0
  46. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/viewrqstusage.py +0 -0
  47. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/viewrqstusage.usg +0 -0
  48. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/viewtdsusage.py +0 -0
  49. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/viewtdsusage.usg +0 -0
  50. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/viewwebfile.py +0 -0
  51. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/viewwebfile.usg +0 -0
  52. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/viewwebusage.py +0 -0
  53. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics/viewwebusage.usg +0 -0
  54. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics.egg-info/SOURCES.txt +0 -0
  55. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics.egg-info/dependency_links.txt +0 -0
  56. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics.egg-info/entry_points.txt +0 -0
  57. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics.egg-info/requires.txt +0 -0
  58. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/src/rda_python_metrics.egg-info/top_level.txt +0 -0
  59. {rda_python_metrics-1.0.25 → rda_python_metrics-1.0.27}/tests/test_metrics.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rda_python_metrics
3
- Version: 1.0.25
3
+ Version: 1.0.27
4
4
  Summary: RDA Python Package to gather and view data usage metrics
5
5
  Author-email: Zaihua Ji <zji@ucar.edu>
6
6
  Project-URL: Homepage, https://github.com/NCAR/rda-python-metrics
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
6
6
 
7
7
  [project]
8
8
  name = "rda_python_metrics"
9
- version = "1.0.25"
9
+ version = "1.0.27"
10
10
  authors = [
11
11
  { name="Zaihua Ji", email="zji@ucar.edu" },
12
12
  ]
@@ -109,16 +109,16 @@ def domain_ipinfo_record(dmname):
109
109
  #
110
110
  def get_ip_hostname(ip, iprec, record):
111
111
 
112
+ record['hostname'] = ip
112
113
  if iprec:
113
114
  if 'hostname' in iprec and iprec['hostname']:
114
115
  record['hostname'] = iprec['hostname']
115
116
  record['org_type'] = PgDBI.get_org_type(None, record['hostname'])
116
117
  return
117
- record['hostname'] = ip
118
- if 'domain' in iprec and iprec['domain']:
119
- record['hostname'] += '.' + iprec['domain']
120
- record['org_type'] = PgDBI.get_org_type(None, record['hostname'])
121
- return
118
+ if 'asn' in iprec and iprec['asn'] and 'domain' in iprec['asn'] and iprec['asn']['domain']:
119
+ record['hostname'] += '.' + iprec['asn']['domain']
120
+ record['org_type'] = PgDBI.get_org_type(None, record['hostname'])
121
+ return
122
122
 
123
123
  try:
124
124
  hostrec = socket.gethostbyaddr(ip)
@@ -66,7 +66,13 @@ DSIDS = {
66
66
  'ucar.cgd.ccsm4.CESM1-CCSM4_mid-Pliocene' : ['d651042'],
67
67
  'ucar.cgd.ccsm4.PaleoIF' : ['d651052'],
68
68
  'ucar.cgd.ccsm4.b.e11.B20LE_fixedO3' : ['d651053'],
69
- 'ucar.cgd.cesm2.single.forcing.large.ensemble' : ['d651055']
69
+ 'ucar.cgd.cesm2.single.forcing.large.ensemble' : ['d651055'],
70
+ 'ucar.cgd.cesm2le.output': ['d651056'],
71
+ 'ucar.cgd.cesm2.s2s_hindcasts': ['d651060'],
72
+ 'ucar.cgd.cesm2.s2s_hindcasts.mjo': ['d651061'],
73
+ 'ucar.cgd.cesm2.s2s_hindcasts.tc_tracks': ['d651062'],
74
+ 'ucar.cgd.cesm2.s2s_hindcasts.cesm2.climo': ['d651063'],
75
+ 'ucar.cgd.ccsm4.ARISE-SAI-1.5' : ['d651059']
70
76
  }
71
77
 
72
78
  ALLIDS = list(DSIDS.keys())
@@ -113,7 +113,7 @@ def fill_ip_info(option, inputs, table):
113
113
  def get_next_date(date, edate):
114
114
 
115
115
  if date < edate:
116
- ndate = PgUtil.enddate(date, 'M')
116
+ ndate = PgUtil.enddate(date, 0, 'M')
117
117
  if ndate < edate: edate = ndate
118
118
  if date < edate:
119
119
  cond = f"BETWEEN '{date}' AND '{edate}'"
@@ -186,14 +186,13 @@ def fix_wuser_records(date, cnd):
186
186
 
187
187
  table = 'wuser'
188
188
  cond = f"start_date {cnd} AND region IS NULL"
189
- pgrecs = PgDBI.pgmget(table, 'wuid, email, ip', cond, PgLOG.LGEREX)
189
+ pgrecs = PgDBI.pgmget(table, 'wuid, email', cond, PgLOG.LGEREX)
190
190
  if not pgrecs: return 0
191
- cnt = len(pgrecs['ip']) if pgrecs else 0
191
+ cnt = len(pgrecs['wuid']) if pgrecs else 0
192
192
  mcnt = 0
193
193
  for i in range(cnt):
194
- ip = pgrecs['ip'][i]
195
194
  email = pgrecs['email'][i]
196
- record = PgIPInfo.get_missing_ipinfo(ip, email)
195
+ record = PgIPInfo.get_missing_ipinfo(None, email)
197
196
  if record:
198
197
  mcnt += PgDBI.pgupdt(table, record, "wuid = '{}'".format(pgrecs['wuid'][i]))
199
198
 
@@ -25,7 +25,7 @@ USAGE = {
25
25
  'OSDFTBL' : "wusage",
26
26
  'OSDFDIR' : PgLOG.PGLOG["DSSDATA"] + "/work/zji/osdflogs/",
27
27
  'OSDFGET' : 'wget -m -nH -np -nd https://pelicanplatform.org/pelican-access-logs/ncar-access-log/',
28
- 'OSDFLOG' : "{}.log", # YYYY-MM-DD.log
28
+ 'OSDFLOG' : "{}-cache.log", # YYYY-MM-DD-cache.log
29
29
  }
30
30
 
31
31
  #
@@ -37,7 +37,6 @@ def main():
37
37
  argv = sys.argv[1:]
38
38
  option = None
39
39
  datelimits = [None, None]
40
-
41
40
 
42
41
  for arg in argv:
43
42
  ms = re.match(r'^-(b|d|p|N)$', arg)
@@ -64,7 +63,7 @@ def main():
64
63
  PgFile.change_local_directory(USAGE['OSDFDIR'])
65
64
  filenames = get_log_file_names(option, params, datelimits)
66
65
  if filenames:
67
- fill_osdf_usages(filenames, datelimits)
66
+ fill_osdf_usages(filenames)
68
67
  else:
69
68
  PgLOG.pglog("No log file found for given command: " + cmdstr, PgLOG.LOGWRN)
70
69
 
@@ -79,10 +78,6 @@ def get_log_file_names(option, params, datelimits):
79
78
  filenames = []
80
79
  if option == 'd':
81
80
  for pdate in params:
82
- pdays = PgUtil.get_weekday(pdate)
83
- if pdays > 0:
84
- PgLOG.pglog(pdate + ": Skip a Non-Sunday date", PgLOG.LOGWRN)
85
- continue
86
81
  filenames.append(USAGE['OSDFLOG'].format(pdate))
87
82
  else:
88
83
  if option == 'N':
@@ -94,32 +89,34 @@ def get_log_file_names(option, params, datelimits):
94
89
  edate = datelimits[1] = params[1]
95
90
  else:
96
91
  edate = PgUtil.curdate()
97
- pdays = PgUtil.get_weekday(pdate)
98
- if pdays > 0: pdate = PgUtil.adddate(pdate, 0, 0, 7-pdays)
99
92
  while pdate <= edate:
100
93
  filenames.append(USAGE['OSDFLOG'].format(pdate))
101
- pdate = PgUtil.adddate(pdate, 0, 0, 7)
94
+ pdate = PgUtil.adddate(pdate, 0, 0, 1)
102
95
 
103
96
  return filenames
104
97
 
105
98
  #
106
99
  # Fill OSDF usages into table dssdb.osdfusage of DSS PgSQL database from osdf access logs
107
100
  #
108
- def fill_osdf_usages(fnames, datelimits):
101
+ def fill_osdf_usages(fnames):
109
102
 
110
103
  cntall = addall = 0
111
104
 
112
105
  fcnt = len(fnames)
113
106
  for logfile in fnames:
114
- PgLOG.pgsystem(USAGE['OSDFGET'] + logfile, 5, PgLOG.LOGWRN)
115
107
  linfo = PgFile.check_local_file(logfile)
116
108
  if not linfo:
117
- PgLOG.pglog("{}: Not exists for Gathering OSDF usage".format(logfile), PgLOG.LOGWRN)
118
- continue
119
- if linfo['data_size'] == 0:
120
- PgLOG.pglog("{}: Empty log for Gathering OSDF usage".format(logfile), PgLOG.LOGWRN)
121
- continue
122
- PgLOG.pglog("Gathering usage info from {} at {}".format(logfile, PgLOG.current_datetime()), PgLOG.LOGWRN)
109
+ xzfile = logfile + '.xz'
110
+ PgLOG.pgsystem(USAGE['OSDFGET'] + xzfile, 5, PgLOG.LOGWRN)
111
+ linfo = PgFile.check_local_file(xzfile)
112
+ if not linfo:
113
+ PgLOG.pglog("{}: Not exists for Gathering OSDF usage".format(xzfile), PgLOG.LOGWRN)
114
+ continue
115
+ PgFile.compress_local_file(xzfile)
116
+ linfo = PgFile.check_local_file(logfile)
117
+ if not linfo:
118
+ PgLOG.pglog("{}: Error unxz OSDF usage".format(xzfile), PgLOG.LGEREX)
119
+ PgLOG.pglog("{}: Gathering OSDF usage at {}".format(logfile, PgLOG.current_datetime()), PgLOG.LOGWRN)
123
120
  osdf = PgFile.open_local_file(logfile)
124
121
  if not osdf: continue
125
122
  cntadd = entcnt = 0
@@ -131,19 +128,26 @@ def fill_osdf_usages(fnames, datelimits):
131
128
  if entcnt%10000 == 0:
132
129
  PgLOG.pglog("{}: {}/{} OSDF log entries processed/records added".format(logfile, entcnt, cntadd), PgLOG.WARNLG)
133
130
 
134
- ms = re.match(r'^\[(\S+)\] \[Objectname:\/ncar\/rda\/([a-z]\d{6})\/(\S+)\] \[Host:(\S+)\] \[Server:(\S+)\] \[Read:(\d+)\]', line)
131
+ ms = re.match(r'^\[(\S+)\] \[Objectname:\/ncar\/rda\/([a-z]\d{6})\/(\S+)\].* \[Host:(\S+)\].* \[AppInfo:(\S+)\].* \[Read:(\d+)\]', line)
135
132
  if not ms: continue
136
- size = int(ms.group(6))
137
- if size < 100: continue # ignore small files
138
- ip = ms.group(4)
139
- dsid = PgUtil.format_dataset_id(ms.group(2))
133
+ dt = ms.group(1)
134
+ dsid = ms.group(2)
140
135
  wfile = ms.group(3)
136
+ ip = ms.group(4)
141
137
  engine = ms.group(5)
142
-
143
- (year, quarter, date, time) = get_record_date_time(ms.group(1))
144
- if datelimits[0] and date < datelimits[0]: continue
145
- if datelimits[1] and date > datelimits[1]: continue
138
+ size = int(ms.group(6))
139
+ (year, quarter, date, time) = get_record_date_time(dt)
146
140
  locflag = 'C'
141
+ if re.match(r'^curl', engine, re.I):
142
+ method = "CURL"
143
+ elif re.match(r'^wget', engine, re.I):
144
+ method = "WGET"
145
+ elif re.match(r'^python', engine, re.I):
146
+ method = "PYTHN"
147
+ elif re.match(r'^N/A', engine, re.I):
148
+ method = "N/A"
149
+ else:
150
+ method = "WEB"
147
151
  method = "OSDF"
148
152
 
149
153
  record = {'ip' : ip, 'dsid' : dsid, 'wfile' : wfile, 'date' : date,
@@ -209,7 +213,7 @@ def add_to_allusage(year, logrec, wurec):
209
213
  pgrec['size'] = logrec['size']
210
214
  pgrec['method'] = logrec['method']
211
215
  pgrec['ip'] = logrec['ip']
212
- pgrec['source'] = 'W'
216
+ pgrec['source'] = 'P'
213
217
  return PgDBI.add_yearly_allusage(year, pgrec)
214
218
 
215
219
  #
@@ -53,7 +53,8 @@
53
53
  E*- EMAIL - user email address
54
54
  F*- FORMAT - dataset default data format
55
55
  G*- GROUP - data usage source group: W - Web log, G - Globus,
56
- T - Thredds, O - Order, and A - AWS log
56
+ T - Thredds, O - Request Order, A - AWS log,
57
+ C - CDG GDEX DB, and P - OSDF Pelican Platform
57
58
  K*- REGION - Region Name users from
58
59
  M*- MONTH - format as YYYY-MM, for example 2004-04
59
60
  N*- COUNTRY - country codes users from
@@ -135,7 +136,8 @@
135
136
  - Option -f, for data in datasets with specified data format;
136
137
 
137
138
  - Option -g, for data usage source groups, W - Web log, G - Globus,
138
- T - Thredds, O - Order, and A - AWS log;
139
+ T - Thredds, O - Request Order, A - AWS log, C - CDG GDEX DB,
140
+ and P - OSDF logs on Pelican Platform;
139
141
 
140
142
  - Option -h, works with Option -e to include historical user emails
141
143
  registered before;
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rda_python_metrics
3
- Version: 1.0.25
3
+ Version: 1.0.27
4
4
  Summary: RDA Python Package to gather and view data usage metrics
5
5
  Author-email: Zaihua Ji <zji@ucar.edu>
6
6
  Project-URL: Homepage, https://github.com/NCAR/rda-python-metrics