pyglider 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyglider/__init__.py ADDED
@@ -0,0 +1 @@
1
+ # -*- coding: utf-8 -*-
pyglider/_version.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = '0.0.5'
@@ -0,0 +1,22 @@
1
+ from zipfile import ZipFile
2
+ import pooch
3
+
4
def get_example_data(outdir='./'):
    """
    Download the example data archive and unpack it.

    The archive is fetched (and hash-checked) with ``pooch.retrieve`` and
    then extracted with the standard-library ``ZipFile``.

    Parameters
    ----------
    outdir : str or Path, optional
        Directory the archive is extracted into.  The files will be put at
        ``outdir/example-data/``.  Default is to unpack in the current
        directory.
    """
    url = "https://cproof.uvic.ca/pyglider-example-data/pyglider-example-data.zip"
    sha256 = '5643a5301530e8dd60060a357cd9ed88eb1e84d761710c2a4013bc3c1817a859'

    # pooch returns the local path of the downloaded archive
    archive_path = pooch.retrieve(url, known_hash=sha256)

    # unpack every member of the archive below ``outdir``
    with ZipFile(archive_path, mode='r') as archive:
        archive.extractall(outdir)
21
+
22
+ __all__ = ['get_example_data']
pyglider/ncprocess.py ADDED
@@ -0,0 +1,265 @@
1
+ """
2
+ Routines that are used for common processing of netcdf files after they have
3
+ been converted to standard timeseries.
4
+ """
5
+ import logging
6
+ import xarray as xr
7
+ import numpy as np
8
+ import pyglider.utils as utils
9
+ import os
10
+ import yaml
11
+ import netCDF4
12
+ import scipy.stats as stats
13
+
14
+ _log = logging.getLogger(__name__)
15
+
16
+
17
def extract_timeseries_profiles(inname, outdir, deploymentyaml):
    """
    Extract and save each profile from a timeseries netCDF.

    One netCDF file is written to ``outdir`` per profile found in
    ``inname``.  A profile whose output file already exists is skipped.

    Parameters
    ----------
    inname : str or Path
        netcdf file to break into profiles

    outdir : str or Path
        directory to place profiles.  It is created (including missing
        parent directories) if it does not exist yet.

    deploymentyaml : str or Path
        location of deployment yaml file for the netCDF file.  This should
        be the same yaml file that was used to make the timeseries file.
        Its ``metadata`` and ``profile_variables`` sections are read.
    """
    os.makedirs(outdir, exist_ok=True)

    with open(deploymentyaml) as fin:
        deployment = yaml.safe_load(fin)
    meta = deployment['metadata']

    with xr.open_dataset(inname) as ds:
        _log.info('Extracting profiles: opening %s', inname)
        profiles = np.unique(ds.profile_index)
        # keep only real profiles: not NaN, whole-numbered and positive
        profiles = [p for p in profiles
                    if not np.isnan(p) and p % 1 == 0 and p > 0]
        for p in profiles:
            ind = np.where(ds.profile_index == p)[0]
            dss = ds.isel(time=ind)
            # os.path.join (rather than ``+``) so that Path inputs work too
            outname = os.path.join(outdir, utils.get_file_id(dss) + '.nc')
            _log.info('Checking %s', outname)
            if os.path.exists(outname):
                continue

            # this is the id for the whole file, not just this profile..
            trajectory_id = utils.get_file_id(ds).encode()
            trajlen = len(trajectory_id)
            dss['trajectory'] = trajectory_id
            dss['trajectory'].attrs['cf_role'] = 'trajectory_id'
            dss['trajectory'].attrs['comment'] = (
                'A trajectory is a single '
                'deployment of a glider and may span multiple data files.')
            dss['trajectory'].attrs['long_name'] = 'Trajectory/Deployment Name'

            # profile-averaged variables....
            profile_meta = deployment['profile_variables']
            if 'water_velocity_eastward' in dss.keys():
                dss['u'] = dss.water_velocity_eastward.mean()
                dss['u'].attrs = profile_meta['u']

                dss['v'] = dss.water_velocity_northward.mean()
                dss['v'].attrs = profile_meta['v']
            elif 'u' in profile_meta:
                # no velocities in the data: write the fill value instead
                dss['u'] = profile_meta['u'].get('_FillValue', np.nan)
                dss['u'].attrs = profile_meta['u']

                dss['v'] = profile_meta['v'].get('_FillValue', np.nan)
                dss['v'].attrs = profile_meta['v']

            dss['profile_id'] = np.array(p*1.0)
            dss['profile_id'].attrs = profile_meta['profile_id']
            dss['profile_time'] = dss.time.mean()
            dss['profile_time'].attrs = profile_meta['profile_time']
            dss['profile_lon'] = dss.longitude.mean()
            dss['profile_lon'].attrs = profile_meta['profile_lon']
            dss['profile_lat'] = dss.latitude.mean()
            dss['profile_lat'].attrs = profile_meta['profile_lat']

            dss['lat'] = dss['latitude']
            dss['lon'] = dss['longitude']

            # 'platform' is a container variable: only its attributes matter
            dss['platform'] = np.nan
            comment = (meta['glider_model'] + ' operated by ' +
                       meta['institution'])
            dss['platform'].attrs['comment'] = comment
            # str() so that an unquoted (numeric) serial in the yaml
            # does not break the concatenation
            dss['platform'].attrs['id'] = (
                str(meta['glider_name']) + str(meta['glider_serial']))
            dss['platform'].attrs['instrument'] = 'instrument_ctd'
            dss['platform'].attrs['long_name'] = (
                meta['glider_model'] + dss['platform'].attrs['id'])
            dss['platform'].attrs['type'] = 'platform'
            dss['platform'].attrs['wmo_id'] = meta['wmo_id']

            dss['lat_uv'] = np.nan
            dss['lat_uv'].attrs = profile_meta['lat_uv']
            dss['lon_uv'] = np.nan
            dss['lon_uv'].attrs = profile_meta['lon_uv']
            dss['time_uv'] = np.nan
            dss['time_uv'].attrs = profile_meta['time_uv']

            dss['instrument_ctd'] = np.nan
            dss['instrument_ctd'].attrs = profile_meta['instrument_ctd']

            dss.attrs['date_modified'] = str(np.datetime64('now')) + 'Z'

            # ancillary variables::
            to_fill = ['temperature', 'pressure', 'conductivity',
                       'salinity', 'density', 'lon', 'lat', 'depth']
            for name in to_fill:
                dss[name].attrs['ancillary_variables'] = name + '_qc'

            _log.info('Writing %s', outname)
            timeunits = 'nanoseconds since 1970-01-01T00:00:00Z'
            timecalendar = 'gregorian'
            dss.to_netcdf(outname, encoding={'time': {'units': timeunits,
                                                      'calendar': timecalendar},
                                             'profile_time':
                                             {'units': timeunits}})

            # add traj_strlen using bare netCDF4 to make IOOS happy: the
            # dimension is looked up by the name 'string<N>', with N the
            # byte length of the trajectory id
            with netCDF4.Dataset(outname, 'r+') as nc:
                nc.renameDimension('string%d' % trajlen, 'traj_strlen')
130
+
131
+
132
def make_gridfiles(inname, outdir, deploymentyaml, *, fnamesuffix='', dz=1):
    """
    Turn a timeseries netCDF file into a vertically gridded netCDF.

    Parameters
    ----------
    inname : str or Path
        timeseries netcdf file to grid

    outdir : str or Path
        directory to place the gridded file in.  It is created (including
        missing parent directories) if it does not exist yet.

    deploymentyaml : str or Path
        location of deployment yaml file for the netCDF file.  This should
        be the same yaml file that was used to make the timeseries file.
        Its ``profile_variables`` section is read.

    fnamesuffix : str, default = ''
        Text inserted in the output file name between ``_grid`` and ``.nc``.

    dz : float, default = 1
        Vertical grid spacing in meters.

    Returns
    -------
    outname : str
        Name of gridded netCDF file.  The gridded netCDF file has coordinates of
        'depth' and 'profile', so each variable is gridded in depth bins and by
        profile number.  Each profile has a time, latitude, and longitude.
    """
    os.makedirs(outdir, exist_ok=True)

    with open(deploymentyaml) as fin:
        deployment = yaml.safe_load(fin)
    profile_meta = deployment['profile_variables']

    # context manager so the input file is closed again when we are done
    with xr.open_dataset(inname, decode_times=True) as ds:
        _log.info(f'Working on: {inname}')
        _log.debug(str(ds))
        _log.debug(str(ds.time[0]))
        _log.debug(str(ds.time[-1]))

        profiles = np.unique(ds.profile_index)
        # keep only real profiles: not NaN, whole-numbered and positive
        profiles = [p for p in profiles
                    if not np.isnan(p) and p % 1 == 0 and p > 0]
        # bin edges half way between consecutive profile numbers
        profile_bins = np.hstack(
            (np.array(profiles) - 0.5, [profiles[-1] + 0.5]))

        Nprofiles = len(profiles)
        _log.info(f'Nprofiles {Nprofiles}')
        depth_bins = np.arange(0, 1100.1, dz)
        # bin centres; dz / 2 (not a fixed 0.5) so that dz != 1 is correct
        depths = depth_bins[:-1] + dz / 2

        dsout = xr.Dataset(
            coords={'depth': ('depth', depths),
                    'profile': ('time', profiles)})

        profile_index = ds['profile_index'].values
        # time as float nanoseconds since 1970 so that it can be binned
        time_ns = ds['time'].values.astype(
            'datetime64[ns]').astype(np.float64)
        epoch = np.datetime64('1970-01-01T00:00:00')

        dat = _profile_statistic(profile_index, time_ns, profile_bins, 'mean')
        _log.info(f'time {len(dat)}')
        dsout['time'] = (
            'time', dat.astype('timedelta64[ns]') + epoch, ds['time'].attrs)
        for td in ('longitude', 'latitude'):
            dat = _profile_statistic(
                profile_index, ds[td].values, profile_bins, 'mean')
            _log.info(f'{td} {len(dat)}')
            dsout[td] = ('time', dat, ds[td].attrs)
        _log.info(f'Done times! {len(dat)}')

        # First and last timestamp of every profile.
        # NOTE(review): these are datetime variables; if the yaml attributes
        # for them carry a 'units' key xarray may refuse to encode them on
        # write -- confirm against the deployment yaml.
        for name, statistic in (('profile_time_start', 'min'),
                                ('profile_time_end', 'max')):
            dat = _profile_statistic(
                profile_index, time_ns, profile_bins, statistic)
            dsout[name] = (
                'time', dat.astype('timedelta64[ns]') + epoch,
                profile_meta[name])

        for k in ds.keys():
            # positions and anything time-like were handled per profile above
            if k in ['time', 'longitude', 'latitude', 'depth'] or 'time' in k:
                continue
            _log.info('Gridding %s', k)
            good = np.where(
                ~np.isnan(ds[k]) & (ds['profile_index'] % 1 == 0))[0]
            if len(good) <= 0:
                continue
            if "average_method" in ds[k].attrs:
                average_method = ds[k].attrs["average_method"]
                ds[k].attrs["processing"] = (
                    f"Using average method {average_method} for "
                    f"variable {k} following deployment yaml.")
                if average_method == "geometric mean":
                    average_method = stats.gmean
                    ds[k].attrs["processing"] += (
                        " Using geometric mean implementation "
                        "scipy.stats.gmean")
            else:
                average_method = "mean"

            dat, xedges, yedges, binnumber = stats.binned_statistic_2d(
                ds['profile_index'].values[good],
                ds['depth'].values[good],
                values=ds[k].values[good], statistic=average_method,
                bins=[profile_bins, depth_bins])

            _log.debug(f'dat{np.shape(dat)}')
            dsout[k] = (('depth', 'time'), dat.T, ds[k].attrs)

            # fill gaps in data:
            dsout[k].values = utils.gappy_fill_vertical(dsout[k].values)

        # fix u and v, because they should really not be gridded...
        if (('water_velocity_eastward' in dsout.keys()) and
                ('u' in profile_meta.keys())):
            _log.debug(str(ds.water_velocity_eastward))
            dsout['u'] = dsout.water_velocity_eastward.mean(axis=0)
            dsout['u'].attrs = profile_meta['u']
            dsout['v'] = dsout.water_velocity_northward.mean(axis=0)
            dsout['v'].attrs = profile_meta['v']
            dsout = dsout.drop_vars(['water_velocity_eastward',
                                     'water_velocity_northward'])
        dsout.attrs = ds.attrs

        # os.path.join (rather than ``+``) so that Path inputs work too
        outname = os.path.join(
            outdir, ds.attrs['deployment_name'] + '_grid' + fnamesuffix + '.nc')
        _log.info('Writing %s', outname)
        dsout.to_netcdf(outname)
        _log.info('Done gridding')

    return outname


def _profile_statistic(profile_index, values, profile_bins, statistic):
    """Bin ``values`` by profile number and reduce each bin with ``statistic``."""
    # ignore missing values and samples that sit between profiles
    good = ~np.isnan(values) & (profile_index % 1 == 0)
    dat, _, _ = stats.binned_statistic(
        profile_index[good], values[good], statistic=statistic,
        bins=[profile_bins])
    return dat
258
+
259
+
260
+ # Aliases: the same functions exposed under their L0-prefixed names.
261
+ extract_L0timeseries_profiles = extract_timeseries_profiles
262
+ make_L0_gridfiles = make_gridfiles
263
+
264
+
265
+ __all__ = ['extract_timeseries_profiles', 'make_gridfiles']