pyglider 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyglider/__init__.py ADDED
@@ -0,0 +1 @@
1
+ # -*- coding: utf-8 -*-
pyglider/_version.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = '0.0.7'  # version string for this pyglider release
@@ -0,0 +1,22 @@
1
+ from zipfile import ZipFile
2
+ import pooch
3
+
4
def get_example_data(outdir='./'):
    """
    Download the example data archive and unpack it.

    The archive is fetched with ``pooch.retrieve`` and checked against a
    pinned SHA256 hash before every member is extracted.

    Parameters
    ----------
    outdir : str or Path, optional
        Location to extract the example files into.  They will be put at
        ``outdir/example-data/``.  Default is to unpack in the
        current directory.
    """
    archive_path = pooch.retrieve(
        "https://cproof.uvic.ca/pyglider-example-data/pyglider-example-data.zip",
        known_hash='5643a5301530e8dd60060a357cd9ed88eb1e84d761710c2a4013bc3c1817a859')

    # Unpack every member of the downloaded archive underneath outdir.
    with ZipFile(archive_path, 'r') as archive:
        archive.extractall(outdir)


__all__ = ['get_example_data']
pyglider/ncprocess.py ADDED
@@ -0,0 +1,344 @@
1
+ """
2
+ Routines that are used for common processing of netcdf files after they have
3
+ been converted to standard timeseries.
4
+ """
5
+ import logging
6
+ import xarray as xr
7
+ import numpy as np
8
+ import pyglider.utils as utils
9
+ import os
10
+ import yaml
11
+ import netCDF4
12
+ import scipy.stats as stats
13
+
14
+ _log = logging.getLogger(__name__)
15
+
16
+
17
def extract_timeseries_profiles(inname, outdir, deploymentyaml, force=False):
    """
    Extract and save each profile from a timeseries netCDF.

    One netCDF file is written per value of ``profile_index`` that is
    non-NaN, integer-valued and greater than zero.  Each file is named
    ``<outdir>/<utils.get_file_id(profile)>.nc``.

    Parameters
    ----------
    inname : str or Path
        netcdf file to break into profiles

    outdir : str or Path
        directory to place profiles.  It is created (including missing
        parent directories) if it does not exist.

    deploymentyaml : str or Path
        location of deployment yaml file for the netCDF file.  This should
        be the same yaml file that was used to make the timeseries file.

    force : bool, default False
        Force an overwrite even if profile netcdf already exists
    """
    # exist_ok=True replaces the old mkdir + swallowed FileExistsError and
    # additionally creates missing parent directories.
    os.makedirs(outdir, exist_ok=True)

    deployment = utils._get_deployment(deploymentyaml)

    meta = deployment['metadata']
    with xr.open_dataset(inname) as ds:
        _log.info('Extracting profiles: opening %s', inname)
        profiles = np.unique(ds.profile_index)
        # keep only real profiles: not NaN, whole-numbered, and positive
        profiles = [p for p in profiles
                    if (not np.isnan(p)) and not (p % 1) and (p > 0)]
        for p in profiles:
            ind = np.where(ds.profile_index == p)[0]
            dss = ds.isel(time=ind)
            # os.path.join accepts both str and Path for outdir and always
            # yields a str, which the later os.path/netCDF4 calls are given.
            outname = os.path.join(outdir, utils.get_file_id(dss) + '.nc')
            _log.info('Checking %s', outname)
            if not force and os.path.exists(outname):
                # already written and no overwrite requested
                continue

            # this is the id for the whole file, not just this profile..
            trajectory_id = utils.get_file_id(ds).encode()
            # byte length is needed below to find the string dimension that
            # the netCDF writer creates for this variable
            trajlen = len(trajectory_id)
            dss['trajectory'] = trajectory_id
            dss['trajectory'].attrs['cf_role'] = 'trajectory_id'
            dss['trajectory'].attrs['comment'] = (
                'A trajectory is a single '
                'deployment of a glider and may span multiple data files.')
            dss['trajectory'].attrs['long_name'] = 'Trajectory/Deployment Name'

            # profile-averaged variables....
            profile_meta = deployment['profile_variables']
            if 'water_velocity_eastward' in dss.keys():
                dss['u'] = dss.water_velocity_eastward.mean()
                dss['u'].attrs = profile_meta['u']

                dss['v'] = dss.water_velocity_northward.mean()
                dss['v'].attrs = profile_meta['v']
            elif 'u' in profile_meta:
                # no velocities in the data: use the configured fill value
                dss['u'] = profile_meta['u'].get('_FillValue', np.nan)
                dss['u'].attrs = profile_meta['u']

                dss['v'] = profile_meta['v'].get('_FillValue', np.nan)
                dss['v'].attrs = profile_meta['v']
            else:
                dss['u'] = np.nan
                dss['v'] = np.nan

            dss['profile_id'] = np.int32(p)
            dss['profile_id'].attrs = profile_meta['profile_id']
            if '_FillValue' not in dss['profile_id'].attrs:
                dss['profile_id'].attrs['_FillValue'] = -1
            # make the valid range the same integer type as the data
            dss['profile_id'].attrs['valid_min'] = np.int32(
                dss['profile_id'].attrs['valid_min'])
            dss['profile_id'].attrs['valid_max'] = np.int32(
                dss['profile_id'].attrs['valid_max'])

            dss['profile_time'] = dss.time.mean()
            dss['profile_time'].attrs = profile_meta['profile_time']
            # These keys are supplied through ``encoding`` when writing, so
            # they must not also be present as attributes.  Each key is
            # removed independently so a missing one does not leave the
            # others behind.
            for key in ('units', 'calendar', '_FillValue'):
                dss.profile_time.attrs.pop(key, None)

            dss['profile_lon'] = dss.longitude.mean()
            dss['profile_lon'].attrs = profile_meta['profile_lon']
            dss['profile_lat'] = dss.latitude.mean()
            dss['profile_lat'].attrs = profile_meta['profile_lat']

            dss['lat'] = dss['latitude']
            dss['lon'] = dss['longitude']
            dss['platform'] = np.int32(1)
            comment = (meta['glider_model'] + ' operated by ' +
                       meta['institution'])
            dss['platform'].attrs['comment'] = comment
            dss['platform'].attrs['id'] = (
                meta['glider_name'] + meta['glider_serial'])
            dss['platform'].attrs['instrument'] = 'instrument_ctd'
            dss['platform'].attrs['long_name'] = (
                meta['glider_model'] + dss['platform'].attrs['id'])
            dss['platform'].attrs['type'] = 'platform'
            dss['platform'].attrs['wmo_id'] = meta['wmo_id']
            if '_FillValue' not in dss['platform'].attrs:
                dss['platform'].attrs['_FillValue'] = -1

            dss['lat_uv'] = np.nan
            dss['lat_uv'].attrs = profile_meta['lat_uv']
            dss['lon_uv'] = np.nan
            dss['lon_uv'].attrs = profile_meta['lon_uv']
            dss['time_uv'] = np.nan
            dss['time_uv'].attrs = profile_meta['time_uv']

            dss['instrument_ctd'] = np.int32(1.0)
            dss['instrument_ctd'].attrs = profile_meta['instrument_ctd']
            if '_FillValue' not in dss['instrument_ctd'].attrs:
                dss['instrument_ctd'].attrs['_FillValue'] = -1

            dss.attrs['date_modified'] = str(np.datetime64('now')) + 'Z'

            # ancillary variables: link and create with values of 2.  If
            # we don't want them all 2, then create these variables in the
            # time series
            to_fill = ['temperature', 'pressure', 'conductivity',
                       'salinity', 'density', 'lon', 'lat', 'depth']
            for name in to_fill:
                qcname = name + '_qc'
                dss[name].attrs['ancillary_variables'] = qcname
                if qcname not in dss.keys():
                    # 2 is "not eval"
                    dss[qcname] = (
                        'time', 2 * np.ones(len(dss[name]), np.int8))
                    dss[qcname].attrs = utils.fill_required_qcattrs({}, name)

            _log.info('Writing %s', outname)
            timeunits = 'seconds since 1970-01-01T00:00:00Z'
            timecalendar = 'gregorian'
            dss.to_netcdf(
                outname,
                encoding={'time': {'units': timeunits,
                                   'calendar': timecalendar,
                                   'dtype': 'float64'},
                          'profile_time': {'units': timeunits,
                                           '_FillValue': -99999.0,
                                           'dtype': 'float64'},
                          })

            # add traj_strlen using bare netcdf to make IOOS happy: rename
            # the 'string<N>' dimension (N = byte length of the trajectory
            # id) of the file just written.
            with netCDF4.Dataset(outname, 'r+') as nc:
                nc.renameDimension('string%d' % trajlen, 'traj_strlen')
170
+
171
def make_gridfiles(inname, outdir, deploymentyaml, *, fnamesuffix='', dz=1, starttime='1970-01-01'):
    """
    Turn a timeseries netCDF file into a vertically gridded netCDF.

    Parameters
    ----------
    inname : str or Path
        netcdf file to break into profiles

    outdir : str or Path
        directory to place profiles.  It is created (including missing
        parent directories) if it does not exist.

    deploymentyaml : str or Path
        location of deployment yaml file for the netCDF file.  This should
        be the same yaml file that was used to make the timeseries file.

    fnamesuffix : str, default ''
        Text inserted in the output file name, which is
        ``<outdir>/<deployment_name>_grid<fnamesuffix>.nc``.

    dz : float, default = 1
        Vertical grid spacing in meters.  Bin edges run from 0 in steps of
        ``dz`` (``np.arange(0, 1100.1, dz)``).

    starttime : str, default '1970-01-01'
        Only samples with ``time`` strictly later than
        ``np.datetime64(starttime)`` are gridded.

    Returns
    -------
    outname : str
        Name of gridded netCDF file.  The gridded netCDF file has coordinates of
        'depth' and 'profile', so each variable is gridded in depth bins and by
        profile number.  Each profile has a time, latitude, and longitude.
    """
    # exist_ok=True replaces the old mkdir + swallowed FileExistsError and
    # additionally creates missing parent directories.
    os.makedirs(outdir, exist_ok=True)

    deployment = utils._get_deployment(deploymentyaml)

    profile_meta = deployment['profile_variables']

    # Keep the input open only for the duration of the gridding so the file
    # handle is released when we are done (it was previously never closed).
    with xr.open_dataset(inname, decode_times=True) as ds_file:
        ds = ds_file.where(ds_file.time > np.datetime64(starttime), drop=True)
        _log.info(f'Working on: {inname}')
        _log.debug(str(ds))
        _log.debug(str(ds.time[0]))
        _log.debug(str(ds.time[-1]))

        profiles = np.unique(ds.profile_index)
        # keep only real profiles: not NaN, whole-numbered, and positive
        profiles = [p for p in profiles
                    if (not np.isnan(p)) and not (p % 1) and (p > 0)]
        # one bin per profile number, edges half-way between the integers
        profile_bins = np.hstack(
            (np.array(profiles) - 0.5, [profiles[-1] + 0.5]))
        _log.debug(profile_bins)
        Nprofiles = len(profiles)
        _log.info(f'Nprofiles {Nprofiles}')
        depth_bins = np.arange(0, 1100.1, dz)
        # bin centers: half a bin width below each upper... i.e. left edge
        # plus dz / 2 (this was hard-coded to 0.5, only right for dz == 1)
        depths = depth_bins[:-1] + 0.5 * dz
        xdimname = 'time'
        dsout = xr.Dataset(
            coords={'depth': ('depth', depths),
                    'profile': (xdimname, profiles)})
        dsout['depth'].attrs = {'units': 'm',
                                'long_name': 'Depth',
                                'standard_name': 'depth',
                                'positive': 'down',
                                'coverage_content_type': 'coordinate',
                                'comment': 'center of depth bins'}

        # float copy of time so that it can be averaged like any other
        # variable; converted back to datetime64 after binning
        ds['time_1970'] = ds.temperature.copy()
        ds['time_1970'].values = ds.time.values.astype(np.float64)
        for td in ('time_1970', 'longitude', 'latitude'):
            good = np.where(
                ~np.isnan(ds[td]) & (ds['profile_index'] % 1 == 0))[0]
            dat, xedges, binnumber = stats.binned_statistic(
                ds['profile_index'].values[good],
                ds[td].values[good], statistic='mean',
                bins=[profile_bins])
            if td == 'time_1970':
                td = 'time'
                dat = (dat.astype('timedelta64[ns]') +
                       np.datetime64('1970-01-01T00:00:00'))
            _log.info(f'{td} {len(dat)}')
            dsout[td] = (('time'), dat, ds[td].attrs)
        # the helper variable is no longer needed; the result of drop() was
        # previously discarded, so the drop never took effect
        ds = ds.drop('time_1970')
        _log.info(f'Done times! {len(dat)}')
        # NOTE(review): ``dat`` here is whatever the loop above left behind,
        # i.e. the per-profile mean *latitude*, not a start or end time.
        # Behavior is kept as-is; confirm the intended values before relying
        # on profile_time_start / profile_time_end.
        dsout['profile_time_start'] = (
            (xdimname), dat, profile_meta['profile_time_start'])
        dsout['profile_time_end'] = (
            (xdimname), dat, profile_meta['profile_time_end'])

        for k in ds.keys():
            if (k in ['time', 'profile', 'longitude', 'latitude', 'depth']
                    or 'time' in k):
                continue
            _log.info('Gridding %s', k)
            good = np.where(
                ~np.isnan(ds[k]) & (ds['profile_index'] % 1 == 0))[0]
            if len(good) <= 0:
                continue
            if "average_method" in ds[k].attrs:
                average_method = ds[k].attrs["average_method"]
                ds[k].attrs["processing"] = (
                    f"Using average method {average_method} for "
                    f"variable {k} following deployment yaml.")
                if average_method == "geometric mean":
                    # binned_statistic_2d accepts a callable statistic
                    average_method = stats.gmean
                    ds[k].attrs["processing"] += (
                        " Using geometric mean implementation "
                        "scipy.stats.gmean")
            else:
                average_method = "mean"
            dat, xedges, yedges, binnumber = stats.binned_statistic_2d(
                ds['profile_index'].values[good],
                ds['depth'].values[good],
                values=ds[k].values[good], statistic=average_method,
                bins=[profile_bins, depth_bins])

            _log.debug(f'dat{np.shape(dat)}')
            # binned result is (profile, depth); store as (depth, profile)
            dsout[k] = (('depth', xdimname), dat.T, ds[k].attrs)

            # fill gaps in data:
            dsout[k].values = utils.gappy_fill_vertical(dsout[k].values)

        # fix u and v, because they should really not be gridded...
        if (('water_velocity_eastward' in dsout.keys()) and
                ('u' in profile_meta.keys())):
            _log.debug(str(ds.water_velocity_eastward))
            dsout['u'] = dsout.water_velocity_eastward.mean(axis=0)
            dsout['u'].attrs = profile_meta['u']
            dsout['v'] = dsout.water_velocity_northward.mean(axis=0)
            dsout['v'].attrs = profile_meta['v']
            dsout = dsout.drop(['water_velocity_eastward',
                                'water_velocity_northward'])
        dsout.attrs = ds.attrs
        # tolerate inputs that never had this attribute
        dsout.attrs.pop('cdm_data_type', None)
        # fix to be ISO parsable:
        if len(dsout.attrs['deployment_start']) > 18:
            for key in ('deployment_start', 'deployment_end',
                        'time_coverage_start', 'time_coverage_end'):
                dsout.attrs[key] = dsout.attrs[key][:19]
        # fix standard_name so they don't overlap!  Each variable is handled
        # on its own so that a missing variable or attribute does not stop
        # the remaining ones from being cleaned.
        for name in ('waypoint_latitude', 'waypoint_longitude',
                     'profile_time_start', 'profile_time_end'):
            if name in dsout.keys():
                dsout[name].attrs.pop('standard_name', None)
        # set some attributes for cf guidance
        # see H.6.2. Profiles along a single trajectory
        # https://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/build/aphs06.html
        dsout.attrs['featureType'] = 'trajectoryProfile'
        dsout['profile'].attrs['cf_role'] = 'profile_id'
        dsout['mission_number'] = np.int32(1)
        dsout['mission_number'].attrs['cf_role'] = 'trajectory_id'
        dsout = dsout.set_coords(['latitude', 'longitude', 'time'])
        for k in dsout:
            if k in ['profile', 'depth', 'latitude', 'longitude', 'time',
                     'mission_number']:
                dsout[k].attrs['coverage_content_type'] = 'coordinate'
            else:
                dsout[k].attrs['coverage_content_type'] = 'physicalMeasurement'

        # os.path.join accepts both str and Path for outdir and yields a str
        outname = os.path.join(
            outdir,
            ds.attrs['deployment_name'] + '_grid' + fnamesuffix + '.nc')
        _log.info('Writing %s', outname)
        dsout.to_netcdf(
            outname,
            encoding={'time': {'units': 'seconds since 1970-01-01T00:00:00Z',
                               '_FillValue': np.nan,
                               'calendar': 'gregorian',
                               'dtype': 'float64'}})
        _log.info('Done gridding')

    return outname
337
+
338
+
339
+ # aliases: the L0-prefixed names are bound to the same function objects as the names above; they are not listed in __all__ below
340
+ extract_L0timeseries_profiles = extract_timeseries_profiles
341
+ make_L0_gridfiles = make_gridfiles
342
+
343
+
344
+ __all__ = ['extract_timeseries_profiles', 'make_gridfiles']