pyglider 0.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyglider/__init__.py +1 -0
- pyglider/_version.py +1 -0
- pyglider/example_data.py +22 -0
- pyglider/ncprocess.py +344 -0
- pyglider/seaexplorer.py +507 -0
- pyglider/slocum.py +1221 -0
- pyglider/utils.py +768 -0
- pyglider-0.0.7.dist-info/LICENSE +191 -0
- pyglider-0.0.7.dist-info/METADATA +44 -0
- pyglider-0.0.7.dist-info/RECORD +13 -0
- pyglider-0.0.7.dist-info/WHEEL +5 -0
- pyglider-0.0.7.dist-info/top_level.txt +1 -0
- pyglider-0.0.7.dist-info/zip-safe +1 -0
pyglider/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
pyglider/_version.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = '0.0.7'
|
pyglider/example_data.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from zipfile import ZipFile
|
|
2
|
+
import pooch
|
|
3
|
+
|
|
4
|
+
def get_example_data(outdir='./'):
    """
    Fetch the pyglider example data archive and unpack it.

    The archive is retrieved with ``pooch.retrieve`` (checked against a
    pinned hash) and every member of the zip file is extracted.

    Parameters
    ----------
    outdir : str or Path, optional
        Location to extract the example files into.  They will be put at
        ``outdir/example-data/``.  Default is to unpack in the
        current directory.
    """
    archive_path = pooch.retrieve(
        "https://cproof.uvic.ca/pyglider-example-data/pyglider-example-data.zip",
        known_hash='5643a5301530e8dd60060a357cd9ed88eb1e84d761710c2a4013bc3c1817a859',
    )

    # Unpack the complete archive below ``outdir``.
    with ZipFile(archive_path, 'r') as archive:
        archive.extractall(outdir)
|
|
21
|
+
|
|
22
|
+
__all__ = ['get_example_data']
|
pyglider/ncprocess.py
ADDED
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Routines that are used for common processing of netcdf files after they have
|
|
3
|
+
been converted to standard timeseries.
|
|
4
|
+
"""
|
|
5
|
+
import logging
|
|
6
|
+
import xarray as xr
|
|
7
|
+
import numpy as np
|
|
8
|
+
import pyglider.utils as utils
|
|
9
|
+
import os
|
|
10
|
+
import yaml
|
|
11
|
+
import netCDF4
|
|
12
|
+
import scipy.stats as stats
|
|
13
|
+
|
|
14
|
+
_log = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def extract_timeseries_profiles(inname, outdir, deploymentyaml, force=False):
    """
    Extract and save each profile from a timeseries netCDF.

    Every ``profile_index`` value in the timeseries that is positive and
    integer-valued is written to its own netCDF file in *outdir*.  Each file
    gets scalar per-profile variables (``profile_id``, ``profile_time``,
    ``profile_lat``, ``profile_lon``, ``u``, ``v``, ...), ``platform`` and
    ``instrument_ctd`` variables, and ``*_qc`` ancillary variables for the
    core measurements.

    Parameters
    ----------
    inname : str or Path
        netcdf file to break into profiles

    outdir : str or Path
        directory to place profiles.  It is created (including missing
        parent directories) if it does not exist.

    deploymentyaml : str or Path
        location of deployment yaml file for the netCDF file.  This should
        be the same yaml file that was used to make the timeseries file.

    force : bool, default False
        Force an overwrite even if profile netcdf already exists
    """
    os.makedirs(outdir, exist_ok=True)

    deployment = utils._get_deployment(deploymentyaml)
    meta = deployment['metadata']

    with xr.open_dataset(inname) as ds:
        _log.info('Extracting profiles: opening %s', inname)
        # Only positive, integer-valued, non-NaN indices are written out.
        profiles = [p for p in np.unique(ds.profile_index)
                    if (not np.isnan(p)) and (p % 1 == 0) and (p > 0)]
        for p in profiles:
            ind = np.where(ds.profile_index == p)[0]
            dss = ds.isel(time=ind)
            # str() so that a pathlib.Path ``outdir`` works as documented.
            outname = str(outdir) + '/' + utils.get_file_id(dss) + '.nc'
            _log.info('Checking %s', outname)
            if os.path.exists(outname) and not force:
                continue

            # this is the id for the whole file, not just this profile..
            traj_id = utils.get_file_id(ds).encode()
            trajlen = len(traj_id)
            dss['trajectory'] = traj_id
            dss['trajectory'].attrs['cf_role'] = 'trajectory_id'
            dss['trajectory'].attrs['comment'] = (
                'A trajectory is a single '
                'deployment of a glider and may span multiple data files.')
            dss['trajectory'].attrs['long_name'] = 'Trajectory/Deployment Name'

            # profile-averaged variables....
            profile_meta = deployment['profile_variables']
            if 'water_velocity_eastward' in dss.keys():
                dss['u'] = dss.water_velocity_eastward.mean()
                dss['u'].attrs = profile_meta['u']

                dss['v'] = dss.water_velocity_northward.mean()
                dss['v'].attrs = profile_meta['v']
            elif 'u' in profile_meta:
                dss['u'] = profile_meta['u'].get('_FillValue', np.nan)
                dss['u'].attrs = profile_meta['u']

                dss['v'] = profile_meta['v'].get('_FillValue', np.nan)
                dss['v'].attrs = profile_meta['v']
            else:
                dss['u'] = np.nan
                dss['v'] = np.nan

            dss['profile_id'] = np.int32(p)
            dss['profile_id'].attrs = profile_meta['profile_id']
            if '_FillValue' not in dss['profile_id'].attrs:
                dss['profile_id'].attrs['_FillValue'] = -1
            # keep the valid range in the same integer type as the data
            dss['profile_id'].attrs['valid_min'] = np.int32(
                dss['profile_id'].attrs['valid_min'])
            dss['profile_id'].attrs['valid_max'] = np.int32(
                dss['profile_id'].attrs['valid_max'])

            dss['profile_time'] = dss.time.mean()
            dss['profile_time'].attrs = profile_meta['profile_time']
            # remove units so they can be encoded later.  Each key is
            # removed independently so a missing 'units' cannot leave a
            # stray 'calendar' behind.
            for key in ('units', 'calendar'):
                dss['profile_time'].attrs.pop(key, None)

            dss['profile_lon'] = dss.longitude.mean()
            dss['profile_lon'].attrs = profile_meta['profile_lon']
            dss['profile_lat'] = dss.latitude.mean()
            dss['profile_lat'].attrs = profile_meta['profile_lat']

            dss['lat'] = dss['latitude']
            dss['lon'] = dss['longitude']

            dss['platform'] = np.int32(1)
            comment = (meta['glider_model'] + ' operated by ' +
                       meta['institution'])
            dss['platform'].attrs['comment'] = comment
            dss['platform'].attrs['id'] = (
                meta['glider_name'] + meta['glider_serial'])
            dss['platform'].attrs['instrument'] = 'instrument_ctd'
            dss['platform'].attrs['long_name'] = (
                meta['glider_model'] + dss['platform'].attrs['id'])
            dss['platform'].attrs['type'] = 'platform'
            dss['platform'].attrs['wmo_id'] = meta['wmo_id']
            if '_FillValue' not in dss['platform'].attrs:
                dss['platform'].attrs['_FillValue'] = -1

            dss['lat_uv'] = np.nan
            dss['lat_uv'].attrs = profile_meta['lat_uv']
            dss['lon_uv'] = np.nan
            dss['lon_uv'].attrs = profile_meta['lon_uv']
            dss['time_uv'] = np.nan
            dss['time_uv'].attrs = profile_meta['time_uv']

            dss['instrument_ctd'] = np.int32(1.0)
            dss['instrument_ctd'].attrs = profile_meta['instrument_ctd']
            if '_FillValue' not in dss['instrument_ctd'].attrs:
                dss['instrument_ctd'].attrs['_FillValue'] = -1

            dss.attrs['date_modified'] = str(np.datetime64('now')) + 'Z'

            # ancillary variables: link and create with values of 2.  If
            # we don't want them all 2, then create these variables in the
            # time series
            to_fill = ['temperature', 'pressure', 'conductivity',
                       'salinity', 'density', 'lon', 'lat', 'depth']
            for name in to_fill:
                qcname = name + '_qc'
                dss[name].attrs['ancillary_variables'] = qcname
                if qcname not in dss.keys():
                    # 2 is "not eval"
                    dss[qcname] = ('time', 2 * np.ones(len(dss[name]), np.int8))
                    dss[qcname].attrs = utils.fill_required_qcattrs({}, name)

            _log.info('Writing %s', outname)
            timeunits = 'seconds since 1970-01-01T00:00:00Z'
            timecalendar = 'gregorian'
            # _FillValue and units for profile_time are supplied through
            # the encoding below, so they must not also be attributes.
            for key in ('_FillValue', 'units'):
                dss['profile_time'].attrs.pop(key, None)
            dss.to_netcdf(
                outname,
                encoding={
                    'time': {'units': timeunits,
                             'calendar': timecalendar,
                             'dtype': 'float64'},
                    'profile_time': {'units': timeunits,
                                     '_FillValue': -99999.0,
                                     'dtype': 'float64'},
                })

            # add traj_strlen using bare netcdf to make IOOS happy
            with netCDF4.Dataset(outname, 'r+') as nc:
                nc.renameDimension('string%d' % trajlen, 'traj_strlen')
|
|
170
|
+
|
|
171
|
+
def make_gridfiles(inname, outdir, deploymentyaml, *, fnamesuffix='', dz=1, starttime='1970-01-01'):
    """
    Turn a timeseries netCDF file into a vertically gridded netCDF.

    Parameters
    ----------
    inname : str or Path
        netcdf file to break into profiles

    outdir : str or Path
        directory to place profiles.  It is created (including missing
        parent directories) if it does not exist.

    deploymentyaml : str or Path
        location of deployment yaml file for the netCDF file.  This should
        be the same yaml file that was used to make the timeseries file.

    fnamesuffix : str, default ''
        Text inserted in the output file name between ``_grid`` and ``.nc``.

    dz : float, default = 1
        Vertical grid spacing in meters.  Bins span 0 to 1100 m.

    starttime : str, default '1970-01-01'
        Only samples with ``time`` strictly later than this (parsed with
        ``numpy.datetime64``) are gridded.

    Returns
    -------
    outname : str
        Name of gridded netCDF file.  The gridded netCDF file has coordinates of
        'depth' and 'profile', so each variable is gridded in depth bins and by
        profile number.  Each profile has a time, latitude, and longitude.
    """
    os.makedirs(outdir, exist_ok=True)

    deployment = utils._get_deployment(deploymentyaml)
    profile_meta = deployment['profile_variables']

    # The context manager guarantees the input file is closed, also on error.
    with xr.open_dataset(inname, decode_times=True) as ds_in:
        ds = ds_in.where(ds_in.time > np.datetime64(starttime), drop=True)
        _log.info('Working on: %s', inname)
        _log.debug(str(ds))
        _log.debug(str(ds.time[0]))
        _log.debug(str(ds.time[-1]))

        # Only positive, integer-valued, non-NaN indices are real profiles.
        profiles = [p for p in np.unique(ds.profile_index)
                    if (not np.isnan(p)) and (p % 1 == 0) and (p > 0)]
        # one bin per profile, with edges half-way between profile numbers
        profile_bins = np.hstack(
            (np.array(profiles) - 0.5, [profiles[-1] + 0.5]))
        _log.debug(profile_bins)
        Nprofiles = len(profiles)
        _log.info('Nprofiles %s', Nprofiles)

        depth_bins = np.arange(0, 1100.1, dz)
        # centre of each bin; a fixed 0.5 offset is only right for dz == 1
        depths = depth_bins[:-1] + dz / 2
        xdimname = 'time'
        dsout = xr.Dataset(
            coords={'depth': ('depth', depths),
                    'profile': (xdimname, profiles)})
        dsout['depth'].attrs = {'units': 'm',
                                'long_name': 'Depth',
                                'standard_name': 'depth',
                                'positive': 'down',
                                'coverage_content_type': 'coordinate',
                                'comment': 'center of depth bins'}

        profile_index = ds['profile_index'].values
        on_profile = profile_index % 1 == 0

        # Per-profile mean of time, longitude and latitude.  Time is averaged
        # as float64 (the raw datetime64 integer count) and converted back.
        sources = (
            ('time', ds['time'].values.astype(np.float64)),
            ('longitude', ds['longitude'].values),
            ('latitude', ds['latitude'].values),
        )
        for td, values in sources:
            good = np.where(~np.isnan(values) & on_profile)[0]
            dat, xedges, binnumber = stats.binned_statistic(
                profile_index[good],
                values[good], statistic='mean',
                bins=[profile_bins])
            if td == 'time':
                dat = (dat.astype('timedelta64[ns]')
                       + np.datetime64('1970-01-01T00:00:00'))
            _log.info('%s %s', td, len(dat))
            dsout[td] = ((xdimname), dat, ds[td].attrs)

        # Use the per-profile times here; the loop above ends on latitude,
        # so reusing its last result would store latitudes in these fields.
        # NOTE(review): start and end both carry the mean profile time; a
        # true start/end would need per-profile min/max -- confirm intent.
        profile_times = dsout['time'].values.copy()
        _log.info('Done times! %s', len(profile_times))
        for name in ('profile_time_start', 'profile_time_end'):
            # units/calendar are dropped so xarray can encode the datetimes
            time_attrs = {key: val for key, val in profile_meta[name].items()
                          if key not in ('units', 'calendar')}
            dsout[name] = ((xdimname), profile_times, time_attrs)

        for k in ds.keys():
            if k in ['time', 'profile', 'longitude', 'latitude', 'depth'] or 'time' in k:
                continue
            _log.info('Gridding %s', k)
            good = np.where(~np.isnan(ds[k]) & (ds['profile_index'] % 1 == 0))[0]
            if len(good) <= 0:
                continue
            if "average_method" in ds[k].attrs:
                average_method = ds[k].attrs["average_method"]
                ds[k].attrs["processing"] = (
                    f"Using average method {average_method} for "
                    f"variable {k} following deployment yaml.")
                if average_method == "geometric mean":
                    average_method = stats.gmean
                    ds[k].attrs["processing"] += (" Using geometric mean implementation "
                                                  "scipy.stats.gmean")
            else:
                average_method = "mean"
            dat, xedges, yedges, binnumber = stats.binned_statistic_2d(
                ds['profile_index'].values[good],
                ds['depth'].values[good],
                values=ds[k].values[good], statistic=average_method,
                bins=[profile_bins, depth_bins])

            _log.debug('dat%s', np.shape(dat))
            dsout[k] = (('depth', xdimname), dat.T, ds[k].attrs)

            # fill gaps in data:
            dsout[k].values = utils.gappy_fill_vertical(dsout[k].values)

        # fix u and v, because they should really not be gridded...
        if (('water_velocity_eastward' in dsout.keys()) and
                ('u' in profile_meta.keys())):
            _log.debug(str(ds.water_velocity_eastward))
            dsout['u'] = dsout.water_velocity_eastward.mean(axis=0)
            dsout['u'].attrs = profile_meta['u']
            dsout['v'] = dsout.water_velocity_northward.mean(axis=0)
            dsout['v'].attrs = profile_meta['v']
            dsout = dsout.drop(['water_velocity_eastward',
                                'water_velocity_northward'])

        dsout.attrs = ds.attrs
        dsout.attrs.pop('cdm_data_type', None)
        # fix to be ISO parsable:
        if len(dsout.attrs['deployment_start']) > 18:
            dsout.attrs['deployment_start'] = dsout.attrs['deployment_start'][:19]
            dsout.attrs['deployment_end'] = dsout.attrs['deployment_end'][:19]
            dsout.attrs['time_coverage_start'] = dsout.attrs['time_coverage_start'][:19]
            dsout.attrs['time_coverage_end'] = dsout.attrs['time_coverage_end'][:19]

        # fix standard_name so they don't overlap!  Each variable is handled
        # on its own so one missing variable cannot skip the others.
        for name in ('waypoint_latitude', 'waypoint_longitude',
                     'profile_time_start', 'profile_time_end'):
            if name in dsout:
                dsout[name].attrs.pop('standard_name', None)

        # set some attributes for cf guidance
        # see H.6.2. Profiles along a single trajectory
        # https://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/build/aphs06.html
        dsout.attrs['featureType'] = 'trajectoryProfile'
        dsout['profile'].attrs['cf_role'] = 'profile_id'
        dsout['mission_number'] = np.int32(1)
        dsout['mission_number'].attrs['cf_role'] = 'trajectory_id'
        dsout = dsout.set_coords(['latitude', 'longitude', 'time'])
        for k in dsout:
            if k in ['profile', 'depth', 'latitude', 'longitude', 'time', 'mission_number']:
                dsout[k].attrs['coverage_content_type'] = 'coordinate'
            else:
                dsout[k].attrs['coverage_content_type'] = 'physicalMeasurement'

        # str() so that a pathlib.Path ``outdir`` works as documented.
        outname = (str(outdir) + '/' + ds.attrs['deployment_name']
                   + '_grid' + fnamesuffix + '.nc')
        _log.info('Writing %s', outname)
        dsout.to_netcdf(
            outname,
            encoding={'time': {'units': 'seconds since 1970-01-01T00:00:00Z',
                               '_FillValue': np.nan,
                               'calendar': 'gregorian',
                               'dtype': 'float64'}})
    _log.info('Done gridding')

    return outname
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
# aliases
|
|
340
|
+
extract_L0timeseries_profiles = extract_timeseries_profiles
|
|
341
|
+
make_L0_gridfiles = make_gridfiles
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
__all__ = ['extract_timeseries_profiles', 'make_gridfiles']
|