shipgrav-1.0.0-py2.py3-none-any.whl

shipgrav/io.py ADDED
@@ -0,0 +1,749 @@
+ import numpy as np
+ import pandas as pd
+ from datetime import datetime, timezone
+ import yaml
+ import mmap
+ import os
+ import re
+
+ # TODO need a fix for places where we cross the international date line (read_nav)
+ # TODO check pandas versions for datetime parsing (eg RGS read)
+ # TODO where are cross and long accel for Thompson? (DGS laptop read)
+
+ ########################################################################
+ # navigation i/o (for better synchronization of gps with gravimeter)
+ ########################################################################
+
+
+ def read_nav(ship, pathlist, sampling=1, talker=None, ship_function=None):
+     """Read navigation strings from .GPS (or similar) files.
+
+     Ships use different formats and different talkers for their preferred
+     navigation; the ones we know are listed in database.toml, and there is
+     also an option to override that by setting the talker kwarg.
+     Navigation data is re-interpolated to the given sampling rate.
+
+     :param ship: name of the ship
+     :type ship: string
+     :param pathlist: paths to navigation files (.GPS)
+     :type pathlist: list, strings
+     :param sampling: sample spacing to interpolate to, in seconds (default 1, ie 1 Hz)
+     :type sampling: float
+     :param talker: nav talker. Default behavior is to use the talker
+         from database.toml if the ship is listed there.
+     :type talker: string, optional
+     :param ship_function: user-supplied function for reading from nav files.
+         This function should return arrays of lon, lat, and timestamps.
+         Look at _navcoords() and _navdate_Atlantis() (and similar functions) for examples.
+     :type ship_function: function, optional
+
+     :returns: (*pd.DataFrame*) time series of geographic coordinates and timestamps
+     """
+     # read info on talkers for various ships
+     moddir = os.path.dirname(__file__)
+     import tomli as tm
+     with open(os.path.join(moddir, 'database.toml'), 'rb') as f:
+         info = tm.load(f)
+     nav_str = info['nav-talkers']
+
+     # check to make sure we have some way to read nav data for this ship
+     if ship not in nav_str.keys() and ship_function is None:
+         print('R/V %s not yet supported for nav read; must supply read function' % ship)
+         return -999
+
+     # check to make sure we have a talker one way or another
+     if talker is None:  # none provided, so look one up
+         if ship not in nav_str.keys():
+             print('talker not known for R/V %s' % ship)
+             return -999
+         talker = nav_str[ship]
+
+     if isinstance(pathlist, str):
+         # if just one path is given, make it into a list
+         pathlist = [pathlist,]
+
+     timetime = np.array([])
+     lonlon = np.array([])
+     latlat = np.array([])
+
+     for fpath in pathlist:  # loop nav files (there may be a lot of them)
+         with open(fpath, 'r') as f:
+             allnav = np.array(f.readlines())  # read the entire file
+
+         if ship_function:  # use a user-supplied function to get all the things
+             lon, lat, timest = ship_function(allnav, talker)
+         else:
+             if ship == 'Atlantis':
+                 lon, lat = _navcoords(allnav, talker)
+                 timest = _navdate_Atlantis(allnav, talker)
+             elif ship == 'NBP':
+                 lon, lat = _navcoords(allnav, talker)
+                 timest = _navdate_NBP(allnav, talker)
+             elif ship == 'Thompson':
+                 lon, lat = _navcoords(allnav, talker)
+                 timest = _navdate_Thompson(allnav, talker)
+             elif ship == 'Revelle':
+                 lon, lat = _navcoords(allnav, talker)
+                 timest = _navdate_Revelle(allnav, talker)
+             elif ship == 'Ride':
+                 lon, lat = _navcoords(allnav, talker)
+                 timest = _navdate_Ride(allnav, talker)
+             elif ship == 'Langseth':
+                 lon, lat = _navcoords(allnav, talker)
+                 timest = _navdate_Langseth(allnav, talker)
+             else:  # in theory we never get to this option, but catch just in case
+                 print('R/V %s not yet supported for nav read; must supply read function' % ship)
+                 return -999
+
+         # posix, seconds, for interpolation
+         sec_time = np.array([d.timestamp() for d in timest])
+         _, idx = np.unique(sec_time, return_index=True)
+         # np.arange is half-open, so max(sec_time) itself is not included
+         samp_time = np.arange(min(sec_time), max(sec_time), sampling)
+
+         # interpolate to the desired sample rate
+         lon_out = np.interp(samp_time, sec_time[idx], lon[idx])
+         lat_out = np.interp(samp_time, sec_time[idx], lat[idx])
+
+         timetime = np.append(timetime, samp_time)
+         lonlon = np.append(lonlon, lon_out)
+         latlat = np.append(latlat, lat_out)
+
+     # de-duplicate times just in case, and make into a DataFrame to return
+     _, idx = np.unique(timetime, return_index=True)
+     gps_nav = pd.DataFrame(
+         {'time_sec': timetime[idx], 'lon': lonlon[idx], 'lat': latlat[idx]})
+     gps_nav['stamps'] = np.array([datetime.fromtimestamp(e, timezone.utc) for e in timetime[idx]],
+                                  dtype=datetime)
+
+     # check if we have probable longitude jumps, and try to fix them
+     ilocs = np.where(abs(np.diff(gps_nav['lon'])) > 1)[0]
+     if len(ilocs) > 0:
+         # try to get rid of +/- jumps (NBP, often)
+         gps_nav = _clean_180cross(gps_nav)
+
+     return gps_nav
+
+
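+ # A minimal usage sketch for read_nav (the ship and file names here are
+ # hypothetical):
+ #
+ #   nav = read_nav('Atlantis', ['AT5005_001.GPS', 'AT5005_002.GPS'], sampling=1)
+ #   if not isinstance(nav, int):  # -999 is returned when the read fails
+ #       print(nav[['stamps', 'lon', 'lat']].head())
+
+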
+ def _clock_time(allnav, talker):
+     """Extract clock time from standard talker strings.
+     """
+     inav = [talker in s for s in allnav]  # find lines of file with this talker
+     subnav = allnav[inav]  # select only those lines
+     N = len(subnav)
+     hour = np.zeros(N, dtype=int)
+     mint = np.zeros(N, dtype=int)
+     sec = np.zeros(N, dtype=int)
+     msec = np.zeros(N, dtype=int)
+
+     for i in range(N):
+         post = subnav[i].split(talker)[-1].lstrip().split(',')
+         if post[0] == '':
+             post = post[1:]
+         hour[i] = int(post[0][:2])  # hour
+         mint[i] = int(post[0][2:4])  # min
+         sec0 = float(post[0][4:])  # sec.msec
+         sec[i] = int(sec0)  # whole seconds
+         msec[i] = int(round((sec0 % 1)*1e6))  # fractional seconds -> microseconds
+
+     return hour, mint, sec, msec
+
+
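+ # For example, a talker payload whose time field is '073015.25' parses to
+ # hour=7, mint=30, sec=15, msec=250000 (microseconds, as datetime expects).
+
+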
+ def _navdate_Atlantis(allnav, talker):
+     """Extract datetime info from Atlantis nav files (at*.GPS).
+     """
+     hour, mint, sec, msec = _clock_time(allnav, talker)
+
+     inav = [talker in s for s in allnav]  # find lines of file with this talker
+     subnav = allnav[inav]  # select only those lines
+     N = len(subnav)
+     # array for timestamps, as datetime objects
+     timest = np.empty(N, dtype=datetime)
+
+     for i in range(N):
+         pre = subnav[i].split(talker)[0]
+         date = re.findall(r'NAV (\d{4})/(\d{2})/(\d{2})', pre)[0]
+         year = int(date[0])  # year
+         mon = int(date[1])  # month
+         day = int(date[2])  # day
+         timest[i] = datetime(year, mon, day, hour[i], mint[i],
+                              sec[i], msec[i], tzinfo=timezone.utc)
+     return timest
+
+
+ def _navdate_NBP(allnav, talker):
+     """Extract datetime info from Palmer nav files (NBP*.d*).
+     """
+     hour, mint, sec, msec = _clock_time(allnav, talker)
+
+     inav = [talker in s for s in allnav]  # find lines of file with this talker
+     subnav = allnav[inav]  # select only those lines
+     N = len(subnav)
+     # array for timestamps, as datetime objects
+     timest = np.empty(N, dtype=datetime)
+
+     for i in range(N):
+         pre = subnav[i].split(talker)[0]
+         date = re.findall(r'(\d{2})\+(\d{2,3}):.*', pre)[0]
+         # two-digit year (the NBP didn't exist before 2000, so this is ok)
+         year = '20' + date[0]
+         doy = date[1]  # day of year
+         timest[i] = datetime.strptime('%s-%s-%02d:%02d:%02d:%06d' %
+                                       (year, doy, hour[i],
+                                        mint[i], sec[i], msec[i]),
+                                       '%Y-%j-%H:%M:%S:%f')
+         timest[i] = timest[i].replace(tzinfo=timezone.utc)
+     return timest
+
+
+ def _navdate_Thompson(allnav, talker):
+     """Extract datetime info from Thompson nav files (POSMV*.Raw).
+     """
+     hour, mint, sec, msec = _clock_time(allnav, talker)
+
+     inav = [talker in s for s in allnav]  # find lines of file with this talker
+     subnav = allnav[inav]  # select only those lines
+     N = len(subnav)
+     # array for timestamps, as datetime objects
+     timest = np.empty(N, dtype=datetime)
+
+     for i in range(N):
+         pre = subnav[i].split(talker)[0]
+         date = re.findall(r'(\d{2})/(\d{2})/(\d{4}),*', pre)[0]
+         year = int(date[2])
+         mon = int(date[0])
+         day = int(date[1])
+         timest[i] = datetime(year, mon, day, hour[i],
+                              mint[i], sec[i], tzinfo=timezone.utc)
+     return timest
+
+
+ def _navdate_Revelle(allnav, talker):
+     """Extract datetime info from Revelle nav files (mru_seapatah330_rr_navbho-*.txt).
+     """
+     hour, mint, sec, msec = _clock_time(allnav, talker)
+
+     inav = [talker in s for s in allnav]  # find lines of file with this talker
+     subnav = allnav[inav]  # select only those lines
+     N = len(subnav)
+     # array for timestamps, as datetime objects
+     timest = np.empty(N, dtype=datetime)
+     inds = np.where(inav)[0]  # indices in allnav of talker lines
+
+     for i in range(N):
+         # the timestamp is on a previous line for Revelle - usually the one
+         # right before a GPGGA line, but that is not guaranteed
+         j = inds[i-1] if i != 0 else -1  # index of the previous talker line
+         for k in range(inds[i]-1, j, -1):  # step backwards toward the last talker line
+             before = allnav[k]
+             # date is at the start of this line
+             if re.match(r'(\d{4})-(\d{2})-(\d{2})T*', before):
+                 date = re.findall(r'(\d{4})-(\d{2})-(\d{2})T*', before)[0]
+                 year = int(date[0])
+                 mon = int(date[1])
+                 day = int(date[2])
+                 timest[i] = datetime(
+                     year, mon, day, hour[i], mint[i], sec[i], msec[i], tzinfo=timezone.utc)
+                 break  # stop stepping backwards once the date is found
+     return timest
+
+
+ def _navdate_Ride(allnav, talker):
+     """Extract datetime info from Ride nav files (seapath-navbho_*.raw).
+     """
+     inav = [talker in s for s in allnav]  # find lines of file with this talker
+     subnav = allnav[inav]  # select only those lines
+     N = len(subnav)
+     # array for timestamps, as datetime objects
+     timest = np.empty(N, dtype=datetime)
+
+     for i in range(N):
+         if talker == 'INGGA':  # on Ride, uses posix timestamps
+             date = re.findall(r'(\d+(\.\d*)?) \$%s' % talker, subnav[i])[0]
+             timest[i] = datetime.fromtimestamp(
+                 float(date[0]), tz=timezone.utc)
+         elif talker == 'GPGGA':  # includes a date along with the time, unlike other GPGGAs
+             date = re.findall(
+                 r'(\d{4})\-(\d{2})\-(\d{2})T(\d{2}):(\d{2}):(\d{2})\.(\d.*?)Z', subnav[i])[0]
+             year = int(date[0])
+             mon = int(date[1])
+             day = int(date[2])
+             hour = int(date[3])
+             mint = int(date[4])
+             sec = int(date[5])
+             msec = int(float('.' + date[6])*1e6)
+             timest[i] = datetime(year, mon, day, hour, mint,
+                                  sec, msec, tzinfo=timezone.utc)
+     return timest
+
+
+ def _navdate_Langseth(allnav, talker):
+     """Extract datetime info from Langseth nav files (MGL-seapath.*).
+     """
+     inav = [talker in s for s in allnav]  # find lines of file with this talker
+     subnav = allnav[inav]  # select only those lines
+     N = len(subnav)
+     # array for timestamps, as datetime objects
+     timest = np.empty(N, dtype=datetime)
+
+     for i in range(N):
+         date = subnav[i].split(talker)[0].split('seapath')[1].split('\t')[1]
+         timest[i] = datetime.strptime(date, '%Y:%j:%H:%M:%S.%f').replace(tzinfo=timezone.utc)
+     return timest
+
+
+ def _navcoords(allnav, talker):
+     """Extract longitude and latitude from standard(?) talker strings.
+     """
+     inav = [talker in s for s in allnav]  # find lines of file with this talker
+     subnav = allnav[inav]  # select only those lines
+     N = len(subnav)  # and count the lines
+     lon = np.zeros(N)
+     lat = np.zeros(N)  # arrays to hold coordinates
+     for i in range(N):  # loop lines, splitting at talker string
+         post = subnav[i].split(talker)[-1].lstrip().split(',')
+         if post[0] == '':
+             post = post[1:]  # correct for spacing in some files
+
+         # convert degrees + decimal minutes to decimal degrees
+         lat[i] = int(post[1][:2]) + float(post[1][2:])/60
+         if post[2] == 'S':
+             lat[i] = -lat[i]  # handle coordinate sign
+         lon[i] = int(post[3][:3]) + float(post[3][3:])/60
+         if post[4] == 'W':
+             lon[i] = -lon[i]
+
+     return lon, lat
+
+
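+ # For a typical GGA sentence such as
+ #   $GPGGA,123519.00,4807.038,N,01131.000,E,1,08,0.9,545.4,M,46.9,M,,*47
+ # _navcoords() yields lat = 48 + 7.038/60 (~48.1173) and lon = 11 + 31.000/60
+ # (~11.5167), with southern latitudes and western longitudes negated.
+
+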
+ def _clean_180cross(gps_nav):
+     """Fix instances where a trackline crosses +/-180° longitude.
+     """
+
+     # which side of the line has more points?
+     lpos = gps_nav['lon'].values > 0
+     lneg = gps_nav['lon'].values < 0
+
+     newlon = gps_nav['lon'].values
+
+     if sum(lpos) > sum(lneg):
+         newlon[lneg] = 180 + newlon[lneg] % 180
+     else:
+         newlon[lpos] = -180 + newlon[lpos] % -180
+
+     # check if there are still some jumpy points (from between +/- reversals)
+     ilocs = np.where(abs(np.diff(newlon)) > 1)[0]  # should not jump by a degree
+     if len(ilocs) % 2 != 0:  # these are not paired, so one must be an end?
+         pass
+     else:  # pairs of indices for jumps out and back
+         for i in ilocs[::2]:  # assume nice pairwise jumps
+             # ad hoc interpolation over the jump; not great
+             newlon[i+1] = (newlon[i] + newlon[i+2])/2
+
+     gps_nav = gps_nav.replace({'lon': newlon})
+
+     return gps_nav
+
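+ # E.g., if most points sit at positive longitudes, a point at -179.5 maps to
+ # 180 + (-179.5 % 180) = 180.5, so the track is continuous across the date line.
+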
+ ########################################################################
+ # BGM3 i/o (RGS and serial)
+ ########################################################################
+
+
+ def read_bgm_rgs(fp, ship):
+     """Read BGM gravity from RGS files.
+
+     RGS is supposedly a standard format; it is at least consistent
+     between Atlantis and NBP.
+
+     :param fp: RGS filepath(s)
+     :type fp: string, or list of strings
+     :param ship: ship name
+     :type ship: string
+
+     :returns: (*pd.DataFrame*) timestamps, raw gravity, and geographic coordinates
+     """
+     supported_ships = ['Atlantis', 'NBP']
+     if ship not in supported_ships:
+         print('R/V %s not supported for RGS read yet' % ship)
+         return -999
+
+     if isinstance(fp, str):
+         fp = [fp,]  # make a list if only one path is given
+
+     dats = []
+     for path in fp:
+         dat = pd.read_csv(path, delimiter=' ', names=['date', 'time', 'grav', 'lat', 'lon'],
+                           usecols=(1, 2, 3, 11, 12), parse_dates=[[0, 1]])
+         ndt = [e.tz_localize(timezone.utc) for e in dat['date_time']]
+         dat['date_time'] = ndt
+         dats.append(dat)
+
+     return pd.concat(dats, ignore_index=True)
+
+
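+ # A minimal usage sketch (the RGS file name here is hypothetical):
+ #
+ #   bgm = read_bgm_rgs('RGS_20230101.dat', 'Atlantis')
+ #   if not isinstance(bgm, int):  # -999 is returned for unsupported ships
+ #       print(bgm[['date_time', 'grav']].head())
+
+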
+ def read_bgm_raw(fp, ship, scale=None, ship_function=None):
+     """Read BGM gravity from raw (serial) files (not RGS).
+
+     This function uses scale factors determined for specific BGM meters
+     to convert counts from the raw files to raw gravity. Known scale
+     factors are listed in database.toml.
+
+     :param fp: BGM raw filepath(s)
+     :type fp: string or list of strings
+     :param ship: ship name
+     :type ship: string
+     :param scale: BGM counts scaling factor to override database.toml
+     :type scale: float, optional
+     :param ship_function: user-supplied function for reading/parsing BGM raw files.
+         The function should return a pandas.DataFrame with timestamps and counts.
+         Look at _bgmserial_Atlantis() and similar functions for examples.
+     :type ship_function: function, optional
+
+     :return: (*pd.DataFrame*) timestamps and calibrated raw gravity values
+     """
+     moddir = os.path.dirname(__file__)
+     import tomli as tm
+     with open(os.path.join(moddir, 'database.toml'), 'rb') as f:
+         info = tm.load(f)
+     # get instrument scaling factors from database.toml
+     sc_fac = info['BGM-scale']
+
+     if scale is None:  # no override provided, so look one up
+         if ship not in sc_fac.keys():
+             print('BGM scale factor not known for R/V %s' % ship)
+             return -999
+         scale = sc_fac[ship]
+
+     if isinstance(fp, str):
+         fp = [fp,]  # make a list if only one path is given
+     dats = []
+     for path in fp:
+         if ship_function is not None:
+             dat = ship_function(path)
+         else:
+             if ship == 'Atlantis':
+                 dat = _bgmserial_Atlantis(path)
+             elif ship == 'Thompson':
+                 dat = _bgmserial_Thompson(path)
+             elif ship == 'Revelle':
+                 dat = _bgmserial_Revelle(path)
+             elif ship == 'Langseth':
+                 dat = _bgmserial_Langseth(path)
+             else:  # shouldn't end up here, but just in case:
+                 print('BGM serial read not yet supported for R/V %s' % ship)
+                 return -999
+         dat['rgrav'] = scale*dat['counts']
+         dats.append(dat)
+     return pd.concat(dats, ignore_index=True)
+
+
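+ # A minimal usage sketch (the path is hypothetical; rgrav = scale * counts,
+ # with the scale factor taken from database.toml unless overridden):
+ #
+ #   bgm = read_bgm_raw('bgm_serial_001.raw', 'Atlantis')
+ #   print(bgm[['date_time', 'rgrav']].head())
+
+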
+ def _bgmserial_Atlantis(path):
+     """Read a BGM raw (serial) file from Atlantis.
+     """
+     def count(x): return int(x.split(':')[-1])  # parse the counts column
+     dat = pd.read_csv(path, delimiter=' ', names=['date', 'time', 'counts'], usecols=(1, 2, 4),
+                       parse_dates=[[0, 1]], converters={'counts': count})
+     ndt = [e.tz_localize(timezone.utc)
+            for e in dat['date_time']]  # timestamps cannot be naive
+     dat['date_time'] = ndt
+     return dat
+
+
+ def _bgmserial_Thompson(path):
+     """Read a BGM raw (serial) file from Thompson.
+     """
+     def count(x): return int(x.split(' ')[0].split(':')[-1])
+     dat = pd.read_csv(path, delimiter=',', names=['date', 'time', 'counts'],
+                       parse_dates=[[0, 1]], converters={'counts': count})
+     ndt = [e.tz_localize(timezone.utc) for e in dat['date_time']]
+     dat['date_time'] = ndt
+     return dat
+
+
+ def _bgmserial_Revelle(path):
+     """Read a BGM raw (serial) file from Revelle.
+     """
+     def count(x): return int(x.split(':')[-1])
+     dat = pd.read_csv(path, delimiter=' ', names=['date_time', 'counts'], usecols=(0, 1),
+                       parse_dates=[0], converters={'counts': count})
+     ndt = [e.tz_localize(timezone.utc) for e in dat['date_time']]
+     dat['date_time'] = ndt
+     return dat
+
+
+ def _bgmserial_Langseth(path):
+     """Read a BGM raw (serial) file from Langseth.
+
+     vc* file format
+     """
+     def count(x): return int(x.split(':')[-1])
+     def dtime(x): return datetime.strptime(x, '%Y:%j:%H:%M:%S.%f').replace(tzinfo=timezone.utc)
+     dat = pd.read_fwf(path, names=['date_time', 'counts'], usecols=(1, 2),
+                       converters={'counts': count, 'date_time': dtime})
+     return dat
+
+
+ def _despike_bgm_serial(dat, thresh=8000):
+     """Clean out counts spikes in BGM data based on a threshold delta(counts).
+
+     This sometimes works and sometimes doesn't; use at your own risk.
+     """
+     # find places where counts jump by more than the threshold set
+     diff = np.diff(dat.counts)
+     meh = np.where(abs(diff) > thresh)[0]
+     if len(meh) > 2:
+         # for spikes these jumps should be in pairs of +/-
+         # we really hope this is the case
+         if len(meh) % 2 != 0 or np.any(np.diff(meh)[::2] != 1):
+             print('something is weird with bgm despike')
+             return dat
+
+         # assuming spikes *are* in pairs:
+         bad_inds = meh[1::2]  # second of each pair of indices, hopefully
+         dat.drop(bad_inds, axis=0, inplace=True)
+         dat.reset_index(inplace=True)
+
+     return dat
+
+
+ ########################################################################
+ # DGS i/o ('laptop' and raw)
+ ########################################################################
+
+
+ def read_dgs_laptop(fp, ship, ship_function=None):
+     """Read DGS 'laptop' file(s), usually written as .dat files.
+
+     :param fp: filepath(s)
+     :type fp: string or list of strings
+     :param ship: ship name
+     :type ship: string
+     :param ship_function: user-defined function for reading a file.
+         The function should return a pandas.DataFrame. See _dgs_laptop_general()
+         for an example.
+     :type ship_function: function, optional
+
+     :return: (*pd.DataFrame*) DGS output time series
+     """
+     if isinstance(fp, str):
+         fp = [fp,]  # listify
+
+     dats = []
+     for path in fp:
+         if ship_function is not None:
+             dat = ship_function(path)
+         else:
+             if ship in ['Atlantis', 'Revelle', 'NBP', 'Ride', 'DGStest']:
+                 dat = _dgs_laptop_general(path)
+             elif ship == 'Thompson':
+                 dat = _dgs_laptop_Thompson(path)
+             else:
+                 print('R/V %s not supported for dgs laptop file read' % ship)
+                 return -999
+
+         dats.append(dat)  # append the DataFrame for this filepath
+
+     return pd.concat(dats, ignore_index=True)
+
+
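+ # A minimal usage sketch (the .dat file name here is hypothetical):
+ #
+ #   dgs = read_dgs_laptop(['AT1M-25_20230101.dat'], 'Atlantis')
+ #   print(dgs[['date_time', 'rgrav', 'lat', 'lon']].head())
+
+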
+ def _dgs_laptop_general(path):
+     """Read a single laptop file for Atlantis, Revelle, NBP, and Ride.
+     """
+     dat = pd.read_csv(path, delimiter=',', names=['rgrav', 'long_a', 'crss_a', 'status', 've', 'vcc',
+                                                   'al', 'ax', 'lat', 'lon', 'year', 'month', 'day',
+                                                   'hour', 'minute', 'second'],
+                       usecols=(1, 2, 3, 6, 10, 11, 12, 13, 14, 15, 19, 20, 21, 22, 23, 24))
+     dat['date_time'] = pd.to_datetime(
+         dat[['year', 'month', 'day', 'hour', 'minute', 'second']], utc=True)
+     return dat
+
+
+ def _dgs_laptop_Thompson(path):
+     """Read a single laptop file for Thompson, which does things its own way.
+     """
+     dat = pd.read_csv(path, delimiter=',', names=['date', 'time', 'rgrav', 've', 'vcc',
+                                                   'al', 'ax', 'lat', 'lon'],
+                       usecols=(0, 1, 3, 12, 13, 14, 15, 16, 17),
+                       parse_dates=[[0, 1]])
+     ndt = [e.tz_localize(timezone.utc) for e in dat['date_time']]
+     dat['date_time'] = ndt
+     return dat
+
+
+ def read_dgs_raw(fp, ship, scale_ccp=True):
+     """Read raw (serial) output files from DGS AT1M.
+
+     Values in these files are mostly in A/D converter units.
+     File formatting is assumed to follow what the DGS documentation
+     says, though some things may vary by vessel, so if this doesn't
+     work that's probably why.
+
+     :param fp: filepath(s)
+     :type fp: string or list of strings
+     :param ship: ship name
+     :type ship: string
+     :param scale_ccp: rescale the cross-coupling fields (ve, ax, al, vcc), default True
+     :type scale_ccp: bool, optional
+
+     :return: (*pd.DataFrame*) DGS output time series
+     """
+
+     if isinstance(fp, str):
+         fp = [fp,]  # listify
+     dats = []
+     for path in fp:
+         if ship == 'Thompson':  # always with the special file formats
+             dat = _dgs_raw_Thompson(path)
+         else:  # there might be exceptions besides Thompson, but I don't know about them yet
+             dat = _dgs_raw_general(path)
+
+         if scale_ccp:
+             # rescale the cross-coupling factors
+             dat['ve'] = dat['ve'].mul(0.00001)
+             dat['ax'] = dat['ax'].mul(0.00001)
+             dat['al'] = dat['al'].mul(0.00001)
+             dat['vcc'] = dat['vcc'].mul(0.00001)  # -0.000029)
+         dats.append(dat)
+
+     return pd.concat(dats, ignore_index=True)
+
+
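+ # A minimal usage sketch (the raw serial file name here is hypothetical):
+ #
+ #   raw = read_dgs_raw('AT1M_serial_20230101.raw', 'Atlantis')
+ #   print(raw[['Gravity', 've', 'vcc']].head())
+
+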
+ def _dgs_raw_general(path):
+     """Read a DGS raw (serial) file assuming fields are as DGS says they are.
+     """
+     dat = pd.read_csv(path, delimiter=',', names=['string', 'Gravity', 'Long', 'Cross', 'Beam', 'Temp',
+                                                   'Pressure', 'ElecTemp', 'vcc', 've', 'al', 'ax',
+                                                   'status', 'checksum', 'latitude',
+                                                   'longitude', 'speed', 'course', 'timestamp'],
+                       usecols=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18))
+
+     conv_times = True  # assume column 18 actually holds timestamps
+     # an elapsed-seconds counter ('1 Hz, 1 second') rather than a YYYYMMDDHHMMSS stamp
+     if str(dat.iloc[0].timestamp).startswith('1'):
+         conv_times = False  # clock not synced, so don't try to convert to stamps
+
+     if conv_times:  # probably UTC stamps
+         new_dates = pd.to_datetime(
+             dat['timestamp'], utc=True, format='%Y%m%d%H%M%S')
+         dat['date_time'] = new_dates
+
+     # special case of not synced for timestamp, but a stamp might be in the string elsewhere
+     if not conv_times and not dat.iloc[0]['string'].startswith('$'):
+         # split the string for a possible timestamp
+         try:
+             times = [e.split(' ')[0] for e in dat['string'].values]
+             dat['date_time'] = pd.to_datetime(times, format='ISO8601')
+         except (ValueError, TypeError):
+             print('raw (serial) timestamps not found/converted')
+             # if it doesn't work, oh well
+
+     return dat
+
+
+ def _dgs_raw_Thompson(path):
+     """Read a DGS raw (serial) file with Thompson conventions.
+
+     Columns are slightly different from the Atlantis and Revelle examples;
+     in particular, before the $AT1M string there are date and time
+     stamps that are also comma-separated, so the csv read used for those
+     other ships does not work properly.
+     """
+     dat = pd.read_csv(path, delimiter=',', names=['date', 'time', 'string', 'Gravity', 'Long', 'Cross',
+                                                   'Beam', 'Temp', 'Pressure', 'ElecTemp', 'vcc', 've',
+                                                   'al', 'ax', 'status', 'checksum',
+                                                   'latitude', 'longitude', 'speed', 'course'],
+                       usecols=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19))
+     dat['stamps'] = dat['date'] + ':' + dat['time']  # join the date and time columns
+     dat['date_time'] = pd.to_datetime(
+         dat['stamps'], utc=True, format='%m/%d/%Y:%H:%M:%S.%f')
+     dat.drop('stamps', axis=1, inplace=True)
+
+     return dat
+
+
+ ########################################################################
+ # reading other things (MRU etc)
+ ########################################################################
+
+
+ def read_other_stuff(yaml_file, data_file, tag):
+     """Read a particular feed (eg, $PASHR) from a data file + yaml file.
+
+     This function parses strings for the desired feed and returns info as a
+     pandas.DataFrame with columns named from the corresponding yaml file.
+     If there is a column in the feed strings prior to the tag itself, it
+     will be included as a 'mystery' dataframe column.
+
+     A use case is if you want to check coherence between gravity data and
+     one or more MRUs.
+
+     :param yaml_file: path to YAML file with info for this feed
+     :type yaml_file: string
+     :param data_file: path to data file to be read
+     :type data_file: string
+     :param tag: the name of the feed, with or without the $ prepended
+     :type tag: string
+
+     :return: (*pd.DataFrame*, *dict*) time series from the specified feed, and column info
+     """
+
+     if tag.startswith('$'):
+         tag = tag[1:]
+     dtag = '$' + tag
+
+     # read yaml, check that tag specs are in this file
+     with open(yaml_file, 'r') as f:
+         ym = yaml.safe_load(f)
+     ym = ym[list(ym.keys())[0]]  # skip out of the top level key
+     assert tag in ym['format'].keys(), \
+         '%s feed specs not present in yaml file %s' % (tag, yaml_file)
+
+     # check to make sure the tag appears in the data file
+     with open(data_file, 'rb') as file:
+         s = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ)
+     result = s.find(bytes(tag, 'ascii'))
+     assert result != -1, '%s not in data file %s' % (tag, data_file)
+
+     # checks done, read in all the lines for this feed
+     lines = []
+     s.seek(0)
+     for raw in iter(s.readline, b''):  # iterate lines until EOF
+         ln = raw.decode('ascii').rstrip()  # bytes->string, strip \r\n from end
+         if tag in ln:
+             lines.append(ln)
+
+     # split the lines at the tag, and then after the tag (fields named in yaml are post-tag)
+     # split into pre-tag (might be '') and post-tag
+     tagsplit = [e.split(dtag) for e in lines]
+     named_fields = [tagsplit[i][1].strip(',').split(
+         ',') for i in range(len(tagsplit))]
+
+     # make a dataframe, using yaml and including any timestamp-like info that might be before the tag
+     # Note that the format of the lines we're working with is usually some variation on:
+     # [date/timestamp, maybe], $TAG, comma, separated, fields, listed, in, yaml
+     colnames = re.sub('{|}', '', ym['format'][tag]).split(',')[1:]
+     df = pd.DataFrame(named_fields, columns=colnames)
+     df['mystery'] = [tagsplit[i][0]
+                      for i in range(len(tagsplit))]  # any pre-tag stuff
+
+     # finally, gather some other column info from the yaml file to output
+     col_info = {}
+     for c in colnames:
+         try:
+             col_info[c] = ym['fields'][c.split(':')[0]]
+         except KeyError:
+             pass
+
+     return df.apply(pd.to_numeric, errors='ignore'), col_info
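+
+
+ # A minimal usage sketch (paths and feed are hypothetical; the yaml file must
+ # define a 'format' entry for the tag, e.g. PASHR, and optional 'fields' info):
+ #
+ #   df, col_info = read_other_stuff('feeds.yaml', 'POSMV_20230101.Raw', '$PASHR')
+ #   print(df.columns.tolist(), col_info)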