hspf-2.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hspf/wdm.py ADDED
@@ -0,0 +1,355 @@
1
+ ''' Copyright (c) 2020 by RESPEC, INC.
2
+ Author: Robert Heaphy, Ph.D.
3
+
4
+ Based on MATLAB program by Seth Kenner, RESPEC
5
+ License: LGPL2
6
+ '''
7
+
8
+ import numpy as np
9
+ import pandas as pd
10
+ #from numba import jit, njit
11
+ import datetime
12
+ from pathlib import Path
13
+ # look up attribute name, data type ('I' integer, 'R' real, 'S' string) and data length by attribute number
14
+ attrinfo = {1:('TSTYPE','S',4), 2:('STAID','S',16), 11:('DAREA','R',1),
15
+ 17:('TCODE','I',1), 27:('TSBYR','I',1), 28:('TSBMO','I',1),
16
+ 29:('TSBDY','I',1), 30:('TSBHR','I',1), 32:('TFILL', 'R',1),
17
+ 33:('TSSTEP','I',1), 34:('TGROUP','I',1), 45:('STNAM','S',48),
18
+ 83:('COMPFG','I',1), 84:('TSFORM','I',1), 85:('VBTIME','I',1),
19
+ 444:('DATMOD','S',12), 443:('DATCRE','S',12), 22:('DCODE','I',1),
20
+ 10:('DESCRP','S', 80), 7:('ELEV','R',1), 8:('LATDEG','R',1),
21
+ 9:('LNGDEG','R',1), 288:('SCENARIO','S',8), 289:('CONSTITUENT','S',8),
22
+ 290:('LOCATION','S',8)}
23
+
24
+ freq = {7:'100YS', 6:'YS', 5:'MS', 4:'D', 3:'H', 2:'min', 1:'S'} # pandas date_range() frequency by TCODE, TGROUP
25
+
26
+
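# Illustrative sketch, not part of the published module: how an entry in the
# attrinfo table above is read. Attribute id 1 is TSTYPE, stored as a
# 4-character string; attribute id 17 is TCODE, stored as a single integer.
def _example_attrinfo_lookup():
    name, atype, length = attrinfo[1]     # ('TSTYPE', 'S', 4)
    assert (name, atype, length) == ('TSTYPE', 'S', 4)
    name, atype, length = attrinfo[17]    # ('TCODE', 'I', 1)
    assert atype == 'I' and length == 1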
27
+ class wdmInterface():
28
+ def __init__(self,file_paths:list):
29
+ #self.names = [file_path for file_path in file_paths]
30
+ self.wdms = {Path(file_path).name:hdf5WDM(Path(file_path).with_suffix('.hdf5')) for file_path in file_paths}
31
+ self.filepaths = {Path(file_path).name:Path(file_path) for file_path in file_paths}
32
+
33
+ def series(self, wdm_file,dsn):
34
+ return self.wdms[wdm_file].series(dsn)
35
+
36
+
37
+
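# Illustrative usage sketch, not part of the published module; the file names
# and DSN below are hypothetical. wdmInterface keys its readers by the original
# file name and expects a pre-converted companion .hdf5 file next to each .wdm
# path (see hdf5WDM below).
def _example_wdm_interface():
    interface = wdmInterface(['met_data.wdm', 'flow_data.wdm'])
    ts = interface.series('met_data.wdm', 39)   # pandas Series for DSN 39
    return ts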
38
+ class hdf5WDM():
39
+ def __init__(self, wdm_path):
40
+ self.wdm_path = Path(wdm_path)
41
+
42
+ df = pd.read_hdf(wdm_path, '/TIMESERIES/SUMMARY')
43
+ df = df.reset_index(drop=False,names='hdf5_name')
44
+ df.index = df['hdf5_name'].str[2:].astype(int)
45
+ self.summary = df
46
+
47
+ def series(self,dsn):
48
+ hdf5_name = self.summary.loc[dsn,'hdf5_name']
49
+ return pd.read_hdf(self.wdm_path,f'/TIMESERIES/{hdf5_name}')
50
+
51
+
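# Illustrative sketch, not part of the published module; the file name and DSN
# are hypothetical. hdf5WDM assumes the HDF5 file holds a '/TIMESERIES/SUMMARY'
# table whose index values carry the data-set number after a two-character
# prefix (e.g. 'TS039'), plus one '/TIMESERIES/<name>' series per data set.
def _example_hdf5_wdm():
    reader = hdf5WDM('met_data.hdf5')
    print(reader.summary.head())   # one row per data set, indexed by DSN
    return reader.series(39)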
52
+ class WDM():
53
+ def __init__(self,wdmfile):
54
+ self.name = wdmfile
55
+ self._iarray = np.fromfile(wdmfile, dtype=np.int32)
56
+ self._farray = np.fromfile(wdmfile, dtype=np.float32)
57
+
58
+ if self._iarray[0] != -998:
59
+ raise ValueError('Provided file does not match WDM format. First int32 should be -998.')
60
+ self.nrecords = self._iarray[28] # first record is File Definition Record
61
+ self.ntimeseries = self._iarray[31]
62
+
63
+ dsnlist = {}
64
+ for index in range(512, self.nrecords * 512, 512):
65
+ if not (self._iarray[index]==0 and self._iarray[index+1]==0 and self._iarray[index+2]==0 and self._iarray[index+3]==0) and self._iarray[index+5]==1:
66
+ dsnlist[self._iarray[index+4]] = index
67
+ #dsnlist.append(index)
68
+ if len(dsnlist.keys()) != self.ntimeseries:
69
+ print(f'Warning: wrong number of time series records found; expected {self.ntimeseries}, found {len(dsnlist)}')
70
+
71
+ self.dsnlist = dsnlist
72
+ self.dsns = list(self.dsnlist.keys())
73
+ self.columns = check_columns(self.dsnlist.values(),self._iarray,self._farray)
74
+ self.summary = None #pd.DataFrame(columns = self.columns)
75
+ self.data = {}
76
+
77
+ def series(self,dsn):
78
+ if dsn in self.data:
79
+ return self.data[dsn]
80
+ else:
81
+ summary,series = get_series(self.dsnlist[dsn],self._iarray,self._farray,self.columns)
82
+ self.summary = pd.concat([self.summary,summary])
83
+ self.data[dsn] = series
84
+ return self.data[dsn]
85
+
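# Illustrative usage sketch, not part of the published module; the file name is
# hypothetical. WDM reads the binary .wdm file directly and caches each decoded
# series, so repeated calls to series() for the same DSN are cheap.
def _example_wdm_binary():
    wdm = WDM('met_data.wdm')           # raises ValueError if not a WDM file
    first_dsn = wdm.dsns[0]             # data-set numbers found in the file
    ts = wdm.series(first_dsn)          # pandas Series indexed by timestamp
    print(wdm.summary.loc[first_dsn])   # Start, Stop, Freq, TSTYPE, ...
    return ts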
86
+ def check_columns(dsnlist,iarray,farray):
87
+ # determine which optional attributes are present on every dsn; only those are added as summary columns
88
+ columns_to_add = []
89
+ search = ['STAID', 'STNAM', 'SCENARIO', 'CONSTITUENT', 'LOCATION']
90
+ for att in search:
91
+ found_in_all = True
92
+ for index in dsnlist:
93
+ dattr = {}
94
+ psa = iarray[index + 9]
95
+ if psa > 0:
96
+ sacnt = iarray[index + psa - 1]
97
+ for i in range(psa + 1, psa + 1 + 2 * sacnt, 2):
98
+ id = iarray[index + i]
99
+ ptr = iarray[index + i + 1] - 1 + index
100
+ if id not in attrinfo:
101
+ continue
102
+ name, atype, length = attrinfo[id]
103
+ if atype == 'I':
104
+ dattr[name] = iarray[ptr]
105
+ elif atype == 'R':
106
+ dattr[name] = farray[ptr]
107
+ else:
108
+ dattr[name] = ''.join([_inttostr(iarray[k]) for k in range(ptr, ptr + length // 4)]).strip()
109
+ if att not in dattr:
110
+ found_in_all = False
111
+ if found_in_all:
112
+ columns_to_add.append(att)
113
+ return columns_to_add
114
+
115
+
116
+
117
+
118
+
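# Illustrative sketch, not part of the published module: check_columns() keeps
# only the optional attributes present on every DSN, so the summary table gets
# a consistent set of columns. The same idea with plain Python sets:
def _example_common_attributes():
    per_dsn_attrs = [{'STAID', 'STNAM', 'LOCATION'}, {'STAID', 'STNAM'}]
    search = ['STAID', 'STNAM', 'SCENARIO', 'CONSTITUENT', 'LOCATION']
    common = [a for a in search if all(a in attrs for attrs in per_dsn_attrs)]
    assert common == ['STAID', 'STNAM']
    return common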
119
+ def get_series(dsn_index,iarray,farray,columns_to_add):
120
+
121
+ date_epoch = np.datetime64(0,'Y')
122
+ dt_year = np.timedelta64(1, 'Y')
123
+ dt_month = np.timedelta64(1, 'M')
124
+ dt_day = np.timedelta64(1, 'D')
125
+ dt_hour = np.timedelta64(1, 'h')
126
+ dt_minute = np.timedelta64(1, 'm')
127
+ dt_second = np.timedelta64(1, 's')
128
+
129
+ index = dsn_index
130
+ # get layout information for TimeSeries Dataset frame
131
+ dsn = iarray[index+4]
132
+ psa = iarray[index+9]
133
+ if psa > 0:
134
+ sacnt = iarray[index+psa-1]
135
+ pdat = iarray[index+10]
136
+ pdatv = iarray[index+11]
137
+
138
+ print(f'reading dsn {dsn} from wdm')
139
+ # get attributes
140
+ dattr = {'TSBDY':1, 'TSBHR':1, 'TSBMO':1, 'TSBYR':1900, 'TFILL':-999.} # preset defaults
141
+ for i in range(psa+1, psa+1 + 2*sacnt, 2):
142
+ id = iarray[index + i]
143
+ ptr = iarray[index + i + 1] - 1 + index
144
+ if id not in attrinfo:
145
+ # print('PROGRAM ERROR: ATTRIBUTE INDEX not found', id, 'Attribute pointer', iarray[index + i+1])
146
+ continue
147
+
148
+ name, atype, length = attrinfo[id]
149
+ if atype == 'I':
150
+ dattr[name] = iarray[ptr]
151
+ elif atype == 'R':
152
+ dattr[name] = farray[ptr]
153
+ else:
154
+ dattr[name] = ''.join([_inttostr(iarray[k]) for k in range(ptr, ptr + length//4)]).strip()
155
+
156
+ # Get timeseries timebase data
157
+ records = []
158
+ offsets = []
159
+ for i in range(pdat+1, pdatv-1):
160
+ a = iarray[index+i]
161
+ if a != 0:
162
+ record, offset = _splitposition(a)
163
+ records.append(record)
164
+ offsets.append(offset)
165
+ # if len(records) == 0:
166
+ # continue
167
+
168
+ # calculate number of data points in each group, tindex is final index for storage
169
+ tgroup = dattr['TGROUP']
170
+ tstep = dattr['TSSTEP']
171
+ tcode = dattr['TCODE']
172
+
173
+ records = np.asarray(records)
174
+ offsets = np.asarray(offsets)
175
+
176
+ dates, values, stop_datetime = _process_groups(iarray, farray, records, offsets, tgroup)
177
+ stop_datetime = datetime.datetime(*_bits_to_date(stop_datetime))
178
+ dates = np.array(dates)
179
+ dates_converted = _date_convert(dates, date_epoch, dt_year, dt_month, dt_day, dt_hour, dt_minute, dt_second)
180
+ series = pd.Series(values, index=dates_converted)
181
+
182
+ try:
183
+ series.index.freq = str(tstep) + freq[tcode]
184
+ except ValueError:
185
+ series.index.freq = None
186
+
187
+ data = [
188
+ str(series.index[0]), str(stop_datetime), str(tstep) + freq[tcode],
189
+ len(series), dattr['TSTYPE'], dattr['TFILL']
190
+ ]
191
+ columns = ['Start', 'Stop', 'Freq','Length', 'TSTYPE', 'TFILL']
192
+ for x in columns_to_add:
193
+ if x in dattr:
194
+ data.append(dattr[x])
195
+ columns.append(x)
196
+
197
+ summary = pd.DataFrame({k:[v] for k,v in zip(columns,data)},index = [dsn])
198
+ return summary,series
199
+
200
+
201
+
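# Illustrative sketch, not part of the published module: the shape of the
# objects returned by get_series(). The summary is a one-row DataFrame indexed
# by DSN; the series is a pandas Series indexed by timestamps. The wdm argument
# is assumed to be a WDM instance (see the class above).
def _example_get_series_result(wdm):
    dsn = wdm.dsns[0]
    summary, series = get_series(wdm.dsnlist[dsn], wdm._iarray, wdm._farray, wdm.columns)
    assert list(summary.index) == [dsn]
    assert summary.loc[dsn, 'Length'] == len(series)
    return summary, series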
202
+ #@njit
203
+ def _splitdate(x):
204
+ year = np.int64(x >> 14)
205
+ month = np.int64(x >> 10 & 0xF)
206
+ day = np.int64(x >> 5 & 0x1F)
207
+ hour = np.int64(x & 0x1F)
208
+ return _correct_date(year, month, day, hour, 0,0)
209
+
210
+ #@njit
211
+ def _splitcontrol(x):
212
+ nval = x >> 16
213
+ ltstep = x >> 10 & 0x3f
214
+ ltcode = x >> 7 & 0x7
215
+ comp = x >> 5 & 0x3
216
+ qual = x & 0x1f
217
+ return nval, ltstep, ltcode, comp, qual
218
+
219
+ #@njit
220
+ def _splitposition(x):
221
+ return (x >> 9) - 1, (x & 0x1FF) - 1  # zero-based (record, offset)
222
+
223
+ #@njit
224
+ def _inttostr(i):
225
+ return chr(i & 0xFF) + chr(i>>8 & 0xFF) + chr(i>>16 & 0xFF) + chr(i>>24 & 0xFF)
226
+
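# Illustrative sketch, not part of the published module: the packing that
# _splitposition() and _inttostr() undo. A block pointer stores a 1-based
# record number in the high bits and a 1-based word offset in the low 9 bits;
# strings are stored 4 characters per 32-bit word, least significant byte first.
def _example_bit_packing():
    packed = (3 + 1) << 9 | (10 + 1)   # record 3, offset 10 (zero-based)
    assert _splitposition(packed) == (3, 10)
    word = ord('F') | ord('L') << 8 | ord('O') << 16 | ord('W') << 24
    assert _inttostr(word) == 'FLOW'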
227
+ #@njit
228
+ def _bits_to_date(x):
229
+ year = x >> 26
230
+ month = x >> 22 & 0xf
231
+ day = x >> 17 & 0x1f
232
+ hour = x >> 12 & 0x1f
233
+ minute = x >> 6 & 0x3f
234
+ second = x & 0x3f
235
+ return year, month, day, hour, minute, second
236
+
237
+ #@njit
238
+ def _date_to_bits(year, month, day, hour, minute, second):
239
+ x = year << 26 | month << 22 | day << 17 | hour << 12 | minute << 6 | second
240
+ return x
241
+
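# Illustrative sketch, not part of the published module: _date_to_bits() and
# _bits_to_date() are exact inverses, packing a timestamp into one integer with
# 6 bits each for seconds and minutes, 5 for hours and days, 4 for months and
# the remaining high bits for the year.
def _example_date_bits_roundtrip():
    packed = _date_to_bits(1995, 7, 16, 23, 30, 15)
    assert _bits_to_date(packed) == (1995, 7, 16, 23, 30, 15)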
242
+ #@njit
243
+ def _increment_date(date, timecode, timestep):
244
+ year, month, day, hour, minute, second = _bits_to_date(date)
245
+
246
+ if timecode == 7: year += 100 * timestep
247
+ elif timecode == 6 : year += timestep
248
+ elif timecode == 5 : month += timestep
249
+ elif timecode == 4 : day += timestep
250
+ elif timecode == 3 : hour += timestep
251
+ elif timecode == 2 : minute += timestep
252
+ elif timecode == 1 : second += timestep
253
+
254
+ return _correct_date(year, month, day, hour, minute, second)
255
+
256
+ #@njit
257
+ def _correct_date(year, month, day, hour, minute, second):
258
+ while second >= 60:
259
+ second -= 60
260
+ minute += 1
261
+ while minute >= 60:
262
+ minute -= 60
263
+ hour += 1
264
+ while hour >= 24:
265
+ hour -= 24
266
+ day += 1
267
+ while day > _days_in_month(year, month):
268
+ day -= _days_in_month(year, month)
269
+ month += 1
270
+ while month > 12:
271
+ month -= 12
272
+ year += 1
273
+ return _date_to_bits(year, month, day, hour, minute, second)
274
+
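# Illustrative sketch, not part of the published module: _correct_date()
# normalises overflowed fields, cascading seconds into minutes, hours, days,
# months and years so that incremented dates stay valid.
def _example_correct_date():
    packed = _correct_date(2020, 12, 31, 23, 59, 61)   # rolls into the new year
    assert _bits_to_date(packed) == (2021, 1, 1, 0, 0, 1)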
275
+ #@njit
276
+ def _days_in_month(year, month):
277
+ if month > 12: month = (month - 1) % 12 + 1
278
+
279
+ if month in (1,3,5,7,8,10,12):
280
+ return 31
281
+ elif month in (4,6,9,11):
282
+ return 30
283
+ elif month == 2:
284
+ if _is_leapyear(year): return 29
285
+ else: return 28
286
+
287
+ #@njit
288
+ def _is_leapyear(year):
289
+ if year % 400 == 0:
290
+ return True
291
+ if year % 100 == 0:
292
+ return False
293
+ if year % 4 == 0:
294
+ return True
295
+ else:
296
+ return False
297
+
298
+ #@njit
299
+ def _date_convert(dates, date_epoch, dt_year, dt_month, dt_day, dt_hour, dt_minute, dt_second):
300
+ converted_dates = []
301
+ for x in dates:
302
+ year, month, day, hour, minute, second = _bits_to_date(x)
303
+ date = date_epoch
304
+ date += (year - 1970) * dt_year
305
+ date += (month - 1) * dt_month
306
+ date += (day - 1) * dt_day
307
+ date += hour * dt_hour
308
+ date += minute * dt_minute
309
+ date += second * dt_second
310
+ converted_dates.append(date)
311
+ return converted_dates
312
+
313
+ #@njit
314
+ def _process_groups(iarray, farray, records, offsets, tgroup):
315
+ date_array = [0]  # seed with a typed element so numba can infer the list type; dropped before return
316
+ value_array = [0.0]
317
+
318
+ for i in range(0,len(records)):
319
+ record = records[i]
320
+ offset = offsets[i]
321
+ index = record * 512 + offset
322
+ pscfwr = iarray[record * 512 + 3]  # pointer to the next record; 0 for the last record in the timeseries
323
+ current_date = _splitdate(iarray[index])
324
+ group_enddate = _increment_date(current_date, tgroup, 1)
325
+ offset +=1
326
+ index +=1
327
+
328
+ while current_date < group_enddate:
329
+ nval, ltstep, ltcode, comp, qual = _splitcontrol(iarray[index])
330
+ # compressed block: a single stored value repeated for every one of the nval timesteps
331
+ if comp == 1:
332
+ for i in range(0, nval, 1):
333
+ date_array.append(current_date)
334
+ current_date = _increment_date(current_date, ltcode, ltstep)
335
+ value_array.append(farray[index + 1])
336
+ index += 2
337
+ offset +=2
338
+ else:
339
+ for i in range(0, nval, 1):
340
+ date_array.append(current_date)
341
+ current_date = _increment_date(current_date, ltcode, ltstep)
342
+ value_array.append(farray[index + 1 + i])
343
+ index += 1 + nval
344
+ offset +=1 + nval
345
+
346
+ if offset >= 511:
347
+ offset = 4
348
+ index = (pscfwr - 1) * 512 + offset
349
+ record = pscfwr
350
+ pscfwr = iarray[(record - 1) * 512 + 3]  # pointer to the next record; 0 for the last record in the timeseries
351
+
352
+ date_array = date_array[1:]
353
+ value_array = value_array[1:]
354
+
355
+ return date_array, value_array, group_enddate
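# Illustrative end-to-end sketch, not part of the published module; the file
# name is hypothetical. _process_groups() returns parallel lists of packed
# dates and float values plus the end date of the last group, which
# get_series() above turns into a pandas Series and a reported stop time.
def _example_end_to_end(path='met_data.wdm'):
    wdm = WDM(path)
    return {dsn: wdm.series(dsn) for dsn in wdm.dsns}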