hspf 2.0.0__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries, and is provided for informational purposes only.
hspf/uci.py ADDED
@@ -0,0 +1,643 @@
+ # -*- coding: utf-8 -*-
+ """
+ Created on Mon Jul 11 08:39:57 2022
+ 
+ @author: mfratki
+ """
+ 
+ #lines = reader('C:/Users/mfratki/Documents/Projects/LacQuiParle/ucis/LacQuiParle_0.uci')
+ import subprocess
+ import numpy as np
+ import pandas as pd
+ from .parser.parsers import Table
+ from .parser.graph import reachNetwork
+ 
+ #from hspf_tools.parser import setup
+ 
+ from pathlib import Path
+ 
+ parseTable = pd.read_csv(Path(__file__).parent/'data/ParseTable.csv',
+ dtype = {'width': 'Int64',
+ 'start': 'Int64',
+ 'stop': 'Int64',
+ 'space': 'Int64'})
+ 
+ #timeseriesCatalog = pd.read_csv(Path(__file__).parent/'TimeseriesCatalog.csv')
+ 
+ #timeseriesCatalog = pd.read_csv('C:/Users/mfratki/Documents/GitHub/hspf_tools/parser/TimeseriesCatalog.csv')
+ # dtype = {'width': 'Int64',
+ # 'start': 'Int64',
+ # 'stop': 'Int64',
+ # 'space': 'Int64'})
+ 
+ #uci interface
+ class UCI():
+ def __init__(self, filepath,infer_metzones = True):
+ self.filepath = Path(filepath)
+ self.name = self.filepath.name.split('.')[0]
+ self.lines = reader(filepath)
+ self.run_comments = RUN_comments(self.lines)
+ self.uci = build_uci(self.lines) # UCI converted into a nested dictionary. Could convert into a class with only tables?
+ self.wdm_paths = self.get_filepaths('.wdm')
+ self.hbn_paths = self.get_filepaths('.hbn')
+ 
+ # Required to get valid opnids - business rule
+ opnseq = self.table('OPN SEQUENCE')
+ self.valid_opnids = {'PERLND': opnseq['SEGMENT'][opnseq['OPERATION'] == 'PERLND'].astype(int).to_list(),
+ 'RCHRES': opnseq['SEGMENT'][opnseq['OPERATION'] == 'RCHRES'].astype(int).to_list(),
+ 'IMPLND': opnseq['SEGMENT'][opnseq['OPERATION'] == 'IMPLND'].astype(int).to_list(),
+ 'GENER' : opnseq['SEGMENT'][opnseq['OPERATION'] == 'GENER'].astype(int).to_list(),
+ 'COPY' : opnseq['SEGMENT'][opnseq['OPERATION'] == 'COPY'].astype(int).to_list()}
+ self.network = reachNetwork(self)
+ 
+ if infer_metzones:
+ self.opnid_dict = self.get_metzones()
+ self._LSID_flag = 0
+ 
+ #compositions or totally separate classes?
+ # self.network = network class
+ # tableParser - Responsible for converting uci text to and from a pandas dataframe
+ # tableUpdater - Responsible for updating individual tables
+ 
+ # def supplemental(self):
+ # for block in ['RCHRES','PERLND','IMPLND']:
+ # keys = list([key for key in list(self.uci.keys()) if key[0] == block])
+ # for key in keys:
+ # lines = self.uci[key]
+ # for line in lines:
+ # if '***' in line:
+ # pass
+ # elif '~' in line:
+ # line.split('~') # assuming there will only ever be 2 ~ in a line
+ 
+ def get_parameter(self,parameter):
+ raise NotImplementedError()
+ 
+ def table(self,block,table_name = 'na',table_id = 0,drop_comments = True):
+ # Dynamic parsing of tables when called by the user
+ assert block in ['FILES','PERLND','IMPLND','RCHRES','SCHEMATIC','OPN SEQUENCE','MASS-LINK','EXT SOURCES','NETWORK','GENER','MONTH-DATA','EXT TARGETS','COPY']
+ 
+ table = self.uci[(block,table_name,table_id)] #[block][table_name][table_id]
+ #TODO move the format_opnids into the Table class?
+ if table.data is None:
+ table.parse()
+ if block in ['PERLND','RCHRES','IMPLND','GENER','COPY']:
+ table.replace(format_opnids(table.data,self.valid_opnids[block]))
+ elif block in ['EXT SOURCES']:
+ table.replace(expand_extsources(table.data,self.valid_opnids))
+ 
+ table_data = table.data.copy()
+ if drop_comments:
+ table_data = table_data[table_data['comments'] == '']
+ table_data = table_data.drop('comments',axis = 1)
+ 
+ return table_data
+ 
+ def _table(self,block,table_name,table_id):
+ return self.uci[(block,table_name,table_id)]
+ 
+ def replace_table(self,table,block,table_name = 'na',table_id = 0): #replace an entire table
+ self.uci[(block,table_name,table_id)].replace(table)
+ 
+ def table_lines(self,block,table_name = 'na',table_id = 0):
+ return self.uci[(block,table_name,table_id)].lines
+ 
+ def comments(self,block,table_name = None,table_id = 0): # comments of a table
+ raise NotImplementedError()
+ 
+ def table_names(self,block):
+ return list(set([key[1] for key in list(self.uci.keys()) if key[0] == block]))
+ 
+ def block_names(self): #blocks present in a particular uci file
+ return set([key[0] for key in list(self.uci.keys())])
+ 
+ def add_comment(self,comment):
+ raise NotImplementedError()
+ 
+ def update_table(self,value,operation,table_name,table_id,opnids = None,columns = None,operator = '*',axis = 0):
+ # This should be moved up one layer as this is a user/business requirement. I would pass a Table object from this layer (data layer?) to the business layer, make changes, then pass it back down to this layer.
+ # ensures data has been parsed and allows for determining opnids and column values
+ table = self.table(operation,table_name,table_id,True)
+ 
+ if opnids is None:
+ opnids = table.index
+ if columns is None:
+ columns = table.columns
+ 
+ # Cases where some tables don't have an opnid specified but the timeseries we are comparing might
+ # opnids = table.index.intersection(opnids)
+ 
+ # simple methods for changing all values by the same value/operator combination
+ if operator == 'set':
+ self.uci[(operation,table_name,table_id)].set_value(opnids,columns,value, axis)
+ elif operator == '*':
+ self.uci[(operation,table_name,table_id)].mul(opnids,columns,value, axis)
+ elif operator == '/':
+ self.uci[(operation,table_name,table_id)].div(opnids,columns,value, axis)
+ elif operator == '-':
+ self.uci[(operation,table_name,table_id)].sub(opnids,columns,value, axis)
+ elif operator == '+':
+ self.uci[(operation,table_name,table_id)].add(opnids,columns,value, axis)
+ elif operator == 'chuck':
+ assert(table_name in ['MON-IFLW-CONC','MON-GRND-CONC'])
+ values = chuck(value,table).loc[opnids,columns]
+ self.uci[(operation,table_name,table_id)].set_value(opnids,columns,values)
+ else:
+ print('Select valid operator (set,*,/,-,+)')
+ 
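+ # Hedged usage sketch (table name and ids are hypothetical): scale LZSN in the
+ # first PERLND PWAT-PARM2 table by 20% for two land segments --
+ # uci.update_table(1.2,'PERLND','PWAT-PARM2',0,opnids = [101,102],columns = 'LZSN',operator = '*')
+ 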
+ def merge_lines(self): # write uci to a txt file
+ lines = ['RUN']
+ lines += self.run_comments
+ 
+ # properly ordered blocks
+ blocks = {}
+ for key in self.uci.keys():
+ if key[0] in blocks.keys():
+ blocks[key[0]].append(key)
+ else:
+ blocks[key[0]] = [key]
+ 
+ for block,keys in blocks.items():
+ lines += [block]
+ for key in keys:
+ table = self.uci[key]
+ if key[1] == 'na':
+ lines += table.lines
+ else:
+ lines += [table.header]
+ lines += table.lines
+ lines += [table.footer]
+ lines += ['']
+ 
+ lines += ['END ' + block]
+ lines += ['']
+ lines += ['END RUN']
+ self.lines = lines
+ 
+ def write(self,new_uci_path):
+ self.merge_lines()
+ with open(new_uci_path, 'w') as the_file:
+ for line in self.lines:
+ the_file.write(line+'\n')
+ 
+ def update_bino(self,name):
+ #TODO: Move up to business/presentation layer
+ table = self.table('FILES',drop_comments = False) # initialize the table
+ indexes = table[table['FTYPE'] == 'BINO'].index
+ for index in indexes:
+ table.loc[index,'FILENAME'] = name + '-' + table.loc[index,'FILENAME'].split('-')[-1]
+ self.replace_table(table,'FILES')
+ #self.uci[('FILES','na',0)].set_value(index,'FILENAME',filename)
+ 
+ def get_metzones(self):
+ '''
+ Only keeps operations that are receiving meteorological inputs.
+ '''
+ operations = ['PERLND','IMPLND','RCHRES']
+ dic = {}
+ 
+ extsrc = self.table('EXT SOURCES')
+ # GROUP = 'EXTNL'
+ # DOMAIN = 'MET'
+ # tmemns = timeseriesCatalog.loc[(timeseriesCatalog['Domain'] == 'MET') & (timeseriesCatalog['Group'] == 'EXTNL'),'Member'].str.strip().to_list()
+ 
+ # All metzones, assuming every IMPLND, PERLND, and RCHRES receives precip input
+ metzones = extsrc.loc[(extsrc['TMEMN'] == 'PREC') & (extsrc['TVOL'].isin(operations)),'SVOLNO'].sort_values().unique()
+ metzone_map = {metzone:num for num,metzone in enumerate(metzones)}
+ 
+ for operation in operations:
+ opnids = extsrc.loc[(extsrc['TMEMN'].isin(['PREC'])) & (extsrc['TVOL'] == operation),['TOPFST','SVOLNO']]
+ opnids = opnids.drop_duplicates(subset = 'TOPFST')
+ opnids['metzone'] = opnids['SVOLNO'].map(metzone_map).values
+ opnids.set_index(['TOPFST'],inplace = True)
+ 
+ # Only keep opnids that are receiving precipitation inputs.
+ geninfo = self.table(operation,'GEN-INFO')
+ geninfo = geninfo.loc[list(set(geninfo.index).intersection(set(opnids.index)))].reset_index()
+ geninfo = geninfo.drop_duplicates(subset = 'OPNID').sort_values(by = 'OPNID')
+ if operation == 'RCHRES':
+ opnids.loc[geninfo['OPNID'],['RCHID','LKFG']] = pd.NA
+ opnids['RCHID'] = geninfo['RCHID'].to_list()
+ opnids['LKFG'] = geninfo['LKFG'].to_list()
+ else:
+ landcovers = geninfo['LSID'].unique()
+ landcover_map = {landcover:num for num,landcover in enumerate(landcovers)}
+ opnids['LSID'] = pd.NA
+ opnids.loc[geninfo['OPNID'],'LSID'] = geninfo['LSID'].to_list() # index of opnid is the OPNID
+ opnids['landcover'] = opnids['LSID'].map(landcover_map).values
+ 
+ dic[operation] = opnids
+ return dic
+ 
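+ # Hedged sketch of the result (column names from the code above, ids hypothetical):
+ # uci.opnid_dict['PERLND'] is a frame indexed by TOPFST with 'SVOLNO', 'metzone',
+ # 'LSID' and 'landcover' columns; the 'RCHRES' frame carries 'RCHID' and 'LKFG' instead.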
+ 
+ # Convenience methods. TODO: put in separate module that takes uci object as input. Should not be instance method
+ def get_filepaths(self,file_extension):
+ files = self.table('FILES')
+ filepaths = files.loc[(files['FILENAME'].str.endswith(file_extension.lower())) | (files['FILENAME'].str.endswith(file_extension.upper())),'FILENAME'].to_list()
+ filepaths = [self.filepath.parent.joinpath(filepath) for filepath in filepaths]
+ return filepaths
+ 
+ def get_dsns(self,operation,opnid,smemn):
+ dsns = self.table('EXT SOURCES')
+ assert (smemn in dsns['SMEMN'].unique())
+ dsns = dsns.loc[(dsns['TVOL'] == operation) & (dsns['TOPFST'] == opnid) & (dsns['SMEMN'] == smemn)]
+ files = self.table('FILES').set_index('FTYPE')
+ dsns.loc[:,'FILENAME'] = files.loc[dsns['SVOL'],'FILENAME'].values
+ dsns = dsns[['FILENAME','SVOLNO','SMEMN','TOPFST','TVOL']]
+ return dsns
+ 
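+ # Hedged usage sketch (operation id and member name are hypothetical): list the
+ # source files and dsns feeding precipitation into reach 1 --
+ # uci.get_dsns('RCHRES',1,'PREC')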
+ 
+ def initialize(self,name = None, default_output = 4,n=5,reach_ids = None):
+ if name is None:
+ name = self.name
+ 
+ setup_files(self,name,n)
+ setup_geninfo(self)
+ setup_binaryinfo(self,default_output = default_output,reach_ids = reach_ids)
+ setup_qualid(self)
+ 
+ def initialize_binary_info(self,default_output = 4,reach_ids = None):
+ setup_binaryinfo(self,default_output = default_output,reach_ids = reach_ids)
+ 
+ def build_targets(self):
+ geninfo = self.table('PERLND','GEN-INFO')
+ targets = self.opnid_dict['PERLND'].loc[:,['LSID','landcover']] #.drop_duplicates(subset = 'landcover').loc[:,['LSID','landcover']].reset_index(drop = True)
+ targets.columns = ['LSID','lc_number']
+ schematic = self.table('SCHEMATIC')
+ schematic = schematic.astype({'TVOLNO': int, "SVOLNO": int, 'AFACTR':float})
+ schematic = schematic[(schematic['SVOL'] == 'PERLND')]
+ schematic = schematic[(schematic['TVOL'] == 'PERLND') | (schematic['TVOL'] == 'IMPLND') | (schematic['TVOL'] == 'RCHRES')]
+ areas = []
+ for lc_number in targets['lc_number'].unique():
+ areas.append(np.sum([schematic['AFACTR'][schematic['SVOLNO'] == perland].sum() for perland in targets.index[targets['lc_number'] == lc_number]]))
+ areas = np.array(areas)
+ 
+ lc_number = targets['lc_number'].drop_duplicates()
+ uci_names = geninfo.loc[targets['lc_number'].drop_duplicates().index]['LSID']
+ targets = pd.DataFrame([uci_names.values,lc_number.values,areas]).transpose()
+ targets.columns = ['uci_name','lc_number','area']
+ targets['npsl_name'] = ''
+ 
+ targets[['TSS','N','TKN','OP','BOD']] = ''
+ 
+ targets['dom_lc'] = ''
+ targets.loc[targets['area'].astype('float').argmax(),'dom_lc'] = 1
+ return targets
+ 
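+ # Hedged usage sketch of the class (paths and table names are hypothetical):
+ # uci = UCI('LacQuiParle_0.uci')
+ # pwat = uci.table('PERLND','PWAT-PARM2')
+ # uci.update_table(0.9,'PERLND','PWAT-PARM2',0,columns = 'LZSN',operator = '*')
+ # uci.write('LacQuiParle_1.uci')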
+ 
+ #TODO: More convenience methods that should probably be in a separate module
+ 
+ def run_model(uci_file):
+ winHSPF = str(Path(__file__).resolve().parent.parent) + '\\bin\\WinHSPFLt\\WinHspfLt.exe'
+ subprocess.run([winHSPF,uci_file.as_posix()]) #, stdout=subprocess.PIPE, creationflags=0x08000000)
+ 
+ def get_filepaths(uci,file_extension):
+ files = uci.table('FILES')
+ filepaths = files.loc[(files['FILENAME'].str.endswith(file_extension.lower())) | (files['FILENAME'].str.endswith(file_extension.upper())),'FILENAME'].to_list()
+ filepaths = [uci.filepath.parent.joinpath(filepath) for filepath in filepaths]
+ return filepaths
+ 
+ def setup_files(uci,name,n = 5):
+ table = uci.table('FILES',drop_comments = False)
+ for index, row in table.iterrows():
+ filename = Path(row['FILENAME'])
+ if filename.suffix in ['.wdm','.ech','.out','.hbn']:
+ table.loc[index,'FILENAME'] = filename.name
+ if filename.suffix in ['.plt']:
+ table.drop(index,inplace = True)
+ 
+ # Get new binary unit numbers and create new BINO rows
+ bino_nums = []
+ invalid = table['UNIT'].values
+ for num in range(15,100):
+ if num not in invalid:
+ bino_nums.append(num)
+ if len(bino_nums) == n:
+ break
+ 
+ # filenames are suffixed by position (0..n-1), not by the BINO unit number
+ binary_names = [name + '-' + str(num) + '.hbn' for num in range(len(bino_nums))]
+ rows = [['BINO',bino_num,binary_name,''] for bino_num,binary_name in zip(bino_nums,binary_names)]
+ # Drop old BINO rows and insert new BINO rows
+ table = table.loc[table['FTYPE'] != 'BINO'].reset_index(drop=True)
+ rows = pd.DataFrame(rows, columns = table.columns).astype(table.dtypes)
+ table = pd.concat([table,rows])
+ table.reset_index(drop=True,inplace=True)
+ 
+ # Update table in the uci
+ uci.replace_table(table,'FILES')
+ 
+ def setup_geninfo(uci):
+ # Initialize Gen-Info
+ bino_nums = uci.table('FILES').set_index('FTYPE').loc['BINO','UNIT'].tolist()
+ if isinstance(bino_nums,int): # .loc returns a scalar when only one BINO row exists, so tolist() yields an int rather than a list
+ bino_nums = [bino_nums]
+ 
+ #opnids = uci.table(operation,'GEN-INFO').index
+ for operation in ['RCHRES','PERLND','IMPLND']:
+ opnids = np.array_split(uci.table(operation,'GEN-INFO').index.to_list(),len(bino_nums))
+ 
+ for opnid,bino_num in zip(opnids,bino_nums):
+ if operation == 'RCHRES': #TODO convert BUNITE to BUNIT1 to get rid of this if statement
+ uci.update_table(bino_num,'RCHRES','GEN-INFO',0,opnids = opnid,columns = 'BUNITE',operator = 'set')
+ else:
+ uci.update_table(bino_num,operation,'GEN-INFO',0,opnids = opnid,columns = 'BUNIT1',operator = 'set')
+ 
+ def setup_binaryinfo(uci,default_output = 4,reach_ids = None):
+ # Initialize Binary-Info
+ uci.update_table(default_output,'PERLND','BINARY-INFO',0,
+ columns = ['AIRTPR', 'SNOWPR', 'PWATPR', 'SEDPR', 'PSTPR', 'PWGPR', 'PQALPR','MSTLPR', 'PESTPR', 'NITRPR', 'PHOSPR', 'TRACPR'],
+ operator = 'set')
+ uci.update_table(default_output,'IMPLND','BINARY-INFO',0,
+ columns = ['ATMPPR', 'SNOWPR', 'IWATPR', 'SLDPR', 'IWGPR', 'IQALPR'],
+ operator = 'set')
+ uci.update_table(default_output,'RCHRES','BINARY-INFO',0,
+ columns = ['HYDRPR', 'ADCAPR', 'CONSPR', 'HEATPR', 'SEDPR', 'GQLPR', 'OXRXPR', 'NUTRPR', 'PLNKPR', 'PHCBPR'],
+ operator = 'set')
+ 
+ uci.update_table(default_output,'PERLND','BINARY-INFO',0,columns = ['SNOWPR','SEDPR','PWATPR','PQALPR'],operator = 'set')
+ uci.update_table(default_output,'IMPLND','BINARY-INFO',0,columns = ['SNOWPR','IWATPR','SLDPR','IQALPR'],operator = 'set')
+ uci.update_table(default_output,'RCHRES','BINARY-INFO',0,columns = ['HYDRPR','SEDPR','HEATPR','OXRXPR','NUTRPR','PLNKPR'],operator = 'set')
+ if reach_ids is not None:
+ uci.update_table(2,'RCHRES','BINARY-INFO',0,columns = ['SEDPR','OXRXPR','NUTRPR','PLNKPR','HEATPR','HYDRPR'],opnids = reach_ids,operator = 'set')
+ 
+ def setup_qualid(uci):
+ #### Standardize QUAL-ID Names
+ # Perlands
+ uci.update_table('NH3+NH4','PERLND','QUAL-PROPS',0,columns = 'QUALID',operator = 'set')
+ uci.update_table('NO3','PERLND','QUAL-PROPS',1,columns = 'QUALID',operator = 'set')
+ uci.update_table('ORTHO P','PERLND','QUAL-PROPS',2,columns = 'QUALID',operator = 'set')
+ uci.update_table('BOD','PERLND','QUAL-PROPS',3,columns = 'QUALID',operator = 'set')
+ 
+ # Implands
+ uci.update_table('NH3+NH4','IMPLND','QUAL-PROPS',0,columns = 'QUALID',operator = 'set')
+ uci.update_table('NO3','IMPLND','QUAL-PROPS',1,columns = 'QUALID',operator = 'set')
+ uci.update_table('ORTHO P','IMPLND','QUAL-PROPS',2,columns = 'QUALID',operator = 'set')
+ uci.update_table('BOD','IMPLND','QUAL-PROPS',3,columns = 'QUALID',operator = 'set')
+ 
+ def chuck(adjustment,table):
+ # If increasing monthly concentration increase the minimum concentration value of Mi and Mi+1
+ # If decreasing monthly concentration decrease the maximum concentration value of Mi and Mi+1
+ # If concentration values are equal increase both equally
+ table['dummy'] = table.iloc[:,0]
+ zero_table = table.copy()*0
+ count_table = zero_table.copy()
+ for index, value in enumerate(adjustment):
+ next_index = index+1
+ if value > 1:
+ for row,(a,b) in enumerate(zip(table.iloc[:,index].values, table.iloc[:,next_index].values)):
+ zero_table.iloc[row,index+np.nanargmin([a,b])] += np.nanmin([a,b])*value
+ count_table.iloc[row,index+np.nanargmin([a,b])] += 1
+ elif value < 1:
+ for row,(a,b) in enumerate(zip(table.iloc[:,index].values, table.iloc[:,next_index].values)):
+ zero_table.iloc[row,index+np.nanargmax([a,b])] += np.nanmax([a,b])*value
+ count_table.iloc[row,index+np.nanargmax([a,b])] += 1
+ 
+ zero_table.drop('dummy',axis=1,inplace=True)
+ count_table.drop('dummy',axis=1,inplace=True)
+ 
+ zero_table[count_table == 0] = table[count_table==0]
+ count_table[count_table == 0] = 1
+ zero_table = zero_table/count_table
+ return zero_table
+ 
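+ # Hedged worked example (toy numbers): with adjustment[0] = 2 and a row where
+ # Jan = 2.0 and Feb = 4.0, January is the smaller of the pair, so its cell
+ # accumulates 2.0*2 = 4.0 with a count of 1; cells never touched keep their
+ # original values, and touched cells are divided by how many intervals hit them.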
+ 
+ # Expanding opnid-opnid in tables
+ def format_opnids(table,valid_opnids):
+ table = table.reset_index()
+ indexes = table.loc[table[~(table['OPNID'] == '')].index,'OPNID']
+ for index, value in indexes.items():
+ try:
+ #table.loc[index,'OPNID'] = int(value[0])
+ int(value)
+ except ValueError:
+ value = value.split()
+ opnids = np.arange(int(value[0]),int(value[1])+1)
+ opnids = [opnid for opnid in opnids if opnid in valid_opnids]
+ if len(opnids) == 0: # in case the x-x mapping covers no valid opnids
+ table.drop(index,inplace = True)
+ else:
+ df = pd.DataFrame([table.loc[index]]*len(opnids))
+ df['OPNID'] = opnids
+ # The insertion method takes advantage of the fact
+ # that Pandas does not automatically reset indexes.
+ table = insert_rows(index,table,df,reset_index = False)
+ 
+ #table.loc[table.index[table['OPNID'] == ''],'OPNID'] = pd.NA
+ table['OPNID'] = pd.to_numeric(table['OPNID']).astype('Int64')
+ 
+ # Only keep rows that are being simulated
+ table = table.loc[(table['OPNID'].isin(valid_opnids)) | (table['OPNID'].isna())]
+ table = table.set_index('OPNID',drop = True)
+ return table
+ 
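+ # Hedged sketch of the expansion (toy frame, not a real UCI table): a row whose
+ # OPNID reads '101  103' becomes one row per simulated id in that range --
+ # demo = pd.DataFrame({'OPNID': ['101  103'],'LZSN': [4.0],'comments': ['']}).set_index('OPNID')
+ # format_opnids(demo,[101,103]) # -> rows indexed 101 and 103; 102 is not simulated and is dropped
+ 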
+ def expand_extsources(data,valid_opnids):
+ start_column = 'TOPFST'
+ end_column = 'TOPLST'
+ indexes = data.loc[~data[end_column].isna()]#[[start_column,end_column,'']]
+ 
+ for index, row in indexes.iterrows():
+ opnids = np.arange(int(row[start_column]),int(row[end_column])+1)
+ opnids = [opnid for opnid in opnids if opnid in valid_opnids[row['TVOL']]]
+ 
+ if len(opnids) == 0: # in case the x-x mapping covers no valid opnids
+ data.drop(index,inplace = True)
+ else:
+ df = pd.DataFrame([data.loc[index]]*len(opnids))
+ df[start_column] = opnids
+ df[end_column] = pd.NA
+ df = df.astype(data.dtypes.to_dict())
+ # The insertion method takes advantage of the fact
+ # that Pandas does not automatically reset indexes.
+ data = insert_rows(index,data,df,reset_index = False)
+ 
+ #table.loc[table.index[table['OPNID'] == ''],'OPNID'] = pd.NA
+ data[start_column] = pd.to_numeric(data[start_column]).astype('Int64')
+ data[end_column] = pd.to_numeric(data[end_column]).astype('Int64')
+ data = data.reset_index(drop = True)
+ 
+ opnids = sum(list(valid_opnids.values()), []) # Note: slow method for collapsing lists but fine for this case
+ data = data.loc[(data['TOPFST'].isin(opnids)) | (data['TOPFST'].isna())]
+ 
+ # Only keep rows that are being simulated
+ for operation in valid_opnids.keys():
+ data = data.drop(data.loc[(data['TVOL'] == operation) & ~(data['TOPFST'].isin(valid_opnids[operation]))].index)
+ 
+ return data
+ 
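+ # Hedged sketch (toy values): an EXT SOURCES row with TOPFST = 101 and TOPLST = 103
+ # expands the same way -- one row per simulated target id, with TOPLST cleared
+ # to <NA> on the expanded rows.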
+ 
+ def insert_rows(insertion_point,a,b,drop = True,reset_index = True):
+ if drop: a = a.drop(insertion_point)
+ df = pd.concat([a.loc[:insertion_point], b, a.loc[insertion_point:]])
+ if reset_index: df = df.reset_index(drop=True)
+ return df
+ 
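+ # Hedged worked example (toy frames): with a indexed 0..2 and b a replacement
+ # row, insert_rows(1,a,b) drops a's row 1 and splices b between a.loc[:1]
+ # (row 0) and a.loc[1:] (row 2), relying on label-based slicing.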
+ 
+ def keep_valid_opnids(table,opnid_column,valid_opnids):
+ table = table.reset_index(drop = True)
+ valid_indexes = [table.index[(table[opnid_column].isin(valid_opnids[operation])) & (table['TVOL'] == operation)] for operation in valid_opnids.keys()]
+ valid_indexes.append(table.index[table['comments'] != ''])
+ table = pd.concat([table.loc[valid_index] for valid_index in valid_indexes])
+ table = table.sort_index().reset_index(drop=True)
+ return table
+ 
+ def RUN_comments(lines):
+ # assumes no blank lines (i.e. lines have been read in using the reader function)
+ comments = []
+ 
+ RUN_start = lines.index('RUN')
+ if RUN_start > 0:
+ comment_lines = lines[:RUN_start]
+ else:
+ comment_lines = lines[1:]
+ 
+ for line in comment_lines:
+ if '***' in line:
+ comments.append(line)
+ else:
+ if any(c.isalpha() for c in line):
+ break
+ return comments
+ 
+ # Functions for converting the uci text file into a dictionary structure made up of Table objects
+ def reader(filepath):
+ # simple reader returning non-blank lines; *** comment lines are kept whole,
+ # all other lines are truncated to the 80-column UCI record width
+ 
+ #TODO: Address this encoding issue that seems pretty common across our text files.
+ # It's not a huge deal since we are using ASCII and no information will be lost.
+ with open(filepath, encoding="utf-8",errors="ignore") as fp:
+ 
+ lines = []
+ content = fp.readlines()
+ for line in content:
+ if line.strip():
+ if '***' in line:
+ lines.append(line.rstrip())
+ else:
+ lines.append(line[:80].rstrip())
+ return lines
+ 
+ def decompose_perlands(metzones,landcovers):
+ perlands = {}
+ for metzone in metzones:
+ metzone = int(metzone)
+ for landcover in landcovers:
+ landcover = int(landcover)
+ perlands[metzone+landcover] = (metzone,landcover)
+ return perlands
+ 
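+ # Hedged example (assumes the metzone-hundreds plus landcover-units id convention):
+ # decompose_perlands([100,200],[1,2]) -> {101: (100,1), 102: (100,2), 201: (200,1), 202: (200,2)}
+ 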
+ def split_number(s):
+ head = s.rstrip('0123456789')
+ tail = s[len(head):]
+ return head.strip(), tail
+ 
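+ # Example: split_number('PWAT-PARM2') -> ('PWAT-PARM', '2'); a string with no
+ # trailing digits comes back with an empty tail.
+ 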
+ #TODO merge the get_blocks and build_uci into a single function to reduce the number of for loops
+ def get_blocks(lines):
+ dic = {}
+ shift = len(lines)-1
+ for index,line in enumerate(reversed(lines)):
+ if '***' in line:
+ pass
+ else:
+ line,number = split_number(line.strip()) # Sensitive method to separate numbers
+ line_strip = line.strip() + number
+ if line_strip.startswith('END'):
+ if (line_strip[4:] in parseTable['block'].values): # | (line_strip[4:] in structure['block'].values):
+ current_name = line_strip[4:]
+ dic[current_name] = {}
+ dic[current_name]['indcs'] = [shift-index]
+ #names.append(current_name)
+ #start_indcs.append(shift - index)
+ #table_id.append(number)
+ elif line_strip == current_name: #line_strip.startswith(current_name):
+ dic[current_name]['indcs'].append(shift-index)
+ #end_indcs.append(shift - index)
+ 
+ # df = pd.DataFrame([names,table_id,start_indcs,end_indcs]).transpose()
+ # df.columns = ['name','id','start','stop']
+ return dic
+ 
+ def build_uci(lines):
+ blocks = get_blocks(lines)
+ current_name = None
+ keys = []
+ tables = []
+ for k,v in blocks.items():
+ if 'na' in parseTable[parseTable['block']==k]['table'].unique():
+ table = Table(k,'na')
+ table.lines = lines[v['indcs'][1]:v['indcs'][0]+1][1:-1]
+ table.header = lines[v['indcs'][1]:v['indcs'][0]+1][0]
+ table.footer = lines[v['indcs'][1]:v['indcs'][0]+1][-1]
+ table.data = None
+ table.indcs = v['indcs'][1]+1
+ keys.append([k,'na'])
+ tables.append(table)
+ else:
+ #block_lines = lines[v['indcs'][1]+1:v['indcs'][0]]
+ for index,line in enumerate(reversed(lines[v['indcs'][1]+1:v['indcs'][0]])):
+ if '***' in line:
+ pass
+ else:
+ split_line,number = split_number(line.strip()) # Sensitive method to separate numbers
+ line_strip = split_line.strip()
+ if line_strip.startswith('END'):
+ if (line_strip[4:] in parseTable['table'].values) | (line_strip[4:]+number in parseTable['table'].values):
+ current_name = (line_strip[4:] + number).strip()
+ current_name_len = len(current_name)
+ start = v['indcs'][0]-index
+ #else: print(line)
+ elif (line_strip + number).strip()[0:current_name_len] == current_name: #line_strip.startswith(current_name):
+ end = v['indcs'][0]-index-1
+ table = Table(k,current_name)
+ table.lines = lines[end+1:start-1]
+ table.header = lines[end]
+ table.footer = lines[start-1]
+ table.data = None
+ table.indcs = end+1
+ 
+ keys.append([k,current_name])
+ tables.append(table)
+ current_name = None
+ current_name_len = None
+ 
+ # Cumulative count of duplicate key names as some tables appear multiple times within a block.
+ # Since the uci file is looped through backwards, duplicate tables must be relabeled
+ # in the order they appear from top to bottom in the uci file.
+ keys.reverse()
+ tables.reverse()
+ # No base python method for cumulative counting of elements; collections.Counter only gives total counts
+ table_ids = list(pd.DataFrame(keys).groupby(by=[0,1]).cumcount())
+ ordered_keys = [(key[0],key[1],table_id) for key,table_id in zip(keys,table_ids)]
+ dic = dict(zip(ordered_keys,tables))
+ return dic
+ 
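+ # Hedged sketch of the result (key values hypothetical): build_uci returns a dict
+ # keyed by (block, table, occurrence) tuples, e.g. dic[('PERLND','PWAT-PARM2',0)]
+ # is the Table object for the first PWAT-PARM2 table in the PERLND block.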