hspf 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hspf/parser/parsers.py ADDED
@@ -0,0 +1,516 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Fri Oct 7 12:13:23 2022
4
+
5
+ @author: mfratki
6
+ """
7
+
8
+ from abc import abstractmethod
9
+ import numpy as np
10
+ import pandas as pd
11
+ from pathlib import Path
12
+ import math
13
+
14
# Column-layout table that drives fixed-width parsing of UCI files.
# #RespectRespec
# It is read from data/ParseTable.csv, located relative to this module's
# parent package directory. The position fields are loaded with the nullable
# 'Int64' dtype.
parseTable = pd.read_csv(
    Path(__file__).parent.parent / 'data' / 'ParseTable.csv',
    dtype={field: 'Int64' for field in ('width', 'start', 'stop', 'space')},
)
20
+ # parseTable = pd.read_csv('C:/Users/mfratki/Documents/GitHub/hspf_tools/parser/ParseTable.csv',
21
+ # dtype = {'width': 'Int64',
22
+ # 'start': 'Int64',
23
+ # 'stop': 'Int64',
24
+ # 'space': 'Int64'})
25
+ # Parser implementation
26
class Parser:
    """Common interface for the block parsers: a ``parse`` and a ``write`` hook.

    The hooks are tagged with ``abstractmethod`` but the class does not use
    ``ABCMeta``, so nothing is enforced at instantiation time; both hooks
    simply return ``None`` unless a subclass overrides them.
    """

    @abstractmethod
    def parse(self):
        """Placeholder for turning raw table lines into data."""

    @abstractmethod
    def write(self):
        """Placeholder for turning table data back into lines."""
33
+ pass
34
+
35
+
36
class Table:
    """A single table of a block, held both as raw lines and as parsed data.

    ``lines`` holds the raw text lines and ``data`` the parsed table. The
    parser used to convert between the two is looked up in ``parserSelector``
    by block name when the table is created. Every method that modifies
    ``data`` regenerates ``lines`` afterwards by calling ``write``.
    """

    def __init__(self,block,name,table_id = 0,activity = None,dtypes = None,columns = None,widths = None):
        # Identification of the table within the file.
        self.name = name
        self.id = table_id
        self.activity = activity
        self.block = block
        # Optional layout information supplied by the caller.
        self.dtypes = dtypes
        self.columns = columns
        self.widths = widths
        # Content; all of it is filled in after construction.
        self.data = None
        self.comments = None
        self.lines = None
        self.header = None
        self.footer = None
        self.supplemental = False

        # Raises KeyError for a block name missing from parserSelector.
        self.parser = parserSelector[self.block]

    def parse(self):
        """Populate ``data`` by running the block parser over ``lines``."""
        self.data = self.parser.parse(self.block, self.name, self.lines)

    def write(self):
        """Regenerate ``lines`` from the current ``data``."""
        self.lines = self.parser.write(self.block, self.name, self.data)

    def replace(self,data):
        """Replace the entire table with a copy of ``data`` and rewrite the lines."""
        self.data = data.copy()
        self.write()

    def set_value(self,rows,columns,value,axis = 0):
        """Assign ``value`` to ``data.loc[rows, columns]`` and rewrite the lines.

        ``axis`` is accepted for symmetry with the arithmetic methods but is
        not used.
        """
        self.data.loc[rows, columns] = value
        self.write()

    def _arithmetic(self, operation, rows, columns, value, axis):
        """Apply the named pandas arithmetic method to a selection, then rewrite."""
        selection = self.data.loc[rows, columns]
        # ``axis`` is forwarded positionally as the second argument.
        self.data.loc[rows, columns] = getattr(selection, operation)(value, axis)
        self.write()

    def mul(self,rows,columns,value,axis = 0):
        """Multiply ``data.loc[rows, columns]`` by ``value`` and rewrite the lines."""
        self._arithmetic('mul', rows, columns, value, axis)

    def add(self,rows,columns,value,axis = 0):
        """Add ``value`` to ``data.loc[rows, columns]`` and rewrite the lines."""
        self._arithmetic('add', rows, columns, value, axis)

    def sub(self,rows,columns,value,axis = 0 ):
        """Subtract ``value`` from ``data.loc[rows, columns]`` and rewrite the lines."""
        self._arithmetic('sub', rows, columns, value, axis)

    def div(self,rows,columns,value,axis = 0):
        """Divide ``data.loc[rows, columns]`` by ``value`` and rewrite the lines."""
        self._arithmetic('div', rows, columns, value, axis)
85
+
86
+
87
+ # table header rules
88
+
89
+ # standard: line[6:].strip()
90
+ # mass-link: line[6:].strip() + ' ' + str(number) #5 spaces
91
+ # ftables: line[6:].strip() + ' ' + str(number) #4 spaces
92
+ # month-data: line[6:].strip() + ' ' + str(number) #5 spaces
93
+ # def format_table_name(block,table_name):
94
+ # text,number = split_number(table_name.strip())
95
+ # if block == 'MASS-LINK':
96
+ # header = text + ' '*5 + str(number)
97
+ # footer = text + ' ' + str(number)
98
+ # name = str(number)
99
+ # elif block == 'FTABLES':
100
+ # header = text + ' '*4 + str(number)
101
+ # footer = text + str(number)
102
+ # name = str(number)
103
+ # elif block == 'MONTH-DATA':
104
+ # header = text + ' '*5 + str(number)
105
+ # footer = text + ' ' + str(number)
106
+ # name = str(number)
107
+ # else:
108
+ # header = text+number
109
+ # footer = text+number
110
+ # name = text+number
111
+ # return header,footer,text,name
112
+
113
+
114
class defaultParser(Parser):
    """Fallback parser: both operations raise ``NotImplementedError``.

    The functions take no ``self``; they are meant to be called on the class
    itself.
    """

    def parse(block,table,lines):
        """Parsing is not implemented for this parser."""
        raise NotImplementedError()

    def write(block,table,lines):
        """Writing is not implemented for this parser."""
        raise NotImplementedError()
120
+
121
class standardParser(Parser):
    """Parser for tables whose column layout is looked up by block and table name.

    The functions take no ``self``; they are meant to be called on the class
    itself.
    """

    def parse(block,table_name,table_lines):
        """Convert raw table lines into a typed DataFrame.

        The column layout comes from ``delimiters(block, table_name)``.
        """
        names, dtypes, starts, stops = delimiters(block, table_name)
        rows = parse_lines(table_lines, starts, stops, dtypes)
        return column_dtypes(rows, dtypes, names)

    def write(block,table_name,table):
        """Convert a table back into a list of fixed-width text lines.

        A row whose last field is the empty string is formatted from its
        values; otherwise the content of that last field is emitted verbatim
        as the whole line.
        """
        _, dtypes, starts, stops = delimiters(block, table_name)
        # NOTE: going through ``.values.tolist()`` was flagged by the original
        # author as a likely source of bugs.
        return [
            format_line(row, starts, stops, dtypes) if row[-1] == '' else row[-1]
            for row in table.values.tolist()
        ]
144
+
145
class operationsParser(Parser):
    """Parser for tables that are indexed by their 'OPNID' column.

    The functions take no ``self``; they are meant to be called on the class
    itself.
    """

    def parse(block,table_name,table_lines):
        """Convert raw table lines into a typed DataFrame indexed by 'OPNID'.

        The column layout comes from ``delimiters(block, table_name)``.
        """
        names, dtypes, starts, stops = delimiters(block, table_name)
        rows = parse_lines(table_lines, starts, stops, dtypes)
        frame = column_dtypes(rows, dtypes, names)
        return frame.set_index('OPNID')

    def write(block,table_name,table):
        """Convert a table back into a list of fixed-width text lines.

        The index is first moved back into the columns. A row whose last field
        is the empty string is formatted from its values; otherwise the
        content of that last field is emitted verbatim as the whole line.
        """
        _, dtypes, starts, stops = delimiters(block, table_name)
        # NOTE: going through ``.values.tolist()`` was flagged by the original
        # author as a likely source of bugs.
        flat_rows = table.reset_index().values.tolist()
        return [
            format_line(row, starts, stops, dtypes) if row[-1] == '' else row[-1]
            for row in flat_rows
        ]
170
+
171
+
172
class opnsequenceParser(Parser):
    """Parser for the OPN SEQUENCE block of a UCI file.

    The functions take no ``self``; they are meant to be called on the class
    itself.
    """

    def parse(block,table_name,table_lines):
        '''
        Parse the OPN SEQUENCE block of the uci file. This block contains all
        operation ids represented in the model, which is necessary for
        formatting tables that have an x-x mapping. Therefore this block MUST
        be parsed when first reading a UCI file.

        Parameters
        ----------
        block : str
            Block name. Not used by this parser.
        table_name : str
            Table name. Not used by this parser.
        table_lines : list of str
            Lines of the block. Lines containing '***' are kept as comment
            rows; whitespace-only lines are skipped.

        Returns
        -------
        pandas DataFrame
            One row per operation line and per comment line, with columns
            'OPERATION', 'SEGMENT', 'INDELT_minutes' and 'comments'.
            Operation rows carry the time step (in minutes) of the most
            recent 'INGRP INDELT' line. Comment rows hold ('', -1, NaN) in
            the first three columns and the raw line in 'comments'.

        Raises
        ------
        ValueError
            If an operation line appears before any 'INGRP INDELT' line, so
            that no time step is known for it.
        '''
        ops = {'PERLND', 'IMPLND', 'RCHRES', 'COPY', 'GENER'}
        rows = []
        indelt = None  # time step in minutes; set by each 'INGRP INDELT' line
        for line in table_lines:
            if '***' in line:
                # Placeholder values used for comment rows:
                # 'I' -> -1, 'C' -> '', 'R' -> np.nan
                rows.append(('', -1, np.nan, line))
                continue

            tokens = line.split()
            if not tokens:
                # Whitespace-only line: nothing to record.
                continue

            if tokens[:2] == ['INGRP', 'INDELT']:
                # The value is either plain minutes or 'hh:mm'.
                parts = tokens[2].split(':')
                if len(parts) == 1:
                    indelt = int(parts[0])
                else:
                    indelt = 60 * int(parts[0]) + int(parts[1])
            elif tokens[0] in ops:
                if indelt is None:
                    raise ValueError(
                        'Operation line found before any INGRP INDELT line: '
                        + repr(line))
                rows.append((tokens[0], int(tokens[1]), indelt, ''))

        return pd.DataFrame(
            rows,
            columns = ['OPERATION', 'SEGMENT', 'INDELT_minutes', 'comments'])

    def write(block,table,lines):
        """Writing the OPN SEQUENCE block is not implemented."""
        raise NotImplementedError()
217
+
218
class ftableParser(Parser):
    """Parser for FTABLE tables.

    The ('FTABLES', 'FTABLE') column layout is always used; the ``block`` and
    ``table_name`` arguments do not take part in the lookup. The functions
    take no ``self``; they are meant to be called on the class itself.
    """

    def parse(block,table_name,table_lines):
        """Convert raw FTABLE lines into a typed DataFrame."""
        names, dtypes, starts, stops = delimiters('FTABLES', 'FTABLE')
        rows = parse_lines(table_lines, starts, stops, dtypes)
        return column_dtypes(rows, dtypes, names)

    def write(block,table_name,table):
        """Convert an FTABLE back into a list of fixed-width text lines.

        A row whose last field is the empty string is formatted from its
        values; otherwise the content of that last field is emitted verbatim
        as the whole line.
        """
        _, dtypes, starts, stops = delimiters('FTABLES', 'FTABLE')
        # NOTE: going through ``.values.tolist()`` was flagged by the original
        # author as a likely source of bugs.
        return [
            format_line(row, starts, stops, dtypes) if row[-1] == '' else row[-1]
            for row in table.values.tolist()
        ]
241
+
242
class monthdataParser(Parser):
    """Parser for MONTH-DATA tables.

    The ('MONTH-DATA', 'MONTH-DATA') column layout is always used; the
    ``block`` and ``table_name`` arguments do not take part in the lookup.
    The functions take no ``self``; they are meant to be called on the class
    itself.
    """

    def parse(block,table_name,table_lines):
        """Convert raw MONTH-DATA lines into a typed DataFrame."""
        names, dtypes, starts, stops = delimiters('MONTH-DATA', 'MONTH-DATA')
        rows = parse_lines(table_lines, starts, stops, dtypes)
        return column_dtypes(rows, dtypes, names)

    def write(block,table_name,table):
        """Convert a MONTH-DATA table back into fixed-width text lines.

        A row whose last field is the empty string is formatted from its
        values; otherwise the content of that last field is emitted verbatim
        as the whole line.
        """
        _, dtypes, starts, stops = delimiters('MONTH-DATA', 'MONTH-DATA')
        # NOTE: going through ``.values.tolist()`` was flagged by the original
        # author as a likely source of bugs.
        return [
            format_line(row, starts, stops, dtypes) if row[-1] == '' else row[-1]
            for row in table.values.tolist()
        ]
265
+
266
class masslinkParser(Parser):
    """Parser for MASS-LINK tables.

    The ('MASS-LINK', 'MASS-LINK') column layout is always used; the ``block``
    and ``table_name`` arguments do not take part in the lookup. The functions
    take no ``self``; they are meant to be called on the class itself.
    """

    def parse(block,table_name,table_lines):
        """Convert raw MASS-LINK lines into a typed DataFrame."""
        names, dtypes, starts, stops = delimiters('MASS-LINK', 'MASS-LINK')
        rows = parse_lines(table_lines, starts, stops, dtypes)
        return column_dtypes(rows, dtypes, names)

    def write(block,table_name,table):
        """Convert a MASS-LINK table back into fixed-width text lines.

        A row whose last field is the empty string is formatted from its
        values; otherwise the content of that last field is emitted verbatim
        as the whole line.
        """
        _, dtypes, starts, stops = delimiters('MASS-LINK', 'MASS-LINK')
        # NOTE: going through ``.values.tolist()`` was flagged by the original
        # author as a likely source of bugs.
        return [
            format_line(row, starts, stops, dtypes) if row[-1] == '' else row[-1]
            for row in table.values.tolist()
        ]
289
+
290
class specactionsParser(Parser):
    """Parser registered for SPEC-ACTIONS; neither operation is implemented.

    The functions take no ``self``; they are meant to be called on the class
    itself.
    """

    def parse(block,table,lines):
        """Parsing is not implemented for this parser."""
        raise NotImplementedError()

    def write(block,table,lines):
        """Writing is not implemented for this parser."""
        raise NotImplementedError()
296
+
297
class externalsourcesParser(Parser):
    """Placeholder parser for external sources; neither operation is implemented.

    Derives from ``Parser`` like every other parser class in this module.
    The functions take no ``self``; they are meant to be called on the class
    itself.
    """

    def parse(block,table,lines):
        """Parsing is not implemented for this parser."""
        raise NotImplementedError()

    def write(block,table,lines):
        """Writing is not implemented for this parser."""
        raise NotImplementedError()
303
+
304
# Maps a block name to the parser class that handles its tables.
# Table.__init__ looks the parser up here by block name. Blocks mapped to
# defaultParser (and SPEC-ACTIONS) raise NotImplementedError on both parse
# and write.
parserSelector = {'GLOBAL':defaultParser,
                  'FILES':standardParser,
                  'OPN SEQUENCE':opnsequenceParser,
                  'PERLND':operationsParser,
                  'IMPLND':operationsParser,
                  'RCHRES':operationsParser,
                  'COPY':operationsParser,
                  'PLTGEN':defaultParser,
                  'DISPLY':defaultParser,
                  'DURANL':defaultParser,
                  'GENER':operationsParser,
                  'MUTSIN':defaultParser,
                  'BMPRAC':defaultParser,
                  'REPORT':defaultParser,
                  'FTABLES':ftableParser,
                  'EXT SOURCES':standardParser,
                  'NETWORK':standardParser,
                  'SCHEMATIC':standardParser,
                  'MASS-LINK': masslinkParser,
                  'EXT TARGETS':standardParser,
                  'PATHNAMES':defaultParser,
                  'FORMATS':defaultParser,
                  'SHADE':defaultParser,
                  'SPEC-ACTIONS':specactionsParser,
                  'MONTH-DATA':monthdataParser,
                  'CATEGORY':defaultParser}
330
+
331
+ # Parsing functions
332
+ # for parsing individual tables using the ParseTable csv
333
def delimiters(block_name,table_name):
    """Look up the fixed-width column layout of a table in ``parseTable``.

    Parameters
    ----------
    block_name : str
        Value matched against the 'block' column of ``parseTable``.
    table_name : str
        Value matched against the 'table' column of ``parseTable``.

    Returns
    -------
    tuple of list
        ``(names, dtypes, starts, stops)`` for the matching rows, each
        extended by one trailing entry for a zero-width 'comments' column of
        dtype 'C' that starts and stops where the last real column stops.

    Raises
    ------
    IndexError
        If no row of ``parseTable`` matches, since there is then no last
        column to anchor the comments entry to.
    """
    matches = (parseTable['block'] == block_name) & (parseTable['table'] == table_name)
    layout = parseTable[matches]

    names = layout['column'].astype(str).to_list()
    dtypes = layout['dtype'].astype(str).to_list()
    starts = layout['start'].astype(int).to_list()
    stops = layout['stop'].astype(int).to_list()

    # Trailing comments pseudo-column, anchored at the end of the last field.
    end_of_fields = stops[-1]
    return (names + ['comments'],
            dtypes + ['C'],
            starts + [end_of_fields],
            stops + [end_of_fields])
348
+
349
+
350
def parse_lines2(lines,starts,stops,dtypes):
    """
    Parse table lines, folding comment lines into the last field of data rows.

    Lines containing '***' are treated as comments. Comment lines that precede
    a data line are joined with newlines, suffixed with the '/n/' separator
    and stored in the last field of that data row. Comment lines at the very
    end of the table are joined with newlines and appended, after a '/n/'
    separator, to the last field of the final data row.

    Parameters
    ----------
    lines : list of str
        Raw lines of the table.
    starts, stops : list of int
        Slice boundaries of each field, passed on to ``parse_line``.
    dtypes : list of str
        Type code of each field, passed on to ``parse_line``.

    Returns
    -------
    list of list
        One list of field values per data line.
    """
    comments = []  # comment lines collected since the previous data row
    table = []
    for index,line in enumerate(lines):
        if '***' in line:
            comments.append(line)
            if index+1 == len(lines): # Cases where the table ends with comments
                comments = '\n'.join(comments)
                # NOTE(review): table[-1] raises IndexError if no data row
                # precedes the trailing comments - confirm that comment-only
                # tables never reach this function.
                table[-1][-1] = '/n/'.join([table[-1][-1],comments])
                # '/n/ to separate comments above a line and comments below a line for cases
                # where the table ends with comments below a single valid line
        else:
            table.append(parse_line(line,starts,stops,dtypes))

            # From here on ``comments`` is a str, until it is reset below.
            comments = '\n'.join(comments)
            if comments != '':
                comments = '/n/'.join([comments,''])

            # Overwrite whatever parse_line put in the last field.
            table[-1][-1] = comments
            comments = []
    return table
371
+
372
+
373
def parse_lines(lines,starts,stops,dtypes):
    """
    Parse the lines of a table into a list of rows.

    Lines containing '***' are treated as comments: they produce a row of
    missing values (pd.NA for 'I', '' for 'C', np.nan for 'R') whose last
    field holds the raw line. Every other line is split into fields by
    ``parse_line``.

    Parameters
    ----------
    lines : list of str
        Raw lines of the table.
    starts, stops : list of int
        Slice boundaries of each field.
    dtypes : list of str
        Type code ('I', 'C' or 'R') of each field. The last entry is the
        field that receives comment lines.

    Returns
    -------
    list of list
        One list of field values per input line, in input order.
    """
    defaults = {'I':pd.NA,'C':'','R':np.nan}
    nan_row = [defaults[dtype] for dtype in dtypes]
    table = []
    for line in lines:
        if '***' in line:
            # Copy so that each comment row owns its own list.
            row = nan_row.copy()
            row[-1] = line
        else:
            # Parse each data line exactly once.
            row = parse_line(line,starts,stops,dtypes)
        table.append(row)
    return table
386
+
387
+
388
def parse_line(line,starts,stops,dtypes):
    """
    Split one fixed-width line into typed field values.

    Parameters
    ----------
    line : str
        The raw line.
    starts, stops : list of int
        Slice boundaries; field ``i`` is ``line[starts[i]:stops[i]]``.
    dtypes : list of str
        Type code per field: 'C' gives the stripped text, 'I' an int, and
        any other code a float.

    Returns
    -------
    list
        One value per field. Text that cannot be converted becomes pd.NA for
        'I' fields and np.nan for float fields.
    """
    def convert(text, dtype):
        if dtype == 'C':
            return str(text).strip()
        caster, missing = (int, pd.NA) if dtype == 'I' else (float, np.nan)
        try:
            return caster(text)
        except ValueError:
            return missing

    return [convert(line[start:stop], dtype)
            for start, stop, dtype in zip(starts, stops, dtypes)]
406
+
407
+
408
def column_dtypes(table,dtypes,names):
    """
    Build a DataFrame from parsed rows and cast each column to its pandas dtype.

    Parameters
    ----------
    table : list of list
        Parsed rows, one value per column.
    dtypes : list of str
        Type code per column: 'I' -> 'Int64', 'C' -> 'string',
        'R' -> 'float64'.
    names : list of str
        Column names, in the same order as ``dtypes``.

    Returns
    -------
    pandas DataFrame
        The rows as a DataFrame with the requested column dtypes.
    """
    convert = {'I':'Int64','C':'string','R':'float64'}
    target_types = {name: convert[code] for code, name in zip(dtypes, names)}
    frame = pd.DataFrame(table, columns = names)
    return frame.astype(dtype=target_types)
417
+
418
+
419
+
420
+
421
+ # Writing Functions
422
def magnitude(x):
    """Return the base-10 logarithm of ``x`` truncated toward zero.

    ``math.log10`` raises ValueError for ``x <= 0``.
    """
    exponent = math.log10(x)
    return int(exponent)
424
+
425
def num_zeros(decimal):
    """Return the number of zeros between the decimal point and the first
    non-zero digit of ``decimal``.

    Zero itself gives ``math.inf``. The sign is ignored. Values with an
    absolute value of 1 or more give a negative result.
    """
    if decimal == 0:
        return math.inf
    exponent = math.floor(math.log10(abs(decimal)))
    return -exponent - 1
427
+
428
def format_number(number,width):
    """
    Format a number for a fixed-width field of the uci file (integers and floats).

    Displays the minimum number of characters (for visual purposes) with the
    highest precision that fits. Scientific notation is used when standard
    notation cannot represent the value in the available characters.

    Parameters
    ----------
    number : int or float
        Value to format. A missing value (NaN or pd.NA) gives a blank field.
    width : int
        Width of the field in characters. Must be greater than 2 for any
        non-zero, non-missing value.

    Returns
    -------
    str
        The text right-justified in ``width`` characters.
    """
    # Test for missing values before any comparison: ``pd.NA == 0`` is itself
    # NA and cannot be used as a condition.
    if pd.isna(number):
        return ' '*width

    if number == 0:
        return ' '*(width-1) + '0'

    # Code breaks if widths dip below 2, but for floats hspf is not expected
    # to go below 5.
    assert(width > 2)

    field_width = width  # full width, kept for the final justification
    sign = ''
    if number < 0:
        width = width-1  # one character is reserved for the minus sign
        sign = '-'
        number = number*-1

    if number < 1:
        chars = width
        # Characters used up before the first significant digit
        # (the decimal point plus the leading zeros).
        zeros = -math.floor(math.log10(number))
        if chars <= zeros: # can't represent number with given width
            chars = chars - 4 - 2
            if chars < 0:
                chars = 0
            string = f'{number:.{chars}E}'.replace("E-0","E-").split("E")
            string = string[0].strip('0').rstrip('.') + 'E' + string[1]

            if len(string) > width: # Check once to see if scientific notation fits within width limitations
                string = '1E-9'
                if len(string) > width: # Check if minimum scientific notation width is still too long then use minimum standard notation value
                    string = '.' + '0'*(width-2) + '1'
        else:
            chars = width - 1 # 1 character must be allocated for the decimal point
            string = f'{number:.{chars}f}'.strip('0').rstrip('.')

    else:
        magnitude = int(math.log10(number)) + 1 # number of characters required for integer in standard notation
        if magnitude > width: # If there is integer overflow try using scientific notation
            chars = width - 4 - 1
            if chars < 0:
                chars = 0
            string = f'{number:.{chars}E}'.replace("E+0","E+").split("E")
            string = string[0].strip('0').rstrip('.') + 'E' + string[1]

            if len(string) > width: # Check once to see if scientific notation fits within width limitations
                string = '9E+9'
                if len(string) > width: # Check if minimum scientific notation width is still too long then use maximum integer value
                    string = '9'*width
        else: # subtract 1 from the width for the decimal character
            chars = width - magnitude - 1
            if chars <= 0:
                string = f'{number:.{0}f}'
            else:
                string = f'{number:.{chars}f}'.strip('0').strip('.')

    # Justify against the full field width. Padding against the sign-reduced
    # width would return negative numbers one character short.
    return (sign + string).rjust(field_width)
490
+
491
+
492
def format_line(line,starts,stops,dtypes):
    """
    Format one row of values as a fixed-width text line.

    Parameters
    ----------
    line : list
        Values of the row, one per field.
    starts, stops : list of int
        Slice boundaries; field ``i`` occupies ``[starts[i]:stops[i]]``.
    dtypes : list of str
        Type code per field. 'I' right-justifies int/str values; any other
        code left-justifies them.

    Returns
    -------
    str
        The formatted line. Missing values, booleans and the string 'False'
        give a blank field. Values that are neither missing, bool, int nor
        str are formatted with ``format_number``.
    """
    buffer = list(' '*np.max(stops))
    for start, stop, value, dtype in zip(starts, stops, line, dtypes):
        width = stop - start
        # bool is tested before int because a bool is also an int.
        if pd.isna(value) or isinstance(value, bool) or value == 'False':
            field = ' '*width
        elif isinstance(value, (int, str)):
            text = str(value)
            if dtype == 'I':
                assert(len(text) <= width) # check for integer overflow
                field = text.rjust(width)
            else:
                # Text longer than the field is not truncated.
                field = text.ljust(width)
        else:
            field = format_number(value, width)
        buffer[start:stop] = list(field)

    return ''.join(buffer)
516
+