cwms-cli 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cwms_cli-0.1.1.dist-info/METADATA +40 -0
- cwms_cli-0.1.1.dist-info/RECORD +41 -0
- cwms_cli-0.1.1.dist-info/WHEEL +4 -0
- cwms_cli-0.1.1.dist-info/entry_points.txt +3 -0
- cwms_cli-0.1.1.dist-info/licenses/LICENSE +21 -0
- cwmscli/__init__.py +12 -0
- cwmscli/__main__.py +15 -0
- cwmscli/callbacks/__init__.py +18 -0
- cwmscli/commands/blob.py +439 -0
- cwmscli/commands/commands_cwms.py +227 -0
- cwmscli/commands/csv2cwms/.gitignore +3 -0
- cwmscli/commands/csv2cwms/README.md +51 -0
- cwmscli/commands/csv2cwms/__init__.py +5 -0
- cwmscli/commands/csv2cwms/__main__.py +265 -0
- cwmscli/commands/csv2cwms/examples/complete_config.json +19 -0
- cwmscli/commands/csv2cwms/examples/hourly.json +243 -0
- cwmscli/commands/csv2cwms/examples/minutes.json +315 -0
- cwmscli/commands/csv2cwms/tests/__init__.py +0 -0
- cwmscli/commands/csv2cwms/tests/data/.gitignore +1 -0
- cwmscli/commands/csv2cwms/tests/data/expected_brok_output.json +278 -0
- cwmscli/commands/csv2cwms/tests/data/sample_brok.csv +9 -0
- cwmscli/commands/csv2cwms/tests/data/sample_config.json +45 -0
- cwmscli/commands/csv2cwms/tests/skip_test_integration_pipeline.py +35 -0
- cwmscli/commands/csv2cwms/tests/test_dateutils.py +68 -0
- cwmscli/commands/csv2cwms/tests/test_expressions.py +49 -0
- cwmscli/commands/csv2cwms/tests/test_fileio.py +43 -0
- cwmscli/commands/csv2cwms/utils/__init__.py +5 -0
- cwmscli/commands/csv2cwms/utils/dateutils.py +105 -0
- cwmscli/commands/csv2cwms/utils/expression.py +39 -0
- cwmscli/commands/csv2cwms/utils/fileio.py +26 -0
- cwmscli/commands/csv2cwms/utils/logging.py +80 -0
- cwmscli/commands/csv2cwms/utils/terminal.py +45 -0
- cwmscli/commands/shef_critfile_import.py +146 -0
- cwmscli/requirements.py +25 -0
- cwmscli/usgs/__init__.py +161 -0
- cwmscli/usgs/getUSGS_ratings_cda.py +346 -0
- cwmscli/usgs/getusgs_cda.py +345 -0
- cwmscli/usgs/getusgs_measurements_cda.py +961 -0
- cwmscli/usgs/rating_ini_file_import.py +130 -0
- cwmscli/utils/__init__.py +68 -0
- cwmscli/utils/deps.py +102 -0
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"name": "BROK.Elev.Inst.15Minutes.0.Rev-SCADA-cda",
|
|
4
|
+
"units": "ft",
|
|
5
|
+
"values": [
|
|
6
|
+
[
|
|
7
|
+
1742952600000,
|
|
8
|
+
599.95,
|
|
9
|
+
3
|
|
10
|
+
],
|
|
11
|
+
[
|
|
12
|
+
1742953500000,
|
|
13
|
+
599.92,
|
|
14
|
+
3
|
|
15
|
+
],
|
|
16
|
+
[
|
|
17
|
+
1742954400000,
|
|
18
|
+
599.91,
|
|
19
|
+
3
|
|
20
|
+
],
|
|
21
|
+
[
|
|
22
|
+
1742955300000,
|
|
23
|
+
599.93,
|
|
24
|
+
3
|
|
25
|
+
],
|
|
26
|
+
[
|
|
27
|
+
1742956200000,
|
|
28
|
+
599.95,
|
|
29
|
+
3
|
|
30
|
+
],
|
|
31
|
+
[
|
|
32
|
+
1742957100000,
|
|
33
|
+
599.92,
|
|
34
|
+
3
|
|
35
|
+
],
|
|
36
|
+
[
|
|
37
|
+
1742958000000,
|
|
38
|
+
599.91,
|
|
39
|
+
3
|
|
40
|
+
],
|
|
41
|
+
[
|
|
42
|
+
1742958900000,
|
|
43
|
+
599.93,
|
|
44
|
+
3
|
|
45
|
+
]
|
|
46
|
+
]
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
"name": "BROK.Elev-Tailwater.Inst.15Minutes.0.Rev-SCADA-cda",
|
|
50
|
+
"units": "ft",
|
|
51
|
+
"values": [
|
|
52
|
+
[
|
|
53
|
+
1742952600000,
|
|
54
|
+
405.54,
|
|
55
|
+
3
|
|
56
|
+
],
|
|
57
|
+
[
|
|
58
|
+
1742953500000,
|
|
59
|
+
405.53,
|
|
60
|
+
3
|
|
61
|
+
],
|
|
62
|
+
[
|
|
63
|
+
1742954400000,
|
|
64
|
+
405.57,
|
|
65
|
+
3
|
|
66
|
+
],
|
|
67
|
+
[
|
|
68
|
+
1742955300000,
|
|
69
|
+
405.29,
|
|
70
|
+
3
|
|
71
|
+
],
|
|
72
|
+
[
|
|
73
|
+
1742956200000,
|
|
74
|
+
405.54,
|
|
75
|
+
3
|
|
76
|
+
],
|
|
77
|
+
[
|
|
78
|
+
1742957100000,
|
|
79
|
+
405.53,
|
|
80
|
+
3
|
|
81
|
+
],
|
|
82
|
+
[
|
|
83
|
+
1742958000000,
|
|
84
|
+
405.57,
|
|
85
|
+
3
|
|
86
|
+
],
|
|
87
|
+
[
|
|
88
|
+
1742958900000,
|
|
89
|
+
405.29,
|
|
90
|
+
3
|
|
91
|
+
]
|
|
92
|
+
]
|
|
93
|
+
},
|
|
94
|
+
{
|
|
95
|
+
"name": "BROK.Flow-Power.Inst.15Minutes.0.Rev-SCADA-cda",
|
|
96
|
+
"units": "cfs",
|
|
97
|
+
"values": [
|
|
98
|
+
[
|
|
99
|
+
1742952600000,
|
|
100
|
+
6700.0,
|
|
101
|
+
3
|
|
102
|
+
],
|
|
103
|
+
[
|
|
104
|
+
1742953500000,
|
|
105
|
+
6772.0,
|
|
106
|
+
3
|
|
107
|
+
],
|
|
108
|
+
[
|
|
109
|
+
1742954400000,
|
|
110
|
+
4027.0,
|
|
111
|
+
3
|
|
112
|
+
],
|
|
113
|
+
[
|
|
114
|
+
1742955300000,
|
|
115
|
+
0.0,
|
|
116
|
+
3
|
|
117
|
+
],
|
|
118
|
+
[
|
|
119
|
+
1742956200000,
|
|
120
|
+
6700.0,
|
|
121
|
+
3
|
|
122
|
+
],
|
|
123
|
+
[
|
|
124
|
+
1742957100000,
|
|
125
|
+
6772.0,
|
|
126
|
+
3
|
|
127
|
+
],
|
|
128
|
+
[
|
|
129
|
+
1742958000000,
|
|
130
|
+
4027.0,
|
|
131
|
+
3
|
|
132
|
+
],
|
|
133
|
+
[
|
|
134
|
+
1742958900000,
|
|
135
|
+
0.0,
|
|
136
|
+
3
|
|
137
|
+
]
|
|
138
|
+
]
|
|
139
|
+
},
|
|
140
|
+
{
|
|
141
|
+
"name": "BROK.Power-Gen.Inst.15Minutes.0.Rev-SCADA-cda",
|
|
142
|
+
"units": "MW",
|
|
143
|
+
"values": [
|
|
144
|
+
[
|
|
145
|
+
1742952600000,
|
|
146
|
+
99.93,
|
|
147
|
+
3
|
|
148
|
+
],
|
|
149
|
+
[
|
|
150
|
+
1742953500000,
|
|
151
|
+
100.62,
|
|
152
|
+
3
|
|
153
|
+
],
|
|
154
|
+
[
|
|
155
|
+
1742954400000,
|
|
156
|
+
52.87,
|
|
157
|
+
3
|
|
158
|
+
],
|
|
159
|
+
[
|
|
160
|
+
1742955300000,
|
|
161
|
+
-0.07,
|
|
162
|
+
3
|
|
163
|
+
],
|
|
164
|
+
[
|
|
165
|
+
1742956200000,
|
|
166
|
+
99.93,
|
|
167
|
+
3
|
|
168
|
+
],
|
|
169
|
+
[
|
|
170
|
+
1742957100000,
|
|
171
|
+
100.62,
|
|
172
|
+
3
|
|
173
|
+
],
|
|
174
|
+
[
|
|
175
|
+
1742958000000,
|
|
176
|
+
52.87,
|
|
177
|
+
3
|
|
178
|
+
],
|
|
179
|
+
[
|
|
180
|
+
1742958900000,
|
|
181
|
+
-0.07,
|
|
182
|
+
3
|
|
183
|
+
]
|
|
184
|
+
]
|
|
185
|
+
},
|
|
186
|
+
{
|
|
187
|
+
"name": "BROK-Turbine1.Power-Gen.Inst.15Minutes.0.Rev-SCADA-cda",
|
|
188
|
+
"units": "MW",
|
|
189
|
+
"values": [
|
|
190
|
+
[
|
|
191
|
+
1742952600000,
|
|
192
|
+
50.04,
|
|
193
|
+
3
|
|
194
|
+
],
|
|
195
|
+
[
|
|
196
|
+
1742953500000,
|
|
197
|
+
50.6,
|
|
198
|
+
3
|
|
199
|
+
],
|
|
200
|
+
[
|
|
201
|
+
1742954400000,
|
|
202
|
+
26.89,
|
|
203
|
+
3
|
|
204
|
+
],
|
|
205
|
+
[
|
|
206
|
+
1742955300000,
|
|
207
|
+
-0.01,
|
|
208
|
+
3
|
|
209
|
+
],
|
|
210
|
+
[
|
|
211
|
+
1742956200000,
|
|
212
|
+
50.04,
|
|
213
|
+
3
|
|
214
|
+
],
|
|
215
|
+
[
|
|
216
|
+
1742957100000,
|
|
217
|
+
50.6,
|
|
218
|
+
3
|
|
219
|
+
],
|
|
220
|
+
[
|
|
221
|
+
1742958000000,
|
|
222
|
+
26.89,
|
|
223
|
+
3
|
|
224
|
+
],
|
|
225
|
+
[
|
|
226
|
+
1742958900000,
|
|
227
|
+
-0.01,
|
|
228
|
+
3
|
|
229
|
+
]
|
|
230
|
+
]
|
|
231
|
+
},
|
|
232
|
+
{
|
|
233
|
+
"name": "BROK-Turbine2.Power-Gen.Inst.15Minutes.0.Rev-SCADA-cda",
|
|
234
|
+
"units": "MW",
|
|
235
|
+
"values": [
|
|
236
|
+
[
|
|
237
|
+
1742952600000,
|
|
238
|
+
49.89,
|
|
239
|
+
3
|
|
240
|
+
],
|
|
241
|
+
[
|
|
242
|
+
1742953500000,
|
|
243
|
+
50.02,
|
|
244
|
+
3
|
|
245
|
+
],
|
|
246
|
+
[
|
|
247
|
+
1742954400000,
|
|
248
|
+
25.98,
|
|
249
|
+
3
|
|
250
|
+
],
|
|
251
|
+
[
|
|
252
|
+
1742955300000,
|
|
253
|
+
-0.06,
|
|
254
|
+
3
|
|
255
|
+
],
|
|
256
|
+
[
|
|
257
|
+
1742956200000,
|
|
258
|
+
49.89,
|
|
259
|
+
3
|
|
260
|
+
],
|
|
261
|
+
[
|
|
262
|
+
1742957100000,
|
|
263
|
+
50.02,
|
|
264
|
+
3
|
|
265
|
+
],
|
|
266
|
+
[
|
|
267
|
+
1742958000000,
|
|
268
|
+
25.98,
|
|
269
|
+
3
|
|
270
|
+
],
|
|
271
|
+
[
|
|
272
|
+
1742958900000,
|
|
273
|
+
-0.06,
|
|
274
|
+
3
|
|
275
|
+
]
|
|
276
|
+
]
|
|
277
|
+
}
|
|
278
|
+
]
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"Time","Headwater","Tailwater","U1_MW","U1_MVAR","U2_MW","U2_Mvar","total_Pwr_discharge","Tot_Tainter_Opening","SG1"
|
|
2
|
+
03/25/2025 20:30:00,599.95,405.54,50.04,4.93,49.89,5.09,6700,0.00,21.00
|
|
3
|
+
03/25/2025 20:45:00,599.92,405.53,50.6,2.57,50.02,3.33,6772,0.00,21.00
|
|
4
|
+
03/25/2025 21:00:00,599.91,405.57,26.89,1.21,25.98,1.22,4027,0.00,21.00
|
|
5
|
+
03/25/2025 21:15:00,599.93,405.29,-0.01,-0.01,-0.06,-0.01,0,0.00,21.00
|
|
6
|
+
03/25/2025 21:30:00,599.95,405.54,50.04,4.93,49.89,5.09,6700,0.00,21.00
|
|
7
|
+
03/25/2025 21:45:00,599.92,405.53,50.6,2.57,50.02,3.33,6772,0.00,21.00
|
|
8
|
+
03/25/2025 22:00:00,599.91,405.57,26.89,1.21,25.98,1.22,4027,0.00,21.00
|
|
9
|
+
03/25/2025 22:15:00,599.93,405.29,-0.01,-0.01,-0.06,-0.01,0,0.00,21.00
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
{
|
|
2
|
+
"interval": null,
|
|
3
|
+
"input_files": {
|
|
4
|
+
"BROK": {
|
|
5
|
+
"data_path": "cwmscli/commands/csv2cwms/tests/data/sample_brok.csv",
|
|
6
|
+
"store_rule": "REPLACE_ALL",
|
|
7
|
+
"date_format": [
|
|
8
|
+
"%m/%d/%Y %H:%M:%S",
|
|
9
|
+
"%m/%d/%Y %H:%M"
|
|
10
|
+
],
|
|
11
|
+
"timeseries": {
|
|
12
|
+
"BROK.Elev.Inst.15Minutes.0.Rev-SCADA-cda": {
|
|
13
|
+
"columns": "Headwater",
|
|
14
|
+
"units": "ft",
|
|
15
|
+
"precision": 2
|
|
16
|
+
},
|
|
17
|
+
"BROK.Elev-Tailwater.Inst.15Minutes.0.Rev-SCADA-cda": {
|
|
18
|
+
"columns": "Tailwater",
|
|
19
|
+
"units": "ft",
|
|
20
|
+
"precision": 2
|
|
21
|
+
},
|
|
22
|
+
"BROK.Flow-Power.Inst.15Minutes.0.Rev-SCADA-cda": {
|
|
23
|
+
"columns": "total_Pwr_discharge",
|
|
24
|
+
"units": "cfs",
|
|
25
|
+
"precision": 0
|
|
26
|
+
},
|
|
27
|
+
"BROK.Power-Gen.Inst.15Minutes.0.Rev-SCADA-cda": {
|
|
28
|
+
"columns": "U1_MW+U2_MW",
|
|
29
|
+
"units": "MW",
|
|
30
|
+
"precision": 2
|
|
31
|
+
},
|
|
32
|
+
"BROK-Turbine1.Power-Gen.Inst.15Minutes.0.Rev-SCADA-cda": {
|
|
33
|
+
"columns": "U1_MW",
|
|
34
|
+
"units": "MW",
|
|
35
|
+
"precision": 2
|
|
36
|
+
},
|
|
37
|
+
"BROK-Turbine2.Power-Gen.Inst.15Minutes.0.Rev-SCADA-cda": {
|
|
38
|
+
"columns": "U2_MW",
|
|
39
|
+
"units": "MW",
|
|
40
|
+
"precision": 2
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
from scada_ts import __main__ as main
|
|
5
|
+
from utils.dateutils import parse_date
|
|
6
|
+
from utils.fileio import load_csv, read_config
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_brok_pipeline_matches_expected():
    """Feed the BROK sample CSV through ``load_timeseries`` and compare with the stored JSON."""
    data_dir = os.path.join(os.path.dirname(__file__), "data")
    sample_csv = os.path.join(data_dir, "sample_brok.csv")
    sample_config = os.path.join(data_dir, "sample_config.json")
    expected_json = os.path.join(data_dir, "expected_brok_output.json")

    # Read the configuration first, then the raw CSV table.
    config = read_config(sample_config)
    table = load_csv(sample_csv)
    header, body = table[0], table[1:]

    # Key every data row by the epoch seconds of its first column.
    rows_by_epoch = {int(parse_date(line[0]).timestamp()): line for line in body}

    actual = main.load_timeseries(
        {"header": header, "data": rows_by_epoch}, "BROK", config
    )

    with open(expected_json) as handle:
        expected = json.load(handle)

    assert actual == expected
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
from datetime import datetime, timedelta
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from ..utils.dateutils import determine_interval, parse_date, safe_zoneinfo
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_parse_date_valid_formats():
    """The second-, minute- and hour-resolution US formats all parse to UTC datetimes."""
    utc = safe_zoneinfo("UTC")
    half_past_two = datetime(2025, 3, 25, 14, 30, tzinfo=utc)
    two_oclock = datetime(2025, 3, 25, 14, tzinfo=utc)

    assert parse_date("03/25/2025 14:30:00") == half_past_two
    assert parse_date("03/25/2025 14:30") == half_past_two
    assert parse_date("03/25/2025 14") == two_oclock
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_parse_date_invalid_format():
    """A day-first, dash-separated date without a time must be rejected."""
    unsupported = "25-03-2025"
    with pytest.raises(ValueError):
        parse_date(unsupported)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def test_determine_interval_regular_spacing():
    """Five keys spaced exactly 15 minutes apart give a 900 second interval."""
    start = datetime(2025, 3, 25, 14, 30, tzinfo=safe_zoneinfo("UTC"))
    step_seconds = 900  # 15 minutes

    csv_data = {}
    for position in range(5):
        moment = start + timedelta(seconds=position * step_seconds)
        csv_data[int(moment.timestamp())] = []

    assert determine_interval(csv_data) == 900
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_determine_interval_mixed_spacing():
    """An out-of-order outlier must not displace the dominant 15 minute step."""
    start = datetime(2025, 3, 25, 14, 30, tzinfo=safe_zoneinfo("UTC"))
    # The 60 minute entry is the outlier and deliberately precedes the 45 minute one.
    minute_offsets = (0, 15, 30, 60, 45)

    csv_data = {}
    for minutes in minute_offsets:
        moment = start + timedelta(minutes=minutes)
        csv_data[int(moment.timestamp())] = []

    assert determine_interval(csv_data) == 900
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_determine_interval_duplicate_timestamps():
    """A repeated timestamp collapses into one dict key and leaves the interval at 900."""
    start = datetime(2025, 3, 25, 14, 30, tzinfo=safe_zoneinfo("UTC"))
    # 15 appears twice on purpose: that is the duplicate.
    minute_offsets = (0, 15, 15, 30)

    csv_data = {}
    for minutes in minute_offsets:
        moment = start + timedelta(minutes=minutes)
        csv_data[int(moment.timestamp())] = []

    assert determine_interval(csv_data) == 900
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_determine_interval_missing_values():
    """One missing 15 minute slot (a 30 minute gap) still reports a 900 second interval."""
    start = datetime(2025, 3, 25, 14, 30, tzinfo=safe_zoneinfo("UTC"))
    present_slots = [0, 1, 3, 4]  # slot 2 is absent

    csv_data = {}
    for slot in present_slots:
        moment = start + timedelta(minutes=15 * slot)
        csv_data[int(moment.timestamp())] = []

    assert determine_interval(csv_data) == 900
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def test_determine_interval_insufficient_data():
    """A single row cannot define an interval, so a ValueError is expected."""
    only_moment = datetime(2025, 3, 25, 14, 30, tzinfo=safe_zoneinfo("UTC"))
    single_row = {int(only_moment.timestamp()): []}

    with pytest.raises(ValueError):
        determine_interval(single_row)
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from ..utils.expression import eval_expression
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@pytest.mark.parametrize(
    "expr,row,header_map,expected",
    [
        # Addition of two populated columns.
        (
            "U1_MW+U2_MW",
            ["599.95", "405.54", "50.04", "4.93", "49.89"],
            {"u1_mw": 2, "u2_mw": 4},
            99.93,
        ),
        # Subtraction.
        (
            "U1_MW-U2_MW",
            ["", "", "60.0", "", "15.5"],
            {"u1_mw": 2, "u2_mw": 4},
            44.5,
        ),
        # Multiplication.
        (
            "U1_MW*U2_MW",
            ["", "", "5.0", "", "2.0"],
            {"u1_mw": 2, "u2_mw": 4},
            10.0,
        ),
        # A column that is absent from the header map yields None.
        (
            "MISSING+U1_MW",
            ["", "", "50.0"],
            {"u1_mw": 2},
            None,
        ),
    ],
)
def test_eval_expression(expr, row, header_map, expected):
    """Check each supported operator and the missing-column case."""
    outcome = eval_expression(expr, row, header_map)
    if expected is None:
        assert outcome is None
        return
    assert round(outcome, 2) == pytest.approx(expected, abs=1e-2)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_eval_expression_invalid_token():
    """An operand that matches no header key ("@U2_MW") makes the expression None."""
    columns = {"u1_mw": 2, "u2_mw": 4}
    values = ["", "", "5.0", "", "2.0"]

    outcome = eval_expression("U1_MW+@U2_MW", values, columns)

    assert outcome is None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_eval_expression_division_by_zero():
    """A "/" expression whose right-hand column holds 0 is expected to give None."""
    columns = {"u1_mw": 2, "u2_mw": 4}
    values = ["", "", "5.0", "", "0"]

    outcome = eval_expression("U1_MW/U2_MW", values, columns)

    assert outcome is None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_eval_expression_missing_header_map():
    """With an empty header map no column resolves, so the result is None."""
    no_columns = {}  # empty
    values = ["", "", "5.0", "", "2.0"]

    outcome = eval_expression("U1_MW+U2_MW", values, no_columns)

    assert outcome is None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_eval_expression_non_numeric():
    """Two operands that are not numbers produce None."""
    columns = {"u1_mw": 2, "u2_mw": 4}
    values = ["", "", "fifty", "", "two"]

    outcome = eval_expression("U1_MW+U2_MW", values, columns)

    assert outcome is None
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from ..utils.fileio import load_csv, read_config
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_load_csv_valid():
    """The bundled sample CSV loads as a list of rows with a multi-column header."""
    here = os.path.dirname(__file__)
    table = load_csv(os.path.join(here, "data", "sample_brok.csv"))

    assert isinstance(table, list)
    header_row = table[0]
    assert isinstance(header_row, list)
    assert len(header_row) > 1  # header row should have multiple columns
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_load_csv_nonexistent():
    """Loading a path that does not exist raises FileNotFoundError."""
    here = os.path.dirname(__file__)
    missing_file = os.path.join(here, "data", "does_not_exist.csv")

    with pytest.raises(FileNotFoundError):
        load_csv(missing_file)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def test_load_csv_malformed_row(tmp_path):
    """Rows with too few or too many fields are returned as-is, not rejected."""
    ragged_csv = tmp_path / "bad.csv"
    ragged_csv.write_text("Time,Value\n2025-01-01 00:00\n2025-01-01 00:15,42,Extra")

    table = load_csv(str(ragged_csv))

    assert len(table) == 3
    short_row, long_row = table[1], table[2]
    assert short_row == ["2025-01-01 00:00"]
    assert long_row == ["2025-01-01 00:15", "42", "Extra"]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_read_config_valid():
    """The sample configuration parses to a dict that contains the BROK input file."""
    here = os.path.dirname(__file__)
    settings = read_config(os.path.join(here, "data", "sample_config.json"))

    assert isinstance(settings, dict)
    assert "input_files" in settings
    assert "BROK" in settings["input_files"]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def test_read_config_invalid_json(tmp_path):
    """A file that is not valid JSON makes read_config raise."""
    broken_config = tmp_path / "bad.json"
    broken_config.write_text("{invalid_json: true,}")

    with pytest.raises(Exception):
        read_config(str(broken_config))
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from collections import Counter
|
|
3
|
+
from datetime import datetime, timezone
|
|
4
|
+
from typing import List
|
|
5
|
+
|
|
6
|
+
try:
|
|
7
|
+
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
|
|
8
|
+
except ImportError:
|
|
9
|
+
# Python < 3.9 does not support zoneinfo
|
|
10
|
+
ZoneInfo = None
|
|
11
|
+
ZoneInfoNotFoundError = Exception
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)

# Built-in strptime formats tried, in this order, after any user-supplied
# format.  Each date style is listed from the most to the least precise time.
DATE_STRINGS = [
    # US style: month/day/year
    "%m/%d/%Y %H:%M:%S",
    "%m/%d/%Y %H:%M",
    "%m/%d/%Y %H",
    # ISO 8601 with the "T" separator
    "%Y-%m-%dT%H:%M:%S",
    "%Y-%m-%dT%H:%M",
    "%Y-%m-%dT%H",
    # ISO-like with a space separator
    "%Y-%m-%d %H:%M:%S",
    "%Y-%m-%d %H:%M",
    "%Y-%m-%d %H",
]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def safe_zoneinfo(key: str):
    """Return the time zone named ``key``, falling back to UTC.

    Args:
        key: Time zone name passed to ``ZoneInfo`` (for example ``"UTC"``).

    Returns:
        ``ZoneInfo(key)`` when the zone can be loaded, otherwise
        ``datetime.timezone.utc``.
    """
    if ZoneInfo is None:
        # The zoneinfo module could not be imported (Python < 3.9).
        return timezone.utc

    try:
        return ZoneInfo(key)
    except (ZoneInfoNotFoundError, ValueError):
        # ZoneInfoNotFoundError: the key is unknown or no tz database exists.
        # ValueError: the key is malformed (e.g. an absolute path); without
        # catching it the documented UTC fallback would be bypassed.
        return timezone.utc
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def parse_date(date, tz_str="UTC", date_format: str = "") -> datetime:
    """Parse a timestamp into a timezone-aware ``datetime``.

    The parsed wall-clock value is not converted between zones: the zone
    named by ``tz_str`` is attached to it, so every input is assumed to be
    expressed in that zone already.

    Args:
        date: Integer epoch seconds, or a date string.
        tz_str: Time zone name handed to ``safe_zoneinfo``.
        date_format: Preferred ``strptime`` format(s), tried before the
            built-in ``DATE_STRINGS``.  Accepts one format string, a
            comma-separated string of formats, or a list/tuple of formats.

    Returns:
        The parsed ``datetime`` carrying the requested time zone.

    Raises:
        ValueError: If neither a user format nor a built-in format matches.
    """
    tz = safe_zoneinfo(tz_str)

    if isinstance(date, int):
        return datetime.fromtimestamp(date, tz=tz)

    # Normalise ``date_format`` into a flat list of non-empty format strings.
    # Wrapping an already-split (or caller-supplied) list in another list
    # would hand ``strptime`` a list and raise an uncaught TypeError.
    if isinstance(date_format, str):
        if "," in date_format:
            user_formats = [part.strip() for part in date_format.split(",")]
        else:
            user_formats = [date_format]
    else:
        user_formats = list(date_format or [])
    user_formats = [fmt for fmt in user_formats if fmt]

    # User-specified formats come first, then the built-in fallbacks.
    for idx, fmt in enumerate(user_formats + DATE_STRINGS):
        try:
            dt_naive = datetime.strptime(date, fmt)
        except ValueError:
            continue

        # Only log when a built-in fallback (not a user format) matched.
        if idx >= len(user_formats):
            if not user_formats:
                logger.warning(
                    f"Using fallback date format '{fmt}' for date '{date}'. No user-specified format was provided."
                )
            else:
                logger.warning(
                    f"Using fallback date format '{fmt}' for date '{date}'. The user-specified format is '{user_formats}'."
                )
        return dt_naive.replace(tzinfo=tz)

    raise ValueError(f"Invalid date format: {date}")
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def determine_interval(csv_data: dict, sample_size=10) -> int:
    """Return the most common spacing, in seconds, between sampled timestamps.

    Args:
        csv_data: Mapping whose keys are timestamps accepted by
            ``parse_date`` (integer epoch seconds or date strings).  Only the
            keys are read.
        sample_size: Maximum number of leading keys to examine.

    Returns:
        The most frequent positive difference between consecutive sampled
        timestamps, in seconds.

    Raises:
        ValueError: If ``csv_data`` is empty, or fewer than two distinct
            timestamps could be parsed from the sample.
    """
    dates = list(csv_data)
    if not dates:
        raise ValueError("No data found in CSV file for the given lookback period.")

    # Sample the first ``sample_size`` keys.  The previous bound of
    # ``len(dates) - 1`` dropped the last row, so two rows always failed.
    timestamps = []
    for key in dates[: max(sample_size, 0)]:
        try:
            timestamps.append(parse_date(key))
        except Exception:
            # Best effort: keys that cannot be parsed are skipped.
            continue

    if len(timestamps) < 2:
        raise ValueError("Not enough valid timestamps to determine interval.")

    # Sort so out-of-order rows cannot produce negative differences.
    timestamps.sort()
    diffs = [
        int((later - earlier).total_seconds())
        for earlier, later in zip(timestamps, timestamps[1:])
    ]
    # Identical timestamps carry no spacing information.
    diffs = [diff for diff in diffs if diff > 0]
    if not diffs:
        raise ValueError("Not enough valid timestamps to determine interval.")

    return Counter(diffs).most_common(1)[0][0]
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def eval_expression(expr, row, header_map):
    """Evaluate a simple column expression (+, -, *) against one CSV row.

    Operands are column names, optionally wrapped in single or double quotes
    (quoting allows names that contain an operator character).  Operators are
    applied strictly left to right with no precedence, and an operator that
    precedes the first operand is ignored.

    Args:
        expr: Expression such as ``"U1_MW+U2_MW"``; spaces are removed.
        row: Sequence of cell values for the current row.
        header_map: Mapping of lower-cased column name to index in ``row``.

    Returns:
        The computed float, or ``None`` when a column is unknown, its index
        lies outside ``row``, or a referenced cell is not numeric.
    """
    tokens = re.findall(
        r'"[^"]+"|\'[^\']+\'|\+|\-|\*|[^\+\-\*]+', expr.replace(" ", "")
    )
    result = None
    for i, token in enumerate(tokens):
        if token in {"+", "-", "*"}:
            continue

        col_name = token.strip('"').strip("'").lower()
        idx = header_map.get(col_name)
        if idx is None or idx >= len(row):
            # Unknown column or short row: the expression has no value.
            return None

        try:
            val = float(row[idx])
        except (TypeError, ValueError):
            # Stop at once.  Carrying None forward let the next operand
            # silently replace the running result ("bad+2" evaluated to 2).
            return None

        if result is None:
            # First operand seeds the running result.
            result = val
            continue

        op = tokens[i - 1]
        if op == "+":
            result += val
        elif op == "-":
            result -= val
        elif op == "*":
            result *= val
    return result
|