cwms-cli 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. cwms_cli-0.1.1.dist-info/METADATA +40 -0
  2. cwms_cli-0.1.1.dist-info/RECORD +41 -0
  3. cwms_cli-0.1.1.dist-info/WHEEL +4 -0
  4. cwms_cli-0.1.1.dist-info/entry_points.txt +3 -0
  5. cwms_cli-0.1.1.dist-info/licenses/LICENSE +21 -0
  6. cwmscli/__init__.py +12 -0
  7. cwmscli/__main__.py +15 -0
  8. cwmscli/callbacks/__init__.py +18 -0
  9. cwmscli/commands/blob.py +439 -0
  10. cwmscli/commands/commands_cwms.py +227 -0
  11. cwmscli/commands/csv2cwms/.gitignore +3 -0
  12. cwmscli/commands/csv2cwms/README.md +51 -0
  13. cwmscli/commands/csv2cwms/__init__.py +5 -0
  14. cwmscli/commands/csv2cwms/__main__.py +265 -0
  15. cwmscli/commands/csv2cwms/examples/complete_config.json +19 -0
  16. cwmscli/commands/csv2cwms/examples/hourly.json +243 -0
  17. cwmscli/commands/csv2cwms/examples/minutes.json +315 -0
  18. cwmscli/commands/csv2cwms/tests/__init__.py +0 -0
  19. cwmscli/commands/csv2cwms/tests/data/.gitignore +1 -0
  20. cwmscli/commands/csv2cwms/tests/data/expected_brok_output.json +278 -0
  21. cwmscli/commands/csv2cwms/tests/data/sample_brok.csv +9 -0
  22. cwmscli/commands/csv2cwms/tests/data/sample_config.json +45 -0
  23. cwmscli/commands/csv2cwms/tests/skip_test_integration_pipeline.py +35 -0
  24. cwmscli/commands/csv2cwms/tests/test_dateutils.py +68 -0
  25. cwmscli/commands/csv2cwms/tests/test_expressions.py +49 -0
  26. cwmscli/commands/csv2cwms/tests/test_fileio.py +43 -0
  27. cwmscli/commands/csv2cwms/utils/__init__.py +5 -0
  28. cwmscli/commands/csv2cwms/utils/dateutils.py +105 -0
  29. cwmscli/commands/csv2cwms/utils/expression.py +39 -0
  30. cwmscli/commands/csv2cwms/utils/fileio.py +26 -0
  31. cwmscli/commands/csv2cwms/utils/logging.py +80 -0
  32. cwmscli/commands/csv2cwms/utils/terminal.py +45 -0
  33. cwmscli/commands/shef_critfile_import.py +146 -0
  34. cwmscli/requirements.py +25 -0
  35. cwmscli/usgs/__init__.py +161 -0
  36. cwmscli/usgs/getUSGS_ratings_cda.py +346 -0
  37. cwmscli/usgs/getusgs_cda.py +345 -0
  38. cwmscli/usgs/getusgs_measurements_cda.py +961 -0
  39. cwmscli/usgs/rating_ini_file_import.py +130 -0
  40. cwmscli/utils/__init__.py +68 -0
  41. cwmscli/utils/deps.py +102 -0
@@ -0,0 +1,278 @@
1
+ [
2
+ {
3
+ "name": "BROK.Elev.Inst.15Minutes.0.Rev-SCADA-cda",
4
+ "units": "ft",
5
+ "values": [
6
+ [
7
+ 1742952600000,
8
+ 599.95,
9
+ 3
10
+ ],
11
+ [
12
+ 1742953500000,
13
+ 599.92,
14
+ 3
15
+ ],
16
+ [
17
+ 1742954400000,
18
+ 599.91,
19
+ 3
20
+ ],
21
+ [
22
+ 1742955300000,
23
+ 599.93,
24
+ 3
25
+ ],
26
+ [
27
+ 1742956200000,
28
+ 599.95,
29
+ 3
30
+ ],
31
+ [
32
+ 1742957100000,
33
+ 599.92,
34
+ 3
35
+ ],
36
+ [
37
+ 1742958000000,
38
+ 599.91,
39
+ 3
40
+ ],
41
+ [
42
+ 1742958900000,
43
+ 599.93,
44
+ 3
45
+ ]
46
+ ]
47
+ },
48
+ {
49
+ "name": "BROK.Elev-Tailwater.Inst.15Minutes.0.Rev-SCADA-cda",
50
+ "units": "ft",
51
+ "values": [
52
+ [
53
+ 1742952600000,
54
+ 405.54,
55
+ 3
56
+ ],
57
+ [
58
+ 1742953500000,
59
+ 405.53,
60
+ 3
61
+ ],
62
+ [
63
+ 1742954400000,
64
+ 405.57,
65
+ 3
66
+ ],
67
+ [
68
+ 1742955300000,
69
+ 405.29,
70
+ 3
71
+ ],
72
+ [
73
+ 1742956200000,
74
+ 405.54,
75
+ 3
76
+ ],
77
+ [
78
+ 1742957100000,
79
+ 405.53,
80
+ 3
81
+ ],
82
+ [
83
+ 1742958000000,
84
+ 405.57,
85
+ 3
86
+ ],
87
+ [
88
+ 1742958900000,
89
+ 405.29,
90
+ 3
91
+ ]
92
+ ]
93
+ },
94
+ {
95
+ "name": "BROK.Flow-Power.Inst.15Minutes.0.Rev-SCADA-cda",
96
+ "units": "cfs",
97
+ "values": [
98
+ [
99
+ 1742952600000,
100
+ 6700.0,
101
+ 3
102
+ ],
103
+ [
104
+ 1742953500000,
105
+ 6772.0,
106
+ 3
107
+ ],
108
+ [
109
+ 1742954400000,
110
+ 4027.0,
111
+ 3
112
+ ],
113
+ [
114
+ 1742955300000,
115
+ 0.0,
116
+ 3
117
+ ],
118
+ [
119
+ 1742956200000,
120
+ 6700.0,
121
+ 3
122
+ ],
123
+ [
124
+ 1742957100000,
125
+ 6772.0,
126
+ 3
127
+ ],
128
+ [
129
+ 1742958000000,
130
+ 4027.0,
131
+ 3
132
+ ],
133
+ [
134
+ 1742958900000,
135
+ 0.0,
136
+ 3
137
+ ]
138
+ ]
139
+ },
140
+ {
141
+ "name": "BROK.Power-Gen.Inst.15Minutes.0.Rev-SCADA-cda",
142
+ "units": "MW",
143
+ "values": [
144
+ [
145
+ 1742952600000,
146
+ 99.93,
147
+ 3
148
+ ],
149
+ [
150
+ 1742953500000,
151
+ 100.62,
152
+ 3
153
+ ],
154
+ [
155
+ 1742954400000,
156
+ 52.87,
157
+ 3
158
+ ],
159
+ [
160
+ 1742955300000,
161
+ -0.07,
162
+ 3
163
+ ],
164
+ [
165
+ 1742956200000,
166
+ 99.93,
167
+ 3
168
+ ],
169
+ [
170
+ 1742957100000,
171
+ 100.62,
172
+ 3
173
+ ],
174
+ [
175
+ 1742958000000,
176
+ 52.87,
177
+ 3
178
+ ],
179
+ [
180
+ 1742958900000,
181
+ -0.07,
182
+ 3
183
+ ]
184
+ ]
185
+ },
186
+ {
187
+ "name": "BROK.Turbine1.Power-Gen.Inst.15Minutes.0.Rev-SCADA-cda",
188
+ "units": "MW",
189
+ "values": [
190
+ [
191
+ 1742952600000,
192
+ 50.04,
193
+ 3
194
+ ],
195
+ [
196
+ 1742953500000,
197
+ 50.6,
198
+ 3
199
+ ],
200
+ [
201
+ 1742954400000,
202
+ 26.89,
203
+ 3
204
+ ],
205
+ [
206
+ 1742955300000,
207
+ -0.01,
208
+ 3
209
+ ],
210
+ [
211
+ 1742956200000,
212
+ 50.04,
213
+ 3
214
+ ],
215
+ [
216
+ 1742957100000,
217
+ 50.6,
218
+ 3
219
+ ],
220
+ [
221
+ 1742958000000,
222
+ 26.89,
223
+ 3
224
+ ],
225
+ [
226
+ 1742958900000,
227
+ -0.01,
228
+ 3
229
+ ]
230
+ ]
231
+ },
232
+ {
233
+ "name": "BROK.Turbine2.Power-Gen.Inst.15Minutes.0.Rev-SCADA-cda",
234
+ "units": "MW",
235
+ "values": [
236
+ [
237
+ 1742952600000,
238
+ 49.89,
239
+ 3
240
+ ],
241
+ [
242
+ 1742953500000,
243
+ 50.02,
244
+ 3
245
+ ],
246
+ [
247
+ 1742954400000,
248
+ 25.98,
249
+ 3
250
+ ],
251
+ [
252
+ 1742955300000,
253
+ -0.06,
254
+ 3
255
+ ],
256
+ [
257
+ 1742956200000,
258
+ 49.89,
259
+ 3
260
+ ],
261
+ [
262
+ 1742957100000,
263
+ 50.02,
264
+ 3
265
+ ],
266
+ [
267
+ 1742958000000,
268
+ 25.98,
269
+ 3
270
+ ],
271
+ [
272
+ 1742958900000,
273
+ -0.06,
274
+ 3
275
+ ]
276
+ ]
277
+ }
278
+ ]
@@ -0,0 +1,9 @@
1
+ "Time","Headwater","Tailwater","U1_MW","U1_MVAR","U2_MW","U2_Mvar","total_Pwr_discharge","Tot_Tainter_Opening","SG1"
2
+ 03/25/2025 20:30:00,599.95,405.54,50.04,4.93,49.89,5.09,6700,0.00,21.00
3
+ 03/25/2025 20:45:00,599.92,405.53,50.6,2.57,50.02,3.33,6772,0.00,21.00
4
+ 03/25/2025 21:00:00,599.91,405.57,26.89,1.21,25.98,1.22,4027,0.00,21.00
5
+ 03/25/2025 21:15:00,599.93,405.29,-0.01,-0.01,-0.06,-0.01,0,0.00,21.00
6
+ 03/25/2025 21:30:00,599.95,405.54,50.04,4.93,49.89,5.09,6700,0.00,21.00
7
+ 03/25/2025 21:45:00,599.92,405.53,50.6,2.57,50.02,3.33,6772,0.00,21.00
8
+ 03/25/2025 22:00:00,599.91,405.57,26.89,1.21,25.98,1.22,4027,0.00,21.00
9
+ 03/25/2025 22:15:00,599.93,405.29,-0.01,-0.01,-0.06,-0.01,0,0.00,21.00
@@ -0,0 +1,45 @@
1
+ {
2
+ "interval": null,
3
+ "input_files": {
4
+ "BROK": {
5
+ "data_path": "cwmscli/commands/csv2cwms/tests/data/sample_brok.csv",
6
+ "store_rule": "REPLACE_ALL",
7
+ "date_format": [
8
+ "%m/%d/%Y %H:%M:%S",
9
+ "%m/%d/%Y %H:%M"
10
+ ],
11
+ "timeseries": {
12
+ "BROK.Elev.Inst.15Minutes.0.Rev-SCADA-cda": {
13
+ "columns": "Headwater",
14
+ "units": "ft",
15
+ "precision": 2
16
+ },
17
+ "BROK.Elev-Tailwater.Inst.15Minutes.0.Rev-SCADA-cda": {
18
+ "columns": "Tailwater",
19
+ "units": "ft",
20
+ "precision": 2
21
+ },
22
+ "BROK.Flow-Power.Inst.15Minutes.0.Rev-SCADA-cda": {
23
+ "columns": "total_Pwr_discharge",
24
+ "units": "cfs",
25
+ "precision": 0
26
+ },
27
+ "BROK.Power-Gen.Inst.15Minutes.0.Rev-SCADA-cda": {
28
+ "columns": "U1_MW+U2_MW",
29
+ "units": "MW",
30
+ "precision": 2
31
+ },
32
+ "BROK-Turbine1.Power-Gen.Inst.15Minutes.0.Rev-SCADA-cda": {
33
+ "columns": "U1_MW",
34
+ "units": "MW",
35
+ "precision": 2
36
+ },
37
+ "BROK-Turbine2.Power-Gen.Inst.15Minutes.0.Rev-SCADA-cda": {
38
+ "columns": "U2_MW",
39
+ "units": "MW",
40
+ "precision": 2
41
+ }
42
+ }
43
+ }
44
+ }
45
+ }
@@ -0,0 +1,35 @@
1
+ import json
2
+ import os
3
+
4
+ from scada_ts import __main__ as main
5
+ from utils.dateutils import parse_date
6
+ from utils.fileio import load_csv, read_config
7
+
8
+
9
def test_brok_pipeline_matches_expected():
    """Feed the sample BROK CSV through ``main.load_timeseries`` and compare
    the result with the stored expected JSON output."""
    data_dir = os.path.join(os.path.dirname(__file__), "data")

    # Read the configuration and the raw CSV rows (header row first)
    config = read_config(os.path.join(data_dir, "sample_config.json"))
    csv_rows = load_csv(os.path.join(data_dir, "sample_brok.csv"))

    # Key every data row by the epoch seconds of its first (timestamp) cell
    rows_by_epoch = {
        int(parse_date(cells[0]).timestamp()): cells for cells in csv_rows[1:]
    }
    file_data = {"header": csv_rows[0], "data": rows_by_epoch}

    actual = main.load_timeseries(file_data, "BROK", config)

    with open(os.path.join(data_dir, "expected_brok_output.json")) as fh:
        expected = json.load(fh)

    assert actual == expected
@@ -0,0 +1,68 @@
1
+ from datetime import datetime, timedelta
2
+
3
+ import pytest
4
+
5
+ from ..utils.dateutils import determine_interval, parse_date, safe_zoneinfo
6
+
7
+
8
def test_parse_date_valid_formats():
    """parse_date accepts second-, minute- and hour-resolution m/d/Y strings."""
    utc = safe_zoneinfo("UTC")
    half_past = datetime(2025, 3, 25, 14, 30, tzinfo=utc)
    on_the_hour = datetime(2025, 3, 25, 14, tzinfo=utc)

    cases = (
        ("03/25/2025 14:30:00", half_past),
        ("03/25/2025 14:30", half_past),
        ("03/25/2025 14", on_the_hour),
    )
    for text, wanted in cases:
        assert parse_date(text) == wanted
14
+
15
+
16
def test_parse_date_invalid_format():
    """A day-first, dash-separated date is rejected with ValueError."""
    unsupported = "25-03-2025"
    with pytest.raises(ValueError):
        parse_date(unsupported)
19
+
20
+
21
def test_determine_interval_regular_spacing():
    """Five keys spaced exactly 900 seconds apart yield an interval of 900."""
    step_seconds = 900  # 15 minutes
    start = datetime(2025, 3, 25, 14, 30, tzinfo=safe_zoneinfo("UTC"))

    csv_data = {}
    for n in range(5):
        moment = start + timedelta(seconds=n * step_seconds)
        csv_data[int(moment.timestamp())] = []

    assert determine_interval(csv_data) == 900
28
+
29
+
30
def test_determine_interval_mixed_spacing():
    """An out-of-order 60-minute entry does not displace the 15-minute majority."""
    start = datetime(2025, 3, 25, 14, 30, tzinfo=safe_zoneinfo("UTC"))
    # Offsets in minutes; 60 is the outlier and is inserted before 45
    offsets = (0, 15, 30, 60, 45)

    csv_data = {}
    for minutes in offsets:
        moment = start + timedelta(minutes=minutes)
        csv_data[int(moment.timestamp())] = []

    assert determine_interval(csv_data) == 900
41
+
42
+
43
def test_determine_interval_duplicate_timestamps():
    """A repeated timestamp collapses into one dict key and the interval stays 900."""
    start = datetime(2025, 3, 25, 14, 30, tzinfo=safe_zoneinfo("UTC"))
    # Offsets in minutes; the second 15 is the duplicate
    offsets = (0, 15, 15, 30)

    csv_data = {}
    for minutes in offsets:
        moment = start + timedelta(minutes=minutes)
        csv_data[int(moment.timestamp())] = []

    assert determine_interval(csv_data) == 900
53
+
54
+
55
def test_determine_interval_missing_values():
    """A single skipped 15-minute step still resolves to a 900 second interval."""
    start = datetime(2025, 3, 25, 14, 30, tzinfo=safe_zoneinfo("UTC"))
    present_steps = [0, 1, 3, 4]  # step 2 is absent, leaving a 30-minute gap

    csv_data = {}
    for step in present_steps:
        moment = start + timedelta(minutes=15 * step)
        csv_data[int(moment.timestamp())] = []

    assert determine_interval(csv_data) == 900
62
+
63
+
64
def test_determine_interval_insufficient_data():
    """One timestamp is not enough to measure an interval, so ValueError is raised."""
    only_moment = datetime(2025, 3, 25, 14, 30, tzinfo=safe_zoneinfo("UTC"))
    single_row = {int(only_moment.timestamp()): []}

    with pytest.raises(ValueError):
        determine_interval(single_row)
@@ -0,0 +1,49 @@
1
+ import pytest
2
+
3
+ from ..utils.expression import eval_expression
4
+
5
+
6
@pytest.mark.parametrize(
    "expr,row,header_map,expected",
    [
        # addition of two populated columns
        (
            "U1_MW+U2_MW",
            ["599.95", "405.54", "50.04", "4.93", "49.89"],
            {"u1_mw": 2, "u2_mw": 4},
            99.93,
        ),
        # subtraction
        (
            "U1_MW-U2_MW",
            ["", "", "60.0", "", "15.5"],
            {"u1_mw": 2, "u2_mw": 4},
            44.5,
        ),
        # multiplication
        (
            "U1_MW*U2_MW",
            ["", "", "5.0", "", "2.0"],
            {"u1_mw": 2, "u2_mw": 4},
            10.0,
        ),
        # a column that is absent from the header map yields None
        ("MISSING+U1_MW", ["", "", "50.0"], {"u1_mw": 2}, None),
    ],
)
def test_eval_expression(expr, row, header_map, expected):
    """Check eval_expression for each supported operator and a missing column."""
    result = eval_expression(expr, row, header_map)
    if expected is not None:
        assert round(result, 2) == pytest.approx(expected, abs=1e-2)
        return
    assert result is None
26
+
27
+
28
def test_eval_expression_invalid_token():
    """An operand with a stray '@' matches no header entry, so the result is None."""
    cells = ["", "", "5.0", "", "2.0"]
    columns = {"u1_mw": 2, "u2_mw": 4}
    outcome = eval_expression("U1_MW+@U2_MW", cells, columns)
    assert outcome is None
32
+
33
+
34
def test_eval_expression_division_by_zero():
    """An expression using '/' with a zero divisor evaluates to None."""
    cells = ["", "", "5.0", "", "0"]
    columns = {"u1_mw": 2, "u2_mw": 4}
    outcome = eval_expression("U1_MW/U2_MW", cells, columns)
    assert outcome is None
38
+
39
+
40
def test_eval_expression_missing_header_map():
    """With an empty header map no column can be resolved, so the result is None."""
    cells = ["", "", "5.0", "", "2.0"]
    no_columns = {}  # empty
    outcome = eval_expression("U1_MW+U2_MW", cells, no_columns)
    assert outcome is None
44
+
45
+
46
def test_eval_expression_non_numeric():
    """Cells that cannot be converted to float make the whole expression None."""
    cells = ["", "", "fifty", "", "two"]
    columns = {"u1_mw": 2, "u2_mw": 4}
    outcome = eval_expression("U1_MW+U2_MW", cells, columns)
    assert outcome is None
@@ -0,0 +1,43 @@
1
+ import os
2
+
3
+ import pytest
4
+
5
+ from ..utils.fileio import load_csv, read_config
6
+
7
+
8
def test_load_csv_valid():
    """load_csv returns a list of row lists whose first row has several columns."""
    here = os.path.dirname(__file__)
    rows = load_csv(os.path.join(here, "data", "sample_brok.csv"))

    assert isinstance(rows, list)
    first_row = rows[0]
    assert isinstance(first_row, list)
    assert len(first_row) > 1  # header row should have multiple columns
14
+
15
+
16
def test_load_csv_nonexistent():
    """Loading a path that does not exist raises FileNotFoundError."""
    here = os.path.dirname(__file__)
    missing_file = os.path.join(here, "data", "does_not_exist.csv")

    with pytest.raises(FileNotFoundError):
        load_csv(missing_file)
20
+
21
+
22
def test_load_csv_malformed_row(tmp_path):
    """Rows with too few or too many cells are returned unchanged, not rejected."""
    csv_file = tmp_path / "bad.csv"
    csv_file.write_text("Time,Value\n2025-01-01 00:00\n2025-01-01 00:15,42,Extra")

    rows = load_csv(str(csv_file))

    assert len(rows) == 3
    short_row, long_row = rows[1], rows[2]
    assert short_row == ["2025-01-01 00:00"]
    assert long_row == ["2025-01-01 00:15", "42", "Extra"]
29
+
30
+
31
def test_read_config_valid():
    """The sample config loads as a dict that has a BROK entry under input_files."""
    here = os.path.dirname(__file__)
    loaded = read_config(os.path.join(here, "data", "sample_config.json"))

    assert isinstance(loaded, dict)
    assert "input_files" in loaded
    assert "BROK" in loaded["input_files"]
37
+
38
+
39
def test_read_config_invalid_json(tmp_path):
    """read_config raises when the file content is not valid JSON."""
    broken_file = tmp_path / "bad.json"
    broken_file.write_text("{invalid_json: true,}")

    with pytest.raises(Exception):
        read_config(str(broken_file))
@@ -0,0 +1,5 @@
1
+ from .dateutils import DATE_STRINGS, determine_interval, parse_date, safe_zoneinfo
2
+ from .expression import eval_expression
3
+ from .fileio import load_csv, read_config
4
+ from .logging import logger, setup_logger
5
+ from .terminal import colorize, colorize_count
@@ -0,0 +1,105 @@
1
+ import logging
2
+ from collections import Counter
3
+ from datetime import datetime, timezone
4
+ from typing import List
5
+
6
+ try:
7
+ from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
8
+ except ImportError:
9
+ # Python < 3.9 does not support zoneinfo
10
+ ZoneInfo = None
11
+ ZoneInfoNotFoundError = Exception
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ DATE_STRINGS = [
16
+ "%m/%d/%Y %H:%M:%S",
17
+ "%m/%d/%Y %H:%M",
18
+ "%m/%d/%Y %H",
19
+ "%Y-%m-%dT%H:%M:%S",
20
+ "%Y-%m-%dT%H:%M",
21
+ "%Y-%m-%dT%H",
22
+ "%Y-%m-%d %H:%M:%S",
23
+ "%Y-%m-%d %H:%M",
24
+ "%Y-%m-%d %H",
25
+ ]
26
+
27
+
28
def safe_zoneinfo(key: str):
    """
    Look up the time zone named ``key``, defaulting to UTC.

    ``timezone.utc`` is returned when the ``zoneinfo`` module could not be
    imported (``ZoneInfo`` is ``None``) or when ``ZoneInfo(key)`` raises
    ``ZoneInfoNotFoundError``.
    """
    zone = timezone.utc
    if ZoneInfo is not None:
        try:
            zone = ZoneInfo(key)
        except ZoneInfoNotFoundError:
            # Unknown key: keep the UTC default
            pass
    return zone
39
+
40
+
41
def parse_date(date, tz_str="UTC", date_format: str = "") -> datetime:
    """Parse a timestamp seen in hydropower files into a timezone-aware datetime.

    NOTE: No timezone conversion is performed. A string is parsed as a naive
    wall-clock time and then tagged with the ``tz_str`` zone, so all
    timestamps are assumed to already be in that zone.

    Args:
        date (str | int): Date string to parse, or an integer timestamp that
            is passed to ``datetime.fromtimestamp``.
        tz_str (str): Time zone key handed to ``safe_zoneinfo``.
        date_format (str | list[str]): Preferred ``strptime`` format(s). May be
            a single format string, a comma-separated string of formats, or a
            list/tuple of format strings. These are tried in order before the
            built-in ``DATE_STRINGS`` fallbacks. Empty or ``None`` means no
            preferred format.

    Returns:
        datetime: The parsed timestamp with ``tzinfo`` set.

    Raises:
        ValueError: If no user-specified or fallback format matches ``date``.
    """
    if isinstance(date, int):
        return datetime.fromtimestamp(date, tz=safe_zoneinfo(tz_str))

    # Normalize date_format into a flat list of non-empty format strings.
    # (Previously a comma-separated string was split and then wrapped in a
    # second list, which made strptime receive a list and raise TypeError.)
    if not date_format:
        user_formats = []
    elif isinstance(date_format, str):
        if "," in date_format:
            user_formats = [
                fmt.strip() for fmt in date_format.split(",") if fmt.strip()
            ]
        else:
            user_formats = [date_format]
    else:
        user_formats = [fmt for fmt in date_format if fmt]

    # User-specified formats come first; built-in formats are the fallback.
    candidates = [(fmt, False) for fmt in user_formats]
    candidates.extend((fmt, True) for fmt in DATE_STRINGS)

    for fmt, is_fallback in candidates:
        try:
            dt_naive = datetime.strptime(date, fmt)
        except ValueError:
            continue

        # Only log when a built-in fallback format was needed
        if is_fallback:
            if user_formats:
                logger.warning(
                    "Using fallback date format '%s' for date '%s'. "
                    "The user-specified format is '%s'.",
                    fmt,
                    date,
                    user_formats,
                )
            else:
                logger.warning(
                    "Using fallback date format '%s' for date '%s'. "
                    "No user-specified format was provided.",
                    fmt,
                    date,
                )
        return dt_naive.replace(tzinfo=safe_zoneinfo(tz_str))

    raise ValueError(f"Invalid date format: {date}")
76
+
77
+
78
def determine_interval(csv_data: dict, sample_size=10) -> int:
    """
    Determine the most common interval (in seconds) between the timestamps of
    the first few entries of the parsed CSV data.

    Args:
        `csv_data` is a mapping whose keys are the row timestamps (anything
            `parse_date` accepts, e.g. integer epoch seconds). Only the keys
            are used, in their existing order.
        `sample_size` is the maximum number of leading keys to sample.
    Returns:
        [int] The most common difference between consecutive sampled
        timestamps, in seconds
    Raises:
        ValueError: If `csv_data` is empty, or fewer than two of the sampled
            keys could be parsed as timestamps.
    """
    dates = list(csv_data.keys())
    if not dates:
        raise ValueError("No data found in CSV file for the given lookback period.")

    # Sample up to `sample_size` keys. The previous slice stopped at
    # `len(dates) - 1`, which always dropped the last row of a short file and
    # rejected a perfectly valid two-row input.
    timestamps = []
    for key in dates[:sample_size]:
        try:
            timestamps.append(parse_date(key))
        except (ValueError, TypeError, OverflowError, OSError):
            # Best effort: skip keys that cannot be parsed as timestamps
            continue

    if len(timestamps) < 2:
        raise ValueError("Not enough valid timestamps to determine interval.")

    diffs = [int((b - a).total_seconds()) for a, b in zip(timestamps, timestamps[1:])]
    return Counter(diffs).most_common(1)[0][0]
@@ -0,0 +1,39 @@
1
+ import re
2
+
3
+
4
def eval_expression(expr, row, header_map):
    """
    Evaluate simple math expressions (+, -, *) using values from the row based
    on column names in the expression.

    Operators are applied strictly left to right (no precedence, no
    parentheses). Spaces in the expression are ignored. A column name may be
    wrapped in single or double quotes, which is required when the name itself
    contains an operator character.

    Args:
        expr (str): Expression such as ``"U1_MW+U2_MW"``.
        row (list): Cell values of one CSV row.
        header_map (dict): Maps lower-cased column names to their index in ``row``.

    Returns:
        float | None: The computed value, or ``None`` when the expression has
        no operands, a column is unknown or outside the row, or any referenced
        cell is not numeric.
    """
    operators = {"+", "-", "*"}
    tokens = re.findall(
        r'"[^"]+"|\'[^\']+\'|\+|\-|\*|[^\+\-\*]+', expr.replace(" ", "")
    )
    result = None  # None here only ever means "no operand seen yet"
    for i, token in enumerate(tokens):
        if token in operators:
            continue

        col_name = token.strip('"').strip("'").lower()
        idx = header_map.get(col_name)
        if idx is None or idx >= len(row):
            # Immediately return to prevent adding None (0) to result
            return None

        try:
            val = float(row[idx])
        except (TypeError, ValueError):
            # One bad operand invalidates the whole expression. Returning here
            # (instead of carrying None forward) stops a later numeric operand
            # from silently becoming the result.
            return None

        if result is None:
            result = val
            continue

        op = tokens[i - 1]
        if op == "+":
            result += val
        elif op == "-":
            result -= val
        elif op == "*":
            result *= val
    return result