captest 0.12.1__py2.py3-none-any.whl → 0.13.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- captest/__init__.py +1 -0
- captest/_version.py +3 -3
- captest/capdata.py +232 -314
- captest/io.py +65 -23
- captest/plotting.py +492 -0
- captest/util.py +13 -5
- {captest-0.12.1.dist-info → captest-0.13.0.dist-info}/METADATA +1 -1
- captest-0.13.0.dist-info/RECORD +13 -0
- {captest-0.12.1.dist-info → captest-0.13.0.dist-info}/WHEEL +1 -1
- captest-0.12.1.dist-info/RECORD +0 -12
- {captest-0.12.1.dist-info → captest-0.13.0.dist-info}/LICENSE.txt +0 -0
- {captest-0.12.1.dist-info → captest-0.13.0.dist-info}/top_level.txt +0 -0
captest/io.py
CHANGED
```diff
@@ -1,4 +1,5 @@
 # this file is formatted with black
+import copy
 import dateutil
 import datetime
 from pathlib import Path
```
```diff
@@ -46,7 +47,7 @@ def load_excel_column_groups(path):
     dict
         Dictionary mapping column group names to lists of column names.
     """
-    df = pd.read_excel(path, header=None).
+    df = pd.read_excel(path, header=None).ffill(axis='index')
     return df.groupby(0)[1].apply(list).to_dict()
 
 
```
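The new `ffill(axis='index')` call forward-fills blank group cells before grouping. A minimal sketch of the transformation the function applies, using an inline DataFrame in place of a real workbook (the group and column names here are made up):

```python
import pandas as pd

# Two-column layout with no header row; blank group cells (e.g. from
# merged cells in excel) are forward-filled down column 0.
df = pd.DataFrame({
    0: ['irr-poa', None, 'real-pwr-mtr'],
    1: ['poa_pyranometer_1', 'poa_pyranometer_2', 'meter_kw'],
}).ffill(axis='index')

df.groupby(0)[1].apply(list).to_dict()
# {'irr-poa': ['poa_pyranometer_1', 'poa_pyranometer_2'],
#  'real-pwr-mtr': ['meter_kw']}
```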
```diff
@@ -60,6 +61,9 @@ def load_pvsyst(
     """
     Load data from a PVsyst energy production model.
 
+    Will load day first or month first dates. Expects files that use a comma as a
+    separator rather than a semicolon.
+
     Parameters
     ----------
     path : str
```
```diff
@@ -72,7 +76,8 @@ def load_pvsyst(
         By default sets power to E_Grid, poa to GlobInc, t_amb to T Amb, and w_vel to
         WindVel. Set to False to not set regression columns on load.
     **kwargs
-        Use to pass additional kwargs to pandas read_csv.
+        Use to pass additional kwargs to pandas read_csv. Pass sep=';' to load files
+        that use semicolons instead of commas as the separator.
 
     Returns
     -------
```
```diff
@@ -109,8 +114,33 @@ def load_pvsyst(
             break
 
     pvraw.columns = pvraw.columns.droplevel(1)
-    dates = pvraw.loc[:, "date"]
     try:
+        dates = pvraw.loc[:, "date"]
+    except KeyError:
+        warnings.warn(
+            "No 'date' column found in the PVsyst data. This may be due to "
+            "the separator being a semicolon ';' rather than a comma ','. "
+            "If this is the case, try passing sep=';' when calling load_pvsyst. "
+            "Otherwise the date column may actually be missing. Exception:"
+        )
+        raise
+    # PVsyst creates dates like '01/01/90 00:00' i.e. January 1st, 1990.
+    # Opening the PVsyst output in excel will likely result in the dates modified to
+    # 1/1/1990 0:00. The strftime format specified won't load the excel modified dates
+    # so these are caught by checking for consistent length and reformatted
+    if not all(dates.str.len() == 14):
+        date_parts = dates.str.split(' ').str[0].str.split('/')
+        time_parts = dates.str.split(' ').str[1].str.split(':')
+        dates = (
+            date_parts.str[0].str.zfill(2) + '/' +
+            date_parts.str[1].str.zfill(2) + '/' +
+            '90 ' +
+            time_parts.str[0].str.zfill(2) + ':' +
+            time_parts.str[1]
+        )
+    try:
+        # mm/dd/yy hh:mm, lower case y gives
+        # Year without century as a zero-padded decimal number. e.g. 00, 01, …, 99
         dt_index = pd.to_datetime(dates, format="%m/%d/%y %H:%M")
     except ValueError:
         warnings.warn(
```
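The new repair branch only runs when the date strings are not the 14-character '%m/%d/%y %H:%M' form PVsyst writes, e.g. after excel has rewritten them. A runnable sketch of the zero-fill rebuild, assuming excel-style 'M/D/YYYY H:MM' input:

```python
import pandas as pd

dates = pd.Series(['1/1/1990 0:00', '1/1/1990 1:00'])  # excel-mangled
date_parts = dates.str.split(' ').str[0].str.split('/')
time_parts = dates.str.split(' ').str[1].str.split(':')
fixed = (
    date_parts.str[0].str.zfill(2) + '/' +
    date_parts.str[1].str.zfill(2) + '/' +
    '90 ' +                                  # PVsyst dates are always year 1990
    time_parts.str[0].str.zfill(2) + ':' +
    time_parts.str[1]
)
pd.to_datetime(fixed, format='%m/%d/%y %H:%M')
# 0   1990-01-01 00:00:00
# 1   1990-01-01 01:00:00
```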
```diff
@@ -133,7 +163,6 @@ def load_pvsyst(
         cd.data["E_Grid"] = cd.data["E_Grid"] / egrid_unit_adj_factor
     cd.data_filtered = cd.data.copy()
     cd.column_groups = cg.group_columns(cd.data)
-    cd.trans_keys = list(cd.column_groups.keys())
     if set_regression_columns:
         cd.set_regression_cols(
             power="E_Grid", poa="GlobInc", t_amb="T_Amb", w_vel="WindVel"
```
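With the hunks above, semicolon-separated PVsyst exports need the separator forwarded through the new kwargs passthrough. A usage sketch (the file paths are hypothetical):

```python
from captest import io

# Comma-separated export, the default the loader now documents.
cd = io.load_pvsyst('./data/pvsyst_hourly.csv')

# Semicolon-separated export; sep is forwarded to pandas read_csv.
cd_semi = io.load_pvsyst('./data/pvsyst_hourly_semicolon.csv', sep=';')
```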
```diff
@@ -260,7 +289,6 @@ class DataLoader:
             current_file, missing_intervals, freq_str = util.reindex_datetime(
                 file,
                 report=False,
-                add_index_col=True,
             )
             reindexed_dfs[name] = current_file
             file_frequencies.append(freq_str)
```
```diff
@@ -387,12 +415,18 @@ class DataLoader:
                     print(err)
                     failed_to_load_count += 1
                     continue
-            (
-                self.loaded_files,
-                self.common_freq,
-                self.file_frequencies,
-            ) = self._reindex_loaded_files()
-            data = self._join_files()
+            if len(self.loaded_files) == 0:
+                warnings.warn(
+                    "No files were loaded. Check that file_reader is working")
+            elif len(self.loaded_files) > 1:
+                (
+                    self.loaded_files,
+                    self.common_freq,
+                    self.file_frequencies,
+                ) = self._reindex_loaded_files()
+                data = self._join_files()
+            elif len(self.loaded_files) == 1:
+                data = list(self.loaded_files.values())[0]
             data.index.name = "Timestamp"
             self.data = data
         else:
```
```diff
@@ -463,13 +497,13 @@ def load_data(
         By default will create a new index for the data using the earliest datetime,
         latest datetime, and the most frequent time interval ensuring there are no
         missing intervals.
-    site : dict, default None
-        Pass a dictionary containing site data, which will be used to generate
-        modeled clear sky ghi and poa values. The clear sky irradiance values are
-        added to the data and the column_groups attribute is updated to include these
-        two irradiance columns. The site data dictionary should be
-        {sys: {system data}, loc: {location data}}. See the capdata.csky documentation
-        for the format of the system data and location data.
+    site : dict or str, default None
+        Pass a dictionary or path to a json or yaml file containing site data, which
+        will be used to generate modeled clear sky ghi and poa values. The clear sky
+        irradiance values are added to the data and the column_groups attribute is
+        updated to include these two irradiance columns. The site data dictionary should
+        be {sys: {system data}, loc: {location data}}. See the capdata.csky
+        documentation for the format of the system data and location data.
     column_groups_template : bool, default False
         If True, will call `CapData.data_columns_to_excel` to save a file to use to
         manually create column groupings at `path`.
```
```diff
@@ -508,12 +542,20 @@ def load_data(
     elif (p.suffix == '.xlsx') or (p.suffix == '.xls'):
         cd.column_groups = cg.ColumnGroups(load_excel_column_groups(group_columns))
     if site is not None:
-        cd.data = csky(cd.data, loc=site['loc'], sys=site['sys'])
-        cd.data_filtered = cd.data.copy()
-        cd.column_groups['irr-poa-clear_sky'] = ['poa_mod_csky']
-        cd.column_groups['irr-ghi-clear_sky'] = ['ghi_mod_csky']
+        if isinstance(site, str):
+            path_to_site = Path(site)
+            if path_to_site.is_file():
+                if path_to_site.suffix == ".json":
+                    site = util.read_json(site)
+                if (path_to_site.suffix == ".yaml") or (path_to_site.suffix == ".yml"):
+                    site = util.read_yaml(site)
+        cd.site = copy.deepcopy(site)
+        if isinstance(site, dict):
+            cd.data = csky(cd.data, loc=site['loc'], sys=site['sys'])
+            cd.data_filtered = cd.data.copy()
+            cd.column_groups['irr-poa-clear_sky'] = ['poa_mod_csky']
+            cd.column_groups['irr-ghi-clear_sky'] = ['ghi_mod_csky']
     cd.trans_keys = list(cd.column_groups.keys())
-    cd.set_plot_attributes()
     if column_groups_template:
         cd.data_columns_to_excel()
     return cd
```
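The `site` argument now also accepts a path to a json or yaml file, parsed with the new `util.read_json`/`util.read_yaml` calls above. A sketch of both call styles; the loc/sys keys shown are a guess at the pvlib-style fields, so check the capdata.csky documentation for the real schema, and the paths are hypothetical:

```python
from captest import io

# Site data from a yaml file.
cd = io.load_data('./data/', site='./site.yaml')

# Or pass the equivalent dictionary directly.
site = {
    'loc': {'latitude': 39.7, 'longitude': -105.2, 'altitude': 1680, 'tz': 'Etc/GMT+7'},
    'sys': {'surface_tilt': 25, 'surface_azimuth': 180, 'albedo': 0.2},
}
cd = io.load_data('./data/', site=site)
```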
captest/plotting.py
ADDED
```diff
@@ -0,0 +1,492 @@
+from pathlib import Path
+import copy
+import json
+import warnings
+import itertools
+from functools import partial
+import numpy as np
+import pandas as pd
+import panel as pn
+from panel.interact import fixed
+import holoviews as hv
+from holoviews import opts
+import colorcet as cc
+from bokeh.models import NumeralTickFormatter
+
+from .util import tags_by_regex, append_tags, read_json
+
+# disable error messages for panel dashboard
+pn.config.console_output = 'disable'
+
+COMBINE = {
+    'poa_ghi': 'irr.*(poa|ghi)$',
+    'poa_csky': '(?=.*poa)(?=.*irr)',
+    'ghi_csky': '(?=.*ghi)(?=.*irr)',
+    'temp_amb_bom': '(?=.*temp)((?=.*amb)|(?=.*bom))',
+    'inv_sum_mtr_pwr': ['(?=.*real)(?=.*pwr)(?=.*mtr)', '(?=.*pwr)(?=.*agg)'],
+}
+
+DEFAULT_GROUPS = [
+    'inv_sum_mtr_pwr',
+    '(?=.*real)(?=.*pwr)(?=.*inv)',
+    '(?=.*real)(?=.*pwr)(?=.*mtr)',
+    'poa_ghi',
+    'poa_csky',
+    'ghi_csky',
+    'temp_amb_bom',
+]
+
+
+def find_default_groups(groups, default_groups):
+    """
+    Find the default groups in the list of groups.
+
+    Parameters
+    ----------
+    groups : list of str
+        The list of groups to search for the default groups.
+    default_groups : list of str
+        List of regex strings to use to identify default groups.
+
+    Returns
+    -------
+    list of str
+        The default groups found in the list of groups.
+    """
+    found_groups = []
+    for re_str in default_groups:
+        found_grp = tags_by_regex(groups, re_str)
+        if len(found_grp) == 1:
+            found_groups.append(found_grp[0])
+        elif len(found_grp) > 1:
+            warnings.warn(
+                f'More than one group found for regex string {re_str}. '
+                'Refine regex string to find only one group. '
+                f'Groups found: {found_grp}'
+
+            )
+    return found_groups
+
+
+def parse_combine(combine, column_groups=None, data=None, cd=None):
+    """
+    Parse regex strings for identifying groups of columns or tags to combine.
+
+    Parameters
+    ----------
+    combine : dict
+        Dictionary of group names and regex strings to use to identify groups from
+        column groups and individual tags (columns) to combine into new groups.
+        Keys should be strings for names of new groups. Values should be either a string
+        or a list of two strings. If a string, the string is used as a regex to identify
+        groups to combine. If a list, the first string is used to identify groups to
+        combine and the second is used to identify individual tags (columns) to combine.
+    column_groups : ColumnGroups, optional
+        The column groups object to add new groups to. Required if `cd` is not provided.
+    data : pd.DataFrame, optional
+        The data to use to identify groups and columns to combine. Required if `cd` is
+        not provided.
+    cd : captest.CapData, optional
+        The captest.CapData object with the `data` and `column_groups` attributes set.
+        Required if `columng_groups` and `data` are not provided.
+
+    Returns
+    -------
+    ColumnGroups
+        New column groups object with new groups added.
+    """
+    if cd is not None:
+        data = cd.data
+        column_groups = cd.column_groups
+    cg_out = copy.deepcopy(column_groups)
+    orig_groups = list(cg_out.keys())
+
+    tags = list(data.columns)
+
+    for grp_name, re_str in combine.items():
+        group_re = None
+        tag_re = None
+        tags_in_matched_groups = []
+        matched_tags = []
+        if isinstance(re_str, str):
+            group_re = re_str
+        elif isinstance(re_str, list):
+            if len(re_str) != 2:
+                warnings.warn(
+                    'When passing a list of regex. There should be two strings. One for '
+                    'identifying groups and one for identifying individual tags (columns).'
+                )
+                return None
+            else:
+                group_re = re_str[0]
+                tag_re = re_str[1]
+        if group_re is not None:
+            matched_groups = tags_by_regex(orig_groups, group_re)
+            if len(matched_groups) >= 1:
+                tags_in_matched_groups = list(
+                    itertools.chain(*[cg_out[grp] for grp in matched_groups])
+                )
+        if tag_re is not None:
+            matched_tags = tags_by_regex(tags, tag_re)
+        cg_out[grp_name] = tags_in_matched_groups + matched_tags
+    return cg_out
+
+
+def msel_from_column_groups(column_groups, groups=True):
+    """
+    Create a multi-select widget from a column groups object.
+
+    Parameters
+    ----------
+    column_groups : ColumnGroups
+        The column groups object.
+    groups : bool, default True
+        By default creates list of groups i.e. the keys of `column_groups`,
+        otherwise creates list of individual columns i.e. the values of `column_groups`
+        concatenated together.
+    """
+    if groups:
+        keys = list(column_groups.data.keys())
+        keys.sort()
+        options = {k: column_groups.data[k] for k in keys}
+        name = 'Groups'
+        value = column_groups.data[list(column_groups.keys())[0]]
+    else:
+        options = []
+        for columns in column_groups.values():
+            options += columns
+        options.sort()
+        name = 'Columns'
+        value = [options[0]]
+    return pn.widgets.MultiSelect(
+        name=name,
+        value=value,
+        options=options,
+        size=8,
+        height=400,
+        width=400
+    )
+
+
+def plot_tag(data, tag, width=1500, height=250):
+    if len(tag) == 1:
+        plot = hv.Curve(data[tag])
+    elif len(tag) > 1:
+        curves = {}
+        for column in tag:
+            try:
+                curves[column] = hv.Curve(data[column])
+            except KeyError:
+                continue
+        plot = hv.NdOverlay(curves)
+    elif len(tag) == 0:
+        plot = hv.Curve(pd.DataFrame(
+            {'no_data': [np.NaN] * data.shape[0]},
+            index=data.index
+        ))
+    plot.opts(
+        opts.Curve(
+            line_width=1,
+            width=width,
+            height=height,
+            muted_alpha=0,
+            tools=['hover'],
+            yformatter=NumeralTickFormatter(format='0,0'),
+        ),
+        opts.NdOverlay(width=width, height=height, legend_position='right')
+    )
+    return plot
+
+
+def group_tag_overlay(group_tags, column_tags):
+    """
+    Overlay curves of groups and individually selected columns.
+
+    Parameters
+    ----------
+    group_tags : list of str
+        The tags to plot from the groups selected.
+    column_tags : list of str
+        The tags to plot from the individually selected columns.
+    """
+    joined_tags = [t for tag_list in group_tags for t in tag_list] + column_tags
+    return joined_tags
+
+
+def plot_group_tag_overlay(data, group_tags, column_tags, width=1500, height=400):
+    """
+    Overlay curves of groups and individually selected columns.
+
+    Parameters
+    ----------
+    data : pd.DataFrame
+        The data to plot.
+    group_tags : list of str
+        The tags to plot from the groups selected.
+    column_tags : list of str
+        The tags to plot from the individually selected columns.
+    """
+    joined_tags = group_tag_overlay(group_tags, column_tags)
+    return plot_tag(data, joined_tags, width=width, height=height)
+
+
+def plot_tag_groups(data, tags_to_plot, width=1500, height=250):
+    """
+    Plot groups of tags, one of overlayed curves per group.
+
+    Parameters
+    ----------
+    data : pd.DataFrame
+        The data to plot.
+    tags_to_plot : list
+        List of lists of strings. One plot for each inner list.
+    """
+    group_plots = []
+    if len(tags_to_plot) == 0:
+        tags_to_plot = [[]]
+    for group in tags_to_plot:
+        plot = plot_tag(data, group, width=width, height=height)
+        group_plots.append(plot)
+    return hv.Layout(group_plots).cols(1)
+
+
+def filter_list(text_input, ms_to_filter, names, event=None):
+    """
+    Filter a multi-select widget by a regex string.
+
+    Parameters
+    ----------
+    text_input : pn.widgets.TextInput
+        The text input widget to get the regex string from.
+    ms_to_filter : pn.widgets.MultiSelect
+        The multi-select widget to update.
+    names : list of str
+        The list of names to filter.
+    event : pn.widgets.event, optional
+        Passed by the `param.watch` method. Not used.
+
+    Returns
+    -------
+    None
+    """
+    if text_input.value == '':
+        re_value = '.*'
+    else:
+        re_value = text_input.value
+    names_ = copy.deepcopy(names)
+    if isinstance(names_, dict):
+        selected_groups = tags_by_regex(list(names_.keys()), re_value)
+        selected_groups.sort()
+        options = {k: names_[k] for k in selected_groups}
+    else:
+        options = tags_by_regex(names_, re_value)
+        options.sort()
+    ms_to_filter.param.update(options=options)
+
+
+def scatter_dboard(data, **kwargs):
+    """
+    Create a dashboard to plot any two columns of data against each other.
+
+    Parameters
+    ----------
+    data : pd.DataFrame
+        The data to plot.
+    **kwargs : optional
+        Pass additional keyword arguments to the holoviews options of the scatter plot.
+
+    Returns
+    -------
+    pn.Column
+        The dashboard with a scatter plot of the data.
+    """
+    cols = list(data.columns)
+    cols.sort()
+    x = pn.widgets.Select(name='x', value=cols[0], options=cols)
+    y = pn.widgets.Select(name='y', value=cols[1], options=cols)
+    # slope = pn.widgets.Checkbox(name='Slope', value=False)
+
+    defaults = {
+        'width': 500,
+        'height': 500,
+        'fill_alpha': 0.4,
+        'line_alpha': 0,
+        'size': 4,
+        'yformatter': NumeralTickFormatter(format='0,0'),
+        'xformatter': NumeralTickFormatter(format='0,0'),
+    }
+    for opt, value in defaults.items():
+        kwargs.setdefault(opt, value)
+
+    def scatter(data, x, y, slope=True, **kwargs):
+        scatter_plot = hv.Scatter(data, x, y).opts(**kwargs)
+        # if slope:
+        #     slope_line = hv.Slope.from_scatter(scatter_plot).opts(
+        #         line_color='red',
+        #         line_width=1,
+        #         line_alpha=0.4,
+        #         line_dash=(5,3)
+        #     )
+        # if slope:
+        #     return scatter_plot * slope_line
+        # else:
+        return scatter_plot
+
+    # dboard = pn.Column(
+    #     pn.Row(x, y, slope),
+    #     pn.bind(scatter, data, x, y, slope=slope, **kwargs)
+    # )
+    dboard = pn.Column(
+        pn.Row(x, y),
+        pn.bind(scatter, data, x, y, **kwargs)
+    )
+    return dboard
+
+
+def plot(
+    cd=None,
+    cg=None,
+    data=None,
+    combine=COMBINE,
+    default_groups=DEFAULT_GROUPS,
+    group_width=1500,
+    group_height=250,
+    **kwargs,
+):
+    """
+    Create plotting dashboard.
+
+    NOTE: If a 'plot_defaults.json' file exists in the same directory as the file this
+    function is called from called, then the default groups will be read from that file
+    instead of using the `default_groups` argument. Delete or manually edit the file to
+    change the default groups. Use the `default_groups` or manually edit the file to
+    control the order of the plots.
+
+    Parameters
+    ----------
+    cd : captest.CapData, optional
+        The captest.CapData object.
+    cg : captest.ColumnGroups, optional
+        The captest.ColumnGroups object. `data` must also be provided.
+    data : pd.DataFrame, optional
+        The data to plot. `cg` must also be provided.
+    combine : dict, optional
+        Dictionary of group names and regex strings to use to identify groups from
+        column groups and individual tags (columns) to combine into new groups. See the
+        `parse_combine` function for more details.
+    default_groups : list of str, optional
+        List of regex strings to use to identify default groups to plot. See the
+        `find_default_groups` function for more details.
+    group_width : int, optional
+        The width of the plots on the Groups tab.
+    group_height : int, optional
+        The height of the plots on the Groups tab.
+    **kwargs : optional
+        Pass additional keyword arguments to the holoviews options of the scatter plot
+        on the 'Scatter' tab.
+    """
+    if cd is not None:
+        data = cd.data
+        cg = cd.column_groups
+    # make sure data is numeric
+    data = data.apply(pd.to_numeric, errors='coerce')
+    bool_columns = data.select_dtypes(include='bool').columns
+    data.loc[:, bool_columns] = data.loc[:, bool_columns].astype(int)
+    # setup custom plot for 'Custom' tab
+    groups = msel_from_column_groups(cg)
+    tags = msel_from_column_groups({'all_tags': list(data.columns)}, groups=False)
+    columns_re_input = pn.widgets.TextInput(name='Input regex to filter columns list')
+    groups_re_input = pn.widgets.TextInput(name='Input regex to filter groups list')
+
+    columns_re_input.param.watch(
+        partial(filter_list, columns_re_input, tags, tags.options),
+        'value'
+    )
+    groups_re_input.param.watch(
+        partial(filter_list, groups_re_input, groups, groups.options),
+        'value'
+    )
+
+    custom_plot_name = pn.widgets.TextInput()
+    update = pn.widgets.Button(name='Update')
+    width_custom = pn.widgets.IntInput(
+        name='Plot Width', value=1500, start=200, end=2800, step=100, width=200
+    )
+    height_custom = pn.widgets.IntInput(
+        name='Plot height', value=400, start=150, end=800, step=50, width=200
+    )
+    custom_plot = pn.Column(
+        pn.Row(custom_plot_name, update, width_custom, height_custom),
+        pn.Row(
+            pn.WidgetBox(groups_re_input, groups),
+            pn.WidgetBox(columns_re_input, tags),
+        ),
+        pn.Row(pn.bind(
+            plot_group_tag_overlay,
+            data,
+            groups,
+            tags,
+            width=width_custom,
+            height=height_custom,
+        ))
+    )
+
+    # setup group plotter for 'Main' tab
+    cg_layout = parse_combine(combine, column_groups=cg, data=data)
+    main_ms = msel_from_column_groups(cg_layout)
+
+    def add_custom_plot_group(event):
+        column_groups_ = copy.deepcopy(main_ms.options)
+        column_groups_ = add_custom_plot(
+            custom_plot_name.value,
+            column_groups_,
+            groups.value,
+            tags.value,
+        )
+        main_ms.options = column_groups_
+    update.on_click(add_custom_plot_group)
+    plots_to_layout = pn.widgets.Button(name='Set plots to current layout')
+    width_main = pn.widgets.IntInput(
+        name='Plot Width', value=1500, start=200, end=2800, step=100, width=200
+    )
+    height_main = pn.widgets.IntInput(
+        name='Plot height', value=250, start=150, end=800, step=50, width=200
+    )
+    main_plot = pn.Column(
+        pn.Row(pn.WidgetBox(plots_to_layout, main_ms, pn.Row(width_main, height_main))),
+        pn.Row(pn.bind(
+            plot_tag_groups, data, main_ms, width=width_main, height=height_main
+        )),
+    )
+
+    def set_defaults(event):
+        with open('./plot_defaults.json', 'w') as file:
+            json.dump(main_ms.value, file)
+    plots_to_layout.on_click(set_defaults)
+
+    # setup default groups
+    if Path('./plot_defaults.json').exists():
+        default_tags = read_json('./plot_defaults.json')
+    else:
+        default_groups = find_default_groups(list(cg_layout.keys()), default_groups)
+        default_tags = [cg_layout.get(grp, []) for grp in default_groups]
+
+    # layout dashboard
+    plotter = pn.Tabs(
+        ('Groups', plot_tag_groups(data, default_tags, width=group_width, height=group_height)),
+        ('Layout', main_plot),
+        ('Overlay', custom_plot),
+        ('Scatter', scatter_dboard(data, **kwargs)),
+    )
+    return plotter
+
+
+def add_custom_plot(name, column_groups, group_tags, column_tags):
+    """
+    Append a new custom group to column groups for plotting.
+
+    Parameters
+    ----------
+    """
+    column_groups[name] = group_tag_overlay(group_tags, column_tags)
+    return column_groups
```
captest/util.py
CHANGED
```diff
@@ -1,3 +1,4 @@
+import re
 import json
 import yaml
 import numpy as np
```
```diff
@@ -61,7 +62,7 @@ def get_common_timestep(data, units='m', string_output=True):
     else:
         return freq
 
-def reindex_datetime(data, report=False, add_index_col=True):
+def reindex_datetime(data, report=False):
     """
     Find dataframe index frequency and reindex to add any missing intervals.
 
```
```diff
@@ -86,10 +87,6 @@ def reindex_datetime(data, report=False, add_index_col=True):
     df_index_length = df.shape[0]
     missing_intervals = df_index_length - data_index_length
 
-    if add_index_col:
-        ix_ser = df.index.to_series()
-        df['index'] = ix_ser.apply(lambda x: x.strftime('%m/%d/%Y %H %M'))
-
     if report:
         print('Frequency determined to be ' + freq_str + ' minutes.')
         print('{:,} intervals added to index.'.format(missing_intervals))
```
```diff
@@ -136,3 +133,14 @@ def generate_irr_distribution(
         else:
             irr_values.append(next_val)
     return irr_values
+
+
+def tags_by_regex(tag_list, regex_str):
+    regex = re.compile(regex_str, re.IGNORECASE)
+    return [tag for tag in tag_list if regex.search(tag) is not None]
+
+
+def append_tags(sel_tags, tags, regex_str):
+    new_list = sel_tags.copy()
+    new_list.extend(tags_by_regex(tags, regex_str))
+    return new_list
```
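The two new helpers are small enough to verify by eye; a quick sketch of their behavior with made-up tag names:

```python
from captest.util import tags_by_regex, append_tags

tags = ['met1_poa_irr', 'met2_ghi_irr', 'inv1_real_pwr', 'mtr_real_pwr']

# Case-insensitive regex search against each tag.
tags_by_regex(tags, '(?=.*real)(?=.*pwr)')
# ['inv1_real_pwr', 'mtr_real_pwr']

# Copy a selection, then extend it with the regex matches.
append_tags(['met1_poa_irr'], tags, 'ghi')
# ['met1_poa_irr', 'met2_ghi_irr']
```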