emod-api 3.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emod_api/__init__.py +1 -0
- emod_api/campaign.py +170 -0
- emod_api/channelreports/__init__.py +0 -0
- emod_api/channelreports/channels.py +433 -0
- emod_api/channelreports/icj_to_csv.py +65 -0
- emod_api/channelreports/plot_icj_means.py +149 -0
- emod_api/channelreports/plot_prop_report.py +205 -0
- emod_api/channelreports/utils.py +326 -0
- emod_api/config/__init__.py +0 -0
- emod_api/config/default_from_schema.py +16 -0
- emod_api/config/default_from_schema_no_validation.py +177 -0
- emod_api/config/from_overrides.py +135 -0
- emod_api/demographics/__init__.py +0 -0
- emod_api/demographics/age_distribution.py +163 -0
- emod_api/demographics/base_input_file.py +28 -0
- emod_api/demographics/calculators.py +159 -0
- emod_api/demographics/demographic_exceptions.py +54 -0
- emod_api/demographics/demographics.py +249 -0
- emod_api/demographics/demographics_base.py +752 -0
- emod_api/demographics/demographics_overlay.py +41 -0
- emod_api/demographics/fertility_distribution.py +235 -0
- emod_api/demographics/implicit_functions.py +112 -0
- emod_api/demographics/mortality_distribution.py +227 -0
- emod_api/demographics/node.py +456 -0
- emod_api/demographics/overlay_node.py +16 -0
- emod_api/demographics/properties_and_attributes.py +737 -0
- emod_api/demographics/service/__init__.py +0 -0
- emod_api/demographics/service/grid_construction.py +143 -0
- emod_api/demographics/service/service.py +55 -0
- emod_api/demographics/susceptibility_distribution.py +170 -0
- emod_api/demographics/updateable.py +58 -0
- emod_api/legacy/__init__.py +0 -0
- emod_api/legacy/plotAllCharts.py +230 -0
- emod_api/migration/__init__.py +0 -0
- emod_api/migration/__main__.py +22 -0
- emod_api/migration/migration.py +782 -0
- emod_api/multidim_plotter.py +80 -0
- emod_api/schema_to_class.py +440 -0
- emod_api/serialization/__init__.py +0 -0
- emod_api/serialization/census_and_mod_pop.py +48 -0
- emod_api/serialization/dtk_file_support.py +61 -0
- emod_api/serialization/dtk_file_tools.py +1378 -0
- emod_api/serialization/dtk_file_utility.py +141 -0
- emod_api/serialization/serialized_population.py +205 -0
- emod_api/spatialreports/__init__.py +0 -0
- emod_api/spatialreports/__main__.py +67 -0
- emod_api/spatialreports/plot_spat_means.py +99 -0
- emod_api/spatialreports/spatial.py +210 -0
- emod_api/utils/__init__.py +26 -0
- emod_api/utils/distributions/__init__.py +0 -0
- emod_api/utils/distributions/base_distribution.py +38 -0
- emod_api/utils/distributions/bimodal_distribution.py +64 -0
- emod_api/utils/distributions/constant_distribution.py +58 -0
- emod_api/utils/distributions/demographic_distribution_flag.py +16 -0
- emod_api/utils/distributions/distribution_type.py +15 -0
- emod_api/utils/distributions/dual_constant_distribution.py +68 -0
- emod_api/utils/distributions/dual_exponential_distribution.py +75 -0
- emod_api/utils/distributions/exponential_distribution.py +63 -0
- emod_api/utils/distributions/gaussian_distribution.py +69 -0
- emod_api/utils/distributions/log_normal_distribution.py +61 -0
- emod_api/utils/distributions/poisson_distribution.py +59 -0
- emod_api/utils/distributions/uniform_distribution.py +70 -0
- emod_api/utils/distributions/weibull_distribution.py +69 -0
- emod_api/utils/str_enum.py +6 -0
- emod_api/weather/__init__.py +0 -0
- emod_api/weather/weather.py +428 -0
- emod_api-3.0.2.dist-info/METADATA +131 -0
- emod_api-3.0.2.dist-info/RECORD +71 -0
- emod_api-3.0.2.dist-info/WHEEL +5 -0
- emod_api-3.0.2.dist-info/licenses/LICENSE +21 -0
- emod_api-3.0.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import numpy as np
|
|
3
|
+
import matplotlib.pyplot as plt
|
|
4
|
+
import os
|
|
5
|
+
import sqlite3
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def collect(exp_id: str,
            chan: str = "Infected",
            tag: str = None,
            smoothing: bool = True) -> dict:
    """
    Collect all the time series data for a given channel for a given experiment from InsetChart.json
    files in a local subdirectory that have been downloaded from COMPS, assuming the following structure.

    exp_id/
        sim_id/
            InsetChart.json

    Args:
        exp_id: Experiment Id that has had data downloaded to the current working directory.
        chan: Channel name.
        tag: key=value. Using latest_experiment/results.db (sqlite3, from emodpy), limit results to just
            the simulations where key=value. If value is set to SWEEP, find all values for key and group
            the simulations by each value found. The key and value are pasted into the SQL text verbatim,
            so only pass tags from a trusted source.
        smoothing: If True, replace every time series by its 7-sample moving average
            (numpy 'valid' convolution, so a series of n >= 7 samples shrinks to n - 6).

    Returns:
        Dictionary mapping a group name to a list of numpy arrays, one per simulation that has an
        InsetChart.json. The group name is "ref", except for a SWEEP tag where it is the swept value.
        Shorter series are zero-padded at the end so all arrays have the same length.

    Raises:
        ValueError: If tag is not in key=value format, if the channel is missing from a report, or if
            no channel data was found at all.
    """

    groupby_values = {}
    if tag:
        tag_parts = tag.split("=")
        if len(tag_parts) == 1:
            raise ValueError("When passing tag, has to have key=value format.")

        groupby_key = tag_parts[0]
        groupby_value = tag_parts[1]
        db = os.path.join("latest_experiment", "results.db")
        con = sqlite3.connect(db)
        try:
            cur = con.cursor()
            if groupby_value == "SWEEP":
                query = f"SELECT sim_id, {groupby_key} FROM results"
                for sim_id, swept_value in cur.execute(query):
                    groupby_values.setdefault(swept_value, []).append(sim_id)
            else:  # select only sim_id's where gb key == value
                query = f"SELECT sim_id FROM results where {groupby_key} = {groupby_value}"
                groupby_values["ref"] = [row[0] for row in cur.execute(query)]
        finally:
            # always release the database handle, even when the query fails
            con.close()
    else:
        # every entry of the experiment folder is a simulation, except an optional results.db
        groupby_values["ref"] = [entry for entry in os.listdir(exp_id) if entry != "results.db"]

    def moving_average(x, w=7):
        return np.convolve(x, np.ones(w), 'valid') / w

    chan_data = {}
    max_len = 0
    # poi = param of interest
    for value, simdirs in groupby_values.items():
        for sim in simdirs:
            chan_data.setdefault(value, [])
            icj_path = os.path.join(exp_id, sim, "InsetChart.json")
            if not os.path.exists(icj_path):
                continue
            with open(icj_path) as fp:
                icj = json.load(fp)
            if chan not in icj["Channels"]:
                raise ValueError(f"Can't find channel {chan} in file. Did find {icj['Channels'].keys()}.")
            new_data = np.asarray(icj["Channels"][chan]["Data"])
            if smoothing:
                new_data = moving_average(new_data)
            chan_data[value].append(new_data)
            max_len = max(max_len, len(new_data))
    if max_len == 0:
        raise ValueError(f"No InsetChart.json files with channel data for {chan} and experiment {exp_id}.")

    # If users run simulations that end when prevalence is zero, the length of the time series can vary.
    # We need to get them all the same to calc the mean.
    data_for_plotting = {}
    for poi, series_list in chan_data.items():
        data_for_plotting[poi] = [
            np.pad(data, (0, max_len - len(data))) if len(data) < max_len else data
            for data in series_list
        ]

    return data_for_plotting
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def display(chan_data, save=False, chan_name="Infected", exp_id=None):
    """
    Plot the mean and standard deviation of each group of time series in chan_data.

    Args:
        chan_data: Dictionary mapping a group label to a list of equally long time series.
        save: If True and chan_data holds exactly one group, also write that group's mean
            to "mean_ref.json" in the current working directory.
        chan_name: Label for the y axis.
        exp_id: Optional plot title.

    Raises:
        ValueError: If a group contains no time series.
    """
    _, axes = plt.subplots(1)
    if exp_id:
        plt.title(exp_id, loc="center")

    write_reference = save and len(chan_data) == 1
    for group in sorted(chan_data):
        series = chan_data[group]
        if len(series) < 1:
            raise ValueError("Input channel data array seems to have no data.")

        stacked = np.array(series)
        mean = np.mean(stacked, axis=0)
        if write_reference:
            with open("mean_ref.json", "w") as fp:
                json.dump({"Channels": {"Channel": {"Data": list(mean)}}}, fp, indent=4)
        spread = np.std(stacked, axis=0)

        n_steps = len(mean)
        steps = np.arange(n_steps)
        axes.plot(steps, mean, label=group)
        plt.xlim(0, n_steps)
        # shaded band of one standard deviation around the mean
        axes.fill_between(steps, mean + spread, mean - spread, facecolor='yellow', alpha=0.5)

    axes.set_xlabel("Simulation Time")
    axes.set_ylabel(chan_name)
    plt.legend()
    plt.show()
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
if __name__ == "__main__":
    # Command line entry point: collect one channel for one experiment and plot mean/spread.
    import argparse
    parser = argparse.ArgumentParser(description="Mean 'InsetChart' Report Plotting")
    parser.add_argument('-c', '--channel', action='store', default="Infected", help='channel(s) to display [Infected]')
    parser.add_argument('-e', '--experiment_id', action='store', default=None, help='experiment id to plot, data assumed to be local')
    parser.add_argument('-t', '--tag', action='store', default=None, help='key=value tag constraint')
    args = parser.parse_args()
    if not args.experiment_id:
        # No id given: fall back to the one stored in the COMPS_ID file in the working directory.
        with open("COMPS_ID") as fp:
            # strip so a trailing newline does not become part of the folder name
            args.experiment_id = fp.read().strip()

    # check that folder with name experiment_id exists
    if not os.path.exists(str(args.experiment_id)):
        raise ValueError(f"Don't see folder for {args.experiment_id}.")

    chan_data = collect(args.experiment_id, args.channel, args.tag)
    display(chan_data, False, args.channel, args.experiment_id)
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
#! /usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
"""Command line utility for plotting property reports."""
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
from functools import reduce
|
|
7
|
+
import json
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
import matplotlib.pyplot as plt
|
|
11
|
+
import numpy as np
|
|
12
|
+
|
|
13
|
+
from emod_api.channelreports.utils import read_json_file, get_report_channels, accumulate_channel_data, save_to_csv, plot_traces
|
|
14
|
+
from emod_api.channelreports.utils import _validate_property_report_channels, _validate_property_report_ips
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def main(args: argparse.Namespace):
    """
    Plot, list or export the property report described by args.

    Reads args.filename, then either lists the channels and IPs (args.list), writes the
    selected traces to CSV (args.csv is set) or plots them.
    When args.normalize is set, "Statistical Population" is appended to args.channels if missing.
    """
    report_channels = get_report_channels(read_json_file(args.filename))
    channel_keys = sorted(report_channels)

    if args.verbose:
        print("Channels:Pools-")
        print(json.dumps(channel_keys, indent=4))

    if args.list:
        # listing only, no validation or plotting
        list_channels_and_ips(channel_keys)
        return

    _validate_property_report_channels(args.channels, report_channels)
    _validate_property_report_ips(args.groupby, report_channels)

    needs_population = args.normalize and "Statistical Population" not in args.channels
    if needs_population:
        args.channels.append("Statistical Population")

    traces = accumulate_channel_data(args.channels, args.verbose, args.groupby, report_channels)

    if args.csv is not None:
        save_to_csv(traces, args.csv, args.transpose)
        return

    call_plot_traces(args, traces)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def list_channels_and_ips(channel_keys: list[str]) -> None:
    """
    Print the channels and individual properties (IPs) found in a property report.

    Args:
        channel_keys: Keys of the report's channel dictionary, which look like
            "CHANNEL:IP:value,...,IP:value". A key without an IP part ("CHANNEL")
            contributes its channel name only.
    """
    channels = set()
    properties = set()

    # Each channel _should_ have the same set of IPs, but we'll check them all
    for key in channel_keys:
        channel, _, ip_string = key.partition(":")
        channels.add(channel)
        # every non-empty "IP:value" entry contributes its IP name
        properties.update(kvp.split(":")[0] for kvp in ip_string.split(",") if kvp)

    print("\nChannels:")
    for channel in sorted(channels):
        print(f"\t{channel}")

    print("\nIPs:")
    for prop in sorted(properties):
        print(f"\t{prop}")

    print()
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def call_plot_traces(args: argparse.Namespace,
                     trace_values: dict[str, np.ndarray]) -> None:
    """
    Call the internal `plot_traces` function and, optionally, save the results to disk.

    Args:
        args: Options; reads verbose, normalize, overlay, channels, filename, legend and saveFigure.
        trace_values: Trace data keyed on trace name.
    """

    if args.verbose:
        print(sorted(trace_values))

    if args.normalize:
        stat_pop = "Statistical Population"
        traces = {key: value for (key, value) in trace_values.items() if not key.startswith(stat_pop)}
        # reduce the various statistical population traces to a single vector
        norms = reduce(lambda x, y: np.array(y) + x, [value for (key, value) in trace_values.items() if key.startswith(stat_pop)], 0)
    else:
        traces = trace_values
        norms = None

    figure = plot_traces(traces, norms, args.overlay, args.channels, args.filename, args.legend)

    # plot_traces returns None when there is nothing to plot, so there is nothing to save either
    if args.saveFigure and figure is not None:
        print("Saving figure 'propertyReport.png'...")
        figure.savefig('propertyReport.png')

    plt.show()
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def prop_report_json_to_csv(output_path: str,
                            channel_name: str = "Infected",
                            groupby: str = "Geographic"):
    """
    Convert one channel of a PropertyReportXXX.json file into a CSV file, rolled up into a single property.

    The CSV is named "prop_report_<channel_name>.csv" (lower case, spaces replaced by underscores)
    and the "Statistical Population" channel is requested alongside the selected channel.

    Args:
        output_path: Subdirectory in which to find a file called PropertyReportXXX.json.
            XXX can be blank or a disease name like 'TB'.
        channel_name: Name of the channel to process from the property report.
            Defaults to "Infected".
        groupby: Property to group by. Defaults to "Geographic".

    Raises:
        ValueError: If no PropertyReportXXX.json file is found in the directory.
    """

    # A similar helper may exist elsewhere; it could move to a shared utils module.
    class DynamicObject:
        """Attribute bag standing in for argparse.Namespace: unset attributes read as None."""

        def __init__(self):
            # bypass our own __setattr__, which needs `members` to exist already
            object.__setattr__(self, 'members', {})

        def __setattr__(self, name, value):
            self.members[name] = value

        def __getattr__(self, name):
            return self.members.get(name)

    def find_file_starting_with(directory, prefix):
        # first regular file "<prefix>*json" in directory iteration order, else None
        candidates = (
            str(entry)
            for entry in Path(directory).iterdir()
            if entry.name.startswith(prefix) and entry.is_file() and entry.name.endswith("json")
        )
        return next(candidates, None)

    prop_report_path = find_file_starting_with(output_path, "PropertyReport")
    if not prop_report_path:
        raise ValueError(f"No json file starting with 'PropertyReport' found in '{output_path}'.")

    faux_args = DynamicObject()
    faux_args.filename = prop_report_path
    faux_args.csv = "prop_report_" + channel_name.replace(' ', '_').lower() + ".csv"
    faux_args.channels = [channel_name, "Statistical Population"]
    faux_args.normalize = True
    faux_args.groupby = [groupby]
    main(faux_args)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def process_cmd_line() -> argparse.Namespace:
    """
    Parse the command line options of the property report plotter.

    Applies the defaults that argparse cannot express (channel list, "all" groupby)
    and, unless --list was given, echoes the effective options.
    """
    cli = argparse.ArgumentParser(description='Property Report Plotting')
    cli.add_argument('filename', nargs='?', default='PropertyReport.json',
                     help='property report filename [PropertyReport.json]')
    cli.add_argument('-c', '--channel', action='append', dest='channels', metavar='channelName',
                     help='channel(s) to display [Infected]')
    cli.add_argument('-g', '--groupby', action='append',
                     help="IP(s) under which to aggregate other IP keys and values")
    cli.add_argument('-n', '--normalize', action='store_true',
                     help='plot channel(s) normalized by statistical population')
    cli.add_argument('-o', '--overlay', action='store_true',
                     help='overlay pools of the same channel')
    cli.add_argument('-s', '--save', action='store_true', dest='saveFigure',
                     help="save figure to file 'propertyReport.png'")
    cli.add_argument('-v', '--verbose', action="store_true")
    # args.legend defaults to True; passing --no-legend sets it to False
    cli.add_argument('--no-legend', action="store_false", dest="legend")
    cli.add_argument('-l', '--list', action="store_true",
                     help="List channels and IP keys found in the report. No plotting is performed with this option.")
    cli.add_argument("--csv", type=Path, default=None,
                     help="Write data for selected channel(s) to given file.")
    cli.add_argument("-t", "--transpose", action="store_true",
                     help="write channels as columns rather than rows (only in effect with '--csv' option)")

    args = cli.parse_args()

    args.channels = args.channels or ['Infected']

    # a lone "-g all" (any case) means: aggregate over every IP
    if args.groupby and [name.lower() for name in args.groupby] == ["all"]:
        args.groupby = []

    if not args.list:
        summary = [
            f"Filename: '{args.filename}'",
            f"Channel(s): {args.channels}",
            f"Groupby: {args.groupby}",
            f"Normalize: {args.normalize}",
            f"Overlay: {args.overlay}",
            f"Save: {args.saveFigure}",
        ]
        if args.csv:
            summary.append(f"CSV filename: '{args.csv}'")
            summary.append(f"Transpose CSV: {args.transpose}")
        print("\n".join(summary))

    return args
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
if __name__ == '__main__':
    # Script entry point: parse the command line, then list, export or plot.
    cli_args = process_cmd_line()
    main(cli_args)
|
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Helper functions, primarily for property reports, which are channel reports.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Union, Optional
|
|
8
|
+
|
|
9
|
+
import matplotlib.pyplot as plt
|
|
10
|
+
import numpy as np
|
|
11
|
+
|
|
12
|
+
from emod_api.channelreports.channels import ChannelReport
|
|
13
|
+
|
|
14
|
+
# Names bound by `from emod_api.channelreports.utils import *`.
# The underscore-prefixed helpers are listed explicitly because a star-import
# skips such names unless they appear in __all__.
__all__ = [
    "property_report_to_csv",
    "read_json_file",
    "get_report_channels",
    "_validate_property_report_channels",
    "_validate_property_report_ips",
    "accumulate_channel_data",
    "__get_trace_name",
    "save_to_csv",
    "plot_traces",
    "__index_for",
    "__title_for"]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def property_report_to_csv(source_file: Union[str, Path],
                           csv_file: Union[str, Path],
                           channels: Optional[list[str]] = None,
                           groupby: Optional[list[str]] = None,
                           transpose: bool = False) -> None:
    """
    Write a property report to a CSV formatted file.

    Optionally select a subset of the available channels.
    Optionally "roll up" IP:value sub-channels into a "parent" IP.

    Args:
        source_file: filename of property report
        csv_file: filename of CSV formatted result
        channels: list of channels to output, None results in writing _all_ channels to output;
            a single channel name may be given as a plain string
        groupby: list of IPs into which to aggregate remaining IPs, None indicates no grouping,
            [] indicates _all_ aggregated; a single IP may be given as a plain string
        transpose: write channels as columns rather than rows
    """
    channel_data = get_report_channels(read_json_file(Path(source_file)))

    # normalise the convenience forms (None / bare string) to lists
    if isinstance(channels, str):
        channels = [channels]
    elif channels is None:
        channels = sorted({key.split(":")[0] for key in channel_data})

    if isinstance(groupby, str):
        groupby = [groupby]

    _validate_property_report_channels(channels, channel_data)
    _validate_property_report_ips(groupby, channel_data)

    traces = accumulate_channel_data(channels, False, groupby, channel_data)
    save_to_csv(traces, csv_file, transpose)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def read_json_file(filename: Union[str, Path]) -> dict:
    """
    Read a UTF-8 encoded JSON file and return the decoded content.

    Args:
        filename: path of the JSON file

    Returns:
        the decoded JSON document
    """
    text = Path(filename).read_text(encoding="utf-8")
    return json.loads(text)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def get_report_channels(json_data: dict) -> dict:
    """
    Return the 'Channels' entry of a decoded report.

    Args:
        json_data: decoded report content

    Returns:
        the value stored under 'Channels'

    Raises:
        KeyError: if the report has no 'Channels' entry
    """
    try:
        return json_data['Channels']
    except KeyError as missing:
        raise KeyError("Didn't find 'Channels' in JSON data.") from missing
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _validate_property_report_channels(channels, channel_data) -> None:
    """
    Check that every requested channel exists in the property report.

    Args:
        channels: requested channel names; nothing is checked when empty or None
        channel_data: report channels keyed on "CHANNEL:IP:value,..." names

    Raises:
        ValueError: if a requested channel is not in the report (valid names are printed first)
    """
    if not channels:
        return

    # the channel name is the part of each key before the first ":"
    valid_names = {key.split(":", 1)[0] for key in channel_data}
    unknown = [name for name in channels if name not in valid_names]
    if not unknown:
        return

    print("Valid channel names:")
    print("\n".join(valid_names))
    raise ValueError(f"Specified channel(s) - {unknown} - is/are not valid channel names.")
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _validate_property_report_ips(groupby, channel_data) -> None:
    """
    Check that every groupby IP exists in the property report.

    Only the first channel key is inspected for the available IP names.

    Args:
        groupby: requested IP names; nothing is checked when empty or None
        channel_data: report channels keyed on "CHANNEL:IP:value,...,IP:value" names

    Raises:
        ValueError: if a requested IP is not in the report (valid IPs are printed first).
            This includes a report without channels or without IPs in its channel keys.
    """
    if not groupby:
        return

    # an empty report or a key without ":" simply offers no IPs
    first = next(iter(channel_data), "")
    _, _, ip_string = first.partition(":")
    ips = [kvp.split(":")[0] for kvp in ip_string.split(",") if kvp]
    not_found = [ip for ip in groupby if ip not in ips]
    if not_found:
        print("Valid IPs:")
        print("\n".join(ips))
        raise ValueError(f"Specified groupby IP(s) - {not_found} - is/are not valid IP names.")
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def accumulate_channel_data(channels: list[str], verbose: bool, groupby: list[str], channel_data: dict) -> dict[str, np.ndarray]:
    """
    Extract selected channel(s) from property report data.

    Aggregate on groupby IP(s), if provided, otherwise on channel per unique
    IP:value pair (e.g., "QualityOfCare:High"), per main channel (e.g., "Infected").

    Args:
        channels: names of channels to extract
        verbose: output some "debugging"/progress information if true
        groupby: IP(s) under which to aggregate other IP:value pairs
        channel_data: data for channels keyed on "CHANNEL:IP:value,..." names

    Returns:
        dictionary of aggregated float32 data, keyed on trace name
    """
    accumulated = {}

    for pool_key in sorted(channel_data):
        parts = pool_key.split(":", 1)
        if parts[0] not in channels:
            continue
        channel_title, ip_string = parts

        if verbose:
            print(f"Processing channel '{channel_title}:{ip_string}'")

        trace_name = __get_trace_name(channel_title, ip_string.split(','), groupby)
        # np.array copies, so the in-place += below never touches channel_data
        samples = np.array(channel_data[pool_key]['Data'], dtype=np.float32)

        if trace_name in accumulated:
            if verbose:
                print(f"Add to trace: '{trace_name}'")
            accumulated[trace_name] += samples
        else:
            if verbose:
                print(f"New trace: '{trace_name}'")
            accumulated[trace_name] = samples

    return accumulated
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def __get_trace_name(channel_title: str, key_value_pairs: list[str], groupby: list[str]) -> str:
    """
    Build the "canonical" trace name for a channel, its IP:value list and a groupby list.

    Because traces may aggregate several IP values, the name need not match any channel key.

    Example:
        title = "Infected"
        key_value_pairs = ["Age_Bin:Age_Bin_Property_From_0_To_20","QualityOfCare:High","QualityOfCare1:High","QualityOfCare2:High"]

        groupby = None
        returns "Infected:Age_Bin:Age_Bin_Property_From_0_To_20,QualityOfCare:High,QualityOfCare1:High,QualityOfCare2:High"

        groupby = ["Age_Bin"]
        returns "Infected:Age_Bin:Age_Bin_Property_From_0_To_20"

        groupby = ["Age_Bin", "QualityOfCare"]
        returns "Infected:Age_Bin:Age_Bin_Property_From_0_To_20,QualityOfCare:High"

        groupby = []
        returns "Infected"
    """
    if groupby is None:
        # no grouping: keep every IP:value pair
        kept = key_value_pairs
    elif len(groupby) > 0:
        # keep only the pairs whose IP is being grouped on
        kept = [pair for pair in key_value_pairs if pair.split(":")[0] in groupby]
    else:
        # everything aggregated: the channel title alone names the trace
        return channel_title

    return f"{channel_title}:{','.join(kept)}"
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def save_to_csv(trace_values: dict[str, np.ndarray],
                filename: Union[str, Path],
                transpose: bool = False) -> None:
    """
    Save property report traces to CSV via ChannelReport.to_csv().

    Args:
        trace_values: traces to write, keyed on trace name
        filename: destination file for CSV data
        transpose: write channels as columns rather than rows
    """
    destination = Path(filename)
    report = ChannelReport()

    # copy every trace into the report; to_csv() then writes all of its channels
    for trace_name in trace_values:
        report.channels[trace_name] = trace_values[trace_name]

    report.to_csv(destination, transpose=transpose)
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def plot_traces(trace_values: dict[str, np.ndarray],
                norm_values: Union[int, np.ndarray, None],
                overlay: bool,
                channels: list[str],
                title: str,
                legend: bool) -> Optional[plt.Figure]:

    """
    Plot trace data. One subplot per channel unless overlaying all variations of rolled-up IP(s) is requested.

    A trace (like old-time pen and ink EKG) may represent the aggregation of
    several IP values so trace may not equal any particular channel data.

    Args:
        trace_values: channel data, keyed on channel name
        norm_values: normalization data for channels; None disables the normalized subplots
        overlay: whether or not to overlay all variations of a given channel on one subplot
        channels: selection of channel names to plot
        title: plot title (passed to plt.figure as the figure identifier)
        legend: whether or not to include a legend on plots

    Returns:
        plt.Figure, or None when trace_values is empty
    """

    # nothing to draw: report it and return None instead of a figure
    if len(trace_values) == 0:
        print("Didn't find requested channel(s) in property report.")
        return

    # one subplot row per trace, or per channel when traces are overlaid
    if not overlay:
        plot_count = len(trace_values)
    else:
        plot_count = len(channels)

    # each raw subplot is followed by its normalized counterpart
    normalize = norm_values is not None
    if normalize:
        plot_count *= 2

    figure = plt.figure(title, figsize=(16, 9), dpi=300)
    trace_keys = sorted(trace_values)

    # plotting here
    for trace_name in trace_keys:
        plot_index = __index_for(trace_name, channels, trace_keys, normalize, overlay)
        plt.subplot(plot_count, 1, plot_index)
        plt.plot(trace_values[trace_name], label=trace_name)
        if normalize:
            # the normalized trace goes into the row right below the raw one
            plt.subplot(plot_count, 1, plot_index + 1)
            plt.ylim((0.0, 1.0))  # yes, this takes a tuple
            plt.plot(trace_values[trace_name] / norm_values, label=trace_name)

    # make it pretty
    # second pass over the same subplots, adding titles and legends once all traces are drawn
    _ = plt.subplot(plot_count, 1, 1)
    for trace_name in trace_keys:
        plot_index = __index_for(trace_name, channels, trace_keys, normalize, overlay)
        plot_title = __title_for(trace_name, channels, overlay)
        plt.subplot(plot_count, 1, plot_index)
        plt.title(plot_title)
        if legend:
            plt.legend()
        if normalize:
            plt.subplot(plot_count, 1, plot_index + 1)
            plt.title(f"{plot_title} normalized by 'Statistical Population'")
            if legend:
                plt.legend()

    plt.tight_layout()

    return figure
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def __index_for(trace_name: str, channels: list[str], trace_keys: list[str], normalize: bool, overlay: bool) -> int:
    """
    Return the 1-based subplot index for a trace.

    Args:
        trace_name: name of the trace to place
        channels: selected channel names, in subplot order when overlaying
        trace_keys: all trace names, in subplot order when not overlaying
        normalize: whether every subplot is followed by a normalized one
        overlay: whether all pools of the same channel share one subplot
    """
    if overlay:
        # position of the first channel whose name occurs in the trace name,
        # or len(channels) when none does
        position = next(
            (slot for slot, channel in enumerate(channels) if channel in trace_name),
            len(channels),
        )
    else:
        # each trace separate
        position = trace_keys.index(trace_name)

    # with normalization every trace takes two rows; matplotlib counts from 1 (like MATLAB)
    stride = 2 if normalize else 1
    return position * stride + 1
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def __title_for(trace_name: str, channels: list[str], overlay: bool):
    """
    Return the subplot title for a trace.

    Args:
        trace_name: name of the trace being plotted
        channels: selected channel names
        overlay: whether all pools of the same channel share one subplot

    Returns:
        When overlaying, the first channel whose name occurs in the trace name;
        otherwise, or when no channel matches, the trace name itself.
    """
    if overlay:
        # use channel name
        for channel in channels:
            if channel in trace_name:
                return channel

    # not overlaying, or no channel matched: fall back to the trace name
    return trace_name
|
|
File without changes
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
#!/usr/bin/python
|
|
2
|
+
|
|
3
|
+
from emod_api.config import default_from_schema_no_validation as old
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def write_default_from_schema(path_to_schema):
    """
    Deprecated entry point; please use default_from_schema_no_validation instead.

    Prints a deprecation notice, then forwards path_to_schema to
    default_from_schema_no_validation.write_default_from_schema and returns its result.
    """
    print("This module is deprecated. Please use default_from_schema_no_validation.")
    result = old.write_default_from_schema(path_to_schema)
    return result
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
if __name__ == "__main__":
    # Run as a script: print the deprecation notice, then hand over to the
    # entry point of default_from_schema_no_validation.
    print("This module is deprecated. Please use default_from_schema_no_validation.")
    old._do_main()
|