sierra-research 1.3.6__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sierra/__init__.py +3 -3
- sierra/core/__init__.py +3 -3
- sierra/core/batchroot.py +223 -0
- sierra/core/cmdline.py +681 -1057
- sierra/core/compare.py +11 -0
- sierra/core/config.py +96 -88
- sierra/core/engine.py +306 -0
- sierra/core/execenv.py +380 -0
- sierra/core/expdef.py +11 -0
- sierra/core/experiment/__init__.py +1 -0
- sierra/core/experiment/bindings.py +150 -101
- sierra/core/experiment/definition.py +414 -245
- sierra/core/experiment/spec.py +83 -85
- sierra/core/exproot.py +44 -0
- sierra/core/generators/__init__.py +10 -0
- sierra/core/generators/experiment.py +528 -0
- sierra/core/generators/generator_factory.py +138 -137
- sierra/core/graphs/__init__.py +23 -0
- sierra/core/graphs/bcbridge.py +94 -0
- sierra/core/graphs/heatmap.py +245 -324
- sierra/core/graphs/pathset.py +27 -0
- sierra/core/graphs/schema.py +77 -0
- sierra/core/graphs/stacked_line.py +341 -0
- sierra/core/graphs/summary_line.py +506 -0
- sierra/core/logging.py +3 -2
- sierra/core/models/__init__.py +3 -1
- sierra/core/models/info.py +19 -0
- sierra/core/models/interface.py +52 -122
- sierra/core/pipeline/__init__.py +2 -5
- sierra/core/pipeline/pipeline.py +228 -126
- sierra/core/pipeline/stage1/__init__.py +10 -0
- sierra/core/pipeline/stage1/pipeline_stage1.py +45 -31
- sierra/core/pipeline/stage2/__init__.py +10 -0
- sierra/core/pipeline/stage2/pipeline_stage2.py +8 -11
- sierra/core/pipeline/stage2/runner.py +401 -0
- sierra/core/pipeline/stage3/__init__.py +12 -0
- sierra/core/pipeline/stage3/gather.py +321 -0
- sierra/core/pipeline/stage3/pipeline_stage3.py +37 -84
- sierra/core/pipeline/stage4/__init__.py +12 -2
- sierra/core/pipeline/stage4/pipeline_stage4.py +36 -354
- sierra/core/pipeline/stage5/__init__.py +12 -0
- sierra/core/pipeline/stage5/pipeline_stage5.py +33 -208
- sierra/core/pipeline/yaml.py +48 -0
- sierra/core/plugin.py +529 -62
- sierra/core/proc.py +11 -0
- sierra/core/prod.py +11 -0
- sierra/core/ros1/__init__.py +5 -1
- sierra/core/ros1/callbacks.py +22 -21
- sierra/core/ros1/cmdline.py +59 -88
- sierra/core/ros1/generators.py +159 -175
- sierra/core/ros1/variables/__init__.py +3 -0
- sierra/core/ros1/variables/exp_setup.py +122 -116
- sierra/core/startup.py +106 -76
- sierra/core/stat_kernels.py +4 -5
- sierra/core/storage.py +13 -32
- sierra/core/trampoline.py +30 -0
- sierra/core/types.py +116 -71
- sierra/core/utils.py +103 -106
- sierra/core/variables/__init__.py +1 -1
- sierra/core/variables/base_variable.py +12 -17
- sierra/core/variables/batch_criteria.py +387 -481
- sierra/core/variables/builtin.py +135 -0
- sierra/core/variables/exp_setup.py +19 -39
- sierra/core/variables/population_size.py +72 -76
- sierra/core/variables/variable_density.py +44 -68
- sierra/core/vector.py +1 -1
- sierra/main.py +256 -88
- sierra/plugins/__init__.py +119 -0
- sierra/plugins/compare/__init__.py +14 -0
- sierra/plugins/compare/graphs/__init__.py +19 -0
- sierra/plugins/compare/graphs/cmdline.py +120 -0
- sierra/plugins/compare/graphs/comparator.py +291 -0
- sierra/plugins/compare/graphs/inter_controller.py +531 -0
- sierra/plugins/compare/graphs/inter_scenario.py +297 -0
- sierra/plugins/compare/graphs/namecalc.py +53 -0
- sierra/plugins/compare/graphs/outputroot.py +73 -0
- sierra/plugins/compare/graphs/plugin.py +147 -0
- sierra/plugins/compare/graphs/preprocess.py +172 -0
- sierra/plugins/compare/graphs/schema.py +37 -0
- sierra/plugins/engine/__init__.py +14 -0
- sierra/plugins/engine/argos/__init__.py +18 -0
- sierra/plugins/{platform → engine}/argos/cmdline.py +144 -151
- sierra/plugins/{platform/argos/variables → engine/argos/generators}/__init__.py +5 -0
- sierra/plugins/engine/argos/generators/engine.py +394 -0
- sierra/plugins/engine/argos/plugin.py +393 -0
- sierra/plugins/{platform/argos/generators → engine/argos/variables}/__init__.py +5 -0
- sierra/plugins/engine/argos/variables/arena_shape.py +183 -0
- sierra/plugins/engine/argos/variables/cameras.py +240 -0
- sierra/plugins/engine/argos/variables/constant_density.py +112 -0
- sierra/plugins/engine/argos/variables/exp_setup.py +82 -0
- sierra/plugins/{platform → engine}/argos/variables/physics_engines.py +83 -87
- sierra/plugins/engine/argos/variables/population_constant_density.py +178 -0
- sierra/plugins/engine/argos/variables/population_size.py +115 -0
- sierra/plugins/engine/argos/variables/population_variable_density.py +123 -0
- sierra/plugins/engine/argos/variables/rendering.py +108 -0
- sierra/plugins/engine/ros1gazebo/__init__.py +18 -0
- sierra/plugins/engine/ros1gazebo/cmdline.py +175 -0
- sierra/plugins/{platform/ros1robot → engine/ros1gazebo}/generators/__init__.py +5 -0
- sierra/plugins/engine/ros1gazebo/generators/engine.py +125 -0
- sierra/plugins/engine/ros1gazebo/plugin.py +404 -0
- sierra/plugins/engine/ros1gazebo/variables/__init__.py +15 -0
- sierra/plugins/engine/ros1gazebo/variables/population_size.py +214 -0
- sierra/plugins/engine/ros1robot/__init__.py +18 -0
- sierra/plugins/engine/ros1robot/cmdline.py +159 -0
- sierra/plugins/{platform/ros1gazebo → engine/ros1robot}/generators/__init__.py +4 -0
- sierra/plugins/engine/ros1robot/generators/engine.py +95 -0
- sierra/plugins/engine/ros1robot/plugin.py +410 -0
- sierra/plugins/{hpc/local → engine/ros1robot/variables}/__init__.py +5 -0
- sierra/plugins/engine/ros1robot/variables/population_size.py +146 -0
- sierra/plugins/execenv/__init__.py +11 -0
- sierra/plugins/execenv/hpc/__init__.py +18 -0
- sierra/plugins/execenv/hpc/adhoc/__init__.py +18 -0
- sierra/plugins/execenv/hpc/adhoc/cmdline.py +30 -0
- sierra/plugins/execenv/hpc/adhoc/plugin.py +131 -0
- sierra/plugins/execenv/hpc/cmdline.py +137 -0
- sierra/plugins/execenv/hpc/local/__init__.py +18 -0
- sierra/plugins/execenv/hpc/local/cmdline.py +31 -0
- sierra/plugins/execenv/hpc/local/plugin.py +145 -0
- sierra/plugins/execenv/hpc/pbs/__init__.py +18 -0
- sierra/plugins/execenv/hpc/pbs/cmdline.py +30 -0
- sierra/plugins/execenv/hpc/pbs/plugin.py +121 -0
- sierra/plugins/execenv/hpc/slurm/__init__.py +18 -0
- sierra/plugins/execenv/hpc/slurm/cmdline.py +30 -0
- sierra/plugins/execenv/hpc/slurm/plugin.py +133 -0
- sierra/plugins/execenv/prefectserver/__init__.py +18 -0
- sierra/plugins/execenv/prefectserver/cmdline.py +66 -0
- sierra/plugins/execenv/prefectserver/dockerremote/__init__.py +18 -0
- sierra/plugins/execenv/prefectserver/dockerremote/cmdline.py +66 -0
- sierra/plugins/execenv/prefectserver/dockerremote/plugin.py +132 -0
- sierra/plugins/execenv/prefectserver/flow.py +66 -0
- sierra/plugins/execenv/prefectserver/local/__init__.py +18 -0
- sierra/plugins/execenv/prefectserver/local/cmdline.py +29 -0
- sierra/plugins/execenv/prefectserver/local/plugin.py +133 -0
- sierra/plugins/{hpc/adhoc → execenv/robot}/__init__.py +1 -0
- sierra/plugins/execenv/robot/turtlebot3/__init__.py +18 -0
- sierra/plugins/execenv/robot/turtlebot3/plugin.py +204 -0
- sierra/plugins/expdef/__init__.py +14 -0
- sierra/plugins/expdef/json/__init__.py +14 -0
- sierra/plugins/expdef/json/plugin.py +504 -0
- sierra/plugins/expdef/xml/__init__.py +14 -0
- sierra/plugins/expdef/xml/plugin.py +386 -0
- sierra/{core/hpc → plugins/proc}/__init__.py +1 -1
- sierra/plugins/proc/collate/__init__.py +15 -0
- sierra/plugins/proc/collate/cmdline.py +47 -0
- sierra/plugins/proc/collate/plugin.py +271 -0
- sierra/plugins/proc/compress/__init__.py +18 -0
- sierra/plugins/proc/compress/cmdline.py +47 -0
- sierra/plugins/proc/compress/plugin.py +123 -0
- sierra/plugins/proc/decompress/__init__.py +18 -0
- sierra/plugins/proc/decompress/plugin.py +96 -0
- sierra/plugins/proc/imagize/__init__.py +15 -0
- sierra/plugins/proc/imagize/cmdline.py +49 -0
- sierra/plugins/proc/imagize/plugin.py +270 -0
- sierra/plugins/proc/modelrunner/__init__.py +16 -0
- sierra/plugins/proc/modelrunner/plugin.py +250 -0
- sierra/plugins/proc/statistics/__init__.py +15 -0
- sierra/plugins/proc/statistics/cmdline.py +64 -0
- sierra/plugins/proc/statistics/plugin.py +390 -0
- sierra/plugins/{hpc → prod}/__init__.py +1 -0
- sierra/plugins/prod/graphs/__init__.py +18 -0
- sierra/plugins/prod/graphs/cmdline.py +269 -0
- sierra/plugins/prod/graphs/collate.py +279 -0
- sierra/plugins/prod/graphs/inter/__init__.py +13 -0
- sierra/plugins/prod/graphs/inter/generate.py +83 -0
- sierra/plugins/prod/graphs/inter/heatmap.py +86 -0
- sierra/plugins/prod/graphs/inter/line.py +134 -0
- sierra/plugins/prod/graphs/intra/__init__.py +15 -0
- sierra/plugins/prod/graphs/intra/generate.py +202 -0
- sierra/plugins/prod/graphs/intra/heatmap.py +74 -0
- sierra/plugins/prod/graphs/intra/line.py +114 -0
- sierra/plugins/prod/graphs/plugin.py +103 -0
- sierra/plugins/prod/graphs/targets.py +63 -0
- sierra/plugins/prod/render/__init__.py +18 -0
- sierra/plugins/prod/render/cmdline.py +72 -0
- sierra/plugins/prod/render/plugin.py +282 -0
- sierra/plugins/storage/__init__.py +5 -0
- sierra/plugins/storage/arrow/__init__.py +18 -0
- sierra/plugins/storage/arrow/plugin.py +38 -0
- sierra/plugins/storage/csv/__init__.py +9 -0
- sierra/plugins/storage/csv/plugin.py +12 -5
- sierra/version.py +3 -2
- sierra_research-1.5.0.dist-info/METADATA +238 -0
- sierra_research-1.5.0.dist-info/RECORD +186 -0
- {sierra_research-1.3.6.dist-info → sierra_research-1.5.0.dist-info}/WHEEL +1 -2
- sierra/core/experiment/xml.py +0 -454
- sierra/core/generators/controller_generator_parser.py +0 -34
- sierra/core/generators/exp_creator.py +0 -351
- sierra/core/generators/exp_generators.py +0 -142
- sierra/core/graphs/scatterplot2D.py +0 -109
- sierra/core/graphs/stacked_line_graph.py +0 -249
- sierra/core/graphs/stacked_surface_graph.py +0 -220
- sierra/core/graphs/summary_line_graph.py +0 -369
- sierra/core/hpc/cmdline.py +0 -142
- sierra/core/models/graphs.py +0 -87
- sierra/core/pipeline/stage2/exp_runner.py +0 -286
- sierra/core/pipeline/stage3/imagizer.py +0 -149
- sierra/core/pipeline/stage3/run_collator.py +0 -317
- sierra/core/pipeline/stage3/statistics_calculator.py +0 -478
- sierra/core/pipeline/stage4/graph_collator.py +0 -319
- sierra/core/pipeline/stage4/inter_exp_graph_generator.py +0 -240
- sierra/core/pipeline/stage4/intra_exp_graph_generator.py +0 -317
- sierra/core/pipeline/stage4/model_runner.py +0 -168
- sierra/core/pipeline/stage4/rendering.py +0 -283
- sierra/core/pipeline/stage4/yaml_config_loader.py +0 -103
- sierra/core/pipeline/stage5/inter_scenario_comparator.py +0 -328
- sierra/core/pipeline/stage5/intra_scenario_comparator.py +0 -989
- sierra/core/platform.py +0 -493
- sierra/core/plugin_manager.py +0 -369
- sierra/core/root_dirpath_generator.py +0 -241
- sierra/plugins/hpc/adhoc/plugin.py +0 -125
- sierra/plugins/hpc/local/plugin.py +0 -81
- sierra/plugins/hpc/pbs/__init__.py +0 -9
- sierra/plugins/hpc/pbs/plugin.py +0 -126
- sierra/plugins/hpc/slurm/__init__.py +0 -9
- sierra/plugins/hpc/slurm/plugin.py +0 -130
- sierra/plugins/platform/__init__.py +0 -9
- sierra/plugins/platform/argos/__init__.py +0 -9
- sierra/plugins/platform/argos/generators/platform_generators.py +0 -383
- sierra/plugins/platform/argos/plugin.py +0 -337
- sierra/plugins/platform/argos/variables/arena_shape.py +0 -145
- sierra/plugins/platform/argos/variables/cameras.py +0 -243
- sierra/plugins/platform/argos/variables/constant_density.py +0 -136
- sierra/plugins/platform/argos/variables/exp_setup.py +0 -113
- sierra/plugins/platform/argos/variables/population_constant_density.py +0 -175
- sierra/plugins/platform/argos/variables/population_size.py +0 -102
- sierra/plugins/platform/argos/variables/population_variable_density.py +0 -132
- sierra/plugins/platform/argos/variables/rendering.py +0 -104
- sierra/plugins/platform/ros1gazebo/__init__.py +0 -9
- sierra/plugins/platform/ros1gazebo/cmdline.py +0 -213
- sierra/plugins/platform/ros1gazebo/generators/platform_generators.py +0 -137
- sierra/plugins/platform/ros1gazebo/plugin.py +0 -335
- sierra/plugins/platform/ros1gazebo/variables/__init__.py +0 -10
- sierra/plugins/platform/ros1gazebo/variables/population_size.py +0 -204
- sierra/plugins/platform/ros1robot/__init__.py +0 -9
- sierra/plugins/platform/ros1robot/cmdline.py +0 -175
- sierra/plugins/platform/ros1robot/generators/platform_generators.py +0 -112
- sierra/plugins/platform/ros1robot/plugin.py +0 -373
- sierra/plugins/platform/ros1robot/variables/__init__.py +0 -10
- sierra/plugins/platform/ros1robot/variables/population_size.py +0 -146
- sierra/plugins/robot/__init__.py +0 -9
- sierra/plugins/robot/turtlebot3/__init__.py +0 -9
- sierra/plugins/robot/turtlebot3/plugin.py +0 -194
- sierra_research-1.3.6.data/data/share/man/man1/sierra-cli.1 +0 -2349
- sierra_research-1.3.6.data/data/share/man/man7/sierra-examples.7 +0 -488
- sierra_research-1.3.6.data/data/share/man/man7/sierra-exec-envs.7 +0 -331
- sierra_research-1.3.6.data/data/share/man/man7/sierra-glossary.7 +0 -285
- sierra_research-1.3.6.data/data/share/man/man7/sierra-platforms.7 +0 -358
- sierra_research-1.3.6.data/data/share/man/man7/sierra-usage.7 +0 -725
- sierra_research-1.3.6.data/data/share/man/man7/sierra.7 +0 -78
- sierra_research-1.3.6.dist-info/METADATA +0 -500
- sierra_research-1.3.6.dist-info/RECORD +0 -133
- sierra_research-1.3.6.dist-info/top_level.txt +0 -1
- {sierra_research-1.3.6.dist-info → sierra_research-1.5.0.dist-info}/entry_points.txt +0 -0
- {sierra_research-1.3.6.dist-info → sierra_research-1.5.0.dist-info/licenses}/LICENSE +0 -0
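Beyond the additions and removals, the listing above shows a wholesale plugin-tree reorganization: the platform namespace becomes engine, and the hpc and robot trees move under a new execenv namespace. A minimal illustrative sketch of the resulting import-path mapping (derived from the paths above; not code shipped in either wheel):

# Old (1.3.6) -> new (1.5.0) plugin namespaces, per the file listing above.
RENAMES = {
    "sierra.plugins.platform.argos": "sierra.plugins.engine.argos",
    "sierra.plugins.platform.ros1gazebo": "sierra.plugins.engine.ros1gazebo",
    "sierra.plugins.platform.ros1robot": "sierra.plugins.engine.ros1robot",
    "sierra.plugins.hpc": "sierra.plugins.execenv.hpc",
    "sierra.plugins.robot": "sierra.plugins.execenv.robot",
}

for old, new in RENAMES.items():
    print(f"{old}  ->  {new}")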
@@ -0,0 +1,390 @@
+# Copyright 2019 John Harwell, All rights reserved.
+#
+# SPDX-License-Identifier: MIT
+
+"""
+Classes for generating statistics within and across experiments in a batch.
+"""
+
+# Core packages
+import multiprocessing as mp
+import typing as tp
+import queue
+import logging
+import pathlib
+import os
+
+# 3rd party packages
+import pandas as pd
+import yaml
+
+# Project packages
+import sierra.core.variables.batch_criteria as bc
+from sierra.core import types, utils, stat_kernels, storage, batchroot, config
+from sierra.core.pipeline.stage3 import gather
+import sierra.core.plugin as pm
+
+_logger = logging.getLogger(__name__)
+
+
+class DataGatherer(gather.BaseGatherer):
+    """Gather :term:`Raw Output Data` files from all runs.
+
+    The configured output directory for each run is searched recursively for
+    files to gather. To be eligible for gathering and later processing, files
+    must:
+
+    - Be non-empty
+
+    - Have a suffix which is supported by the selected ``--storage`` plugin.
+
+    - Match an intra/inter experiment graph in ``graphs.yaml``.
+    """
+
+    def __init__(
+        self,
+        main_config: types.YAMLDict,
+        gather_opts: types.SimpleDict,
+        processq: mp.Queue,
+    ) -> None:
+        super().__init__(main_config, gather_opts, processq)
+        self.logger = logging.getLogger(__name__)
+        config_path = pathlib.Path(gather_opts["project_config_root"]) / pathlib.Path(
+            config.kYAML.graphs
+        )
+        if utils.path_exists(config_path):
+            _logger.debug("Filtering gathered data by graph generation targets")
+            self.config = yaml.load(utils.utf8open(config_path), yaml.FullLoader)
+        else:
+            _logger.debug(
+                "%s does not exist for project: not filtering gathered data",
+                config.kYAML.graphs,
+            )
+
+    def calc_gather_items(
+        self, run_output_root: pathlib.Path, exp_name: str
+    ) -> tp.List[gather.GatherSpec]:
+        to_gather = []
+        proj_output_root = run_output_root / str(self.run_metrics_leaf)
+        plugin = pm.pipeline.get_plugin_module(self.gather_opts["storage"])
+
+        for item in proj_output_root.rglob("*"):
+            if (
+                item.is_dir()
+                or not any(s in plugin.suffixes() for s in item.suffixes)
+                or item.stat().st_size == 0
+            ):
+                continue
+
+            filter_by_intra = "intra-exp" in self.config
+            filter_by_inter = "inter-exp" in self.config
+
+            filtered_intra = any(
+                g["src_stem"] in str(item.relative_to(proj_output_root))
+                for category in self.config["intra-exp"]
+                for g in self.config["intra-exp"][category]
+            )
+
+            filtered_inter = any(
+                g["src_stem"] in str(item.relative_to(proj_output_root))
+                for category in self.config["inter-exp"]
+                for g in self.config["inter-exp"][category]
+            )
+
+            # If both are present, we gather from it if there is a positive
+            # match in either graph type category.
+            if (
+                filter_by_intra
+                and filter_by_inter
+                and (filtered_intra or filtered_inter)
+            ):
+                self.logger.trace(
+                    "Gathering %s: match in %s [intra/inter]",
+                    item.relative_to(proj_output_root),
+                    config.kYAML.graphs,
+                )
+                to_gather.append(
+                    gather.GatherSpec(
+                        exp_name=exp_name,
+                        item_stem_path=item.relative_to(proj_output_root),
+                        collate_col=None,
+                    )
+                )
+                continue
+
+            # If only intra-exp graphs are present, we gather from it if
+            # there is a positive match in that category.
+            if filter_by_intra and filtered_intra:
+                self.logger.trace(
+                    "Gathering %s: match in %s [intra]",
+                    item.relative_to(proj_output_root),
+                    config.kYAML.graphs,
+                )
+                to_gather.append(
+                    gather.GatherSpec(
+                        exp_name=exp_name,
+                        item_stem_path=item.relative_to(proj_output_root),
+                        collate_col=None,
+                    )
+                )
+                continue
+
+            # If only inter-exp graphs are present, we gather from it if
+            # there is a positive match in that category.
+            if filter_by_inter and filtered_inter:
+                self.logger.trace(
+                    "Gathering %s: match in %s [inter]",
+                    item.relative_to(proj_output_root),
+                    config.kYAML.graphs,
+                )
+                to_gather.append(
+                    gather.GatherSpec(
+                        exp_name=exp_name,
+                        item_stem_path=item.relative_to(proj_output_root),
+                        collate_col=None,
+                    )
+                )
+                continue
+
+        return to_gather
+
+
+def proc_batch_exp(
+    main_config: types.YAMLDict,
+    cmdopts: types.Cmdopts,
+    pathset: batchroot.PathSet,
+    criteria: bc.XVarBatchCriteria,
+    gatherer_type=DataGatherer,
+) -> None:
+    """Process :term:`Raw Output Data` files for each :term:`Experiment`.
+
+    Ideally this is done in parallel across experiments, but this can be changed
+    to serial if memory on the SIERRA host machine is limited via
+    ``--processing-parallelism``.
+
+    It *IS* faster to do all the gathering at once and THEN do all the
+    processing, but that doesn't work for extremely large amounts of data
+    generated per :term:`Experimental Run`.
+    """
+    exp_to_proc = utils.exp_range_calc(
+        cmdopts["exp_range"], pathset.output_root, criteria.gen_exp_names()
+    )
+
+    template_input_leaf = pathlib.Path(cmdopts["expdef_template"]).stem
+
+    stat_opts = {
+        "template_input_leaf": template_input_leaf,
+        "df_verify": cmdopts["df_verify"],
+        "dist_stats": cmdopts["dist_stats"],
+        "processing_mem_limit": cmdopts["processing_mem_limit"],
+        "storage": cmdopts["storage"],
+        "project_config_root": cmdopts["project_config_root"],
+        "df_homogenize": cmdopts["df_homogenize"],
+    }
+
+    pool_opts = {}
+    parallelism = cmdopts["processing_parallelism"]
+
+    # Always need to have at least one of each! If SIERRA is invoked on a machine
+    # with 2 or fewer logical cores, the calculation with psutil.cpu_count() will
+    # return 0 for # gatherers.
+    pool_opts["n_gatherers"] = max(1, int(parallelism * 0.25))
+    pool_opts["n_processors"] = max(1, int(parallelism * 0.75))
+
+    with mp.Pool(
+        processes=pool_opts["n_gatherers"] + pool_opts["n_processors"]
+    ) as pool:
+        _execute_for_batch(
+            main_config, pathset, exp_to_proc, stat_opts, pool_opts, gatherer_type, pool
+        )
+
+        pool.close()
+        pool.join()
+
+
+def _execute_for_batch(
+    main_config: types.YAMLDict,
+    pathset: batchroot.PathSet,
+    exp_to_proc: tp.List[pathlib.Path],
+    stat_opts: types.SimpleDict,
+    pool_opts: types.SimpleDict,
+    gatherer_type,
+    pool,
+) -> None:
+    """
+    Perform statistics generation on the :term:`Batch Experiment`.
+
+    Gathers all :term:`Raw Output Data` files FIRST, and *then* does
+    processing. This is almost 50% faster than doing a true producer-consumer
+    queue, probably because there is much less traffic across processes and/or
+    better disk I/O performance.
+    """
+    m = mp.Manager()
+    gatherq = m.Queue()
+    processq = m.Queue()
+
+    for exp in exp_to_proc:
+        gatherq.put(exp)
+
+    _logger.debug(
+        "Starting %d gatherers, method=%s",
+        pool_opts["n_gatherers"],
+        mp.get_start_method(),
+    )
+
+    gathered = [
+        pool.apply_async(
+            _gather_worker,
+            (gatherer_type, gatherq, processq, main_config, stat_opts),
+        )
+        for i in range(0, pool_opts["n_gatherers"])
+    ]
+
+    _logger.debug(
+        "Starting %d processors, method=%s",
+        pool_opts["n_processors"],
+        mp.get_start_method(),
+    )
+
+    processed = [
+        pool.apply_async(_process_worker, (processq, main_config, pathset, stat_opts))
+        for i in range(0, pool_opts["n_processors"])
+    ]
+
+    _logger.debug("Waiting for workers to finish")
+
+    # To capture the otherwise silent crashes when something goes wrong in
+    # worker threads. Any assertions will show and any exceptions will be
+    # re-raised.
+    for g in gathered:
+        g.get()
+
+    for p in processed:
+        p.get()
+
+    _logger.debug("All workers finished")
+
+    assert (
+        gatherq.empty()
+    ), f"Finished processing but gather queue has {gatherq.qsize()} items?"
+
+    assert (
+        processq.empty()
+    ), f"Finished processing but process queue has {processq.qsize()} items?"
+
+
+def _gather_worker(
+    gatherer_type,
+    gatherq: mp.Queue,
+    processq: mp.Queue,
+    main_config: types.YAMLDict,
+    stat_opts: tp.Dict[str, str],
+) -> None:
+    gatherer = gatherer_type(main_config, stat_opts, processq)
+
+    # Wait for 2 seconds after the queue is empty before bailing, at the
+    # start. If that is not long enough then exponentially increase from
+    # there until you find how long it takes to get the first item in the
+    # queue, and use that as the appropriate timeout (plus a little
+    # margin).
+    timeout = 3
+    got_item = False
+    n_tries = 0
+    while n_tries < config.kGatherWorkerRetries:
+        try:
+            exp_output_root = gatherq.get(True, timeout)
+            gatherer(exp_output_root)
+            gatherq.task_done()
+            got_item = True
+
+        except queue.Empty:
+            if got_item:
+                break
+
+            timeout *= 2
+            n_tries += 1
+
+    _logger.trace(f"Gather worker {os.getpid()} exit")
+
+
+def _process_worker(
+    processq: mp.Queue,
+    main_config: types.YAMLDict,
+    pathset: batchroot.PathSet,
+    stat_opts: tp.Dict[str, str],
+) -> None:
+    # Wait for 2 seconds after the queue is empty before bailing, at the
+    # start. If that is not long enough then exponentially increase from
+    # there until you find how long it takes to get the first item in the
+    # queue, and use that as the appropriate timeout (plus a little
+    # margin).
+    timeout = 3
+    got_item = False
+    n_tries = 0
+    while n_tries < config.kProcessWorkerRetries:
+        try:
+            spec = processq.get(True, timeout)
+
+            _proc_single_exp(main_config, stat_opts, pathset, spec)
+            processq.task_done()
+            got_item = True
+
+        except queue.Empty:
+            if got_item:
+                break
+
+            timeout *= 2
+            n_tries += 1
+    _logger.trace(f"Process worker {os.getpid()} exit")
+
+
+def _proc_single_exp(
+    main_config: types.YAMLDict,
+    stat_opts: types.StrDict,
+    pathset: batchroot.PathSet,
+    spec: gather.ProcessSpec,
+) -> None:
+    """Generate statistics from output files for all runs within an experiment.
+
+    .. IMPORTANT:: You *CANNOT* use logging ANYWHERE during processing .csv
+                   files. Why? I *think* it is because of a bug in the logging
+                   module. If you get unlucky enough to spawn the process which
+                   enters the __call__() method in this class while another
+                   logging statement is in progress (and is therefore holding an
+                   internal logging module lock), then the underlying fork()
+                   call will copy the lock in the acquired state. Then, when
+                   this class goes to try to log something, it deadlocks with
+                   it.
+
+                   You also can't just create loggers with unique names, as this
+                   seems to be something like the GIL, but for the logging
+                   module. Sometimes python sucks.
+    """
+    csv_concat = pd.concat(spec.dfs)
+    exp_stat_root = pathset.stat_root / spec.gather.exp_name
+
+    utils.dir_create_checked(exp_stat_root, exist_ok=True)
+
+    by_row_index = csv_concat.groupby(csv_concat.index)
+
+    dfs = {}
+
+    if stat_opts["dist_stats"] in ["none", "all"]:
+        dfs.update(stat_kernels.mean.from_groupby(by_row_index))
+
+    if stat_opts["dist_stats"] in ["conf95", "all"]:
+        dfs.update(stat_kernels.conf95.from_groupby(by_row_index))
+
+    if stat_opts["dist_stats"] in ["bw", "all"]:
+        dfs.update(stat_kernels.bw.from_groupby(by_row_index))
+
+    for ext, df in dfs.items():
+        opath = exp_stat_root / spec.gather.item_stem_path
+        utils.dir_create_checked(opath.parent, exist_ok=True)
+        opath = opath.with_suffix(ext)
+
+        df = utils.df_fill(df, stat_opts["df_homogenize"])
+        storage.df_write(df, opath, "storage.csv", index=False)
+
+
+__all__ = ["proc_batch_exp"]
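Note on the statistics plugin above: proc_batch_exp() splits --processing-parallelism 25%/75% between gatherer and processor workers, clamping each count to at least one so that hosts with very few cores still get one worker of each kind. A standalone sketch of that arithmetic (the parallelism value here is a hypothetical example, not taken from the wheel):

# Gatherer/processor split used by proc_batch_exp() above.
parallelism = 8  # hypothetical --processing-parallelism value
n_gatherers = max(1, int(parallelism * 0.25))   # -> 2
n_processors = max(1, int(parallelism * 0.75))  # -> 6
pool_size = n_gatherers + n_processors          # mp.Pool runs all workers concurrently
assert n_gatherers >= 1 and n_processors >= 1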
@@ -0,0 +1,18 @@
+#
+# Copyright 2024 John Harwell, All rights reserved.
+#
+# SPDX-License-Identifier: MIT
+#
+"""
+Container module for graph generation in stage 4.
+"""
+
+# Core packages
+
+# 3rd party packages
+
+# Project packages
+
+
+def sierra_plugin_type() -> str:
+    return "pipeline"
@@ -0,0 +1,269 @@
+#
+# Copyright 2025 John Harwell, All rights reserved.
+#
+# SPDX-License-Identifier: MIT
+#
+
+# Core packages
+import typing as tp
+import argparse
+
+# 3rd party packages
+
+# Project packages
+from sierra.core import types
+from sierra.plugins import PluginCmdline
+
+
+def build(
+    parents: tp.List[argparse.ArgumentParser], stages: tp.List[int]
+) -> PluginCmdline:
+    """
+    Get a cmdline parser supporting the ``prod.graphs`` product plugin.
+    """
+    cmdline = PluginCmdline(parents, stages)
+    _build_multistage(cmdline)
+    _build_stage4(cmdline)
+    return cmdline
+
+
+def _build_multistage(cmdline: PluginCmdline) -> PluginCmdline:
+    # Plotting options
+    cmdline.multistage.add_argument(
+        "--plot-log-xscale",
+        help="""
+        Place the set of X values used to generate intra- and
+        inter-experiment graphs into the logarithmic space. Mainly useful
+        when the batch criteria involves large system sizes, so that the
+        plots are more readable.
+        """
+        + cmdline.graphs_applicable_doc(
+            [":py:func:`Summary Line <sierra.core.graphs.summary_line.generate>`"]
+        )
+        + cmdline.stage_usage_doc([4, 5]),
+        action="store_true",
+    )
+
+    cmdline.multistage.add_argument(
+        "--plot-enumerated-xscale",
+        help="""
+        Instead of using the values generated by a given batch criteria for
+        the X values, use an enumerated list [0, ..., len(X values) - 1].
+        Mainly useful when the batch criteria involves large system sizes,
+        so that the plots are more readable.
+        """
+        + cmdline.graphs_applicable_doc(
+            [":py:func:`Summary Line <sierra.core.graphs.summary_line.generate>`"]
+        )
+        + cmdline.stage_usage_doc([4, 5]),
+        action="store_true",
+    )
+
+    cmdline.multistage.add_argument(
+        "--plot-log-yscale",
+        help="""
+        Place the set of Y values used to generate intra- and
+        inter-experiment graphs into the logarithmic space. Mainly useful
+        when the batch criteria involves large system sizes, so that the
+        plots are more readable.
+        """
+        + cmdline.graphs_applicable_doc(
+            [
+                ":py:func:`Summary Line <sierra.core.graphs.summary_line.generate>`",
+                ":py:func:`Stacked Line <sierra.core.graphs.stacked_line.generate>`",
+            ]
+        )
+        + cmdline.stage_usage_doc([4, 5]),
+        action="store_true",
+    )
+
+    cmdline.multistage.add_argument(
+        "--plot-primary-axis",
+        type=int,
+        help="""
+        This option allows you to override the primary axis, which is
+        normally computed based on the batch criteria.
+
+        For example, in a bivariate batch criteria composed of
+
+        - :ref:`plugins/engine/argos/bc/population-size` on the X axis
+          (rows)
+
+        - Another batch criteria which does not affect system size
+          (columns)
+
+        Metrics will be calculated by `computing` across .csv rows and
+        `projecting` down the columns by default, since system size will
+        only vary within a row. Passing a value of 1 to this option will
+        override this calculation, which can be useful in bivariate batch
+        criteria in which you are interested in the effect of the OTHER
+        non-size criteria on various performance measures.
+
+        0=criteria of interest varies across `rows`.
+
+        1=criteria of interest varies across `columns`.
+
+        This option only affects generating graphs from bivariate batch
+        criteria.
+        """
+        + cmdline.graphs_applicable_doc(
+            [
+                ":py:func:`Heatmap <sierra.core.graphs.heatmap.generate>`",
+                ":py:func:`Stacked Line <sierra.core.graphs.stacked_line.generate>`",
+            ]
+        )
+        + cmdline.stage_usage_doc([4, 5]),
+        default=None,
+    )
+
+    cmdline.multistage.add_argument(
+        "--plot-large-text",
+        help="""
+        This option specifies that the title, X/Y axis labels/tick labels
+        should be larger than the SIERRA default. This is useful when
+        generating graphs suitable for two column paper format where the
+        default text size for rendered graphs will be too small to see
+        easily. The SIERRA defaults are generally fine for the one
+        column/journal paper format.
+        """
+        + cmdline.stage_usage_doc([4, 5]),
+        action="store_true",
+    )
+
+    cmdline.multistage.add_argument(
+        "--plot-transpose-graphs",
+        help="""
+        Transpose the X, Y axes in generated graphs. Useful as a general
+        way to tweak graphs for best use of space within a paper.
+
+        .. versionchanged:: 1.2.20
+
+        Renamed from ``--transpose-graphs`` to make its relation to other
+        plotting options clearer.
+        """
+        + cmdline.graphs_applicable_doc(
+            [":py:func:`Heatmap <sierra.core.graphs.heatmap.generate>`"]
+        )
+        + cmdline.stage_usage_doc([4, 5]),
+        action="store_true",
+    )
+    return cmdline
+
+
+def _build_stage4(cmdline: PluginCmdline) -> PluginCmdline:
+    cmdline.stage4.add_argument(
+        "--graphs-backend",
+        choices=["matplotlib", "bokeh"],
+        help="""
+        Specify the default backend to be used when generating plots. Can
+        be overridden on a per-graph basis.
+
+        - ``matplotlib`` - Use matplotlib to generate static PNG
+          images.
+
+        - ``bokeh`` - Use bokeh to generate stand-alone HTML files
+          containing interactive bokeh visualizations. Files are
+          suitable for inclusion in static webpages, viewing in a
+          browser, etc.
+
+        See :ref:`plugins/prod/graphs` for more information.
+        """,
+        default="matplotlib",
+    )
+    cmdline.stage4.add_argument(
+        "--exp-n-datapoints-factor",
+        type=float,
+        help="""
+        Specify an additional multiplicative factor for computing the # of
+        datapoints captured during an :term:`Experiment` to modify the
+        duration * ticks_per_sec default.
+        """,
+        default=1.0,
+    )
+    cmdline.stage4.add_argument(
+        "--exp-graphs",
+        choices=["intra", "inter", "all", "none"],
+        help="""
+        Specify which types of graphs should be generated from experimental
+        results:
+
+        - ``intra`` - Generate intra-experiment graphs from the results
+          of a single experiment within a batch, for each experiment in
+          the batch (this can take a long time with large batch
+          experiments). If any intra-experiment models are defined and
+          enabled, those are run and the results placed on appropriate
+          graphs.
+
+        - ``inter`` - Generate inter-experiment graphs _across_ the
+          results of all experiments in a batch. These are very fast
+          to generate, regardless of batch experiment size. If any
+          inter-experiment models are defined and enabled, those are
+          run and the results placed on appropriate graphs.
+
+        - ``all`` - Generate all types of graphs.
+
+        - ``none`` - Skip graph generation.
+        """
+        + cmdline.stage_usage_doc([4]),
+        default="all",
+    )
+
+    cmdline.stage4.add_argument(
+        "--project-no-LN",
+        help="""
+        Specify that the intra-experiment and inter-experiment linegraphs
+        defined in project YAML configuration should not be generated.
+        Useful if you are working on something which results in the
+        generation of other types of graphs, and the generation of those
+        linegraphs is not currently needed and only slows down your
+        development cycle.
+
+        Model linegraphs are still generated, if applicable.
+        """,
+        action="store_true",
+    )
+
+    cmdline.stage4.add_argument(
+        "--project-no-HM",
+        help="""
+        Specify that the intra-experiment heatmaps defined in project YAML
+        configuration should not be generated. Useful if:
+
+        - You are working on something which results in the generation
+          of other types of graphs, and the generation of heatmaps only
+          slows down your development cycle.
+
+        - You are working on stage5 comparison graphs for bivariate
+          batch criteria, and re-generating many heatmaps during stage4
+          is taking too long.
+
+        Model heatmaps are still generated, if applicable.
+
+        .. versionadded:: 1.2.20
+        """,
+        action="store_true",
+    )
+
+    return cmdline
+
+
+def to_cmdopts(args: argparse.Namespace) -> types.Cmdopts:
+    return {
+        # multistage
+        "plot_log_xscale": args.plot_log_xscale,
+        "plot_enumerated_xscale": args.plot_enumerated_xscale,
+        "plot_log_yscale": args.plot_log_yscale,
+        "plot_primary_axis": args.plot_primary_axis,
+        "plot_large_text": args.plot_large_text,
+        "plot_transpose_graphs": args.plot_transpose_graphs,
+        # stage 4
+        "graphs_backend": args.graphs_backend,
+        "exp_n_datapoints_factor": args.exp_n_datapoints_factor,
+        "exp_graphs": args.exp_graphs,
+        "project_no_LN": args.project_no_LN,
+        "project_no_HM": args.project_no_HM,
+    }
+
+
+def sphinx_cmdline_multistage():
+    return build([], [3, 4, 5]).parser