sierra-research 1.3.11__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sierra/__init__.py +3 -3
- sierra/core/__init__.py +3 -3
- sierra/core/batchroot.py +223 -0
- sierra/core/cmdline.py +681 -1057
- sierra/core/compare.py +11 -0
- sierra/core/config.py +96 -88
- sierra/core/engine.py +306 -0
- sierra/core/execenv.py +380 -0
- sierra/core/expdef.py +11 -0
- sierra/core/experiment/__init__.py +1 -0
- sierra/core/experiment/bindings.py +150 -101
- sierra/core/experiment/definition.py +414 -245
- sierra/core/experiment/spec.py +83 -85
- sierra/core/exproot.py +44 -0
- sierra/core/generators/__init__.py +10 -0
- sierra/core/generators/experiment.py +528 -0
- sierra/core/generators/generator_factory.py +138 -137
- sierra/core/graphs/__init__.py +23 -0
- sierra/core/graphs/bcbridge.py +94 -0
- sierra/core/graphs/heatmap.py +245 -324
- sierra/core/graphs/pathset.py +27 -0
- sierra/core/graphs/schema.py +77 -0
- sierra/core/graphs/stacked_line.py +341 -0
- sierra/core/graphs/summary_line.py +506 -0
- sierra/core/logging.py +3 -2
- sierra/core/models/__init__.py +3 -1
- sierra/core/models/info.py +19 -0
- sierra/core/models/interface.py +52 -122
- sierra/core/pipeline/__init__.py +2 -5
- sierra/core/pipeline/pipeline.py +228 -126
- sierra/core/pipeline/stage1/__init__.py +10 -0
- sierra/core/pipeline/stage1/pipeline_stage1.py +45 -31
- sierra/core/pipeline/stage2/__init__.py +10 -0
- sierra/core/pipeline/stage2/pipeline_stage2.py +8 -11
- sierra/core/pipeline/stage2/runner.py +401 -0
- sierra/core/pipeline/stage3/__init__.py +12 -0
- sierra/core/pipeline/stage3/gather.py +321 -0
- sierra/core/pipeline/stage3/pipeline_stage3.py +37 -84
- sierra/core/pipeline/stage4/__init__.py +12 -2
- sierra/core/pipeline/stage4/pipeline_stage4.py +36 -354
- sierra/core/pipeline/stage5/__init__.py +12 -0
- sierra/core/pipeline/stage5/pipeline_stage5.py +33 -208
- sierra/core/pipeline/yaml.py +48 -0
- sierra/core/plugin.py +529 -62
- sierra/core/proc.py +11 -0
- sierra/core/prod.py +11 -0
- sierra/core/ros1/__init__.py +5 -1
- sierra/core/ros1/callbacks.py +22 -21
- sierra/core/ros1/cmdline.py +59 -88
- sierra/core/ros1/generators.py +159 -175
- sierra/core/ros1/variables/__init__.py +3 -0
- sierra/core/ros1/variables/exp_setup.py +122 -116
- sierra/core/startup.py +106 -76
- sierra/core/stat_kernels.py +4 -5
- sierra/core/storage.py +13 -32
- sierra/core/trampoline.py +30 -0
- sierra/core/types.py +116 -71
- sierra/core/utils.py +103 -106
- sierra/core/variables/__init__.py +1 -1
- sierra/core/variables/base_variable.py +12 -17
- sierra/core/variables/batch_criteria.py +387 -481
- sierra/core/variables/builtin.py +135 -0
- sierra/core/variables/exp_setup.py +19 -39
- sierra/core/variables/population_size.py +72 -76
- sierra/core/variables/variable_density.py +44 -68
- sierra/core/vector.py +1 -1
- sierra/main.py +256 -88
- sierra/plugins/__init__.py +119 -0
- sierra/plugins/compare/__init__.py +14 -0
- sierra/plugins/compare/graphs/__init__.py +19 -0
- sierra/plugins/compare/graphs/cmdline.py +120 -0
- sierra/plugins/compare/graphs/comparator.py +291 -0
- sierra/plugins/compare/graphs/inter_controller.py +531 -0
- sierra/plugins/compare/graphs/inter_scenario.py +297 -0
- sierra/plugins/compare/graphs/namecalc.py +53 -0
- sierra/plugins/compare/graphs/outputroot.py +73 -0
- sierra/plugins/compare/graphs/plugin.py +147 -0
- sierra/plugins/compare/graphs/preprocess.py +172 -0
- sierra/plugins/compare/graphs/schema.py +37 -0
- sierra/plugins/engine/__init__.py +14 -0
- sierra/plugins/engine/argos/__init__.py +18 -0
- sierra/plugins/{platform → engine}/argos/cmdline.py +144 -151
- sierra/plugins/{platform/argos/variables → engine/argos/generators}/__init__.py +5 -0
- sierra/plugins/engine/argos/generators/engine.py +394 -0
- sierra/plugins/engine/argos/plugin.py +393 -0
- sierra/plugins/{platform/argos/generators → engine/argos/variables}/__init__.py +5 -0
- sierra/plugins/engine/argos/variables/arena_shape.py +183 -0
- sierra/plugins/engine/argos/variables/cameras.py +240 -0
- sierra/plugins/engine/argos/variables/constant_density.py +112 -0
- sierra/plugins/engine/argos/variables/exp_setup.py +82 -0
- sierra/plugins/{platform → engine}/argos/variables/physics_engines.py +83 -87
- sierra/plugins/engine/argos/variables/population_constant_density.py +178 -0
- sierra/plugins/engine/argos/variables/population_size.py +115 -0
- sierra/plugins/engine/argos/variables/population_variable_density.py +123 -0
- sierra/plugins/engine/argos/variables/rendering.py +108 -0
- sierra/plugins/engine/ros1gazebo/__init__.py +18 -0
- sierra/plugins/engine/ros1gazebo/cmdline.py +175 -0
- sierra/plugins/{platform/ros1robot → engine/ros1gazebo}/generators/__init__.py +5 -0
- sierra/plugins/engine/ros1gazebo/generators/engine.py +125 -0
- sierra/plugins/engine/ros1gazebo/plugin.py +404 -0
- sierra/plugins/engine/ros1gazebo/variables/__init__.py +15 -0
- sierra/plugins/engine/ros1gazebo/variables/population_size.py +214 -0
- sierra/plugins/engine/ros1robot/__init__.py +18 -0
- sierra/plugins/engine/ros1robot/cmdline.py +159 -0
- sierra/plugins/{platform/ros1gazebo → engine/ros1robot}/generators/__init__.py +4 -0
- sierra/plugins/engine/ros1robot/generators/engine.py +95 -0
- sierra/plugins/engine/ros1robot/plugin.py +410 -0
- sierra/plugins/{hpc/local → engine/ros1robot/variables}/__init__.py +5 -0
- sierra/plugins/engine/ros1robot/variables/population_size.py +146 -0
- sierra/plugins/execenv/__init__.py +11 -0
- sierra/plugins/execenv/hpc/__init__.py +18 -0
- sierra/plugins/execenv/hpc/adhoc/__init__.py +18 -0
- sierra/plugins/execenv/hpc/adhoc/cmdline.py +30 -0
- sierra/plugins/execenv/hpc/adhoc/plugin.py +131 -0
- sierra/plugins/execenv/hpc/cmdline.py +137 -0
- sierra/plugins/execenv/hpc/local/__init__.py +18 -0
- sierra/plugins/execenv/hpc/local/cmdline.py +31 -0
- sierra/plugins/execenv/hpc/local/plugin.py +145 -0
- sierra/plugins/execenv/hpc/pbs/__init__.py +18 -0
- sierra/plugins/execenv/hpc/pbs/cmdline.py +30 -0
- sierra/plugins/execenv/hpc/pbs/plugin.py +121 -0
- sierra/plugins/execenv/hpc/slurm/__init__.py +18 -0
- sierra/plugins/execenv/hpc/slurm/cmdline.py +30 -0
- sierra/plugins/execenv/hpc/slurm/plugin.py +133 -0
- sierra/plugins/execenv/prefectserver/__init__.py +18 -0
- sierra/plugins/execenv/prefectserver/cmdline.py +66 -0
- sierra/plugins/execenv/prefectserver/dockerremote/__init__.py +18 -0
- sierra/plugins/execenv/prefectserver/dockerremote/cmdline.py +66 -0
- sierra/plugins/execenv/prefectserver/dockerremote/plugin.py +132 -0
- sierra/plugins/execenv/prefectserver/flow.py +66 -0
- sierra/plugins/execenv/prefectserver/local/__init__.py +18 -0
- sierra/plugins/execenv/prefectserver/local/cmdline.py +29 -0
- sierra/plugins/execenv/prefectserver/local/plugin.py +133 -0
- sierra/plugins/{hpc/adhoc → execenv/robot}/__init__.py +1 -0
- sierra/plugins/execenv/robot/turtlebot3/__init__.py +18 -0
- sierra/plugins/execenv/robot/turtlebot3/plugin.py +204 -0
- sierra/plugins/expdef/__init__.py +14 -0
- sierra/plugins/expdef/json/__init__.py +14 -0
- sierra/plugins/expdef/json/plugin.py +504 -0
- sierra/plugins/expdef/xml/__init__.py +14 -0
- sierra/plugins/expdef/xml/plugin.py +386 -0
- sierra/{core/hpc → plugins/proc}/__init__.py +1 -1
- sierra/plugins/proc/collate/__init__.py +15 -0
- sierra/plugins/proc/collate/cmdline.py +47 -0
- sierra/plugins/proc/collate/plugin.py +271 -0
- sierra/plugins/proc/compress/__init__.py +18 -0
- sierra/plugins/proc/compress/cmdline.py +47 -0
- sierra/plugins/proc/compress/plugin.py +123 -0
- sierra/plugins/proc/decompress/__init__.py +18 -0
- sierra/plugins/proc/decompress/plugin.py +96 -0
- sierra/plugins/proc/imagize/__init__.py +15 -0
- sierra/plugins/proc/imagize/cmdline.py +49 -0
- sierra/plugins/proc/imagize/plugin.py +270 -0
- sierra/plugins/proc/modelrunner/__init__.py +16 -0
- sierra/plugins/proc/modelrunner/plugin.py +250 -0
- sierra/plugins/proc/statistics/__init__.py +15 -0
- sierra/plugins/proc/statistics/cmdline.py +64 -0
- sierra/plugins/proc/statistics/plugin.py +390 -0
- sierra/plugins/{hpc → prod}/__init__.py +1 -0
- sierra/plugins/prod/graphs/__init__.py +18 -0
- sierra/plugins/prod/graphs/cmdline.py +269 -0
- sierra/plugins/prod/graphs/collate.py +279 -0
- sierra/plugins/prod/graphs/inter/__init__.py +13 -0
- sierra/plugins/prod/graphs/inter/generate.py +83 -0
- sierra/plugins/prod/graphs/inter/heatmap.py +86 -0
- sierra/plugins/prod/graphs/inter/line.py +134 -0
- sierra/plugins/prod/graphs/intra/__init__.py +15 -0
- sierra/plugins/prod/graphs/intra/generate.py +202 -0
- sierra/plugins/prod/graphs/intra/heatmap.py +74 -0
- sierra/plugins/prod/graphs/intra/line.py +114 -0
- sierra/plugins/prod/graphs/plugin.py +103 -0
- sierra/plugins/prod/graphs/targets.py +63 -0
- sierra/plugins/prod/render/__init__.py +18 -0
- sierra/plugins/prod/render/cmdline.py +72 -0
- sierra/plugins/prod/render/plugin.py +282 -0
- sierra/plugins/storage/__init__.py +5 -0
- sierra/plugins/storage/arrow/__init__.py +18 -0
- sierra/plugins/storage/arrow/plugin.py +38 -0
- sierra/plugins/storage/csv/__init__.py +9 -0
- sierra/plugins/storage/csv/plugin.py +12 -5
- sierra/version.py +3 -2
- sierra_research-1.5.0.dist-info/METADATA +238 -0
- sierra_research-1.5.0.dist-info/RECORD +186 -0
- {sierra_research-1.3.11.dist-info → sierra_research-1.5.0.dist-info}/WHEEL +1 -2
- sierra/core/experiment/xml.py +0 -454
- sierra/core/generators/controller_generator_parser.py +0 -34
- sierra/core/generators/exp_creator.py +0 -351
- sierra/core/generators/exp_generators.py +0 -142
- sierra/core/graphs/scatterplot2D.py +0 -109
- sierra/core/graphs/stacked_line_graph.py +0 -251
- sierra/core/graphs/stacked_surface_graph.py +0 -220
- sierra/core/graphs/summary_line_graph.py +0 -371
- sierra/core/hpc/cmdline.py +0 -142
- sierra/core/models/graphs.py +0 -87
- sierra/core/pipeline/stage2/exp_runner.py +0 -286
- sierra/core/pipeline/stage3/imagizer.py +0 -149
- sierra/core/pipeline/stage3/run_collator.py +0 -317
- sierra/core/pipeline/stage3/statistics_calculator.py +0 -478
- sierra/core/pipeline/stage4/graph_collator.py +0 -320
- sierra/core/pipeline/stage4/inter_exp_graph_generator.py +0 -240
- sierra/core/pipeline/stage4/intra_exp_graph_generator.py +0 -317
- sierra/core/pipeline/stage4/model_runner.py +0 -168
- sierra/core/pipeline/stage4/rendering.py +0 -283
- sierra/core/pipeline/stage4/yaml_config_loader.py +0 -103
- sierra/core/pipeline/stage5/inter_scenario_comparator.py +0 -328
- sierra/core/pipeline/stage5/intra_scenario_comparator.py +0 -989
- sierra/core/platform.py +0 -493
- sierra/core/plugin_manager.py +0 -369
- sierra/core/root_dirpath_generator.py +0 -241
- sierra/plugins/hpc/adhoc/plugin.py +0 -125
- sierra/plugins/hpc/local/plugin.py +0 -81
- sierra/plugins/hpc/pbs/__init__.py +0 -9
- sierra/plugins/hpc/pbs/plugin.py +0 -126
- sierra/plugins/hpc/slurm/__init__.py +0 -9
- sierra/plugins/hpc/slurm/plugin.py +0 -130
- sierra/plugins/platform/__init__.py +0 -9
- sierra/plugins/platform/argos/__init__.py +0 -9
- sierra/plugins/platform/argos/generators/platform_generators.py +0 -383
- sierra/plugins/platform/argos/plugin.py +0 -337
- sierra/plugins/platform/argos/variables/arena_shape.py +0 -145
- sierra/plugins/platform/argos/variables/cameras.py +0 -243
- sierra/plugins/platform/argos/variables/constant_density.py +0 -136
- sierra/plugins/platform/argos/variables/exp_setup.py +0 -113
- sierra/plugins/platform/argos/variables/population_constant_density.py +0 -175
- sierra/plugins/platform/argos/variables/population_size.py +0 -102
- sierra/plugins/platform/argos/variables/population_variable_density.py +0 -132
- sierra/plugins/platform/argos/variables/rendering.py +0 -104
- sierra/plugins/platform/ros1gazebo/__init__.py +0 -9
- sierra/plugins/platform/ros1gazebo/cmdline.py +0 -213
- sierra/plugins/platform/ros1gazebo/generators/platform_generators.py +0 -137
- sierra/plugins/platform/ros1gazebo/plugin.py +0 -335
- sierra/plugins/platform/ros1gazebo/variables/__init__.py +0 -10
- sierra/plugins/platform/ros1gazebo/variables/population_size.py +0 -204
- sierra/plugins/platform/ros1robot/__init__.py +0 -9
- sierra/plugins/platform/ros1robot/cmdline.py +0 -175
- sierra/plugins/platform/ros1robot/generators/platform_generators.py +0 -112
- sierra/plugins/platform/ros1robot/plugin.py +0 -373
- sierra/plugins/platform/ros1robot/variables/__init__.py +0 -10
- sierra/plugins/platform/ros1robot/variables/population_size.py +0 -146
- sierra/plugins/robot/__init__.py +0 -9
- sierra/plugins/robot/turtlebot3/__init__.py +0 -9
- sierra/plugins/robot/turtlebot3/plugin.py +0 -194
- sierra_research-1.3.11.data/data/share/man/man1/sierra-cli.1 +0 -2349
- sierra_research-1.3.11.data/data/share/man/man7/sierra-examples.7 +0 -508
- sierra_research-1.3.11.data/data/share/man/man7/sierra-exec-envs.7 +0 -331
- sierra_research-1.3.11.data/data/share/man/man7/sierra-glossary.7 +0 -285
- sierra_research-1.3.11.data/data/share/man/man7/sierra-platforms.7 +0 -358
- sierra_research-1.3.11.data/data/share/man/man7/sierra-usage.7 +0 -729
- sierra_research-1.3.11.data/data/share/man/man7/sierra.7 +0 -78
- sierra_research-1.3.11.dist-info/METADATA +0 -492
- sierra_research-1.3.11.dist-info/RECORD +0 -133
- sierra_research-1.3.11.dist-info/top_level.txt +0 -1
- {sierra_research-1.3.11.dist-info → sierra_research-1.5.0.dist-info}/entry_points.txt +0 -0
- {sierra_research-1.3.11.dist-info → sierra_research-1.5.0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,321 @@
|
|
1
|
+
# Copyright 2019 John Harwell, All rights reserved.
|
2
|
+
#
|
3
|
+
# SPDX-License-Identifier: MIT
|
4
|
+
|
5
|
+
"""
|
6
|
+
Classes for gathering :term:`Raw Output Data` files in a batch.
|
7
|
+
"""
|
8
|
+
|
9
|
+
# Core packages
|
10
|
+
import re
|
11
|
+
import multiprocessing as mp
|
12
|
+
import typing as tp
|
13
|
+
import time
|
14
|
+
import datetime
|
15
|
+
import logging
|
16
|
+
import pathlib
|
17
|
+
|
18
|
+
# 3rd party packages
|
19
|
+
import psutil
|
20
|
+
import pandas as pd # noqa
|
21
|
+
|
22
|
+
# Project packages
|
23
|
+
from sierra.core import types, utils, storage
|
24
|
+
|
25
|
+
|
26
|
+
class GatherSpec:
    """
    Data class for specifying files to gather from an :term:`Experiment`.

    Attributes:
        exp_name: The name of the parent experiment.

        item_stem_path: The name of the file to gather from all runs in an
                        experiment, relative to the output root for the run (to
                        support nested outputs).

        collate_col: The name of the column associated with the file, as
                     configured.  Will be None for statistics generation, and
                     non-None for collation.
    """

    def __init__(
        self,
        exp_name: str,
        item_stem_path: pathlib.Path,
        collate_col: tp.Union[str, None],
    ) -> None:
        self.exp_name = exp_name
        self.item_stem_path = item_stem_path
        self.collate_col = collate_col

    def __repr__(self) -> str:
        # Deliberately compact (omits collate_col): this is the form that ends
        # up in log messages.
        return f"{self.exp_name}: {self.item_stem_path}"
|
55
|
+
|
56
|
+
|
57
|
+
class ProcessSpec:
    """
    Data class for specifying how to process :term:`Raw Output Files`.

    Attributes:
        gather: The specification for how the files were gathered.

        exp_run_names: The names of the parent experimental runs.

        dfs: The gathered dataframes.  Indices match those in
             ``exp_run_names``.
    """

    def __init__(self, gather: GatherSpec) -> None:
        self.gather = gather

        # Parallel lists: dfs[i] was read from the run named exp_run_names[i].
        self.exp_run_names = []  # type: tp.List[str]
        self.dfs = []  # type: tp.List[pd.DataFrame]
|
74
|
+
|
75
|
+
|
76
|
+
class BaseGatherer:
    """Gather a set of output files from all runs in an experiment.

    "Gathering" in this context means creating a dictionary mapping which files
    came from where, so that later processing can be both across and within
    experiments in the batch.

    Derived classes must implement :meth:`calc_gather_items`.
    """

    def __init__(
        self,
        main_config: types.YAMLDict,
        gather_opts: types.SimpleDict,
        processq: mp.Queue,
    ) -> None:
        """
        Store configuration and the queue gathered data is pushed onto.

        Args:
            main_config: Main configuration; ``["sierra"]["run"]
                         ["run_metrics_leaf"]`` is read from it.

            gather_opts: Gathering options.  The keys read by this class are
                         ``template_input_leaf``, ``df_verify``, ``storage``
                         and ``processing_mem_limit``.

            processq: Queue which one :class:`ProcessSpec` per gathered item
                      is put onto.
        """
        self.processq = processq
        self.gather_opts = gather_opts

        # Will get the main name and extension of the config file (without the
        # full absolute path).
        self.template_input_fname = self.gather_opts["template_input_leaf"]
        self.main_config = main_config
        self.run_metrics_leaf = main_config["sierra"]["run"]["run_metrics_leaf"]

        self.logger = logging.getLogger(__name__)

    def calc_gather_items(
        self, run_output_root: pathlib.Path, exp_name: str
    ) -> tp.List[GatherSpec]:
        """
        Calculate the items to gather from a single experimental run.

        Must be overridden by derived classes.

        Args:
            run_output_root: Output directory of the experimental run.

            exp_name: Name of the parent experiment.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def __call__(self, exp_output_root: pathlib.Path) -> None:
        """Process the output files found in the output save path.

        Optionally verifies the experiment outputs (``df_verify``), computes
        the items to gather from every run directory under ``exp_output_root``,
        gathers each item from all runs, and enqueues the result.
        """
        if self.gather_opts["df_verify"]:
            self._verify_exp_outputs(exp_output_root)

        self.logger.info(
            "Gathering raw outputs from %s...",
            exp_output_root.relative_to(exp_output_root.parent.parent),
        )

        pattern = "{}_run{}_output".format(
            re.escape(str(self.gather_opts["template_input_leaf"])), r"\d+"
        )

        runs = list(exp_output_root.iterdir())
        assert all(re.match(pattern, r.name) for r in runs), (
            f"Extra files/not all dirs in '{exp_output_root}' are exp "
            "run output dirs"
        )

        # NOTE(review): logger.trace() is not part of the stdlib logging API;
        # presumably the project's logging setup installs it -- confirm.
        to_gather = []
        for run in runs:
            from_run = self.calc_gather_items(run, exp_output_root.name)
            self.logger.trace(
                "Calculated %s items from %s for gathering", len(from_run), run.name
            )
            to_gather.extend(from_run)

        self.logger.trace("Gathering all items...")

        for spec in to_gather:
            # Throttle: each gathered item holds one dataframe per run in
            # memory until it is consumed from the queue.
            self._wait_for_memory()
            to_process = self._gather_item_from_runs(exp_output_root, spec, runs)
            n_gathered_from = len(to_process.dfs)
            if n_gathered_from != len(runs):
                self.logger.warning(
                    (
                        "Data not gathered for %s from all experimental runs "
                        "in %s: %s runs != %s (--n-runs)"
                    ),
                    spec.item_stem_path,
                    exp_output_root.relative_to(exp_output_root.parent.parent),
                    n_gathered_from,
                    len(runs),
                )

            # Put gathered files in the process queue
            self.processq.put(to_process)

        self.logger.debug(
            "Enqueued %s items from %s for processing",
            len(to_gather),
            exp_output_root.name,
        )

    def _gather_item_from_runs(
        self,
        exp_output_root: pathlib.Path,
        spec: GatherSpec,
        runs: tp.List[pathlib.Path],
    ) -> ProcessSpec:
        """
        Read one item from every run which produced it.

        Runs in which the file is missing or empty are skipped.  Non-numeric
        columns are dropped (with a warning).

        Args:
            exp_output_root: Experiment output root; only used to shorten paths
                             in log messages.

            spec: What to gather.

            runs: The run output directories to gather from.

        Returns:
            The gathered dataframes, together with the names of the runs they
            came from.
        """
        to_process = ProcessSpec(gather=spec)

        for run in runs:
            path = run / self.run_metrics_leaf / spec.item_stem_path
            if not (path.exists() and path.stat().st_size > 0):
                continue

            df = storage.df_read(
                path,
                self.gather_opts["storage"],
                run_output_root=run,
                index_col=False,
            )
            if nonumeric := df.select_dtypes(exclude="number").columns.tolist():
                self.logger.warning(
                    "Non-numeric columns are not supported: dropping %s from %s",
                    nonumeric,
                    path.relative_to(exp_output_root),
                )
                df = df.drop(columns=nonumeric)

            # Indices here must match so that the appropriate data from each run
            # are matched with the name of the run in collated performance data.
            to_process.exp_run_names.append(run.name)
            to_process.dfs.append(df)

        return to_process

    def _wait_for_memory(self) -> None:
        """
        Block until enough system memory is available.

        Polls once per second until the percentage of available memory is at
        least ``100 - gather_opts["processing_mem_limit"]``.
        """
        while True:
            mem = psutil.virtual_memory()
            avail = mem.available / mem.total
            free_percent = avail * 100
            free_limit = 100 - self.gather_opts["processing_mem_limit"]

            if free_percent >= free_limit:
                return

            self.logger.info(
                "Waiting for memory: avail=%s,min=%s", free_percent, free_limit
            )
            time.sleep(1)

    def _verify_exp_outputs(self, exp_output_root: pathlib.Path) -> None:
        """
        Verify the integrity of all runs in an experiment.

        Specifically, for every ordered pair of distinct runs:

        - Every output file of the first run exists in the second run.

        - Output files with the same name have the same # rows and
          columns.

        Note that NaN content is not checked by this method.
        """
        # Materialize the listing.  iterdir() returns a one-shot iterator; if
        # it is shared between the nested loops below, the inner loop exhausts
        # it during the first outer iteration and most pairs are never checked.
        experiments = list(exp_output_root.iterdir())

        self.logger.info("Verifying results in %s...", exp_output_root.name)

        start = time.time()

        for exp1 in experiments:
            csv_root1 = exp1 / str(self.run_metrics_leaf)

            for exp2 in experiments:
                # Comparing a run against itself cannot find anything.
                if exp2 == exp1:
                    continue

                csv_root2 = exp2 / str(self.run_metrics_leaf)

                if not csv_root2.is_dir():
                    continue

                # The pairwise check is asymmetric (it walks the files of the
                # first root), so both orderings of each pair are needed.
                self._verify_exp_outputs_pairwise(exp_output_root, csv_root1, csv_root2)

        elapsed = int(time.time() - start)
        sec = datetime.timedelta(seconds=elapsed)
        self.logger.info(
            "Done verifying results in <batch_output_root>/%s: %s",
            exp_output_root.name,
            sec,
        )

    def _verify_exp_outputs_pairwise(
        self,
        exp_output_root: pathlib.Path,
        ofile_root1: pathlib.Path,
        ofile_root2: pathlib.Path,
    ) -> None:
        """
        Verify every output file under one run against its twin in another.

        Args:
            exp_output_root: Experiment output root; only used to shorten paths
                             in log messages.

            ofile_root1: Output file root of the run whose files are walked.

            ofile_root2: Output file root of the run compared against.

        Raises:
            AssertionError: If a file is missing from either run, or the two
                            dataframes differ in columns or # rows.
        """
        for ofile in ofile_root1.rglob("*"):
            path1 = ofile

            # rglob() descends into subdirectories, so the counterpart must be
            # located via the path relative to the root, not the bare filename.
            path2 = ofile_root2 / ofile.relative_to(ofile_root1)

            # If either path is a directory, that directory MIGHT contain
            # imagizing data. We use the following heuristic:
            #
            # If the directory only contains files AND all the files have the
            # same extension AND all the files contain the directory name, we
            # conclude that the directory contains imagizing data and skip it.
            #
            # Otherwise, check it, as projects/engines can output their data in
            # a directory tree, and we want to verify that.
            if (
                path1.is_dir()
                and path2.is_dir()
                and all(f.is_file() and path1.name in f.name for f in path1.iterdir())
                and all(f.is_file() and path2.name in f.name for f in path2.iterdir())
            ):
                self.logger.debug(
                    (
                        "Not verifying {<exp_output_root>/%s,<exp_output_root>/%s} pairwise: "
                        "contains data for imagizing"
                    ),
                    path1.relative_to(exp_output_root),
                    path2.relative_to(exp_output_root),
                )
                continue

            # Plain directories hold no data themselves; their contents are
            # visited by rglob().
            if path1.is_dir() or path2.is_dir():
                continue

            # A file whose name contains its parent directory's name is treated
            # as imagizing data (same heuristic as above) and is not verified.
            if path1.parent.name in path1.name or path2.parent.name in path2.name:
                self.logger.trace(
                    (
                        "Not verifying {<exp_output_root>/%s,<exp_output_root>/%s} pairwise: "
                        "imagizing data"
                    ),
                    path1.relative_to(exp_output_root),
                    path2.relative_to(exp_output_root),
                )
                continue

            assert utils.path_exists(path1) and utils.path_exists(
                path2
            ), f"Either {path1} or {path2} does not exist"

            # Verify both dataframes have same # columns, and that
            # column sets are identical
            df1 = storage.df_read(path1, self.gather_opts["storage"])
            df2 = storage.df_read(path2, self.gather_opts["storage"])

            assert len(df1.columns) == len(
                df2.columns
            ), f"Dataframes from {path1} and {path2} do not have the same # columns"
            assert sorted(df1.columns) == sorted(
                df2.columns
            ), f"Columns from {path1} and {path2} not identical"

            # Verify the length of all columns in both dataframes is the same.
            # All columns of one dataframe share its index, so comparing the
            # # rows of the two dataframes is sufficient.
            assert len(df1.index) == len(
                df2.index
            ), f"Not all columns from {path1} and {path2} have the same length"
|
319
|
+
|
320
|
+
|
321
|
+
# ProcessSpec is exported as well: it is the payload type BaseGatherer puts on
# the processing queue, so consumers of the queue need it.
__all__ = ["GatherSpec", "ProcessSpec", "BaseGatherer"]
|
@@ -2,25 +2,19 @@
|
|
2
2
|
#
|
3
3
|
# SPDX-License-Identifier: MIT
|
4
4
|
|
5
|
-
"""Stage 3 of the experimental pipeline: processing experimental results.
|
6
|
-
|
7
|
-
"""
|
5
|
+
"""Stage 3 of the experimental pipeline: processing experimental results."""
|
8
6
|
|
9
7
|
# Core packages
|
10
8
|
import time
|
11
9
|
import datetime
|
12
10
|
import logging
|
13
|
-
import pathlib
|
14
11
|
|
15
12
|
# 3rd party packages
|
16
|
-
import yaml
|
17
13
|
|
18
14
|
# Project packages
|
19
|
-
from sierra.core.pipeline.stage3.statistics_calculator import BatchExpParallelCalculator
|
20
|
-
from sierra.core.pipeline.stage3.run_collator import ExperimentalRunParallelCollator
|
21
|
-
from sierra.core.pipeline.stage3.imagizer import BatchExpParallelImagizer
|
22
15
|
import sierra.core.variables.batch_criteria as bc
|
23
|
-
from sierra.core import types,
|
16
|
+
from sierra.core import types, batchroot
|
17
|
+
import sierra.core.plugin as pm
|
24
18
|
|
25
19
|
|
26
20
|
class PipelineStage3:
|
@@ -28,91 +22,50 @@ class PipelineStage3:
|
|
28
22
|
|
29
23
|
Currently this includes:
|
30
24
|
|
31
|
-
|
32
|
-
|
33
|
-
|
25
|
+
- Generating statistics from results for generating per-experiment
|
26
|
+
graphs during stage 4. This can generate :term:`Processed Output
|
27
|
+
Data` files, among other statistics.
|
34
28
|
|
35
|
-
|
36
|
-
|
29
|
+
- Collating results across experiments for generating inter-experiment
|
30
|
+
graphs during stage 4.
|
37
31
|
|
38
|
-
|
39
|
-
|
32
|
+
- Generating image files from project metric collection for later use in
|
33
|
+
video rendering in stage 4.
|
40
34
|
|
41
35
|
This stage is idempotent.
|
42
|
-
|
43
36
|
"""
|
44
37
|
|
45
|
-
def __init__(
|
38
|
+
def __init__(
|
39
|
+
self,
|
40
|
+
main_config: types.YAMLDict,
|
41
|
+
cmdopts: types.Cmdopts,
|
42
|
+
pathset: batchroot.PathSet,
|
43
|
+
) -> None:
|
46
44
|
self.logger = logging.getLogger(__name__)
|
47
45
|
self.main_config = main_config
|
48
46
|
self.cmdopts = cmdopts
|
47
|
+
self.pathset = pathset
|
48
|
+
|
49
|
+
def run(self, criteria: bc.XVarBatchCriteria) -> None:
|
50
|
+
spec = self.cmdopts["proc"]
|
51
|
+
self.logger.info(
|
52
|
+
"Processing data with %s processing plugins: %s", len(spec), spec
|
53
|
+
)
|
54
|
+
for s in spec:
|
55
|
+
module = pm.pipeline.get_plugin_module(s)
|
56
|
+
self.logger.info(
|
57
|
+
"Running %s in <batchroot>/%s",
|
58
|
+
s,
|
59
|
+
self.pathset.output_root.relative_to(self.pathset.root),
|
60
|
+
)
|
49
61
|
|
50
|
-
def run(self, criteria: bc.IConcreteBatchCriteria) -> None:
|
51
|
-
self._run_statistics(self.main_config, self.cmdopts, criteria)
|
52
|
-
self._run_run_collation(self.main_config, self.cmdopts, criteria)
|
53
|
-
|
54
|
-
if self.cmdopts['project_imagizing']:
|
55
|
-
intra_HM_path = pathlib.Path(self.cmdopts['project_config_root']) \
|
56
|
-
/ pathlib.Path('intra-graphs-hm.yaml')
|
57
|
-
|
58
|
-
if utils.path_exists(intra_HM_path):
|
59
|
-
self.logger.info(("Loading intra-experiment heatmap config for "
|
60
|
-
"project '%s'"),
|
61
|
-
self.cmdopts['project'])
|
62
|
-
intra_HM_config = yaml.load(utils.utf8open(intra_HM_path),
|
63
|
-
yaml.FullLoader)
|
64
|
-
self._run_imagizing(self.main_config,
|
65
|
-
intra_HM_config,
|
66
|
-
self.cmdopts,
|
67
|
-
criteria)
|
68
|
-
|
69
|
-
else:
|
70
|
-
self.logger.warning("%s does not exist--cannot imagize",
|
71
|
-
intra_HM_path)
|
72
|
-
|
73
|
-
# Private functions
|
74
|
-
|
75
|
-
def _run_statistics(self,
|
76
|
-
main_config: dict,
|
77
|
-
cmdopts: types.Cmdopts, criteria:
|
78
|
-
bc.IConcreteBatchCriteria):
|
79
|
-
self.logger.info("Generating statistics from experiment outputs in %s...",
|
80
|
-
cmdopts['batch_output_root'])
|
81
|
-
start = time.time()
|
82
|
-
BatchExpParallelCalculator(main_config, cmdopts)(criteria)
|
83
|
-
elapsed = int(time.time() - start)
|
84
|
-
sec = datetime.timedelta(seconds=elapsed)
|
85
|
-
self.logger.info("Statistics generation complete in %s", str(sec))
|
86
|
-
|
87
|
-
def _run_run_collation(self,
|
88
|
-
main_config: dict,
|
89
|
-
cmdopts: types.Cmdopts, criteria:
|
90
|
-
bc.IConcreteBatchCriteria):
|
91
|
-
if not self.cmdopts['skip_collate']:
|
92
|
-
self.logger.info("Collating experiment run outputs into %s...",
|
93
|
-
cmdopts['batch_stat_collate_root'])
|
94
62
|
start = time.time()
|
95
|
-
|
63
|
+
module.proc_batch_exp(
|
64
|
+
self.main_config, self.cmdopts, self.pathset, criteria
|
65
|
+
)
|
96
66
|
elapsed = int(time.time() - start)
|
97
67
|
sec = datetime.timedelta(seconds=elapsed)
|
98
|
-
self.logger.info(
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
main_config: dict,
|
103
|
-
intra_HM_config: dict,
|
104
|
-
cmdopts: types.Cmdopts,
|
105
|
-
criteria: bc.IConcreteBatchCriteria):
|
106
|
-
self.logger.info("Imagizing .csvs in %s...",
|
107
|
-
cmdopts['batch_output_root'])
|
108
|
-
start = time.time()
|
109
|
-
BatchExpParallelImagizer(main_config, cmdopts)(
|
110
|
-
intra_HM_config, criteria)
|
111
|
-
elapsed = int(time.time() - start)
|
112
|
-
sec = datetime.timedelta(seconds=elapsed)
|
113
|
-
self.logger.info("Imagizing complete: %s", str(sec))
|
114
|
-
|
115
|
-
|
116
|
-
__api__ = [
|
117
|
-
'PipelineStage3'
|
118
|
-
]
|
68
|
+
self.logger.info("Processing with %s complete in %s", s, str(sec))
|
69
|
+
|
70
|
+
|
71
|
+
__all__ = ["PipelineStage3"]
|
@@ -1,2 +1,12 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
#
|
2
|
+
# Copyright 2025 John Harwell, All rights reserved.
|
3
|
+
#
|
4
|
+
# SPDX-License-Identifier: MIT
|
5
|
+
#
|
6
|
+
"""Container module for stage 4 of the pipeline."""
|
7
|
+
|
8
|
+
# Core packages
|
9
|
+
|
10
|
+
# 3rd party packages
|
11
|
+
|
12
|
+
# Project packages
|