sierra-research 1.3.6__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sierra/__init__.py +3 -3
- sierra/core/__init__.py +3 -3
- sierra/core/batchroot.py +223 -0
- sierra/core/cmdline.py +681 -1057
- sierra/core/compare.py +11 -0
- sierra/core/config.py +96 -88
- sierra/core/engine.py +306 -0
- sierra/core/execenv.py +380 -0
- sierra/core/expdef.py +11 -0
- sierra/core/experiment/__init__.py +1 -0
- sierra/core/experiment/bindings.py +150 -101
- sierra/core/experiment/definition.py +414 -245
- sierra/core/experiment/spec.py +83 -85
- sierra/core/exproot.py +44 -0
- sierra/core/generators/__init__.py +10 -0
- sierra/core/generators/experiment.py +528 -0
- sierra/core/generators/generator_factory.py +138 -137
- sierra/core/graphs/__init__.py +23 -0
- sierra/core/graphs/bcbridge.py +94 -0
- sierra/core/graphs/heatmap.py +245 -324
- sierra/core/graphs/pathset.py +27 -0
- sierra/core/graphs/schema.py +77 -0
- sierra/core/graphs/stacked_line.py +341 -0
- sierra/core/graphs/summary_line.py +506 -0
- sierra/core/logging.py +3 -2
- sierra/core/models/__init__.py +3 -1
- sierra/core/models/info.py +19 -0
- sierra/core/models/interface.py +52 -122
- sierra/core/pipeline/__init__.py +2 -5
- sierra/core/pipeline/pipeline.py +228 -126
- sierra/core/pipeline/stage1/__init__.py +10 -0
- sierra/core/pipeline/stage1/pipeline_stage1.py +45 -31
- sierra/core/pipeline/stage2/__init__.py +10 -0
- sierra/core/pipeline/stage2/pipeline_stage2.py +8 -11
- sierra/core/pipeline/stage2/runner.py +401 -0
- sierra/core/pipeline/stage3/__init__.py +12 -0
- sierra/core/pipeline/stage3/gather.py +321 -0
- sierra/core/pipeline/stage3/pipeline_stage3.py +37 -84
- sierra/core/pipeline/stage4/__init__.py +12 -2
- sierra/core/pipeline/stage4/pipeline_stage4.py +36 -354
- sierra/core/pipeline/stage5/__init__.py +12 -0
- sierra/core/pipeline/stage5/pipeline_stage5.py +33 -208
- sierra/core/pipeline/yaml.py +48 -0
- sierra/core/plugin.py +529 -62
- sierra/core/proc.py +11 -0
- sierra/core/prod.py +11 -0
- sierra/core/ros1/__init__.py +5 -1
- sierra/core/ros1/callbacks.py +22 -21
- sierra/core/ros1/cmdline.py +59 -88
- sierra/core/ros1/generators.py +159 -175
- sierra/core/ros1/variables/__init__.py +3 -0
- sierra/core/ros1/variables/exp_setup.py +122 -116
- sierra/core/startup.py +106 -76
- sierra/core/stat_kernels.py +4 -5
- sierra/core/storage.py +13 -32
- sierra/core/trampoline.py +30 -0
- sierra/core/types.py +116 -71
- sierra/core/utils.py +103 -106
- sierra/core/variables/__init__.py +1 -1
- sierra/core/variables/base_variable.py +12 -17
- sierra/core/variables/batch_criteria.py +387 -481
- sierra/core/variables/builtin.py +135 -0
- sierra/core/variables/exp_setup.py +19 -39
- sierra/core/variables/population_size.py +72 -76
- sierra/core/variables/variable_density.py +44 -68
- sierra/core/vector.py +1 -1
- sierra/main.py +256 -88
- sierra/plugins/__init__.py +119 -0
- sierra/plugins/compare/__init__.py +14 -0
- sierra/plugins/compare/graphs/__init__.py +19 -0
- sierra/plugins/compare/graphs/cmdline.py +120 -0
- sierra/plugins/compare/graphs/comparator.py +291 -0
- sierra/plugins/compare/graphs/inter_controller.py +531 -0
- sierra/plugins/compare/graphs/inter_scenario.py +297 -0
- sierra/plugins/compare/graphs/namecalc.py +53 -0
- sierra/plugins/compare/graphs/outputroot.py +73 -0
- sierra/plugins/compare/graphs/plugin.py +147 -0
- sierra/plugins/compare/graphs/preprocess.py +172 -0
- sierra/plugins/compare/graphs/schema.py +37 -0
- sierra/plugins/engine/__init__.py +14 -0
- sierra/plugins/engine/argos/__init__.py +18 -0
- sierra/plugins/{platform → engine}/argos/cmdline.py +144 -151
- sierra/plugins/{platform/argos/variables → engine/argos/generators}/__init__.py +5 -0
- sierra/plugins/engine/argos/generators/engine.py +394 -0
- sierra/plugins/engine/argos/plugin.py +393 -0
- sierra/plugins/{platform/argos/generators → engine/argos/variables}/__init__.py +5 -0
- sierra/plugins/engine/argos/variables/arena_shape.py +183 -0
- sierra/plugins/engine/argos/variables/cameras.py +240 -0
- sierra/plugins/engine/argos/variables/constant_density.py +112 -0
- sierra/plugins/engine/argos/variables/exp_setup.py +82 -0
- sierra/plugins/{platform → engine}/argos/variables/physics_engines.py +83 -87
- sierra/plugins/engine/argos/variables/population_constant_density.py +178 -0
- sierra/plugins/engine/argos/variables/population_size.py +115 -0
- sierra/plugins/engine/argos/variables/population_variable_density.py +123 -0
- sierra/plugins/engine/argos/variables/rendering.py +108 -0
- sierra/plugins/engine/ros1gazebo/__init__.py +18 -0
- sierra/plugins/engine/ros1gazebo/cmdline.py +175 -0
- sierra/plugins/{platform/ros1robot → engine/ros1gazebo}/generators/__init__.py +5 -0
- sierra/plugins/engine/ros1gazebo/generators/engine.py +125 -0
- sierra/plugins/engine/ros1gazebo/plugin.py +404 -0
- sierra/plugins/engine/ros1gazebo/variables/__init__.py +15 -0
- sierra/plugins/engine/ros1gazebo/variables/population_size.py +214 -0
- sierra/plugins/engine/ros1robot/__init__.py +18 -0
- sierra/plugins/engine/ros1robot/cmdline.py +159 -0
- sierra/plugins/{platform/ros1gazebo → engine/ros1robot}/generators/__init__.py +4 -0
- sierra/plugins/engine/ros1robot/generators/engine.py +95 -0
- sierra/plugins/engine/ros1robot/plugin.py +410 -0
- sierra/plugins/{hpc/local → engine/ros1robot/variables}/__init__.py +5 -0
- sierra/plugins/engine/ros1robot/variables/population_size.py +146 -0
- sierra/plugins/execenv/__init__.py +11 -0
- sierra/plugins/execenv/hpc/__init__.py +18 -0
- sierra/plugins/execenv/hpc/adhoc/__init__.py +18 -0
- sierra/plugins/execenv/hpc/adhoc/cmdline.py +30 -0
- sierra/plugins/execenv/hpc/adhoc/plugin.py +131 -0
- sierra/plugins/execenv/hpc/cmdline.py +137 -0
- sierra/plugins/execenv/hpc/local/__init__.py +18 -0
- sierra/plugins/execenv/hpc/local/cmdline.py +31 -0
- sierra/plugins/execenv/hpc/local/plugin.py +145 -0
- sierra/plugins/execenv/hpc/pbs/__init__.py +18 -0
- sierra/plugins/execenv/hpc/pbs/cmdline.py +30 -0
- sierra/plugins/execenv/hpc/pbs/plugin.py +121 -0
- sierra/plugins/execenv/hpc/slurm/__init__.py +18 -0
- sierra/plugins/execenv/hpc/slurm/cmdline.py +30 -0
- sierra/plugins/execenv/hpc/slurm/plugin.py +133 -0
- sierra/plugins/execenv/prefectserver/__init__.py +18 -0
- sierra/plugins/execenv/prefectserver/cmdline.py +66 -0
- sierra/plugins/execenv/prefectserver/dockerremote/__init__.py +18 -0
- sierra/plugins/execenv/prefectserver/dockerremote/cmdline.py +66 -0
- sierra/plugins/execenv/prefectserver/dockerremote/plugin.py +132 -0
- sierra/plugins/execenv/prefectserver/flow.py +66 -0
- sierra/plugins/execenv/prefectserver/local/__init__.py +18 -0
- sierra/plugins/execenv/prefectserver/local/cmdline.py +29 -0
- sierra/plugins/execenv/prefectserver/local/plugin.py +133 -0
- sierra/plugins/{hpc/adhoc → execenv/robot}/__init__.py +1 -0
- sierra/plugins/execenv/robot/turtlebot3/__init__.py +18 -0
- sierra/plugins/execenv/robot/turtlebot3/plugin.py +204 -0
- sierra/plugins/expdef/__init__.py +14 -0
- sierra/plugins/expdef/json/__init__.py +14 -0
- sierra/plugins/expdef/json/plugin.py +504 -0
- sierra/plugins/expdef/xml/__init__.py +14 -0
- sierra/plugins/expdef/xml/plugin.py +386 -0
- sierra/{core/hpc → plugins/proc}/__init__.py +1 -1
- sierra/plugins/proc/collate/__init__.py +15 -0
- sierra/plugins/proc/collate/cmdline.py +47 -0
- sierra/plugins/proc/collate/plugin.py +271 -0
- sierra/plugins/proc/compress/__init__.py +18 -0
- sierra/plugins/proc/compress/cmdline.py +47 -0
- sierra/plugins/proc/compress/plugin.py +123 -0
- sierra/plugins/proc/decompress/__init__.py +18 -0
- sierra/plugins/proc/decompress/plugin.py +96 -0
- sierra/plugins/proc/imagize/__init__.py +15 -0
- sierra/plugins/proc/imagize/cmdline.py +49 -0
- sierra/plugins/proc/imagize/plugin.py +270 -0
- sierra/plugins/proc/modelrunner/__init__.py +16 -0
- sierra/plugins/proc/modelrunner/plugin.py +250 -0
- sierra/plugins/proc/statistics/__init__.py +15 -0
- sierra/plugins/proc/statistics/cmdline.py +64 -0
- sierra/plugins/proc/statistics/plugin.py +390 -0
- sierra/plugins/{hpc → prod}/__init__.py +1 -0
- sierra/plugins/prod/graphs/__init__.py +18 -0
- sierra/plugins/prod/graphs/cmdline.py +269 -0
- sierra/plugins/prod/graphs/collate.py +279 -0
- sierra/plugins/prod/graphs/inter/__init__.py +13 -0
- sierra/plugins/prod/graphs/inter/generate.py +83 -0
- sierra/plugins/prod/graphs/inter/heatmap.py +86 -0
- sierra/plugins/prod/graphs/inter/line.py +134 -0
- sierra/plugins/prod/graphs/intra/__init__.py +15 -0
- sierra/plugins/prod/graphs/intra/generate.py +202 -0
- sierra/plugins/prod/graphs/intra/heatmap.py +74 -0
- sierra/plugins/prod/graphs/intra/line.py +114 -0
- sierra/plugins/prod/graphs/plugin.py +103 -0
- sierra/plugins/prod/graphs/targets.py +63 -0
- sierra/plugins/prod/render/__init__.py +18 -0
- sierra/plugins/prod/render/cmdline.py +72 -0
- sierra/plugins/prod/render/plugin.py +282 -0
- sierra/plugins/storage/__init__.py +5 -0
- sierra/plugins/storage/arrow/__init__.py +18 -0
- sierra/plugins/storage/arrow/plugin.py +38 -0
- sierra/plugins/storage/csv/__init__.py +9 -0
- sierra/plugins/storage/csv/plugin.py +12 -5
- sierra/version.py +3 -2
- sierra_research-1.5.0.dist-info/METADATA +238 -0
- sierra_research-1.5.0.dist-info/RECORD +186 -0
- {sierra_research-1.3.6.dist-info → sierra_research-1.5.0.dist-info}/WHEEL +1 -2
- sierra/core/experiment/xml.py +0 -454
- sierra/core/generators/controller_generator_parser.py +0 -34
- sierra/core/generators/exp_creator.py +0 -351
- sierra/core/generators/exp_generators.py +0 -142
- sierra/core/graphs/scatterplot2D.py +0 -109
- sierra/core/graphs/stacked_line_graph.py +0 -249
- sierra/core/graphs/stacked_surface_graph.py +0 -220
- sierra/core/graphs/summary_line_graph.py +0 -369
- sierra/core/hpc/cmdline.py +0 -142
- sierra/core/models/graphs.py +0 -87
- sierra/core/pipeline/stage2/exp_runner.py +0 -286
- sierra/core/pipeline/stage3/imagizer.py +0 -149
- sierra/core/pipeline/stage3/run_collator.py +0 -317
- sierra/core/pipeline/stage3/statistics_calculator.py +0 -478
- sierra/core/pipeline/stage4/graph_collator.py +0 -319
- sierra/core/pipeline/stage4/inter_exp_graph_generator.py +0 -240
- sierra/core/pipeline/stage4/intra_exp_graph_generator.py +0 -317
- sierra/core/pipeline/stage4/model_runner.py +0 -168
- sierra/core/pipeline/stage4/rendering.py +0 -283
- sierra/core/pipeline/stage4/yaml_config_loader.py +0 -103
- sierra/core/pipeline/stage5/inter_scenario_comparator.py +0 -328
- sierra/core/pipeline/stage5/intra_scenario_comparator.py +0 -989
- sierra/core/platform.py +0 -493
- sierra/core/plugin_manager.py +0 -369
- sierra/core/root_dirpath_generator.py +0 -241
- sierra/plugins/hpc/adhoc/plugin.py +0 -125
- sierra/plugins/hpc/local/plugin.py +0 -81
- sierra/plugins/hpc/pbs/__init__.py +0 -9
- sierra/plugins/hpc/pbs/plugin.py +0 -126
- sierra/plugins/hpc/slurm/__init__.py +0 -9
- sierra/plugins/hpc/slurm/plugin.py +0 -130
- sierra/plugins/platform/__init__.py +0 -9
- sierra/plugins/platform/argos/__init__.py +0 -9
- sierra/plugins/platform/argos/generators/platform_generators.py +0 -383
- sierra/plugins/platform/argos/plugin.py +0 -337
- sierra/plugins/platform/argos/variables/arena_shape.py +0 -145
- sierra/plugins/platform/argos/variables/cameras.py +0 -243
- sierra/plugins/platform/argos/variables/constant_density.py +0 -136
- sierra/plugins/platform/argos/variables/exp_setup.py +0 -113
- sierra/plugins/platform/argos/variables/population_constant_density.py +0 -175
- sierra/plugins/platform/argos/variables/population_size.py +0 -102
- sierra/plugins/platform/argos/variables/population_variable_density.py +0 -132
- sierra/plugins/platform/argos/variables/rendering.py +0 -104
- sierra/plugins/platform/ros1gazebo/__init__.py +0 -9
- sierra/plugins/platform/ros1gazebo/cmdline.py +0 -213
- sierra/plugins/platform/ros1gazebo/generators/platform_generators.py +0 -137
- sierra/plugins/platform/ros1gazebo/plugin.py +0 -335
- sierra/plugins/platform/ros1gazebo/variables/__init__.py +0 -10
- sierra/plugins/platform/ros1gazebo/variables/population_size.py +0 -204
- sierra/plugins/platform/ros1robot/__init__.py +0 -9
- sierra/plugins/platform/ros1robot/cmdline.py +0 -175
- sierra/plugins/platform/ros1robot/generators/platform_generators.py +0 -112
- sierra/plugins/platform/ros1robot/plugin.py +0 -373
- sierra/plugins/platform/ros1robot/variables/__init__.py +0 -10
- sierra/plugins/platform/ros1robot/variables/population_size.py +0 -146
- sierra/plugins/robot/__init__.py +0 -9
- sierra/plugins/robot/turtlebot3/__init__.py +0 -9
- sierra/plugins/robot/turtlebot3/plugin.py +0 -194
- sierra_research-1.3.6.data/data/share/man/man1/sierra-cli.1 +0 -2349
- sierra_research-1.3.6.data/data/share/man/man7/sierra-examples.7 +0 -488
- sierra_research-1.3.6.data/data/share/man/man7/sierra-exec-envs.7 +0 -331
- sierra_research-1.3.6.data/data/share/man/man7/sierra-glossary.7 +0 -285
- sierra_research-1.3.6.data/data/share/man/man7/sierra-platforms.7 +0 -358
- sierra_research-1.3.6.data/data/share/man/man7/sierra-usage.7 +0 -725
- sierra_research-1.3.6.data/data/share/man/man7/sierra.7 +0 -78
- sierra_research-1.3.6.dist-info/METADATA +0 -500
- sierra_research-1.3.6.dist-info/RECORD +0 -133
- sierra_research-1.3.6.dist-info/top_level.txt +0 -1
- {sierra_research-1.3.6.dist-info → sierra_research-1.5.0.dist-info}/entry_points.txt +0 -0
- {sierra_research-1.3.6.dist-info → sierra_research-1.5.0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,271 @@
|
|
1
|
+
# Copyright 2019 John Harwell, All rights reserved.
|
2
|
+
#
|
3
|
+
# SPDX-License-Identifier: MIT
|
4
|
+
|
5
|
+
"""
|
6
|
+
Classes for collating data within a :term:`Batch Experiment`.
|
7
|
+
|
8
|
+
Collation is the process of "lifting" data from :term:`Experimental Runs
|
9
|
+
<Experimental Run>` across all :term:`Experiment` for all experiments in a
|
10
|
+
:term:`Batch Experiment` into a single file (a reduce operation). This is
|
11
|
+
needed to correctly calculate summary statistics for performance measures in
|
12
|
+
stage 3: you can't just run the calculated stddev through the calculations
|
13
|
+
because comparing curves of stddev is not meaningful.
|
14
|
+
"""
|
15
|
+
|
16
|
+
# Core packages
|
17
|
+
import multiprocessing as mp
|
18
|
+
import typing as tp
|
19
|
+
import queue
|
20
|
+
import logging
|
21
|
+
import pathlib
|
22
|
+
|
23
|
+
# 3rd party packages
|
24
|
+
import pandas as pd
|
25
|
+
import yaml
|
26
|
+
|
27
|
+
# Project packages
|
28
|
+
import sierra.core.variables.batch_criteria as bc
|
29
|
+
import sierra.core.plugin as pm
|
30
|
+
from sierra.core import types, storage, utils, config, batchroot
|
31
|
+
from sierra.core.pipeline.stage3 import gather
|
32
|
+
|
33
|
+
_logger = logging.getLogger(__name__)
|
34
|
+
|
35
|
+
|
36
|
+
def proc_batch_exp(
    main_config: dict,
    cmdopts: types.Cmdopts,
    pathset: batchroot.PathSet,
    criteria: bc.XVarBatchCriteria,
) -> None:
    """Generate :term:`Collated Output Data` files for each experiment.

    :term:`Collated Output Data` files generated from :term:`Raw Output Data`
    files across :term:`Experimental Runs <Experimental Run>`. Gathered in
    parallel for each experiment for speed, unless disabled with
    ``--processing-parallelism``.
    """
    pool_opts = {"parallelism": cmdopts["processing_parallelism"]}

    # Options forwarded to the gather/process workers; all but the template
    # leaf come straight from the cmdline.
    worker_opts = {
        key: cmdopts[key]
        for key in (
            "project",
            "df_verify",
            "processing_mem_limit",
            "storage",
            "df_homogenize",
            "project_config_root",
        )
    }
    worker_opts["template_input_leaf"] = pathlib.Path(cmdopts["expdef_template"]).stem

    exp_to_proc = utils.exp_range_calc(
        cmdopts["exp_range"], pathset.output_root, criteria.gen_exp_names()
    )

    with mp.Pool(processes=pool_opts["parallelism"]) as pool:
        _execute_for_batch(
            main_config, pathset, exp_to_proc, worker_opts, pool_opts, pool
        )
def _execute_for_batch(
    main_config: types.YAMLDict,
    pathset: batchroot.PathSet,
    exp_to_proc: tp.List[pathlib.Path],
    worker_opts: types.SimpleDict,
    pool_opts: types.SimpleDict,
    pool,
) -> None:
    """Run gathering, then processing, for all experiments over the pool.

    The two phases are strictly sequential: every gather task is joined
    (``g.get()``) before any processor task is started, so presumably
    ``processq`` is fully populated by the gatherers before it is drained
    (TODO confirm against ExpDataGatherer).
    """
    m = mp.Manager()
    gatherq = m.Queue()
    processq = m.Queue()

    # Seed the gather queue: one entry per experiment to process.
    for exp in exp_to_proc:
        gatherq.put(exp)

    _logger.debug(
        "Starting %d gatherers, method=%s",
        pool_opts["parallelism"],
        mp.get_start_method(),
    )

    # One gather worker per unit of configured parallelism; workers exit on
    # queue-empty timeout.
    gathered = [
        pool.apply_async(_gather_worker, (gatherq, processq, main_config, worker_opts))
        for _ in range(0, pool_opts["parallelism"])
    ]
    _logger.debug("Waiting for gathering to finish")
    for g in gathered:
        g.get()

    _logger.debug(
        "Starting %d processors, method=%s",
        pool_opts["parallelism"],
        mp.get_start_method(),
    )
    processed = [
        pool.apply_async(
            _process_worker,
            (processq, main_config, pathset.stat_interexp_root, worker_opts),
        )
        for _ in range(0, pool_opts["parallelism"])
    ]

    # To capture the otherwise silent crashes when something goes wrong in
    # worker threads. Any assertions will show and any exceptions will be
    # re-raised.
    for p in processed:
        p.get()

    pool.close()
    pool.join()
    _logger.debug("Processing finished")
def _gather_worker(
    gatherq: mp.Queue,
    processq: mp.Queue,
    main_config: types.YAMLDict,
    gather_opts: types.SimpleDict,
) -> None:
    """Drain ``gatherq``, running the data gatherer on each queued experiment.

    Exits once the queue has been empty for 3 seconds.
    """
    gatherer = ExpDataGatherer(main_config, gather_opts, processq)

    while True:
        try:
            # Bail after the queue has been empty for 3 seconds.
            exp_root = gatherq.get(True, 3)
            gatherer(exp_root)
            gatherq.task_done()
        except queue.Empty:
            return
def _process_worker(
    processq: mp.Queue,
    main_config: types.YAMLDict,
    batch_stat_interexp_root: pathlib.Path,
    process_opts: types.SimpleDict,
) -> None:
    """Drain ``processq``, collating each gathered spec it contains.

    Exits once the queue has been empty for 3 seconds.
    """
    while True:
        try:
            # Bail after the queue has been empty for 3 seconds.
            item = processq.get(True, 3)
        except queue.Empty:
            return

        _proc_single_exp(main_config, batch_stat_interexp_root, process_opts, item)
        processq.task_done()
class ExpDataGatherer(gather.BaseGatherer):
    """Gather :term:`Raw Output Data` files across all runs for :term:`Data Collation`.

    The configured output directory for each run is searched recursively for
    files to gather. To be eligible for gathering and later processing, files
    must:

    - Be non-empty

    - Have a suffix which is supported by the selected ``--storage`` plugin.

    - Have a name (last part of absolute path, including extension) which
      matches a configured :term:`Product` in a YAML file. E.g., a graph
      from the :ref:`plugins/prod/graphs` plugin
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.logger = logging.getLogger(__name__)

    def calc_gather_items(
        self, run_output_root: pathlib.Path, exp_name: str
    ) -> tp.List[gather.GatherSpec]:
        """Compute the set of files to gather from a single run's output tree.

        Arguments:
            run_output_root: Output root for one experimental run.

            exp_name: Name of the experiment the run belongs to.
        """
        to_gather: tp.List[gather.GatherSpec] = []
        proj_output_root = run_output_root / str(self.run_metrics_leaf)
        plugin = pm.pipeline.get_plugin_module(self.gather_opts["storage"])

        config_path = pathlib.Path(
            self.gather_opts["project_config_root"], config.kYAML.collate
        )

        try:
            collate_config = yaml.load(utils.utf8open(config_path), yaml.FullLoader)
        except FileNotFoundError:
            self.logger.warning("%s does not exist!", config_path)
            collate_config = {}

        # Hoisted out of the loop: the set of collatable files is
        # loop-invariant. Using .get() also fixes a KeyError which previously
        # occurred when the config file was missing (the {} fallback above) or
        # did not define an 'intra-exp' section.
        files = collate_config.get("intra-exp", [])
        if not files:
            return to_gather

        for item in proj_output_root.rglob("*"):
            # Must be a file (duh)
            if not item.is_file():
                continue

            # Has to be a non-empty file with a suffix supported by the
            # storage plugin.
            if (
                not any(s in plugin.suffixes() for s in item.suffixes)
                or item.stat().st_size == 0
            ):
                continue

            # Any number of perf metrics can be configured, so look for a match.
            perf_confs = [f for f in files if f["file"] in item.name]
            if not perf_confs:
                continue

            # If we get a file match, then all the columns from that file should
            # be added to the set of things to collate.
            for conf in perf_confs:
                for col in conf["cols"]:
                    to_gather.append(
                        gather.GatherSpec(
                            exp_name=exp_name,
                            item_stem_path=item.relative_to(proj_output_root),
                            collate_col=col,
                        )
                    )
        return to_gather
def _proc_single_exp(
    main_config: types.YAMLDict,
    batch_stat_collate_root: pathlib.Path,
    process_opts: types.SimpleDict,
    spec: gather.ProcessSpec,
) -> None:
    """Collate :term:`Raw Output Data` files together (reduce operation).

    :term:`Raw Output Data` files gathered from N :term:`Experimental Runs
    <Experimental Run>` are combined together into a single :term:`Batch Summary
    Data` file per :term:`Experiment` with 1 column per run.
    """
    utils.dir_create_checked(batch_stat_collate_root, exist_ok=True)

    file_path = spec.gather.item_stem_path
    col = spec.gather.collate_col

    # One column per run, indexed identically to the per-run data.
    combined = pd.DataFrame(index=spec.dfs[0].index, columns=spec.exp_run_names)

    for i, run_df in enumerate(spec.dfs):
        assert col in run_df.columns, f"{col} not in {run_df.columns}"
        combined[spec.exp_run_names[i]] = run_df[col]

    df = utils.df_fill(combined, process_opts["df_homogenize"])

    # This preserves the directory structure of stuff in the per-run output
    # run; if something is in a subdir there, it will show up in a subdir in
    # the collated outputs too.
    parent = batch_stat_collate_root / spec.gather.exp_name / file_path.parent
    utils.dir_create_checked(parent, exist_ok=True)

    fname = f"{file_path.stem}-{col}" + config.kStorageExt["csv"]
    storage.df_write(df, parent / fname, "storage.csv", index=False)
__all__ = ["proc_batch_exp", "ExpDataGatherer"]
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Copyright 2021 John Harwell, All rights reserved.
|
2
|
+
#
|
3
|
+
# SPDX-License-Identifier: MIT
|
4
|
+
"""
|
5
|
+
Container module for the processing plugin to compress data.
|
6
|
+
|
7
|
+
See :ref:`plugins/proc/compress`.
|
8
|
+
"""
|
9
|
+
|
10
|
+
# Core packages
|
11
|
+
|
12
|
+
# 3rd party packages
|
13
|
+
|
14
|
+
# Project packages
|
15
|
+
|
16
|
+
|
17
|
+
def sierra_plugin_type() -> str:
    """Identify this plugin to SIERRA as a pipeline plugin."""
    return "pipeline"
@@ -0,0 +1,47 @@
|
|
1
|
+
#
|
2
|
+
# Copyright 2025 John Harwell, All rights reserved.
|
3
|
+
#
|
4
|
+
# SPDX-License Identifier: MIT
|
5
|
+
#
|
6
|
+
|
7
|
+
# Core packages
|
8
|
+
import typing as tp
|
9
|
+
import argparse
|
10
|
+
|
11
|
+
# 3rd party packages
|
12
|
+
|
13
|
+
# Project packages
|
14
|
+
from sierra.core import types
|
15
|
+
from sierra.plugins import PluginCmdline
|
16
|
+
|
17
|
+
|
18
|
+
def build(
    parents: tp.List[argparse.ArgumentParser], stages: tp.List[int]
) -> PluginCmdline:
    """
    Get a cmdline parser supporting the ``proc.compress`` processing plugin.
    """
    cmdline = PluginCmdline(parents, stages)
    # This plugin only participates in stage 3, so its single option is
    # attached to the stage 3 argument group.
    cmdline.stage3.add_argument(
        "--compress-remove-after",
        action="store_true",
        help="""
             If the ``proc.compress`` plugin is run, remove the uncompressed
             :term:`Raw Output Data` files after compression. This can save
             TONS of disk space. No data is lost because everything output
             by each :term:`Experimental Run` is in the compressed archive.
             """
        + cmdline.stage_usage_doc([3]),
        default=False,
    )
    return cmdline
|
40
|
+
def to_cmdopts(args: argparse.Namespace) -> types.Cmdopts:
    """Map the parsed ``proc.compress`` arguments onto a cmdopts dictionary."""
    opts: types.Cmdopts = {"compress_remove_after": args.compress_remove_after}
    return opts
+
|
46
|
+
def sphinx_cmdline_stage3():
    """Return the stage 3 parser for generating the Sphinx cmdline docs."""
    return build(parents=[], stages=[3]).parser
@@ -0,0 +1,123 @@
|
|
1
|
+
# Copyright 2025 John Harwell, All rights reserved.
|
2
|
+
#
|
3
|
+
# SPDX-License-Identifier: MIT
|
4
|
+
"""
|
5
|
+
Plugin for compressing experiment data. Currently only works with .tar.gz files.
|
6
|
+
"""
|
7
|
+
|
8
|
+
# Core packages
|
9
|
+
import multiprocessing as mp
|
10
|
+
import typing as tp
|
11
|
+
import logging
|
12
|
+
import pathlib
|
13
|
+
import shutil
|
14
|
+
|
15
|
+
# 3rd party packages
|
16
|
+
import tarfile
|
17
|
+
|
18
|
+
# Project packages
|
19
|
+
import sierra.core.variables.batch_criteria as bc
|
20
|
+
from sierra.core import types, utils, batchroot
|
21
|
+
|
22
|
+
_logger = logging.getLogger(__name__)
|
23
|
+
|
24
|
+
|
25
|
+
def proc_batch_exp(
    main_config: types.YAMLDict,
    cmdopts: types.Cmdopts,
    pathset: batchroot.PathSet,
    criteria: bc.XVarBatchCriteria,
) -> None:
    """
    Compress data for each :term:`Experiment` in the :term:`Batch Experiment`.

    Ideally this is done in parallel across experiments, but this can be changed
    to serial if memory on the SIERRA host machine is limited via
    ``--processing-parallelism``.
    """

    exp_to_proc = utils.exp_range_calc(
        cmdopts["exp_range"], pathset.output_root, criteria.gen_exp_names()
    )

    parallelism = cmdopts["processing_parallelism"]

    tasks = []
    run_metrics_leaf = main_config["sierra"]["run"]["run_metrics_leaf"]

    # Build the full task list up front (one task per run directory, per
    # _build_tasklist_for_exp) so the pool can keep all workers busy even when
    # there are more CPUs than experiments.
    for exp in exp_to_proc:
        tasks.extend(
            _build_tasklist_for_exp(
                pathset.output_root / exp.name,
                run_metrics_leaf,
                cmdopts["compress_remove_after"],
            )
        )

    _logger.debug("Starting %s workers, method=%s", parallelism, mp.get_start_method())
    with mp.Pool(processes=parallelism, maxtasksperchild=1) as pool:
        processed = [pool.starmap_async(_worker, tasks)]
        _logger.debug("Waiting for workers to finish")

        # Re-raise any exceptions from the worker processes so failures are
        # not silent.
        for p in processed:
            p.get()

        pool.close()
        pool.join()

    _logger.debug("All workers finished")
71
|
+
def _build_tasklist_for_exp(
|
72
|
+
exp_output_root: pathlib.Path,
|
73
|
+
run_metrics_leaf: pathlib.Path,
|
74
|
+
remove_after: bool,
|
75
|
+
) -> tp.List[tp.Tuple[pathlib.Path, pathlib.Path, bool]]:
|
76
|
+
"""Add root dir each experimental run to queue for processing.
|
77
|
+
|
78
|
+
Enqueueing for processing is done at the file-level rather than
|
79
|
+
per-experiment, so that for systems with more CPUs than experiments you
|
80
|
+
still get maximum throughput.
|
81
|
+
"""
|
82
|
+
res = []
|
83
|
+
for exp in exp_output_root.iterdir():
|
84
|
+
res.append(
|
85
|
+
(
|
86
|
+
exp_output_root,
|
87
|
+
exp.relative_to(exp_output_root) / run_metrics_leaf,
|
88
|
+
remove_after,
|
89
|
+
)
|
90
|
+
)
|
91
|
+
|
92
|
+
return res
|
93
|
+
|
94
|
+
|
95
|
+
def _worker(
|
96
|
+
exp_output_root: pathlib.Path, relpath: pathlib.Path, remove_after: bool
|
97
|
+
) -> None:
|
98
|
+
"""Compress the output root for a single experiment into a tarball.
|
99
|
+
|
100
|
+
Arguments:
|
101
|
+
exp_output_root: Output root for the :term:`Experiment`.
|
102
|
+
|
103
|
+
relpath: Path to the actual tarball relative to the experiment root.
|
104
|
+
"""
|
105
|
+
|
106
|
+
if not (exp_output_root / relpath).exists():
|
107
|
+
_logger.warning(
|
108
|
+
"Cannot compress: %s does not exist", (exp_output_root / relpath)
|
109
|
+
)
|
110
|
+
return
|
111
|
+
|
112
|
+
with tarfile.open(
|
113
|
+
(exp_output_root / relpath).with_suffix(".tar.gz"), "w:gz"
|
114
|
+
) as tar:
|
115
|
+
tar.add(
|
116
|
+
str(exp_output_root / relpath), arcname=relpath.relative_to(relpath.parent)
|
117
|
+
)
|
118
|
+
|
119
|
+
if remove_after:
|
120
|
+
shutil.rmtree(exp_output_root / relpath)
|
121
|
+
|
122
|
+
|
123
|
+
__all__ = [
    "proc_batch_exp",
]
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Copyright 2021 John Harwell, All rights reserved.
|
2
|
+
#
|
3
|
+
# SPDX-License-Identifier: MIT
|
4
|
+
"""
|
5
|
+
Container module for the processing plugin to decompress data.
|
6
|
+
|
7
|
+
See :ref:`plugins/proc/decompress`.
|
8
|
+
"""
|
9
|
+
|
10
|
+
# Core packages
|
11
|
+
|
12
|
+
# 3rd party packages
|
13
|
+
|
14
|
+
# Project packages
|
15
|
+
|
16
|
+
|
17
|
+
def sierra_plugin_type() -> str:
    """Identify this plugin to SIERRA as a pipeline plugin."""
    return "pipeline"
@@ -0,0 +1,96 @@
|
|
1
|
+
# Copyright 2025 John Harwell, All rights reserved.
|
2
|
+
#
|
3
|
+
# SPDX-License-Identifier: MIT
|
4
|
+
"""
|
5
|
+
Plugin for decompressing experiment data. Currently only works with .tar.gz files.
|
6
|
+
"""
|
7
|
+
|
8
|
+
# Core packages
|
9
|
+
import multiprocessing as mp
|
10
|
+
import typing as tp
|
11
|
+
import logging
|
12
|
+
import pathlib
|
13
|
+
|
14
|
+
# 3rd party packages
|
15
|
+
import tarfile
|
16
|
+
|
17
|
+
# Project packages
|
18
|
+
import sierra.core.variables.batch_criteria as bc
|
19
|
+
from sierra.core import types, utils, batchroot
|
20
|
+
|
21
|
+
_logger = logging.getLogger(__name__)
|
22
|
+
|
23
|
+
|
24
|
+
def proc_batch_exp(
    main_config: types.YAMLDict,
    cmdopts: types.Cmdopts,
    pathset: batchroot.PathSet,
    criteria: bc.XVarBatchCriteria,
) -> None:
    """
    Decompress data for each :term:`Experiment` in the :term:`Batch Experiment`.

    Ideally this is done in parallel across experiments, but this can be changed
    to serial if memory on the SIERRA host machine is limited via
    ``--processing-parallelism``.
    """

    exp_to_proc = utils.exp_range_calc(
        cmdopts["exp_range"], pathset.output_root, criteria.gen_exp_names()
    )

    parallelism = cmdopts["processing_parallelism"]

    # Build the full task list up front (one task per tarball, per
    # _build_tasklist_for_exp) so the pool can keep all workers busy even when
    # there are more CPUs than experiments.
    tasks = []
    for exp in exp_to_proc:
        tasks.extend(_build_tasklist_for_exp(pathset.output_root / exp.name))

    _logger.debug("Starting %s workers, method=%s", parallelism, mp.get_start_method())
    with mp.Pool(processes=parallelism, maxtasksperchild=1) as pool:
        processed = [pool.starmap_async(_worker, tasks)]
        _logger.debug("Waiting for workers to finish")

        # Re-raise any exceptions from the worker processes so failures are
        # not silent.
        for p in processed:
            p.get()

        pool.close()
        pool.join()

    _logger.debug("All workers finished")
62
|
+
def _build_tasklist_for_exp(
|
63
|
+
exp_output_root: pathlib.Path,
|
64
|
+
) -> tp.List[tp.Tuple[pathlib.Path, pathlib.Path]]:
|
65
|
+
"""Add all compressed files from experiment to queue for processing.
|
66
|
+
|
67
|
+
Enqueueing for processing is done at the file-level rather than
|
68
|
+
per-experiment, so that for systems with more CPUs than experiments you
|
69
|
+
still get maximum pthroughput.
|
70
|
+
"""
|
71
|
+
res = []
|
72
|
+
for f in exp_output_root.rglob("*.tar.gz"):
|
73
|
+
res.append((exp_output_root, f.relative_to(exp_output_root)))
|
74
|
+
|
75
|
+
return res
|
76
|
+
|
77
|
+
|
78
|
+
def _worker(exp_output_root: pathlib.Path, relpath: pathlib.Path) -> None:
|
79
|
+
"""Decompress a single tarball from a single experiment.
|
80
|
+
|
81
|
+
Arguments:
|
82
|
+
exp_output_root: Output root for the :term:`Experiment`.
|
83
|
+
|
84
|
+
relpath: Path to the actual tarball relative to the experiment root.
|
85
|
+
"""
|
86
|
+
if not (exp_output_root / relpath).exists():
|
87
|
+
_logger.warning(
|
88
|
+
"Cannot decompress: %s does not exist", (exp_output_root / relpath)
|
89
|
+
)
|
90
|
+
return
|
91
|
+
|
92
|
+
with tarfile.open(exp_output_root / relpath, "r:gz") as tar:
|
93
|
+
tar.extractall(filter="data", path=str((exp_output_root / relpath).parent))
|
94
|
+
|
95
|
+
|
96
|
+
__all__ = [
    "proc_batch_exp",
]
|
@@ -0,0 +1,15 @@
|
|
1
|
+
#
|
2
|
+
# Copyright 2025 John Harwell, All rights reserved.
|
3
|
+
#
|
4
|
+
# SPDX-License Identifier: MIT
|
5
|
+
#
|
6
|
+
"""Container module for the :term:`imagizing` data processing plugin."""
|
7
|
+
# Core packages
|
8
|
+
|
9
|
+
# 3rd party packages
|
10
|
+
|
11
|
+
# Project packages
|
12
|
+
|
13
|
+
|
14
|
+
def sierra_plugin_type() -> str:
    """Identify this plugin to SIERRA as a pipeline plugin."""
    return "pipeline"
@@ -0,0 +1,49 @@
|
|
1
|
+
#
|
2
|
+
# Copyright 2025 John Harwell, All rights reserved.
|
3
|
+
#
|
4
|
+
# SPDX-License Identifier: MIT
|
5
|
+
#
|
6
|
+
|
7
|
+
# Core packages
|
8
|
+
import typing as tp
|
9
|
+
import argparse
|
10
|
+
|
11
|
+
# 3rd party packages
|
12
|
+
|
13
|
+
# Project packages
|
14
|
+
from sierra.core import types
|
15
|
+
from sierra.plugins import PluginCmdline
|
16
|
+
|
17
|
+
|
18
|
+
def build(
    parents: tp.List[argparse.ArgumentParser], stages: tp.List[int]
) -> PluginCmdline:
    """
    Get a cmdline parser supporting the ``proc.imagize`` processing plugin.
    """
    cmdline = PluginCmdline(parents, stages)
    # This plugin only participates in stage 3, so its single option is
    # attached to the stage 3 argument group.
    cmdline.stage3.add_argument(
        "--imagize-no-stats",
        action="store_true",
        help="""
             If the ``proc.imagize`` plugin is run, don't run statistics
             generation/assume it has already been run. This can save TONS of
             time for large imagizing workloads/workloads where the memory
             limitations of the SIERRA host machine are such that you need to
             specify different levels of ``--processing-parallelism`` for
             statistics calculations/imagizing to avoid filling up memory.
             """
        + cmdline.stage_usage_doc([3]),
        default=False,
    )
    return cmdline
41
|
+
|
42
|
+
def to_cmdopts(args: argparse.Namespace) -> types.Cmdopts:
    """Map the parsed ``proc.imagize`` arguments onto a cmdopts dictionary."""
    opts: types.Cmdopts = {"imagize_no_stats": args.imagize_no_stats}
    return opts
|
+
|
47
|
+
|
48
|
+
def sphinx_cmdline_stage3():
    """Return the stage 3 parser for generating the Sphinx cmdline docs."""
    return build(parents=[], stages=[3]).parser